flurryx-code-memory 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. code_memory/__init__.py +1 -0
  2. code_memory/claims/__init__.py +32 -0
  3. code_memory/claims/extractor.py +325 -0
  4. code_memory/claims/indexer.py +258 -0
  5. code_memory/claims/resolver.py +186 -0
  6. code_memory/claims/store.py +424 -0
  7. code_memory/cli.py +1192 -0
  8. code_memory/config.py +268 -0
  9. code_memory/embed/__init__.py +224 -0
  10. code_memory/embed/cache.py +204 -0
  11. code_memory/embed/m3.py +174 -0
  12. code_memory/embed/ollama.py +92 -0
  13. code_memory/embed/tei.py +106 -0
  14. code_memory/episodic/__init__.py +3 -0
  15. code_memory/episodic/sqlite_store.py +278 -0
  16. code_memory/extractor/__init__.py +3 -0
  17. code_memory/extractor/csproj.py +166 -0
  18. code_memory/extractor/dll.py +385 -0
  19. code_memory/extractor/gitignore.py +162 -0
  20. code_memory/extractor/nuget.py +275 -0
  21. code_memory/extractor/sanity.py +124 -0
  22. code_memory/extractor/sln.py +108 -0
  23. code_memory/extractor/treesitter.py +1172 -0
  24. code_memory/graph/__init__.py +3 -0
  25. code_memory/graph/falkor_store.py +740 -0
  26. code_memory/mcp_server.py +1816 -0
  27. code_memory/metrics.py +260 -0
  28. code_memory/orchestrator/__init__.py +13 -0
  29. code_memory/orchestrator/git_delta.py +211 -0
  30. code_memory/orchestrator/ingest_state.py +71 -0
  31. code_memory/orchestrator/pipeline.py +1478 -0
  32. code_memory/orchestrator/reset.py +130 -0
  33. code_memory/orchestrator/resolver.py +825 -0
  34. code_memory/orchestrator/retrieve.py +505 -0
  35. code_memory/resilience.py +73 -0
  36. code_memory/sync/__init__.py +20 -0
  37. code_memory/sync/autostart/__init__.py +42 -0
  38. code_memory/sync/autostart/base.py +106 -0
  39. code_memory/sync/autostart/launchd.py +115 -0
  40. code_memory/sync/autostart/schtasks.py +155 -0
  41. code_memory/sync/autostart/systemd.py +113 -0
  42. code_memory/sync/hooks.py +164 -0
  43. code_memory/sync/safety.py +65 -0
  44. code_memory/sync/snapshot.py +461 -0
  45. code_memory/sync/store.py +399 -0
  46. code_memory/sync/sync.py +405 -0
  47. code_memory/sync/watcher.py +320 -0
  48. code_memory/vector/__init__.py +3 -0
  49. code_memory/vector/qdrant_store.py +302 -0
  50. flurryx_code_memory-0.4.0.dist-info/METADATA +26 -0
  51. flurryx_code_memory-0.4.0.dist-info/RECORD +53 -0
  52. flurryx_code_memory-0.4.0.dist-info/WHEEL +4 -0
  53. flurryx_code_memory-0.4.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,1816 @@
1
+ """MCP server exposing code-memory as native tools for coding agents.
2
+
3
+ Tools:
4
+ - codememory_retrieve(query, k?, eps?, project?) — orientation
5
+ - codememory_record(prompt, plan?, patch?, verdict?, project?)
6
+ - codememory_reingest(path, project?)
7
+ - codememory_ingest(root, project, full?, since?, dry_run?, confirmed?)
8
+ - codememory_callers(symbol, depth?, project?) — topology
9
+ - codememory_callees(symbol, depth?, project?)
10
+ - codememory_importers(target, project?)
11
+ - codememory_dependencies(file, depth?, project?)
12
+ - codememory_injects(symbol, project?) — Angular/Razor DI
13
+ - codememory_injectors(token, project?)
14
+ - codememory_definitions(symbol, project?)
15
+ - codememory_assembly_members(type, assembly?, project?) — .NET DLL surface
16
+ - codememory_drift(head_sha, project?) — temporal
17
+ - codememory_at_sha(sha, sha_ord?, label?, limit?, project?)
18
+ - codememory_callers_at_sha(symbol, sha, sha_ord?, project?)
19
+ - codememory_extract_claims(prompts, project, session_id?) — Graphiti-style
20
+ - codememory_assert_claim(subject, predicate, object, project, ...) —
21
+ agent-authored direct claim (no LLM)
22
+ - codememory_claims(subject?, as_of?, project)
23
+
24
+ Transport: stdio. Register via `code-memory-mcp` script entrypoint.
25
+ """
26
+
27
+ from __future__ import annotations
28
+
29
+ import json
30
+ import logging
31
+ import os
32
+ from collections.abc import Callable
33
+ from pathlib import Path
34
+ from typing import Any
35
+
36
+ import anyio
37
+ from mcp.server import Server
38
+ from mcp.server.stdio import stdio_server
39
+ from mcp.types import TextContent, Tool
40
+
41
+ from dataclasses import asdict
42
+
43
+ from .config import CONFIG, detect_project_slug
44
+ from .episodic import Episode
45
+ from .graph import FalkorStore
46
+ from .orchestrator import Pipeline, Retriever
47
+ from .orchestrator.pipeline import IngestMode
48
+
49
+ log = logging.getLogger("codememory.mcp")
50
+
51
+ SERVER_NAME = "code-memory"
52
+
53
+
54
def _resolved_default_slug() -> str:
    """Return the project slug detected at server startup, if any.

    The value is interpolated into each tool's ``project`` field
    description so that smaller / open-weight models — which tend to drop
    optional parameters or make up values — are shown the *concrete* slug
    to pass instead of having to guess.

    Returns:
        Whatever ``detect_project_slug()`` returns, or the placeholder
        ``"<auto-detect failed>"`` when that call raises.
    """
    try:
        slug = detect_project_slug()
    except Exception:
        # Deliberately broad: detection is best-effort and a failure here
        # must not prevent the module from importing.
        return "<auto-detect failed>"
    return slug


# Resolved once, at import time.
_DEFAULT_SLUG = _resolved_default_slug()
68
+
69
+
70
def _project_schema() -> dict[str, Any]:
    """Build the JSON-schema fragment used for every tool's ``project`` field.

    ``project`` is mandatory on all tools: a silent cwd-based fallback used
    to mask namespace bugs (see commit `6ff8a27`). The description embeds
    the slug resolved at startup so the model is told the *exact* value to
    pass for the current working directory.

    Returns:
        A fresh dict with ``type`` and ``description`` keys.
    """
    hint = (
        "REQUIRED. Project slug for namespaced storage. For this server, "
        f"pass exactly `{_DEFAULT_SLUG}` to query the current project. "
        "Pass a different slug only when you intentionally want another "
        "project. Sentinel values like 'auto' or 'default' are rejected."
    )
    return {"type": "string", "description": hint}
87
+
88
+
89
+ _TOOLS: list[Tool] = [
90
+ Tool(
91
+ name="codememory_retrieve",
92
+ description=(
93
+ "Retrieve a context pack (code chunks, past episodes, graph neighbors) "
94
+ "for a natural-language query. Use first for codebase/repo/docs "
95
+ "orientation before grep/glob/read, and before editing unfamiliar code."
96
+ ),
97
+ inputSchema={
98
+ "type": "object",
99
+ "properties": {
100
+ "query": {"type": "string", "description": "Natural-language query."},
101
+ "k": {"type": "integer", "default": 8, "description": "Top-k code chunks."},
102
+ "eps": {"type": "integer", "default": 5, "description": "Top-k episodes."},
103
+ "include_idle_episodes": {
104
+ "type": "boolean",
105
+ "default": False,
106
+ "description": "Include episodes with verdict='idle' (off by default).",
107
+ },
108
+ "project": _project_schema(),
109
+ },
110
+ "required": ["query", "project"],
111
+ },
112
+ ),
113
+ Tool(
114
+ name="codememory_record",
115
+ description=(
116
+ "Record a task episode (prompt + optional plan/patch/verdict). "
117
+ "Call after completing a task so future queries can recall it."
118
+ ),
119
+ inputSchema={
120
+ "type": "object",
121
+ "properties": {
122
+ "prompt": {"type": "string"},
123
+ "plan": {"type": "string"},
124
+ "patch": {"type": "string"},
125
+ "verdict": {"type": "string", "description": "e.g. 'success', 'reverted'."},
126
+ "project": _project_schema(),
127
+ },
128
+ "required": ["prompt", "project"],
129
+ },
130
+ ),
131
+ Tool(
132
+ name="codememory_reingest",
133
+ description=(
134
+ "Re-index a single file after edits so subsequent retrieval reflects "
135
+ "current state. Call after writing or editing source files."
136
+ ),
137
+ inputSchema={
138
+ "type": "object",
139
+ "properties": {
140
+ "path": {"type": "string", "description": "Absolute or cwd-relative file path."},
141
+ "project": _project_schema(),
142
+ },
143
+ "required": ["path", "project"],
144
+ },
145
+ ),
146
+ Tool(
147
+ name="codememory_ingest",
148
+ description=(
149
+ "DISABLED BY DEFAULT — do NOT call this tool to actually run an "
150
+ "ingest. MCP transport blocks for the full duration of the call "
151
+ "and the host (Claude Code / OpenCode / ...) does not surface "
152
+ "mid-call `notifications/progress` back to the agent, so the user "
153
+ "would see no progress feedback. Instead, run the Bash CLI:\n\n"
154
+ " code-memory ingest <root> --project <slug>\n\n"
155
+ "Prefer `run_in_background=true` + periodic `BashOutput` polls so "
156
+ "you (the agent) can narrate progress turn-by-turn; the CLI emits "
157
+ "throttled `[code-memory] files=… symbols=… rate=…/s` lines to "
158
+ "stderr every 50 files. The user can `tail -f` the same stream "
159
+ "independently. Calling this MCP tool returns a steering payload "
160
+ "pointing to the CLI. The blocking MCP path can be re-enabled "
161
+ "by setting `CODE_MEMORY_MCP_INGEST_ENABLED=1` in the server env "
162
+ "(then `confirmed=true` is also required)."
163
+ ),
164
+ inputSchema={
165
+ "type": "object",
166
+ "properties": {
167
+ "root": {
168
+ "type": "string",
169
+ "description": "Absolute path to the repo root to ingest.",
170
+ },
171
+ "project": _project_schema(),
172
+ "full": {
173
+ "type": "boolean",
174
+ "default": False,
175
+ "description": (
176
+ "If true, purge this project's storage and walk every "
177
+ "file. Equivalent to CLI `ingest --full`."
178
+ ),
179
+ },
180
+ "since": {
181
+ "type": "string",
182
+ "description": (
183
+ "Optional base ref (branch/tag/sha) to diff against "
184
+ "HEAD. Overrides stored ingest state when set."
185
+ ),
186
+ },
187
+ "dry_run": {
188
+ "type": "boolean",
189
+ "default": False,
190
+ "description": "Compute plan only; do not write to storage.",
191
+ },
192
+ "confirmed": {
193
+ "type": "boolean",
194
+ "default": False,
195
+ "description": (
196
+ "Must be true to actually run. Set only after the "
197
+ "user explicitly authorized this ingest in chat."
198
+ ),
199
+ },
200
+ },
201
+ "required": ["root", "project"],
202
+ },
203
+ ),
204
+ Tool(
205
+ name="codememory_callers",
206
+ description=(
207
+ "Files that call a symbol. Use for impact analysis ('what breaks "
208
+ "if I rename X?') and to navigate from a definition to its uses."
209
+ ),
210
+ inputSchema={
211
+ "type": "object",
212
+ "properties": {
213
+ "symbol": {"type": "string", "description": "Symbol name (e.g. 'getBearerToken')."},
214
+ "depth": {"type": "integer", "default": 1, "description": "Traversal depth, 1-3."},
215
+ "project": _project_schema(),
216
+ },
217
+ "required": ["symbol", "project"],
218
+ },
219
+ ),
220
+ Tool(
221
+ name="codememory_callees",
222
+ description=(
223
+ "Symbols called from the file that defines a given symbol. "
224
+ "Use to map outgoing dependencies of a service or class."
225
+ ),
226
+ inputSchema={
227
+ "type": "object",
228
+ "properties": {
229
+ "symbol": {"type": "string"},
230
+ "depth": {"type": "integer", "default": 1, "description": "Traversal depth, 1-3."},
231
+ "project": _project_schema(),
232
+ },
233
+ "required": ["symbol", "project"],
234
+ },
235
+ ),
236
+ Tool(
237
+ name="codememory_importers",
238
+ description=(
239
+ "Files that import a module or package. Pass a package name "
240
+ "('@acme-ng/security', 'rxjs') or a relative path that was "
241
+ "preserved during ingest ('./bar')."
242
+ ),
243
+ inputSchema={
244
+ "type": "object",
245
+ "properties": {
246
+ "target": {"type": "string", "description": "Module / package / path to look up."},
247
+ "project": _project_schema(),
248
+ },
249
+ "required": ["target", "project"],
250
+ },
251
+ ),
252
+ Tool(
253
+ name="codememory_dependencies",
254
+ description=(
255
+ "Modules imported by a file (forward import graph). Use to "
256
+ "answer 'what does this file depend on?'."
257
+ ),
258
+ inputSchema={
259
+ "type": "object",
260
+ "properties": {
261
+ "file": {"type": "string", "description": "Absolute file path."},
262
+ "depth": {"type": "integer", "default": 1, "description": "Traversal depth, 1-3."},
263
+ "project": _project_schema(),
264
+ },
265
+ "required": ["file", "project"],
266
+ },
267
+ ),
268
+ Tool(
269
+ name="codememory_injects",
270
+ description=(
271
+ "DI dependencies of a symbol — the tokens its defining file "
272
+ "injects via Angular's ``inject(Token)`` or Razor's ``@inject``. "
273
+ "Use to answer 'what does this use case / service depend on?' "
274
+ "without sifting raw imports. Complements ``codememory_callees``."
275
+ ),
276
+ inputSchema={
277
+ "type": "object",
278
+ "properties": {
279
+ "symbol": {"type": "string", "description": "Symbol whose defining file is the inspection target."},
280
+ "project": _project_schema(),
281
+ },
282
+ "required": ["symbol", "project"],
283
+ },
284
+ ),
285
+ Tool(
286
+ name="codememory_injectors",
287
+ description=(
288
+ "Reverse DI lookup: files that inject a given token. Use to "
289
+ "find every consumer of an Angular DI token / Razor service."
290
+ ),
291
+ inputSchema={
292
+ "type": "object",
293
+ "properties": {
294
+ "token": {"type": "string", "description": "DI token name (e.g. abstract class used as an Angular port)."},
295
+ "project": _project_schema(),
296
+ },
297
+ "required": ["token", "project"],
298
+ },
299
+ ),
300
+ Tool(
301
+ name="codememory_definitions",
302
+ description=(
303
+ "All files+line ranges that define a given symbol name. Use to "
304
+ "disambiguate before calling callers/callees."
305
+ ),
306
+ inputSchema={
307
+ "type": "object",
308
+ "properties": {
309
+ "symbol": {"type": "string"},
310
+ "project": _project_schema(),
311
+ },
312
+ "required": ["symbol", "project"],
313
+ },
314
+ ),
315
+ Tool(
316
+ name="codememory_assembly_members",
317
+ description=(
318
+ "List the public methods declared on a Type from an indexed .NET "
319
+ "Assembly. Members are NOT bulk-indexed (would multiply the graph "
320
+ "by 50-100x for a typical solution); this tool reads them on-demand "
321
+ "from the DLL when the agent needs to disambiguate an overload or "
322
+ "look up an API surface. Same DLL may be parsed multiple times — "
323
+ "fast enough for interactive use (~tens of ms)."
324
+ ),
325
+ inputSchema={
326
+ "type": "object",
327
+ "properties": {
328
+ "type": {
329
+ "type": "string",
330
+ "description": (
331
+ "Fully qualified type name (Namespace.Name). Run "
332
+ "codememory_definitions on the bare name first if "
333
+ "you're unsure which assembly exposes it."
334
+ ),
335
+ },
336
+ "assembly": {
337
+ "type": "string",
338
+ "description": (
339
+ "Optional assembly identity ('Name, Version=X.Y.Z.W'). "
340
+ "When omitted, the first matching assembly wins."
341
+ ),
342
+ },
343
+ "project": _project_schema(),
344
+ },
345
+ "required": ["type", "project"],
346
+ },
347
+ ),
348
+ Tool(
349
+ name="codememory_drift",
350
+ description=(
351
+ "List symbols whose last_seen_sha doesn't match the supplied "
352
+ "git HEAD — either tombstoned (deleted) or drifted (the most "
353
+ "recent ingest didn't confirm them at HEAD). Use to sanity-check "
354
+ "a long-running watcher or to find stale references in comments."
355
+ ),
356
+ inputSchema={
357
+ "type": "object",
358
+ "properties": {
359
+ "head_sha": {
360
+ "type": "string",
361
+ "description": (
362
+ "Git HEAD to compare against. Pass the SHA you consider "
363
+ "'current' (usually the HEAD of the repo associated "
364
+ "with this project)."
365
+ ),
366
+ },
367
+ "project": _project_schema(),
368
+ },
369
+ "required": ["head_sha", "project"],
370
+ },
371
+ ),
372
Tool(
    name="codememory_at_sha",
    description=(
        "List Symbol (default) or File nodes that were "
        "alive at the supplied commit. Combine with "
        "codememory_callers_at_sha for 'what called X back then'."
    ),
    inputSchema={
        "type": "object",
        "properties": {
            "sha": {
                "type": "string",
                "description": "Full git SHA to query the graph state at.",
            },
            # Exactly one ``sha_ord`` entry. The key used to appear twice
            # in this literal; Python keeps only the last occurrence, so a
            # stale "compute on the caller side" description was what got
            # advertised even though ``sha_ord`` is not in ``required``.
            # The text below matches the ``required`` list and the wording
            # used by ``codememory_callers_at_sha``.
            "sha_ord": {
                "type": "integer",
                "description": (
                    "Topological ordinal of sha. Server auto-computes "
                    "if omitted."
                ),
            },
            "label": {
                "type": "string",
                "enum": ["Symbol", "File"],
                "default": "Symbol",
                "description": "Which node label to enumerate.",
            },
            "limit": {
                "type": "integer",
                "default": 200,
                "description": "Maximum rows to return.",
            },
            "project": _project_schema(),
        },
        "required": ["sha", "project"],
    },
),
417
+ Tool(
418
+ name="codememory_callers_at_sha",
419
+ description=(
420
+ "Callers of a symbol as the graph looked at the supplied commit. "
421
+ "Answers 'what used to call X before commit Y deleted it' without "
422
+ "a worktree checkout."
423
+ ),
424
+ inputSchema={
425
+ "type": "object",
426
+ "properties": {
427
+ "symbol": {"type": "string"},
428
+ "sha": {"type": "string"},
429
+ "sha_ord": {
430
+ "type": "integer",
431
+ "description": (
432
+ "Topological ordinal of sha. Server auto-computes "
433
+ "if omitted."
434
+ ),
435
+ },
436
+ "project": _project_schema(),
437
+ },
438
+ "required": ["symbol", "sha", "project"],
439
+ },
440
+ ),
441
+ Tool(
442
+ name="codememory_extract_claims",
443
+ description=(
444
+ "Graphiti-style: extract bi-temporal (subject, predicate, object) "
445
+ "claims from user prompts via a local LLM (gemma2:9b by default) "
446
+ "and store them with valid_at = prompt timestamp, recorded_at = "
447
+ "now, head_sha = current HEAD. Single-valued predicates ('uses', "
448
+ "'prefers', 'deployed-to', ...) close prior conflicting "
449
+ "assertions. On by default. Set CLAIMS_EXTRACTION=false to "
450
+ "disable. Fire-and-forget — call from a Stop hook, not inline "
451
+ "in a turn."
452
+ ),
453
+ inputSchema={
454
+ "type": "object",
455
+ "properties": {
456
+ "prompts": {
457
+ "type": "array",
458
+ "description": (
459
+ "List of user prompts to extract from. Each item "
460
+ "is either a raw string or "
461
+ "{text: string, ts?: number, id?: string}."
462
+ ),
463
+ "items": {"type": ["string", "object"]},
464
+ },
465
+ "session_id": {
466
+ "type": "string",
467
+ "description": "Originating session for provenance.",
468
+ },
469
+ "project": _project_schema(),
470
+ },
471
+ "required": ["prompts", "project"],
472
+ },
473
+ ),
474
+ Tool(
475
+ name="codememory_assert_claim",
476
+ description=(
477
+ "Agent-authored direct claim. Use this when YOU (the agent) "
478
+ "judge that a user message contains a durable assertion worth "
479
+ "remembering across sessions: stable preferences, ownership, "
480
+ "tech-stack decisions, rejections, or explicit corrections of "
481
+ "your behavior. NO LLM is invoked — you supply the structured "
482
+ "triple yourself. Prefer this over codememory_extract_claims "
483
+ "when the assertion is unambiguous; reserve extract_claims for "
484
+ "batch processing of multiple prompts.\n\n"
485
+ "Predicate vocab (kebab-case verbs): `uses`, `prefers`, "
486
+ "`rejected`, `wants-to`, `is-located-at`, `depends-on`, "
487
+ "`deployed-to`, `owns`, `is-a`, `mentioned`, `worked-on`. "
488
+ "Single-valued predicates (uses/prefers/deployed-to/...) "
489
+ "auto-close prior conflicting assertions.\n\n"
490
+ "Worth asserting: 'we use Postgres', 'I prefer terse output', "
491
+ "'don't ship dark mode'. Not worth asserting: questions, "
492
+ "hypotheticals, transient task state, info already in code."
493
+ ),
494
+ inputSchema={
495
+ "type": "object",
496
+ "properties": {
497
+ "subject": {
498
+ "type": "string",
499
+ "description": (
500
+ "Noun phrase: `user`, `project`, a service name, "
501
+ "a person, a module."
502
+ ),
503
+ },
504
+ "predicate": {
505
+ "type": "string",
506
+ "description": (
507
+ "Kebab-case verb phrase. See description for vocab."
508
+ ),
509
+ },
510
+ "object": {
511
+ "type": "string",
512
+ "description": "Noun phrase for what the predicate links to.",
513
+ },
514
+ "polarity": {
515
+ "type": "boolean",
516
+ "default": True,
517
+ "description": (
518
+ "True asserts, False negates "
519
+ "('user does not use X')."
520
+ ),
521
+ },
522
+ "confidence": {
523
+ "type": "number",
524
+ "minimum": 0,
525
+ "maximum": 1,
526
+ "default": 0.95,
527
+ "description": (
528
+ "How sure are you this is a durable assertion? "
529
+ "0.95 default since you triaged it yourself."
530
+ ),
531
+ },
532
+ "evidence_span": {
533
+ "type": "string",
534
+ "description": (
535
+ "Optional verbatim quote from the user message "
536
+ "that justifies this claim. Recommended for "
537
+ "auditability."
538
+ ),
539
+ },
540
+ "valid_at": {
541
+ "type": "number",
542
+ "description": (
543
+ "Optional unix epoch seconds. Defaults to now. "
544
+ "Set this to the user-message timestamp if "
545
+ "available."
546
+ ),
547
+ },
548
+ "session_id": {
549
+ "type": "string",
550
+ "description": "Originating session for provenance.",
551
+ },
552
+ "source_prompt_id": {
553
+ "type": "string",
554
+ "description": "Optional ID of the source user prompt.",
555
+ },
556
+ "project": _project_schema(),
557
+ },
558
+ "required": ["subject", "predicate", "object", "project"],
559
+ },
560
+ ),
561
+ Tool(
562
+ name="codememory_claims",
563
+ description=(
564
+ "Read currently-valid user claims (or claims as of a given "
565
+ "world-time). Use to surface user preferences and stated facts "
566
+ "in retrieve packs or to answer 'what did the user say about X?'."
567
+ ),
568
+ inputSchema={
569
+ "type": "object",
570
+ "properties": {
571
+ "subject": {
572
+ "type": "string",
573
+ "description": "Filter by subject (exact match).",
574
+ },
575
+ "as_of": {
576
+ "type": "number",
577
+ "description": (
578
+ "Optional unix epoch seconds; returns claims valid "
579
+ "at that world-time. Omit for current state."
580
+ ),
581
+ },
582
+ "limit": {"type": "integer", "default": 50},
583
+ "project": _project_schema(),
584
+ },
585
+ "required": ["project"],
586
+ },
587
+ ),
588
+ Tool(
589
+ name="codememory_health",
590
+ description=(
591
+ "Check backend connectivity and stats. Returns Ollama, Qdrant, "
592
+ "FalkorDB status plus collection counts, metric summaries, and "
593
+ "last ingest timestamp."
594
+ ),
595
+ inputSchema={
596
+ "type": "object",
597
+ "properties": {
598
+ "project": _project_schema(),
599
+ },
600
+ "required": ["project"],
601
+ },
602
+ ),
603
+ Tool(
604
+ name="codememory_record_read",
605
+ description="Record a filesystem read after an MCP tool call for efficiency tracking.",
606
+ inputSchema={
607
+ "type": "object",
608
+ "properties": {
609
+ "tool": {"type": "string", "description": "Filesystem tool name (grep, read, bash, glob)"},
610
+ "path": {"type": "string", "description": "File path or pattern accessed"},
611
+ "session_id": {"type": "string", "description": "Session ID for correlation"},
612
+ "project": _project_schema(),
613
+ "chars": {"type": "integer", "description": "Output character count (optional)"},
614
+ },
615
+ "required": ["tool", "project"],
616
+ },
617
+ ),
618
+ ]
619
+
620
+
621
class MissingProjectError(ValueError):
    """Signal that an MCP tool call did not supply a usable ``project``.

    The error message is intentionally instructive. Falling back to
    cwd-detection used to hide cases where a model invented a project name
    or left the field out, so queries quietly hit the wrong namespace
    (see commit `6ff8a27`). The message therefore spells out the
    startup-detected slug and how to list the others.
    """

    def __init__(self) -> None:
        """Initialise the exception with the fixed guidance message."""
        message = (
            "`project` parameter is required. Pass the slug of the project "
            "you're querying. The server's cwd-detected default is "
            f"`{_DEFAULT_SLUG}` — pass that exact value to use it, or pass "
            "a different slug to query another project. Use the "
            "`code-memory projects` CLI to list available slugs."
        )
        super().__init__(message)
638
+
639
+
640
def _require_project(args: dict[str, Any]) -> str:
    """Extract and validate the ``project`` slug from tool arguments.

    Args:
        args: Raw tool-call arguments.

    Returns:
        The slug with surrounding whitespace removed.

    Raises:
        MissingProjectError: If ``project`` is absent, not a string, blank,
            or one of the sentinel values ``auto`` / ``default``
            (case-insensitive). Sentinels are not real slugs; accepting
            them would bring back the silent wrong-namespace behaviour.
    """
    candidate = args.get("project")
    # Non-string input collapses to "" and is rejected like a blank slug.
    slug = candidate.strip() if isinstance(candidate, str) else ""
    if slug and slug.lower() not in ("auto", "default"):
        return slug
    raise MissingProjectError()
654
+
655
+
656
def _graph_for(project: str) -> tuple[FalkorStore, str]:
    """Open the graph store for ``project``.

    Args:
        project: A slug that has already been validated by the caller.

    Returns:
        A ``(graph, slug)`` pair; the slug is returned unchanged.
    """
    graph_name = CONFIG.for_project(project).falkor_graph
    store = FalkorStore(graph_name=graph_name)
    return store, project
660
+
661
+
662
def _text(payload: Any) -> list[TextContent]:
    """Wrap ``payload`` in a single-element MCP text response.

    Strings are sent as-is. Anything else is rendered as indented JSON,
    with ``str`` as the fallback serializer for values ``json`` cannot
    encode natively.
    """
    if isinstance(payload, str):
        body = payload
    else:
        body = json.dumps(payload, default=str, indent=2)
    return [TextContent(type="text", text=body)]
666
+
667
+
668
def _ensure_fresh(project: str) -> None:
    """Pre-query guard: ask the sync layer to catch up the active repo.

    Does nothing when ``CODE_MEMORY_NO_GUARD`` is set to a non-empty value,
    or when the repo directory (``CODE_MEMORY_REPO`` if set, otherwise the
    current working directory) has no ``.git`` entry. Otherwise it calls
    ``sync_repo`` with ``trigger="pre-query"`` and ``fetch=False``. Any
    exception from that call is logged and swallowed so a failed sync
    never blocks the query itself.

    Args:
        project: Validated project slug forwarded to ``sync_repo``.
    """
    guard_disabled = bool(os.environ.get("CODE_MEMORY_NO_GUARD"))
    if guard_disabled:
        return

    repo_hint = os.environ.get("CODE_MEMORY_REPO") or os.getcwd()
    repo = Path(repo_hint).resolve()
    if (repo / ".git").exists():
        try:
            # Imported lazily, inside the guarded region, so an import
            # failure is handled like any other sync failure.
            from .sync import sync_repo

            sync_repo(repo, project=project, trigger="pre-query", fetch=False)
        except Exception:  # noqa: BLE001
            log.exception("pre-query guard sync failed")
685
+
686
+
687
def _retrieve(args: dict[str, Any]) -> list[TextContent]:
    """Handle ``codememory_retrieve``: build and render a context pack.

    Reads ``query`` (required) plus the optional ``k`` (default 8), ``eps``
    (default 5) and ``include_idle_episodes`` (default False) arguments,
    runs the pre-query freshness guard, and returns the rendered pack
    prefixed with a line naming the project.

    Raises:
        MissingProjectError: If ``project`` is missing or invalid.
        KeyError: If ``query`` is absent.
    """
    project = _require_project(args)
    _ensure_fresh(project)

    query = args["query"]
    options = {
        "top_k_code": int(args.get("k", 8)),
        "top_k_eps": int(args.get("eps", 5)),
        "include_idle_episodes": bool(args.get("include_idle_episodes", False)),
    }

    retriever = Retriever(project=project)
    pack = retriever.retrieve(query, **options)
    header = f"_Project: `{retriever.slug}`_"
    return _text(f"{header}\n\n{pack.render()}")
702
+
703
+
704
def _record(args: dict[str, Any]) -> list[TextContent]:
    """Handle ``codememory_record``: persist one task episode.

    ``prompt`` is required; ``plan``, ``patch`` and ``verdict`` are
    optional and empty values are normalised to ``None``.

    Returns:
        A JSON payload with the pipeline's project slug and the new
        episode id.

    Raises:
        MissingProjectError: If ``project`` is missing or invalid.
        KeyError: If ``prompt`` is absent.
    """
    project = _require_project(args)
    pipe = Pipeline(project=project)

    # Falsy optional fields (missing, "", None) are all stored as None.
    optional = {
        field: args.get(field) or None for field in ("plan", "patch", "verdict")
    }
    episode = Episode(prompt=args["prompt"], **optional)

    episode_id = pipe.record_episode(episode)
    return _text({"project": pipe.slug, "id": episode_id})
715
+
716
+
717
def _reingest(args: dict[str, Any]) -> list[TextContent]:
    """Handle ``codememory_reingest``: re-index a single file.

    Returns:
        An ``error`` payload when ``path`` is not an existing regular file
        or when the pipeline reports the file type as unsupported
        (``reingest_file`` returned ``None``). Otherwise a summary with
        the project slug, the extracted path, and symbol / import counts.

    Raises:
        MissingProjectError: If ``project`` is missing or invalid.
        KeyError: If ``path`` is absent.
    """
    project = _require_project(args)
    target = Path(args["path"])
    if not (target.exists() and target.is_file()):
        return _text({"error": f"not a file: {target}"})

    pipe = Pipeline(project=project)
    extraction = pipe.reingest_file(target)
    if extraction is None:
        return _text({"error": "unsupported file type", "path": str(target)})

    summary = {
        "project": pipe.slug,
        "path": extraction.path,
        "symbols": len(extraction.symbols),
        "imports": len(extraction.imports),
    }
    return _text(summary)
734
+
735
+
736
def _ingest_cli_command(
    root: Path,
    project: str,
    *,
    full: bool,
    since: str | None,
    dry_run: bool,
) -> str:
    """Render the ``code-memory ingest`` command line equivalent to a tool call.

    Every caller-supplied value is shell-quoted: the resulting string is
    handed to an agent that is told to run it in Bash, so an unquoted path
    containing spaces would break the command and shell metacharacters in
    ``root`` / ``project`` / ``since`` would be interpreted by the shell.
    Values made only of shell-safe characters are emitted unchanged.
    """
    import shlex

    parts = ["code-memory", "ingest", shlex.quote(str(root))]
    if project:
        parts += ["--project", shlex.quote(project)]
    if full:
        parts.append("--full")
    if since:
        parts += ["--since", shlex.quote(str(since))]
    if dry_run:
        parts.append("--dry-run")
    return " ".join(parts)


def _ingest(
    args: dict[str, Any],
    *,
    on_progress: Callable[[int, int | None, str], None] | None = None,
) -> list[TextContent]:
    """Handle ``codememory_ingest``: steer to the CLI, or run the ingest.

    Outcomes, in order of evaluation:

    1. ``root`` missing / blank / not a directory -> ``error`` payload.
    2. ``CODE_MEMORY_MCP_INGEST_ENABLED`` is not exactly ``"1"`` ->
       ``disabled_use_cli`` payload carrying the equivalent CLI command.
    3. ``confirmed`` is falsy -> ``confirmation_required`` payload.
    4. Otherwise the pipeline's ``ingest_repo`` runs and its stats are
       returned.

    Args:
        args: Raw tool-call arguments (``root``, ``project``, and the
            optional ``full``, ``since``, ``dry_run``, ``confirmed``).
        on_progress: Optional callback forwarded unchanged to
            ``Pipeline.ingest_repo``.

    Raises:
        MissingProjectError: If ``project`` is missing or invalid.
    """
    project = _require_project(args)
    raw_root = args.get("root")
    if not isinstance(raw_root, str) or not raw_root.strip():
        return _text({"error": "`root` is required (absolute repo path)."})
    root = Path(raw_root).expanduser()
    if not root.exists() or not root.is_dir():
        return _text({"error": f"not a directory: {root}"})

    full = bool(args.get("full", False))
    since = args.get("since") or None
    dry_run = bool(args.get("dry_run", False))
    confirmed = bool(args.get("confirmed", False))
    mode: IngestMode = "full" if full else "auto"
    # Resolve once; every response below reports the same absolute path.
    resolved_root = root.resolve()

    # Default OFF: MCP ingest blocks transport and the host cannot show
    # progress mid-call. Steer the agent to the Bash CLI where progress
    # lines stream to stderr and `run_in_background` + `BashOutput` lets
    # the agent narrate progress turn-by-turn. Operators can re-enable
    # the in-MCP path via env var.
    mcp_path_enabled = os.environ.get("CODE_MEMORY_MCP_INGEST_ENABLED", "0") == "1"
    if not mcp_path_enabled:
        cmd = _ingest_cli_command(
            resolved_root, project, full=full, since=since, dry_run=dry_run
        )
        return _text(
            {
                "status": "disabled_use_cli",
                "project": project,
                "root": str(resolved_root),
                "mode": mode,
                "reason": (
                    "MCP ingest is disabled because the transport blocks for "
                    "the full duration of the call and the host does not "
                    "surface `notifications/progress` mid-call. The agent "
                    "would see only the final result and the user would see "
                    "no progress feedback."
                ),
                "run_this_instead": cmd,
                "agent_guidance": [
                    "Invoke the command above with the Bash tool.",
                    "Pass `run_in_background=true` so the call returns "
                    "immediately with a shell id.",
                    "Between turns, call `BashOutput(shell_id)` to read new "
                    "stderr lines and narrate progress to the user.",
                    "On completion, the final stdout payload is the same "
                    "JSON shape this MCP tool would have returned.",
                ],
                "human_guidance": (
                    "Run `code-memory ingest-watch` in a separate real "
                    "terminal pane (iTerm split / tmux pane / new window) "
                    "for a true live rich progressbar — the ingest pipeline "
                    "always writes a snapshot file the watcher tails. Falls "
                    "back to `tail -f` on the stderr/log if rich isn't "
                    "desired."
                ),
                "override": (
                    "Set CODE_MEMORY_MCP_INGEST_ENABLED=1 in the MCP server "
                    "env to re-enable the in-MCP ingest path; then pass "
                    "`confirmed=true` on the tool call."
                ),
            }
        )

    # The in-MCP path is enabled, but still requires explicit confirmation
    # on the call itself before doing any work.
    if not confirmed:
        return _text(
            {
                "status": "confirmation_required",
                "project": project,
                "root": str(resolved_root),
                "mode": mode,
                "since": since,
                "dry_run": dry_run,
                "warning": (
                    "LONG-RUNNING / BLOCKING operation. Ask the user to "
                    "confirm before re-invoking with `confirmed=true`. "
                    "Full ingests can take minutes to hours."
                ),
            }
        )

    pipe = Pipeline(project=project)
    stats = pipe.ingest_repo(
        root, mode=mode, since=since, dry_run=dry_run, on_progress=on_progress
    )
    return _text(
        {
            "project": pipe.slug,
            "root": str(resolved_root),
            "mode": mode,
            "dry_run": dry_run,
            "ingested": asdict(stats),
        }
    )
839
+
840
+
841
def _callers(args: dict[str, Any]) -> list[TextContent]:
    """Handle ``codememory_callers``: look up callers of ``symbol``.

    ``depth`` defaults to 1 and is passed to the graph store as an int.

    Raises:
        MissingProjectError: If ``project`` is missing or invalid.
        KeyError: If ``symbol`` is absent.
    """
    project = _require_project(args)
    graph, slug = _graph_for(project)
    symbol = args["symbol"]
    depth = int(args.get("depth", 1))
    found = graph.callers(symbol, depth=depth)
    return _text({"project": slug, "symbol": symbol, "callers": found})
845
+
846
+
847
def _callees(args: dict[str, Any]) -> list[TextContent]:
    """Handle ``codememory_callees``: look up callees for ``symbol``.

    ``depth`` defaults to 1 and is passed to the graph store as an int.

    Raises:
        MissingProjectError: If ``project`` is missing or invalid.
        KeyError: If ``symbol`` is absent.
    """
    project = _require_project(args)
    graph, slug = _graph_for(project)
    symbol = args["symbol"]
    depth = int(args.get("depth", 1))
    found = graph.callees(symbol, depth=depth)
    return _text({"project": slug, "symbol": symbol, "callees": found})
851
+
852
+
853
def _importers(args: dict[str, Any]) -> list[TextContent]:
    """Return what the graph store reports as importers of ``args["target"]``."""
    graph, project_slug = _graph_for(_require_project(args))
    target = args["target"]
    found = graph.importers(target)
    payload = {"project": project_slug, "target": target, "importers": found}
    return _text(payload)
857
+
858
+
859
def _dependencies(args: dict[str, Any]) -> list[TextContent]:
    """Return the dependencies the graph store reports for ``args["file"]``.

    ``args["depth"]`` is optional (default 1) and is coerced with ``int``.
    """
    graph, project_slug = _graph_for(_require_project(args))
    file_key = args["file"]
    hops = int(args.get("depth", 1))
    found = graph.dependencies(file_key, depth=hops)
    payload = {"project": project_slug, "file": file_key, "dependencies": found}
    return _text(payload)
863
+
864
+
865
def _injects(args: dict[str, Any]) -> list[TextContent]:
    """Return the graph store's ``injects`` rows for ``args["symbol"]``."""
    graph, project_slug = _graph_for(_require_project(args))
    symbol = args["symbol"]
    found = graph.injects(symbol)
    payload = {"project": project_slug, "symbol": symbol, "injects": found}
    return _text(payload)
869
+
870
+
871
def _injectors(args: dict[str, Any]) -> list[TextContent]:
    """Return the graph store's ``injectors`` rows for ``args["token"]``."""
    graph, project_slug = _graph_for(_require_project(args))
    token = args["token"]
    found = graph.injectors(token)
    payload = {"project": project_slug, "token": token, "injectors": found}
    return _text(payload)
875
+
876
+
877
def _definitions(args: dict[str, Any]) -> list[TextContent]:
    """Return the graph store's ``definitions`` rows for ``args["symbol"]``."""
    graph, project_slug = _graph_for(_require_project(args))
    symbol = args["symbol"]
    found = graph.definitions(symbol)
    payload = {"project": project_slug, "symbol": symbol, "definitions": found}
    return _text(payload)
881
+
882
+
883
def _assembly_members(args: dict[str, Any]) -> list[TextContent]:
    """List the members of one Type exposed by an indexed Assembly.

    Members are not bulk-indexed (a NuGet package can expose 10k+ of
    them), so they are parsed on demand from the DLL recorded on the
    Assembly node.

    Arguments read from ``args``:
        type: required, ``Namespace.Name``; split on the last ``.``.
        assembly: optional assembly key used to narrow the graph match.

    Returns an error payload when ``type`` is missing or ends with a dot,
    when no assembly exposes the type, or when none of the matching DLLs
    could be parsed.
    """
    from .extractor.dll import parse_type_members

    graph, project_slug = _graph_for(_require_project(args))

    qualified = args.get("type")
    if not (isinstance(qualified, str) and qualified):
        return _text({"error": "ValueError", "message": "type is required"})

    namespace, _, name = qualified.rpartition(".")
    if not name:
        return _text(
            {"error": "ValueError", "message": "type must be fully qualified"}
        )

    asm_filter = args.get("assembly")
    params: dict[str, Any] = {"name": name, "ns": namespace}
    clauses = [
        "MATCH (a:Assembly)-[:EXPOSES_TYPE]->(t:Type) ",
        "WHERE t.name = $name AND t.namespace = $ns",
    ]
    if isinstance(asm_filter, str) and asm_filter:
        clauses.append(" AND a.key = $asm")
        params["asm"] = asm_filter
    clauses.append(" RETURN a.key, a.path")

    matches = graph.graph.query("".join(clauses), params).result_set
    if not matches:
        return _text(
            {
                "project": project_slug,
                "type": qualified,
                "assembly": asm_filter,
                "error": "type not found in indexed assemblies",
            }
        )

    # The first assembly whose DLL parses wins; unparsable ones are skipped.
    for asm_key, asm_path in matches:
        members = parse_type_members(asm_path, namespace, name)
        if members is not None:
            described = [
                {
                    "name": member.name,
                    "kind": member.kind,
                    "static": member.static,
                    "params": member.params,
                }
                for member in members
            ]
            return _text(
                {
                    "project": project_slug,
                    "type": qualified,
                    "assembly": asm_key,
                    "count": len(members),
                    "members": described,
                }
            )

    return _text(
        {
            "project": project_slug,
            "type": qualified,
            "error": "no parsable DLL found for the type's assemblies",
        }
    )
954
+
955
+
956
def _drift(args: dict[str, Any]) -> list[TextContent]:
    """Return the graph store's drift rows relative to ``args["head_sha"]``.

    ``head_sha`` must be a non-empty string; otherwise an error payload is
    returned instead of querying the graph.
    """
    graph, project_slug = _graph_for(_require_project(args))
    head = args.get("head_sha")
    if isinstance(head, str) and head:
        found = graph.drift(head)
        return _text(
            {
                "project": project_slug,
                "head_sha": head,
                "count": len(found),
                "items": found,
            }
        )
    return _text({"error": "ValueError", "message": "head_sha is required"})
965
+
966
+
967
+ def _compute_sha_ord(sha: str) -> int:
968
+ """Compute topological ordinal for a git sha."""
969
+ import subprocess
970
+
971
+ repo = Path(os.environ.get("CODE_MEMORY_REPO") or os.getcwd()).resolve()
972
+ out = subprocess.run(
973
+ ["git", "-C", str(repo), "rev-list", "--count", "--first-parent", sha],
974
+ capture_output=True,
975
+ text=True,
976
+ check=False,
977
+ timeout=5,
978
+ )
979
+ if out.returncode != 0 or not out.stdout.strip():
980
+ raise ValueError(f"Cannot compute ordinal for sha {sha[:12]}: {out.stderr.strip()}")
981
+ return int(out.stdout.strip())
982
+
983
+
984
def _at_sha(args: dict[str, Any]) -> list[TextContent]:
    """Query the graph store's ``at_sha`` view for a given revision.

    Arguments read from ``args``:
        sha: required non-empty string.
        sha_ord: optional int; computed with ``_compute_sha_ord`` when it
            is not an int.
        label: ``"Symbol"`` (default) or ``"File"``.
        limit: optional, coerced with ``int`` (default 200).

    Validation failures are returned as ``{"error": "ValueError", ...}``.
    """
    graph, project_slug = _graph_for(_require_project(args))

    sha = args.get("sha")
    if not (isinstance(sha, str) and sha):
        return _text({"error": "ValueError", "message": "sha is required"})

    ordinal = args.get("sha_ord")
    if not isinstance(ordinal, int):
        try:
            ordinal = _compute_sha_ord(sha)
        except ValueError as err:
            return _text({"error": "ValueError", "message": str(err)})

    label = args.get("label", "Symbol")
    if label not in {"Symbol", "File"}:
        return _text(
            {"error": "ValueError", "message": "label must be 'Symbol' or 'File'"}
        )

    max_rows = int(args.get("limit", 200))
    found = graph.at_sha(sha, ordinal, label=label, limit=max_rows)
    payload = {
        "project": project_slug,
        "sha": sha,
        "sha_ord": ordinal,
        "label": label,
        "count": len(found),
        "items": found,
    }
    return _text(payload)
1012
+
1013
+
1014
def _callers_at_sha(args: dict[str, Any]) -> list[TextContent]:
    """Return the callers of ``args["symbol"]`` as of a given revision.

    ``sha`` must be a non-empty string. ``sha_ord`` is computed with
    ``_compute_sha_ord`` when it is not supplied as an int. Validation
    failures are returned as ``{"error": "ValueError", ...}``.
    """
    graph, project_slug = _graph_for(_require_project(args))

    sha = args.get("sha")
    if not (isinstance(sha, str) and sha):
        return _text({"error": "ValueError", "message": "sha is required"})

    ordinal = args.get("sha_ord")
    if not isinstance(ordinal, int):
        try:
            ordinal = _compute_sha_ord(sha)
        except ValueError as err:
            return _text({"error": "ValueError", "message": str(err)})

    symbol = args["symbol"]
    found = graph.callers_at_sha(symbol, sha, ordinal)
    payload = {
        "project": project_slug,
        "symbol": symbol,
        "sha": sha,
        "sha_ord": ordinal,
        "count": len(found),
        "items": found,
    }
    return _text(payload)
1036
+
1037
+
1038
def _extract_claims(args: dict[str, Any]) -> list[TextContent]:
    """Run claim extraction over user prompts and persist the results.

    Fire-and-forget contract from the caller's perspective: malformed
    prompt entries are skipped rather than raised on, and an
    ``ExtractionError`` from the extractor is returned as an
    ``{"error": ...}`` payload so the hook can log and move on.

    Arguments read from ``args``:
        prompts: list whose items are either strings or dicts with
            ``text`` and optional ``ts`` (number) / ``id`` (string).
        session_id: optional string stored on every claim.

    Returns a payload with ``claims_added`` and up to five sample claims,
    a ``disabled`` status when ``CONFIG.claims_enabled`` is false, or an
    error payload.
    """
    project = _require_project(args)
    if not CONFIG.claims_enabled:
        return _text(
            {
                "status": "disabled",
                "hint": "set CLAIMS_EXTRACTION=true (if disabled).",
            }
        )

    raw_prompts = args.get("prompts") or []
    if not isinstance(raw_prompts, list):
        return _text({"error": "ValueError", "message": "`prompts` must be a list."})

    # Normalize every usable prompt to (text, timestamp, prompt id).
    normalized: list[tuple[str, float, str | None]] = []
    for item in raw_prompts:
        if isinstance(item, str):
            text = item.strip()
            if text:
                normalized.append((text, _now(), None))
        elif isinstance(item, dict):
            text = str(item.get("text") or "").strip()
            if not text:
                continue
            ts = item.get("ts")
            ts_val = float(ts) if isinstance(ts, (int, float)) else _now()
            pid = item.get("id")
            pid_val = str(pid) if isinstance(pid, str) and pid else None
            normalized.append((text, ts_val, pid_val))

    if not normalized:
        return _text({"project": project, "claims_added": 0, "claims": []})

    session_id = args.get("session_id")
    session_val = str(session_id) if isinstance(session_id, str) and session_id else None

    repo = Path(os.environ.get("CODE_MEMORY_REPO") or os.getcwd()).resolve()
    head_sha = _head_sha_safe(repo)

    from .claims import (
        ClaimExtractor,
        ClaimRecord,
        ClaimsStore,
        EntityResolver,
    )
    from .claims.extractor import ExtractionError

    cfg = CONFIG.for_project(project)
    store = ClaimsStore(path=cfg.claims_db)
    try:
        extractor = ClaimExtractor()
    except Exception:
        # Do not leak the already-open store when the extractor cannot
        # be constructed.
        store.close()
        raise

    # Entity resolution is optional: claims are stored unlinked when the
    # resolver cannot be built.
    resolver: EntityResolver | None
    try:
        resolver = EntityResolver(project=project, cfg=cfg)
    except Exception:  # noqa: BLE001
        resolver = None

    added = 0
    samples: list[dict[str, Any]] = []
    try:
        for text, ts, pid in normalized:
            try:
                claims = extractor.extract(text)
            except ExtractionError as exc:
                # Report what was persisted before the failure.
                return _text(
                    {
                        "project": project,
                        "error": "ExtractionError",
                        "message": str(exc),
                        "claims_added": added,
                    }
                )
            for c in claims:
                subj_id = _resolve_or_none(resolver, c.subject)
                obj_id = _resolve_or_none(resolver, c.object)
                rec = ClaimRecord(
                    subject=c.subject,
                    predicate=c.predicate,
                    object=c.object,
                    polarity=c.polarity,
                    confidence=c.confidence,
                    evidence_span=c.evidence_span,
                    valid_at=ts,
                    head_sha=head_sha,
                    session_id=session_val,
                    source_prompt_id=pid,
                    entity_subject_id=subj_id,
                    entity_object_id=obj_id,
                )
                store.upsert(rec)
                added += 1
                if len(samples) < 5:
                    samples.append(
                        {
                            "subject": rec.subject,
                            "predicate": rec.predicate,
                            "object": rec.object,
                            "confidence": rec.confidence,
                        }
                    )
    finally:
        # Close the store even if closing the extractor fails.
        try:
            extractor.close()
        finally:
            store.close()

    return _text(
        {
            "project": project,
            "claims_added": added,
            "sample": samples,
        }
    )
1154
+
1155
+
1156
def _assert_claim(args: dict[str, Any]) -> list[TextContent]:
    """Persist a claim supplied directly by the agent (no LLM involved).

    The ``claims_enabled`` (CLAIMS_EXTRACTION) flag is deliberately not
    consulted here: no extractor call is made because the caller provides
    the triple itself. That flag only gates ``_extract_claims``.

    Arguments read from ``args``:
        subject, predicate, object: required non-blank strings.
        polarity: optional bool (default ``True``).
        confidence: optional number in ``[0, 1]`` (default 0.95).
        evidence_span, session_id, source_prompt_id: optional strings.
        valid_at: optional number; defaults to the current time.

    Returns the stored claim's id and fields, or an
    ``{"error": "ValueError", ...}`` payload when validation fails.
    """
    project = _require_project(args)

    triple = {
        "subject": args.get("subject"),
        "predicate": args.get("predicate"),
        "object": args.get("object"),
    }
    for field_name, value in triple.items():
        if not (isinstance(value, str) and value.strip()):
            return _text(
                {
                    "error": "ValueError",
                    "message": f"`{field_name}` is required (non-empty string).",
                }
            )
    subject = triple["subject"]
    predicate = triple["predicate"]
    obj = triple["object"]

    polarity = args.get("polarity", True)
    if not isinstance(polarity, bool):
        return _text(
            {"error": "ValueError", "message": "`polarity` must be a boolean."}
        )

    try:
        confidence_val = float(args.get("confidence", 0.95))
    except (TypeError, ValueError):
        return _text(
            {"error": "ValueError", "message": "`confidence` must be a number."}
        )
    # Written as a negated chained comparison so that NaN is rejected too.
    if not (0.0 <= confidence_val <= 1.0):
        return _text(
            {
                "error": "ValueError",
                "message": "`confidence` must be in [0, 1].",
            }
        )

    evidence_raw = args.get("evidence_span")
    evidence_span = evidence_raw.strip() if isinstance(evidence_raw, str) else ""

    valid_at_raw = args.get("valid_at")
    if isinstance(valid_at_raw, (int, float)):
        valid_at = float(valid_at_raw)
    else:
        valid_at = _now()

    session_raw = args.get("session_id")
    session_id = None
    if isinstance(session_raw, str) and session_raw:
        session_id = str(session_raw)

    pid_raw = args.get("source_prompt_id")
    source_prompt_id = None
    if isinstance(pid_raw, str) and pid_raw:
        source_prompt_id = str(pid_raw)

    repo = Path(os.environ.get("CODE_MEMORY_REPO") or os.getcwd()).resolve()
    head_sha = _head_sha_safe(repo)

    from .claims import ClaimRecord, ClaimsStore, EntityResolver

    cfg = CONFIG.for_project(project)

    # Entity linking is optional; fall back to unlinked claims when the
    # resolver cannot be built.
    resolver: EntityResolver | None
    try:
        resolver = EntityResolver(project=project, cfg=cfg)
    except Exception:  # noqa: BLE001
        resolver = None

    subject_entity = _resolve_or_none(resolver, subject)
    object_entity = _resolve_or_none(resolver, obj)

    # Predicate canonicalization mirrors the extractor: lowercase
    # kebab-case so single-valued contradiction handling works.
    canonical_pred = predicate.strip().lower().replace(" ", "-")

    rec = ClaimRecord(
        subject=subject.strip(),
        predicate=canonical_pred,
        object=obj.strip(),
        polarity=polarity,
        confidence=confidence_val,
        evidence_span=evidence_span,
        valid_at=valid_at,
        head_sha=head_sha,
        session_id=session_id,
        source_prompt_id=source_prompt_id,
        entity_subject_id=subject_entity,
        entity_object_id=object_entity,
    )

    store = ClaimsStore(path=cfg.claims_db)
    try:
        claim_id = store.upsert(rec)
    finally:
        store.close()

    summary = {
        "project": project,
        "claim_id": claim_id,
        "subject": rec.subject,
        "predicate": rec.predicate,
        "object": rec.object,
        "polarity": rec.polarity,
        "confidence": rec.confidence,
        "valid_at": rec.valid_at,
    }
    return _text(summary)
1279
+
1280
+
1281
def _read_claims(args: dict[str, Any]) -> list[TextContent]:
    """Read stored claims for a project.

    Arguments read from ``args``:
        subject: optional non-empty string used to filter claims.
        as_of: optional number; when given, ``store.as_of`` is queried
            instead of ``store.current``.
        limit: optional maximum number of claims returned (default 50).
            ``None`` means the default; negative values are clamped to 0
            instead of slicing rows off the end of the result.

    Returns a payload with ``count`` and the serialized ``claims``.
    """
    project = _require_project(args)
    from .claims import ClaimsStore

    subject = args.get("subject")
    subject_val = str(subject) if isinstance(subject, str) and subject else None
    as_of = args.get("as_of")

    # Parse the limit before opening the store so bad input fails early.
    raw_limit = args.get("limit")
    limit = 50 if raw_limit is None else max(0, int(raw_limit))

    cfg = CONFIG.for_project(project)
    store = ClaimsStore(path=cfg.claims_db)
    try:
        rows = (
            store.as_of(float(as_of), subject=subject_val)
            if isinstance(as_of, (int, float))
            else store.current(subject=subject_val)
        )
        rows = rows[:limit]
    finally:
        store.close()

    return _text(
        {
            "project": project,
            "count": len(rows),
            "claims": [
                {
                    "subject": r.subject,
                    "predicate": r.predicate,
                    "object": r.object,
                    "polarity": r.polarity,
                    "confidence": r.confidence,
                    "valid_at": r.valid_at,
                    "valid_to": r.valid_to,
                    "head_sha": r.head_sha,
                }
                for r in rows
            ],
        }
    )
1320
+
1321
+
1322
def _health(args: dict[str, Any]) -> list[TextContent]:
    """Report the health of every backend and local store for a project.

    Probes Ollama, Qdrant and FalkorDB (each reported as ``ok`` with a
    latency or ``error`` with the exception text), then adds best-effort
    storage statistics (episodes, claims), a metrics summary and the last
    ingest state. A failing section never aborts the whole report.
    """
    import time as _time
    import httpx as _httpx

    project = _require_project(args)
    cfg = CONFIG.for_project(project)

    results: dict[str, Any] = {"project": project, "backends": {}}

    def _elapsed_ms(started: float) -> int:
        # perf_counter is monotonic, so latencies are immune to wall-clock
        # adjustments happening mid-probe.
        return round((_time.perf_counter() - started) * 1000)

    # Ollama
    t0 = _time.perf_counter()
    try:
        with _httpx.Client(timeout=5) as c:
            r = c.get(f"{CONFIG.ollama_url}/api/tags")
            r.raise_for_status()
            models = [m["name"] for m in r.json().get("models", [])]
        results["backends"]["ollama"] = {
            "status": "ok",
            "url": CONFIG.ollama_url,
            "latency_ms": _elapsed_ms(t0),
            "models": models,
        }
    except Exception as exc:
        results["backends"]["ollama"] = {
            "status": "error",
            "url": CONFIG.ollama_url,
            "error": str(exc),
        }

    # Qdrant
    t0 = _time.perf_counter()
    try:
        from .vector import QdrantStore

        q = QdrantStore()
        info = q.client.get_collection(cfg.qdrant_code)
        results["backends"]["qdrant"] = {
            "status": "ok",
            "url": CONFIG.qdrant_url,
            "latency_ms": _elapsed_ms(t0),
            "collections": {
                "code": {
                    "name": cfg.qdrant_code,
                    "vectors": info.points_count,
                },
            },
        }
    except Exception as exc:
        results["backends"]["qdrant"] = {
            "status": "error",
            "url": CONFIG.qdrant_url,
            "error": str(exc),
        }

    # FalkorDB
    t0 = _time.perf_counter()
    try:
        from .graph.falkor_store import FalkorStore

        g = FalkorStore(graph_name=cfg.falkor_graph)
        node_count = int(g.graph.query("MATCH (n) RETURN count(n)").result_set[0][0])
        results["backends"]["falkordb"] = {
            "status": "ok",
            "host": CONFIG.falkor_host,
            "port": CONFIG.falkor_port,
            "latency_ms": _elapsed_ms(t0),
            "graph": cfg.falkor_graph,
            "nodes": node_count,
        }
    except Exception as exc:
        results["backends"]["falkordb"] = {
            "status": "error",
            "host": CONFIG.falkor_host,
            "port": CONFIG.falkor_port,
            "error": str(exc),
        }

    # Storage stats
    results["storage"] = {
        "data_dir": str(cfg.data_dir),
    }

    # Episodic count (best-effort: the section is omitted on failure).
    try:
        from .episodic import EpisodicStore

        eps = EpisodicStore(path=cfg.episodic_db)
        row = eps.conn.execute("SELECT COUNT(*) FROM episodes").fetchone()
        results["storage"]["episodes"] = {
            "path": str(cfg.episodic_db),
            "exists": cfg.episodic_db.exists(),
            "count": row[0] if row else 0,
        }
    except Exception:
        log.debug("health: episodic stats unavailable", exc_info=True)

    # Claims count (best-effort: the section is omitted on failure).
    # NOTE(review): "exists" is read after the store has been opened; if
    # opening creates the file it will always be true — confirm.
    try:
        from .claims import ClaimsStore

        cs = ClaimsStore(path=cfg.claims_db)
        try:
            results["storage"]["claims"] = {
                "path": str(cfg.claims_db),
                "exists": cfg.claims_db.exists(),
                "count": len(cs.current()) if cfg.claims_db.exists() else 0,
            }
        finally:
            # Close even when counting fails so the handle is not leaked.
            cs.close()
    except Exception:
        log.debug("health: claims stats unavailable", exc_info=True)

    # Metrics summary. Always emit the block so callers can tell
    # "no data yet" apart from "metrics module disabled or broken".
    env_metrics = os.environ.get("CODEMEMORY_METRICS_DB")
    metrics_path = Path(env_metrics) if env_metrics else cfg.data_dir / "metrics.db"
    metrics_block: dict[str, Any] = {
        "path": str(metrics_path),
        "exists": metrics_path.exists(),
    }
    if metrics_block["exists"]:
        try:
            from .metrics import MetricsStore

            ms = MetricsStore(metrics_path)
            metrics_block.update(ms.summary())
            metrics_block["tool_usage"] = ms.tool_usage_summary()
            metrics_block["efficiency"] = ms.efficiency_summary()
        except Exception as exc:
            metrics_block["error"] = f"{type(exc).__name__}: {exc}"
    results["metrics"] = metrics_block

    # Last ingest SHA (best-effort: the section is omitted on failure).
    try:
        repo = Path(
            os.environ.get("CODE_MEMORY_REPO") or os.getcwd()
        ).resolve()
        if (repo / ".git").exists():
            from .orchestrator.ingest_state import IngestStateStore

            # IngestState lives alongside episodes in the same DB
            epdb = cfg.episodic_db
            if epdb.exists():
                st = IngestStateStore(epdb)
                state = st.get(repo)
                if state is not None:
                    results["last_ingest"] = {
                        "sha": state.last_sha,
                        "ts": state.last_ts,
                    }
    except Exception:
        log.debug("health: last ingest state unavailable", exc_info=True)

    return _text(results)
1475
+
1476
+
1477
def _record_read(args: dict[str, Any]) -> list[TextContent]:
    """Record a filesystem read in the metrics store.

    The metrics database path comes from ``CODEMEMORY_METRICS_DB`` when
    set, otherwise ``metrics.db`` under ``CONFIG.data_dir``. Failures
    while recording are reported as ``{"recorded": False, "error": ...}``
    rather than raised.
    """
    project = _require_project(args)
    read_event = {
        "tool": args.get("tool", ""),
        "path": args.get("path", ""),
        "project": project,
        "output_chars": int(args.get("chars", 0) or 0),
        "session_id": str(args.get("session_id") or ""),
    }
    configured = os.environ.get("CODEMEMORY_METRICS_DB")
    db_path = configured or str(CONFIG.data_dir / "metrics.db")
    try:
        from .metrics import MetricsStore

        MetricsStore(Path(db_path)).record_fs_read(**read_event)
    except Exception as exc:
        return _text({"recorded": False, "error": str(exc)})
    return _text({"recorded": True})
1491
+
1492
+
1493
+ def _now() -> float:
1494
+ import time
1495
+
1496
+ return time.time()
1497
+
1498
+
1499
+ def _resolve_or_none(resolver: Any, text: str) -> str | None:
1500
+ """Defensive entity resolution helper (see CLI counterpart)."""
1501
+ if resolver is None:
1502
+ return None
1503
+ try:
1504
+ ref = resolver.resolve(text)
1505
+ except Exception: # noqa: BLE001
1506
+ return None
1507
+ return ref.id if ref is not None else None
1508
+
1509
+
1510
+ def _head_sha_safe(repo: Path) -> str | None:
1511
+ if not (repo / ".git").exists():
1512
+ return None
1513
+ try:
1514
+ from .orchestrator import git_delta
1515
+
1516
+ return git_delta.head_sha(repo)
1517
+ except Exception: # noqa: BLE001
1518
+ return None
1519
+
1520
+
1521
+ def _record_tool_call_if_configured(tool: str, args: dict, output_chars: int) -> None:
1522
+ """Record tool call to MetricsStore if configured. Fire-and-forget."""
1523
+ try:
1524
+ db_path = os.environ.get("CODEMEMORY_METRICS_DB") or str(CONFIG.data_dir / "metrics.db")
1525
+ from .metrics import MetricsStore
1526
+ ms = MetricsStore(Path(db_path))
1527
+ query_text = str(args.get("query") or args.get("symbol") or args.get("target") or args.get("prompt") or "")
1528
+ result_count = _extract_result_count(tool, args, output_chars)
1529
+ ms.record_tool_call(
1530
+ tool=tool,
1531
+ project=args.get("project", ""),
1532
+ query_text=query_text[:500],
1533
+ output_chars=output_chars,
1534
+ result_count=result_count,
1535
+ session_id=str(args.get("session_id") or ""),
1536
+ )
1537
+ except Exception:
1538
+ pass
1539
+
1540
+
1541
+ def _extract_result_count(tool: str, args: dict, output_chars: int) -> int:
1542
+ """Estimate result count from context. Best-effort."""
1543
+ k = int(args.get("k", 0) or 0)
1544
+ eps = int(args.get("eps", 0) or 0)
1545
+ if k or eps:
1546
+ return k + eps
1547
+ return 0
1548
+
1549
+
1550
# Dispatch table: MCP tool name -> synchronous handler function.
_HANDLERS = dict(
    codememory_retrieve=_retrieve,
    codememory_record=_record,
    codememory_reingest=_reingest,
    codememory_ingest=_ingest,
    codememory_callers=_callers,
    codememory_callees=_callees,
    codememory_importers=_importers,
    codememory_dependencies=_dependencies,
    codememory_injects=_injects,
    codememory_injectors=_injectors,
    codememory_definitions=_definitions,
    codememory_assembly_members=_assembly_members,
    codememory_drift=_drift,
    codememory_at_sha=_at_sha,
    codememory_callers_at_sha=_callers_at_sha,
    codememory_extract_claims=_extract_claims,
    codememory_assert_claim=_assert_claim,
    codememory_claims=_read_claims,
    codememory_health=_health,
    codememory_record_read=_record_read,
)
1572
+
1573
+
1574
def build_server() -> Server:
    """Create the MCP server and register its tool listing and dispatch.

    ``list_tools`` returns the module-level ``_TOOLS``. ``call_tool``
    looks the handler up in ``_HANDLERS``, runs it in a worker thread and
    converts any exception into an ``{"error": ..., "message": ...}``
    payload. For ``codememory_ingest`` it additionally wires a progress
    callback when the request carries a progress token.
    """
    server: Server = Server(SERVER_NAME)

    @server.list_tools()
    async def list_tools() -> list[Tool]:
        return _TOOLS

    @server.call_tool()
    async def call_tool(name: str, arguments: dict[str, Any]) -> list[TextContent]:
        handler = _HANDLERS.get(name)
        if handler is None:
            return _text({"error": f"unknown tool: {name}"})

        # Bridge MCP `notifications/progress` for long-running tools. The
        # client opts in by sending `_meta.progressToken` on the request;
        # the SDK exposes it as `request_context.meta.progressToken`. We
        # hand the ingest pipeline a *sync* callback that schedules the
        # async send back onto this event loop via `from_thread.run`.
        progress_token: str | int | None = None
        try:
            ctx = server.request_context
            if ctx.meta is not None:
                progress_token = ctx.meta.progressToken
        except LookupError:
            # No request context available: progress cannot be reported.
            ctx = None  # type: ignore[assignment]

        on_progress: Callable[[int, int | None, str], None] | None = None
        if name == "codememory_ingest":
            # Log token state on every ingest invocation so the user can
            # tell from MCP server logs whether the host (Claude Code,
            # OpenCode, ...) opted into progress at all. Without a token
            # the spec forbids sending; the call runs silently end-to-end.
            if progress_token is None:
                log.info(
                    "ingest: no progressToken on request — client did "
                    "not opt into notifications/progress"
                )
            else:
                log.info("ingest: progressToken=%r — streaming progress", progress_token)
            if progress_token is not None and ctx is not None:
                session = ctx.session
                token = progress_token
                request_id = str(ctx.request_id) if ctx.request_id is not None else None
                # Mutable cell so the nested callback can count sends.
                send_count = {"n": 0}

                async def _emit(
                    completed: float, total: float | None, message: str
                ) -> None:
                    await session.send_progress_notification(
                        token,
                        completed,
                        total,
                        message,
                        related_request_id=request_id,
                    )

                def _send(completed: int, total: int | None, message: str) -> None:
                    try:
                        from anyio.from_thread import run as _run_in_loop

                        _run_in_loop(
                            _emit,
                            float(completed),
                            float(total) if total is not None else None,
                            message,
                        )
                        send_count["n"] += 1
                        # Log the first send and then every 25th.
                        if send_count["n"] == 1 or send_count["n"] % 25 == 0:
                            log.info(
                                "progress sent #%d: %s", send_count["n"], message
                            )
                    except Exception as exc:  # noqa: BLE001 — UI errors must
                        # never abort the ingest worker thread.
                        log.warning("progress notification failed: %s", exc)

                on_progress = _send

        def _invoke() -> list[TextContent]:
            # Only the ingest handler accepts the progress callback.
            if name == "codememory_ingest":
                return _ingest(arguments, on_progress=on_progress)
            return handler(arguments)

        try:
            # Handlers are synchronous; run them off the event loop.
            result = await anyio.to_thread.run_sync(_invoke)
        except Exception as exc:  # surface, don't crash the server
            return _text({"error": type(exc).__name__, "message": str(exc)})
        # Auto-record MCP tool call for efficiency tracking (fire-and-forget)
        # NOTE(review): this runs inline in the coroutine rather than in the
        # worker thread, and is skipped when the handler raised — confirm
        # both are intended.
        output_chars = sum(len(t.text) for t in result if hasattr(t, "text"))
        _record_tool_call_if_configured(name, arguments, output_chars)
        return result

    return server
1666
+
1667
+
1668
def _bootstrap_repo() -> Path | None:
    """Locate the active repo and ensure autostart + in-process watcher.

    Best-effort: any failure (no git, no write permission, missing
    watchdog dep) logs and continues. The MCP server still serves
    queries even if these side-channels can't be set up.

    Each step can be disabled through its own environment variable:
    ``CODE_MEMORY_NO_HEALTH_CHECK``, ``CODE_MEMORY_NO_AUTOSTART``,
    ``CODE_MEMORY_NO_BOOT_SYNC`` and ``CODE_MEMORY_NO_INPROC_WATCHER``.

    Returns:
        The resolved repository path, or ``None`` when no ``.git`` entry
        could be found (in which case steps 1-3 are skipped).
    """
    # 0. Backend health check (best-effort)
    if not os.environ.get("CODE_MEMORY_NO_HEALTH_CHECK"):
        _check_backends()

    # CODE_MEMORY_REPO wins over the current working directory.
    candidate = os.environ.get("CODE_MEMORY_REPO") or os.getcwd()
    repo = Path(candidate).resolve()
    if not (repo / ".git").exists():
        # try git toplevel
        from .orchestrator import git_delta

        if git_delta.is_git_repo(repo):
            try:
                import subprocess

                top = subprocess.run(
                    ["git", "-C", str(repo), "rev-parse", "--show-toplevel"],
                    capture_output=True,
                    text=True,
                    check=False,
                    timeout=2,
                ).stdout.strip()
                if top:
                    repo = Path(top)
            except Exception:  # noqa: BLE001
                # Fall through to the second `.git` check below.
                pass
    if not (repo / ".git").exists():
        log.info("mcp bootstrap: not a git repo (%s); skipping autostart", repo)
        return None

    # 1. autostart registration (idempotent)
    if not os.environ.get("CODE_MEMORY_NO_AUTOSTART"):
        try:
            from .sync.autostart import ensure_autostart

            st = ensure_autostart(repo)
            log.info(
                "mcp bootstrap: autostart installed=%s running=%s label=%s",
                st.installed,
                st.running,
                st.label,
            )
        except Exception:  # noqa: BLE001
            log.exception("mcp bootstrap: autostart registration failed")

    # 2. one-shot sync to catch up to HEAD
    if not os.environ.get("CODE_MEMORY_NO_BOOT_SYNC"):
        try:
            from .sync import sync_repo

            result = sync_repo(repo, trigger="mcp-boot")
            log.info(
                "mcp bootstrap: sync action=%s head=%s",
                result.action,
                (result.head_sha or "")[:12],
            )
        except Exception:  # noqa: BLE001
            log.exception("mcp bootstrap: initial sync failed")

    # 3. in-process watcher as belt-and-suspenders (won't double-start
    # because OS autostart runs in its own process)
    if not os.environ.get("CODE_MEMORY_NO_INPROC_WATCHER"):
        try:
            from .sync.watcher import Watcher

            w = Watcher(repo)
            w.start()
            log.info("mcp bootstrap: in-process watcher started")
            # Store the watcher in a module-level dict so a reference to
            # it outlives this function.
            _BOOTSTRAP_REFS["watcher"] = w
        except Exception:  # noqa: BLE001
            log.exception("mcp bootstrap: in-process watcher failed to start")

    return repo
1747
+
1748
+
1749
# Module-level holder for objects created during bootstrap; `_bootstrap_repo`
# stores the in-process watcher here under the "watcher" key.
_BOOTSTRAP_REFS: dict[str, Any] = {}
1750
+
1751
+
1752
def _check_backends() -> None:
    """Ping Ollama, Qdrant and FalkorDB once at startup.

    Each probe logs either an ``ok`` line or an ``UNREACHABLE`` error.
    Probe failures are logged and never propagated.
    """
    import httpx as _httpx

    # (CONFIG attribute holding the base URL, probe path, timeout in
    # seconds, success log format, failure log format)
    http_probes = (
        (
            "ollama_url",
            "/api/tags",
            5,
            "health: ollama ok (%s)",
            "health: ollama UNREACHABLE (%s): %s",
        ),
        (
            "qdrant_url",
            "/healthz",
            3,
            "health: qdrant ok (%s)",
            "health: qdrant UNREACHABLE (%s): %s",
        ),
    )
    for url_attr, probe_path, timeout_s, ok_fmt, fail_fmt in http_probes:
        try:
            with _httpx.Client(timeout=timeout_s) as client:
                response = client.get(f"{getattr(CONFIG, url_attr)}{probe_path}")
                response.raise_for_status()
            log.info(ok_fmt, getattr(CONFIG, url_attr))
        except Exception as exc:
            log.error(fail_fmt, getattr(CONFIG, url_attr), exc)

    # FalkorDB is probed with a plain PING through the redis client.
    try:
        import redis as _redis

        falkor = _redis.Redis(
            host=CONFIG.falkor_host,
            port=CONFIG.falkor_port,
            socket_timeout=3,
        )
        falkor.ping()
    except Exception as exc:
        log.error(
            "health: falkordb UNREACHABLE (%s:%d): %s",
            CONFIG.falkor_host,
            CONFIG.falkor_port,
            exc,
        )
    else:
        try:
            log.info(
                "health: falkordb ok (%s:%d)",
                CONFIG.falkor_host,
                CONFIG.falkor_port,
            )
        except Exception as exc:
            log.error(
                "health: falkordb UNREACHABLE (%s:%d): %s",
                CONFIG.falkor_host,
                CONFIG.falkor_port,
                exc,
            )
1794
+
1795
+
1796
async def _run() -> None:
    """Bootstrap the repo side-channels, then serve MCP over stdio."""
    _bootstrap_repo()
    mcp_server = build_server()
    async with stdio_server() as streams:
        incoming, outgoing = streams
        init_options = mcp_server.create_initialization_options()
        await mcp_server.run(incoming, outgoing, init_options)
1805
+
1806
+
1807
def main() -> None:
    """Configure logging and run the MCP server until it exits.

    The log level is taken from ``CODE_MEMORY_LOG_LEVEL`` (default
    ``INFO``).
    """
    log_level = os.environ.get("CODE_MEMORY_LOG_LEVEL", "INFO")
    log_format = "%(asctime)s %(levelname)s %(name)s %(message)s"
    logging.basicConfig(level=log_level, format=log_format)
    anyio.run(_run)
1813
+
1814
+
1815
# Start the server only when this module is executed as a script.
if __name__ == "__main__":
    main()