flurryx-code-memory 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. code_memory/__init__.py +1 -0
  2. code_memory/claims/__init__.py +32 -0
  3. code_memory/claims/extractor.py +325 -0
  4. code_memory/claims/indexer.py +258 -0
  5. code_memory/claims/resolver.py +186 -0
  6. code_memory/claims/store.py +424 -0
  7. code_memory/cli.py +1192 -0
  8. code_memory/config.py +268 -0
  9. code_memory/embed/__init__.py +224 -0
  10. code_memory/embed/cache.py +204 -0
  11. code_memory/embed/m3.py +174 -0
  12. code_memory/embed/ollama.py +92 -0
  13. code_memory/embed/tei.py +106 -0
  14. code_memory/episodic/__init__.py +3 -0
  15. code_memory/episodic/sqlite_store.py +278 -0
  16. code_memory/extractor/__init__.py +3 -0
  17. code_memory/extractor/csproj.py +166 -0
  18. code_memory/extractor/dll.py +385 -0
  19. code_memory/extractor/gitignore.py +162 -0
  20. code_memory/extractor/nuget.py +275 -0
  21. code_memory/extractor/sanity.py +124 -0
  22. code_memory/extractor/sln.py +108 -0
  23. code_memory/extractor/treesitter.py +1172 -0
  24. code_memory/graph/__init__.py +3 -0
  25. code_memory/graph/falkor_store.py +740 -0
  26. code_memory/mcp_server.py +1816 -0
  27. code_memory/metrics.py +260 -0
  28. code_memory/orchestrator/__init__.py +13 -0
  29. code_memory/orchestrator/git_delta.py +211 -0
  30. code_memory/orchestrator/ingest_state.py +71 -0
  31. code_memory/orchestrator/pipeline.py +1478 -0
  32. code_memory/orchestrator/reset.py +130 -0
  33. code_memory/orchestrator/resolver.py +825 -0
  34. code_memory/orchestrator/retrieve.py +505 -0
  35. code_memory/resilience.py +73 -0
  36. code_memory/sync/__init__.py +20 -0
  37. code_memory/sync/autostart/__init__.py +42 -0
  38. code_memory/sync/autostart/base.py +106 -0
  39. code_memory/sync/autostart/launchd.py +115 -0
  40. code_memory/sync/autostart/schtasks.py +155 -0
  41. code_memory/sync/autostart/systemd.py +113 -0
  42. code_memory/sync/hooks.py +164 -0
  43. code_memory/sync/safety.py +65 -0
  44. code_memory/sync/snapshot.py +461 -0
  45. code_memory/sync/store.py +399 -0
  46. code_memory/sync/sync.py +405 -0
  47. code_memory/sync/watcher.py +320 -0
  48. code_memory/vector/__init__.py +3 -0
  49. code_memory/vector/qdrant_store.py +302 -0
  50. flurryx_code_memory-0.4.0.dist-info/METADATA +26 -0
  51. flurryx_code_memory-0.4.0.dist-info/RECORD +53 -0
  52. flurryx_code_memory-0.4.0.dist-info/WHEEL +4 -0
  53. flurryx_code_memory-0.4.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,505 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ import math
5
+ import re
6
+ import time
7
+ from dataclasses import dataclass, field
8
+ from pathlib import Path
9
+ from typing import Any
10
+
11
+ import os
12
+
13
+ from ..claims import ClaimRecord, ClaimsIndexer, ClaimsStore, make_claims_indexer
14
+ from ..metrics import MetricsStore, RetrieveTiming
15
+ from ..config import CONFIG, Config, detect_project_slug
16
+ from ..embed import HybridVec, M3Embedder, get_embedder
17
+ from ..episodic import Episode, EpisodicStore
18
+ from ..vector import QdrantStore, VectorHit
19
# Hybrid (dense + sparse RRF) is opt-in. Benchmarks on the sample-webapp
# Angular corpus showed dense-only m3 outperforms hybrid on natural
# language queries (see docs/BENCHMARK.md). Sparse over-promotes spec
# files and generated API stubs whose identifier vocabulary overlaps
# heavily with the query. The collection still stores both vectors so
# users can toggle at query time without re-ingesting.
ENV_HYBRID = "CODEMEMORY_HYBRID"
# Environment variable that overrides where retrieval timings are stored.
# When it is unset, ``Retriever`` uses ``CONFIG.data_dir / "metrics.db"``.
ENV_METRICS = "CODEMEMORY_METRICS_DB"

# Module-level logger, named after this module.
log = logging.getLogger(__name__)
29
+
30
+
31
def _retrieval_mode() -> str:
    """Choose the vector search mode from the ``ENV_HYBRID`` environment flag.

    The value is trimmed and lower-cased; ``1``, ``true``, ``on`` and ``yes``
    select ``"hybrid"``. Anything else, including an unset variable, selects
    ``"dense"``.
    """
    flag = os.environ.get(ENV_HYBRID, "0").strip().lower()
    if flag in ("1", "true", "on", "yes"):
        return "hybrid"
    return "dense"
34
+
35
# Per-hit score adjustments applied after Qdrant's cosine ranking.
GENERATED_PENALTY = 0.15  # subtract from generated code hits
ENTRYPOINT_BOOST = 0.05  # add to framework entrypoint files

# Path patterns that match Angular / Node framework entrypoints worth surfacing
# even when symbol-level similarity is lower than other hits.
# Entries are lower-case because lookups use the lower-cased file basename.
_ENTRYPOINT_BASENAMES = frozenset(
    {
        "app.config.ts",
        "app.routes.ts",
        "app-routing.module.ts",
        "main.ts",
        "main.server.ts",
        "app.module.ts",
        "app.component.ts",
        "index.ts",
        "providers.ts",
    }
)
# Basename endings that also count as entrypoints, in addition to the exact
# names above.
_ENTRYPOINT_SUFFIXES = (".module.ts", ".routing.ts", ".routes.ts", ".config.ts")
55
+
56
+
57
+ def _is_entrypoint(path: str | None) -> bool:
58
+ if not path:
59
+ return False
60
+ name = Path(path).name.lower()
61
+ if name in _ENTRYPOINT_BASENAMES:
62
+ return True
63
+ return any(name.endswith(suf) for suf in _ENTRYPOINT_SUFFIXES)
64
+
65
+
66
+ def _normalize_prompt(text: str) -> str:
67
+ """Lowercase + collapse whitespace for cheap near-duplicate detection."""
68
+ return re.sub(r"\s+", " ", text.strip().lower())[:160]
69
+
70
+
71
@dataclass
class ContextPack:
    """Bundle of retrieval results used to orient an agent on a query.

    Carries vector hits over code, vector hits over episodes together with
    the hydrated ``Episode`` rows, and user claims
    (``(subject, predicate, object)`` facts). Topology questions (who calls
    X, who imports Y, …) are deliberately not answered here; the rendered
    text points the reader at the dedicated ``codememory_*`` tools instead.
    """

    query: str
    code_hits: list[VectorHit] = field(default_factory=list)
    episode_hits: list[VectorHit] = field(default_factory=list)
    episodes: list[Episode] = field(default_factory=list)
    claims: list[ClaimRecord] = field(default_factory=list)

    def render(self) -> str:
        """Return the pack as Markdown-style text.

        Sections without results are omitted; the topology hint is always
        appended as the final line.
        """
        out = [f"# Query\n{self.query}\n"]

        if self.code_hits:
            out.append("## Code")
            for hit in self.code_hits:
                meta = hit.payload
                location = f"- {meta.get('path')}:{meta.get('start')}-{meta.get('end')} "
                label = f"[{meta.get('kind')} {meta.get('name')}] score={hit.score:.3f}"
                out.append(location + label)

        # The section is gated on the vector hits but lists the hydrated rows.
        if self.episode_hits:
            out.append("\n## Episodes")
            for episode in self.episodes:
                out.append(
                    f"- {episode.id} verdict={episode.verdict} :: {episode.prompt[:120]}"
                )

        if self.claims:
            out.append("\n## User claims")
            for claim in self.claims:
                # A falsy polarity marks the claim as negated.
                marker = " (NEGATED)" if not claim.polarity else ""
                fact = f"- {claim.subject} {claim.predicate} {claim.object}{marker} "
                out.append(fact + f"(conf={claim.confidence:.2f})")

        out.append(
            "\n_For topology (callers/callees/importers/dependencies/definitions) "
            "use the dedicated codememory_* tools._"
        )
        return "\n".join(out)

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict view for plugin / tool consumers.

        Episode prompts are cut to 240 characters. Each episode's ``score`` is
        taken from the first entry of ``episode_hits`` with the same id, or is
        ``None`` when no such hit exists.
        """

        def score_of(episode_id: Any) -> Any:
            for hit in self.episode_hits:
                if hit.id == episode_id:
                    return hit.score
            return None

        code_rows = []
        for hit in self.code_hits:
            meta = hit.payload
            code_rows.append(
                {
                    "path": meta.get("path"),
                    "start": meta.get("start"),
                    "end": meta.get("end"),
                    "kind": meta.get("kind"),
                    "name": meta.get("name"),
                    "score": hit.score,
                }
            )

        episode_rows = []
        for episode in self.episodes:
            episode_rows.append(
                {
                    "id": episode.id,
                    "verdict": episode.verdict,
                    "prompt": episode.prompt[:240],
                    "score": score_of(episode.id),
                }
            )

        claim_rows = []
        for claim in self.claims:
            claim_rows.append(
                {
                    "subject": claim.subject,
                    "predicate": claim.predicate,
                    "object": claim.object,
                    "polarity": claim.polarity,
                    "confidence": claim.confidence,
                    "valid_at": claim.valid_at,
                    "head_sha": claim.head_sha,
                }
            )

        return {
            "query": self.query,
            "code": code_rows,
            "episodes": episode_rows,
            "claims": claim_rows,
        }
163
+
164
+
165
class Retriever:
    """Vector + episode + claim retrieval. Topology lives in dedicated MCP tools."""

    def __init__(
        self,
        project: str | None = None,
        embedder: M3Embedder | None = None,
        vector: QdrantStore | None = None,
        episodic: EpisodicStore | None = None,
        claims_indexer: ClaimsIndexer | None = None,
    ) -> None:
        """Wire up the stores for one project.

        Every collaborator may be injected; anything left as ``None`` is built
        from the project configuration resolved via ``detect_project_slug`` /
        ``CONFIG.for_project``.
        """
        self.slug = project or detect_project_slug()
        self.cfg: Config = CONFIG.for_project(self.slug)
        self.embedder = embedder or get_embedder()
        self.vector = vector or QdrantStore()
        self.episodic = episodic or EpisodicStore(path=self.cfg.episodic_db)
        # Injected for tests; lazily built on first claim retrieval in
        # prod so projects without ``claims.db`` pay zero cost.
        self._claims_indexer = claims_indexer
        # Mirror writer fallback in mcp_server / cli: when the env var is
        # unset, fall back to ``metrics.db`` under ``CONFIG.data_dir``.
        env_metrics = os.environ.get(ENV_METRICS)
        self._metrics_path = Path(env_metrics) if env_metrics else CONFIG.data_dir / "metrics.db"
        self._metrics: MetricsStore | None = None

    def retrieve(
        self,
        query: str,
        top_k_code: int = 8,
        top_k_eps: int = 5,
        top_k_claims: int = 5,
        include_idle_episodes: bool = False,
    ) -> ContextPack:
        """Embed ``query`` and gather code hits, episodes and claims.

        Args:
            query: Natural-language query text.
            top_k_code: Maximum number of code hits to return.
            top_k_eps: Maximum number of episodes to return.
            top_k_claims: Maximum number of claims to return.
            include_idle_episodes: Keep episodes whose verdict is ``idle``.

        Returns:
            A ``ContextPack`` holding the re-ranked code hits, the filtered
            episode hits with their hydrated rows, and the ranked claims.

        Stage timings are logged at debug level and, when the metrics path
        ends in ``.db``, recorded through ``MetricsStore``. Each timestamp is
        taken after its stage's post-processing, so every stage is billed for
        its own work only. A metrics failure never fails the retrieval.
        """
        # perf_counter is monotonic: stage durations cannot go negative or
        # jump when the wall clock is adjusted mid-request.
        t0 = time.perf_counter()

        qvec = self.embedder.embed_one(query)
        t_embed = time.perf_counter()

        # Fetch 2x candidates so re-rank has room to lift entrypoints
        # and demote generated code without losing depth. Mode is
        # selected per ``CODEMEMORY_HYBRID``; default ``dense`` reflects
        # the benchmark winner — see docs/BENCHMARK.md.
        raw_code = self.vector.search(
            self.cfg.qdrant_code,
            qvec,
            top_k=top_k_code * 2,
            mode=_retrieval_mode(),
        )
        code_hits = _rerank_code(raw_code)[:top_k_code]
        t_code = time.perf_counter()

        # Over-fetch 3x so idle / duplicate episodes can be dropped and the
        # result still reaches ``top_k_eps``.
        raw_eps = self.vector.search(
            self.cfg.qdrant_episodes, qvec, top_k=top_k_eps * 3
        )
        episodes = self.episodic.by_ids([h.id for h in raw_eps])
        ep_hits, episodes = _filter_episodes(
            query,
            raw_eps,
            episodes,
            limit=top_k_eps,
            include_idle=include_idle_episodes,
        )
        # Taken after hydration + filtering so that work is billed to the
        # episode stage rather than leaking into the claims timing.
        t_eps = time.perf_counter()

        claims = self._retrieve_claims(query, qvec, limit=top_k_claims)
        t_claims = time.perf_counter()

        embed_s = t_embed - t0
        code_s = t_code - t_embed
        eps_s = t_eps - t_code
        claims_s = t_claims - t_eps
        total_s = t_claims - t0

        # Timing log
        log.debug(
            "retrieve query=%r embed=%.0fms code=%.0fms eps=%.0fms claims=%.0fms total=%.0fms "
            "code_hits=%d eps_hits=%d claims_hits=%d",
            query,
            embed_s * 1000,
            code_s * 1000,
            eps_s * 1000,
            claims_s * 1000,
            total_s * 1000,
            len(code_hits),
            len(ep_hits),
            len(claims),
        )

        # Warn on slow stages
        if embed_s > 5.0:
            log.warning(
                "retrieve: embed took %.1fs for query=%r", embed_s, query
            )
        if code_s > 2.0:
            log.warning(
                "retrieve: code search took %.1fs for query=%r",
                code_s,
                query,
            )

        # Record metrics (non-fatal)
        if self._metrics_path and self._metrics_path.suffix == ".db":
            try:
                if self._metrics is None:
                    self._metrics = MetricsStore(self._metrics_path)
                self._metrics.record_retrieve(
                    RetrieveTiming(
                        query=query,
                        embed_ms=embed_s * 1000,
                        code_search_ms=code_s * 1000,
                        eps_search_ms=eps_s * 1000,
                        claims_ms=claims_s * 1000,
                        total_ms=total_s * 1000,
                        code_hit_count=len(code_hits),
                        eps_hit_count=len(ep_hits),
                        claims_hit_count=len(claims),
                    )
                )
            except Exception:  # noqa: BLE001
                # Metrics never fail retrieval, but leave a trace so a broken
                # metrics store can be diagnosed.
                log.debug("retrieve: recording metrics failed", exc_info=True)

        return ContextPack(
            query=query,
            code_hits=code_hits,
            episode_hits=ep_hits,
            episodes=episodes,
            claims=claims,
        )

    def _retrieve_claims(
        self,
        query: str,
        query_vec: HybridVec,
        limit: int,
    ) -> list[ClaimRecord]:
        """Best-effort claim recall.

        Two-stage strategy:

        1. **Semantic.** If a claims indexer is available, search it via
           ``_semantic_claim_search`` and return its ranking.
        2. **Token-overlap fallback.** When the semantic stage is
           unavailable (no indexer, it returned ``None``, or it raised),
           fall back to the lexical ``_rank_claims`` heuristic over the
           rows from ``ClaimsStore.current()``.

        Returns empty when ``limit`` is not positive, when the per-project
        ``claims.db`` doesn't exist (the common case for projects that never
        opted into extraction), or when the fallback store cannot be read.
        Store and search errors are logged at debug level, never raised.
        """
        if limit <= 0:
            return []
        path = self.cfg.claims_db
        if not path.exists():
            return []

        indexer = self._ensure_claims_indexer()
        if indexer is not None:
            try:
                ranked = _semantic_claim_search(indexer, query_vec, limit)
            except Exception:  # noqa: BLE001
                # A failing vector backend must degrade to the lexical path,
                # not abort the whole retrieve call.
                log.debug("claims: semantic search failed", exc_info=True)
                ranked = None
            if ranked is not None:
                return ranked[:limit]

        # Fallback path: SQLite-only token-overlap. Still useful when
        # the embedder is offline (CI without Ollama, dev box without
        # GPU, etc.) — keeps the orientation payload non-empty.
        try:
            store = ClaimsStore(path=path)
        except Exception:  # noqa: BLE001
            return []
        try:
            rows = store.current()
        except Exception:  # noqa: BLE001
            log.debug("claims: reading current claims failed", exc_info=True)
            return []
        finally:
            store.close()
        if not rows:
            return []
        return _rank_claims(query, rows)[:limit]

    def _ensure_claims_indexer(self) -> ClaimsIndexer | None:
        """Lazy-init the indexer. Returns ``None`` if construction fails.

        Failure modes are silent on purpose: a broken Qdrant or
        embedder must not take down the whole retrieve call — the
        fallback path will still return token-overlap claims. Because a
        failure leaves the cached indexer unset, construction is attempted
        again on the next call.
        """
        if self._claims_indexer is not None:
            return self._claims_indexer
        try:
            self._claims_indexer = make_claims_indexer(
                project=self.slug,
                cfg=self.cfg,
                embedder=self.embedder,
                vector=self.vector,
            )
        except Exception:  # noqa: BLE001
            self._claims_indexer = None
            return None
        return self._claims_indexer
360
+
361
+
362
+ def _rerank_code(hits: list[VectorHit]) -> list[VectorHit]:
363
+ """Apply generated-code penalty + entrypoint boost; resort by score.
364
+
365
+ Returns new ``VectorHit`` instances so caller doesn't see mutated cosine
366
+ scores from Qdrant.
367
+ """
368
+ adjusted: list[VectorHit] = []
369
+ for h in hits:
370
+ score = h.score
371
+ path = h.payload.get("path")
372
+ if h.payload.get("generated"):
373
+ score -= GENERATED_PENALTY
374
+ if _is_entrypoint(path):
375
+ score += ENTRYPOINT_BOOST
376
+ adjusted.append(VectorHit(id=h.id, score=score, payload=h.payload))
377
+ adjusted.sort(key=lambda h: h.score, reverse=True)
378
+ return adjusted
379
+
380
+
381
+ def _semantic_claim_search(
382
+ indexer: ClaimsIndexer,
383
+ query_vec: HybridVec,
384
+ limit: int,
385
+ ) -> list[ClaimRecord] | None:
386
+ """Run a Qdrant search over claim points and hydrate rows.
387
+
388
+ Returns ``None`` when the indexer's collection is missing AND
389
+ backfill produced no rows — the caller falls back to lexical
390
+ ranking. Returns ``[]`` (not ``None``) when the collection exists
391
+ but yielded no semantic matches: the lexical path would just
392
+ re-scan the same rows for nothing, so we short-circuit.
393
+ """
394
+ try:
395
+ embedded = indexer.ensure_backfilled()
396
+ except Exception: # noqa: BLE001
397
+ return None
398
+ if embedded == 0 and indexer.store.count() == 0:
399
+ return None
400
+
401
+ hits = indexer.search(query_vec, top_k=limit * 3)
402
+ if not hits:
403
+ # Backfill ran (or skipped because already in sync) but no
404
+ # semantic neighbors — surface nothing rather than recompute
405
+ # the same answer via token overlap.
406
+ return []
407
+
408
+ claims = indexer.store.by_ids([h.id for h in hits])
409
+ by_id = {c.id: c for c in claims}
410
+ ranked: list[tuple[float, ClaimRecord]] = []
411
+ now = time.time()
412
+ for h in hits:
413
+ claim = by_id.get(h.id)
414
+ if claim is None:
415
+ continue
416
+ age_s = max(0.0, now - claim.valid_at)
417
+ decay = math.exp(-age_s / _CLAIM_RECENCY_HALF_LIFE_S)
418
+ score = h.score * claim.confidence * (0.5 + 0.5 * decay)
419
+ ranked.append((score, claim))
420
+ ranked.sort(key=lambda pair: pair[0], reverse=True)
421
+ return [c for _, c in ranked]
422
+
423
+
424
# Claim token: an ASCII letter followed by at least one more letter, digit,
# underscore or hyphen, so single-character words never become tokens.
_CLAIM_TOKEN_RE = re.compile(r"[A-Za-z][A-Za-z0-9_-]+")
# Recency scale for claim scoring: 30 days, expressed in seconds. The claim
# scorers in this file divide a claim's age by this value inside an
# exponential decay and blend the result as ``0.5 + 0.5 * decay``, so age can
# at most halve a claim's weight. Long enough that durable preferences
# ("we use Postgres") survive across sessions; short enough that
# abandoned-then-reasserted facts surface fresh.
_CLAIM_RECENCY_HALF_LIFE_S = 30 * 24 * 3600
430
+
431
+
432
def _claim_tokens(text: str) -> set[str]:
    """Return the lower-cased ``_CLAIM_TOKEN_RE`` matches found in ``text``.

    Falsy input (``None`` or ``""``) yields an empty set.
    """
    source = text or ""
    # The pattern has no capture groups, so ``findall`` yields whole matches.
    return {token.lower() for token in _CLAIM_TOKEN_RE.findall(source)}
434
+
435
+
436
def _rank_claims(query: str, claims: list[ClaimRecord]) -> list[ClaimRecord]:
    """Rank claims lexically by query-token overlap, confidence and recency.

    This is the cheap token-bag path, good enough for tens to hundreds of
    claims per project.

    Args:
        query: Natural-language query text.
        claims: Candidate claims to rank.

    Returns:
        When the query yields no tokens, all claims ordered by
        ``(confidence, valid_at)`` descending. Otherwise only the claims
        sharing at least one token with the query, ordered by
        ``overlap x confidence x (0.5 + 0.5 x recency decay)`` descending.
    """
    q_tokens = _claim_tokens(query)
    if not q_tokens:
        # No tokens to match — order by confidence, newest first on ties.
        return sorted(
            claims,
            key=lambda c: (c.confidence, c.valid_at),
            reverse=True,
        )

    now = time.time()
    scored: list[tuple[float, ClaimRecord]] = []
    for c in claims:
        bag = _claim_tokens(c.subject) | _claim_tokens(c.object)
        # evidence_span is verbatim user text — strong signal when it
        # mentions the query terms, so we count it in the bag too.
        bag |= _claim_tokens(c.evidence_span)
        overlap = len(bag & q_tokens)
        if overlap == 0:
            continue
        age_s = max(0.0, now - c.valid_at)
        # True half-life decay: the recency term halves every
        # ``_CLAIM_RECENCY_HALF_LIFE_S`` seconds (``exp(-age / H)`` would
        # instead reach 1/e at ``H``).
        decay = 0.5 ** (age_s / _CLAIM_RECENCY_HALF_LIFE_S)
        # The 0.5 floor means age can at most halve a claim's score.
        score = overlap * c.confidence * (0.5 + 0.5 * decay)
        scored.append((score, c))
    scored.sort(key=lambda pair: pair[0], reverse=True)
    return [c for _, c in scored]
469
+
470
+
471
+ def _filter_episodes(
472
+ query: str,
473
+ hits: list[VectorHit],
474
+ episodes: list[Episode],
475
+ *,
476
+ limit: int,
477
+ include_idle: bool,
478
+ ) -> tuple[list[VectorHit], list[Episode]]:
479
+ """Drop idle verdicts (opt-in) and dedupe near-identical prompts.
480
+
481
+ Episodes whose normalized prompt prefix matches the current query or
482
+ another already-kept episode are suppressed. This eliminates the
483
+ "10 copies of my own prior question" noise without needing a second
484
+ embedding round-trip.
485
+ """
486
+ by_id = {ep.id: ep for ep in episodes}
487
+ query_key = _normalize_prompt(query)
488
+ kept_hits: list[VectorHit] = []
489
+ kept_eps: list[Episode] = []
490
+ seen_keys: set[str] = {query_key}
491
+ for h in hits:
492
+ ep = by_id.get(h.id)
493
+ if ep is None:
494
+ continue
495
+ if not include_idle and (ep.verdict or "").lower() == "idle":
496
+ continue
497
+ key = _normalize_prompt(ep.prompt or "")
498
+ if key in seen_keys:
499
+ continue
500
+ seen_keys.add(key)
501
+ kept_hits.append(h)
502
+ kept_eps.append(ep)
503
+ if len(kept_hits) >= limit:
504
+ break
505
+ return kept_hits, kept_eps
@@ -0,0 +1,73 @@
1
+ import time
2
+ import httpx
3
+
4
def is_retryable(exc: Exception) -> bool:
    """Return True when ``exc`` is a transient httpx failure worth retrying.

    Connection errors and timeouts always qualify. An ``HTTPStatusError``
    qualifies only when the response status code is 500 or above. Every
    other exception is treated as final.
    """
    if isinstance(exc, (httpx.ConnectError, httpx.TimeoutException)):
        return True
    if isinstance(exc, httpx.HTTPStatusError):
        return exc.response.status_code >= 500
    return False
13
+
14
def with_retry(fn, *, max_retries=3, backoff_s=1.0, on_retry=None):
    """Call ``fn()``, retrying transient httpx errors with exponential backoff.

    Args:
        fn: Zero-argument callable to invoke.
        max_retries: Number of retries after the first attempt, so ``fn`` is
            called at most ``max_retries + 1`` times. Negative values are
            treated as 0 (a single attempt).
        backoff_s: Delay before the first retry; it doubles on each
            subsequent retry.
        on_retry: Optional ``on_retry(attempt, exc)`` hook called before each
            sleep, with ``attempt`` counting retries from 1.

    Returns:
        Whatever ``fn()`` returns on the first successful attempt.

    Raises:
        The last httpx exception once retries are exhausted, or immediately
        when ``is_retryable`` rejects it. Exceptions outside the httpx
        connect / timeout / status family propagate untouched.
    """
    # Clamp so a negative budget still makes one attempt instead of skipping
    # the call entirely.
    retries_left = max(0, max_retries)
    attempt = 0
    while True:
        try:
            return fn()
        except (httpx.ConnectError, httpx.TimeoutException, httpx.HTTPStatusError) as exc:
            if attempt >= retries_left or not is_retryable(exc):
                raise
            attempt += 1
            if on_retry:
                on_retry(attempt, exc)
            # Exponential backoff: backoff_s, 2*backoff_s, 4*backoff_s, …
            time.sleep(backoff_s * (2 ** (attempt - 1)))
33
+
34
class CircuitBreaker:
    """Stop calling a dependency after ``threshold`` consecutive failures.

    States:

    * ``closed`` — calls pass through. Each failure increments a counter
      that any success resets; reaching ``threshold`` opens the breaker.
    * ``open`` — calls are rejected with ``CircuitBreakerOpenError`` without
      invoking ``fn``, until more than ``cooldown_s`` seconds have passed
      since the last failure.
    * ``half_open`` — entered once the cooldown has elapsed. Calls pass
      through again: a success closes the breaker, a failure re-opens it and
      restarts the cooldown.
    """

    def __init__(self, name="default", threshold=5, cooldown_s=30.0):
        self.name = name
        self.threshold = threshold
        self.cooldown_s = cooldown_s
        self._failures = 0
        self._last_failure = 0.0
        self._state = "closed"  # closed | open | half_open

    @property
    def state(self) -> str:
        """Current state, promoting ``open`` to ``half_open`` after cooldown."""
        if self._state == "open" and time.time() - self._last_failure > self.cooldown_s:
            self._state = "half_open"
        return self._state

    def call(self, fn, *args, **kwargs):
        """Invoke ``fn(*args, **kwargs)`` through the breaker.

        Returns:
            The result of ``fn``.

        Raises:
            CircuitBreakerOpenError: The breaker is open and still cooling
                down; ``fn`` is not called.
            Exception: Whatever ``fn`` raises, after it has been counted as
                a failure.
        """
        # Go through the ``state`` property, not ``_state``: the property is
        # what applies the cooldown, so reading the raw field would keep an
        # open breaker open forever.
        if self.state == "open":
            raise CircuitBreakerOpenError(self.name, self._failures)
        try:
            result = fn(*args, **kwargs)
        except Exception:
            self._failures += 1
            self._last_failure = time.time()
            # In half_open the counter is still at or above the threshold,
            # so a failed trial call re-opens the breaker immediately.
            if self._failures >= self.threshold:
                self._state = "open"
            raise
        # Any success ends the failure streak, so only *consecutive* failures
        # can trip the breaker.
        self._failures = 0
        self._state = "closed"
        return result


class CircuitBreakerOpenError(Exception):
    """Raised by ``CircuitBreaker.call`` while the breaker is open.

    Attributes:
        name: Name of the breaker that rejected the call.
        failures: Failure count recorded when the call was rejected.
    """

    def __init__(self, name, failures):
        super().__init__(f"Circuit breaker '{name}' open after {failures} failures")
        self.name = name
        self.failures = failures
@@ -0,0 +1,20 @@
1
+ """Team-shared code-memory: snapshot, sync, hooks, autostart, watcher."""
2
+
3
+ from .snapshot import (
4
+ Snapshot,
5
+ SnapshotManifest,
6
+ apply_snapshot,
7
+ build_snapshot,
8
+ verify_snapshot,
9
+ )
10
+ from .sync import SyncResult, sync_repo
11
+
12
# Public API of the package: the snapshot types and helpers plus the sync
# entry point imported above.
__all__ = [
    "Snapshot",
    "SnapshotManifest",
    "SyncResult",
    "apply_snapshot",
    "build_snapshot",
    "sync_repo",
    "verify_snapshot",
]
@@ -0,0 +1,42 @@
1
+ """Cross-platform autostart registration for the code-memory watcher.
2
+
3
+ Adapters write a user-level service unit that runs ``code-memory watch <repo>``
4
+ at user logon. No root/admin required.
5
+
6
+ - macOS -> launchd LaunchAgent (~/Library/LaunchAgents/*.plist)
7
+ - Linux -> systemd --user unit (~/.config/systemd/user/*.service)
8
+ - Windows -> Scheduled Task at logon (schtasks /SC ONLOGON /RL LIMITED)
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import platform
14
+ from pathlib import Path
15
+ from typing import Protocol
16
+
17
+ from .base import AutostartStatus, ensure_autostart, get_adapter
18
+ from .launchd import LaunchdAdapter
19
+ from .schtasks import SchtasksAdapter
20
+ from .systemd import SystemdUserAdapter
21
+
22
# Public API of the package: the status type, the three platform adapters,
# the two helpers imported from ``.base``, and the ``Adapter`` protocol
# defined in this module.
__all__ = [
    "AutostartStatus",
    "LaunchdAdapter",
    "SchtasksAdapter",
    "SystemdUserAdapter",
    "ensure_autostart",
    "get_adapter",
    "Adapter",
]
31
+
32
+
33
class Adapter(Protocol):
    """Structural interface shared by the per-platform autostart adapters.

    Every operation takes the repository path and reports an
    ``AutostartStatus``. The method bodies are protocol stubs only.
    """

    def install(self, repo: Path) -> AutostartStatus:
        """Install autostart for ``repo``."""
        ...

    def uninstall(self, repo: Path) -> AutostartStatus:
        """Uninstall autostart for ``repo``."""
        ...

    def status(self, repo: Path) -> AutostartStatus:
        """Report the autostart status for ``repo``."""
        ...

    def start(self, repo: Path) -> AutostartStatus:
        """Start the autostart entry for ``repo``."""
        ...
38
+
39
+
40
+ # convenience re-export
41
def current_platform() -> str:
    """Return the operating-system name reported by ``platform.system()``."""
    os_name = platform.system()
    return os_name