PyPI - flurryx-code-memory - Versions diffs - 0.4.0__py3-none-any.whl - Mend

flurryx-code-memory 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (53) hide show

code_memory/__init__.py +1 -0
code_memory/claims/__init__.py +32 -0
code_memory/claims/extractor.py +325 -0
code_memory/claims/indexer.py +258 -0
code_memory/claims/resolver.py +186 -0
code_memory/claims/store.py +424 -0
code_memory/cli.py +1192 -0
code_memory/config.py +268 -0
code_memory/embed/__init__.py +224 -0
code_memory/embed/cache.py +204 -0
code_memory/embed/m3.py +174 -0
code_memory/embed/ollama.py +92 -0
code_memory/embed/tei.py +106 -0
code_memory/episodic/__init__.py +3 -0
code_memory/episodic/sqlite_store.py +278 -0
code_memory/extractor/__init__.py +3 -0
code_memory/extractor/csproj.py +166 -0
code_memory/extractor/dll.py +385 -0
code_memory/extractor/gitignore.py +162 -0
code_memory/extractor/nuget.py +275 -0
code_memory/extractor/sanity.py +124 -0
code_memory/extractor/sln.py +108 -0
code_memory/extractor/treesitter.py +1172 -0
code_memory/graph/__init__.py +3 -0
code_memory/graph/falkor_store.py +740 -0
code_memory/mcp_server.py +1816 -0
code_memory/metrics.py +260 -0
code_memory/orchestrator/__init__.py +13 -0
code_memory/orchestrator/git_delta.py +211 -0
code_memory/orchestrator/ingest_state.py +71 -0
code_memory/orchestrator/pipeline.py +1478 -0
code_memory/orchestrator/reset.py +130 -0
code_memory/orchestrator/resolver.py +825 -0
code_memory/orchestrator/retrieve.py +505 -0
code_memory/resilience.py +73 -0
code_memory/sync/__init__.py +20 -0
code_memory/sync/autostart/__init__.py +42 -0
code_memory/sync/autostart/base.py +106 -0
code_memory/sync/autostart/launchd.py +115 -0
code_memory/sync/autostart/schtasks.py +155 -0
code_memory/sync/autostart/systemd.py +113 -0
code_memory/sync/hooks.py +164 -0
code_memory/sync/safety.py +65 -0
code_memory/sync/snapshot.py +461 -0
code_memory/sync/store.py +399 -0
code_memory/sync/sync.py +405 -0
code_memory/sync/watcher.py +320 -0
code_memory/vector/__init__.py +3 -0
code_memory/vector/qdrant_store.py +302 -0
flurryx_code_memory-0.4.0.dist-info/METADATA +26 -0
flurryx_code_memory-0.4.0.dist-info/RECORD +53 -0
flurryx_code_memory-0.4.0.dist-info/WHEEL +4 -0
flurryx_code_memory-0.4.0.dist-info/entry_points.txt +3 -0

code_memory/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ __version__ = "0.1.0"

code_memory/claims/__init__.py ADDED Viewed

@@ -0,0 +1,32 @@
+"""User-prompt claim extraction (Graphiti-style).
+The pipeline turns substantive user prompts into structured
+``(subject, predicate, object)`` claims with bi-temporal validity so a
+later session can answer "what did the user say about X last Tuesday?"
+without re-reading every prompt.
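+Layout:
+  * :mod:`.extractor` — local-LLM extraction (Ollama, gemma2:9b default).
+  * :mod:`.indexer`   — Qdrant-backed semantic index layered on top of
+                        the SQLite store.
+  * :mod:`.resolver`  — home of ``EntityRef`` and ``EntityResolver``.
+  * :mod:`.store`     — SQLite store with bi-temporal columns and a
+                        single-valued predicate registry for contradiction
+                        handling.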
+"""
+from .extractor import Claim, ClaimExtractor, ExtractionError
+from .indexer import ClaimsIndexer, make_claims_indexer
+from .resolver import EntityRef, EntityResolver
+from .store import ClaimRecord, ClaimsStore, SINGLE_VALUED_PREDICATES, UpsertResult
+__all__ = [
+    "Claim",
+    "ClaimExtractor",
+    "ClaimRecord",
+    "ClaimsIndexer",
+    "ClaimsStore",
+    "EntityRef",
+    "EntityResolver",
+    "ExtractionError",
+    "SINGLE_VALUED_PREDICATES",
+    "UpsertResult",
+    "make_claims_indexer",
+]

code_memory/claims/extractor.py ADDED Viewed

@@ -0,0 +1,325 @@
+"""Local-LLM claim extractor.
+Calls an Ollama-served instruct model (gemma2:9b by default) in JSON
+mode and returns a list of :class:`Claim` records. Output is validated
+defensively because LLMs lie:
+  * ``evidence_span`` must be a literal substring of the source prompt.
+    Hallucinated triples that paraphrase the input are dropped.
+  * ``confidence`` below ``CLAIMS_MIN_CONFIDENCE`` is dropped.
+  * Empty / non-string subject or object is dropped.
+The extractor never raises on a malformed model response — it returns
+an empty list so the caller (an async hook) never blocks the session.
+The only raised exception is :class:`ExtractionError` for hard
+infrastructure failures (Ollama unreachable, model not pulled).
+"""
+from __future__ import annotations
+import json
+import logging
+from dataclasses import dataclass
+from typing import Any
+import httpx
+from ..config import CONFIG
+_LOG = logging.getLogger(__name__)
+class ExtractionError(RuntimeError):
+    """Raised when the LLM backend itself is unreachable or misconfigured."""
+# Closed predicate vocabulary. The system prompt instructs the model to
+# stay inside this set; _coerce enforces it so a noisy generation can't
+# smuggle in free-form predicates that would defeat single-valued
+# contradiction handling downstream.
+_ALLOWED_PREDICATES: frozenset[str] = frozenset(
+    {
+        "uses",
+        "prefers",
+        "rejected",
+        "wants-to",
+        "is-located-at",
+        "depends-on",
+        "deployed-to",
+        "owns",
+        "is-a",
+        "mentioned",
+        "worked-on",
+    }
+)
+@dataclass(frozen=True)
+class Claim:
+    """One validated ``(subject, predicate, object)`` claim from a prompt.
+    Instances are immutable and are only built by
+    ``ClaimExtractor._coerce`` after the raw model output passed
+    validation.
+    """
+    # Left-hand side of the triple, whitespace-stripped.
+    subject: str
+    # Normalized predicate; always a member of ``_ALLOWED_PREDICATES``.
+    predicate: str
+    # Right-hand side of the triple, whitespace-stripped.
+    object: str
+    polarity: bool  # True = asserts, False = negates ("does not use X")
+    # Model-reported certainty; never below the extractor's ``min_confidence``.
+    confidence: float
+    # Text the model cited; a case-insensitive substring of the source prompt.
+    evidence_span: str
+# JSON schema embedded in the prompt. Ollama's structured-output mode
+# uses this verbatim to constrain the decoder. The schema is intentionally
+# narrow — predicates are normalized to kebab-case verbs so downstream
+# resolution doesn't have to disambiguate "uses" / "USES" / "Uses".
+_OUTPUT_SCHEMA = {
+    "type": "object",
+    "properties": {
+        "claims": {
+            "type": "array",
+            "items": {
+                "type": "object",
+                "properties": {
+                    "subject": {"type": "string", "minLength": 1},
+                    "predicate": {"type": "string", "minLength": 1},
+                    "object": {"type": "string", "minLength": 1},
+                    "polarity": {"type": "boolean"},
+                    "confidence": {"type": "number", "minimum": 0, "maximum": 1},
+                    "evidence_span": {"type": "string", "minLength": 1},
+                },
+                "required": [
+                    "subject",
+                    "predicate",
+                    "object",
+                    "polarity",
+                    "confidence",
+                    "evidence_span",
+                ],
+            },
+        }
+    },
+    "required": ["claims"],
+}
+# System prompt sent as the ``system`` message on every extraction call.
+# NOTE(review): the predicate list below duplicates ``_ALLOWED_PREDICATES``
+# and the 0.7 confidence floor is hard-coded here, independent of the
+# ``min_confidence`` threshold applied in ``ClaimExtractor._coerce``; keep
+# them in sync by hand when either changes.
+_SYSTEM_PROMPT = """\
+You extract DURABLE factual claims from a software engineer's chat
+message. Durable = the assertion is likely still true in a future
+session, not transient task state.
+Output JSON only, matching the provided schema. Each claim is a
+(subject, predicate, object) triple plus polarity, confidence, and an
+``evidence_span`` that is a verbatim substring of the input.
+Rules:
+- Predicate is kebab-case verb phrase from this closed vocabulary:
+  "uses", "prefers", "rejected", "wants-to", "is-located-at",
+  "depends-on", "deployed-to", "owns", "is-a", "mentioned",
+  "worked-on". Reject any predicate outside this list.
+- Subject and object are short noun phrases lifted from the message;
+  normalize case but keep technical identifiers as written.
+- HARD FILTER — skip and emit no claim for:
+  * Questions of any kind ("should I…", "why does…", "is X…").
+  * Hypotheticals / counterfactuals ("if we used…", "suppose X…").
+  * Imperatives directed at YOU the assistant ("fix this", "run X",
+    "look at Y") — those are task state, not durable facts.
+  * Opinions about third parties or general industry statements.
+  * Small talk, acknowledgments, meta-comments about the conversation.
+  * Anything that would be obvious from the codebase itself (e.g.
+    "this file imports React" — already in the source).
+- Only extract assertions the user is making about their PROJECT, their
+  TOOLING choices, their PREFERENCES, OWNERSHIP, or LOCATIONS — facts
+  worth recalling next week.
+- ``confidence`` ∈ [0,1] reflects how certain you are this is a
+  durable assertion (not a question, speculation, or task state).
+  Below 0.7 → don't emit at all.
+- Be CONSERVATIVE. If in doubt, emit nothing. Empty output is the
+  correct answer for most messages.
+- If nothing qualifies, return {"claims": []}.
+Examples:
+INPUT: "we use Qdrant for vectors and FalkorDB for the graph"
+OUTPUT: {"claims": [
+  {"subject":"project","predicate":"uses","object":"Qdrant",
+   "polarity":true,"confidence":0.95,"evidence_span":"use Qdrant for vectors"},
+  {"subject":"project","predicate":"uses","object":"FalkorDB",
+   "polarity":true,"confidence":0.95,"evidence_span":"FalkorDB for the graph"}
+]}
+INPUT: "should I use Redis here?"
+OUTPUT: {"claims": []}
+INPUT: "fix the bug in auth.py"
+OUTPUT: {"claims": []}
+INPUT: "look at this file and tell me what it does"
+OUTPUT: {"claims": []}
+INPUT: "I don't want to ship dark mode"
+OUTPUT: {"claims": [
+  {"subject":"user","predicate":"rejected","object":"dark mode",
+   "polarity":true,"confidence":0.9,"evidence_span":"don't want to ship dark mode"}
+]}
+INPUT: "stop summarizing at the end of every response"
+OUTPUT: {"claims": [
+  {"subject":"user","predicate":"prefers","object":"no end-of-turn summaries",
+   "polarity":true,"confidence":0.9,
+   "evidence_span":"stop summarizing at the end of every response"}
+]}
+INPUT: "the billing service lives in apps/api/billing"
+OUTPUT: {"claims": [
+  {"subject":"billing service","predicate":"is-located-at",
+   "object":"apps/api/billing","polarity":true,"confidence":0.95,
+   "evidence_span":"billing service lives in apps/api/billing"}
+]}
+"""
+class ClaimExtractor:
+    """Thin sync wrapper over Ollama's /api/chat with JSON-mode output.
+    Construction is cheap; the HTTP client is created lazily so import
+    of this module never touches the network.
+    """
+    def __init__(
+        self,
+        url: str | None = None,
+        model: str | None = None,
+        timeout: float | None = None,
+        min_confidence: float | None = None,
+    ) -> None:
+        self.url = (url or CONFIG.ollama_url).rstrip("/")
+        self.model = model or CONFIG.claims_llm_model
+        self.timeout = timeout if timeout is not None else CONFIG.claims_llm_timeout
+        self.min_confidence = (
+            min_confidence
+            if min_confidence is not None
+            else CONFIG.claims_min_confidence
+        )
+        self._client: httpx.Client | None = None
+    # ------------------------------------------------------------------ http
+    def _http(self) -> httpx.Client:
+        if self._client is None:
+            self._client = httpx.Client(timeout=self.timeout)
+        return self._client
+    def close(self) -> None:
+        if self._client is not None:
+            self._client.close()
+            self._client = None
+    def __enter__(self) -> ClaimExtractor:
+        return self
+    def __exit__(self, *exc: object) -> None:
+        self.close()
+    # ----------------------------------------------------------------- extract
+    def extract(self, prompt: str) -> list[Claim]:
+        """Run extraction over a single user prompt.
+        Returns the validated, deduplicated, confidence-filtered list.
+        Never raises on a malformed model response — returns ``[]``.
+        Raises :class:`ExtractionError` only on transport-level failures.
+        """
+        prompt = prompt.strip()
+        if not prompt:
+            return []
+        try:
+            raw = self._call_ollama(prompt)
+        except httpx.HTTPError as exc:
+            raise ExtractionError(f"Ollama call failed: {exc}") from exc
+        return self._parse_and_validate(raw, prompt)
+    # ------------------------------------------------------------ internals
+    def _call_ollama(self, prompt: str) -> str:
+        payload: dict[str, Any] = {
+            "model": self.model,
+            "format": _OUTPUT_SCHEMA,
+            "stream": False,
+            "options": {"temperature": 0.0},
+            "messages": [
+                {"role": "system", "content": _SYSTEM_PROMPT},
+                {"role": "user", "content": prompt},
+            ],
+        }
+        res = self._http().post(f"{self.url}/api/chat", json=payload)
+        res.raise_for_status()
+        data = res.json()
+        msg = data.get("message") or {}
+        return str(msg.get("content") or "")
+    def _parse_and_validate(self, raw: str, source_prompt: str) -> list[Claim]:
+        if not raw.strip():
+            return []
+        try:
+            parsed = json.loads(raw)
+        except json.JSONDecodeError:
+            _LOG.warning("claim extractor: non-JSON response, dropping")
+            return []
+        items = parsed.get("claims")
+        if not isinstance(items, list):
+            return []
+        out: list[Claim] = []
+        seen: set[tuple[str, str, str, bool]] = set()
+        for item in items:
+            claim = self._coerce(item, source_prompt)
+            if claim is None:
+                continue
+            key = (
+                claim.subject.lower(),
+                claim.predicate.lower(),
+                claim.object.lower(),
+                claim.polarity,
+            )
+            if key in seen:
+                continue
+            seen.add(key)
+            out.append(claim)
+        return out
+    def _coerce(self, item: Any, source_prompt: str) -> Claim | None:
+        if not isinstance(item, dict):
+            return None
+        try:
+            subject = str(item["subject"]).strip()
+            predicate = str(item["predicate"]).strip().lower().replace(" ", "-")
+            obj = str(item["object"]).strip()
+            polarity = bool(item["polarity"])
+            confidence = float(item["confidence"])
+            evidence = str(item["evidence_span"]).strip()
+        except (KeyError, TypeError, ValueError):
+            return None
+        if not subject or not predicate or not obj or not evidence:
+            return None
+        if predicate not in _ALLOWED_PREDICATES:
+            _LOG.debug(
+                "claim extractor: dropping out-of-vocab predicate %r", predicate
+            )
+            return None
+        if confidence < self.min_confidence:
+            return None
+        # Anti-hallucination: evidence must be present in the source.
+        if evidence.lower() not in source_prompt.lower():
+            _LOG.debug(
+                "claim extractor: dropping hallucinated span %r", evidence
+            )
+            return None
+        return Claim(
+            subject=subject,
+            predicate=predicate,
+            object=obj,
+            polarity=polarity,
+            confidence=confidence,
+            evidence_span=evidence,
+        )

code_memory/claims/indexer.py ADDED Viewed

@@ -0,0 +1,258 @@
+"""Qdrant-backed semantic index over user claims.
+``ClaimsStore`` (SQLite) is the source of truth for the bi-temporal
+claim history. This module layers a vector index on top so retrieval
+can match claims semantically — "we use Postgres" surfaces for a query
+about "DB choice" — instead of relying on the token-overlap heuristic
+in :func:`code_memory.orchestrator.retrieve._rank_claims`.
+Design choices:
+* **Keep + flag, not delete.** When a claim is superseded
+  (``valid_to`` set), the Qdrant point stays and gets
+  ``payload.open = False``. Default retrieval filters ``open=true``;
+  this keeps the door open for bi-temporal ``as_of`` semantic queries
+  later without re-embedding the corpus.
+* **Embed triple + evidence.** The evidence span carries the user's
+  raw phrasing, which is where synonym recall lives ("DB" vs
+  "Postgres"). The triple alone is too terse to embed well.
+* **Lazy backfill.** First access detects that Qdrant holds fewer
+  points than ``claims.db`` has rows and re-embeds every row. Idempotent:
+  re-runs are cheap because the embedder caches per-text.
+* **Token-overlap fallback.** If the embedder or Qdrant is unavailable
+  the caller falls back to ``_rank_claims`` (see ``retrieve.py``). The
+  indexer raises only when the operation is fundamentally
+  side-effecting (upsert), not when reads fail.
+"""
+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Any, Sequence
+from ..config import CONFIG, Config, detect_project_slug
+from ..embed import Embedder, HybridVec, get_embedder
+from ..vector import QdrantStore, VectorHit, VectorRecord
+from .store import ClaimRecord, ClaimsStore, UpsertResult
+@dataclass
+class ClaimsIndexer:
+    """SQLite + Qdrant facade for claim writes and semantic reads.
+    Holds references to the persistent components; callers re-use one
+    indexer per project across many upserts. Not thread-safe — the
+    underlying SQLite connection isn't either.
+    """
+    store: ClaimsStore
+    vector: QdrantStore
+    embedder: Embedder
+    collection: str
+    _backfilled: bool = False
+    # ------------------------------------------------------------- write
+    def upsert(self, claim: ClaimRecord) -> UpsertResult:
+        """Persist ``claim`` to SQLite + Qdrant atomically (SQLite first).
+        SQLite write is authoritative — if Qdrant raises after the
+        SQLite commit, the row still lands and a later
+        :meth:`ensure_backfilled` call (or the next ``retrieve``) will
+        re-embed it. We prefer "Qdrant temporarily behind" over "lose
+        the claim entirely."
+        """
+        self.ensure_backfilled()
+        result = self.store.upsert(claim)
+        # Close path: any predecessor rows closed by this insert get
+        # their ``open`` payload flipped. Cheap: no re-embed.
+        if result.closed_ids:
+            self.vector.set_payload(
+                self.collection,
+                result.closed_ids,
+                {"open": False},
+            )
+        if result.was_new:
+            self._embed_and_upsert(result.claim_id, claim)
+        else:
+            # Existing-row refresh: triple unchanged, but confidence and
+            # ``recorded_at`` may have moved. Update payload so rerank
+            # sees the new score without paying for an embed.
+            self.vector.set_payload(
+                self.collection,
+                [result.claim_id],
+                _payload_for(claim, open_=True),
+            )
+        return result
+    def upsert_many(self, claims: Sequence[ClaimRecord]) -> list[UpsertResult]:
+        return [self.upsert(c) for c in claims]
+    # -------------------------------------------------------------- read
+    def search(
+        self,
+        query_vec: HybridVec,
+        top_k: int = 5,
+        *,
+        include_closed: bool = False,
+    ) -> list[VectorHit]:
+        """Semantic top-k over claim points.
+        Default filter is ``open=true`` so superseded claims don't leak
+        into the orientation context. ``include_closed`` opens the door
+        for future bi-temporal point-in-time queries (see the design
+        doc note in :mod:`code_memory.orchestrator.retrieve`).
+        Returns an empty list (not an error) when the collection is
+        missing — that's the "claims-disabled project" path and the
+        caller should fall back to token-overlap silently.
+        """
+        if self.vector._inspect_collection(self.collection) == "missing":
+            return []
+        filt: dict[str, Any] | None = None if include_closed else {"open": True}
+        try:
+            return self.vector.search(
+                self.collection,
+                query_vec,
+                top_k=top_k,
+                filt=filt,
+                mode="dense",
+            )
+        except Exception:  # noqa: BLE001
+            # Vector backend hiccup — return empty so the orchestrator
+            # falls through to the SQLite token-overlap fallback rather
+            # than dropping claims from the context pack entirely.
+            return []
+    # --------------------------------------------------------- backfill
+    def ensure_backfilled(self) -> int:
+        """Embed every claim row missing from Qdrant. Idempotent.
+        Runs once per indexer instance. Re-creates the collection if it
+        was missing. Returns the count of rows embedded (``0`` when
+        already in sync). Cheap on warm runs — the embedder cache hits
+        for previously-seen triples.
+        We compare row counts as a soft sync check, not point IDs. If
+        SQLite has 42 rows and Qdrant has 42 points we trust they're
+        the same set; drift detection would need a per-id scan and we
+        don't currently need it.
+        """
+        if self._backfilled:
+            return 0
+        self.vector.ensure_collection(self.collection)
+        sqlite_count = self.store.count()
+        if sqlite_count == 0:
+            self._backfilled = True
+            return 0
+        qdrant_count = self.vector.count(self.collection)
+        if qdrant_count >= sqlite_count:
+            self._backfilled = True
+            return 0
+        # Backfill all rows (open + closed) so bi-temporal queries work
+        # later. ``current()`` returns only open rows, so use a wider
+        # accessor.
+        rows = self._all_rows()
+        records: list[VectorRecord] = []
+        for claim in rows:
+            hv = self.embedder.embed_one(_text_for(claim))
+            records.append(
+                VectorRecord(
+                    id=claim.id,
+                    vector=hv,
+                    payload=_payload_for(claim, open_=claim.valid_to is None),
+                )
+            )
+        if records:
+            self.vector.upsert(self.collection, records)
+        self._backfilled = True
+        return len(records)
+    # ------------------------------------------------------------ helpers
+    def _embed_and_upsert(self, claim_id: str, claim: ClaimRecord) -> None:
+        hv = self.embedder.embed_one(_text_for(claim))
+        self.vector.upsert(
+            self.collection,
+            [
+                VectorRecord(
+                    id=claim_id,
+                    vector=hv,
+                    payload=_payload_for(claim, open_=True),
+                )
+            ],
+        )
+    def _all_rows(self) -> list[ClaimRecord]:
+        """Every row, open or closed. Used for backfill only."""
+        rows = self.store.conn.execute(
+            "SELECT id, subject, predicate, object, polarity, confidence, "
+            "evidence_span, valid_at, valid_to, recorded_at, "
+            "head_sha, session_id, source_prompt_id, "
+            "entity_subject_id, entity_object_id FROM claims"
+        ).fetchall()
+        # Reuse the row->record decoder.
+        from .store import _row_to_claim
+        return [_row_to_claim(r) for r in rows]
+def _text_for(claim: ClaimRecord) -> str:
+    """Composite text used as the embed input for a claim.
+    ``subject {predicate} object`` is the canonical triple. The
+    evidence span — the verbatim user phrasing — gets appended so the
+    embedder also sees the natural-language vocabulary the user used
+    when asserting the claim. That's where synonym recall comes from
+    (e.g. "DB" in evidence aligns with "Postgres" in object).
+    """
+    polarity = "" if claim.polarity else "not "
+    head = f"{claim.subject} {polarity}{claim.predicate} {claim.object}".strip()
+    if claim.evidence_span:
+        return f"{head}\n\n{claim.evidence_span}"
+    return head
+def _payload_for(claim: ClaimRecord, *, open_: bool) -> dict[str, Any]:
+    """Payload stored alongside each Qdrant point.
+    Carries just enough metadata for reranking (confidence, recency
+    via valid_at) and filtering (open). Anything else stays in SQLite.
+    """
+    return {
+        "open": open_,
+        "subject": claim.subject,
+        "predicate": claim.predicate,
+        "object": claim.object,
+        "polarity": claim.polarity,
+        "confidence": claim.confidence,
+        "valid_at": claim.valid_at,
+        "head_sha": claim.head_sha,
+    }
+def make_claims_indexer(
+    project: str | None = None,
+    *,
+    cfg: Config | None = None,
+    embedder: Embedder | None = None,
+    vector: QdrantStore | None = None,
+    store: ClaimsStore | None = None,
+) -> ClaimsIndexer:
+    """Construct a fully wired :class:`ClaimsIndexer` for ``project``.
+    All deps are optional so tests can inject fakes. Production callers
+    typically pass nothing and get the configured embedder + Qdrant
+    client + per-project SQLite path.
+    """
+    slug = project or detect_project_slug()
+    config = cfg or CONFIG.for_project(slug)
+    return ClaimsIndexer(
+        store=store or ClaimsStore(path=config.claims_db),
+        vector=vector or QdrantStore(),
+        embedder=embedder or get_embedder(),
+        collection=config.qdrant_claims,
+    )