npm - @simbimbo/memory-ocmemog - Versions diffs - 0.1.16 → 0.1.18 - Mend

@simbimbo/memory-ocmemog 0.1.16 → 0.1.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

package/CHANGELOG.md +72 -0
package/README.md +33 -1
package/docs/architecture/memory.md +89 -10
package/docs/release-checklist.md +5 -0
package/docs/usage.md +71 -1
package/index.ts +90 -6
package/ocmemog/doctor.py +23 -1
package/ocmemog/runtime/memory/api.py +103 -19
package/ocmemog/runtime/memory/conversation_state.py +8 -1
package/ocmemog/runtime/memory/embedding_engine.py +24 -0
package/ocmemog/runtime/memory/promote.py +183 -10
package/ocmemog/runtime/memory/retrieval.py +185 -16
package/ocmemog/runtime/memory/vector_index.py +79 -1
package/ocmemog/sidecar/app.py +339 -6
package/ocmemog/sidecar/compat.py +160 -2
package/package.json +1 -1

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,77 @@
 # Changelog
+## Unreleased
+## 0.1.17 — 2026-03-26
+Promotion/governance observability, anti-cruft hardening, queue/runtime summary parity, and release validation recovery.
+### Highlights
+- surfaced agent-scoped auto-hydration policy and decision reasons in runtime and dedicated sidecar diagnostics
+- added request-level embedding execution diagnostics and promoted key embedding outcomes into the search execution path summary
+- added compact governance, reinforcement, and suppression rollups to `/memory/search` diagnostics, including per-bucket parity
+- added queue health, severity, invalid/retrying payload indicators, and doctor-style aliases to `runtimeSummary.queue`
+- added governance queue/review/audit/rollback/auto-resolve diagnostics plus normalized priority labels and concise explanations
+- added promotion decision explanations, verification summaries, quality summaries, and richer rejection reasons
+- activated anti-cruft promotion gating for low-confidence generic memories, including a distinct redundant-generic rejection path
+- repaired a malformed local edit in `promote.py` and revalidated the branch tip with full test coverage before release
+- full validation passed: `188 passed`
+### Detailed changes
+- improved lexical retrieval scoring to consider token overlap, ordered phrase overlap, and light prefix matching instead of relying on blunt substring-or-overlap behavior
+- kept the retrieval path bounded and hybrid by continuing to blend lexical, semantic, reinforcement, promotion, recency, and lane-aware signals
+- added lightweight `searchDiagnostics` to `/memory/search` so retrieval strategy, lane, bucket counts, result compaction, timing, and vector-search scan/prefilter details are visible in the API response
+- added a bounded lexical prefilter inside `vector_index.search_memory()` so semantic ranking can prefer lexically relevant candidates before cosine scoring without introducing ANN complexity
+- aligned README and architecture/usage docs with the actual shipped hybrid retrieval behavior
+- added regression coverage for partial-phrase lexical matches, sidecar search diagnostics, vector prefilter behavior, malformed queue-line recovery, bounded async retry behavior, and doctor visibility for retrying queue payloads
+- hardened async queue processing so malformed queue JSON is skipped/acknowledged instead of blocking later valid entries in the same queue file
+- added bounded retry tracking for valid queue payload failures so poison items are retried a small number of times and then dropped/acknowledged instead of blocking the queue forever
+- improved doctor queue health output so malformed queue lines and retrying poison items are reported separately with clearer hints and samples
+- added `runtimeSummary` to sidecar/runtime payloads so provider path, hash-fallback state, degraded/ready mode, and compatibility residue are explicit to operators
+- expanded `/memory/search` diagnostics with a request-level `execution_path` block so provider-configured, provider-skipped, local-fallback-expected, and route-exception-fallback behavior is explicit per request
+- added `reviewDiagnostics` to `/memory/governance/review/summary` so cache freshness, item count, kind breakdown, and active filters are explicit to operators
+- added an `explanation` block to `/memory/governance/review` items so per-item rationale and source/target status context are easier to render and review
+- added normalized governance `priority_label` values on review items and `priority_label_counts` in review summary diagnostics for simpler operator triage
+- added per-agent auto-hydration controls (`OCMEMOG_AUTO_HYDRATION_ALLOW_AGENT_IDS` / `OCMEMOG_AUTO_HYDRATION_DENY_AGENT_IDS`) so prompt-time continuity can be scoped by `ctx.agentId` without disabling global ingest/checkpoint behavior
+- surfaced the active auto-hydration agent policy in `runtimeSummary.auto_hydration` for easier operator verification and debugging
+- added explicit plugin-side hydration decision reasons so skips can be traced to global disable vs denylist vs allowlist mismatch
+- added `/memory/auto_hydration/policy` so operators can query the current agent-specific prompt-hydration decision from the sidecar
+- improved plugin hydration observability with structured skip/apply decision logs that include agent id, decision reason, and prepend sizes
+- added compact `governance_summary` payloads to retrieval results so search consumers can triage governance state without unpacking the full provenance structure
+- enriched `runtimeSummary` embedding observability with local embedding model and embedding path readiness details so provider vs local/simple fallback is clearer to operators
+- added request-level embedding execution diagnostics to vector/search responses so operators can tell whether provider embedding was attempted, whether local fallback ran, and which path actually produced the query embedding
+- made retrieval reinforcement scoring frequency-aware and exposed `reinforcement_count` in retrieval signals so repeated successful experience can influence ranking without unbounded growth
+- added light recency-aware reinforcement weighting and exposed `reinforcement_weighted_count` so newer successful experiences matter more than stale ones
+- added bounded negative reinforcement handling and exposed `reinforcement_negative_count` / `reinforcement_negative_penalty` so failed experience can depress ranking in an explainable way
+- added compact promotion decision explanations so distill/promote outcomes are easier to inspect and render
+- added compact promotion verification summaries so confidence/threshold semantics are easier to interpret consistently
+- enriched rejected promotion reasons so generic-destination low-confidence cases are easier to distinguish from ordinary below-threshold outcomes
+- added `quality_summary` to promotion decisions so low-value generic candidates are easier to identify as likely memory cruft (`drop` / high-noise-risk) while stronger specific memories are easier to keep
+- activated a first anti-cruft retention gate: low-confidence candidates that only resolve to generic `knowledge` are now rejected as `rejected_as_generic_cruft`
+- added a second anti-cruft distinction for low-confidence generic candidates that merely restate existing generic knowledge: `rejected_as_redundant_generic_cruft`
+- added a weak-specific ambiguity distinction so below-threshold candidates that fit a specific bucket only loosely are labeled `rejected_as_ambiguous_specific_memory` and surfaced for review more clearly
+- added compact reinforcement rollups to `/memory/search` diagnostics so operators can see visible and retrieval-side reinforcement totals, including per-bucket visible counts
+- extended retrieval-side reinforcement diagnostics with per-bucket totals for parity with other search/operator rollups
+- extended visible and retrieval-side reinforcement rollups with bounded negative/polarity totals so failed experience is inspectable too
+- promoted the key embedding outcome fields into `searchDiagnostics.execution_path` so request-level scannability is better without drilling into nested vector diagnostics
+- added compact queue health snapshots to `runtimeSummary` so operators can see queue depth, last run, processed totals, error counts, and worker status from normal sidecar payloads
+- extended `runtimeSummary.queue` with lightweight severity/hints so backlog/worker/error conditions are easier to judge from normal runtime payloads
+- added compact invalid-line and retrying-payload indicators to `runtimeSummary.queue` so normal runtime payloads can distinguish queue corruption from poison-item retry churn
+- added doctor-style queue aliases (`queue_depth`, `queue_backlog_severity`) to `runtimeSummary.queue` to reduce translation friction between runtime payloads and doctor output
+- added compact queue worker config issues to `runtimeSummary.queue` so invalid poll/batch settings surface in normal runtime payloads too
+- normalized runtime summary sub-blocks so `queue`, `embedding_path_summary`, and `auto_hydration` all expose a small shared operator shape (`enabled`, `status`, `issues`) where appropriate
+- added compact `queueDiagnostics` to `/memory/governance/queue` so operators can quickly see item counts plus bucket/kind/priority-label breakdowns
+- added compact `explanation` blocks to governance queue items so queue consumers get short rationale and target-reference context without unpacking raw fields
+- added normalized `priority_label` values to governance queue items so queue and review surfaces share the same urgency vocabulary
+- added compact `autoResolveDiagnostics` to `/memory/governance/auto_resolve` so operators can quickly see action totals plus reason/kind breakdowns and the active policy profile
+- added compact `auditDiagnostics` to `/memory/governance/audit` so operators can quickly see audit item totals plus event/status breakdowns
+- added compact `rollbackDiagnostics` to `/memory/governance/rollback` so operators can quickly see whether rollback succeeded and how the outcome was classified
+- reframed governance review actions as apply/dismiss and added `/memory/governance/review/auto_apply` so routine review handling does not depend on dashboard/user approval input
+- added `governance_rollup` to `/memory/search` diagnostics so search consumers can quickly see visible result status counts and needs-review totals
+- extended visible governance rollups with per-bucket breakdowns so search consumers can see where visible governance pressure is concentrated
+- added retrieval governance suppression counts so `/memory/search` diagnostics can report how many candidates were hidden as `superseded` or `duplicate`
+- extended retrieval governance suppression diagnostics with per-bucket breakdowns so search consumers can see where hidden-governance pressure is concentrated
 ## 0.1.16 — 2026-03-25
 Platform support doc clarification for Linux/Windows service guidance.

package/README.md CHANGED Viewed

@@ -3,11 +3,38 @@
 **ocmemog** is an advanced memory engine for OpenClaw that combines durable long-term memory, transcript-backed continuity, conversation hydration, checkpoint expansion, and pondering inside a sidecar-based plugin architecture.
 It is designed to go beyond simple memory search by providing:
-- **durable memory and semantic retrieval**
+- **durable memory and hybrid retrieval (lexical + semantic)**
+- **operator-visible search diagnostics for retrieval and vector-search behavior**
+- **bounded vector search with lightweight lexical prefiltering**
 - **lossless-style conversation continuity**
 - **checkpointing, branch-aware hydration, and turn expansion**
 - **transcript ingestion with anchored context recovery**
 - **pondering and reflection generation**
+- **durable queue behavior that skips malformed queued payloads, bounds poison-item retries, and exposes clearer queue health diagnostics**
+- **compact runtime summaries that make provider/fallback/degraded state explicit in sidecar responses, including local embedding model/path readiness**
+- **request-level embedding path diagnostics inside search/vector-search responses, with promoted top-level execution summaries**
+- **frequency-aware, recency-aware, and polarity-aware reinforcement signals in retrieval ranking**
+- **compact reinforcement rollups in search diagnostics, including per-bucket and negative/polarity parity**
+- **more consistent runtime summary sub-blocks across embedding, queue, and auto-hydration surfaces**
+- **compact queue health snapshots in runtime summaries, including severity, hints, invalid/retrying indicators, doctor-style aliases, and worker-config issues**
+- **request-level search execution diagnostics that show provider-skip vs local-fallback vs route-fallback behavior**
+- **governance review summary diagnostics for cache freshness and review-kind breakdowns**
+- **governance review item explanations that make duplicate/contradiction/supersession rationale easier to render**
+- **normalized governance priority labels for easier operator triage**
+- **a sidecar hydration-policy diagnostics route for agent-specific continuity debugging**
+- **compact governance summaries in retrieval results to bridge search and review workflows**
+- **promotion decision explanations, verification summaries, and quality summaries for better distill/promote operator clarity**
+- **an explicit anti-cruft quality signal so weak generic memories are easier to spot and avoid keeping long-term**
+- **active anti-cruft retention gates that reject low-confidence generic memories, especially redundant generic junk, while flagging weak specific-fit memories more explicitly for review**
+- **compact governance queue diagnostics for faster operator triage**
+- **governance review apply/dismiss + auto-apply flows that do not depend on dashboard approval input**
+- **governance queue item explanations that align queue and review surfaces**
+- **shared normalized priority labels across governance queue and review items**
+- **compact governance auto-resolve diagnostics for faster operator triage**
+- **compact governance audit diagnostics for faster operator triage**
+- **compact governance rollback diagnostics for faster operator triage**
+- **governance rollups in search diagnostics for faster operator triage, including per-bucket visible breakdowns**
+- **hidden-by-governance suppression counts in retrieval diagnostics, including per-bucket breakdowns**
 Architecture at a glance:
 - **OpenClaw plugin (`index.ts`)** handles tools and hook integration
@@ -136,6 +163,11 @@ Optional environment variables:
 - `OCMEMOG_SHUTDOWN_TIMING` (`true` enables shutdown timing logs; defaults to `true`)
 - `OCMEMOG_API_TOKEN` (optional; if set, requests must include `x-ocmemog-token` or `Authorization: Bearer ...`; OpenClaw plugin users should also set the plugin `config.token` field)
 - `OCMEMOG_AUTO_HYDRATION` (`true` to re-enable prompt-time continuity prepending; defaults to `false` as a safety guard until the host runtime is verified not to persist prepended context into session history)
+- `OCMEMOG_AUTO_HYDRATION_ALLOW_AGENT_IDS` (comma-separated `ctx.agentId` allowlist for prompt-time hydration; when set, only matching agents receive before-prompt hydration)
+- `OCMEMOG_AUTO_HYDRATION_DENY_AGENT_IDS` (comma-separated `ctx.agentId` denylist for prompt-time hydration; checked before the allowlist so specific agents can be blocked even when global hydration remains enabled)
+- `runtimeSummary.auto_hydration` now exposes the active auto-hydration policy so operators can verify agent scoping from sidecar/runtime payloads
+- plugin-side hydration gating now has explicit decision reasons (`disabled_globally`, `denied_by_agent_id`, `not_in_allowlist`, `allowed_by_allowlist`, `allowed_globally`) for clearer debugging/logging
+- plugin logs now record structured prompt-hydration decision context for both skipped and applied hydration events
 - `OCMEMOG_LAPTOP_MODE` (`auto` by default; on macOS battery power this slows watcher polling, reduces ingest batch size, and disables sentiment reinforcement unless explicitly overridden)
 - `OCMEMOG_LOCAL_LLM_BASE_URL` (default: `http://127.0.0.1:18080/v1`; local OpenAI-compatible text endpoint, e.g. llama.cpp)
 - `OCMEMOG_LOCAL_LLM_MODEL` (default: `qwen2.5-7b-instruct`; matches the active Qwen2.5-7B-Instruct GGUF runtime)

package/docs/architecture/memory.md CHANGED Viewed

@@ -30,22 +30,81 @@ The main SQLite database owns these tables:
 ## Retrieval flow
-The current sidecar behavior is simpler than brAIn's full memory architecture:
+The current sidecar retrieval path is a bounded hybrid ranker rather than a pure substring search:
 1. `/memory/search` calls `retrieval.retrieve_for_queries()`.
-2. Retrieval scans `knowledge`, `reflections`, `directives`, and `tasks` for substring matches.
-3. Result scoring combines:
-   - keyword hit: `1.0` on substring match
-   - reinforcement bonus: `reward_score * 0.5`
-   - confidence bonus: `promotion confidence * 0.3`
-4. If `knowledge` has no keyword hit, retrieval falls back to `vector_index.search_memory()`.
-5. The sidecar flattens the bucketed results into a plugin-friendly response.
+2. Each query fans into `retrieval.retrieve()` across the selected categories.
+3. Lexical ranking now combines:
+   - exact substring hit (`1.0` when the full query appears)
+   - token overlap ratio
+   - ordered phrase/sequence overlap
+   - light prefix matching for partial-word queries
+4. Semantic ranking runs through `vector_index.search_memory()` across the selected embedded categories.
+5. Final scoring blends:
+   - keyword score
+   - semantic score
+   - reinforcement history
+   - promotion confidence
+   - recency
+   - optional lane bonus when lane-aware metadata matches
+6. Superseded / duplicate memories are filtered out, contested memories are penalized, and the sidecar flattens the ranked bucketed results into a plugin-friendly response.
+7. The sidecar response now includes lightweight `searchDiagnostics` so operators can inspect the active retrieval strategy, lane selection, per-bucket counts, result compaction, elapsed time, vector-search scan/prefilter behavior, and request-level execution path (provider-configured/provider-skipped/local-fallback-expected/route-exception-fallback) without scraping logs.
+   - vector search diagnostics now also carry the actual embedding execution outcome for the request (provider attempted, local fallback used, winning path, embedding generated)
+   - the top-level execution-path summary now promotes the key embedding outcome fields for faster operator scanning
+8. Retrieval items now also carry a compact `governance_summary` so retrieval and governance surfaces share a simpler bridge for status/triage without forcing every consumer to parse the full governance/provenance structure.
+   - retrieval signals now also expose `reinforcement_count`, `reinforcement_weighted_count`, `reinforcement_negative_count`, and `reinforcement_negative_penalty`, and reinforcement weighting is now frequency-aware, lightly recency-aware, and polarity-aware instead of pure flat averaging
+9. `/memory/search` diagnostics now include a governance rollup over the visible results so search consumers can quickly see how governance state is affecting the returned set.
+   - this now includes both overall visible status counts and per-bucket visible rollups
+10. Retrieval diagnostics also track governance-suppressed candidates (`superseded` / `duplicate`) so the search response can explain what governance hid before the visible result set was assembled.
+11. Suppression diagnostics now include per-bucket breakdowns so operators can see which memory classes are carrying the most governance cleanup pressure.
+12. Search diagnostics now also include compact reinforcement rollups so operators can see how much visible retrieval weight is coming from repeated successful experience, overall and by bucket.
+    - retrieval-side reinforcement diagnostics now mirror that with their own per-bucket totals for parity with governance suppression reporting
+    - reinforcement rollups now also expose bounded negative/polarity totals so operators can see when failed experience is dragging visible results downward
 Operational limits:
-- Semantic fallback now rehydrates any embedded bucket (`knowledge`, `runbooks`, `lessons`) when there are no keyword hits.
+- Retrieval is still bounded to recent rows per category before ranking, so this is not a full-corpus search engine yet.
 - Default embeddings are local hash vectors (`OCMEMOG_EMBED_MODEL_LOCAL=simple`; legacy alias: `BRAIN_EMBED_MODEL_LOCAL`), which are deterministic but weak.
-- `runbooks`, `lessons`, `directives`, `reflections`, and `tasks` are now included in the default searchable categories and embedding index.
+- `runbooks`, `lessons`, `directives`, `reflections`, and `tasks` are included in the default searchable categories and embedding index.
+- Semantic ranking currently depends on the active embedding backend and the bounded candidate window in `vector_index.search_memory()`.
+- Vector search now supports a lightweight lexical prefilter over the bounded scan window before cosine ranking, which improves relevance without changing the no-ANN local-first design.
+Queue/async ingest behavior note:
+- the async ingest queue is append-only on disk and processed in bounded batches
+- malformed queue lines are skipped and acknowledged rather than blocking valid entries behind them
+- valid payload failures are retried in-queue with a bounded retry counter before eventual drop/ack to avoid permanent poison-pill blockage
+- these cases are surfaced through queue stats / doctor health instead of crashing the sidecar, and doctor now distinguishes malformed queue damage from retrying poison items
+Promotion decisions now expose a compact explanation object so operator surfaces can render why a candidate was promoted or rejected, what threshold applied, and which bucket was selected.
+They now also expose a compact verification summary so confidence/threshold semantics are easier to interpret uniformly.
+Rejected promotions also now distinguish generic-destination low-confidence cases from ordinary below-threshold destination-specific failures.
+To combat long-term memory cruft, promotion decisions now also expose a compact `quality_summary`.
+That summary is intentionally simple and operator-oriented:
+- `quality`: low / medium / high
+- `keep_recommendation`: drop / review / keep
+- `noise_risk`: high / medium / low
+- `destination_specificity`: generic / specific
+- `margin`: confidence minus threshold
+Design intent:
+- weak generic memories should be visibly low-quality and easier to reject/prune later
+- more specific, higher-margin memories should be visibly safer to keep
+- this started as an explainability/control surface, but it now also drives a small active anti-cruft gate
+- it gives future automation a better input for “only good memories are remembered” without requiring a risky schema or policy rewrite first
+Current active anti-cruft rules:
+- if a candidate is below the promotion threshold **and** only resolves to the generic `knowledge` destination,
+  it is treated as likely cruft and rejected with `rejected_as_generic_cruft`
+- if that same low-confidence generic candidate is also textually redundant with existing generic knowledge,
+  it is classified more specifically as `rejected_as_redundant_generic_cruft`
+- if a candidate resolves to a more specific destination but still falls modestly below threshold, it is now called out as `rejected_as_ambiguous_specific_memory`
+- why these are the right first rules:
+  - generic low-confidence memories are among the easiest ways for memory stores to accumulate junk
+  - redundant generic memories are even worse because they increase clutter without increasing recall value
+  - weak specific-fit memories are not necessarily junk, but they should be made explicitly reviewable instead of being treated as cleanly trustworthy
+  - the rules are intentionally narrow to reduce surprise and avoid over-pruning while the quality system matures
 ## Write paths
@@ -84,6 +143,26 @@ Known caveat:
 ## Sidecar contract
+The sidecar exposes a compact runtime summary in route payloads so operators can quickly tell whether the sidecar is in ready/degraded mode, which embedding provider path is active, which local embedding model is configured, whether hash-embedding fallback is in effect, what the current queue health snapshot looks like, and how much compatibility residue remains.
+That queue snapshot now includes lightweight severity/hints so the normal runtime payload carries some operational judgment instead of raw counters only.
+It also now distinguishes invalid queue lines from retrying payloads, which brings a compact slice of doctor-style queue diagnosis into ordinary runtime payloads.
+To reduce translation friction with doctor output, the runtime queue snapshot now also carries doctor-style aliases such as `queue_depth` and `queue_backlog_severity`.
+It now also carries compact worker-config issue reporting so invalid poll/batch settings can surface in normal runtime payloads without a separate doctor run.
+As part of the runtime-summary consistency pass, the main operator-facing sub-blocks now expose a small shared shape (`enabled`, `status`, `issues`) where it fits, which makes the overall summary easier to consume uniformly.
+Governance review summary responses now also expose lightweight diagnostics so operators can tell whether they are seeing cached data, how many review items are present, and how the queue splits across review kinds without scraping the full list.
+Governance queue responses now also expose lightweight queue diagnostics so operators can quickly see item counts plus bucket/kind/priority-label breakdowns.
+Governance auto-resolve responses now also expose lightweight diagnostics so operators can quickly see action totals plus reason/kind breakdowns and the active policy profile.
+Governance audit responses now also expose lightweight diagnostics so operators can quickly see audit item totals plus event/status breakdowns.
+Governance rollback responses now also expose lightweight diagnostics so operators can quickly see whether rollback succeeded and how the outcome was classified.
+Governance queue items now also carry compact explanation blocks so queue surfaces and review surfaces are more aligned in how they present rationale.
+Queue items now also share the same normalized priority-label vocabulary as review items, reducing operator/UI translation work.
+Individual governance review items now also carry a compact explanation object so operator surfaces can render human-readable rationale and status context without reverse-engineering the raw review payload.
+The governance review flow is now framed as apply/dismiss plus optional auto-apply, rather than requiring a dashboard-bound human approval step for routine cases.
+Review items and review-summary diagnostics now also expose normalized priority labels so operator surfaces can reason about urgency without inventing their own bucket thresholds.
 The sidecar exposes:
 - `GET /healthz`

package/docs/release-checklist.md CHANGED Viewed

@@ -17,6 +17,9 @@ The release gate is now codified by:
 ## Validation
 - [ ] Install test deps for sidecar route tests: `python3 -m pip install -r requirements-test.txt`
 - [ ] `./scripts/ocmemog-release-check.sh`
+- [ ] If prompt-time hydration behavior changed, validate the plugin gating path too (for example `node --test tests/test_auto_hydration_agent_scope.ts`) so agent-scoped `before_prompt_build` controls are covered
+- [ ] If runtime/operator summary surfaces changed, validate the targeted runtime parity tests too (for example `tests/test_namespace_compat.py`) so `runtimeSummary` queue / embedding / auto-hydration blocks stay aligned
+- [ ] If promotion/retention behavior changed, validate targeted promotion tests (for example `tests/test_profile_buckets.py`) and verify docs still reflect current anti-cruft gates and quality signals, including redundant-generic and ambiguous-specific rejection behavior when applicable
 - [ ] Verify `tests/test_doctor.py` still passes for doctor health surfaces if you changed check coverage
 - [ ] Verify `reports/release-gate-proof.json` exists after a passing gate and documents:
   - live ingest/search/get/hydrate verification
@@ -34,6 +37,8 @@ GitHub CI runs the same release check command so local and CI validation remain
 - [ ] Verify optional prereq install path is documented correctly
 - [ ] Verify LaunchAgent load path still matches repo scripts
 - [ ] Verify sidecar health check passes after install
+- [ ] Verify any new plugin env controls are documented in README/usage/release notes (for example `OCMEMOG_AUTO_HYDRATION_ALLOW_AGENT_IDS` / `OCMEMOG_AUTO_HYDRATION_DENY_AGENT_IDS`)
+- [ ] Verify README/usage/release notes still describe the current operator/runtime surfaces (`runtimeSummary`, `searchDiagnostics`, queue snapshot, hydration policy route) accurately after release-bound changes
 ## Public artifacts
 - [ ] Push `main`

package/docs/usage.md CHANGED Viewed

@@ -81,6 +81,13 @@ Default state location in this repo is `.ocmemog-state/`.
 On shutdown, set `OCMEMOG_SHUTDOWN_DRAIN_QUEUE=true` to synchronously flush queued ingest entries before exit. This is useful for short-running deployments and tests that expect strong delivery guarantees.
+Queue behavior notes:
+- malformed queue lines are now treated as durable queue errors and skipped/acknowledged so a single bad payload does not block later valid work
+- valid payload failures are retried with a bounded in-queue retry marker (`_ocmemog_retry_count`) instead of blocking forever on the first poison item
+- `OCMEMOG_INGEST_MAX_RETRIES` controls how many failed attempts a queued payload gets before it is dropped and recorded as a retry-exhausted error
+- runtime queue stats keep the last queue parse/retry error visible via `QUEUE_STATS["last_error"]`
+- `ocmemog-doctor` queue health now distinguishes invalid queue lines from retrying payloads so operators can tell parsing damage apart from poison-item retries
 ## Plugin API
 Health:
@@ -191,12 +198,75 @@ Notes:
 - Valid sidecar categories today are `knowledge`, `reflections`, `directives`, `tasks`, `runbooks`, and `lessons`.
 - `/memory/get` currently expects a `table:id` reference.
 - Runtime degradation is reported in every sidecar response.
+- Sidecar responses now also include `runtimeSummary`, a compact operator-facing summary of runtime mode, embedding provider, local embedding model, embedding path readiness/fallback state, queue health snapshot, shim surface count, and missing dependency count.
+- `runtimeSummary.queue` now also includes a lightweight operational assessment: `severity` (`ok|warn|high`) plus short `hints` for backlog/worker/error situations.
+- `runtimeSummary.queue` now also distinguishes `invalid_lines`, `retrying_lines`, and `max_retry_seen`, so normal runtime payloads can hint at queue corruption vs poison-item retry churn without a full doctor pass.
+- For parity with doctor-style outputs, `runtimeSummary.queue` now also exposes `queue_depth` and `queue_backlog_severity` aliases alongside the compact fields.
+- `runtimeSummary.queue.config_issues` now surfaces compact worker-config validation problems (for example invalid poll interval or batch size), plus a hint when config is invalid.
+- For consistency across runtime summary sub-blocks, `queue`, `embedding_path_summary`, and `auto_hydration` now all expose a small common shape: `enabled`, `status`, and `issues`.
+- Prompt-time auto-hydration can now be scoped per OpenClaw agent via plugin env vars:
+  - `OCMEMOG_AUTO_HYDRATION_ALLOW_AGENT_IDS=agent-a,agent-b`
+  - `OCMEMOG_AUTO_HYDRATION_DENY_AGENT_IDS=agent-x`
+  - ingest/checkpoint hooks remain global; only `before_prompt_build` hydration is agent-scoped
+  - the active auto-hydration policy is surfaced in `runtimeSummary.auto_hydration`
+  - plugin-side decision reasons now distinguish `disabled_globally`, `denied_by_agent_id`, `not_in_allowlist`, `allowed_by_allowlist`, and `allowed_globally` for easier debugging
+  - plugin logs now include structured decision context for both skipped and applied prompt hydration, including agent id, reason, and prepend sizes
+- `/memory/search` now also returns `searchDiagnostics` with lightweight operator-facing retrieval metadata such as strategy, lane, bucket counts, result counts, query token count, elapsed time, vector-search diagnostics (`scan_limit`, `prefilter_limit`, candidate rows, fallback usage), and an `execution_path` block that clarifies provider-configured vs provider-skipped vs local-fallback-expected vs route-exception-fallback behavior.
+- `searchDiagnostics.execution_path` now also promotes key embedding outcome fields (`provider_attempted`, `embedding_generated`, `embedding_path_used`, `local_fallback_used`) so the top-level request summary is easier to scan without drilling into nested vector diagnostics.
+- `searchDiagnostics.vector_search.embedding` now carries per-request embedding execution details such as whether a provider was attempted, whether local fallback was actually used, what path won (`provider`, `local_simple`, `local_model`), and whether an embedding was generated at all.
+- `searchDiagnostics` now also includes `governance_rollup` so operators can quickly see visible result status counts, how many returned items still need governance review, and per-bucket visible rollups for categories such as `knowledge`, `runbooks`, or `lessons`.
+- `searchDiagnostics.retrieval_governance` now reports how many candidates were hidden before return because governance marked them `superseded` or `duplicate`, including per-bucket breakdowns such as `knowledge`, `runbooks`, or `lessons`.
+- `searchDiagnostics.reinforcement_rollup` and `searchDiagnostics.retrieval_reinforcement` now summarize visible reinforcement pressure and retrieval-side reinforcement totals, including per-bucket visible and retrieval-side reinforcement counts.
+- Reinforcement rollups now also include negative/polarity totals (`negative_reinforcement_result_count`, `total_negative_penalty`) so operators can see when failed experience is actively depressing the visible result set.
+- Retrieval results now include a compact `governance_summary` alongside the full governance payload so dashboards/operators can quickly see status, canonical/relationship references, contradiction count, and `needs_review` without unpacking the full provenance structure.
+- `/memory/governance/review/summary` now returns `reviewDiagnostics` so operators can see cache hit/freshness, item count, kind breakdown, and active filters without inferring from the raw item list.
+- `/memory/governance/review` items now include an `explanation` block with a short human-facing rationale plus source/target memory status, so dashboards and operators do not have to reconstruct meaning from raw fields alone.
+- Governance review items now also include a normalized `priority_label` (`none|low|medium|high|critical`), and review summary diagnostics include `priority_label_counts` for quick operator triage.
+- Governance review actions are now modeled as `apply` / `dismiss` rather than human-approval language, and `/memory/governance/review/auto_apply` can apply current review items directly without relying on dashboard/user approval input.
+- `/memory/governance/queue` now returns `queueDiagnostics` so operators can see item count plus bucket/kind/priority-label breakdowns without scanning the full queue manually.
+- Governance queue items now also include an `explanation` block with short human-facing rationale and target-reference context, so queue consumers do not have to reconstruct meaning from raw kind/priority fields alone.
+- Governance queue items now also carry the same normalized `priority_label` (`none|low|medium|high|critical`) used by governance review items.
+- `/memory/governance/auto_resolve` now returns `autoResolveDiagnostics` so operators can see action counts plus reason/kind breakdowns and the active policy profile without unpacking the full action list manually.
+- `/memory/governance/audit` now returns `auditDiagnostics` so operators can quickly see audit item counts plus event/status breakdowns without scanning the raw log-derived entries manually.
+- `/memory/governance/rollback` now returns `rollbackDiagnostics` so operators can quickly see whether rollback succeeded and how the outcome was classified.
+- `/memory/auto_hydration/policy` accepts an `agent_id` and returns the current prompt-time hydration decision (`allowed`, `reason`, allowlist, denylist, and scoping state) so agent-specific continuity policy can be debugged from the sidecar.
 ## What is safe to rely on
 - `store.init_db()` creates the local schema automatically
+- promotion decisions now return an `explanation` block describing why a candidate was promoted or rejected, what threshold applied, and which destination bucket was chosen
+- promotion decisions now also return a compact `verification_summary` (`status`, `reason`, `confidence`, `threshold`, `margin`) so verification/confidence semantics are easier to interpret consistently
+- rejected promotions now use slightly richer reasons, distinguishing plain below-threshold outcomes from below-threshold generic-destination cases
+- promotion decisions now also return a `quality_summary` designed specifically to fight long-term memory cruft:
+  - `quality` (`low|medium|high`)
+  - `keep_recommendation` (`drop|review|keep`)
+  - `noise_risk` (`high|medium|low`)
+  - `destination_specificity` (`generic|specific`)
+  - `margin` (confidence minus threshold)
+- practical meaning:
+  - low-confidence generic `knowledge` candidates are now explicitly labeled as high-risk noise and recommended for drop
+  - stronger, more specific promoted memories are labeled as keep-worthy
+  - this does not replace governance/review yet, but it gives operator surfaces and future automation a clearer signal for “remember this” vs “don’t keep this around”
+- the anti-cruft gate is now partially active, not just advisory:
+  - low-confidence candidates that only resolve to the generic `knowledge` destination are rejected as likely cruft instead of being treated like ordinary generic rejects
+  - this shows up explicitly as `rejected_as_generic_cruft` in both `verification_summary.reason` and `explanation.reason`
+  - if the low-confidence generic candidate is also textually redundant with existing generic knowledge, it is now flagged more specifically as `rejected_as_redundant_generic_cruft`
+  - `quality_summary.redundant_generic=true` marks this stronger, near-duplicate generic-cruft case
+  - low-confidence candidates that do resolve to a more specific bucket can now still be called out as weak/ambiguous specific memories instead of being lumped into an undifferentiated threshold failure
+  - `quality_summary.ambiguous_specific=true` plus `rejected_as_ambiguous_specific_memory` mark these cases for review rather than treating them like generic junk
+  - intent: weak generic memories should fail earlier so they do not accumulate as low-value long-term memory objects, especially when they merely restate already-kept generic knowledge, while weak specific-fit memories stay visible as review-worthy instead of silently blending into ordinary rejects
 - `retrieval.retrieve_for_queries()` is the main sidecar search path
-- `vector_index.search_memory()` provides a semantic fallback over `knowledge`, `runbooks`, `lessons`, `directives`, `reflections`, and `tasks` when keyword retrieval misses
+- search is hybrid-ranked, not substring-only:
+  - lexical scoring blends exact match, token overlap, ordered phrase overlap, and light prefix matching
+  - semantic scoring comes from `vector_index.search_memory()` across the selected embedded categories
+  - final ranking also considers reinforcement history, promotion confidence, recency, and optional lane bonuses
+  - reinforcement is now frequency-aware rather than flat-average only; repeated successful experiences increase strength up to a bounded cap and expose `reinforcement_count` in retrieval signals
+  - reinforcement is also recency-aware: newer successful experiences count more than stale ones, and retrieval signals now expose `reinforcement_weighted_count`
+  - negative reinforcement is now modeled explicitly with bounded penalties; retrieval signals expose `reinforcement_negative_count` and `reinforcement_negative_penalty`
+- `vector_index.search_memory()` remains a bounded semantic scan rather than a full ANN index
+  - it now supports a lightweight lexical prefilter before cosine ranking
+  - `OCMEMOG_SEARCH_VECTOR_SCAN_LIMIT` bounds the candidate window
+  - `OCMEMOG_SEARCH_VECTOR_PREFILTER_LIMIT` bounds the lexically-biased shortlist used before cosine scoring
 - `probe_runtime()` exposes missing shim replacements and optional embedding warnings
 ## What is not safe to rely on yet

package/index.ts CHANGED Viewed

@@ -12,9 +12,6 @@ type PluginConfig = {
   token?: string;
 };
-const AUTO_HYDRATION_ENABLED = ["1", "true", "yes"].includes(
-  String(process.env.OCMEMOG_AUTO_HYDRATION ?? "false").trim().toLowerCase(),
-);
 const DURABLE_OUTBOX_ENABLED = !["0", "false", "no"].includes(
   String(process.env.OCMEMOG_DURABLE_OUTBOX ?? "true").trim().toLowerCase(),
 );
@@ -521,6 +518,83 @@ function buildTurnMetadata(message: unknown, ctx: { agentId?: string; sessionKey
   };
 }
+function autoHydrationEnabled(): boolean { // Reports whether prompt-time auto hydration is switched on; reads OCMEMOG_AUTO_HYDRATION on every call.
+  return ["1", "true", "yes"].includes(String(process.env.OCMEMOG_AUTO_HYDRATION ?? "false").trim().toLowerCase()); // unset falls back to "false"; only "1"/"true"/"yes" (trimmed, case-insensitive) enable it
+}
+function parseAgentIdList(raw: string | undefined): string[] { // Splits a comma-separated value into trimmed, non-empty agent ids.
+  return String(raw ?? "") // undefined/null becomes "", which yields an empty list after filtering
+    .split(",")
+    .map((value) => value.trim())
+    .filter(Boolean); // drop empty entries left by stray commas or whitespace-only items
+}
+export function getAutoHydrationDecision(agentId?: string): { // Resolves whether prompt-time auto hydration applies to the given agent, and why.
+  enabled: boolean; // global switch state (see autoHydrationEnabled)
+  allowed: boolean; // final per-agent decision
+  reason:
+    | 'disabled_globally'
+    | 'denied_by_agent_id'
+    | 'not_in_allowlist'
+    | 'allowed_by_allowlist'
+    | 'allowed_globally';
+  agentId?: string; // trimmed agent id, or undefined when missing/blank
+  allowAgentIds: string[]; // parsed from OCMEMOG_AUTO_HYDRATION_ALLOW_AGENT_IDS
+  denyAgentIds: string[]; // parsed from OCMEMOG_AUTO_HYDRATION_DENY_AGENT_IDS
+} {
+  const normalized = String(agentId ?? '').trim() || undefined; // blank or missing ids collapse to undefined
+  const allowAgentIds = parseAgentIdList(process.env.OCMEMOG_AUTO_HYDRATION_ALLOW_AGENT_IDS);
+  const denyAgentIds = parseAgentIdList(process.env.OCMEMOG_AUTO_HYDRATION_DENY_AGENT_IDS);
+  if (!autoHydrationEnabled()) { // 1) the global switch wins over any per-agent list
+    return {
+      enabled: false,
+      allowed: false,
+      reason: 'disabled_globally',
+      agentId: normalized,
+      allowAgentIds,
+      denyAgentIds,
+    };
+  }
+  if (normalized && denyAgentIds.includes(normalized)) { // 2) deny list is checked before the allowlist; a missing agent id never matches it
+    return {
+      enabled: true,
+      allowed: false,
+      reason: 'denied_by_agent_id',
+      agentId: normalized,
+      allowAgentIds,
+      denyAgentIds,
+    };
+  }
+  if (allowAgentIds.length > 0) { // 3) a non-empty allowlist makes hydration opt-in
+    const allowed = Boolean(normalized && allowAgentIds.includes(normalized)); // a missing agent id is treated as not in the allowlist
+    return {
+      enabled: true,
+      allowed,
+      reason: allowed ? 'allowed_by_allowlist' : 'not_in_allowlist',
+      agentId: normalized,
+      allowAgentIds,
+      denyAgentIds,
+    };
+  }
+  return { // 4) enabled, not denied, and no allowlist configured: allow everyone
+    enabled: true,
+    allowed: true,
+    reason: 'allowed_globally',
+    agentId: normalized,
+    allowAgentIds,
+    denyAgentIds,
+  };
+}
+export function formatAutoHydrationDecisionLog(decision: ReturnType<typeof getAutoHydrationDecision>): string { // Renders a decision as a single-line key=value string for log messages.
+  const agent = decision.agentId ?? '<none>'; // placeholder when no agent id was resolved
+  return `agent=${agent} allowed=${String(decision.allowed)} reason=${decision.reason} allow_agents=${decision.allowAgentIds.join('|') || '<all>'} deny_agents=${decision.denyAgentIds.join('|') || '<none>'}`; // lists are '|'-joined; an empty allowlist prints '<all>', an empty deny list prints '<none>'
+}
+export function shouldAutoHydrateForAgent(agentId?: string): boolean { // Boolean shortcut over getAutoHydrationDecision for callers that do not need the reason.
+  return getAutoHydrationDecision(agentId).allowed;
+}
 function registerAutomaticContinuityHooks(api: OpenClawPluginApi, config: PluginConfig) {
   void flushOutbox(api, config).catch((error) => {
     api.logger.warn(`ocmemog durable outbox startup flush failed: ${error instanceof Error ? error.message : String(error)}`);
@@ -562,10 +636,20 @@ function registerAutomaticContinuityHooks(api: OpenClawPluginApi, config: Plugin
   // failures if a host runtime persists prepended context into transcript history.
   // Keep the memory backend and sidecar tools active, but only prepend continuity
   // when explicitly enabled and after the host runtime has been validated.
-  api.logger.info(`ocmemog auto hydration env raw=${String(process.env.OCMEMOG_AUTO_HYDRATION ?? '<unset>')} computed=${String(AUTO_HYDRATION_ENABLED)}`);
-  if (AUTO_HYDRATION_ENABLED) {
+  const allowAgentIds = parseAgentIdList(process.env.OCMEMOG_AUTO_HYDRATION_ALLOW_AGENT_IDS);
+  const denyAgentIds = parseAgentIdList(process.env.OCMEMOG_AUTO_HYDRATION_DENY_AGENT_IDS);
+  const hydrationEnabled = autoHydrationEnabled();
+  api.logger.info(
+    `ocmemog auto hydration env raw=${String(process.env.OCMEMOG_AUTO_HYDRATION ?? '<unset>')} computed=${String(hydrationEnabled)} allow_agents=${allowAgentIds.join('|') || '<all>'} deny_agents=${denyAgentIds.join('|') || '<none>'}`,
+  );
+  if (hydrationEnabled) {
     api.on("before_prompt_build", async (event, ctx) => {
       try {
+        const hydrationDecision = getAutoHydrationDecision(ctx.agentId);
+        if (!hydrationDecision.allowed) {
+          api.logger.info(`ocmemog auto hydration skipped ${formatAutoHydrationDecisionLog(hydrationDecision)}`);
+          return;
+        }
         const scope = resolveHydrationScope(event.messages ?? [], ctx);
         if (!scope.session_id && !scope.thread_id && !scope.conversation_id) {
           return;
@@ -579,7 +663,7 @@ function registerAutomaticContinuityHooks(api: OpenClawPluginApi, config: Plugin
         const continuityContext = buildHydrationContext(payload);
         const prependContext = [briefContext, continuityContext].filter(Boolean).join("\n\n");
         api.logger.info(
-          `ocmemog hydration prepend sizes brief=${briefContext.length} continuity=${continuityContext.length} combined=${prependContext.length}`,
+          `ocmemog auto hydration applied ${formatAutoHydrationDecisionLog(hydrationDecision)} brief=${briefContext.length} continuity=${continuityContext.length} combined=${prependContext.length}`,
         );
         if (!prependContext) {
           return;

package/ocmemog/doctor.py CHANGED Viewed

@@ -412,14 +412,28 @@ def _run_queue_health(_: None) -> CheckResult:
         invalid = 0
         total = 0
+        retrying = 0
+        max_retry_seen = 0
         invalid_samples: list[dict[str, Any]] = []
+        retry_samples: list[dict[str, Any]] = []
         for raw_line in queue_path.read_text(encoding="utf-8").splitlines():
             line = raw_line.strip()
             if not line:
                 continue
             total += 1
             try:
-                json.loads(line)
+                payload = json.loads(line)
+                if isinstance(payload, dict):
+                    retry_count = int(payload.get("_ocmemog_retry_count", 0) or 0)
+                    if retry_count > 0:
+                        retrying += 1
+                        max_retry_seen = max(max_retry_seen, retry_count)
+                        if len(retry_samples) < 3:
+                            retry_samples.append({
+                                "line_no": total,
+                                "retry_count": retry_count,
+                                "kind": str(payload.get("kind") or payload.get("_ocmemog_task") or ""),
+                            })
             except Exception:
                 invalid += 1
                 if len(invalid_samples) < 3:
@@ -430,6 +444,9 @@ def _run_queue_health(_: None) -> CheckResult:
         if invalid:
             status = "warn"
             messages.append(f"Queue has {invalid} invalid line(s).")
+        if retrying:
+            status = "warn"
+            messages.append(f"Queue has {retrying} retrying payload(s) (max retry count {max_retry_seen}).")
         if depth > 25:
             status = "warn"
             messages.append(f"Queue backlog is elevated ({depth}).")
@@ -448,6 +465,8 @@ def _run_queue_health(_: None) -> CheckResult:
         hints: list[str] = []
         if invalid > 0:
             hints.append("Run --fix repair-queue to drop invalid queue entries.")
+        if retrying > 0:
+            hints.append("Inspect the queue retrying payloads; repeated retries usually indicate a poison item or downstream ingest/postprocess failure.")
         if depth > 0 and not worker_enabled:
             hints.append("Enable OCMEMOG_INGEST_ASYNC_WORKER or flush with POST /memory/ingest_flush.")
         if depth > 1000:
@@ -479,6 +498,8 @@ def _run_queue_health(_: None) -> CheckResult:
             "queue_depth": depth,
             "queue_path": str(queue_path),
             "invalid_lines": invalid,
+            "retrying_lines": retrying,
+            "max_retry_seen": max_retry_seen,
             "lines_seen": total,
             "stats": stats,
             "queue_bytes": queue_size,
@@ -487,6 +508,7 @@ def _run_queue_health(_: None) -> CheckResult:
             "queue_worker_batch_max": worker_batch_max,
             "queue_config_issues": queue_config,
             "invalid_payload_samples": invalid_samples,
+            "retrying_payload_samples": retry_samples,
             "ingest_worker_running": bool(app._INGEST_WORKER_THREAD and app._INGEST_WORKER_THREAD.is_alive()),
             "queue_backlog_severity": backlog_severity,
             "queue_hints": hints,