@shadowforge0/aquifer-memory 1.2.1 → 1.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/README.md +8 -9
  2. package/consumers/cli.js +11 -1
  3. package/consumers/default/index.js +17 -4
  4. package/consumers/mcp.js +21 -0
  5. package/consumers/miranda/index.js +15 -4
  6. package/consumers/miranda/profile.json +145 -0
  7. package/consumers/miranda/recall-format.js +5 -3
  8. package/consumers/miranda/render-daily-md.js +186 -0
  9. package/consumers/shared/config.js +8 -0
  10. package/consumers/shared/factory.js +2 -1
  11. package/consumers/shared/llm.js +1 -1
  12. package/consumers/shared/recall-format.js +21 -1
  13. package/core/aquifer.js +693 -87
  14. package/core/artifacts.js +174 -0
  15. package/core/bundles.js +400 -0
  16. package/core/consolidation.js +340 -0
  17. package/core/decisions.js +164 -0
  18. package/core/entity-state.js +483 -0
  19. package/core/errors.js +97 -0
  20. package/core/handoff.js +153 -0
  21. package/core/insights.js +499 -0
  22. package/core/mcp-manifest.js +131 -0
  23. package/core/narratives.js +212 -0
  24. package/core/profiles.js +171 -0
  25. package/core/state.js +163 -0
  26. package/core/storage.js +82 -5
  27. package/core/timeline.js +152 -0
  28. package/index.js +14 -0
  29. package/package.json +1 -1
  30. package/pipeline/extract-state-changes.js +205 -0
  31. package/schema/001-base.sql +186 -16
  32. package/schema/002-entities.sql +35 -1
  33. package/schema/004-completion.sql +391 -0
  34. package/schema/005-entity-state-history.sql +87 -0
  35. package/schema/006-insights.sql +138 -0
  36. package/scripts/diagnose-fts-zh.js +37 -4
  37. package/scripts/drop-entity-state-history.sql +17 -0
  38. package/scripts/drop-insights.sql +12 -0
  39. package/scripts/extract-insights-from-recent-sessions.js +315 -0
  40. package/scripts/find-dburl-hints.js +29 -0
  41. package/scripts/queries.json +45 -0
  42. package/scripts/retro-recall-bench.js +409 -0
  43. package/scripts/sample-bench-queries.sql +75 -0
@@ -0,0 +1,391 @@
1
+ -- 004-completion.sql — cross-session completion schema (P1 foundation)
2
+ --
3
+ -- Adds the minimal DDL needed for the aquifer-completion capability surface:
4
+ -- * shared set_updated_at() trigger function (reused by narratives, consumer_profiles,
5
+ -- and future completion tables)
6
+ -- * sessions.consolidation_phases JSONB (per-phase state map; see consolidation
7
+ -- orchestration spec)
8
+ -- * narratives table — cross-session state snapshot with supersede chain
9
+ -- * consumer_profiles table — consumer schema registry with composite primary key
10
+ -- (tenant_id, consumer_id, version) for future multi-tenant safety
11
+ --
12
+ -- All identifiers stay parameterised on ${schema} so P4 schema rename
13
+ -- (miranda → aquifer) is a one-line config change rather than a DDL rewrite.
14
+
15
+ -- Ensure pg_trgm available (used by existing migrations; re-declared for independent
16
+ -- run safety).
17
+ CREATE EXTENSION IF NOT EXISTS pg_trgm;
18
+
19
+ -- Shared trigger: bump updated_at on row modification.
20
+ CREATE OR REPLACE FUNCTION ${schema}.set_updated_at()
21
+ RETURNS trigger
22
+ LANGUAGE plpgsql
23
+ AS $$
24
+ BEGIN
25
+ NEW.updated_at := now();
26
+ RETURN NEW;
27
+ END;
28
+ $$;
29
+
30
+ -- sessions.consolidation_phases: per-phase state map keyed by phase name.
31
+ -- Shape (documented in spec, enforced at application layer):
32
+ -- {
33
+ -- "<phase>": {
34
+ -- "status": "pending|claimed|running|succeeded|failed|skipped",
35
+ -- "attempts": int,
36
+ -- "idempotencyKey": string?, "claimToken": string?, "workerId": string?,
37
+ -- "startedAt": iso?, "finishedAt": iso?, "retryAfter": iso?,
38
+ -- "errorCode": string?, "errorMessage": string?,
39
+ -- "outputRef": { ... }?
40
+ -- }
41
+ -- }
42
+ ALTER TABLE ${schema}.sessions
43
+ ADD COLUMN IF NOT EXISTS consolidation_phases JSONB NOT NULL DEFAULT '{}'::jsonb;
44
+
45
+ -- narratives: cross-session state snapshots with scope-based addressing and
46
+ -- supersede chain. Only one 'active' row per (tenant, agent, scope, scope_key).
47
+ CREATE TABLE IF NOT EXISTS ${schema}.narratives (
48
+ id BIGSERIAL PRIMARY KEY,
49
+ tenant_id TEXT NOT NULL DEFAULT 'default',
50
+ session_row_id BIGINT REFERENCES ${schema}.sessions(id) ON DELETE SET NULL,
51
+ source_session_id TEXT,
52
+ agent_id TEXT NOT NULL DEFAULT 'main',
53
+ consumer_profile_id TEXT NOT NULL,
54
+ consumer_profile_version INT NOT NULL,
55
+ consumer_schema_hash TEXT NOT NULL,
56
+ idempotency_key TEXT UNIQUE,
57
+ scope TEXT NOT NULL DEFAULT 'agent'
58
+ CHECK (scope IN ('agent', 'workspace', 'project', 'custom')),
59
+ scope_key TEXT NOT NULL,
60
+ text TEXT NOT NULL,
61
+ status TEXT NOT NULL DEFAULT 'active'
62
+ CHECK (status IN ('active', 'archived', 'superseded')),
63
+ based_on_fact_ids BIGINT[] NOT NULL DEFAULT '{}',
64
+ metadata JSONB NOT NULL DEFAULT '{}'::jsonb,
65
+ superseded_by_narrative_id BIGINT REFERENCES ${schema}.narratives(id) ON DELETE SET NULL,
66
+ effective_at TIMESTAMPTZ NOT NULL DEFAULT now(),
67
+ search_tsv TSVECTOR,
68
+ search_text TEXT,
69
+ created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
70
+ updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
71
+ );
72
+
73
+ -- Only one active narrative per (tenant, agent, scope, scope_key).
74
+ CREATE UNIQUE INDEX IF NOT EXISTS idx_narratives_active_scope
75
+ ON ${schema}.narratives (tenant_id, agent_id, scope, scope_key)
76
+ WHERE status = 'active';
77
+
78
+ CREATE INDEX IF NOT EXISTS idx_narratives_effective_at
79
+ ON ${schema}.narratives (tenant_id, agent_id, effective_at DESC);
80
+
81
+ CREATE INDEX IF NOT EXISTS idx_narratives_search_tsv
82
+ ON ${schema}.narratives USING GIN (search_tsv);
83
+
84
+ CREATE INDEX IF NOT EXISTS idx_narratives_search_text_trgm
85
+ ON ${schema}.narratives USING GIN (search_text gin_trgm_ops);
86
+
87
+ CREATE OR REPLACE FUNCTION ${schema}.narratives_search_tsv_update()
88
+ RETURNS trigger
89
+ LANGUAGE plpgsql
90
+ AS $$
91
+ BEGIN
92
+ NEW.search_text := COALESCE(NEW.text, '') || ' ' || COALESCE(NEW.metadata::text, '');
93
+ IF EXISTS (SELECT 1 FROM pg_ts_config WHERE cfgname = 'zhcfg') THEN
94
+ NEW.search_tsv := setweight(to_tsvector('zhcfg', COALESCE(NEW.text, '')), 'A');
95
+ ELSE
96
+ NEW.search_tsv := setweight(to_tsvector('simple', COALESCE(NEW.text, '')), 'A');
97
+ END IF;
98
+ RETURN NEW;
99
+ END;
100
+ $$;
101
+
102
+ DROP TRIGGER IF EXISTS trg_narratives_search_tsv ON ${schema}.narratives;
103
+ CREATE TRIGGER trg_narratives_search_tsv
104
+ BEFORE INSERT OR UPDATE OF text, metadata
105
+ ON ${schema}.narratives
106
+ FOR EACH ROW
107
+ EXECUTE FUNCTION ${schema}.narratives_search_tsv_update();
108
+
109
+ DROP TRIGGER IF EXISTS trg_narratives_updated_at ON ${schema}.narratives;
110
+ CREATE TRIGGER trg_narratives_updated_at
111
+ BEFORE UPDATE ON ${schema}.narratives
112
+ FOR EACH ROW
113
+ EXECUTE FUNCTION ${schema}.set_updated_at();
114
+
115
+ -- consumer_profiles: registry for consumer output contracts.
116
+ -- Composite primary key (tenant_id, consumer_id, version) future-proofs multi-tenant.
117
+ -- NOTE(review): UNIQUE (consumer_id, version, profile_hash) does not actually
118
+ -- pin one hash per (consumer_id, version): it allows two tenants to record
+ -- DIFFERENT hashes for the same consumer version (the drift case), while
+ -- rejecting the benign case of two tenants registering the identical triple.
+ -- Cross-tenant hash agreement must be enforced at the application layer.
119
+ CREATE TABLE IF NOT EXISTS ${schema}.consumer_profiles (
120
+ tenant_id TEXT NOT NULL DEFAULT 'default',
121
+ consumer_id TEXT NOT NULL,
122
+ version INT NOT NULL,
123
+ profile_hash TEXT NOT NULL,
124
+ profile_json JSONB NOT NULL,
125
+ loaded_at TIMESTAMPTZ NOT NULL DEFAULT now(),
126
+ deprecated_at TIMESTAMPTZ,
127
+ created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
128
+ updated_at TIMESTAMPTZ NOT NULL DEFAULT now(),
129
+ PRIMARY KEY (tenant_id, consumer_id, version),
130
+ UNIQUE (consumer_id, version, profile_hash)
131
+ );
132
+
133
+ CREATE INDEX IF NOT EXISTS idx_consumer_profiles_active
134
+ ON ${schema}.consumer_profiles (tenant_id, consumer_id, version DESC)
135
+ WHERE deprecated_at IS NULL;
136
+
137
+ DROP TRIGGER IF EXISTS trg_consumer_profiles_updated_at ON ${schema}.consumer_profiles;
138
+ CREATE TRIGGER trg_consumer_profiles_updated_at
139
+ BEFORE UPDATE ON ${schema}.consumer_profiles
140
+ FOR EACH ROW
141
+ EXECUTE FUNCTION ${schema}.set_updated_at();
142
+
143
+ -- timeline_events: append-only event log keyed by (tenant, agent, occurred_at).
144
+ -- category vocabulary is consumer-owned (focus/todo/mood/handoff/narrative/cli
145
+ -- for Miranda default), event shape is strict core. idempotency_key UNIQUE
146
+ -- across the table to make caller-driven dedupe safe.
147
+ CREATE TABLE IF NOT EXISTS ${schema}.timeline_events (
148
+ id BIGSERIAL PRIMARY KEY,
149
+ tenant_id TEXT NOT NULL DEFAULT 'default',
150
+ session_row_id BIGINT REFERENCES ${schema}.sessions(id) ON DELETE SET NULL,
151
+ source_session_id TEXT,
152
+ agent_id TEXT NOT NULL DEFAULT 'main',
153
+ consumer_profile_id TEXT NOT NULL,
154
+ consumer_profile_version INT NOT NULL,
155
+ consumer_schema_hash TEXT NOT NULL,
156
+ idempotency_key TEXT UNIQUE,
157
+ occurred_at TIMESTAMPTZ NOT NULL,
158
+ source TEXT NOT NULL,
159
+ session_ref TEXT,
160
+ category TEXT NOT NULL,
161
+ text TEXT NOT NULL,
162
+ metadata JSONB NOT NULL DEFAULT '{}'::jsonb,
163
+ search_tsv TSVECTOR,
164
+ search_text TEXT,
165
+ created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
166
+ updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
167
+ );
168
+
169
+ CREATE INDEX IF NOT EXISTS idx_timeline_events_occurred_at
170
+ ON ${schema}.timeline_events (tenant_id, agent_id, occurred_at DESC);
171
+
172
+ CREATE INDEX IF NOT EXISTS idx_timeline_events_category
173
+ ON ${schema}.timeline_events (tenant_id, agent_id, category, occurred_at DESC);
174
+
175
+ CREATE INDEX IF NOT EXISTS idx_timeline_events_search_tsv
176
+ ON ${schema}.timeline_events USING GIN (search_tsv);
177
+
178
+ CREATE INDEX IF NOT EXISTS idx_timeline_events_search_text_trgm
179
+ ON ${schema}.timeline_events USING GIN (search_text gin_trgm_ops);
180
+
181
+ CREATE OR REPLACE FUNCTION ${schema}.timeline_events_search_tsv_update()
182
+ RETURNS trigger
183
+ LANGUAGE plpgsql
184
+ AS $$
185
+ BEGIN
186
+ NEW.search_text :=
187
+ COALESCE(NEW.category, '') || ' ' ||
188
+ COALESCE(NEW.text, '') || ' ' ||
189
+ COALESCE(NEW.metadata::text, '');
190
+
191
+ IF EXISTS (SELECT 1 FROM pg_ts_config WHERE cfgname = 'zhcfg') THEN
192
+ NEW.search_tsv :=
193
+ setweight(to_tsvector('zhcfg', COALESCE(NEW.category, '')), 'B') ||
194
+ setweight(to_tsvector('zhcfg', COALESCE(NEW.text, '')), 'A');
195
+ ELSE
196
+ NEW.search_tsv :=
197
+ setweight(to_tsvector('simple', COALESCE(NEW.category, '')), 'B') ||
198
+ setweight(to_tsvector('simple', COALESCE(NEW.text, '')), 'A');
199
+ END IF;
200
+
201
+ RETURN NEW;
202
+ END;
203
+ $$;
204
+
205
+ DROP TRIGGER IF EXISTS trg_timeline_events_search_tsv ON ${schema}.timeline_events;
206
+ CREATE TRIGGER trg_timeline_events_search_tsv
207
+ BEFORE INSERT OR UPDATE OF category, text, metadata
208
+ ON ${schema}.timeline_events
209
+ FOR EACH ROW
210
+ EXECUTE FUNCTION ${schema}.timeline_events_search_tsv_update();
211
+
212
+ DROP TRIGGER IF EXISTS trg_timeline_events_updated_at ON ${schema}.timeline_events;
213
+ CREATE TRIGGER trg_timeline_events_updated_at
214
+ BEFORE UPDATE ON ${schema}.timeline_events
215
+ FOR EACH ROW
216
+ EXECUTE FUNCTION ${schema}.set_updated_at();
217
+
218
+ -- session_states: latest-snapshot-per-scope with supersede chain.
219
+ -- is_latest + partial unique index enforces at-most-one latest per
220
+ -- (tenant, agent, scope_key); writer supersedes prior latest atomically.
221
+ CREATE TABLE IF NOT EXISTS ${schema}.session_states (
222
+ id BIGSERIAL PRIMARY KEY,
223
+ tenant_id TEXT NOT NULL DEFAULT 'default',
224
+ session_row_id BIGINT REFERENCES ${schema}.sessions(id) ON DELETE SET NULL,
225
+ source_session_id TEXT,
226
+ agent_id TEXT NOT NULL DEFAULT 'main',
227
+ scope_key TEXT NOT NULL,
228
+ consumer_profile_id TEXT NOT NULL,
229
+ consumer_profile_version INT NOT NULL,
230
+ consumer_schema_hash TEXT NOT NULL,
231
+ idempotency_key TEXT UNIQUE,
232
+ goal TEXT,
233
+ active_work JSONB NOT NULL DEFAULT '[]'::jsonb,
234
+ blockers JSONB NOT NULL DEFAULT '[]'::jsonb,
235
+ affect JSONB NOT NULL DEFAULT '{}'::jsonb,
236
+ payload JSONB NOT NULL,
237
+ is_latest BOOLEAN NOT NULL DEFAULT true,
238
+ supersedes_state_id BIGINT REFERENCES ${schema}.session_states(id) ON DELETE SET NULL,
239
+ created_at TIMESTAMPTZ NOT NULL DEFAULT now()
240
+ );
241
+
242
+ CREATE UNIQUE INDEX IF NOT EXISTS idx_session_states_latest
243
+ ON ${schema}.session_states (tenant_id, agent_id, scope_key)
244
+ WHERE is_latest = true;
245
+
246
+ CREATE INDEX IF NOT EXISTS idx_session_states_agent
247
+ ON ${schema}.session_states (tenant_id, agent_id, created_at DESC);
248
+
249
+ -- session_handoffs: append-only handoff log. getLatest by (agent) or (agent, session).
250
+ -- No latest-enforcement — every write is a row; retrieval sorts by created_at DESC.
251
+ CREATE TABLE IF NOT EXISTS ${schema}.session_handoffs (
252
+ id BIGSERIAL PRIMARY KEY,
253
+ tenant_id TEXT NOT NULL DEFAULT 'default',
254
+ session_row_id BIGINT REFERENCES ${schema}.sessions(id) ON DELETE SET NULL,
255
+ source_session_id TEXT NOT NULL,
256
+ agent_id TEXT NOT NULL DEFAULT 'main',
257
+ consumer_profile_id TEXT NOT NULL,
258
+ consumer_profile_version INT NOT NULL,
259
+ consumer_schema_hash TEXT NOT NULL,
260
+ idempotency_key TEXT UNIQUE,
261
+ status TEXT NOT NULL,
262
+ last_step TEXT,
263
+ next_step TEXT,
264
+ blockers JSONB NOT NULL DEFAULT '[]'::jsonb,
265
+ decided JSONB NOT NULL DEFAULT '[]'::jsonb,
266
+ open_loops JSONB NOT NULL DEFAULT '[]'::jsonb,
267
+ payload JSONB NOT NULL,
268
+ created_at TIMESTAMPTZ NOT NULL DEFAULT now()
269
+ );
270
+
271
+ CREATE INDEX IF NOT EXISTS idx_session_handoffs_agent
272
+ ON ${schema}.session_handoffs (tenant_id, agent_id, created_at DESC);
273
+
274
+ CREATE INDEX IF NOT EXISTS idx_session_handoffs_session
275
+ ON ${schema}.session_handoffs (tenant_id, source_session_id, created_at DESC);
276
+
277
+ -- decisions: append-only decision log. status vocabulary
278
+ -- (proposed/committed/reversed) lives in a CHECK constraint so bad writes
279
+ -- fail at DB boundary. reversed_by_decision_id forms a supersede chain.
280
+ CREATE TABLE IF NOT EXISTS ${schema}.decisions (
281
+ id BIGSERIAL PRIMARY KEY,
282
+ tenant_id TEXT NOT NULL DEFAULT 'default',
283
+ session_row_id BIGINT REFERENCES ${schema}.sessions(id) ON DELETE SET NULL,
284
+ source_session_id TEXT,
285
+ agent_id TEXT NOT NULL DEFAULT 'main',
286
+ consumer_profile_id TEXT NOT NULL,
287
+ consumer_profile_version INT NOT NULL,
288
+ consumer_schema_hash TEXT NOT NULL,
289
+ idempotency_key TEXT UNIQUE,
290
+ payload JSONB NOT NULL,
291
+ status TEXT NOT NULL
292
+ CHECK (status IN ('proposed', 'committed', 'reversed')),
293
+ decision_text TEXT NOT NULL,
294
+ reason_text TEXT,
295
+ decided_at TIMESTAMPTZ NOT NULL DEFAULT now(),
296
+ reversed_by_decision_id BIGINT REFERENCES ${schema}.decisions(id) ON DELETE SET NULL,
297
+ metadata JSONB NOT NULL DEFAULT '{}'::jsonb,
298
+ search_tsv TSVECTOR,
299
+ search_text TEXT,
300
+ created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
301
+ updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
302
+ );
303
+
304
+ CREATE INDEX IF NOT EXISTS idx_decisions_status
305
+ ON ${schema}.decisions (tenant_id, agent_id, status, decided_at DESC);
306
+
307
+ CREATE INDEX IF NOT EXISTS idx_decisions_session
308
+ ON ${schema}.decisions (tenant_id, source_session_id);
309
+
310
+ CREATE INDEX IF NOT EXISTS idx_decisions_search_tsv
311
+ ON ${schema}.decisions USING GIN (search_tsv);
+ -- NOTE(review): decisions.search_text is populated by the trigger below but has
+ -- no gin_trgm_ops index, unlike narratives and timeline_events; add one if
+ -- trigram fallback recall is expected on decisions.
312
+
313
+ CREATE OR REPLACE FUNCTION ${schema}.decisions_search_tsv_update()
314
+ RETURNS trigger
315
+ LANGUAGE plpgsql
316
+ AS $$
317
+ BEGIN
318
+ NEW.search_text :=
319
+ COALESCE(NEW.decision_text, '') || ' ' ||
320
+ COALESCE(NEW.reason_text, '') || ' ' ||
321
+ COALESCE(NEW.metadata::text, '');
322
+
323
+ IF EXISTS (SELECT 1 FROM pg_ts_config WHERE cfgname = 'zhcfg') THEN
324
+ NEW.search_tsv :=
325
+ setweight(to_tsvector('zhcfg', COALESCE(NEW.decision_text, '')), 'A') ||
326
+ setweight(to_tsvector('zhcfg', COALESCE(NEW.reason_text, '')), 'B');
327
+ ELSE
328
+ NEW.search_tsv :=
329
+ setweight(to_tsvector('simple', COALESCE(NEW.decision_text, '')), 'A') ||
330
+ setweight(to_tsvector('simple', COALESCE(NEW.reason_text, '')), 'B');
331
+ END IF;
332
+
333
+ RETURN NEW;
334
+ END;
335
+ $$;
336
+
337
+ DROP TRIGGER IF EXISTS trg_decisions_search_tsv ON ${schema}.decisions;
338
+ CREATE TRIGGER trg_decisions_search_tsv
339
+ BEFORE INSERT OR UPDATE OF decision_text, reason_text, metadata
340
+ ON ${schema}.decisions
341
+ FOR EACH ROW
342
+ EXECUTE FUNCTION ${schema}.decisions_search_tsv_update();
343
+
344
+ DROP TRIGGER IF EXISTS trg_decisions_updated_at ON ${schema}.decisions;
345
+ CREATE TRIGGER trg_decisions_updated_at
346
+ BEFORE UPDATE ON ${schema}.decisions
347
+ FOR EACH ROW
348
+ EXECUTE FUNCTION ${schema}.set_updated_at();
349
+
350
+ -- artifacts: records producer-declared outputs (daily md, render, export).
351
+ -- Aquifer doesn't interpret payload — producers own shape. status lifecycle
352
+ -- pending → produced|failed|discarded.
353
+ CREATE TABLE IF NOT EXISTS ${schema}.artifacts (
354
+ id BIGSERIAL PRIMARY KEY,
355
+ tenant_id TEXT NOT NULL DEFAULT 'default',
356
+ session_row_id BIGINT REFERENCES ${schema}.sessions(id) ON DELETE SET NULL,
357
+ source_session_id TEXT,
358
+ agent_id TEXT NOT NULL DEFAULT 'main',
359
+ consumer_profile_id TEXT NOT NULL,
360
+ consumer_profile_version INT NOT NULL,
361
+ consumer_schema_hash TEXT NOT NULL,
362
+ idempotency_key TEXT UNIQUE,
363
+ producer_id TEXT NOT NULL,
364
+ artifact_type TEXT NOT NULL,
365
+ trigger_phase TEXT,
366
+ format TEXT NOT NULL,
367
+ destination TEXT NOT NULL,
368
+ status TEXT NOT NULL DEFAULT 'pending'
369
+ CHECK (status IN ('pending', 'produced', 'failed', 'discarded')),
370
+ content_ref TEXT,
371
+ payload JSONB NOT NULL DEFAULT '{}'::jsonb,
372
+ metadata JSONB NOT NULL DEFAULT '{}'::jsonb,
373
+ produced_at TIMESTAMPTZ,
374
+ created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
375
+ updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
376
+ );
377
+
378
+ CREATE INDEX IF NOT EXISTS idx_artifacts_lookup
379
+ ON ${schema}.artifacts (tenant_id, agent_id, producer_id, created_at DESC);
380
+
381
+ CREATE INDEX IF NOT EXISTS idx_artifacts_session
382
+ ON ${schema}.artifacts (tenant_id, source_session_id, created_at DESC);
383
+
384
+ CREATE INDEX IF NOT EXISTS idx_artifacts_status
385
+ ON ${schema}.artifacts (tenant_id, status, created_at DESC);
386
+
387
+ DROP TRIGGER IF EXISTS trg_artifacts_updated_at ON ${schema}.artifacts;
388
+ CREATE TRIGGER trg_artifacts_updated_at
389
+ BEFORE UPDATE ON ${schema}.artifacts
390
+ FOR EACH ROW
391
+ EXECUTE FUNCTION ${schema}.set_updated_at();
@@ -0,0 +1,87 @@
1
+ -- entity_state_history: temporal state-change tracking on entities.
2
+ --
3
+ -- Captures discrete attribute transitions (e.g. version.stable=1.2.1 -> 1.3.0,
4
+ -- editor.preference=vim -> nvim). Designed as additive overlay on the entities
5
+ -- table; DROP-clean — no triggers/functions/views, removing this table leaves
6
+ -- the rest of Aquifer untouched.
7
+ --
8
+ -- See spec.md Q3 and ~/.claude/develop-runs/20260419-142432-aquifer-memory-routes/.
9
+
10
+ CREATE TABLE IF NOT EXISTS ${schema}.entity_state_history (
11
+ id BIGSERIAL PRIMARY KEY,
12
+ tenant_id TEXT NOT NULL DEFAULT 'default',
13
+ agent_id TEXT NOT NULL DEFAULT 'main',
14
+ entity_id BIGINT NOT NULL
15
+ REFERENCES ${schema}.entities(id) ON DELETE CASCADE,
16
+ session_row_id BIGINT
17
+ REFERENCES ${schema}.sessions(id) ON DELETE SET NULL,
18
+ evidence_session_id TEXT,
19
+ attribute TEXT NOT NULL CHECK (btrim(attribute) <> ''),
20
+ value JSONB NOT NULL,
21
+ valid_from TIMESTAMPTZ NOT NULL,
22
+ valid_to TIMESTAMPTZ,
23
+ evidence_text TEXT NOT NULL DEFAULT '',
24
+ confidence NUMERIC(4,3) NOT NULL DEFAULT 0.7
25
+ CHECK (confidence >= 0 AND confidence <= 1),
26
+ source TEXT NOT NULL DEFAULT 'llm'
27
+ CHECK (source IN ('llm', 'manual', 'infra')),
28
+ idempotency_key TEXT,
29
+ supersedes_state_id BIGINT
30
+ REFERENCES ${schema}.entity_state_history(id) ON DELETE SET NULL,
31
+ created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
32
+ CHECK (valid_to IS NULL OR valid_to > valid_from)
33
+ );
34
+
35
+ -- Partial UNIQUE: only one "current" (valid_to IS NULL) row per
36
+ -- (tenant, agent, entity, attribute). This is the temporal invariant —
37
+ -- two open intervals on the same key would mean the table is corrupt.
38
+ CREATE UNIQUE INDEX IF NOT EXISTS idx_entity_state_history_current
39
+ ON ${schema}.entity_state_history (tenant_id, agent_id, entity_id, attribute)
40
+ WHERE valid_to IS NULL;
41
+
42
+ -- Idempotency: same caller-supplied key writes once. Partial allows NULL keys
43
+ -- (manual writes don't always need them).
44
+ CREATE UNIQUE INDEX IF NOT EXISTS idx_entity_state_history_idempotency
45
+ ON ${schema}.entity_state_history (idempotency_key)
46
+ WHERE idempotency_key IS NOT NULL;
47
+
48
+ -- Hot path: history-by-attribute timeline scan, newest-first.
49
+ CREATE INDEX IF NOT EXISTS idx_entity_state_history_entity_attr_time
50
+ ON ${schema}.entity_state_history
51
+ (tenant_id, agent_id, entity_id, attribute, valid_from DESC, id DESC);
52
+
53
+ -- Hot path: full history for an entity (no attribute filter).
54
+ CREATE INDEX IF NOT EXISTS idx_entity_state_history_entity_time
55
+ ON ${schema}.entity_state_history
56
+ (tenant_id, agent_id, entity_id, valid_from DESC, id DESC);
57
+
58
+ -- Diagnostic: trace all state changes captured from a single session.
59
+ CREATE INDEX IF NOT EXISTS idx_entity_state_history_evidence_session
60
+ ON ${schema}.entity_state_history
61
+ (tenant_id, agent_id, evidence_session_id, created_at DESC)
62
+ WHERE evidence_session_id IS NOT NULL;
63
+
64
+ CREATE INDEX IF NOT EXISTS idx_entity_state_history_session_row
65
+ ON ${schema}.entity_state_history (session_row_id)
66
+ WHERE session_row_id IS NOT NULL;
67
+
68
+ COMMENT ON TABLE ${schema}.entity_state_history IS
69
+ 'Valid-time temporal state changes on entities. Each row = one (entity, attribute) value valid over [valid_from, valid_to). NULL valid_to = current. supersedes_state_id chains supersession history.';
70
+
71
+ COMMENT ON COLUMN ${schema}.entity_state_history.attribute IS
72
+ 'Stable snake_case path identifying what changed (e.g. version.stable, editor.preference, runtime.node.version). Caller-defined; treat as opaque key.';
73
+
74
+ COMMENT ON COLUMN ${schema}.entity_state_history.valid_from IS
75
+ 'When the new value became true in the real world (not when it was observed). Use evidence anchor; fall back to session started_at if unspecified.';
76
+
77
+ COMMENT ON COLUMN ${schema}.entity_state_history.valid_to IS
78
+ 'NULL = currently valid. Otherwise, the timestamp at which a successor row took over. Closed intervals must satisfy valid_to > valid_from.';
79
+
80
+ COMMENT ON COLUMN ${schema}.entity_state_history.idempotency_key IS
81
+ 'Caller-supplied dedupe key. Default: sha256(tenant, agent, entity, attribute, canonical_json(value), valid_from, source). Replay safe.';
82
+
83
+ COMMENT ON COLUMN ${schema}.entity_state_history.supersedes_state_id IS
84
+ 'Chain pointer to the row this one closed (set valid_to on). NULL if this is the first known value for (entity, attribute).';
85
+
86
+ COMMENT ON COLUMN ${schema}.entity_state_history.evidence_session_id IS
87
+ 'Session that produced this evidence (text-level session_id, not session_row_id). For audit / re-extraction.';
@@ -0,0 +1,138 @@
1
+ -- insights: higher-order reflection from session content (Q4).
2
+ --
3
+ -- Holds preferences, recurring patterns, frustrations, and successful
4
+ -- workflows distilled from session_summaries over a window. Vector-indexed
5
+ -- for natural-language recall via aquifer.recallInsights().
6
+ --
7
+ -- DROP-clean: no triggers/functions, no FK from anywhere else into this table.
8
+ -- See scripts/drop-insights.sql.
+ -- NOTE(review): the `vector(1024)` column below requires the pgvector extension;
+ -- this file never runs CREATE EXTENSION vector — presumably an earlier migration
+ -- does. Confirm before running this file standalone.
9
+
10
+ CREATE TABLE IF NOT EXISTS ${schema}.insights (
11
+ id BIGSERIAL PRIMARY KEY,
12
+ tenant_id TEXT NOT NULL DEFAULT 'default',
13
+ agent_id TEXT NOT NULL,
14
+ insight_type TEXT NOT NULL
15
+ CHECK (insight_type IN ('preference', 'pattern', 'frustration', 'workflow')),
16
+ title TEXT NOT NULL CHECK (btrim(title) <> ''),
17
+ body TEXT NOT NULL CHECK (btrim(body) <> ''),
18
+ source_session_ids TEXT[] NOT NULL DEFAULT '{}',
19
+ evidence_window TSTZRANGE NOT NULL,
20
+ -- embedding: sized vector so HNSW can be built at migrate time. 1024 matches
21
+ -- the autodetect default (ollama bge-m3). Operators using a provider with
22
+ -- different dimensions (e.g. openai text-embedding-3-small = 1536) should
23
+ -- set `aquifer.embedding_dim` via GUC before running migrate(). Note the
24
+ -- coerce block below reads that GUC only when upgrading a pre-1.5.1 unsized
+ -- column; a fresh install always gets vector(1024) from this DDL regardless
+ -- of the GUC.
25
+ embedding vector(1024),
26
+ importance REAL NOT NULL DEFAULT 0.5
27
+ CHECK (importance >= 0 AND importance <= 1),
28
+ status TEXT NOT NULL DEFAULT 'active'
29
+ CHECK (status IN ('active', 'stale', 'superseded')),
30
+ superseded_by BIGINT REFERENCES ${schema}.insights(id) ON DELETE SET NULL,
31
+ idempotency_key TEXT,
32
+ metadata JSONB NOT NULL DEFAULT '{}'::jsonb,
33
+ created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
34
+ updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
35
+ );
36
+
37
+ -- Phase 2 C1: canonical_key_v2 identifies the CLAIM (type + canonicalClaim +
38
+ -- entitySet). idempotency_key keeps its revision-level role. Old rows have
39
+ -- canonical_key_v2 = NULL and are not retrofitted; new writes populate it.
40
+ ALTER TABLE ${schema}.insights
41
+ ADD COLUMN IF NOT EXISTS canonical_key_v2 TEXT;
42
+
43
+ -- Hot path: recall by agent + type, importance-ranked. Partial idx keeps
44
+ -- the index small by skipping stale/superseded rows.
45
+ CREATE INDEX IF NOT EXISTS idx_insights_active
46
+ ON ${schema}.insights (tenant_id, agent_id, insight_type, importance DESC, created_at DESC)
47
+ WHERE status = 'active';
48
+
49
+ -- Idempotency: caller-supplied key writes once. Partial allows NULL keys.
50
+ CREATE UNIQUE INDEX IF NOT EXISTS idx_insights_idempotency
51
+ ON ${schema}.insights (idempotency_key)
52
+ WHERE idempotency_key IS NOT NULL;
53
+
54
+ -- Phase 2 C1: preflight lookup for canonical_key_v2 active row.
55
+ -- NOT unique — canonical identity can have multiple revisions (legacy as
56
+ -- 'superseded'); only the latest stays 'active'. Partial keeps index small.
57
+ CREATE INDEX IF NOT EXISTS idx_insights_canonical_v2_active
58
+ ON ${schema}.insights (tenant_id, agent_id, insight_type, canonical_key_v2, created_at DESC)
59
+ WHERE status = 'active' AND canonical_key_v2 IS NOT NULL;
60
+
61
+ -- Coerce pre-1.5.1 unsized `vector` column to a sized type so HNSW can be
62
+ -- built. Pre-1.5.1 declared `embedding vector` (no dim) which makes HNSW
63
+ -- creation permanently impossible — the "defer until first row" pattern
64
+ -- was a broken diagnosis of the real problem (pgvector needs a dim on the
65
+ -- COLUMN, not just the data). Idempotent: skipped if already sized.
66
+ -- Dim priority: existing row dim > `aquifer.embedding_dim` GUC > 1024 default.
67
+ -- Note: ${schema} is substituted to a quoted identifier by the loader, so
68
+ -- we string-concat rather than format(%I, ...) to avoid double-quoting.
69
+ DO $$
70
+ DECLARE
71
+ is_unsized BOOLEAN;
72
+ existing_dim INT;
73
+ target_dim INT;
74
+ BEGIN
75
+ SELECT format_type(atttypid, atttypmod) = 'vector'
76
+ INTO is_unsized
77
+ FROM pg_attribute
78
+ WHERE attrelid = '${schema}.insights'::regclass
79
+ AND attname = 'embedding';
80
+
81
+ IF is_unsized THEN
82
+ EXECUTE 'SELECT vector_dims(embedding) FROM ${schema}.insights WHERE embedding IS NOT NULL LIMIT 1'
83
+ INTO existing_dim;
84
+ target_dim := COALESCE(
85
+ existing_dim,
86
+ NULLIF(current_setting('aquifer.embedding_dim', true), '')::int,
87
+ 1024
88
+ );
89
+ EXECUTE 'ALTER TABLE ${schema}.insights ALTER COLUMN embedding TYPE vector('
90
+ || target_dim::text
91
+ || ') USING embedding::vector('
92
+ || target_dim::text
93
+ || ')';
94
+ RAISE NOTICE '[aquifer] insights.embedding coerced from unsized vector to vector(%)', target_dim;
95
+ END IF;
96
+ END$$;
97
+
98
+ -- Vector index: HNSW for cosine distance, only over active insights with
99
+ -- embeddings. Column is now sized so this builds on fresh installs too.
100
+ -- Defer / out-of-memory / unavailable handlers kept as safety nets.
101
+ DO $$
102
+ BEGIN
103
+ EXECUTE 'CREATE INDEX IF NOT EXISTS idx_insights_embedding
104
+ ON ${schema}.insights USING hnsw (embedding vector_cosine_ops)
105
+ WHERE status = ''active'' AND embedding IS NOT NULL';
106
+ EXCEPTION
107
+ WHEN undefined_object THEN
108
+ RAISE NOTICE '[aquifer] pgvector hnsw operator not available; skipping HNSW index on insights';
109
+ WHEN feature_not_supported THEN
110
+ RAISE NOTICE '[aquifer] HNSW not available on this pgvector; upgrade to >= 0.5.0 for index-accelerated insights recall';
111
+ WHEN out_of_memory THEN
112
+ RAISE WARNING '[aquifer] HNSW build on insights.embedding ran out of memory; raise maintenance_work_mem and re-run migrate()';
113
+ WHEN program_limit_exceeded THEN
114
+ RAISE WARNING '[aquifer] HNSW build on insights.embedding exceeded an internal limit; inspect pgvector logs';
115
+ END$$;
116
+
117
+ -- Diagnostic: who-references-which-session, for audit / re-extraction.
118
+ CREATE INDEX IF NOT EXISTS idx_insights_source_sessions
119
+ ON ${schema}.insights USING GIN (source_session_ids)
120
+ WHERE status = 'active';
121
+
122
+ COMMENT ON TABLE ${schema}.insights IS
123
+ 'Higher-order observations distilled from sessions. NOT facts (use entity_state_history). NOT raw recap (use session_summaries). Reflection / skill memory.';
124
+
125
+ COMMENT ON COLUMN ${schema}.insights.insight_type IS
126
+ 'preference = stable user preference; pattern = recurring behaviour/decision; frustration = repeated pain point; workflow = reusable procedure that worked.';
127
+
128
+ COMMENT ON COLUMN ${schema}.insights.evidence_window IS
129
+ 'Time range of source sessions used to derive this insight. Half-open by convention.';
130
+
131
+ COMMENT ON COLUMN ${schema}.insights.importance IS
132
+ 'Caller-supplied [0,1]; recall ranking blends with semantic score and recency.';
133
+
134
+ COMMENT ON COLUMN ${schema}.insights.canonical_key_v2 IS
135
+ 'Phase 2 C1: stable claim identity = sha256(tenant|agent|type|normalizeCanonicalClaim(claim)|normalizeEntitySet(entities)). Survives LLM title drift. idempotency_key tracks revisions within a claim.';
136
+
137
+ COMMENT ON COLUMN ${schema}.insights.idempotency_key IS
138
+ 'Revision-level dedupe key. Default in writer: sha256(canonical_key_v2, normalized_body, sorted_session_ids, window). Same claim in same window with same body = duplicate; body change or window extend = new revision (old superseded).';