npm - @shadowforge0/aquifer-memory - Versions diffs - 0.2.0 - Mend

@shadowforge0/aquifer-memory 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

package/README.md +354 -0
package/consumers/cli.js +314 -0
package/consumers/mcp.js +135 -0
package/consumers/openclaw-plugin.js +235 -0
package/consumers/shared/config.js +143 -0
package/consumers/shared/factory.js +77 -0
package/consumers/shared/llm.js +119 -0
package/core/aquifer.js +634 -0
package/core/entity.js +360 -0
package/core/hybrid-rank.js +166 -0
package/core/storage.js +550 -0
package/index.js +6 -0
package/package.json +57 -0
package/pipeline/embed.js +230 -0
package/pipeline/extract-entities.js +73 -0
package/pipeline/summarize.js +245 -0
package/schema/001-base.sql +180 -0
package/schema/002-entities.sql +120 -0

package/schema/001-base.sql ADDED Viewed

@@ -0,0 +1,180 @@
+-- Aquifer base schema: sessions, session segments, session summaries and
+-- turn embeddings.
+-- Usage: textually replace every ${schema} placeholder with the actual schema
+-- name (e.g., 'aquifer') before executing this file. The value is spliced in as
+-- an SQL identifier, so it must come from trusted configuration, never from
+-- user input.
+-- The "vector" extension supplies the vector column type used further down.
+CREATE EXTENSION IF NOT EXISTS vector;
+CREATE SCHEMA IF NOT EXISTS ${schema};
+-- =========================================================================
+-- Sessions: raw conversation data
+-- One row per (tenant_id, agent_id, session_id). The surrogate id column is
+-- the key that the other tables in this file reference as session_row_id.
+-- =========================================================================
+CREATE TABLE IF NOT EXISTS ${schema}.sessions (
+  id BIGSERIAL PRIMARY KEY,
+  -- Identity
+  tenant_id TEXT NOT NULL DEFAULT 'default',
+  session_id TEXT NOT NULL,
+  session_key TEXT,
+  agent_id TEXT NOT NULL DEFAULT 'main',
+  source TEXT NOT NULL DEFAULT 'api',
+  -- Payload and counters
+  messages JSONB,
+  msg_count INT NOT NULL DEFAULT 0,
+  user_count INT NOT NULL DEFAULT 0,
+  assistant_count INT NOT NULL DEFAULT 0,
+  model TEXT,
+  tokens_in INT NOT NULL DEFAULT 0,
+  tokens_out INT NOT NULL DEFAULT 0,
+  -- Timeline
+  started_at TIMESTAMPTZ,
+  ended_at TIMESTAMPTZ,
+  last_message_at TIMESTAMPTZ,
+  -- Processing state; new rows start as 'pending'
+  processing_status TEXT NOT NULL DEFAULT 'pending',
+  processed_at TIMESTAMPTZ,
+  processing_error TEXT,
+  created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
+  UNIQUE (tenant_id, agent_id, session_id)
+);
+-- NOTE(review): the UNIQUE constraint above already yields an index led by
+-- (tenant_id, agent_id); this one may be redundant -- confirm before removing.
+CREATE INDEX IF NOT EXISTS idx_sessions_tenant_agent ON ${schema}.sessions (tenant_id, agent_id);
+-- Newest-first ordering by start time.
+CREATE INDEX IF NOT EXISTS idx_sessions_started_at ON ${schema}.sessions (started_at DESC);
+-- Partial index: only rows whose status is 'pending' or 'processing' are indexed.
+CREATE INDEX IF NOT EXISTS idx_sessions_processing_status ON ${schema}.sessions (processing_status)
+  WHERE processing_status IN ('pending', 'processing');
+-- =========================================================================
+-- Session segments: conversation boundary metadata
+-- Child rows of sessions, numbered by segment_no within a session; they are
+-- removed automatically when the parent session row is deleted.
+-- =========================================================================
+CREATE TABLE IF NOT EXISTS ${schema}.session_segments (
+  id BIGSERIAL PRIMARY KEY,
+  session_row_id BIGINT NOT NULL REFERENCES ${schema}.sessions(id) ON DELETE CASCADE,
+  segment_no INT NOT NULL,
+  -- Message index range and time range of the segment (all nullable)
+  start_msg_idx INT,
+  end_msg_idx INT,
+  started_at TIMESTAMPTZ,
+  ended_at TIMESTAMPTZ,
+  raw_msg_count INT NOT NULL DEFAULT 0,
+  effective_msg_count INT NOT NULL DEFAULT 0,
+  -- Boundary description; boundary_meta defaults to an empty JSON object
+  boundary_type TEXT,
+  boundary_meta JSONB NOT NULL DEFAULT '{}',
+  UNIQUE (session_row_id, segment_no)
+);
+-- NOTE(review): the UNIQUE constraint above already yields an index led by
+-- session_row_id; this one may be redundant -- confirm before removing.
+CREATE INDEX IF NOT EXISTS idx_session_segments_row ON ${schema}.session_segments (session_row_id);
+-- =========================================================================
+-- Session summaries: LLM-generated or extractive summaries
+-- At most one summary per session: session_row_id is both the primary key and
+-- a cascading foreign key to sessions.
+-- =========================================================================
+CREATE TABLE IF NOT EXISTS ${schema}.session_summaries (
+  session_row_id BIGINT PRIMARY KEY REFERENCES ${schema}.sessions(id) ON DELETE CASCADE,
+  -- Identity columns duplicated on the summary row (agent_id / session_id are nullable here)
+  tenant_id TEXT NOT NULL DEFAULT 'default',
+  agent_id TEXT,
+  session_id TEXT,
+  -- Provenance
+  summary_version INT NOT NULL DEFAULT 1,
+  model TEXT,
+  source_hash TEXT,
+  -- Counters
+  message_count INT NOT NULL DEFAULT 0,
+  user_message_count INT NOT NULL DEFAULT 0,
+  assistant_message_count INT NOT NULL DEFAULT 0,
+  boundary_count INT NOT NULL DEFAULT 0,
+  fresh_tail_count INT NOT NULL DEFAULT 0,
+  started_at TIMESTAMPTZ,
+  ended_at TIMESTAMPTZ,
+  -- Summary content
+  summary_text TEXT,
+  structured_summary JSONB NOT NULL DEFAULT '{}',
+  -- Search columns: the vector type is declared without a fixed dimension
+  embedding vector,
+  search_tsv TSVECTOR,
+  -- Access bookkeeping
+  access_count INT NOT NULL DEFAULT 0,
+  last_accessed_at TIMESTAMPTZ,
+  updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
+);
+CREATE INDEX IF NOT EXISTS idx_summaries_tenant ON ${schema}.session_summaries (tenant_id);
+-- GIN index backing full-text queries against search_tsv.
+CREATE INDEX IF NOT EXISTS idx_summaries_search_tsv ON ${schema}.session_summaries USING GIN (search_tsv);
+-- Partial b-tree over rows that have an embedding. It indexes session_row_id,
+-- not the vector itself, so it does not accelerate similarity search.
+CREATE INDEX IF NOT EXISTS idx_summaries_embedding ON ${schema}.session_summaries (session_row_id)
+  WHERE embedding IS NOT NULL;
+-- FTS trigger function: rebuilds search_tsv from summary_text and
+-- structured_summary each time the row trigger fires.
+--
+-- Weights (all using the 'simple' text search configuration):
+--   A = structured_summary.title
+--   B = structured_summary.overview + topics[].name/summary + decisions[].decision/reason
+--   C = summary_text
+--   D = open_loops[].item + important_facts[]
+--
+-- Robustness: topics / decisions / open_loops / important_facts are treated as
+-- empty lists when the key is missing, JSON null, or anything other than a JSON
+-- array. jsonb_array_elements() raises an error on non-array input, and
+-- COALESCE does not help there because a JSON null is not an SQL NULL.
+CREATE OR REPLACE FUNCTION ${schema}.session_summaries_search_tsv_update()
+RETURNS trigger
+LANGUAGE plpgsql
+AS $$
+DECLARE
+  ss jsonb;
+  title_text text;
+  overview_text text;
+  topics_text text;
+  decisions_text text;
+  open_loops_text text;
+  facts_text text;
+BEGIN
+  ss := COALESCE(NEW.structured_summary, '{}'::jsonb);
+  title_text := COALESCE(ss->>'title', '');
+  overview_text := COALESCE(ss->>'overview', '');
+  -- Both halves are COALESCEd: "NULL || text" is NULL and string_agg skips NULL
+  -- inputs, so a topic without a name would otherwise lose its summary too.
+  SELECT COALESCE(string_agg(COALESCE(elem->>'name', '') || ' ' || COALESCE(elem->>'summary', ''), ' '), '')
+  INTO topics_text
+  FROM jsonb_array_elements(
+    CASE WHEN jsonb_typeof(ss->'topics') = 'array' THEN ss->'topics' ELSE '[]'::jsonb END
+  ) AS elem;
+  SELECT COALESCE(string_agg(COALESCE(elem->>'decision', '') || ' ' || COALESCE(elem->>'reason', ''), ' '), '')
+  INTO decisions_text
+  FROM jsonb_array_elements(
+    CASE WHEN jsonb_typeof(ss->'decisions') = 'array' THEN ss->'decisions' ELSE '[]'::jsonb END
+  ) AS elem;
+  SELECT COALESCE(string_agg(elem->>'item', ' '), '')
+  INTO open_loops_text
+  FROM jsonb_array_elements(
+    CASE WHEN jsonb_typeof(ss->'open_loops') = 'array' THEN ss->'open_loops' ELSE '[]'::jsonb END
+  ) AS elem;
+  -- #>>'{}' extracts each array element itself as text.
+  SELECT COALESCE(string_agg(elem#>>'{}', ' '), '')
+  INTO facts_text
+  FROM jsonb_array_elements(
+    CASE WHEN jsonb_typeof(ss->'important_facts') = 'array' THEN ss->'important_facts' ELSE '[]'::jsonb END
+  ) AS elem;
+  NEW.search_tsv :=
+    setweight(to_tsvector('simple', title_text), 'A') ||
+    setweight(to_tsvector('simple', overview_text || ' ' || topics_text || ' ' || decisions_text), 'B') ||
+    setweight(to_tsvector('simple', COALESCE(NEW.summary_text, '')), 'C') ||
+    setweight(to_tsvector('simple', open_loops_text || ' ' || facts_text), 'D');
+  RETURN NEW;
+END;
+$$;
+-- Recreate the trigger so this file can be re-run: drop any existing
+-- definition, then fire the search_tsv function before every INSERT and before
+-- any UPDATE that names summary_text or structured_summary in its SET list.
+DROP TRIGGER IF EXISTS trg_session_summaries_search_tsv ON ${schema}.session_summaries;
+CREATE TRIGGER trg_session_summaries_search_tsv
+  BEFORE INSERT OR UPDATE OF summary_text, structured_summary ON ${schema}.session_summaries
+  FOR EACH ROW EXECUTE FUNCTION ${schema}.session_summaries_search_tsv_update();
+-- =========================================================================
+-- Turn embeddings: per-user-turn vector embeddings
+-- One row per (session_row_id, message_index); the CHECK on role restricts
+-- the table to 'user' rows. Rows are removed with their parent session.
+-- =========================================================================
+CREATE TABLE IF NOT EXISTS ${schema}.turn_embeddings (
+  id BIGSERIAL PRIMARY KEY,
+  session_row_id BIGINT NOT NULL REFERENCES ${schema}.sessions(id) ON DELETE CASCADE,
+  -- Identity columns duplicated on the embedding row
+  tenant_id TEXT NOT NULL DEFAULT 'default',
+  session_id TEXT NOT NULL,
+  agent_id TEXT NOT NULL,
+  source TEXT,
+  -- Position of the turn within the session
+  turn_index INT NOT NULL,
+  message_index INT NOT NULL,
+  role TEXT NOT NULL DEFAULT 'user' CHECK (role = 'user'),
+  -- Embedded content; the vector type is declared without a fixed dimension
+  content_text TEXT NOT NULL,
+  content_hash TEXT NOT NULL,
+  embedding vector NOT NULL,
+  created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
+  UNIQUE (session_row_id, message_index)
+);
+-- NOTE(review): the UNIQUE constraint above already yields an index led by
+-- session_row_id; this one may be redundant -- confirm before removing.
+CREATE INDEX IF NOT EXISTS idx_turn_emb_session_row ON ${schema}.turn_embeddings (session_row_id);
+CREATE INDEX IF NOT EXISTS idx_turn_emb_tenant_agent ON ${schema}.turn_embeddings (tenant_id, agent_id, source);

package/schema/002-entities.sql ADDED Viewed

@@ -0,0 +1,120 @@
+-- Aquifer entity / knowledge graph extension
+-- Requires: 001-base.sql applied first. The tables below reference
+-- ${schema}.sessions and ${schema}.turn_embeddings and use the vector type.
+-- Usage: textually replace every ${schema} placeholder with the actual schema
+-- name before executing; the value is spliced in as an SQL identifier.
+-- pg_trgm supplies the gin_trgm_ops operator class used by the name index below.
+CREATE EXTENSION IF NOT EXISTS pg_trgm;
+-- =========================================================================
+-- Entities: unique named concepts
+-- Uniqueness is per (tenant_id, normalized_name, agent_id).
+-- =========================================================================
+CREATE TABLE IF NOT EXISTS ${schema}.entities (
+  id BIGSERIAL PRIMARY KEY,
+  tenant_id TEXT NOT NULL DEFAULT 'default',
+  -- Naming: name as given, normalized_name used for uniqueness and trigram search
+  name TEXT NOT NULL,
+  normalized_name TEXT NOT NULL,
+  aliases TEXT[] NOT NULL DEFAULT '{}',
+  -- Closed vocabularies enforced by CHECK constraints
+  type TEXT NOT NULL DEFAULT 'other'
+    CHECK (type IN (
+      'person','project','concept','tool','metric','org',
+      'place','event','doc','task','topic','other'
+    )),
+  status TEXT NOT NULL DEFAULT 'active'
+    CHECK (status IN ('active','merged','deleted')),
+  frequency INT NOT NULL DEFAULT 1,
+  agent_id TEXT NOT NULL DEFAULT 'main',
+  created_by TEXT,
+  metadata JSONB NOT NULL DEFAULT '{}',
+  embedding vector,
+  first_seen_at TIMESTAMPTZ NOT NULL DEFAULT now(),
+  last_seen_at TIMESTAMPTZ NOT NULL DEFAULT now(),
+  UNIQUE (tenant_id, normalized_name, agent_id)
+);
+CREATE INDEX IF NOT EXISTS idx_entities_tenant_agent ON ${schema}.entities (tenant_id, agent_id);
+CREATE INDEX IF NOT EXISTS idx_entities_type ON ${schema}.entities (type);
+CREATE INDEX IF NOT EXISTS idx_entities_last_seen ON ${schema}.entities (last_seen_at DESC);
+-- Trigram GIN index on normalized_name (operator class from pg_trgm).
+CREATE INDEX IF NOT EXISTS idx_entities_name_trgm ON ${schema}.entities USING GIN (normalized_name gin_trgm_ops);
+-- GIN index over the aliases array.
+CREATE INDEX IF NOT EXISTS idx_entities_aliases ON ${schema}.entities USING GIN (aliases);
+-- Partial index: active entities only, highest frequency first within a tenant/agent.
+CREATE INDEX IF NOT EXISTS idx_entities_active ON ${schema}.entities (tenant_id, agent_id, frequency DESC)
+  WHERE status = 'active';
+-- =========================================================================
+-- Entity mentions: links entity to session (deduped per session)
+-- The dedup is enforced by the unique index at the end of this section.
+-- =========================================================================
+CREATE TABLE IF NOT EXISTS ${schema}.entity_mentions (
+  id BIGSERIAL PRIMARY KEY,
+  -- Deleting the entity or the session deletes the mention
+  entity_id BIGINT NOT NULL REFERENCES ${schema}.entities(id) ON DELETE CASCADE,
+  session_row_id BIGINT NOT NULL REFERENCES ${schema}.sessions(id) ON DELETE CASCADE,
+  -- Optional link to a turn embedding; set to NULL if that embedding row is deleted
+  turn_embedding_id BIGINT REFERENCES ${schema}.turn_embeddings(id) ON DELETE SET NULL,
+  source TEXT,
+  mention_text TEXT,
+  confidence FLOAT NOT NULL DEFAULT 1.0,
+  occurred_at TIMESTAMPTZ NOT NULL DEFAULT now()
+);
+-- NOTE(review): idx_entity_mentions_dedup below is already led by entity_id;
+-- this index may be redundant -- confirm before removing.
+CREATE INDEX IF NOT EXISTS idx_entity_mentions_entity_id ON ${schema}.entity_mentions (entity_id);
+CREATE INDEX IF NOT EXISTS idx_entity_mentions_session_row_id ON ${schema}.entity_mentions (session_row_id);
+-- Partial index: only mentions that are linked to a turn embedding.
+CREATE INDEX IF NOT EXISTS idx_entity_mentions_turn_embedding_id ON ${schema}.entity_mentions (turn_embedding_id)
+  WHERE turn_embedding_id IS NOT NULL;
+-- At most one mention row per (entity, session).
+CREATE UNIQUE INDEX IF NOT EXISTS idx_entity_mentions_dedup ON ${schema}.entity_mentions (entity_id, session_row_id);
+-- =========================================================================
+-- Entity relations: undirected co-occurrence (src < dst enforced)
+-- Each unordered pair is stored once, with the smaller entity id in
+-- src_entity_id; the CHECK also rules out self-relations.
+-- =========================================================================
+CREATE TABLE IF NOT EXISTS ${schema}.entity_relations (
+  id BIGSERIAL PRIMARY KEY,
+  -- Deleting either entity deletes the relation
+  src_entity_id BIGINT NOT NULL REFERENCES ${schema}.entities(id) ON DELETE CASCADE,
+  dst_entity_id BIGINT NOT NULL REFERENCES ${schema}.entities(id) ON DELETE CASCADE,
+  co_occurrence_count INT NOT NULL DEFAULT 1,
+  first_seen_at TIMESTAMPTZ NOT NULL DEFAULT now(),
+  last_seen_at TIMESTAMPTZ NOT NULL DEFAULT now(),
+  CHECK (src_entity_id < dst_entity_id),
+  UNIQUE (src_entity_id, dst_entity_id)
+);
+-- NOTE(review): the UNIQUE constraint above already yields an index led by
+-- src_entity_id; this one may be redundant -- confirm before removing.
+CREATE INDEX IF NOT EXISTS idx_entity_relations_src ON ${schema}.entity_relations (src_entity_id);
+CREATE INDEX IF NOT EXISTS idx_entity_relations_dst ON ${schema}.entity_relations (dst_entity_id);
+-- Highest co-occurrence counts first.
+CREATE INDEX IF NOT EXISTS idx_entity_relations_cooccurrence ON ${schema}.entity_relations (co_occurrence_count DESC);
+-- =========================================================================
+-- Entity sessions: which entities appeared in which sessions (for boost scoring)
+-- One row per (entity_id, session_row_id) carrying a mention counter.
+-- =========================================================================
+CREATE TABLE IF NOT EXISTS ${schema}.entity_sessions (
+  id BIGSERIAL PRIMARY KEY,
+  -- Deleting the entity or the session deletes the link
+  entity_id BIGINT NOT NULL REFERENCES ${schema}.entities(id) ON DELETE CASCADE,
+  session_row_id BIGINT NOT NULL REFERENCES ${schema}.sessions(id) ON DELETE CASCADE,
+  mention_count INT NOT NULL DEFAULT 1,
+  occurred_at TIMESTAMPTZ NOT NULL DEFAULT now(),
+  UNIQUE (entity_id, session_row_id)
+);
+-- NOTE(review): the UNIQUE constraint above already yields an index led by
+-- entity_id; this one may be redundant -- confirm before removing.
+CREATE INDEX IF NOT EXISTS idx_entity_sessions_entity_id ON ${schema}.entity_sessions (entity_id);
+CREATE INDEX IF NOT EXISTS idx_entity_sessions_session_row_id ON ${schema}.entity_sessions (session_row_id);
+-- Partial index: only links with two or more mentions.
+CREATE INDEX IF NOT EXISTS idx_entity_sessions_frequent ON ${schema}.entity_sessions (session_row_id, entity_id)
+  WHERE mention_count >= 2;