@shadowforge0/aquifer-memory 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +354 -0
- package/consumers/cli.js +314 -0
- package/consumers/mcp.js +135 -0
- package/consumers/openclaw-plugin.js +235 -0
- package/consumers/shared/config.js +143 -0
- package/consumers/shared/factory.js +77 -0
- package/consumers/shared/llm.js +119 -0
- package/core/aquifer.js +634 -0
- package/core/entity.js +360 -0
- package/core/hybrid-rank.js +166 -0
- package/core/storage.js +550 -0
- package/index.js +6 -0
- package/package.json +57 -0
- package/pipeline/embed.js +230 -0
- package/pipeline/extract-entities.js +73 -0
- package/pipeline/summarize.js +245 -0
- package/schema/001-base.sql +180 -0
- package/schema/002-entities.sql +120 -0
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
-- Aquifer base schema (001-base.sql)
--
-- This file is a template: every ${schema} token must be replaced with the
-- target schema name (for example: aquifer) before the script is executed.
-- NOTE(review): the replacement is textual, so the schema name presumably has
-- to be a trusted, valid SQL identifier -- confirm the caller validates it.

-- The "vector" extension provides the vector column type used by the
-- embedding columns declared further down.
CREATE EXTENSION IF NOT EXISTS vector;

-- Namespace that holds every object created by this script.
CREATE SCHEMA IF NOT EXISTS ${schema};
|
|
6
|
+
|
|
7
|
+
-- =========================================================================
-- Sessions: raw conversation data
--
-- One row per (tenant_id, agent_id, session_id); the UNIQUE constraint at the
-- end of the table enforces that. Child tables reference sessions(id).
-- =========================================================================
CREATE TABLE IF NOT EXISTS ${schema}.sessions (
    id                 BIGSERIAL    PRIMARY KEY,
    tenant_id          TEXT         NOT NULL DEFAULT 'default',
    session_id         TEXT         NOT NULL,
    session_key        TEXT,
    agent_id           TEXT         NOT NULL DEFAULT 'main',
    source             TEXT         NOT NULL DEFAULT 'api',
    messages           JSONB,                                  -- nullable payload
    msg_count          INTEGER      NOT NULL DEFAULT 0,
    user_count         INTEGER      NOT NULL DEFAULT 0,
    assistant_count    INTEGER      NOT NULL DEFAULT 0,
    model              TEXT,
    tokens_in          INTEGER      NOT NULL DEFAULT 0,
    tokens_out         INTEGER      NOT NULL DEFAULT 0,
    started_at         TIMESTAMPTZ,
    ended_at           TIMESTAMPTZ,
    last_message_at    TIMESTAMPTZ,
    -- Free-form text (no CHECK constraint); new rows start as 'pending'.
    processing_status  TEXT         NOT NULL DEFAULT 'pending',
    processed_at       TIMESTAMPTZ,
    processing_error   TEXT,
    created_at         TIMESTAMPTZ  NOT NULL DEFAULT now(),
    UNIQUE (tenant_id, agent_id, session_id)
);

-- NOTE(review): the index backing the UNIQUE constraint above already leads
-- with (tenant_id, agent_id), so this index may be redundant -- verify with
-- EXPLAIN before dropping it.
CREATE INDEX IF NOT EXISTS idx_sessions_tenant_agent
    ON ${schema}.sessions (tenant_id, agent_id);

-- Newest-first ordering by start time.
CREATE INDEX IF NOT EXISTS idx_sessions_started_at
    ON ${schema}.sessions (started_at DESC);

-- Partial index: only rows whose status is 'pending' or 'processing' are indexed.
CREATE INDEX IF NOT EXISTS idx_sessions_processing_status
    ON ${schema}.sessions (processing_status)
    WHERE processing_status IN ('pending', 'processing');
|
|
43
|
+
|
|
44
|
+
-- =========================================================================
-- Session segments: conversation boundary metadata
--
-- Child of sessions (rows are removed with their session via ON DELETE
-- CASCADE). segment_no is unique within a session.
-- =========================================================================
CREATE TABLE IF NOT EXISTS ${schema}.session_segments (
    id                   BIGSERIAL    PRIMARY KEY,
    session_row_id       BIGINT       NOT NULL
                             REFERENCES ${schema}.sessions(id) ON DELETE CASCADE,
    segment_no           INTEGER      NOT NULL,
    start_msg_idx        INTEGER,
    end_msg_idx          INTEGER,
    started_at           TIMESTAMPTZ,
    ended_at             TIMESTAMPTZ,
    raw_msg_count        INTEGER      NOT NULL DEFAULT 0,
    effective_msg_count  INTEGER      NOT NULL DEFAULT 0,
    boundary_type        TEXT,
    boundary_meta        JSONB        NOT NULL DEFAULT '{}',
    UNIQUE (session_row_id, segment_no)
);

-- NOTE(review): the UNIQUE (session_row_id, segment_no) index already leads
-- with session_row_id, so this index may be redundant -- verify before dropping.
CREATE INDEX IF NOT EXISTS idx_session_segments_row
    ON ${schema}.session_segments (session_row_id);
|
|
64
|
+
|
|
65
|
+
-- =========================================================================
-- Session summaries: LLM-generated or extractive summaries
--
-- At most one summary per session: session_row_id is both the primary key
-- and a foreign key to sessions(id), cascading on delete.
-- =========================================================================
CREATE TABLE IF NOT EXISTS ${schema}.session_summaries (
    session_row_id           BIGINT       PRIMARY KEY
                                 REFERENCES ${schema}.sessions(id) ON DELETE CASCADE,
    tenant_id                TEXT         NOT NULL DEFAULT 'default',
    agent_id                 TEXT,
    session_id               TEXT,
    summary_version          INTEGER      NOT NULL DEFAULT 1,
    model                    TEXT,
    source_hash              TEXT,
    message_count            INTEGER      NOT NULL DEFAULT 0,
    user_message_count       INTEGER      NOT NULL DEFAULT 0,
    assistant_message_count  INTEGER      NOT NULL DEFAULT 0,
    boundary_count           INTEGER      NOT NULL DEFAULT 0,
    fresh_tail_count         INTEGER      NOT NULL DEFAULT 0,
    started_at               TIMESTAMPTZ,
    ended_at                 TIMESTAMPTZ,
    summary_text             TEXT,
    structured_summary       JSONB        NOT NULL DEFAULT '{}',
    embedding                vector,      -- no dimension declared; nullable
    search_tsv               TSVECTOR,    -- filled in by the FTS trigger in this file
    access_count             INTEGER      NOT NULL DEFAULT 0,
    last_accessed_at         TIMESTAMPTZ,
    updated_at               TIMESTAMPTZ  NOT NULL DEFAULT now()
);

CREATE INDEX IF NOT EXISTS idx_summaries_tenant
    ON ${schema}.session_summaries (tenant_id);

-- GIN index supporting full-text queries against search_tsv.
CREATE INDEX IF NOT EXISTS idx_summaries_search_tsv
    ON ${schema}.session_summaries USING GIN (search_tsv);

-- Despite the name, this is a default (btree) partial index on session_row_id
-- limited to rows that have an embedding; it is not a vector similarity index.
CREATE INDEX IF NOT EXISTS idx_summaries_embedding
    ON ${schema}.session_summaries (session_row_id)
    WHERE embedding IS NOT NULL;
|
|
101
|
+
|
|
102
|
+
-- FTS trigger function: rebuilds NEW.search_tsv from summary_text and
-- structured_summary each time the trigger fires, then returns NEW.
--
-- Weights (all built with the 'simple' text search configuration):
--   A: structured_summary.title
--   B: structured_summary.overview, topics[].name/summary,
--      decisions[].decision/reason
--   C: summary_text
--   D: open_loops[].item, important_facts[] (each element rendered as text)
--
-- Robustness:
--   * A list key that is missing, JSON null, or any other non-array value is
--     treated as an empty list. jsonb_array_elements() raises an error on
--     non-array input, and COALESCE alone does not catch a JSON null, so an
--     unguarded call would abort the whole INSERT/UPDATE.
--   * concat_ws() skips NULL parts, so a topic without "name" (or a decision
--     without "decision") still contributes its other field instead of
--     turning the whole expression NULL and being dropped by string_agg().
CREATE OR REPLACE FUNCTION ${schema}.session_summaries_search_tsv_update()
RETURNS trigger
LANGUAGE plpgsql
AS $$
DECLARE
    ss              jsonb;
    empty_list      CONSTANT jsonb := '[]'::jsonb;
    topics_arr      jsonb;
    decisions_arr   jsonb;
    open_loops_arr  jsonb;
    facts_arr       jsonb;
    title_text      text;
    overview_text   text;
    topics_text     text;
    decisions_text  text;
    open_loops_text text;
    facts_text      text;
BEGIN
    ss := COALESCE(NEW.structured_summary, '{}'::jsonb);

    title_text    := COALESCE(ss->>'title', '');
    overview_text := COALESCE(ss->>'overview', '');

    -- jsonb_typeof() yields NULL for a missing key, so every non-array case
    -- (missing, JSON null, scalar, object) falls through to the empty list.
    topics_arr := CASE WHEN jsonb_typeof(ss->'topics') = 'array'
                       THEN ss->'topics' ELSE empty_list END;
    decisions_arr := CASE WHEN jsonb_typeof(ss->'decisions') = 'array'
                          THEN ss->'decisions' ELSE empty_list END;
    open_loops_arr := CASE WHEN jsonb_typeof(ss->'open_loops') = 'array'
                           THEN ss->'open_loops' ELSE empty_list END;
    facts_arr := CASE WHEN jsonb_typeof(ss->'important_facts') = 'array'
                      THEN ss->'important_facts' ELSE empty_list END;

    SELECT COALESCE(string_agg(concat_ws(' ', elem->>'name', elem->>'summary'), ' '), '')
      INTO topics_text
      FROM jsonb_array_elements(topics_arr) AS elem;

    SELECT COALESCE(string_agg(concat_ws(' ', elem->>'decision', elem->>'reason'), ' '), '')
      INTO decisions_text
      FROM jsonb_array_elements(decisions_arr) AS elem;

    SELECT COALESCE(string_agg(elem->>'item', ' '), '')
      INTO open_loops_text
      FROM jsonb_array_elements(open_loops_arr) AS elem;

    -- elem #>> '{}' renders the element itself as text (strings are unquoted).
    SELECT COALESCE(string_agg(elem#>>'{}', ' '), '')
      INTO facts_text
      FROM jsonb_array_elements(facts_arr) AS elem;

    NEW.search_tsv :=
        setweight(to_tsvector('simple', title_text), 'A') ||
        setweight(to_tsvector('simple', overview_text || ' ' || topics_text || ' ' || decisions_text), 'B') ||
        setweight(to_tsvector('simple', COALESCE(NEW.summary_text, '')), 'C') ||
        setweight(to_tsvector('simple', open_loops_text || ' ' || facts_text), 'D');

    RETURN NEW;
END;
$$;
|
|
146
|
+
|
|
147
|
+
-- (Re)attach the FTS trigger. Dropping first keeps the script re-runnable,
-- because CREATE TRIGGER fails if a trigger with this name already exists.
DROP TRIGGER IF EXISTS trg_session_summaries_search_tsv ON ${schema}.session_summaries;

-- Fires per row before every INSERT, and before an UPDATE only when the
-- statement targets summary_text or structured_summary.
CREATE TRIGGER trg_session_summaries_search_tsv
    BEFORE INSERT OR UPDATE OF summary_text, structured_summary
    ON ${schema}.session_summaries
    FOR EACH ROW
    EXECUTE FUNCTION ${schema}.session_summaries_search_tsv_update();
|
|
155
|
+
|
|
156
|
+
-- =========================================================================
-- Turn embeddings: per-user-turn vector embeddings
--
-- Child of sessions (ON DELETE CASCADE). At most one row per
-- (session_row_id, message_index). The CHECK on role admits only 'user'.
-- =========================================================================
CREATE TABLE IF NOT EXISTS ${schema}.turn_embeddings (
    id              BIGSERIAL    PRIMARY KEY,
    session_row_id  BIGINT       NOT NULL
                        REFERENCES ${schema}.sessions(id) ON DELETE CASCADE,
    tenant_id       TEXT         NOT NULL DEFAULT 'default',
    session_id      TEXT         NOT NULL,
    agent_id        TEXT         NOT NULL,
    source          TEXT,
    turn_index      INTEGER      NOT NULL,
    message_index   INTEGER      NOT NULL,
    role            TEXT         NOT NULL DEFAULT 'user' CHECK (role = 'user'),
    content_text    TEXT         NOT NULL,
    content_hash    TEXT         NOT NULL,
    embedding       vector       NOT NULL,   -- no dimension declared
    created_at      TIMESTAMPTZ  NOT NULL DEFAULT now(),
    UNIQUE (session_row_id, message_index)
);

-- NOTE(review): the UNIQUE (session_row_id, message_index) index already
-- leads with session_row_id, so this index may be redundant -- verify before
-- dropping.
CREATE INDEX IF NOT EXISTS idx_turn_emb_session_row
    ON ${schema}.turn_embeddings (session_row_id);

CREATE INDEX IF NOT EXISTS idx_turn_emb_tenant_agent
    ON ${schema}.turn_embeddings (tenant_id, agent_id, source);
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
-- Aquifer entity / knowledge graph extension (002-entities.sql)
--
-- Prerequisite: 001-base.sql must already be applied; the tables below
-- reference sessions and turn_embeddings and use the vector type.
-- This file is a template: replace every ${schema} token with the actual
-- schema name before executing it.

-- pg_trgm provides the gin_trgm_ops operator class used by the trigram
-- index on entities.normalized_name.
CREATE EXTENSION IF NOT EXISTS pg_trgm;
|
|
6
|
+
|
|
7
|
+
-- =========================================================================
-- Entities: unique named concepts
--
-- Uniqueness is per (tenant_id, normalized_name, agent_id). type and status
-- are constrained to the literal sets in their CHECK clauses.
-- =========================================================================
CREATE TABLE IF NOT EXISTS ${schema}.entities (
    id               BIGSERIAL    PRIMARY KEY,
    tenant_id        TEXT         NOT NULL DEFAULT 'default',
    name             TEXT         NOT NULL,
    normalized_name  TEXT         NOT NULL,
    aliases          TEXT[]       NOT NULL DEFAULT '{}',
    type             TEXT         NOT NULL DEFAULT 'other'
                         CHECK (type IN ('person','project','concept','tool','metric','org',
                                         'place','event','doc','task','topic','other')),
    status           TEXT         NOT NULL DEFAULT 'active'
                         CHECK (status IN ('active','merged','deleted')),
    frequency        INTEGER      NOT NULL DEFAULT 1,
    agent_id         TEXT         NOT NULL DEFAULT 'main',
    created_by       TEXT,
    metadata         JSONB        NOT NULL DEFAULT '{}',
    embedding        vector,      -- no dimension declared; nullable
    first_seen_at    TIMESTAMPTZ  NOT NULL DEFAULT now(),
    last_seen_at     TIMESTAMPTZ  NOT NULL DEFAULT now(),
    UNIQUE (tenant_id, normalized_name, agent_id)
);

CREATE INDEX IF NOT EXISTS idx_entities_tenant_agent
    ON ${schema}.entities (tenant_id, agent_id);

CREATE INDEX IF NOT EXISTS idx_entities_type
    ON ${schema}.entities (type);

-- Most recently seen entities first.
CREATE INDEX IF NOT EXISTS idx_entities_last_seen
    ON ${schema}.entities (last_seen_at DESC);

-- Trigram GIN index on normalized_name (operator class from pg_trgm).
CREATE INDEX IF NOT EXISTS idx_entities_name_trgm
    ON ${schema}.entities USING GIN (normalized_name gin_trgm_ops);

-- GIN index over the aliases array.
CREATE INDEX IF NOT EXISTS idx_entities_aliases
    ON ${schema}.entities USING GIN (aliases);

-- Partial index covering only rows with status = 'active', ordered by
-- descending frequency within each (tenant_id, agent_id).
CREATE INDEX IF NOT EXISTS idx_entities_active
    ON ${schema}.entities (tenant_id, agent_id, frequency DESC)
    WHERE status = 'active';
|
|
49
|
+
|
|
50
|
+
-- =========================================================================
-- Entity mentions: links entity to session (deduped per session)
--
-- Deleting the entity or the session removes the mention (CASCADE);
-- deleting the referenced turn embedding only clears turn_embedding_id
-- (SET NULL). The unique index at the bottom enforces one row per
-- (entity_id, session_row_id).
-- =========================================================================
CREATE TABLE IF NOT EXISTS ${schema}.entity_mentions (
    id                 BIGSERIAL    PRIMARY KEY,
    entity_id          BIGINT       NOT NULL
                           REFERENCES ${schema}.entities(id) ON DELETE CASCADE,
    session_row_id     BIGINT       NOT NULL
                           REFERENCES ${schema}.sessions(id) ON DELETE CASCADE,
    turn_embedding_id  BIGINT
                           REFERENCES ${schema}.turn_embeddings(id) ON DELETE SET NULL,
    source             TEXT,
    mention_text       TEXT,
    confidence         FLOAT        NOT NULL DEFAULT 1.0,
    occurred_at        TIMESTAMPTZ  NOT NULL DEFAULT now()
);

-- NOTE(review): idx_entity_mentions_dedup below already leads with entity_id,
-- so this index may be redundant -- verify before dropping.
CREATE INDEX IF NOT EXISTS idx_entity_mentions_entity_id
    ON ${schema}.entity_mentions (entity_id);

CREATE INDEX IF NOT EXISTS idx_entity_mentions_session_row_id
    ON ${schema}.entity_mentions (session_row_id);

-- Partial index: rows without a turn embedding link are not indexed.
CREATE INDEX IF NOT EXISTS idx_entity_mentions_turn_embedding_id
    ON ${schema}.entity_mentions (turn_embedding_id)
    WHERE turn_embedding_id IS NOT NULL;

-- Enforces the per-session dedup: one mention row per entity per session.
CREATE UNIQUE INDEX IF NOT EXISTS idx_entity_mentions_dedup
    ON ${schema}.entity_mentions (entity_id, session_row_id);
|
|
76
|
+
|
|
77
|
+
-- =========================================================================
-- Entity relations: undirected co-occurrence (src < dst enforced)
--
-- Each unordered pair is stored once: the CHECK requires
-- src_entity_id < dst_entity_id (which also rules out self-relations), and
-- the UNIQUE constraint forbids duplicate pairs. Writers must therefore put
-- the smaller entity id in src_entity_id.
-- =========================================================================
CREATE TABLE IF NOT EXISTS ${schema}.entity_relations (
    id                   BIGSERIAL    PRIMARY KEY,
    src_entity_id        BIGINT       NOT NULL
                             REFERENCES ${schema}.entities(id) ON DELETE CASCADE,
    dst_entity_id        BIGINT       NOT NULL
                             REFERENCES ${schema}.entities(id) ON DELETE CASCADE,
    co_occurrence_count  INTEGER      NOT NULL DEFAULT 1,
    first_seen_at        TIMESTAMPTZ  NOT NULL DEFAULT now(),
    last_seen_at         TIMESTAMPTZ  NOT NULL DEFAULT now(),
    CHECK (src_entity_id < dst_entity_id),
    UNIQUE (src_entity_id, dst_entity_id)
);

-- NOTE(review): the UNIQUE (src_entity_id, dst_entity_id) index already leads
-- with src_entity_id, so this index may be redundant -- verify before dropping.
CREATE INDEX IF NOT EXISTS idx_entity_relations_src
    ON ${schema}.entity_relations (src_entity_id);

-- Lookup by the second member of the pair.
CREATE INDEX IF NOT EXISTS idx_entity_relations_dst
    ON ${schema}.entity_relations (dst_entity_id);

-- Highest co-occurrence counts first.
CREATE INDEX IF NOT EXISTS idx_entity_relations_cooccurrence
    ON ${schema}.entity_relations (co_occurrence_count DESC);
|
|
99
|
+
|
|
100
|
+
-- =========================================================================
-- Entity sessions: which entities appeared in which sessions (for boost scoring)
--
-- One row per (entity_id, session_row_id), enforced by the UNIQUE
-- constraint; rows are removed when either parent row is deleted.
-- =========================================================================
CREATE TABLE IF NOT EXISTS ${schema}.entity_sessions (
    id              BIGSERIAL    PRIMARY KEY,
    entity_id       BIGINT       NOT NULL
                        REFERENCES ${schema}.entities(id) ON DELETE CASCADE,
    session_row_id  BIGINT       NOT NULL
                        REFERENCES ${schema}.sessions(id) ON DELETE CASCADE,
    mention_count   INTEGER      NOT NULL DEFAULT 1,
    occurred_at     TIMESTAMPTZ  NOT NULL DEFAULT now(),
    UNIQUE (entity_id, session_row_id)
);

-- NOTE(review): the UNIQUE (entity_id, session_row_id) index already leads
-- with entity_id, so this index may be redundant -- verify before dropping.
CREATE INDEX IF NOT EXISTS idx_entity_sessions_entity_id
    ON ${schema}.entity_sessions (entity_id);

CREATE INDEX IF NOT EXISTS idx_entity_sessions_session_row_id
    ON ${schema}.entity_sessions (session_row_id);

-- Partial index: only pairs mentioned at least twice are indexed.
CREATE INDEX IF NOT EXISTS idx_entity_sessions_frequent
    ON ${schema}.entity_sessions (session_row_id, entity_id)
    WHERE mention_count >= 2;
|