@arephan/clawdbot-memory-supabase 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +145 -0
- package/migrations/001_initial.sql +153 -0
- package/migrations/002_vector_search.sql +219 -0
- package/migrations/003_entity_relationships.sql +143 -0
- package/migrations/004_cron_runs.sql +33 -0
- package/migrations/005_search_memories_alias.sql +47 -0
- package/package.json +34 -0
- package/plugin/clawdbot.plugin.json +42 -0
- package/plugin/index.ts +650 -0
- package/plugin/package-lock.json +597 -0
- package/plugin/package.json +10 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Arephan
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
# clawdbot-memory-supabase
|
|
2
|
+
|
|
3
|
+
A Clawdbot plugin that logs **every conversation** to Supabase/PostgreSQL. Never forget anything.
|
|
4
|
+
|
|
5
|
+
## What it does
|
|
6
|
+
|
|
7
|
+
- 📝 Logs every message (user + assistant) with timestamps
|
|
8
|
+
- 🔍 Query by date: "What did we talk about on January 15th?"
|
|
9
|
+
- 📅 Query by range: "What did we discuss last week?"
|
|
10
|
+
- 🔎 Search by keyword across all history
|
|
11
|
+
- 🐘 Elephant memory — complete conversation recall
|
|
12
|
+
|
|
13
|
+
## Quick Start
|
|
14
|
+
|
|
15
|
+
### 1. Set up Supabase
|
|
16
|
+
|
|
17
|
+
**Local (recommended for development):**
|
|
18
|
+
```bash
|
|
19
|
+
# Install Supabase CLI
|
|
20
|
+
brew install supabase/tap/supabase
|
|
21
|
+
|
|
22
|
+
# In your project directory
|
|
23
|
+
supabase init
|
|
24
|
+
supabase start
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
**Or use Supabase Cloud:** Create a project at [supabase.com](https://supabase.com)
|
|
28
|
+
|
|
29
|
+
### 2. Run migrations
|
|
30
|
+
|
|
31
|
+
Copy the `migrations/` folder to your Supabase project:
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
cp -r migrations/ your-project/supabase/migrations/
|
|
35
|
+
cd your-project
|
|
36
|
+
supabase db reset # Local
|
|
37
|
+
# or
|
|
38
|
+
supabase db push # Cloud
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
### 3. Install the plugin
|
|
42
|
+
|
|
43
|
+
**Option A: Copy to extensions folder**
|
|
44
|
+
```bash
|
|
45
|
+
cp -r plugin/ ~/.clawdbot/extensions/memory-supabase/
|
|
46
|
+
# or for workspace-specific:
|
|
47
|
+
cp -r plugin/ your-workspace/.clawdbot/extensions/memory-supabase/
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
**Option B: Install from npm** (coming soon)
|
|
51
|
+
```bash
|
|
52
|
+
clawdbot plugins install @arephan/clawdbot-memory-supabase
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
### 4. Configure Clawdbot
|
|
56
|
+
|
|
57
|
+
Add to your `clawdbot.json`:
|
|
58
|
+
|
|
59
|
+
```json
|
|
60
|
+
{
|
|
61
|
+
"plugins": {
|
|
62
|
+
"slots": {
|
|
63
|
+
"memory": "memory-supabase"
|
|
64
|
+
},
|
|
65
|
+
"entries": {
|
|
66
|
+
"memory-supabase": {
|
|
67
|
+
"enabled": true,
|
|
68
|
+
"config": {
|
|
69
|
+
"supabase": {
|
|
70
|
+
"url": "http://127.0.0.1:54321",
|
|
71
|
+
"anonKey": "your-supabase-key"
|
|
72
|
+
},
|
|
73
|
+
"agentId": "my-agent"
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
### 5. Restart Clawdbot
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
clawdbot gateway restart
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
## Tools Available
|
|
88
|
+
|
|
89
|
+
Once installed, your agent gets these tools:
|
|
90
|
+
|
|
91
|
+
| Tool | Description |
|
|
92
|
+
|------|-------------|
|
|
93
|
+
| `recall_conversation` | Get messages from a specific date |
|
|
94
|
+
| `recall_date_range` | Get messages across a date range |
|
|
95
|
+
| `search_history` | Search all messages by keyword |
|
|
96
|
+
| `memory_stats` | Show logging statistics |
|
|
97
|
+
|
|
98
|
+
## CLI Commands
|
|
99
|
+
|
|
100
|
+
```bash
|
|
101
|
+
clawdbot supamem stats # Show stats
|
|
102
|
+
clawdbot supamem recall 2026-01-15 # Get messages from date
|
|
103
|
+
clawdbot supamem search "iPad" # Search by keyword
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
## Database Schema
|
|
107
|
+
|
|
108
|
+
The plugin uses these tables:
|
|
109
|
+
|
|
110
|
+
- `sessions` — Conversation sessions with metadata
|
|
111
|
+
- `messages` — Every message with role, content, timestamp
|
|
112
|
+
- `memories` — (Optional) Long-term memories with embeddings
|
|
113
|
+
- `entities` — (Optional) Knowledge graph entries
|
|
114
|
+
|
|
115
|
+
## Configuration Options
|
|
116
|
+
|
|
117
|
+
| Option | Type | Default | Description |
|
|
118
|
+
|--------|------|---------|-------------|
|
|
119
|
+
| `supabase.url` | string | required | Supabase project URL |
|
|
120
|
+
| `supabase.anonKey` | string | required | Supabase anon/service key |
|
|
121
|
+
| `agentId` | string | "default" | Agent identifier for multi-agent setups |
|
|
122
|
+
|
|
123
|
+
## How it works
|
|
124
|
+
|
|
125
|
+
1. **Session start**: Creates a new session when conversation begins
|
|
126
|
+
2. **Message logging**: Captures every user and assistant message
|
|
127
|
+
3. **Session end**: Closes session when conversation ends
|
|
128
|
+
4. **Recall**: SQL queries for date/keyword-based retrieval
|
|
129
|
+
|
|
130
|
+
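The recall tools need no embeddings or vector search — just simple, reliable PostgreSQL queries. (The migrations still enable the `vector` extension, which the optional `memories` and `entities` tables use for their embedding columns.)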
|
|
131
|
+
|
|
132
|
+
## Future Plans
|
|
133
|
+
|
|
134
|
+
- [ ] Vector search with pgvector (semantic similarity)
|
|
135
|
+
- [ ] Auto-summarization of long conversations
|
|
136
|
+
- [ ] Entity extraction (people, places, things)
|
|
137
|
+
- [ ] npm package for easy installation
|
|
138
|
+
|
|
139
|
+
## License
|
|
140
|
+
|
|
141
|
+
MIT
|
|
142
|
+
|
|
143
|
+
## Author
|
|
144
|
+
|
|
145
|
+
Built by an AI agent for [Clawdbot](https://github.com/clawdbot/clawdbot).
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
-- OpenClaw Memory - Initial Schema
|
|
2
|
+
-- Run this in your Supabase SQL editor
|
|
3
|
+
|
|
4
|
+
-- Enable vector extension for semantic search
|
|
5
|
+
CREATE EXTENSION IF NOT EXISTS vector;
|
|
6
|
+
|
|
7
|
+
-- Sessions: Every conversation gets a session
|
|
8
|
+
-- sessions: one row per conversation. Only agent_id is mandatory; the
-- remaining descriptive columns may be left NULL.
CREATE TABLE IF NOT EXISTS sessions (
    id         UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    agent_id   TEXT NOT NULL,
    user_id    TEXT,
    channel    TEXT,
    started_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    ended_at   TIMESTAMP WITH TIME ZONE,
    summary    TEXT,
    metadata   JSONB DEFAULT '{}'::JSONB   -- free-form, defaults to an empty object
);

-- Lookup indexes: by agent, by user, and newest-first by start time.
CREATE INDEX IF NOT EXISTS sessions_agent_id_idx
    ON sessions USING btree (agent_id);
CREATE INDEX IF NOT EXISTS sessions_user_id_idx
    ON sessions USING btree (user_id);
CREATE INDEX IF NOT EXISTS sessions_started_at_idx
    ON sessions USING btree (started_at DESC);
|
|
22
|
+
|
|
23
|
+
-- Messages: Every message in every session
|
|
24
|
+
-- messages: individual messages belonging to a session.
CREATE TABLE IF NOT EXISTS messages (
    id          UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    -- Deleting a session deletes its messages (ON DELETE CASCADE).
    session_id  UUID REFERENCES sessions (id) ON DELETE CASCADE,
    -- Only these four speaker roles are accepted.
    role        TEXT NOT NULL
                CHECK (role = ANY (ARRAY['user', 'assistant', 'system', 'tool'])),
    content     TEXT NOT NULL,
    created_at  TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    token_count INTEGER,
    metadata    JSONB DEFAULT '{}'::JSONB
);

-- Lookup indexes: all messages of a session, and messages by creation time.
CREATE INDEX IF NOT EXISTS messages_session_id_idx
    ON messages USING btree (session_id);
CREATE INDEX IF NOT EXISTS messages_created_at_idx
    ON messages USING btree (created_at);
|
|
36
|
+
|
|
37
|
+
-- Memories: Long-term memories extracted from sessions
|
|
38
|
+
-- memories: long-term facts, optionally carrying a 1536-dimension embedding.
CREATE TABLE IF NOT EXISTS memories (
    id                UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    agent_id          TEXT NOT NULL,
    user_id           TEXT,
    category          TEXT,
    content           TEXT NOT NULL,
    -- Weight in the closed range [0, 1]; 0.5 when not supplied.
    importance        DOUBLE PRECISION DEFAULT 0.5
                      CHECK (importance BETWEEN 0 AND 1),
    -- No ON DELETE action: a referenced session cannot be deleted while
    -- the memory still points at it.
    source_session_id UUID REFERENCES sessions (id),
    created_at        TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    updated_at        TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    expires_at        TIMESTAMP WITH TIME ZONE,
    embedding         VECTOR(1536),
    metadata          JSONB DEFAULT '{}'::JSONB
);

-- Plain lookup indexes.
CREATE INDEX IF NOT EXISTS memories_agent_id_idx
    ON memories USING btree (agent_id);
CREATE INDEX IF NOT EXISTS memories_user_id_idx
    ON memories USING btree (user_id);
CREATE INDEX IF NOT EXISTS memories_category_idx
    ON memories USING btree (category);
CREATE INDEX IF NOT EXISTS memories_importance_idx
    ON memories USING btree (importance DESC);
CREATE INDEX IF NOT EXISTS memories_created_at_idx
    ON memories USING btree (created_at DESC);

-- Approximate nearest-neighbour index for cosine-distance (<=>) searches.
CREATE INDEX IF NOT EXISTS memories_embedding_idx
    ON memories
    USING ivfflat (embedding vector_cosine_ops)
    WITH (lists = 100);
|
|
62
|
+
|
|
63
|
+
-- Entities: People, places, things the agent knows about
|
|
64
|
+
-- entities: named things an agent knows about. An agent holds at most one
-- row per (entity_type, name) pair.
CREATE TABLE IF NOT EXISTS entities (
    id            UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    agent_id      TEXT NOT NULL,
    entity_type   TEXT NOT NULL,
    name          TEXT NOT NULL,
    aliases       TEXT[],
    description   TEXT,
    properties    JSONB DEFAULT '{}'::JSONB,
    first_seen_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    last_seen_at  TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    mention_count INTEGER DEFAULT 1,
    embedding     VECTOR(1536),

    UNIQUE (agent_id, entity_type, name)
);

-- Lookup indexes: by agent, by type, by name.
CREATE INDEX IF NOT EXISTS entities_agent_id_idx
    ON entities USING btree (agent_id);
CREATE INDEX IF NOT EXISTS entities_type_idx
    ON entities USING btree (entity_type);
CREATE INDEX IF NOT EXISTS entities_name_idx
    ON entities USING btree (name);
|
|
83
|
+
|
|
84
|
+
-- Tasks: Persistent task tracking
|
|
85
|
+
-- tasks: persistent to-do items; parent_task_id lets a task point at
-- another task in the same table.
CREATE TABLE IF NOT EXISTS tasks (
    id                UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    agent_id          TEXT NOT NULL,
    user_id           TEXT,
    title             TEXT NOT NULL,
    description       TEXT,
    -- Workflow state; starts as 'pending'.
    status            TEXT DEFAULT 'pending'
                      CHECK (status = ANY (ARRAY['pending', 'in_progress', 'blocked', 'done'])),
    priority          INTEGER DEFAULT 0,
    due_at            TIMESTAMP WITH TIME ZONE,
    completed_at      TIMESTAMP WITH TIME ZONE,
    source_session_id UUID REFERENCES sessions (id),
    parent_task_id    UUID REFERENCES tasks (id),
    metadata          JSONB DEFAULT '{}'::JSONB,
    created_at        TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    updated_at        TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
);

-- Lookup indexes: owner columns, status, highest priority first, due time.
CREATE INDEX IF NOT EXISTS tasks_agent_id_idx
    ON tasks USING btree (agent_id);
CREATE INDEX IF NOT EXISTS tasks_user_id_idx
    ON tasks USING btree (user_id);
CREATE INDEX IF NOT EXISTS tasks_status_idx
    ON tasks USING btree (status);
CREATE INDEX IF NOT EXISTS tasks_priority_idx
    ON tasks USING btree (priority DESC);
CREATE INDEX IF NOT EXISTS tasks_due_at_idx
    ON tasks USING btree (due_at);
|
|
107
|
+
|
|
108
|
+
-- Learnings: Self-improvement records
|
|
109
|
+
-- learnings: records of what prompted a lesson (trigger), the lesson itself
-- and an optional follow-up action.
CREATE TABLE IF NOT EXISTS learnings (
    id                UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    agent_id          TEXT NOT NULL,
    category          TEXT NOT NULL
                      CHECK (category = ANY (ARRAY['error', 'correction', 'improvement', 'capability_gap'])),
    trigger           TEXT NOT NULL,
    lesson            TEXT NOT NULL,
    action            TEXT,
    -- Defaults to the lowest of the three accepted levels.
    severity          TEXT DEFAULT 'info'
                      CHECK (severity = ANY (ARRAY['info', 'warning', 'critical'])),
    source_session_id UUID REFERENCES sessions (id),
    applied_count     INTEGER DEFAULT 0,
    created_at        TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    metadata          JSONB DEFAULT '{}'::JSONB
);

-- Lookup indexes: by agent, category and severity.
CREATE INDEX IF NOT EXISTS learnings_agent_id_idx
    ON learnings USING btree (agent_id);
CREATE INDEX IF NOT EXISTS learnings_category_idx
    ON learnings USING btree (category);
CREATE INDEX IF NOT EXISTS learnings_severity_idx
    ON learnings USING btree (severity);
|
|
126
|
+
|
|
127
|
+
-- Function to update updated_at timestamp
|
|
128
|
+
-- Trigger function: stamps the row being written with the current
-- transaction timestamp in its updated_at column, then lets the write proceed.
-- Intended for BEFORE UPDATE ... FOR EACH ROW triggers on tables that have
-- an updated_at column.
CREATE OR REPLACE FUNCTION update_updated_at()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $$
BEGIN
    NEW.updated_at := CURRENT_TIMESTAMP;
    RETURN NEW;
END;
$$;
|
|
135
|
+
|
|
136
|
+
-- Triggers for updated_at
|
|
137
|
+
-- Keep memories.updated_at and tasks.updated_at current on every row update.
-- Each trigger is dropped first so this migration can be re-run safely:
-- a bare CREATE TRIGGER fails if the trigger already exists, whereas every
-- other statement in this file is idempotent (IF NOT EXISTS / OR REPLACE).
DROP TRIGGER IF EXISTS memories_updated_at ON memories;
CREATE TRIGGER memories_updated_at
    BEFORE UPDATE ON memories
    FOR EACH ROW
    EXECUTE FUNCTION update_updated_at();

DROP TRIGGER IF EXISTS tasks_updated_at ON tasks;
CREATE TRIGGER tasks_updated_at
    BEFORE UPDATE ON tasks
    FOR EACH ROW
    EXECUTE FUNCTION update_updated_at();
|
|
146
|
+
|
|
147
|
+
-- Row Level Security (optional - enable if using Supabase auth)
|
|
148
|
+
-- ALTER TABLE sessions ENABLE ROW LEVEL SECURITY;
|
|
149
|
+
-- ALTER TABLE messages ENABLE ROW LEVEL SECURITY;
|
|
150
|
+
-- ALTER TABLE memories ENABLE ROW LEVEL SECURITY;
|
|
151
|
+
-- ALTER TABLE entities ENABLE ROW LEVEL SECURITY;
|
|
152
|
+
-- ALTER TABLE tasks ENABLE ROW LEVEL SECURITY;
|
|
153
|
+
-- ALTER TABLE learnings ENABLE ROW LEVEL SECURITY;
|
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
-- OpenClaw Memory - Vector Search Functions
|
|
2
|
+
-- Run this in your Supabase SQL editor after 001_initial.sql
|
|
3
|
+
|
|
4
|
+
-- Function: Vector similarity search for memories
|
|
5
|
+
-- Returns memories ranked by cosine similarity to the query embedding
|
|
6
|
+
-- match_memories: semantic search over memories.
--
-- Parameters:
--   query_embedding   embedding to compare against
--   match_threshold   only rows with similarity strictly above this are returned
--   match_count       maximum number of rows returned
--   p_agent_id, p_category, p_min_importance
--                     optional filters; NULL disables the filter
--   p_user_id         optional filter; rows whose user_id is NULL are
--                     returned regardless of the requested user
--
-- Returns every memories column plus similarity = 1 - cosine distance,
-- closest first. Expired rows and rows without an embedding are skipped.
CREATE OR REPLACE FUNCTION match_memories(
    query_embedding VECTOR(1536),
    match_threshold FLOAT DEFAULT 0.7,
    match_count INT DEFAULT 10,
    p_agent_id TEXT DEFAULT NULL,
    p_user_id TEXT DEFAULT NULL,
    p_category TEXT DEFAULT NULL,
    p_min_importance FLOAT DEFAULT NULL
)
RETURNS TABLE (
    id UUID,
    agent_id TEXT,
    user_id TEXT,
    category TEXT,
    content TEXT,
    importance FLOAT,
    source_session_id UUID,
    created_at TIMESTAMPTZ,
    updated_at TIMESTAMPTZ,
    expires_at TIMESTAMPTZ,
    embedding VECTOR(1536),
    metadata JSONB,
    similarity FLOAT
)
LANGUAGE plpgsql
AS $$
BEGIN
    -- Columns are always alias-qualified: the output column names above
    -- are also PL/pgSQL variables inside this body.
    RETURN QUERY
    SELECT
        mem.id,
        mem.agent_id,
        mem.user_id,
        mem.category,
        mem.content,
        mem.importance,
        mem.source_session_id,
        mem.created_at,
        mem.updated_at,
        mem.expires_at,
        mem.embedding,
        mem.metadata,
        1 - (mem.embedding <=> query_embedding) AS similarity
    FROM memories AS mem
    WHERE mem.embedding IS NOT NULL
      -- still valid: no expiry, or expiry in the future
      AND (mem.expires_at IS NULL OR mem.expires_at > CURRENT_TIMESTAMP)
      -- optional caller filters
      AND (p_agent_id IS NULL OR mem.agent_id = p_agent_id)
      AND (p_user_id IS NULL OR mem.user_id IS NULL OR mem.user_id = p_user_id)
      AND (p_category IS NULL OR mem.category = p_category)
      AND (p_min_importance IS NULL OR mem.importance >= p_min_importance)
      -- similarity cut-off
      AND 1 - (mem.embedding <=> query_embedding) > match_threshold
    ORDER BY mem.embedding <=> query_embedding
    LIMIT match_count;
END;
$$;
|
|
61
|
+
|
|
62
|
+
-- Function: Hybrid search combining vector similarity and keyword matching
|
|
63
|
+
-- Returns memories ranked by weighted combination of semantic similarity and keyword relevance
|
|
64
|
+
-- hybrid_search_memories: rank memories by a weighted blend of vector
-- similarity and full-text keyword relevance.
--
-- Parameters:
--   query_embedding   embedding compared with memories.embedding (cosine)
--   query_text        plain text turned into an 'english' tsquery
--   vector_weight     multiplier for the vector similarity term
--   keyword_weight    multiplier for the ts_rank keyword term
--   match_count       maximum number of rows returned
--   p_agent_id, p_category, p_min_importance
--                     optional filters; NULL disables the filter
--   p_user_id         optional filter; rows whose user_id is NULL are
--                     returned regardless of the requested user
--
-- A memory qualifies when it has an embedding OR its content matches the
-- keyword query; a missing term contributes 0 to the score. Expired rows
-- are skipped. Returns every memories column plus the combined score,
-- best first.
--
-- Implementation notes: the scores are computed in a single pass over
-- memories (rather than separate vector/keyword passes joined afterwards),
-- the tsquery is built once, and ties on score are broken by id so the
-- result order is deterministic.
CREATE OR REPLACE FUNCTION hybrid_search_memories(
    query_embedding VECTOR(1536),
    query_text TEXT,
    vector_weight FLOAT DEFAULT 0.7,
    keyword_weight FLOAT DEFAULT 0.3,
    match_count INT DEFAULT 10,
    p_agent_id TEXT DEFAULT NULL,
    p_user_id TEXT DEFAULT NULL,
    p_category TEXT DEFAULT NULL,
    p_min_importance FLOAT DEFAULT NULL
)
RETURNS TABLE (
    id UUID,
    agent_id TEXT,
    user_id TEXT,
    category TEXT,
    content TEXT,
    importance FLOAT,
    source_session_id UUID,
    created_at TIMESTAMPTZ,
    updated_at TIMESTAMPTZ,
    expires_at TIMESTAMPTZ,
    embedding VECTOR(1536),
    metadata JSONB,
    score FLOAT
)
LANGUAGE plpgsql
AS $$
DECLARE
    -- Parsed once; NULL when query_text is NULL, in which case no row
    -- matches on keywords and only the vector term contributes.
    v_keyword_query TSQUERY := plainto_tsquery('english', query_text);
BEGIN
    RETURN QUERY
    WITH scored AS (
        SELECT
            m.id,
            m.agent_id,
            m.user_id,
            m.category,
            m.content,
            m.importance,
            m.source_session_id,
            m.created_at,
            m.updated_at,
            m.expires_at,
            m.embedding,
            m.metadata,
            (
                -- vector term: 0 when the row has no embedding
                COALESCE(1 - (m.embedding <=> query_embedding), 0) * vector_weight
                -- keyword term: 0 when the content does not match the query
                + CASE
                      WHEN to_tsvector('english', m.content) @@ v_keyword_query
                          THEN ts_rank(to_tsvector('english', m.content), v_keyword_query)
                      ELSE 0
                  END * keyword_weight
            ) AS combined_score
        FROM memories AS m
        WHERE (p_agent_id IS NULL OR m.agent_id = p_agent_id)
          AND (p_user_id IS NULL OR m.user_id = p_user_id OR m.user_id IS NULL)
          AND (p_category IS NULL OR m.category = p_category)
          AND (p_min_importance IS NULL OR m.importance >= p_min_importance)
          AND (m.expires_at IS NULL OR m.expires_at > NOW())
          -- candidate set: rows usable for the vector term or the keyword term
          AND (
              m.embedding IS NOT NULL
              OR to_tsvector('english', m.content) @@ v_keyword_query
          )
    )
    SELECT
        s.id,
        s.agent_id,
        s.user_id,
        s.category,
        s.content,
        s.importance,
        s.source_session_id,
        s.created_at,
        s.updated_at,
        s.expires_at,
        s.embedding,
        s.metadata,
        s.combined_score
    FROM scored AS s
    ORDER BY s.combined_score DESC, s.id
    LIMIT match_count;
END;
$$;
|
|
148
|
+
|
|
149
|
+
-- Function: Find similar memories based on an existing memory
|
|
150
|
+
-- Useful for finding related context or detecting duplicates
|
|
151
|
+
-- find_similar_memories: given one memory, return other memories of the
-- same agent whose embeddings are close to it.
--
-- Parameters:
--   memory_id         id of the memory used as the comparison anchor
--   match_threshold   only rows with similarity strictly above this are returned
--   match_count       maximum number of rows returned
--
-- Returns every memories column plus similarity = 1 - cosine distance,
-- closest first. The anchor row itself is never returned.
-- Raises an exception when the anchor does not exist or has no embedding.
CREATE OR REPLACE FUNCTION find_similar_memories(
    memory_id UUID,
    match_threshold FLOAT DEFAULT 0.8,
    match_count INT DEFAULT 5
)
RETURNS TABLE (
    id UUID,
    agent_id TEXT,
    user_id TEXT,
    category TEXT,
    content TEXT,
    importance FLOAT,
    source_session_id UUID,
    created_at TIMESTAMPTZ,
    updated_at TIMESTAMPTZ,
    expires_at TIMESTAMPTZ,
    embedding VECTOR(1536),
    metadata JSONB,
    similarity FLOAT
)
LANGUAGE plpgsql
AS $$
DECLARE
    anchor_embedding VECTOR(1536);
    anchor_agent_id TEXT;
BEGIN
    -- Load the anchor's embedding and owner; both stay NULL if no row matches.
    SELECT anchor.embedding, anchor.agent_id
    INTO anchor_embedding, anchor_agent_id
    FROM memories AS anchor
    WHERE anchor.id = find_similar_memories.memory_id;

    IF anchor_embedding IS NULL THEN
        RAISE EXCEPTION 'Memory not found or has no embedding';
    END IF;

    RETURN QUERY
    SELECT
        other.id,
        other.agent_id,
        other.user_id,
        other.category,
        other.content,
        other.importance,
        other.source_session_id,
        other.created_at,
        other.updated_at,
        other.expires_at,
        other.embedding,
        other.metadata,
        1 - (other.embedding <=> anchor_embedding) AS similarity
    FROM memories AS other
    WHERE other.agent_id = anchor_agent_id
      AND other.id <> find_similar_memories.memory_id
      AND other.embedding IS NOT NULL
      AND 1 - (other.embedding <=> anchor_embedding) > match_threshold
    ORDER BY other.embedding <=> anchor_embedding
    LIMIT match_count;
END;
$$;
|
|
211
|
+
|
|
212
|
+
-- Create full-text search index for keyword search optimization
|
|
213
|
+
CREATE INDEX IF NOT EXISTS memories_content_fts_idx ON memories
|
|
214
|
+
USING GIN (to_tsvector('english', content));
|
|
215
|
+
|
|
216
|
+
-- Comments for documentation
|
|
217
|
+
COMMENT ON FUNCTION match_memories IS 'Performs semantic search on memories using vector similarity (cosine distance)';
|
|
218
|
+
COMMENT ON FUNCTION hybrid_search_memories IS 'Combines vector similarity and keyword matching with configurable weights';
|
|
219
|
+
COMMENT ON FUNCTION find_similar_memories IS 'Finds memories similar to a given memory, useful for deduplication and context expansion';
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
-- Migration 003: Entity Relationships
|
|
2
|
+
-- Tracks connections between entities (e.g., "Han works_at MetalBear")
|
|
3
|
+
|
|
4
|
+
-- Create entity_relationships table
|
|
5
|
+
-- entity_relationships: directed edges between two entities rows
-- (source --relationship_type--> target). Edges are removed automatically
-- when either endpoint entity is deleted (ON DELETE CASCADE).
CREATE TABLE IF NOT EXISTS entity_relationships (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    agent_id TEXT NOT NULL,
    source_entity_id UUID REFERENCES entities(id) ON DELETE CASCADE,
    target_entity_id UUID REFERENCES entities(id) ON DELETE CASCADE,
    relationship_type TEXT NOT NULL,  -- 'works_at', 'knows', 'created', 'located_in', etc.
    properties JSONB DEFAULT '{}',    -- Additional context (since, strength, etc.)
    first_seen_at TIMESTAMPTZ DEFAULT NOW(),
    last_seen_at TIMESTAMPTZ DEFAULT NOW(),
    mention_count INT DEFAULT 1,
    -- Confidence in this relationship; the documented 0-1 range is enforced
    -- the same way memories.importance is. Because of IF NOT EXISTS, the
    -- check only applies to tables created by this statement.
    confidence FLOAT DEFAULT 0.5 CHECK (confidence >= 0 AND confidence <= 1),
    source_session_id UUID REFERENCES sessions(id),
    metadata JSONB DEFAULT '{}'
);

-- Indexes for efficient lookups by either endpoint or by relationship type
CREATE INDEX IF NOT EXISTS entity_relationships_source_idx
    ON entity_relationships(source_entity_id);

CREATE INDEX IF NOT EXISTS entity_relationships_target_idx
    ON entity_relationships(target_entity_id);

CREATE INDEX IF NOT EXISTS entity_relationships_type_idx
    ON entity_relationships(relationship_type);

-- Prevent duplicate relationships: one edge per agent, endpoint pair and type
CREATE UNIQUE INDEX IF NOT EXISTS entity_relationships_unique_idx
    ON entity_relationships(agent_id, source_entity_id, target_entity_id, relationship_type);
|
|
33
|
+
|
|
34
|
+
-- Function to increment relationship mention count
|
|
35
|
+
-- increment_relationship_mentions: record one more sighting of a relationship.
--
-- Parameters:
--   rel_id   id of the entity_relationships row to bump
--
-- Adds 1 to mention_count, sets last_seen_at to the current time and
-- returns the updated row. When no row has that id nothing is updated and
-- the returned row is NULL.
CREATE OR REPLACE FUNCTION increment_relationship_mentions(rel_id UUID)
RETURNS entity_relationships AS $$
DECLARE
    result entity_relationships;
BEGIN
    UPDATE entity_relationships AS er
    -- mention_count is nullable; treat NULL as 0 so the counter cannot
    -- get stuck at NULL (NULL + 1 is NULL).
    SET mention_count = COALESCE(er.mention_count, 0) + 1,
        last_seen_at = NOW()
    WHERE er.id = rel_id
    RETURNING er.* INTO result;

    RETURN result;
END;
$$ LANGUAGE plpgsql;
|
|
49
|
+
|
|
50
|
+
-- Function to find related entities (graph traversal)
|
|
51
|
+
-- find_related_entities: follow outgoing relationships from one entity.
--
-- Parameters:
--   entity_id        starting entity
--   max_depth        maximum number of hops to follow
--   min_confidence   edges with lower confidence are not followed
--
-- Returns one row per discovered path:
--   entity_id / entity_name / entity_type   the entity reached
--   relationship_path                       relationship types along the path
--   total_confidence                        product of the edge confidences
--   depth                                   number of hops from the start
-- ordered by total_confidence (highest first), then depth.
--
-- Cycle prevention keeps a per-path array of the entity ids already
-- visited (including the starting entity) and refuses to step onto any of
-- them. relationship_path cannot serve this purpose: it holds relationship
-- type names, not entity ids, so casting it to UUID[] fails at run time.
CREATE OR REPLACE FUNCTION find_related_entities(
    entity_id UUID,
    max_depth INT DEFAULT 2,
    min_confidence FLOAT DEFAULT 0.5
)
RETURNS TABLE (
    entity_id UUID,
    entity_name TEXT,
    entity_type TEXT,
    relationship_path TEXT[],
    total_confidence FLOAT,
    depth INT
) AS $$
    WITH RECURSIVE entity_graph AS (
        -- Base case: direct relationships of the starting entity
        SELECT
            e.id AS entity_id,
            e.name AS entity_name,
            e.entity_type,
            ARRAY[r.relationship_type] AS relationship_path,
            r.confidence AS total_confidence,
            1 AS depth,
            ARRAY[r.source_entity_id, e.id] AS visited_ids
        FROM entity_relationships AS r
        INNER JOIN entities AS e ON e.id = r.target_entity_id
        -- qualified with the function name so it is unmistakably the parameter
        WHERE r.source_entity_id = find_related_entities.entity_id
          AND r.confidence >= min_confidence

        UNION

        -- Recursive case: extend each known path by one more hop
        SELECT
            e.id,
            e.name,
            e.entity_type,
            eg.relationship_path || r.relationship_type,
            eg.total_confidence * r.confidence,
            eg.depth + 1,
            eg.visited_ids || e.id
        FROM entity_graph AS eg
        INNER JOIN entity_relationships AS r ON r.source_entity_id = eg.entity_id
        INNER JOIN entities AS e ON e.id = r.target_entity_id
        WHERE eg.depth < max_depth
          AND r.confidence >= min_confidence
          AND e.id <> ALL (eg.visited_ids)  -- Prevent cycles
    )
    -- visited_ids is internal bookkeeping and is not returned
    SELECT
        eg.entity_id,
        eg.entity_name,
        eg.entity_type,
        eg.relationship_path,
        eg.total_confidence,
        eg.depth
    FROM entity_graph AS eg
    ORDER BY eg.total_confidence DESC, eg.depth ASC;
$$ LANGUAGE sql;
|
|
98
|
+
|
|
99
|
+
-- Function to get entity network stats
|
|
100
|
+
-- get_entity_network_stats: summary figures for one agent's entity graph.
--
-- Parameters:
--   agent   agent_id whose entities and relationships are counted
--
-- Always returns exactly one row:
--   total_entities               entities rows for the agent
--   total_relationships          entity_relationships rows for the agent
--   avg_connections_per_entity   average outgoing edge count over entities
--                                that have at least one outgoing edge
--                                (NULL when there are none)
--   most_connected_entity_id / _name / connection_count
--                                the entity with the most outgoing edges
--                                (all NULL when the agent has no edges)
--
-- The totals are left-joined to the "most connected" lookup so that an
-- agent without relationships still gets its totals instead of an empty
-- result. Ties for most connected are broken by entity id.
CREATE OR REPLACE FUNCTION get_entity_network_stats(agent TEXT)
RETURNS TABLE (
    total_entities BIGINT,
    total_relationships BIGINT,
    avg_connections_per_entity FLOAT,
    most_connected_entity_id UUID,
    most_connected_entity_name TEXT,
    connection_count BIGINT
) AS $$
    WITH entity_connections AS (
        -- outgoing edge count per source entity
        SELECT
            r.source_entity_id AS entity_id,
            COUNT(*) AS outgoing_count
        FROM entity_relationships AS r
        WHERE r.agent_id = agent
        GROUP BY r.source_entity_id
    ),
    most_connected AS (
        SELECT
            ec.entity_id,
            e.name,
            ec.outgoing_count
        FROM entity_connections AS ec
        INNER JOIN entities AS e ON e.id = ec.entity_id
        ORDER BY ec.outgoing_count DESC, ec.entity_id
        LIMIT 1
    ),
    totals AS (
        -- scalar subqueries: always yields exactly one row
        SELECT
            (SELECT COUNT(*) FROM entities AS e WHERE e.agent_id = agent)::BIGINT
                AS total_entities,
            (SELECT COUNT(*) FROM entity_relationships AS r WHERE r.agent_id = agent)::BIGINT
                AS total_relationships,
            (SELECT AVG(ec.outgoing_count) FROM entity_connections AS ec)::FLOAT
                AS avg_connections_per_entity
    )
    SELECT
        t.total_entities,
        t.total_relationships,
        t.avg_connections_per_entity,
        mc.entity_id AS most_connected_entity_id,
        mc.name AS most_connected_entity_name,
        mc.outgoing_count::BIGINT AS connection_count
    FROM totals AS t
    LEFT JOIN most_connected AS mc ON TRUE;
$$ LANGUAGE sql;
|
|
136
|
+
|
|
137
|
+
-- Comments for documentation
|
|
138
|
+
COMMENT ON TABLE entity_relationships IS 'Tracks relationships between entities (people, places, things)';
|
|
139
|
+
COMMENT ON COLUMN entity_relationships.relationship_type IS 'Type of relationship: works_at, knows, created, located_in, etc.';
|
|
140
|
+
COMMENT ON COLUMN entity_relationships.confidence IS 'Confidence score (0-1) in this relationship';
|
|
141
|
+
COMMENT ON FUNCTION increment_relationship_mentions IS 'Increments mention count and updates last_seen_at for a relationship';
|
|
142
|
+
COMMENT ON FUNCTION find_related_entities IS 'Graph traversal to find entities connected through relationships';
|
|
143
|
+
COMMENT ON FUNCTION get_entity_network_stats IS 'Statistics about the entity relationship network';
|