episodicdb 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
episodicdb/__init__.py ADDED
@@ -0,0 +1,8 @@
1
+ from episodicdb.db import EpisodicDB
2
+
3
+
4
class EpisodicDBError(Exception):
    """Root of the exception hierarchy raised by EpisodicDB itself."""
6
+
7
+
8
+ __all__ = ["EpisodicDB", "EpisodicDBError"]
@@ -0,0 +1,220 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Literal
4
+
5
+ from episodicdb.schema import EMBEDDING_DIM
6
+
7
+
8
class AnalyticsMixin:
    """Read-only analytics queries over recorded episodes and tool calls.

    The host class must provide ``self._conn`` (an object whose
    ``execute(sql, params)`` returns a cursor with ``fetchall``/``fetchone``)
    and ``self.agent_id``.  Every query is restricted to that agent id.
    """

    # Metrics accepted by compare_periods().
    _PERIOD_METRICS = ("failure_rate", "episode_count", "tool_calls")

    def top_failing_tools(
        self,
        days: int = 7,
        limit: int = 5,
    ) -> list[dict]:
        """Return the tools with the most failed calls in the last *days* days.

        Only calls whose outcome is exactly ``'failure'`` are counted.

        Returns:
            Up to *limit* dicts ``{"tool_name", "failures"}``, highest count
            first; ties are ordered by tool name so the result is stable.
        """
        rows = self._conn.execute(
            """
            SELECT tc.tool_name, COUNT(*) AS failures
            FROM tool_calls tc
            JOIN episodes e ON e.id = tc.episode_id
            WHERE tc.outcome = 'failure'
              AND e.agent_id = $1
              AND tc.called_at >= NOW() - INTERVAL (CAST($2 AS VARCHAR) || ' days')
            GROUP BY tc.tool_name
            ORDER BY failures DESC, tc.tool_name
            LIMIT $3
            """,
            [self.agent_id, days, limit],
        ).fetchall()
        return [{"tool_name": r[0], "failures": r[1]} for r in rows]

    def never_succeeded_tools(self) -> list[str]:
        """Return, sorted by name, the tools this agent called but never with outcome ``'success'``."""
        rows = self._conn.execute(
            """
            SELECT DISTINCT tc.tool_name
            FROM tool_calls tc
            JOIN episodes e ON e.id = tc.episode_id
            WHERE e.agent_id = $1
              AND tc.tool_name NOT IN (
                  SELECT DISTINCT tc2.tool_name
                  FROM tool_calls tc2
                  JOIN episodes e2 ON e2.id = tc2.episode_id
                  WHERE e2.agent_id = $1
                    AND tc2.outcome = 'success'
              )
            ORDER BY tc.tool_name
            """,
            [self.agent_id],
        ).fetchall()
        return [r[0] for r in rows]

    def hourly_failure_rate(
        self,
        days: int = 7,
    ) -> list[dict]:
        """Bucket the last *days* days of tool calls by hour of ``called_at``.

        Returns:
            Dicts ``{"hour", "total", "failures"}`` ordered by hour.  The
            values are raw counts; no ratio is computed here.
        """
        rows = self._conn.execute(
            """
            SELECT
                hour(tc.called_at) AS hour,
                COUNT(*) AS total,
                SUM(CASE WHEN tc.outcome = 'failure' THEN 1 ELSE 0 END) AS failures
            FROM tool_calls tc
            JOIN episodes e ON e.id = tc.episode_id
            WHERE e.agent_id = $1
              AND tc.called_at >= NOW() - INTERVAL (CAST($2 AS VARCHAR) || ' days')
            GROUP BY hour(tc.called_at)
            ORDER BY hour(tc.called_at)
            """,
            [self.agent_id, days],
        ).fetchall()
        return [{"hour": r[0], "total": r[1], "failures": r[2]} for r in rows]

    def compare_periods(
        self,
        metric: Literal["failure_rate", "episode_count", "tool_calls"],
        days: int = 7,
    ) -> dict:
        """Compare *metric* between the last *days* days and the *days* before that.

        Period A is ``[now - days, now]``; period B is
        ``[now - 2*days, now - days)``.

        Returns:
            ``{"period_a", "period_b", "delta"}`` where ``delta`` is A minus B,
            all rounded to 4 decimals.  A period with no rows reports ``0.0``.

        Raises:
            ValueError: if *metric* is not one of the supported names.
                (Previously any unknown name was silently treated as
                ``"tool_calls"``.)
        """
        if metric not in self._PERIOD_METRICS:
            raise ValueError(
                f"Unknown metric {metric!r}; expected one of {self._PERIOD_METRICS}"
            )

        # In every query: $1 = agent id, $2 = days, $3 = 2 * days.
        if metric == "failure_rate":
            sql = """
                SELECT
                    AVG(CASE WHEN tc.called_at >= NOW() - INTERVAL (CAST($2 AS VARCHAR) || ' days')
                             THEN CASE WHEN tc.outcome = 'failure' THEN 1.0 ELSE 0.0 END
                             ELSE NULL END) AS period_a,
                    AVG(CASE WHEN tc.called_at >= NOW() - INTERVAL (CAST($3 AS VARCHAR) || ' days')
                              AND tc.called_at < NOW() - INTERVAL (CAST($2 AS VARCHAR) || ' days')
                             THEN CASE WHEN tc.outcome = 'failure' THEN 1.0 ELSE 0.0 END
                             ELSE NULL END) AS period_b
                FROM tool_calls tc
                JOIN episodes e ON e.id = tc.episode_id
                WHERE e.agent_id = $1
                  AND tc.called_at >= NOW() - INTERVAL (CAST($3 AS VARCHAR) || ' days')
                """
        elif metric == "episode_count":
            sql = """
                SELECT
                    COUNT(*) FILTER (WHERE started_at >= NOW() - INTERVAL (CAST($2 AS VARCHAR) || ' days')) AS period_a,
                    COUNT(*) FILTER (WHERE started_at >= NOW() - INTERVAL (CAST($3 AS VARCHAR) || ' days')
                                       AND started_at < NOW() - INTERVAL (CAST($2 AS VARCHAR) || ' days')) AS period_b
                FROM episodes
                WHERE agent_id = $1
                  AND started_at >= NOW() - INTERVAL (CAST($3 AS VARCHAR) || ' days')
                """
        else:  # "tool_calls" -- guaranteed by the membership check above
            sql = """
                SELECT
                    COUNT(*) FILTER (WHERE tc.called_at >= NOW() - INTERVAL (CAST($2 AS VARCHAR) || ' days')) AS period_a,
                    COUNT(*) FILTER (WHERE tc.called_at >= NOW() - INTERVAL (CAST($3 AS VARCHAR) || ' days')
                                       AND tc.called_at < NOW() - INTERVAL (CAST($2 AS VARCHAR) || ' days')) AS period_b
                FROM tool_calls tc
                JOIN episodes e ON e.id = tc.episode_id
                WHERE e.agent_id = $1
                  AND tc.called_at >= NOW() - INTERVAL (CAST($3 AS VARCHAR) || ' days')
                """

        row = self._conn.execute(sql, [self.agent_id, days, days * 2]).fetchone()
        # AVG over an empty period yields NULL; report it as 0.0 like an empty count.
        a = float(row[0] or 0)
        b = float(row[1] or 0)
        return {"period_a": round(a, 4), "period_b": round(b, 4), "delta": round(a - b, 4)}

    def before_failure_sequence(
        self,
        tool_name: str,
        lookback: int = 3,
    ) -> list[dict]:
        """Aggregate tool calls immediately before *tool_name* failures.

        For each failed call of *tool_name*, the *lookback* preceding calls are
        collected and counted per tool.  "Preceding" is by ``called_at`` over
        all of the agent's tool calls; the window is not partitioned by episode.

        Returns:
            Dicts ``{"prev_tool", "count"}``, most frequent first (ties by
            name).  An empty list when ``lookback < 1``, since there is nothing
            to look back at; building the query for that case would produce
            invalid SQL.
        """
        if lookback < 1:
            return []

        # lookback is interpolated into the SQL text; range() only accepts
        # integers, so nothing but int literals can end up in the query.
        offsets = range(1, lookback + 1)
        lag_cols = ", ".join(
            f"LAG(tc.tool_name, {i}) OVER (ORDER BY tc.called_at) AS prev_{i}"
            for i in offsets
        )
        prev_selects = " UNION ALL ".join(
            f"SELECT prev_{i} AS prev_tool FROM failures WHERE prev_{i} IS NOT NULL"
            for i in offsets
        )
        rows = self._conn.execute(
            f"""
            WITH sequenced AS (
                SELECT
                    tc.tool_name,
                    tc.outcome,
                    {lag_cols}
                FROM tool_calls tc
                JOIN episodes e ON e.id = tc.episode_id
                WHERE e.agent_id = $1
            ),
            failures AS (
                SELECT * FROM sequenced
                WHERE tool_name = $2 AND outcome = 'failure'
            )
            SELECT prev_tool, COUNT(*) AS cnt FROM (
                {prev_selects}
            )
            GROUP BY prev_tool
            ORDER BY cnt DESC, prev_tool
            """,
            [self.agent_id, tool_name],
        ).fetchall()
        return [{"prev_tool": r[0], "count": r[1]} for r in rows]

    def similar_episodes(
        self,
        embedding: list[float],
        status: str | None = None,
        limit: int = 5,
    ) -> list[dict]:
        """SQL predicate + vector similarity in a single execution plan.

        Ranks this agent's episodes that have an embedding by cosine distance
        to *embedding*, optionally restricted to one *status*.

        Returns:
            Up to *limit* dicts with ``id``, ``agent_id``, ``status``,
            ``task_type``, ``started_at``, ``ended_at`` and ``distance``,
            nearest first.

        Raises:
            ValueError: if *embedding* does not have ``EMBEDDING_DIM`` entries.
        """
        if len(embedding) != EMBEDDING_DIM:
            raise ValueError(f"Expected {EMBEDDING_DIM} dimensions, got {len(embedding)}")

        params: list = [embedding, self.agent_id]
        status_clause = ""
        if status is not None:
            status_clause = "AND status = $3"
            params.append(status)
        # LIMIT takes the next free placeholder: $3 without a status filter, $4 with one.
        params.append(limit)
        limit_param = f"${len(params)}"

        rows = self._conn.execute(
            f"""
            SELECT
                id::TEXT,
                agent_id,
                status,
                task_type,
                started_at,
                ended_at,
                array_cosine_distance(context_embedding, $1::FLOAT[{EMBEDDING_DIM}]) AS distance
            FROM episodes
            WHERE context_embedding IS NOT NULL
              AND agent_id = $2
              {status_clause}
            ORDER BY distance ASC
            LIMIT {limit_param}
            """,
            params,
        ).fetchall()

        return [
            {
                "id": r[0],
                "agent_id": r[1],
                "status": r[2],
                "task_type": r[3],
                "started_at": r[4],
                "ended_at": r[5],
                "distance": r[6],
            }
            for r in rows
        ]
episodicdb/db.py ADDED
@@ -0,0 +1,80 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from pathlib import Path
5
+
6
+ import duckdb
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+ from episodicdb.analytics import AnalyticsMixin
11
+ from episodicdb.temporal import TemporalMixin
12
+ from episodicdb.writer import WriterMixin
13
+ from episodicdb.schema import (
14
+ CALLED_AT_INDEX_DDL,
15
+ DECISIONS_DDL,
16
+ EPISODES_DDL,
17
+ FACTS_DDL,
18
+ FACTS_KEY_IDX_DDL,
19
+ HNSW_INDEX_DDL,
20
+ STARTED_AT_INDEX_DDL,
21
+ TOOL_CALLS_DDL,
22
+ )
23
+
24
+ _DEFAULT_DIR = Path.home() / ".episodicdb"
25
+
26
+
27
class EpisodicDB(WriterMixin, AnalyticsMixin, TemporalMixin):
    """DuckDB-backed store combining the writer, analytics and temporal mixins.

    Usable as a context manager; leaving the ``with`` block closes the
    connection.
    """

    def __init__(self, agent_id: str, path: str | None = None) -> None:
        """Open (creating if needed) the database and make sure the schema exists.

        Args:
            agent_id: Agent whose rows the mixin methods read and write.
            path: Database location handed to ``duckdb.connect``.  When
                omitted, ``<home>/.episodicdb/<agent_id>.db`` is used and the
                directory is created if missing.

        Raises:
            EpisodicDBError: if the database cannot be opened or the VSS
                extension cannot be loaded.
        """
        self.agent_id = agent_id

        if path is None:
            _DEFAULT_DIR.mkdir(parents=True, exist_ok=True)
            resolved = str(_DEFAULT_DIR / f"{agent_id}.db")
        else:
            resolved = path

        try:
            self._conn = duckdb.connect(resolved)
        except Exception as exc:
            # Imported lazily: the package __init__ imports this module.
            from episodicdb import EpisodicDBError
            raise EpisodicDBError(f"Cannot open database: {resolved}") from exc

        try:
            self._load_vss()
            self._init_schema()
        except Exception:
            # The caller never receives the instance when setup fails, so
            # nobody else could close the connection; release it here.
            self._conn.close()
            raise

    def _load_vss(self) -> None:
        """Load the ``vss`` extension, installing it first if the plain load fails."""
        try:
            self._conn.execute("LOAD vss")
        except Exception:
            try:
                self._conn.execute("INSTALL vss; LOAD vss")
            except Exception as exc:
                from episodicdb import EpisodicDBError
                raise EpisodicDBError("VSS extension unavailable") from exc

    def _init_schema(self) -> None:
        """Create tables and indexes; every DDL statement is ``IF NOT EXISTS``."""
        self._conn.execute(EPISODES_DDL)
        self._conn.execute(TOOL_CALLS_DDL)
        self._conn.execute(DECISIONS_DDL)
        self._conn.execute(FACTS_DDL)
        try:
            self._conn.execute("SET hnsw_enable_experimental_persistence = true")
        except Exception:
            # Best effort: keep going without the setting and only warn.
            logger.warning(
                "hnsw_enable_experimental_persistence not supported; "
                "HNSW index will not persist across restarts"
            )
        self._conn.execute(HNSW_INDEX_DDL)
        self._conn.execute(CALLED_AT_INDEX_DDL)
        self._conn.execute(STARTED_AT_INDEX_DDL)
        self._conn.execute(FACTS_KEY_IDX_DDL)

    def close(self) -> None:
        """Close the underlying DuckDB connection."""
        self._conn.close()

    def __enter__(self) -> "EpisodicDB":
        return self

    def __exit__(self, *_) -> None:
        self.close()
File without changes
@@ -0,0 +1,17 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+
5
+
6
def main() -> None:
    """Parse the command line and run the EpisodicDB MCP server."""
    cli = argparse.ArgumentParser(description="EpisodicDB MCP Server")
    cli.add_argument("--agent-id", required=True, help="Default agent ID")
    cli.add_argument(
        "--db",
        default=None,
        help="DuckDB file path (default: ~/.episodicdb/{agent_id}.db)",
    )
    options = cli.parse_args()

    # Imported after argument parsing, so --help and usage errors exit
    # before the server module is loaded.
    from episodicdb.mcp.server import serve

    serve(agent_id=options.agent_id, db_path=options.db)


if __name__ == "__main__":
    main()
@@ -0,0 +1,235 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from contextlib import contextmanager
5
+ from datetime import datetime
6
+
7
+ from mcp.server.fastmcp import FastMCP
8
+
9
+ from episodicdb.db import EpisodicDB
10
+
11
+ _db: EpisodicDB | None = None
12
+ _default_agent_id: str = ""
13
+
14
+
15
+ def _get_db() -> EpisodicDB:
16
+ assert _db is not None, "Server not initialized"
17
+ return _db
18
+
19
+
20
@contextmanager
def _agent_scope(agent_id: str | None):
    """Yield the shared DB with ``agent_id`` swapped in for the duration of the block.

    ``None`` selects the server's default agent id.  Whatever id the DB had
    on entry is put back on exit, including when the body raises.
    """
    database = _get_db()
    previous_id = database.agent_id
    if agent_id is None:
        database.agent_id = _default_agent_id
    else:
        database.agent_id = agent_id
    try:
        yield database
    finally:
        database.agent_id = previous_id
30
+
31
+
32
+ def _serialize_timestamps(rows: list[dict], keys: list[str]) -> list[dict]:
33
+ """Convert datetime fields to ISO strings for JSON serialization."""
34
+ for r in rows:
35
+ for k in keys:
36
+ if r.get(k):
37
+ r[k] = r[k].isoformat()
38
+ return rows
39
+
40
+
41
# Single FastMCP application instance; the @mcp_server.tool() decorators in
# this module register their functions on it, and serve() runs it.
mcp_server = FastMCP("episodicdb")
42
+
43
+
44
+ # --- Writer tools ---
45
+
46
+
47
@mcp_server.tool()
def record_episode(
    status: str,
    task_type: str | None = None,
    context: dict | None = None,
    embedding: list[float] | None = None,
    tags: list[str] | None = None,
    started_at: str | None = None,
    ended_at: str | None = None,
    agent_id: str | None = None,
) -> str:
    """Record an episode (task/session) for an agent."""
    with _agent_scope(agent_id) as scoped_db:
        # Timestamps arrive as ISO-8601 strings; missing or empty ones stay None.
        started = datetime.fromisoformat(started_at) if started_at else None
        ended = datetime.fromisoformat(ended_at) if ended_at else None
        episode_id = scoped_db.record_episode(
            status=status,
            task_type=task_type,
            context=context,
            embedding=embedding,
            tags=tags,
            started_at=started,
            ended_at=ended,
        )
    return episode_id
69
+
70
+
71
@mcp_server.tool()
def record_tool_call(
    episode_id: str,
    tool_name: str,
    outcome: str,
    parameters: dict | None = None,
    result: dict | None = None,
    duration_ms: int | None = None,
    error_message: str | None = None,
    called_at_override: str | None = None,
    agent_id: str | None = None,
) -> str:
    """Record a tool call within an episode."""
    with _agent_scope(agent_id) as scoped_db:
        # The override arrives as an ISO-8601 string; missing or empty stays None.
        if called_at_override:
            called_at = datetime.fromisoformat(called_at_override)
        else:
            called_at = None
        call_id = scoped_db.record_tool_call(
            episode_id=episode_id,
            tool_name=tool_name,
            outcome=outcome,
            parameters=parameters,
            result=result,
            duration_ms=duration_ms,
            error_message=error_message,
            called_at_override=called_at,
        )
    return call_id
95
+
96
+
97
@mcp_server.tool()
def record_decision(
    episode_id: str,
    rationale: str,
    decision_type: str | None = None,
    alternatives: list | None = None,
    outcome: str | None = None,
    agent_id: str | None = None,
) -> str:
    """Record a decision made during an episode."""
    # Arguments are forwarded unchanged; only the agent scope is applied here.
    fields = {
        "episode_id": episode_id,
        "rationale": rationale,
        "decision_type": decision_type,
        "alternatives": alternatives,
        "outcome": outcome,
    }
    with _agent_scope(agent_id) as scoped_db:
        decision_id = scoped_db.record_decision(**fields)
    return decision_id
115
+
116
+
117
@mcp_server.tool()
def record_fact(
    key: str,
    value: str,
    episode_id: str | None = None,
    valid_from: str | None = None,
    agent_id: str | None = None,
) -> str:
    """Record a fact with automatic supersession of previous values."""
    with _agent_scope(agent_id) as scoped_db:
        # valid_from arrives as an ISO-8601 string; missing or empty stays None.
        effective_from = datetime.fromisoformat(valid_from) if valid_from else None
        fact_id = scoped_db.record_fact(
            key=key,
            value=value,
            episode_id=episode_id,
            valid_from=effective_from,
        )
    return fact_id
133
+
134
+
135
+ # --- Analytics tools ---
136
+
137
+
138
@mcp_server.tool()
def top_failing_tools(
    days: int = 7,
    limit: int = 5,
    agent_id: str | None = None,
) -> str:
    """Get tools with the most failures, ranked by failure count."""
    with _agent_scope(agent_id) as scoped_db:
        ranking = scoped_db.top_failing_tools(days=days, limit=limit)
    # The list of plain dicts is returned to the client as a JSON string.
    return json.dumps(ranking)
147
+
148
+
149
@mcp_server.tool()
def never_succeeded_tools(
    agent_id: str | None = None,
) -> str:
    """List tools that have never had a successful outcome."""
    with _agent_scope(agent_id) as scoped_db:
        tool_names = scoped_db.never_succeeded_tools()
    # The list of names is returned to the client as a JSON string.
    return json.dumps(tool_names)
156
+
157
+
158
@mcp_server.tool()
def hourly_failure_rate(
    days: int = 7,
    agent_id: str | None = None,
) -> str:
    """Get failure counts grouped by hour of day."""
    with _agent_scope(agent_id) as scoped_db:
        buckets = scoped_db.hourly_failure_rate(days=days)
    # The per-hour dicts are returned to the client as a JSON string.
    return json.dumps(buckets)
166
+
167
+
168
@mcp_server.tool()
def compare_periods(
    metric: str,
    days: int = 7,
    agent_id: str | None = None,
) -> str:
    """Compare a metric between two consecutive time periods."""
    with _agent_scope(agent_id) as scoped_db:
        comparison = scoped_db.compare_periods(metric=metric, days=days)
    # The comparison dict is returned to the client as a JSON string.
    return json.dumps(comparison)
177
+
178
+
179
@mcp_server.tool()
def before_failure_sequence(
    tool_name: str,
    lookback: int = 3,
    agent_id: str | None = None,
) -> str:
    """Find which tools commonly precede failures of a given tool."""
    with _agent_scope(agent_id) as scoped_db:
        predecessors = scoped_db.before_failure_sequence(
            tool_name=tool_name,
            lookback=lookback,
        )
    # The list of plain dicts is returned to the client as a JSON string.
    return json.dumps(predecessors)
188
+
189
+
190
@mcp_server.tool()
def similar_episodes(
    embedding: list[float],
    status: str | None = None,
    limit: int = 5,
    agent_id: str | None = None,
) -> str:
    """Find episodes most similar to a given embedding vector."""
    with _agent_scope(agent_id) as scoped_db:
        matches = scoped_db.similar_episodes(
            embedding=embedding,
            status=status,
            limit=limit,
        )
    # The timestamp columns are converted to ISO strings before JSON encoding.
    serializable = _serialize_timestamps(matches, ["started_at", "ended_at"])
    return json.dumps(serializable)
201
+
202
+
203
+ # --- Temporal tools ---
204
+
205
+
206
@mcp_server.tool()
def facts_as_of(
    as_of: str,
    agent_id: str | None = None,
) -> str:
    """Return all facts that were valid at a specific point in time."""
    with _agent_scope(agent_id) as scoped_db:
        # as_of is required and must be an ISO-8601 string.
        moment = datetime.fromisoformat(as_of)
        snapshot = scoped_db.facts_as_of(as_of=moment)
    # The validity bounds are converted to ISO strings before JSON encoding.
    serializable = _serialize_timestamps(snapshot, ["valid_from", "valid_until"])
    return json.dumps(serializable)
215
+
216
+
217
@mcp_server.tool()
def fact_history(
    key: str,
    agent_id: str | None = None,
) -> str:
    """Return the full change history of a fact key."""
    with _agent_scope(agent_id) as scoped_db:
        versions = scoped_db.fact_history(key=key)
    # The validity bounds are converted to ISO strings before JSON encoding.
    serializable = _serialize_timestamps(versions, ["valid_from", "valid_until"])
    return json.dumps(serializable)
226
+
227
+
228
def serve(agent_id: str, db_path: str | None = None) -> None:
    """Open the database, run the MCP server over stdio, and close the database afterwards.

    Sets the module-level ``_db`` and ``_default_agent_id``.
    """
    global _db, _default_agent_id
    _default_agent_id = agent_id
    database = EpisodicDB(agent_id=agent_id, path=db_path)
    _db = database
    try:
        mcp_server.run(transport="stdio")
    finally:
        # Runs whether the server returns normally or raises.
        database.close()
@@ -0,0 +1,75 @@
1
# Number of components in an episode's context embedding.  The column type in
# EPISODES_DDL is generated from this value so the two cannot drift apart.
EMBEDDING_DIM = 1536

# One row per episode (task/session).  status is restricted by a CHECK
# constraint; context_embedding is a fixed-size float array.
EPISODES_DDL = f"""
CREATE TABLE IF NOT EXISTS episodes (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    agent_id TEXT NOT NULL,
    started_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    ended_at TIMESTAMPTZ,
    status TEXT NOT NULL
        CHECK (status IN ('success', 'failure', 'partial', 'aborted')),
    task_type TEXT,
    context JSON,
    context_embedding FLOAT[{EMBEDDING_DIM}],
    tags TEXT[]
);
"""
17
+
18
# One row per tool invocation; each row references the episode it belongs to.
# outcome is restricted by a CHECK constraint.
TOOL_CALLS_DDL = """
CREATE TABLE IF NOT EXISTS tool_calls (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    episode_id UUID NOT NULL REFERENCES episodes(id),
    called_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    tool_name TEXT NOT NULL,
    parameters JSON,
    result JSON,
    outcome TEXT NOT NULL
        CHECK (outcome IN ('success', 'failure', 'timeout', 'error')),
    duration_ms INTEGER,
    error_message TEXT
);
"""

# One row per decision recorded within an episode.
DECISIONS_DDL = """
CREATE TABLE IF NOT EXISTS decisions (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    episode_id UUID NOT NULL REFERENCES episodes(id),
    decided_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    decision_type TEXT,
    rationale TEXT,
    alternatives JSON,
    outcome TEXT
);
"""

# Key/value facts with a validity window; valid_until is NULL while a fact
# is still current.  The episode reference is optional.
FACTS_DDL = """
CREATE TABLE IF NOT EXISTS facts (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    agent_id TEXT NOT NULL,
    key TEXT NOT NULL,
    value TEXT NOT NULL,
    valid_from TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    valid_until TIMESTAMPTZ,
    episode_id UUID REFERENCES episodes(id)
);
"""

# Index on the columns the fact queries filter and order by.
FACTS_KEY_IDX_DDL = """
CREATE INDEX IF NOT EXISTS facts_key_idx
ON facts (agent_id, key, valid_from);
"""

# HNSW vector index over the episode embeddings.
# NOTE(review): presumably requires the DuckDB vss extension to be loaded first; confirm.
HNSW_INDEX_DDL = """
CREATE INDEX IF NOT EXISTS episodes_embedding_idx
ON episodes USING HNSW (context_embedding);
"""

# Index on tool_calls.called_at.
CALLED_AT_INDEX_DDL = """
CREATE INDEX IF NOT EXISTS tool_calls_called_at_idx
ON tool_calls (called_at);
"""

# Index on episodes.started_at.
STARTED_AT_INDEX_DDL = """
CREATE INDEX IF NOT EXISTS episodes_started_at_idx
ON episodes (started_at);
"""
episodicdb/temporal.py ADDED
@@ -0,0 +1,63 @@
1
+ from __future__ import annotations
2
+
3
+ from datetime import datetime
4
+
5
+
6
class TemporalMixin:
    """Point-in-time and history queries over the ``facts`` table.

    The host class must provide ``self._conn`` and ``self.agent_id``.
    """

    def facts_as_of(
        self,
        as_of: datetime,
    ) -> list[dict]:
        """Return every fact of this agent whose validity window contains *as_of*.

        A fact matches when ``valid_from <= as_of`` and ``valid_until`` is
        either NULL or later than *as_of*.  Rows are ordered by key; each is a
        dict with ``key``, ``value``, ``valid_from``, ``valid_until`` and
        ``episode_id`` (as text).
        """
        query = """
            SELECT key, value, valid_from, valid_until, episode_id::TEXT
            FROM facts
            WHERE agent_id = $1
              AND valid_from <= $2
              AND (valid_until IS NULL OR valid_until > $2)
            ORDER BY key
            """
        cursor = self._conn.execute(query, [self.agent_id, as_of])
        snapshot = []
        for record in cursor.fetchall():
            snapshot.append(
                {
                    "key": record[0],
                    "value": record[1],
                    "valid_from": record[2],
                    "valid_until": record[3],
                    "episode_id": record[4],
                }
            )
        return snapshot

    def fact_history(
        self,
        key: str,
    ) -> list[dict]:
        """Return all recorded values of *key* for this agent, oldest ``valid_from`` first.

        Each entry is a dict with ``value``, ``valid_from``, ``valid_until``
        and ``episode_id`` (as text).
        """
        query = """
            SELECT value, valid_from, valid_until, episode_id::TEXT
            FROM facts
            WHERE agent_id = $1 AND key = $2
            ORDER BY valid_from ASC
            """
        cursor = self._conn.execute(query, [self.agent_id, key])
        versions = []
        for record in cursor.fetchall():
            versions.append(
                {
                    "value": record[0],
                    "valid_from": record[1],
                    "valid_until": record[2],
                    "episode_id": record[3],
                }
            )
        return versions
episodicdb/writer.py ADDED
@@ -0,0 +1,150 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from datetime import datetime
5
+ from typing import Literal
6
+
7
+ from episodicdb.schema import EMBEDDING_DIM
8
+
9
+
10
class WriterMixin:
    """Insert operations for episodes, tool calls, decisions and facts.

    The host class must provide ``self._conn`` and ``self.agent_id``.
    Every method returns the new row's id as text.
    """

    def record_episode(
        self,
        status: Literal["success", "failure", "partial", "aborted"],
        task_type: str | None = None,
        context: dict | None = None,
        embedding: list[float] | None = None,
        tags: list[str] | None = None,
        started_at: datetime | None = None,
        ended_at: datetime | None = None,
    ) -> str:
        """Insert an episode for this agent.

        *context* is stored as JSON text.  When *started_at* is omitted the
        database fills in ``NOW()``.

        Raises:
            ValueError: if *embedding* is given but does not have
                ``EMBEDDING_DIM`` entries.
        """
        if embedding is not None and len(embedding) != EMBEDDING_DIM:
            raise ValueError(
                f"Expected {EMBEDDING_DIM} dimensions, got {len(embedding)}"
            )

        context_json = None if context is None else json.dumps(context)
        values = [
            self.agent_id,
            status,
            task_type,
            context_json,
            embedding,
            tags,
            started_at,
            ended_at,
        ]
        inserted = self._conn.execute(
            """
            INSERT INTO episodes
                (agent_id, status, task_type, context, context_embedding, tags,
                 started_at, ended_at)
            VALUES ($1, $2, $3, $4, $5, $6,
                    COALESCE($7, NOW()), $8)
            RETURNING id::TEXT
            """,
            values,
        ).fetchone()
        return inserted[0]

    def record_tool_call(
        self,
        episode_id: str,
        tool_name: str,
        outcome: Literal["success", "failure", "timeout", "error"],
        parameters: dict | None = None,
        result: dict | None = None,
        duration_ms: int | None = None,
        error_message: str | None = None,
        called_at_override: datetime | None = None,
    ) -> str:
        """Insert a tool call belonging to *episode_id*.

        *parameters* and *result* are stored as JSON text.  ``called_at`` is
        *called_at_override* when given, otherwise ``NOW()``.
        """
        params_json = None if parameters is None else json.dumps(parameters)
        result_json = None if result is None else json.dumps(result)
        values = [
            episode_id,
            tool_name,
            outcome,
            params_json,
            result_json,
            duration_ms,
            error_message,
            called_at_override,
        ]
        inserted = self._conn.execute(
            """
            INSERT INTO tool_calls
                (episode_id, tool_name, outcome, parameters, result,
                 duration_ms, error_message, called_at)
            VALUES ($1, $2, $3, $4, $5, $6, $7,
                    COALESCE($8, NOW()))
            RETURNING id::TEXT
            """,
            values,
        ).fetchone()
        return inserted[0]

    def record_decision(
        self,
        episode_id: str,
        rationale: str,
        decision_type: str | None = None,
        alternatives: list | None = None,
        outcome: str | None = None,
    ) -> str:
        """Insert a decision belonging to *episode_id*; *alternatives* is stored as JSON text."""
        alts_json = None if alternatives is None else json.dumps(alternatives)
        values = [episode_id, rationale, decision_type, alts_json, outcome]
        inserted = self._conn.execute(
            """
            INSERT INTO decisions
                (episode_id, rationale, decision_type, alternatives, outcome)
            VALUES ($1, $2, $3, $4, $5)
            RETURNING id::TEXT
            """,
            values,
        ).fetchone()
        return inserted[0]

    def record_fact(
        self,
        key: str,
        value: str,
        episode_id: str | None = None,
        valid_from: datetime | None = None,
    ) -> str:
        """Record a fact, superseding the currently open one for the same key.

        Inside one transaction, any fact of this agent with the same key and
        no ``valid_until`` gets its window closed at *valid_from* (or
        ``NOW()``), then the new fact is inserted.  On any error the
        transaction is rolled back and the error re-raised.
        """
        conn = self._conn
        conn.execute("BEGIN TRANSACTION")
        try:
            # Step 1: close the open validity window, if there is one.
            conn.execute(
                """
                UPDATE facts
                SET valid_until = COALESCE($3, NOW())
                WHERE agent_id = $1
                  AND key = $2
                  AND valid_until IS NULL
                """,
                [self.agent_id, key, valid_from],
            )

            # Step 2: insert the superseding value.
            inserted = conn.execute(
                """
                INSERT INTO facts
                    (agent_id, key, value, valid_from, episode_id)
                VALUES ($1, $2, $3, COALESCE($4, NOW()), $5)
                RETURNING id::TEXT
                """,
                [self.agent_id, key, value, valid_from, episode_id],
            ).fetchone()
            conn.execute("COMMIT")
        except Exception:
            conn.execute("ROLLBACK")
            raise
        return inserted[0]
@@ -0,0 +1,260 @@
1
+ Metadata-Version: 2.4
2
+ Name: episodicdb
3
+ Version: 0.1.0
4
+ Summary: OLAP-based memory engine for AI agents
5
+ Project-URL: Homepage, https://github.com/KsPsD/EpisodicDB
6
+ Project-URL: Repository, https://github.com/KsPsD/EpisodicDB
7
+ Project-URL: Issues, https://github.com/KsPsD/EpisodicDB/issues
8
+ Author: KsPsD
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Keywords: agents,ai,duckdb,mcp,memory,olap,vector
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Topic :: Database
20
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
21
+ Requires-Python: >=3.11
22
+ Requires-Dist: duckdb>=1.2.0
23
+ Requires-Dist: mcp[cli]>=1.0.0
24
+ Provides-Extra: dev
25
+ Requires-Dist: pytest>=8.0; extra == 'dev'
26
+ Description-Content-Type: text/markdown
27
+
28
+ # EpisodicDB
29
+
30
+ **An OLAP-based memory engine for AI agents.**
31
+
32
+ Existing agent memory systems (Mem0, Zep, Letta) are designed as *search systems*. EpisodicDB treats agent memory as an *analytics problem* — aggregation, time-series patterns, causal tracing, temporal facts, and vector similarity all in a single query engine.
33
+
34
+ ```sql
35
+ -- "Which tools failed most this week?"
36
+ SELECT tool_name, COUNT(*) AS failures
37
+ FROM tool_calls
38
+ WHERE outcome = 'failure'
39
+ AND called_at >= NOW() - INTERVAL '7 days'
40
+ GROUP BY tool_name
41
+ ORDER BY failures DESC;
42
+
43
+ -- "Find past episodes similar to this context that succeeded"
44
+ SELECT *, array_cosine_distance(context_embedding, ?) AS dist
45
+ FROM episodes
46
+ WHERE status = 'success'
47
+ ORDER BY dist ASC
48
+ LIMIT 5;
49
+ ```
50
+
51
+ ## The Problem
52
+
53
+ | Query type | Example | Vector search? |
54
+ |------------|---------|----------------|
55
+ | Similarity | "Seen a similar error before?" | Yes |
56
+ | Aggregation | "How many tool failures this week?" | No |
57
+ | Time-series | "Do failures spike in the afternoon?" | No |
58
+ | Causal trace | "What tool ran right before failures?" | No |
59
+ | Comparison | "Worse than last week?" | No |
60
+ | Absence | "Tools that never succeeded?" | No |
61
+ | Temporal | "What was the user's timezone last Tuesday?" | No |
62
+
63
+ EpisodicDB answers all of them. Vector similarity is just another SQL operator.
64
+
65
+ ## Architecture
66
+
67
+ ```
68
+ EpisodicDB
69
+ ├── WriterMixin record_episode / record_tool_call / record_decision / record_fact
70
+ ├── AnalyticsMixin 5 analytics methods + vector similarity
71
+ └── TemporalMixin facts_as_of / fact_history
72
+
73
+ Engine: DuckDB (OLAP) + VSS extension (HNSW vector index)
74
+ Schema: episodes + tool_calls + decisions + facts
75
+ ```
76
+
77
+ ## Install
78
+
79
+ ```bash
80
+ pip install episodicdb
81
+ ```
82
+
83
+ ## Quick Start
84
+
85
+ ### Python SDK
86
+
87
+ ```python
88
+ from episodicdb import EpisodicDB
89
+
90
+ with EpisodicDB(agent_id="my-agent") as db:
91
+ # Record what happened
92
+ ep_id = db.record_episode(
93
+ status="failure",
94
+ task_type="file_edit",
95
+ context={"file": "auth.py", "error": "permission denied"},
96
+ )
97
+ db.record_tool_call(ep_id, "Edit", "failure",
98
+ duration_ms=120, error_message="permission denied")
99
+ db.record_tool_call(ep_id, "Bash", "success", duration_ms=50)
100
+
101
+ # Record temporal facts (auto-supersedes previous values)
102
+ db.record_fact("user_timezone", "Asia/Seoul", episode_id=ep_id)
103
+ db.record_fact("user_timezone", "America/New_York") # closes previous
104
+
105
+ # Analyze patterns
106
+ print(db.top_failing_tools(days=7))
107
+ # [{"tool_name": "Edit", "failures": 5}, ...]
108
+
109
+ print(db.before_failure_sequence("Edit"))
110
+ # [{"prev_tool": "Bash", "count": 4}, ...]
111
+
112
+ print(db.compare_periods("failure_rate", days=7))
113
+ # {"period_a": 0.32, "period_b": 0.18, "delta": 0.14}
114
+
115
+ # Time-travel query
116
+ from datetime import datetime
117
+ print(db.facts_as_of(datetime(2025, 3, 15)))
118
+ # [{"key": "user_timezone", "value": "Asia/Seoul", ...}]
119
+ ```
120
+
121
+ ### MCP Server (Claude, OpenAI Agents SDK)
122
+
123
+ EpisodicDB ships an MCP server with 12 tools over stdio.
124
+
125
+ ```bash
126
+ episodicdb-mcp --agent-id my-agent
127
+ episodicdb-mcp --agent-id my-agent --db ./memory.db
128
+ ```
129
+
130
+ **Claude Desktop** (`claude_desktop_config.json`):
131
+
132
+ ```json
133
+ {
134
+ "mcpServers": {
135
+ "episodicdb": {
136
+ "command": "episodicdb-mcp",
137
+ "args": ["--agent-id", "my-agent"]
138
+ }
139
+ }
140
+ }
141
+ ```
142
+
143
+ **Claude Code** (`.mcp.json`):
144
+
145
+ ```json
146
+ {
147
+ "mcpServers": {
148
+ "episodicdb": {
149
+ "command": "episodicdb-mcp",
150
+ "args": ["--agent-id", "my-agent"]
151
+ }
152
+ }
153
+ }
154
+ ```
155
+
156
+ **OpenAI Agents SDK**:
157
+
158
+ ```python
159
+ from agents import Agent
160
+ from agents.mcp import MCPServerStdio
161
+
162
+ agent = Agent(
163
+ name="my-agent",
164
+ instructions="You have access to episodic memory.",
165
+ mcp_servers=[MCPServerStdio(
166
+ command="episodicdb-mcp",
167
+ args=["--agent-id", "my-agent"],
168
+ )],
169
+ )
170
+ ```
171
+
172
+ ## API
173
+
174
+ ### Writer
175
+
176
+ ```python
177
+ db.record_episode(status, task_type=None, context=None,
178
+ embedding=None, tags=None,
179
+ started_at=None, ended_at=None) -> str # episode UUID
180
+
181
+ db.record_tool_call(episode_id, tool_name, outcome,
182
+ parameters=None, result=None,
183
+ duration_ms=None, error_message=None,
+ called_at_override=None) -> str
184
+
185
+ db.record_decision(episode_id, rationale,
186
+ decision_type=None, alternatives=None,
187
+ outcome=None) -> str
188
+
189
+ db.record_fact(key, value, episode_id=None,
190
+ valid_from=None) -> str # auto-supersedes previous
191
+ ```
192
+
193
+ ### Analytics
194
+
195
+ | Method | Description |
196
+ |--------|-------------|
197
+ | `top_failing_tools(days, limit)` | Most-failed tools in the last N days |
198
+ | `hourly_failure_rate(days)` | Total and failed call counts by hour of day |
199
+ | `before_failure_sequence(tool_name, lookback)` | Tools that precede failures |
200
+ | `compare_periods(metric, days)` | Period-over-period comparison |
201
+ | `never_succeeded_tools()` | Tools with zero successful calls |
202
+ | `similar_episodes(embedding, status, limit)` | Vector similarity + SQL filter |
203
+
204
+ ### Temporal Facts
205
+
206
+ Facts are key-value pairs with automatic temporal validity. Recording a new value for the same key closes the previous one.
207
+
208
+ ```python
209
+ db.record_fact("preferred_model", "gpt-4o")
210
+ # later...
211
+ db.record_fact("preferred_model", "claude-sonnet") # supersedes gpt-4o
212
+
213
+ db.facts_as_of(some_datetime) # point-in-time snapshot
214
+ db.fact_history("preferred_model") # full change log
215
+ ```
216
+
217
+ ### Persistence
218
+
219
+ ```python
220
+ EpisodicDB(agent_id="my-agent") # ~/.episodicdb/my-agent.db
221
+ EpisodicDB(agent_id="my-agent", path="./x.db") # explicit path
222
+ EpisodicDB(agent_id="my-agent", path=":memory:") # in-memory (testing)
223
+ ```
224
+
225
+ ### Embeddings
226
+
227
+ EpisodicDB does not generate embeddings. Pass them in:
228
+
229
+ ```python
230
+ import openai
231
+
232
+ response = openai.embeddings.create(
233
+ model="text-embedding-3-small",
234
+ input="what the agent was doing"
235
+ )
236
+ embedding = response.data[0].embedding # 1536 dims
237
+
238
+ db.record_episode(status="success", embedding=embedding)
239
+ db.similar_episodes(embedding, status="failure", limit=5)
240
+ ```
241
+
242
+ ## Development
243
+
244
+ ```bash
245
+ git clone https://github.com/KsPsD/EpisodicDB
246
+ cd EpisodicDB
247
+ pip install -e ".[dev]"
248
+ pytest
249
+ ```
250
+
251
+ ## Stack
252
+
253
+ - [DuckDB](https://duckdb.org/) — embedded OLAP engine
254
+ - [DuckDB VSS](https://duckdb.org/docs/extensions/vss) — HNSW vector index
255
+ - [MCP](https://modelcontextprotocol.io/) — Model Context Protocol server
256
+ - Python 3.11+
257
+
258
+ ## License
259
+
260
+ MIT
@@ -0,0 +1,14 @@
1
+ episodicdb/__init__.py,sha256=wV1B5rUdxe4yYOCy7alw1hYTkYFwUULYiXZ80W5whXc,160
2
+ episodicdb/analytics.py,sha256=Pfu38f84MFBCso_l1gfQxtVCmQ8mUI4JqI_X0pFYMqc,8152
3
+ episodicdb/db.py,sha256=KjWJzlKRnKgRQK3sRSdX10LHc0KLLJBh3XY2SnRsRME,2370
4
+ episodicdb/temporal.py,sha256=0hTpCDlEuN5vGeGuDTrQcN4ZefZCGE6oNqS3RJmNJ5A,1859
5
+ episodicdb/writer.py,sha256=k2o9EZRD_AWxWT3seiTVXaByRAKfMyu45midJWSjeO4,4898
6
+ episodicdb/mcp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
+ episodicdb/mcp/__main__.py,sha256=y5LecI5Xc-pWlyb0Ow52Y8FT3Vs4bNVTIEFTcae-XSc,503
8
+ episodicdb/mcp/server.py,sha256=G-lDOlgxJ9B6B-OuLAJ4fkUi4Etc9mBuVkXje_v9N_E,6524
9
+ episodicdb/schema/__init__.py,sha256=rtWMgekSDKUnqkOaPLxNJU5E6zkYFSl1xFplLueroKc,2141
10
+ episodicdb-0.1.0.dist-info/METADATA,sha256=Crq3moPt2U5fTxqvFUmweR-PgWlvX6kvrt_-OQF1Dkc,7367
11
+ episodicdb-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
12
+ episodicdb-0.1.0.dist-info/entry_points.txt,sha256=dQMBwLmSqV9YUFLoNIEV8e4SyXFyWDF_oVMfCWQnYaM,64
13
+ episodicdb-0.1.0.dist-info/licenses/LICENSE,sha256=_9RoRqFzqcEFikLzPZ0ATwV4787xKOGKrLzJTHUhCH4,1062
14
+ episodicdb-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.29.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ episodicdb-mcp = episodicdb.mcp.__main__:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 KsPsD
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.