echostate 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- echostate/__init__.py +27 -0
- echostate/database.py +413 -0
- echostate/embeddings.py +143 -0
- echostate/event.py +43 -0
- echostate/exceptions.py +31 -0
- echostate/path_utils.py +103 -0
- echostate/state.py +605 -0
- echostate-0.1.0.dist-info/METADATA +110 -0
- echostate-0.1.0.dist-info/RECORD +11 -0
- echostate-0.1.0.dist-info/WHEEL +5 -0
- echostate-0.1.0.dist-info/top_level.txt +1 -0
echostate/__init__.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""
|
|
2
|
+
EchoState - Semantic, event-sourced state for intelligent systems
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from echostate.state import EchoState
|
|
6
|
+
from echostate.event import Event
|
|
7
|
+
from echostate.embeddings import SearchHit
|
|
8
|
+
from echostate.exceptions import (
|
|
9
|
+
EchoStateError,
|
|
10
|
+
EchoStateLockedError,
|
|
11
|
+
EchoStateSerializationError,
|
|
12
|
+
EchoStatePathError,
|
|
13
|
+
EchoStateEmbeddingError,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
# Version string of the echostate package.
__version__ = "0.1.0"

# Names exported by `from echostate import *`: the main state class, the
# event and search-hit models, and the package's exception hierarchy.
__all__ = [
    "EchoState",
    "Event",
    "SearchHit",
    "EchoStateError",
    "EchoStateLockedError",
    "EchoStateSerializationError",
    "EchoStatePathError",
    "EchoStateEmbeddingError",
]
|
echostate/database.py
ADDED
|
@@ -0,0 +1,413 @@
|
|
|
1
|
+
"""SQLite database management for EchoState."""
|
|
2
|
+
|
|
3
|
+
import sqlite3
|
|
4
|
+
import json
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Optional, List, Dict, Any
|
|
7
|
+
from contextlib import contextmanager
|
|
8
|
+
|
|
9
|
+
from echostate.exceptions import EchoStateLockedError
|
|
10
|
+
from echostate.event import Event
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class Database:
    """SQLite-backed storage for EchoState events, snapshots and embeddings.

    One connection is opened lazily and then reused by every method. Call
    :meth:`close`, or use the instance as a context manager, to release it.
    """

    # URL-style prefixes accepted in front of the path; the longer one is
    # listed first so that "sqlite:///x" is not mistaken for "sqlite://" + "/x".
    _URL_PREFIXES = ("sqlite:///", "sqlite://")

    # Schema statements run on start-up. Each one is idempotent
    # (IF NOT EXISTS), so opening an existing database is a no-op.
    _SCHEMA = (
        # Events table
        """
        CREATE TABLE IF NOT EXISTS events (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            timestamp INTEGER NOT NULL,
            path TEXT NOT NULL,
            operation TEXT NOT NULL,
            value TEXT,
            event_version INTEGER NOT NULL DEFAULT 1,
            metadata TEXT
        )
        """,
        # Snapshots table
        """
        CREATE TABLE IF NOT EXISTS snapshots (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            last_event_id INTEGER NOT NULL,
            snapshot TEXT NOT NULL,
            timestamp INTEGER NOT NULL
        )
        """,
        # Metadata table (for EchoState instance metadata)
        """
        CREATE TABLE IF NOT EXISTS metadata (
            key TEXT PRIMARY KEY,
            value TEXT NOT NULL
        )
        """,
        # Embeddings table (for semantic index)
        """
        CREATE TABLE IF NOT EXISTS embeddings (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            event_id INTEGER NOT NULL,
            path TEXT NOT NULL,
            text TEXT NOT NULL,
            embedding BLOB NOT NULL,
            model_id TEXT NOT NULL,
            metadata TEXT
        )
        """,
        # Indexes
        "CREATE INDEX IF NOT EXISTS idx_events_timestamp ON events(timestamp)",
        "CREATE INDEX IF NOT EXISTS idx_events_path ON events(path)",
        "CREATE INDEX IF NOT EXISTS idx_embeddings_event_id ON embeddings(event_id)",
        "CREATE INDEX IF NOT EXISTS idx_embeddings_path ON embeddings(path)",
    )

    def __init__(self, db_path: str):
        """
        Open (and if necessary create) the database.

        Args:
            db_path: SQLite database path, either bare ("state.db") or with a
                URL prefix ("sqlite:///state.db" or "sqlite://state.db").
        """
        # Strip the first matching URL prefix, if any.
        for prefix in self._URL_PREFIXES:
            if db_path.startswith(prefix):
                db_path = db_path[len(prefix):]
                break

        self.db_path = Path(db_path)
        self._conn: Optional[sqlite3.Connection] = None
        self._ensure_schema()

    def _ensure_schema(self):
        """Create the tables and indexes if they do not exist yet."""
        conn = self._get_connection()
        cursor = conn.cursor()
        for statement in self._SCHEMA:
            cursor.execute(statement)
        conn.commit()

    def _get_connection(self) -> sqlite3.Connection:
        """
        Return the cached connection, opening it on first use.

        Raises:
            EchoStateLockedError: If opening fails with an OperationalError
                whose message mentions "locked".
            sqlite3.OperationalError: For any other failure to open.
        """
        if self._conn is None:
            try:
                self._conn = sqlite3.connect(
                    str(self.db_path),
                    check_same_thread=False,  # disable sqlite3's same-thread check
                    timeout=5.0,  # seconds to wait on a locked database
                )
                # Rows behave like mappings so callers can do dict(row).
                self._conn.row_factory = sqlite3.Row
            except sqlite3.OperationalError as e:
                if "locked" in str(e).lower():
                    raise EchoStateLockedError(
                        f"Database is locked: {self.db_path}"
                    ) from e
                raise
        return self._conn

    @contextmanager
    def transaction(self):
        """
        Context manager yielding the connection inside a transaction.

        Commits when the block finishes normally; rolls back and re-raises
        if the block raises.
        """
        conn = self._get_connection()
        try:
            yield conn
            conn.commit()
        except Exception:
            conn.rollback()
            raise

    def append_event(self, event: "Event") -> int:
        """
        Append an event to the event log.

        Args:
            event: Event to append; its ``to_dict()`` supplies the column values.

        Returns:
            The event ID assigned by the database.
        """
        with self.transaction() as conn:
            cursor = conn.cursor()
            cursor.execute(
                """
                INSERT INTO events (timestamp, path, operation, value, event_version, metadata)
                VALUES (:timestamp, :path, :operation, :value, :event_version, :metadata)
                """,
                event.to_dict(),
            )
            return cursor.lastrowid

    def get_events(
        self,
        path: Optional[str] = None,
        since_event_id: Optional[int] = None,
        limit: Optional[int] = None,
        order_desc: bool = False,
    ) -> List["Event"]:
        """
        Retrieve events from the event log.

        Args:
            path: Optional path filter (exact match).
            since_event_id: Only return events with id > since_event_id.
            limit: Maximum number of events to return.
            order_desc: If True, order by id descending (most recent first).

        Returns:
            List of events ordered by id (ascending unless order_desc=True).
        """
        # "WHERE 1=1" lets every optional filter be appended as " AND ...".
        query = "SELECT * FROM events WHERE 1=1"
        params: List[Any] = []

        if path is not None:
            query += " AND path = ?"
            params.append(path)

        if since_event_id is not None:
            query += " AND id > ?"
            params.append(since_event_id)

        # Only a fixed keyword is interpolated; all values stay parameterized.
        query += f" ORDER BY id {'DESC' if order_desc else 'ASC'}"

        if limit is not None:
            query += " LIMIT ?"
            params.append(limit)

        cursor = self._get_connection().cursor()
        cursor.execute(query, params)
        return [Event.from_dict(dict(row)) for row in cursor.fetchall()]

    def get_events_with_metadata_filter(
        self,
        path: Optional[str] = None,
        metadata_filters: Optional[Dict[str, Any]] = None,
        limit: Optional[int] = None,
    ) -> List["Event"]:
        """
        Retrieve events with metadata filtering (Python-side filtering in v0.1).

        Args:
            path: Optional path filter (exact match).
            metadata_filters: Metadata fields that must all match exactly.
                Events without metadata never match a non-empty filter.
            limit: Maximum number of events to return (most recent first).

        Returns:
            List of events matching the filters, ordered by id descending.
        """
        # Fetch every event for the path, newest first; the limit is applied
        # only after filtering so it counts matching events.
        events = self.get_events(path=path, limit=None, order_desc=True)

        if metadata_filters:
            events = [
                event
                for event in events
                if event.metadata
                and all(
                    event.metadata.get(key) == expected
                    for key, expected in metadata_filters.items()
                )
            ]

        if limit is not None:
            events = events[:limit]

        return events

    @staticmethod
    def _decode_snapshot_row(row) -> Optional[Dict[str, Any]]:
        """Turn a snapshots row into a dict with the JSON payload parsed."""
        if row is None:
            return None
        result = dict(row)
        result["snapshot"] = json.loads(result["snapshot"])
        return result

    def get_snapshot_at_event_id(self, event_id: int) -> Optional[Dict[str, Any]]:
        """
        Get the latest snapshot whose last_event_id is <= the given event_id.

        Args:
            event_id: Target event ID.

        Returns:
            Snapshot dict with keys: id, last_event_id, snapshot (parsed JSON),
            timestamp; or None if no suitable snapshot exists.
        """
        cursor = self._get_connection().cursor()
        cursor.execute(
            """
            SELECT * FROM snapshots
            WHERE last_event_id <= ?
            ORDER BY last_event_id DESC
            LIMIT 1
            """,
            (event_id,),
        )
        return self._decode_snapshot_row(cursor.fetchone())

    def get_events_up_to(self, event_id: int, since_event_id: Optional[int] = None) -> List["Event"]:
        """
        Get events up to and including a specific event_id.

        Args:
            event_id: Maximum event ID to include.
            since_event_id: Only return events with id > since_event_id.

        Returns:
            List of events ordered by id ascending.
        """
        query = "SELECT * FROM events WHERE id <= ?"
        params = [event_id]

        if since_event_id is not None:
            query += " AND id > ?"
            params.append(since_event_id)

        query += " ORDER BY id ASC"

        cursor = self._get_connection().cursor()
        cursor.execute(query, params)
        return [Event.from_dict(dict(row)) for row in cursor.fetchall()]

    def store_embedding(
        self,
        event_id: int,
        path: str,
        text: str,
        embedding: bytes,
        model_id: str,
        metadata: Optional[str] = None,
    ) -> int:
        """
        Store an embedding for an event.

        Args:
            event_id: Associated event ID.
            path: Event path.
            text: Derived record text.
            embedding: Embedding as bytes.
            model_id: Model identifier.
            metadata: Optional metadata JSON string.

        Returns:
            Embedding ID.
        """
        with self.transaction() as conn:
            cursor = conn.cursor()
            cursor.execute(
                """
                INSERT INTO embeddings (event_id, path, text, embedding, model_id, metadata)
                VALUES (?, ?, ?, ?, ?, ?)
                """,
                (event_id, path, text, embedding, model_id, metadata),
            )
            return cursor.lastrowid

    def get_all_embeddings(self, model_id: Optional[str] = None) -> List[Dict[str, Any]]:
        """
        Get all embeddings, optionally filtered by model_id.

        Args:
            model_id: Optional model ID filter. Only ``None`` means "no
                filter"; an empty string is matched literally.

        Returns:
            List of embedding records as dicts, ordered by id ascending.
        """
        cursor = self._get_connection().cursor()

        if model_id is not None:
            cursor.execute(
                """
                SELECT * FROM embeddings
                WHERE model_id = ?
                ORDER BY id ASC
                """,
                (model_id,),
            )
        else:
            cursor.execute(
                """
                SELECT * FROM embeddings
                ORDER BY id ASC
                """
            )

        return [dict(row) for row in cursor.fetchall()]

    def truncate_embeddings(self, model_id: Optional[str] = None):
        """
        Delete all embeddings, optionally filtered by model_id.

        Args:
            model_id: If provided, only delete embeddings for this model.
                Only ``None`` deletes everything; an empty string is matched
                literally, so it can never wipe the whole table by accident.
        """
        with self.transaction() as conn:
            cursor = conn.cursor()
            if model_id is not None:
                cursor.execute("DELETE FROM embeddings WHERE model_id = ?", (model_id,))
            else:
                cursor.execute("DELETE FROM embeddings")

    def get_latest_snapshot(self) -> Optional[Dict[str, Any]]:
        """
        Get the snapshot with the highest last_event_id.

        Returns:
            Dict with keys: id, last_event_id, snapshot (parsed JSON), timestamp;
            or None if no snapshot exists.
        """
        cursor = self._get_connection().cursor()
        cursor.execute(
            """
            SELECT * FROM snapshots
            ORDER BY last_event_id DESC
            LIMIT 1
            """
        )
        return self._decode_snapshot_row(cursor.fetchone())

    def create_snapshot(self, last_event_id: int, snapshot: Dict[str, Any]) -> int:
        """
        Create a new snapshot.

        Args:
            last_event_id: The last event ID included in this snapshot.
            snapshot: The state snapshot as a JSON-serializable dictionary.

        Returns:
            The snapshot ID.
        """
        import time

        # Prepare the row before opening the transaction so a serialization
        # error leaves the database untouched.
        timestamp = int(time.time() * 1000)  # milliseconds since the epoch
        payload = json.dumps(snapshot)

        with self.transaction() as conn:
            cursor = conn.cursor()
            cursor.execute(
                """
                INSERT INTO snapshots (last_event_id, snapshot, timestamp)
                VALUES (?, ?, ?)
                """,
                (last_event_id, payload, timestamp),
            )
            return cursor.lastrowid

    def close(self):
        """Close the database connection if one is open."""
        if self._conn is not None:
            self._conn.close()
            self._conn = None

    def __enter__(self):
        """Context manager entry: return this instance."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit: close the connection."""
        self.close()
|
echostate/embeddings.py
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
"""Embedding generation and storage for semantic search."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import numpy as np
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import List, Optional, Dict, Any, Tuple
|
|
7
|
+
from sentence_transformers import SentenceTransformer
|
|
8
|
+
|
|
9
|
+
from echostate.event import Event
|
|
10
|
+
from echostate.exceptions import EchoStateEmbeddingError
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class EmbeddingIndex:
    """Generates text embeddings and scores them for semantic search.

    Embeddings are exchanged as raw bytes holding a float32 vector: that is
    what :meth:`generate_embedding` emits and what
    :meth:`compute_similarity` decodes.
    """

    def __init__(self, model_id: str, model_name: str):
        """
        Initialize embedding index.

        Args:
            model_id: Model identifier (e.g., "st:all-MiniLM-L6-v2").
            model_name: Model name passed to sentence-transformers.
        """
        self.model_id = model_id
        self.model_name = model_name
        # The model is loaded on first use, not at construction time.
        self._model: Optional["SentenceTransformer"] = None

    def _get_model(self) -> "SentenceTransformer":
        """
        Return the embedding model, loading it on first call.

        Raises:
            EchoStateEmbeddingError: If the model cannot be loaded.
        """
        if self._model is None:
            try:
                self._model = SentenceTransformer(self.model_name)
            except Exception as e:
                raise EchoStateEmbeddingError(
                    f"Failed to load embedding model '{self.model_name}': {e}"
                ) from e
        return self._model

    def create_derived_record(self, event: "Event") -> Optional[str]:
        """
        Create a derived record (text representation) from an event.

        Args:
            event: Event to convert.

        Returns:
            Text suitable for embedding, or None if the event should not be
            indexed (delete operations and events whose value is None).
        """
        # Deletes don't produce searchable content.
        if event.operation == "delete":
            return None

        value = event.value
        if value is None:
            return None

        if isinstance(value, str):
            text = value
        elif isinstance(value, (dict, list)):
            # Containers are serialized to compact JSON.
            text = json.dumps(value, separators=(",", ":"), ensure_ascii=False)
        else:
            # Remaining primitives (numbers, booleans).
            text = str(value)

        # Appends get the path as lightweight context.
        if event.operation == "append":
            text = f"{event.path}: {text}"

        return text

    def generate_embedding(self, text: str) -> bytes:
        """
        Generate embedding for a text string.

        Args:
            text: Text to embed.

        Returns:
            The embedding vector serialized as float32 bytes.

        Raises:
            EchoStateEmbeddingError: If the model cannot be loaded or encoding fails.
        """
        model = self._get_model()
        try:
            embedding = model.encode(text, convert_to_numpy=True)
            # Force float32 so the stored bytes always match the dtype that
            # compute_similarity decodes, whatever precision the model returns.
            return np.asarray(embedding, dtype=np.float32).tobytes()
        except Exception as e:
            raise EchoStateEmbeddingError(f"Failed to generate embedding: {e}") from e

    def compute_similarity(self, query_embedding: bytes, target_embedding: bytes) -> float:
        """
        Compute cosine similarity between two embeddings.

        Args:
            query_embedding: Query embedding as float32 bytes.
            target_embedding: Target embedding as float32 bytes.

        Returns:
            Cosine similarity rescaled from [-1, 1] to [0, 1] (higher is more
            similar); 0.0 if either vector has zero norm.
        """
        query_vec = np.frombuffer(query_embedding, dtype=np.float32)
        target_vec = np.frombuffer(target_embedding, dtype=np.float32)

        norm_query = np.linalg.norm(query_vec)
        norm_target = np.linalg.norm(target_vec)

        # Cosine similarity is undefined for a zero vector; report no similarity.
        if norm_query == 0 or norm_target == 0:
            return 0.0

        similarity = np.dot(query_vec, target_vec) / (norm_query * norm_target)
        # Rescale from [-1, 1] to [0, 1] and convert the numpy scalar to float.
        normalized = float((similarity + 1) / 2)
        # float32 rounding can land marginally outside the range; clamp it.
        return min(1.0, max(0.0, normalized))

    def embed_query(self, query: str) -> bytes:
        """
        Generate embedding for a search query.

        Args:
            query: Search query string.

        Returns:
            Embedding as bytes, produced exactly as for indexed text.
        """
        return self.generate_embedding(query)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
@dataclass
class SearchHit:
    """One result returned by a semantic search."""

    # Identity and ranking of the hit.
    event_id: int
    score: float

    # Where the matched content lives and which operation produced it.
    path: str
    operation: str

    # The text that was matched.
    text: str

    # Optional extras; both default to None.
    metadata: Optional[Dict[str, Any]] = None
    value: Optional[Any] = None  # Best-effort, may be None
|
echostate/event.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
"""Event model for EchoState."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
from typing import Any, Dict, Optional
|
|
6
|
+
from datetime import datetime
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class Event:
    """Represents a single state mutation event."""

    id: Optional[int] = None  # Set by database on insert
    # Creation time in milliseconds since the epoch.
    timestamp: int = field(default_factory=lambda: int(datetime.now().timestamp() * 1000))
    path: str = ""
    operation: str = ""  # set | append | delete | update
    value: Any = None
    event_version: int = 1
    metadata: Optional[Dict[str, Any]] = None

    def to_dict(self) -> Dict[str, Any]:
        """
        Convert the event to a dictionary for database storage.

        ``value`` and ``metadata`` are JSON-encoded; ``None`` stays ``None``.
        ``id`` is omitted because the database assigns it.

        Returns:
            Dict with keys: timestamp, path, operation, value, event_version,
            metadata.
        """
        return {
            "timestamp": self.timestamp,
            "path": self.path,
            "operation": self.operation,
            "value": json.dumps(self.value) if self.value is not None else None,
            "event_version": self.event_version,
            # Test against None, not truthiness, so an explicit empty dict
            # survives a to_dict()/from_dict() round trip.
            "metadata": json.dumps(self.metadata) if self.metadata is not None else None,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "Event":
        """
        Create an event from a database row.

        Args:
            data: Mapping with at least timestamp, path and operation; id,
                value, event_version and metadata are optional.

        Returns:
            The reconstructed event, with value and metadata JSON-decoded.
        """
        # Optional columns are all read with .get(); a missing or empty
        # value/metadata column decodes to None.
        raw_value = data.get("value")
        raw_metadata = data.get("metadata")
        return cls(
            id=data.get("id"),
            timestamp=data["timestamp"],
            path=data["path"],
            operation=data["operation"],
            value=json.loads(raw_value) if raw_value else None,
            event_version=data.get("event_version", 1),
            metadata=json.loads(raw_metadata) if raw_metadata else None,
        )
|
echostate/exceptions.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""Custom exceptions for EchoState."""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class EchoStateError(Exception):
    """Root of the EchoState exception hierarchy; catch this to handle any EchoState error."""
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class EchoStateLockedError(EchoStateError):
    """Signals that the SQLite database is locked by another process."""
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class EchoStateSerializationError(EchoStateError):
    """Signals that a value could not be serialized to JSON."""
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class EchoStatePathError(EchoStateError):
    """Signals a failed path operation (e.g., setting a child under a non-dict)."""
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class EchoStateEmbeddingError(EchoStateError):
    """Signals that embedding generation failed (only for synchronous indexing)."""
|