echostate 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
echostate/__init__.py ADDED
@@ -0,0 +1,27 @@
1
+ """
2
+ EchoState - Semantic, event-sourced state for intelligent systems
3
+ """
4
+
5
+ from echostate.state import EchoState
6
+ from echostate.event import Event
7
+ from echostate.embeddings import SearchHit
8
+ from echostate.exceptions import (
9
+ EchoStateError,
10
+ EchoStateLockedError,
11
+ EchoStateSerializationError,
12
+ EchoStatePathError,
13
+ EchoStateEmbeddingError,
14
+ )
15
+
16
+ __version__ = "0.1.0"
17
+
18
+ __all__ = [
19
+ "EchoState",
20
+ "Event",
21
+ "SearchHit",
22
+ "EchoStateError",
23
+ "EchoStateLockedError",
24
+ "EchoStateSerializationError",
25
+ "EchoStatePathError",
26
+ "EchoStateEmbeddingError",
27
+ ]
echostate/database.py ADDED
@@ -0,0 +1,413 @@
1
+ """SQLite database management for EchoState."""
2
+
3
+ import sqlite3
4
+ import json
5
+ from pathlib import Path
6
+ from typing import Optional, List, Dict, Any
7
+ from contextlib import contextmanager
8
+
9
+ from echostate.exceptions import EchoStateLockedError
10
+ from echostate.event import Event
11
+
12
+
13
class Database:
    """Manage the SQLite storage that backs EchoState.

    A single ``sqlite3`` connection is opened lazily and reused by every
    method. Writes go through :meth:`transaction`, which commits on success
    and rolls back on failure. The instance can be used as a context manager;
    leaving the ``with`` block closes the connection.
    """

    # Tables and indexes. Every statement is idempotent (IF NOT EXISTS), so
    # the whole list is safe to run each time a Database is constructed.
    _SCHEMA_STATEMENTS = (
        # Event log
        """
        CREATE TABLE IF NOT EXISTS events (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            timestamp INTEGER NOT NULL,
            path TEXT NOT NULL,
            operation TEXT NOT NULL,
            value TEXT,
            event_version INTEGER NOT NULL DEFAULT 1,
            metadata TEXT
        )
        """,
        # State snapshots
        """
        CREATE TABLE IF NOT EXISTS snapshots (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            last_event_id INTEGER NOT NULL,
            snapshot TEXT NOT NULL,
            timestamp INTEGER NOT NULL
        )
        """,
        # Instance-level key/value metadata
        """
        CREATE TABLE IF NOT EXISTS metadata (
            key TEXT PRIMARY KEY,
            value TEXT NOT NULL
        )
        """,
        # Semantic index
        """
        CREATE TABLE IF NOT EXISTS embeddings (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            event_id INTEGER NOT NULL,
            path TEXT NOT NULL,
            text TEXT NOT NULL,
            embedding BLOB NOT NULL,
            model_id TEXT NOT NULL,
            metadata TEXT
        )
        """,
        "CREATE INDEX IF NOT EXISTS idx_events_timestamp ON events(timestamp)",
        "CREATE INDEX IF NOT EXISTS idx_events_path ON events(path)",
        "CREATE INDEX IF NOT EXISTS idx_embeddings_event_id ON embeddings(event_id)",
        "CREATE INDEX IF NOT EXISTS idx_embeddings_path ON embeddings(path)",
    )

    def __init__(self, db_path: str):
        """
        Open (or create) the database and make sure the schema exists.

        Args:
            db_path: SQLite database path (e.g., "sqlite:///state.db" or "state.db")
        """
        # Strip an optional URL-style prefix; the longer prefix is tried first
        # so "sqlite:///x" does not leave a leading slash behind.
        for prefix in ("sqlite:///", "sqlite://"):
            if db_path.startswith(prefix):
                db_path = db_path[len(prefix):]
                break

        self.db_path = Path(db_path)
        self._conn: Optional[sqlite3.Connection] = None
        try:
            self._ensure_schema()
        except BaseException:
            # Do not leave a half-initialised connection open behind a
            # constructor that failed.
            self.close()
            raise

    def _ensure_schema(self):
        """Create tables and indexes if they do not exist yet."""
        conn = self._get_connection()
        cursor = conn.cursor()
        for statement in self._SCHEMA_STATEMENTS:
            cursor.execute(statement)
        conn.commit()

    def _get_connection(self) -> sqlite3.Connection:
        """
        Return the shared connection, opening it on first use.

        Raises:
            EchoStateLockedError: If opening fails with an OperationalError
                whose message mentions "locked".
            sqlite3.OperationalError: For any other failure to open.
        """
        if self._conn is None:
            try:
                self._conn = sqlite3.connect(
                    str(self.db_path),
                    check_same_thread=False,
                    timeout=5.0,
                )
                # Rows behave like mappings so they can be turned into dicts.
                self._conn.row_factory = sqlite3.Row
            except sqlite3.OperationalError as e:
                if "locked" in str(e).lower():
                    raise EchoStateLockedError(
                        f"Database is locked: {self.db_path}"
                    ) from e
                raise
        return self._conn

    @contextmanager
    def transaction(self):
        """
        Context manager for database transactions.

        Yields the shared connection, commits when the block finishes
        normally and rolls back (then re-raises) when it raises.
        """
        conn = self._get_connection()
        try:
            yield conn
            conn.commit()
        except Exception:
            conn.rollback()
            raise

    def append_event(self, event: "Event") -> int:
        """
        Append an event to the event log.

        Args:
            event: Event to append

        Returns:
            The event ID assigned by the database
        """
        with self.transaction() as conn:
            cursor = conn.cursor()
            # to_dict() supplies exactly the named parameters used below.
            cursor.execute(
                """
                INSERT INTO events (timestamp, path, operation, value, event_version, metadata)
                VALUES (:timestamp, :path, :operation, :value, :event_version, :metadata)
                """,
                event.to_dict(),
            )
            return cursor.lastrowid

    def get_events(
        self,
        path: Optional[str] = None,
        since_event_id: Optional[int] = None,
        limit: Optional[int] = None,
        order_desc: bool = False,
    ) -> List["Event"]:
        """
        Retrieve events from the event log.

        Args:
            path: Optional path filter (exact match)
            since_event_id: Only return events with id > since_event_id
            limit: Maximum number of events to return
            order_desc: If True, order by id descending (most recent first)

        Returns:
            List of events, ordered by id (ascending by default, descending if order_desc=True)
        """
        cursor = self._get_connection().cursor()

        # "WHERE 1=1" lets every optional filter be appended as "AND ...".
        query = "SELECT * FROM events WHERE 1=1"
        params: List[Any] = []

        if path is not None:
            query += " AND path = ?"
            params.append(path)

        if since_event_id is not None:
            query += " AND id > ?"
            params.append(since_event_id)

        # Only a fixed keyword is interpolated; all values stay parameterized.
        query += f" ORDER BY id {'DESC' if order_desc else 'ASC'}"

        if limit is not None:
            query += " LIMIT ?"
            params.append(limit)

        cursor.execute(query, params)
        return [Event.from_dict(dict(row)) for row in cursor.fetchall()]

    def get_events_with_metadata_filter(
        self,
        path: Optional[str] = None,
        metadata_filters: Optional[Dict[str, Any]] = None,
        limit: Optional[int] = None,
    ) -> List["Event"]:
        """
        Retrieve events with metadata filtering (Python-side filtering in v0.1).

        Args:
            path: Optional path filter (exact match)
            metadata_filters: Dict of metadata fields to match
            limit: Maximum number of events to return (most recent first)

        Returns:
            List of events matching filters, ordered by id descending
        """
        # Most recent first; the limit is applied only after filtering so it
        # counts matching events rather than scanned events.
        events = self.get_events(path=path, limit=None, order_desc=True)

        if metadata_filters:
            # Events without metadata never match a non-empty filter.
            events = [
                event
                for event in events
                if event.metadata
                and all(
                    event.metadata.get(key) == expected
                    for key, expected in metadata_filters.items()
                )
            ]

        if limit is not None:
            events = events[:limit]

        return events

    @staticmethod
    def _row_to_snapshot(row) -> Optional[Dict[str, Any]]:
        """Turn a snapshots row into a dict with the JSON payload decoded."""
        if row is None:
            return None
        result = dict(row)
        result["snapshot"] = json.loads(result["snapshot"])
        return result

    def get_snapshot_at_event_id(self, event_id: int) -> Optional[Dict[str, Any]]:
        """
        Get the latest snapshot that includes events up to the given event_id.

        Args:
            event_id: Target event ID

        Returns:
            Snapshot dict with keys: id, last_event_id, snapshot (parsed JSON),
            timestamp, or None if no suitable snapshot exists
        """
        cursor = self._get_connection().cursor()
        cursor.execute(
            """
            SELECT * FROM snapshots
            WHERE last_event_id <= ?
            ORDER BY last_event_id DESC
            LIMIT 1
            """,
            (event_id,),
        )
        return self._row_to_snapshot(cursor.fetchone())

    def get_events_up_to(
        self, event_id: int, since_event_id: Optional[int] = None
    ) -> List["Event"]:
        """
        Get events up to and including a specific event_id.

        Args:
            event_id: Maximum event ID to include
            since_event_id: Only return events with id > since_event_id

        Returns:
            List of events ordered by id ascending
        """
        cursor = self._get_connection().cursor()

        query = "SELECT * FROM events WHERE id <= ?"
        params: List[Any] = [event_id]

        if since_event_id is not None:
            query += " AND id > ?"
            params.append(since_event_id)

        query += " ORDER BY id ASC"

        cursor.execute(query, params)
        return [Event.from_dict(dict(row)) for row in cursor.fetchall()]

    def store_embedding(
        self,
        event_id: int,
        path: str,
        text: str,
        embedding: bytes,
        model_id: str,
        metadata: Optional[str] = None,
    ) -> int:
        """
        Store an embedding for an event.

        Args:
            event_id: Associated event ID
            path: Event path
            text: Derived record text
            embedding: Embedding as bytes
            model_id: Model identifier
            metadata: Optional metadata JSON string

        Returns:
            Embedding ID
        """
        with self.transaction() as conn:
            cursor = conn.cursor()
            cursor.execute(
                """
                INSERT INTO embeddings (event_id, path, text, embedding, model_id, metadata)
                VALUES (?, ?, ?, ?, ?, ?)
                """,
                (event_id, path, text, embedding, model_id, metadata),
            )
            return cursor.lastrowid

    def get_all_embeddings(self, model_id: Optional[str] = None) -> List[Dict[str, Any]]:
        """
        Get all embeddings, optionally filtered by model_id.

        Args:
            model_id: Optional model ID filter. Only ``None`` disables the
                filter; an empty string is matched literally.

        Returns:
            List of embedding records as dicts, ordered by id ascending
        """
        cursor = self._get_connection().cursor()

        if model_id is not None:
            cursor.execute(
                """
                SELECT * FROM embeddings
                WHERE model_id = ?
                ORDER BY id ASC
                """,
                (model_id,),
            )
        else:
            cursor.execute(
                """
                SELECT * FROM embeddings
                ORDER BY id ASC
                """
            )

        return [dict(row) for row in cursor.fetchall()]

    def truncate_embeddings(self, model_id: Optional[str] = None):
        """
        Delete all embeddings, optionally filtered by model_id.

        Args:
            model_id: If provided, only delete embeddings for this model.
                Only ``None`` means "delete everything"; an empty string is
                matched literally so it can never wipe the whole table.
        """
        with self.transaction() as conn:
            cursor = conn.cursor()
            if model_id is not None:
                cursor.execute("DELETE FROM embeddings WHERE model_id = ?", (model_id,))
            else:
                cursor.execute("DELETE FROM embeddings")

    def get_latest_snapshot(self) -> Optional[Dict[str, Any]]:
        """
        Get the latest snapshot.

        Returns:
            Dict with keys: id, last_event_id, snapshot (parsed JSON), timestamp
            or None if no snapshot exists
        """
        cursor = self._get_connection().cursor()
        cursor.execute(
            """
            SELECT * FROM snapshots
            ORDER BY last_event_id DESC
            LIMIT 1
            """
        )
        return self._row_to_snapshot(cursor.fetchone())

    def create_snapshot(self, last_event_id: int, snapshot: Dict[str, Any]) -> int:
        """
        Create a new snapshot.

        Args:
            last_event_id: The last event ID included in this snapshot
            snapshot: The state snapshot as a dictionary

        Returns:
            The snapshot ID
        """
        import time

        # Serialize before touching the database so an unserializable
        # snapshot fails without opening a transaction.
        payload = json.dumps(snapshot)
        timestamp = int(time.time() * 1000)  # epoch milliseconds

        with self.transaction() as conn:
            cursor = conn.cursor()
            cursor.execute(
                """
                INSERT INTO snapshots (last_event_id, snapshot, timestamp)
                VALUES (?, ?, ?)
                """,
                (last_event_id, payload, timestamp),
            )
            return cursor.lastrowid

    def close(self):
        """Close the database connection if one is open."""
        if self._conn is not None:
            self._conn.close()
            self._conn = None

    def __enter__(self):
        """Context manager entry."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit: close the connection."""
        self.close()
echostate/embeddings.py ADDED
@@ -0,0 +1,143 @@
1
+ """Embedding generation and storage for semantic search."""
2
+
3
+ import json
4
+ import numpy as np
5
+ from dataclasses import dataclass
6
+ from typing import List, Optional, Dict, Any, Tuple
7
+ from sentence_transformers import SentenceTransformer
8
+
9
+ from echostate.event import Event
10
+ from echostate.exceptions import EchoStateEmbeddingError
11
+
12
+
13
class EmbeddingIndex:
    """Turn events into text, embed that text, and score embeddings.

    Embeddings are exchanged as raw float32 bytes: ``generate_embedding``
    always writes float32 and ``compute_similarity`` always reads float32.
    The sentence-transformers model is loaded on first use.
    """

    def __init__(self, model_id: str, model_name: str):
        """
        Initialize embedding index.

        Args:
            model_id: Model identifier (e.g., "st:all-MiniLM-L6-v2")
            model_name: Model name for sentence-transformers
        """
        self.model_id = model_id
        self.model_name = model_name
        # Loaded lazily by _get_model().
        self._model: Optional["SentenceTransformer"] = None

    def _get_model(self) -> "SentenceTransformer":
        """
        Return the embedding model, loading it on first call.

        Raises:
            EchoStateEmbeddingError: If the model cannot be constructed.
        """
        if self._model is None:
            try:
                self._model = SentenceTransformer(self.model_name)
            except Exception as e:
                raise EchoStateEmbeddingError(
                    f"Failed to load embedding model '{self.model_name}': {e}"
                ) from e
        return self._model

    def create_derived_record(self, event: "Event") -> Optional[str]:
        """
        Create a derived record (text representation) from an event.

        Args:
            event: Event to convert

        Returns:
            Text representation suitable for embedding, or None if event shouldn't be indexed
        """
        # Deletes don't produce searchable content, and neither does a
        # missing value.
        if event.operation == "delete":
            return None

        value = event.value
        if value is None:
            return None

        if isinstance(value, str):
            text = value
        elif isinstance(value, (dict, list)):
            # Compact JSON; non-ASCII characters are kept readable.
            text = json.dumps(value, separators=(",", ":"), ensure_ascii=False)
        else:
            # Remaining primitives (numbers, booleans, ...)
            text = str(value)

        # Appended items get their path as lightweight context.
        if event.operation == "append":
            text = f"{event.path}: {text}"

        return text

    def generate_embedding(self, text: str) -> bytes:
        """
        Generate embedding for a text string.

        Args:
            text: Text to embed

        Returns:
            Embedding as the raw bytes of a float32 numpy array

        Raises:
            EchoStateEmbeddingError: If the model fails to load or encode.
        """
        model = self._get_model()
        try:
            vector = model.encode(text, convert_to_numpy=True)
            # Pin the dtype: compute_similarity() decodes float32, so bytes
            # written in any other precision would be misread.
            return np.asarray(vector, dtype=np.float32).tobytes()
        except Exception as e:
            raise EchoStateEmbeddingError(f"Failed to generate embedding: {e}") from e

    def compute_similarity(self, query_embedding: bytes, target_embedding: bytes) -> float:
        """
        Compute cosine similarity between two embeddings.

        Args:
            query_embedding: Query embedding as float32 bytes
            target_embedding: Target embedding as float32 bytes

        Returns:
            Cosine similarity rescaled to 0-1 (higher is more similar);
            0.0 when either vector has zero norm
        """
        query_vec = np.frombuffer(query_embedding, dtype=np.float32)
        target_vec = np.frombuffer(target_embedding, dtype=np.float32)

        norm_query = np.linalg.norm(query_vec)
        norm_target = np.linalg.norm(target_vec)

        # Cosine is undefined for a zero vector; report "no similarity".
        if norm_query == 0 or norm_target == 0:
            return 0.0

        cosine = np.dot(query_vec, target_vec) / (norm_query * norm_target)
        # float32 rounding can land a hair outside [-1, 1]; clip so the
        # rescaled score never leaves the documented 0-1 range.
        cosine = float(np.clip(cosine, -1.0, 1.0))

        # Map [-1, 1] onto [0, 1].
        return (cosine + 1.0) / 2.0

    def embed_query(self, query: str) -> bytes:
        """
        Generate embedding for a search query.

        Args:
            query: Search query string

        Returns:
            Embedding as bytes
        """
        return self.generate_embedding(query)
131
+
132
+
133
@dataclass
class SearchHit:
    """One result returned by a semantic search.

    The first five fields are required and positional; ``metadata`` and
    ``value`` default to None.
    """

    # Identifier of the event this hit refers to.
    event_id: int
    # Similarity score for the hit.
    score: float
    # Path and operation of the matched event.
    path: str
    operation: str
    # Text associated with the hit.
    text: str
    # Optional metadata mapping; None when absent.
    metadata: Optional[Dict[str, Any]] = None
    # Best-effort, may be None
    value: Optional[Any] = None
echostate/event.py ADDED
@@ -0,0 +1,43 @@
1
+ """Event model for EchoState."""
2
+
3
+ import json
4
+ from dataclasses import dataclass, field
5
+ from typing import Any, Dict, Optional
6
+ from datetime import datetime
7
+
8
+
9
@dataclass
class Event:
    """A single recorded mutation of the state.

    ``to_dict`` produces the column values used for database storage (value
    and metadata JSON-encoded); ``from_dict`` rebuilds an Event from a row.
    """

    # Set by database on insert; None until then.
    id: Optional[int] = None
    # Creation time in epoch milliseconds.
    timestamp: int = field(default_factory=lambda: int(datetime.now().timestamp() * 1000))
    path: str = ""
    # set | append | delete | update
    operation: str = ""
    value: Any = None
    event_version: int = 1
    metadata: Optional[Dict[str, Any]] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return the storable column values for this event.

        ``id`` is not included. ``value`` is JSON-encoded unless it is None;
        ``metadata`` is JSON-encoded only when it is non-empty, otherwise
        stored as None.
        """
        encoded_value = None
        if self.value is not None:
            encoded_value = json.dumps(self.value)

        encoded_metadata = None
        if self.metadata:
            encoded_metadata = json.dumps(self.metadata)

        row: Dict[str, Any] = {}
        row["timestamp"] = self.timestamp
        row["path"] = self.path
        row["operation"] = self.operation
        row["value"] = encoded_value
        row["event_version"] = self.event_version
        row["metadata"] = encoded_metadata
        return row

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "Event":
        """Build an Event from a database row given as a dict.

        ``timestamp``, ``path``, ``operation`` and ``value`` must be present;
        ``id``, ``event_version`` and ``metadata`` are optional. Empty or
        missing JSON columns decode to None.
        """
        # Keys are read in the same order as the constructor arguments.
        kwargs: Dict[str, Any] = {
            "id": data.get("id"),
            "timestamp": data["timestamp"],
            "path": data["path"],
            "operation": data["operation"],
        }

        stored_value = data["value"]
        kwargs["value"] = json.loads(stored_value) if stored_value else None

        kwargs["event_version"] = data.get("event_version", 1)

        stored_metadata = data.get("metadata")
        kwargs["metadata"] = json.loads(stored_metadata) if stored_metadata else None

        return cls(**kwargs)
echostate/exceptions.py ADDED
@@ -0,0 +1,31 @@
1
+ """Custom exceptions for EchoState."""
2
+
3
+
4
class EchoStateError(Exception):
    """Root of the EchoState exception hierarchy.

    Every other EchoState error derives from this class.
    """
8
+
9
+
10
class EchoStateLockedError(EchoStateError):
    """Signals that the SQLite database is locked by another process."""
14
+
15
+
16
class EchoStateSerializationError(EchoStateError):
    """Signals that a value could not be serialized to JSON."""
20
+
21
+
22
class EchoStatePathError(EchoStateError):
    """Signals a failed path operation (e.g., setting a child under a non-dict)."""
26
+
27
+
28
class EchoStateEmbeddingError(EchoStateError):
    """Signals that embedding generation failed (only for synchronous indexing)."""