semantic-buffer 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- semantic_buffer-0.1.2.dist-info/METADATA +126 -0
- semantic_buffer-0.1.2.dist-info/RECORD +9 -0
- semantic_buffer-0.1.2.dist-info/WHEEL +4 -0
- semantic_buffer-0.1.2.dist-info/licenses/LICENSE +21 -0
- semanticbuffer/__init__.py +6 -0
- semanticbuffer/core.py +154 -0
- semanticbuffer/database.py +150 -0
- semanticbuffer/embeddings.py +131 -0
- semanticbuffer/scoring.py +73 -0
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: semantic-buffer
|
|
3
|
+
Version: 0.1.2
|
|
4
|
+
Summary: A lightweight, local-first semantic memory buffer for AI agents with hybrid time-decay scoring.
|
|
5
|
+
Author-email: Your Name <your.email@example.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Keywords: agents,ai,memory,rag,semantic-search,sqlite
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
13
|
+
Requires-Python: >=3.9
|
|
14
|
+
Requires-Dist: numpy>=1.20.0
|
|
15
|
+
Provides-Extra: dev
|
|
16
|
+
Requires-Dist: build>=1.0.0; extra == 'dev'
|
|
17
|
+
Requires-Dist: pytest>=7.0.0; extra == 'dev'
|
|
18
|
+
Requires-Dist: ruff>=0.1.0; extra == 'dev'
|
|
19
|
+
Requires-Dist: twine>=4.0.0; extra == 'dev'
|
|
20
|
+
Provides-Extra: local
|
|
21
|
+
Requires-Dist: sentence-transformers>=2.2.0; extra == 'local'
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
|
|
24
|
+
# semantic-buffer 🧠
|
|
25
|
+
|
|
26
|
+
[PyPI package](https://pypi.org/project/semantic-buffer/)
|
|
27
|
+
[Supported Python versions](https://pypi.org/project/semantic-buffer/)
|
|
28
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
29
|
+
|
|
30
|
+
A lightweight, local-first semantic memory buffer for AI agents. It implements a hybrid scoring system that prioritizes memories based on **semantic relevance**, **importance**, and **exponential time-decay (recency)**.
|
|
31
|
+
|
|
32
|
+
## 🌟 Features
|
|
33
|
+
- **Local-first Vector Storage**: Simple SQLite-based vector storage with zero cloud dependencies or bulky database installations.
|
|
34
|
+
- **Hybrid Scoring**: Combines cosine similarity, recency (exponential time decay), and importance weighting to retrieve the most contextual memories.
|
|
35
|
+
- **Pluggable Embeddings**: Run a lightweight model locally (`sentence-transformers`) or plug in any embedding API (Gemini, OpenAI, Cohere).
|
|
36
|
+
- **Agent Decorators**: `@remember` decorator automatically tracks function inputs, returns, and metadata in the background.
|
|
37
|
+
|
|
38
|
+
## 🏗️ Architecture
|
|
39
|
+
|
|
40
|
+
```text
|
|
41
|
+
[ Agent Action ] ──► ( @remember Decorator ) ──► [ SemanticBuffer ]
|
|
42
|
+
│
|
|
43
|
+
( Generate Embeddings )
|
|
44
|
+
│
|
|
45
|
+
[ LLM Prompt ] ◄── [ Ranked Top N Memories ] ◄── [ SQLite DB ]
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## 🚀 Installation & Import Namespace
|
|
49
|
+
|
|
50
|
+
> [!IMPORTANT]
|
|
51
|
+
> **Install Name vs. Import Name Namespace**
|
|
52
|
+
> - **Pip Install Name**: `semantic-buffer` (with hyphen)
|
|
53
|
+
> - **Python Import Name**: `semanticbuffer` (no hyphens or underscores, e.g. `import semanticbuffer`)
|
|
54
|
+
|
|
55
|
+
### 1. Minimal Installation (API Embedders)
|
|
56
|
+
If you only plan to use external API models (Gemini, OpenAI, etc.) and want to keep dependencies lightweight:
|
|
57
|
+
```bash
|
|
58
|
+
pip install semantic-buffer
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
### 2. Local-First Installation (Offline CPU Models)
|
|
62
|
+
If you want to use the default offline embeddings (`sentence-transformers` running locally on your CPU):
|
|
63
|
+
```bash
|
|
64
|
+
pip install "semantic-buffer[local]"
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
> [!WARNING]
|
|
68
|
+
> If you instantiate `SemanticBuffer()` without passing an embedder, it defaults to the local offline model. The model is loaded lazily, so if the `[local]` extra is not installed, the first call that needs an embedding (for example `add`) raises an `ImportError` asking you to install `sentence-transformers`.
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
## ⚡ Quickstart
|
|
72
|
+
|
|
73
|
+
```python
|
|
74
|
+
from semanticbuffer import SemanticBuffer
|
|
75
|
+
|
|
76
|
+
# 1. Initialize local memory buffer
|
|
77
|
+
buffer = SemanticBuffer(db_path="my_memory.db")
|
|
78
|
+
|
|
79
|
+
# 2. Add memories with importance scores
|
|
80
|
+
buffer.add("User's favorite programming language is Python.", importance=0.9)
|
|
81
|
+
buffer.add("It rained today in Paris.", importance=0.4)
|
|
82
|
+
|
|
83
|
+
# 3. Retrieve relevant context dynamically
|
|
84
|
+
memories = buffer.search("What coding preferences does the user have?", limit=1)
|
|
85
|
+
print(memories[0]["content"])
|
|
86
|
+
# Output: "User's favorite programming language is Python."
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
### Auto-Capture Agent Interactions with Decorators
|
|
90
|
+
|
|
91
|
+
```python
|
|
92
|
+
buffer = SemanticBuffer()
|
|
93
|
+
|
|
94
|
+
@buffer.remember(importance=0.7)
|
|
95
|
+
def run_web_search(query: str):
|
|
96
|
+
# Mocking search execution
|
|
97
|
+
return f"Results for {query}: Python is a versatile language."
|
|
98
|
+
|
|
99
|
+
# Automatically logged to database
|
|
100
|
+
run_web_search("Python info")
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
## ⚙️ Custom Embedders (OpenAI Example)
|
|
104
|
+
You can easily swap out the local model for your preferred embedding API:
|
|
105
|
+
|
|
106
|
+
```python
|
|
107
|
+
from semanticbuffer import SemanticBuffer, BaseEmbedder
|
|
108
|
+
|
|
109
|
+
class OpenAIEmbedder(BaseEmbedder):
|
|
110
|
+
def __init__(self, api_key):
|
|
111
|
+
from openai import OpenAI
|
|
112
|
+
self.client = OpenAI(api_key=api_key)
|
|
113
|
+
|
|
114
|
+
def embed_query(self, text: str):
|
|
115
|
+
res = self.client.embeddings.create(input=[text], model="text-embedding-3-small")
|
|
116
|
+
return res.data[0].embedding
|
|
117
|
+
|
|
118
|
+
def embed_documents(self, texts: list[str]):
|
|
119
|
+
return [self.embed_query(t) for t in texts]
|
|
120
|
+
|
|
121
|
+
# Instantiate with custom embedder
|
|
122
|
+
buffer = SemanticBuffer(embedder=OpenAIEmbedder(api_key="your-key"))
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
## 📄 License
|
|
126
|
+
This project is licensed under the MIT License - see the LICENSE file for details.
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
semanticbuffer/__init__.py,sha256=YT8bGWicg-ZlrW1BfDu48jyA2PfDkJ45hkbJRmRXorI,205
|
|
2
|
+
semanticbuffer/core.py,sha256=tPhLJfC23kG4C4LKM5mzyku0iFWsMp26B3n2MvVZt6E,5816
|
|
3
|
+
semanticbuffer/database.py,sha256=pqAWm7cJ_xbOrTq3tkkYCW_6gS_QIS33-z6k1IYhazw,5247
|
|
4
|
+
semanticbuffer/embeddings.py,sha256=ylIycMI1Rbyl6E9nI46UJsr_Q54Xis3PUOnjL-Tkr8A,3898
|
|
5
|
+
semanticbuffer/scoring.py,sha256=IsflRJ5zs9UwLBq0VMsuT8-kPWhOsOCqdH_Pui2XlFY,2345
|
|
6
|
+
semantic_buffer-0.1.2.dist-info/METADATA,sha256=Z58OHGlp5REk4KZFlm2oP6kDPfzoSu_SoQtE_BmVu9A,5047
|
|
7
|
+
semantic_buffer-0.1.2.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
8
|
+
semantic_buffer-0.1.2.dist-info/licenses/LICENSE,sha256=v5sKwAPGsjHXh71ki03x99TjxEfzrba8oPGVigUvf-w,1063
|
|
9
|
+
semantic_buffer-0.1.2.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Mastan
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
semanticbuffer/core.py
ADDED
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
import functools
|
|
2
|
+
from typing import Any, Callable, Dict, List, Optional
|
|
3
|
+
|
|
4
|
+
from .database import MemoryDB
|
|
5
|
+
from .embeddings import BaseEmbedder, LocalEmbedder
|
|
6
|
+
from .scoring import calculate_hybrid_score, cosine_similarity
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class SemanticBuffer:
    """Coordinate storage, embedding, and ranked retrieval of agent memories.

    Combines a ``MemoryDB`` store, an embedder implementing ``BaseEmbedder``,
    and the hybrid scoring helpers (similarity, recency, importance).
    """

    def __init__(
        self,
        db_path: str = "memory.db",
        embedder: Optional[BaseEmbedder] = None,
        recency_weight: float = 0.4,
        decay_rate: float = 0.005,
        importance_weight: float = 0.3,
    ):
        """Create a buffer backed by the database at ``db_path``.

        Args:
            db_path (str, optional): Path handed to ``MemoryDB``.
                Defaults to "memory.db".
            embedder (BaseEmbedder, optional): Embedding backend. When None,
                a ``LocalEmbedder`` with its default model is created.
            recency_weight (float, optional): Weight of the recency term
                forwarded to ``calculate_hybrid_score``. Defaults to 0.4.
            decay_rate (float, optional): Decay rate forwarded to
                ``calculate_hybrid_score``. Defaults to 0.005.
            importance_weight (float, optional): Weight of the importance
                term forwarded to ``calculate_hybrid_score``. Defaults to 0.3.
        """
        self.db = MemoryDB(db_path)
        # Compare against None explicitly: a user-supplied embedder that
        # happens to be falsy (e.g. defines __len__ or __bool__) must not be
        # silently replaced by the local default.
        self.embedder = embedder if embedder is not None else LocalEmbedder()
        self.recency_weight = recency_weight
        self.decay_rate = decay_rate
        self.importance_weight = importance_weight

    def add(
        self,
        text: str,
        importance: float = 0.5,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> int:
        """Embed ``text`` and store it as a new memory.

        Args:
            text (str): Content text to remember.
            importance (float, optional): Manual importance weight
                (documented range 0.0 to 1.0; not validated here).
                Defaults to 0.5.
            metadata (Dict[str, Any], optional): Extra key-value data stored
                alongside the memory. Defaults to None.

        Returns:
            int: The row ID returned by ``MemoryDB.insert_memory``.
        """
        embedding = self.embedder.embed_query(text)
        return self.db.insert_memory(text, embedding, importance, metadata)

    def search(self, query: str, limit: int = 5) -> List[Dict[str, Any]]:
        """Return stored memories ranked by hybrid relevance score.

        Every stored memory is scored against the query, so the cost grows
        linearly with the number of memories.

        Args:
            query (str): The search phrase.
            limit (int, optional): Maximum number of memories to return.
                Values of zero or less yield an empty list. Defaults to 5.

        Returns:
            List[Dict[str, Any]]: Up to ``limit`` memories, highest score
            first. Each entry carries the stored fields except the raw
            ``embedding`` vector, plus a ``score`` key.
        """
        if limit <= 0:
            return []

        # Load the memories before embedding the query so an empty store
        # never triggers a (possibly slow or billed) embedding call.
        memories = self.db.fetch_all_memories()
        if not memories:
            return []

        query_vector = self.embedder.embed_query(query)

        scored_memories = []
        for mem in memories:
            hybrid_score = calculate_hybrid_score(
                semantic_sim=cosine_similarity(query_vector, mem["embedding"]),
                created_at=mem["created_at"],
                importance=mem["importance"],
                recency_weight=self.recency_weight,
                decay_rate=self.decay_rate,
                importance_weight=self.importance_weight,
            )
            # Leave the embedding out of the returned payload.
            result = {
                key: value for key, value in mem.items() if key != "embedding"
            }
            result["score"] = hybrid_score
            scored_memories.append(result)

        # list.sort is stable, so equally scored memories keep fetch order.
        scored_memories.sort(key=lambda item: item["score"], reverse=True)
        return scored_memories[:limit]

    def clear(self) -> None:
        """Remove all memories from the underlying database."""
        self.db.clear()

    def remember(
        self,
        importance: float = 0.5,
        capture_inputs: bool = True,
        capture_output: bool = True,
        summary_extractor: Optional[Callable[[Any, Any, tuple, dict], str]] = None,
    ):
        """Build a decorator that logs each call of a function to the buffer.

        The wrapped function runs first; if it raises, the exception
        propagates and nothing is logged. Otherwise a text record is stored
        via ``add`` and the original return value is handed back unchanged.

        Args:
            importance (float, optional): Importance stored with every call
                record. Defaults to 0.5.
            capture_inputs (bool, optional): Include the positional and
                keyword arguments in the default record text.
                Defaults to True.
            capture_output (bool, optional): Include the return value in the
                default record text. Defaults to True.
            summary_extractor (Callable, optional): Called as
                ``summary_extractor(result, function_name, args, kwargs)``;
                its return value is stored as the record text. When given,
                ``capture_inputs`` and ``capture_output`` are not consulted.

        Returns:
            Callable: A decorator to apply to the function to be logged.
        """

        def decorator(func):
            @functools.wraps(func)
            def wrapper(*args, **kwargs):
                result = func(*args, **kwargs)

                if summary_extractor:
                    content = summary_extractor(result, func.__name__, args, kwargs)
                else:
                    parts = [f"Function '{func.__name__}' was called."]
                    if capture_inputs:
                        parts.append(f"Arguments: args={args}, kwargs={kwargs}")
                    if capture_output:
                        parts.append(f"Returned output: {result}")
                    content = "\n".join(parts)

                self.add(
                    text=content,
                    importance=importance,
                    metadata={"function": func.__name__, "source": "decorator"},
                )
                return result

            return wrapper

        return decorator
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import sqlite3
|
|
3
|
+
import struct
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from typing import Any, Dict, List
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class MemoryDB:
    """SQLite-backed store for memory records and their embedding vectors.

    Handles schema creation, packing embeddings into BLOB columns, and the
    insert / fetch / clear operations. Every operation opens its own
    connection through ``_get_connection`` and closes it when finished.
    """

    def __init__(self, db_path: str):
        """Remember the database path and create the schema if needed.

        Args:
            db_path (str): Filepath to the SQLite database.
        """
        self.db_path = db_path
        # Retained from the original attribute set; no method here uses it.
        self._conn = None
        self._init_db()

    def _get_connection(self):
        """Open a new connection to the database at ``self.db_path``.

        Each call creates a separate connection; the caller is responsible
        for closing it.

        Returns:
            sqlite3.Connection: A freshly opened connection.
        """
        return sqlite3.connect(self.db_path)

    def _init_db(self):
        """Create the ``memories`` table if it does not already exist."""
        conn = self._get_connection()
        try:
            # ``with conn`` commits on success and rolls back on error, but
            # it does not close the connection, hence the explicit close.
            with conn:
                conn.execute("""
                    CREATE TABLE IF NOT EXISTS memories (
                        id INTEGER PRIMARY KEY AUTOINCREMENT,
                        content TEXT NOT NULL,
                        embedding BLOB NOT NULL,
                        importance REAL DEFAULT 0.5,
                        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                        metadata_json TEXT
                    )
                """)
        finally:
            conn.close()

    @staticmethod
    def _vector_to_blob(vector: List[float]) -> bytes:
        """Pack a list of floats into bytes, 8 bytes (one double) per value.

        Args:
            vector (List[float]): Floats to serialize.

        Returns:
            bytes: The packed 64-bit double values.
        """
        return struct.pack(f"{len(vector)}d", *vector)

    @staticmethod
    def _blob_to_vector(blob: bytes) -> List[float]:
        """Unpack bytes produced by ``_vector_to_blob`` into a float list.

        Args:
            blob (bytes): Packed binary data, 8 bytes per value.

        Returns:
            List[float]: The restored float vector.
        """
        num_floats = len(blob) // 8
        return list(struct.unpack(f"{num_floats}d", blob))

    @staticmethod
    def _parse_timestamp(value: str) -> datetime:
        """Convert a stored ``created_at`` string into a datetime.

        Values containing "T" are parsed as ISO 8601; anything else is
        expected in the "YYYY-MM-DD HH:MM:SS" form.
        """
        if "T" in value:
            return datetime.fromisoformat(value)
        return datetime.strptime(value, "%Y-%m-%d %H:%M:%S")

    def insert_memory(
        self,
        content: str,
        embedding: List[float],
        importance: float,
        metadata: Dict[str, Any] = None,
    ) -> int:
        """Insert a new memory record into the database.

        Args:
            content (str): The raw text of the memory.
            embedding (List[float]): Embedding vector, stored as a BLOB.
            importance (float): Importance score stored with the record.
            metadata (Dict[str, Any], optional): Properties serialized to
                JSON. A None or empty value is stored as NULL.

        Returns:
            int: The row ID of the inserted record.
        """
        metadata_str = json.dumps(metadata) if metadata else None
        vector_blob = self._vector_to_blob(embedding)
        query = (
            "INSERT INTO memories "
            "(content, embedding, importance, metadata_json) "
            "VALUES (?, ?, ?, ?)"
        )

        conn = self._get_connection()
        try:
            with conn:
                cursor = conn.execute(
                    query, (content, vector_blob, importance, metadata_str)
                )
            return cursor.lastrowid
        finally:
            conn.close()

    def fetch_all_memories(self) -> List[Dict[str, Any]]:
        """Retrieve every stored memory.

        Returns:
            List[Dict[str, Any]]: One dictionary per row with the keys
            ``id``, ``content``, ``embedding`` (list of floats),
            ``importance``, ``created_at`` (datetime) and ``metadata``
            (a dict, empty when no metadata was stored).
        """
        query = (
            "SELECT id, content, embedding, importance, "
            "created_at, metadata_json FROM memories"
        )

        conn = self._get_connection()
        try:
            conn.row_factory = sqlite3.Row
            rows = conn.execute(query).fetchall()
            return [
                {
                    "id": row["id"],
                    "content": row["content"],
                    "embedding": self._blob_to_vector(row["embedding"]),
                    "importance": row["importance"],
                    "created_at": self._parse_timestamp(row["created_at"]),
                    "metadata": (
                        json.loads(row["metadata_json"])
                        if row["metadata_json"]
                        else {}
                    ),
                }
                for row in rows
            ]
        finally:
            conn.close()

    def clear(self):
        """Delete all memory entries from the table."""
        conn = self._get_connection()
        try:
            with conn:
                conn.execute("DELETE FROM memories")
        finally:
            conn.close()
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
from typing import List
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class BaseEmbedder(ABC):
    """Interface that every text-embedding backend has to implement.

    Subclass this for any embedding adapter (for example OpenAI, Gemini or
    Cohere) and provide both abstract methods; the class cannot be
    instantiated until they are overridden.
    """

    @abstractmethod
    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Turn several texts into embedding vectors.

        Args:
            texts (List[str]): The texts to embed.

        Returns:
            List[List[float]]: One float vector per input text.
        """

    @abstractmethod
    def embed_query(self, text: str) -> List[float]:
        """Turn a single query string into an embedding vector.

        Args:
            text (str): The query string to embed.

        Returns:
            List[float]: The embedding vector for ``text``.
        """
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class LocalEmbedder(BaseEmbedder):
    """Embedder backed by a locally loaded sentence-transformers model.

    The ``sentence_transformers`` import and the model construction are
    deferred until the ``model`` property is first read, so creating a
    ``LocalEmbedder`` is cheap and does not require the package.
    """

    def __init__(self, model_name: str = "all-MiniLM-L6-v2"):
        """Store the model name; the model itself is loaded on first use.

        Args:
            model_name (str, optional): Name passed to
                ``SentenceTransformer``. Defaults to "all-MiniLM-L6-v2".
        """
        self.model_name = model_name
        self._model = None

    @property
    def model(self):
        """Return the ``SentenceTransformer`` model, loading it on first access.

        Raises:
            ImportError: If the sentence-transformers package is missing.
                The original import failure is attached as ``__cause__``.
        """
        if self._model is None:
            try:
                from sentence_transformers import SentenceTransformer
            except ImportError as exc:
                # Chain explicitly so the traceback shows the underlying
                # import failure as the direct cause of this error.
                raise ImportError(
                    "sentence-transformers is required for local embeddings. "
                    "Install it using `pip install semantic-buffer[local]`."
                ) from exc
            self._model = SentenceTransformer(self.model_name)
        return self._model

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Generate vectors for a list of documents.

        Args:
            texts (List[str]): Input documents.

        Returns:
            List[List[float]]: The model's output converted with
            ``tolist()``, one vector per document.
        """
        embeddings = self.model.encode(texts, convert_to_numpy=True)
        return embeddings.tolist()

    def embed_query(self, text: str) -> List[float]:
        """Generate a vector for a single query string.

        Args:
            text (str): Query string.

        Returns:
            List[float]: The model's output converted with ``tolist()``.
        """
        embedding = self.model.encode(text, convert_to_numpy=True)
        return embedding.tolist()
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
class CustomEmbedder(BaseEmbedder):
    """Adapter that turns a plain embedding function into an embedder.

    Handy when the embedding logic is already available as a callable or a
    lambda and writing a full subclass would be overkill.
    """

    def __init__(self, embed_fn):
        """Keep a reference to the user-supplied embedding callable.

        Args:
            embed_fn (Callable): Function that accepts a list of strings
                and returns a list of float vectors.
        """
        self.embed_fn = embed_fn

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed several texts by delegating to the wrapped callable.

        Args:
            texts (List[str]): Input texts.

        Returns:
            List[List[float]]: Whatever the callable returns for ``texts``.
        """
        vectors = self.embed_fn(texts)
        return vectors

    def embed_query(self, text: str) -> List[float]:
        """Embed one query by sending it as a single-item batch.

        Args:
            text (str): Input query.

        Returns:
            List[float]: The first element of the callable's result.
        """
        single_item_batch = [text]
        return self.embed_fn(single_item_batch)[0]
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
import math
|
|
2
|
+
from datetime import datetime, timezone
|
|
3
|
+
from typing import List
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def cosine_similarity(v1: List[float], v2: List[float]) -> float:
    """Return the cosine of the angle between two float vectors.

    Args:
        v1 (List[float]): First vector.
        v2 (List[float]): Second vector.

    Returns:
        float: Similarity in the range -1.0 to 1.0, or 0.0 when either
        vector has zero length (norm).
    """
    left, right = np.array(v1), np.array(v2)
    inner_product = np.dot(left, right)
    left_norm = np.linalg.norm(left)
    right_norm = np.linalg.norm(right)

    # A zero-norm vector has no direction; report "no similarity" instead
    # of dividing by zero.
    if not (left_norm and right_norm):
        return 0.0

    return float(inner_product / (left_norm * right_norm))
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def calculate_hybrid_score(
    semantic_sim: float,
    created_at: datetime,
    importance: float,
    recency_weight: float = 0.5,
    decay_rate: float = 0.01,
    importance_weight: float = 0.3,
) -> float:
    """Combine similarity, recency, and importance into one retrieval score.

    Score = (1.0 - recency_weight - importance_weight) * Similarity
            + recency_weight * RecencyScore
            + importance_weight * ImportanceScore

    RecencyScore = e^(-decay_rate * age_in_hours)

    Args:
        semantic_sim (float): Similarity between query and memory (a cosine
            similarity lies between -1.0 and 1.0).
        created_at (datetime): When the memory was stored. A naive value is
            interpreted as UTC; an aware value is converted to UTC.
        importance (float): Importance score (documented range 0.0 to 1.0;
            not validated here).
        recency_weight (float, optional): Weight of the recency term.
            Defaults to 0.5.
        decay_rate (float, optional): Exponential decay rate per hour.
            Defaults to 0.01.
        importance_weight (float, optional): Weight of the importance term.
            Defaults to 0.3.

    Returns:
        float: The weighted hybrid score. The weights are applied as given;
        if the recency and importance weights sum to more than 1.0 the
        similarity term receives a negative weight.
    """
    # Bring both timestamps to aware UTC before subtracting; mixing naive
    # and aware datetimes would raise TypeError.
    if created_at.tzinfo is None or created_at.utcoffset() is None:
        created_utc = created_at.replace(tzinfo=timezone.utc)
    else:
        created_utc = created_at.astimezone(timezone.utc)

    age_seconds = (datetime.now(timezone.utc) - created_utc).total_seconds()
    # Timestamps in the future are treated as "just created".
    age_hours = max(0.0, age_seconds / 3600.0)

    # Exponential decay for recency
    recency_score = math.exp(-decay_rate * age_hours)

    similarity_weight = 1.0 - recency_weight - importance_weight
    return (
        similarity_weight * semantic_sim
        + recency_weight * recency_score
        + importance_weight * importance
    )
|