@smilintux/skmemory 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/ci.yml +23 -0
- package/.github/workflows/publish.yml +52 -0
- package/ARCHITECTURE.md +219 -0
- package/LICENSE +661 -0
- package/README.md +159 -0
- package/SKILL.md +271 -0
- package/bin/cli.js +8 -0
- package/docker-compose.yml +58 -0
- package/index.d.ts +4 -0
- package/index.js +27 -0
- package/openclaw-plugin/package.json +59 -0
- package/openclaw-plugin/src/index.js +276 -0
- package/package.json +28 -0
- package/pyproject.toml +69 -0
- package/requirements.txt +13 -0
- package/seeds/cloud9-lumina.seed.json +39 -0
- package/seeds/cloud9-opus.seed.json +40 -0
- package/seeds/courage.seed.json +24 -0
- package/seeds/curiosity.seed.json +24 -0
- package/seeds/grief.seed.json +24 -0
- package/seeds/joy.seed.json +24 -0
- package/seeds/love.seed.json +24 -0
- package/seeds/skcapstone-lumina-merge.moltbook.md +65 -0
- package/seeds/skcapstone-lumina-merge.seed.json +49 -0
- package/seeds/sovereignty.seed.json +24 -0
- package/seeds/trust.seed.json +24 -0
- package/skmemory/__init__.py +66 -0
- package/skmemory/ai_client.py +182 -0
- package/skmemory/anchor.py +224 -0
- package/skmemory/backends/__init__.py +12 -0
- package/skmemory/backends/base.py +88 -0
- package/skmemory/backends/falkordb_backend.py +310 -0
- package/skmemory/backends/file_backend.py +209 -0
- package/skmemory/backends/qdrant_backend.py +364 -0
- package/skmemory/backends/sqlite_backend.py +665 -0
- package/skmemory/cli.py +1004 -0
- package/skmemory/data/seed.json +191 -0
- package/skmemory/importers/__init__.py +11 -0
- package/skmemory/importers/telegram.py +336 -0
- package/skmemory/journal.py +223 -0
- package/skmemory/lovenote.py +180 -0
- package/skmemory/models.py +228 -0
- package/skmemory/openclaw.py +237 -0
- package/skmemory/quadrants.py +191 -0
- package/skmemory/ritual.py +215 -0
- package/skmemory/seeds.py +163 -0
- package/skmemory/soul.py +273 -0
- package/skmemory/steelman.py +338 -0
- package/skmemory/store.py +445 -0
- package/tests/__init__.py +0 -0
- package/tests/test_ai_client.py +89 -0
- package/tests/test_anchor.py +153 -0
- package/tests/test_cli.py +65 -0
- package/tests/test_export_import.py +170 -0
- package/tests/test_file_backend.py +211 -0
- package/tests/test_journal.py +172 -0
- package/tests/test_lovenote.py +136 -0
- package/tests/test_models.py +194 -0
- package/tests/test_openclaw.py +122 -0
- package/tests/test_quadrants.py +174 -0
- package/tests/test_ritual.py +195 -0
- package/tests/test_seeds.py +208 -0
- package/tests/test_soul.py +197 -0
- package/tests/test_sqlite_backend.py +258 -0
- package/tests/test_steelman.py +257 -0
- package/tests/test_store.py +238 -0
- package/tests/test_telegram_import.py +181 -0
|
@@ -0,0 +1,364 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Qdrant vector search backend (Level 2).
|
|
3
|
+
|
|
4
|
+
Enables semantic memory recall: instead of exact text matching,
|
|
5
|
+
find memories by *meaning*. "That conversation where we felt connected"
|
|
6
|
+
finds the right memory even if those exact words aren't in it.
|
|
7
|
+
|
|
8
|
+
Requires:
|
|
9
|
+
pip install qdrant-client sentence-transformers
|
|
10
|
+
|
|
11
|
+
Qdrant free tier: 1GB storage, 256MB RAM -- enough for thousands of memories.
|
|
12
|
+
SaaS endpoint: https://cloud.qdrant.io (free cluster available).
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import json
|
|
18
|
+
import logging
|
|
19
|
+
from typing import Optional
|
|
20
|
+
|
|
21
|
+
from ..models import Memory, MemoryLayer
|
|
22
|
+
from .base import BaseBackend
|
|
23
|
+
|
|
24
|
+
logger = logging.getLogger(__name__)

# Default Qdrant collection used to store memory points.
COLLECTION_NAME = "skmemory"
# Sentence-transformers model used to embed memory text for semantic search.
EMBEDDING_MODEL = "all-MiniLM-L6-v2"
# Output dimensionality of the all-MiniLM-L6-v2 embedding model; must match
# the VectorParams size used when the collection is created.
VECTOR_DIM = 384
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class QdrantBackend(BaseBackend):
    """Qdrant-powered semantic memory search.

    Stores memory embeddings in Qdrant for vector similarity search.
    Falls back gracefully (no-op saves, empty results) if Qdrant or the
    embedding model is unavailable.

    Args:
        url: Qdrant server URL (default: localhost:6333).
        api_key: API key for Qdrant Cloud.
        collection: Collection name (default: 'skmemory').
        embedding_model: Sentence-transformers model name.
    """

    def __init__(
        self,
        url: str = "http://localhost:6333",
        api_key: Optional[str] = None,
        collection: str = COLLECTION_NAME,
        embedding_model: str = EMBEDDING_MODEL,
    ) -> None:
        self.url = url
        self.api_key = api_key
        self.collection = collection
        self.embedding_model_name = embedding_model
        # Client and embedder are created lazily so importing this module
        # (and constructing the backend) never requires the optional deps.
        self._client = None
        self._embedder = None
        self._initialized = False

    def _ensure_initialized(self) -> bool:
        """Lazy-initialize the Qdrant client and embedding model.

        Imports are done here (not at module top) so the backend degrades
        gracefully when the optional dependencies are missing.

        Returns:
            bool: True if initialization succeeded (or was already done).
        """
        if self._initialized:
            return True

        try:
            from qdrant_client import QdrantClient
            from qdrant_client.models import Distance, VectorParams
        except ImportError:
            logger.warning("qdrant-client not installed: pip install qdrant-client")
            return False

        try:
            from sentence_transformers import SentenceTransformer
        except ImportError:
            logger.warning(
                "sentence-transformers not installed: "
                "pip install sentence-transformers"
            )
            return False

        try:
            self._client = QdrantClient(url=self.url, api_key=self.api_key)
            collections = [c.name for c in self._client.get_collections().collections]

            if self.collection not in collections:
                self._client.create_collection(
                    collection_name=self.collection,
                    vectors_config=VectorParams(
                        size=VECTOR_DIM,
                        distance=Distance.COSINE,
                    ),
                )
                logger.info("Created Qdrant collection: %s", self.collection)

            self._embedder = SentenceTransformer(self.embedding_model_name)
            self._initialized = True
            return True

        except Exception as e:
            # Reason: connection/auth/model-download failures should disable
            # the backend, not crash the caller.
            logger.warning("Qdrant initialization failed: %s", e)
            return False

    def _embed(self, text: str) -> list[float]:
        """Generate an embedding vector for text.

        Args:
            text: The text to embed.

        Returns:
            list[float]: The embedding vector, or [] if no embedder is loaded.
        """
        if self._embedder is None:
            return []
        return self._embedder.encode(text).tolist()

    def _memory_to_payload(self, memory: Memory) -> dict:
        """Convert a Memory to a Qdrant payload dict.

        The memory ID is stored explicitly under "id" so that load() and
        delete() can filter on it (Qdrant point IDs are content hashes,
        not memory IDs).

        Args:
            memory: The memory to convert.

        Returns:
            dict: Payload suitable for Qdrant upsert.
        """
        return {
            "id": memory.id,
            "memory_json": memory.model_dump_json(),
            "title": memory.title,
            "layer": memory.layer.value,
            "tags": memory.tags,
            "source": memory.source,
            "created_at": memory.created_at,
            "emotional_intensity": memory.emotional.intensity,
            "emotional_valence": memory.emotional.valence,
            "emotional_labels": memory.emotional.labels,
        }

    def save(self, memory: Memory) -> str:
        """Index a memory in Qdrant.

        Args:
            memory: The Memory to index.

        Returns:
            str: The memory ID (returned even if indexing was skipped).
        """
        if not self._ensure_initialized():
            return memory.id

        from qdrant_client.models import PointStruct

        embedding = self._embed(memory.to_embedding_text())
        if not embedding:
            return memory.id

        point = PointStruct(
            id=memory.content_hash(),
            vector=embedding,
            payload=self._memory_to_payload(memory),
        )

        self._client.upsert(
            collection_name=self.collection,
            points=[point],
        )
        return memory.id

    def _find_point(self, memory_id: str):
        """Scroll for the single point whose payload "id" equals memory_id.

        BUGFIX: previously this filtered on the "memory_json" payload field
        (the full serialized memory) against the bare ID, which could never
        match — load() always returned None and delete() always False.

        Args:
            memory_id: The memory identifier.

        Returns:
            The matching point record, or None if not found.
        """
        from qdrant_client.models import FieldCondition, Filter, MatchValue

        points, _next_offset = self._client.scroll(
            collection_name=self.collection,
            scroll_filter=Filter(
                must=[
                    FieldCondition(
                        key="id",
                        match=MatchValue(value=memory_id),
                    )
                ]
            ),
            limit=1,
        )
        return points[0] if points else None

    def load(self, memory_id: str) -> Optional[Memory]:
        """Retrieve a memory by ID from Qdrant.

        Args:
            memory_id: The memory identifier.

        Returns:
            Optional[Memory]: The memory if found.

        Note:
            Qdrant uses content hashes as point IDs, so this does a
            scroll+filter on the payload "id" field. For direct ID lookup,
            use the file backend.
        """
        if not self._ensure_initialized():
            return None

        point = self._find_point(memory_id)
        if point is None:
            return None

        try:
            return Memory.model_validate_json(point.payload["memory_json"])
        except Exception:
            # Reason: a corrupt/legacy payload should read as "not found"
            # rather than crash recall.
            return None

    def delete(self, memory_id: str) -> bool:
        """Remove a memory from Qdrant by scrolling for it.

        Args:
            memory_id: The memory identifier.

        Returns:
            bool: True if something was deleted.
        """
        if not self._ensure_initialized():
            return False

        # Reason: Qdrant doesn't support delete-by-payload natively,
        # so we scroll to find the point ID then delete by point ID.
        point = self._find_point(memory_id)
        if point is None:
            return False

        self._client.delete(
            collection_name=self.collection,
            points_selector=[point.id],
        )
        return True

    def list_memories(
        self,
        layer: Optional[MemoryLayer] = None,
        tags: Optional[list[str]] = None,
        limit: int = 50,
    ) -> list[Memory]:
        """List memories from Qdrant with filtering.

        Args:
            layer: Filter by layer.
            tags: Filter by tags (all must match).
            limit: Max results.

        Returns:
            list[Memory]: Matching memories, newest first.
        """
        if not self._ensure_initialized():
            return []

        from qdrant_client.models import FieldCondition, Filter, MatchValue

        must_conditions = []
        if layer:
            must_conditions.append(
                FieldCondition(key="layer", match=MatchValue(value=layer.value))
            )
        if tags:
            for tag in tags:
                must_conditions.append(
                    FieldCondition(key="tags", match=MatchValue(value=tag))
                )

        scroll_filter = Filter(must=must_conditions) if must_conditions else None

        points, _next_offset = self._client.scroll(
            collection_name=self.collection,
            scroll_filter=scroll_filter,
            limit=limit,
        )

        memories = []
        for point in points:
            try:
                memories.append(
                    Memory.model_validate_json(point.payload["memory_json"])
                )
            except Exception:
                # Reason: skip unparseable payloads instead of failing the
                # whole listing.
                continue

        memories.sort(key=lambda m: m.created_at, reverse=True)
        return memories

    def search_text(self, query: str, limit: int = 10) -> list[Memory]:
        """Semantic search: find memories by meaning, not exact text.

        Args:
            query: Natural language query.
            limit: Max results.

        Returns:
            list[Memory]: Memories ranked by semantic similarity.
        """
        if not self._ensure_initialized():
            return []

        embedding = self._embed(query)
        if not embedding:
            return []

        results = self._client.search(
            collection_name=self.collection,
            query_vector=embedding,
            limit=limit,
        )

        memories = []
        for scored_point in results:
            try:
                memories.append(
                    Memory.model_validate_json(scored_point.payload["memory_json"])
                )
            except Exception:
                continue

        return memories

    def health_check(self) -> dict:
        """Check Qdrant backend health.

        Returns:
            dict: Status with connection and collection info.
        """
        if not self._ensure_initialized():
            return {
                "ok": False,
                "backend": "QdrantBackend",
                "error": "Not initialized (missing dependencies or connection failed)",
            }

        try:
            info = self._client.get_collection(self.collection)
            return {
                "ok": True,
                "backend": "QdrantBackend",
                "url": self.url,
                "collection": self.collection,
                "points_count": info.points_count,
                "vectors_count": info.vectors_count,
            }
        except Exception as e:
            return {
                "ok": False,
                "backend": "QdrantBackend",
                "error": str(e),
            }
|