openvector_dev 0.1.3__tar.gz → 0.1.5__tar.gz
This diff shows the content changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- {openvector_dev-0.1.3 → openvector_dev-0.1.5}/PKG-INFO +2 -1
- {openvector_dev-0.1.3 → openvector_dev-0.1.5}/pyproject.toml +2 -1
- openvector_dev-0.1.5/src/lein_vector/__init__.py +17 -0
- openvector_dev-0.1.5/src/lein_vector/api/facade.py +227 -0
- openvector_dev-0.1.5/src/lein_vector/bases/embeding_provider_abc.py +9 -0
- openvector_dev-0.1.5/src/lein_vector/bases/memory_manager_abc.py +31 -0
- openvector_dev-0.1.5/src/lein_vector/memory_manager_qdrant.py +155 -0
- {openvector_dev-0.1.3 → openvector_dev-0.1.5}/src/lein_vector/memory_manager_ram.py +31 -25
- {openvector_dev-0.1.3 → openvector_dev-0.1.5}/src/lein_vector/qdrant_adapter.py +47 -24
- openvector_dev-0.1.5/src/lein_vector/redis_short_term.py +92 -0
- {openvector_dev-0.1.3 → openvector_dev-0.1.5}/src/lein_vector/schemas/chunk.py +16 -10
- {openvector_dev-0.1.3 → openvector_dev-0.1.5}/src/lein_vector/sentence_transformer.py +0 -15
- {openvector_dev-0.1.3 → openvector_dev-0.1.5}/src/lein_vector/short_term.py +16 -12
- openvector_dev-0.1.3/src/lein_vector/__init__.py +0 -11
- openvector_dev-0.1.3/src/lein_vector/api/facade.py +0 -142
- openvector_dev-0.1.3/src/lein_vector/bases/embeding_provider_abc.py +0 -11
- openvector_dev-0.1.3/src/lein_vector/bases/memory_manager_abc.py +0 -33
- openvector_dev-0.1.3/src/lein_vector/memory_manager_qdrant.py +0 -100
- {openvector_dev-0.1.3 → openvector_dev-0.1.5}/README.md +0 -0
- {openvector_dev-0.1.3 → openvector_dev-0.1.5}/src/lein_vector/api/__init__.py +0 -0
- {openvector_dev-0.1.3 → openvector_dev-0.1.5}/src/lein_vector/bases/__init__.py +0 -0
- {openvector_dev-0.1.3 → openvector_dev-0.1.5}/src/lein_vector/schemas/__init__.py +0 -0
{openvector_dev-0.1.3 → openvector_dev-0.1.5}/PKG-INFO

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: openvector_dev
-Version: 0.1.3
+Version: 0.1.5
 Summary:
 Author: p00ler
 Author-email: liveitspain@gmail.com
@@ -12,6 +12,7 @@ Classifier: Programming Language :: Python :: 3.13
 Requires-Dist: aiohttp (>=3.12.13,<4.0.0)
 Requires-Dist: google-genai[aiohttp] (>=1.21.1,<2.0.0)
 Requires-Dist: qdrant-client (>=1.14.3,<2.0.0)
+Requires-Dist: redis (>=6.2.0,<7.0.0)
 Description-Content-Type: text/markdown
 
 # Persona-Memory Subsystem
```
{openvector_dev-0.1.3 → openvector_dev-0.1.5}/pyproject.toml

```diff
@@ -1,6 +1,6 @@
 [project]
 name = "openvector_dev"
-version = "0.1.3"
+version = "0.1.5"
 description = ""
 authors = [
     {name = "p00ler",email = "liveitspain@gmail.com"}
@@ -11,6 +11,7 @@ dependencies = [
     "qdrant-client (>=1.14.3,<2.0.0)",
     "aiohttp (>=3.12.13,<4.0.0)",
     "google-genai[aiohttp] (>=1.21.1,<2.0.0)",
+    "redis (>=6.2.0,<7.0.0)",
 ]
```
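The only dependency change in this release is the new `redis` requirement, which backs the new `RedisShortTermMemory`. A minimal sketch of creating the async client the new code expects; the URL is an assumption, any reachable Redis works:

```python
import asyncio

import redis.asyncio as aioredis  # provided by the new redis (>=6.2.0,<7.0.0) dependency


async def main() -> None:
    # decode_responses=True makes lrange return str instead of bytes,
    # matching how RedisShortTermMemory decodes stored entries.
    r = aioredis.from_url("redis://localhost:6379", decode_responses=True)
    print(await r.ping())  # True if the server is reachable


asyncio.run(main())
```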
openvector_dev-0.1.5/src/lein_vector/__init__.py (new file)

```python
from . import api, bases, schemas
from .memory_manager_qdrant import MemoryManagerQdrant
from .qdrant_adapter import QdrantAdapter
from .redis_short_term import RedisShortTermMemory
from .sentence_transformer import EmbeddingProviderGemini as EmbeddingProvider
from .short_term import ShortTermMemory

__all__ = [
    "EmbeddingProvider",
    "MemoryManagerQdrant",
    "QdrantAdapter",
    "RedisShortTermMemory",
    "ShortTermMemory",
    "api",
    "bases",
    "schemas",
]
```
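With the widened `__all__`, the new top-level API can be imported directly from the package. A small sketch, using only the names exported above:

```python
from lein_vector import (
    EmbeddingProvider,     # alias for EmbeddingProviderGemini
    MemoryManagerQdrant,
    QdrantAdapter,
    RedisShortTermMemory,  # new in 0.1.5
    ShortTermMemory,
)
```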
openvector_dev-0.1.5/src/lein_vector/api/facade.py (new file)

```python
from collections.abc import Sequence
from datetime import UTC, datetime
from uuid import uuid4

import redis.asyncio as aioredis

from lein_vector import MemoryManagerQdrant, QdrantAdapter, RedisShortTermMemory
from lein_vector.schemas.chunk import Chunk, ChunkPayload
from lein_vector.sentence_transformer import EmbeddingProviderGemini


class Memory:
    def __init__(self, short_term, memory_manager, embedder, merge_n: int = 5):
        """
        :param short_term: Short-term store for user messages (RedisShortTermMemory).
        :param memory_manager: Long-term memory manager (MemoryManagerQdrant).
        :param embedder: Embedding provider (EmbeddingProviderGemini).
        :param merge_n: Number of chunks to combine during a merge.
        """
        self.short = short_term
        self.long = memory_manager
        self.embed = embedder
        self._msg_no: dict[int, int] = {}
        self.merge_n = merge_n

    @classmethod
    async def from_qdrant(
        cls,
        host: str,
        port: int,
        collection: str,
        vector_size: int = 768,
        api_key: str | None = None,
        short_maxlen: int = 20,
    ) -> "Memory":
        """
        Builds the memory facade with all of its dependencies.

        :param host: Qdrant host.
        :param port: Qdrant port.
        :param collection: Qdrant collection name.
        :param vector_size: Dimensionality of the vector space.
        :param api_key: Key for the embedder (if required).
        :param short_maxlen: Maximum length of the short-term window.
        :returns: Memory instance with initialized dependencies.
        """
        _redis = aioredis.from_url("redis://localhost:6379", decode_responses=True)
        short_mem = RedisShortTermMemory(_redis, maxlen=short_maxlen)
        embedder = EmbeddingProviderGemini(api_key=api_key)
        adapter = QdrantAdapter(host, port, collection, vector_size)
        await adapter.init_collection()
        long_mem = MemoryManagerQdrant(adapter, embedder)
        return cls(short_mem, long_mem, embedder)

    async def step_user(
        self, *, bot: str, user_id: int, user_msg: str, topk: int = 3, history_n: int = 20
    ):
        """
        :param bot: Bot identifier.
        :param user_id: User identifier.
        :param user_msg: Text of the user message.
        :param topk: Number of relevant chunks to pull from long-term memory.
        :param history_n: Size of the short-term window.
        :returns: Dict with short_term and long_term messages.
        """
        key = (user_id, bot)
        curr_no = self._msg_no.get(key, 0) + 1
        self._msg_no[key] = curr_no
        await self.short.add(
            bot=bot,
            user_id=user_id,
            role="user",
            text=user_msg,
            extra={"msg_no": curr_no},
        )
        embedding = await self.embed.get_embedding(user_msg)
        long_memories = await self.long.retrieve_by_embedding(
            user_id=user_id,
            embedding=embedding,
            topk=topk,
            filter_={"bot": bot},
            score_threshold=0.7
        )
        short_ctx = await self.short.window(user_id, history_n)
        return {"short_term": short_ctx, "long_term": long_memories}

    async def step_user_oai(
        self,
        user_id: int,
        user_msg: str,
        bot: str,
        *,
        topk: int = 3,
        history_n: int = 20,
    ) -> dict:
        """
        :param user_id: User identifier.
        :param user_msg: Text of the user message.
        :param bot: Bot identifier.
        :param topk: Number of relevant chunks to pull from long-term memory.
        :param history_n: Size of the short-term window.
        :returns: short_term and long_term data in OpenAI-compatible format.
        """
        data = await self.step_user(user_id, bot, user_msg, topk=topk, history_n=history_n)
        data["short_term"] = self._to_openai(data["short_term"])
        data["long_term"] = self._chunk_texts(data["long_term"])
        return data

    @staticmethod
    def _to_openai(msgs: list[dict]) -> list[dict]:
        """
        :param msgs: Messages in the internal format.
        :returns: Messages in OpenAI format (role, content).
        """
        role_map = {"gf": "assistant"}  # "gf" maps to OpenAI "assistant"
        return [
            {"role": role_map.get(m["role"], m["role"]), "content": m["text"]}
            for m in msgs
        ]

    async def step_gf(
        self,
        user_id: int,
        gf_msg: str,
        bot: str,
        *,
        block_size: int = 4,
        save_pair: bool = True,
    ):
        """
        :param user_id: User identifier.
        :param gf_msg: Message from gf (assistant).
        :param bot: Bot identifier.
        :param block_size: Block size for saving to long-term memory.
        :param save_pair: Whether to save the message pair once block_size is reached.
        """
        key = (user_id, bot)
        curr_no = self._msg_no.get(key, 0) + 1
        self._msg_no[key] = curr_no
        ts = datetime.now(UTC).timestamp()
        await self.short.add(
            bot=bot,
            user_id=user_id,
            role="gf",
            text=str(gf_msg),
            extra={"msg_no": curr_no, "ts": ts},
        )

        if save_pair and curr_no % block_size == 0:
            last_block = await self.short.window(user_id, block_size)

            block_text = "\n".join(f'{m["role"]}: {m["text"]}' for m in last_block)

            vector = await self.embed.get_embedding(block_text)

            new_chunk = Chunk(
                chunk_id=uuid4(),
                bot=bot,
                user_id=user_id,
                chunk_type="type0",
                created_at=datetime.now(UTC),
                last_hit=datetime.now(UTC),
                hit_count=0,
                text=block_text,
                persistent=False,
                extra={"msg_no": curr_no},
            )
            await self.long.upsert_chunk_with_vector(new_chunk, vector)
            # self.short.clear_until(block_size)

        if curr_no % 40 == 0:
            await self.long.merge_old_chunks(user_id, "type0", n=self.merge_n)

    async def _get_short_term(self, n=10) -> list:
        """
        :param n: Number of most recent short-term messages.
        :returns: List of messages.
        """
        return await self.short.window(n)

    async def _get_long_term(
        self, user_id: int, embedding: list[float], topk: int = 3
    ) -> list:
        """
        :param user_id: User identifier.
        :param embedding: Embedding vector to search with.
        :param topk: Number of chunks to return.
        :returns: List of chunks.
        """
        return await self.long.retrieve_by_embedding(user_id, embedding, topk)

    async def get_memories(self, user_id: int, search_terms: list[str], topk: int = 3) -> list:
        """
        :param user_id: User identifier.
        :param search_terms: List of search queries (strings).
        :param topk: Number of chunks to return per query.
        :returns: List of relevant chunks.
        """
        search_terms = [await self.embed.get_embedding(term) for term in search_terms]
        return await self.long.retrieve_by_embeddings(user_id, search_terms, topk)

    async def _add_to_short(self, role: str, text: str) -> None:
        """
        :param role: Message role.
        :param text: Message text.
        """
        await self.short.add(role, text)

    async def _add_to_long(self, user_id: int, chunk: Chunk) -> None:
        """
        :param user_id: User identifier.
        :param chunk: Chunk instance to store.
        """
        await self.long.upsert_chunk(user_id, chunk)

    async def delete_memory(self, user_id: int) -> None:
        """
        :param user_id: User identifier.
        """
        await self.long.delete_all(user_id)

    @staticmethod
    def _chunk_texts(chunks: Sequence[Chunk | ChunkPayload]) -> list[str]:
        """
        :param chunks: Sequence of Chunk or ChunkPayload.
        :returns: List of texts extracted from the chunks.
        """
        return [c.text for c in chunks]
```
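A minimal end-to-end sketch of the new `Memory` facade, assuming Qdrant on localhost:6333, Redis on localhost:6379 (the Redis URL is hard-coded in `from_qdrant`), and a valid Gemini API key; the bot name and user id are placeholders. This shows the intended call pattern rather than a guaranteed working run, since some internal calls in 0.1.5 still disagree with the Redis short-term signatures:

```python
import asyncio

from lein_vector.api.facade import Memory


async def main() -> None:
    mem = await Memory.from_qdrant(
        host="localhost", port=6333, collection="persona_mem", api_key="YOUR_GEMINI_KEY"
    )

    # Store the user turn and pull the top-3 relevant long-term chunks for this bot.
    ctx = await mem.step_user(bot="demo_bot", user_id=42, user_msg="Remember that I like tea.")
    print(ctx["short_term"], ctx["long_term"])

    # Store the assistant turn; every block_size-th message is embedded and
    # written to Qdrant, and every 40th message triggers a merge pass.
    await mem.step_gf(42, "Noted: you like tea.", "demo_bot")


asyncio.run(main())
```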
openvector_dev-0.1.5/src/lein_vector/bases/memory_manager_abc.py (new file)

```python
from uuid import UUID
from lein_vector.schemas.chunk import Chunk

class MemoryManagerABC:
    async def upsert_chunk(self, user_id: int, bot: str, chunk: Chunk) -> None: ...
    async def upsert_chunks(self, user_id: int, bot: str, chunks: list[Chunk]) -> None: ...
    async def retrieve_by_embedding(
        self, user_id: int, bot: str, embedding: list[float], topk: int = 3
    ) -> list[Chunk]: ...
    async def retrieve_by_embedding_batch(
        self, user_id: int, bot: str, embeddings: list[list[float]], topk: int = 3
    ) -> list[list[Chunk]]:
        raise NotImplementedError("Not implemented in this backend")

    async def retrieve_by_type(
        self, user_id: int, bot: str, chunk_type: str, topk: int = 3
    ) -> list[Chunk]: ...
    async def retrieve_by_text(
        self, user_id: int, bot: str, query: str, topk: int = 3
    ) -> list[Chunk]: ...
    async def merge_old_chunks(self, user_id: int, bot: str, chunk_type: str) -> None: ...
    async def archive_user(self, user_id: int, bot: str) -> None: ...
    async def restore_user(self, user_id: int, bot: str) -> None: ...
    async def increment_hit(self, user_id: int, bot: str, chunk_id: UUID) -> None: ...
    async def pop_first_n(
        self, user_id: int, bot: str, chunk_type: str, n: int
    ) -> list[Chunk]: ...
    async def delete_oldest_nonpersistent(self, user_id: int, bot: str, keep: int) -> None: ...
    async def delete_chunk(self, user_id: int, bot: str, chunk_id: UUID) -> None: ...
    async def delete_chunks(self, user_id: int, bot: str, chunk_ids: list[UUID]) -> None: ...
    async def delete_all(self, user_id: int, bot: str) -> None: ...
```
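Compared with the 0.1.3 interface (removed below), every method now takes a `bot` argument, so one collection can hold the memories of several bots side by side. A sketch of a conforming backend stub; the class name is illustrative, and the base class's methods are plain `...` stubs, so a partial override is enough:

```python
from lein_vector.bases.memory_manager_abc import MemoryManagerABC
from lein_vector.schemas.chunk import Chunk


class NullMemoryManager(MemoryManagerABC):
    """Illustrative no-op backend: every query is scoped by (user_id, bot)."""

    async def retrieve_by_embedding(
        self, user_id: int, bot: str, embedding: list[float], topk: int = 3
    ) -> list[Chunk]:
        return []  # a real backend would filter points by user_id AND bot

    async def delete_all(self, user_id: int, bot: str) -> None:
        pass  # only this user's chunks for this bot would be dropped
```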
openvector_dev-0.1.5/src/lein_vector/memory_manager_qdrant.py (new file)

```python
import asyncio
from datetime import UTC, datetime
from typing import Any
from uuid import UUID

from lein_vector.bases.memory_manager_abc import MemoryManagerABC
from lein_vector.schemas.chunk import Chunk, ChunkPayload


class MemoryManagerQdrant(MemoryManagerABC):
    def __init__(self, qdrant_adapter, embedding_provider, archive_storage=None):
        self.qdrant = qdrant_adapter
        self.embed = embedding_provider
        self.archive = archive_storage  # your S3/minio module (interface: save(user_id, list[ChunkPayload]), load(user_id) -> list[ChunkPayload])

    async def upsert_chunk(self, user_id: int, bot: str, chunk: Chunk) -> None:
        assert chunk.bot == bot
        embedding = await self.embed.get_embedding(chunk.text)
        await self.qdrant.upsert(chunk.chunk_id, embedding, chunk.to_payload())

    async def upsert_chunk_with_vector(
        self, chunk: Chunk, embedding: list[float]
    ) -> None:
        await self.qdrant.upsert(chunk.chunk_id, embedding, chunk.to_payload())

    async def upsert_chunks(self, user_id: int, bot: str, chunks: list[Chunk]) -> None:
        for c in chunks:
            if c.bot != bot:
                raise ValueError(f"chunk.bot ({c.bot}) != bot ({bot})")
        texts = [c.text for c in chunks]
        embeddings = await self.embed.get_embeddings(texts)
        points = [
            {"point_id": c.chunk_id, "embedding": emb, "payload": c.to_payload()}
            for c, emb in zip(chunks, embeddings)
        ]
        await self.qdrant.upsert_batch(points)

    async def retrieve_by_embedding(
        self,
        user_id: int,
        embedding: list[float],
        *,
        bot: str,
        topk: int = 3,
        filter_: dict[str, Any] = None,
        score_threshold: float | None = None,
    ) -> list[ChunkPayload]:
        q_filter = {"user_id": user_id, "bot": bot}
        if filter_:
            q_filter.update(filter_)
        return await self.qdrant.search(embedding, q_filter, topk, score_threshold)

    async def retrieve_by_embeddings(
        self,
        user_id: int,
        embeddings: list[list[float]],
        *,
        bot: str,
        topk: int = 3,
        filter_: dict[str, Any] | None = None,
        score_threshold: float | None = None,
    ) -> list[list[ChunkPayload]]:
        """
        Returns a list of results for each embedding.
        Output: [[ChunkPayload, …] for emb-0, [ChunkPayload, …] for emb-1, …]
        """
        q_filter = {"user_id": user_id, "bot": bot}
        if filter_:
            q_filter.update(filter_)

        if hasattr(self.qdrant, "search_batch"):
            return await self.qdrant.search_batch(
                query_vectors=embeddings,
                query_filter=q_filter,
                topk=topk,
                score_threshold=score_threshold,
            )

        async def _one(e):
            return await self.qdrant.search(
                e, q_filter, topk, score_threshold
            )

        return await asyncio.gather(*[_one(e) for e in embeddings])

    # TODO: filter
    async def retrieve_by_type(
        self, user_id: int, bot: str, chunk_type: str, topk: int = 3
    ) -> list[ChunkPayload]:
        # Better done with a scroll over the filter
        filter_ = {"user_id": user_id, "bot": bot, "chunk_type": chunk_type}
        return await self.qdrant.get_all_chunks_with_filter(filter_)

    async def merge_old_chunks(self, user_id: int, bot: str, chunk_type: str, n: int = 5) -> None:
        chunks = await self.qdrant.get_n_oldest_chunks(user_id, bot, chunk_type, n)
        if len(chunks) < n:
            return
        merged_text = " | ".join([c.text for c in chunks])
        from datetime import datetime
        from uuid import uuid4

        # TODO: LLM summary
        summary_chunk = Chunk(
            chunk_id=uuid4(),
            user_id=user_id,
            bot=bot,
            chunk_type=self._next_type(chunk_type),
            created_at=datetime.now(UTC),
            last_hit=datetime.now(UTC),
            hit_count=0,
            text=merged_text,
            persistent=False,
            summary_of=[c.chunk_id for c in chunks],
        )
        await self.upsert_chunk(user_id, bot, summary_chunk)

        await self.delete_chunks(user_id, bot, [c.chunk_id for c in chunks])

    async def archive_user(self, user_id: int, bot: str) -> None:
        all_chunks = await self.qdrant.get_all_chunks(user_id, bot)
        await self.archive.save(user_id, bot, all_chunks)
        await self.delete_all(user_id, bot)

    async def restore_user(self, user_id: int, bot: str) -> None:
        chunks = await self.archive.load(user_id, bot)
        await self.upsert_chunks(
            user_id,
            [
                Chunk(**c.dict(), last_hit=datetime.now(UTC), hit_count=0, bot=bot)
                for c in chunks
            ],
        )

    async def delete_chunk(self, user_id: int, bot: str, chunk_id: UUID) -> None:
        await self.qdrant.delete(chunk_id)

    async def delete_chunks(self, user_id: int, bot: str, chunk_ids: list[UUID]) -> None:
        await self.qdrant.delete_batch(chunk_ids)

    async def delete_all(self, user_id: int, bot: str) -> None:
        all_chunks = await self.qdrant.get_all_chunks(user_id, bot)
        await self.delete_chunks(user_id, bot, [c.chunk_id for c in all_chunks])

    async def retrieve_filtered(
        self, user_id: int, bot: str, filter_: dict[str, Any], topk: int = 10
    ) -> list[ChunkPayload]:
        q_filter = {"user_id": user_id, "bot": bot}
        q_filter.update(filter_)
        return await self.qdrant.get_all_chunks_with_filter(q_filter, topk=topk)

    @staticmethod
    def _next_type(chunk_type: str) -> str:
        # next_type promotion logic
        mapping = {"type0": "type1", "type1": "type2"}
        return mapping.get(chunk_type, "summary")
```
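`merge_old_chunks` folds the `n` oldest chunks of a type into one chunk of the next type, following the `_next_type` chain type0 → type1 → type2 → summary. A standalone copy of just that promotion logic, for illustration:

```python
def next_type(chunk_type: str) -> str:
    # Same mapping as MemoryManagerQdrant._next_type: each merge promotes a
    # chunk one level; anything past type2 collapses into "summary".
    mapping = {"type0": "type1", "type1": "type2"}
    return mapping.get(chunk_type, "summary")


assert next_type("type0") == "type1"
assert next_type("type1") == "type2"
assert next_type("type2") == "summary"  # terminal level
```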
{openvector_dev-0.1.3 → openvector_dev-0.1.5}/src/lein_vector/memory_manager_ram.py (some removed lines are truncated in the registry view and are kept as shown)

```diff
@@ -1,9 +1,9 @@
-from
-from uuid import UUID
-
+from datetime import UTC, datetime
+from uuid import UUID, uuid4
+
 from lein_vector.bases.memory_manager_abc import MemoryManagerABC
-from
-
+from lein_vector.schemas.chunk import Chunk
+
 
 class MemoryManagerRAM(MemoryManagerABC):
     def __init__(self):
@@ -15,27 +15,37 @@ class MemoryManagerRAM(MemoryManagerABC):
             self._data[user_id] = {}
         self._data[user_id][chunk.chunk_id] = chunk
 
-    async def upsert_chunks(self, user_id: int, chunks:
+    async def upsert_chunks(self, user_id: int, chunks: list[Chunk]) -> None:
         if user_id not in self._data:
             self._data[user_id] = {}
         for chunk in chunks:
             self._data[user_id][chunk.chunk_id] = chunk
 
-    async def retrieve_by_embedding(
+    async def retrieve_by_embedding(
+        self, user_id: int, embedding: list[float], topk: int = 3
+    ) -> list[Chunk]:
         user_chunks = self._data.get(user_id, {})
-        sorted_chunks = sorted(
+        sorted_chunks = sorted(
+            user_chunks.values(), key=lambda c: c.created_at, reverse=True
+        )
         return sorted_chunks[:topk]
 
-    async def retrieve_by_embedding_batch(
+    async def retrieve_by_embedding_batch(
+        self, user_id: int, embeddings: list[list[float]], topk: int = 3
+    ) -> list[list[Chunk]]:
         raise NotImplementedError("Not implemented in RAM backend")
 
-    async def retrieve_by_type(
+    async def retrieve_by_type(
+        self, user_id: int, chunk_type: str, topk: int = 3
+    ) -> list[Chunk]:
         user_chunks = self._data.get(user_id, {})
         filtered = [c for c in user_chunks.values() if c.chunk_type == chunk_type]
         filtered.sort(key=lambda c: c.created_at, reverse=True)
         return filtered[:topk]
 
-    async def retrieve_by_text(
+    async def retrieve_by_text(
+        self, user_id: int, query: str, topk: int = 3
+    ) -> list[Chunk]:
         user_chunks = self._data.get(user_id, {})
         filtered = [c for c in user_chunks.values() if query.lower() in c.text.lower()]
         filtered.sort(key=lambda c: c.created_at, reverse=True)
@@ -43,10 +53,7 @@ class MemoryManagerRAM(MemoryManagerABC):
 
     async def merge_old_chunks(self, user_id: int, chunk_type: str, n: int = 5) -> None:
         user_chunks = self._data.get(user_id, {})
-        next_type = {
-            "type0": "type1",
-            "type1": "type2"
-        }.get(chunk_type)
+        next_type = {"type0": "type1", "type1": "type2"}.get(chunk_type)
         if not next_type:
             return
 
@@ -63,8 +70,8 @@ class MemoryManagerRAM(MemoryManagerABC):
             chunk_id=uuid4(),
             user_id=user_id,
             chunk_type=next_type,
-            created_at=datetime.now(
-            last_hit=datetime.now(
+            created_at=datetime.now(UTC),
+            last_hit=datetime.now(UTC),
             hit_count=0,
             text=merged_text,
             persistent=False,
@@ -89,10 +96,11 @@ class MemoryManagerRAM(MemoryManagerABC):
         chunk = user_chunks.get(chunk_id)
         if chunk is not None:
             chunk.hit_count += 1
-            from datetime import datetime
-
+            from datetime import datetime
+
+            chunk.last_hit = datetime.now(UTC)
 
-    async def pop_first_n(self, user_id: int, chunk_type: str, n: int) ->
+    async def pop_first_n(self, user_id: int, chunk_type: str, n: int) -> list[Chunk]:
         user_chunks = self._data.get(user_id, {})
         filtered = [c for c in user_chunks.values() if c.chunk_type == chunk_type]
         # sorted by created_at (oldest first)
@@ -113,13 +121,11 @@ class MemoryManagerRAM(MemoryManagerABC):
         for chunk in nonpersistent[:-keep]:
             del self._data[user_id][chunk.chunk_id]
 
-
     async def delete_chunk(self, user_id: int, chunk_id: UUID) -> None:
         user_chunks = self._data.get(user_id, {})
         user_chunks.pop(chunk_id, None)
 
-
-    async def delete_chunks(self, user_id: int, chunk_ids: List[UUID]) -> None:
+    async def delete_chunks(self, user_id: int, chunk_ids: list[UUID]) -> None:
         user_chunks = self._data.get(user_id, {})
         for chunk_id in chunk_ids:
             user_chunks.pop(chunk_id, None)
@@ -127,11 +133,11 @@ class MemoryManagerRAM(MemoryManagerABC):
     async def delete_all(self, user_id: int) -> None:
         self._data.pop(user_id, None)
 
-    def get_all_chunks(self, user_id: int) ->
+    def get_all_chunks(self, user_id: int) -> list[Chunk]:
         """For tests: all of the user's chunks."""
         return list(self._data.get(user_id, {}).values())
 
-    def get_all_archive(self, user_id: int) ->
+    def get_all_archive(self, user_id: int) -> list[Chunk]:
         """For tests: all chunks in the archive."""
         return list(self._archive.get(user_id, {}).values())
```
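The RAM backend keeps everything in nested dicts and is mainly useful in tests. A short sketch, assuming the 0.1.5 `Chunk` schema (so the new required `bot` field must be set) and the unchanged `upsert_chunk(user_id, chunk)` signature:

```python
import asyncio
from datetime import UTC, datetime
from uuid import uuid4

from lein_vector.memory_manager_ram import MemoryManagerRAM
from lein_vector.schemas.chunk import Chunk


async def main() -> None:
    mgr = MemoryManagerRAM()
    chunk = Chunk(
        chunk_id=uuid4(),
        user_id=1,
        bot="demo_bot",  # required since 0.1.5
        chunk_type="type0",
        created_at=datetime.now(UTC),
        last_hit=datetime.now(UTC),
        hit_count=0,
        text="user: hi",
    )
    await mgr.upsert_chunk(1, chunk)
    print(mgr.get_all_chunks(1))  # test helper: all chunks for user 1


asyncio.run(main())
```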
{openvector_dev-0.1.3 → openvector_dev-0.1.5}/src/lein_vector/qdrant_adapter.py (some removed lines are truncated in the registry view and are kept as shown)

```diff
@@ -1,16 +1,29 @@
+from typing import Any
+from uuid import UUID
+
 from qdrant_client import AsyncQdrantClient
 from qdrant_client.http.models import MatchText
 from qdrant_client.models import (
-
-
+    Distance,
+    FieldCondition,
+    Filter,
+    MatchValue,
+    PointStruct,
+    Range,
+    VectorParams,
 )
+
 from lein_vector.schemas.chunk import ChunkPayload
-from typing import List, Dict, Any
-from uuid import UUID
 
 
 class QdrantAdapter:
-    def __init__(
+    def __init__(
+        self,
+        host: str,
+        port: int,
+        collection: str = "persona_mem",
+        vector_size: int = 768,
+    ):
         self.collection = collection
         self.client = AsyncQdrantClient(host=host, port=port)
         self.vector_size = vector_size
@@ -20,33 +33,41 @@ class QdrantAdapter:
         if not exists:
             await self.client.create_collection(
                 collection_name=self.collection,
-                vectors_config=VectorParams(
+                vectors_config=VectorParams(
+                    size=self.vector_size, distance=Distance.COSINE
+                ),
             )
 
-    async def upsert(
+    async def upsert(
+        self, point_id: UUID, embedding: list[float], payload: ChunkPayload
+    ) -> None:
         await self.client.upsert(
             collection_name=self.collection,
             points=[
                 PointStruct(
-                    id=str(point_id),
-                    vector=embedding,
-                    payload=payload.model_dump()
+                    id=str(point_id), vector=embedding, payload=payload.model_dump()
                 )
-            ]
+            ],
         )
 
-    async def upsert_batch(self, points:
+    async def upsert_batch(self, points: list[dict[str, Any]]) -> None:
         structs = [
             PointStruct(
                 id=str(point["point_id"]),
                 vector=point["embedding"],
-                payload=point["payload"].dict()
+                payload=point["payload"].dict(),
             )
             for point in points
         ]
         await self.client.upsert(collection_name=self.collection, points=structs)
 
-    async def search(
+    async def search(
+        self,
+        embedding: list[float],
+        filter_: dict[str, Any],
+        topk: int,
+        score_threshold: float | None = None,
+    ) -> list[ChunkPayload]:
         # Example filter: {"user_id": 123, "chunk_type": "type1", "created_at_gt": "2024-01-01T00:00:00"}
         conditions = []
         for k, v in filter_.items():
@@ -56,37 +77,39 @@ class QdrantAdapter:
             elif k.endswith("_lt"):
                 field = k[:-3]
                 conditions.append(FieldCondition(key=field, range=Range(lt=v)))
+            elif isinstance(v, str):
+                conditions.append(FieldCondition(key=k, match=MatchText(text=v)))
             else:
-
-                conditions.append(FieldCondition(key=k, match=MatchText(text=v)))
-            else:
-                conditions.append(FieldCondition(key=k, match=MatchValue(value=v)))
+                conditions.append(FieldCondition(key=k, match=MatchValue(value=v)))
         q_filter = Filter(must=conditions)
         result = await self.client.query_points(
             collection_name=self.collection,
             query=embedding,
             query_filter=q_filter,
             limit=topk,
+            score_threshold=score_threshold,
         )
         points = result.points
+        if len(points) > 0:
+            for chunk in points:
+                print(chunk.score)
         return [ChunkPayload(**point.payload) for point in points]
 
     async def delete(self, point_id: UUID) -> None:
         await self.client.delete(
-            collection_name=self.collection,
-            points_selector=[str(point_id)]
+            collection_name=self.collection, points_selector=[str(point_id)]
         )
 
-    async def delete_batch(self, point_ids:
+    async def delete_batch(self, point_ids: list[UUID]) -> None:
         await self.client.delete(
             collection_name=self.collection,
-            points_selector=[str(pid) for pid in point_ids]
+            points_selector=[str(pid) for pid in point_ids],
        )
 
     async def delete_collection(self) -> None:
         await self.client.delete_collection(collection_name=self.collection)
 
-    async def get_all_chunks(self, user_id: int) ->
+    async def get_all_chunks(self, user_id: int) -> list[ChunkPayload]:
         q_filter = Filter(
             must=[FieldCondition(key="user_id", match=MatchValue(value=user_id))]
         )
@@ -95,4 +118,4 @@ class QdrantAdapter:
             scroll_filter=q_filter,
             limit=2048,
         )
-        return [ChunkPayload(**p.payload) for p in scroll[0]]
+        return [ChunkPayload(**p.payload) for p in scroll[0]]
```
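The adapter's `search` translates a plain dict into a Qdrant `Filter` with a small suffix convention: keys ending in `_gt`/`_lt` become `Range` conditions on the stripped key, string values become `MatchText`, and everything else becomes `MatchValue`. It also gains a `score_threshold` parameter in this release (and, as shipped, prints each hit's score). A sketch of the filter that convention produces; the field values are illustrative:

```python
from qdrant_client.http.models import MatchText
from qdrant_client.models import FieldCondition, Filter, MatchValue, Range

# Roughly what QdrantAdapter.search builds for the dict
# {"user_id": 123, "bot": "demo_bot", "hit_count_gt": 2}:
q_filter = Filter(
    must=[
        FieldCondition(key="user_id", match=MatchValue(value=123)),
        FieldCondition(key="bot", match=MatchText(text="demo_bot")),  # str -> MatchText
        FieldCondition(key="hit_count", range=Range(gt=2)),           # *_gt -> Range
    ]
)
print(q_filter)
```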
openvector_dev-0.1.5/src/lein_vector/redis_short_term.py (new file)

```python
import json
import logging
from datetime import UTC, datetime

import redis.asyncio as aioredis


class RedisShortTermMemory:
    """
    Keeps a window of the user's most recent messages in a Redis list.
    Each element is a JSON string with role / text / ts / extra fields.
    Per-user key: {codename}:{user_id}:short_term
    """

    def __init__(self, redis: aioredis.Redis, maxlen: int = 20):
        self.r = redis
        self.maxlen = maxlen

    @staticmethod
    def _key(user_id: int, bot: str) -> str:
        return f"{bot}:{user_id}:short_term"

    @staticmethod
    def _dump(msg: dict) -> str:
        # datetime => iso
        if isinstance(msg.get("ts"), datetime):
            msg = {**msg, "ts": msg["ts"].isoformat()}
        return json.dumps(msg, ensure_ascii=False)

    async def add(
        self,
        user_id: int,
        bot: str,
        role: str,
        text: str,
        ts: datetime | None = None,
        **extra,
    ) -> None:
        if ts is None:
            ts = datetime.now(UTC)

        msg = self._dump({"role": role, "text": text, "ts": ts, **extra})
        key = self._key(user_id, bot=bot)

        pipe = self.r.pipeline()
        await pipe.rpush(key, msg)
        await pipe.ltrim(key, -self.maxlen, -1)
        await pipe.execute()

    @staticmethod
    def _load(raw: str | bytes) -> dict:
        if isinstance(raw, bytes):
            raw = raw.decode("utf-8")
        d = json.loads(raw)
        if "ts" in d:
            try:
                d["ts"] = datetime.fromisoformat(d["ts"])
            except ValueError:
                logging.error("Invalid timestamp: %s", d["ts"])
        return d

    async def window(self, user_id: int, bot: str, n: int | None = None) -> list[dict]:
        n = n or self.maxlen
        raw = await self.r.lrange(self._key(user_id, bot=bot), -n, -1)
        return [self._load(r) for r in raw]

    async def clear(self, bot: str, user_id: int) -> None:
        await self.r.delete(self._key(user_id, bot=bot))

    async def load(self, user_id: int, bot: str, history: list[dict]) -> None:
        history = history[-self.maxlen :]
        if not history:
            await self.clear(user_id)
            return
        key = self._key(user_id, bot=bot)
        pipe = self.r.pipeline()
        await pipe.delete(key)
        await pipe.rpush(key, *[self._dump(m) for m in history])
        await pipe.execute()

    async def to_list(self, user_id: int, bot: str) -> list[dict]:
        raw = await self.r.lrange(self._key(user_id, bot=bot), 0, -1)
        return [self._load(r) for r in raw]

    async def chunk_for_vector(
        self, user_id: int, bot: str, chunk_size: int = 6
    ) -> list[dict] | None:
        raw_len = await self.r.llen(self._key(user_id, bot=bot))
        if raw_len < chunk_size:
            return None
        raw = await self.r.lrange(self._key(user_id, bot=bot), -chunk_size, -1)
        return [self._load(r) for r in raw]
```
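Messages live in one Redis list per (bot, user) pair, trimmed to `maxlen` with `LTRIM` on every write, so the window never grows beyond the configured size. A usage sketch, assuming Redis on localhost:6379; the bot name and user id are examples:

```python
import asyncio

import redis.asyncio as aioredis

from lein_vector.redis_short_term import RedisShortTermMemory


async def main() -> None:
    r = aioredis.from_url("redis://localhost:6379", decode_responses=True)
    stm = RedisShortTermMemory(r, maxlen=20)

    # Both messages end up in the list key "demo_bot:42:short_term".
    await stm.add(42, "demo_bot", "user", "hello")
    await stm.add(42, "demo_bot", "gf", "hi there")

    print(await stm.window(42, "demo_bot", n=2))  # last two messages, oldest first


asyncio.run(main())
```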
{openvector_dev-0.1.3 → openvector_dev-0.1.5}/src/lein_vector/schemas/chunk.py (some removed lines are truncated in the registry view and are kept as shown)

```diff
@@ -1,13 +1,15 @@
 from __future__ import annotations
 
-from
-from typing import List, Optional, Dict
-from datetime import datetime, UTC
+from datetime import UTC, datetime
 from uuid import UUID
 
+from pydantic import BaseModel, Field
+
+
 class Chunk(BaseModel):
     chunk_id: UUID
     user_id: int
+    bot: str
     chunk_type: str  # "type0" | "type1" | "fact"
     created_at: datetime
     last_hit: datetime
@@ -15,9 +17,9 @@ class Chunk(BaseModel):
     text: str
     persistent: bool = False
 
-    summary_of:
-    source_chunk_id:
-    extra:
+    summary_of: list[UUID] | None = None  # for type1
+    source_chunk_id: UUID | None = None  # for fact
+    extra: dict | None = Field(default_factory=dict)
 
     def to_payload(self) -> ChunkPayload:
         return ChunkPayload(**self.model_dump())
@@ -30,9 +32,13 @@ class ChunkPayload(BaseModel):
     created_at: datetime
     text: str
     persistent: bool = False
-    summary_of:
-    source_chunk_id:
-    extra:
+    summary_of: list[UUID] | None = None
+    source_chunk_id: UUID | None = None
+    extra: dict | None = Field(default_factory=dict)
 
     def to_chunk(self, last_hit: datetime = None, hit_count: int = 0) -> Chunk:
-        return Chunk(
+        return Chunk(
+            **self.model_dump(),
+            last_hit=last_hit or datetime.now(UTC),
+            hit_count=hit_count,
+        )
```
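This is the schema change that drives most of the release: `Chunk` gains a required `bot` field, and `summary_of` / `source_chunk_id` / `extra` get explicit optional types. A round-trip sketch, assuming `ChunkPayload` also carries the new `bot` field (its full definition is not visible in this hunk); the field values are examples:

```python
from datetime import UTC, datetime
from uuid import uuid4

from lein_vector.schemas.chunk import Chunk

chunk = Chunk(
    chunk_id=uuid4(),
    user_id=42,
    bot="demo_bot",  # new required field in 0.1.5
    chunk_type="type0",
    created_at=datetime.now(UTC),
    last_hit=datetime.now(UTC),
    hit_count=0,
    text="user: hello\ngf: hi there",
)

payload = chunk.to_payload()   # drops the hit-tracking fields not in ChunkPayload
restored = payload.to_chunk()  # last_hit defaults to now(UTC), hit_count to 0
print(restored.bot, restored.hit_count)
```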
{openvector_dev-0.1.3 → openvector_dev-0.1.5}/src/lein_vector/sentence_transformer.py

```diff
@@ -6,20 +6,6 @@ from google.genai import types
 from lein_vector.bases.embeding_provider_abc import EmbeddingProviderABC
 
 
-class EmbeddingProviderSentenceTransformer(EmbeddingProviderABC):
-    def __init__(self, model_name: str = "all-MiniLM-L6-v2"):
-        self.model = None
-        raise NotImplementedError
-
-    async def get_embedding(self, text: str) -> list[float]:
-        loop = asyncio.get_running_loop()
-        return await loop.run_in_executor(None, self.model.encode, text)
-
-    async def get_embeddings(self, texts: list[str]) -> list[list[float]]:
-        loop = asyncio.get_running_loop()
-        return await loop.run_in_executor(None, self.model.encode, texts)
-
-
 class EmbeddingProviderGemini(EmbeddingProviderABC):
     def __init__(self, api_key: str, model_name: str = "models/embedding-001"):
@@ -63,6 +49,5 @@ async def main():
         print(str(e)[:50] + "... TRIMMED]")
 
 
-
 if __name__ == "__main__":
     asyncio.run(main())
```
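With the never-implemented sentence-transformers stub removed, `EmbeddingProviderGemini` is the only provider left (and `__init__.py` still aliases it as `EmbeddingProvider`). A minimal sketch, assuming a valid Gemini API key:

```python
import asyncio

from lein_vector.sentence_transformer import EmbeddingProviderGemini


async def main() -> None:
    embedder = EmbeddingProviderGemini(api_key="YOUR_GEMINI_KEY")
    vec = await embedder.get_embedding("Remember that I like tea.")
    print(len(vec))  # expected to match the 768-dim default used by QdrantAdapter


asyncio.run(main())
```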
{openvector_dev-0.1.3 → openvector_dev-0.1.5}/src/lein_vector/short_term.py (some removed lines are truncated in the registry view and are kept as shown)

```diff
@@ -1,22 +1,26 @@
 from collections import deque
-from typing import List, Dict, Optional
 from datetime import datetime
 
+
 class ShortTermMemory:
     def __init__(self, maxlen: int = 10):
         self._buffer: deque = deque(maxlen=maxlen)
 
-    def add(
+    def add(
+        self,
+        codename: str,
+        user_id: int,
+        role: str,
+        text: str,
+        ts: datetime | None = None,
+        **extra
+    ) -> None:
         """Add a message to memory (role, text, ts: timestamp, defaults to now)."""
         if ts is None:
             ts = datetime.now()
-        self._buffer.append({
-            "role": role,
-            "text": text,
-            "ts": ts
-        })
+        self._buffer.append({"role": role, "text": text, "ts": ts, **extra})
 
-    def window(self, n:
+    def window(self, n: int | None = None) -> list[dict]:
         """Return the last n messages (all by default)."""
         if n is None or n > len(self._buffer):
             return list(self._buffer)
@@ -26,17 +30,17 @@ class ShortTermMemory:
         """Clear the memory."""
         self._buffer.clear()
 
-    def load(self, history:
+    def load(self, history: list[dict]) -> None:
         """Initialize the memory from a list of messages."""
         self._buffer.clear()
-        for msg in history[-self._buffer.maxlen:]:
+        for msg in history[-self._buffer.maxlen :]:
             self._buffer.append(msg)
 
-    def to_list(self) ->
+    def to_list(self) -> list[dict]:
         """Dump the whole memory as a list."""
         return list(self._buffer)
 
-    def chunk_for_vector(self, chunk_size: int = 6) ->
+    def chunk_for_vector(self, chunk_size: int = 6) -> list[dict] | None:
         """Build a chunk for the vector DB: the N most recent messages in chronological order."""
         if len(self._buffer) < chunk_size:
             return None
```
openvector_dev-0.1.3/src/lein_vector/__init__.py (removed)

```python
from . import api, bases, schemas
from .memory_manager_qdrant import MemoryManagerQdrant
from .qdrant_adapter import QdrantAdapter
from .sentence_transformer import EmbeddingProviderGemini as EmbeddingProvider
from .short_term import ShortTermMemory

__all__ = [
    "api", "bases", "schemas",
    "MemoryManagerQdrant",
    "QdrantAdapter", "EmbeddingProvider", "ShortTermMemory"
]
```
openvector_dev-0.1.3/src/lein_vector/api/facade.py (removed; replaced by the new Memory facade above)

```python
from typing import Sequence, Union
from uuid import uuid4
from datetime import datetime, UTC

from lein_vector import ShortTermMemory, QdrantAdapter, MemoryManagerQdrant
from lein_vector.schemas.chunk import Chunk, ChunkPayload
from lein_vector.sentence_transformer import EmbeddingProviderGemini

class MemoryFacade:
    def __init__(self, short_term, memory_manager, embedder):
        self.short = short_term
        self.long = memory_manager
        self.embed = embedder
        self._msg_no: dict[int, int] = {}

    @classmethod
    async def from_qdrant(
        cls,
        host: str,
        port: int,
        collection: str,
        vector_size: int = 768,
        api_key: str | None = None,
        short_maxlen: int = 20,
    ) -> "MemoryFacade":
        """
        Builds a MemoryFacade with all of its dependencies:
        - ShortTermMemory(maxlen=short_maxlen)
        - EmbeddingProviderGemini(api_key)
        - QdrantAdapter(host, port, collection, vector_size) + init_collection()
        - MemoryManagerQdrant(adapter, embedder)
        """
        # 1. short-term
        short_mem = ShortTermMemory(maxlen=short_maxlen)

        # 2. embedder
        embedder = EmbeddingProviderGemini(api_key=api_key)

        # 3. Qdrant adapter
        adapter = QdrantAdapter(host, port, collection, vector_size)
        await adapter.init_collection()

        # 4. long-term memory manager
        long_mem = MemoryManagerQdrant(adapter, embedder)

        # 5. return the facade
        return cls(short_mem, long_mem, embedder)

    async def step_user(self, user_id: int, user_msg: str, topk: int = 3, history_n: int = 20):
        self.short.add("user", user_msg)
        embedding = await self.embed.get_embedding(user_msg)
        long_memories = await self.long.retrieve_by_embedding(user_id, embedding, topk=topk)
        short_ctx = self.short.window(history_n)
        return {
            "short_term": short_ctx,
            "long_term": long_memories
        }

    async def step_user_oai(
        self,
        user_id: int,
        user_msg: str,
        *,
        topk: int = 3,
        history_n: int = 20,
    ) -> dict:
        """
        Full step for OpenAI-compatible output:
        1. Writes the user message to short-term memory.
        2. Fetches relevant chunks from long-term memory.
        3. Returns short-term already in OpenAI format.
        """
        data = await self.step_user(user_id, user_msg, topk=topk, history_n=history_n)
        data["short_term"] = self._to_openai(data["short_term"])
        data["long_term"] = self._chunk_texts(data["long_term"])
        return data

    @staticmethod
    def _to_openai(msgs: list[dict]) -> list[dict]:
        role_map = {"gf": "assistant"}  # "gf" maps to OpenAI "assistant"
        return [
            {"role": role_map.get(m["role"], m["role"]), "content": m["text"]}
            for m in msgs
        ]

    async def step_gf(
        self,
        user_id: int,
        gf_msg: str,
        *,
        block_size: int = 8,
        save_pair: bool = True,
    ):
        # 1) push the reply into short-term
        curr_no = self._msg_no.get(user_id, 0) + 1
        self._msg_no[user_id] = curr_no
        self.short.add("gf", gf_msg, extra={"msg_no": curr_no})

        # 2) once a block of 'block_size' messages is ready, build a long-term chunk
        if save_pair and len(self.short.window()) >= block_size:
            last_block = self.short.window(block_size)  # the last 8 messages
            block_text = "\n".join(m["text"] for m in last_block)

            # compute the embedding once
            vector = await self.embed.get_embedding(block_text)

            new_chunk = Chunk(
                chunk_id=uuid4(),
                user_id=user_id,
                chunk_type="type0",
                created_at=datetime.now(UTC),
                last_hit=datetime.now(UTC),
                hit_count=0,
                text=block_text,
                persistent=False,
                extra={"msg_no": curr_no},
            )
            await self.long.upsert_chunk_with_vector(user_id, new_chunk, vector)

            # (optional) you can clear short-term if maxlen is small
            # self.short.clear_until(block_size)  <- if a sliding shift is needed

        # 3) run merge / maintenance when needed
        if curr_no % 40 == 0:  # every 40 messages
            await self.long.merge_old_chunks(user_id, "type0", n=5)

    def get_short_term(self, n=10) -> list:
        return self.short.window(n)

    async def get_long_term(self, user_id: int, embedding: list[float], topk: int = 3) -> list:
        return await self.long.retrieve_by_embedding(user_id, embedding, topk)

    def add_to_short(self, role: str, text: str) -> None:
        self.short.add(role, text)

    async def add_to_long(self, user_id: int, chunk: Chunk) -> None:
        await self.long.upsert_chunk(user_id, chunk)

    @staticmethod
    def _chunk_texts(chunks: Sequence[Union[Chunk, ChunkPayload]]) -> list[str]:
        """Return the texts from any mix of Chunk/ChunkPayload."""
        return [c.text for c in chunks]
```
openvector_dev-0.1.3/src/lein_vector/bases/embeding_provider_abc.py (removed; a rewritten 9-line version ships in 0.1.5, not shown in this view)

```python
from typing import List
from abc import ABC, abstractmethod

class EmbeddingProviderABC(ABC):
    @abstractmethod
    async def get_embedding(self, text: str) -> List[float]:
        ...

    @abstractmethod
    async def get_embeddings(self, texts: List[str]) -> List[List[float]]:
        ...
```
openvector_dev-0.1.3/src/lein_vector/bases/memory_manager_abc.py (removed; superseded by the bot-aware version above)

```python
from typing import List
from uuid import UUID
from lein_vector.schemas.chunk import Chunk


class MemoryManagerABC:
    async def upsert_chunk(self, user_id: int, chunk: Chunk) -> None: ...
    async def upsert_chunks(self, user_id: int, chunks: List[Chunk]) -> None: ...
    async def retrieve_by_embedding(
        self, user_id: int, embedding: List[float], topk: int = 3
    ) -> List[Chunk]: ...
    async def retrieve_by_embedding_batch(
        self, user_id: int, embeddings: List[List[float]], topk: int = 3
    ) -> List[List[Chunk]]:
        raise NotImplementedError("Not implemented in this backend")

    async def retrieve_by_type(
        self, user_id: int, chunk_type: str, topk: int = 3
    ) -> List[Chunk]: ...
    async def retrieve_by_text(
        self, user_id: int, query: str, topk: int = 3
    ) -> List[Chunk]: ...
    async def merge_old_chunks(self, user_id: int, chunk_type: str) -> None: ...
    async def archive_user(self, user_id: int) -> None: ...
    async def restore_user(self, user_id: int) -> None: ...
    async def increment_hit(self, user_id: int, chunk_id: UUID) -> None: ...
    async def pop_first_n(
        self, user_id: int, chunk_type: str, n: int
    ) -> List[Chunk]: ...
    async def delete_oldest_nonpersistent(self, user_id: int, keep: int) -> None: ...
    async def delete_chunk(self, user_id: int, chunk_id: UUID) -> None: ...
    async def delete_chunks(self, user_id: int, chunk_ids: List[UUID]) -> None: ...
    async def delete_all(self, user_id: int) -> None: ...
```
openvector_dev-0.1.3/src/lein_vector/memory_manager_qdrant.py (removed; superseded by the bot-aware version above)

```python
from datetime import datetime, UTC
from typing import List, Dict, Any
from uuid import UUID

from lein_vector.bases.memory_manager_abc import MemoryManagerABC
from lein_vector.schemas.chunk import Chunk, ChunkPayload

class MemoryManagerQdrant(MemoryManagerABC):
    def __init__(self, qdrant_adapter, embedding_provider, archive_storage=None):
        self.qdrant = qdrant_adapter
        self.embed = embedding_provider
        self.archive = archive_storage  # your S3/minio module (interface: save(user_id, List[ChunkPayload]), load(user_id) -> List[ChunkPayload])

    async def upsert_chunk(self, user_id: int, chunk: Chunk) -> None:
        embedding = await self.embed.get_embedding(chunk.text)
        await self.qdrant.upsert(chunk.chunk_id, embedding, chunk.to_payload())

    async def upsert_chunk_with_vector(
        self,
        user_id: int,
        chunk: Chunk,
        embedding: list[float]
    ) -> None:
        await self.qdrant.upsert(chunk.chunk_id, embedding, chunk.to_payload())

    async def upsert_chunks(self, user_id: int, chunks: List[Chunk]) -> None:
        texts = [c.text for c in chunks]
        embeddings = await self.embed.get_embeddings(texts)
        points = [
            {"point_id": c.chunk_id, "embedding": emb, "payload": c.to_payload()}
            for c, emb in zip(chunks, embeddings)
        ]
        await self.qdrant.upsert_batch(points)

    async def retrieve_by_embedding(self, user_id: int, embedding: List[float], topk: int = 3, filter_: Dict[str, Any] = None) -> List[ChunkPayload]:
        # Filter by user_id plus custom conditions
        filter_ = filter_ or {}
        filter_["user_id"] = user_id
        return await self.qdrant.search(embedding, filter_, topk)

    async def retrieve_by_type(self, user_id: int, chunk_type: str, topk: int = 3) -> List[ChunkPayload]:
        # Embedding stub (an empty vector won't work, a real query is needed!):
        # Better done with a scroll over the filter
        filter_ = {"user_id": user_id, "chunk_type": chunk_type}
        return await self.qdrant.get_all_chunks_with_filter(filter_)

    async def merge_old_chunks(self, user_id: int, chunk_type: str, n: int = 5) -> None:
        # 1. Fetch the n oldest chunks of the given type
        chunks = await self.qdrant.get_n_oldest_chunks(user_id, chunk_type, n)
        if len(chunks) < n:
            return
        # 2. Summarize (mock or via an LLM)
        merged_text = " | ".join([c.text for c in chunks])
        from uuid import uuid4
        from datetime import datetime
        summary_chunk = Chunk(
            chunk_id=uuid4(),
            user_id=user_id,
            chunk_type=self._next_type(chunk_type),
            created_at=datetime.now(UTC),
            last_hit=datetime.now(UTC),
            hit_count=0,
            text=merged_text,
            persistent=False,
            summary_of=[c.chunk_id for c in chunks],
        )
        await self.upsert_chunk(user_id, summary_chunk)
        # 3. Delete the originals
        await self.delete_chunks(user_id, [c.chunk_id for c in chunks])

    async def archive_user(self, user_id: int) -> None:
        all_chunks = await self.qdrant.get_all_chunks(user_id)
        await self.archive.save(user_id, all_chunks)
        await self.delete_all(user_id)

    async def restore_user(self, user_id: int) -> None:
        chunks = await self.archive.load(user_id)
        await self.upsert_chunks(
            user_id,
            [Chunk(**c.dict(), last_hit=datetime.now(UTC), hit_count=0) for c in chunks]
        )

    async def delete_chunk(self, user_id: int, chunk_id: UUID) -> None:
        await self.qdrant.delete(chunk_id)

    async def delete_chunks(self, user_id: int, chunk_ids: List[UUID]) -> None:
        await self.qdrant.delete_batch(chunk_ids)

    async def delete_all(self, user_id: int) -> None:
        all_chunks = await self.qdrant.get_all_chunks(user_id)
        await self.delete_chunks(user_id, [c.chunk_id for c in all_chunks])

    # Extra search methods (by time, hit_count, last_hit)
    async def retrieve_filtered(self, user_id: int, filter_: Dict[str, Any], topk: int = 10) -> List[ChunkPayload]:
        return await self.qdrant.get_all_chunks_with_filter({"user_id": user_id, **filter_}, topk=topk)

    def _next_type(self, chunk_type: str) -> str:
        # next_type promotion logic
        mapping = {"type0": "type1", "type1": "type2"}
        return mapping.get(chunk_type, "summary")
```
Files without changes: README.md, src/lein_vector/api/__init__.py, src/lein_vector/bases/__init__.py, src/lein_vector/schemas/__init__.py.