openvector_dev 0.1.8__py3-none-any.whl → 0.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lein_vector/api/facade.py +60 -8
- lein_vector/bases/memory_manager_abc.py +14 -4
- lein_vector/memory_manager_qdrant.py +31 -27
- lein_vector/schemas/chunk.py +1 -1
- lein_vector/sentence_transformer.py +1 -0
- {openvector_dev-0.1.8.dist-info → openvector_dev-0.1.9.dist-info}/METADATA +1 -1
- {openvector_dev-0.1.8.dist-info → openvector_dev-0.1.9.dist-info}/RECORD +8 -8
- {openvector_dev-0.1.8.dist-info → openvector_dev-0.1.9.dist-info}/WHEEL +0 -0
lein_vector/api/facade.py
CHANGED
@@ -1,13 +1,12 @@
|
|
1
1
|
from collections.abc import Sequence
|
2
2
|
from datetime import UTC, datetime
|
3
|
-
from uuid import uuid4
|
3
|
+
from uuid import UUID, uuid4
|
4
4
|
|
5
5
|
import redis.asyncio as aioredis
|
6
6
|
|
7
7
|
from lein_vector import MemoryManagerQdrant, QdrantAdapter, RedisShortTermMemory
|
8
8
|
from lein_vector.schemas.chunk import Chunk, ChunkPayload
|
9
9
|
from lein_vector.sentence_transformer import EmbeddingProviderGemini
|
10
|
-
import warnings
|
11
10
|
|
12
11
|
|
13
12
|
class Memory:
|
@@ -117,10 +116,12 @@ class Memory:
|
|
117
116
|
await self.long.upsert_chunk_with_vector(new_chunk, vector)
|
118
117
|
# self.short.clear_until(block_size)
|
119
118
|
|
120
|
-
if curr_no %
|
121
|
-
await self.long.merge_old_chunks(user_id, "type0", n=self.merge_n)
|
119
|
+
if curr_no % (block_size * self.merge_n) == 0:
|
120
|
+
return await self.long.merge_old_chunks(user_id=user_id, bot=bot, chunk_type="type0", n=self.merge_n)
|
122
121
|
|
123
|
-
async def get_long_memories(
|
122
|
+
async def get_long_memories(
|
123
|
+
self, user_id: int, bot: str, search_terms: list[str], topk: int = 3
|
124
|
+
) -> list:
|
124
125
|
"""
|
125
126
|
Возвращает ТОЛЬКО длительную память по списку тем.
|
126
127
|
:param bot: Кодовое имя бота
|
@@ -139,7 +140,9 @@ class Memory:
|
|
139
140
|
filter_={"chunk_type": "type1"},
|
140
141
|
)
|
141
142
|
|
142
|
-
async def get_short_memories(
|
143
|
+
async def get_short_memories(
|
144
|
+
self, user_id: int, bot: str, n_memories: int = 20
|
145
|
+
) -> list:
|
143
146
|
"""
|
144
147
|
Возвращает ТОЛЬКО кратковременную память по списку тем
|
145
148
|
:param bot: Кодовое имя бота
|
@@ -150,7 +153,9 @@ class Memory:
|
|
150
153
|
data = await self.short.window(user_id, bot, n_memories)
|
151
154
|
return self._to_openai(data)
|
152
155
|
|
153
|
-
async def add_short_msg(
|
156
|
+
async def add_short_msg(
|
157
|
+
self, user_id: int, bot: str, text: str, *, role: str = "user"
|
158
|
+
):
|
154
159
|
"""
|
155
160
|
Добавляет в "короткую память" новое сообщение
|
156
161
|
:param user_id: Идентификатор пользователя
|
@@ -162,7 +167,54 @@ class Memory:
|
|
162
167
|
"""
|
163
168
|
await self.short.add(bot=bot, user_id=user_id, role=role, text=text)
|
164
169
|
|
165
|
-
async def
|
170
|
+
async def add_summary_chunk(
|
171
|
+
self,
|
172
|
+
user_id: int,
|
173
|
+
bot: str,
|
174
|
+
text: str,
|
175
|
+
old_chunks: list[UUID],
|
176
|
+
*,
|
177
|
+
chunk_type: str = "type1",
|
178
|
+
) -> None:
|
179
|
+
"""
|
180
|
+
Ручное добавление summary-чанка:
|
181
|
+
1) upsert нового чанка-саммари;
|
182
|
+
2) удаление использованных чанков.
|
183
|
+
:param user_id: id пользователя
|
184
|
+
:param bot: кодовое имя бота
|
185
|
+
:param text: текст саммари
|
186
|
+
:param old_chunks: список UUID чанков, вошедших в саммари
|
187
|
+
:param chunk_type: тип нового чанка (по умолчанию 'type1')
|
188
|
+
"""
|
189
|
+
# Эмбеддинг для саммари
|
190
|
+
embedding = await self.embed.get_embedding(text)
|
191
|
+
|
192
|
+
# Создаём новый Chunk
|
193
|
+
summary_chunk = Chunk(
|
194
|
+
chunk_id=uuid4(),
|
195
|
+
bot=bot,
|
196
|
+
user_id=user_id,
|
197
|
+
chunk_type=chunk_type,
|
198
|
+
created_at=datetime.now(UTC),
|
199
|
+
last_hit=datetime.now(UTC),
|
200
|
+
hit_count=0,
|
201
|
+
text=text,
|
202
|
+
persistent=False,
|
203
|
+
summary_of=old_chunks,
|
204
|
+
)
|
205
|
+
|
206
|
+
# Записываем в Qdrant
|
207
|
+
await self.long.upsert_chunk_with_vector(
|
208
|
+
user_id=user_id,
|
209
|
+
chunk=summary_chunk,
|
210
|
+
embedding=embedding,
|
211
|
+
)
|
212
|
+
|
213
|
+
# Удаляем исходные чанки
|
214
|
+
if old_chunks:
|
215
|
+
await self.long.delete_chunks(user_id, old_chunks)
|
216
|
+
|
217
|
+
async def delete_memory(self, user_id: int, bot: str) -> None:
|
166
218
|
"""
|
167
219
|
Удаляет
|
168
220
|
:param user_id: Идентификатор пользователя.
|
lein_vector/bases/memory_manager_abc.py
CHANGED
@@ -1,9 +1,13 @@
|
|
1
1
|
from uuid import UUID
|
2
|
+
|
2
3
|
from lein_vector.schemas.chunk import Chunk
|
3
4
|
|
5
|
+
|
4
6
|
class MemoryManagerABC:
|
5
7
|
async def upsert_chunk(self, user_id: int, bot: str, chunk: Chunk) -> None: ...
|
6
|
-
async def upsert_chunks(
|
8
|
+
async def upsert_chunks(
|
9
|
+
self, user_id: int, bot: str, chunks: list[Chunk]
|
10
|
+
) -> None: ...
|
7
11
|
async def retrieve_by_embedding(
|
8
12
|
self, user_id: int, bot: str, embedding: list[float], topk: int = 3
|
9
13
|
) -> list[Chunk]: ...
|
@@ -18,14 +22,20 @@ class MemoryManagerABC:
|
|
18
22
|
async def retrieve_by_text(
|
19
23
|
self, user_id: int, bot: str, query: str, topk: int = 3
|
20
24
|
) -> list[Chunk]: ...
|
21
|
-
async def merge_old_chunks(
|
25
|
+
async def merge_old_chunks(
|
26
|
+
self, user_id: int, bot: str, chunk_type: str
|
27
|
+
) -> None: ...
|
22
28
|
async def archive_user(self, user_id: int, bot: str) -> None: ...
|
23
29
|
async def restore_user(self, user_id: int, bot: str) -> None: ...
|
24
30
|
async def increment_hit(self, user_id: int, bot: str, chunk_id: UUID) -> None: ...
|
25
31
|
async def pop_first_n(
|
26
32
|
self, user_id: int, bot: str, chunk_type: str, n: int
|
27
33
|
) -> list[Chunk]: ...
|
28
|
-
async def delete_oldest_nonpersistent(
|
34
|
+
async def delete_oldest_nonpersistent(
|
35
|
+
self, user_id: int, bot: str, keep: int
|
36
|
+
) -> None: ...
|
29
37
|
async def delete_chunk(self, user_id: int, bot: str, chunk_id: UUID) -> None: ...
|
30
|
-
async def delete_chunks(
|
38
|
+
async def delete_chunks(
|
39
|
+
self, user_id: int, bot: str, chunk_ids: list[UUID]
|
40
|
+
) -> None: ...
|
31
41
|
async def delete_all(self, user_id: int, bot: str) -> None: ...
|
lein_vector/memory_manager_qdrant.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
import asyncio
|
2
2
|
from datetime import UTC, datetime
|
3
|
-
from typing import Any
|
3
|
+
from typing import Any, Tuple, List
|
4
4
|
from uuid import UUID
|
5
5
|
|
6
6
|
from lein_vector.bases.memory_manager_abc import MemoryManagerABC
|
@@ -77,13 +77,11 @@ class MemoryManagerQdrant(MemoryManagerABC):
|
|
77
77
|
)
|
78
78
|
|
79
79
|
async def _one(e):
|
80
|
-
return await self.qdrant.search(
|
81
|
-
e, q_filter, topk, score_threshold
|
82
|
-
)
|
80
|
+
return await self.qdrant.search(e, q_filter, topk, score_threshold)
|
83
81
|
|
84
82
|
return await asyncio.gather(*[_one(e) for e in embeddings])
|
85
83
|
|
86
|
-
#ToDO: filter
|
84
|
+
# ToDO: filter
|
87
85
|
async def retrieve_by_type(
|
88
86
|
self, user_id: int, bot: str, chunk_type: str, topk: int = 3
|
89
87
|
) -> list[ChunkPayload]:
|
@@ -91,30 +89,34 @@ class MemoryManagerQdrant(MemoryManagerABC):
|
|
91
89
|
filter_ = {"user_id": user_id, "bot": bot, "chunk_type": chunk_type}
|
92
90
|
return await self.qdrant.get_all_chunks_with_filter(filter_)
|
93
91
|
|
94
|
-
async def merge_old_chunks(
|
92
|
+
async def merge_old_chunks(
|
93
|
+
self,
|
94
|
+
user_id: int,
|
95
|
+
bot: str,
|
96
|
+
chunk_type: str,
|
97
|
+
n: int = 5,
|
98
|
+
) -> Tuple[str | None, List[UUID]]:
|
99
|
+
"""
|
100
|
+
Собрать n старых чанков и вернуть:
|
101
|
+
• merged_text — склеенные сообщения c ролями,
|
102
|
+
• used_ids — UUID этих чанков.
|
103
|
+
|
104
|
+
Если чанков меньше n — вернётся (None, []).
|
105
|
+
"""
|
95
106
|
chunks = await self.qdrant.get_n_oldest_chunks(user_id, bot, chunk_type, n)
|
96
107
|
if len(chunks) < n:
|
97
|
-
return
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
chunk_id=uuid4(),
|
105
|
-
user_id=user_id,
|
106
|
-
bot=bot,
|
107
|
-
chunk_type=self._next_type(chunk_type),
|
108
|
-
created_at=datetime.now(UTC),
|
109
|
-
last_hit=datetime.now(UTC),
|
110
|
-
hit_count=0,
|
111
|
-
text=merged_text,
|
112
|
-
persistent=False,
|
113
|
-
summary_of=[c.chunk_id for c in chunks],
|
114
|
-
)
|
115
|
-
await self.upsert_chunk(user_id, bot, summary_chunk)
|
108
|
+
return None, []
|
109
|
+
|
110
|
+
def _ensure_role(txt: str, default_role: str = "gf") -> str:
|
111
|
+
# если строка уже начинается с 'role: ', оставляем как есть
|
112
|
+
if txt.split(":", 1)[0] in {"user", "gf", "assistant"}:
|
113
|
+
return txt
|
114
|
+
return f"{default_role}: {txt}"
|
116
115
|
|
117
|
-
|
116
|
+
merged_text = "\n".join(_ensure_role(c.text) for c in chunks)
|
117
|
+
used_ids = [c.chunk_id for c in chunks]
|
118
|
+
|
119
|
+
return merged_text, used_ids
|
118
120
|
|
119
121
|
async def archive_user(self, user_id: int, bot: str) -> None:
|
120
122
|
all_chunks = await self.qdrant.get_all_chunks(user_id, bot)
|
@@ -134,7 +136,9 @@ class MemoryManagerQdrant(MemoryManagerABC):
|
|
134
136
|
async def delete_chunk(self, user_id: int, bot: str, chunk_id: UUID) -> None:
|
135
137
|
await self.qdrant.delete(chunk_id)
|
136
138
|
|
137
|
-
async def delete_chunks(
|
139
|
+
async def delete_chunks(
|
140
|
+
self, user_id: int, bot: str, chunk_ids: list[UUID]
|
141
|
+
) -> None:
|
138
142
|
await self.qdrant.delete_batch(chunk_ids)
|
139
143
|
|
140
144
|
async def delete_all(self, user_id: int, bot: str) -> None:
|
lein_vector/schemas/chunk.py
CHANGED
@@ -36,7 +36,7 @@ class ChunkPayload(BaseModel):
|
|
36
36
|
source_chunk_id: UUID | None = None
|
37
37
|
extra: dict | None = Field(default_factory=dict)
|
38
38
|
|
39
|
-
def to_chunk(self, last_hit: datetime = None, hit_count: int = 0) -> Chunk:
|
39
|
+
def to_chunk(self, last_hit: datetime | None = None, hit_count: int = 0) -> Chunk:
|
40
40
|
return Chunk(
|
41
41
|
**self.model_dump(),
|
42
42
|
last_hit=last_hit or datetime.now(UTC),
|
lein_vector/sentence_transformer.py
CHANGED
@@ -22,6 +22,7 @@ class EmbeddingProviderGemini(EmbeddingProviderABC):
|
|
22
22
|
async def get_embedding(self, text: str) -> list[float]:
|
23
23
|
# В Gemini SDK обычно нет async, значит — обёртка через run_in_executor:
|
24
24
|
import asyncio
|
25
|
+
|
25
26
|
if not isinstance(text, str):
|
26
27
|
return
|
27
28
|
loop = asyncio.get_running_loop()
|
{openvector_dev-0.1.8.dist-info → openvector_dev-0.1.9.dist-info}/RECORD
CHANGED
@@ -1,17 +1,17 @@
|
|
1
1
|
lein_vector/__init__.py,sha256=ieXy65X4eVzUtSHNvlh00s4aMwKReafNni4s2q68z3Q,496
|
2
2
|
lein_vector/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
3
|
-
lein_vector/api/facade.py,sha256=
|
3
|
+
lein_vector/api/facade.py,sha256=OC94eCQnkTgq0BycEEaEMX5YUs5EUtACpiLspYcqusk,9651
|
4
4
|
lein_vector/bases/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
5
|
lein_vector/bases/embeding_provider_abc.py,sha256=WBpVC6ra-SYqhJeZs8R8U679wfGebXgwOOVPIii-IvY,265
|
6
|
-
lein_vector/bases/memory_manager_abc.py,sha256=
|
7
|
-
lein_vector/memory_manager_qdrant.py,sha256=
|
6
|
+
lein_vector/bases/memory_manager_abc.py,sha256=NE13Lz-3aX3RjkRhsd5FsezPNk0xju34fFpO3Fa1n9A,1755
|
7
|
+
lein_vector/memory_manager_qdrant.py,sha256=jzb04gnESVRH62QSYhnwsl-HpS6Kci721rPA3_6LSDQ,6382
|
8
8
|
lein_vector/memory_manager_ram.py,sha256=lUATu6U-cZuRHnR6U1L8aJQB8FNK0Wi40vXsZ5fjDgI,6002
|
9
9
|
lein_vector/qdrant_adapter.py,sha256=uPhYAp0KmYQhY3DF29JMO1JTbM__xDWGlnZZPor3VKI,4193
|
10
10
|
lein_vector/redis_short_term.py,sha256=uCcncjgXAlNAY46b3_Pjvrn_G0NEI16kRRR9hxayzak,3155
|
11
11
|
lein_vector/schemas/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
12
|
-
lein_vector/schemas/chunk.py,sha256=
|
13
|
-
lein_vector/sentence_transformer.py,sha256=
|
12
|
+
lein_vector/schemas/chunk.py,sha256=qq2J7UM94AhKXKg_gnErac5GQF9DCoaEeEHyxZg_xuk,1202
|
13
|
+
lein_vector/sentence_transformer.py,sha256=SjjIa43Y7JxcU9VkpT2Ml-CVXb8tqmrbMutZN6O_I6s,1853
|
14
14
|
lein_vector/short_term.py,sha256=KFot9r26d9TZYLscVT42V0NwufIQUkx-hftOe2p7qLU,1933
|
15
|
-
openvector_dev-0.1.
|
16
|
-
openvector_dev-0.1.
|
17
|
-
openvector_dev-0.1.
|
15
|
+
openvector_dev-0.1.9.dist-info/METADATA,sha256=ZnabZl7TnXLZXj7xFqQk5awRGPzFpuyWnDjMyUAx_Z0,3167
|
16
|
+
openvector_dev-0.1.9.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
|
17
|
+
openvector_dev-0.1.9.dist-info/RECORD,,
|
File without changes
|