openvector_dev 0.1.8__py3-none-any.whl → 0.1.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lein_vector/api/facade.py CHANGED
@@ -1,13 +1,13 @@
1
1
  from collections.abc import Sequence
2
2
  from datetime import UTC, datetime
3
- from uuid import uuid4
3
+ from typing import Optional, Any
4
+ from uuid import UUID, uuid4
4
5
 
5
6
  import redis.asyncio as aioredis
6
7
 
7
8
  from lein_vector import MemoryManagerQdrant, QdrantAdapter, RedisShortTermMemory
8
9
  from lein_vector.schemas.chunk import Chunk, ChunkPayload
9
10
  from lein_vector.sentence_transformer import EmbeddingProviderGemini
10
- import warnings
11
11
 
12
12
 
13
13
  class Memory:
@@ -115,12 +115,15 @@ class Memory:
115
115
  extra={"msg_no": curr_no},
116
116
  )
117
117
  await self.long.upsert_chunk_with_vector(new_chunk, vector)
118
- # self.short.clear_until(block_size)
119
118
 
120
- if curr_no % 40 == 0:
121
- await self.long.merge_old_chunks(user_id, "type0", n=self.merge_n)
119
+ if curr_no % (block_size * self.merge_n) == 0:
120
+ return await self.long.merge_old_chunks(user_id=user_id, bot=bot, chunk_type="type0", n=self.merge_n)
121
+ else:
122
+ return None, None
122
123
 
123
- async def get_long_memories(self, user_id: int, bot: str, search_terms: list[str], topk: int = 3) -> list:
124
+ async def get_long_memories(
125
+ self, user_id: int, bot: str, search_terms: list[str], topk: int = 3
126
+ ) -> list:
124
127
  """
125
128
  Возвращает ТОЛЬКО длительную память по списку тем.
126
129
  :param bot: Кодовое имя бота
@@ -139,7 +142,9 @@ class Memory:
139
142
  filter_={"chunk_type": "type1"},
140
143
  )
141
144
 
142
- async def get_short_memories(self, user_id: int, bot: str, n_memories: int = 20) -> list:
145
+ async def get_short_memories(
146
+ self, user_id: int, bot: str, n_memories: int = 20
147
+ ) -> list:
143
148
  """
144
149
  Возвращает ТОЛЬКО кратковременную память по списку тем
145
150
  :param bot: Кодовое имя бота
@@ -150,7 +155,9 @@ class Memory:
150
155
  data = await self.short.window(user_id, bot, n_memories)
151
156
  return self._to_openai(data)
152
157
 
153
- async def add_short_msg(self, user_id: int, bot: str, text: str, *, role: str = "user"):
158
+ async def add_short_msg(
159
+ self, user_id: int, bot: str, text: str, *, role: str = "user"
160
+ ):
154
161
  """
155
162
  Добавляет в "короткую память" новое сообщение
156
163
  :param user_id: Идентификатор пользователя
@@ -162,12 +169,60 @@ class Memory:
162
169
  """
163
170
  await self.short.add(bot=bot, user_id=user_id, role=role, text=text)
164
171
 
165
- async def delete_memory(self, user_id: int) -> None:
172
+ async def add_summary_chunk(
173
+ self,
174
+ user_id: int,
175
+ bot: str,
176
+ text: str,
177
+ old_chunks: list[UUID],
178
+ *,
179
+ chunk_type: str = "type1",
180
+ ) -> None:
181
+ """
182
+ Ручное добавление summary-чанка:
183
+ 1) upsert нового чанка-саммари;
184
+ 2) удаление использованных чанков.
185
+ :param user_id: id пользователя
186
+ :param bot: кодовое имя бота
187
+ :param text: текст саммари
188
+ :param old_chunks: список UUID чанков, вошедших в саммари
189
+ :param chunk_type: тип нового чанка (по умолчанию 'type1')
190
+ """
191
+ # Эмбеддинг для саммари
192
+ embedding = await self.embed.get_embedding(text)
193
+
194
+ # Создаём новый Chunk
195
+ summary_chunk = Chunk(
196
+ chunk_id=uuid4(),
197
+ bot=bot,
198
+ user_id=user_id,
199
+ chunk_type=chunk_type,
200
+ created_at=datetime.now(UTC),
201
+ last_hit=datetime.now(UTC),
202
+ hit_count=0,
203
+ text=text,
204
+ persistent=False,
205
+ summary_of=old_chunks,
206
+ )
207
+
208
+ # Записываем в Qdrant
209
+ await self.long.upsert_chunk_with_vector(
210
+ user_id=user_id,
211
+ chunk=summary_chunk,
212
+ embedding=embedding,
213
+ )
214
+
215
+ # Удаляем исходные чанки
216
+ if old_chunks:
217
+ await self.long.delete_chunks(user_id, old_chunks)
218
+
219
+ async def delete_memory(self, user_id: int, bot: str) -> None:
166
220
  """
167
221
  Удаляет
168
222
  :param user_id: Идентификатор пользователя.
223
+ :param bot: Codename бота
169
224
  """
170
- await self.long.delete_all(user_id)
225
+ await self.long.delete_all(user_id, bot)
171
226
 
172
227
  @staticmethod
173
228
  def _chunk_texts(chunks: Sequence[Chunk | ChunkPayload]) -> list[str]:
@@ -1,9 +1,13 @@
1
1
  from uuid import UUID
2
+
2
3
  from lein_vector.schemas.chunk import Chunk
3
4
 
5
+
4
6
  class MemoryManagerABC:
5
7
  async def upsert_chunk(self, user_id: int, bot: str, chunk: Chunk) -> None: ...
6
- async def upsert_chunks(self, user_id: int, bot: str, chunks: list[Chunk]) -> None: ...
8
+ async def upsert_chunks(
9
+ self, user_id: int, bot: str, chunks: list[Chunk]
10
+ ) -> None: ...
7
11
  async def retrieve_by_embedding(
8
12
  self, user_id: int, bot: str, embedding: list[float], topk: int = 3
9
13
  ) -> list[Chunk]: ...
@@ -18,14 +22,20 @@ class MemoryManagerABC:
18
22
  async def retrieve_by_text(
19
23
  self, user_id: int, bot: str, query: str, topk: int = 3
20
24
  ) -> list[Chunk]: ...
21
- async def merge_old_chunks(self, user_id: int, bot: str, chunk_type: str) -> None: ...
25
+ async def merge_old_chunks(
26
+ self, user_id: int, bot: str, chunk_type: str
27
+ ) -> None: ...
22
28
  async def archive_user(self, user_id: int, bot: str) -> None: ...
23
29
  async def restore_user(self, user_id: int, bot: str) -> None: ...
24
30
  async def increment_hit(self, user_id: int, bot: str, chunk_id: UUID) -> None: ...
25
31
  async def pop_first_n(
26
32
  self, user_id: int, bot: str, chunk_type: str, n: int
27
33
  ) -> list[Chunk]: ...
28
- async def delete_oldest_nonpersistent(self, user_id: int, bot: str, keep: int) -> None: ...
34
+ async def delete_oldest_nonpersistent(
35
+ self, user_id: int, bot: str, keep: int
36
+ ) -> None: ...
29
37
  async def delete_chunk(self, user_id: int, bot: str, chunk_id: UUID) -> None: ...
30
- async def delete_chunks(self, user_id: int, bot: str, chunk_ids: list[UUID]) -> None: ...
38
+ async def delete_chunks(
39
+ self, user_id: int, bot: str, chunk_ids: list[UUID]
40
+ ) -> None: ...
31
41
  async def delete_all(self, user_id: int, bot: str) -> None: ...
@@ -1,6 +1,6 @@
1
1
  import asyncio
2
2
  from datetime import UTC, datetime
3
- from typing import Any
3
+ from typing import Any, Tuple, List
4
4
  from uuid import UUID
5
5
 
6
6
  from lein_vector.bases.memory_manager_abc import MemoryManagerABC
@@ -77,13 +77,11 @@ class MemoryManagerQdrant(MemoryManagerABC):
77
77
  )
78
78
 
79
79
  async def _one(e):
80
- return await self.qdrant.search(
81
- e, q_filter, topk, score_threshold
82
- )
80
+ return await self.qdrant.search(e, q_filter, topk, score_threshold)
83
81
 
84
82
  return await asyncio.gather(*[_one(e) for e in embeddings])
85
83
 
86
- #ToDO: filter
84
+ # ToDO: filter
87
85
  async def retrieve_by_type(
88
86
  self, user_id: int, bot: str, chunk_type: str, topk: int = 3
89
87
  ) -> list[ChunkPayload]:
@@ -91,30 +89,34 @@ class MemoryManagerQdrant(MemoryManagerABC):
91
89
  filter_ = {"user_id": user_id, "bot": bot, "chunk_type": chunk_type}
92
90
  return await self.qdrant.get_all_chunks_with_filter(filter_)
93
91
 
94
- async def merge_old_chunks(self, user_id: int, bot: str, chunk_type: str, n: int = 5) -> None:
92
+ async def merge_old_chunks(
93
+ self,
94
+ user_id: int,
95
+ bot: str,
96
+ chunk_type: str,
97
+ n: int = 5,
98
+ ) -> Tuple[str | None, List[UUID]]:
99
+ """
100
+ Собрать n старых чанков и вернуть:
101
+ • merged_text — склеенные сообщения c ролями,
102
+ • used_ids — UUID этих чанков.
103
+
104
+ Если чанков меньше n — вернётся (None, []).
105
+ """
95
106
  chunks = await self.qdrant.get_n_oldest_chunks(user_id, bot, chunk_type, n)
96
107
  if len(chunks) < n:
97
- return
98
- merged_text = " | ".join([c.text for c in chunks])
99
- from datetime import datetime
100
- from uuid import uuid4
101
-
102
- # ToDo: LLM summary
103
- summary_chunk = Chunk(
104
- chunk_id=uuid4(),
105
- user_id=user_id,
106
- bot=bot,
107
- chunk_type=self._next_type(chunk_type),
108
- created_at=datetime.now(UTC),
109
- last_hit=datetime.now(UTC),
110
- hit_count=0,
111
- text=merged_text,
112
- persistent=False,
113
- summary_of=[c.chunk_id for c in chunks],
114
- )
115
- await self.upsert_chunk(user_id, bot, summary_chunk)
108
+ return None, []
109
+
110
+ def _ensure_role(txt: str, default_role: str = "gf") -> str:
111
+ # если строка уже начинается с 'role: ', оставляем как есть
112
+ if txt.split(":", 1)[0] in {"user", "gf", "assistant"}:
113
+ return txt
114
+ return f"{default_role}: {txt}"
116
115
 
117
- await self.delete_chunks(user_id, bot, [c.chunk_id for c in chunks])
116
+ merged_text = "\n".join(_ensure_role(c.text) for c in chunks)
117
+ used_ids = [c.chunk_id for c in chunks]
118
+
119
+ return merged_text, used_ids
118
120
 
119
121
  async def archive_user(self, user_id: int, bot: str) -> None:
120
122
  all_chunks = await self.qdrant.get_all_chunks(user_id, bot)
@@ -134,7 +136,9 @@ class MemoryManagerQdrant(MemoryManagerABC):
134
136
  async def delete_chunk(self, user_id: int, bot: str, chunk_id: UUID) -> None:
135
137
  await self.qdrant.delete(chunk_id)
136
138
 
137
- async def delete_chunks(self, user_id: int, bot: str, chunk_ids: list[UUID]) -> None:
139
+ async def delete_chunks(
140
+ self, user_id: int, bot: str, chunk_ids: list[UUID]
141
+ ) -> None:
138
142
  await self.qdrant.delete_batch(chunk_ids)
139
143
 
140
144
  async def delete_all(self, user_id: int, bot: str) -> None:
@@ -109,13 +109,21 @@ class QdrantAdapter:
109
109
  async def delete_collection(self) -> None:
110
110
  await self.client.delete_collection(collection_name=self.collection)
111
111
 
112
- async def get_all_chunks(self, user_id: int) -> list[ChunkPayload]:
112
+ async def get_all_chunks(self, user_id: int, bot: str) -> list[ChunkPayload]:
113
+ """
114
+ Вернуть ВСЕ чанки заданного пользователя и конкретного бота.
115
+ """
113
116
  q_filter = Filter(
114
- must=[FieldCondition(key="user_id", match=MatchValue(value=user_id))]
117
+ must=[
118
+ FieldCondition(key="user_id", match=MatchValue(value=user_id)),
119
+ FieldCondition(key="bot", match=MatchValue(value=bot)),
120
+ ]
115
121
  )
122
+
116
123
  scroll = await self.client.scroll(
117
124
  collection_name=self.collection,
118
125
  scroll_filter=q_filter,
119
126
  limit=2048,
120
127
  )
128
+
121
129
  return [ChunkPayload(**p.payload) for p in scroll[0]]
@@ -36,7 +36,7 @@ class ChunkPayload(BaseModel):
36
36
  source_chunk_id: UUID | None = None
37
37
  extra: dict | None = Field(default_factory=dict)
38
38
 
39
- def to_chunk(self, last_hit: datetime = None, hit_count: int = 0) -> Chunk:
39
+ def to_chunk(self, last_hit: datetime | None = None, hit_count: int = 0) -> Chunk:
40
40
  return Chunk(
41
41
  **self.model_dump(),
42
42
  last_hit=last_hit or datetime.now(UTC),
@@ -22,6 +22,7 @@ class EmbeddingProviderGemini(EmbeddingProviderABC):
22
22
  async def get_embedding(self, text: str) -> list[float]:
23
23
  # В Gemini SDK обычно нет async, значит — обёртка через run_in_executor:
24
24
  import asyncio
25
+
25
26
  if not isinstance(text, str):
26
27
  return
27
28
  loop = asyncio.get_running_loop()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: openvector_dev
3
- Version: 0.1.8
3
+ Version: 0.1.10
4
4
  Summary:
5
5
  Author: p00ler
6
6
  Author-email: liveitspain@gmail.com
@@ -0,0 +1,17 @@
1
+ lein_vector/__init__.py,sha256=ieXy65X4eVzUtSHNvlh00s4aMwKReafNni4s2q68z3Q,496
2
+ lein_vector/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ lein_vector/api/facade.py,sha256=Z5jHOoU4iYiJ3hGrhpOEJbJvx_grp2YLgmeQbuCrkTo,9725
4
+ lein_vector/bases/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ lein_vector/bases/embeding_provider_abc.py,sha256=WBpVC6ra-SYqhJeZs8R8U679wfGebXgwOOVPIii-IvY,265
6
+ lein_vector/bases/memory_manager_abc.py,sha256=NE13Lz-3aX3RjkRhsd5FsezPNk0xju34fFpO3Fa1n9A,1755
7
+ lein_vector/memory_manager_qdrant.py,sha256=jzb04gnESVRH62QSYhnwsl-HpS6Kci721rPA3_6LSDQ,6382
8
+ lein_vector/memory_manager_ram.py,sha256=lUATu6U-cZuRHnR6U1L8aJQB8FNK0Wi40vXsZ5fjDgI,6002
9
+ lein_vector/qdrant_adapter.py,sha256=CJsGvlDdTf4gGCFZIRzfYDeOT_7pzw8pf4VT_QIYDWg,4466
10
+ lein_vector/redis_short_term.py,sha256=uCcncjgXAlNAY46b3_Pjvrn_G0NEI16kRRR9hxayzak,3155
11
+ lein_vector/schemas/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
+ lein_vector/schemas/chunk.py,sha256=qq2J7UM94AhKXKg_gnErac5GQF9DCoaEeEHyxZg_xuk,1202
13
+ lein_vector/sentence_transformer.py,sha256=SjjIa43Y7JxcU9VkpT2Ml-CVXb8tqmrbMutZN6O_I6s,1853
14
+ lein_vector/short_term.py,sha256=KFot9r26d9TZYLscVT42V0NwufIQUkx-hftOe2p7qLU,1933
15
+ openvector_dev-0.1.10.dist-info/METADATA,sha256=KxlvCA-i9l8m4M8e_6aVRWW8FFTiwo5UYSAhz6k3uwM,3168
16
+ openvector_dev-0.1.10.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
17
+ openvector_dev-0.1.10.dist-info/RECORD,,
@@ -1,17 +0,0 @@
1
- lein_vector/__init__.py,sha256=ieXy65X4eVzUtSHNvlh00s4aMwKReafNni4s2q68z3Q,496
2
- lein_vector/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- lein_vector/api/facade.py,sha256=URxGEad8QrhYY6QJTrz552mk_9z8TWi7RB1veykyD-4,7886
4
- lein_vector/bases/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- lein_vector/bases/embeding_provider_abc.py,sha256=WBpVC6ra-SYqhJeZs8R8U679wfGebXgwOOVPIii-IvY,265
6
- lein_vector/bases/memory_manager_abc.py,sha256=pWuIw_wYlvUlt40jbiE92lWzdYaowZKg3XK0Mq0tQrw,1687
7
- lein_vector/memory_manager_qdrant.py,sha256=DFhvtpDl3OxLSpCQiNXnKVSoqQ3475cSDcfE5wF8Njw,6175
8
- lein_vector/memory_manager_ram.py,sha256=lUATu6U-cZuRHnR6U1L8aJQB8FNK0Wi40vXsZ5fjDgI,6002
9
- lein_vector/qdrant_adapter.py,sha256=uPhYAp0KmYQhY3DF29JMO1JTbM__xDWGlnZZPor3VKI,4193
10
- lein_vector/redis_short_term.py,sha256=uCcncjgXAlNAY46b3_Pjvrn_G0NEI16kRRR9hxayzak,3155
11
- lein_vector/schemas/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
- lein_vector/schemas/chunk.py,sha256=FSUwaFUSZk2ptoxupsdIfj5HYAMnHnu3eBfNpXCUMpM,1195
13
- lein_vector/sentence_transformer.py,sha256=gbWtkT01QFFA0IR_Ta-xsfBfb6owDzGnZDKY1g9fvBQ,1851
14
- lein_vector/short_term.py,sha256=KFot9r26d9TZYLscVT42V0NwufIQUkx-hftOe2p7qLU,1933
15
- openvector_dev-0.1.8.dist-info/METADATA,sha256=GOE9yKHgwxWZ5jmu8sOpqnz1XBcmKh0dSuP3zoGWOwQ,3167
16
- openvector_dev-0.1.8.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
17
- openvector_dev-0.1.8.dist-info/RECORD,,