loom-agent 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of loom-agent might be problematic. Click here for more details.
- loom/__init__.py +77 -0
- loom/agent.py +217 -0
- loom/agents/__init__.py +10 -0
- loom/agents/refs.py +28 -0
- loom/agents/registry.py +50 -0
- loom/builtin/compression/__init__.py +4 -0
- loom/builtin/compression/structured.py +79 -0
- loom/builtin/embeddings/__init__.py +9 -0
- loom/builtin/embeddings/openai_embedding.py +135 -0
- loom/builtin/embeddings/sentence_transformers_embedding.py +145 -0
- loom/builtin/llms/__init__.py +8 -0
- loom/builtin/llms/mock.py +34 -0
- loom/builtin/llms/openai.py +168 -0
- loom/builtin/llms/rule.py +102 -0
- loom/builtin/memory/__init__.py +5 -0
- loom/builtin/memory/in_memory.py +21 -0
- loom/builtin/memory/persistent_memory.py +278 -0
- loom/builtin/retriever/__init__.py +9 -0
- loom/builtin/retriever/chroma_store.py +265 -0
- loom/builtin/retriever/in_memory.py +106 -0
- loom/builtin/retriever/milvus_store.py +307 -0
- loom/builtin/retriever/pinecone_store.py +237 -0
- loom/builtin/retriever/qdrant_store.py +274 -0
- loom/builtin/retriever/vector_store.py +128 -0
- loom/builtin/retriever/vector_store_config.py +217 -0
- loom/builtin/tools/__init__.py +32 -0
- loom/builtin/tools/calculator.py +49 -0
- loom/builtin/tools/document_search.py +111 -0
- loom/builtin/tools/glob.py +27 -0
- loom/builtin/tools/grep.py +56 -0
- loom/builtin/tools/http_request.py +86 -0
- loom/builtin/tools/python_repl.py +73 -0
- loom/builtin/tools/read_file.py +32 -0
- loom/builtin/tools/task.py +158 -0
- loom/builtin/tools/web_search.py +64 -0
- loom/builtin/tools/write_file.py +31 -0
- loom/callbacks/base.py +9 -0
- loom/callbacks/logging.py +12 -0
- loom/callbacks/metrics.py +27 -0
- loom/callbacks/observability.py +248 -0
- loom/components/agent.py +107 -0
- loom/core/agent_executor.py +450 -0
- loom/core/circuit_breaker.py +178 -0
- loom/core/compression_manager.py +329 -0
- loom/core/context_retriever.py +185 -0
- loom/core/error_classifier.py +193 -0
- loom/core/errors.py +66 -0
- loom/core/message_queue.py +167 -0
- loom/core/permission_store.py +62 -0
- loom/core/permissions.py +69 -0
- loom/core/scheduler.py +125 -0
- loom/core/steering_control.py +47 -0
- loom/core/structured_logger.py +279 -0
- loom/core/subagent_pool.py +232 -0
- loom/core/system_prompt.py +141 -0
- loom/core/system_reminders.py +283 -0
- loom/core/tool_pipeline.py +113 -0
- loom/core/types.py +269 -0
- loom/interfaces/compressor.py +59 -0
- loom/interfaces/embedding.py +51 -0
- loom/interfaces/llm.py +33 -0
- loom/interfaces/memory.py +29 -0
- loom/interfaces/retriever.py +179 -0
- loom/interfaces/tool.py +27 -0
- loom/interfaces/vector_store.py +80 -0
- loom/llm/__init__.py +14 -0
- loom/llm/config.py +228 -0
- loom/llm/factory.py +111 -0
- loom/llm/model_health.py +235 -0
- loom/llm/model_pool_advanced.py +305 -0
- loom/llm/pool.py +170 -0
- loom/llm/registry.py +201 -0
- loom/mcp/__init__.py +4 -0
- loom/mcp/client.py +86 -0
- loom/mcp/registry.py +58 -0
- loom/mcp/tool_adapter.py +48 -0
- loom/observability/__init__.py +5 -0
- loom/patterns/__init__.py +5 -0
- loom/patterns/multi_agent.py +123 -0
- loom/patterns/rag.py +262 -0
- loom/plugins/registry.py +55 -0
- loom/resilience/__init__.py +5 -0
- loom/tooling.py +72 -0
- loom/utils/agent_loader.py +218 -0
- loom/utils/token_counter.py +19 -0
- loom_agent-0.0.1.dist-info/METADATA +457 -0
- loom_agent-0.0.1.dist-info/RECORD +89 -0
- loom_agent-0.0.1.dist-info/WHEEL +4 -0
- loom_agent-0.0.1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,307 @@
|
|
|
1
|
+
"""Milvus 向量存储适配器"""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
6
|
+
|
|
7
|
+
from loom.interfaces.retriever import Document
|
|
8
|
+
from loom.interfaces.vector_store import BaseVectorStore
|
|
9
|
+
|
|
10
|
+
try:
|
|
11
|
+
from pymilvus import connections, Collection, CollectionSchema, FieldSchema, DataType, utility
|
|
12
|
+
MILVUS_AVAILABLE = True
|
|
13
|
+
except ImportError:
|
|
14
|
+
MILVUS_AVAILABLE = False
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class MilvusVectorStore(BaseVectorStore):
|
|
18
|
+
"""
|
|
19
|
+
Milvus 向量存储适配器
|
|
20
|
+
|
|
21
|
+
特点:
|
|
22
|
+
- ✅ 开源向量数据库
|
|
23
|
+
- ✅ 支持海量数据(10B+ 向量)
|
|
24
|
+
- ✅ 高性能检索
|
|
25
|
+
- ✅ 多种索引类型(IVF_FLAT, HNSW, etc.)
|
|
26
|
+
- ✅ 分布式架构
|
|
27
|
+
|
|
28
|
+
示例:
|
|
29
|
+
from loom.builtin.retriever.milvus_store import MilvusVectorStore
|
|
30
|
+
from loom.builtin.retriever.vector_store_config import MilvusConfig
|
|
31
|
+
|
|
32
|
+
# 本地 Milvus
|
|
33
|
+
config = MilvusConfig.create(
|
|
34
|
+
host="localhost",
|
|
35
|
+
port=19530,
|
|
36
|
+
collection_name="loom_docs"
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
# Zilliz Cloud (托管 Milvus)
|
|
40
|
+
config = MilvusConfig.create(
|
|
41
|
+
host="your-cluster.zillizcloud.com",
|
|
42
|
+
port=443,
|
|
43
|
+
user="username",
|
|
44
|
+
password="password",
|
|
45
|
+
secure=True
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
vector_store = MilvusVectorStore(config)
|
|
49
|
+
await vector_store.initialize()
|
|
50
|
+
"""
|
|
51
|
+
|
|
52
|
+
def __init__(self, config: Dict[str, Any] | Any):
    """
    Create the adapter from a MilvusConfig object or a plain settings dict.

    No connection is opened here; the values are only stored on the
    instance.

    Parameters:
        config: A dict of settings, or any object exposing ``model_dump()``
            (for example a Pydantic model) whose result is used as that
            dict.

    Raises:
        ImportError: If ``pymilvus`` could not be imported.
    """
    if not MILVUS_AVAILABLE:
        raise ImportError(
            "Milvus client is not installed. "
            "Install with: pip install pymilvus"
        )

    # Objects with model_dump() are flattened to a dict; anything else is
    # stored unchanged and must support .get().
    settings = config.model_dump() if hasattr(config, "model_dump") else config
    self.config = settings

    # Connection target and credentials.
    self.host = settings.get("host", "localhost")
    self.port = settings.get("port", 19530)
    self.user = settings.get("user")
    self.password = settings.get("password")
    self.secure = settings.get("secure", False)

    # Collection layout and index settings.
    self.collection_name = settings.get("collection_name", "loom_documents")
    self.dimension = settings.get("dimension", 1536)
    self.metric = settings.get("metric", "cosine")
    self.index_type = settings.get("index_type", "IVF_FLAT")
    self.index_params = settings.get("index_params", {})

    # Runtime state; initialize() fills these in.
    self.collection: Optional[Collection] = None
    self._initialized = False
    self._connection_alias = "default"
|
|
83
|
+
|
|
84
|
+
async def initialize(self) -> None:
    """
    Connect to Milvus and make the configured collection ready for use.

    Returns immediately when the store is already initialized. Otherwise
    it opens the connection, binds the existing collection (or creates it
    through ``_create_collection()`` when absent), calls ``load()`` on it
    and marks the store as initialized.
    """
    if self._initialized:
        return

    connection_settings = {
        "alias": self._connection_alias,
        "host": self.host,
        "port": self.port,
        "user": self.user,
        "password": self.password,
        "secure": self.secure,
    }
    connections.connect(**connection_settings)

    # Reuse an existing collection; build schema and index only when absent.
    if utility.has_collection(self.collection_name):
        bound = Collection(self.collection_name)
    else:
        bound = self._create_collection()
    self.collection = bound

    # Per the original author's note, load() brings the collection into memory.
    bound.load()

    self._initialized = True
|
|
110
|
+
|
|
111
|
+
def _create_collection(self) -> Collection:
    """
    Create the collection and its vector index, and return the collection.

    The schema has four fields: ``id`` (VARCHAR primary key, max length
    100), ``vector`` (FLOAT_VECTOR of ``self.dimension``), ``content``
    (VARCHAR, max length 65535) and ``metadata`` (JSON).

    The index on ``vector`` uses ``self.index_type``. ``self.metric`` is
    mapped to a Milvus metric name, with unknown values falling back to
    ``"COSINE"``. When ``self.index_params`` is empty, ``{"nlist": 128}``
    is used.
    """
    schema = CollectionSchema(
        fields=[
            FieldSchema(name="id", dtype=DataType.VARCHAR, is_primary=True, max_length=100),
            FieldSchema(name="vector", dtype=DataType.FLOAT_VECTOR, dim=self.dimension),
            FieldSchema(name="content", dtype=DataType.VARCHAR, max_length=65535),
            FieldSchema(name="metadata", dtype=DataType.JSON),
        ],
        description="Loom document collection",
    )
    created = Collection(name=self.collection_name, schema=schema)

    # Config-level metric names -> Milvus metric identifiers.
    milvus_metric_names = {
        "cosine": "COSINE",
        "euclidean": "L2",
        "dot_product": "IP",
    }
    created.create_index(
        field_name="vector",
        index_params={
            "metric_type": milvus_metric_names.get(self.metric, "COSINE"),
            "index_type": self.index_type,
            "params": self.index_params or {"nlist": 128},
        },
    )

    return created
|
|
151
|
+
|
|
152
|
+
async def add_vectors(
    self,
    vectors: List[List[float]],
    documents: List[Document]
) -> None:
    """
    Insert documents and their embedding vectors into the collection.

    Parameters:
        vectors: One embedding per document, in the same order as
            ``documents``.
        documents: Documents to store. Each contributes its ``doc_id`` (or
            a positional ``doc_<i>`` fallback), its ``content`` and a copy
            of its ``metadata``; a non-None ``score`` is stored under the
            ``"score"`` metadata key.

    Raises:
        ValueError: If ``vectors`` and ``documents`` differ in length.
    """
    if len(vectors) != len(documents):
        raise ValueError(
            "vectors and documents must have the same length "
            f"(got {len(vectors)} and {len(documents)})"
        )

    if not self._initialized:
        await self.initialize()

    if not documents:
        # Nothing to insert; skip the insert/flush round trip.
        return

    ids = []
    contents = []
    metadatas = []
    for position, doc in enumerate(documents):
        # NOTE(review): the positional fallback id restarts at doc_0 on
        # every call, so documents without a doc_id share ids across calls.
        ids.append(doc.doc_id or f"doc_{position}")
        contents.append(doc.content)

        # Copy so that adding "score" never mutates the caller's dict.
        metadata = dict(doc.metadata or {})
        if doc.score is not None:
            metadata["score"] = doc.score
        metadatas.append(metadata)

    # Column-oriented payload: id, vector, content, metadata.
    self.collection.insert([ids, vectors, contents, metadatas])
    # Original author's note: flush to make sure the data is persisted.
    self.collection.flush()
|
|
193
|
+
|
|
194
|
+
async def search(
    self,
    query_vector: List[float],
    top_k: int = 5,
    filters: Optional[Dict[str, Any]] = None
) -> List[Tuple[Document, float]]:
    """
    Run a similarity search for one query vector.

    Parameters:
        query_vector: The embedding to search with.
        top_k: Maximum number of hits requested.
        filters: Optional metadata conditions, turned into a Milvus
            expression by ``_build_milvus_filter()``.

    Returns:
        A list of ``(Document, score)`` tuples; the score is also set on
        each document.
    """
    if not self._initialized:
        await self.initialize()

    # Search with the metric recorded on the collection's first index.
    metric_type = self.collection.indexes[0].params["metric_type"]
    search_params = {"metric_type": metric_type, "params": {"nprobe": 10}}

    expr = self._build_milvus_filter(filters) if filters else None

    results = self.collection.search(
        data=[query_vector],
        anns_field="vector",
        param=search_params,
        limit=top_k,
        expr=expr,
        output_fields=["content", "metadata"],
    )

    # One hit list comes back per query vector; flatten them in order.
    scored_documents = []
    for hits in results:
        scored_documents.extend(
            (
                Document(
                    content=hit.entity.get("content", ""),
                    metadata=hit.entity.get("metadata", {}),
                    score=hit.score,
                    doc_id=hit.id,
                ),
                hit.score,
            )
            for hit in hits
        )

    return scored_documents
|
|
251
|
+
|
|
252
|
+
async def delete(self, doc_ids: List[str]) -> None:
    """
    Delete documents by primary key.

    Builds an ``id in [...]`` expression and passes it to the collection's
    ``delete``.

    Parameters:
        doc_ids: Ids of the documents to remove. An empty list is a no-op.
    """
    if not self._initialized:
        await self.initialize()

    if not doc_ids:
        # An empty id list has nothing to match; skip the request.
        return

    def quote(doc_id):
        # Escape backslashes first, then double quotes, so an id that
        # contains '"' cannot end the string literal early and change the
        # meaning of the expression.
        text = str(doc_id).replace("\\", "\\\\").replace('"', '\\"')
        return f'"{text}"'

    ids_str = ", ".join(quote(doc_id) for doc_id in doc_ids)
    self.collection.delete(f"id in [{ids_str}]")
|
|
267
|
+
|
|
268
|
+
async def clear(self) -> None:
    """
    Empty the collection by dropping it and creating it again.

    The replacement comes from ``_create_collection()`` and has ``load()``
    called on it before this method returns.
    """
    if not self._initialized:
        await self.initialize()

    stale = self.collection
    stale.drop()

    fresh = self._create_collection()
    self.collection = fresh
    fresh.load()
|
|
277
|
+
|
|
278
|
+
def _build_milvus_filter(self, filters: Dict[str, Any]) -> Optional[str]:
    """
    Build a Milvus filter expression over the JSON ``metadata`` field.

    Example:
        {"category": "python", "price": 100}
        ->
        'metadata["category"] == "python" and metadata["price"] == 100'

    Parameters:
        filters: Mapping of metadata key to the value it must equal.
            Supported value types are ``str``, ``bool``, ``int`` and
            ``float``; entries with any other value type are skipped.

    Returns:
        The conditions joined with ``and``, or ``None`` when no condition
        was produced.
    """
    def quoted(text):
        # Escape backslashes, then double quotes, so the text cannot break
        # out of the string literal it is embedded in.
        escaped = str(text).replace("\\", "\\\\").replace('"', '\\"')
        return f'"{escaped}"'

    conditions = []

    for key, value in filters.items():
        field = f"metadata[{quoted(key)}]"
        # bool must be tested before int/float: bool is a subclass of int,
        # so the numeric branch would otherwise capture True/False.
        if isinstance(value, bool):
            conditions.append(f"{field} == {'true' if value else 'false'}")
        elif isinstance(value, str):
            conditions.append(f"{field} == {quoted(value)}")
        elif isinstance(value, (int, float)):
            conditions.append(f"{field} == {value}")
        # NOTE(review): other value types (None, lists, dicts, ...) are
        # silently ignored, which widens the search; confirm this is intended.

    return " and ".join(conditions) if conditions else None
|
|
301
|
+
|
|
302
|
+
async def close(self) -> None:
    """
    Release the collection and disconnect from Milvus.

    The store is marked uninitialized and the connection is dropped even
    when releasing the collection raises. The collection handle is cleared
    so that a later ``close()`` does not call ``release()`` again on a
    handle whose connection is already gone.
    """
    try:
        if self.collection:
            self.collection.release()
    finally:
        # Reset local state before disconnecting so it is consistent even
        # if the disconnect itself fails.
        self.collection = None
        self._initialized = False
        connections.disconnect(alias=self._connection_alias)
|
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
"""Pinecone 向量存储适配器"""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
6
|
+
|
|
7
|
+
from loom.interfaces.retriever import Document
|
|
8
|
+
from loom.interfaces.vector_store import BaseVectorStore
|
|
9
|
+
|
|
10
|
+
try:
|
|
11
|
+
import pinecone
|
|
12
|
+
from pinecone import Pinecone, ServerlessSpec
|
|
13
|
+
PINECONE_AVAILABLE = True
|
|
14
|
+
except ImportError:
|
|
15
|
+
PINECONE_AVAILABLE = False
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class PineconeVectorStore(BaseVectorStore):
|
|
19
|
+
"""
|
|
20
|
+
Pinecone 向量存储适配器
|
|
21
|
+
|
|
22
|
+
特点:
|
|
23
|
+
- ✅ 云原生向量数据库
|
|
24
|
+
- ✅ 自动扩展
|
|
25
|
+
- ✅ 低延迟查询
|
|
26
|
+
- ✅ 支持元数据过滤
|
|
27
|
+
|
|
28
|
+
示例:
|
|
29
|
+
from loom.builtin.retriever.pinecone_store import PineconeVectorStore
|
|
30
|
+
from loom.builtin.retriever.vector_store_config import PineconeConfig
|
|
31
|
+
|
|
32
|
+
config = PineconeConfig.create(
|
|
33
|
+
api_key="your-api-key",
|
|
34
|
+
environment="us-west1-gcp",
|
|
35
|
+
index_name="loom-docs"
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
vector_store = PineconeVectorStore(config)
|
|
39
|
+
await vector_store.initialize()
|
|
40
|
+
"""
|
|
41
|
+
|
|
42
|
+
def __init__(self, config: Dict[str, Any] | Any):
    """
    Create the adapter from a PineconeConfig object or a plain settings dict.

    No client is created here; the values are only stored on the instance.

    Parameters:
        config: A dict of settings, or any object exposing ``model_dump()``
            (for example a Pydantic model) whose result is used as that
            dict. The ``"api_key"`` entry is required.

    Raises:
        ImportError: If the ``pinecone`` package could not be imported.
        KeyError: If a dict config has no ``"api_key"`` entry.
    """
    if not PINECONE_AVAILABLE:
        raise ImportError(
            "Pinecone is not installed. "
            "Install with: pip install pinecone-client"
        )

    # Objects with model_dump() are flattened to a dict; anything else is
    # stored unchanged.
    settings = config.model_dump() if hasattr(config, "model_dump") else config
    self.config = settings

    self.api_key = settings["api_key"]

    # "index_name" wins; "collection_name" is the fallback key.
    fallback_name = settings.get("collection_name")
    self.index_name = settings.get("index_name", fallback_name)

    self.namespace = settings.get("namespace")
    self.dimension = settings.get("dimension", 1536)
    self.metric = settings.get("metric", "cosine")

    # Runtime state; initialize() fills these in.
    self.pc: Optional[Pinecone] = None
    self.index = None
    self._initialized = False
|
|
68
|
+
|
|
69
|
+
async def initialize(self) -> None:
    """
    Create the Pinecone client and bind the configured index.

    Returns immediately when the store is already initialized. When no
    existing index has the configured name, one is created with a
    ``ServerlessSpec`` whose cloud and region come from the config
    (defaults ``"aws"`` and ``"us-west-2"``).
    """
    if self._initialized:
        return

    client = Pinecone(api_key=self.api_key)
    self.pc = client

    known_names = [entry.name for entry in client.list_indexes()]
    if self.index_name not in known_names:
        serverless_spec = ServerlessSpec(
            cloud=self.config.get("cloud", "aws"),
            region=self.config.get("region", "us-west-2"),
        )
        client.create_index(
            name=self.index_name,
            dimension=self.dimension,
            metric=self.metric,
            spec=serverless_spec,
        )

    self.index = client.Index(self.index_name)
    self._initialized = True
|
|
95
|
+
|
|
96
|
+
async def add_vectors(
    self,
    vectors: List[List[float]],
    documents: List[Document]
) -> None:
    """
    Upsert documents and their embedding vectors into the index.

    Parameters:
        vectors: One embedding per document, in the same order as
            ``documents``.
        documents: Documents to store. Each record uses ``doc_id`` (or a
            positional ``doc_<i>`` fallback) as its id, and the document
            text is stored under the ``"content"`` metadata key next to
            the document's own metadata.

    Raises:
        ValueError: If ``vectors`` and ``documents`` differ in length.
    """
    if len(vectors) != len(documents):
        # zip() below would otherwise drop the unmatched tail silently.
        raise ValueError(
            "vectors and documents must have the same length "
            f"(got {len(vectors)} and {len(documents)})"
        )

    if not self._initialized:
        await self.initialize()

    # NOTE(review): the positional fallback id restarts at doc_0 on every
    # call, so documents without a doc_id overwrite each other across calls.
    records = [
        {
            "id": doc.doc_id or f"doc_{position}",
            "values": vector,
            # "content" is written last so a user metadata key with the
            # same name cannot replace the stored document text.
            "metadata": {**(doc.metadata or {}), "content": doc.content},
        }
        for position, (vector, doc) in enumerate(zip(vectors, documents))
    ]

    # Upload in batches of 100 (the size the original author notes as
    # recommended by Pinecone).
    batch_size = 100
    for start in range(0, len(records), batch_size):
        self.index.upsert(
            vectors=records[start:start + batch_size],
            namespace=self.namespace,
        )
|
|
137
|
+
|
|
138
|
+
async def search(
    self,
    query_vector: List[float],
    top_k: int = 5,
    filters: Optional[Dict[str, Any]] = None
) -> List[Tuple[Document, float]]:
    """
    Run a similarity search for one query vector.

    Parameters:
        query_vector: The embedding to search with.
        top_k: Maximum number of matches requested.
        filters: Optional metadata conditions, converted by
            ``_build_pinecone_filter()``.

    Returns:
        A list of ``(Document, score)`` tuples. The ``"content"`` metadata
        entry becomes the document text and is removed from the document's
        metadata.
    """
    if not self._initialized:
        await self.initialize()

    pinecone_filter = self._build_pinecone_filter(filters) if filters else None

    results = self.index.query(
        vector=query_vector,
        top_k=top_k,
        include_metadata=True,
        namespace=self.namespace,
        filter=pinecone_filter,
    )

    documents_with_scores = []
    for match in results.matches:
        # Work on a copy: popping from match.metadata directly would mutate
        # the response object, and a match without metadata would fail.
        metadata = dict(match.metadata or {})
        content = metadata.pop("content", "")

        doc = Document(
            content=content,
            metadata=metadata,
            score=match.score,
            doc_id=match.id,
        )
        documents_with_scores.append((doc, match.score))

    return documents_with_scores
|
|
188
|
+
|
|
189
|
+
async def delete(self, doc_ids: List[str]) -> None:
    """
    Delete vectors by id.

    Parameters:
        doc_ids: Ids to remove; passed unchanged to the index together
            with the configured namespace.
    """
    if not self._initialized:
        await self.initialize()

    target_index = self.index
    target_index.delete(ids=doc_ids, namespace=self.namespace)
|
|
203
|
+
|
|
204
|
+
async def clear(self) -> None:
    """
    Remove all vectors for the configured namespace.

    A single ``delete`` call with ``delete_all=True`` is issued; the index
    itself is neither dropped nor recreated.
    """
    if not self._initialized:
        await self.initialize()

    wipe_request = {"delete_all": True, "namespace": self.namespace}
    self.index.delete(**wipe_request)
|
|
211
|
+
|
|
212
|
+
def _build_pinecone_filter(self, filters: Dict[str, Any]) -> Dict[str, Any]:
    """
    Convert a simple filter mapping into a Pinecone metadata filter.

    Pinecone filter syntax:
        {
            "field": {"$eq": "value"},
            "numeric_field": {"$gte": 10}
        }

    Parameters:
        filters: Mapping of field name to a condition. For each entry:
            - a dict value is treated as an operator expression and passed
              through, e.g. ``{"price": {"$gte": 100}}``;
            - a key starting with ``$`` (``$and``, ``$or``) is passed
              through with its operand list untouched;
            - a list, tuple or set value becomes ``{"$in": [...]}``;
            - anything else becomes ``{"$eq": value}``.

    Returns:
        A new dict; ``filters`` is not modified.
    """
    pinecone_filter: Dict[str, Any] = {}

    for key, value in filters.items():
        is_operator_key = isinstance(key, str) and key.startswith("$")
        if isinstance(value, dict) or is_operator_key:
            # Already in Pinecone's own syntax; wrapping it would break it.
            pinecone_filter[key] = value
        elif isinstance(value, (list, tuple, set)):
            # Membership test: $eq does not accept an array operand.
            pinecone_filter[key] = {"$in": list(value)}
        else:
            pinecone_filter[key] = {"$eq": value}

    return pinecone_filter
|
|
233
|
+
|
|
234
|
+
async def close(self) -> None:
    """
    Mark the store as uninitialized.

    No client call is made; the original author's note says the Pinecone
    client manages its connections on its own.
    """
    self._initialized = False
|