loom-agent 0.0.4__py3-none-any.whl → 0.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of loom-agent has been flagged as possibly problematic.
- loom/api/__init__.py +19 -0
- loom/api/v0_0_3.py +1 -0
- loom/builtin/retriever/faiss_store.py +403 -0
- loom/core/agent_executor.py +212 -26
- loom/core/events.py +3 -0
- loom/core/recursion_control.py +298 -0
- loom/core/turn_state.py +58 -6
- loom/retrieval/__init__.py +61 -0
- loom/retrieval/domain_adapter.py +195 -0
- loom/retrieval/embedding_retriever.py +393 -0
- loom_agent-0.0.5.dist-info/METADATA +561 -0
- {loom_agent-0.0.4.dist-info → loom_agent-0.0.5.dist-info}/RECORD +14 -8
- loom_agent-0.0.4.dist-info/METADATA +0 -292
- {loom_agent-0.0.4.dist-info → loom_agent-0.0.5.dist-info}/WHEEL +0 -0
- {loom_agent-0.0.4.dist-info → loom_agent-0.0.5.dist-info}/licenses/LICENSE +0 -0
loom/api/__init__.py
ADDED
@@ -0,0 +1,19 @@
+"""Loom Unified API (Version-agnostic)
+
+This module provides the stable, version-agnostic API for Loom Agent.
+Users should import from here instead of versioned modules.
+
+Example:
+    from loom.api import loom_agent
+    from loom.builtin.llms import OpenAILLM
+
+    agent = loom_agent(llm=OpenAILLM(model="gpt-4"), tools={})
+"""
+
+from .v0_0_3 import LoomAgent, loom_agent, unified_executor
+
+__all__ = [
+    "LoomAgent",
+    "loom_agent",
+    "unified_executor",
+]
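Since loom/api/__init__.py simply re-exports from the versioned module, the stable import path and the pinned one resolve to the same objects. A minimal sketch (the assert is illustrative, not part of the package):

from loom.api import loom_agent as stable_agent_factory
from loom.api.v0_0_3 import loom_agent as pinned_agent_factory

# Both names point at the same callable; only the import path differs.
assert stable_agent_factory is pinned_agent_factory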
loom/builtin/retriever/faiss_store.py
ADDED
@@ -0,0 +1,403 @@
+"""
+FAISS Vector Store
+
+Lightweight, in-memory vector storage using FAISS.
+Ideal for development and small to medium scale deployments.
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import Any, Dict, List, Optional, Tuple
+
+from loom.interfaces.vector_store import BaseVectorStore
+from loom.interfaces.retriever import Document
+
+logger = logging.getLogger(__name__)
+
+
+class FAISSVectorStore(BaseVectorStore):
+    """
+    FAISS-based vector storage
+
+    Lightweight, in-memory vector database using Facebook's FAISS library.
+    Ideal for:
+    - Development and testing
+    - Small to medium scale deployments (< 1M documents)
+    - Applications without persistence requirements
+
+    Features:
+    - Fast similarity search
+    - Multiple index types (Flat, IVF, HNSW)
+    - In-memory storage
+    - Optional persistence
+
+    Example:
+        # Basic usage
+        store = FAISSVectorStore(dimension=1536)
+        await store.initialize()
+
+        # Add documents
+        await store.add_documents(
+            documents=[doc1, doc2],
+            embeddings=[[0.1, ...], [0.2, ...]]
+        )
+
+        # Search
+        results = await store.search(
+            query_embedding=[0.15, ...],
+            top_k=5
+        )
+
+        # Advanced: Use IVF index for larger datasets
+        store = FAISSVectorStore(
+            dimension=1536,
+            index_type="IVF",
+            nlist=100  # Number of clusters
+        )
+    """
+
+    def __init__(
+        self,
+        dimension: int,
+        index_type: str = "Flat",
+        metric: str = "L2",
+        nlist: int = 100,
+        nprobe: int = 10
+    ):
+        """
+        Args:
+            dimension: Embedding dimension
+            index_type: Index type ("Flat", "IVF", "HNSW")
+            metric: Distance metric ("L2" or "IP" for inner product)
+            nlist: Number of clusters for IVF index
+            nprobe: Number of clusters to search in IVF
+        """
+        self.dimension = dimension
+        self.index_type = index_type
+        self.metric = metric
+        self.nlist = nlist
+        self.nprobe = nprobe
+
+        # FAISS index
+        self.index = None
+
+        # Document storage
+        self.documents: Dict[str, Document] = {}
+        self.id_to_index: Dict[str, int] = {}
+        self.index_to_id: Dict[int, str] = {}
+
+        self._initialized = False
+
+    async def initialize(self) -> None:
+        """Initialize FAISS index"""
+        if self._initialized:
+            return
+
+        try:
+            import faiss
+        except ImportError:
+            raise ImportError(
+                "FAISS is required for FAISSVectorStore. "
+                "Install it with: pip install faiss-cpu or pip install faiss-gpu"
+            )
+
+        # Create index based on type
+        if self.index_type == "Flat":
+            if self.metric == "L2":
+                self.index = faiss.IndexFlatL2(self.dimension)
+            else:  # IP (Inner Product)
+                self.index = faiss.IndexFlatIP(self.dimension)
+
+        elif self.index_type == "IVF":
+            if self.metric == "L2":
+                quantizer = faiss.IndexFlatL2(self.dimension)
+                self.index = faiss.IndexIVFFlat(
+                    quantizer,
+                    self.dimension,
+                    self.nlist
+                )
+            else:
+                quantizer = faiss.IndexFlatIP(self.dimension)
+                self.index = faiss.IndexIVFFlat(
+                    quantizer,
+                    self.dimension,
+                    self.nlist
+                )
+            # IVF needs training (will be done when first batch is added)
+            self.index.nprobe = self.nprobe
+
+        elif self.index_type == "HNSW":
+            self.index = faiss.IndexHNSWFlat(self.dimension, 32)
+
+        else:
+            raise ValueError(f"Unknown index type: {self.index_type}")
+
+        self._initialized = True
+        logger.info(f"FAISS index initialized: type={self.index_type}, dimension={self.dimension}")
+
+    async def add_documents(
+        self,
+        documents: List[Document],
+        embeddings: List[List[float]]
+    ) -> None:
+        """
+        Add documents with their embeddings
+
+        Args:
+            documents: List of documents
+            embeddings: List of embedding vectors
+        """
+        if not self._initialized:
+            await self.initialize()
+
+        if len(documents) != len(embeddings):
+            raise ValueError("Number of documents must match number of embeddings")
+
+        import numpy as np
+
+        # Convert embeddings to numpy array
+        embeddings_array = np.array(embeddings, dtype=np.float32)
+
+        # Train IVF index if needed
+        if self.index_type == "IVF" and not self.index.is_trained:
+            logger.info(f"Training IVF index with {len(embeddings)} vectors")
+            self.index.train(embeddings_array)
+
+        # Get current index size
+        start_index = len(self.id_to_index)
+
+        # Add to FAISS
+        self.index.add(embeddings_array)
+
+        # Store documents and mappings
+        for i, doc in enumerate(documents):
+            index = start_index + i
+            self.documents[doc.doc_id] = doc
+            self.id_to_index[doc.doc_id] = index
+            self.index_to_id[index] = doc.doc_id
+
+        logger.debug(f"Added {len(documents)} documents. Total: {len(self.documents)}")
+
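Note on the IVF path above: training happens lazily inside the first add_documents() call, and FAISS k-means needs at least nlist training vectors, so the first batch should be comfortably larger than nlist. A seeding sketch under assumptions not in the diff (random vectors as stand-in embeddings, and a Document constructor usable without a score):

import asyncio
import numpy as np

async def seed_ivf_store() -> FAISSVectorStore:
    store = FAISSVectorStore(dimension=1536, index_type="IVF", nlist=100)
    await store.initialize()
    # The first batch trains the quantizer: use well over nlist vectors.
    docs = [Document(doc_id=str(i), content=f"chunk {i}", metadata={})
            for i in range(1000)]
    embs = np.random.rand(1000, 1536).astype(np.float32).tolist()
    await store.add_documents(documents=docs, embeddings=embs)
    return store

asyncio.run(seed_ivf_store())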
+    async def search(
+        self,
+        query_embedding: List[float],
+        top_k: int = 5,
+        filters: Optional[Dict[str, Any]] = None
+    ) -> List[Document]:
+        """
+        Search for similar documents
+
+        Args:
+            query_embedding: Query embedding vector
+            top_k: Number of results to return
+            filters: Metadata filters (applied post-search)
+
+        Returns:
+            List of documents with similarity scores
+        """
+        if not self._initialized:
+            await self.initialize()
+
+        if self.index.ntotal == 0:
+            logger.warning("No documents in index")
+            return []
+
+        import numpy as np
+
+        # Convert query to numpy array
+        query_array = np.array([query_embedding], dtype=np.float32)
+
+        # Search
+        # Get more results if we need to filter
+        search_k = top_k * 3 if filters else top_k
+        distances, indices = self.index.search(query_array, search_k)
+
+        # Convert results to documents
+        results = []
+        for i, idx in enumerate(indices[0]):
+            if idx == -1:  # FAISS returns -1 for missing results
+                break
+
+            # Get document
+            doc_id = self.index_to_id[idx]
+            doc = self.documents[doc_id]
+
+            # Apply filters
+            if filters and not self._match_filters(doc, filters):
+                continue
+
+            # Calculate similarity score
+            distance = distances[0][i]
+            score = self._distance_to_score(distance)
+
+            # Create result document with score
+            result_doc = Document(
+                doc_id=doc.doc_id,
+                content=doc.content,
+                score=score,
+                metadata=doc.metadata
+            )
+
+            results.append(result_doc)
+
+            if len(results) >= top_k:
+                break
+
+        return results
+
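Because filters are applied after FAISS returns search_k = 3 * top_k candidates, a selective filter can leave fewer than top_k results. A filtered-search sketch (query_vec and the "source" key are hypothetical):

# Exact-match metadata filtering, applied post-search as implemented above.
results = await store.search(
    query_embedding=query_vec,
    top_k=5,
    filters={"source": "handbook"},
)
for doc in results:
    print(doc.doc_id, round(doc.score, 3))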
+    async def get_document(self, doc_id: str) -> Optional[Document]:
+        """
+        Get document by ID
+
+        Args:
+            doc_id: Document identifier
+
+        Returns:
+            Document if found, None otherwise
+        """
+        return self.documents.get(doc_id)
+
+    async def delete(self, doc_ids: List[str]) -> None:
+        """
+        Delete documents
+
+        Note: FAISS doesn't support efficient deletion.
+        This implementation removes from metadata but not from index.
+        For true deletion, rebuild the index.
+
+        Args:
+            doc_ids: List of document IDs to delete
+        """
+        for doc_id in doc_ids:
+            if doc_id in self.documents:
+                del self.documents[doc_id]
+            if doc_id in self.id_to_index:
+                index = self.id_to_index[doc_id]
+                del self.id_to_index[doc_id]
+                del self.index_to_id[index]
+
+        logger.warning(
+            f"Deleted {len(doc_ids)} documents from metadata. "
+            "Note: FAISS index still contains vectors. Rebuild index for full deletion."
+        )
+
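Two caveats follow from this delete(): the vectors stay in the FAISS index, and a later search that lands on a deleted slot will raise KeyError at self.index_to_id[idx]. True deletion therefore means rebuilding. A rebuild sketch, assuming a caller-supplied embed_fn since the store does not retain raw embeddings:

async def rebuild(old: FAISSVectorStore, embed_fn) -> FAISSVectorStore:
    docs = list(old.documents.values())  # surviving documents only
    embs = [await embed_fn(d.content) for d in docs]
    new = FAISSVectorStore(
        dimension=old.dimension,
        index_type=old.index_type,
        metric=old.metric,
        nlist=old.nlist,
        nprobe=old.nprobe,
    )
    await new.initialize()
    await new.add_documents(documents=docs, embeddings=embs)
    return new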
+    def _match_filters(self, doc: Document, filters: Dict[str, Any]) -> bool:
+        """Check if document matches metadata filters"""
+        if not doc.metadata:
+            return False
+
+        for key, value in filters.items():
+            if doc.metadata.get(key) != value:
+                return False
+
+        return True
+
+    def _distance_to_score(self, distance: float) -> float:
+        """
+        Convert distance to similarity score
+
+        Args:
+            distance: Distance from FAISS (L2 or IP)
+
+        Returns:
+            Similarity score (0-1, higher is better)
+        """
+        if self.metric == "L2":
+            # L2 distance: lower is better
+            # Convert to similarity: 1 / (1 + distance)
+            return 1.0 / (1.0 + distance)
+        else:
+            # Inner product: higher is better
+            # Assuming normalized vectors, IP is in [-1, 1]
+            # Convert to [0, 1]
+            return (distance + 1.0) / 2.0
+
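Worked examples of the two conversions above (illustrative numbers only):

# L2 (lower distance is better):
#   distance 0.0 -> 1 / (1 + 0.0) = 1.00  (identical vectors)
#   distance 3.0 -> 1 / (1 + 3.0) = 0.25
# Inner product (normalized vectors, so IP lies in [-1, 1]):
#   ip  1.0 -> ( 1.0 + 1) / 2 = 1.0
#   ip -1.0 -> (-1.0 + 1) / 2 = 0.0
# With unnormalized vectors the IP score can fall outside [0, 1].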
+    async def persist(self, path: str) -> None:
+        """
+        Save index to disk
+
+        Args:
+            path: File path to save index
+        """
+        if not self._initialized:
+            raise RuntimeError("Index not initialized")
+
+        import faiss
+        import pickle
+
+        # Save FAISS index
+        faiss.write_index(self.index, f"{path}.index")
+
+        # Save metadata
+        metadata = {
+            "documents": self.documents,
+            "id_to_index": self.id_to_index,
+            "index_to_id": self.index_to_id,
+            "dimension": self.dimension,
+            "index_type": self.index_type,
+            "metric": self.metric,
+            "nlist": self.nlist,
+            "nprobe": self.nprobe
+        }
+
+        with open(f"{path}.metadata", "wb") as f:
+            pickle.dump(metadata, f)
+
+        logger.info(f"Index persisted to {path}")
+
+    @classmethod
+    async def load(cls, path: str) -> "FAISSVectorStore":
+        """
+        Load index from disk
+
+        Args:
+            path: File path to load index from
+
+        Returns:
+            FAISSVectorStore instance
+        """
+        import faiss
+        import pickle
+
+        # Load metadata
+        with open(f"{path}.metadata", "rb") as f:
+            metadata = pickle.load(f)
+
+        # Create instance
+        instance = cls(
+            dimension=metadata["dimension"],
+            index_type=metadata["index_type"],
+            metric=metadata["metric"],
+            nlist=metadata["nlist"],
+            nprobe=metadata["nprobe"]
+        )
+
+        # Load FAISS index
+        instance.index = faiss.read_index(f"{path}.index")
+        instance._initialized = True
+
+        # Load metadata
+        instance.documents = metadata["documents"]
+        instance.id_to_index = metadata["id_to_index"]
+        instance.index_to_id = metadata["index_to_id"]
+
+        logger.info(f"Index loaded from {path}")
+
+        return instance
+
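persist()/load() use a two-file layout: {path}.index via faiss.write_index and {path}.metadata via pickle. A round-trip sketch (the path is illustrative; note that unpickling a .metadata file from an untrusted source carries the usual pickle risks):

await store.persist("/tmp/faiss_store")  # writes /tmp/faiss_store.index and /tmp/faiss_store.metadata
restored = await FAISSVectorStore.load("/tmp/faiss_store")
assert restored.get_stats()["total_documents"] == store.get_stats()["total_documents"]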
+    def get_stats(self) -> Dict[str, Any]:
+        """
+        Get statistics
+
+        Returns:
+            Statistics dictionary
+        """
+        return {
+            "initialized": self._initialized,
+            "total_documents": len(self.documents),
+            "index_size": self.index.ntotal if self.index else 0,
+            "dimension": self.dimension,
+            "index_type": self.index_type,
+            "metric": self.metric
+        }
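Putting the pieces together, a minimal end-to-end sketch with the default Flat/L2 index (toy 4-dimensional embeddings; assumes Document can be built without a score):

import asyncio

async def smoke_test() -> None:
    store = FAISSVectorStore(dimension=4)
    await store.initialize()
    await store.add_documents(
        documents=[Document(doc_id="a", content="alpha", metadata={"lang": "en"})],
        embeddings=[[0.1, 0.2, 0.3, 0.4]],
    )
    hits = await store.search(query_embedding=[0.1, 0.2, 0.3, 0.4], top_k=1)
    print(hits[0].doc_id, hits[0].score)  # exact match -> L2 distance 0 -> score 1.0
    print(store.get_stats())

asyncio.run(smoke_test())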