loom-agent 0.0.4__py3-none-any.whl → 0.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


loom/api/__init__.py ADDED
@@ -0,0 +1,19 @@
+ """Loom Unified API (Version-agnostic)
+
+ This module provides the stable, version-agnostic API for Loom Agent.
+ Users should import from here instead of versioned modules.
+
+ Example:
+     from loom.api import loom_agent
+     from loom.builtin.llms import OpenAILLM
+
+     agent = loom_agent(llm=OpenAILLM(model="gpt-4"), tools={})
+ """
+
+ from .v0_0_3 import LoomAgent, loom_agent, unified_executor
+
+ __all__ = [
+     "LoomAgent",
+     "loom_agent",
+     "unified_executor",
+ ]
loom/api/v0_0_3.py CHANGED
@@ -297,3 +297,4 @@ __all__ = [
     "loom_agent",
     "unified_executor",
 ]
+
@@ -0,0 +1,403 @@
+ """
+ FAISS Vector Store
+
+ Lightweight, in-memory vector storage using FAISS.
+ Ideal for development and small to medium scale deployments.
+ """
+
+ from __future__ import annotations
+
+ import logging
+ from typing import Any, Dict, List, Optional, Tuple
+
+ from loom.interfaces.vector_store import BaseVectorStore
+ from loom.interfaces.retriever import Document
+
+ logger = logging.getLogger(__name__)
+
+
+ class FAISSVectorStore(BaseVectorStore):
+     """
+     FAISS-based vector storage
+
+     Lightweight, in-memory vector database using Facebook's FAISS library.
+     Ideal for:
+     - Development and testing
+     - Small to medium scale deployments (< 1M documents)
+     - Applications without persistence requirements
+
+     Features:
+     - Fast similarity search
+     - Multiple index types (Flat, IVF, HNSW)
+     - In-memory storage
+     - Optional persistence
+
+     Example:
+         # Basic usage
+         store = FAISSVectorStore(dimension=1536)
+         await store.initialize()
+
+         # Add documents
+         await store.add_documents(
+             documents=[doc1, doc2],
+             embeddings=[[0.1, ...], [0.2, ...]]
+         )
+
+         # Search
+         results = await store.search(
+             query_embedding=[0.15, ...],
+             top_k=5
+         )
+
+         # Advanced: use an IVF index for larger datasets
+         store = FAISSVectorStore(
+             dimension=1536,
+             index_type="IVF",
+             nlist=100  # Number of clusters
+         )
+     """
+
+     def __init__(
+         self,
+         dimension: int,
+         index_type: str = "Flat",
+         metric: str = "L2",
+         nlist: int = 100,
+         nprobe: int = 10
+     ):
+         """
+         Args:
+             dimension: Embedding dimension
+             index_type: Index type ("Flat", "IVF", "HNSW")
+             metric: Distance metric ("L2" or "IP" for inner product)
+             nlist: Number of clusters for the IVF index
+             nprobe: Number of clusters to probe during IVF search
+         """
+         self.dimension = dimension
+         self.index_type = index_type
+         self.metric = metric
+         self.nlist = nlist
+         self.nprobe = nprobe
+
+         # FAISS index (created lazily in initialize())
+         self.index = None
+
+         # Document storage and ID <-> FAISS position mappings
+         self.documents: Dict[str, Document] = {}
+         self.id_to_index: Dict[str, int] = {}
+         self.index_to_id: Dict[int, str] = {}
+
+         self._initialized = False
+
+     async def initialize(self) -> None:
+         """Initialize the FAISS index"""
+         if self._initialized:
+             return
+
+         try:
+             import faiss
+         except ImportError:
+             raise ImportError(
+                 "FAISS is required for FAISSVectorStore. "
+                 "Install it with: pip install faiss-cpu or pip install faiss-gpu"
+             )
+
+         # Create index based on type
+         if self.index_type == "Flat":
+             if self.metric == "L2":
+                 self.index = faiss.IndexFlatL2(self.dimension)
+             else:  # IP (inner product)
+                 self.index = faiss.IndexFlatIP(self.dimension)
+
+         elif self.index_type == "IVF":
+             if self.metric == "L2":
+                 quantizer = faiss.IndexFlatL2(self.dimension)
+                 self.index = faiss.IndexIVFFlat(
+                     quantizer,
+                     self.dimension,
+                     self.nlist
+                 )
+             else:
+                 quantizer = faiss.IndexFlatIP(self.dimension)
+                 self.index = faiss.IndexIVFFlat(
+                     quantizer,
+                     self.dimension,
+                     self.nlist
+                 )
+             # IVF needs training (done when the first batch is added)
+             self.index.nprobe = self.nprobe
+
+         elif self.index_type == "HNSW":
+             # Note: IndexHNSWFlat is constructed with its default L2 metric
+             # here, so the "IP" metric setting is ignored for HNSW
+             self.index = faiss.IndexHNSWFlat(self.dimension, 32)
+
+         else:
+             raise ValueError(f"Unknown index type: {self.index_type}")
+
+         self._initialized = True
+         logger.info(f"FAISS index initialized: type={self.index_type}, dimension={self.dimension}")
+
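A note on the metric choice: with metric="IP", inner product equals cosine similarity only for L2-normalized embeddings, which is also the assumption the store's score conversion makes further down. A minimal sketch of pairing this store with normalized vectors; the normalize helper is illustrative, not part of the package:

    import numpy as np

    def normalize(vectors):
        # L2-normalize rows so that inner product == cosine similarity
        arr = np.array(vectors, dtype=np.float32)
        return (arr / np.linalg.norm(arr, axis=1, keepdims=True)).tolist()

    store = FAISSVectorStore(dimension=1536, metric="IP")
    # inside an async context:
    # await store.initialize()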
+     async def add_documents(
+         self,
+         documents: List[Document],
+         embeddings: List[List[float]]
+     ) -> None:
+         """
+         Add documents with their embeddings
+
+         Args:
+             documents: List of documents
+             embeddings: List of embedding vectors
+         """
+         if not self._initialized:
+             await self.initialize()
+
+         if len(documents) != len(embeddings):
+             raise ValueError("Number of documents must match number of embeddings")
+
+         import numpy as np
+
+         # Convert embeddings to a float32 numpy array (required by FAISS)
+         embeddings_array = np.array(embeddings, dtype=np.float32)
+
+         # Train IVF index if needed
+         if self.index_type == "IVF" and not self.index.is_trained:
+             logger.info(f"Training IVF index with {len(embeddings)} vectors")
+             self.index.train(embeddings_array)
+
+         # Record the index size before adding. Using index.ntotal (rather
+         # than len(self.id_to_index)) keeps the mapping correct even after
+         # delete() has removed entries from id_to_index.
+         start_index = self.index.ntotal
+
+         # Add to FAISS
+         self.index.add(embeddings_array)
+
+         # Store documents and mappings
+         for i, doc in enumerate(documents):
+             index = start_index + i
+             self.documents[doc.doc_id] = doc
+             self.id_to_index[doc.doc_id] = index
+             self.index_to_id[index] = doc.doc_id
+
+         logger.debug(f"Added {len(documents)} documents. Total: {len(self.documents)}")
+
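A hypothetical usage sketch for the method above, assuming Document accepts the keyword arguments this diff itself uses (doc_id, content, metadata) and with embed() standing in for whatever embedding model produces the vectors. Note that an IVF index is trained on the first batch, so that batch should contain at least nlist vectors:

    from loom.interfaces.retriever import Document

    docs = [
        Document(doc_id="a", content="FAISS is a similarity-search library",
                 metadata={"source": "docs"}),
        Document(doc_id="b", content="IVF indexes need training",
                 metadata={"source": "notes"}),
    ]
    embeddings = [embed(d.content) for d in docs]  # embed() is a placeholder
    await store.add_documents(documents=docs, embeddings=embeddings)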
+     async def search(
+         self,
+         query_embedding: List[float],
+         top_k: int = 5,
+         filters: Optional[Dict[str, Any]] = None
+     ) -> List[Document]:
+         """
+         Search for similar documents
+
+         Args:
+             query_embedding: Query embedding vector
+             top_k: Number of results to return
+             filters: Metadata filters (applied post-search)
+
+         Returns:
+             List of documents with similarity scores
+         """
+         if not self._initialized:
+             await self.initialize()
+
+         if self.index.ntotal == 0:
+             logger.warning("No documents in index")
+             return []
+
+         import numpy as np
+
+         # Convert query to a float32 numpy array
+         query_array = np.array([query_embedding], dtype=np.float32)
+
+         # Search; fetch extra candidates if results must be filtered afterwards
+         search_k = top_k * 3 if filters else top_k
+         distances, indices = self.index.search(query_array, search_k)
+
+         # Convert results to documents
+         results = []
+         for i, idx in enumerate(indices[0]):
+             if idx == -1:  # FAISS returns -1 for missing results
+                 break
+
+             # Get document; skip stale slots left behind by delete()
+             doc_id = self.index_to_id.get(int(idx))
+             if doc_id is None:
+                 continue
+             doc = self.documents[doc_id]
+
+             # Apply filters
+             if filters and not self._match_filters(doc, filters):
+                 continue
+
+             # Calculate similarity score
+             distance = distances[0][i]
+             score = self._distance_to_score(distance)
+
+             # Create result document with score
+             result_doc = Document(
+                 doc_id=doc.doc_id,
+                 content=doc.content,
+                 score=score,
+                 metadata=doc.metadata
+             )
+
+             results.append(result_doc)
+
+             if len(results) >= top_k:
+                 break
+
+         return results
+
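Because filters are applied after the vector search, the method over-fetches top_k * 3 candidates to compensate; a very selective filter can still return fewer than top_k results. A usage sketch continuing the example above:

    results = await store.search(
        query_embedding=embed("how are IVF indexes trained?"),
        top_k=5,
        filters={"source": "notes"},  # exact-match metadata filter
    )
    for doc in results:
        print(doc.doc_id, round(doc.score, 3))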
+     async def get_document(self, doc_id: str) -> Optional[Document]:
+         """
+         Get document by ID
+
+         Args:
+             doc_id: Document identifier
+
+         Returns:
+             Document if found, None otherwise
+         """
+         return self.documents.get(doc_id)
+
+     async def delete(self, doc_ids: List[str]) -> None:
+         """
+         Delete documents
+
+         Note: FAISS doesn't support efficient deletion.
+         This implementation removes documents from the metadata maps but not
+         from the FAISS index. For true deletion, rebuild the index.
+
+         Args:
+             doc_ids: List of document IDs to delete
+         """
+         for doc_id in doc_ids:
+             if doc_id in self.documents:
+                 del self.documents[doc_id]
+             if doc_id in self.id_to_index:
+                 index = self.id_to_index[doc_id]
+                 del self.id_to_index[doc_id]
+                 del self.index_to_id[index]
+
+         logger.warning(
+             f"Deleted {len(doc_ids)} documents from metadata. "
+             "Note: FAISS index still contains vectors. Rebuild index for full deletion."
+         )
+
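For true deletion the docstring recommends rebuilding the index. A minimal sketch of such a rebuild for a Flat index; rebuild_after_deletes is a hypothetical helper, not part of the package, and it relies on Flat indexes supporting reconstruct():

    async def rebuild_after_deletes(store: FAISSVectorStore) -> None:
        # Pull the surviving vectors out of the old Flat index...
        survivors = sorted(store.id_to_index.items(), key=lambda kv: kv[1])
        vectors = [store.index.reconstruct(pos).tolist() for _, pos in survivors]
        docs = [store.documents[doc_id] for doc_id, _ in survivors]
        # ...then reset and re-add into a fresh index with compact mappings
        store.index = None
        store._initialized = False
        store.documents, store.id_to_index, store.index_to_id = {}, {}, {}
        await store.add_documents(docs, vectors)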
+     def _match_filters(self, doc: Document, filters: Dict[str, Any]) -> bool:
+         """Check if document matches metadata filters"""
+         if not doc.metadata:
+             return False
+
+         for key, value in filters.items():
+             if doc.metadata.get(key) != value:
+                 return False
+
+         return True
+
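Filter semantics, for reference: every key must match exactly (AND), and a document without metadata never matches any filter:

    doc = Document(doc_id="a", content="...", metadata={"source": "docs", "year": 2024})
    store._match_filters(doc, {"source": "docs"})                # True
    store._match_filters(doc, {"source": "docs", "year": 2023})  # False: all keys must match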
+     def _distance_to_score(self, distance: float) -> float:
+         """
+         Convert distance to similarity score
+
+         Args:
+             distance: Distance from FAISS (L2 or IP)
+
+         Returns:
+             Similarity score (0-1, higher is better)
+         """
+         if self.metric == "L2":
+             # L2 distance: lower is better
+             # Convert to similarity: 1 / (1 + distance)
+             return 1.0 / (1.0 + distance)
+         else:
+             # Inner product: higher is better
+             # Assuming normalized vectors, IP is in [-1, 1]
+             # Convert to [0, 1]
+             return (distance + 1.0) / 2.0
+
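A few worked values: under L2, an exact match (distance 0) scores 1.0, and a distance of 3 scores 1 / (1 + 3) = 0.25; under IP with unit vectors, a cosine of 1.0 maps to 1.0 and orthogonal vectors (0.0) to 0.5. Note the IP branch assumes normalized embeddings; with unnormalized vectors the inner product can exceed 1, pushing the "0-1" score above 1.0:

    store_l2 = FAISSVectorStore(dimension=4, metric="L2")
    assert store_l2._distance_to_score(0.0) == 1.0   # exact match
    assert store_l2._distance_to_score(3.0) == 0.25  # 1 / (1 + 3)

    store_ip = FAISSVectorStore(dimension=4, metric="IP")
    assert store_ip._distance_to_score(1.0) == 1.0   # cosine 1 on unit vectors
    assert store_ip._distance_to_score(0.0) == 0.5   # orthogonal unit vectors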
+     async def persist(self, path: str) -> None:
+         """
+         Save index to disk
+
+         Args:
+             path: File path to save index
+         """
+         if not self._initialized:
+             raise RuntimeError("Index not initialized")
+
+         import faiss
+         import pickle
+
+         # Save FAISS index
+         faiss.write_index(self.index, f"{path}.index")
+
+         # Save metadata
+         metadata = {
+             "documents": self.documents,
+             "id_to_index": self.id_to_index,
+             "index_to_id": self.index_to_id,
+             "dimension": self.dimension,
+             "index_type": self.index_type,
+             "metric": self.metric,
+             "nlist": self.nlist,
+             "nprobe": self.nprobe
+         }
+
+         with open(f"{path}.metadata", "wb") as f:
+             pickle.dump(metadata, f)
+
+         logger.info(f"Index persisted to {path}")
+
+     @classmethod
+     async def load(cls, path: str) -> "FAISSVectorStore":
+         """
+         Load index from disk
+
+         Args:
+             path: File path to load index from
+
+         Returns:
+             FAISSVectorStore instance
+         """
+         import faiss
+         import pickle
+
+         # Load metadata
+         with open(f"{path}.metadata", "rb") as f:
+             metadata = pickle.load(f)
+
+         # Create instance
+         instance = cls(
+             dimension=metadata["dimension"],
+             index_type=metadata["index_type"],
+             metric=metadata["metric"],
+             nlist=metadata["nlist"],
+             nprobe=metadata["nprobe"]
+         )
+
+         # Load FAISS index
+         instance.index = faiss.read_index(f"{path}.index")
+         instance._initialized = True
+
+         # Restore document store and mappings
+         instance.documents = metadata["documents"]
+         instance.id_to_index = metadata["id_to_index"]
+         instance.index_to_id = metadata["index_to_id"]
+
+         logger.info(f"Index loaded from {path}")
+
+         return instance
+
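A round-trip sketch: persist("my_store") writes my_store.index and my_store.metadata side by side, and load is an async classmethod, so it must be awaited. Since the metadata file is unpickled on load, only load files from a trusted source; pickle can execute arbitrary code:

    await store.persist("my_store")

    # Later, or in another process:
    restored = await FAISSVectorStore.load("my_store")
    assert restored.get_stats()["total_documents"] == len(store.documents)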
+     def get_stats(self) -> Dict[str, Any]:
+         """
+         Get statistics
+
+         Returns:
+             Statistics dictionary
+         """
+         return {
+             "initialized": self._initialized,
+             "total_documents": len(self.documents),
+             "index_size": self.index.ntotal if self.index else 0,
+             "dimension": self.dimension,
+             "index_type": self.index_type,
+             "metric": self.metric
+         }