kite-agent 0.1.0__py3-none-any.whl
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their public registries.
- kite/__init__.py +46 -0
- kite/ab_testing.py +384 -0
- kite/agent.py +556 -0
- kite/agents/__init__.py +3 -0
- kite/agents/plan_execute.py +191 -0
- kite/agents/react_agent.py +509 -0
- kite/agents/reflective_agent.py +90 -0
- kite/agents/rewoo.py +119 -0
- kite/agents/tot.py +151 -0
- kite/conversation.py +125 -0
- kite/core.py +974 -0
- kite/data_loaders.py +111 -0
- kite/embedding_providers.py +372 -0
- kite/llm_providers.py +1278 -0
- kite/memory/__init__.py +6 -0
- kite/memory/advanced_rag.py +333 -0
- kite/memory/graph_rag.py +719 -0
- kite/memory/session_memory.py +423 -0
- kite/memory/vector_memory.py +579 -0
- kite/monitoring.py +611 -0
- kite/observers.py +107 -0
- kite/optimization/__init__.py +9 -0
- kite/optimization/resource_router.py +80 -0
- kite/persistence.py +42 -0
- kite/pipeline/__init__.py +5 -0
- kite/pipeline/deterministic_pipeline.py +323 -0
- kite/pipeline/reactive_pipeline.py +171 -0
- kite/pipeline_manager.py +15 -0
- kite/routing/__init__.py +6 -0
- kite/routing/aggregator_router.py +325 -0
- kite/routing/llm_router.py +149 -0
- kite/routing/semantic_router.py +228 -0
- kite/safety/__init__.py +6 -0
- kite/safety/circuit_breaker.py +360 -0
- kite/safety/guardrails.py +82 -0
- kite/safety/idempotency_manager.py +304 -0
- kite/safety/kill_switch.py +75 -0
- kite/tool.py +183 -0
- kite/tool_registry.py +87 -0
- kite/tools/__init__.py +21 -0
- kite/tools/code_execution.py +53 -0
- kite/tools/contrib/__init__.py +19 -0
- kite/tools/contrib/calculator.py +26 -0
- kite/tools/contrib/datetime_utils.py +20 -0
- kite/tools/contrib/linkedin.py +428 -0
- kite/tools/contrib/web_search.py +30 -0
- kite/tools/mcp/__init__.py +31 -0
- kite/tools/mcp/database_mcp.py +267 -0
- kite/tools/mcp/gdrive_mcp_server.py +503 -0
- kite/tools/mcp/gmail_mcp_server.py +601 -0
- kite/tools/mcp/postgres_mcp_server.py +490 -0
- kite/tools/mcp/slack_mcp_server.py +538 -0
- kite/tools/mcp/stripe_mcp_server.py +219 -0
- kite/tools/search.py +90 -0
- kite/tools/system_tools.py +54 -0
- kite/tools_manager.py +27 -0
- kite_agent-0.1.0.dist-info/METADATA +621 -0
- kite_agent-0.1.0.dist-info/RECORD +61 -0
- kite_agent-0.1.0.dist-info/WHEEL +5 -0
- kite_agent-0.1.0.dist-info/licenses/LICENSE +21 -0
- kite_agent-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,579 @@
"""
Vector Memory System
Based on Chapter 3.3: Long-Term Memory

Semantic search using vector embeddings for agent knowledge retrieval.

Key insight from book:
Scenario A (Smart): Retrieve 1 relevant page -> $0.01 per query
Scenario B (Lazy):  Dump 500-page manual     -> $1.00 per query

For 1,000 users/day: Smart = $300/month, Lazy = $30,000/month!

Run: python vector_memory.py
"""

import os
from typing import List, Dict, Optional, Tuple, Any
from dataclasses import dataclass
import logging
import numpy as np
import json

# import chromadb  # Moved inside _init_chroma to avoid top-level Pydantic side effects
# from chromadb.config import Settings
try:
    import faiss
except ImportError:
    faiss = None

# Production vector DBs (all imported lazily inside their _init_* methods)
# import qdrant_client   # Moved inside _init_qdrant
# import pinecone        # Moved inside _init_pinecone
# import weaviate        # Moved inside _init_weaviate
# import pymilvus        # Moved inside _init_milvus
# import psycopg2        # Moved inside _init_pgvector
# import redis           # Moved inside _init_redis
# import elasticsearch   # Moved inside _init_elasticsearch

from dotenv import load_dotenv

load_dotenv()


@dataclass
class Document:
    """A document to store in vector memory."""
    id: str
    text: str
    metadata: Optional[Dict] = None


class VectorMemory:
    """
    Vector-based long-term memory for AI agents.
    Supports multiple backends: chroma, faiss, qdrant, pinecone, weaviate,
    milvus, pgvector, redis, elasticsearch, memory.
    """
    def __init__(self,
                 backend: str = "chroma",
                 collection_name: str = "agent_memory",
                 persist_dir: str = "./vector_db",
                 embedding_provider=None):
        self.backend = backend.lower()
        self.collection_name = collection_name
        self.persist_dir = persist_dir
        self.embedding_provider = embedding_provider
        self.logger = logging.getLogger(f"VectorMemory({self.backend})")

        if self.backend == "chroma":
            self._init_chroma()
        elif self.backend == "faiss":
            self._init_faiss()
        elif self.backend == "qdrant":
            self._init_qdrant()
        elif self.backend == "pinecone":
            self._init_pinecone()
        elif self.backend == "weaviate":
            self._init_weaviate()
        elif self.backend == "milvus":
            self._init_milvus()
        elif self.backend == "pgvector":
            self._init_pgvector()
        elif self.backend == "redis":
            self._init_redis()
        elif self.backend == "elasticsearch":
            self._init_elasticsearch()
        elif self.backend == "memory":
            self._init_memory_backend()
        else:
            self._init_chroma()  # Default to Chroma for unknown backends
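
    # Backend connection settings are read from environment variables in the
    # _init_* methods below: QDRANT_URL / QDRANT_API_KEY, PINECONE_API_KEY,
    # WEAVIATE_URL / WEAVIATE_API_KEY, MILVUS_HOST / MILVUS_PORT, DATABASE_URL
    # (pgvector), REDIS_HOST / REDIS_PORT / REDIS_PASSWORD, and ELASTICSEARCH_URL.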

    def _init_chroma(self):
        import chromadb
        try:
            # New Chroma API (0.4.0+)
            self.chroma_client = chromadb.PersistentClient(path=self.persist_dir)
        except AttributeError:
            # Old Chroma API (0.3.x)
            from chromadb.config import Settings
            self.chroma_client = chromadb.Client(Settings(
                chroma_db_impl="duckdb+parquet",
                persist_directory=self.persist_dir
            ))
        self.collection = self.chroma_client.get_or_create_collection(name=self.collection_name)
        print("[OK] Vector memory (Chroma) initialized")

    def _init_faiss(self):
        if faiss is None:
            raise ImportError("faiss-cpu not installed. Run 'pip install faiss-cpu'")
        # Dimension depends on the embedding model: 384 for all-MiniLM-L6-v2
        dim = 384
        self.index = faiss.IndexFlatL2(dim)
        self.doc_store = {}   # int_id -> (id, text, metadata)
        self.id_to_int = {}   # string_id -> int_id
        print("[OK] Vector memory (FAISS) initialized")

    def _init_qdrant(self):
        from qdrant_client import QdrantClient
        from qdrant_client.http import models as qmodels

        url = os.getenv("QDRANT_URL", "http://localhost:6333")
        api_key = os.getenv("QDRANT_API_KEY")
        self.client = QdrantClient(url=url, api_key=api_key)

        # Create the collection only if it is missing; recreating it on every
        # init would silently wipe previously stored vectors.
        try:
            self.client.get_collection(self.collection_name)
        except Exception:
            self.client.create_collection(
                collection_name=self.collection_name,
                vectors_config=qmodels.VectorParams(size=384, distance=qmodels.Distance.COSINE),
            )
        print("[OK] Vector memory (Qdrant) initialized")

    def _init_pinecone(self):
        import pinecone

        api_key = os.getenv("PINECONE_API_KEY")
        if not api_key:
            raise ValueError("PINECONE_API_KEY environment variable is required")

        pc = pinecone.Pinecone(api_key=api_key)
        # Create the index if it does not exist yet
        if self.collection_name not in pc.list_indexes().names():
            pc.create_index(
                name=self.collection_name,
                dimension=384,
                metric='cosine',
                spec=pinecone.ServerlessSpec(cloud='aws', region='us-east-1')
            )
        self.index = pc.Index(self.collection_name)
        print("[OK] Vector memory (Pinecone) initialized")

    def _init_weaviate(self):
        import weaviate

        auth_config = weaviate.auth.AuthApiKey(api_key=os.getenv("WEAVIATE_API_KEY")) if os.getenv("WEAVIATE_API_KEY") else None
        self.client = weaviate.Client(
            url=os.getenv("WEAVIATE_URL", "http://localhost:8080"),
            auth_client_config=auth_config
        )
        print("[OK] Vector memory (Weaviate) initialized")

    def _init_milvus(self):
        from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection, utility

        connections.connect("default", host=os.getenv("MILVUS_HOST", "localhost"), port=os.getenv("MILVUS_PORT", "19530"))

        if not utility.has_collection(self.collection_name):
            fields = [
                FieldSchema(name="id", dtype=DataType.VARCHAR, is_primary=True, max_length=100),
                FieldSchema(name="text", dtype=DataType.VARCHAR, max_length=65535),
                FieldSchema(name="vector", dtype=DataType.FLOAT_VECTOR, dim=384)
            ]
            schema = CollectionSchema(fields, "Kite vector memory")
            self.collection = Collection(self.collection_name, schema)

            index_params = {
                "metric_type": "L2",
                "index_type": "IVF_FLAT",
                "params": {"nlist": 1024}
            }
            self.collection.create_index(field_name="vector", index_params=index_params)
        else:
            self.collection = Collection(self.collection_name)

        self.collection.load()
        print("[OK] Vector memory (Milvus) initialized")

    def _init_pgvector(self):
        import psycopg2

        self.conn = psycopg2.connect(os.getenv("DATABASE_URL", "postgresql://postgres:postgres@localhost:5432/postgres"))
        with self.conn.cursor() as cur:
            cur.execute("CREATE EXTENSION IF NOT EXISTS vector")
            cur.execute(f"CREATE TABLE IF NOT EXISTS {self.collection_name} (id TEXT PRIMARY KEY, text TEXT, embedding vector(384), metadata JSONB)")
        self.conn.commit()
        print("[OK] Vector memory (PGVector) initialized")

    def _init_redis(self):
        import redis

        self.client = redis.Redis(
            host=os.getenv("REDIS_HOST", "localhost"),
            port=int(os.getenv("REDIS_PORT", 6379)),
            password=os.getenv("REDIS_PASSWORD")
        )
        # RediSearch index creation and KNN queries would go here; this backend
        # currently only establishes the connection.
        print("[OK] Vector memory (Redis) initialized")

    def _init_elasticsearch(self):
        from elasticsearch import Elasticsearch

        self.client = Elasticsearch(os.getenv("ELASTICSEARCH_URL", "http://localhost:9200"))
        if not self.client.indices.exists(index=self.collection_name):
            self.client.indices.create(
                index=self.collection_name,
                body={
                    "mappings": {
                        "properties": {
                            "embedding": {"type": "dense_vector", "dims": 384, "index": True, "similarity": "cosine"},
                            "text": {"type": "text"},
                            "metadata": {"type": "object"}
                        }
                    }
                }
            )
        print("[OK] Vector memory (Elasticsearch) initialized")

    def _init_memory_backend(self):
        self.storage = []  # list of (id, text, vector, metadata)
        print("[OK] Vector memory (In-Memory) initialized")

    def _chunk_text(self, text: str, chunk_size: int = 500, overlap: int = 50) -> List[str]:
        """Split text into sentence-aligned chunks of roughly chunk_size characters.

        A positive overlap carries the last sentence of each chunk into the next one.
        """
        sentences = text.replace('! ', '!|').replace('? ', '?|').replace('. ', '.|').split('|')
        chunks = []
        current_chunk = ""
        for sentence in sentences:
            if len(current_chunk) + len(sentence) < chunk_size:
                current_chunk += sentence + " "
            else:
                if current_chunk:
                    chunks.append(current_chunk.strip())
                current_chunk = sentence + " "
        if current_chunk:
            chunks.append(current_chunk.strip())
        if overlap > 0 and len(chunks) > 1:
            # Prepend the last sentence of the previous chunk so that context
            # spanning a chunk boundary is not lost.
            overlapped_chunks = [chunks[0]]
            for i in range(1, len(chunks)):
                prev_sentences = chunks[i - 1].split('. ')
                overlap_text = prev_sentences[-1] + ". " if prev_sentences else ""
                overlapped_chunks.append(overlap_text + chunks[i])
            chunks = overlapped_chunks
        return chunks
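
    # Illustrative example (not executed): with chunk_size=500, a passage of
    # ~1,000 characters splits into two chunks, and with overlap enabled the
    # last sentence of chunk 0 is repeated at the start of chunk 1.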

    def _get_embedding(self, text: str) -> List[float]:
        """Generate an embedding via the configured provider."""
        if self.embedding_provider:
            return self.embedding_provider.embed(text)
        # Fallback dummy embedding when no provider is configured
        # (useful for testing plumbing, useless for real similarity search)
        return [0.1] * 384

    def store(self, doc_id: str, text: str, metadata: Optional[Dict] = None):
        """Alias for add_document, kept for compatibility."""
        return self.add_document(doc_id, text, metadata)

    def add_document(
        self,
        doc_id: str,
        text: str,
        metadata: Optional[Dict] = None,
        auto_chunk: bool = True
    ) -> int:
        """
        Add a document to vector memory.

        Args:
            doc_id: Unique document ID
            text: Document text
            metadata: Optional metadata
            auto_chunk: Whether to auto-chunk large documents

        Returns:
            Number of chunks added
        """
        print(f"\nAdding document: {doc_id}")

        # Chunk if needed
        if auto_chunk and len(text) > 500:
            chunks = self._chunk_text(text)
            print(f"  Chunked into {len(chunks)} pieces")
        else:
            chunks = [text]

        # Generate embeddings
        embeddings = [self._get_embedding(chunk) for chunk in chunks]

        ids = [f"{doc_id}_chunk_{i}" for i in range(len(chunks))]
        # Always record doc_id so delete_document can find every chunk later
        metadatas = [{**(metadata or {"source": "Kite"}), "doc_id": doc_id} for _ in chunks]

        if self.backend == "chroma":
            self.collection.add(
                ids=ids,
                documents=chunks,
                embeddings=embeddings,
                metadatas=metadatas
            )
        elif self.backend == "faiss":
            for cid, chunk, emb, meta in zip(ids, chunks, embeddings, metadatas):
                int_id = len(self.id_to_int)
                self.id_to_int[cid] = int_id
                self.index.add(np.array([emb]).astype('float32'))
                self.doc_store[int_id] = (cid, chunk, meta)
        elif self.backend == "qdrant":
            from qdrant_client.http import models as qmodels
            import uuid
            points = [
                qmodels.PointStruct(
                    # Qdrant point IDs must be unsigned ints or UUIDs, so map
                    # each chunk ID to a deterministic UUID and keep the
                    # original ID in the payload.
                    id=str(uuid.uuid5(uuid.NAMESPACE_URL, cid)),
                    vector=emb,
                    payload={"chunk_id": cid, "text": chunk, **meta}
                ) for cid, chunk, emb, meta in zip(ids, chunks, embeddings, metadatas)
            ]
            self.client.upsert(collection_name=self.collection_name, points=points)
        elif self.backend == "pinecone":
            to_upsert = [
                (cid, emb, {"text": chunk, **meta})
                for cid, chunk, emb, meta in zip(ids, chunks, embeddings, metadatas)
            ]
            self.index.upsert(vectors=to_upsert)
        elif self.backend == "weaviate":
            with self.client.batch as batch:
                for cid, chunk, emb, meta in zip(ids, chunks, embeddings, metadatas):
                    batch.add_data_object(
                        data_object={"text": chunk, **meta},
                        class_name="Document",
                        vector=emb
                    )
        elif self.backend == "milvus":
            # Column-based insert matching the (id, text, vector) schema
            entities = [ids, chunks, embeddings]
            self.collection.insert(entities)
        elif self.backend == "pgvector":
            from psycopg2.extras import execute_values
            with self.conn.cursor() as cur:
                execute_values(cur,
                    f"INSERT INTO {self.collection_name} (id, text, embedding, metadata) VALUES %s ON CONFLICT (id) DO UPDATE SET text=EXCLUDED.text, embedding=EXCLUDED.embedding, metadata=EXCLUDED.metadata",
                    [(cid, chunk, emb, json.dumps(meta)) for cid, chunk, emb, meta in zip(ids, chunks, embeddings, metadatas)])
            self.conn.commit()
        elif self.backend == "elasticsearch":
            from elasticsearch import helpers
            actions = [
                {
                    "_index": self.collection_name,
                    "_id": cid,
                    "_source": {"text": chunk, "embedding": emb, "metadata": meta}
                }
                for cid, chunk, emb, meta in zip(ids, chunks, embeddings, metadatas)
            ]
            helpers.bulk(self.client, actions)
        elif self.backend == "memory":
            for cid, chunk, emb, meta in zip(ids, chunks, embeddings, metadatas):
                self.storage.append((cid, chunk, emb, meta))

        print(f"  [OK] Added {len(chunks)} chunks with embeddings")
        return len(chunks)
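
    # Example (illustrative): add_document("manual", long_text) chunks the text,
    # embeds each piece, and writes chunk IDs "manual_chunk_0", "manual_chunk_1",
    # ... to whichever backend was selected at construction time.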

    def search(
        self,
        query: str,
        k: int = 3,
        top_k: Optional[int] = None,
        filter_metadata: Optional[Dict] = None
    ) -> List[Tuple[str, str, float]]:
        """
        Search for the most similar documents.

        Returns (id, text, score) tuples. Score semantics vary by backend:
        Chroma, FAISS, pgvector, Milvus, and the in-memory backend return
        distances (lower is better); Qdrant, Pinecone, Weaviate, and
        Elasticsearch return their native similarity/distance scores.
        """
        k = top_k or k
        query_emb = self._get_embedding(query)

        if self.backend == "chroma":
            results = self.collection.query(
                query_embeddings=[query_emb],
                n_results=k,
                where=filter_metadata
            )
            output = []
            if results['ids']:
                for i in range(len(results['ids'][0])):
                    output.append((
                        results['ids'][0][i],
                        results['documents'][0][i],
                        results['distances'][0][i]
                    ))
            return output

        elif self.backend == "qdrant":
            res = self.client.search(collection_name=self.collection_name, query_vector=query_emb, limit=k)
            return [(p.payload.get("chunk_id", str(p.id)), p.payload["text"], p.score) for p in res]

        elif self.backend == "pinecone":
            res = self.index.query(vector=query_emb, top_k=k, include_metadata=True)
            return [(m.id, m.metadata["text"], m.score) for m in res.matches]

        elif self.backend == "weaviate":
            res = (self.client.query.get("Document", ["text"])
                   .with_near_vector({"vector": query_emb})
                   .with_limit(k)
                   .with_additional(["id", "distance"]).do())
            docs = res["data"]["Get"]["Document"]
            return [(d["_additional"]["id"], d["text"], d["_additional"]["distance"]) for d in docs]

        elif self.backend == "milvus":
            res = self.collection.search(data=[query_emb], anns_field="vector", param={"metric_type": "L2"}, limit=k, output_fields=["text"])
            output = []
            for hits in res:
                for hit in hits:
                    output.append((hit.id, hit.entity.get("text"), hit.distance))
            return output

        elif self.backend == "pgvector":
            with self.conn.cursor() as cur:
                cur.execute(f"SELECT id, text, embedding <=> %s::vector AS distance FROM {self.collection_name} ORDER BY distance LIMIT %s", (query_emb, k))
                return [(r[0], r[1], float(r[2])) for r in cur.fetchall()]

        elif self.backend == "elasticsearch":
            res = self.client.search(index=self.collection_name, body={
                "knn": {"field": "embedding", "query_vector": query_emb, "k": k, "num_candidates": 100},
                "_source": ["text"]
            })
            return [(hit["_id"], hit["_source"]["text"], hit["_score"]) for hit in res["hits"]["hits"]]

        elif self.backend == "faiss":
            D, I = self.index.search(np.array([query_emb]).astype('float32'), k)
            output = []
            for dist, idx in zip(D[0], I[0]):
                if idx != -1 and idx in self.doc_store:
                    doc_id, text, meta = self.doc_store[int(idx)]
                    output.append((doc_id, text, float(dist)))
            return output

        elif self.backend == "memory":
            # Brute-force cosine similarity (fine for small stores)
            scores = []
            for cid, text, emb, meta in self.storage:
                sim = np.dot(query_emb, emb) / (np.linalg.norm(query_emb) * np.linalg.norm(emb))
                scores.append((cid, text, 1 - sim))  # cosine distance
            scores.sort(key=lambda x: x[2])
            return scores[:k]

        return []

    def delete_document(self, doc_id: str):
        """Delete all chunks of a document (Chroma backend)."""
        # Find all chunks tagged with this doc_id in their metadata
        results = self.collection.get(
            where={"doc_id": doc_id}
        )

        if results['ids']:
            self.collection.delete(ids=results['ids'])
            print(f"[OK] Deleted document: {doc_id} ({len(results['ids'])} chunks)")
        else:
            print(f"Document not found: {doc_id}")

    def clear_all(self):
        """Clear all documents from memory (Chroma backend)."""
        self.chroma_client.delete_collection(self.collection_name)
        self.collection = self.chroma_client.create_collection(self.collection_name)
        print("[OK] Cleared all documents")

    def get_stats(self) -> Dict:
        """Get memory statistics (Chroma backend)."""
        count = self.collection.count()

        # Estimate cost savings, assuming an average query retrieves 3 chunks
        # instead of dumping every chunk into context. Characters are used as
        # a rough proxy for tokens here.
        avg_chunk_size = 500  # characters
        avg_chunks_retrieved = 3
        cost_per_1k_tokens = 0.0001  # rough estimate

        total_size = count * avg_chunk_size
        smart_retrieval_size = avg_chunks_retrieved * avg_chunk_size

        cost_smart = (smart_retrieval_size / 1000) * cost_per_1k_tokens
        cost_dump_all = (total_size / 1000) * cost_per_1k_tokens

        return {
            "total_chunks": count,
            "estimated_total_size": total_size,
            "cost_per_smart_query": cost_smart,
            "cost_per_dump_all_query": cost_dump_all,
            "cost_savings_per_query": cost_dump_all - cost_smart,
            # Despite the name, this is the savings per 1,000 queries.
            "monthly_savings_1k_queries": (cost_dump_all - cost_smart) * 1000
        }
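
# Worked example of the get_stats() estimate (illustrative): with 12 stored
# chunks, a dump-all query costs (12 * 500 / 1000) * $0.0001 = $0.0006, while
# smart retrieval of 3 chunks costs (3 * 500 / 1000) * $0.0001 = $0.00015.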


# ============================================================================
# DEMO
# ============================================================================

def demo():
    print("=" * 70)
    print("VECTOR MEMORY SYSTEM DEMO")
    print("=" * 70)
    print("\nDemonstrating Chapter 3 concept:")
    print("Smart retrieval vs. dumping everything into context\n")

    # Initialize (no embedding_provider: the fallback embedding makes all
    # similarity scores identical, so this demo only exercises the plumbing)
    memory = VectorMemory(collection_name="demo_memory")
    memory.clear_all()

    # Add sample documents
    documents = {
        "python_intro": """
        Python is a high-level programming language known for its simplicity
        and readability. It was created by Guido van Rossum and first released
        in 1991. Python supports multiple programming paradigms including
        procedural, object-oriented, and functional programming.
        """,
        "javascript_intro": """
        JavaScript is a programming language primarily used for web development.
        It allows developers to create interactive websites and is essential for
        front-end development. JavaScript can also be used on the server-side
        with Node.js.
        """,
        "database_intro": """
        A database is an organized collection of data stored and accessed
        electronically. SQL (Structured Query Language) is used to manage
        relational databases. PostgreSQL and MySQL are popular database systems.
        """,
        "ai_intro": """
        Artificial Intelligence (AI) is the simulation of human intelligence by
        machines. Machine learning is a subset of AI that allows systems to
        learn from data. Large Language Models like GPT-4 are examples of AI.
        """
    }

    print("Adding documents to memory...")
    for doc_id, text in documents.items():
        memory.add_document(doc_id, text.strip())

    # Example searches
    queries = [
        "programming languages for web development",
        "how to store data",
        "what is machine learning"
    ]

    for query in queries:
        print(f"\n{'='*70}")
        print(f"Query: {query}")
        results = memory.search(query, k=2)

        print("\n[CHART] Results:")
        # search() returns (chunk_id, text, distance) tuples
        for i, (chunk_id, text, distance) in enumerate(results, 1):
            print(f"\n  {i}. Score: {1 - distance:.3f}")
            print(f"     Text: {text[:100]}...")
            print(f"     From: {chunk_id}")

    # Show cost analysis
    print(f"\n{'='*70}")
    print("COST ANALYSIS (Chapter 3)")
    print('=' * 70)

    stats = memory.get_stats()
    print(f"Total chunks in memory: {stats['total_chunks']}")
    print(f"Total estimated size: {stats['estimated_total_size']:,} characters")
    print()
    print("Per Query Cost:")
    print(f"  Smart retrieval (3 chunks): ${stats['cost_per_smart_query']:.6f}")
    print(f"  Dump all ({stats['total_chunks']} chunks): ${stats['cost_per_dump_all_query']:.6f}")
    print(f"  Savings per query: ${stats['cost_savings_per_query']:.6f}")
    print()
    print("For 1,000 queries/day (30 days):")
    print(f"  Smart: ${stats['cost_per_smart_query'] * 30000:.2f}/month")
    print(f"  Dump all: ${stats['cost_per_dump_all_query'] * 30000:.2f}/month")
    # monthly_savings_1k_queries is the savings per 1,000 queries, so a month
    # at 1,000 queries/day is 30x that figure.
    print(f"  Total savings: ${stats['monthly_savings_1k_queries'] * 30:.2f}/month")

    print("\n" + "=" * 70)
    print("[OK] Demo complete!")


if __name__ == "__main__":
    demo()
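
A minimal usage sketch (editor's illustration, not part of the package), assuming numpy is installed and the module is importable as kite.memory.vector_memory; HashEmbedder is a hypothetical stand-in for a real embedding provider:

    import numpy as np
    from kite.memory.vector_memory import VectorMemory

    class HashEmbedder:
        # Hypothetical toy embedder: deterministic within a process, fine for
        # exercising the plumbing, useless for real semantic similarity.
        def embed(self, text):
            rng = np.random.default_rng(abs(hash(text)) % (2**32))
            v = rng.standard_normal(384)
            return (v / np.linalg.norm(v)).tolist()

    memory = VectorMemory(backend="memory", embedding_provider=HashEmbedder())
    memory.add_document("note1", "Python is a high-level programming language.")
    for chunk_id, text, distance in memory.search("programming languages", k=1):
        print(chunk_id, round(distance, 3), text[:60])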