kailash 0.8.3__py3-none-any.whl → 0.8.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,558 @@
1
+ """
2
+ Semantic memory nodes for A2A enhancement.
3
+
4
+ These nodes add embeddings and vector search capabilities to the A2A system,
5
+ allowing for semantic matching and contextual agent selection.
6
+ """
7
+
8
+ import asyncio
9
+ import hashlib
10
+ import json
11
+ from dataclasses import dataclass
12
+ from datetime import datetime
13
+ from typing import Any, Dict, List, Optional, Tuple, Union
14
+ from uuid import uuid4
15
+
16
+ import aiohttp
17
+ import numpy as np
18
+
19
+ from ..base import Node, NodeParameter, register_node
20
+
21
+
22
@dataclass
class EmbeddingResult:
    """Outcome of a single embedding request.

    Bundles the stacked vectors together with the model that produced
    them, their dimensionality, and provider-specific details.
    """

    embeddings: np.ndarray  # stacked vectors, shape (n_texts, dimension)
    model: str  # name of the embedding model that produced the vectors
    dimension: int  # length of each embedding vector
    metadata: Dict[str, Any]  # provider details (e.g. the service host)
30
+
31
+
32
@dataclass
class SemanticMemoryItem:
    """A single entry held in semantic memory."""

    id: str
    content: str
    embedding: np.ndarray
    metadata: Dict[str, Any]
    created_at: datetime
    collection: str = "default"

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the item into JSON-friendly primitives."""
        serialized = dict(
            id=self.id,
            content=self.content,
            # ndarray -> plain list so the result can be JSON-encoded
            embedding=self.embedding.tolist(),
            metadata=self.metadata,
            # datetime -> ISO-8601 string
            created_at=self.created_at.isoformat(),
            collection=self.collection,
        )
        return serialized
53
+
54
+
55
class SimpleEmbeddingProvider:
    """Simple embedding provider using Ollama by default.

    Embeddings are requested one text at a time from the Ollama
    ``/api/embeddings`` endpoint.  If the service is unreachable or
    returns a non-200 status, a deterministic hash-based embedding is
    used as a fallback so callers always get a vector back.
    """

    def __init__(
        self, model_name: str = "nomic-embed-text", host: str = "http://localhost:11434"
    ):
        """Initialize the provider.

        Args:
            model_name: Name of the Ollama embedding model.
            host: Base URL of the Ollama service.
        """
        self.model_name = model_name
        self.host = host
        self.embed_url = f"{host}/api/embeddings"
        # NOTE(review): this cache is unbounded and grows with every
        # distinct text embedded; fine for development, but a bounded
        # cache would be needed for long-running processes.
        self._cache = {}

    def _get_cache_key(self, text: str) -> str:
        """Generate a cache key for text.

        The key is scoped by model name so that switching models never
        reuses a stale vector.
        """
        return hashlib.md5(f"{self.model_name}:{text}".encode()).hexdigest()

    async def embed_text(self, text: Union[str, List[str]]) -> "EmbeddingResult":
        """Generate embeddings for one text or a list of texts.

        Args:
            text: A single string or a non-empty list of strings.

        Returns:
            EmbeddingResult with one embedding row per input text.

        Raises:
            ValueError: If ``text`` is an empty list (``np.vstack``
                would otherwise fail with an opaque error).
        """
        texts = [text] if isinstance(text, str) else list(text)
        if not texts:
            raise ValueError("embed_text requires at least one text")

        all_embeddings = []

        async with aiohttp.ClientSession() as session:
            for txt in texts:
                # Serve from cache when this text was embedded before.
                cache_key = self._get_cache_key(txt)
                if cache_key in self._cache:
                    all_embeddings.append(self._cache[cache_key])
                    continue

                data = {"model": self.model_name, "prompt": txt}

                try:
                    async with session.post(self.embed_url, json=data) as response:
                        if response.status == 200:
                            result = await response.json()
                            embedding = np.array(result["embedding"])
                            all_embeddings.append(embedding)

                            # Cache the embedding for future calls.
                            self._cache[cache_key] = embedding
                        else:
                            # Service answered but failed: fall back to a
                            # deterministic hash-based embedding.
                            embedding = self._hash_embedding(txt)
                            all_embeddings.append(embedding)
                except Exception:
                    # Service unreachable: same hash-based fallback.
                    embedding = self._hash_embedding(txt)
                    all_embeddings.append(embedding)

        embeddings_array = np.vstack(all_embeddings)

        return EmbeddingResult(
            embeddings=embeddings_array,
            model=self.model_name,
            dimension=embeddings_array.shape[1],
            metadata={"host": self.host},
        )

    def _hash_embedding(self, text: str, dimension: int = 384) -> np.ndarray:
        """Create a simple deterministic hash-based embedding as fallback.

        NOTE(review): the fallback dimension (384) will generally not
        match the real model's output dimension, so a batch that mixes
        service embeddings and fallback embeddings will fail in
        ``np.vstack`` — confirm whether mixed batches can occur.
        """
        # Derive 16 values from the MD5 digest, normalized to [0, 1].
        hash_str = hashlib.md5(text.encode()).hexdigest()
        values = [
            int(hash_str[i : i + 2], 16) / 255.0
            for i in range(0, min(len(hash_str), dimension * 2), 2)
        ]
        # Repeat the derived values until the target dimension is reached,
        # then truncate to exactly ``dimension`` entries.
        while len(values) < dimension:
            values.extend(values[: dimension - len(values)])
        return np.array(values[:dimension])
129
+
130
+
131
class InMemoryVectorStore:
    """Simple in-memory vector store for development.

    Not persistent and not synchronized; intended as a stand-in for a
    real vector database during development.
    """

    def __init__(self):
        # item id -> stored item
        self.items: Dict[str, "SemanticMemoryItem"] = {}
        # collection name -> item ids, in insertion order
        self.collections: Dict[str, List[str]] = {}

    async def add(self, item: "SemanticMemoryItem") -> str:
        """Add an item to the store and index it under its collection.

        Returns:
            The id of the stored item.
        """
        self.items[item.id] = item

        # Maintain the per-collection id index.
        self.collections.setdefault(item.collection, []).append(item.id)

        return item.id

    async def search_similar(
        self,
        embedding: np.ndarray,
        collection: Optional[str] = None,
        limit: int = 10,
        threshold: float = 0.5,
    ) -> List[Tuple["SemanticMemoryItem", float]]:
        """Search for items whose cosine similarity meets ``threshold``.

        Args:
            embedding: Query vector.
            collection: Restrict the search to one collection if given.
            limit: Maximum number of results to return.
            threshold: Minimum cosine similarity to include a result.

        Returns:
            (item, similarity) pairs sorted best-first.  Zero-norm
            vectors (query or stored) are skipped instead of producing
            NaN similarities via division by zero.
        """
        results = []

        # Restrict candidates to the requested collection, if any.
        if collection:
            item_ids = self.collections.get(collection, [])
        else:
            item_ids = list(self.items.keys())

        # Hoist the loop-invariant query norm; bail out on a zero query
        # vector (cosine similarity is undefined for it).
        query_norm = np.linalg.norm(embedding)
        if query_norm == 0:
            return results

        for item_id in item_ids:
            item = self.items[item_id]

            item_norm = np.linalg.norm(item.embedding)
            if item_norm == 0:
                # Skip degenerate stored vectors rather than divide by zero.
                continue

            # Cosine similarity.
            similarity = np.dot(embedding, item.embedding) / (query_norm * item_norm)

            if similarity >= threshold:
                results.append((item, similarity))

        # Best matches first, capped at ``limit``.
        results.sort(key=lambda x: x[1], reverse=True)
        return results[:limit]

    async def get_collections(self) -> List[str]:
        """Get all collection names."""
        return list(self.collections.keys())
184
+
185
+
186
@register_node()
class SemanticMemoryStoreNode(Node):
    """Store content in semantic memory with embeddings."""

    def __init__(self, name: str = "semantic_memory_store", **kwargs):
        """Initialize semantic memory store node.

        Args:
            name: Node name.
            **kwargs: May pre-set any declared parameter (content,
                metadata, collection, embedding_model, embedding_host);
                everything is also forwarded to the base Node.
        """
        self.content = None
        self.metadata = None
        self.collection = "default"
        self.embedding_model = "nomic-embed-text"
        self.embedding_host = "http://localhost:11434"

        # Adopt only kwargs matching the attributes declared above.
        for key, value in kwargs.items():
            if hasattr(self, key):
                setattr(self, key, value)

        super().__init__(name=name, **kwargs)

        # Shared store and provider (in production, use persistent storage).
        # NOTE(review): class-level singletons built from the FIRST
        # instance's settings; later instances with a different
        # embedding_model/host still share the first provider — confirm
        # this is intended.
        if not hasattr(self.__class__, "_store"):
            self.__class__._store = InMemoryVectorStore()
        if not hasattr(self.__class__, "_provider"):
            self.__class__._provider = SimpleEmbeddingProvider(
                model_name=self.embedding_model, host=self.embedding_host
            )

    def get_parameters(self) -> Dict[str, NodeParameter]:
        """Declare the node's input parameters."""
        return {
            "content": NodeParameter(
                name="content",
                type=str,
                required=True,
                description="Content to store (string or list of strings)",
            ),
            "metadata": NodeParameter(
                name="metadata",
                type=dict,
                required=False,
                description="Metadata to attach",
            ),
            "collection": NodeParameter(
                name="collection",
                type=str,
                required=False,
                default="default",
                description="Collection name",
            ),
            "embedding_model": NodeParameter(
                name="embedding_model",
                type=str,
                required=False,
                default="nomic-embed-text",
                description="Embedding model name",
            ),
            "embedding_host": NodeParameter(
                name="embedding_host",
                type=str,
                required=False,
                default="http://localhost:11434",
                description="Embedding service host",
            ),
        }

    async def run(self, **kwargs) -> Dict[str, Any]:
        """Store content in semantic memory.

        Returns:
            Dict with a success flag, the stored item ids, their count,
            the collection name, and the embedding model used.

        Raises:
            ValueError: If no content was provided.
        """
        # Runtime kwargs override attributes set at construction.
        content = kwargs.get("content", self.content)
        metadata = kwargs.get("metadata", self.metadata) or {}
        collection = kwargs.get("collection", self.collection)

        if not content:
            raise ValueError("Content is required")

        # Normalize to a list of texts.
        contents = [content] if isinstance(content, str) else content

        # Generate embeddings for all texts in one provider call.
        result = await self._provider.embed_text(contents)

        # Store one item per text; all items share one timestamp.
        ids = []
        # NOTE(review): naive UTC timestamp; datetime.utcnow() is
        # deprecated in Python 3.12+ — consider datetime.now(timezone.utc).
        now = datetime.utcnow()

        for text, embedding in zip(contents, result.embeddings):
            item = SemanticMemoryItem(
                id=str(uuid4()),
                content=text,
                embedding=embedding,
                # Copy per item: previously every item shared ONE dict
                # instance, so mutating one item's metadata silently
                # changed its siblings'.
                metadata=dict(metadata),
                created_at=now,
                collection=collection,
            )

            item_id = await self._store.add(item)
            ids.append(item_id)

        return {
            "success": True,
            "ids": ids,
            "count": len(ids),
            "collection": collection,
            "embedding_model": result.model,
        }
294
+
295
+
296
@register_node()
class SemanticMemorySearchNode(Node):
    """Search semantic memory for similar content."""

    def __init__(self, name: str = "semantic_memory_search", **kwargs):
        """Initialize semantic memory search node.

        Args:
            name: Node name.
            **kwargs: May pre-set any declared parameter (query, limit,
                threshold, collection, embedding_model, embedding_host);
                everything is also forwarded to the base Node.
        """
        self.query = None
        self.limit = 10
        self.threshold = 0.5
        self.collection = None
        self.embedding_model = "nomic-embed-text"
        self.embedding_host = "http://localhost:11434"

        # Adopt only kwargs matching the attributes declared above.
        for key, value in kwargs.items():
            if hasattr(self, key):
                setattr(self, key, value)

        super().__init__(name=name, **kwargs)

        # Use shared store and provider (class-level singletons).
        # NOTE(review): built from the FIRST instance's settings; later
        # instances with different embedding settings share the same
        # provider — confirm this is intended.
        if not hasattr(self.__class__, "_store"):
            self.__class__._store = InMemoryVectorStore()
        if not hasattr(self.__class__, "_provider"):
            self.__class__._provider = SimpleEmbeddingProvider(
                model_name=self.embedding_model, host=self.embedding_host
            )

    def get_parameters(self) -> Dict[str, NodeParameter]:
        """Declare the node's input parameters."""
        return {
            "query": NodeParameter(
                name="query", type=str, required=True, description="Search query"
            ),
            "limit": NodeParameter(
                name="limit",
                type=int,
                required=False,
                default=10,
                description="Maximum number of results",
            ),
            "threshold": NodeParameter(
                name="threshold",
                type=float,
                required=False,
                default=0.5,
                description="Minimum similarity threshold",
            ),
            "collection": NodeParameter(
                name="collection",
                type=str,
                required=False,
                description="Collection to search",
            ),
            "embedding_model": NodeParameter(
                name="embedding_model",
                type=str,
                required=False,
                default="nomic-embed-text",
                description="Embedding model name",
            ),
            "embedding_host": NodeParameter(
                name="embedding_host",
                type=str,
                required=False,
                default="http://localhost:11434",
                description="Embedding service host",
            ),
        }

    async def run(self, **kwargs) -> Dict[str, Any]:
        """Search semantic memory.

        Returns:
            Dict with a success flag, the query, the formatted matches,
            their count, and the embedding model used.

        Raises:
            ValueError: If no query was provided.
        """
        # Runtime kwargs override attributes set at construction.
        query = kwargs.get("query", self.query)
        limit = kwargs.get("limit", self.limit)
        threshold = kwargs.get("threshold", self.threshold)
        collection = kwargs.get("collection", self.collection)

        if not query:
            raise ValueError("Query is required")

        # Embed the query; row 0 is the single query vector.
        result = await self._provider.embed_text(query)
        query_embedding = result.embeddings[0]

        # Delegate the similarity search to the store.
        results = await self._store.search_similar(
            embedding=query_embedding,
            collection=collection,
            limit=limit,
            threshold=threshold,
        )

        # Format results for downstream consumption.
        formatted_results = []
        for item, similarity in results:
            formatted_results.append(
                {
                    "id": item.id,
                    "content": item.content,
                    # Cast the numpy scalar to a plain float so the
                    # result dict is JSON-serializable.
                    "similarity": float(similarity),
                    "metadata": item.metadata,
                    "collection": item.collection,
                }
            )

        return {
            "success": True,
            "query": query,
            "results": formatted_results,
            "count": len(formatted_results),
            "embedding_model": result.model,
        }
409
+
410
+
411
@register_node()
class SemanticAgentMatchingNode(Node):
    """Enhanced agent matching using semantic similarity."""

    def __init__(self, name: str = "semantic_agent_matching", **kwargs):
        """Initialize semantic agent matching node.

        Args:
            name: Node name.
            **kwargs: May pre-set any declared parameter (requirements,
                agents, limit, threshold, weight_semantic,
                weight_keyword); everything is also forwarded to the
                base Node.
        """
        self.requirements = None
        self.agents = None
        self.limit = 5
        self.threshold = 0.3
        self.weight_semantic = 0.6
        self.weight_keyword = 0.4

        # Adopt only kwargs matching the attributes declared above.
        for key, value in kwargs.items():
            if hasattr(self, key):
                setattr(self, key, value)

        super().__init__(name=name, **kwargs)

        # Use shared store and provider (class-level singletons with
        # default embedding settings).
        if not hasattr(self.__class__, "_store"):
            self.__class__._store = InMemoryVectorStore()
        if not hasattr(self.__class__, "_provider"):
            self.__class__._provider = SimpleEmbeddingProvider()

    def get_parameters(self) -> Dict[str, NodeParameter]:
        """Declare the node's input parameters."""
        return {
            "requirements": NodeParameter(
                name="requirements",
                type=str,
                required=True,
                description="Task requirements (string or list)",
            ),
            "agents": NodeParameter(
                name="agents",
                type=list,
                required=True,
                description="List of agent descriptions",
            ),
            "limit": NodeParameter(
                name="limit",
                type=int,
                required=False,
                default=5,
                description="Maximum matches to return",
            ),
            "threshold": NodeParameter(
                name="threshold",
                type=float,
                required=False,
                default=0.3,
                description="Minimum similarity threshold",
            ),
            "weight_semantic": NodeParameter(
                name="weight_semantic",
                type=float,
                required=False,
                default=0.6,
                description="Weight for semantic similarity",
            ),
            "weight_keyword": NodeParameter(
                name="weight_keyword",
                type=float,
                required=False,
                default=0.4,
                description="Weight for keyword matching",
            ),
        }

    async def run(self, **kwargs) -> Dict[str, Any]:
        """Perform semantic agent matching.

        Scores each agent as a weighted blend of embedding cosine
        similarity and keyword overlap against the requirements.

        Returns:
            Dict with a success flag, the requirement text, the top
            matches (best first, capped at ``limit``), the total number
            of matches over ``threshold``, and the embedding model used.

        Raises:
            ValueError: If requirements or agents are missing/empty.
        """
        # Runtime kwargs override attributes set at construction.
        requirements = kwargs.get("requirements", self.requirements)
        agents = kwargs.get("agents", self.agents)
        limit = kwargs.get("limit", self.limit)
        threshold = kwargs.get("threshold", self.threshold)
        weight_semantic = kwargs.get("weight_semantic", self.weight_semantic)
        weight_keyword = kwargs.get("weight_keyword", self.weight_keyword)

        if not requirements or not agents:
            raise ValueError("Requirements and agents are required")

        # Flatten list requirements into one text blob.
        if isinstance(requirements, list):
            req_text = " ".join(str(req) for req in requirements)
        else:
            req_text = str(requirements)

        # One provider call embeds the requirements plus every agent:
        # row 0 is the requirements, the rest align with ``agents``.
        all_texts = [req_text] + [str(agent) for agent in agents]
        result = await self._provider.embed_text(all_texts)

        req_embedding = result.embeddings[0]
        agent_embeddings = result.embeddings[1:]

        # Hoist the loop-invariant requirement norm.
        req_norm = np.linalg.norm(req_embedding)

        matches = []
        for i, (agent, agent_embedding) in enumerate(zip(agents, agent_embeddings)):
            # Cosine similarity, guarded against zero-norm vectors which
            # would otherwise yield NaN via division by zero.
            denom = req_norm * np.linalg.norm(agent_embedding)
            if denom:
                semantic_sim = float(np.dot(req_embedding, agent_embedding) / denom)
            else:
                semantic_sim = 0.0

            # Keyword similarity (Jaccard word overlap).
            keyword_sim = self._calculate_keyword_similarity(req_text, str(agent))

            # Weighted blend of the two signals.
            combined_score = (
                semantic_sim * weight_semantic + keyword_sim * weight_keyword
            )

            if combined_score >= threshold:
                matches.append(
                    {
                        "agent": agent,
                        "agent_index": i,
                        "semantic_similarity": semantic_sim,
                        "keyword_similarity": keyword_sim,
                        "combined_score": combined_score,
                    }
                )

        # Best combined score first.
        matches.sort(key=lambda x: x["combined_score"], reverse=True)

        return {
            "success": True,
            "requirements": req_text,
            "matches": matches[:limit],
            "count": len(matches),
            "embedding_model": result.model,
        }

    def _calculate_keyword_similarity(self, text1: str, text2: str) -> float:
        """Calculate Jaccard similarity over lowercase word sets.

        Returns 0.0 when either text has no words.
        """
        words1 = set(text1.lower().split())
        words2 = set(text2.lower().split())

        if not words1 or not words2:
            return 0.0

        intersection = words1.intersection(words2)
        union = words1.union(words2)

        return len(intersection) / len(union)