kailash 0.3.2__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146) hide show
  1. kailash/__init__.py +33 -1
  2. kailash/access_control/__init__.py +129 -0
  3. kailash/access_control/managers.py +461 -0
  4. kailash/access_control/rule_evaluators.py +467 -0
  5. kailash/access_control_abac.py +825 -0
  6. kailash/config/__init__.py +27 -0
  7. kailash/config/database_config.py +359 -0
  8. kailash/database/__init__.py +28 -0
  9. kailash/database/execution_pipeline.py +499 -0
  10. kailash/middleware/__init__.py +306 -0
  11. kailash/middleware/auth/__init__.py +33 -0
  12. kailash/middleware/auth/access_control.py +436 -0
  13. kailash/middleware/auth/auth_manager.py +422 -0
  14. kailash/middleware/auth/jwt_auth.py +477 -0
  15. kailash/middleware/auth/kailash_jwt_auth.py +616 -0
  16. kailash/middleware/communication/__init__.py +37 -0
  17. kailash/middleware/communication/ai_chat.py +989 -0
  18. kailash/middleware/communication/api_gateway.py +802 -0
  19. kailash/middleware/communication/events.py +470 -0
  20. kailash/middleware/communication/realtime.py +710 -0
  21. kailash/middleware/core/__init__.py +21 -0
  22. kailash/middleware/core/agent_ui.py +890 -0
  23. kailash/middleware/core/schema.py +643 -0
  24. kailash/middleware/core/workflows.py +396 -0
  25. kailash/middleware/database/__init__.py +63 -0
  26. kailash/middleware/database/base.py +113 -0
  27. kailash/middleware/database/base_models.py +525 -0
  28. kailash/middleware/database/enums.py +106 -0
  29. kailash/middleware/database/migrations.py +12 -0
  30. kailash/{api/database.py → middleware/database/models.py} +183 -291
  31. kailash/middleware/database/repositories.py +685 -0
  32. kailash/middleware/database/session_manager.py +19 -0
  33. kailash/middleware/mcp/__init__.py +38 -0
  34. kailash/middleware/mcp/client_integration.py +585 -0
  35. kailash/middleware/mcp/enhanced_server.py +576 -0
  36. kailash/nodes/__init__.py +25 -3
  37. kailash/nodes/admin/__init__.py +35 -0
  38. kailash/nodes/admin/audit_log.py +794 -0
  39. kailash/nodes/admin/permission_check.py +864 -0
  40. kailash/nodes/admin/role_management.py +823 -0
  41. kailash/nodes/admin/security_event.py +1519 -0
  42. kailash/nodes/admin/user_management.py +944 -0
  43. kailash/nodes/ai/a2a.py +24 -7
  44. kailash/nodes/ai/ai_providers.py +1 -0
  45. kailash/nodes/ai/embedding_generator.py +11 -11
  46. kailash/nodes/ai/intelligent_agent_orchestrator.py +99 -11
  47. kailash/nodes/ai/llm_agent.py +407 -2
  48. kailash/nodes/ai/self_organizing.py +85 -10
  49. kailash/nodes/api/auth.py +287 -6
  50. kailash/nodes/api/rest.py +151 -0
  51. kailash/nodes/auth/__init__.py +17 -0
  52. kailash/nodes/auth/directory_integration.py +1228 -0
  53. kailash/nodes/auth/enterprise_auth_provider.py +1328 -0
  54. kailash/nodes/auth/mfa.py +2338 -0
  55. kailash/nodes/auth/risk_assessment.py +872 -0
  56. kailash/nodes/auth/session_management.py +1093 -0
  57. kailash/nodes/auth/sso.py +1040 -0
  58. kailash/nodes/base.py +344 -13
  59. kailash/nodes/base_cycle_aware.py +4 -2
  60. kailash/nodes/base_with_acl.py +1 -1
  61. kailash/nodes/code/python.py +283 -10
  62. kailash/nodes/compliance/__init__.py +9 -0
  63. kailash/nodes/compliance/data_retention.py +1888 -0
  64. kailash/nodes/compliance/gdpr.py +2004 -0
  65. kailash/nodes/data/__init__.py +22 -2
  66. kailash/nodes/data/async_connection.py +469 -0
  67. kailash/nodes/data/async_sql.py +757 -0
  68. kailash/nodes/data/async_vector.py +598 -0
  69. kailash/nodes/data/readers.py +767 -0
  70. kailash/nodes/data/retrieval.py +360 -1
  71. kailash/nodes/data/sharepoint_graph.py +397 -21
  72. kailash/nodes/data/sql.py +94 -5
  73. kailash/nodes/data/streaming.py +68 -8
  74. kailash/nodes/data/vector_db.py +54 -4
  75. kailash/nodes/enterprise/__init__.py +13 -0
  76. kailash/nodes/enterprise/batch_processor.py +741 -0
  77. kailash/nodes/enterprise/data_lineage.py +497 -0
  78. kailash/nodes/logic/convergence.py +31 -9
  79. kailash/nodes/logic/operations.py +14 -3
  80. kailash/nodes/mixins/__init__.py +8 -0
  81. kailash/nodes/mixins/event_emitter.py +201 -0
  82. kailash/nodes/mixins/mcp.py +9 -4
  83. kailash/nodes/mixins/security.py +165 -0
  84. kailash/nodes/monitoring/__init__.py +7 -0
  85. kailash/nodes/monitoring/performance_benchmark.py +2497 -0
  86. kailash/nodes/rag/__init__.py +284 -0
  87. kailash/nodes/rag/advanced.py +1615 -0
  88. kailash/nodes/rag/agentic.py +773 -0
  89. kailash/nodes/rag/conversational.py +999 -0
  90. kailash/nodes/rag/evaluation.py +875 -0
  91. kailash/nodes/rag/federated.py +1188 -0
  92. kailash/nodes/rag/graph.py +721 -0
  93. kailash/nodes/rag/multimodal.py +671 -0
  94. kailash/nodes/rag/optimized.py +933 -0
  95. kailash/nodes/rag/privacy.py +1059 -0
  96. kailash/nodes/rag/query_processing.py +1335 -0
  97. kailash/nodes/rag/realtime.py +764 -0
  98. kailash/nodes/rag/registry.py +547 -0
  99. kailash/nodes/rag/router.py +837 -0
  100. kailash/nodes/rag/similarity.py +1854 -0
  101. kailash/nodes/rag/strategies.py +566 -0
  102. kailash/nodes/rag/workflows.py +575 -0
  103. kailash/nodes/security/__init__.py +19 -0
  104. kailash/nodes/security/abac_evaluator.py +1411 -0
  105. kailash/nodes/security/audit_log.py +91 -0
  106. kailash/nodes/security/behavior_analysis.py +1893 -0
  107. kailash/nodes/security/credential_manager.py +401 -0
  108. kailash/nodes/security/rotating_credentials.py +760 -0
  109. kailash/nodes/security/security_event.py +132 -0
  110. kailash/nodes/security/threat_detection.py +1103 -0
  111. kailash/nodes/testing/__init__.py +9 -0
  112. kailash/nodes/testing/credential_testing.py +499 -0
  113. kailash/nodes/transform/__init__.py +10 -2
  114. kailash/nodes/transform/chunkers.py +592 -1
  115. kailash/nodes/transform/processors.py +484 -14
  116. kailash/nodes/validation.py +321 -0
  117. kailash/runtime/access_controlled.py +1 -1
  118. kailash/runtime/async_local.py +41 -7
  119. kailash/runtime/docker.py +1 -1
  120. kailash/runtime/local.py +474 -55
  121. kailash/runtime/parallel.py +1 -1
  122. kailash/runtime/parallel_cyclic.py +1 -1
  123. kailash/runtime/testing.py +210 -2
  124. kailash/utils/migrations/__init__.py +25 -0
  125. kailash/utils/migrations/generator.py +433 -0
  126. kailash/utils/migrations/models.py +231 -0
  127. kailash/utils/migrations/runner.py +489 -0
  128. kailash/utils/secure_logging.py +342 -0
  129. kailash/workflow/__init__.py +16 -0
  130. kailash/workflow/cyclic_runner.py +3 -4
  131. kailash/workflow/graph.py +70 -2
  132. kailash/workflow/resilience.py +249 -0
  133. kailash/workflow/templates.py +726 -0
  134. {kailash-0.3.2.dist-info → kailash-0.4.0.dist-info}/METADATA +253 -20
  135. kailash-0.4.0.dist-info/RECORD +223 -0
  136. kailash/api/__init__.py +0 -17
  137. kailash/api/__main__.py +0 -6
  138. kailash/api/studio_secure.py +0 -893
  139. kailash/mcp/__main__.py +0 -13
  140. kailash/mcp/server_new.py +0 -336
  141. kailash/mcp/servers/__init__.py +0 -12
  142. kailash-0.3.2.dist-info/RECORD +0 -136
  143. {kailash-0.3.2.dist-info → kailash-0.4.0.dist-info}/WHEEL +0 -0
  144. {kailash-0.3.2.dist-info → kailash-0.4.0.dist-info}/entry_points.txt +0 -0
  145. {kailash-0.3.2.dist-info → kailash-0.4.0.dist-info}/licenses/LICENSE +0 -0
  146. {kailash-0.3.2.dist-info → kailash-0.4.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,999 @@
1
+ """
2
+ Conversational RAG Implementation
3
+
4
+ Implements RAG with conversation context and memory management:
5
+ - Multi-turn conversation support
6
+ - Context window management
7
+ - Conversation memory and summarization
8
+ - Coreference resolution
9
+ - Topic tracking and switching
10
+ - Personalization based on conversation history
11
+
12
+ Based on conversational AI and dialogue systems research.
13
+ """
14
+
15
+ import hashlib
16
+ import json
17
+ import logging
18
+ from collections import defaultdict, deque
19
+ from datetime import datetime, timedelta
20
+ from typing import Any, Deque, Dict, List, Optional, Union
21
+
22
+ # from ..data.cache import CacheNode # TODO: Implement CacheNode
23
+ from ...workflow.builder import WorkflowBuilder
24
+ from ..ai.llm_agent import LLMAgentNode
25
+ from ..base import Node, NodeParameter, register_node
26
+ from ..code.python import PythonCodeNode
27
+ from ..logic.workflow import WorkflowNode
28
+
29
+ logger = logging.getLogger(__name__)
30
+
31
+
32
@register_node()
class ConversationalRAGNode(WorkflowNode):
    """
    Conversational RAG with Context Management

    Implements RAG that maintains conversation context across multiple turns,
    enabling coherent multi-turn interactions with memory of previous exchanges.

    When to use:
    - Best for: Chatbots, virtual assistants, interactive help systems
    - Not ideal for: Single-turn queries, stateless interactions
    - Performance: 200-500ms per turn (with context loading)
    - Context quality: Maintains coherence across 10-20 turns

    Key features:
    - Conversation memory with sliding window
    - Automatic context summarization
    - Coreference resolution (it, they, this, etc.)
    - Topic tracking and smooth transitions
    - Personalization based on user history
    - Session management and persistence

    Example:
        conv_rag = ConversationalRAGNode(
            max_context_turns=10,
            enable_summarization=True,
            personalization_enabled=True
        )

        # Initialize conversation (synchronous; returns a dict handle)
        session = conv_rag.create_session(user_id="user123")

        # First turn
        response1 = await conv_rag.run(
            query="What is transformer architecture?",
            session_id=session["session_id"]
        )

        # Follow-up with context
        response2 = await conv_rag.run(
            query="How does its attention mechanism work?",  # "its" refers to transformer
            session_id=session["session_id"]
        )

        # Topic switch with smooth transition
        response3 = await conv_rag.run(
            query="Now tell me about BERT",
            session_id=session["session_id"]
        )

    Parameters:
        max_context_turns: Maximum conversation turns to maintain
        enable_summarization: Summarize old context when window exceeds
        personalization_enabled: Use user history for personalization
        coreference_resolution: Resolve pronouns and references
        topic_tracking: Track and manage topic changes

    Returns:
        response: Contextual response to current query
        session_state: Current conversation state
        topic_info: Current topic and transitions
        conversation_metrics: Engagement and coherence metrics
    """

    def __init__(
        self,
        name: str = "conversational_rag",
        max_context_turns: int = 10,
        enable_summarization: bool = True,
        personalization_enabled: bool = True,
        coreference_resolution: bool = True,
        topic_tracking: bool = True,
    ):
        # Feature flags must be assigned BEFORE super().__init__ below:
        # _create_workflow() (called as an argument to super().__init__)
        # reads them to decide which nodes and connections to build.
        self.max_context_turns = max_context_turns
        self.enable_summarization = enable_summarization
        self.personalization_enabled = personalization_enabled
        self.coreference_resolution = coreference_resolution
        self.topic_tracking = topic_tracking
        # In-memory session storage (use persistent storage in production)
        self.sessions = {}
        super().__init__(name, self._create_workflow())
113
+
114
+ def _create_workflow(self) -> WorkflowNode:
115
+ """Create conversational RAG workflow"""
116
+ builder = WorkflowBuilder()
117
+
118
+ # Session context loader
119
+ context_loader_id = builder.add_node(
120
+ "PythonCodeNode",
121
+ node_id="context_loader",
122
+ config={
123
+ "code": f"""
124
+ import json
125
+ from collections import deque
126
+
127
+ def load_conversation_context(session_id, sessions_store):
128
+ '''Load conversation context for session'''
129
+
130
+ if session_id not in sessions_store:
131
+ # Create new session
132
+ session = {{
133
+ "id": session_id,
134
+ "created_at": datetime.now().isoformat(),
135
+ "turns": [],
136
+ "summary": "",
137
+ "current_topic": None,
138
+ "user_preferences": {{}},
139
+ "metrics": {{
140
+ "turn_count": 0,
141
+ "topics_discussed": [],
142
+ "avg_response_length": 0
143
+ }}
144
+ }}
145
+ sessions_store[session_id] = session
146
+
147
+ session = sessions_store[session_id]
148
+
149
+ # Get recent context (sliding window)
150
+ recent_turns = session["turns"][-{self.max_context_turns}:]
151
+
152
+ # Format context for processing
153
+ context_text = ""
154
+ for turn in recent_turns:
155
+ context_text += f"User: {{turn['query']}}\\n"
156
+ context_text += f"Assistant: {{turn['response']}}\\n\\n"
157
+
158
+ result = {{
159
+ "session_context": {{
160
+ "session_id": session_id,
161
+ "recent_turns": recent_turns,
162
+ "context_text": context_text,
163
+ "summary": session.get("summary", ""),
164
+ "current_topic": session.get("current_topic"),
165
+ "turn_count": len(session["turns"]),
166
+ "user_preferences": session.get("user_preferences", {{}})
167
+ }}
168
+ }}
169
+ """
170
+ },
171
+ )
172
+
173
+ # Coreference resolver
174
+ if self.coreference_resolution:
175
+ coreference_resolver_id = builder.add_node(
176
+ "LLMAgentNode",
177
+ node_id="coreference_resolver",
178
+ config={
179
+ "system_prompt": """Resolve coreferences in the user query based on conversation context.
180
+
181
+ Replace pronouns (it, they, this, that, these, those) and other references with their specific antecedents from the conversation history.
182
+
183
+ Given:
184
+ - Current query
185
+ - Recent conversation context
186
+
187
+ Return JSON:
188
+ {
189
+ "resolved_query": "query with coreferences resolved",
190
+ "replacements": [
191
+ {"original": "it", "resolved": "transformer architecture"},
192
+ {"original": "they", "resolved": "attention heads"}
193
+ ],
194
+ "confidence": 0.0-1.0
195
+ }
196
+
197
+ If no coreferences found, return the original query.""",
198
+ "model": "gpt-4",
199
+ },
200
+ )
201
+
202
+ # Topic tracker
203
+ if self.topic_tracking:
204
+ topic_tracker_id = builder.add_node(
205
+ "PythonCodeNode",
206
+ node_id="topic_tracker",
207
+ config={
208
+ "code": """
209
+ def track_conversation_topic(current_query, session_context):
210
+ '''Track and identify topic changes in conversation'''
211
+
212
+ current_topic = session_context.get("current_topic")
213
+ recent_turns = session_context.get("recent_turns", [])
214
+
215
+ # Extract key terms from current query
216
+ query_terms = set(current_query.lower().split())
217
+
218
+ # Define topic keywords (simplified - use NER/classification in production)
219
+ topics = {
220
+ "transformers": ["transformer", "attention", "self-attention", "encoder", "decoder"],
221
+ "bert": ["bert", "bidirectional", "masked", "mlm", "pretraining"],
222
+ "gpt": ["gpt", "generative", "autoregressive", "language model"],
223
+ "training": ["training", "optimization", "learning rate", "batch", "epoch"],
224
+ "architecture": ["architecture", "layer", "network", "model", "structure"]
225
+ }
226
+
227
+ # Identify current query topic
228
+ query_topics = []
229
+ for topic, keywords in topics.items():
230
+ if any(keyword in query_terms for keyword in keywords):
231
+ query_topics.append(topic)
232
+
233
+ # Determine if topic changed
234
+ topic_changed = False
235
+ transition_type = "continuation"
236
+
237
+ if not current_topic and query_topics:
238
+ # First topic
239
+ new_topic = query_topics[0]
240
+ transition_type = "new_conversation"
241
+ elif query_topics and query_topics[0] != current_topic:
242
+ # Topic switch
243
+ new_topic = query_topics[0]
244
+ topic_changed = True
245
+ transition_type = "topic_switch"
246
+ elif query_topics:
247
+ # Same topic
248
+ new_topic = query_topics[0]
249
+ transition_type = "deep_dive"
250
+ else:
251
+ # No clear topic
252
+ new_topic = current_topic or "general"
253
+ transition_type = "clarification"
254
+
255
+ # Check for explicit transitions
256
+ transition_phrases = {
257
+ "now tell me about": "explicit_switch",
258
+ "switching to": "explicit_switch",
259
+ "different topic": "explicit_switch",
260
+ "another question": "soft_switch",
261
+ "related to this": "expansion",
262
+ "furthermore": "continuation",
263
+ "however": "contrast"
264
+ }
265
+
266
+ for phrase, trans_type in transition_phrases.items():
267
+ if phrase in current_query.lower():
268
+ transition_type = trans_type
269
+ break
270
+
271
+ result = {
272
+ "topic_analysis": {
273
+ "current_topic": new_topic,
274
+ "previous_topic": current_topic,
275
+ "topic_changed": topic_changed,
276
+ "transition_type": transition_type,
277
+ "identified_topics": query_topics,
278
+ "confidence": 0.8 if query_topics else 0.3
279
+ }
280
+ }
281
+ """
282
+ },
283
+ )
284
+
285
+ # Context-aware retriever
286
+ context_retriever_id = builder.add_node(
287
+ "PythonCodeNode",
288
+ node_id="context_retriever",
289
+ config={
290
+ "code": """
291
+ def retrieve_with_context(query, documents, session_context, topic_info=None):
292
+ '''Retrieve documents considering conversation context'''
293
+
294
+ # Combine current query with context
295
+ context_summary = session_context.get("summary", "")
296
+ recent_context = session_context.get("context_text", "")
297
+
298
+ # Build enhanced query
299
+ enhanced_query = query
300
+
301
+ # Add topic context if available
302
+ if topic_info and topic_info.get("topic_analysis"):
303
+ current_topic = topic_info["topic_analysis"].get("current_topic")
304
+ if current_topic:
305
+ enhanced_query = f"{current_topic} context: {query}"
306
+
307
+ # Add conversation context keywords
308
+ if recent_context:
309
+ # Extract key terms from recent context
310
+ context_words = set(recent_context.lower().split())
311
+ important_words = [w for w in context_words if len(w) > 4][:5]
312
+ enhanced_query += " " + " ".join(important_words)
313
+
314
+ # Score documents with context awareness
315
+ scored_docs = []
316
+ query_words = set(enhanced_query.lower().split())
317
+
318
+ for doc in documents:
319
+ content = doc.get("content", "").lower()
320
+ doc_words = set(content.split())
321
+
322
+ # Base relevance score
323
+ if query_words:
324
+ relevance = len(query_words & doc_words) / len(query_words)
325
+ else:
326
+ relevance = 0
327
+
328
+ # Boost score for topic-relevant documents
329
+ if topic_info and current_topic in content:
330
+ relevance *= 1.3
331
+
332
+ # Boost for documents related to recent context
333
+ if recent_context and any(turn.get("response", "") in content for turn in session_context.get("recent_turns", [])):
334
+ relevance *= 1.2
335
+
336
+ scored_docs.append({
337
+ "document": doc,
338
+ "score": min(1.0, relevance),
339
+ "context_boosted": relevance > len(query_words & doc_words) / len(query_words) if query_words else False
340
+ })
341
+
342
+ # Sort by score
343
+ scored_docs.sort(key=lambda x: x["score"], reverse=True)
344
+
345
+ result = {
346
+ "contextual_retrieval": {
347
+ "documents": [d["document"] for d in scored_docs[:10]],
348
+ "scores": [d["score"] for d in scored_docs[:10]],
349
+ "enhanced_query": enhanced_query,
350
+ "context_influence": sum(1 for d in scored_docs[:10] if d["context_boosted"]) / min(10, len(scored_docs))
351
+ }
352
+ }
353
+ """
354
+ },
355
+ )
356
+
357
+ # Response generator with context
358
+ response_generator_id = builder.add_node(
359
+ "LLMAgentNode",
360
+ node_id="response_generator",
361
+ config={
362
+ "system_prompt": f"""Generate a contextual response considering the conversation history.
363
+
364
+ Guidelines:
365
+ 1. Reference previous conversation naturally
366
+ 2. Use appropriate pronouns when context is clear
367
+ 3. Handle topic transitions smoothly
368
+ 4. Maintain consistent persona and tone
369
+ 5. Build on previous explanations
370
+ 6. Acknowledge when changing topics
371
+
372
+ For topic switches, use transitional phrases like:
373
+ - "Moving on to [new topic]..."
374
+ - "Regarding your question about [new topic]..."
375
+ - "That's a different but interesting topic..."
376
+
377
+ For continuations, reference previous context:
378
+ - "As I mentioned earlier..."
379
+ - "Building on what we discussed..."
380
+ - "To elaborate further..."
381
+
382
+ {"Personalize based on user preferences when available." if self.personalization_enabled else ""}
383
+
384
+ Keep responses conversational and engaging.""",
385
+ "model": "gpt-4",
386
+ },
387
+ )
388
+
389
+ # Context summarizer (for long conversations)
390
+ if self.enable_summarization:
391
+ summarizer_id = builder.add_node(
392
+ "LLMAgentNode",
393
+ node_id="context_summarizer",
394
+ config={
395
+ "system_prompt": """Summarize the conversation history concisely.
396
+
397
+ Focus on:
398
+ 1. Main topics discussed
399
+ 2. Key information provided
400
+ 3. User's apparent interests
401
+ 4. Any preferences expressed
402
+ 5. Important clarifications made
403
+
404
+ Keep the summary under 100 words.
405
+ This will be used to maintain context in future turns.""",
406
+ "model": "gpt-4",
407
+ },
408
+ )
409
+
410
+ # Session updater
411
+ session_updater_id = builder.add_node(
412
+ "PythonCodeNode",
413
+ node_id="session_updater",
414
+ config={
415
+ "code": f"""
416
+ def update_session(session_id, sessions_store, query, response, topic_info, summary=None):
417
+ '''Update session with new turn'''
418
+
419
+ session = sessions_store[session_id]
420
+
421
+ # Add new turn
422
+ new_turn = {{
423
+ "turn_number": len(session["turns"]) + 1,
424
+ "timestamp": datetime.now().isoformat(),
425
+ "query": query,
426
+ "response": response.get("response", ""),
427
+ "topic": topic_info.get("topic_analysis", {{}}).get("current_topic")
428
+ }}
429
+
430
+ session["turns"].append(new_turn)
431
+
432
+ # Update summary if provided
433
+ if summary and summary.get("response"):
434
+ session["summary"] = summary["response"]
435
+
436
+ # Update current topic
437
+ if topic_info and topic_info.get("topic_analysis"):
438
+ session["current_topic"] = topic_info["topic_analysis"]["current_topic"]
439
+
440
+ # Track topics discussed
441
+ topic = topic_info["topic_analysis"]["current_topic"]
442
+ if topic and topic not in session["metrics"]["topics_discussed"]:
443
+ session["metrics"]["topics_discussed"].append(topic)
444
+
445
+ # Update metrics
446
+ session["metrics"]["turn_count"] = len(session["turns"])
447
+ total_response_length = sum(len(turn.get("response", "")) for turn in session["turns"])
448
+ session["metrics"]["avg_response_length"] = total_response_length / len(session["turns"]) if session["turns"] else 0
449
+
450
+ # Trim old turns if exceeds max + buffer for summarization
451
+ if len(session["turns"]) > {self.max_context_turns} * 1.5:
452
+ # Keep recent turns and rely on summary for older context
453
+ session["turns"] = session["turns"][-{self.max_context_turns}:]
454
+
455
+ # Calculate conversation health metrics
456
+ conversation_metrics = {{
457
+ "coherence_score": 0.85, # Would calculate based on topic consistency
458
+ "engagement_level": min(1.0, len(session["turns"]) / 10), # Higher with more turns
459
+ "topic_diversity": len(session["metrics"]["topics_discussed"]) / max(1, session["metrics"]["turn_count"]),
460
+ "avg_turn_length": session["metrics"]["avg_response_length"]
461
+ }}
462
+
463
+ result = {{
464
+ "session_update": {{
465
+ "session_id": session_id,
466
+ "turn_added": new_turn["turn_number"],
467
+ "total_turns": len(session["turns"]),
468
+ "current_topic": session["current_topic"],
469
+ "conversation_metrics": conversation_metrics
470
+ }}
471
+ }}
472
+ """
473
+ },
474
+ )
475
+
476
+ # Result formatter
477
+ result_formatter_id = builder.add_node(
478
+ "PythonCodeNode",
479
+ node_id="result_formatter",
480
+ config={
481
+ "code": """
482
+ # Format conversational response
483
+ response = response.get("response", "")
484
+ session_update = session_update.get("session_update", {})
485
+ topic_info = topic_info.get("topic_analysis", {}) if topic_info else {}
486
+ contextual_retrieval = contextual_retrieval.get("contextual_retrieval", {})
487
+
488
+ # Build session state summary
489
+ session_state = {
490
+ "session_id": session_update.get("session_id"),
491
+ "turn_number": session_update.get("turn_added"),
492
+ "total_turns": session_update.get("total_turns"),
493
+ "context_window": {self.max_context_turns},
494
+ "summary_available": {self.enable_summarization}
495
+ }
496
+
497
+ # Topic information
498
+ topic_summary = {
499
+ "current_topic": topic_info.get("current_topic"),
500
+ "topic_changed": topic_info.get("topic_changed", False),
501
+ "transition_type": topic_info.get("transition_type", "continuation"),
502
+ "topics_discussed": session_update.get("conversation_metrics", {}).get("topic_diversity", 0)
503
+ }
504
+
505
+ # Conversation metrics
506
+ metrics = session_update.get("conversation_metrics", {})
507
+ metrics["retrieval_context_influence"] = contextual_retrieval.get("context_influence", 0)
508
+
509
+ result = {
510
+ "conversational_response": {
511
+ "response": response,
512
+ "session_state": session_state,
513
+ "topic_info": topic_summary,
514
+ "conversation_metrics": metrics,
515
+ "metadata": {
516
+ "coreference_resolution": {self.coreference_resolution},
517
+ "personalization": {self.personalization_enabled},
518
+ "context_enhanced_retrieval": True
519
+ }
520
+ }
521
+ }
522
+ """
523
+ },
524
+ )
525
+
526
+ # Connect workflow
527
+ builder.add_connection(
528
+ context_loader_id,
529
+ "session_context",
530
+ context_retriever_id,
531
+ "session_context",
532
+ )
533
+
534
+ if self.coreference_resolution:
535
+ builder.add_connection(
536
+ context_loader_id, "session_context", coreference_resolver_id, "context"
537
+ )
538
+ builder.add_connection(
539
+ coreference_resolver_id, "resolved_query", context_retriever_id, "query"
540
+ )
541
+
542
+ if self.topic_tracking:
543
+ builder.add_connection(
544
+ context_loader_id,
545
+ "session_context",
546
+ topic_tracker_id,
547
+ "session_context",
548
+ )
549
+ builder.add_connection(
550
+ topic_tracker_id, "topic_analysis", context_retriever_id, "topic_info"
551
+ )
552
+
553
+ builder.add_connection(
554
+ context_retriever_id,
555
+ "contextual_retrieval",
556
+ response_generator_id,
557
+ "retrieval_results",
558
+ )
559
+ builder.add_connection(
560
+ context_loader_id,
561
+ "session_context",
562
+ response_generator_id,
563
+ "conversation_context",
564
+ )
565
+
566
+ if self.enable_summarization:
567
+ builder.add_connection(
568
+ context_loader_id,
569
+ "session_context",
570
+ summarizer_id,
571
+ "conversation_history",
572
+ )
573
+ builder.add_connection(
574
+ summarizer_id, "response", session_updater_id, "summary"
575
+ )
576
+
577
+ builder.add_connection(
578
+ response_generator_id, "response", session_updater_id, "response"
579
+ )
580
+ if self.topic_tracking:
581
+ builder.add_connection(
582
+ topic_tracker_id, "topic_analysis", session_updater_id, "topic_info"
583
+ )
584
+
585
+ builder.add_connection(
586
+ session_updater_id, "session_update", result_formatter_id, "session_update"
587
+ )
588
+ builder.add_connection(
589
+ response_generator_id, "response", result_formatter_id, "response"
590
+ )
591
+ if self.topic_tracking:
592
+ builder.add_connection(
593
+ topic_tracker_id, "topic_analysis", result_formatter_id, "topic_info"
594
+ )
595
+ builder.add_connection(
596
+ context_retriever_id,
597
+ "contextual_retrieval",
598
+ result_formatter_id,
599
+ "contextual_retrieval",
600
+ )
601
+
602
+ return builder.build(name="conversational_rag_workflow")
603
+
604
+ def create_session(self, user_id: str = None) -> Dict[str, Any]:
605
+ """Create a new conversation session"""
606
+ session_id = hashlib.sha256(
607
+ f"{user_id or 'anonymous'}_{datetime.now().isoformat()}".encode()
608
+ ).hexdigest()[:16]
609
+
610
+ session = {
611
+ "id": session_id,
612
+ "user_id": user_id,
613
+ "created_at": datetime.now().isoformat(),
614
+ "turns": [],
615
+ "summary": "",
616
+ "current_topic": None,
617
+ "user_preferences": {},
618
+ "metrics": {
619
+ "turn_count": 0,
620
+ "topics_discussed": [],
621
+ "avg_response_length": 0,
622
+ },
623
+ }
624
+
625
+ self.sessions[session_id] = session
626
+
627
+ return {
628
+ "session_id": session_id,
629
+ "created": True,
630
+ "expires_in": 3600, # 1 hour default expiry
631
+ }
632
+
633
+
634
@register_node()
class ConversationMemoryNode(Node):
    """
    Long-term Conversation Memory Management

    Manages persistent conversation memory across sessions.

    When to use:
    - Best for: Virtual assistants, customer support, personalized systems
    - Memory types: Episodic, semantic, user preferences
    - Retention: Configurable from hours to permanent

    Example:
        memory = ConversationMemoryNode(
            memory_types=["episodic", "semantic", "preferences"],
            retention_policy="adaptive"
        )

        # Store conversation insights (operations go through run())
        memory.run(
            operation="store",
            user_id="user123",
            data={
                "conversation_id": "conv456",
                "conversation": {"topics": ["machine learning", "python"]},
                "preferences": {"explanation_style": "detailed"},
                "facts": [{"key": "level", "value": "beginner"}],
            },
        )

        # Retrieve relevant memories
        memories = memory.run(
            operation="retrieve",
            user_id="user123",
            context="python programming question",
        )

    Parameters:
        memory_types: Types of memory to maintain
        retention_policy: How long to retain memories
        max_memories_per_user: Memory limit per user

    Returns:
        relevant_memories: Memories relevant to current context
        memory_summary: Aggregated user knowledge
        personalization_hints: Suggestions for personalization
    """
679
+
680
+ def __init__(
681
+ self,
682
+ name: str = "conversation_memory",
683
+ memory_types: List[str] = None,
684
+ retention_policy: str = "adaptive",
685
+ max_memories_per_user: int = 1000,
686
+ ):
687
+ self.memory_types = memory_types or ["episodic", "semantic", "preferences"]
688
+ self.retention_policy = retention_policy
689
+ self.max_memories_per_user = max_memories_per_user
690
+ # In-memory storage (use persistent DB in production)
691
+ self.memory_store = defaultdict(
692
+ lambda: {
693
+ "episodic": deque(maxlen=max_memories_per_user),
694
+ "semantic": {},
695
+ "preferences": {},
696
+ }
697
+ )
698
+ super().__init__(name)
699
+
700
+ def get_parameters(self) -> Dict[str, NodeParameter]:
701
+ return {
702
+ "operation": NodeParameter(
703
+ name="operation",
704
+ type=str,
705
+ required=True,
706
+ description="Operation: store, retrieve, update, forget",
707
+ ),
708
+ "user_id": NodeParameter(
709
+ name="user_id", type=str, required=True, description="User identifier"
710
+ ),
711
+ "data": NodeParameter(
712
+ name="data",
713
+ type=dict,
714
+ required=False,
715
+ description="Data to store or update",
716
+ ),
717
+ "context": NodeParameter(
718
+ name="context",
719
+ type=str,
720
+ required=False,
721
+ description="Context for retrieval",
722
+ ),
723
+ }
724
+
725
+ def run(self, **kwargs) -> Dict[str, Any]:
726
+ """Execute memory operation"""
727
+ operation = kwargs.get("operation", "retrieve")
728
+ user_id = kwargs.get("user_id", "")
729
+
730
+ if operation == "store":
731
+ return self._store_memory(user_id, kwargs.get("data", {}))
732
+ elif operation == "retrieve":
733
+ return self._retrieve_memories(user_id, kwargs.get("context", ""))
734
+ elif operation == "update":
735
+ return self._update_memory(user_id, kwargs.get("data", {}))
736
+ elif operation == "forget":
737
+ return self._forget_memories(user_id, kwargs.get("data", {}))
738
+ else:
739
+ return {"error": f"Unknown operation: {operation}"}
740
+
741
+ def _store_memory(self, user_id: str, data: Dict) -> Dict[str, Any]:
742
+ """Store new memories"""
743
+ user_memory = self.memory_store[user_id]
744
+ stored = defaultdict(int)
745
+
746
+ # Store episodic memory (specific interactions)
747
+ if "episodic" in self.memory_types and "conversation" in data:
748
+ episode = {
749
+ "timestamp": datetime.now().isoformat(),
750
+ "conversation_id": data.get("conversation_id"),
751
+ "summary": data["conversation"].get("summary", ""),
752
+ "topics": data["conversation"].get("topics", []),
753
+ "sentiment": data["conversation"].get("sentiment", "neutral"),
754
+ "importance": data["conversation"].get("importance", 0.5),
755
+ }
756
+ user_memory["episodic"].append(episode)
757
+ stored["episodic"] += 1
758
+
759
+ # Store semantic memory (facts and knowledge)
760
+ if "semantic" in self.memory_types and "facts" in data:
761
+ for fact in data["facts"]:
762
+ fact_key = fact.get("key", "")
763
+ if fact_key:
764
+ user_memory["semantic"][fact_key] = {
765
+ "value": fact.get("value"),
766
+ "confidence": fact.get("confidence", 0.8),
767
+ "source": fact.get("source", "conversation"),
768
+ "timestamp": datetime.now().isoformat(),
769
+ }
770
+ stored["semantic"] += 1
771
+
772
+ # Store preferences
773
+ if "preferences" in self.memory_types and "preferences" in data:
774
+ user_memory["preferences"].update(data["preferences"])
775
+ stored["preferences"] += len(data["preferences"])
776
+
777
+ # Apply retention policy
778
+ self._apply_retention_policy(user_id)
779
+
780
+ return {
781
+ "stored": dict(stored),
782
+ "total_memories": {
783
+ "episodic": len(user_memory["episodic"]),
784
+ "semantic": len(user_memory["semantic"]),
785
+ "preferences": len(user_memory["preferences"]),
786
+ },
787
+ "storage_status": "success",
788
+ }
789
+
790
+ def _retrieve_memories(self, user_id: str, context: str) -> Dict[str, Any]:
791
+ """Retrieve relevant memories"""
792
+ if user_id not in self.memory_store:
793
+ return {
794
+ "relevant_memories": [],
795
+ "memory_summary": "No memories found",
796
+ "personalization_hints": {},
797
+ }
798
+
799
+ user_memory = self.memory_store[user_id]
800
+ relevant_memories = []
801
+
802
+ # Search episodic memories
803
+ context_words = set(context.lower().split())
804
+ for episode in user_memory["episodic"]:
805
+ # Check topic overlap
806
+ episode_topics = set(topic.lower() for topic in episode.get("topics", []))
807
+ if context_words & episode_topics:
808
+ relevant_memories.append(
809
+ {
810
+ "type": "episodic",
811
+ "content": episode,
812
+ "relevance": (
813
+ len(context_words & episode_topics) / len(context_words)
814
+ if context_words
815
+ else 0
816
+ ),
817
+ }
818
+ )
819
+
820
+ # Search semantic memories
821
+ for key, fact in user_memory["semantic"].items():
822
+ if any(word in key.lower() for word in context_words):
823
+ relevant_memories.append(
824
+ {
825
+ "type": "semantic",
826
+ "content": {"key": key, **fact},
827
+ "relevance": fact.get("confidence", 0.5),
828
+ }
829
+ )
830
+
831
+ # Sort by relevance
832
+ relevant_memories.sort(key=lambda x: x["relevance"], reverse=True)
833
+
834
+ # Generate memory summary
835
+ memory_summary = self._generate_memory_summary(user_memory)
836
+
837
+ # Extract personalization hints
838
+ personalization_hints = {
839
+ "preferences": dict(user_memory["preferences"]),
840
+ "frequent_topics": self._extract_frequent_topics(user_memory["episodic"]),
841
+ "interaction_style": self._infer_interaction_style(user_memory["episodic"]),
842
+ }
843
+
844
+ return {
845
+ "relevant_memories": relevant_memories[:10],
846
+ "memory_summary": memory_summary,
847
+ "personalization_hints": personalization_hints,
848
+ }
849
+
850
+ def _update_memory(self, user_id: str, data: Dict) -> Dict[str, Any]:
851
+ """Update existing memories"""
852
+ if user_id not in self.memory_store:
853
+ return {"error": "No memories found for user"}
854
+
855
+ user_memory = self.memory_store[user_id]
856
+ updated = defaultdict(int)
857
+
858
+ # Update semantic facts
859
+ if "facts_update" in data:
860
+ for fact_update in data["facts_update"]:
861
+ key = fact_update.get("key")
862
+ if key in user_memory["semantic"]:
863
+ user_memory["semantic"][key].update(fact_update.get("updates", {}))
864
+ user_memory["semantic"][key][
865
+ "timestamp"
866
+ ] = datetime.now().isoformat()
867
+ updated["semantic"] += 1
868
+
869
+ # Update preferences
870
+ if "preferences_update" in data:
871
+ user_memory["preferences"].update(data["preferences_update"])
872
+ updated["preferences"] += len(data["preferences_update"])
873
+
874
+ return {"updated": dict(updated), "update_status": "success"}
875
+
876
+ def _forget_memories(self, user_id: str, data: Dict) -> Dict[str, Any]:
877
+ """Forget specific memories (GDPR compliance)"""
878
+ if user_id not in self.memory_store:
879
+ return {"error": "No memories found for user"}
880
+
881
+ forgotten = defaultdict(int)
882
+
883
+ if data.get("forget_all"):
884
+ # Complete memory wipe
885
+ del self.memory_store[user_id]
886
+ return {"forgotten": "all", "status": "complete"}
887
+
888
+ user_memory = self.memory_store[user_id]
889
+
890
+ # Forget specific types
891
+ if "forget_types" in data:
892
+ for memory_type in data["forget_types"]:
893
+ if memory_type == "episodic":
894
+ forgotten["episodic"] = len(user_memory["episodic"])
895
+ user_memory["episodic"].clear()
896
+ elif memory_type == "semantic":
897
+ forgotten["semantic"] = len(user_memory["semantic"])
898
+ user_memory["semantic"].clear()
899
+ elif memory_type == "preferences":
900
+ forgotten["preferences"] = len(user_memory["preferences"])
901
+ user_memory["preferences"].clear()
902
+
903
+ # Forget specific items
904
+ if "forget_items" in data:
905
+ for item in data["forget_items"]:
906
+ if (
907
+ item["type"] == "semantic"
908
+ and item["key"] in user_memory["semantic"]
909
+ ):
910
+ del user_memory["semantic"][item["key"]]
911
+ forgotten["semantic"] += 1
912
+
913
+ return {"forgotten": dict(forgotten), "forget_status": "success"}
914
+
915
+ def _apply_retention_policy(self, user_id: str):
916
+ """Apply retention policy to memories"""
917
+ if self.retention_policy == "adaptive":
918
+ # Keep important and recent memories
919
+ user_memory = self.memory_store[user_id]
920
+
921
+ # Remove old low-importance episodic memories
922
+ if len(user_memory["episodic"]) > self.max_memories_per_user * 0.8:
923
+ # Keep high importance memories
924
+ important_episodes = [
925
+ ep
926
+ for ep in user_memory["episodic"]
927
+ if ep.get("importance", 0.5) > 0.7
928
+ ]
929
+ recent_episodes = list(user_memory["episodic"])[-100:]
930
+
931
+ # Combine and deduplicate
932
+ kept_episodes = []
933
+ seen = set()
934
+ for ep in important_episodes + recent_episodes:
935
+ ep_key = f"{ep.get('conversation_id')}_{ep.get('timestamp')}"
936
+ if ep_key not in seen:
937
+ kept_episodes.append(ep)
938
+ seen.add(ep_key)
939
+
940
+ user_memory["episodic"] = deque(
941
+ kept_episodes, maxlen=self.max_memories_per_user
942
+ )
943
+
944
+ def _generate_memory_summary(self, user_memory: Dict) -> str:
945
+ """Generate a summary of user's memories"""
946
+ num_episodes = len(user_memory["episodic"])
947
+ num_facts = len(user_memory["semantic"])
948
+ num_preferences = len(user_memory["preferences"])
949
+
950
+ topics = []
951
+ for episode in user_memory["episodic"]:
952
+ topics.extend(episode.get("topics", []))
953
+
954
+ unique_topics = list(set(topics))[:5]
955
+
956
+ summary = f"User has {num_episodes} conversation memories covering topics like {', '.join(unique_topics)}. "
957
+ summary += (
958
+ f"Knows {num_facts} facts about the user and {num_preferences} preferences."
959
+ )
960
+
961
+ return summary
962
+
963
+ def _extract_frequent_topics(self, episodes: Deque) -> List[str]:
964
+ """Extract frequently discussed topics"""
965
+ topic_counts = defaultdict(int)
966
+
967
+ for episode in episodes:
968
+ for topic in episode.get("topics", []):
969
+ topic_counts[topic] += 1
970
+
971
+ # Sort by frequency
972
+ sorted_topics = sorted(topic_counts.items(), key=lambda x: x[1], reverse=True)
973
+ return [topic for topic, _ in sorted_topics[:10]]
974
+
975
+ def _infer_interaction_style(self, episodes: Deque) -> Dict[str, Any]:
976
+ """Infer user's preferred interaction style"""
977
+ if not episodes:
978
+ return {"style": "unknown", "confidence": 0}
979
+
980
+ # Analyze recent interactions
981
+ recent_episodes = list(episodes)[-20:]
982
+
983
+ # Simple heuristics (would use ML in production)
984
+ avg_importance = sum(ep.get("importance", 0.5) for ep in recent_episodes) / len(
985
+ recent_episodes
986
+ )
987
+
988
+ if avg_importance > 0.7:
989
+ style = "detailed"
990
+ elif avg_importance < 0.3:
991
+ style = "concise"
992
+ else:
993
+ style = "balanced"
994
+
995
+ return {"style": style, "confidence": 0.8, "avg_importance": avg_importance}
996
+
997
+
998
+ # Export all conversational nodes
999
+ __all__ = ["ConversationalRAGNode", "ConversationMemoryNode"]