kssrag-0.2.1-py3-none-any.whl → kssrag-0.2.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kssrag/core/agents.py +375 -45
- kssrag/server.py +4 -10
- {kssrag-0.2.1.dist-info → kssrag-0.2.3.dist-info}/METADATA +2 -2
- {kssrag-0.2.1.dist-info → kssrag-0.2.3.dist-info}/RECORD +7 -7
- {kssrag-0.2.1.dist-info → kssrag-0.2.3.dist-info}/WHEEL +1 -1
- {kssrag-0.2.1.dist-info → kssrag-0.2.3.dist-info}/entry_points.txt +0 -0
- {kssrag-0.2.1.dist-info → kssrag-0.2.3.dist-info}/top_level.txt +0 -0
kssrag/core/agents.py
CHANGED
@@ -2,7 +2,7 @@ from typing import Generator, List, Dict, Any, Optional
 from ..utils.helpers import logger
 
 class RAGAgent:
-    """RAG agent implementation"""
+    """RAG agent implementation with discrete conversation summaries"""
 
     def __init__(self, retriever, llm, system_prompt: Optional[str] = None,
                  conversation_history: Optional[List[Dict[str, str]]] = None):
@@ -11,23 +11,71 @@
         self.conversation = conversation_history or []
         self.system_prompt = system_prompt or """You are a helpful AI assistant. Use the following context to answer the user's question.
 If you don't know the answer based on the context, say so."""
+        self.conversation_summaries = []  # Discrete summaries instead of single blob
+
+        logger.info(f"RAGAgent initialized with {len(conversation_history or [])} history messages")
+        logger.info(f"System prompt: {self.system_prompt[:100]}..." if self.system_prompt else "No system prompt")
+
 
         # Initialize with system message if not already present
         if not any(msg.get("role") == "system" for msg in self.conversation):
             self.add_message("system", self.system_prompt)
 
     def add_message(self, role: str, content: str):
-        """Add a message to the conversation history"""
+        """Add a message to the conversation history (with simple dedupe for assistant)."""
+        content = content.strip()
+        # Prevent adding empty messages
+        if not content:
+            logger.info("Attempted to add empty message – ignored.")
+            return
+
+        # If last message is identical assistant content, skip to avoid duplicates
+        if self.conversation:
+            last = self.conversation[-1]
+            if role == "assistant" and last.get("role") == "assistant":
+                if last.get("content", "").strip() == content:
+                    logger.info("Duplicate assistant message suppressed.")
+                    return
+
         self.conversation.append({"role": role, "content": content})
+
+        # Keep conversation manageable (last 15 messages)
+        if len(self.conversation) > 15:
+            self._smart_trim_conversation()
+
+
+    def _smart_trim_conversation(self):
+        """Trim conversation while preserving system message and recent exchanges"""
+        if len(self.conversation) <= 15:
+            return
 
-
-
-
-
-
-
-
-
+        original_count = len(self.conversation)
+        # Always keep system message
+        system_msg = next((msg for msg in self.conversation if msg["role"] == "system"), None)
+
+        # Keep recent messages (last 14)
+        recent_messages = self.conversation[-14:]
+
+        # Rebuild: system + recent
+        new_conv = []
+        if system_msg:
+            new_conv.append(system_msg)
+        new_conv.extend(recent_messages)
+
+        self.conversation = new_conv
+
+        # Also trim summaries to match conversation scope
+        if len(self.conversation_summaries) > 7:
+            self.conversation_summaries = self.conversation_summaries[-7:]
+        logger.info(f"Trimmed conversation from {original_count} to {len(self.conversation)} messages")
 
     def _build_context(self, context_docs: List[Dict[str, Any]]) -> str:
         """Build context string from documents"""
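The new dedupe and trim paths can be exercised directly. A minimal sketch, assuming stub retriever/LLM objects (the stubs below are hypothetical stand-ins, not part of kssrag):

```python
from kssrag.core.agents import RAGAgent

class StubRetriever:
    def retrieve(self, query, top_k=5):
        return []  # no documents; enough to construct the agent

class StubLLM:
    def predict(self, messages):
        return "ok"

agent = RAGAgent(retriever=StubRetriever(), llm=StubLLM())
agent.add_message("assistant", "Hello!")
agent.add_message("assistant", "Hello!")  # suppressed: duplicate assistant content
agent.add_message("user", "   ")          # ignored: empty after strip()
for i in range(20):                       # push past 15 messages to trigger the trim
    agent.add_message("user", f"message {i}")
print(len(agent.conversation))            # 15: the system message plus the last 14
```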
@@ -40,26 +88,198 @@
         return context
 
     def _build_messages(self, question: str, context: str = "") -> List[Dict[str, str]]:
-        """Build messages for LLM including context"""
+        """Build messages for LLM including context and conversation summaries"""
         # Start with conversation history
         messages = self.conversation.copy()
 
-
+        logger.info(f"Building messages for query: '{question}'")
+        logger.info(f"Conversation history: {len(self.conversation)} messages")
+        logger.info(f"Active summaries: {len(self.conversation_summaries)}")
+        logger.info(f"Retrieved context: {len(context)} chars" if context else "No retrieved context")
+
+        # Add conversation summaries as context if available
+        if self.conversation_summaries:
+            logger.info(f"Using summaries: {self.conversation_summaries}")
+            summary_context = "Previous conversation context:\n" + "\n".join(
+                f"- {summary}" for summary in self.conversation_summaries[-3:]  # Last 3 summaries
+            )
+            messages.append({
+                "role": "system",
+                "content": summary_context
+            })
+
+        # Add retrieved document context
         user_message = f"{context}\n\nQuestion: {question}" if context else question
 
-        #
-
-            messages[-1]["content"] = user_message
-        else:
-            messages.append({"role": "user", "content": user_message})
+        # ✅ FIX: Always append the new user message (don't replace existing ones)
+        messages.append({"role": "user", "content": user_message})
 
+        # Add stealth summarization instruction for ongoing conversations
+        if len(self.conversation) >= 1:  # True once the system prompt exists, i.e. from the first query onward
+            summary_instruction = self._create_summary_instruction()
+            messages.append({"role": "system", "content": summary_instruction})
+            logger.info(f"Summary instruction added to prompt: {len(summary_instruction)} chars")
+            logger.debug(f"Instruction content: {summary_instruction}")
+
+        logger.info(f"Final message count to LLM: {len(messages)}")
         return messages
 
+    def _create_summary_instruction(self) -> str:
+        """Create the stealth summarization instruction with examples"""
+        return """IMPORTANT: You MUST follow this response structure:
+
+[YOUR MAIN RESPONSE TO THE USER GOES HERE]
+
+[SUMMARY_START]
+Key context from this exchange: [Brief summary of new information]
+[SUMMARY_END]
+
+EXAMPLES:
+If user says "My name is John", your summary should be: "User's name is John"
+If user says "I prefer formal language", your summary should be: "User prefers formal communication style"
+If user shares a preference, summarize it: "User mentioned [preference]"
+
+RULES:
+- ALWAYS include the summary section
+- Use EXACT markers: [SUMMARY_START] and [SUMMARY_END]
+- Keep summary 1-2 sentences
+- Focus on user preferences, names, important context
+
+The summary will be automatically hidden from the user."""
+
+    def _extract_summary_and_response(self, full_response: str) -> tuple[str, Optional[str]]:
+        """Extract summary from response and return clean user response."""
+
+        if not full_response:
+            return "", None
+
+        summary_start = "[SUMMARY_START]"
+        summary_end = "[SUMMARY_END]"
+
+        original = full_response
+        normalized = original.replace('\r\n', '\n').replace('\r', '\n')
+
+        # Case 1: Complete markers
+        if summary_start in normalized and summary_end in normalized:
+            start_idx = normalized.find(summary_start) + len(summary_start)
+            end_idx = normalized.find(summary_end)
+            summary = normalized[start_idx:end_idx].strip()
+
+            user_response = original.split(summary_start)[0].strip()
+
+            if not summary or len(summary) < 5:
+                logger.info("Summary too short or invalid")
+                return original.strip(), None
+
+            logger.info("Summary extracted successfully")
+            return user_response, summary
+
+        # Case 2: Partial marker (start only)
+        if summary_start in normalized:
+            start_idx = normalized.find(summary_start) + len(summary_start)
+            potential = normalized[start_idx:start_idx + 200].strip()
+
+            cleaned_summary = (
+                potential
+                .split('[SUMMARY_')[0]
+                .split('[SUMMARY')[0]
+                .split('[')[0]
+                .strip()
+            )
+
+            user_response = original.split(summary_start)[0].strip()
+
+            if cleaned_summary and len(cleaned_summary) >= 10:
+                logger.info("Partial summary extracted")
+                return user_response, cleaned_summary
+
+            logger.info("Partial summary invalid")
+            return original.strip(), None
+
+        # Case 3: No markers at all - return the full response unchanged
+        logger.info("No summary markers found")
+        logger.info(f"Full response length: {len(original)} chars")
+        return original.strip(), None
+
+    def _add_conversation_summary(self, new_summary: str):
+        """Add a new discrete conversation summary"""
+        if not new_summary or new_summary.lower() == "none":
+            logger.info("No summary to add (empty or 'none')")
+            return
+
+        new_summary = new_summary.strip()
+        if not new_summary:
+            logger.info("No summary to add after strip")
+            return
+
+        # Append new summary
+        self.conversation_summaries.append(new_summary)
+        logger.info(f"ADDED Summary #{len(self.conversation_summaries)}: '{new_summary}'")
+
+        # Keep only recent summaries (last 7)
+        if len(self.conversation_summaries) > 7:
+            self.conversation_summaries = self.conversation_summaries[-7:]
+            logger.info(f"Summary count trimmed to {len(self.conversation_summaries)}")
+
+
     def query(self, question: str, top_k: int = 5, include_context: bool = True) -> str:
-        """Process a query"""
+        """Process a query with stealth conversation summarization"""
         try:
             # Retrieve relevant context
+            logger.info(f"QUERY START: '{question}' (top_k: {top_k})")
             context_docs = self.retriever.retrieve(question, top_k)
+            logger.info(f"Retrieved {len(context_docs)} context documents")
 
             if not context_docs and include_context:
                 logger.warning(f"No context found for query: {question}")
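The marker protocol above can be checked in isolation. A minimal sketch of `_extract_summary_and_response` on a response that follows the instructed format (stub dependencies again hypothetical):

```python
from kssrag.core.agents import RAGAgent

class StubRetriever:
    def retrieve(self, query, top_k=5):
        return []

class StubLLM:
    def predict(self, messages):
        return "ok"

agent = RAGAgent(retriever=StubRetriever(), llm=StubLLM())
raw = "Nice to meet you, John! [SUMMARY_START] User's name is John [SUMMARY_END]"
reply, summary = agent._extract_summary_and_response(raw)
print(reply)    # Nice to meet you, John!
print(summary)  # User's name is John

# A partial marker (e.g. a cut-off stream) falls back to Case 2,
# and a response with no markers at all is returned unchanged (Case 3).
```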
@@ -72,49 +292,159 @@
             messages = self._build_messages(question, context)
 
             # Generate response
-
+            full_response = self.llm.predict(messages)
+            logger.info(f"LLM response received: {len(full_response)} chars")
+
+            # Extract summary and clean response
+            user_response, conversation_summary = self._extract_summary_and_response(full_response)
+
+            # Add new summary if found
+            if conversation_summary:
+                self._add_conversation_summary(conversation_summary)
+                logger.info("Summary processing completed successfully")
+            else:
+                logger.info("No summary generated for this exchange")
 
-            # Add assistant response to conversation
-            self.add_message("assistant",
+            # Add assistant response to conversation (clean version only)
+            self.add_message("assistant", user_response)
 
-
+            logger.info(f"Final user response: {len(user_response)} chars")
+            return user_response
 
         except Exception as e:
             logger.error(f"Error processing query: {str(e)}")
             return "I encountered an issue processing your query. Please try again."
 
     def query_stream(self, question: str, top_k: int = 5) -> Generator[str, None, None]:
-        """
+        """
+        Professional-grade streaming with multiple fallback strategies
+        """
         try:
-
-            relevant_docs = self.retriever.retrieve(question, top_k=top_k)
+            logger.info(f"STREAMING QUERY START: '{question}'")
 
-            #
-
+            # Strategy 1: Try true streaming first
+            if hasattr(self.llm, 'predict_stream'):
+                try:
+                    yield from self._stream_with_summary_protection(question, top_k)
+                    return
+                except Exception as stream_error:
+                    logger.warning(f"Streaming failed, falling back: {stream_error}")
 
-            #
+            # Strategy 2: Fallback to simulated streaming
+            logger.info("Falling back to simulated streaming")
+            yield from self._simulated_streaming(question, top_k)
+
+        except Exception as e:
+            logger.error(f"ALL STREAMING STRATEGIES FAILED: {str(e)}")
+            yield f"Error: {str(e)}"
+
+    def _stream_with_summary_protection(self, question: str, top_k: int) -> Generator[str, None, None]:
+        """True streaming with better error handling"""
+        try:
+            relevant_docs = self.retriever.retrieve(question, top_k=top_k)
+            context = self._build_context(relevant_docs)
             messages = self._build_messages(question, context)
 
-
-
-
-
-
-            yield chunk
+            buffer = ""
+            summary_started = False
+
+            for chunk in self.llm.predict_stream(messages):
+                buffer += chunk
 
-            #
-
-
-
-
-
-
+                # Check for summary markers
+                if any(marker in chunk for marker in ['[SUMMARY', 'SUMMARY_']):
+                    if not summary_started:
+                        logger.info("Summary markers detected - cutting stream")
+                        summary_started = True
+                        clean_part = self._extract_clean_content(buffer)
+                        if clean_part:
+                            yield clean_part
+                    # Don't break here - let the method complete naturally
+                    continue
 
+                if not summary_started:
+                    yield chunk
+
+            # Process the complete response
+            self._process_complete_response(buffer)
+
         except Exception as e:
-            logger.error(f"
-
+            logger.error(f"Streaming error: {e}")
+            raise  # Re-raise to trigger fallback
+
+    def _process_complete_response(self, full_response: str):
+        """Process complete response and extract summary"""
+        user_response, conversation_summary = self._extract_summary_and_response(full_response)
+
+        if conversation_summary:
+            logger.info(f"Summary extracted: '{conversation_summary}'")
+            self._add_conversation_summary(conversation_summary)
+
+        # Extra guard: only add the assistant message if it differs from the last one
+        if user_response:
+            last = self.conversation[-1] if self.conversation else None
+            if not (last and last.get("role") == "assistant" and last.get("content", "").strip() == user_response.strip()):
+                self.add_message("assistant", user_response)
+            else:
+                logger.info("Skipped adding duplicate assistant message in _process_complete_response.")
+
+
+    def _simulated_streaming(self, question: str, top_k: int) -> Generator[str, None, None]:
+        """Simulated streaming that guarantees no summary leakage"""
+        relevant_docs = self.retriever.retrieve(question, top_k=top_k)
+        context = self._build_context(relevant_docs)
+        messages = self._build_messages(question, context)
+
+        # Get complete response
+        complete_response = self.llm.predict(messages)
+
+        # Extract clean response
+        user_response, conversation_summary = self._extract_summary_and_response(complete_response)
+
+        if conversation_summary:
+            logger.info(f"Summary extracted: '{conversation_summary}'")
+            self._add_conversation_summary(conversation_summary)
+
+        self.add_message("assistant", user_response)
+
+        # Simulate streaming (small chunks for smoother output)
+        import time
+        chunk_size = 2
+        for i in range(0, len(user_response), chunk_size):
+            yield user_response[i:i+chunk_size]
+            time.sleep(0.02)  # Slight delay for readability
+
+    def _extract_clean_content(self, buffer: str) -> str:
+        """Extract clean content before any summary markers"""
+        markers = ['[SUMMARY_START]', '[SUMMARY', 'SUMMARY_']
+        for marker in markers:
+            if marker in buffer:
+                return buffer.split(marker)[0].strip()
+        return buffer.strip()
 
     def clear_conversation(self):
-        """Clear conversation history except system message"""
+        """Clear conversation history except system message and summaries"""
         system_msg = next((msg for msg in self.conversation if msg["role"] == "system"), None)
-        self.conversation = [system_msg] if system_msg else []
+        self.conversation = [system_msg] if system_msg else []
+        # Keep conversation summaries: they're the compressed memory
+        # TODO: clear self.conversation_summaries here too if stale-memory bugs surface
+
+    def get_conversation_context(self) -> Dict[str, Any]:
+        context = {
+            "summary_count": len(self.conversation_summaries),
+            "summaries": self.conversation_summaries,
+            "message_count": len(self.conversation),
+            "recent_messages": [f"{msg['role']}: {msg['content'][:50]}..." for msg in self.conversation[-3:]]
+        }
+        logger.info(f"Context snapshot: {context}")
+        return context
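End to end, the streaming path can be sketched as below. The stub LLM has no `predict_stream`, so `query_stream` takes the simulated-streaming fallback; the stubs are hypothetical stand-ins:

```python
from kssrag.core.agents import RAGAgent

class StubRetriever:
    def retrieve(self, query, top_k=5):
        return []

class StubLLM:
    # No predict_stream attribute, so query_stream falls back to
    # _simulated_streaming: one predict() call, re-chunked for the caller.
    def predict(self, messages):
        return "Here you go. [SUMMARY_START] User asked for a demo [SUMMARY_END]"

agent = RAGAgent(retriever=StubRetriever(), llm=StubLLM())
streamed = "".join(agent.query_stream("demo please"))
print(streamed)                          # "Here you go." – markers never leak
print(agent.get_conversation_context())  # summaries kept as compressed memory
```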
kssrag/server.py
CHANGED
@@ -98,27 +98,21 @@ def create_app(rag_agent: RAGAgent, server_config: Optional[ServerConfig] = None
             raise HTTPException(status_code=400, detail="Query cannot be empty")
 
         try:
-            # Get or create session
+            # Get or create session
             if session_id not in sessions:
                 logger.info(f"Creating new streaming session: {session_id}")
-                # Use the same LLM configuration but enable streaming
                 sessions[session_id] = RAGAgent(
                     retriever=rag_agent.retriever,
-                    llm=rag_agent.llm,
+                    llm=rag_agent.llm,
                     system_prompt=rag_agent.system_prompt
                 )
 
             agent = sessions[session_id]
 
-            # Build messages using agent's conversation history
-            context_docs = agent.retriever.retrieve(query, top_k=5)
-            context = agent._build_context(context_docs)
-            messages = agent._build_messages(query, context)
-
             async def generate():
                 full_response = ""
                 try:
-                    # Use
+                    # Use agent's query_stream which handles context and summarization
                     for chunk in agent.query_stream(query, top_k=5):
                         full_response += chunk
                         yield f"data: {json.dumps({'chunk': chunk, 'done': False})}\n\n"
@@ -131,7 +125,7 @@ def create_app(rag_agent: RAGAgent, server_config: Optional[ServerConfig] = None
 
         return StreamingResponse(
             generate(),
-            media_type="text/
+            media_type="text/event-stream",
             headers={
                 "Cache-Control": "no-cache",
                 "Connection": "keep-alive",
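On the client side, the endpoint now emits standard Server-Sent Events. A minimal consumer sketch; the route path and request shape are assumptions (this hunk only shows the generator and the StreamingResponse), so adjust them to the real API:

```python
import json
import requests  # third-party HTTP client

# Hypothetical route and payload; the diff does not show the route decorator.
with requests.post(
    "http://localhost:8000/stream",
    json={"query": "hello", "session_id": "s1"},
    stream=True,
) as resp:
    for line in resp.iter_lines(decode_unicode=True):
        if not line or not line.startswith("data: "):
            continue  # skip blank separators between events
        event = json.loads(line[len("data: "):])
        if event.get("done"):
            break
        print(event["chunk"], end="", flush=True)
```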
{kssrag-0.2.1.dist-info → kssrag-0.2.3.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: kssrag
-Version: 0.2.1
+Version: 0.2.3
 Summary: A flexible Retrieval-Augmented Generation framework by Ksschkw
 Home-page: https://github.com/Ksschkw/kssrag
 Author: Ksschkw
@@ -85,7 +85,7 @@ Dynamic: summary
 
 
 
-![Last Commit](https://img.shields.io/github/last-commit/Ksschkw/kssrag)
 
 
 
{kssrag-0.2.1.dist-info → kssrag-0.2.3.dist-info}/RECORD
CHANGED
@@ -2,9 +2,9 @@ kssrag/__init__.py,sha256=N1XfR8IRKtEJAzcOVyHnKXtgx-ztlrSFtFwiVkGbAX8,2041
 kssrag/cli.py,sha256=9AbtUEV9X63bhRj4EU-eHhud8iPM7LJAGSbu_IPlMUE,9703
 kssrag/config.py,sha256=zd978GZQ66TlLZnk9yP7uvoXoWD89BS8VHi7w_yGXrM,6529
 kssrag/kssrag.py,sha256=vy3oCHeHFAp_dJW0JjLbTxeEwCcwtXuOL_Ejmv0qz8Y,5251
-kssrag/server.py,sha256=
+kssrag/server.py,sha256=P2ztL-OF_bSo5xaMB0Gpb4w1RWEEL-YAyOqC1EMZdQc,6241
 kssrag/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-kssrag/core/agents.py,sha256=
+kssrag/core/agents.py,sha256=M5NnJbypk5HW4CxI2MdGQ271Q4teFT5NUUiEZf7JqQM,21115
 kssrag/core/chunkers.py,sha256=HmWL3y2DhhobV5zIlIdZP2KK2N7TASqeirPqmc3_inI,7324
 kssrag/core/retrievers.py,sha256=1e9c7ukUD4pFSVasOMTXSKoz_rapXQTl-FrSHK6Osqg,3037
 kssrag/core/vectorstores.py,sha256=H8hTpjc6hAFMhqAO2Cjq-Jp6xrJhsJKiRN9qxb_-6XM,21003
@@ -26,8 +26,8 @@ tests/test_integration.py,sha256=TY7MrTcAiu1KG4MlgIC7VVlzUTnOoqp9pieK8rhBNrg,105
 tests/test_ocr.py,sha256=PoGKLNISpAwaoPvGuS7qiOf6dsVnsFRFtYkG1WFi6TU,6202
 tests/test_streaming.py,sha256=rMQ0w8_HQFFV0PbHDqQXRBqaNfbd3WqJVNT2hKVbsqw,1442
 tests/test_vectorstores.py,sha256=YOwI2bfqprzbq8ahIw4pbbbEOaKGcg-XPcLCO7WiLxE,1474
-kssrag-0.2.
-kssrag-0.2.
-kssrag-0.2.
-kssrag-0.2.
-kssrag-0.2.
+kssrag-0.2.3.dist-info/METADATA,sha256=APBpMOiuIASOLPv4kRTAeUilihNjVDPf-tO9GC8DDgo,24015
+kssrag-0.2.3.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+kssrag-0.2.3.dist-info/entry_points.txt,sha256=g4tQj5YUqPK3Osb9BI85tsErxleSBUENiqlnX0fWK5M,43
+kssrag-0.2.3.dist-info/top_level.txt,sha256=sO9LGINa0GEjLoHTtufpz01yM5SmeTw6M4zWHEF0R2s,13
+kssrag-0.2.3.dist-info/RECORD,,
{kssrag-0.2.1.dist-info → kssrag-0.2.3.dist-info}/entry_points.txt
File without changes
{kssrag-0.2.1.dist-info → kssrag-0.2.3.dist-info}/top_level.txt
File without changes