kssrag 0.2.1__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. {kssrag-0.2.1 → kssrag-0.2.2}/PKG-INFO +1 -1
  2. kssrag-0.2.2/kssrag/core/agents.py +407 -0
  3. {kssrag-0.2.1 → kssrag-0.2.2}/kssrag/server.py +3 -9
  4. {kssrag-0.2.1 → kssrag-0.2.2}/kssrag.egg-info/PKG-INFO +1 -1
  5. {kssrag-0.2.1 → kssrag-0.2.2}/setup.py +1 -1
  6. kssrag-0.2.1/kssrag/core/agents.py +0 -120
  7. {kssrag-0.2.1 → kssrag-0.2.2}/README.md +0 -0
  8. {kssrag-0.2.1 → kssrag-0.2.2}/kssrag/__init__.py +0 -0
  9. {kssrag-0.2.1 → kssrag-0.2.2}/kssrag/cli.py +0 -0
  10. {kssrag-0.2.1 → kssrag-0.2.2}/kssrag/config.py +0 -0
  11. {kssrag-0.2.1 → kssrag-0.2.2}/kssrag/core/__init__.py +0 -0
  12. {kssrag-0.2.1 → kssrag-0.2.2}/kssrag/core/chunkers.py +0 -0
  13. {kssrag-0.2.1 → kssrag-0.2.2}/kssrag/core/retrievers.py +0 -0
  14. {kssrag-0.2.1 → kssrag-0.2.2}/kssrag/core/vectorstores.py +0 -0
  15. {kssrag-0.2.1 → kssrag-0.2.2}/kssrag/kssrag.py +0 -0
  16. {kssrag-0.2.1 → kssrag-0.2.2}/kssrag/models/__init__.py +0 -0
  17. {kssrag-0.2.1 → kssrag-0.2.2}/kssrag/models/local_llms.py +0 -0
  18. {kssrag-0.2.1 → kssrag-0.2.2}/kssrag/models/openrouter.py +0 -0
  19. {kssrag-0.2.1 → kssrag-0.2.2}/kssrag/utils/__init__.py +0 -0
  20. {kssrag-0.2.1 → kssrag-0.2.2}/kssrag/utils/document_loaders.py +0 -0
  21. {kssrag-0.2.1 → kssrag-0.2.2}/kssrag/utils/helpers.py +0 -0
  22. {kssrag-0.2.1 → kssrag-0.2.2}/kssrag/utils/ocr.py +0 -0
  23. {kssrag-0.2.1 → kssrag-0.2.2}/kssrag/utils/ocr_loader.py +0 -0
  24. {kssrag-0.2.1 → kssrag-0.2.2}/kssrag/utils/preprocessors.py +0 -0
  25. {kssrag-0.2.1 → kssrag-0.2.2}/kssrag.egg-info/SOURCES.txt +0 -0
  26. {kssrag-0.2.1 → kssrag-0.2.2}/kssrag.egg-info/dependency_links.txt +0 -0
  27. {kssrag-0.2.1 → kssrag-0.2.2}/kssrag.egg-info/entry_points.txt +0 -0
  28. {kssrag-0.2.1 → kssrag-0.2.2}/kssrag.egg-info/requires.txt +0 -0
  29. {kssrag-0.2.1 → kssrag-0.2.2}/kssrag.egg-info/top_level.txt +0 -0
  30. {kssrag-0.2.1 → kssrag-0.2.2}/setup.cfg +0 -0
  31. {kssrag-0.2.1 → kssrag-0.2.2}/tests/__init__.py +0 -0
  32. {kssrag-0.2.1 → kssrag-0.2.2}/tests/test_basic.py +0 -0
  33. {kssrag-0.2.1 → kssrag-0.2.2}/tests/test_bm25s.py +0 -0
  34. {kssrag-0.2.1 → kssrag-0.2.2}/tests/test_config.py +0 -0
  35. {kssrag-0.2.1 → kssrag-0.2.2}/tests/test_image_chunker.py +0 -0
  36. {kssrag-0.2.1 → kssrag-0.2.2}/tests/test_integration.py +0 -0
  37. {kssrag-0.2.1 → kssrag-0.2.2}/tests/test_ocr.py +0 -0
  38. {kssrag-0.2.1 → kssrag-0.2.2}/tests/test_streaming.py +0 -0
  39. {kssrag-0.2.1 → kssrag-0.2.2}/tests/test_vectorstores.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: kssrag
- Version: 0.2.1
+ Version: 0.2.2
  Summary: A flexible Retrieval-Augmented Generation framework by Ksschkw
  Home-page: https://github.com/Ksschkw/kssrag
  Author: Ksschkw
@@ -0,0 +1,407 @@
+ from typing import Generator, List, Dict, Any, Optional
+ from ..utils.helpers import logger
+
+ class RAGAgent:
+     """RAG agent implementation with discrete conversation summaries"""
+
+     def __init__(self, retriever, llm, system_prompt: Optional[str] = None,
+                  conversation_history: Optional[List[Dict[str, str]]] = None):
+         self.retriever = retriever
+         self.llm = llm
+         self.conversation = conversation_history or []
+         self.system_prompt = system_prompt or """You are a helpful AI assistant. Use the following context to answer the user's question.
+ If you don't know the answer based on the context, say so."""
+         self.conversation_summaries = []  # Discrete summaries instead of single blob
+
+         logger.info(f"RAGAgent initialized with {len(conversation_history or [])} history messages")
+         logger.info(f"System prompt: {self.system_prompt[:100]}..." if self.system_prompt else "No system prompt")
+
+         # Initialize with system message if not already present
+         if not any(msg.get("role") == "system" for msg in self.conversation):
+             self.add_message("system", self.system_prompt)
+
+     def add_message(self, role: str, content: str):
+         """Add a message to the conversation history"""
+         self.conversation.append({"role": role, "content": content})
+
+         # Keep conversation manageable (last 15 messages)
+         if len(self.conversation) > 15:
+             self._smart_trim_conversation()
+
+     def _smart_trim_conversation(self):
+         """Trim conversation while preserving system message and recent exchanges"""
+         if len(self.conversation) <= 15:
+             return
+
+         original_count = len(self.conversation)
+         # Always keep system message
+         system_msg = next((msg for msg in self.conversation if msg["role"] == "system"), None)
+
+         # Keep recent messages (last 14)
+         recent_messages = self.conversation[-14:]
+
+         # Rebuild: system + recent
+         new_conv = []
+         if system_msg:
+             new_conv.append(system_msg)
+         new_conv.extend(recent_messages)
+
+         self.conversation = new_conv
+
+         # Also trim summaries to match conversation scope
+         if len(self.conversation_summaries) > 7:
+             self.conversation_summaries = self.conversation_summaries[-7:]
+         logger.info(f"Trimmed conversation from {original_count} to {len(self.conversation)} messages")
+
+     def _build_context(self, context_docs: List[Dict[str, Any]]) -> str:
+         """Build context string from documents"""
+         if not context_docs:
+             return ""
+
+         context = "Relevant information:\n"
+         for i, doc in enumerate(context_docs, 1):
+             context += f"\n--- Document {i} ---\n{doc['content']}\n"
+         return context
+
+     def _build_messages(self, question: str, context: str = "") -> List[Dict[str, str]]:
+         """Build messages for LLM including context and conversation summaries"""
+         # Start with conversation history
+         messages = self.conversation.copy()
+
+         logger.info(f"Building messages for query: '{question}'")
+         logger.info(f"Conversation history: {len(self.conversation)} messages")
+         logger.info(f"Active summaries: {len(self.conversation_summaries)}")
+         logger.info(f"Retrieved context: {len(context)} chars" if context else "No retrieved context")
+
+         # Add conversation summaries as context if available
+         if self.conversation_summaries:
+             logger.info(f"Using summaries: {self.conversation_summaries}")
+             summary_context = "Previous conversation context:\n" + "\n".join(
+                 f"- {summary}" for summary in self.conversation_summaries[-3:]  # Last 3 summaries
+             )
+             messages.append({
+                 "role": "system",
+                 "content": summary_context
+             })
+
+         # Add retrieved document context
+         user_message = f"{context}\n\nQuestion: {question}" if context else question
+
+         # ✅ FIX: Always append new user message (don't replace existing ones)
+         messages.append({"role": "user", "content": user_message})
+
+         # Add stealth summarization instruction for ongoing conversations
+         if len(self.conversation) >= 1:  # always true once the system message is added, so the instruction accompanies every turn
+             summary_instruction = self._create_summary_instruction()
+             messages.append({"role": "system", "content": summary_instruction})
+             logger.info(f"Summary instruction added to prompt: {len(summary_instruction)} chars")
+             logger.debug(f"Instruction content: {summary_instruction}")
+
+         logger.info(f"Final message count to LLM: {len(messages)}")
+         return messages
+
+     def _create_summary_instruction(self) -> str:
+         """Create the stealth summarization instruction with examples"""
+         return """IMPORTANT: You MUST follow this response structure:
+
+ [YOUR MAIN RESPONSE TO THE USER GOES HERE]
+
+ [SUMMARY_START]
+ Key context from this exchange: [Brief summary of new information]
+ [SUMMARY_END]
+
+ EXAMPLES:
+ If user says "My name is John", your summary should be: "User's name is John"
+ If user says "I prefer formal language", your summary should be: "User prefers formal communication style"
+ If user shares a preference, summarize it: "User mentioned [preference]"
+
+ RULES:
+ - ALWAYS include the summary section
+ - Use EXACT markers: [SUMMARY_START] and [SUMMARY_END]
+ - Keep summary 1-2 sentences
+ - Focus on user preferences, names, important context
+
+ The summary will be automatically hidden from the user."""
+
+     # def _extract_summary_and_response(self, full_response: str) -> tuple[str, Optional[str]]:
+     #     """Extract summary from response and return clean user response - handles partial markers"""
+     #     summary_start = "[SUMMARY_START]"
+     #     summary_end = "[SUMMARY_END]"
+
+     #     # Check if we have complete markers
+     #     if summary_start in full_response and summary_end in full_response:
+     #         start_idx = full_response.find(summary_start) + len(summary_start)
+     #         end_idx = full_response.find(summary_end)
+
+     #         summary = full_response[start_idx:end_idx].strip()
+     #         user_response = full_response[:full_response.find(summary_start)].strip()
+
+     #         logger.info(f"✅ SUCCESS: Summary extracted and separated from user response")
+     #         logger.info(f"User response length: {len(user_response)} chars")
+     #         logger.info(f"Summary extracted: '{summary}'")
+     #         return user_response, summary
+
+     #     # Check if we have partial markers (common in streaming)
+     #     elif summary_start in full_response:
+     #         # We have start marker but no end marker - extract what we can
+     #         start_idx = full_response.find(summary_start) + len(summary_start)
+     #         potential_summary = full_response[start_idx:].strip()
+
+     #         # Clean up any partial end markers or weird formatting
+     #         if potential_summary:
+     #             # Remove any trailing partial markers or whitespace
+     #             cleaned_summary = potential_summary.split('[SUMMARY_')[0].split('[SUMMARY')[0].strip()
+     #             user_response = full_response[:full_response.find(summary_start)].strip()
+
+     #             if cleaned_summary and len(cleaned_summary) > 10:  # Only if meaningful content
+     #                 logger.info(f"⚠️ Partial summary extracted (missing end marker): '{cleaned_summary}'")
+     #                 return user_response, cleaned_summary
+
+     #         logger.info("❌ Incomplete summary markers found")
+     #         return full_response, None
+
+     #     logger.info("❌ No summary markers found, returning full response")
+     #     logger.info(f"Full response length: {len(full_response)} chars")
+     #     return full_response, None
+
+     def _extract_summary_and_response(self, full_response: str) -> tuple[str, Optional[str]]:
+         """Extract summary from response and return clean user response - handles partial markers"""
+         # Keep original markers for backward compatibility
+         summary_start = "[SUMMARY_START]"
+         summary_end = "[SUMMARY_END]"
+
+         # NEW: Normalize the response first (improvement from new version)
+         normalized = full_response.replace('\n', ' ').replace('\r', ' ').strip()
+
+         # Check if we have complete markers - KEEP original logic but use normalized
+         if summary_start in normalized and summary_end in normalized:
+             start_idx = normalized.find(summary_start) + len(summary_start)
+             end_idx = normalized.find(summary_end)
+
+             summary = normalized[start_idx:end_idx].strip()
+             user_response = normalized[:normalized.find(summary_start)].strip()
+
+             logger.info(f"✅ SUCCESS: Summary extracted and separated from user response")
+             logger.info(f"User response length: {len(user_response)} chars")
+             logger.info(f"Summary extracted: '{summary}'")
+
+             # NEW: Add validation from improved version
+             if not summary or len(summary) < 5:
+                 logger.info("❌ Summary too short, returning full response")
+                 return full_response.strip(), None
+
+             return user_response, summary
+
+         # Check if we have partial markers (common in streaming) - IMPROVED logic
+         elif summary_start in normalized:
+             # We have start marker but no end marker - extract what we can
+             start_idx = normalized.find(summary_start) + len(summary_start)
+
+             # NEW: Take reasonable chunk (200 chars) instead of everything
+             potential_summary = normalized[start_idx:start_idx+200].strip()
+
+             # COMBINED: Clean up from both versions
+             if potential_summary:
+                 # Clean up any partial markers or weird formatting
+                 cleaned_summary = (potential_summary
+                                    .split('[SUMMARY_')[0]
+                                    .split('[SUMMARY')[0]
+                                    .split('[')[0]
+                                    .split('\n')[0]
+                                    .strip())
+
+                 user_response = normalized[:normalized.find(summary_start)].strip()
+
+                 # COMBINED validation: meaningful content check
+                 if cleaned_summary and len(cleaned_summary) >= 10:  # meaningful-content threshold
+                     logger.info(f"⚠️ Partial summary extracted (missing end marker): '{cleaned_summary}'")
+                     return user_response, cleaned_summary
+
+             logger.info("❌ Incomplete summary markers found")
+             return full_response.strip(), None  # NEW: strip for consistency
+
+         # No markers found - KEEP original but with normalization
+         logger.info("❌ No summary markers found, returning full response")
+         logger.info(f"Full response length: {len(full_response)} chars")
+         return full_response.strip(), None  # NEW: strip for consistency
+
+     def _add_conversation_summary(self, new_summary: str):
+         """Add a new discrete conversation summary"""
+         if not new_summary or new_summary.lower() == "none":
+             logger.info("🔄 No summary to add (empty or 'none')")
+             return
+
+         # Add as a new discrete summary
+         self.conversation_summaries.append(new_summary)
+         logger.info(f"📝 ADDED Summary #{len(self.conversation_summaries)}: '{new_summary}'")
+
+         # Keep only recent summaries (last 7); pop the oldest once so exactly 7 remain
+         if len(self.conversation_summaries) > 7:
+             removed = self.conversation_summaries.pop(0)
+             logger.info(f"🗑️ DROPPED Oldest summary: '{removed}'")
+             logger.info(f"📊 Summary count maintained at {len(self.conversation_summaries)}")
+
+     def query(self, question: str, top_k: int = 5, include_context: bool = True) -> str:
+         """Process a query with stealth conversation summarization"""
+         try:
+             # Retrieve relevant context
+             logger.info(f"🔍 QUERY START: '{question}' (top_k: {top_k})")
+             context_docs = self.retriever.retrieve(question, top_k)
+             logger.info(f"📄 Retrieved {len(context_docs)} context documents")
+
+             if not context_docs and include_context:
+                 logger.warning(f"No context found for query: {question}")
+                 return "I couldn't find relevant information to answer your question."
+
+             # Format context
+             context = self._build_context(context_docs) if include_context and context_docs else ""
+
+             # Build messages
+             messages = self._build_messages(question, context)
+
+             # Generate response
+             full_response = self.llm.predict(messages)
+             logger.info(f"🤖 LLM response received: {len(full_response)} chars")
+
+             # Extract summary and clean response
+             user_response, conversation_summary = self._extract_summary_and_response(full_response)
+
+             # Add new summary if found
+             if conversation_summary:
+                 self._add_conversation_summary(conversation_summary)
+                 logger.info("Summary processing completed successfully")
+             else:
+ logger.info("Bitch No summary generated for this exchange")
280
+
+             # Add assistant response to conversation (clean version only)
+             self.add_message("assistant", user_response)
+
+             logger.info(f"💬 Final user response: {len(user_response)} chars")
+             return user_response
+
+         except Exception as e:
+             logger.error(f"Error processing query: {str(e)}")
+             # logger.error(f"💥 QUERY FAILED: {str(e)}")
+             return "I encountered an issue processing your query. Please try again."
+
+     def query_stream(self, question: str, top_k: int = 5) -> Generator[str, None, None]:
+         """
+         Professional-grade streaming with multiple fallback strategies
+         """
+         try:
+             logger.info(f"🌊 STREAMING QUERY START: '{question}'")
+
+             # Strategy 1: Try true streaming first
+             if hasattr(self.llm, 'predict_stream'):
+                 try:
+                     yield from self._stream_with_summary_protection(question, top_k)
+                     return
+                 except Exception as stream_error:
+                     logger.warning(f"Streaming failed, falling back: {stream_error}")
+
+             # Strategy 2: Fallback to simulated streaming
+             logger.info("🔄 Falling back to simulated streaming")
+             yield from self._simulated_streaming(question, top_k)
+
+         except Exception as e:
+             logger.error(f"💥 ALL STREAMING STRATEGIES FAILED: {str(e)}")
+             yield f"Error: {str(e)}"
+
+     def _stream_with_summary_protection(self, question: str, top_k: int) -> Generator[str, None, None]:
+         """True streaming with better error handling"""
+         try:
+             relevant_docs = self.retriever.retrieve(question, top_k=top_k)
+             context = self._build_context(relevant_docs)
+             messages = self._build_messages(question, context)
+
+             buffer = ""
+             summary_started = False
+
+             for chunk in self.llm.predict_stream(messages):
+                 buffer += chunk
+
+                 # Check for summary markers
+                 if any(marker in chunk for marker in ['[SUMMARY', 'SUMMARY_']):
+                     if not summary_started:
+                         logger.info("🚨 Summary markers detected - cutting stream")
+                         summary_started = True
+                         # Flush only this chunk's pre-marker text; extracting from
+                         # buffer would re-yield content already streamed
+                         clean_part = self._extract_clean_content(chunk)
+                         if clean_part:
+                             yield clean_part
+                     # Don't break here - let the method complete naturally
+                     continue
+
+                 if not summary_started:
+                     yield chunk
+
+             # Process the complete response
+             self._process_complete_response(buffer)
+
+         except Exception as e:
+             logger.error(f"Streaming error: {e}")
+             raise  # Re-raise to trigger fallback
+
+     def _process_complete_response(self, full_response: str):
+         """Process complete response and extract summary"""
+         user_response, conversation_summary = self._extract_summary_and_response(full_response)
+
+         if conversation_summary:
+             logger.info(f"📝 Summary extracted: '{conversation_summary}'")
+             self._add_conversation_summary(conversation_summary)
+
+         self.add_message("assistant", user_response)
+
+     def _simulated_streaming(self, question: str, top_k: int) -> Generator[str, None, None]:
+         """Simulated streaming that guarantees no summary leakage"""
+         relevant_docs = self.retriever.retrieve(question, top_k=top_k)
+         context = self._build_context(relevant_docs)
+         messages = self._build_messages(question, context)
+
+         # Get complete response
+         complete_response = self.llm.predict(messages)
+
+         # Extract clean response
+         user_response, conversation_summary = self._extract_summary_and_response(complete_response)
+
+         if conversation_summary:
+             logger.info(f"📝 Summary extracted: '{conversation_summary}'")
+             self._add_conversation_summary(conversation_summary)
+
+         self.add_message("assistant", user_response)
+
+         # Simulate streaming (smaller chunks for better UX)
+         import time  # hoisted out of the loop so it isn't re-executed per chunk
+
+         chunk_size = 2  # Even smaller chunks for smoother streaming
+         for i in range(0, len(user_response), chunk_size):
+             yield user_response[i:i+chunk_size]
+             time.sleep(0.02)  # Slightly longer delay for readability
+
+     def _extract_clean_content(self, buffer: str) -> str:
+         """Extract clean content before any summary markers"""
+         markers = ['[SUMMARY_START]', '[SUMMARY', 'SUMMARY_']
+         for marker in markers:
+             if marker in buffer:
+                 return buffer.split(marker)[0].strip()
+         return buffer.strip()
+
+     def clear_conversation(self):
+         """Clear conversation history except system message and summaries"""
+         system_msg = next((msg for msg in self.conversation if msg["role"] == "system"), None)
+         self.conversation = [system_msg] if system_msg else []
+         # Keep conversation summaries - they are the compressed memory
+         # self.conversation_summaries = []  # TODO: optionally clear summaries too if a bug is noticed
+
+     def get_conversation_context(self) -> Dict[str, Any]:
+         context = {
+             "summary_count": len(self.conversation_summaries),
+             "summaries": self.conversation_summaries,
+             "message_count": len(self.conversation),
+             "recent_messages": [f"{msg['role']}: {msg['content'][:50]}..." for msg in self.conversation[-3:]]
+         }
+         logger.info(f"📊 Context snapshot: {context}")
+         return context
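
For orientation, a minimal, runnable sketch of driving the new agent. The stub retriever and LLM below are hypothetical stand-ins: the file above only requires a retriever exposing retrieve(query, top_k) and an LLM exposing predict(messages) (optionally predict_stream); the import path matches the file added in this release.

    from kssrag.core.agents import RAGAgent

    class StubRetriever:
        """Hypothetical retriever: returns documents shaped like the agent expects."""
        def retrieve(self, query, top_k=5):
            return [{"content": "kssrag is a flexible RAG framework by Ksschkw."}]

    class StubLLM:
        """Hypothetical LLM: answers, then appends the summary block the agent strips."""
        def predict(self, messages):
            return ("kssrag is a RAG framework. "
                    "[SUMMARY_START] User asked what kssrag is [SUMMARY_END]")

    agent = RAGAgent(retriever=StubRetriever(), llm=StubLLM())
    print(agent.query("What is kssrag?"))     # summary block is stripped before output
    print(agent.get_conversation_context())  # the extracted note now lives in summaries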
@@ -98,27 +98,21 @@ def create_app(rag_agent: RAGAgent, server_config: Optional[ServerConfig] = None
              raise HTTPException(status_code=400, detail="Query cannot be empty")
 
          try:
-             # Get or create session - USE THE SAME LLM INSTANCE
+             # Get or create session
              if session_id not in sessions:
                  logger.info(f"Creating new streaming session: {session_id}")
-                 # Use the same LLM configuration but enable streaming
                  sessions[session_id] = RAGAgent(
                      retriever=rag_agent.retriever,
-                     llm=rag_agent.llm,  # Use the same LLM instance
+                     llm=rag_agent.llm,
                      system_prompt=rag_agent.system_prompt
                  )
 
              agent = sessions[session_id]
 
-             # Build messages using agent's conversation history
-             context_docs = agent.retriever.retrieve(query, top_k=5)
-             context = agent._build_context(context_docs)
-             messages = agent._build_messages(query, context)
-
              async def generate():
                  full_response = ""
                  try:
-                     # Use the agent's query_stream method instead of calling LLM directly
+                     # Use agent's query_stream which handles context and summarization
                      for chunk in agent.query_stream(query, top_k=5):
                          full_response += chunk
                          yield f"data: {json.dumps({'chunk': chunk, 'done': False})}\n\n"
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: kssrag
- Version: 0.2.1
+ Version: 0.2.2
  Summary: A flexible Retrieval-Augmented Generation framework by Ksschkw
  Home-page: https://github.com/Ksschkw/kssrag
  Author: Ksschkw
@@ -6,7 +6,7 @@ long_description = (here / "README.md").read_text(encoding="utf-8")
 
  setup(
      name="kssrag",
-     version="0.2.1",
+     version="0.2.2",
      description="A flexible Retrieval-Augmented Generation framework by Ksschkw",
      long_description=long_description,
      long_description_content_type="text/markdown",
@@ -1,120 +0,0 @@
- from typing import Generator, List, Dict, Any, Optional
- from ..utils.helpers import logger
-
- class RAGAgent:
-     """RAG agent implementation"""
-
-     def __init__(self, retriever, llm, system_prompt: Optional[str] = None,
-                  conversation_history: Optional[List[Dict[str, str]]] = None):
-         self.retriever = retriever
-         self.llm = llm
-         self.conversation = conversation_history or []
-         self.system_prompt = system_prompt or """You are a helpful AI assistant. Use the following context to answer the user's question.
- If you don't know the answer based on the context, say so."""
-
-         # Initialize with system message if not already present
-         if not any(msg.get("role") == "system" for msg in self.conversation):
-             self.add_message("system", self.system_prompt)
-
-     def add_message(self, role: str, content: str):
-         """Add a message to the conversation history"""
-         self.conversation.append({"role": role, "content": content})
-
-         # Keep conversation manageable (last 10 messages)
-         if len(self.conversation) > 10:
-             # Always keep the system message
-             system_msg = next((msg for msg in self.conversation if msg["role"] == "system"), None)
-             other_msgs = [msg for msg in self.conversation if msg["role"] != "system"]
-
-             # Keep the most recent messages
-             self.conversation = [system_msg] + other_msgs[-9:] if system_msg else other_msgs[-10:]
-
-     def _build_context(self, context_docs: List[Dict[str, Any]]) -> str:
-         """Build context string from documents"""
-         if not context_docs:
-             return ""
-
-         context = "Relevant information:\n"
-         for i, doc in enumerate(context_docs, 1):
-             context += f"\n--- Document {i} ---\n{doc['content']}\n"
-         return context
-
-     def _build_messages(self, question: str, context: str = "") -> List[Dict[str, str]]:
-         """Build messages for LLM including context"""
-         # Start with conversation history
-         messages = self.conversation.copy()
-
-         # Add user query with context
-         user_message = f"{context}\n\nQuestion: {question}" if context else question
-
-         # Replace the last user message if it exists, otherwise add new one
-         if messages and messages[-1]["role"] == "user":
-             messages[-1]["content"] = user_message
-         else:
-             messages.append({"role": "user", "content": user_message})
-
-         return messages
-
-     def query(self, question: str, top_k: int = 5, include_context: bool = True) -> str:
-         """Process a query and return a response"""
-         try:
-             # Retrieve relevant context
-             context_docs = self.retriever.retrieve(question, top_k)
-
-             if not context_docs and include_context:
-                 logger.warning(f"No context found for query: {question}")
-                 return "I couldn't find relevant information to answer your question."
-
-             # Format context
-             context = self._build_context(context_docs) if include_context and context_docs else ""
-
-             # Build messages
-             messages = self._build_messages(question, context)
-
-             # Generate response
-             response = self.llm.predict(messages)
-
-             # Add assistant response to conversation
-             self.add_message("assistant", response)
-
-             return response
-
-         except Exception as e:
-             logger.error(f"Error processing query: {str(e)}")
-             return "I encountered an issue processing your query. Please try again."
-
-     def query_stream(self, question: str, top_k: int = 5) -> Generator[str, None, None]:
-         """Query the RAG system with streaming response"""
-         try:
-             # Retrieve relevant documents
-             relevant_docs = self.retriever.retrieve(question, top_k=top_k)
-
-             # Build context from documents
-             context = self._build_context(relevant_docs)
-
-             # Build messages
-             messages = self._build_messages(question, context)
-
-             # Stream response from LLM
-             if hasattr(self.llm, 'predict_stream'):
-                 full_response = ""
-                 for chunk in self.llm.predict_stream(messages):
-                     full_response += chunk
-                     yield chunk
-
-                 # Add the complete response to conversation history
-                 self.add_message("assistant", full_response)
-             else:
-                 # Fallback to non-streaming
-                 response = self.llm.predict(messages)
-                 self.add_message("assistant", response)
-                 yield response
-
-         except Exception as e:
-             logger.error(f"Error in streaming query: {str(e)}")
-             yield f"Error: {str(e)}"
-
-     def clear_conversation(self):
-         """Clear conversation history except system message"""
-         system_msg = next((msg for msg in self.conversation if msg["role"] == "system"), None)
-         self.conversation = [system_msg] if system_msg else []
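
The behavioral fix is easiest to see here: the removed _build_messages rewrote the last user message in place, and because self.conversation.copy() is a shallow copy (the message dicts are shared), that edit also mutated the stored history; 0.2.2 always appends instead. A standalone sketch of the old failure mode (not kssrag code):

    conversation = [
        {"role": "system", "content": "sys"},
        {"role": "user", "content": "first question"},
    ]
    messages = conversation.copy()  # shallow copy: the dicts are shared
    if messages and messages[-1]["role"] == "user":
        messages[-1]["content"] = "second question"  # 0.2.1 replace-in-place branch

    print(conversation[1]["content"])  # -> 'second question': the first turn is gone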