kssrag 0.2.0__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. {kssrag-0.2.0 → kssrag-0.2.2}/PKG-INFO +2 -2
  2. {kssrag-0.2.0 → kssrag-0.2.2}/README.md +2 -2
  3. kssrag-0.2.2/kssrag/core/agents.py +407 -0
  4. {kssrag-0.2.0 → kssrag-0.2.2}/kssrag/models/openrouter.py +1 -1
  5. {kssrag-0.2.0 → kssrag-0.2.2}/kssrag/server.py +13 -6
  6. {kssrag-0.2.0 → kssrag-0.2.2}/kssrag.egg-info/PKG-INFO +2 -2
  7. {kssrag-0.2.0 → kssrag-0.2.2}/setup.py +1 -1
  8. kssrag-0.2.0/kssrag/core/agents.py +0 -119
  9. {kssrag-0.2.0 → kssrag-0.2.2}/kssrag/__init__.py +0 -0
  10. {kssrag-0.2.0 → kssrag-0.2.2}/kssrag/cli.py +0 -0
  11. {kssrag-0.2.0 → kssrag-0.2.2}/kssrag/config.py +0 -0
  12. {kssrag-0.2.0 → kssrag-0.2.2}/kssrag/core/__init__.py +0 -0
  13. {kssrag-0.2.0 → kssrag-0.2.2}/kssrag/core/chunkers.py +0 -0
  14. {kssrag-0.2.0 → kssrag-0.2.2}/kssrag/core/retrievers.py +0 -0
  15. {kssrag-0.2.0 → kssrag-0.2.2}/kssrag/core/vectorstores.py +0 -0
  16. {kssrag-0.2.0 → kssrag-0.2.2}/kssrag/kssrag.py +0 -0
  17. {kssrag-0.2.0 → kssrag-0.2.2}/kssrag/models/__init__.py +0 -0
  18. {kssrag-0.2.0 → kssrag-0.2.2}/kssrag/models/local_llms.py +0 -0
  19. {kssrag-0.2.0 → kssrag-0.2.2}/kssrag/utils/__init__.py +0 -0
  20. {kssrag-0.2.0 → kssrag-0.2.2}/kssrag/utils/document_loaders.py +0 -0
  21. {kssrag-0.2.0 → kssrag-0.2.2}/kssrag/utils/helpers.py +0 -0
  22. {kssrag-0.2.0 → kssrag-0.2.2}/kssrag/utils/ocr.py +0 -0
  23. {kssrag-0.2.0 → kssrag-0.2.2}/kssrag/utils/ocr_loader.py +0 -0
  24. {kssrag-0.2.0 → kssrag-0.2.2}/kssrag/utils/preprocessors.py +0 -0
  25. {kssrag-0.2.0 → kssrag-0.2.2}/kssrag.egg-info/SOURCES.txt +0 -0
  26. {kssrag-0.2.0 → kssrag-0.2.2}/kssrag.egg-info/dependency_links.txt +0 -0
  27. {kssrag-0.2.0 → kssrag-0.2.2}/kssrag.egg-info/entry_points.txt +0 -0
  28. {kssrag-0.2.0 → kssrag-0.2.2}/kssrag.egg-info/requires.txt +0 -0
  29. {kssrag-0.2.0 → kssrag-0.2.2}/kssrag.egg-info/top_level.txt +0 -0
  30. {kssrag-0.2.0 → kssrag-0.2.2}/setup.cfg +0 -0
  31. {kssrag-0.2.0 → kssrag-0.2.2}/tests/__init__.py +0 -0
  32. {kssrag-0.2.0 → kssrag-0.2.2}/tests/test_basic.py +0 -0
  33. {kssrag-0.2.0 → kssrag-0.2.2}/tests/test_bm25s.py +0 -0
  34. {kssrag-0.2.0 → kssrag-0.2.2}/tests/test_config.py +0 -0
  35. {kssrag-0.2.0 → kssrag-0.2.2}/tests/test_image_chunker.py +0 -0
  36. {kssrag-0.2.0 → kssrag-0.2.2}/tests/test_integration.py +0 -0
  37. {kssrag-0.2.0 → kssrag-0.2.2}/tests/test_ocr.py +0 -0
  38. {kssrag-0.2.0 → kssrag-0.2.2}/tests/test_streaming.py +0 -0
  39. {kssrag-0.2.0 → kssrag-0.2.2}/tests/test_vectorstores.py +0 -0
--- kssrag-0.2.0/PKG-INFO
+++ kssrag-0.2.2/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: kssrag
-Version: 0.2.0
+Version: 0.2.2
 Summary: A flexible Retrieval-Augmented Generation framework by Ksschkw
 Home-page: https://github.com/Ksschkw/kssrag
 Author: Ksschkw
@@ -85,7 +85,7 @@ Dynamic: summary
 
 ![Python Version](https://img.shields.io/badge/python-3.8%2B-blue)
 ![License](https://img.shields.io/badge/license-MIT-green)
-![Version](https://img.shields.io/badge/version-0.1.30-brightgreen)
+![Version](https://img.shields.io/badge/version-0.2.0-brightgreen)
 ![Framework](https://img.shields.io/badge/framework-RAG-orange)
 ![Documentation](https://img.shields.io/badge/docs-comprehensive-brightgreen)
 
--- kssrag-0.2.0/README.md
+++ kssrag-0.2.2/README.md
@@ -4,7 +4,7 @@
 
 ![Python Version](https://img.shields.io/badge/python-3.8%2B-blue)
 ![License](https://img.shields.io/badge/license-MIT-green)
-![Version](https://img.shields.io/badge/version-0.1.30-brightgreen)
+![Version](https://img.shields.io/badge/version-0.2.0-brightgreen)
 ![Framework](https://img.shields.io/badge/framework-RAG-orange)
 ![Documentation](https://img.shields.io/badge/docs-comprehensive-brightgreen)
 
@@ -756,4 +756,4 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file
 
 [Get Started](#quick-start) • [Explore Features](#features) • [View Examples](#examples)
 
-</div>
+</div>
--- /dev/null
+++ kssrag-0.2.2/kssrag/core/agents.py
@@ -0,0 +1,407 @@
+from typing import Generator, List, Dict, Any, Optional
+from ..utils.helpers import logger
+
+class RAGAgent:
+    """RAG agent implementation with discrete conversation summaries"""
+
+    def __init__(self, retriever, llm, system_prompt: Optional[str] = None,
+                 conversation_history: Optional[List[Dict[str, str]]] = None):
+        self.retriever = retriever
+        self.llm = llm
+        self.conversation = conversation_history or []
+        self.system_prompt = system_prompt or """You are a helpful AI assistant. Use the following context to answer the user's question.
+If you don't know the answer based on the context, say so."""
+        self.conversation_summaries = []  # Discrete summaries instead of a single blob
+
+        logger.info(f"RAGAgent initialized with {len(conversation_history or [])} history messages")
+        logger.info(f"System prompt: {self.system_prompt[:100]}..." if self.system_prompt else "No system prompt")
+
+        # Initialize with a system message if one is not already present
+        if not any(msg.get("role") == "system" for msg in self.conversation):
+            self.add_message("system", self.system_prompt)
+
+    def add_message(self, role: str, content: str):
+        """Add a message to the conversation history"""
+        self.conversation.append({"role": role, "content": content})
+
+        # Keep the conversation manageable (last 15 messages)
+        if len(self.conversation) > 15:
+            self._smart_trim_conversation()
+
+    def _smart_trim_conversation(self):
+        """Trim the conversation while preserving the system message and recent exchanges"""
+        if len(self.conversation) <= 15:
+            return
+
+        original_count = len(self.conversation)
+        # Always keep the system message
+        system_msg = next((msg for msg in self.conversation if msg["role"] == "system"), None)
+
+        # Keep the most recent messages (last 14)
+        recent_messages = self.conversation[-14:]
+
+        # Rebuild: system + recent
+        new_conv = []
+        if system_msg:
+            new_conv.append(system_msg)
+        new_conv.extend(recent_messages)
+
+        self.conversation = new_conv
+
+        # Also trim summaries to match the conversation scope
+        if len(self.conversation_summaries) > 7:
+            self.conversation_summaries = self.conversation_summaries[-7:]
+        logger.info(f"Trimmed conversation from {original_count} to {len(self.conversation)} messages")
+
+    def _build_context(self, context_docs: List[Dict[str, Any]]) -> str:
+        """Build a context string from retrieved documents"""
+        if not context_docs:
+            return ""
+
+        context = "Relevant information:\n"
+        for i, doc in enumerate(context_docs, 1):
+            context += f"\n--- Document {i} ---\n{doc['content']}\n"
+        return context
+
+    def _build_messages(self, question: str, context: str = "") -> List[Dict[str, str]]:
+        """Build messages for the LLM, including context and conversation summaries"""
+        # Start with conversation history
+        messages = self.conversation.copy()
+
+        logger.info(f"Building messages for query: '{question}'")
+        logger.info(f"Conversation history: {len(self.conversation)} messages")
+        logger.info(f"Active summaries: {len(self.conversation_summaries)}")
+        logger.info(f"Retrieved context: {len(context)} chars" if context else "No retrieved context")
+
+        # Add conversation summaries as context if available
+        if self.conversation_summaries:
+            logger.info(f"Using summaries: {self.conversation_summaries}")
+            summary_context = "Previous conversation context:\n" + "\n".join(
+                f"- {summary}" for summary in self.conversation_summaries[-3:]  # Last 3 summaries
+            )
+            messages.append({
+                "role": "system",
+                "content": summary_context
+            })
+
+        # Add retrieved document context
+        user_message = f"{context}\n\nQuestion: {question}" if context else question
+
+        # ✅ FIX vs 0.2.0: always append a new user message (never overwrite an existing one)
+        messages.append({"role": "user", "content": user_message})
+
+        # Add the stealth summarization instruction; the conversation always holds at
+        # least the system message, so this fires on every turn
+        if len(self.conversation) >= 1:
+            summary_instruction = self._create_summary_instruction()
+            messages.append({"role": "system", "content": summary_instruction})
+            logger.info(f"Summary instruction added to prompt: {len(summary_instruction)} chars")
+            logger.debug(f"Instruction content: {summary_instruction}")
+
+        logger.info(f"Final message count to LLM: {len(messages)}")
+        return messages
+
+    def _create_summary_instruction(self) -> str:
+        """Create the stealth summarization instruction with examples"""
+        return """IMPORTANT: You MUST follow this response structure:
+
+[YOUR MAIN RESPONSE TO THE USER GOES HERE]
+
+[SUMMARY_START]
+Key context from this exchange: [Brief summary of new information]
+[SUMMARY_END]
+
+EXAMPLES:
+If user says "My name is John", your summary should be: "User's name is John"
+If user says "I prefer formal language", your summary should be: "User prefers formal communication style"
+If user shares a preference, summarize it: "User mentioned [preference]"
+
+RULES:
+- ALWAYS include the summary section
+- Use EXACT markers: [SUMMARY_START] and [SUMMARY_END]
+- Keep summary 1-2 sentences
+- Focus on user preferences, names, important context
+
+The summary will be automatically hidden from the user."""
+
+    # def _extract_summary_and_response(self, full_response: str) -> tuple[str, Optional[str]]:
+    #     """Extract summary from response and return clean user response - handles partial markers"""
+    #     summary_start = "[SUMMARY_START]"
+    #     summary_end = "[SUMMARY_END]"
+    #
+    #     # Check if we have complete markers
+    #     if summary_start in full_response and summary_end in full_response:
+    #         start_idx = full_response.find(summary_start) + len(summary_start)
+    #         end_idx = full_response.find(summary_end)
+    #
+    #         summary = full_response[start_idx:end_idx].strip()
+    #         user_response = full_response[:full_response.find(summary_start)].strip()
+    #
+    #         logger.info("✅ SUCCESS: Summary extracted and separated from user response")
+    #         logger.info(f"User response length: {len(user_response)} chars")
+    #         logger.info(f"Summary extracted: '{summary}'")
+    #         return user_response, summary
+    #
+    #     # Check if we have partial markers (common in streaming)
+    #     elif summary_start in full_response:
+    #         # We have a start marker but no end marker - extract what we can
+    #         start_idx = full_response.find(summary_start) + len(summary_start)
+    #         potential_summary = full_response[start_idx:].strip()
+    #
+    #         # Clean up any partial end markers or stray formatting
+    #         if potential_summary:
+    #             cleaned_summary = potential_summary.split('[SUMMARY_')[0].split('[SUMMARY')[0].strip()
+    #             user_response = full_response[:full_response.find(summary_start)].strip()
+    #
+    #             if cleaned_summary and len(cleaned_summary) > 10:  # Only if meaningful content
+    #                 logger.info(f"⚠️ Partial summary extracted (missing end marker): '{cleaned_summary}'")
+    #                 return user_response, cleaned_summary
+    #
+    #         logger.info("❌ Incomplete summary markers found")
+    #         return full_response, None
+    #
+    #     logger.info("❌ No summary markers found, returning full response")
+    #     logger.info(f"Full response length: {len(full_response)} chars")
+    #     return full_response, None
+
+    def _extract_summary_and_response(self, full_response: str) -> tuple[str, Optional[str]]:
+        """Extract summary from response and return clean user response - handles partial markers"""
+        # Keep the original markers for backward compatibility
+        summary_start = "[SUMMARY_START]"
+        summary_end = "[SUMMARY_END]"
+
+        # NEW: normalize the response first
+        normalized = full_response.replace('\n', ' ').replace('\r', ' ').strip()
+
+        # Complete markers present - keep the original logic, but on the normalized text
+        if summary_start in normalized and summary_end in normalized:
+            start_idx = normalized.find(summary_start) + len(summary_start)
+            end_idx = normalized.find(summary_end)
+
+            summary = normalized[start_idx:end_idx].strip()
+            user_response = normalized[:normalized.find(summary_start)].strip()
+
+            logger.info("✅ SUCCESS: Summary extracted and separated from user response")
+            logger.info(f"User response length: {len(user_response)} chars")
+            logger.info(f"Summary extracted: '{summary}'")
+
+            # NEW: validate the summary before accepting it
+            if not summary or len(summary) < 5:
+                logger.info("❌ Summary too short, returning full response")
+                return full_response.strip(), None
+
+            return user_response, summary
+
+        # Partial markers (common in streaming) - improved handling
+        elif summary_start in normalized:
+            # Start marker but no end marker - extract what we can
+            start_idx = normalized.find(summary_start) + len(summary_start)
+
+            # NEW: take a reasonable chunk (200 chars) instead of everything
+            potential_summary = normalized[start_idx:start_idx + 200].strip()
+
+            if potential_summary:
+                # Clean up any partial markers or stray formatting
+                cleaned_summary = (potential_summary
+                                   .split('[SUMMARY_')[0]
+                                   .split('[SUMMARY')[0]
+                                   .split('[')[0]
+                                   .split('\n')[0]
+                                   .strip())
+
+                user_response = normalized[:normalized.find(summary_start)].strip()
+
+                # Meaningful-content check (original >= 10 threshold)
+                if cleaned_summary and len(cleaned_summary) >= 10:
+                    logger.info(f"⚠️ Partial summary extracted (missing end marker): '{cleaned_summary}'")
+                    return user_response, cleaned_summary
+
+            logger.info("❌ Incomplete summary markers found")
+            return full_response.strip(), None
+
+        # No markers found
+        logger.info("❌ No summary markers found, returning full response")
+        logger.info(f"Full response length: {len(full_response)} chars")
+        return full_response.strip(), None
+
+    def _add_conversation_summary(self, new_summary: str):
+        """Add a new discrete conversation summary"""
+        if not new_summary or new_summary.lower() == "none":
+            logger.info("🔄 No summary to add (empty or 'none')")
+            return
+
+        # Add as a new discrete summary
+        self.conversation_summaries.append(new_summary)
+        logger.info(f"📝 ADDED summary #{len(self.conversation_summaries)}: '{new_summary}'")
+
+        # Keep only the most recent summaries (last 7); record the oldest before
+        # trimming, rather than popping after the slice, which would drop an extra one
+        if len(self.conversation_summaries) > 7:
+            removed = self.conversation_summaries[0]
+            self.conversation_summaries = self.conversation_summaries[-7:]
+            logger.info(f"🗑️ DROPPED oldest summary: '{removed}'")
+            logger.info(f"📊 Summary count maintained at {len(self.conversation_summaries)}")
+
+    def query(self, question: str, top_k: int = 5, include_context: bool = True) -> str:
+        """Process a query with stealth conversation summarization"""
+        try:
+            # Retrieve relevant context
+            logger.info(f"🔍 QUERY START: '{question}' (top_k: {top_k})")
+            context_docs = self.retriever.retrieve(question, top_k)
+            logger.info(f"📄 Retrieved {len(context_docs)} context documents")
+
+            if not context_docs and include_context:
+                logger.warning(f"No context found for query: {question}")
+                return "I couldn't find relevant information to answer your question."
+
+            # Format context
+            context = self._build_context(context_docs) if include_context and context_docs else ""
+
+            # Build messages
+            messages = self._build_messages(question, context)
+
+            # Generate response
+            full_response = self.llm.predict(messages)
+            logger.info(f"🤖 LLM response received: {len(full_response)} chars")
+
+            # Extract summary and clean response
+            user_response, conversation_summary = self._extract_summary_and_response(full_response)
+
+            # Add new summary if found
+            if conversation_summary:
+                self._add_conversation_summary(conversation_summary)
+                logger.info("Summary processing completed successfully")
+            else:
+                logger.info("No summary generated for this exchange")
+
+            # Add assistant response to conversation (clean version only)
+            self.add_message("assistant", user_response)
+
+            logger.info(f"💬 Final user response: {len(user_response)} chars")
+            return user_response
+
+        except Exception as e:
+            logger.error(f"Error processing query: {str(e)}")
+            return "I encountered an issue processing your query. Please try again."
+
+    def query_stream(self, question: str, top_k: int = 5) -> Generator[str, None, None]:
+        """Streaming with multiple fallback strategies"""
+        try:
+            logger.info(f"🌊 STREAMING QUERY START: '{question}'")
+
+            # Strategy 1: try true streaming first
+            if hasattr(self.llm, 'predict_stream'):
+                try:
+                    yield from self._stream_with_summary_protection(question, top_k)
+                    return
+                except Exception as stream_error:
+                    logger.warning(f"Streaming failed, falling back: {stream_error}")
+
+            # Strategy 2: fall back to simulated streaming
+            logger.info("🔄 Falling back to simulated streaming")
+            yield from self._simulated_streaming(question, top_k)
+
+        except Exception as e:
+            logger.error(f"💥 ALL STREAMING STRATEGIES FAILED: {str(e)}")
+            yield f"Error: {str(e)}"
+
+    def _stream_with_summary_protection(self, question: str, top_k: int) -> Generator[str, None, None]:
+        """True streaming that cuts the summary block out of the live stream"""
+        try:
+            relevant_docs = self.retriever.retrieve(question, top_k=top_k)
+            context = self._build_context(relevant_docs)
+            messages = self._build_messages(question, context)
+
+            buffer = ""
+            summary_started = False
+
+            for chunk in self.llm.predict_stream(messages):
+                buffer += chunk
+
+                # Check for summary markers
+                if any(marker in chunk for marker in ['[SUMMARY', 'SUMMARY_']):
+                    if not summary_started:
+                        logger.info("🚨 Summary markers detected - cutting stream")
+                        summary_started = True
+                        # Yield only the not-yet-streamed part of the clean buffer,
+                        # so text from earlier chunks is not duplicated
+                        clean_part = self._extract_clean_content(buffer)
+                        already_streamed = len(buffer) - len(chunk)
+                        if len(clean_part) > already_streamed:
+                            yield clean_part[already_streamed:]
+                    # Don't break here - let the stream complete so the summary is captured
+                    continue
+
+                if not summary_started:
+                    yield chunk
+
+            # Process the complete response
+            self._process_complete_response(buffer)
+
+        except Exception as e:
+            logger.error(f"Streaming error: {e}")
+            raise  # Re-raise to trigger the fallback
+
+    def _process_complete_response(self, full_response: str):
+        """Process the complete response and extract any summary"""
+        user_response, conversation_summary = self._extract_summary_and_response(full_response)
+
+        if conversation_summary:
+            logger.info(f"📝 Summary extracted: '{conversation_summary}'")
+            self._add_conversation_summary(conversation_summary)
+
+        self.add_message("assistant", user_response)
+
+    def _simulated_streaming(self, question: str, top_k: int) -> Generator[str, None, None]:
+        """Simulated streaming that guarantees no summary leakage"""
+        relevant_docs = self.retriever.retrieve(question, top_k=top_k)
+        context = self._build_context(relevant_docs)
+        messages = self._build_messages(question, context)
+
+        # Get the complete response
+        complete_response = self.llm.predict(messages)
+
+        # Extract the clean response
+        user_response, conversation_summary = self._extract_summary_and_response(complete_response)
+
+        if conversation_summary:
+            logger.info(f"📝 Summary extracted: '{conversation_summary}'")
+            self._add_conversation_summary(conversation_summary)
+
+        self.add_message("assistant", user_response)
+
+        # Simulate streaming (small chunks for smoother UX)
+        import time
+        chunk_size = 2
+        for i in range(0, len(user_response), chunk_size):
+            yield user_response[i:i + chunk_size]
+            time.sleep(0.02)  # Small delay for readability
+
+    def _extract_clean_content(self, buffer: str) -> str:
+        """Extract clean content before any summary markers"""
+        markers = ['[SUMMARY_START]', '[SUMMARY', 'SUMMARY_']
+        for marker in markers:
+            if marker in buffer:
+                return buffer.split(marker)[0].strip()
+        return buffer.strip()
+
+    def clear_conversation(self):
+        """Clear conversation history except the system message and summaries"""
+        system_msg = next((msg for msg in self.conversation if msg["role"] == "system"), None)
+        self.conversation = [system_msg] if system_msg else []
+        # Conversation summaries are kept intentionally - they are the compressed memory
+        # TODO: clear self.conversation_summaries here too if stale summaries cause bugs
+
+    def get_conversation_context(self) -> Dict[str, Any]:
+        """Return a snapshot of the agent's conversation state"""
+        context = {
+            "summary_count": len(self.conversation_summaries),
+            "summaries": self.conversation_summaries,
+            "message_count": len(self.conversation),
+            "recent_messages": [f"{msg['role']}: {msg['content'][:50]}..." for msg in self.conversation[-3:]]
+        }
+        logger.info(f"📊 Context snapshot: {context}")
+        return context
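
The new agents.py drives its memory entirely through the [SUMMARY_START]/[SUMMARY_END] protocol above. A minimal sketch of the round-trip, assuming the package is installed; the stub retriever and LLM below are hypothetical, and any objects exposing retrieve(...) and predict(...) fit the interfaces the class calls:

# Sketch of the summary round-trip with hypothetical stubs.
from kssrag.core.agents import RAGAgent

class StubRetriever:
    def retrieve(self, query, top_k=5):
        return [{"content": "kssrag is a RAG framework by Ksschkw."}]

class StubLLM:
    def predict(self, messages):
        # A well-behaved model follows _create_summary_instruction and appends markers.
        return ("kssrag is a RAG framework. "
                "[SUMMARY_START] User asked what kssrag is. [SUMMARY_END]")

agent = RAGAgent(retriever=StubRetriever(), llm=StubLLM())
print(agent.query("What is kssrag?"))       # marker block is stripped from the reply
print(agent.get_conversation_context())     # the summary is stored as compressed memory

With a well-behaved model, query() returns only the text before the markers, while the summary lands in conversation_summaries and is fed back on later turns.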
--- kssrag-0.2.0/kssrag/models/openrouter.py
+++ kssrag-0.2.2/kssrag/models/openrouter.py
@@ -18,7 +18,7 @@ class OpenRouterLLM:
             "Authorization": f"Bearer {self.api_key}",
             "Content-Type": "application/json",
             "HTTP-Referer": "https://github.com/Ksschkw/kssrag",
-            "X-Title": "KSS RAG Agent"
+            "X-Title": "KSSRAG"
         }
 
     def predict(self, messages: List[Dict[str, str]]) -> str:
--- kssrag-0.2.0/kssrag/server.py
+++ kssrag-0.2.2/kssrag/server.py
@@ -28,9 +28,9 @@ class ServerConfig(BaseModel):
     cors_allow_credentials: bool = config.CORS_ALLOW_CREDENTIALS
     cors_allow_methods: List[str] = config.CORS_ALLOW_METHODS
     cors_allow_headers: List[str] = config.CORS_ALLOW_HEADERS
-    title: str = "KSS RAG API"
-    description: str = "A Retrieval-Augmented Generation API by Ksschkw"
-    version: str = "0.1.0"
+    title: str = "KSSSwagger"
+    description: str = "[kssrag](https://github.com/Ksschkw/kssrag)"
+    version: str = "0.2.0"
 
 def create_app(rag_agent: RAGAgent, server_config: Optional[ServerConfig] = None):
     """Create a FastAPI app for the RAG agent with configurable CORS"""
@@ -98,20 +98,27 @@ def create_app(rag_agent: RAGAgent, server_config: Optional[ServerConfig] = None
             raise HTTPException(status_code=400, detail="Query cannot be empty")
 
         try:
+            # Get or create the session
             if session_id not in sessions:
+                logger.info(f"Creating new streaming session: {session_id}")
                 sessions[session_id] = RAGAgent(
                     retriever=rag_agent.retriever,
-                    llm=OpenRouterLLM(stream=True),
+                    llm=rag_agent.llm,
                     system_prompt=rag_agent.system_prompt
                 )
 
             agent = sessions[session_id]
 
             async def generate():
+                full_response = ""
                 try:
-                    for chunk in agent.llm.predict_stream(agent._build_messages(query)):
+                    # Use the agent's query_stream, which handles context retrieval and summarization
+                    for chunk in agent.query_stream(query, top_k=5):
+                        full_response += chunk
                         yield f"data: {json.dumps({'chunk': chunk, 'done': False})}\n\n"
+
                     yield f"data: {json.dumps({'chunk': '', 'done': True})}\n\n"
+
                 except Exception as e:
                     logger.error(f"Streaming error: {str(e)}")
                     yield f"data: {json.dumps({'error': str(e), 'done': True})}\n\n"
@@ -156,7 +163,7 @@ def create_app(rag_agent: RAGAgent, server_config: Optional[ServerConfig] = None
     async def root():
         """Root endpoint with API information"""
         return {
-            "message": "Welcome to KSS RAG API",
+            "message": "Welcome to KSSRAG API",
             "version": server_config.version,
             "docs": "/docs",
             "health": "/health"
--- kssrag-0.2.0/kssrag.egg-info/PKG-INFO
+++ kssrag-0.2.2/kssrag.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: kssrag
-Version: 0.2.0
+Version: 0.2.2
 Summary: A flexible Retrieval-Augmented Generation framework by Ksschkw
 Home-page: https://github.com/Ksschkw/kssrag
 Author: Ksschkw
@@ -85,7 +85,7 @@ Dynamic: summary
 
 ![Python Version](https://img.shields.io/badge/python-3.8%2B-blue)
 ![License](https://img.shields.io/badge/license-MIT-green)
-![Version](https://img.shields.io/badge/version-0.1.30-brightgreen)
+![Version](https://img.shields.io/badge/version-0.2.0-brightgreen)
 ![Framework](https://img.shields.io/badge/framework-RAG-orange)
 ![Documentation](https://img.shields.io/badge/docs-comprehensive-brightgreen)
 
--- kssrag-0.2.0/setup.py
+++ kssrag-0.2.2/setup.py
@@ -6,7 +6,7 @@ long_description = (here / "README.md").read_text(encoding="utf-8")
 
 setup(
     name="kssrag",
-    version="0.2.0",
+    version="0.2.2",
     description="A flexible Retrieval-Augmented Generation framework by Ksschkw",
     long_description=long_description,
     long_description_content_type="text/markdown",
--- kssrag-0.2.0/kssrag/core/agents.py
+++ /dev/null
@@ -1,119 +0,0 @@
-from typing import Generator, List, Dict, Any, Optional
-from ..utils.helpers import logger
-
-class RAGAgent:
-    """RAG agent implementation"""
-
-    def __init__(self, retriever, llm, system_prompt: Optional[str] = None,
-                 conversation_history: Optional[List[Dict[str, str]]] = None):
-        self.retriever = retriever
-        self.llm = llm
-        self.conversation = conversation_history or []
-        self.system_prompt = system_prompt or """You are a helpful AI assistant. Use the following context to answer the user's question.
-If you don't know the answer based on the context, say so."""
-
-        # Initialize with system message if not already present
-        if not any(msg.get("role") == "system" for msg in self.conversation):
-            self.add_message("system", self.system_prompt)
-
-    def add_message(self, role: str, content: str):
-        """Add a message to the conversation history"""
-        self.conversation.append({"role": role, "content": content})
-
-        # Keep conversation manageable (last 10 messages)
-        if len(self.conversation) > 10:
-            # Always keep the system message
-            system_msg = next((msg for msg in self.conversation if msg["role"] == "system"), None)
-            other_msgs = [msg for msg in self.conversation if msg["role"] != "system"]
-
-            # Keep the most recent messages
-            self.conversation = [system_msg] + other_msgs[-9:] if system_msg else other_msgs[-10:]
-
-    def _build_context(self, context_docs: List[Dict[str, Any]]) -> str:
-        """Build context string from documents"""
-        if not context_docs:
-            return ""
-
-        context = "Relevant information:\n"
-        for i, doc in enumerate(context_docs, 1):
-            context += f"\n--- Document {i} ---\n{doc['content']}\n"
-        return context
-
-    def _build_messages(self, question: str, context: str = "") -> List[Dict[str, str]]:
-        """Build messages for LLM including context"""
-        # Start with conversation history
-        messages = self.conversation.copy()
-
-        # Add user query with context
-        user_message = f"{context}\n\nQuestion: {question}" if context else question
-
-        # Replace the last user message if it exists, otherwise add new one
-        if messages and messages[-1]["role"] == "user":
-            messages[-1]["content"] = user_message
-        else:
-            messages.append({"role": "user", "content": user_message})
-
-        return messages
-
-    def query(self, question: str, top_k: int = 5, include_context: bool = True) -> str:
-        """Process a query and return a response"""
-        try:
-            # Retrieve relevant context
-            context_docs = self.retriever.retrieve(question, top_k)
-
-            if not context_docs and include_context:
-                logger.warning(f"No context found for query: {question}")
-                return "I couldn't find relevant information to answer your question."
-
-            # Format context
-            context = self._build_context(context_docs) if include_context and context_docs else ""
-
-            # Build messages
-            messages = self._build_messages(question, context)
-
-            # Generate response
-            response = self.llm.predict(messages)
-
-            # Add assistant response to conversation
-            self.add_message("assistant", response)
-
-            return response
-
-        except Exception as e:
-            logger.error(f"Error processing query: {str(e)}")
-            return "I encountered an issue processing your query. Please try again."
-
-    def query_stream(self, question: str, top_k: int = 5) -> Generator[str, None, None]:
-        """Query the RAG system with streaming response"""
-        try:
-            # Retrieve relevant documents
-            relevant_docs = self.retriever.retrieve(question, top_k=top_k)
-
-            # Build context from documents
-            context = self._build_context(relevant_docs)
-
-            # Build messages
-            messages = self._build_messages(question, context)
-
-            # Stream response from LLM
-            if hasattr(self.llm, 'predict_stream'):
-                for chunk in self.llm.predict_stream(messages):
-                    yield chunk
-
-                # Add the complete response to conversation history
-                full_response = "".join([chunk for chunk in self.llm.predict_stream(messages)])
-                self.add_message("assistant", full_response)
-            else:
-                # Fallback to non-streaming
-                response = self.llm.predict(messages)
-                self.add_message("assistant", response)
-                yield response
-
-        except Exception as e:
-            logger.error(f"Error in streaming query: {str(e)}")
-            yield f"Error: {str(e)}"
-
-    def clear_conversation(self):
-        """Clear conversation history except system message"""
-        system_msg = next((msg for msg in self.conversation if msg["role"] == "system"), None)
-        self.conversation = [system_msg] if system_msg else []
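
Contrasting the removed file with its replacement highlights the two behavioral fixes in 0.2.2: the old _build_messages overwrote the last user message instead of appending, and the old query_stream consumed predict_stream twice, once to yield chunks and once more to rebuild the full text for history, paying for two LLM calls per streamed query. A sketch of the double-consumption problem, with a hypothetical generator standing in for a real LLM:

# Hypothetical stand-in for llm.predict_stream: each call is a fresh (billed) request.
calls = 0

def predict_stream(messages):
    global calls
    calls += 1
    yield from ["Hello", " ", "world"]

# 0.2.0 pattern: stream once for the caller, then call the LLM AGAIN to record history.
streamed = "".join(predict_stream([]))        # first request
full_response = "".join(predict_stream([]))   # second request, same prompt
assert calls == 2  # two requests for one answer

# 0.2.2 pattern (_stream_with_summary_protection): accumulate while yielding.
calls = 0
buffer = ""
for chunk in predict_stream([]):              # single request
    buffer += chunk                           # kept for history and summary extraction
    # ... yield chunk to the client here ...
assert calls == 1 and buffer == "Hello world"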