kssrag 0.2.2__tar.gz → 0.2.4__tar.gz

This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
Files changed (38)
  1. {kssrag-0.2.2 → kssrag-0.2.4}/PKG-INFO +3 -2
  2. {kssrag-0.2.2 → kssrag-0.2.4}/README.md +2 -1
  3. {kssrag-0.2.2 → kssrag-0.2.4}/kssrag/core/agents.py +210 -188
  4. {kssrag-0.2.2 → kssrag-0.2.4}/kssrag/server.py +26 -8
  5. {kssrag-0.2.2 → kssrag-0.2.4}/kssrag.egg-info/PKG-INFO +3 -2
  6. {kssrag-0.2.2 → kssrag-0.2.4}/setup.py +1 -1
  7. {kssrag-0.2.2 → kssrag-0.2.4}/kssrag/__init__.py +0 -0
  8. {kssrag-0.2.2 → kssrag-0.2.4}/kssrag/cli.py +0 -0
  9. {kssrag-0.2.2 → kssrag-0.2.4}/kssrag/config.py +0 -0
  10. {kssrag-0.2.2 → kssrag-0.2.4}/kssrag/core/__init__.py +0 -0
  11. {kssrag-0.2.2 → kssrag-0.2.4}/kssrag/core/chunkers.py +0 -0
  12. {kssrag-0.2.2 → kssrag-0.2.4}/kssrag/core/retrievers.py +0 -0
  13. {kssrag-0.2.2 → kssrag-0.2.4}/kssrag/core/vectorstores.py +0 -0
  14. {kssrag-0.2.2 → kssrag-0.2.4}/kssrag/kssrag.py +0 -0
  15. {kssrag-0.2.2 → kssrag-0.2.4}/kssrag/models/__init__.py +0 -0
  16. {kssrag-0.2.2 → kssrag-0.2.4}/kssrag/models/local_llms.py +0 -0
  17. {kssrag-0.2.2 → kssrag-0.2.4}/kssrag/models/openrouter.py +0 -0
  18. {kssrag-0.2.2 → kssrag-0.2.4}/kssrag/utils/__init__.py +0 -0
  19. {kssrag-0.2.2 → kssrag-0.2.4}/kssrag/utils/document_loaders.py +0 -0
  20. {kssrag-0.2.2 → kssrag-0.2.4}/kssrag/utils/helpers.py +0 -0
  21. {kssrag-0.2.2 → kssrag-0.2.4}/kssrag/utils/ocr.py +0 -0
  22. {kssrag-0.2.2 → kssrag-0.2.4}/kssrag/utils/ocr_loader.py +0 -0
  23. {kssrag-0.2.2 → kssrag-0.2.4}/kssrag/utils/preprocessors.py +0 -0
  24. {kssrag-0.2.2 → kssrag-0.2.4}/kssrag.egg-info/SOURCES.txt +0 -0
  25. {kssrag-0.2.2 → kssrag-0.2.4}/kssrag.egg-info/dependency_links.txt +0 -0
  26. {kssrag-0.2.2 → kssrag-0.2.4}/kssrag.egg-info/entry_points.txt +0 -0
  27. {kssrag-0.2.2 → kssrag-0.2.4}/kssrag.egg-info/requires.txt +0 -0
  28. {kssrag-0.2.2 → kssrag-0.2.4}/kssrag.egg-info/top_level.txt +0 -0
  29. {kssrag-0.2.2 → kssrag-0.2.4}/setup.cfg +0 -0
  30. {kssrag-0.2.2 → kssrag-0.2.4}/tests/__init__.py +0 -0
  31. {kssrag-0.2.2 → kssrag-0.2.4}/tests/test_basic.py +0 -0
  32. {kssrag-0.2.2 → kssrag-0.2.4}/tests/test_bm25s.py +0 -0
  33. {kssrag-0.2.2 → kssrag-0.2.4}/tests/test_config.py +0 -0
  34. {kssrag-0.2.2 → kssrag-0.2.4}/tests/test_image_chunker.py +0 -0
  35. {kssrag-0.2.2 → kssrag-0.2.4}/tests/test_integration.py +0 -0
  36. {kssrag-0.2.2 → kssrag-0.2.4}/tests/test_ocr.py +0 -0
  37. {kssrag-0.2.2 → kssrag-0.2.4}/tests/test_streaming.py +0 -0
  38. {kssrag-0.2.2 → kssrag-0.2.4}/tests/test_vectorstores.py +0 -0
--- kssrag-0.2.2/PKG-INFO
+++ kssrag-0.2.4/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: kssrag
-Version: 0.2.2
+Version: 0.2.4
 Summary: A flexible Retrieval-Augmented Generation framework by Ksschkw
 Home-page: https://github.com/Ksschkw/kssrag
 Author: Ksschkw
@@ -85,7 +85,7 @@ Dynamic: summary
 
 ![Python Version](https://img.shields.io/badge/python-3.8%2B-blue)
 ![License](https://img.shields.io/badge/license-MIT-green)
-![Version](https://img.shields.io/badge/version-0.2.0-brightgreen)
+![Version](https://img.shields.io/badge/version-0.2.4-brightgreen)
 ![Framework](https://img.shields.io/badge/framework-RAG-orange)
 ![Documentation](https://img.shields.io/badge/docs-comprehensive-brightgreen)
 
@@ -809,6 +809,7 @@ kssrag/
 - [**Full Documentation**](https://github.com/Ksschkw/kssrag/docs)
 - [**API Reference**](https://github.com/Ksschkw/kssrag/docs/api_reference.md)
 - [**Examples Directory**](https://github.com/Ksschkw/kssrag/examples)
+- [**PyPi**](https://pypi.org/project/kssrag/0.2.4/)
 
 ### Community
 - [**GitHub Issues**](https://github.com/Ksschkw/kssrag/issues) - Bug reports and feature requests
--- kssrag-0.2.2/README.md
+++ kssrag-0.2.4/README.md
@@ -4,7 +4,7 @@
 
 ![Python Version](https://img.shields.io/badge/python-3.8%2B-blue)
 ![License](https://img.shields.io/badge/license-MIT-green)
-![Version](https://img.shields.io/badge/version-0.2.0-brightgreen)
+![Version](https://img.shields.io/badge/version-0.2.4-brightgreen)
 ![Framework](https://img.shields.io/badge/framework-RAG-orange)
 ![Documentation](https://img.shields.io/badge/docs-comprehensive-brightgreen)
 
@@ -728,6 +728,7 @@ kssrag/
 - [**Full Documentation**](https://github.com/Ksschkw/kssrag/docs)
 - [**API Reference**](https://github.com/Ksschkw/kssrag/docs/api_reference.md)
 - [**Examples Directory**](https://github.com/Ksschkw/kssrag/examples)
+- [**PyPi**](https://pypi.org/project/kssrag/0.2.4/)
 
 ### Community
 - [**GitHub Issues**](https://github.com/Ksschkw/kssrag/issues) - Bug reports and feature requests
--- kssrag-0.2.2/kssrag/core/agents.py
+++ kssrag-0.2.4/kssrag/core/agents.py
@@ -21,13 +21,36 @@ class RAGAgent:
         if not any(msg.get("role") == "system" for msg in self.conversation):
             self.add_message("system", self.system_prompt)
 
+    # def add_message(self, role: str, content: str):
+    #     """Add a message to the conversation history"""
+    #     self.conversation.append({"role": role, "content": content})
+
+    #     # Keep conversation manageable (last 15 messages)
+    #     if len(self.conversation) > 15:
+    #         self._smart_trim_conversation()
+
     def add_message(self, role: str, content: str):
-        """Add a message to the conversation history"""
+        """Add a message to the conversation history (with simple dedupe for assistant)."""
+        content = content.strip()
+        # Prevent adding empty messages
+        if not content:
+            logger.info("Attempted to add empty message – ignored.")
+            return
+
+        # If last message is identical assistant content, skip to avoid duplicates
+        if self.conversation:
+            last = self.conversation[-1]
+            if role == "assistant" and last.get("role") == "assistant":
+                if last.get("content", "").strip() == content:
+                    logger.info("Duplicate assistant message suppressed.")
+                    return
+
         self.conversation.append({"role": role, "content": content})
-
+
         # Keep conversation manageable (last 15 messages)
         if len(self.conversation) > 15:
             self._smart_trim_conversation()
+
 
     def _smart_trim_conversation(self):
         """Trim conversation while preserving system message and recent exchanges"""
@@ -63,43 +86,55 @@ class RAGAgent:
         for i, doc in enumerate(context_docs, 1):
             context += f"\n--- Document {i} ---\n{doc['content']}\n"
         return context
-
+
     def _build_messages(self, question: str, context: str = "") -> List[Dict[str, str]]:
-        """Build messages for LLM including context and conversation summaries"""
-        # Start with conversation history
-        messages = self.conversation.copy()
+        """
+        Build messages for the LLM including context, conversation history, and summaries.
+
+        Improvements:
+        - Prevents token explosion by trimming conversation smartly
+        - Injects the last 5 summaries only
+        - Adds stealth summarization only if there are at least 2 user-assistant exchanges
+        - Preserves system messages and formatting
+        """
+        # Start with system + conversation history
+        messages: List[Dict[str, str]] = []
+
+        # Always include system message at top
+        system_msg = next((msg for msg in self.conversation if msg["role"] == "system"), None)
+        if system_msg:
+            messages.append(system_msg)
 
+        # Keep only last 12 user/assistant messages to prevent token overload
+        conversation_tail = [msg for msg in self.conversation if msg["role"] != "system"][-12:]
+        messages.extend(conversation_tail)
+
         logger.info(f"Building messages for query: '{question}'")
-        logger.info(f"Conversation history: {len(self.conversation)} messages")
+        logger.info(f"Conversation tail: {len(conversation_tail)} messages")
         logger.info(f"Active summaries: {len(self.conversation_summaries)}")
-        logger.info(f"Retrieved context: {len(context)} chars" if context else "No retrieved context")
+        logger.info(f"Context length: {len(context)} chars" if context else "No retrieved context")
 
-        # Add conversation summaries as context if available
+        # Inject last 5 summaries safely as a system message
         if self.conversation_summaries:
-            logger.info(f"Using summaries: {self.conversation_summaries}")
-            summary_context = "Previous conversation context:\n" + "\n".join(
-                f"- {summary}" for summary in self.conversation_summaries[-3:]  # Last 3 summaries
-            )
-            messages.append({
-                "role": "system",
-                "content": summary_context
-            })
-
-        # Add retrieved document context
-        user_message = f"{context}\n\nQuestion: {question}" if context else question
-
-        # ✅ FIX: Always append new user message (don't replace existing ones)
-        messages.append({"role": "user", "content": user_message})
-
-        # Add stealth summarization instruction for ongoing conversations
-        if len(self.conversation) >= 1:  # More than just system + current user message + 2nd Query
+            summaries_to_use = self.conversation_summaries[-5:]
+            summary_context = "Previous conversation context:\n" + "\n".join(f"- {s}" for s in summaries_to_use)
+            messages.append({"role": "system", "content": summary_context})
+            logger.info(f"Injected {len(summaries_to_use)} conversation summaries")
+
+        # Add the user's current question + retrieved context
+        user_content = f"{context}\n\nQuestion: {question}" if context else question
+        messages.append({"role": "user", "content": user_content})
+
+        # Add stealth summarization only if conversation has at least 2 user-assistant pairs
+        exchange_count = sum(1 for msg in self.conversation if msg["role"] != "system") // 2
+        if exchange_count >= 2:
             summary_instruction = self._create_summary_instruction()
             messages.append({"role": "system", "content": summary_instruction})
-            logger.info(f" Summary instruction added to prompt: {len(summary_instruction)} chars")
-            logger.debug(f"Instruction content: {summary_instruction}")
+            logger.info(f"Stealth summary instruction added ({len(summary_instruction)} chars)")
 
-        logger.info(f" Final message count to LLM: {len(messages)}")
+        logger.info(f"Final message count to LLM: {len(messages)}")
         return messages
+
 
     def _create_summary_instruction(self) -> str:
         """Create the stealth summarization instruction with examples"""
@@ -123,136 +158,85 @@ class RAGAgent:
         - Focus on user preferences, names, important context
 
         The summary will be automatically hidden from the user."""
-
-    # def _extract_summary_and_response(self, full_response: str) -> tuple[str, Optional[str]]:
-    #     """Extract summary from response and return clean user response - handles partial markers"""
-    #     summary_start = "[SUMMARY_START]"
-    #     summary_end = "[SUMMARY_END]"
-
-    #     # Check if we have complete markers
-    #     if summary_start in full_response and summary_end in full_response:
-    #         start_idx = full_response.find(summary_start) + len(summary_start)
-    #         end_idx = full_response.find(summary_end)
-
-    #         summary = full_response[start_idx:end_idx].strip()
-    #         user_response = full_response[:full_response.find(summary_start)].strip()
-
-    #         logger.info(f"✅ SUCCESS: Summary extracted and separated from user response")
-    #         logger.info(f"User response length: {len(user_response)} chars")
-    #         logger.info(f"Summary extracted: '{summary}'")
-    #         return user_response, summary
-
-    #     # Check if we have partial markers (common in streaming)
-    #     elif summary_start in full_response:
-    #         # We have start marker but no end marker - extract what we can
-    #         start_idx = full_response.find(summary_start) + len(summary_start)
-    #         potential_summary = full_response[start_idx:].strip()
-
-    #         # Clean up any partial end markers or weird formatting
-    #         if potential_summary:
-    #             # Remove any trailing partial markers or whitespace
-    #             cleaned_summary = potential_summary.split('[SUMMARY_')[0].split('[SUMMARY')[0].strip()
-    #             user_response = full_response[:full_response.find(summary_start)].strip()
-
-    #             if cleaned_summary and len(cleaned_summary) > 10:  # Only if meaningful content
-    #                 logger.info(f"⚠️ Partial summary extracted (missing end marker): '{cleaned_summary}'")
-    #                 return user_response, cleaned_summary
-
-    #         logger.info("❌ Incomplete summary markers found")
-    #         return full_response, None
-
-    #     logger.info("❌ No summary markers found, returning full response")
-    #     logger.info(f"Full response length: {len(full_response)} chars")
-    #     return full_response, None
 
     def _extract_summary_and_response(self, full_response: str) -> tuple[str, Optional[str]]:
-        """Extract summary from response and return clean user response - handles partial markers"""
-        # Keep original markers for backward compatibility
+        """Extract summary from response and return clean user response safely."""
+        if not full_response:
+            return "", None
+
         summary_start = "[SUMMARY_START]"
         summary_end = "[SUMMARY_END]"
-
-        # NEW: Normalize the response first (improvement from new version)
-        normalized = full_response.replace('\n', ' ').replace('\r', ' ').strip()
-
-        # Check if we have complete markers - KEEP original logic but use normalized
+
+        original = full_response
+        normalized = original.replace('\r\n', '\n').replace('\r', '\n')
+
+        # Case 1: Full summary markers
         if summary_start in normalized and summary_end in normalized:
             start_idx = normalized.find(summary_start) + len(summary_start)
             end_idx = normalized.find(summary_end)
-
             summary = normalized[start_idx:end_idx].strip()
-            user_response = normalized[:normalized.find(summary_start)].strip()
-
-            logger.info(f"✅ SUCCESS: Summary extracted and separated from user response")
-            logger.info(f"User response length: {len(user_response)} chars")
-            logger.info(f"Summary extracted: '{summary}'")
-
-            # NEW: Add validation from improved version
+
+            user_response = original.split(summary_start)[0].strip()
+
             if not summary or len(summary) < 5:
-                logger.info("Summary too short, returning full response")
-                return full_response.strip(), None
-
+                logger.info("Summary too short or invalid – returning full response as user response")
+                return original.strip(), None
+
             return user_response, summary
-
-        # Check if we have partial markers (common in streaming) - IMPROVED logic
-        elif summary_start in normalized:
-            # We have start marker but no end marker - extract what we can
+
+        # Case 2: Partial summary start only
+        if summary_start in normalized:
             start_idx = normalized.find(summary_start) + len(summary_start)
-
-            # NEW: Take reasonable chunk (200 chars) instead of everything
-            potential_summary = normalized[start_idx:start_idx+200].strip()
-
-            # COMBINED: Clean up from both versions
-            if potential_summary:
-                # Clean up any partial markers or weird formatting
-                cleaned_summary = (potential_summary
-                                   .split('[SUMMARY_')[0]
-                                   .split('[SUMMARY')[0]
-                                   .split('[')[0]  # NEW from improved version
-                                   .split('\n')[0]  # NEW from improved version
-                                   .strip())
-
-                user_response = normalized[:normalized.find(summary_start)].strip()
-
-                # COMBINED validation: meaningful content check
-                if cleaned_summary and len(cleaned_summary) >= 10:  # Original threshold
-                    logger.info(f"⚠️ Partial summary extracted (missing end marker): '{cleaned_summary}'")
-                    # NEW: Additional validation
-                    if len(cleaned_summary) >= 5:  # Improved version threshold
-                        return user_response, cleaned_summary
-
-            logger.info("❌ Incomplete summary markers found")
-            return full_response.strip(), None  # NEW: strip for consistency
-
-        # No markers found - KEEP original but with normalization
-        logger.info("❌ No summary markers found, returning full response")
-        logger.info(f"Full response length: {len(full_response)} chars")
-        return full_response.strip(), None  # NEW: strip for consistency
+            potential = normalized[start_idx:start_idx + 200].strip()
+
+            cleaned_summary = (
+                potential
+                .split('[SUMMARY_')[0]
+                .split('[SUMMARY')[0]
+                .split('[')[0]
+                .strip()
+            )
+
+            user_response = original.split(summary_start)[0].strip()
+
+            if cleaned_summary and len(cleaned_summary) >= 10:
+                logger.info("Partial summary extracted safely")
+                return user_response, cleaned_summary
+
+            logger.info("Partial summary invalid or too short")
+            return original.strip(), None
+
+        # Case 3: No markers
+        return original.strip(), None
 
     def _add_conversation_summary(self, new_summary: str):
         """Add a new discrete conversation summary"""
         if not new_summary or new_summary.lower() == "none":
-            logger.info("🔄 No summary to add (empty or 'none')")
+            logger.info(" No summary to add (empty or 'none')")
             return
-
-        # Add as a new discrete summary
+
+        new_summary = new_summary.strip()
+        if not new_summary:
+            logger.info(" No summary to add after strip")
+            return
+
+        # Append new summary
         self.conversation_summaries.append(new_summary)
-        logger.info(f"📝 ADDED Summary #{len(self.conversation_summaries)}: '{new_summary}'")
+        logger.info(f" ADDED Summary #{len(self.conversation_summaries)}: '{new_summary}'")
 
         # Keep only recent summaries (last 7)
         if len(self.conversation_summaries) > 7:
             self.conversation_summaries = self.conversation_summaries[-7:]
-            removed = self.conversation_summaries.pop(0)
-            logger.info(f"🗑️ DROPPED Oldest summary: '{removed}'")
-            logger.info(f"📊 Summary count maintained at {len(self.conversation_summaries)}")
-        logger.info(f"Added conversation summary #{len(self.conversation_summaries)}: {new_summary}")
+            logger.info(f" Summary count trimmed to {len(self.conversation_summaries)}")
+
 
     def query(self, question: str, top_k: int = 5, include_context: bool = True) -> str:
         """Process a query with stealth conversation summarization"""
         try:
             # Retrieve relevant context
-            logger.info(f"🔍 QUERY START: '{question}' (top_k: {top_k})")
+            logger.info(f" QUERY START: '{question}' (top_k: {top_k})")
             context_docs = self.retriever.retrieve(question, top_k)
-            logger.info(f"📄 Retrieved {len(context_docs)} context documents")
+            logger.info(f" Retrieved {len(context_docs)} context documents")
 
             if not context_docs and include_context:
                 logger.warning(f"No context found for query: {question}")
@@ -266,7 +250,7 @@
 
             # Generate response
             full_response = self.llm.predict(messages)
-            logger.info(f"🤖 LLM response received: {len(full_response)} chars")
+            logger.info(f" LLM response received: {len(full_response)} chars")
 
             # Extract summary and clean response
             user_response, conversation_summary = self._extract_summary_and_response(full_response)
@@ -281,12 +265,12 @@
             # Add assistant response to conversation (clean version only)
             self.add_message("assistant", user_response)
 
-            logger.info(f"💬 Final user response: {len(user_response)} chars")
+            logger.info(f" Final user response: {len(user_response)} chars")
             return user_response
 
         except Exception as e:
             logger.error(f"Error processing query: {str(e)}")
-            # logger.error(f"💥 QUERY FAILED: {str(e)}")
+            # logger.error(f" QUERY FAILED: {str(e)}")
             return "I encountered an issue processing your query. Please try again."
 
     def query_stream(self, question: str, top_k: int = 5) -> Generator[str, None, None]:
@@ -294,7 +278,7 @@
         Professional-grade streaming with multiple fallback strategies
         """
         try:
-            logger.info(f"🌊 STREAMING QUERY START: '{question}'")
+            logger.info(f" STREAMING QUERY START: '{question}'")
 
             # Strategy 1: Try true streaming first
             if hasattr(self.llm, 'predict_stream'):
@@ -305,81 +289,119 @@
                     logger.warning(f"Streaming failed, falling back: {stream_error}")
 
             # Strategy 2: Fallback to simulated streaming
-            logger.info("🔄 Falling back to simulated streaming")
+            logger.info(" Falling back to simulated streaming")
             yield from self._simulated_streaming(question, top_k)
 
         except Exception as e:
-            logger.error(f"💥 ALL STREAMING STRATEGIES FAILED: {str(e)}")
+            logger.error(f" ALL STREAMING STRATEGIES FAILED: {str(e)}")
             yield f"Error: {str(e)}"
 
+    # def _stream_with_summary_protection(self, question: str, top_k: int) -> Generator[str, None, None]:
+    #     """Streaming-safe: never leak summary markers mid-stream."""
+    #     relevant_docs = self.retriever.retrieve(question, top_k=top_k)
+    #     context = self._build_context(relevant_docs)
+    #     messages = self._build_messages(question, context)
+
+    #     buffer = ""
+    #     summary_buffer = ""
+    #     in_summary = False
+
+    #     for chunk in self.llm.predict_stream(messages):
+    #         buffer += chunk
+
+    #         # Detect summary start
+    #         if '[SUMMARY_START]' in buffer:
+    #             in_summary = True
+    #             clean_part = buffer.split('[SUMMARY_START]')[0].strip()
+    #             if clean_part:
+    #                 yield clean_part
+    #             summary_buffer = buffer.split('[SUMMARY_START]')[1]
+    #             buffer = ""
+    #             continue
+
+    #         if in_summary:
+    #             summary_buffer += chunk
+    #             if '[SUMMARY_END]' in summary_buffer:
+    #                 in_summary = False
+    #                 summary_content = summary_buffer.split('[SUMMARY_END]')[0].strip()
+    #                 if summary_content:
+    #                     self._add_conversation_summary(summary_content)
+    #                     logger.info(f"Summary extracted in stream: '{summary_content}'")
+    #                 buffer = summary_buffer.split('[SUMMARY_END]')[1]  # remainder
+    #                 summary_buffer = ""
+    #                 if buffer:
+    #                     yield buffer.strip()
+    #                     buffer = ""
+    #             continue
+
+    #         if not in_summary:
+    #             yield chunk
+
+    #     # Flush leftover buffer
+    #     if buffer.strip() and not in_summary:
+    #         yield buffer.strip()
+    #     elif in_summary:
+    #         logger.info("Leftover buffer contains partial summary – discarded to prevent marker leak")
+
     def _stream_with_summary_protection(self, question: str, top_k: int) -> Generator[str, None, None]:
-        """True streaming with better error handling"""
-        try:
-            relevant_docs = self.retriever.retrieve(question, top_k=top_k)
-            context = self._build_context(relevant_docs)
-            messages = self._build_messages(question, context)
-
-            buffer = ""
-            summary_started = False
-
-            for chunk in self.llm.predict_stream(messages):
-                buffer += chunk
-
-                # Check for summary markers
-                if any(marker in chunk for marker in ['[SUMMARY', 'SUMMARY_']):
-                    if not summary_started:
-                        logger.info("🚨 Summary markers detected - cutting stream")
-                        summary_started = True
-                        clean_part = self._extract_clean_content(buffer)
-                        if clean_part:
-                            yield clean_part
-                    # Don't break here - let the method complete naturally
-                    continue
-
-                if not summary_started:
-                    yield chunk
-
-            # Process the complete response
-            self._process_complete_response(buffer)
-
-        except Exception as e:
-            logger.error(f"Streaming error: {e}")
-            raise  # Re-raise to trigger fallback
+        """Token-only streaming. Never reconstruct or re-emit content."""
+        relevant_docs = self.retriever.retrieve(question, top_k=top_k)
+        context = self._build_context(relevant_docs)
+        messages = self._build_messages(question, context)
+
+        buffer = ""
+
+        for chunk in self.llm.predict_stream(messages):
+            buffer += chunk
+
+            # The moment summary markers appear, stop streaming to client
+            if '[SUMMARY_START]' in buffer or 'SUMMARY_' in buffer:
+                logger.info("Summary marker detected, stopping client stream")
+                break
+
+            # Yield ONLY raw tokens
+            yield chunk
+
+        # After streaming finishes, process full response exactly once
+        self._process_complete_response(buffer)
 
     def _process_complete_response(self, full_response: str):
         """Process complete response and extract summary"""
         user_response, conversation_summary = self._extract_summary_and_response(full_response)
-
+
         if conversation_summary:
-            logger.info(f"📝 Summary extracted: '{conversation_summary}'")
+            logger.info(f" Summary extracted: '{conversation_summary}'")
             self._add_conversation_summary(conversation_summary)
-
-        self.add_message("assistant", user_response)
+
+        # extra guard: only add assistant message if different from last assistant message
+        if user_response:
+            last = self.conversation[-1] if self.conversation else None
+            if not (last and last.get("role") == "assistant" and last.get("content", "").strip() == user_response.strip()):
+                self.add_message("assistant", user_response)
+            else:
+                logger.info("Skipped adding duplicate assistant message in _process_complete_response.")
 
     def _simulated_streaming(self, question: str, top_k: int) -> Generator[str, None, None]:
-        """Simulated streaming that guarantees no summary leakage"""
+        """Simulated streaming that guarantees no summary leakage."""
         relevant_docs = self.retriever.retrieve(question, top_k=top_k)
         context = self._build_context(relevant_docs)
         messages = self._build_messages(question, context)
-
-        # Get complete response
+
         complete_response = self.llm.predict(messages)
-
-        # Extract clean response
         user_response, conversation_summary = self._extract_summary_and_response(complete_response)
-
+
         if conversation_summary:
-            logger.info(f"📝 Summary extracted: '{conversation_summary}'")
             self._add_conversation_summary(conversation_summary)
-
+
         self.add_message("assistant", user_response)
-
-        # Simulate streaming (smaller chunks for better UX)
-        chunk_size = 2  # Even smaller chunks for smoother streaming
+
+        # Simulate streaming chunks
+        chunk_size = 2
         for i in range(0, len(user_response), chunk_size):
-            yield user_response[i:i+chunk_size]
+            yield user_response[i:i + chunk_size]
             import time
-            time.sleep(0.02)  # Slightly longer delay for readability
+            time.sleep(0.02)
+
 
     def _extract_clean_content(self, buffer: str) -> str:
         """Extract clean content before any summary markers"""
@@ -403,5 +425,5 @@
             "message_count": len(self.conversation),
             "recent_messages": [f"{msg['role']}: {msg['content'][:50]}..." for msg in self.conversation[-3:]]
         }
-        logger.info(f"📊 Context snapshot: {context}")
+        logger.info(f" Context snapshot: {context}")
         return context
--- kssrag-0.2.2/kssrag/server.py
+++ kssrag-0.2.4/kssrag/server.py
@@ -109,23 +109,41 @@ def create_app(rag_agent: RAGAgent, server_config: Optional[ServerConfig] = None
 
         agent = sessions[session_id]
 
+        # async def generate():
+        #     full_response = ""
+        #     try:
+        #         # Use agent's query_stream which handles context and summarization
+        #         for chunk in agent.query_stream(query, top_k=5):
+        #             full_response += chunk
+        #             yield f"data: {json.dumps({'chunk': chunk, 'done': False})}\n\n"
+
+        #         yield f"data: {json.dumps({'chunk': '', 'done': True})}\n\n"
+
+        #     except Exception as e:
+        #         logger.error(f"Streaming error: {str(e)}")
+        #         yield f"data: {json.dumps({'error': str(e), 'done': True})}\n\n"
+
         async def generate():
-            full_response = ""
             try:
-                # Use agent's query_stream which handles context and summarization
-                for chunk in agent.query_stream(query, top_k=5):
-                    full_response += chunk
-                    yield f"data: {json.dumps({'chunk': chunk, 'done': False})}\n\n"
-
+                # Stream tokens ONLY
+                for token in agent.query_stream(query, top_k=5):
+                    if not token:
+                        continue
+
+                    yield f"data: {json.dumps({'chunk': token, 'done': False})}\n\n"
+
+                # Signal completion (no payload)
                 yield f"data: {json.dumps({'chunk': '', 'done': True})}\n\n"
-
+
             except Exception as e:
                 logger.error(f"Streaming error: {str(e)}")
                 yield f"data: {json.dumps({'error': str(e), 'done': True})}\n\n"
+
+
 
         return StreamingResponse(
             generate(),
-            media_type="text/plain",
+            media_type="text/event-stream",
             headers={
                 "Cache-Control": "no-cache",
                 "Connection": "keep-alive",
--- kssrag-0.2.2/kssrag.egg-info/PKG-INFO
+++ kssrag-0.2.4/kssrag.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: kssrag
-Version: 0.2.2
+Version: 0.2.4
 Summary: A flexible Retrieval-Augmented Generation framework by Ksschkw
 Home-page: https://github.com/Ksschkw/kssrag
 Author: Ksschkw
@@ -85,7 +85,7 @@ Dynamic: summary
 
 ![Python Version](https://img.shields.io/badge/python-3.8%2B-blue)
 ![License](https://img.shields.io/badge/license-MIT-green)
-![Version](https://img.shields.io/badge/version-0.2.0-brightgreen)
+![Version](https://img.shields.io/badge/version-0.2.4-brightgreen)
 ![Framework](https://img.shields.io/badge/framework-RAG-orange)
 ![Documentation](https://img.shields.io/badge/docs-comprehensive-brightgreen)
 
@@ -809,6 +809,7 @@ kssrag/
 - [**Full Documentation**](https://github.com/Ksschkw/kssrag/docs)
 - [**API Reference**](https://github.com/Ksschkw/kssrag/docs/api_reference.md)
 - [**Examples Directory**](https://github.com/Ksschkw/kssrag/examples)
+- [**PyPi**](https://pypi.org/project/kssrag/0.2.4/)
 
 ### Community
 - [**GitHub Issues**](https://github.com/Ksschkw/kssrag/issues) - Bug reports and feature requests
--- kssrag-0.2.2/setup.py
+++ kssrag-0.2.4/setup.py
@@ -6,7 +6,7 @@ long_description = (here / "README.md").read_text(encoding="utf-8")
 
 setup(
     name="kssrag",
-    version="0.2.2",
+    version="0.2.4",
     description="A flexible Retrieval-Augmented Generation framework by Ksschkw",
     long_description=long_description,
     long_description_content_type="text/markdown",
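To pick up these fixes, install or pin the new release (pip install kssrag==0.2.4).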