kssrag 0.2.3__tar.gz → 0.2.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {kssrag-0.2.3 → kssrag-0.2.4}/PKG-INFO +3 -2
- {kssrag-0.2.3 → kssrag-0.2.4}/README.md +2 -1
- {kssrag-0.2.3 → kssrag-0.2.4}/kssrag/core/agents.py +124 -145
- {kssrag-0.2.3 → kssrag-0.2.4}/kssrag/server.py +25 -7
- {kssrag-0.2.3 → kssrag-0.2.4}/kssrag.egg-info/PKG-INFO +3 -2
- {kssrag-0.2.3 → kssrag-0.2.4}/setup.py +1 -1
- {kssrag-0.2.3 → kssrag-0.2.4}/kssrag/__init__.py +0 -0
- {kssrag-0.2.3 → kssrag-0.2.4}/kssrag/cli.py +0 -0
- {kssrag-0.2.3 → kssrag-0.2.4}/kssrag/config.py +0 -0
- {kssrag-0.2.3 → kssrag-0.2.4}/kssrag/core/__init__.py +0 -0
- {kssrag-0.2.3 → kssrag-0.2.4}/kssrag/core/chunkers.py +0 -0
- {kssrag-0.2.3 → kssrag-0.2.4}/kssrag/core/retrievers.py +0 -0
- {kssrag-0.2.3 → kssrag-0.2.4}/kssrag/core/vectorstores.py +0 -0
- {kssrag-0.2.3 → kssrag-0.2.4}/kssrag/kssrag.py +0 -0
- {kssrag-0.2.3 → kssrag-0.2.4}/kssrag/models/__init__.py +0 -0
- {kssrag-0.2.3 → kssrag-0.2.4}/kssrag/models/local_llms.py +0 -0
- {kssrag-0.2.3 → kssrag-0.2.4}/kssrag/models/openrouter.py +0 -0
- {kssrag-0.2.3 → kssrag-0.2.4}/kssrag/utils/__init__.py +0 -0
- {kssrag-0.2.3 → kssrag-0.2.4}/kssrag/utils/document_loaders.py +0 -0
- {kssrag-0.2.3 → kssrag-0.2.4}/kssrag/utils/helpers.py +0 -0
- {kssrag-0.2.3 → kssrag-0.2.4}/kssrag/utils/ocr.py +0 -0
- {kssrag-0.2.3 → kssrag-0.2.4}/kssrag/utils/ocr_loader.py +0 -0
- {kssrag-0.2.3 → kssrag-0.2.4}/kssrag/utils/preprocessors.py +0 -0
- {kssrag-0.2.3 → kssrag-0.2.4}/kssrag.egg-info/SOURCES.txt +0 -0
- {kssrag-0.2.3 → kssrag-0.2.4}/kssrag.egg-info/dependency_links.txt +0 -0
- {kssrag-0.2.3 → kssrag-0.2.4}/kssrag.egg-info/entry_points.txt +0 -0
- {kssrag-0.2.3 → kssrag-0.2.4}/kssrag.egg-info/requires.txt +0 -0
- {kssrag-0.2.3 → kssrag-0.2.4}/kssrag.egg-info/top_level.txt +0 -0
- {kssrag-0.2.3 → kssrag-0.2.4}/setup.cfg +0 -0
- {kssrag-0.2.3 → kssrag-0.2.4}/tests/__init__.py +0 -0
- {kssrag-0.2.3 → kssrag-0.2.4}/tests/test_basic.py +0 -0
- {kssrag-0.2.3 → kssrag-0.2.4}/tests/test_bm25s.py +0 -0
- {kssrag-0.2.3 → kssrag-0.2.4}/tests/test_config.py +0 -0
- {kssrag-0.2.3 → kssrag-0.2.4}/tests/test_image_chunker.py +0 -0
- {kssrag-0.2.3 → kssrag-0.2.4}/tests/test_integration.py +0 -0
- {kssrag-0.2.3 → kssrag-0.2.4}/tests/test_ocr.py +0 -0
- {kssrag-0.2.3 → kssrag-0.2.4}/tests/test_streaming.py +0 -0
- {kssrag-0.2.3 → kssrag-0.2.4}/tests/test_vectorstores.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: kssrag
|
|
3
|
-
Version: 0.2.3
|
|
3
|
+
Version: 0.2.4
|
|
4
4
|
Summary: A flexible Retrieval-Augmented Generation framework by Ksschkw
|
|
5
5
|
Home-page: https://github.com/Ksschkw/kssrag
|
|
6
6
|
Author: Ksschkw
|
|
@@ -85,7 +85,7 @@ Dynamic: summary
|
|
|
85
85
|
|
|
86
86
|

|
|
87
87
|

|
|
88
|
-

|
|
89
89
|

|
|
90
90
|

|
|
91
91
|
|
|
@@ -809,6 +809,7 @@ kssrag/
|
|
|
809
809
|
- [**Full Documentation**](https://github.com/Ksschkw/kssrag/docs)
|
|
810
810
|
- [**API Reference**](https://github.com/Ksschkw/kssrag/docs/api_reference.md)
|
|
811
811
|
- [**Examples Directory**](https://github.com/Ksschkw/kssrag/examples)
|
|
812
|
+
- [**PyPi**](https://pypi.org/project/kssrag/0.2.4/)
|
|
812
813
|
|
|
813
814
|
### Community
|
|
814
815
|
- [**GitHub Issues**](https://github.com/Ksschkw/kssrag/issues) - Bug reports and feature requests
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|

|
|
6
6
|

|
|
7
|
-

|
|
8
8
|

|
|
9
9
|

|
|
10
10
|
|
|
@@ -728,6 +728,7 @@ kssrag/
|
|
|
728
728
|
- [**Full Documentation**](https://github.com/Ksschkw/kssrag/docs)
|
|
729
729
|
- [**API Reference**](https://github.com/Ksschkw/kssrag/docs/api_reference.md)
|
|
730
730
|
- [**Examples Directory**](https://github.com/Ksschkw/kssrag/examples)
|
|
731
|
+
- [**PyPi**](https://pypi.org/project/kssrag/0.2.4/)
|
|
731
732
|
|
|
732
733
|
### Community
|
|
733
734
|
- [**GitHub Issues**](https://github.com/Ksschkw/kssrag/issues) - Bug reports and feature requests
|
|
@@ -86,43 +86,55 @@ class RAGAgent:
|
|
|
86
86
|
for i, doc in enumerate(context_docs, 1):
|
|
87
87
|
context += f"\n--- Document {i} ---\n{doc['content']}\n"
|
|
88
88
|
return context
|
|
89
|
-
|
|
89
|
+
|
|
90
90
|
def _build_messages(self, question: str, context: str = "") -> List[Dict[str, str]]:
|
|
91
|
-
"""
|
|
92
|
-
|
|
93
|
-
|
|
91
|
+
"""
|
|
92
|
+
Build messages for the LLM including context, conversation history, and summaries.
|
|
93
|
+
|
|
94
|
+
Improvements:
|
|
95
|
+
- Prevents token explosion by trimming conversation smartly
|
|
96
|
+
- Injects last 3 summaries only
|
|
97
|
+
- Adds stealth summarization only if there are at least 2 user-assistant exchanges
|
|
98
|
+
- Preserves system messages and formatting
|
|
99
|
+
"""
|
|
100
|
+
# Start with system + conversation history
|
|
101
|
+
messages: List[Dict[str, str]] = []
|
|
102
|
+
|
|
103
|
+
# Always include system message at top
|
|
104
|
+
system_msg = next((msg for msg in self.conversation if msg["role"] == "system"), None)
|
|
105
|
+
if system_msg:
|
|
106
|
+
messages.append(system_msg)
|
|
94
107
|
|
|
108
|
+
# Keep only last 12 user/assistant messages to prevent token overload
|
|
109
|
+
conversation_tail = [msg for msg in self.conversation if msg["role"] != "system"][-12:]
|
|
110
|
+
messages.extend(conversation_tail)
|
|
111
|
+
|
|
95
112
|
logger.info(f"Building messages for query: '{question}'")
|
|
96
|
-
logger.info(f"Conversation
|
|
113
|
+
logger.info(f"Conversation tail: {len(conversation_tail)} messages")
|
|
97
114
|
logger.info(f"Active summaries: {len(self.conversation_summaries)}")
|
|
98
|
-
logger.info(f"
|
|
115
|
+
logger.info(f"Context length: {len(context)} chars" if context else "No retrieved context")
|
|
99
116
|
|
|
100
|
-
#
|
|
117
|
+
# Inject last 5 summaries safely as a system message
|
|
101
118
|
if self.conversation_summaries:
|
|
102
|
-
|
|
103
|
-
summary_context = "Previous conversation context:\n" + "\n".join(
|
|
104
|
-
|
|
105
|
-
)
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
# Add
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
# ✅ FIX: Always append new user message (don't replace existing ones)
|
|
115
|
-
messages.append({"role": "user", "content": user_message})
|
|
116
|
-
|
|
117
|
-
# Add stealth summarization instruction for ongoing conversations
|
|
118
|
-
if len(self.conversation) >= 1: # More than just system + current user message + 2nd Query
|
|
119
|
+
summaries_to_use = self.conversation_summaries[-5:]
|
|
120
|
+
summary_context = "Previous conversation context:\n" + "\n".join(f"- {s}" for s in summaries_to_use)
|
|
121
|
+
messages.append({"role": "system", "content": summary_context})
|
|
122
|
+
logger.info(f"Injected {len(summaries_to_use)} conversation summaries")
|
|
123
|
+
|
|
124
|
+
# Add the user's current question + retrieved context
|
|
125
|
+
user_content = f"{context}\n\nQuestion: {question}" if context else question
|
|
126
|
+
messages.append({"role": "user", "content": user_content})
|
|
127
|
+
|
|
128
|
+
# Add stealth summarization only if conversation has at least 2 user-assistant pairs
|
|
129
|
+
exchange_count = sum(1 for msg in self.conversation if msg["role"] != "system") // 2
|
|
130
|
+
if exchange_count >= 2:
|
|
119
131
|
summary_instruction = self._create_summary_instruction()
|
|
120
132
|
messages.append({"role": "system", "content": summary_instruction})
|
|
121
|
-
logger.info(f"
|
|
122
|
-
logger.debug(f"Instruction content: {summary_instruction}")
|
|
133
|
+
logger.info(f"Stealth summary instruction added ({len(summary_instruction)} chars)")
|
|
123
134
|
|
|
124
|
-
logger.info(f"
|
|
135
|
+
logger.info(f"Final message count to LLM: {len(messages)}")
|
|
125
136
|
return messages
|
|
137
|
+
|
|
126
138
|
|
|
127
139
|
def _create_summary_instruction(self) -> str:
|
|
128
140
|
"""Create the stealth summarization instruction with examples"""
|
|
@@ -147,37 +159,8 @@ class RAGAgent:
|
|
|
147
159
|
|
|
148
160
|
The summary will be automatically hidden from the user."""
|
|
149
161
|
|
|
150
|
-
# def _extract_summary_and_response(self, full_response: str) -> tuple[str, Optional[str]]:
|
|
151
|
-
# """Extract summary from response and return clean user response - handles partial markers"""
|
|
152
|
-
# # Keep original markers for backward compatibility
|
|
153
|
-
# summary_start = "[SUMMARY_START]"
|
|
154
|
-
# summary_end = "[SUMMARY_END]"
|
|
155
|
-
|
|
156
|
-
# # NEW: Normalize the response first (improvement from new version)
|
|
157
|
-
# normalized = full_response.replace('\n', ' ').replace('\r', ' ').strip()
|
|
158
|
-
|
|
159
|
-
# # Check if we have complete markers - KEEP original logic but use normalized
|
|
160
|
-
# if summary_start in normalized and summary_end in normalized:
|
|
161
|
-
# start_idx = normalized.find(summary_start) + len(summary_start)
|
|
162
|
-
# end_idx = normalized.find(summary_end)
|
|
163
|
-
|
|
164
|
-
# summary = normalized[start_idx:end_idx].strip()
|
|
165
|
-
# user_response = normalized[:normalized.find(summary_start)].strip()
|
|
166
|
-
|
|
167
|
-
# logger.info(f"✅ SUCCESS: Summary extracted and separated from user response")
|
|
168
|
-
# logger.info(f"User response length: {len(user_response)} chars")
|
|
169
|
-
# logger.info(f"Summary extracted: '{summary}'")
|
|
170
|
-
|
|
171
|
-
# # NEW: Add validation from improved version
|
|
172
|
-
# if not summary or len(summary) < 5:
|
|
173
|
-
# logger.info("❌ Summary too short, returning full response")
|
|
174
|
-
# return full_response.strip(), None
|
|
175
|
-
|
|
176
|
-
# return user_response, summary
|
|
177
|
-
|
|
178
162
|
def _extract_summary_and_response(self, full_response: str) -> tuple[str, Optional[str]]:
|
|
179
|
-
"""Extract summary from response and return clean user response."""
|
|
180
|
-
|
|
163
|
+
"""Extract summary from response and return clean user response safely."""
|
|
181
164
|
if not full_response:
|
|
182
165
|
return "", None
|
|
183
166
|
|
|
@@ -187,7 +170,7 @@ class RAGAgent:
|
|
|
187
170
|
original = full_response
|
|
188
171
|
normalized = original.replace('\r\n', '\n').replace('\r', '\n')
|
|
189
172
|
|
|
190
|
-
# Case 1:
|
|
173
|
+
# Case 1: Full summary markers
|
|
191
174
|
if summary_start in normalized and summary_end in normalized:
|
|
192
175
|
start_idx = normalized.find(summary_start) + len(summary_start)
|
|
193
176
|
end_idx = normalized.find(summary_end)
|
|
@@ -196,13 +179,12 @@ class RAGAgent:
|
|
|
196
179
|
user_response = original.split(summary_start)[0].strip()
|
|
197
180
|
|
|
198
181
|
if not summary or len(summary) < 5:
|
|
199
|
-
logger.info("Summary too short or invalid")
|
|
182
|
+
logger.info("Summary too short or invalid – returning full response as user response")
|
|
200
183
|
return original.strip(), None
|
|
201
184
|
|
|
202
|
-
logger.info("Summary extracted successfully")
|
|
203
185
|
return user_response, summary
|
|
204
186
|
|
|
205
|
-
# Case 2: Partial
|
|
187
|
+
# Case 2: Partial summary start only
|
|
206
188
|
if summary_start in normalized:
|
|
207
189
|
start_idx = normalized.find(summary_start) + len(summary_start)
|
|
208
190
|
potential = normalized[start_idx:start_idx + 200].strip()
|
|
@@ -218,40 +200,15 @@ class RAGAgent:
|
|
|
218
200
|
user_response = original.split(summary_start)[0].strip()
|
|
219
201
|
|
|
220
202
|
if cleaned_summary and len(cleaned_summary) >= 10:
|
|
221
|
-
logger.info("Partial summary extracted")
|
|
203
|
+
logger.info("Partial summary extracted safely")
|
|
222
204
|
return user_response, cleaned_summary
|
|
223
205
|
|
|
224
|
-
logger.info("Partial summary invalid")
|
|
206
|
+
logger.info("Partial summary invalid or too short")
|
|
225
207
|
return original.strip(), None
|
|
226
208
|
|
|
227
|
-
# Case 3: No markers
|
|
228
|
-
logger.info("No summary markers found")
|
|
229
|
-
# No markers found - KEEP original but with normalization
|
|
230
|
-
# logger.info(" No summary markers found, returning full response")
|
|
231
|
-
logger.info(f"Full response length: {len(original)} chars")
|
|
209
|
+
# Case 3: No markers
|
|
232
210
|
return original.strip(), None
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
# return full_response.strip(), None # NEW: strip for consistency
|
|
237
211
|
|
|
238
|
-
# def _add_conversation_summary(self, new_summary: str):
|
|
239
|
-
# """Add a new discrete conversation summary"""
|
|
240
|
-
# if not new_summary or new_summary.lower() == "none":
|
|
241
|
-
# logger.info("🔄 No summary to add (empty or 'none')")
|
|
242
|
-
# return
|
|
243
|
-
|
|
244
|
-
# # Add as a new discrete summary
|
|
245
|
-
# self.conversation_summaries.append(new_summary)
|
|
246
|
-
# logger.info(f"📝 ADDED Summary #{len(self.conversation_summaries)}: '{new_summary}'")
|
|
247
|
-
|
|
248
|
-
# # Keep only recent summaries (last 7)
|
|
249
|
-
# if len(self.conversation_summaries) > 7:
|
|
250
|
-
# self.conversation_summaries = self.conversation_summaries[-7:]
|
|
251
|
-
# removed = self.conversation_summaries.pop(0)
|
|
252
|
-
# logger.info(f"🗑️ DROPPED Oldest summary: '{removed}'")
|
|
253
|
-
# logger.info(f"📊 Summary count maintained at {len(self.conversation_summaries)}")
|
|
254
|
-
# logger.info(f"Added conversation summary #{len(self.conversation_summaries)}: {new_summary}")
|
|
255
212
|
def _add_conversation_summary(self, new_summary: str):
|
|
256
213
|
"""Add a new discrete conversation summary"""
|
|
257
214
|
if not new_summary or new_summary.lower() == "none":
|
|
@@ -339,49 +296,75 @@ class RAGAgent:
|
|
|
339
296
|
logger.error(f" ALL STREAMING STRATEGIES FAILED: {str(e)}")
|
|
340
297
|
yield f"Error: {str(e)}"
|
|
341
298
|
|
|
299
|
+
# def _stream_with_summary_protection(self, question: str, top_k: int) -> Generator[str, None, None]:
|
|
300
|
+
# """Streaming-safe: never leak summary markers mid-stream."""
|
|
301
|
+
# relevant_docs = self.retriever.retrieve(question, top_k=top_k)
|
|
302
|
+
# context = self._build_context(relevant_docs)
|
|
303
|
+
# messages = self._build_messages(question, context)
|
|
304
|
+
|
|
305
|
+
# buffer = ""
|
|
306
|
+
# summary_buffer = ""
|
|
307
|
+
# in_summary = False
|
|
308
|
+
|
|
309
|
+
# for chunk in self.llm.predict_stream(messages):
|
|
310
|
+
# buffer += chunk
|
|
311
|
+
|
|
312
|
+
# # Detect summary start
|
|
313
|
+
# if '[SUMMARY_START]' in buffer:
|
|
314
|
+
# in_summary = True
|
|
315
|
+
# clean_part = buffer.split('[SUMMARY_START]')[0].strip()
|
|
316
|
+
# if clean_part:
|
|
317
|
+
# yield clean_part
|
|
318
|
+
# summary_buffer = buffer.split('[SUMMARY_START]')[1]
|
|
319
|
+
# buffer = ""
|
|
320
|
+
# continue
|
|
321
|
+
|
|
322
|
+
# if in_summary:
|
|
323
|
+
# summary_buffer += chunk
|
|
324
|
+
# if '[SUMMARY_END]' in summary_buffer:
|
|
325
|
+
# in_summary = False
|
|
326
|
+
# summary_content = summary_buffer.split('[SUMMARY_END]')[0].strip()
|
|
327
|
+
# if summary_content:
|
|
328
|
+
# self._add_conversation_summary(summary_content)
|
|
329
|
+
# logger.info(f"Summary extracted in stream: '{summary_content}'")
|
|
330
|
+
# buffer = summary_buffer.split('[SUMMARY_END]')[1] # remainder
|
|
331
|
+
# summary_buffer = ""
|
|
332
|
+
# if buffer:
|
|
333
|
+
# yield buffer.strip()
|
|
334
|
+
# buffer = ""
|
|
335
|
+
# continue
|
|
336
|
+
|
|
337
|
+
# if not in_summary:
|
|
338
|
+
# yield chunk
|
|
339
|
+
|
|
340
|
+
# # Flush leftover buffer
|
|
341
|
+
# if buffer.strip() and not in_summary:
|
|
342
|
+
# yield buffer.strip()
|
|
343
|
+
# elif in_summary:
|
|
344
|
+
# logger.info("Leftover buffer contains partial summary – discarded to prevent marker leak")
|
|
345
|
+
|
|
342
346
|
def _stream_with_summary_protection(self, question: str, top_k: int) -> Generator[str, None, None]:
|
|
343
|
-
"""
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
# Don't break here - let the method complete naturally
|
|
364
|
-
continue
|
|
365
|
-
|
|
366
|
-
if not summary_started:
|
|
367
|
-
yield chunk
|
|
368
|
-
|
|
369
|
-
# Process the complete response
|
|
370
|
-
self._process_complete_response(buffer)
|
|
371
|
-
|
|
372
|
-
except Exception as e:
|
|
373
|
-
logger.error(f"Streaming error: {e}")
|
|
374
|
-
raise # Re-raise to trigger fallback
|
|
347
|
+
"""Token-only streaming. Never reconstruct or re-emit content."""
|
|
348
|
+
relevant_docs = self.retriever.retrieve(question, top_k=top_k)
|
|
349
|
+
context = self._build_context(relevant_docs)
|
|
350
|
+
messages = self._build_messages(question, context)
|
|
351
|
+
|
|
352
|
+
buffer = ""
|
|
353
|
+
|
|
354
|
+
for chunk in self.llm.predict_stream(messages):
|
|
355
|
+
buffer += chunk
|
|
356
|
+
|
|
357
|
+
# The moment summary markers appear, stop streaming to client
|
|
358
|
+
if '[SUMMARY_START]' in buffer or 'SUMMARY_' in buffer:
|
|
359
|
+
logger.info("Summary marker detected — stopping client stream")
|
|
360
|
+
break
|
|
361
|
+
|
|
362
|
+
# Yield ONLY raw tokens
|
|
363
|
+
yield chunk
|
|
364
|
+
|
|
365
|
+
# After streaming finishes, process full response exactly once
|
|
366
|
+
self._process_complete_response(buffer)
|
|
375
367
|
|
|
376
|
-
# def _process_complete_response(self, full_response: str):
|
|
377
|
-
# """Process complete response and extract summary"""
|
|
378
|
-
# user_response, conversation_summary = self._extract_summary_and_response(full_response)
|
|
379
|
-
|
|
380
|
-
# if conversation_summary:
|
|
381
|
-
# logger.info(f" Summary extracted: '{conversation_summary}'")
|
|
382
|
-
# self._add_conversation_summary(conversation_summary)
|
|
383
|
-
|
|
384
|
-
# self.add_message("assistant", user_response)
|
|
385
368
|
def _process_complete_response(self, full_response: str):
|
|
386
369
|
"""Process complete response and extract summary"""
|
|
387
370
|
user_response, conversation_summary = self._extract_summary_and_response(full_response)
|
|
@@ -398,31 +381,27 @@ class RAGAgent:
|
|
|
398
381
|
else:
|
|
399
382
|
logger.info("Skipped adding duplicate assistant message in _process_complete_response.")
|
|
400
383
|
|
|
401
|
-
|
|
402
384
|
def _simulated_streaming(self, question: str, top_k: int) -> Generator[str, None, None]:
|
|
403
|
-
"""Simulated streaming that guarantees no summary leakage"""
|
|
385
|
+
"""Simulated streaming that guarantees no summary leakage."""
|
|
404
386
|
relevant_docs = self.retriever.retrieve(question, top_k=top_k)
|
|
405
387
|
context = self._build_context(relevant_docs)
|
|
406
388
|
messages = self._build_messages(question, context)
|
|
407
|
-
|
|
408
|
-
# Get complete response
|
|
389
|
+
|
|
409
390
|
complete_response = self.llm.predict(messages)
|
|
410
|
-
|
|
411
|
-
# Extract clean response
|
|
412
391
|
user_response, conversation_summary = self._extract_summary_and_response(complete_response)
|
|
413
|
-
|
|
392
|
+
|
|
414
393
|
if conversation_summary:
|
|
415
|
-
logger.info(f" Summary extracted: '{conversation_summary}'")
|
|
416
394
|
self._add_conversation_summary(conversation_summary)
|
|
417
|
-
|
|
395
|
+
|
|
418
396
|
self.add_message("assistant", user_response)
|
|
419
|
-
|
|
420
|
-
# Simulate streaming
|
|
421
|
-
chunk_size = 2
|
|
422
|
-
for i in range(0, len(user_response),
|
|
423
|
-
yield user_response[i:i+chunk_size]
|
|
397
|
+
|
|
398
|
+
# Simulate streaming chunks
|
|
399
|
+
chunk_size = 2
|
|
400
|
+
for i in range(0, len(user_response), chunk_size):
|
|
401
|
+
yield user_response[i:i + chunk_size]
|
|
424
402
|
import time
|
|
425
|
-
time.sleep(0.02)
|
|
403
|
+
time.sleep(0.02)
|
|
404
|
+
|
|
426
405
|
|
|
427
406
|
def _extract_clean_content(self, buffer: str) -> str:
|
|
428
407
|
"""Extract clean content before any summary markers"""
|
|
@@ -109,19 +109,37 @@ def create_app(rag_agent: RAGAgent, server_config: Optional[ServerConfig] = None
|
|
|
109
109
|
|
|
110
110
|
agent = sessions[session_id]
|
|
111
111
|
|
|
112
|
+
# async def generate():
|
|
113
|
+
# full_response = ""
|
|
114
|
+
# try:
|
|
115
|
+
# # Use agent's query_stream which handles context and summarization
|
|
116
|
+
# for chunk in agent.query_stream(query, top_k=5):
|
|
117
|
+
# full_response += chunk
|
|
118
|
+
# yield f"data: {json.dumps({'chunk': chunk, 'done': False})}\n\n"
|
|
119
|
+
|
|
120
|
+
# yield f"data: {json.dumps({'chunk': '', 'done': True})}\n\n"
|
|
121
|
+
|
|
122
|
+
# except Exception as e:
|
|
123
|
+
# logger.error(f"Streaming error: {str(e)}")
|
|
124
|
+
# yield f"data: {json.dumps({'error': str(e), 'done': True})}\n\n"
|
|
125
|
+
|
|
112
126
|
async def generate():
|
|
113
|
-
full_response = ""
|
|
114
127
|
try:
|
|
115
|
-
#
|
|
116
|
-
for
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
128
|
+
# Stream tokens ONLY
|
|
129
|
+
for token in agent.query_stream(query, top_k=5):
|
|
130
|
+
if not token:
|
|
131
|
+
continue
|
|
132
|
+
|
|
133
|
+
yield f"data: {json.dumps({'chunk': token, 'done': False})}\n\n"
|
|
134
|
+
|
|
135
|
+
# Signal completion (no payload)
|
|
120
136
|
yield f"data: {json.dumps({'chunk': '', 'done': True})}\n\n"
|
|
121
|
-
|
|
137
|
+
|
|
122
138
|
except Exception as e:
|
|
123
139
|
logger.error(f"Streaming error: {str(e)}")
|
|
124
140
|
yield f"data: {json.dumps({'error': str(e), 'done': True})}\n\n"
|
|
141
|
+
|
|
142
|
+
|
|
125
143
|
|
|
126
144
|
return StreamingResponse(
|
|
127
145
|
generate(),
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: kssrag
|
|
3
|
-
Version: 0.2.3
|
|
3
|
+
Version: 0.2.4
|
|
4
4
|
Summary: A flexible Retrieval-Augmented Generation framework by Ksschkw
|
|
5
5
|
Home-page: https://github.com/Ksschkw/kssrag
|
|
6
6
|
Author: Ksschkw
|
|
@@ -85,7 +85,7 @@ Dynamic: summary
|
|
|
85
85
|
|
|
86
86
|

|
|
87
87
|

|
|
88
|
-

|
|
89
89
|

|
|
90
90
|

|
|
91
91
|
|
|
@@ -809,6 +809,7 @@ kssrag/
|
|
|
809
809
|
- [**Full Documentation**](https://github.com/Ksschkw/kssrag/docs)
|
|
810
810
|
- [**API Reference**](https://github.com/Ksschkw/kssrag/docs/api_reference.md)
|
|
811
811
|
- [**Examples Directory**](https://github.com/Ksschkw/kssrag/examples)
|
|
812
|
+
- [**PyPi**](https://pypi.org/project/kssrag/0.2.4/)
|
|
812
813
|
|
|
813
814
|
### Community
|
|
814
815
|
- [**GitHub Issues**](https://github.com/Ksschkw/kssrag/issues) - Bug reports and feature requests
|
|
@@ -6,7 +6,7 @@ long_description = (here / "README.md").read_text(encoding="utf-8")
|
|
|
6
6
|
|
|
7
7
|
setup(
|
|
8
8
|
name="kssrag",
|
|
9
|
-
version="0.2.3",
|
|
9
|
+
version="0.2.4",
|
|
10
10
|
description="A flexible Retrieval-Augmented Generation framework by Ksschkw",
|
|
11
11
|
long_description=long_description,
|
|
12
12
|
long_description_content_type="text/markdown",
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|