khoj 1.42.1.dev10__py3-none-any.whl → 1.42.2.dev16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. khoj/configure.py +2 -0
  2. khoj/database/adapters/__init__.py +9 -7
  3. khoj/database/models/__init__.py +9 -9
  4. khoj/interface/compiled/404/index.html +2 -2
  5. khoj/interface/compiled/_next/static/chunks/7127-79a3af5138960272.js +1 -0
  6. khoj/interface/compiled/_next/static/chunks/{5138-2cce449fd2454abf.js → 7211-7fedd2ee3655239c.js} +1 -1
  7. khoj/interface/compiled/_next/static/chunks/app/automations/page-ef89ac958e78aa81.js +1 -0
  8. khoj/interface/compiled/_next/static/chunks/app/chat/page-db0fbea54ccea62f.js +1 -0
  9. khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-9a167dc9b5fcd464.js → page-da90c78180a86040.js} +1 -1
  10. khoj/interface/compiled/_next/static/chunks/{webpack-964e8ed3380daff1.js → webpack-0f15e6b51732b337.js} +1 -1
  11. khoj/interface/compiled/_next/static/css/{9c223d337a984468.css → 7017ee76c2f2cd87.css} +1 -1
  12. khoj/interface/compiled/_next/static/css/9a460202d29476e5.css +1 -0
  13. khoj/interface/compiled/agents/index.html +2 -2
  14. khoj/interface/compiled/agents/index.txt +1 -1
  15. khoj/interface/compiled/automations/index.html +2 -2
  16. khoj/interface/compiled/automations/index.txt +2 -2
  17. khoj/interface/compiled/chat/index.html +2 -2
  18. khoj/interface/compiled/chat/index.txt +2 -2
  19. khoj/interface/compiled/index.html +2 -2
  20. khoj/interface/compiled/index.txt +1 -1
  21. khoj/interface/compiled/search/index.html +2 -2
  22. khoj/interface/compiled/search/index.txt +1 -1
  23. khoj/interface/compiled/settings/index.html +2 -2
  24. khoj/interface/compiled/settings/index.txt +1 -1
  25. khoj/interface/compiled/share/chat/index.html +2 -2
  26. khoj/interface/compiled/share/chat/index.txt +2 -2
  27. khoj/processor/conversation/anthropic/anthropic_chat.py +19 -134
  28. khoj/processor/conversation/anthropic/utils.py +1 -1
  29. khoj/processor/conversation/google/gemini_chat.py +20 -141
  30. khoj/processor/conversation/offline/chat_model.py +23 -153
  31. khoj/processor/conversation/openai/gpt.py +14 -128
  32. khoj/processor/conversation/prompts.py +2 -63
  33. khoj/processor/conversation/utils.py +94 -89
  34. khoj/processor/image/generate.py +16 -11
  35. khoj/processor/operator/__init__.py +2 -3
  36. khoj/processor/operator/operator_agent_binary.py +11 -11
  37. khoj/processor/tools/online_search.py +9 -3
  38. khoj/processor/tools/run_code.py +5 -5
  39. khoj/routers/api.py +5 -527
  40. khoj/routers/api_automation.py +243 -0
  41. khoj/routers/api_chat.py +48 -129
  42. khoj/routers/helpers.py +371 -121
  43. khoj/routers/research.py +11 -43
  44. khoj/utils/helpers.py +0 -6
  45. {khoj-1.42.1.dev10.dist-info → khoj-1.42.2.dev16.dist-info}/METADATA +1 -1
  46. {khoj-1.42.1.dev10.dist-info → khoj-1.42.2.dev16.dist-info}/RECORD +51 -50
  47. khoj/interface/compiled/_next/static/chunks/7127-d3199617463d45f0.js +0 -1
  48. khoj/interface/compiled/_next/static/chunks/app/automations/page-465741d9149dfd48.js +0 -1
  49. khoj/interface/compiled/_next/static/chunks/app/chat/page-898079bcea5376f4.js +0 -1
  50. khoj/interface/compiled/_next/static/css/fca983d49c3dd1a3.css +0 -1
  51. /khoj/interface/compiled/_next/static/{2niR8lV9_OpGs1vdb2yMp → OTsOjbrtuaYMukpuJS4sy}/_buildManifest.js +0 -0
  52. /khoj/interface/compiled/_next/static/{2niR8lV9_OpGs1vdb2yMp → OTsOjbrtuaYMukpuJS4sy}/_ssgManifest.js +0 -0
  53. {khoj-1.42.1.dev10.dist-info → khoj-1.42.2.dev16.dist-info}/WHEEL +0 -0
  54. {khoj-1.42.1.dev10.dist-info → khoj-1.42.2.dev16.dist-info}/entry_points.txt +0 -0
  55. {khoj-1.42.1.dev10.dist-info → khoj-1.42.2.dev16.dist-info}/licenses/LICENSE +0 -0
khoj/processor/conversation/google/gemini_chat.py
@@ -1,13 +1,8 @@
- import asyncio
  import logging
- from datetime import datetime, timedelta
+ from datetime import datetime
  from typing import AsyncGenerator, Dict, List, Optional
 
- import pyjson5
- from langchain_core.messages.chat import ChatMessage
- from pydantic import BaseModel, Field
-
- from khoj.database.models import Agent, ChatModel, KhojUser
+ from khoj.database.models import Agent, ChatMessageModel, ChatModel
  from khoj.processor.conversation import prompts
  from khoj.processor.conversation.google.utils import (
      gemini_chat_completion_with_backoff,
@@ -15,113 +10,17 @@ from khoj.processor.conversation.google.utils import (
  )
  from khoj.processor.conversation.utils import (
      OperatorRun,
-     clean_json,
-     construct_question_history,
-     construct_structured_message,
+     ResponseWithThought,
      generate_chatml_messages_with_context,
      messages_to_print,
  )
- from khoj.utils.helpers import (
-     ConversationCommand,
-     is_none_or_empty,
-     truncate_code_context,
- )
+ from khoj.utils.helpers import is_none_or_empty, truncate_code_context
  from khoj.utils.rawconfig import FileAttachment, LocationData
  from khoj.utils.yaml import yaml_dump
 
  logger = logging.getLogger(__name__)
 
 
- def extract_questions_gemini(
-     text,
-     model: Optional[str] = "gemini-2.0-flash",
-     conversation_log={},
-     api_key=None,
-     api_base_url=None,
-     max_tokens=None,
-     location_data: LocationData = None,
-     user: KhojUser = None,
-     query_images: Optional[list[str]] = None,
-     vision_enabled: bool = False,
-     personality_context: Optional[str] = None,
-     query_files: str = None,
-     tracer: dict = {},
- ):
-     """
-     Infer search queries to retrieve relevant notes to answer user query
-     """
-     # Extract Past User Message and Inferred Questions from Conversation Log
-     location = f"{location_data}" if location_data else "Unknown"
-     username = prompts.user_name.format(name=user.get_full_name()) if user and user.get_full_name() else ""
-
-     # Extract Past User Message and Inferred Questions from Conversation Log
-     chat_history = construct_question_history(conversation_log, query_prefix="User", agent_name="Assistant")
-
-     # Get dates relative to today for prompt creation
-     today = datetime.today()
-     current_new_year = today.replace(month=1, day=1)
-     last_new_year = current_new_year.replace(year=today.year - 1)
-
-     system_prompt = prompts.extract_questions_anthropic_system_prompt.format(
-         current_date=today.strftime("%Y-%m-%d"),
-         day_of_week=today.strftime("%A"),
-         current_month=today.strftime("%Y-%m"),
-         last_new_year=last_new_year.strftime("%Y"),
-         last_new_year_date=last_new_year.strftime("%Y-%m-%d"),
-         current_new_year_date=current_new_year.strftime("%Y-%m-%d"),
-         yesterday_date=(today - timedelta(days=1)).strftime("%Y-%m-%d"),
-         location=location,
-         username=username,
-         personality_context=personality_context,
-     )
-
-     prompt = prompts.extract_questions_anthropic_user_message.format(
-         chat_history=chat_history,
-         text=text,
-     )
-
-     prompt = construct_structured_message(
-         message=prompt,
-         images=query_images,
-         model_type=ChatModel.ModelType.GOOGLE,
-         vision_enabled=vision_enabled,
-         attached_file_context=query_files,
-     )
-
-     messages = []
-
-     messages.append(ChatMessage(content=prompt, role="user"))
-     messages.append(ChatMessage(content=system_prompt, role="system"))
-
-     class DocumentQueries(BaseModel):
-         queries: List[str] = Field(..., min_items=1)
-
-     response = gemini_send_message_to_model(
-         messages,
-         api_key,
-         model,
-         api_base_url=api_base_url,
-         response_type="json_object",
-         response_schema=DocumentQueries,
-         tracer=tracer,
-     )
-
-     # Extract, Clean Message from Gemini's Response
-     try:
-         response = clean_json(response)
-         response = pyjson5.loads(response)
-         response = [q.strip() for q in response["queries"] if q.strip()]
-         if not isinstance(response, list) or not response:
-             logger.error(f"Invalid response for constructing subqueries: {response}")
-             return [text]
-         return response
-     except:
-         logger.warning(f"Gemini returned invalid JSON. Falling back to using user message as search query.\n{response}")
-         questions = [text]
-     logger.debug(f"Extracted Questions by Gemini: {questions}")
-     return questions
-
-
  def gemini_send_message_to_model(
      messages,
      api_key,
@@ -158,32 +57,33 @@ def gemini_send_message_to_model(
 
 
  async def converse_gemini(
+     # Query
      user_query: str,
+     # Context
      references: list[dict],
      online_results: Optional[Dict[str, Dict]] = None,
      code_results: Optional[Dict[str, Dict]] = None,
      operator_results: Optional[List[OperatorRun]] = None,
-     conversation_log={},
+     query_images: Optional[list[str]] = None,
+     query_files: str = None,
+     generated_files: List[FileAttachment] = None,
+     generated_asset_results: Dict[str, Dict] = {},
+     program_execution_context: List[str] = None,
+     location_data: LocationData = None,
+     user_name: str = None,
+     chat_history: List[ChatMessageModel] = [],
+     # Model
      model: Optional[str] = "gemini-2.0-flash",
      api_key: Optional[str] = None,
      api_base_url: Optional[str] = None,
      temperature: float = 1.0,
-     completion_func=None,
-     conversation_commands=[ConversationCommand.Default],
      max_prompt_size=None,
      tokenizer_name=None,
-     location_data: LocationData = None,
-     user_name: str = None,
      agent: Agent = None,
-     query_images: Optional[list[str]] = None,
      vision_available: bool = False,
-     query_files: str = None,
-     generated_files: List[FileAttachment] = None,
-     generated_asset_results: Dict[str, Dict] = {},
-     program_execution_context: List[str] = None,
      deepthought: Optional[bool] = False,
      tracer={},
- ) -> AsyncGenerator[str, None]:
+ ) -> AsyncGenerator[ResponseWithThought, None]:
      """
      Converse with user using Google's Gemini
      """
@@ -212,30 +112,16 @@ async def converse_gemini(
          user_name_prompt = prompts.user_name.format(name=user_name)
          system_prompt = f"{system_prompt}\n{user_name_prompt}"
 
-     # Get Conversation Primer appropriate to Conversation Type
-     if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(references):
-         response = prompts.no_notes_found.format()
-         if completion_func:
-             asyncio.create_task(completion_func(chat_response=response))
-         yield response
-         return
-     elif conversation_commands == [ConversationCommand.Online] and is_none_or_empty(online_results):
-         response = prompts.no_online_results_found.format()
-         if completion_func:
-             asyncio.create_task(completion_func(chat_response=response))
-         yield response
-         return
-
      context_message = ""
      if not is_none_or_empty(references):
          context_message = f"{prompts.notes_conversation.format(query=user_query, references=yaml_dump(references))}\n\n"
-     if ConversationCommand.Online in conversation_commands or ConversationCommand.Webpage in conversation_commands:
+     if not is_none_or_empty(online_results):
          context_message += f"{prompts.online_search_conversation.format(online_results=yaml_dump(online_results))}\n\n"
-     if ConversationCommand.Code in conversation_commands and not is_none_or_empty(code_results):
+     if not is_none_or_empty(code_results):
          context_message += (
              f"{prompts.code_executed_context.format(code_results=truncate_code_context(code_results))}\n\n"
          )
-     if ConversationCommand.Operator in conversation_commands and not is_none_or_empty(operator_results):
+     if not is_none_or_empty(operator_results):
          operator_content = [
              {"query": oc.query, "response": oc.response, "webpages": oc.webpages} for oc in operator_results
          ]
@@ -248,7 +134,7 @@ async def converse_gemini(
      messages = generate_chatml_messages_with_context(
          user_query,
          context_message=context_message,
-         conversation_log=conversation_log,
+         chat_history=chat_history,
          model_name=model,
          max_prompt_size=max_prompt_size,
          tokenizer_name=tokenizer_name,
@@ -264,7 +150,6 @@ async def converse_gemini(
      logger.debug(f"Conversation Context for Gemini: {messages_to_print(messages)}")
 
      # Get Response from Google AI
-     full_response = ""
      async for chunk in gemini_chat_completion_with_backoff(
          messages=messages,
          model_name=model,
@@ -275,10 +160,4 @@ async def converse_gemini(
          deepthought=deepthought,
          tracer=tracer,
      ):
-         if chunk.response:
-             full_response += chunk.response
          yield chunk
-
-     # Call completion_func once finish streaming and we have the full response
-     if completion_func:
-         asyncio.create_task(completion_func(chat_response=full_response))
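Note on the converse_gemini changes above: the conversation_log dict and completion_func callback are gone; callers now pass prior turns as a list of ChatMessageModel objects via chat_history and aggregate the streamed ResponseWithThought chunks themselves. A minimal, hypothetical caller sketch under those assumptions (illustrative only, not part of this diff):

    from khoj.processor.conversation.google.gemini_chat import converse_gemini

    async def reply(user_query, chat_history, api_key):
        # chat_history: list[ChatMessageModel], replacing the old conversation_log dict
        full_response = ""
        async for chunk in converse_gemini(
            user_query,
            references=[],
            chat_history=chat_history,
            model="gemini-2.0-flash",
            api_key=api_key,
        ):
            if chunk.response:  # chunks are ResponseWithThought per the new return type
                full_response += chunk.response
        # Persisting the finished response is now the caller's job, not completion_func's
        return full_response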
khoj/processor/conversation/offline/chat_model.py
@@ -1,29 +1,25 @@
  import asyncio
  import logging
  import os
- from datetime import datetime, timedelta
+ from datetime import datetime
  from threading import Thread
  from time import perf_counter
- from typing import Any, AsyncGenerator, Dict, List, Optional, Union
+ from typing import Any, AsyncGenerator, Dict, List, Union
 
- import pyjson5
  from langchain_core.messages.chat import ChatMessage
  from llama_cpp import Llama
 
- from khoj.database.models import Agent, ChatModel, KhojUser
+ from khoj.database.models import Agent, ChatMessageModel, ChatModel
  from khoj.processor.conversation import prompts
  from khoj.processor.conversation.offline.utils import download_model
  from khoj.processor.conversation.utils import (
-     clean_json,
+     ResponseWithThought,
      commit_conversation_trace,
-     construct_question_history,
      generate_chatml_messages_with_context,
      messages_to_print,
  )
  from khoj.utils import state
- from khoj.utils.constants import empty_escape_sequences
  from khoj.utils.helpers import (
-     ConversationCommand,
      is_none_or_empty,
      is_promptrace_enabled,
      truncate_code_context,
@@ -34,135 +30,28 @@ from khoj.utils.yaml import yaml_dump
  logger = logging.getLogger(__name__)
 
 
- def extract_questions_offline(
-     text: str,
-     model: str = "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
-     loaded_model: Union[Any, None] = None,
-     conversation_log={},
-     use_history: bool = True,
-     should_extract_questions: bool = True,
-     location_data: LocationData = None,
-     user: KhojUser = None,
-     max_prompt_size: int = None,
-     temperature: float = 0.7,
-     personality_context: Optional[str] = None,
-     query_files: str = None,
-     tracer: dict = {},
- ) -> List[str]:
-     """
-     Infer search queries to retrieve relevant notes to answer user query
-     """
-     all_questions = text.split("? ")
-     all_questions = [q + "?" for q in all_questions[:-1]] + [all_questions[-1]]
-
-     if not should_extract_questions:
-         return all_questions
-
-     assert loaded_model is None or isinstance(loaded_model, Llama), "loaded_model must be of type Llama, if configured"
-     offline_chat_model = loaded_model or download_model(model, max_tokens=max_prompt_size)
-
-     location = f"{location_data}" if location_data else "Unknown"
-     username = prompts.user_name.format(name=user.get_full_name()) if user and user.get_full_name() else ""
-
-     # Extract Past User Message and Inferred Questions from Conversation Log
-     chat_history = construct_question_history(conversation_log, include_query=False) if use_history else ""
-
-     # Get dates relative to today for prompt creation
-     today = datetime.today()
-     yesterday = (today - timedelta(days=1)).strftime("%Y-%m-%d")
-     last_year = today.year - 1
-     example_questions = prompts.extract_questions_offline.format(
-         query=text,
-         chat_history=chat_history,
-         current_date=today.strftime("%Y-%m-%d"),
-         day_of_week=today.strftime("%A"),
-         current_month=today.strftime("%Y-%m"),
-         yesterday_date=yesterday,
-         last_year=last_year,
-         this_year=today.year,
-         location=location,
-         username=username,
-         personality_context=personality_context,
-     )
-
-     messages = generate_chatml_messages_with_context(
-         example_questions,
-         model_name=model,
-         loaded_model=offline_chat_model,
-         max_prompt_size=max_prompt_size,
-         model_type=ChatModel.ModelType.OFFLINE,
-         query_files=query_files,
-     )
-
-     state.chat_lock.acquire()
-     try:
-         response = send_message_to_model_offline(
-             messages,
-             loaded_model=offline_chat_model,
-             model_name=model,
-             max_prompt_size=max_prompt_size,
-             temperature=temperature,
-             response_type="json_object",
-             tracer=tracer,
-         )
-     finally:
-         state.chat_lock.release()
-
-     # Extract and clean the chat model's response
-     try:
-         response = clean_json(empty_escape_sequences)
-         response = pyjson5.loads(response)
-         questions = [q.strip() for q in response["queries"] if q.strip()]
-         questions = filter_questions(questions)
-     except:
-         logger.warning(f"Llama returned invalid JSON. Falling back to using user message as search query.\n{response}")
-         return all_questions
-     logger.debug(f"Questions extracted by {model}: {questions}")
-     return questions
-
-
- def filter_questions(questions: List[str]):
-     # Skip questions that seem to be apologizing for not being able to answer the question
-     hint_words = [
-         "sorry",
-         "apologize",
-         "unable",
-         "can't",
-         "cannot",
-         "don't know",
-         "don't understand",
-         "do not know",
-         "do not understand",
-     ]
-     filtered_questions = set()
-     for q in questions:
-         if not any([word in q.lower() for word in hint_words]) and not is_none_or_empty(q):
-             filtered_questions.add(q)
-
-     return list(filtered_questions)
-
-
  async def converse_offline(
+     # Query
      user_query: str,
+     # Context
      references: list[dict] = [],
      online_results={},
      code_results={},
-     conversation_log={},
+     query_files: str = None,
+     generated_files: List[FileAttachment] = None,
+     additional_context: List[str] = None,
+     generated_asset_results: Dict[str, Dict] = {},
+     location_data: LocationData = None,
+     user_name: str = None,
+     chat_history: list[ChatMessageModel] = [],
+     # Model
      model_name: str = "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
      loaded_model: Union[Any, None] = None,
-     completion_func=None,
-     conversation_commands=[ConversationCommand.Default],
      max_prompt_size=None,
      tokenizer_name=None,
-     location_data: LocationData = None,
-     user_name: str = None,
      agent: Agent = None,
-     query_files: str = None,
-     generated_files: List[FileAttachment] = None,
-     additional_context: List[str] = None,
-     generated_asset_results: Dict[str, Dict] = {},
      tracer: dict = {},
- ) -> AsyncGenerator[str, None]:
+ ) -> AsyncGenerator[ResponseWithThought, None]:
      """
      Converse with user using Llama (Async Version)
      """
@@ -194,30 +83,17 @@ async def converse_offline(
          system_prompt = f"{system_prompt}\n{user_name_prompt}"
 
      # Get Conversation Primer appropriate to Conversation Type
-     if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(references):
-         response = prompts.no_notes_found.format()
-         if completion_func:
-             asyncio.create_task(completion_func(chat_response=response))
-         yield response
-         return
-     elif conversation_commands == [ConversationCommand.Online] and is_none_or_empty(online_results):
-         response = prompts.no_online_results_found.format()
-         if completion_func:
-             asyncio.create_task(completion_func(chat_response=response))
-         yield response
-         return
-
      context_message = ""
      if not is_none_or_empty(references):
          context_message = f"{prompts.notes_conversation_offline.format(references=yaml_dump(references))}\n\n"
-     if ConversationCommand.Online in conversation_commands or ConversationCommand.Webpage in conversation_commands:
+     if not is_none_or_empty(online_results):
          simplified_online_results = online_results.copy()
          for result in online_results:
              if online_results[result].get("webpages"):
                  simplified_online_results[result] = online_results[result]["webpages"]
 
          context_message += f"{prompts.online_search_conversation_offline.format(online_results=yaml_dump(simplified_online_results))}\n\n"
-     if ConversationCommand.Code in conversation_commands and not is_none_or_empty(code_results):
+     if not is_none_or_empty(code_results):
          context_message += (
              f"{prompts.code_executed_context.format(code_results=truncate_code_context(code_results))}\n\n"
          )
@@ -227,7 +103,7 @@ async def converse_offline(
      messages = generate_chatml_messages_with_context(
          user_query,
          system_prompt,
-         conversation_log,
+         chat_history,
         context_message=context_message,
          model_name=model_name,
          loaded_model=offline_chat_model,
@@ -243,9 +119,8 @@ async def converse_offline(
      logger.debug(f"Conversation Context for {model_name}: {messages_to_print(messages)}")
 
      # Use asyncio.Queue and a thread to bridge sync iterator
-     queue: asyncio.Queue = asyncio.Queue()
+     queue: asyncio.Queue[ResponseWithThought] = asyncio.Queue()
      stop_phrases = ["<s>", "INST]", "Notes:"]
-     aggregated_response_container = {"response": ""}
 
      def _sync_llm_thread():
          """Synchronous function to run in a separate thread."""
@@ -262,7 +137,7 @@ async def converse_offline(
                  tracer=tracer,
              )
              for response in response_iterator:
-                 response_delta = response["choices"][0]["delta"].get("content", "")
+                 response_delta: str = response["choices"][0]["delta"].get("content", "")
                  # Log the time taken to start response
                  if aggregated_response == "" and response_delta != "":
                      logger.info(f"First response took: {perf_counter() - start_time:.3f} seconds")
@@ -270,12 +145,12 @@ async def converse_offline(
                  aggregated_response += response_delta
                  # Put chunk into the asyncio queue (non-blocking)
                  try:
-                     queue.put_nowait(response_delta)
+                     queue.put_nowait(ResponseWithThought(response=response_delta))
                  except asyncio.QueueFull:
                      # Should not happen with default queue size unless consumer is very slow
                      logger.warning("Asyncio queue full during offline LLM streaming.")
                      # Potentially block here or handle differently if needed
-                     asyncio.run(queue.put(response_delta))
+                     asyncio.run(queue.put(ResponseWithThought(response=response_delta)))
 
              # Log the time taken to stream the entire response
              logger.info(f"Chat streaming took: {perf_counter() - start_time:.3f} seconds")
@@ -291,7 +166,6 @@ async def converse_offline(
              state.chat_lock.release()
              # Signal end of stream
              queue.put_nowait(None)
-             aggregated_response_container["response"] = aggregated_response
 
      # Start the synchronous thread
      thread = Thread(target=_sync_llm_thread)
@@ -310,10 +184,6 @@ async def converse_offline(
      loop = asyncio.get_running_loop()
      await loop.run_in_executor(None, thread.join)
 
-     # Call the completion function after streaming is done
-     if completion_func:
-         asyncio.create_task(completion_func(chat_response=aggregated_response_container["response"]))
-
 
  def send_message_to_model_offline(
@@ -342,7 +212,7 @@ def send_message_to_model_offline(
      if streaming:
          return response
 
-     response_text = response["choices"][0]["message"].get("content", "")
+     response_text: str = response["choices"][0]["message"].get("content", "")
 
      # Save conversation trace for non-streaming responses
      # Streamed responses need to be saved by the calling function
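The converse_offline hunks keep the same streaming bridge but type it: a worker thread now puts ResponseWithThought chunks onto an asyncio.Queue[ResponseWithThought], a None sentinel still marks the end of the stream, and the thread is joined via run_in_executor. A generic, self-contained sketch of that sync-to-async bridge pattern (illustrative only, not khoj code; unlike the diffed code it hands chunks to the loop with call_soon_threadsafe):

    import asyncio
    from dataclasses import dataclass
    from threading import Thread
    from typing import AsyncGenerator, Callable, Iterable, Optional

    @dataclass
    class Chunk:
        response: str  # stand-in for ResponseWithThought

    async def stream_from_thread(produce: Callable[[], Iterable[str]]) -> AsyncGenerator[Chunk, None]:
        queue: asyncio.Queue[Optional[Chunk]] = asyncio.Queue()
        loop = asyncio.get_running_loop()

        def worker():
            # Runs in a plain thread; hand each chunk to the event loop thread-safely
            for text in produce():
                loop.call_soon_threadsafe(queue.put_nowait, Chunk(response=text))
            loop.call_soon_threadsafe(queue.put_nowait, None)  # sentinel: end of stream

        thread = Thread(target=worker)
        thread.start()
        while (chunk := await queue.get()) is not None:
            yield chunk
        await loop.run_in_executor(None, thread.join)  # join without blocking the event loop

Consuming it mirrors the new converse_offline contract: async for chunk in stream_from_thread(lambda: ["Hello", " world"]), then read chunk.response.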