khoj 1.27.2.dev18__py3-none-any.whl → 1.27.2.dev130__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. khoj/database/adapters/__init__.py +34 -10
  2. khoj/interface/compiled/404/index.html +1 -1
  3. khoj/interface/compiled/_next/static/chunks/1034-da58b679fcbb79c1.js +1 -0
  4. khoj/interface/compiled/_next/static/chunks/1467-5a191c1cd5bf0b83.js +1 -0
  5. khoj/interface/compiled/_next/static/chunks/1603-5d70d9dfcdcb1f10.js +1 -0
  6. khoj/interface/compiled/_next/static/chunks/3423-fa918f4e5365a35e.js +1 -0
  7. khoj/interface/compiled/_next/static/chunks/8423-3ad0bfb299801220.js +1 -0
  8. khoj/interface/compiled/_next/static/chunks/app/chat/page-7dc98df9c88828f0.js +1 -0
  9. khoj/interface/compiled/_next/static/chunks/app/factchecker/page-d887f55fe6d4f35d.js +1 -0
  10. khoj/interface/compiled/_next/static/chunks/app/{page-8f22b790e50dd722.js → page-d46244282af16509.js} +1 -1
  11. khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-6a01e07fb244c10c.js → page-505b07bce608b34e.js} +1 -1
  12. khoj/interface/compiled/_next/static/chunks/{webpack-31239d193815e49e.js → webpack-8ae5ce45161bd98e.js} +1 -1
  13. khoj/interface/compiled/_next/static/css/{2272c73fc7a3b571.css → 26c1c33d0423a7d8.css} +1 -1
  14. khoj/interface/compiled/_next/static/css/e9c5fe555dd3050b.css +25 -0
  15. khoj/interface/compiled/agents/index.html +1 -1
  16. khoj/interface/compiled/agents/index.txt +2 -2
  17. khoj/interface/compiled/automations/index.html +1 -1
  18. khoj/interface/compiled/automations/index.txt +2 -2
  19. khoj/interface/compiled/chat/index.html +1 -1
  20. khoj/interface/compiled/chat/index.txt +2 -2
  21. khoj/interface/compiled/factchecker/index.html +1 -1
  22. khoj/interface/compiled/factchecker/index.txt +2 -2
  23. khoj/interface/compiled/index.html +1 -1
  24. khoj/interface/compiled/index.txt +2 -2
  25. khoj/interface/compiled/search/index.html +1 -1
  26. khoj/interface/compiled/search/index.txt +2 -2
  27. khoj/interface/compiled/settings/index.html +1 -1
  28. khoj/interface/compiled/settings/index.txt +2 -2
  29. khoj/interface/compiled/share/chat/index.html +1 -1
  30. khoj/interface/compiled/share/chat/index.txt +2 -2
  31. khoj/processor/conversation/anthropic/anthropic_chat.py +19 -10
  32. khoj/processor/conversation/anthropic/utils.py +37 -6
  33. khoj/processor/conversation/google/gemini_chat.py +23 -13
  34. khoj/processor/conversation/google/utils.py +34 -10
  35. khoj/processor/conversation/offline/chat_model.py +40 -15
  36. khoj/processor/conversation/openai/gpt.py +25 -10
  37. khoj/processor/conversation/openai/utils.py +43 -9
  38. khoj/processor/conversation/prompts.py +131 -22
  39. khoj/processor/conversation/utils.py +299 -6
  40. khoj/processor/image/generate.py +2 -0
  41. khoj/processor/tools/online_search.py +19 -8
  42. khoj/processor/tools/run_code.py +144 -0
  43. khoj/routers/api.py +11 -6
  44. khoj/routers/api_chat.py +177 -88
  45. khoj/routers/helpers.py +155 -59
  46. khoj/routers/research.py +321 -0
  47. khoj/search_filter/date_filter.py +1 -3
  48. khoj/search_filter/file_filter.py +1 -2
  49. khoj/search_type/text_search.py +3 -3
  50. khoj/utils/helpers.py +15 -2
  51. khoj/utils/yaml.py +4 -0
  52. {khoj-1.27.2.dev18.dist-info → khoj-1.27.2.dev130.dist-info}/METADATA +2 -1
  53. {khoj-1.27.2.dev18.dist-info → khoj-1.27.2.dev130.dist-info}/RECORD +61 -58
  54. khoj/interface/compiled/_next/static/chunks/1603-5138bb7c8035d9a6.js +0 -1
  55. khoj/interface/compiled/_next/static/chunks/2697-61fcba89fd87eab4.js +0 -1
  56. khoj/interface/compiled/_next/static/chunks/3423-8e9c420574a9fbe3.js +0 -1
  57. khoj/interface/compiled/_next/static/chunks/9479-a5e7ff4c7d1d7ee7.js +0 -1
  58. khoj/interface/compiled/_next/static/chunks/app/chat/page-151232d8417a1ea1.js +0 -1
  59. khoj/interface/compiled/_next/static/chunks/app/factchecker/page-798904432c2417c4.js +0 -1
  60. khoj/interface/compiled/_next/static/css/76d55eb435962b19.css +0 -25
  61. /khoj/interface/compiled/_next/static/{_gBBcNbs4wMKxKXhQs5E4 → N19uqHAJYqRAVxvuVwHfE}/_buildManifest.js +0 -0
  62. /khoj/interface/compiled/_next/static/{_gBBcNbs4wMKxKXhQs5E4 → N19uqHAJYqRAVxvuVwHfE}/_ssgManifest.js +0 -0
  63. /khoj/interface/compiled/_next/static/chunks/{1970-1d6d0c1b00b4f343.js → 1970-444843bea1d17d61.js} +0 -0
  64. /khoj/interface/compiled/_next/static/chunks/{9417-759984ad62caa3dc.js → 9417-19cfd1a9cb758e71.js} +0 -0
  65. /khoj/interface/compiled/_next/static/chunks/app/settings/{page-7946cabb9c54e22d.js → page-89e6737b2cc9fb3a.js} +0 -0
  66. {khoj-1.27.2.dev18.dist-info → khoj-1.27.2.dev130.dist-info}/WHEEL +0 -0
  67. {khoj-1.27.2.dev18.dist-info → khoj-1.27.2.dev130.dist-info}/entry_points.txt +0 -0
  68. {khoj-1.27.2.dev18.dist-info → khoj-1.27.2.dev130.dist-info}/licenses/LICENSE +0 -0
khoj/processor/conversation/google/gemini_chat.py

@@ -14,11 +14,13 @@ from khoj.processor.conversation.google.utils import (
     gemini_completion_with_backoff,
 )
 from khoj.processor.conversation.utils import (
+    clean_json,
     construct_structured_message,
     generate_chatml_messages_with_context,
 )
 from khoj.utils.helpers import ConversationCommand, is_none_or_empty
 from khoj.utils.rawconfig import LocationData
+from khoj.utils.yaml import yaml_dump
 
 logger = logging.getLogger(__name__)
 
@@ -35,6 +37,7 @@ def extract_questions_gemini(
     query_images: Optional[list[str]] = None,
     vision_enabled: bool = False,
     personality_context: Optional[str] = None,
+    tracer: dict = {},
 ):
     """
     Infer search queries to retrieve relevant notes to answer user query
@@ -85,15 +88,12 @@ def extract_questions_gemini(
     messages = [ChatMessage(content=prompt, role="user"), ChatMessage(content=system_prompt, role="system")]
 
     response = gemini_send_message_to_model(
-        messages, api_key, model, response_type="json_object", temperature=temperature
+        messages, api_key, model, response_type="json_object", temperature=temperature, tracer=tracer
     )
 
     # Extract, Clean Message from Gemini's Response
     try:
-        response = response.strip()
-        match = re.search(r"\{.*?\}", response)
-        if match:
-            response = match.group()
+        response = clean_json(response)
         response = json.loads(response)
         response = [q.strip() for q in response["queries"] if q.strip()]
         if not isinstance(response, list) or not response:
@@ -107,15 +107,19 @@ def extract_questions_gemini(
     return questions
 
 
-def gemini_send_message_to_model(messages, api_key, model, response_type="text", temperature=0, model_kwargs=None):
+def gemini_send_message_to_model(
+    messages, api_key, model, response_type="text", temperature=0, model_kwargs=None, tracer={}
+):
     """
     Send message to model
     """
     messages, system_prompt = format_messages_for_gemini(messages)
 
     model_kwargs = {}
-    if response_type == "json_object":
-        model_kwargs["response_mime_type"] = "application/json"
+
+    # Sometimes, this causes unwanted behavior and terminates response early. Disable for now while it's flaky.
+    # if response_type == "json_object":
+    #     model_kwargs["response_mime_type"] = "application/json"
 
     # Get Response from Gemini
     return gemini_completion_with_backoff(
@@ -125,6 +129,7 @@ def gemini_send_message_to_model(messages, api_key, model, response_type="text",
         api_key=api_key,
         temperature=temperature,
         model_kwargs=model_kwargs,
+        tracer=tracer,
     )
 
 
@@ -132,6 +137,7 @@ def converse_gemini(
     references,
     user_query,
     online_results: Optional[Dict[str, Dict]] = None,
+    code_results: Optional[Dict[str, Dict]] = None,
     conversation_log={},
     model: Optional[str] = "gemini-1.5-flash",
     api_key: Optional[str] = None,
@@ -145,13 +151,13 @@ def converse_gemini(
     agent: Agent = None,
     query_images: Optional[list[str]] = None,
     vision_available: bool = False,
+    tracer={},
 ):
     """
     Converse with user using Google's Gemini
     """
     # Initialize Variables
     current_date = datetime.now()
-    compiled_references = "\n\n".join({f"# File: {item['file']}\n## {item['compiled']}\n" for item in references})
 
     if agent and agent.personality:
         system_prompt = prompts.custom_personality.format(
@@ -176,7 +182,7 @@ def converse_gemini(
         system_prompt = f"{system_prompt}\n{user_name_prompt}"
 
     # Get Conversation Primer appropriate to Conversation Type
-    if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(compiled_references):
+    if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(references):
         completion_func(chat_response=prompts.no_notes_found.format())
         return iter([prompts.no_notes_found.format()])
     elif conversation_commands == [ConversationCommand.Online] and is_none_or_empty(online_results):
@@ -184,10 +190,13 @@ def converse_gemini(
         return iter([prompts.no_online_results_found.format()])
 
     context_message = ""
-    if not is_none_or_empty(compiled_references):
-        context_message = f"{prompts.notes_conversation.format(query=user_query, references=compiled_references)}\n\n"
+    if not is_none_or_empty(references):
+        context_message = f"{prompts.notes_conversation.format(query=user_query, references=yaml_dump(references))}\n\n"
     if ConversationCommand.Online in conversation_commands or ConversationCommand.Webpage in conversation_commands:
-        context_message += f"{prompts.online_search_conversation.format(online_results=str(online_results))}"
+        context_message += f"{prompts.online_search_conversation.format(online_results=yaml_dump(online_results))}\n\n"
+    if ConversationCommand.Code in conversation_commands and not is_none_or_empty(code_results):
+        context_message += f"{prompts.code_executed_context.format(code_results=str(code_results))}\n\n"
+    context_message = context_message.strip()
 
     # Setup Prompt with Primer or Conversation History
     messages = generate_chatml_messages_with_context(
@@ -217,4 +226,5 @@ def converse_gemini(
         api_key=api_key,
         system_prompt=system_prompt,
         completion_func=completion_func,
+        tracer=tracer,
     )
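
Note: the ad-hoc regex extraction of the JSON payload (`re.search(r"\{.*?\}", ...)`) is replaced above by a shared `clean_json` helper from khoj.processor.conversation.utils. Its implementation is not included in this excerpt; a minimal sketch of what such a helper typically does (an assumption, not the actual khoj code) is:

```python
def clean_json(response: str) -> str:
    """Illustrative sketch: strip whitespace and markdown code fences around a JSON payload."""
    response = response.strip()
    # Drop ```json ... ``` or ``` ... ``` fences if the model wrapped its output in them
    if response.startswith("```"):
        response = response.removeprefix("```json").removeprefix("```").removesuffix("```").strip()
    return response
```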
khoj/processor/conversation/google/utils.py

@@ -19,8 +19,13 @@ from tenacity import (
     wait_random_exponential,
 )
 
-from khoj.processor.conversation.utils import ThreadedGenerator, get_image_from_url
-from khoj.utils.helpers import is_none_or_empty
+from khoj.processor.conversation.utils import (
+    ThreadedGenerator,
+    commit_conversation_trace,
+    get_image_from_url,
+)
+from khoj.utils import state
+from khoj.utils.helpers import in_debug_mode, is_none_or_empty
 
 logger = logging.getLogger(__name__)
 
@@ -35,7 +40,7 @@ MAX_OUTPUT_TOKENS_GEMINI = 8192
     reraise=True,
 )
 def gemini_completion_with_backoff(
-    messages, system_prompt, model_name, temperature=0, api_key=None, model_kwargs=None
+    messages, system_prompt, model_name, temperature=0, api_key=None, model_kwargs=None, tracer={}
 ) -> str:
     genai.configure(api_key=api_key)
     model_kwargs = model_kwargs or dict()
@@ -60,16 +65,23 @@ def gemini_completion_with_backoff(
 
     try:
         # Generate the response. The last message is considered to be the current prompt
-        aggregated_response = chat_session.send_message(formatted_messages[-1]["parts"])
-        return aggregated_response.text
+        response = chat_session.send_message(formatted_messages[-1]["parts"])
+        response_text = response.text
     except StopCandidateException as e:
-        response_message, _ = handle_gemini_response(e.args)
+        response_text, _ = handle_gemini_response(e.args)
        # Respond with reason for stopping
         logger.warning(
-            f"LLM Response Prevented for {model_name}: {response_message}.\n"
+            f"LLM Response Prevented for {model_name}: {response_text}.\n"
             + f"Last Message by {messages[-1].role}: {messages[-1].content}"
         )
-        return response_message
+
+    # Save conversation trace
+    tracer["chat_model"] = model_name
+    tracer["temperature"] = temperature
+    if in_debug_mode() or state.verbose > 1:
+        commit_conversation_trace(messages, response_text, tracer)
+
+    return response_text
 
 
 @retry(
@@ -88,17 +100,20 @@ def gemini_chat_completion_with_backoff(
     system_prompt,
     completion_func=None,
     model_kwargs=None,
+    tracer: dict = {},
 ):
     g = ThreadedGenerator(compiled_references, online_results, completion_func=completion_func)
     t = Thread(
         target=gemini_llm_thread,
-        args=(g, messages, system_prompt, model_name, temperature, api_key, model_kwargs),
+        args=(g, messages, system_prompt, model_name, temperature, api_key, model_kwargs, tracer),
     )
     t.start()
     return g
 
 
-def gemini_llm_thread(g, messages, system_prompt, model_name, temperature, api_key, model_kwargs=None):
+def gemini_llm_thread(
+    g, messages, system_prompt, model_name, temperature, api_key, model_kwargs=None, tracer: dict = {}
+):
     try:
         genai.configure(api_key=api_key)
         model_kwargs = model_kwargs or dict()
@@ -117,16 +132,25 @@ def gemini_llm_thread(g, messages, system_prompt, model_name, temperature, api_k
             },
         )
 
+        aggregated_response = ""
         formatted_messages = [{"role": message.role, "parts": message.content} for message in messages]
+
         # all messages up to the last are considered to be part of the chat history
         chat_session = model.start_chat(history=formatted_messages[0:-1])
         # the last message is considered to be the current prompt
         for chunk in chat_session.send_message(formatted_messages[-1]["parts"], stream=True):
             message, stopped = handle_gemini_response(chunk.candidates, chunk.prompt_feedback)
             message = message or chunk.text
+            aggregated_response += message
             g.send(message)
             if stopped:
                 raise StopCandidateException(message)
+
+        # Save conversation trace
+        tracer["chat_model"] = model_name
+        tracer["temperature"] = temperature
+        if in_debug_mode() or state.verbose > 1:
+            commit_conversation_trace(messages, aggregated_response, tracer)
     except StopCandidateException as e:
         logger.warning(
             f"LLM Response Prevented for {model_name}: {e.args[0]}.\n"
khoj/processor/conversation/offline/chat_model.py

@@ -12,12 +12,14 @@ from khoj.processor.conversation import prompts
 from khoj.processor.conversation.offline.utils import download_model
 from khoj.processor.conversation.utils import (
     ThreadedGenerator,
+    commit_conversation_trace,
     generate_chatml_messages_with_context,
 )
 from khoj.utils import state
 from khoj.utils.constants import empty_escape_sequences
-from khoj.utils.helpers import ConversationCommand, is_none_or_empty
+from khoj.utils.helpers import ConversationCommand, in_debug_mode, is_none_or_empty
 from khoj.utils.rawconfig import LocationData
+from khoj.utils.yaml import yaml_dump
 
 logger = logging.getLogger(__name__)
 
@@ -34,6 +36,7 @@ def extract_questions_offline(
     max_prompt_size: int = None,
     temperature: float = 0.7,
     personality_context: Optional[str] = None,
+    tracer: dict = {},
 ) -> List[str]:
     """
     Infer search queries to retrieve relevant notes to answer user query
@@ -94,6 +97,7 @@ def extract_questions_offline(
             max_prompt_size=max_prompt_size,
             temperature=temperature,
             response_type="json_object",
+            tracer=tracer,
         )
     finally:
         state.chat_lock.release()
@@ -135,7 +139,8 @@ def filter_questions(questions: List[str]):
 def converse_offline(
     user_query,
     references=[],
-    online_results=[],
+    online_results={},
+    code_results={},
     conversation_log={},
     model: str = "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
     loaded_model: Union[Any, None] = None,
@@ -146,6 +151,7 @@ def converse_offline(
     location_data: LocationData = None,
     user_name: str = None,
     agent: Agent = None,
+    tracer: dict = {},
 ) -> Union[ThreadedGenerator, Iterator[str]]:
     """
     Converse with user using Llama
@@ -153,8 +159,7 @@ def converse_offline(
     # Initialize Variables
     assert loaded_model is None or isinstance(loaded_model, Llama), "loaded_model must be of type Llama, if configured"
     offline_chat_model = loaded_model or download_model(model, max_tokens=max_prompt_size)
-    compiled_references = "\n\n".join({f"# File: {item['file']}\n## {item['compiled']}\n" for item in references})
-
+    tracer["chat_model"] = model
     current_date = datetime.now()
 
     if agent and agent.personality:
@@ -179,24 +184,25 @@ def converse_offline(
         system_prompt = f"{system_prompt}\n{user_name_prompt}"
 
     # Get Conversation Primer appropriate to Conversation Type
-    if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(compiled_references):
+    if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(references):
         return iter([prompts.no_notes_found.format()])
     elif conversation_commands == [ConversationCommand.Online] and is_none_or_empty(online_results):
         completion_func(chat_response=prompts.no_online_results_found.format())
         return iter([prompts.no_online_results_found.format()])
 
     context_message = ""
-    if not is_none_or_empty(compiled_references):
-        context_message += f"{prompts.notes_conversation_offline.format(references=compiled_references)}\n\n"
+    if not is_none_or_empty(references):
+        context_message = f"{prompts.notes_conversation_offline.format(references=yaml_dump(references))}\n\n"
     if ConversationCommand.Online in conversation_commands or ConversationCommand.Webpage in conversation_commands:
         simplified_online_results = online_results.copy()
         for result in online_results:
             if online_results[result].get("webpages"):
                 simplified_online_results[result] = online_results[result]["webpages"]
 
-        context_message += (
-            f"{prompts.online_search_conversation_offline.format(online_results=str(simplified_online_results))}"
-        )
+        context_message += f"{prompts.online_search_conversation_offline.format(online_results=yaml_dump(simplified_online_results))}\n\n"
+    if ConversationCommand.Code in conversation_commands and not is_none_or_empty(code_results):
+        context_message += f"{prompts.code_executed_context.format(code_results=str(code_results))}\n\n"
+    context_message = context_message.strip()
 
     # Setup Prompt with Primer or Conversation History
     messages = generate_chatml_messages_with_context(
@@ -215,13 +221,14 @@ def converse_offline(
     logger.debug(f"Conversation Context for {model}: {truncated_messages}")
 
     g = ThreadedGenerator(references, online_results, completion_func=completion_func)
-    t = Thread(target=llm_thread, args=(g, messages, offline_chat_model, max_prompt_size))
+    t = Thread(target=llm_thread, args=(g, messages, offline_chat_model, max_prompt_size, tracer))
     t.start()
     return g
 
 
-def llm_thread(g, messages: List[ChatMessage], model: Any, max_prompt_size: int = None):
+def llm_thread(g, messages: List[ChatMessage], model: Any, max_prompt_size: int = None, tracer: dict = {}):
     stop_phrases = ["<s>", "INST]", "Notes:"]
+    aggregated_response = ""
 
     state.chat_lock.acquire()
     try:
@@ -229,7 +236,14 @@ def llm_thread(g, messages: List[ChatMessage], model: Any, max_prompt_size: int
             messages, loaded_model=model, stop=stop_phrases, max_prompt_size=max_prompt_size, streaming=True
         )
         for response in response_iterator:
-            g.send(response["choices"][0]["delta"].get("content", ""))
+            response_delta = response["choices"][0]["delta"].get("content", "")
+            aggregated_response += response_delta
+            g.send(response_delta)
+
+        # Save conversation trace
+        if in_debug_mode() or state.verbose > 1:
+            commit_conversation_trace(messages, aggregated_response, tracer)
+
     finally:
         state.chat_lock.release()
         g.close()
@@ -244,6 +258,7 @@ def send_message_to_model_offline(
     stop=[],
     max_prompt_size: int = None,
     response_type: str = "text",
+    tracer: dict = {},
 ):
     assert loaded_model is None or isinstance(loaded_model, Llama), "loaded_model must be of type Llama, if configured"
     offline_chat_model = loaded_model or download_model(model, max_tokens=max_prompt_size)
@@ -251,7 +266,17 @@ def send_message_to_model_offline(
     response = offline_chat_model.create_chat_completion(
         messages_dict, stop=stop, stream=streaming, temperature=temperature, response_format={"type": response_type}
     )
+
     if streaming:
         return response
-    else:
-        return response["choices"][0]["message"].get("content", "")
+
+    response_text = response["choices"][0]["message"].get("content", "")
+
+    # Save conversation trace for non-streaming responses
+    # Streamed responses need to be saved by the calling function
+    tracer["chat_model"] = model
+    tracer["temperature"] = temperature
+    if in_debug_mode() or state.verbose > 1:
+        commit_conversation_trace(messages, response_text, tracer)
+
+    return response_text
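
Note: references and online results are now serialized with `yaml_dump` (from khoj/utils/yaml.py, a +4-line addition in this release) instead of `str(...)`, so the context reads as structured YAML inside the prompt. The wrapper itself is not shown in this diff; a plausible sketch (assumed, not the actual code) is:

```python
import yaml


def yaml_dump(data) -> str:
    # Plausible wrapper: block style, preserve key order, keep unicode readable
    return yaml.dump(data, allow_unicode=True, sort_keys=False, default_flow_style=False)
```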
khoj/processor/conversation/openai/gpt.py

@@ -12,12 +12,13 @@ from khoj.processor.conversation.openai.utils import (
     completion_with_backoff,
 )
 from khoj.processor.conversation.utils import (
+    clean_json,
     construct_structured_message,
     generate_chatml_messages_with_context,
-    remove_json_codeblock,
 )
 from khoj.utils.helpers import ConversationCommand, is_none_or_empty
 from khoj.utils.rawconfig import LocationData
+from khoj.utils.yaml import yaml_dump
 
 logger = logging.getLogger(__name__)
 
@@ -33,6 +34,7 @@ def extract_questions(
     query_images: Optional[list[str]] = None,
     vision_enabled: bool = False,
     personality_context: Optional[str] = None,
+    tracer: dict = {},
 ):
     """
     Infer search queries to retrieve relevant notes to answer user query
@@ -82,13 +84,18 @@ def extract_questions(
     messages = [ChatMessage(content=prompt, role="user")]
 
     response = send_message_to_model(
-        messages, api_key, model, response_type="json_object", api_base_url=api_base_url, temperature=temperature
+        messages,
+        api_key,
+        model,
+        response_type="json_object",
+        api_base_url=api_base_url,
+        temperature=temperature,
+        tracer=tracer,
     )
 
     # Extract, Clean Message from GPT's Response
     try:
-        response = response.strip()
-        response = remove_json_codeblock(response)
+        response = clean_json(response)
         response = json.loads(response)
         response = [q.strip() for q in response["queries"] if q.strip()]
         if not isinstance(response, list) or not response:
@@ -103,7 +110,9 @@ def extract_questions(
     return questions
 
 
-def send_message_to_model(messages, api_key, model, response_type="text", api_base_url=None, temperature=0):
+def send_message_to_model(
+    messages, api_key, model, response_type="text", api_base_url=None, temperature=0, tracer: dict = {}
+):
     """
     Send message to model
     """
@@ -116,6 +125,7 @@ def send_message_to_model(messages, api_key, model, response_type="text", api_ba
         temperature=temperature,
         api_base_url=api_base_url,
         model_kwargs={"response_format": {"type": response_type}},
+        tracer=tracer,
     )
 
 
@@ -123,6 +133,7 @@ def converse(
     references,
     user_query,
     online_results: Optional[Dict[str, Dict]] = None,
+    code_results: Optional[Dict[str, Dict]] = None,
     conversation_log={},
     model: str = "gpt-4o-mini",
     api_key: Optional[str] = None,
@@ -137,13 +148,13 @@ def converse(
     agent: Agent = None,
     query_images: Optional[list[str]] = None,
     vision_available: bool = False,
+    tracer: dict = {},
 ):
     """
     Converse with user using OpenAI's ChatGPT
     """
     # Initialize Variables
     current_date = datetime.now()
-    compiled_references = "\n\n".join({f"# File: {item['file']}\n## {item['compiled']}\n" for item in references})
 
     if agent and agent.personality:
         system_prompt = prompts.custom_personality.format(
@@ -167,7 +178,7 @@ def converse(
         system_prompt = f"{system_prompt}\n{user_name_prompt}"
 
     # Get Conversation Primer appropriate to Conversation Type
-    if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(compiled_references):
+    if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(references):
         completion_func(chat_response=prompts.no_notes_found.format())
         return iter([prompts.no_notes_found.format()])
     elif conversation_commands == [ConversationCommand.Online] and is_none_or_empty(online_results):
@@ -175,10 +186,13 @@ def converse(
         return iter([prompts.no_online_results_found.format()])
 
     context_message = ""
-    if not is_none_or_empty(compiled_references):
-        context_message = f"{prompts.notes_conversation.format(references=compiled_references)}\n\n"
+    if not is_none_or_empty(references):
+        context_message = f"{prompts.notes_conversation.format(references=yaml_dump(references))}\n\n"
     if not is_none_or_empty(online_results):
-        context_message += f"{prompts.online_search_conversation.format(online_results=str(online_results))}"
+        context_message += f"{prompts.online_search_conversation.format(online_results=yaml_dump(online_results))}\n\n"
+    if not is_none_or_empty(code_results):
+        context_message += f"{prompts.code_executed_context.format(code_results=str(code_results))}\n\n"
+    context_message = context_message.strip()
 
     # Setup Prompt with Primer or Conversation History
     messages = generate_chatml_messages_with_context(
@@ -207,4 +221,5 @@ def converse(
         api_base_url=api_base_url,
         completion_func=completion_func,
        model_kwargs={"stop": ["Notes:\n["]},
+        tracer=tracer,
     )
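
Note: the `tracer` dict is passed down by reference through the extract/send/converse call chain, so a caller can seed it once and read back what each completion recorded. An illustrative caller-side view (argument names, seed keys, and values are placeholders, not the exact khoj API):

```python
from khoj.processor.conversation.openai.gpt import extract_questions

tracer: dict = {"conversation_id": "placeholder-id"}  # seed fields are illustrative
queries = extract_questions(
    "What did I write about the Nairobi trip?",
    model="gpt-4o-mini",
    api_key="sk-placeholder",
    tracer=tracer,
)
# send_message_to_model -> completion_with_backoff annotate the same dict in place
print(tracer.get("chat_model"), tracer.get("temperature"))
```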
khoj/processor/conversation/openai/utils.py

@@ -12,7 +12,12 @@ from tenacity import (
     wait_random_exponential,
 )
 
-from khoj.processor.conversation.utils import ThreadedGenerator
+from khoj.processor.conversation.utils import (
+    ThreadedGenerator,
+    commit_conversation_trace,
+)
+from khoj.utils import state
+from khoj.utils.helpers import in_debug_mode
 
 logger = logging.getLogger(__name__)
 
@@ -33,7 +38,7 @@ openai_clients: Dict[str, openai.OpenAI] = {}
     reraise=True,
 )
 def completion_with_backoff(
-    messages, model, temperature=0, openai_api_key=None, api_base_url=None, model_kwargs=None
+    messages, model, temperature=0, openai_api_key=None, api_base_url=None, model_kwargs=None, tracer: dict = {}
 ) -> str:
     client_key = f"{openai_api_key}--{api_base_url}"
     client: openai.OpenAI | None = openai_clients.get(client_key)
@@ -77,6 +82,12 @@ def completion_with_backoff(
         elif delta_chunk.content:
             aggregated_response += delta_chunk.content
 
+    # Save conversation trace
+    tracer["chat_model"] = model
+    tracer["temperature"] = temperature
+    if in_debug_mode() or state.verbose > 1:
+        commit_conversation_trace(messages, aggregated_response, tracer)
+
     return aggregated_response
 
 
@@ -103,26 +114,37 @@ def chat_completion_with_backoff(
     api_base_url=None,
     completion_func=None,
     model_kwargs=None,
+    tracer: dict = {},
 ):
     g = ThreadedGenerator(compiled_references, online_results, completion_func=completion_func)
     t = Thread(
-        target=llm_thread, args=(g, messages, model_name, temperature, openai_api_key, api_base_url, model_kwargs)
+        target=llm_thread,
+        args=(g, messages, model_name, temperature, openai_api_key, api_base_url, model_kwargs, tracer),
     )
     t.start()
     return g
 
 
-def llm_thread(g, messages, model_name, temperature, openai_api_key=None, api_base_url=None, model_kwargs=None):
+def llm_thread(
+    g,
+    messages,
+    model_name,
+    temperature,
+    openai_api_key=None,
+    api_base_url=None,
+    model_kwargs=None,
+    tracer: dict = {},
+):
     try:
         client_key = f"{openai_api_key}--{api_base_url}"
         if client_key not in openai_clients:
-            client: openai.OpenAI = openai.OpenAI(
+            client = openai.OpenAI(
                 api_key=openai_api_key,
                 base_url=api_base_url,
             )
             openai_clients[client_key] = client
         else:
-            client: openai.OpenAI = openai_clients[client_key]
+            client = openai_clients[client_key]
 
         formatted_messages = [{"role": message.role, "content": message.content} for message in messages]
         stream = True
@@ -144,17 +166,29 @@ def llm_thread(g, messages, model_name, temperature, openai_api_key=None, api_ba
             **(model_kwargs or dict()),
         )
 
+        aggregated_response = ""
         if not stream:
-            g.send(chat.choices[0].message.content)
+            aggregated_response = chat.choices[0].message.content
+            g.send(aggregated_response)
         else:
             for chunk in chat:
                 if len(chunk.choices) == 0:
                     continue
                 delta_chunk = chunk.choices[0].delta
+                text_chunk = ""
                 if isinstance(delta_chunk, str):
-                    g.send(delta_chunk)
+                    text_chunk = delta_chunk
                 elif delta_chunk.content:
-                    g.send(delta_chunk.content)
+                    text_chunk = delta_chunk.content
+                if text_chunk:
+                    aggregated_response += text_chunk
+                    g.send(text_chunk)
+
+        # Save conversation trace
+        tracer["chat_model"] = model_name
+        tracer["temperature"] = temperature
+        if in_debug_mode() or state.verbose > 1:
+            commit_conversation_trace(messages, aggregated_response, tracer)
     except Exception as e:
         logger.error(f"Error in llm_thread: {e}", exc_info=True)
     finally:
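
Note: for the streaming paths, the trace is committed inside the worker thread only after the full response has been aggregated, while the caller keeps consuming the `ThreadedGenerator` chunk by chunk. A rough consumer-side sketch, with keyword names taken from the diff and other values as placeholders (the exact signature is not shown in this excerpt):

```python
from langchain.schema import ChatMessage

from khoj.processor.conversation.openai.utils import chat_completion_with_backoff

tracer: dict = {}
messages = [ChatMessage(content="Summarize my notes on Nairobi.", role="user")]

# Returns a ThreadedGenerator; chunks stream out while llm_thread aggregates the
# full response in the background and commits the trace in debug/verbose mode.
response_generator = chat_completion_with_backoff(
    messages=messages,
    compiled_references=[],
    online_results={},
    model_name="gpt-4o-mini",
    temperature=0.2,
    openai_api_key="sk-placeholder",
    tracer=tracer,
)
full_response = "".join(chunk for chunk in response_generator)  # assumes the generator yields text chunks
```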