khoj 1.27.2.dev15__py3-none-any.whl → 1.28.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. khoj/configure.py +1 -1
  2. khoj/database/adapters/__init__.py +50 -12
  3. khoj/interface/compiled/404/index.html +1 -1
  4. khoj/interface/compiled/_next/static/chunks/1034-da58b679fcbb79c1.js +1 -0
  5. khoj/interface/compiled/_next/static/chunks/1467-b331e469fe411347.js +1 -0
  6. khoj/interface/compiled/_next/static/chunks/1603-c1568f45947e9f2c.js +1 -0
  7. khoj/interface/compiled/_next/static/chunks/3423-f4b7df2f6f3362f7.js +1 -0
  8. khoj/interface/compiled/_next/static/chunks/8423-da57554315eebcbe.js +1 -0
  9. khoj/interface/compiled/_next/static/chunks/app/agents/{page-2beaba7c9bb750bd.js → page-5ae1e540bb5be8a9.js} +1 -1
  10. khoj/interface/compiled/_next/static/chunks/app/automations/{page-9b5c77e0b0dd772c.js → page-774ae3e033f938cd.js} +1 -1
  11. khoj/interface/compiled/_next/static/chunks/app/chat/page-d8f4c107ad78e9e9.js +1 -0
  12. khoj/interface/compiled/_next/static/chunks/app/factchecker/page-1cc42ee55f89fb2e.js +1 -0
  13. khoj/interface/compiled/_next/static/chunks/app/{page-4b6008223ea79955.js → page-07e54186b066f5ce.js} +1 -1
  14. khoj/interface/compiled/_next/static/chunks/app/search/{page-ab2995529ece3140.js → page-9b64f61caa5bd7f9.js} +1 -1
  15. khoj/interface/compiled/_next/static/chunks/app/settings/{page-7946cabb9c54e22d.js → page-10b288c103f19468.js} +1 -1
  16. khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-6a01e07fb244c10c.js → page-db775d42e820afb2.js} +1 -1
  17. khoj/interface/compiled/_next/static/chunks/{webpack-878569182b3af4c6.js → webpack-8f2abab7b11aa120.js} +1 -1
  18. khoj/interface/compiled/_next/static/css/{2272c73fc7a3b571.css → 26c1c33d0423a7d8.css} +1 -1
  19. khoj/interface/compiled/_next/static/css/4cae6c0e5c72fb2d.css +1 -0
  20. khoj/interface/compiled/_next/static/css/a795ee88875f4853.css +25 -0
  21. khoj/interface/compiled/_next/static/css/ddcc0cf73e062476.css +1 -0
  22. khoj/interface/compiled/agents/index.html +1 -1
  23. khoj/interface/compiled/agents/index.txt +2 -2
  24. khoj/interface/compiled/automations/index.html +1 -1
  25. khoj/interface/compiled/automations/index.txt +2 -2
  26. khoj/interface/compiled/chat/index.html +1 -1
  27. khoj/interface/compiled/chat/index.txt +2 -2
  28. khoj/interface/compiled/factchecker/index.html +1 -1
  29. khoj/interface/compiled/factchecker/index.txt +2 -2
  30. khoj/interface/compiled/index.html +1 -1
  31. khoj/interface/compiled/index.txt +2 -2
  32. khoj/interface/compiled/search/index.html +1 -1
  33. khoj/interface/compiled/search/index.txt +2 -2
  34. khoj/interface/compiled/settings/index.html +1 -1
  35. khoj/interface/compiled/settings/index.txt +2 -2
  36. khoj/interface/compiled/share/chat/index.html +1 -1
  37. khoj/interface/compiled/share/chat/index.txt +2 -2
  38. khoj/processor/conversation/anthropic/anthropic_chat.py +19 -10
  39. khoj/processor/conversation/anthropic/utils.py +37 -6
  40. khoj/processor/conversation/google/gemini_chat.py +23 -13
  41. khoj/processor/conversation/google/utils.py +34 -10
  42. khoj/processor/conversation/offline/chat_model.py +48 -16
  43. khoj/processor/conversation/openai/gpt.py +25 -10
  44. khoj/processor/conversation/openai/utils.py +50 -9
  45. khoj/processor/conversation/prompts.py +156 -65
  46. khoj/processor/conversation/utils.py +306 -6
  47. khoj/processor/embeddings.py +4 -4
  48. khoj/processor/image/generate.py +2 -0
  49. khoj/processor/tools/online_search.py +27 -12
  50. khoj/processor/tools/run_code.py +144 -0
  51. khoj/routers/api.py +11 -6
  52. khoj/routers/api_chat.py +213 -111
  53. khoj/routers/helpers.py +171 -60
  54. khoj/routers/research.py +320 -0
  55. khoj/search_filter/date_filter.py +1 -3
  56. khoj/search_filter/file_filter.py +1 -2
  57. khoj/search_type/text_search.py +3 -3
  58. khoj/utils/helpers.py +24 -2
  59. khoj/utils/yaml.py +4 -0
  60. {khoj-1.27.2.dev15.dist-info → khoj-1.28.0.dist-info}/METADATA +3 -2
  61. {khoj-1.27.2.dev15.dist-info → khoj-1.28.0.dist-info}/RECORD +68 -65
  62. khoj/interface/compiled/_next/static/chunks/1603-b9d95833e0e025e8.js +0 -1
  63. khoj/interface/compiled/_next/static/chunks/2697-61fcba89fd87eab4.js +0 -1
  64. khoj/interface/compiled/_next/static/chunks/3423-0b533af8bf6ac218.js +0 -1
  65. khoj/interface/compiled/_next/static/chunks/9479-ff7d8c4dae2014d1.js +0 -1
  66. khoj/interface/compiled/_next/static/chunks/app/chat/page-151232d8417a1ea1.js +0 -1
  67. khoj/interface/compiled/_next/static/chunks/app/factchecker/page-798904432c2417c4.js +0 -1
  68. khoj/interface/compiled/_next/static/css/592ca99f5122e75a.css +0 -1
  69. khoj/interface/compiled/_next/static/css/76d55eb435962b19.css +0 -25
  70. khoj/interface/compiled/_next/static/css/d738728883c68af8.css +0 -1
  71. /khoj/interface/compiled/_next/static/{vcyFRDGArOFXwUVotHIuv → cC7ahn2y_DddSVovjlztj}/_buildManifest.js +0 -0
  72. /khoj/interface/compiled/_next/static/{vcyFRDGArOFXwUVotHIuv → cC7ahn2y_DddSVovjlztj}/_ssgManifest.js +0 -0
  73. /khoj/interface/compiled/_next/static/chunks/{1970-60c96aed937a4928.js → 1970-d44050bf658ae5cc.js} +0 -0
  74. /khoj/interface/compiled/_next/static/chunks/{9417-2ca87207387fc790.js → 9417-0d0fc7eb49a86abb.js} +0 -0
  75. {khoj-1.27.2.dev15.dist-info → khoj-1.28.0.dist-info}/WHEEL +0 -0
  76. {khoj-1.27.2.dev15.dist-info → khoj-1.28.0.dist-info}/entry_points.txt +0 -0
  77. {khoj-1.27.2.dev15.dist-info → khoj-1.28.0.dist-info}/licenses/LICENSE +0 -0
khoj/processor/conversation/google/gemini_chat.py

@@ -14,11 +14,13 @@ from khoj.processor.conversation.google.utils import (
     gemini_completion_with_backoff,
 )
 from khoj.processor.conversation.utils import (
+    clean_json,
     construct_structured_message,
     generate_chatml_messages_with_context,
 )
 from khoj.utils.helpers import ConversationCommand, is_none_or_empty
 from khoj.utils.rawconfig import LocationData
+from khoj.utils.yaml import yaml_dump
 
 logger = logging.getLogger(__name__)
 
@@ -35,6 +37,7 @@ def extract_questions_gemini(
     query_images: Optional[list[str]] = None,
     vision_enabled: bool = False,
     personality_context: Optional[str] = None,
+    tracer: dict = {},
 ):
     """
     Infer search queries to retrieve relevant notes to answer user query
@@ -85,15 +88,12 @@ def extract_questions_gemini(
     messages = [ChatMessage(content=prompt, role="user"), ChatMessage(content=system_prompt, role="system")]
 
     response = gemini_send_message_to_model(
-        messages, api_key, model, response_type="json_object", temperature=temperature
+        messages, api_key, model, response_type="json_object", temperature=temperature, tracer=tracer
     )
 
     # Extract, Clean Message from Gemini's Response
     try:
-        response = response.strip()
-        match = re.search(r"\{.*?\}", response)
-        if match:
-            response = match.group()
+        response = clean_json(response)
         response = json.loads(response)
         response = [q.strip() for q in response["queries"] if q.strip()]
         if not isinstance(response, list) or not response:
@@ -107,15 +107,19 @@ def extract_questions_gemini(
     return questions
 
 
-def gemini_send_message_to_model(messages, api_key, model, response_type="text", temperature=0, model_kwargs=None):
+def gemini_send_message_to_model(
+    messages, api_key, model, response_type="text", temperature=0, model_kwargs=None, tracer={}
+):
     """
     Send message to model
     """
     messages, system_prompt = format_messages_for_gemini(messages)
 
     model_kwargs = {}
-    if response_type == "json_object":
-        model_kwargs["response_mime_type"] = "application/json"
+
+    # Sometimes, this causes unwanted behavior and terminates response early. Disable for now while it's flaky.
+    # if response_type == "json_object":
+    #     model_kwargs["response_mime_type"] = "application/json"
 
     # Get Response from Gemini
     return gemini_completion_with_backoff(
@@ -125,6 +129,7 @@ def gemini_send_message_to_model(messages, api_key, model, response_type="text",
         api_key=api_key,
         temperature=temperature,
         model_kwargs=model_kwargs,
+        tracer=tracer,
     )
 
 
@@ -132,6 +137,7 @@ def converse_gemini(
     references,
     user_query,
     online_results: Optional[Dict[str, Dict]] = None,
+    code_results: Optional[Dict[str, Dict]] = None,
     conversation_log={},
     model: Optional[str] = "gemini-1.5-flash",
     api_key: Optional[str] = None,
@@ -145,13 +151,13 @@ def converse_gemini(
     agent: Agent = None,
     query_images: Optional[list[str]] = None,
     vision_available: bool = False,
+    tracer={},
 ):
     """
     Converse with user using Google's Gemini
     """
     # Initialize Variables
     current_date = datetime.now()
-    compiled_references = "\n\n".join({f"# File: {item['file']}\n## {item['compiled']}\n" for item in references})
 
     if agent and agent.personality:
         system_prompt = prompts.custom_personality.format(
@@ -176,7 +182,7 @@ def converse_gemini(
         system_prompt = f"{system_prompt}\n{user_name_prompt}"
 
     # Get Conversation Primer appropriate to Conversation Type
-    if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(compiled_references):
+    if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(references):
         completion_func(chat_response=prompts.no_notes_found.format())
         return iter([prompts.no_notes_found.format()])
     elif conversation_commands == [ConversationCommand.Online] and is_none_or_empty(online_results):
@@ -184,10 +190,13 @@ def converse_gemini(
         return iter([prompts.no_online_results_found.format()])
 
     context_message = ""
-    if not is_none_or_empty(compiled_references):
-        context_message = f"{prompts.notes_conversation.format(query=user_query, references=compiled_references)}\n\n"
+    if not is_none_or_empty(references):
+        context_message = f"{prompts.notes_conversation.format(query=user_query, references=yaml_dump(references))}\n\n"
     if ConversationCommand.Online in conversation_commands or ConversationCommand.Webpage in conversation_commands:
-        context_message += f"{prompts.online_search_conversation.format(online_results=str(online_results))}"
+        context_message += f"{prompts.online_search_conversation.format(online_results=yaml_dump(online_results))}\n\n"
+    if ConversationCommand.Code in conversation_commands and not is_none_or_empty(code_results):
+        context_message += f"{prompts.code_executed_context.format(code_results=str(code_results))}\n\n"
+    context_message = context_message.strip()
 
     # Setup Prompt with Primer or Conversation History
     messages = generate_chatml_messages_with_context(
@@ -217,4 +226,5 @@ def converse_gemini(
         api_key=api_key,
         system_prompt=system_prompt,
         completion_func=completion_func,
+        tracer=tracer,
     )
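
Note: extract_questions_gemini above (and extract_questions in openai/gpt.py further below) now delegate response cleanup to the new clean_json helper added in khoj/processor/conversation/utils.py, whose body is not part of this diff. A minimal sketch of what such a helper might do, assuming it only needs to strip whitespace and markdown code fences before json.loads; this is an illustration, not the actual implementation:

def clean_json(response: str) -> str:
    # Hypothetical sketch; the real helper lives in khoj/processor/conversation/utils.py.
    # Strip whitespace and any ```json ... ``` fences the model may have wrapped around its answer.
    response = response.strip()
    if response.startswith("```"):
        response = response.removeprefix("```json").removeprefix("```").removesuffix("```").strip()
    return response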
khoj/processor/conversation/google/utils.py

@@ -19,8 +19,13 @@ from tenacity import (
     wait_random_exponential,
 )
 
-from khoj.processor.conversation.utils import ThreadedGenerator, get_image_from_url
-from khoj.utils.helpers import is_none_or_empty
+from khoj.processor.conversation.utils import (
+    ThreadedGenerator,
+    commit_conversation_trace,
+    get_image_from_url,
+)
+from khoj.utils import state
+from khoj.utils.helpers import in_debug_mode, is_none_or_empty
 
 logger = logging.getLogger(__name__)
 
@@ -35,7 +40,7 @@ MAX_OUTPUT_TOKENS_GEMINI = 8192
     reraise=True,
 )
 def gemini_completion_with_backoff(
-    messages, system_prompt, model_name, temperature=0, api_key=None, model_kwargs=None
+    messages, system_prompt, model_name, temperature=0, api_key=None, model_kwargs=None, tracer={}
 ) -> str:
     genai.configure(api_key=api_key)
     model_kwargs = model_kwargs or dict()
@@ -60,16 +65,23 @@ def gemini_completion_with_backoff(
 
     try:
         # Generate the response. The last message is considered to be the current prompt
-        aggregated_response = chat_session.send_message(formatted_messages[-1]["parts"])
-        return aggregated_response.text
+        response = chat_session.send_message(formatted_messages[-1]["parts"])
+        response_text = response.text
     except StopCandidateException as e:
-        response_message, _ = handle_gemini_response(e.args)
+        response_text, _ = handle_gemini_response(e.args)
         # Respond with reason for stopping
         logger.warning(
-            f"LLM Response Prevented for {model_name}: {response_message}.\n"
+            f"LLM Response Prevented for {model_name}: {response_text}.\n"
             + f"Last Message by {messages[-1].role}: {messages[-1].content}"
         )
-        return response_message
+
+    # Save conversation trace
+    tracer["chat_model"] = model_name
+    tracer["temperature"] = temperature
+    if in_debug_mode() or state.verbose > 1:
+        commit_conversation_trace(messages, response_text, tracer)
+
+    return response_text
 
 
 @retry(
@@ -88,17 +100,20 @@ def gemini_chat_completion_with_backoff(
     system_prompt,
     completion_func=None,
     model_kwargs=None,
+    tracer: dict = {},
 ):
     g = ThreadedGenerator(compiled_references, online_results, completion_func=completion_func)
     t = Thread(
         target=gemini_llm_thread,
-        args=(g, messages, system_prompt, model_name, temperature, api_key, model_kwargs),
+        args=(g, messages, system_prompt, model_name, temperature, api_key, model_kwargs, tracer),
     )
     t.start()
     return g
 
 
-def gemini_llm_thread(g, messages, system_prompt, model_name, temperature, api_key, model_kwargs=None):
+def gemini_llm_thread(
+    g, messages, system_prompt, model_name, temperature, api_key, model_kwargs=None, tracer: dict = {}
+):
     try:
         genai.configure(api_key=api_key)
         model_kwargs = model_kwargs or dict()
@@ -117,16 +132,25 @@ def gemini_llm_thread(g, messages, system_prompt, model_name, temperature, api_k
             },
         )
 
+        aggregated_response = ""
         formatted_messages = [{"role": message.role, "parts": message.content} for message in messages]
+
         # all messages up to the last are considered to be part of the chat history
         chat_session = model.start_chat(history=formatted_messages[0:-1])
         # the last message is considered to be the current prompt
        for chunk in chat_session.send_message(formatted_messages[-1]["parts"], stream=True):
             message, stopped = handle_gemini_response(chunk.candidates, chunk.prompt_feedback)
             message = message or chunk.text
+            aggregated_response += message
             g.send(message)
             if stopped:
                 raise StopCandidateException(message)
+
+        # Save conversation trace
+        tracer["chat_model"] = model_name
+        tracer["temperature"] = temperature
+        if in_debug_mode() or state.verbose > 1:
+            commit_conversation_trace(messages, aggregated_response, tracer)
     except StopCandidateException as e:
         logger.warning(
             f"LLM Response Prevented for {model_name}: {e.args[0]}.\n"
khoj/processor/conversation/offline/chat_model.py

@@ -1,5 +1,6 @@
 import json
 import logging
+import os
 from datetime import datetime, timedelta
 from threading import Thread
 from typing import Any, Iterator, List, Optional, Union
@@ -12,12 +13,14 @@ from khoj.processor.conversation import prompts
 from khoj.processor.conversation.offline.utils import download_model
 from khoj.processor.conversation.utils import (
     ThreadedGenerator,
+    commit_conversation_trace,
     generate_chatml_messages_with_context,
 )
 from khoj.utils import state
 from khoj.utils.constants import empty_escape_sequences
-from khoj.utils.helpers import ConversationCommand, is_none_or_empty
+from khoj.utils.helpers import ConversationCommand, in_debug_mode, is_none_or_empty
 from khoj.utils.rawconfig import LocationData
+from khoj.utils.yaml import yaml_dump
 
 logger = logging.getLogger(__name__)
 
@@ -34,6 +37,7 @@ def extract_questions_offline(
     max_prompt_size: int = None,
     temperature: float = 0.7,
     personality_context: Optional[str] = None,
+    tracer: dict = {},
 ) -> List[str]:
     """
     Infer search queries to retrieve relevant notes to answer user query
@@ -94,6 +98,7 @@ def extract_questions_offline(
             max_prompt_size=max_prompt_size,
             temperature=temperature,
             response_type="json_object",
+            tracer=tracer,
         )
     finally:
         state.chat_lock.release()
@@ -135,7 +140,8 @@ def filter_questions(questions: List[str]):
 def converse_offline(
     user_query,
     references=[],
-    online_results=[],
+    online_results={},
+    code_results={},
     conversation_log={},
     model: str = "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
     loaded_model: Union[Any, None] = None,
@@ -146,6 +152,7 @@ def converse_offline(
     location_data: LocationData = None,
     user_name: str = None,
     agent: Agent = None,
+    tracer: dict = {},
 ) -> Union[ThreadedGenerator, Iterator[str]]:
     """
     Converse with user using Llama
@@ -153,8 +160,7 @@ def converse_offline(
     # Initialize Variables
     assert loaded_model is None or isinstance(loaded_model, Llama), "loaded_model must be of type Llama, if configured"
     offline_chat_model = loaded_model or download_model(model, max_tokens=max_prompt_size)
-    compiled_references = "\n\n".join({f"# File: {item['file']}\n## {item['compiled']}\n" for item in references})
-
+    tracer["chat_model"] = model
     current_date = datetime.now()
 
     if agent and agent.personality:
@@ -179,24 +185,25 @@ def converse_offline(
         system_prompt = f"{system_prompt}\n{user_name_prompt}"
 
     # Get Conversation Primer appropriate to Conversation Type
-    if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(compiled_references):
+    if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(references):
         return iter([prompts.no_notes_found.format()])
     elif conversation_commands == [ConversationCommand.Online] and is_none_or_empty(online_results):
         completion_func(chat_response=prompts.no_online_results_found.format())
         return iter([prompts.no_online_results_found.format()])
 
     context_message = ""
-    if not is_none_or_empty(compiled_references):
-        context_message += f"{prompts.notes_conversation_offline.format(references=compiled_references)}\n\n"
+    if not is_none_or_empty(references):
+        context_message = f"{prompts.notes_conversation_offline.format(references=yaml_dump(references))}\n\n"
     if ConversationCommand.Online in conversation_commands or ConversationCommand.Webpage in conversation_commands:
         simplified_online_results = online_results.copy()
         for result in online_results:
             if online_results[result].get("webpages"):
                 simplified_online_results[result] = online_results[result]["webpages"]
 
-        context_message += (
-            f"{prompts.online_search_conversation_offline.format(online_results=str(simplified_online_results))}"
-        )
+        context_message += f"{prompts.online_search_conversation_offline.format(online_results=yaml_dump(simplified_online_results))}\n\n"
+    if ConversationCommand.Code in conversation_commands and not is_none_or_empty(code_results):
+        context_message += f"{prompts.code_executed_context.format(code_results=str(code_results))}\n\n"
+    context_message = context_message.strip()
 
     # Setup Prompt with Primer or Conversation History
     messages = generate_chatml_messages_with_context(
@@ -215,13 +222,14 @@ def converse_offline(
     logger.debug(f"Conversation Context for {model}: {truncated_messages}")
 
     g = ThreadedGenerator(references, online_results, completion_func=completion_func)
-    t = Thread(target=llm_thread, args=(g, messages, offline_chat_model, max_prompt_size))
+    t = Thread(target=llm_thread, args=(g, messages, offline_chat_model, max_prompt_size, tracer))
     t.start()
     return g
 
 
-def llm_thread(g, messages: List[ChatMessage], model: Any, max_prompt_size: int = None):
+def llm_thread(g, messages: List[ChatMessage], model: Any, max_prompt_size: int = None, tracer: dict = {}):
     stop_phrases = ["<s>", "INST]", "Notes:"]
+    aggregated_response = ""
 
     state.chat_lock.acquire()
     try:
@@ -229,7 +237,14 @@ def llm_thread(g, messages: List[ChatMessage], model: Any, max_prompt_size: int
             messages, loaded_model=model, stop=stop_phrases, max_prompt_size=max_prompt_size, streaming=True
         )
         for response in response_iterator:
-            g.send(response["choices"][0]["delta"].get("content", ""))
+            response_delta = response["choices"][0]["delta"].get("content", "")
+            aggregated_response += response_delta
+            g.send(response_delta)
+
+        # Save conversation trace
+        if in_debug_mode() or state.verbose > 1:
+            commit_conversation_trace(messages, aggregated_response, tracer)
+
     finally:
         state.chat_lock.release()
         g.close()
@@ -244,14 +259,31 @@ def send_message_to_model_offline(
     stop=[],
     max_prompt_size: int = None,
     response_type: str = "text",
+    tracer: dict = {},
 ):
     assert loaded_model is None or isinstance(loaded_model, Llama), "loaded_model must be of type Llama, if configured"
     offline_chat_model = loaded_model or download_model(model, max_tokens=max_prompt_size)
     messages_dict = [{"role": message.role, "content": message.content} for message in messages]
+    seed = int(os.getenv("KHOJ_LLM_SEED")) if os.getenv("KHOJ_LLM_SEED") else None
     response = offline_chat_model.create_chat_completion(
-        messages_dict, stop=stop, stream=streaming, temperature=temperature, response_format={"type": response_type}
+        messages_dict,
+        stop=stop,
+        stream=streaming,
+        temperature=temperature,
+        response_format={"type": response_type},
+        seed=seed,
     )
+
     if streaming:
         return response
-    else:
-        return response["choices"][0]["message"].get("content", "")
+
+    response_text = response["choices"][0]["message"].get("content", "")
+
+    # Save conversation trace for non-streaming responses
+    # Streamed responses need to be saved by the calling function
+    tracer["chat_model"] = model
+    tracer["temperature"] = temperature
+    if in_debug_mode() or state.verbose > 1:
+        commit_conversation_trace(messages, response_text, tracer)
+
+    return response_text
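
Note: the offline path above and the OpenAI path further below both start honouring an optional KHOJ_LLM_SEED environment variable, passing it through as the sampling seed so responses are easier to reproduce in tests and evals. A small usage sketch of the pattern, following the diff; the exported value 42 is just an example:

import os

# Run `export KHOJ_LLM_SEED=42` before starting Khoj to pin the sampling seed;
# leave the variable unset to keep the default non-deterministic sampling.
model_kwargs = {}
if os.getenv("KHOJ_LLM_SEED"):
    model_kwargs["seed"] = int(os.getenv("KHOJ_LLM_SEED"))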
khoj/processor/conversation/openai/gpt.py

@@ -12,12 +12,13 @@ from khoj.processor.conversation.openai.utils import (
     completion_with_backoff,
 )
 from khoj.processor.conversation.utils import (
+    clean_json,
     construct_structured_message,
     generate_chatml_messages_with_context,
-    remove_json_codeblock,
 )
 from khoj.utils.helpers import ConversationCommand, is_none_or_empty
 from khoj.utils.rawconfig import LocationData
+from khoj.utils.yaml import yaml_dump
 
 logger = logging.getLogger(__name__)
 
@@ -33,6 +34,7 @@ def extract_questions(
     query_images: Optional[list[str]] = None,
     vision_enabled: bool = False,
     personality_context: Optional[str] = None,
+    tracer: dict = {},
 ):
     """
     Infer search queries to retrieve relevant notes to answer user query
@@ -82,13 +84,18 @@ def extract_questions(
     messages = [ChatMessage(content=prompt, role="user")]
 
     response = send_message_to_model(
-        messages, api_key, model, response_type="json_object", api_base_url=api_base_url, temperature=temperature
+        messages,
+        api_key,
+        model,
+        response_type="json_object",
+        api_base_url=api_base_url,
+        temperature=temperature,
+        tracer=tracer,
     )
 
     # Extract, Clean Message from GPT's Response
     try:
-        response = response.strip()
-        response = remove_json_codeblock(response)
+        response = clean_json(response)
         response = json.loads(response)
         response = [q.strip() for q in response["queries"] if q.strip()]
         if not isinstance(response, list) or not response:
@@ -103,7 +110,9 @@ def extract_questions(
     return questions
 
 
-def send_message_to_model(messages, api_key, model, response_type="text", api_base_url=None, temperature=0):
+def send_message_to_model(
+    messages, api_key, model, response_type="text", api_base_url=None, temperature=0, tracer: dict = {}
+):
     """
     Send message to model
     """
@@ -116,6 +125,7 @@ def send_message_to_model(messages, api_key, model, response_type="text", api_ba
         temperature=temperature,
         api_base_url=api_base_url,
         model_kwargs={"response_format": {"type": response_type}},
+        tracer=tracer,
     )
 
 
@@ -123,6 +133,7 @@ def converse(
     references,
     user_query,
     online_results: Optional[Dict[str, Dict]] = None,
+    code_results: Optional[Dict[str, Dict]] = None,
     conversation_log={},
     model: str = "gpt-4o-mini",
     api_key: Optional[str] = None,
@@ -137,13 +148,13 @@ def converse(
     agent: Agent = None,
     query_images: Optional[list[str]] = None,
     vision_available: bool = False,
+    tracer: dict = {},
 ):
     """
     Converse with user using OpenAI's ChatGPT
     """
     # Initialize Variables
     current_date = datetime.now()
-    compiled_references = "\n\n".join({f"# File: {item['file']}\n## {item['compiled']}\n" for item in references})
 
     if agent and agent.personality:
         system_prompt = prompts.custom_personality.format(
@@ -167,7 +178,7 @@ def converse(
         system_prompt = f"{system_prompt}\n{user_name_prompt}"
 
     # Get Conversation Primer appropriate to Conversation Type
-    if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(compiled_references):
+    if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(references):
         completion_func(chat_response=prompts.no_notes_found.format())
         return iter([prompts.no_notes_found.format()])
     elif conversation_commands == [ConversationCommand.Online] and is_none_or_empty(online_results):
@@ -175,10 +186,13 @@ def converse(
         return iter([prompts.no_online_results_found.format()])
 
     context_message = ""
-    if not is_none_or_empty(compiled_references):
-        context_message = f"{prompts.notes_conversation.format(references=compiled_references)}\n\n"
+    if not is_none_or_empty(references):
+        context_message = f"{prompts.notes_conversation.format(references=yaml_dump(references))}\n\n"
     if not is_none_or_empty(online_results):
-        context_message += f"{prompts.online_search_conversation.format(online_results=str(online_results))}"
+        context_message += f"{prompts.online_search_conversation.format(online_results=yaml_dump(online_results))}\n\n"
+    if not is_none_or_empty(code_results):
+        context_message += f"{prompts.code_executed_context.format(code_results=str(code_results))}\n\n"
+    context_message = context_message.strip()
 
     # Setup Prompt with Primer or Conversation History
     messages = generate_chatml_messages_with_context(
@@ -207,4 +221,5 @@ def converse(
         api_base_url=api_base_url,
         completion_func=completion_func,
         model_kwargs={"stop": ["Notes:\n["]},
+        tracer=tracer,
    )
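
Note: the context builders in this file and its Gemini/offline counterparts now serialize note references and online results with yaml_dump instead of ad-hoc string joins, which keeps nested result dictionaries readable to the model. The helper comes from khoj/utils/yaml.py (+4 lines in this release); a plausible sketch, assuming it simply wraps PyYAML's dumper, is shown here for orientation only:

import yaml


def yaml_dump(data) -> str:
    # Render nested dicts/lists as readable, unicode-preserving YAML
    return yaml.dump(data, allow_unicode=True, sort_keys=False, default_flow_style=False)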
khoj/processor/conversation/openai/utils.py

@@ -1,4 +1,5 @@
 import logging
+import os
 from threading import Thread
 from typing import Dict
 
@@ -12,7 +13,12 @@ from tenacity import (
     wait_random_exponential,
 )
 
-from khoj.processor.conversation.utils import ThreadedGenerator
+from khoj.processor.conversation.utils import (
+    ThreadedGenerator,
+    commit_conversation_trace,
+)
+from khoj.utils import state
+from khoj.utils.helpers import in_debug_mode
 
 logger = logging.getLogger(__name__)
 
@@ -33,7 +39,7 @@ openai_clients: Dict[str, openai.OpenAI] = {}
     reraise=True,
 )
 def completion_with_backoff(
-    messages, model, temperature=0, openai_api_key=None, api_base_url=None, model_kwargs=None
+    messages, model, temperature=0, openai_api_key=None, api_base_url=None, model_kwargs=None, tracer: dict = {}
 ) -> str:
     client_key = f"{openai_api_key}--{api_base_url}"
     client: openai.OpenAI | None = openai_clients.get(client_key)
@@ -55,6 +61,9 @@ def completion_with_backoff(
         model_kwargs.pop("stop", None)
         model_kwargs.pop("response_format", None)
 
+    if os.getenv("KHOJ_LLM_SEED"):
+        model_kwargs["seed"] = int(os.getenv("KHOJ_LLM_SEED"))
+
     chat = client.chat.completions.create(
         stream=stream,
         messages=formatted_messages, # type: ignore
@@ -77,6 +86,12 @@ def completion_with_backoff(
         elif delta_chunk.content:
             aggregated_response += delta_chunk.content
 
+    # Save conversation trace
+    tracer["chat_model"] = model
+    tracer["temperature"] = temperature
+    if in_debug_mode() or state.verbose > 1:
+        commit_conversation_trace(messages, aggregated_response, tracer)
+
     return aggregated_response
 
 
@@ -103,26 +118,37 @@ def chat_completion_with_backoff(
     api_base_url=None,
     completion_func=None,
     model_kwargs=None,
+    tracer: dict = {},
 ):
     g = ThreadedGenerator(compiled_references, online_results, completion_func=completion_func)
     t = Thread(
-        target=llm_thread, args=(g, messages, model_name, temperature, openai_api_key, api_base_url, model_kwargs)
+        target=llm_thread,
+        args=(g, messages, model_name, temperature, openai_api_key, api_base_url, model_kwargs, tracer),
     )
     t.start()
     return g
 
 
-def llm_thread(g, messages, model_name, temperature, openai_api_key=None, api_base_url=None, model_kwargs=None):
+def llm_thread(
+    g,
+    messages,
+    model_name,
+    temperature,
+    openai_api_key=None,
+    api_base_url=None,
+    model_kwargs=None,
+    tracer: dict = {},
+):
     try:
         client_key = f"{openai_api_key}--{api_base_url}"
         if client_key not in openai_clients:
-            client: openai.OpenAI = openai.OpenAI(
+            client = openai.OpenAI(
                 api_key=openai_api_key,
                 base_url=api_base_url,
             )
             openai_clients[client_key] = client
         else:
-            client: openai.OpenAI = openai_clients[client_key]
+            client = openai_clients[client_key]
 
         formatted_messages = [{"role": message.role, "content": message.content} for message in messages]
         stream = True
@@ -135,6 +161,9 @@ def llm_thread(g, messages, model_name, temperature, openai_api_key=None, api_ba
             model_kwargs.pop("stop", None)
             model_kwargs.pop("response_format", None)
 
+        if os.getenv("KHOJ_LLM_SEED"):
+            model_kwargs["seed"] = int(os.getenv("KHOJ_LLM_SEED"))
+
         chat = client.chat.completions.create(
             stream=stream,
             messages=formatted_messages,
@@ -144,17 +173,29 @@ def llm_thread(g, messages, model_name, temperature, openai_api_key=None, api_ba
             **(model_kwargs or dict()),
         )
 
+        aggregated_response = ""
         if not stream:
-            g.send(chat.choices[0].message.content)
+            aggregated_response = chat.choices[0].message.content
+            g.send(aggregated_response)
         else:
             for chunk in chat:
                 if len(chunk.choices) == 0:
                     continue
                 delta_chunk = chunk.choices[0].delta
+                text_chunk = ""
                 if isinstance(delta_chunk, str):
-                    g.send(delta_chunk)
+                    text_chunk = delta_chunk
                 elif delta_chunk.content:
-                    g.send(delta_chunk.content)
+                    text_chunk = delta_chunk.content
+                if text_chunk:
+                    aggregated_response += text_chunk
+                    g.send(text_chunk)
+
+        # Save conversation trace
+        tracer["chat_model"] = model_name
+        tracer["temperature"] = temperature
+        if in_debug_mode() or state.verbose > 1:
+            commit_conversation_trace(messages, aggregated_response, tracer)
     except Exception as e:
         logger.error(f"Error in llm_thread: {e}", exc_info=True)
     finally: