khoj 1.27.2.dev15__py3-none-any.whl → 1.28.1__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
Files changed (78)
  1. khoj/configure.py +1 -1
  2. khoj/database/adapters/__init__.py +50 -12
  3. khoj/interface/compiled/404/index.html +1 -1
  4. khoj/interface/compiled/_next/static/chunks/1034-da58b679fcbb79c1.js +1 -0
  5. khoj/interface/compiled/_next/static/chunks/1467-b331e469fe411347.js +1 -0
  6. khoj/interface/compiled/_next/static/chunks/1603-c1568f45947e9f2c.js +1 -0
  7. khoj/interface/compiled/_next/static/chunks/3423-ff7402ae1dd66592.js +1 -0
  8. khoj/interface/compiled/_next/static/chunks/8423-e80647edf6c92c27.js +1 -0
  9. khoj/interface/compiled/_next/static/chunks/app/agents/{page-2beaba7c9bb750bd.js → page-fc492762298e975e.js} +1 -1
  10. khoj/interface/compiled/_next/static/chunks/app/automations/{page-9b5c77e0b0dd772c.js → page-416ee13a00575c39.js} +1 -1
  11. khoj/interface/compiled/_next/static/chunks/app/chat/page-c70f5b0c722d7627.js +1 -0
  12. khoj/interface/compiled/_next/static/chunks/app/factchecker/page-1541d90140794f63.js +1 -0
  13. khoj/interface/compiled/_next/static/chunks/app/{page-4b6008223ea79955.js → page-b269e444fc067759.js} +1 -1
  14. khoj/interface/compiled/_next/static/chunks/app/search/{page-ab2995529ece3140.js → page-7d431ce8e565c7c3.js} +1 -1
  15. khoj/interface/compiled/_next/static/chunks/app/settings/{page-7946cabb9c54e22d.js → page-95f56e53f48f0289.js} +1 -1
  16. khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-6a01e07fb244c10c.js → page-4eba6154f7bb9771.js} +1 -1
  17. khoj/interface/compiled/_next/static/chunks/{webpack-878569182b3af4c6.js → webpack-33a82ccca02cd2b8.js} +1 -1
  18. khoj/interface/compiled/_next/static/css/2196fae09c2f906e.css +1 -0
  19. khoj/interface/compiled/_next/static/css/6bde1f2045622ef7.css +1 -0
  20. khoj/interface/compiled/_next/static/css/a795ee88875f4853.css +25 -0
  21. khoj/interface/compiled/_next/static/css/ebef43da1c0651d5.css +1 -0
  22. khoj/interface/compiled/agents/index.html +1 -1
  23. khoj/interface/compiled/agents/index.txt +2 -2
  24. khoj/interface/compiled/automations/index.html +1 -1
  25. khoj/interface/compiled/automations/index.txt +2 -2
  26. khoj/interface/compiled/chat/index.html +1 -1
  27. khoj/interface/compiled/chat/index.txt +2 -2
  28. khoj/interface/compiled/factchecker/index.html +1 -1
  29. khoj/interface/compiled/factchecker/index.txt +2 -2
  30. khoj/interface/compiled/index.html +1 -1
  31. khoj/interface/compiled/index.txt +2 -2
  32. khoj/interface/compiled/search/index.html +1 -1
  33. khoj/interface/compiled/search/index.txt +2 -2
  34. khoj/interface/compiled/settings/index.html +1 -1
  35. khoj/interface/compiled/settings/index.txt +2 -2
  36. khoj/interface/compiled/share/chat/index.html +1 -1
  37. khoj/interface/compiled/share/chat/index.txt +2 -2
  38. khoj/processor/conversation/anthropic/anthropic_chat.py +19 -10
  39. khoj/processor/conversation/anthropic/utils.py +37 -6
  40. khoj/processor/conversation/google/gemini_chat.py +23 -13
  41. khoj/processor/conversation/google/utils.py +34 -10
  42. khoj/processor/conversation/offline/chat_model.py +48 -16
  43. khoj/processor/conversation/openai/gpt.py +25 -10
  44. khoj/processor/conversation/openai/utils.py +50 -9
  45. khoj/processor/conversation/prompts.py +156 -65
  46. khoj/processor/conversation/utils.py +306 -6
  47. khoj/processor/embeddings.py +4 -4
  48. khoj/processor/image/generate.py +2 -0
  49. khoj/processor/tools/online_search.py +27 -12
  50. khoj/processor/tools/run_code.py +144 -0
  51. khoj/routers/api.py +11 -6
  52. khoj/routers/api_chat.py +213 -111
  53. khoj/routers/helpers.py +171 -60
  54. khoj/routers/research.py +320 -0
  55. khoj/search_filter/date_filter.py +1 -3
  56. khoj/search_filter/file_filter.py +1 -2
  57. khoj/search_type/text_search.py +3 -3
  58. khoj/utils/helpers.py +24 -2
  59. khoj/utils/yaml.py +4 -0
  60. {khoj-1.27.2.dev15.dist-info → khoj-1.28.1.dist-info}/METADATA +3 -2
  61. {khoj-1.27.2.dev15.dist-info → khoj-1.28.1.dist-info}/RECORD +68 -65
  62. khoj/interface/compiled/_next/static/chunks/1603-b9d95833e0e025e8.js +0 -1
  63. khoj/interface/compiled/_next/static/chunks/2697-61fcba89fd87eab4.js +0 -1
  64. khoj/interface/compiled/_next/static/chunks/3423-0b533af8bf6ac218.js +0 -1
  65. khoj/interface/compiled/_next/static/chunks/9479-ff7d8c4dae2014d1.js +0 -1
  66. khoj/interface/compiled/_next/static/chunks/app/chat/page-151232d8417a1ea1.js +0 -1
  67. khoj/interface/compiled/_next/static/chunks/app/factchecker/page-798904432c2417c4.js +0 -1
  68. khoj/interface/compiled/_next/static/css/2272c73fc7a3b571.css +0 -1
  69. khoj/interface/compiled/_next/static/css/553f9cdcc7a2bcd6.css +0 -1
  70. khoj/interface/compiled/_next/static/css/76d55eb435962b19.css +0 -25
  71. khoj/interface/compiled/_next/static/css/d738728883c68af8.css +0 -1
  72. /khoj/interface/compiled/_next/static/{vcyFRDGArOFXwUVotHIuv → JcTomiF3o0dIo4RxHR9Vu}/_buildManifest.js +0 -0
  73. /khoj/interface/compiled/_next/static/{vcyFRDGArOFXwUVotHIuv → JcTomiF3o0dIo4RxHR9Vu}/_ssgManifest.js +0 -0
  74. /khoj/interface/compiled/_next/static/chunks/{1970-60c96aed937a4928.js → 1970-90dd510762d820ba.js} +0 -0
  75. /khoj/interface/compiled/_next/static/chunks/{9417-2ca87207387fc790.js → 9417-951f46451a8dd6d7.js} +0 -0
  76. {khoj-1.27.2.dev15.dist-info → khoj-1.28.1.dist-info}/WHEEL +0 -0
  77. {khoj-1.27.2.dev15.dist-info → khoj-1.28.1.dist-info}/entry_points.txt +0 -0
  78. {khoj-1.27.2.dev15.dist-info → khoj-1.28.1.dist-info}/licenses/LICENSE +0 -0

khoj/processor/conversation/anthropic/anthropic_chat.py

@@ -14,11 +14,13 @@ from khoj.processor.conversation.anthropic.utils import (
     format_messages_for_anthropic,
 )
 from khoj.processor.conversation.utils import (
+    clean_json,
     construct_structured_message,
     generate_chatml_messages_with_context,
 )
 from khoj.utils.helpers import ConversationCommand, is_none_or_empty
 from khoj.utils.rawconfig import LocationData
+from khoj.utils.yaml import yaml_dump
 
 logger = logging.getLogger(__name__)
 
@@ -34,6 +36,7 @@ def extract_questions_anthropic(
     query_images: Optional[list[str]] = None,
     vision_enabled: bool = False,
     personality_context: Optional[str] = None,
+    tracer: dict = {},
 ):
     """
     Infer search queries to retrieve relevant notes to answer user query
@@ -89,14 +92,13 @@ def extract_questions_anthropic(
         model_name=model,
         temperature=temperature,
         api_key=api_key,
+        response_type="json_object",
+        tracer=tracer,
     )
 
     # Extract, Clean Message from Claude's Response
     try:
-        response = response.strip()
-        match = re.search(r"\{.*?\}", response)
-        if match:
-            response = match.group()
+        response = clean_json(response)
         response = json.loads(response)
         response = [q.strip() for q in response["queries"] if q.strip()]
         if not isinstance(response, list) or not response:
@@ -110,7 +112,7 @@ def extract_questions_anthropic(
     return questions
 
 
-def anthropic_send_message_to_model(messages, api_key, model):
+def anthropic_send_message_to_model(messages, api_key, model, response_type="text", tracer={}):
     """
     Send message to model
     """
@@ -122,6 +124,8 @@ def anthropic_send_message_to_model(messages, api_key, model):
         system_prompt=system_prompt,
         model_name=model,
         api_key=api_key,
+        response_type=response_type,
+        tracer=tracer,
     )
 
 
@@ -129,6 +133,7 @@ def converse_anthropic(
     references,
     user_query,
     online_results: Optional[Dict[str, Dict]] = None,
+    code_results: Optional[Dict[str, Dict]] = None,
     conversation_log={},
     model: Optional[str] = "claude-3-5-sonnet-20241022",
     api_key: Optional[str] = None,
@@ -141,13 +146,13 @@ def converse_anthropic(
     agent: Agent = None,
     query_images: Optional[list[str]] = None,
     vision_available: bool = False,
+    tracer: dict = {},
 ):
     """
     Converse with user using Anthropic's Claude
     """
     # Initialize Variables
     current_date = datetime.now()
-    compiled_references = "\n\n".join({f"# File: {item['file']}\n## {item['compiled']}\n" for item in references})
 
     if agent and agent.personality:
         system_prompt = prompts.custom_personality.format(
@@ -171,7 +176,7 @@ def converse_anthropic(
         system_prompt = f"{system_prompt}\n{user_name_prompt}"
 
     # Get Conversation Primer appropriate to Conversation Type
-    if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(compiled_references):
+    if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(references):
         completion_func(chat_response=prompts.no_notes_found.format())
         return iter([prompts.no_notes_found.format()])
     elif conversation_commands == [ConversationCommand.Online] and is_none_or_empty(online_results):
@@ -179,10 +184,13 @@ def converse_anthropic(
         return iter([prompts.no_online_results_found.format()])
 
     context_message = ""
-    if not is_none_or_empty(compiled_references):
-        context_message = f"{prompts.notes_conversation.format(query=user_query, references=compiled_references)}\n\n"
+    if not is_none_or_empty(references):
+        context_message = f"{prompts.notes_conversation.format(query=user_query, references=yaml_dump(references))}\n\n"
     if ConversationCommand.Online in conversation_commands or ConversationCommand.Webpage in conversation_commands:
-        context_message += f"{prompts.online_search_conversation.format(online_results=str(online_results))}"
+        context_message += f"{prompts.online_search_conversation.format(online_results=yaml_dump(online_results))}\n\n"
+    if ConversationCommand.Code in conversation_commands and not is_none_or_empty(code_results):
+        context_message += f"{prompts.code_executed_context.format(code_results=str(code_results))}\n\n"
+    context_message = context_message.strip()
 
     # Setup Prompt with Primer or Conversation History
     messages = generate_chatml_messages_with_context(
@@ -213,4 +221,5 @@ def converse_anthropic(
         system_prompt=system_prompt,
         completion_func=completion_func,
         max_prompt_size=max_prompt_size,
+        tracer=tracer,
     )
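
Note: the ad-hoc regex extraction of the JSON payload is replaced by clean_json, which is added to khoj/processor/conversation/utils.py (+306 lines, not shown in these hunks). Its real implementation may differ; a minimal sketch of what such a helper typically does, assuming it only strips whitespace and markdown code fences before json.loads, would be:

    def clean_json(response: str) -> str:
        """Hypothetical sketch: strip fences/whitespace so the payload parses as JSON."""
        response = response.strip()
        # Drop a ```json ... ``` or ``` ... ``` wrapper if the model added one
        if response.startswith("```"):
            response = response.split("\n", 1)[-1]
            if response.endswith("```"):
                response = response[: -len("```")]
        return response.strip()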

khoj/processor/conversation/anthropic/utils.py

@@ -12,8 +12,13 @@ from tenacity import (
     wait_random_exponential,
 )
 
-from khoj.processor.conversation.utils import ThreadedGenerator, get_image_from_url
-from khoj.utils.helpers import is_none_or_empty
+from khoj.processor.conversation.utils import (
+    ThreadedGenerator,
+    commit_conversation_trace,
+    get_image_from_url,
+)
+from khoj.utils import state
+from khoj.utils.helpers import in_debug_mode, is_none_or_empty
 
 logger = logging.getLogger(__name__)
 
@@ -30,7 +35,15 @@ DEFAULT_MAX_TOKENS_ANTHROPIC = 3000
     reraise=True,
 )
 def anthropic_completion_with_backoff(
-    messages, system_prompt, model_name, temperature=0, api_key=None, model_kwargs=None, max_tokens=None
+    messages,
+    system_prompt,
+    model_name,
+    temperature=0,
+    api_key=None,
+    model_kwargs=None,
+    max_tokens=None,
+    response_type="text",
+    tracer={},
 ) -> str:
     if api_key not in anthropic_clients:
         client: anthropic.Anthropic = anthropic.Anthropic(api_key=api_key)
@@ -39,8 +52,11 @@ def anthropic_completion_with_backoff(
         client = anthropic_clients[api_key]
 
     formatted_messages = [{"role": message.role, "content": message.content} for message in messages]
+    if response_type == "json_object":
+        # Prefill model response with '{' to make it output a valid JSON object
+        formatted_messages += [{"role": "assistant", "content": "{"}]
 
-    aggregated_response = ""
+    aggregated_response = "{" if response_type == "json_object" else ""
     max_tokens = max_tokens or DEFAULT_MAX_TOKENS_ANTHROPIC
 
     model_kwargs = model_kwargs or dict()
@@ -58,6 +74,12 @@ def anthropic_completion_with_backoff(
         for text in stream.text_stream:
             aggregated_response += text
 
+    # Save conversation trace
+    tracer["chat_model"] = model_name
+    tracer["temperature"] = temperature
+    if in_debug_mode() or state.verbose > 1:
+        commit_conversation_trace(messages, aggregated_response, tracer)
+
     return aggregated_response
 
 
@@ -78,18 +100,19 @@ def anthropic_chat_completion_with_backoff(
     max_prompt_size=None,
     completion_func=None,
     model_kwargs=None,
+    tracer={},
 ):
     g = ThreadedGenerator(compiled_references, online_results, completion_func=completion_func)
     t = Thread(
         target=anthropic_llm_thread,
-        args=(g, messages, system_prompt, model_name, temperature, api_key, max_prompt_size, model_kwargs),
+        args=(g, messages, system_prompt, model_name, temperature, api_key, max_prompt_size, model_kwargs, tracer),
     )
     t.start()
     return g
 
 
 def anthropic_llm_thread(
-    g, messages, system_prompt, model_name, temperature, api_key, max_prompt_size=None, model_kwargs=None
+    g, messages, system_prompt, model_name, temperature, api_key, max_prompt_size=None, model_kwargs=None, tracer={}
 ):
     try:
         if api_key not in anthropic_clients:
@@ -102,6 +125,7 @@ def anthropic_llm_thread(
             anthropic.types.MessageParam(role=message.role, content=message.content) for message in messages
         ]
 
+        aggregated_response = ""
         with client.messages.stream(
             messages=formatted_messages,
             model=model_name,  # type: ignore
@@ -112,7 +136,14 @@ def anthropic_llm_thread(
             **(model_kwargs or dict()),
         ) as stream:
             for text in stream.text_stream:
+                aggregated_response += text
                 g.send(text)
+
+        # Save conversation trace
+        tracer["chat_model"] = model_name
+        tracer["temperature"] = temperature
+        if in_debug_mode() or state.verbose > 1:
+            commit_conversation_trace(messages, aggregated_response, tracer)
     except Exception as e:
         logger.error(f"Error in anthropic_llm_thread: {e}", exc_info=True)
     finally:
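
The JSON handling above relies on Anthropic's assistant-prefill behaviour: appending a partial assistant turn ending in "{" nudges Claude to continue a JSON object instead of emitting prose, and the prefilled "{" is re-attached to the aggregated response before parsing. A minimal standalone sketch of the same trick with the anthropic SDK (model, prompt, and key are illustrative only):

    import anthropic

    client = anthropic.Anthropic(api_key="sk-ant-...")  # placeholder key
    prefill = "{"
    message = client.messages.create(
        model="claude-3-5-sonnet-20241022",
        max_tokens=300,
        system='Reply with a JSON object of the form {"queries": [...]}.',
        messages=[
            {"role": "user", "content": "What did I write about gardening last spring?"},
            # Prefilled assistant turn: Claude continues from this "{"
            {"role": "assistant", "content": prefill},
        ],
    )
    # Re-attach the prefill before parsing, as the diff does with aggregated_response
    raw_json = prefill + message.content[0].text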

khoj/processor/conversation/google/gemini_chat.py

@@ -14,11 +14,13 @@ from khoj.processor.conversation.google.utils import (
     gemini_completion_with_backoff,
 )
 from khoj.processor.conversation.utils import (
+    clean_json,
     construct_structured_message,
     generate_chatml_messages_with_context,
 )
 from khoj.utils.helpers import ConversationCommand, is_none_or_empty
 from khoj.utils.rawconfig import LocationData
+from khoj.utils.yaml import yaml_dump
 
 logger = logging.getLogger(__name__)
 
@@ -35,6 +37,7 @@ def extract_questions_gemini(
     query_images: Optional[list[str]] = None,
     vision_enabled: bool = False,
     personality_context: Optional[str] = None,
+    tracer: dict = {},
 ):
     """
     Infer search queries to retrieve relevant notes to answer user query
@@ -85,15 +88,12 @@ def extract_questions_gemini(
     messages = [ChatMessage(content=prompt, role="user"), ChatMessage(content=system_prompt, role="system")]
 
     response = gemini_send_message_to_model(
-        messages, api_key, model, response_type="json_object", temperature=temperature
+        messages, api_key, model, response_type="json_object", temperature=temperature, tracer=tracer
     )
 
     # Extract, Clean Message from Gemini's Response
     try:
-        response = response.strip()
-        match = re.search(r"\{.*?\}", response)
-        if match:
-            response = match.group()
+        response = clean_json(response)
         response = json.loads(response)
         response = [q.strip() for q in response["queries"] if q.strip()]
         if not isinstance(response, list) or not response:
@@ -107,15 +107,19 @@ def extract_questions_gemini(
     return questions
 
 
-def gemini_send_message_to_model(messages, api_key, model, response_type="text", temperature=0, model_kwargs=None):
+def gemini_send_message_to_model(
+    messages, api_key, model, response_type="text", temperature=0, model_kwargs=None, tracer={}
+):
     """
     Send message to model
     """
     messages, system_prompt = format_messages_for_gemini(messages)
 
     model_kwargs = {}
-    if response_type == "json_object":
-        model_kwargs["response_mime_type"] = "application/json"
+
+    # Sometimes, this causes unwanted behavior and terminates response early. Disable for now while it's flaky.
+    # if response_type == "json_object":
+    #     model_kwargs["response_mime_type"] = "application/json"
 
     # Get Response from Gemini
     return gemini_completion_with_backoff(
@@ -125,6 +129,7 @@ def gemini_send_message_to_model(messages, api_key, model, response_type="text",
         api_key=api_key,
         temperature=temperature,
         model_kwargs=model_kwargs,
+        tracer=tracer,
     )
 
 
@@ -132,6 +137,7 @@ def converse_gemini(
     references,
     user_query,
     online_results: Optional[Dict[str, Dict]] = None,
+    code_results: Optional[Dict[str, Dict]] = None,
     conversation_log={},
     model: Optional[str] = "gemini-1.5-flash",
     api_key: Optional[str] = None,
@@ -145,13 +151,13 @@ def converse_gemini(
     agent: Agent = None,
     query_images: Optional[list[str]] = None,
     vision_available: bool = False,
+    tracer={},
 ):
     """
     Converse with user using Google's Gemini
     """
     # Initialize Variables
     current_date = datetime.now()
-    compiled_references = "\n\n".join({f"# File: {item['file']}\n## {item['compiled']}\n" for item in references})
 
     if agent and agent.personality:
         system_prompt = prompts.custom_personality.format(
@@ -176,7 +182,7 @@ def converse_gemini(
         system_prompt = f"{system_prompt}\n{user_name_prompt}"
 
     # Get Conversation Primer appropriate to Conversation Type
-    if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(compiled_references):
+    if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(references):
         completion_func(chat_response=prompts.no_notes_found.format())
         return iter([prompts.no_notes_found.format()])
     elif conversation_commands == [ConversationCommand.Online] and is_none_or_empty(online_results):
@@ -184,10 +190,13 @@ def converse_gemini(
         return iter([prompts.no_online_results_found.format()])
 
     context_message = ""
-    if not is_none_or_empty(compiled_references):
-        context_message = f"{prompts.notes_conversation.format(query=user_query, references=compiled_references)}\n\n"
+    if not is_none_or_empty(references):
+        context_message = f"{prompts.notes_conversation.format(query=user_query, references=yaml_dump(references))}\n\n"
     if ConversationCommand.Online in conversation_commands or ConversationCommand.Webpage in conversation_commands:
-        context_message += f"{prompts.online_search_conversation.format(online_results=str(online_results))}"
+        context_message += f"{prompts.online_search_conversation.format(online_results=yaml_dump(online_results))}\n\n"
+    if ConversationCommand.Code in conversation_commands and not is_none_or_empty(code_results):
+        context_message += f"{prompts.code_executed_context.format(code_results=str(code_results))}\n\n"
+    context_message = context_message.strip()
 
     # Setup Prompt with Primer or Conversation History
     messages = generate_chatml_messages_with_context(
@@ -217,4 +226,5 @@ def converse_gemini(
         api_key=api_key,
         system_prompt=system_prompt,
         completion_func=completion_func,
+        tracer=tracer,
     )
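
Across these converse_* functions the inline "\n\n".join(...) formatting of references and the str(...) dump of online results are replaced by yaml_dump, a new helper in khoj/utils/yaml.py (+4 lines, not shown in this diff). Its exact implementation isn't visible here; a plausible minimal version, assuming it is a thin wrapper over PyYAML that keeps unicode and insertion order so the LLM sees readable, structured context, would be:

    import yaml

    def yaml_dump(data) -> str:
        # Hypothetical sketch of the helper; the shipped version may differ.
        return yaml.dump(data, allow_unicode=True, sort_keys=False, default_flow_style=False)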

khoj/processor/conversation/google/utils.py

@@ -19,8 +19,13 @@ from tenacity import (
     wait_random_exponential,
 )
 
-from khoj.processor.conversation.utils import ThreadedGenerator, get_image_from_url
-from khoj.utils.helpers import is_none_or_empty
+from khoj.processor.conversation.utils import (
+    ThreadedGenerator,
+    commit_conversation_trace,
+    get_image_from_url,
+)
+from khoj.utils import state
+from khoj.utils.helpers import in_debug_mode, is_none_or_empty
 
 logger = logging.getLogger(__name__)
 
@@ -35,7 +40,7 @@ MAX_OUTPUT_TOKENS_GEMINI = 8192
     reraise=True,
 )
 def gemini_completion_with_backoff(
-    messages, system_prompt, model_name, temperature=0, api_key=None, model_kwargs=None
+    messages, system_prompt, model_name, temperature=0, api_key=None, model_kwargs=None, tracer={}
 ) -> str:
     genai.configure(api_key=api_key)
     model_kwargs = model_kwargs or dict()
@@ -60,16 +65,23 @@ def gemini_completion_with_backoff(
 
     try:
         # Generate the response. The last message is considered to be the current prompt
-        aggregated_response = chat_session.send_message(formatted_messages[-1]["parts"])
-        return aggregated_response.text
+        response = chat_session.send_message(formatted_messages[-1]["parts"])
+        response_text = response.text
     except StopCandidateException as e:
-        response_message, _ = handle_gemini_response(e.args)
+        response_text, _ = handle_gemini_response(e.args)
         # Respond with reason for stopping
         logger.warning(
-            f"LLM Response Prevented for {model_name}: {response_message}.\n"
+            f"LLM Response Prevented for {model_name}: {response_text}.\n"
             + f"Last Message by {messages[-1].role}: {messages[-1].content}"
         )
-        return response_message
+
+    # Save conversation trace
+    tracer["chat_model"] = model_name
+    tracer["temperature"] = temperature
+    if in_debug_mode() or state.verbose > 1:
+        commit_conversation_trace(messages, response_text, tracer)
+
+    return response_text
 
 
 @retry(
@@ -88,17 +100,20 @@ def gemini_chat_completion_with_backoff(
     system_prompt,
     completion_func=None,
     model_kwargs=None,
+    tracer: dict = {},
 ):
     g = ThreadedGenerator(compiled_references, online_results, completion_func=completion_func)
     t = Thread(
         target=gemini_llm_thread,
-        args=(g, messages, system_prompt, model_name, temperature, api_key, model_kwargs),
+        args=(g, messages, system_prompt, model_name, temperature, api_key, model_kwargs, tracer),
     )
     t.start()
     return g
 
 
-def gemini_llm_thread(g, messages, system_prompt, model_name, temperature, api_key, model_kwargs=None):
+def gemini_llm_thread(
+    g, messages, system_prompt, model_name, temperature, api_key, model_kwargs=None, tracer: dict = {}
+):
     try:
         genai.configure(api_key=api_key)
         model_kwargs = model_kwargs or dict()
@@ -117,16 +132,25 @@ def gemini_llm_thread(g, messages, system_prompt, model_name, temperature, api_k
             },
         )
 
+        aggregated_response = ""
         formatted_messages = [{"role": message.role, "parts": message.content} for message in messages]
+
         # all messages up to the last are considered to be part of the chat history
         chat_session = model.start_chat(history=formatted_messages[0:-1])
         # the last message is considered to be the current prompt
         for chunk in chat_session.send_message(formatted_messages[-1]["parts"], stream=True):
             message, stopped = handle_gemini_response(chunk.candidates, chunk.prompt_feedback)
             message = message or chunk.text
+            aggregated_response += message
             g.send(message)
             if stopped:
                 raise StopCandidateException(message)
+
+        # Save conversation trace
+        tracer["chat_model"] = model_name
+        tracer["temperature"] = temperature
+        if in_debug_mode() or state.verbose > 1:
+            commit_conversation_trace(messages, aggregated_response, tracer)
     except StopCandidateException as e:
         logger.warning(
             f"LLM Response Prevented for {model_name}: {e.args[0]}.\n"

khoj/processor/conversation/offline/chat_model.py

@@ -1,5 +1,6 @@
 import json
 import logging
+import os
 from datetime import datetime, timedelta
 from threading import Thread
 from typing import Any, Iterator, List, Optional, Union
@@ -12,12 +13,14 @@ from khoj.processor.conversation import prompts
 from khoj.processor.conversation.offline.utils import download_model
 from khoj.processor.conversation.utils import (
     ThreadedGenerator,
+    commit_conversation_trace,
     generate_chatml_messages_with_context,
 )
 from khoj.utils import state
 from khoj.utils.constants import empty_escape_sequences
-from khoj.utils.helpers import ConversationCommand, is_none_or_empty
+from khoj.utils.helpers import ConversationCommand, in_debug_mode, is_none_or_empty
 from khoj.utils.rawconfig import LocationData
+from khoj.utils.yaml import yaml_dump
 
 logger = logging.getLogger(__name__)
 
@@ -34,6 +37,7 @@ def extract_questions_offline(
     max_prompt_size: int = None,
     temperature: float = 0.7,
     personality_context: Optional[str] = None,
+    tracer: dict = {},
 ) -> List[str]:
     """
     Infer search queries to retrieve relevant notes to answer user query
@@ -94,6 +98,7 @@ def extract_questions_offline(
             max_prompt_size=max_prompt_size,
             temperature=temperature,
             response_type="json_object",
+            tracer=tracer,
         )
     finally:
         state.chat_lock.release()
@@ -135,7 +140,8 @@ def filter_questions(questions: List[str]):
 def converse_offline(
     user_query,
     references=[],
-    online_results=[],
+    online_results={},
+    code_results={},
     conversation_log={},
     model: str = "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
     loaded_model: Union[Any, None] = None,
@@ -146,6 +152,7 @@ def converse_offline(
     location_data: LocationData = None,
     user_name: str = None,
     agent: Agent = None,
+    tracer: dict = {},
 ) -> Union[ThreadedGenerator, Iterator[str]]:
     """
     Converse with user using Llama
@@ -153,8 +160,7 @@ def converse_offline(
     # Initialize Variables
     assert loaded_model is None or isinstance(loaded_model, Llama), "loaded_model must be of type Llama, if configured"
     offline_chat_model = loaded_model or download_model(model, max_tokens=max_prompt_size)
-    compiled_references = "\n\n".join({f"# File: {item['file']}\n## {item['compiled']}\n" for item in references})
-
+    tracer["chat_model"] = model
     current_date = datetime.now()
 
     if agent and agent.personality:
@@ -179,24 +185,25 @@ def converse_offline(
         system_prompt = f"{system_prompt}\n{user_name_prompt}"
 
     # Get Conversation Primer appropriate to Conversation Type
-    if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(compiled_references):
+    if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(references):
         return iter([prompts.no_notes_found.format()])
     elif conversation_commands == [ConversationCommand.Online] and is_none_or_empty(online_results):
         completion_func(chat_response=prompts.no_online_results_found.format())
         return iter([prompts.no_online_results_found.format()])
 
     context_message = ""
-    if not is_none_or_empty(compiled_references):
-        context_message += f"{prompts.notes_conversation_offline.format(references=compiled_references)}\n\n"
+    if not is_none_or_empty(references):
+        context_message = f"{prompts.notes_conversation_offline.format(references=yaml_dump(references))}\n\n"
     if ConversationCommand.Online in conversation_commands or ConversationCommand.Webpage in conversation_commands:
         simplified_online_results = online_results.copy()
         for result in online_results:
             if online_results[result].get("webpages"):
                 simplified_online_results[result] = online_results[result]["webpages"]
 
-        context_message += (
-            f"{prompts.online_search_conversation_offline.format(online_results=str(simplified_online_results))}"
-        )
+        context_message += f"{prompts.online_search_conversation_offline.format(online_results=yaml_dump(simplified_online_results))}\n\n"
+    if ConversationCommand.Code in conversation_commands and not is_none_or_empty(code_results):
+        context_message += f"{prompts.code_executed_context.format(code_results=str(code_results))}\n\n"
+    context_message = context_message.strip()
 
     # Setup Prompt with Primer or Conversation History
     messages = generate_chatml_messages_with_context(
@@ -215,13 +222,14 @@ def converse_offline(
     logger.debug(f"Conversation Context for {model}: {truncated_messages}")
 
     g = ThreadedGenerator(references, online_results, completion_func=completion_func)
-    t = Thread(target=llm_thread, args=(g, messages, offline_chat_model, max_prompt_size))
+    t = Thread(target=llm_thread, args=(g, messages, offline_chat_model, max_prompt_size, tracer))
     t.start()
     return g
 
 
-def llm_thread(g, messages: List[ChatMessage], model: Any, max_prompt_size: int = None):
+def llm_thread(g, messages: List[ChatMessage], model: Any, max_prompt_size: int = None, tracer: dict = {}):
     stop_phrases = ["<s>", "INST]", "Notes:"]
+    aggregated_response = ""
 
     state.chat_lock.acquire()
     try:
@@ -229,7 +237,14 @@ def llm_thread(g, messages: List[ChatMessage], model: Any, max_prompt_size: int
             messages, loaded_model=model, stop=stop_phrases, max_prompt_size=max_prompt_size, streaming=True
         )
         for response in response_iterator:
-            g.send(response["choices"][0]["delta"].get("content", ""))
+            response_delta = response["choices"][0]["delta"].get("content", "")
+            aggregated_response += response_delta
+            g.send(response_delta)
+
+        # Save conversation trace
+        if in_debug_mode() or state.verbose > 1:
+            commit_conversation_trace(messages, aggregated_response, tracer)
+
     finally:
         state.chat_lock.release()
         g.close()
@@ -244,14 +259,31 @@ def send_message_to_model_offline(
     stop=[],
     max_prompt_size: int = None,
     response_type: str = "text",
+    tracer: dict = {},
 ):
     assert loaded_model is None or isinstance(loaded_model, Llama), "loaded_model must be of type Llama, if configured"
     offline_chat_model = loaded_model or download_model(model, max_tokens=max_prompt_size)
    messages_dict = [{"role": message.role, "content": message.content} for message in messages]
+    seed = int(os.getenv("KHOJ_LLM_SEED")) if os.getenv("KHOJ_LLM_SEED") else None
     response = offline_chat_model.create_chat_completion(
-        messages_dict, stop=stop, stream=streaming, temperature=temperature, response_format={"type": response_type}
+        messages_dict,
+        stop=stop,
+        stream=streaming,
+        temperature=temperature,
+        response_format={"type": response_type},
+        seed=seed,
     )
+
     if streaming:
         return response
-    else:
-        return response["choices"][0]["message"].get("content", "")
+
+    response_text = response["choices"][0]["message"].get("content", "")
+
+    # Save conversation trace for non-streaming responses
+    # Streamed responses need to be saved by the calling function
+    tracer["chat_model"] = model
+    tracer["temperature"] = temperature
+    if in_debug_mode() or state.verbose > 1:
+        commit_conversation_trace(messages, response_text, tracer)
+
+    return response_text
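
The offline chat path now reads an optional KHOJ_LLM_SEED environment variable and passes it as the seed to llama-cpp-python's create_chat_completion, which makes offline sampling reproducible across runs. A small usage sketch of the same mechanism against llama_cpp directly (the model path and prompt are illustrative placeholders):

    import os
    from llama_cpp import Llama

    # Mirror of the diff's seed handling: None when the env var is unset,
    # e.g. set it in the shell with: export KHOJ_LLM_SEED=42
    seed = int(os.getenv("KHOJ_LLM_SEED")) if os.getenv("KHOJ_LLM_SEED") else None

    llm = Llama(model_path="Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf")  # placeholder path
    response = llm.create_chat_completion(
        [{"role": "user", "content": "Summarize composting in one sentence."}],
        temperature=0.2,
        seed=seed,  # same seed + same prompt -> reproducible sampling
    )
    print(response["choices"][0]["message"]["content"])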