khoj 2.0.0b13.dev23__py3-none-any.whl → 2.0.0b14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. khoj/database/admin.py +2 -2
  2. khoj/interface/compiled/404/index.html +2 -2
  3. khoj/interface/compiled/_next/static/chunks/9808-c0742b05e1ef29ba.js +1 -0
  4. khoj/interface/compiled/_next/static/chunks/app/agents/layout-0114c87d7ccf6d9b.js +1 -0
  5. khoj/interface/compiled/_next/static/chunks/app/automations/layout-8639ff99d6c2fec6.js +1 -0
  6. khoj/interface/compiled/_next/static/chunks/app/automations/{page-198b26df6e09bbb0.js → page-1047097af99d31c7.js} +1 -1
  7. khoj/interface/compiled/_next/static/chunks/app/chat/layout-2ff3e18a6feae92a.js +1 -0
  8. khoj/interface/compiled/_next/static/chunks/app/chat/page-ac7ed0a1aff1b145.js +1 -0
  9. khoj/interface/compiled/_next/static/chunks/app/search/layout-78dd7cdd97510485.js +1 -0
  10. khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-8addeb8079c3215b.js +1 -0
  11. khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-e0dcb1762f8c8f88.js → page-819c6536c15e3d31.js} +1 -1
  12. khoj/interface/compiled/_next/static/css/5c7a72bad47e50b3.css +25 -0
  13. khoj/interface/compiled/_next/static/css/{c34713c98384ee87.css → 821d0d60b0b6871d.css} +1 -1
  14. khoj/interface/compiled/_next/static/css/ecea704005ba630c.css +1 -0
  15. khoj/interface/compiled/agents/index.html +2 -2
  16. khoj/interface/compiled/agents/index.txt +1 -1
  17. khoj/interface/compiled/automations/index.html +2 -2
  18. khoj/interface/compiled/automations/index.txt +3 -3
  19. khoj/interface/compiled/chat/index.html +2 -2
  20. khoj/interface/compiled/chat/index.txt +5 -4
  21. khoj/interface/compiled/index.html +2 -2
  22. khoj/interface/compiled/index.txt +1 -1
  23. khoj/interface/compiled/search/index.html +2 -2
  24. khoj/interface/compiled/search/index.txt +1 -1
  25. khoj/interface/compiled/settings/index.html +2 -2
  26. khoj/interface/compiled/settings/index.txt +1 -1
  27. khoj/interface/compiled/share/chat/index.html +2 -2
  28. khoj/interface/compiled/share/chat/index.txt +2 -2
  29. khoj/processor/conversation/google/gemini_chat.py +1 -1
  30. khoj/processor/conversation/google/utils.py +62 -19
  31. khoj/processor/conversation/openai/utils.py +58 -22
  32. khoj/processor/conversation/prompts.py +37 -25
  33. khoj/processor/conversation/utils.py +2 -1
  34. khoj/processor/tools/run_code.py +15 -22
  35. khoj/routers/api_chat.py +8 -3
  36. khoj/routers/api_content.py +1 -1
  37. khoj/routers/helpers.py +44 -38
  38. khoj/routers/research.py +7 -5
  39. khoj/utils/constants.py +6 -1
  40. khoj/utils/helpers.py +55 -15
  41. {khoj-2.0.0b13.dev23.dist-info → khoj-2.0.0b14.dist-info}/METADATA +1 -1
  42. {khoj-2.0.0b13.dev23.dist-info → khoj-2.0.0b14.dist-info}/RECORD +47 -47
  43. khoj/interface/compiled/_next/static/chunks/7127-97b83757db125ba6.js +0 -1
  44. khoj/interface/compiled/_next/static/chunks/app/agents/layout-e00fb81dca656a10.js +0 -1
  45. khoj/interface/compiled/_next/static/chunks/app/automations/layout-63603d2cb33279f7.js +0 -1
  46. khoj/interface/compiled/_next/static/chunks/app/chat/layout-33934fc2d6ae6838.js +0 -1
  47. khoj/interface/compiled/_next/static/chunks/app/chat/page-dfcc1e8e2ad62873.js +0 -1
  48. khoj/interface/compiled/_next/static/chunks/app/search/layout-c02531d586972d7d.js +0 -1
  49. khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-e8e5db7830bf3f47.js +0 -1
  50. khoj/interface/compiled/_next/static/css/23b26df423cd8a9c.css +0 -1
  51. khoj/interface/compiled/_next/static/css/3090706713c12a32.css +0 -25
  52. /khoj/interface/compiled/_next/static/{Q7tm150g44Fs4H1CGytNf → Qn_2XyeVWxjaIRks7rzM-}/_buildManifest.js +0 -0
  53. /khoj/interface/compiled/_next/static/{Q7tm150g44Fs4H1CGytNf → Qn_2XyeVWxjaIRks7rzM-}/_ssgManifest.js +0 -0
  54. {khoj-2.0.0b13.dev23.dist-info → khoj-2.0.0b14.dist-info}/WHEEL +0 -0
  55. {khoj-2.0.0b13.dev23.dist-info → khoj-2.0.0b14.dist-info}/entry_points.txt +0 -0
  56. {khoj-2.0.0b13.dev23.dist-info → khoj-2.0.0b14.dist-info}/licenses/LICENSE +0 -0
khoj/processor/conversation/openai/utils.py CHANGED
@@ -78,10 +78,8 @@ def _extract_text_for_instructions(content: Union[str, List, Dict, None]) -> str
 @retry(
     retry=(
         retry_if_exception_type(openai._exceptions.APITimeoutError)
-        | retry_if_exception_type(openai._exceptions.APIError)
-        | retry_if_exception_type(openai._exceptions.APIConnectionError)
         | retry_if_exception_type(openai._exceptions.RateLimitError)
-        | retry_if_exception_type(openai._exceptions.APIStatusError)
+        | retry_if_exception_type(openai._exceptions.InternalServerError)
         | retry_if_exception_type(ValueError)
     ),
     wait=wait_random_exponential(min=1, max=10),
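For context, this release narrows the retry policy on the OpenAI chat helpers from blanket retries (APIError, APIConnectionError, APIStatusError) down to timeouts, rate limits, and 5xx InternalServerError. A minimal sketch of the equivalent tenacity decorator, assuming the openai SDK's top-level exception classes and an illustrative `_call_chat_api` helper (the stop condition is not shown in this hunk):

```python
import openai
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_random_exponential


@retry(
    retry=(
        retry_if_exception_type(openai.APITimeoutError)
        | retry_if_exception_type(openai.RateLimitError)
        | retry_if_exception_type(openai.InternalServerError)
        | retry_if_exception_type(ValueError)
    ),
    wait=wait_random_exponential(min=1, max=10),
    stop=stop_after_attempt(3),  # illustrative cap, not taken from the diff
    reraise=True,
)
def _call_chat_api(client: openai.OpenAI, model_name: str, messages: list[dict]):
    # Transient failures (timeouts, rate limits, 5xx) are retried;
    # client-side 4xx status errors now fail fast instead of being retried.
    return client.chat.completions.create(model=model_name, messages=messages)
```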
@@ -128,7 +126,7 @@ def completion_with_backoff(
     if model_name.startswith("grok-4"):
         # Grok-4 models do not support reasoning_effort parameter
         model_kwargs.pop("reasoning_effort", None)
-    elif model_name.startswith("deepseek-reasoner"):
+    elif model_name.startswith("deepseek-reasoner") or model_name.startswith("deepseek-chat"):
         stream_processor = in_stream_thought_processor
     # Two successive messages cannot be from the same role. Should merge any back-to-back messages from the same role.
     # The first message should always be a user message (except system message).
@@ -147,6 +145,8 @@ def completion_with_backoff(
         # See https://qwenlm.github.io/blog/qwen3/#advanced-usages
         if not deepthought:
             add_qwen_no_think_tag(formatted_messages)
+    elif is_groq_api(api_base_url):
+        model_kwargs["service_tier"] = "auto"

     read_timeout = 300 if is_local_api(api_base_url) else 60
     if os.getenv("KHOJ_LLM_SEED"):
@@ -172,8 +172,16 @@ def completion_with_backoff(
                 chunk.type == "chunk"
                 and chunk.chunk.choices
                 and hasattr(chunk.chunk.choices[0].delta, "reasoning_content")
+                and chunk.chunk.choices[0].delta.reasoning_content
             ):
                 thoughts += chunk.chunk.choices[0].delta.reasoning_content
+            elif (
+                chunk.type == "chunk"
+                and chunk.chunk.choices
+                and hasattr(chunk.chunk.choices[0].delta, "reasoning")
+                and chunk.chunk.choices[0].delta.reasoning
+            ):
+                thoughts += chunk.chunk.choices[0].delta.reasoning
             elif chunk.type == "chunk" and chunk.chunk.choices and chunk.chunk.choices[0].delta.tool_calls:
                 tool_ids += [tool_call.id for tool_call in chunk.chunk.choices[0].delta.tool_calls]
             elif chunk.type == "tool_calls.function.arguments.done":
@@ -196,7 +204,6 @@ def completion_with_backoff(
         chunk = client.beta.chat.completions.parse(
             messages=formatted_messages,  # type: ignore
             model=model_name,
-            temperature=temperature,
             timeout=httpx.Timeout(30, read=read_timeout),
             **model_kwargs,
         )
@@ -221,6 +228,10 @@ def completion_with_backoff(
         # Json dump tool calls into aggregated response
         aggregated_response = json.dumps([tool_call.__dict__ for tool_call in tool_calls])

+    # Align chunk definition with non-streaming mode for post stream completion usage
+    if hasattr(chunk, "chunk"):
+        chunk = chunk.chunk
+
     # Calculate cost of chat
     input_tokens = chunk.usage.prompt_tokens if hasattr(chunk, "usage") and chunk.usage else 0
     output_tokens = chunk.usage.completion_tokens if hasattr(chunk, "usage") and chunk.usage else 0
@@ -249,10 +260,8 @@ def completion_with_backoff(
 @retry(
     retry=(
         retry_if_exception_type(openai._exceptions.APITimeoutError)
-        | retry_if_exception_type(openai._exceptions.APIError)
-        | retry_if_exception_type(openai._exceptions.APIConnectionError)
         | retry_if_exception_type(openai._exceptions.RateLimitError)
-        | retry_if_exception_type(openai._exceptions.APIStatusError)
+        | retry_if_exception_type(openai._exceptions.InternalServerError)
         | retry_if_exception_type(ValueError)
     ),
     wait=wait_exponential(multiplier=1, min=4, max=10),
@@ -313,8 +322,12 @@ async def chat_completion_with_backoff(
     # Grok-4 models do not support reasoning_effort parameter
     if not model_name.startswith("grok-4"):
         model_kwargs["reasoning_effort"] = reasoning_effort
-    elif model_name.startswith("deepseek-reasoner") or "deepseek-r1" in model_name:
-        # Official Deepseek reasoner model and some inference APIs like vLLM return structured thinking output.
+    elif (
+        model_name.startswith("deepseek-chat")
+        or model_name.startswith("deepseek-reasoner")
+        or "deepseek-r1" in model_name
+    ):
+        # Official Deepseek models and some inference APIs like vLLM return structured thinking output.
         # Others like DeepInfra return it in response stream.
         # Using the instream thought processor handles both cases, structured thoughts and in response thoughts.
         stream_processor = ain_stream_thought_processor
@@ -339,6 +352,8 @@ async def chat_completion_with_backoff(
         # See https://qwenlm.github.io/blog/qwen3/#advanced-usages
         if not deepthought:
             add_qwen_no_think_tag(formatted_messages)
+    elif is_groq_api(api_base_url):
+        model_kwargs["service_tier"] = "auto"

     read_timeout = 300 if is_local_api(api_base_url) else 60
     if os.getenv("KHOJ_LLM_SEED"):
@@ -415,10 +430,8 @@ async def chat_completion_with_backoff(
 @retry(
     retry=(
         retry_if_exception_type(openai._exceptions.APITimeoutError)
-        | retry_if_exception_type(openai._exceptions.APIError)
-        | retry_if_exception_type(openai._exceptions.APIConnectionError)
         | retry_if_exception_type(openai._exceptions.RateLimitError)
-        | retry_if_exception_type(openai._exceptions.APIStatusError)
+        | retry_if_exception_type(openai._exceptions.InternalServerError)
         | retry_if_exception_type(ValueError)
     ),
     wait=wait_random_exponential(min=1, max=10),
@@ -460,6 +473,7 @@ def responses_completion_with_backoff(
         temperature = 1
         reasoning_effort = "medium" if deepthought else "low"
         model_kwargs["reasoning"] = {"effort": reasoning_effort, "summary": "auto"}
+        model_kwargs["include"] = ["reasoning.encrypted_content"]
         # Remove unsupported params for reasoning models
         model_kwargs.pop("top_p", None)
         model_kwargs.pop("stop", None)
@@ -474,7 +488,6 @@ def responses_completion_with_backoff(
         temperature=temperature,
         timeout=httpx.Timeout(30, read=read_timeout),  # type: ignore
         store=False,
-        include=["reasoning.encrypted_content"],
         **model_kwargs,
     )
     if not model_response or not isinstance(model_response, OpenAIResponse) or not model_response.output:
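Net effect of the two hunks above: the `include=["reasoning.encrypted_content"]` argument moves from the `client.responses.create(...)` call into `model_kwargs`, so it is only sent for reasoning-capable models. A hedged sketch of the resulting request construction (parameter names follow the OpenAI Responses API as used in the diff; `is_reasoning_model` and the surrounding variables are illustrative stand-ins):

```python
import httpx

model_kwargs: dict = {}
if is_reasoning_model:  # assumption: a boolean computed elsewhere in the helper
    model_kwargs["reasoning"] = {"effort": "medium" if deepthought else "low", "summary": "auto"}
    model_kwargs["include"] = ["reasoning.encrypted_content"]
    # Remove params unsupported by reasoning models
    model_kwargs.pop("top_p", None)
    model_kwargs.pop("stop", None)

model_response = client.responses.create(
    input=formatted_messages,
    model=model_name,
    temperature=temperature,
    timeout=httpx.Timeout(30, read=read_timeout),
    store=False,
    **model_kwargs,
)
```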
@@ -529,10 +542,8 @@ def responses_completion_with_backoff(
 @retry(
     retry=(
         retry_if_exception_type(openai._exceptions.APITimeoutError)
-        | retry_if_exception_type(openai._exceptions.APIError)
-        | retry_if_exception_type(openai._exceptions.APIConnectionError)
         | retry_if_exception_type(openai._exceptions.RateLimitError)
-        | retry_if_exception_type(openai._exceptions.APIStatusError)
+        | retry_if_exception_type(openai._exceptions.InternalServerError)
         | retry_if_exception_type(ValueError)
     ),
     wait=wait_exponential(multiplier=1, min=4, max=10),
@@ -764,7 +775,7 @@ def format_message_for_api(raw_messages: List[ChatMessage], api_base_url: str) -
                     {
                         "type": "function_call_output",
                         "call_id": tool_call_id,
-                        "output": part.get("content"),
+                        "output": part.get("content") or "No output",
                     }
                 )
             else:
@@ -773,7 +784,7 @@ def format_message_for_api(raw_messages: List[ChatMessage], api_base_url: str) -
                         "role": "tool",
                         "tool_call_id": tool_call_id,
                         "name": part.get("name"),
-                        "content": part.get("content"),
+                        "content": part.get("content") or "No output",
                     }
                 )
             continue
@@ -810,8 +821,9 @@ def format_message_for_api(raw_messages: List[ChatMessage], api_base_url: str) -
                 # OpenAI models use the Responses API which uses slightly different content types
                 if part["type"] == "text":
                     part["type"] = "output_text" if message.role == "assistant" else "input_text"
-                if part["type"] == "image":
+                if part["type"] == "image_url":
                     part["type"] = "output_image" if message.role == "assistant" else "input_image"
+                    part["image_url"] = part["image_url"]["url"]
         # If no valid content parts left, remove the message
         if is_none_or_empty(message.content):
             messages.remove(message)
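The hunk above adjusts message formatting for the Responses API: chat-style `image_url` parts are renamed to `input_image`/`output_image` and flattened so the part carries the URL string directly instead of a `{"url": ...}` object. A small illustrative transform under those assumptions:

```python
def convert_part_for_responses_api(part: dict, role: str) -> dict:
    """Map a chat-completions style content part to a Responses API part (illustrative)."""
    if part["type"] == "text":
        part["type"] = "output_text" if role == "assistant" else "input_text"
    elif part["type"] == "image_url":
        part["type"] = "output_image" if role == "assistant" else "input_image"
        # The Responses API expects the image URL (or data URI) directly on `image_url`
        part["image_url"] = part["image_url"]["url"]
    return part


# Example: {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}} on a user
# message becomes {"type": "input_image", "image_url": "data:image/png;base64,..."}.
```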
@@ -836,8 +848,10 @@ def is_openai_reasoning_model(model_name: str, api_base_url: str = None) -> bool
     """
     Check if the model is an OpenAI reasoning model
     """
-    return is_openai_api(api_base_url) and (
-        model_name.lower().startswith("o") or model_name.lower().startswith("gpt-5")
+    return (
+        is_openai_api(api_base_url)
+        and (model_name.lower().startswith("o") or model_name.lower().startswith("gpt-5"))
+        or model_name.lower().startswith("gpt-oss")
     )


@@ -861,6 +875,13 @@ def is_twitter_reasoning_model(model_name: str, api_base_url: str = None) -> boo
     )


+def is_groq_api(api_base_url: str = None) -> bool:
+    """
+    Check if the model is served over the Groq API
+    """
+    return api_base_url is not None and api_base_url.startswith("https://api.groq.com")
+
+
 def is_qwen_style_reasoning_model(model_name: str, api_base_url: str = None) -> bool:
     """
     Check if the model is a Qwen style reasoning model
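The new `is_groq_api` helper is what gates the `service_tier` request parameter added earlier in this diff. A short usage sketch mirroring the completion helpers (`model_kwargs` is illustrative here):

```python
def is_groq_api(api_base_url: str = None) -> bool:
    """Check if the model is served over the Groq API"""
    return api_base_url is not None and api_base_url.startswith("https://api.groq.com")


model_kwargs: dict = {}
if is_groq_api("https://api.groq.com/openai/v1"):
    # Ask Groq to pick a supported tier rather than echoing back "on_demand"
    model_kwargs["service_tier"] = "auto"
```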
@@ -934,6 +955,9 @@ async def astream_thought_processor(
         if not chunk_data.get("object") or chunk_data.get("object") != "chat.completion.chunk":
             logger.warning(f"Skipping invalid chunk with object field: {chunk_data.get('object', 'missing')}")
             continue
+        # Handle unsupported service tiers like "on_demand" by Groq
+        if chunk.service_tier and chunk.service_tier == "on_demand":
+            chunk_data["service_tier"] = "auto"

         tchunk = ChatCompletionWithThoughtsChunk.model_validate(chunk_data)

@@ -945,6 +969,14 @@ async def astream_thought_processor(
         ):
             tchunk.choices[0].delta.thought = chunk.choices[0].delta.reasoning_content

+        # Handlle openai reasoning style response with thoughts. Used by gpt-oss.
+        if (
+            len(tchunk.choices) > 0
+            and hasattr(tchunk.choices[0].delta, "reasoning")
+            and tchunk.choices[0].delta.reasoning
+        ):
+            tchunk.choices[0].delta.thought = chunk.choices[0].delta.reasoning
+
         # Handlle llama.cpp server style response with thoughts.
         elif len(tchunk.choices) > 0 and tchunk.choices[0].delta.model_extra.get("reasoning_content"):
             tchunk.choices[0].delta.thought = tchunk.choices[0].delta.model_extra.get("reasoning_content")
@@ -1075,6 +1107,10 @@ async def ain_stream_thought_processor(
             yield chunk
             continue

+        if chunk.choices[0].delta.content is None:
+            # If delta content is None, we can't process it, just yield the chunk
+            continue
+
         buf += chunk.choices[0].delta.content

         if mode == "detect_start":
khoj/processor/conversation/prompts.py CHANGED
@@ -4,21 +4,27 @@ from langchain_core.prompts import PromptTemplate
 ## --
 personality = PromptTemplate.from_template(
     """
-You are Khoj, a smart, inquisitive and helpful personal assistant.
+You are Khoj, a smart, curious, empathetic and helpful personal assistant.
 Use your general knowledge and past conversation with the user as context to inform your responses.
-You were created by Khoj Inc. with the following capabilities:

-- You *CAN REMEMBER ALL NOTES and PERSONAL INFORMATION FOREVER* that the user ever shares with you.
-- Users can share files and other information with you using the Khoj Desktop, Obsidian or Emacs app. They can also drag and drop their files into the chat window.
-- You *CAN* generate images, look-up real-time information from the internet, set reminders and answer questions based on the user's notes.
+You were created by Khoj Inc. More information about you, the company or Khoj apps can be found at https://khoj.dev.
+
+Today is {day_of_week}, {current_date} in UTC.
+
+# Capabilities
+- Users can share files and other information with you using the Khoj Web, Desktop, Obsidian or Emacs app. They can also drag and drop their files into the chat window.
+- You can look up information from the user's notes and documents synced via the Khoj apps.
+- You can generate images, look-up real-time information from the internet, analyze data and answer questions based on the user's notes.
+
+# Style
+- Your responses should be helpful, conversational and tuned to the user's communication style.
 - Make sure to use the specific LaTeX math mode delimiters for your response. LaTex math mode specific delimiters as following
   - inline math mode : \\( and \\)
   - display math mode: insert linebreak after opening $$, \\[ and before closing $$, \\]
-- Sometimes the user will share personal information that needs to be remembered, like an account ID or a residential address. These can be acknowledged with a simple "Got it" or "Okay".
-- Provide inline references to quotes from the user's notes or any web pages you refer to in your responses in markdown format. For example, "The farmer had ten sheep. [1](https://example.com)". *ALWAYS CITE YOUR SOURCES AND PROVIDE REFERENCES*. Add them inline to directly support your claim.
-
-Note: More information about you, the company or Khoj apps can be found at https://khoj.dev.
-Today is {day_of_week}, {current_date} in UTC.
+- Provide inline citations to documents and websites referenced. Add them inline in markdown format to directly support your claim.
+  For example: "The weather today is sunny [1](https://weather.com)."
+- Mention generated assets like images by reference, e.g ![chart](/visualization/image.png). Do not manually output raw, b64 encoded bytes in your response.
+- Do not respond with raw programs or scripts in your final response unless you know the user is a programmer or has explicitly requested code.
 """.strip()
 )

@@ -26,18 +32,23 @@ custom_personality = PromptTemplate.from_template(
     """
 You are {name}, a personal agent on Khoj.
 Use your general knowledge and past conversation with the user as context to inform your responses.
-You were created by Khoj Inc. with the following capabilities:

-- You *CAN REMEMBER ALL NOTES and PERSONAL INFORMATION FOREVER* that the user ever shares with you.
-- Users can share files and other information with you using the Khoj Desktop, Obsidian or Emacs app. They can also drag and drop their files into the chat window.
+You were created on the Khoj platform. More information about you, the company or Khoj apps can be found at https://khoj.dev.
+
+Today is {day_of_week}, {current_date} in UTC.
+
+# Base Capabilities
+- Users can share files and other information with you using the Khoj Web, Desktop, Obsidian or Emacs app. They can also drag and drop their files into the chat window.
+
+# Style
 - Make sure to use the specific LaTeX math mode delimiters for your response. LaTex math mode specific delimiters as following
   - inline math mode : `\\(` and `\\)`
   - display math mode: insert linebreak after opening `$$`, `\\[` and before closing `$$`, `\\]`
-- Sometimes the user will share personal information that needs to be remembered, like an account ID or a residential address. These can be acknowledged with a simple "Got it" or "Okay".
+- Provide inline citations to documents and websites referenced. Add them inline in markdown format to directly support your claim.
+  For example: "The weather today is sunny [1](https://weather.com)."
+- Mention generated assets like images by reference, e.g ![chart](/visualization/image.png). Do not manually output raw, b64 encoded bytes in your response.

-Today is {day_of_week}, {current_date} in UTC.
-
-Instructions:\n{bio}
+# Instructions:\n{bio}
 """.strip()
 )

@@ -641,16 +652,17 @@ Here's some additional context about you:

 plan_function_execution = PromptTemplate.from_template(
     """
-You are Khoj, a smart, creative and meticulous researcher. Use the provided tool AIs to accomplish the task assigned to you.
+You are Khoj, a smart, creative and meticulous researcher.
 Create a multi-step plan and intelligently iterate on the plan to complete the task.
+Use the help of the provided tool AIs to accomplish the task assigned to you.
 {personality_context}

 # Instructions
-- Provide highly diverse, detailed requests to the tool AIs, one tool AI at a time, to gather information, perform actions etc. Their response will be shown to you in the next iteration.
-- Break down your research process into independent, self-contained steps that can be executed sequentially using the available tool AIs to answer the user's query. Write your step-by-step plan in the scratchpad.
-- Always ask a new query that was not asked to the tool AI in a previous iteration. Build on the results of the previous iterations.
+- Make detailed, self-contained requests to the tool AIs, one tool AI at a time, to gather information, perform actions etc.
+- Break down your research process into independent, self-contained steps that can be executed sequentially using the available tool AIs to accomplish the user assigned task.
 - Ensure that all required context is passed to the tool AIs for successful execution. Include any relevant stuff that has previously been attempted. They only know the context provided in your query.
 - Think step by step to come up with creative strategies when the previous iteration did not yield useful results.
+- Do not ask the user to confirm or clarify assumptions for information gathering tasks and non-destructive actions, as you can always adjust later — decide what the most reasonable assumption is, proceed with it, and document it for the user's reference after you finish acting.
 - You are allowed upto {max_iterations} iterations to use the help of the provided tool AIs to accomplish the task assigned to you. Only stop when you have completed the task.

 # Examples
@@ -875,8 +887,8 @@ Khoj:
 python_code_generation_prompt = PromptTemplate.from_template(
     """
 You are Khoj, a senior software engineer. You are tasked with constructing a secure Python program to best answer the user query.
-- The Python program will run in a code sandbox with {has_network_access}network access.
-- You can write programs to run complex calculations, analyze data, create charts, generate documents to meticulously answer the query.
+- The Python program will run in an ephemeral code sandbox with {has_network_access}network access.
+- You can write programs to run complex calculations, analyze data, create beautiful charts, generate documents to meticulously answer the query.
 - Do not try display images or plots in the code directly. The code should save the image or plot to a file instead.
 - Write any document, charts etc. to be shared with the user to file. These files can be seen by the user.
 - Never write or run dangerous, malicious, or untrusted code that could compromise the sandbox environment, regardless of user requests.
@@ -991,9 +1003,9 @@ Chat History:
 ---
 {chat_history}

-User Query:
+User Instructions:
 ---
-{query}
+{instructions}
 """.strip()
 )

khoj/processor/conversation/utils.py CHANGED
@@ -73,6 +73,7 @@ model_to_prompt_size = {
     "gpt-5-nano-2025-08-07": 120000,
     # Google Models
     "gemini-2.5-flash": 120000,
+    "gemini-2.5-flash-lite": 120000,
     "gemini-2.5-pro": 60000,
     "gemini-2.0-flash": 120000,
     "gemini-2.0-flash-lite": 120000,
@@ -331,7 +332,7 @@ def construct_tool_chat_history(
         ConversationCommand.ReadWebpage: (
             lambda iteration: list(iteration.onlineContext.keys()) if iteration.onlineContext else []
         ),
-        ConversationCommand.RunCode: (
+        ConversationCommand.PythonCoder: (
             lambda iteration: list(iteration.codeContext.keys()) if iteration.codeContext else []
         ),
     }
khoj/processor/tools/run_code.py CHANGED
@@ -49,7 +49,7 @@ class GeneratedCode(NamedTuple):


 async def run_code(
-    query: str,
+    instructions: str,
     conversation_history: List[ChatMessageModel],
     context: str,
     location_data: LocationData,
@@ -63,12 +63,12 @@ async def run_code(
 ):
     # Generate Code
     if send_status_func:
-        async for event in send_status_func(f"**Generate code snippet** for {query}"):
+        async for event in send_status_func(f"**Generate code snippet** for {instructions}"):
             yield {ChatEvent.STATUS: event}
     try:
         with timer("Chat actor: Generate programs to execute", logger):
             generated_code = await generate_python_code(
-                query,
+                instructions,
                 conversation_history,
                 context,
                 location_data,
@@ -79,7 +79,7 @@ async def run_code(
                 query_files,
             )
     except Exception as e:
-        raise ValueError(f"Failed to generate code for {query} with error: {e}")
+        raise ValueError(f"Failed to generate code for {instructions} with error: {e}")

     # Prepare Input Data
     input_data = []
@@ -101,21 +101,21 @@ async def run_code(
             code = result.pop("code")
             cleaned_result = truncate_code_context({"cleaned": {"results": result}})["cleaned"]["results"]
             logger.info(f"Executed Code\n----\n{code}\n----\nResult\n----\n{cleaned_result}\n----")
-            yield {query: {"code": code, "results": result}}
+            yield {instructions: {"code": code, "results": result}}
     except asyncio.TimeoutError as e:
         # Call the sandbox_url/stop GET API endpoint to stop the code sandbox
-        error = f"Failed to run code for {query} with Timeout error: {e}"
+        error = f"Failed to run code for {instructions} with Timeout error: {e}"
         try:
             await aiohttp.ClientSession().get(f"{sandbox_url}/stop", timeout=5)
         except Exception as e:
             error += f"\n\nFailed to stop code sandbox with error: {e}"
         raise ValueError(error)
     except Exception as e:
-        raise ValueError(f"Failed to run code for {query} with error: {e}")
+        raise ValueError(f"Failed to run code for {instructions} with error: {e}")


 async def generate_python_code(
-    q: str,
+    instructions: str,
     chat_history: List[ChatMessageModel],
     context: str,
     location_data: LocationData,
@@ -142,7 +142,7 @@ async def generate_python_code(
     network_access_context = "**NO** " if not is_e2b_code_sandbox_enabled() else ""

     code_generation_prompt = prompts.python_code_generation_prompt.format(
-        query=q,
+        instructions=instructions,
         chat_history=chat_history_str,
         context=context,
         has_network_access=network_access_context,
@@ -252,8 +252,12 @@ async def execute_e2b(code: str, input_files: list[dict]) -> dict[str, Any]:

     # Identify new files created during execution
     new_files = set(E2bFile(f.name, f.path) for f in await sandbox.files.list("~")) - original_files
+
     # Read newly created files in parallel
-    download_tasks = [sandbox.files.read(f.path, request_timeout=30) for f in new_files]
+    def read_format(f):
+        return "bytes" if Path(f.name).suffix in image_file_ext else "text"
+
+    download_tasks = [sandbox.files.read(f.path, format=read_format(f), request_timeout=30) for f in new_files]
     downloaded_files = await asyncio.gather(*download_tasks)
     for f, content in zip(new_files, downloaded_files):
         if isinstance(content, bytes):
@@ -261,23 +265,12 @@ async def execute_e2b(code: str, input_files: list[dict]) -> dict[str, Any]:
             b64_data = base64.b64encode(content).decode("utf-8")
         elif Path(f.name).suffix in image_file_ext:
             # Ignore image files as they are extracted from execution results below for inline display
-            continue
+            b64_data = base64.b64encode(content).decode("utf-8")
         else:
             # Text files - encode utf-8 string as base64
             b64_data = content
         output_files.append({"filename": f.name, "b64_data": b64_data})

-    # Collect output files from execution results
-    # Repect ordering of output result types to disregard text output associated with images
-    output_result_types = ["png", "jpeg", "svg", "text", "markdown", "json"]
-    for idx, result in enumerate(execution.results):
-        if getattr(result, "chart", None):
-            continue
-        for result_type in output_result_types:
-            if b64_data := getattr(result, result_type, None):
-                output_files.append({"filename": f"{idx}.{result_type}", "b64_data": b64_data})
-                break
-
     # collect logs
     success = not execution.error and not execution.logs.stderr
     stdout = "\n".join(execution.logs.stdout)
khoj/routers/api_chat.py CHANGED
@@ -786,6 +786,9 @@ async def event_generator(
                 if interrupt_query == ChatEvent.END_EVENT.value:
                     cancellation_event.set()
                     logger.debug(f"Chat cancelled by user {user} via interrupt queue.")
+                elif interrupt_query == ChatEvent.INTERRUPT.value:
+                    cancellation_event.set()
+                    logger.debug("Chat interrupted.")
                 else:
                     # Pass the interrupt query to child tasks
                     logger.info(f"Continuing chat with the new instruction: {interrupt_query}")
@@ -995,7 +998,7 @@ async def event_generator(
         )
     except ValueError as e:
         logger.error(f"Error getting data sources and output format: {e}. Falling back to default.")
-        conversation_commands = [ConversationCommand.General]
+        chosen_io = {"sources": [ConversationCommand.General], "output": ConversationCommand.Text}

     conversation_commands = chosen_io.get("sources") + [chosen_io.get("output")]

@@ -1523,6 +1526,8 @@ async def chat_ws(
                     ack_type = "interrupt_acknowledged"
                     await websocket.send_text(json.dumps({"type": ack_type}))
                 else:
+                    ack_type = "interrupt_acknowledged"
+                    await websocket.send_text(json.dumps({"type": ack_type}))
                     logger.info(f"No ongoing task to interrupt for user {websocket.scope['user'].object.id}")
                     continue

@@ -1556,7 +1561,7 @@ async def chat_ws(
     except WebSocketDisconnect:
         logger.info(f"WebSocket disconnected for user {websocket.scope['user'].object.id}")
         if current_task and not current_task.done():
-            current_task.cancel()
+            interrupt_queue.put_nowait(ChatEvent.INTERRUPT.value)
     except Exception as e:
         logger.error(f"Error in websocket chat: {e}", exc_info=True)
         if current_task and not current_task.done():
@@ -1701,8 +1706,8 @@ async def process_chat_request(
         logger.debug(f"Chat request cancelled for user {websocket.scope['user'].object.id}")
         raise
     except Exception as e:
-        logger.error(f"Error processing chat request: {e}", exc_info=True)
         await websocket.send_text(json.dumps({"error": "Internal server error"}))
+        logger.error(f"Error processing chat request: {e}", exc_info=True)
         raise


khoj/routers/api_content.py CHANGED
@@ -570,7 +570,7 @@ async def indexer(
                     file_data.content.decode(file_data.encoding) if file_data.encoding else file_data.content
                 )
             else:
-                logger.warning(f"Skipped indexing unsupported file type sent by {client} client: {file_data.name}")
+                logger.debug(f"Skipped indexing unsupported file type sent by {client} client: {file_data.name}")

         indexer_input = IndexerInput(
             org=index_files["org"],