khoj 2.0.0b10__py3-none-any.whl → 2.0.0b11.dev15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. khoj/configure.py +74 -15
  2. khoj/interface/compiled/404/index.html +2 -2
  3. khoj/interface/compiled/_next/static/chunks/app/agents/layout-4e2a134ec26aa606.js +1 -0
  4. khoj/interface/compiled/_next/static/chunks/app/agents/{page-0006674668eb5a4d.js → page-9a4610474cd59a71.js} +1 -1
  5. khoj/interface/compiled/_next/static/chunks/app/automations/{page-4c465cde2d14cb52.js → page-f7bb9d777b7745d4.js} +1 -1
  6. khoj/interface/compiled/_next/static/chunks/app/chat/layout-ad4d1792ab1a4108.js +1 -0
  7. khoj/interface/compiled/_next/static/chunks/app/chat/{page-4408125f66c165cf.js → page-8e1c4f2af3c9429e.js} +1 -1
  8. khoj/interface/compiled/_next/static/chunks/app/{page-85b9b416898738f7.js → page-2b3056cba8aa96ce.js} +1 -1
  9. khoj/interface/compiled/_next/static/chunks/app/search/layout-c02531d586972d7d.js +1 -0
  10. khoj/interface/compiled/_next/static/chunks/app/search/{page-883b7d8d2e3abe3e.js → page-4885df3cd175c957.js} +1 -1
  11. khoj/interface/compiled/_next/static/chunks/app/settings/{page-95e994ddac31473f.js → page-8be3b35178abf2ec.js} +1 -1
  12. khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-e8e5db7830bf3f47.js +1 -0
  13. khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-c062269e6906ef22.js → page-4a4b0c0f4749c2b2.js} +1 -1
  14. khoj/interface/compiled/_next/static/chunks/{webpack-c375c47fee5a4dda.js → webpack-2d7431816511b8a5.js} +1 -1
  15. khoj/interface/compiled/_next/static/css/{a0c2fd63bb396f04.css → 23b26df423cd8a9c.css} +1 -1
  16. khoj/interface/compiled/_next/static/css/37a73b87f02df402.css +1 -0
  17. khoj/interface/compiled/_next/static/css/821d0d60b0b6871d.css +1 -0
  18. khoj/interface/compiled/agents/index.html +2 -2
  19. khoj/interface/compiled/agents/index.txt +2 -2
  20. khoj/interface/compiled/automations/index.html +2 -2
  21. khoj/interface/compiled/automations/index.txt +3 -3
  22. khoj/interface/compiled/chat/index.html +2 -2
  23. khoj/interface/compiled/chat/index.txt +2 -2
  24. khoj/interface/compiled/index.html +2 -2
  25. khoj/interface/compiled/index.txt +2 -2
  26. khoj/interface/compiled/search/index.html +2 -2
  27. khoj/interface/compiled/search/index.txt +2 -2
  28. khoj/interface/compiled/settings/index.html +2 -2
  29. khoj/interface/compiled/settings/index.txt +4 -4
  30. khoj/interface/compiled/share/chat/index.html +2 -2
  31. khoj/interface/compiled/share/chat/index.txt +2 -2
  32. khoj/interface/web/error.html +149 -0
  33. khoj/processor/conversation/google/utils.py +71 -5
  34. khoj/processor/conversation/openai/utils.py +54 -39
  35. khoj/processor/conversation/utils.py +1 -0
  36. khoj/processor/operator/__init__.py +1 -1
  37. khoj/routers/api_agents.py +1 -1
  38. khoj/routers/api_chat.py +95 -20
  39. khoj/routers/helpers.py +4 -4
  40. khoj/routers/research.py +1 -1
  41. khoj/routers/web_client.py +5 -0
  42. {khoj-2.0.0b10.dist-info → khoj-2.0.0b11.dev15.dist-info}/METADATA +1 -1
  43. {khoj-2.0.0b10.dist-info → khoj-2.0.0b11.dev15.dist-info}/RECORD +55 -54
  44. khoj/interface/compiled/_next/static/chunks/app/agents/layout-e49165209d2e406c.js +0 -1
  45. khoj/interface/compiled/_next/static/chunks/app/chat/layout-d5ae861e1ade9d08.js +0 -1
  46. khoj/interface/compiled/_next/static/chunks/app/search/layout-f5881c7ae3ba0795.js +0 -1
  47. khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-64a53f8ec4afa6b3.js +0 -1
  48. khoj/interface/compiled/_next/static/css/ee66643a6a5bf71c.css +0 -1
  49. khoj/interface/compiled/_next/static/css/fbacbdfd5e7f3f0e.css +0 -1
  50. /khoj/interface/compiled/_next/static/chunks/{1327-3b1a41af530fa8ee.js → 1327-1a9107b9a2a04a98.js} +0 -0
  51. /khoj/interface/compiled/_next/static/chunks/{1915-fbfe167c84ad60c5.js → 1915-5c6508f6ebb62a30.js} +0 -0
  52. /khoj/interface/compiled/_next/static/chunks/{2117-e78b6902ad6f75ec.js → 2117-080746c8e170c81a.js} +0 -0
  53. /khoj/interface/compiled/_next/static/chunks/{2939-4d4084c5b888b960.js → 2939-4af3fd24b8ffc9ad.js} +0 -0
  54. /khoj/interface/compiled/_next/static/chunks/{4447-d6cf93724d57e34b.js → 4447-cd95608f8e93e711.js} +0 -0
  55. /khoj/interface/compiled/_next/static/chunks/{8667-4b7790573b08c50d.js → 8667-50b03a89e82e0ba7.js} +0 -0
  56. /khoj/interface/compiled/_next/static/chunks/{9139-ce1ae935dac9c871.js → 9139-8ac4d9feb10f8869.js} +0 -0
  57. /khoj/interface/compiled/_next/static/{Ieo_9KsHXi-opl1-yfWnK → nqIeU27JxQkTS-5OXP3OU}/_buildManifest.js +0 -0
  58. /khoj/interface/compiled/_next/static/{Ieo_9KsHXi-opl1-yfWnK → nqIeU27JxQkTS-5OXP3OU}/_ssgManifest.js +0 -0
  59. {khoj-2.0.0b10.dist-info → khoj-2.0.0b11.dev15.dist-info}/WHEEL +0 -0
  60. {khoj-2.0.0b10.dist-info → khoj-2.0.0b11.dev15.dist-info}/entry_points.txt +0 -0
  61. {khoj-2.0.0b10.dist-info → khoj-2.0.0b11.dev15.dist-info}/licenses/LICENSE +0 -0
khoj/processor/conversation/google/utils.py CHANGED
@@ -2,6 +2,7 @@ import json
  import logging
  import os
  import random
+ import re
  from copy import deepcopy
  from time import perf_counter
  from typing import Any, AsyncGenerator, AsyncIterator, Dict, List
@@ -13,6 +14,7 @@ from google.genai import types as gtypes
  from langchain_core.messages.chat import ChatMessage
  from pydantic import BaseModel
  from tenacity import (
+     RetryCallState,
      before_sleep_log,
      retry,
      retry_if_exception,
@@ -73,7 +75,7 @@ SAFETY_SETTINGS = [
  def _is_retryable_error(exception: BaseException) -> bool:
      """Check if the exception is a retryable error"""
      # server errors
-     if isinstance(exception, gerrors.APIError):
+     if isinstance(exception, (gerrors.APIError, gerrors.ClientError)):
          return exception.code in [429, 502, 503, 504]
      # client errors
      if (
@@ -88,9 +90,48 @@ def _is_retryable_error(exception: BaseException) -> bool:
      return False


+ def _extract_retry_delay(exception: BaseException) -> float:
+     """Extract retry delay from Gemini error response, return in seconds"""
+     if (
+         isinstance(exception, (gerrors.ClientError, gerrors.APIError))
+         and hasattr(exception, "details")
+         and isinstance(exception.details, dict)
+     ):
+         # Look for retryDelay key, value pair. E.g "retryDelay": "54s"
+         if delay_str := exception.details.get("retryDelay"):
+             delay_seconds_match = re.search(r"(\d+)s", delay_str)
+             if delay_seconds_match:
+                 delay_seconds = float(delay_seconds_match.group(1))
+                 return delay_seconds
+     return None
+
+
+ def _wait_with_gemini_delay(min_wait=4, max_wait=120, multiplier=1, fallback_wait=None):
+     """Custom wait strategy that respects Gemini's retryDelay if present"""
+
+     def wait_func(retry_state: RetryCallState) -> float:
+         # Use backoff time if last exception suggests a retry delay
+         if retry_state.outcome and retry_state.outcome.failed:
+             exception = retry_state.outcome.exception()
+             gemini_delay = _extract_retry_delay(exception)
+             if gemini_delay:
+                 # Use the Gemini-suggested delay, but cap it at max_wait
+                 suggested_delay = min(gemini_delay, max_wait)
+                 logger.info(f"Using Gemini suggested retry delay: {suggested_delay} seconds")
+                 return suggested_delay
+         # Else use fallback backoff if provided
+         if fallback_wait:
+             return fallback_wait(retry_state)
+         # Else use exponential backoff with provided parameters
+         else:
+             return wait_exponential(multiplier=multiplier, min=min_wait, max=max_wait)(retry_state)
+
+     return wait_func
+
+
  @retry(
      retry=retry_if_exception(_is_retryable_error),
-     wait=wait_random_exponential(min=1, max=10),
+     wait=_wait_with_gemini_delay(min_wait=1, max_wait=10, fallback_wait=wait_random_exponential(min=1, max=10)),
      stop=stop_after_attempt(2),
      before_sleep=before_sleep_log(logger, logging.DEBUG),
      reraise=True,
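The new wait strategy above is a plain callable handed to tenacity's wait parameter; it inspects the failed attempt's exception and prefers the server-suggested retryDelay over exponential backoff. A minimal, standalone sketch of just the delay parsing, run against a hypothetical details payload (the helper name and sample data below are illustrative, not part of the package):

import re

def parse_retry_delay_seconds(details: dict) -> float | None:
    # Mirrors the regex in _extract_retry_delay: pull the integer out of a value like "54s"
    if delay_str := details.get("retryDelay"):
        if match := re.search(r"(\d+)s", delay_str):
            return float(match.group(1))
    return None

# Hypothetical error details shaped like a Gemini 429 quota response
assert parse_retry_delay_seconds({"retryDelay": "54s"}) == 54.0
assert parse_retry_delay_seconds({}) is None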
@@ -169,7 +210,14 @@ def gemini_completion_with_backoff(
          )
      except gerrors.ClientError as e:
          response = None
-         response_text, _ = handle_gemini_response(e.args)
+         # Handle 429 rate limit errors directly
+         if e.code == 429:
+             response_text = f"My brain is exhausted. Can you please try again in a bit?"
+             # Log the full error details for debugging
+             logger.error(f"Gemini ClientError: {e.code} {e.status}. Details: {e.details}")
+         # Handle other errors
+         else:
+             response_text, _ = handle_gemini_response(e.args)
          # Respond with reason for stopping
          logger.warning(
              f"LLM Response Prevented for {model_name}: {response_text}.\n"
@@ -206,7 +254,7 @@ def gemini_completion_with_backoff(

  @retry(
      retry=retry_if_exception(_is_retryable_error),
-     wait=wait_exponential(multiplier=1, min=4, max=10),
+     wait=_wait_with_gemini_delay(multiplier=1, min_wait=4, max_wait=10),
      stop=stop_after_attempt(3),
      before_sleep=before_sleep_log(logger, logging.WARNING),
      reraise=False,
@@ -310,6 +358,13 @@ def handle_gemini_response(
      candidates: list[gtypes.Candidate], prompt_feedback: gtypes.GenerateContentResponsePromptFeedback = None
  ):
      """Check if Gemini response was blocked and return an explanatory error message."""
+
+     # Ensure we have a proper list of candidates
+     if not isinstance(candidates, list):
+         message = f"\nUnexpected response format. Try again."
+         stopped = True
+         return message, stopped
+
      # Check if the response was blocked due to safety concerns with the prompt
      if len(candidates) == 0 and prompt_feedback:
          message = f"\nI'd prefer to not respond to that due to **{prompt_feedback.block_reason.name}** issues with your query."
@@ -428,7 +483,18 @@ def format_messages_for_gemini(
      if len(messages) == 1:
          messages[0].role = "user"

-     formatted_messages = [gtypes.Content(role=message.role, parts=message.content) for message in messages]
+     # Ensure messages are properly formatted for Content creation
+     valid_messages = []
+     for message in messages:
+         try:
+             # Try create Content object to validate the structure before adding to valid messages
+             gtypes.Content(role=message.role, parts=message.content)
+             valid_messages.append(message)
+         except Exception as e:
+             logger.warning(f"Dropping message with invalid content structure: {e}. Message: {message}")
+             continue
+
+     formatted_messages = [gtypes.Content(role=message.role, parts=message.content) for message in valid_messages]
      return formatted_messages, system_prompt

khoj/processor/conversation/openai/utils.py CHANGED
@@ -100,6 +100,7 @@ def completion_with_backoff(
          reasoning_effort = "high" if deepthought else "low"
          model_kwargs["reasoning_effort"] = reasoning_effort
      elif model_name.startswith("deepseek-reasoner"):
+         stream_processor = in_stream_thought_processor
          # Two successive messages cannot be from the same role. Should merge any back-to-back messages from the same role.
          # The first message should always be a user message (except system message).
          updated_messages: List[dict] = []
@@ -111,8 +112,8 @@ def completion_with_backoff(
              else:
                  updated_messages.append(message)
          formatted_messages = updated_messages
-     elif is_qwen_reasoning_model(model_name, api_base_url):
-         stream_processor = partial(in_stream_thought_processor, thought_tag="think")
+     elif is_qwen_style_reasoning_model(model_name, api_base_url):
+         stream_processor = in_stream_thought_processor
          # Reasoning is enabled by default. Disable when deepthought is False.
          # See https://qwenlm.github.io/blog/qwen3/#advanced-usages
          if not deepthought:
@@ -144,6 +145,14 @@ def completion_with_backoff(
                  elif chunk.type == "tool_calls.function.arguments.done":
                      tool_calls += [ToolCall(name=chunk.name, args=json.loads(chunk.arguments), id=None)]
          if tool_calls:
+             # If there are tool calls, aggregate thoughts and responses into thoughts
+             if thoughts and aggregated_response:
+                 # wrap each line of thought in italics
+                 thoughts = "\n".join([f"*{line.strip()}*" for line in thoughts.splitlines() if line.strip()])
+                 thoughts = f"{thoughts}\n\n{aggregated_response}"
+             else:
+                 thoughts = thoughts or aggregated_response
+             # Json dump tool calls into aggregated response
              tool_calls = [
                  ToolCall(name=chunk.name, args=chunk.args, id=tool_id) for chunk, tool_id in zip(tool_calls, tool_ids)
              ]
@@ -158,6 +167,25 @@ def completion_with_backoff(
              **model_kwargs,
          )
          aggregated_response = chunk.choices[0].message.content
+         if hasattr(chunk.choices[0].message, "reasoning_content"):
+             thoughts = chunk.choices[0].message.reasoning_content
+         else:
+             thoughts = chunk.choices[0].message.model_extra.get("reasoning_content", "")
+         raw_tool_calls = chunk.choices[0].message.tool_calls
+         if raw_tool_calls:
+             tool_calls = [
+                 ToolCall(name=tool.function.name, args=tool.function.parsed_arguments, id=tool.id)
+                 for tool in raw_tool_calls
+             ]
+             # If there are tool calls, aggregate thoughts and responses into thoughts
+             if thoughts and aggregated_response:
+                 # wrap each line of thought in italics
+                 thoughts = "\n".join([f"*{line.strip()}*" for line in thoughts.splitlines() if line.strip()])
+                 thoughts = f"{thoughts}\n\n{aggregated_response}"
+             else:
+                 thoughts = thoughts or aggregated_response
+             # Json dump tool calls into aggregated response
+             aggregated_response = json.dumps([tool_call.__dict__ for tool_call in tool_calls])

      # Calculate cost of chat
      input_tokens = chunk.usage.prompt_tokens if hasattr(chunk, "usage") and chunk.usage else 0
@@ -216,7 +244,7 @@ async def chat_completion_with_backoff(
          openai_async_clients[client_key] = client

      stream = not is_non_streaming_model(model_name, api_base_url)
-     stream_processor = adefault_stream_processor
+     stream_processor = astream_thought_processor
      if stream:
          model_kwargs["stream_options"] = {"include_usage": True}
      else:
@@ -244,13 +272,13 @@ async def chat_completion_with_backoff(
                  "content"
              ] = f"{first_system_message_content}\nFormatting re-enabled"
      elif is_twitter_reasoning_model(model_name, api_base_url):
-         stream_processor = adeepseek_stream_processor
          reasoning_effort = "high" if deepthought else "low"
          model_kwargs["reasoning_effort"] = reasoning_effort
      elif model_name.startswith("deepseek-reasoner") or "deepseek-r1" in model_name:
-         # Official Deepseek reasoner model returns structured thinking output.
-         # Deepseek r1 served via other AI model API providers return it in response stream
-         stream_processor = ain_stream_thought_processor if "deepseek-r1" in model_name else adeepseek_stream_processor  # type: ignore[assignment]
+         # Official Deepseek reasoner model and some inference APIs like vLLM return structured thinking output.
+         # Others like DeepInfra return it in response stream.
+         # Using the instream thought processor handles both cases, structured thoughts and in response thoughts.
+         stream_processor = ain_stream_thought_processor
          # Two successive messages cannot be from the same role. Should merge any back-to-back messages from the same role.
          # The first message should always be a user message (except system message).
          updated_messages: List[dict] = []
@@ -266,8 +294,8 @@ async def chat_completion_with_backoff(
              else:
                  updated_messages.append(message)
          formatted_messages = updated_messages
-     elif is_qwen_reasoning_model(model_name, api_base_url):
-         stream_processor = partial(ain_stream_thought_processor, thought_tag="think")
+     elif is_qwen_style_reasoning_model(model_name, api_base_url):
+         stream_processor = ain_stream_thought_processor
          # Reasoning is enabled by default. Disable when deepthought is False.
          # See https://qwenlm.github.io/blog/qwen3/#advanced-usages
          if not deepthought:
@@ -492,11 +520,12 @@ def is_twitter_reasoning_model(model_name: str, api_base_url: str = None) -> boo
      )


- def is_qwen_reasoning_model(model_name: str, api_base_url: str = None) -> bool:
+ def is_qwen_style_reasoning_model(model_name: str, api_base_url: str = None) -> bool:
      """
-     Check if the model is a Qwen reasoning model
+     Check if the model is a Qwen style reasoning model
      """
-     return "qwen3" in model_name.lower() and api_base_url is not None
+     qwen_style_reason_model = ["qwen3", "smollm3"]
+     return any(prefix in model_name.lower() for prefix in qwen_style_reason_model) and api_base_url is not None


  def is_local_api(api_base_url: str) -> bool:
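For a rough sense of what the renamed check now matches, here is the same predicate exercised with illustrative model names and a local OpenAI-compatible endpoint (the model names and URL are examples only):

def is_qwen_style_reasoning_model(model_name: str, api_base_url: str = None) -> bool:
    # Copied from the diff above for illustration
    qwen_style_reason_model = ["qwen3", "smollm3"]
    return any(prefix in model_name.lower() for prefix in qwen_style_reason_model) and api_base_url is not None

assert is_qwen_style_reasoning_model("Qwen3-32B", "http://localhost:8080/v1")
assert is_qwen_style_reasoning_model("SmolLM3-3B", "http://localhost:8080/v1")
assert not is_qwen_style_reasoning_model("qwen3-8b", None)  # no custom api_base_url, so not treated as qwen-style here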
@@ -543,39 +572,17 @@ def default_stream_processor(
      chat_stream: ChatCompletionStream,
  ) -> Generator[ChatCompletionStreamWithThoughtEvent, None, None]:
      """
-     Async generator to cast and return chunks from the standard openai chat completions stream.
+     Generator of chunks from the standard openai chat completions stream.
      """
      for chunk in chat_stream:
          yield chunk


- async def adefault_stream_processor(
+ async def astream_thought_processor(
      chat_stream: openai.AsyncStream[ChatCompletionChunk],
  ) -> AsyncGenerator[ChatCompletionWithThoughtsChunk, None]:
      """
-     Async generator to cast and return chunks from the standard openai chat completions stream.
-     """
-     async for chunk in chat_stream:
-         try:
-             # Validate the chunk has the required fields before processing
-             chunk_data = chunk.model_dump()
-
-             # Skip chunks that don't have the required object field or have invalid values
-             if not chunk_data.get("object") or chunk_data.get("object") != "chat.completion.chunk":
-                 logger.warning(f"Skipping invalid chunk with object field: {chunk_data.get('object', 'missing')}")
-                 continue
-
-             yield ChatCompletionWithThoughtsChunk.model_validate(chunk_data)
-         except Exception as e:
-             logger.warning(f"Error processing chunk: {e}. Skipping malformed chunk.")
-             continue
-
-
- async def adeepseek_stream_processor(
-     chat_stream: openai.AsyncStream[ChatCompletionChunk],
- ) -> AsyncGenerator[ChatCompletionWithThoughtsChunk, None]:
-     """
-     Async generator to cast and return chunks from the deepseek chat completions stream.
+     Async generator of chunks from standard openai chat completions stream with thoughts/reasoning.
      """
      async for chunk in chat_stream:
          try:
@@ -588,12 +595,19 @@ async def adeepseek_stream_processor(
                  continue

              tchunk = ChatCompletionWithThoughtsChunk.model_validate(chunk_data)
+
+             # Handlle deepseek style response with thoughts. Used by AI APIs like vLLM, sgLang, DeepSeek, LiteLLM.
              if (
                  len(tchunk.choices) > 0
                  and hasattr(tchunk.choices[0].delta, "reasoning_content")
                  and tchunk.choices[0].delta.reasoning_content
              ):
                  tchunk.choices[0].delta.thought = chunk.choices[0].delta.reasoning_content
+
+             # Handlle llama.cpp server style response with thoughts.
+             elif len(tchunk.choices) > 0 and tchunk.choices[0].delta.model_extra.get("reasoning_content"):
+                 tchunk.choices[0].delta.thought = tchunk.choices[0].delta.model_extra.get("reasoning_content")
+
              yield tchunk
          except Exception as e:
              logger.warning(f"Error processing chunk: {e}. Skipping malformed chunk.")
@@ -702,7 +716,7 @@ async def ain_stream_thought_processor(
      chat_stream: openai.AsyncStream[ChatCompletionChunk], thought_tag="think"
  ) -> AsyncGenerator[ChatCompletionWithThoughtsChunk, None]:
      """
-     Async generator for chat completion with thought chunks.
+     Async generator for chat completion with structured and inline thought chunks.
      Assumes <thought_tag>...</thought_tag> can only appear once at the start.
      Handles partial tags across streamed chunks.
      """
@@ -712,7 +726,7 @@ async def ain_stream_thought_processor(
      # Modes and transitions: detect_start > thought (optional) > message
      mode = "detect_start"

-     async for chunk in adefault_stream_processor(chat_stream):
+     async for chunk in astream_thought_processor(chat_stream):
          if len(chunk.choices) == 0:
              continue
          if mode == "message":
@@ -829,6 +843,7 @@ def to_openai_tools(tools: List[ToolDefinition]) -> List[Dict] | None:
                  "name": tool.name,
                  "description": tool.description,
                  "parameters": clean_response_schema(tool.schema),
+                 "strict": True,
              },
          }
          for tool in tools
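With "strict": True added in to_openai_tools, each tool is sent to the OpenAI chat completions API in roughly the shape sketched below. The weather tool is hypothetical; strict function calling generally also expects the parameter schema to be fully specified (every property listed under "required", "additionalProperties" set to false), which is presumably what clean_response_schema prepares.

# Hypothetical tool, shaped like the payload to_openai_tools now emits
weather_tool = {
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Look up the current weather for a city.",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
            "additionalProperties": False,
        },
        "strict": True,
    },
}
# Passed as e.g. client.chat.completions.create(..., tools=[weather_tool])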
khoj/processor/conversation/utils.py CHANGED
@@ -385,6 +385,7 @@ class ChatEvent(Enum):
      USAGE = "usage"
      END_RESPONSE = "end_response"
      INTERRUPT = "interrupt"
+     END_EVENT = "␃🔚␗"


  def message_to_log(
khoj/processor/operator/__init__.py CHANGED
@@ -44,7 +44,7 @@ async def operate_environment(
      query_files: str = None,  # TODO: Handle query files
      cancellation_event: Optional[asyncio.Event] = None,
      interrupt_queue: Optional[asyncio.Queue] = None,
-     abort_message: Optional[str] = "␃🔚␗",
+     abort_message: Optional[str] = ChatEvent.END_EVENT.value,
      tracer: dict = {},
  ):
      response, user_input_message = None, None
khoj/routers/api_agents.py CHANGED
@@ -138,7 +138,7 @@ async def get_agent_by_conversation(
      else:
          agent_chat_model = None

-     has_files = agent.fileobject_set.exists()
+     has_files = await agent.fileobject_set.aexists()

      agents_packet = {
          "slug": agent.slug,
khoj/routers/api_chat.py CHANGED
@@ -4,6 +4,7 @@ import json
  import logging
  import time
  import uuid
+ from dataclasses import dataclass
  from datetime import datetime
  from functools import partial
  from typing import Any, Dict, List, Optional
@@ -703,7 +704,6 @@ async def event_generator(
      train_of_thought = []
      cancellation_event = asyncio.Event()
      child_interrupt_queue: asyncio.Queue = asyncio.Queue(maxsize=10)
-     event_delimiter = "␃🔚␗"

      tracer: dict = {
          "mid": turn_id,
@@ -790,7 +790,7 @@ async def event_generator(

      # Check if any interrupt query is received
      if interrupt_query := get_message_from_queue(parent_interrupt_queue):
-         if interrupt_query == event_delimiter:
+         if interrupt_query == ChatEvent.END_EVENT.value:
              cancellation_event.set()
              logger.debug(f"Chat cancelled by user {user} via interrupt queue.")
          else:
@@ -871,7 +871,7 @@ async def event_generator(
          )
      finally:
          if not cancellation_event.is_set():
-             yield event_delimiter
+             yield ChatEvent.END_EVENT.value
          # Cancel the disconnect monitor task if it is still running
          if cancellation_event.is_set() or event_type == ChatEvent.END_RESPONSE:
              await cancel_disconnect_monitor()
@@ -1043,7 +1043,7 @@ async def event_generator(
              tracer=tracer,
              cancellation_event=cancellation_event,
              interrupt_queue=child_interrupt_queue,
-             abort_message=event_delimiter,
+             abort_message=ChatEvent.END_EVENT.value,
          ):
              if isinstance(research_result, ResearchIteration):
                  if research_result.summarizedResult:
@@ -1397,6 +1397,7 @@ async def event_generator(
          )

      full_response = ""
+     message_start = True
      async for item in llm_response:
          # Should not happen with async generator. Skip.
          if item is None or not isinstance(item, ResponseWithThought):
@@ -1410,10 +1411,11 @@ async def event_generator(
              async for result in send_event(ChatEvent.THOUGHT, item.thought):
                  yield result
              continue
-
          # Start sending response
-         async for result in send_event(ChatEvent.START_LLM_RESPONSE, ""):
-             yield result
+         elif message_start:
+             message_start = False
+             async for result in send_event(ChatEvent.START_LLM_RESPONSE, ""):
+                 yield result

          try:
              async for result in send_event(ChatEvent.MESSAGE, message):
@@ -1423,6 +1425,13 @@ async def event_generator(
              logger.warning(f"Error during streaming. Stopping send: {e}")
              break

+         # Check if the user has disconnected
+         if cancellation_event.is_set():
+             logger.debug(f"Stopping LLM response to user {user} on {common.client} client.")
+             # Cancel the disconnect monitor task if it is still running
+             await cancel_disconnect_monitor()
+             return
+
      # Save conversation once finish streaming
      asyncio.create_task(
          save_to_conversation_log(
@@ -1448,16 +1457,16 @@ async def event_generator(
      )

      # Signal end of LLM response after the loop finishes
-     if not cancellation_event.is_set():
-         async for result in send_event(ChatEvent.END_LLM_RESPONSE, ""):
-             yield result
-         # Send Usage Metadata once llm interactions are complete
-         if tracer.get("usage"):
-             async for event in send_event(ChatEvent.USAGE, tracer.get("usage")):
-                 yield event
-         async for result in send_event(ChatEvent.END_RESPONSE, ""):
-             yield result
-         logger.debug("Finished streaming response")
+     async for result in send_event(ChatEvent.END_LLM_RESPONSE, ""):
+         yield result
+
+     # Send Usage Metadata once llm interactions are complete
+     if tracer.get("usage"):
+         async for event in send_event(ChatEvent.USAGE, tracer.get("usage")):
+             yield event
+     async for result in send_event(ChatEvent.END_RESPONSE, ""):
+         yield result
+     logger.debug("Finished streaming response")

      # Cancel the disconnect monitor task if it is still running
      await cancel_disconnect_monitor()
@@ -1509,8 +1518,7 @@ async def chat_ws(
              if data.get("type") == "interrupt":
                  if current_task and not current_task.done():
                      # Send interrupt signal to the ongoing task
-                     abort_message = "␃🔚␗"
-                     await interrupt_queue.put(data.get("query") or abort_message)
+                     await interrupt_queue.put(data.get("query") or ChatEvent.END_EVENT.value)
                      logger.info(
                          f"Interrupt signal sent to ongoing task for user {websocket.scope['user'].object.id} with query: {data.get('query')}"
                      )
@@ -1572,6 +1580,37 @@ async def process_chat_request(
      interrupt_queue: asyncio.Queue,
  ):
      """Process a single chat request with interrupt support"""
+
+     # Server-side message buffering for better streaming performance
+     @dataclass
+     class MessageBuffer:
+         """Buffer for managing streamed chat messages with timing control."""
+
+         content: str = ""
+         timeout: Optional[asyncio.Task] = None
+         last_flush: float = 0.0
+
+         def __post_init__(self):
+             """Initialize last_flush with current time if not provided."""
+             if self.last_flush == 0.0:
+                 self.last_flush = time.perf_counter()
+
+     message_buffer = MessageBuffer()
+     BUFFER_FLUSH_INTERVAL = 0.1  # 100ms buffer interval
+     BUFFER_MAX_SIZE = 512  # Flush if buffer reaches this size
+
+     async def flush_message_buffer():
+         """Flush the accumulated message buffer to the client"""
+         nonlocal message_buffer
+         if message_buffer.content:
+             buffered_content = message_buffer.content
+             message_buffer.content = ""
+             message_buffer.last_flush = time.perf_counter()
+             if message_buffer.timeout:
+                 message_buffer.timeout.cancel()
+                 message_buffer.timeout = None
+             yield buffered_content
+
      try:
          # Since we are using websockets, we can ignore the stream parameter and always stream
          response_iterator = event_generator(
@@ -1583,7 +1622,43 @@ async def process_chat_request(
              interrupt_queue,
          )
          async for event in response_iterator:
-             await websocket.send_text(event)
+             if event.startswith("{") and event.endswith("}"):
+                 evt_json = json.loads(event)
+                 if evt_json["type"] == ChatEvent.END_LLM_RESPONSE.value:
+                     # Flush remaining buffer content on end llm response event
+                     chunks = "".join([chunk async for chunk in flush_message_buffer()])
+                     await websocket.send_text(chunks)
+                     await websocket.send_text(ChatEvent.END_EVENT.value)
+                 await websocket.send_text(event)
+                 await websocket.send_text(ChatEvent.END_EVENT.value)
+             elif event != ChatEvent.END_EVENT.value:
+                 # Buffer MESSAGE events for better streaming performance
+                 message_buffer.content += str(event)
+
+                 # Flush if buffer is too large or enough time has passed
+                 current_time = time.perf_counter()
+                 should_flush_time = (current_time - message_buffer.last_flush) >= BUFFER_FLUSH_INTERVAL
+                 should_flush_size = len(message_buffer.content) >= BUFFER_MAX_SIZE
+
+                 if should_flush_size or should_flush_time:
+                     chunks = "".join([chunk async for chunk in flush_message_buffer()])
+                     await websocket.send_text(chunks)
+                     await websocket.send_text(ChatEvent.END_EVENT.value)
+                 else:
+                     # Cancel any previous timeout tasks to reset the flush timer
+                     if message_buffer.timeout:
+                         message_buffer.timeout.cancel()
+
+                     async def delayed_flush():
+                         """Flush message buffer if no new messages arrive within debounce interval."""
+                         await asyncio.sleep(BUFFER_FLUSH_INTERVAL)
+                         # Check if there's still content to flush
+                         chunks = "".join([chunk async for chunk in flush_message_buffer()])
+                         await websocket.send_text(chunks)
+                         await websocket.send_text(ChatEvent.END_EVENT.value)
+
+                     # Flush buffer if no new messages arrive within debounce interval
+                     message_buffer.timeout = asyncio.create_task(delayed_flush())
      except asyncio.CancelledError:
          logger.debug(f"Chat request cancelled for user {websocket.scope['user'].object.id}")
          raise
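The websocket handler above batches MESSAGE events server-side and flushes on size (512 characters), on a 100 ms interval, or via a debounce task once the stream goes quiet, framing each flush with ChatEvent.END_EVENT. A stripped-down sketch of the same debounce-flush pattern, independent of Khoj's websocket plumbing (the class and function names below are illustrative, not part of the package):

import asyncio
import time

FLUSH_INTERVAL = 0.1  # seconds, mirrors BUFFER_FLUSH_INTERVAL
MAX_SIZE = 512        # characters of buffered text, mirrors BUFFER_MAX_SIZE

class DebouncedBuffer:
    """Accumulate chunks; flush on size, elapsed time, or after a quiet period."""

    def __init__(self, send):
        self.send = send  # async callable that delivers a flushed string
        self.content = ""
        self.last_flush = time.perf_counter()
        self.timeout: asyncio.Task | None = None

    async def flush(self):
        if not self.content:
            return
        chunk, self.content = self.content, ""
        self.last_flush = time.perf_counter()
        # Avoid cancelling ourselves when called from the debounce task
        if self.timeout and self.timeout is not asyncio.current_task():
            self.timeout.cancel()
        self.timeout = None
        await self.send(chunk)

    async def add(self, piece: str):
        self.content += piece
        overdue = (time.perf_counter() - self.last_flush) >= FLUSH_INTERVAL
        if len(self.content) >= MAX_SIZE or overdue:
            await self.flush()
        else:
            # Debounce: (re)schedule a flush for when no new pieces arrive in time
            if self.timeout:
                self.timeout.cancel()
            self.timeout = asyncio.create_task(self._delayed_flush())

    async def _delayed_flush(self):
        await asyncio.sleep(FLUSH_INTERVAL)
        await self.flush()

async def demo():
    async def send(chunk: str):
        print(f"flush: {chunk!r}")

    buffer = DebouncedBuffer(send)
    for piece in ["Hello", ", ", "world"]:
        await buffer.add(piece)
    await asyncio.sleep(0.3)  # let the debounce flush fire

asyncio.run(demo())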
khoj/routers/helpers.py CHANGED
@@ -2099,7 +2099,8 @@ class WebSocketConnectionManager:
              user=user, slug__startswith=self.connection_slug_prefix
          ).acount()

-         return active_connections < max_connections
+         # Restrict max active connections per user in production
+         return active_connections < max_connections or state.anonymous_mode or in_debug_mode()

      async def register_connection(self, user: KhojUser, connection_id: str) -> None:
          """Register a new WebSocket connection."""
@@ -2616,7 +2617,6 @@ class MessageProcessor:

  async def read_chat_stream(response_iterator: AsyncGenerator[str, None]) -> Dict[str, Any]:
      processor = MessageProcessor()
-     event_delimiter = "␃🔚␗"
      buffer = ""

      async for chunk in response_iterator:
@@ -2624,9 +2624,9 @@ async def read_chat_stream(response_iterator: AsyncGenerator[str, None]) -> Dict
          buffer += chunk

          # Once the buffer contains a complete event
-         while event_delimiter in buffer:
+         while ChatEvent.END_EVENT.value in buffer:
              # Extract the event from the buffer
-             event, buffer = buffer.split(event_delimiter, 1)
+             event, buffer = buffer.split(ChatEvent.END_EVENT.value, 1)
              # Process the event
              if event:
                  processor.process_message_chunk(event)
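read_chat_stream above frames events with the ChatEvent.END_EVENT sentinel and reassembles them from arbitrarily sized chunks. A minimal sketch of that framing and parsing round-trip, using a local copy of the sentinel and illustrative event payloads:

END_EVENT = "␃🔚␗"  # local copy of ChatEvent.END_EVENT.value for illustration

def split_events(chunks):
    """Reassemble delimiter-framed events from arbitrarily sized stream chunks."""
    buffer = ""
    for chunk in chunks:
        buffer += chunk
        while END_EVENT in buffer:
            event, buffer = buffer.split(END_EVENT, 1)
            if event:
                yield event

# Chunk boundaries need not line up with event boundaries
chunks = ['{"type": "message", "data": "Hel', 'lo"}' + END_EVENT + '{"type": "end_llm_response"}', END_EVENT]
assert list(split_events(chunks)) == ['{"type": "message", "data": "Hello"}', '{"type": "end_llm_response"}']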
khoj/routers/research.py CHANGED
@@ -224,7 +224,7 @@ async def research(
      query_files: str = None,
      cancellation_event: Optional[asyncio.Event] = None,
      interrupt_queue: Optional[asyncio.Queue] = None,
-     abort_message: str = "␃🔚␗",
+     abort_message: str = ChatEvent.END_EVENT.value,
  ):
      max_document_searches = 7
      max_online_searches = 3
khoj/routers/web_client.py CHANGED
@@ -139,3 +139,8 @@ def automations_config_page(
  @web_client.get("/.well-known/assetlinks.json", response_class=FileResponse)
  def assetlinks(request: Request):
      return FileResponse(constants.assetlinks_file_path)
+
+
+ @web_client.get("/server/error", response_class=HTMLResponse)
+ def server_error_page(request: Request):
+     return templates.TemplateResponse("error.html", context={"request": request})
{khoj-2.0.0b10.dist-info → khoj-2.0.0b11.dev15.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: khoj
- Version: 2.0.0b10
+ Version: 2.0.0b11.dev15
  Summary: Your Second Brain
  Project-URL: Homepage, https://khoj.dev
  Project-URL: Documentation, https://docs.khoj.dev