khoj 2.0.0b13.dev19__py3-none-any.whl → 2.0.0b13.dev23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. khoj/interface/compiled/404/index.html +2 -2
  2. khoj/interface/compiled/_next/static/chunks/{2327-fe87dd989d71d0eb.js → 2327-438aaec1657c5ada.js} +1 -1
  3. khoj/interface/compiled/_next/static/chunks/{3260-43d3019b92c315bb.js → 3260-82d2521fab032ff1.js} +1 -1
  4. khoj/interface/compiled/_next/static/chunks/app/agents/layout-e00fb81dca656a10.js +1 -0
  5. khoj/interface/compiled/_next/static/chunks/app/chat/layout-33934fc2d6ae6838.js +1 -0
  6. khoj/interface/compiled/_next/static/chunks/app/chat/{page-9a75d7369f2a7cd2.js → page-dfcc1e8e2ad62873.js} +1 -1
  7. khoj/interface/compiled/_next/static/chunks/{webpack-d60b0c57a6c38d0f.js → webpack-5393aad3d824e0cb.js} +1 -1
  8. khoj/interface/compiled/_next/static/css/{2945c4a857922f3b.css → c34713c98384ee87.css} +1 -1
  9. khoj/interface/compiled/agents/index.html +2 -2
  10. khoj/interface/compiled/agents/index.txt +2 -2
  11. khoj/interface/compiled/automations/index.html +2 -2
  12. khoj/interface/compiled/automations/index.txt +3 -3
  13. khoj/interface/compiled/chat/index.html +2 -2
  14. khoj/interface/compiled/chat/index.txt +2 -2
  15. khoj/interface/compiled/index.html +2 -2
  16. khoj/interface/compiled/index.txt +2 -2
  17. khoj/interface/compiled/search/index.html +2 -2
  18. khoj/interface/compiled/search/index.txt +2 -2
  19. khoj/interface/compiled/settings/index.html +2 -2
  20. khoj/interface/compiled/settings/index.txt +4 -4
  21. khoj/interface/compiled/share/chat/index.html +2 -2
  22. khoj/interface/compiled/share/chat/index.txt +2 -2
  23. khoj/processor/conversation/openai/gpt.py +65 -28
  24. khoj/processor/conversation/openai/utils.py +355 -18
  25. khoj/processor/conversation/prompts.py +11 -5
  26. khoj/processor/conversation/utils.py +3 -0
  27. khoj/routers/helpers.py +18 -4
  28. khoj/utils/constants.py +3 -0
  29. {khoj-2.0.0b13.dev19.dist-info → khoj-2.0.0b13.dev23.dist-info}/METADATA +1 -1
  30. {khoj-2.0.0b13.dev19.dist-info → khoj-2.0.0b13.dev23.dist-info}/RECORD +42 -42
  31. khoj/interface/compiled/_next/static/chunks/app/agents/layout-4e2a134ec26aa606.js +0 -1
  32. khoj/interface/compiled/_next/static/chunks/app/chat/layout-ad4d1792ab1a4108.js +0 -1
  33. /khoj/interface/compiled/_next/static/{N-GdBSXoYe-DuObnbXVRO → Q7tm150g44Fs4H1CGytNf}/_buildManifest.js +0 -0
  34. /khoj/interface/compiled/_next/static/{N-GdBSXoYe-DuObnbXVRO → Q7tm150g44Fs4H1CGytNf}/_ssgManifest.js +0 -0
  35. /khoj/interface/compiled/_next/static/chunks/{1327-511bb0a862efce80.js → 1327-e254819a9172cfa7.js} +0 -0
  36. /khoj/interface/compiled/_next/static/chunks/{1915-fbfe167c84ad60c5.js → 1915-5c6508f6ebb62a30.js} +0 -0
  37. /khoj/interface/compiled/_next/static/chunks/{2117-e78b6902ad6f75ec.js → 2117-080746c8e170c81a.js} +0 -0
  38. /khoj/interface/compiled/_next/static/chunks/{2939-4d4084c5b888b960.js → 2939-4af3fd24b8ffc9ad.js} +0 -0
  39. /khoj/interface/compiled/_next/static/chunks/{4447-d6cf93724d57e34b.js → 4447-cd95608f8e93e711.js} +0 -0
  40. /khoj/interface/compiled/_next/static/chunks/{8667-4b7790573b08c50d.js → 8667-50b03a89e82e0ba7.js} +0 -0
  41. /khoj/interface/compiled/_next/static/chunks/{9139-ce1ae935dac9c871.js → 9139-8ac4d9feb10f8869.js} +0 -0
  42. {khoj-2.0.0b13.dev19.dist-info → khoj-2.0.0b13.dev23.dist-info}/WHEEL +0 -0
  43. {khoj-2.0.0b13.dev19.dist-info → khoj-2.0.0b13.dev23.dist-info}/entry_points.txt +0 -0
  44. {khoj-2.0.0b13.dev19.dist-info → khoj-2.0.0b13.dev23.dist-info}/licenses/LICENSE +0 -0
khoj/processor/conversation/openai/utils.py CHANGED
@@ -21,6 +21,8 @@ from openai.types.chat.chat_completion_chunk import (
     Choice,
     ChoiceDelta,
 )
+from openai.types.responses import Response as OpenAIResponse
+from openai.types.responses import ResponseFunctionToolCall, ResponseReasoningItem
 from pydantic import BaseModel
 from tenacity import (
     before_sleep_log,
@@ -53,6 +55,26 @@ openai_clients: Dict[str, openai.OpenAI] = {}
 openai_async_clients: Dict[str, openai.AsyncOpenAI] = {}


+def _extract_text_for_instructions(content: Union[str, List, Dict, None]) -> str:
+    """Extract plain text from a message content suitable for Responses API instructions."""
+    if content is None:
+        return ""
+    if isinstance(content, str):
+        return content
+    if isinstance(content, list):
+        texts: List[str] = []
+        for part in content:
+            if isinstance(part, dict) and part.get("type") == "input_text" and part.get("text"):
+                texts.append(str(part.get("text")))
+        return "\n\n".join(texts)
+    if isinstance(content, dict):
+        # If a single part dict was passed
+        if content.get("type") == "input_text" and content.get("text"):
+            return str(content.get("text"))
+    # Fallback to string conversion
+    return str(content)
+
+
 @retry(
     retry=(
         retry_if_exception_type(openai._exceptions.APITimeoutError)
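Note: a quick sketch of what the new _extract_text_for_instructions helper returns for the content shapes it handles (inputs here are hypothetical):

    # String content passes through unchanged
    _extract_text_for_instructions("You are Khoj.")  # -> "You are Khoj."

    # List content keeps only the "input_text" parts, joined by blank lines
    parts = [
        {"type": "input_text", "text": "Be concise."},
        {"type": "input_image", "image_url": "https://example.com/a.png"},
    ]
    _extract_text_for_instructions(parts)  # -> "Be concise."

    # None maps to an empty string; other values fall back to str()
    _extract_text_for_instructions(None)  # -> ""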
@@ -390,6 +412,287 @@ async def chat_completion_with_backoff(
         commit_conversation_trace(messages, aggregated_response, tracer)


+@retry(
+    retry=(
+        retry_if_exception_type(openai._exceptions.APITimeoutError)
+        | retry_if_exception_type(openai._exceptions.APIError)
+        | retry_if_exception_type(openai._exceptions.APIConnectionError)
+        | retry_if_exception_type(openai._exceptions.RateLimitError)
+        | retry_if_exception_type(openai._exceptions.APIStatusError)
+        | retry_if_exception_type(ValueError)
+    ),
+    wait=wait_random_exponential(min=1, max=10),
+    stop=stop_after_attempt(3),
+    before_sleep=before_sleep_log(logger, logging.DEBUG),
+    reraise=True,
+)
+def responses_completion_with_backoff(
+    messages: List[ChatMessage],
+    model_name: str,
+    temperature=0.6,
+    openai_api_key=None,
+    api_base_url=None,
+    deepthought: bool = False,
+    model_kwargs: dict = {},
+    tracer: dict = {},
+) -> ResponseWithThought:
+    """
+    Synchronous helper using the OpenAI Responses API in streaming mode under the hood.
+    Aggregates streamed deltas and returns a ResponseWithThought.
+    """
+    client_key = f"{openai_api_key}--{api_base_url}"
+    client = openai_clients.get(client_key)
+    if not client:
+        client = get_openai_client(openai_api_key, api_base_url)
+        openai_clients[client_key] = client
+
+    formatted_messages = format_message_for_api(messages, api_base_url)
+    # Move the first system message to Responses API instructions
+    instructions: Optional[str] = None
+    if formatted_messages and formatted_messages[0].get("role") == "system":
+        instructions = _extract_text_for_instructions(formatted_messages[0].get("content")) or None
+        formatted_messages = formatted_messages[1:]
+
+    model_kwargs = deepcopy(model_kwargs)
+    model_kwargs["top_p"] = model_kwargs.get("top_p", 0.95)
+    # Configure thinking for openai reasoning models
+    if is_openai_reasoning_model(model_name, api_base_url):
+        temperature = 1
+        reasoning_effort = "medium" if deepthought else "low"
+        model_kwargs["reasoning"] = {"effort": reasoning_effort, "summary": "auto"}
+        # Remove unsupported params for reasoning models
+        model_kwargs.pop("top_p", None)
+        model_kwargs.pop("stop", None)
+
+    read_timeout = 300 if is_local_api(api_base_url) else 60
+
+    # Stream and aggregate
+    model_response: OpenAIResponse = client.responses.create(
+        input=formatted_messages,
+        instructions=instructions,
+        model=model_name,
+        temperature=temperature,
+        timeout=httpx.Timeout(30, read=read_timeout),  # type: ignore
+        store=False,
+        include=["reasoning.encrypted_content"],
+        **model_kwargs,
+    )
+    if not model_response or not isinstance(model_response, OpenAIResponse) or not model_response.output:
+        raise ValueError(f"Empty response returned by {model_name}.")
+
+    raw_content = [item.model_dump() for item in model_response.output]
+    aggregated_text = model_response.output_text
+    thoughts = ""
+    tool_calls: List[ToolCall] = []
+    for item in model_response.output:
+        if isinstance(item, ResponseFunctionToolCall):
+            tool_calls.append(ToolCall(name=item.name, args=json.loads(item.arguments), id=item.call_id))
+        elif isinstance(item, ResponseReasoningItem):
+            thoughts = "\n\n".join([summary.text for summary in item.summary])
+
+    if tool_calls:
+        if thoughts and aggregated_text:
+            # If there are tool calls, aggregate thoughts and responses into thoughts
+            thoughts = "\n".join([f"*{line.strip()}*" for line in thoughts.splitlines() if line.strip()])
+            thoughts = f"{thoughts}\n\n{aggregated_text}"
+        else:
+            thoughts = thoughts or aggregated_text
+        # Json dump tool calls into aggregated response
+        aggregated_text = json.dumps([tool_call.__dict__ for tool_call in tool_calls])
+
+    # Usage/cost tracking
+    input_tokens = model_response.usage.input_tokens if model_response and model_response.usage else 0
+    output_tokens = model_response.usage.output_tokens if model_response and model_response.usage else 0
+    cost = 0
+    cache_read_tokens = 0
+    if model_response and model_response.usage and model_response.usage.input_tokens_details:
+        cache_read_tokens = model_response.usage.input_tokens_details.cached_tokens
+        input_tokens -= cache_read_tokens
+    tracer["usage"] = get_chat_usage_metrics(
+        model_name, input_tokens, output_tokens, cache_read_tokens, usage=tracer.get("usage"), cost=cost
+    )
+
+    # Validate final aggregated text (either message or tool-calls JSON)
+    if is_none_or_empty(aggregated_text):
+        logger.warning(f"No response by {model_name}\nLast Message by {messages[-1].role}: {messages[-1].content}.")
+        raise ValueError(f"Empty or no response by {model_name} over Responses API. Retry if needed.")
+
+    # Trace
+    tracer["chat_model"] = model_name
+    tracer["temperature"] = temperature
+    if is_promptrace_enabled():
+        commit_conversation_trace(messages, aggregated_text, tracer)
+
+    return ResponseWithThought(text=aggregated_text, thought=thoughts, raw_content=raw_content)
+
+
+@retry(
+    retry=(
+        retry_if_exception_type(openai._exceptions.APITimeoutError)
+        | retry_if_exception_type(openai._exceptions.APIError)
+        | retry_if_exception_type(openai._exceptions.APIConnectionError)
+        | retry_if_exception_type(openai._exceptions.RateLimitError)
+        | retry_if_exception_type(openai._exceptions.APIStatusError)
+        | retry_if_exception_type(ValueError)
+    ),
+    wait=wait_exponential(multiplier=1, min=4, max=10),
+    stop=stop_after_attempt(3),
+    before_sleep=before_sleep_log(logger, logging.WARNING),
+    reraise=False,
+)
+async def responses_chat_completion_with_backoff(
+    messages: list[ChatMessage],
+    model_name: str,
+    temperature,
+    openai_api_key=None,
+    api_base_url=None,
+    deepthought=False,  # Unused; parity with legacy signature
+    tracer: dict = {},
+) -> AsyncGenerator[ResponseWithThought, None]:
+    """
+    Async streaming helper using the OpenAI Responses API.
+    Yields ResponseWithThought chunks as text/think deltas arrive.
+    """
+    client_key = f"{openai_api_key}--{api_base_url}"
+    client = openai_async_clients.get(client_key)
+    if not client:
+        client = get_openai_async_client(openai_api_key, api_base_url)
+        openai_async_clients[client_key] = client
+
+    formatted_messages = format_message_for_api(messages, api_base_url)
+    # Move the first system message to Responses API instructions
+    instructions: Optional[str] = None
+    if formatted_messages and formatted_messages[0].get("role") == "system":
+        instructions = _extract_text_for_instructions(formatted_messages[0].get("content")) or None
+        formatted_messages = formatted_messages[1:]
+
+    model_kwargs: dict = {}
+    model_kwargs["top_p"] = model_kwargs.get("top_p", 0.95)
+    # Configure thinking for openai reasoning models
+    if is_openai_reasoning_model(model_name, api_base_url):
+        temperature = 1
+        reasoning_effort = "medium" if deepthought else "low"
+        model_kwargs["reasoning"] = {"effort": reasoning_effort, "summary": "auto"}
+        # Remove unsupported params for reasoning models
+        model_kwargs.pop("top_p", None)
+        model_kwargs.pop("stop", None)
+
+    read_timeout = 300 if is_local_api(api_base_url) else 60
+
+    aggregated_text = ""
+    last_final: Optional[OpenAIResponse] = None
+    # Tool call assembly buffers
+    tool_calls_args: Dict[str, str] = {}
+    tool_calls_name: Dict[str, str] = {}
+    tool_call_order: List[str] = []
+
+    async with client.responses.stream(
+        input=formatted_messages,
+        instructions=instructions,
+        model=model_name,
+        temperature=temperature,
+        timeout=httpx.Timeout(30, read=read_timeout),
+        **model_kwargs,
+    ) as stream:  # type: ignore
+        async for event in stream:  # type: ignore
+            et = getattr(event, "type", "")
+            if et == "response.output_text.delta":
+                delta = getattr(event, "delta", "") or getattr(event, "output_text", "")
+                if delta:
+                    aggregated_text += delta
+                    yield ResponseWithThought(text=delta)
+            elif et == "response.reasoning.delta":
+                delta = getattr(event, "delta", "")
+                if delta:
+                    yield ResponseWithThought(thought=delta)
+            elif et == "response.tool_call.created":
+                item = getattr(event, "item", None)
+                tool_id = (
+                    getattr(event, "id", None)
+                    or getattr(event, "tool_call_id", None)
+                    or (getattr(item, "id", None) if item is not None else None)
+                )
+                name = (
+                    getattr(event, "name", None)
+                    or (getattr(item, "name", None) if item is not None else None)
+                    or getattr(event, "tool_name", None)
+                )
+                if tool_id:
+                    if tool_id not in tool_calls_args:
+                        tool_calls_args[tool_id] = ""
+                        tool_call_order.append(tool_id)
+                    if name:
+                        tool_calls_name[tool_id] = name
+            elif et == "response.tool_call.delta":
+                tool_id = getattr(event, "id", None) or getattr(event, "tool_call_id", None)
+                delta = getattr(event, "delta", None)
+                if hasattr(delta, "arguments"):
+                    arg_delta = getattr(delta, "arguments", "")
+                else:
+                    arg_delta = delta if isinstance(delta, str) else getattr(event, "arguments", "")
+                if tool_id and arg_delta:
+                    tool_calls_args[tool_id] = tool_calls_args.get(tool_id, "") + arg_delta
+                    if tool_id not in tool_call_order:
+                        tool_call_order.append(tool_id)
+            elif et == "response.tool_call.completed":
+                item = getattr(event, "item", None)
+                tool_id = (
+                    getattr(event, "id", None)
+                    or getattr(event, "tool_call_id", None)
+                    or (getattr(item, "id", None) if item is not None else None)
+                )
+                args_final = None
+                if item is not None:
+                    args_final = getattr(item, "arguments", None) or getattr(item, "args", None)
+                if tool_id and args_final:
+                    tool_calls_args[tool_id] = args_final if isinstance(args_final, str) else json.dumps(args_final)
+                    if tool_id not in tool_call_order:
+                        tool_call_order.append(tool_id)
+            # ignore other events for now
+        last_final = await stream.get_final_response()
+
+    # Usage/cost tracking after stream ends
+    input_tokens = last_final.usage.input_tokens if last_final and last_final.usage else 0
+    output_tokens = last_final.usage.output_tokens if last_final and last_final.usage else 0
+    cost = 0
+    tracer["usage"] = get_chat_usage_metrics(
+        model_name, input_tokens, output_tokens, usage=tracer.get("usage"), cost=cost
+    )
+
+    # If there are tool calls, package them into aggregated text for tracing parity
+    if tool_call_order:
+        packaged_tool_calls: List[ToolCall] = []
+        for tool_id in tool_call_order:
+            name = tool_calls_name.get(tool_id) or ""
+            args_str = tool_calls_args.get(tool_id, "")
+            try:
+                args = json.loads(args_str) if isinstance(args_str, str) else args_str
+            except Exception:
+                logger.warning(f"Failed to parse tool call arguments for {tool_id}: {args_str}")
+                args = {}
+            packaged_tool_calls.append(ToolCall(name=name, args=args, id=tool_id))
+        # Move any text into trace thought
+        tracer_text = aggregated_text
+        aggregated_text = json.dumps([tc.__dict__ for tc in packaged_tool_calls])
+        # Save for trace below
+        if tracer_text:
+            tracer.setdefault("_responses_stream_text", tracer_text)
+
+    if is_none_or_empty(aggregated_text):
+        logger.warning(f"No response by {model_name}\nLast Message by {messages[-1].role}: {messages[-1].content}.")
+        raise ValueError(f"Empty or no response by {model_name} over Responses API. Retry if needed.")
+
+    tracer["chat_model"] = model_name
+    tracer["temperature"] = temperature
+    if is_promptrace_enabled():
+        # If tool-calls were present, include any streamed text in the trace thought
+        trace_payload = aggregated_text
+        if tracer.get("_responses_stream_text"):
+            thoughts = tracer.pop("_responses_stream_text")
+            trace_payload = thoughts
+        commit_conversation_trace(messages, trace_payload, tracer)


 def get_structured_output_support(model_name: str, api_base_url: str = None) -> StructuredOutputSupport:
     if model_name.startswith("deepseek-reasoner"):
         return StructuredOutputSupport.NONE
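Note: the new synchronous helper can be driven roughly as sketched below. The model name, API key, and the ChatMessage import path are assumptions for illustration; khoj normally invokes this through its own chat orchestration:

    from langchain_core.messages.chat import ChatMessage  # assumed import path for the message class this module uses

    response = responses_completion_with_backoff(
        messages=[ChatMessage(content="What did I write about Cambodia?", role="user")],
        model_name="gpt-5-mini-2025-08-07",  # hypothetical model choice
        openai_api_key="sk-...",
        deepthought=False,
    )
    print(response.text)     # final answer, or tool-calls JSON when tools fired
    print(response.thought)  # reasoning summary for reasoning models, else ""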
@@ -412,6 +715,12 @@ def format_message_for_api(raw_messages: List[ChatMessage], api_base_url: str) -
         # Handle tool call and tool result message types
         message_type = message.additional_kwargs.get("message_type")
         if message_type == "tool_call":
+            if is_openai_api(api_base_url):
+                for part in message.content:
+                    if "status" in part:
+                        part.pop("status")  # Drop unsupported tool call status field
+                formatted_messages.extend(message.content)
+                continue
             # Convert tool_call to OpenAI function call format
             content = []
             for part in message.content:
@@ -450,14 +759,23 @@ def format_message_for_api(raw_messages: List[ChatMessage], api_base_url: str) -
                 if not tool_call_id:
                     logger.warning(f"Dropping tool result without valid tool_call_id: {part.get('name')}")
                     continue
-                formatted_messages.append(
-                    {
-                        "role": "tool",
-                        "tool_call_id": tool_call_id,
-                        "name": part.get("name"),
-                        "content": part.get("content"),
-                    }
-                )
+                if is_openai_api(api_base_url):
+                    formatted_messages.append(
+                        {
+                            "type": "function_call_output",
+                            "call_id": tool_call_id,
+                            "output": part.get("content"),
+                        }
+                    )
+                else:
+                    formatted_messages.append(
+                        {
+                            "role": "tool",
+                            "tool_call_id": tool_call_id,
+                            "name": part.get("name"),
+                            "content": part.get("content"),
+                        }
+                    )
             continue
         if isinstance(message.content, list) and not is_openai_api(api_base_url):
             assistant_texts = []
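Note: the practical effect of this branch is that the same tool result is serialized in one of two shapes depending on the endpoint (field values below are illustrative):

    # Official OpenAI endpoint (Responses API input item)
    {"type": "function_call_output", "call_id": "call_123", "output": "3 matching notes found"}

    # Other OpenAI-compatible endpoints (Chat Completions tool message)
    {"role": "tool", "tool_call_id": "call_123", "name": "search_notes", "content": "3 matching notes found"}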
@@ -489,6 +807,11 @@ def format_message_for_api(raw_messages: List[ChatMessage], api_base_url: str) -
                 message.content.remove(part)
             elif part["type"] == "image_url" and not part.get("image_url"):
                 message.content.remove(part)
+            # OpenAI models use the Responses API which uses slightly different content types
+            if part["type"] == "text":
+                part["type"] = "output_text" if message.role == "assistant" else "input_text"
+            if part["type"] == "image":
+                part["type"] = "output_image" if message.role == "assistant" else "input_image"
         # If no valid content parts left, remove the message
         if is_none_or_empty(message.content):
             messages.remove(message)
@@ -513,7 +836,9 @@ def is_openai_reasoning_model(model_name: str, api_base_url: str = None) -> bool
     """
     Check if the model is an OpenAI reasoning model
     """
-    return model_name.lower().startswith("o") and is_openai_api(api_base_url)
+    return is_openai_api(api_base_url) and (
+        model_name.lower().startswith("o") or model_name.lower().startswith("gpt-5")
+    )


 def is_non_streaming_model(model_name: str, api_base_url: str = None) -> bool:
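Note: with the widened check, expected results look roughly like this (assuming is_openai_api accepts the hosted OpenAI endpoint and rejects local base URLs):

    base = "https://api.openai.com/v1"
    is_openai_reasoning_model("o3", base)                # True: "o" prefix
    is_openai_reasoning_model("gpt-5-2025-08-07", base)  # True: new "gpt-5" prefix
    is_openai_reasoning_model("gpt-4o", base)            # False
    is_openai_reasoning_model("gpt-5", "http://localhost:11434/v1")  # False: not the OpenAI API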
@@ -850,20 +1175,32 @@ def add_qwen_no_think_tag(formatted_messages: List[dict]) -> None:
             break


-def to_openai_tools(tools: List[ToolDefinition]) -> List[Dict] | None:
+def to_openai_tools(tools: List[ToolDefinition], use_responses_api: bool) -> List[Dict] | None:
     "Transform tool definitions from standard format to OpenAI format."
-    openai_tools = [
-        {
-            "type": "function",
-            "function": {
+    if use_responses_api:
+        openai_tools = [
+            {
+                "type": "function",
                 "name": tool.name,
                 "description": tool.description,
                 "parameters": clean_response_schema(tool.schema),
                 "strict": True,
-            },
-        }
-        for tool in tools
-    ]
+            }
+            for tool in tools
+        ]
+    else:
+        openai_tools = [
+            {
+                "type": "function",
+                "function": {
+                    "name": tool.name,
+                    "description": tool.description,
+                    "parameters": clean_response_schema(tool.schema),
+                    "strict": True,
+                },
+            }
+            for tool in tools
+        ]

     return openai_tools or None

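Note: a sketch of the two shapes the updated to_openai_tools emits. The ToolDefinition constructor arguments are assumed from the attributes the function reads:

    tool = ToolDefinition(name="search_notes", description="Semantic search over notes.", schema={"type": "object"})

    to_openai_tools([tool], use_responses_api=True)
    # [{"type": "function", "name": "search_notes", "description": ..., "parameters": ..., "strict": True}]

    to_openai_tools([tool], use_responses_api=False)
    # [{"type": "function", "function": {"name": "search_notes", "description": ..., "parameters": ..., "strict": True}}]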
khoj/processor/conversation/prompts.py CHANGED
@@ -519,12 +519,13 @@ Q: {query}

 extract_questions_system_prompt = PromptTemplate.from_template(
     """
-You are Khoj, an extremely smart and helpful document search assistant with only the ability to retrieve information from the user's notes.
-Construct search queries to retrieve relevant information to answer the user's question.
+You are Khoj, an extremely smart and helpful document search assistant with only the ability to use natural language semantic search to retrieve information from the user's notes.
+Construct upto {max_queries} search queries to retrieve relevant information to answer the user's question.
 - You will be provided past questions(User), search queries(Assistant) and answers(A) for context.
-- Add as much context from the previous questions and answers as required into your search queries.
-- Break your search down into multiple search queries from a diverse set of lenses to retrieve all related documents.
-- Add date filters to your search queries from questions and answers when required to retrieve the relevant information.
+- You can use context from previous questions and answers to improve your search queries.
+- Break down your search into multiple search queries from a diverse set of lenses to retrieve all related documents. E.g who, what, where, when, why, how.
+- Add date filters to your search queries when required to retrieve the relevant information. This is the only structured query filter you can use.
+- Output 1 concept per query. Do not use boolean operators (OR/AND) to combine queries. They do not work and degrade search quality.
 - When asked a meta, vague or random questions, search for a variety of broad topics to answer the user's question.
 {personality_context}
 What searches will you perform to answer the users question? Respond with a JSON object with the key "queries" mapping to a list of searches you would perform on the user's knowledge base. Just return the queries and nothing else.
@@ -535,22 +536,27 @@ User's Location: {location}

 Here are some examples of how you can construct search queries to answer the user's question:

+Illustrate - Using diverse perspectives to retrieve all relevant documents
 User: How was my trip to Cambodia?
 Assistant: {{"queries": ["How was my trip to Cambodia?", "Angkor Wat temple visit", "Flight to Phnom Penh", "Expenses in Cambodia", "Stay in Cambodia"]}}
 A: The trip was amazing. You went to the Angkor Wat temple and it was beautiful.

+Illustrate - Combining date filters with natural language queries to retrieve documents in relevant date range
 User: What national parks did I go to last year?
 Assistant: {{"queries": ["National park I visited in {last_new_year} dt>='{last_new_year_date}' dt<'{current_new_year_date}'"]}}
 A: You visited the Grand Canyon and Yellowstone National Park in {last_new_year}.

+Illustrate - Using broad topics to answer meta or vague questions
 User: How can you help me?
 Assistant: {{"queries": ["Social relationships", "Physical and mental health", "Education and career", "Personal life goals and habits"]}}
 A: I can help you live healthier and happier across work and personal life

+Illustrate - Combining location and date in natural language queries with date filters to retrieve relevant documents
 User: Who all did I meet here yesterday?
 Assistant: {{"queries": ["Met in {location} on {yesterday_date} dt>='{yesterday_date}' dt<'{current_date}'"]}}
 A: Yesterday's note mentions your visit to your local beach with Ram and Shyam.

+Illustrate - Combining broad, diverse topics with date filters to answer meta or vague questions
 User: Share some random, interesting experiences from this month
 Assistant: {{"queries": ["Exciting travel adventures from {current_month}", "Fun social events dt>='{current_month}-01' dt<'{current_date}'", "Intense emotional experiences in {current_month}"]}}
 A: You had a great time at the local beach with your friends, attended a music concert and had a deep conversation with your friend, Khalid.
khoj/processor/conversation/utils.py CHANGED
@@ -68,6 +68,9 @@ model_to_prompt_size = {
     "o3": 60000,
     "o3-pro": 30000,
     "o4-mini": 90000,
+    "gpt-5-2025-08-07": 120000,
+    "gpt-5-mini-2025-08-07": 120000,
+    "gpt-5-nano-2025-08-07": 120000,
     # Google Models
     "gemini-2.5-flash": 120000,
     "gemini-2.5-pro": 60000,
khoj/routers/helpers.py CHANGED
@@ -1264,6 +1264,7 @@ async def extract_questions(
     location_data: LocationData = None,
     query_images: Optional[List[str]] = None,
     query_files: str = None,
+    max_queries: int = 5,
     tracer: dict = {},
 ):
     """
@@ -1293,14 +1294,20 @@
         location=location,
         username=username,
         personality_context=personality_context,
+        max_queries=max_queries,
     )

     prompt = prompts.extract_questions_user_message.format(text=query, chat_history=chat_history_str)

     class DocumentQueries(BaseModel):
-        """Choose searches to run on user documents."""
+        """Choose semantic search queries to run on user documents."""

-        queries: List[str] = Field(..., min_items=1, description="List of search queries to run on user documents.")
+        queries: List[str] = Field(
+            ...,
+            min_length=1,
+            max_length=max_queries,
+            description="List of semantic search queries to run on user documents.",
+        )

     raw_response = await send_message_to_model_wrapper(
         system_message=system_prompt,
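Note: the min_items → min_length/max_length swap matches Pydantic v2's constraint names for list fields. A minimal standalone sketch of the bound this schema enforces:

    from typing import List
    from pydantic import BaseModel, Field, ValidationError

    class DocumentQueries(BaseModel):
        queries: List[str] = Field(..., min_length=1, max_length=5)

    DocumentQueries(queries=["trip to Cambodia"])  # ok
    try:
        DocumentQueries(queries=[])  # violates min_length
    except ValidationError as err:
        print(err.errors()[0]["type"])  # "too_short"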
@@ -2995,7 +3002,7 @@ async def grep_files(
     lines_after = lines_after or 0

     try:
-        regex = re.compile(regex_pattern, re.IGNORECASE)
+        regex = re.compile(regex_pattern, re.IGNORECASE | re.MULTILINE)
     except re.error as e:
         yield {
             "query": _generate_query(0, 0, path_prefix, regex_pattern, lines_before, lines_after),
@@ -3005,7 +3012,14 @@
         return

     try:
-        file_matches = await FileObjectAdapters.aget_file_objects_by_regex(user, regex_pattern, path_prefix)
+        # Make db pushdown filters more permissive by removing line anchors
+        # The precise line-anchored matching will be done in Python stage
+        db_pattern = regex_pattern
+        db_pattern = re.sub(r"\(\?\w*\)", "", db_pattern)  # Remove inline flags like (?i), (?m), (?im)
+        db_pattern = re.sub(r"^\^", "", db_pattern)  # Remove ^ at regex pattern start
+        db_pattern = re.sub(r"\$$", "", db_pattern)  # Remove $ at regex pattern end
+
+        file_matches = await FileObjectAdapters.aget_file_objects_by_regex(user, db_pattern, path_prefix)

         line_matches = []
         for file_object in file_matches:
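Note: the three substitutions relax the pattern only for the database prefilter, in the order the code applies them; the precise anchored match still runs in Python. For a hypothetical input:

    import re

    pattern = "(?im)^TODO:.*$"
    db_pattern = re.sub(r"\(\?\w*\)", "", pattern)   # "^TODO:.*$"  (inline flags removed)
    db_pattern = re.sub(r"^\^", "", db_pattern)      # "TODO:.*$"   (leading anchor removed)
    db_pattern = re.sub(r"\$$", "", db_pattern)      # "TODO:.*"    (trailing anchor removed)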
khoj/utils/constants.py CHANGED
@@ -40,6 +40,9 @@ model_to_cost: Dict[str, Dict[str, float]] = {
     "o3": {"input": 2.0, "output": 8.00},
     "o3-pro": {"input": 20.0, "output": 80.00},
     "o4-mini": {"input": 1.10, "output": 4.40},
+    "gpt-5-2025-08-07": {"input": 1.25, "output": 10.00, "cache_read": 0.125},
+    "gpt-5-mini-2025-08-07": {"input": 0.25, "output": 2.00, "cache_read": 0.025},
+    "gpt-5-nano-2025-08-07": {"input": 0.05, "output": 0.40, "cache_read": 0.005},
     # Gemini Pricing: https://ai.google.dev/pricing
     "gemini-1.5-flash": {"input": 0.075, "output": 0.30},
     "gemini-1.5-flash-002": {"input": 0.075, "output": 0.30},
{khoj-2.0.0b13.dev19.dist-info → khoj-2.0.0b13.dev23.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: khoj
-Version: 2.0.0b13.dev19
+Version: 2.0.0b13.dev23
 Summary: Your Second Brain
 Project-URL: Homepage, https://khoj.dev
 Project-URL: Documentation, https://docs.khoj.dev