khoj 2.0.0b13.dev19__py3-none-any.whl → 2.0.0b13.dev23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/interface/compiled/404/index.html +2 -2
- khoj/interface/compiled/_next/static/chunks/{2327-fe87dd989d71d0eb.js → 2327-438aaec1657c5ada.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{3260-43d3019b92c315bb.js → 3260-82d2521fab032ff1.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-e00fb81dca656a10.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/chat/layout-33934fc2d6ae6838.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/chat/{page-9a75d7369f2a7cd2.js → page-dfcc1e8e2ad62873.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{webpack-d60b0c57a6c38d0f.js → webpack-5393aad3d824e0cb.js} +1 -1
- khoj/interface/compiled/_next/static/css/{2945c4a857922f3b.css → c34713c98384ee87.css} +1 -1
- khoj/interface/compiled/agents/index.html +2 -2
- khoj/interface/compiled/agents/index.txt +2 -2
- khoj/interface/compiled/automations/index.html +2 -2
- khoj/interface/compiled/automations/index.txt +3 -3
- khoj/interface/compiled/chat/index.html +2 -2
- khoj/interface/compiled/chat/index.txt +2 -2
- khoj/interface/compiled/index.html +2 -2
- khoj/interface/compiled/index.txt +2 -2
- khoj/interface/compiled/search/index.html +2 -2
- khoj/interface/compiled/search/index.txt +2 -2
- khoj/interface/compiled/settings/index.html +2 -2
- khoj/interface/compiled/settings/index.txt +4 -4
- khoj/interface/compiled/share/chat/index.html +2 -2
- khoj/interface/compiled/share/chat/index.txt +2 -2
- khoj/processor/conversation/openai/gpt.py +65 -28
- khoj/processor/conversation/openai/utils.py +355 -18
- khoj/processor/conversation/prompts.py +11 -5
- khoj/processor/conversation/utils.py +3 -0
- khoj/routers/helpers.py +18 -4
- khoj/utils/constants.py +3 -0
- {khoj-2.0.0b13.dev19.dist-info → khoj-2.0.0b13.dev23.dist-info}/METADATA +1 -1
- {khoj-2.0.0b13.dev19.dist-info → khoj-2.0.0b13.dev23.dist-info}/RECORD +42 -42
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-4e2a134ec26aa606.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/layout-ad4d1792ab1a4108.js +0 -1
- /khoj/interface/compiled/_next/static/{N-GdBSXoYe-DuObnbXVRO → Q7tm150g44Fs4H1CGytNf}/_buildManifest.js +0 -0
- /khoj/interface/compiled/_next/static/{N-GdBSXoYe-DuObnbXVRO → Q7tm150g44Fs4H1CGytNf}/_ssgManifest.js +0 -0
- /khoj/interface/compiled/_next/static/chunks/{1327-511bb0a862efce80.js → 1327-e254819a9172cfa7.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{1915-fbfe167c84ad60c5.js → 1915-5c6508f6ebb62a30.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{2117-e78b6902ad6f75ec.js → 2117-080746c8e170c81a.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{2939-4d4084c5b888b960.js → 2939-4af3fd24b8ffc9ad.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{4447-d6cf93724d57e34b.js → 4447-cd95608f8e93e711.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{8667-4b7790573b08c50d.js → 8667-50b03a89e82e0ba7.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{9139-ce1ae935dac9c871.js → 9139-8ac4d9feb10f8869.js} +0 -0
- {khoj-2.0.0b13.dev19.dist-info → khoj-2.0.0b13.dev23.dist-info}/WHEEL +0 -0
- {khoj-2.0.0b13.dev19.dist-info → khoj-2.0.0b13.dev23.dist-info}/entry_points.txt +0 -0
- {khoj-2.0.0b13.dev19.dist-info → khoj-2.0.0b13.dev23.dist-info}/licenses/LICENSE +0 -0
khoj/processor/conversation/openai/utils.py
CHANGED

@@ -21,6 +21,8 @@ from openai.types.chat.chat_completion_chunk import (
     Choice,
     ChoiceDelta,
 )
+from openai.types.responses import Response as OpenAIResponse
+from openai.types.responses import ResponseFunctionToolCall, ResponseReasoningItem
 from pydantic import BaseModel
 from tenacity import (
     before_sleep_log,
@@ -53,6 +55,26 @@ openai_clients: Dict[str, openai.OpenAI] = {}
 openai_async_clients: Dict[str, openai.AsyncOpenAI] = {}


+def _extract_text_for_instructions(content: Union[str, List, Dict, None]) -> str:
+    """Extract plain text from a message content suitable for Responses API instructions."""
+    if content is None:
+        return ""
+    if isinstance(content, str):
+        return content
+    if isinstance(content, list):
+        texts: List[str] = []
+        for part in content:
+            if isinstance(part, dict) and part.get("type") == "input_text" and part.get("text"):
+                texts.append(str(part.get("text")))
+        return "\n\n".join(texts)
+    if isinstance(content, dict):
+        # If a single part dict was passed
+        if content.get("type") == "input_text" and content.get("text"):
+            return str(content.get("text"))
+    # Fallback to string conversion
+    return str(content)
+
+
 @retry(
     retry=(
         retry_if_exception_type(openai._exceptions.APITimeoutError)
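For orientation, a runnable copy of the new helper (condensed, same behavior) exercised against the content shapes it handles:

```python
# Condensed standalone copy of _extract_text_for_instructions from the hunk above
from typing import Dict, List, Union


def _extract_text_for_instructions(content: Union[str, List, Dict, None]) -> str:
    """Extract plain text from a message content suitable for Responses API instructions."""
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        # Keep only input_text parts with non-empty text
        return "\n\n".join(
            str(part.get("text"))
            for part in content
            if isinstance(part, dict) and part.get("type") == "input_text" and part.get("text")
        )
    if isinstance(content, dict) and content.get("type") == "input_text" and content.get("text"):
        return str(content.get("text"))
    # Fallback to string conversion
    return str(content)


assert _extract_text_for_instructions(None) == ""
assert _extract_text_for_instructions("be brief") == "be brief"
assert (
    _extract_text_for_instructions(
        [{"type": "input_text", "text": "a"}, {"type": "image_url", "image_url": "x"}, {"type": "input_text", "text": "b"}]
    )
    == "a\n\nb"
)
assert _extract_text_for_instructions({"type": "input_text", "text": "solo part"}) == "solo part"
```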
@@ -390,6 +412,287 @@ async def chat_completion_with_backoff(
         commit_conversation_trace(messages, aggregated_response, tracer)


+@retry(
+    retry=(
+        retry_if_exception_type(openai._exceptions.APITimeoutError)
+        | retry_if_exception_type(openai._exceptions.APIError)
+        | retry_if_exception_type(openai._exceptions.APIConnectionError)
+        | retry_if_exception_type(openai._exceptions.RateLimitError)
+        | retry_if_exception_type(openai._exceptions.APIStatusError)
+        | retry_if_exception_type(ValueError)
+    ),
+    wait=wait_random_exponential(min=1, max=10),
+    stop=stop_after_attempt(3),
+    before_sleep=before_sleep_log(logger, logging.DEBUG),
+    reraise=True,
+)
+def responses_completion_with_backoff(
+    messages: List[ChatMessage],
+    model_name: str,
+    temperature=0.6,
+    openai_api_key=None,
+    api_base_url=None,
+    deepthought: bool = False,
+    model_kwargs: dict = {},
+    tracer: dict = {},
+) -> ResponseWithThought:
+    """
+    Synchronous helper using the OpenAI Responses API in streaming mode under the hood.
+    Aggregates streamed deltas and returns a ResponseWithThought.
+    """
+    client_key = f"{openai_api_key}--{api_base_url}"
+    client = openai_clients.get(client_key)
+    if not client:
+        client = get_openai_client(openai_api_key, api_base_url)
+        openai_clients[client_key] = client
+
+    formatted_messages = format_message_for_api(messages, api_base_url)
+    # Move the first system message to Responses API instructions
+    instructions: Optional[str] = None
+    if formatted_messages and formatted_messages[0].get("role") == "system":
+        instructions = _extract_text_for_instructions(formatted_messages[0].get("content")) or None
+        formatted_messages = formatted_messages[1:]
+
+    model_kwargs = deepcopy(model_kwargs)
+    model_kwargs["top_p"] = model_kwargs.get("top_p", 0.95)
+    # Configure thinking for openai reasoning models
+    if is_openai_reasoning_model(model_name, api_base_url):
+        temperature = 1
+        reasoning_effort = "medium" if deepthought else "low"
+        model_kwargs["reasoning"] = {"effort": reasoning_effort, "summary": "auto"}
+        # Remove unsupported params for reasoning models
+        model_kwargs.pop("top_p", None)
+        model_kwargs.pop("stop", None)
+
+    read_timeout = 300 if is_local_api(api_base_url) else 60
+
+    # Stream and aggregate
+    model_response: OpenAIResponse = client.responses.create(
+        input=formatted_messages,
+        instructions=instructions,
+        model=model_name,
+        temperature=temperature,
+        timeout=httpx.Timeout(30, read=read_timeout),  # type: ignore
+        store=False,
+        include=["reasoning.encrypted_content"],
+        **model_kwargs,
+    )
+    if not model_response or not isinstance(model_response, OpenAIResponse) or not model_response.output:
+        raise ValueError(f"Empty response returned by {model_name}.")
+
+    raw_content = [item.model_dump() for item in model_response.output]
+    aggregated_text = model_response.output_text
+    thoughts = ""
+    tool_calls: List[ToolCall] = []
+    for item in model_response.output:
+        if isinstance(item, ResponseFunctionToolCall):
+            tool_calls.append(ToolCall(name=item.name, args=json.loads(item.arguments), id=item.call_id))
+        elif isinstance(item, ResponseReasoningItem):
+            thoughts = "\n\n".join([summary.text for summary in item.summary])
+
+    if tool_calls:
+        if thoughts and aggregated_text:
+            # If there are tool calls, aggregate thoughts and responses into thoughts
+            thoughts = "\n".join([f"*{line.strip()}*" for line in thoughts.splitlines() if line.strip()])
+            thoughts = f"{thoughts}\n\n{aggregated_text}"
+        else:
+            thoughts = thoughts or aggregated_text
+        # Json dump tool calls into aggregated response
+        aggregated_text = json.dumps([tool_call.__dict__ for tool_call in tool_calls])
+
+    # Usage/cost tracking
+    input_tokens = model_response.usage.input_tokens if model_response and model_response.usage else 0
+    output_tokens = model_response.usage.output_tokens if model_response and model_response.usage else 0
+    cost = 0
+    cache_read_tokens = 0
+    if model_response and model_response.usage and model_response.usage.input_tokens_details:
+        cache_read_tokens = model_response.usage.input_tokens_details.cached_tokens
+        input_tokens -= cache_read_tokens
+    tracer["usage"] = get_chat_usage_metrics(
+        model_name, input_tokens, output_tokens, cache_read_tokens, usage=tracer.get("usage"), cost=cost
+    )
+
+    # Validate final aggregated text (either message or tool-calls JSON)
+    if is_none_or_empty(aggregated_text):
+        logger.warning(f"No response by {model_name}\nLast Message by {messages[-1].role}: {messages[-1].content}.")
+        raise ValueError(f"Empty or no response by {model_name} over Responses API. Retry if needed.")
+
+    # Trace
+    tracer["chat_model"] = model_name
+    tracer["temperature"] = temperature
+    if is_promptrace_enabled():
+        commit_conversation_trace(messages, aggregated_text, tracer)
+
+    return ResponseWithThought(text=aggregated_text, thought=thoughts, raw_content=raw_content)
+
+
+@retry(
+    retry=(
+        retry_if_exception_type(openai._exceptions.APITimeoutError)
+        | retry_if_exception_type(openai._exceptions.APIError)
+        | retry_if_exception_type(openai._exceptions.APIConnectionError)
+        | retry_if_exception_type(openai._exceptions.RateLimitError)
+        | retry_if_exception_type(openai._exceptions.APIStatusError)
+        | retry_if_exception_type(ValueError)
+    ),
+    wait=wait_exponential(multiplier=1, min=4, max=10),
+    stop=stop_after_attempt(3),
+    before_sleep=before_sleep_log(logger, logging.WARNING),
+    reraise=False,
+)
+async def responses_chat_completion_with_backoff(
+    messages: list[ChatMessage],
+    model_name: str,
+    temperature,
+    openai_api_key=None,
+    api_base_url=None,
+    deepthought=False,  # Unused; parity with legacy signature
+    tracer: dict = {},
+) -> AsyncGenerator[ResponseWithThought, None]:
+    """
+    Async streaming helper using the OpenAI Responses API.
+    Yields ResponseWithThought chunks as text/think deltas arrive.
+    """
+    client_key = f"{openai_api_key}--{api_base_url}"
+    client = openai_async_clients.get(client_key)
+    if not client:
+        client = get_openai_async_client(openai_api_key, api_base_url)
+        openai_async_clients[client_key] = client
+
+    formatted_messages = format_message_for_api(messages, api_base_url)
+    # Move the first system message to Responses API instructions
+    instructions: Optional[str] = None
+    if formatted_messages and formatted_messages[0].get("role") == "system":
+        instructions = _extract_text_for_instructions(formatted_messages[0].get("content")) or None
+        formatted_messages = formatted_messages[1:]
+
+    model_kwargs: dict = {}
+    model_kwargs["top_p"] = model_kwargs.get("top_p", 0.95)
+    # Configure thinking for openai reasoning models
+    if is_openai_reasoning_model(model_name, api_base_url):
+        temperature = 1
+        reasoning_effort = "medium" if deepthought else "low"
+        model_kwargs["reasoning"] = {"effort": reasoning_effort, "summary": "auto"}
+        # Remove unsupported params for reasoning models
+        model_kwargs.pop("top_p", None)
+        model_kwargs.pop("stop", None)
+
+    read_timeout = 300 if is_local_api(api_base_url) else 60
+
+    aggregated_text = ""
+    last_final: Optional[OpenAIResponse] = None
+    # Tool call assembly buffers
+    tool_calls_args: Dict[str, str] = {}
+    tool_calls_name: Dict[str, str] = {}
+    tool_call_order: List[str] = []
+
+    async with client.responses.stream(
+        input=formatted_messages,
+        instructions=instructions,
+        model=model_name,
+        temperature=temperature,
+        timeout=httpx.Timeout(30, read=read_timeout),
+        **model_kwargs,
+    ) as stream:  # type: ignore
+        async for event in stream:  # type: ignore
+            et = getattr(event, "type", "")
+            if et == "response.output_text.delta":
+                delta = getattr(event, "delta", "") or getattr(event, "output_text", "")
+                if delta:
+                    aggregated_text += delta
+                    yield ResponseWithThought(text=delta)
+            elif et == "response.reasoning.delta":
+                delta = getattr(event, "delta", "")
+                if delta:
+                    yield ResponseWithThought(thought=delta)
+            elif et == "response.tool_call.created":
+                item = getattr(event, "item", None)
+                tool_id = (
+                    getattr(event, "id", None)
+                    or getattr(event, "tool_call_id", None)
+                    or (getattr(item, "id", None) if item is not None else None)
+                )
+                name = (
+                    getattr(event, "name", None)
+                    or (getattr(item, "name", None) if item is not None else None)
+                    or getattr(event, "tool_name", None)
+                )
+                if tool_id:
+                    if tool_id not in tool_calls_args:
+                        tool_calls_args[tool_id] = ""
+                        tool_call_order.append(tool_id)
+                    if name:
+                        tool_calls_name[tool_id] = name
+            elif et == "response.tool_call.delta":
+                tool_id = getattr(event, "id", None) or getattr(event, "tool_call_id", None)
+                delta = getattr(event, "delta", None)
+                if hasattr(delta, "arguments"):
+                    arg_delta = getattr(delta, "arguments", "")
+                else:
+                    arg_delta = delta if isinstance(delta, str) else getattr(event, "arguments", "")
+                if tool_id and arg_delta:
+                    tool_calls_args[tool_id] = tool_calls_args.get(tool_id, "") + arg_delta
+                    if tool_id not in tool_call_order:
+                        tool_call_order.append(tool_id)
+            elif et == "response.tool_call.completed":
+                item = getattr(event, "item", None)
+                tool_id = (
+                    getattr(event, "id", None)
+                    or getattr(event, "tool_call_id", None)
+                    or (getattr(item, "id", None) if item is not None else None)
+                )
+                args_final = None
+                if item is not None:
+                    args_final = getattr(item, "arguments", None) or getattr(item, "args", None)
+                if tool_id and args_final:
+                    tool_calls_args[tool_id] = args_final if isinstance(args_final, str) else json.dumps(args_final)
+                    if tool_id not in tool_call_order:
+                        tool_call_order.append(tool_id)
+            # ignore other events for now
+        last_final = await stream.get_final_response()
+
+    # Usage/cost tracking after stream ends
+    input_tokens = last_final.usage.input_tokens if last_final and last_final.usage else 0
+    output_tokens = last_final.usage.output_tokens if last_final and last_final.usage else 0
+    cost = 0
+    tracer["usage"] = get_chat_usage_metrics(
+        model_name, input_tokens, output_tokens, usage=tracer.get("usage"), cost=cost
+    )
+
+    # If there are tool calls, package them into aggregated text for tracing parity
+    if tool_call_order:
+        packaged_tool_calls: List[ToolCall] = []
+        for tool_id in tool_call_order:
+            name = tool_calls_name.get(tool_id) or ""
+            args_str = tool_calls_args.get(tool_id, "")
+            try:
+                args = json.loads(args_str) if isinstance(args_str, str) else args_str
+            except Exception:
+                logger.warning(f"Failed to parse tool call arguments for {tool_id}: {args_str}")
+                args = {}
+            packaged_tool_calls.append(ToolCall(name=name, args=args, id=tool_id))
+        # Move any text into trace thought
+        tracer_text = aggregated_text
+        aggregated_text = json.dumps([tc.__dict__ for tc in packaged_tool_calls])
+        # Save for trace below
+        if tracer_text:
+            tracer.setdefault("_responses_stream_text", tracer_text)
+
+    if is_none_or_empty(aggregated_text):
+        logger.warning(f"No response by {model_name}\nLast Message by {messages[-1].role}: {messages[-1].content}.")
+        raise ValueError(f"Empty or no response by {model_name} over Responses API. Retry if needed.")
+
+    tracer["chat_model"] = model_name
+    tracer["temperature"] = temperature
+    if is_promptrace_enabled():
+        # If tool-calls were present, include any streamed text in the trace thought
+        trace_payload = aggregated_text
+        if tracer.get("_responses_stream_text"):
+            thoughts = tracer.pop("_responses_stream_text")
+            trace_payload = thoughts
+        commit_conversation_trace(messages, trace_payload, tracer)
+
+
 def get_structured_output_support(model_name: str, api_base_url: str = None) -> StructuredOutputSupport:
     if model_name.startswith("deepseek-reasoner"):
         return StructuredOutputSupport.NONE
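Both new helpers share one packaging contract: when the model emits tool calls, the calls are JSON-serialized into the response text and any accompanying prose moves to the thought channel. A minimal sketch of that step, with simplified stand-ins for khoj's ToolCall and ResponseWithThought:

```python
import json
from dataclasses import dataclass
from typing import List


@dataclass
class ToolCall:
    name: str
    args: dict
    id: str


@dataclass
class ResponseWithThought:
    text: str = ""
    thought: str = ""


def package_tool_calls(tool_calls: List[ToolCall], streamed_text: str) -> ResponseWithThought:
    # No tool calls: streamed text is the user-visible response
    if not tool_calls:
        return ResponseWithThought(text=streamed_text)
    # Tool calls present: serialize them into the text channel and demote
    # any prose the model produced alongside them to a thought
    return ResponseWithThought(
        text=json.dumps([tc.__dict__ for tc in tool_calls]),
        thought=streamed_text,
    )


response = package_tool_calls(
    [ToolCall(name="grep_files", args={"regex_pattern": "^## "}, id="call_1")],
    "Searching your notes for headings...",
)
assert json.loads(response.text)[0]["name"] == "grep_files"
```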
@@ -412,6 +715,12 @@ def format_message_for_api(raw_messages: List[ChatMessage], api_base_url: str) -
         # Handle tool call and tool result message types
         message_type = message.additional_kwargs.get("message_type")
         if message_type == "tool_call":
+            if is_openai_api(api_base_url):
+                for part in message.content:
+                    if "status" in part:
+                        part.pop("status")  # Drop unsupported tool call status field
+                formatted_messages.extend(message.content)
+                continue
             # Convert tool_call to OpenAI function call format
             content = []
             for part in message.content:
@@ -450,14 +759,23 @@ def format_message_for_api(raw_messages: List[ChatMessage], api_base_url: str) -
                 if not tool_call_id:
                     logger.warning(f"Dropping tool result without valid tool_call_id: {part.get('name')}")
                     continue
-                formatted_messages.append(
-                    {
-                        "role": "tool",
-                        "tool_call_id": tool_call_id,
-                        "name": part.get("name"),
-                        "content": part.get("content"),
-                    }
-                )
+                if is_openai_api(api_base_url):
+                    formatted_messages.append(
+                        {
+                            "type": "function_call_output",
+                            "call_id": tool_call_id,
+                            "output": part.get("content"),
+                        }
+                    )
+                else:
+                    formatted_messages.append(
+                        {
+                            "role": "tool",
+                            "tool_call_id": tool_call_id,
+                            "name": part.get("name"),
+                            "content": part.get("content"),
+                        }
+                    )
             continue
         if isinstance(message.content, list) and not is_openai_api(api_base_url):
             assistant_texts = []
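Restated outside the message loop, these are the two tool-result payload shapes the branch above now produces; format_tool_result is an illustrative helper, with part mirroring khoj's internal tool-result dict:

```python
# The Responses API takes a flat function_call_output item keyed by call_id,
# while Chat Completions-style backends take a role="tool" message.
def format_tool_result(part: dict, tool_call_id: str, use_responses_api: bool) -> dict:
    if use_responses_api:
        return {
            "type": "function_call_output",
            "call_id": tool_call_id,
            "output": part.get("content"),
        }
    return {
        "role": "tool",
        "tool_call_id": tool_call_id,
        "name": part.get("name"),
        "content": part.get("content"),
    }


part = {"name": "grep_files", "content": "3 matches found"}
assert format_tool_result(part, "call_1", True)["type"] == "function_call_output"
assert format_tool_result(part, "call_1", False)["role"] == "tool"
```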
@@ -489,6 +807,11 @@ def format_message_for_api(raw_messages: List[ChatMessage], api_base_url: str) -
                     message.content.remove(part)
                 elif part["type"] == "image_url" and not part.get("image_url"):
                     message.content.remove(part)
+                # OpenAI models use the Responses API which uses slightly different content types
+                if part["type"] == "text":
+                    part["type"] = "output_text" if message.role == "assistant" else "input_text"
+                if part["type"] == "image":
+                    part["type"] = "output_image" if message.role == "assistant" else "input_image"
             # If no valid content parts left, remove the message
             if is_none_or_empty(message.content):
                 messages.remove(message)
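A compact restatement of the content-type mapping added above; map_content_type is an illustrative helper, not part of khoj:

```python
# Generic "text"/"image" parts become direction-specific input_*/output_* types
# depending on whether the message came from the user or the assistant.
def map_content_type(part_type: str, role: str) -> str:
    direction = "output" if role == "assistant" else "input"
    if part_type == "text":
        return f"{direction}_text"
    if part_type == "image":
        return f"{direction}_image"
    return part_type


assert map_content_type("text", "user") == "input_text"
assert map_content_type("text", "assistant") == "output_text"
assert map_content_type("image", "user") == "input_image"
```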
@@ -513,7 +836,9 @@ def is_openai_reasoning_model(model_name: str, api_base_url: str = None) -> bool
     """
     Check if the model is an OpenAI reasoning model
     """
-    return
+    return is_openai_api(api_base_url) and (
+        model_name.lower().startswith("o") or model_name.lower().startswith("gpt-5")
+    )


 def is_non_streaming_model(model_name: str, api_base_url: str = None) -> bool:
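A standalone sketch of the updated predicate, with is_openai_api stubbed out (in khoj it checks whether api_base_url points at the official OpenAI endpoint; the stub here is an assumption for illustration):

```python
def is_openai_api(api_base_url: str = None) -> bool:
    # Stubbed assumption: no base URL or the official endpoint means OpenAI proper
    return api_base_url is None or api_base_url.startswith("https://api.openai.com")


def is_openai_reasoning_model(model_name: str, api_base_url: str = None) -> bool:
    return is_openai_api(api_base_url) and (
        model_name.lower().startswith("o") or model_name.lower().startswith("gpt-5")
    )


assert is_openai_reasoning_model("o3-pro")
assert is_openai_reasoning_model("gpt-5-mini-2025-08-07")
assert not is_openai_reasoning_model("gpt-4o")  # starts with "g", not "o" or "gpt-5"
assert not is_openai_reasoning_model("o3", "http://localhost:11434/v1")  # non-OpenAI base URL
```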
@@ -850,20 +1175,32 @@ def add_qwen_no_think_tag(formatted_messages: List[dict]) -> None:
             break


-def to_openai_tools(tools: List[ToolDefinition]) -> List[Dict] | None:
+def to_openai_tools(tools: List[ToolDefinition], use_responses_api: bool) -> List[Dict] | None:
     "Transform tool definitions from standard format to OpenAI format."
-    openai_tools = [
-        {
-            "type": "function",
-            "function": {
+    if use_responses_api:
+        openai_tools = [
+            {
+                "type": "function",
                 "name": tool.name,
                 "description": tool.description,
                 "parameters": clean_response_schema(tool.schema),
                 "strict": True,
-            },
-        }
-        for tool in tools
-    ]
+            }
+            for tool in tools
+        ]
+    else:
+        openai_tools = [
+            {
+                "type": "function",
+                "function": {
+                    "name": tool.name,
+                    "description": tool.description,
+                    "parameters": clean_response_schema(tool.schema),
+                    "strict": True,
+                },
+            }
+            for tool in tools
+        ]

     return openai_tools or None
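Side by side, the two shapes to_openai_tools now emits for a single definition; ToolDefinition is a simplified stand-in for khoj's type, and clean_response_schema is assumed to pass the JSON schema through unchanged:

```python
from dataclasses import dataclass


@dataclass
class ToolDefinition:
    name: str
    description: str
    schema: dict


tool = ToolDefinition(
    name="grep_files",
    description="Regex search over user files",
    schema={"type": "object", "properties": {"regex_pattern": {"type": "string"}}},
)

# Responses API: flat function entry
responses_style = {
    "type": "function",
    "name": tool.name,
    "description": tool.description,
    "parameters": tool.schema,
    "strict": True,
}

# Chat Completions API: same fields nested under "function"
chat_completions_style = {
    "type": "function",
    "function": {
        "name": tool.name,
        "description": tool.description,
        "parameters": tool.schema,
        "strict": True,
    },
}

assert "function" not in responses_style
assert chat_completions_style["function"]["name"] == "grep_files"
```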
khoj/processor/conversation/prompts.py
CHANGED

@@ -519,12 +519,13 @@ Q: {query}

 extract_questions_system_prompt = PromptTemplate.from_template(
     """
-You are Khoj, an extremely smart and helpful document search assistant with only the ability to retrieve information from the user's notes.
-Construct search queries to retrieve relevant information to answer the user's question.
+You are Khoj, an extremely smart and helpful document search assistant with only the ability to use natural language semantic search to retrieve information from the user's notes.
+Construct upto {max_queries} search queries to retrieve relevant information to answer the user's question.
 - You will be provided past questions(User), search queries(Assistant) and answers(A) for context.
--
-- Break your search
-- Add date filters to your search queries
+- You can use context from previous questions and answers to improve your search queries.
+- Break down your search into multiple search queries from a diverse set of lenses to retrieve all related documents. E.g who, what, where, when, why, how.
+- Add date filters to your search queries when required to retrieve the relevant information. This is the only structured query filter you can use.
+- Output 1 concept per query. Do not use boolean operators (OR/AND) to combine queries. They do not work and degrade search quality.
 - When asked a meta, vague or random questions, search for a variety of broad topics to answer the user's question.
 {personality_context}
 What searches will you perform to answer the users question? Respond with a JSON object with the key "queries" mapping to a list of searches you would perform on the user's knowledge base. Just return the queries and nothing else.
@@ -535,22 +536,27 @@ User's Location: {location}

 Here are some examples of how you can construct search queries to answer the user's question:

+Illustrate - Using diverse perspectives to retrieve all relevant documents
 User: How was my trip to Cambodia?
 Assistant: {{"queries": ["How was my trip to Cambodia?", "Angkor Wat temple visit", "Flight to Phnom Penh", "Expenses in Cambodia", "Stay in Cambodia"]}}
 A: The trip was amazing. You went to the Angkor Wat temple and it was beautiful.

+Illustrate - Combining date filters with natural language queries to retrieve documents in relevant date range
 User: What national parks did I go to last year?
 Assistant: {{"queries": ["National park I visited in {last_new_year} dt>='{last_new_year_date}' dt<'{current_new_year_date}'"]}}
 A: You visited the Grand Canyon and Yellowstone National Park in {last_new_year}.

+Illustrate - Using broad topics to answer meta or vague questions
 User: How can you help me?
 Assistant: {{"queries": ["Social relationships", "Physical and mental health", "Education and career", "Personal life goals and habits"]}}
 A: I can help you live healthier and happier across work and personal life

+Illustrate - Combining location and date in natural language queries with date filters to retrieve relevant documents
 User: Who all did I meet here yesterday?
 Assistant: {{"queries": ["Met in {location} on {yesterday_date} dt>='{yesterday_date}' dt<'{current_date}'"]}}
 A: Yesterday's note mentions your visit to your local beach with Ram and Shyam.

+Illustrate - Combining broad, diverse topics with date filters to answer meta or vague questions
 User: Share some random, interesting experiences from this month
 Assistant: {{"queries": ["Exciting travel adventures from {current_month}", "Fun social events dt>='{current_month}-01' dt<'{current_date}'", "Intense emotional experiences in {current_month}"]}}
 A: You had a great time at the local beach with your friends, attended a music concert and had a deep conversation with your friend, Khalid.
khoj/processor/conversation/utils.py
CHANGED

@@ -68,6 +68,9 @@ model_to_prompt_size = {
     "o3": 60000,
     "o3-pro": 30000,
     "o4-mini": 90000,
+    "gpt-5-2025-08-07": 120000,
+    "gpt-5-mini-2025-08-07": 120000,
+    "gpt-5-nano-2025-08-07": 120000,
     # Google Models
     "gemini-2.5-flash": 120000,
     "gemini-2.5-pro": 60000,
khoj/routers/helpers.py
CHANGED
@@ -1264,6 +1264,7 @@ async def extract_questions(
     location_data: LocationData = None,
     query_images: Optional[List[str]] = None,
     query_files: str = None,
+    max_queries: int = 5,
     tracer: dict = {},
 ):
     """
@@ -1293,14 +1294,20 @@ async def extract_questions(
         location=location,
         username=username,
         personality_context=personality_context,
+        max_queries=max_queries,
     )

     prompt = prompts.extract_questions_user_message.format(text=query, chat_history=chat_history_str)

     class DocumentQueries(BaseModel):
-        """Choose
+        """Choose semantic search queries to run on user documents."""

-        queries: List[str] = Field(
+        queries: List[str] = Field(
+            ...,
+            min_length=1,
+            max_length=max_queries,
+            description="List of semantic search queries to run on user documents.",
+        )

     raw_response = await send_message_to_model_wrapper(
         system_message=system_prompt,
@@ -2995,7 +3002,7 @@ async def grep_files(
     lines_after = lines_after or 0

     try:
-        regex = re.compile(regex_pattern, re.IGNORECASE)
+        regex = re.compile(regex_pattern, re.IGNORECASE | re.MULTILINE)
     except re.error as e:
         yield {
             "query": _generate_query(0, 0, path_prefix, regex_pattern, lines_before, lines_after),
@@ -3005,7 +3012,14 @@ async def grep_files(
         return

     try:
-        file_matches = await FileObjectAdapters.aget_file_objects_by_regex(user, regex_pattern, path_prefix)
+        # Make db pushdown filters more permissive by removing line anchors
+        # The precise line-anchored matching will be done in Python stage
+        db_pattern = regex_pattern
+        db_pattern = re.sub(r"\(\?\w*\)", "", db_pattern)  # Remove inline flags like (?i), (?m), (?im)
+        db_pattern = re.sub(r"^\^", "", db_pattern)  # Remove ^ at regex pattern start
+        db_pattern = re.sub(r"\$$", "", db_pattern)  # Remove $ at regex pattern end
+
+        file_matches = await FileObjectAdapters.aget_file_objects_by_regex(user, db_pattern, path_prefix)

         line_matches = []
         for file_object in file_matches:
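The two grep_files changes work together: re.MULTILINE makes ^/$ anchors in user patterns match per line during the precise Python-side pass, while the stripped db_pattern keeps the database prefilter permissive enough not to miss candidate files. A small demonstration using the same substitutions as the diff:

```python
import re

regex_pattern = r"^## Tasks$"
content = "# Notes\n## Tasks\n- buy milk"

# Python-side precise matching: line-anchored thanks to re.MULTILINE
regex = re.compile(regex_pattern, re.IGNORECASE | re.MULTILINE)
assert regex.search(content)

# DB pushdown pattern: anchors and inline flags stripped for a permissive prefilter
db_pattern = regex_pattern
db_pattern = re.sub(r"\(\?\w*\)", "", db_pattern)  # drop inline flags like (?im)
db_pattern = re.sub(r"^\^", "", db_pattern)        # drop leading ^
db_pattern = re.sub(r"\$$", "", db_pattern)        # drop trailing $
assert db_pattern == "## Tasks"
```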
khoj/utils/constants.py
CHANGED
@@ -40,6 +40,9 @@ model_to_cost: Dict[str, Dict[str, float]] = {
     "o3": {"input": 2.0, "output": 8.00},
     "o3-pro": {"input": 20.0, "output": 80.00},
     "o4-mini": {"input": 1.10, "output": 4.40},
+    "gpt-5-2025-08-07": {"input": 1.25, "output": 10.00, "cache_read": 0.125},
+    "gpt-5-mini-2025-08-07": {"input": 0.25, "output": 2.00, "cache_read": 0.025},
+    "gpt-5-nano-2025-08-07": {"input": 0.05, "output": 0.40, "cache_read": 0.005},
     # Gemini Pricing: https://ai.google.dev/pricing
     "gemini-1.5-flash": {"input": 0.075, "output": 0.30},
     "gemini-1.5-flash-002": {"input": 0.075, "output": 0.30},
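Rough cost arithmetic for the new entries, assuming the table values are USD per million tokens with cached input tokens billed at the cache_read rate; estimate_cost is an illustrative helper, not khoj's get_chat_usage_metrics:

```python
model_to_cost = {
    "gpt-5-mini-2025-08-07": {"input": 0.25, "output": 2.00, "cache_read": 0.025},
}


def estimate_cost(model: str, input_tokens: int, output_tokens: int, cache_read_tokens: int = 0) -> float:
    # Assumption: table values are USD per 1M tokens; uncached input, output,
    # and cache reads are each billed at their own rate
    price = model_to_cost[model]
    return (
        input_tokens * price["input"]
        + output_tokens * price["output"]
        + cache_read_tokens * price.get("cache_read", price["input"])
    ) / 1e6


# 90k uncached input + 30k cached input tokens, 2k output tokens
cost = estimate_cost("gpt-5-mini-2025-08-07", 90_000, 2_000, 30_000)
assert abs(cost - 0.02725) < 1e-9
```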