khoj 1.29.2.dev35__py3-none-any.whl → 1.30.2.dev22__py3-none-any.whl
This diff compares two publicly released versions of the package as they appear in their public registries. It is provided for informational purposes only.
- khoj/configure.py +25 -0
- khoj/interface/compiled/404/index.html +1 -1
- khoj/interface/compiled/_next/static/chunks/1210.ef7a0f9a7e43da1d.js +1 -0
- khoj/interface/compiled/_next/static/chunks/1279-4cb23143aa2c0228.js +1 -0
- khoj/interface/compiled/_next/static/chunks/1603-1407afe510f0145a.js +1 -0
- khoj/interface/compiled/_next/static/chunks/1970-1b63ac1497b03a10.js +1 -0
- khoj/interface/compiled/_next/static/chunks/216-b2e4344315b88832.js +1 -0
- khoj/interface/compiled/_next/static/chunks/3072-be830e4f8412b9d2.js +1 -0
- khoj/interface/compiled/_next/static/chunks/3690-51312931ba1eae30.js +1 -0
- khoj/interface/compiled/_next/static/chunks/4504-62ac13e7d94c52f9.js +1 -0
- khoj/interface/compiled/_next/static/chunks/5512-7cc62049bbe60e11.js +1 -0
- khoj/interface/compiled/_next/static/chunks/5538-e5f3c9f4d67a64b9.js +1 -0
- khoj/interface/compiled/_next/static/chunks/{7883-b1305ec254213afe.js → 6901-e3dc0d315e3f6033.js} +2 -2
- khoj/interface/compiled/_next/static/chunks/7592-a09c39a38e60634b.js +1 -0
- khoj/interface/compiled/_next/static/chunks/8423-1dda16bc56236523.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/{page-ee4f0da14df15091.js → page-5f6e0dacc34e33ad.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/automations/{layout-27c28e923c9b1ff0.js → layout-7f1b79a2c67af0b4.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/automations/{page-da59a2b9ec07da16.js → page-60bc7454bc3ea881.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/{page-e60a55d029b6216a.js → page-ac366c9111374312.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/{page-fcf7411ff80b6bf5.js → page-358154a4436ef316.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/search/{page-4f44549ba3807021.js → page-64ea1717528979af.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/settings/{layout-254eaaf916449a60.js → layout-1f4d76a8b09517b1.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/settings/page-17a538580c65e7fe.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-4a4c0f199b89bd80.js → page-47641b3691fb0856.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/webpack-2389f756cfc04602.js +1 -0
- khoj/interface/compiled/_next/static/css/1a4038cc4acc8ee4.css +25 -0
- khoj/interface/compiled/agents/index.html +1 -1
- khoj/interface/compiled/agents/index.txt +2 -2
- khoj/interface/compiled/automations/index.html +1 -1
- khoj/interface/compiled/automations/index.txt +3 -3
- khoj/interface/compiled/chat/index.html +1 -1
- khoj/interface/compiled/chat/index.txt +2 -2
- khoj/interface/compiled/index.html +1 -1
- khoj/interface/compiled/index.txt +2 -2
- khoj/interface/compiled/search/index.html +1 -1
- khoj/interface/compiled/search/index.txt +2 -2
- khoj/interface/compiled/settings/index.html +1 -1
- khoj/interface/compiled/settings/index.txt +3 -3
- khoj/interface/compiled/share/chat/index.html +1 -1
- khoj/interface/compiled/share/chat/index.txt +2 -2
- khoj/processor/conversation/anthropic/utils.py +15 -1
- khoj/processor/conversation/google/utils.py +12 -1
- khoj/processor/conversation/openai/utils.py +33 -19
- khoj/processor/conversation/prompts.py +61 -41
- khoj/processor/conversation/utils.py +32 -14
- khoj/routers/api.py +1 -1
- khoj/routers/api_chat.py +33 -17
- khoj/routers/helpers.py +70 -57
- khoj/utils/constants.py +19 -1
- khoj/utils/helpers.py +24 -0
- {khoj-1.29.2.dev35.dist-info → khoj-1.30.2.dev22.dist-info}/METADATA +1 -1
- {khoj-1.29.2.dev35.dist-info → khoj-1.30.2.dev22.dist-info}/RECORD +65 -65
- khoj/interface/compiled/_next/static/chunks/1210.132a7e1910006bbb.js +0 -1
- khoj/interface/compiled/_next/static/chunks/1279-f37ee4a388ebf544.js +0 -1
- khoj/interface/compiled/_next/static/chunks/1603-dc5fd983dbcd070d.js +0 -1
- khoj/interface/compiled/_next/static/chunks/1970-c78f6acc8e16e30b.js +0 -1
- khoj/interface/compiled/_next/static/chunks/2261-748f7c327df3c8c1.js +0 -1
- khoj/interface/compiled/_next/static/chunks/3062-71ed4b46ac2bb87c.js +0 -1
- khoj/interface/compiled/_next/static/chunks/3803-d74118a2d0182c52.js +0 -1
- khoj/interface/compiled/_next/static/chunks/4504-1629487c8bc82203.js +0 -1
- khoj/interface/compiled/_next/static/chunks/5512-94c7c2bbcf58c19d.js +0 -1
- khoj/interface/compiled/_next/static/chunks/5538-b87b60ecc0c27ceb.js +0 -1
- khoj/interface/compiled/_next/static/chunks/8423-c0123d454681e03a.js +0 -1
- khoj/interface/compiled/_next/static/chunks/9001-3b27af6d5f21df44.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/settings/page-5591490850437232.js +0 -1
- khoj/interface/compiled/_next/static/chunks/webpack-323bbe2678102a2f.js +0 -1
- khoj/interface/compiled/_next/static/css/ed437164d77aa600.css +0 -25
- /khoj/interface/compiled/_next/static/{bkshWraYdEa_w254xnxBc → Tua1S1yzYQvGP_toWnaaz}/_buildManifest.js +0 -0
- /khoj/interface/compiled/_next/static/{bkshWraYdEa_w254xnxBc → Tua1S1yzYQvGP_toWnaaz}/_ssgManifest.js +0 -0
- /khoj/interface/compiled/_next/static/chunks/{3124-a4cea2eda163128d.js → 3124-e8410bbd01f6f188.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{4602-8eeb4b76385ad159.js → 4602-460621c3241e0d13.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{6297-d1c842ed3f714ab0.js → 6297-55f82537bb7068dd.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{7023-a5bf5744d19b3bd3.js → 7023-e8de2bded4df6539.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{796-68f9e87f9cdfda1d.js → 796-36ee2d6829448c6d.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{9417-32c4db52ca42e681.js → 9417-06236cd650f1abcd.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/app/_not-found/{page-07ff4ab42b07845e.js → page-cfba071f5a657256.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{fd9d1056-2b978342deb60015.js → fd9d1056-2e6c8140e79afc3b.js} +0 -0
- {khoj-1.29.2.dev35.dist-info → khoj-1.30.2.dev22.dist-info}/WHEEL +0 -0
- {khoj-1.29.2.dev35.dist-info → khoj-1.30.2.dev22.dist-info}/entry_points.txt +0 -0
- {khoj-1.29.2.dev35.dist-info → khoj-1.30.2.dev22.dist-info}/licenses/LICENSE +0 -0
@@ -5,7 +5,6 @@ import math
 import mimetypes
 import os
 import queue
-import re
 import uuid
 from dataclasses import dataclass
 from datetime import datetime
@@ -49,8 +48,6 @@ except ImportError:
 
 model_to_prompt_size = {
     # OpenAI Models
-    "gpt-3.5-turbo": 12000,
-    "gpt-4-turbo-preview": 20000,
     "gpt-4o": 20000,
     "gpt-4o-mini": 20000,
     "o1-preview": 20000,
@@ -59,12 +56,15 @@ model_to_prompt_size = {
     "gemini-1.5-flash": 20000,
     "gemini-1.5-pro": 20000,
     # Anthropic Models
-    "claude-3-5-sonnet-
-    "claude-3-
+    "claude-3-5-sonnet-20241022": 20000,
+    "claude-3-5-haiku-20241022": 20000,
     # Offline Models
-    "TheBloke/Mistral-7B-Instruct-v0.2-GGUF": 3500,
-    "NousResearch/Hermes-2-Pro-Mistral-7B-GGUF": 3500,
     "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF": 20000,
+    "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF": 20000,
+    "bartowski/Llama-3.2-3B-Instruct-GGUF": 20000,
+    "bartowski/gemma-2-9b-it-GGUF": 6000,
+    "bartowski/gemma-2-2b-it-GGUF": 6000,
+    "Qwen/Qwen2.5-14B-Instruct-GGUF": 20000,
 }
 model_to_tokenizer: Dict[str, str] = {}
 
@@ -212,6 +212,8 @@ class ChatEvent(Enum):
     REFERENCES = "references"
     STATUS = "status"
     METADATA = "metadata"
+    USAGE = "usage"
+    END_RESPONSE = "end_response"
 
 
 def message_to_log(
@@ -290,7 +292,7 @@ def save_to_conversation_log(
         user_message=q,
     )
 
-    if
+    if os.getenv("PROMPTRACE_DIR"):
         merge_message_into_conversation_trace(q, chat_response, tracer)
 
     logger.info(
@@ -577,7 +579,7 @@ def commit_conversation_trace(
     response: str | list[dict],
     tracer: dict,
     system_message: str | list[dict] = "",
-    repo_path: str =
+    repo_path: str = None,
 ) -> str:
     """
     Save trace of conversation step using git. Useful to visualize, compare and debug traces.
@@ -588,6 +590,11 @@ def commit_conversation_trace(
     except ImportError:
         return None
 
+    # Infer repository path from environment variable or provided path
+    repo_path = repo_path or os.getenv("PROMPTRACE_DIR")
+    if not repo_path:
+        return None
+
     # Serialize session, system message and response to yaml
     system_message_yaml = json.dumps(system_message, ensure_ascii=False, sort_keys=False)
     response_yaml = json.dumps(response, ensure_ascii=False, sort_keys=False)
@@ -600,9 +607,6 @@ def commit_conversation_trace(
     # Extract chat metadata for session
     uid, cid, mid = tracer.get("uid", "main"), tracer.get("cid", "main"), tracer.get("mid")
 
-    # Infer repository path from environment variable or provided path
-    repo_path = os.getenv("PROMPTRACE_DIR", repo_path)
-
     try:
         # Prepare git repository
         os.makedirs(repo_path, exist_ok=True)
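Taken together, the two hunks above make prompt tracing opt-in: the trace repository path is now inferred from the PROMPTRACE_DIR environment variable up front, and commit_conversation_trace() returns None when it is unset. A minimal sketch of turning it on; the directory path below is an assumed example, not a value from this package:

```python
import os

# Assumed example path; any writable local directory should work.
# With PROMPTRACE_DIR unset, commit_conversation_trace() now returns None
# and no trace is merged into the conversation log.
os.environ["PROMPTRACE_DIR"] = "/tmp/khoj_promptrace"
```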
@@ -740,6 +744,20 @@ Metadata
 
 def messages_to_print(messages: list[ChatMessage], max_length: int = 70) -> str:
     """
-    Format
+    Format and truncate messages to print, ensuring JSON serializable content
     """
-
+
+    def safe_serialize(content: Any) -> str:
+        try:
+            # Try JSON serialization
+            json.dumps(content)
+            return content
+        except (TypeError, json.JSONDecodeError):
+            # Handle non-serializable types
+            if hasattr(content, "format") and content.format == "WEBP":
+                return "[WebP Image]"
+            elif hasattr(content, "__dict__"):
+                return str(content.__dict__)
+            return str(content)
+
+    return "\n".join([f"{json.dumps(safe_serialize(message.content))[:max_length]}..." for message in messages])
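A standalone sketch of what the reworked safe_serialize fallback does with mixed message content; the stand-in objects below are illustrative, not the langchain ChatMessage type the package actually passes in:

```python
import json
from types import SimpleNamespace

plain = "What is the weather in Paris?"
structured = [{"type": "text", "text": "See attached image"}]
opaque = SimpleNamespace(tool="search", args={"q": "weather"})  # not JSON serializable

for content in (plain, structured, opaque):
    try:
        json.dumps(content)  # JSON-serializable content passes through unchanged
        safe = content
    except TypeError:
        safe = str(vars(content))  # the __dict__ fallback used for opaque objects
    print(f"{json.dumps(safe)[:70]}...")  # truncated exactly like messages_to_print
```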
khoj/routers/api.py CHANGED
khoj/routers/api_chat.py CHANGED
@@ -46,7 +46,7 @@ from khoj.routers.helpers import (
     FeedbackData,
     acreate_title_from_history,
     agenerate_chat_response,
-
+    aget_data_sources_and_output_format,
     construct_automation_created_message,
     create_automation,
     gather_raw_query_files,
@@ -667,27 +667,37 @@ async def chat(
         finally:
             yield event_delimiter
 
-    async def send_llm_response(response: str):
+    async def send_llm_response(response: str, usage: dict = None):
+        # Send Chat Response
         async for result in send_event(ChatEvent.START_LLM_RESPONSE, ""):
             yield result
         async for result in send_event(ChatEvent.MESSAGE, response):
             yield result
         async for result in send_event(ChatEvent.END_LLM_RESPONSE, ""):
             yield result
+        # Send Usage Metadata once llm interactions are complete
+        if usage:
+            async for event in send_event(ChatEvent.USAGE, usage):
+                yield event
+        async for result in send_event(ChatEvent.END_RESPONSE, ""):
+            yield result
 
     def collect_telemetry():
         # Gather chat response telemetry
         nonlocal chat_metadata
         latency = time.perf_counter() - start_time
         cmd_set = set([cmd.value for cmd in conversation_commands])
+        cost = (tracer.get("usage", {}) or {}).get("cost", 0)
         chat_metadata = chat_metadata or {}
         chat_metadata["conversation_command"] = cmd_set
-        chat_metadata["agent"] = conversation.agent.slug if conversation.agent else None
+        chat_metadata["agent"] = conversation.agent.slug if conversation and conversation.agent else None
         chat_metadata["latency"] = f"{latency:.3f}"
         chat_metadata["ttft_latency"] = f"{ttft:.3f}"
+        chat_metadata["usage"] = tracer.get("usage")
 
         logger.info(f"Chat response time to first token: {ttft:.3f} seconds")
         logger.info(f"Chat response total time: {latency:.3f} seconds")
+        logger.info(f"Chat response cost: ${cost:.5f}")
         update_telemetry_state(
             request=request,
             telemetry_type="api",
@@ -699,7 +709,7 @@ async def chat(
     )
 
     if is_query_empty(q):
-        async for result in send_llm_response("Please ask your query to get started."):
+        async for result in send_llm_response("Please ask your query to get started.", tracer.get("usage")):
             yield result
         return
 
@@ -713,7 +723,7 @@ async def chat(
         create_new=body.create_new,
     )
     if not conversation:
-        async for result in send_llm_response(f"Conversation {conversation_id} not found"):
+        async for result in send_llm_response(f"Conversation {conversation_id} not found", tracer.get("usage")):
             yield result
         return
     conversation_id = conversation.id
@@ -752,7 +762,7 @@ async def chat(
     attached_file_context = gather_raw_query_files(query_files)
 
     if conversation_commands == [ConversationCommand.Default] or is_automated_task:
-
+        chosen_io = await aget_data_sources_and_output_format(
             q,
             meta_log,
             is_automated_task,
@@ -762,6 +772,7 @@ async def chat(
             query_files=attached_file_context,
             tracer=tracer,
         )
+        conversation_commands = chosen_io.get("sources") + [chosen_io.get("output")]
 
     # If we're doing research, we don't want to do anything else
     if ConversationCommand.Research in conversation_commands:
@@ -776,7 +787,7 @@ async def chat(
             await conversation_command_rate_limiter.update_and_check_if_valid(request, cmd)
             q = q.replace(f"/{cmd.value}", "").strip()
         except HTTPException as e:
-            async for result in send_llm_response(str(e.detail)):
+            async for result in send_llm_response(str(e.detail), tracer.get("usage")):
                 yield result
             return
 
@@ -833,7 +844,7 @@ async def chat(
         agent_has_entries = await EntryAdapters.aagent_has_entries(agent)
         if len(file_filters) == 0 and not agent_has_entries:
             response_log = "No files selected for summarization. Please add files using the section on the left."
-            async for result in send_llm_response(response_log):
+            async for result in send_llm_response(response_log, tracer.get("usage")):
                 yield result
         else:
             async for response in generate_summary_from_files(
@@ -852,7 +863,7 @@ async def chat(
             else:
                 if isinstance(response, str):
                     response_log = response
-                    async for result in send_llm_response(response):
+                    async for result in send_llm_response(response, tracer.get("usage")):
                         yield result
 
         await sync_to_async(save_to_conversation_log)(
@@ -879,7 +890,7 @@ async def chat(
         conversation_config = await ConversationAdapters.aget_default_conversation_config(user)
         model_type = conversation_config.model_type
         formatted_help = help_message.format(model=model_type, version=state.khoj_version, device=get_device())
-        async for result in send_llm_response(formatted_help):
+        async for result in send_llm_response(formatted_help, tracer.get("usage")):
             yield result
         return
     # Adding specification to search online specifically on khoj.dev pages.
@@ -894,7 +905,7 @@ async def chat(
         except Exception as e:
             logger.error(f"Error scheduling task {q} for {user.email}: {e}")
             error_message = f"Unable to create automation. Ensure the automation doesn't already exist."
-            async for result in send_llm_response(error_message):
+            async for result in send_llm_response(error_message, tracer.get("usage")):
                 yield result
             return
 
@@ -915,7 +926,7 @@ async def chat(
             raw_query_files=raw_query_files,
             tracer=tracer,
         )
-        async for result in send_llm_response(llm_response):
+        async for result in send_llm_response(llm_response, tracer.get("usage")):
             yield result
         return
 
@@ -962,7 +973,7 @@ async def chat(
             yield result
 
     if conversation_commands == [ConversationCommand.Notes] and not await EntryAdapters.auser_has_entries(user):
-        async for result in send_llm_response(f"{no_entries_found.format()}"):
+        async for result in send_llm_response(f"{no_entries_found.format()}", tracer.get("usage")):
             yield result
         return
 
@@ -1104,7 +1115,7 @@ async def chat(
                 "detail": improved_image_prompt,
                 "image": None,
             }
-            async for result in send_llm_response(json.dumps(content_obj)):
+            async for result in send_llm_response(json.dumps(content_obj), tracer.get("usage")):
                 yield result
             return
 
@@ -1131,7 +1142,7 @@ async def chat(
             "inferredQueries": [improved_image_prompt],
             "image": generated_image,
         }
-        async for result in send_llm_response(json.dumps(content_obj)):
+        async for result in send_llm_response(json.dumps(content_obj), tracer.get("usage")):
             yield result
         return
 
@@ -1165,7 +1176,7 @@ async def chat(
             diagram_description = excalidraw_diagram_description
         else:
             error_message = "Failed to generate diagram. Please try again later."
-            async for result in send_llm_response(error_message):
+            async for result in send_llm_response(error_message, tracer.get("usage")):
                 yield result
 
             await sync_to_async(save_to_conversation_log)(
@@ -1212,7 +1223,7 @@ async def chat(
             tracer=tracer,
         )
 
-        async for result in send_llm_response(json.dumps(content_obj)):
+        async for result in send_llm_response(json.dumps(content_obj), tracer.get("usage")):
             yield result
         return
 
@@ -1251,6 +1262,11 @@ async def chat(
             if item is None:
                 async for result in send_event(ChatEvent.END_LLM_RESPONSE, ""):
                     yield result
+                # Send Usage Metadata once llm interactions are complete
+                async for event in send_event(ChatEvent.USAGE, tracer.get("usage")):
+                    yield event
+                async for result in send_event(ChatEvent.END_RESPONSE, ""):
+                    yield result
                 logger.debug("Finished streaming response")
                 return
             if not connection_alive or not continue_stream:
khoj/routers/helpers.py CHANGED
@@ -336,7 +336,7 @@ async def acheck_if_safe_prompt(system_prompt: str, user: KhojUser = None, lax:
     return is_safe, reason
 
 
-async def
+async def aget_data_sources_and_output_format(
     query: str,
     conversation_history: dict,
     is_task: bool,
@@ -345,33 +345,33 @@ async def aget_relevant_tools_to_execute(
     agent: Agent = None,
     query_files: str = None,
     tracer: dict = {},
-):
+) -> Dict[str, Any]:
     """
-    Given a query, determine which of the available
+    Given a query, determine which of the available data sources and output modes the agent should use to answer appropriately.
     """
 
-
-
+    source_options = dict()
+    source_options_str = ""
 
-
+    agent_sources = agent.input_tools if agent else []
 
-    for
-
-    if len(
-
+    for source, description in tool_descriptions_for_llm.items():
+        source_options[source.value] = description
+        if len(agent_sources) == 0 or source.value in agent_sources:
+            source_options_str += f'- "{source.value}": "{description}"\n'
 
-
-
+    output_options = dict()
+    output_options_str = ""
 
-
+    agent_outputs = agent.output_modes if agent else []
 
-    for
+    for output, description in mode_descriptions_for_llm.items():
         # Do not allow tasks to schedule another task
-        if is_task and
+        if is_task and output == ConversationCommand.Automation:
             continue
-
-        if len(
-
+        output_options[output.value] = description
+        if len(agent_outputs) == 0 or output.value in agent_outputs:
+            output_options_str += f'- "{output.value}": "{description}"\n'
 
     chat_history = construct_chat_history(conversation_history)
 
@@ -384,8 +384,8 @@ async def aget_relevant_tools_to_execute(
 
     relevant_tools_prompt = prompts.pick_relevant_tools.format(
         query=query,
-
-        outputs=
+        sources=source_options_str,
+        outputs=output_options_str,
         chat_history=chat_history,
         personality_context=personality_context,
     )
@@ -402,45 +402,43 @@ async def aget_relevant_tools_to_execute(
     try:
         response = clean_json(response)
         response = json.loads(response)
-        input_tools = [q.strip() for q in response["source"] if q.strip()]
-        if not isinstance(input_tools, list) or not input_tools or len(input_tools) == 0:
-            logger.error(f"Invalid response for determining relevant tools: {input_tools}")
-            return tool_options
-
-        output_modes = [q.strip() for q in response["output"] if q.strip()]
-        if not isinstance(output_modes, list) or not output_modes or len(output_modes) == 0:
-            logger.error(f"Invalid response for determining relevant output modes: {output_modes}")
-            return mode_options
-
-        final_response = [] if not is_task else [ConversationCommand.AutomatedTask]
-        for llm_suggested_tool in input_tools:
-            # Add a double check to verify it's in the agent list, because the LLM sometimes gets confused by the tool options.
-            if llm_suggested_tool in tool_options.keys() and (
-                len(agent_tools) == 0 or llm_suggested_tool in agent_tools
-            ):
-                # Check whether the tool exists as a valid ConversationCommand
-                final_response.append(ConversationCommand(llm_suggested_tool))
 
-        for
+        selected_sources = [q.strip() for q in response.get("source", []) if q.strip()]
+        selected_output = response.get("output", "text").strip()  # Default to text output
+
+        if not isinstance(selected_sources, list) or not selected_sources or len(selected_sources) == 0:
+            raise ValueError(
+                f"Invalid response for determining relevant tools: {selected_sources}. Raw Response: {response}"
+            )
+
+        result: Dict = {"sources": [], "output": None if not is_task else ConversationCommand.AutomatedTask}
+        for selected_source in selected_sources:
             # Add a double check to verify it's in the agent list, because the LLM sometimes gets confused by the tool options.
-            if
-
+            if (
+                selected_source in source_options.keys()
+                and isinstance(result["sources"], list)
+                and (len(agent_sources) == 0 or selected_source in agent_sources)
             ):
                 # Check whether the tool exists as a valid ConversationCommand
-
+                result["sources"].append(ConversationCommand(selected_source))
+
+        # Add a double check to verify it's in the agent list, because the LLM sometimes gets confused by the tool options.
+        if selected_output in output_options.keys() and (len(agent_outputs) == 0 or selected_output in agent_outputs):
+            # Check whether the tool exists as a valid ConversationCommand
+            result["output"] = ConversationCommand(selected_output)
 
-        if is_none_or_empty(
-        if len(
-
+        if is_none_or_empty(result):
+            if len(agent_sources) == 0:
+                result = {"sources": [ConversationCommand.Default], "output": ConversationCommand.Text}
             else:
-
-    except Exception:
-        logger.error(f"Invalid response for determining relevant tools: {response}")
-        if len(
-
-
-
-        return
+                result = {"sources": [ConversationCommand.General], "output": ConversationCommand.Text}
+    except Exception as e:
+        logger.error(f"Invalid response for determining relevant tools: {response}. Error: {e}", exc_info=True)
+        sources = agent_sources if len(agent_sources) > 0 else [ConversationCommand.Default]
+        output = agent_outputs[0] if len(agent_outputs) > 0 else ConversationCommand.Text
+        result = {"sources": sources, "output": output}
+
+    return result
 
 
 async def infer_webpage_urls(
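For context, the rewritten parser above expects the LLM to reply with a JSON object containing a list of data sources and a single output mode, and collapses it into a {"sources": [...], "output": ...} dict. An illustrative sketch with made-up values; the real code maps the strings onto ConversationCommand members and falls back to Default/Text on errors:

```python
import json

raw_llm_response = '{"source": ["notes", "online"], "output": "text"}'  # invented example reply
parsed = json.loads(raw_llm_response)

selected_sources = [s.strip() for s in parsed.get("source", []) if s.strip()]
selected_output = parsed.get("output", "text").strip()  # defaults to text output

result = {"sources": selected_sources, "output": selected_output}
print(result)  # {'sources': ['notes', 'online'], 'output': 'text'}
```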
@@ -755,7 +753,11 @@ async def generate_excalidraw_diagram(
         yield None, None
         return
 
-
+    scratchpad = excalidraw_diagram_description.get("scratchpad")
+
+    inferred_queries = f"Instruction: {better_diagram_description_prompt}\n\nScratchpad: {scratchpad}"
+
+    yield inferred_queries, excalidraw_diagram_description.get("elements")
 
 
 async def generate_better_diagram_description(
@@ -824,7 +826,7 @@ async def generate_excalidraw_diagram_from_description(
     user: KhojUser = None,
     agent: Agent = None,
     tracer: dict = {},
-) -> str:
+) -> Dict[str, Any]:
     personality_context = (
         prompts.personality_context.format(personality=agent.personality) if agent and agent.personality else ""
     )
@@ -840,10 +842,18 @@ async def generate_excalidraw_diagram_from_description(
     )
     raw_response = clean_json(raw_response)
     try:
+        # Expect response to have `elements` and `scratchpad` keys
         response: Dict[str, str] = json.loads(raw_response)
+        if (
+            not response
+            or not isinstance(response, Dict)
+            or not response.get("elements")
+            or not response.get("scratchpad")
+        ):
+            raise AssertionError(f"Invalid response for generating Excalidraw diagram: {response}")
     except Exception:
         raise AssertionError(f"Invalid response for generating Excalidraw diagram: {raw_response}")
-    if not response or not isinstance(response, List) or not isinstance(response[0], Dict):
+    if not response or not isinstance(response["elements"], List) or not isinstance(response["elements"][0], Dict):
         # TODO Some additional validation here that it's a valid Excalidraw diagram
         raise AssertionError(f"Invalid response for improving diagram description: {response}")
 
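The stricter validation above assumes the diagram LLM now returns a scratchpad plus the element list in one object, rather than a bare list of elements. A sketch of the expected shape; the element fields are placeholders, not the full Excalidraw schema:

```python
example_response = {
    "scratchpad": "Two services connected by an arrow, label the arrow 'HTTP'",
    "elements": [
        {"type": "rectangle", "x": 0, "y": 0},
        {"type": "rectangle", "x": 240, "y": 0},
        {"type": "arrow", "x": 100, "y": 40},
    ],
}

# Mirrors the checks added in the diff: both keys must be present and
# "elements" must be a non-empty list of dicts.
assert example_response.get("scratchpad") and example_response.get("elements")
assert isinstance(example_response["elements"], list) and isinstance(example_response["elements"][0], dict)
```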
@@ -1772,6 +1782,7 @@ Manage your automations [here](/automations).
 class MessageProcessor:
     def __init__(self):
         self.references = {}
+        self.usage = {}
         self.raw_response = ""
 
     def convert_message_chunk_to_json(self, raw_chunk: str) -> Dict[str, Any]:
@@ -1795,6 +1806,8 @@ class MessageProcessor:
             chunk_type = ChatEvent(chunk["type"])
             if chunk_type == ChatEvent.REFERENCES:
                 self.references = chunk["data"]
+            elif chunk_type == ChatEvent.USAGE:
+                self.usage = chunk["data"]
             elif chunk_type == ChatEvent.MESSAGE:
                 chunk_data = chunk["data"]
                 if isinstance(chunk_data, dict):
@@ -1839,7 +1852,7 @@ async def read_chat_stream(response_iterator: AsyncGenerator[str, None]) -> Dict
     if buffer:
         processor.process_message_chunk(buffer)
 
-    return {"response": processor.raw_response, "references": processor.references}
+    return {"response": processor.raw_response, "references": processor.references, "usage": processor.usage}
 
 
 def get_user_config(user: KhojUser, request: Request, is_detailed: bool = False):
khoj/utils/constants.py CHANGED
@@ -1,4 +1,5 @@
 from pathlib import Path
+from typing import Dict
 
 app_root_directory = Path(__file__).parent.parent.parent
 web_directory = app_root_directory / "khoj/interface/web/"
@@ -10,9 +11,10 @@ telemetry_server = "https://khoj.beta.haletic.com/v1/telemetry"
 content_directory = "~/.khoj/content/"
 default_offline_chat_models = [
     "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
+    "bartowski/Llama-3.2-3B-Instruct-GGUF",
     "bartowski/gemma-2-9b-it-GGUF",
     "bartowski/gemma-2-2b-it-GGUF",
-    "
+    "Qwen/Qwen2.5-14B-Instruct-GGUF",
 ]
 default_openai_chat_models = ["gpt-4o-mini", "gpt-4o"]
 default_gemini_chat_models = ["gemini-1.5-flash", "gemini-1.5-pro"]
@@ -30,3 +32,19 @@ default_config = {
         "image": {"encoder": "sentence-transformers/clip-ViT-B-32", "model_directory": "~/.khoj/search/image/"},
     },
 }
+
+model_to_cost: Dict[str, Dict[str, float]] = {
+    # OpenAI Pricing: https://openai.com/api/pricing/
+    "gpt-4o": {"input": 2.50, "output": 10.00},
+    "gpt-4o-mini": {"input": 0.15, "output": 0.60},
+    "o1-preview": {"input": 15.0, "output": 60.00},
+    "o1-mini": {"input": 3.0, "output": 12.0},
+    # Gemini Pricing: https://ai.google.dev/pricing
+    "gemini-1.5-flash": {"input": 0.075, "output": 0.30},
+    "gemini-1.5-flash-002": {"input": 0.075, "output": 0.30},
+    "gemini-1.5-pro": {"input": 1.25, "output": 5.00},
+    "gemini-1.5-pro-002": {"input": 1.25, "output": 5.00},
+    # Anthropic Pricing: https://www.anthropic.com/pricing#anthropic-api_
+    "claude-3-5-sonnet-20241022": {"input": 3.0, "output": 15.0},
+    "claude-3-5-haiku-20241022": {"input": 1.0, "output": 5.0},
+}
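The prices above are in dollars per million tokens, so a rough worked example for a single gpt-4o-mini call with invented token counts:

```python
# 1,200 input tokens and 350 output tokens at $0.15 / $0.60 per million tokens:
input_cost = 0.15 * (1_200 / 1e6)   # 0.00018
output_cost = 0.60 * (350 / 1e6)    # 0.00021
print(f"${input_cost + output_cost:.5f}")  # $0.00039
```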
khoj/utils/helpers.py CHANGED
@@ -540,3 +540,27 @@ def get_country_code_from_timezone(tz: str) -> str:
 def get_country_name_from_timezone(tz: str) -> str:
     """Get country name from timezone"""
     return country_names.get(get_country_code_from_timezone(tz), "United States")
+
+
+def get_cost_of_chat_message(model_name: str, input_tokens: int = 0, output_tokens: int = 0, prev_cost: float = 0.0):
+    """
+    Calculate cost of chat message based on input and output tokens
+    """
+
+    # Calculate cost of input and output tokens. Costs are per million tokens
+    input_cost = constants.model_to_cost.get(model_name, {}).get("input", 0) * (input_tokens / 1e6)
+    output_cost = constants.model_to_cost.get(model_name, {}).get("output", 0) * (output_tokens / 1e6)
+
+    return input_cost + output_cost + prev_cost
+
+
+def get_chat_usage_metrics(model_name: str, input_tokens: int = 0, output_tokens: int = 0, usage: dict = {}):
+    """
+    Get usage metrics for chat message based on input and output tokens
+    """
+    prev_usage = usage or {"input_tokens": 0, "output_tokens": 0, "cost": 0.0}
+    return {
+        "input_tokens": prev_usage["input_tokens"] + input_tokens,
+        "output_tokens": prev_usage["output_tokens"] + output_tokens,
+        "cost": get_cost_of_chat_message(model_name, input_tokens, output_tokens, prev_cost=prev_usage["cost"]),
+    }