khoj 1.21.6.dev14__py3-none-any.whl → 1.21.7.dev6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/database/adapters/__init__.py +12 -0
- khoj/database/migrations/0056_chatmodeloptions_vision_enabled.py +17 -0
- khoj/database/migrations/0057_merge_20240816_1409.py +13 -0
- khoj/database/migrations/0060_merge_20240905_1828.py +14 -0
- khoj/database/models/__init__.py +1 -0
- khoj/interface/compiled/404/index.html +1 -1
- khoj/interface/compiled/_next/static/chunks/{3062-a42d847c919a9ea4.js → 3062-9be9a4e34f82ed3a.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/3678-0732dd9d2f472171.js +25 -0
- khoj/interface/compiled/_next/static/chunks/8423-b6a61d82233d1a82.js +1 -0
- khoj/interface/compiled/_next/static/chunks/9001-3b27af6d5f21df44.js +21 -0
- khoj/interface/compiled/_next/static/chunks/9162-0be016519a18568b.js +11 -0
- khoj/interface/compiled/_next/static/chunks/{9178-d23cb0dbee40a775.js → 9178-3a0baad1c172d515.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{9693-91b03052c5cabded.js → 9984-e410179c6fac7cf1.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/page-f8bc420192b8a6f4.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/factchecker/page-5c55afdb9dbe8dac.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/page-8f61b4bd2032384a.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/share/chat/page-6d1313e0d33b0451.js +1 -0
- khoj/interface/compiled/_next/static/chunks/{webpack-0f6d4805ea01afda.js → webpack-07fad5db87344b82.js} +1 -1
- khoj/interface/compiled/_next/static/css/3e49e5ee49c6bda1.css +25 -0
- khoj/interface/compiled/_next/static/css/{2bfe35fbe2c97a56.css → 3f8ae5fd7b73a9e5.css} +1 -1
- khoj/interface/compiled/_next/static/css/5a400c87d295e68a.css +1 -0
- khoj/interface/compiled/_next/static/css/c808691c459e3887.css +1 -0
- khoj/interface/compiled/agents/index.html +1 -1
- khoj/interface/compiled/agents/index.txt +2 -2
- khoj/interface/compiled/automations/index.html +1 -1
- khoj/interface/compiled/automations/index.txt +3 -3
- khoj/interface/compiled/chat/index.html +1 -1
- khoj/interface/compiled/chat/index.txt +2 -2
- khoj/interface/compiled/factchecker/index.html +1 -1
- khoj/interface/compiled/factchecker/index.txt +2 -2
- khoj/interface/compiled/index.html +1 -1
- khoj/interface/compiled/index.txt +2 -2
- khoj/interface/compiled/search/index.html +1 -1
- khoj/interface/compiled/search/index.txt +2 -2
- khoj/interface/compiled/settings/index.html +1 -1
- khoj/interface/compiled/settings/index.txt +3 -3
- khoj/interface/compiled/share/chat/index.html +1 -1
- khoj/interface/compiled/share/chat/index.txt +2 -2
- khoj/processor/conversation/openai/gpt.py +4 -0
- khoj/processor/conversation/utils.py +31 -13
- khoj/processor/tools/online_search.py +6 -2
- khoj/routers/api_chat.py +64 -18
- khoj/routers/helpers.py +73 -21
- khoj/routers/storage.py +28 -0
- khoj/utils/helpers.py +15 -0
- {khoj-1.21.6.dev14.dist-info → khoj-1.21.7.dev6.dist-info}/METADATA +1 -1
- {khoj-1.21.6.dev14.dist-info → khoj-1.21.7.dev6.dist-info}/RECORD +52 -49
- khoj/interface/compiled/_next/static/chunks/3678-8c0e55c3b5d83a22.js +0 -25
- khoj/interface/compiled/_next/static/chunks/8423-132ea64eac83fd43.js +0 -1
- khoj/interface/compiled/_next/static/chunks/9001-acbca3e19b1a5ddf.js +0 -21
- khoj/interface/compiled/_next/static/chunks/9162-4a6d0d0dc5e27618.js +0 -11
- khoj/interface/compiled/_next/static/chunks/app/chat/page-c2ebc47a09abc8ae.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/factchecker/page-6ca723a9ff0dfd70.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/page-d403fc59c9c3f8cc.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/page-0ae8f5b868af65c1.js +0 -1
- khoj/interface/compiled/_next/static/css/9d5b867ec04494a6.css +0 -25
- khoj/interface/compiled/_next/static/css/a22d83f18a32957e.css +0 -1
- khoj/interface/compiled/_next/static/css/b81e909d403fb2df.css +0 -1
- /khoj/interface/compiled/_next/static/{OHjya9xQJWrEMTXUfPYon → IFgQ9YOS_lUMnLw-CPszn}/_buildManifest.js +0 -0
- /khoj/interface/compiled/_next/static/{OHjya9xQJWrEMTXUfPYon → IFgQ9YOS_lUMnLw-CPszn}/_ssgManifest.js +0 -0
- {khoj-1.21.6.dev14.dist-info → khoj-1.21.7.dev6.dist-info}/WHEEL +0 -0
- {khoj-1.21.6.dev14.dist-info → khoj-1.21.7.dev6.dist-info}/entry_points.txt +0 -0
- {khoj-1.21.6.dev14.dist-info → khoj-1.21.7.dev6.dist-info}/licenses/LICENSE +0 -0
|
@@ -101,12 +101,16 @@ def save_to_conversation_log(
|
|
|
101
101
|
client_application: ClientApplication = None,
|
|
102
102
|
conversation_id: int = None,
|
|
103
103
|
automation_id: str = None,
|
|
104
|
+
uploaded_image_url: str = None,
|
|
104
105
|
):
|
|
105
106
|
user_message_time = user_message_time or datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
|
106
107
|
updated_conversation = message_to_log(
|
|
107
108
|
user_message=q,
|
|
108
109
|
chat_response=chat_response,
|
|
109
|
-
user_message_metadata={
|
|
110
|
+
user_message_metadata={
|
|
111
|
+
"created": user_message_time,
|
|
112
|
+
"uploadedImageData": uploaded_image_url,
|
|
113
|
+
},
|
|
110
114
|
khoj_message_metadata={
|
|
111
115
|
"context": compiled_references,
|
|
112
116
|
"intent": {"inferred-queries": inferred_queries, "type": intent_type},
|
|
@@ -141,6 +145,8 @@ def generate_chatml_messages_with_context(
|
|
|
141
145
|
loaded_model: Optional[Llama] = None,
|
|
142
146
|
max_prompt_size=None,
|
|
143
147
|
tokenizer_name=None,
|
|
148
|
+
uploaded_image_url=None,
|
|
149
|
+
vision_enabled=False,
|
|
144
150
|
):
|
|
145
151
|
"""Generate messages for ChatGPT with context from previous conversation"""
|
|
146
152
|
# Set max prompt size from user config or based on pre-configured for model and machine specs
|
|
@@ -150,28 +156,40 @@ def generate_chatml_messages_with_context(
|
|
|
150
156
|
else:
|
|
151
157
|
max_prompt_size = model_to_prompt_size.get(model_name, 2000)
|
|
152
158
|
|
|
159
|
+
# Format user and system messages to chatml format
|
|
160
|
+
def construct_structured_message(message, image_url):
|
|
161
|
+
if image_url and vision_enabled:
|
|
162
|
+
return [{"type": "text", "text": message}, {"type": "image_url", "image_url": {"url": image_url}}]
|
|
163
|
+
return message
|
|
164
|
+
|
|
153
165
|
# Scale lookback turns proportional to max prompt size supported by model
|
|
154
166
|
lookback_turns = max_prompt_size // 750
|
|
155
167
|
|
|
156
168
|
# Extract Chat History for Context
|
|
157
|
-
|
|
169
|
+
chatml_messages: List[ChatMessage] = []
|
|
158
170
|
for chat in conversation_log.get("chat", []):
|
|
159
|
-
|
|
160
|
-
|
|
171
|
+
message_notes = f'\n\n Notes:\n{chat.get("context")}' if chat.get("context") else "\n"
|
|
172
|
+
role = "user" if chat["by"] == "you" else "assistant"
|
|
173
|
+
|
|
174
|
+
message_content = chat["message"] + message_notes
|
|
175
|
+
|
|
176
|
+
if chat.get("uploadedImageData") and vision_enabled:
|
|
177
|
+
message_content = construct_structured_message(message_content, chat.get("uploadedImageData"))
|
|
161
178
|
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
179
|
+
reconstructed_message = ChatMessage(content=message_content, role=role)
|
|
180
|
+
|
|
181
|
+
chatml_messages.insert(0, reconstructed_message)
|
|
182
|
+
|
|
183
|
+
if len(chatml_messages) >= 2 * lookback_turns:
|
|
166
184
|
break
|
|
167
|
-
rest_backnforths += reciprocal_conversation_to_chatml([user_msg, assistant_msg])[::-1]
|
|
168
185
|
|
|
169
|
-
# Format user and system messages to chatml format
|
|
170
186
|
messages = []
|
|
171
187
|
if not is_none_or_empty(user_message):
|
|
172
|
-
messages.append(
|
|
173
|
-
|
|
174
|
-
|
|
188
|
+
messages.append(
|
|
189
|
+
ChatMessage(content=construct_structured_message(user_message, uploaded_image_url), role="user")
|
|
190
|
+
)
|
|
191
|
+
if len(chatml_messages) > 0:
|
|
192
|
+
messages += chatml_messages
|
|
175
193
|
if not is_none_or_empty(system_message):
|
|
176
194
|
messages.append(ChatMessage(content=system_message, role="system"))
|
|
177
195
|
|
|
@@ -56,6 +56,7 @@ async def search_online(
|
|
|
56
56
|
subscribed: bool = False,
|
|
57
57
|
send_status_func: Optional[Callable] = None,
|
|
58
58
|
custom_filters: List[str] = [],
|
|
59
|
+
uploaded_image_url: str = None,
|
|
59
60
|
):
|
|
60
61
|
query += " ".join(custom_filters)
|
|
61
62
|
if not is_internet_connected():
|
|
@@ -64,7 +65,9 @@ async def search_online(
|
|
|
64
65
|
return
|
|
65
66
|
|
|
66
67
|
# Breakdown the query into subqueries to get the correct answer
|
|
67
|
-
subqueries = await generate_online_subqueries(
|
|
68
|
+
subqueries = await generate_online_subqueries(
|
|
69
|
+
query, conversation_history, location, user, uploaded_image_url=uploaded_image_url
|
|
70
|
+
)
|
|
68
71
|
response_dict = {}
|
|
69
72
|
|
|
70
73
|
if subqueries:
|
|
@@ -138,13 +141,14 @@ async def read_webpages(
|
|
|
138
141
|
user: KhojUser,
|
|
139
142
|
subscribed: bool = False,
|
|
140
143
|
send_status_func: Optional[Callable] = None,
|
|
144
|
+
uploaded_image_url: str = None,
|
|
141
145
|
):
|
|
142
146
|
"Infer web pages to read from the query and extract relevant information from them"
|
|
143
147
|
logger.info(f"Inferring web pages to read")
|
|
144
148
|
if send_status_func:
|
|
145
149
|
async for event in send_status_func(f"**Inferring web pages to read**"):
|
|
146
150
|
yield {ChatEvent.STATUS: event}
|
|
147
|
-
urls = await infer_webpage_urls(query, conversation_history, location, user)
|
|
151
|
+
urls = await infer_webpage_urls(query, conversation_history, location, user, uploaded_image_url)
|
|
148
152
|
|
|
149
153
|
logger.info(f"Reading web pages at: {urls}")
|
|
150
154
|
if send_status_func:
|
khoj/routers/api_chat.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import asyncio
|
|
2
|
+
import base64
|
|
2
3
|
import json
|
|
3
4
|
import logging
|
|
4
5
|
import time
|
|
@@ -46,11 +47,13 @@ from khoj.routers.helpers import (
|
|
|
46
47
|
update_telemetry_state,
|
|
47
48
|
validate_conversation_config,
|
|
48
49
|
)
|
|
50
|
+
from khoj.routers.storage import upload_image_to_bucket
|
|
49
51
|
from khoj.utils import state
|
|
50
52
|
from khoj.utils.helpers import (
|
|
51
53
|
AsyncIteratorWrapper,
|
|
52
54
|
ConversationCommand,
|
|
53
55
|
command_descriptions,
|
|
56
|
+
convert_image_to_webp,
|
|
54
57
|
get_device,
|
|
55
58
|
is_none_or_empty,
|
|
56
59
|
)
|
|
@@ -517,21 +520,26 @@ async def set_conversation_title(
|
|
|
517
520
|
)
|
|
518
521
|
|
|
519
522
|
|
|
520
|
-
|
|
523
|
+
class ChatRequestBody(BaseModel):
|
|
524
|
+
q: str
|
|
525
|
+
n: Optional[int] = 7
|
|
526
|
+
d: Optional[float] = None
|
|
527
|
+
stream: Optional[bool] = False
|
|
528
|
+
title: Optional[str] = None
|
|
529
|
+
conversation_id: Optional[int] = None
|
|
530
|
+
city: Optional[str] = None
|
|
531
|
+
region: Optional[str] = None
|
|
532
|
+
country: Optional[str] = None
|
|
533
|
+
timezone: Optional[str] = None
|
|
534
|
+
image: Optional[str] = None
|
|
535
|
+
|
|
536
|
+
|
|
537
|
+
@api_chat.post("")
|
|
521
538
|
@requires(["authenticated"])
|
|
522
539
|
async def chat(
|
|
523
540
|
request: Request,
|
|
524
541
|
common: CommonQueryParams,
|
|
525
|
-
|
|
526
|
-
n: int = 7,
|
|
527
|
-
d: float = None,
|
|
528
|
-
stream: Optional[bool] = False,
|
|
529
|
-
title: Optional[str] = None,
|
|
530
|
-
conversation_id: Optional[int] = None,
|
|
531
|
-
city: Optional[str] = None,
|
|
532
|
-
region: Optional[str] = None,
|
|
533
|
-
country: Optional[str] = None,
|
|
534
|
-
timezone: Optional[str] = None,
|
|
542
|
+
body: ChatRequestBody,
|
|
535
543
|
rate_limiter_per_minute=Depends(
|
|
536
544
|
ApiUserRateLimiter(requests=60, subscribed_requests=60, window=60, slug="chat_minute")
|
|
537
545
|
),
|
|
@@ -539,7 +547,20 @@ async def chat(
|
|
|
539
547
|
ApiUserRateLimiter(requests=600, subscribed_requests=600, window=60 * 60 * 24, slug="chat_day")
|
|
540
548
|
),
|
|
541
549
|
):
|
|
542
|
-
|
|
550
|
+
# Access the parameters from the body
|
|
551
|
+
q = body.q
|
|
552
|
+
n = body.n
|
|
553
|
+
d = body.d
|
|
554
|
+
stream = body.stream
|
|
555
|
+
title = body.title
|
|
556
|
+
conversation_id = body.conversation_id
|
|
557
|
+
city = body.city
|
|
558
|
+
region = body.region
|
|
559
|
+
country = body.country
|
|
560
|
+
timezone = body.timezone
|
|
561
|
+
image = body.image
|
|
562
|
+
|
|
563
|
+
async def event_generator(q: str, image: str):
|
|
543
564
|
start_time = time.perf_counter()
|
|
544
565
|
ttft = None
|
|
545
566
|
chat_metadata: dict = {}
|
|
@@ -550,6 +571,17 @@ async def chat(
|
|
|
550
571
|
q = unquote(q)
|
|
551
572
|
nonlocal conversation_id
|
|
552
573
|
|
|
574
|
+
uploaded_image_url = None
|
|
575
|
+
if image:
|
|
576
|
+
decoded_string = unquote(image)
|
|
577
|
+
base64_data = decoded_string.split(",", 1)[1]
|
|
578
|
+
image_bytes = base64.b64decode(base64_data)
|
|
579
|
+
webp_image_bytes = convert_image_to_webp(image_bytes)
|
|
580
|
+
try:
|
|
581
|
+
uploaded_image_url = upload_image_to_bucket(webp_image_bytes, request.user.object.id)
|
|
582
|
+
except:
|
|
583
|
+
uploaded_image_url = None
|
|
584
|
+
|
|
553
585
|
async def send_event(event_type: ChatEvent, data: str | dict):
|
|
554
586
|
nonlocal connection_alive, ttft
|
|
555
587
|
if not connection_alive or await request.is_disconnected():
|
|
@@ -637,7 +669,7 @@ async def chat(
|
|
|
637
669
|
|
|
638
670
|
if conversation_commands == [ConversationCommand.Default] or is_automated_task:
|
|
639
671
|
conversation_commands = await aget_relevant_information_sources(
|
|
640
|
-
q, meta_log, is_automated_task, subscribed=subscribed
|
|
672
|
+
q, meta_log, is_automated_task, subscribed=subscribed, uploaded_image_url=uploaded_image_url
|
|
641
673
|
)
|
|
642
674
|
conversation_commands_str = ", ".join([cmd.value for cmd in conversation_commands])
|
|
643
675
|
async for result in send_event(
|
|
@@ -645,7 +677,7 @@ async def chat(
|
|
|
645
677
|
):
|
|
646
678
|
yield result
|
|
647
679
|
|
|
648
|
-
mode = await aget_relevant_output_modes(q, meta_log, is_automated_task)
|
|
680
|
+
mode = await aget_relevant_output_modes(q, meta_log, is_automated_task, uploaded_image_url)
|
|
649
681
|
async for result in send_event(ChatEvent.STATUS, f"**Decided Response Mode:** {mode.value}"):
|
|
650
682
|
yield result
|
|
651
683
|
if mode not in conversation_commands:
|
|
@@ -693,7 +725,9 @@ async def chat(
|
|
|
693
725
|
):
|
|
694
726
|
yield result
|
|
695
727
|
|
|
696
|
-
response = await extract_relevant_summary(
|
|
728
|
+
response = await extract_relevant_summary(
|
|
729
|
+
q, contextual_data, subscribed=subscribed, uploaded_image_url=uploaded_image_url
|
|
730
|
+
)
|
|
697
731
|
response_log = str(response)
|
|
698
732
|
async for result in send_llm_response(response_log):
|
|
699
733
|
yield result
|
|
@@ -711,6 +745,7 @@ async def chat(
|
|
|
711
745
|
intent_type="summarize",
|
|
712
746
|
client_application=request.user.client_app,
|
|
713
747
|
conversation_id=conversation_id,
|
|
748
|
+
uploaded_image_url=uploaded_image_url,
|
|
714
749
|
)
|
|
715
750
|
return
|
|
716
751
|
|
|
@@ -753,6 +788,7 @@ async def chat(
|
|
|
753
788
|
conversation_id=conversation_id,
|
|
754
789
|
inferred_queries=[query_to_run],
|
|
755
790
|
automation_id=automation.id,
|
|
791
|
+
uploaded_image_url=uploaded_image_url,
|
|
756
792
|
)
|
|
757
793
|
async for result in send_llm_response(llm_response):
|
|
758
794
|
yield result
|
|
@@ -807,6 +843,7 @@ async def chat(
|
|
|
807
843
|
subscribed,
|
|
808
844
|
partial(send_event, ChatEvent.STATUS),
|
|
809
845
|
custom_filters,
|
|
846
|
+
uploaded_image_url=uploaded_image_url,
|
|
810
847
|
):
|
|
811
848
|
if isinstance(result, dict) and ChatEvent.STATUS in result:
|
|
812
849
|
yield result[ChatEvent.STATUS]
|
|
@@ -823,7 +860,13 @@ async def chat(
|
|
|
823
860
|
if ConversationCommand.Webpage in conversation_commands:
|
|
824
861
|
try:
|
|
825
862
|
async for result in read_webpages(
|
|
826
|
-
defiltered_query,
|
|
863
|
+
defiltered_query,
|
|
864
|
+
meta_log,
|
|
865
|
+
location,
|
|
866
|
+
user,
|
|
867
|
+
subscribed,
|
|
868
|
+
partial(send_event, ChatEvent.STATUS),
|
|
869
|
+
uploaded_image_url=uploaded_image_url,
|
|
827
870
|
):
|
|
828
871
|
if isinstance(result, dict) and ChatEvent.STATUS in result:
|
|
829
872
|
yield result[ChatEvent.STATUS]
|
|
@@ -869,6 +912,7 @@ async def chat(
|
|
|
869
912
|
online_results=online_results,
|
|
870
913
|
subscribed=subscribed,
|
|
871
914
|
send_status_func=partial(send_event, ChatEvent.STATUS),
|
|
915
|
+
uploaded_image_url=uploaded_image_url,
|
|
872
916
|
):
|
|
873
917
|
if isinstance(result, dict) and ChatEvent.STATUS in result:
|
|
874
918
|
yield result[ChatEvent.STATUS]
|
|
@@ -898,6 +942,7 @@ async def chat(
|
|
|
898
942
|
conversation_id=conversation_id,
|
|
899
943
|
compiled_references=compiled_references,
|
|
900
944
|
online_results=online_results,
|
|
945
|
+
uploaded_image_url=uploaded_image_url,
|
|
901
946
|
)
|
|
902
947
|
content_obj = {
|
|
903
948
|
"intentType": intent_type,
|
|
@@ -924,6 +969,7 @@ async def chat(
|
|
|
924
969
|
conversation_id,
|
|
925
970
|
location,
|
|
926
971
|
user_name,
|
|
972
|
+
uploaded_image_url,
|
|
927
973
|
)
|
|
928
974
|
|
|
929
975
|
# Send Response
|
|
@@ -949,9 +995,9 @@ async def chat(
|
|
|
949
995
|
|
|
950
996
|
## Stream Text Response
|
|
951
997
|
if stream:
|
|
952
|
-
return StreamingResponse(event_generator(q), media_type="text/plain")
|
|
998
|
+
return StreamingResponse(event_generator(q, image=image), media_type="text/plain")
|
|
953
999
|
## Non-Streaming Text Response
|
|
954
1000
|
else:
|
|
955
|
-
response_iterator = event_generator(q)
|
|
1001
|
+
response_iterator = event_generator(q, image=image)
|
|
956
1002
|
response_data = await read_chat_stream(response_iterator)
|
|
957
1003
|
return Response(content=json.dumps(response_data), media_type="application/json", status_code=200)
|
khoj/routers/helpers.py
CHANGED
|
@@ -97,6 +97,7 @@ from khoj.utils.helpers import (
|
|
|
97
97
|
LRU,
|
|
98
98
|
ConversationCommand,
|
|
99
99
|
ImageIntentType,
|
|
100
|
+
convert_image_to_webp,
|
|
100
101
|
is_none_or_empty,
|
|
101
102
|
is_valid_url,
|
|
102
103
|
log_telemetry,
|
|
@@ -252,7 +253,9 @@ async def acreate_title_from_query(query: str) -> str:
|
|
|
252
253
|
return response.strip()
|
|
253
254
|
|
|
254
255
|
|
|
255
|
-
async def aget_relevant_information_sources(
|
|
256
|
+
async def aget_relevant_information_sources(
|
|
257
|
+
query: str, conversation_history: dict, is_task: bool, subscribed: bool, uploaded_image_url: str = None
|
|
258
|
+
):
|
|
256
259
|
"""
|
|
257
260
|
Given a query, determine which of the available tools the agent should use in order to answer appropriately.
|
|
258
261
|
"""
|
|
@@ -266,6 +269,9 @@ async def aget_relevant_information_sources(query: str, conversation_history: di
|
|
|
266
269
|
|
|
267
270
|
chat_history = construct_chat_history(conversation_history)
|
|
268
271
|
|
|
272
|
+
if uploaded_image_url:
|
|
273
|
+
query = f"[placeholder for image attached to this message]\n{query}"
|
|
274
|
+
|
|
269
275
|
relevant_tools_prompt = prompts.pick_relevant_information_collection_tools.format(
|
|
270
276
|
query=query,
|
|
271
277
|
tools=tool_options_str,
|
|
@@ -274,7 +280,9 @@ async def aget_relevant_information_sources(query: str, conversation_history: di
|
|
|
274
280
|
|
|
275
281
|
with timer("Chat actor: Infer information sources to refer", logger):
|
|
276
282
|
response = await send_message_to_model_wrapper(
|
|
277
|
-
relevant_tools_prompt,
|
|
283
|
+
relevant_tools_prompt,
|
|
284
|
+
response_type="json_object",
|
|
285
|
+
subscribed=subscribed,
|
|
278
286
|
)
|
|
279
287
|
|
|
280
288
|
try:
|
|
@@ -302,7 +310,9 @@ async def aget_relevant_information_sources(query: str, conversation_history: di
|
|
|
302
310
|
return [ConversationCommand.Default]
|
|
303
311
|
|
|
304
312
|
|
|
305
|
-
async def aget_relevant_output_modes(
|
|
313
|
+
async def aget_relevant_output_modes(
|
|
314
|
+
query: str, conversation_history: dict, is_task: bool = False, uploaded_image_url: str = None
|
|
315
|
+
):
|
|
306
316
|
"""
|
|
307
317
|
Given a query, determine which of the available tools the agent should use in order to answer appropriately.
|
|
308
318
|
"""
|
|
@@ -319,6 +329,9 @@ async def aget_relevant_output_modes(query: str, conversation_history: dict, is_
|
|
|
319
329
|
|
|
320
330
|
chat_history = construct_chat_history(conversation_history)
|
|
321
331
|
|
|
332
|
+
if uploaded_image_url:
|
|
333
|
+
query = f"[placeholder for image attached to this message]\n{query}"
|
|
334
|
+
|
|
322
335
|
relevant_mode_prompt = prompts.pick_relevant_output_mode.format(
|
|
323
336
|
query=query,
|
|
324
337
|
modes=mode_options_str,
|
|
@@ -347,7 +360,7 @@ async def aget_relevant_output_modes(query: str, conversation_history: dict, is_
|
|
|
347
360
|
|
|
348
361
|
|
|
349
362
|
async def infer_webpage_urls(
|
|
350
|
-
q: str, conversation_history: dict, location_data: LocationData, user: KhojUser
|
|
363
|
+
q: str, conversation_history: dict, location_data: LocationData, user: KhojUser, uploaded_image_url: str = None
|
|
351
364
|
) -> List[str]:
|
|
352
365
|
"""
|
|
353
366
|
Infer webpage links from the given query
|
|
@@ -366,7 +379,9 @@ async def infer_webpage_urls(
|
|
|
366
379
|
)
|
|
367
380
|
|
|
368
381
|
with timer("Chat actor: Infer webpage urls to read", logger):
|
|
369
|
-
response = await send_message_to_model_wrapper(
|
|
382
|
+
response = await send_message_to_model_wrapper(
|
|
383
|
+
online_queries_prompt, uploaded_image_url=uploaded_image_url, response_type="json_object"
|
|
384
|
+
)
|
|
370
385
|
|
|
371
386
|
# Validate that the response is a non-empty, JSON-serializable list of URLs
|
|
372
387
|
try:
|
|
@@ -381,7 +396,7 @@ async def infer_webpage_urls(
|
|
|
381
396
|
|
|
382
397
|
|
|
383
398
|
async def generate_online_subqueries(
|
|
384
|
-
q: str, conversation_history: dict, location_data: LocationData, user: KhojUser
|
|
399
|
+
q: str, conversation_history: dict, location_data: LocationData, user: KhojUser, uploaded_image_url: str = None
|
|
385
400
|
) -> List[str]:
|
|
386
401
|
"""
|
|
387
402
|
Generate subqueries from the given query
|
|
@@ -400,7 +415,9 @@ async def generate_online_subqueries(
|
|
|
400
415
|
)
|
|
401
416
|
|
|
402
417
|
with timer("Chat actor: Generate online search subqueries", logger):
|
|
403
|
-
response = await send_message_to_model_wrapper(
|
|
418
|
+
response = await send_message_to_model_wrapper(
|
|
419
|
+
online_queries_prompt, uploaded_image_url=uploaded_image_url, response_type="json_object"
|
|
420
|
+
)
|
|
404
421
|
|
|
405
422
|
# Validate that the response is a non-empty, JSON-serializable list
|
|
406
423
|
try:
|
|
@@ -419,7 +436,7 @@ async def generate_online_subqueries(
|
|
|
419
436
|
return [q]
|
|
420
437
|
|
|
421
438
|
|
|
422
|
-
async def schedule_query(q: str, conversation_history: dict) -> Tuple[str, ...]:
|
|
439
|
+
async def schedule_query(q: str, conversation_history: dict, uploaded_image_url: str = None) -> Tuple[str, ...]:
|
|
423
440
|
"""
|
|
424
441
|
Schedule the date, time to run the query. Assume the server timezone is UTC.
|
|
425
442
|
"""
|
|
@@ -430,7 +447,9 @@ async def schedule_query(q: str, conversation_history: dict) -> Tuple[str, ...]:
|
|
|
430
447
|
chat_history=chat_history,
|
|
431
448
|
)
|
|
432
449
|
|
|
433
|
-
raw_response = await send_message_to_model_wrapper(
|
|
450
|
+
raw_response = await send_message_to_model_wrapper(
|
|
451
|
+
crontime_prompt, uploaded_image_url=uploaded_image_url, response_type="json_object"
|
|
452
|
+
)
|
|
434
453
|
|
|
435
454
|
# Validate that the response is a non-empty, JSON-serializable list
|
|
436
455
|
try:
|
|
@@ -468,7 +487,9 @@ async def extract_relevant_info(q: str, corpus: str, subscribed: bool) -> Union[
|
|
|
468
487
|
return response.strip()
|
|
469
488
|
|
|
470
489
|
|
|
471
|
-
async def extract_relevant_summary(
|
|
490
|
+
async def extract_relevant_summary(
|
|
491
|
+
q: str, corpus: str, subscribed: bool = False, uploaded_image_url: str = None
|
|
492
|
+
) -> Union[str, None]:
|
|
472
493
|
"""
|
|
473
494
|
Extract relevant information for a given query from the target corpus
|
|
474
495
|
"""
|
|
@@ -489,6 +510,7 @@ async def extract_relevant_summary(q: str, corpus: str, subscribed: bool = False
|
|
|
489
510
|
prompts.system_prompt_extract_relevant_summary,
|
|
490
511
|
chat_model_option=chat_model,
|
|
491
512
|
subscribed=subscribed,
|
|
513
|
+
uploaded_image_url=uploaded_image_url,
|
|
492
514
|
)
|
|
493
515
|
return response.strip()
|
|
494
516
|
|
|
@@ -501,6 +523,7 @@ async def generate_better_image_prompt(
|
|
|
501
523
|
online_results: Optional[dict] = None,
|
|
502
524
|
model_type: Optional[str] = None,
|
|
503
525
|
subscribed: bool = False,
|
|
526
|
+
uploaded_image_url: Optional[str] = None,
|
|
504
527
|
) -> str:
|
|
505
528
|
"""
|
|
506
529
|
Generate a better image prompt from the given query
|
|
@@ -549,7 +572,7 @@ async def generate_better_image_prompt(
|
|
|
549
572
|
|
|
550
573
|
with timer("Chat actor: Generate contextual image prompt", logger):
|
|
551
574
|
response = await send_message_to_model_wrapper(
|
|
552
|
-
image_prompt, chat_model_option=chat_model, subscribed=subscribed
|
|
575
|
+
image_prompt, chat_model_option=chat_model, subscribed=subscribed, uploaded_image_url=uploaded_image_url
|
|
553
576
|
)
|
|
554
577
|
response = response.strip()
|
|
555
578
|
if response.startswith(('"', "'")) and response.endswith(('"', "'")):
|
|
@@ -564,11 +587,19 @@ async def send_message_to_model_wrapper(
|
|
|
564
587
|
response_type: str = "text",
|
|
565
588
|
chat_model_option: ChatModelOptions = None,
|
|
566
589
|
subscribed: bool = False,
|
|
590
|
+
uploaded_image_url: str = None,
|
|
567
591
|
):
|
|
568
592
|
conversation_config: ChatModelOptions = (
|
|
569
593
|
chat_model_option or await ConversationAdapters.aget_default_conversation_config()
|
|
570
594
|
)
|
|
571
595
|
|
|
596
|
+
vision_available = conversation_config.vision_enabled
|
|
597
|
+
if not vision_available and uploaded_image_url:
|
|
598
|
+
vision_enabled_config = ConversationAdapters.get_vision_enabled_config()
|
|
599
|
+
if vision_enabled_config:
|
|
600
|
+
conversation_config = vision_enabled_config
|
|
601
|
+
vision_available = True
|
|
602
|
+
|
|
572
603
|
chat_model = conversation_config.chat_model
|
|
573
604
|
max_tokens = (
|
|
574
605
|
conversation_config.subscribed_max_prompt_size
|
|
@@ -576,6 +607,7 @@ async def send_message_to_model_wrapper(
|
|
|
576
607
|
else conversation_config.max_prompt_size
|
|
577
608
|
)
|
|
578
609
|
tokenizer = conversation_config.tokenizer
|
|
610
|
+
vision_available = conversation_config.vision_enabled
|
|
579
611
|
|
|
580
612
|
if conversation_config.model_type == "offline":
|
|
581
613
|
if state.offline_chat_processor_config is None or state.offline_chat_processor_config.loaded_model is None:
|
|
@@ -589,6 +621,7 @@ async def send_message_to_model_wrapper(
|
|
|
589
621
|
loaded_model=loaded_model,
|
|
590
622
|
tokenizer_name=tokenizer,
|
|
591
623
|
max_prompt_size=max_tokens,
|
|
624
|
+
vision_enabled=vision_available,
|
|
592
625
|
)
|
|
593
626
|
|
|
594
627
|
return send_message_to_model_offline(
|
|
@@ -609,6 +642,8 @@ async def send_message_to_model_wrapper(
|
|
|
609
642
|
model_name=chat_model,
|
|
610
643
|
max_prompt_size=max_tokens,
|
|
611
644
|
tokenizer_name=tokenizer,
|
|
645
|
+
vision_enabled=vision_available,
|
|
646
|
+
uploaded_image_url=uploaded_image_url,
|
|
612
647
|
)
|
|
613
648
|
|
|
614
649
|
openai_response = send_message_to_model(
|
|
@@ -628,6 +663,7 @@ async def send_message_to_model_wrapper(
|
|
|
628
663
|
model_name=chat_model,
|
|
629
664
|
max_prompt_size=max_tokens,
|
|
630
665
|
tokenizer_name=tokenizer,
|
|
666
|
+
vision_enabled=vision_available,
|
|
631
667
|
)
|
|
632
668
|
|
|
633
669
|
return anthropic_send_message_to_model(
|
|
@@ -651,6 +687,7 @@ def send_message_to_model_wrapper_sync(
|
|
|
651
687
|
|
|
652
688
|
chat_model = conversation_config.chat_model
|
|
653
689
|
max_tokens = conversation_config.max_prompt_size
|
|
690
|
+
vision_available = conversation_config.vision_enabled
|
|
654
691
|
|
|
655
692
|
if conversation_config.model_type == "offline":
|
|
656
693
|
if state.offline_chat_processor_config is None or state.offline_chat_processor_config.loaded_model is None:
|
|
@@ -658,7 +695,11 @@ def send_message_to_model_wrapper_sync(
|
|
|
658
695
|
|
|
659
696
|
loaded_model = state.offline_chat_processor_config.loaded_model
|
|
660
697
|
truncated_messages = generate_chatml_messages_with_context(
|
|
661
|
-
user_message=message,
|
|
698
|
+
user_message=message,
|
|
699
|
+
system_message=system_message,
|
|
700
|
+
model_name=chat_model,
|
|
701
|
+
loaded_model=loaded_model,
|
|
702
|
+
vision_enabled=vision_available,
|
|
662
703
|
)
|
|
663
704
|
|
|
664
705
|
return send_message_to_model_offline(
|
|
@@ -672,7 +713,10 @@ def send_message_to_model_wrapper_sync(
|
|
|
672
713
|
elif conversation_config.model_type == "openai":
|
|
673
714
|
api_key = conversation_config.openai_config.api_key
|
|
674
715
|
truncated_messages = generate_chatml_messages_with_context(
|
|
675
|
-
user_message=message,
|
|
716
|
+
user_message=message,
|
|
717
|
+
system_message=system_message,
|
|
718
|
+
model_name=chat_model,
|
|
719
|
+
vision_enabled=vision_available,
|
|
676
720
|
)
|
|
677
721
|
|
|
678
722
|
openai_response = send_message_to_model(
|
|
@@ -688,6 +732,7 @@ def send_message_to_model_wrapper_sync(
|
|
|
688
732
|
system_message=system_message,
|
|
689
733
|
model_name=chat_model,
|
|
690
734
|
max_prompt_size=max_tokens,
|
|
735
|
+
vision_enabled=vision_available,
|
|
691
736
|
)
|
|
692
737
|
|
|
693
738
|
return anthropic_send_message_to_model(
|
|
@@ -712,6 +757,7 @@ def generate_chat_response(
|
|
|
712
757
|
conversation_id: int = None,
|
|
713
758
|
location_data: LocationData = None,
|
|
714
759
|
user_name: Optional[str] = None,
|
|
760
|
+
uploaded_image_url: Optional[str] = None,
|
|
715
761
|
) -> Tuple[Union[ThreadedGenerator, Iterator[str]], Dict[str, str]]:
|
|
716
762
|
# Initialize Variables
|
|
717
763
|
chat_response = None
|
|
@@ -719,7 +765,6 @@ def generate_chat_response(
|
|
|
719
765
|
|
|
720
766
|
metadata = {}
|
|
721
767
|
agent = AgentAdapters.get_conversation_agent_by_id(conversation.agent.id) if conversation.agent else None
|
|
722
|
-
|
|
723
768
|
try:
|
|
724
769
|
partial_completion = partial(
|
|
725
770
|
save_to_conversation_log,
|
|
@@ -731,9 +776,17 @@ def generate_chat_response(
|
|
|
731
776
|
inferred_queries=inferred_queries,
|
|
732
777
|
client_application=client_application,
|
|
733
778
|
conversation_id=conversation_id,
|
|
779
|
+
uploaded_image_url=uploaded_image_url,
|
|
734
780
|
)
|
|
735
781
|
|
|
736
782
|
conversation_config = ConversationAdapters.get_valid_conversation_config(user, conversation)
|
|
783
|
+
vision_available = conversation_config.vision_enabled
|
|
784
|
+
if not vision_available and uploaded_image_url:
|
|
785
|
+
vision_enabled_config = ConversationAdapters.get_vision_enabled_config()
|
|
786
|
+
if vision_enabled_config:
|
|
787
|
+
conversation_config = vision_enabled_config
|
|
788
|
+
vision_available = True
|
|
789
|
+
|
|
737
790
|
if conversation_config.model_type == "offline":
|
|
738
791
|
loaded_model = state.offline_chat_processor_config.loaded_model
|
|
739
792
|
chat_response = converse_offline(
|
|
@@ -759,6 +812,7 @@ def generate_chat_response(
|
|
|
759
812
|
chat_response = converse(
|
|
760
813
|
compiled_references,
|
|
761
814
|
q,
|
|
815
|
+
image_url=uploaded_image_url,
|
|
762
816
|
online_results=online_results,
|
|
763
817
|
conversation_log=meta_log,
|
|
764
818
|
model=chat_model,
|
|
@@ -771,6 +825,7 @@ def generate_chat_response(
|
|
|
771
825
|
location_data=location_data,
|
|
772
826
|
user_name=user_name,
|
|
773
827
|
agent=agent,
|
|
828
|
+
vision_available=vision_available,
|
|
774
829
|
)
|
|
775
830
|
|
|
776
831
|
elif conversation_config.model_type == "anthropic":
|
|
@@ -809,6 +864,7 @@ async def text_to_image(
|
|
|
809
864
|
online_results: Dict[str, Any],
|
|
810
865
|
subscribed: bool = False,
|
|
811
866
|
send_status_func: Optional[Callable] = None,
|
|
867
|
+
uploaded_image_url: Optional[str] = None,
|
|
812
868
|
):
|
|
813
869
|
status_code = 200
|
|
814
870
|
image = None
|
|
@@ -845,6 +901,7 @@ async def text_to_image(
|
|
|
845
901
|
online_results=online_results,
|
|
846
902
|
model_type=text_to_image_config.model_type,
|
|
847
903
|
subscribed=subscribed,
|
|
904
|
+
uploaded_image_url=uploaded_image_url,
|
|
848
905
|
)
|
|
849
906
|
|
|
850
907
|
if send_status_func:
|
|
@@ -908,13 +965,7 @@ async def text_to_image(
|
|
|
908
965
|
|
|
909
966
|
with timer("Convert image to webp", logger):
|
|
910
967
|
# Convert png to webp for faster loading
|
|
911
|
-
|
|
912
|
-
png_image = Image.open(image_io)
|
|
913
|
-
webp_image_io = io.BytesIO()
|
|
914
|
-
png_image.save(webp_image_io, "WEBP")
|
|
915
|
-
webp_image_bytes = webp_image_io.getvalue()
|
|
916
|
-
webp_image_io.close()
|
|
917
|
-
image_io.close()
|
|
968
|
+
webp_image_bytes = convert_image_to_webp(decoded_image)
|
|
918
969
|
|
|
919
970
|
with timer("Upload image to S3", logger):
|
|
920
971
|
image_url = upload_image(webp_image_bytes, user.uuid)
|
|
@@ -1095,6 +1146,7 @@ def should_notify(original_query: str, executed_query: str, ai_response: str) ->
|
|
|
1095
1146
|
|
|
1096
1147
|
with timer("Chat actor: Decide to notify user of automation response", logger):
|
|
1097
1148
|
try:
|
|
1149
|
+
# TODO Replace with async call so we don't have to maintain a sync version
|
|
1098
1150
|
response = send_message_to_model_wrapper_sync(to_notify_or_not)
|
|
1099
1151
|
should_notify_result = "no" not in response.lower()
|
|
1100
1152
|
logger.info(f'Decided to {"not " if not should_notify_result else ""}notify user of automation response.')
|
khoj/routers/storage.py
CHANGED
|
@@ -33,3 +33,31 @@ def upload_image(image: bytes, user_id: uuid.UUID):
|
|
|
33
33
|
except Exception as e:
|
|
34
34
|
logger.error(f"Failed to upload image to S3: {e}")
|
|
35
35
|
return None
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
AWS_USER_UPLOADED_IMAGES_BUCKET_NAME = os.getenv("AWS_USER_UPLOADED_IMAGES_BUCKET_NAME")
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def upload_image_to_bucket(image: bytes, user_id: uuid.UUID):
|
|
42
|
+
"""Upload the image to the S3 bucket"""
|
|
43
|
+
if not aws_enabled:
|
|
44
|
+
logger.info("AWS is not enabled. Skipping image upload")
|
|
45
|
+
return None
|
|
46
|
+
|
|
47
|
+
image_key = f"{user_id}/{uuid.uuid4()}.webp"
|
|
48
|
+
if not AWS_USER_UPLOADED_IMAGES_BUCKET_NAME:
|
|
49
|
+
logger.error("AWS_USER_UPLOADED_IMAGES_BUCKET_NAME is not set")
|
|
50
|
+
return None
|
|
51
|
+
|
|
52
|
+
try:
|
|
53
|
+
s3_client.put_object(
|
|
54
|
+
Bucket=AWS_USER_UPLOADED_IMAGES_BUCKET_NAME,
|
|
55
|
+
Key=image_key,
|
|
56
|
+
Body=image,
|
|
57
|
+
ACL="public-read",
|
|
58
|
+
ContentType="image/webp",
|
|
59
|
+
)
|
|
60
|
+
return f"https://{AWS_USER_UPLOADED_IMAGES_BUCKET_NAME}/{image_key}"
|
|
61
|
+
except Exception as e:
|
|
62
|
+
logger.error(f"Failed to upload image to S3: {e}")
|
|
63
|
+
return None
|