khoj 1.21.6.dev13__py3-none-any.whl → 1.21.7.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/database/adapters/__init__.py +12 -0
- khoj/database/migrations/0056_chatmodeloptions_vision_enabled.py +17 -0
- khoj/database/migrations/0057_merge_20240816_1409.py +13 -0
- khoj/database/migrations/0060_merge_20240905_1828.py +14 -0
- khoj/database/models/__init__.py +1 -0
- khoj/interface/compiled/404/index.html +1 -1
- khoj/interface/compiled/_next/static/chunks/{3062-a42d847c919a9ea4.js → 3062-9be9a4e34f82ed3a.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/3678-0732dd9d2f472171.js +25 -0
- khoj/interface/compiled/_next/static/chunks/8423-ee6746f47901db2f.js +1 -0
- khoj/interface/compiled/_next/static/chunks/9001-3b27af6d5f21df44.js +21 -0
- khoj/interface/compiled/_next/static/chunks/9162-0be016519a18568b.js +11 -0
- khoj/interface/compiled/_next/static/chunks/{9178-d23cb0dbee40a775.js → 9178-3a0baad1c172d515.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{9693-91b03052c5cabded.js → 9984-e410179c6fac7cf1.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/agents/{page-3c01900e7b5c7e50.js → page-462502107217be82.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/automations/{page-6ea3381528603372.js → page-e30a75db8719f439.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/page-ed970e05064ff12c.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/factchecker/page-693fe53982bf33e1.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/page-c26f689e39b400ba.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/search/{page-fa15807b1ad7e30b.js → page-0798bb43c2e368bf.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/settings/{page-1a2acc46cdabaf4a.js → page-f518555f8e2fd794.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/page-ad9d2e9787eed215.js +1 -0
- khoj/interface/compiled/_next/static/chunks/{webpack-2651a68f46ac3cb7.js → webpack-95ea8d2b149d6bad.js} +1 -1
- khoj/interface/compiled/_next/static/css/2a860030cf7c384b.css +1 -0
- khoj/interface/compiled/_next/static/css/4cae6c0e5c72fb2d.css +1 -0
- khoj/interface/compiled/_next/static/css/5a400c87d295e68a.css +1 -0
- khoj/interface/compiled/_next/static/css/76db8c247950117c.css +25 -0
- khoj/interface/compiled/_next/static/css/c808691c459e3887.css +1 -0
- khoj/interface/compiled/agents/index.html +1 -1
- khoj/interface/compiled/agents/index.txt +2 -2
- khoj/interface/compiled/automations/index.html +1 -1
- khoj/interface/compiled/automations/index.txt +3 -3
- khoj/interface/compiled/chat/index.html +1 -1
- khoj/interface/compiled/chat/index.txt +2 -2
- khoj/interface/compiled/factchecker/index.html +1 -1
- khoj/interface/compiled/factchecker/index.txt +2 -2
- khoj/interface/compiled/index.html +1 -1
- khoj/interface/compiled/index.txt +2 -2
- khoj/interface/compiled/search/index.html +1 -1
- khoj/interface/compiled/search/index.txt +2 -2
- khoj/interface/compiled/settings/index.html +1 -1
- khoj/interface/compiled/settings/index.txt +3 -3
- khoj/interface/compiled/share/chat/index.html +1 -1
- khoj/interface/compiled/share/chat/index.txt +2 -2
- khoj/interface/email/welcome.html +1 -1
- khoj/processor/conversation/openai/gpt.py +4 -0
- khoj/processor/conversation/utils.py +31 -13
- khoj/processor/tools/online_search.py +6 -2
- khoj/routers/api_chat.py +41 -8
- khoj/routers/helpers.py +73 -21
- khoj/routers/storage.py +28 -0
- khoj/utils/helpers.py +15 -0
- {khoj-1.21.6.dev13.dist-info → khoj-1.21.7.dev1.dist-info}/METADATA +1 -1
- {khoj-1.21.6.dev13.dist-info → khoj-1.21.7.dev1.dist-info}/RECORD +58 -55
- khoj/interface/compiled/_next/static/chunks/3678-8c0e55c3b5d83a22.js +0 -25
- khoj/interface/compiled/_next/static/chunks/8423-132ea64eac83fd43.js +0 -1
- khoj/interface/compiled/_next/static/chunks/9001-acbca3e19b1a5ddf.js +0 -21
- khoj/interface/compiled/_next/static/chunks/9162-4a6d0d0dc5e27618.js +0 -11
- khoj/interface/compiled/_next/static/chunks/app/chat/page-c2ebc47a09abc8ae.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/factchecker/page-6ca723a9ff0dfd70.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/page-d403fc59c9c3f8cc.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/page-0ae8f5b868af65c1.js +0 -1
- khoj/interface/compiled/_next/static/css/2bfe35fbe2c97a56.css +0 -1
- khoj/interface/compiled/_next/static/css/9d5b867ec04494a6.css +0 -25
- khoj/interface/compiled/_next/static/css/a22d83f18a32957e.css +0 -1
- khoj/interface/compiled/_next/static/css/a3530ec58b0b660f.css +0 -1
- khoj/interface/compiled/_next/static/css/b81e909d403fb2df.css +0 -1
- /khoj/interface/compiled/_next/static/{8iiVOXR-wgRrEcg-Q3shk → ZZXQatJ9SszXKA3rhPWXF}/_buildManifest.js +0 -0
- /khoj/interface/compiled/_next/static/{8iiVOXR-wgRrEcg-Q3shk → ZZXQatJ9SszXKA3rhPWXF}/_ssgManifest.js +0 -0
- {khoj-1.21.6.dev13.dist-info → khoj-1.21.7.dev1.dist-info}/WHEEL +0 -0
- {khoj-1.21.6.dev13.dist-info → khoj-1.21.7.dev1.dist-info}/entry_points.txt +0 -0
- {khoj-1.21.6.dev13.dist-info → khoj-1.21.7.dev1.dist-info}/licenses/LICENSE +0 -0
khoj/routers/api_chat.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import asyncio
|
|
2
|
+
import base64
|
|
2
3
|
import json
|
|
3
4
|
import logging
|
|
4
5
|
import time
|
|
@@ -46,11 +47,13 @@ from khoj.routers.helpers import (
|
|
|
46
47
|
update_telemetry_state,
|
|
47
48
|
validate_conversation_config,
|
|
48
49
|
)
|
|
50
|
+
from khoj.routers.storage import upload_image_to_bucket
|
|
49
51
|
from khoj.utils import state
|
|
50
52
|
from khoj.utils.helpers import (
|
|
51
53
|
AsyncIteratorWrapper,
|
|
52
54
|
ConversationCommand,
|
|
53
55
|
command_descriptions,
|
|
56
|
+
convert_image_to_webp,
|
|
54
57
|
get_device,
|
|
55
58
|
is_none_or_empty,
|
|
56
59
|
)
|
|
@@ -517,7 +520,11 @@ async def set_conversation_title(
|
|
|
517
520
|
)
|
|
518
521
|
|
|
519
522
|
|
|
520
|
-
|
|
523
|
+
class ImageUploadObject(BaseModel):
    # Request-body schema for an optional image attached to a chat message.
    # The chat handler URL-unquotes `image`, keeps the text after the first
    # comma and base64-decodes it, so the value is expected to look like
    # "<prefix>,<base64 data>" (presumably a data URI; confirm with the client).
    image: str
|
|
525
|
+
|
|
526
|
+
|
|
527
|
+
@api_chat.post("")
|
|
521
528
|
@requires(["authenticated"])
|
|
522
529
|
async def chat(
|
|
523
530
|
request: Request,
|
|
@@ -532,6 +539,7 @@ async def chat(
|
|
|
532
539
|
region: Optional[str] = None,
|
|
533
540
|
country: Optional[str] = None,
|
|
534
541
|
timezone: Optional[str] = None,
|
|
542
|
+
image: Optional[ImageUploadObject] = None,
|
|
535
543
|
rate_limiter_per_minute=Depends(
|
|
536
544
|
ApiUserRateLimiter(requests=60, subscribed_requests=60, window=60, slug="chat_minute")
|
|
537
545
|
),
|
|
@@ -539,7 +547,7 @@ async def chat(
|
|
|
539
547
|
ApiUserRateLimiter(requests=600, subscribed_requests=600, window=60 * 60 * 24, slug="chat_day")
|
|
540
548
|
),
|
|
541
549
|
):
|
|
542
|
-
async def event_generator(q: str):
|
|
550
|
+
async def event_generator(q: str, image: ImageUploadObject):
|
|
543
551
|
start_time = time.perf_counter()
|
|
544
552
|
ttft = None
|
|
545
553
|
chat_metadata: dict = {}
|
|
@@ -550,6 +558,17 @@ async def chat(
|
|
|
550
558
|
q = unquote(q)
|
|
551
559
|
nonlocal conversation_id
|
|
552
560
|
|
|
561
|
+
uploaded_image_url = None
|
|
562
|
+
if image:
|
|
563
|
+
decoded_string = unquote(image.image)
|
|
564
|
+
base64_data = decoded_string.split(",", 1)[1]
|
|
565
|
+
image_bytes = base64.b64decode(base64_data)
|
|
566
|
+
webp_image_bytes = convert_image_to_webp(image_bytes)
|
|
567
|
+
try:
|
|
568
|
+
uploaded_image_url = upload_image_to_bucket(webp_image_bytes, request.user.object.id)
|
|
569
|
+
except:
|
|
570
|
+
uploaded_image_url = None
|
|
571
|
+
|
|
553
572
|
async def send_event(event_type: ChatEvent, data: str | dict):
|
|
554
573
|
nonlocal connection_alive, ttft
|
|
555
574
|
if not connection_alive or await request.is_disconnected():
|
|
@@ -637,7 +656,7 @@ async def chat(
|
|
|
637
656
|
|
|
638
657
|
if conversation_commands == [ConversationCommand.Default] or is_automated_task:
|
|
639
658
|
conversation_commands = await aget_relevant_information_sources(
|
|
640
|
-
q, meta_log, is_automated_task, subscribed=subscribed
|
|
659
|
+
q, meta_log, is_automated_task, subscribed=subscribed, uploaded_image_url=uploaded_image_url
|
|
641
660
|
)
|
|
642
661
|
conversation_commands_str = ", ".join([cmd.value for cmd in conversation_commands])
|
|
643
662
|
async for result in send_event(
|
|
@@ -645,7 +664,7 @@ async def chat(
|
|
|
645
664
|
):
|
|
646
665
|
yield result
|
|
647
666
|
|
|
648
|
-
mode = await aget_relevant_output_modes(q, meta_log, is_automated_task)
|
|
667
|
+
mode = await aget_relevant_output_modes(q, meta_log, is_automated_task, uploaded_image_url)
|
|
649
668
|
async for result in send_event(ChatEvent.STATUS, f"**Decided Response Mode:** {mode.value}"):
|
|
650
669
|
yield result
|
|
651
670
|
if mode not in conversation_commands:
|
|
@@ -693,7 +712,9 @@ async def chat(
|
|
|
693
712
|
):
|
|
694
713
|
yield result
|
|
695
714
|
|
|
696
|
-
response = await extract_relevant_summary(
|
|
715
|
+
response = await extract_relevant_summary(
|
|
716
|
+
q, contextual_data, subscribed=subscribed, uploaded_image_url=uploaded_image_url
|
|
717
|
+
)
|
|
697
718
|
response_log = str(response)
|
|
698
719
|
async for result in send_llm_response(response_log):
|
|
699
720
|
yield result
|
|
@@ -711,6 +732,7 @@ async def chat(
|
|
|
711
732
|
intent_type="summarize",
|
|
712
733
|
client_application=request.user.client_app,
|
|
713
734
|
conversation_id=conversation_id,
|
|
735
|
+
uploaded_image_url=uploaded_image_url,
|
|
714
736
|
)
|
|
715
737
|
return
|
|
716
738
|
|
|
@@ -753,6 +775,7 @@ async def chat(
|
|
|
753
775
|
conversation_id=conversation_id,
|
|
754
776
|
inferred_queries=[query_to_run],
|
|
755
777
|
automation_id=automation.id,
|
|
778
|
+
uploaded_image_url=uploaded_image_url,
|
|
756
779
|
)
|
|
757
780
|
async for result in send_llm_response(llm_response):
|
|
758
781
|
yield result
|
|
@@ -807,6 +830,7 @@ async def chat(
|
|
|
807
830
|
subscribed,
|
|
808
831
|
partial(send_event, ChatEvent.STATUS),
|
|
809
832
|
custom_filters,
|
|
833
|
+
uploaded_image_url=uploaded_image_url,
|
|
810
834
|
):
|
|
811
835
|
if isinstance(result, dict) and ChatEvent.STATUS in result:
|
|
812
836
|
yield result[ChatEvent.STATUS]
|
|
@@ -823,7 +847,13 @@ async def chat(
|
|
|
823
847
|
if ConversationCommand.Webpage in conversation_commands:
|
|
824
848
|
try:
|
|
825
849
|
async for result in read_webpages(
|
|
826
|
-
defiltered_query,
|
|
850
|
+
defiltered_query,
|
|
851
|
+
meta_log,
|
|
852
|
+
location,
|
|
853
|
+
user,
|
|
854
|
+
subscribed,
|
|
855
|
+
partial(send_event, ChatEvent.STATUS),
|
|
856
|
+
uploaded_image_url=uploaded_image_url,
|
|
827
857
|
):
|
|
828
858
|
if isinstance(result, dict) and ChatEvent.STATUS in result:
|
|
829
859
|
yield result[ChatEvent.STATUS]
|
|
@@ -869,6 +899,7 @@ async def chat(
|
|
|
869
899
|
online_results=online_results,
|
|
870
900
|
subscribed=subscribed,
|
|
871
901
|
send_status_func=partial(send_event, ChatEvent.STATUS),
|
|
902
|
+
uploaded_image_url=uploaded_image_url,
|
|
872
903
|
):
|
|
873
904
|
if isinstance(result, dict) and ChatEvent.STATUS in result:
|
|
874
905
|
yield result[ChatEvent.STATUS]
|
|
@@ -898,6 +929,7 @@ async def chat(
|
|
|
898
929
|
conversation_id=conversation_id,
|
|
899
930
|
compiled_references=compiled_references,
|
|
900
931
|
online_results=online_results,
|
|
932
|
+
uploaded_image_url=uploaded_image_url,
|
|
901
933
|
)
|
|
902
934
|
content_obj = {
|
|
903
935
|
"intentType": intent_type,
|
|
@@ -924,6 +956,7 @@ async def chat(
|
|
|
924
956
|
conversation_id,
|
|
925
957
|
location,
|
|
926
958
|
user_name,
|
|
959
|
+
uploaded_image_url,
|
|
927
960
|
)
|
|
928
961
|
|
|
929
962
|
# Send Response
|
|
@@ -949,9 +982,9 @@ async def chat(
|
|
|
949
982
|
|
|
950
983
|
## Stream Text Response
|
|
951
984
|
if stream:
|
|
952
|
-
return StreamingResponse(event_generator(q), media_type="text/plain")
|
|
985
|
+
return StreamingResponse(event_generator(q, image=image), media_type="text/plain")
|
|
953
986
|
## Non-Streaming Text Response
|
|
954
987
|
else:
|
|
955
|
-
response_iterator = event_generator(q)
|
|
988
|
+
response_iterator = event_generator(q, image=image)
|
|
956
989
|
response_data = await read_chat_stream(response_iterator)
|
|
957
990
|
return Response(content=json.dumps(response_data), media_type="application/json", status_code=200)
|
khoj/routers/helpers.py
CHANGED
|
@@ -97,6 +97,7 @@ from khoj.utils.helpers import (
|
|
|
97
97
|
LRU,
|
|
98
98
|
ConversationCommand,
|
|
99
99
|
ImageIntentType,
|
|
100
|
+
convert_image_to_webp,
|
|
100
101
|
is_none_or_empty,
|
|
101
102
|
is_valid_url,
|
|
102
103
|
log_telemetry,
|
|
@@ -252,7 +253,9 @@ async def acreate_title_from_query(query: str) -> str:
|
|
|
252
253
|
return response.strip()
|
|
253
254
|
|
|
254
255
|
|
|
255
|
-
async def aget_relevant_information_sources(
|
|
256
|
+
async def aget_relevant_information_sources(
|
|
257
|
+
query: str, conversation_history: dict, is_task: bool, subscribed: bool, uploaded_image_url: str = None
|
|
258
|
+
):
|
|
256
259
|
"""
|
|
257
260
|
Given a query, determine which of the available tools the agent should use in order to answer appropriately.
|
|
258
261
|
"""
|
|
@@ -266,6 +269,9 @@ async def aget_relevant_information_sources(query: str, conversation_history: di
|
|
|
266
269
|
|
|
267
270
|
chat_history = construct_chat_history(conversation_history)
|
|
268
271
|
|
|
272
|
+
if uploaded_image_url:
|
|
273
|
+
query = f"[placeholder for image attached to this message]\n{query}"
|
|
274
|
+
|
|
269
275
|
relevant_tools_prompt = prompts.pick_relevant_information_collection_tools.format(
|
|
270
276
|
query=query,
|
|
271
277
|
tools=tool_options_str,
|
|
@@ -274,7 +280,9 @@ async def aget_relevant_information_sources(query: str, conversation_history: di
|
|
|
274
280
|
|
|
275
281
|
with timer("Chat actor: Infer information sources to refer", logger):
|
|
276
282
|
response = await send_message_to_model_wrapper(
|
|
277
|
-
relevant_tools_prompt,
|
|
283
|
+
relevant_tools_prompt,
|
|
284
|
+
response_type="json_object",
|
|
285
|
+
subscribed=subscribed,
|
|
278
286
|
)
|
|
279
287
|
|
|
280
288
|
try:
|
|
@@ -302,7 +310,9 @@ async def aget_relevant_information_sources(query: str, conversation_history: di
|
|
|
302
310
|
return [ConversationCommand.Default]
|
|
303
311
|
|
|
304
312
|
|
|
305
|
-
async def aget_relevant_output_modes(
|
|
313
|
+
async def aget_relevant_output_modes(
|
|
314
|
+
query: str, conversation_history: dict, is_task: bool = False, uploaded_image_url: str = None
|
|
315
|
+
):
|
|
306
316
|
"""
|
|
307
317
|
Given a query, determine which of the available tools the agent should use in order to answer appropriately.
|
|
308
318
|
"""
|
|
@@ -319,6 +329,9 @@ async def aget_relevant_output_modes(query: str, conversation_history: dict, is_
|
|
|
319
329
|
|
|
320
330
|
chat_history = construct_chat_history(conversation_history)
|
|
321
331
|
|
|
332
|
+
if uploaded_image_url:
|
|
333
|
+
query = f"[placeholder for image attached to this message]\n{query}"
|
|
334
|
+
|
|
322
335
|
relevant_mode_prompt = prompts.pick_relevant_output_mode.format(
|
|
323
336
|
query=query,
|
|
324
337
|
modes=mode_options_str,
|
|
@@ -347,7 +360,7 @@ async def aget_relevant_output_modes(query: str, conversation_history: dict, is_
|
|
|
347
360
|
|
|
348
361
|
|
|
349
362
|
async def infer_webpage_urls(
|
|
350
|
-
q: str, conversation_history: dict, location_data: LocationData, user: KhojUser
|
|
363
|
+
q: str, conversation_history: dict, location_data: LocationData, user: KhojUser, uploaded_image_url: str = None
|
|
351
364
|
) -> List[str]:
|
|
352
365
|
"""
|
|
353
366
|
Infer webpage links from the given query
|
|
@@ -366,7 +379,9 @@ async def infer_webpage_urls(
|
|
|
366
379
|
)
|
|
367
380
|
|
|
368
381
|
with timer("Chat actor: Infer webpage urls to read", logger):
|
|
369
|
-
response = await send_message_to_model_wrapper(
|
|
382
|
+
response = await send_message_to_model_wrapper(
|
|
383
|
+
online_queries_prompt, uploaded_image_url=uploaded_image_url, response_type="json_object"
|
|
384
|
+
)
|
|
370
385
|
|
|
371
386
|
# Validate that the response is a non-empty, JSON-serializable list of URLs
|
|
372
387
|
try:
|
|
@@ -381,7 +396,7 @@ async def infer_webpage_urls(
|
|
|
381
396
|
|
|
382
397
|
|
|
383
398
|
async def generate_online_subqueries(
|
|
384
|
-
q: str, conversation_history: dict, location_data: LocationData, user: KhojUser
|
|
399
|
+
q: str, conversation_history: dict, location_data: LocationData, user: KhojUser, uploaded_image_url: str = None
|
|
385
400
|
) -> List[str]:
|
|
386
401
|
"""
|
|
387
402
|
Generate subqueries from the given query
|
|
@@ -400,7 +415,9 @@ async def generate_online_subqueries(
|
|
|
400
415
|
)
|
|
401
416
|
|
|
402
417
|
with timer("Chat actor: Generate online search subqueries", logger):
|
|
403
|
-
response = await send_message_to_model_wrapper(
|
|
418
|
+
response = await send_message_to_model_wrapper(
|
|
419
|
+
online_queries_prompt, uploaded_image_url=uploaded_image_url, response_type="json_object"
|
|
420
|
+
)
|
|
404
421
|
|
|
405
422
|
# Validate that the response is a non-empty, JSON-serializable list
|
|
406
423
|
try:
|
|
@@ -419,7 +436,7 @@ async def generate_online_subqueries(
|
|
|
419
436
|
return [q]
|
|
420
437
|
|
|
421
438
|
|
|
422
|
-
async def schedule_query(q: str, conversation_history: dict) -> Tuple[str, ...]:
|
|
439
|
+
async def schedule_query(q: str, conversation_history: dict, uploaded_image_url: str = None) -> Tuple[str, ...]:
|
|
423
440
|
"""
|
|
424
441
|
Schedule the date, time to run the query. Assume the server timezone is UTC.
|
|
425
442
|
"""
|
|
@@ -430,7 +447,9 @@ async def schedule_query(q: str, conversation_history: dict) -> Tuple[str, ...]:
|
|
|
430
447
|
chat_history=chat_history,
|
|
431
448
|
)
|
|
432
449
|
|
|
433
|
-
raw_response = await send_message_to_model_wrapper(
|
|
450
|
+
raw_response = await send_message_to_model_wrapper(
|
|
451
|
+
crontime_prompt, uploaded_image_url=uploaded_image_url, response_type="json_object"
|
|
452
|
+
)
|
|
434
453
|
|
|
435
454
|
# Validate that the response is a non-empty, JSON-serializable list
|
|
436
455
|
try:
|
|
@@ -468,7 +487,9 @@ async def extract_relevant_info(q: str, corpus: str, subscribed: bool) -> Union[
|
|
|
468
487
|
return response.strip()
|
|
469
488
|
|
|
470
489
|
|
|
471
|
-
async def extract_relevant_summary(
|
|
490
|
+
async def extract_relevant_summary(
|
|
491
|
+
q: str, corpus: str, subscribed: bool = False, uploaded_image_url: str = None
|
|
492
|
+
) -> Union[str, None]:
|
|
472
493
|
"""
|
|
473
494
|
Extract relevant information for a given query from the target corpus
|
|
474
495
|
"""
|
|
@@ -489,6 +510,7 @@ async def extract_relevant_summary(q: str, corpus: str, subscribed: bool = False
|
|
|
489
510
|
prompts.system_prompt_extract_relevant_summary,
|
|
490
511
|
chat_model_option=chat_model,
|
|
491
512
|
subscribed=subscribed,
|
|
513
|
+
uploaded_image_url=uploaded_image_url,
|
|
492
514
|
)
|
|
493
515
|
return response.strip()
|
|
494
516
|
|
|
@@ -501,6 +523,7 @@ async def generate_better_image_prompt(
|
|
|
501
523
|
online_results: Optional[dict] = None,
|
|
502
524
|
model_type: Optional[str] = None,
|
|
503
525
|
subscribed: bool = False,
|
|
526
|
+
uploaded_image_url: Optional[str] = None,
|
|
504
527
|
) -> str:
|
|
505
528
|
"""
|
|
506
529
|
Generate a better image prompt from the given query
|
|
@@ -549,7 +572,7 @@ async def generate_better_image_prompt(
|
|
|
549
572
|
|
|
550
573
|
with timer("Chat actor: Generate contextual image prompt", logger):
|
|
551
574
|
response = await send_message_to_model_wrapper(
|
|
552
|
-
image_prompt, chat_model_option=chat_model, subscribed=subscribed
|
|
575
|
+
image_prompt, chat_model_option=chat_model, subscribed=subscribed, uploaded_image_url=uploaded_image_url
|
|
553
576
|
)
|
|
554
577
|
response = response.strip()
|
|
555
578
|
if response.startswith(('"', "'")) and response.endswith(('"', "'")):
|
|
@@ -564,11 +587,19 @@ async def send_message_to_model_wrapper(
|
|
|
564
587
|
response_type: str = "text",
|
|
565
588
|
chat_model_option: ChatModelOptions = None,
|
|
566
589
|
subscribed: bool = False,
|
|
590
|
+
uploaded_image_url: str = None,
|
|
567
591
|
):
|
|
568
592
|
conversation_config: ChatModelOptions = (
|
|
569
593
|
chat_model_option or await ConversationAdapters.aget_default_conversation_config()
|
|
570
594
|
)
|
|
571
595
|
|
|
596
|
+
vision_available = conversation_config.vision_enabled
|
|
597
|
+
if not vision_available and uploaded_image_url:
|
|
598
|
+
vision_enabled_config = ConversationAdapters.get_vision_enabled_config()
|
|
599
|
+
if vision_enabled_config:
|
|
600
|
+
conversation_config = vision_enabled_config
|
|
601
|
+
vision_available = True
|
|
602
|
+
|
|
572
603
|
chat_model = conversation_config.chat_model
|
|
573
604
|
max_tokens = (
|
|
574
605
|
conversation_config.subscribed_max_prompt_size
|
|
@@ -576,6 +607,7 @@ async def send_message_to_model_wrapper(
|
|
|
576
607
|
else conversation_config.max_prompt_size
|
|
577
608
|
)
|
|
578
609
|
tokenizer = conversation_config.tokenizer
|
|
610
|
+
vision_available = conversation_config.vision_enabled
|
|
579
611
|
|
|
580
612
|
if conversation_config.model_type == "offline":
|
|
581
613
|
if state.offline_chat_processor_config is None or state.offline_chat_processor_config.loaded_model is None:
|
|
@@ -589,6 +621,7 @@ async def send_message_to_model_wrapper(
|
|
|
589
621
|
loaded_model=loaded_model,
|
|
590
622
|
tokenizer_name=tokenizer,
|
|
591
623
|
max_prompt_size=max_tokens,
|
|
624
|
+
vision_enabled=vision_available,
|
|
592
625
|
)
|
|
593
626
|
|
|
594
627
|
return send_message_to_model_offline(
|
|
@@ -609,6 +642,8 @@ async def send_message_to_model_wrapper(
|
|
|
609
642
|
model_name=chat_model,
|
|
610
643
|
max_prompt_size=max_tokens,
|
|
611
644
|
tokenizer_name=tokenizer,
|
|
645
|
+
vision_enabled=vision_available,
|
|
646
|
+
uploaded_image_url=uploaded_image_url,
|
|
612
647
|
)
|
|
613
648
|
|
|
614
649
|
openai_response = send_message_to_model(
|
|
@@ -628,6 +663,7 @@ async def send_message_to_model_wrapper(
|
|
|
628
663
|
model_name=chat_model,
|
|
629
664
|
max_prompt_size=max_tokens,
|
|
630
665
|
tokenizer_name=tokenizer,
|
|
666
|
+
vision_enabled=vision_available,
|
|
631
667
|
)
|
|
632
668
|
|
|
633
669
|
return anthropic_send_message_to_model(
|
|
@@ -651,6 +687,7 @@ def send_message_to_model_wrapper_sync(
|
|
|
651
687
|
|
|
652
688
|
chat_model = conversation_config.chat_model
|
|
653
689
|
max_tokens = conversation_config.max_prompt_size
|
|
690
|
+
vision_available = conversation_config.vision_enabled
|
|
654
691
|
|
|
655
692
|
if conversation_config.model_type == "offline":
|
|
656
693
|
if state.offline_chat_processor_config is None or state.offline_chat_processor_config.loaded_model is None:
|
|
@@ -658,7 +695,11 @@ def send_message_to_model_wrapper_sync(
|
|
|
658
695
|
|
|
659
696
|
loaded_model = state.offline_chat_processor_config.loaded_model
|
|
660
697
|
truncated_messages = generate_chatml_messages_with_context(
|
|
661
|
-
user_message=message,
|
|
698
|
+
user_message=message,
|
|
699
|
+
system_message=system_message,
|
|
700
|
+
model_name=chat_model,
|
|
701
|
+
loaded_model=loaded_model,
|
|
702
|
+
vision_enabled=vision_available,
|
|
662
703
|
)
|
|
663
704
|
|
|
664
705
|
return send_message_to_model_offline(
|
|
@@ -672,7 +713,10 @@ def send_message_to_model_wrapper_sync(
|
|
|
672
713
|
elif conversation_config.model_type == "openai":
|
|
673
714
|
api_key = conversation_config.openai_config.api_key
|
|
674
715
|
truncated_messages = generate_chatml_messages_with_context(
|
|
675
|
-
user_message=message,
|
|
716
|
+
user_message=message,
|
|
717
|
+
system_message=system_message,
|
|
718
|
+
model_name=chat_model,
|
|
719
|
+
vision_enabled=vision_available,
|
|
676
720
|
)
|
|
677
721
|
|
|
678
722
|
openai_response = send_message_to_model(
|
|
@@ -688,6 +732,7 @@ def send_message_to_model_wrapper_sync(
|
|
|
688
732
|
system_message=system_message,
|
|
689
733
|
model_name=chat_model,
|
|
690
734
|
max_prompt_size=max_tokens,
|
|
735
|
+
vision_enabled=vision_available,
|
|
691
736
|
)
|
|
692
737
|
|
|
693
738
|
return anthropic_send_message_to_model(
|
|
@@ -712,6 +757,7 @@ def generate_chat_response(
|
|
|
712
757
|
conversation_id: int = None,
|
|
713
758
|
location_data: LocationData = None,
|
|
714
759
|
user_name: Optional[str] = None,
|
|
760
|
+
uploaded_image_url: Optional[str] = None,
|
|
715
761
|
) -> Tuple[Union[ThreadedGenerator, Iterator[str]], Dict[str, str]]:
|
|
716
762
|
# Initialize Variables
|
|
717
763
|
chat_response = None
|
|
@@ -719,7 +765,6 @@ def generate_chat_response(
|
|
|
719
765
|
|
|
720
766
|
metadata = {}
|
|
721
767
|
agent = AgentAdapters.get_conversation_agent_by_id(conversation.agent.id) if conversation.agent else None
|
|
722
|
-
|
|
723
768
|
try:
|
|
724
769
|
partial_completion = partial(
|
|
725
770
|
save_to_conversation_log,
|
|
@@ -731,9 +776,17 @@ def generate_chat_response(
|
|
|
731
776
|
inferred_queries=inferred_queries,
|
|
732
777
|
client_application=client_application,
|
|
733
778
|
conversation_id=conversation_id,
|
|
779
|
+
uploaded_image_url=uploaded_image_url,
|
|
734
780
|
)
|
|
735
781
|
|
|
736
782
|
conversation_config = ConversationAdapters.get_valid_conversation_config(user, conversation)
|
|
783
|
+
vision_available = conversation_config.vision_enabled
|
|
784
|
+
if not vision_available and uploaded_image_url:
|
|
785
|
+
vision_enabled_config = ConversationAdapters.get_vision_enabled_config()
|
|
786
|
+
if vision_enabled_config:
|
|
787
|
+
conversation_config = vision_enabled_config
|
|
788
|
+
vision_available = True
|
|
789
|
+
|
|
737
790
|
if conversation_config.model_type == "offline":
|
|
738
791
|
loaded_model = state.offline_chat_processor_config.loaded_model
|
|
739
792
|
chat_response = converse_offline(
|
|
@@ -759,6 +812,7 @@ def generate_chat_response(
|
|
|
759
812
|
chat_response = converse(
|
|
760
813
|
compiled_references,
|
|
761
814
|
q,
|
|
815
|
+
image_url=uploaded_image_url,
|
|
762
816
|
online_results=online_results,
|
|
763
817
|
conversation_log=meta_log,
|
|
764
818
|
model=chat_model,
|
|
@@ -771,6 +825,7 @@ def generate_chat_response(
|
|
|
771
825
|
location_data=location_data,
|
|
772
826
|
user_name=user_name,
|
|
773
827
|
agent=agent,
|
|
828
|
+
vision_available=vision_available,
|
|
774
829
|
)
|
|
775
830
|
|
|
776
831
|
elif conversation_config.model_type == "anthropic":
|
|
@@ -809,6 +864,7 @@ async def text_to_image(
|
|
|
809
864
|
online_results: Dict[str, Any],
|
|
810
865
|
subscribed: bool = False,
|
|
811
866
|
send_status_func: Optional[Callable] = None,
|
|
867
|
+
uploaded_image_url: Optional[str] = None,
|
|
812
868
|
):
|
|
813
869
|
status_code = 200
|
|
814
870
|
image = None
|
|
@@ -845,6 +901,7 @@ async def text_to_image(
|
|
|
845
901
|
online_results=online_results,
|
|
846
902
|
model_type=text_to_image_config.model_type,
|
|
847
903
|
subscribed=subscribed,
|
|
904
|
+
uploaded_image_url=uploaded_image_url,
|
|
848
905
|
)
|
|
849
906
|
|
|
850
907
|
if send_status_func:
|
|
@@ -908,13 +965,7 @@ async def text_to_image(
|
|
|
908
965
|
|
|
909
966
|
with timer("Convert image to webp", logger):
|
|
910
967
|
# Convert png to webp for faster loading
|
|
911
|
-
|
|
912
|
-
png_image = Image.open(image_io)
|
|
913
|
-
webp_image_io = io.BytesIO()
|
|
914
|
-
png_image.save(webp_image_io, "WEBP")
|
|
915
|
-
webp_image_bytes = webp_image_io.getvalue()
|
|
916
|
-
webp_image_io.close()
|
|
917
|
-
image_io.close()
|
|
968
|
+
webp_image_bytes = convert_image_to_webp(decoded_image)
|
|
918
969
|
|
|
919
970
|
with timer("Upload image to S3", logger):
|
|
920
971
|
image_url = upload_image(webp_image_bytes, user.uuid)
|
|
@@ -1095,6 +1146,7 @@ def should_notify(original_query: str, executed_query: str, ai_response: str) ->
|
|
|
1095
1146
|
|
|
1096
1147
|
with timer("Chat actor: Decide to notify user of automation response", logger):
|
|
1097
1148
|
try:
|
|
1149
|
+
# TODO Replace with async call so we don't have to maintain a sync version
|
|
1098
1150
|
response = send_message_to_model_wrapper_sync(to_notify_or_not)
|
|
1099
1151
|
should_notify_result = "no" not in response.lower()
|
|
1100
1152
|
logger.info(f'Decided to {"not " if not should_notify_result else ""}notify user of automation response.')
|
khoj/routers/storage.py
CHANGED
|
@@ -33,3 +33,31 @@ def upload_image(image: bytes, user_id: uuid.UUID):
|
|
|
33
33
|
except Exception as e:
|
|
34
34
|
logger.error(f"Failed to upload image to S3: {e}")
|
|
35
35
|
return None
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
AWS_USER_UPLOADED_IMAGES_BUCKET_NAME = os.getenv("AWS_USER_UPLOADED_IMAGES_BUCKET_NAME")
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def upload_image_to_bucket(image: bytes, user_id: uuid.UUID):
    """Store a user-uploaded WebP image in the user-images S3 bucket.

    The object is written under "<user_id>/<random uuid>.webp" with a
    public-read ACL and an image/webp content type.

    Returns the URL of the stored object, or None when AWS is disabled,
    the bucket name is not configured, or the upload raises.
    """
    if not aws_enabled:
        logger.info("AWS is not enabled. Skipping image upload")
        return None

    object_key = f"{user_id}/{uuid.uuid4()}.webp"
    bucket = AWS_USER_UPLOADED_IMAGES_BUCKET_NAME
    if not bucket:
        logger.error("AWS_USER_UPLOADED_IMAGES_BUCKET_NAME is not set")
        return None

    upload_args = {
        "Bucket": bucket,
        "Key": object_key,
        "Body": image,
        "ACL": "public-read",
        "ContentType": "image/webp",
    }
    try:
        s3_client.put_object(**upload_args)
    except Exception as e:
        # Best effort: callers treat None as "no image url available"
        logger.error(f"Failed to upload image to S3: {e}")
        return None
    else:
        return f"https://{bucket}/{object_key}"
|
khoj/utils/helpers.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations # to avoid quoting type hints
|
|
2
2
|
|
|
3
3
|
import datetime
|
|
4
|
+
import io
|
|
4
5
|
import logging
|
|
5
6
|
import os
|
|
6
7
|
import platform
|
|
@@ -22,6 +23,7 @@ import requests
|
|
|
22
23
|
import torch
|
|
23
24
|
from asgiref.sync import sync_to_async
|
|
24
25
|
from magika import Magika
|
|
26
|
+
from PIL import Image
|
|
25
27
|
|
|
26
28
|
from khoj.utils import constants
|
|
27
29
|
|
|
@@ -416,3 +418,16 @@ def is_internet_connected():
|
|
|
416
418
|
return response.status_code == 200
|
|
417
419
|
except:
|
|
418
420
|
return False
|
|
421
|
+
|
|
422
|
+
|
|
423
|
+
def convert_image_to_webp(image_bytes):
    """Convert encoded image bytes to WebP format for faster loading.

    Args:
        image_bytes: Raw bytes of an encoded image that Pillow can open.

    Returns:
        The image re-encoded as WebP, as raw bytes (not base64).

    Raises:
        Any error Pillow raises while opening or saving the image
        (e.g. when the bytes are not a recognizable image).
    """
    # Context managers release both in-memory buffers and the Pillow image
    # even if opening or saving the image raises.
    with io.BytesIO(image_bytes) as image_io, io.BytesIO() as webp_image_io:
        with Image.open(image_io) as original_image:
            original_image.save(webp_image_io, "WEBP")

        # Copy the encoded WebP bytes out before the buffer is closed
        return webp_image_io.getvalue()
|