khoj 1.21.6.dev14__py3-none-any.whl → 1.21.7.dev4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. khoj/database/adapters/__init__.py +12 -0
  2. khoj/database/migrations/0056_chatmodeloptions_vision_enabled.py +17 -0
  3. khoj/database/migrations/0057_merge_20240816_1409.py +13 -0
  4. khoj/database/migrations/0060_merge_20240905_1828.py +14 -0
  5. khoj/database/models/__init__.py +1 -0
  6. khoj/interface/compiled/404/index.html +1 -1
  7. khoj/interface/compiled/_next/static/chunks/{3062-a42d847c919a9ea4.js → 3062-9be9a4e34f82ed3a.js} +1 -1
  8. khoj/interface/compiled/_next/static/chunks/3678-0732dd9d2f472171.js +25 -0
  9. khoj/interface/compiled/_next/static/chunks/8423-b6a61d82233d1a82.js +1 -0
  10. khoj/interface/compiled/_next/static/chunks/9001-3b27af6d5f21df44.js +21 -0
  11. khoj/interface/compiled/_next/static/chunks/9162-0be016519a18568b.js +11 -0
  12. khoj/interface/compiled/_next/static/chunks/{9178-d23cb0dbee40a775.js → 9178-3a0baad1c172d515.js} +1 -1
  13. khoj/interface/compiled/_next/static/chunks/{9693-91b03052c5cabded.js → 9984-e410179c6fac7cf1.js} +1 -1
  14. khoj/interface/compiled/_next/static/chunks/app/agents/{page-3c01900e7b5c7e50.js → page-462502107217be82.js} +1 -1
  15. khoj/interface/compiled/_next/static/chunks/app/automations/{page-6ea3381528603372.js → page-e30a75db8719f439.js} +1 -1
  16. khoj/interface/compiled/_next/static/chunks/app/chat/page-4bb4f2422f5ec5f2.js +1 -0
  17. khoj/interface/compiled/_next/static/chunks/app/factchecker/page-693fe53982bf33e1.js +1 -0
  18. khoj/interface/compiled/_next/static/chunks/app/page-c26f689e39b400ba.js +1 -0
  19. khoj/interface/compiled/_next/static/chunks/app/search/{page-fa15807b1ad7e30b.js → page-0798bb43c2e368bf.js} +1 -1
  20. khoj/interface/compiled/_next/static/chunks/app/settings/{page-1a2acc46cdabaf4a.js → page-f518555f8e2fd794.js} +1 -1
  21. khoj/interface/compiled/_next/static/chunks/app/share/chat/page-1a639fb3f120fee6.js +1 -0
  22. khoj/interface/compiled/_next/static/chunks/{webpack-0f6d4805ea01afda.js → webpack-f52083d548d804fa.js} +1 -1
  23. khoj/interface/compiled/_next/static/css/2a860030cf7c384b.css +1 -0
  24. khoj/interface/compiled/_next/static/css/4cae6c0e5c72fb2d.css +1 -0
  25. khoj/interface/compiled/_next/static/css/5a400c87d295e68a.css +1 -0
  26. khoj/interface/compiled/_next/static/css/76db8c247950117c.css +25 -0
  27. khoj/interface/compiled/_next/static/css/c808691c459e3887.css +1 -0
  28. khoj/interface/compiled/agents/index.html +1 -1
  29. khoj/interface/compiled/agents/index.txt +2 -2
  30. khoj/interface/compiled/automations/index.html +1 -1
  31. khoj/interface/compiled/automations/index.txt +3 -3
  32. khoj/interface/compiled/chat/index.html +1 -1
  33. khoj/interface/compiled/chat/index.txt +2 -2
  34. khoj/interface/compiled/factchecker/index.html +1 -1
  35. khoj/interface/compiled/factchecker/index.txt +2 -2
  36. khoj/interface/compiled/index.html +1 -1
  37. khoj/interface/compiled/index.txt +2 -2
  38. khoj/interface/compiled/search/index.html +1 -1
  39. khoj/interface/compiled/search/index.txt +2 -2
  40. khoj/interface/compiled/settings/index.html +1 -1
  41. khoj/interface/compiled/settings/index.txt +3 -3
  42. khoj/interface/compiled/share/chat/index.html +1 -1
  43. khoj/interface/compiled/share/chat/index.txt +2 -2
  44. khoj/processor/conversation/openai/gpt.py +4 -0
  45. khoj/processor/conversation/utils.py +31 -13
  46. khoj/processor/tools/online_search.py +6 -2
  47. khoj/routers/api_chat.py +64 -18
  48. khoj/routers/helpers.py +73 -21
  49. khoj/routers/storage.py +28 -0
  50. khoj/utils/helpers.py +15 -0
  51. {khoj-1.21.6.dev14.dist-info → khoj-1.21.7.dev4.dist-info}/METADATA +1 -1
  52. {khoj-1.21.6.dev14.dist-info → khoj-1.21.7.dev4.dist-info}/RECORD +57 -54
  53. khoj/interface/compiled/_next/static/chunks/3678-8c0e55c3b5d83a22.js +0 -25
  54. khoj/interface/compiled/_next/static/chunks/8423-132ea64eac83fd43.js +0 -1
  55. khoj/interface/compiled/_next/static/chunks/9001-acbca3e19b1a5ddf.js +0 -21
  56. khoj/interface/compiled/_next/static/chunks/9162-4a6d0d0dc5e27618.js +0 -11
  57. khoj/interface/compiled/_next/static/chunks/app/chat/page-c2ebc47a09abc8ae.js +0 -1
  58. khoj/interface/compiled/_next/static/chunks/app/factchecker/page-6ca723a9ff0dfd70.js +0 -1
  59. khoj/interface/compiled/_next/static/chunks/app/page-d403fc59c9c3f8cc.js +0 -1
  60. khoj/interface/compiled/_next/static/chunks/app/share/chat/page-0ae8f5b868af65c1.js +0 -1
  61. khoj/interface/compiled/_next/static/css/2bfe35fbe2c97a56.css +0 -1
  62. khoj/interface/compiled/_next/static/css/9d5b867ec04494a6.css +0 -25
  63. khoj/interface/compiled/_next/static/css/a22d83f18a32957e.css +0 -1
  64. khoj/interface/compiled/_next/static/css/a3530ec58b0b660f.css +0 -1
  65. khoj/interface/compiled/_next/static/css/b81e909d403fb2df.css +0 -1
  66. /khoj/interface/compiled/_next/static/{OHjya9xQJWrEMTXUfPYon → c64fbtnncuTwnoxHlRnk6}/_buildManifest.js +0 -0
  67. /khoj/interface/compiled/_next/static/{OHjya9xQJWrEMTXUfPYon → c64fbtnncuTwnoxHlRnk6}/_ssgManifest.js +0 -0
  68. {khoj-1.21.6.dev14.dist-info → khoj-1.21.7.dev4.dist-info}/WHEEL +0 -0
  69. {khoj-1.21.6.dev14.dist-info → khoj-1.21.7.dev4.dist-info}/entry_points.txt +0 -0
  70. {khoj-1.21.6.dev14.dist-info → khoj-1.21.7.dev4.dist-info}/licenses/LICENSE +0 -0
khoj/routers/api_chat.py CHANGED
@@ -1,4 +1,5 @@
1
1
  import asyncio
2
+ import base64
2
3
  import json
3
4
  import logging
4
5
  import time
@@ -46,11 +47,13 @@ from khoj.routers.helpers import (
46
47
  update_telemetry_state,
47
48
  validate_conversation_config,
48
49
  )
50
+ from khoj.routers.storage import upload_image_to_bucket
49
51
  from khoj.utils import state
50
52
  from khoj.utils.helpers import (
51
53
  AsyncIteratorWrapper,
52
54
  ConversationCommand,
53
55
  command_descriptions,
56
+ convert_image_to_webp,
54
57
  get_device,
55
58
  is_none_or_empty,
56
59
  )
@@ -517,21 +520,26 @@ async def set_conversation_title(
517
520
  )
518
521
 
519
522
 
520
- @api_chat.get("")
523
class ChatRequestBody(BaseModel):
    # JSON body of the POST chat endpoint. These fields were previously passed
    # as individual query parameters of the GET endpoint; names and defaults
    # are kept the same so the handler can unpack them one-to-one.

    # The user's message. The handler URL-unquotes it before use.
    q: str
    # NOTE(review): presumably the number of search results to consider — confirm against the handler.
    n: Optional[int] = 7
    # NOTE(review): presumably a search distance threshold — confirm against the handler.
    d: Optional[float] = None
    # When truthy the handler returns a StreamingResponse; otherwise it collects
    # the stream and returns a single JSON response.
    stream: Optional[bool] = False
    # NOTE(review): presumably the conversation title — confirm against the handler.
    title: Optional[str] = None
    # Identifier of the conversation this message belongs to, if any.
    conversation_id: Optional[int] = None
    # Caller-supplied location hints.
    # NOTE(review): presumably used to build the location context — confirm.
    city: Optional[str] = None
    region: Optional[str] = None
    country: Optional[str] = None
    timezone: Optional[str] = None
    # Optional attached image. The handler URL-unquotes this string, takes the
    # part after the first comma and base64-decodes it, i.e. it expects a
    # data-URI style value ("<prefix>,<base64 payload>").
    image: Optional[str] = None
535
+
536
+
537
+ @api_chat.post("")
521
538
  @requires(["authenticated"])
522
539
  async def chat(
523
540
  request: Request,
524
541
  common: CommonQueryParams,
525
- q: str,
526
- n: int = 7,
527
- d: float = None,
528
- stream: Optional[bool] = False,
529
- title: Optional[str] = None,
530
- conversation_id: Optional[int] = None,
531
- city: Optional[str] = None,
532
- region: Optional[str] = None,
533
- country: Optional[str] = None,
534
- timezone: Optional[str] = None,
542
+ body: ChatRequestBody,
535
543
  rate_limiter_per_minute=Depends(
536
544
  ApiUserRateLimiter(requests=60, subscribed_requests=60, window=60, slug="chat_minute")
537
545
  ),
@@ -539,7 +547,20 @@ async def chat(
539
547
  ApiUserRateLimiter(requests=600, subscribed_requests=600, window=60 * 60 * 24, slug="chat_day")
540
548
  ),
541
549
  ):
542
- async def event_generator(q: str):
550
+ # Access the parameters from the body
551
+ q = body.q
552
+ n = body.n
553
+ d = body.d
554
+ stream = body.stream
555
+ title = body.title
556
+ conversation_id = body.conversation_id
557
+ city = body.city
558
+ region = body.region
559
+ country = body.country
560
+ timezone = body.timezone
561
+ image = body.image
562
+
563
+ async def event_generator(q: str, image: str):
543
564
  start_time = time.perf_counter()
544
565
  ttft = None
545
566
  chat_metadata: dict = {}
@@ -550,6 +571,17 @@ async def chat(
550
571
  q = unquote(q)
551
572
  nonlocal conversation_id
552
573
 
574
+ uploaded_image_url = None
575
+ if image:
576
+ decoded_string = unquote(image)
577
+ base64_data = decoded_string.split(",", 1)[1]
578
+ image_bytes = base64.b64decode(base64_data)
579
+ webp_image_bytes = convert_image_to_webp(image_bytes)
580
+ try:
581
+ uploaded_image_url = upload_image_to_bucket(webp_image_bytes, request.user.object.id)
582
+ except:
583
+ uploaded_image_url = None
584
+
553
585
  async def send_event(event_type: ChatEvent, data: str | dict):
554
586
  nonlocal connection_alive, ttft
555
587
  if not connection_alive or await request.is_disconnected():
@@ -637,7 +669,7 @@ async def chat(
637
669
 
638
670
  if conversation_commands == [ConversationCommand.Default] or is_automated_task:
639
671
  conversation_commands = await aget_relevant_information_sources(
640
- q, meta_log, is_automated_task, subscribed=subscribed
672
+ q, meta_log, is_automated_task, subscribed=subscribed, uploaded_image_url=uploaded_image_url
641
673
  )
642
674
  conversation_commands_str = ", ".join([cmd.value for cmd in conversation_commands])
643
675
  async for result in send_event(
@@ -645,7 +677,7 @@ async def chat(
645
677
  ):
646
678
  yield result
647
679
 
648
- mode = await aget_relevant_output_modes(q, meta_log, is_automated_task)
680
+ mode = await aget_relevant_output_modes(q, meta_log, is_automated_task, uploaded_image_url)
649
681
  async for result in send_event(ChatEvent.STATUS, f"**Decided Response Mode:** {mode.value}"):
650
682
  yield result
651
683
  if mode not in conversation_commands:
@@ -693,7 +725,9 @@ async def chat(
693
725
  ):
694
726
  yield result
695
727
 
696
- response = await extract_relevant_summary(q, contextual_data, subscribed=subscribed)
728
+ response = await extract_relevant_summary(
729
+ q, contextual_data, subscribed=subscribed, uploaded_image_url=uploaded_image_url
730
+ )
697
731
  response_log = str(response)
698
732
  async for result in send_llm_response(response_log):
699
733
  yield result
@@ -711,6 +745,7 @@ async def chat(
711
745
  intent_type="summarize",
712
746
  client_application=request.user.client_app,
713
747
  conversation_id=conversation_id,
748
+ uploaded_image_url=uploaded_image_url,
714
749
  )
715
750
  return
716
751
 
@@ -753,6 +788,7 @@ async def chat(
753
788
  conversation_id=conversation_id,
754
789
  inferred_queries=[query_to_run],
755
790
  automation_id=automation.id,
791
+ uploaded_image_url=uploaded_image_url,
756
792
  )
757
793
  async for result in send_llm_response(llm_response):
758
794
  yield result
@@ -807,6 +843,7 @@ async def chat(
807
843
  subscribed,
808
844
  partial(send_event, ChatEvent.STATUS),
809
845
  custom_filters,
846
+ uploaded_image_url=uploaded_image_url,
810
847
  ):
811
848
  if isinstance(result, dict) and ChatEvent.STATUS in result:
812
849
  yield result[ChatEvent.STATUS]
@@ -823,7 +860,13 @@ async def chat(
823
860
  if ConversationCommand.Webpage in conversation_commands:
824
861
  try:
825
862
  async for result in read_webpages(
826
- defiltered_query, meta_log, location, user, subscribed, partial(send_event, ChatEvent.STATUS)
863
+ defiltered_query,
864
+ meta_log,
865
+ location,
866
+ user,
867
+ subscribed,
868
+ partial(send_event, ChatEvent.STATUS),
869
+ uploaded_image_url=uploaded_image_url,
827
870
  ):
828
871
  if isinstance(result, dict) and ChatEvent.STATUS in result:
829
872
  yield result[ChatEvent.STATUS]
@@ -869,6 +912,7 @@ async def chat(
869
912
  online_results=online_results,
870
913
  subscribed=subscribed,
871
914
  send_status_func=partial(send_event, ChatEvent.STATUS),
915
+ uploaded_image_url=uploaded_image_url,
872
916
  ):
873
917
  if isinstance(result, dict) and ChatEvent.STATUS in result:
874
918
  yield result[ChatEvent.STATUS]
@@ -898,6 +942,7 @@ async def chat(
898
942
  conversation_id=conversation_id,
899
943
  compiled_references=compiled_references,
900
944
  online_results=online_results,
945
+ uploaded_image_url=uploaded_image_url,
901
946
  )
902
947
  content_obj = {
903
948
  "intentType": intent_type,
@@ -924,6 +969,7 @@ async def chat(
924
969
  conversation_id,
925
970
  location,
926
971
  user_name,
972
+ uploaded_image_url,
927
973
  )
928
974
 
929
975
  # Send Response
@@ -949,9 +995,9 @@ async def chat(
949
995
 
950
996
  ## Stream Text Response
951
997
  if stream:
952
- return StreamingResponse(event_generator(q), media_type="text/plain")
998
+ return StreamingResponse(event_generator(q, image=image), media_type="text/plain")
953
999
  ## Non-Streaming Text Response
954
1000
  else:
955
- response_iterator = event_generator(q)
1001
+ response_iterator = event_generator(q, image=image)
956
1002
  response_data = await read_chat_stream(response_iterator)
957
1003
  return Response(content=json.dumps(response_data), media_type="application/json", status_code=200)
khoj/routers/helpers.py CHANGED
@@ -97,6 +97,7 @@ from khoj.utils.helpers import (
97
97
  LRU,
98
98
  ConversationCommand,
99
99
  ImageIntentType,
100
+ convert_image_to_webp,
100
101
  is_none_or_empty,
101
102
  is_valid_url,
102
103
  log_telemetry,
@@ -252,7 +253,9 @@ async def acreate_title_from_query(query: str) -> str:
252
253
  return response.strip()
253
254
 
254
255
 
255
- async def aget_relevant_information_sources(query: str, conversation_history: dict, is_task: bool, subscribed: bool):
256
+ async def aget_relevant_information_sources(
257
+ query: str, conversation_history: dict, is_task: bool, subscribed: bool, uploaded_image_url: str = None
258
+ ):
256
259
  """
257
260
  Given a query, determine which of the available tools the agent should use in order to answer appropriately.
258
261
  """
@@ -266,6 +269,9 @@ async def aget_relevant_information_sources(query: str, conversation_history: di
266
269
 
267
270
  chat_history = construct_chat_history(conversation_history)
268
271
 
272
+ if uploaded_image_url:
273
+ query = f"[placeholder for image attached to this message]\n{query}"
274
+
269
275
  relevant_tools_prompt = prompts.pick_relevant_information_collection_tools.format(
270
276
  query=query,
271
277
  tools=tool_options_str,
@@ -274,7 +280,9 @@ async def aget_relevant_information_sources(query: str, conversation_history: di
274
280
 
275
281
  with timer("Chat actor: Infer information sources to refer", logger):
276
282
  response = await send_message_to_model_wrapper(
277
- relevant_tools_prompt, response_type="json_object", subscribed=subscribed
283
+ relevant_tools_prompt,
284
+ response_type="json_object",
285
+ subscribed=subscribed,
278
286
  )
279
287
 
280
288
  try:
@@ -302,7 +310,9 @@ async def aget_relevant_information_sources(query: str, conversation_history: di
302
310
  return [ConversationCommand.Default]
303
311
 
304
312
 
305
- async def aget_relevant_output_modes(query: str, conversation_history: dict, is_task: bool = False):
313
+ async def aget_relevant_output_modes(
314
+ query: str, conversation_history: dict, is_task: bool = False, uploaded_image_url: str = None
315
+ ):
306
316
  """
307
317
  Given a query, determine which of the available tools the agent should use in order to answer appropriately.
308
318
  """
@@ -319,6 +329,9 @@ async def aget_relevant_output_modes(query: str, conversation_history: dict, is_
319
329
 
320
330
  chat_history = construct_chat_history(conversation_history)
321
331
 
332
+ if uploaded_image_url:
333
+ query = f"[placeholder for image attached to this message]\n{query}"
334
+
322
335
  relevant_mode_prompt = prompts.pick_relevant_output_mode.format(
323
336
  query=query,
324
337
  modes=mode_options_str,
@@ -347,7 +360,7 @@ async def aget_relevant_output_modes(query: str, conversation_history: dict, is_
347
360
 
348
361
 
349
362
  async def infer_webpage_urls(
350
- q: str, conversation_history: dict, location_data: LocationData, user: KhojUser
363
+ q: str, conversation_history: dict, location_data: LocationData, user: KhojUser, uploaded_image_url: str = None
351
364
  ) -> List[str]:
352
365
  """
353
366
  Infer webpage links from the given query
@@ -366,7 +379,9 @@ async def infer_webpage_urls(
366
379
  )
367
380
 
368
381
  with timer("Chat actor: Infer webpage urls to read", logger):
369
- response = await send_message_to_model_wrapper(online_queries_prompt, response_type="json_object")
382
+ response = await send_message_to_model_wrapper(
383
+ online_queries_prompt, uploaded_image_url=uploaded_image_url, response_type="json_object"
384
+ )
370
385
 
371
386
  # Validate that the response is a non-empty, JSON-serializable list of URLs
372
387
  try:
@@ -381,7 +396,7 @@ async def infer_webpage_urls(
381
396
 
382
397
 
383
398
  async def generate_online_subqueries(
384
- q: str, conversation_history: dict, location_data: LocationData, user: KhojUser
399
+ q: str, conversation_history: dict, location_data: LocationData, user: KhojUser, uploaded_image_url: str = None
385
400
  ) -> List[str]:
386
401
  """
387
402
  Generate subqueries from the given query
@@ -400,7 +415,9 @@ async def generate_online_subqueries(
400
415
  )
401
416
 
402
417
  with timer("Chat actor: Generate online search subqueries", logger):
403
- response = await send_message_to_model_wrapper(online_queries_prompt, response_type="json_object")
418
+ response = await send_message_to_model_wrapper(
419
+ online_queries_prompt, uploaded_image_url=uploaded_image_url, response_type="json_object"
420
+ )
404
421
 
405
422
  # Validate that the response is a non-empty, JSON-serializable list
406
423
  try:
@@ -419,7 +436,7 @@ async def generate_online_subqueries(
419
436
  return [q]
420
437
 
421
438
 
422
- async def schedule_query(q: str, conversation_history: dict) -> Tuple[str, ...]:
439
+ async def schedule_query(q: str, conversation_history: dict, uploaded_image_url: str = None) -> Tuple[str, ...]:
423
440
  """
424
441
  Schedule the date, time to run the query. Assume the server timezone is UTC.
425
442
  """
@@ -430,7 +447,9 @@ async def schedule_query(q: str, conversation_history: dict) -> Tuple[str, ...]:
430
447
  chat_history=chat_history,
431
448
  )
432
449
 
433
- raw_response = await send_message_to_model_wrapper(crontime_prompt, response_type="json_object")
450
+ raw_response = await send_message_to_model_wrapper(
451
+ crontime_prompt, uploaded_image_url=uploaded_image_url, response_type="json_object"
452
+ )
434
453
 
435
454
  # Validate that the response is a non-empty, JSON-serializable list
436
455
  try:
@@ -468,7 +487,9 @@ async def extract_relevant_info(q: str, corpus: str, subscribed: bool) -> Union[
468
487
  return response.strip()
469
488
 
470
489
 
471
- async def extract_relevant_summary(q: str, corpus: str, subscribed: bool = False) -> Union[str, None]:
490
+ async def extract_relevant_summary(
491
+ q: str, corpus: str, subscribed: bool = False, uploaded_image_url: str = None
492
+ ) -> Union[str, None]:
472
493
  """
473
494
  Extract relevant information for a given query from the target corpus
474
495
  """
@@ -489,6 +510,7 @@ async def extract_relevant_summary(q: str, corpus: str, subscribed: bool = False
489
510
  prompts.system_prompt_extract_relevant_summary,
490
511
  chat_model_option=chat_model,
491
512
  subscribed=subscribed,
513
+ uploaded_image_url=uploaded_image_url,
492
514
  )
493
515
  return response.strip()
494
516
 
@@ -501,6 +523,7 @@ async def generate_better_image_prompt(
501
523
  online_results: Optional[dict] = None,
502
524
  model_type: Optional[str] = None,
503
525
  subscribed: bool = False,
526
+ uploaded_image_url: Optional[str] = None,
504
527
  ) -> str:
505
528
  """
506
529
  Generate a better image prompt from the given query
@@ -549,7 +572,7 @@ async def generate_better_image_prompt(
549
572
 
550
573
  with timer("Chat actor: Generate contextual image prompt", logger):
551
574
  response = await send_message_to_model_wrapper(
552
- image_prompt, chat_model_option=chat_model, subscribed=subscribed
575
+ image_prompt, chat_model_option=chat_model, subscribed=subscribed, uploaded_image_url=uploaded_image_url
553
576
  )
554
577
  response = response.strip()
555
578
  if response.startswith(('"', "'")) and response.endswith(('"', "'")):
@@ -564,11 +587,19 @@ async def send_message_to_model_wrapper(
564
587
  response_type: str = "text",
565
588
  chat_model_option: ChatModelOptions = None,
566
589
  subscribed: bool = False,
590
+ uploaded_image_url: str = None,
567
591
  ):
568
592
  conversation_config: ChatModelOptions = (
569
593
  chat_model_option or await ConversationAdapters.aget_default_conversation_config()
570
594
  )
571
595
 
596
+ vision_available = conversation_config.vision_enabled
597
+ if not vision_available and uploaded_image_url:
598
+ vision_enabled_config = ConversationAdapters.get_vision_enabled_config()
599
+ if vision_enabled_config:
600
+ conversation_config = vision_enabled_config
601
+ vision_available = True
602
+
572
603
  chat_model = conversation_config.chat_model
573
604
  max_tokens = (
574
605
  conversation_config.subscribed_max_prompt_size
@@ -576,6 +607,7 @@ async def send_message_to_model_wrapper(
576
607
  else conversation_config.max_prompt_size
577
608
  )
578
609
  tokenizer = conversation_config.tokenizer
610
+ vision_available = conversation_config.vision_enabled
579
611
 
580
612
  if conversation_config.model_type == "offline":
581
613
  if state.offline_chat_processor_config is None or state.offline_chat_processor_config.loaded_model is None:
@@ -589,6 +621,7 @@ async def send_message_to_model_wrapper(
589
621
  loaded_model=loaded_model,
590
622
  tokenizer_name=tokenizer,
591
623
  max_prompt_size=max_tokens,
624
+ vision_enabled=vision_available,
592
625
  )
593
626
 
594
627
  return send_message_to_model_offline(
@@ -609,6 +642,8 @@ async def send_message_to_model_wrapper(
609
642
  model_name=chat_model,
610
643
  max_prompt_size=max_tokens,
611
644
  tokenizer_name=tokenizer,
645
+ vision_enabled=vision_available,
646
+ uploaded_image_url=uploaded_image_url,
612
647
  )
613
648
 
614
649
  openai_response = send_message_to_model(
@@ -628,6 +663,7 @@ async def send_message_to_model_wrapper(
628
663
  model_name=chat_model,
629
664
  max_prompt_size=max_tokens,
630
665
  tokenizer_name=tokenizer,
666
+ vision_enabled=vision_available,
631
667
  )
632
668
 
633
669
  return anthropic_send_message_to_model(
@@ -651,6 +687,7 @@ def send_message_to_model_wrapper_sync(
651
687
 
652
688
  chat_model = conversation_config.chat_model
653
689
  max_tokens = conversation_config.max_prompt_size
690
+ vision_available = conversation_config.vision_enabled
654
691
 
655
692
  if conversation_config.model_type == "offline":
656
693
  if state.offline_chat_processor_config is None or state.offline_chat_processor_config.loaded_model is None:
@@ -658,7 +695,11 @@ def send_message_to_model_wrapper_sync(
658
695
 
659
696
  loaded_model = state.offline_chat_processor_config.loaded_model
660
697
  truncated_messages = generate_chatml_messages_with_context(
661
- user_message=message, system_message=system_message, model_name=chat_model, loaded_model=loaded_model
698
+ user_message=message,
699
+ system_message=system_message,
700
+ model_name=chat_model,
701
+ loaded_model=loaded_model,
702
+ vision_enabled=vision_available,
662
703
  )
663
704
 
664
705
  return send_message_to_model_offline(
@@ -672,7 +713,10 @@ def send_message_to_model_wrapper_sync(
672
713
  elif conversation_config.model_type == "openai":
673
714
  api_key = conversation_config.openai_config.api_key
674
715
  truncated_messages = generate_chatml_messages_with_context(
675
- user_message=message, system_message=system_message, model_name=chat_model
716
+ user_message=message,
717
+ system_message=system_message,
718
+ model_name=chat_model,
719
+ vision_enabled=vision_available,
676
720
  )
677
721
 
678
722
  openai_response = send_message_to_model(
@@ -688,6 +732,7 @@ def send_message_to_model_wrapper_sync(
688
732
  system_message=system_message,
689
733
  model_name=chat_model,
690
734
  max_prompt_size=max_tokens,
735
+ vision_enabled=vision_available,
691
736
  )
692
737
 
693
738
  return anthropic_send_message_to_model(
@@ -712,6 +757,7 @@ def generate_chat_response(
712
757
  conversation_id: int = None,
713
758
  location_data: LocationData = None,
714
759
  user_name: Optional[str] = None,
760
+ uploaded_image_url: Optional[str] = None,
715
761
  ) -> Tuple[Union[ThreadedGenerator, Iterator[str]], Dict[str, str]]:
716
762
  # Initialize Variables
717
763
  chat_response = None
@@ -719,7 +765,6 @@ def generate_chat_response(
719
765
 
720
766
  metadata = {}
721
767
  agent = AgentAdapters.get_conversation_agent_by_id(conversation.agent.id) if conversation.agent else None
722
-
723
768
  try:
724
769
  partial_completion = partial(
725
770
  save_to_conversation_log,
@@ -731,9 +776,17 @@ def generate_chat_response(
731
776
  inferred_queries=inferred_queries,
732
777
  client_application=client_application,
733
778
  conversation_id=conversation_id,
779
+ uploaded_image_url=uploaded_image_url,
734
780
  )
735
781
 
736
782
  conversation_config = ConversationAdapters.get_valid_conversation_config(user, conversation)
783
+ vision_available = conversation_config.vision_enabled
784
+ if not vision_available and uploaded_image_url:
785
+ vision_enabled_config = ConversationAdapters.get_vision_enabled_config()
786
+ if vision_enabled_config:
787
+ conversation_config = vision_enabled_config
788
+ vision_available = True
789
+
737
790
  if conversation_config.model_type == "offline":
738
791
  loaded_model = state.offline_chat_processor_config.loaded_model
739
792
  chat_response = converse_offline(
@@ -759,6 +812,7 @@ def generate_chat_response(
759
812
  chat_response = converse(
760
813
  compiled_references,
761
814
  q,
815
+ image_url=uploaded_image_url,
762
816
  online_results=online_results,
763
817
  conversation_log=meta_log,
764
818
  model=chat_model,
@@ -771,6 +825,7 @@ def generate_chat_response(
771
825
  location_data=location_data,
772
826
  user_name=user_name,
773
827
  agent=agent,
828
+ vision_available=vision_available,
774
829
  )
775
830
 
776
831
  elif conversation_config.model_type == "anthropic":
@@ -809,6 +864,7 @@ async def text_to_image(
809
864
  online_results: Dict[str, Any],
810
865
  subscribed: bool = False,
811
866
  send_status_func: Optional[Callable] = None,
867
+ uploaded_image_url: Optional[str] = None,
812
868
  ):
813
869
  status_code = 200
814
870
  image = None
@@ -845,6 +901,7 @@ async def text_to_image(
845
901
  online_results=online_results,
846
902
  model_type=text_to_image_config.model_type,
847
903
  subscribed=subscribed,
904
+ uploaded_image_url=uploaded_image_url,
848
905
  )
849
906
 
850
907
  if send_status_func:
@@ -908,13 +965,7 @@ async def text_to_image(
908
965
 
909
966
  with timer("Convert image to webp", logger):
910
967
  # Convert png to webp for faster loading
911
- image_io = io.BytesIO(decoded_image)
912
- png_image = Image.open(image_io)
913
- webp_image_io = io.BytesIO()
914
- png_image.save(webp_image_io, "WEBP")
915
- webp_image_bytes = webp_image_io.getvalue()
916
- webp_image_io.close()
917
- image_io.close()
968
+ webp_image_bytes = convert_image_to_webp(decoded_image)
918
969
 
919
970
  with timer("Upload image to S3", logger):
920
971
  image_url = upload_image(webp_image_bytes, user.uuid)
@@ -1095,6 +1146,7 @@ def should_notify(original_query: str, executed_query: str, ai_response: str) ->
1095
1146
 
1096
1147
  with timer("Chat actor: Decide to notify user of automation response", logger):
1097
1148
  try:
1149
+ # TODO Replace with async call so we don't have to maintain a sync version
1098
1150
  response = send_message_to_model_wrapper_sync(to_notify_or_not)
1099
1151
  should_notify_result = "no" not in response.lower()
1100
1152
  logger.info(f'Decided to {"not " if not should_notify_result else ""}notify user of automation response.')
khoj/routers/storage.py CHANGED
@@ -33,3 +33,31 @@ def upload_image(image: bytes, user_id: uuid.UUID):
33
33
  except Exception as e:
34
34
  logger.error(f"Failed to upload image to S3: {e}")
35
35
  return None
36
+
37
+
38
# Name of the S3 bucket that receives images uploaded by users.
# Read once at import time; None when the environment variable is unset.
AWS_USER_UPLOADED_IMAGES_BUCKET_NAME = os.getenv("AWS_USER_UPLOADED_IMAGES_BUCKET_NAME")


def upload_image_to_bucket(image: bytes, user_id: uuid.UUID):
    """Upload a user-provided WebP image to the user-uploads S3 bucket.

    Args:
        image: Encoded image bytes. They are stored with content type
            ``image/webp``, so the caller is expected to pass WebP data.
        user_id: Identifier of the uploading user; used as the key prefix.

    Returns:
        The ``https://<bucket>/<user_id>/<uuid4>.webp`` URL of the stored
        object, or ``None`` when AWS is disabled, the bucket name is not
        configured, or the upload fails. This function never raises for an
        upload failure; the error is logged instead.
    """
    if not aws_enabled:
        logger.info("AWS is not enabled. Skipping image upload")
        return None

    # Validate configuration before doing any work for this upload.
    if not AWS_USER_UPLOADED_IMAGES_BUCKET_NAME:
        logger.error("AWS_USER_UPLOADED_IMAGES_BUCKET_NAME is not set")
        return None

    # A random object name under the user's prefix avoids collisions between uploads.
    image_key = f"{user_id}/{uuid.uuid4()}.webp"

    try:
        # NOTE(review): "public-read" makes every user-uploaded image readable by
        # anyone who has the URL. Confirm this is intended for private user content.
        s3_client.put_object(
            Bucket=AWS_USER_UPLOADED_IMAGES_BUCKET_NAME,
            Key=image_key,
            Body=image,
            ACL="public-read",
            ContentType="image/webp",
        )
        # NOTE(review): the URL uses the bucket name as the host, which presumably
        # relies on the bucket being named after a domain that serves it — confirm.
        return f"https://{AWS_USER_UPLOADED_IMAGES_BUCKET_NAME}/{image_key}"
    except Exception as e:
        # Best-effort upload: keep returning None, but record the traceback so
        # failures can be diagnosed from the logs.
        logger.error(f"Failed to upload image to S3: {e}", exc_info=True)
        return None
khoj/utils/helpers.py CHANGED
@@ -1,6 +1,7 @@
1
1
  from __future__ import annotations # to avoid quoting type hints
2
2
 
3
3
  import datetime
4
+ import io
4
5
  import logging
5
6
  import os
6
7
  import platform
@@ -22,6 +23,7 @@ import requests
22
23
  import torch
23
24
  from asgiref.sync import sync_to_async
24
25
  from magika import Magika
26
+ from PIL import Image
25
27
 
26
28
  from khoj.utils import constants
27
29
 
@@ -416,3 +418,16 @@ def is_internet_connected():
416
418
  return response.status_code == 200
417
419
  except:
418
420
  return False
421
+
422
+
423
def convert_image_to_webp(image_bytes):
    """Re-encode an image as WebP for faster loading.

    Args:
        image_bytes: Encoded image data in a format Pillow can open.

    Returns:
        The raw bytes of the WebP-encoded image (not base64 text).

    Errors raised by Pillow while opening or saving the image propagate to
    the caller.
    """
    # Context managers release both in-memory buffers and the decoded image
    # even when decoding or encoding raises.
    with io.BytesIO(image_bytes) as image_io, io.BytesIO() as webp_image_io:
        with Image.open(image_io) as original_image:
            original_image.save(webp_image_io, "WEBP")

        # getvalue() copies the buffer contents, so the result stays valid
        # after the buffer is closed on leaving the with-block.
        return webp_image_io.getvalue()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: khoj
3
- Version: 1.21.6.dev14
3
+ Version: 1.21.7.dev4
4
4
  Summary: Your Second Brain
5
5
  Project-URL: Homepage, https://khoj.dev
6
6
  Project-URL: Documentation, https://docs.khoj.dev