khoj 1.21.6.dev14__py3-none-any.whl → 1.21.7.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. khoj/database/adapters/__init__.py +12 -0
  2. khoj/database/migrations/0056_chatmodeloptions_vision_enabled.py +17 -0
  3. khoj/database/migrations/0057_merge_20240816_1409.py +13 -0
  4. khoj/database/migrations/0060_merge_20240905_1828.py +14 -0
  5. khoj/database/models/__init__.py +1 -0
  6. khoj/interface/compiled/404/index.html +1 -1
  7. khoj/interface/compiled/_next/static/chunks/{3062-a42d847c919a9ea4.js → 3062-9be9a4e34f82ed3a.js} +1 -1
  8. khoj/interface/compiled/_next/static/chunks/3678-0732dd9d2f472171.js +25 -0
  9. khoj/interface/compiled/_next/static/chunks/8423-ee6746f47901db2f.js +1 -0
  10. khoj/interface/compiled/_next/static/chunks/9001-3b27af6d5f21df44.js +21 -0
  11. khoj/interface/compiled/_next/static/chunks/9162-0be016519a18568b.js +11 -0
  12. khoj/interface/compiled/_next/static/chunks/{9178-d23cb0dbee40a775.js → 9178-3a0baad1c172d515.js} +1 -1
  13. khoj/interface/compiled/_next/static/chunks/{9693-91b03052c5cabded.js → 9984-e410179c6fac7cf1.js} +1 -1
  14. khoj/interface/compiled/_next/static/chunks/app/agents/{page-3c01900e7b5c7e50.js → page-462502107217be82.js} +1 -1
  15. khoj/interface/compiled/_next/static/chunks/app/automations/{page-6ea3381528603372.js → page-e30a75db8719f439.js} +1 -1
  16. khoj/interface/compiled/_next/static/chunks/app/chat/page-ed970e05064ff12c.js +1 -0
  17. khoj/interface/compiled/_next/static/chunks/app/factchecker/page-693fe53982bf33e1.js +1 -0
  18. khoj/interface/compiled/_next/static/chunks/app/page-c26f689e39b400ba.js +1 -0
  19. khoj/interface/compiled/_next/static/chunks/app/search/{page-fa15807b1ad7e30b.js → page-0798bb43c2e368bf.js} +1 -1
  20. khoj/interface/compiled/_next/static/chunks/app/settings/{page-1a2acc46cdabaf4a.js → page-f518555f8e2fd794.js} +1 -1
  21. khoj/interface/compiled/_next/static/chunks/app/share/chat/page-ad9d2e9787eed215.js +1 -0
  22. khoj/interface/compiled/_next/static/chunks/{webpack-0f6d4805ea01afda.js → webpack-95ea8d2b149d6bad.js} +1 -1
  23. khoj/interface/compiled/_next/static/css/2a860030cf7c384b.css +1 -0
  24. khoj/interface/compiled/_next/static/css/4cae6c0e5c72fb2d.css +1 -0
  25. khoj/interface/compiled/_next/static/css/5a400c87d295e68a.css +1 -0
  26. khoj/interface/compiled/_next/static/css/76db8c247950117c.css +25 -0
  27. khoj/interface/compiled/_next/static/css/c808691c459e3887.css +1 -0
  28. khoj/interface/compiled/agents/index.html +1 -1
  29. khoj/interface/compiled/agents/index.txt +2 -2
  30. khoj/interface/compiled/automations/index.html +1 -1
  31. khoj/interface/compiled/automations/index.txt +3 -3
  32. khoj/interface/compiled/chat/index.html +1 -1
  33. khoj/interface/compiled/chat/index.txt +2 -2
  34. khoj/interface/compiled/factchecker/index.html +1 -1
  35. khoj/interface/compiled/factchecker/index.txt +2 -2
  36. khoj/interface/compiled/index.html +1 -1
  37. khoj/interface/compiled/index.txt +2 -2
  38. khoj/interface/compiled/search/index.html +1 -1
  39. khoj/interface/compiled/search/index.txt +2 -2
  40. khoj/interface/compiled/settings/index.html +1 -1
  41. khoj/interface/compiled/settings/index.txt +3 -3
  42. khoj/interface/compiled/share/chat/index.html +1 -1
  43. khoj/interface/compiled/share/chat/index.txt +2 -2
  44. khoj/processor/conversation/openai/gpt.py +4 -0
  45. khoj/processor/conversation/utils.py +31 -13
  46. khoj/processor/tools/online_search.py +6 -2
  47. khoj/routers/api_chat.py +41 -8
  48. khoj/routers/helpers.py +73 -21
  49. khoj/routers/storage.py +28 -0
  50. khoj/utils/helpers.py +15 -0
  51. {khoj-1.21.6.dev14.dist-info → khoj-1.21.7.dev1.dist-info}/METADATA +1 -1
  52. {khoj-1.21.6.dev14.dist-info → khoj-1.21.7.dev1.dist-info}/RECORD +57 -54
  53. khoj/interface/compiled/_next/static/chunks/3678-8c0e55c3b5d83a22.js +0 -25
  54. khoj/interface/compiled/_next/static/chunks/8423-132ea64eac83fd43.js +0 -1
  55. khoj/interface/compiled/_next/static/chunks/9001-acbca3e19b1a5ddf.js +0 -21
  56. khoj/interface/compiled/_next/static/chunks/9162-4a6d0d0dc5e27618.js +0 -11
  57. khoj/interface/compiled/_next/static/chunks/app/chat/page-c2ebc47a09abc8ae.js +0 -1
  58. khoj/interface/compiled/_next/static/chunks/app/factchecker/page-6ca723a9ff0dfd70.js +0 -1
  59. khoj/interface/compiled/_next/static/chunks/app/page-d403fc59c9c3f8cc.js +0 -1
  60. khoj/interface/compiled/_next/static/chunks/app/share/chat/page-0ae8f5b868af65c1.js +0 -1
  61. khoj/interface/compiled/_next/static/css/2bfe35fbe2c97a56.css +0 -1
  62. khoj/interface/compiled/_next/static/css/9d5b867ec04494a6.css +0 -25
  63. khoj/interface/compiled/_next/static/css/a22d83f18a32957e.css +0 -1
  64. khoj/interface/compiled/_next/static/css/a3530ec58b0b660f.css +0 -1
  65. khoj/interface/compiled/_next/static/css/b81e909d403fb2df.css +0 -1
  66. /khoj/interface/compiled/_next/static/{OHjya9xQJWrEMTXUfPYon → ZZXQatJ9SszXKA3rhPWXF}/_buildManifest.js +0 -0
  67. /khoj/interface/compiled/_next/static/{OHjya9xQJWrEMTXUfPYon → ZZXQatJ9SszXKA3rhPWXF}/_ssgManifest.js +0 -0
  68. {khoj-1.21.6.dev14.dist-info → khoj-1.21.7.dev1.dist-info}/WHEEL +0 -0
  69. {khoj-1.21.6.dev14.dist-info → khoj-1.21.7.dev1.dist-info}/entry_points.txt +0 -0
  70. {khoj-1.21.6.dev14.dist-info → khoj-1.21.7.dev1.dist-info}/licenses/LICENSE +0 -0
khoj/routers/api_chat.py CHANGED
@@ -1,4 +1,5 @@
 import asyncio
+import base64
 import json
 import logging
 import time
@@ -46,11 +47,13 @@ from khoj.routers.helpers import (
     update_telemetry_state,
     validate_conversation_config,
 )
+from khoj.routers.storage import upload_image_to_bucket
 from khoj.utils import state
 from khoj.utils.helpers import (
     AsyncIteratorWrapper,
     ConversationCommand,
     command_descriptions,
+    convert_image_to_webp,
     get_device,
     is_none_or_empty,
 )
@@ -517,7 +520,11 @@ async def set_conversation_title(
     )


-@api_chat.get("")
+class ImageUploadObject(BaseModel):
+    image: str
+
+
+@api_chat.post("")
 @requires(["authenticated"])
 async def chat(
     request: Request,
@@ -532,6 +539,7 @@ async def chat(
     region: Optional[str] = None,
     country: Optional[str] = None,
     timezone: Optional[str] = None,
+    image: Optional[ImageUploadObject] = None,
     rate_limiter_per_minute=Depends(
         ApiUserRateLimiter(requests=60, subscribed_requests=60, window=60, slug="chat_minute")
     ),
@@ -539,7 +547,7 @@ async def chat(
         ApiUserRateLimiter(requests=600, subscribed_requests=600, window=60 * 60 * 24, slug="chat_day")
     ),
 ):
-    async def event_generator(q: str):
+    async def event_generator(q: str, image: ImageUploadObject):
         start_time = time.perf_counter()
         ttft = None
         chat_metadata: dict = {}
@@ -550,6 +558,17 @@ async def chat(
         q = unquote(q)
         nonlocal conversation_id

+        uploaded_image_url = None
+        if image:
+            decoded_string = unquote(image.image)
+            base64_data = decoded_string.split(",", 1)[1]
+            image_bytes = base64.b64decode(base64_data)
+            webp_image_bytes = convert_image_to_webp(image_bytes)
+            try:
+                uploaded_image_url = upload_image_to_bucket(webp_image_bytes, request.user.object.id)
+            except:
+                uploaded_image_url = None
+
         async def send_event(event_type: ChatEvent, data: str | dict):
             nonlocal connection_alive, ttft
             if not connection_alive or await request.is_disconnected():
@@ -637,7 +656,7 @@ async def chat(

         if conversation_commands == [ConversationCommand.Default] or is_automated_task:
             conversation_commands = await aget_relevant_information_sources(
-                q, meta_log, is_automated_task, subscribed=subscribed
+                q, meta_log, is_automated_task, subscribed=subscribed, uploaded_image_url=uploaded_image_url
             )
             conversation_commands_str = ", ".join([cmd.value for cmd in conversation_commands])
             async for result in send_event(
@@ -645,7 +664,7 @@ async def chat(
             ):
                 yield result

-        mode = await aget_relevant_output_modes(q, meta_log, is_automated_task)
+        mode = await aget_relevant_output_modes(q, meta_log, is_automated_task, uploaded_image_url)
         async for result in send_event(ChatEvent.STATUS, f"**Decided Response Mode:** {mode.value}"):
             yield result
         if mode not in conversation_commands:
@@ -693,7 +712,9 @@ async def chat(
                     ):
                         yield result

-                    response = await extract_relevant_summary(q, contextual_data, subscribed=subscribed)
+                    response = await extract_relevant_summary(
+                        q, contextual_data, subscribed=subscribed, uploaded_image_url=uploaded_image_url
+                    )
                     response_log = str(response)
                     async for result in send_llm_response(response_log):
                         yield result
@@ -711,6 +732,7 @@ async def chat(
                 intent_type="summarize",
                 client_application=request.user.client_app,
                 conversation_id=conversation_id,
+                uploaded_image_url=uploaded_image_url,
             )
             return

@@ -753,6 +775,7 @@ async def chat(
                 conversation_id=conversation_id,
                 inferred_queries=[query_to_run],
                 automation_id=automation.id,
+                uploaded_image_url=uploaded_image_url,
             )
             async for result in send_llm_response(llm_response):
                 yield result
@@ -807,6 +830,7 @@ async def chat(
                     subscribed,
                     partial(send_event, ChatEvent.STATUS),
                     custom_filters,
+                    uploaded_image_url=uploaded_image_url,
                 ):
                     if isinstance(result, dict) and ChatEvent.STATUS in result:
                         yield result[ChatEvent.STATUS]
@@ -823,7 +847,13 @@ async def chat(
         if ConversationCommand.Webpage in conversation_commands:
             try:
                 async for result in read_webpages(
-                    defiltered_query, meta_log, location, user, subscribed, partial(send_event, ChatEvent.STATUS)
+                    defiltered_query,
+                    meta_log,
+                    location,
+                    user,
+                    subscribed,
+                    partial(send_event, ChatEvent.STATUS),
+                    uploaded_image_url=uploaded_image_url,
                 ):
                     if isinstance(result, dict) and ChatEvent.STATUS in result:
                         yield result[ChatEvent.STATUS]
@@ -869,6 +899,7 @@ async def chat(
                 online_results=online_results,
                 subscribed=subscribed,
                 send_status_func=partial(send_event, ChatEvent.STATUS),
+                uploaded_image_url=uploaded_image_url,
             ):
                 if isinstance(result, dict) and ChatEvent.STATUS in result:
                     yield result[ChatEvent.STATUS]
@@ -898,6 +929,7 @@ async def chat(
                 conversation_id=conversation_id,
                 compiled_references=compiled_references,
                 online_results=online_results,
+                uploaded_image_url=uploaded_image_url,
             )
             content_obj = {
                 "intentType": intent_type,
@@ -924,6 +956,7 @@ async def chat(
             conversation_id,
             location,
             user_name,
+            uploaded_image_url,
         )

         # Send Response
@@ -949,9 +982,9 @@ async def chat(

     ## Stream Text Response
     if stream:
-        return StreamingResponse(event_generator(q), media_type="text/plain")
+        return StreamingResponse(event_generator(q, image=image), media_type="text/plain")
     ## Non-Streaming Text Response
     else:
-        response_iterator = event_generator(q)
+        response_iterator = event_generator(q, image=image)
         response_data = await read_chat_stream(response_iterator)
         return Response(content=json.dumps(response_data), media_type="application/json", status_code=200)
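With the hunks above, `/api/chat` switches from GET to POST and accepts an optional JSON body matching `ImageUploadObject`: an `image` field holding a (possibly URL-encoded) data URL whose base64 payload is decoded, converted to WebP, and uploaded before the chat pipeline runs. The sketch below shows how a client might call the new endpoint; the host, port, token handling, file name, and use of `requests` are illustrative assumptions, while the route, the `q`/`stream` parameters, and the body shape come from the diff above.

```python
import base64

import requests  # any HTTP client works; requests is only an assumption here

KHOJ_URL = "http://localhost:42110"  # hypothetical local server
TOKEN = "your-khoj-api-token"        # hypothetical credential

# The server splits the value on the first "," and base64-decodes the rest,
# so the image is sent as a data URL.
with open("diagram.png", "rb") as f:
    data_url = "data:image/png;base64," + base64.b64encode(f.read()).decode()

response = requests.post(
    f"{KHOJ_URL}/api/chat",
    params={"q": "What does this diagram show?", "stream": False},
    json={"image": data_url},  # body shape of ImageUploadObject
    headers={"Authorization": f"Bearer {TOKEN}"},
)
print(response.json())
```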
khoj/routers/helpers.py CHANGED
@@ -97,6 +97,7 @@ from khoj.utils.helpers import (
     LRU,
     ConversationCommand,
     ImageIntentType,
+    convert_image_to_webp,
     is_none_or_empty,
     is_valid_url,
     log_telemetry,
@@ -252,7 +253,9 @@ async def acreate_title_from_query(query: str) -> str:
     return response.strip()


-async def aget_relevant_information_sources(query: str, conversation_history: dict, is_task: bool, subscribed: bool):
+async def aget_relevant_information_sources(
+    query: str, conversation_history: dict, is_task: bool, subscribed: bool, uploaded_image_url: str = None
+):
     """
     Given a query, determine which of the available tools the agent should use in order to answer appropriately.
     """
@@ -266,6 +269,9 @@ async def aget_relevant_information_sources(query: str, conversation_history: di

     chat_history = construct_chat_history(conversation_history)

+    if uploaded_image_url:
+        query = f"[placeholder for image attached to this message]\n{query}"
+
     relevant_tools_prompt = prompts.pick_relevant_information_collection_tools.format(
         query=query,
         tools=tool_options_str,
@@ -274,7 +280,9 @@ async def aget_relevant_information_sources(query: str, conversation_history: di

     with timer("Chat actor: Infer information sources to refer", logger):
         response = await send_message_to_model_wrapper(
-            relevant_tools_prompt, response_type="json_object", subscribed=subscribed
+            relevant_tools_prompt,
+            response_type="json_object",
+            subscribed=subscribed,
         )

     try:
@@ -302,7 +310,9 @@ async def aget_relevant_information_sources(query: str, conversation_history: di
         return [ConversationCommand.Default]


-async def aget_relevant_output_modes(query: str, conversation_history: dict, is_task: bool = False):
+async def aget_relevant_output_modes(
+    query: str, conversation_history: dict, is_task: bool = False, uploaded_image_url: str = None
+):
     """
     Given a query, determine which of the available tools the agent should use in order to answer appropriately.
     """
@@ -319,6 +329,9 @@ async def aget_relevant_output_modes(query: str, conversation_history: dict, is_

     chat_history = construct_chat_history(conversation_history)

+    if uploaded_image_url:
+        query = f"[placeholder for image attached to this message]\n{query}"
+
     relevant_mode_prompt = prompts.pick_relevant_output_mode.format(
         query=query,
         modes=mode_options_str,
@@ -347,7 +360,7 @@ async def aget_relevant_output_modes(query: str, conversation_history: dict, is_


 async def infer_webpage_urls(
-    q: str, conversation_history: dict, location_data: LocationData, user: KhojUser
+    q: str, conversation_history: dict, location_data: LocationData, user: KhojUser, uploaded_image_url: str = None
 ) -> List[str]:
     """
     Infer webpage links from the given query
@@ -366,7 +379,9 @@ async def infer_webpage_urls(
     )

     with timer("Chat actor: Infer webpage urls to read", logger):
-        response = await send_message_to_model_wrapper(online_queries_prompt, response_type="json_object")
+        response = await send_message_to_model_wrapper(
+            online_queries_prompt, uploaded_image_url=uploaded_image_url, response_type="json_object"
+        )

     # Validate that the response is a non-empty, JSON-serializable list of URLs
     try:
@@ -381,7 +396,7 @@ async def infer_webpage_urls(


 async def generate_online_subqueries(
-    q: str, conversation_history: dict, location_data: LocationData, user: KhojUser
+    q: str, conversation_history: dict, location_data: LocationData, user: KhojUser, uploaded_image_url: str = None
 ) -> List[str]:
     """
     Generate subqueries from the given query
@@ -400,7 +415,9 @@ async def generate_online_subqueries(
     )

     with timer("Chat actor: Generate online search subqueries", logger):
-        response = await send_message_to_model_wrapper(online_queries_prompt, response_type="json_object")
+        response = await send_message_to_model_wrapper(
+            online_queries_prompt, uploaded_image_url=uploaded_image_url, response_type="json_object"
+        )

     # Validate that the response is a non-empty, JSON-serializable list
     try:
@@ -419,7 +436,7 @@ async def generate_online_subqueries(
         return [q]


-async def schedule_query(q: str, conversation_history: dict) -> Tuple[str, ...]:
+async def schedule_query(q: str, conversation_history: dict, uploaded_image_url: str = None) -> Tuple[str, ...]:
     """
     Schedule the date, time to run the query. Assume the server timezone is UTC.
     """
@@ -430,7 +447,9 @@ async def schedule_query(q: str, conversation_history: dict) -> Tuple[str, ...]:
         chat_history=chat_history,
     )

-    raw_response = await send_message_to_model_wrapper(crontime_prompt, response_type="json_object")
+    raw_response = await send_message_to_model_wrapper(
+        crontime_prompt, uploaded_image_url=uploaded_image_url, response_type="json_object"
+    )

     # Validate that the response is a non-empty, JSON-serializable list
     try:
@@ -468,7 +487,9 @@ async def extract_relevant_info(q: str, corpus: str, subscribed: bool) -> Union[
     return response.strip()


-async def extract_relevant_summary(q: str, corpus: str, subscribed: bool = False) -> Union[str, None]:
+async def extract_relevant_summary(
+    q: str, corpus: str, subscribed: bool = False, uploaded_image_url: str = None
+) -> Union[str, None]:
     """
     Extract relevant information for a given query from the target corpus
     """
@@ -489,6 +510,7 @@ async def extract_relevant_summary(q: str, corpus: str, subscribed: bool = False
             prompts.system_prompt_extract_relevant_summary,
             chat_model_option=chat_model,
             subscribed=subscribed,
+            uploaded_image_url=uploaded_image_url,
         )
     return response.strip()

@@ -501,6 +523,7 @@ async def generate_better_image_prompt(
     online_results: Optional[dict] = None,
     model_type: Optional[str] = None,
     subscribed: bool = False,
+    uploaded_image_url: Optional[str] = None,
 ) -> str:
     """
     Generate a better image prompt from the given query
@@ -549,7 +572,7 @@ async def generate_better_image_prompt(

     with timer("Chat actor: Generate contextual image prompt", logger):
         response = await send_message_to_model_wrapper(
-            image_prompt, chat_model_option=chat_model, subscribed=subscribed
+            image_prompt, chat_model_option=chat_model, subscribed=subscribed, uploaded_image_url=uploaded_image_url
         )
         response = response.strip()
         if response.startswith(('"', "'")) and response.endswith(('"', "'")):
@@ -564,11 +587,19 @@ async def send_message_to_model_wrapper(
     response_type: str = "text",
     chat_model_option: ChatModelOptions = None,
     subscribed: bool = False,
+    uploaded_image_url: str = None,
 ):
     conversation_config: ChatModelOptions = (
         chat_model_option or await ConversationAdapters.aget_default_conversation_config()
     )

+    vision_available = conversation_config.vision_enabled
+    if not vision_available and uploaded_image_url:
+        vision_enabled_config = ConversationAdapters.get_vision_enabled_config()
+        if vision_enabled_config:
+            conversation_config = vision_enabled_config
+            vision_available = True
+
     chat_model = conversation_config.chat_model
     max_tokens = (
         conversation_config.subscribed_max_prompt_size
@@ -576,6 +607,7 @@ async def send_message_to_model_wrapper(
         else conversation_config.max_prompt_size
     )
     tokenizer = conversation_config.tokenizer
+    vision_available = conversation_config.vision_enabled

     if conversation_config.model_type == "offline":
         if state.offline_chat_processor_config is None or state.offline_chat_processor_config.loaded_model is None:
@@ -589,6 +621,7 @@ async def send_message_to_model_wrapper(
             loaded_model=loaded_model,
             tokenizer_name=tokenizer,
             max_prompt_size=max_tokens,
+            vision_enabled=vision_available,
         )

         return send_message_to_model_offline(
@@ -609,6 +642,8 @@ async def send_message_to_model_wrapper(
             model_name=chat_model,
             max_prompt_size=max_tokens,
             tokenizer_name=tokenizer,
+            vision_enabled=vision_available,
+            uploaded_image_url=uploaded_image_url,
         )

         openai_response = send_message_to_model(
@@ -628,6 +663,7 @@ async def send_message_to_model_wrapper(
             model_name=chat_model,
             max_prompt_size=max_tokens,
             tokenizer_name=tokenizer,
+            vision_enabled=vision_available,
         )

         return anthropic_send_message_to_model(
@@ -651,6 +687,7 @@ def send_message_to_model_wrapper_sync(

     chat_model = conversation_config.chat_model
     max_tokens = conversation_config.max_prompt_size
+    vision_available = conversation_config.vision_enabled

     if conversation_config.model_type == "offline":
         if state.offline_chat_processor_config is None or state.offline_chat_processor_config.loaded_model is None:
@@ -658,7 +695,11 @@

         loaded_model = state.offline_chat_processor_config.loaded_model
         truncated_messages = generate_chatml_messages_with_context(
-            user_message=message, system_message=system_message, model_name=chat_model, loaded_model=loaded_model
+            user_message=message,
+            system_message=system_message,
+            model_name=chat_model,
+            loaded_model=loaded_model,
+            vision_enabled=vision_available,
         )

         return send_message_to_model_offline(
@@ -672,7 +713,10 @@
     elif conversation_config.model_type == "openai":
         api_key = conversation_config.openai_config.api_key
         truncated_messages = generate_chatml_messages_with_context(
-            user_message=message, system_message=system_message, model_name=chat_model
+            user_message=message,
+            system_message=system_message,
+            model_name=chat_model,
+            vision_enabled=vision_available,
         )

         openai_response = send_message_to_model(
@@ -688,6 +732,7 @@
             system_message=system_message,
             model_name=chat_model,
             max_prompt_size=max_tokens,
+            vision_enabled=vision_available,
         )

         return anthropic_send_message_to_model(
@@ -712,6 +757,7 @@ def generate_chat_response(
     conversation_id: int = None,
     location_data: LocationData = None,
     user_name: Optional[str] = None,
+    uploaded_image_url: Optional[str] = None,
 ) -> Tuple[Union[ThreadedGenerator, Iterator[str]], Dict[str, str]]:
     # Initialize Variables
     chat_response = None
@@ -719,7 +765,6 @@

     metadata = {}
     agent = AgentAdapters.get_conversation_agent_by_id(conversation.agent.id) if conversation.agent else None
-
     try:
         partial_completion = partial(
             save_to_conversation_log,
@@ -731,9 +776,17 @@
             inferred_queries=inferred_queries,
             client_application=client_application,
             conversation_id=conversation_id,
+            uploaded_image_url=uploaded_image_url,
         )

         conversation_config = ConversationAdapters.get_valid_conversation_config(user, conversation)
+        vision_available = conversation_config.vision_enabled
+        if not vision_available and uploaded_image_url:
+            vision_enabled_config = ConversationAdapters.get_vision_enabled_config()
+            if vision_enabled_config:
+                conversation_config = vision_enabled_config
+                vision_available = True
+
         if conversation_config.model_type == "offline":
             loaded_model = state.offline_chat_processor_config.loaded_model
             chat_response = converse_offline(
@@ -759,6 +812,7 @@
             chat_response = converse(
                 compiled_references,
                 q,
+                image_url=uploaded_image_url,
                 online_results=online_results,
                 conversation_log=meta_log,
                 model=chat_model,
@@ -771,6 +825,7 @@
                 location_data=location_data,
                 user_name=user_name,
                 agent=agent,
+                vision_available=vision_available,
             )

         elif conversation_config.model_type == "anthropic":
@@ -809,6 +864,7 @@ async def text_to_image(
     online_results: Dict[str, Any],
     subscribed: bool = False,
     send_status_func: Optional[Callable] = None,
+    uploaded_image_url: Optional[str] = None,
 ):
     status_code = 200
     image = None
@@ -845,6 +901,7 @@
             online_results=online_results,
             model_type=text_to_image_config.model_type,
             subscribed=subscribed,
+            uploaded_image_url=uploaded_image_url,
         )

     if send_status_func:
@@ -908,13 +965,7 @@

     with timer("Convert image to webp", logger):
         # Convert png to webp for faster loading
-        image_io = io.BytesIO(decoded_image)
-        png_image = Image.open(image_io)
-        webp_image_io = io.BytesIO()
-        png_image.save(webp_image_io, "WEBP")
-        webp_image_bytes = webp_image_io.getvalue()
-        webp_image_io.close()
-        image_io.close()
+        webp_image_bytes = convert_image_to_webp(decoded_image)

     with timer("Upload image to S3", logger):
         image_url = upload_image(webp_image_bytes, user.uuid)
@@ -1095,6 +1146,7 @@ def should_notify(original_query: str, executed_query: str, ai_response: str) ->

     with timer("Chat actor: Decide to notify user of automation response", logger):
         try:
+            # TODO Replace with async call so we don't have to maintain a sync version
             response = send_message_to_model_wrapper_sync(to_notify_or_not)
             should_notify_result = "no" not in response.lower()
             logger.info(f'Decided to {"not " if not should_notify_result else ""}notify user of automation response.')
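A pattern repeats across these hunks: keep the configured chat model unless an image is attached and that model is not vision-capable, in which case fall back to a vision-enabled config via `ConversationAdapters.get_vision_enabled_config()`. The sketch below restates that selection logic in isolation; the `ChatConfig` dataclass, `pick_config` helper, and model names are illustrative stand-ins, not khoj APIs.

```python
from dataclasses import dataclass
from typing import Optional, Tuple


@dataclass
class ChatConfig:
    # Illustrative stand-in for khoj's ChatModelOptions
    chat_model: str
    vision_enabled: bool


def pick_config(
    default: ChatConfig, vision_fallback: Optional[ChatConfig], uploaded_image_url: Optional[str]
) -> Tuple[ChatConfig, bool]:
    """Keep the default model unless an image is attached and the default cannot see."""
    vision_available = default.vision_enabled
    if not vision_available and uploaded_image_url and vision_fallback:
        return vision_fallback, True
    return default, vision_available


# The text-only default is swapped out only when an image URL is present.
text_only = ChatConfig("llama-3", vision_enabled=False)
vision = ChatConfig("gpt-4o", vision_enabled=True)
assert pick_config(text_only, vision, None)[0] is text_only
assert pick_config(text_only, vision, "https://example.com/img.webp")[0] is vision
```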
khoj/routers/storage.py CHANGED
@@ -33,3 +33,31 @@ def upload_image(image: bytes, user_id: uuid.UUID):
     except Exception as e:
         logger.error(f"Failed to upload image to S3: {e}")
         return None
+
+
+AWS_USER_UPLOADED_IMAGES_BUCKET_NAME = os.getenv("AWS_USER_UPLOADED_IMAGES_BUCKET_NAME")
+
+
+def upload_image_to_bucket(image: bytes, user_id: uuid.UUID):
+    """Upload the image to the S3 bucket"""
+    if not aws_enabled:
+        logger.info("AWS is not enabled. Skipping image upload")
+        return None
+
+    image_key = f"{user_id}/{uuid.uuid4()}.webp"
+    if not AWS_USER_UPLOADED_IMAGES_BUCKET_NAME:
+        logger.error("AWS_USER_UPLOADED_IMAGES_BUCKET_NAME is not set")
+        return None
+
+    try:
+        s3_client.put_object(
+            Bucket=AWS_USER_UPLOADED_IMAGES_BUCKET_NAME,
+            Key=image_key,
+            Body=image,
+            ACL="public-read",
+            ContentType="image/webp",
+        )
+        return f"https://{AWS_USER_UPLOADED_IMAGES_BUCKET_NAME}/{image_key}"
+    except Exception as e:
+        logger.error(f"Failed to upload image to S3: {e}")
+        return None
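The new uploader reads its bucket name from the environment at import time and reuses the module's existing `s3_client`/`aws_enabled` setup. A rough usage sketch follows, assuming boto3 credentials are already configured in the environment; the bucket name and file name shown are placeholders.

```python
import os
import uuid

# Hypothetical bucket name; the variable is read when the module is imported,
# so it must be set before importing khoj.routers.storage.
os.environ.setdefault("AWS_USER_UPLOADED_IMAGES_BUCKET_NAME", "my-khoj-user-uploads")

from khoj.routers.storage import upload_image_to_bucket

with open("photo.webp", "rb") as f:
    url = upload_image_to_bucket(f.read(), user_id=uuid.uuid4())

# Returns a public https URL on success, or None when AWS is disabled,
# the bucket variable is unset, or the upload raises.
print(url)
```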
khoj/utils/helpers.py CHANGED
@@ -1,6 +1,7 @@
 from __future__ import annotations # to avoid quoting type hints

 import datetime
+import io
 import logging
 import os
 import platform
@@ -22,6 +23,7 @@ import requests
 import torch
 from asgiref.sync import sync_to_async
 from magika import Magika
+from PIL import Image

 from khoj.utils import constants

@@ -416,3 +418,16 @@ def is_internet_connected():
         return response.status_code == 200
     except:
         return False
+
+
+def convert_image_to_webp(image_bytes):
+    """Convert image bytes to webp format for faster loading"""
+    image_io = io.BytesIO(image_bytes)
+    with Image.open(image_io) as original_image:
+        webp_image_io = io.BytesIO()
+        original_image.save(webp_image_io, "WEBP")
+
+        # Encode the WebP image back to base64
+        webp_image_bytes = webp_image_io.getvalue()
+        webp_image_io.close()
+        return webp_image_bytes
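A quick way to exercise the new helper in isolation; this is a minimal sketch assuming Pillow with WebP support is installed, using a generated in-memory PNG instead of a real upload.

```python
import io

from PIL import Image

from khoj.utils.helpers import convert_image_to_webp

# Build a small in-memory PNG to stand in for an uploaded image.
png_io = io.BytesIO()
Image.new("RGB", (64, 64), color="purple").save(png_io, "PNG")
png_bytes = png_io.getvalue()

webp_bytes = convert_image_to_webp(png_bytes)

# The helper returns raw WebP bytes, so the result opens directly as WebP.
assert Image.open(io.BytesIO(webp_bytes)).format == "WEBP"
print(f"PNG {len(png_bytes)} bytes -> WebP {len(webp_bytes)} bytes")
```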
{khoj-1.21.6.dev14.dist-info → khoj-1.21.7.dev1.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: khoj
-Version: 1.21.6.dev14
+Version: 1.21.7.dev1
 Summary: Your Second Brain
 Project-URL: Homepage, https://khoj.dev
 Project-URL: Documentation, https://docs.khoj.dev