khoj 1.26.2__py3-none-any.whl → 1.26.5.dev34__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (89)
  1. khoj/configure.py +3 -3
  2. khoj/database/adapters/__init__.py +40 -8
  3. khoj/database/migrations/0070_alter_agent_input_tools_alter_agent_output_modes.py +46 -0
  4. khoj/database/migrations/0071_subscription_enabled_trial_at_and_more.py +32 -0
  5. khoj/database/models/__init__.py +8 -3
  6. khoj/interface/compiled/404/index.html +1 -1
  7. khoj/interface/compiled/_next/static/chunks/1210.132a7e1910006bbb.js +1 -0
  8. khoj/interface/compiled/_next/static/chunks/1279-f37ee4a388ebf544.js +1 -0
  9. khoj/interface/compiled/_next/static/chunks/1459.690bf20e7d7b7090.js +1 -0
  10. khoj/interface/compiled/_next/static/chunks/1603-b9d95833e0e025e8.js +1 -0
  11. khoj/interface/compiled/_next/static/chunks/1970-1d6d0c1b00b4f343.js +1 -0
  12. khoj/interface/compiled/_next/static/chunks/2697-61fcba89fd87eab4.js +1 -0
  13. khoj/interface/compiled/_next/static/chunks/3423-8e9c420574a9fbe3.js +1 -0
  14. khoj/interface/compiled/_next/static/chunks/394-6bcb8c429f168f21.js +3 -0
  15. khoj/interface/compiled/_next/static/chunks/4602-8eeb4b76385ad159.js +1 -0
  16. khoj/interface/compiled/_next/static/chunks/5512-94c7c2bbcf58c19d.js +1 -0
  17. khoj/interface/compiled/_next/static/chunks/7113-f2e114d7034a0835.js +1 -0
  18. khoj/interface/compiled/_next/static/chunks/{4086-2c74808ba38a5a0f.js → 8840-b8d7b9f0923c6651.js} +1 -1
  19. khoj/interface/compiled/_next/static/chunks/9417-759984ad62caa3dc.js +1 -0
  20. khoj/interface/compiled/_next/static/chunks/9479-4b443fdcc99141c9.js +1 -0
  21. khoj/interface/compiled/_next/static/chunks/94ca1967.5584df65931cfe83.js +1 -0
  22. khoj/interface/compiled/_next/static/chunks/964ecbae.ea4eab2a3a835ffe.js +1 -0
  23. khoj/interface/compiled/_next/static/chunks/app/agents/page-2beaba7c9bb750bd.js +1 -0
  24. khoj/interface/compiled/_next/static/chunks/app/automations/{page-5480731341f34450.js → page-9b5c77e0b0dd772c.js} +1 -1
  25. khoj/interface/compiled/_next/static/chunks/app/chat/page-151232d8417a1ea1.js +1 -0
  26. khoj/interface/compiled/_next/static/chunks/app/factchecker/{page-e7b34316ec6f44de.js → page-798904432c2417c4.js} +1 -1
  27. khoj/interface/compiled/_next/static/chunks/app/{page-10a5aad6e04f3cf8.js → page-db4e38a5255af7ad.js} +1 -1
  28. khoj/interface/compiled/_next/static/chunks/app/search/{page-d56541c746fded7d.js → page-ab2995529ece3140.js} +1 -1
  29. khoj/interface/compiled/_next/static/chunks/app/settings/page-3e9cf5ed5ace4310.js +1 -0
  30. khoj/interface/compiled/_next/static/chunks/app/share/chat/page-6a01e07fb244c10c.js +1 -0
  31. khoj/interface/compiled/_next/static/chunks/webpack-313247d7eb764923.js +1 -0
  32. khoj/interface/compiled/_next/static/css/{c808691c459e3887.css → 3cf13271869a4aeb.css} +1 -1
  33. khoj/interface/compiled/_next/static/css/76d55eb435962b19.css +25 -0
  34. khoj/interface/compiled/_next/static/css/{3e1f1fdd70775091.css → 80bd6301fc657983.css} +1 -1
  35. khoj/interface/compiled/_next/static/css/{2de69f0be774c768.css → b70402177a7c3207.css} +1 -1
  36. khoj/interface/compiled/agents/index.html +1 -1
  37. khoj/interface/compiled/agents/index.txt +2 -2
  38. khoj/interface/compiled/automations/index.html +1 -1
  39. khoj/interface/compiled/automations/index.txt +2 -2
  40. khoj/interface/compiled/chat/index.html +1 -1
  41. khoj/interface/compiled/chat/index.txt +2 -2
  42. khoj/interface/compiled/factchecker/index.html +1 -1
  43. khoj/interface/compiled/factchecker/index.txt +2 -2
  44. khoj/interface/compiled/index.html +1 -1
  45. khoj/interface/compiled/index.txt +2 -2
  46. khoj/interface/compiled/search/index.html +1 -1
  47. khoj/interface/compiled/search/index.txt +2 -2
  48. khoj/interface/compiled/settings/index.html +1 -1
  49. khoj/interface/compiled/settings/index.txt +2 -2
  50. khoj/interface/compiled/share/chat/index.html +1 -1
  51. khoj/interface/compiled/share/chat/index.txt +2 -2
  52. khoj/processor/conversation/google/gemini_chat.py +28 -13
  53. khoj/processor/conversation/google/utils.py +34 -12
  54. khoj/processor/conversation/openai/gpt.py +4 -4
  55. khoj/processor/conversation/prompts.py +144 -0
  56. khoj/processor/conversation/utils.py +22 -13
  57. khoj/processor/image/generate.py +5 -5
  58. khoj/processor/tools/online_search.py +4 -4
  59. khoj/routers/api.py +13 -4
  60. khoj/routers/api_agents.py +41 -20
  61. khoj/routers/api_chat.py +85 -46
  62. khoj/routers/{subscription.py → api_subscription.py} +21 -3
  63. khoj/routers/auth.py +2 -2
  64. khoj/routers/helpers.py +235 -30
  65. khoj/routers/web_client.py +0 -11
  66. khoj/utils/helpers.py +7 -3
  67. {khoj-1.26.2.dist-info → khoj-1.26.5.dev34.dist-info}/METADATA +2 -2
  68. {khoj-1.26.2.dist-info → khoj-1.26.5.dev34.dist-info}/RECORD +73 -66
  69. khoj/interface/compiled/_next/static/chunks/121-7024f479c297aef0.js +0 -1
  70. khoj/interface/compiled/_next/static/chunks/1603-fa3ee48860b9dc5c.js +0 -1
  71. khoj/interface/compiled/_next/static/chunks/2697-a38d01981ad3bdf8.js +0 -1
  72. khoj/interface/compiled/_next/static/chunks/4051-2cf66369d6ca0f1d.js +0 -3
  73. khoj/interface/compiled/_next/static/chunks/477-ec86e93db10571c1.js +0 -1
  74. khoj/interface/compiled/_next/static/chunks/51-e8f5bdb69b5ea421.js +0 -1
  75. khoj/interface/compiled/_next/static/chunks/7762-79f2205740622b5c.js +0 -1
  76. khoj/interface/compiled/_next/static/chunks/9178-899fe9a6b754ecfe.js +0 -1
  77. khoj/interface/compiled/_next/static/chunks/9417-46ed3aaa639c85ef.js +0 -1
  78. khoj/interface/compiled/_next/static/chunks/9479-ea776e73f549090c.js +0 -1
  79. khoj/interface/compiled/_next/static/chunks/app/agents/page-88aa3042711107b7.js +0 -1
  80. khoj/interface/compiled/_next/static/chunks/app/chat/page-702057ccbcf27881.js +0 -1
  81. khoj/interface/compiled/_next/static/chunks/app/settings/page-e044a999468a7c5d.js +0 -1
  82. khoj/interface/compiled/_next/static/chunks/app/share/chat/page-fbbd66a4d4633438.js +0 -1
  83. khoj/interface/compiled/_next/static/chunks/webpack-64dc39af85cd2625.js +0 -1
  84. khoj/interface/compiled/_next/static/css/b9a6bf04305d98d7.css +0 -25
  85. /khoj/interface/compiled/_next/static/{eim4XajTfG4ub4ft5AEkJ → 7viHIza-WalEOzloM67l4}/_buildManifest.js +0 -0
  86. /khoj/interface/compiled/_next/static/{eim4XajTfG4ub4ft5AEkJ → 7viHIza-WalEOzloM67l4}/_ssgManifest.js +0 -0
  87. {khoj-1.26.2.dist-info → khoj-1.26.5.dev34.dist-info}/WHEEL +0 -0
  88. {khoj-1.26.2.dist-info → khoj-1.26.5.dev34.dist-info}/entry_points.txt +0 -0
  89. {khoj-1.26.2.dist-info → khoj-1.26.5.dev34.dist-info}/licenses/LICENSE +0 -0
khoj/processor/conversation/google/gemini_chat.py CHANGED
@@ -6,14 +6,17 @@ from typing import Dict, Optional
 
 from langchain.schema import ChatMessage
 
-from khoj.database.models import Agent, KhojUser
+from khoj.database.models import Agent, ChatModelOptions, KhojUser
 from khoj.processor.conversation import prompts
 from khoj.processor.conversation.google.utils import (
     format_messages_for_gemini,
     gemini_chat_completion_with_backoff,
     gemini_completion_with_backoff,
 )
-from khoj.processor.conversation.utils import generate_chatml_messages_with_context
+from khoj.processor.conversation.utils import (
+    construct_structured_message,
+    generate_chatml_messages_with_context,
+)
 from khoj.utils.helpers import ConversationCommand, is_none_or_empty
 from khoj.utils.rawconfig import LocationData
 
@@ -29,6 +32,8 @@ def extract_questions_gemini(
     max_tokens=None,
     location_data: LocationData = None,
     user: KhojUser = None,
+    query_images: Optional[list[str]] = None,
+    vision_enabled: bool = False,
     personality_context: Optional[str] = None,
 ):
     """
@@ -70,17 +75,17 @@ def extract_questions_gemini(
         text=text,
     )
 
-    messages = [ChatMessage(content=prompt, role="user")]
+    prompt = construct_structured_message(
+        message=prompt,
+        images=query_images,
+        model_type=ChatModelOptions.ModelType.GOOGLE,
+        vision_enabled=vision_enabled,
+    )
 
-    model_kwargs = {"response_mime_type": "application/json"}
+    messages = [ChatMessage(content=prompt, role="user"), ChatMessage(content=system_prompt, role="system")]
 
-    response = gemini_completion_with_backoff(
-        messages=messages,
-        system_prompt=system_prompt,
-        model_name=model,
-        temperature=temperature,
-        api_key=api_key,
-        model_kwargs=model_kwargs,
+    response = gemini_send_message_to_model(
+        messages, api_key, model, response_type="json_object", temperature=temperature
     )
 
     # Extract, Clean Message from Gemini's Response
@@ -102,7 +107,7 @@ def extract_questions_gemini(
     return questions
 
 
-def gemini_send_message_to_model(messages, api_key, model, response_type="text"):
+def gemini_send_message_to_model(messages, api_key, model, response_type="text", temperature=0, model_kwargs=None):
     """
     Send message to model
     """
@@ -114,7 +119,12 @@ def gemini_send_message_to_model(messages, api_key, model, response_type="text")
 
     # Get Response from Gemini
     return gemini_completion_with_backoff(
-        messages=messages, system_prompt=system_prompt, model_name=model, api_key=api_key, model_kwargs=model_kwargs
+        messages=messages,
+        system_prompt=system_prompt,
+        model_name=model,
+        api_key=api_key,
+        temperature=temperature,
+        model_kwargs=model_kwargs,
     )
 
 
@@ -133,6 +143,8 @@ def converse_gemini(
     location_data: LocationData = None,
     user_name: str = None,
     agent: Agent = None,
+    query_images: Optional[list[str]] = None,
+    vision_available: bool = False,
 ):
     """
     Converse with user using Google's Gemini
@@ -187,6 +199,9 @@ def converse_gemini(
         model_name=model,
         max_prompt_size=max_prompt_size,
         tokenizer_name=tokenizer_name,
+        query_images=query_images,
+        vision_enabled=vision_available,
+        model_type=ChatModelOptions.ModelType.GOOGLE,
     )
 
     messages, system_prompt = format_messages_for_gemini(messages, system_prompt)
khoj/processor/conversation/google/utils.py CHANGED
@@ -1,8 +1,11 @@
 import logging
 import random
+from io import BytesIO
 from threading import Thread
 
 import google.generativeai as genai
+import PIL.Image
+import requests
 from google.generativeai.types.answer_types import FinishReason
 from google.generativeai.types.generation_types import StopCandidateException
 from google.generativeai.types.safety_types import (
@@ -53,14 +56,14 @@ def gemini_completion_with_backoff(
         },
     )
 
-    formatted_messages = [{"role": message.role, "parts": [message.content]} for message in messages]
+    formatted_messages = [{"role": message.role, "parts": message.content} for message in messages]
 
     # Start chat session. All messages up to the last are considered to be part of the chat history
     chat_session = model.start_chat(history=formatted_messages[0:-1])
 
     try:
         # Generate the response. The last message is considered to be the current prompt
-        aggregated_response = chat_session.send_message(formatted_messages[-1]["parts"][0])
+        aggregated_response = chat_session.send_message(formatted_messages[-1]["parts"])
         return aggregated_response.text
     except StopCandidateException as e:
         response_message, _ = handle_gemini_response(e.args)
@@ -117,11 +120,11 @@ def gemini_llm_thread(g, messages, system_prompt, model_name, temperature, api_k
         },
     )
 
-    formatted_messages = [{"role": message.role, "parts": [message.content]} for message in messages]
+    formatted_messages = [{"role": message.role, "parts": message.content} for message in messages]
     # all messages up to the last are considered to be part of the chat history
     chat_session = model.start_chat(history=formatted_messages[0:-1])
     # the last message is considered to be the current prompt
-    for chunk in chat_session.send_message(formatted_messages[-1]["parts"][0], stream=True):
+    for chunk in chat_session.send_message(formatted_messages[-1]["parts"], stream=True):
         message, stopped = handle_gemini_response(chunk.candidates, chunk.prompt_feedback)
         message = message or chunk.text
         g.send(message)
@@ -191,14 +194,6 @@ def generate_safety_response(safety_ratings):
 
 
 def format_messages_for_gemini(messages: list[ChatMessage], system_prompt: str = None) -> tuple[list[str], str]:
-    if len(messages) == 1:
-        messages[0].role = "user"
-        return messages, system_prompt
-
-    for message in messages:
-        if message.role == "assistant":
-            message.role = "model"
-
     # Extract system message
     system_prompt = system_prompt or ""
     for message in messages.copy():
@@ -207,4 +202,31 @@ def format_messages_for_gemini(messages: list[ChatMessage], system_prompt: str =
             messages.remove(message)
     system_prompt = None if is_none_or_empty(system_prompt) else system_prompt
 
+    for message in messages:
+        # Convert message content to string list from chatml dictionary list
+        if isinstance(message.content, list):
+            # Convert image_urls to PIL.Image and place them at beginning of list (better for Gemini)
+            message.content = [
+                get_image_from_url(item["image_url"]["url"]) if item["type"] == "image_url" else item["text"]
+                for item in sorted(message.content, key=lambda x: 0 if x["type"] == "image_url" else 1)
+            ]
+        elif isinstance(message.content, str):
+            message.content = [message.content]
+
+        if message.role == "assistant":
+            message.role = "model"
+
+    if len(messages) == 1:
+        messages[0].role = "user"
+
     return messages, system_prompt
+
+
+def get_image_from_url(image_url: str) -> PIL.Image:
+    try:
+        response = requests.get(image_url)
+        response.raise_for_status()  # Check if the request was successful
+        return PIL.Image.open(BytesIO(response.content))
+    except requests.exceptions.RequestException as e:
+        logger.error(f"Failed to get image from URL {image_url}: {e}")
+        return None
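Note on the Gemini formatting change above: messages may now carry multi-part chatml content, which format_messages_for_gemini flattens into a Gemini-style parts list. A minimal sketch of the transformation, assuming the message shapes shown in this diff (the sample values below are illustrative, not taken from the package):

```python
from langchain.schema import ChatMessage

# Hypothetical chatml-style message, shaped like construct_structured_message's output
message = ChatMessage(
    role="assistant",
    content=[
        {"type": "text", "text": "What is in this image?"},
        {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
    ],
)

# After format_messages_for_gemini([message]), per the hunks above:
# - image_url entries are fetched and decoded into PIL.Image objects via get_image_from_url
# - images are sorted to the front of the content list (noted in the diff as better for Gemini)
# - content becomes a parts list like [<PIL.Image>, "What is in this image?"]
# - the "assistant" role is renamed to Gemini's "model" role
```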
khoj/processor/conversation/openai/gpt.py CHANGED
@@ -30,7 +30,7 @@ def extract_questions(
     api_base_url=None,
     location_data: LocationData = None,
     user: KhojUser = None,
-    uploaded_image_url: Optional[str] = None,
+    query_images: Optional[list[str]] = None,
     vision_enabled: bool = False,
     personality_context: Optional[str] = None,
 ):
@@ -74,7 +74,7 @@ def extract_questions(
 
     prompt = construct_structured_message(
         message=prompt,
-        image_url=uploaded_image_url,
+        images=query_images,
         model_type=ChatModelOptions.ModelType.OPENAI,
         vision_enabled=vision_enabled,
     )
@@ -135,7 +135,7 @@ def converse(
     location_data: LocationData = None,
     user_name: str = None,
     agent: Agent = None,
-    image_url: Optional[str] = None,
+    query_images: Optional[list[str]] = None,
     vision_available: bool = False,
 ):
     """
@@ -191,7 +191,7 @@ def converse(
         model_name=model,
         max_prompt_size=max_prompt_size,
         tokenizer_name=tokenizer_name,
-        uploaded_image_url=image_url,
+        query_images=query_images,
         vision_enabled=vision_available,
         model_type=ChatModelOptions.ModelType.OPENAI,
     )
khoj/processor/conversation/prompts.py CHANGED
@@ -176,6 +176,150 @@ Improved Prompt:
 """.strip()
 )
 
+## Diagram Generation
+## --
+
+improve_diagram_description_prompt = PromptTemplate.from_template(
+    """
+you are an architect working with a novice artist using a diagramming tool.
+{personality_context}
+
+you need to convert the user's query to a description format that the novice artist can use very well. you are allowed to use primitives like
+- text
+- rectangle
+- diamond
+- ellipse
+- line
+- arrow
+- frame
+
+use these primitives to describe what sort of diagram the drawer should create. the artist must recreate the diagram every time, so include all relevant prior information in your description.
+
+use simple, concise language.
+
+Today's Date: {current_date}
+User's Location: {location}
+
+User's Notes:
+{references}
+
+Online References:
+{online_results}
+
+Conversation Log:
+{chat_history}
+
+Query: {query}
+
+
+""".strip()
+)
+
+excalidraw_diagram_generation_prompt = PromptTemplate.from_template(
+    """
+You are a program manager with the ability to describe diagrams to compose in professional, fine detail.
+{personality_context}
+
+You need to create a declarative description of the diagram and relevant components, using this base schema. Use the `label` property to specify the text to be rendered in the respective elements. Always use light colors for the `backgroundColor` property, like white, or light blue, green, red. "type", "x", "y", "id", are required properties for all elements.
+
+{{
+    type: string,
+    x: number,
+    y: number,
+    strokeColor: string,
+    backgroundColor: string,
+    width: number,
+    height: number,
+    id: string,
+    label: {{
+        text: string,
+    }}
+}}
+
+Valid types:
+- text
+- rectangle
+- diamond
+- ellipse
+- line
+- arrow
+
+For arrows and lines, you can use the `points` property to specify the start and end points of the arrow. You may also use the `label` property to specify the text to be rendered. You may use the `start` and `end` properties to connect the linear elements to other elements. The start and end point can either be the ID to map to an existing object, or the `type` to create a new object. Mapping to an existing object is useful if you want to connect it to multiple objects. Lines and arrows can only start and end at rectangle, text, diamond, or ellipse elements.
+
+{{
+    type: "arrow",
+    id: string,
+    x: number,
+    y: number,
+    width: number,
+    height: number,
+    strokeColor: string,
+    start: {{
+        id: string,
+        type: string,
+    }},
+    end: {{
+        id: string,
+        type: string,
+    }},
+    label: {{
+        text: string,
+    }}
+    points: [
+        [number, number],
+        [number, number],
+    ]
+}}
+
+For text, you must use the `text` property to specify the text to be rendered. You may also use `fontSize` property to specify the font size of the text. Only use the `text` element for titles, subtitles, and overviews. For labels, use the `label` property in the respective elements.
+
+{{
+    type: "text",
+    id: string,
+    x: number,
+    y: number,
+    fontSize: number,
+    text: string,
+}}
+
+For frames, use the `children` property to specify the elements that are inside the frame by their ids.
+
+{{
+    type: "frame",
+    id: string,
+    x: number,
+    y: number,
+    width: number,
+    height: number,
+    name: string,
+    children: [
+        string
+    ]
+}}
+
+Here's an example of a valid diagram:
+
+Design Description: Create a diagram describing a circular development process with 3 stages: design, implementation and feedback. The design stage is connected to the implementation stage and the implementation stage is connected to the feedback stage and the feedback stage is connected to the design stage. Each stage should be labeled with the stage name.
+
+Response:
+
+[
+{{"type":"text","x":-150,"y":50,"width":300,"height":40,"id":"title_text","text":"Circular Development Process","fontSize":24}},
+{{"type":"ellipse","x":-169,"y":113,"width":188,"height":202,"id":"design_ellipse", "label": {{"text": "Design"}}}},
+{{"type":"ellipse","x":62,"y":394,"width":186,"height":188,"id":"implement_ellipse", "label": {{"text": "Implement"}}}},
+{{"type":"ellipse","x":-348,"y":430,"width":184,"height":170,"id":"feedback_ellipse", "label": {{"text": "Feedback"}}}},
+{{"type":"arrow","x":21,"y":273,"id":"design_to_implement_arrow","points":[[0,0],[86,105]],"start":{{"id":"design_ellipse"}}, "end":{{"id":"implement_ellipse"}}}},
+{{"type":"arrow","x":50,"y":519,"id":"implement_to_feedback_arrow","points":[[0,0],[-198,-6]],"start":{{"id":"implement_ellipse"}}, "end":{{"id":"feedback_ellipse"}}}},
+{{"type":"arrow","x":-228,"y":417,"id":"feedback_to_design_arrow","points":[[0,0],[85,-123]],"start":{{"id":"feedback_ellipse"}}, "end":{{"id":"design_ellipse"}}}},
+]
+
+Create a detailed diagram from the provided context and user prompt below. Return a valid JSON object:
+
+Diagram Description: {query}
+
+""".strip()
+)
+
 ## Online Search Conversation
 ## --
 online_search_conversation = PromptTemplate.from_template(
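The new excalidraw_diagram_generation_prompt above asks the model for a JSON array of primitives in which "type", "x", "y", and "id" are required on every element. A minimal sketch of how a caller might validate such a response (parse_diagram_response is a hypothetical helper, not part of the package):

```python
import json

def parse_diagram_response(raw: str) -> list[dict]:
    """Parse and sanity-check the JSON array the excalidraw prompt requests."""
    elements = json.loads(raw)
    for element in elements:
        # The prompt's schema marks these properties as required for every element
        missing = [key for key in ("type", "x", "y", "id") if key not in element]
        if missing:
            raise ValueError(f"Diagram element missing required properties: {missing}")
    return elements
```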
khoj/processor/conversation/utils.py CHANGED
@@ -109,7 +109,7 @@ def save_to_conversation_log(
     client_application: ClientApplication = None,
     conversation_id: str = None,
     automation_id: str = None,
-    uploaded_image_url: str = None,
+    query_images: List[str] = None,
 ):
     user_message_time = user_message_time or datetime.now().strftime("%Y-%m-%d %H:%M:%S")
     updated_conversation = message_to_log(
@@ -117,7 +117,7 @@ def save_to_conversation_log(
         chat_response=chat_response,
         user_message_metadata={
             "created": user_message_time,
-            "uploadedImageData": uploaded_image_url,
+            "images": query_images,
         },
         khoj_message_metadata={
             "context": compiled_references,
@@ -145,10 +145,18 @@ Khoj: "{inferred_queries if ("text-to-image" in intent_type) else chat_response}
 )
 
 
-# Format user and system messages to chatml format
-def construct_structured_message(message, image_url, model_type, vision_enabled):
-    if image_url and vision_enabled and model_type == ChatModelOptions.ModelType.OPENAI:
-        return [{"type": "text", "text": message}, {"type": "image_url", "image_url": {"url": image_url}}]
+def construct_structured_message(message: str, images: list[str], model_type: str, vision_enabled: bool):
+    """
+    Format messages into appropriate multimedia format for supported chat model types
+    """
+    if not images or not vision_enabled:
+        return message
+
+    if model_type in [ChatModelOptions.ModelType.OPENAI, ChatModelOptions.ModelType.GOOGLE]:
+        return [
+            {"type": "text", "text": message},
+            *[{"type": "image_url", "image_url": {"url": image}} for image in images],
+        ]
     return message
 
 
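For reference, the reworked construct_structured_message above passes text through untouched unless images are present and vision is enabled; for OpenAI and Google model types it returns one text part plus one image_url part per image. A rough sketch of the expected return values (the example URLs are illustrative, not from the package):

```python
# No images, or vision disabled: the message is returned as a plain string.
construct_structured_message("hello", images=None, model_type="openai", vision_enabled=False)
# -> "hello"

# Vision-enabled OpenAI/Google model types get a multi-part list instead:
construct_structured_message(
    message="describe these",
    images=["https://example.com/a.png", "https://example.com/b.png"],
    model_type=ChatModelOptions.ModelType.OPENAI,
    vision_enabled=True,
)
# -> [
#      {"type": "text", "text": "describe these"},
#      {"type": "image_url", "image_url": {"url": "https://example.com/a.png"}},
#      {"type": "image_url", "image_url": {"url": "https://example.com/b.png"}},
#    ]
```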
@@ -160,7 +168,7 @@ def generate_chatml_messages_with_context(
     loaded_model: Optional[Llama] = None,
     max_prompt_size=None,
     tokenizer_name=None,
-    uploaded_image_url=None,
+    query_images=None,
     vision_enabled=False,
     model_type="",
 ):
@@ -181,11 +189,12 @@ def generate_chatml_messages_with_context(
         message_notes = f'\n\n Notes:\n{chat.get("context")}' if chat.get("context") else "\n"
         role = "user" if chat["by"] == "you" else "assistant"
 
-        message_content = chat["message"] + message_notes
+        if chat["by"] == "khoj" and "excalidraw" in chat["intent"].get("type"):
+            message_content = chat.get("intent").get("inferred-queries")[0] + message_notes
+        else:
+            message_content = chat["message"] + message_notes
 
-        message_content = construct_structured_message(
-            message_content, chat.get("uploadedImageData"), model_type, vision_enabled
-        )
+        message_content = construct_structured_message(message_content, chat.get("images"), model_type, vision_enabled)
 
         reconstructed_message = ChatMessage(content=message_content, role=role)
@@ -198,7 +207,7 @@ def generate_chatml_messages_with_context(
     if not is_none_or_empty(user_message):
         messages.append(
             ChatMessage(
-                content=construct_structured_message(user_message, uploaded_image_url, model_type, vision_enabled),
+                content=construct_structured_message(user_message, query_images, model_type, vision_enabled),
                 role="user",
             )
         )
@@ -222,7 +231,6 @@ def truncate_messages(
     tokenizer_name=None,
 ) -> list[ChatMessage]:
     """Truncate messages to fit within max prompt size supported by model"""
-
     default_tokenizer = "gpt-4o"
 
     try:
@@ -252,6 +260,7 @@ def truncate_messages(
             system_message = messages.pop(idx)
             break
 
+    # TODO: Handle truncation of multi-part message.content, i.e when message.content is a list[dict] rather than a string
     system_message_tokens = (
         len(encoder.encode(system_message.content)) if system_message and type(system_message.content) == str else 0
     )
khoj/processor/image/generate.py CHANGED
@@ -26,7 +26,7 @@ async def text_to_image(
     references: List[Dict[str, Any]],
     online_results: Dict[str, Any],
     send_status_func: Optional[Callable] = None,
-    uploaded_image_url: Optional[str] = None,
+    query_images: Optional[List[str]] = None,
     agent: Agent = None,
 ):
     status_code = 200
@@ -65,7 +65,7 @@ async def text_to_image(
         note_references=references,
         online_results=online_results,
         model_type=text_to_image_config.model_type,
-        uploaded_image_url=uploaded_image_url,
+        query_images=query_images,
         user=user,
         agent=agent,
     )
@@ -87,18 +87,18 @@ async def text_to_image(
         if "content_policy_violation" in e.message:
             logger.error(f"Image Generation blocked by OpenAI: {e}")
             status_code = e.status_code  # type: ignore
-            message = f"Image generation blocked by OpenAI: {e.message}"  # type: ignore
+            message = f"Image generation blocked by OpenAI due to policy violation"  # type: ignore
             yield image_url or image, status_code, message, intent_type.value
             return
         else:
             logger.error(f"Image Generation failed with {e}", exc_info=True)
-            message = f"Image generation failed with OpenAI error: {e.message}"  # type: ignore
+            message = f"Image generation failed using OpenAI"  # type: ignore
             status_code = e.status_code  # type: ignore
             yield image_url or image, status_code, message, intent_type.value
             return
     except requests.RequestException as e:
         logger.error(f"Image Generation failed with {e}", exc_info=True)
-        message = f"Image generation using {text2image_model} via {text_to_image_config.model_type} failed with error: {e}"
+        message = f"Image generation using {text2image_model} via {text_to_image_config.model_type} failed due to a network error."
         status_code = 502
         yield image_url or image, status_code, message, intent_type.value
         return
khoj/processor/tools/online_search.py CHANGED
@@ -62,7 +62,7 @@ async def search_online(
     user: KhojUser,
     send_status_func: Optional[Callable] = None,
     custom_filters: List[str] = [],
-    uploaded_image_url: str = None,
+    query_images: List[str] = None,
     agent: Agent = None,
 ):
     query += " ".join(custom_filters)
@@ -73,7 +73,7 @@ async def search_online(
 
     # Breakdown the query into subqueries to get the correct answer
     subqueries = await generate_online_subqueries(
-        query, conversation_history, location, user, uploaded_image_url=uploaded_image_url, agent=agent
+        query, conversation_history, location, user, query_images=query_images, agent=agent
     )
     response_dict = {}
 
@@ -151,7 +151,7 @@ async def read_webpages(
     location: LocationData,
     user: KhojUser,
     send_status_func: Optional[Callable] = None,
-    uploaded_image_url: str = None,
+    query_images: List[str] = None,
     agent: Agent = None,
 ):
     "Infer web pages to read from the query and extract relevant information from them"
@@ -159,7 +159,7 @@ async def read_webpages(
     if send_status_func:
         async for event in send_status_func(f"**Inferring web pages to read**"):
             yield {ChatEvent.STATUS: event}
-    urls = await infer_webpage_urls(query, conversation_history, location, user, uploaded_image_url)
+    urls = await infer_webpage_urls(query, conversation_history, location, user, query_images)
 
     logger.info(f"Reading web pages at: {urls}")
     if send_status_func:
khoj/routers/api.py CHANGED
@@ -21,6 +21,7 @@ from starlette.authentication import has_required_scope, requires
 from khoj.configure import initialize_content
 from khoj.database import adapters
 from khoj.database.adapters import (
+    AgentAdapters,
     AutomationAdapters,
     ConversationAdapters,
     EntryAdapters,
@@ -114,10 +115,16 @@ async def execute_search(
     dedupe: Optional[bool] = True,
     agent: Optional[Agent] = None,
 ):
-    start_time = time.time()
-
     # Run validation checks
     results: List[SearchResponse] = []
+
+    start_time = time.time()
+
+    # Ensure the agent, if present, is accessible by the user
+    if user and agent and not await AgentAdapters.ais_agent_accessible(agent, user):
+        logger.error(f"Agent {agent.slug} is not accessible by user {user}")
+        return results
+
     if q is None or q == "":
         logger.warning(f"No query param (q) passed in API call to initiate search")
         return results
@@ -340,7 +347,7 @@ async def extract_references_and_questions(
     conversation_commands: List[ConversationCommand] = [ConversationCommand.Default],
     location_data: LocationData = None,
     send_status_func: Optional[Callable] = None,
-    uploaded_image_url: Optional[str] = None,
+    query_images: Optional[List[str]] = None,
     agent: Agent = None,
 ):
     user = request.user.object if request.user.is_authenticated else None
@@ -431,7 +438,7 @@ async def extract_references_and_questions(
                 conversation_log=meta_log,
                 location_data=location_data,
                 user=user,
-                uploaded_image_url=uploaded_image_url,
+                query_images=query_images,
                 vision_enabled=vision_enabled,
                 personality_context=personality_context,
             )
@@ -452,12 +459,14 @@ async def extract_references_and_questions(
             chat_model = conversation_config.chat_model
             inferred_queries = extract_questions_gemini(
                 defiltered_query,
+                query_images=query_images,
                 model=chat_model,
                 api_key=api_key,
                 conversation_log=meta_log,
                 location_data=location_data,
                 max_tokens=conversation_config.max_prompt_size,
                 user=user,
+                vision_enabled=vision_enabled,
                 personality_context=personality_context,
             )