khoj 1.26.4.dev2__py3-none-any.whl → 1.26.5.dev16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. khoj/interface/compiled/404/index.html +1 -1
  2. khoj/interface/compiled/_next/static/chunks/1210.132a7e1910006bbb.js +1 -0
  3. khoj/interface/compiled/_next/static/chunks/1459.690bf20e7d7b7090.js +1 -0
  4. khoj/interface/compiled/_next/static/chunks/1603-bfc0b26e32ad88e3.js +1 -0
  5. khoj/interface/compiled/_next/static/chunks/2697-37579bcc7593dd5c.js +1 -0
  6. khoj/interface/compiled/_next/static/chunks/3423-aad88d6c1f029135.js +1 -0
  7. khoj/interface/compiled/_next/static/chunks/6327-18f0b45cc5a13afb.js +3 -0
  8. khoj/interface/compiled/_next/static/chunks/{4086-2c74808ba38a5a0f.js → 8840-b8d7b9f0923c6651.js} +1 -1
  9. khoj/interface/compiled/_next/static/chunks/9417-759984ad62caa3dc.js +1 -0
  10. khoj/interface/compiled/_next/static/chunks/9479-fcce773453e472c4.js +1 -0
  11. khoj/interface/compiled/_next/static/chunks/94ca1967.5584df65931cfe83.js +1 -0
  12. khoj/interface/compiled/_next/static/chunks/964ecbae.ea4eab2a3a835ffe.js +1 -0
  13. khoj/interface/compiled/_next/static/chunks/app/agents/{page-88aa3042711107b7.js → page-997bf85681256672.js} +1 -1
  14. khoj/interface/compiled/_next/static/chunks/app/automations/{page-5480731341f34450.js → page-1688dead2f21270d.js} +1 -1
  15. khoj/interface/compiled/_next/static/chunks/app/chat/page-027e61d082025c50.js +1 -0
  16. khoj/interface/compiled/_next/static/chunks/app/factchecker/{page-e7b34316ec6f44de.js → page-f544113d240423e9.js} +1 -1
  17. khoj/interface/compiled/_next/static/chunks/app/{page-10a5aad6e04f3cf8.js → page-88139ac728fe3533.js} +1 -1
  18. khoj/interface/compiled/_next/static/chunks/app/search/{page-d56541c746fded7d.js → page-3ada11cda5050eeb.js} +1 -1
  19. khoj/interface/compiled/_next/static/chunks/app/settings/{page-e044a999468a7c5d.js → page-fa11cafaec7ab39f.js} +1 -1
  20. khoj/interface/compiled/_next/static/chunks/app/share/chat/page-511186e77624eaec.js +1 -0
  21. khoj/interface/compiled/_next/static/chunks/webpack-c48e5093123a4a56.js +1 -0
  22. khoj/interface/compiled/_next/static/css/{c808691c459e3887.css → 3cf13271869a4aeb.css} +1 -1
  23. khoj/interface/compiled/_next/static/css/4cae6c0e5c72fb2d.css +1 -0
  24. khoj/interface/compiled/_next/static/css/825406e5ebee86d3.css +25 -0
  25. khoj/interface/compiled/_next/static/css/f84cf008d5ff4161.css +1 -0
  26. khoj/interface/compiled/agents/index.html +1 -1
  27. khoj/interface/compiled/agents/index.txt +2 -2
  28. khoj/interface/compiled/automations/index.html +1 -1
  29. khoj/interface/compiled/automations/index.txt +2 -2
  30. khoj/interface/compiled/chat/index.html +1 -1
  31. khoj/interface/compiled/chat/index.txt +2 -2
  32. khoj/interface/compiled/factchecker/index.html +1 -1
  33. khoj/interface/compiled/factchecker/index.txt +2 -2
  34. khoj/interface/compiled/index.html +1 -1
  35. khoj/interface/compiled/index.txt +2 -2
  36. khoj/interface/compiled/search/index.html +1 -1
  37. khoj/interface/compiled/search/index.txt +2 -2
  38. khoj/interface/compiled/settings/index.html +1 -1
  39. khoj/interface/compiled/settings/index.txt +2 -2
  40. khoj/interface/compiled/share/chat/index.html +1 -1
  41. khoj/interface/compiled/share/chat/index.txt +2 -2
  42. khoj/processor/conversation/google/gemini_chat.py +28 -13
  43. khoj/processor/conversation/google/utils.py +34 -12
  44. khoj/processor/conversation/openai/gpt.py +4 -4
  45. khoj/processor/conversation/prompts.py +144 -0
  46. khoj/processor/conversation/utils.py +22 -13
  47. khoj/processor/image/generate.py +5 -5
  48. khoj/processor/tools/online_search.py +4 -4
  49. khoj/routers/api.py +4 -2
  50. khoj/routers/api_chat.py +85 -46
  51. khoj/routers/helpers.py +225 -29
  52. khoj/routers/web_client.py +0 -11
  53. khoj/utils/helpers.py +7 -3
  54. {khoj-1.26.4.dev2.dist-info → khoj-1.26.5.dev16.dist-info}/METADATA +1 -1
  55. {khoj-1.26.4.dev2.dist-info → khoj-1.26.5.dev16.dist-info}/RECORD +60 -56
  56. khoj/interface/compiled/_next/static/chunks/1603-fa3ee48860b9dc5c.js +0 -1
  57. khoj/interface/compiled/_next/static/chunks/2697-a38d01981ad3bdf8.js +0 -1
  58. khoj/interface/compiled/_next/static/chunks/4051-2cf66369d6ca0f1d.js +0 -3
  59. khoj/interface/compiled/_next/static/chunks/9178-899fe9a6b754ecfe.js +0 -1
  60. khoj/interface/compiled/_next/static/chunks/9417-46ed3aaa639c85ef.js +0 -1
  61. khoj/interface/compiled/_next/static/chunks/9479-ea776e73f549090c.js +0 -1
  62. khoj/interface/compiled/_next/static/chunks/app/chat/page-702057ccbcf27881.js +0 -1
  63. khoj/interface/compiled/_next/static/chunks/app/share/chat/page-fbbd66a4d4633438.js +0 -1
  64. khoj/interface/compiled/_next/static/chunks/webpack-2651a68f46ac3cb7.js +0 -1
  65. khoj/interface/compiled/_next/static/css/2de69f0be774c768.css +0 -1
  66. khoj/interface/compiled/_next/static/css/3e1f1fdd70775091.css +0 -1
  67. khoj/interface/compiled/_next/static/css/b9a6bf04305d98d7.css +0 -25
  68. /khoj/interface/compiled/_next/static/{wyjqS7cuSX-u62BTNYqhU → 1dVlkdPTTw5b6saGVG4b3}/_buildManifest.js +0 -0
  69. /khoj/interface/compiled/_next/static/{wyjqS7cuSX-u62BTNYqhU → 1dVlkdPTTw5b6saGVG4b3}/_ssgManifest.js +0 -0
  70. {khoj-1.26.4.dev2.dist-info → khoj-1.26.5.dev16.dist-info}/WHEEL +0 -0
  71. {khoj-1.26.4.dev2.dist-info → khoj-1.26.5.dev16.dist-info}/entry_points.txt +0 -0
  72. {khoj-1.26.4.dev2.dist-info → khoj-1.26.5.dev16.dist-info}/licenses/LICENSE +0 -0
khoj/processor/conversation/prompts.py CHANGED
@@ -176,6 +176,150 @@ Improved Prompt:
 """.strip()
 )

+## Diagram Generation
+## --
+
+improve_diagram_description_prompt = PromptTemplate.from_template(
+"""
+you are an architect working with a novice artist using a diagramming tool.
+{personality_context}
+
+you need to convert the user's query to a description format that the novice artist can use very well. you are allowed to use primitives like
+- text
+- rectangle
+- diamond
+- ellipse
+- line
+- arrow
+- frame
+
+use these primitives to describe what sort of diagram the drawer should create. the artist must recreate the diagram every time, so include all relevant prior information in your description.
+
+use simple, concise language.
+
+Today's Date: {current_date}
+User's Location: {location}
+
+User's Notes:
+{references}
+
+Online References:
+{online_results}
+
+Conversation Log:
+{chat_history}
+
+Query: {query}
+
+
+""".strip()
+)
+
+excalidraw_diagram_generation_prompt = PromptTemplate.from_template(
+"""
+You are a program manager with the ability to describe diagrams to compose in professional, fine detail.
+{personality_context}
+
+You need to create a declarative description of the diagram and relevant components, using this base schema. Use the `label` property to specify the text to be rendered in the respective elements. Always use light colors for the `backgroundColor` property, like white, or light blue, green, red. "type", "x", "y", "id", are required properties for all elements.
+
+{{
+type: string,
+x: number,
+y: number,
+strokeColor: string,
+backgroundColor: string,
+width: number,
+height: number,
+id: string,
+label: {{
+text: string,
+}}
+}}
+
+Valid types:
+- text
+- rectangle
+- diamond
+- ellipse
+- line
+- arrow
+
+For arrows and lines, you can use the `points` property to specify the start and end points of the arrow. You may also use the `label` property to specify the text to be rendered. You may use the `start` and `end` properties to connect the linear elements to other elements. The start and end point can either be the ID to map to an existing object, or the `type` to create a new object. Mapping to an existing object is useful if you want to connect it to multiple objects. Lines and arrows can only start and end at rectangle, text, diamond, or ellipse elements.
+
+{{
+type: "arrow",
+id: string,
+x: number,
+y: number,
+width: number,
+height: number,
+strokeColor: string,
+start: {{
+id: string,
+type: string,
+}},
+end: {{
+id: string,
+type: string,
+}},
+label: {{
+text: string,
+}}
+points: [
+[number, number],
+[number, number],
+]
+}}
+
+For text, you must use the `text` property to specify the text to be rendered. You may also use `fontSize` property to specify the font size of the text. Only use the `text` element for titles, subtitles, and overviews. For labels, use the `label` property in the respective elements.
+
+{{
+type: "text",
+id: string,
+x: number,
+y: number,
+fontSize: number,
+text: string,
+}}
+
+For frames, use the `children` property to specify the elements that are inside the frame by their ids.
+
+{{
+type: "frame",
+id: string,
+x: number,
+y: number,
+width: number,
+height: number,
+name: string,
+children: [
+string
+]
+}}
+
+Here's an example of a valid diagram:
+
+Design Description: Create a diagram describing a circular development process with 3 stages: design, implementation and feedback. The design stage is connected to the implementation stage and the implementation stage is connected to the feedback stage and the feedback stage is connected to the design stage. Each stage should be labeled with the stage name.
+
+Response:
+
+[
+{{"type":"text","x":-150,"y":50,"width":300,"height":40,"id":"title_text","text":"Circular Development Process","fontSize":24}},
+{{"type":"ellipse","x":-169,"y":113,"width":188,"height":202,"id":"design_ellipse", "label": {{"text": "Design"}}}},
+{{"type":"ellipse","x":62,"y":394,"width":186,"height":188,"id":"implement_ellipse", "label": {{"text": "Implement"}}}},
+{{"type":"ellipse","x":-348,"y":430,"width":184,"height":170,"id":"feedback_ellipse", "label": {{"text": "Feedback"}}}},
+{{"type":"arrow","x":21,"y":273,"id":"design_to_implement_arrow","points":[[0,0],[86,105]],"start":{{"id":"design_ellipse"}}, "end":{{"id":"implement_ellipse"}}}},
+{{"type":"arrow","x":50,"y":519,"id":"implement_to_feedback_arrow","points":[[0,0],[-198,-6]],"start":{{"id":"implement_ellipse"}}, "end":{{"id":"feedback_ellipse"}}}},
+{{"type":"arrow","x":-228,"y":417,"id":"feedback_to_design_arrow","points":[[0,0],[85,-123]],"start":{{"id":"feedback_ellipse"}}, "end":{{"id":"design_ellipse"}}}},
+]
+
+Create a detailed diagram from the provided context and user prompt below. Return a valid JSON object:
+
+Diagram Description: {query}
+
+""".strip()
+)
+
 ## Online Search Conversation
 ## --
 online_search_conversation = PromptTemplate.from_template(
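
Both new prompts are ordinary langchain PromptTemplates, so they render by filling every placeholder; the doubled braces in the Excalidraw prompt escape literal JSON braces. A minimal rendering sketch (all values illustrative, and the langchain import path is an assumption based on how khoj's other prompts are built):

    from langchain.prompts import PromptTemplate  # assumed import path

    # Fill the new improve_diagram_description_prompt with sample values
    rendered = improve_diagram_description_prompt.format(
        personality_context="",
        current_date="2024-10-01",
        location="San Francisco, United States",
        references="",
        online_results="",
        chat_history="",
        query="Draw the stages of our release pipeline",
    )
    print(rendered)  # a plain string, ready to send as the LLM instruction
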
khoj/processor/conversation/utils.py CHANGED
@@ -109,7 +109,7 @@ def save_to_conversation_log(
     client_application: ClientApplication = None,
     conversation_id: str = None,
     automation_id: str = None,
-    uploaded_image_url: str = None,
+    query_images: List[str] = None,
 ):
     user_message_time = user_message_time or datetime.now().strftime("%Y-%m-%d %H:%M:%S")
     updated_conversation = message_to_log(
@@ -117,7 +117,7 @@ def save_to_conversation_log(
         chat_response=chat_response,
         user_message_metadata={
             "created": user_message_time,
-            "uploadedImageData": uploaded_image_url,
+            "images": query_images,
         },
         khoj_message_metadata={
             "context": compiled_references,
@@ -145,10 +145,18 @@ Khoj: "{inferred_queries if ("text-to-image" in intent_type) else chat_response}
 )


-# Format user and system messages to chatml format
-def construct_structured_message(message, image_url, model_type, vision_enabled):
-    if image_url and vision_enabled and model_type == ChatModelOptions.ModelType.OPENAI:
-        return [{"type": "text", "text": message}, {"type": "image_url", "image_url": {"url": image_url}}]
+def construct_structured_message(message: str, images: list[str], model_type: str, vision_enabled: bool):
+    """
+    Format messages into appropriate multimedia format for supported chat model types
+    """
+    if not images or not vision_enabled:
+        return message
+
+    if model_type in [ChatModelOptions.ModelType.OPENAI, ChatModelOptions.ModelType.GOOGLE]:
+        return [
+            {"type": "text", "text": message},
+            *[{"type": "image_url", "image_url": {"url": image}} for image in images],
+        ]
     return message


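The rewrite generalizes the old single-image OpenAI path: for a vision-enabled OpenAI or Google model it now emits one text part plus one image_url part per image, and falls back to plain text otherwise. A worked example, derived directly from the function above (URLs are placeholders):

    content = construct_structured_message(
        message="What is in these photos?",
        images=["https://example.com/a.webp", "https://example.com/b.webp"],
        model_type=ChatModelOptions.ModelType.OPENAI,
        vision_enabled=True,
    )
    # content == [
    #     {"type": "text", "text": "What is in these photos?"},
    #     {"type": "image_url", "image_url": {"url": "https://example.com/a.webp"}},
    #     {"type": "image_url", "image_url": {"url": "https://example.com/b.webp"}},
    # ]
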
@@ -160,7 +168,7 @@ def generate_chatml_messages_with_context(
     loaded_model: Optional[Llama] = None,
     max_prompt_size=None,
     tokenizer_name=None,
-    uploaded_image_url=None,
+    query_images=None,
     vision_enabled=False,
     model_type="",
 ):
@@ -181,11 +189,12 @@ def generate_chatml_messages_with_context(
         message_notes = f'\n\n Notes:\n{chat.get("context")}' if chat.get("context") else "\n"
         role = "user" if chat["by"] == "you" else "assistant"

-        message_content = chat["message"] + message_notes
+        if chat["by"] == "khoj" and "excalidraw" in chat["intent"].get("type"):
+            message_content = chat.get("intent").get("inferred-queries")[0] + message_notes
+        else:
+            message_content = chat["message"] + message_notes

-        message_content = construct_structured_message(
-            message_content, chat.get("uploadedImageData"), model_type, vision_enabled
-        )
+        message_content = construct_structured_message(message_content, chat.get("images"), model_type, vision_enabled)

         reconstructed_message = ChatMessage(content=message_content, role=role)
@@ -198,7 +207,7 @@ def generate_chatml_messages_with_context(
     if not is_none_or_empty(user_message):
         messages.append(
             ChatMessage(
-                content=construct_structured_message(user_message, uploaded_image_url, model_type, vision_enabled),
+                content=construct_structured_message(user_message, query_images, model_type, vision_enabled),
                 role="user",
             )
         )
@@ -222,7 +231,6 @@ def truncate_messages(
     tokenizer_name=None,
 ) -> list[ChatMessage]:
     """Truncate messages to fit within max prompt size supported by model"""
-
     default_tokenizer = "gpt-4o"

     try:
@@ -252,6 +260,7 @@ def truncate_messages(
             system_message = messages.pop(idx)
             break

+    # TODO: Handle truncation of multi-part message.content, i.e when message.content is a list[dict] rather than a string
     system_message_tokens = (
         len(encoder.encode(system_message.content)) if system_message and type(system_message.content) == str else 0
     )
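
The new TODO exists because encoder.encode only accepts strings, so a multi-part list[dict] message currently counts as zero tokens. One way the count could be extended, assuming tiktoken (the library behind the gpt-4o default tokenizer here) and the part shape built by construct_structured_message; this is a sketch, not the shipped code:

    import tiktoken

    encoder = tiktoken.encoding_for_model("gpt-4o")

    def count_content_tokens(content) -> int:
        # Plain string content: count it as truncate_messages does today
        if isinstance(content, str):
            return len(encoder.encode(content))
        # Multi-part content: sum the text parts; image parts are skipped here,
        # although vision models do bill tokens for images too
        return sum(len(encoder.encode(part.get("text", ""))) for part in content if part.get("type") == "text")
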
khoj/processor/image/generate.py CHANGED
@@ -26,7 +26,7 @@ async def text_to_image(
     references: List[Dict[str, Any]],
     online_results: Dict[str, Any],
     send_status_func: Optional[Callable] = None,
-    uploaded_image_url: Optional[str] = None,
+    query_images: Optional[List[str]] = None,
     agent: Agent = None,
 ):
     status_code = 200
@@ -65,7 +65,7 @@ async def text_to_image(
         note_references=references,
         online_results=online_results,
         model_type=text_to_image_config.model_type,
-        uploaded_image_url=uploaded_image_url,
+        query_images=query_images,
         user=user,
         agent=agent,
     )
@@ -87,18 +87,18 @@ async def text_to_image(
         if "content_policy_violation" in e.message:
             logger.error(f"Image Generation blocked by OpenAI: {e}")
             status_code = e.status_code  # type: ignore
-            message = f"Image generation blocked by OpenAI: {e.message}"  # type: ignore
+            message = f"Image generation blocked by OpenAI due to policy violation"  # type: ignore
             yield image_url or image, status_code, message, intent_type.value
             return
         else:
             logger.error(f"Image Generation failed with {e}", exc_info=True)
-            message = f"Image generation failed with OpenAI error: {e.message}"  # type: ignore
+            message = f"Image generation failed using OpenAI"  # type: ignore
             status_code = e.status_code  # type: ignore
             yield image_url or image, status_code, message, intent_type.value
             return
     except requests.RequestException as e:
         logger.error(f"Image Generation failed with {e}", exc_info=True)
-        message = f"Image generation using {text2image_model} via {text_to_image_config.model_type} failed with error: {e}"
+        message = f"Image generation using {text2image_model} via {text_to_image_config.model_type} failed due to a network error."
         status_code = 502
         yield image_url or image, status_code, message, intent_type.value
         return
khoj/processor/tools/online_search.py CHANGED
@@ -62,7 +62,7 @@ async def search_online(
     user: KhojUser,
     send_status_func: Optional[Callable] = None,
     custom_filters: List[str] = [],
-    uploaded_image_url: str = None,
+    query_images: List[str] = None,
     agent: Agent = None,
 ):
     query += " ".join(custom_filters)
@@ -73,7 +73,7 @@ async def search_online(

     # Breakdown the query into subqueries to get the correct answer
     subqueries = await generate_online_subqueries(
-        query, conversation_history, location, user, uploaded_image_url=uploaded_image_url, agent=agent
+        query, conversation_history, location, user, query_images=query_images, agent=agent
     )
     response_dict = {}

@@ -151,7 +151,7 @@ async def read_webpages(
     location: LocationData,
     user: KhojUser,
     send_status_func: Optional[Callable] = None,
-    uploaded_image_url: str = None,
+    query_images: List[str] = None,
     agent: Agent = None,
 ):
     "Infer web pages to read from the query and extract relevant information from them"
@@ -159,7 +159,7 @@ async def read_webpages(
     if send_status_func:
         async for event in send_status_func(f"**Inferring web pages to read**"):
             yield {ChatEvent.STATUS: event}
-    urls = await infer_webpage_urls(query, conversation_history, location, user, uploaded_image_url)
+    urls = await infer_webpage_urls(query, conversation_history, location, user, query_images)

     logger.info(f"Reading web pages at: {urls}")
     if send_status_func:
khoj/routers/api.py CHANGED
@@ -347,7 +347,7 @@ async def extract_references_and_questions(
     conversation_commands: List[ConversationCommand] = [ConversationCommand.Default],
     location_data: LocationData = None,
     send_status_func: Optional[Callable] = None,
-    uploaded_image_url: Optional[str] = None,
+    query_images: Optional[List[str]] = None,
     agent: Agent = None,
 ):
     user = request.user.object if request.user.is_authenticated else None
@@ -438,7 +438,7 @@ async def extract_references_and_questions(
                 conversation_log=meta_log,
                 location_data=location_data,
                 user=user,
-                uploaded_image_url=uploaded_image_url,
+                query_images=query_images,
                 vision_enabled=vision_enabled,
                 personality_context=personality_context,
             )
@@ -459,12 +459,14 @@ async def extract_references_and_questions(
             chat_model = conversation_config.chat_model
             inferred_queries = extract_questions_gemini(
                 defiltered_query,
+                query_images=query_images,
                 model=chat_model,
                 api_key=api_key,
                 conversation_log=meta_log,
                 location_data=location_data,
                 max_tokens=conversation_config.max_prompt_size,
                 user=user,
+                vision_enabled=vision_enabled,
                 personality_context=personality_context,
             )

khoj/routers/api_chat.py CHANGED
@@ -30,8 +30,10 @@ from khoj.processor.speech.text_to_speech import generate_text_to_speech
 from khoj.processor.tools.online_search import read_webpages, search_online
 from khoj.routers.api import extract_references_and_questions
 from khoj.routers.helpers import (
+    ApiImageRateLimiter,
     ApiUserRateLimiter,
     ChatEvent,
+    ChatRequestBody,
     CommonQueryParams,
     ConversationCommandRateLimiter,
     agenerate_chat_response,
@@ -40,6 +42,7 @@ from khoj.routers.helpers import (
     construct_automation_created_message,
     create_automation,
     extract_relevant_summary,
+    generate_excalidraw_diagram,
     get_conversation_command,
     is_query_empty,
     is_ready_to_chat,
@@ -523,22 +526,6 @@ async def set_conversation_title(
     )


-class ChatRequestBody(BaseModel):
-    q: str
-    n: Optional[int] = 7
-    d: Optional[float] = None
-    stream: Optional[bool] = False
-    title: Optional[str] = None
-    conversation_id: Optional[str] = None
-    city: Optional[str] = None
-    region: Optional[str] = None
-    country: Optional[str] = None
-    country_code: Optional[str] = None
-    timezone: Optional[str] = None
-    image: Optional[str] = None
-    create_new: Optional[bool] = False
-
-
 @api_chat.post("")
 @requires(["authenticated"])
 async def chat(
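
ChatRequestBody itself is not expanded in this diff; it moved into khoj/routers/helpers.py (see the import hunk above) and, judging by the body.images access below, the single image field became a list. A plausible reconstruction of the relocated model, not the verbatim source:

    from typing import List, Optional
    from pydantic import BaseModel

    class ChatRequestBody(BaseModel):
        q: str
        n: Optional[int] = 7
        d: Optional[float] = None
        stream: Optional[bool] = False
        title: Optional[str] = None
        conversation_id: Optional[str] = None
        city: Optional[str] = None
        region: Optional[str] = None
        country: Optional[str] = None
        country_code: Optional[str] = None
        timezone: Optional[str] = None
        images: Optional[List[str]] = None  # assumed: replaces image: Optional[str]
        create_new: Optional[bool] = False
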
@@ -551,6 +538,7 @@ async def chat(
     rate_limiter_per_day=Depends(
         ApiUserRateLimiter(requests=600, subscribed_requests=6000, window=60 * 60 * 24, slug="chat_day")
     ),
+    image_rate_limiter=Depends(ApiImageRateLimiter(max_images=10, max_combined_size_mb=20)),
 ):
     # Access the parameters from the body
     q = body.q
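
ApiImageRateLimiter is defined in khoj/routers/helpers.py (+225 -29), which this diff does not expand. From its constructor arguments and FastAPI's callable-dependency pattern, a hypothetical sketch of its shape; the real implementation may differ:

    from fastapi import HTTPException

    class ApiImageRateLimiter:
        def __init__(self, max_images: int = 10, max_combined_size_mb: float = 20):
            self.max_images = max_images
            self.max_combined_size_mb = max_combined_size_mb

        def __call__(self, body: ChatRequestBody):
            if not body.images:
                return
            if len(body.images) > self.max_images:
                raise HTTPException(status_code=429, detail=f"Too many images; max is {self.max_images}.")
            # Base64 decodes to roughly 3/4 of its encoded length
            combined_mb = sum(len(image) * 3 / 4 for image in body.images) / (1024 * 1024)
            if combined_mb > self.max_combined_size_mb:
                raise HTTPException(status_code=429, detail=f"Images exceed {self.max_combined_size_mb} MB combined.")
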
@@ -564,9 +552,9 @@ async def chat(
     country = body.country or get_country_name_from_timezone(body.timezone)
     country_code = body.country_code or get_country_code_from_timezone(body.timezone)
     timezone = body.timezone
-    image = body.image
+    raw_images = body.images

-    async def event_generator(q: str, image: str):
+    async def event_generator(q: str, images: list[str]):
         start_time = time.perf_counter()
         ttft = None
         chat_metadata: dict = {}
@@ -576,16 +564,16 @@ async def chat(
         q = unquote(q)
         nonlocal conversation_id

-        uploaded_image_url = None
-        if image:
-            decoded_string = unquote(image)
-            base64_data = decoded_string.split(",", 1)[1]
-            image_bytes = base64.b64decode(base64_data)
-            webp_image_bytes = convert_image_to_webp(image_bytes)
-            try:
-                uploaded_image_url = upload_image_to_bucket(webp_image_bytes, request.user.object.id)
-            except:
-                uploaded_image_url = None
+        uploaded_images: list[str] = []
+        if images:
+            for image in images:
+                decoded_string = unquote(image)
+                base64_data = decoded_string.split(",", 1)[1]
+                image_bytes = base64.b64decode(base64_data)
+                webp_image_bytes = convert_image_to_webp(image_bytes)
+                uploaded_image = upload_image_to_bucket(webp_image_bytes, request.user.object.id)
+                if uploaded_image:
+                    uploaded_images.append(uploaded_image)

         async def send_event(event_type: ChatEvent, data: str | dict):
             nonlocal connection_alive, ttft
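
Client side, each entry in images is a percent-encoded data URL; the server splits off the base64 payload, converts it to webp, and uploads it to the bucket. A hypothetical request against a local khoj server (default port 42110; authentication omitted for brevity):

    import base64
    import requests

    with open("dog.png", "rb") as f:
        data_url = "data:image/png;base64," + base64.b64encode(f.read()).decode()

    resp = requests.post(
        "http://localhost:42110/api/chat",
        # auth cookie/token omitted; the endpoint requires an authenticated user
        json={"q": "What breed is this dog?", "images": [data_url], "stream": False},
    )
    print(resp.json())
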
@@ -692,7 +680,7 @@ async def chat(
            meta_log,
            is_automated_task,
            user=user,
-            uploaded_image_url=uploaded_image_url,
+            query_images=uploaded_images,
            agent=agent,
        )
        conversation_commands_str = ", ".join([cmd.value for cmd in conversation_commands])
@@ -701,7 +689,7 @@ async def chat(
        ):
            yield result

-        mode = await aget_relevant_output_modes(q, meta_log, is_automated_task, user, uploaded_image_url, agent)
+        mode = await aget_relevant_output_modes(q, meta_log, is_automated_task, user, uploaded_images, agent)
        async for result in send_event(ChatEvent.STATUS, f"**Decided Response Mode:** {mode.value}"):
            yield result
        if mode not in conversation_commands:
@@ -764,7 +752,7 @@ async def chat(
                q,
                contextual_data,
                conversation_history=meta_log,
-                uploaded_image_url=uploaded_image_url,
+                query_images=uploaded_images,
                user=user,
                agent=agent,
            )
@@ -785,7 +773,7 @@ async def chat(
                intent_type="summarize",
                client_application=request.user.client_app,
                conversation_id=conversation_id,
-                uploaded_image_url=uploaded_image_url,
+                query_images=uploaded_images,
            )
            return

@@ -828,7 +816,7 @@ async def chat(
                conversation_id=conversation_id,
                inferred_queries=[query_to_run],
                automation_id=automation.id,
-                uploaded_image_url=uploaded_image_url,
+                query_images=uploaded_images,
            )
            async for result in send_llm_response(llm_response):
                yield result
@@ -848,7 +836,7 @@ async def chat(
            conversation_commands,
            location,
            partial(send_event, ChatEvent.STATUS),
-            uploaded_image_url=uploaded_image_url,
+            query_images=uploaded_images,
            agent=agent,
        ):
            if isinstance(result, dict) and ChatEvent.STATUS in result:
@@ -892,7 +880,7 @@ async def chat(
                user,
                partial(send_event, ChatEvent.STATUS),
                custom_filters,
-                uploaded_image_url=uploaded_image_url,
+                query_images=uploaded_images,
                agent=agent,
            ):
                if isinstance(result, dict) and ChatEvent.STATUS in result:
@@ -916,7 +904,7 @@ async def chat(
                location,
                user,
                partial(send_event, ChatEvent.STATUS),
-                uploaded_image_url=uploaded_image_url,
+                query_images=uploaded_images,
                agent=agent,
            ):
                if isinstance(result, dict) and ChatEvent.STATUS in result:
@@ -966,20 +954,20 @@ async def chat(
                references=compiled_references,
                online_results=online_results,
                send_status_func=partial(send_event, ChatEvent.STATUS),
-                uploaded_image_url=uploaded_image_url,
+                query_images=uploaded_images,
                agent=agent,
            ):
                if isinstance(result, dict) and ChatEvent.STATUS in result:
                    yield result[ChatEvent.STATUS]
                else:
-                    image, status_code, improved_image_prompt, intent_type = result
+                    generated_image, status_code, improved_image_prompt, intent_type = result

-            if image is None or status_code != 200:
+            if generated_image is None or status_code != 200:
                content_obj = {
                    "content-type": "application/json",
                    "intentType": intent_type,
                    "detail": improved_image_prompt,
-                    "image": image,
+                    "image": None,
                }
                async for result in send_llm_response(json.dumps(content_obj)):
                    yield result
@@ -987,7 +975,7 @@ async def chat(

            await sync_to_async(save_to_conversation_log)(
                q,
-                image,
+                generated_image,
                user,
                meta_log,
                user_message_time,
@@ -997,13 +985,64 @@ async def chat(
                conversation_id=conversation_id,
                compiled_references=compiled_references,
                online_results=online_results,
-                uploaded_image_url=uploaded_image_url,
+                query_images=uploaded_images,
            )
            content_obj = {
                "intentType": intent_type,
                "inferredQueries": [improved_image_prompt],
-                "image": image,
+                "image": generated_image,
+            }
+            async for result in send_llm_response(json.dumps(content_obj)):
+                yield result
+            return
+
+        if ConversationCommand.Diagram in conversation_commands:
+            async for result in send_event(ChatEvent.STATUS, f"Creating diagram"):
+                yield result
+
+            intent_type = "excalidraw"
+            inferred_queries = []
+            diagram_description = ""
+
+            async for result in generate_excalidraw_diagram(
+                q=defiltered_query,
+                conversation_history=meta_log,
+                location_data=location,
+                note_references=compiled_references,
+                online_results=online_results,
+                query_images=uploaded_images,
+                user=user,
+                agent=agent,
+                send_status_func=partial(send_event, ChatEvent.STATUS),
+            ):
+                if isinstance(result, dict) and ChatEvent.STATUS in result:
+                    yield result[ChatEvent.STATUS]
+                else:
+                    better_diagram_description_prompt, excalidraw_diagram_description = result
+                    inferred_queries.append(better_diagram_description_prompt)
+                    diagram_description = excalidraw_diagram_description
+
+            content_obj = {
+                "intentType": intent_type,
+                "inferredQueries": inferred_queries,
+                "image": diagram_description,
            }
+
+            await sync_to_async(save_to_conversation_log)(
+                q,
+                excalidraw_diagram_description,
+                user,
+                meta_log,
+                user_message_time,
+                intent_type="excalidraw",
+                inferred_queries=[better_diagram_description_prompt],
+                client_application=request.user.client_app,
+                conversation_id=conversation_id,
+                compiled_references=compiled_references,
+                online_results=online_results,
+                query_images=uploaded_images,
+            )
+
            async for result in send_llm_response(json.dumps(content_obj)):
                yield result
            return
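
Note the diagram branch reuses the image field of the response payload to carry the Excalidraw element list, so clients must dispatch on intentType rather than assume a URL. Illustrative handling of a non-streaming payload, continuing the request sketch above (the exact envelope assembled by read_chat_stream is not shown in this diff):

    response_data = resp.json()
    if response_data.get("intentType") == "excalidraw":
        elements = response_data["image"]  # Excalidraw elements, not an image URL
    elif response_data.get("intentType") == "text-to-image":
        image_url = response_data["image"]
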
@@ -1024,7 +1063,7 @@ async def chat(
                conversation_id,
                location,
                user_name,
-                uploaded_image_url,
+                uploaded_images,
            )

            # Send Response
@@ -1050,9 +1089,9 @@ async def chat(

    ## Stream Text Response
    if stream:
-        return StreamingResponse(event_generator(q, image=image), media_type="text/plain")
+        return StreamingResponse(event_generator(q, images=raw_images), media_type="text/plain")
    ## Non-Streaming Text Response
    else:
-        response_iterator = event_generator(q, image=image)
+        response_iterator = event_generator(q, images=raw_images)
        response_data = await read_chat_stream(response_iterator)
        return Response(content=json.dumps(response_data), media_type="application/json", status_code=200)
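
With stream set to true the endpoint instead returns a text/plain StreamingResponse that interleaves status events with the response body. A minimal consumer sketch; the event framing within the stream is an assumption, not specified in this diff:

    import requests

    with requests.post(
        "http://localhost:42110/api/chat",
        json={"q": "Draw a diagram of my release pipeline", "stream": True},
        stream=True,
    ) as resp:
        for chunk in resp.iter_content(chunk_size=None, decode_unicode=True):
            print(chunk, end="", flush=True)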