khoj 1.26.2__py3-none-any.whl → 1.26.5.dev34__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/configure.py +3 -3
- khoj/database/adapters/__init__.py +40 -8
- khoj/database/migrations/0070_alter_agent_input_tools_alter_agent_output_modes.py +46 -0
- khoj/database/migrations/0071_subscription_enabled_trial_at_and_more.py +32 -0
- khoj/database/models/__init__.py +8 -3
- khoj/interface/compiled/404/index.html +1 -1
- khoj/interface/compiled/_next/static/chunks/1210.132a7e1910006bbb.js +1 -0
- khoj/interface/compiled/_next/static/chunks/1279-f37ee4a388ebf544.js +1 -0
- khoj/interface/compiled/_next/static/chunks/1459.690bf20e7d7b7090.js +1 -0
- khoj/interface/compiled/_next/static/chunks/1603-b9d95833e0e025e8.js +1 -0
- khoj/interface/compiled/_next/static/chunks/1970-1d6d0c1b00b4f343.js +1 -0
- khoj/interface/compiled/_next/static/chunks/2697-61fcba89fd87eab4.js +1 -0
- khoj/interface/compiled/_next/static/chunks/3423-8e9c420574a9fbe3.js +1 -0
- khoj/interface/compiled/_next/static/chunks/394-6bcb8c429f168f21.js +3 -0
- khoj/interface/compiled/_next/static/chunks/4602-8eeb4b76385ad159.js +1 -0
- khoj/interface/compiled/_next/static/chunks/5512-94c7c2bbcf58c19d.js +1 -0
- khoj/interface/compiled/_next/static/chunks/7113-f2e114d7034a0835.js +1 -0
- khoj/interface/compiled/_next/static/chunks/{4086-2c74808ba38a5a0f.js → 8840-b8d7b9f0923c6651.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/9417-759984ad62caa3dc.js +1 -0
- khoj/interface/compiled/_next/static/chunks/9479-4b443fdcc99141c9.js +1 -0
- khoj/interface/compiled/_next/static/chunks/94ca1967.5584df65931cfe83.js +1 -0
- khoj/interface/compiled/_next/static/chunks/964ecbae.ea4eab2a3a835ffe.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/page-2beaba7c9bb750bd.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/automations/{page-5480731341f34450.js → page-9b5c77e0b0dd772c.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/page-151232d8417a1ea1.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/factchecker/{page-e7b34316ec6f44de.js → page-798904432c2417c4.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/{page-10a5aad6e04f3cf8.js → page-db4e38a5255af7ad.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/search/{page-d56541c746fded7d.js → page-ab2995529ece3140.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/settings/page-3e9cf5ed5ace4310.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/share/chat/page-6a01e07fb244c10c.js +1 -0
- khoj/interface/compiled/_next/static/chunks/webpack-313247d7eb764923.js +1 -0
- khoj/interface/compiled/_next/static/css/{c808691c459e3887.css → 3cf13271869a4aeb.css} +1 -1
- khoj/interface/compiled/_next/static/css/76d55eb435962b19.css +25 -0
- khoj/interface/compiled/_next/static/css/{3e1f1fdd70775091.css → 80bd6301fc657983.css} +1 -1
- khoj/interface/compiled/_next/static/css/{2de69f0be774c768.css → b70402177a7c3207.css} +1 -1
- khoj/interface/compiled/agents/index.html +1 -1
- khoj/interface/compiled/agents/index.txt +2 -2
- khoj/interface/compiled/automations/index.html +1 -1
- khoj/interface/compiled/automations/index.txt +2 -2
- khoj/interface/compiled/chat/index.html +1 -1
- khoj/interface/compiled/chat/index.txt +2 -2
- khoj/interface/compiled/factchecker/index.html +1 -1
- khoj/interface/compiled/factchecker/index.txt +2 -2
- khoj/interface/compiled/index.html +1 -1
- khoj/interface/compiled/index.txt +2 -2
- khoj/interface/compiled/search/index.html +1 -1
- khoj/interface/compiled/search/index.txt +2 -2
- khoj/interface/compiled/settings/index.html +1 -1
- khoj/interface/compiled/settings/index.txt +2 -2
- khoj/interface/compiled/share/chat/index.html +1 -1
- khoj/interface/compiled/share/chat/index.txt +2 -2
- khoj/processor/conversation/google/gemini_chat.py +28 -13
- khoj/processor/conversation/google/utils.py +34 -12
- khoj/processor/conversation/openai/gpt.py +4 -4
- khoj/processor/conversation/prompts.py +144 -0
- khoj/processor/conversation/utils.py +22 -13
- khoj/processor/image/generate.py +5 -5
- khoj/processor/tools/online_search.py +4 -4
- khoj/routers/api.py +13 -4
- khoj/routers/api_agents.py +41 -20
- khoj/routers/api_chat.py +85 -46
- khoj/routers/{subscription.py → api_subscription.py} +21 -3
- khoj/routers/auth.py +2 -2
- khoj/routers/helpers.py +235 -30
- khoj/routers/web_client.py +0 -11
- khoj/utils/helpers.py +7 -3
- {khoj-1.26.2.dist-info → khoj-1.26.5.dev34.dist-info}/METADATA +2 -2
- {khoj-1.26.2.dist-info → khoj-1.26.5.dev34.dist-info}/RECORD +73 -66
- khoj/interface/compiled/_next/static/chunks/121-7024f479c297aef0.js +0 -1
- khoj/interface/compiled/_next/static/chunks/1603-fa3ee48860b9dc5c.js +0 -1
- khoj/interface/compiled/_next/static/chunks/2697-a38d01981ad3bdf8.js +0 -1
- khoj/interface/compiled/_next/static/chunks/4051-2cf66369d6ca0f1d.js +0 -3
- khoj/interface/compiled/_next/static/chunks/477-ec86e93db10571c1.js +0 -1
- khoj/interface/compiled/_next/static/chunks/51-e8f5bdb69b5ea421.js +0 -1
- khoj/interface/compiled/_next/static/chunks/7762-79f2205740622b5c.js +0 -1
- khoj/interface/compiled/_next/static/chunks/9178-899fe9a6b754ecfe.js +0 -1
- khoj/interface/compiled/_next/static/chunks/9417-46ed3aaa639c85ef.js +0 -1
- khoj/interface/compiled/_next/static/chunks/9479-ea776e73f549090c.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/agents/page-88aa3042711107b7.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/page-702057ccbcf27881.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/settings/page-e044a999468a7c5d.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/page-fbbd66a4d4633438.js +0 -1
- khoj/interface/compiled/_next/static/chunks/webpack-64dc39af85cd2625.js +0 -1
- khoj/interface/compiled/_next/static/css/b9a6bf04305d98d7.css +0 -25
- /khoj/interface/compiled/_next/static/{eim4XajTfG4ub4ft5AEkJ → 7viHIza-WalEOzloM67l4}/_buildManifest.js +0 -0
- /khoj/interface/compiled/_next/static/{eim4XajTfG4ub4ft5AEkJ → 7viHIza-WalEOzloM67l4}/_ssgManifest.js +0 -0
- {khoj-1.26.2.dist-info → khoj-1.26.5.dev34.dist-info}/WHEEL +0 -0
- {khoj-1.26.2.dist-info → khoj-1.26.5.dev34.dist-info}/entry_points.txt +0 -0
- {khoj-1.26.2.dist-info → khoj-1.26.5.dev34.dist-info}/licenses/LICENSE +0 -0
khoj/processor/conversation/google/gemini_chat.py
CHANGED
@@ -6,14 +6,17 @@ from typing import Dict, Optional
 
 from langchain.schema import ChatMessage
 
-from khoj.database.models import Agent, KhojUser
+from khoj.database.models import Agent, ChatModelOptions, KhojUser
 from khoj.processor.conversation import prompts
 from khoj.processor.conversation.google.utils import (
     format_messages_for_gemini,
     gemini_chat_completion_with_backoff,
     gemini_completion_with_backoff,
 )
-from khoj.processor.conversation.utils import generate_chatml_messages_with_context
+from khoj.processor.conversation.utils import (
+    construct_structured_message,
+    generate_chatml_messages_with_context,
+)
 from khoj.utils.helpers import ConversationCommand, is_none_or_empty
 from khoj.utils.rawconfig import LocationData
 
@@ -29,6 +32,8 @@ def extract_questions_gemini(
     max_tokens=None,
     location_data: LocationData = None,
     user: KhojUser = None,
+    query_images: Optional[list[str]] = None,
+    vision_enabled: bool = False,
     personality_context: Optional[str] = None,
 ):
     """
@@ -70,17 +75,17 @@ def extract_questions_gemini(
         text=text,
     )
 
-    messages = [ChatMessage(content=prompt, role="user"), ChatMessage(content=system_prompt, role="system")]
+    prompt = construct_structured_message(
+        message=prompt,
+        images=query_images,
+        model_type=ChatModelOptions.ModelType.GOOGLE,
+        vision_enabled=vision_enabled,
+    )
 
-    model_kwargs = {"response_mime_type": "application/json"}
+    messages = [ChatMessage(content=prompt, role="user"), ChatMessage(content=system_prompt, role="system")]
 
-    response = gemini_completion_with_backoff(
-        messages=messages,
-        system_prompt=system_prompt,
-        model_name=model,
-        temperature=temperature,
-        api_key=api_key,
-        model_kwargs=model_kwargs,
+    response = gemini_send_message_to_model(
+        messages, api_key, model, response_type="json_object", temperature=temperature
     )
 
     # Extract, Clean Message from Gemini's Response
@@ -102,7 +107,7 @@ def extract_questions_gemini(
     return questions
 
 
-def gemini_send_message_to_model(messages, api_key, model, response_type="text"):
+def gemini_send_message_to_model(messages, api_key, model, response_type="text", temperature=0, model_kwargs=None):
     """
     Send message to model
     """
@@ -114,7 +119,12 @@ def gemini_send_message_to_model(messages, api_key, model, response_type="text")
 
     # Get Response from Gemini
     return gemini_completion_with_backoff(
-        messages=messages, system_prompt=system_prompt, model_name=model, api_key=api_key, model_kwargs=model_kwargs
+        messages=messages,
+        system_prompt=system_prompt,
+        model_name=model,
+        api_key=api_key,
+        temperature=temperature,
+        model_kwargs=model_kwargs,
     )
 
 
@@ -133,6 +143,8 @@ def converse_gemini(
     location_data: LocationData = None,
     user_name: str = None,
     agent: Agent = None,
+    query_images: Optional[list[str]] = None,
+    vision_available: bool = False,
 ):
     """
     Converse with user using Google's Gemini
@@ -187,6 +199,9 @@ def converse_gemini(
         model_name=model,
         max_prompt_size=max_prompt_size,
         tokenizer_name=tokenizer_name,
+        query_images=query_images,
+        vision_enabled=vision_available,
+        model_type=ChatModelOptions.ModelType.GOOGLE,
    )
 
     messages, system_prompt = format_messages_for_gemini(messages, system_prompt)
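Note: taken together, the gemini_chat.py changes thread image attachments through Gemini question extraction. A minimal usage sketch, mirroring the call site added in khoj/routers/api.py further below; the query text, image URL, model name, and API key here are placeholder values, not part of the package:

    inferred_queries = extract_questions_gemini(
        "What is in this picture?",
        query_images=["https://example.com/uploads/photo.png"],  # placeholder image URL
        model="gemini-1.5-flash",  # assumed model name; configured per server in practice
        api_key="GEMINI_API_KEY",  # placeholder
        vision_enabled=True,
    )
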
khoj/processor/conversation/google/utils.py
CHANGED
@@ -1,8 +1,11 @@
 import logging
 import random
+from io import BytesIO
 from threading import Thread
 
 import google.generativeai as genai
+import PIL.Image
+import requests
 from google.generativeai.types.answer_types import FinishReason
 from google.generativeai.types.generation_types import StopCandidateException
 from google.generativeai.types.safety_types import (
@@ -53,14 +56,14 @@ def gemini_completion_with_backoff(
         },
     )
 
-    formatted_messages = [{"role": message.role, "parts": [message.content]} for message in messages]
+    formatted_messages = [{"role": message.role, "parts": message.content} for message in messages]
 
     # Start chat session. All messages up to the last are considered to be part of the chat history
     chat_session = model.start_chat(history=formatted_messages[0:-1])
 
     try:
         # Generate the response. The last message is considered to be the current prompt
-        aggregated_response = chat_session.send_message(formatted_messages[-1]["parts"][0])
+        aggregated_response = chat_session.send_message(formatted_messages[-1]["parts"])
         return aggregated_response.text
     except StopCandidateException as e:
         response_message, _ = handle_gemini_response(e.args)
@@ -117,11 +120,11 @@ def gemini_llm_thread(g, messages, system_prompt, model_name, temperature, api_key):
         },
     )
 
-    formatted_messages = [{"role": message.role, "parts": [message.content]} for message in messages]
+    formatted_messages = [{"role": message.role, "parts": message.content} for message in messages]
     # all messages up to the last are considered to be part of the chat history
     chat_session = model.start_chat(history=formatted_messages[0:-1])
     # the last message is considered to be the current prompt
-    for chunk in chat_session.send_message(formatted_messages[-1]["parts"][0], stream=True):
+    for chunk in chat_session.send_message(formatted_messages[-1]["parts"], stream=True):
         message, stopped = handle_gemini_response(chunk.candidates, chunk.prompt_feedback)
         message = message or chunk.text
         g.send(message)
@@ -191,14 +194,6 @@ def generate_safety_response(safety_ratings):
 
 
 def format_messages_for_gemini(messages: list[ChatMessage], system_prompt: str = None) -> tuple[list[str], str]:
-    if len(messages) == 1:
-        messages[0].role = "user"
-        return messages, system_prompt
-
-    for message in messages:
-        if message.role == "assistant":
-            message.role = "model"
-
     # Extract system message
     system_prompt = system_prompt or ""
     for message in messages.copy():
@@ -207,4 +202,31 @@ def format_messages_for_gemini(messages: list[ChatMessage], system_prompt: str = None) -> tuple[list[str], str]:
             messages.remove(message)
     system_prompt = None if is_none_or_empty(system_prompt) else system_prompt
 
+    for message in messages:
+        # Convert message content to string list from chatml dictionary list
+        if isinstance(message.content, list):
+            # Convert image_urls to PIL.Image and place them at beginning of list (better for Gemini)
+            message.content = [
+                get_image_from_url(item["image_url"]["url"]) if item["type"] == "image_url" else item["text"]
+                for item in sorted(message.content, key=lambda x: 0 if x["type"] == "image_url" else 1)
+            ]
+        elif isinstance(message.content, str):
+            message.content = [message.content]
+
+        if message.role == "assistant":
+            message.role = "model"
+
+    if len(messages) == 1:
+        messages[0].role = "user"
+
     return messages, system_prompt
+
+
+def get_image_from_url(image_url: str) -> PIL.Image:
+    try:
+        response = requests.get(image_url)
+        response.raise_for_status()  # Check if the request was successful
+        return PIL.Image.open(BytesIO(response.content))
+    except requests.exceptions.RequestException as e:
+        logger.error(f"Failed to get image from URL {image_url}: {e}")
+        return None
khoj/processor/conversation/openai/gpt.py
CHANGED
@@ -30,7 +30,7 @@ def extract_questions(
     api_base_url=None,
     location_data: LocationData = None,
     user: KhojUser = None,
-    uploaded_image_url: Optional[str] = None,
+    query_images: Optional[list[str]] = None,
     vision_enabled: bool = False,
     personality_context: Optional[str] = None,
 ):
@@ -74,7 +74,7 @@ def extract_questions(
 
     prompt = construct_structured_message(
         message=prompt,
-        image_url=uploaded_image_url,
+        images=query_images,
         model_type=ChatModelOptions.ModelType.OPENAI,
         vision_enabled=vision_enabled,
     )
@@ -135,7 +135,7 @@ def converse(
     location_data: LocationData = None,
     user_name: str = None,
     agent: Agent = None,
-    uploaded_image_url: Optional[str] = None,
+    query_images: Optional[list[str]] = None,
     vision_available: bool = False,
 ):
     """
@@ -191,7 +191,7 @@ def converse(
         model_name=model,
         max_prompt_size=max_prompt_size,
         tokenizer_name=tokenizer_name,
-        uploaded_image_url=uploaded_image_url,
+        query_images=query_images,
         vision_enabled=vision_available,
         model_type=ChatModelOptions.ModelType.OPENAI,
     )
khoj/processor/conversation/prompts.py
CHANGED
@@ -176,6 +176,150 @@ Improved Prompt:
 """.strip()
 )
 
+## Diagram Generation
+## --
+
+improve_diagram_description_prompt = PromptTemplate.from_template(
+    """
+you are an architect working with a novice artist using a diagramming tool.
+{personality_context}
+
+you need to convert the user's query to a description format that the novice artist can use very well. you are allowed to use primitives like
+- text
+- rectangle
+- diamond
+- ellipse
+- line
+- arrow
+- frame
+
+use these primitives to describe what sort of diagram the drawer should create. the artist must recreate the diagram every time, so include all relevant prior information in your description.
+
+use simple, concise language.
+
+Today's Date: {current_date}
+User's Location: {location}
+
+User's Notes:
+{references}
+
+Online References:
+{online_results}
+
+Conversation Log:
+{chat_history}
+
+Query: {query}
+
+
+""".strip()
+)
+
+excalidraw_diagram_generation_prompt = PromptTemplate.from_template(
+    """
+You are a program manager with the ability to describe diagrams to compose in professional, fine detail.
+{personality_context}
+
+You need to create a declarative description of the diagram and relevant components, using this base schema. Use the `label` property to specify the text to be rendered in the respective elements. Always use light colors for the `backgroundColor` property, like white, or light blue, green, red. "type", "x", "y", "id", are required properties for all elements.
+
+{{
+    type: string,
+    x: number,
+    y: number,
+    strokeColor: string,
+    backgroundColor: string,
+    width: number,
+    height: number,
+    id: string,
+    label: {{
+        text: string,
+    }}
+}}
+
+Valid types:
+- text
+- rectangle
+- diamond
+- ellipse
+- line
+- arrow
+
+For arrows and lines, you can use the `points` property to specify the start and end points of the arrow. You may also use the `label` property to specify the text to be rendered. You may use the `start` and `end` properties to connect the linear elements to other elements. The start and end point can either be the ID to map to an existing object, or the `type` to create a new object. Mapping to an existing object is useful if you want to connect it to multiple objects. Lines and arrows can only start and end at rectangle, text, diamond, or ellipse elements.
+
+{{
+    type: "arrow",
+    id: string,
+    x: number,
+    y: number,
+    width: number,
+    height: number,
+    strokeColor: string,
+    start: {{
+        id: string,
+        type: string,
+    }},
+    end: {{
+        id: string,
+        type: string,
+    }},
+    label: {{
+        text: string,
+    }}
+    points: [
+        [number, number],
+        [number, number],
+    ]
+}}
+
+For text, you must use the `text` property to specify the text to be rendered. You may also use `fontSize` property to specify the font size of the text. Only use the `text` element for titles, subtitles, and overviews. For labels, use the `label` property in the respective elements.
+
+{{
+    type: "text",
+    id: string,
+    x: number,
+    y: number,
+    fontSize: number,
+    text: string,
+}}
+
+For frames, use the `children` property to specify the elements that are inside the frame by their ids.
+
+{{
+    type: "frame",
+    id: string,
+    x: number,
+    y: number,
+    width: number,
+    height: number,
+    name: string,
+    children: [
+        string
+    ]
+}}
+
+Here's an example of a valid diagram:
+
+Design Description: Create a diagram describing a circular development process with 3 stages: design, implementation and feedback. The design stage is connected to the implementation stage and the implementation stage is connected to the feedback stage and the feedback stage is connected to the design stage. Each stage should be labeled with the stage name.
+
+Response:
+
+[
+    {{"type":"text","x":-150,"y":50,"width":300,"height":40,"id":"title_text","text":"Circular Development Process","fontSize":24}},
+    {{"type":"ellipse","x":-169,"y":113,"width":188,"height":202,"id":"design_ellipse", "label": {{"text": "Design"}}}},
+    {{"type":"ellipse","x":62,"y":394,"width":186,"height":188,"id":"implement_ellipse", "label": {{"text": "Implement"}}}},
+    {{"type":"ellipse","x":-348,"y":430,"width":184,"height":170,"id":"feedback_ellipse", "label": {{"text": "Feedback"}}}},
+    {{"type":"arrow","x":21,"y":273,"id":"design_to_implement_arrow","points":[[0,0],[86,105]],"start":{{"id":"design_ellipse"}}, "end":{{"id":"implement_ellipse"}}}},
+    {{"type":"arrow","x":50,"y":519,"id":"implement_to_feedback_arrow","points":[[0,0],[-198,-6]],"start":{{"id":"implement_ellipse"}}, "end":{{"id":"feedback_ellipse"}}}},
+    {{"type":"arrow","x":-228,"y":417,"id":"feedback_to_design_arrow","points":[[0,0],[85,-123]],"start":{{"id":"feedback_ellipse"}}, "end":{{"id":"design_ellipse"}}}},
+]
+
+Create a detailed diagram from the provided context and user prompt below. Return a valid JSON object:
+
+Diagram Description: {query}
+
+""".strip()
+)
+
 ## Online Search Conversation
 ## --
 online_search_conversation = PromptTemplate.from_template(
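Note: the new excalidraw_diagram_generation_prompt asks the model for a JSON array of primitives, with "type", "x", "y", and "id" required on every element. A hypothetical validation sketch for that response, not part of the package:

    import json

    REQUIRED = {"type", "x", "y", "id"}
    VALID_TYPES = {"text", "rectangle", "diamond", "ellipse", "line", "arrow", "frame"}

    def validate_excalidraw_response(response: str) -> list:
        # Parse the model output and check each element against the prompt's schema
        elements = json.loads(response)
        for element in elements:
            missing = REQUIRED - element.keys()
            if missing:
                raise ValueError(f"element {element.get('id')} missing {missing}")
            if element["type"] not in VALID_TYPES:
                raise ValueError(f"unsupported element type {element['type']}")
        return elements
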
khoj/processor/conversation/utils.py
CHANGED
@@ -109,7 +109,7 @@ def save_to_conversation_log(
     client_application: ClientApplication = None,
     conversation_id: str = None,
     automation_id: str = None,
-    uploaded_image_url: str = None,
+    query_images: List[str] = None,
 ):
     user_message_time = user_message_time or datetime.now().strftime("%Y-%m-%d %H:%M:%S")
     updated_conversation = message_to_log(
@@ -117,7 +117,7 @@ def save_to_conversation_log(
         chat_response=chat_response,
         user_message_metadata={
             "created": user_message_time,
-            "uploadedImageData": uploaded_image_url,
+            "images": query_images,
         },
         khoj_message_metadata={
             "context": compiled_references,
@@ -145,10 +145,18 @@ Khoj: "{inferred_queries if ("text-to-image" in intent_type) else chat_response}
     )
 
 
-
-
-
-
+def construct_structured_message(message: str, images: list[str], model_type: str, vision_enabled: bool):
+    """
+    Format messages into appropriate multimedia format for supported chat model types
+    """
+    if not images or not vision_enabled:
+        return message
+
+    if model_type in [ChatModelOptions.ModelType.OPENAI, ChatModelOptions.ModelType.GOOGLE]:
+        return [
+            {"type": "text", "text": message},
+            *[{"type": "image_url", "image_url": {"url": image}} for image in images],
+        ]
     return message
 
 
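Note: the rewritten construct_structured_message passes plain strings through untouched unless images are attached and vision is enabled; only then does it expand into a parts list for OpenAI and Google model types. Expected behavior per the definition above; the image URL is a placeholder:

    construct_structured_message("Hello", images=None, model_type=ChatModelOptions.ModelType.OPENAI, vision_enabled=True)
    # -> "Hello"

    construct_structured_message(
        "What breed is this?",
        images=["https://example.com/dog.png"],  # placeholder URL
        model_type=ChatModelOptions.ModelType.OPENAI,
        vision_enabled=True,
    )
    # -> [{"type": "text", "text": "What breed is this?"},
    #     {"type": "image_url", "image_url": {"url": "https://example.com/dog.png"}}]
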
@@ -160,7 +168,7 @@ def generate_chatml_messages_with_context(
     loaded_model: Optional[Llama] = None,
     max_prompt_size=None,
     tokenizer_name=None,
-    uploaded_image_url=None,
+    query_images=None,
     vision_enabled=False,
     model_type="",
 ):
@@ -181,11 +189,12 @@ def generate_chatml_messages_with_context(
         message_notes = f'\n\n Notes:\n{chat.get("context")}' if chat.get("context") else "\n"
         role = "user" if chat["by"] == "you" else "assistant"
 
-        message_content = chat["message"] + message_notes
+        if chat["by"] == "khoj" and "excalidraw" in chat["intent"].get("type"):
+            message_content = chat.get("intent").get("inferred-queries")[0] + message_notes
+        else:
+            message_content = chat["message"] + message_notes
 
-        message_content = construct_structured_message(
-            message_content, chat.get("uploadedImageData"), model_type, vision_enabled
-        )
+        message_content = construct_structured_message(message_content, chat.get("images"), model_type, vision_enabled)
 
         reconstructed_message = ChatMessage(content=message_content, role=role)
 
@@ -198,7 +207,7 @@ def generate_chatml_messages_with_context(
     if not is_none_or_empty(user_message):
         messages.append(
             ChatMessage(
-                content=construct_structured_message(user_message, uploaded_image_url, model_type, vision_enabled),
+                content=construct_structured_message(user_message, query_images, model_type, vision_enabled),
                 role="user",
             )
         )
@@ -222,7 +231,6 @@ def truncate_messages(
     tokenizer_name=None,
 ) -> list[ChatMessage]:
     """Truncate messages to fit within max prompt size supported by model"""
-
     default_tokenizer = "gpt-4o"
 
     try:
@@ -252,6 +260,7 @@ def truncate_messages(
         system_message = messages.pop(idx)
         break
 
+    # TODO: Handle truncation of multi-part message.content, i.e when message.content is a list[dict] rather than a string
     system_message_tokens = (
         len(encoder.encode(system_message.content)) if system_message and type(system_message.content) == str else 0
     )
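Note: the new excalidraw branch in generate_chatml_messages_with_context replays a diagram turn as its first inferred query rather than the raw diagram JSON. A hypothetical conversation-log entry that would take that branch:

    chat = {
        "by": "khoj",
        "message": "[diagram json omitted]",  # hypothetical stored response
        "intent": {"type": "excalidraw", "inferred-queries": ["Draw a three-stage dev cycle"]},
    }
    # message_content becomes chat["intent"]["inferred-queries"][0] + message_notes
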
khoj/processor/image/generate.py
CHANGED
@@ -26,7 +26,7 @@ async def text_to_image(
     references: List[Dict[str, Any]],
     online_results: Dict[str, Any],
     send_status_func: Optional[Callable] = None,
-    uploaded_image_url: Optional[str] = None,
+    query_images: Optional[List[str]] = None,
     agent: Agent = None,
 ):
     status_code = 200
@@ -65,7 +65,7 @@ async def text_to_image(
         note_references=references,
         online_results=online_results,
         model_type=text_to_image_config.model_type,
-        uploaded_image_url=uploaded_image_url,
+        query_images=query_images,
         user=user,
         agent=agent,
     )
@@ -87,18 +87,18 @@ async def text_to_image(
         if "content_policy_violation" in e.message:
             logger.error(f"Image Generation blocked by OpenAI: {e}")
             status_code = e.status_code  # type: ignore
-            message = f"Image generation blocked by OpenAI
+            message = f"Image generation blocked by OpenAI due to policy violation"  # type: ignore
             yield image_url or image, status_code, message, intent_type.value
             return
         else:
             logger.error(f"Image Generation failed with {e}", exc_info=True)
-            message = f"Image generation failed
+            message = f"Image generation failed using OpenAI"  # type: ignore
             status_code = e.status_code  # type: ignore
             yield image_url or image, status_code, message, intent_type.value
             return
     except requests.RequestException as e:
         logger.error(f"Image Generation failed with {e}", exc_info=True)
-        message = f"Image generation using {text2image_model} via {text_to_image_config.model_type} failed
+        message = f"Image generation using {text2image_model} via {text_to_image_config.model_type} failed due to a network error."
         status_code = 502
         yield image_url or image, status_code, message, intent_type.value
         return
khoj/processor/tools/online_search.py
CHANGED
@@ -62,7 +62,7 @@ async def search_online(
     user: KhojUser,
     send_status_func: Optional[Callable] = None,
     custom_filters: List[str] = [],
-    uploaded_image_url: str = None,
+    query_images: List[str] = None,
     agent: Agent = None,
 ):
     query += " ".join(custom_filters)
@@ -73,7 +73,7 @@ async def search_online(
 
     # Breakdown the query into subqueries to get the correct answer
     subqueries = await generate_online_subqueries(
-        query, conversation_history, location, user, uploaded_image_url=uploaded_image_url, agent=agent
+        query, conversation_history, location, user, query_images=query_images, agent=agent
     )
     response_dict = {}
 
@@ -151,7 +151,7 @@ async def read_webpages(
     location: LocationData,
     user: KhojUser,
     send_status_func: Optional[Callable] = None,
-    uploaded_image_url: str = None,
+    query_images: List[str] = None,
     agent: Agent = None,
 ):
     "Infer web pages to read from the query and extract relevant information from them"
@@ -159,7 +159,7 @@ async def read_webpages(
     if send_status_func:
         async for event in send_status_func(f"**Inferring web pages to read**"):
             yield {ChatEvent.STATUS: event}
-    urls = await infer_webpage_urls(query, conversation_history, location, user, uploaded_image_url)
+    urls = await infer_webpage_urls(query, conversation_history, location, user, query_images)
 
     logger.info(f"Reading web pages at: {urls}")
     if send_status_func:
khoj/routers/api.py
CHANGED
@@ -21,6 +21,7 @@ from starlette.authentication import has_required_scope, requires
 from khoj.configure import initialize_content
 from khoj.database import adapters
 from khoj.database.adapters import (
+    AgentAdapters,
     AutomationAdapters,
     ConversationAdapters,
     EntryAdapters,
@@ -114,10 +115,16 @@ async def execute_search(
     dedupe: Optional[bool] = True,
     agent: Optional[Agent] = None,
 ):
-    start_time = time.time()
-
     # Run validation checks
     results: List[SearchResponse] = []
+
+    start_time = time.time()
+
+    # Ensure the agent, if present, is accessible by the user
+    if user and agent and not await AgentAdapters.ais_agent_accessible(agent, user):
+        logger.error(f"Agent {agent.slug} is not accessible by user {user}")
+        return results
+
     if q is None or q == "":
         logger.warning(f"No query param (q) passed in API call to initiate search")
         return results
@@ -340,7 +347,7 @@ async def extract_references_and_questions(
     conversation_commands: List[ConversationCommand] = [ConversationCommand.Default],
     location_data: LocationData = None,
     send_status_func: Optional[Callable] = None,
-    uploaded_image_url: Optional[str] = None,
+    query_images: Optional[List[str]] = None,
     agent: Agent = None,
 ):
     user = request.user.object if request.user.is_authenticated else None
@@ -431,7 +438,7 @@ async def extract_references_and_questions(
                 conversation_log=meta_log,
                 location_data=location_data,
                 user=user,
-                uploaded_image_url=uploaded_image_url,
+                query_images=query_images,
                 vision_enabled=vision_enabled,
                 personality_context=personality_context,
             )
@@ -452,12 +459,14 @@ async def extract_references_and_questions(
             chat_model = conversation_config.chat_model
             inferred_queries = extract_questions_gemini(
                 defiltered_query,
+                query_images=query_images,
                 model=chat_model,
                 api_key=api_key,
                 conversation_log=meta_log,
                 location_data=location_data,
                 max_tokens=conversation_config.max_prompt_size,
                 user=user,
+                vision_enabled=vision_enabled,
                 personality_context=personality_context,
             )
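Note: the new guard in execute_search returns an empty result set before any query validation when the requesting user cannot access the supplied agent. A behavioral sketch; execute_search takes more parameters than the hunk shows, and the user and agent objects here stand in for real database rows:

    results = await execute_search(user=user, q="notes about testing", agent=inaccessible_agent)
    # AgentAdapters.ais_agent_accessible(agent, user) is False, so the error is
    # logged and results == [] without any entries being searched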