khoj 1.36.7.dev66__py3-none-any.whl → 1.37.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. khoj/database/migrations/0087_alter_aimodelapi_api_key.py +17 -0
  2. khoj/database/models/__init__.py +1 -1
  3. khoj/interface/compiled/404/index.html +2 -2
  4. khoj/interface/compiled/_next/static/chunks/{2117-f99825f0a867a42d.js → 2117-1c18aa2098982bf9.js} +1 -1
  5. khoj/interface/compiled/_next/static/chunks/{2327-b21ecded25471e6c.js → 2327-0bbe3ee35f80659f.js} +1 -1
  6. khoj/interface/compiled/_next/static/chunks/{5477-9ff77f49e6cf375c.js → 5477-a5b2688736f51b8c.js} +1 -1
  7. khoj/interface/compiled/_next/static/chunks/{8515-010dd769c584b672.js → 8515-f305779d95dd5780.js} +1 -1
  8. khoj/interface/compiled/_next/static/chunks/app/agents/{layout-948ca256650845ce.js → layout-dd7f2b45a9c30bd7.js} +1 -1
  9. khoj/interface/compiled/_next/static/chunks/app/chat/{layout-603285e3b1400e74.js → layout-904fbbb3974588da.js} +1 -1
  10. khoj/interface/compiled/_next/static/chunks/app/chat/{page-50cb9b62b10b5f3d.js → page-5175e747d3cb4a33.js} +1 -1
  11. khoj/interface/compiled/_next/static/chunks/app/{page-29e3b092fe46f190.js → page-44ac22beb2619af0.js} +1 -1
  12. khoj/interface/compiled/_next/static/chunks/app/search/{layout-d7f7528ff387fba5.js → layout-51d73830842461d5.js} +1 -1
  13. khoj/interface/compiled/_next/static/chunks/app/share/chat/{layout-246d0e8125219fff.js → layout-d090bd23befd0207.js} +1 -1
  14. khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-6f26fe7f2f7edc56.js → page-e8f0cc65930b214e.js} +1 -1
  15. khoj/interface/compiled/_next/static/chunks/main-876327ac335776ab.js +1 -0
  16. khoj/interface/compiled/_next/static/chunks/{webpack-1169ca6e9e7e6247.js → webpack-d1d79c1576702da7.js} +1 -1
  17. khoj/interface/compiled/_next/static/css/440ae0f0f650dc35.css +1 -0
  18. khoj/interface/compiled/_next/static/css/b061a6aedf367349.css +25 -0
  19. khoj/interface/compiled/_next/static/css/b62829e3bf683b86.css +1 -0
  20. khoj/interface/compiled/agents/index.html +2 -2
  21. khoj/interface/compiled/agents/index.txt +3 -3
  22. khoj/interface/compiled/automations/index.html +2 -2
  23. khoj/interface/compiled/automations/index.txt +2 -2
  24. khoj/interface/compiled/chat/index.html +2 -2
  25. khoj/interface/compiled/chat/index.txt +3 -3
  26. khoj/interface/compiled/index.html +2 -2
  27. khoj/interface/compiled/index.txt +2 -2
  28. khoj/interface/compiled/search/index.html +2 -2
  29. khoj/interface/compiled/search/index.txt +3 -3
  30. khoj/interface/compiled/settings/index.html +2 -2
  31. khoj/interface/compiled/settings/index.txt +2 -2
  32. khoj/interface/compiled/share/chat/index.html +2 -2
  33. khoj/interface/compiled/share/chat/index.txt +3 -3
  34. khoj/processor/conversation/anthropic/anthropic_chat.py +9 -4
  35. khoj/processor/conversation/anthropic/utils.py +32 -12
  36. khoj/processor/conversation/google/gemini_chat.py +14 -5
  37. khoj/processor/conversation/google/utils.py +49 -6
  38. khoj/processor/conversation/openai/gpt.py +18 -6
  39. khoj/processor/conversation/openai/utils.py +37 -46
  40. khoj/processor/conversation/utils.py +24 -2
  41. khoj/processor/image/generate.py +2 -2
  42. khoj/processor/tools/run_code.py +1 -1
  43. khoj/routers/api.py +4 -0
  44. khoj/routers/api_chat.py +6 -4
  45. khoj/routers/auth.py +2 -5
  46. khoj/routers/helpers.py +23 -3
  47. khoj/routers/research.py +44 -3
  48. khoj/routers/storage.py +28 -29
  49. khoj/utils/constants.py +2 -0
  50. khoj/utils/helpers.py +58 -2
  51. {khoj-1.36.7.dev66.dist-info → khoj-1.37.1.dist-info}/METADATA +5 -6
  52. {khoj-1.36.7.dev66.dist-info → khoj-1.37.1.dist-info}/RECORD +57 -56
  53. khoj/interface/compiled/_next/static/chunks/main-98eb5932d6b2e3fa.js +0 -1
  54. khoj/interface/compiled/_next/static/css/5384e98d63fe6f0e.css +0 -25
  55. khoj/interface/compiled/_next/static/css/8051073dc55b92b3.css +0 -1
  56. khoj/interface/compiled/_next/static/css/b15666ef52060cd0.css +0 -1
  57. /khoj/interface/compiled/_next/static/{iZ9Zhm-BkOf7hfAqqzokr → PzXuumAYUnzr_Egd_JDmj}/_buildManifest.js +0 -0
  58. /khoj/interface/compiled/_next/static/{iZ9Zhm-BkOf7hfAqqzokr → PzXuumAYUnzr_Egd_JDmj}/_ssgManifest.js +0 -0
  59. {khoj-1.36.7.dev66.dist-info → khoj-1.37.1.dist-info}/WHEEL +0 -0
  60. {khoj-1.36.7.dev66.dist-info → khoj-1.37.1.dist-info}/entry_points.txt +0 -0
  61. {khoj-1.36.7.dev66.dist-info → khoj-1.37.1.dist-info}/licenses/LICENSE +0 -0
khoj/processor/conversation/google/gemini_chat.py CHANGED
@@ -34,7 +34,7 @@ def extract_questions_gemini(
     model: Optional[str] = "gemini-2.0-flash",
     conversation_log={},
     api_key=None,
-    temperature=0.6,
+    api_base_url=None,
     max_tokens=None,
     location_data: LocationData = None,
     user: KhojUser = None,
@@ -97,7 +97,12 @@ def extract_questions_gemini(
     messages.append(ChatMessage(content=system_prompt, role="system"))
 
     response = gemini_send_message_to_model(
-        messages, api_key, model, response_type="json_object", temperature=temperature, tracer=tracer
+        messages,
+        api_key,
+        model,
+        api_base_url=api_base_url,
+        response_type="json_object",
+        tracer=tracer,
     )
 
     # Extract, Clean Message from Gemini's Response
@@ -120,8 +125,9 @@ def gemini_send_message_to_model(
     messages,
     api_key,
     model,
+    api_base_url=None,
     response_type="text",
-    temperature=0.6,
+    response_schema=None,
     model_kwargs=None,
     tracer={},
 ):
@@ -135,6 +141,7 @@ def gemini_send_message_to_model(
     # This caused unwanted behavior and terminates response early for gemini 1.5 series. Monitor for flakiness with 2.0 series.
     if response_type == "json_object" and model in ["gemini-2.0-flash"]:
         model_kwargs["response_mime_type"] = "application/json"
+        model_kwargs["response_schema"] = response_schema
 
     # Get Response from Gemini
     return gemini_completion_with_backoff(
@@ -142,7 +149,7 @@ def gemini_send_message_to_model(
         system_prompt=system_prompt,
         model_name=model,
         api_key=api_key,
-        temperature=temperature,
+        api_base_url=api_base_url,
         model_kwargs=model_kwargs,
         tracer=tracer,
     )
@@ -156,7 +163,8 @@ def converse_gemini(
     conversation_log={},
     model: Optional[str] = "gemini-2.0-flash",
     api_key: Optional[str] = None,
-    temperature: float = 0.6,
+    api_base_url: Optional[str] = None,
+    temperature: float = 0.4,
     completion_func=None,
     conversation_commands=[ConversationCommand.Default],
     max_prompt_size=None,
@@ -247,6 +255,7 @@ def converse_gemini(
         model_name=model,
         temperature=temperature,
         api_key=api_key,
+        api_base_url=api_base_url,
         system_prompt=system_prompt,
         completion_func=completion_func,
         tracer=tracer,
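Taken together, these hunks drop the per-call temperature knob from Gemini question extraction, thread api_base_url through every call site, and let callers pass a response_schema for JSON mode. A minimal sketch of the new call shape, assuming a standard Gemini API key and khoj's langchain ChatMessage (both illustrative, not from this diff):

    import os

    from langchain.schema import ChatMessage  # assumed import path for khoj's ChatMessage

    from khoj.processor.conversation.google.gemini_chat import gemini_send_message_to_model

    # Mirror extract_questions_gemini: user message first, system prompt appended last
    messages = [
        ChatMessage(content="Extract search queries from: what did I eat last week?", role="user"),
        ChatMessage(content="You are Khoj. Respond with JSON.", role="system"),
    ]

    raw_json = gemini_send_message_to_model(
        messages,
        os.environ["GEMINI_API_KEY"],  # hypothetical env var for this sketch
        "gemini-2.0-flash",
        api_base_url=None,  # default Gemini endpoint
        response_type="json_object",
        response_schema=None,  # optional schema; only forwarded for gemini-2.0-flash
    )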
khoj/processor/conversation/google/utils.py CHANGED
@@ -1,7 +1,9 @@
 import logging
+import os
 import random
 from copy import deepcopy
 from threading import Thread
+from typing import Dict
 
 from google import genai
 from google.genai import errors as gerrors
@@ -18,9 +20,11 @@ from tenacity import (
 from khoj.processor.conversation.utils import (
     ThreadedGenerator,
     commit_conversation_trace,
+    get_image_from_base64,
     get_image_from_url,
 )
 from khoj.utils.helpers import (
+    get_ai_api_info,
     get_chat_usage_metrics,
     is_none_or_empty,
     is_promptrace_enabled,
@@ -28,6 +32,7 @@ from khoj.utils.helpers import (
 
 logger = logging.getLogger(__name__)
 
+gemini_clients: Dict[str, genai.Client] = {}
 
 MAX_OUTPUT_TOKENS_GEMINI = 8192
 SAFETY_SETTINGS = [
@@ -50,6 +55,17 @@ SAFETY_SETTINGS = [
 ]
 
 
+def get_gemini_client(api_key, api_base_url=None) -> genai.Client:
+    api_info = get_ai_api_info(api_key, api_base_url)
+    return genai.Client(
+        location=api_info.region,
+        project=api_info.project,
+        credentials=api_info.credentials,
+        api_key=api_info.api_key,
+        vertexai=api_info.api_key is None,
+    )
+
+
 @retry(
     wait=wait_random_exponential(min=1, max=10),
     stop=stop_after_attempt(2),
@@ -57,14 +73,22 @@ SAFETY_SETTINGS = [
     reraise=True,
 )
 def gemini_completion_with_backoff(
-    messages, system_prompt, model_name, temperature=0, api_key=None, model_kwargs=None, tracer={}
+    messages, system_prompt, model_name, temperature=0.8, api_key=None, api_base_url=None, model_kwargs=None, tracer={}
 ) -> str:
-    client = genai.Client(api_key=api_key)
+    client = gemini_clients.get(api_key)
+    if not client:
+        client = get_gemini_client(api_key, api_base_url)
+        gemini_clients[api_key] = client
+
+    seed = int(os.getenv("KHOJ_LLM_SEED")) if os.getenv("KHOJ_LLM_SEED") else None
     config = gtypes.GenerateContentConfig(
         system_instruction=system_prompt,
         temperature=temperature,
         max_output_tokens=MAX_OUTPUT_TOKENS_GEMINI,
         safety_settings=SAFETY_SETTINGS,
+        response_mime_type=model_kwargs.get("response_mime_type", "text/plain") if model_kwargs else "text/plain",
+        response_schema=model_kwargs.get("response_schema", None) if model_kwargs else None,
+        seed=seed,
     )
 
     formatted_messages = [gtypes.Content(role=message.role, parts=message.content) for message in messages]
@@ -109,6 +133,7 @@ def gemini_chat_completion_with_backoff(
     model_name,
     temperature,
     api_key,
+    api_base_url,
     system_prompt,
     completion_func=None,
     model_kwargs=None,
@@ -117,23 +142,37 @@ def gemini_chat_completion_with_backoff(
     g = ThreadedGenerator(compiled_references, online_results, completion_func=completion_func)
     t = Thread(
         target=gemini_llm_thread,
-        args=(g, messages, system_prompt, model_name, temperature, api_key, model_kwargs, tracer),
+        args=(g, messages, system_prompt, model_name, temperature, api_key, api_base_url, model_kwargs, tracer),
     )
     t.start()
     return g
 
 
 def gemini_llm_thread(
-    g, messages, system_prompt, model_name, temperature, api_key, model_kwargs=None, tracer: dict = {}
+    g,
+    messages,
+    system_prompt,
+    model_name,
+    temperature,
+    api_key,
+    api_base_url=None,
+    model_kwargs=None,
+    tracer: dict = {},
 ):
     try:
-        client = genai.Client(api_key=api_key)
+        client = gemini_clients.get(api_key)
+        if not client:
+            client = get_gemini_client(api_key, api_base_url)
+            gemini_clients[api_key] = client
+
+        seed = int(os.getenv("KHOJ_LLM_SEED")) if os.getenv("KHOJ_LLM_SEED") else None
        config = gtypes.GenerateContentConfig(
            system_instruction=system_prompt,
            temperature=temperature,
            max_output_tokens=MAX_OUTPUT_TOKENS_GEMINI,
            stop_sequences=["Notes:\n["],
            safety_settings=SAFETY_SETTINGS,
+           seed=seed,
        )
 
         aggregated_response = ""
@@ -243,7 +282,11 @@ def format_messages_for_gemini(
             message_content = []
             for item in sorted(message.content, key=lambda x: 0 if x["type"] == "image_url" else 1):
                 if item["type"] == "image_url":
-                    image = get_image_from_url(item["image_url"]["url"], type="bytes")
+                    image_data = item["image_url"]["url"]
+                    if image_data.startswith("http"):
+                        image = get_image_from_url(image_data, type="bytes")
+                    else:
+                        image = get_image_from_base64(image_data, type="bytes")
                     message_content += [gtypes.Part.from_bytes(data=image.content, mime_type=image.type)]
                 else:
                     message_content += [gtypes.Part.from_text(text=item.get("text", ""))]
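The utils refactor caches one genai.Client per API key, routes keyless configurations to Vertex AI (vertexai=api_info.api_key is None), and threads an optional reproducible seed from KHOJ_LLM_SEED into every generation config. A sketch of the equivalent direct google-genai usage, assuming an API-key setup and an illustrative prompt:

    import os

    from google import genai
    from google.genai import types as gtypes

    # Optional deterministic sampling, read from KHOJ_LLM_SEED as in this diff
    seed = int(os.getenv("KHOJ_LLM_SEED")) if os.getenv("KHOJ_LLM_SEED") else None

    config = gtypes.GenerateContentConfig(
        system_instruction="You are Khoj.",  # illustrative system prompt
        temperature=0.8,
        max_output_tokens=8192,  # MAX_OUTPUT_TOKENS_GEMINI in the module
        response_mime_type="application/json",  # "text/plain" when JSON is not requested
        seed=seed,
    )

    # API-key mode; khoj's get_gemini_client switches to Vertex AI when no key is set
    client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])
    response = client.models.generate_content(
        model="gemini-2.0-flash", contents="List three colors as JSON.", config=config
    )
    print(response.text)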
khoj/processor/conversation/openai/gpt.py CHANGED
@@ -10,8 +10,10 @@ from khoj.processor.conversation import prompts
 from khoj.processor.conversation.openai.utils import (
     chat_completion_with_backoff,
     completion_with_backoff,
+    get_openai_api_json_support,
 )
 from khoj.processor.conversation.utils import (
+    JsonSupport,
     clean_json,
     construct_structured_message,
     generate_chatml_messages_with_context,
@@ -61,7 +63,6 @@ def extract_questions(
     today = datetime.today()
     current_new_year = today.replace(month=1, day=1)
     last_new_year = current_new_year.replace(year=today.year - 1)
-    temperature = 0.7
 
     prompt = prompts.extract_questions.format(
         current_date=today.strftime("%Y-%m-%d"),
@@ -97,7 +98,6 @@ def extract_questions(
         model,
         response_type="json_object",
         api_base_url=api_base_url,
-        temperature=temperature,
         tracer=tracer,
     )
 
@@ -119,20 +119,32 @@ def extract_questions(
 
 
 def send_message_to_model(
-    messages, api_key, model, response_type="text", api_base_url=None, temperature=0, tracer: dict = {}
+    messages,
+    api_key,
+    model,
+    response_type="text",
+    response_schema=None,
+    api_base_url=None,
+    tracer: dict = {},
 ):
     """
     Send message to model
     """
 
+    model_kwargs = {}
+    json_support = get_openai_api_json_support(model, api_base_url)
+    if response_schema and json_support == JsonSupport.SCHEMA:
+        model_kwargs["response_format"] = response_schema
+    elif response_type == "json_object" and json_support == JsonSupport.OBJECT:
+        model_kwargs["response_format"] = {"type": response_type}
+
     # Get Response from GPT
     return completion_with_backoff(
         messages=messages,
         model_name=model,
         openai_api_key=api_key,
-        temperature=temperature,
         api_base_url=api_base_url,
-        model_kwargs={"response_format": {"type": response_type}},
+        model_kwargs=model_kwargs,
         tracer=tracer,
     )
 
@@ -146,7 +158,7 @@ def converse_openai(
     model: str = "gpt-4o-mini",
     api_key: Optional[str] = None,
     api_base_url: Optional[str] = None,
-    temperature: float = 0.2,
+    temperature: float = 0.4,
     completion_func=None,
     conversation_commands=[ConversationCommand.Default],
     max_prompt_size=None,
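send_message_to_model now derives its response_format from the endpoint's JSON capability instead of always requesting a json_object. A sketch of that gating, with an illustrative schema dict in OpenAI's structured-output format (an assumption; khoj passes whatever response_schema the caller supplies straight through):

    from khoj.processor.conversation.openai.utils import get_openai_api_json_support
    from khoj.processor.conversation.utils import JsonSupport

    # Illustrative schema, not from this diff
    response_schema = {
        "type": "json_schema",
        "json_schema": {"name": "queries", "schema": {"type": "object"}},
    }

    model_kwargs = {}
    json_support = get_openai_api_json_support("gpt-4o-mini", api_base_url=None)
    if response_schema and json_support == JsonSupport.SCHEMA:
        model_kwargs["response_format"] = response_schema  # full schema-constrained output
    elif json_support == JsonSupport.OBJECT:
        model_kwargs["response_format"] = {"type": "json_object"}  # legacy JSON mode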
khoj/processor/conversation/openai/utils.py CHANGED
@@ -2,6 +2,7 @@ import logging
 import os
 from threading import Thread
 from typing import Dict, List
+from urllib.parse import urlparse
 
 import openai
 from openai.types.chat.chat_completion import ChatCompletion
@@ -16,6 +17,7 @@ from tenacity import (
 )
 
 from khoj.processor.conversation.utils import (
+    JsonSupport,
     ThreadedGenerator,
     commit_conversation_trace,
 )
@@ -46,59 +48,43 @@ openai_clients: Dict[str, openai.OpenAI] = {}
 def completion_with_backoff(
     messages,
     model_name: str,
-    temperature=0,
+    temperature=0.8,
     openai_api_key=None,
     api_base_url=None,
     model_kwargs: dict = {},
     tracer: dict = {},
 ) -> str:
     client_key = f"{openai_api_key}--{api_base_url}"
-    client: openai.OpenAI | None = openai_clients.get(client_key)
+    client = openai_clients.get(client_key)
     if not client:
         client = get_openai_client(openai_api_key, api_base_url)
         openai_clients[client_key] = client
 
     formatted_messages = [{"role": message.role, "content": message.content} for message in messages]
 
-    # Update request parameters for compatability with o1 model series
-    # Refer: https://platform.openai.com/docs/guides/reasoning/beta-limitations
-    stream = True
-    model_kwargs["stream_options"] = {"include_usage": True}
-    if model_name == "o1":
-        temperature = 1
-        stream = False
-        model_kwargs.pop("stream_options", None)
-    elif model_name.startswith("o1"):
-        temperature = 1
-        model_kwargs.pop("response_format", None)
-    elif model_name.startswith("o3-"):
+    # Tune reasoning models arguments
+    if model_name.startswith("o1") or model_name.startswith("o3"):
        temperature = 1
+        model_kwargs["reasoning_effort"] = "medium"
 
+    model_kwargs["stream_options"] = {"include_usage": True}
     if os.getenv("KHOJ_LLM_SEED"):
         model_kwargs["seed"] = int(os.getenv("KHOJ_LLM_SEED"))
 
-    chat: ChatCompletion | openai.Stream[ChatCompletionChunk] = client.chat.completions.create(
+    aggregated_response = ""
+    with client.beta.chat.completions.stream(
         messages=formatted_messages,  # type: ignore
-        model=model_name,  # type: ignore
-        stream=stream,
+        model=model_name,
         temperature=temperature,
         timeout=20,
         **model_kwargs,
-    )
-
-    aggregated_response = ""
-    if not stream:
-        chunk = chat
-        aggregated_response = chunk.choices[0].message.content
-    else:
+    ) as chat:
         for chunk in chat:
-            if len(chunk.choices) == 0:
+            if chunk.type == "error":
+                logger.error(f"Openai api response error: {chunk.error}", exc_info=True)
                 continue
-            delta_chunk = chunk.choices[0].delta  # type: ignore
-            if isinstance(delta_chunk, str):
-                aggregated_response += delta_chunk
-            elif delta_chunk.content:
-                aggregated_response += delta_chunk.content
+            elif chunk.type == "content.delta":
+                aggregated_response += chunk.delta
 
     # Calculate cost of chat
     input_tokens = chunk.usage.prompt_tokens if hasattr(chunk, "usage") and chunk.usage else 0
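The rewrite drops the hand-rolled stream/non-stream branches for the o1 series in favor of the OpenAI SDK's streaming helper, which yields typed events rather than raw chunks. A condensed sketch of the new loop, mirroring the diff's event handling (assumes OPENAI_API_KEY is set in the environment):

    import openai

    client = openai.OpenAI()  # reads OPENAI_API_KEY from the environment

    aggregated_response = ""
    with client.beta.chat.completions.stream(
        messages=[{"role": "user", "content": "Say hi"}],
        model="gpt-4o-mini",
        temperature=0.8,
        timeout=20,
        stream_options={"include_usage": True},  # final chunk carries token usage
    ) as chat:
        for chunk in chat:
            if chunk.type == "error":
                continue  # the diff logs chunk.error here
            elif chunk.type == "content.delta":
                aggregated_response += chunk.delta

    print(aggregated_response)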
@@ -164,28 +150,20 @@ def llm_thread(
 ):
     try:
         client_key = f"{openai_api_key}--{api_base_url}"
-        if client_key in openai_clients:
-            client = openai_clients[client_key]
-        else:
+        client = openai_clients.get(client_key)
+        if not client:
             client = get_openai_client(openai_api_key, api_base_url)
             openai_clients[client_key] = client
 
         formatted_messages = [{"role": message.role, "content": message.content} for message in messages]
 
-        # Update request parameters for compatability with o1 model series
-        # Refer: https://platform.openai.com/docs/guides/reasoning/beta-limitations
-        stream = True
-        model_kwargs["stream_options"] = {"include_usage": True}
-        if model_name == "o1":
+        # Tune reasoning models arguments
+        if model_name.startswith("o1"):
             temperature = 1
-            stream = False
-            model_kwargs.pop("stream_options", None)
-        elif model_name.startswith("o1-"):
+        elif model_name.startswith("o3"):
             temperature = 1
-            model_kwargs.pop("response_format", None)
-        elif model_name.startswith("o3-"):
-            temperature = 1
-            # Get the first system message and add the string `Formatting re-enabled` to it. See https://platform.openai.com/docs/guides/reasoning-best-practices
+            # Get the first system message and add the string `Formatting re-enabled` to it.
+            # See https://platform.openai.com/docs/guides/reasoning-best-practices
             if len(formatted_messages) > 0:
                 system_messages = [
                     (i, message) for i, message in enumerate(formatted_messages) if message["role"] == "system"
@@ -195,7 +173,6 @@ def llm_thread(
                 formatted_messages[first_system_message_index][
                     "content"
                 ] = f"{first_system_message} Formatting re-enabled"
-
         elif model_name.startswith("deepseek-reasoner"):
             # Two successive messages cannot be from the same role. Should merge any back-to-back messages from the same role.
             # The first message should always be a user message (except system message).
@@ -210,6 +187,8 @@ def llm_thread(
 
             formatted_messages = updated_messages
 
+        stream = True
+        model_kwargs["stream_options"] = {"include_usage": True}
         if os.getenv("KHOJ_LLM_SEED"):
             model_kwargs["seed"] = int(os.getenv("KHOJ_LLM_SEED"))
 
@@ -258,3 +237,15 @@ def llm_thread(
         logger.error(f"Error in llm_thread: {e}", exc_info=True)
     finally:
         g.close()
+
+
+def get_openai_api_json_support(model_name: str, api_base_url: str = None) -> JsonSupport:
+    if model_name.startswith("deepseek-reasoner"):
+        return JsonSupport.NONE
+    if api_base_url:
+        host = urlparse(api_base_url).hostname
+        if host and host.endswith(".ai.azure.com"):
+            return JsonSupport.OBJECT
+        if host == "api.deepinfra.com":
+            return JsonSupport.OBJECT
+    return JsonSupport.SCHEMA
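The new probe encodes which OpenAI-compatible endpoints support structured outputs: deepseek-reasoner gets no JSON mode, Azure AI and DeepInfra hosts get legacy json_object mode, and everything else is assumed to support full schemas. A quick check of its behavior (base URLs and model names are illustrative):

    from khoj.processor.conversation.openai.utils import get_openai_api_json_support
    from khoj.processor.conversation.utils import JsonSupport

    assert get_openai_api_json_support("deepseek-reasoner") == JsonSupport.NONE
    assert get_openai_api_json_support("gpt-4o-mini") == JsonSupport.SCHEMA  # no base URL: full schema support
    assert get_openai_api_json_support("llama-3", "https://api.deepinfra.com/v1/openai") == JsonSupport.OBJECT
    assert get_openai_api_json_support("gpt-4o", "https://my-khoj.ai.azure.com/v1") == JsonSupport.OBJECT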
khoj/processor/conversation/utils.py CHANGED
@@ -345,8 +345,7 @@ def construct_structured_message(
         constructed_messages.append({"type": "text", "text": attached_file_context})
     if vision_enabled and images:
         for image in images:
-            if image.startswith("https://"):
-                constructed_messages.append({"type": "image_url", "image_url": {"url": image}})
+            constructed_messages.append({"type": "image_url", "image_url": {"url": image}})
     return constructed_messages
 
 
@@ -664,6 +663,23 @@ class ImageWithType:
     type: str
 
 
+def get_image_from_base64(image: str, type="b64"):
+    # Extract image type and base64 data from inline image data
+    image_base64 = image.split(",", 1)[1]
+    image_type = image.split(";", 1)[0].split(":", 1)[1]
+
+    # Convert image to desired format
+    if type == "b64":
+        return ImageWithType(content=image_base64, type=image_type)
+    elif type == "pil":
+        image_data = base64.b64decode(image_base64)
+        image_pil = PIL.Image.open(BytesIO(image_data))
+        return ImageWithType(content=image_pil, type=image_type)
+    elif type == "bytes":
+        image_data = base64.b64decode(image_base64)
+        return ImageWithType(content=image_data, type=image_type)
+
+
 def get_image_from_url(image_url: str, type="pil"):
     try:
         response = requests.get(image_url)
@@ -878,3 +894,9 @@ def messages_to_print(messages: list[ChatMessage], max_length: int = 70) -> str:
         return str(content)
 
     return "\n".join([f"{json.dumps(safe_serialize(message.content))[:max_length]}..." for message in messages])
+
+
+class JsonSupport(int, Enum):
+    NONE = 0
+    OBJECT = 1
+    SCHEMA = 2
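With upload fallbacks now producing data URIs, construct_structured_message no longer filters images to https URLs, and the new get_image_from_base64 helper splits a data URI into its mime type and base64 payload. A tiny worked example of that parsing (illustrative two-byte payload):

    import base64

    image = "data:image/webp;base64," + base64.b64encode(b"\x00\x01").decode("utf-8")
    image_base64 = image.split(",", 1)[1]                 # -> "AAE="
    image_type = image.split(";", 1)[0].split(":", 1)[1]  # -> "image/webp"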
khoj/processor/image/generate.py CHANGED
@@ -12,7 +12,7 @@ from google.genai import types as gtypes
 from khoj.database.adapters import ConversationAdapters
 from khoj.database.models import Agent, KhojUser, TextToImageModelConfig
 from khoj.routers.helpers import ChatEvent, generate_better_image_prompt
-from khoj.routers.storage import upload_image
+from khoj.routers.storage import upload_generated_image_to_bucket
 from khoj.utils import state
 from khoj.utils.helpers import convert_image_to_webp, timer
 from khoj.utils.rawconfig import LocationData
@@ -118,7 +118,7 @@ async def text_to_image(
 
     # Decide how to store the generated image
     with timer("Upload image to S3", logger):
-        image_url = upload_image(webp_image_bytes, user.uuid)
+        image_url = upload_generated_image_to_bucket(webp_image_bytes, user.uuid)
 
     if not image_url:
         image = f"data:image/webp;base64,{base64.b64encode(webp_image_bytes).decode('utf-8')}"
khoj/processor/tools/run_code.py CHANGED
@@ -257,7 +257,7 @@ async def execute_e2b(code: str, input_files: list[dict]) -> dict[str, Any]:
                 continue
             else:
                 # Text files - encode utf-8 string as base64
-                b64_data = base64.b64encode(content.encode("utf-8")).decode("utf-8")
+                b64_data = content
             output_files.append({"filename": f.name, "b64_data": b64_data})
 
     # Collect output files from execution results
khoj/routers/api.py CHANGED
@@ -463,12 +463,14 @@ async def extract_references_and_questions(
         )
     elif chat_model.model_type == ChatModel.ModelType.ANTHROPIC:
         api_key = chat_model.ai_model_api.api_key
+        api_base_url = chat_model.ai_model_api.api_base_url
         chat_model_name = chat_model.name
         inferred_queries = extract_questions_anthropic(
             defiltered_query,
             query_images=query_images,
             model=chat_model_name,
             api_key=api_key,
+            api_base_url=api_base_url,
             conversation_log=meta_log,
             location_data=location_data,
             user=user,
@@ -479,12 +481,14 @@ async def extract_references_and_questions(
         )
     elif chat_model.model_type == ChatModel.ModelType.GOOGLE:
         api_key = chat_model.ai_model_api.api_key
+        api_base_url = chat_model.ai_model_api.api_base_url
         chat_model_name = chat_model.name
         inferred_queries = extract_questions_gemini(
             defiltered_query,
             query_images=query_images,
             model=chat_model_name,
             api_key=api_key,
+            api_base_url=api_base_url,
             conversation_log=meta_log,
             location_data=location_data,
             max_tokens=chat_model.max_prompt_size,
khoj/routers/api_chat.py CHANGED
@@ -64,7 +64,7 @@ from khoj.routers.research import (
     InformationCollectionIteration,
     execute_information_collection,
 )
-from khoj.routers.storage import upload_image_to_bucket
+from khoj.routers.storage import upload_user_image_to_bucket
 from khoj.utils import state
 from khoj.utils.helpers import (
     AsyncIteratorWrapper,
@@ -674,9 +674,11 @@ async def chat(
                 base64_data = decoded_string.split(",", 1)[1]
                 image_bytes = base64.b64decode(base64_data)
                 webp_image_bytes = convert_image_to_webp(image_bytes)
-                uploaded_image = upload_image_to_bucket(webp_image_bytes, request.user.object.id)
-                if uploaded_image:
-                    uploaded_images.append(uploaded_image)
+                uploaded_image = upload_user_image_to_bucket(webp_image_bytes, request.user.object.id)
+                if not uploaded_image:
+                    base64_webp_image = base64.b64encode(webp_image_bytes).decode("utf-8")
+                    uploaded_image = f"data:image/webp;base64,{base64_webp_image}"
+                uploaded_images.append(uploaded_image)
 
             query_files: Dict[str, str] = {}
             if raw_query_files:
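This fallback produces exactly the inline data URIs that the Gemini formatting path above now understands: when the bucket upload fails, the chat API keeps the image as a data:image/webp;base64,... string, which format_messages_for_gemini later routes through get_image_from_base64. A self-contained sketch of the fallback shape (placeholder bytes, not a real webp):

    import base64

    webp_image_bytes = b"RIFF....WEBP"  # placeholder for the converted webp bytes
    uploaded_image = None  # e.g., bucket upload returned nothing

    if not uploaded_image:
        base64_webp_image = base64.b64encode(webp_image_bytes).decode("utf-8")
        uploaded_image = f"data:image/webp;base64,{base64_webp_image}"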
khoj/routers/auth.py CHANGED
@@ -43,12 +43,9 @@ class MagicLinkForm(BaseModel):
 if not state.anonymous_mode:
     missing_requirements = []
     from authlib.integrations.starlette_client import OAuth, OAuthError
+    from google.auth.transport import requests as google_requests
+    from google.oauth2 import id_token
 
-    try:
-        from google.auth.transport import requests as google_requests
-        from google.oauth2 import id_token
-    except ImportError:
-        missing_requirements += ["Install the Khoj production package with `pip install khoj[prod]`"]
     if not os.environ.get("RESEND_API_KEY") and (
         not os.environ.get("GOOGLE_CLIENT_ID") or not os.environ.get("GOOGLE_CLIENT_SECRET")
     ):