khoj 1.36.7.dev66__py3-none-any.whl → 1.37.1.dev6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. khoj/interface/compiled/404/index.html +2 -2
  2. khoj/interface/compiled/_next/static/chunks/{2327-b21ecded25471e6c.js → 2327-0bbe3ee35f80659f.js} +1 -1
  3. khoj/interface/compiled/_next/static/chunks/{5477-9ff77f49e6cf375c.js → 5477-a5b2688736f51b8c.js} +1 -1
  4. khoj/interface/compiled/_next/static/chunks/{8515-010dd769c584b672.js → 8515-f305779d95dd5780.js} +1 -1
  5. khoj/interface/compiled/_next/static/chunks/app/agents/{layout-948ca256650845ce.js → layout-5961c3717e1d8813.js} +1 -1
  6. khoj/interface/compiled/_next/static/chunks/app/agents/{page-df5446aa4fb82e1a.js → page-0d31f76257d6ec11.js} +1 -1
  7. khoj/interface/compiled/_next/static/chunks/app/automations/{page-0a44416f9183aec0.js → page-c6180af69fc9c766.js} +1 -1
  8. khoj/interface/compiled/_next/static/chunks/app/chat/{layout-603285e3b1400e74.js → layout-3e25af7224d678a0.js} +1 -1
  9. khoj/interface/compiled/_next/static/chunks/app/chat/{page-50cb9b62b10b5f3d.js → page-926ee054e844236a.js} +1 -1
  10. khoj/interface/compiled/_next/static/chunks/app/{page-29e3b092fe46f190.js → page-df51e61295c4a9b5.js} +1 -1
  11. khoj/interface/compiled/_next/static/chunks/app/search/{layout-d7f7528ff387fba5.js → layout-9ccd090dcc2aa58a.js} +1 -1
  12. khoj/interface/compiled/_next/static/chunks/app/search/{page-1df7b236b30620f7.js → page-098017fa7f6ba0bf.js} +1 -1
  13. khoj/interface/compiled/_next/static/chunks/app/settings/{page-3473bab693ef81b2.js → page-1ff027f4e0a5c468.js} +1 -1
  14. khoj/interface/compiled/_next/static/chunks/app/share/chat/{layout-246d0e8125219fff.js → layout-9cc742afcea0b421.js} +1 -1
  15. khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-6f26fe7f2f7edc56.js → page-ea29a4633737cb59.js} +1 -1
  16. khoj/interface/compiled/_next/static/chunks/{webpack-1169ca6e9e7e6247.js → webpack-449ea2835ec65e75.js} +1 -1
  17. khoj/interface/compiled/_next/static/css/0db53bacf81896f5.css +1 -0
  18. khoj/interface/compiled/_next/static/css/b061a6aedf367349.css +25 -0
  19. khoj/interface/compiled/_next/static/css/bb7ea98028b368f3.css +1 -0
  20. khoj/interface/compiled/_next/static/css/ee66643a6a5bf71c.css +1 -0
  21. khoj/interface/compiled/agents/index.html +2 -2
  22. khoj/interface/compiled/agents/index.txt +3 -3
  23. khoj/interface/compiled/automations/index.html +2 -2
  24. khoj/interface/compiled/automations/index.txt +3 -3
  25. khoj/interface/compiled/chat/index.html +2 -2
  26. khoj/interface/compiled/chat/index.txt +3 -3
  27. khoj/interface/compiled/index.html +2 -2
  28. khoj/interface/compiled/index.txt +2 -2
  29. khoj/interface/compiled/search/index.html +2 -2
  30. khoj/interface/compiled/search/index.txt +3 -3
  31. khoj/interface/compiled/settings/index.html +2 -2
  32. khoj/interface/compiled/settings/index.txt +5 -5
  33. khoj/interface/compiled/share/chat/index.html +2 -2
  34. khoj/interface/compiled/share/chat/index.txt +3 -3
  35. khoj/processor/conversation/anthropic/utils.py +6 -1
  36. khoj/processor/conversation/google/gemini_chat.py +2 -0
  37. khoj/processor/conversation/google/utils.py +13 -1
  38. khoj/processor/conversation/openai/gpt.py +18 -2
  39. khoj/processor/conversation/openai/utils.py +33 -41
  40. khoj/processor/conversation/utils.py +24 -2
  41. khoj/processor/image/generate.py +2 -2
  42. khoj/processor/tools/run_code.py +1 -1
  43. khoj/routers/api_chat.py +6 -4
  44. khoj/routers/auth.py +2 -5
  45. khoj/routers/helpers.py +10 -0
  46. khoj/routers/research.py +44 -3
  47. khoj/routers/storage.py +28 -29
  48. {khoj-1.36.7.dev66.dist-info → khoj-1.37.1.dev6.dist-info}/METADATA +4 -5
  49. {khoj-1.36.7.dev66.dist-info → khoj-1.37.1.dev6.dist-info}/RECORD +60 -60
  50. khoj/interface/compiled/_next/static/css/37a73b87f02df402.css +0 -1
  51. khoj/interface/compiled/_next/static/css/5384e98d63fe6f0e.css +0 -25
  52. khoj/interface/compiled/_next/static/css/8051073dc55b92b3.css +0 -1
  53. khoj/interface/compiled/_next/static/css/f29752d6e1be7624.css +0 -1
  54. /khoj/interface/compiled/_next/static/chunks/{1915-ab4353eaca76f690.js → 1915-1943ee8a628b893c.js} +0 -0
  55. /khoj/interface/compiled/_next/static/chunks/{2117-f99825f0a867a42d.js → 2117-ce1f0a4598f5e4fe.js} +0 -0
  56. /khoj/interface/compiled/_next/static/chunks/{4363-4efaf12abe696251.js → 4363-e6ac2203564d1a3b.js} +0 -0
  57. /khoj/interface/compiled/_next/static/chunks/{4447-5d44807c40355b1a.js → 4447-e038b251d626c340.js} +0 -0
  58. /khoj/interface/compiled/_next/static/chunks/{8667-adbe6017a66cef10.js → 8667-8136f74e9a086fca.js} +0 -0
  59. /khoj/interface/compiled/_next/static/chunks/{9259-d8bcd9da9e80c81e.js → 9259-640fdd77408475df.js} +0 -0
  60. /khoj/interface/compiled/_next/static/{iZ9Zhm-BkOf7hfAqqzokr → dWV1ok4ndbQQtpEVqLZAh}/_buildManifest.js +0 -0
  61. /khoj/interface/compiled/_next/static/{iZ9Zhm-BkOf7hfAqqzokr → dWV1ok4ndbQQtpEVqLZAh}/_ssgManifest.js +0 -0
  62. {khoj-1.36.7.dev66.dist-info → khoj-1.37.1.dev6.dist-info}/WHEEL +0 -0
  63. {khoj-1.36.7.dev66.dist-info → khoj-1.37.1.dev6.dist-info}/entry_points.txt +0 -0
  64. {khoj-1.36.7.dev66.dist-info → khoj-1.37.1.dev6.dist-info}/licenses/LICENSE +0 -0
@@ -2,6 +2,7 @@ import logging
2
2
  import os
3
3
  from threading import Thread
4
4
  from typing import Dict, List
5
+ from urllib.parse import urlparse
5
6
 
6
7
  import openai
7
8
  from openai.types.chat.chat_completion import ChatCompletion
@@ -16,6 +17,7 @@ from tenacity import (
16
17
  )
17
18
 
18
19
  from khoj.processor.conversation.utils import (
20
+ JsonSupport,
19
21
  ThreadedGenerator,
20
22
  commit_conversation_trace,
21
23
  )
@@ -60,45 +62,29 @@ def completion_with_backoff(
60
62
 
61
63
  formatted_messages = [{"role": message.role, "content": message.content} for message in messages]
62
64
 
63
- # Update request parameters for compatability with o1 model series
64
- # Refer: https://platform.openai.com/docs/guides/reasoning/beta-limitations
65
- stream = True
66
- model_kwargs["stream_options"] = {"include_usage": True}
67
- if model_name == "o1":
68
- temperature = 1
69
- stream = False
70
- model_kwargs.pop("stream_options", None)
71
- elif model_name.startswith("o1"):
72
- temperature = 1
73
- model_kwargs.pop("response_format", None)
74
- elif model_name.startswith("o3-"):
65
+ # Tune reasoning models arguments
66
+ if model_name.startswith("o1") or model_name.startswith("o3"):
75
67
  temperature = 1
68
+ model_kwargs["reasoning_effort"] = "medium"
76
69
 
70
+ model_kwargs["stream_options"] = {"include_usage": True}
77
71
  if os.getenv("KHOJ_LLM_SEED"):
78
72
  model_kwargs["seed"] = int(os.getenv("KHOJ_LLM_SEED"))
79
73
 
80
- chat: ChatCompletion | openai.Stream[ChatCompletionChunk] = client.chat.completions.create(
74
+ aggregated_response = ""
75
+ with client.beta.chat.completions.stream(
81
76
  messages=formatted_messages, # type: ignore
82
- model=model_name, # type: ignore
83
- stream=stream,
77
+ model=model_name,
84
78
  temperature=temperature,
85
79
  timeout=20,
86
80
  **model_kwargs,
87
- )
88
-
89
- aggregated_response = ""
90
- if not stream:
91
- chunk = chat
92
- aggregated_response = chunk.choices[0].message.content
93
- else:
81
+ ) as chat:
94
82
  for chunk in chat:
95
- if len(chunk.choices) == 0:
83
+ if chunk.type == "error":
84
+ logger.error(f"Openai api response error: {chunk.error}", exc_info=True)
96
85
  continue
97
- delta_chunk = chunk.choices[0].delta # type: ignore
98
- if isinstance(delta_chunk, str):
99
- aggregated_response += delta_chunk
100
- elif delta_chunk.content:
101
- aggregated_response += delta_chunk.content
86
+ elif chunk.type == "content.delta":
87
+ aggregated_response += chunk.delta
102
88
 
103
89
  # Calculate cost of chat
104
90
  input_tokens = chunk.usage.prompt_tokens if hasattr(chunk, "usage") and chunk.usage else 0
@@ -172,20 +158,13 @@ def llm_thread(
172
158
 
173
159
  formatted_messages = [{"role": message.role, "content": message.content} for message in messages]
174
160
 
175
- # Update request parameters for compatability with o1 model series
176
- # Refer: https://platform.openai.com/docs/guides/reasoning/beta-limitations
177
- stream = True
178
- model_kwargs["stream_options"] = {"include_usage": True}
179
- if model_name == "o1":
161
+ # Tune reasoning models arguments
162
+ if model_name.startswith("o1"):
180
163
  temperature = 1
181
- stream = False
182
- model_kwargs.pop("stream_options", None)
183
- elif model_name.startswith("o1-"):
164
+ elif model_name.startswith("o3"):
184
165
  temperature = 1
185
- model_kwargs.pop("response_format", None)
186
- elif model_name.startswith("o3-"):
187
- temperature = 1
188
- # Get the first system message and add the string `Formatting re-enabled` to it. See https://platform.openai.com/docs/guides/reasoning-best-practices
166
+ # Get the first system message and add the string `Formatting re-enabled` to it.
167
+ # See https://platform.openai.com/docs/guides/reasoning-best-practices
189
168
  if len(formatted_messages) > 0:
190
169
  system_messages = [
191
170
  (i, message) for i, message in enumerate(formatted_messages) if message["role"] == "system"
@@ -195,7 +174,6 @@ def llm_thread(
195
174
  formatted_messages[first_system_message_index][
196
175
  "content"
197
176
  ] = f"{first_system_message} Formatting re-enabled"
198
-
199
177
  elif model_name.startswith("deepseek-reasoner"):
200
178
  # Two successive messages cannot be from the same role. Should merge any back-to-back messages from the same role.
201
179
  # The first message should always be a user message (except system message).
@@ -210,6 +188,8 @@ def llm_thread(
210
188
 
211
189
  formatted_messages = updated_messages
212
190
 
191
+ stream = True
192
+ model_kwargs["stream_options"] = {"include_usage": True}
213
193
  if os.getenv("KHOJ_LLM_SEED"):
214
194
  model_kwargs["seed"] = int(os.getenv("KHOJ_LLM_SEED"))
215
195
 
@@ -258,3 +238,15 @@ def llm_thread(
258
238
  logger.error(f"Error in llm_thread: {e}", exc_info=True)
259
239
  finally:
260
240
  g.close()
241
+
242
+
243
+ def get_openai_api_json_support(model_name: str, api_base_url: str = None) -> JsonSupport:
244
+ if model_name.startswith("deepseek-reasoner"):
245
+ return JsonSupport.NONE
246
+ if api_base_url:
247
+ host = urlparse(api_base_url).hostname
248
+ if host and host.endswith(".ai.azure.com"):
249
+ return JsonSupport.OBJECT
250
+ if host == "api.deepinfra.com":
251
+ return JsonSupport.OBJECT
252
+ return JsonSupport.SCHEMA
@@ -345,8 +345,7 @@ def construct_structured_message(
345
345
  constructed_messages.append({"type": "text", "text": attached_file_context})
346
346
  if vision_enabled and images:
347
347
  for image in images:
348
- if image.startswith("https://"):
349
- constructed_messages.append({"type": "image_url", "image_url": {"url": image}})
348
+ constructed_messages.append({"type": "image_url", "image_url": {"url": image}})
350
349
  return constructed_messages
351
350
 
352
351
  if not is_none_or_empty(attached_file_context):
@@ -664,6 +663,23 @@ class ImageWithType:
664
663
  type: str
665
664
 
666
665
 
666
+ def get_image_from_base64(image: str, type="b64"):
667
+ # Extract image type and base64 data from inline image data
668
+ image_base64 = image.split(",", 1)[1]
669
+ image_type = image.split(";", 1)[0].split(":", 1)[1]
670
+
671
+ # Convert image to desired format
672
+ if type == "b64":
673
+ return ImageWithType(content=image_base64, type=image_type)
674
+ elif type == "pil":
675
+ image_data = base64.b64decode(image_base64)
676
+ image_pil = PIL.Image.open(BytesIO(image_data))
677
+ return ImageWithType(content=image_pil, type=image_type)
678
+ elif type == "bytes":
679
+ image_data = base64.b64decode(image_base64)
680
+ return ImageWithType(content=image_data, type=image_type)
681
+
682
+
667
683
  def get_image_from_url(image_url: str, type="pil"):
668
684
  try:
669
685
  response = requests.get(image_url)
@@ -878,3 +894,9 @@ def messages_to_print(messages: list[ChatMessage], max_length: int = 70) -> str:
878
894
  return str(content)
879
895
 
880
896
  return "\n".join([f"{json.dumps(safe_serialize(message.content))[:max_length]}..." for message in messages])
897
+
898
+
899
+ class JsonSupport(int, Enum):
900
+ NONE = 0
901
+ OBJECT = 1
902
+ SCHEMA = 2
@@ -12,7 +12,7 @@ from google.genai import types as gtypes
12
12
  from khoj.database.adapters import ConversationAdapters
13
13
  from khoj.database.models import Agent, KhojUser, TextToImageModelConfig
14
14
  from khoj.routers.helpers import ChatEvent, generate_better_image_prompt
15
- from khoj.routers.storage import upload_image
15
+ from khoj.routers.storage import upload_generated_image_to_bucket
16
16
  from khoj.utils import state
17
17
  from khoj.utils.helpers import convert_image_to_webp, timer
18
18
  from khoj.utils.rawconfig import LocationData
@@ -118,7 +118,7 @@ async def text_to_image(
118
118
 
119
119
  # Decide how to store the generated image
120
120
  with timer("Upload image to S3", logger):
121
- image_url = upload_image(webp_image_bytes, user.uuid)
121
+ image_url = upload_generated_image_to_bucket(webp_image_bytes, user.uuid)
122
122
 
123
123
  if not image_url:
124
124
  image = f"data:image/webp;base64,{base64.b64encode(webp_image_bytes).decode('utf-8')}"
@@ -257,7 +257,7 @@ async def execute_e2b(code: str, input_files: list[dict]) -> dict[str, Any]:
257
257
  continue
258
258
  else:
259
259
  # Text files - encode utf-8 string as base64
260
- b64_data = base64.b64encode(content.encode("utf-8")).decode("utf-8")
260
+ b64_data = content
261
261
  output_files.append({"filename": f.name, "b64_data": b64_data})
262
262
 
263
263
  # Collect output files from execution results
khoj/routers/api_chat.py CHANGED
@@ -64,7 +64,7 @@ from khoj.routers.research import (
64
64
  InformationCollectionIteration,
65
65
  execute_information_collection,
66
66
  )
67
- from khoj.routers.storage import upload_image_to_bucket
67
+ from khoj.routers.storage import upload_user_image_to_bucket
68
68
  from khoj.utils import state
69
69
  from khoj.utils.helpers import (
70
70
  AsyncIteratorWrapper,
@@ -674,9 +674,11 @@ async def chat(
674
674
  base64_data = decoded_string.split(",", 1)[1]
675
675
  image_bytes = base64.b64decode(base64_data)
676
676
  webp_image_bytes = convert_image_to_webp(image_bytes)
677
- uploaded_image = upload_image_to_bucket(webp_image_bytes, request.user.object.id)
678
- if uploaded_image:
679
- uploaded_images.append(uploaded_image)
677
+ uploaded_image = upload_user_image_to_bucket(webp_image_bytes, request.user.object.id)
678
+ if not uploaded_image:
679
+ base64_webp_image = base64.b64encode(webp_image_bytes).decode("utf-8")
680
+ uploaded_image = f"data:image/webp;base64,{base64_webp_image}"
681
+ uploaded_images.append(uploaded_image)
680
682
 
681
683
  query_files: Dict[str, str] = {}
682
684
  if raw_query_files:
khoj/routers/auth.py CHANGED
@@ -43,12 +43,9 @@ class MagicLinkForm(BaseModel):
43
43
  if not state.anonymous_mode:
44
44
  missing_requirements = []
45
45
  from authlib.integrations.starlette_client import OAuth, OAuthError
46
+ from google.auth.transport import requests as google_requests
47
+ from google.oauth2 import id_token
46
48
 
47
- try:
48
- from google.auth.transport import requests as google_requests
49
- from google.oauth2 import id_token
50
- except ImportError:
51
- missing_requirements += ["Install the Khoj production package with `pip install khoj[prod]`"]
52
49
  if not os.environ.get("RESEND_API_KEY") and (
53
50
  not os.environ.get("GOOGLE_CLIENT_ID") or not os.environ.get("GOOGLE_CLIENT_SECRET")
54
51
  ):
khoj/routers/helpers.py CHANGED
@@ -540,11 +540,15 @@ async def generate_online_subqueries(
540
540
 
541
541
  agent_chat_model = agent.chat_model if agent else None
542
542
 
543
+ class OnlineQueries(BaseModel):
544
+ queries: List[str]
545
+
543
546
  with timer("Chat actor: Generate online search subqueries", logger):
544
547
  response = await send_message_to_model_wrapper(
545
548
  online_queries_prompt,
546
549
  query_images=query_images,
547
550
  response_type="json_object",
551
+ response_schema=OnlineQueries,
548
552
  user=user,
549
553
  query_files=query_files,
550
554
  agent_chat_model=agent_chat_model,
@@ -1129,6 +1133,7 @@ async def send_message_to_model_wrapper(
1129
1133
  query: str,
1130
1134
  system_message: str = "",
1131
1135
  response_type: str = "text",
1136
+ response_schema: BaseModel = None,
1132
1137
  deepthought: bool = False,
1133
1138
  user: KhojUser = None,
1134
1139
  query_images: List[str] = None,
@@ -1209,6 +1214,7 @@ async def send_message_to_model_wrapper(
1209
1214
  api_key=api_key,
1210
1215
  model=chat_model_name,
1211
1216
  response_type=response_type,
1217
+ response_schema=response_schema,
1212
1218
  api_base_url=api_base_url,
1213
1219
  tracer=tracer,
1214
1220
  )
@@ -1255,6 +1261,7 @@ async def send_message_to_model_wrapper(
1255
1261
  api_key=api_key,
1256
1262
  model=chat_model_name,
1257
1263
  response_type=response_type,
1264
+ response_schema=response_schema,
1258
1265
  tracer=tracer,
1259
1266
  )
1260
1267
  else:
@@ -1265,6 +1272,7 @@ def send_message_to_model_wrapper_sync(
1265
1272
  message: str,
1266
1273
  system_message: str = "",
1267
1274
  response_type: str = "text",
1275
+ response_schema: BaseModel = None,
1268
1276
  user: KhojUser = None,
1269
1277
  query_images: List[str] = None,
1270
1278
  query_files: str = "",
@@ -1326,6 +1334,7 @@ def send_message_to_model_wrapper_sync(
1326
1334
  api_base_url=api_base_url,
1327
1335
  model=chat_model_name,
1328
1336
  response_type=response_type,
1337
+ response_schema=response_schema,
1329
1338
  tracer=tracer,
1330
1339
  )
1331
1340
 
@@ -1370,6 +1379,7 @@ def send_message_to_model_wrapper_sync(
1370
1379
  api_key=api_key,
1371
1380
  model=chat_model_name,
1372
1381
  response_type=response_type,
1382
+ response_schema=response_schema,
1373
1383
  tracer=tracer,
1374
1384
  )
1375
1385
  else:
khoj/routers/research.py CHANGED
@@ -1,9 +1,12 @@
1
1
  import logging
2
+ import os
2
3
  from datetime import datetime
3
- from typing import Callable, Dict, List, Optional
4
+ from enum import Enum
5
+ from typing import Callable, Dict, List, Optional, Type
4
6
 
5
7
  import yaml
6
8
  from fastapi import Request
9
+ from pydantic import BaseModel, Field
7
10
 
8
11
  from khoj.database.adapters import EntryAdapters
9
12
  from khoj.database.models import Agent, KhojUser
@@ -35,6 +38,40 @@ from khoj.utils.rawconfig import LocationData
35
38
  logger = logging.getLogger(__name__)
36
39
 
37
40
 
41
+ class PlanningResponse(BaseModel):
42
+ """
43
+ Schema for the response from planning agent when deciding the next tool to pick.
44
+ The tool field is dynamically validated based on available tools.
45
+ """
46
+
47
+ scratchpad: str = Field(..., description="Reasoning about which tool to use next")
48
+ query: str = Field(..., description="Detailed query for the selected tool")
49
+
50
+ class Config:
51
+ arbitrary_types_allowed = True
52
+
53
+ @classmethod
54
+ def create_model_with_enum(cls: Type["PlanningResponse"], tool_options: dict) -> Type["PlanningResponse"]:
55
+ """
56
+ Factory method that creates a customized PlanningResponse model
57
+ with a properly typed tool field based on available tools.
58
+
59
+ Args:
60
+ tool_options: Dictionary mapping tool names to values
61
+
62
+ Returns:
63
+ A customized PlanningResponse class
64
+ """
65
+ # Create dynamic enum from tool options
66
+ tool_enum = Enum("ToolEnum", tool_options) # type: ignore
67
+
68
+ # Create and return a customized response model with the enum
69
+ class PlanningResponseWithTool(PlanningResponse):
70
+ tool: tool_enum = Field(..., description="Name of the tool to use")
71
+
72
+ return PlanningResponseWithTool
73
+
74
+
38
75
  async def apick_next_tool(
39
76
  query: str,
40
77
  conversation_history: dict,
@@ -60,10 +97,13 @@ async def apick_next_tool(
60
97
  # Skip showing Notes tool as an option if user has no entries
61
98
  if tool == ConversationCommand.Notes and not user_has_entries:
62
99
  continue
63
- tool_options[tool.value] = description
64
100
  if len(agent_tools) == 0 or tool.value in agent_tools:
101
+ tool_options[tool.name] = tool.value
65
102
  tool_options_str += f'- "{tool.value}": "{description}"\n'
66
103
 
104
+ # Create planning reponse model with dynamically populated tool enum class
105
+ planning_response_model = PlanningResponse.create_model_with_enum(tool_options)
106
+
67
107
  # Construct chat history with user and iteration history with researcher agent for context
68
108
  chat_history = construct_chat_history(conversation_history, agent_name=agent.name if agent else "Khoj")
69
109
  previous_iterations_history = construct_iteration_history(previous_iterations, prompts.previous_iteration)
@@ -95,6 +135,7 @@ async def apick_next_tool(
95
135
  query=query,
96
136
  context=function_planning_prompt,
97
137
  response_type="json_object",
138
+ response_schema=planning_response_model,
98
139
  deepthought=True,
99
140
  user=user,
100
141
  query_images=query_images,
@@ -160,7 +201,7 @@ async def execute_information_collection(
160
201
  query_files: str = None,
161
202
  ):
162
203
  current_iteration = 0
163
- MAX_ITERATIONS = 5
204
+ MAX_ITERATIONS = int(os.getenv("KHOJ_RESEARCH_ITERATIONS", 5))
164
205
  previous_iterations: List[InformationCollectionIteration] = []
165
206
  while current_iteration < MAX_ITERATIONS:
166
207
  online_results: Dict = dict()
khoj/routers/storage.py CHANGED
@@ -9,9 +9,10 @@ AWS_SECRET_KEY = os.getenv("AWS_SECRET_KEY")
9
9
  # S3 supports serving assets via your domain. Khoj expects this to be used in production. To enable it:
10
10
  # 1. Your bucket name for images should be of the form sub.domain.tld. For example, generated.khoj.dev
11
11
  # 2. Add CNAME entry to your domain's DNS records pointing to the S3 bucket. For example, CNAME generated.khoj.dev generated-khoj-dev.s3.amazonaws.com
12
- AWS_UPLOAD_IMAGE_BUCKET_NAME = os.getenv("AWS_IMAGE_UPLOAD_BUCKET")
12
+ AWS_KHOJ_IMAGES_BUCKET_NAME = os.getenv("AWS_IMAGE_UPLOAD_BUCKET")
13
+ AWS_USER_IMAGES_BUCKET_NAME = os.getenv("AWS_USER_UPLOADED_IMAGES_BUCKET_NAME")
13
14
 
14
- aws_enabled = AWS_ACCESS_KEY is not None and AWS_SECRET_KEY is not None and AWS_UPLOAD_IMAGE_BUCKET_NAME is not None
15
+ aws_enabled = AWS_ACCESS_KEY is not None and AWS_SECRET_KEY is not None
15
16
 
16
17
  if aws_enabled:
17
18
  from boto3 import client
@@ -19,45 +20,43 @@ if aws_enabled:
19
20
  s3_client = client("s3", aws_access_key_id=AWS_ACCESS_KEY, aws_secret_access_key=AWS_SECRET_KEY)
20
21
 
21
22
 
22
- def upload_image(image: bytes, user_id: uuid.UUID):
23
- """Upload the image to the S3 bucket"""
23
+ def upload_image_to_bucket(webp_image: bytes, user_id: uuid.UUID, bucket_name: str):
24
+ """Upload webp image to an S3 bucket"""
24
25
  if not aws_enabled:
25
26
  logger.info("AWS is not enabled. Skipping image upload")
26
27
  return None
27
-
28
- image_key = f"{user_id}/{uuid.uuid4()}.webp"
29
- try:
30
- s3_client.put_object(Bucket=AWS_UPLOAD_IMAGE_BUCKET_NAME, Key=image_key, Body=image, ACL="public-read")
31
- url = f"https://{AWS_UPLOAD_IMAGE_BUCKET_NAME}/{image_key}"
32
- return url
33
- except Exception as e:
34
- logger.error(f"Failed to upload image to S3: {e}")
35
- return None
36
-
37
-
38
- AWS_USER_UPLOADED_IMAGES_BUCKET_NAME = os.getenv("AWS_USER_UPLOADED_IMAGES_BUCKET_NAME")
39
-
40
-
41
- def upload_image_to_bucket(image: bytes, user_id: uuid.UUID):
42
- """Upload the image to the S3 bucket"""
43
- if not aws_enabled:
44
- logger.info("AWS is not enabled. Skipping image upload")
28
+ if not bucket_name:
29
+ logger.error(f"{bucket_name} is not set")
45
30
  return None
46
31
 
47
32
  image_key = f"{user_id}/{uuid.uuid4()}.webp"
48
- if not AWS_USER_UPLOADED_IMAGES_BUCKET_NAME:
49
- logger.error("AWS_USER_UPLOADED_IMAGES_BUCKET_NAME is not set")
50
- return None
51
-
52
33
  try:
53
34
  s3_client.put_object(
54
- Bucket=AWS_USER_UPLOADED_IMAGES_BUCKET_NAME,
35
+ Bucket=bucket_name,
55
36
  Key=image_key,
56
- Body=image,
37
+ Body=webp_image,
57
38
  ACL="public-read",
58
39
  ContentType="image/webp",
59
40
  )
60
- return f"https://{AWS_USER_UPLOADED_IMAGES_BUCKET_NAME}/{image_key}"
41
+ return f"https://{bucket_name}/{image_key}"
61
42
  except Exception as e:
62
43
  logger.error(f"Failed to upload image to S3: {e}")
63
44
  return None
45
+
46
+
47
+ def upload_generated_image_to_bucket(image: bytes, user_id: uuid.UUID):
48
+ """Upload khoj generated image to an S3 bucket"""
49
+ return upload_image_to_bucket(
50
+ webp_image=image,
51
+ user_id=user_id,
52
+ bucket_name=AWS_KHOJ_IMAGES_BUCKET_NAME,
53
+ )
54
+
55
+
56
+ def upload_user_image_to_bucket(image: bytes, user_id: uuid.UUID):
57
+ """Upload user attached image to an S3 bucket"""
58
+ return upload_image_to_bucket(
59
+ webp_image=image,
60
+ user_id=user_id,
61
+ bucket_name=AWS_USER_IMAGES_BUCKET_NAME,
62
+ )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: khoj
3
- Version: 1.36.7.dev66
3
+ Version: 1.37.1.dev6
4
4
  Summary: Your Second Brain
5
5
  Project-URL: Homepage, https://khoj.dev
6
6
  Project-URL: Documentation, https://docs.khoj.dev
@@ -39,6 +39,7 @@ Requires-Dist: e2b-code-interpreter~=1.0.0
39
39
  Requires-Dist: einops==0.8.0
40
40
  Requires-Dist: email-validator==2.2.0
41
41
  Requires-Dist: fastapi>=0.110.0
42
+ Requires-Dist: google-auth~=2.23.3
42
43
  Requires-Dist: google-genai==1.5.0
43
44
  Requires-Dist: httpx==0.28.1
44
45
  Requires-Dist: huggingface-hub>=0.22.2
@@ -69,12 +70,12 @@ Requires-Dist: requests>=2.26.0
69
70
  Requires-Dist: resend==1.0.1
70
71
  Requires-Dist: rich>=13.3.1
71
72
  Requires-Dist: schedule==1.1.0
72
- Requires-Dist: sentence-transformers==3.0.1
73
+ Requires-Dist: sentence-transformers==3.4.1
73
74
  Requires-Dist: tenacity==8.3.0
74
75
  Requires-Dist: tenacity>=8.2.2
75
76
  Requires-Dist: tiktoken>=0.3.2
76
77
  Requires-Dist: torch==2.2.2
77
- Requires-Dist: transformers>=4.28.0
78
+ Requires-Dist: transformers<4.50.0,>=4.28.0
78
79
  Requires-Dist: tzdata==2023.3
79
80
  Requires-Dist: uvicorn==0.30.6
80
81
  Requires-Dist: websockets==13.0
@@ -85,7 +86,6 @@ Requires-Dist: datasets; extra == 'dev'
85
86
  Requires-Dist: factory-boy>=3.2.1; extra == 'dev'
86
87
  Requires-Dist: freezegun>=1.2.0; extra == 'dev'
87
88
  Requires-Dist: gitpython~=3.1.43; extra == 'dev'
88
- Requires-Dist: google-auth==2.23.3; extra == 'dev'
89
89
  Requires-Dist: gunicorn==22.0.0; extra == 'dev'
90
90
  Requires-Dist: mypy>=1.0.1; extra == 'dev'
91
91
  Requires-Dist: pandas; extra == 'dev'
@@ -98,7 +98,6 @@ Requires-Dist: stripe==7.3.0; extra == 'dev'
98
98
  Requires-Dist: twilio==8.11; extra == 'dev'
99
99
  Provides-Extra: prod
100
100
  Requires-Dist: boto3>=1.34.57; extra == 'prod'
101
- Requires-Dist: google-auth==2.23.3; extra == 'prod'
102
101
  Requires-Dist: gunicorn==22.0.0; extra == 'prod'
103
102
  Requires-Dist: stripe==7.3.0; extra == 'prod'
104
103
  Requires-Dist: twilio==8.11; extra == 'prod'