khoj 1.36.7.dev7__py3-none-any.whl → 1.36.7.dev22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. khoj/database/adapters/__init__.py +15 -0
  2. khoj/interface/compiled/404/index.html +2 -2
  3. khoj/interface/compiled/_next/static/chunks/{2327-02e86a50c65e575a.js → 2327-36d17f2483e80f60.js} +1 -1
  4. khoj/interface/compiled/_next/static/chunks/{8155-ad130153ddcc930f.js → 8155-87b4d2ea2cf725cc.js} +1 -1
  5. khoj/interface/compiled/_next/static/chunks/app/agents/{layout-64b81f8eeac13427.js → layout-447b58869479276c.js} +1 -1
  6. khoj/interface/compiled/_next/static/chunks/app/agents/{page-2f55f9d0da49bf31.js → page-fbe2c1c661cd14ac.js} +1 -1
  7. khoj/interface/compiled/_next/static/chunks/app/automations/{page-d0a630a2b4ecc41d.js → page-ad620b194fd508fe.js} +1 -1
  8. khoj/interface/compiled/_next/static/chunks/app/chat/{layout-9e151fb837f53026.js → layout-4d0b1ba93124fccb.js} +1 -1
  9. khoj/interface/compiled/_next/static/chunks/app/chat/{page-53ba9f1424043383.js → page-4108f46796c1c606.js} +1 -1
  10. khoj/interface/compiled/_next/static/chunks/app/{layout-26139159e500852a.js → layout-6dba801826c4fe59.js} +1 -1
  11. khoj/interface/compiled/_next/static/chunks/app/{page-642bd02fc4f16606.js → page-f91e6a6a849baf5e.js} +1 -1
  12. khoj/interface/compiled/_next/static/chunks/app/search/{layout-ff081947c70ea9b7.js → layout-ab5dbb69fb914900.js} +1 -1
  13. khoj/interface/compiled/_next/static/chunks/app/search/{page-7c80e369ee1cdfad.js → page-30e231665f1f3796.js} +1 -1
  14. khoj/interface/compiled/_next/static/chunks/app/settings/{page-c961681e308a334b.js → page-c580520d59d92267.js} +1 -1
  15. khoj/interface/compiled/_next/static/chunks/app/share/chat/{layout-94a33aa0eae034fc.js → layout-2ce0cb95b1219d97.js} +1 -1
  16. khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-6253896a84300e9b.js → page-ffcb3ce5c6af9988.js} +1 -1
  17. khoj/interface/compiled/_next/static/chunks/{webpack-f813971dd4615afd.js → webpack-f83765a7accac982.js} +1 -1
  18. khoj/interface/compiled/_next/static/css/7889a30fe9c83846.css +1 -0
  19. khoj/interface/compiled/_next/static/css/{804ceddd6c935d4a.css → 8051073dc55b92b3.css} +1 -1
  20. khoj/interface/compiled/_next/static/css/f29752d6e1be7624.css +1 -0
  21. khoj/interface/compiled/_next/static/media/2aa11a72f7f24b58-s.woff2 +0 -0
  22. khoj/interface/compiled/_next/static/media/383a65b63658737d-s.woff2 +0 -0
  23. khoj/interface/compiled/_next/static/media/40381518f67e6cb9-s.p.woff2 +0 -0
  24. khoj/interface/compiled/_next/static/media/85fe2766c5e6072a-s.woff2 +0 -0
  25. khoj/interface/compiled/_next/static/media/8a6e4d7cd15e805a-s.woff2 +0 -0
  26. khoj/interface/compiled/agents/index.html +3 -3
  27. khoj/interface/compiled/agents/index.txt +3 -3
  28. khoj/interface/compiled/automations/index.html +2 -2
  29. khoj/interface/compiled/automations/index.txt +3 -3
  30. khoj/interface/compiled/chat/index.html +3 -3
  31. khoj/interface/compiled/chat/index.txt +3 -3
  32. khoj/interface/compiled/index.html +2 -2
  33. khoj/interface/compiled/index.txt +3 -3
  34. khoj/interface/compiled/search/index.html +2 -2
  35. khoj/interface/compiled/search/index.txt +3 -3
  36. khoj/interface/compiled/settings/index.html +3 -3
  37. khoj/interface/compiled/settings/index.txt +2 -2
  38. khoj/interface/compiled/share/chat/index.html +3 -3
  39. khoj/interface/compiled/share/chat/index.txt +3 -3
  40. khoj/processor/conversation/anthropic/anthropic_chat.py +6 -3
  41. khoj/processor/conversation/anthropic/utils.py +48 -13
  42. khoj/processor/conversation/google/gemini_chat.py +12 -12
  43. khoj/processor/conversation/google/utils.py +63 -63
  44. khoj/processor/conversation/prompts.py +100 -19
  45. khoj/processor/conversation/utils.py +6 -0
  46. khoj/processor/tools/run_code.py +163 -21
  47. khoj/routers/helpers.py +5 -0
  48. khoj/routers/research.py +1 -0
  49. khoj/utils/constants.py +6 -2
  50. khoj/utils/helpers.py +11 -2
  51. khoj/utils/initialization.py +24 -7
  52. {khoj-1.36.7.dev7.dist-info → khoj-1.36.7.dev22.dist-info}/METADATA +7 -6
  53. {khoj-1.36.7.dev7.dist-info → khoj-1.36.7.dev22.dist-info}/RECORD +58 -54
  54. khoj/interface/compiled/_next/static/css/089de1d8526b96e9.css +0 -1
  55. khoj/interface/compiled/_next/static/css/55d4a822f8d94b67.css +0 -1
  56. khoj/interface/compiled/_next/static/media/e098aaaecc9cfbb2-s.p.woff2 +0 -0
  57. /khoj/interface/compiled/_next/static/{bzWTm19u6qe1y98Xfrqoo → w25ObnntxL_4D4MY2j-Yc}/_buildManifest.js +0 -0
  58. /khoj/interface/compiled/_next/static/{bzWTm19u6qe1y98Xfrqoo → w25ObnntxL_4D4MY2j-Yc}/_ssgManifest.js +0 -0
  59. {khoj-1.36.7.dev7.dist-info → khoj-1.36.7.dev22.dist-info}/WHEEL +0 -0
  60. {khoj-1.36.7.dev7.dist-info → khoj-1.36.7.dev22.dist-info}/entry_points.txt +0 -0
  61. {khoj-1.36.7.dev7.dist-info → khoj-1.36.7.dev22.dist-info}/licenses/LICENSE +0 -0
khoj/processor/tools/run_code.py CHANGED
@@ -1,12 +1,23 @@
+ import asyncio
  import base64
  import datetime
  import logging
  import mimetypes
  import os
+ import re
  from pathlib import Path
  from typing import Any, Callable, List, NamedTuple, Optional

  import aiohttp
+ from asgiref.sync import sync_to_async
+ from httpx import RemoteProtocolError
+ from tenacity import (
+     before_sleep_log,
+     retry,
+     retry_if_exception_type,
+     stop_after_attempt,
+     wait_random_exponential,
+ )

  from khoj.database.adapters import FileObjectAdapters
  from khoj.database.models import Agent, FileObject, KhojUser
@@ -15,22 +26,26 @@ from khoj.processor.conversation.utils import (
      ChatEvent,
      clean_code_python,
      construct_chat_history,
-     load_complex_json,
  )
  from khoj.routers.helpers import send_message_to_model_wrapper
- from khoj.utils.helpers import is_none_or_empty, timer, truncate_code_context
+ from khoj.utils.helpers import (
+     is_e2b_code_sandbox_enabled,
+     is_none_or_empty,
+     timer,
+     truncate_code_context,
+ )
  from khoj.utils.rawconfig import LocationData

  logger = logging.getLogger(__name__)


  SANDBOX_URL = os.getenv("KHOJ_TERRARIUM_URL", "http://localhost:8080")
+ DEFAULT_E2B_TEMPLATE = "pmt2o0ghpang8gbiys57"


  class GeneratedCode(NamedTuple):
      code: str
-     input_files: List[str]
-     input_links: List[str]
+     input_files: List[FileObject]


  async def run_code(
@@ -68,13 +83,10 @@ async def run_code(

      # Prepare Input Data
      input_data = []
-     user_input_files: List[FileObject] = []
-     for input_file in generated_code.input_files:
-         user_input_files += await FileObjectAdapters.aget_file_objects_by_name(user, input_file)
-     for f in user_input_files:
+     for f in generated_code.input_files:
          input_data.append(
              {
-                 "filename": os.path.basename(f.file_name),
+                 "filename": f.file_name,
                  "b64_data": base64.b64encode(f.raw_text.encode("utf-8")).decode("utf-8"),
              }
          )
@@ -90,6 +102,14 @@ async def run_code(
              cleaned_result = truncate_code_context({"cleaned": {"results": result}})["cleaned"]["results"]
              logger.info(f"Executed Code\n----\n{code}\n----\nResult\n----\n{cleaned_result}\n----")
              yield {query: {"code": code, "results": result}}
+     except asyncio.TimeoutError as e:
+         # Call the sandbox_url/stop GET API endpoint to stop the code sandbox
+         error = f"Failed to run code for {query} with Timeout error: {e}"
+         try:
+             await aiohttp.ClientSession().get(f"{sandbox_url}/stop", timeout=5)
+         except Exception as e:
+             error += f"\n\nFailed to stop code sandbox with error: {e}"
+         raise ValueError(error)
      except Exception as e:
          raise ValueError(f"Failed to run code for {query} with error: {e}")

@@ -114,6 +134,12 @@ async def generate_python_code(
          prompts.personality_context.format(personality=agent.personality) if agent and agent.personality else ""
      )

+     # add sandbox specific context like available packages
+     sandbox_context = (
+         prompts.e2b_sandbox_context if is_e2b_code_sandbox_enabled() else prompts.terrarium_sandbox_context
+     )
+     personality_context = f"{sandbox_context}\n{personality_context}"
+
      code_generation_prompt = prompts.python_code_generation_prompt.format(
          current_date=utc_date,
          query=q,
@@ -127,23 +153,50 @@
      response = await send_message_to_model_wrapper(
          code_generation_prompt,
          query_images=query_images,
-         response_type="json_object",
          user=user,
          tracer=tracer,
          query_files=query_files,
      )

-     # Validate that the response is a non-empty, JSON-serializable list
-     response = load_complex_json(response)
-     code = response.get("code", "").strip()
-     input_files = response.get("input_files", [])
-     input_links = response.get("input_links", [])
+     # Extract python code wrapped in markdown code blocks from the response
+     code_blocks = re.findall(r"```(?:python)?\n(.*?)\n```", response, re.DOTALL)
+
+     if not code_blocks:
+         raise ValueError("No Python code blocks found in response")
+
+     # Join multiple code blocks with newlines and strip any leading/trailing whitespace
+     code = "\n".join(code_blocks).strip()

      if not isinstance(code, str) or is_none_or_empty(code):
          raise ValueError
-     return GeneratedCode(code, input_files, input_links)

+     # Infer user files required in sandbox based on user file paths mentioned in code
+     input_files: List[FileObject] = []
+     user_files = await sync_to_async(set)(FileObjectAdapters.get_all_file_objects(user))
+     for user_file in user_files:
+         if user_file.file_name in code:
+             # Replace references to full file path used in code with just the file basename to ease reference in sandbox
+             file_basename = os.path.basename(user_file.file_name)
+             code = code.replace(user_file.file_name, file_basename)
+             user_file.file_name = file_basename
+             input_files.append(user_file)
+
+     return GeneratedCode(code, input_files)

+
+ @retry(
+     retry=(
+         retry_if_exception_type(aiohttp.ClientError)
+         | retry_if_exception_type(aiohttp.ClientTimeout)
+         | retry_if_exception_type(asyncio.TimeoutError)
+         | retry_if_exception_type(ConnectionError)
+         | retry_if_exception_type(RemoteProtocolError)
+     ),
+     wait=wait_random_exponential(min=1, max=5),
+     stop=stop_after_attempt(3),
+     before_sleep=before_sleep_log(logger, logging.DEBUG),
+     reraise=True,
+ )
  async def execute_sandboxed_python(code: str, input_data: list[dict], sandbox_url: str = SANDBOX_URL) -> dict[str, Any]:
      """
      Takes code to run as a string and calls the terrarium API to execute it.
@@ -152,15 +205,104 @@ async def execute_sandboxed_python(code: str, input_data: list[dict], sandbox_ur
      Reference data i/o format based on Terrarium example client code at:
      https://github.com/cohere-ai/cohere-terrarium/blob/main/example-clients/python/terrarium_client.py
      """
-     headers = {"Content-Type": "application/json"}
      cleaned_code = clean_code_python(code)
-     data = {"code": cleaned_code, "files": input_data}
+     if is_e2b_code_sandbox_enabled():
+         try:
+             return await execute_e2b(cleaned_code, input_data)
+         except ImportError:
+             pass
+     return await execute_terrarium(cleaned_code, input_data, sandbox_url)
+
+
+ async def execute_e2b(code: str, input_files: list[dict]) -> dict[str, Any]:
+     """Execute code and handle file I/O in e2b sandbox"""
+     from e2b_code_interpreter import AsyncSandbox
+
+     sandbox = await AsyncSandbox.create(
+         api_key=os.getenv("E2B_API_KEY"),
+         template=os.getenv("E2B_TEMPLATE", DEFAULT_E2B_TEMPLATE),
+         timeout=120,
+         request_timeout=30,
+     )

+     try:
+         # Upload input files in parallel
+         upload_tasks = [
+             sandbox.files.write(path=file["filename"], data=base64.b64decode(file["b64_data"]), request_timeout=30)
+             for file in input_files
+         ]
+         await asyncio.gather(*upload_tasks)
+
+         # Note stored files before execution to identify new files created during execution
+         E2bFile = NamedTuple("E2bFile", [("name", str), ("path", str)])
+         original_files = {E2bFile(f.name, f.path) for f in await sandbox.files.list("~")}
+
+         # Execute code from main.py file
+         execution = await sandbox.run_code(code=code, timeout=60)
+
+         # Collect output files
+         output_files = []
+
+         # Identify new files created during execution
+         new_files = set(E2bFile(f.name, f.path) for f in await sandbox.files.list("~")) - original_files
+         # Read newly created files in parallel
+         download_tasks = [sandbox.files.read(f.path, request_timeout=30) for f in new_files]
+         downloaded_files = await asyncio.gather(*download_tasks)
+         for f, content in zip(new_files, downloaded_files):
+             if isinstance(content, bytes):
+                 # Binary files like PNG - encode as base64
+                 b64_data = base64.b64encode(content).decode("utf-8")
+             elif Path(f.name).suffix in [".png", ".jpeg", ".jpg", ".svg"]:
+                 # Ignore image files as they are extracted from execution results below for inline display
+                 continue
+             else:
+                 # Text files - encode utf-8 string as base64
+                 b64_data = base64.b64encode(content.encode("utf-8")).decode("utf-8")
+             output_files.append({"filename": f.name, "b64_data": b64_data})
+
+         # Collect output files from execution results
+         for idx, result in enumerate(execution.results):
+             for result_type in {"png", "jpeg", "svg", "text", "markdown", "json"}:
+                 if b64_data := getattr(result, result_type, None):
+                     output_files.append({"filename": f"{idx}.{result_type}", "b64_data": b64_data})
+                     break
+
+         # collect logs
+         success = not execution.error and not execution.logs.stderr
+         stdout = "\n".join(execution.logs.stdout)
+         errors = "\n".join(execution.logs.stderr)
+         if execution.error:
+             errors = f"{execution.error}\n{errors}"
+
+         return {
+             "code": code,
+             "success": success,
+             "std_out": stdout,
+             "std_err": errors,
+             "output_files": output_files,
+         }
+     except Exception as e:
+         return {
+             "code": code,
+             "success": False,
+             "std_err": f"Sandbox failed to execute code: {str(e)}",
+             "output_files": [],
+         }
+
+
+ async def execute_terrarium(
+     code: str,
+     input_data: list[dict],
+     sandbox_url: str,
+ ) -> dict[str, Any]:
+     """Execute code using Terrarium sandbox"""
+     headers = {"Content-Type": "application/json"}
+     data = {"code": code, "files": input_data}
      async with aiohttp.ClientSession() as session:
-         async with session.post(sandbox_url, json=data, headers=headers) as response:
+         async with session.post(sandbox_url, json=data, headers=headers, timeout=30) as response:
              if response.status == 200:
                  result: dict[str, Any] = await response.json()
-                 result["code"] = cleaned_code
+                 result["code"] = code
                  # Store decoded output files
                  result["output_files"] = result.get("output_files", [])
                  for output_file in result["output_files"]:
@@ -172,7 +314,7 @@ async def execute_sandboxed_python(code: str, input_data: list[dict], sandbox_ur
                  return result
              else:
                  return {
-                     "code": cleaned_code,
+                     "code": code,
                      "success": False,
                      "std_err": f"Failed to execute code with {response.status}",
                      "output_files": [],
khoj/routers/helpers.py CHANGED
@@ -1125,6 +1125,7 @@ async def send_message_to_model_wrapper(
      query: str,
      system_message: str = "",
      response_type: str = "text",
+     deepthought: bool = False,
      user: KhojUser = None,
      query_images: List[str] = None,
      context: str = "",
@@ -1227,6 +1228,7 @@
              api_key=api_key,
              model=chat_model_name,
              response_type=response_type,
+             deepthought=deepthought,
              tracer=tracer,
          )
      elif model_type == ChatModel.ModelType.GOOGLE:
@@ -1425,11 +1427,13 @@ def generate_chat_response(
      )

      query_to_run = q
+     deepthought = False
      if meta_research:
          query_to_run = f"<query>{q}</query>\n<collected_research>\n{meta_research}\n</collected_research>"
          compiled_references = []
          online_results = {}
          code_results = {}
+         deepthought = True

      chat_model = ConversationAdapters.get_valid_chat_model(user, conversation, is_subscribed)
      vision_available = chat_model.vision_enabled
@@ -1513,6 +1517,7 @@
                  generated_files=raw_generated_files,
                  generated_asset_results=generated_asset_results,
                  program_execution_context=program_execution_context,
+                 deepthought=deepthought,
                  tracer=tracer,
              )
          elif chat_model.model_type == ChatModel.ModelType.GOOGLE:
khoj/routers/research.py CHANGED
@@ -95,6 +95,7 @@ async def apick_next_tool(
          query=query,
          context=function_planning_prompt,
          response_type="json_object",
+         deepthought=True,
          user=user,
          query_images=query_images,
          query_files=query_files,
khoj/utils/constants.py CHANGED
@@ -18,7 +18,7 @@ default_offline_chat_models = [
      "bartowski/Qwen2.5-14B-Instruct-GGUF",
  ]
  default_openai_chat_models = ["gpt-4o-mini", "gpt-4o"]
- default_gemini_chat_models = ["gemini-1.5-flash", "gemini-1.5-pro"]
+ default_gemini_chat_models = ["gemini-2.0-flash", "gemini-1.5-pro"]
  default_anthropic_chat_models = ["claude-3-5-sonnet-20241022", "claude-3-5-haiku-20241022"]

  empty_config = {
@@ -46,7 +46,11 @@ model_to_cost: Dict[str, Dict[str, float]] = {
      "gemini-1.5-flash-002": {"input": 0.075, "output": 0.30},
      "gemini-1.5-pro": {"input": 1.25, "output": 5.00},
      "gemini-1.5-pro-002": {"input": 1.25, "output": 5.00},
+     "gemini-2.0-flash": {"input": 0.10, "output": 0.40},
      # Anthropic Pricing: https://www.anthropic.com/pricing#anthropic-api_
-     "claude-3-5-sonnet-20241022": {"input": 3.0, "output": 15.0},
      "claude-3-5-haiku-20241022": {"input": 1.0, "output": 5.0},
+     "claude-3-5-sonnet-20241022": {"input": 3.0, "output": 15.0},
+     "claude-3-5-sonnet-latest": {"input": 3.0, "output": 15.0},
+     "claude-3-7-sonnet-20250219": {"input": 3.0, "output": 15.0},
+     "claude-3-7-sonnet-latest": {"input": 3.0, "output": 15.0},
  }
khoj/utils/helpers.py CHANGED
@@ -321,6 +321,12 @@ def get_device() -> torch.device:
      return torch.device("cpu")


+ def is_e2b_code_sandbox_enabled():
+     """Check if E2B code sandbox is enabled.
+     Set E2B_API_KEY environment variable to use it."""
+     return not is_none_or_empty(os.getenv("E2B_API_KEY"))
+
+
  class ConversationCommand(str, Enum):
      Default = "default"
      General = "general"
@@ -362,20 +368,23 @@ command_descriptions_for_agent = {
      ConversationCommand.Code: "Agent can run Python code to parse information, run complex calculations, create documents and charts.",
  }

+ e2b_tool_description = "To run Python code in a E2B sandbox with no network access. Helpful to parse complex information, run calculations, create text documents and create charts with quantitative data. Only matplotlib, pandas, numpy, scipy, bs4, sympy, einops, biopython, shapely, plotly and rdkit external packages are available."
+ terrarium_tool_description = "To run Python code in a Terrarium, Pyodide sandbox with no network access. Helpful to parse complex information, run complex calculations, create plaintext documents and create charts with quantitative data. Only matplotlib, panda, numpy, scipy, bs4 and sympy external packages are available."
+
  tool_descriptions_for_llm = {
      ConversationCommand.Default: "To use a mix of your internal knowledge and the user's personal knowledge, or if you don't entirely understand the query.",
      ConversationCommand.General: "To use when you can answer the question without any outside information or personal knowledge",
      ConversationCommand.Notes: "To search the user's personal knowledge base. Especially helpful if the question expects context from the user's notes or documents.",
      ConversationCommand.Online: "To search for the latest, up-to-date information from the internet. Note: **Questions about Khoj should always use this data source**",
      ConversationCommand.Webpage: "To use if the user has directly provided the webpage urls or you are certain of the webpage urls to read.",
-     ConversationCommand.Code: "To run Python code in a Pyodide sandbox with no network access. Helpful when need to parse complex information, run complex calculations, create plaintext documents, and create charts with quantitative data. Only matplotlib, panda, numpy, scipy, bs4 and sympy external packages are available.",
+     ConversationCommand.Code: e2b_tool_description if is_e2b_code_sandbox_enabled() else terrarium_tool_description,
  }

  function_calling_description_for_llm = {
      ConversationCommand.Notes: "To search the user's personal knowledge base. Especially helpful if the question expects context from the user's notes or documents.",
      ConversationCommand.Online: "To search the internet for information. Useful to get a quick, broad overview from the internet. Provide all relevant context to ensure new searches, not in previous iterations, are performed.",
      ConversationCommand.Webpage: "To extract information from webpages. Useful for more detailed research from the internet. Usually used when you know the webpage links to refer to. Share the webpage links and information to extract in your query.",
-     ConversationCommand.Code: "To run Python code in a Pyodide sandbox with no network access. Helpful when need to parse complex information, run complex calculations, create plaintext documents, and create charts with quantitative data. Only matplotlib, panda, numpy, scipy, bs4 and sympy external packages are available.",
+     ConversationCommand.Code: e2b_tool_description if is_e2b_code_sandbox_enabled() else terrarium_tool_description,
  }

  mode_descriptions_for_llm = {
khoj/utils/initialization.py CHANGED
@@ -185,16 +185,18 @@ def initialization(interactive: bool = True):
          )
          provider_name = provider_name or model_type.name.capitalize()

-         default_use_model = {True: "y", False: "n"}[default_api_key is not None]
-
-         # If not in interactive mode & in the offline setting, it's most likely that we're running in a containerized environment. This usually means there's not enough RAM to load offline models directly within the application. In such cases, we default to not using the model -- it's recommended to use another service like Ollama to host the model locally in that case.
-         default_use_model = {True: "n", False: default_use_model}[is_offline]
+         default_use_model = default_api_key is not None
+         # If not in interactive mode & in the offline setting, it's most likely that we're running in a containerized environment.
+         # This usually means there's not enough RAM to load offline models directly within the application.
+         # In such cases, we default to not using the model -- it's recommended to use another service like Ollama to host the model locally in that case.
+         if is_offline:
+             default_use_model = False

          use_model_provider = (
-             default_use_model if not interactive else input(f"Add {provider_name} chat models? (y/n): ")
+             default_use_model if not interactive else input(f"Add {provider_name} chat models? (y/n): ") == "y"
          )

-         if use_model_provider != "y":
+         if not use_model_provider:
              return False, None

          logger.info(f"️💬 Setting up your {provider_name} chat configuration")
@@ -303,4 +305,19 @@
                  logger.error(f"🚨 Failed to create chat configuration: {e}", exc_info=True)
      else:
          _update_chat_model_options()
-         logger.info("🗣️ Chat model configuration updated")
+         logger.info("🗣️ Chat model options updated")
+
+         # Update the default chat model if it doesn't match
+         chat_config = ConversationAdapters.get_default_chat_model()
+         env_default_chat_model = os.getenv("KHOJ_DEFAULT_CHAT_MODEL")
+         if not chat_config or not env_default_chat_model:
+             return
+         if chat_config.name != env_default_chat_model:
+             chat_model = ConversationAdapters.get_chat_model_by_name(env_default_chat_model)
+             if not chat_model:
+                 logger.error(
+                     f"🚨 Not setting default chat model. Chat model {env_default_chat_model} not found in existing chat model options."
+                 )
+                 return
+             ConversationAdapters.set_default_chat_model(chat_model)
+             logger.info(f"🗣️ Default chat model set to {chat_model.name}")
{khoj-1.36.7.dev7.dist-info → khoj-1.36.7.dev22.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: khoj
- Version: 1.36.7.dev7
+ Version: 1.36.7.dev22
  Summary: Your Second Brain
  Project-URL: Homepage, https://khoj.dev
  Project-URL: Documentation, https://docs.khoj.dev
@@ -22,8 +22,8 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
  Classifier: Topic :: Scientific/Engineering :: Human Machine Interfaces
  Requires-Python: >=3.10
  Requires-Dist: aiohttp~=3.9.0
- Requires-Dist: anthropic==0.26.1
- Requires-Dist: anyio==3.7.1
+ Requires-Dist: anthropic==0.49.0
+ Requires-Dist: anyio~=4.8.0
  Requires-Dist: apscheduler~=3.10.0
  Requires-Dist: authlib==1.2.1
  Requires-Dist: beautifulsoup4~=4.12.3
@@ -35,11 +35,12 @@ Requires-Dist: django-phonenumber-field==7.3.0
  Requires-Dist: django-unfold==0.42.0
  Requires-Dist: django==5.0.10
  Requires-Dist: docx2txt==0.8
+ Requires-Dist: e2b-code-interpreter~=1.0.0
  Requires-Dist: einops==0.8.0
  Requires-Dist: email-validator==2.2.0
  Requires-Dist: fastapi>=0.110.0
- Requires-Dist: google-generativeai==0.8.3
- Requires-Dist: httpx==0.25.0
+ Requires-Dist: google-genai==1.5.0
+ Requires-Dist: httpx==0.28.1
  Requires-Dist: huggingface-hub>=0.22.2
  Requires-Dist: itsdangerous==2.1.2
  Requires-Dist: jinja2==3.1.5
@@ -76,7 +77,7 @@ Requires-Dist: torch==2.2.2
  Requires-Dist: transformers>=4.28.0
  Requires-Dist: tzdata==2023.3
  Requires-Dist: uvicorn==0.30.6
- Requires-Dist: websockets==12.0
+ Requires-Dist: websockets==13.0
  Provides-Extra: dev
  Requires-Dist: black>=23.1.0; extra == 'dev'
  Requires-Dist: boto3>=1.34.57; extra == 'dev'