khoj 1.42.3.dev1__py3-none-any.whl → 1.42.4__py3-none-any.whl

This diff compares the contents of two publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only.
Files changed (61)
  1. khoj/database/models/__init__.py +3 -3
  2. khoj/interface/compiled/404/index.html +2 -2
  3. khoj/interface/compiled/_next/static/chunks/2327-916342b58294de9c.js +1 -0
  4. khoj/interface/compiled/_next/static/chunks/7127-a6dc754bce8b6855.js +1 -0
  5. khoj/interface/compiled/_next/static/chunks/app/agents/layout-e00fb81dca656a10.js +1 -0
  6. khoj/interface/compiled/_next/static/chunks/app/agents/{page-e18e67cff45758c8.js → page-2fac1d5ac7192e73.js} +1 -1
  7. khoj/interface/compiled/_next/static/chunks/app/automations/{page-1c2280ae9678b4ce.js → page-ef89ac958e78aa81.js} +1 -1
  8. khoj/interface/compiled/_next/static/chunks/app/chat/layout-33934fc2d6ae6838.js +1 -0
  9. khoj/interface/compiled/_next/static/chunks/app/chat/page-0b31c505ddbff52d.js +1 -0
  10. khoj/interface/compiled/_next/static/chunks/app/{page-a4b97dd0c2a70cfb.js → page-45ae5e99e8a61821.js} +1 -1
  11. khoj/interface/compiled/_next/static/chunks/app/search/layout-c02531d586972d7d.js +1 -0
  12. khoj/interface/compiled/_next/static/chunks/app/search/{page-44072d929427ee56.js → page-afb5e7ed13d221c1.js} +1 -1
  13. khoj/interface/compiled/_next/static/chunks/app/settings/{page-4e8fdd30a3238357.js → page-8fb6cc97be8774a7.js} +1 -1
  14. khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-e8e5db7830bf3f47.js +1 -0
  15. khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-27560c92db5fc2d9.js → page-da90c78180a86040.js} +1 -1
  16. khoj/interface/compiled/_next/static/chunks/{webpack-b61b76223325589e.js → webpack-1c900156837baf90.js} +1 -1
  17. khoj/interface/compiled/_next/static/css/37a73b87f02df402.css +1 -0
  18. khoj/interface/compiled/_next/static/css/821d0d60b0b6871d.css +1 -0
  19. khoj/interface/compiled/_next/static/css/{e1bf03aa79521f86.css → 9a460202d29476e5.css} +1 -1
  20. khoj/interface/compiled/agents/index.html +2 -2
  21. khoj/interface/compiled/agents/index.txt +2 -2
  22. khoj/interface/compiled/automations/index.html +2 -2
  23. khoj/interface/compiled/automations/index.txt +3 -3
  24. khoj/interface/compiled/chat/index.html +2 -2
  25. khoj/interface/compiled/chat/index.txt +2 -2
  26. khoj/interface/compiled/index.html +2 -2
  27. khoj/interface/compiled/index.txt +2 -2
  28. khoj/interface/compiled/search/index.html +2 -2
  29. khoj/interface/compiled/search/index.txt +2 -2
  30. khoj/interface/compiled/settings/index.html +2 -2
  31. khoj/interface/compiled/settings/index.txt +4 -4
  32. khoj/interface/compiled/share/chat/index.html +2 -2
  33. khoj/interface/compiled/share/chat/index.txt +2 -2
  34. khoj/processor/conversation/openai/utils.py +115 -42
  35. khoj/processor/conversation/utils.py +16 -15
  36. khoj/processor/image/generate.py +3 -3
  37. khoj/routers/api_agents.py +1 -1
  38. khoj/routers/api_chat.py +6 -2
  39. khoj/utils/constants.py +7 -6
  40. {khoj-1.42.3.dev1.dist-info → khoj-1.42.4.dist-info}/METADATA +2 -2
  41. {khoj-1.42.3.dev1.dist-info → khoj-1.42.4.dist-info}/RECORD +52 -52
  42. khoj/interface/compiled/_next/static/chunks/2327-f03b2a77f67b8f8c.js +0 -1
  43. khoj/interface/compiled/_next/static/chunks/7127-79a3af5138960272.js +0 -1
  44. khoj/interface/compiled/_next/static/chunks/app/agents/layout-e49165209d2e406c.js +0 -1
  45. khoj/interface/compiled/_next/static/chunks/app/chat/layout-d5ae861e1ade9d08.js +0 -1
  46. khoj/interface/compiled/_next/static/chunks/app/chat/page-2714aec91bd8f3ea.js +0 -1
  47. khoj/interface/compiled/_next/static/chunks/app/search/layout-f5881c7ae3ba0795.js +0 -1
  48. khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-64a53f8ec4afa6b3.js +0 -1
  49. khoj/interface/compiled/_next/static/css/1e9b757ee2a2b34b.css +0 -1
  50. khoj/interface/compiled/_next/static/css/ee66643a6a5bf71c.css +0 -1
  51. /khoj/interface/compiled/_next/static/{tl-RqF8W5lpwWPPHYumnV → Cv2JBC6ve4VZFIpF82jO8}/_buildManifest.js +0 -0
  52. /khoj/interface/compiled/_next/static/{tl-RqF8W5lpwWPPHYumnV → Cv2JBC6ve4VZFIpF82jO8}/_ssgManifest.js +0 -0
  53. /khoj/interface/compiled/_next/static/chunks/{1915-1943ee8a628b893c.js → 1915-ab4353eaca76f690.js} +0 -0
  54. /khoj/interface/compiled/_next/static/chunks/{2117-056a00add390772b.js → 2117-3537ef9986be74d3.js} +0 -0
  55. /khoj/interface/compiled/_next/static/chunks/{4363-e6ac2203564d1a3b.js → 4363-4efaf12abe696251.js} +0 -0
  56. /khoj/interface/compiled/_next/static/chunks/{4447-e038b251d626c340.js → 4447-5d44807c40355b1a.js} +0 -0
  57. /khoj/interface/compiled/_next/static/chunks/{8667-8136f74e9a086fca.js → 8667-adbe6017a66cef10.js} +0 -0
  58. /khoj/interface/compiled/_next/static/chunks/{9259-640fdd77408475df.js → 9259-d8bcd9da9e80c81e.js} +0 -0
  59. {khoj-1.42.3.dev1.dist-info → khoj-1.42.4.dist-info}/WHEEL +0 -0
  60. {khoj-1.42.3.dev1.dist-info → khoj-1.42.4.dist-info}/entry_points.txt +0 -0
  61. {khoj-1.42.3.dev1.dist-info → khoj-1.42.4.dist-info}/licenses/LICENSE +0 -0
khoj/processor/conversation/openai/utils.py CHANGED
@@ -14,6 +14,7 @@ from openai.lib.streaming.chat import (
     ChatCompletionStreamEvent,
     ContentDeltaEvent,
 )
+from openai.types.chat.chat_completion import ChatCompletion
 from openai.types.chat.chat_completion_chunk import (
     ChatCompletionChunk,
     Choice,
@@ -78,7 +79,11 @@ def completion_with_backoff(
         client = get_openai_client(openai_api_key, api_base_url)
         openai_clients[client_key] = client

+    stream = not is_non_streaming_model(model_name, api_base_url)
     stream_processor = default_stream_processor
+    if stream:
+        model_kwargs["stream_options"] = {"include_usage": True}
+
     formatted_messages = format_message_for_api(messages, api_base_url)

     # Tune reasoning models arguments
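The `stream_options` move above is easy to miss: with `{"include_usage": True}`, the OpenAI API appends a final chunk whose `choices` list is empty and whose `usage` field carries the token counts, while non-streaming requests reject `stream_options` outright, hence the new `if stream:` guard. A minimal sketch of that behavior (model name and messages are placeholders, not from this diff):

```python
from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment
messages = [{"role": "user", "content": "Say hi"}]
for chunk in client.chat.completions.create(
    model="gpt-4o-mini",  # placeholder model
    messages=messages,
    stream=True,
    stream_options={"include_usage": True},
):
    if chunk.usage:  # final bookkeeping chunk: empty choices, token counts attached
        print(chunk.usage.prompt_tokens, chunk.usage.completion_tokens)
```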
@@ -105,27 +110,37 @@ def completion_with_backoff(
         stream_processor = partial(in_stream_thought_processor, thought_tag="think")
         # Reasoning is enabled by default. Disable when deepthought is False.
         # See https://qwenlm.github.io/blog/qwen3/#advanced-usages
-        if not deepthought and len(formatted_messages) > 0:
-            formatted_messages[-1]["content"] = formatted_messages[-1]["content"] + " /no_think"
+        if not deepthought:
+            add_qwen_no_think_tag(formatted_messages)

     read_timeout = 300 if is_local_api(api_base_url) else 60
-    model_kwargs["stream_options"] = {"include_usage": True}
     if os.getenv("KHOJ_LLM_SEED"):
         model_kwargs["seed"] = int(os.getenv("KHOJ_LLM_SEED"))

     aggregated_response = ""
-    with client.beta.chat.completions.stream(
-        messages=formatted_messages,  # type: ignore
-        model=model_name,
-        temperature=temperature,
-        timeout=httpx.Timeout(30, read=read_timeout),
-        **model_kwargs,
-    ) as chat:
-        for chunk in stream_processor(chat):
-            if chunk.type == "content.delta":
-                aggregated_response += chunk.delta
-            elif chunk.type == "thought.delta":
-                pass
+    if stream:
+        with client.beta.chat.completions.stream(
+            messages=formatted_messages,  # type: ignore
+            model=model_name,
+            temperature=temperature,
+            timeout=httpx.Timeout(30, read=read_timeout),
+            **model_kwargs,
+        ) as chat:
+            for chunk in stream_processor(chat):
+                if chunk.type == "content.delta":
+                    aggregated_response += chunk.delta
+                elif chunk.type == "thought.delta":
+                    pass
+    else:
+        # Non-streaming chat completion
+        chunk = client.beta.chat.completions.parse(
+            messages=formatted_messages,  # type: ignore
+            model=model_name,
+            temperature=temperature,
+            timeout=httpx.Timeout(30, read=read_timeout),
+            **model_kwargs,
+        )
+        aggregated_response = chunk.choices[0].message.content

     # Calculate cost of chat
     input_tokens = chunk.usage.prompt_tokens if hasattr(chunk, "usage") and chunk.usage else 0
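Note how the non-streaming branch reuses the `chunk` name: both the last stream event and the parsed `ChatCompletion` expose a `usage` attribute, so the cost accounting below works unchanged for either path. Roughly, using attribute names from the OpenAI SDK's `CompletionUsage`:

```python
# Sketch of the shared usage extraction both branches feed into.
usage = getattr(chunk, "usage", None)
input_tokens = usage.prompt_tokens if usage else 0
output_tokens = usage.completion_tokens if usage else 0
```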
@@ -182,7 +197,11 @@ async def chat_completion_with_backoff(
         client = get_openai_async_client(openai_api_key, api_base_url)
         openai_async_clients[client_key] = client

+    stream = not is_non_streaming_model(model_name, api_base_url)
     stream_processor = adefault_stream_processor
+    if stream:
+        model_kwargs["stream_options"] = {"include_usage": True}
+
     formatted_messages = format_message_for_api(messages, api_base_url)

     # Configure thinking for openai reasoning models
@@ -225,12 +244,10 @@ async def chat_completion_with_backoff(
         stream_processor = partial(ain_stream_thought_processor, thought_tag="think")
         # Reasoning is enabled by default. Disable when deepthought is False.
         # See https://qwenlm.github.io/blog/qwen3/#advanced-usages
-        if not deepthought and len(formatted_messages) > 0:
-            formatted_messages[-1]["content"] = formatted_messages[-1]["content"] + " /no_think"
+        if not deepthought:
+            add_qwen_no_think_tag(formatted_messages)

-    stream = True
     read_timeout = 300 if is_local_api(api_base_url) else 60
-    model_kwargs["stream_options"] = {"include_usage": True}
     if os.getenv("KHOJ_LLM_SEED"):
         model_kwargs["seed"] = int(os.getenv("KHOJ_LLM_SEED"))

@@ -238,7 +255,7 @@ async def chat_completion_with_backoff(
     final_chunk = None
     response_started = False
     start_time = perf_counter()
-    chat_stream: openai.AsyncStream[ChatCompletionChunk] = await client.chat.completions.create(
+    response: openai.AsyncStream[ChatCompletionChunk] | ChatCompletion = await client.chat.completions.create(
         messages=formatted_messages,  # type: ignore
         model=model_name,
         stream=stream,
@@ -246,26 +263,34 @@ async def chat_completion_with_backoff(
         timeout=httpx.Timeout(30, read=read_timeout),
         **model_kwargs,
     )
-    async for chunk in stream_processor(chat_stream):
-        # Log the time taken to start response
-        if not response_started:
-            response_started = True
-            logger.info(f"First response took: {perf_counter() - start_time:.3f} seconds")
-        # Keep track of the last chunk for usage data
-        final_chunk = chunk
-        # Skip empty chunks
-        if len(chunk.choices) == 0:
-            continue
-        # Handle streamed response chunk
-        response_chunk: ResponseWithThought = None
-        response_delta = chunk.choices[0].delta
-        if response_delta.content:
-            response_chunk = ResponseWithThought(response=response_delta.content)
-            aggregated_response += response_chunk.response
-        elif response_delta.thought:
-            response_chunk = ResponseWithThought(thought=response_delta.thought)
-        if response_chunk:
-            yield response_chunk
+    if not stream:
+        # If not streaming, we can return the response directly
+        if len(response.choices) == 0 or not response.choices[0].message:
+            raise ValueError("No response by model.")
+        aggregated_response = response.choices[0].message.content
+        final_chunk = response
+        yield ResponseWithThought(response=aggregated_response)
+    else:
+        async for chunk in stream_processor(response):
+            # Log the time taken to start response
+            if not response_started:
+                response_started = True
+                logger.info(f"First response took: {perf_counter() - start_time:.3f} seconds")
+            # Keep track of the last chunk for usage data
+            final_chunk = chunk
+            # Skip empty chunks
+            if len(chunk.choices) == 0:
+                continue
+            # Handle streamed response chunk
+            response_chunk: ResponseWithThought = None
+            response_delta = chunk.choices[0].delta
+            if response_delta.content:
+                response_chunk = ResponseWithThought(response=response_delta.content)
+                aggregated_response += response_chunk.response
+            elif response_delta.thought:
+                response_chunk = ResponseWithThought(thought=response_delta.thought)
+            if response_chunk:
+                yield response_chunk

     # Calculate cost of chat after stream finishes
     input_tokens, output_tokens, cost = 0, 0, 0
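With this change, `chat_completion_with_backoff` stays an async generator in both modes: a non-streaming completion is simply yielded as a single `ResponseWithThought`. A hedged caller-side sketch (the real call sites pass more arguments than shown here):

```python
# Hypothetical consumer of the async generator; assumes this module's
# chat_completion_with_backoff is importable and kwargs are valid for it.
async def collect_text(**kwargs) -> str:
    text = ""
    async for part in chat_completion_with_backoff(**kwargs):
        if part.response:  # thought-only chunks carry no response text
            text += part.response
    return text
```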
@@ -312,11 +337,29 @@ def format_message_for_api(messages: List[ChatMessage], api_base_url: str) -> Li
     """
     formatted_messages = []
     for message in deepcopy(messages):
-        # Convert images to PNG format if message to be sent to non OpenAI API
         if isinstance(message.content, list) and not is_openai_api(api_base_url):
-            for part in message.content:
+            assistant_texts = []
+            has_images = False
+            for idx, part in enumerate(message.content):
+                # Convert images to PNG format if message to be sent to non OpenAI API
                 if part.get("type") == "image_url":
+                    has_images = True
                     part["image_url"]["url"] = convert_image_data_uri(part["image_url"]["url"], target_format="png")
+                # Deepinfra API does not support text content list in assistant messages
+                # So we merge text content list into a single text string
+                if (
+                    part.get("type") == "text"
+                    and message.role == "assistant"
+                    and api_base_url.startswith("https://api.deepinfra.com/v1")
+                ):
+                    assistant_texts += [part["text"]]
+                    message.content.pop(idx)
+            if assistant_texts:
+                assistant_texts_str = "\n\n".join(assistant_texts)
+                if has_images:
+                    message.content += [{"type": "text", "text": assistant_texts_str}]
+                else:
+                    message.content = assistant_texts_str
         formatted_messages.append({"role": message.role, "content": message.content})

     return formatted_messages
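A toy walk-through of the Deepinfra merge, with made-up message parts. One caveat worth knowing about the code above: `pop(idx)` inside `enumerate` shifts later indices, so consecutive text parts can be skipped; a filter-then-join sketch sidesteps that:

```python
# Assumed assistant message content; illustrative, not from the khoj codebase.
content = [
    {"type": "text", "text": "First draft."},
    {"type": "text", "text": "Second draft."},
]
texts = [p["text"] for p in content if p.get("type") == "text"]
merged = "\n\n".join(texts)  # -> "First draft.\n\nSecond draft."
```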
@@ -336,6 +379,14 @@ def is_openai_reasoning_model(model_name: str, api_base_url: str = None) -> bool
     return model_name.startswith("o") and is_openai_api(api_base_url)


+def is_non_streaming_model(model_name: str, api_base_url: str = None) -> bool:
+    """
+    Check if model response should not be streamed.
+    """
+    # Some OpenAI models require biometrics to stream. Avoid streaming their responses.
+    return model_name in ["o3", "o3-pro"] and is_openai_api(api_base_url)
+
+
 def is_twitter_reasoning_model(model_name: str, api_base_url: str = None) -> bool:
     """
     Check if the model is a Twitter reasoning model
@@ -627,3 +678,25 @@ async def ain_stream_thought_processor(
         elif mode == "message":
             chunk.choices[0].delta.content = buf
         yield chunk
+
+
+def add_qwen_no_think_tag(formatted_messages: List[dict]) -> None:
+    """
+    Add /no_think tag to the last message content if it is a user message.
+    This is used to disable reasoning in Qwen models when deepthought is False.
+    """
+    if len(formatted_messages) > 0 and formatted_messages[-1]["role"] == "user":
+        last_message = formatted_messages[-1]
+        if isinstance(last_message["content"], str):
+            # Append /no_think to the last message content
+            formatted_messages[-1]["content"] = last_message["content"] + " /no_think"
+        elif isinstance(last_message["content"], list) and len(last_message["content"]) > 0:
+            # Append /no_think to the last content part
+            if isinstance(last_message["content"][-1], str):
+                last_message["content"][-1] = last_message["content"][-1] + " /no_think"
+            else:
+                # Find last content part of type text and append /no_think to "text" part
+                for content_part in reversed(last_message["content"]):
+                    if isinstance(content_part, dict) and content_part.get("type") == "text":
+                        content_part["text"] += " /no_think"
+                        break
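The helper's effect on the two message shapes it handles, as a quick illustrative check (the messages are made up):

```python
# Plain string content: the tag is appended to the string.
msgs = [{"role": "user", "content": "Summarize my notes"}]
add_qwen_no_think_tag(msgs)
assert msgs[-1]["content"] == "Summarize my notes /no_think"

# Content parts list: the tag lands on the last "text" part.
msgs = [{"role": "user", "content": [{"type": "text", "text": "Hi"}]}]
add_qwen_no_think_tag(msgs)
assert msgs[-1]["content"][-1]["text"] == "Hi /no_think"
```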
khoj/processor/conversation/utils.py CHANGED
@@ -62,14 +62,15 @@ model_to_prompt_size = {
     "gpt-4.1": 60000,
     "gpt-4.1-mini": 120000,
     "gpt-4.1-nano": 120000,
-    "o1": 20000,
-    "o3": 30000,
-    "o1-mini": 60000,
-    "o3-mini": 60000,
-    "o4-mini": 60000,
+    "o1-mini": 90000,
+    "o1": 30000,
+    "o3-mini": 90000,
+    "o3": 60000,
+    "o3-pro": 30000,
+    "o4-mini": 90000,
     # Google Models
-    "gemini-2.5-flash-preview-04-17": 120000,
-    "gemini-2.5-pro-preview-03-25": 60000,
+    "gemini-2.5-flash-preview-05-20": 120000,
+    "gemini-2.5-pro-preview-06-05": 60000,
     "gemini-2.0-flash": 120000,
     "gemini-2.0-flash-lite": 120000,
     "gemini-1.5-flash": 120000,
@@ -186,7 +187,7 @@ def construct_iteration_history(
         iteration_history.append(
             ChatMessageModel(
                 by="khoj",
-                intent={"type": "remember", "query": query},
+                intent=Intent(type="remember", query=query),
                 message=previous_iteration_messages,
             )
         )
@@ -196,16 +197,16 @@ def construct_chat_history(chat_history: list[ChatMessageModel], n: int = 4, agent_name="AI") -> str:
     chat_history_str = ""
     for chat in chat_history[-n:]:
-        if chat.by == "khoj" and chat.intent.type in ["remember", "reminder", "summarize"]:
-            if chat.intent.inferred_queries:
-                chat_history_str += f'{agent_name}: {{"queries": {chat.intent.inferred_queries}}}\n'
+        intent_type = chat.intent.type if chat.intent and chat.intent.type else ""
+        inferred_queries = chat.intent.inferred_queries if chat.intent else None
+        if chat.by == "khoj" and intent_type in ["remember", "reminder", "summarize"]:
+            if inferred_queries:
+                chat_history_str += f'{agent_name}: {{"queries": {inferred_queries}}}\n'
             chat_history_str += f"{agent_name}: {chat.message}\n\n"
         elif chat.by == "khoj" and chat.images:
-            chat_history_str += f"User: {chat.intent.query}\n"
             chat_history_str += f"{agent_name}: [generated image redacted for space]\n"
-        elif chat.by == "khoj" and ("excalidraw" in chat.intent.type):
-            chat_history_str += f"User: {chat.intent.query}\n"
-            chat_history_str += f"{agent_name}: {chat.intent.inferred_queries[0]}\n"
+        elif chat.by == "khoj" and ("excalidraw" in intent_type):
+            chat_history_str += f"{agent_name}: {inferred_queries[0]}\n"
         elif chat.by == "you":
             chat_history_str += f"User: {chat.message}\n"
             raw_query_files = chat.queryFiles
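The hoisted `intent_type`/`inferred_queries` locals are the substance of this hunk: `chat.intent` can be `None` on some messages, and the old dotted access raised `AttributeError`. A stripped-down reproduction (stand-in class, not the real model):

```python
class Msg:  # hypothetical stand-in for ChatMessageModel
    intent = None

chat = Msg()
# Old pattern: chat.intent.type -> AttributeError when intent is None.
intent_type = chat.intent.type if chat.intent and chat.intent.type else ""  # safe ""
```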
khoj/processor/image/generate.py CHANGED
@@ -53,11 +53,11 @@ async def text_to_image(
     text2image_model = text_to_image_config.model_name
     chat_history_str = ""
     for chat in chat_history[-4:]:
-        if chat.by == "khoj" and chat.intent and chat.intent.type in ["remember", "reminder"]:
-            chat_history_str += f"Q: {chat.intent.query or ''}\n"
+        if chat.by == "you":
+            chat_history_str += f"Q: {chat.message}\n"
+        elif chat.by == "khoj" and chat.intent and chat.intent.type in ["remember", "reminder"]:
             chat_history_str += f"A: {chat.message}\n"
         elif chat.by == "khoj" and chat.images:
-            chat_history_str += f"Q: {chat.intent.query}\n"
             chat_history_str += f"A: Improved Prompt: {chat.intent.inferred_queries[0]}\n"

     if send_status_func:
khoj/routers/api_agents.py CHANGED
@@ -62,7 +62,7 @@ async def all_agents(
     for agent in agents:
         files = agent.fileobject_set.all()
         file_names = [file.file_name for file in files]
-        agent_chat_model = await AgentAdapters.aget_agent_chat_model(default_agent, user)
+        agent_chat_model = await AgentAdapters.aget_agent_chat_model(agent, user)
         agent_packet = {
             "slug": agent.slug,
             "name": agent.name,
khoj/routers/api_chat.py CHANGED
@@ -960,7 +960,11 @@ async def chat(
         online_results = {key: val.model_dump() for key, val in last_message.onlineContext.items() or []}
         code_results = {key: val.model_dump() for key, val in last_message.codeContext.items() or []}
         compiled_references = [ref.model_dump() for ref in last_message.context or []]
-        research_results = [ResearchIteration(**iter_dict) for iter_dict in last_message.researchContext or []]
+        research_results = [
+            ResearchIteration(**iter_dict)
+            for iter_dict in last_message.researchContext or []
+            if iter_dict.get("summarizedResult")
+        ]
         operator_results = [OperatorRun(**iter_dict) for iter_dict in last_message.operatorContext or []]
         train_of_thought = [thought.model_dump() for thought in last_message.trainOfThought or []]
         # Drop the interrupted message from conversation history
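The comprehension now skips research iterations that never produced a `summarizedResult` (for example, runs cut off mid-iteration), so resuming an interrupted chat does not replay half-finished steps. With toy dicts (only `summarizedResult` is a real key from this diff):

```python
research_context = [
    {"summarizedResult": "found the answer"},
    {"summarizedResult": None},  # interrupted iteration: dropped on resume
]
kept = [d for d in research_context if d.get("summarizedResult")]
assert len(kept) == 1
```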
@@ -1011,7 +1015,7 @@ async def chat(
             user=user,
             query=defiltered_query,
             conversation_id=conversation_id,
-            conversation_history=conversation.messages,
+            conversation_history=chat_history,
             previous_iterations=list(research_results),
             query_images=uploaded_images,
             agent=agent,
khoj/utils/constants.py CHANGED
@@ -17,8 +17,8 @@ default_offline_chat_models = [
     "bartowski/gemma-2-2b-it-GGUF",
     "bartowski/Qwen2.5-14B-Instruct-GGUF",
 ]
-default_openai_chat_models = ["gpt-4o-mini", "gpt-4.1"]
-default_gemini_chat_models = ["gemini-2.0-flash", "gemini-2.5-flash-preview-05-20", "gemini-2.5-pro-preview-05-06"]
+default_openai_chat_models = ["gpt-4o-mini", "gpt-4.1", "o3", "o4-mini"]
+default_gemini_chat_models = ["gemini-2.0-flash", "gemini-2.5-flash-preview-05-20", "gemini-2.5-pro-preview-06-05"]
 default_anthropic_chat_models = ["claude-sonnet-4-0", "claude-3-5-haiku-latest"]

 empty_config = {
@@ -41,10 +41,11 @@ model_to_cost: Dict[str, Dict[str, float]] = {
     "gpt-4.1": {"input": 2.00, "output": 8.00},
     "gpt-4.1-mini": {"input": 0.40, "output": 1.60},
     "gpt-4.1-nano": {"input": 0.10, "output": 0.40},
-    "o1": {"input": 15.0, "output": 60.00},
-    "o3": {"input": 10.0, "output": 40.00},
     "o1-mini": {"input": 3.0, "output": 12.0},
+    "o1": {"input": 15.0, "output": 60.00},
     "o3-mini": {"input": 1.10, "output": 4.40},
+    "o3": {"input": 2.0, "output": 8.00},
+    "o3-pro": {"input": 20.0, "output": 80.00},
     "o4-mini": {"input": 1.10, "output": 4.40},
     # Gemini Pricing: https://ai.google.dev/pricing
     "gemini-1.5-flash": {"input": 0.075, "output": 0.30},
@@ -53,8 +54,8 @@ model_to_cost: Dict[str, Dict[str, float]] = {
     "gemini-1.5-pro-002": {"input": 1.25, "output": 5.00},
     "gemini-2.0-flash": {"input": 0.10, "output": 0.40},
     "gemini-2.0-flash-lite": {"input": 0.0075, "output": 0.30},
-    "gemini-2.5-flash-preview-04-17": {"input": 0.15, "output": 0.60, "thought": 3.50},
-    "gemini-2.5-pro-preview-03-25": {"input": 1.25, "output": 10.0},
+    "gemini-2.5-flash-preview-05-20": {"input": 0.15, "output": 0.60, "thought": 3.50},
+    "gemini-2.5-pro-preview-06-05": {"input": 1.25, "output": 10.0},
     # Anthropic Pricing: https://www.anthropic.com/pricing#anthropic-api
     "claude-3-5-haiku-20241022": {"input": 1.0, "output": 5.0, "cache_read": 0.08, "cache_write": 1.0},
     "claude-3-5-haiku@20241022": {"input": 1.0, "output": 5.0, "cache_read": 0.08, "cache_write": 1.0},
{khoj-1.42.3.dev1.dist-info → khoj-1.42.4.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: khoj
-Version: 1.42.3.dev1
+Version: 1.42.4
 Summary: Your Second Brain
 Project-URL: Homepage, https://khoj.dev
 Project-URL: Documentation, https://docs.khoj.dev
@@ -53,7 +53,7 @@ Requires-Dist: magika~=0.5.1
 Requires-Dist: markdown-it-py~=3.0.0
 Requires-Dist: markdownify~=0.11.6
 Requires-Dist: openai-whisper>=20231117
-Requires-Dist: openai>=1.0.0
+Requires-Dist: openai>=1.86.0
 Requires-Dist: pgvector==0.2.4
 Requires-Dist: phonenumbers==8.13.27
 Requires-Dist: pillow~=10.0.0