khoj 1.36.7.dev66__py3-none-any.whl → 1.37.1__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
- khoj/database/migrations/0087_alter_aimodelapi_api_key.py +17 -0
- khoj/database/models/__init__.py +1 -1
- khoj/interface/compiled/404/index.html +2 -2
- khoj/interface/compiled/_next/static/chunks/{2117-f99825f0a867a42d.js → 2117-1c18aa2098982bf9.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{2327-b21ecded25471e6c.js → 2327-0bbe3ee35f80659f.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{5477-9ff77f49e6cf375c.js → 5477-a5b2688736f51b8c.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{8515-010dd769c584b672.js → 8515-f305779d95dd5780.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/agents/{layout-948ca256650845ce.js → layout-dd7f2b45a9c30bd7.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/{layout-603285e3b1400e74.js → layout-904fbbb3974588da.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/{page-50cb9b62b10b5f3d.js → page-5175e747d3cb4a33.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/{page-29e3b092fe46f190.js → page-44ac22beb2619af0.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/search/{layout-d7f7528ff387fba5.js → layout-51d73830842461d5.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/{layout-246d0e8125219fff.js → layout-d090bd23befd0207.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-6f26fe7f2f7edc56.js → page-e8f0cc65930b214e.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/main-876327ac335776ab.js +1 -0
- khoj/interface/compiled/_next/static/chunks/{webpack-1169ca6e9e7e6247.js → webpack-d1d79c1576702da7.js} +1 -1
- khoj/interface/compiled/_next/static/css/440ae0f0f650dc35.css +1 -0
- khoj/interface/compiled/_next/static/css/b061a6aedf367349.css +25 -0
- khoj/interface/compiled/_next/static/css/b62829e3bf683b86.css +1 -0
- khoj/interface/compiled/agents/index.html +2 -2
- khoj/interface/compiled/agents/index.txt +3 -3
- khoj/interface/compiled/automations/index.html +2 -2
- khoj/interface/compiled/automations/index.txt +2 -2
- khoj/interface/compiled/chat/index.html +2 -2
- khoj/interface/compiled/chat/index.txt +3 -3
- khoj/interface/compiled/index.html +2 -2
- khoj/interface/compiled/index.txt +2 -2
- khoj/interface/compiled/search/index.html +2 -2
- khoj/interface/compiled/search/index.txt +3 -3
- khoj/interface/compiled/settings/index.html +2 -2
- khoj/interface/compiled/settings/index.txt +2 -2
- khoj/interface/compiled/share/chat/index.html +2 -2
- khoj/interface/compiled/share/chat/index.txt +3 -3
- khoj/processor/conversation/anthropic/anthropic_chat.py +9 -4
- khoj/processor/conversation/anthropic/utils.py +32 -12
- khoj/processor/conversation/google/gemini_chat.py +14 -5
- khoj/processor/conversation/google/utils.py +49 -6
- khoj/processor/conversation/openai/gpt.py +18 -6
- khoj/processor/conversation/openai/utils.py +37 -46
- khoj/processor/conversation/utils.py +24 -2
- khoj/processor/image/generate.py +2 -2
- khoj/processor/tools/run_code.py +1 -1
- khoj/routers/api.py +4 -0
- khoj/routers/api_chat.py +6 -4
- khoj/routers/auth.py +2 -5
- khoj/routers/helpers.py +23 -3
- khoj/routers/research.py +44 -3
- khoj/routers/storage.py +28 -29
- khoj/utils/constants.py +2 -0
- khoj/utils/helpers.py +58 -2
- {khoj-1.36.7.dev66.dist-info → khoj-1.37.1.dist-info}/METADATA +5 -6
- {khoj-1.36.7.dev66.dist-info → khoj-1.37.1.dist-info}/RECORD +57 -56
- khoj/interface/compiled/_next/static/chunks/main-98eb5932d6b2e3fa.js +0 -1
- khoj/interface/compiled/_next/static/css/5384e98d63fe6f0e.css +0 -25
- khoj/interface/compiled/_next/static/css/8051073dc55b92b3.css +0 -1
- khoj/interface/compiled/_next/static/css/b15666ef52060cd0.css +0 -1
- /khoj/interface/compiled/_next/static/{iZ9Zhm-BkOf7hfAqqzokr → PzXuumAYUnzr_Egd_JDmj}/_buildManifest.js +0 -0
- /khoj/interface/compiled/_next/static/{iZ9Zhm-BkOf7hfAqqzokr → PzXuumAYUnzr_Egd_JDmj}/_ssgManifest.js +0 -0
- {khoj-1.36.7.dev66.dist-info → khoj-1.37.1.dist-info}/WHEEL +0 -0
- {khoj-1.36.7.dev66.dist-info → khoj-1.37.1.dist-info}/entry_points.txt +0 -0
- {khoj-1.36.7.dev66.dist-info → khoj-1.37.1.dist-info}/licenses/LICENSE +0 -0
khoj/processor/conversation/google/gemini_chat.py
CHANGED

```diff
@@ -34,7 +34,7 @@ def extract_questions_gemini(
     model: Optional[str] = "gemini-2.0-flash",
     conversation_log={},
     api_key=None,
-
+    api_base_url=None,
     max_tokens=None,
     location_data: LocationData = None,
     user: KhojUser = None,
@@ -97,7 +97,12 @@ def extract_questions_gemini(
     messages.append(ChatMessage(content=system_prompt, role="system"))

     response = gemini_send_message_to_model(
-        messages,
+        messages,
+        api_key,
+        model,
+        api_base_url=api_base_url,
+        response_type="json_object",
+        tracer=tracer,
     )

     # Extract, Clean Message from Gemini's Response
@@ -120,8 +125,9 @@ def gemini_send_message_to_model(
     messages,
     api_key,
     model,
+    api_base_url=None,
     response_type="text",
-
+    response_schema=None,
     model_kwargs=None,
     tracer={},
 ):
@@ -135,6 +141,7 @@ def gemini_send_message_to_model(
     # This caused unwanted behavior and terminates response early for gemini 1.5 series. Monitor for flakiness with 2.0 series.
     if response_type == "json_object" and model in ["gemini-2.0-flash"]:
         model_kwargs["response_mime_type"] = "application/json"
+        model_kwargs["response_schema"] = response_schema

     # Get Response from Gemini
     return gemini_completion_with_backoff(
```
```diff
@@ -142,7 +149,7 @@ def gemini_send_message_to_model(
         system_prompt=system_prompt,
         model_name=model,
         api_key=api_key,
-
+        api_base_url=api_base_url,
         model_kwargs=model_kwargs,
         tracer=tracer,
     )
```
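Taken together, the hunks above wire schema-constrained JSON output through to the Gemini backend. A minimal sketch of the underlying google-genai call, with an illustrative model name and schema (the exact schema khoj passes is not shown in this diff):

```python
from google import genai
from google.genai import types as gtypes

client = genai.Client(api_key="GEMINI_API_KEY")  # placeholder key
config = gtypes.GenerateContentConfig(
    response_mime_type="application/json",
    # Constrains decoding so the reply parses as this JSON shape
    response_schema={
        "type": "OBJECT",
        "properties": {"queries": {"type": "ARRAY", "items": {"type": "STRING"}}},
    },
)
response = client.models.generate_content(
    model="gemini-2.0-flash", contents="Extract search queries from: ...", config=config
)
print(response.text)  # a JSON string conforming to the schema
```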
```diff
@@ -156,7 +163,8 @@ def converse_gemini(
     conversation_log={},
     model: Optional[str] = "gemini-2.0-flash",
     api_key: Optional[str] = None,
-
+    api_base_url: Optional[str] = None,
+    temperature: float = 0.4,
     completion_func=None,
     conversation_commands=[ConversationCommand.Default],
     max_prompt_size=None,
@@ -247,6 +255,7 @@ def converse_gemini(
         model_name=model,
         temperature=temperature,
         api_key=api_key,
+        api_base_url=api_base_url,
         system_prompt=system_prompt,
         completion_func=completion_func,
         tracer=tracer,
```
khoj/processor/conversation/google/utils.py
CHANGED

```diff
@@ -1,7 +1,9 @@
 import logging
+import os
 import random
 from copy import deepcopy
 from threading import Thread
+from typing import Dict

 from google import genai
 from google.genai import errors as gerrors
@@ -18,9 +20,11 @@ from tenacity import (
 from khoj.processor.conversation.utils import (
     ThreadedGenerator,
     commit_conversation_trace,
+    get_image_from_base64,
     get_image_from_url,
 )
 from khoj.utils.helpers import (
+    get_ai_api_info,
     get_chat_usage_metrics,
     is_none_or_empty,
     is_promptrace_enabled,
@@ -28,6 +32,7 @@ from khoj.utils.helpers import (

 logger = logging.getLogger(__name__)

+gemini_clients: Dict[str, genai.Client] = {}

 MAX_OUTPUT_TOKENS_GEMINI = 8192
 SAFETY_SETTINGS = [
```
```diff
@@ -50,6 +55,17 @@ SAFETY_SETTINGS = [
 ]


+def get_gemini_client(api_key, api_base_url=None) -> genai.Client:
+    api_info = get_ai_api_info(api_key, api_base_url)
+    return genai.Client(
+        location=api_info.region,
+        project=api_info.project,
+        credentials=api_info.credentials,
+        api_key=api_info.api_key,
+        vertexai=api_info.api_key is None,
+    )
+
+
 @retry(
     wait=wait_random_exponential(min=1, max=10),
     stop=stop_after_attempt(2),
```
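`get_ai_api_info` itself is not shown in this diff; judging from the fields `get_gemini_client` reads, it plausibly returns something like the sketch below (names hypothetical). With `api_key` set, the client talks to the plain Gemini API; with it unset, `vertexai=True` routes requests through Vertex AI using the project, region, and credentials instead.

```python
# Hypothetical shape of get_ai_api_info's return value, inferred from usage
# above; the real helper lives in khoj/utils/helpers.py.
from dataclasses import dataclass
from typing import Any, Optional

@dataclass
class AiApiInfo:
    api_key: Optional[str] = None      # set for the Gemini API path
    region: Optional[str] = None       # set for the Vertex AI path
    project: Optional[str] = None
    credentials: Optional[Any] = None  # e.g. service-account credentials
```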
```diff
@@ -57,14 +73,22 @@
     reraise=True,
 )
 def gemini_completion_with_backoff(
-    messages, system_prompt, model_name, temperature=0, api_key=None, model_kwargs=None, tracer={}
+    messages, system_prompt, model_name, temperature=0.8, api_key=None, api_base_url=None, model_kwargs=None, tracer={}
 ) -> str:
-    client =
+    client = gemini_clients.get(api_key)
+    if not client:
+        client = get_gemini_client(api_key, api_base_url)
+        gemini_clients[api_key] = client
+
+    seed = int(os.getenv("KHOJ_LLM_SEED")) if os.getenv("KHOJ_LLM_SEED") else None
     config = gtypes.GenerateContentConfig(
         system_instruction=system_prompt,
         temperature=temperature,
         max_output_tokens=MAX_OUTPUT_TOKENS_GEMINI,
         safety_settings=SAFETY_SETTINGS,
+        response_mime_type=model_kwargs.get("response_mime_type", "text/plain") if model_kwargs else "text/plain",
+        response_schema=model_kwargs.get("response_schema", None) if model_kwargs else None,
+        seed=seed,
     )

     formatted_messages = [gtypes.Content(role=message.role, parts=message.content) for message in messages]
```
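Two cross-cutting patterns land here: each worker memoizes one `genai.Client` per API key instead of rebuilding it per request, and an optional `KHOJ_LLM_SEED` environment variable pins the sampler seed for best-effort reproducibility. Note the Gemini cache is keyed by `api_key` alone, whereas the OpenAI cache later in this diff keys on `f"{api_key}--{api_base_url}"`. A condensed, self-contained sketch of the pattern (the client class is a stand-in):

```python
import os
from typing import Dict, Optional

class FakeClient:
    """Stand-in for genai.Client in this sketch."""
    def __init__(self, api_key: str, api_base_url: Optional[str] = None):
        self.api_key, self.api_base_url = api_key, api_base_url

clients: Dict[str, FakeClient] = {}

def client_for(api_key: str, api_base_url: Optional[str] = None) -> FakeClient:
    # Create the SDK client once per process, then reuse it across requests
    client = clients.get(api_key)
    if not client:
        client = FakeClient(api_key, api_base_url)
        clients[api_key] = client
    return client

# Best-effort reproducibility: export KHOJ_LLM_SEED=42 before launching khoj
seed = int(os.getenv("KHOJ_LLM_SEED")) if os.getenv("KHOJ_LLM_SEED") else None
```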
```diff
@@ -109,6 +133,7 @@ def gemini_chat_completion_with_backoff(
     model_name,
     temperature,
     api_key,
+    api_base_url,
     system_prompt,
     completion_func=None,
     model_kwargs=None,
@@ -117,23 +142,37 @@
     g = ThreadedGenerator(compiled_references, online_results, completion_func=completion_func)
     t = Thread(
         target=gemini_llm_thread,
-        args=(g, messages, system_prompt, model_name, temperature, api_key, model_kwargs, tracer),
+        args=(g, messages, system_prompt, model_name, temperature, api_key, api_base_url, model_kwargs, tracer),
     )
     t.start()
     return g


 def gemini_llm_thread(
-    g,
+    g,
+    messages,
+    system_prompt,
+    model_name,
+    temperature,
+    api_key,
+    api_base_url=None,
+    model_kwargs=None,
+    tracer: dict = {},
 ):
     try:
-        client =
+        client = gemini_clients.get(api_key)
+        if not client:
+            client = get_gemini_client(api_key, api_base_url)
+            gemini_clients[api_key] = client
+
+        seed = int(os.getenv("KHOJ_LLM_SEED")) if os.getenv("KHOJ_LLM_SEED") else None
         config = gtypes.GenerateContentConfig(
             system_instruction=system_prompt,
             temperature=temperature,
             max_output_tokens=MAX_OUTPUT_TOKENS_GEMINI,
             stop_sequences=["Notes:\n["],
             safety_settings=SAFETY_SETTINGS,
+            seed=seed,
         )

         aggregated_response = ""
```
```diff
@@ -243,7 +282,11 @@ def format_messages_for_gemini(
         message_content = []
         for item in sorted(message.content, key=lambda x: 0 if x["type"] == "image_url" else 1):
             if item["type"] == "image_url":
-
+                image_data = item["image_url"]["url"]
+                if image_data.startswith("http"):
+                    image = get_image_from_url(image_data, type="bytes")
+                else:
+                    image = get_image_from_base64(image_data, type="bytes")
                 message_content += [gtypes.Part.from_bytes(data=image.content, mime_type=image.type)]
             else:
                 message_content += [gtypes.Part.from_text(text=item.get("text", ""))]
```
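The new branch distinguishes remote image URLs from inline data URIs before handing bytes to `gtypes.Part.from_bytes`. A data URI packs the MIME type and base64 payload into one string; a quick illustration of the split logic the base64 path (added later in this diff as `get_image_from_base64`) relies on:

```python
# Dummy payload ("hello" in base64) purely to show the parsing; not a real image
image_data = "data:image/webp;base64,aGVsbG8="
if image_data.startswith("http"):
    pass  # remote URL: fetched over the network instead
else:
    mime_type = image_data.split(";", 1)[0].split(":", 1)[1]  # -> "image/webp"
    payload_b64 = image_data.split(",", 1)[1]                 # -> "aGVsbG8="
```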
khoj/processor/conversation/openai/gpt.py
CHANGED

```diff
@@ -10,8 +10,10 @@ from khoj.processor.conversation import prompts
 from khoj.processor.conversation.openai.utils import (
     chat_completion_with_backoff,
     completion_with_backoff,
+    get_openai_api_json_support,
 )
 from khoj.processor.conversation.utils import (
+    JsonSupport,
     clean_json,
     construct_structured_message,
     generate_chatml_messages_with_context,
@@ -61,7 +63,6 @@ def extract_questions(
     today = datetime.today()
     current_new_year = today.replace(month=1, day=1)
     last_new_year = current_new_year.replace(year=today.year - 1)
-    temperature = 0.7

     prompt = prompts.extract_questions.format(
         current_date=today.strftime("%Y-%m-%d"),
@@ -97,7 +98,6 @@ def extract_questions(
         model,
         response_type="json_object",
         api_base_url=api_base_url,
-        temperature=temperature,
         tracer=tracer,
     )

```
```diff
@@ -119,20 +119,32 @@


 def send_message_to_model(
-    messages,
+    messages,
+    api_key,
+    model,
+    response_type="text",
+    response_schema=None,
+    api_base_url=None,
+    tracer: dict = {},
 ):
     """
     Send message to model
     """

+    model_kwargs = {}
+    json_support = get_openai_api_json_support(model, api_base_url)
+    if response_schema and json_support == JsonSupport.SCHEMA:
+        model_kwargs["response_format"] = response_schema
+    elif response_type == "json_object" and json_support == JsonSupport.OBJECT:
+        model_kwargs["response_format"] = {"type": response_type}
+
     # Get Response from GPT
     return completion_with_backoff(
         messages=messages,
         model_name=model,
         openai_api_key=api_key,
-        temperature=temperature,
         api_base_url=api_base_url,
-        model_kwargs=
+        model_kwargs=model_kwargs,
         tracer=tracer,
     )

```
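`send_message_to_model` now selects the strongest JSON mode the target API supports. For reference, the two OpenAI-style `response_format` payload shapes it chooses between (the schema below is illustrative; the one khoj actually passes is built elsewhere):

```python
# JsonSupport.SCHEMA: structured outputs with a full JSON schema
response_format_schema = {
    "type": "json_schema",
    "json_schema": {
        "name": "extracted_queries",  # illustrative name
        "schema": {
            "type": "object",
            "properties": {"queries": {"type": "array", "items": {"type": "string"}}},
            "required": ["queries"],
        },
    },
}

# JsonSupport.OBJECT: plain JSON mode; the shape is enforced only by the prompt
response_format_object = {"type": "json_object"}

# JsonSupport.NONE: omit response_format and clean/parse the raw reply instead
```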
```diff
@@ -146,7 +158,7 @@ def converse_openai(
     model: str = "gpt-4o-mini",
     api_key: Optional[str] = None,
     api_base_url: Optional[str] = None,
-    temperature: float = 0.
+    temperature: float = 0.4,
     completion_func=None,
     conversation_commands=[ConversationCommand.Default],
     max_prompt_size=None,
```
khoj/processor/conversation/openai/utils.py
CHANGED

```diff
@@ -2,6 +2,7 @@ import logging
 import os
 from threading import Thread
 from typing import Dict, List
+from urllib.parse import urlparse

 import openai
 from openai.types.chat.chat_completion import ChatCompletion
@@ -16,6 +17,7 @@ from tenacity import (
 )

 from khoj.processor.conversation.utils import (
+    JsonSupport,
     ThreadedGenerator,
     commit_conversation_trace,
 )
```
```diff
@@ -46,59 +48,43 @@ openai_clients: Dict[str, openai.OpenAI] = {}
 def completion_with_backoff(
     messages,
     model_name: str,
-    temperature=0,
+    temperature=0.8,
     openai_api_key=None,
     api_base_url=None,
     model_kwargs: dict = {},
     tracer: dict = {},
 ) -> str:
     client_key = f"{openai_api_key}--{api_base_url}"
-    client
+    client = openai_clients.get(client_key)
     if not client:
         client = get_openai_client(openai_api_key, api_base_url)
         openai_clients[client_key] = client

     formatted_messages = [{"role": message.role, "content": message.content} for message in messages]

-    #
-
-    stream = True
-    model_kwargs["stream_options"] = {"include_usage": True}
-    if model_name == "o1":
-        temperature = 1
-        stream = False
-        model_kwargs.pop("stream_options", None)
-    elif model_name.startswith("o1"):
-        temperature = 1
-        model_kwargs.pop("response_format", None)
-    elif model_name.startswith("o3-"):
+    # Tune reasoning models arguments
+    if model_name.startswith("o1") or model_name.startswith("o3"):
         temperature = 1
+        model_kwargs["reasoning_effort"] = "medium"

+    model_kwargs["stream_options"] = {"include_usage": True}
     if os.getenv("KHOJ_LLM_SEED"):
         model_kwargs["seed"] = int(os.getenv("KHOJ_LLM_SEED"))

-
+    aggregated_response = ""
+    with client.beta.chat.completions.stream(
         messages=formatted_messages,  # type: ignore
-        model=model_name,
-        stream=stream,
+        model=model_name,
         temperature=temperature,
         timeout=20,
         **model_kwargs,
-    )
-
-    aggregated_response = ""
-    if not stream:
-        chunk = chat
-        aggregated_response = chunk.choices[0].message.content
-    else:
+    ) as chat:
         for chunk in chat:
-            if
+            if chunk.type == "error":
+                logger.error(f"Openai api response error: {chunk.error}", exc_info=True)
                 continue
-
-
-            aggregated_response += delta_chunk
-            elif delta_chunk.content:
-                aggregated_response += delta_chunk.content
+            elif chunk.type == "content.delta":
+                aggregated_response += chunk.delta

     # Calculate cost of chat
     input_tokens = chunk.usage.prompt_tokens if hasattr(chunk, "usage") and chunk.usage else 0
```
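The rewrite drops the hand-rolled stream/non-stream branching in favor of the OpenAI SDK's beta streaming helper, a context manager that yields typed events (the `"error"` and `"content.delta"` types checked above). A minimal sketch of that API:

```python
import openai

client = openai.OpenAI()  # assumes OPENAI_API_KEY is set in the environment
aggregated = ""
with client.beta.chat.completions.stream(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "Say hello"}],
) as stream:
    for event in stream:
        if event.type == "content.delta":
            aggregated += event.delta  # incremental text tokens
        elif event.type == "error":
            raise RuntimeError(event.error)
print(aggregated)
```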
```diff
@@ -164,28 +150,20 @@ def llm_thread(
 ):
     try:
         client_key = f"{openai_api_key}--{api_base_url}"
-
-
-        else:
+        client = openai_clients.get(client_key)
+        if not client:
             client = get_openai_client(openai_api_key, api_base_url)
             openai_clients[client_key] = client

         formatted_messages = [{"role": message.role, "content": message.content} for message in messages]

-        #
-
-        stream = True
-        model_kwargs["stream_options"] = {"include_usage": True}
-        if model_name == "o1":
+        # Tune reasoning models arguments
+        if model_name.startswith("o1"):
             temperature = 1
-
-            model_kwargs.pop("stream_options", None)
-        elif model_name.startswith("o1-"):
+        elif model_name.startswith("o3"):
             temperature = 1
-
-
-            temperature = 1
-            # Get the first system message and add the string `Formatting re-enabled` to it. See https://platform.openai.com/docs/guides/reasoning-best-practices
+            # Get the first system message and add the string `Formatting re-enabled` to it.
+            # See https://platform.openai.com/docs/guides/reasoning-best-practices
             if len(formatted_messages) > 0:
                 system_messages = [
                     (i, message) for i, message in enumerate(formatted_messages) if message["role"] == "system"
@@ -195,7 +173,6 @@ def llm_thread(
                 formatted_messages[first_system_message_index][
                     "content"
                 ] = f"{first_system_message} Formatting re-enabled"
-
         elif model_name.startswith("deepseek-reasoner"):
             # Two successive messages cannot be from the same role. Should merge any back-to-back messages from the same role.
             # The first message should always be a user message (except system message).
@@ -210,6 +187,8 @@

             formatted_messages = updated_messages

+        stream = True
+        model_kwargs["stream_options"] = {"include_usage": True}
         if os.getenv("KHOJ_LLM_SEED"):
             model_kwargs["seed"] = int(os.getenv("KHOJ_LLM_SEED"))

```
```diff
@@ -258,3 +237,15 @@
         logger.error(f"Error in llm_thread: {e}", exc_info=True)
     finally:
         g.close()
+
+
+def get_openai_api_json_support(model_name: str, api_base_url: str = None) -> JsonSupport:
+    if model_name.startswith("deepseek-reasoner"):
+        return JsonSupport.NONE
+    if api_base_url:
+        host = urlparse(api_base_url).hostname
+        if host and host.endswith(".ai.azure.com"):
+            return JsonSupport.OBJECT
+        if host == "api.deepinfra.com":
+            return JsonSupport.OBJECT
+    return JsonSupport.SCHEMA
```
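A few illustrative calls, following the hostname rules above (the Azure URL is hypothetical):

```python
get_openai_api_json_support("gpt-4o-mini")
# -> JsonSupport.SCHEMA (default: assume full structured-output support)

get_openai_api_json_support("deepseek-reasoner", "https://api.deepseek.com/v1")
# -> JsonSupport.NONE (treated as having no JSON mode)

get_openai_api_json_support("gpt-4o", "https://my-deployment.eastus2.ai.azure.com")
# -> JsonSupport.OBJECT (Azure AI hosts get plain JSON mode only)
```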
khoj/processor/conversation/utils.py
CHANGED

```diff
@@ -345,8 +345,7 @@ def construct_structured_message(
         constructed_messages.append({"type": "text", "text": attached_file_context})
     if vision_enabled and images:
         for image in images:
-
-            constructed_messages.append({"type": "image_url", "image_url": {"url": image}})
+            constructed_messages.append({"type": "image_url", "image_url": {"url": image}})
     return constructed_messages

     if not is_none_or_empty(attached_file_context):
```
```diff
@@ -664,6 +663,23 @@ class ImageWithType:
     type: str


+def get_image_from_base64(image: str, type="b64"):
+    # Extract image type and base64 data from inline image data
+    image_base64 = image.split(",", 1)[1]
+    image_type = image.split(";", 1)[0].split(":", 1)[1]
+
+    # Convert image to desired format
+    if type == "b64":
+        return ImageWithType(content=image_base64, type=image_type)
+    elif type == "pil":
+        image_data = base64.b64decode(image_base64)
+        image_pil = PIL.Image.open(BytesIO(image_data))
+        return ImageWithType(content=image_pil, type=image_type)
+    elif type == "bytes":
+        image_data = base64.b64decode(image_base64)
+        return ImageWithType(content=image_data, type=image_type)
+
+
 def get_image_from_url(image_url: str, type="pil"):
     try:
         response = requests.get(image_url)
```
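Usage sketch for the new helper (dummy base64 payload, not a real image):

```python
data_uri = "data:image/webp;base64,aGVsbG8="  # "hello", base64-encoded
img = get_image_from_base64(data_uri, type="bytes")
assert img.type == "image/webp"
assert img.content == b"hello"  # decoded bytes, ready for gtypes.Part.from_bytes
```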
```diff
@@ -878,3 +894,9 @@ def messages_to_print(messages: list[ChatMessage], max_length: int = 70) -> str:
         return str(content)

     return "\n".join([f"{json.dumps(safe_serialize(message.content))[:max_length]}..." for message in messages])
+
+
+class JsonSupport(int, Enum):
+    NONE = 0
+    OBJECT = 1
+    SCHEMA = 2
```
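Because `JsonSupport` subclasses `int`, the capability tiers also order naturally. The callers in this diff compare with equality, but the ordering permits threshold checks like this sketch:

```python
support = get_openai_api_json_support("gpt-4o-mini")
if support >= JsonSupport.OBJECT:
    print("some form of JSON-constrained output is available")
```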
khoj/processor/image/generate.py
CHANGED
```diff
@@ -12,7 +12,7 @@ from google.genai import types as gtypes
 from khoj.database.adapters import ConversationAdapters
 from khoj.database.models import Agent, KhojUser, TextToImageModelConfig
 from khoj.routers.helpers import ChatEvent, generate_better_image_prompt
-from khoj.routers.storage import
+from khoj.routers.storage import upload_generated_image_to_bucket
 from khoj.utils import state
 from khoj.utils.helpers import convert_image_to_webp, timer
 from khoj.utils.rawconfig import LocationData
@@ -118,7 +118,7 @@ async def text_to_image(

     # Decide how to store the generated image
     with timer("Upload image to S3", logger):
-        image_url =
+        image_url = upload_generated_image_to_bucket(webp_image_bytes, user.uuid)

     if not image_url:
         image = f"data:image/webp;base64,{base64.b64encode(webp_image_bytes).decode('utf-8')}"
```
khoj/processor/tools/run_code.py
CHANGED
```diff
@@ -257,7 +257,7 @@ async def execute_e2b(code: str, input_files: list[dict]) -> dict[str, Any]:
                 continue
             else:
                 # Text files - encode utf-8 string as base64
-                b64_data =
+                b64_data = content
             output_files.append({"filename": f.name, "b64_data": b64_data})

     # Collect output files from execution results
```
khoj/routers/api.py
CHANGED
```diff
@@ -463,12 +463,14 @@ async def extract_references_and_questions(
             )
         elif chat_model.model_type == ChatModel.ModelType.ANTHROPIC:
             api_key = chat_model.ai_model_api.api_key
+            api_base_url = chat_model.ai_model_api.api_base_url
             chat_model_name = chat_model.name
             inferred_queries = extract_questions_anthropic(
                 defiltered_query,
                 query_images=query_images,
                 model=chat_model_name,
                 api_key=api_key,
+                api_base_url=api_base_url,
                 conversation_log=meta_log,
                 location_data=location_data,
                 user=user,
@@ -479,12 +481,14 @@ async def extract_references_and_questions(
             )
         elif chat_model.model_type == ChatModel.ModelType.GOOGLE:
             api_key = chat_model.ai_model_api.api_key
+            api_base_url = chat_model.ai_model_api.api_base_url
             chat_model_name = chat_model.name
             inferred_queries = extract_questions_gemini(
                 defiltered_query,
                 query_images=query_images,
                 model=chat_model_name,
                 api_key=api_key,
+                api_base_url=api_base_url,
                 conversation_log=meta_log,
                 location_data=location_data,
                 max_tokens=chat_model.max_prompt_size,
```
khoj/routers/api_chat.py
CHANGED
```diff
@@ -64,7 +64,7 @@ from khoj.routers.research import (
    InformationCollectionIteration,
    execute_information_collection,
 )
-from khoj.routers.storage import
+from khoj.routers.storage import upload_user_image_to_bucket
 from khoj.utils import state
 from khoj.utils.helpers import (
    AsyncIteratorWrapper,
```
```diff
@@ -674,9 +674,11 @@ async def chat(
             base64_data = decoded_string.split(",", 1)[1]
             image_bytes = base64.b64decode(base64_data)
             webp_image_bytes = convert_image_to_webp(image_bytes)
-            uploaded_image =
-            if uploaded_image:
-
+            uploaded_image = upload_user_image_to_bucket(webp_image_bytes, request.user.object.id)
+            if not uploaded_image:
+                base64_webp_image = base64.b64encode(webp_image_bytes).decode("utf-8")
+                uploaded_image = f"data:image/webp;base64,{base64_webp_image}"
+            uploaded_images.append(uploaded_image)

         query_files: Dict[str, str] = {}
         if raw_query_files:
```
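This mirrors the `text_to_image` change earlier in the diff: attempt the bucket upload first, and fall back to inlining the image as a data URI when the upload helper returns nothing (e.g. no object storage configured). Distilled into a sketch (not khoj's literal helper):

```python
import base64
from typing import Optional

def image_reference(webp_image_bytes: bytes, uploaded_url: Optional[str]) -> str:
    # Prefer the bucket URL; otherwise inline the bytes so the chat payload
    # stays self-contained without object storage configured
    if uploaded_url:
        return uploaded_url
    b64 = base64.b64encode(webp_image_bytes).decode("utf-8")
    return f"data:image/webp;base64,{b64}"
```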
khoj/routers/auth.py
CHANGED
```diff
@@ -43,12 +43,9 @@ class MagicLinkForm(BaseModel):
 if not state.anonymous_mode:
     missing_requirements = []
     from authlib.integrations.starlette_client import OAuth, OAuthError
+    from google.auth.transport import requests as google_requests
+    from google.oauth2 import id_token

-    try:
-        from google.auth.transport import requests as google_requests
-        from google.oauth2 import id_token
-    except ImportError:
-        missing_requirements += ["Install the Khoj production package with `pip install khoj[prod]`"]
     if not os.environ.get("RESEND_API_KEY") and (
         not os.environ.get("GOOGLE_CLIENT_ID") or not os.environ.get("GOOGLE_CLIENT_SECRET")
     ):
```