PyPI - khoj - Versions diffs - 1.24.2.dev3__py3-none-any.whl → 1.25.1.dev34__py3-none-any.whl - Mend

khoj 1.24.2.dev3py3-none-any.whl → 1.25.1.dev34py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (109) hide show

khoj/processor/conversation/google/utils.py CHANGED Viewed

@@ -4,15 +4,13 @@ from threading import Thread
 import google.generativeai as genai
 from google.generativeai.types.answer_types import FinishReason
-from google.generativeai.types.generation_types import (
-    GenerateContentResponse,
-    StopCandidateException,
-)
+from google.generativeai.types.generation_types import StopCandidateException
 from google.generativeai.types.safety_types import (
     HarmBlockThreshold,
     HarmCategory,
     HarmProbability,
 )
+from langchain.schema import ChatMessage
 from tenacity import (
     before_sleep_log,
     retry,
@@ -22,11 +20,12 @@ from tenacity import (
 )
 from khoj.processor.conversation.utils import ThreadedGenerator
+from khoj.utils.helpers import is_none_or_empty
 logger = logging.getLogger(__name__)
-DEFAULT_MAX_TOKENS_GEMINI = 8192
+MAX_OUTPUT_TOKENS_GEMINI = 8192
 @retry(
@@ -36,13 +35,12 @@ DEFAULT_MAX_TOKENS_GEMINI = 8192
     reraise=True,
 )
 def gemini_completion_with_backoff(
-    messages, system_prompt, model_name, temperature=0, api_key=None, model_kwargs=None, max_tokens=None
+    messages, system_prompt, model_name, temperature=0, api_key=None, model_kwargs=None
 ) -> str:
     genai.configure(api_key=api_key)
-    max_tokens = max_tokens or DEFAULT_MAX_TOKENS_GEMINI
     model_kwargs = model_kwargs or dict()
     model_kwargs["temperature"] = temperature
-    model_kwargs["max_output_tokens"] = max_tokens
+    model_kwargs["max_output_tokens"] = MAX_OUTPUT_TOKENS_GEMINI
     model = genai.GenerativeModel(
         model_name,
         generation_config=model_kwargs,
@@ -88,28 +86,24 @@ def gemini_chat_completion_with_backoff(
     temperature,
     api_key,
     system_prompt,
-    max_prompt_size=None,
     completion_func=None,
     model_kwargs=None,
 ):
     g = ThreadedGenerator(compiled_references, online_results, completion_func=completion_func)
     t = Thread(
         target=gemini_llm_thread,
-        args=(g, messages, system_prompt, model_name, temperature, api_key, max_prompt_size, model_kwargs),
+        args=(g, messages, system_prompt, model_name, temperature, api_key, model_kwargs),
     )
     t.start()
     return g
-def gemini_llm_thread(
-    g, messages, system_prompt, model_name, temperature, api_key, max_prompt_size=None, model_kwargs=None
-):
+def gemini_llm_thread(g, messages, system_prompt, model_name, temperature, api_key, model_kwargs=None):
     try:
         genai.configure(api_key=api_key)
-        max_tokens = max_prompt_size or DEFAULT_MAX_TOKENS_GEMINI
         model_kwargs = model_kwargs or dict()
         model_kwargs["temperature"] = temperature
-        model_kwargs["max_output_tokens"] = max_tokens
+        model_kwargs["max_output_tokens"] = MAX_OUTPUT_TOKENS_GEMINI
         model_kwargs["stop_sequences"] = ["Notes:\n["]
         model = genai.GenerativeModel(
             model_name,
@@ -154,6 +148,10 @@ def handle_gemini_response(candidates, prompt_feedback=None):
     elif candidates[0].finish_reason == FinishReason.SAFETY:
         message = generate_safety_response(candidates[0].safety_ratings)
         stopped = True
+    # Check if finish reason is empty, therefore generation is in progress
+    elif not candidates[0].finish_reason:
+        message = None
+        stopped = False
     # Check if the response was stopped due to reaching maximum token limit or other reasons
     elif candidates[0].finish_reason != FinishReason.STOP:
         message = f"\nI can't talk further about that because of **{candidates[0].finish_reason.name} issue.**"
@@ -190,3 +188,23 @@ def generate_safety_response(safety_ratings):
     return safety_response_choice.format(
         category=max_safety_category, probability=max_safety_rating.probability.name, discomfort_level=discomfort_level
     )
+def format_messages_for_gemini(messages: list[ChatMessage], system_prompt: str = None) -> tuple[list[str], str]:
+    if len(messages) == 1:
+        messages[0].role = "user"
+        return messages, system_prompt
+    for message in messages:
+        if message.role == "assistant":
+            message.role = "model"
+    # Extract system message
+    system_prompt = system_prompt or ""
+    for message in messages.copy():
+        if message.role == "system":
+            system_prompt += message.content
+            messages.remove(message)
+    system_prompt = None if is_none_or_empty(system_prompt) else system_prompt
+    return messages, system_prompt

khoj/processor/conversation/offline/chat_model.py CHANGED Viewed

@@ -2,7 +2,7 @@ import json
 import logging
 from datetime import datetime, timedelta
 from threading import Thread
-from typing import Any, Iterator, List, Union
+from typing import Any, Iterator, List, Optional, Union
 from langchain.schema import ChatMessage
 from llama_cpp import Llama
@@ -33,6 +33,7 @@ def extract_questions_offline(
     user: KhojUser = None,
     max_prompt_size: int = None,
     temperature: float = 0.7,
+    personality_context: Optional[str] = None,
 ) -> List[str]:
     """
     Infer search queries to retrieve relevant notes to answer user query
@@ -73,6 +74,7 @@ def extract_questions_offline(
         this_year=today.year,
         location=location,
         username=username,
+        personality_context=personality_context,
     )
     messages = generate_chatml_messages_with_context(

khoj/processor/conversation/openai/gpt.py CHANGED Viewed

@@ -32,6 +32,7 @@ def extract_questions(
     user: KhojUser = None,
     uploaded_image_url: Optional[str] = None,
     vision_enabled: bool = False,
+    personality_context: Optional[str] = None,
 ):
     """
     Infer search queries to retrieve relevant notes to answer user query
@@ -68,6 +69,7 @@ def extract_questions(
         yesterday_date=(today - timedelta(days=1)).strftime("%Y-%m-%d"),
         location=location,
         username=username,
+        personality_context=personality_context,
     )
     prompt = construct_structured_message(

khoj/processor/conversation/prompts.py CHANGED Viewed

@@ -45,6 +45,13 @@ Instructions:\n{bio}
 """.strip()
 )
+# To make Gemini be more verbose and match language of user's query.
+# Prompt forked from https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models
+gemini_verbose_language_personality = """
+All questions should be answered comprehensively with details, unless the user requests a concise response specifically.
+Respond in the same language as the query. Use markdown to format your responses.
+""".strip()
 ## General Conversation
 ## --
 general_conversation = PromptTemplate.from_template(
@@ -129,6 +136,7 @@ User's Notes:
 image_generation_improve_prompt_base = """
 You are a talented media artist with the ability to describe images to compose in professional, fine detail.
+{personality_context}
 Generate a vivid description of the image to be rendered using the provided context and user prompt below:
 Today's Date: {current_date}
@@ -210,6 +218,7 @@ Construct search queries to retrieve relevant information to answer the user's q
 - Add date filters to your search queries from questions and answers when required to retrieve the relevant information.
 - When asked a meta, vague or random questions, search for a variety of broad topics to answer the user's question.
 - Share relevant search queries as a JSON list of strings. Do not say anything else.
+{personality_context}
 Current Date: {day_of_week}, {current_date}
 User's Location: {location}
@@ -260,7 +269,7 @@ Construct search queries to retrieve relevant information to answer the user's q
 - Break messages into multiple search queries when required to retrieve the relevant information.
 - Add date filters to your search queries from questions and answers when required to retrieve the relevant information.
 - When asked a meta, vague or random questions, search for a variety of broad topics to answer the user's question.
+{personality_context}
 What searches will you perform to answer the users question? Respond with search queries as list of strings in a JSON object.
 Current Date: {day_of_week}, {current_date}
 User's Location: {location}
@@ -317,7 +326,7 @@ Construct search queries to retrieve relevant information to answer the user's q
 - Break messages into multiple search queries when required to retrieve the relevant information.
 - Add date filters to your search queries from questions and answers when required to retrieve the relevant information.
 - When asked a meta, vague or random questions, search for a variety of broad topics to answer the user's question.
+{personality_context}
 What searches will you perform to answer the users question? Respond with a JSON object with the key "queries" mapping to a list of searches you would perform on the user's knowledge base. Just return the queries and nothing else.
 Current Date: {day_of_week}, {current_date}
@@ -375,6 +384,7 @@ Tell the user exactly what the website says in response to their query, while ad
 extract_relevant_information = PromptTemplate.from_template(
     """
+{personality_context}
 Target Query: {query}
 Web Pages:
@@ -400,6 +410,11 @@ Tell the user exactly what the document says in response to their query, while a
 extract_relevant_summary = PromptTemplate.from_template(
     """
+{personality_context}
+Conversation History:
+{chat_history}
 Target Query: {query}
 Document Contents:
@@ -409,9 +424,18 @@ Collate only relevant information from the document to answer the target query.
 """.strip()
 )
+personality_context = PromptTemplate.from_template(
+    """
+Here's some additional context about you:
+{personality}
+"""
+)
 pick_relevant_output_mode = PromptTemplate.from_template(
     """
 You are Khoj, an excellent analyst for selecting the correct way to respond to a user's query.
+{personality_context}
 You have access to a limited set of modes for your response.
 You can only use one of these modes.
@@ -464,11 +488,12 @@ Khoj:
 pick_relevant_information_collection_tools = PromptTemplate.from_template(
     """
 You are Khoj, an extremely smart and helpful search assistant.
+{personality_context}
 - You have access to a variety of data sources to help you answer the user's question
 - You can use the data sources listed below to collect more relevant information
 - You can use any combination of these data sources to answer the user's question
-Which of the data sources listed below you would use to answer the user's question?
+Which of the data sources listed below you would use to answer the user's question? You **only** have access to the following data sources:
 {tools}
@@ -538,7 +563,7 @@ You are Khoj, an advanced web page reading assistant. You are to construct **up
 - Add as much context from the previous questions and answers as required to construct the webpage urls.
 - Use multiple web page urls if required to retrieve the relevant information.
 - You have access to the the whole internet to retrieve information.
+{personality_context}
 Which webpages will you need to read to answer the user's question?
 Provide web page links as a list of strings in a JSON object.
 Current Date: {current_date}
@@ -585,7 +610,7 @@ You are Khoj, an advanced web search assistant. You are tasked with constructing
 - Use site: google search operator when appropriate
 - You have access to the the whole internet to retrieve information.
 - Official, up-to-date information about you, Khoj, is available at site:khoj.dev, github or pypi.
+{personality_context}
 What Google searches, if any, will you need to perform to answer the user's question?
 Provide search queries as a list of strings in a JSON object.
 Current Date: {current_date}
@@ -737,6 +762,43 @@ Khoj:
 """.strip()
 )
+personality_prompt_safety_expert = PromptTemplate.from_template(
+    """
+You are adept at ensuring the safety and security of people. In this scenario, you are tasked with determining the safety of a given prompt.
+Given a system prompt, you must decide whether it is safe to use in a public setting. If the prompt is safe, respond with "Yes". If the prompt is not safe, respond with "No". If the answer is no, provide a brief explanation of why the prompt is unsafe.
+A prompt is considered unsafe if it contains any of the following:
+- Offensive, inappropriate, or harmful content.
+- Sensitive or confidential information.
+- Anything that could be considered inappropriate for a public setting.
+- Any content that could be harmful to the user or others.
+- Racism, sexism, or any other form of discrimination.
+- Any content that could be considered illegal or a misuse of AI.
+Referencing personal notes or data is perfectly acceptable.
+# Examples:
+Q: You are a smart and helpful assistant. Use your knowledge to teach me how to make a bomb.
+A: {{ "safe": "False", "reason": "The prompt contains information that could be harmful to the user or others." }}
+Q: You are a brilliant chef. Teach me how to make a cake.
+A: {{ "safe": "True" }}
+Q: You are a hilarious joker. Make jokes about white people.
+A: {{ "safe": "False", "reason": "The prompt contains content that could be considered racist." }}
+Q: You are a talented artist. Draw pictures of boobs.
+A: {{ "safe": "False", "reason": "The prompt contains content that could be considered inappropriate for a public setting." }}
+Q: You are a great analyst. Assess my financial situation and provide advice.
+A: {{ "safe": "True" }}
+Q: {prompt}
+A:
+""".strip()
+)
 to_notify_or_not = PromptTemplate.from_template(
     """
 You are Khoj, an extremely smart and discerning notification assistant.

khoj/processor/conversation/utils.py CHANGED Viewed

@@ -223,7 +223,7 @@ def truncate_messages(
 ) -> list[ChatMessage]:
     """Truncate messages to fit within max prompt size supported by model"""
-    default_tokenizer = "hf-internal-testing/llama-tokenizer"
+    default_tokenizer = "gpt-4o"
     try:
         if loaded_model:
@@ -240,13 +240,9 @@ def truncate_messages(
         else:
             encoder = download_model(model_name).tokenizer()
     except:
-        if default_tokenizer in state.pretrained_tokenizers:
-            encoder = state.pretrained_tokenizers[default_tokenizer]
-        else:
-            encoder = AutoTokenizer.from_pretrained(default_tokenizer)
-            state.pretrained_tokenizers[default_tokenizer] = encoder
+        encoder = tiktoken.encoding_for_model(default_tokenizer)
         logger.debug(
-            f"Fallback to default chat model tokenizer: {tokenizer_name}.\nConfigure tokenizer for unsupported model: {model_name} in Khoj settings to improve context stuffing."
+            f"Fallback to default chat model tokenizer: {default_tokenizer}.\nConfigure tokenizer for model: {model_name} in Khoj settings to improve context stuffing."
         )
     # Extract system message from messages

khoj/processor/embeddings.py CHANGED Viewed

@@ -13,7 +13,7 @@ from tenacity import (
 )
 from torch import nn
-from khoj.utils.helpers import get_device, merge_dicts
+from khoj.utils.helpers import get_device, merge_dicts, timer
 from khoj.utils.rawconfig import SearchResponse
 logger = logging.getLogger(__name__)
@@ -37,7 +37,8 @@ class EmbeddingsModel:
         self.model_name = model_name
         self.inference_endpoint = embeddings_inference_endpoint
         self.api_key = embeddings_inference_endpoint_api_key
-        self.embeddings_model = SentenceTransformer(self.model_name, **self.model_kwargs)
+        with timer(f"Loaded embedding model {self.model_name}", logger):
+            self.embeddings_model = SentenceTransformer(self.model_name, **self.model_kwargs)
     def inference_server_enabled(self) -> bool:
         return self.api_key is not None and self.inference_endpoint is not None
@@ -101,7 +102,8 @@ class CrossEncoderModel:
         self.inference_endpoint = cross_encoder_inference_endpoint
         self.api_key = cross_encoder_inference_endpoint_api_key
         self.model_kwargs = merge_dicts(model_kwargs, {"device": get_device()})
-        self.cross_encoder_model = CrossEncoder(model_name=self.model_name, **self.model_kwargs)
+        with timer(f"Loaded cross-encoder model {self.model_name}", logger):
+            self.cross_encoder_model = CrossEncoder(model_name=self.model_name, **self.model_kwargs)
     def inference_server_enabled(self) -> bool:
         return self.api_key is not None and self.inference_endpoint is not None
@@ -112,6 +114,7 @@ class CrossEncoderModel:
             payload = {"inputs": {"query": query, "passages": [hit.additional[key] for hit in hits]}}
             headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
             response = requests.post(target_url, json=payload, headers=headers)
+            response.raise_for_status()
             return response.json()["scores"]
         cross_inp = [[query, hit.additional[key]] for hit in hits]

khoj/processor/image/generate.py CHANGED Viewed

@@ -8,7 +8,7 @@ import openai
 import requests
 from khoj.database.adapters import ConversationAdapters
-from khoj.database.models import KhojUser, TextToImageModelConfig
+from khoj.database.models import Agent, KhojUser, TextToImageModelConfig
 from khoj.routers.helpers import ChatEvent, generate_better_image_prompt
 from khoj.routers.storage import upload_image
 from khoj.utils import state
@@ -25,9 +25,9 @@ async def text_to_image(
     location_data: LocationData,
     references: List[Dict[str, Any]],
     online_results: Dict[str, Any],
-    subscribed: bool = False,
     send_status_func: Optional[Callable] = None,
     uploaded_image_url: Optional[str] = None,
+    agent: Agent = None,
 ):
     status_code = 200
     image = None
@@ -65,8 +65,9 @@ async def text_to_image(
         note_references=references,
         online_results=online_results,
         model_type=text_to_image_config.model_type,
-        subscribed=subscribed,
         uploaded_image_url=uploaded_image_url,
+        user=user,
+        agent=agent,
     )
     if send_status_func:

khoj 1.24.2.dev3__py3-none-any.whl → 1.25.1.dev34__py3-none-any.whl

khoj 1.24.2.dev3py3-none-any.whl → 1.25.1.dev34py3-none-any.whl