khoj 1.30.2.dev11__py3-none-any.whl → 1.30.2.dev23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/interface/compiled/404/index.html +1 -1
- khoj/interface/compiled/_next/static/chunks/1210.ef7a0f9a7e43da1d.js +1 -0
- khoj/interface/compiled/_next/static/chunks/1603-1407afe510f0145a.js +1 -0
- khoj/interface/compiled/_next/static/chunks/1970-1b63ac1497b03a10.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/{page-5f6e0dacc34e33ad.js → page-b086c9b0aa5a3833.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/automations/{page-60bc7454bc3ea881.js → page-697a2d415e11a872.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/{page-ac366c9111374312.js → page-461e26fcb7578d39.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/{page-358154a4436ef316.js → page-4a3c49c5e996cc40.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/search/{page-64ea1717528979af.js → page-9013658bebfc3d17.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/settings/{page-17a538580c65e7fe.js → page-41eb536497bb544a.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-47641b3691fb0856.js → page-6a68ac7e227b34e7.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{webpack-1c0a37d7df44bed9.js → webpack-9b0a570f15d6209d.js} +1 -1
- khoj/interface/compiled/_next/static/css/23f801d22927d568.css +1 -0
- khoj/interface/compiled/_next/static/css/592ca99f5122e75a.css +1 -0
- khoj/interface/compiled/agents/index.html +1 -1
- khoj/interface/compiled/agents/index.txt +2 -2
- khoj/interface/compiled/automations/index.html +1 -1
- khoj/interface/compiled/automations/index.txt +2 -2
- khoj/interface/compiled/chat/index.html +1 -1
- khoj/interface/compiled/chat/index.txt +2 -2
- khoj/interface/compiled/index.html +1 -1
- khoj/interface/compiled/index.txt +2 -2
- khoj/interface/compiled/search/index.html +1 -1
- khoj/interface/compiled/search/index.txt +2 -2
- khoj/interface/compiled/settings/index.html +1 -1
- khoj/interface/compiled/settings/index.txt +2 -2
- khoj/interface/compiled/share/chat/index.html +1 -1
- khoj/interface/compiled/share/chat/index.txt +2 -2
- khoj/processor/conversation/anthropic/utils.py +15 -1
- khoj/processor/conversation/google/utils.py +12 -1
- khoj/processor/conversation/openai/utils.py +33 -17
- khoj/processor/conversation/prompts.py +40 -21
- khoj/processor/conversation/utils.py +3 -2
- khoj/routers/api_chat.py +29 -14
- khoj/routers/auth.py +2 -2
- khoj/routers/helpers.py +19 -4
- khoj/utils/constants.py +17 -0
- khoj/utils/helpers.py +24 -0
- {khoj-1.30.2.dev11.dist-info → khoj-1.30.2.dev23.dist-info}/METADATA +1 -1
- {khoj-1.30.2.dev11.dist-info → khoj-1.30.2.dev23.dist-info}/RECORD +45 -45
- khoj/interface/compiled/_next/static/chunks/1210.132a7e1910006bbb.js +0 -1
- khoj/interface/compiled/_next/static/chunks/1603-859ddcf58f3ca639.js +0 -1
- khoj/interface/compiled/_next/static/chunks/1970-e1935a1d0930a7c5.js +0 -1
- khoj/interface/compiled/_next/static/css/2ff098d0815fdbc1.css +0 -1
- khoj/interface/compiled/_next/static/css/4cae6c0e5c72fb2d.css +0 -1
- /khoj/interface/compiled/_next/static/{ZwZ17DepweYHu7DUF8zml → 07uMbO07BRIyOEFvL9ncb}/_buildManifest.js +0 -0
- /khoj/interface/compiled/_next/static/{ZwZ17DepweYHu7DUF8zml → 07uMbO07BRIyOEFvL9ncb}/_ssgManifest.js +0 -0
- {khoj-1.30.2.dev11.dist-info → khoj-1.30.2.dev23.dist-info}/WHEEL +0 -0
- {khoj-1.30.2.dev11.dist-info → khoj-1.30.2.dev23.dist-info}/entry_points.txt +0 -0
- {khoj-1.30.2.dev11.dist-info → khoj-1.30.2.dev23.dist-info}/licenses/LICENSE +0 -0
khoj/processor/conversation/google/utils.py
CHANGED
@@ -25,7 +25,7 @@ from khoj.processor.conversation.utils import (
     get_image_from_url,
 )
 from khoj.utils import state
-from khoj.utils.helpers import in_debug_mode, is_none_or_empty
+from khoj.utils.helpers import get_chat_usage_metrics, in_debug_mode, is_none_or_empty

 logger = logging.getLogger(__name__)

@@ -68,6 +68,7 @@ def gemini_completion_with_backoff(
         response = chat_session.send_message(formatted_messages[-1]["parts"])
         response_text = response.text
     except StopCandidateException as e:
+        response = None
         response_text, _ = handle_gemini_response(e.args)
         # Respond with reason for stopping
         logger.warning(
@@ -75,6 +76,11 @@ def gemini_completion_with_backoff(
             f"Last Message by {messages[-1].role}: {messages[-1].content}"
         )

+    # Aggregate cost of chat
+    input_tokens = response.usage_metadata.prompt_token_count if response else 0
+    output_tokens = response.usage_metadata.candidates_token_count if response else 0
+    tracer["usage"] = get_chat_usage_metrics(model_name, input_tokens, output_tokens, tracer.get("usage"))
+
     # Save conversation trace
     tracer["chat_model"] = model_name
     tracer["temperature"] = temperature
@@ -146,6 +152,11 @@ def gemini_llm_thread(
                 if stopped:
                     raise StopCandidateException(message)

+        # Calculate cost of chat
+        input_tokens = chunk.usage_metadata.prompt_token_count
+        output_tokens = chunk.usage_metadata.candidates_token_count
+        tracer["usage"] = get_chat_usage_metrics(model_name, input_tokens, output_tokens, tracer.get("usage"))
+
         # Save conversation trace
         tracer["chat_model"] = model_name
         tracer["temperature"] = temperature
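Both Gemini code paths now funnel token counts into `tracer["usage"]`, reading them off the response's `usage_metadata`. A minimal sketch of where those counts come from, assuming `google-generativeai` is installed and an API key is configured (the model name is illustrative); the `if response else 0` guard mirrors the `StopCandidateException` path above, where `response` is reset to `None`:

```python
# Sketch: reading token counts off a Gemini response's usage_metadata.
# Assumes genai.configure(api_key=...) has already been called.
import google.generativeai as genai

model = genai.GenerativeModel("gemini-1.5-flash")  # illustrative model name
response = model.generate_content("Say hi")

# Mirrors the guard above: response may be None if generation was stopped
input_tokens = response.usage_metadata.prompt_token_count if response else 0
output_tokens = response.usage_metadata.candidates_token_count if response else 0
print(input_tokens, output_tokens)
```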
khoj/processor/conversation/openai/utils.py
CHANGED
@@ -4,6 +4,8 @@ from threading import Thread
 from typing import Dict

 import openai
+from openai.types.chat.chat_completion import ChatCompletion
+from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
 from tenacity import (
     before_sleep_log,
     retry,
@@ -18,7 +20,7 @@ from khoj.processor.conversation.utils import (
     commit_conversation_trace,
 )
 from khoj.utils import state
-from khoj.utils.helpers import in_debug_mode
+from khoj.utils.helpers import get_chat_usage_metrics, in_debug_mode

 logger = logging.getLogger(__name__)

@@ -63,27 +65,34 @@ def completion_with_backoff(
     if os.getenv("KHOJ_LLM_SEED"):
         model_kwargs["seed"] = int(os.getenv("KHOJ_LLM_SEED"))

-    chat = client.chat.completions.create(
-        stream=stream,
+    chat: ChatCompletion | openai.Stream[ChatCompletionChunk] = client.chat.completions.create(
         messages=formatted_messages,  # type: ignore
         model=model,  # type: ignore
+        stream=stream,
+        stream_options={"include_usage": True} if stream else {},
         temperature=temperature,
         timeout=20,
         **(model_kwargs or dict()),
     )

-    if not stream:
-        return chat.choices[0].message.content
-
     aggregated_response = ""
-
-
-
-
-
-
-
-
+    if not stream:
+        chunk = chat
+        aggregated_response = chunk.choices[0].message.content
+    else:
+        for chunk in chat:
+            if len(chunk.choices) == 0:
+                continue
+            delta_chunk = chunk.choices[0].delta  # type: ignore
+            if isinstance(delta_chunk, str):
+                aggregated_response += delta_chunk
+            elif delta_chunk.content:
+                aggregated_response += delta_chunk.content
+
+    # Calculate cost of chat
+    input_tokens = chunk.usage.prompt_tokens if hasattr(chunk, "usage") and chunk.usage else 0
+    output_tokens = chunk.usage.completion_tokens if hasattr(chunk, "usage") and chunk.usage else 0
+    tracer["usage"] = get_chat_usage_metrics(model, input_tokens, output_tokens, tracer.get("usage"))

     # Save conversation trace
     tracer["chat_model"] = model
@@ -162,10 +171,11 @@ def llm_thread(
     if os.getenv("KHOJ_LLM_SEED"):
         model_kwargs["seed"] = int(os.getenv("KHOJ_LLM_SEED"))

-    chat = client.chat.completions.create(
-        stream=stream,
+    chat: ChatCompletion | openai.Stream[ChatCompletionChunk] = client.chat.completions.create(
         messages=formatted_messages,
         model=model_name,  # type: ignore
+        stream=stream,
+        stream_options={"include_usage": True} if stream else {},
         temperature=temperature,
         timeout=20,
         **(model_kwargs or dict()),
@@ -173,7 +183,8 @@ def llm_thread(

     aggregated_response = ""
     if not stream:
-
+        chunk = chat
+        aggregated_response = chunk.choices[0].message.content
         g.send(aggregated_response)
     else:
         for chunk in chat:
@@ -189,6 +200,11 @@ def llm_thread(
                 aggregated_response += text_chunk
                 g.send(text_chunk)

+        # Calculate cost of chat
+        input_tokens = chunk.usage.prompt_tokens if hasattr(chunk, "usage") and chunk.usage else 0
+        output_tokens = chunk.usage.completion_tokens if hasattr(chunk, "usage") and chunk.usage else 0
+        tracer["usage"] = get_chat_usage_metrics(model_name, input_tokens, output_tokens, tracer.get("usage"))
+
        # Save conversation trace
        tracer["chat_model"] = model_name
        tracer["temperature"] = temperature
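The API detail behind both OpenAI hunks: with `stream_options={"include_usage": True}`, the final streamed chunk carries no `choices` but does carry `usage` totals, which is why the loops skip empty-choice chunks and read token counts from the last `chunk` after iteration. A standalone sketch of that behavior, assuming a configured OpenAI client (model name illustrative, not khoj's actual call site):

```python
# Sketch: consuming an OpenAI stream with usage reporting enabled.
# Assumes OPENAI_API_KEY is set in the environment.
import openai

client = openai.OpenAI()
stream = client.chat.completions.create(
    model="gpt-4o-mini",  # illustrative
    messages=[{"role": "user", "content": "Say hi"}],
    stream=True,
    stream_options={"include_usage": True},
)

text, last_chunk = "", None
for chunk in stream:
    last_chunk = chunk
    if len(chunk.choices) == 0:
        continue  # the final usage-only chunk has an empty choices list
    text += chunk.choices[0].delta.content or ""

if last_chunk is not None and last_chunk.usage:
    print(last_chunk.usage.prompt_tokens, last_chunk.usage.completion_tokens)
```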
khoj/processor/conversation/prompts.py
CHANGED
@@ -183,20 +183,23 @@ Improved Prompt:

 improve_diagram_description_prompt = PromptTemplate.from_template(
     """
-you are an architect working with a novice artist using a diagramming
+you are an architect working with a novice digital artist using a diagramming software.
 {personality_context}

 you need to convert the user's query to a description format that the novice artist can use very well. you are allowed to use primitives like
 - text
 - rectangle
-- diamond
 - ellipse
 - line
 - arrow

 use these primitives to describe what sort of diagram the drawer should create. the artist must recreate the diagram every time, so include all relevant prior information in your description.

-
+- include the full, exact description. the artist does not have much experience, so be precise.
+- describe the layout.
+- you can only use straight lines.
+- use simple, concise language.
+- keep it simple and easy to understand. the artist is easily distracted.

 Today's Date: {current_date}
 User's Location: {location}
@@ -218,19 +221,23 @@ Query: {query}

 excalidraw_diagram_generation_prompt = PromptTemplate.from_template(
     """
-You are a program manager with the ability to describe diagrams to compose in professional, fine detail.
+You are a program manager with the ability to describe diagrams to compose in professional, fine detail. You LOVE getting into the details and making tedious labels, lines, and shapes look beautiful. You make everything look perfect.
 {personality_context}

-You need to create a declarative description of the diagram and relevant components, using this base schema.
+You need to create a declarative description of the diagram and relevant components, using this base schema.
+- `label`: specify the text to be rendered in the respective elements.
+- Always use light colors for the `backgroundColor` property, like white, or light blue, green, red
+- **ALWAYS Required properties for ALL elements**: `type`, `x`, `y`, `id`.
+- Be very generous with spacing and composition. Use ample space between elements.

 {{
     type: string,
     x: number,
     y: number,
-    strokeColor: string,
-    backgroundColor: string,
     width: number,
     height: number,
+    strokeColor: string,
+    backgroundColor: string,
     id: string,
     label: {{
         text: string,
@@ -240,28 +247,30 @@ You need to create a declarative description of the diagram and relevant compone
 Valid types:
 - text
 - rectangle
-- diamond
 - ellipse
 - line
 - arrow

-For arrows and lines,
+For arrows and lines,
+- `points`: specify the start and end points of the arrow
+- **ALWAYS Required properties for ALL elements**: `type`, `x`, `y`, `id`.
+- `start` and `end` properties: connect the linear elements to other elements. The start and end point can either be the ID to map to an existing object, or the `type` and `text` to create a new object. Mapping to an existing object is useful if you want to connect it to multiple objects. Lines and arrows can only start and end at rectangle, text, or ellipse elements. Even if you're using the `start` and `end` properties, you still need to specify the `x` and `y` properties for the start and end points.

 {{
     type: "arrow",
     id: string,
     x: number,
     y: number,
-    width: number,
-    height: number,
     strokeColor: string,
     start: {{
         id: string,
         type: string,
+        text: string,
     }},
     end: {{
         id: string,
         type: string,
+        text: string,
     }},
     label: {{
         text: string,
@@ -272,7 +281,11 @@ For arrows and lines, you can use the `points` property to specify the start and
     ]
 }}

-For text,
+For text,
+- `text`: specify the text to be rendered
+- **ALWAYS Required properties for ALL elements**: `type`, `x`, `y`, `id`.
+- `fontSize`: optional property to specify the font size of the text
+- Use this element only for titles, subtitles, and overviews. For labels, use the `label` property in the respective elements.

 {{
     type: "text",
@@ -287,19 +300,25 @@ Here's an example of a valid diagram:

 Design Description: Create a diagram describing a circular development process with 3 stages: design, implementation and feedback. The design stage is connected to the implementation stage and the implementation stage is connected to the feedback stage and the feedback stage is connected to the design stage. Each stage should be labeled with the stage name.

-Response:
-
-
-
-
-{{"type":"
-{{"type":"ellipse","x":-
+Example Response:
+```json
+{{
+    "scratchpad": "The diagram represents a circular development process with 3 stages: design, implementation and feedback. Each stage is connected to the next stage using an arrow, forming a circular process.",
+    "elements": [
+    {{"type":"text","x":-150,"y":50,"id":"title_text","text":"Circular Development Process","fontSize":24}},
+    {{"type":"ellipse","x":-169,"y":113,"id":"design_ellipse", "label": {{"text": "Design"}}}},
+    {{"type":"ellipse","x":62,"y":394,"id":"implement_ellipse", "label": {{"text": "Implement"}}}},
+    {{"type":"ellipse","x":-348,"y":430,"id":"feedback_ellipse", "label": {{"text": "Feedback"}}}},
     {{"type":"arrow","x":21,"y":273,"id":"design_to_implement_arrow","points":[[0,0],[86,105]],"start":{{"id":"design_ellipse"}}, "end":{{"id":"implement_ellipse"}}}},
     {{"type":"arrow","x":50,"y":519,"id":"implement_to_feedback_arrow","points":[[0,0],[-198,-6]],"start":{{"id":"implement_ellipse"}}, "end":{{"id":"feedback_ellipse"}}}},
     {{"type":"arrow","x":-228,"y":417,"id":"feedback_to_design_arrow","points":[[0,0],[85,-123]],"start":{{"id":"feedback_ellipse"}}, "end":{{"id":"design_ellipse"}}}},
-]
+    ]
+}}
+```
+
+Think about spacing and composition. Use ample space between elements. Double the amount of space you think you need. Create a detailed diagram from the provided context and user prompt below.

-
+Return a valid JSON object, where the drawing is in `elements` and your thought process is in `scratchpad`. If you can't make the whole diagram in one response, you can split it into multiple responses. If you need to simplify for brevity, simply do so in the `scratchpad` field. DO NOT add additional info in the `elements` field.

 Diagram Description: {query}

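The prompt rewrite changes the expected model output from a bare element array to a JSON object with `scratchpad` and `elements` keys; the `khoj/routers/helpers.py` hunks further down enforce exactly that shape. A minimal sketch of validating a response against the new contract (the example payload is made up):

```python
import json

# Hypothetical model output following the new prompt contract
raw_response = (
    '{"scratchpad": "three stages in a cycle",'
    ' "elements": [{"type": "ellipse", "x": 0, "y": 0, "id": "design_ellipse"}]}'
)

response = json.loads(raw_response)
if (
    not isinstance(response, dict)
    or not response.get("scratchpad")
    or not isinstance(response.get("elements"), list)
    or not all(isinstance(element, dict) for element in response["elements"])
):
    raise AssertionError(f"Invalid diagram response: {response}")
print(f"{len(response['elements'])} element(s) to render")
```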
khoj/processor/conversation/utils.py
CHANGED
@@ -5,7 +5,6 @@ import math
 import mimetypes
 import os
 import queue
-import re
 import uuid
 from dataclasses import dataclass
 from datetime import datetime
@@ -57,7 +56,7 @@ model_to_prompt_size = {
     "gemini-1.5-flash": 20000,
     "gemini-1.5-pro": 20000,
     # Anthropic Models
-    "claude-3-5-sonnet-
+    "claude-3-5-sonnet-20241022": 20000,
     "claude-3-5-haiku-20241022": 20000,
     # Offline Models
     "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF": 20000,
@@ -213,6 +212,8 @@ class ChatEvent(Enum):
     REFERENCES = "references"
     STATUS = "status"
     METADATA = "metadata"
+    USAGE = "usage"
+    END_RESPONSE = "end_response"


 def message_to_log(
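The two new enum members give the stream a way to report usage metadata after the LLM response finishes, and then to explicitly close the whole response. A sketch of the `{"type", "data"}` envelope these events travel in, matching what `MessageProcessor` parses in the `khoj/routers/helpers.py` hunk below (`serialize_event` is a hypothetical helper, not khoj code):

```python
import json
from enum import Enum

class ChatEvent(Enum):
    USAGE = "usage"
    END_RESPONSE = "end_response"

def serialize_event(event: ChatEvent, data) -> str:
    # Hypothetical: wraps an event in the {"type", "data"} envelope the client parses
    return json.dumps({"type": event.value, "data": data})

usage = {"input_tokens": 4000, "output_tokens": 700, "cost": 0.00051}
print(serialize_event(ChatEvent.USAGE, usage))
print(serialize_event(ChatEvent.END_RESPONSE, ""))
```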
khoj/routers/api_chat.py
CHANGED
@@ -667,27 +667,37 @@ async def chat(
         finally:
             yield event_delimiter

-    async def send_llm_response(response: str):
+    async def send_llm_response(response: str, usage: dict = None):
+        # Send Chat Response
         async for result in send_event(ChatEvent.START_LLM_RESPONSE, ""):
             yield result
         async for result in send_event(ChatEvent.MESSAGE, response):
             yield result
         async for result in send_event(ChatEvent.END_LLM_RESPONSE, ""):
             yield result
+        # Send Usage Metadata once llm interactions are complete
+        if usage:
+            async for event in send_event(ChatEvent.USAGE, usage):
+                yield event
+        async for result in send_event(ChatEvent.END_RESPONSE, ""):
+            yield result

     def collect_telemetry():
         # Gather chat response telemetry
         nonlocal chat_metadata
         latency = time.perf_counter() - start_time
         cmd_set = set([cmd.value for cmd in conversation_commands])
+        cost = (tracer.get("usage", {}) or {}).get("cost", 0)
         chat_metadata = chat_metadata or {}
         chat_metadata["conversation_command"] = cmd_set
         chat_metadata["agent"] = conversation.agent.slug if conversation and conversation.agent else None
         chat_metadata["latency"] = f"{latency:.3f}"
         chat_metadata["ttft_latency"] = f"{ttft:.3f}"
+        chat_metadata["usage"] = tracer.get("usage")

         logger.info(f"Chat response time to first token: {ttft:.3f} seconds")
         logger.info(f"Chat response total time: {latency:.3f} seconds")
+        logger.info(f"Chat response cost: ${cost:.5f}")
         update_telemetry_state(
             request=request,
             telemetry_type="api",
@@ -699,7 +709,7 @@ async def chat(
     )

     if is_query_empty(q):
-        async for result in send_llm_response("Please ask your query to get started."):
+        async for result in send_llm_response("Please ask your query to get started.", tracer.get("usage")):
             yield result
         return

@@ -713,7 +723,7 @@ async def chat(
         create_new=body.create_new,
     )
     if not conversation:
-        async for result in send_llm_response(f"Conversation {conversation_id} not found"):
+        async for result in send_llm_response(f"Conversation {conversation_id} not found", tracer.get("usage")):
             yield result
         return
     conversation_id = conversation.id
@@ -777,7 +787,7 @@ async def chat(
             await conversation_command_rate_limiter.update_and_check_if_valid(request, cmd)
             q = q.replace(f"/{cmd.value}", "").strip()
         except HTTPException as e:
-            async for result in send_llm_response(str(e.detail)):
+            async for result in send_llm_response(str(e.detail), tracer.get("usage")):
                 yield result
             return

@@ -834,7 +844,7 @@ async def chat(
         agent_has_entries = await EntryAdapters.aagent_has_entries(agent)
         if len(file_filters) == 0 and not agent_has_entries:
             response_log = "No files selected for summarization. Please add files using the section on the left."
-            async for result in send_llm_response(response_log):
+            async for result in send_llm_response(response_log, tracer.get("usage")):
                 yield result
         else:
             async for response in generate_summary_from_files(
@@ -853,7 +863,7 @@ async def chat(
                 else:
                     if isinstance(response, str):
                         response_log = response
-                        async for result in send_llm_response(response):
+                        async for result in send_llm_response(response, tracer.get("usage")):
                             yield result

             await sync_to_async(save_to_conversation_log)(
@@ -880,7 +890,7 @@ async def chat(
             conversation_config = await ConversationAdapters.aget_default_conversation_config(user)
             model_type = conversation_config.model_type
             formatted_help = help_message.format(model=model_type, version=state.khoj_version, device=get_device())
-            async for result in send_llm_response(formatted_help):
+            async for result in send_llm_response(formatted_help, tracer.get("usage")):
                 yield result
             return
         # Adding specification to search online specifically on khoj.dev pages.
@@ -895,7 +905,7 @@ async def chat(
         except Exception as e:
             logger.error(f"Error scheduling task {q} for {user.email}: {e}")
             error_message = f"Unable to create automation. Ensure the automation doesn't already exist."
-            async for result in send_llm_response(error_message):
+            async for result in send_llm_response(error_message, tracer.get("usage")):
                 yield result
             return

@@ -916,7 +926,7 @@ async def chat(
             raw_query_files=raw_query_files,
             tracer=tracer,
         )
-        async for result in send_llm_response(llm_response):
+        async for result in send_llm_response(llm_response, tracer.get("usage")):
             yield result
         return

@@ -963,7 +973,7 @@ async def chat(
             yield result

     if conversation_commands == [ConversationCommand.Notes] and not await EntryAdapters.auser_has_entries(user):
-        async for result in send_llm_response(f"{no_entries_found.format()}"):
+        async for result in send_llm_response(f"{no_entries_found.format()}", tracer.get("usage")):
             yield result
         return

@@ -1105,7 +1115,7 @@ async def chat(
                 "detail": improved_image_prompt,
                 "image": None,
             }
-            async for result in send_llm_response(json.dumps(content_obj)):
+            async for result in send_llm_response(json.dumps(content_obj), tracer.get("usage")):
                 yield result
             return

@@ -1132,7 +1142,7 @@ async def chat(
             "inferredQueries": [improved_image_prompt],
             "image": generated_image,
         }
-        async for result in send_llm_response(json.dumps(content_obj)):
+        async for result in send_llm_response(json.dumps(content_obj), tracer.get("usage")):
             yield result
         return

@@ -1166,7 +1176,7 @@ async def chat(
             diagram_description = excalidraw_diagram_description
         else:
             error_message = "Failed to generate diagram. Please try again later."
-            async for result in send_llm_response(error_message):
+            async for result in send_llm_response(error_message, tracer.get("usage")):
                 yield result

             await sync_to_async(save_to_conversation_log)(
@@ -1213,7 +1223,7 @@ async def chat(
             tracer=tracer,
         )

-        async for result in send_llm_response(json.dumps(content_obj)):
+        async for result in send_llm_response(json.dumps(content_obj), tracer.get("usage")):
             yield result
         return

@@ -1252,6 +1262,11 @@ async def chat(
             if item is None:
                 async for result in send_event(ChatEvent.END_LLM_RESPONSE, ""):
                     yield result
+                # Send Usage Metadata once llm interactions are complete
+                async for event in send_event(ChatEvent.USAGE, tracer.get("usage")):
+                    yield event
+                async for result in send_event(ChatEvent.END_RESPONSE, ""):
+                    yield result
                 logger.debug("Finished streaming response")
                 return
             if not connection_alive or not continue_stream:
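Net effect of the api_chat.py changes: every response path, streaming or not, now ends with the same event tail. An illustrative trace of what a client observes (payload values made up):

```python
# Illustrative end-of-response event order after this change
event_tail = [
    {"type": "end_llm_response", "data": ""},
    {"type": "usage", "data": {"input_tokens": 4000, "output_tokens": 700, "cost": 0.00051}},
    {"type": "end_response", "data": ""},
]
for event in event_tail:
    print(event["type"], event["data"])
```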
khoj/routers/auth.py
CHANGED
@@ -89,7 +89,7 @@ async def login_magic_link(request: Request, form: MagicLinkForm):
         update_telemetry_state(
             request=request,
             telemetry_type="api",
-            api="
+            api="create_user__email",
             metadata={"server_id": str(user.uuid)},
         )
         logger.log(logging.INFO, f"🥳 New User Created: {user.uuid}")
@@ -174,7 +174,7 @@ async def auth(request: Request):
         update_telemetry_state(
             request=request,
             telemetry_type="api",
-            api="
+            api="create_user__google",
             metadata={"server_id": str(khoj_user.uuid)},
         )
         logger.log(logging.INFO, f"🥳 New User Created: {khoj_user.uuid}")
khoj/routers/helpers.py
CHANGED
@@ -753,7 +753,11 @@ async def generate_excalidraw_diagram(
         yield None, None
         return

-
+    scratchpad = excalidraw_diagram_description.get("scratchpad")
+
+    inferred_queries = f"Instruction: {better_diagram_description_prompt}\n\nScratchpad: {scratchpad}"
+
+    yield inferred_queries, excalidraw_diagram_description.get("elements")


 async def generate_better_diagram_description(
@@ -822,7 +826,7 @@ async def generate_excalidraw_diagram_from_description(
     user: KhojUser = None,
     agent: Agent = None,
     tracer: dict = {},
-) -> str:
+) -> Dict[str, Any]:
     personality_context = (
         prompts.personality_context.format(personality=agent.personality) if agent and agent.personality else ""
     )
@@ -838,10 +842,18 @@ async def generate_excalidraw_diagram_from_description(
     )
     raw_response = clean_json(raw_response)
     try:
+        # Expect response to have `elements` and `scratchpad` keys
         response: Dict[str, str] = json.loads(raw_response)
+        if (
+            not response
+            or not isinstance(response, Dict)
+            or not response.get("elements")
+            or not response.get("scratchpad")
+        ):
+            raise AssertionError(f"Invalid response for generating Excalidraw diagram: {response}")
     except Exception:
         raise AssertionError(f"Invalid response for generating Excalidraw diagram: {raw_response}")
-    if not response or not isinstance(response, List) or not isinstance(response[0], Dict):
+    if not response or not isinstance(response["elements"], List) or not isinstance(response["elements"][0], Dict):
         # TODO Some additional validation here that it's a valid Excalidraw diagram
         raise AssertionError(f"Invalid response for improving diagram description: {response}")

@@ -1770,6 +1782,7 @@ Manage your automations [here](/automations).
 class MessageProcessor:
     def __init__(self):
         self.references = {}
+        self.usage = {}
         self.raw_response = ""

     def convert_message_chunk_to_json(self, raw_chunk: str) -> Dict[str, Any]:
@@ -1793,6 +1806,8 @@ class MessageProcessor:
             chunk_type = ChatEvent(chunk["type"])
             if chunk_type == ChatEvent.REFERENCES:
                 self.references = chunk["data"]
+            elif chunk_type == ChatEvent.USAGE:
+                self.usage = chunk["data"]
             elif chunk_type == ChatEvent.MESSAGE:
                 chunk_data = chunk["data"]
                 if isinstance(chunk_data, dict):
@@ -1837,7 +1852,7 @@ async def read_chat_stream(response_iterator: AsyncGenerator[str, None]) -> Dict
     if buffer:
         processor.process_message_chunk(buffer)

-    return {"response": processor.raw_response, "references": processor.references}
+    return {"response": processor.raw_response, "references": processor.references, "usage": processor.usage}


 def get_user_config(user: KhojUser, request: Request, is_detailed: bool = False):
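With `MessageProcessor` tracking `usage` and `read_chat_stream` returning it, callers get cost data alongside the response text and references. A sketch of consuming the new return shape (the values are illustrative):

```python
# Sketch: the dict shape read_chat_stream now returns; values are illustrative
result = {
    "response": "Hello! How can I help?",
    "references": {},
    "usage": {"input_tokens": 4000, "output_tokens": 700, "cost": 0.00051},
}
cost = result.get("usage", {}).get("cost", 0)
print(f"This exchange cost ${cost:.5f}")
```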
khoj/utils/constants.py
CHANGED
@@ -1,4 +1,5 @@
 from pathlib import Path
+from typing import Dict

 app_root_directory = Path(__file__).parent.parent.parent
 web_directory = app_root_directory / "khoj/interface/web/"
@@ -31,3 +32,19 @@ default_config = {
         "image": {"encoder": "sentence-transformers/clip-ViT-B-32", "model_directory": "~/.khoj/search/image/"},
     },
 }
+
+model_to_cost: Dict[str, Dict[str, float]] = {
+    # OpenAI Pricing: https://openai.com/api/pricing/
+    "gpt-4o": {"input": 2.50, "output": 10.00},
+    "gpt-4o-mini": {"input": 0.15, "output": 0.60},
+    "o1-preview": {"input": 15.0, "output": 60.00},
+    "o1-mini": {"input": 3.0, "output": 12.0},
+    # Gemini Pricing: https://ai.google.dev/pricing
+    "gemini-1.5-flash": {"input": 0.075, "output": 0.30},
+    "gemini-1.5-flash-002": {"input": 0.075, "output": 0.30},
+    "gemini-1.5-pro": {"input": 1.25, "output": 5.00},
+    "gemini-1.5-pro-002": {"input": 1.25, "output": 5.00},
+    # Anthropic Pricing: https://www.anthropic.com/pricing#anthropic-api_
+    "claude-3-5-sonnet-20241022": {"input": 3.0, "output": 15.0},
+    "claude-3-5-haiku-20241022": {"input": 1.0, "output": 5.0},
+}
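All prices in `model_to_cost` are dollars per million tokens, so a quick worked example at the gpt-4o-mini rates above:

```python
# Worked example: cost of one gpt-4o-mini exchange at $0.15/M input, $0.60/M output
input_tokens, output_tokens = 1200, 300
input_cost = 0.15 * input_tokens / 1e6    # $0.00018
output_cost = 0.60 * output_tokens / 1e6  # $0.00018
print(f"${input_cost + output_cost:.5f}")  # $0.00036
```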
khoj/utils/helpers.py
CHANGED
@@ -540,3 +540,27 @@ def get_country_code_from_timezone(tz: str) -> str:
 def get_country_name_from_timezone(tz: str) -> str:
     """Get country name from timezone"""
     return country_names.get(get_country_code_from_timezone(tz), "United States")
+
+
+def get_cost_of_chat_message(model_name: str, input_tokens: int = 0, output_tokens: int = 0, prev_cost: float = 0.0):
+    """
+    Calculate cost of chat message based on input and output tokens
+    """
+
+    # Calculate cost of input and output tokens. Costs are per million tokens
+    input_cost = constants.model_to_cost.get(model_name, {}).get("input", 0) * (input_tokens / 1e6)
+    output_cost = constants.model_to_cost.get(model_name, {}).get("output", 0) * (output_tokens / 1e6)
+
+    return input_cost + output_cost + prev_cost
+
+
+def get_chat_usage_metrics(model_name: str, input_tokens: int = 0, output_tokens: int = 0, usage: dict = {}):
+    """
+    Get usage metrics for chat message based on input and output tokens
+    """
+    prev_usage = usage or {"input_tokens": 0, "output_tokens": 0, "cost": 0.0}
+    return {
+        "input_tokens": prev_usage["input_tokens"] + input_tokens,
+        "output_tokens": prev_usage["output_tokens"] + output_tokens,
+        "cost": get_cost_of_chat_message(model_name, input_tokens, output_tokens, prev_cost=prev_usage["cost"]),
+    }
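A quick usage sketch of the two helpers above, accumulating across two LLM calls the way `tracer["usage"]` does in the conversation processors (token counts illustrative):

```python
from khoj.utils.helpers import get_chat_usage_metrics  # as added in this diff

usage = None
usage = get_chat_usage_metrics("claude-3-5-haiku-20241022", input_tokens=2000, output_tokens=400, usage=usage)
usage = get_chat_usage_metrics("claude-3-5-haiku-20241022", input_tokens=1000, output_tokens=100, usage=usage)
# At $1.0/M input and $5.0/M output: (3000 * 1.0 + 500 * 5.0) / 1e6 = $0.0055
print(usage)  # {'input_tokens': 3000, 'output_tokens': 500, 'cost': 0.0055}
```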