khoj 1.30.2.dev11__py3-none-any.whl → 1.30.2.dev23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. khoj/interface/compiled/404/index.html +1 -1
  2. khoj/interface/compiled/_next/static/chunks/1210.ef7a0f9a7e43da1d.js +1 -0
  3. khoj/interface/compiled/_next/static/chunks/1603-1407afe510f0145a.js +1 -0
  4. khoj/interface/compiled/_next/static/chunks/1970-1b63ac1497b03a10.js +1 -0
  5. khoj/interface/compiled/_next/static/chunks/app/agents/{page-5f6e0dacc34e33ad.js → page-b086c9b0aa5a3833.js} +1 -1
  6. khoj/interface/compiled/_next/static/chunks/app/automations/{page-60bc7454bc3ea881.js → page-697a2d415e11a872.js} +1 -1
  7. khoj/interface/compiled/_next/static/chunks/app/chat/{page-ac366c9111374312.js → page-461e26fcb7578d39.js} +1 -1
  8. khoj/interface/compiled/_next/static/chunks/app/{page-358154a4436ef316.js → page-4a3c49c5e996cc40.js} +1 -1
  9. khoj/interface/compiled/_next/static/chunks/app/search/{page-64ea1717528979af.js → page-9013658bebfc3d17.js} +1 -1
  10. khoj/interface/compiled/_next/static/chunks/app/settings/{page-17a538580c65e7fe.js → page-41eb536497bb544a.js} +1 -1
  11. khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-47641b3691fb0856.js → page-6a68ac7e227b34e7.js} +1 -1
  12. khoj/interface/compiled/_next/static/chunks/{webpack-1c0a37d7df44bed9.js → webpack-9b0a570f15d6209d.js} +1 -1
  13. khoj/interface/compiled/_next/static/css/23f801d22927d568.css +1 -0
  14. khoj/interface/compiled/_next/static/css/592ca99f5122e75a.css +1 -0
  15. khoj/interface/compiled/agents/index.html +1 -1
  16. khoj/interface/compiled/agents/index.txt +2 -2
  17. khoj/interface/compiled/automations/index.html +1 -1
  18. khoj/interface/compiled/automations/index.txt +2 -2
  19. khoj/interface/compiled/chat/index.html +1 -1
  20. khoj/interface/compiled/chat/index.txt +2 -2
  21. khoj/interface/compiled/index.html +1 -1
  22. khoj/interface/compiled/index.txt +2 -2
  23. khoj/interface/compiled/search/index.html +1 -1
  24. khoj/interface/compiled/search/index.txt +2 -2
  25. khoj/interface/compiled/settings/index.html +1 -1
  26. khoj/interface/compiled/settings/index.txt +2 -2
  27. khoj/interface/compiled/share/chat/index.html +1 -1
  28. khoj/interface/compiled/share/chat/index.txt +2 -2
  29. khoj/processor/conversation/anthropic/utils.py +15 -1
  30. khoj/processor/conversation/google/utils.py +12 -1
  31. khoj/processor/conversation/openai/utils.py +33 -17
  32. khoj/processor/conversation/prompts.py +40 -21
  33. khoj/processor/conversation/utils.py +3 -2
  34. khoj/routers/api_chat.py +29 -14
  35. khoj/routers/auth.py +2 -2
  36. khoj/routers/helpers.py +19 -4
  37. khoj/utils/constants.py +17 -0
  38. khoj/utils/helpers.py +24 -0
  39. {khoj-1.30.2.dev11.dist-info → khoj-1.30.2.dev23.dist-info}/METADATA +1 -1
  40. {khoj-1.30.2.dev11.dist-info → khoj-1.30.2.dev23.dist-info}/RECORD +45 -45
  41. khoj/interface/compiled/_next/static/chunks/1210.132a7e1910006bbb.js +0 -1
  42. khoj/interface/compiled/_next/static/chunks/1603-859ddcf58f3ca639.js +0 -1
  43. khoj/interface/compiled/_next/static/chunks/1970-e1935a1d0930a7c5.js +0 -1
  44. khoj/interface/compiled/_next/static/css/2ff098d0815fdbc1.css +0 -1
  45. khoj/interface/compiled/_next/static/css/4cae6c0e5c72fb2d.css +0 -1
  46. /khoj/interface/compiled/_next/static/{ZwZ17DepweYHu7DUF8zml → 07uMbO07BRIyOEFvL9ncb}/_buildManifest.js +0 -0
  47. /khoj/interface/compiled/_next/static/{ZwZ17DepweYHu7DUF8zml → 07uMbO07BRIyOEFvL9ncb}/_ssgManifest.js +0 -0
  48. {khoj-1.30.2.dev11.dist-info → khoj-1.30.2.dev23.dist-info}/WHEEL +0 -0
  49. {khoj-1.30.2.dev11.dist-info → khoj-1.30.2.dev23.dist-info}/entry_points.txt +0 -0
  50. {khoj-1.30.2.dev11.dist-info → khoj-1.30.2.dev23.dist-info}/licenses/LICENSE +0 -0
khoj/processor/conversation/google/utils.py CHANGED
@@ -25,7 +25,7 @@ from khoj.processor.conversation.utils import (
     get_image_from_url,
 )
 from khoj.utils import state
-from khoj.utils.helpers import in_debug_mode, is_none_or_empty
+from khoj.utils.helpers import get_chat_usage_metrics, in_debug_mode, is_none_or_empty
 
 logger = logging.getLogger(__name__)
 
@@ -68,6 +68,7 @@ def gemini_completion_with_backoff(
         response = chat_session.send_message(formatted_messages[-1]["parts"])
         response_text = response.text
     except StopCandidateException as e:
+        response = None
         response_text, _ = handle_gemini_response(e.args)
         # Respond with reason for stopping
         logger.warning(
@@ -75,6 +76,11 @@ def gemini_completion_with_backoff(
             + f"Last Message by {messages[-1].role}: {messages[-1].content}"
         )
 
+    # Aggregate cost of chat
+    input_tokens = response.usage_metadata.prompt_token_count if response else 0
+    output_tokens = response.usage_metadata.candidates_token_count if response else 0
+    tracer["usage"] = get_chat_usage_metrics(model_name, input_tokens, output_tokens, tracer.get("usage"))
+
     # Save conversation trace
     tracer["chat_model"] = model_name
     tracer["temperature"] = temperature
@@ -146,6 +152,11 @@ def gemini_llm_thread(
             if stopped:
                 raise StopCandidateException(message)
 
+        # Calculate cost of chat
+        input_tokens = chunk.usage_metadata.prompt_token_count
+        output_tokens = chunk.usage_metadata.candidates_token_count
+        tracer["usage"] = get_chat_usage_metrics(model_name, input_tokens, output_tokens, tracer.get("usage"))
+
         # Save conversation trace
         tracer["chat_model"] = model_name
         tracer["temperature"] = temperature
khoj/processor/conversation/openai/utils.py CHANGED
@@ -4,6 +4,8 @@ from threading import Thread
 from typing import Dict
 
 import openai
+from openai.types.chat.chat_completion import ChatCompletion
+from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
 from tenacity import (
     before_sleep_log,
     retry,
@@ -18,7 +20,7 @@ from khoj.processor.conversation.utils import (
     commit_conversation_trace,
 )
 from khoj.utils import state
-from khoj.utils.helpers import in_debug_mode
+from khoj.utils.helpers import get_chat_usage_metrics, in_debug_mode
 
 logger = logging.getLogger(__name__)
 
@@ -63,27 +65,34 @@ def completion_with_backoff(
     if os.getenv("KHOJ_LLM_SEED"):
         model_kwargs["seed"] = int(os.getenv("KHOJ_LLM_SEED"))
 
-    chat = client.chat.completions.create(
-        stream=stream,
+    chat: ChatCompletion | openai.Stream[ChatCompletionChunk] = client.chat.completions.create(
         messages=formatted_messages,  # type: ignore
         model=model,  # type: ignore
+        stream=stream,
+        stream_options={"include_usage": True} if stream else {},
         temperature=temperature,
         timeout=20,
         **(model_kwargs or dict()),
     )
 
-    if not stream:
-        return chat.choices[0].message.content
-
     aggregated_response = ""
-    for chunk in chat:
-        if len(chunk.choices) == 0:
-            continue
-        delta_chunk = chunk.choices[0].delta  # type: ignore
-        if isinstance(delta_chunk, str):
-            aggregated_response += delta_chunk
-        elif delta_chunk.content:
-            aggregated_response += delta_chunk.content
+    if not stream:
+        chunk = chat
+        aggregated_response = chunk.choices[0].message.content
+    else:
+        for chunk in chat:
+            if len(chunk.choices) == 0:
+                continue
+            delta_chunk = chunk.choices[0].delta  # type: ignore
+            if isinstance(delta_chunk, str):
+                aggregated_response += delta_chunk
+            elif delta_chunk.content:
+                aggregated_response += delta_chunk.content
+
+    # Calculate cost of chat
+    input_tokens = chunk.usage.prompt_tokens if hasattr(chunk, "usage") and chunk.usage else 0
+    output_tokens = chunk.usage.completion_tokens if hasattr(chunk, "usage") and chunk.usage else 0
+    tracer["usage"] = get_chat_usage_metrics(model, input_tokens, output_tokens, tracer.get("usage"))
 
     # Save conversation trace
     tracer["chat_model"] = model
@@ -162,10 +171,11 @@ def llm_thread(
     if os.getenv("KHOJ_LLM_SEED"):
         model_kwargs["seed"] = int(os.getenv("KHOJ_LLM_SEED"))
 
-    chat = client.chat.completions.create(
-        stream=stream,
+    chat: ChatCompletion | openai.Stream[ChatCompletionChunk] = client.chat.completions.create(
         messages=formatted_messages,
         model=model_name,  # type: ignore
+        stream=stream,
+        stream_options={"include_usage": True} if stream else {},
         temperature=temperature,
         timeout=20,
         **(model_kwargs or dict()),
@@ -173,7 +183,8 @@
 
     aggregated_response = ""
     if not stream:
-        aggregated_response = chat.choices[0].message.content
+        chunk = chat
+        aggregated_response = chunk.choices[0].message.content
         g.send(aggregated_response)
     else:
         for chunk in chat:
@@ -189,6 +200,11 @@
                 aggregated_response += text_chunk
                 g.send(text_chunk)
 
+    # Calculate cost of chat
+    input_tokens = chunk.usage.prompt_tokens if hasattr(chunk, "usage") and chunk.usage else 0
+    output_tokens = chunk.usage.completion_tokens if hasattr(chunk, "usage") and chunk.usage else 0
+    tracer["usage"] = get_chat_usage_metrics(model_name, input_tokens, output_tokens, tracer.get("usage"))
+
     # Save conversation trace
     tracer["chat_model"] = model_name
     tracer["temperature"] = temperature
khoj/processor/conversation/prompts.py CHANGED
@@ -183,20 +183,23 @@ Improved Prompt:
 
 improve_diagram_description_prompt = PromptTemplate.from_template(
     """
-you are an architect working with a novice artist using a diagramming tool.
+you are an architect working with a novice digital artist using a diagramming software.
 {personality_context}
 
 you need to convert the user's query to a description format that the novice artist can use very well. you are allowed to use primitives like
 - text
 - rectangle
-- diamond
 - ellipse
 - line
 - arrow
 
 use these primitives to describe what sort of diagram the drawer should create. the artist must recreate the diagram every time, so include all relevant prior information in your description.
 
-use simple, concise language.
+- include the full, exact description. the artist does not have much experience, so be precise.
+- describe the layout.
+- you can only use straight lines.
+- use simple, concise language.
+- keep it simple and easy to understand. the artist is easily distracted.
 
 Today's Date: {current_date}
 User's Location: {location}
@@ -218,19 +221,23 @@ Query: {query}
 
 excalidraw_diagram_generation_prompt = PromptTemplate.from_template(
     """
-You are a program manager with the ability to describe diagrams to compose in professional, fine detail.
+You are a program manager with the ability to describe diagrams to compose in professional, fine detail. You LOVE getting into the details and making tedious labels, lines, and shapes look beautiful. You make everything look perfect.
 {personality_context}
 
-You need to create a declarative description of the diagram and relevant components, using this base schema. Use the `label` property to specify the text to be rendered in the respective elements. Always use light colors for the `backgroundColor` property, like white, or light blue, green, red. "type", "x", "y", "id", are required properties for all elements.
+You need to create a declarative description of the diagram and relevant components, using this base schema.
+- `label`: specify the text to be rendered in the respective elements.
+- Always use light colors for the `backgroundColor` property, like white, or light blue, green, red
+- **ALWAYS Required properties for ALL elements**: `type`, `x`, `y`, `id`.
+- Be very generous with spacing and composition. Use ample space between elements.
 
 {{
     type: string,
     x: number,
     y: number,
-    strokeColor: string,
-    backgroundColor: string,
     width: number,
     height: number,
+    strokeColor: string,
+    backgroundColor: string,
     id: string,
     label: {{
         text: string,
@@ -240,28 +247,30 @@ You need to create a declarative description of the diagram and relevant components
 Valid types:
 - text
 - rectangle
-- diamond
 - ellipse
 - line
 - arrow
 
-For arrows and lines, you can use the `points` property to specify the start and end points of the arrow. You may also use the `label` property to specify the text to be rendered. You may use the `start` and `end` properties to connect the linear elements to other elements. The start and end point can either be the ID to map to an existing object, or the `type` to create a new object. Mapping to an existing object is useful if you want to connect it to multiple objects. Lines and arrows can only start and end at rectangle, text, diamond, or ellipse elements.
+For arrows and lines,
+- `points`: specify the start and end points of the arrow
+- **ALWAYS Required properties for ALL elements**: `type`, `x`, `y`, `id`.
+- `start` and `end` properties: connect the linear elements to other elements. The start and end point can either be the ID to map to an existing object, or the `type` and `text` to create a new object. Mapping to an existing object is useful if you want to connect it to multiple objects. Lines and arrows can only start and end at rectangle, text, or ellipse elements. Even if you're using the `start` and `end` properties, you still need to specify the `x` and `y` properties for the start and end points.
 
 {{
     type: "arrow",
     id: string,
     x: number,
     y: number,
-    width: number,
-    height: number,
     strokeColor: string,
     start: {{
         id: string,
         type: string,
+        text: string,
     }},
     end: {{
         id: string,
         type: string,
+        text: string,
     }},
     label: {{
         text: string,
@@ -272,7 +281,11 @@ For arrows and lines,
     ]
 }}
 
-For text, you must use the `text` property to specify the text to be rendered. You may also use `fontSize` property to specify the font size of the text. Only use the `text` element for titles, subtitles, and overviews. For labels, use the `label` property in the respective elements.
+For text,
+- `text`: specify the text to be rendered
+- **ALWAYS Required properties for ALL elements**: `type`, `x`, `y`, `id`.
+- `fontSize`: optional property to specify the font size of the text
+- Use this element only for titles, subtitles, and overviews. For labels, use the `label` property in the respective elements.
 
 {{
     type: "text",
@@ -287,19 +300,25 @@ Here's an example of a valid diagram:
 
 Design Description: Create a diagram describing a circular development process with 3 stages: design, implementation and feedback. The design stage is connected to the implementation stage and the implementation stage is connected to the feedback stage and the feedback stage is connected to the design stage. Each stage should be labeled with the stage name.
 
-Response:
-
-[
-    {{"type":"text","x":-150,"y":50,"width":300,"height":40,"id":"title_text","text":"Circular Development Process","fontSize":24}},
-    {{"type":"ellipse","x":-169,"y":113,"width":188,"height":202,"id":"design_ellipse", "label": {{"text": "Design"}}}},
-    {{"type":"ellipse","x":62,"y":394,"width":186,"height":188,"id":"implement_ellipse", "label": {{"text": "Implement"}}}},
-    {{"type":"ellipse","x":-348,"y":430,"width":184,"height":170,"id":"feedback_ellipse", "label": {{"text": "Feedback"}}}},
+Example Response:
+```json
+{{
+    "scratchpad": "The diagram represents a circular development process with 3 stages: design, implementation and feedback. Each stage is connected to the next stage using an arrow, forming a circular process.",
+    "elements": [
+    {{"type":"text","x":-150,"y":50,"id":"title_text","text":"Circular Development Process","fontSize":24}},
+    {{"type":"ellipse","x":-169,"y":113,"id":"design_ellipse", "label": {{"text": "Design"}}}},
+    {{"type":"ellipse","x":62,"y":394,"id":"implement_ellipse", "label": {{"text": "Implement"}}}},
+    {{"type":"ellipse","x":-348,"y":430,"id":"feedback_ellipse", "label": {{"text": "Feedback"}}}},
     {{"type":"arrow","x":21,"y":273,"id":"design_to_implement_arrow","points":[[0,0],[86,105]],"start":{{"id":"design_ellipse"}}, "end":{{"id":"implement_ellipse"}}}},
     {{"type":"arrow","x":50,"y":519,"id":"implement_to_feedback_arrow","points":[[0,0],[-198,-6]],"start":{{"id":"implement_ellipse"}}, "end":{{"id":"feedback_ellipse"}}}},
     {{"type":"arrow","x":-228,"y":417,"id":"feedback_to_design_arrow","points":[[0,0],[85,-123]],"start":{{"id":"feedback_ellipse"}}, "end":{{"id":"design_ellipse"}}}},
-]
+    ]
+}}
+```
+
+Think about spacing and composition. Use ample space between elements. Double the amount of space you think you need. Create a detailed diagram from the provided context and user prompt below.
 
-Create a detailed diagram from the provided context and user prompt below. Return a valid JSON object:
+Return a valid JSON object, where the drawing is in `elements` and your thought process is in `scratchpad`. If you can't make the whole diagram in one response, you can split it into multiple responses. If you need to simplify for brevity, simply do so in the `scratchpad` field. DO NOT add additional info in the `elements` field.
 
 Diagram Description: {query}
 
khoj/processor/conversation/utils.py CHANGED
@@ -5,7 +5,6 @@ import math
 import mimetypes
 import os
 import queue
-import re
 import uuid
 from dataclasses import dataclass
 from datetime import datetime
@@ -57,7 +56,7 @@ model_to_prompt_size = {
     "gemini-1.5-flash": 20000,
     "gemini-1.5-pro": 20000,
     # Anthropic Models
-    "claude-3-5-sonnet-20240620": 20000,
+    "claude-3-5-sonnet-20241022": 20000,
     "claude-3-5-haiku-20241022": 20000,
     # Offline Models
     "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF": 20000,
@@ -213,6 +212,8 @@ class ChatEvent(Enum):
     REFERENCES = "references"
     STATUS = "status"
     METADATA = "metadata"
+    USAGE = "usage"
+    END_RESPONSE = "end_response"
 
 
 def message_to_log(
khoj/routers/api_chat.py CHANGED
@@ -667,27 +667,37 @@ async def chat(
         finally:
             yield event_delimiter
 
-    async def send_llm_response(response: str):
+    async def send_llm_response(response: str, usage: dict = None):
+        # Send Chat Response
         async for result in send_event(ChatEvent.START_LLM_RESPONSE, ""):
             yield result
         async for result in send_event(ChatEvent.MESSAGE, response):
             yield result
         async for result in send_event(ChatEvent.END_LLM_RESPONSE, ""):
             yield result
+        # Send Usage Metadata once llm interactions are complete
+        if usage:
+            async for event in send_event(ChatEvent.USAGE, usage):
+                yield event
+        async for result in send_event(ChatEvent.END_RESPONSE, ""):
+            yield result
 
     def collect_telemetry():
         # Gather chat response telemetry
         nonlocal chat_metadata
         latency = time.perf_counter() - start_time
         cmd_set = set([cmd.value for cmd in conversation_commands])
+        cost = (tracer.get("usage", {}) or {}).get("cost", 0)
         chat_metadata = chat_metadata or {}
         chat_metadata["conversation_command"] = cmd_set
         chat_metadata["agent"] = conversation.agent.slug if conversation and conversation.agent else None
         chat_metadata["latency"] = f"{latency:.3f}"
         chat_metadata["ttft_latency"] = f"{ttft:.3f}"
+        chat_metadata["usage"] = tracer.get("usage")
 
         logger.info(f"Chat response time to first token: {ttft:.3f} seconds")
         logger.info(f"Chat response total time: {latency:.3f} seconds")
+        logger.info(f"Chat response cost: ${cost:.5f}")
         update_telemetry_state(
             request=request,
             telemetry_type="api",
@@ -699,7 +709,7 @@ async def chat(
     )
 
     if is_query_empty(q):
-        async for result in send_llm_response("Please ask your query to get started."):
+        async for result in send_llm_response("Please ask your query to get started.", tracer.get("usage")):
            yield result
        return
 
@@ -713,7 +723,7 @@ async def chat(
         create_new=body.create_new,
     )
     if not conversation:
-        async for result in send_llm_response(f"Conversation {conversation_id} not found"):
+        async for result in send_llm_response(f"Conversation {conversation_id} not found", tracer.get("usage")):
            yield result
        return
     conversation_id = conversation.id
@@ -777,7 +787,7 @@ async def chat(
             await conversation_command_rate_limiter.update_and_check_if_valid(request, cmd)
             q = q.replace(f"/{cmd.value}", "").strip()
         except HTTPException as e:
-            async for result in send_llm_response(str(e.detail)):
+            async for result in send_llm_response(str(e.detail), tracer.get("usage")):
                 yield result
             return
 
@@ -834,7 +844,7 @@ async def chat(
         agent_has_entries = await EntryAdapters.aagent_has_entries(agent)
         if len(file_filters) == 0 and not agent_has_entries:
             response_log = "No files selected for summarization. Please add files using the section on the left."
-            async for result in send_llm_response(response_log):
+            async for result in send_llm_response(response_log, tracer.get("usage")):
                 yield result
         else:
             async for response in generate_summary_from_files(
@@ -853,7 +863,7 @@ async def chat(
                 else:
                     if isinstance(response, str):
                         response_log = response
-                        async for result in send_llm_response(response):
+                        async for result in send_llm_response(response, tracer.get("usage")):
                             yield result
 
             await sync_to_async(save_to_conversation_log)(
@@ -880,7 +890,7 @@ async def chat(
             conversation_config = await ConversationAdapters.aget_default_conversation_config(user)
             model_type = conversation_config.model_type
             formatted_help = help_message.format(model=model_type, version=state.khoj_version, device=get_device())
-            async for result in send_llm_response(formatted_help):
+            async for result in send_llm_response(formatted_help, tracer.get("usage")):
                 yield result
             return
         # Adding specification to search online specifically on khoj.dev pages.
@@ -895,7 +905,7 @@ async def chat(
         except Exception as e:
             logger.error(f"Error scheduling task {q} for {user.email}: {e}")
             error_message = f"Unable to create automation. Ensure the automation doesn't already exist."
-            async for result in send_llm_response(error_message):
+            async for result in send_llm_response(error_message, tracer.get("usage")):
                 yield result
             return
 
@@ -916,7 +926,7 @@ async def chat(
             raw_query_files=raw_query_files,
             tracer=tracer,
         )
-        async for result in send_llm_response(llm_response):
+        async for result in send_llm_response(llm_response, tracer.get("usage")):
             yield result
         return
 
@@ -963,7 +973,7 @@ async def chat(
             yield result
 
     if conversation_commands == [ConversationCommand.Notes] and not await EntryAdapters.auser_has_entries(user):
-        async for result in send_llm_response(f"{no_entries_found.format()}"):
+        async for result in send_llm_response(f"{no_entries_found.format()}", tracer.get("usage")):
            yield result
        return
 
@@ -1105,7 +1115,7 @@ async def chat(
                 "detail": improved_image_prompt,
                 "image": None,
             }
-            async for result in send_llm_response(json.dumps(content_obj)):
+            async for result in send_llm_response(json.dumps(content_obj), tracer.get("usage")):
                 yield result
             return
 
@@ -1132,7 +1142,7 @@ async def chat(
             "inferredQueries": [improved_image_prompt],
             "image": generated_image,
         }
-        async for result in send_llm_response(json.dumps(content_obj)):
+        async for result in send_llm_response(json.dumps(content_obj), tracer.get("usage")):
             yield result
         return
 
@@ -1166,7 +1176,7 @@ async def chat(
             diagram_description = excalidraw_diagram_description
         else:
             error_message = "Failed to generate diagram. Please try again later."
-            async for result in send_llm_response(error_message):
+            async for result in send_llm_response(error_message, tracer.get("usage")):
                 yield result
 
         await sync_to_async(save_to_conversation_log)(
@@ -1213,7 +1223,7 @@ async def chat(
             tracer=tracer,
         )
 
-        async for result in send_llm_response(json.dumps(content_obj)):
+        async for result in send_llm_response(json.dumps(content_obj), tracer.get("usage")):
             yield result
         return
 
@@ -1252,6 +1262,11 @@ async def chat(
             if item is None:
                 async for result in send_event(ChatEvent.END_LLM_RESPONSE, ""):
                     yield result
+                # Send Usage Metadata once llm interactions are complete
+                async for event in send_event(ChatEvent.USAGE, tracer.get("usage")):
+                    yield event
+                async for result in send_event(ChatEvent.END_RESPONSE, ""):
+                    yield result
                 logger.debug("Finished streaming response")
                 return
             if not connection_alive or not continue_stream:
khoj/routers/auth.py CHANGED
@@ -89,7 +89,7 @@ async def login_magic_link(request: Request, form: MagicLinkForm):
         update_telemetry_state(
             request=request,
             telemetry_type="api",
-            api="create_user",
+            api="create_user__email",
             metadata={"server_id": str(user.uuid)},
         )
         logger.log(logging.INFO, f"🥳 New User Created: {user.uuid}")
@@ -174,7 +174,7 @@ async def auth(request: Request):
         update_telemetry_state(
             request=request,
             telemetry_type="api",
-            api="create_user",
+            api="create_user__google",
             metadata={"server_id": str(khoj_user.uuid)},
         )
         logger.log(logging.INFO, f"🥳 New User Created: {khoj_user.uuid}")
khoj/routers/helpers.py CHANGED
@@ -753,7 +753,11 @@ async def generate_excalidraw_diagram(
         yield None, None
         return
 
-    yield better_diagram_description_prompt, excalidraw_diagram_description
+    scratchpad = excalidraw_diagram_description.get("scratchpad")
+
+    inferred_queries = f"Instruction: {better_diagram_description_prompt}\n\nScratchpad: {scratchpad}"
+
+    yield inferred_queries, excalidraw_diagram_description.get("elements")
 
 
 async def generate_better_diagram_description(
@@ -822,7 +826,7 @@ async def generate_excalidraw_diagram_from_description(
     user: KhojUser = None,
     agent: Agent = None,
     tracer: dict = {},
-) -> str:
+) -> Dict[str, Any]:
     personality_context = (
         prompts.personality_context.format(personality=agent.personality) if agent and agent.personality else ""
     )
@@ -838,10 +842,18 @@ async def generate_excalidraw_diagram_from_description(
     )
     raw_response = clean_json(raw_response)
     try:
+        # Expect response to have `elements` and `scratchpad` keys
         response: Dict[str, str] = json.loads(raw_response)
+        if (
+            not response
+            or not isinstance(response, Dict)
+            or not response.get("elements")
+            or not response.get("scratchpad")
+        ):
+            raise AssertionError(f"Invalid response for generating Excalidraw diagram: {response}")
     except Exception:
         raise AssertionError(f"Invalid response for generating Excalidraw diagram: {raw_response}")
-    if not response or not isinstance(response, List) or not isinstance(response[0], Dict):
+    if not response or not isinstance(response["elements"], List) or not isinstance(response["elements"][0], Dict):
         # TODO Some additional validation here that it's a valid Excalidraw diagram
         raise AssertionError(f"Invalid response for improving diagram description: {response}")
 
@@ -1770,6 +1782,7 @@ Manage your automations [here](/automations).
 class MessageProcessor:
     def __init__(self):
         self.references = {}
+        self.usage = {}
         self.raw_response = ""
 
     def convert_message_chunk_to_json(self, raw_chunk: str) -> Dict[str, Any]:
@@ -1793,6 +1806,8 @@ class MessageProcessor:
             chunk_type = ChatEvent(chunk["type"])
             if chunk_type == ChatEvent.REFERENCES:
                 self.references = chunk["data"]
+            elif chunk_type == ChatEvent.USAGE:
+                self.usage = chunk["data"]
             elif chunk_type == ChatEvent.MESSAGE:
                 chunk_data = chunk["data"]
                 if isinstance(chunk_data, dict):
@@ -1837,7 +1852,7 @@ async def read_chat_stream(response_iterator: AsyncGenerator[str, None]) -> Dict
     if buffer:
         processor.process_message_chunk(buffer)
 
-    return {"response": processor.raw_response, "references": processor.references}
+    return {"response": processor.raw_response, "references": processor.references, "usage": processor.usage}
 
 
 def get_user_config(user: KhojUser, request: Request, is_detailed: bool = False):
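With the `usage` key now included in the stream result, anything that consumes `read_chat_stream` can surface token counts and cost alongside the response. A minimal hypothetical caller (the iterator source and exact field handling are assumptions):

```python
async def print_chat_result(response_iterator) -> None:
    # Hypothetical consumer of the aggregated stream result
    result = await read_chat_stream(response_iterator)
    usage = result.get("usage") or {}
    print(result["response"])
    print(f"tokens in/out: {usage.get('input_tokens', 0)}/{usage.get('output_tokens', 0)}")
    print(f"cost: ${usage.get('cost', 0):.5f}")
```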
khoj/utils/constants.py CHANGED
@@ -1,4 +1,5 @@
 from pathlib import Path
+from typing import Dict
 
 app_root_directory = Path(__file__).parent.parent.parent
 web_directory = app_root_directory / "khoj/interface/web/"
@@ -31,3 +32,19 @@ default_config = {
         "image": {"encoder": "sentence-transformers/clip-ViT-B-32", "model_directory": "~/.khoj/search/image/"},
     },
 }
+
+model_to_cost: Dict[str, Dict[str, float]] = {
+    # OpenAI Pricing: https://openai.com/api/pricing/
+    "gpt-4o": {"input": 2.50, "output": 10.00},
+    "gpt-4o-mini": {"input": 0.15, "output": 0.60},
+    "o1-preview": {"input": 15.0, "output": 60.00},
+    "o1-mini": {"input": 3.0, "output": 12.0},
+    # Gemini Pricing: https://ai.google.dev/pricing
+    "gemini-1.5-flash": {"input": 0.075, "output": 0.30},
+    "gemini-1.5-flash-002": {"input": 0.075, "output": 0.30},
+    "gemini-1.5-pro": {"input": 1.25, "output": 5.00},
+    "gemini-1.5-pro-002": {"input": 1.25, "output": 5.00},
+    # Anthropic Pricing: https://www.anthropic.com/pricing#anthropic-api_
+    "claude-3-5-sonnet-20241022": {"input": 3.0, "output": 15.0},
+    "claude-3-5-haiku-20241022": {"input": 1.0, "output": 5.0},
+}
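The rates in `model_to_cost` are USD per million tokens, so per-message costs come out to fractions of a cent. A quick sanity check of the arithmetic (token counts are illustrative):

```python
# Illustrative cost arithmetic; rates are USD per million tokens.
rates = {"input": 0.15, "output": 0.60}  # gpt-4o-mini row from model_to_cost

input_tokens, output_tokens = 1200, 300  # hypothetical chat turn
cost = rates["input"] * input_tokens / 1e6 + rates["output"] * output_tokens / 1e6
print(f"${cost:.6f}")  # $0.000360
```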
khoj/utils/helpers.py CHANGED
@@ -540,3 +540,27 @@ def get_country_code_from_timezone(tz: str) -> str:
 def get_country_name_from_timezone(tz: str) -> str:
     """Get country name from timezone"""
     return country_names.get(get_country_code_from_timezone(tz), "United States")
+
+
+def get_cost_of_chat_message(model_name: str, input_tokens: int = 0, output_tokens: int = 0, prev_cost: float = 0.0):
+    """
+    Calculate cost of chat message based on input and output tokens
+    """
+
+    # Calculate cost of input and output tokens. Costs are per million tokens
+    input_cost = constants.model_to_cost.get(model_name, {}).get("input", 0) * (input_tokens / 1e6)
+    output_cost = constants.model_to_cost.get(model_name, {}).get("output", 0) * (output_tokens / 1e6)
+
+    return input_cost + output_cost + prev_cost
+
+
+def get_chat_usage_metrics(model_name: str, input_tokens: int = 0, output_tokens: int = 0, usage: dict = {}):
+    """
+    Get usage metrics for chat message based on input and output tokens
+    """
+    prev_usage = usage or {"input_tokens": 0, "output_tokens": 0, "cost": 0.0}
+    return {
+        "input_tokens": prev_usage["input_tokens"] + input_tokens,
+        "output_tokens": prev_usage["output_tokens"] + output_tokens,
+        "cost": get_cost_of_chat_message(model_name, input_tokens, output_tokens, prev_cost=prev_usage["cost"]),
+    }
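To see how the running usage dict accrues, here is a minimal sketch of two successive calls, mirroring the `tracer["usage"] = get_chat_usage_metrics(...)` updates in the conversation processors above (token counts are illustrative):

```python
# Two hypothetical chat turns on gpt-4o-mini ($0.15/$0.60 per million tokens)
usage = None
usage = get_chat_usage_metrics("gpt-4o-mini", input_tokens=1200, output_tokens=300, usage=usage)
usage = get_chat_usage_metrics("gpt-4o-mini", input_tokens=800, output_tokens=200, usage=usage)
print(usage)
# ≈ {"input_tokens": 2000, "output_tokens": 500, "cost": 0.0006}
```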
{khoj-1.30.2.dev11.dist-info → khoj-1.30.2.dev23.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: khoj
-Version: 1.30.2.dev11
+Version: 1.30.2.dev23
 Summary: Your Second Brain
 Project-URL: Homepage, https://khoj.dev
 Project-URL: Documentation, https://docs.khoj.dev