khoj 1.30.2.dev11__py3-none-any.whl → 1.30.2.dev23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/interface/compiled/404/index.html +1 -1
- khoj/interface/compiled/_next/static/chunks/1210.ef7a0f9a7e43da1d.js +1 -0
- khoj/interface/compiled/_next/static/chunks/1603-1407afe510f0145a.js +1 -0
- khoj/interface/compiled/_next/static/chunks/1970-1b63ac1497b03a10.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/{page-5f6e0dacc34e33ad.js → page-b086c9b0aa5a3833.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/automations/{page-60bc7454bc3ea881.js → page-697a2d415e11a872.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/{page-ac366c9111374312.js → page-461e26fcb7578d39.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/{page-358154a4436ef316.js → page-4a3c49c5e996cc40.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/search/{page-64ea1717528979af.js → page-9013658bebfc3d17.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/settings/{page-17a538580c65e7fe.js → page-41eb536497bb544a.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-47641b3691fb0856.js → page-6a68ac7e227b34e7.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{webpack-1c0a37d7df44bed9.js → webpack-9b0a570f15d6209d.js} +1 -1
- khoj/interface/compiled/_next/static/css/23f801d22927d568.css +1 -0
- khoj/interface/compiled/_next/static/css/592ca99f5122e75a.css +1 -0
- khoj/interface/compiled/agents/index.html +1 -1
- khoj/interface/compiled/agents/index.txt +2 -2
- khoj/interface/compiled/automations/index.html +1 -1
- khoj/interface/compiled/automations/index.txt +2 -2
- khoj/interface/compiled/chat/index.html +1 -1
- khoj/interface/compiled/chat/index.txt +2 -2
- khoj/interface/compiled/index.html +1 -1
- khoj/interface/compiled/index.txt +2 -2
- khoj/interface/compiled/search/index.html +1 -1
- khoj/interface/compiled/search/index.txt +2 -2
- khoj/interface/compiled/settings/index.html +1 -1
- khoj/interface/compiled/settings/index.txt +2 -2
- khoj/interface/compiled/share/chat/index.html +1 -1
- khoj/interface/compiled/share/chat/index.txt +2 -2
- khoj/processor/conversation/anthropic/utils.py +15 -1
- khoj/processor/conversation/google/utils.py +12 -1
- khoj/processor/conversation/openai/utils.py +33 -17
- khoj/processor/conversation/prompts.py +40 -21
- khoj/processor/conversation/utils.py +3 -2
- khoj/routers/api_chat.py +29 -14
- khoj/routers/auth.py +2 -2
- khoj/routers/helpers.py +19 -4
- khoj/utils/constants.py +17 -0
- khoj/utils/helpers.py +24 -0
- {khoj-1.30.2.dev11.dist-info → khoj-1.30.2.dev23.dist-info}/METADATA +1 -1
- {khoj-1.30.2.dev11.dist-info → khoj-1.30.2.dev23.dist-info}/RECORD +45 -45
- khoj/interface/compiled/_next/static/chunks/1210.132a7e1910006bbb.js +0 -1
- khoj/interface/compiled/_next/static/chunks/1603-859ddcf58f3ca639.js +0 -1
- khoj/interface/compiled/_next/static/chunks/1970-e1935a1d0930a7c5.js +0 -1
- khoj/interface/compiled/_next/static/css/2ff098d0815fdbc1.css +0 -1
- khoj/interface/compiled/_next/static/css/4cae6c0e5c72fb2d.css +0 -1
- /khoj/interface/compiled/_next/static/{ZwZ17DepweYHu7DUF8zml → 07uMbO07BRIyOEFvL9ncb}/_buildManifest.js +0 -0
- /khoj/interface/compiled/_next/static/{ZwZ17DepweYHu7DUF8zml → 07uMbO07BRIyOEFvL9ncb}/_ssgManifest.js +0 -0
- {khoj-1.30.2.dev11.dist-info → khoj-1.30.2.dev23.dist-info}/WHEEL +0 -0
- {khoj-1.30.2.dev11.dist-info → khoj-1.30.2.dev23.dist-info}/entry_points.txt +0 -0
- {khoj-1.30.2.dev11.dist-info → khoj-1.30.2.dev23.dist-info}/licenses/LICENSE +0 -0
khoj/processor/conversation/google/utils.py
CHANGED
@@ -25,7 +25,7 @@ from khoj.processor.conversation.utils import (
     get_image_from_url,
 )
 from khoj.utils import state
-from khoj.utils.helpers import in_debug_mode, is_none_or_empty
+from khoj.utils.helpers import get_chat_usage_metrics, in_debug_mode, is_none_or_empty

 logger = logging.getLogger(__name__)

@@ -68,6 +68,7 @@ def gemini_completion_with_backoff(
         response = chat_session.send_message(formatted_messages[-1]["parts"])
         response_text = response.text
     except StopCandidateException as e:
+        response = None
         response_text, _ = handle_gemini_response(e.args)
         # Respond with reason for stopping
         logger.warning(
@@ -75,6 +76,11 @@ def gemini_completion_with_backoff(
             f"Last Message by {messages[-1].role}: {messages[-1].content}"
         )

+    # Aggregate cost of chat
+    input_tokens = response.usage_metadata.prompt_token_count if response else 0
+    output_tokens = response.usage_metadata.candidates_token_count if response else 0
+    tracer["usage"] = get_chat_usage_metrics(model_name, input_tokens, output_tokens, tracer.get("usage"))
+
     # Save conversation trace
     tracer["chat_model"] = model_name
     tracer["temperature"] = temperature
@@ -146,6 +152,11 @@ def gemini_llm_thread(
                 if stopped:
                     raise StopCandidateException(message)

+        # Calculate cost of chat
+        input_tokens = chunk.usage_metadata.prompt_token_count
+        output_tokens = chunk.usage_metadata.candidates_token_count
+        tracer["usage"] = get_chat_usage_metrics(model_name, input_tokens, output_tokens, tracer.get("usage"))
+
         # Save conversation trace
         tracer["chat_model"] = model_name
         tracer["temperature"] = temperature
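Both Gemini code paths now funnel token counts into `tracer["usage"]`, reading them off the response's `usage_metadata`. A minimal sketch of where those counts come from, assuming `google-generativeai` is installed and an API key is configured (the model name is illustrative); the `if response else 0` guard mirrors the `StopCandidateException` path above, where `response` is reset to `None`:

```python
# Sketch: reading token counts off a Gemini response's usage_metadata.
# Assumes genai.configure(api_key=...) has already been called.
import google.generativeai as genai

model = genai.GenerativeModel("gemini-1.5-flash")  # illustrative model name
response = model.generate_content("Say hi")

# Mirrors the guard above: response may be None if generation was stopped
input_tokens = response.usage_metadata.prompt_token_count if response else 0
output_tokens = response.usage_metadata.candidates_token_count if response else 0
print(input_tokens, output_tokens)
```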
khoj/processor/conversation/openai/utils.py
CHANGED
@@ -4,6 +4,8 @@ from threading import Thread
 from typing import Dict

 import openai
+from openai.types.chat.chat_completion import ChatCompletion
+from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
 from tenacity import (
     before_sleep_log,
     retry,
@@ -18,7 +20,7 @@ from khoj.processor.conversation.utils import (
     commit_conversation_trace,
 )
 from khoj.utils import state
-from khoj.utils.helpers import in_debug_mode
+from khoj.utils.helpers import get_chat_usage_metrics, in_debug_mode

 logger = logging.getLogger(__name__)

@@ -63,27 +65,34 @@ def completion_with_backoff(
     if os.getenv("KHOJ_LLM_SEED"):
         model_kwargs["seed"] = int(os.getenv("KHOJ_LLM_SEED"))

-    chat = client.chat.completions.create(
-        stream=stream,
+    chat: ChatCompletion | openai.Stream[ChatCompletionChunk] = client.chat.completions.create(
         messages=formatted_messages,  # type: ignore
         model=model,  # type: ignore
+        stream=stream,
+        stream_options={"include_usage": True} if stream else {},
         temperature=temperature,
         timeout=20,
         **(model_kwargs or dict()),
     )

-    if not stream:
-        return chat.choices[0].message.content
-
     aggregated_response = ""
-
-
-
-
-
-
-
-
+    if not stream:
+        chunk = chat
+        aggregated_response = chunk.choices[0].message.content
+    else:
+        for chunk in chat:
+            if len(chunk.choices) == 0:
+                continue
+            delta_chunk = chunk.choices[0].delta  # type: ignore
+            if isinstance(delta_chunk, str):
+                aggregated_response += delta_chunk
+            elif delta_chunk.content:
+                aggregated_response += delta_chunk.content
+
+    # Calculate cost of chat
+    input_tokens = chunk.usage.prompt_tokens if hasattr(chunk, "usage") and chunk.usage else 0
+    output_tokens = chunk.usage.completion_tokens if hasattr(chunk, "usage") and chunk.usage else 0
+    tracer["usage"] = get_chat_usage_metrics(model, input_tokens, output_tokens, tracer.get("usage"))

     # Save conversation trace
     tracer["chat_model"] = model
@@ -162,10 +171,11 @@ def llm_thread(
     if os.getenv("KHOJ_LLM_SEED"):
         model_kwargs["seed"] = int(os.getenv("KHOJ_LLM_SEED"))

-    chat = client.chat.completions.create(
-        stream=stream,
+    chat: ChatCompletion | openai.Stream[ChatCompletionChunk] = client.chat.completions.create(
         messages=formatted_messages,
         model=model_name,  # type: ignore
+        stream=stream,
+        stream_options={"include_usage": True} if stream else {},
         temperature=temperature,
         timeout=20,
         **(model_kwargs or dict()),
@@ -173,7 +183,8 @@ def llm_thread(

     aggregated_response = ""
     if not stream:
-
+        chunk = chat
+        aggregated_response = chunk.choices[0].message.content
         g.send(aggregated_response)
     else:
         for chunk in chat:
@@ -189,6 +200,11 @@ def llm_thread(
                 aggregated_response += text_chunk
                 g.send(text_chunk)

+        # Calculate cost of chat
+        input_tokens = chunk.usage.prompt_tokens if hasattr(chunk, "usage") and chunk.usage else 0
+        output_tokens = chunk.usage.completion_tokens if hasattr(chunk, "usage") and chunk.usage else 0
+        tracer["usage"] = get_chat_usage_metrics(model_name, input_tokens, output_tokens, tracer.get("usage"))
+
        # Save conversation trace
        tracer["chat_model"] = model_name
        tracer["temperature"] = temperature
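The API detail behind both OpenAI hunks: with `stream_options={"include_usage": True}`, the final streamed chunk carries no `choices` but does carry `usage` totals, which is why the loops skip empty-choice chunks and read token counts from the last `chunk` after iteration. A standalone sketch of that behavior, assuming a configured OpenAI client (model name illustrative, not khoj's actual call site):

```python
# Sketch: consuming an OpenAI stream with usage reporting enabled.
# Assumes OPENAI_API_KEY is set in the environment.
import openai

client = openai.OpenAI()
stream = client.chat.completions.create(
    model="gpt-4o-mini",  # illustrative
    messages=[{"role": "user", "content": "Say hi"}],
    stream=True,
    stream_options={"include_usage": True},
)

text, last_chunk = "", None
for chunk in stream:
    last_chunk = chunk
    if len(chunk.choices) == 0:
        continue  # the final usage-only chunk has an empty choices list
    text += chunk.choices[0].delta.content or ""

if last_chunk is not None and last_chunk.usage:
    print(last_chunk.usage.prompt_tokens, last_chunk.usage.completion_tokens)
```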
khoj/processor/conversation/prompts.py
CHANGED
@@ -183,20 +183,23 @@ Improved Prompt:

 improve_diagram_description_prompt = PromptTemplate.from_template(
     """
-you are an architect working with a novice artist using a diagramming
+you are an architect working with a novice digital artist using a diagramming software.
 {personality_context}

 you need to convert the user's query to a description format that the novice artist can use very well. you are allowed to use primitives like
 - text
 - rectangle
-- diamond
 - ellipse
 - line
 - arrow

 use these primitives to describe what sort of diagram the drawer should create. the artist must recreate the diagram every time, so include all relevant prior information in your description.

-
+- include the full, exact description. the artist does not have much experience, so be precise.
+- describe the layout.
+- you can only use straight lines.
+- use simple, concise language.
+- keep it simple and easy to understand. the artist is easily distracted.

 Today's Date: {current_date}
 User's Location: {location}
@@ -218,19 +221,23 @@ Query: {query}

 excalidraw_diagram_generation_prompt = PromptTemplate.from_template(
     """
-You are a program manager with the ability to describe diagrams to compose in professional, fine detail.
+You are a program manager with the ability to describe diagrams to compose in professional, fine detail. You LOVE getting into the details and making tedious labels, lines, and shapes look beautiful. You make everything look perfect.
 {personality_context}

-You need to create a declarative description of the diagram and relevant components, using this base schema.
+You need to create a declarative description of the diagram and relevant components, using this base schema.
+- `label`: specify the text to be rendered in the respective elements.
+- Always use light colors for the `backgroundColor` property, like white, or light blue, green, red
+- **ALWAYS Required properties for ALL elements**: `type`, `x`, `y`, `id`.
+- Be very generous with spacing and composition. Use ample space between elements.

 {{
     type: string,
     x: number,
     y: number,
-    strokeColor: string,
-    backgroundColor: string,
     width: number,
     height: number,
+    strokeColor: string,
+    backgroundColor: string,
     id: string,
     label: {{
         text: string,
@@ -240,28 +247,30 @@ You need to create a declarative description of the diagram and relevant compone
 Valid types:
 - text
 - rectangle
-- diamond
 - ellipse
 - line
 - arrow

-For arrows and lines,
+For arrows and lines,
+- `points`: specify the start and end points of the arrow
+- **ALWAYS Required properties for ALL elements**: `type`, `x`, `y`, `id`.
+- `start` and `end` properties: connect the linear elements to other elements. The start and end point can either be the ID to map to an existing object, or the `type` and `text` to create a new object. Mapping to an existing object is useful if you want to connect it to multiple objects. Lines and arrows can only start and end at rectangle, text, or ellipse elements. Even if you're using the `start` and `end` properties, you still need to specify the `x` and `y` properties for the start and end points.

 {{
     type: "arrow",
     id: string,
     x: number,
     y: number,
-    width: number,
-    height: number,
     strokeColor: string,
     start: {{
         id: string,
         type: string,
+        text: string,
     }},
     end: {{
         id: string,
         type: string,
+        text: string,
     }},
     label: {{
         text: string,
@@ -272,7 +281,11 @@ For arrows and lines, you can use the `points` property to specify the start and
     ]
 }}

-For text,
+For text,
+- `text`: specify the text to be rendered
+- **ALWAYS Required properties for ALL elements**: `type`, `x`, `y`, `id`.
+- `fontSize`: optional property to specify the font size of the text
+- Use this element only for titles, subtitles, and overviews. For labels, use the `label` property in the respective elements.

 {{
     type: "text",
@@ -287,19 +300,25 @@ Here's an example of a valid diagram:

 Design Description: Create a diagram describing a circular development process with 3 stages: design, implementation and feedback. The design stage is connected to the implementation stage and the implementation stage is connected to the feedback stage and the feedback stage is connected to the design stage. Each stage should be labeled with the stage name.

-Response:
-
-
-
-
-{{"type":"
-{{"type":"ellipse","x":-
+Example Response:
+```json
+{{
+    "scratchpad": "The diagram represents a circular development process with 3 stages: design, implementation and feedback. Each stage is connected to the next stage using an arrow, forming a circular process.",
+    "elements": [
+    {{"type":"text","x":-150,"y":50,"id":"title_text","text":"Circular Development Process","fontSize":24}},
+    {{"type":"ellipse","x":-169,"y":113,"id":"design_ellipse", "label": {{"text": "Design"}}}},
+    {{"type":"ellipse","x":62,"y":394,"id":"implement_ellipse", "label": {{"text": "Implement"}}}},
+    {{"type":"ellipse","x":-348,"y":430,"id":"feedback_ellipse", "label": {{"text": "Feedback"}}}},
     {{"type":"arrow","x":21,"y":273,"id":"design_to_implement_arrow","points":[[0,0],[86,105]],"start":{{"id":"design_ellipse"}}, "end":{{"id":"implement_ellipse"}}}},
     {{"type":"arrow","x":50,"y":519,"id":"implement_to_feedback_arrow","points":[[0,0],[-198,-6]],"start":{{"id":"implement_ellipse"}}, "end":{{"id":"feedback_ellipse"}}}},
     {{"type":"arrow","x":-228,"y":417,"id":"feedback_to_design_arrow","points":[[0,0],[85,-123]],"start":{{"id":"feedback_ellipse"}}, "end":{{"id":"design_ellipse"}}}},
-]
+    ]
+}}
+```
+
+Think about spacing and composition. Use ample space between elements. Double the amount of space you think you need. Create a detailed diagram from the provided context and user prompt below.

-
+Return a valid JSON object, where the drawing is in `elements` and your thought process is in `scratchpad`. If you can't make the whole diagram in one response, you can split it into multiple responses. If you need to simplify for brevity, simply do so in the `scratchpad` field. DO NOT add additional info in the `elements` field.

 Diagram Description: {query}

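The prompt rewrite changes the expected model output from a bare element array to a JSON object with `scratchpad` and `elements` keys; the `khoj/routers/helpers.py` hunks further down enforce exactly that shape. A minimal sketch of validating a response against the new contract (the example payload is made up):

```python
import json

# Hypothetical model output following the new prompt contract
raw_response = (
    '{"scratchpad": "three stages in a cycle",'
    ' "elements": [{"type": "ellipse", "x": 0, "y": 0, "id": "design_ellipse"}]}'
)

response = json.loads(raw_response)
if (
    not isinstance(response, dict)
    or not response.get("scratchpad")
    or not isinstance(response.get("elements"), list)
    or not all(isinstance(element, dict) for element in response["elements"])
):
    raise AssertionError(f"Invalid diagram response: {response}")
print(f"{len(response['elements'])} element(s) to render")
```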
khoj/processor/conversation/utils.py
CHANGED
@@ -5,7 +5,6 @@ import math
 import mimetypes
 import os
 import queue
-import re
 import uuid
 from dataclasses import dataclass
 from datetime import datetime
@@ -57,7 +56,7 @@ model_to_prompt_size = {
     "gemini-1.5-flash": 20000,
     "gemini-1.5-pro": 20000,
     # Anthropic Models
-    "claude-3-5-sonnet-
+    "claude-3-5-sonnet-20241022": 20000,
     "claude-3-5-haiku-20241022": 20000,
     # Offline Models
     "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF": 20000,
@@ -213,6 +212,8 @@ class ChatEvent(Enum):
     REFERENCES = "references"
     STATUS = "status"
     METADATA = "metadata"
+    USAGE = "usage"
+    END_RESPONSE = "end_response"


 def message_to_log(
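The two new enum members give the stream a way to report usage metadata after the LLM response finishes, and then to explicitly close the whole response. A sketch of the `{"type", "data"}` envelope these events travel in, matching what `MessageProcessor` parses in the `khoj/routers/helpers.py` hunk below (`serialize_event` is a hypothetical helper, not khoj code):

```python
import json
from enum import Enum

class ChatEvent(Enum):
    USAGE = "usage"
    END_RESPONSE = "end_response"

def serialize_event(event: ChatEvent, data) -> str:
    # Hypothetical: wraps an event in the {"type", "data"} envelope the client parses
    return json.dumps({"type": event.value, "data": data})

usage = {"input_tokens": 4000, "output_tokens": 700, "cost": 0.00051}
print(serialize_event(ChatEvent.USAGE, usage))
print(serialize_event(ChatEvent.END_RESPONSE, ""))
```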
khoj/routers/api_chat.py
CHANGED
@@ -667,27 +667,37 @@ async def chat(
         finally:
             yield event_delimiter

-    async def send_llm_response(response: str):
+    async def send_llm_response(response: str, usage: dict = None):
+        # Send Chat Response
         async for result in send_event(ChatEvent.START_LLM_RESPONSE, ""):
             yield result
         async for result in send_event(ChatEvent.MESSAGE, response):
             yield result
         async for result in send_event(ChatEvent.END_LLM_RESPONSE, ""):
             yield result
+        # Send Usage Metadata once llm interactions are complete
+        if usage:
+            async for event in send_event(ChatEvent.USAGE, usage):
+                yield event
+        async for result in send_event(ChatEvent.END_RESPONSE, ""):
+            yield result

     def collect_telemetry():
         # Gather chat response telemetry
         nonlocal chat_metadata
         latency = time.perf_counter() - start_time
         cmd_set = set([cmd.value for cmd in conversation_commands])
+        cost = (tracer.get("usage", {}) or {}).get("cost", 0)
         chat_metadata = chat_metadata or {}
         chat_metadata["conversation_command"] = cmd_set
         chat_metadata["agent"] = conversation.agent.slug if conversation and conversation.agent else None
         chat_metadata["latency"] = f"{latency:.3f}"
         chat_metadata["ttft_latency"] = f"{ttft:.3f}"
+        chat_metadata["usage"] = tracer.get("usage")

         logger.info(f"Chat response time to first token: {ttft:.3f} seconds")
         logger.info(f"Chat response total time: {latency:.3f} seconds")
+        logger.info(f"Chat response cost: ${cost:.5f}")
         update_telemetry_state(
             request=request,
             telemetry_type="api",
@@ -699,7 +709,7 @@ async def chat(
     )

     if is_query_empty(q):
-        async for result in send_llm_response("Please ask your query to get started."):
+        async for result in send_llm_response("Please ask your query to get started.", tracer.get("usage")):
             yield result
         return

@@ -713,7 +723,7 @@ async def chat(
         create_new=body.create_new,
     )
     if not conversation:
-        async for result in send_llm_response(f"Conversation {conversation_id} not found"):
+        async for result in send_llm_response(f"Conversation {conversation_id} not found", tracer.get("usage")):
             yield result
         return
     conversation_id = conversation.id
@@ -777,7 +787,7 @@ async def chat(
             await conversation_command_rate_limiter.update_and_check_if_valid(request, cmd)
             q = q.replace(f"/{cmd.value}", "").strip()
         except HTTPException as e:
-            async for result in send_llm_response(str(e.detail)):
+            async for result in send_llm_response(str(e.detail), tracer.get("usage")):
                 yield result
             return

@@ -834,7 +844,7 @@ async def chat(
         agent_has_entries = await EntryAdapters.aagent_has_entries(agent)
         if len(file_filters) == 0 and not agent_has_entries:
             response_log = "No files selected for summarization. Please add files using the section on the left."
-            async for result in send_llm_response(response_log):
+            async for result in send_llm_response(response_log, tracer.get("usage")):
                 yield result
         else:
             async for response in generate_summary_from_files(
@@ -853,7 +863,7 @@ async def chat(
                 else:
                     if isinstance(response, str):
                         response_log = response
-                        async for result in send_llm_response(response):
+                        async for result in send_llm_response(response, tracer.get("usage")):
                             yield result

             await sync_to_async(save_to_conversation_log)(
@@ -880,7 +890,7 @@ async def chat(
             conversation_config = await ConversationAdapters.aget_default_conversation_config(user)
             model_type = conversation_config.model_type
             formatted_help = help_message.format(model=model_type, version=state.khoj_version, device=get_device())
-            async for result in send_llm_response(formatted_help):
+            async for result in send_llm_response(formatted_help, tracer.get("usage")):
                 yield result
             return
         # Adding specification to search online specifically on khoj.dev pages.
@@ -895,7 +905,7 @@ async def chat(
         except Exception as e:
             logger.error(f"Error scheduling task {q} for {user.email}: {e}")
             error_message = f"Unable to create automation. Ensure the automation doesn't already exist."
-            async for result in send_llm_response(error_message):
+            async for result in send_llm_response(error_message, tracer.get("usage")):
                 yield result
             return

@@ -916,7 +926,7 @@ async def chat(
             raw_query_files=raw_query_files,
             tracer=tracer,
         )
-        async for result in send_llm_response(llm_response):
+        async for result in send_llm_response(llm_response, tracer.get("usage")):
             yield result
         return

@@ -963,7 +973,7 @@ async def chat(
             yield result

     if conversation_commands == [ConversationCommand.Notes] and not await EntryAdapters.auser_has_entries(user):
-        async for result in send_llm_response(f"{no_entries_found.format()}"):
+        async for result in send_llm_response(f"{no_entries_found.format()}", tracer.get("usage")):
             yield result
         return

@@ -1105,7 +1115,7 @@ async def chat(
                 "detail": improved_image_prompt,
                 "image": None,
             }
-            async for result in send_llm_response(json.dumps(content_obj)):
+            async for result in send_llm_response(json.dumps(content_obj), tracer.get("usage")):
                 yield result
             return

@@ -1132,7 +1142,7 @@ async def chat(
             "inferredQueries": [improved_image_prompt],
             "image": generated_image,
         }
-        async for result in send_llm_response(json.dumps(content_obj)):
+        async for result in send_llm_response(json.dumps(content_obj), tracer.get("usage")):
             yield result
         return

@@ -1166,7 +1176,7 @@ async def chat(
             diagram_description = excalidraw_diagram_description
         else:
             error_message = "Failed to generate diagram. Please try again later."
-            async for result in send_llm_response(error_message):
+            async for result in send_llm_response(error_message, tracer.get("usage")):
                 yield result

             await sync_to_async(save_to_conversation_log)(
@@ -1213,7 +1223,7 @@ async def chat(
             tracer=tracer,
         )

-        async for result in send_llm_response(json.dumps(content_obj)):
+        async for result in send_llm_response(json.dumps(content_obj), tracer.get("usage")):
             yield result
         return

@@ -1252,6 +1262,11 @@ async def chat(
             if item is None:
                 async for result in send_event(ChatEvent.END_LLM_RESPONSE, ""):
                     yield result
+                # Send Usage Metadata once llm interactions are complete
+                async for event in send_event(ChatEvent.USAGE, tracer.get("usage")):
+                    yield event
+                async for result in send_event(ChatEvent.END_RESPONSE, ""):
+                    yield result
                 logger.debug("Finished streaming response")
                 return
             if not connection_alive or not continue_stream:
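Net effect of the api_chat.py changes: every response path, streaming or not, now ends with the same event tail. An illustrative trace of what a client observes (payload values made up):

```python
# Illustrative end-of-response event order after this change
event_tail = [
    {"type": "end_llm_response", "data": ""},
    {"type": "usage", "data": {"input_tokens": 4000, "output_tokens": 700, "cost": 0.00051}},
    {"type": "end_response", "data": ""},
]
for event in event_tail:
    print(event["type"], event["data"])
```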
khoj/routers/auth.py
CHANGED
@@ -89,7 +89,7 @@ async def login_magic_link(request: Request, form: MagicLinkForm):
         update_telemetry_state(
             request=request,
             telemetry_type="api",
-            api="
+            api="create_user__email",
             metadata={"server_id": str(user.uuid)},
         )
         logger.log(logging.INFO, f"🥳 New User Created: {user.uuid}")
@@ -174,7 +174,7 @@ async def auth(request: Request):
         update_telemetry_state(
             request=request,
             telemetry_type="api",
-            api="
+            api="create_user__google",
             metadata={"server_id": str(khoj_user.uuid)},
         )
         logger.log(logging.INFO, f"🥳 New User Created: {khoj_user.uuid}")
khoj/routers/helpers.py
CHANGED
@@ -753,7 +753,11 @@ async def generate_excalidraw_diagram(
         yield None, None
         return

-
+    scratchpad = excalidraw_diagram_description.get("scratchpad")
+
+    inferred_queries = f"Instruction: {better_diagram_description_prompt}\n\nScratchpad: {scratchpad}"
+
+    yield inferred_queries, excalidraw_diagram_description.get("elements")


 async def generate_better_diagram_description(
@@ -822,7 +826,7 @@ async def generate_excalidraw_diagram_from_description(
     user: KhojUser = None,
     agent: Agent = None,
     tracer: dict = {},
-) -> str:
+) -> Dict[str, Any]:
     personality_context = (
         prompts.personality_context.format(personality=agent.personality) if agent and agent.personality else ""
     )
@@ -838,10 +842,18 @@ async def generate_excalidraw_diagram_from_description(
     )
     raw_response = clean_json(raw_response)
     try:
+        # Expect response to have `elements` and `scratchpad` keys
         response: Dict[str, str] = json.loads(raw_response)
+        if (
+            not response
+            or not isinstance(response, Dict)
+            or not response.get("elements")
+            or not response.get("scratchpad")
+        ):
+            raise AssertionError(f"Invalid response for generating Excalidraw diagram: {response}")
     except Exception:
         raise AssertionError(f"Invalid response for generating Excalidraw diagram: {raw_response}")
-    if not response or not isinstance(response, List) or not isinstance(response[0], Dict):
+    if not response or not isinstance(response["elements"], List) or not isinstance(response["elements"][0], Dict):
         # TODO Some additional validation here that it's a valid Excalidraw diagram
         raise AssertionError(f"Invalid response for improving diagram description: {response}")

@@ -1770,6 +1782,7 @@ Manage your automations [here](/automations).
 class MessageProcessor:
     def __init__(self):
         self.references = {}
+        self.usage = {}
         self.raw_response = ""

     def convert_message_chunk_to_json(self, raw_chunk: str) -> Dict[str, Any]:
@@ -1793,6 +1806,8 @@ class MessageProcessor:
             chunk_type = ChatEvent(chunk["type"])
             if chunk_type == ChatEvent.REFERENCES:
                 self.references = chunk["data"]
+            elif chunk_type == ChatEvent.USAGE:
+                self.usage = chunk["data"]
             elif chunk_type == ChatEvent.MESSAGE:
                 chunk_data = chunk["data"]
                 if isinstance(chunk_data, dict):
@@ -1837,7 +1852,7 @@ async def read_chat_stream(response_iterator: AsyncGenerator[str, None]) -> Dict
     if buffer:
         processor.process_message_chunk(buffer)

-    return {"response": processor.raw_response, "references": processor.references}
+    return {"response": processor.raw_response, "references": processor.references, "usage": processor.usage}


 def get_user_config(user: KhojUser, request: Request, is_detailed: bool = False):
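With `MessageProcessor` tracking `usage` and `read_chat_stream` returning it, callers get cost data alongside the response text and references. A sketch of consuming the new return shape (the values are illustrative):

```python
# Sketch: the dict shape read_chat_stream now returns; values are illustrative
result = {
    "response": "Hello! How can I help?",
    "references": {},
    "usage": {"input_tokens": 4000, "output_tokens": 700, "cost": 0.00051},
}
cost = result.get("usage", {}).get("cost", 0)
print(f"This exchange cost ${cost:.5f}")
```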
khoj/utils/constants.py
CHANGED
@@ -1,4 +1,5 @@
 from pathlib import Path
+from typing import Dict

 app_root_directory = Path(__file__).parent.parent.parent
 web_directory = app_root_directory / "khoj/interface/web/"
@@ -31,3 +32,19 @@ default_config = {
         "image": {"encoder": "sentence-transformers/clip-ViT-B-32", "model_directory": "~/.khoj/search/image/"},
     },
 }
+
+model_to_cost: Dict[str, Dict[str, float]] = {
+    # OpenAI Pricing: https://openai.com/api/pricing/
+    "gpt-4o": {"input": 2.50, "output": 10.00},
+    "gpt-4o-mini": {"input": 0.15, "output": 0.60},
+    "o1-preview": {"input": 15.0, "output": 60.00},
+    "o1-mini": {"input": 3.0, "output": 12.0},
+    # Gemini Pricing: https://ai.google.dev/pricing
+    "gemini-1.5-flash": {"input": 0.075, "output": 0.30},
+    "gemini-1.5-flash-002": {"input": 0.075, "output": 0.30},
+    "gemini-1.5-pro": {"input": 1.25, "output": 5.00},
+    "gemini-1.5-pro-002": {"input": 1.25, "output": 5.00},
+    # Anthropic Pricing: https://www.anthropic.com/pricing#anthropic-api_
+    "claude-3-5-sonnet-20241022": {"input": 3.0, "output": 15.0},
+    "claude-3-5-haiku-20241022": {"input": 1.0, "output": 5.0},
+}
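All prices in `model_to_cost` are dollars per million tokens, so a quick worked example at the gpt-4o-mini rates above:

```python
# Worked example: cost of one gpt-4o-mini exchange at $0.15/M input, $0.60/M output
input_tokens, output_tokens = 1200, 300
input_cost = 0.15 * input_tokens / 1e6    # $0.00018
output_cost = 0.60 * output_tokens / 1e6  # $0.00018
print(f"${input_cost + output_cost:.5f}")  # $0.00036
```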
khoj/utils/helpers.py
CHANGED
@@ -540,3 +540,27 @@ def get_country_code_from_timezone(tz: str) -> str:
 def get_country_name_from_timezone(tz: str) -> str:
     """Get country name from timezone"""
     return country_names.get(get_country_code_from_timezone(tz), "United States")
+
+
+def get_cost_of_chat_message(model_name: str, input_tokens: int = 0, output_tokens: int = 0, prev_cost: float = 0.0):
+    """
+    Calculate cost of chat message based on input and output tokens
+    """
+
+    # Calculate cost of input and output tokens. Costs are per million tokens
+    input_cost = constants.model_to_cost.get(model_name, {}).get("input", 0) * (input_tokens / 1e6)
+    output_cost = constants.model_to_cost.get(model_name, {}).get("output", 0) * (output_tokens / 1e6)
+
+    return input_cost + output_cost + prev_cost
+
+
+def get_chat_usage_metrics(model_name: str, input_tokens: int = 0, output_tokens: int = 0, usage: dict = {}):
+    """
+    Get usage metrics for chat message based on input and output tokens
+    """
+    prev_usage = usage or {"input_tokens": 0, "output_tokens": 0, "cost": 0.0}
+    return {
+        "input_tokens": prev_usage["input_tokens"] + input_tokens,
+        "output_tokens": prev_usage["output_tokens"] + output_tokens,
+        "cost": get_cost_of_chat_message(model_name, input_tokens, output_tokens, prev_cost=prev_usage["cost"]),
+    }
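A quick usage sketch of the two helpers above, accumulating across two LLM calls the way `tracer["usage"]` does in the conversation processors (token counts illustrative):

```python
from khoj.utils.helpers import get_chat_usage_metrics  # as added in this diff

usage = None
usage = get_chat_usage_metrics("claude-3-5-haiku-20241022", input_tokens=2000, output_tokens=400, usage=usage)
usage = get_chat_usage_metrics("claude-3-5-haiku-20241022", input_tokens=1000, output_tokens=100, usage=usage)
# At $1.0/M input and $5.0/M output: (3000 * 1.0 + 500 * 5.0) / 1e6 = $0.0055
print(usage)  # {'input_tokens': 3000, 'output_tokens': 500, 'cost': 0.0055}
```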