khoj 1.16.1.dev15__py3-none-any.whl → 1.17.1.dev229__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/configure.py +6 -6
- khoj/database/adapters/__init__.py +56 -12
- khoj/database/migrations/0053_agent_style_color_agent_style_icon.py +61 -0
- khoj/database/migrations/0054_alter_agent_style_color.py +38 -0
- khoj/database/models/__init__.py +35 -0
- khoj/interface/web/assets/icons/favicon-128x128.png +0 -0
- khoj/interface/web/assets/icons/favicon-256x256.png +0 -0
- khoj/interface/web/assets/icons/khoj-logo-sideways-200.png +0 -0
- khoj/interface/web/assets/icons/khoj-logo-sideways-500.png +0 -0
- khoj/interface/web/assets/icons/khoj-logo-sideways.svg +31 -5384
- khoj/interface/web/assets/icons/khoj.svg +26 -0
- khoj/interface/web/chat.html +191 -301
- khoj/interface/web/content_source_computer_input.html +3 -3
- khoj/interface/web/content_source_github_input.html +1 -1
- khoj/interface/web/content_source_notion_input.html +1 -1
- khoj/interface/web/public_conversation.html +1 -1
- khoj/interface/web/search.html +2 -2
- khoj/interface/web/{config.html → settings.html} +30 -30
- khoj/interface/web/utils.html +1 -1
- khoj/processor/content/docx/docx_to_entries.py +4 -9
- khoj/processor/content/github/github_to_entries.py +1 -3
- khoj/processor/content/images/image_to_entries.py +4 -9
- khoj/processor/content/markdown/markdown_to_entries.py +4 -9
- khoj/processor/content/notion/notion_to_entries.py +1 -3
- khoj/processor/content/org_mode/org_to_entries.py +4 -9
- khoj/processor/content/pdf/pdf_to_entries.py +4 -9
- khoj/processor/content/plaintext/plaintext_to_entries.py +4 -9
- khoj/processor/content/text_to_entries.py +1 -3
- khoj/processor/conversation/anthropic/anthropic_chat.py +10 -4
- khoj/processor/conversation/offline/chat_model.py +19 -7
- khoj/processor/conversation/offline/utils.py +2 -0
- khoj/processor/conversation/openai/gpt.py +9 -3
- khoj/processor/conversation/prompts.py +56 -25
- khoj/processor/conversation/utils.py +5 -6
- khoj/processor/tools/online_search.py +13 -7
- khoj/routers/api.py +60 -10
- khoj/routers/api_agents.py +3 -1
- khoj/routers/api_chat.py +335 -562
- khoj/routers/api_content.py +538 -0
- khoj/routers/api_model.py +156 -0
- khoj/routers/helpers.py +339 -26
- khoj/routers/notion.py +2 -8
- khoj/routers/web_client.py +43 -256
- khoj/search_type/text_search.py +5 -4
- khoj/utils/fs_syncer.py +4 -2
- khoj/utils/rawconfig.py +6 -1
- {khoj-1.16.1.dev15.dist-info → khoj-1.17.1.dev229.dist-info}/METADATA +3 -3
- {khoj-1.16.1.dev15.dist-info → khoj-1.17.1.dev229.dist-info}/RECORD +51 -49
- khoj/interface/web/assets/icons/favicon.icns +0 -0
- khoj/routers/api_config.py +0 -434
- khoj/routers/indexer.py +0 -349
- {khoj-1.16.1.dev15.dist-info → khoj-1.17.1.dev229.dist-info}/WHEEL +0 -0
- {khoj-1.16.1.dev15.dist-info → khoj-1.17.1.dev229.dist-info}/entry_points.txt +0 -0
- {khoj-1.16.1.dev15.dist-info → khoj-1.17.1.dev229.dist-info}/licenses/LICENSE +0 -0
|
@@ -19,8 +19,8 @@ You were created by Khoj Inc. with the following capabilities:
|
|
|
19
19
|
- Sometimes the user will share personal information that needs to be remembered, like an account ID or a residential address. These can be acknowledged with a simple "Got it" or "Okay".
|
|
20
20
|
- Provide inline references to quotes from the user's notes or any web pages you refer to in your responses in markdown format. For example, "The farmer had ten sheep. [1](https://example.com)". *ALWAYS CITE YOUR SOURCES AND PROVIDE REFERENCES*. Add them inline to directly support your claim.
|
|
21
21
|
|
|
22
|
-
Note: More information about you, the company or Khoj apps
|
|
23
|
-
Today is {current_date} in UTC.
|
|
22
|
+
Note: More information about you, the company or Khoj apps can be found at https://khoj.dev.
|
|
23
|
+
Today is {day_of_week}, {current_date} in UTC.
|
|
24
24
|
""".strip()
|
|
25
25
|
)
|
|
26
26
|
|
|
@@ -39,7 +39,7 @@ You were created by Khoj Inc. with the following capabilities:
|
|
|
39
39
|
- Ask crisp follow-up questions to get additional context, when the answer cannot be inferred from the provided notes or past conversations.
|
|
40
40
|
- Sometimes the user will share personal information that needs to be remembered, like an account ID or a residential address. These can be acknowledged with a simple "Got it" or "Okay".
|
|
41
41
|
|
|
42
|
-
Today is {current_date} in UTC.
|
|
42
|
+
Today is {day_of_week}, {current_date} in UTC.
|
|
43
43
|
|
|
44
44
|
Instructions:\n{bio}
|
|
45
45
|
""".strip()
|
|
@@ -79,10 +79,12 @@ You are Khoj, a smart, inquisitive and helpful personal assistant.
|
|
|
79
79
|
- Use your general knowledge and past conversation with the user as context to inform your responses.
|
|
80
80
|
- If you do not know the answer, say 'I don't know.'
|
|
81
81
|
- Think step-by-step and ask questions to get the necessary information to answer the user's question.
|
|
82
|
+
- Ask crisp follow-up questions to get additional context, when the answer cannot be inferred from the provided information or past conversations.
|
|
82
83
|
- Do not print verbatim Notes unless necessary.
|
|
83
84
|
|
|
84
|
-
|
|
85
|
-
|
|
85
|
+
Note: More information about you, the company or Khoj apps can be found at https://khoj.dev.
|
|
86
|
+
Today is {day_of_week}, {current_date} in UTC.
|
|
87
|
+
""".strip()
|
|
86
88
|
)
|
|
87
89
|
|
|
88
90
|
custom_system_prompt_offline_chat = PromptTemplate.from_template(
|
|
@@ -91,12 +93,14 @@ You are {name}, a personal agent on Khoj.
|
|
|
91
93
|
- Use your general knowledge and past conversation with the user as context to inform your responses.
|
|
92
94
|
- If you do not know the answer, say 'I don't know.'
|
|
93
95
|
- Think step-by-step and ask questions to get the necessary information to answer the user's question.
|
|
96
|
+
- Ask crisp follow-up questions to get additional context, when the answer cannot be inferred from the provided information or past conversations.
|
|
94
97
|
- Do not print verbatim Notes unless necessary.
|
|
95
98
|
|
|
96
|
-
|
|
99
|
+
Note: More information about you, the company or Khoj apps can be found at https://khoj.dev.
|
|
100
|
+
Today is {day_of_week}, {current_date} in UTC.
|
|
97
101
|
|
|
98
102
|
Instructions:\n{bio}
|
|
99
|
-
|
|
103
|
+
""".strip()
|
|
100
104
|
)
|
|
101
105
|
|
|
102
106
|
## Notes Conversation
|
|
@@ -106,13 +110,15 @@ notes_conversation = PromptTemplate.from_template(
|
|
|
106
110
|
Use my personal notes and our past conversations to inform your response.
|
|
107
111
|
Ask crisp follow-up questions to get additional context, when a helpful response cannot be provided from the provided notes or past conversations.
|
|
108
112
|
|
|
109
|
-
Notes:
|
|
113
|
+
User's Notes:
|
|
110
114
|
{references}
|
|
111
115
|
""".strip()
|
|
112
116
|
)
|
|
113
117
|
|
|
114
118
|
notes_conversation_offline = PromptTemplate.from_template(
|
|
115
119
|
"""
|
|
120
|
+
Use my personal notes and our past conversations to inform your response.
|
|
121
|
+
|
|
116
122
|
User's Notes:
|
|
117
123
|
{references}
|
|
118
124
|
""".strip()
|
|
@@ -174,6 +180,15 @@ Information from the internet:
|
|
|
174
180
|
""".strip()
|
|
175
181
|
)
|
|
176
182
|
|
|
183
|
+
online_search_conversation_offline = PromptTemplate.from_template(
|
|
184
|
+
"""
|
|
185
|
+
Use this up-to-date information from the internet to inform your response.
|
|
186
|
+
|
|
187
|
+
Information from the internet:
|
|
188
|
+
{online_results}
|
|
189
|
+
""".strip()
|
|
190
|
+
)
|
|
191
|
+
|
|
177
192
|
## Query prompt
|
|
178
193
|
## --
|
|
179
194
|
query_prompt = PromptTemplate.from_template(
|
|
@@ -186,15 +201,16 @@ Query: {query}""".strip()
|
|
|
186
201
|
## --
|
|
187
202
|
extract_questions_offline = PromptTemplate.from_template(
|
|
188
203
|
"""
|
|
189
|
-
You are Khoj, an extremely smart and helpful search assistant with the ability to retrieve information from the user's notes.
|
|
190
|
-
|
|
204
|
+
You are Khoj, an extremely smart and helpful search assistant with the ability to retrieve information from the user's notes. Disregard online search requests.
|
|
205
|
+
Construct search queries to retrieve relevant information to answer the user's question.
|
|
206
|
+
- You will be provided past questions(Q) and answers(Khoj) for context.
|
|
191
207
|
- Try to be as specific as possible. Instead of saying "they" or "it" or "he", use proper nouns like name of the person or thing you are referring to.
|
|
192
208
|
- Add as much context from the previous questions and answers as required into your search queries.
|
|
193
209
|
- Break messages into multiple search queries when required to retrieve the relevant information.
|
|
194
210
|
- Add date filters to your search queries from questions and answers when required to retrieve the relevant information.
|
|
195
211
|
- Share relevant search queries as a JSON list of strings. Do not say anything else.
|
|
196
212
|
|
|
197
|
-
Current Date: {current_date}
|
|
213
|
+
Current Date: {day_of_week}, {current_date}
|
|
198
214
|
User's Location: {location}
|
|
199
215
|
|
|
200
216
|
Examples:
|
|
@@ -232,7 +248,8 @@ Q: {query}
|
|
|
232
248
|
|
|
233
249
|
extract_questions = PromptTemplate.from_template(
|
|
234
250
|
"""
|
|
235
|
-
You are Khoj, an extremely smart and helpful document search assistant with only the ability to retrieve information from the user's notes. Disregard online search requests.
|
|
251
|
+
You are Khoj, an extremely smart and helpful document search assistant with only the ability to retrieve information from the user's notes. Disregard online search requests.
|
|
252
|
+
Construct search queries to retrieve relevant information to answer the user's question.
|
|
236
253
|
- You will be provided past questions(Q) and answers(A) for context.
|
|
237
254
|
- Add as much context from the previous questions and answers as required into your search queries.
|
|
238
255
|
- Break messages into multiple search queries when required to retrieve the relevant information.
|
|
@@ -282,8 +299,9 @@ Khoj:
|
|
|
282
299
|
|
|
283
300
|
extract_questions_anthropic_system_prompt = PromptTemplate.from_template(
|
|
284
301
|
"""
|
|
285
|
-
You are Khoj, an extremely smart and helpful document search assistant with only the ability to retrieve information from the user's notes. Disregard online search requests.
|
|
286
|
-
|
|
302
|
+
You are Khoj, an extremely smart and helpful document search assistant with only the ability to retrieve information from the user's notes. Disregard online search requests.
|
|
303
|
+
Construct search queries to retrieve relevant information to answer the user's question.
|
|
304
|
+
- You will be provided past questions(User), extracted queries(Assistant) and answers(A) for context.
|
|
287
305
|
- Add as much context from the previous questions and answers as required into your search queries.
|
|
288
306
|
- Break messages into multiple search queries when required to retrieve the relevant information.
|
|
289
307
|
- Add date filters to your search queries from questions and answers when required to retrieve the relevant information.
|
|
@@ -297,15 +315,19 @@ Here are some examples of how you can construct search queries to answer the use
|
|
|
297
315
|
|
|
298
316
|
User: How was my trip to Cambodia?
|
|
299
317
|
Assistant: {{"queries": ["How was my trip to Cambodia?"]}}
|
|
318
|
+
A: The trip was amazing. You went to the Angkor Wat temple and it was beautiful.
|
|
300
319
|
|
|
301
320
|
User: What national parks did I go to last year?
|
|
302
321
|
Assistant: {{"queries": ["National park I visited in {last_new_year} dt>='{last_new_year_date}' dt<'{current_new_year_date}'"]}}
|
|
322
|
+
A: You visited the Grand Canyon and Yellowstone National Park in {last_new_year}.
|
|
303
323
|
|
|
304
324
|
User: How can you help me?
|
|
305
325
|
Assistant: {{"queries": ["Social relationships", "Physical and mental health", "Education and career", "Personal life goals and habits"]}}
|
|
326
|
+
A: I can help you live healthier and happier across work and personal life
|
|
306
327
|
|
|
307
328
|
User: Who all did I meet here yesterday?
|
|
308
329
|
Assistant: {{"queries": ["Met in {location} on {yesterday_date} dt>='{yesterday_date}' dt<'{current_date}'"]}}
|
|
330
|
+
A: Yesterday's note mentions your visit to your local beach with Ram and Shyam.
|
|
309
331
|
""".strip()
|
|
310
332
|
)
|
|
311
333
|
|
|
@@ -319,7 +341,11 @@ Assistant:
|
|
|
319
341
|
""".strip()
|
|
320
342
|
)
|
|
321
343
|
|
|
322
|
-
system_prompt_extract_relevant_information = """
|
|
344
|
+
system_prompt_extract_relevant_information = """
|
|
345
|
+
As a professional analyst, create a comprehensive report of the most relevant information from a web page in response to a user's query.
|
|
346
|
+
The text provided is directly from within the web page.
|
|
347
|
+
The report you create should be multiple paragraphs, and it should represent the content of the website.
|
|
348
|
+
Tell the user exactly what the website says in response to their query, while adhering to these guidelines:
|
|
323
349
|
|
|
324
350
|
1. Answer the user's query as specifically as possible. Include many supporting details from the website.
|
|
325
351
|
2. Craft a report that is detailed, thorough, in-depth, and complex, while maintaining clarity.
|
|
@@ -340,7 +366,11 @@ Collate only relevant information from the website to answer the target query.
|
|
|
340
366
|
""".strip()
|
|
341
367
|
)
|
|
342
368
|
|
|
343
|
-
system_prompt_extract_relevant_summary = """
|
|
369
|
+
system_prompt_extract_relevant_summary = """
|
|
370
|
+
As a professional analyst, create a comprehensive report of the most relevant information from the document in response to a user's query.
|
|
371
|
+
The text provided is directly from within the document.
|
|
372
|
+
The report you create should be multiple paragraphs, and it should represent the content of the document.
|
|
373
|
+
Tell the user exactly what the document says in response to their query, while adhering to these guidelines:
|
|
344
374
|
|
|
345
375
|
1. Answer the user's query as specifically as possible. Include many supporting details from the document.
|
|
346
376
|
2. Craft a report that is detailed, thorough, in-depth, and complex, while maintaining clarity.
|
|
@@ -363,11 +393,13 @@ Collate only relevant information from the document to answer the target query.
|
|
|
363
393
|
|
|
364
394
|
pick_relevant_output_mode = PromptTemplate.from_template(
|
|
365
395
|
"""
|
|
366
|
-
You are Khoj, an excellent analyst for selecting the correct way to respond to a user's query.
|
|
396
|
+
You are Khoj, an excellent analyst for selecting the correct way to respond to a user's query.
|
|
397
|
+
You have access to a limited set of modes for your response.
|
|
398
|
+
You can only use one of these modes.
|
|
367
399
|
|
|
368
400
|
{modes}
|
|
369
401
|
|
|
370
|
-
Here are some
|
|
402
|
+
Here are some examples:
|
|
371
403
|
|
|
372
404
|
Example:
|
|
373
405
|
Chat History:
|
|
@@ -383,7 +415,7 @@ User: I'm having trouble deciding which laptop to get. I want something with at
|
|
|
383
415
|
AI: I can help with that. I see online that there is a new model of the Dell XPS 15 that meets your requirements.
|
|
384
416
|
|
|
385
417
|
Q: What are the specs of the new Dell XPS 15?
|
|
386
|
-
Khoj:
|
|
418
|
+
Khoj: text
|
|
387
419
|
|
|
388
420
|
Example:
|
|
389
421
|
Chat History:
|
|
@@ -391,7 +423,7 @@ User: Where did I go on my last vacation?
|
|
|
391
423
|
AI: You went to Jordan and visited Petra, the Dead Sea, and Wadi Rum.
|
|
392
424
|
|
|
393
425
|
Q: Remind me who did I go with on that trip?
|
|
394
|
-
Khoj:
|
|
426
|
+
Khoj: text
|
|
395
427
|
|
|
396
428
|
Example:
|
|
397
429
|
Chat History:
|
|
@@ -399,7 +431,7 @@ User: How's the weather outside? Current Location: Bali, Indonesia
|
|
|
399
431
|
AI: It's currently 28°C and partly cloudy in Bali.
|
|
400
432
|
|
|
401
433
|
Q: Share a painting using the weather for Bali every morning.
|
|
402
|
-
Khoj:
|
|
434
|
+
Khoj: automation
|
|
403
435
|
|
|
404
436
|
Now it's your turn to pick the mode you would like to use to answer the user's question. Provide your response as a string.
|
|
405
437
|
|
|
@@ -422,7 +454,7 @@ Which of the data sources listed below you would use to answer the user's questi
|
|
|
422
454
|
|
|
423
455
|
{tools}
|
|
424
456
|
|
|
425
|
-
Here are some
|
|
457
|
+
Here are some examples:
|
|
426
458
|
|
|
427
459
|
Example:
|
|
428
460
|
Chat History:
|
|
@@ -533,10 +565,10 @@ You are Khoj, an advanced google search assistant. You are tasked with construct
|
|
|
533
565
|
- Break messages into multiple search queries when required to retrieve the relevant information.
|
|
534
566
|
- Use site: google search operators when appropriate
|
|
535
567
|
- You have access to the the whole internet to retrieve information.
|
|
536
|
-
- Official, up-to-date information about you, Khoj, is available at site:khoj.dev
|
|
568
|
+
- Official, up-to-date information about you, Khoj, is available at site:khoj.dev, github or pypi.
|
|
537
569
|
|
|
538
570
|
What Google searches, if any, will you need to perform to answer the user's question?
|
|
539
|
-
Provide search queries as a list of strings in a JSON object.
|
|
571
|
+
Provide search queries as a list of strings in a JSON object. Do not wrap the json in a codeblock.
|
|
540
572
|
Current Date: {current_date}
|
|
541
573
|
User's Location: {location}
|
|
542
574
|
|
|
@@ -589,7 +621,6 @@ Q: How many oranges would fit in NASA's Saturn V rocket?
|
|
|
589
621
|
Khoj: {{"queries": ["volume of an orange", "volume of saturn v rocket"]}}
|
|
590
622
|
|
|
591
623
|
Now it's your turn to construct Google search queries to answer the user's question. Provide them as a list of strings in a JSON object. Do not say anything else.
|
|
592
|
-
Now it's your turn to construct a search query for Google to answer the user's question.
|
|
593
624
|
History:
|
|
594
625
|
{chat_history}
|
|
595
626
|
|
|
@@ -62,10 +62,6 @@ class ThreadedGenerator:
|
|
|
62
62
|
self.queue.put(data)
|
|
63
63
|
|
|
64
64
|
def close(self):
|
|
65
|
-
if self.compiled_references and len(self.compiled_references) > 0:
|
|
66
|
-
self.queue.put(f"### compiled references:{json.dumps(self.compiled_references)}")
|
|
67
|
-
if self.online_results and len(self.online_results) > 0:
|
|
68
|
-
self.queue.put(f"### compiled references:{json.dumps(self.online_results)}")
|
|
69
65
|
self.queue.put(StopIteration)
|
|
70
66
|
|
|
71
67
|
|
|
@@ -186,7 +182,7 @@ def generate_chatml_messages_with_context(
|
|
|
186
182
|
|
|
187
183
|
def truncate_messages(
|
|
188
184
|
messages: list[ChatMessage],
|
|
189
|
-
max_prompt_size,
|
|
185
|
+
max_prompt_size: int,
|
|
190
186
|
model_name: str,
|
|
191
187
|
loaded_model: Optional[Llama] = None,
|
|
192
188
|
tokenizer_name=None,
|
|
@@ -232,7 +228,8 @@ def truncate_messages(
|
|
|
232
228
|
tokens = sum([len(encoder.encode(message.content)) for message in messages if type(message.content) == str])
|
|
233
229
|
|
|
234
230
|
# Drop older messages until under max supported prompt size by model
|
|
235
|
-
|
|
231
|
+
# Reserves 4 tokens to demarcate each message (e.g <|im_start|>user, <|im_end|>, <|endoftext|> etc.)
|
|
232
|
+
while (tokens + system_message_tokens + 4 * len(messages)) > max_prompt_size and len(messages) > 1:
|
|
236
233
|
messages.pop()
|
|
237
234
|
tokens = sum([len(encoder.encode(message.content)) for message in messages if type(message.content) == str])
|
|
238
235
|
|
|
@@ -254,6 +251,8 @@ def truncate_messages(
|
|
|
254
251
|
f"Truncate current message to fit within max prompt size of {max_prompt_size} supported by {model_name} model:\n {truncated_message}"
|
|
255
252
|
)
|
|
256
253
|
|
|
254
|
+
if system_message:
|
|
255
|
+
system_message.role = "user" if "gemma-2" in model_name else "system"
|
|
257
256
|
return messages + [system_message] if system_message else messages
|
|
258
257
|
|
|
259
258
|
|
|
@@ -11,6 +11,7 @@ from bs4 import BeautifulSoup
|
|
|
11
11
|
from markdownify import markdownify
|
|
12
12
|
|
|
13
13
|
from khoj.routers.helpers import (
|
|
14
|
+
ChatEvent,
|
|
14
15
|
extract_relevant_info,
|
|
15
16
|
generate_online_subqueries,
|
|
16
17
|
infer_webpage_urls,
|
|
@@ -56,7 +57,8 @@ async def search_online(
|
|
|
56
57
|
query += " ".join(custom_filters)
|
|
57
58
|
if not is_internet_connected():
|
|
58
59
|
logger.warn("Cannot search online as not connected to internet")
|
|
59
|
-
|
|
60
|
+
yield {}
|
|
61
|
+
return
|
|
60
62
|
|
|
61
63
|
# Breakdown the query into subqueries to get the correct answer
|
|
62
64
|
subqueries = await generate_online_subqueries(query, conversation_history, location)
|
|
@@ -66,7 +68,8 @@ async def search_online(
|
|
|
66
68
|
logger.info(f"🌐 Searching the Internet for {list(subqueries)}")
|
|
67
69
|
if send_status_func:
|
|
68
70
|
subqueries_str = "\n- " + "\n- ".join(list(subqueries))
|
|
69
|
-
|
|
71
|
+
async for event in send_status_func(f"**Searching the Internet for**: {subqueries_str}"):
|
|
72
|
+
yield {ChatEvent.STATUS: event}
|
|
70
73
|
|
|
71
74
|
with timer(f"Internet searches for {list(subqueries)} took", logger):
|
|
72
75
|
search_func = search_with_google if SERPER_DEV_API_KEY else search_with_jina
|
|
@@ -89,7 +92,8 @@ async def search_online(
|
|
|
89
92
|
logger.info(f"🌐👀 Reading web pages at: {list(webpage_links)}")
|
|
90
93
|
if send_status_func:
|
|
91
94
|
webpage_links_str = "\n- " + "\n- ".join(list(webpage_links))
|
|
92
|
-
|
|
95
|
+
async for event in send_status_func(f"**Reading web pages**: {webpage_links_str}"):
|
|
96
|
+
yield {ChatEvent.STATUS: event}
|
|
93
97
|
tasks = [read_webpage_and_extract_content(subquery, link, content) for link, subquery, content in webpages]
|
|
94
98
|
results = await asyncio.gather(*tasks)
|
|
95
99
|
|
|
@@ -98,7 +102,7 @@ async def search_online(
|
|
|
98
102
|
if webpage_extract is not None:
|
|
99
103
|
response_dict[subquery]["webpages"] = {"link": url, "snippet": webpage_extract}
|
|
100
104
|
|
|
101
|
-
|
|
105
|
+
yield response_dict
|
|
102
106
|
|
|
103
107
|
|
|
104
108
|
async def search_with_google(query: str) -> Tuple[str, Dict[str, List[Dict]]]:
|
|
@@ -127,13 +131,15 @@ async def read_webpages(
|
|
|
127
131
|
"Infer web pages to read from the query and extract relevant information from them"
|
|
128
132
|
logger.info(f"Inferring web pages to read")
|
|
129
133
|
if send_status_func:
|
|
130
|
-
|
|
134
|
+
async for event in send_status_func(f"**Inferring web pages to read**"):
|
|
135
|
+
yield {ChatEvent.STATUS: event}
|
|
131
136
|
urls = await infer_webpage_urls(query, conversation_history, location)
|
|
132
137
|
|
|
133
138
|
logger.info(f"Reading web pages at: {urls}")
|
|
134
139
|
if send_status_func:
|
|
135
140
|
webpage_links_str = "\n- " + "\n- ".join(list(urls))
|
|
136
|
-
|
|
141
|
+
async for event in send_status_func(f"**Reading web pages**: {webpage_links_str}"):
|
|
142
|
+
yield {ChatEvent.STATUS: event}
|
|
137
143
|
tasks = [read_webpage_and_extract_content(query, url) for url in urls]
|
|
138
144
|
results = await asyncio.gather(*tasks)
|
|
139
145
|
|
|
@@ -141,7 +147,7 @@ async def read_webpages(
|
|
|
141
147
|
response[query]["webpages"] = [
|
|
142
148
|
{"query": q, "link": url, "snippet": web_extract} for q, web_extract, url in results if web_extract is not None
|
|
143
149
|
]
|
|
144
|
-
|
|
150
|
+
yield response
|
|
145
151
|
|
|
146
152
|
|
|
147
153
|
async def read_webpage_and_extract_content(
|
khoj/routers/api.py
CHANGED
|
@@ -6,7 +6,6 @@ import os
|
|
|
6
6
|
import threading
|
|
7
7
|
import time
|
|
8
8
|
import uuid
|
|
9
|
-
from random import random
|
|
10
9
|
from typing import Any, Callable, List, Optional, Union
|
|
11
10
|
|
|
12
11
|
import cron_descriptor
|
|
@@ -20,6 +19,7 @@ from fastapi.responses import Response
|
|
|
20
19
|
from starlette.authentication import has_required_scope, requires
|
|
21
20
|
|
|
22
21
|
from khoj.configure import initialize_content
|
|
22
|
+
from khoj.database import adapters
|
|
23
23
|
from khoj.database.adapters import (
|
|
24
24
|
AutomationAdapters,
|
|
25
25
|
ConversationAdapters,
|
|
@@ -37,9 +37,11 @@ from khoj.processor.conversation.openai.gpt import extract_questions
|
|
|
37
37
|
from khoj.processor.conversation.openai.whisper import transcribe_audio
|
|
38
38
|
from khoj.routers.helpers import (
|
|
39
39
|
ApiUserRateLimiter,
|
|
40
|
+
ChatEvent,
|
|
40
41
|
CommonQueryParams,
|
|
41
42
|
ConversationCommandRateLimiter,
|
|
42
43
|
acreate_title_from_query,
|
|
44
|
+
get_user_config,
|
|
43
45
|
schedule_automation,
|
|
44
46
|
update_telemetry_state,
|
|
45
47
|
)
|
|
@@ -190,7 +192,7 @@ def update(
|
|
|
190
192
|
):
|
|
191
193
|
user = request.user.object
|
|
192
194
|
if not state.config:
|
|
193
|
-
error_msg = f"🚨 Khoj is not configured.\nConfigure it via http://localhost:42110/
|
|
195
|
+
error_msg = f"🚨 Khoj is not configured.\nConfigure it via http://localhost:42110/settings, plugins or by editing {state.config_file}."
|
|
194
196
|
logger.warning(error_msg)
|
|
195
197
|
raise HTTPException(status_code=500, detail=error_msg)
|
|
196
198
|
try:
|
|
@@ -223,10 +225,10 @@ async def transcribe(
|
|
|
223
225
|
common: CommonQueryParams,
|
|
224
226
|
file: UploadFile = File(...),
|
|
225
227
|
rate_limiter_per_minute=Depends(
|
|
226
|
-
ApiUserRateLimiter(requests=
|
|
228
|
+
ApiUserRateLimiter(requests=20, subscribed_requests=20, window=60, slug="transcribe_minute")
|
|
227
229
|
),
|
|
228
230
|
rate_limiter_per_day=Depends(
|
|
229
|
-
ApiUserRateLimiter(requests=
|
|
231
|
+
ApiUserRateLimiter(requests=60, subscribed_requests=600, window=60 * 60 * 24, slug="transcribe_day")
|
|
230
232
|
),
|
|
231
233
|
):
|
|
232
234
|
user: KhojUser = request.user.object
|
|
@@ -277,6 +279,49 @@ async def transcribe(
|
|
|
277
279
|
return Response(content=content, media_type="application/json", status_code=200)
|
|
278
280
|
|
|
279
281
|
|
|
282
|
+
@api.get("/settings", response_class=Response)
|
|
283
|
+
@requires(["authenticated"])
|
|
284
|
+
def get_settings(request: Request, detailed: Optional[bool] = False) -> Response:
|
|
285
|
+
user = request.user.object
|
|
286
|
+
user_config = get_user_config(user, request, is_detailed=detailed)
|
|
287
|
+
del user_config["request"]
|
|
288
|
+
|
|
289
|
+
# Return config data as a JSON response
|
|
290
|
+
return Response(content=json.dumps(user_config), media_type="application/json", status_code=200)
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
@api.patch("/user/name", status_code=200)
|
|
294
|
+
@requires(["authenticated"])
|
|
295
|
+
def set_user_name(
|
|
296
|
+
request: Request,
|
|
297
|
+
name: str,
|
|
298
|
+
client: Optional[str] = None,
|
|
299
|
+
):
|
|
300
|
+
user = request.user.object
|
|
301
|
+
|
|
302
|
+
split_name = name.split(" ")
|
|
303
|
+
|
|
304
|
+
if len(split_name) > 2:
|
|
305
|
+
raise HTTPException(status_code=400, detail="Name must be in the format: Firstname Lastname")
|
|
306
|
+
|
|
307
|
+
if len(split_name) == 1:
|
|
308
|
+
first_name = split_name[0]
|
|
309
|
+
last_name = ""
|
|
310
|
+
else:
|
|
311
|
+
first_name, last_name = split_name[0], split_name[-1]
|
|
312
|
+
|
|
313
|
+
adapters.set_user_name(user, first_name, last_name)
|
|
314
|
+
|
|
315
|
+
update_telemetry_state(
|
|
316
|
+
request=request,
|
|
317
|
+
telemetry_type="api",
|
|
318
|
+
api="set_user_name",
|
|
319
|
+
client=client,
|
|
320
|
+
)
|
|
321
|
+
|
|
322
|
+
return {"status": "ok"}
|
|
323
|
+
|
|
324
|
+
|
|
280
325
|
async def extract_references_and_questions(
|
|
281
326
|
request: Request,
|
|
282
327
|
meta_log: dict,
|
|
@@ -298,11 +343,13 @@ async def extract_references_and_questions(
|
|
|
298
343
|
not ConversationCommand.Notes in conversation_commands
|
|
299
344
|
and not ConversationCommand.Default in conversation_commands
|
|
300
345
|
):
|
|
301
|
-
|
|
346
|
+
yield compiled_references, inferred_queries, q
|
|
347
|
+
return
|
|
302
348
|
|
|
303
349
|
if not await sync_to_async(EntryAdapters.user_has_entries)(user=user):
|
|
304
350
|
logger.debug("No documents in knowledge base. Use a Khoj client to sync and chat with your docs.")
|
|
305
|
-
|
|
351
|
+
yield compiled_references, inferred_queries, q
|
|
352
|
+
return
|
|
306
353
|
|
|
307
354
|
# Extract filter terms from user message
|
|
308
355
|
defiltered_query = q
|
|
@@ -313,11 +360,12 @@ async def extract_references_and_questions(
|
|
|
313
360
|
|
|
314
361
|
if not conversation:
|
|
315
362
|
logger.error(f"Conversation with id {conversation_id} not found.")
|
|
316
|
-
|
|
363
|
+
yield compiled_references, inferred_queries, defiltered_query
|
|
364
|
+
return
|
|
317
365
|
|
|
318
366
|
filters_in_query += " ".join([f'file:"{filter}"' for filter in conversation.file_filters])
|
|
319
367
|
using_offline_chat = False
|
|
320
|
-
|
|
368
|
+
logger.debug(f"Filters in query: {filters_in_query}")
|
|
321
369
|
|
|
322
370
|
# Infer search queries from user message
|
|
323
371
|
with timer("Extracting search queries took", logger):
|
|
@@ -335,6 +383,7 @@ async def extract_references_and_questions(
|
|
|
335
383
|
|
|
336
384
|
inferred_queries = extract_questions_offline(
|
|
337
385
|
defiltered_query,
|
|
386
|
+
model=chat_model,
|
|
338
387
|
loaded_model=loaded_model,
|
|
339
388
|
conversation_log=meta_log,
|
|
340
389
|
should_extract_questions=True,
|
|
@@ -372,7 +421,8 @@ async def extract_references_and_questions(
|
|
|
372
421
|
logger.info(f"🔍 Searching knowledge base with queries: {inferred_queries}")
|
|
373
422
|
if send_status_func:
|
|
374
423
|
inferred_queries_str = "\n- " + "\n- ".join(inferred_queries)
|
|
375
|
-
|
|
424
|
+
async for event in send_status_func(f"**Searching Documents for:** {inferred_queries_str}"):
|
|
425
|
+
yield {ChatEvent.STATUS: event}
|
|
376
426
|
for query in inferred_queries:
|
|
377
427
|
n_items = min(n, 3) if using_offline_chat else n
|
|
378
428
|
search_results.extend(
|
|
@@ -391,7 +441,7 @@ async def extract_references_and_questions(
|
|
|
391
441
|
{"compiled": item.additional["compiled"], "file": item.additional["file"]} for item in search_results
|
|
392
442
|
]
|
|
393
443
|
|
|
394
|
-
|
|
444
|
+
yield compiled_references, inferred_queries, defiltered_query
|
|
395
445
|
|
|
396
446
|
|
|
397
447
|
@api.get("/health", response_class=Response)
|
khoj/routers/api_agents.py
CHANGED
|
@@ -30,10 +30,12 @@ async def all_agents(
|
|
|
30
30
|
"slug": agent.slug,
|
|
31
31
|
"avatar": agent.avatar,
|
|
32
32
|
"name": agent.name,
|
|
33
|
-
"
|
|
33
|
+
"persona": agent.personality,
|
|
34
34
|
"public": agent.public,
|
|
35
35
|
"creator": agent.creator.username if agent.creator else None,
|
|
36
36
|
"managed_by_admin": agent.managed_by_admin,
|
|
37
|
+
"color": agent.style_color,
|
|
38
|
+
"icon": agent.style_icon,
|
|
37
39
|
}
|
|
38
40
|
)
|
|
39
41
|
|