khoj 1.16.1.dev25__py3-none-any.whl → 1.16.1.dev47__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package versions exactly as they appear in their public registry.
- khoj/database/adapters/__init__.py +8 -24
- khoj/interface/web/chat.html +186 -296
- khoj/processor/conversation/utils.py +0 -4
- khoj/processor/tools/online_search.py +13 -7
- khoj/routers/api.py +10 -6
- khoj/routers/api_chat.py +264 -483
- khoj/routers/helpers.py +32 -18
- khoj/routers/indexer.py +1 -1
- khoj/utils/fs_syncer.py +1 -1
- {khoj-1.16.1.dev25.dist-info → khoj-1.16.1.dev47.dist-info}/METADATA +1 -1
- {khoj-1.16.1.dev25.dist-info → khoj-1.16.1.dev47.dist-info}/RECORD +14 -14
- {khoj-1.16.1.dev25.dist-info → khoj-1.16.1.dev47.dist-info}/WHEEL +0 -0
- {khoj-1.16.1.dev25.dist-info → khoj-1.16.1.dev47.dist-info}/entry_points.txt +0 -0
- {khoj-1.16.1.dev25.dist-info → khoj-1.16.1.dev47.dist-info}/licenses/LICENSE +0 -0
khoj/processor/tools/online_search.py
CHANGED

@@ -11,6 +11,7 @@ from bs4 import BeautifulSoup
 from markdownify import markdownify
 
 from khoj.routers.helpers import (
+    ChatEvent,
     extract_relevant_info,
     generate_online_subqueries,
     infer_webpage_urls,
@@ -56,7 +57,8 @@ async def search_online(
     query += " ".join(custom_filters)
     if not is_internet_connected():
         logger.warn("Cannot search online as not connected to internet")
-        return {}
+        yield {}
+        return
 
     # Breakdown the query into subqueries to get the correct answer
     subqueries = await generate_online_subqueries(query, conversation_history, location)
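The `return {}` → `yield {}` plus bare `return` rewrite above is forced by Python's generator rules: once `search_online` yields status events it becomes an async generator, and `return <value>` is a SyntaxError in an async generator. A minimal illustration (not code from the package):

```python
import asyncio


async def empty_result():
    # `return {}` would be a SyntaxError here ("'return' with value in
    # async generator"); instead, emit the empty dict with `yield` and
    # end the generator with a bare `return`.
    yield {}
    return


async def main():
    print([item async for item in empty_result()])  # -> [{}]


asyncio.run(main())
```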
@@ -66,7 +68,8 @@ async def search_online(
     logger.info(f"🌐 Searching the Internet for {list(subqueries)}")
     if send_status_func:
         subqueries_str = "\n- " + "\n- ".join(list(subqueries))
-        await send_status_func(f"**🌐 Searching the Internet for**: {subqueries_str}")
+        async for event in send_status_func(f"**🌐 Searching the Internet for**: {subqueries_str}"):
+            yield {ChatEvent.STATUS: event}
 
     with timer(f"Internet searches for {list(subqueries)} took", logger):
         search_func = search_with_google if SERPER_DEV_API_KEY else search_with_jina
@@ -89,7 +92,8 @@ async def search_online(
     logger.info(f"🌐👀 Reading web pages at: {list(webpage_links)}")
     if send_status_func:
         webpage_links_str = "\n- " + "\n- ".join(list(webpage_links))
-        await send_status_func(f"**📖 Reading web pages**: {webpage_links_str}")
+        async for event in send_status_func(f"**📖 Reading web pages**: {webpage_links_str}"):
+            yield {ChatEvent.STATUS: event}
     tasks = [read_webpage_and_extract_content(subquery, link, content) for link, subquery, content in webpages]
     results = await asyncio.gather(*tasks)
 
@@ -98,7 +102,7 @@ async def search_online(
         if webpage_extract is not None:
             response_dict[subquery]["webpages"] = {"link": url, "snippet": webpage_extract}
 
-    return response_dict
+    yield response_dict
 
 
 async def search_with_google(query: str) -> Tuple[str, Dict[str, List[Dict]]]:
@@ -127,13 +131,15 @@ async def read_webpages(
     "Infer web pages to read from the query and extract relevant information from them"
     logger.info(f"Inferring web pages to read")
     if send_status_func:
-        await send_status_func(f"**🧐 Inferring web pages to read**")
+        async for event in send_status_func(f"**🧐 Inferring web pages to read**"):
+            yield {ChatEvent.STATUS: event}
     urls = await infer_webpage_urls(query, conversation_history, location)
 
     logger.info(f"Reading web pages at: {urls}")
     if send_status_func:
         webpage_links_str = "\n- " + "\n- ".join(list(urls))
-        await send_status_func(f"**📖 Reading web pages**: {webpage_links_str}")
+        async for event in send_status_func(f"**📖 Reading web pages**: {webpage_links_str}"):
+            yield {ChatEvent.STATUS: event}
     tasks = [read_webpage_and_extract_content(query, url) for url in urls]
     results = await asyncio.gather(*tasks)
 
@@ -141,7 +147,7 @@ async def read_webpages(
     response[query]["webpages"] = [
         {"query": q, "link": url, "snippet": web_extract} for q, web_extract, url in results if web_extract is not None
     ]
-    return response
+    yield response
 
 
 async def read_webpage_and_extract_content(
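Taken together, these hunks convert `search_online` and `read_webpages` from coroutines that return a result dict into async generators that stream intermediate status events and yield the result last. A minimal sketch of the pattern, with hypothetical stand-ins for `ChatEvent` and `send_status_func` (their real definitions live in `khoj/routers/helpers.py`, which is not shown in this diff):

```python
import asyncio
from enum import Enum
from typing import AsyncGenerator, Callable, Optional


class ChatEvent(Enum):
    # Hypothetical stand-in for khoj.routers.helpers.ChatEvent; only the
    # STATUS member matters for this sketch.
    STATUS = "status"


async def send_status(message: str) -> AsyncGenerator[str, None]:
    # Stand-in for the send_status_func the routers inject: an async
    # generator that emits renderable status strings.
    yield message


async def search_online_sketch(
    query: str, send_status_func: Optional[Callable] = None
) -> AsyncGenerator[dict, None]:
    # Same shape as the rewritten functions: stream status events while
    # working, then yield the final result dict as the last item.
    if send_status_func:
        async for event in send_status_func(f"**🌐 Searching the Internet for**: {query}"):
            yield {ChatEvent.STATUS: event}
    yield {query: {"webpages": []}}  # final payload, yielded instead of returned


async def main() -> None:
    async for item in search_online_sketch("what is khoj?", send_status):
        print(item)


asyncio.run(main())
```

The design point is that progress updates and the final payload travel over the same channel, so a single `async for` in the caller can both relay status to the client and capture the result.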
khoj/routers/api.py
CHANGED
@@ -6,7 +6,6 @@ import os
 import threading
 import time
 import uuid
-from random import random
 from typing import Any, Callable, List, Optional, Union
 
 import cron_descriptor
@@ -37,6 +36,7 @@ from khoj.processor.conversation.openai.gpt import extract_questions
 from khoj.processor.conversation.openai.whisper import transcribe_audio
 from khoj.routers.helpers import (
     ApiUserRateLimiter,
+    ChatEvent,
     CommonQueryParams,
     ConversationCommandRateLimiter,
     acreate_title_from_query,
@@ -298,11 +298,13 @@ async def extract_references_and_questions(
         not ConversationCommand.Notes in conversation_commands
         and not ConversationCommand.Default in conversation_commands
     ):
-        return compiled_references, inferred_queries, q
+        yield compiled_references, inferred_queries, q
+        return
 
     if not await sync_to_async(EntryAdapters.user_has_entries)(user=user):
         logger.debug("No documents in knowledge base. Use a Khoj client to sync and chat with your docs.")
-        return compiled_references, inferred_queries, q
+        yield compiled_references, inferred_queries, q
+        return
 
     # Extract filter terms from user message
     defiltered_query = q
@@ -313,7 +315,8 @@ async def extract_references_and_questions(
 
     if not conversation:
         logger.error(f"Conversation with id {conversation_id} not found.")
-        return compiled_references, inferred_queries, defiltered_query
+        yield compiled_references, inferred_queries, defiltered_query
+        return
 
     filters_in_query += " ".join([f'file:"{filter}"' for filter in conversation.file_filters])
     using_offline_chat = False
@@ -373,7 +376,8 @@ async def extract_references_and_questions(
     logger.info(f"🔍 Searching knowledge base with queries: {inferred_queries}")
     if send_status_func:
         inferred_queries_str = "\n- " + "\n- ".join(inferred_queries)
-        await send_status_func(f"**🔍 Searching Documents for:** {inferred_queries_str}")
+        async for event in send_status_func(f"**🔍 Searching Documents for:** {inferred_queries_str}"):
+            yield {ChatEvent.STATUS: event}
     for query in inferred_queries:
         n_items = min(n, 3) if using_offline_chat else n
         search_results.extend(
@@ -392,7 +396,7 @@ async def extract_references_and_questions(
             {"compiled": item.additional["compiled"], "file": item.additional["file"]} for item in search_results
         ]
 
-    return compiled_references, inferred_queries, defiltered_query
+    yield compiled_references, inferred_queries, defiltered_query
 
 
 @api.get("/health", response_class=Response)
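Callers now consume these functions by iterating, forwarding `ChatEvent.STATUS` dicts as they arrive and treating the final yielded value as the return value. A minimal, self-contained sketch of that consumption pattern, using hypothetical names (the real wiring presumably lives in `khoj/routers/api_chat.py`, which also changed in this release but is not shown here):

```python
import asyncio
from enum import Enum
from typing import AsyncGenerator


class ChatEvent(Enum):
    # Hypothetical stand-in for the ChatEvent imported in both files above.
    STATUS = "status"


async def extract_sketch(q: str) -> AsyncGenerator:
    # Shape of the rewritten extract_references_and_questions: zero or more
    # status dicts, then a final (references, queries, query) tuple.
    yield {ChatEvent.STATUS: "**🔍 Searching Documents for:** ..."}
    yield ([], [q], q)


async def main() -> None:
    final = None
    async for item in extract_sketch("what did I write about khoj?"):
        if isinstance(item, dict) and ChatEvent.STATUS in item:
            print("status:", item[ChatEvent.STATUS])  # forward to the client
        else:
            final = item  # the last yielded value carries the real result
    compiled_references, inferred_queries, defiltered_query = final
    print(inferred_queries)


asyncio.run(main())
```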