khoj 1.16.1.dev26__py3-none-any.whl → 1.17.0__py3-none-any.whl

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -11,6 +11,7 @@ from bs4 import BeautifulSoup
11
11
  from markdownify import markdownify
12
12
 
13
13
  from khoj.routers.helpers import (
14
+ ChatEvent,
14
15
  extract_relevant_info,
15
16
  generate_online_subqueries,
16
17
  infer_webpage_urls,
@@ -56,7 +57,8 @@ async def search_online(
56
57
  query += " ".join(custom_filters)
57
58
  if not is_internet_connected():
58
59
  logger.warn("Cannot search online as not connected to internet")
59
- return {}
60
+ yield {}
61
+ return
60
62
 
61
63
  # Breakdown the query into subqueries to get the correct answer
62
64
  subqueries = await generate_online_subqueries(query, conversation_history, location)
@@ -66,7 +68,8 @@ async def search_online(
66
68
  logger.info(f"🌐 Searching the Internet for {list(subqueries)}")
67
69
  if send_status_func:
68
70
  subqueries_str = "\n- " + "\n- ".join(list(subqueries))
69
- await send_status_func(f"**🌐 Searching the Internet for**: {subqueries_str}")
71
+ async for event in send_status_func(f"**🌐 Searching the Internet for**: {subqueries_str}"):
72
+ yield {ChatEvent.STATUS: event}
70
73
 
71
74
  with timer(f"Internet searches for {list(subqueries)} took", logger):
72
75
  search_func = search_with_google if SERPER_DEV_API_KEY else search_with_jina
@@ -89,7 +92,8 @@ async def search_online(
89
92
  logger.info(f"🌐👀 Reading web pages at: {list(webpage_links)}")
90
93
  if send_status_func:
91
94
  webpage_links_str = "\n- " + "\n- ".join(list(webpage_links))
92
- await send_status_func(f"**📖 Reading web pages**: {webpage_links_str}")
95
+ async for event in send_status_func(f"**📖 Reading web pages**: {webpage_links_str}"):
96
+ yield {ChatEvent.STATUS: event}
93
97
  tasks = [read_webpage_and_extract_content(subquery, link, content) for link, subquery, content in webpages]
94
98
  results = await asyncio.gather(*tasks)
95
99
 
@@ -98,7 +102,7 @@ async def search_online(
98
102
  if webpage_extract is not None:
99
103
  response_dict[subquery]["webpages"] = {"link": url, "snippet": webpage_extract}
100
104
 
101
- return response_dict
105
+ yield response_dict
102
106
 
103
107
 
104
108
  async def search_with_google(query: str) -> Tuple[str, Dict[str, List[Dict]]]:
@@ -127,13 +131,15 @@ async def read_webpages(
127
131
  "Infer web pages to read from the query and extract relevant information from them"
128
132
  logger.info(f"Inferring web pages to read")
129
133
  if send_status_func:
130
- await send_status_func(f"**🧐 Inferring web pages to read**")
134
+ async for event in send_status_func(f"**🧐 Inferring web pages to read**"):
135
+ yield {ChatEvent.STATUS: event}
131
136
  urls = await infer_webpage_urls(query, conversation_history, location)
132
137
 
133
138
  logger.info(f"Reading web pages at: {urls}")
134
139
  if send_status_func:
135
140
  webpage_links_str = "\n- " + "\n- ".join(list(urls))
136
- await send_status_func(f"**📖 Reading web pages**: {webpage_links_str}")
141
+ async for event in send_status_func(f"**📖 Reading web pages**: {webpage_links_str}"):
142
+ yield {ChatEvent.STATUS: event}
137
143
  tasks = [read_webpage_and_extract_content(query, url) for url in urls]
138
144
  results = await asyncio.gather(*tasks)
139
145
 
@@ -141,7 +147,7 @@ async def read_webpages(
141
147
  response[query]["webpages"] = [
142
148
  {"query": q, "link": url, "snippet": web_extract} for q, web_extract, url in results if web_extract is not None
143
149
  ]
144
- return response
150
+ yield response
145
151
 
146
152
 
147
153
  async def read_webpage_and_extract_content(
khoj/routers/api.py CHANGED
@@ -6,7 +6,6 @@ import os
6
6
  import threading
7
7
  import time
8
8
  import uuid
9
- from random import random
10
9
  from typing import Any, Callable, List, Optional, Union
11
10
 
12
11
  import cron_descriptor
@@ -37,6 +36,7 @@ from khoj.processor.conversation.openai.gpt import extract_questions
37
36
  from khoj.processor.conversation.openai.whisper import transcribe_audio
38
37
  from khoj.routers.helpers import (
39
38
  ApiUserRateLimiter,
39
+ ChatEvent,
40
40
  CommonQueryParams,
41
41
  ConversationCommandRateLimiter,
42
42
  acreate_title_from_query,
@@ -298,11 +298,13 @@ async def extract_references_and_questions(
298
298
  not ConversationCommand.Notes in conversation_commands
299
299
  and not ConversationCommand.Default in conversation_commands
300
300
  ):
301
- return compiled_references, inferred_queries, q
301
+ yield compiled_references, inferred_queries, q
302
+ return
302
303
 
303
304
  if not await sync_to_async(EntryAdapters.user_has_entries)(user=user):
304
305
  logger.debug("No documents in knowledge base. Use a Khoj client to sync and chat with your docs.")
305
- return compiled_references, inferred_queries, q
306
+ yield compiled_references, inferred_queries, q
307
+ return
306
308
 
307
309
  # Extract filter terms from user message
308
310
  defiltered_query = q
@@ -313,7 +315,8 @@ async def extract_references_and_questions(
313
315
 
314
316
  if not conversation:
315
317
  logger.error(f"Conversation with id {conversation_id} not found.")
316
- return compiled_references, inferred_queries, defiltered_query
318
+ yield compiled_references, inferred_queries, defiltered_query
319
+ return
317
320
 
318
321
  filters_in_query += " ".join([f'file:"{filter}"' for filter in conversation.file_filters])
319
322
  using_offline_chat = False
@@ -373,7 +376,8 @@ async def extract_references_and_questions(
373
376
  logger.info(f"🔍 Searching knowledge base with queries: {inferred_queries}")
374
377
  if send_status_func:
375
378
  inferred_queries_str = "\n- " + "\n- ".join(inferred_queries)
376
- await send_status_func(f"**🔍 Searching Documents for:** {inferred_queries_str}")
379
+ async for event in send_status_func(f"**🔍 Searching Documents for:** {inferred_queries_str}"):
380
+ yield {ChatEvent.STATUS: event}
377
381
  for query in inferred_queries:
378
382
  n_items = min(n, 3) if using_offline_chat else n
379
383
  search_results.extend(
@@ -392,7 +396,7 @@ async def extract_references_and_questions(
392
396
  {"compiled": item.additional["compiled"], "file": item.additional["file"]} for item in search_results
393
397
  ]
394
398
 
395
- return compiled_references, inferred_queries, defiltered_query
399
+ yield compiled_references, inferred_queries, defiltered_query
396
400
 
397
401
 
398
402
  @api.get("/health", response_class=Response)