khoj 1.27.2.dev18__py3-none-any.whl → 1.27.2.dev130__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. khoj/database/adapters/__init__.py +34 -10
  2. khoj/interface/compiled/404/index.html +1 -1
  3. khoj/interface/compiled/_next/static/chunks/1034-da58b679fcbb79c1.js +1 -0
  4. khoj/interface/compiled/_next/static/chunks/1467-5a191c1cd5bf0b83.js +1 -0
  5. khoj/interface/compiled/_next/static/chunks/1603-5d70d9dfcdcb1f10.js +1 -0
  6. khoj/interface/compiled/_next/static/chunks/3423-fa918f4e5365a35e.js +1 -0
  7. khoj/interface/compiled/_next/static/chunks/8423-3ad0bfb299801220.js +1 -0
  8. khoj/interface/compiled/_next/static/chunks/app/chat/page-7dc98df9c88828f0.js +1 -0
  9. khoj/interface/compiled/_next/static/chunks/app/factchecker/page-d887f55fe6d4f35d.js +1 -0
  10. khoj/interface/compiled/_next/static/chunks/app/{page-8f22b790e50dd722.js → page-d46244282af16509.js} +1 -1
  11. khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-6a01e07fb244c10c.js → page-505b07bce608b34e.js} +1 -1
  12. khoj/interface/compiled/_next/static/chunks/{webpack-31239d193815e49e.js → webpack-8ae5ce45161bd98e.js} +1 -1
  13. khoj/interface/compiled/_next/static/css/{2272c73fc7a3b571.css → 26c1c33d0423a7d8.css} +1 -1
  14. khoj/interface/compiled/_next/static/css/e9c5fe555dd3050b.css +25 -0
  15. khoj/interface/compiled/agents/index.html +1 -1
  16. khoj/interface/compiled/agents/index.txt +2 -2
  17. khoj/interface/compiled/automations/index.html +1 -1
  18. khoj/interface/compiled/automations/index.txt +2 -2
  19. khoj/interface/compiled/chat/index.html +1 -1
  20. khoj/interface/compiled/chat/index.txt +2 -2
  21. khoj/interface/compiled/factchecker/index.html +1 -1
  22. khoj/interface/compiled/factchecker/index.txt +2 -2
  23. khoj/interface/compiled/index.html +1 -1
  24. khoj/interface/compiled/index.txt +2 -2
  25. khoj/interface/compiled/search/index.html +1 -1
  26. khoj/interface/compiled/search/index.txt +2 -2
  27. khoj/interface/compiled/settings/index.html +1 -1
  28. khoj/interface/compiled/settings/index.txt +2 -2
  29. khoj/interface/compiled/share/chat/index.html +1 -1
  30. khoj/interface/compiled/share/chat/index.txt +2 -2
  31. khoj/processor/conversation/anthropic/anthropic_chat.py +19 -10
  32. khoj/processor/conversation/anthropic/utils.py +37 -6
  33. khoj/processor/conversation/google/gemini_chat.py +23 -13
  34. khoj/processor/conversation/google/utils.py +34 -10
  35. khoj/processor/conversation/offline/chat_model.py +40 -15
  36. khoj/processor/conversation/openai/gpt.py +25 -10
  37. khoj/processor/conversation/openai/utils.py +43 -9
  38. khoj/processor/conversation/prompts.py +131 -22
  39. khoj/processor/conversation/utils.py +299 -6
  40. khoj/processor/image/generate.py +2 -0
  41. khoj/processor/tools/online_search.py +19 -8
  42. khoj/processor/tools/run_code.py +144 -0
  43. khoj/routers/api.py +11 -6
  44. khoj/routers/api_chat.py +177 -88
  45. khoj/routers/helpers.py +155 -59
  46. khoj/routers/research.py +321 -0
  47. khoj/search_filter/date_filter.py +1 -3
  48. khoj/search_filter/file_filter.py +1 -2
  49. khoj/search_type/text_search.py +3 -3
  50. khoj/utils/helpers.py +15 -2
  51. khoj/utils/yaml.py +4 -0
  52. {khoj-1.27.2.dev18.dist-info → khoj-1.27.2.dev130.dist-info}/METADATA +2 -1
  53. {khoj-1.27.2.dev18.dist-info → khoj-1.27.2.dev130.dist-info}/RECORD +61 -58
  54. khoj/interface/compiled/_next/static/chunks/1603-5138bb7c8035d9a6.js +0 -1
  55. khoj/interface/compiled/_next/static/chunks/2697-61fcba89fd87eab4.js +0 -1
  56. khoj/interface/compiled/_next/static/chunks/3423-8e9c420574a9fbe3.js +0 -1
  57. khoj/interface/compiled/_next/static/chunks/9479-a5e7ff4c7d1d7ee7.js +0 -1
  58. khoj/interface/compiled/_next/static/chunks/app/chat/page-151232d8417a1ea1.js +0 -1
  59. khoj/interface/compiled/_next/static/chunks/app/factchecker/page-798904432c2417c4.js +0 -1
  60. khoj/interface/compiled/_next/static/css/76d55eb435962b19.css +0 -25
  61. /khoj/interface/compiled/_next/static/{_gBBcNbs4wMKxKXhQs5E4 → N19uqHAJYqRAVxvuVwHfE}/_buildManifest.js +0 -0
  62. /khoj/interface/compiled/_next/static/{_gBBcNbs4wMKxKXhQs5E4 → N19uqHAJYqRAVxvuVwHfE}/_ssgManifest.js +0 -0
  63. /khoj/interface/compiled/_next/static/chunks/{1970-1d6d0c1b00b4f343.js → 1970-444843bea1d17d61.js} +0 -0
  64. /khoj/interface/compiled/_next/static/chunks/{9417-759984ad62caa3dc.js → 9417-19cfd1a9cb758e71.js} +0 -0
  65. /khoj/interface/compiled/_next/static/chunks/app/settings/{page-7946cabb9c54e22d.js → page-89e6737b2cc9fb3a.js} +0 -0
  66. {khoj-1.27.2.dev18.dist-info → khoj-1.27.2.dev130.dist-info}/WHEEL +0 -0
  67. {khoj-1.27.2.dev18.dist-info → khoj-1.27.2.dev130.dist-info}/entry_points.txt +0 -0
  68. {khoj-1.27.2.dev18.dist-info → khoj-1.27.2.dev130.dist-info}/licenses/LICENSE +0 -0
@@ -4,7 +4,7 @@ import logging
4
4
  import os
5
5
  import urllib.parse
6
6
  from collections import defaultdict
7
- from typing import Callable, Dict, List, Optional, Tuple, Union
7
+ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
8
8
 
9
9
  import aiohttp
10
10
  from bs4 import BeautifulSoup
@@ -52,7 +52,8 @@ OLOSTEP_QUERY_PARAMS = {
52
52
  "expandMarkdown": "True",
53
53
  "expandHtml": "False",
54
54
  }
55
- MAX_WEBPAGES_TO_READ = 1
55
+
56
+ DEFAULT_MAX_WEBPAGES_TO_READ = 1
56
57
 
57
58
 
58
59
  async def search_online(
@@ -62,8 +63,10 @@ async def search_online(
62
63
  user: KhojUser,
63
64
  send_status_func: Optional[Callable] = None,
64
65
  custom_filters: List[str] = [],
66
+ max_webpages_to_read: int = DEFAULT_MAX_WEBPAGES_TO_READ,
65
67
  query_images: List[str] = None,
66
68
  agent: Agent = None,
69
+ tracer: dict = {},
67
70
  ):
68
71
  query += " ".join(custom_filters)
69
72
  if not is_internet_connected():
@@ -73,7 +76,7 @@ async def search_online(
73
76
 
74
77
  # Breakdown the query into subqueries to get the correct answer
75
78
  subqueries = await generate_online_subqueries(
76
- query, conversation_history, location, user, query_images=query_images, agent=agent
79
+ query, conversation_history, location, user, query_images=query_images, agent=agent, tracer=tracer
77
80
  )
78
81
  response_dict = {}
79
82
 
@@ -96,7 +99,7 @@ async def search_online(
96
99
  for subquery in response_dict:
97
100
  if "answerBox" in response_dict[subquery]:
98
101
  continue
99
- for organic in response_dict[subquery].get("organic", [])[:MAX_WEBPAGES_TO_READ]:
102
+ for organic in response_dict[subquery].get("organic", [])[:max_webpages_to_read]:
100
103
  link = organic.get("link")
101
104
  if link in webpages:
102
105
  webpages[link]["queries"].add(subquery)
@@ -111,7 +114,7 @@ async def search_online(
111
114
  async for event in send_status_func(f"**Reading web pages**: {webpage_links_str}"):
112
115
  yield {ChatEvent.STATUS: event}
113
116
  tasks = [
114
- read_webpage_and_extract_content(data["queries"], link, data["content"], user=user, agent=agent)
117
+ read_webpage_and_extract_content(data["queries"], link, data["content"], user=user, agent=agent, tracer=tracer)
115
118
  for link, data in webpages.items()
116
119
  ]
117
120
  results = await asyncio.gather(*tasks)
@@ -153,6 +156,7 @@ async def read_webpages(
153
156
  send_status_func: Optional[Callable] = None,
154
157
  query_images: List[str] = None,
155
158
  agent: Agent = None,
159
+ tracer: dict = {},
156
160
  ):
157
161
  "Infer web pages to read from the query and extract relevant information from them"
158
162
  logger.info(f"Inferring web pages to read")
@@ -166,7 +170,7 @@ async def read_webpages(
166
170
  webpage_links_str = "\n- " + "\n- ".join(list(urls))
167
171
  async for event in send_status_func(f"**Reading web pages**: {webpage_links_str}"):
168
172
  yield {ChatEvent.STATUS: event}
169
- tasks = [read_webpage_and_extract_content({query}, url, user=user, agent=agent) for url in urls]
173
+ tasks = [read_webpage_and_extract_content({query}, url, user=user, agent=agent, tracer=tracer) for url in urls]
170
174
  results = await asyncio.gather(*tasks)
171
175
 
172
176
  response: Dict[str, Dict] = defaultdict(dict)
@@ -192,7 +196,12 @@ async def read_webpage(
192
196
 
193
197
 
194
198
  async def read_webpage_and_extract_content(
195
- subqueries: set[str], url: str, content: str = None, user: KhojUser = None, agent: Agent = None
199
+ subqueries: set[str],
200
+ url: str,
201
+ content: str = None,
202
+ user: KhojUser = None,
203
+ agent: Agent = None,
204
+ tracer: dict = {},
196
205
  ) -> Tuple[set[str], str, Union[None, str]]:
197
206
  # Select the web scrapers to use for reading the web page
198
207
  web_scrapers = await ConversationAdapters.aget_enabled_webscrapers()
@@ -214,7 +223,9 @@ async def read_webpage_and_extract_content(
214
223
  # Extract relevant information from the web page
215
224
  if is_none_or_empty(extracted_info):
216
225
  with timer(f"Extracting relevant information from web page at '{url}' took", logger):
217
- extracted_info = await extract_relevant_info(subqueries, content, user=user, agent=agent)
226
+ extracted_info = await extract_relevant_info(
227
+ subqueries, content, user=user, agent=agent, tracer=tracer
228
+ )
218
229
 
219
230
  # If we successfully extracted information, break the loop
220
231
  if not is_none_or_empty(extracted_info):
@@ -0,0 +1,144 @@
1
+ import asyncio
2
+ import datetime
3
+ import json
4
+ import logging
5
+ import os
6
+ from typing import Any, Callable, List, Optional
7
+
8
+ import aiohttp
9
+
10
+ from khoj.database.adapters import ais_user_subscribed
11
+ from khoj.database.models import Agent, KhojUser
12
+ from khoj.processor.conversation import prompts
13
+ from khoj.processor.conversation.utils import (
14
+ ChatEvent,
15
+ clean_code_python,
16
+ clean_json,
17
+ construct_chat_history,
18
+ )
19
+ from khoj.routers.helpers import send_message_to_model_wrapper
20
+ from khoj.utils.helpers import timer
21
+ from khoj.utils.rawconfig import LocationData
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
+ SANDBOX_URL = os.getenv("KHOJ_TERRARIUM_URL", "http://localhost:8080")
27
+
28
+
29
async def run_code(
    query: str,
    conversation_history: dict,
    context: str,
    location_data: LocationData,
    user: KhojUser,
    send_status_func: Optional[Callable] = None,
    query_images: List[str] = None,
    agent: Agent = None,
    sandbox_url: str = SANDBOX_URL,
    tracer: dict = {},
):
    """
    Generate Python code snippets for the query, execute each in the terrarium
    sandbox, and yield their results.

    Yields ChatEvent.STATUS progress events (when send_status_func is given),
    then one {query: {"code": ..., "results": ...}} dict per executed snippet.

    Raises ValueError (chained to the underlying error) if code generation or
    execution fails.
    """
    # Generate Code
    if send_status_func:
        async for event in send_status_func(f"**Generate code snippets** for {query}"):
            yield {ChatEvent.STATUS: event}
    try:
        with timer("Chat actor: Generate programs to execute", logger):
            codes = await generate_python_code(
                query,
                conversation_history,
                context,
                location_data,
                user,
                query_images,
                agent,
                tracer,
            )
    except Exception as e:
        # Chain the cause so the original traceback is preserved for debugging
        raise ValueError(f"Failed to generate code for {query} with error: {e}") from e

    # Run Code
    if send_status_func:
        async for event in send_status_func(f"**Running {len(codes)} code snippets**"):
            yield {ChatEvent.STATUS: event}
    try:
        # Execute all generated snippets concurrently in the sandbox
        tasks = [execute_sandboxed_python(code, sandbox_url) for code in codes]
        with timer("Chat actor: Execute generated programs", logger):
            results = await asyncio.gather(*tasks)
        for result in results:
            # Pop the code out of the result dict so it is reported separately
            code = result.pop("code")
            logger.info(f"Executed Code:\n--@@--\n{code}\n--@@--Result:\n--@@--\n{result}\n--@@--")
            yield {query: {"code": code, "results": result}}
    except Exception as e:
        raise ValueError(f"Failed to run code for {query} with error: {e}") from e
74
+
75
+
76
async def generate_python_code(
    q: str,
    conversation_history: dict,
    context: str,
    location_data: LocationData,
    user: KhojUser,
    query_images: List[str] = None,
    agent: Agent = None,
    tracer: dict = {},
) -> List[str]:
    """
    Ask the chat model to write Python programs that help answer the query.

    Returns a non-empty list of code snippet strings.
    Raises ValueError if the model response contains no usable code snippets.
    """
    location = f"{location_data}" if location_data else "Unknown"
    username = prompts.user_name.format(name=user.get_full_name()) if user.get_full_name() else ""
    subscribed = await ais_user_subscribed(user)
    chat_history = construct_chat_history(conversation_history)

    utc_date = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d")
    personality_context = (
        prompts.personality_context.format(personality=agent.personality) if agent and agent.personality else ""
    )

    code_generation_prompt = prompts.python_code_generation_prompt.format(
        current_date=utc_date,
        query=q,
        chat_history=chat_history,
        context=context,
        location=location,
        username=username,
        personality_context=personality_context,
    )

    response = await send_message_to_model_wrapper(
        code_generation_prompt,
        query_images=query_images,
        response_type="json_object",
        user=user,
        tracer=tracer,
    )

    # Validate that the response contains a non-empty list of code snippets.
    # Use .get() so a missing "codes" key raises ValueError below, not KeyError.
    response = clean_json(response)
    response = json.loads(response)
    codes = [code.strip() for code in response.get("codes", []) if code.strip()]

    if not codes:
        raise ValueError(f"Model returned no code snippets for query: {q}")
    return codes
122
+
123
+
124
async def execute_sandboxed_python(code: str, sandbox_url: str = SANDBOX_URL) -> dict[str, Any]:
    """
    Takes code to run as a string and calls the terrarium API to execute it.
    Returns the result of the code execution as a dictionary.
    """
    cleaned_code = clean_code_python(code)
    request_body = {"code": cleaned_code}
    headers = {"Content-Type": "application/json"}

    async with aiohttp.ClientSession() as session:
        async with session.post(sandbox_url, json=request_body, headers=headers) as response:
            # Guard clause: report sandbox failure with the HTTP status code
            if response.status != 200:
                return {
                    "code": cleaned_code,
                    "success": False,
                    "std_err": f"Failed to execute code with {response.status}",
                }
            # Success: attach the executed code to the sandbox's result payload
            execution_result: dict[str, Any] = await response.json()
            execution_result["code"] = cleaned_code
            return execution_result
khoj/routers/api.py CHANGED
@@ -44,6 +44,7 @@ from khoj.processor.conversation.offline.chat_model import extract_questions_off
44
44
  from khoj.processor.conversation.offline.whisper import transcribe_audio_offline
45
45
  from khoj.processor.conversation.openai.gpt import extract_questions
46
46
  from khoj.processor.conversation.openai.whisper import transcribe_audio
47
+ from khoj.processor.conversation.utils import defilter_query
47
48
  from khoj.routers.helpers import (
48
49
  ApiUserRateLimiter,
49
50
  ChatEvent,
@@ -167,8 +168,8 @@ async def execute_search(
167
168
  search_futures += [
168
169
  executor.submit(
169
170
  text_search.query,
170
- user,
171
171
  user_query,
172
+ user,
172
173
  t,
173
174
  question_embedding=encoded_asymmetric_query,
174
175
  max_distance=max_distance,
@@ -350,11 +351,12 @@ async def extract_references_and_questions(
350
351
  send_status_func: Optional[Callable] = None,
351
352
  query_images: Optional[List[str]] = None,
352
353
  agent: Agent = None,
354
+ tracer: dict = {},
353
355
  ):
354
356
  user = request.user.object if request.user.is_authenticated else None
355
357
 
356
358
  # Initialize Variables
357
- compiled_references: List[Any] = []
359
+ compiled_references: List[dict[str, str]] = []
358
360
  inferred_queries: List[str] = []
359
361
 
360
362
  agent_has_entries = False
@@ -383,9 +385,7 @@ async def extract_references_and_questions(
383
385
  return
384
386
 
385
387
  # Extract filter terms from user message
386
- defiltered_query = q
387
- for filter in [DateFilter(), WordFilter(), FileFilter()]:
388
- defiltered_query = filter.defilter(defiltered_query)
388
+ defiltered_query = defilter_query(q)
389
389
  filters_in_query = q.replace(defiltered_query, "").strip()
390
390
  conversation = await sync_to_async(ConversationAdapters.get_conversation_by_id)(conversation_id)
391
391
 
@@ -425,6 +425,7 @@ async def extract_references_and_questions(
425
425
  user=user,
426
426
  max_prompt_size=conversation_config.max_prompt_size,
427
427
  personality_context=personality_context,
428
+ tracer=tracer,
428
429
  )
429
430
  elif conversation_config.model_type == ChatModelOptions.ModelType.OPENAI:
430
431
  openai_chat_config = conversation_config.openai_config
@@ -442,6 +443,7 @@ async def extract_references_and_questions(
442
443
  query_images=query_images,
443
444
  vision_enabled=vision_enabled,
444
445
  personality_context=personality_context,
446
+ tracer=tracer,
445
447
  )
446
448
  elif conversation_config.model_type == ChatModelOptions.ModelType.ANTHROPIC:
447
449
  api_key = conversation_config.openai_config.api_key
@@ -456,6 +458,7 @@ async def extract_references_and_questions(
456
458
  user=user,
457
459
  vision_enabled=vision_enabled,
458
460
  personality_context=personality_context,
461
+ tracer=tracer,
459
462
  )
460
463
  elif conversation_config.model_type == ChatModelOptions.ModelType.GOOGLE:
461
464
  api_key = conversation_config.openai_config.api_key
@@ -471,6 +474,7 @@ async def extract_references_and_questions(
471
474
  user=user,
472
475
  vision_enabled=vision_enabled,
473
476
  personality_context=personality_context,
477
+ tracer=tracer,
474
478
  )
475
479
 
476
480
  # Collate search results as context for GPT
@@ -497,7 +501,8 @@ async def extract_references_and_questions(
497
501
  )
498
502
  search_results = text_search.deduplicated_search_responses(search_results)
499
503
  compiled_references = [
500
- {"compiled": item.additional["compiled"], "file": item.additional["file"]} for item in search_results
504
+ {"query": q, "compiled": item.additional["compiled"], "file": item.additional["file"]}
505
+ for q, item in zip(inferred_queries, search_results)
501
506
  ]
502
507
 
503
508
  yield compiled_references, inferred_queries, defiltered_query