khoj 1.27.2.dev130__py3-none-any.whl → 1.27.2.dev167__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. khoj/configure.py +1 -1
  2. khoj/database/adapters/__init__.py +16 -2
  3. khoj/interface/compiled/404/index.html +1 -1
  4. khoj/interface/compiled/_next/static/chunks/1467-b331e469fe411347.js +1 -0
  5. khoj/interface/compiled/_next/static/chunks/1603-c1568f45947e9f2c.js +1 -0
  6. khoj/interface/compiled/_next/static/chunks/3423-ff7402ae1dd66592.js +1 -0
  7. khoj/interface/compiled/_next/static/chunks/8423-e80647edf6c92c27.js +1 -0
  8. khoj/interface/compiled/_next/static/chunks/app/chat/page-bfc70b16ba5e51b4.js +1 -0
  9. khoj/interface/compiled/_next/static/chunks/app/factchecker/page-340bcf53abf6a2cc.js +1 -0
  10. khoj/interface/compiled/_next/static/chunks/{webpack-8ae5ce45161bd98e.js → webpack-878569182b3af4c6.js} +1 -1
  11. khoj/interface/compiled/_next/static/css/{e9c5fe555dd3050b.css → a795ee88875f4853.css} +1 -1
  12. khoj/interface/compiled/_next/static/css/{b70402177a7c3207.css → d738728883c68af8.css} +1 -1
  13. khoj/interface/compiled/agents/index.html +1 -1
  14. khoj/interface/compiled/agents/index.txt +2 -2
  15. khoj/interface/compiled/automations/index.html +1 -1
  16. khoj/interface/compiled/automations/index.txt +2 -2
  17. khoj/interface/compiled/chat/index.html +1 -1
  18. khoj/interface/compiled/chat/index.txt +2 -2
  19. khoj/interface/compiled/factchecker/index.html +1 -1
  20. khoj/interface/compiled/factchecker/index.txt +2 -2
  21. khoj/interface/compiled/index.html +1 -1
  22. khoj/interface/compiled/index.txt +2 -2
  23. khoj/interface/compiled/search/index.html +1 -1
  24. khoj/interface/compiled/search/index.txt +2 -2
  25. khoj/interface/compiled/settings/index.html +1 -1
  26. khoj/interface/compiled/settings/index.txt +2 -2
  27. khoj/interface/compiled/share/chat/index.html +1 -1
  28. khoj/interface/compiled/share/chat/index.txt +2 -2
  29. khoj/processor/conversation/offline/chat_model.py +8 -1
  30. khoj/processor/conversation/openai/utils.py +7 -0
  31. khoj/processor/conversation/prompts.py +36 -38
  32. khoj/processor/conversation/utils.py +15 -8
  33. khoj/processor/embeddings.py +4 -4
  34. khoj/processor/tools/online_search.py +8 -4
  35. khoj/routers/api_chat.py +38 -25
  36. khoj/routers/helpers.py +16 -1
  37. khoj/routers/research.py +17 -18
  38. khoj/utils/helpers.py +12 -3
  39. {khoj-1.27.2.dev130.dist-info → khoj-1.27.2.dev167.dist-info}/METADATA +2 -2
  40. {khoj-1.27.2.dev130.dist-info → khoj-1.27.2.dev167.dist-info}/RECORD +48 -48
  41. khoj/interface/compiled/_next/static/chunks/1467-5a191c1cd5bf0b83.js +0 -1
  42. khoj/interface/compiled/_next/static/chunks/1603-5d70d9dfcdcb1f10.js +0 -1
  43. khoj/interface/compiled/_next/static/chunks/3423-fa918f4e5365a35e.js +0 -1
  44. khoj/interface/compiled/_next/static/chunks/8423-3ad0bfb299801220.js +0 -1
  45. khoj/interface/compiled/_next/static/chunks/app/chat/page-7dc98df9c88828f0.js +0 -1
  46. khoj/interface/compiled/_next/static/chunks/app/factchecker/page-d887f55fe6d4f35d.js +0 -1
  47. /khoj/interface/compiled/_next/static/chunks/{1970-444843bea1d17d61.js → 1970-90dd510762d820ba.js} +0 -0
  48. /khoj/interface/compiled/_next/static/chunks/{9417-19cfd1a9cb758e71.js → 9417-951f46451a8dd6d7.js} +0 -0
  49. /khoj/interface/compiled/_next/static/chunks/app/{page-d46244282af16509.js → page-f249666a0cbdaa0d.js} +0 -0
  50. /khoj/interface/compiled/_next/static/{N19uqHAJYqRAVxvuVwHfE → vUFFjGuewOr_h39o6kbfT}/_buildManifest.js +0 -0
  51. /khoj/interface/compiled/_next/static/{N19uqHAJYqRAVxvuVwHfE → vUFFjGuewOr_h39o6kbfT}/_ssgManifest.js +0 -0
  52. {khoj-1.27.2.dev130.dist-info → khoj-1.27.2.dev167.dist-info}/WHEEL +0 -0
  53. {khoj-1.27.2.dev130.dist-info → khoj-1.27.2.dev167.dist-info}/entry_points.txt +0 -0
  54. {khoj-1.27.2.dev130.dist-info → khoj-1.27.2.dev167.dist-info}/licenses/LICENSE +0 -0
@@ -1,9 +1,11 @@
1
1
  import base64
2
+ import json
2
3
  import logging
3
4
  import math
4
5
  import mimetypes
5
6
  import os
6
7
  import queue
8
+ import uuid
7
9
  from dataclasses import dataclass
8
10
  from datetime import datetime
9
11
  from enum import Enum
@@ -134,7 +136,11 @@ def construct_chat_history(conversation_history: dict, n: int = 4, agent_name="A
134
136
  for chat in conversation_history.get("chat", [])[-n:]:
135
137
  if chat["by"] == "khoj" and chat["intent"].get("type") in ["remember", "reminder", "summarize"]:
136
138
  chat_history += f"User: {chat['intent']['query']}\n"
137
- chat_history += f"{agent_name}: {chat['message']}\n"
139
+
140
+ if chat["intent"].get("inferred-queries"):
141
+ chat_history += f'Khoj: {{"queries": {chat["intent"].get("inferred-queries")}}}\n'
142
+
143
+ chat_history += f"{agent_name}: {chat['message']}\n\n"
138
144
  elif chat["by"] == "khoj" and ("text-to-image" in chat["intent"].get("type")):
139
145
  chat_history += f"User: {chat['intent']['query']}\n"
140
146
  chat_history += f"{agent_name}: [generated image redacted for space]\n"
@@ -185,6 +191,7 @@ class ChatEvent(Enum):
185
191
  MESSAGE = "message"
186
192
  REFERENCES = "references"
187
193
  STATUS = "status"
194
+ METADATA = "metadata"
188
195
 
189
196
 
190
197
  def message_to_log(
@@ -232,12 +239,14 @@ def save_to_conversation_log(
232
239
  train_of_thought: List[Any] = [],
233
240
  ):
234
241
  user_message_time = user_message_time or datetime.now().strftime("%Y-%m-%d %H:%M:%S")
242
+ turn_id = tracer.get("mid") or str(uuid.uuid4())
235
243
  updated_conversation = message_to_log(
236
244
  user_message=q,
237
245
  chat_response=chat_response,
238
246
  user_message_metadata={
239
247
  "created": user_message_time,
240
248
  "images": query_images,
249
+ "turnId": turn_id,
241
250
  },
242
251
  khoj_message_metadata={
243
252
  "context": compiled_references,
@@ -246,6 +255,7 @@ def save_to_conversation_log(
246
255
  "codeContext": code_results,
247
256
  "automationId": automation_id,
248
257
  "trainOfThought": train_of_thought,
258
+ "turnId": turn_id,
249
259
  },
250
260
  conversation_log=meta_log.get("chat", []),
251
261
  train_of_thought=train_of_thought,
@@ -501,15 +511,12 @@ def commit_conversation_trace(
501
511
  Returns the path to the repository.
502
512
  """
503
513
  # Serialize session, system message and response to JSON
504
- system_message_yaml = yaml.dump(system_message, allow_unicode=True, sort_keys=False, default_flow_style=False)
505
- response_yaml = yaml.dump(response, allow_unicode=True, sort_keys=False, default_flow_style=False)
514
+ system_message_yaml = json.dumps(system_message, ensure_ascii=False, sort_keys=False)
515
+ response_yaml = json.dumps(response, ensure_ascii=False, sort_keys=False)
506
516
  formatted_session = [{"role": message.role, "content": message.content} for message in session]
507
- session_yaml = yaml.dump(formatted_session, allow_unicode=True, sort_keys=False, default_flow_style=False)
517
+ session_yaml = json.dumps(formatted_session, ensure_ascii=False, sort_keys=False)
508
518
  query = (
509
- yaml.dump(session[-1].content, allow_unicode=True, sort_keys=False, default_flow_style=False)
510
- .strip()
511
- .removeprefix("'")
512
- .removesuffix("'")
519
+ json.dumps(session[-1].content, ensure_ascii=False, sort_keys=False).strip().removeprefix("'").removesuffix("'")
513
520
  ) # Extract serialized query from chat session
514
521
 
515
522
  # Extract chat metadata for session
@@ -13,7 +13,7 @@ from tenacity import (
13
13
  )
14
14
  from torch import nn
15
15
 
16
- from khoj.utils.helpers import get_device, merge_dicts, timer
16
+ from khoj.utils.helpers import fix_json_dict, get_device, merge_dicts, timer
17
17
  from khoj.utils.rawconfig import SearchResponse
18
18
 
19
19
  logger = logging.getLogger(__name__)
@@ -31,9 +31,9 @@ class EmbeddingsModel:
31
31
  ):
32
32
  default_query_encode_kwargs = {"show_progress_bar": False, "normalize_embeddings": True}
33
33
  default_docs_encode_kwargs = {"show_progress_bar": True, "normalize_embeddings": True}
34
- self.query_encode_kwargs = merge_dicts(query_encode_kwargs, default_query_encode_kwargs)
35
- self.docs_encode_kwargs = merge_dicts(docs_encode_kwargs, default_docs_encode_kwargs)
36
- self.model_kwargs = merge_dicts(model_kwargs, {"device": get_device()})
34
+ self.query_encode_kwargs = merge_dicts(fix_json_dict(query_encode_kwargs), default_query_encode_kwargs)
35
+ self.docs_encode_kwargs = merge_dicts(fix_json_dict(docs_encode_kwargs), default_docs_encode_kwargs)
36
+ self.model_kwargs = merge_dicts(fix_json_dict(model_kwargs), {"device": get_device()})
37
37
  self.model_name = model_name
38
38
  self.inference_endpoint = embeddings_inference_endpoint
39
39
  self.api_key = embeddings_inference_endpoint_api_key
@@ -54,6 +54,7 @@ OLOSTEP_QUERY_PARAMS = {
54
54
  }
55
55
 
56
56
  DEFAULT_MAX_WEBPAGES_TO_READ = 1
57
+ MAX_WEBPAGES_TO_INFER = 10
57
58
 
58
59
 
59
60
  async def search_online(
@@ -157,13 +158,16 @@ async def read_webpages(
157
158
  query_images: List[str] = None,
158
159
  agent: Agent = None,
159
160
  tracer: dict = {},
161
+ max_webpages_to_read: int = DEFAULT_MAX_WEBPAGES_TO_READ,
160
162
  ):
161
163
  "Infer web pages to read from the query and extract relevant information from them"
162
164
  logger.info(f"Inferring web pages to read")
163
- if send_status_func:
164
- async for event in send_status_func(f"**Inferring web pages to read**"):
165
- yield {ChatEvent.STATUS: event}
166
- urls = await infer_webpage_urls(query, conversation_history, location, user, query_images)
165
+ urls = await infer_webpage_urls(
166
+ query, conversation_history, location, user, query_images, agent=agent, tracer=tracer
167
+ )
168
+
169
+ # Keep only the first max_webpages_to_read inferred web pages
170
+ urls = urls[:max_webpages_to_read]
167
171
 
168
172
  logger.info(f"Reading web pages at: {urls}")
169
173
  if send_status_func:
khoj/routers/api_chat.py CHANGED
@@ -31,6 +31,7 @@ from khoj.processor.speech.text_to_speech import generate_text_to_speech
31
31
  from khoj.processor.tools.online_search import read_webpages, search_online
32
32
  from khoj.processor.tools.run_code import run_code
33
33
  from khoj.routers.api import extract_references_and_questions
34
+ from khoj.routers.email import send_query_feedback
34
35
  from khoj.routers.helpers import (
35
36
  ApiImageRateLimiter,
36
37
  ApiUserRateLimiter,
@@ -38,13 +39,14 @@ from khoj.routers.helpers import (
38
39
  ChatRequestBody,
39
40
  CommonQueryParams,
40
41
  ConversationCommandRateLimiter,
42
+ DeleteMessageRequestBody,
43
+ FeedbackData,
41
44
  agenerate_chat_response,
42
45
  aget_relevant_information_sources,
43
46
  aget_relevant_output_modes,
44
47
  construct_automation_created_message,
45
48
  create_automation,
46
49
  extract_relevant_info,
47
- extract_relevant_summary,
48
50
  generate_excalidraw_diagram,
49
51
  generate_summary_from_files,
50
52
  get_conversation_command,
@@ -75,16 +77,12 @@ from khoj.utils.rawconfig import FileFilterRequest, FilesFilterRequest, Location
75
77
  # Initialize Router
76
78
  logger = logging.getLogger(__name__)
77
79
  conversation_command_rate_limiter = ConversationCommandRateLimiter(
78
- trial_rate_limit=100, subscribed_rate_limit=6000, slug="command"
80
+ trial_rate_limit=20, subscribed_rate_limit=75, slug="command"
79
81
  )
80
82
 
81
83
 
82
84
  api_chat = APIRouter()
83
85
 
84
- from pydantic import BaseModel
85
-
86
- from khoj.routers.email import send_query_feedback
87
-
88
86
 
89
87
  @api_chat.get("/conversation/file-filters/{conversation_id}", response_class=Response)
90
88
  @requires(["authenticated"])
@@ -146,12 +144,6 @@ def remove_file_filter(request: Request, filter: FileFilterRequest) -> Response:
146
144
  return Response(content=json.dumps(file_filters), media_type="application/json", status_code=200)
147
145
 
148
146
 
149
- class FeedbackData(BaseModel):
150
- uquery: str
151
- kquery: str
152
- sentiment: str
153
-
154
-
155
147
  @api_chat.post("/feedback")
156
148
  @requires(["authenticated"])
157
149
  async def sendfeedback(request: Request, data: FeedbackData):
@@ -166,10 +158,10 @@ async def text_to_speech(
166
158
  common: CommonQueryParams,
167
159
  text: str,
168
160
  rate_limiter_per_minute=Depends(
169
- ApiUserRateLimiter(requests=20, subscribed_requests=20, window=60, slug="chat_minute")
161
+ ApiUserRateLimiter(requests=30, subscribed_requests=30, window=60, slug="chat_minute")
170
162
  ),
171
163
  rate_limiter_per_day=Depends(
172
- ApiUserRateLimiter(requests=50, subscribed_requests=300, window=60 * 60 * 24, slug="chat_day")
164
+ ApiUserRateLimiter(requests=100, subscribed_requests=600, window=60 * 60 * 24, slug="chat_day")
173
165
  ),
174
166
  ) -> Response:
175
167
  voice_model = await ConversationAdapters.aget_voice_model_config(request.user.object)
@@ -534,6 +526,19 @@ async def set_conversation_title(
534
526
  )
535
527
 
536
528
 
529
@api_chat.delete("/conversation/message", response_class=Response)
@requires(["authenticated"])
def delete_message(request: Request, delete_request: DeleteMessageRequestBody) -> Response:
    """Handle DELETE /conversation/message for the authenticated user.

    Passes the requesting user, the conversation id and the turn id from the
    request body to ``ConversationAdapters.delete_message_by_turn_id``.

    Returns:
        A JSON response: 200 with ``{"status": "ok"}`` when the adapter
        reports success, otherwise 404 with an error status and message.
    """
    user = request.user.object
    success = ConversationAdapters.delete_message_by_turn_id(
        user, delete_request.conversation_id, delete_request.turn_id
    )
    if not success:
        # The body is JSON, so declare it as such (the success branch already does).
        return Response(
            content=json.dumps({"status": "error", "message": "Message not found"}),
            media_type="application/json",
            status_code=404,
        )
    return Response(content=json.dumps({"status": "ok"}), media_type="application/json", status_code=200)
540
+
541
+
537
542
  @api_chat.post("")
538
543
  @requires(["authenticated"])
539
544
  async def chat(
@@ -541,10 +546,10 @@ async def chat(
541
546
  common: CommonQueryParams,
542
547
  body: ChatRequestBody,
543
548
  rate_limiter_per_minute=Depends(
544
- ApiUserRateLimiter(requests=60, subscribed_requests=200, window=60, slug="chat_minute")
549
+ ApiUserRateLimiter(requests=20, subscribed_requests=20, window=60, slug="chat_minute")
545
550
  ),
546
551
  rate_limiter_per_day=Depends(
547
- ApiUserRateLimiter(requests=600, subscribed_requests=6000, window=60 * 60 * 24, slug="chat_day")
552
+ ApiUserRateLimiter(requests=100, subscribed_requests=600, window=60 * 60 * 24, slug="chat_day")
548
553
  ),
549
554
  image_rate_limiter=Depends(ApiImageRateLimiter(max_images=10, max_combined_size_mb=20)),
550
555
  ):
@@ -555,6 +560,7 @@ async def chat(
555
560
  stream = body.stream
556
561
  title = body.title
557
562
  conversation_id = body.conversation_id
563
+ turn_id = str(body.turn_id or uuid.uuid4())
558
564
  city = body.city
559
565
  region = body.region
560
566
  country = body.country or get_country_name_from_timezone(body.timezone)
@@ -574,7 +580,7 @@ async def chat(
574
580
  nonlocal conversation_id
575
581
 
576
582
  tracer: dict = {
577
- "mid": f"{uuid.uuid4()}",
583
+ "mid": turn_id,
578
584
  "cid": conversation_id,
579
585
  "uid": user.id,
580
586
  "khoj_version": state.khoj_version,
@@ -607,7 +613,7 @@ async def chat(
607
613
 
608
614
  if event_type == ChatEvent.MESSAGE:
609
615
  yield data
610
- elif event_type == ChatEvent.REFERENCES or stream:
616
+ elif event_type == ChatEvent.REFERENCES or ChatEvent.METADATA or stream:
611
617
  yield json.dumps({"type": event_type.value, "data": data}, ensure_ascii=False)
612
618
  except asyncio.CancelledError as e:
613
619
  connection_alive = False
@@ -651,6 +657,11 @@ async def chat(
651
657
  metadata=chat_metadata,
652
658
  )
653
659
 
660
+ if is_query_empty(q):
661
+ async for result in send_llm_response("Please ask your query to get started."):
662
+ yield result
663
+ return
664
+
654
665
  conversation_commands = [get_conversation_command(query=q, any_references=True)]
655
666
 
656
667
  conversation = await ConversationAdapters.aget_conversation_by_user(
@@ -666,6 +677,9 @@ async def chat(
666
677
  return
667
678
  conversation_id = conversation.id
668
679
 
680
+ async for event in send_event(ChatEvent.METADATA, {"conversationId": str(conversation_id), "turnId": turn_id}):
681
+ yield event
682
+
669
683
  agent: Agent | None = None
670
684
  default_agent = await AgentAdapters.aget_default_agent()
671
685
  if conversation.agent and conversation.agent != default_agent:
@@ -677,17 +691,11 @@ async def chat(
677
691
  agent = default_agent
678
692
 
679
693
  await is_ready_to_chat(user)
680
-
681
694
  user_name = await aget_user_name(user)
682
695
  location = None
683
696
  if city or region or country or country_code:
684
697
  location = LocationData(city=city, region=region, country=country, country_code=country_code)
685
698
 
686
- if is_query_empty(q):
687
- async for result in send_llm_response("Please ask your query to get started."):
688
- yield result
689
- return
690
-
691
699
  user_message_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
692
700
 
693
701
  meta_log = conversation.conversation_log
@@ -699,7 +707,6 @@ async def chat(
699
707
  ## Extract Document References
700
708
  compiled_references: List[Any] = []
701
709
  inferred_queries: List[Any] = []
702
- defiltered_query = defilter_query(q)
703
710
 
704
711
  if conversation_commands == [ConversationCommand.Default] or is_automated_task:
705
712
  conversation_commands = await aget_relevant_information_sources(
@@ -730,6 +737,12 @@ async def chat(
730
737
  if mode not in conversation_commands:
731
738
  conversation_commands.append(mode)
732
739
 
740
+ for cmd in conversation_commands:
741
+ await conversation_command_rate_limiter.update_and_check_if_valid(request, cmd)
742
+ q = q.replace(f"/{cmd.value}", "").strip()
743
+
744
+ defiltered_query = defilter_query(q)
745
+
733
746
  if conversation_commands == [ConversationCommand.Research]:
734
747
  async for research_result in execute_information_collection(
735
748
  request=request,
khoj/routers/helpers.py CHANGED
@@ -478,6 +478,9 @@ async def infer_webpage_urls(
478
478
  valid_unique_urls = {str(url).strip() for url in urls["links"] if is_valid_url(url)}
479
479
  if is_none_or_empty(valid_unique_urls):
480
480
  raise ValueError(f"Invalid list of urls: {response}")
481
+ if len(valid_unique_urls) == 0:
482
+ logger.error(f"No valid URLs found in response: {response}")
483
+ return []
481
484
  return list(valid_unique_urls)
482
485
  except Exception:
483
486
  raise ValueError(f"Invalid list of urls: {response}")
@@ -1255,6 +1258,7 @@ class ChatRequestBody(BaseModel):
1255
1258
  stream: Optional[bool] = False
1256
1259
  title: Optional[str] = None
1257
1260
  conversation_id: Optional[str] = None
1261
+ turn_id: Optional[str] = None
1258
1262
  city: Optional[str] = None
1259
1263
  region: Optional[str] = None
1260
1264
  country: Optional[str] = None
@@ -1264,6 +1268,17 @@ class ChatRequestBody(BaseModel):
1264
1268
  create_new: Optional[bool] = False
1265
1269
 
1266
1270
 
1271
+ class DeleteMessageRequestBody(BaseModel):
1272
+ conversation_id: str
1273
+ turn_id: str
1274
+
1275
+
1276
+ class FeedbackData(BaseModel):
1277
+ uquery: str
1278
+ kquery: str
1279
+ sentiment: str
1280
+
1281
+
1267
1282
  class ApiUserRateLimiter:
1268
1283
  def __init__(self, requests: int, subscribed_requests: int, window: int, slug: str):
1269
1284
  self.requests = requests
@@ -1366,7 +1381,7 @@ class ConversationCommandRateLimiter:
1366
1381
  self.slug = slug
1367
1382
  self.trial_rate_limit = trial_rate_limit
1368
1383
  self.subscribed_rate_limit = subscribed_rate_limit
1369
- self.restricted_commands = [ConversationCommand.Online, ConversationCommand.Image]
1384
+ self.restricted_commands = [ConversationCommand.Research]
1370
1385
 
1371
1386
  async def update_and_check_if_valid(self, request: Request, conversation_command: ConversationCommand):
1372
1387
  if state.billing_enabled is False:
khoj/routers/research.py CHANGED
@@ -1,12 +1,11 @@
1
1
  import json
2
2
  import logging
3
3
  from datetime import datetime
4
- from typing import Any, Callable, Dict, List, Optional
4
+ from typing import Callable, Dict, List, Optional
5
5
 
6
6
  import yaml
7
7
  from fastapi import Request
8
8
 
9
- from khoj.database.adapters import ConversationAdapters, EntryAdapters
10
9
  from khoj.database.models import Agent, KhojUser
11
10
  from khoj.processor.conversation import prompts
12
11
  from khoj.processor.conversation.utils import (
@@ -191,18 +190,18 @@ async def execute_information_collection(
191
190
  document_results = result[0]
192
191
  this_iteration.context += document_results
193
192
 
194
- if not is_none_or_empty(document_results):
195
- try:
196
- distinct_files = {d["file"] for d in document_results}
197
- distinct_headings = set([d["compiled"].split("\n")[0] for d in document_results if "compiled" in d])
198
- # Strip only leading # from headings
199
- headings_str = "\n- " + "\n- ".join(distinct_headings).replace("#", "")
200
- async for result in send_status_func(
201
- f"**Found {len(distinct_headings)} Notes Across {len(distinct_files)} Files**: {headings_str}"
202
- ):
203
- yield result
204
- except Exception as e:
205
- logger.error(f"Error extracting document references: {e}", exc_info=True)
193
+ if not is_none_or_empty(document_results):
194
+ try:
195
+ distinct_files = {d["file"] for d in document_results}
196
+ distinct_headings = set([d["compiled"].split("\n")[0] for d in document_results if "compiled" in d])
197
+ # Strip only leading # from headings
198
+ headings_str = "\n- " + "\n- ".join(distinct_headings).replace("#", "")
199
+ async for result in send_status_func(
200
+ f"**Found {len(distinct_headings)} Notes Across {len(distinct_files)} Files**: {headings_str}"
201
+ ):
202
+ yield result
203
+ except Exception as e:
204
+ logger.error(f"Error extracting document references: {e}", exc_info=True)
206
205
 
207
206
  elif this_iteration.tool == ConversationCommand.Online:
208
207
  async for result in search_online(
@@ -306,13 +305,13 @@ async def execute_information_collection(
306
305
  if document_results or online_results or code_results or summarize_files:
307
306
  results_data = f"**Results**:\n"
308
307
  if document_results:
309
- results_data += f"**Document References**: {yaml.dump(document_results, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n"
308
+ results_data += f"**Document References**:\n{yaml.dump(document_results, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n"
310
309
  if online_results:
311
- results_data += f"**Online Results**: {yaml.dump(online_results, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n"
310
+ results_data += f"**Online Results**:\n{yaml.dump(online_results, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n"
312
311
  if code_results:
313
- results_data += f"**Code Results**: {yaml.dump(code_results, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n"
312
+ results_data += f"**Code Results**:\n{yaml.dump(code_results, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n"
314
313
  if summarize_files:
315
- results_data += f"**Summarized Files**: {yaml.dump(summarize_files, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n"
314
+ results_data += f"**Summarized Files**:\n{yaml.dump(summarize_files, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n"
316
315
 
317
316
  # intermediate_result = await extract_relevant_info(this_iteration.query, results_data, agent)
318
317
  this_iteration.summarizedResult = results_data
khoj/utils/helpers.py CHANGED
@@ -101,6 +101,15 @@ def merge_dicts(priority_dict: dict, default_dict: dict):
101
101
  return merged_dict
102
102
 
103
103
 
104
def fix_json_dict(json_dict: dict) -> dict:
    """Convert stringified booleans in a dict to real booleans.

    Values equal to the exact strings ``"True"`` or ``"False"`` are replaced
    with the matching ``bool``. Nested dicts are processed recursively. Every
    other value (including ``"true"``/``"false"`` and lists) is left as is.

    The dict is updated in place and the same object is returned. ``None`` is
    treated as an empty dict instead of raising ``AttributeError``.
    """
    if json_dict is None:
        return {}
    # Iterate over a snapshot so values can be reassigned safely while looping.
    for key, value in list(json_dict.items()):
        if isinstance(value, dict):
            json_dict[key] = fix_json_dict(value)
        elif value == "True" or value == "False":
            json_dict[key] = value == "True"
    return json_dict
111
+
112
+
104
113
  def get_file_type(file_type: str, file_content: bytes) -> tuple[str, str]:
105
114
  "Get file type from file mime type"
106
115
 
@@ -359,9 +368,9 @@ tool_descriptions_for_llm = {
359
368
 
360
369
  function_calling_description_for_llm = {
361
370
  ConversationCommand.Notes: "To search the user's personal knowledge base. Especially helpful if the question expects context from the user's notes or documents.",
362
- ConversationCommand.Online: "To search the internet for information. Provide all relevant context to ensure new searches, not previously run, are performed.",
363
- ConversationCommand.Webpage: "To extract information from a webpage. Useful for more detailed research from the internet. Usually used when you know the webpage links to refer to. Share the webpage link and information to extract in your query.",
364
- ConversationCommand.Code: "To run Python code in a Pyodide sandbox with no network access. Helpful when need to parse information, run complex calculations, create documents and charts for user. Matplotlib, bs4, pandas, numpy, etc. are available.",
371
+ ConversationCommand.Online: "To search the internet for information. Useful to get a quick, broad overview from the internet. Provide all relevant context to ensure new searches, not in previous iterations, are performed.",
372
+ ConversationCommand.Webpage: "To extract information from webpages. Useful for more detailed research from the internet. Usually used when you know the webpage links to refer to. Share the webpage links and information to extract in your query.",
373
+ ConversationCommand.Code: "To run Python code in a Pyodide sandbox with no network access. Helpful when need to parse information, run complex calculations, create charts for user. Matplotlib, bs4, pandas, numpy, etc. are available.",
365
374
  }
366
375
 
367
376
  mode_descriptions_for_llm = {
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: khoj
3
- Version: 1.27.2.dev130
3
+ Version: 1.27.2.dev167
4
4
  Summary: Your Second Brain
5
5
  Project-URL: Homepage, https://khoj.dev
6
6
  Project-URL: Documentation, https://docs.khoj.dev
@@ -36,7 +36,7 @@ Requires-Dist: django==5.0.9
36
36
  Requires-Dist: docx2txt==0.8
37
37
  Requires-Dist: einops==0.8.0
38
38
  Requires-Dist: fastapi>=0.110.0
39
- Requires-Dist: google-generativeai==0.7.2
39
+ Requires-Dist: google-generativeai==0.8.3
40
40
  Requires-Dist: httpx==0.25.0
41
41
  Requires-Dist: huggingface-hub>=0.22.2
42
42
  Requires-Dist: itsdangerous==2.1.2