khoj 1.42.9.dev26__py3-none-any.whl → 1.42.10.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. khoj/database/adapters/__init__.py +0 -20
  2. khoj/database/models/__init__.py +0 -1
  3. khoj/interface/compiled/404/index.html +2 -2
  4. khoj/interface/compiled/_next/static/chunks/app/chat/page-4c6b873a4a5c7d2f.js +1 -0
  5. khoj/interface/compiled/agents/index.html +2 -2
  6. khoj/interface/compiled/agents/index.txt +2 -2
  7. khoj/interface/compiled/automations/index.html +2 -2
  8. khoj/interface/compiled/automations/index.txt +3 -3
  9. khoj/interface/compiled/chat/index.html +2 -2
  10. khoj/interface/compiled/chat/index.txt +2 -2
  11. khoj/interface/compiled/index.html +2 -2
  12. khoj/interface/compiled/index.txt +2 -2
  13. khoj/interface/compiled/search/index.html +2 -2
  14. khoj/interface/compiled/search/index.txt +2 -2
  15. khoj/interface/compiled/settings/index.html +2 -2
  16. khoj/interface/compiled/settings/index.txt +4 -4
  17. khoj/interface/compiled/share/chat/index.html +2 -2
  18. khoj/interface/compiled/share/chat/index.txt +2 -2
  19. khoj/processor/content/markdown/markdown_to_entries.py +9 -38
  20. khoj/processor/content/org_mode/org_to_entries.py +2 -18
  21. khoj/processor/content/org_mode/orgnode.py +16 -18
  22. khoj/processor/content/text_to_entries.py +0 -30
  23. khoj/processor/conversation/anthropic/anthropic_chat.py +2 -11
  24. khoj/processor/conversation/anthropic/utils.py +103 -90
  25. khoj/processor/conversation/google/gemini_chat.py +1 -4
  26. khoj/processor/conversation/google/utils.py +18 -80
  27. khoj/processor/conversation/offline/chat_model.py +3 -3
  28. khoj/processor/conversation/openai/gpt.py +38 -13
  29. khoj/processor/conversation/openai/utils.py +12 -113
  30. khoj/processor/conversation/prompts.py +35 -17
  31. khoj/processor/conversation/utils.py +58 -129
  32. khoj/processor/operator/grounding_agent.py +1 -1
  33. khoj/processor/operator/operator_agent_binary.py +3 -4
  34. khoj/processor/tools/online_search.py +0 -18
  35. khoj/processor/tools/run_code.py +1 -1
  36. khoj/routers/api_chat.py +1 -1
  37. khoj/routers/api_content.py +6 -6
  38. khoj/routers/helpers.py +27 -297
  39. khoj/routers/research.py +155 -169
  40. khoj/search_type/text_search.py +0 -2
  41. khoj/utils/helpers.py +8 -284
  42. khoj/utils/initialization.py +2 -0
  43. khoj/utils/rawconfig.py +0 -11
  44. {khoj-1.42.9.dev26.dist-info → khoj-1.42.10.dev2.dist-info}/METADATA +1 -1
  45. {khoj-1.42.9.dev26.dist-info → khoj-1.42.10.dev2.dist-info}/RECORD +57 -57
  46. khoj/interface/compiled/_next/static/chunks/app/chat/page-76fc915800aa90f4.js +0 -1
  47. /khoj/interface/compiled/_next/static/chunks/{1327-3b1a41af530fa8ee.js → 1327-1a9107b9a2a04a98.js} +0 -0
  48. /khoj/interface/compiled/_next/static/chunks/{1915-fbfe167c84ad60c5.js → 1915-5c6508f6ebb62a30.js} +0 -0
  49. /khoj/interface/compiled/_next/static/chunks/{2117-e78b6902ad6f75ec.js → 2117-080746c8e170c81a.js} +0 -0
  50. /khoj/interface/compiled/_next/static/chunks/{2939-4d4084c5b888b960.js → 2939-4af3fd24b8ffc9ad.js} +0 -0
  51. /khoj/interface/compiled/_next/static/chunks/{4447-d6cf93724d57e34b.js → 4447-cd95608f8e93e711.js} +0 -0
  52. /khoj/interface/compiled/_next/static/chunks/{8667-4b7790573b08c50d.js → 8667-50b03a89e82e0ba7.js} +0 -0
  53. /khoj/interface/compiled/_next/static/chunks/{webpack-70e0762712341826.js → webpack-92ce8aaf95718ec4.js} +0 -0
  54. /khoj/interface/compiled/_next/static/{IYGyer2N7GdUJ7QHFghtY → cuzJcS32_a4L4a6gCZ63y}/_buildManifest.js +0 -0
  55. /khoj/interface/compiled/_next/static/{IYGyer2N7GdUJ7QHFghtY → cuzJcS32_a4L4a6gCZ63y}/_ssgManifest.js +0 -0
  56. {khoj-1.42.9.dev26.dist-info → khoj-1.42.10.dev2.dist-info}/WHEEL +0 -0
  57. {khoj-1.42.9.dev26.dist-info → khoj-1.42.10.dev2.dist-info}/entry_points.txt +0 -0
  58. {khoj-1.42.9.dev26.dist-info → khoj-1.42.10.dev2.dist-info}/licenses/LICENSE +0 -0
khoj/routers/helpers.py CHANGED
@@ -1,7 +1,6 @@
1
1
  import asyncio
2
2
  import base64
3
3
  import concurrent.futures
4
- import fnmatch
5
4
  import hashlib
6
5
  import json
7
6
  import logging
@@ -121,7 +120,6 @@ from khoj.utils.config import OfflineChatProcessorModel
121
120
  from khoj.utils.helpers import (
122
121
  LRU,
123
122
  ConversationCommand,
124
- ToolDefinition,
125
123
  get_file_type,
126
124
  in_debug_mode,
127
125
  is_none_or_empty,
@@ -305,7 +303,7 @@ async def acreate_title_from_history(
305
303
  with timer("Chat actor: Generate title from conversation history", logger):
306
304
  response = await send_message_to_model_wrapper(title_generation_prompt, user=user)
307
305
 
308
- return response.text.strip()
306
+ return response.strip()
309
307
 
310
308
 
311
309
  async def acreate_title_from_query(query: str, user: KhojUser = None) -> str:
@@ -317,7 +315,7 @@ async def acreate_title_from_query(query: str, user: KhojUser = None) -> str:
317
315
  with timer("Chat actor: Generate title from query", logger):
318
316
  response = await send_message_to_model_wrapper(title_generation_prompt, user=user)
319
317
 
320
- return response.text.strip()
318
+ return response.strip()
321
319
 
322
320
 
323
321
  async def acheck_if_safe_prompt(system_prompt: str, user: KhojUser = None, lax: bool = False) -> Tuple[bool, str]:
@@ -341,7 +339,7 @@ async def acheck_if_safe_prompt(system_prompt: str, user: KhojUser = None, lax:
341
339
  safe_prompt_check, user=user, response_type="json_object", response_schema=SafetyCheck
342
340
  )
343
341
 
344
- response = response.text.strip()
342
+ response = response.strip()
345
343
  try:
346
344
  response = json.loads(clean_json(response))
347
345
  is_safe = str(response.get("safe", "true")).lower() == "true"
@@ -420,7 +418,7 @@ async def aget_data_sources_and_output_format(
420
418
  output: str
421
419
 
422
420
  with timer("Chat actor: Infer information sources to refer", logger):
423
- raw_response = await send_message_to_model_wrapper(
421
+ response = await send_message_to_model_wrapper(
424
422
  relevant_tools_prompt,
425
423
  response_type="json_object",
426
424
  response_schema=PickTools,
@@ -431,7 +429,7 @@ async def aget_data_sources_and_output_format(
431
429
  )
432
430
 
433
431
  try:
434
- response = clean_json(raw_response.text)
432
+ response = clean_json(response)
435
433
  response = json.loads(response)
436
434
 
437
435
  chosen_sources = [s.strip() for s in response.get("source", []) if s.strip()]
@@ -508,7 +506,7 @@ async def infer_webpage_urls(
508
506
  links: List[str] = Field(..., min_items=1, max_items=max_webpages)
509
507
 
510
508
  with timer("Chat actor: Infer webpage urls to read", logger):
511
- raw_response = await send_message_to_model_wrapper(
509
+ response = await send_message_to_model_wrapper(
512
510
  online_queries_prompt,
513
511
  query_images=query_images,
514
512
  response_type="json_object",
@@ -521,7 +519,7 @@ async def infer_webpage_urls(
521
519
 
522
520
  # Validate that the response is a non-empty, JSON-serializable list of URLs
523
521
  try:
524
- response = clean_json(raw_response.text)
522
+ response = clean_json(response)
525
523
  urls = json.loads(response)
526
524
  valid_unique_urls = {str(url).strip() for url in urls["links"] if is_valid_url(url)}
527
525
  if is_none_or_empty(valid_unique_urls):
@@ -573,7 +571,7 @@ async def generate_online_subqueries(
573
571
  queries: List[str] = Field(..., min_items=1, max_items=max_queries)
574
572
 
575
573
  with timer("Chat actor: Generate online search subqueries", logger):
576
- raw_response = await send_message_to_model_wrapper(
574
+ response = await send_message_to_model_wrapper(
577
575
  online_queries_prompt,
578
576
  query_images=query_images,
579
577
  response_type="json_object",
@@ -586,7 +584,7 @@ async def generate_online_subqueries(
586
584
 
587
585
  # Validate that the response is a non-empty, JSON-serializable list
588
586
  try:
589
- response = clean_json(raw_response.text)
587
+ response = clean_json(response)
590
588
  response = pyjson5.loads(response)
591
589
  response = {q.strip() for q in response["queries"] if q.strip()}
592
590
  if not isinstance(response, set) or not response or len(response) == 0:
@@ -647,7 +645,7 @@ async def aschedule_query(
647
645
 
648
646
  # Validate that the response is a non-empty, JSON-serializable list
649
647
  try:
650
- raw_response = raw_response.text.strip()
648
+ raw_response = raw_response.strip()
651
649
  response: Dict[str, str] = json.loads(clean_json(raw_response))
652
650
  if not response or not isinstance(response, Dict) or len(response) != 3:
653
651
  raise AssertionError(f"Invalid response for scheduling query : {response}")
@@ -685,7 +683,7 @@ async def extract_relevant_info(
685
683
  agent_chat_model=agent_chat_model,
686
684
  tracer=tracer,
687
685
  )
688
- return response.text.strip()
686
+ return response.strip()
689
687
 
690
688
 
691
689
  async def extract_relevant_summary(
@@ -728,7 +726,7 @@ async def extract_relevant_summary(
728
726
  agent_chat_model=agent_chat_model,
729
727
  tracer=tracer,
730
728
  )
731
- return response.text.strip()
729
+ return response.strip()
732
730
 
733
731
 
734
732
  async def generate_summary_from_files(
@@ -899,7 +897,7 @@ async def generate_better_diagram_description(
899
897
  agent_chat_model=agent_chat_model,
900
898
  tracer=tracer,
901
899
  )
902
- response = response.text.strip()
900
+ response = response.strip()
903
901
  if response.startswith(('"', "'")) and response.endswith(('"', "'")):
904
902
  response = response[1:-1]
905
903
 
@@ -927,10 +925,10 @@ async def generate_excalidraw_diagram_from_description(
927
925
  raw_response = await send_message_to_model_wrapper(
928
926
  query=excalidraw_diagram_generation, user=user, agent_chat_model=agent_chat_model, tracer=tracer
929
927
  )
930
- raw_response_text = clean_json(raw_response.text)
928
+ raw_response = clean_json(raw_response)
931
929
  try:
932
930
  # Expect response to have `elements` and `scratchpad` keys
933
- response: Dict[str, str] = json.loads(raw_response_text)
931
+ response: Dict[str, str] = json.loads(raw_response)
934
932
  if (
935
933
  not response
936
934
  or not isinstance(response, Dict)
@@ -939,7 +937,7 @@ async def generate_excalidraw_diagram_from_description(
939
937
  ):
940
938
  raise AssertionError(f"Invalid response for generating Excalidraw diagram: {response}")
941
939
  except Exception:
942
- raise AssertionError(f"Invalid response for generating Excalidraw diagram: {raw_response_text}")
940
+ raise AssertionError(f"Invalid response for generating Excalidraw diagram: {raw_response}")
943
941
  if not response or not isinstance(response["elements"], List) or not isinstance(response["elements"][0], Dict):
944
942
  # TODO Some additional validation here that it's a valid Excalidraw diagram
945
943
  raise AssertionError(f"Invalid response for improving diagram description: {response}")
@@ -1050,11 +1048,11 @@ async def generate_better_mermaidjs_diagram_description(
1050
1048
  agent_chat_model=agent_chat_model,
1051
1049
  tracer=tracer,
1052
1050
  )
1053
- response_text = response.text.strip()
1054
- if response_text.startswith(('"', "'")) and response_text.endswith(('"', "'")):
1055
- response_text = response_text[1:-1]
1051
+ response = response.strip()
1052
+ if response.startswith(('"', "'")) and response.endswith(('"', "'")):
1053
+ response = response[1:-1]
1056
1054
 
1057
- return response_text
1055
+ return response
1058
1056
 
1059
1057
 
1060
1058
  async def generate_mermaidjs_diagram_from_description(
@@ -1078,7 +1076,7 @@ async def generate_mermaidjs_diagram_from_description(
1078
1076
  raw_response = await send_message_to_model_wrapper(
1079
1077
  query=mermaidjs_diagram_generation, user=user, agent_chat_model=agent_chat_model, tracer=tracer
1080
1078
  )
1081
- return clean_mermaidjs(raw_response.text.strip())
1079
+ return clean_mermaidjs(raw_response.strip())
1082
1080
 
1083
1081
 
1084
1082
  async def generate_better_image_prompt(
@@ -1153,11 +1151,11 @@ async def generate_better_image_prompt(
1153
1151
  agent_chat_model=agent_chat_model,
1154
1152
  tracer=tracer,
1155
1153
  )
1156
- response_text = response.text.strip()
1157
- if response_text.startswith(('"', "'")) and response_text.endswith(('"', "'")):
1158
- response_text = response_text[1:-1]
1154
+ response = response.strip()
1155
+ if response.startswith(('"', "'")) and response.endswith(('"', "'")):
1156
+ response = response[1:-1]
1159
1157
 
1160
- return response_text
1158
+ return response
1161
1159
 
1162
1160
 
1163
1161
  async def search_documents(
@@ -1263,9 +1261,8 @@ async def search_documents(
1263
1261
  compiled_references = [
1264
1262
  {
1265
1263
  "query": item.additional["query"],
1266
- "compiled": item["entry"],
1264
+ "compiled": item.additional["compiled"],
1267
1265
  "file": item.additional["file"],
1268
- "uri": item.additional["uri"],
1269
1266
  }
1270
1267
  for item in search_results
1271
1268
  ]
@@ -1332,7 +1329,7 @@ async def extract_questions(
1332
1329
 
1333
1330
  # Extract questions from the response
1334
1331
  try:
1335
- response = clean_json(raw_response.text)
1332
+ response = clean_json(raw_response)
1336
1333
  response = pyjson5.loads(response)
1337
1334
  queries = [q.strip() for q in response["queries"] if q.strip()]
1338
1335
  if not isinstance(queries, list) or not queries:
@@ -1442,7 +1439,6 @@ async def send_message_to_model_wrapper(
1442
1439
  system_message: str = "",
1443
1440
  response_type: str = "text",
1444
1441
  response_schema: BaseModel = None,
1445
- tools: List[ToolDefinition] = None,
1446
1442
  deepthought: bool = False,
1447
1443
  user: KhojUser = None,
1448
1444
  query_images: List[str] = None,
@@ -1510,7 +1506,6 @@ async def send_message_to_model_wrapper(
1510
1506
  model=chat_model_name,
1511
1507
  response_type=response_type,
1512
1508
  response_schema=response_schema,
1513
- tools=tools,
1514
1509
  deepthought=deepthought,
1515
1510
  api_base_url=api_base_url,
1516
1511
  tracer=tracer,
@@ -1522,7 +1517,6 @@ async def send_message_to_model_wrapper(
1522
1517
  model=chat_model_name,
1523
1518
  response_type=response_type,
1524
1519
  response_schema=response_schema,
1525
- tools=tools,
1526
1520
  deepthought=deepthought,
1527
1521
  api_base_url=api_base_url,
1528
1522
  tracer=tracer,
@@ -1534,7 +1528,6 @@ async def send_message_to_model_wrapper(
1534
1528
  model=chat_model_name,
1535
1529
  response_type=response_type,
1536
1530
  response_schema=response_schema,
1537
- tools=tools,
1538
1531
  deepthought=deepthought,
1539
1532
  api_base_url=api_base_url,
1540
1533
  tracer=tracer,
@@ -2803,266 +2796,3 @@ def get_notion_auth_url(user: KhojUser):
2803
2796
  if not NOTION_OAUTH_CLIENT_ID or not NOTION_OAUTH_CLIENT_SECRET or not NOTION_REDIRECT_URI:
2804
2797
  return None
2805
2798
  return f"https://api.notion.com/v1/oauth/authorize?client_id={NOTION_OAUTH_CLIENT_ID}&redirect_uri={NOTION_REDIRECT_URI}&response_type=code&state={user.uuid}"
2806
-
2807
-
2808
- async def view_file_content(
2809
- path: str,
2810
- start_line: Optional[int] = None,
2811
- end_line: Optional[int] = None,
2812
- user: KhojUser = None,
2813
- ):
2814
- """
2815
- View the contents of a file from the user's document database with optional line range specification.
2816
- """
2817
- query = f"View file: {path}"
2818
- if start_line and end_line:
2819
- query += f" (lines {start_line}-{end_line})"
2820
-
2821
- try:
2822
- # Get the file object from the database by name
2823
- file_objects = await FileObjectAdapters.aget_file_objects_by_name(user, path)
2824
-
2825
- if not file_objects:
2826
- error_msg = f"File '{path}' not found in user documents"
2827
- logger.warning(error_msg)
2828
- yield [{"query": query, "file": path, "compiled": error_msg}]
2829
- return
2830
-
2831
- # Use the first file object if multiple exist
2832
- file_object = file_objects[0]
2833
- raw_text = file_object.raw_text
2834
-
2835
- # Apply line range filtering if specified
2836
- if start_line is None and end_line is None:
2837
- filtered_text = raw_text
2838
- else:
2839
- lines = raw_text.split("\n")
2840
- start_line = start_line or 1
2841
- end_line = end_line or len(lines)
2842
-
2843
- # Validate line range
2844
- if start_line < 1 or end_line < 1 or start_line > end_line:
2845
- error_msg = f"Invalid line range: {start_line}-{end_line}"
2846
- logger.warning(error_msg)
2847
- yield [{"query": query, "file": path, "compiled": error_msg}]
2848
- return
2849
- if start_line > len(lines):
2850
- error_msg = f"Start line {start_line} exceeds total number of lines {len(lines)}"
2851
- logger.warning(error_msg)
2852
- yield [{"query": query, "file": path, "compiled": error_msg}]
2853
- return
2854
-
2855
- # Convert from 1-based to 0-based indexing and ensure bounds
2856
- start_idx = max(0, start_line - 1)
2857
- end_idx = min(len(lines), end_line)
2858
-
2859
- selected_lines = lines[start_idx:end_idx]
2860
- filtered_text = "\n".join(selected_lines)
2861
-
2862
- # Truncate the text if it's too long
2863
- if len(filtered_text) > 10000:
2864
- filtered_text = filtered_text[:10000] + "\n\n[Truncated. Use line numbers to view specific sections.]"
2865
-
2866
- # Format the result as a document reference
2867
- document_results = [
2868
- {
2869
- "query": query,
2870
- "file": path,
2871
- "uri": path,
2872
- "compiled": filtered_text,
2873
- }
2874
- ]
2875
-
2876
- yield document_results
2877
-
2878
- except Exception as e:
2879
- error_msg = f"Error viewing file {path}: {str(e)}"
2880
- logger.error(error_msg, exc_info=True)
2881
-
2882
- # Return an error result in the expected format
2883
- yield [{"query": query, "file": path, "uri": path, "compiled": error_msg}]
2884
-
2885
-
2886
- async def grep_files(
2887
- regex_pattern: str,
2888
- path_prefix: Optional[str] = None,
2889
- lines_before: Optional[int] = None,
2890
- lines_after: Optional[int] = None,
2891
- user: KhojUser = None,
2892
- ):
2893
- """
2894
- Search for a regex pattern in files with an optional path prefix and context lines.
2895
- """
2896
-
2897
- # Construct the query string based on provided parameters
2898
- def _generate_query(line_count, doc_count, path, pattern, lines_before, lines_after, max_results=1000):
2899
- query = f"**Found {line_count} matches for '{pattern}' in {doc_count} documents**"
2900
- if path:
2901
- query += f" in {path}"
2902
- if lines_before or lines_after or line_count > max_results:
2903
- query += " Showing"
2904
- if lines_before or lines_after:
2905
- context_info = []
2906
- if lines_before:
2907
- context_info.append(f"{lines_before} lines before")
2908
- if lines_after:
2909
- context_info.append(f"{lines_after} lines after")
2910
- query += f" {' and '.join(context_info)}"
2911
- if line_count > max_results:
2912
- if lines_before or lines_after:
2913
- query += f" for"
2914
- query += f" first {max_results} results"
2915
- return query
2916
-
2917
- # Validate regex pattern
2918
- path_prefix = path_prefix or ""
2919
- lines_before = lines_before or 0
2920
- lines_after = lines_after or 0
2921
-
2922
- try:
2923
- regex = re.compile(regex_pattern, re.IGNORECASE)
2924
- except re.error as e:
2925
- yield {
2926
- "query": _generate_query(0, 0, path_prefix, regex_pattern, lines_before, lines_after),
2927
- "file": path_prefix,
2928
- "compiled": f"Invalid regex pattern: {e}",
2929
- }
2930
- return
2931
-
2932
- try:
2933
- file_matches = await FileObjectAdapters.aget_file_objects_by_regex(user, regex_pattern, path_prefix)
2934
-
2935
- line_matches = []
2936
- for file_object in file_matches:
2937
- lines = file_object.raw_text.split("\n")
2938
- matched_line_numbers = []
2939
-
2940
- # Find all matching line numbers first
2941
- for i, line in enumerate(lines, 1):
2942
- if regex.search(line):
2943
- matched_line_numbers.append(i)
2944
-
2945
- # Build context for each match
2946
- for line_num in matched_line_numbers:
2947
- context_lines = []
2948
-
2949
- # Calculate start and end indices for context (0-based)
2950
- start_idx = max(0, line_num - 1 - lines_before)
2951
- end_idx = min(len(lines), line_num + lines_after)
2952
-
2953
- # Add context lines with line numbers
2954
- for idx in range(start_idx, end_idx):
2955
- current_line_num = idx + 1
2956
- line_content = lines[idx]
2957
-
2958
- if current_line_num == line_num:
2959
- # This is the matching line, mark it
2960
- context_lines.append(f"{file_object.file_name}:{current_line_num}:> {line_content}")
2961
- else:
2962
- # This is a context line
2963
- context_lines.append(f"{file_object.file_name}:{current_line_num}: {line_content}")
2964
-
2965
- # Add separator between matches if showing context
2966
- if lines_before > 0 or lines_after > 0:
2967
- context_lines.append("--")
2968
-
2969
- line_matches.extend(context_lines)
2970
-
2971
- # Remove the last separator if it exists
2972
- if line_matches and line_matches[-1] == "--":
2973
- line_matches.pop()
2974
-
2975
- # Check if no results found
2976
- max_results = 1000
2977
- query = _generate_query(
2978
- len([m for m in line_matches if ":>" in m]),
2979
- len(file_matches),
2980
- path_prefix,
2981
- regex_pattern,
2982
- lines_before,
2983
- lines_after,
2984
- max_results,
2985
- )
2986
- if not line_matches:
2987
- yield {"query": query, "file": path_prefix, "uri": path_prefix, "compiled": "No matches found."}
2988
- return
2989
-
2990
- # Truncate matched lines list if too long
2991
- if len(line_matches) > max_results:
2992
- line_matches = line_matches[:max_results] + [
2993
- f"... {len(line_matches) - max_results} more results found. Use stricter regex or path to narrow down results."
2994
- ]
2995
-
2996
- yield {"query": query, "file": path_prefix, "uri": path_prefix, "compiled": "\n".join(line_matches)}
2997
-
2998
- except Exception as e:
2999
- error_msg = f"Error using grep files tool: {str(e)}"
3000
- logger.error(error_msg, exc_info=True)
3001
- yield [
3002
- {
3003
- "query": _generate_query(0, 0, path_prefix or "", regex_pattern, lines_before, lines_after),
3004
- "file": path_prefix,
3005
- "uri": path_prefix,
3006
- "compiled": error_msg,
3007
- }
3008
- ]
3009
-
3010
-
3011
- async def list_files(
3012
- path: Optional[str] = None,
3013
- pattern: Optional[str] = None,
3014
- user: KhojUser = None,
3015
- ):
3016
- """
3017
- List files under a given path or glob pattern from the user's document database.
3018
- """
3019
-
3020
- # Construct the query string based on provided parameters
3021
- def _generate_query(doc_count, path, pattern):
3022
- query = f"**Found {doc_count} files**"
3023
- if path:
3024
- query += f" in {path}"
3025
- if pattern:
3026
- query += f" filtered by {pattern}"
3027
- return query
3028
-
3029
- try:
3030
- # Get user files by path prefix when specified
3031
- path = path or ""
3032
- if path in ["", "/", ".", "./", "~", "~/"]:
3033
- file_objects = await FileObjectAdapters.aget_all_file_objects(user, limit=10000)
3034
- else:
3035
- file_objects = await FileObjectAdapters.aget_file_objects_by_path_prefix(user, path)
3036
-
3037
- if not file_objects:
3038
- yield {"query": _generate_query(0, path, pattern), "file": path, "uri": path, "compiled": "No files found."}
3039
- return
3040
-
3041
- # Extract file names from file objects
3042
- files = [f.file_name for f in file_objects]
3043
- # Convert to relative file path (similar to ls)
3044
- if path:
3045
- files = [f[len(path) :] for f in files]
3046
-
3047
- # Apply glob pattern filtering if specified
3048
- if pattern:
3049
- files = [f for f in files if fnmatch.fnmatch(f, pattern)]
3050
-
3051
- query = _generate_query(len(files), path, pattern)
3052
- if not files:
3053
- yield {"query": query, "file": path, "uri": path, "compiled": "No files found."}
3054
- return
3055
-
3056
- # Truncate the list if it's too long
3057
- max_files = 100
3058
- if len(files) > max_files:
3059
- files = files[:max_files] + [
3060
- f"... {len(files) - max_files} more files found. Use glob pattern to narrow down results."
3061
- ]
3062
-
3063
- yield {"query": query, "file": path, "uri": path, "compiled": "\n- ".join(files)}
3064
-
3065
- except Exception as e:
3066
- error_msg = f"Error listing files in {path}: {str(e)}"
3067
- logger.error(error_msg, exc_info=True)
3068
- yield {"query": query, "file": path, "uri": path, "compiled": error_msg}