khoj 1.27.2.dev29__py3-none-any.whl → 1.27.2.dev130__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. khoj/database/adapters/__init__.py +34 -10
  2. khoj/interface/compiled/404/index.html +1 -1
  3. khoj/interface/compiled/_next/static/chunks/1034-da58b679fcbb79c1.js +1 -0
  4. khoj/interface/compiled/_next/static/chunks/1467-5a191c1cd5bf0b83.js +1 -0
  5. khoj/interface/compiled/_next/static/chunks/1603-5d70d9dfcdcb1f10.js +1 -0
  6. khoj/interface/compiled/_next/static/chunks/3423-fa918f4e5365a35e.js +1 -0
  7. khoj/interface/compiled/_next/static/chunks/8423-3ad0bfb299801220.js +1 -0
  8. khoj/interface/compiled/_next/static/chunks/app/agents/{page-5ae1e540bb5be8a9.js → page-2beaba7c9bb750bd.js} +1 -1
  9. khoj/interface/compiled/_next/static/chunks/app/automations/{page-774ae3e033f938cd.js → page-9b5c77e0b0dd772c.js} +1 -1
  10. khoj/interface/compiled/_next/static/chunks/app/chat/page-7dc98df9c88828f0.js +1 -0
  11. khoj/interface/compiled/_next/static/chunks/app/factchecker/page-d887f55fe6d4f35d.js +1 -0
  12. khoj/interface/compiled/_next/static/chunks/app/{page-4dc472cf6d674004.js → page-d46244282af16509.js} +1 -1
  13. khoj/interface/compiled/_next/static/chunks/app/search/{page-9b64f61caa5bd7f9.js → page-ab2995529ece3140.js} +1 -1
  14. khoj/interface/compiled/_next/static/chunks/app/settings/{page-7a8c382af2a7e870.js → page-89e6737b2cc9fb3a.js} +1 -1
  15. khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-eb9e282691858f2e.js → page-505b07bce608b34e.js} +1 -1
  16. khoj/interface/compiled/_next/static/chunks/{webpack-2b720658ccc746f2.js → webpack-8ae5ce45161bd98e.js} +1 -1
  17. khoj/interface/compiled/_next/static/css/{2272c73fc7a3b571.css → 26c1c33d0423a7d8.css} +1 -1
  18. khoj/interface/compiled/_next/static/css/592ca99f5122e75a.css +1 -0
  19. khoj/interface/compiled/_next/static/css/b70402177a7c3207.css +1 -0
  20. khoj/interface/compiled/_next/static/css/e9c5fe555dd3050b.css +25 -0
  21. khoj/interface/compiled/agents/index.html +1 -1
  22. khoj/interface/compiled/agents/index.txt +2 -2
  23. khoj/interface/compiled/automations/index.html +1 -1
  24. khoj/interface/compiled/automations/index.txt +2 -2
  25. khoj/interface/compiled/chat/index.html +1 -1
  26. khoj/interface/compiled/chat/index.txt +2 -2
  27. khoj/interface/compiled/factchecker/index.html +1 -1
  28. khoj/interface/compiled/factchecker/index.txt +2 -2
  29. khoj/interface/compiled/index.html +1 -1
  30. khoj/interface/compiled/index.txt +2 -2
  31. khoj/interface/compiled/search/index.html +1 -1
  32. khoj/interface/compiled/search/index.txt +2 -2
  33. khoj/interface/compiled/settings/index.html +1 -1
  34. khoj/interface/compiled/settings/index.txt +2 -2
  35. khoj/interface/compiled/share/chat/index.html +1 -1
  36. khoj/interface/compiled/share/chat/index.txt +2 -2
  37. khoj/processor/conversation/anthropic/anthropic_chat.py +14 -10
  38. khoj/processor/conversation/anthropic/utils.py +13 -2
  39. khoj/processor/conversation/google/gemini_chat.py +15 -11
  40. khoj/processor/conversation/offline/chat_model.py +10 -9
  41. khoj/processor/conversation/openai/gpt.py +11 -8
  42. khoj/processor/conversation/prompts.py +131 -22
  43. khoj/processor/conversation/utils.py +132 -6
  44. khoj/processor/tools/online_search.py +5 -3
  45. khoj/processor/tools/run_code.py +144 -0
  46. khoj/routers/api.py +6 -6
  47. khoj/routers/api_chat.py +156 -88
  48. khoj/routers/helpers.py +91 -47
  49. khoj/routers/research.py +321 -0
  50. khoj/search_filter/date_filter.py +1 -3
  51. khoj/search_filter/file_filter.py +1 -2
  52. khoj/search_type/text_search.py +3 -3
  53. khoj/utils/helpers.py +15 -2
  54. khoj/utils/yaml.py +4 -0
  55. {khoj-1.27.2.dev29.dist-info → khoj-1.27.2.dev130.dist-info}/METADATA +1 -1
  56. {khoj-1.27.2.dev29.dist-info → khoj-1.27.2.dev130.dist-info}/RECORD +63 -60
  57. khoj/interface/compiled/_next/static/chunks/1603-5138bb7c8035d9a6.js +0 -1
  58. khoj/interface/compiled/_next/static/chunks/2697-61fcba89fd87eab4.js +0 -1
  59. khoj/interface/compiled/_next/static/chunks/3423-0b533af8bf6ac218.js +0 -1
  60. khoj/interface/compiled/_next/static/chunks/9479-ff7d8c4dae2014d1.js +0 -1
  61. khoj/interface/compiled/_next/static/chunks/app/chat/page-97f5b61aaf46d364.js +0 -1
  62. khoj/interface/compiled/_next/static/chunks/app/factchecker/page-d82403db2866bad8.js +0 -1
  63. khoj/interface/compiled/_next/static/css/4cae6c0e5c72fb2d.css +0 -1
  64. khoj/interface/compiled/_next/static/css/76d55eb435962b19.css +0 -25
  65. khoj/interface/compiled/_next/static/css/ddcc0cf73e062476.css +0 -1
  66. /khoj/interface/compiled/_next/static/{atzIseFarmC7TIwq2BgHC → N19uqHAJYqRAVxvuVwHfE}/_buildManifest.js +0 -0
  67. /khoj/interface/compiled/_next/static/{atzIseFarmC7TIwq2BgHC → N19uqHAJYqRAVxvuVwHfE}/_ssgManifest.js +0 -0
  68. /khoj/interface/compiled/_next/static/chunks/{1970-60c96aed937a4928.js → 1970-444843bea1d17d61.js} +0 -0
  69. /khoj/interface/compiled/_next/static/chunks/{9417-2ca87207387fc790.js → 9417-19cfd1a9cb758e71.js} +0 -0
  70. {khoj-1.27.2.dev29.dist-info → khoj-1.27.2.dev130.dist-info}/WHEEL +0 -0
  71. {khoj-1.27.2.dev29.dist-info → khoj-1.27.2.dev130.dist-info}/entry_points.txt +0 -0
  72. {khoj-1.27.2.dev29.dist-info → khoj-1.27.2.dev130.dist-info}/licenses/LICENSE +0 -0
@@ -19,6 +19,7 @@ from khoj.utils import state
19
19
  from khoj.utils.constants import empty_escape_sequences
20
20
  from khoj.utils.helpers import ConversationCommand, in_debug_mode, is_none_or_empty
21
21
  from khoj.utils.rawconfig import LocationData
22
+ from khoj.utils.yaml import yaml_dump
22
23
 
23
24
  logger = logging.getLogger(__name__)
24
25
 
@@ -138,7 +139,8 @@ def filter_questions(questions: List[str]):
138
139
  def converse_offline(
139
140
  user_query,
140
141
  references=[],
141
- online_results=[],
142
+ online_results={},
143
+ code_results={},
142
144
  conversation_log={},
143
145
  model: str = "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
144
146
  loaded_model: Union[Any, None] = None,
@@ -158,8 +160,6 @@ def converse_offline(
158
160
  assert loaded_model is None or isinstance(loaded_model, Llama), "loaded_model must be of type Llama, if configured"
159
161
  offline_chat_model = loaded_model or download_model(model, max_tokens=max_prompt_size)
160
162
  tracer["chat_model"] = model
161
-
162
- compiled_references = "\n\n".join({f"# File: {item['file']}\n## {item['compiled']}\n" for item in references})
163
163
  current_date = datetime.now()
164
164
 
165
165
  if agent and agent.personality:
@@ -184,24 +184,25 @@ def converse_offline(
184
184
  system_prompt = f"{system_prompt}\n{user_name_prompt}"
185
185
 
186
186
  # Get Conversation Primer appropriate to Conversation Type
187
- if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(compiled_references):
187
+ if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(references):
188
188
  return iter([prompts.no_notes_found.format()])
189
189
  elif conversation_commands == [ConversationCommand.Online] and is_none_or_empty(online_results):
190
190
  completion_func(chat_response=prompts.no_online_results_found.format())
191
191
  return iter([prompts.no_online_results_found.format()])
192
192
 
193
193
  context_message = ""
194
- if not is_none_or_empty(compiled_references):
195
- context_message += f"{prompts.notes_conversation_offline.format(references=compiled_references)}\n\n"
194
+ if not is_none_or_empty(references):
195
+ context_message = f"{prompts.notes_conversation_offline.format(references=yaml_dump(references))}\n\n"
196
196
  if ConversationCommand.Online in conversation_commands or ConversationCommand.Webpage in conversation_commands:
197
197
  simplified_online_results = online_results.copy()
198
198
  for result in online_results:
199
199
  if online_results[result].get("webpages"):
200
200
  simplified_online_results[result] = online_results[result]["webpages"]
201
201
 
202
- context_message += (
203
- f"{prompts.online_search_conversation_offline.format(online_results=str(simplified_online_results))}"
204
- )
202
+ context_message += f"{prompts.online_search_conversation_offline.format(online_results=yaml_dump(simplified_online_results))}\n\n"
203
+ if ConversationCommand.Code in conversation_commands and not is_none_or_empty(code_results):
204
+ context_message += f"{prompts.code_executed_context.format(code_results=str(code_results))}\n\n"
205
+ context_message = context_message.strip()
205
206
 
206
207
  # Setup Prompt with Primer or Conversation History
207
208
  messages = generate_chatml_messages_with_context(
@@ -12,12 +12,13 @@ from khoj.processor.conversation.openai.utils import (
12
12
  completion_with_backoff,
13
13
  )
14
14
  from khoj.processor.conversation.utils import (
15
+ clean_json,
15
16
  construct_structured_message,
16
17
  generate_chatml_messages_with_context,
17
- remove_json_codeblock,
18
18
  )
19
19
  from khoj.utils.helpers import ConversationCommand, is_none_or_empty
20
20
  from khoj.utils.rawconfig import LocationData
21
+ from khoj.utils.yaml import yaml_dump
21
22
 
22
23
  logger = logging.getLogger(__name__)
23
24
 
@@ -94,8 +95,7 @@ def extract_questions(
94
95
 
95
96
  # Extract, Clean Message from GPT's Response
96
97
  try:
97
- response = response.strip()
98
- response = remove_json_codeblock(response)
98
+ response = clean_json(response)
99
99
  response = json.loads(response)
100
100
  response = [q.strip() for q in response["queries"] if q.strip()]
101
101
  if not isinstance(response, list) or not response:
@@ -133,6 +133,7 @@ def converse(
133
133
  references,
134
134
  user_query,
135
135
  online_results: Optional[Dict[str, Dict]] = None,
136
+ code_results: Optional[Dict[str, Dict]] = None,
136
137
  conversation_log={},
137
138
  model: str = "gpt-4o-mini",
138
139
  api_key: Optional[str] = None,
@@ -154,7 +155,6 @@ def converse(
154
155
  """
155
156
  # Initialize Variables
156
157
  current_date = datetime.now()
157
- compiled_references = "\n\n".join({f"# File: {item['file']}\n## {item['compiled']}\n" for item in references})
158
158
 
159
159
  if agent and agent.personality:
160
160
  system_prompt = prompts.custom_personality.format(
@@ -178,7 +178,7 @@ def converse(
178
178
  system_prompt = f"{system_prompt}\n{user_name_prompt}"
179
179
 
180
180
  # Get Conversation Primer appropriate to Conversation Type
181
- if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(compiled_references):
181
+ if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(references):
182
182
  completion_func(chat_response=prompts.no_notes_found.format())
183
183
  return iter([prompts.no_notes_found.format()])
184
184
  elif conversation_commands == [ConversationCommand.Online] and is_none_or_empty(online_results):
@@ -186,10 +186,13 @@ def converse(
186
186
  return iter([prompts.no_online_results_found.format()])
187
187
 
188
188
  context_message = ""
189
- if not is_none_or_empty(compiled_references):
190
- context_message = f"{prompts.notes_conversation.format(references=compiled_references)}\n\n"
189
+ if not is_none_or_empty(references):
190
+ context_message = f"{prompts.notes_conversation.format(references=yaml_dump(references))}\n\n"
191
191
  if not is_none_or_empty(online_results):
192
- context_message += f"{prompts.online_search_conversation.format(online_results=str(online_results))}"
192
+ context_message += f"{prompts.online_search_conversation.format(online_results=yaml_dump(online_results))}\n\n"
193
+ if not is_none_or_empty(code_results):
194
+ context_message += f"{prompts.code_executed_context.format(code_results=str(code_results))}\n\n"
195
+ context_message = context_message.strip()
193
196
 
194
197
  # Setup Prompt with Primer or Conversation History
195
198
  messages = generate_chatml_messages_with_context(
@@ -394,21 +394,23 @@ Q: {query}
394
394
 
395
395
  extract_questions = PromptTemplate.from_template(
396
396
  """
397
- You are Khoj, an extremely smart and helpful document search assistant with only the ability to retrieve information from the user's notes. Disregard online search requests.
397
+ You are Khoj, an extremely smart and helpful document search assistant with only the ability to retrieve information from the user's notes and documents.
398
398
  Construct search queries to retrieve relevant information to answer the user's question.
399
- - You will be provided past questions(Q) and answers(A) for context.
399
+ - You will be provided example and actual past user questions(Q), search queries(Khoj) and answers(A) for context.
400
400
  - Add as much context from the previous questions and answers as required into your search queries.
401
- - Break messages into multiple search queries when required to retrieve the relevant information.
401
+ - Break your search down into multiple search queries from a diverse set of lenses to retrieve all related documents.
402
402
  - Add date filters to your search queries from questions and answers when required to retrieve the relevant information.
403
403
  - When asked a meta, vague or random questions, search for a variety of broad topics to answer the user's question.
404
404
  {personality_context}
405
- What searches will you perform to answer the users question? Respond with search queries as list of strings in a JSON object.
405
+ What searches will you perform to answer the user's question? Respond with search queries as list of strings in a JSON object.
406
406
  Current Date: {day_of_week}, {current_date}
407
407
  User's Location: {location}
408
408
  {username}
409
409
 
410
+ Examples
411
+ ---
410
412
  Q: How was my trip to Cambodia?
411
- Khoj: {{"queries": ["How was my trip to Cambodia?"]}}
413
+ Khoj: {{"queries": ["How was my trip to Cambodia?", "Angkor Wat temple visit", "Flight to Phnom Penh", "Expenses in Cambodia", "Stay in Cambodia"]}}
412
414
  A: The trip was amazing. You went to the Angkor Wat temple and it was beautiful.
413
415
 
414
416
  Q: Who did i visit that temple with?
@@ -443,6 +445,8 @@ Q: Who all did I meet here yesterday?
443
445
  Khoj: {{"queries": ["Met in {location} on {yesterday_date} dt>='{yesterday_date}' dt<'{current_date}'"]}}
444
446
  A: Yesterday's note mentions your visit to your local beach with Ram and Shyam.
445
447
 
448
+ Actual
449
+ ---
446
450
  {chat_history}
447
451
  Q: {text}
448
452
  Khoj:
@@ -451,11 +455,11 @@ Khoj:
451
455
 
452
456
  extract_questions_anthropic_system_prompt = PromptTemplate.from_template(
453
457
  """
454
- You are Khoj, an extremely smart and helpful document search assistant with only the ability to retrieve information from the user's notes. Disregard online search requests.
458
+ You are Khoj, an extremely smart and helpful document search assistant with only the ability to retrieve information from the user's notes.
455
459
  Construct search queries to retrieve relevant information to answer the user's question.
456
- - You will be provided past questions(User), extracted queries(Assistant) and answers(A) for context.
460
+ - You will be provided past questions(User), search queries(Assistant) and answers(A) for context.
457
461
  - Add as much context from the previous questions and answers as required into your search queries.
458
- - Break messages into multiple search queries when required to retrieve the relevant information.
462
+ - Break your search down into multiple search queries from a diverse set of lenses to retrieve all related documents.
459
463
  - Add date filters to your search queries from questions and answers when required to retrieve the relevant information.
460
464
  - When asked a meta, vague or random questions, search for a variety of broad topics to answer the user's question.
461
465
  {personality_context}
@@ -468,7 +472,7 @@ User's Location: {location}
468
472
  Here are some examples of how you can construct search queries to answer the user's question:
469
473
 
470
474
  User: How was my trip to Cambodia?
471
- Assistant: {{"queries": ["How was my trip to Cambodia?"]}}
475
+ Assistant: {{"queries": ["How was my trip to Cambodia?", "Angkor Wat temple visit", "Flight to Phnom Penh", "Expenses in Cambodia", "Stay in Cambodia"]}}
472
476
  A: The trip was amazing. You went to the Angkor Wat temple and it was beautiful.
473
477
 
474
478
  User: What national parks did I go to last year?
@@ -501,17 +505,14 @@ Assistant:
501
505
  )
502
506
 
503
507
  system_prompt_extract_relevant_information = """
504
- As a professional analyst, create a comprehensive report of the most relevant information from a web page in response to a user's query.
505
- The text provided is directly from within the web page.
506
- The report you create should be multiple paragraphs, and it should represent the content of the website.
507
- Tell the user exactly what the website says in response to their query, while adhering to these guidelines:
508
-
509
- 1. Answer the user's query as specifically as possible. Include many supporting details from the website.
510
- 2. Craft a report that is detailed, thorough, in-depth, and complex, while maintaining clarity.
511
- 3. Rely strictly on the provided text, without including external information.
512
- 4. Format the report in multiple paragraphs with a clear structure.
513
- 5. Be as specific as possible in your answer to the user's query.
514
- 6. Reproduce as much of the provided text as possible, while maintaining readability.
508
+ As a professional analyst, your job is to extract all pertinent information from documents to help answer user's query.
509
+ You will be provided raw text directly from within the document.
510
+ Adhere to these guidelines while extracting information from the provided documents:
511
+
512
+ 1. Extract all relevant text and links from the document that can assist with further research or answer the user's query.
513
+ 2. Craft a comprehensive but compact report with all the necessary data from the document to generate an informed response.
514
+ 3. Rely strictly on the provided text to generate your summary, without including external information.
515
+ 4. Provide specific, important snippets from the document in your report to establish trust in your summary.
515
516
  """.strip()
516
517
 
517
518
  extract_relevant_information = PromptTemplate.from_template(
@@ -519,10 +520,10 @@ extract_relevant_information = PromptTemplate.from_template(
519
520
  {personality_context}
520
521
  Target Query: {query}
521
522
 
522
- Web Pages:
523
+ Document:
523
524
  {corpus}
524
525
 
525
- Collate only relevant information from the website to answer the target query.
526
+ Collate only relevant information from the document to answer the target query.
526
527
  """.strip()
527
528
  )
528
529
 
@@ -617,6 +618,67 @@ Khoj:
617
618
  """.strip()
618
619
  )
619
620
 
621
+ plan_function_execution = PromptTemplate.from_template(
622
+ """
623
+ You are Khoj, a smart, creative and methodical researcher. Use the provided tool AIs to investigate information to answer query.
624
+ Create a multi-step plan and intelligently iterate on the plan based on the retrieved information to find the requested information.
625
+ {personality_context}
626
+
627
+ # Instructions
628
+ - Ask detailed queries to the tool AIs provided below, one at a time, to discover required information or run calculations. Their response will be shown to you in the next iteration.
629
+ - Break down your research process into independent, self-contained steps that can be executed sequentially to answer the user's query. Write your step-by-step plan in the scratchpad.
630
+ - Ask highly diverse, detailed queries to the tool AIs, one at a time, to discover required information or run calculations.
631
+ - NEVER repeat the same query across iterations.
632
+ - Ensure that all the required context is passed to the tool AIs for successful execution.
633
+ - Ensure that you go deeper when possible and try more broad, creative strategies when a path is not yielding useful results. Build on the results of the previous iterations.
634
+ - You are allowed upto {max_iterations} iterations to use the help of the provided tool AIs to answer the user's question.
635
+ - Stop when you have the required information by returning a JSON object with an empty "tool" field. E.g., {{scratchpad: "I have all I need", tool: "", query: ""}}
636
+
637
+ # Examples
638
+ Assuming you can search the user's notes and the internet.
639
+ - When they ask for the population of their hometown
640
+ 1. Try look up their hometown in their notes. Ask the note search AI to search for their birth certificate, childhood memories, school, resume etc.
641
+ 2. If not found in their notes, try infer their hometown from their online social media profiles. Ask the online search AI to look for {username}'s biography, school, resume on linkedin, facebook, website etc.
642
+ 3. Only then try find the latest population of their hometown by reading official websites with the help of the online search and web page reading AI.
643
+ - When user for their computer's specs
644
+ 1. Try find their computer model in their notes.
645
+ 2. Now find webpages with their computer model's spec online and read them.
646
+ - When I ask what clothes to carry for their upcoming trip
647
+ 1. Find the itinerary of their upcoming trip in their notes.
648
+ 2. Next find the weather forecast at the destination online.
649
+ 3. Then find if they mentioned what clothes they own in their notes.
650
+
651
+ # Background Context
652
+ - Current Date: {day_of_week}, {current_date}
653
+ - User Location: {location}
654
+ - User Name: {username}
655
+
656
+ # Available Tool AIs
657
+ Which of the tool AIs listed below would you use to answer the user's question? You **only** have access to the following tool AIs:
658
+
659
+ {tools}
660
+
661
+ # Previous Iterations
662
+ {previous_iterations}
663
+
664
+ # Chat History:
665
+ {chat_history}
666
+
667
+ Return the next tool AI to use and the query to ask it. Your response should always be a valid JSON object. Do not say anything else.
668
+ Response format:
669
+ {{"scratchpad": "<your_scratchpad_to_reason_about_which_tool_to_use>", "tool": "<name_of_tool_ai>", "query": "<your_detailed_query_for_the_tool_ai>"}}
670
+ """.strip()
671
+ )
672
+
673
+ previous_iteration = PromptTemplate.from_template(
674
+ """
675
+ ## Iteration {index}:
676
+ - tool: {tool}
677
+ - query: {query}
678
+ - result: {result}
679
+ """
680
+ )
681
+
620
682
  pick_relevant_information_collection_tools = PromptTemplate.from_template(
621
683
  """
622
684
  You are Khoj, an extremely smart and helpful search assistant.
@@ -806,6 +868,53 @@ Khoj:
806
868
  """.strip()
807
869
  )
808
870
 
871
+ # Code Generation
872
+ # --
873
+ python_code_generation_prompt = PromptTemplate.from_template(
874
+ """
875
+ You are Khoj, an advanced python programmer. You are tasked with constructing **up to three** python programs to best answer the user query.
876
+ - The python program will run in a pyodide python sandbox with no network access.
877
+ - You can write programs to run complex calculations, analyze data, create charts, generate documents to meticulously answer the query
878
+ - The sandbox has access to the standard library, matplotlib, panda, numpy, scipy, bs4, sympy, brotli, cryptography, fast-parquet
879
+ - Do not try display images or plots in the code directly. The code should save the image or plot to a file instead.
880
+ - Write any document, charts etc. to be shared with the user to file. These files can be seen by the user.
881
+ - Use as much context from the previous questions and answers as required to generate your code.
882
+ {personality_context}
883
+ What code will you need to write, if any, to answer the user's question?
884
+ Provide code programs as a list of strings in a JSON object with key "codes".
885
+ Current Date: {current_date}
886
+ User's Location: {location}
887
+ {username}
888
+
889
+ The JSON schema is of the form {{"codes": ["code1", "code2", "code3"]}}
890
+ For example:
891
+ {{"codes": ["print('Hello, World!')", "print('Goodbye, World!')"]}}
892
+
893
+ Now it's your turn to construct python programs to answer the user's question. Provide them as a list of strings in a JSON object. Do not say anything else.
894
+ Context:
895
+ ---
896
+ {context}
897
+
898
+ Chat History:
899
+ ---
900
+ {chat_history}
901
+
902
+ User: {query}
903
+ Khoj:
904
+ """.strip()
905
+ )
906
+
907
+ code_executed_context = PromptTemplate.from_template(
908
+ """
909
+ Use the provided code executions to inform your response.
910
+ Ask crisp follow-up questions to get additional context, when a helpful response cannot be provided from the provided code execution results or past conversations.
911
+
912
+ Code Execution Results:
913
+ {code_results}
914
+ """.strip()
915
+ )
916
+
917
+
809
918
  # Automations
810
919
  # --
811
920
  crontime_prompt = PromptTemplate.from_template(
@@ -6,9 +6,10 @@ import os
6
6
  import queue
7
7
  from dataclasses import dataclass
8
8
  from datetime import datetime
9
+ from enum import Enum
9
10
  from io import BytesIO
10
11
  from time import perf_counter
11
- from typing import Any, Dict, List, Optional
12
+ from typing import Any, Callable, Dict, List, Optional
12
13
 
13
14
  import PIL.Image
14
15
  import requests
@@ -23,8 +24,17 @@ from khoj.database.adapters import ConversationAdapters
23
24
  from khoj.database.models import ChatModelOptions, ClientApplication, KhojUser
24
25
  from khoj.processor.conversation import prompts
25
26
  from khoj.processor.conversation.offline.utils import download_model, infer_max_tokens
27
+ from khoj.search_filter.base_filter import BaseFilter
28
+ from khoj.search_filter.date_filter import DateFilter
29
+ from khoj.search_filter.file_filter import FileFilter
30
+ from khoj.search_filter.word_filter import WordFilter
26
31
  from khoj.utils import state
27
- from khoj.utils.helpers import in_debug_mode, is_none_or_empty, merge_dicts
32
+ from khoj.utils.helpers import (
33
+ ConversationCommand,
34
+ in_debug_mode,
35
+ is_none_or_empty,
36
+ merge_dicts,
37
+ )
28
38
 
29
39
  logger = logging.getLogger(__name__)
30
40
  model_to_prompt_size = {
@@ -85,8 +95,105 @@ class ThreadedGenerator:
85
95
  self.queue.put(StopIteration)
86
96
 
87
97
 
98
class InformationCollectionIteration:
    """Record of one tool call made while collecting information.

    Holds the tool name and the query sent to it, the context objects the
    tool produced, and a text summary of the result. Every field except
    ``tool`` and ``query`` is optional and defaults to ``None``.
    """

    def __init__(
        self,
        tool: str,
        query: str,
        context: Optional[list] = None,
        onlineContext: Optional[dict] = None,
        codeContext: Optional[dict] = None,
        summarizedResult: Optional[str] = None,
    ):
        """Store the given values unchanged on the instance.

        Args:
            tool: Name of the tool that was invoked.
            query: Query that was sent to the tool.
            context: List of context entries, if any.
                NOTE(review): presumably note search results - confirm with callers.
            onlineContext: Mapping of online context, if any.
            codeContext: Mapping of code context, if any.
            summarizedResult: Summary text of this iteration's result, if any.
        """
        self.tool = tool
        self.query = query
        self.context = context
        self.onlineContext = onlineContext
        self.codeContext = codeContext
        self.summarizedResult = summarizedResult
114
+
115
+
116
def construct_iteration_history(
    previous_iterations: "List[InformationCollectionIteration]", previous_iteration_prompt: str
) -> str:
    """Render previous iterations into a single history string.

    Args:
        previous_iterations: Iterations to render, in order. Each must expose
            ``tool``, ``query`` and ``summarizedResult`` attributes.
        previous_iteration_prompt: Template whose ``format`` method accepts the
            keyword arguments ``tool``, ``query``, ``result`` and ``index``.

    Returns:
        The formatted entries concatenated in order with no extra separator;
        an empty string when there are no previous iterations.
    """
    # Iterations are numbered from 1 in the rendered history.
    return "".join(
        previous_iteration_prompt.format(
            tool=iteration.tool,
            query=iteration.query,
            result=iteration.summarizedResult,
            index=index,
        )
        for index, iteration in enumerate(previous_iterations, start=1)
    )
130
+
131
+
132
def construct_chat_history(conversation_history: dict, n: int = 4, agent_name="AI") -> str:
    """Render the last ``n`` chat entries as a plain-text transcript.

    Only entries written by "khoj" contribute to the transcript. Each one emits
    a "User: <query>" line taken from its intent, followed by an agent line:
      - intent type "remember", "reminder" or "summarize": the stored message
      - intent type containing "text-to-image": a fixed redaction placeholder
      - intent type containing "excalidraw": the first inferred query
    Entries by other authors, or with a missing or unrecognised intent type,
    are skipped.

    Args:
        conversation_history: Mapping with an optional "chat" list of entries.
        n: Number of trailing chat entries to consider.
        agent_name: Label used for the agent's lines.

    Returns:
        The transcript, one newline-terminated line per utterance.
    """
    lines = []
    for chat in conversation_history.get("chat", [])[-n:]:
        if chat["by"] != "khoj":
            continue

        # A missing intent or intent type previously raised KeyError/TypeError;
        # treat it as "no recognised type" and skip the entry instead.
        intent = chat.get("intent") or {}
        intent_type = intent.get("type") or ""

        if intent_type in ("remember", "reminder", "summarize"):
            reply = chat["message"]
        elif "text-to-image" in intent_type:
            reply = "[generated image redacted for space]"
        elif "excalidraw" in intent_type:
            reply = intent["inferred-queries"][0]
        else:
            continue

        lines.append(f"User: {intent['query']}\n")
        lines.append(f"{agent_name}: {reply}\n")
    return "".join(lines)
145
+
146
+
147
def construct_tool_chat_history(
    previous_iterations: List[InformationCollectionIteration], tool: ConversationCommand = None
) -> Dict[str, list]:
    """Build a chat-log shaped dict from previous iterations.

    Every iteration becomes two entries: a "you" message carrying the
    iteration's query, then a "khoj" message carrying its summarized result
    with a "remember" intent. The intent's inferred queries depend on ``tool``:
      - Notes: the "query" value of each item in the iteration's context
      - Online: the keys of the iteration's online context
      - Code: the keys of the iteration's code context
      - anything else: an empty list

    Returns:
        A dict with a single "chat" key holding the generated entries.
    """

    def _inferred_queries(iteration: InformationCollectionIteration) -> List[str]:
        # Missing or empty context always yields no inferred queries.
        if tool == ConversationCommand.Notes:
            return [c["query"] for c in iteration.context] if iteration.context else []
        if tool == ConversationCommand.Online:
            return list(iteration.onlineContext.keys()) if iteration.onlineContext else []
        if tool == ConversationCommand.Code:
            return list(iteration.codeContext.keys()) if iteration.codeContext else []
        return []

    turns: list = []
    for iteration in previous_iterations:
        user_turn = {
            "by": "you",
            "message": iteration.query,
        }
        khoj_turn = {
            "by": "khoj",
            "intent": {
                "type": "remember",
                "inferred-queries": _inferred_queries(iteration),
                "query": iteration.query,
            },
            "message": iteration.summarizedResult,
        }
        turns.extend((user_turn, khoj_turn))

    return {"chat": turns}
180
+
181
+
182
class ChatEvent(Enum):
    """Chat event kinds, each identified by a lowercase string value."""

    # Markers around an LLM response
    START_LLM_RESPONSE = "start_llm_response"
    END_LLM_RESPONSE = "end_llm_response"

    # Payload-carrying events
    MESSAGE = "message"
    REFERENCES = "references"
    STATUS = "status"
188
+
189
+
88
190
  def message_to_log(
89
- user_message, chat_response, user_message_metadata={}, khoj_message_metadata={}, conversation_log=[]
191
+ user_message,
192
+ chat_response,
193
+ user_message_metadata={},
194
+ khoj_message_metadata={},
195
+ conversation_log=[],
196
+ train_of_thought=[],
90
197
  ):
91
198
  """Create json logs from messages, metadata for conversation log"""
92
199
  default_khoj_message_metadata = {
@@ -114,6 +221,7 @@ def save_to_conversation_log(
114
221
  user_message_time: str = None,
115
222
  compiled_references: List[Dict[str, Any]] = [],
116
223
  online_results: Dict[str, Any] = {},
224
+ code_results: Dict[str, Any] = {},
117
225
  inferred_queries: List[str] = [],
118
226
  intent_type: str = "remember",
119
227
  client_application: ClientApplication = None,
@@ -121,6 +229,7 @@ def save_to_conversation_log(
121
229
  automation_id: str = None,
122
230
  query_images: List[str] = None,
123
231
  tracer: Dict[str, Any] = {},
232
+ train_of_thought: List[Any] = [],
124
233
  ):
125
234
  user_message_time = user_message_time or datetime.now().strftime("%Y-%m-%d %H:%M:%S")
126
235
  updated_conversation = message_to_log(
@@ -134,9 +243,12 @@ def save_to_conversation_log(
134
243
  "context": compiled_references,
135
244
  "intent": {"inferred-queries": inferred_queries, "type": intent_type},
136
245
  "onlineContext": online_results,
246
+ "codeContext": code_results,
137
247
  "automationId": automation_id,
248
+ "trainOfThought": train_of_thought,
138
249
  },
139
250
  conversation_log=meta_log.get("chat", []),
251
+ train_of_thought=train_of_thought,
140
252
  )
141
253
  ConversationAdapters.save_conversation(
142
254
  user,
@@ -330,9 +442,23 @@ def reciprocal_conversation_to_chatml(message_pair):
330
442
  return [ChatMessage(content=message, role=role) for message, role in zip(message_pair, ["user", "assistant"])]
331
443
 
332
444
 
333
- def remove_json_codeblock(response: str):
334
- """Remove any markdown json codeblock formatting if present. Useful for non schema enforceable models"""
335
- return response.removeprefix("```json").removesuffix("```")
445
def clean_json(response: str):
    """Strip markdown code fences and newlines from a JSON response string.

    Useful for models that cannot be forced to follow a schema and wrap their
    JSON in a fenced code block. Surrounding whitespace is trimmed, every
    newline is removed, then a leading "```json" (or bare "```") fence and a
    trailing "```" fence are dropped if present.

    Args:
        response: Raw model response expected to contain JSON.

    Returns:
        The response without fences or newlines. It is not parsed or validated.
    """
    flattened = response.strip().replace("\n", "")
    # Drop the language-tagged fence first so the bare-fence fallback cannot leave "json" behind.
    return flattened.removeprefix("```json").removeprefix("```").removesuffix("```")
448
+
449
+
450
def clean_code_python(code: str):
    """Strip a surrounding markdown code fence from generated python code.

    Useful for models that cannot be forced to follow a schema and wrap their
    code in a fenced block. Surrounding whitespace is trimmed, then a leading
    "```python" (or bare "```") fence and a trailing "```" fence are dropped if
    present. Newlines inside the code are kept, since they are significant
    in python.

    Args:
        code: Raw model output expected to contain python code.

    Returns:
        The code without the surrounding fence.
    """
    trimmed = code.strip()
    # Drop the language-tagged fence first so the bare-fence fallback cannot leave "python" behind.
    return trimmed.removeprefix("```python").removeprefix("```").removesuffix("```")
453
+
454
+
455
def defilter_query(query: str):
    """Return the query after passing it through each search filter's `defilter`.

    The word, file and date filters are applied in that order, each receiving
    the output of the previous one.
    """
    query_filters: List[BaseFilter] = [WordFilter(), FileFilter(), DateFilter()]
    stripped_query = query
    for query_filter in query_filters:
        stripped_query = query_filter.defilter(stripped_query)
    return stripped_query
336
462
 
337
463
 
338
464
  @dataclass
@@ -4,7 +4,7 @@ import logging
4
4
  import os
5
5
  import urllib.parse
6
6
  from collections import defaultdict
7
- from typing import Callable, Dict, List, Optional, Tuple, Union
7
+ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
8
8
 
9
9
  import aiohttp
10
10
  from bs4 import BeautifulSoup
@@ -52,7 +52,8 @@ OLOSTEP_QUERY_PARAMS = {
52
52
  "expandMarkdown": "True",
53
53
  "expandHtml": "False",
54
54
  }
55
- MAX_WEBPAGES_TO_READ = 1
55
+
56
+ DEFAULT_MAX_WEBPAGES_TO_READ = 1
56
57
 
57
58
 
58
59
  async def search_online(
@@ -62,6 +63,7 @@ async def search_online(
62
63
  user: KhojUser,
63
64
  send_status_func: Optional[Callable] = None,
64
65
  custom_filters: List[str] = [],
66
+ max_webpages_to_read: int = DEFAULT_MAX_WEBPAGES_TO_READ,
65
67
  query_images: List[str] = None,
66
68
  agent: Agent = None,
67
69
  tracer: dict = {},
@@ -97,7 +99,7 @@ async def search_online(
97
99
  for subquery in response_dict:
98
100
  if "answerBox" in response_dict[subquery]:
99
101
  continue
100
- for organic in response_dict[subquery].get("organic", [])[:MAX_WEBPAGES_TO_READ]:
102
+ for organic in response_dict[subquery].get("organic", [])[:max_webpages_to_read]:
101
103
  link = organic.get("link")
102
104
  if link in webpages:
103
105
  webpages[link]["queries"].add(subquery)