khoj 1.27.2.dev18__py3-none-any.whl → 1.27.2.dev130__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/database/adapters/__init__.py +34 -10
- khoj/interface/compiled/404/index.html +1 -1
- khoj/interface/compiled/_next/static/chunks/1034-da58b679fcbb79c1.js +1 -0
- khoj/interface/compiled/_next/static/chunks/1467-5a191c1cd5bf0b83.js +1 -0
- khoj/interface/compiled/_next/static/chunks/1603-5d70d9dfcdcb1f10.js +1 -0
- khoj/interface/compiled/_next/static/chunks/3423-fa918f4e5365a35e.js +1 -0
- khoj/interface/compiled/_next/static/chunks/8423-3ad0bfb299801220.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/chat/page-7dc98df9c88828f0.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/factchecker/page-d887f55fe6d4f35d.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/{page-8f22b790e50dd722.js → page-d46244282af16509.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-6a01e07fb244c10c.js → page-505b07bce608b34e.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{webpack-31239d193815e49e.js → webpack-8ae5ce45161bd98e.js} +1 -1
- khoj/interface/compiled/_next/static/css/{2272c73fc7a3b571.css → 26c1c33d0423a7d8.css} +1 -1
- khoj/interface/compiled/_next/static/css/e9c5fe555dd3050b.css +25 -0
- khoj/interface/compiled/agents/index.html +1 -1
- khoj/interface/compiled/agents/index.txt +2 -2
- khoj/interface/compiled/automations/index.html +1 -1
- khoj/interface/compiled/automations/index.txt +2 -2
- khoj/interface/compiled/chat/index.html +1 -1
- khoj/interface/compiled/chat/index.txt +2 -2
- khoj/interface/compiled/factchecker/index.html +1 -1
- khoj/interface/compiled/factchecker/index.txt +2 -2
- khoj/interface/compiled/index.html +1 -1
- khoj/interface/compiled/index.txt +2 -2
- khoj/interface/compiled/search/index.html +1 -1
- khoj/interface/compiled/search/index.txt +2 -2
- khoj/interface/compiled/settings/index.html +1 -1
- khoj/interface/compiled/settings/index.txt +2 -2
- khoj/interface/compiled/share/chat/index.html +1 -1
- khoj/interface/compiled/share/chat/index.txt +2 -2
- khoj/processor/conversation/anthropic/anthropic_chat.py +19 -10
- khoj/processor/conversation/anthropic/utils.py +37 -6
- khoj/processor/conversation/google/gemini_chat.py +23 -13
- khoj/processor/conversation/google/utils.py +34 -10
- khoj/processor/conversation/offline/chat_model.py +40 -15
- khoj/processor/conversation/openai/gpt.py +25 -10
- khoj/processor/conversation/openai/utils.py +43 -9
- khoj/processor/conversation/prompts.py +131 -22
- khoj/processor/conversation/utils.py +299 -6
- khoj/processor/image/generate.py +2 -0
- khoj/processor/tools/online_search.py +19 -8
- khoj/processor/tools/run_code.py +144 -0
- khoj/routers/api.py +11 -6
- khoj/routers/api_chat.py +177 -88
- khoj/routers/helpers.py +155 -59
- khoj/routers/research.py +321 -0
- khoj/search_filter/date_filter.py +1 -3
- khoj/search_filter/file_filter.py +1 -2
- khoj/search_type/text_search.py +3 -3
- khoj/utils/helpers.py +15 -2
- khoj/utils/yaml.py +4 -0
- {khoj-1.27.2.dev18.dist-info → khoj-1.27.2.dev130.dist-info}/METADATA +2 -1
- {khoj-1.27.2.dev18.dist-info → khoj-1.27.2.dev130.dist-info}/RECORD +61 -58
- khoj/interface/compiled/_next/static/chunks/1603-5138bb7c8035d9a6.js +0 -1
- khoj/interface/compiled/_next/static/chunks/2697-61fcba89fd87eab4.js +0 -1
- khoj/interface/compiled/_next/static/chunks/3423-8e9c420574a9fbe3.js +0 -1
- khoj/interface/compiled/_next/static/chunks/9479-a5e7ff4c7d1d7ee7.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/page-151232d8417a1ea1.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/factchecker/page-798904432c2417c4.js +0 -1
- khoj/interface/compiled/_next/static/css/76d55eb435962b19.css +0 -25
- /khoj/interface/compiled/_next/static/{_gBBcNbs4wMKxKXhQs5E4 → N19uqHAJYqRAVxvuVwHfE}/_buildManifest.js +0 -0
- /khoj/interface/compiled/_next/static/{_gBBcNbs4wMKxKXhQs5E4 → N19uqHAJYqRAVxvuVwHfE}/_ssgManifest.js +0 -0
- /khoj/interface/compiled/_next/static/chunks/{1970-1d6d0c1b00b4f343.js → 1970-444843bea1d17d61.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{9417-759984ad62caa3dc.js → 9417-19cfd1a9cb758e71.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/app/settings/{page-7946cabb9c54e22d.js → page-89e6737b2cc9fb3a.js} +0 -0
- {khoj-1.27.2.dev18.dist-info → khoj-1.27.2.dev130.dist-info}/WHEEL +0 -0
- {khoj-1.27.2.dev18.dist-info → khoj-1.27.2.dev130.dist-info}/entry_points.txt +0 -0
- {khoj-1.27.2.dev18.dist-info → khoj-1.27.2.dev130.dist-info}/licenses/LICENSE +0 -0
@@ -394,21 +394,23 @@ Q: {query}
|
|
394
394
|
|
395
395
|
extract_questions = PromptTemplate.from_template(
|
396
396
|
"""
|
397
|
-
You are Khoj, an extremely smart and helpful document search assistant with only the ability to retrieve information from the user's notes
|
397
|
+
You are Khoj, an extremely smart and helpful document search assistant with only the ability to retrieve information from the user's notes and documents.
|
398
398
|
Construct search queries to retrieve relevant information to answer the user's question.
|
399
|
-
- You will be provided past questions(Q) and answers(A) for context.
|
399
|
+
- You will be provided example and actual past user questions(Q), search queries(Khoj) and answers(A) for context.
|
400
400
|
- Add as much context from the previous questions and answers as required into your search queries.
|
401
|
-
- Break
|
401
|
+
- Break your search down into multiple search queries from a diverse set of lenses to retrieve all related documents.
|
402
402
|
- Add date filters to your search queries from questions and answers when required to retrieve the relevant information.
|
403
403
|
- When asked a meta, vague or random questions, search for a variety of broad topics to answer the user's question.
|
404
404
|
{personality_context}
|
405
|
-
What searches will you perform to answer the
|
405
|
+
What searches will you perform to answer the user's question? Respond with search queries as list of strings in a JSON object.
|
406
406
|
Current Date: {day_of_week}, {current_date}
|
407
407
|
User's Location: {location}
|
408
408
|
{username}
|
409
409
|
|
410
|
+
Examples
|
411
|
+
---
|
410
412
|
Q: How was my trip to Cambodia?
|
411
|
-
Khoj: {{"queries": ["How was my trip to Cambodia?"]}}
|
413
|
+
Khoj: {{"queries": ["How was my trip to Cambodia?", "Angkor Wat temple visit", "Flight to Phnom Penh", "Expenses in Cambodia", "Stay in Cambodia"]}}
|
412
414
|
A: The trip was amazing. You went to the Angkor Wat temple and it was beautiful.
|
413
415
|
|
414
416
|
Q: Who did i visit that temple with?
|
@@ -443,6 +445,8 @@ Q: Who all did I meet here yesterday?
|
|
443
445
|
Khoj: {{"queries": ["Met in {location} on {yesterday_date} dt>='{yesterday_date}' dt<'{current_date}'"]}}
|
444
446
|
A: Yesterday's note mentions your visit to your local beach with Ram and Shyam.
|
445
447
|
|
448
|
+
Actual
|
449
|
+
---
|
446
450
|
{chat_history}
|
447
451
|
Q: {text}
|
448
452
|
Khoj:
|
@@ -451,11 +455,11 @@ Khoj:
|
|
451
455
|
|
452
456
|
extract_questions_anthropic_system_prompt = PromptTemplate.from_template(
|
453
457
|
"""
|
454
|
-
You are Khoj, an extremely smart and helpful document search assistant with only the ability to retrieve information from the user's notes.
|
458
|
+
You are Khoj, an extremely smart and helpful document search assistant with only the ability to retrieve information from the user's notes.
|
455
459
|
Construct search queries to retrieve relevant information to answer the user's question.
|
456
|
-
- You will be provided past questions(User),
|
460
|
+
- You will be provided past questions(User), search queries(Assistant) and answers(A) for context.
|
457
461
|
- Add as much context from the previous questions and answers as required into your search queries.
|
458
|
-
- Break
|
462
|
+
- Break your search down into multiple search queries from a diverse set of lenses to retrieve all related documents.
|
459
463
|
- Add date filters to your search queries from questions and answers when required to retrieve the relevant information.
|
460
464
|
- When asked a meta, vague or random questions, search for a variety of broad topics to answer the user's question.
|
461
465
|
{personality_context}
|
@@ -468,7 +472,7 @@ User's Location: {location}
|
|
468
472
|
Here are some examples of how you can construct search queries to answer the user's question:
|
469
473
|
|
470
474
|
User: How was my trip to Cambodia?
|
471
|
-
Assistant: {{"queries": ["How was my trip to Cambodia?"]}}
|
475
|
+
Assistant: {{"queries": ["How was my trip to Cambodia?", "Angkor Wat temple visit", "Flight to Phnom Penh", "Expenses in Cambodia", "Stay in Cambodia"]}}
|
472
476
|
A: The trip was amazing. You went to the Angkor Wat temple and it was beautiful.
|
473
477
|
|
474
478
|
User: What national parks did I go to last year?
|
@@ -501,17 +505,14 @@ Assistant:
|
|
501
505
|
)
|
502
506
|
|
503
507
|
system_prompt_extract_relevant_information = """
|
504
|
-
As a professional analyst,
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
|
511
|
-
|
512
|
-
4. Format the report in multiple paragraphs with a clear structure.
|
513
|
-
5. Be as specific as possible in your answer to the user's query.
|
514
|
-
6. Reproduce as much of the provided text as possible, while maintaining readability.
|
508
|
+
As a professional analyst, your job is to extract all pertinent information from documents to help answer user's query.
|
509
|
+
You will be provided raw text directly from within the document.
|
510
|
+
Adhere to these guidelines while extracting information from the provided documents:
|
511
|
+
|
512
|
+
1. Extract all relevant text and links from the document that can assist with further research or answer the user's query.
|
513
|
+
2. Craft a comprehensive but compact report with all the necessary data from the document to generate an informed response.
|
514
|
+
3. Rely strictly on the provided text to generate your summary, without including external information.
|
515
|
+
4. Provide specific, important snippets from the document in your report to establish trust in your summary.
|
515
516
|
""".strip()
|
516
517
|
|
517
518
|
extract_relevant_information = PromptTemplate.from_template(
|
@@ -519,10 +520,10 @@ extract_relevant_information = PromptTemplate.from_template(
|
|
519
520
|
{personality_context}
|
520
521
|
Target Query: {query}
|
521
522
|
|
522
|
-
|
523
|
+
Document:
|
523
524
|
{corpus}
|
524
525
|
|
525
|
-
Collate only relevant information from the
|
526
|
+
Collate only relevant information from the document to answer the target query.
|
526
527
|
""".strip()
|
527
528
|
)
|
528
529
|
|
@@ -617,6 +618,67 @@ Khoj:
|
|
617
618
|
""".strip()
|
618
619
|
)
|
619
620
|
|
621
|
+
plan_function_execution = PromptTemplate.from_template(
|
622
|
+
"""
|
623
|
+
You are Khoj, a smart, creative and methodical researcher. Use the provided tool AIs to investigate information to answer query.
|
624
|
+
Create a multi-step plan and intelligently iterate on the plan based on the retrieved information to find the requested information.
|
625
|
+
{personality_context}
|
626
|
+
|
627
|
+
# Instructions
|
628
|
+
- Ask detailed queries to the tool AIs provided below, one at a time, to discover required information or run calculations. Their response will be shown to you in the next iteration.
|
629
|
+
- Break down your research process into independent, self-contained steps that can be executed sequentially to answer the user's query. Write your step-by-step plan in the scratchpad.
|
630
|
+
- Ask highly diverse, detailed queries to the tool AIs, one at a time, to discover required information or run calculations.
|
631
|
+
- NEVER repeat the same query across iterations.
|
632
|
+
- Ensure that all the required context is passed to the tool AIs for successful execution.
|
633
|
+
- Ensure that you go deeper when possible and try more broad, creative strategies when a path is not yielding useful results. Build on the results of the previous iterations.
|
634
|
+
- You are allowed upto {max_iterations} iterations to use the help of the provided tool AIs to answer the user's question.
|
635
|
+
- Stop when you have the required information by returning a JSON object with an empty "tool" field. E.g., {{scratchpad: "I have all I need", tool: "", query: ""}}
|
636
|
+
|
637
|
+
# Examples
|
638
|
+
Assuming you can search the user's notes and the internet.
|
639
|
+
- When they ask for the population of their hometown
|
640
|
+
1. Try look up their hometown in their notes. Ask the note search AI to search for their birth certificate, childhood memories, school, resume etc.
|
641
|
+
2. If not found in their notes, try infer their hometown from their online social media profiles. Ask the online search AI to look for {username}'s biography, school, resume on linkedin, facebook, website etc.
|
642
|
+
3. Only then try find the latest population of their hometown by reading official websites with the help of the online search and web page reading AI.
|
643
|
+
- When user for their computer's specs
|
644
|
+
1. Try find their computer model in their notes.
|
645
|
+
2. Now find webpages with their computer model's spec online and read them.
|
646
|
+
- When I ask what clothes to carry for their upcoming trip
|
647
|
+
1. Find the itinerary of their upcoming trip in their notes.
|
648
|
+
2. Next find the weather forecast at the destination online.
|
649
|
+
3. Then find if they mentioned what clothes they own in their notes.
|
650
|
+
|
651
|
+
# Background Context
|
652
|
+
- Current Date: {day_of_week}, {current_date}
|
653
|
+
- User Location: {location}
|
654
|
+
- User Name: {username}
|
655
|
+
|
656
|
+
# Available Tool AIs
|
657
|
+
Which of the tool AIs listed below would you use to answer the user's question? You **only** have access to the following tool AIs:
|
658
|
+
|
659
|
+
{tools}
|
660
|
+
|
661
|
+
# Previous Iterations
|
662
|
+
{previous_iterations}
|
663
|
+
|
664
|
+
# Chat History:
|
665
|
+
{chat_history}
|
666
|
+
|
667
|
+
Return the next tool AI to use and the query to ask it. Your response should always be a valid JSON object. Do not say anything else.
|
668
|
+
Response format:
|
669
|
+
{{"scratchpad": "<your_scratchpad_to_reason_about_which_tool_to_use>", "tool": "<name_of_tool_ai>", "query": "<your_detailed_query_for_the_tool_ai>"}}
|
670
|
+
""".strip()
|
671
|
+
)
|
672
|
+
|
673
|
+
previous_iteration = PromptTemplate.from_template(
|
674
|
+
"""
|
675
|
+
## Iteration {index}:
|
676
|
+
- tool: {tool}
|
677
|
+
- query: {query}
|
678
|
+
- result: {result}
|
679
|
+
"""
|
680
|
+
)
|
681
|
+
|
620
682
|
pick_relevant_information_collection_tools = PromptTemplate.from_template(
|
621
683
|
"""
|
622
684
|
You are Khoj, an extremely smart and helpful search assistant.
|
@@ -806,6 +868,53 @@ Khoj:
|
|
806
868
|
""".strip()
|
807
869
|
)
|
808
870
|
|
871
|
+
# Code Generation
|
872
|
+
# --
|
873
|
+
python_code_generation_prompt = PromptTemplate.from_template(
|
874
|
+
"""
|
875
|
+
You are Khoj, an advanced python programmer. You are tasked with constructing **up to three** python programs to best answer the user query.
|
876
|
+
- The python program will run in a pyodide python sandbox with no network access.
|
877
|
+
- You can write programs to run complex calculations, analyze data, create charts, generate documents to meticulously answer the query
|
878
|
+
- The sandbox has access to the standard library, matplotlib, panda, numpy, scipy, bs4, sympy, brotli, cryptography, fast-parquet
|
879
|
+
- Do not try display images or plots in the code directly. The code should save the image or plot to a file instead.
|
880
|
+
- Write any document, charts etc. to be shared with the user to file. These files can be seen by the user.
|
881
|
+
- Use as much context from the previous questions and answers as required to generate your code.
|
882
|
+
{personality_context}
|
883
|
+
What code will you need to write, if any, to answer the user's question?
|
884
|
+
Provide code programs as a list of strings in a JSON object with key "codes".
|
885
|
+
Current Date: {current_date}
|
886
|
+
User's Location: {location}
|
887
|
+
{username}
|
888
|
+
|
889
|
+
The JSON schema is of the form {{"codes": ["code1", "code2", "code3"]}}
|
890
|
+
For example:
|
891
|
+
{{"codes": ["print('Hello, World!')", "print('Goodbye, World!')"]}}
|
892
|
+
|
893
|
+
Now it's your turn to construct python programs to answer the user's question. Provide them as a list of strings in a JSON object. Do not say anything else.
|
894
|
+
Context:
|
895
|
+
---
|
896
|
+
{context}
|
897
|
+
|
898
|
+
Chat History:
|
899
|
+
---
|
900
|
+
{chat_history}
|
901
|
+
|
902
|
+
User: {query}
|
903
|
+
Khoj:
|
904
|
+
""".strip()
|
905
|
+
)
|
906
|
+
|
907
|
+
code_executed_context = PromptTemplate.from_template(
|
908
|
+
"""
|
909
|
+
Use the provided code executions to inform your response.
|
910
|
+
Ask crisp follow-up questions to get additional context, when a helpful response cannot be provided from the provided code execution results or past conversations.
|
911
|
+
|
912
|
+
Code Execution Results:
|
913
|
+
{code_results}
|
914
|
+
""".strip()
|
915
|
+
)
|
916
|
+
|
917
|
+
|
809
918
|
# Automations
|
810
919
|
# --
|
811
920
|
crontime_prompt = PromptTemplate.from_template(
|
@@ -2,16 +2,20 @@ import base64
|
|
2
2
|
import logging
|
3
3
|
import math
|
4
4
|
import mimetypes
|
5
|
+
import os
|
5
6
|
import queue
|
6
7
|
from dataclasses import dataclass
|
7
8
|
from datetime import datetime
|
9
|
+
from enum import Enum
|
8
10
|
from io import BytesIO
|
9
11
|
from time import perf_counter
|
10
|
-
from typing import Any, Dict, List, Optional
|
12
|
+
from typing import Any, Callable, Dict, List, Optional
|
11
13
|
|
12
14
|
import PIL.Image
|
13
15
|
import requests
|
14
16
|
import tiktoken
|
17
|
+
import yaml
|
18
|
+
from git import Repo
|
15
19
|
from langchain.schema import ChatMessage
|
16
20
|
from llama_cpp.llama import Llama
|
17
21
|
from transformers import AutoTokenizer
|
@@ -20,8 +24,17 @@ from khoj.database.adapters import ConversationAdapters
|
|
20
24
|
from khoj.database.models import ChatModelOptions, ClientApplication, KhojUser
|
21
25
|
from khoj.processor.conversation import prompts
|
22
26
|
from khoj.processor.conversation.offline.utils import download_model, infer_max_tokens
|
27
|
+
from khoj.search_filter.base_filter import BaseFilter
|
28
|
+
from khoj.search_filter.date_filter import DateFilter
|
29
|
+
from khoj.search_filter.file_filter import FileFilter
|
30
|
+
from khoj.search_filter.word_filter import WordFilter
|
23
31
|
from khoj.utils import state
|
24
|
-
from khoj.utils.helpers import
|
32
|
+
from khoj.utils.helpers import (
|
33
|
+
ConversationCommand,
|
34
|
+
in_debug_mode,
|
35
|
+
is_none_or_empty,
|
36
|
+
merge_dicts,
|
37
|
+
)
|
25
38
|
|
26
39
|
logger = logging.getLogger(__name__)
|
27
40
|
model_to_prompt_size = {
|
@@ -82,8 +95,105 @@ class ThreadedGenerator:
|
|
82
95
|
self.queue.put(StopIteration)
|
83
96
|
|
84
97
|
|
98
|
+
class InformationCollectionIteration:
|
99
|
+
def __init__(
|
100
|
+
self,
|
101
|
+
tool: str,
|
102
|
+
query: str,
|
103
|
+
context: list = None,
|
104
|
+
onlineContext: dict = None,
|
105
|
+
codeContext: dict = None,
|
106
|
+
summarizedResult: str = None,
|
107
|
+
):
|
108
|
+
self.tool = tool
|
109
|
+
self.query = query
|
110
|
+
self.context = context
|
111
|
+
self.onlineContext = onlineContext
|
112
|
+
self.codeContext = codeContext
|
113
|
+
self.summarizedResult = summarizedResult
|
114
|
+
|
115
|
+
|
116
|
+
def construct_iteration_history(
|
117
|
+
previous_iterations: List[InformationCollectionIteration], previous_iteration_prompt: str
|
118
|
+
) -> str:
|
119
|
+
previous_iterations_history = ""
|
120
|
+
for idx, iteration in enumerate(previous_iterations):
|
121
|
+
iteration_data = previous_iteration_prompt.format(
|
122
|
+
tool=iteration.tool,
|
123
|
+
query=iteration.query,
|
124
|
+
result=iteration.summarizedResult,
|
125
|
+
index=idx + 1,
|
126
|
+
)
|
127
|
+
|
128
|
+
previous_iterations_history += iteration_data
|
129
|
+
return previous_iterations_history
|
130
|
+
|
131
|
+
|
132
|
+
def construct_chat_history(conversation_history: dict, n: int = 4, agent_name="AI") -> str:
|
133
|
+
chat_history = ""
|
134
|
+
for chat in conversation_history.get("chat", [])[-n:]:
|
135
|
+
if chat["by"] == "khoj" and chat["intent"].get("type") in ["remember", "reminder", "summarize"]:
|
136
|
+
chat_history += f"User: {chat['intent']['query']}\n"
|
137
|
+
chat_history += f"{agent_name}: {chat['message']}\n"
|
138
|
+
elif chat["by"] == "khoj" and ("text-to-image" in chat["intent"].get("type")):
|
139
|
+
chat_history += f"User: {chat['intent']['query']}\n"
|
140
|
+
chat_history += f"{agent_name}: [generated image redacted for space]\n"
|
141
|
+
elif chat["by"] == "khoj" and ("excalidraw" in chat["intent"].get("type")):
|
142
|
+
chat_history += f"User: {chat['intent']['query']}\n"
|
143
|
+
chat_history += f"{agent_name}: {chat['intent']['inferred-queries'][0]}\n"
|
144
|
+
return chat_history
|
145
|
+
|
146
|
+
|
147
|
+
def construct_tool_chat_history(
|
148
|
+
previous_iterations: List[InformationCollectionIteration], tool: ConversationCommand = None
|
149
|
+
) -> Dict[str, list]:
|
150
|
+
chat_history: list = []
|
151
|
+
inferred_query_extractor: Callable[[InformationCollectionIteration], List[str]] = lambda x: []
|
152
|
+
if tool == ConversationCommand.Notes:
|
153
|
+
inferred_query_extractor = (
|
154
|
+
lambda iteration: [c["query"] for c in iteration.context] if iteration.context else []
|
155
|
+
)
|
156
|
+
elif tool == ConversationCommand.Online:
|
157
|
+
inferred_query_extractor = (
|
158
|
+
lambda iteration: list(iteration.onlineContext.keys()) if iteration.onlineContext else []
|
159
|
+
)
|
160
|
+
elif tool == ConversationCommand.Code:
|
161
|
+
inferred_query_extractor = lambda iteration: list(iteration.codeContext.keys()) if iteration.codeContext else []
|
162
|
+
for iteration in previous_iterations:
|
163
|
+
chat_history += [
|
164
|
+
{
|
165
|
+
"by": "you",
|
166
|
+
"message": iteration.query,
|
167
|
+
},
|
168
|
+
{
|
169
|
+
"by": "khoj",
|
170
|
+
"intent": {
|
171
|
+
"type": "remember",
|
172
|
+
"inferred-queries": inferred_query_extractor(iteration),
|
173
|
+
"query": iteration.query,
|
174
|
+
},
|
175
|
+
"message": iteration.summarizedResult,
|
176
|
+
},
|
177
|
+
]
|
178
|
+
|
179
|
+
return {"chat": chat_history}
|
180
|
+
|
181
|
+
|
182
|
+
class ChatEvent(Enum):
|
183
|
+
START_LLM_RESPONSE = "start_llm_response"
|
184
|
+
END_LLM_RESPONSE = "end_llm_response"
|
185
|
+
MESSAGE = "message"
|
186
|
+
REFERENCES = "references"
|
187
|
+
STATUS = "status"
|
188
|
+
|
189
|
+
|
85
190
|
def message_to_log(
|
86
|
-
user_message,
|
191
|
+
user_message,
|
192
|
+
chat_response,
|
193
|
+
user_message_metadata={},
|
194
|
+
khoj_message_metadata={},
|
195
|
+
conversation_log=[],
|
196
|
+
train_of_thought=[],
|
87
197
|
):
|
88
198
|
"""Create json logs from messages, metadata for conversation log"""
|
89
199
|
default_khoj_message_metadata = {
|
@@ -111,12 +221,15 @@ def save_to_conversation_log(
|
|
111
221
|
user_message_time: str = None,
|
112
222
|
compiled_references: List[Dict[str, Any]] = [],
|
113
223
|
online_results: Dict[str, Any] = {},
|
224
|
+
code_results: Dict[str, Any] = {},
|
114
225
|
inferred_queries: List[str] = [],
|
115
226
|
intent_type: str = "remember",
|
116
227
|
client_application: ClientApplication = None,
|
117
228
|
conversation_id: str = None,
|
118
229
|
automation_id: str = None,
|
119
230
|
query_images: List[str] = None,
|
231
|
+
tracer: Dict[str, Any] = {},
|
232
|
+
train_of_thought: List[Any] = [],
|
120
233
|
):
|
121
234
|
user_message_time = user_message_time or datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
122
235
|
updated_conversation = message_to_log(
|
@@ -130,9 +243,12 @@ def save_to_conversation_log(
|
|
130
243
|
"context": compiled_references,
|
131
244
|
"intent": {"inferred-queries": inferred_queries, "type": intent_type},
|
132
245
|
"onlineContext": online_results,
|
246
|
+
"codeContext": code_results,
|
133
247
|
"automationId": automation_id,
|
248
|
+
"trainOfThought": train_of_thought,
|
134
249
|
},
|
135
250
|
conversation_log=meta_log.get("chat", []),
|
251
|
+
train_of_thought=train_of_thought,
|
136
252
|
)
|
137
253
|
ConversationAdapters.save_conversation(
|
138
254
|
user,
|
@@ -142,6 +258,9 @@ def save_to_conversation_log(
|
|
142
258
|
user_message=q,
|
143
259
|
)
|
144
260
|
|
261
|
+
if in_debug_mode() or state.verbose > 1:
|
262
|
+
merge_message_into_conversation_trace(q, chat_response, tracer)
|
263
|
+
|
145
264
|
logger.info(
|
146
265
|
f"""
|
147
266
|
Saved Conversation Turn
|
@@ -323,9 +442,23 @@ def reciprocal_conversation_to_chatml(message_pair):
|
|
323
442
|
return [ChatMessage(content=message, role=role) for message, role in zip(message_pair, ["user", "assistant"])]
|
324
443
|
|
325
444
|
|
326
|
-
def
|
327
|
-
"""Remove any markdown json codeblock formatting if present. Useful for non schema enforceable models"""
|
328
|
-
return response.removeprefix("```json").removesuffix("```")
|
445
|
+
def clean_json(response: str):
|
446
|
+
"""Remove any markdown json codeblock and newline formatting if present. Useful for non schema enforceable models"""
|
447
|
+
return response.strip().replace("\n", "").removeprefix("```json").removesuffix("```")
|
448
|
+
|
449
|
+
|
450
|
+
def clean_code_python(code: str):
|
451
|
+
"""Remove any markdown codeblock and newline formatting if present. Useful for non schema enforceable models"""
|
452
|
+
return code.strip().removeprefix("```python").removesuffix("```")
|
453
|
+
|
454
|
+
|
455
|
+
def defilter_query(query: str):
|
456
|
+
"""Remove any query filters in query"""
|
457
|
+
defiltered_query = query
|
458
|
+
filters: List[BaseFilter] = [WordFilter(), FileFilter(), DateFilter()]
|
459
|
+
for filter in filters:
|
460
|
+
defiltered_query = filter.defilter(defiltered_query)
|
461
|
+
return defiltered_query
|
329
462
|
|
330
463
|
|
331
464
|
@dataclass
|
@@ -354,3 +487,163 @@ def get_image_from_url(image_url: str, type="pil"):
|
|
354
487
|
except requests.exceptions.RequestException as e:
|
355
488
|
logger.error(f"Failed to get image from URL {image_url}: {e}")
|
356
489
|
return ImageWithType(content=None, type=None)
|
490
|
+
|
491
|
+
|
492
|
+
def commit_conversation_trace(
|
493
|
+
session: list[ChatMessage],
|
494
|
+
response: str | list[dict],
|
495
|
+
tracer: dict,
|
496
|
+
system_message: str | list[dict] = "",
|
497
|
+
repo_path: str = "/tmp/promptrace",
|
498
|
+
) -> str:
|
499
|
+
"""
|
500
|
+
Save trace of conversation step using git. Useful to visualize, compare and debug traces.
|
501
|
+
Returns the path to the repository.
|
502
|
+
"""
|
503
|
+
# Serialize session, system message and response to yaml
|
504
|
+
system_message_yaml = yaml.dump(system_message, allow_unicode=True, sort_keys=False, default_flow_style=False)
|
505
|
+
response_yaml = yaml.dump(response, allow_unicode=True, sort_keys=False, default_flow_style=False)
|
506
|
+
formatted_session = [{"role": message.role, "content": message.content} for message in session]
|
507
|
+
session_yaml = yaml.dump(formatted_session, allow_unicode=True, sort_keys=False, default_flow_style=False)
|
508
|
+
query = (
|
509
|
+
yaml.dump(session[-1].content, allow_unicode=True, sort_keys=False, default_flow_style=False)
|
510
|
+
.strip()
|
511
|
+
.removeprefix("'")
|
512
|
+
.removesuffix("'")
|
513
|
+
) # Extract serialized query from chat session
|
514
|
+
|
515
|
+
# Extract chat metadata for session
|
516
|
+
uid, cid, mid = tracer.get("uid", "main"), tracer.get("cid", "main"), tracer.get("mid")
|
517
|
+
|
518
|
+
# Infer repository path from environment variable or provided path
|
519
|
+
repo_path = os.getenv("PROMPTRACE_DIR", repo_path)
|
520
|
+
|
521
|
+
try:
|
522
|
+
# Prepare git repository
|
523
|
+
os.makedirs(repo_path, exist_ok=True)
|
524
|
+
repo = Repo.init(repo_path)
|
525
|
+
|
526
|
+
# Remove post-commit hook if it exists
|
527
|
+
hooks_dir = os.path.join(repo_path, ".git", "hooks")
|
528
|
+
post_commit_hook = os.path.join(hooks_dir, "post-commit")
|
529
|
+
if os.path.exists(post_commit_hook):
|
530
|
+
os.remove(post_commit_hook)
|
531
|
+
|
532
|
+
# Configure git user if not set
|
533
|
+
if not repo.config_reader().has_option("user", "email"):
|
534
|
+
repo.config_writer().set_value("user", "name", "Prompt Tracer").release()
|
535
|
+
repo.config_writer().set_value("user", "email", "promptracer@khoj.dev").release()
|
536
|
+
|
537
|
+
# Create an initial commit if the repository is newly created
|
538
|
+
if not repo.head.is_valid():
|
539
|
+
repo.index.commit("And then there was a trace")
|
540
|
+
|
541
|
+
# Check out the initial commit
|
542
|
+
initial_commit = repo.commit("HEAD~0")
|
543
|
+
repo.head.reference = initial_commit
|
544
|
+
repo.head.reset(index=True, working_tree=True)
|
545
|
+
|
546
|
+
# Create or switch to user branch from initial commit
|
547
|
+
user_branch = f"u_{uid}"
|
548
|
+
if user_branch not in repo.branches:
|
549
|
+
repo.create_head(user_branch)
|
550
|
+
repo.heads[user_branch].checkout()
|
551
|
+
|
552
|
+
# Create or switch to conversation branch from user branch
|
553
|
+
conv_branch = f"c_{cid}"
|
554
|
+
if conv_branch not in repo.branches:
|
555
|
+
repo.create_head(conv_branch)
|
556
|
+
repo.heads[conv_branch].checkout()
|
557
|
+
|
558
|
+
# Create or switch to message branch from conversation branch
|
559
|
+
msg_branch = f"m_{mid}" if mid else None
|
560
|
+
if msg_branch and msg_branch not in repo.branches:
|
561
|
+
repo.create_head(msg_branch)
|
562
|
+
if msg_branch:
|
563
|
+
repo.heads[msg_branch].checkout()
|
564
|
+
|
565
|
+
# Include file with content to commit
|
566
|
+
files_to_commit = {"query": session_yaml, "response": response_yaml, "system_prompt": system_message_yaml}
|
567
|
+
|
568
|
+
# Write files and stage them
|
569
|
+
for filename, content in files_to_commit.items():
|
570
|
+
file_path = os.path.join(repo_path, filename)
|
571
|
+
# Unescape special characters in content for better readability
|
572
|
+
content = content.strip().replace("\\n", "\n").replace("\\t", "\t")
|
573
|
+
with open(file_path, "w", encoding="utf-8") as f:
|
574
|
+
f.write(content)
|
575
|
+
repo.index.add([filename])
|
576
|
+
|
577
|
+
# Create commit
|
578
|
+
metadata_yaml = yaml.dump(tracer, allow_unicode=True, sort_keys=False, default_flow_style=False)
|
579
|
+
commit_message = f"""
|
580
|
+
{query[:250]}
|
581
|
+
|
582
|
+
Response:
|
583
|
+
---
|
584
|
+
{response[:500]}...
|
585
|
+
|
586
|
+
Metadata
|
587
|
+
---
|
588
|
+
{metadata_yaml}
|
589
|
+
""".strip()
|
590
|
+
|
591
|
+
repo.index.commit(commit_message)
|
592
|
+
|
593
|
+
logger.debug(f"Saved conversation trace to repo at {repo_path}")
|
594
|
+
return repo_path
|
595
|
+
except Exception as e:
|
596
|
+
logger.error(f"Failed to add conversation trace to repo: {str(e)}", exc_info=True)
|
597
|
+
return None
|
598
|
+
|
599
|
+
|
600
|
+
def merge_message_into_conversation_trace(query: str, response: str, tracer: dict, repo_path="/tmp/promptrace") -> bool:
|
601
|
+
"""
|
602
|
+
Merge the message branch into its parent conversation branch.
|
603
|
+
|
604
|
+
Args:
|
605
|
+
query: User query
|
606
|
+
response: Assistant response
|
607
|
+
tracer: Dictionary containing uid, cid and mid
|
608
|
+
repo_path: Path to the git repository
|
609
|
+
|
610
|
+
Returns:
|
611
|
+
bool: True if merge was successful, False otherwise
|
612
|
+
"""
|
613
|
+
try:
|
614
|
+
# Extract branch names
|
615
|
+
msg_branch = f"m_{tracer['mid']}"
|
616
|
+
conv_branch = f"c_{tracer['cid']}"
|
617
|
+
|
618
|
+
# Infer repository path from environment variable or provided path
|
619
|
+
repo_path = os.getenv("PROMPTRACE_DIR", repo_path)
|
620
|
+
repo = Repo(repo_path)
|
621
|
+
|
622
|
+
# Checkout conversation branch
|
623
|
+
repo.heads[conv_branch].checkout()
|
624
|
+
|
625
|
+
# Create commit message
|
626
|
+
metadata_yaml = yaml.dump(tracer, allow_unicode=True, sort_keys=False, default_flow_style=False)
|
627
|
+
commit_message = f"""
|
628
|
+
{query[:250]}
|
629
|
+
|
630
|
+
Response:
|
631
|
+
---
|
632
|
+
{response[:500]}...
|
633
|
+
|
634
|
+
Metadata
|
635
|
+
---
|
636
|
+
{metadata_yaml}
|
637
|
+
""".strip()
|
638
|
+
|
639
|
+
# Merge message branch into conversation branch
|
640
|
+
repo.git.merge(msg_branch, no_ff=True, m=commit_message)
|
641
|
+
|
642
|
+
# Delete message branch after merge
|
643
|
+
repo.delete_head(msg_branch, force=True)
|
644
|
+
|
645
|
+
logger.debug(f"Successfully merged {msg_branch} into {conv_branch}")
|
646
|
+
return True
|
647
|
+
except Exception as e:
|
648
|
+
logger.error(f"Failed to merge message {msg_branch} into conversation {conv_branch}: {str(e)}", exc_info=True)
|
649
|
+
return False
|
khoj/processor/image/generate.py
CHANGED
@@ -28,6 +28,7 @@ async def text_to_image(
|
|
28
28
|
send_status_func: Optional[Callable] = None,
|
29
29
|
query_images: Optional[List[str]] = None,
|
30
30
|
agent: Agent = None,
|
31
|
+
tracer: dict = {},
|
31
32
|
):
|
32
33
|
status_code = 200
|
33
34
|
image = None
|
@@ -68,6 +69,7 @@ async def text_to_image(
|
|
68
69
|
query_images=query_images,
|
69
70
|
user=user,
|
70
71
|
agent=agent,
|
72
|
+
tracer=tracer,
|
71
73
|
)
|
72
74
|
|
73
75
|
if send_status_func:
|