khoj 1.42.1.dev10__py3-none-any.whl → 1.42.2.dev16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. khoj/configure.py +2 -0
  2. khoj/database/adapters/__init__.py +9 -7
  3. khoj/database/models/__init__.py +9 -9
  4. khoj/interface/compiled/404/index.html +2 -2
  5. khoj/interface/compiled/_next/static/chunks/7127-79a3af5138960272.js +1 -0
  6. khoj/interface/compiled/_next/static/chunks/{5138-2cce449fd2454abf.js → 7211-7fedd2ee3655239c.js} +1 -1
  7. khoj/interface/compiled/_next/static/chunks/app/automations/page-ef89ac958e78aa81.js +1 -0
  8. khoj/interface/compiled/_next/static/chunks/app/chat/page-db0fbea54ccea62f.js +1 -0
  9. khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-9a167dc9b5fcd464.js → page-da90c78180a86040.js} +1 -1
  10. khoj/interface/compiled/_next/static/chunks/{webpack-964e8ed3380daff1.js → webpack-0f15e6b51732b337.js} +1 -1
  11. khoj/interface/compiled/_next/static/css/{9c223d337a984468.css → 7017ee76c2f2cd87.css} +1 -1
  12. khoj/interface/compiled/_next/static/css/9a460202d29476e5.css +1 -0
  13. khoj/interface/compiled/agents/index.html +2 -2
  14. khoj/interface/compiled/agents/index.txt +1 -1
  15. khoj/interface/compiled/automations/index.html +2 -2
  16. khoj/interface/compiled/automations/index.txt +2 -2
  17. khoj/interface/compiled/chat/index.html +2 -2
  18. khoj/interface/compiled/chat/index.txt +2 -2
  19. khoj/interface/compiled/index.html +2 -2
  20. khoj/interface/compiled/index.txt +1 -1
  21. khoj/interface/compiled/search/index.html +2 -2
  22. khoj/interface/compiled/search/index.txt +1 -1
  23. khoj/interface/compiled/settings/index.html +2 -2
  24. khoj/interface/compiled/settings/index.txt +1 -1
  25. khoj/interface/compiled/share/chat/index.html +2 -2
  26. khoj/interface/compiled/share/chat/index.txt +2 -2
  27. khoj/processor/conversation/anthropic/anthropic_chat.py +19 -134
  28. khoj/processor/conversation/anthropic/utils.py +1 -1
  29. khoj/processor/conversation/google/gemini_chat.py +20 -141
  30. khoj/processor/conversation/offline/chat_model.py +23 -153
  31. khoj/processor/conversation/openai/gpt.py +14 -128
  32. khoj/processor/conversation/prompts.py +2 -63
  33. khoj/processor/conversation/utils.py +94 -89
  34. khoj/processor/image/generate.py +16 -11
  35. khoj/processor/operator/__init__.py +2 -3
  36. khoj/processor/operator/operator_agent_binary.py +11 -11
  37. khoj/processor/tools/online_search.py +9 -3
  38. khoj/processor/tools/run_code.py +5 -5
  39. khoj/routers/api.py +5 -527
  40. khoj/routers/api_automation.py +243 -0
  41. khoj/routers/api_chat.py +48 -129
  42. khoj/routers/helpers.py +371 -121
  43. khoj/routers/research.py +11 -43
  44. khoj/utils/helpers.py +0 -6
  45. {khoj-1.42.1.dev10.dist-info → khoj-1.42.2.dev16.dist-info}/METADATA +1 -1
  46. {khoj-1.42.1.dev10.dist-info → khoj-1.42.2.dev16.dist-info}/RECORD +51 -50
  47. khoj/interface/compiled/_next/static/chunks/7127-d3199617463d45f0.js +0 -1
  48. khoj/interface/compiled/_next/static/chunks/app/automations/page-465741d9149dfd48.js +0 -1
  49. khoj/interface/compiled/_next/static/chunks/app/chat/page-898079bcea5376f4.js +0 -1
  50. khoj/interface/compiled/_next/static/css/fca983d49c3dd1a3.css +0 -1
  51. /khoj/interface/compiled/_next/static/{2niR8lV9_OpGs1vdb2yMp → OTsOjbrtuaYMukpuJS4sy}/_buildManifest.js +0 -0
  52. /khoj/interface/compiled/_next/static/{2niR8lV9_OpGs1vdb2yMp → OTsOjbrtuaYMukpuJS4sy}/_ssgManifest.js +0 -0
  53. {khoj-1.42.1.dev10.dist-info → khoj-1.42.2.dev16.dist-info}/WHEEL +0 -0
  54. {khoj-1.42.1.dev10.dist-info → khoj-1.42.2.dev16.dist-info}/entry_points.txt +0 -0
  55. {khoj-1.42.1.dev10.dist-info → khoj-1.42.2.dev16.dist-info}/licenses/LICENSE +0 -0
@@ -1,14 +1,11 @@
1
- import asyncio
2
1
  import logging
3
- from datetime import datetime, timedelta
2
+ from datetime import datetime
4
3
  from typing import AsyncGenerator, Dict, List, Optional
5
4
 
6
- import pyjson5
7
- from langchain_core.messages.chat import ChatMessage
8
5
  from openai.lib._pydantic import _ensure_strict_json_schema
9
6
  from pydantic import BaseModel
10
7
 
11
- from khoj.database.models import Agent, ChatModel, KhojUser
8
+ from khoj.database.models import Agent, ChatMessageModel, ChatModel
12
9
  from khoj.processor.conversation import prompts
13
10
  from khoj.processor.conversation.openai.utils import (
14
11
  chat_completion_with_backoff,
@@ -19,105 +16,16 @@ from khoj.processor.conversation.utils import (
19
16
  JsonSupport,
20
17
  OperatorRun,
21
18
  ResponseWithThought,
22
- clean_json,
23
- construct_question_history,
24
- construct_structured_message,
25
19
  generate_chatml_messages_with_context,
26
20
  messages_to_print,
27
21
  )
28
- from khoj.utils.helpers import (
29
- ConversationCommand,
30
- is_none_or_empty,
31
- truncate_code_context,
32
- )
22
+ from khoj.utils.helpers import is_none_or_empty, truncate_code_context
33
23
  from khoj.utils.rawconfig import FileAttachment, LocationData
34
24
  from khoj.utils.yaml import yaml_dump
35
25
 
36
26
  logger = logging.getLogger(__name__)
37
27
 
38
28
 
39
- def extract_questions(
40
- text,
41
- model: Optional[str] = "gpt-4o-mini",
42
- conversation_log={},
43
- api_key=None,
44
- api_base_url=None,
45
- location_data: LocationData = None,
46
- user: KhojUser = None,
47
- query_images: Optional[list[str]] = None,
48
- vision_enabled: bool = False,
49
- personality_context: Optional[str] = None,
50
- query_files: str = None,
51
- tracer: dict = {},
52
- ):
53
- """
54
- Infer search queries to retrieve relevant notes to answer user query
55
- """
56
- location = f"{location_data}" if location_data else "Unknown"
57
- username = prompts.user_name.format(name=user.get_full_name()) if user and user.get_full_name() else ""
58
-
59
- # Extract Past User Message and Inferred Questions from Conversation Log
60
- chat_history = construct_question_history(conversation_log)
61
-
62
- # Get dates relative to today for prompt creation
63
- today = datetime.today()
64
- current_new_year = today.replace(month=1, day=1)
65
- last_new_year = current_new_year.replace(year=today.year - 1)
66
-
67
- prompt = prompts.extract_questions.format(
68
- current_date=today.strftime("%Y-%m-%d"),
69
- day_of_week=today.strftime("%A"),
70
- current_month=today.strftime("%Y-%m"),
71
- last_new_year=last_new_year.strftime("%Y"),
72
- last_new_year_date=last_new_year.strftime("%Y-%m-%d"),
73
- current_new_year_date=current_new_year.strftime("%Y-%m-%d"),
74
- bob_tom_age_difference={current_new_year.year - 1984 - 30},
75
- bob_age={current_new_year.year - 1984},
76
- chat_history=chat_history,
77
- text=text,
78
- yesterday_date=(today - timedelta(days=1)).strftime("%Y-%m-%d"),
79
- location=location,
80
- username=username,
81
- personality_context=personality_context,
82
- )
83
-
84
- prompt = construct_structured_message(
85
- message=prompt,
86
- images=query_images,
87
- model_type=ChatModel.ModelType.OPENAI,
88
- vision_enabled=vision_enabled,
89
- attached_file_context=query_files,
90
- )
91
-
92
- messages = []
93
- messages.append(ChatMessage(content=prompt, role="user"))
94
-
95
- response = send_message_to_model(
96
- messages,
97
- api_key,
98
- model,
99
- response_type="json_object",
100
- api_base_url=api_base_url,
101
- tracer=tracer,
102
- )
103
-
104
- # Extract, Clean Message from GPT's Response
105
- try:
106
- response = clean_json(response)
107
- response = pyjson5.loads(response)
108
- response = [q.strip() for q in response["queries"] if q.strip()]
109
- if not isinstance(response, list) or not response:
110
- logger.error(f"Invalid response for constructing subqueries: {response}")
111
- return [text]
112
- return response
113
- except:
114
- logger.warning(f"GPT returned invalid JSON. Falling back to using user message as search query.\n{response}")
115
- questions = [text]
116
-
117
- logger.debug(f"Extracted Questions by GPT: {questions}")
118
- return questions
119
-
120
-
121
29
  def send_message_to_model(
122
30
  messages,
123
31
  api_key,
@@ -161,32 +69,32 @@ def send_message_to_model(
161
69
 
162
70
 
163
71
  async def converse_openai(
72
+ # Query
164
73
  user_query: str,
74
+ # Context
165
75
  references: list[dict],
166
76
  online_results: Optional[Dict[str, Dict]] = None,
167
77
  code_results: Optional[Dict[str, Dict]] = None,
168
78
  operator_results: Optional[List[OperatorRun]] = None,
169
- conversation_log={},
79
+ query_images: Optional[list[str]] = None,
80
+ query_files: str = None,
81
+ generated_files: List[FileAttachment] = None,
82
+ generated_asset_results: Dict[str, Dict] = {},
83
+ program_execution_context: List[str] = None,
84
+ location_data: LocationData = None,
85
+ chat_history: list[ChatMessageModel] = [],
170
86
  model: str = "gpt-4o-mini",
171
87
  api_key: Optional[str] = None,
172
88
  api_base_url: Optional[str] = None,
173
89
  temperature: float = 0.4,
174
- completion_func=None,
175
- conversation_commands=[ConversationCommand.Default],
176
90
  max_prompt_size=None,
177
91
  tokenizer_name=None,
178
- location_data: LocationData = None,
179
92
  user_name: str = None,
180
93
  agent: Agent = None,
181
- query_images: Optional[list[str]] = None,
182
94
  vision_available: bool = False,
183
- query_files: str = None,
184
- generated_files: List[FileAttachment] = None,
185
- generated_asset_results: Dict[str, Dict] = {},
186
- program_execution_context: List[str] = None,
187
95
  deepthought: Optional[bool] = False,
188
96
  tracer: dict = {},
189
- ) -> AsyncGenerator[str | ResponseWithThought, None]:
97
+ ) -> AsyncGenerator[ResponseWithThought, None]:
190
98
  """
191
99
  Converse with user using OpenAI's ChatGPT
192
100
  """
@@ -214,20 +122,6 @@ async def converse_openai(
214
122
  user_name_prompt = prompts.user_name.format(name=user_name)
215
123
  system_prompt = f"{system_prompt}\n{user_name_prompt}"
216
124
 
217
- # Get Conversation Primer appropriate to Conversation Type
218
- if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(references):
219
- response = prompts.no_notes_found.format()
220
- if completion_func:
221
- asyncio.create_task(completion_func(chat_response=response))
222
- yield response
223
- return
224
- elif conversation_commands == [ConversationCommand.Online] and is_none_or_empty(online_results):
225
- response = prompts.no_online_results_found.format()
226
- if completion_func:
227
- asyncio.create_task(completion_func(chat_response=response))
228
- yield response
229
- return
230
-
231
125
  context_message = ""
232
126
  if not is_none_or_empty(references):
233
127
  context_message = f"{prompts.notes_conversation.format(references=yaml_dump(references))}\n\n"
@@ -251,7 +145,7 @@ async def converse_openai(
251
145
  messages = generate_chatml_messages_with_context(
252
146
  user_query,
253
147
  system_prompt,
254
- conversation_log,
148
+ chat_history,
255
149
  context_message=context_message,
256
150
  model_name=model,
257
151
  max_prompt_size=max_prompt_size,
@@ -267,7 +161,6 @@ async def converse_openai(
267
161
  logger.debug(f"Conversation Context for GPT: {messages_to_print(messages)}")
268
162
 
269
163
  # Get Response from GPT
270
- full_response = ""
271
164
  async for chunk in chat_completion_with_backoff(
272
165
  messages=messages,
273
166
  model_name=model,
@@ -275,17 +168,10 @@ async def converse_openai(
275
168
  openai_api_key=api_key,
276
169
  api_base_url=api_base_url,
277
170
  deepthought=deepthought,
278
- model_kwargs={"stop": ["Notes:\n["]},
279
171
  tracer=tracer,
280
172
  ):
281
- if chunk.response:
282
- full_response += chunk.response
283
173
  yield chunk
284
174
 
285
- # Call completion_func once finish streaming and we have the full response
286
- if completion_func:
287
- asyncio.create_task(completion_func(chat_response=full_response))
288
-
289
175
 
290
176
  def clean_response_schema(schema: BaseModel | dict) -> dict:
291
177
  """
@@ -549,68 +549,7 @@ Q: {query}
549
549
  )
550
550
 
551
551
 
552
- extract_questions = PromptTemplate.from_template(
553
- """
554
- You are Khoj, an extremely smart and helpful document search assistant with only the ability to retrieve information from the user's notes and documents.
555
- Construct search queries to retrieve relevant information to answer the user's question.
556
- - You will be provided example and actual past user questions(Q), search queries(Khoj) and answers(A) for context.
557
- - Add as much context from the previous questions and answers as required into your search queries.
558
- - Break your search down into multiple search queries from a diverse set of lenses to retrieve all related documents.
559
- - Add date filters to your search queries from questions and answers when required to retrieve the relevant information.
560
- - When asked a meta, vague or random questions, search for a variety of broad topics to answer the user's question.
561
- {personality_context}
562
- What searches will you perform to answer the user's question? Respond with search queries as list of strings in a JSON object.
563
- Current Date: {day_of_week}, {current_date}
564
- User's Location: {location}
565
- {username}
566
-
567
- Examples
568
- ---
569
- Q: How was my trip to Cambodia?
570
- Khoj: {{"queries": ["How was my trip to Cambodia?", "Angkor Wat temple visit", "Flight to Phnom Penh", "Expenses in Cambodia", "Stay in Cambodia"]}}
571
- A: The trip was amazing. You went to the Angkor Wat temple and it was beautiful.
572
-
573
- Q: Who did i visit that temple with?
574
- Khoj: {{"queries": ["Who did I visit the Angkor Wat Temple in Cambodia with?"]}}
575
- A: You visited the Angkor Wat Temple in Cambodia with Pablo, Namita and Xi.
576
-
577
- Q: What national parks did I go to last year?
578
- Khoj: {{"queries": ["National park I visited in {last_new_year} dt>='{last_new_year_date}' dt<'{current_new_year_date}'"]}}
579
- A: You visited the Grand Canyon and Yellowstone National Park in {last_new_year}.
580
-
581
- Q: How can you help me?
582
- Khoj: {{"queries": ["Social relationships", "Physical and mental health", "Education and career", "Personal life goals and habits"]}}
583
- A: I can help you live healthier and happier across work and personal life
584
-
585
- Q: How many tennis balls fit in the back of a 2002 Honda Civic?
586
- Khoj: {{"queries": ["What is the size of a tennis ball?", "What is the trunk size of a 2002 Honda Civic?"]}}
587
- A: 1085 tennis balls will fit in the trunk of a Honda Civic
588
-
589
- Q: Share some random, interesting experiences from this month
590
- Khoj: {{"queries": ["Exciting travel adventures from {current_month}", "Fun social events dt>='{current_month}-01' dt<'{current_date}'", "Intense emotional experiences in {current_month}"]}}
591
- A: You had a great time at the local beach with your friends, attended a music concert and had a deep conversation with your friend, Khalid.
592
-
593
- Q: Is Bob older than Tom?
594
- Khoj: {{"queries": ["When was Bob born?", "What is Tom's age?"]}}
595
- A: Yes, Bob is older than Tom. As Bob was born on 1984-01-01 and Tom is 30 years old.
596
-
597
- Q: What is their age difference?
598
- Khoj: {{"queries": ["What is Bob's age?", "What is Tom's age?"]}}
599
- A: Bob is {bob_tom_age_difference} years older than Tom. As Bob is {bob_age} years old and Tom is 30 years old.
600
-
601
- Q: Who all did I meet here yesterday?
602
- Khoj: {{"queries": ["Met in {location} on {yesterday_date} dt>='{yesterday_date}' dt<'{current_date}'"]}}
603
- A: Yesterday's note mentions your visit to your local beach with Ram and Shyam.
604
-
605
- Actual
606
- ---
607
- {chat_history}
608
- Q: {text}
609
- Khoj:
610
- """.strip()
611
- )
612
-
613
- extract_questions_anthropic_system_prompt = PromptTemplate.from_template(
552
+ extract_questions_system_prompt = PromptTemplate.from_template(
614
553
  """
615
554
  You are Khoj, an extremely smart and helpful document search assistant with only the ability to retrieve information from the user's notes.
616
555
  Construct search queries to retrieve relevant information to answer the user's question.
@@ -651,7 +590,7 @@ A: You had a great time at the local beach with your friends, attended a music c
651
590
  """.strip()
652
591
  )
653
592
 
654
- extract_questions_anthropic_user_message = PromptTemplate.from_template(
593
+ extract_questions_user_message = PromptTemplate.from_template(
655
594
  """
656
595
  Here's our most recent chat history:
657
596
  {chat_history}
@@ -24,7 +24,13 @@ from pydantic import BaseModel
24
24
  from transformers import AutoTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast
25
25
 
26
26
  from khoj.database.adapters import ConversationAdapters
27
- from khoj.database.models import ChatModel, ClientApplication, KhojUser
27
+ from khoj.database.models import (
28
+ ChatMessageModel,
29
+ ChatModel,
30
+ ClientApplication,
31
+ Intent,
32
+ KhojUser,
33
+ )
28
34
  from khoj.processor.conversation import prompts
29
35
  from khoj.processor.conversation.offline.utils import download_model, infer_max_tokens
30
36
  from khoj.search_filter.base_filter import BaseFilter
@@ -161,8 +167,8 @@ def construct_iteration_history(
161
167
  previous_iterations: List[ResearchIteration],
162
168
  previous_iteration_prompt: str,
163
169
  query: str = None,
164
- ) -> list[dict]:
165
- iteration_history: list[dict] = []
170
+ ) -> list[ChatMessageModel]:
171
+ iteration_history: list[ChatMessageModel] = []
166
172
  previous_iteration_messages: list[dict] = []
167
173
  for idx, iteration in enumerate(previous_iterations):
168
174
  iteration_data = previous_iteration_prompt.format(
@@ -176,46 +182,46 @@ def construct_iteration_history(
176
182
 
177
183
  if previous_iteration_messages:
178
184
  if query:
179
- iteration_history.append({"by": "you", "message": query})
185
+ iteration_history.append(ChatMessageModel(by="you", message=query))
180
186
  iteration_history.append(
181
- {
182
- "by": "khoj",
183
- "intent": {"type": "remember", "query": query},
184
- "message": previous_iteration_messages,
185
- }
187
+ ChatMessageModel(
188
+ by="khoj",
189
+ intent={"type": "remember", "query": query},
190
+ message=previous_iteration_messages,
191
+ )
186
192
  )
187
193
  return iteration_history
188
194
 
189
195
 
190
- def construct_chat_history(conversation_history: dict, n: int = 4, agent_name="AI") -> str:
191
- chat_history = ""
192
- for chat in conversation_history.get("chat", [])[-n:]:
193
- if chat["by"] == "khoj" and chat["intent"].get("type") in ["remember", "reminder", "summarize"]:
194
- if chat["intent"].get("inferred-queries"):
195
- chat_history += f'{agent_name}: {{"queries": {chat["intent"].get("inferred-queries")}}}\n'
196
- chat_history += f"{agent_name}: {chat['message']}\n\n"
197
- elif chat["by"] == "khoj" and chat.get("images"):
198
- chat_history += f"User: {chat['intent']['query']}\n"
199
- chat_history += f"{agent_name}: [generated image redacted for space]\n"
200
- elif chat["by"] == "khoj" and ("excalidraw" in chat["intent"].get("type")):
201
- chat_history += f"User: {chat['intent']['query']}\n"
202
- chat_history += f"{agent_name}: {chat['intent']['inferred-queries'][0]}\n"
203
- elif chat["by"] == "you":
204
- chat_history += f"User: {chat['message']}\n"
205
- raw_query_files = chat.get("queryFiles")
196
+ def construct_chat_history(chat_history: list[ChatMessageModel], n: int = 4, agent_name="AI") -> str:
197
+ chat_history_str = ""
198
+ for chat in chat_history[-n:]:
199
+ if chat.by == "khoj" and chat.intent.type in ["remember", "reminder", "summarize"]:
200
+ if chat.intent.inferred_queries:
201
+ chat_history_str += f'{agent_name}: {{"queries": {chat.intent.inferred_queries}}}\n'
202
+ chat_history_str += f"{agent_name}: {chat.message}\n\n"
203
+ elif chat.by == "khoj" and chat.images:
204
+ chat_history_str += f"User: {chat.intent.query}\n"
205
+ chat_history_str += f"{agent_name}: [generated image redacted for space]\n"
206
+ elif chat.by == "khoj" and ("excalidraw" in chat.intent.type):
207
+ chat_history_str += f"User: {chat.intent.query}\n"
208
+ chat_history_str += f"{agent_name}: {chat.intent.inferred_queries[0]}\n"
209
+ elif chat.by == "you":
210
+ chat_history_str += f"User: {chat.message}\n"
211
+ raw_query_files = chat.queryFiles
206
212
  if raw_query_files:
207
213
  query_files: Dict[str, str] = {}
208
214
  for file in raw_query_files:
209
215
  query_files[file["name"]] = file["content"]
210
216
 
211
217
  query_file_context = gather_raw_query_files(query_files)
212
- chat_history += f"User: {query_file_context}\n"
218
+ chat_history_str += f"User: {query_file_context}\n"
213
219
 
214
- return chat_history
220
+ return chat_history_str
215
221
 
216
222
 
217
223
  def construct_question_history(
218
- conversation_log: dict,
224
+ conversation_log: list[ChatMessageModel],
219
225
  include_query: bool = True,
220
226
  lookback: int = 6,
221
227
  query_prefix: str = "Q",
@@ -226,16 +232,16 @@ def construct_question_history(
226
232
  """
227
233
  history_parts = ""
228
234
  original_query = None
229
- for chat in conversation_log.get("chat", [])[-lookback:]:
230
- if chat["by"] == "you":
231
- original_query = chat.get("message")
235
+ for chat in conversation_log[-lookback:]:
236
+ if chat.by == "you":
237
+ original_query = json.dumps(chat.message)
232
238
  history_parts += f"{query_prefix}: {original_query}\n"
233
- if chat["by"] == "khoj":
239
+ if chat.by == "khoj":
234
240
  if original_query is None:
235
241
  continue
236
242
 
237
- message = chat.get("message", "")
238
- inferred_queries_list = chat.get("intent", {}).get("inferred-queries")
243
+ message = chat.message
244
+ inferred_queries_list = chat.intent.inferred_queries or []
239
245
 
240
246
  # Ensure inferred_queries_list is a list, defaulting to the original query in a list
241
247
  if not inferred_queries_list:
@@ -246,7 +252,7 @@ def construct_question_history(
246
252
 
247
253
  if include_query:
248
254
  # Ensure 'type' exists and is a string before checking 'to-image'
249
- intent_type = chat.get("intent", {}).get("type", "")
255
+ intent_type = chat.intent.type if chat.intent and chat.intent.type else ""
250
256
  if "to-image" not in intent_type:
251
257
  history_parts += f'{agent_name}: {{"queries": {inferred_queries_list}}}\n'
252
258
  history_parts += f"A: {message}\n\n"
@@ -259,7 +265,7 @@ def construct_question_history(
259
265
  return history_parts
260
266
 
261
267
 
262
- def construct_chat_history_for_operator(conversation_history: dict, n: int = 6) -> list[AgentMessage]:
268
+ def construct_chat_history_for_operator(conversation_history: List[ChatMessageModel], n: int = 6) -> list[AgentMessage]:
263
269
  """
264
270
  Construct chat history for operator agent in conversation log.
265
271
  Only include last n completed turns (i.e with user and khoj message).
@@ -267,22 +273,22 @@ def construct_chat_history_for_operator(conversation_history: dict, n: int = 6)
267
273
  chat_history: list[AgentMessage] = []
268
274
  user_message: Optional[AgentMessage] = None
269
275
 
270
- for chat in conversation_history.get("chat", []):
276
+ for chat in conversation_history:
271
277
  if len(chat_history) >= n:
272
278
  break
273
- if chat["by"] == "you" and chat.get("message"):
274
- content = [{"type": "text", "text": chat["message"]}]
275
- for file in chat.get("queryFiles", []):
279
+ if chat.by == "you" and chat.message:
280
+ content = [{"type": "text", "text": chat.message}]
281
+ for file in chat.queryFiles or []:
276
282
  content += [{"type": "text", "text": f'## File: {file["name"]}\n\n{file["content"]}'}]
277
283
  user_message = AgentMessage(role="user", content=content)
278
- elif chat["by"] == "khoj" and chat.get("message"):
279
- chat_history += [user_message, AgentMessage(role="assistant", content=chat["message"])]
284
+ elif chat.by == "khoj" and chat.message:
285
+ chat_history += [user_message, AgentMessage(role="assistant", content=chat.message)]
280
286
  return chat_history
281
287
 
282
288
 
283
289
  def construct_tool_chat_history(
284
290
  previous_iterations: List[ResearchIteration], tool: ConversationCommand = None
285
- ) -> Dict[str, list]:
291
+ ) -> List[ChatMessageModel]:
286
292
  """
287
293
  Construct chat history from previous iterations for a specific tool
288
294
 
@@ -313,22 +319,23 @@ def construct_tool_chat_history(
313
319
  tool or ConversationCommand(iteration.tool), base_extractor
314
320
  )
315
321
  chat_history += [
316
- {
317
- "by": "you",
318
- "message": iteration.query,
319
- },
320
- {
321
- "by": "khoj",
322
- "intent": {
323
- "type": "remember",
324
- "inferred-queries": inferred_query_extractor(iteration),
325
- "query": iteration.query,
326
- },
327
- "message": iteration.summarizedResult,
328
- },
322
+ ChatMessageModel(
323
+ by="you",
324
+ message=iteration.query,
325
+ ),
326
+ ChatMessageModel(
327
+ by="khoj",
328
+ intent=Intent(
329
+ type="remember",
330
+ query=iteration.query,
331
+ inferred_queries=inferred_query_extractor(iteration),
332
+ memory_type="notes",
333
+ ),
334
+ message=iteration.summarizedResult,
335
+ ),
329
336
  ]
330
337
 
331
- return {"chat": chat_history}
338
+ return chat_history
332
339
 
333
340
 
334
341
  class ChatEvent(Enum):
@@ -349,8 +356,8 @@ def message_to_log(
349
356
  chat_response,
350
357
  user_message_metadata={},
351
358
  khoj_message_metadata={},
352
- conversation_log=[],
353
- ):
359
+ chat_history: List[ChatMessageModel] = [],
360
+ ) -> List[ChatMessageModel]:
354
361
  """Create json logs from messages, metadata for conversation log"""
355
362
  default_khoj_message_metadata = {
356
363
  "intent": {"type": "remember", "memory-type": "notes", "query": user_message},
@@ -369,15 +376,17 @@ def message_to_log(
369
376
  khoj_log = merge_dicts(khoj_message_metadata, default_khoj_message_metadata)
370
377
  khoj_log = merge_dicts({"message": chat_response, "by": "khoj", "created": khoj_response_time}, khoj_log)
371
378
 
372
- conversation_log.extend([human_log, khoj_log])
373
- return conversation_log
379
+ human_message = ChatMessageModel(**human_log)
380
+ khoj_message = ChatMessageModel(**khoj_log)
381
+ chat_history.extend([human_message, khoj_message])
382
+ return chat_history
374
383
 
375
384
 
376
385
  async def save_to_conversation_log(
377
386
  q: str,
378
387
  chat_response: str,
379
388
  user: KhojUser,
380
- meta_log: Dict,
389
+ chat_history: List[ChatMessageModel],
381
390
  user_message_time: str = None,
382
391
  compiled_references: List[Dict[str, Any]] = [],
383
392
  online_results: Dict[str, Any] = {},
@@ -427,11 +436,11 @@ async def save_to_conversation_log(
427
436
  chat_response=chat_response,
428
437
  user_message_metadata=user_message_metadata,
429
438
  khoj_message_metadata=khoj_message_metadata,
430
- conversation_log=meta_log.get("chat", []),
439
+ chat_history=chat_history,
431
440
  )
432
441
  await ConversationAdapters.save_conversation(
433
442
  user,
434
- {"chat": updated_conversation},
443
+ updated_conversation,
435
444
  client_application=client_application,
436
445
  conversation_id=conversation_id,
437
446
  user_message=q,
@@ -502,7 +511,7 @@ def gather_raw_query_files(
502
511
  def generate_chatml_messages_with_context(
503
512
  user_message: str,
504
513
  system_message: str = None,
505
- conversation_log={},
514
+ chat_history: list[ChatMessageModel] = [],
506
515
  model_name="gpt-4o-mini",
507
516
  loaded_model: Optional[Llama] = None,
508
517
  max_prompt_size=None,
@@ -529,21 +538,21 @@ def generate_chatml_messages_with_context(
529
538
 
530
539
  # Extract Chat History for Context
531
540
  chatml_messages: List[ChatMessage] = []
532
- for chat in conversation_log.get("chat", []):
541
+ for chat in chat_history:
533
542
  message_context = []
534
543
  message_attached_files = ""
535
544
 
536
545
  generated_assets = {}
537
546
 
538
- chat_message = chat.get("message")
539
- role = "user" if chat["by"] == "you" else "assistant"
547
+ chat_message = chat.message
548
+ role = "user" if chat.by == "you" else "assistant"
540
549
 
541
550
  # Legacy code to handle excalidraw diagrams prior to Dec 2024
542
- if chat["by"] == "khoj" and "excalidraw" in chat["intent"].get("type", ""):
543
- chat_message = chat["intent"].get("inferred-queries")[0]
551
+ if chat.by == "khoj" and "excalidraw" in chat.intent.type or "":
552
+ chat_message = (chat.intent.inferred_queries or [])[0]
544
553
 
545
- if chat.get("queryFiles"):
546
- raw_query_files = chat.get("queryFiles")
554
+ if chat.queryFiles:
555
+ raw_query_files = chat.queryFiles
547
556
  query_files_dict = dict()
548
557
  for file in raw_query_files:
549
558
  query_files_dict[file["name"]] = file["content"]
@@ -551,24 +560,24 @@ def generate_chatml_messages_with_context(
551
560
  message_attached_files = gather_raw_query_files(query_files_dict)
552
561
  chatml_messages.append(ChatMessage(content=message_attached_files, role=role))
553
562
 
554
- if not is_none_or_empty(chat.get("onlineContext")):
563
+ if not is_none_or_empty(chat.onlineContext):
555
564
  message_context += [
556
565
  {
557
566
  "type": "text",
558
- "text": f"{prompts.online_search_conversation.format(online_results=chat.get('onlineContext'))}",
567
+ "text": f"{prompts.online_search_conversation.format(online_results=chat.onlineContext)}",
559
568
  }
560
569
  ]
561
570
 
562
- if not is_none_or_empty(chat.get("codeContext")):
571
+ if not is_none_or_empty(chat.codeContext):
563
572
  message_context += [
564
573
  {
565
574
  "type": "text",
566
- "text": f"{prompts.code_executed_context.format(code_results=chat.get('codeContext'))}",
575
+ "text": f"{prompts.code_executed_context.format(code_results=chat.codeContext)}",
567
576
  }
568
577
  ]
569
578
 
570
- if not is_none_or_empty(chat.get("operatorContext")):
571
- operator_context = chat.get("operatorContext")
579
+ if not is_none_or_empty(chat.operatorContext):
580
+ operator_context = chat.operatorContext
572
581
  operator_content = "\n\n".join([f'## Task: {oc["query"]}\n{oc["response"]}\n' for oc in operator_context])
573
582
  message_context += [
574
583
  {
@@ -577,13 +586,9 @@ def generate_chatml_messages_with_context(
577
586
  }
578
587
  ]
579
588
 
580
- if not is_none_or_empty(chat.get("context")):
589
+ if not is_none_or_empty(chat.context):
581
590
  references = "\n\n".join(
582
- {
583
- f"# File: {item['file']}\n## {item['compiled']}\n"
584
- for item in chat.get("context") or []
585
- if isinstance(item, dict)
586
- }
591
+ {f"# File: {item.file}\n## {item.compiled}\n" for item in chat.context or [] if isinstance(item, dict)}
587
592
  )
588
593
  message_context += [{"type": "text", "text": f"{prompts.notes_conversation.format(references=references)}"}]
589
594
 
@@ -591,14 +596,14 @@ def generate_chatml_messages_with_context(
591
596
  reconstructed_context_message = ChatMessage(content=message_context, role="user")
592
597
  chatml_messages.insert(0, reconstructed_context_message)
593
598
 
594
- if not is_none_or_empty(chat.get("images")) and role == "assistant":
599
+ if not is_none_or_empty(chat.images) and role == "assistant":
595
600
  generated_assets["image"] = {
596
- "query": chat.get("intent", {}).get("inferred-queries", [user_message])[0],
601
+ "query": (chat.intent.inferred_queries or [user_message])[0],
597
602
  }
598
603
 
599
- if not is_none_or_empty(chat.get("mermaidjsDiagram")) and role == "assistant":
604
+ if not is_none_or_empty(chat.mermaidjsDiagram) and role == "assistant":
600
605
  generated_assets["diagram"] = {
601
- "query": chat.get("intent", {}).get("inferred-queries", [user_message])[0],
606
+ "query": (chat.intent.inferred_queries or [user_message])[0],
602
607
  }
603
608
 
604
609
  if not is_none_or_empty(generated_assets):
@@ -610,7 +615,7 @@ def generate_chatml_messages_with_context(
610
615
  )
611
616
 
612
617
  message_content = construct_structured_message(
613
- chat_message, chat.get("images") if role == "user" else [], model_type, vision_enabled
618
+ chat_message, chat.images if role == "user" else [], model_type, vision_enabled
614
619
  )
615
620
 
616
621
  reconstructed_message = ChatMessage(content=message_content, role=role)