khoj 1.28.3__py3-none-any.whl → 1.28.4.dev92__py3-none-any.whl

This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (134)
  1. khoj/configure.py +10 -14
  2. khoj/database/adapters/__init__.py +128 -44
  3. khoj/database/admin.py +6 -3
  4. khoj/database/management/commands/change_default_model.py +7 -72
  5. khoj/database/migrations/0073_delete_usersearchmodelconfig.py +15 -0
  6. khoj/database/models/__init__.py +4 -6
  7. khoj/interface/compiled/404/index.html +1 -1
  8. khoj/interface/compiled/_next/static/chunks/1603-dc5fd983dbcd070d.js +1 -0
  9. khoj/interface/compiled/_next/static/chunks/1970-c78f6acc8e16e30b.js +1 -0
  10. khoj/interface/compiled/_next/static/chunks/2261-748f7c327df3c8c1.js +1 -0
  11. khoj/interface/compiled/_next/static/chunks/3124-a4cea2eda163128d.js +1 -0
  12. khoj/interface/compiled/_next/static/chunks/3803-d74118a2d0182c52.js +1 -0
  13. khoj/interface/compiled/_next/static/chunks/5538-36aa824a75519c5b.js +1 -0
  14. khoj/interface/compiled/_next/static/chunks/5961-3c104d9736b7902b.js +3 -0
  15. khoj/interface/compiled/_next/static/chunks/8423-ebfa9bb9e2424ca3.js +1 -0
  16. khoj/interface/compiled/_next/static/chunks/9417-32c4db52ca42e681.js +1 -0
  17. khoj/interface/compiled/_next/static/chunks/app/agents/layout-e9838b642913a071.js +1 -0
  18. khoj/interface/compiled/_next/static/chunks/app/agents/page-4353b1a532795ad1.js +1 -0
  19. khoj/interface/compiled/_next/static/chunks/app/automations/{page-d3edae545a1b5393.js → page-c9f13c865e739607.js} +1 -1
  20. khoj/interface/compiled/_next/static/chunks/app/chat/layout-b0e7ff4baa3b5265.js +1 -0
  21. khoj/interface/compiled/_next/static/chunks/app/chat/page-45720e1ed71e3ef5.js +1 -0
  22. khoj/interface/compiled/_next/static/chunks/app/{layout-d0f0a9067427fb20.js → layout-86561d2fac35a91a.js} +1 -1
  23. khoj/interface/compiled/_next/static/chunks/app/{page-ea462e20376b6dce.js → page-ecb8e1c192aa8834.js} +1 -1
  24. khoj/interface/compiled/_next/static/chunks/app/search/layout-ea6b73fdaf9b24ca.js +1 -0
  25. khoj/interface/compiled/_next/static/chunks/app/search/{page-a5c277eff207959e.js → page-8e28deacb61f75aa.js} +1 -1
  26. khoj/interface/compiled/_next/static/chunks/app/settings/{layout-a8f33dfe92f997fb.js → layout-254eaaf916449a60.js} +1 -1
  27. khoj/interface/compiled/_next/static/chunks/app/settings/page-2fab613a557d3cc5.js +1 -0
  28. khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-cf7445cf0326bda3.js +1 -0
  29. khoj/interface/compiled/_next/static/chunks/app/share/chat/page-30376aa7e9cfa342.js +1 -0
  30. khoj/interface/compiled/_next/static/chunks/{main-f84cd3c1873cd842.js → main-1ea5c2e0fdef4626.js} +1 -1
  31. khoj/interface/compiled/_next/static/chunks/{webpack-8beec5b51cabb39a.js → webpack-27cf153c35b1338d.js} +1 -1
  32. khoj/interface/compiled/_next/static/css/{467a524c75e7d7c0.css → 0e9d53dcd7f11342.css} +1 -1
  33. khoj/interface/compiled/_next/static/css/{26c1c33d0423a7d8.css → 1f293605f2871853.css} +1 -1
  34. khoj/interface/compiled/_next/static/css/2d097a35da6bfe8d.css +1 -0
  35. khoj/interface/compiled/_next/static/css/80bd6301fc657983.css +1 -0
  36. khoj/interface/compiled/_next/static/css/ed437164d77aa600.css +25 -0
  37. khoj/interface/compiled/_next/static/media/5455839c73f146e7-s.p.woff2 +0 -0
  38. khoj/interface/compiled/_next/static/media/5984b96ba4822821-s.woff2 +0 -0
  39. khoj/interface/compiled/_next/static/media/684adc3dde1b03f1-s.woff2 +0 -0
  40. khoj/interface/compiled/_next/static/media/82e3b9a1bdaf0c26-s.woff2 +0 -0
  41. khoj/interface/compiled/_next/static/media/8d1ea331386a0db8-s.woff2 +0 -0
  42. khoj/interface/compiled/_next/static/media/91475f6526542a4f-s.woff2 +0 -0
  43. khoj/interface/compiled/_next/static/media/b98b13dbc1c3b59c-s.woff2 +0 -0
  44. khoj/interface/compiled/_next/static/media/c824d7a20139e39d-s.woff2 +0 -0
  45. khoj/interface/compiled/agents/index.html +1 -1
  46. khoj/interface/compiled/agents/index.txt +2 -2
  47. khoj/interface/compiled/automations/index.html +1 -1
  48. khoj/interface/compiled/automations/index.txt +2 -2
  49. khoj/interface/compiled/chat/index.html +1 -1
  50. khoj/interface/compiled/chat/index.txt +2 -2
  51. khoj/interface/compiled/index.html +1 -1
  52. khoj/interface/compiled/index.txt +3 -3
  53. khoj/interface/compiled/search/index.html +1 -1
  54. khoj/interface/compiled/search/index.txt +2 -2
  55. khoj/interface/compiled/settings/index.html +1 -1
  56. khoj/interface/compiled/settings/index.txt +3 -3
  57. khoj/interface/compiled/share/chat/index.html +1 -1
  58. khoj/interface/compiled/share/chat/index.txt +3 -3
  59. khoj/processor/content/docx/docx_to_entries.py +27 -21
  60. khoj/processor/content/github/github_to_entries.py +2 -2
  61. khoj/processor/content/images/image_to_entries.py +2 -2
  62. khoj/processor/content/markdown/markdown_to_entries.py +2 -2
  63. khoj/processor/content/notion/notion_to_entries.py +2 -2
  64. khoj/processor/content/org_mode/org_to_entries.py +2 -2
  65. khoj/processor/content/org_mode/orgnode.py +1 -1
  66. khoj/processor/content/pdf/pdf_to_entries.py +37 -29
  67. khoj/processor/content/plaintext/plaintext_to_entries.py +2 -2
  68. khoj/processor/content/text_to_entries.py +3 -4
  69. khoj/processor/conversation/anthropic/anthropic_chat.py +9 -1
  70. khoj/processor/conversation/google/gemini_chat.py +15 -2
  71. khoj/processor/conversation/google/utils.py +3 -1
  72. khoj/processor/conversation/offline/chat_model.py +4 -0
  73. khoj/processor/conversation/openai/gpt.py +6 -1
  74. khoj/processor/conversation/prompts.py +72 -13
  75. khoj/processor/conversation/utils.py +80 -13
  76. khoj/processor/image/generate.py +2 -0
  77. khoj/processor/tools/online_search.py +68 -18
  78. khoj/processor/tools/run_code.py +54 -20
  79. khoj/routers/api.py +10 -4
  80. khoj/routers/api_agents.py +8 -10
  81. khoj/routers/api_chat.py +89 -24
  82. khoj/routers/api_content.py +80 -8
  83. khoj/routers/helpers.py +176 -60
  84. khoj/routers/notion.py +1 -1
  85. khoj/routers/research.py +73 -31
  86. khoj/routers/web_client.py +0 -10
  87. khoj/search_type/text_search.py +3 -7
  88. khoj/utils/cli.py +2 -2
  89. khoj/utils/fs_syncer.py +2 -1
  90. khoj/utils/helpers.py +6 -3
  91. khoj/utils/rawconfig.py +32 -0
  92. khoj/utils/state.py +2 -1
  93. {khoj-1.28.3.dist-info → khoj-1.28.4.dev92.dist-info}/METADATA +3 -3
  94. {khoj-1.28.3.dist-info → khoj-1.28.4.dev92.dist-info}/RECORD +99 -105
  95. {khoj-1.28.3.dist-info → khoj-1.28.4.dev92.dist-info}/WHEEL +1 -1
  96. khoj/interface/compiled/_next/static/chunks/1034-da58b679fcbb79c1.js +0 -1
  97. khoj/interface/compiled/_next/static/chunks/1467-b331e469fe411347.js +0 -1
  98. khoj/interface/compiled/_next/static/chunks/1603-c1568f45947e9f2c.js +0 -1
  99. khoj/interface/compiled/_next/static/chunks/1970-d44050bf658ae5cc.js +0 -1
  100. khoj/interface/compiled/_next/static/chunks/3110-ef2cacd1b8d79ad8.js +0 -1
  101. khoj/interface/compiled/_next/static/chunks/3423-f4b7df2f6f3362f7.js +0 -1
  102. khoj/interface/compiled/_next/static/chunks/394-6bcb8c429f168f21.js +0 -3
  103. khoj/interface/compiled/_next/static/chunks/7113-f2e114d7034a0835.js +0 -1
  104. khoj/interface/compiled/_next/static/chunks/8423-da57554315eebcbe.js +0 -1
  105. khoj/interface/compiled/_next/static/chunks/8840-b8d7b9f0923c6651.js +0 -1
  106. khoj/interface/compiled/_next/static/chunks/9417-0d0fc7eb49a86abb.js +0 -1
  107. khoj/interface/compiled/_next/static/chunks/app/agents/layout-75636ab3a413fa8e.js +0 -1
  108. khoj/interface/compiled/_next/static/chunks/app/agents/page-adbf3cd470da248f.js +0 -1
  109. khoj/interface/compiled/_next/static/chunks/app/chat/layout-96fcf62857bf8f30.js +0 -1
  110. khoj/interface/compiled/_next/static/chunks/app/chat/page-222d348681b848a5.js +0 -1
  111. khoj/interface/compiled/_next/static/chunks/app/factchecker/layout-7b30c541c05fb904.js +0 -1
  112. khoj/interface/compiled/_next/static/chunks/app/factchecker/page-bded0868a08ac4ba.js +0 -1
  113. khoj/interface/compiled/_next/static/chunks/app/search/layout-3720f1362310bebb.js +0 -1
  114. khoj/interface/compiled/_next/static/chunks/app/settings/page-210bd54db4841333.js +0 -1
  115. khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-2df56074e42adaa0.js +0 -1
  116. khoj/interface/compiled/_next/static/chunks/app/share/chat/page-a21b7e8890ed1209.js +0 -1
  117. khoj/interface/compiled/_next/static/css/4cae6c0e5c72fb2d.css +0 -1
  118. khoj/interface/compiled/_next/static/css/553f9cdcc7a2bcd6.css +0 -1
  119. khoj/interface/compiled/_next/static/css/a795ee88875f4853.css +0 -25
  120. khoj/interface/compiled/_next/static/css/afd3d45cc65d55d8.css +0 -1
  121. khoj/interface/compiled/_next/static/media/0e790e04fd40ad16-s.p.woff2 +0 -0
  122. khoj/interface/compiled/_next/static/media/4221e1667cd19c7d-s.woff2 +0 -0
  123. khoj/interface/compiled/_next/static/media/6c276159aa0eb14b-s.woff2 +0 -0
  124. khoj/interface/compiled/_next/static/media/6cc0b9500e4f9168-s.woff2 +0 -0
  125. khoj/interface/compiled/_next/static/media/9d9319a7a2ac39c6-s.woff2 +0 -0
  126. khoj/interface/compiled/_next/static/media/a75c8ea86756d52d-s.woff2 +0 -0
  127. khoj/interface/compiled/_next/static/media/abce7c400ca31a51-s.woff2 +0 -0
  128. khoj/interface/compiled/_next/static/media/f759c939737fb668-s.woff2 +0 -0
  129. khoj/interface/compiled/factchecker/index.html +0 -1
  130. khoj/interface/compiled/factchecker/index.txt +0 -7
  131. /khoj/interface/compiled/_next/static/{EfnEiWDle86AUcxEdEFgO → t_2jovvUVve0Gvc3FqpT9}/_buildManifest.js +0 -0
  132. /khoj/interface/compiled/_next/static/{EfnEiWDle86AUcxEdEFgO → t_2jovvUVve0Gvc3FqpT9}/_ssgManifest.js +0 -0
  133. {khoj-1.28.3.dist-info → khoj-1.28.4.dev92.dist-info}/entry_points.txt +0 -0
  134. {khoj-1.28.3.dist-info → khoj-1.28.4.dev92.dist-info}/licenses/LICENSE +0 -0
khoj/routers/research.py CHANGED
@@ -11,6 +11,7 @@ from khoj.processor.conversation import prompts
11
11
  from khoj.processor.conversation.utils import (
12
12
  InformationCollectionIteration,
13
13
  clean_json,
14
+ construct_chat_history,
14
15
  construct_iteration_history,
15
16
  construct_tool_chat_history,
16
17
  )
@@ -19,8 +20,6 @@ from khoj.processor.tools.run_code import run_code
19
20
  from khoj.routers.api import extract_references_and_questions
20
21
  from khoj.routers.helpers import (
21
22
  ChatEvent,
22
- construct_chat_history,
23
- extract_relevant_info,
24
23
  generate_summary_from_files,
25
24
  send_message_to_model_wrapper,
26
25
  )
@@ -43,38 +42,36 @@ async def apick_next_tool(
43
42
  location: LocationData = None,
44
43
  user_name: str = None,
45
44
  agent: Agent = None,
46
- previous_iterations_history: str = None,
45
+ previous_iterations: List[InformationCollectionIteration] = [],
47
46
  max_iterations: int = 5,
48
47
  send_status_func: Optional[Callable] = None,
49
48
  tracer: dict = {},
49
+ query_files: str = None,
50
50
  ):
51
- """
52
- Given a query, determine which of the available tools the agent should use in order to answer appropriately. One at a time, and it's able to use subsequent iterations to refine the answer.
53
- """
51
+ """Given a query, determine which of the available tools the agent should use in order to answer appropriately."""
54
52
 
53
+ # Construct tool options for the agent to choose from
55
54
  tool_options = dict()
56
55
  tool_options_str = ""
57
-
58
56
  agent_tools = agent.input_tools if agent else []
59
-
60
57
  for tool, description in function_calling_description_for_llm.items():
61
58
  tool_options[tool.value] = description
62
59
  if len(agent_tools) == 0 or tool.value in agent_tools:
63
60
  tool_options_str += f'- "{tool.value}": "{description}"\n'
64
61
 
62
+ # Construct chat history with user and iteration history with researcher agent for context
65
63
  chat_history = construct_chat_history(conversation_history, agent_name=agent.name if agent else "Khoj")
64
+ previous_iterations_history = construct_iteration_history(previous_iterations, prompts.previous_iteration)
66
65
 
67
66
  if query_images:
68
67
  query = f"[placeholder for user attached images]\n{query}"
69
68
 
69
+ today = datetime.today()
70
+ location_data = f"{location}" if location else "Unknown"
70
71
  personality_context = (
71
72
  prompts.personality_context.format(personality=agent.personality) if agent and agent.personality else ""
72
73
  )
73
74
 
74
- # Extract Past User Message and Inferred Questions from Conversation Log
75
- today = datetime.today()
76
- location_data = f"{location}" if location else "Unknown"
77
-
78
75
  function_planning_prompt = prompts.plan_function_execution.format(
79
76
  tools=tool_options_str,
80
77
  chat_history=chat_history,
@@ -87,15 +84,25 @@ async def apick_next_tool(
87
84
  max_iterations=max_iterations,
88
85
  )
89
86
 
90
- with timer("Chat actor: Infer information sources to refer", logger):
91
- response = await send_message_to_model_wrapper(
92
- query=query,
93
- context=function_planning_prompt,
94
- response_type="json_object",
95
- user=user,
96
- query_images=query_images,
97
- tracer=tracer,
87
+ try:
88
+ with timer("Chat actor: Infer information sources to refer", logger):
89
+ response = await send_message_to_model_wrapper(
90
+ query=query,
91
+ context=function_planning_prompt,
92
+ response_type="json_object",
93
+ user=user,
94
+ query_images=query_images,
95
+ query_files=query_files,
96
+ tracer=tracer,
97
+ )
98
+ except Exception as e:
99
+ logger.error(f"Failed to infer information sources to refer: {e}", exc_info=True)
100
+ yield InformationCollectionIteration(
101
+ tool=None,
102
+ query=None,
103
+ warning="Failed to infer information sources to refer. Skipping iteration. Try again.",
98
104
  )
105
+ return
99
106
 
100
107
  try:
101
108
  response = clean_json(response)
@@ -103,8 +110,15 @@ async def apick_next_tool(
103
110
  selected_tool = response.get("tool", None)
104
111
  generated_query = response.get("query", None)
105
112
  scratchpad = response.get("scratchpad", None)
113
+ warning = None
106
114
  logger.info(f"Response for determining relevant tools: {response}")
107
- if send_status_func:
115
+
116
+ # Detect selection of previously used query, tool combination.
117
+ previous_tool_query_combinations = {(i.tool, i.query) for i in previous_iterations}
118
+ if (selected_tool, generated_query) in previous_tool_query_combinations:
119
+ warning = f"Repeated tool, query combination detected. Skipping iteration. Try something different."
120
+ # Only send client status updates if we'll execute this iteration
121
+ elif send_status_func:
108
122
  determined_tool_message = "**Determined Tool**: "
109
123
  determined_tool_message += f"{selected_tool}({generated_query})." if selected_tool else "respond."
110
124
  determined_tool_message += f"\nReason: {scratchpad}" if scratchpad else ""
@@ -114,13 +128,14 @@ async def apick_next_tool(
114
128
  yield InformationCollectionIteration(
115
129
  tool=selected_tool,
116
130
  query=generated_query,
131
+ warning=warning,
117
132
  )
118
-
119
133
  except Exception as e:
120
134
  logger.error(f"Invalid response for determining relevant tools: {response}. {e}", exc_info=True)
121
135
  yield InformationCollectionIteration(
122
136
  tool=None,
123
137
  query=None,
138
+ warning=f"Invalid response for determining relevant tools: {response}. Skipping iteration. Fix error: {e}",
124
139
  )
125
140
 
126
141
 
@@ -137,6 +152,7 @@ async def execute_information_collection(
137
152
  location: LocationData = None,
138
153
  file_filters: List[str] = [],
139
154
  tracer: dict = {},
155
+ query_files: str = None,
140
156
  ):
141
157
  current_iteration = 0
142
158
  MAX_ITERATIONS = 5
@@ -147,7 +163,6 @@ async def execute_information_collection(
147
163
  document_results: List[Dict[str, str]] = []
148
164
  summarize_files: str = ""
149
165
  this_iteration = InformationCollectionIteration(tool=None, query=query)
150
- previous_iterations_history = construct_iteration_history(previous_iterations, prompts.previous_iteration)
151
166
 
152
167
  async for result in apick_next_tool(
153
168
  query,
@@ -157,19 +172,27 @@ async def execute_information_collection(
157
172
  location,
158
173
  user_name,
159
174
  agent,
160
- previous_iterations_history,
175
+ previous_iterations,
161
176
  MAX_ITERATIONS,
162
177
  send_status_func,
163
178
  tracer=tracer,
179
+ query_files=query_files,
164
180
  ):
165
181
  if isinstance(result, dict) and ChatEvent.STATUS in result:
166
182
  yield result[ChatEvent.STATUS]
167
183
  elif isinstance(result, InformationCollectionIteration):
168
184
  this_iteration = result
169
185
 
170
- if this_iteration.tool == ConversationCommand.Notes:
186
+ # Skip running iteration if warning present in iteration
187
+ if this_iteration.warning:
188
+ logger.warning(f"Research mode: {this_iteration.warning}.")
189
+
190
+ elif this_iteration.tool == ConversationCommand.Notes:
171
191
  this_iteration.context = []
172
192
  document_results = []
193
+ previous_inferred_queries = {
194
+ c["query"] for iteration in previous_iterations if iteration.context for c in iteration.context
195
+ }
173
196
  async for result in extract_references_and_questions(
174
197
  request,
175
198
  construct_tool_chat_history(previous_iterations, ConversationCommand.Notes),
@@ -181,8 +204,10 @@ async def execute_information_collection(
181
204
  location,
182
205
  send_status_func,
183
206
  query_images,
207
+ previous_inferred_queries=previous_inferred_queries,
184
208
  agent=agent,
185
209
  tracer=tracer,
210
+ query_files=query_files,
186
211
  ):
187
212
  if isinstance(result, dict) and ChatEvent.STATUS in result:
188
213
  yield result[ChatEvent.STATUS]
@@ -204,6 +229,12 @@ async def execute_information_collection(
204
229
  logger.error(f"Error extracting document references: {e}", exc_info=True)
205
230
 
206
231
  elif this_iteration.tool == ConversationCommand.Online:
232
+ previous_subqueries = {
233
+ subquery
234
+ for iteration in previous_iterations
235
+ if iteration.onlineContext
236
+ for subquery in iteration.onlineContext.keys()
237
+ }
207
238
  async for result in search_online(
208
239
  this_iteration.query,
209
240
  construct_tool_chat_history(previous_iterations, ConversationCommand.Online),
@@ -213,11 +244,16 @@ async def execute_information_collection(
213
244
  [],
214
245
  max_webpages_to_read=0,
215
246
  query_images=query_images,
247
+ previous_subqueries=previous_subqueries,
216
248
  agent=agent,
217
249
  tracer=tracer,
218
250
  ):
219
251
  if isinstance(result, dict) and ChatEvent.STATUS in result:
220
252
  yield result[ChatEvent.STATUS]
253
+ elif is_none_or_empty(result):
254
+ this_iteration.warning = (
255
+ "Detected previously run online search queries. Skipping iteration. Try something different."
256
+ )
221
257
  else:
222
258
  online_results: Dict[str, Dict] = result # type: ignore
223
259
  this_iteration.onlineContext = online_results
@@ -233,6 +269,7 @@ async def execute_information_collection(
233
269
  query_images=query_images,
234
270
  agent=agent,
235
271
  tracer=tracer,
272
+ query_files=query_files,
236
273
  ):
237
274
  if isinstance(result, dict) and ChatEvent.STATUS in result:
238
275
  yield result[ChatEvent.STATUS]
@@ -263,6 +300,7 @@ async def execute_information_collection(
263
300
  send_status_func,
264
301
  query_images=query_images,
265
302
  agent=agent,
303
+ query_files=query_files,
266
304
  tracer=tracer,
267
305
  ):
268
306
  if isinstance(result, dict) and ChatEvent.STATUS in result:
@@ -288,6 +326,7 @@ async def execute_information_collection(
288
326
  query_images=query_images,
289
327
  agent=agent,
290
328
  send_status_func=send_status_func,
329
+ query_files=query_files,
291
330
  ):
292
331
  if isinstance(result, dict) and ChatEvent.STATUS in result:
293
332
  yield result[ChatEvent.STATUS]
@@ -302,16 +341,19 @@ async def execute_information_collection(
302
341
 
303
342
  current_iteration += 1
304
343
 
305
- if document_results or online_results or code_results or summarize_files:
306
- results_data = f"**Results**:\n"
344
+ if document_results or online_results or code_results or summarize_files or this_iteration.warning:
345
+ results_data = f"\n<iteration>{current_iteration}\n<tool>{this_iteration.tool}</tool>\n<query>{this_iteration.query}</query>\n<results>"
307
346
  if document_results:
308
- results_data += f"**Document References**:\n{yaml.dump(document_results, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n"
347
+ results_data += f"\n<document_references>\n{yaml.dump(document_results, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n</document_references>"
309
348
  if online_results:
310
- results_data += f"**Online Results**:\n{yaml.dump(online_results, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n"
349
+ results_data += f"\n<online_results>\n{yaml.dump(online_results, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n</online_results>"
311
350
  if code_results:
312
- results_data += f"**Code Results**:\n{yaml.dump(code_results, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n"
351
+ results_data += f"\n<code_results>\n{yaml.dump(code_results, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n</code_results>"
313
352
  if summarize_files:
314
- results_data += f"**Summarized Files**:\n{yaml.dump(summarize_files, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n"
353
+ results_data += f"\n<summarized_files>\n{yaml.dump(summarize_files, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n</summarized_files>"
354
+ if this_iteration.warning:
355
+ results_data += f"\n<warning>\n{this_iteration.warning}\n</warning>"
356
+ results_data += "\n</results>\n</iteration>"
315
357
 
316
358
  # intermediate_result = await extract_relevant_info(this_iteration.query, results_data, agent)
317
359
  this_iteration.summarizedResult = results_data
@@ -51,16 +51,6 @@ def chat_page(request: Request):
51
51
  )
52
52
 
53
53
 
54
- @web_client.get("/factchecker", response_class=FileResponse)
55
- def fact_checker_page(request: Request):
56
- return templates.TemplateResponse(
57
- "factchecker/index.html",
58
- context={
59
- "request": request,
60
- },
61
- )
62
-
63
-
64
54
  @web_client.get("/login", response_class=FileResponse)
65
55
  def login_page(request: Request):
66
56
  next_url = get_next_url(request)
@@ -8,11 +8,7 @@ import torch
8
8
  from asgiref.sync import sync_to_async
9
9
  from sentence_transformers import util
10
10
 
11
- from khoj.database.adapters import (
12
- EntryAdapters,
13
- get_default_search_model,
14
- get_user_default_search_model,
15
- )
11
+ from khoj.database.adapters import EntryAdapters, get_default_search_model
16
12
  from khoj.database.models import Agent
17
13
  from khoj.database.models import Entry as DbEntry
18
14
  from khoj.database.models import KhojUser
@@ -114,7 +110,7 @@ async def query(
114
110
  file_type = search_type_to_embeddings_type[type.value]
115
111
 
116
112
  query = raw_query
117
- search_model = await sync_to_async(get_user_default_search_model)(user)
113
+ search_model = await sync_to_async(get_default_search_model)()
118
114
  if not max_distance:
119
115
  if search_model.bi_encoder_confidence_threshold:
120
116
  max_distance = search_model.bi_encoder_confidence_threshold
@@ -212,7 +208,7 @@ def setup(
212
208
  text_to_entries: Type[TextToEntries],
213
209
  files: dict[str, str],
214
210
  regenerate: bool,
215
- user: KhojUser = None,
211
+ user: KhojUser,
216
212
  config=None,
217
213
  ) -> Tuple[int, int]:
218
214
  if config:
khoj/utils/cli.py CHANGED
@@ -16,7 +16,7 @@ from khoj.migrations.migrate_processor_config_openai import (
16
16
  )
17
17
  from khoj.migrations.migrate_server_pg import migrate_server_pg
18
18
  from khoj.migrations.migrate_version import migrate_config_to_version
19
- from khoj.utils.helpers import in_debug_mode, resolve_absolute_path
19
+ from khoj.utils.helpers import in_debug_mode, is_env_var_true, resolve_absolute_path
20
20
  from khoj.utils.yaml import parse_config_from_file
21
21
 
22
22
 
@@ -79,7 +79,7 @@ def cli(args=None):
79
79
  else:
80
80
  args = run_migrations(args)
81
81
  args.config = parse_config_from_file(args.config_file)
82
- if in_debug_mode():
82
+ if is_env_var_true("KHOJ_TELEMETRY_DISABLE") or in_debug_mode():
83
83
  args.config.app.should_log_telemetry = False
84
84
 
85
85
  return args
khoj/utils/fs_syncer.py CHANGED
@@ -8,6 +8,7 @@ from bs4 import BeautifulSoup
8
8
  from magika import Magika
9
9
 
10
10
  from khoj.database.models import (
11
+ KhojUser,
11
12
  LocalMarkdownConfig,
12
13
  LocalOrgConfig,
13
14
  LocalPdfConfig,
@@ -21,7 +22,7 @@ logger = logging.getLogger(__name__)
21
22
  magika = Magika()
22
23
 
23
24
 
24
- def collect_files(search_type: Optional[SearchType] = SearchType.All, user=None) -> dict:
25
+ def collect_files(user: KhojUser, search_type: Optional[SearchType] = SearchType.All) -> dict:
25
26
  files: dict[str, dict] = {"docx": {}, "image": {}}
26
27
 
27
28
  if search_type == SearchType.All or search_type == SearchType.Org:
khoj/utils/helpers.py CHANGED
@@ -254,8 +254,10 @@ def get_server_id():
254
254
  return server_id
255
255
 
256
256
 
257
- def telemetry_disabled(app_config: AppConfig):
258
- return not app_config or not app_config.should_log_telemetry
257
+ def telemetry_disabled(app_config: AppConfig, telemetry_disable_env) -> bool:
258
+ return (
259
+ not app_config.should_log_telemetry if app_config and app_config.should_log_telemetry else telemetry_disable_env
260
+ )
259
261
 
260
262
 
261
263
  def log_telemetry(
@@ -263,11 +265,12 @@ def log_telemetry(
263
265
  api: str = None,
264
266
  client: Optional[str] = None,
265
267
  app_config: Optional[AppConfig] = None,
268
+ disable_telemetry_env: bool = False,
266
269
  properties: dict = None,
267
270
  ):
268
271
  """Log basic app usage telemetry like client, os, api called"""
269
272
  # Do not log usage telemetry, if telemetry is disabled via app config
270
- if telemetry_disabled(app_config):
273
+ if telemetry_disabled(app_config, disable_telemetry_env):
271
274
  return []
272
275
 
273
276
  if properties.get("server_id") is None:
khoj/utils/rawconfig.py CHANGED
@@ -138,6 +138,38 @@ class SearchResponse(ConfigBase):
138
138
  corpus_id: str
139
139
 
140
140
 
141
+ class FileData(BaseModel):
142
+ name: str
143
+ content: bytes
144
+ file_type: str
145
+ encoding: str | None = None
146
+
147
+
148
+ class FileAttachment(BaseModel):
149
+ name: str
150
+ content: str
151
+ file_type: str
152
+ size: int
153
+
154
+
155
+ class ChatRequestBody(BaseModel):
156
+ q: str
157
+ n: Optional[int] = 7
158
+ d: Optional[float] = None
159
+ stream: Optional[bool] = False
160
+ title: Optional[str] = None
161
+ conversation_id: Optional[str] = None
162
+ turn_id: Optional[str] = None
163
+ city: Optional[str] = None
164
+ region: Optional[str] = None
165
+ country: Optional[str] = None
166
+ country_code: Optional[str] = None
167
+ timezone: Optional[str] = None
168
+ images: Optional[list[str]] = None
169
+ files: Optional[list[FileAttachment]] = []
170
+ create_new: Optional[bool] = False
171
+
172
+
141
173
  class Entry:
142
174
  raw: str
143
175
  compiled: str
khoj/utils/state.py CHANGED
@@ -12,7 +12,7 @@ from khoj.database.models import ProcessLock
12
12
  from khoj.processor.embeddings import CrossEncoderModel, EmbeddingsModel
13
13
  from khoj.utils import config as utils_config
14
14
  from khoj.utils.config import OfflineChatProcessorModel, SearchModels
15
- from khoj.utils.helpers import LRU, get_device
15
+ from khoj.utils.helpers import LRU, get_device, is_env_var_true
16
16
  from khoj.utils.rawconfig import FullConfig
17
17
 
18
18
  # Application Global State
@@ -34,6 +34,7 @@ SearchType = utils_config.SearchType
34
34
  scheduler: BackgroundScheduler = None
35
35
  schedule_leader_process_lock: ProcessLock = None
36
36
  telemetry: List[Dict[str, str]] = []
37
+ telemetry_disabled: bool = is_env_var_true("KHOJ_TELEMETRY_DISABLE")
37
38
  khoj_version: str = None
38
39
  device = get_device()
39
40
  chat_on_gpu: bool = True
@@ -1,13 +1,11 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: khoj
3
- Version: 1.28.3
3
+ Version: 1.28.4.dev92
4
4
  Summary: Your Second Brain
5
5
  Project-URL: Homepage, https://khoj.dev
6
6
  Project-URL: Documentation, https://docs.khoj.dev
7
7
  Project-URL: Code, https://github.com/khoj-ai/khoj
8
8
  Author: Debanjum Singh Solanky, Saba Imran
9
- License-Expression: AGPL-3.0-or-later
10
- License-File: LICENSE
11
9
  Keywords: AI,NLP,images,markdown,org-mode,pdf,productivity,search,semantic-search
12
10
  Classifier: Development Status :: 5 - Production/Stable
13
11
  Classifier: Intended Audience :: Information Technology
@@ -76,12 +74,14 @@ Requires-Dist: websockets==12.0
76
74
  Provides-Extra: dev
77
75
  Requires-Dist: black>=23.1.0; extra == 'dev'
78
76
  Requires-Dist: boto3>=1.34.57; extra == 'dev'
77
+ Requires-Dist: datasets; extra == 'dev'
79
78
  Requires-Dist: factory-boy>=3.2.1; extra == 'dev'
80
79
  Requires-Dist: freezegun>=1.2.0; extra == 'dev'
81
80
  Requires-Dist: gitpython~=3.1.43; extra == 'dev'
82
81
  Requires-Dist: google-auth==2.23.3; extra == 'dev'
83
82
  Requires-Dist: gunicorn==22.0.0; extra == 'dev'
84
83
  Requires-Dist: mypy>=1.0.1; extra == 'dev'
84
+ Requires-Dist: pandas; extra == 'dev'
85
85
  Requires-Dist: pre-commit>=3.0.4; extra == 'dev'
86
86
  Requires-Dist: pytest-asyncio==0.21.1; extra == 'dev'
87
87
  Requires-Dist: pytest-django==4.5.2; extra == 'dev'