khoj 1.17.0__py3-none-any.whl → 1.17.1.dev217__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. khoj/configure.py +6 -6
  2. khoj/database/adapters/__init__.py +47 -2
  3. khoj/database/migrations/0053_agent_style_color_agent_style_icon.py +61 -0
  4. khoj/database/models/__init__.py +35 -0
  5. khoj/interface/web/assets/icons/favicon-128x128.png +0 -0
  6. khoj/interface/web/assets/icons/favicon-256x256.png +0 -0
  7. khoj/interface/web/assets/icons/khoj-logo-sideways-200.png +0 -0
  8. khoj/interface/web/assets/icons/khoj-logo-sideways-500.png +0 -0
  9. khoj/interface/web/assets/icons/khoj-logo-sideways.svg +31 -5384
  10. khoj/interface/web/assets/icons/khoj.svg +26 -0
  11. khoj/interface/web/chat.html +5 -5
  12. khoj/interface/web/content_source_computer_input.html +3 -3
  13. khoj/interface/web/content_source_github_input.html +1 -1
  14. khoj/interface/web/content_source_notion_input.html +1 -1
  15. khoj/interface/web/public_conversation.html +1 -1
  16. khoj/interface/web/search.html +2 -2
  17. khoj/interface/web/{config.html → settings.html} +30 -30
  18. khoj/interface/web/utils.html +1 -1
  19. khoj/processor/content/docx/docx_to_entries.py +4 -9
  20. khoj/processor/content/github/github_to_entries.py +1 -3
  21. khoj/processor/content/images/image_to_entries.py +4 -9
  22. khoj/processor/content/markdown/markdown_to_entries.py +4 -9
  23. khoj/processor/content/notion/notion_to_entries.py +1 -3
  24. khoj/processor/content/org_mode/org_to_entries.py +4 -9
  25. khoj/processor/content/pdf/pdf_to_entries.py +4 -9
  26. khoj/processor/content/plaintext/plaintext_to_entries.py +4 -9
  27. khoj/processor/content/text_to_entries.py +1 -3
  28. khoj/processor/tools/online_search.py +4 -4
  29. khoj/routers/api.py +49 -4
  30. khoj/routers/api_agents.py +3 -1
  31. khoj/routers/api_chat.py +80 -88
  32. khoj/routers/api_content.py +538 -0
  33. khoj/routers/api_model.py +156 -0
  34. khoj/routers/helpers.py +308 -7
  35. khoj/routers/notion.py +2 -8
  36. khoj/routers/web_client.py +43 -256
  37. khoj/search_type/text_search.py +5 -4
  38. khoj/utils/fs_syncer.py +3 -1
  39. khoj/utils/rawconfig.py +6 -1
  40. {khoj-1.17.0.dist-info → khoj-1.17.1.dev217.dist-info}/METADATA +2 -2
  41. {khoj-1.17.0.dist-info → khoj-1.17.1.dev217.dist-info}/RECORD +44 -42
  42. khoj/routers/api_config.py +0 -434
  43. khoj/routers/indexer.py +0 -349
  44. {khoj-1.17.0.dist-info → khoj-1.17.1.dev217.dist-info}/WHEEL +0 -0
  45. {khoj-1.17.0.dist-info → khoj-1.17.1.dev217.dist-info}/entry_points.txt +0 -0
  46. {khoj-1.17.0.dist-info → khoj-1.17.1.dev217.dist-info}/licenses/LICENSE +0 -0
khoj/routers/api.py CHANGED
@@ -19,6 +19,7 @@ from fastapi.responses import Response
19
19
  from starlette.authentication import has_required_scope, requires
20
20
 
21
21
  from khoj.configure import initialize_content
22
+ from khoj.database import adapters
22
23
  from khoj.database.adapters import (
23
24
  AutomationAdapters,
24
25
  ConversationAdapters,
@@ -40,6 +41,7 @@ from khoj.routers.helpers import (
40
41
  CommonQueryParams,
41
42
  ConversationCommandRateLimiter,
42
43
  acreate_title_from_query,
44
+ get_user_config,
43
45
  schedule_automation,
44
46
  update_telemetry_state,
45
47
  )
@@ -190,7 +192,7 @@ def update(
190
192
  ):
191
193
  user = request.user.object
192
194
  if not state.config:
193
- error_msg = f"🚨 Khoj is not configured.\nConfigure it via http://localhost:42110/config, plugins or by editing {state.config_file}."
195
+ error_msg = f"🚨 Khoj is not configured.\nConfigure it via http://localhost:42110/settings, plugins or by editing {state.config_file}."
194
196
  logger.warning(error_msg)
195
197
  raise HTTPException(status_code=500, detail=error_msg)
196
198
  try:
@@ -223,10 +225,10 @@ async def transcribe(
223
225
  common: CommonQueryParams,
224
226
  file: UploadFile = File(...),
225
227
  rate_limiter_per_minute=Depends(
226
- ApiUserRateLimiter(requests=1, subscribed_requests=10, window=60, slug="transcribe_minute")
228
+ ApiUserRateLimiter(requests=20, subscribed_requests=20, window=60, slug="transcribe_minute")
227
229
  ),
228
230
  rate_limiter_per_day=Depends(
229
- ApiUserRateLimiter(requests=10, subscribed_requests=600, window=60 * 60 * 24, slug="transcribe_day")
231
+ ApiUserRateLimiter(requests=60, subscribed_requests=600, window=60 * 60 * 24, slug="transcribe_day")
230
232
  ),
231
233
  ):
232
234
  user: KhojUser = request.user.object
@@ -277,6 +279,49 @@ async def transcribe(
277
279
  return Response(content=content, media_type="application/json", status_code=200)
278
280
 
279
281
 
282
+ @api.get("/settings", response_class=Response)
283
+ @requires(["authenticated"])
284
+ def get_settings(request: Request, detailed: Optional[bool] = False) -> Response:
285
+ user = request.user.object
286
+ user_config = get_user_config(user, request, is_detailed=detailed)
287
+ del user_config["request"]
288
+
289
+ # Return config data as a JSON response
290
+ return Response(content=json.dumps(user_config), media_type="application/json", status_code=200)
291
+
292
+
293
+ @api.patch("/user/name", status_code=200)
294
+ @requires(["authenticated"])
295
+ def set_user_name(
296
+ request: Request,
297
+ name: str,
298
+ client: Optional[str] = None,
299
+ ):
300
+ user = request.user.object
301
+
302
+ split_name = name.split(" ")
303
+
304
+ if len(split_name) > 2:
305
+ raise HTTPException(status_code=400, detail="Name must be in the format: Firstname Lastname")
306
+
307
+ if len(split_name) == 1:
308
+ first_name = split_name[0]
309
+ last_name = ""
310
+ else:
311
+ first_name, last_name = split_name[0], split_name[-1]
312
+
313
+ adapters.set_user_name(user, first_name, last_name)
314
+
315
+ update_telemetry_state(
316
+ request=request,
317
+ telemetry_type="api",
318
+ api="set_user_name",
319
+ client=client,
320
+ )
321
+
322
+ return {"status": "ok"}
323
+
324
+
280
325
  async def extract_references_and_questions(
281
326
  request: Request,
282
327
  meta_log: dict,
@@ -376,7 +421,7 @@ async def extract_references_and_questions(
376
421
  logger.info(f"🔍 Searching knowledge base with queries: {inferred_queries}")
377
422
  if send_status_func:
378
423
  inferred_queries_str = "\n- " + "\n- ".join(inferred_queries)
379
- async for event in send_status_func(f"**🔍 Searching Documents for:** {inferred_queries_str}"):
424
+ async for event in send_status_func(f"**Searching Documents for:** {inferred_queries_str}"):
380
425
  yield {ChatEvent.STATUS: event}
381
426
  for query in inferred_queries:
382
427
  n_items = min(n, 3) if using_offline_chat else n
khoj/routers/api_agents.py CHANGED
@@ -30,10 +30,12 @@ async def all_agents(
30
30
  "slug": agent.slug,
31
31
  "avatar": agent.avatar,
32
32
  "name": agent.name,
33
- "personality": agent.personality,
33
+ "persona": agent.personality,
34
34
  "public": agent.public,
35
35
  "creator": agent.creator.username if agent.creator else None,
36
36
  "managed_by_admin": agent.managed_by_admin,
37
+ "color": agent.style_color,
38
+ "icon": agent.style_icon,
37
39
  }
38
40
  )
39
41
 
khoj/routers/api_chat.py CHANGED
@@ -16,7 +16,6 @@ from starlette.authentication import requires
16
16
  from khoj.app.settings import ALLOWED_HOSTS
17
17
  from khoj.database.adapters import (
18
18
  ConversationAdapters,
19
- DataStoreAdapters,
20
19
  EntryAdapters,
21
20
  FileObjectAdapters,
22
21
  PublicConversationAdapters,
@@ -54,7 +53,7 @@ from khoj.utils.helpers import (
54
53
  get_device,
55
54
  is_none_or_empty,
56
55
  )
57
- from khoj.utils.rawconfig import FilterRequest, LocationData
56
+ from khoj.utils.rawconfig import FileFilterRequest, FilesFilterRequest, LocationData
58
57
 
59
58
  # Initialize Router
60
59
  logger = logging.getLogger(__name__)
@@ -88,68 +87,36 @@ def get_file_filter(request: Request, conversation_id: str) -> Response:
88
87
  return Response(content=json.dumps(file_filters), media_type="application/json", status_code=200)
89
88
 
90
89
 
91
- class FactCheckerStoreDataFormat(BaseModel):
92
- factToVerify: str
93
- response: str
94
- references: Any
95
- childReferences: List[Any]
96
- runId: str
97
- modelUsed: Dict[str, Any]
98
-
99
-
100
- class FactCheckerStoreData(BaseModel):
101
- runId: str
102
- storeData: FactCheckerStoreDataFormat
103
-
104
-
105
- @api_chat.post("/store/factchecker", response_class=Response)
90
+ @api_chat.delete("/conversation/file-filters/bulk", response_class=Response)
106
91
  @requires(["authenticated"])
107
- async def store_factchecker(request: Request, common: CommonQueryParams, data: FactCheckerStoreData):
108
- user = request.user.object
109
-
110
- update_telemetry_state(
111
- request=request,
112
- telemetry_type="api",
113
- api="store_factchecker",
114
- **common.__dict__,
115
- )
116
- fact_checker_key = f"factchecker_{data.runId}"
117
- await DataStoreAdapters.astore_data(data.storeData.model_dump_json(), fact_checker_key, user, private=False)
118
- return Response(content=json.dumps({"status": "ok"}), media_type="application/json", status_code=200)
119
-
120
-
121
- @api_chat.get("/store/factchecker", response_class=Response)
122
- async def get_factchecker(request: Request, common: CommonQueryParams, runId: str):
123
- update_telemetry_state(
124
- request=request,
125
- telemetry_type="api",
126
- api="read_factchecker",
127
- **common.__dict__,
128
- )
92
+ def remove_files_filter(request: Request, filter: FilesFilterRequest) -> Response:
93
+ conversation_id = int(filter.conversation_id)
94
+ files_filter = filter.filenames
95
+ file_filters = ConversationAdapters.remove_files_from_filter(request.user.object, conversation_id, files_filter)
96
+ return Response(content=json.dumps(file_filters), media_type="application/json", status_code=200)
129
97
 
130
- fact_checker_key = f"factchecker_{runId}"
131
98
 
132
- data = await DataStoreAdapters.aretrieve_public_data(fact_checker_key)
133
- if data is None:
134
- return Response(status_code=404)
135
- return Response(content=json.dumps(data.value), media_type="application/json", status_code=200)
99
+ @api_chat.post("/conversation/file-filters/bulk", response_class=Response)
100
+ @requires(["authenticated"])
101
+ def add_files_filter(request: Request, filter: FilesFilterRequest):
102
+ try:
103
+ conversation_id = int(filter.conversation_id)
104
+ files_filter = filter.filenames
105
+ file_filters = ConversationAdapters.add_files_to_filter(request.user.object, conversation_id, files_filter)
106
+ return Response(content=json.dumps(file_filters), media_type="application/json", status_code=200)
107
+ except Exception as e:
108
+ logger.error(f"Error adding file filter {filter.filename}: {e}", exc_info=True)
109
+ raise HTTPException(status_code=422, detail=str(e))
136
110
 
137
111
 
138
112
  @api_chat.post("/conversation/file-filters", response_class=Response)
139
113
  @requires(["authenticated"])
140
- def add_file_filter(request: Request, filter: FilterRequest):
114
+ def add_file_filter(request: Request, filter: FileFilterRequest):
141
115
  try:
142
- conversation = ConversationAdapters.get_conversation_by_user(
143
- request.user.object, conversation_id=int(filter.conversation_id)
144
- )
145
- file_list = EntryAdapters.get_all_filenames_by_source(request.user.object, "computer")
146
- if filter.filename in file_list and filter.filename not in conversation.file_filters:
147
- conversation.file_filters.append(filter.filename)
148
- conversation.save()
149
- # remove files from conversation.file_filters that are not in file_list
150
- conversation.file_filters = [file for file in conversation.file_filters if file in file_list]
151
- conversation.save()
152
- return Response(content=json.dumps(conversation.file_filters), media_type="application/json", status_code=200)
116
+ conversation_id = int(filter.conversation_id)
117
+ files_filter = [filter.filename]
118
+ file_filters = ConversationAdapters.add_files_to_filter(request.user.object, conversation_id, files_filter)
119
+ return Response(content=json.dumps(file_filters), media_type="application/json", status_code=200)
153
120
  except Exception as e:
154
121
  logger.error(f"Error adding file filter {filter.filename}: {e}", exc_info=True)
155
122
  raise HTTPException(status_code=422, detail=str(e))
@@ -157,18 +124,11 @@ def add_file_filter(request: Request, filter: FilterRequest):
157
124
 
158
125
  @api_chat.delete("/conversation/file-filters", response_class=Response)
159
126
  @requires(["authenticated"])
160
- def remove_file_filter(request: Request, filter: FilterRequest) -> Response:
161
- conversation = ConversationAdapters.get_conversation_by_user(
162
- request.user.object, conversation_id=int(filter.conversation_id)
163
- )
164
- if filter.filename in conversation.file_filters:
165
- conversation.file_filters.remove(filter.filename)
166
- conversation.save()
167
- # remove files from conversation.file_filters that are not in file_list
168
- file_list = EntryAdapters.get_all_filenames_by_source(request.user.object, "computer")
169
- conversation.file_filters = [file for file in conversation.file_filters if file in file_list]
170
- conversation.save()
171
- return Response(content=json.dumps(conversation.file_filters), media_type="application/json", status_code=200)
127
+ def remove_file_filter(request: Request, filter: FileFilterRequest) -> Response:
128
+ conversation_id = int(filter.conversation_id)
129
+ files_filter = [filter.filename]
130
+ file_filters = ConversationAdapters.remove_files_from_filter(request.user.object, conversation_id, files_filter)
131
+ return Response(content=json.dumps(file_filters), media_type="application/json", status_code=200)
172
132
 
173
133
 
174
134
  class FeedbackData(BaseModel):
@@ -191,10 +151,10 @@ async def text_to_speech(
191
151
  common: CommonQueryParams,
192
152
  text: str,
193
153
  rate_limiter_per_minute=Depends(
194
- ApiUserRateLimiter(requests=5, subscribed_requests=20, window=60, slug="chat_minute")
154
+ ApiUserRateLimiter(requests=20, subscribed_requests=20, window=60, slug="chat_minute")
195
155
  ),
196
156
  rate_limiter_per_day=Depends(
197
- ApiUserRateLimiter(requests=5, subscribed_requests=300, window=60 * 60 * 24, slug="chat_day")
157
+ ApiUserRateLimiter(requests=50, subscribed_requests=300, window=60 * 60 * 24, slug="chat_day")
198
158
  ),
199
159
  ) -> Response:
200
160
  voice_model = await ConversationAdapters.aget_voice_model_config(request.user.object)
@@ -248,6 +208,9 @@ def chat_history(
248
208
  "name": conversation.agent.name,
249
209
  "avatar": conversation.agent.avatar,
250
210
  "isCreator": conversation.agent.creator == user,
211
+ "color": conversation.agent.style_color,
212
+ "icon": conversation.agent.style_icon,
213
+ "persona": conversation.agent.personality,
251
214
  }
252
215
 
253
216
  meta_log = conversation.conversation_log
@@ -302,13 +265,21 @@ def get_shared_chat(
302
265
  "name": conversation.agent.name,
303
266
  "avatar": conversation.agent.avatar,
304
267
  "isCreator": conversation.agent.creator == user,
268
+ "color": conversation.agent.style_color,
269
+ "icon": conversation.agent.style_icon,
270
+ "persona": conversation.agent.personality,
305
271
  }
306
272
 
307
273
  meta_log = conversation.conversation_log
274
+ scrubbed_title = conversation.title if conversation.title else conversation.slug
275
+
276
+ if scrubbed_title:
277
+ scrubbed_title = scrubbed_title.replace("-", " ")
278
+
308
279
  meta_log.update(
309
280
  {
310
281
  "conversation_id": conversation.id,
311
- "slug": conversation.title if conversation.title else conversation.slug,
282
+ "slug": scrubbed_title,
312
283
  "agent": agent_metadata,
313
284
  }
314
285
  )
@@ -324,7 +295,7 @@ def get_shared_chat(
324
295
  update_telemetry_state(
325
296
  request=request,
326
297
  telemetry_type="api",
327
- api="public_conversation_history",
298
+ api="chat_history",
328
299
  **common.__dict__,
329
300
  )
330
301
 
@@ -366,7 +337,7 @@ def fork_public_conversation(
366
337
  public_conversation = PublicConversationAdapters.get_public_conversation_by_slug(public_conversation_slug)
367
338
 
368
339
  # Duplicate Public Conversation to User's Private Conversation
369
- ConversationAdapters.create_conversation_from_public_conversation(
340
+ new_conversation = ConversationAdapters.create_conversation_from_public_conversation(
370
341
  user, public_conversation, request.user.client_app
371
342
  )
372
343
 
@@ -382,7 +353,16 @@ def fork_public_conversation(
382
353
 
383
354
  redirect_uri = str(request.app.url_path_for("chat_page"))
384
355
 
385
- return Response(status_code=200, content=json.dumps({"status": "ok", "next_url": redirect_uri}))
356
+ return Response(
357
+ status_code=200,
358
+ content=json.dumps(
359
+ {
360
+ "status": "ok",
361
+ "next_url": redirect_uri,
362
+ "conversation_id": new_conversation.id,
363
+ }
364
+ ),
365
+ )
386
366
 
387
367
 
388
368
  @api_chat.post("/share")
@@ -423,15 +403,30 @@ def duplicate_chat_history_public_conversation(
423
403
  def chat_sessions(
424
404
  request: Request,
425
405
  common: CommonQueryParams,
406
+ recent: Optional[bool] = False,
426
407
  ):
427
408
  user = request.user.object
428
409
 
429
410
  # Load Conversation Sessions
430
- sessions = ConversationAdapters.get_conversation_sessions(user, request.user.client_app).values_list(
431
- "id", "slug", "title"
411
+ conversations = ConversationAdapters.get_conversation_sessions(user, request.user.client_app)
412
+ if recent:
413
+ conversations = conversations[:8]
414
+
415
+ sessions = conversations.values_list(
416
+ "id", "slug", "title", "agent__slug", "agent__name", "agent__avatar", "created_at", "updated_at"
432
417
  )
433
418
 
434
- session_values = [{"conversation_id": session[0], "slug": session[2] or session[1]} for session in sessions]
419
+ session_values = [
420
+ {
421
+ "conversation_id": session[0],
422
+ "slug": session[2] or session[1],
423
+ "agent_name": session[4],
424
+ "agent_avatar": session[5],
425
+ "created": session[6].strftime("%Y-%m-%d %H:%M:%S"),
426
+ "updated": session[7].strftime("%Y-%m-%d %H:%M:%S"),
427
+ }
428
+ for session in sessions
429
+ ]
435
430
 
436
431
  update_telemetry_state(
437
432
  request=request,
@@ -473,7 +468,6 @@ async def create_chat_session(
473
468
 
474
469
 
475
470
  @api_chat.get("/options", response_class=Response)
476
- @requires(["authenticated"])
477
471
  async def chat_options(
478
472
  request: Request,
479
473
  common: CommonQueryParams,
@@ -610,6 +604,8 @@ async def chat(
610
604
  metadata=chat_metadata,
611
605
  )
612
606
 
607
+ conversation_commands = [get_conversation_command(query=q, any_references=True)]
608
+
613
609
  conversation = await ConversationAdapters.aget_conversation_by_user(
614
610
  user, client_application=request.user.client_app, conversation_id=conversation_id, title=title
615
611
  )
@@ -631,10 +627,6 @@ async def chat(
631
627
  return
632
628
 
633
629
  user_message_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
634
- conversation_commands = [get_conversation_command(query=q, any_references=True)]
635
-
636
- async for result in send_event(ChatEvent.STATUS, f"**👀 Understanding Query**: {q}"):
637
- yield result
638
630
 
639
631
  meta_log = conversation.conversation_log
640
632
  is_automated_task = conversation_commands == [ConversationCommand.AutomatedTask]
@@ -643,12 +635,12 @@ async def chat(
643
635
  conversation_commands = await aget_relevant_information_sources(q, meta_log, is_automated_task)
644
636
  conversation_commands_str = ", ".join([cmd.value for cmd in conversation_commands])
645
637
  async for result in send_event(
646
- ChatEvent.STATUS, f"**🗃️ Chose Data Sources to Search:** {conversation_commands_str}"
638
+ ChatEvent.STATUS, f"**Chose Data Sources to Search:** {conversation_commands_str}"
647
639
  ):
648
640
  yield result
649
641
 
650
642
  mode = await aget_relevant_output_modes(q, meta_log, is_automated_task)
651
- async for result in send_event(ChatEvent.STATUS, f"**🧑🏾‍💻 Decided Response Mode:** {mode.value}"):
643
+ async for result in send_event(ChatEvent.STATUS, f"**Decided Response Mode:** {mode.value}"):
652
644
  yield result
653
645
  if mode not in conversation_commands:
654
646
  conversation_commands.append(mode)
@@ -691,7 +683,7 @@ async def chat(
691
683
  if not q:
692
684
  q = "Create a general summary of the file"
693
685
  async for result in send_event(
694
- ChatEvent.STATUS, f"**🧑🏾‍💻 Constructing Summary Using:** {file_object[0].file_name}"
686
+ ChatEvent.STATUS, f"**Constructing Summary Using:** {file_object[0].file_name}"
695
687
  ):
696
688
  yield result
697
689
 
@@ -783,7 +775,7 @@ async def chat(
783
775
 
784
776
  if not is_none_or_empty(compiled_references):
785
777
  headings = "\n- " + "\n- ".join(set([c.get("compiled", c).split("\n")[0] for c in compiled_references]))
786
- async for result in send_event(ChatEvent.STATUS, f"**📜 Found Relevant Notes**: {headings}"):
778
+ async for result in send_event(ChatEvent.STATUS, f"**Found Relevant Notes**: {headings}"):
787
779
  yield result
788
780
 
789
781
  online_results: Dict = dict()
@@ -832,7 +824,7 @@ async def chat(
832
824
 
833
825
  for webpage in direct_web_pages[query]["webpages"]:
834
826
  webpages.append(webpage["link"])
835
- async for result in send_event(ChatEvent.STATUS, f"**📚 Read web pages**: {webpages}"):
827
+ async for result in send_event(ChatEvent.STATUS, f"**Read web pages**: {webpages}"):
836
828
  yield result
837
829
  except ValueError as e:
838
830
  logger.warning(
@@ -902,7 +894,7 @@ async def chat(
902
894
  return
903
895
 
904
896
  ## Generate Text Output
905
- async for result in send_event(ChatEvent.STATUS, f"**💭 Generating a well-informed response**"):
897
+ async for result in send_event(ChatEvent.STATUS, f"**Generating a well-informed response**"):
906
898
  yield result
907
899
  llm_response, chat_metadata = await agenerate_chat_response(
908
900
  defiltered_query,