khoj 1.16.1.dev15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242) hide show
  1. khoj/__init__.py +0 -0
  2. khoj/app/README.md +94 -0
  3. khoj/app/__init__.py +0 -0
  4. khoj/app/asgi.py +16 -0
  5. khoj/app/settings.py +192 -0
  6. khoj/app/urls.py +25 -0
  7. khoj/configure.py +424 -0
  8. khoj/database/__init__.py +0 -0
  9. khoj/database/adapters/__init__.py +1234 -0
  10. khoj/database/admin.py +290 -0
  11. khoj/database/apps.py +6 -0
  12. khoj/database/management/__init__.py +0 -0
  13. khoj/database/management/commands/__init__.py +0 -0
  14. khoj/database/management/commands/change_generated_images_url.py +61 -0
  15. khoj/database/management/commands/convert_images_png_to_webp.py +99 -0
  16. khoj/database/migrations/0001_khojuser.py +98 -0
  17. khoj/database/migrations/0002_googleuser.py +32 -0
  18. khoj/database/migrations/0003_vector_extension.py +10 -0
  19. khoj/database/migrations/0004_content_types_and_more.py +181 -0
  20. khoj/database/migrations/0005_embeddings_corpus_id.py +19 -0
  21. khoj/database/migrations/0006_embeddingsdates.py +33 -0
  22. khoj/database/migrations/0007_add_conversation.py +27 -0
  23. khoj/database/migrations/0008_alter_conversation_conversation_log.py +17 -0
  24. khoj/database/migrations/0009_khojapiuser.py +24 -0
  25. khoj/database/migrations/0010_chatmodeloptions_and_more.py +83 -0
  26. khoj/database/migrations/0010_rename_embeddings_entry_and_more.py +30 -0
  27. khoj/database/migrations/0011_merge_20231102_0138.py +14 -0
  28. khoj/database/migrations/0012_entry_file_source.py +21 -0
  29. khoj/database/migrations/0013_subscription.py +37 -0
  30. khoj/database/migrations/0014_alter_googleuser_picture.py +17 -0
  31. khoj/database/migrations/0015_alter_subscription_user.py +21 -0
  32. khoj/database/migrations/0016_alter_subscription_renewal_date.py +17 -0
  33. khoj/database/migrations/0017_searchmodel.py +32 -0
  34. khoj/database/migrations/0018_searchmodelconfig_delete_searchmodel.py +30 -0
  35. khoj/database/migrations/0019_alter_googleuser_family_name_and_more.py +27 -0
  36. khoj/database/migrations/0020_reflectivequestion.py +36 -0
  37. khoj/database/migrations/0021_speechtotextmodeloptions_and_more.py +42 -0
  38. khoj/database/migrations/0022_texttoimagemodelconfig.py +25 -0
  39. khoj/database/migrations/0023_usersearchmodelconfig.py +33 -0
  40. khoj/database/migrations/0024_alter_entry_embeddings.py +18 -0
  41. khoj/database/migrations/0025_clientapplication_khojuser_phone_number_and_more.py +46 -0
  42. khoj/database/migrations/0025_searchmodelconfig_embeddings_inference_endpoint_and_more.py +22 -0
  43. khoj/database/migrations/0026_searchmodelconfig_cross_encoder_inference_endpoint_and_more.py +22 -0
  44. khoj/database/migrations/0027_merge_20240118_1324.py +13 -0
  45. khoj/database/migrations/0028_khojuser_verified_phone_number.py +17 -0
  46. khoj/database/migrations/0029_userrequests.py +27 -0
  47. khoj/database/migrations/0030_conversation_slug_and_title.py +38 -0
  48. khoj/database/migrations/0031_agent_conversation_agent.py +53 -0
  49. khoj/database/migrations/0031_alter_googleuser_locale.py +30 -0
  50. khoj/database/migrations/0032_merge_20240322_0427.py +14 -0
  51. khoj/database/migrations/0033_rename_tuning_agent_personality.py +17 -0
  52. khoj/database/migrations/0034_alter_chatmodeloptions_chat_model.py +32 -0
  53. khoj/database/migrations/0035_processlock.py +26 -0
  54. khoj/database/migrations/0036_alter_processlock_name.py +19 -0
  55. khoj/database/migrations/0036_delete_offlinechatprocessorconversationconfig.py +15 -0
  56. khoj/database/migrations/0036_publicconversation.py +42 -0
  57. khoj/database/migrations/0037_chatmodeloptions_openai_config_and_more.py +51 -0
  58. khoj/database/migrations/0037_searchmodelconfig_bi_encoder_docs_encode_config_and_more.py +32 -0
  59. khoj/database/migrations/0038_merge_20240425_0857.py +14 -0
  60. khoj/database/migrations/0038_merge_20240426_1640.py +12 -0
  61. khoj/database/migrations/0039_merge_20240501_0301.py +12 -0
  62. khoj/database/migrations/0040_alter_processlock_name.py +26 -0
  63. khoj/database/migrations/0040_merge_20240504_1010.py +14 -0
  64. khoj/database/migrations/0041_merge_20240505_1234.py +14 -0
  65. khoj/database/migrations/0042_serverchatsettings.py +46 -0
  66. khoj/database/migrations/0043_alter_chatmodeloptions_model_type.py +21 -0
  67. khoj/database/migrations/0044_conversation_file_filters.py +17 -0
  68. khoj/database/migrations/0045_fileobject.py +37 -0
  69. khoj/database/migrations/0046_khojuser_email_verification_code_and_more.py +22 -0
  70. khoj/database/migrations/0047_alter_entry_file_type.py +31 -0
  71. khoj/database/migrations/0048_voicemodeloption_uservoicemodelconfig.py +52 -0
  72. khoj/database/migrations/0049_datastore.py +38 -0
  73. khoj/database/migrations/0049_texttoimagemodelconfig_api_key_and_more.py +58 -0
  74. khoj/database/migrations/0050_alter_processlock_name.py +25 -0
  75. khoj/database/migrations/0051_merge_20240702_1220.py +14 -0
  76. khoj/database/migrations/0052_alter_searchmodelconfig_bi_encoder_docs_encode_config_and_more.py +27 -0
  77. khoj/database/migrations/__init__.py +0 -0
  78. khoj/database/models/__init__.py +402 -0
  79. khoj/database/tests.py +3 -0
  80. khoj/interface/email/feedback.html +34 -0
  81. khoj/interface/email/magic_link.html +17 -0
  82. khoj/interface/email/task.html +40 -0
  83. khoj/interface/email/welcome.html +61 -0
  84. khoj/interface/web/404.html +56 -0
  85. khoj/interface/web/agent.html +312 -0
  86. khoj/interface/web/agents.html +276 -0
  87. khoj/interface/web/assets/icons/agents.svg +6 -0
  88. khoj/interface/web/assets/icons/automation.svg +37 -0
  89. khoj/interface/web/assets/icons/cancel.svg +3 -0
  90. khoj/interface/web/assets/icons/chat.svg +24 -0
  91. khoj/interface/web/assets/icons/collapse.svg +17 -0
  92. khoj/interface/web/assets/icons/computer.png +0 -0
  93. khoj/interface/web/assets/icons/confirm-icon.svg +1 -0
  94. khoj/interface/web/assets/icons/copy-button-success.svg +6 -0
  95. khoj/interface/web/assets/icons/copy-button.svg +5 -0
  96. khoj/interface/web/assets/icons/credit-card.png +0 -0
  97. khoj/interface/web/assets/icons/delete.svg +26 -0
  98. khoj/interface/web/assets/icons/docx.svg +7 -0
  99. khoj/interface/web/assets/icons/edit.svg +4 -0
  100. khoj/interface/web/assets/icons/favicon-128x128.ico +0 -0
  101. khoj/interface/web/assets/icons/favicon-128x128.png +0 -0
  102. khoj/interface/web/assets/icons/favicon-256x256.png +0 -0
  103. khoj/interface/web/assets/icons/favicon.icns +0 -0
  104. khoj/interface/web/assets/icons/github.svg +1 -0
  105. khoj/interface/web/assets/icons/key.svg +4 -0
  106. khoj/interface/web/assets/icons/khoj-logo-sideways-200.png +0 -0
  107. khoj/interface/web/assets/icons/khoj-logo-sideways-500.png +0 -0
  108. khoj/interface/web/assets/icons/khoj-logo-sideways.svg +5385 -0
  109. khoj/interface/web/assets/icons/logotype.svg +1 -0
  110. khoj/interface/web/assets/icons/markdown.svg +1 -0
  111. khoj/interface/web/assets/icons/new.svg +23 -0
  112. khoj/interface/web/assets/icons/notion.svg +4 -0
  113. khoj/interface/web/assets/icons/openai-logomark.svg +1 -0
  114. khoj/interface/web/assets/icons/org.svg +1 -0
  115. khoj/interface/web/assets/icons/pdf.svg +23 -0
  116. khoj/interface/web/assets/icons/pencil-edit.svg +5 -0
  117. khoj/interface/web/assets/icons/plaintext.svg +1 -0
  118. khoj/interface/web/assets/icons/question-mark-icon.svg +1 -0
  119. khoj/interface/web/assets/icons/search.svg +25 -0
  120. khoj/interface/web/assets/icons/send.svg +1 -0
  121. khoj/interface/web/assets/icons/share.svg +8 -0
  122. khoj/interface/web/assets/icons/speaker.svg +4 -0
  123. khoj/interface/web/assets/icons/stop-solid.svg +37 -0
  124. khoj/interface/web/assets/icons/sync.svg +4 -0
  125. khoj/interface/web/assets/icons/thumbs-down-svgrepo-com.svg +6 -0
  126. khoj/interface/web/assets/icons/thumbs-up-svgrepo-com.svg +6 -0
  127. khoj/interface/web/assets/icons/user-silhouette.svg +4 -0
  128. khoj/interface/web/assets/icons/voice.svg +8 -0
  129. khoj/interface/web/assets/icons/web.svg +2 -0
  130. khoj/interface/web/assets/icons/whatsapp.svg +17 -0
  131. khoj/interface/web/assets/khoj.css +237 -0
  132. khoj/interface/web/assets/markdown-it.min.js +8476 -0
  133. khoj/interface/web/assets/natural-cron.min.js +1 -0
  134. khoj/interface/web/assets/org.min.js +1823 -0
  135. khoj/interface/web/assets/pico.min.css +5 -0
  136. khoj/interface/web/assets/purify.min.js +3 -0
  137. khoj/interface/web/assets/samples/desktop-browse-draw-sample.png +0 -0
  138. khoj/interface/web/assets/samples/desktop-plain-chat-sample.png +0 -0
  139. khoj/interface/web/assets/samples/desktop-remember-plan-sample.png +0 -0
  140. khoj/interface/web/assets/samples/phone-browse-draw-sample.png +0 -0
  141. khoj/interface/web/assets/samples/phone-plain-chat-sample.png +0 -0
  142. khoj/interface/web/assets/samples/phone-remember-plan-sample.png +0 -0
  143. khoj/interface/web/assets/utils.js +33 -0
  144. khoj/interface/web/base_config.html +445 -0
  145. khoj/interface/web/chat.html +3546 -0
  146. khoj/interface/web/config.html +1011 -0
  147. khoj/interface/web/config_automation.html +1103 -0
  148. khoj/interface/web/content_source_computer_input.html +139 -0
  149. khoj/interface/web/content_source_github_input.html +216 -0
  150. khoj/interface/web/content_source_notion_input.html +94 -0
  151. khoj/interface/web/khoj.webmanifest +51 -0
  152. khoj/interface/web/login.html +219 -0
  153. khoj/interface/web/public_conversation.html +2006 -0
  154. khoj/interface/web/search.html +470 -0
  155. khoj/interface/web/utils.html +48 -0
  156. khoj/main.py +241 -0
  157. khoj/manage.py +22 -0
  158. khoj/migrations/__init__.py +0 -0
  159. khoj/migrations/migrate_offline_chat_default_model.py +69 -0
  160. khoj/migrations/migrate_offline_chat_default_model_2.py +71 -0
  161. khoj/migrations/migrate_offline_chat_schema.py +83 -0
  162. khoj/migrations/migrate_offline_model.py +29 -0
  163. khoj/migrations/migrate_processor_config_openai.py +67 -0
  164. khoj/migrations/migrate_server_pg.py +138 -0
  165. khoj/migrations/migrate_version.py +17 -0
  166. khoj/processor/__init__.py +0 -0
  167. khoj/processor/content/__init__.py +0 -0
  168. khoj/processor/content/docx/__init__.py +0 -0
  169. khoj/processor/content/docx/docx_to_entries.py +110 -0
  170. khoj/processor/content/github/__init__.py +0 -0
  171. khoj/processor/content/github/github_to_entries.py +224 -0
  172. khoj/processor/content/images/__init__.py +0 -0
  173. khoj/processor/content/images/image_to_entries.py +118 -0
  174. khoj/processor/content/markdown/__init__.py +0 -0
  175. khoj/processor/content/markdown/markdown_to_entries.py +165 -0
  176. khoj/processor/content/notion/notion_to_entries.py +260 -0
  177. khoj/processor/content/org_mode/__init__.py +0 -0
  178. khoj/processor/content/org_mode/org_to_entries.py +231 -0
  179. khoj/processor/content/org_mode/orgnode.py +532 -0
  180. khoj/processor/content/pdf/__init__.py +0 -0
  181. khoj/processor/content/pdf/pdf_to_entries.py +116 -0
  182. khoj/processor/content/plaintext/__init__.py +0 -0
  183. khoj/processor/content/plaintext/plaintext_to_entries.py +122 -0
  184. khoj/processor/content/text_to_entries.py +297 -0
  185. khoj/processor/conversation/__init__.py +0 -0
  186. khoj/processor/conversation/anthropic/__init__.py +0 -0
  187. khoj/processor/conversation/anthropic/anthropic_chat.py +206 -0
  188. khoj/processor/conversation/anthropic/utils.py +114 -0
  189. khoj/processor/conversation/offline/__init__.py +0 -0
  190. khoj/processor/conversation/offline/chat_model.py +231 -0
  191. khoj/processor/conversation/offline/utils.py +78 -0
  192. khoj/processor/conversation/offline/whisper.py +15 -0
  193. khoj/processor/conversation/openai/__init__.py +0 -0
  194. khoj/processor/conversation/openai/gpt.py +187 -0
  195. khoj/processor/conversation/openai/utils.py +129 -0
  196. khoj/processor/conversation/openai/whisper.py +13 -0
  197. khoj/processor/conversation/prompts.py +758 -0
  198. khoj/processor/conversation/utils.py +262 -0
  199. khoj/processor/embeddings.py +117 -0
  200. khoj/processor/speech/__init__.py +0 -0
  201. khoj/processor/speech/text_to_speech.py +51 -0
  202. khoj/processor/tools/__init__.py +0 -0
  203. khoj/processor/tools/online_search.py +225 -0
  204. khoj/routers/__init__.py +0 -0
  205. khoj/routers/api.py +626 -0
  206. khoj/routers/api_agents.py +43 -0
  207. khoj/routers/api_chat.py +1180 -0
  208. khoj/routers/api_config.py +434 -0
  209. khoj/routers/api_phone.py +86 -0
  210. khoj/routers/auth.py +181 -0
  211. khoj/routers/email.py +133 -0
  212. khoj/routers/helpers.py +1188 -0
  213. khoj/routers/indexer.py +349 -0
  214. khoj/routers/notion.py +91 -0
  215. khoj/routers/storage.py +35 -0
  216. khoj/routers/subscription.py +104 -0
  217. khoj/routers/twilio.py +36 -0
  218. khoj/routers/web_client.py +471 -0
  219. khoj/search_filter/__init__.py +0 -0
  220. khoj/search_filter/base_filter.py +15 -0
  221. khoj/search_filter/date_filter.py +217 -0
  222. khoj/search_filter/file_filter.py +30 -0
  223. khoj/search_filter/word_filter.py +29 -0
  224. khoj/search_type/__init__.py +0 -0
  225. khoj/search_type/text_search.py +241 -0
  226. khoj/utils/__init__.py +0 -0
  227. khoj/utils/cli.py +93 -0
  228. khoj/utils/config.py +81 -0
  229. khoj/utils/constants.py +24 -0
  230. khoj/utils/fs_syncer.py +249 -0
  231. khoj/utils/helpers.py +418 -0
  232. khoj/utils/initialization.py +146 -0
  233. khoj/utils/jsonl.py +43 -0
  234. khoj/utils/models.py +47 -0
  235. khoj/utils/rawconfig.py +160 -0
  236. khoj/utils/state.py +46 -0
  237. khoj/utils/yaml.py +43 -0
  238. khoj-1.16.1.dev15.dist-info/METADATA +178 -0
  239. khoj-1.16.1.dev15.dist-info/RECORD +242 -0
  240. khoj-1.16.1.dev15.dist-info/WHEEL +4 -0
  241. khoj-1.16.1.dev15.dist-info/entry_points.txt +2 -0
  242. khoj-1.16.1.dev15.dist-info/licenses/LICENSE +661 -0
khoj/routers/api.py ADDED
@@ -0,0 +1,626 @@
1
+ import concurrent.futures
2
+ import json
3
+ import logging
4
+ import math
5
+ import os
6
+ import threading
7
+ import time
8
+ import uuid
9
+ from random import random
10
+ from typing import Any, Callable, List, Optional, Union
11
+
12
+ import cron_descriptor
13
+ import pytz
14
+ from apscheduler.job import Job
15
+ from apscheduler.triggers.cron import CronTrigger
16
+ from asgiref.sync import sync_to_async
17
+ from fastapi import APIRouter, Depends, File, HTTPException, Request, UploadFile
18
+ from fastapi.requests import Request
19
+ from fastapi.responses import Response
20
+ from starlette.authentication import has_required_scope, requires
21
+
22
+ from khoj.configure import initialize_content
23
+ from khoj.database.adapters import (
24
+ AutomationAdapters,
25
+ ConversationAdapters,
26
+ EntryAdapters,
27
+ get_user_photo,
28
+ get_user_search_model_or_default,
29
+ )
30
+ from khoj.database.models import ChatModelOptions, KhojUser, SpeechToTextModelOptions
31
+ from khoj.processor.conversation.anthropic.anthropic_chat import (
32
+ extract_questions_anthropic,
33
+ )
34
+ from khoj.processor.conversation.offline.chat_model import extract_questions_offline
35
+ from khoj.processor.conversation.offline.whisper import transcribe_audio_offline
36
+ from khoj.processor.conversation.openai.gpt import extract_questions
37
+ from khoj.processor.conversation.openai.whisper import transcribe_audio
38
+ from khoj.routers.helpers import (
39
+ ApiUserRateLimiter,
40
+ CommonQueryParams,
41
+ ConversationCommandRateLimiter,
42
+ acreate_title_from_query,
43
+ schedule_automation,
44
+ update_telemetry_state,
45
+ )
46
+ from khoj.search_filter.date_filter import DateFilter
47
+ from khoj.search_filter.file_filter import FileFilter
48
+ from khoj.search_filter.word_filter import WordFilter
49
+ from khoj.search_type import text_search
50
+ from khoj.utils import state
51
+ from khoj.utils.config import OfflineChatProcessorModel
52
+ from khoj.utils.helpers import ConversationCommand, is_none_or_empty, timer
53
+ from khoj.utils.rawconfig import LocationData, SearchResponse
54
+ from khoj.utils.state import SearchType
55
+
56
# Module-level logger, router and conversation command rate limiter
logger = logging.getLogger(__name__)
api = APIRouter()
conversation_command_rate_limiter = ConversationCommandRateLimiter(
    trial_rate_limit=2,
    subscribed_rate_limit=100,
    slug="command",
)
62
+
63
+
64
@api.get("/search", response_model=List[SearchResponse])
@requires(["authenticated"])
async def search(
    q: str,
    request: Request,
    common: CommonQueryParams,
    n: Optional[int] = 5,
    t: Optional[SearchType] = SearchType.All,
    r: Optional[bool] = False,
    max_distance: Optional[Union[float, None]] = None,
    dedupe: Optional[bool] = True,
):
    # Search endpoint: forwards the query parameters unchanged to execute_search
    # for the authenticated user, records the API call, then returns the results.
    search_params = dict(q=q, n=n, t=t, r=r, max_distance=max_distance, dedupe=dedupe)
    matches = await execute_search(user=request.user.object, **search_params)

    # Record telemetry for this API call
    update_telemetry_state(request=request, telemetry_type="api", api="search", **common.__dict__)

    return matches
96
+
97
+
98
async def execute_search(
    user: KhojUser,
    q: str,
    n: Optional[int] = 5,
    t: Optional[SearchType] = SearchType.All,
    r: Optional[bool] = False,
    max_distance: Optional[Union[float, None]] = None,
    dedupe: Optional[bool] = True,
):
    """Run a search for the given query and return the top results.

    Args:
        user: User to search for. When truthy, results are read from and
            written to state.query_cache under user.uuid.
        q: Raw query. Filter terms are stripped before the query is encoded,
            but the full query is what gets passed to text_search.query.
        n: Maximum number of results to return; falls back to 5 when falsy.
        t: Search type. Only the text based types submit a search.
        r: Forwarded to text_search.rerank_and_sort_results as rank_results.
        max_distance: Forwarded to text_search.query; falls back to math.inf
            when falsy.
        dedupe: Forwarded to text_search.collate_results.

    Returns:
        The list of search responses; empty when q is None or empty.
    """
    started_at = time.time()

    # Nothing to search for without a query
    results: List[SearchResponse] = []
    if q is None or q == "":
        logger.warning("No query param (q) passed in API call to initiate search")
        return results

    # Normalize inputs
    user_query = q.strip()
    results_count = n or 5
    max_distance = max_distance or math.inf
    pending_searches: List[concurrent.futures.Future] = []

    # Serve from the per-user query cache when this exact search was seen before
    if user:
        query_cache_key = f"{user_query}-{n}-{t}-{r}-{max_distance}-{dedupe}"
        user_cache = state.query_cache[user.uuid]
        if query_cache_key in user_cache:
            logger.debug("Return response from query cache")
            return user_cache[query_cache_key]

    # Strip filter terms from the query before encoding it
    defiltered_query = user_query
    for search_filter in (DateFilter(), WordFilter(), FileFilter()):
        defiltered_query = search_filter.defilter(defiltered_query)

    encoded_asymmetric_query = None
    if t != SearchType.Image:
        with timer("Encoding query took", logger=logger):
            search_model = await sync_to_async(get_user_search_model_or_default)(user)
            embeddings_model = state.embeddings_model[search_model.name]
            encoded_asymmetric_query = embeddings_model.embed_query(defiltered_query)

    text_search_types = (
        SearchType.All,
        SearchType.Org,
        SearchType.Markdown,
        SearchType.Github,
        SearchType.Notion,
        SearchType.Plaintext,
        SearchType.Pdf,
    )
    with concurrent.futures.ThreadPoolExecutor() as executor:
        if t in text_search_types:
            # Submit the text search to the thread pool
            pending_searches.append(
                executor.submit(
                    text_search.query,
                    user,
                    user_query,
                    t,
                    question_embedding=encoded_asymmetric_query,
                    max_distance=max_distance,
                )
            )

        # Gather results from each submitted search as it completes
        with timer("Query took", logger):
            for completed_search in concurrent.futures.as_completed(pending_searches):
                # NOTE(review): the future's result is awaited, so text_search.query
                # presumably returns an awaitable - confirm against its definition
                hits = await completed_search.result()
                results += text_search.collate_results(hits, dedupe=dedupe)

                # Sort the collated results and keep only the requested number
                ranked_results = text_search.rerank_and_sort_results(
                    results, query=defiltered_query, rank_results=r, search_model_name=search_model.name
                )
                results = ranked_results[:results_count]

    # Cache results. Index the cache again rather than reusing the earlier
    # lookup, as execution was suspended at awaits in between.
    if user:
        state.query_cache[user.uuid][query_cache_key] = results

    elapsed_seconds = time.time() - started_at
    logger.debug(f"🔍 Search took: {elapsed_seconds:.3f} seconds")

    return results
181
+
182
+
183
@api.get("/update")
@requires(["authenticated"])
def update(
    request: Request,
    common: CommonQueryParams,
    t: Optional[SearchType] = None,
    force: Optional[bool] = False,
):
    # Update endpoint: re-initializes content for the authenticated user via
    # initialize_content. Responds with HTTP 500 when the server has no config
    # or when the update fails.
    user = request.user.object

    # Refuse to update an unconfigured server
    if not state.config:
        error_msg = f"🚨 Khoj is not configured.\nConfigure it via http://localhost:42110/config, plugins or by editing {state.config_file}."
        logger.warning(error_msg)
        raise HTTPException(status_code=500, detail=error_msg)

    try:
        initialize_content(regenerate=force, search_type=t, user=user)
    except Exception as e:
        error_msg = f"🚨 Failed to update server via API: {e}"
        logger.error(error_msg, exc_info=True)
        raise HTTPException(status_code=500, detail=error_msg)

    # Only reached when the update succeeded, as the handler above always raises
    components = ["Search models"] if state.search_models else []
    components_msg = ", ".join(components)
    logger.info(f"📪 {components_msg} updated via API")

    update_telemetry_state(request=request, telemetry_type="api", api="update", **common.__dict__)

    return {"status": "ok", "message": "khoj reloaded"}
217
+
218
+
219
@api.post("/transcribe")
@requires(["authenticated"])
async def transcribe(
    request: Request,
    common: CommonQueryParams,
    file: UploadFile = File(...),
    rate_limiter_per_minute=Depends(
        ApiUserRateLimiter(requests=1, subscribed_requests=10, window=60, slug="transcribe_minute")
    ),
    rate_limiter_per_day=Depends(
        ApiUserRateLimiter(requests=10, subscribed_requests=600, window=60 * 60 * 24, slug="transcribe_day")
    ),
):
    # Transcribe endpoint: writes the uploaded audio to a temporary file and
    # transcribes it with the configured speech to text model.
    # Responds with:
    #   200 and {"text": ...} on success
    #   422 when the upload exceeds the 10Mb limit
    #   501 when no speech to text model is configured
    #   500 when no transcription was produced for any other reason
    user: KhojUser = request.user.object
    audio_filename = f"{user.uuid}-{str(uuid.uuid4())}.webm"
    user_message: Optional[str] = None
    # Stays None unless the failure has a more specific cause than a generic server error.
    # Must be bound up front: it is read below even when no transcription branch ran.
    status_code: Optional[int] = None
    # Bound up front so the cleanup below is safe even if the upload could not be read or stored
    audio_file = None

    # If the file is too large, return an unprocessable entity error.
    # The size may be unset on the upload, in which case the check is skipped.
    if file.size is not None and file.size > 10 * 1024 * 1024:
        logger.warning(f"Audio file too large to transcribe. Audio file size: {file.size}. Exceeds 10Mb limit.")
        return Response(content="Audio size larger than 10Mb limit", status_code=422)

    # Transcribe the audio from the request
    try:
        # Store the audio from the request in a temporary file
        audio_data = await file.read()
        with open(audio_filename, "wb") as audio_file_writer:
            audio_file_writer.write(audio_data)
        audio_file = open(audio_filename, "rb")

        # Send the audio data to the configured speech to text model
        speech_to_text_config = await ConversationAdapters.get_speech_to_text_config()
        if not speech_to_text_config:
            # If the user has not configured a speech to text model, return an unsupported on server error
            status_code = 501
        elif state.openai_client and speech_to_text_config.model_type == SpeechToTextModelOptions.ModelType.OPENAI:
            speech2text_model = speech_to_text_config.model_name
            user_message = await transcribe_audio(audio_file, speech2text_model, client=state.openai_client)
        elif speech_to_text_config.model_type == SpeechToTextModelOptions.ModelType.OFFLINE:
            speech2text_model = speech_to_text_config.model_name
            user_message = await transcribe_audio_offline(audio_filename, speech2text_model)
    finally:
        # Close and delete the temporary audio file, tolerating a failure before it was created
        if audio_file is not None:
            audio_file.close()
        if os.path.exists(audio_filename):
            os.remove(audio_filename)

    if user_message is None:
        return Response(status_code=status_code or 500)

    update_telemetry_state(
        request=request,
        telemetry_type="api",
        api="transcribe",
        **common.__dict__,
    )

    # Return the spoken text
    content = json.dumps({"text": user_message})
    return Response(content=content, media_type="application/json", status_code=200)
278
+
279
+
280
async def extract_references_and_questions(
    request: Request,
    meta_log: dict,
    q: str,
    n: int,
    d: float,
    conversation_id: int,
    conversation_commands: List[ConversationCommand] = [ConversationCommand.Default],
    location_data: LocationData = None,
    send_status_func: Optional[Callable] = None,
):
    """Infer search queries from a user message and collect matching references.

    Args:
        request: Incoming request; its user is used when authenticated.
        meta_log: Conversation log forwarded to the question extractors.
        q: Raw user message, possibly containing filter terms.
        n: Number of results to request per inferred query (capped at 3 when
            the default chat model is an offline model).
        d: Maximum distance forwarded to execute_search.
        conversation_id: Id of the conversation whose file filters are applied.
        conversation_commands: Commands active for this message. References are
            only extracted when Notes or Default is among them. The default list
            is never mutated here.
        location_data: Forwarded to the question extractors.
        send_status_func: Optional awaitable callback, called with a status
            message listing the inferred queries.

    Returns:
        A (compiled_references, inferred_queries, query) tuple. The query is the
        original message on the two earliest exits and the message with filter
        terms removed otherwise.
    """
    user = request.user.object if request.user.is_authenticated else None

    # Initialize Variables
    compiled_references: List[Any] = []
    inferred_queries: List[str] = []

    if (
        ConversationCommand.Notes not in conversation_commands
        and ConversationCommand.Default not in conversation_commands
    ):
        return compiled_references, inferred_queries, q

    if not await sync_to_async(EntryAdapters.user_has_entries)(user=user):
        logger.debug("No documents in knowledge base. Use a Khoj client to sync and chat with your docs.")
        return compiled_references, inferred_queries, q

    # Extract filter terms from user message
    defiltered_query = q
    for search_filter in [DateFilter(), WordFilter(), FileFilter()]:
        defiltered_query = search_filter.defilter(defiltered_query)
    filters_in_query = q.replace(defiltered_query, "").strip()
    conversation = await sync_to_async(ConversationAdapters.get_conversation_by_id)(conversation_id)

    if not conversation:
        logger.error(f"Conversation with id {conversation_id} not found.")
        return compiled_references, inferred_queries, defiltered_query

    # Append the conversation's file filters, keeping them separated from any
    # filter terms already present in the message
    file_filters = " ".join(f'file:"{file_filter}"' for file_filter in conversation.file_filters)
    filters_in_query = " ".join(part for part in (filters_in_query, file_filters) if part)
    using_offline_chat = False
    logger.debug(f"Filters in query: {filters_in_query}")

    # Infer search queries from user message
    with timer("Extracting search queries took", logger):
        # If we've reached here, either the user has enabled offline chat or the openai model is enabled.
        conversation_config = await ConversationAdapters.aget_default_conversation_config()

        if conversation_config.model_type == ChatModelOptions.ModelType.OFFLINE:
            using_offline_chat = True
            chat_model = conversation_config.chat_model
            max_tokens = conversation_config.max_prompt_size
            # Set up the offline chat processor config on first use
            if state.offline_chat_processor_config is None:
                state.offline_chat_processor_config = OfflineChatProcessorModel(chat_model, max_tokens)

            loaded_model = state.offline_chat_processor_config.loaded_model

            inferred_queries = extract_questions_offline(
                defiltered_query,
                loaded_model=loaded_model,
                conversation_log=meta_log,
                should_extract_questions=True,
                location_data=location_data,
                max_prompt_size=conversation_config.max_prompt_size,
            )
        elif conversation_config.model_type == ChatModelOptions.ModelType.OPENAI:
            openai_chat_config = conversation_config.openai_config
            api_key = openai_chat_config.api_key
            base_url = openai_chat_config.api_base_url
            chat_model = conversation_config.chat_model
            inferred_queries = extract_questions(
                defiltered_query,
                model=chat_model,
                api_key=api_key,
                api_base_url=base_url,
                conversation_log=meta_log,
                location_data=location_data,
                max_tokens=conversation_config.max_prompt_size,
            )
        elif conversation_config.model_type == ChatModelOptions.ModelType.ANTHROPIC:
            # NOTE(review): the Anthropic api key is read from openai_config - confirm this is intended
            api_key = conversation_config.openai_config.api_key
            chat_model = conversation_config.chat_model
            inferred_queries = extract_questions_anthropic(
                defiltered_query,
                model=chat_model,
                api_key=api_key,
                conversation_log=meta_log,
                location_data=location_data,
            )

    # Collate search results as context for GPT
    with timer("Searching knowledge base took", logger):
        search_results = []
        logger.info(f"🔍 Searching knowledge base with queries: {inferred_queries}")
        if send_status_func:
            inferred_queries_str = "\n- " + "\n- ".join(inferred_queries)
            await send_status_func(f"**🔍 Searching Documents for:** {inferred_queries_str}")

        # Request fewer results per query when using an offline chat model
        n_items = min(n, 3) if using_offline_chat else n
        for query in inferred_queries:
            search_results.extend(
                await execute_search(
                    user,
                    f"{query} {filters_in_query}",
                    n=n_items,
                    t=SearchType.All,
                    r=True,
                    max_distance=d,
                    dedupe=False,
                )
            )
        search_results = text_search.deduplicated_search_responses(search_results)
        compiled_references = [
            {"compiled": item.additional["compiled"], "file": item.additional["file"]} for item in search_results
        ]

    return compiled_references, inferred_queries, defiltered_query
395
+
396
+
397
@api.get("/health", response_class=Response)
@requires(["authenticated"], status_code=200)
def health_check(request: Request) -> Response:
    # Health endpoint: echoes the email of the requesting user as JSON
    payload = json.dumps({"email": request.user.object.email})
    return Response(content=payload, media_type="application/json", status_code=200)
402
+
403
+
404
@api.get("/v1/user", response_class=Response)
@requires(["authenticated"])
def user_info(request: Request) -> Response:
    # User endpoint: reports the authenticated user's profile details as JSON
    user: KhojUser = request.user.object

    # Look up the derived details first, in a fixed order
    photo = get_user_photo(user=user)
    has_premium_scope = has_required_scope(request, ["premium"])
    has_indexed_entries = EntryAdapters.user_has_entries(user=user)

    # Key order determines the field order of the serialized response
    profile = {
        "email": user.email,
        "username": user.username,
        "photo": photo,
        "is_active": has_premium_scope,
        "has_documents": has_indexed_entries,
    }

    return Response(content=json.dumps(profile), media_type="application/json", status_code=200)
424
+
425
+
426
@api.get("/automations", response_class=Response)
@requires(["authenticated"])
def get_automations(request: Request) -> Response:
    # Automations endpoint: lists the metadata of the user's automations as JSON
    user: KhojUser = request.user.object

    # Materialize the metadata so it can be serialized
    automations = list(AutomationAdapters.get_automations_metadata(user))

    return Response(content=json.dumps(automations), media_type="application/json", status_code=200)
436
+
437
+
438
@api.delete("/automation", response_class=Response)
@requires(["authenticated"])
def delete_automation(request: Request, automation_id: str) -> Response:
    # Delete endpoint: removes the given automation of the authenticated user.
    # Responds with 204 when the adapter raises ValueError, otherwise with the
    # deleted automation's information as JSON.
    user: KhojUser = request.user.object

    try:
        deleted_automation = AutomationAdapters.delete_automation(user, automation_id)
    except ValueError:
        return Response(status_code=204)
    else:
        payload = json.dumps(deleted_automation)
        return Response(content=payload, media_type="application/json", status_code=200)
450
+
451
+
452
@api.post("/automation", response_class=Response)
@requires(["authenticated"])
async def post_automation(
    request: Request,
    q: str,
    crontime: str,
    subject: Optional[str] = None,
    city: Optional[str] = None,
    region: Optional[str] = None,
    country: Optional[str] = None,
    timezone: Optional[str] = None,
) -> Response:
    """Create a scheduled automation for the authenticated user.

    Args:
        request: Incoming request; supplies the user, the client app and the
            URL from which the automation's calling URL is derived.
        q: Query the automation should run. It is stripped and prefixed with
            "/automated_task" unless it already starts with that prefix.
        crontime: Cron expression for the schedule. Only the first five
            space-separated fields are kept and "?" is replaced with "*".
        subject: Title of the automation. Generated from `q` when omitted.
        city: Accepted but not read by this handler.
        region: Accepted but not read by this handler.
        country: Accepted but not read by this handler.
        timezone: Passed through unchanged to `schedule_automation`.

    Returns:
        200 with the created automation's metadata as JSON; 400 when the
        query or crontime is missing, the crontime has no description, or the
        minute field is not a plain number; 500 when scheduling raises.
    """
    user: KhojUser = request.user.object

    # Perform validation checks
    if is_none_or_empty(q) or is_none_or_empty(crontime):
        return Response(content="A query and crontime is required", status_code=400)
    if not cron_descriptor.get_description(crontime):
        return Response(content="Invalid crontime", status_code=400)

    # Normalize query parameters
    # Add /automated_task prefix to query if not present.
    # query_to_run must be bound on both paths: previously an already-prefixed
    # query left it undefined and the request failed with a misleading 500.
    q = q.strip()
    query_to_run = q if q.startswith("/automated_task") else f"/automated_task {q}"

    # Normalize crontime for AP Scheduler CronTrigger:
    # keep at most 5 fields, then convert to standard unix crontime
    crontime = " ".join(crontime.strip().split(" ")[:5])
    crontime = crontime.replace("?", "*")

    # Disallow minute level automation recurrence
    minute_value = crontime.split(" ")[0]
    if not minute_value.isdigit():
        return Response(
            content="Recurrence of every X minutes is unsupported. Please create a less frequent schedule.",
            status_code=400,
        )

    if not subject:
        subject = await acreate_title_from_query(q)

    # Create new Conversation Session associated with this new task
    conversation = await ConversationAdapters.acreate_conversation_session(user, request.user.client_app)

    # Extend the incoming query string with the id of the new conversation
    calling_url = request.url.replace(query=f"{request.url.query}&conversation_id={conversation.id}")

    # Schedule automation with query_to_run, timezone, subject directly provided by user
    try:
        # The stripped query doubles as the scheduling request
        automation = await schedule_automation(query_to_run, subject, crontime, timezone, q, user, calling_url)
    except Exception as e:
        logger.error(f"Error creating automation {q} for {user.email}: {e}", exc_info=True)
        return Response(
            content="Unable to create automation. Ensure the automation doesn't already exist.",
            media_type="text/plain",
            status_code=500,
        )

    # Collate info about the created user automation
    automation_info = AutomationAdapters.get_automation_metadata(user, automation)

    # Return information about the created automation as a JSON response
    return Response(content=json.dumps(automation_info), media_type="application/json", status_code=200)
520
+
521
+
522
@api.post("/trigger/automation", response_class=Response)
@requires(["authenticated"])
def trigger_manual_job(
    request: Request,
    automation_id: str,
):
    """Run one of the authenticated user's automations immediately.

    Args:
        request: Incoming request; the user is read from `request.user.object`.
        automation_id: Identifier of the automation to trigger.

    Returns:
        200 once the automation's function has been started on a new thread,
        or 403 when `AutomationAdapters.get_automation` raises `ValueError`.
    """
    user: KhojUser = request.user.object

    # Look up the automation to run; the adapter raises ValueError on failure
    try:
        job: Job = AutomationAdapters.get_automation(user, automation_id)
    except ValueError as e:
        logger.error(f"Error triggering automation {automation_id} for {user.email}: {e}", exc_info=True)
        return Response(content="Invalid automation", status_code=403)

    # Fire and forget: start the job's callable with its stored arguments on
    # a separate thread and respond without waiting for the result
    threading.Thread(target=job.func, args=job.args, kwargs=job.kwargs).start()

    return Response(content="Automation triggered", status_code=200)
545
+
546
+
547
@api.put("/automation", response_class=Response)
@requires(["authenticated"])
def edit_job(
    request: Request,
    automation_id: str,
    q: Optional[str],
    subject: Optional[str],
    crontime: Optional[str],
    city: Optional[str] = None,
    region: Optional[str] = None,
    country: Optional[str] = None,
    timezone: Optional[str] = None,
) -> Response:
    """Update the query, subject and schedule of an existing automation.

    Args:
        request: Incoming request; supplies the user and the calling URL
            stored on the automation.
        automation_id: Identifier of the automation to edit.
        q: New query. It is stripped and prefixed with "/automated_task"
            unless it already starts with that prefix.
        subject: New title of the automation.
        crontime: New cron expression. Only the first five space-separated
            fields are kept and "?" is replaced with "*".
        city: Accepted but not read by this handler.
        region: Accepted but not read by this handler.
        country: Accepted but not read by this handler.
        timezone: Timezone name for the new trigger. When omitted, no
            timezone is passed and APScheduler applies its own default.

    Returns:
        200 with the updated automation's metadata as JSON; 400 when a
        required value is missing, the crontime has no description, the
        timezone name is unknown, or the minute field is not a plain number;
        403 when the automation lookup raises `ValueError`.
    """
    user: KhojUser = request.user.object

    # Perform validation checks
    if is_none_or_empty(q) or is_none_or_empty(subject) or is_none_or_empty(crontime):
        return Response(content="A query, subject and crontime is required", status_code=400)
    if not cron_descriptor.get_description(crontime):
        return Response(content="Invalid crontime", status_code=400)

    # Resolve the timezone before touching the automation, so a missing or
    # unknown timezone can no longer fail after a partial update was applied
    try:
        user_timezone = pytz.timezone(timezone) if timezone else None
    except pytz.UnknownTimeZoneError:
        return Response(content="Invalid timezone", status_code=400)

    # Check, get automation to edit
    try:
        automation: Job = AutomationAdapters.get_automation(user, automation_id)
    except ValueError as e:
        logger.error(f"Error editing automation {automation_id} for {user.email}: {e}", exc_info=True)
        return Response(content="Invalid automation", status_code=403)

    # Normalize query parameters
    # Add /automated_task prefix to query if not present.
    # query_to_run must be bound on both paths: previously an already-prefixed
    # query left it undefined and the request crashed with UnboundLocalError.
    q = q.strip()
    query_to_run = q if q.startswith("/automated_task") else f"/automated_task {q}"

    # Normalize crontime for AP Scheduler CronTrigger:
    # keep at most 5 fields, then convert to standard unix crontime
    crontime = " ".join(crontime.strip().split(" ")[:5])
    crontime = crontime.replace("?", "*")

    # Disallow minute level automation recurrence
    minute_value = crontime.split(" ")[0]
    if not minute_value.isdigit():
        return Response(
            content="Recurrence of every X minutes is unsupported. Please create a less frequent schedule.",
            status_code=400,
        )

    # Construct updated automation metadata (stored as JSON in the job name)
    automation_metadata = json.loads(automation.name)
    automation_metadata["scheduling_request"] = q
    automation_metadata["query_to_run"] = query_to_run
    automation_metadata["subject"] = subject.strip()
    automation_metadata["crontime"] = crontime

    # Modify automation with updated query, subject
    automation.modify(
        name=json.dumps(automation_metadata),
        kwargs={
            "query_to_run": query_to_run,
            "subject": subject,
            "scheduling_request": q,
            "user": user,
            "calling_url": request.url,
        },
    )

    # Reschedule automation if crontime updated
    trigger = CronTrigger.from_crontab(crontime, user_timezone)
    if automation.trigger != trigger:
        automation.reschedule(trigger=trigger)

    # Collate info about the updated user automation
    automation = AutomationAdapters.get_automation(user, automation.id)
    automation_info = AutomationAdapters.get_automation_metadata(user, automation)

    # Return modified automation information as a JSON response
    return Response(content=json.dumps(automation_info), media_type="application/json", status_code=200)
@@ -0,0 +1,43 @@
1
+ import json
2
+ import logging
3
+
4
+ from fastapi import APIRouter, Request
5
+ from fastapi.requests import Request
6
+ from fastapi.responses import Response
7
+
8
+ from khoj.database.adapters import AgentAdapters
9
+ from khoj.database.models import KhojUser
10
+ from khoj.routers.helpers import CommonQueryParams
11
+
12
+ # Module logger, named after this module
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
+ api_agents = APIRouter()  # Router on which the agent endpoints are registered
17
+
18
+
19
@api_agents.get("", response_class=Response)
async def all_agents(
    request: Request,
    common: CommonQueryParams,
) -> Response:
    """List the agents accessible to the requester as a JSON array.

    Args:
        request: Incoming request; the user is taken from it when the request
            is authenticated, otherwise no user is passed to the adapter.
        common: Common query parameters; not read by this handler.

    Returns:
        A 200 response with one JSON object per agent (slug, avatar, name,
        personality, public, creator, managed_by_admin). The agent whose slug
        is "khoj" comes first; the remaining agents are ordered by name.
    """
    user: KhojUser = None
    if request.user.is_authenticated:
        user = request.user.object

    accessible_agents = await AgentAdapters.aget_all_accessible_agents(user)

    summaries = [
        {
            "slug": agent.slug,
            "avatar": agent.avatar,
            "name": agent.name,
            "personality": agent.personality,
            "public": agent.public,
            "creator": agent.creator.username if agent.creator else None,
            "managed_by_admin": agent.managed_by_admin,
        }
        for agent in accessible_agents
    ]

    # Two stable passes: order by name first, then float the agent with slug
    # 'khoj' to the front while keeping the name order of everything else.
    by_name = sorted(summaries, key=lambda entry: entry["name"])
    khoj_first = sorted(by_name, key=lambda entry: entry["slug"] != "khoj")

    return Response(content=json.dumps(khoj_first), media_type="application/json", status_code=200)