khoj 1.16.1.dev15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242)
  1. khoj/__init__.py +0 -0
  2. khoj/app/README.md +94 -0
  3. khoj/app/__init__.py +0 -0
  4. khoj/app/asgi.py +16 -0
  5. khoj/app/settings.py +192 -0
  6. khoj/app/urls.py +25 -0
  7. khoj/configure.py +424 -0
  8. khoj/database/__init__.py +0 -0
  9. khoj/database/adapters/__init__.py +1234 -0
  10. khoj/database/admin.py +290 -0
  11. khoj/database/apps.py +6 -0
  12. khoj/database/management/__init__.py +0 -0
  13. khoj/database/management/commands/__init__.py +0 -0
  14. khoj/database/management/commands/change_generated_images_url.py +61 -0
  15. khoj/database/management/commands/convert_images_png_to_webp.py +99 -0
  16. khoj/database/migrations/0001_khojuser.py +98 -0
  17. khoj/database/migrations/0002_googleuser.py +32 -0
  18. khoj/database/migrations/0003_vector_extension.py +10 -0
  19. khoj/database/migrations/0004_content_types_and_more.py +181 -0
  20. khoj/database/migrations/0005_embeddings_corpus_id.py +19 -0
  21. khoj/database/migrations/0006_embeddingsdates.py +33 -0
  22. khoj/database/migrations/0007_add_conversation.py +27 -0
  23. khoj/database/migrations/0008_alter_conversation_conversation_log.py +17 -0
  24. khoj/database/migrations/0009_khojapiuser.py +24 -0
  25. khoj/database/migrations/0010_chatmodeloptions_and_more.py +83 -0
  26. khoj/database/migrations/0010_rename_embeddings_entry_and_more.py +30 -0
  27. khoj/database/migrations/0011_merge_20231102_0138.py +14 -0
  28. khoj/database/migrations/0012_entry_file_source.py +21 -0
  29. khoj/database/migrations/0013_subscription.py +37 -0
  30. khoj/database/migrations/0014_alter_googleuser_picture.py +17 -0
  31. khoj/database/migrations/0015_alter_subscription_user.py +21 -0
  32. khoj/database/migrations/0016_alter_subscription_renewal_date.py +17 -0
  33. khoj/database/migrations/0017_searchmodel.py +32 -0
  34. khoj/database/migrations/0018_searchmodelconfig_delete_searchmodel.py +30 -0
  35. khoj/database/migrations/0019_alter_googleuser_family_name_and_more.py +27 -0
  36. khoj/database/migrations/0020_reflectivequestion.py +36 -0
  37. khoj/database/migrations/0021_speechtotextmodeloptions_and_more.py +42 -0
  38. khoj/database/migrations/0022_texttoimagemodelconfig.py +25 -0
  39. khoj/database/migrations/0023_usersearchmodelconfig.py +33 -0
  40. khoj/database/migrations/0024_alter_entry_embeddings.py +18 -0
  41. khoj/database/migrations/0025_clientapplication_khojuser_phone_number_and_more.py +46 -0
  42. khoj/database/migrations/0025_searchmodelconfig_embeddings_inference_endpoint_and_more.py +22 -0
  43. khoj/database/migrations/0026_searchmodelconfig_cross_encoder_inference_endpoint_and_more.py +22 -0
  44. khoj/database/migrations/0027_merge_20240118_1324.py +13 -0
  45. khoj/database/migrations/0028_khojuser_verified_phone_number.py +17 -0
  46. khoj/database/migrations/0029_userrequests.py +27 -0
  47. khoj/database/migrations/0030_conversation_slug_and_title.py +38 -0
  48. khoj/database/migrations/0031_agent_conversation_agent.py +53 -0
  49. khoj/database/migrations/0031_alter_googleuser_locale.py +30 -0
  50. khoj/database/migrations/0032_merge_20240322_0427.py +14 -0
  51. khoj/database/migrations/0033_rename_tuning_agent_personality.py +17 -0
  52. khoj/database/migrations/0034_alter_chatmodeloptions_chat_model.py +32 -0
  53. khoj/database/migrations/0035_processlock.py +26 -0
  54. khoj/database/migrations/0036_alter_processlock_name.py +19 -0
  55. khoj/database/migrations/0036_delete_offlinechatprocessorconversationconfig.py +15 -0
  56. khoj/database/migrations/0036_publicconversation.py +42 -0
  57. khoj/database/migrations/0037_chatmodeloptions_openai_config_and_more.py +51 -0
  58. khoj/database/migrations/0037_searchmodelconfig_bi_encoder_docs_encode_config_and_more.py +32 -0
  59. khoj/database/migrations/0038_merge_20240425_0857.py +14 -0
  60. khoj/database/migrations/0038_merge_20240426_1640.py +12 -0
  61. khoj/database/migrations/0039_merge_20240501_0301.py +12 -0
  62. khoj/database/migrations/0040_alter_processlock_name.py +26 -0
  63. khoj/database/migrations/0040_merge_20240504_1010.py +14 -0
  64. khoj/database/migrations/0041_merge_20240505_1234.py +14 -0
  65. khoj/database/migrations/0042_serverchatsettings.py +46 -0
  66. khoj/database/migrations/0043_alter_chatmodeloptions_model_type.py +21 -0
  67. khoj/database/migrations/0044_conversation_file_filters.py +17 -0
  68. khoj/database/migrations/0045_fileobject.py +37 -0
  69. khoj/database/migrations/0046_khojuser_email_verification_code_and_more.py +22 -0
  70. khoj/database/migrations/0047_alter_entry_file_type.py +31 -0
  71. khoj/database/migrations/0048_voicemodeloption_uservoicemodelconfig.py +52 -0
  72. khoj/database/migrations/0049_datastore.py +38 -0
  73. khoj/database/migrations/0049_texttoimagemodelconfig_api_key_and_more.py +58 -0
  74. khoj/database/migrations/0050_alter_processlock_name.py +25 -0
  75. khoj/database/migrations/0051_merge_20240702_1220.py +14 -0
  76. khoj/database/migrations/0052_alter_searchmodelconfig_bi_encoder_docs_encode_config_and_more.py +27 -0
  77. khoj/database/migrations/__init__.py +0 -0
  78. khoj/database/models/__init__.py +402 -0
  79. khoj/database/tests.py +3 -0
  80. khoj/interface/email/feedback.html +34 -0
  81. khoj/interface/email/magic_link.html +17 -0
  82. khoj/interface/email/task.html +40 -0
  83. khoj/interface/email/welcome.html +61 -0
  84. khoj/interface/web/404.html +56 -0
  85. khoj/interface/web/agent.html +312 -0
  86. khoj/interface/web/agents.html +276 -0
  87. khoj/interface/web/assets/icons/agents.svg +6 -0
  88. khoj/interface/web/assets/icons/automation.svg +37 -0
  89. khoj/interface/web/assets/icons/cancel.svg +3 -0
  90. khoj/interface/web/assets/icons/chat.svg +24 -0
  91. khoj/interface/web/assets/icons/collapse.svg +17 -0
  92. khoj/interface/web/assets/icons/computer.png +0 -0
  93. khoj/interface/web/assets/icons/confirm-icon.svg +1 -0
  94. khoj/interface/web/assets/icons/copy-button-success.svg +6 -0
  95. khoj/interface/web/assets/icons/copy-button.svg +5 -0
  96. khoj/interface/web/assets/icons/credit-card.png +0 -0
  97. khoj/interface/web/assets/icons/delete.svg +26 -0
  98. khoj/interface/web/assets/icons/docx.svg +7 -0
  99. khoj/interface/web/assets/icons/edit.svg +4 -0
  100. khoj/interface/web/assets/icons/favicon-128x128.ico +0 -0
  101. khoj/interface/web/assets/icons/favicon-128x128.png +0 -0
  102. khoj/interface/web/assets/icons/favicon-256x256.png +0 -0
  103. khoj/interface/web/assets/icons/favicon.icns +0 -0
  104. khoj/interface/web/assets/icons/github.svg +1 -0
  105. khoj/interface/web/assets/icons/key.svg +4 -0
  106. khoj/interface/web/assets/icons/khoj-logo-sideways-200.png +0 -0
  107. khoj/interface/web/assets/icons/khoj-logo-sideways-500.png +0 -0
  108. khoj/interface/web/assets/icons/khoj-logo-sideways.svg +5385 -0
  109. khoj/interface/web/assets/icons/logotype.svg +1 -0
  110. khoj/interface/web/assets/icons/markdown.svg +1 -0
  111. khoj/interface/web/assets/icons/new.svg +23 -0
  112. khoj/interface/web/assets/icons/notion.svg +4 -0
  113. khoj/interface/web/assets/icons/openai-logomark.svg +1 -0
  114. khoj/interface/web/assets/icons/org.svg +1 -0
  115. khoj/interface/web/assets/icons/pdf.svg +23 -0
  116. khoj/interface/web/assets/icons/pencil-edit.svg +5 -0
  117. khoj/interface/web/assets/icons/plaintext.svg +1 -0
  118. khoj/interface/web/assets/icons/question-mark-icon.svg +1 -0
  119. khoj/interface/web/assets/icons/search.svg +25 -0
  120. khoj/interface/web/assets/icons/send.svg +1 -0
  121. khoj/interface/web/assets/icons/share.svg +8 -0
  122. khoj/interface/web/assets/icons/speaker.svg +4 -0
  123. khoj/interface/web/assets/icons/stop-solid.svg +37 -0
  124. khoj/interface/web/assets/icons/sync.svg +4 -0
  125. khoj/interface/web/assets/icons/thumbs-down-svgrepo-com.svg +6 -0
  126. khoj/interface/web/assets/icons/thumbs-up-svgrepo-com.svg +6 -0
  127. khoj/interface/web/assets/icons/user-silhouette.svg +4 -0
  128. khoj/interface/web/assets/icons/voice.svg +8 -0
  129. khoj/interface/web/assets/icons/web.svg +2 -0
  130. khoj/interface/web/assets/icons/whatsapp.svg +17 -0
  131. khoj/interface/web/assets/khoj.css +237 -0
  132. khoj/interface/web/assets/markdown-it.min.js +8476 -0
  133. khoj/interface/web/assets/natural-cron.min.js +1 -0
  134. khoj/interface/web/assets/org.min.js +1823 -0
  135. khoj/interface/web/assets/pico.min.css +5 -0
  136. khoj/interface/web/assets/purify.min.js +3 -0
  137. khoj/interface/web/assets/samples/desktop-browse-draw-sample.png +0 -0
  138. khoj/interface/web/assets/samples/desktop-plain-chat-sample.png +0 -0
  139. khoj/interface/web/assets/samples/desktop-remember-plan-sample.png +0 -0
  140. khoj/interface/web/assets/samples/phone-browse-draw-sample.png +0 -0
  141. khoj/interface/web/assets/samples/phone-plain-chat-sample.png +0 -0
  142. khoj/interface/web/assets/samples/phone-remember-plan-sample.png +0 -0
  143. khoj/interface/web/assets/utils.js +33 -0
  144. khoj/interface/web/base_config.html +445 -0
  145. khoj/interface/web/chat.html +3546 -0
  146. khoj/interface/web/config.html +1011 -0
  147. khoj/interface/web/config_automation.html +1103 -0
  148. khoj/interface/web/content_source_computer_input.html +139 -0
  149. khoj/interface/web/content_source_github_input.html +216 -0
  150. khoj/interface/web/content_source_notion_input.html +94 -0
  151. khoj/interface/web/khoj.webmanifest +51 -0
  152. khoj/interface/web/login.html +219 -0
  153. khoj/interface/web/public_conversation.html +2006 -0
  154. khoj/interface/web/search.html +470 -0
  155. khoj/interface/web/utils.html +48 -0
  156. khoj/main.py +241 -0
  157. khoj/manage.py +22 -0
  158. khoj/migrations/__init__.py +0 -0
  159. khoj/migrations/migrate_offline_chat_default_model.py +69 -0
  160. khoj/migrations/migrate_offline_chat_default_model_2.py +71 -0
  161. khoj/migrations/migrate_offline_chat_schema.py +83 -0
  162. khoj/migrations/migrate_offline_model.py +29 -0
  163. khoj/migrations/migrate_processor_config_openai.py +67 -0
  164. khoj/migrations/migrate_server_pg.py +138 -0
  165. khoj/migrations/migrate_version.py +17 -0
  166. khoj/processor/__init__.py +0 -0
  167. khoj/processor/content/__init__.py +0 -0
  168. khoj/processor/content/docx/__init__.py +0 -0
  169. khoj/processor/content/docx/docx_to_entries.py +110 -0
  170. khoj/processor/content/github/__init__.py +0 -0
  171. khoj/processor/content/github/github_to_entries.py +224 -0
  172. khoj/processor/content/images/__init__.py +0 -0
  173. khoj/processor/content/images/image_to_entries.py +118 -0
  174. khoj/processor/content/markdown/__init__.py +0 -0
  175. khoj/processor/content/markdown/markdown_to_entries.py +165 -0
  176. khoj/processor/content/notion/notion_to_entries.py +260 -0
  177. khoj/processor/content/org_mode/__init__.py +0 -0
  178. khoj/processor/content/org_mode/org_to_entries.py +231 -0
  179. khoj/processor/content/org_mode/orgnode.py +532 -0
  180. khoj/processor/content/pdf/__init__.py +0 -0
  181. khoj/processor/content/pdf/pdf_to_entries.py +116 -0
  182. khoj/processor/content/plaintext/__init__.py +0 -0
  183. khoj/processor/content/plaintext/plaintext_to_entries.py +122 -0
  184. khoj/processor/content/text_to_entries.py +297 -0
  185. khoj/processor/conversation/__init__.py +0 -0
  186. khoj/processor/conversation/anthropic/__init__.py +0 -0
  187. khoj/processor/conversation/anthropic/anthropic_chat.py +206 -0
  188. khoj/processor/conversation/anthropic/utils.py +114 -0
  189. khoj/processor/conversation/offline/__init__.py +0 -0
  190. khoj/processor/conversation/offline/chat_model.py +231 -0
  191. khoj/processor/conversation/offline/utils.py +78 -0
  192. khoj/processor/conversation/offline/whisper.py +15 -0
  193. khoj/processor/conversation/openai/__init__.py +0 -0
  194. khoj/processor/conversation/openai/gpt.py +187 -0
  195. khoj/processor/conversation/openai/utils.py +129 -0
  196. khoj/processor/conversation/openai/whisper.py +13 -0
  197. khoj/processor/conversation/prompts.py +758 -0
  198. khoj/processor/conversation/utils.py +262 -0
  199. khoj/processor/embeddings.py +117 -0
  200. khoj/processor/speech/__init__.py +0 -0
  201. khoj/processor/speech/text_to_speech.py +51 -0
  202. khoj/processor/tools/__init__.py +0 -0
  203. khoj/processor/tools/online_search.py +225 -0
  204. khoj/routers/__init__.py +0 -0
  205. khoj/routers/api.py +626 -0
  206. khoj/routers/api_agents.py +43 -0
  207. khoj/routers/api_chat.py +1180 -0
  208. khoj/routers/api_config.py +434 -0
  209. khoj/routers/api_phone.py +86 -0
  210. khoj/routers/auth.py +181 -0
  211. khoj/routers/email.py +133 -0
  212. khoj/routers/helpers.py +1188 -0
  213. khoj/routers/indexer.py +349 -0
  214. khoj/routers/notion.py +91 -0
  215. khoj/routers/storage.py +35 -0
  216. khoj/routers/subscription.py +104 -0
  217. khoj/routers/twilio.py +36 -0
  218. khoj/routers/web_client.py +471 -0
  219. khoj/search_filter/__init__.py +0 -0
  220. khoj/search_filter/base_filter.py +15 -0
  221. khoj/search_filter/date_filter.py +217 -0
  222. khoj/search_filter/file_filter.py +30 -0
  223. khoj/search_filter/word_filter.py +29 -0
  224. khoj/search_type/__init__.py +0 -0
  225. khoj/search_type/text_search.py +241 -0
  226. khoj/utils/__init__.py +0 -0
  227. khoj/utils/cli.py +93 -0
  228. khoj/utils/config.py +81 -0
  229. khoj/utils/constants.py +24 -0
  230. khoj/utils/fs_syncer.py +249 -0
  231. khoj/utils/helpers.py +418 -0
  232. khoj/utils/initialization.py +146 -0
  233. khoj/utils/jsonl.py +43 -0
  234. khoj/utils/models.py +47 -0
  235. khoj/utils/rawconfig.py +160 -0
  236. khoj/utils/state.py +46 -0
  237. khoj/utils/yaml.py +43 -0
  238. khoj-1.16.1.dev15.dist-info/METADATA +178 -0
  239. khoj-1.16.1.dev15.dist-info/RECORD +242 -0
  240. khoj-1.16.1.dev15.dist-info/WHEEL +4 -0
  241. khoj-1.16.1.dev15.dist-info/entry_points.txt +2 -0
  242. khoj-1.16.1.dev15.dist-info/licenses/LICENSE +661 -0
khoj/routers/indexer.py ADDED
@@ -0,0 +1,349 @@
1
+ import asyncio
2
+ import logging
3
+ from typing import Dict, Optional, Union
4
+
5
+ from fastapi import APIRouter, Depends, Header, Request, Response, UploadFile
6
+ from pydantic import BaseModel
7
+ from starlette.authentication import requires
8
+
9
+ from khoj.database.models import GithubConfig, KhojUser, NotionConfig
10
+ from khoj.processor.content.docx.docx_to_entries import DocxToEntries
11
+ from khoj.processor.content.github.github_to_entries import GithubToEntries
12
+ from khoj.processor.content.images.image_to_entries import ImageToEntries
13
+ from khoj.processor.content.markdown.markdown_to_entries import MarkdownToEntries
14
+ from khoj.processor.content.notion.notion_to_entries import NotionToEntries
15
+ from khoj.processor.content.org_mode.org_to_entries import OrgToEntries
16
+ from khoj.processor.content.pdf.pdf_to_entries import PdfToEntries
17
+ from khoj.processor.content.plaintext.plaintext_to_entries import PlaintextToEntries
18
+ from khoj.routers.helpers import ApiIndexedDataLimiter, update_telemetry_state
19
+ from khoj.search_type import text_search
20
+ from khoj.utils import constants, state
21
+ from khoj.utils.config import SearchModels
22
+ from khoj.utils.helpers import LRU, get_file_type
23
+ from khoj.utils.rawconfig import ContentConfig, FullConfig, SearchConfig
24
+ from khoj.utils.yaml import save_config_to_file_updated_state
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+ indexer = APIRouter()
29
+
30
+
31
class File(BaseModel):
    # A single file submitted for indexing.
    # `path` identifies the file; `content` is its payload as text or raw bytes.
    path: str
    content: Union[str, bytes]
34
+
35
+
36
class IndexBatchRequest(BaseModel):
    # Request body wrapping a batch of `File` items to index together.
    files: list[File]
38
+
39
+
40
class IndexerInput(BaseModel):
    # Files to index, grouped by content type.
    # Each field maps a filename to that file's content; text formats carry
    # `str` content, binary formats carry `bytes`. Every field is optional.

    # Text content types
    org: Optional[dict[str, str]] = None
    markdown: Optional[dict[str, str]] = None
    # Binary content type
    pdf: Optional[dict[str, bytes]] = None
    # Text content type
    plaintext: Optional[dict[str, str]] = None
    # Binary content types
    image: Optional[dict[str, bytes]] = None
    docx: Optional[dict[str, bytes]] = None
47
+
48
+
49
@indexer.post("/update")
@requires(["authenticated"])
async def update(
    request: Request,
    files: list[UploadFile],
    force: bool = False,
    t: Optional[Union[state.SearchType, str]] = state.SearchType.All,
    client: Optional[str] = None,
    user_agent: Optional[str] = Header(None),
    referer: Optional[str] = Header(None),
    host: Optional[str] = Header(None),
    indexed_data_limiter: ApiIndexedDataLimiter = Depends(
        ApiIndexedDataLimiter(
            incoming_entries_size_limit=10,
            subscribed_incoming_entries_size_limit=25,
            total_entries_size_limit=10,
            subscribed_total_entries_size_limit=100,
        )
    ),
):
    """Index the uploaded files for the authenticated user.

    Uploaded files are grouped by detected content type, unsupported types are
    skipped with a warning, and the grouped files are handed to
    `configure_content` on a worker thread.

    Args:
        request: Incoming request; `request.user.object` is the user to index for.
        files: Uploaded files to index.
        force: Passed to `configure_content` as its `regenerate` argument.
        t: Search type to update. Defaults to all types.
        client: Name of the calling client, used in logs and telemetry.
        user_agent, referer, host: Request headers forwarded to telemetry.
        indexed_data_limiter: Dependency enforcing indexed data size limits.

    Returns:
        A 200 response whose body is the comma separated names of the indexed
        files, or a 500 response with body "Failed" if indexing failed.
    """
    user = request.user.object
    # Text formats are stored decoded (str); binary formats are stored as raw bytes
    index_files: Dict[str, Dict[str, Union[str, bytes]]] = {
        "org": {},
        "markdown": {},
        "pdf": {},
        "plaintext": {},
        "image": {},
        "docx": {},
    }
    try:
        logger.info(f"📬 Updating content index via API call by {client} client")
        for file in files:
            file_content = file.file.read()
            file_type, encoding = get_file_type(file.content_type, file_content)
            if file_type in index_files:
                index_files[file_type][file.filename] = file_content.decode(encoding) if encoding else file_content
            else:
                logger.warning(f"Skipped indexing unsupported file type sent by {client} client: {file.filename}")

        indexer_input = IndexerInput(
            org=index_files["org"],
            markdown=index_files["markdown"],
            pdf=index_files["pdf"],
            plaintext=index_files["plaintext"],
            image=index_files["image"],
            docx=index_files["docx"],
        )

        if state.config is None:
            logger.info("📬 Initializing content index on first run.")
            default_full_config = FullConfig(
                content_type=None,
                search_type=SearchConfig.model_validate(constants.default_config["search-type"]),
                processor=None,
            )
            state.config = default_full_config
            default_content_config = ContentConfig(
                org=None,
                markdown=None,
                pdf=None,
                docx=None,
                image=None,
                github=None,
                notion=None,
                plaintext=None,
            )
            state.config.content_type = default_content_config
            save_config_to_file_updated_state()
            # NOTE(review): the returned search models are discarded here, so
            # state.search_models is left unchanged - confirm this is intended.
            configure_search(state.search_models, state.config.search_type)

        # Indexing is blocking work. Run it on the default executor so the event loop stays responsive
        loop = asyncio.get_running_loop()
        success = await loop.run_in_executor(
            None,
            configure_content,
            indexer_input.model_dump(),
            force,
            t,
            False,
            user,
        )
        if not success:
            raise RuntimeError("Failed to update content index")
        logger.info("Finished processing batch indexing request")
    except Exception as e:
        # Log the failure once, with traceback
        logger.error(
            f'🚨 Failed to {"force " if force else ""}update {t} content index triggered via API call by {client} client: {e}',
            exc_info=True,
        )
        return Response(content="Failed", status_code=500)

    indexing_metadata = {
        "num_org": len(index_files["org"]),
        "num_markdown": len(index_files["markdown"]),
        "num_pdf": len(index_files["pdf"]),
        "num_plaintext": len(index_files["plaintext"]),
        "num_image": len(index_files["image"]),
        "num_docx": len(index_files["docx"]),
    }

    update_telemetry_state(
        request=request,
        telemetry_type="api",
        api="index/update",
        client=client,
        user_agent=user_agent,
        referer=referer,
        host=host,
        metadata=indexing_metadata,
    )

    logger.info(f"📪 Content index updated via API call by {client} client")

    indexed_filenames = ",".join(file for ctype in index_files for file in index_files[ctype])
    return Response(content=indexed_filenames, status_code=200)
165
+
166
+
167
def configure_search(search_models: SearchModels, search_config: Optional[SearchConfig]) -> Optional[SearchModels]:
    """Return `search_models`, or a fresh `SearchModels()` when none was given.

    `search_config` is accepted but not used by this function.
    """
    return SearchModels() if search_models is None else search_models
173
+
174
+
175
def _setup_file_search(text_to_entries, content_files, start_message, failure_label, regenerate, full_corpus, user) -> bool:
    """Index one content type's files; return False only if indexing raised."""
    if not content_files:
        # Nothing was provided for this content type
        return True
    try:
        logger.info(start_message)
        # Extract Entries, Generate Embeddings
        text_search.setup(
            text_to_entries,
            content_files,
            regenerate=regenerate,
            full_corpus=full_corpus,
            user=user,
        )
    except Exception as e:
        logger.error(f"🚨 Failed to setup {failure_label}: {e}", exc_info=True)
        return False
    return True


def configure_content(
    files: Optional[dict[str, dict[str, str]]],
    regenerate: bool = False,
    t: Optional[state.SearchType] = state.SearchType.All,
    full_corpus: bool = True,
    user: KhojUser = None,
) -> bool:
    """Index the given files, and the user's Github and Notion sources, for search.

    Args:
        files: Mapping of content type ("org", "markdown", "pdf", "plaintext",
            "image", "docx") to a mapping of filename to file content. Missing
            content types are treated as having no files.
        regenerate: Forwarded to `text_search.setup`.
        t: Search type to set up, as a `state.SearchType` member or its value.
            `None` means all types.
        full_corpus: Forwarded to `text_search.setup`.
        user: User to index content for.

    Returns:
        False if `t` is not a valid search type or setting up any content type
        failed. True otherwise, including when `files` is None.
    """
    if t is None:
        t = state.SearchType.All

    # Accept either an enum member or its value. Reject anything else
    try:
        t = state.SearchType(t)
    except ValueError:
        logger.warning(f"🚨 Invalid search type: {t}")
        return False

    search_type = t.value

    # Check for missing files before inspecting them
    if files is None:
        logger.warning(f"🚨 No files to process for {search_type} search.")
        return True

    # Github and Notion are only synced when the request carries no files
    no_documents = not any(files.values())

    def is_requested(content_type) -> bool:
        return search_type in (state.SearchType.All.value, content_type.value)

    success = True

    # File based content types set up before Github and Notion:
    # (search type, key in `files`, entries processor, start log message, label in failure log)
    leading_sources = [
        (state.SearchType.Org, "org", OrgToEntries, "🦄 Setting up search for orgmode notes", "org"),
        (state.SearchType.Markdown, "markdown", MarkdownToEntries, "💎 Setting up search for markdown notes", "markdown"),
        (state.SearchType.Pdf, "pdf", PdfToEntries, "🖨️ Setting up search for pdf", "PDF"),
        (state.SearchType.Plaintext, "plaintext", PlaintextToEntries, "📄 Setting up search for plaintext", "plaintext"),
    ]
    # File based content types set up after Github and Notion
    trailing_sources = [
        (state.SearchType.Image, "image", ImageToEntries, "🖼️ Setting up search for images", "images"),
        (state.SearchType.Docx, "docx", DocxToEntries, "📄 Setting up search for docx", "docx"),
    ]

    for content_type, key, text_to_entries, start_message, failure_label in leading_sources:
        if is_requested(content_type) and not _setup_file_search(
            text_to_entries, files.get(key), start_message, failure_label, regenerate, full_corpus, user
        ):
            success = False

    try:
        if no_documents:
            # Initialize Github Search
            github_config = GithubConfig.objects.filter(user=user).prefetch_related("githubrepoconfig").first()
            if is_requested(state.SearchType.Github) and github_config is not None:
                logger.info("🐙 Setting up search for github")
                # Extract Entries, Generate Github Embeddings
                text_search.setup(
                    GithubToEntries,
                    None,
                    regenerate=regenerate,
                    full_corpus=full_corpus,
                    user=user,
                    config=github_config,
                )
    except Exception as e:
        logger.error(f"🚨 Failed to setup GitHub: {e}", exc_info=True)
        success = False

    try:
        if no_documents:
            # Initialize Notion Search
            notion_config = NotionConfig.objects.filter(user=user).first()
            if is_requested(state.SearchType.Notion) and notion_config:
                logger.info("🔌 Setting up search for notion")
                text_search.setup(
                    NotionToEntries,
                    None,
                    regenerate=regenerate,
                    full_corpus=full_corpus,
                    user=user,
                    config=notion_config,
                )
    except Exception as e:
        logger.error(f"🚨 Failed to setup Notion: {e}", exc_info=True)
        success = False

    for content_type, key, text_to_entries, start_message, failure_label in trailing_sources:
        if is_requested(content_type) and not _setup_file_search(
            text_to_entries, files.get(key), start_message, failure_label, regenerate, full_corpus, user
        ):
            success = False

    # Invalidate Query Cache
    if user:
        state.query_cache[user.uuid] = LRU()

    return success
khoj/routers/notion.py ADDED
@@ -0,0 +1,91 @@
1
+ import asyncio
2
+ import base64
3
+ import json
4
+ import logging
5
+ import os
6
+ from concurrent.futures import ThreadPoolExecutor
7
+
8
+ import requests
9
+ from fastapi import APIRouter, BackgroundTasks, Request, Response
10
+ from starlette.responses import RedirectResponse
11
+
12
+ from khoj.database.adapters import aget_user_by_uuid
13
+ from khoj.database.models import KhojUser, NotionConfig
14
+ from khoj.routers.indexer import configure_content
15
+ from khoj.utils.state import SearchType
16
+
17
# Notion OAuth app credentials and callback URL, read from the environment.
# Each is None when its environment variable is unset.
NOTION_OAUTH_CLIENT_ID = os.environ.get("NOTION_OAUTH_CLIENT_ID")
NOTION_OAUTH_CLIENT_SECRET = os.environ.get("NOTION_OAUTH_CLIENT_SECRET")
NOTION_REDIRECT_URI = os.environ.get("NOTION_REDIRECT_URI")

# Router for the Notion endpoints defined in this module
notion_router = APIRouter()

# Thread pool used by run_in_executor to run blocking functions
executor = ThreadPoolExecutor()

logger = logging.getLogger(__name__)
26
+
27
+
28
def get_notion_auth_url(user: KhojUser):
    """Build the Notion OAuth authorization URL for `user`.

    The user's uuid is sent as the OAuth `state` parameter.

    Returns:
        The authorization URL, or None when the Notion OAuth client id,
        client secret or redirect URI is not configured.
    """
    from urllib.parse import urlencode

    if not (NOTION_OAUTH_CLIENT_ID and NOTION_OAUTH_CLIENT_SECRET and NOTION_REDIRECT_URI):
        return None

    # URL-encode the values so reserved characters in the redirect URI cannot corrupt the query string
    query = urlencode(
        {
            "client_id": NOTION_OAUTH_CLIENT_ID,
            "redirect_uri": NOTION_REDIRECT_URI,
            "response_type": "code",
            "state": user.uuid,
        }
    )
    return f"https://api.notion.com/v1/oauth/authorize?{query}"
32
+
33
+
34
async def run_in_executor(func, *args):
    """Run `func(*args)` on the module's thread pool and return its result."""
    pending = asyncio.get_running_loop().run_in_executor(executor, func, *args)
    return await pending
37
+
38
+
39
@notion_router.get("/auth/callback")
async def notion_auth_callback(request: Request, background_tasks: BackgroundTasks):
    """Handle the Notion OAuth redirect and store the user's Notion access token.

    Reads the `code` and `state` query parameters, looks the user up by the
    uuid in `state`, exchanges the code for an access token, replaces the
    user's stored NotionConfig with the new token, and schedules indexing of
    their Notion content in the background.

    Returns:
        A redirect to the Notion config page on success. A 400 response when
        `code` or `state` is missing or Notion rejects the exchange, 404 when
        the user is not found, and 502 when Notion cannot be reached.
    """
    code = request.query_params.get("code")
    state = request.query_params.get("state")
    if not code or not state:
        return Response("Missing code or state", status_code=400)

    user: KhojUser = await aget_user_by_uuid(state)

    # Validate the user before touching any stored configuration
    if not user:
        return Response("User not found", status_code=404)

    bearer_token = f"{NOTION_OAUTH_CLIENT_ID}:{NOTION_OAUTH_CLIENT_SECRET}"
    base64_encoded_token = base64.b64encode(bearer_token.encode()).decode()

    headers = {
        "Accept": "application/json",
        "Content-Type": "application/json",
        "Authorization": f"Basic {base64_encoded_token}",
    }

    data = {
        "grant_type": "authorization_code",
        "code": code,
        "redirect_uri": NOTION_REDIRECT_URI,
    }

    def exchange_code_for_token():
        # Bounded wait so an unresponsive Notion API cannot hang the worker thread forever
        return requests.post(
            "https://api.notion.com/v1/oauth/token", data=json.dumps(data), headers=headers, timeout=30
        )

    try:
        # requests is blocking. Run it on the thread pool to keep the event loop free
        response = await run_in_executor(exchange_code_for_token)
        final_response = response.json()
    except (requests.RequestException, ValueError) as e:
        logger.error(f"Notion auth callback token exchange failed: {e}", exc_info=True)
        return Response("Failed to reach Notion", status_code=502)

    # Do not log the response body. It contains the access token
    access_token = final_response.get("access_token")
    if not response.ok or not access_token:
        logger.error(
            f"Notion auth callback token exchange rejected. Status: {response.status_code}, Error: {final_response.get('error')}"
        )
        return Response("Failed to authenticate with Notion", status_code=400)

    # Replace the previous config only once a new token is in hand
    await NotionConfig.objects.filter(user=user).adelete()
    await NotionConfig.objects.acreate(token=access_token, user=user)

    owner = final_response.get("owner")
    workspace_id = final_response.get("workspace_id")
    workspace_name = final_response.get("workspace_name")
    bot_id = final_response.get("bot_id")

    logger.info(
        f"Notion integration. Owner: {owner}, Workspace ID: {workspace_id}, Workspace Name: {workspace_name}, Bot ID: {bot_id}"
    )

    notion_redirect = str(request.app.url_path_for("notion_config_page"))

    # Trigger an async job to configure_content. Let it run without blocking the response.
    background_tasks.add_task(run_in_executor, configure_content, {}, False, SearchType.Notion, True, user)

    return RedirectResponse(notion_redirect)
khoj/routers/storage.py ADDED
@@ -0,0 +1,35 @@
1
+ import logging
2
+ import os
3
+ import uuid
4
+
5
+ logger = logging.getLogger(__name__)
6
+
7
# AWS credentials, read from the environment. None when unset
AWS_ACCESS_KEY = os.environ.get("AWS_ACCESS_KEY")
AWS_SECRET_KEY = os.environ.get("AWS_SECRET_KEY")
# S3 supports serving assets via your domain. Khoj expects this to be used in production. To enable it:
# 1. Your bucket name for images should be of the form sub.domain.tld. For example, generated.khoj.dev
# 2. Add CNAME entry to your domain's DNS records pointing to the S3 bucket. For example, CNAME generated.khoj.dev generated-khoj-dev.s3.amazonaws.com
AWS_UPLOAD_IMAGE_BUCKET_NAME = os.environ.get("AWS_IMAGE_UPLOAD_BUCKET")

# Uploads are enabled only when both credentials and the bucket name are set
aws_enabled = all(
    setting is not None for setting in (AWS_ACCESS_KEY, AWS_SECRET_KEY, AWS_UPLOAD_IMAGE_BUCKET_NAME)
)

if aws_enabled:
    # Import boto3 only when it is needed
    from boto3 import client

    s3_client = client("s3", aws_access_key_id=AWS_ACCESS_KEY, aws_secret_access_key=AWS_SECRET_KEY)
20
+
21
+
22
def upload_image(image: bytes, user_id: uuid.UUID):
    """Upload `image` to the S3 image bucket under the user's prefix.

    Returns the URL of the uploaded image, or None when AWS is not enabled
    or the upload fails.
    """
    if not aws_enabled:
        logger.info("AWS is not enabled. Skipping image upload")
        return None

    # Objects are keyed as <user_id>/<random uuid>.webp
    object_key = f"{user_id}/{uuid.uuid4()}.webp"
    try:
        s3_client.put_object(Bucket=AWS_UPLOAD_IMAGE_BUCKET_NAME, Key=object_key, Body=image, ACL="public-read")
    except Exception as e:
        logger.error(f"Failed to upload image to S3: {e}")
        return None

    return f"https://{AWS_UPLOAD_IMAGE_BUCKET_NAME}/{object_key}"
@@ -0,0 +1,104 @@
1
+ import logging
2
+ import os
3
+ from datetime import datetime, timezone
4
+
5
+ from asgiref.sync import sync_to_async
6
+ from fastapi import APIRouter, Request
7
+ from starlette.authentication import requires
8
+
9
+ from khoj.database import adapters
10
+ from khoj.utils import state
11
+
12
# Module-level logger and the router that the subscription endpoints attach to.
logger = logging.getLogger(__name__)
subscription_router = APIRouter()

# Secret used to verify the signature on incoming Stripe webhook calls.
endpoint_secret = os.getenv("STRIPE_SIGNING_SECRET")

# Stripe integration for Khoj Cloud Subscription
if state.billing_enabled:
    # The Stripe SDK is only imported and configured when billing is turned on.
    import stripe

    stripe.api_key = os.getenv("STRIPE_API_KEY")
+
21
+
22
@subscription_router.post("")
async def subscribe(request: Request):
    """Webhook for Stripe to send subscription events to Khoj Cloud.

    The request body is verified against the ``stripe-signature`` header, then
    the user's subscription is updated for the handled event types:

    - ``invoice.paid``: mark the user as subscribed and store the renewal date.
    - ``customer.subscription.updated``: toggle recurring status for users that
      already have a renewal date.
    - ``customer.subscription.deleted``: reset the user to the trial state.

    Any other event type is logged and ignored.

    Args:
        request: The incoming webhook request.

    Returns:
        ``{"success": bool}``. ``False`` for unhandled event types, when no
        subscription can be found for a paid invoice, or when the user
        subscription could not be updated.

    Raises:
        KeyError: If the ``stripe-signature`` header is missing.
        ValueError: If the payload is invalid.
        stripe.error.SignatureVerificationError: If the signature is invalid.
    """
    payload = await request.body()
    sig_header = request.headers["stripe-signature"]
    # Invalid payloads (ValueError) and invalid signatures
    # (SignatureVerificationError) are deliberately left to propagate.
    event = stripe.Webhook.construct_event(payload, sig_header, endpoint_secret)

    event_type = event["type"]
    if event_type not in {
        "invoice.paid",
        "customer.subscription.updated",
        "customer.subscription.deleted",
    }:
        logger.warning(f"Unhandled Stripe event type: {event['type']}")
        return {"success": False}

    # Retrieve the customer's details
    event_object = event["data"]["object"]
    customer_id = event_object["customer"]
    customer = stripe.Customer.retrieve(customer_id)
    customer_email = customer["email"]

    # Handle valid stripe webhook events
    success = True
    if event_type == "invoice.paid":
        # Mark the user as subscribed and update the next renewal date on payment
        subscriptions = stripe.Subscription.list(customer=customer_id).data
        if not subscriptions:
            # A paid invoice without a listed subscription cannot provide a
            # renewal date; report failure instead of crashing on an empty list.
            logger.warning(f"No Stripe subscription found for customer {customer_id} on {event_type}")
            return {"success": False}
        renewal_date = datetime.fromtimestamp(subscriptions[0]["current_period_end"], tz=timezone.utc)
        user = await adapters.set_user_subscription(customer_email, is_recurring=True, renewal_date=renewal_date)
        success = user is not None
    elif event_type == "customer.subscription.updated":
        user_subscription = await sync_to_async(adapters.get_user_subscription)(customer_email)
        # Allow updating subscription status if paid user
        if user_subscription and user_subscription.renewal_date:
            # Mark user as unsubscribed or resubscribed
            is_recurring = not event_object["cancel_at_period_end"]
            updated_user = await adapters.set_user_subscription(customer_email, is_recurring=is_recurring)
            success = updated_user is not None
    elif event_type == "customer.subscription.deleted":
        # Reset the user to trial state
        # NOTE(review): renewal_date is passed as False rather than None;
        # presumably the adapter treats False as "clear the date" - confirm.
        user = await adapters.set_user_subscription(
            customer_email, is_recurring=False, renewal_date=False, type="trial"
        )
        success = user is not None

    logger.info(f'Stripe subscription {event["type"]} for {customer_email}')
    return {"success": success}
77
+
78
+
79
@subscription_router.patch("")
@requires(["authenticated"])
async def update_subscription(request: Request, email: str, operation: str):
    """Cancel or resume the Stripe subscription of the customer with the given email.

    Args:
        request: The incoming request; the ``authenticated`` scope is required.
        email: Email address used to look up the Stripe customer.
        operation: ``"cancel"`` to set every subscription of the customer to
            cancel at the end of its period, or ``"resubscribe"`` to undo that
            on the first subscription that is set to cancel.

    Returns:
        ``{"success": True}`` when the operation was applied, otherwise
        ``{"success": False, "message": ...}`` describing why it was not.
    """
    # NOTE(review): `email` is used as received and is not compared with the
    # authenticated user here; confirm that something upstream prevents a user
    # from passing another customer's email.

    # Retrieve the customer's details
    customer = next(stripe.Customer.list(email=email).auto_paging_iter(), None)
    if customer is None:
        return {"success": False, "message": "Customer not found"}

    if operation == "cancel":
        # Auto-paginate, as the resubscribe branch does, so subscriptions
        # beyond the first page of results are cancelled too.
        for subscription in stripe.Subscription.list(customer=customer.id).auto_paging_iter():
            stripe.Subscription.modify(subscription.id, cancel_at_period_end=True)
        return {"success": True}

    if operation == "resubscribe":
        # Find the subscription that is set to cancel at the end of the period
        for subscription in stripe.Subscription.list(customer=customer.id).auto_paging_iter():
            if subscription.cancel_at_period_end:
                # Update the subscription to not cancel at the end of the period
                stripe.Subscription.modify(subscription.id, cancel_at_period_end=False)
                return {"success": True}
        return {"success": False, "message": "No subscription found that is set to cancel"}

    return {"success": False, "message": "Invalid operation"}
khoj/routers/twilio.py ADDED
@@ -0,0 +1,36 @@
1
+ import logging
2
+ import os
3
+
4
+ from khoj.database.models import KhojUser
5
+
6
logger = logging.getLogger(__name__)

# Twilio settings are read from the environment once, when this module is imported.
account_sid = os.getenv("TWILIO_ACCOUNT_SID")
auth_token = os.getenv("TWILIO_AUTH_TOKEN")
verification_service_sid = os.getenv("TWILIO_VERIFICATION_SID")

# Twilio is only usable when all three settings are present.
twilio_enabled = all(setting is not None for setting in (account_sid, auth_token, verification_service_sid))
if twilio_enabled:
    # The Twilio SDK is imported here so that it is only needed when configured.
    from twilio.rest import Client

    client = Client(account_sid, auth_token)
+
18
+
19
def is_twilio_enabled():
    """Return the module-level ``twilio_enabled`` flag."""
    return twilio_enabled
21
+
22
+
23
def create_otp(user: KhojUser):
    """Start a Twilio verification for the user's phone number over WhatsApp.

    Returns True when the created verification has a SID.
    """
    verify_service = client.verify.v2.services(verification_service_sid)
    phone_number = str(user.phone_number)
    created = verify_service.verifications.create(to=phone_number, channel="whatsapp")
    return created.sid is not None
29
+
30
+
31
def verify_otp(user: KhojUser, code: str):
    """Check the given code against Twilio for the user's phone number.

    Returns True when the verification check status is "approved".
    """
    verify_service = client.verify.v2.services(verification_service_sid)
    phone_number = str(user.phone_number)
    check = verify_service.verification_checks.create(to=phone_number, code=code)
    return check.status == "approved"