khoj 1.16.1.dev15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242) hide show
  1. khoj/__init__.py +0 -0
  2. khoj/app/README.md +94 -0
  3. khoj/app/__init__.py +0 -0
  4. khoj/app/asgi.py +16 -0
  5. khoj/app/settings.py +192 -0
  6. khoj/app/urls.py +25 -0
  7. khoj/configure.py +424 -0
  8. khoj/database/__init__.py +0 -0
  9. khoj/database/adapters/__init__.py +1234 -0
  10. khoj/database/admin.py +290 -0
  11. khoj/database/apps.py +6 -0
  12. khoj/database/management/__init__.py +0 -0
  13. khoj/database/management/commands/__init__.py +0 -0
  14. khoj/database/management/commands/change_generated_images_url.py +61 -0
  15. khoj/database/management/commands/convert_images_png_to_webp.py +99 -0
  16. khoj/database/migrations/0001_khojuser.py +98 -0
  17. khoj/database/migrations/0002_googleuser.py +32 -0
  18. khoj/database/migrations/0003_vector_extension.py +10 -0
  19. khoj/database/migrations/0004_content_types_and_more.py +181 -0
  20. khoj/database/migrations/0005_embeddings_corpus_id.py +19 -0
  21. khoj/database/migrations/0006_embeddingsdates.py +33 -0
  22. khoj/database/migrations/0007_add_conversation.py +27 -0
  23. khoj/database/migrations/0008_alter_conversation_conversation_log.py +17 -0
  24. khoj/database/migrations/0009_khojapiuser.py +24 -0
  25. khoj/database/migrations/0010_chatmodeloptions_and_more.py +83 -0
  26. khoj/database/migrations/0010_rename_embeddings_entry_and_more.py +30 -0
  27. khoj/database/migrations/0011_merge_20231102_0138.py +14 -0
  28. khoj/database/migrations/0012_entry_file_source.py +21 -0
  29. khoj/database/migrations/0013_subscription.py +37 -0
  30. khoj/database/migrations/0014_alter_googleuser_picture.py +17 -0
  31. khoj/database/migrations/0015_alter_subscription_user.py +21 -0
  32. khoj/database/migrations/0016_alter_subscription_renewal_date.py +17 -0
  33. khoj/database/migrations/0017_searchmodel.py +32 -0
  34. khoj/database/migrations/0018_searchmodelconfig_delete_searchmodel.py +30 -0
  35. khoj/database/migrations/0019_alter_googleuser_family_name_and_more.py +27 -0
  36. khoj/database/migrations/0020_reflectivequestion.py +36 -0
  37. khoj/database/migrations/0021_speechtotextmodeloptions_and_more.py +42 -0
  38. khoj/database/migrations/0022_texttoimagemodelconfig.py +25 -0
  39. khoj/database/migrations/0023_usersearchmodelconfig.py +33 -0
  40. khoj/database/migrations/0024_alter_entry_embeddings.py +18 -0
  41. khoj/database/migrations/0025_clientapplication_khojuser_phone_number_and_more.py +46 -0
  42. khoj/database/migrations/0025_searchmodelconfig_embeddings_inference_endpoint_and_more.py +22 -0
  43. khoj/database/migrations/0026_searchmodelconfig_cross_encoder_inference_endpoint_and_more.py +22 -0
  44. khoj/database/migrations/0027_merge_20240118_1324.py +13 -0
  45. khoj/database/migrations/0028_khojuser_verified_phone_number.py +17 -0
  46. khoj/database/migrations/0029_userrequests.py +27 -0
  47. khoj/database/migrations/0030_conversation_slug_and_title.py +38 -0
  48. khoj/database/migrations/0031_agent_conversation_agent.py +53 -0
  49. khoj/database/migrations/0031_alter_googleuser_locale.py +30 -0
  50. khoj/database/migrations/0032_merge_20240322_0427.py +14 -0
  51. khoj/database/migrations/0033_rename_tuning_agent_personality.py +17 -0
  52. khoj/database/migrations/0034_alter_chatmodeloptions_chat_model.py +32 -0
  53. khoj/database/migrations/0035_processlock.py +26 -0
  54. khoj/database/migrations/0036_alter_processlock_name.py +19 -0
  55. khoj/database/migrations/0036_delete_offlinechatprocessorconversationconfig.py +15 -0
  56. khoj/database/migrations/0036_publicconversation.py +42 -0
  57. khoj/database/migrations/0037_chatmodeloptions_openai_config_and_more.py +51 -0
  58. khoj/database/migrations/0037_searchmodelconfig_bi_encoder_docs_encode_config_and_more.py +32 -0
  59. khoj/database/migrations/0038_merge_20240425_0857.py +14 -0
  60. khoj/database/migrations/0038_merge_20240426_1640.py +12 -0
  61. khoj/database/migrations/0039_merge_20240501_0301.py +12 -0
  62. khoj/database/migrations/0040_alter_processlock_name.py +26 -0
  63. khoj/database/migrations/0040_merge_20240504_1010.py +14 -0
  64. khoj/database/migrations/0041_merge_20240505_1234.py +14 -0
  65. khoj/database/migrations/0042_serverchatsettings.py +46 -0
  66. khoj/database/migrations/0043_alter_chatmodeloptions_model_type.py +21 -0
  67. khoj/database/migrations/0044_conversation_file_filters.py +17 -0
  68. khoj/database/migrations/0045_fileobject.py +37 -0
  69. khoj/database/migrations/0046_khojuser_email_verification_code_and_more.py +22 -0
  70. khoj/database/migrations/0047_alter_entry_file_type.py +31 -0
  71. khoj/database/migrations/0048_voicemodeloption_uservoicemodelconfig.py +52 -0
  72. khoj/database/migrations/0049_datastore.py +38 -0
  73. khoj/database/migrations/0049_texttoimagemodelconfig_api_key_and_more.py +58 -0
  74. khoj/database/migrations/0050_alter_processlock_name.py +25 -0
  75. khoj/database/migrations/0051_merge_20240702_1220.py +14 -0
  76. khoj/database/migrations/0052_alter_searchmodelconfig_bi_encoder_docs_encode_config_and_more.py +27 -0
  77. khoj/database/migrations/__init__.py +0 -0
  78. khoj/database/models/__init__.py +402 -0
  79. khoj/database/tests.py +3 -0
  80. khoj/interface/email/feedback.html +34 -0
  81. khoj/interface/email/magic_link.html +17 -0
  82. khoj/interface/email/task.html +40 -0
  83. khoj/interface/email/welcome.html +61 -0
  84. khoj/interface/web/404.html +56 -0
  85. khoj/interface/web/agent.html +312 -0
  86. khoj/interface/web/agents.html +276 -0
  87. khoj/interface/web/assets/icons/agents.svg +6 -0
  88. khoj/interface/web/assets/icons/automation.svg +37 -0
  89. khoj/interface/web/assets/icons/cancel.svg +3 -0
  90. khoj/interface/web/assets/icons/chat.svg +24 -0
  91. khoj/interface/web/assets/icons/collapse.svg +17 -0
  92. khoj/interface/web/assets/icons/computer.png +0 -0
  93. khoj/interface/web/assets/icons/confirm-icon.svg +1 -0
  94. khoj/interface/web/assets/icons/copy-button-success.svg +6 -0
  95. khoj/interface/web/assets/icons/copy-button.svg +5 -0
  96. khoj/interface/web/assets/icons/credit-card.png +0 -0
  97. khoj/interface/web/assets/icons/delete.svg +26 -0
  98. khoj/interface/web/assets/icons/docx.svg +7 -0
  99. khoj/interface/web/assets/icons/edit.svg +4 -0
  100. khoj/interface/web/assets/icons/favicon-128x128.ico +0 -0
  101. khoj/interface/web/assets/icons/favicon-128x128.png +0 -0
  102. khoj/interface/web/assets/icons/favicon-256x256.png +0 -0
  103. khoj/interface/web/assets/icons/favicon.icns +0 -0
  104. khoj/interface/web/assets/icons/github.svg +1 -0
  105. khoj/interface/web/assets/icons/key.svg +4 -0
  106. khoj/interface/web/assets/icons/khoj-logo-sideways-200.png +0 -0
  107. khoj/interface/web/assets/icons/khoj-logo-sideways-500.png +0 -0
  108. khoj/interface/web/assets/icons/khoj-logo-sideways.svg +5385 -0
  109. khoj/interface/web/assets/icons/logotype.svg +1 -0
  110. khoj/interface/web/assets/icons/markdown.svg +1 -0
  111. khoj/interface/web/assets/icons/new.svg +23 -0
  112. khoj/interface/web/assets/icons/notion.svg +4 -0
  113. khoj/interface/web/assets/icons/openai-logomark.svg +1 -0
  114. khoj/interface/web/assets/icons/org.svg +1 -0
  115. khoj/interface/web/assets/icons/pdf.svg +23 -0
  116. khoj/interface/web/assets/icons/pencil-edit.svg +5 -0
  117. khoj/interface/web/assets/icons/plaintext.svg +1 -0
  118. khoj/interface/web/assets/icons/question-mark-icon.svg +1 -0
  119. khoj/interface/web/assets/icons/search.svg +25 -0
  120. khoj/interface/web/assets/icons/send.svg +1 -0
  121. khoj/interface/web/assets/icons/share.svg +8 -0
  122. khoj/interface/web/assets/icons/speaker.svg +4 -0
  123. khoj/interface/web/assets/icons/stop-solid.svg +37 -0
  124. khoj/interface/web/assets/icons/sync.svg +4 -0
  125. khoj/interface/web/assets/icons/thumbs-down-svgrepo-com.svg +6 -0
  126. khoj/interface/web/assets/icons/thumbs-up-svgrepo-com.svg +6 -0
  127. khoj/interface/web/assets/icons/user-silhouette.svg +4 -0
  128. khoj/interface/web/assets/icons/voice.svg +8 -0
  129. khoj/interface/web/assets/icons/web.svg +2 -0
  130. khoj/interface/web/assets/icons/whatsapp.svg +17 -0
  131. khoj/interface/web/assets/khoj.css +237 -0
  132. khoj/interface/web/assets/markdown-it.min.js +8476 -0
  133. khoj/interface/web/assets/natural-cron.min.js +1 -0
  134. khoj/interface/web/assets/org.min.js +1823 -0
  135. khoj/interface/web/assets/pico.min.css +5 -0
  136. khoj/interface/web/assets/purify.min.js +3 -0
  137. khoj/interface/web/assets/samples/desktop-browse-draw-sample.png +0 -0
  138. khoj/interface/web/assets/samples/desktop-plain-chat-sample.png +0 -0
  139. khoj/interface/web/assets/samples/desktop-remember-plan-sample.png +0 -0
  140. khoj/interface/web/assets/samples/phone-browse-draw-sample.png +0 -0
  141. khoj/interface/web/assets/samples/phone-plain-chat-sample.png +0 -0
  142. khoj/interface/web/assets/samples/phone-remember-plan-sample.png +0 -0
  143. khoj/interface/web/assets/utils.js +33 -0
  144. khoj/interface/web/base_config.html +445 -0
  145. khoj/interface/web/chat.html +3546 -0
  146. khoj/interface/web/config.html +1011 -0
  147. khoj/interface/web/config_automation.html +1103 -0
  148. khoj/interface/web/content_source_computer_input.html +139 -0
  149. khoj/interface/web/content_source_github_input.html +216 -0
  150. khoj/interface/web/content_source_notion_input.html +94 -0
  151. khoj/interface/web/khoj.webmanifest +51 -0
  152. khoj/interface/web/login.html +219 -0
  153. khoj/interface/web/public_conversation.html +2006 -0
  154. khoj/interface/web/search.html +470 -0
  155. khoj/interface/web/utils.html +48 -0
  156. khoj/main.py +241 -0
  157. khoj/manage.py +22 -0
  158. khoj/migrations/__init__.py +0 -0
  159. khoj/migrations/migrate_offline_chat_default_model.py +69 -0
  160. khoj/migrations/migrate_offline_chat_default_model_2.py +71 -0
  161. khoj/migrations/migrate_offline_chat_schema.py +83 -0
  162. khoj/migrations/migrate_offline_model.py +29 -0
  163. khoj/migrations/migrate_processor_config_openai.py +67 -0
  164. khoj/migrations/migrate_server_pg.py +138 -0
  165. khoj/migrations/migrate_version.py +17 -0
  166. khoj/processor/__init__.py +0 -0
  167. khoj/processor/content/__init__.py +0 -0
  168. khoj/processor/content/docx/__init__.py +0 -0
  169. khoj/processor/content/docx/docx_to_entries.py +110 -0
  170. khoj/processor/content/github/__init__.py +0 -0
  171. khoj/processor/content/github/github_to_entries.py +224 -0
  172. khoj/processor/content/images/__init__.py +0 -0
  173. khoj/processor/content/images/image_to_entries.py +118 -0
  174. khoj/processor/content/markdown/__init__.py +0 -0
  175. khoj/processor/content/markdown/markdown_to_entries.py +165 -0
  176. khoj/processor/content/notion/notion_to_entries.py +260 -0
  177. khoj/processor/content/org_mode/__init__.py +0 -0
  178. khoj/processor/content/org_mode/org_to_entries.py +231 -0
  179. khoj/processor/content/org_mode/orgnode.py +532 -0
  180. khoj/processor/content/pdf/__init__.py +0 -0
  181. khoj/processor/content/pdf/pdf_to_entries.py +116 -0
  182. khoj/processor/content/plaintext/__init__.py +0 -0
  183. khoj/processor/content/plaintext/plaintext_to_entries.py +122 -0
  184. khoj/processor/content/text_to_entries.py +297 -0
  185. khoj/processor/conversation/__init__.py +0 -0
  186. khoj/processor/conversation/anthropic/__init__.py +0 -0
  187. khoj/processor/conversation/anthropic/anthropic_chat.py +206 -0
  188. khoj/processor/conversation/anthropic/utils.py +114 -0
  189. khoj/processor/conversation/offline/__init__.py +0 -0
  190. khoj/processor/conversation/offline/chat_model.py +231 -0
  191. khoj/processor/conversation/offline/utils.py +78 -0
  192. khoj/processor/conversation/offline/whisper.py +15 -0
  193. khoj/processor/conversation/openai/__init__.py +0 -0
  194. khoj/processor/conversation/openai/gpt.py +187 -0
  195. khoj/processor/conversation/openai/utils.py +129 -0
  196. khoj/processor/conversation/openai/whisper.py +13 -0
  197. khoj/processor/conversation/prompts.py +758 -0
  198. khoj/processor/conversation/utils.py +262 -0
  199. khoj/processor/embeddings.py +117 -0
  200. khoj/processor/speech/__init__.py +0 -0
  201. khoj/processor/speech/text_to_speech.py +51 -0
  202. khoj/processor/tools/__init__.py +0 -0
  203. khoj/processor/tools/online_search.py +225 -0
  204. khoj/routers/__init__.py +0 -0
  205. khoj/routers/api.py +626 -0
  206. khoj/routers/api_agents.py +43 -0
  207. khoj/routers/api_chat.py +1180 -0
  208. khoj/routers/api_config.py +434 -0
  209. khoj/routers/api_phone.py +86 -0
  210. khoj/routers/auth.py +181 -0
  211. khoj/routers/email.py +133 -0
  212. khoj/routers/helpers.py +1188 -0
  213. khoj/routers/indexer.py +349 -0
  214. khoj/routers/notion.py +91 -0
  215. khoj/routers/storage.py +35 -0
  216. khoj/routers/subscription.py +104 -0
  217. khoj/routers/twilio.py +36 -0
  218. khoj/routers/web_client.py +471 -0
  219. khoj/search_filter/__init__.py +0 -0
  220. khoj/search_filter/base_filter.py +15 -0
  221. khoj/search_filter/date_filter.py +217 -0
  222. khoj/search_filter/file_filter.py +30 -0
  223. khoj/search_filter/word_filter.py +29 -0
  224. khoj/search_type/__init__.py +0 -0
  225. khoj/search_type/text_search.py +241 -0
  226. khoj/utils/__init__.py +0 -0
  227. khoj/utils/cli.py +93 -0
  228. khoj/utils/config.py +81 -0
  229. khoj/utils/constants.py +24 -0
  230. khoj/utils/fs_syncer.py +249 -0
  231. khoj/utils/helpers.py +418 -0
  232. khoj/utils/initialization.py +146 -0
  233. khoj/utils/jsonl.py +43 -0
  234. khoj/utils/models.py +47 -0
  235. khoj/utils/rawconfig.py +160 -0
  236. khoj/utils/state.py +46 -0
  237. khoj/utils/yaml.py +43 -0
  238. khoj-1.16.1.dev15.dist-info/METADATA +178 -0
  239. khoj-1.16.1.dev15.dist-info/RECORD +242 -0
  240. khoj-1.16.1.dev15.dist-info/WHEEL +4 -0
  241. khoj-1.16.1.dev15.dist-info/entry_points.txt +2 -0
  242. khoj-1.16.1.dev15.dist-info/licenses/LICENSE +661 -0
khoj/main.py ADDED
@@ -0,0 +1,241 @@
1
+ """ Main module for Khoj Assistant
2
+ isort:skip_file
3
+ """
4
+
5
+ from contextlib import redirect_stdout
6
+ import logging
7
+ import io
8
+ import os
9
+ import atexit
10
+ import sys
11
+ import locale
12
+
13
+ from rich.logging import RichHandler
14
+ import threading
15
+ import warnings
16
+ from importlib.metadata import version
17
+
18
+ from khoj.utils.helpers import in_debug_mode, is_env_var_true
19
+
20
+ # Ignore non-actionable warnings
21
+ warnings.filterwarnings("ignore", message=r"snapshot_download.py has been made private", category=FutureWarning)
22
+ warnings.filterwarnings("ignore", message=r"legacy way to download files from the HF hub,", category=FutureWarning)
23
+
24
+
25
+ import uvicorn
26
+ import django
27
+ from apscheduler.schedulers.background import BackgroundScheduler
28
+ from fastapi import FastAPI
29
+ from fastapi.middleware.cors import CORSMiddleware
30
+ from fastapi.staticfiles import StaticFiles
31
+ import schedule
32
+
33
+ from django.core.asgi import get_asgi_application
34
+ from django.core.management import call_command
35
+
36
+ # Initialize Django
37
+ os.environ.setdefault("DJANGO_SETTINGS_MODULE", "khoj.app.settings")
38
+ django.setup()
39
+
40
+ # Setup Logger
41
+ rich_handler = RichHandler(rich_tracebacks=True)
42
+ rich_handler.setFormatter(fmt=logging.Formatter(fmt="%(name)s: %(message)s", datefmt="[%H:%M:%S.%f]"))
43
+ logging.basicConfig(handlers=[rich_handler])
44
+
45
+ logging.getLogger("uvicorn.error").setLevel(logging.INFO)
46
+
47
+ logger = logging.getLogger("khoj")
48
+
49
+ # Initialize Django Database
50
+ db_migrate_output = io.StringIO()
51
+ with redirect_stdout(db_migrate_output):
52
+ call_command("migrate", "--noinput")
53
+
54
+ # Initialize Django Static Files
55
+ collectstatic_output = io.StringIO()
56
+ with redirect_stdout(collectstatic_output):
57
+ call_command("collectstatic", "--noinput")
58
+
59
+ # Initialize the Application Server
60
+ if in_debug_mode():
61
+ app = FastAPI(debug=True)
62
+ else:
63
+ app = FastAPI(docs_url=None) # Disable Swagger UI in production
64
+
65
+ # Get Django Application
66
+ django_app = get_asgi_application()
67
+
68
+ # Add CORS middleware
69
+ KHOJ_DOMAIN = os.getenv("KHOJ_DOMAIN", "app.khoj.dev")
70
+ scheme = "https" if not is_env_var_true("KHOJ_NO_HTTPS") else "http"
71
+ custom_origins = [f"{scheme}://{KHOJ_DOMAIN.strip()}", f"{scheme}://{KHOJ_DOMAIN.strip()}:*"]
72
+ default_origins = [
73
+ "app://obsidian.md", # To allow access from Obsidian desktop app
74
+ "capacitor://localhost", # To allow access from Obsidian iOS app using Capacitor.JS
75
+ "http://localhost", # To allow access from Obsidian Android app
76
+ "http://localhost:*", # To allow access from localhost
77
+ "http://127.0.0.1:*", # To allow access from localhost
78
+ "app://khoj.dev", # To allow access from Khoj desktop app
79
+ ]
80
+
81
+ app.add_middleware(
82
+ CORSMiddleware,
83
+ allow_origins=default_origins + custom_origins,
84
+ allow_credentials=True,
85
+ allow_methods=["*"],
86
+ allow_headers=["*"],
87
+ )
88
+
89
+ # Set Locale
90
+ locale.setlocale(locale.LC_ALL, "")
91
+
92
+ # We import these packages after setting up Django so that Django features are accessible to the app.
93
+ from khoj.configure import configure_routes, initialize_server, configure_middleware
94
+ from khoj.utils import state
95
+ from khoj.utils.cli import cli
96
+ from khoj.utils.initialization import initialization
97
+ from khoj.database.adapters import ProcessLockAdapters
98
+ from khoj.database.models import ProcessLock
99
+
100
+ from django.db.utils import IntegrityError
101
+
102
+ SCHEDULE_LEADER_NAME = ProcessLock.Operation.SCHEDULE_LEADER
103
+
104
+
105
def shutdown_scheduler():
    """Release the schedule leader lock, if this process holds one, and stop the scheduler."""
    logger.info("🌑 Shutting down Khoj")

    # Only the process that was elected schedule leader has a lock stored on state
    leader_lock = state.schedule_leader_process_lock
    if leader_lock:
        logger.info("🔓 Schedule Leader released")
        ProcessLockAdapters.remove_process_lock(leader_lock)

    state.scheduler.shutdown()
113
+
114
+
115
def run(should_start_server=True):
    """Configure Khoj from the command line arguments and optionally serve it.

    Parses CLI arguments into shared state, sets up logging, starts the task
    and background schedulers, wires routes, mounts and middleware onto the
    module-level FastAPI ``app`` and initializes the server.

    Args:
        should_start_server: When True, run the uvicorn server in this process
            and shut the scheduler down once it returns. Pass False when an
            external server (e.g. gunicorn) imports this module and serves
            ``app`` itself.
    """
    # Turn Tokenizers Parallelism Off. App does not support it.
    os.environ["TOKENIZERS_PARALLELISM"] = "false"

    # Load config from CLI
    state.cli_args = sys.argv[1:]
    args = cli(state.cli_args)
    set_state(args)

    # Set Logging Level
    if args.verbose == 0:
        logger.setLevel(logging.INFO)
    elif args.verbose >= 1:
        logger.setLevel(logging.DEBUG)

    logger.info(f"🚒 Initializing Khoj v{state.khoj_version}")
    logger.info(f"📦 Initializing DB:\n{db_migrate_output.getvalue().strip()}")
    logger.debug(f"🌍 Initializing Web Client:\n{collectstatic_output.getvalue().strip()}")

    initialization()

    # Create app directory, if it doesn't exist
    state.config_file.parent.mkdir(parents=True, exist_ok=True)

    # Set Log File
    fh = logging.FileHandler(state.config_file.parent / "khoj.log", encoding="utf-8")
    fh.setLevel(logging.DEBUG)
    logger.addHandler(fh)

    logger.info("🌘 Starting Khoj")

    # Setup task scheduler
    poll_task_scheduler()

    # Setup Background Scheduler
    from django_apscheduler.jobstores import DjangoJobStore

    state.scheduler = BackgroundScheduler(
        {
            "apscheduler.timezone": "UTC",
            # Useful to run scheduled jobs even when worker delayed because it was busy or down
            "apscheduler.job_defaults.misfire_grace_time": "60",
            # Combine multiple jobs into one if they are scheduled at the same time
            "apscheduler.job_defaults.coalesce": "true",
        }
    )
    state.scheduler.add_jobstore(DjangoJobStore(), "default")

    # We use this mechanism to only elect one schedule leader in a distributed environment.
    # This one will be responsible for actually executing the scheduled tasks. The others will
    # still be capable of adding and removing tasks, but they will not execute them.
    # This is to decrease the overall burden on the database and the system.
    try:
        schedule_leader_process_lock = ProcessLockAdapters.get_process_lock(SCHEDULE_LEADER_NAME)
        if schedule_leader_process_lock:
            logger.info("🔒 Schedule Leader is already running")
            state.scheduler.start(paused=True)
        else:
            logger.info("🔒 Schedule Leader elected")
            created_process_lock = ProcessLockAdapters.set_process_lock(
                SCHEDULE_LEADER_NAME, max_duration_in_seconds=43200
            )
            state.scheduler.start()
            state.schedule_leader_process_lock = created_process_lock
    except IntegrityError:
        # Handled the same way as finding an existing lock: start paused
        logger.info("🔒 Schedule Leader running elsewhere")
        state.scheduler.start(paused=True)
    finally:
        logger.info("Started Background Scheduler")

    # Start Server
    configure_routes(app)

    # Mount Django and Static Files
    app.mount("/server", django_app, name="server")
    static_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "static")
    # exist_ok avoids a FileExistsError when several workers create the directory at the same time
    os.makedirs(static_dir, exist_ok=True)
    app.mount("/static", StaticFiles(directory=static_dir), name=static_dir)

    # Configure Middleware
    configure_middleware(app)

    initialize_server(args.config)

    # If the server is started through gunicorn (external to the script), don't start the server
    if should_start_server:
        start_server(app, host=args.host, port=args.port, socket=args.socket)
        # Teardown
        shutdown_scheduler()
200
+
201
+
202
def set_state(args):
    """Copy the parsed command line arguments onto the shared ``state`` module."""
    # Arguments mirrored one-to-one from args onto state, in the original assignment order
    for attribute in ("config_file", "config", "verbose", "host", "port", "anonymous_mode"):
        setattr(state, attribute, getattr(args, attribute))

    # The version is looked up for the "khoj" distribution rather than read from args
    state.khoj_version = version("khoj")
    state.chat_on_gpu = args.chat_on_gpu
211
+
212
+
213
def start_server(app, host=None, port=None, socket=None):
    """Serve ``app`` with uvicorn, on a unix domain socket if given, else on host and port.

    Blocks until uvicorn returns.
    """
    logger.info("🌖 Khoj is ready to use")

    if socket:
        # Unix domain socket mode
        server_options = {"proxy_headers": True, "uds": socket, "log_level": "debug"}
    else:
        server_options = {
            "host": host,
            "port": port,
            "log_level": "debug" if state.verbose > 1 else "info",
            "timeout_keep_alive": 60,
        }

    # Options shared by both modes are passed directly
    uvicorn.run(app, use_colors=True, log_config=None, **server_options)

    logger.info("🌒 Stopping Khoj")
228
+
229
+
230
def poll_task_scheduler():
    """Run any pending ``schedule`` jobs and re-arm a timer to do so again in 60 seconds."""
    # Arm the next poll first; a daemon timer does not keep the process alive on exit
    next_poll = threading.Timer(60.0, poll_task_scheduler)
    next_poll.daemon = True
    next_poll.start()

    schedule.run_pending()
235
+
236
+
237
+ if __name__ == "__main__":
238
+ run()
239
+ else:
240
+ run(should_start_server=False)
241
+ atexit.register(shutdown_scheduler)
khoj/manage.py ADDED
@@ -0,0 +1,22 @@
1
+ #!/usr/bin/env python
2
+ """Django's command-line utility for administrative tasks."""
3
+ import os
4
+ import sys
5
+
6
+
7
def main():
    """Select the Khoj Django settings and hand the command line to Django."""
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "khoj.app.settings")

    try:
        from django.core.management import execute_from_command_line as dispatch
    except ImportError as import_failure:
        # Re-raise with a hint about the usual causes, keeping the original error chained
        hint = (
            "Couldn't import Django. Are you sure it's installed and "
            "available on your PYTHONPATH environment variable? Did you "
            "forget to activate a virtual environment?"
        )
        raise ImportError(hint) from import_failure

    dispatch(sys.argv)
19
+
20
+
21
+ if __name__ == "__main__":
22
+ main()
File without changes
@@ -0,0 +1,69 @@
1
+ """
2
+ Current format of khoj.yml
3
+ ---
4
+ app:
5
+ ...
6
+ content-type:
7
+ ...
8
+ processor:
9
+ conversation:
10
+ offline-chat:
11
+ enable-offline-chat: false
12
+ chat-model: llama-2-7b-chat.ggmlv3.q4_0.bin
13
+ ...
14
+ search-type:
15
+ ...
16
+
17
+ New format of khoj.yml
18
+ ---
19
+ app:
20
+ ...
21
+ content-type:
22
+ ...
23
+ processor:
24
+ conversation:
25
+ offline-chat:
26
+ enable-offline-chat: false
27
+ chat-model: mistral-7b-instruct-v0.1.Q4_0.gguf
28
+ ...
29
+ search-type:
30
+ ...
31
+ """
32
+ import logging
33
+
34
+ from packaging import version
35
+
36
+ from khoj.utils.yaml import load_config_from_file, save_config_to_file
37
+
38
+ logger = logging.getLogger(__name__)
39
+
40
+
41
def migrate_offline_chat_default_model(args):
    """Switch the offline chat model from a legacy ``.bin`` model to the mistral GGUF model.

    Applies only when the config is older than schema version 0.12.4 and
    defines ``processor.conversation.offline-chat.chat-model``. The config
    file is rewritten in place when the migration runs.

    Args:
        args: Parsed CLI arguments; only ``args.config_file`` is read.

    Returns:
        The ``args`` object, unchanged.
    """
    schema_version = "0.12.4"
    raw_config = load_config_from_file(args.config_file)
    previous_version = raw_config.get("version")

    # Any level of the path may be missing or null in the YAML; treat both as "nothing to migrate"
    processor_config = raw_config.get("processor") or {}
    conversation_config = processor_config.get("conversation") or {}
    offline_chat_config = conversation_config.get("offline-chat") or {}
    if "chat-model" not in offline_chat_config:
        return args

    if previous_version is None or version.parse(previous_version) < version.parse(schema_version):
        logger.info(
            f"Upgrading config schema to {schema_version} from {previous_version} to change default (offline) chat model to mistral GGUF"
        )
        raw_config["version"] = schema_version

        # Update offline chat model to mistral in GGUF format to use latest GPT4All
        offline_chat_model = offline_chat_config["chat-model"]
        # A null chat-model has nothing to upgrade; guard before calling str methods
        if isinstance(offline_chat_model, str) and offline_chat_model.endswith(".bin"):
            offline_chat_config["chat-model"] = "mistral-7b-instruct-v0.1.Q4_0.gguf"

        save_config_to_file(raw_config, args.config_file)
    return args
@@ -0,0 +1,71 @@
1
+ """
2
+ Current format of khoj.yml
3
+ ---
4
+ app:
5
+ ...
6
+ content-type:
7
+ ...
8
+ processor:
9
+ conversation:
10
+ offline-chat:
11
+ enable-offline-chat: false
12
+ chat-model: mistral-7b-instruct-v0.1.Q4_0.gguf
13
+ ...
14
+ search-type:
15
+ ...
16
+
17
+ New format of khoj.yml
18
+ ---
19
+ app:
20
+ ...
21
+ content-type:
22
+ ...
23
+ processor:
24
+ conversation:
25
+ offline-chat:
26
+ enable-offline-chat: false
27
+ chat-model: NousResearch/Hermes-2-Pro-Mistral-7B-GGUF
28
+ ...
29
+ search-type:
30
+ ...
31
+ """
32
+ import logging
33
+
34
+ from packaging import version
35
+
36
+ from khoj.utils.yaml import load_config_from_file, save_config_to_file
37
+
38
+ logger = logging.getLogger(__name__)
39
+
40
+
41
def migrate_offline_chat_default_model(args):
    """Switch the offline chat model from the mistral GGUF file to Hermes-2-Pro.

    Applies only when the config is older than schema version 1.7.0 and
    defines ``processor.conversation.offline-chat.chat-model``. The config
    file is rewritten in place when the migration runs.

    Args:
        args: Parsed CLI arguments; only ``args.config_file`` is read.

    Returns:
        The ``args`` object, unchanged.
    """
    schema_version = "1.7.0"
    raw_config = load_config_from_file(args.config_file)
    previous_version = raw_config.get("version")

    # Any level of the path may be missing or null in the YAML; treat both as "nothing to migrate"
    processor_config = raw_config.get("processor") or {}
    conversation_config = processor_config.get("conversation") or {}
    offline_chat_config = conversation_config.get("offline-chat") or {}
    if "chat-model" not in offline_chat_config:
        return args

    if previous_version is None or version.parse(previous_version) < version.parse(schema_version):
        logger.info(
            f"Upgrading config schema to {schema_version} from {previous_version} to change default (offline) chat model to Hermes-2-Pro GGUF"
        )
        raw_config["version"] = schema_version

        # Update offline chat model to use Nous Research's Hermes-2-Pro GGUF in path format suitable for llama-cpp
        if offline_chat_config["chat-model"] == "mistral-7b-instruct-v0.1.Q4_0.gguf":
            offline_chat_config["chat-model"] = "NousResearch/Hermes-2-Pro-Mistral-7B-GGUF"

        save_config_to_file(raw_config, args.config_file)
    return args
@@ -0,0 +1,83 @@
1
+ """
2
+ Current format of khoj.yml
3
+ ---
4
+ app:
5
+ ...
6
+ content-type:
7
+ ...
8
+ processor:
9
+ conversation:
10
+ enable-offline-chat: false
11
+ conversation-logfile: ~/.khoj/processor/conversation/conversation_logs.json
12
+ openai:
13
+ ...
14
+ search-type:
15
+ ...
16
+
17
+ New format of khoj.yml
18
+ ---
19
+ app:
20
+ ...
21
+ content-type:
22
+ ...
23
+ processor:
24
+ conversation:
25
+ offline-chat:
26
+ enable-offline-chat: false
27
+ chat-model: llama-2-7b-chat.ggmlv3.q4_0.bin
28
+ tokenizer: null
29
+ max-prompt-size: null
30
+ conversation-logfile: ~/.khoj/processor/conversation/conversation_logs.json
31
+ openai:
32
+ ...
33
+ search-type:
34
+ ...
35
+ """
36
+ import logging
37
+
38
+ from packaging import version
39
+
40
+ from khoj.utils.yaml import load_config_from_file, save_config_to_file
41
+
42
+ logger = logging.getLogger(__name__)
43
+
44
+
45
def migrate_offline_chat_schema(args):
    """Move offline chat settings into a nested ``offline-chat`` section.

    Applies only when the config is older than schema version 0.12.3 and has
    a ``processor.conversation`` section. Adds null ``max-prompt-size`` and
    ``tokenizer`` fields, builds ``offline-chat`` from the old top-level
    ``enable-offline-chat`` flag, and rewrites the config file in place.

    Args:
        args: Parsed CLI arguments; only ``args.config_file`` is read.

    Returns:
        The ``args`` object, unchanged.
    """
    schema_version = "0.12.3"
    raw_config = load_config_from_file(args.config_file)
    previous_version = raw_config.get("version")

    # A missing or null processor/conversation section means there is nothing to migrate
    processor_config = raw_config.get("processor") or {}
    conversation_config = processor_config.get("conversation")
    if conversation_config is None:
        return args

    if previous_version is None or version.parse(previous_version) < version.parse(schema_version):
        logger.info(
            f"Upgrading config schema to {schema_version} from {previous_version} to make (offline) chat more configuration"
        )
        raw_config["version"] = schema_version

        # Create max-prompt-size field in conversation processor schema
        conversation_config["max-prompt-size"] = None
        conversation_config["tokenizer"] = None

        # Create offline chat schema based on existing enable_offline_chat field in khoj config schema.
        # An offline-chat section may already exist (possibly null); keep its values as fallbacks
        # so an already-nested setting is not reset.
        existing_offline_chat = conversation_config.get("offline-chat") or {}
        conversation_config["offline-chat"] = {
            # pop also deletes the old top-level enable-offline-chat field
            "enable-offline-chat": conversation_config.pop(
                "enable-offline-chat", existing_offline_chat.get("enable-offline-chat", False)
            ),
            "chat-model": existing_offline_chat.get("chat-model", "llama-2-7b-chat.ggmlv3.q4_0.bin"),
        }

        save_config_to_file(raw_config, args.config_file)
    return args
@@ -0,0 +1,29 @@
1
+ import logging
2
+ import os
3
+
4
+ from packaging import version
5
+
6
+ from khoj.utils.yaml import load_config_from_file, save_config_to_file
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+
11
def migrate_offline_model(args):
    """Delete the cached legacy offline model for configs older than schema version 0.10.1.

    Stamps the config with the new schema version and saves it. Returns
    ``args`` unchanged.
    """
    schema_version = "0.10.1"
    raw_config = load_config_from_file(args.config_file)
    previous_version = raw_config.get("version")

    # Configs already at or past this schema version need no work
    already_migrated = previous_version is not None and not (
        version.parse(previous_version) < version.parse(schema_version)
    )
    if already_migrated:
        return args

    logger.info(
        f"Migrating offline model used for version {previous_version} to latest version for {args.version_no}"
    )
    raw_config["version"] = schema_version

    # If the user has downloaded the offline model, remove it from the cache.
    stale_model = os.path.expanduser("~/.cache/gpt4all/llama-2-7b-chat.ggmlv3.q4_K_S.bin")
    if os.path.exists(stale_model):
        os.remove(stale_model)

    save_config_to_file(raw_config, args.config_file)
    return args
@@ -0,0 +1,67 @@
1
+ """
2
+ Current format of khoj.yml
3
+ ---
4
+ app:
5
+ should-log-telemetry: true
6
+ content-type:
7
+ ...
8
+ processor:
9
+ conversation:
10
+ chat-model: gpt-3.5-turbo
11
+ conversation-logfile: ~/.khoj/processor/conversation/conversation_logs.json
12
+ model: text-davinci-003
13
+ openai-api-key: sk-secret-key
14
+ search-type:
15
+ ...
16
+
17
+ New format of khoj.yml
18
+ ---
19
+ app:
20
+ should-log-telemetry: true
21
+ content-type:
22
+ ...
23
+ processor:
24
+ conversation:
25
+ openai:
26
+ chat-model: gpt-3.5-turbo
27
+ api-key: sk-secret-key
28
+ conversation-logfile: ~/.khoj/processor/conversation/conversation_logs.json
29
+ enable-offline-chat: false
30
+ search-type:
31
+ ...
32
+ """
33
+ from khoj.utils.yaml import load_config_from_file, save_config_to_file
34
+
35
+
36
def migrate_processor_conversation_schema(args):
    """Nest the OpenAI chat settings of the conversation processor under ``openai``.

    Runs only while the old top-level ``openai-api-key`` or ``chat-model``
    fields are present, so it is a no-op on an already migrated config.
    Stamps the config with schema version 0.10.0 and rewrites the config
    file in place.

    Args:
        args: Parsed CLI arguments; only ``args.config_file`` is read.

    Returns:
        The ``args`` object, unchanged.
    """
    schema_version = "0.10.0"
    raw_config = load_config_from_file(args.config_file)

    # A missing or null processor/conversation section means there is nothing to migrate
    processor_config = raw_config.get("processor") or {}
    conversation_config = processor_config.get("conversation")
    if conversation_config is None:
        return args

    current_openai_api_key = conversation_config.get("openai-api-key")
    current_chat_model = conversation_config.get("chat-model")
    if current_openai_api_key is None and current_chat_model is None:
        return args

    raw_config["version"] = schema_version

    # Update conversation processor schema. Only the fields listed here are carried over.
    processor_config["conversation"] = {
        "openai": {"chat-model": current_chat_model, "api-key": current_openai_api_key},
        "conversation-logfile": conversation_config.get("conversation-logfile"),
        # Keep an existing enable-offline-chat value; default to False when absent
        "enable-offline-chat": conversation_config.get("enable-offline-chat", False),
    }

    save_config_to_file(raw_config, args.config_file)
    return args