khoj 1.16.1.dev25__py3-none-any.whl → 1.17.1.dev216__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/configure.py +6 -6
- khoj/database/adapters/__init__.py +55 -26
- khoj/database/migrations/0053_agent_style_color_agent_style_icon.py +61 -0
- khoj/database/models/__init__.py +35 -0
- khoj/interface/web/assets/icons/favicon-128x128.png +0 -0
- khoj/interface/web/assets/icons/favicon-256x256.png +0 -0
- khoj/interface/web/assets/icons/khoj-logo-sideways-200.png +0 -0
- khoj/interface/web/assets/icons/khoj-logo-sideways-500.png +0 -0
- khoj/interface/web/assets/icons/khoj-logo-sideways.svg +31 -5384
- khoj/interface/web/assets/icons/khoj.svg +26 -0
- khoj/interface/web/chat.html +191 -301
- khoj/interface/web/content_source_computer_input.html +3 -3
- khoj/interface/web/content_source_github_input.html +1 -1
- khoj/interface/web/content_source_notion_input.html +1 -1
- khoj/interface/web/public_conversation.html +1 -1
- khoj/interface/web/search.html +2 -2
- khoj/interface/web/{config.html → settings.html} +30 -30
- khoj/interface/web/utils.html +1 -1
- khoj/processor/content/docx/docx_to_entries.py +4 -9
- khoj/processor/content/github/github_to_entries.py +1 -3
- khoj/processor/content/images/image_to_entries.py +4 -9
- khoj/processor/content/markdown/markdown_to_entries.py +4 -9
- khoj/processor/content/notion/notion_to_entries.py +1 -3
- khoj/processor/content/org_mode/org_to_entries.py +4 -9
- khoj/processor/content/pdf/pdf_to_entries.py +4 -9
- khoj/processor/content/plaintext/plaintext_to_entries.py +4 -9
- khoj/processor/content/text_to_entries.py +1 -3
- khoj/processor/conversation/utils.py +0 -4
- khoj/processor/tools/online_search.py +13 -7
- khoj/routers/api.py +58 -9
- khoj/routers/api_agents.py +3 -1
- khoj/routers/api_chat.py +335 -562
- khoj/routers/api_content.py +538 -0
- khoj/routers/api_model.py +156 -0
- khoj/routers/helpers.py +338 -23
- khoj/routers/notion.py +2 -8
- khoj/routers/web_client.py +43 -256
- khoj/search_type/text_search.py +5 -4
- khoj/utils/fs_syncer.py +4 -2
- khoj/utils/rawconfig.py +6 -1
- {khoj-1.16.1.dev25.dist-info → khoj-1.17.1.dev216.dist-info}/METADATA +2 -2
- {khoj-1.16.1.dev25.dist-info → khoj-1.17.1.dev216.dist-info}/RECORD +45 -43
- khoj/routers/api_config.py +0 -434
- khoj/routers/indexer.py +0 -349
- {khoj-1.16.1.dev25.dist-info → khoj-1.17.1.dev216.dist-info}/WHEEL +0 -0
- {khoj-1.16.1.dev25.dist-info → khoj-1.17.1.dev216.dist-info}/entry_points.txt +0 -0
- {khoj-1.16.1.dev25.dist-info → khoj-1.17.1.dev216.dist-info}/licenses/LICENSE +0 -0
khoj/routers/indexer.py
DELETED
|
@@ -1,349 +0,0 @@
|
|
|
1
|
-
import asyncio
|
|
2
|
-
import logging
|
|
3
|
-
from typing import Dict, Optional, Union
|
|
4
|
-
|
|
5
|
-
from fastapi import APIRouter, Depends, Header, Request, Response, UploadFile
|
|
6
|
-
from pydantic import BaseModel
|
|
7
|
-
from starlette.authentication import requires
|
|
8
|
-
|
|
9
|
-
from khoj.database.models import GithubConfig, KhojUser, NotionConfig
|
|
10
|
-
from khoj.processor.content.docx.docx_to_entries import DocxToEntries
|
|
11
|
-
from khoj.processor.content.github.github_to_entries import GithubToEntries
|
|
12
|
-
from khoj.processor.content.images.image_to_entries import ImageToEntries
|
|
13
|
-
from khoj.processor.content.markdown.markdown_to_entries import MarkdownToEntries
|
|
14
|
-
from khoj.processor.content.notion.notion_to_entries import NotionToEntries
|
|
15
|
-
from khoj.processor.content.org_mode.org_to_entries import OrgToEntries
|
|
16
|
-
from khoj.processor.content.pdf.pdf_to_entries import PdfToEntries
|
|
17
|
-
from khoj.processor.content.plaintext.plaintext_to_entries import PlaintextToEntries
|
|
18
|
-
from khoj.routers.helpers import ApiIndexedDataLimiter, update_telemetry_state
|
|
19
|
-
from khoj.search_type import text_search
|
|
20
|
-
from khoj.utils import constants, state
|
|
21
|
-
from khoj.utils.config import SearchModels
|
|
22
|
-
from khoj.utils.helpers import LRU, get_file_type
|
|
23
|
-
from khoj.utils.rawconfig import ContentConfig, FullConfig, SearchConfig
|
|
24
|
-
from khoj.utils.yaml import save_config_to_file_updated_state
|
|
25
|
-
|
|
26
|
-
logger = logging.getLogger(__name__)
|
|
27
|
-
|
|
28
|
-
indexer = APIRouter()
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
class File(BaseModel):
    """A file described by its path and its content."""

    # Path identifying the file.
    path: str

    # File content: decoded text, or raw bytes for content that is not text.
    content: Union[str, bytes]
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
class IndexBatchRequest(BaseModel):
    """Request payload carrying a batch of `File` objects."""

    # Files included in this batch.
    files: list[File]
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
class IndexerInput(BaseModel):
    """Files to index, grouped by content type.

    Every field is an optional mapping from a file name to that file's
    content. Text formats carry `str` content; pdf, image and docx carry
    raw `bytes`.
    """

    # Text based content types.
    org: Optional[dict[str, str]] = None
    markdown: Optional[dict[str, str]] = None
    plaintext: Optional[dict[str, str]] = None

    # Content types passed along as raw bytes.
    pdf: Optional[dict[str, bytes]] = None
    image: Optional[dict[str, bytes]] = None
    docx: Optional[dict[str, bytes]] = None
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
@indexer.post("/update")
@requires(["authenticated"])
async def update(
    request: Request,
    files: list[UploadFile],
    force: bool = False,
    t: Optional[Union[state.SearchType, str]] = state.SearchType.All,
    client: Optional[str] = None,
    user_agent: Optional[str] = Header(None),
    referer: Optional[str] = Header(None),
    host: Optional[str] = Header(None),
    indexed_data_limiter: ApiIndexedDataLimiter = Depends(
        ApiIndexedDataLimiter(
            incoming_entries_size_limit=10,
            subscribed_incoming_entries_size_limit=25,
            total_entries_size_limit=10,
            subscribed_total_entries_size_limit=100,
        )
    ),
):
    """Index the uploaded files for the authenticated user.

    Uploaded files are grouped by detected file type; files of an unsupported
    type are skipped with a warning. The grouped files are then handed to
    `configure_content`, which runs in the default executor so the event loop
    is not blocked while indexing.

    Args:
        request: Incoming request; the user is read from `request.user.object`.
        files: Uploaded files to index.
        force: Passed to `configure_content` as its `regenerate` argument.
        t: Search type to update. Defaults to all types.
        client: Name of the calling client, used in logs and telemetry.
        user_agent, referer, host: Request headers forwarded to telemetry.
        indexed_data_limiter: Dependency built from `ApiIndexedDataLimiter`
            with the size limits configured above.

    Returns:
        A 200 response whose body is the comma separated names of the
        accepted files, or a 500 response with body "Failed" when indexing
        raised or reported failure.
    """
    user = request.user.object
    # Values are decoded text when an encoding was detected, raw bytes otherwise.
    index_files: Dict[str, Dict[str, Union[str, bytes]]] = {
        "org": {},
        "markdown": {},
        "pdf": {},
        "plaintext": {},
        "image": {},
        "docx": {},
    }
    try:
        logger.info(f"📬 Updating content index via API call by {client} client")
        for file in files:
            file_content = file.file.read()
            file_type, encoding = get_file_type(file.content_type, file_content)
            if file_type in index_files:
                index_files[file_type][file.filename] = file_content.decode(encoding) if encoding else file_content
            else:
                logger.warning(f"Skipped indexing unsupported file type sent by {client} client: {file.filename}")

        indexer_input = IndexerInput(
            org=index_files["org"],
            markdown=index_files["markdown"],
            pdf=index_files["pdf"],
            plaintext=index_files["plaintext"],
            image=index_files["image"],
            docx=index_files["docx"],
        )

        if state.config is None:
            logger.info("📬 Initializing content index on first run.")
            default_full_config = FullConfig(
                content_type=None,
                search_type=SearchConfig.model_validate(constants.default_config["search-type"]),
                processor=None,
            )
            state.config = default_full_config
            default_content_config = ContentConfig(
                org=None,
                markdown=None,
                pdf=None,
                docx=None,
                image=None,
                github=None,
                notion=None,
                plaintext=None,
            )
            state.config.content_type = default_content_config
            save_config_to_file_updated_state()
            # configure_search returns a new SearchModels when given None, so its
            # result must be stored; discarding it made the call a no-op.
            state.search_models = configure_search(state.search_models, state.config.search_type)

        # We are inside a coroutine, so the running loop is the one to use.
        loop = asyncio.get_running_loop()
        success = await loop.run_in_executor(
            None,
            configure_content,
            indexer_input.model_dump(),
            force,
            t,
            False,
            user,
        )
        if not success:
            raise RuntimeError("Failed to update content index")
        logger.info("Finished processing batch indexing request")
    except Exception as e:
        # Log the failure once, with the traceback and full request context.
        logger.error(
            f'🚨 Failed to {"force " if force else ""}update {t} content index triggered via API call by {client} client: {e}',
            exc_info=True,
        )
        return Response(content="Failed", status_code=500)

    indexing_metadata = {
        "num_org": len(index_files["org"]),
        "num_markdown": len(index_files["markdown"]),
        "num_pdf": len(index_files["pdf"]),
        "num_plaintext": len(index_files["plaintext"]),
        "num_image": len(index_files["image"]),
        "num_docx": len(index_files["docx"]),
    }

    update_telemetry_state(
        request=request,
        telemetry_type="api",
        api="index/update",
        client=client,
        user_agent=user_agent,
        referer=referer,
        host=host,
        metadata=indexing_metadata,
    )

    logger.info(f"📪 Content index updated via API call by {client} client")

    # Joining an empty sequence already yields "", so no fallback is needed.
    indexed_filenames = ",".join(file for ctype in index_files for file in index_files[ctype])
    return Response(content=indexed_filenames, status_code=200)
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
def configure_search(search_models: SearchModels, search_config: Optional[SearchConfig]) -> Optional[SearchModels]:
    """Return `search_models`, or a fresh `SearchModels` when it is None.

    `search_config` is accepted but not read by this function.
    """
    return SearchModels() if search_models is None else search_models
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
def _index_content_source(
    label: str,
    start_message: str,
    entries_cls,
    source_files: Optional[dict],
    regenerate: bool,
    full_corpus: bool,
    user: KhojUser,
    load_config=None,
) -> bool:
    """Run `text_search.setup` for one content source, logging instead of raising.

    When `load_config` is given it is called to fetch the source's stored
    config; the source is skipped (and counted as successful) if it returns
    None. Returns False only when an exception was raised.
    """
    try:
        setup_kwargs = {}
        if load_config is not None:
            source_config = load_config()
            if source_config is None:
                return True
            setup_kwargs["config"] = source_config
        logger.info(start_message)
        text_search.setup(
            entries_cls,
            source_files,
            regenerate=regenerate,
            full_corpus=full_corpus,
            user=user,
            **setup_kwargs,
        )
    except Exception as e:
        logger.error(f"🚨 Failed to setup {label}: {e}", exc_info=True)
        return False
    return True


def configure_content(
    files: Optional[dict[str, dict[str, str]]],
    regenerate: bool = False,
    t: Optional[Union[state.SearchType, str]] = state.SearchType.All,
    full_corpus: bool = True,
    user: KhojUser = None,
) -> bool:
    """Index the given files, and stored GitHub/Notion sources, for a user.

    Args:
        files: Mapping of content type ("org", "markdown", "pdf", "plaintext",
            "image", "docx") to a mapping of file name to content. When None,
            nothing is indexed and True is returned.
        regenerate: Forwarded to `text_search.setup`.
        t: Search type to index, as a `state.SearchType` member or its value.
            None means all types.
        full_corpus: Forwarded to `text_search.setup`.
        user: User whose content is indexed and whose query cache is reset.

    Returns:
        True when every attempted source was set up without error. False when
        `t` is not a valid search type or any source raised.
    """
    if t is None:
        t = state.SearchType.All

    # Accept both enum members and raw values. An unknown value is reported
    # and rejected here rather than failing later on attribute access.
    try:
        t = state.SearchType(t)
    except ValueError:
        logger.warning(f"🚨 Invalid search type: {t}")
        return False
    search_type = t.value

    # Guard before touching `files`; it is dereferenced right below.
    if files is None:
        logger.warning(f"🚨 No files to process for {search_type} search.")
        return True

    # GitHub and Notion are only (re)indexed when no files were sent at all.
    no_documents = not any(files.values())

    # (search type, failure label, start message, entries class, files key, config loader)
    # Rows are listed in the order the sources are processed.
    sources = (
        (state.SearchType.Org, "org", "🦄 Setting up search for orgmode notes", OrgToEntries, "org", None),
        (
            state.SearchType.Markdown,
            "markdown",
            "💎 Setting up search for markdown notes",
            MarkdownToEntries,
            "markdown",
            None,
        ),
        (state.SearchType.Pdf, "PDF", "🖨️ Setting up search for pdf", PdfToEntries, "pdf", None),
        (
            state.SearchType.Plaintext,
            "plaintext",
            "📄 Setting up search for plaintext",
            PlaintextToEntries,
            "plaintext",
            None,
        ),
        (
            state.SearchType.Github,
            "GitHub",
            "🐙 Setting up search for github",
            GithubToEntries,
            None,
            lambda: GithubConfig.objects.filter(user=user).prefetch_related("githubrepoconfig").first(),
        ),
        (
            state.SearchType.Notion,
            "Notion",
            "🔌 Setting up search for notion",
            NotionToEntries,
            None,
            lambda: NotionConfig.objects.filter(user=user).first(),
        ),
        (state.SearchType.Image, "images", "🖼️ Setting up search for images", ImageToEntries, "image", None),
        (state.SearchType.Docx, "docx", "📄 Setting up search for docx", DocxToEntries, "docx", None),
    )

    success = True
    for source_type, label, start_message, entries_cls, files_key, load_config in sources:
        if search_type not in (state.SearchType.All.value, source_type.value):
            continue

        if files_key is not None:
            # File backed source: skip when no files of this type were sent.
            source_files = files.get(files_key)
            if not source_files:
                continue
        else:
            # Config backed source: skip when any files were sent.
            if not no_documents:
                continue
            source_files = None

        if not _index_content_source(
            label,
            start_message,
            entries_cls,
            source_files,
            regenerate,
            full_corpus,
            user,
            load_config=load_config,
        ):
            success = False

    # Invalidate Query Cache
    if user:
        state.query_cache[user.uuid] = LRU()

    return success
|
|
File without changes
|
|
File without changes
|
|
File without changes
|