khoj 1.28.3__py3-none-any.whl → 1.28.4.dev92__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/configure.py +10 -14
- khoj/database/adapters/__init__.py +128 -44
- khoj/database/admin.py +6 -3
- khoj/database/management/commands/change_default_model.py +7 -72
- khoj/database/migrations/0073_delete_usersearchmodelconfig.py +15 -0
- khoj/database/models/__init__.py +4 -6
- khoj/interface/compiled/404/index.html +1 -1
- khoj/interface/compiled/_next/static/chunks/1603-dc5fd983dbcd070d.js +1 -0
- khoj/interface/compiled/_next/static/chunks/1970-c78f6acc8e16e30b.js +1 -0
- khoj/interface/compiled/_next/static/chunks/2261-748f7c327df3c8c1.js +1 -0
- khoj/interface/compiled/_next/static/chunks/3124-a4cea2eda163128d.js +1 -0
- khoj/interface/compiled/_next/static/chunks/3803-d74118a2d0182c52.js +1 -0
- khoj/interface/compiled/_next/static/chunks/5538-36aa824a75519c5b.js +1 -0
- khoj/interface/compiled/_next/static/chunks/5961-3c104d9736b7902b.js +3 -0
- khoj/interface/compiled/_next/static/chunks/8423-ebfa9bb9e2424ca3.js +1 -0
- khoj/interface/compiled/_next/static/chunks/9417-32c4db52ca42e681.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-e9838b642913a071.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/page-4353b1a532795ad1.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/automations/{page-d3edae545a1b5393.js → page-c9f13c865e739607.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/layout-b0e7ff4baa3b5265.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/chat/page-45720e1ed71e3ef5.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/{layout-d0f0a9067427fb20.js → layout-86561d2fac35a91a.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/{page-ea462e20376b6dce.js → page-ecb8e1c192aa8834.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/search/layout-ea6b73fdaf9b24ca.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/search/{page-a5c277eff207959e.js → page-8e28deacb61f75aa.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/settings/{layout-a8f33dfe92f997fb.js → layout-254eaaf916449a60.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/settings/page-2fab613a557d3cc5.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-cf7445cf0326bda3.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/share/chat/page-30376aa7e9cfa342.js +1 -0
- khoj/interface/compiled/_next/static/chunks/{main-f84cd3c1873cd842.js → main-1ea5c2e0fdef4626.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{webpack-8beec5b51cabb39a.js → webpack-27cf153c35b1338d.js} +1 -1
- khoj/interface/compiled/_next/static/css/{467a524c75e7d7c0.css → 0e9d53dcd7f11342.css} +1 -1
- khoj/interface/compiled/_next/static/css/{26c1c33d0423a7d8.css → 1f293605f2871853.css} +1 -1
- khoj/interface/compiled/_next/static/css/2d097a35da6bfe8d.css +1 -0
- khoj/interface/compiled/_next/static/css/80bd6301fc657983.css +1 -0
- khoj/interface/compiled/_next/static/css/ed437164d77aa600.css +25 -0
- khoj/interface/compiled/_next/static/media/5455839c73f146e7-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/5984b96ba4822821-s.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/684adc3dde1b03f1-s.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/82e3b9a1bdaf0c26-s.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/8d1ea331386a0db8-s.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/91475f6526542a4f-s.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/b98b13dbc1c3b59c-s.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/c824d7a20139e39d-s.woff2 +0 -0
- khoj/interface/compiled/agents/index.html +1 -1
- khoj/interface/compiled/agents/index.txt +2 -2
- khoj/interface/compiled/automations/index.html +1 -1
- khoj/interface/compiled/automations/index.txt +2 -2
- khoj/interface/compiled/chat/index.html +1 -1
- khoj/interface/compiled/chat/index.txt +2 -2
- khoj/interface/compiled/index.html +1 -1
- khoj/interface/compiled/index.txt +3 -3
- khoj/interface/compiled/search/index.html +1 -1
- khoj/interface/compiled/search/index.txt +2 -2
- khoj/interface/compiled/settings/index.html +1 -1
- khoj/interface/compiled/settings/index.txt +3 -3
- khoj/interface/compiled/share/chat/index.html +1 -1
- khoj/interface/compiled/share/chat/index.txt +3 -3
- khoj/processor/content/docx/docx_to_entries.py +27 -21
- khoj/processor/content/github/github_to_entries.py +2 -2
- khoj/processor/content/images/image_to_entries.py +2 -2
- khoj/processor/content/markdown/markdown_to_entries.py +2 -2
- khoj/processor/content/notion/notion_to_entries.py +2 -2
- khoj/processor/content/org_mode/org_to_entries.py +2 -2
- khoj/processor/content/org_mode/orgnode.py +1 -1
- khoj/processor/content/pdf/pdf_to_entries.py +37 -29
- khoj/processor/content/plaintext/plaintext_to_entries.py +2 -2
- khoj/processor/content/text_to_entries.py +3 -4
- khoj/processor/conversation/anthropic/anthropic_chat.py +9 -1
- khoj/processor/conversation/google/gemini_chat.py +15 -2
- khoj/processor/conversation/google/utils.py +3 -1
- khoj/processor/conversation/offline/chat_model.py +4 -0
- khoj/processor/conversation/openai/gpt.py +6 -1
- khoj/processor/conversation/prompts.py +72 -13
- khoj/processor/conversation/utils.py +80 -13
- khoj/processor/image/generate.py +2 -0
- khoj/processor/tools/online_search.py +68 -18
- khoj/processor/tools/run_code.py +54 -20
- khoj/routers/api.py +10 -4
- khoj/routers/api_agents.py +8 -10
- khoj/routers/api_chat.py +89 -24
- khoj/routers/api_content.py +80 -8
- khoj/routers/helpers.py +176 -60
- khoj/routers/notion.py +1 -1
- khoj/routers/research.py +73 -31
- khoj/routers/web_client.py +0 -10
- khoj/search_type/text_search.py +3 -7
- khoj/utils/cli.py +2 -2
- khoj/utils/fs_syncer.py +2 -1
- khoj/utils/helpers.py +6 -3
- khoj/utils/rawconfig.py +32 -0
- khoj/utils/state.py +2 -1
- {khoj-1.28.3.dist-info → khoj-1.28.4.dev92.dist-info}/METADATA +3 -3
- {khoj-1.28.3.dist-info → khoj-1.28.4.dev92.dist-info}/RECORD +99 -105
- {khoj-1.28.3.dist-info → khoj-1.28.4.dev92.dist-info}/WHEEL +1 -1
- khoj/interface/compiled/_next/static/chunks/1034-da58b679fcbb79c1.js +0 -1
- khoj/interface/compiled/_next/static/chunks/1467-b331e469fe411347.js +0 -1
- khoj/interface/compiled/_next/static/chunks/1603-c1568f45947e9f2c.js +0 -1
- khoj/interface/compiled/_next/static/chunks/1970-d44050bf658ae5cc.js +0 -1
- khoj/interface/compiled/_next/static/chunks/3110-ef2cacd1b8d79ad8.js +0 -1
- khoj/interface/compiled/_next/static/chunks/3423-f4b7df2f6f3362f7.js +0 -1
- khoj/interface/compiled/_next/static/chunks/394-6bcb8c429f168f21.js +0 -3
- khoj/interface/compiled/_next/static/chunks/7113-f2e114d7034a0835.js +0 -1
- khoj/interface/compiled/_next/static/chunks/8423-da57554315eebcbe.js +0 -1
- khoj/interface/compiled/_next/static/chunks/8840-b8d7b9f0923c6651.js +0 -1
- khoj/interface/compiled/_next/static/chunks/9417-0d0fc7eb49a86abb.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-75636ab3a413fa8e.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/agents/page-adbf3cd470da248f.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/layout-96fcf62857bf8f30.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/page-222d348681b848a5.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/factchecker/layout-7b30c541c05fb904.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/factchecker/page-bded0868a08ac4ba.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/search/layout-3720f1362310bebb.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/settings/page-210bd54db4841333.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-2df56074e42adaa0.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/page-a21b7e8890ed1209.js +0 -1
- khoj/interface/compiled/_next/static/css/4cae6c0e5c72fb2d.css +0 -1
- khoj/interface/compiled/_next/static/css/553f9cdcc7a2bcd6.css +0 -1
- khoj/interface/compiled/_next/static/css/a795ee88875f4853.css +0 -25
- khoj/interface/compiled/_next/static/css/afd3d45cc65d55d8.css +0 -1
- khoj/interface/compiled/_next/static/media/0e790e04fd40ad16-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/4221e1667cd19c7d-s.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/6c276159aa0eb14b-s.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/6cc0b9500e4f9168-s.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/9d9319a7a2ac39c6-s.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/a75c8ea86756d52d-s.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/abce7c400ca31a51-s.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/f759c939737fb668-s.woff2 +0 -0
- khoj/interface/compiled/factchecker/index.html +0 -1
- khoj/interface/compiled/factchecker/index.txt +0 -7
- /khoj/interface/compiled/_next/static/{EfnEiWDle86AUcxEdEFgO → t_2jovvUVve0Gvc3FqpT9}/_buildManifest.js +0 -0
- /khoj/interface/compiled/_next/static/{EfnEiWDle86AUcxEdEFgO → t_2jovvUVve0Gvc3FqpT9}/_ssgManifest.js +0 -0
- {khoj-1.28.3.dist-info → khoj-1.28.4.dev92.dist-info}/entry_points.txt +0 -0
- {khoj-1.28.3.dist-info → khoj-1.28.4.dev92.dist-info}/licenses/LICENSE +0 -0
khoj/routers/api_content.py
CHANGED
@@ -36,16 +36,18 @@ from khoj.database.models import (
|
|
36
36
|
LocalPlaintextConfig,
|
37
37
|
NotionConfig,
|
38
38
|
)
|
39
|
+
from khoj.processor.content.docx.docx_to_entries import DocxToEntries
|
40
|
+
from khoj.processor.content.pdf.pdf_to_entries import PdfToEntries
|
39
41
|
from khoj.routers.helpers import (
|
40
42
|
ApiIndexedDataLimiter,
|
41
43
|
CommonQueryParams,
|
42
44
|
configure_content,
|
45
|
+
get_file_content,
|
43
46
|
get_user_config,
|
44
47
|
update_telemetry_state,
|
45
48
|
)
|
46
49
|
from khoj.utils import constants, state
|
47
50
|
from khoj.utils.config import SearchModels
|
48
|
-
from khoj.utils.helpers import get_file_type
|
49
51
|
from khoj.utils.rawconfig import (
|
50
52
|
ContentConfig,
|
51
53
|
FullConfig,
|
@@ -237,7 +239,7 @@ async def set_content_notion(
|
|
237
239
|
|
238
240
|
if updated_config.token:
|
239
241
|
# Trigger an async job to configure_content. Let it run without blocking the response.
|
240
|
-
background_tasks.add_task(run_in_executor, configure_content, {}, False, SearchType.Notion, user)
|
242
|
+
background_tasks.add_task(run_in_executor, configure_content, user, {}, False, SearchType.Notion)
|
241
243
|
|
242
244
|
update_telemetry_state(
|
243
245
|
request=request,
|
@@ -375,6 +377,75 @@ async def delete_content_source(
|
|
375
377
|
return {"status": "ok"}
|
376
378
|
|
377
379
|
|
380
|
+
@api_content.post("/convert", status_code=200)
|
381
|
+
@requires(["authenticated"])
|
382
|
+
async def convert_documents(
|
383
|
+
request: Request,
|
384
|
+
files: List[UploadFile],
|
385
|
+
client: Optional[str] = None,
|
386
|
+
):
|
387
|
+
MAX_FILE_SIZE_MB = 10 # 10MB limit
|
388
|
+
MAX_FILE_SIZE_BYTES = MAX_FILE_SIZE_MB * 1024 * 1024
|
389
|
+
|
390
|
+
converted_files = []
|
391
|
+
supported_files = ["org", "markdown", "pdf", "plaintext", "docx"]
|
392
|
+
|
393
|
+
for file in files:
|
394
|
+
# Check file size first
|
395
|
+
file_size = 0
|
396
|
+
content = await file.read()
|
397
|
+
file_size = len(content)
|
398
|
+
await file.seek(0) # Reset file pointer
|
399
|
+
|
400
|
+
if file_size > MAX_FILE_SIZE_BYTES:
|
401
|
+
logger.warning(
|
402
|
+
f"Skipped converting oversized file ({file_size / 1024 / 1024:.1f}MB) sent by {client} client: {file.filename}"
|
403
|
+
)
|
404
|
+
continue
|
405
|
+
|
406
|
+
file_data = get_file_content(file)
|
407
|
+
if file_data.file_type in supported_files:
|
408
|
+
extracted_content = (
|
409
|
+
file_data.content.decode(file_data.encoding) if file_data.encoding else file_data.content
|
410
|
+
)
|
411
|
+
|
412
|
+
if file_data.file_type == "docx":
|
413
|
+
entries_per_page = DocxToEntries.extract_text(file_data.content)
|
414
|
+
annotated_pages = [
|
415
|
+
f"Page {index} of {file_data.name}:\n\n{entry}" for index, entry in enumerate(entries_per_page)
|
416
|
+
]
|
417
|
+
extracted_content = "\n".join(annotated_pages)
|
418
|
+
|
419
|
+
elif file_data.file_type == "pdf":
|
420
|
+
entries_per_page = PdfToEntries.extract_text(file_data.content)
|
421
|
+
annotated_pages = [
|
422
|
+
f"Page {index} of {file_data.name}:\n\n{entry}" for index, entry in enumerate(entries_per_page)
|
423
|
+
]
|
424
|
+
extracted_content = "\n".join(annotated_pages)
|
425
|
+
|
426
|
+
size_in_bytes = len(extracted_content.encode("utf-8"))
|
427
|
+
|
428
|
+
converted_files.append(
|
429
|
+
{
|
430
|
+
"name": file_data.name,
|
431
|
+
"content": extracted_content,
|
432
|
+
"file_type": file_data.file_type,
|
433
|
+
"size": size_in_bytes,
|
434
|
+
}
|
435
|
+
)
|
436
|
+
else:
|
437
|
+
logger.warning(f"Skipped converting unsupported file type sent by {client} client: {file.filename}")
|
438
|
+
|
439
|
+
update_telemetry_state(
|
440
|
+
request=request,
|
441
|
+
telemetry_type="api",
|
442
|
+
api="convert_documents",
|
443
|
+
client=client,
|
444
|
+
)
|
445
|
+
|
446
|
+
return Response(content=json.dumps(converted_files), media_type="application/json", status_code=200)
|
447
|
+
|
448
|
+
|
378
449
|
async def indexer(
|
379
450
|
request: Request,
|
380
451
|
files: list[UploadFile],
|
@@ -398,12 +469,13 @@ async def indexer(
|
|
398
469
|
try:
|
399
470
|
logger.info(f"📬 Updating content index via API call by {client} client")
|
400
471
|
for file in files:
|
401
|
-
|
402
|
-
file_type
|
403
|
-
|
404
|
-
|
472
|
+
file_data = get_file_content(file)
|
473
|
+
if file_data.file_type in index_files:
|
474
|
+
index_files[file_data.file_type][file_data.name] = (
|
475
|
+
file_data.content.decode(file_data.encoding) if file_data.encoding else file_data.content
|
476
|
+
)
|
405
477
|
else:
|
406
|
-
logger.warning(f"Skipped indexing unsupported file type sent by {client} client: {file.filename}")
|
478
|
+
logger.warning(f"Skipped indexing unsupported file type sent by {client} client: {file_data.name}")
|
407
479
|
|
408
480
|
indexer_input = IndexerInput(
|
409
481
|
org=index_files["org"],
|
@@ -440,10 +512,10 @@ async def indexer(
|
|
440
512
|
success = await loop.run_in_executor(
|
441
513
|
None,
|
442
514
|
configure_content,
|
515
|
+
user,
|
443
516
|
indexer_input.model_dump(),
|
444
517
|
regenerate,
|
445
518
|
t,
|
446
|
-
user,
|
447
519
|
)
|
448
520
|
if not success:
|
449
521
|
raise RuntimeError(f"Failed to {method} {t} data sent by {client} client into content index")
|