khoj 1.42.9.dev26__py3-none-any.whl → 1.42.10.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. khoj/database/adapters/__init__.py +0 -20
  2. khoj/database/models/__init__.py +0 -1
  3. khoj/interface/compiled/404/index.html +2 -2
  4. khoj/interface/compiled/_next/static/chunks/app/chat/page-4c6b873a4a5c7d2f.js +1 -0
  5. khoj/interface/compiled/agents/index.html +2 -2
  6. khoj/interface/compiled/agents/index.txt +2 -2
  7. khoj/interface/compiled/automations/index.html +2 -2
  8. khoj/interface/compiled/automations/index.txt +3 -3
  9. khoj/interface/compiled/chat/index.html +2 -2
  10. khoj/interface/compiled/chat/index.txt +2 -2
  11. khoj/interface/compiled/index.html +2 -2
  12. khoj/interface/compiled/index.txt +2 -2
  13. khoj/interface/compiled/search/index.html +2 -2
  14. khoj/interface/compiled/search/index.txt +2 -2
  15. khoj/interface/compiled/settings/index.html +2 -2
  16. khoj/interface/compiled/settings/index.txt +4 -4
  17. khoj/interface/compiled/share/chat/index.html +2 -2
  18. khoj/interface/compiled/share/chat/index.txt +2 -2
  19. khoj/processor/content/markdown/markdown_to_entries.py +9 -38
  20. khoj/processor/content/org_mode/org_to_entries.py +2 -18
  21. khoj/processor/content/org_mode/orgnode.py +16 -18
  22. khoj/processor/content/text_to_entries.py +0 -30
  23. khoj/processor/conversation/anthropic/anthropic_chat.py +2 -11
  24. khoj/processor/conversation/anthropic/utils.py +103 -90
  25. khoj/processor/conversation/google/gemini_chat.py +1 -4
  26. khoj/processor/conversation/google/utils.py +18 -80
  27. khoj/processor/conversation/offline/chat_model.py +3 -3
  28. khoj/processor/conversation/openai/gpt.py +38 -13
  29. khoj/processor/conversation/openai/utils.py +12 -113
  30. khoj/processor/conversation/prompts.py +35 -17
  31. khoj/processor/conversation/utils.py +58 -129
  32. khoj/processor/operator/grounding_agent.py +1 -1
  33. khoj/processor/operator/operator_agent_binary.py +3 -4
  34. khoj/processor/tools/online_search.py +0 -18
  35. khoj/processor/tools/run_code.py +1 -1
  36. khoj/routers/api_chat.py +1 -1
  37. khoj/routers/api_content.py +6 -6
  38. khoj/routers/helpers.py +27 -297
  39. khoj/routers/research.py +155 -169
  40. khoj/search_type/text_search.py +0 -2
  41. khoj/utils/helpers.py +8 -284
  42. khoj/utils/initialization.py +2 -0
  43. khoj/utils/rawconfig.py +0 -11
  44. {khoj-1.42.9.dev26.dist-info → khoj-1.42.10.dev2.dist-info}/METADATA +1 -1
  45. {khoj-1.42.9.dev26.dist-info → khoj-1.42.10.dev2.dist-info}/RECORD +57 -57
  46. khoj/interface/compiled/_next/static/chunks/app/chat/page-76fc915800aa90f4.js +0 -1
  47. /khoj/interface/compiled/_next/static/chunks/{1327-3b1a41af530fa8ee.js → 1327-1a9107b9a2a04a98.js} +0 -0
  48. /khoj/interface/compiled/_next/static/chunks/{1915-fbfe167c84ad60c5.js → 1915-5c6508f6ebb62a30.js} +0 -0
  49. /khoj/interface/compiled/_next/static/chunks/{2117-e78b6902ad6f75ec.js → 2117-080746c8e170c81a.js} +0 -0
  50. /khoj/interface/compiled/_next/static/chunks/{2939-4d4084c5b888b960.js → 2939-4af3fd24b8ffc9ad.js} +0 -0
  51. /khoj/interface/compiled/_next/static/chunks/{4447-d6cf93724d57e34b.js → 4447-cd95608f8e93e711.js} +0 -0
  52. /khoj/interface/compiled/_next/static/chunks/{8667-4b7790573b08c50d.js → 8667-50b03a89e82e0ba7.js} +0 -0
  53. /khoj/interface/compiled/_next/static/chunks/{webpack-70e0762712341826.js → webpack-92ce8aaf95718ec4.js} +0 -0
  54. /khoj/interface/compiled/_next/static/{IYGyer2N7GdUJ7QHFghtY → cuzJcS32_a4L4a6gCZ63y}/_buildManifest.js +0 -0
  55. /khoj/interface/compiled/_next/static/{IYGyer2N7GdUJ7QHFghtY → cuzJcS32_a4L4a6gCZ63y}/_ssgManifest.js +0 -0
  56. {khoj-1.42.9.dev26.dist-info → khoj-1.42.10.dev2.dist-info}/WHEEL +0 -0
  57. {khoj-1.42.9.dev26.dist-info → khoj-1.42.10.dev2.dist-info}/entry_points.txt +0 -0
  58. {khoj-1.42.9.dev26.dist-info → khoj-1.42.10.dev2.dist-info}/licenses/LICENSE +0 -0
khoj/utils/helpers.py CHANGED
@@ -12,7 +12,6 @@ import random
12
12
  import urllib.parse
13
13
  import uuid
14
14
  from collections import OrderedDict
15
- from copy import deepcopy
16
15
  from enum import Enum
17
16
  from functools import lru_cache
18
17
  from importlib import import_module
@@ -20,9 +19,8 @@ from importlib.metadata import version
20
19
  from itertools import islice
21
20
  from os import path
22
21
  from pathlib import Path
23
- from textwrap import dedent
24
22
  from time import perf_counter
25
- from typing import TYPE_CHECKING, Any, NamedTuple, Optional, Tuple, Type, Union
23
+ from typing import TYPE_CHECKING, Any, NamedTuple, Optional, Tuple, Union
26
24
  from urllib.parse import ParseResult, urlparse
27
25
 
28
26
  import anthropic
@@ -38,7 +36,6 @@ from google.auth.credentials import Credentials
38
36
  from google.oauth2 import service_account
39
37
  from magika import Magika
40
38
  from PIL import Image
41
- from pydantic import BaseModel
42
39
  from pytz import country_names, country_timezones
43
40
 
44
41
  from khoj.utils import constants
@@ -337,85 +334,6 @@ def is_e2b_code_sandbox_enabled():
337
334
  return not is_none_or_empty(os.getenv("E2B_API_KEY"))
338
335
 
339
336
 
340
- class ToolDefinition:
341
- def __init__(self, name: str, description: str, schema: dict):
342
- self.name = name
343
- self.description = description
344
- self.schema = schema
345
-
346
-
347
- def create_tool_definition(
348
- schema: Type[BaseModel],
349
- name: str = None,
350
- description: Optional[str] = None,
351
- ) -> ToolDefinition:
352
- """
353
- Converts a response schema BaseModel class into a normalized tool definition.
354
-
355
- A standard AI provider agnostic tool format to specify tools the model can use.
356
- Common logic used across models is kept here. AI provider specific adaptations
357
- should be handled in provider code.
358
-
359
- Args:
360
- response_schema: The Pydantic BaseModel class to convert.
361
- This class defines the response schema for the tool.
362
- tool_name: The name for the AI model tool (e.g., "get_weather", "plan_next_step").
363
- tool_description: Optional description for the AI model tool.
364
- If None, it attempts to use the Pydantic model's docstring.
365
- If that's also missing, a fallback description is generated.
366
-
367
- Returns:
368
- A normalized tool definition for AI model APIs.
369
- """
370
- raw_schema_dict = schema.model_json_schema()
371
-
372
- name = name or schema.__name__.lower()
373
- description = description
374
- if description is None:
375
- docstring = schema.__doc__
376
- if docstring:
377
- description = dedent(docstring).strip()
378
- else:
379
- # Fallback description if no explicit one or docstring is provided
380
- description = f"Tool named '{name}' accepts specified parameters."
381
-
382
- # Process properties to inline enums and remove $defs dependency
383
- processed_properties = {}
384
- original_properties = raw_schema_dict.get("properties", {})
385
- defs = raw_schema_dict.get("$defs", {})
386
-
387
- for prop_name, prop_schema in original_properties.items():
388
- current_prop_schema = deepcopy(prop_schema) # Work on a copy
389
- # Check for enums defined directly in the property for simpler direct enum definitions.
390
- if "$ref" in current_prop_schema:
391
- ref_path = current_prop_schema["$ref"]
392
- if ref_path.startswith("#/$defs/"):
393
- def_name = ref_path.split("/")[-1]
394
- if def_name in defs and "enum" in defs[def_name]:
395
- enum_def = defs[def_name]
396
- current_prop_schema["enum"] = enum_def["enum"]
397
- current_prop_schema["type"] = enum_def.get("type", "string")
398
- if "description" not in current_prop_schema and "description" in enum_def:
399
- current_prop_schema["description"] = enum_def["description"]
400
- del current_prop_schema["$ref"] # Remove the $ref as it's been inlined
401
-
402
- processed_properties[prop_name] = current_prop_schema
403
-
404
- # Generate the compiled schema dictionary for the tool definition.
405
- compiled_schema = {
406
- "type": "object",
407
- "properties": processed_properties,
408
- # Generate content in the order in which the schema properties were defined
409
- "property_ordering": list(schema.model_fields.keys()),
410
- }
411
-
412
- # Include 'required' fields if specified in the Pydantic model
413
- if "required" in raw_schema_dict and raw_schema_dict["required"]:
414
- compiled_schema["required"] = raw_schema_dict["required"]
415
-
416
- return ToolDefinition(name=name, description=description, schema=compiled_schema)
417
-
418
-
419
337
  class ConversationCommand(str, Enum):
420
338
  Default = "default"
421
339
  General = "general"
@@ -429,14 +347,6 @@ class ConversationCommand(str, Enum):
429
347
  Diagram = "diagram"
430
348
  Research = "research"
431
349
  Operator = "operator"
432
- ViewFile = "view_file"
433
- ListFiles = "list_files"
434
- RegexSearchFiles = "regex_search_files"
435
- SemanticSearchFiles = "semantic_search_files"
436
- SearchWeb = "search_web"
437
- ReadWebpage = "read_webpage"
438
- RunCode = "run_code"
439
- OperateComputer = "operate_computer"
440
350
 
441
351
 
442
352
  command_descriptions = {
@@ -450,9 +360,6 @@ command_descriptions = {
450
360
  ConversationCommand.Diagram: "Draw a flowchart, diagram, or any other visual representation best expressed with primitives like lines, rectangles, and text.",
451
361
  ConversationCommand.Research: "Do deep research on a topic. This will take longer than usual, but give a more detailed, comprehensive answer.",
452
362
  ConversationCommand.Operator: "Operate and perform tasks using a computer.",
453
- ConversationCommand.ViewFile: "View the contents of a file with optional line range specification.",
454
- ConversationCommand.ListFiles: "List files under a given path with optional glob pattern.",
455
- ConversationCommand.RegexSearchFiles: "Search for lines in files matching regex pattern with an optional path prefix.",
456
363
  }
457
364
 
458
365
  command_descriptions_for_agent = {
@@ -478,186 +385,13 @@ tool_descriptions_for_llm = {
478
385
  ConversationCommand.Operator: "To use when you need to operate a computer to complete the task.",
479
386
  }
480
387
 
481
- tools_for_research_llm = {
482
- ConversationCommand.SearchWeb: ToolDefinition(
483
- name="search_web",
484
- description="To search the internet for information. Useful to get a quick, broad overview from the internet. Provide all relevant context to ensure new searches, not in previous iterations, are performed. Max {max_search_queries} search queries allowed per iteration.",
485
- schema={
486
- "type": "object",
487
- "properties": {
488
- "query": {
489
- "type": "string",
490
- "description": "The query to search on the internet.",
491
- },
492
- },
493
- "required": ["query"],
494
- },
495
- ),
496
- ConversationCommand.ReadWebpage: ToolDefinition(
497
- name="read_webpage",
498
- description="To extract information from webpages. Useful for more detailed research from the internet. Usually used when you know the webpage links to refer to. Share upto {max_webpages_to_read} webpage links and what information to extract from them in your query.",
499
- schema={
500
- "type": "object",
501
- "properties": {
502
- "urls": {
503
- "type": "array",
504
- "items": {
505
- "type": "string",
506
- },
507
- "description": "The webpage URLs to extract information from.",
508
- },
509
- "query": {
510
- "type": "string",
511
- "description": "The query to extract information from the webpages.",
512
- },
513
- },
514
- "required": ["urls", "query"],
515
- },
516
- ),
517
- ConversationCommand.RunCode: ToolDefinition(
518
- name="run_code",
519
- description=e2b_tool_description if is_e2b_code_sandbox_enabled() else terrarium_tool_description,
520
- schema={
521
- "type": "object",
522
- "properties": {
523
- "query": {
524
- "type": "string",
525
- "description": "Detailed query and all input data required to generate, execute code in the sandbox.",
526
- },
527
- },
528
- "required": ["query"],
529
- },
530
- ),
531
- ConversationCommand.OperateComputer: ToolDefinition(
532
- name="operate_computer",
533
- description="To operate a computer to complete the task.",
534
- schema={
535
- "type": "object",
536
- "properties": {
537
- "query": {
538
- "type": "string",
539
- "description": "The task to perform on the computer.",
540
- },
541
- },
542
- "required": ["query"],
543
- },
544
- ),
545
- ConversationCommand.ViewFile: ToolDefinition(
546
- name="view_file",
547
- description=dedent(
548
- """
549
- To view the contents of specific note or document in the user's personal knowledge base.
550
- Especially helpful if the question expects context from the user's notes or documents.
551
- It can be used after finding the document path with the document search tool.
552
- Optionally specify a line range to view only specific sections of large files.
553
- """
554
- ).strip(),
555
- schema={
556
- "type": "object",
557
- "properties": {
558
- "path": {
559
- "type": "string",
560
- "description": "The file path to view (can be absolute or relative).",
561
- },
562
- "start_line": {
563
- "type": "integer",
564
- "description": "Optional starting line number for viewing a specific range (1-indexed).",
565
- },
566
- "end_line": {
567
- "type": "integer",
568
- "description": "Optional ending line number for viewing a specific range (1-indexed).",
569
- },
570
- },
571
- "required": ["path"],
572
- },
573
- ),
574
- ConversationCommand.ListFiles: ToolDefinition(
575
- name="list_files",
576
- description=dedent(
577
- """
578
- To list files in the user's knowledge base.
579
-
580
- Use the path parameter to only show files under the specified path.
581
- """
582
- ).strip(),
583
- schema={
584
- "type": "object",
585
- "properties": {
586
- "path": {
587
- "type": "string",
588
- "description": "The directory path to list files from.",
589
- },
590
- "pattern": {
591
- "type": "string",
592
- "description": "Optional glob pattern to filter files (e.g., '*.md').",
593
- },
594
- },
595
- },
596
- ),
597
- ConversationCommand.SemanticSearchFiles: ToolDefinition(
598
- name="semantic_search_files",
599
- description=dedent(
600
- """
601
- To have the tool AI semantic search through the user's knowledge base.
602
- Helpful to answer questions for which finding some relevant notes or documents can complete the search. Example: "When was Tom born?"
603
- This tool AI cannot find all relevant notes or documents, only a subset of them.
604
- It is a good starting point to find keywords, discover similar topics or related concepts and some relevant notes or documents.
605
- The tool AI can perform a maximum of {max_search_queries} semantic search queries per iteration.
606
- """
607
- ).strip(),
608
- schema={
609
- "type": "object",
610
- "properties": {
611
- "q": {
612
- "type": "string",
613
- "description": "Your natural language query for the tool to search in the user's knowledge base.",
614
- },
615
- },
616
- "required": ["q"],
617
- },
618
- ),
619
- ConversationCommand.RegexSearchFiles: ToolDefinition(
620
- name="regex_search_files",
621
- description=dedent(
622
- """
623
- To search through the user's knowledge base using regex patterns. Returns all lines matching the pattern.
624
- Helpful to answer questions for which all relevant notes or documents are needed to complete the search. Example: "Notes that mention Tom".
625
- You need to know all the correct keywords or regex patterns for this tool to be useful.
626
-
627
- REMEMBER:
628
- - The regex pattern will ONLY match content on a single line. Multi-line matches are NOT supported (even if you use \\n).
629
-
630
- An optional path prefix can restrict search to specific files/directories.
631
- Use lines_before, lines_after to show context around matches.
632
- """
633
- ).strip(),
634
- schema={
635
- "type": "object",
636
- "properties": {
637
- "regex_pattern": {
638
- "type": "string",
639
- "description": "The regex pattern to search for content in the user's files.",
640
- },
641
- "path_prefix": {
642
- "type": "string",
643
- "description": "Optional path prefix to limit the search to files under a specified path.",
644
- },
645
- "lines_before": {
646
- "type": "integer",
647
- "description": "Optional number of lines to show before each line match for context.",
648
- "minimum": 0,
649
- "maximum": 20,
650
- },
651
- "lines_after": {
652
- "type": "integer",
653
- "description": "Optional number of lines to show after each line match for context.",
654
- "minimum": 0,
655
- "maximum": 20,
656
- },
657
- },
658
- "required": ["regex_pattern"],
659
- },
660
- ),
388
+ tool_description_for_research_llm = {
389
+ ConversationCommand.Notes: "To search the user's personal knowledge base. Especially helpful if the question expects context from the user's notes or documents. Max {max_search_queries} search queries allowed per iteration.",
390
+ ConversationCommand.Online: "To search the internet for information. Useful to get a quick, broad overview from the internet. Provide all relevant context to ensure new searches, not in previous iterations, are performed. Max {max_search_queries} search queries allowed per iteration.",
391
+ ConversationCommand.Webpage: "To extract information from webpages. Useful for more detailed research from the internet. Usually used when you know the webpage links to refer to. Share upto {max_webpages_to_read} webpage links and what information to extract from them in your query.",
392
+ ConversationCommand.Code: e2b_tool_description if is_e2b_code_sandbox_enabled() else terrarium_tool_description,
393
+ ConversationCommand.Text: "To respond to the user once you've completed your research and have the required information.",
394
+ ConversationCommand.Operator: "To operate a computer to complete the task.",
661
395
  }
662
396
 
663
397
  mode_descriptions_for_llm = {
@@ -1116,13 +850,3 @@ def clean_object_for_db(data):
1116
850
  return [clean_object_for_db(item) for item in data]
1117
851
  else:
1118
852
  return data
1119
-
1120
-
1121
- def dict_to_tuple(d):
1122
- # Recursively convert dicts to sorted tuples for hashability
1123
- if isinstance(d, dict):
1124
- return tuple(sorted((k, dict_to_tuple(v)) for k, v in d.items()))
1125
- elif isinstance(d, list):
1126
- return tuple(dict_to_tuple(i) for i in d)
1127
- else:
1128
- return d
@@ -235,6 +235,7 @@ def initialization(interactive: bool = True):
235
235
 
236
236
  chat_model_options = {
237
237
  "name": chat_model,
238
+ "friendly_name": chat_model,
238
239
  "model_type": model_type,
239
240
  "max_prompt_size": default_max_tokens,
240
241
  "vision_enabled": vision_enabled,
@@ -275,6 +276,7 @@ def initialization(interactive: bool = True):
275
276
  if not existing_models.filter(name=model_name).exists():
276
277
  ChatModel.objects.create(
277
278
  name=model_name,
279
+ friendly_name=model_name,
278
280
  model_type=ChatModel.ModelType.OPENAI,
279
281
  max_prompt_size=model_to_prompt_size.get(model_name),
280
282
  vision_enabled=model_name in default_openai_chat_models,
khoj/utils/rawconfig.py CHANGED
@@ -176,7 +176,6 @@ class Entry:
176
176
  compiled: str
177
177
  heading: Optional[str]
178
178
  file: Optional[str]
179
- uri: Optional[str] = None
180
179
  corpus_id: str
181
180
 
182
181
  def __init__(
@@ -185,7 +184,6 @@ class Entry:
185
184
  compiled: str = None,
186
185
  heading: Optional[str] = None,
187
186
  file: Optional[str] = None,
188
- uri: Optional[str] = None,
189
187
  corpus_id: uuid.UUID = None,
190
188
  ):
191
189
  self.raw = raw
@@ -193,14 +191,6 @@ class Entry:
193
191
  self.heading = heading
194
192
  self.file = file
195
193
  self.corpus_id = str(corpus_id)
196
- if uri:
197
- self.uri = uri
198
- elif file and (file.startswith("http") or file.startswith("file://")):
199
- self.uri = file
200
- elif file:
201
- self.uri = f"file://{file}"
202
- else:
203
- self.uri = None
204
194
 
205
195
  def to_json(self) -> str:
206
196
  return json.dumps(self.__dict__, ensure_ascii=False)
@@ -216,5 +206,4 @@ class Entry:
216
206
  file=dictionary.get("file", None),
217
207
  heading=dictionary.get("heading", None),
218
208
  corpus_id=dictionary.get("corpus_id", None),
219
- uri=dictionary.get("uri", None),
220
209
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: khoj
3
- Version: 1.42.9.dev26
3
+ Version: 1.42.10.dev2
4
4
  Summary: Your Second Brain
5
5
  Project-URL: Homepage, https://khoj.dev
6
6
  Project-URL: Documentation, https://docs.khoj.dev