khoj 1.28.4.dev23__py3-none-any.whl → 1.28.4.dev77__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. khoj/configure.py +4 -6
  2. khoj/database/adapters/__init__.py +124 -34
  3. khoj/database/models/__init__.py +4 -0
  4. khoj/interface/compiled/404/index.html +1 -1
  5. khoj/interface/compiled/_next/static/chunks/1603-2418b11d8e8dacb9.js +1 -0
  6. khoj/interface/compiled/_next/static/chunks/1970-c78f6acc8e16e30b.js +1 -0
  7. khoj/interface/compiled/_next/static/chunks/3124-a4cea2eda163128d.js +1 -0
  8. khoj/interface/compiled/_next/static/chunks/5538-5c4f2271e9377b74.js +1 -0
  9. khoj/interface/compiled/_next/static/chunks/8423-db6dad6d44869097.js +1 -0
  10. khoj/interface/compiled/_next/static/chunks/9417-7a8a6da918d37750.js +1 -0
  11. khoj/interface/compiled/_next/static/chunks/app/agents/{page-36da67f03a173e52.js → page-4353b1a532795ad1.js} +1 -1
  12. khoj/interface/compiled/_next/static/chunks/app/automations/{page-774ae3e033f938cd.js → page-c9f13c865e739607.js} +1 -1
  13. khoj/interface/compiled/_next/static/chunks/app/chat/page-97876b3bd3c5e69d.js +1 -0
  14. khoj/interface/compiled/_next/static/chunks/app/{page-322c37514a3a613a.js → page-c33ebe19a3b7b0b2.js} +1 -1
  15. khoj/interface/compiled/_next/static/chunks/app/search/{page-9b64f61caa5bd7f9.js → page-8e28deacb61f75aa.js} +1 -1
  16. khoj/interface/compiled/_next/static/chunks/app/settings/page-2fab613a557d3cc5.js +1 -0
  17. khoj/interface/compiled/_next/static/chunks/app/share/chat/page-3ee3da7e8dfe3572.js +1 -0
  18. khoj/interface/compiled/_next/static/chunks/{webpack-c9799fdebf88abb6.js → webpack-ff5eae43b8dba1d2.js} +1 -1
  19. khoj/interface/compiled/_next/static/css/23f801d22927d568.css +1 -0
  20. khoj/interface/compiled/_next/static/css/592ca99f5122e75a.css +1 -0
  21. khoj/interface/compiled/_next/static/css/af0f36f71f368260.css +25 -0
  22. khoj/interface/compiled/agents/index.html +1 -1
  23. khoj/interface/compiled/agents/index.txt +2 -2
  24. khoj/interface/compiled/automations/index.html +1 -1
  25. khoj/interface/compiled/automations/index.txt +2 -2
  26. khoj/interface/compiled/chat/index.html +1 -1
  27. khoj/interface/compiled/chat/index.txt +2 -2
  28. khoj/interface/compiled/index.html +1 -1
  29. khoj/interface/compiled/index.txt +3 -3
  30. khoj/interface/compiled/search/index.html +1 -1
  31. khoj/interface/compiled/search/index.txt +2 -2
  32. khoj/interface/compiled/settings/index.html +1 -1
  33. khoj/interface/compiled/settings/index.txt +2 -2
  34. khoj/interface/compiled/share/chat/index.html +1 -1
  35. khoj/interface/compiled/share/chat/index.txt +3 -3
  36. khoj/processor/content/docx/docx_to_entries.py +27 -21
  37. khoj/processor/content/github/github_to_entries.py +2 -2
  38. khoj/processor/content/images/image_to_entries.py +2 -2
  39. khoj/processor/content/markdown/markdown_to_entries.py +2 -2
  40. khoj/processor/content/notion/notion_to_entries.py +2 -2
  41. khoj/processor/content/org_mode/org_to_entries.py +2 -2
  42. khoj/processor/content/pdf/pdf_to_entries.py +37 -29
  43. khoj/processor/content/plaintext/plaintext_to_entries.py +2 -2
  44. khoj/processor/content/text_to_entries.py +2 -2
  45. khoj/processor/conversation/anthropic/anthropic_chat.py +7 -1
  46. khoj/processor/conversation/google/gemini_chat.py +15 -2
  47. khoj/processor/conversation/offline/chat_model.py +4 -0
  48. khoj/processor/conversation/openai/gpt.py +6 -1
  49. khoj/processor/conversation/prompts.py +48 -4
  50. khoj/processor/conversation/utils.py +69 -11
  51. khoj/processor/image/generate.py +2 -0
  52. khoj/processor/tools/online_search.py +19 -3
  53. khoj/processor/tools/run_code.py +4 -0
  54. khoj/routers/api.py +6 -1
  55. khoj/routers/api_agents.py +8 -10
  56. khoj/routers/api_chat.py +64 -13
  57. khoj/routers/api_content.py +80 -8
  58. khoj/routers/helpers.py +105 -34
  59. khoj/routers/notion.py +1 -1
  60. khoj/routers/research.py +9 -2
  61. khoj/search_type/text_search.py +1 -1
  62. khoj/utils/fs_syncer.py +2 -1
  63. khoj/utils/rawconfig.py +32 -0
  64. {khoj-1.28.4.dev23.dist-info → khoj-1.28.4.dev77.dist-info}/METADATA +1 -1
  65. {khoj-1.28.4.dev23.dist-info → khoj-1.28.4.dev77.dist-info}/RECORD +70 -70
  66. khoj/interface/compiled/_next/static/chunks/1603-c1568f45947e9f2c.js +0 -1
  67. khoj/interface/compiled/_next/static/chunks/1970-d44050bf658ae5cc.js +0 -1
  68. khoj/interface/compiled/_next/static/chunks/5538-bf582517a8dd3faa.js +0 -1
  69. khoj/interface/compiled/_next/static/chunks/8423-a1f432e4a8d9a6b0.js +0 -1
  70. khoj/interface/compiled/_next/static/chunks/8840-b8d7b9f0923c6651.js +0 -1
  71. khoj/interface/compiled/_next/static/chunks/9417-0d0fc7eb49a86abb.js +0 -1
  72. khoj/interface/compiled/_next/static/chunks/app/chat/page-a369e2bda9897794.js +0 -1
  73. khoj/interface/compiled/_next/static/chunks/app/settings/page-10b288c103f19468.js +0 -1
  74. khoj/interface/compiled/_next/static/chunks/app/share/chat/page-959d5f097cf38c93.js +0 -1
  75. khoj/interface/compiled/_next/static/css/4cae6c0e5c72fb2d.css +0 -1
  76. khoj/interface/compiled/_next/static/css/9d45de78fba367c1.css +0 -1
  77. khoj/interface/compiled/_next/static/css/d2bc549245313f26.css +0 -25
  78. /khoj/interface/compiled/_next/static/{s_mKS5kELaw2v4a7_yWNP → sE94pAZEifEKkz4WQtTNW}/_buildManifest.js +0 -0
  79. /khoj/interface/compiled/_next/static/{s_mKS5kELaw2v4a7_yWNP → sE94pAZEifEKkz4WQtTNW}/_ssgManifest.js +0 -0
  80. {khoj-1.28.4.dev23.dist-info → khoj-1.28.4.dev77.dist-info}/WHEEL +0 -0
  81. {khoj-1.28.4.dev23.dist-info → khoj-1.28.4.dev77.dist-info}/entry_points.txt +0 -0
  82. {khoj-1.28.4.dev23.dist-info → khoj-1.28.4.dev77.dist-info}/licenses/LICENSE +0 -0
@@ -1 +1 @@
1
- <!DOCTYPE html><html lang="en"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" href="/_next/static/media/5455839c73f146e7-s.p.woff2" as="font" crossorigin="" type="font/woff2"/><link rel="stylesheet" href="/_next/static/css/0e9d53dcd7f11342.css" data-precedence="next"/><link rel="stylesheet" href="/_next/static/css/d2bc549245313f26.css" data-precedence="next"/><link rel="stylesheet" href="/_next/static/css/3cf13271869a4aeb.css" data-precedence="next"/><link rel="stylesheet" href="/_next/static/css/9d45de78fba367c1.css" data-precedence="next"/><link rel="stylesheet" href="/_next/static/css/1f293605f2871853.css" data-precedence="next"/><link rel="preload" as="script" fetchPriority="low" href="/_next/static/chunks/webpack-c9799fdebf88abb6.js"/><script src="/_next/static/chunks/fd9d1056-2b978342deb60015.js" async=""></script><script src="/_next/static/chunks/7023-a5bf5744d19b3bd3.js" async=""></script><script src="/_next/static/chunks/main-app-6d6ee3495efe03d4.js" async=""></script><script src="/_next/static/chunks/d3ac728e-a9e3522eef9b6b28.js" async=""></script><script src="/_next/static/chunks/9001-3b27af6d5f21df44.js" async=""></script><script src="/_next/static/chunks/3062-71ed4b46ac2bb87c.js" async=""></script><script src="/_next/static/chunks/8840-b8d7b9f0923c6651.js" async=""></script><script src="/_next/static/chunks/3803-d74118a2d0182c52.js" async=""></script><script src="/_next/static/chunks/2261-748f7c327df3c8c1.js" async=""></script><script src="/_next/static/chunks/9434-9bbae0af6e92854c.js" async=""></script><script src="/_next/static/chunks/1603-c1568f45947e9f2c.js" async=""></script><script src="/_next/static/chunks/9417-0d0fc7eb49a86abb.js" async=""></script><script src="/_next/static/chunks/8423-a1f432e4a8d9a6b0.js" async=""></script><script src="/_next/static/chunks/5538-bf582517a8dd3faa.js" async=""></script><script src="/_next/static/chunks/app/share/chat/page-959d5f097cf38c93.js" async=""></script><meta http-equiv="Content-Security-Policy" content="default-src &#x27;self&#x27; https://assets.khoj.dev; media-src * blob:; script-src &#x27;self&#x27; https://assets.khoj.dev &#x27;unsafe-inline&#x27; &#x27;unsafe-eval&#x27;; connect-src &#x27;self&#x27; blob: https://ipapi.co/json ws://localhost:42110; style-src &#x27;self&#x27; https://assets.khoj.dev &#x27;unsafe-inline&#x27; https://fonts.googleapis.com; img-src &#x27;self&#x27; data: blob: https://*.khoj.dev https://*.googleusercontent.com https://*.google.com/ https://*.gstatic.com; font-src &#x27;self&#x27; https://assets.khoj.dev https://fonts.gstatic.com; child-src &#x27;none&#x27;; object-src &#x27;none&#x27;;"/><meta http-equiv="Content-Security-Policy" content="default-src &#x27;self&#x27; https://assets.khoj.dev; script-src &#x27;self&#x27; https://assets.khoj.dev &#x27;unsafe-inline&#x27; &#x27;unsafe-eval&#x27;; connect-src &#x27;self&#x27; blob: https://ipapi.co/json ws://localhost:42110; style-src &#x27;self&#x27; https://assets.khoj.dev &#x27;unsafe-inline&#x27; https://fonts.googleapis.com; img-src &#x27;self&#x27; data: blob: https://*.khoj.dev https://*.googleusercontent.com https://*.google.com/ https://*.gstatic.com; font-src &#x27;self&#x27; https://assets.khoj.dev https://fonts.gstatic.com; child-src &#x27;none&#x27;; object-src &#x27;none&#x27;;"/><title>Khoj AI - Chat</title><meta name="description" content="Use this page to view a chat with Khoj AI."/><link rel="manifest" href="/static/khoj.webmanifest" crossorigin="use-credentials"/><meta property="og:title" content="Khoj AI - Home"/><meta property="og:description" content="Your Second Brain."/><meta property="og:url" content="https://app.khoj.dev/"/><meta property="og:site_name" content="Khoj AI"/><meta property="og:image" content="https://assets.khoj.dev/khoj_lantern_256x256.png"/><meta property="og:image:width" content="256"/><meta property="og:image:height" content="256"/><meta property="og:image" content="https://assets.khoj.dev/khoj_lantern_logomarktype_1200x630.png"/><meta property="og:image:width" content="1200"/><meta property="og:image:height" content="630"/><meta property="og:type" content="website"/><meta name="twitter:card" content="summary_large_image"/><meta name="twitter:title" content="Khoj AI - Home"/><meta name="twitter:description" content="Your Second Brain."/><meta name="twitter:image" content="https://assets.khoj.dev/khoj_lantern_256x256.png"/><meta name="twitter:image:width" content="256"/><meta name="twitter:image:height" content="256"/><meta name="twitter:image" content="https://assets.khoj.dev/khoj_lantern_logomarktype_1200x630.png"/><meta name="twitter:image:width" content="1200"/><meta name="twitter:image:height" content="630"/><link rel="icon" href="/static/assets/icons/khoj_lantern.ico"/><link rel="apple-touch-icon" href="/static/assets/icons/khoj_lantern_256x256.png"/><meta name="next-size-adjust"/><script src="/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js" noModule=""></script></head><body class="__className_af6c42"><html lang="en"><body class="__className_af6c42"><div class="bg-background opacity-50 flex items-center justify-center h-screen"><div>Loading<!-- --> <span><svg xmlns="http://www.w3.org/2000/svg" width="1em" height="1em" fill="currentColor" viewBox="0 0 256 256" class="inline animate-spin h-5 w-5"><path d="M232,128a104,104,0,0,1-208,0c0-41,23.81-78.36,60.66-95.27a8,8,0,0,1,6.68,14.54C60.15,61.59,40,93.27,40,128a88,88,0,0,0,176,0c0-34.73-20.15-66.41-51.34-80.73a8,8,0,0,1,6.68-14.54C208.19,49.64,232,87,232,128Z"></path></svg></span></div></div><script>window.EXCALIDRAW_ASSET_PATH = 'https://assets.khoj.dev/@excalidraw/excalidraw/dist/';</script><script src="/_next/static/chunks/webpack-c9799fdebf88abb6.js" async=""></script></body></html><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/_next/static/media/5455839c73f146e7-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/_next/static/css/0e9d53dcd7f11342.css\",\"style\"]\n3:HL[\"/_next/static/css/d2bc549245313f26.css\",\"style\"]\n4:HL[\"/_next/static/css/3cf13271869a4aeb.css\",\"style\"]\n5:HL[\"/_next/static/css/9d45de78fba367c1.css\",\"style\"]\n6:HL[\"/_next/static/css/1f293605f2871853.css\",\"style\"]\n"])</script><script>self.__next_f.push([1,"7:I[95751,[],\"\"]\n9:I[66513,[],\"ClientPageRoot\"]\na:I[5506,[\"3954\",\"static/chunks/d3ac728e-a9e3522eef9b6b28.js\",\"9001\",\"static/chunks/9001-3b27af6d5f21df44.js\",\"3062\",\"static/chunks/3062-71ed4b46ac2bb87c.js\",\"8840\",\"static/chunks/8840-b8d7b9f0923c6651.js\",\"3803\",\"static/chunks/3803-d74118a2d0182c52.js\",\"2261\",\"static/chunks/2261-748f7c327df3c8c1.js\",\"9434\",\"static/chunks/9434-9bbae0af6e92854c.js\",\"1603\",\"static/chunks/1603-c1568f45947e9f2c.js\",\"9417\",\"static/chunks/9417-0d0fc7eb49a86abb.js\",\"8423\",\"static/chunks/8423-a1f432e4a8d9a6b0.js\",\"5538\",\"static/chunks/5538-bf582517a8dd3faa.js\",\"3111\",\"static/chunks/app/share/chat/page-959d5f097cf38c93.js\"],\"default\",1]\nb:I[39275,[],\"\"]\nc:I[61343,[],\"\"]\ne:I[76130,[],\"\"]\nf:[]\n"])</script><script>self.__next_f.push([1,"0:[\"$\",\"$L7\",null,{\"buildId\":\"s_mKS5kELaw2v4a7_yWNP\",\"assetPrefix\":\"\",\"urlParts\":[\"\",\"share\",\"chat\",\"\"],\"initialTree\":[\"\",{\"children\":[\"share\",{\"children\":[\"chat\",{\"children\":[\"__PAGE__\",{}]}]}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"share\",{\"children\":[\"chat\",{\"children\":[\"__PAGE__\",{},[[\"$L8\",[\"$\",\"$L9\",null,{\"props\":{\"params\":{},\"searchParams\":{}},\"Component\":\"$a\"}],[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/_next/static/css/3cf13271869a4aeb.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}],[\"$\",\"link\",\"1\",{\"rel\":\"stylesheet\",\"href\":\"/_next/static/css/9d45de78fba367c1.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}],[\"$\",\"link\",\"2\",{\"rel\":\"stylesheet\",\"href\":\"/_next/static/css/1f293605f2871853.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}]]],null],null]},[[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[[\"$\",\"meta\",null,{\"httpEquiv\":\"Content-Security-Policy\",\"content\":\"default-src 'self' https://assets.khoj.dev; script-src 'self' https://assets.khoj.dev 'unsafe-inline' 'unsafe-eval'; connect-src 'self' blob: https://ipapi.co/json ws://localhost:42110; style-src 'self' https://assets.khoj.dev 'unsafe-inline' https://fonts.googleapis.com; img-src 'self' data: blob: https://*.khoj.dev https://*.googleusercontent.com https://*.google.com/ https://*.gstatic.com; font-src 'self' https://assets.khoj.dev https://fonts.gstatic.com; child-src 'none'; object-src 'none';\"}],[\"$\",\"body\",null,{\"className\":\"__className_af6c42\",\"children\":[[\"$\",\"$Lb\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\",\"share\",\"children\",\"chat\",\"children\"],\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$Lc\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":\"$undefined\",\"notFoundStyles\":\"$undefined\"}],[\"$\",\"script\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"window.EXCALIDRAW_ASSET_PATH = 'https://assets.khoj.dev/@excalidraw/excalidraw/dist/';\"}}]]}]]}]],null],null]},[null,[\"$\",\"$Lb\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\",\"share\",\"children\"],\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$Lc\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":\"$undefined\",\"notFoundStyles\":\"$undefined\"}]],null]},[[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/_next/static/css/0e9d53dcd7f11342.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}],[\"$\",\"link\",\"1\",{\"rel\":\"stylesheet\",\"href\":\"/_next/static/css/d2bc549245313f26.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}]],[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[[\"$\",\"meta\",null,{\"httpEquiv\":\"Content-Security-Policy\",\"content\":\"default-src 'self' https://assets.khoj.dev; media-src * blob:; script-src 'self' https://assets.khoj.dev 'unsafe-inline' 'unsafe-eval'; connect-src 'self' blob: https://ipapi.co/json ws://localhost:42110; style-src 'self' https://assets.khoj.dev 'unsafe-inline' https://fonts.googleapis.com; img-src 'self' data: blob: https://*.khoj.dev https://*.googleusercontent.com https://*.google.com/ https://*.gstatic.com; font-src 'self' https://assets.khoj.dev https://fonts.gstatic.com; child-src 'none'; object-src 'none';\"}],[\"$\",\"body\",null,{\"className\":\"__className_af6c42\",\"children\":[\"$\",\"$Lb\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$Lc\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[]}]}]]}]],null],null],\"couldBeIntercepted\":false,\"initialHead\":[null,\"$Ld\"],\"globalErrorComponent\":\"$e\",\"missingSlots\":\"$Wf\"}]\n"])</script><script>self.__next_f.push([1,"d:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"Khoj AI - Chat\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"Use this page to view a chat with Khoj AI.\"}],[\"$\",\"link\",\"4\",{\"rel\":\"manifest\",\"href\":\"/static/khoj.webmanifest\",\"crossOrigin\":\"use-credentials\"}],[\"$\",\"meta\",\"5\",{\"property\":\"og:title\",\"content\":\"Khoj AI - Home\"}],[\"$\",\"meta\",\"6\",{\"property\":\"og:description\",\"content\":\"Your Second Brain.\"}],[\"$\",\"meta\",\"7\",{\"property\":\"og:url\",\"content\":\"https://app.khoj.dev/\"}],[\"$\",\"meta\",\"8\",{\"property\":\"og:site_name\",\"content\":\"Khoj AI\"}],[\"$\",\"meta\",\"9\",{\"property\":\"og:image\",\"content\":\"https://assets.khoj.dev/khoj_lantern_256x256.png\"}],[\"$\",\"meta\",\"10\",{\"property\":\"og:image:width\",\"content\":\"256\"}],[\"$\",\"meta\",\"11\",{\"property\":\"og:image:height\",\"content\":\"256\"}],[\"$\",\"meta\",\"12\",{\"property\":\"og:image\",\"content\":\"https://assets.khoj.dev/khoj_lantern_logomarktype_1200x630.png\"}],[\"$\",\"meta\",\"13\",{\"property\":\"og:image:width\",\"content\":\"1200\"}],[\"$\",\"meta\",\"14\",{\"property\":\"og:image:height\",\"content\":\"630\"}],[\"$\",\"meta\",\"15\",{\"property\":\"og:type\",\"content\":\"website\"}],[\"$\",\"meta\",\"16\",{\"name\":\"twitter:card\",\"content\":\"summary_large_image\"}],[\"$\",\"meta\",\"17\",{\"name\":\"twitter:title\",\"content\":\"Khoj AI - Home\"}],[\"$\",\"meta\",\"18\",{\"name\":\"twitter:description\",\"content\":\"Your Second Brain.\"}],[\"$\",\"meta\",\"19\",{\"name\":\"twitter:image\",\"content\":\"https://assets.khoj.dev/khoj_lantern_256x256.png\"}],[\"$\",\"meta\",\"20\",{\"name\":\"twitter:image:width\",\"content\":\"256\"}],[\"$\",\"meta\",\"21\",{\"name\":\"twitter:image:height\",\"content\":\"256\"}],[\"$\",\"meta\",\"22\",{\"name\":\"twitter:image\",\"content\":\"https://assets.khoj.dev/khoj_lantern_logomarktype_1200x630.png\"}],[\"$\",\"meta\",\"23\",{\"name\":\"twitter:image:width\",\"content\":\"1200\"}],[\"$\",\"meta\",\"24\",{\"name\":\"twitter:image:height\",\"content\":\"630\"}],[\"$\",\"link\",\"25\",{\"rel\":\"icon\",\"href\":\"/static/assets/icons/khoj_lantern.ico\"}],[\"$\",\"link\",\"26\",{\"rel\":\"apple-touch-icon\",\"href\":\"/static/assets/icons/khoj_lantern_256x256.png\"}],[\"$\",\"meta\",\"27\",{\"name\":\"next-size-adjust\"}]]\n"])</script><script>self.__next_f.push([1,"8:null\n"])</script></body></html>
1
+ <!DOCTYPE html><html lang="en"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" href="/_next/static/media/5455839c73f146e7-s.p.woff2" as="font" crossorigin="" type="font/woff2"/><link rel="stylesheet" href="/_next/static/css/0e9d53dcd7f11342.css" data-precedence="next"/><link rel="stylesheet" href="/_next/static/css/af0f36f71f368260.css" data-precedence="next"/><link rel="stylesheet" href="/_next/static/css/3cf13271869a4aeb.css" data-precedence="next"/><link rel="stylesheet" href="/_next/static/css/23f801d22927d568.css" data-precedence="next"/><link rel="stylesheet" href="/_next/static/css/1f293605f2871853.css" data-precedence="next"/><link rel="preload" as="script" fetchPriority="low" href="/_next/static/chunks/webpack-ff5eae43b8dba1d2.js"/><script src="/_next/static/chunks/fd9d1056-2b978342deb60015.js" async=""></script><script src="/_next/static/chunks/7023-a5bf5744d19b3bd3.js" async=""></script><script src="/_next/static/chunks/main-app-6d6ee3495efe03d4.js" async=""></script><script src="/_next/static/chunks/d3ac728e-a9e3522eef9b6b28.js" async=""></script><script src="/_next/static/chunks/9001-3b27af6d5f21df44.js" async=""></script><script src="/_next/static/chunks/3062-71ed4b46ac2bb87c.js" async=""></script><script src="/_next/static/chunks/3124-a4cea2eda163128d.js" async=""></script><script src="/_next/static/chunks/3803-d74118a2d0182c52.js" async=""></script><script src="/_next/static/chunks/2261-748f7c327df3c8c1.js" async=""></script><script src="/_next/static/chunks/9434-9bbae0af6e92854c.js" async=""></script><script src="/_next/static/chunks/1603-2418b11d8e8dacb9.js" async=""></script><script src="/_next/static/chunks/9417-7a8a6da918d37750.js" async=""></script><script src="/_next/static/chunks/8423-db6dad6d44869097.js" async=""></script><script src="/_next/static/chunks/5538-5c4f2271e9377b74.js" async=""></script><script src="/_next/static/chunks/app/share/chat/page-3ee3da7e8dfe3572.js" async=""></script><meta http-equiv="Content-Security-Policy" content="default-src &#x27;self&#x27; https://assets.khoj.dev; media-src * blob:; script-src &#x27;self&#x27; https://assets.khoj.dev &#x27;unsafe-inline&#x27; &#x27;unsafe-eval&#x27;; connect-src &#x27;self&#x27; blob: https://ipapi.co/json ws://localhost:42110; style-src &#x27;self&#x27; https://assets.khoj.dev &#x27;unsafe-inline&#x27; https://fonts.googleapis.com; img-src &#x27;self&#x27; data: blob: https://*.khoj.dev https://*.googleusercontent.com https://*.google.com/ https://*.gstatic.com; font-src &#x27;self&#x27; https://assets.khoj.dev https://fonts.gstatic.com; child-src &#x27;none&#x27;; object-src &#x27;none&#x27;;"/><meta http-equiv="Content-Security-Policy" content="default-src &#x27;self&#x27; https://assets.khoj.dev; script-src &#x27;self&#x27; https://assets.khoj.dev &#x27;unsafe-inline&#x27; &#x27;unsafe-eval&#x27;; connect-src &#x27;self&#x27; blob: https://ipapi.co/json ws://localhost:42110; style-src &#x27;self&#x27; https://assets.khoj.dev &#x27;unsafe-inline&#x27; https://fonts.googleapis.com; img-src &#x27;self&#x27; data: blob: https://*.khoj.dev https://*.googleusercontent.com https://*.google.com/ https://*.gstatic.com; font-src &#x27;self&#x27; https://assets.khoj.dev https://fonts.gstatic.com; child-src &#x27;none&#x27;; object-src &#x27;none&#x27;;"/><title>Khoj AI - Chat</title><meta name="description" content="Use this page to view a chat with Khoj AI."/><link rel="manifest" href="/static/khoj.webmanifest" crossorigin="use-credentials"/><meta property="og:title" content="Khoj AI"/><meta property="og:description" content="Your Second Brain."/><meta property="og:url" content="https://app.khoj.dev/"/><meta property="og:site_name" content="Khoj AI"/><meta property="og:image" content="https://assets.khoj.dev/khoj_lantern_256x256.png"/><meta property="og:image:width" content="256"/><meta property="og:image:height" content="256"/><meta property="og:image" content="https://assets.khoj.dev/khoj_lantern_logomarktype_1200x630.png"/><meta property="og:image:width" content="1200"/><meta property="og:image:height" content="630"/><meta property="og:type" content="website"/><meta name="twitter:card" content="summary_large_image"/><meta name="twitter:title" content="Khoj AI"/><meta name="twitter:description" content="Your Second Brain."/><meta name="twitter:image" content="https://assets.khoj.dev/khoj_lantern_256x256.png"/><meta name="twitter:image:width" content="256"/><meta name="twitter:image:height" content="256"/><meta name="twitter:image" content="https://assets.khoj.dev/khoj_lantern_logomarktype_1200x630.png"/><meta name="twitter:image:width" content="1200"/><meta name="twitter:image:height" content="630"/><link rel="icon" href="/static/assets/icons/khoj_lantern.ico"/><link rel="apple-touch-icon" href="/static/assets/icons/khoj_lantern_256x256.png"/><meta name="next-size-adjust"/><script src="/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js" noModule=""></script></head><body class="__className_af6c42"><html lang="en"><body class="__className_af6c42"><div class="bg-background opacity-50 flex items-center justify-center h-screen"><div>Loading<!-- --> <span><svg xmlns="http://www.w3.org/2000/svg" width="1em" height="1em" fill="currentColor" viewBox="0 0 256 256" class="inline animate-spin h-5 w-5"><path d="M232,128a104,104,0,0,1-208,0c0-41,23.81-78.36,60.66-95.27a8,8,0,0,1,6.68,14.54C60.15,61.59,40,93.27,40,128a88,88,0,0,0,176,0c0-34.73-20.15-66.41-51.34-80.73a8,8,0,0,1,6.68-14.54C208.19,49.64,232,87,232,128Z"></path></svg></span></div></div><script>window.EXCALIDRAW_ASSET_PATH = 'https://assets.khoj.dev/@excalidraw/excalidraw/dist/';</script><script src="/_next/static/chunks/webpack-ff5eae43b8dba1d2.js" async=""></script></body></html><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/_next/static/media/5455839c73f146e7-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/_next/static/css/0e9d53dcd7f11342.css\",\"style\"]\n3:HL[\"/_next/static/css/af0f36f71f368260.css\",\"style\"]\n4:HL[\"/_next/static/css/3cf13271869a4aeb.css\",\"style\"]\n5:HL[\"/_next/static/css/23f801d22927d568.css\",\"style\"]\n6:HL[\"/_next/static/css/1f293605f2871853.css\",\"style\"]\n"])</script><script>self.__next_f.push([1,"7:I[95751,[],\"\"]\n9:I[66513,[],\"ClientPageRoot\"]\na:I[5506,[\"3954\",\"static/chunks/d3ac728e-a9e3522eef9b6b28.js\",\"9001\",\"static/chunks/9001-3b27af6d5f21df44.js\",\"3062\",\"static/chunks/3062-71ed4b46ac2bb87c.js\",\"3124\",\"static/chunks/3124-a4cea2eda163128d.js\",\"3803\",\"static/chunks/3803-d74118a2d0182c52.js\",\"2261\",\"static/chunks/2261-748f7c327df3c8c1.js\",\"9434\",\"static/chunks/9434-9bbae0af6e92854c.js\",\"1603\",\"static/chunks/1603-2418b11d8e8dacb9.js\",\"9417\",\"static/chunks/9417-7a8a6da918d37750.js\",\"8423\",\"static/chunks/8423-db6dad6d44869097.js\",\"5538\",\"static/chunks/5538-5c4f2271e9377b74.js\",\"3111\",\"static/chunks/app/share/chat/page-3ee3da7e8dfe3572.js\"],\"default\",1]\nb:I[39275,[],\"\"]\nc:I[61343,[],\"\"]\ne:I[76130,[],\"\"]\nf:[]\n"])</script><script>self.__next_f.push([1,"0:[\"$\",\"$L7\",null,{\"buildId\":\"sE94pAZEifEKkz4WQtTNW\",\"assetPrefix\":\"\",\"urlParts\":[\"\",\"share\",\"chat\",\"\"],\"initialTree\":[\"\",{\"children\":[\"share\",{\"children\":[\"chat\",{\"children\":[\"__PAGE__\",{}]}]}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"share\",{\"children\":[\"chat\",{\"children\":[\"__PAGE__\",{},[[\"$L8\",[\"$\",\"$L9\",null,{\"props\":{\"params\":{},\"searchParams\":{}},\"Component\":\"$a\"}],[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/_next/static/css/3cf13271869a4aeb.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}],[\"$\",\"link\",\"1\",{\"rel\":\"stylesheet\",\"href\":\"/_next/static/css/23f801d22927d568.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}],[\"$\",\"link\",\"2\",{\"rel\":\"stylesheet\",\"href\":\"/_next/static/css/1f293605f2871853.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}]]],null],null]},[[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[[\"$\",\"meta\",null,{\"httpEquiv\":\"Content-Security-Policy\",\"content\":\"default-src 'self' https://assets.khoj.dev; script-src 'self' https://assets.khoj.dev 'unsafe-inline' 'unsafe-eval'; connect-src 'self' blob: https://ipapi.co/json ws://localhost:42110; style-src 'self' https://assets.khoj.dev 'unsafe-inline' https://fonts.googleapis.com; img-src 'self' data: blob: https://*.khoj.dev https://*.googleusercontent.com https://*.google.com/ https://*.gstatic.com; font-src 'self' https://assets.khoj.dev https://fonts.gstatic.com; child-src 'none'; object-src 'none';\"}],[\"$\",\"body\",null,{\"className\":\"__className_af6c42\",\"children\":[[\"$\",\"$Lb\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\",\"share\",\"children\",\"chat\",\"children\"],\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$Lc\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":\"$undefined\",\"notFoundStyles\":\"$undefined\"}],[\"$\",\"script\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"window.EXCALIDRAW_ASSET_PATH = 'https://assets.khoj.dev/@excalidraw/excalidraw/dist/';\"}}]]}]]}]],null],null]},[null,[\"$\",\"$Lb\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\",\"share\",\"children\"],\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$Lc\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":\"$undefined\",\"notFoundStyles\":\"$undefined\"}]],null]},[[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/_next/static/css/0e9d53dcd7f11342.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}],[\"$\",\"link\",\"1\",{\"rel\":\"stylesheet\",\"href\":\"/_next/static/css/af0f36f71f368260.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}]],[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[[\"$\",\"meta\",null,{\"httpEquiv\":\"Content-Security-Policy\",\"content\":\"default-src 'self' https://assets.khoj.dev; media-src * blob:; script-src 'self' https://assets.khoj.dev 'unsafe-inline' 'unsafe-eval'; connect-src 'self' blob: https://ipapi.co/json ws://localhost:42110; style-src 'self' https://assets.khoj.dev 'unsafe-inline' https://fonts.googleapis.com; img-src 'self' data: blob: https://*.khoj.dev https://*.googleusercontent.com https://*.google.com/ https://*.gstatic.com; font-src 'self' https://assets.khoj.dev https://fonts.gstatic.com; child-src 'none'; object-src 'none';\"}],[\"$\",\"body\",null,{\"className\":\"__className_af6c42\",\"children\":[\"$\",\"$Lb\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$Lc\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[]}]}]]}]],null],null],\"couldBeIntercepted\":false,\"initialHead\":[null,\"$Ld\"],\"globalErrorComponent\":\"$e\",\"missingSlots\":\"$Wf\"}]\n"])</script><script>self.__next_f.push([1,"d:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"Khoj AI - Chat\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"Use this page to view a chat with Khoj AI.\"}],[\"$\",\"link\",\"4\",{\"rel\":\"manifest\",\"href\":\"/static/khoj.webmanifest\",\"crossOrigin\":\"use-credentials\"}],[\"$\",\"meta\",\"5\",{\"property\":\"og:title\",\"content\":\"Khoj AI\"}],[\"$\",\"meta\",\"6\",{\"property\":\"og:description\",\"content\":\"Your Second Brain.\"}],[\"$\",\"meta\",\"7\",{\"property\":\"og:url\",\"content\":\"https://app.khoj.dev/\"}],[\"$\",\"meta\",\"8\",{\"property\":\"og:site_name\",\"content\":\"Khoj AI\"}],[\"$\",\"meta\",\"9\",{\"property\":\"og:image\",\"content\":\"https://assets.khoj.dev/khoj_lantern_256x256.png\"}],[\"$\",\"meta\",\"10\",{\"property\":\"og:image:width\",\"content\":\"256\"}],[\"$\",\"meta\",\"11\",{\"property\":\"og:image:height\",\"content\":\"256\"}],[\"$\",\"meta\",\"12\",{\"property\":\"og:image\",\"content\":\"https://assets.khoj.dev/khoj_lantern_logomarktype_1200x630.png\"}],[\"$\",\"meta\",\"13\",{\"property\":\"og:image:width\",\"content\":\"1200\"}],[\"$\",\"meta\",\"14\",{\"property\":\"og:image:height\",\"content\":\"630\"}],[\"$\",\"meta\",\"15\",{\"property\":\"og:type\",\"content\":\"website\"}],[\"$\",\"meta\",\"16\",{\"name\":\"twitter:card\",\"content\":\"summary_large_image\"}],[\"$\",\"meta\",\"17\",{\"name\":\"twitter:title\",\"content\":\"Khoj AI\"}],[\"$\",\"meta\",\"18\",{\"name\":\"twitter:description\",\"content\":\"Your Second Brain.\"}],[\"$\",\"meta\",\"19\",{\"name\":\"twitter:image\",\"content\":\"https://assets.khoj.dev/khoj_lantern_256x256.png\"}],[\"$\",\"meta\",\"20\",{\"name\":\"twitter:image:width\",\"content\":\"256\"}],[\"$\",\"meta\",\"21\",{\"name\":\"twitter:image:height\",\"content\":\"256\"}],[\"$\",\"meta\",\"22\",{\"name\":\"twitter:image\",\"content\":\"https://assets.khoj.dev/khoj_lantern_logomarktype_1200x630.png\"}],[\"$\",\"meta\",\"23\",{\"name\":\"twitter:image:width\",\"content\":\"1200\"}],[\"$\",\"meta\",\"24\",{\"name\":\"twitter:image:height\",\"content\":\"630\"}],[\"$\",\"link\",\"25\",{\"rel\":\"icon\",\"href\":\"/static/assets/icons/khoj_lantern.ico\"}],[\"$\",\"link\",\"26\",{\"rel\":\"apple-touch-icon\",\"href\":\"/static/assets/icons/khoj_lantern_256x256.png\"}],[\"$\",\"meta\",\"27\",{\"name\":\"next-size-adjust\"}]]\n"])</script><script>self.__next_f.push([1,"8:null\n"])</script></body></html>
@@ -1,7 +1,7 @@
1
1
  2:I[66513,[],"ClientPageRoot"]
2
- 3:I[5506,["3954","static/chunks/d3ac728e-a9e3522eef9b6b28.js","9001","static/chunks/9001-3b27af6d5f21df44.js","3062","static/chunks/3062-71ed4b46ac2bb87c.js","8840","static/chunks/8840-b8d7b9f0923c6651.js","3803","static/chunks/3803-d74118a2d0182c52.js","2261","static/chunks/2261-748f7c327df3c8c1.js","9434","static/chunks/9434-9bbae0af6e92854c.js","1603","static/chunks/1603-c1568f45947e9f2c.js","9417","static/chunks/9417-0d0fc7eb49a86abb.js","8423","static/chunks/8423-a1f432e4a8d9a6b0.js","5538","static/chunks/5538-bf582517a8dd3faa.js","3111","static/chunks/app/share/chat/page-959d5f097cf38c93.js"],"default",1]
2
+ 3:I[5506,["3954","static/chunks/d3ac728e-a9e3522eef9b6b28.js","9001","static/chunks/9001-3b27af6d5f21df44.js","3062","static/chunks/3062-71ed4b46ac2bb87c.js","3124","static/chunks/3124-a4cea2eda163128d.js","3803","static/chunks/3803-d74118a2d0182c52.js","2261","static/chunks/2261-748f7c327df3c8c1.js","9434","static/chunks/9434-9bbae0af6e92854c.js","1603","static/chunks/1603-2418b11d8e8dacb9.js","9417","static/chunks/9417-7a8a6da918d37750.js","8423","static/chunks/8423-db6dad6d44869097.js","5538","static/chunks/5538-5c4f2271e9377b74.js","3111","static/chunks/app/share/chat/page-3ee3da7e8dfe3572.js"],"default",1]
3
3
  4:I[39275,[],""]
4
4
  5:I[61343,[],""]
5
- 0:["s_mKS5kELaw2v4a7_yWNP",[[["",{"children":["share",{"children":["chat",{"children":["__PAGE__",{}]}]}]},"$undefined","$undefined",true],["",{"children":["share",{"children":["chat",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],[["$","link","0",{"rel":"stylesheet","href":"/_next/static/css/3cf13271869a4aeb.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/_next/static/css/9d45de78fba367c1.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","2",{"rel":"stylesheet","href":"/_next/static/css/1f293605f2871853.css","precedence":"next","crossOrigin":"$undefined"}]]],null],null]},[[null,["$","html",null,{"lang":"en","children":[["$","meta",null,{"httpEquiv":"Content-Security-Policy","content":"default-src 'self' https://assets.khoj.dev; script-src 'self' https://assets.khoj.dev 'unsafe-inline' 'unsafe-eval'; connect-src 'self' blob: https://ipapi.co/json ws://localhost:42110; style-src 'self' https://assets.khoj.dev 'unsafe-inline' https://fonts.googleapis.com; img-src 'self' data: blob: https://*.khoj.dev https://*.googleusercontent.com https://*.google.com/ https://*.gstatic.com; font-src 'self' https://assets.khoj.dev https://fonts.gstatic.com; child-src 'none'; object-src 'none';"}],["$","body",null,{"className":"__className_af6c42","children":[["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","share","children","chat","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}],["$","script",null,{"dangerouslySetInnerHTML":{"__html":"window.EXCALIDRAW_ASSET_PATH = 'https://assets.khoj.dev/@excalidraw/excalidraw/dist/';"}}]]}]]}]],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","share","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/_next/static/css/0e9d53dcd7f11342.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/_next/static/css/d2bc549245313f26.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":[["$","meta",null,{"httpEquiv":"Content-Security-Policy","content":"default-src 'self' https://assets.khoj.dev; media-src * blob:; script-src 'self' https://assets.khoj.dev 'unsafe-inline' 'unsafe-eval'; connect-src 'self' blob: https://ipapi.co/json ws://localhost:42110; style-src 'self' https://assets.khoj.dev 'unsafe-inline' https://fonts.googleapis.com; img-src 'self' data: blob: https://*.khoj.dev https://*.googleusercontent.com https://*.google.com/ https://*.gstatic.com; font-src 'self' https://assets.khoj.dev https://fonts.gstatic.com; child-src 'none'; object-src 'none';"}],["$","body",null,{"className":"__className_af6c42","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]]}]],null],null],["$L6",null]]]]
6
- 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"Khoj AI - Chat"}],["$","meta","3",{"name":"description","content":"Use this page to view a chat with Khoj AI."}],["$","link","4",{"rel":"manifest","href":"/static/khoj.webmanifest","crossOrigin":"use-credentials"}],["$","meta","5",{"property":"og:title","content":"Khoj AI - Home"}],["$","meta","6",{"property":"og:description","content":"Your Second Brain."}],["$","meta","7",{"property":"og:url","content":"https://app.khoj.dev/"}],["$","meta","8",{"property":"og:site_name","content":"Khoj AI"}],["$","meta","9",{"property":"og:image","content":"https://assets.khoj.dev/khoj_lantern_256x256.png"}],["$","meta","10",{"property":"og:image:width","content":"256"}],["$","meta","11",{"property":"og:image:height","content":"256"}],["$","meta","12",{"property":"og:image","content":"https://assets.khoj.dev/khoj_lantern_logomarktype_1200x630.png"}],["$","meta","13",{"property":"og:image:width","content":"1200"}],["$","meta","14",{"property":"og:image:height","content":"630"}],["$","meta","15",{"property":"og:type","content":"website"}],["$","meta","16",{"name":"twitter:card","content":"summary_large_image"}],["$","meta","17",{"name":"twitter:title","content":"Khoj AI - Home"}],["$","meta","18",{"name":"twitter:description","content":"Your Second Brain."}],["$","meta","19",{"name":"twitter:image","content":"https://assets.khoj.dev/khoj_lantern_256x256.png"}],["$","meta","20",{"name":"twitter:image:width","content":"256"}],["$","meta","21",{"name":"twitter:image:height","content":"256"}],["$","meta","22",{"name":"twitter:image","content":"https://assets.khoj.dev/khoj_lantern_logomarktype_1200x630.png"}],["$","meta","23",{"name":"twitter:image:width","content":"1200"}],["$","meta","24",{"name":"twitter:image:height","content":"630"}],["$","link","25",{"rel":"icon","href":"/static/assets/icons/khoj_lantern.ico"}],["$","link","26",{"rel":"apple-touch-icon","href":"/static/assets/icons/khoj_lantern_256x256.png"}],["$","meta","27",{"name":"next-size-adjust"}]]
5
+ 0:["sE94pAZEifEKkz4WQtTNW",[[["",{"children":["share",{"children":["chat",{"children":["__PAGE__",{}]}]}]},"$undefined","$undefined",true],["",{"children":["share",{"children":["chat",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],[["$","link","0",{"rel":"stylesheet","href":"/_next/static/css/3cf13271869a4aeb.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/_next/static/css/23f801d22927d568.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","2",{"rel":"stylesheet","href":"/_next/static/css/1f293605f2871853.css","precedence":"next","crossOrigin":"$undefined"}]]],null],null]},[[null,["$","html",null,{"lang":"en","children":[["$","meta",null,{"httpEquiv":"Content-Security-Policy","content":"default-src 'self' https://assets.khoj.dev; script-src 'self' https://assets.khoj.dev 'unsafe-inline' 'unsafe-eval'; connect-src 'self' blob: https://ipapi.co/json ws://localhost:42110; style-src 'self' https://assets.khoj.dev 'unsafe-inline' https://fonts.googleapis.com; img-src 'self' data: blob: https://*.khoj.dev https://*.googleusercontent.com https://*.google.com/ https://*.gstatic.com; font-src 'self' https://assets.khoj.dev https://fonts.gstatic.com; child-src 'none'; object-src 'none';"}],["$","body",null,{"className":"__className_af6c42","children":[["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","share","children","chat","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}],["$","script",null,{"dangerouslySetInnerHTML":{"__html":"window.EXCALIDRAW_ASSET_PATH = 'https://assets.khoj.dev/@excalidraw/excalidraw/dist/';"}}]]}]]}]],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","share","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/_next/static/css/0e9d53dcd7f11342.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/_next/static/css/af0f36f71f368260.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":[["$","meta",null,{"httpEquiv":"Content-Security-Policy","content":"default-src 'self' https://assets.khoj.dev; media-src * blob:; script-src 'self' https://assets.khoj.dev 'unsafe-inline' 'unsafe-eval'; connect-src 'self' blob: https://ipapi.co/json ws://localhost:42110; style-src 'self' https://assets.khoj.dev 'unsafe-inline' https://fonts.googleapis.com; img-src 'self' data: blob: https://*.khoj.dev https://*.googleusercontent.com https://*.google.com/ https://*.gstatic.com; font-src 'self' https://assets.khoj.dev https://fonts.gstatic.com; child-src 'none'; object-src 'none';"}],["$","body",null,{"className":"__className_af6c42","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]]}]],null],null],["$L6",null]]]]
6
+ 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"Khoj AI - Chat"}],["$","meta","3",{"name":"description","content":"Use this page to view a chat with Khoj AI."}],["$","link","4",{"rel":"manifest","href":"/static/khoj.webmanifest","crossOrigin":"use-credentials"}],["$","meta","5",{"property":"og:title","content":"Khoj AI"}],["$","meta","6",{"property":"og:description","content":"Your Second Brain."}],["$","meta","7",{"property":"og:url","content":"https://app.khoj.dev/"}],["$","meta","8",{"property":"og:site_name","content":"Khoj AI"}],["$","meta","9",{"property":"og:image","content":"https://assets.khoj.dev/khoj_lantern_256x256.png"}],["$","meta","10",{"property":"og:image:width","content":"256"}],["$","meta","11",{"property":"og:image:height","content":"256"}],["$","meta","12",{"property":"og:image","content":"https://assets.khoj.dev/khoj_lantern_logomarktype_1200x630.png"}],["$","meta","13",{"property":"og:image:width","content":"1200"}],["$","meta","14",{"property":"og:image:height","content":"630"}],["$","meta","15",{"property":"og:type","content":"website"}],["$","meta","16",{"name":"twitter:card","content":"summary_large_image"}],["$","meta","17",{"name":"twitter:title","content":"Khoj AI"}],["$","meta","18",{"name":"twitter:description","content":"Your Second Brain."}],["$","meta","19",{"name":"twitter:image","content":"https://assets.khoj.dev/khoj_lantern_256x256.png"}],["$","meta","20",{"name":"twitter:image:width","content":"256"}],["$","meta","21",{"name":"twitter:image:height","content":"256"}],["$","meta","22",{"name":"twitter:image","content":"https://assets.khoj.dev/khoj_lantern_logomarktype_1200x630.png"}],["$","meta","23",{"name":"twitter:image:width","content":"1200"}],["$","meta","24",{"name":"twitter:image:height","content":"630"}],["$","link","25",{"rel":"icon","href":"/static/assets/icons/khoj_lantern.ico"}],["$","link","26",{"rel":"apple-touch-icon","href":"/static/assets/icons/khoj_lantern_256x256.png"}],["$","meta","27",{"name":"next-size-adjust"}]]
7
7
  1:null
@@ -1,6 +1,5 @@
1
1
  import logging
2
- import os
3
- from datetime import datetime
2
+ import tempfile
4
3
  from typing import Dict, List, Tuple
5
4
 
6
5
  from langchain_community.document_loaders import Docx2txtLoader
@@ -19,7 +18,7 @@ class DocxToEntries(TextToEntries):
19
18
  super().__init__()
20
19
 
21
20
  # Define Functions
22
- def process(self, files: dict[str, str] = None, user: KhojUser = None, regenerate: bool = False) -> Tuple[int, int]:
21
+ def process(self, files: dict[str, str], user: KhojUser, regenerate: bool = False) -> Tuple[int, int]:
23
22
  # Extract required fields from config
24
23
  deletion_file_names = set([file for file in files if files[file] == b""])
25
24
  files_to_process = set(files) - deletion_file_names
@@ -36,13 +35,13 @@ class DocxToEntries(TextToEntries):
36
35
  # Identify, mark and merge any new entries with previous entries
37
36
  with timer("Identify new or updated entries", logger):
38
37
  num_new_embeddings, num_deleted_embeddings = self.update_embeddings(
38
+ user,
39
39
  current_entries,
40
40
  DbEntry.EntryType.DOCX,
41
41
  DbEntry.EntrySource.COMPUTER,
42
42
  "compiled",
43
43
  logger,
44
44
  deletion_file_names,
45
- user,
46
45
  regenerate=regenerate,
47
46
  file_to_text_map=file_to_text_map,
48
47
  )
@@ -58,28 +57,13 @@ class DocxToEntries(TextToEntries):
58
57
  file_to_text_map = dict()
59
58
  for docx_file in docx_files:
60
59
  try:
61
- timestamp_now = datetime.utcnow().timestamp()
62
- tmp_file = f"tmp_docx_file_{timestamp_now}.docx"
63
- with open(tmp_file, "wb") as f:
64
- bytes_content = docx_files[docx_file]
65
- f.write(bytes_content)
66
-
67
- # Load the content using Docx2txtLoader
68
- loader = Docx2txtLoader(tmp_file)
69
- docx_entries_per_file = loader.load()
70
-
71
- # Convert the loaded entries into the desired format
72
- docx_texts = [page.page_content for page in docx_entries_per_file]
73
-
60
+ docx_texts = DocxToEntries.extract_text(docx_files[docx_file])
74
61
  entry_to_location_map += zip(docx_texts, [docx_file] * len(docx_texts))
75
62
  entries.extend(docx_texts)
76
63
  file_to_text_map[docx_file] = docx_texts
77
64
  except Exception as e:
78
- logger.warning(f"Unable to process file: {docx_file}. This file will not be indexed.")
65
+ logger.warning(f"Unable to extract entries from file: {docx_file}")
79
66
  logger.warning(e, exc_info=True)
80
- finally:
81
- if os.path.exists(f"{tmp_file}"):
82
- os.remove(f"{tmp_file}")
83
67
  return file_to_text_map, DocxToEntries.convert_docx_entries_to_maps(entries, dict(entry_to_location_map))
84
68
 
85
69
  @staticmethod
@@ -103,3 +87,25 @@ class DocxToEntries(TextToEntries):
103
87
  logger.debug(f"Converted {len(parsed_entries)} DOCX entries to dictionaries")
104
88
 
105
89
  return entries
90
+
91
+ @staticmethod
92
+ def extract_text(docx_file):
93
+ """Extract text from specified DOCX file"""
94
+ try:
95
+ docx_entry_by_pages = []
96
+ # Create temp file with .docx extension that gets auto-deleted
97
+ with tempfile.NamedTemporaryFile(suffix=".docx", delete=True) as tmp:
98
+ tmp.write(docx_file)
99
+ tmp.flush() # Ensure all data is written
100
+
101
+ # Load the content using Docx2txtLoader
102
+ loader = Docx2txtLoader(tmp.name)
103
+ docx_entries_per_file = loader.load()
104
+
105
+ # Convert the loaded entries into the desired format
106
+ docx_entry_by_pages = [page.page_content for page in docx_entries_per_file]
107
+ except Exception as e:
108
+ logger.warning(f"Unable to extract text from file: {docx_file}")
109
+ logger.warning(e, exc_info=True)
110
+
111
+ return docx_entry_by_pages
@@ -48,7 +48,7 @@ class GithubToEntries(TextToEntries):
48
48
  else:
49
49
  return
50
50
 
51
- def process(self, files: dict[str, str] = None, user: KhojUser = None, regenerate: bool = False) -> Tuple[int, int]:
51
+ def process(self, files: dict[str, str], user: KhojUser, regenerate: bool = False) -> Tuple[int, int]:
52
52
  if self.config.pat_token is None or self.config.pat_token == "":
53
53
  logger.error(f"Github PAT token is not set. Skipping github content")
54
54
  raise ValueError("Github PAT token is not set. Skipping github content")
@@ -101,12 +101,12 @@ class GithubToEntries(TextToEntries):
101
101
  # Identify, mark and merge any new entries with previous entries
102
102
  with timer("Identify new or updated entries", logger):
103
103
  num_new_embeddings, num_deleted_embeddings = self.update_embeddings(
104
+ user,
104
105
  current_entries,
105
106
  DbEntry.EntryType.GITHUB,
106
107
  DbEntry.EntrySource.GITHUB,
107
108
  key="compiled",
108
109
  logger=logger,
109
- user=user,
110
110
  )
111
111
 
112
112
  return num_new_embeddings, num_deleted_embeddings
@@ -18,7 +18,7 @@ class ImageToEntries(TextToEntries):
18
18
  super().__init__()
19
19
 
20
20
  # Define Functions
21
- def process(self, files: dict[str, str] = None, user: KhojUser = None, regenerate: bool = False) -> Tuple[int, int]:
21
+ def process(self, files: dict[str, str], user: KhojUser, regenerate: bool = False) -> Tuple[int, int]:
22
22
  # Extract required fields from config
23
23
  deletion_file_names = set([file for file in files if files[file] == b""])
24
24
  files_to_process = set(files) - deletion_file_names
@@ -35,13 +35,13 @@ class ImageToEntries(TextToEntries):
35
35
  # Identify, mark and merge any new entries with previous entries
36
36
  with timer("Identify new or updated entries", logger):
37
37
  num_new_embeddings, num_deleted_embeddings = self.update_embeddings(
38
+ user,
38
39
  current_entries,
39
40
  DbEntry.EntryType.IMAGE,
40
41
  DbEntry.EntrySource.COMPUTER,
41
42
  "compiled",
42
43
  logger,
43
44
  deletion_file_names,
44
- user,
45
45
  regenerate=regenerate,
46
46
  file_to_text_map=file_to_text_map,
47
47
  )
@@ -19,7 +19,7 @@ class MarkdownToEntries(TextToEntries):
19
19
  super().__init__()
20
20
 
21
21
  # Define Functions
22
- def process(self, files: dict[str, str] = None, user: KhojUser = None, regenerate: bool = False) -> Tuple[int, int]:
22
+ def process(self, files: dict[str, str], user: KhojUser, regenerate: bool = False) -> Tuple[int, int]:
23
23
  # Extract required fields from config
24
24
  deletion_file_names = set([file for file in files if files[file] == ""])
25
25
  files_to_process = set(files) - deletion_file_names
@@ -37,13 +37,13 @@ class MarkdownToEntries(TextToEntries):
37
37
  # Identify, mark and merge any new entries with previous entries
38
38
  with timer("Identify new or updated entries", logger):
39
39
  num_new_embeddings, num_deleted_embeddings = self.update_embeddings(
40
+ user,
40
41
  current_entries,
41
42
  DbEntry.EntryType.MARKDOWN,
42
43
  DbEntry.EntrySource.COMPUTER,
43
44
  "compiled",
44
45
  logger,
45
46
  deletion_file_names,
46
- user,
47
47
  regenerate=regenerate,
48
48
  file_to_text_map=file_to_text_map,
49
49
  )
@@ -79,7 +79,7 @@ class NotionToEntries(TextToEntries):
79
79
 
80
80
  self.body_params = {"page_size": 100}
81
81
 
82
- def process(self, files: dict[str, str] = None, user: KhojUser = None, regenerate: bool = False) -> Tuple[int, int]:
82
+ def process(self, files: dict[str, str], user: KhojUser, regenerate: bool = False) -> Tuple[int, int]:
83
83
  current_entries = []
84
84
 
85
85
  # Get all pages
@@ -248,12 +248,12 @@ class NotionToEntries(TextToEntries):
248
248
  # Identify, mark and merge any new entries with previous entries
249
249
  with timer("Identify new or updated entries", logger):
250
250
  num_new_embeddings, num_deleted_embeddings = self.update_embeddings(
251
+ user,
251
252
  current_entries,
252
253
  DbEntry.EntryType.NOTION,
253
254
  DbEntry.EntrySource.NOTION,
254
255
  key="compiled",
255
256
  logger=logger,
256
- user=user,
257
257
  )
258
258
 
259
259
  return num_new_embeddings, num_deleted_embeddings
@@ -20,7 +20,7 @@ class OrgToEntries(TextToEntries):
20
20
  super().__init__()
21
21
 
22
22
  # Define Functions
23
- def process(self, files: dict[str, str] = None, user: KhojUser = None, regenerate: bool = False) -> Tuple[int, int]:
23
+ def process(self, files: dict[str, str], user: KhojUser, regenerate: bool = False) -> Tuple[int, int]:
24
24
  deletion_file_names = set([file for file in files if files[file] == ""])
25
25
  files_to_process = set(files) - deletion_file_names
26
26
  files = {file: files[file] for file in files_to_process}
@@ -36,13 +36,13 @@ class OrgToEntries(TextToEntries):
36
36
  # Identify, mark and merge any new entries with previous entries
37
37
  with timer("Identify new or updated entries", logger):
38
38
  num_new_embeddings, num_deleted_embeddings = self.update_embeddings(
39
+ user,
39
40
  current_entries,
40
41
  DbEntry.EntryType.ORG,
41
42
  DbEntry.EntrySource.COMPUTER,
42
43
  "compiled",
43
44
  logger,
44
45
  deletion_file_names,
45
- user,
46
46
  regenerate=regenerate,
47
47
  file_to_text_map=file_to_text_map,
48
48
  )
@@ -1,13 +1,9 @@
1
- import base64
2
1
  import logging
3
- import os
4
- from datetime import datetime
5
- from typing import Dict, List, Tuple
2
+ import tempfile
3
+ from typing import Dict, Final, List, Tuple
6
4
 
7
5
  from langchain_community.document_loaders import PyMuPDFLoader
8
6
 
9
- # importing FileObjectAdapter so that we can add new files and debug file object db.
10
- # from khoj.database.adapters import FileObjectAdapters
11
7
  from khoj.database.models import Entry as DbEntry
12
8
  from khoj.database.models import KhojUser
13
9
  from khoj.processor.content.text_to_entries import TextToEntries
@@ -18,11 +14,14 @@ logger = logging.getLogger(__name__)
18
14
 
19
15
 
20
16
  class PdfToEntries(TextToEntries):
17
+ # Class-level constant translation table
18
+ NULL_TRANSLATOR: Final = str.maketrans("", "", "\x00")
19
+
21
20
  def __init__(self):
22
21
  super().__init__()
23
22
 
24
23
  # Define Functions
25
- def process(self, files: dict[str, str] = None, user: KhojUser = None, regenerate: bool = False) -> Tuple[int, int]:
24
+ def process(self, files: dict[str, str], user: KhojUser, regenerate: bool = False) -> Tuple[int, int]:
26
25
  # Extract required fields from config
27
26
  deletion_file_names = set([file for file in files if files[file] == b""])
28
27
  files_to_process = set(files) - deletion_file_names
@@ -39,13 +38,13 @@ class PdfToEntries(TextToEntries):
39
38
  # Identify, mark and merge any new entries with previous entries
40
39
  with timer("Identify new or updated entries", logger):
41
40
  num_new_embeddings, num_deleted_embeddings = self.update_embeddings(
41
+ user,
42
42
  current_entries,
43
43
  DbEntry.EntryType.PDF,
44
44
  DbEntry.EntrySource.COMPUTER,
45
45
  "compiled",
46
46
  logger,
47
47
  deletion_file_names,
48
- user,
49
48
  regenerate=regenerate,
50
49
  file_to_text_map=file_to_text_map,
51
50
  )
@@ -60,31 +59,13 @@ class PdfToEntries(TextToEntries):
60
59
  entry_to_location_map: List[Tuple[str, str]] = []
61
60
  for pdf_file in pdf_files:
62
61
  try:
63
- # Write the PDF file to a temporary file, as it is stored in byte format in the pdf_file object and the PDF Loader expects a file path
64
- timestamp_now = datetime.utcnow().timestamp()
65
- tmp_file = f"tmp_pdf_file_{timestamp_now}.pdf"
66
- with open(f"{tmp_file}", "wb") as f:
67
- bytes = pdf_files[pdf_file]
68
- f.write(bytes)
69
- try:
70
- loader = PyMuPDFLoader(f"{tmp_file}", extract_images=False)
71
- pdf_entries_per_file = [page.page_content for page in loader.load()]
72
- except ImportError:
73
- loader = PyMuPDFLoader(f"{tmp_file}")
74
- pdf_entries_per_file = [
75
- page.page_content for page in loader.load()
76
- ] # page_content items list for a given pdf.
77
- entry_to_location_map += zip(
78
- pdf_entries_per_file, [pdf_file] * len(pdf_entries_per_file)
79
- ) # this is an indexed map of pdf_entries for the pdf.
62
+ pdf_entries_per_file = PdfToEntries.extract_text(pdf_files[pdf_file])
63
+ entry_to_location_map += zip(pdf_entries_per_file, [pdf_file] * len(pdf_entries_per_file))
80
64
  entries.extend(pdf_entries_per_file)
81
65
  file_to_text_map[pdf_file] = pdf_entries_per_file
82
66
  except Exception as e:
83
- logger.warning(f"Unable to process file: {pdf_file}. This file will not be indexed.")
67
+ logger.warning(f"Unable to extract entries from file: {pdf_file}")
84
68
  logger.warning(e, exc_info=True)
85
- finally:
86
- if os.path.exists(f"{tmp_file}"):
87
- os.remove(f"{tmp_file}")
88
69
 
89
70
  return file_to_text_map, PdfToEntries.convert_pdf_entries_to_maps(entries, dict(entry_to_location_map))
90
71
 
@@ -109,3 +90,30 @@ class PdfToEntries(TextToEntries):
109
90
  logger.debug(f"Converted {len(parsed_entries)} PDF entries to dictionaries")
110
91
 
111
92
  return entries
93
+
94
+ @staticmethod
95
+ def extract_text(pdf_file):
96
+ """Extract text from specified PDF files"""
97
+ try:
98
+ # Create temp file with .pdf extension that gets auto-deleted
99
+ with tempfile.NamedTemporaryFile(suffix=".pdf", delete=True) as tmpf:
100
+ tmpf.write(pdf_file)
101
+ tmpf.flush() # Ensure all data is written
102
+
103
+ # Load the content using PyMuPDFLoader
104
+ loader = PyMuPDFLoader(tmpf.name, extract_images=True)
105
+ pdf_entries_per_file = loader.load()
106
+
107
+ # Convert the loaded entries into the desired format
108
+ pdf_entry_by_pages = [PdfToEntries.clean_text(page.page_content) for page in pdf_entries_per_file]
109
+ except Exception as e:
110
+ logger.warning(f"Unable to process file: {pdf_file}. This file will not be indexed.")
111
+ logger.warning(e, exc_info=True)
112
+
113
+ return pdf_entry_by_pages
114
+
115
+ @staticmethod
116
+ def clean_text(text: str) -> str:
117
+ """Clean PDF text by removing null bytes and invalid Unicode characters."""
118
+ # Use faster translation table instead of replace
119
+ return text.translate(PdfToEntries.NULL_TRANSLATOR)
@@ -20,7 +20,7 @@ class PlaintextToEntries(TextToEntries):
20
20
  super().__init__()
21
21
 
22
22
  # Define Functions
23
- def process(self, files: dict[str, str] = None, user: KhojUser = None, regenerate: bool = False) -> Tuple[int, int]:
23
+ def process(self, files: dict[str, str], user: KhojUser, regenerate: bool = False) -> Tuple[int, int]:
24
24
  deletion_file_names = set([file for file in files if files[file] == ""])
25
25
  files_to_process = set(files) - deletion_file_names
26
26
  files = {file: files[file] for file in files_to_process}
@@ -36,13 +36,13 @@ class PlaintextToEntries(TextToEntries):
36
36
  # Identify, mark and merge any new entries with previous entries
37
37
  with timer("Identify new or updated entries", logger):
38
38
  num_new_embeddings, num_deleted_embeddings = self.update_embeddings(
39
+ user,
39
40
  current_entries,
40
41
  DbEntry.EntryType.PLAINTEXT,
41
42
  DbEntry.EntrySource.COMPUTER,
42
43
  key="compiled",
43
44
  logger=logger,
44
45
  deletion_filenames=deletion_file_names,
45
- user=user,
46
46
  regenerate=regenerate,
47
47
  file_to_text_map=file_to_text_map,
48
48
  )
@@ -31,7 +31,7 @@ class TextToEntries(ABC):
31
31
  self.date_filter = DateFilter()
32
32
 
33
33
  @abstractmethod
34
- def process(self, files: dict[str, str] = None, user: KhojUser = None, regenerate: bool = False) -> Tuple[int, int]:
34
+ def process(self, files: dict[str, str], user: KhojUser, regenerate: bool = False) -> Tuple[int, int]:
35
35
  ...
36
36
 
37
37
  @staticmethod
@@ -114,13 +114,13 @@ class TextToEntries(ABC):
114
114
 
115
115
  def update_embeddings(
116
116
  self,
117
+ user: KhojUser,
117
118
  current_entries: List[Entry],
118
119
  file_type: str,
119
120
  file_source: str,
120
121
  key="compiled",
121
122
  logger: logging.Logger = None,
122
123
  deletion_filenames: Set[str] = None,
123
- user: KhojUser = None,
124
124
  regenerate: bool = False,
125
125
  file_to_text_map: dict[str, str] = None,
126
126
  ):
@@ -36,6 +36,7 @@ def extract_questions_anthropic(
36
36
  query_images: Optional[list[str]] = None,
37
37
  vision_enabled: bool = False,
38
38
  personality_context: Optional[str] = None,
39
+ query_files: str = None,
39
40
  tracer: dict = {},
40
41
  ):
41
42
  """
@@ -82,9 +83,12 @@ def extract_questions_anthropic(
82
83
  images=query_images,
83
84
  model_type=ChatModelOptions.ModelType.ANTHROPIC,
84
85
  vision_enabled=vision_enabled,
86
+ attached_file_context=query_files,
85
87
  )
86
88
 
87
- messages = [ChatMessage(content=prompt, role="user")]
89
+ messages = []
90
+
91
+ messages.append(ChatMessage(content=prompt, role="user"))
88
92
 
89
93
  messages, system_prompt = format_messages_for_anthropic(messages, system_prompt)
90
94
 
@@ -148,6 +152,7 @@ def converse_anthropic(
148
152
  agent: Agent = None,
149
153
  query_images: Optional[list[str]] = None,
150
154
  vision_available: bool = False,
155
+ query_files: str = None,
151
156
  tracer: dict = {},
152
157
  ):
153
158
  """
@@ -205,6 +210,7 @@ def converse_anthropic(
205
210
  query_images=query_images,
206
211
  vision_enabled=vision_available,
207
212
  model_type=ChatModelOptions.ModelType.ANTHROPIC,
213
+ query_files=query_files,
208
214
  )
209
215
 
210
216
  messages, system_prompt = format_messages_for_anthropic(messages, system_prompt)
@@ -37,6 +37,7 @@ def extract_questions_gemini(
37
37
  query_images: Optional[list[str]] = None,
38
38
  vision_enabled: bool = False,
39
39
  personality_context: Optional[str] = None,
40
+ query_files: str = None,
40
41
  tracer: dict = {},
41
42
  ):
42
43
  """
@@ -83,9 +84,13 @@ def extract_questions_gemini(
83
84
  images=query_images,
84
85
  model_type=ChatModelOptions.ModelType.GOOGLE,
85
86
  vision_enabled=vision_enabled,
87
+ attached_file_context=query_files,
86
88
  )
87
89
 
88
- messages = [ChatMessage(content=prompt, role="user"), ChatMessage(content=system_prompt, role="system")]
90
+ messages = []
91
+
92
+ messages.append(ChatMessage(content=prompt, role="user"))
93
+ messages.append(ChatMessage(content=system_prompt, role="system"))
89
94
 
90
95
  response = gemini_send_message_to_model(
91
96
  messages, api_key, model, response_type="json_object", temperature=temperature, tracer=tracer
@@ -108,7 +113,13 @@ def extract_questions_gemini(
108
113
 
109
114
 
110
115
  def gemini_send_message_to_model(
111
- messages, api_key, model, response_type="text", temperature=0, model_kwargs=None, tracer={}
116
+ messages,
117
+ api_key,
118
+ model,
119
+ response_type="text",
120
+ temperature=0,
121
+ model_kwargs=None,
122
+ tracer={},
112
123
  ):
113
124
  """
114
125
  Send message to model
@@ -151,6 +162,7 @@ def converse_gemini(
151
162
  agent: Agent = None,
152
163
  query_images: Optional[list[str]] = None,
153
164
  vision_available: bool = False,
165
+ query_files: str = None,
154
166
  tracer={},
155
167
  ):
156
168
  """
@@ -209,6 +221,7 @@ def converse_gemini(
209
221
  query_images=query_images,
210
222
  vision_enabled=vision_available,
211
223
  model_type=ChatModelOptions.ModelType.GOOGLE,
224
+ query_files=query_files,
212
225
  )
213
226
 
214
227
  messages, system_prompt = format_messages_for_gemini(messages, system_prompt)