pembot 0.0.9__tar.gz → 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pembot might be problematic. Click here for more details.
- {pembot-0.0.9 → pembot-0.1.1}/PKG-INFO +1 -1
- pembot-0.1.1/pembot/.git/COMMIT_EDITMSG +1 -0
- pembot-0.1.1/pembot/.git/index +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/logs/HEAD +2 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/logs/refs/heads/main +2 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/logs/refs/remotes/origin/main +2 -0
- pembot-0.1.1/pembot/.git/objects/0e/120123bfadfd594220963f3bbca54056bab6ee +0 -0
- pembot-0.1.1/pembot/.git/objects/0f/ccea3d0db4864a854f8b2c13b9f76b3601d200 +0 -0
- pembot-0.1.1/pembot/.git/objects/37/175696b3ca7a5d17379f03fb61a1023d50aeba +0 -0
- pembot-0.1.1/pembot/.git/objects/44/9dea1ca63d6f1e47d119b36576acc94822a37c +0 -0
- pembot-0.1.1/pembot/.git/objects/59/69ac8b9d6b44a601385c3ed8c710a69d05216e +0 -0
- pembot-0.1.1/pembot/.git/objects/5c/4f01d3ce9e243bbb8a693f97e5c7d13a857cb7 +0 -0
- pembot-0.1.1/pembot/.git/objects/71/014c7a6c8c98449a26ef966485a6cf30a1974d +0 -0
- pembot-0.1.1/pembot/.git/objects/78/4aa28d912b66e07748483efe0326c70d7541a5 +1 -0
- pembot-0.1.1/pembot/.git/objects/8d/adc1d7891c79de24ba2c7c38b4c830bf61870a +0 -0
- pembot-0.1.1/pembot/.git/objects/93/652290aac46c69b1b4dd83062b6cfe648dd643 +0 -0
- pembot-0.1.1/pembot/.git/objects/97/b7aaa5c8b45f5471c9d39893cd4c893da6f444 +0 -0
- pembot-0.1.1/pembot/.git/objects/c2/ce19d575a8cccf6886862c4fa6afefba142511 +0 -0
- pembot-0.1.1/pembot/.git/objects/ef/3488a3c636d73d82ad138e70a92453249b7f37 +0 -0
- pembot-0.1.1/pembot/.git/objects/f1/2d2ef8948cbe4b24279bee282f934cf5a1b834 +0 -0
- pembot-0.1.1/pembot/.git/objects/f2/14d4d56726e2928479c5948bd88e038cf70b2e +0 -0
- pembot-0.1.1/pembot/.git/refs/heads/main +1 -0
- pembot-0.1.1/pembot/.git/refs/remotes/origin/main +1 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/TextEmbedder/mongodb_embedder.py +24 -14
- {pembot-0.0.9 → pembot-0.1.1}/pembot/__init__.py +1 -1
- {pembot-0.0.9 → pembot-0.1.1}/pembot/config/config.yaml +1 -1
- pembot-0.1.1/pembot/pyrightconfig.json +8 -0
- pembot-0.1.1/pembot/query.py +496 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/requirements.txt +1 -1
- pembot-0.1.1/pembot/search.py +190 -0
- pembot-0.0.9/pembot/.git/COMMIT_EDITMSG +0 -1
- pembot-0.0.9/pembot/.git/index +0 -0
- pembot-0.0.9/pembot/.git/refs/heads/main +0 -1
- pembot-0.0.9/pembot/.git/refs/remotes/origin/main +0 -1
- pembot-0.0.9/pembot/query.py +0 -211
- {pembot-0.0.9 → pembot-0.1.1}/LICENSE +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/HEAD +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/config +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/description +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/hooks/applypatch-msg.sample +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/hooks/commit-msg.sample +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/hooks/fsmonitor-watchman.sample +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/hooks/post-update.sample +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/hooks/pre-applypatch.sample +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/hooks/pre-commit.sample +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/hooks/pre-merge-commit.sample +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/hooks/pre-push.sample +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/hooks/pre-rebase.sample +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/hooks/pre-receive.sample +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/hooks/prepare-commit-msg.sample +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/hooks/push-to-checkout.sample +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/hooks/sendemail-validate.sample +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/hooks/update.sample +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/info/exclude +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/logs/refs/remotes/origin/HEAD +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/01/5f71967c525963c827d7fe5415ae2c040c4a64 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/09/ee34d94dffd4c286df1d6d528b2e98e2a6cce2 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/0a/fb3a98cdc55b1434b44534ec2bf22c56cfa26c +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/0b/db4169fc0f312b8698f1df17a258fff163aeaa +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/0c/8d9b2690545bf1906b05cd9f18b783b3eb74f1 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/0c/ab66ffbaf50ef60dd41f3498595ebd2526b33c +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/0d/28f73897db0c9a9351ee9e64d2a0fe27db2705 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/10/9d1ca0463ea42bbbc435bcb43a90711211cf49 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/14/22c2d0cf79fd928ff7e2d77f96ad5b40cc2a31 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/18/28e18ab80aa64d334b26428708140e280cbc63 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/19/f61df7dbd562d04f561288677bbf2f18f5dff7 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/1f/83a471c8119f7794d98c049170a5d7d07a4b71 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/28/db0ab48059acccd7d257aa02e52e9b6b83a4a5 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/35/97e518a8658280be9f377f78edf1dfa1f23814 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/3d/07d3b29ff53d95de3898fb786d61732f210515 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/3d/e536f9c1fd05a23c2dec66423ed610afb0cf5f +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/3e/23850624fcf5f111d6ea88ddd64adf924cf82f +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/3e/cf23eb95123287531d708a21d4ba88d92ccabb +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/3f/78215d7e17da726fb352fd92b3c117db9b63ba +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/3f/e072cf3cb6a9f30c3e9936e3ddf622e80270d0 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/41/ae8fa8f8baa2daee5ec0aa21ae17922ae051a0 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/41/cbeb6bcb4c6fa9ef9be571082d95ecb4ea0ee3 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/42/f03e1b66aa56bbb36a1c3a8dea9e1e727faffa +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/44/86da0f89c566c3bae8abf435d37aeca87f1632 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/4d/a03134f70896f72053fbdc0cd4f4c76d4ac1d8 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/50/39b29fda67743a044993436df6a4a1db7b8888 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/51/9e780574933d7627a083222bd10dd74f430904 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/61/46a371b9c1bd9f51af273f11f986cfd1bedeba +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/63/1700a51c8fa97b543991f5f61bfcd1e7e1327d +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/64/00040794955d17c9a1fe1aaaea59f2c4822177 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/6d/7a865a23b1cb4182f67907820104ced48b11c9 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/72/f047cda92abcd1ddc857f6461de605f8668331 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/73/2e98f08bc806c331b06847fc8c743f545499e5 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/7a/7d28b0313a3d9d509823faaae31949af8610ef +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/7e/0907822f7d316ebe0be07e1f6918bef412c80b +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/86/cdaec229f1fbebf43042266b03878944669f25 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/87/d6df5217a4a374f8c1211a05f9bd657f72c9a7 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/8b/5be2af9b16f290549193859c214cd9072212e8 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/90/f067b86364ab243a7e3bc75f936319ba9eac88 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/93/8f29d9b4b1ae86e39dddf9e3d115a82ddfc9b6 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/95/28bbccd167e3f4ad583a1ae9fac98a52620e27 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/9b/123713e30fc9e225f9ac8ff5b02f8f8cf86456 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/a8/98d2c3947d30d8be64bd2bbcef68f956d5456b +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/a9/d5e349fa091647742b09eb3d05bac8f58fe547 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/ab/139d2cd4798dd8e2c565b80440b1a44b376126 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/ab/c6b15265171457b41e2cfdaf3b8c3994a59eb7 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/ab/f77db148e3fb3b26913af14ae43130396f3269 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/ac/9c9018c62fa30dc142665c1b5a375f4e056880 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/af/80ddb5890f062e364ea8ade2d602df4e12de8c +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/b1/1173d9b68db117437ccb9551461152e1e8a77d +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/b2/4e79ab07fe9e68781961a25ff9f1dbb1546fbb +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/b8/884c6145221ac66f84bf88919754c2cb05c12d +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/b8/eea52176ffa4d88c5a9976bee26092421565d3 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/bd/8fd1cb166996e74a8631f3a6f764a53af75297 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/bf/068a0714e2145de83a5c004f4213b091439d0e +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/bf/32a7e6872e5dc4025ee3df3c921ec7ade0855f +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/bf/518686b06069d2a8abd3689908b7e1a6e16b05 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/c0/793458db6e1bee7f79f1a504fb8ff4963f8ed3 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/c2/443060c07101948487cfa93cc39e082e9e0f5f +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/ce/a4ffc1cf5eab61a2a0abd8f6dc941b580b69fd +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/d0/937f7d832266337289d5ec09459f931a46fcf7 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/e0/9162dbd64d85bb5ed740aa99faefa73f293d78 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/e0/da740b542afc451c45b9b4be6c0c7a3c79b06c +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/e5/3070f2b07f45d031444b09b1b38658f3caf29e +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/e7/911a702079a6144997ea4e70f59abbe59ec2bc +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/e9/1172752e9a421ae463112d2b0506b37498c98d +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/ea/0af89e61a882c5afc2a8c281b2d96f174bfe58 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/eb/75e1c49f1e5b79dca17ccdbec8067756523238 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/ee/a73c7f24094ed83b014f7cfce46e10f817bec8 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/ef/0503a60244391590b16042019032e91d7cc30d +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/f1/3181b12cf4d539e635bf94ad6e950d68cedaf1 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/f1/655afa1c5636c8d58969e3194bb770aefbc552 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/f4/e991088a63def67a30a2b8bbdb4d58514abab8 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/f6/b1d54483ce20fbcb252a8a93a5eff7bec88729 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/f8/6fbd490878cb0d3c35cc4443672d1309171bf1 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/f8/cbb5bfd1503e66cec2c593362c60a317b6d300 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/f9/98e1f01c2bf0a20159fc851327af05beb3ac88 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/fa/9c9a62ec1203a5868b033ded428c2382c4e1b6 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/fb/6c90c9ce5e0cdfbe074a3f060afc66f62eefde +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/fc/988aab7e2d46396dc595ad24345e8e77dda0e4 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/fc/e56f1e09d09a05b9babf796fb40bece176f3a2 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/fd/abd48f3e947a9f420003446dd118c5295346a5 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/pack/pack-d5469edc8c36e3bb1de5e0070e4d5b1eae935dd4.idx +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/pack/pack-d5469edc8c36e3bb1de5e0070e4d5b1eae935dd4.pack +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/objects/pack/pack-d5469edc8c36e3bb1de5e0070e4d5b1eae935dd4.rev +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/packed-refs +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.git/refs/remotes/origin/HEAD +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/.gitignore +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/AnyToText/__init__.py +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/AnyToText/convertor.py +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/LICENSE +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/TextEmbedder/__init__.py +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/TextEmbedder/gemini_embedder.py +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/TextEmbedder/mongodb_index_creator.py +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/TextEmbedder/vector_query.py +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/gartner.py +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/main.py +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/output_structure_local.py +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/.git/COMMIT_EDITMSG +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/.git/HEAD +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/.git/config +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/.git/description +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/.git/hooks/applypatch-msg.sample +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/.git/hooks/commit-msg.sample +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/.git/hooks/fsmonitor-watchman.sample +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/.git/hooks/post-update.sample +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/.git/hooks/pre-applypatch.sample +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/.git/hooks/pre-commit.sample +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/.git/hooks/pre-merge-commit.sample +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/.git/hooks/pre-push.sample +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/.git/hooks/pre-rebase.sample +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/.git/hooks/pre-receive.sample +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/.git/hooks/prepare-commit-msg.sample +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/.git/hooks/push-to-checkout.sample +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/.git/hooks/sendemail-validate.sample +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/.git/hooks/update.sample +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/.git/index +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/.git/info/exclude +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/.git/logs/HEAD +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/.git/logs/refs/heads/main +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/.git/logs/refs/remotes/myorigin/main +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/.git/logs/refs/remotes/origin/HEAD +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/.git/objects/14/251b198e0bac39a3dc3b42f9e57b20c01465fb +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/.git/objects/24/7b15a6b1e0e3d270c05af184f048736376cd4e +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/.git/objects/24/8f03b5f969a7fbd396b496f40b57f0ae81c148 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/.git/objects/57/74dc9c3901d2ffb2cd7dafe2ad6612a7f9f42c +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/.git/objects/72/2dc14f82e78ce41717348b256e0c17834933b4 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/.git/objects/79/eb7b93ced70e399bd561093c45de7641414dbd +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/.git/objects/8d/9ce1fd9733a78c592b34af9c94b98960c601ed +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/.git/objects/95/745843bb4377d6042180daeda818c0b16fd493 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/.git/objects/a5/c6dfb577782c259990dcf977e355298e923428 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/.git/objects/a7/4bcd5e67cb1066dd504b92b42390fe0b2c3d38 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/.git/objects/b4/8d697aa9fd97151eb2a84a1af5d408b7630232 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/.git/objects/b8/702320e56074e9680181d8b7897d6a0a552e2d +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/.git/objects/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/.git/objects/f3/b2d76c75bbd50e04fc4c2ad17fc94ca6daed32 +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/.git/objects/pack/pack-d3051affdd6c31306dc53489168fc870872085d1.idx +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/.git/objects/pack/pack-d3051affdd6c31306dc53489168fc870872085d1.pack +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/.git/objects/pack/pack-d3051affdd6c31306dc53489168fc870872085d1.rev +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/.git/packed-refs +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/.git/refs/heads/main +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/.git/refs/remotes/myorigin/main +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/.git/refs/remotes/origin/HEAD +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/LICENSE +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/README.md +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/__init__.py +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/config/config.yaml +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/extract.py +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/pyrightconfig.json +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pdf2markdown/requirements.txt +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/pem.py +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/utils/__init__.py +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/utils/inference_client.py +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pembot/utils/string_tools.py +0 -0
- {pembot-0.0.9 → pembot-0.1.1}/pyproject.toml +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
added smolagent functionality with tool extension capability to query.py and added a caveat in search_within_documents by which you can just return no context if you set limit= 0, effectively doing a Non-contextual prompt
|
|
Binary file
|
|
@@ -10,3 +10,5 @@ eb75e1c49f1e5b79dca17ccdbec8067756523238 0bdb4169fc0f312b8698f1df17a258fff163aea
|
|
|
10
10
|
ef0503a60244391590b16042019032e91d7cc30d af80ddb5890f062e364ea8ade2d602df4e12de8c cyto <silverstone965@gmail.com> 1751896700 +0530 commit: added model name to convertor
|
|
11
11
|
af80ddb5890f062e364ea8ade2d602df4e12de8c 0d28f73897db0c9a9351ee9e64d2a0fe27db2705 cyto <silverstone965@gmail.com> 1752081786 +0530 commit: upgraded to 0.0.8; added ability to classify the document chunks into one more filterable parameter 'type'; other query related improvements..
|
|
12
12
|
0d28f73897db0c9a9351ee9e64d2a0fe27db2705 a898d2c3947d30d8be64bd2bbcef68f956d5456b cyto <silverstone965@gmail.com> 1752136060 +0530 commit: fixed the type_info array pushing bug
|
|
13
|
+
a898d2c3947d30d8be64bd2bbcef68f956d5456b 784aa28d912b66e07748483efe0326c70d7541a5 cyto <silverstone965@gmail.com> 1752236415 +0530 commit: added prompt prefixing option to prime the llm with some more data; added the option for no-filter global search in a collection in search_within_document(s)
|
|
14
|
+
784aa28d912b66e07748483efe0326c70d7541a5 f214d4d56726e2928479c5948bd88e038cf70b2e cyto <silverstone965@gmail.com> 1752858244 +0530 commit: added smolagent functionality with tool extension capability to query.py and added a caveat in search_within_documents by which you can just return no context if you set limit= 0, effectively doing a Non-contextual prompt
|
|
@@ -10,3 +10,5 @@ eb75e1c49f1e5b79dca17ccdbec8067756523238 0bdb4169fc0f312b8698f1df17a258fff163aea
|
|
|
10
10
|
ef0503a60244391590b16042019032e91d7cc30d af80ddb5890f062e364ea8ade2d602df4e12de8c cyto <silverstone965@gmail.com> 1751896700 +0530 commit: added model name to convertor
|
|
11
11
|
af80ddb5890f062e364ea8ade2d602df4e12de8c 0d28f73897db0c9a9351ee9e64d2a0fe27db2705 cyto <silverstone965@gmail.com> 1752081786 +0530 commit: upgraded to 0.0.8; added ability to classify the document chunks into one more filterable parameter 'type'; other query related improvements..
|
|
12
12
|
0d28f73897db0c9a9351ee9e64d2a0fe27db2705 a898d2c3947d30d8be64bd2bbcef68f956d5456b cyto <silverstone965@gmail.com> 1752136060 +0530 commit: fixed the type_info array pushing bug
|
|
13
|
+
a898d2c3947d30d8be64bd2bbcef68f956d5456b 784aa28d912b66e07748483efe0326c70d7541a5 cyto <silverstone965@gmail.com> 1752236415 +0530 commit: added prompt prefixing option to prime the llm with some more data; added the option for no-filter global search in a collection in search_within_document(s)
|
|
14
|
+
784aa28d912b66e07748483efe0326c70d7541a5 f214d4d56726e2928479c5948bd88e038cf70b2e cyto <silverstone965@gmail.com> 1752858244 +0530 commit: added smolagent functionality with tool extension capability to query.py and added a caveat in search_within_documents by which you can just return no context if you set limit= 0, effectively doing a Non-contextual prompt
|
|
@@ -9,3 +9,5 @@ eb75e1c49f1e5b79dca17ccdbec8067756523238 0bdb4169fc0f312b8698f1df17a258fff163aea
|
|
|
9
9
|
ef0503a60244391590b16042019032e91d7cc30d af80ddb5890f062e364ea8ade2d602df4e12de8c cyto <silverstone965@gmail.com> 1751896713 +0530 update by push
|
|
10
10
|
af80ddb5890f062e364ea8ade2d602df4e12de8c 0d28f73897db0c9a9351ee9e64d2a0fe27db2705 cyto <silverstone965@gmail.com> 1752081801 +0530 update by push
|
|
11
11
|
0d28f73897db0c9a9351ee9e64d2a0fe27db2705 a898d2c3947d30d8be64bd2bbcef68f956d5456b cyto <silverstone965@gmail.com> 1752136119 +0530 update by push
|
|
12
|
+
a898d2c3947d30d8be64bd2bbcef68f956d5456b 784aa28d912b66e07748483efe0326c70d7541a5 cyto <silverstone965@gmail.com> 1752236436 +0530 update by push
|
|
13
|
+
784aa28d912b66e07748483efe0326c70d7541a5 f214d4d56726e2928479c5948bd88e038cf70b2e cyto <silverstone965@gmail.com> 1752858280 +0530 update by push
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
x�PIn� �:��U�*P��7�r�$H�#��p�����+��~�m9�PaR��"��~vFe�5Z��"o�Z6�O=f�۱P���h7�Ɉ�M��)��qY,y����I!���n\�~W��#�*G�LF��5a�ϖ���I�A�c/��khX���#{��Vȇ��W��Ђ�%$��Ę�3�
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
f214d4d56726e2928479c5948bd88e038cf70b2e
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
f214d4d56726e2928479c5948bd88e038cf70b2e
|
|
@@ -49,6 +49,9 @@ def search_within_document(
|
|
|
49
49
|
A list of dictionaries, where each dictionary represents a matching chunk
|
|
50
50
|
from the specified document, including its text, docId, and score.
|
|
51
51
|
"""
|
|
52
|
+
if limit < 1:
|
|
53
|
+
return []
|
|
54
|
+
|
|
52
55
|
embeddings_collection = db_client[embeddings_collection_name]
|
|
53
56
|
|
|
54
57
|
print(f"Searching within document (docId: {document_name_id})...")
|
|
@@ -69,22 +72,27 @@ def search_within_document(
|
|
|
69
72
|
if document_belongs_to_a_type:
|
|
70
73
|
project_dict['type']= 1
|
|
71
74
|
|
|
75
|
+
vectorSearchParams= {
|
|
76
|
+
'queryVector': aggregate_query_embedding,
|
|
77
|
+
'path': 'embedding',
|
|
78
|
+
|
|
79
|
+
#number of nearest neighbors to consider
|
|
80
|
+
'numCandidates': 100,
|
|
81
|
+
'limit': limit,
|
|
82
|
+
'index': index_name,
|
|
83
|
+
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
#filter (if a type or docid filter is given) to search only within the specified documents search space
|
|
87
|
+
if document_name_id:
|
|
88
|
+
vectorSearchParams['filter']= (
|
|
89
|
+
{ "type": {"$in": [document_belongs_to_a_type ]} } if document_belongs_to_a_type else
|
|
90
|
+
{ 'docId': document_name_id }
|
|
91
|
+
)
|
|
92
|
+
|
|
72
93
|
pipeline = [
|
|
73
94
|
{
|
|
74
|
-
'$vectorSearch':
|
|
75
|
-
'queryVector': aggregate_query_embedding,
|
|
76
|
-
'path': 'embedding',
|
|
77
|
-
|
|
78
|
-
#number of nearest neighbors to consider
|
|
79
|
-
'numCandidates': 100,
|
|
80
|
-
'limit': limit,
|
|
81
|
-
'index': index_name,
|
|
82
|
-
|
|
83
|
-
#filter to search only within the specified document
|
|
84
|
-
'filter':
|
|
85
|
-
{ "type": {"$in": [document_belongs_to_a_type ]} } if document_belongs_to_a_type else
|
|
86
|
-
{ 'docId': document_name_id }
|
|
87
|
-
}
|
|
95
|
+
'$vectorSearch': vectorSearchParams
|
|
88
96
|
},
|
|
89
97
|
|
|
90
98
|
# to exclude the MongoDB internal _id
|
|
@@ -93,6 +101,8 @@ def search_within_document(
|
|
|
93
101
|
}
|
|
94
102
|
]
|
|
95
103
|
|
|
104
|
+
|
|
105
|
+
|
|
96
106
|
# print("sesraching now:")
|
|
97
107
|
results = list(embeddings_collection.aggregate(pipeline))
|
|
98
108
|
# print("search results: ", results)
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
{
|
|
2
|
+
"venvPath": "..", // If your venv is a direct child like ./venv
|
|
3
|
+
"venv": "venvpem" // The name of your venv folder
|
|
4
|
+
// Or if you want to be explicit with the full path
|
|
5
|
+
// "pythonVersion": "3.9", // Or your specific version
|
|
6
|
+
// "pythonPlatform": "Linux", // Or "Windows", "Darwin"
|
|
7
|
+
// "pythonPath": "/path/to/your/project/.venv/bin/python" // Absolute path
|
|
8
|
+
}
|
|
@@ -0,0 +1,496 @@
|
|
|
1
|
+
from os import environ
|
|
2
|
+
from huggingface_hub.inference._generated.types.chat_completion import ChatCompletionOutputMessage
|
|
3
|
+
from huggingface_hub.inference._providers import PROVIDER_T
|
|
4
|
+
import ollama
|
|
5
|
+
import re
|
|
6
|
+
from smolagents import InferenceClientModel, ToolCallingAgent, ActionStep, TaskStep
|
|
7
|
+
from smolagents.default_tools import FinalAnswerTool, UserInputTool, VisitWebpageTool
|
|
8
|
+
from pymongo import MongoClient
|
|
9
|
+
from typing import Callable, Dict, Any, Optional, List
|
|
10
|
+
import uuid
|
|
11
|
+
from datetime import datetime
|
|
12
|
+
from smolagents.monitoring import Timing
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
from search import brave_search_tool
|
|
16
|
+
from pembot.TextEmbedder.mongodb_embedder import search_within_document
|
|
17
|
+
import numpy as np
|
|
18
|
+
from huggingface_hub import InferenceClient
|
|
19
|
+
from google import genai
|
|
20
|
+
from google.genai import types
|
|
21
|
+
import time
|
|
22
|
+
from datetime import timezone
|
|
23
|
+
|
|
24
|
+
init_timing= {
|
|
25
|
+
"start_time": 0.0,
|
|
26
|
+
"end_time": 0.0,
|
|
27
|
+
"duration": 0.0,
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
mongodb_uri= environ['MONGODB_SCHEMER']
|
|
31
|
+
mc = MongoClient(mongodb_uri)
|
|
32
|
+
db = mc["schemerdb"]
|
|
33
|
+
collection = db["chat_history"] # Collection name
|
|
34
|
+
|
|
35
|
+
from pembot.TextEmbedder.mongodb_index_creator import create_vector_index
|
|
36
|
+
|
|
37
|
+
def external_llm(rag_prompt, model_name, llm_provider_name: PROVIDER_T= "novita", inference_client = None) -> str:
|
|
38
|
+
|
|
39
|
+
# Here, one can change the provider of the inference LLM if
|
|
40
|
+
# for embedding we are using one which doesnt have our LLM available
|
|
41
|
+
# or, is costly, so we choose different, just here in the function header, or from the main()
|
|
42
|
+
|
|
43
|
+
if not inference_client:
|
|
44
|
+
inference_client= InferenceClient(
|
|
45
|
+
# "nebius" "novita" "hyperbolic"
|
|
46
|
+
provider= llm_provider_name,
|
|
47
|
+
api_key= environ["HF_TOKEN"]
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
completion= inference_client.chat.completions.create(
|
|
51
|
+
model= model_name,
|
|
52
|
+
messages= [
|
|
53
|
+
{"role": "user", "content": rag_prompt}
|
|
54
|
+
]
|
|
55
|
+
)
|
|
56
|
+
response_message: ChatCompletionOutputMessage= completion.choices[0].message
|
|
57
|
+
|
|
58
|
+
if response_message.content:
|
|
59
|
+
return response_message.content
|
|
60
|
+
else:
|
|
61
|
+
return '{}'
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def multi_embedding_average(llm_client, inference_client, descriptions, model= "BAAI/bge-en-icl", embed_locally= False):
|
|
65
|
+
|
|
66
|
+
description_embeddings = []
|
|
67
|
+
for desc in descriptions:
|
|
68
|
+
try:
|
|
69
|
+
if 'gemini' in model:
|
|
70
|
+
client = genai.Client(api_key= environ['GEMINI_API_KEY'])
|
|
71
|
+
result = client.models.embed_content(
|
|
72
|
+
model= model,
|
|
73
|
+
contents= desc,
|
|
74
|
+
config=types.EmbedContentConfig(task_type="RETRIEVAL_DOCUMENT")
|
|
75
|
+
)
|
|
76
|
+
if result is not None and result.embeddings:
|
|
77
|
+
description_embeddings.append(result.embeddings[0].values)
|
|
78
|
+
else:
|
|
79
|
+
raise ValueError("Gemini not givingz embeddingzzz")
|
|
80
|
+
elif embed_locally:
|
|
81
|
+
response = llm_client.embeddings(model=model, prompt=desc)
|
|
82
|
+
description_embeddings.append(response['embedding'])
|
|
83
|
+
|
|
84
|
+
else:
|
|
85
|
+
response = inference_client.feature_extraction(desc, model=model)
|
|
86
|
+
description_embeddings.append(response)
|
|
87
|
+
|
|
88
|
+
except Exception as e:
|
|
89
|
+
print(f"Error generating embedding for description '{desc}': {e}")
|
|
90
|
+
# Decide how to handle errors: skip, raise, or use a placeholder
|
|
91
|
+
# continue
|
|
92
|
+
raise e
|
|
93
|
+
time.sleep(1)
|
|
94
|
+
|
|
95
|
+
if not description_embeddings:
|
|
96
|
+
print("No embeddings could be generated for the descriptions. Aborting search.")
|
|
97
|
+
return []
|
|
98
|
+
|
|
99
|
+
# Aggregate embeddings: A simple approach is to average them.
|
|
100
|
+
# This creates a single query vector that represents the combined meaning.
|
|
101
|
+
return np.mean(description_embeddings, axis=0).tolist()
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def rag_query_llm(db_client, llm_client, inference_client,
        user_query: str, document_id: str, required_fields_descriptions: list[str],
        model_name: str = "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
        ollama_base_url: str = "http://localhost:11434", no_of_fields= 4,
        embedding_model= "BAAI/bge-en-icl", llm_provider_name: PROVIDER_T= "novita",
        index_name: str= "test_search", embeddings_collection= "doc_chunks",
        document_belongs_to_a_type= "", prompt_prefix= ""):
    """
    Performs a RAG (Retrieval Augmented Generation) query using a Hugging Face
    embedding model, ChromaDB for retrieval, and a local Ollama model for generation.

    Args:
        db_client: The vector DB client
        user_query (str): The user's query.
        required_fields_descriptions: The required fields which are to be queried from context
        model_name (str): The name of the Ollama model to use (e.g., "llama2", "mistral").
        no_of_fields (str): number of vectors which are to be retrieved from DB

    Returns:
        str: The generated response from the Ollama model.
    """

    embed_locally= False
    found= False
    # Probe the local Ollama server: record whether the generation model and
    # the embedding model are available locally.
    try:
        models = llm_client.list()
        for model in models.models:
            # print(model.model)
            if model.model == model_name:
                found= True
            if model.model == embedding_model:
                embed_locally= True
    except AttributeError as ae:
        # llm_client does not look like an Ollama client; remote providers
        # below are still usable, so keep going.
        print("cant find ollama", ae)
        print("continuing with other models")
    except Exception as e:
        print("unhandled error: ", e)
        raise e

    # One averaged embedding stands in for all required-field descriptions.
    aggregate_query_embedding= multi_embedding_average(llm_client, inference_client, required_fields_descriptions, model= embedding_model, embed_locally= embed_locally)
    print("Aggregate query embedding generated. length: ", len(aggregate_query_embedding))

    create_vector_index(db_client[embeddings_collection], index_name, num_dimensions= len(aggregate_query_embedding), document_belongs_to_a_type= document_belongs_to_a_type)

    # check the order of args
    relevant_chunks= search_within_document(db_client, aggregate_query_embedding, document_id, limit= no_of_fields, index_name= index_name, embeddings_collection_name= embeddings_collection, document_belongs_to_a_type= document_belongs_to_a_type)
    relevant_chunks= list(map(lambda x: x['chunk_text'], relevant_chunks))

    if not relevant_chunks:
        context = "No relevant context available."
    else:
        # print(f"Found {len(relevant_chunks)} relevant chunks.")
        # Concatenate relevant chunks into a single context string
        context = "\n\n".join(relevant_chunks)

    # Construct the RAG prompt
    rag_prompt = f"""
    You are a helpful assistant. Use the following context to answer the question.
    If you don't know the answer, just say that you don't know, don't try to make up an answer.

    Context:
    {prompt_prefix}
    {context}

    Question: {user_query}
    """

    # print("Step 3: Calling Ollama model with RAG prompt...")
    # print("final prompt: ")
    # print(rag_prompt)
    # Provider routing: Gemini API first, then local Ollama (if the model was
    # found above), then hosted inference providers, else an empty JSON string.
    if 'gemini' in model_name:

        client = genai.Client(api_key= environ['GEMINI_API_KEY'])
        response = client.models.generate_content(
            model= model_name,
            contents= rag_prompt,
        )
        return response.text

    elif found:
        try:
            # You can use ollama.chat or ollama.generate depending on your model and preference
            # ollama.chat is generally preferred for conversational models.
            response = llm_client.chat(
                model=model_name,
                messages=[{'role': 'user', 'content': rag_prompt}],
                options={"base_url": ollama_base_url} # Ensure the base URL is set
            )
            return response['message']['content']
        except ollama.ResponseError as e:
            print(f"Error calling Ollama API: {e}")
            return f"Error: Could not get a response from Ollama. Please check if Ollama is running and the model '{model_name}' is pulled."
        except Exception as e:
            print(f"An unexpected error occurred while calling Ollama: {e}")
            return "An unexpected error occurred."
    elif 'qwen' in model_name or 'gemma' in model_name or 'Qwen' in model_name or 'deepseek' in model_name:
        return external_llm(rag_prompt, model_name= model_name, llm_provider_name= llm_provider_name)
    else:
        # No usable provider matched the requested model; callers expect JSON.
        return '{}'
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def remove_bs(text):
    """
    Strip <think>...</think> reasoning and keep only the outermost JSON object.

    Args:
        text (str): Raw model output, possibly containing a <think> section and
            prose surrounding a JSON object.

    Returns:
        str: The substring spanning the first '{' through the last '}'
        (inclusive) after removing any <think> section; "" when no such
        span exists.
    """
    # Remove everything inside <think>...</think>, across newlines too.
    cleaned = re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL)

    # Locate the outermost braces directly; equivalent to the greedy
    # pattern {.*} under DOTALL but without a second regex pass.
    opening = cleaned.find('{')
    closing = cleaned.rfind('}')
    if opening == -1 or closing <= opening:
        return ""
    return cleaned[opening:closing + 1]
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
def smolquery(message: str, external_tools: list[Callable] | None = None, chat_id: str | None = None, allow_web_search= True) -> Dict[str, Any]:
    """
    Run agent with chat history support.

    Args:
        message: User's message
        external_tools: List of external tools to use (None means no extra tools)
        chat_id: Optional chat ID for continuing conversation
        allow_web_search: Boolean to decide whether to include brave tool to fetch search results
            and the Visiting Web Page Tool in the agent's toolbox

    Returns:
        Dictionary containing response and chat_id
    """
    # None sentinel instead of a mutable default list: a shared [] default
    # would be the same object across every call.
    if external_tools is None:
        external_tools = []

    alltools = []
    if allow_web_search:
        alltools.extend([
            brave_search_tool,
            VisitWebpageTool(),
        ])
    alltools.extend(external_tools)

    model = InferenceClientModel(
        token= environ["HF_TOKEN"],
        # model_id= "HuggingFaceTB/SmolLM3-3B"
        model_id= "deepseek-ai/DeepSeek-R1-0528"
    )

    agent = ToolCallingAgent(tools=alltools, model=model, add_base_tools=False)

    # Handle chat history: resume an existing conversation when its document
    # exists, otherwise start a fresh session with a new ID.
    if chat_id:
        chat_doc = collection.find_one({"_id": chat_id})
        if chat_doc:
            # Restore agent memory from database
            restore_agent_memory(agent, chat_doc["messages"])
        else:
            # Chat ID provided but not found, create new one
            chat_id = str(uuid.uuid4())
    else:
        # Create new chat
        chat_id = str(uuid.uuid4())

    # Run the agent without resetting the (possibly restored) memory.
    response = agent.run(message, reset= False)

    # Extract the final answer from the response
    final_answer = extract_final_answer(response)

    # Save conversation to database
    save_chat_history(chat_id, agent, message, final_answer)

    return {
        "response": final_answer,
        "chat_id": chat_id
    }
|
|
296
|
+
|
|
297
|
+
def extract_final_answer(response: Any) -> str:
    """
    Pull the final answer out of whatever agent.run() returned.

    Handles RunResult-like objects (carrying a ``final_answer`` attribute),
    plain strings, and iterables/generators of steps; anything else falls
    back to ``str()``.

    Args:
        response: Response from agent.run()

    Returns:
        Final answer as string
    """
    # RunResult-style object: the answer is exposed directly.
    if hasattr(response, 'final_answer'):
        return str(response.final_answer)

    # Already a string: nothing to extract.
    if isinstance(response, str):
        return response

    # Step stream (generator or other iterable, excluding str/bytes):
    # return the first step carrying a final answer, else the last step.
    is_step_stream = hasattr(response, '__iter__') and not isinstance(response, (str, bytes))
    if is_step_stream:
        last_seen = None
        for current in response:
            if hasattr(current, 'final_answer'):
                return str(current.final_answer)
            last_seen = current
        if last_seen is not None:
            return str(last_seen)

    # Fallback: plain string conversion.
    return str(response)
|
|
330
|
+
|
|
331
|
+
def restore_agent_memory(agent: ToolCallingAgent, messages: List[Dict[str, Any]]) -> None:
    """
    Rebuild the agent's memory steps from previously stored messages.

    Args:
        agent: The agent whose ``memory.steps`` list is appended to
        messages: List of stored message dicts, each tagged with a "type"
            of "task" or "action" (other types are ignored)
    """
    steps = agent.memory.steps
    for entry in messages:
        kind = entry["type"]
        if kind == "task":
            # Recreate the task step from its stored content and images.
            steps.append(TaskStep(
                task=entry["content"],
                task_images=entry.get("images", [])
            ))
        elif kind == "action":
            # ActionStep objects are normally produced during execution and
            # carry read-only details, so rebuild a minimal one from the
            # persisted fields.
            timing_data = entry.get("timing", init_timing)
            steps.append(ActionStep(
                observations=entry.get("observations", ""),
                step_number=entry["step_number"],
                observations_images=entry.get("observations_images", []),
                timing=Timing(
                    start_time=timing_data.get("start_time", 0.0),
                    end_time=timing_data.get("end_time", 0.0)
                )
            ))
|
|
362
|
+
|
|
363
|
+
def save_chat_history(chat_id: str, agent: ToolCallingAgent, user_message: str, agent_response: str) -> None:
    """
    Save conversation history to MongoDB.

    Args:
        chat_id: Chat session ID
        agent: Agent instance with memory
        user_message: Latest user message (currently unused here; the turn is
            captured via the agent's memory steps)
        agent_response: Agent's response
    """
    # Convert agent memory to serializable format
    messages = []

    for step in agent.memory.steps:
        if isinstance(step, TaskStep):
            messages.append({
                "type": "task",
                "content": step.task,
                "images": step.task_images if hasattr(step, 'task_images') else [],
                "timestamp": datetime.now(timezone.utc)
            })
        elif isinstance(step, ActionStep):
            msg = {
                "type": "action",
                "step_number": step.step_number,
                "observations_images": step.observations_images if hasattr(step, 'observations_images') else [],
                # NOTE(review): assumes Timing exposes a .dict() serializer — confirm.
                "timing": step.timing.dict() if hasattr(step, 'timing') else init_timing,
                "timestamp": datetime.now(timezone.utc)
            }

            # Store any additional attributes that might be accessible
            # Note: ActionStep attributes are typically read-only
            if hasattr(step, 'observations') and step.observations:
                msg["observations"] = str(step.observations)
            if hasattr(step, 'error') and step.error:
                msg["error"] = str(step.error)

            messages.append(msg)

    # Add the latest response
    messages.append({
        "type": "response",
        "content": agent_response,
        "timestamp": datetime.now(timezone.utc)
    })

    # Update or insert chat document (upsert creates it on first save)
    collection.update_one(
        {"_id": chat_id},
        {
            "$set": {
                "messages": messages,
                "last_updated": datetime.now(timezone.utc)
            }
        },
        upsert=True
    )
|
|
420
|
+
|
|
421
|
+
def get_chat_history(chat_id: str) -> Optional[List[Dict[str, Any]]]:
    """
    Look up the stored message list for a chat session.

    Args:
        chat_id: Chat session ID

    Returns:
        The session's stored messages, or None when no such chat exists
    """
    doc = collection.find_one({"_id": chat_id})
    if not doc:
        return None
    return doc["messages"]
|
|
433
|
+
|
|
434
|
+
def delete_chat_history(chat_id: str) -> bool:
    """
    Remove a chat session's stored history.

    Args:
        chat_id: Chat session ID

    Returns:
        True when a document was deleted, False when none matched
    """
    outcome = collection.delete_one({"_id": chat_id})
    return outcome.deleted_count > 0
|
|
446
|
+
|
|
447
|
+
def list_chat_sessions() -> List[Dict[str, Any]]:
    """
    Summarize every stored chat session.

    Returns:
        One dict per session carrying its ID, last-updated timestamp, and a
        preview (first 100 characters) of the opening message
    """
    # $slice keeps the query light: only the first message is fetched.
    projection = {"_id": 1, "last_updated": 1, "messages": {"$slice": 1}}
    summaries = []
    for doc in collection.find({}, projection):
        msgs = doc["messages"]
        opener = msgs[0] if msgs else {}
        content = opener.get("content", "")
        preview = content[:100] + "..." if len(content) > 100 else content
        summaries.append({
            "chat_id": doc["_id"],
            "last_updated": doc.get("last_updated"),
            "first_message": preview
        })
    return summaries
|
|
463
|
+
|
|
464
|
+
|
|
465
|
+
# # First message - creates new chat
|
|
466
|
+
# result1 = smolquery("Hello, what's the weather like?", [])
|
|
467
|
+
# print(f"Response: {result1['response']}")
|
|
468
|
+
# print(f"Chat ID: {result1['chat_id']}")
|
|
469
|
+
|
|
470
|
+
# # Second message - continues the conversation
|
|
471
|
+
# result2 = smolquery("Thanks, now tell me about Python programming", [], chat_id=result1['chat_id'])
|
|
472
|
+
# print(f"Response: {result2['response']}")
|
|
473
|
+
# print(f"Chat ID: {result2['chat_id']}") # Should be the same as result1['chat_id']
|
|
474
|
+
|
|
475
|
+
# # Retrieve chat history
|
|
476
|
+
# history = get_chat_history(result1['chat_id'])
|
|
477
|
+
# print(f"Chat history length: {len(history) if history else 0}")
|
|
478
|
+
|
|
479
|
+
|
|
480
|
+
|
|
481
|
+
if __name__ == "__main__":
    # Smoke-test driver: runs two agent turns in one session, then reads the
    # stored history back.
    print("hemlo worls")

    # result1 = smolquery("Did i tell you to do something regarding stocks before? What do you conclude?", allow_web_search= False, chat_id= "a52ab59e-d6d0-4089-a963-61e8876244e0")
    # First message - creates a new chat session.
    result1 = smolquery("How has NIFTY 50 been doing past 3 months?")
    print(f"Response: {result1['response']}")
    print(f"Chat ID: {result1['chat_id']}")

    # # Second message - continues the conversation
    result2 = smolquery("now tell me about other indices in the same country", chat_id=result1['chat_id'])
    print(f"Response: {result2['response']}")
    print(f"Chat ID: {result2['chat_id']}") # Should be the same as result1['chat_id']

    # # Retrieve chat history
    history = get_chat_history(result1['chat_id'])
    print(f"Chat history length: {len(history) if history else 0}")
|