pembot 0.1.7__tar.gz → 0.1.9__tar.gz
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pembot might be problematic. Click here for more details.
- {pembot-0.1.7 → pembot-0.1.9}/PKG-INFO +1 -1
- pembot-0.1.9/pembot/.git/COMMIT_EDITMSG +1 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/index +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/logs/HEAD +2 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/logs/refs/heads/main +2 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/logs/refs/remotes/origin/main +2 -0
- pembot-0.1.9/pembot/.git/objects/06/ef9ad559094e5b48fe2a1b437dca5cea07c06b +0 -0
- pembot-0.1.9/pembot/.git/objects/0a/2121ea3115562cc205df572ea26532aaac5244 +0 -0
- pembot-0.1.9/pembot/.git/objects/3c/80f6d984dff52ee250436fcf3da2e809967ae4 +0 -0
- pembot-0.1.9/pembot/.git/objects/41/cca8bf63122d1044d2fb36f63467ccd500832e +0 -0
- pembot-0.1.9/pembot/.git/objects/55/a26fb846654d84aacea136307a35fb0c46c9c8 +0 -0
- pembot-0.1.9/pembot/.git/objects/82/f733fe4edc22fe2f4caa889d2533b24a7bf9df +0 -0
- pembot-0.1.9/pembot/.git/objects/8d/58ec13be39949ecfe7211b42c56acd2a83dc72 +1 -0
- pembot-0.1.9/pembot/.git/objects/af/66fa89b4e1d1a8ac32dca38126f5510faea126 +0 -0
- pembot-0.1.9/pembot/.git/objects/c0/e6cf6ef4d7f0100113213d8bab75b966cd79ef +0 -0
- pembot-0.1.9/pembot/.git/objects/c9/d9d9b3a93b142e8b7266fc4e3e2417128a0b32 +0 -0
- pembot-0.1.9/pembot/.git/objects/cc/348ab3677f744f8d7cd8b2ac7eb775528cfb1c +0 -0
- pembot-0.1.9/pembot/.git/objects/d3/508f1537e9bd48bc784da569e14c342bc9c05e +0 -0
- pembot-0.1.9/pembot/.git/objects/d4/40b20aae1265dabbd3ddaafb24c35e40e3ab3c +0 -0
- pembot-0.1.9/pembot/.git/objects/ef/141ba9482c729796968be2e562b1488a1e5552 +0 -0
- pembot-0.1.9/pembot/.git/refs/heads/main +1 -0
- pembot-0.1.9/pembot/.git/refs/remotes/origin/main +1 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/AnyToText/convertor.py +58 -46
- {pembot-0.1.7 → pembot-0.1.9}/pembot/__init__.py +1 -1
- {pembot-0.1.7 → pembot-0.1.9}/pembot/config/config.yaml +1 -1
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/extract.py +31 -19
- {pembot-0.1.7 → pembot-0.1.9}/pembot/requirements.txt +1 -1
- pembot-0.1.7/pembot/.git/COMMIT_EDITMSG +0 -1
- pembot-0.1.7/pembot/.git/refs/heads/main +0 -1
- pembot-0.1.7/pembot/.git/refs/remotes/origin/main +0 -1
- {pembot-0.1.7 → pembot-0.1.9}/LICENSE +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/HEAD +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/config +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/description +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/hooks/applypatch-msg.sample +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/hooks/commit-msg.sample +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/hooks/fsmonitor-watchman.sample +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/hooks/post-update.sample +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/hooks/pre-applypatch.sample +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/hooks/pre-commit.sample +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/hooks/pre-merge-commit.sample +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/hooks/pre-push.sample +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/hooks/pre-rebase.sample +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/hooks/pre-receive.sample +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/hooks/prepare-commit-msg.sample +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/hooks/push-to-checkout.sample +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/hooks/sendemail-validate.sample +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/hooks/update.sample +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/info/exclude +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/logs/refs/remotes/origin/HEAD +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/00/3ba85af0ed7b9f6ab099ca298c3d0c18fb002b +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/01/5f71967c525963c827d7fe5415ae2c040c4a64 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/05/5e82e69847a636258cb994bb920c03a93b5ff4 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/06/f7563094fe405dfd9c69f05e357f4e20fc5979 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/09/ee34d94dffd4c286df1d6d528b2e98e2a6cce2 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/0a/6dd69f9258bba08a669efec17ff170fdd1509e +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/0a/fb3a98cdc55b1434b44534ec2bf22c56cfa26c +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/0b/db4169fc0f312b8698f1df17a258fff163aeaa +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/0c/8d9b2690545bf1906b05cd9f18b783b3eb74f1 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/0c/ab66ffbaf50ef60dd41f3498595ebd2526b33c +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/0d/28f73897db0c9a9351ee9e64d2a0fe27db2705 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/0e/120123bfadfd594220963f3bbca54056bab6ee +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/0e/6b7f7409a88aa2595206b53112a666e4dca8a2 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/0f/ccea3d0db4864a854f8b2c13b9f76b3601d200 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/10/9d1ca0463ea42bbbc435bcb43a90711211cf49 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/10/d1fb81ceede7365dbe132a770a49026e86e9a5 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/14/22c2d0cf79fd928ff7e2d77f96ad5b40cc2a31 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/18/28e18ab80aa64d334b26428708140e280cbc63 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/19/f61df7dbd562d04f561288677bbf2f18f5dff7 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/1f/1fe55f9a705cce752d77718eb870b2c5160138 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/1f/791d08c432b4244a670517c87ada2181159101 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/1f/83a471c8119f7794d98c049170a5d7d07a4b71 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/20/3b390ad0aeb3bc5a8540840b004e6a42e5ce7a +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/24/028fb58c81ceb1ab2b577ae590afb49d598d2e +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/27/02d55c4513a6d23e577aa2f104982c8b9436b2 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/28/db0ab48059acccd7d257aa02e52e9b6b83a4a5 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/2c/69d405c54cb48aa2a3054326420a64698bd7ef +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/2f/f2a190e96dded527d8dd1ab00b706f95348d99 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/35/97e518a8658280be9f377f78edf1dfa1f23814 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/37/175696b3ca7a5d17379f03fb61a1023d50aeba +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/3a/54acc088992fa8e890b93e83115ec6dc019835 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/3d/07d3b29ff53d95de3898fb786d61732f210515 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/3d/e536f9c1fd05a23c2dec66423ed610afb0cf5f +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/3e/23850624fcf5f111d6ea88ddd64adf924cf82f +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/3e/cf23eb95123287531d708a21d4ba88d92ccabb +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/3f/78215d7e17da726fb352fd92b3c117db9b63ba +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/3f/e072cf3cb6a9f30c3e9936e3ddf622e80270d0 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/41/ae8fa8f8baa2daee5ec0aa21ae17922ae051a0 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/41/cbeb6bcb4c6fa9ef9be571082d95ecb4ea0ee3 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/42/ef76e19df247993cf5b64aba5dccaf8587a375 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/42/f03e1b66aa56bbb36a1c3a8dea9e1e727faffa +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/44/86da0f89c566c3bae8abf435d37aeca87f1632 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/44/9dea1ca63d6f1e47d119b36576acc94822a37c +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/48/b71bba3a3f9887828863521c13901eceb54331 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/4b/c4370a037feed828cca0915ebb0bb94b24a9d4 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/4d/a03134f70896f72053fbdc0cd4f4c76d4ac1d8 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/50/39b29fda67743a044993436df6a4a1db7b8888 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/51/9e780574933d7627a083222bd10dd74f430904 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/59/69ac8b9d6b44a601385c3ed8c710a69d05216e +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/5b/efa3b2f18d2b5d332c6de503a7054f4af0569f +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/5c/4f01d3ce9e243bbb8a693f97e5c7d13a857cb7 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/5d/d4656bca3d7605598a799d93fcbf23a789d91a +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/61/46a371b9c1bd9f51af273f11f986cfd1bedeba +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/63/1700a51c8fa97b543991f5f61bfcd1e7e1327d +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/64/00040794955d17c9a1fe1aaaea59f2c4822177 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/67/e48960910bc2dc300e00ee2edec8680ffc5c01 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/69/667188acc459d1f889fad69f5e5507e2188ced +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/6c/16510b4663cd506978f49e8ec05b25862a1d3e +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/6d/7a865a23b1cb4182f67907820104ced48b11c9 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/71/014c7a6c8c98449a26ef966485a6cf30a1974d +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/72/f047cda92abcd1ddc857f6461de605f8668331 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/73/2e98f08bc806c331b06847fc8c743f545499e5 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/73/5b5f6d515f0816599343f1ae7ccffc1d5a487e +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/74/5c54e85b3ea7bfc8a8f35edc907746c29f8663 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/75/321fbcd2be44a548400fbacbf5bcb71e3810fd +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/78/4aa28d912b66e07748483efe0326c70d7541a5 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/79/4431c1d34c60b4f3fb963823f77f33bd947cc7 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/7a/7d28b0313a3d9d509823faaae31949af8610ef +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/7e/0907822f7d316ebe0be07e1f6918bef412c80b +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/81/d01e1c63d48b096c77aae83471d42272ca9fce +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/86/cdaec229f1fbebf43042266b03878944669f25 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/87/d6df5217a4a374f8c1211a05f9bd657f72c9a7 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/88/0c3d45ac59940344dfb6c45005f7e908173138 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/89/d2439385c82b98104f27edf39bcf28a631233f +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/8b/5be2af9b16f290549193859c214cd9072212e8 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/8d/adc1d7891c79de24ba2c7c38b4c830bf61870a +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/8f/c00bf69f4ad3e50c13acc4a0988b6c0fe72b5a +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/90/f067b86364ab243a7e3bc75f936319ba9eac88 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/92/2448ecc557be58195468561e475b904bd1b349 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/93/652290aac46c69b1b4dd83062b6cfe648dd643 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/93/8f29d9b4b1ae86e39dddf9e3d115a82ddfc9b6 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/95/28bbccd167e3f4ad583a1ae9fac98a52620e27 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/97/b7aaa5c8b45f5471c9d39893cd4c893da6f444 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/99/89463f57f1f2931e5973bd543c80f18b0204bc +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/9b/123713e30fc9e225f9ac8ff5b02f8f8cf86456 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/9f/bc171dae3f6b60eaf86ed522b0adf6b123ec85 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/a8/98d2c3947d30d8be64bd2bbcef68f956d5456b +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/a9/d5e349fa091647742b09eb3d05bac8f58fe547 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/ab/139d2cd4798dd8e2c565b80440b1a44b376126 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/ab/c6b15265171457b41e2cfdaf3b8c3994a59eb7 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/ab/f77db148e3fb3b26913af14ae43130396f3269 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/ac/9c9018c62fa30dc142665c1b5a375f4e056880 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/ac/e2b51971b1bbade48afffc8d117c74d18f123c +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/af/80ddb5890f062e364ea8ade2d602df4e12de8c +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/b1/1173d9b68db117437ccb9551461152e1e8a77d +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/b1/ddf2869bc7d213b35dabd6fa5bfae44cd6b7a7 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/b2/4e79ab07fe9e68781961a25ff9f1dbb1546fbb +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/b8/884c6145221ac66f84bf88919754c2cb05c12d +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/b8/eea52176ffa4d88c5a9976bee26092421565d3 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/bb/a495d8e72b78fefcc534259b8edae9a3172d15 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/bd/8fd1cb166996e74a8631f3a6f764a53af75297 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/bf/068a0714e2145de83a5c004f4213b091439d0e +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/bf/32a7e6872e5dc4025ee3df3c921ec7ade0855f +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/bf/518686b06069d2a8abd3689908b7e1a6e16b05 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/c0/793458db6e1bee7f79f1a504fb8ff4963f8ed3 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/c0/f948ab4636a125bc202368e6c9cbe80d76169a +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/c2/443060c07101948487cfa93cc39e082e9e0f5f +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/c2/926f040b089a52edfb8351480f63619ab7e0ab +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/c2/ce19d575a8cccf6886862c4fa6afefba142511 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/c3/cc0da3d955ecec0f865c46c030a0c073697495 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/c6/b72ea9f8856d3bde28cb75775ebea9840535b8 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/c9/efe79dee4c91d4bb8c3d3c6e01ff70ff79a722 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/ce/a4ffc1cf5eab61a2a0abd8f6dc941b580b69fd +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/d0/937f7d832266337289d5ec09459f931a46fcf7 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/d9/ec420cb55a82e7efbc8564e30ec7f4c0f6021e +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/dd/82bd16a51b9bad8241d9fb46619b1c6755cafe +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/e0/9162dbd64d85bb5ed740aa99faefa73f293d78 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/e0/da740b542afc451c45b9b4be6c0c7a3c79b06c +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/e3/c62c141fc65ef2be0095c49b23e06263f0b734 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/e3/da98f3722c2d0c937db0872836fc4491e4487a +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/e5/3070f2b07f45d031444b09b1b38658f3caf29e +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/e6/adbc3c373070269f97ef82d4f63027d7878f67 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/e7/911a702079a6144997ea4e70f59abbe59ec2bc +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/e8/9cb4f5af158d26dcff5eed03dba6671a818739 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/e9/1172752e9a421ae463112d2b0506b37498c98d +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/ea/0af89e61a882c5afc2a8c281b2d96f174bfe58 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/eb/75e1c49f1e5b79dca17ccdbec8067756523238 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/ee/a73c7f24094ed83b014f7cfce46e10f817bec8 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/ef/0503a60244391590b16042019032e91d7cc30d +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/ef/3488a3c636d73d82ad138e70a92453249b7f37 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/ef/c899c7f910cfa7a383692eee851cf5af36da8c +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/f1/2d2ef8948cbe4b24279bee282f934cf5a1b834 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/f1/3181b12cf4d539e635bf94ad6e950d68cedaf1 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/f1/655afa1c5636c8d58969e3194bb770aefbc552 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/f2/14d4d56726e2928479c5948bd88e038cf70b2e +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/f4/e991088a63def67a30a2b8bbdb4d58514abab8 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/f6/b1d54483ce20fbcb252a8a93a5eff7bec88729 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/f8/6fbd490878cb0d3c35cc4443672d1309171bf1 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/f8/cbb5bfd1503e66cec2c593362c60a317b6d300 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/f9/98e1f01c2bf0a20159fc851327af05beb3ac88 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/fa/9c9a62ec1203a5868b033ded428c2382c4e1b6 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/fb/6c90c9ce5e0cdfbe074a3f060afc66f62eefde +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/fc/988aab7e2d46396dc595ad24345e8e77dda0e4 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/fc/e56f1e09d09a05b9babf796fb40bece176f3a2 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/fd/abd48f3e947a9f420003446dd118c5295346a5 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/fe/24641c63f1d906091930cc7e77e448f025814e +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/fe/cc5d8154b1e77e4c6beb23ce9cbe8fea55d34d +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/pack/pack-d5469edc8c36e3bb1de5e0070e4d5b1eae935dd4.idx +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/pack/pack-d5469edc8c36e3bb1de5e0070e4d5b1eae935dd4.pack +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/pack/pack-d5469edc8c36e3bb1de5e0070e4d5b1eae935dd4.rev +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/packed-refs +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/refs/remotes/origin/HEAD +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/.gitignore +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/AnyToText/__init__.py +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/LICENSE +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/TextEmbedder/__init__.py +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/TextEmbedder/gemini_embedder.py +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/TextEmbedder/mongodb_embedder.py +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/TextEmbedder/mongodb_index_creator.py +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/TextEmbedder/vector_query.py +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/gartner.py +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/main.py +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/output_structure_local.py +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/COMMIT_EDITMSG +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/HEAD +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/config +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/description +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/hooks/applypatch-msg.sample +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/hooks/commit-msg.sample +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/hooks/fsmonitor-watchman.sample +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/hooks/post-update.sample +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/hooks/pre-applypatch.sample +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/hooks/pre-commit.sample +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/hooks/pre-merge-commit.sample +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/hooks/pre-push.sample +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/hooks/pre-rebase.sample +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/hooks/pre-receive.sample +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/hooks/prepare-commit-msg.sample +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/hooks/push-to-checkout.sample +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/hooks/sendemail-validate.sample +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/hooks/update.sample +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/index +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/info/exclude +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/logs/HEAD +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/logs/refs/heads/main +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/logs/refs/remotes/myorigin/main +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/logs/refs/remotes/origin/HEAD +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/objects/14/251b198e0bac39a3dc3b42f9e57b20c01465fb +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/objects/24/7b15a6b1e0e3d270c05af184f048736376cd4e +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/objects/24/8f03b5f969a7fbd396b496f40b57f0ae81c148 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/objects/57/74dc9c3901d2ffb2cd7dafe2ad6612a7f9f42c +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/objects/72/2dc14f82e78ce41717348b256e0c17834933b4 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/objects/79/eb7b93ced70e399bd561093c45de7641414dbd +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/objects/8d/9ce1fd9733a78c592b34af9c94b98960c601ed +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/objects/95/745843bb4377d6042180daeda818c0b16fd493 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/objects/a5/c6dfb577782c259990dcf977e355298e923428 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/objects/a7/4bcd5e67cb1066dd504b92b42390fe0b2c3d38 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/objects/b4/8d697aa9fd97151eb2a84a1af5d408b7630232 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/objects/b8/702320e56074e9680181d8b7897d6a0a552e2d +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/objects/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/objects/f3/b2d76c75bbd50e04fc4c2ad17fc94ca6daed32 +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/objects/pack/pack-d3051affdd6c31306dc53489168fc870872085d1.idx +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/objects/pack/pack-d3051affdd6c31306dc53489168fc870872085d1.pack +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/objects/pack/pack-d3051affdd6c31306dc53489168fc870872085d1.rev +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/packed-refs +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/refs/heads/main +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/refs/remotes/myorigin/main +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/refs/remotes/origin/HEAD +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/LICENSE +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/README.md +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/__init__.py +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/config/config.yaml +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/pyrightconfig.json +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/requirements.txt +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pem.py +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/pyrightconfig.json +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/query.py +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/search.py +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/utils/__init__.py +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/utils/inference_client.py +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pembot/utils/string_tools.py +0 -0
- {pembot-0.1.7 → pembot-0.1.9}/pyproject.toml +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
added DeepSeek-OCR as a model option to use spaces
|
|
Binary file
|
|
@@ -19,3 +19,5 @@ c3cc0da3d955ecec0f865c46c030a0c073697495 e6adbc3c373070269f97ef82d4f63027d7878f6
|
|
|
19
19
|
e6adbc3c373070269f97ef82d4f63027d7878f67 81d01e1c63d48b096c77aae83471d42272ca9fce cyto <silverstone965@gmail.com> 1758890262 +0530 commit: cyto/fixed a file already open error and wrote to file if myfile is excel type and output_dir is given
|
|
20
20
|
81d01e1c63d48b096c77aae83471d42272ca9fce e89cb4f5af158d26dcff5eed03dba6671a818739 cyto <silverstone965@gmail.com> 1758891589 +0530 commit: cyto/made the excel convertor compatible with suffix input from both system file and with bytes + manual suffix input
|
|
21
21
|
e89cb4f5af158d26dcff5eed03dba6671a818739 e3c62c141fc65ef2be0095c49b23e06263f0b734 cyto <silverstone965@gmail.com> 1758892053 +0530 commit: silly willy mistake
|
|
22
|
+
e3c62c141fc65ef2be0095c49b23e06263f0b734 d440b20aae1265dabbd3ddaafb24c35e40e3ab3c cyto <silverstone965@gmail.com> 1759127664 +0530 commit: cyto/fixed the excel file input bug, in the conversion method; added exceptions where there is invalid input
|
|
23
|
+
d440b20aae1265dabbd3ddaafb24c35e40e3ab3c 8d58ec13be39949ecfe7211b42c56acd2a83dc72 cyto <silverstone965@gmail.com> 1761392079 +0530 commit: added DeepSeek-OCR as a model option to use spaces
|
|
@@ -19,3 +19,5 @@ c3cc0da3d955ecec0f865c46c030a0c073697495 e6adbc3c373070269f97ef82d4f63027d7878f6
|
|
|
19
19
|
e6adbc3c373070269f97ef82d4f63027d7878f67 81d01e1c63d48b096c77aae83471d42272ca9fce cyto <silverstone965@gmail.com> 1758890262 +0530 commit: cyto/fixed a file already open error and wrote to file if myfile is excel type and output_dir is given
|
|
20
20
|
81d01e1c63d48b096c77aae83471d42272ca9fce e89cb4f5af158d26dcff5eed03dba6671a818739 cyto <silverstone965@gmail.com> 1758891589 +0530 commit: cyto/made the excel convertor compatible with suffix input from both system file and with bytes + manual suffix input
|
|
21
21
|
e89cb4f5af158d26dcff5eed03dba6671a818739 e3c62c141fc65ef2be0095c49b23e06263f0b734 cyto <silverstone965@gmail.com> 1758892053 +0530 commit: silly willy mistake
|
|
22
|
+
e3c62c141fc65ef2be0095c49b23e06263f0b734 d440b20aae1265dabbd3ddaafb24c35e40e3ab3c cyto <silverstone965@gmail.com> 1759127664 +0530 commit: cyto/fixed the excel file input bug, in the conversion method; added exceptions where there is invalid input
|
|
23
|
+
d440b20aae1265dabbd3ddaafb24c35e40e3ab3c 8d58ec13be39949ecfe7211b42c56acd2a83dc72 cyto <silverstone965@gmail.com> 1761392079 +0530 commit: added DeepSeek-OCR as a model option to use spaces
|
|
@@ -18,3 +18,5 @@ c3cc0da3d955ecec0f865c46c030a0c073697495 e6adbc3c373070269f97ef82d4f63027d7878f6
|
|
|
18
18
|
e6adbc3c373070269f97ef82d4f63027d7878f67 81d01e1c63d48b096c77aae83471d42272ca9fce cyto <silverstone965@gmail.com> 1758890281 +0530 update by push
|
|
19
19
|
81d01e1c63d48b096c77aae83471d42272ca9fce e89cb4f5af158d26dcff5eed03dba6671a818739 cyto <silverstone965@gmail.com> 1758891600 +0530 update by push
|
|
20
20
|
e89cb4f5af158d26dcff5eed03dba6671a818739 e3c62c141fc65ef2be0095c49b23e06263f0b734 cyto <silverstone965@gmail.com> 1758892065 +0530 update by push
|
|
21
|
+
e3c62c141fc65ef2be0095c49b23e06263f0b734 d440b20aae1265dabbd3ddaafb24c35e40e3ab3c cyto <silverstone965@gmail.com> 1759127680 +0530 update by push
|
|
22
|
+
d440b20aae1265dabbd3ddaafb24c35e40e3ab3c 8d58ec13be39949ecfe7211b42c56acd2a83dc72 cyto <silverstone965@gmail.com> 1761392127 +0530 update by push
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
x��Mj�0@�u�ٗ�ѯc(���� F3��Բ��z���~��q-e���}�*�L�<%o��!���O�40KD<{�f�]�f�Dj]�B9�!���TOٳ�[����Wxm��{�u�1���B��µ������a�g�͡�`�RC"*�}����>.@
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
8d58ec13be39949ecfe7211b42c56acd2a83dc72
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
8d58ec13be39949ecfe7211b42c56acd2a83dc72
|
|
@@ -5,7 +5,6 @@ from pembot.pdf2markdown.extract import MarkdownPDFExtractor
|
|
|
5
5
|
import os
|
|
6
6
|
import pandas as pd
|
|
7
7
|
from typing import Literal, Union
|
|
8
|
-
import tempfile
|
|
9
8
|
from datetime import datetime, date
|
|
10
9
|
from tabulate import tabulate
|
|
11
10
|
|
|
@@ -36,6 +35,10 @@ class Convertor():
|
|
|
36
35
|
self.input_filepath= None
|
|
37
36
|
self.output= ""
|
|
38
37
|
self.suffix= suffix
|
|
38
|
+
self.file_bytes= file_bytes
|
|
39
|
+
|
|
40
|
+
if (file_bytes and not suffix) or (file_bytes and not file_type):
|
|
41
|
+
raise Exception("wrong use of convertor library, have to give suffix and file_type along with the file_bytes")
|
|
39
42
|
|
|
40
43
|
|
|
41
44
|
if model_name is None:
|
|
@@ -44,7 +47,7 @@ class Convertor():
|
|
|
44
47
|
|
|
45
48
|
# file_type can be pdf, excel, etc.
|
|
46
49
|
if file_bytes and suffix:
|
|
47
|
-
with
|
|
50
|
+
with TemporaryDirectory() as dp:
|
|
48
51
|
output_dir = Path(dp)
|
|
49
52
|
myfile = output_dir / f"input{suffix}"
|
|
50
53
|
myfile.write_bytes(file_bytes)
|
|
@@ -102,42 +105,51 @@ class Convertor():
|
|
|
102
105
|
markdown_output = []
|
|
103
106
|
|
|
104
107
|
file_suffix= ''
|
|
105
|
-
try:
|
|
106
|
-
if not input_filepath.exists():
|
|
107
|
-
file_suffix= self.suffix
|
|
108
|
-
else:
|
|
109
|
-
file_suffix = input_filepath.suffix.lower()
|
|
110
|
-
|
|
111
|
-
current_engine: PandasReadEngineType = excel_ods_engine
|
|
112
108
|
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
109
|
+
try:
|
|
110
|
+
with TemporaryDirectory() as dp:
|
|
111
|
+
dp_path= Path(dp)
|
|
112
|
+
|
|
113
|
+
if input_filepath is None:
|
|
114
|
+
file_suffix= self.suffix
|
|
115
|
+
if file_suffix and self.file_bytes:
|
|
116
|
+
input_filepath= dp_path / ("my_excel_file" + file_suffix)
|
|
117
|
+
input_filepath.write_bytes(self.file_bytes)
|
|
118
|
+
else:
|
|
119
|
+
raise Exception("no input file name, and no file bytes either")
|
|
120
|
+
elif input_filepath.exists():
|
|
121
|
+
file_suffix = input_filepath.suffix.lower()
|
|
122
|
+
|
|
123
|
+
current_engine: PandasReadEngineType = excel_ods_engine
|
|
124
|
+
|
|
125
|
+
if file_suffix in ['.xls', '.xlsx', '.ods']:
|
|
126
|
+
if file_suffix == '.ods':
|
|
127
|
+
if current_engine is None:
|
|
128
|
+
current_engine = 'odf'
|
|
129
|
+
elif current_engine != 'odf':
|
|
130
|
+
print(f"Warning: Specified engine '{current_engine}' may not be optimal for ODS. Forcing 'odf'.")
|
|
131
|
+
current_engine = 'odf'
|
|
132
|
+
|
|
133
|
+
excel_file = pd.ExcelFile(input_filepath, engine=current_engine)
|
|
134
|
+
if not excel_file.sheet_names:
|
|
135
|
+
return f"Warning: File '{input_filepath.name}' contains no sheets."
|
|
136
|
+
|
|
137
|
+
for sheet_name in excel_file.sheet_names:
|
|
138
|
+
df = excel_file.parse(sheet_name)
|
|
139
|
+
markdown_output.append(f"## {sheet_name}\n")
|
|
140
|
+
markdown_table = tabulate(df, headers='keys', tablefmt='pipe')
|
|
141
|
+
markdown_output.append(markdown_table)
|
|
142
|
+
markdown_output.append("\n")
|
|
143
|
+
|
|
144
|
+
return "\n".join(markdown_output)
|
|
145
|
+
|
|
146
|
+
elif file_suffix == '.csv':
|
|
147
|
+
df = pd.read_csv(input_filepath)
|
|
128
148
|
markdown_table = tabulate(df, headers='keys', tablefmt='pipe')
|
|
129
|
-
|
|
130
|
-
markdown_output.append("\n")
|
|
149
|
+
return markdown_table
|
|
131
150
|
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
elif file_suffix == '.csv':
|
|
135
|
-
df = pd.read_csv(input_filepath)
|
|
136
|
-
markdown_table = tabulate(df, headers='keys', tablefmt='pipe')
|
|
137
|
-
return markdown_table
|
|
138
|
-
|
|
139
|
-
else:
|
|
140
|
-
return f"Error: Unsupported file type: '{file_suffix}'. Please provide a CSV, XLS, XLSX, or ODS file."
|
|
151
|
+
else:
|
|
152
|
+
return f"Error: Unsupported file type: '{file_suffix}'. Please provide a CSV, XLS, XLSX, or ODS file."
|
|
141
153
|
|
|
142
154
|
except ImportError as ie:
|
|
143
155
|
if 'odfpy' in str(ie).lower() and file_suffix == '.ods':
|
|
@@ -183,10 +195,10 @@ if __name__ == '__main__':
|
|
|
183
195
|
# conv= Convertor(file_bytes= imgpdf.read(), suffix= ".pdf", file_type= "pdf")
|
|
184
196
|
# print(conv.output)
|
|
185
197
|
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
198
|
+
print("Test 2: balance sheet, bytes")
|
|
199
|
+
with open("/home/cyto/Downloads/balance_sheet_2023-24_final.xlsx", "rb") as xl:
|
|
200
|
+
conv= Convertor(file_bytes= xl.read(), suffix= ".xlsx", file_type= "excel")
|
|
201
|
+
print(conv.output)
|
|
190
202
|
|
|
191
203
|
print("Test 3: excel schedule, bytes")
|
|
192
204
|
with open("/home/cyto/Downloads/Assignment schedule.xlsx", "rb") as imgpdf:
|
|
@@ -194,13 +206,13 @@ if __name__ == '__main__':
|
|
|
194
206
|
print(conv.output)
|
|
195
207
|
|
|
196
208
|
# without bytes example:
|
|
197
|
-
print("Test 4: scanned pdf, path")
|
|
198
|
-
conv= Convertor(myfile= Path('/home/cyto/Documents/scanned.pdf'), output_dir= Path('/home/cyto/Documents'))
|
|
199
|
-
print(conv.output)
|
|
200
|
-
|
|
201
|
-
print("Test 5: schedule excel, path")
|
|
202
|
-
conv= Convertor(myfile= Path('/home/cyto/Downloads/Assignment schedule.xlsx'), output_dir= Path('/home/cyto/Downloads'))
|
|
203
|
-
print(conv.output)
|
|
209
|
+
# print("Test 4: scanned pdf, path")
|
|
210
|
+
# conv= Convertor(myfile= Path('/home/cyto/Documents/scanned.pdf'), output_dir= Path('/home/cyto/Documents'))
|
|
211
|
+
# print(conv.output)
|
|
212
|
+
#
|
|
213
|
+
# print("Test 5: schedule excel, path")
|
|
214
|
+
# conv= Convertor(myfile= Path('/home/cyto/Downloads/Assignment schedule.xlsx'), output_dir= Path('/home/cyto/Downloads'))
|
|
215
|
+
# print(conv.output)
|
|
204
216
|
except FileNotFoundError as fe:
|
|
205
217
|
print("file not found, modify the driver code to get sample files to test:\n\n", fe)
|
|
206
218
|
except Exception as e:
|
|
@@ -88,6 +88,9 @@ class MarkdownPDFExtractor(PDFExtractor):
|
|
|
88
88
|
|
|
89
89
|
# zerogpu public
|
|
90
90
|
self.nclient= Client("deepak-mehta/ocr-simplify", hf_token= os.getenv('HF_TOKEN', ''))
|
|
91
|
+
elif "DeepSeek-OCR" in self.MODEL_NAME:
|
|
92
|
+
# zerogpu private
|
|
93
|
+
self.dclient= Client("deepak-mehta/deepseek-ocr", hf_token= os.getenv('HF_TOKEN', ''))
|
|
91
94
|
|
|
92
95
|
|
|
93
96
|
self.markdown_content= ""
|
|
@@ -150,7 +153,7 @@ class MarkdownPDFExtractor(PDFExtractor):
|
|
|
150
153
|
)
|
|
151
154
|
# print("response :", response)
|
|
152
155
|
return response.text
|
|
153
|
-
elif 'nanonet' in model_name:
|
|
156
|
+
elif 'nanonet' in model_name or 'DeepSeek-OCR' in model_name:
|
|
154
157
|
|
|
155
158
|
result= ""
|
|
156
159
|
try:
|
|
@@ -159,24 +162,33 @@ class MarkdownPDFExtractor(PDFExtractor):
|
|
|
159
162
|
print("file name: ", temp_file.name)
|
|
160
163
|
gr_image= handle_file(temp_file.name)
|
|
161
164
|
print("gr image : ", gr_image)
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
165
|
+
if 'nanonet' in model_name:
|
|
166
|
+
result = self.nclient.predict(
|
|
167
|
+
# model_name="Nanonets-OCR-s",
|
|
168
|
+
# text= prompt,
|
|
169
|
+
gr_image,
|
|
170
|
+
# max_new_tokens=max_new_tokens,
|
|
171
|
+
# temperature=0.6,
|
|
172
|
+
# top_p=0.9,
|
|
173
|
+
# top_k=50,
|
|
174
|
+
# repetition_penalty=1.2,
|
|
175
|
+
|
|
176
|
+
# prithiv model
|
|
177
|
+
# api_name="/generate_image"
|
|
178
|
+
|
|
179
|
+
max_new_tokens,
|
|
180
|
+
|
|
181
|
+
# spaces zerogpu
|
|
182
|
+
api_name="/predict"
|
|
183
|
+
)
|
|
184
|
+
else:
|
|
185
|
+
result = self.dclient.predict(
|
|
186
|
+
file_input=gr_image,
|
|
187
|
+
prompt_type= 'markdown',
|
|
188
|
+
|
|
189
|
+
# spaces zerogpu
|
|
190
|
+
api_name="/predict"
|
|
191
|
+
)
|
|
180
192
|
print("ocr'd: ", result[:100] + "...")
|
|
181
193
|
except Exception as e:
|
|
182
194
|
print("Error during nanonet inference", e)
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
silly willy mistake
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
e3c62c141fc65ef2be0095c49b23e06263f0b734
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
e3c62c141fc65ef2be0095c49b23e06263f0b734
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|