pembot 0.1.7__tar.gz → 0.1.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pembot might be problematic. Click here for more details.

Files changed (272)
  1. {pembot-0.1.7 → pembot-0.1.9}/PKG-INFO +1 -1
  2. pembot-0.1.9/pembot/.git/COMMIT_EDITMSG +1 -0
  3. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/index +0 -0
  4. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/logs/HEAD +2 -0
  5. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/logs/refs/heads/main +2 -0
  6. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/logs/refs/remotes/origin/main +2 -0
  7. pembot-0.1.9/pembot/.git/objects/06/ef9ad559094e5b48fe2a1b437dca5cea07c06b +0 -0
  8. pembot-0.1.9/pembot/.git/objects/0a/2121ea3115562cc205df572ea26532aaac5244 +0 -0
  9. pembot-0.1.9/pembot/.git/objects/3c/80f6d984dff52ee250436fcf3da2e809967ae4 +0 -0
  10. pembot-0.1.9/pembot/.git/objects/41/cca8bf63122d1044d2fb36f63467ccd500832e +0 -0
  11. pembot-0.1.9/pembot/.git/objects/55/a26fb846654d84aacea136307a35fb0c46c9c8 +0 -0
  12. pembot-0.1.9/pembot/.git/objects/82/f733fe4edc22fe2f4caa889d2533b24a7bf9df +0 -0
  13. pembot-0.1.9/pembot/.git/objects/8d/58ec13be39949ecfe7211b42c56acd2a83dc72 +1 -0
  14. pembot-0.1.9/pembot/.git/objects/af/66fa89b4e1d1a8ac32dca38126f5510faea126 +0 -0
  15. pembot-0.1.9/pembot/.git/objects/c0/e6cf6ef4d7f0100113213d8bab75b966cd79ef +0 -0
  16. pembot-0.1.9/pembot/.git/objects/c9/d9d9b3a93b142e8b7266fc4e3e2417128a0b32 +0 -0
  17. pembot-0.1.9/pembot/.git/objects/cc/348ab3677f744f8d7cd8b2ac7eb775528cfb1c +0 -0
  18. pembot-0.1.9/pembot/.git/objects/d3/508f1537e9bd48bc784da569e14c342bc9c05e +0 -0
  19. pembot-0.1.9/pembot/.git/objects/d4/40b20aae1265dabbd3ddaafb24c35e40e3ab3c +0 -0
  20. pembot-0.1.9/pembot/.git/objects/ef/141ba9482c729796968be2e562b1488a1e5552 +0 -0
  21. pembot-0.1.9/pembot/.git/refs/heads/main +1 -0
  22. pembot-0.1.9/pembot/.git/refs/remotes/origin/main +1 -0
  23. {pembot-0.1.7 → pembot-0.1.9}/pembot/AnyToText/convertor.py +58 -46
  24. {pembot-0.1.7 → pembot-0.1.9}/pembot/__init__.py +1 -1
  25. {pembot-0.1.7 → pembot-0.1.9}/pembot/config/config.yaml +1 -1
  26. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/extract.py +31 -19
  27. {pembot-0.1.7 → pembot-0.1.9}/pembot/requirements.txt +1 -1
  28. pembot-0.1.7/pembot/.git/COMMIT_EDITMSG +0 -1
  29. pembot-0.1.7/pembot/.git/refs/heads/main +0 -1
  30. pembot-0.1.7/pembot/.git/refs/remotes/origin/main +0 -1
  31. {pembot-0.1.7 → pembot-0.1.9}/LICENSE +0 -0
  32. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/HEAD +0 -0
  33. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/config +0 -0
  34. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/description +0 -0
  35. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/hooks/applypatch-msg.sample +0 -0
  36. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/hooks/commit-msg.sample +0 -0
  37. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/hooks/fsmonitor-watchman.sample +0 -0
  38. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/hooks/post-update.sample +0 -0
  39. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/hooks/pre-applypatch.sample +0 -0
  40. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/hooks/pre-commit.sample +0 -0
  41. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/hooks/pre-merge-commit.sample +0 -0
  42. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/hooks/pre-push.sample +0 -0
  43. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/hooks/pre-rebase.sample +0 -0
  44. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/hooks/pre-receive.sample +0 -0
  45. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/hooks/prepare-commit-msg.sample +0 -0
  46. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/hooks/push-to-checkout.sample +0 -0
  47. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/hooks/sendemail-validate.sample +0 -0
  48. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/hooks/update.sample +0 -0
  49. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/info/exclude +0 -0
  50. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/logs/refs/remotes/origin/HEAD +0 -0
  51. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/00/3ba85af0ed7b9f6ab099ca298c3d0c18fb002b +0 -0
  52. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/01/5f71967c525963c827d7fe5415ae2c040c4a64 +0 -0
  53. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/05/5e82e69847a636258cb994bb920c03a93b5ff4 +0 -0
  54. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/06/f7563094fe405dfd9c69f05e357f4e20fc5979 +0 -0
  55. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/09/ee34d94dffd4c286df1d6d528b2e98e2a6cce2 +0 -0
  56. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/0a/6dd69f9258bba08a669efec17ff170fdd1509e +0 -0
  57. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/0a/fb3a98cdc55b1434b44534ec2bf22c56cfa26c +0 -0
  58. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/0b/db4169fc0f312b8698f1df17a258fff163aeaa +0 -0
  59. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/0c/8d9b2690545bf1906b05cd9f18b783b3eb74f1 +0 -0
  60. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/0c/ab66ffbaf50ef60dd41f3498595ebd2526b33c +0 -0
  61. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/0d/28f73897db0c9a9351ee9e64d2a0fe27db2705 +0 -0
  62. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/0e/120123bfadfd594220963f3bbca54056bab6ee +0 -0
  63. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/0e/6b7f7409a88aa2595206b53112a666e4dca8a2 +0 -0
  64. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/0f/ccea3d0db4864a854f8b2c13b9f76b3601d200 +0 -0
  65. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/10/9d1ca0463ea42bbbc435bcb43a90711211cf49 +0 -0
  66. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/10/d1fb81ceede7365dbe132a770a49026e86e9a5 +0 -0
  67. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/14/22c2d0cf79fd928ff7e2d77f96ad5b40cc2a31 +0 -0
  68. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/18/28e18ab80aa64d334b26428708140e280cbc63 +0 -0
  69. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/19/f61df7dbd562d04f561288677bbf2f18f5dff7 +0 -0
  70. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/1f/1fe55f9a705cce752d77718eb870b2c5160138 +0 -0
  71. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/1f/791d08c432b4244a670517c87ada2181159101 +0 -0
  72. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/1f/83a471c8119f7794d98c049170a5d7d07a4b71 +0 -0
  73. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/20/3b390ad0aeb3bc5a8540840b004e6a42e5ce7a +0 -0
  74. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/24/028fb58c81ceb1ab2b577ae590afb49d598d2e +0 -0
  75. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/27/02d55c4513a6d23e577aa2f104982c8b9436b2 +0 -0
  76. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/28/db0ab48059acccd7d257aa02e52e9b6b83a4a5 +0 -0
  77. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/2c/69d405c54cb48aa2a3054326420a64698bd7ef +0 -0
  78. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/2f/f2a190e96dded527d8dd1ab00b706f95348d99 +0 -0
  79. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/35/97e518a8658280be9f377f78edf1dfa1f23814 +0 -0
  80. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/37/175696b3ca7a5d17379f03fb61a1023d50aeba +0 -0
  81. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/3a/54acc088992fa8e890b93e83115ec6dc019835 +0 -0
  82. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/3d/07d3b29ff53d95de3898fb786d61732f210515 +0 -0
  83. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/3d/e536f9c1fd05a23c2dec66423ed610afb0cf5f +0 -0
  84. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/3e/23850624fcf5f111d6ea88ddd64adf924cf82f +0 -0
  85. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/3e/cf23eb95123287531d708a21d4ba88d92ccabb +0 -0
  86. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/3f/78215d7e17da726fb352fd92b3c117db9b63ba +0 -0
  87. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/3f/e072cf3cb6a9f30c3e9936e3ddf622e80270d0 +0 -0
  88. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/41/ae8fa8f8baa2daee5ec0aa21ae17922ae051a0 +0 -0
  89. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/41/cbeb6bcb4c6fa9ef9be571082d95ecb4ea0ee3 +0 -0
  90. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/42/ef76e19df247993cf5b64aba5dccaf8587a375 +0 -0
  91. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/42/f03e1b66aa56bbb36a1c3a8dea9e1e727faffa +0 -0
  92. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/44/86da0f89c566c3bae8abf435d37aeca87f1632 +0 -0
  93. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/44/9dea1ca63d6f1e47d119b36576acc94822a37c +0 -0
  94. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/48/b71bba3a3f9887828863521c13901eceb54331 +0 -0
  95. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/4b/c4370a037feed828cca0915ebb0bb94b24a9d4 +0 -0
  96. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/4d/a03134f70896f72053fbdc0cd4f4c76d4ac1d8 +0 -0
  97. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/50/39b29fda67743a044993436df6a4a1db7b8888 +0 -0
  98. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/51/9e780574933d7627a083222bd10dd74f430904 +0 -0
  99. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/59/69ac8b9d6b44a601385c3ed8c710a69d05216e +0 -0
  100. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/5b/efa3b2f18d2b5d332c6de503a7054f4af0569f +0 -0
  101. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/5c/4f01d3ce9e243bbb8a693f97e5c7d13a857cb7 +0 -0
  102. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/5d/d4656bca3d7605598a799d93fcbf23a789d91a +0 -0
  103. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/61/46a371b9c1bd9f51af273f11f986cfd1bedeba +0 -0
  104. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/63/1700a51c8fa97b543991f5f61bfcd1e7e1327d +0 -0
  105. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/64/00040794955d17c9a1fe1aaaea59f2c4822177 +0 -0
  106. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/67/e48960910bc2dc300e00ee2edec8680ffc5c01 +0 -0
  107. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/69/667188acc459d1f889fad69f5e5507e2188ced +0 -0
  108. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/6c/16510b4663cd506978f49e8ec05b25862a1d3e +0 -0
  109. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/6d/7a865a23b1cb4182f67907820104ced48b11c9 +0 -0
  110. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/71/014c7a6c8c98449a26ef966485a6cf30a1974d +0 -0
  111. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/72/f047cda92abcd1ddc857f6461de605f8668331 +0 -0
  112. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/73/2e98f08bc806c331b06847fc8c743f545499e5 +0 -0
  113. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/73/5b5f6d515f0816599343f1ae7ccffc1d5a487e +0 -0
  114. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/74/5c54e85b3ea7bfc8a8f35edc907746c29f8663 +0 -0
  115. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/75/321fbcd2be44a548400fbacbf5bcb71e3810fd +0 -0
  116. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/78/4aa28d912b66e07748483efe0326c70d7541a5 +0 -0
  117. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/79/4431c1d34c60b4f3fb963823f77f33bd947cc7 +0 -0
  118. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/7a/7d28b0313a3d9d509823faaae31949af8610ef +0 -0
  119. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/7e/0907822f7d316ebe0be07e1f6918bef412c80b +0 -0
  120. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/81/d01e1c63d48b096c77aae83471d42272ca9fce +0 -0
  121. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/86/cdaec229f1fbebf43042266b03878944669f25 +0 -0
  122. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/87/d6df5217a4a374f8c1211a05f9bd657f72c9a7 +0 -0
  123. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/88/0c3d45ac59940344dfb6c45005f7e908173138 +0 -0
  124. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/89/d2439385c82b98104f27edf39bcf28a631233f +0 -0
  125. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/8b/5be2af9b16f290549193859c214cd9072212e8 +0 -0
  126. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/8d/adc1d7891c79de24ba2c7c38b4c830bf61870a +0 -0
  127. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/8f/c00bf69f4ad3e50c13acc4a0988b6c0fe72b5a +0 -0
  128. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/90/f067b86364ab243a7e3bc75f936319ba9eac88 +0 -0
  129. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/92/2448ecc557be58195468561e475b904bd1b349 +0 -0
  130. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/93/652290aac46c69b1b4dd83062b6cfe648dd643 +0 -0
  131. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/93/8f29d9b4b1ae86e39dddf9e3d115a82ddfc9b6 +0 -0
  132. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/95/28bbccd167e3f4ad583a1ae9fac98a52620e27 +0 -0
  133. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/97/b7aaa5c8b45f5471c9d39893cd4c893da6f444 +0 -0
  134. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/99/89463f57f1f2931e5973bd543c80f18b0204bc +0 -0
  135. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/9b/123713e30fc9e225f9ac8ff5b02f8f8cf86456 +0 -0
  136. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/9f/bc171dae3f6b60eaf86ed522b0adf6b123ec85 +0 -0
  137. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/a8/98d2c3947d30d8be64bd2bbcef68f956d5456b +0 -0
  138. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/a9/d5e349fa091647742b09eb3d05bac8f58fe547 +0 -0
  139. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/ab/139d2cd4798dd8e2c565b80440b1a44b376126 +0 -0
  140. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/ab/c6b15265171457b41e2cfdaf3b8c3994a59eb7 +0 -0
  141. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/ab/f77db148e3fb3b26913af14ae43130396f3269 +0 -0
  142. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/ac/9c9018c62fa30dc142665c1b5a375f4e056880 +0 -0
  143. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/ac/e2b51971b1bbade48afffc8d117c74d18f123c +0 -0
  144. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/af/80ddb5890f062e364ea8ade2d602df4e12de8c +0 -0
  145. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/b1/1173d9b68db117437ccb9551461152e1e8a77d +0 -0
  146. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/b1/ddf2869bc7d213b35dabd6fa5bfae44cd6b7a7 +0 -0
  147. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/b2/4e79ab07fe9e68781961a25ff9f1dbb1546fbb +0 -0
  148. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/b8/884c6145221ac66f84bf88919754c2cb05c12d +0 -0
  149. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/b8/eea52176ffa4d88c5a9976bee26092421565d3 +0 -0
  150. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/bb/a495d8e72b78fefcc534259b8edae9a3172d15 +0 -0
  151. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/bd/8fd1cb166996e74a8631f3a6f764a53af75297 +0 -0
  152. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/bf/068a0714e2145de83a5c004f4213b091439d0e +0 -0
  153. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/bf/32a7e6872e5dc4025ee3df3c921ec7ade0855f +0 -0
  154. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/bf/518686b06069d2a8abd3689908b7e1a6e16b05 +0 -0
  155. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/c0/793458db6e1bee7f79f1a504fb8ff4963f8ed3 +0 -0
  156. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/c0/f948ab4636a125bc202368e6c9cbe80d76169a +0 -0
  157. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/c2/443060c07101948487cfa93cc39e082e9e0f5f +0 -0
  158. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/c2/926f040b089a52edfb8351480f63619ab7e0ab +0 -0
  159. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/c2/ce19d575a8cccf6886862c4fa6afefba142511 +0 -0
  160. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/c3/cc0da3d955ecec0f865c46c030a0c073697495 +0 -0
  161. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/c6/b72ea9f8856d3bde28cb75775ebea9840535b8 +0 -0
  162. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/c9/efe79dee4c91d4bb8c3d3c6e01ff70ff79a722 +0 -0
  163. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/ce/a4ffc1cf5eab61a2a0abd8f6dc941b580b69fd +0 -0
  164. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/d0/937f7d832266337289d5ec09459f931a46fcf7 +0 -0
  165. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/d9/ec420cb55a82e7efbc8564e30ec7f4c0f6021e +0 -0
  166. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/dd/82bd16a51b9bad8241d9fb46619b1c6755cafe +0 -0
  167. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/e0/9162dbd64d85bb5ed740aa99faefa73f293d78 +0 -0
  168. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/e0/da740b542afc451c45b9b4be6c0c7a3c79b06c +0 -0
  169. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/e3/c62c141fc65ef2be0095c49b23e06263f0b734 +0 -0
  170. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/e3/da98f3722c2d0c937db0872836fc4491e4487a +0 -0
  171. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/e5/3070f2b07f45d031444b09b1b38658f3caf29e +0 -0
  172. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/e6/adbc3c373070269f97ef82d4f63027d7878f67 +0 -0
  173. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/e7/911a702079a6144997ea4e70f59abbe59ec2bc +0 -0
  174. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/e8/9cb4f5af158d26dcff5eed03dba6671a818739 +0 -0
  175. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/e9/1172752e9a421ae463112d2b0506b37498c98d +0 -0
  176. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/ea/0af89e61a882c5afc2a8c281b2d96f174bfe58 +0 -0
  177. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/eb/75e1c49f1e5b79dca17ccdbec8067756523238 +0 -0
  178. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/ee/a73c7f24094ed83b014f7cfce46e10f817bec8 +0 -0
  179. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/ef/0503a60244391590b16042019032e91d7cc30d +0 -0
  180. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/ef/3488a3c636d73d82ad138e70a92453249b7f37 +0 -0
  181. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/ef/c899c7f910cfa7a383692eee851cf5af36da8c +0 -0
  182. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/f1/2d2ef8948cbe4b24279bee282f934cf5a1b834 +0 -0
  183. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/f1/3181b12cf4d539e635bf94ad6e950d68cedaf1 +0 -0
  184. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/f1/655afa1c5636c8d58969e3194bb770aefbc552 +0 -0
  185. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/f2/14d4d56726e2928479c5948bd88e038cf70b2e +0 -0
  186. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/f4/e991088a63def67a30a2b8bbdb4d58514abab8 +0 -0
  187. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/f6/b1d54483ce20fbcb252a8a93a5eff7bec88729 +0 -0
  188. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/f8/6fbd490878cb0d3c35cc4443672d1309171bf1 +0 -0
  189. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/f8/cbb5bfd1503e66cec2c593362c60a317b6d300 +0 -0
  190. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/f9/98e1f01c2bf0a20159fc851327af05beb3ac88 +0 -0
  191. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/fa/9c9a62ec1203a5868b033ded428c2382c4e1b6 +0 -0
  192. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/fb/6c90c9ce5e0cdfbe074a3f060afc66f62eefde +0 -0
  193. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/fc/988aab7e2d46396dc595ad24345e8e77dda0e4 +0 -0
  194. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/fc/e56f1e09d09a05b9babf796fb40bece176f3a2 +0 -0
  195. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/fd/abd48f3e947a9f420003446dd118c5295346a5 +0 -0
  196. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/fe/24641c63f1d906091930cc7e77e448f025814e +0 -0
  197. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/fe/cc5d8154b1e77e4c6beb23ce9cbe8fea55d34d +0 -0
  198. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/pack/pack-d5469edc8c36e3bb1de5e0070e4d5b1eae935dd4.idx +0 -0
  199. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/pack/pack-d5469edc8c36e3bb1de5e0070e4d5b1eae935dd4.pack +0 -0
  200. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/objects/pack/pack-d5469edc8c36e3bb1de5e0070e4d5b1eae935dd4.rev +0 -0
  201. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/packed-refs +0 -0
  202. {pembot-0.1.7 → pembot-0.1.9}/pembot/.git/refs/remotes/origin/HEAD +0 -0
  203. {pembot-0.1.7 → pembot-0.1.9}/pembot/.gitignore +0 -0
  204. {pembot-0.1.7 → pembot-0.1.9}/pembot/AnyToText/__init__.py +0 -0
  205. {pembot-0.1.7 → pembot-0.1.9}/pembot/LICENSE +0 -0
  206. {pembot-0.1.7 → pembot-0.1.9}/pembot/TextEmbedder/__init__.py +0 -0
  207. {pembot-0.1.7 → pembot-0.1.9}/pembot/TextEmbedder/gemini_embedder.py +0 -0
  208. {pembot-0.1.7 → pembot-0.1.9}/pembot/TextEmbedder/mongodb_embedder.py +0 -0
  209. {pembot-0.1.7 → pembot-0.1.9}/pembot/TextEmbedder/mongodb_index_creator.py +0 -0
  210. {pembot-0.1.7 → pembot-0.1.9}/pembot/TextEmbedder/vector_query.py +0 -0
  211. {pembot-0.1.7 → pembot-0.1.9}/pembot/gartner.py +0 -0
  212. {pembot-0.1.7 → pembot-0.1.9}/pembot/main.py +0 -0
  213. {pembot-0.1.7 → pembot-0.1.9}/pembot/output_structure_local.py +0 -0
  214. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/COMMIT_EDITMSG +0 -0
  215. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/HEAD +0 -0
  216. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/config +0 -0
  217. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/description +0 -0
  218. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/hooks/applypatch-msg.sample +0 -0
  219. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/hooks/commit-msg.sample +0 -0
  220. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/hooks/fsmonitor-watchman.sample +0 -0
  221. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/hooks/post-update.sample +0 -0
  222. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/hooks/pre-applypatch.sample +0 -0
  223. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/hooks/pre-commit.sample +0 -0
  224. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/hooks/pre-merge-commit.sample +0 -0
  225. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/hooks/pre-push.sample +0 -0
  226. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/hooks/pre-rebase.sample +0 -0
  227. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/hooks/pre-receive.sample +0 -0
  228. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/hooks/prepare-commit-msg.sample +0 -0
  229. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/hooks/push-to-checkout.sample +0 -0
  230. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/hooks/sendemail-validate.sample +0 -0
  231. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/hooks/update.sample +0 -0
  232. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/index +0 -0
  233. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/info/exclude +0 -0
  234. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/logs/HEAD +0 -0
  235. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/logs/refs/heads/main +0 -0
  236. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/logs/refs/remotes/myorigin/main +0 -0
  237. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/logs/refs/remotes/origin/HEAD +0 -0
  238. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/objects/14/251b198e0bac39a3dc3b42f9e57b20c01465fb +0 -0
  239. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/objects/24/7b15a6b1e0e3d270c05af184f048736376cd4e +0 -0
  240. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/objects/24/8f03b5f969a7fbd396b496f40b57f0ae81c148 +0 -0
  241. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/objects/57/74dc9c3901d2ffb2cd7dafe2ad6612a7f9f42c +0 -0
  242. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/objects/72/2dc14f82e78ce41717348b256e0c17834933b4 +0 -0
  243. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/objects/79/eb7b93ced70e399bd561093c45de7641414dbd +0 -0
  244. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/objects/8d/9ce1fd9733a78c592b34af9c94b98960c601ed +0 -0
  245. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/objects/95/745843bb4377d6042180daeda818c0b16fd493 +0 -0
  246. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/objects/a5/c6dfb577782c259990dcf977e355298e923428 +0 -0
  247. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/objects/a7/4bcd5e67cb1066dd504b92b42390fe0b2c3d38 +0 -0
  248. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/objects/b4/8d697aa9fd97151eb2a84a1af5d408b7630232 +0 -0
  249. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/objects/b8/702320e56074e9680181d8b7897d6a0a552e2d +0 -0
  250. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/objects/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391 +0 -0
  251. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/objects/f3/b2d76c75bbd50e04fc4c2ad17fc94ca6daed32 +0 -0
  252. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/objects/pack/pack-d3051affdd6c31306dc53489168fc870872085d1.idx +0 -0
  253. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/objects/pack/pack-d3051affdd6c31306dc53489168fc870872085d1.pack +0 -0
  254. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/objects/pack/pack-d3051affdd6c31306dc53489168fc870872085d1.rev +0 -0
  255. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/packed-refs +0 -0
  256. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/refs/heads/main +0 -0
  257. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/refs/remotes/myorigin/main +0 -0
  258. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/.git/refs/remotes/origin/HEAD +0 -0
  259. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/LICENSE +0 -0
  260. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/README.md +0 -0
  261. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/__init__.py +0 -0
  262. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/config/config.yaml +0 -0
  263. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/pyrightconfig.json +0 -0
  264. {pembot-0.1.7 → pembot-0.1.9}/pembot/pdf2markdown/requirements.txt +0 -0
  265. {pembot-0.1.7 → pembot-0.1.9}/pembot/pem.py +0 -0
  266. {pembot-0.1.7 → pembot-0.1.9}/pembot/pyrightconfig.json +0 -0
  267. {pembot-0.1.7 → pembot-0.1.9}/pembot/query.py +0 -0
  268. {pembot-0.1.7 → pembot-0.1.9}/pembot/search.py +0 -0
  269. {pembot-0.1.7 → pembot-0.1.9}/pembot/utils/__init__.py +0 -0
  270. {pembot-0.1.7 → pembot-0.1.9}/pembot/utils/inference_client.py +0 -0
  271. {pembot-0.1.7 → pembot-0.1.9}/pembot/utils/string_tools.py +0 -0
  272. {pembot-0.1.7 → pembot-0.1.9}/pyproject.toml +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pembot
3
- Version: 0.1.7
3
+ Version: 0.1.9
4
4
  Summary: A Python Package to convert PEM blog content to usseful information by leveraging LLMs
5
5
  Author-email: cyto <aryan_sidhwani@protonmail.com>
6
6
  License-Expression: MIT
@@ -0,0 +1 @@
1
+ added DeepSeek-OCR as a model option to use spaces
@@ -19,3 +19,5 @@ c3cc0da3d955ecec0f865c46c030a0c073697495 e6adbc3c373070269f97ef82d4f63027d7878f6
19
19
  e6adbc3c373070269f97ef82d4f63027d7878f67 81d01e1c63d48b096c77aae83471d42272ca9fce cyto <silverstone965@gmail.com> 1758890262 +0530 commit: cyto/fixed a file already open error and wrote to file if myfile is excel type and output_dir is given
20
20
  81d01e1c63d48b096c77aae83471d42272ca9fce e89cb4f5af158d26dcff5eed03dba6671a818739 cyto <silverstone965@gmail.com> 1758891589 +0530 commit: cyto/made the excel convertor compatible with suffix input from both system file and with bytes + manual suffix input
21
21
  e89cb4f5af158d26dcff5eed03dba6671a818739 e3c62c141fc65ef2be0095c49b23e06263f0b734 cyto <silverstone965@gmail.com> 1758892053 +0530 commit: silly willy mistake
22
+ e3c62c141fc65ef2be0095c49b23e06263f0b734 d440b20aae1265dabbd3ddaafb24c35e40e3ab3c cyto <silverstone965@gmail.com> 1759127664 +0530 commit: cyto/fixed the excel file input bug, in the conversion method; added exceptions where there is invalid input
23
+ d440b20aae1265dabbd3ddaafb24c35e40e3ab3c 8d58ec13be39949ecfe7211b42c56acd2a83dc72 cyto <silverstone965@gmail.com> 1761392079 +0530 commit: added DeepSeek-OCR as a model option to use spaces
@@ -19,3 +19,5 @@ c3cc0da3d955ecec0f865c46c030a0c073697495 e6adbc3c373070269f97ef82d4f63027d7878f6
19
19
  e6adbc3c373070269f97ef82d4f63027d7878f67 81d01e1c63d48b096c77aae83471d42272ca9fce cyto <silverstone965@gmail.com> 1758890262 +0530 commit: cyto/fixed a file already open error and wrote to file if myfile is excel type and output_dir is given
20
20
  81d01e1c63d48b096c77aae83471d42272ca9fce e89cb4f5af158d26dcff5eed03dba6671a818739 cyto <silverstone965@gmail.com> 1758891589 +0530 commit: cyto/made the excel convertor compatible with suffix input from both system file and with bytes + manual suffix input
21
21
  e89cb4f5af158d26dcff5eed03dba6671a818739 e3c62c141fc65ef2be0095c49b23e06263f0b734 cyto <silverstone965@gmail.com> 1758892053 +0530 commit: silly willy mistake
22
+ e3c62c141fc65ef2be0095c49b23e06263f0b734 d440b20aae1265dabbd3ddaafb24c35e40e3ab3c cyto <silverstone965@gmail.com> 1759127664 +0530 commit: cyto/fixed the excel file input bug, in the conversion method; added exceptions where there is invalid input
23
+ d440b20aae1265dabbd3ddaafb24c35e40e3ab3c 8d58ec13be39949ecfe7211b42c56acd2a83dc72 cyto <silverstone965@gmail.com> 1761392079 +0530 commit: added DeepSeek-OCR as a model option to use spaces
@@ -18,3 +18,5 @@ c3cc0da3d955ecec0f865c46c030a0c073697495 e6adbc3c373070269f97ef82d4f63027d7878f6
18
18
  e6adbc3c373070269f97ef82d4f63027d7878f67 81d01e1c63d48b096c77aae83471d42272ca9fce cyto <silverstone965@gmail.com> 1758890281 +0530 update by push
19
19
  81d01e1c63d48b096c77aae83471d42272ca9fce e89cb4f5af158d26dcff5eed03dba6671a818739 cyto <silverstone965@gmail.com> 1758891600 +0530 update by push
20
20
  e89cb4f5af158d26dcff5eed03dba6671a818739 e3c62c141fc65ef2be0095c49b23e06263f0b734 cyto <silverstone965@gmail.com> 1758892065 +0530 update by push
21
+ e3c62c141fc65ef2be0095c49b23e06263f0b734 d440b20aae1265dabbd3ddaafb24c35e40e3ab3c cyto <silverstone965@gmail.com> 1759127680 +0530 update by push
22
+ d440b20aae1265dabbd3ddaafb24c35e40e3ab3c 8d58ec13be39949ecfe7211b42c56acd2a83dc72 cyto <silverstone965@gmail.com> 1761392127 +0530 update by push
@@ -0,0 +1 @@
1
+ x��Mj�0@�u�ٗ�ѯc(��� � F3��Բ��z�� �~��q-e���}�*�L�<%o��!���O�40KD<{�f�]�f�Dj]�B9�!�� �TOٳ�[����Wxm��{�u�1���B��µ������a�g�͡�`�RC"*�}����>.@
@@ -0,0 +1 @@
1
+ 8d58ec13be39949ecfe7211b42c56acd2a83dc72
@@ -0,0 +1 @@
1
+ 8d58ec13be39949ecfe7211b42c56acd2a83dc72
@@ -5,7 +5,6 @@ from pembot.pdf2markdown.extract import MarkdownPDFExtractor
5
5
  import os
6
6
  import pandas as pd
7
7
  from typing import Literal, Union
8
- import tempfile
9
8
  from datetime import datetime, date
10
9
  from tabulate import tabulate
11
10
 
@@ -36,6 +35,10 @@ class Convertor():
36
35
  self.input_filepath= None
37
36
  self.output= ""
38
37
  self.suffix= suffix
38
+ self.file_bytes= file_bytes
39
+
40
+ if (file_bytes and not suffix) or (file_bytes and not file_type):
41
+ raise Exception("wrong use of convertor library, have to give suffix and file_type along with the file_bytes")
39
42
 
40
43
 
41
44
  if model_name is None:
@@ -44,7 +47,7 @@ class Convertor():
44
47
 
45
48
  # file_type can be pdf, excel, etc.
46
49
  if file_bytes and suffix:
47
- with tempfile.TemporaryDirectory() as dp:
50
+ with TemporaryDirectory() as dp:
48
51
  output_dir = Path(dp)
49
52
  myfile = output_dir / f"input{suffix}"
50
53
  myfile.write_bytes(file_bytes)
@@ -102,42 +105,51 @@ class Convertor():
102
105
  markdown_output = []
103
106
 
104
107
  file_suffix= ''
105
- try:
106
- if not input_filepath.exists():
107
- file_suffix= self.suffix
108
- else:
109
- file_suffix = input_filepath.suffix.lower()
110
-
111
- current_engine: PandasReadEngineType = excel_ods_engine
112
108
 
113
- if file_suffix in ['.xls', '.xlsx', '.ods']:
114
- if file_suffix == '.ods':
115
- if current_engine is None:
116
- current_engine = 'odf'
117
- elif current_engine != 'odf':
118
- print(f"Warning: Specified engine '{current_engine}' may not be optimal for ODS. Forcing 'odf'.")
119
- current_engine = 'odf'
120
-
121
- excel_file = pd.ExcelFile(input_filepath, engine=current_engine)
122
- if not excel_file.sheet_names:
123
- return f"Warning: File '{input_filepath.name}' contains no sheets."
124
-
125
- for sheet_name in excel_file.sheet_names:
126
- df = excel_file.parse(sheet_name)
127
- markdown_output.append(f"## {sheet_name}\n")
109
+ try:
110
+ with TemporaryDirectory() as dp:
111
+ dp_path= Path(dp)
112
+
113
+ if input_filepath is None:
114
+ file_suffix= self.suffix
115
+ if file_suffix and self.file_bytes:
116
+ input_filepath= dp_path / ("my_excel_file" + file_suffix)
117
+ input_filepath.write_bytes(self.file_bytes)
118
+ else:
119
+ raise Exception("no input file name, and no file bytes either")
120
+ elif input_filepath.exists():
121
+ file_suffix = input_filepath.suffix.lower()
122
+
123
+ current_engine: PandasReadEngineType = excel_ods_engine
124
+
125
+ if file_suffix in ['.xls', '.xlsx', '.ods']:
126
+ if file_suffix == '.ods':
127
+ if current_engine is None:
128
+ current_engine = 'odf'
129
+ elif current_engine != 'odf':
130
+ print(f"Warning: Specified engine '{current_engine}' may not be optimal for ODS. Forcing 'odf'.")
131
+ current_engine = 'odf'
132
+
133
+ excel_file = pd.ExcelFile(input_filepath, engine=current_engine)
134
+ if not excel_file.sheet_names:
135
+ return f"Warning: File '{input_filepath.name}' contains no sheets."
136
+
137
+ for sheet_name in excel_file.sheet_names:
138
+ df = excel_file.parse(sheet_name)
139
+ markdown_output.append(f"## {sheet_name}\n")
140
+ markdown_table = tabulate(df, headers='keys', tablefmt='pipe')
141
+ markdown_output.append(markdown_table)
142
+ markdown_output.append("\n")
143
+
144
+ return "\n".join(markdown_output)
145
+
146
+ elif file_suffix == '.csv':
147
+ df = pd.read_csv(input_filepath)
128
148
  markdown_table = tabulate(df, headers='keys', tablefmt='pipe')
129
- markdown_output.append(markdown_table)
130
- markdown_output.append("\n")
149
+ return markdown_table
131
150
 
132
- return "\n".join(markdown_output)
133
-
134
- elif file_suffix == '.csv':
135
- df = pd.read_csv(input_filepath)
136
- markdown_table = tabulate(df, headers='keys', tablefmt='pipe')
137
- return markdown_table
138
-
139
- else:
140
- return f"Error: Unsupported file type: '{file_suffix}'. Please provide a CSV, XLS, XLSX, or ODS file."
151
+ else:
152
+ return f"Error: Unsupported file type: '{file_suffix}'. Please provide a CSV, XLS, XLSX, or ODS file."
141
153
 
142
154
  except ImportError as ie:
143
155
  if 'odfpy' in str(ie).lower() and file_suffix == '.ods':
@@ -183,10 +195,10 @@ if __name__ == '__main__':
183
195
  # conv= Convertor(file_bytes= imgpdf.read(), suffix= ".pdf", file_type= "pdf")
184
196
  # print(conv.output)
185
197
 
186
- # print("Test 2: JD pdf, bytes")
187
- # with open("/home/cyto/dev/pembotdir/jds/PM Trainee.pdf", "rb") as imgpdf:
188
- # conv= Convertor(file_bytes= imgpdf.read(), suffix= ".pdf", file_type= "pdf")
189
- # print(conv.output)
198
+ print("Test 2: balance sheet, bytes")
199
+ with open("/home/cyto/Downloads/balance_sheet_2023-24_final.xlsx", "rb") as xl:
200
+ conv= Convertor(file_bytes= xl.read(), suffix= ".xlsx", file_type= "excel")
201
+ print(conv.output)
190
202
 
191
203
  print("Test 3: excel schedule, bytes")
192
204
  with open("/home/cyto/Downloads/Assignment schedule.xlsx", "rb") as imgpdf:
@@ -194,13 +206,13 @@ if __name__ == '__main__':
194
206
  print(conv.output)
195
207
 
196
208
  # without bytes example:
197
- print("Test 4: scanned pdf, path")
198
- conv= Convertor(myfile= Path('/home/cyto/Documents/scanned.pdf'), output_dir= Path('/home/cyto/Documents'))
199
- print(conv.output)
200
-
201
- print("Test 5: schedule excel, path")
202
- conv= Convertor(myfile= Path('/home/cyto/Downloads/Assignment schedule.xlsx'), output_dir= Path('/home/cyto/Downloads'))
203
- print(conv.output)
209
+ # print("Test 4: scanned pdf, path")
210
+ # conv= Convertor(myfile= Path('/home/cyto/Documents/scanned.pdf'), output_dir= Path('/home/cyto/Documents'))
211
+ # print(conv.output)
212
+ #
213
+ # print("Test 5: schedule excel, path")
214
+ # conv= Convertor(myfile= Path('/home/cyto/Downloads/Assignment schedule.xlsx'), output_dir= Path('/home/cyto/Downloads'))
215
+ # print(conv.output)
204
216
  except FileNotFoundError as fe:
205
217
  print("file not found, modify the driver code to get sample files to test:\n\n", fe)
206
218
  except Exception as e:
@@ -1,6 +1,6 @@
1
1
  """
2
2
  A Python Package to convert PEM blog content to usseful information by leveraging LLMs
3
3
  """
4
- __version__ = '0.1.7'
4
+ __version__ = '0.1.9'
5
5
  from .main import save_to_json_file, make_query
6
6
  __all__ = ["save_to_json_file", "make_query"]
@@ -2,4 +2,4 @@ OUTPUT_DIR: /home/cyto/dev/pembotdir
2
2
  PAGE_DELIMITER: ___________________________ NEXT PAGE ___________________________
3
3
  app:
4
4
  name: pembot
5
- version: 0.1.7
5
+ version: 0.1.9
@@ -88,6 +88,9 @@ class MarkdownPDFExtractor(PDFExtractor):
88
88
 
89
89
  # zerogpu public
90
90
  self.nclient= Client("deepak-mehta/ocr-simplify", hf_token= os.getenv('HF_TOKEN', ''))
91
+ elif "DeepSeek-OCR" in self.MODEL_NAME:
92
+ # zerogpu private
93
+ self.dclient= Client("deepak-mehta/deepseek-ocr", hf_token= os.getenv('HF_TOKEN', ''))
91
94
 
92
95
 
93
96
  self.markdown_content= ""
@@ -150,7 +153,7 @@ class MarkdownPDFExtractor(PDFExtractor):
150
153
  )
151
154
  # print("response :", response)
152
155
  return response.text
153
- elif 'nanonet' in model_name:
156
+ elif 'nanonet' in model_name or 'DeepSeek-OCR' in model_name:
154
157
 
155
158
  result= ""
156
159
  try:
@@ -159,24 +162,33 @@ class MarkdownPDFExtractor(PDFExtractor):
159
162
  print("file name: ", temp_file.name)
160
163
  gr_image= handle_file(temp_file.name)
161
164
  print("gr image : ", gr_image)
162
- result = self.nclient.predict(
163
- # model_name="Nanonets-OCR-s",
164
- # text= prompt,
165
- gr_image,
166
- # max_new_tokens=max_new_tokens,
167
- # temperature=0.6,
168
- # top_p=0.9,
169
- # top_k=50,
170
- # repetition_penalty=1.2,
171
-
172
- # prithiv model
173
- # api_name="/generate_image"
174
-
175
- max_new_tokens,
176
-
177
- # spaces zerogpu
178
- api_name="/predict"
179
- )
165
+ if 'nanonet' in model_name:
166
+ result = self.nclient.predict(
167
+ # model_name="Nanonets-OCR-s",
168
+ # text= prompt,
169
+ gr_image,
170
+ # max_new_tokens=max_new_tokens,
171
+ # temperature=0.6,
172
+ # top_p=0.9,
173
+ # top_k=50,
174
+ # repetition_penalty=1.2,
175
+
176
+ # prithiv model
177
+ # api_name="/generate_image"
178
+
179
+ max_new_tokens,
180
+
181
+ # spaces zerogpu
182
+ api_name="/predict"
183
+ )
184
+ else:
185
+ result = self.dclient.predict(
186
+ file_input=gr_image,
187
+ prompt_type= 'markdown',
188
+
189
+ # spaces zerogpu
190
+ api_name="/predict"
191
+ )
180
192
  print("ocr'd: ", result[:100] + "...")
181
193
  except Exception as e:
182
194
  print("Error during nanonet inference", e)
@@ -45,7 +45,7 @@ pandas==2.3.0
45
45
  pathlib==1.0.1
46
46
  pdfminer.six==20250506
47
47
  pdfplumber==0.11.7
48
- pembot==0.1.7
48
+ pembot==0.1.9
49
49
  pillow==11.3.0
50
50
  primp==0.15.0
51
51
  pyasn1==0.6.1
@@ -1 +0,0 @@
1
- silly willy mistake
@@ -1 +0,0 @@
1
- e3c62c141fc65ef2be0095c49b23e06263f0b734
@@ -1 +0,0 @@
1
- e3c62c141fc65ef2be0095c49b23e06263f0b734
File without changes
File without changes
File without changes
File without changes
File without changes