pembot 0.0.6__tar.gz → 0.0.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pembot might be problematic. Click here for more details.

Files changed (178) hide show
  1. {pembot-0.0.6 → pembot-0.0.7}/PKG-INFO +1 -1
  2. pembot-0.0.7/pembot/.git/COMMIT_EDITMSG +1 -0
  3. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/index +0 -0
  4. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/logs/HEAD +2 -0
  5. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/logs/refs/heads/main +2 -0
  6. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/logs/refs/remotes/origin/main +2 -0
  7. pembot-0.0.7/pembot/.git/objects/0c/ab66ffbaf50ef60dd41f3498595ebd2526b33c +0 -0
  8. pembot-0.0.7/pembot/.git/objects/41/ae8fa8f8baa2daee5ec0aa21ae17922ae051a0 +0 -0
  9. pembot-0.0.7/pembot/.git/objects/50/39b29fda67743a044993436df6a4a1db7b8888 +0 -0
  10. pembot-0.0.7/pembot/.git/objects/7a/7d28b0313a3d9d509823faaae31949af8610ef +0 -0
  11. pembot-0.0.7/pembot/.git/objects/7e/0907822f7d316ebe0be07e1f6918bef412c80b +0 -0
  12. pembot-0.0.7/pembot/.git/objects/af/80ddb5890f062e364ea8ade2d602df4e12de8c +0 -0
  13. pembot-0.0.7/pembot/.git/objects/b8/884c6145221ac66f84bf88919754c2cb05c12d +0 -0
  14. pembot-0.0.7/pembot/.git/objects/ee/a73c7f24094ed83b014f7cfce46e10f817bec8 +0 -0
  15. pembot-0.0.7/pembot/.git/objects/ef/0503a60244391590b16042019032e91d7cc30d +3 -0
  16. pembot-0.0.7/pembot/.git/objects/f6/b1d54483ce20fbcb252a8a93a5eff7bec88729 +0 -0
  17. pembot-0.0.7/pembot/.git/objects/f8/6fbd490878cb0d3c35cc4443672d1309171bf1 +0 -0
  18. pembot-0.0.7/pembot/.git/refs/heads/main +1 -0
  19. pembot-0.0.7/pembot/.git/refs/remotes/origin/main +1 -0
  20. {pembot-0.0.6 → pembot-0.0.7}/pembot/AnyToText/convertor.py +5 -3
  21. {pembot-0.0.6 → pembot-0.0.7}/pembot/__init__.py +1 -1
  22. {pembot-0.0.6 → pembot-0.0.7}/pembot/config/config.yaml +1 -1
  23. pembot-0.0.7/pembot/pdf2markdown/.git/COMMIT_EDITMSG +1 -0
  24. {pembot-0.0.6 → pembot-0.0.7}/pembot/pdf2markdown/.git/index +0 -0
  25. {pembot-0.0.6 → pembot-0.0.7}/pembot/pdf2markdown/.git/logs/HEAD +1 -0
  26. {pembot-0.0.6 → pembot-0.0.7}/pembot/pdf2markdown/.git/logs/refs/heads/main +1 -0
  27. {pembot-0.0.6 → pembot-0.0.7}/pembot/pdf2markdown/.git/logs/refs/remotes/myorigin/main +1 -0
  28. pembot-0.0.7/pembot/pdf2markdown/.git/objects/24/7b15a6b1e0e3d270c05af184f048736376cd4e +0 -0
  29. pembot-0.0.7/pembot/pdf2markdown/.git/objects/a7/4bcd5e67cb1066dd504b92b42390fe0b2c3d38 +0 -0
  30. pembot-0.0.7/pembot/pdf2markdown/.git/objects/f3/b2d76c75bbd50e04fc4c2ad17fc94ca6daed32 +1 -0
  31. pembot-0.0.7/pembot/pdf2markdown/.git/refs/heads/main +1 -0
  32. pembot-0.0.7/pembot/pdf2markdown/.git/refs/remotes/myorigin/main +1 -0
  33. {pembot-0.0.6 → pembot-0.0.7}/pembot/pdf2markdown/extract.py +26 -1
  34. pembot-0.0.6/pembot/.git/COMMIT_EDITMSG +0 -1
  35. pembot-0.0.6/pembot/.git/refs/heads/main +0 -1
  36. pembot-0.0.6/pembot/.git/refs/remotes/origin/main +0 -1
  37. pembot-0.0.6/pembot/pdf2markdown/.git/COMMIT_EDITMSG +0 -1
  38. pembot-0.0.6/pembot/pdf2markdown/.git/refs/heads/main +0 -1
  39. pembot-0.0.6/pembot/pdf2markdown/.git/refs/remotes/myorigin/main +0 -1
  40. {pembot-0.0.6 → pembot-0.0.7}/LICENSE +0 -0
  41. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/HEAD +0 -0
  42. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/config +0 -0
  43. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/description +0 -0
  44. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/hooks/applypatch-msg.sample +0 -0
  45. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/hooks/commit-msg.sample +0 -0
  46. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/hooks/fsmonitor-watchman.sample +0 -0
  47. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/hooks/post-update.sample +0 -0
  48. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/hooks/pre-applypatch.sample +0 -0
  49. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/hooks/pre-commit.sample +0 -0
  50. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/hooks/pre-merge-commit.sample +0 -0
  51. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/hooks/pre-push.sample +0 -0
  52. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/hooks/pre-rebase.sample +0 -0
  53. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/hooks/pre-receive.sample +0 -0
  54. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/hooks/prepare-commit-msg.sample +0 -0
  55. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/hooks/push-to-checkout.sample +0 -0
  56. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/hooks/sendemail-validate.sample +0 -0
  57. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/hooks/update.sample +0 -0
  58. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/info/exclude +0 -0
  59. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/logs/refs/remotes/origin/HEAD +0 -0
  60. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/0a/fb3a98cdc55b1434b44534ec2bf22c56cfa26c +0 -0
  61. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/0b/db4169fc0f312b8698f1df17a258fff163aeaa +0 -0
  62. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/0c/8d9b2690545bf1906b05cd9f18b783b3eb74f1 +0 -0
  63. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/18/28e18ab80aa64d334b26428708140e280cbc63 +0 -0
  64. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/19/f61df7dbd562d04f561288677bbf2f18f5dff7 +0 -0
  65. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/1f/83a471c8119f7794d98c049170a5d7d07a4b71 +0 -0
  66. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/28/db0ab48059acccd7d257aa02e52e9b6b83a4a5 +0 -0
  67. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/35/97e518a8658280be9f377f78edf1dfa1f23814 +0 -0
  68. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/3d/07d3b29ff53d95de3898fb786d61732f210515 +0 -0
  69. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/3e/23850624fcf5f111d6ea88ddd64adf924cf82f +0 -0
  70. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/3e/cf23eb95123287531d708a21d4ba88d92ccabb +0 -0
  71. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/3f/78215d7e17da726fb352fd92b3c117db9b63ba +0 -0
  72. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/3f/e072cf3cb6a9f30c3e9936e3ddf622e80270d0 +0 -0
  73. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/41/cbeb6bcb4c6fa9ef9be571082d95ecb4ea0ee3 +0 -0
  74. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/4d/a03134f70896f72053fbdc0cd4f4c76d4ac1d8 +0 -0
  75. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/51/9e780574933d7627a083222bd10dd74f430904 +0 -0
  76. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/61/46a371b9c1bd9f51af273f11f986cfd1bedeba +0 -0
  77. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/63/1700a51c8fa97b543991f5f61bfcd1e7e1327d +0 -0
  78. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/64/00040794955d17c9a1fe1aaaea59f2c4822177 +0 -0
  79. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/6d/7a865a23b1cb4182f67907820104ced48b11c9 +0 -0
  80. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/72/f047cda92abcd1ddc857f6461de605f8668331 +0 -0
  81. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/73/2e98f08bc806c331b06847fc8c743f545499e5 +0 -0
  82. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/86/cdaec229f1fbebf43042266b03878944669f25 +0 -0
  83. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/87/d6df5217a4a374f8c1211a05f9bd657f72c9a7 +0 -0
  84. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/8b/5be2af9b16f290549193859c214cd9072212e8 +0 -0
  85. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/93/8f29d9b4b1ae86e39dddf9e3d115a82ddfc9b6 +0 -0
  86. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/95/28bbccd167e3f4ad583a1ae9fac98a52620e27 +0 -0
  87. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/9b/123713e30fc9e225f9ac8ff5b02f8f8cf86456 +0 -0
  88. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/ab/139d2cd4798dd8e2c565b80440b1a44b376126 +0 -0
  89. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/ab/c6b15265171457b41e2cfdaf3b8c3994a59eb7 +0 -0
  90. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/ac/9c9018c62fa30dc142665c1b5a375f4e056880 +0 -0
  91. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/b1/1173d9b68db117437ccb9551461152e1e8a77d +0 -0
  92. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/b2/4e79ab07fe9e68781961a25ff9f1dbb1546fbb +0 -0
  93. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/b8/eea52176ffa4d88c5a9976bee26092421565d3 +0 -0
  94. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/bd/8fd1cb166996e74a8631f3a6f764a53af75297 +0 -0
  95. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/bf/068a0714e2145de83a5c004f4213b091439d0e +0 -0
  96. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/bf/32a7e6872e5dc4025ee3df3c921ec7ade0855f +0 -0
  97. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/bf/518686b06069d2a8abd3689908b7e1a6e16b05 +0 -0
  98. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/c0/793458db6e1bee7f79f1a504fb8ff4963f8ed3 +0 -0
  99. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/c2/443060c07101948487cfa93cc39e082e9e0f5f +0 -0
  100. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/d0/937f7d832266337289d5ec09459f931a46fcf7 +0 -0
  101. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/e0/9162dbd64d85bb5ed740aa99faefa73f293d78 +0 -0
  102. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/e5/3070f2b07f45d031444b09b1b38658f3caf29e +0 -0
  103. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/e7/911a702079a6144997ea4e70f59abbe59ec2bc +0 -0
  104. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/e9/1172752e9a421ae463112d2b0506b37498c98d +0 -0
  105. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/ea/0af89e61a882c5afc2a8c281b2d96f174bfe58 +0 -0
  106. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/eb/75e1c49f1e5b79dca17ccdbec8067756523238 +0 -0
  107. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/f1/655afa1c5636c8d58969e3194bb770aefbc552 +0 -0
  108. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/f4/e991088a63def67a30a2b8bbdb4d58514abab8 +0 -0
  109. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/f8/cbb5bfd1503e66cec2c593362c60a317b6d300 +0 -0
  110. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/f9/98e1f01c2bf0a20159fc851327af05beb3ac88 +0 -0
  111. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/fa/9c9a62ec1203a5868b033ded428c2382c4e1b6 +0 -0
  112. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/fb/6c90c9ce5e0cdfbe074a3f060afc66f62eefde +0 -0
  113. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/fc/988aab7e2d46396dc595ad24345e8e77dda0e4 +0 -0
  114. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/fc/e56f1e09d09a05b9babf796fb40bece176f3a2 +0 -0
  115. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/pack/pack-d5469edc8c36e3bb1de5e0070e4d5b1eae935dd4.idx +0 -0
  116. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/pack/pack-d5469edc8c36e3bb1de5e0070e4d5b1eae935dd4.pack +0 -0
  117. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/objects/pack/pack-d5469edc8c36e3bb1de5e0070e4d5b1eae935dd4.rev +0 -0
  118. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/packed-refs +0 -0
  119. {pembot-0.0.6 → pembot-0.0.7}/pembot/.git/refs/remotes/origin/HEAD +0 -0
  120. {pembot-0.0.6 → pembot-0.0.7}/pembot/.gitignore +0 -0
  121. {pembot-0.0.6 → pembot-0.0.7}/pembot/AnyToText/__init__.py +0 -0
  122. {pembot-0.0.6 → pembot-0.0.7}/pembot/LICENSE +0 -0
  123. {pembot-0.0.6 → pembot-0.0.7}/pembot/TextEmbedder/__init__.py +0 -0
  124. {pembot-0.0.6 → pembot-0.0.7}/pembot/TextEmbedder/gemini_embedder.py +0 -0
  125. {pembot-0.0.6 → pembot-0.0.7}/pembot/TextEmbedder/mongodb_embedder.py +0 -0
  126. {pembot-0.0.6 → pembot-0.0.7}/pembot/TextEmbedder/mongodb_index_creator.py +0 -0
  127. {pembot-0.0.6 → pembot-0.0.7}/pembot/TextEmbedder/vector_query.py +0 -0
  128. {pembot-0.0.6 → pembot-0.0.7}/pembot/gartner.py +0 -0
  129. {pembot-0.0.6 → pembot-0.0.7}/pembot/main.py +0 -0
  130. {pembot-0.0.6 → pembot-0.0.7}/pembot/output_structure_local.py +0 -0
  131. {pembot-0.0.6 → pembot-0.0.7}/pembot/pdf2markdown/.git/HEAD +0 -0
  132. {pembot-0.0.6 → pembot-0.0.7}/pembot/pdf2markdown/.git/config +0 -0
  133. {pembot-0.0.6 → pembot-0.0.7}/pembot/pdf2markdown/.git/description +0 -0
  134. {pembot-0.0.6 → pembot-0.0.7}/pembot/pdf2markdown/.git/hooks/applypatch-msg.sample +0 -0
  135. {pembot-0.0.6 → pembot-0.0.7}/pembot/pdf2markdown/.git/hooks/commit-msg.sample +0 -0
  136. {pembot-0.0.6 → pembot-0.0.7}/pembot/pdf2markdown/.git/hooks/fsmonitor-watchman.sample +0 -0
  137. {pembot-0.0.6 → pembot-0.0.7}/pembot/pdf2markdown/.git/hooks/post-update.sample +0 -0
  138. {pembot-0.0.6 → pembot-0.0.7}/pembot/pdf2markdown/.git/hooks/pre-applypatch.sample +0 -0
  139. {pembot-0.0.6 → pembot-0.0.7}/pembot/pdf2markdown/.git/hooks/pre-commit.sample +0 -0
  140. {pembot-0.0.6 → pembot-0.0.7}/pembot/pdf2markdown/.git/hooks/pre-merge-commit.sample +0 -0
  141. {pembot-0.0.6 → pembot-0.0.7}/pembot/pdf2markdown/.git/hooks/pre-push.sample +0 -0
  142. {pembot-0.0.6 → pembot-0.0.7}/pembot/pdf2markdown/.git/hooks/pre-rebase.sample +0 -0
  143. {pembot-0.0.6 → pembot-0.0.7}/pembot/pdf2markdown/.git/hooks/pre-receive.sample +0 -0
  144. {pembot-0.0.6 → pembot-0.0.7}/pembot/pdf2markdown/.git/hooks/prepare-commit-msg.sample +0 -0
  145. {pembot-0.0.6 → pembot-0.0.7}/pembot/pdf2markdown/.git/hooks/push-to-checkout.sample +0 -0
  146. {pembot-0.0.6 → pembot-0.0.7}/pembot/pdf2markdown/.git/hooks/sendemail-validate.sample +0 -0
  147. {pembot-0.0.6 → pembot-0.0.7}/pembot/pdf2markdown/.git/hooks/update.sample +0 -0
  148. {pembot-0.0.6 → pembot-0.0.7}/pembot/pdf2markdown/.git/info/exclude +0 -0
  149. {pembot-0.0.6 → pembot-0.0.7}/pembot/pdf2markdown/.git/logs/refs/remotes/origin/HEAD +0 -0
  150. {pembot-0.0.6 → pembot-0.0.7}/pembot/pdf2markdown/.git/objects/14/251b198e0bac39a3dc3b42f9e57b20c01465fb +0 -0
  151. {pembot-0.0.6 → pembot-0.0.7}/pembot/pdf2markdown/.git/objects/24/8f03b5f969a7fbd396b496f40b57f0ae81c148 +0 -0
  152. {pembot-0.0.6 → pembot-0.0.7}/pembot/pdf2markdown/.git/objects/57/74dc9c3901d2ffb2cd7dafe2ad6612a7f9f42c +0 -0
  153. {pembot-0.0.6 → pembot-0.0.7}/pembot/pdf2markdown/.git/objects/72/2dc14f82e78ce41717348b256e0c17834933b4 +0 -0
  154. {pembot-0.0.6 → pembot-0.0.7}/pembot/pdf2markdown/.git/objects/79/eb7b93ced70e399bd561093c45de7641414dbd +0 -0
  155. {pembot-0.0.6 → pembot-0.0.7}/pembot/pdf2markdown/.git/objects/8d/9ce1fd9733a78c592b34af9c94b98960c601ed +0 -0
  156. {pembot-0.0.6 → pembot-0.0.7}/pembot/pdf2markdown/.git/objects/95/745843bb4377d6042180daeda818c0b16fd493 +0 -0
  157. {pembot-0.0.6 → pembot-0.0.7}/pembot/pdf2markdown/.git/objects/a5/c6dfb577782c259990dcf977e355298e923428 +0 -0
  158. {pembot-0.0.6 → pembot-0.0.7}/pembot/pdf2markdown/.git/objects/b4/8d697aa9fd97151eb2a84a1af5d408b7630232 +0 -0
  159. {pembot-0.0.6 → pembot-0.0.7}/pembot/pdf2markdown/.git/objects/b8/702320e56074e9680181d8b7897d6a0a552e2d +0 -0
  160. {pembot-0.0.6 → pembot-0.0.7}/pembot/pdf2markdown/.git/objects/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391 +0 -0
  161. {pembot-0.0.6 → pembot-0.0.7}/pembot/pdf2markdown/.git/objects/pack/pack-d3051affdd6c31306dc53489168fc870872085d1.idx +0 -0
  162. {pembot-0.0.6 → pembot-0.0.7}/pembot/pdf2markdown/.git/objects/pack/pack-d3051affdd6c31306dc53489168fc870872085d1.pack +0 -0
  163. {pembot-0.0.6 → pembot-0.0.7}/pembot/pdf2markdown/.git/objects/pack/pack-d3051affdd6c31306dc53489168fc870872085d1.rev +0 -0
  164. {pembot-0.0.6 → pembot-0.0.7}/pembot/pdf2markdown/.git/packed-refs +0 -0
  165. {pembot-0.0.6 → pembot-0.0.7}/pembot/pdf2markdown/.git/refs/remotes/origin/HEAD +0 -0
  166. {pembot-0.0.6 → pembot-0.0.7}/pembot/pdf2markdown/LICENSE +0 -0
  167. {pembot-0.0.6 → pembot-0.0.7}/pembot/pdf2markdown/README.md +0 -0
  168. {pembot-0.0.6 → pembot-0.0.7}/pembot/pdf2markdown/__init__.py +0 -0
  169. {pembot-0.0.6 → pembot-0.0.7}/pembot/pdf2markdown/config/config.yaml +0 -0
  170. {pembot-0.0.6 → pembot-0.0.7}/pembot/pdf2markdown/pyrightconfig.json +0 -0
  171. {pembot-0.0.6 → pembot-0.0.7}/pembot/pdf2markdown/requirements.txt +0 -0
  172. {pembot-0.0.6 → pembot-0.0.7}/pembot/pem.py +0 -0
  173. {pembot-0.0.6 → pembot-0.0.7}/pembot/query.py +0 -0
  174. {pembot-0.0.6 → pembot-0.0.7}/pembot/requirements.txt +0 -0
  175. {pembot-0.0.6 → pembot-0.0.7}/pembot/utils/__init__.py +0 -0
  176. {pembot-0.0.6 → pembot-0.0.7}/pembot/utils/inference_client.py +0 -0
  177. {pembot-0.0.6 → pembot-0.0.7}/pembot/utils/string_tools.py +0 -0
  178. {pembot-0.0.6 → pembot-0.0.7}/pyproject.toml +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pembot
3
- Version: 0.0.6
3
+ Version: 0.0.7
4
4
  Summary: A Python Package to convert PEM blog content to usseful information by leveraging LLMs
5
5
  Author-email: cyto <aryan_sidhwani@protonmail.com>
6
6
  License-Expression: MIT
@@ -0,0 +1 @@
1
+ added model name to convertor
@@ -6,3 +6,5 @@ e91172752e9a421ae463112d2b0506b37498c98d 0c8d9b2690545bf1906b05cd9f18b783b3eb74f
6
6
  0c8d9b2690545bf1906b05cd9f18b783b3eb74f1 eb75e1c49f1e5b79dca17ccdbec8067756523238 cyto <silverstone965@gmail.com> 1750856653 +0530 commit: made arrangements for the cases when custom file bytes are to be processed to text output; handled a ollama running / crashing error
7
7
  eb75e1c49f1e5b79dca17ccdbec8067756523238 0bdb4169fc0f312b8698f1df17a258fff163aeaa cyto <silverstone965@gmail.com> 1750937276 +0530 commit: fixed the output_dir bug; fixed the excel to json function; ran some tests on convertor; incremented the version on the package; removed dependency on schema / structure, and shifted required fields to a pickle file path in the cli args;
8
8
  0bdb4169fc0f312b8698f1df17a258fff163aeaa 9528bbccd167e3f4ad583a1ae9fac98a52620e27 cyto <silverstone965@gmail.com> 1750947488 +0530 commit: handled local llm nonexistent error properly for choice of just passing None as llm_client;
9
+ 9528bbccd167e3f4ad583a1ae9fac98a52620e27 ef0503a60244391590b16042019032e91d7cc30d cyto <silverstone965@gmail.com> 1751872559 +0530 commit: added a model_name_parameter to change models quicky
10
+ ef0503a60244391590b16042019032e91d7cc30d af80ddb5890f062e364ea8ade2d602df4e12de8c cyto <silverstone965@gmail.com> 1751896700 +0530 commit: added model name to convertor
@@ -6,3 +6,5 @@ e91172752e9a421ae463112d2b0506b37498c98d 0c8d9b2690545bf1906b05cd9f18b783b3eb74f
6
6
  0c8d9b2690545bf1906b05cd9f18b783b3eb74f1 eb75e1c49f1e5b79dca17ccdbec8067756523238 cyto <silverstone965@gmail.com> 1750856653 +0530 commit: made arrangements for the cases when custom file bytes are to be processed to text output; handled a ollama running / crashing error
7
7
  eb75e1c49f1e5b79dca17ccdbec8067756523238 0bdb4169fc0f312b8698f1df17a258fff163aeaa cyto <silverstone965@gmail.com> 1750937276 +0530 commit: fixed the output_dir bug; fixed the excel to json function; ran some tests on convertor; incremented the version on the package; removed dependency on schema / structure, and shifted required fields to a pickle file path in the cli args;
8
8
  0bdb4169fc0f312b8698f1df17a258fff163aeaa 9528bbccd167e3f4ad583a1ae9fac98a52620e27 cyto <silverstone965@gmail.com> 1750947488 +0530 commit: handled local llm nonexistent error properly for choice of just passing None as llm_client;
9
+ 9528bbccd167e3f4ad583a1ae9fac98a52620e27 ef0503a60244391590b16042019032e91d7cc30d cyto <silverstone965@gmail.com> 1751872559 +0530 commit: added a model_name_parameter to change models quicky
10
+ ef0503a60244391590b16042019032e91d7cc30d af80ddb5890f062e364ea8ade2d602df4e12de8c cyto <silverstone965@gmail.com> 1751896700 +0530 commit: added model name to convertor
@@ -5,3 +5,5 @@ e91172752e9a421ae463112d2b0506b37498c98d 0c8d9b2690545bf1906b05cd9f18b783b3eb74f
5
5
  0c8d9b2690545bf1906b05cd9f18b783b3eb74f1 eb75e1c49f1e5b79dca17ccdbec8067756523238 cyto <silverstone965@gmail.com> 1750856672 +0530 update by push
6
6
  eb75e1c49f1e5b79dca17ccdbec8067756523238 0bdb4169fc0f312b8698f1df17a258fff163aeaa cyto <silverstone965@gmail.com> 1750937389 +0530 update by push
7
7
  0bdb4169fc0f312b8698f1df17a258fff163aeaa 9528bbccd167e3f4ad583a1ae9fac98a52620e27 cyto <silverstone965@gmail.com> 1750947502 +0530 update by push
8
+ 9528bbccd167e3f4ad583a1ae9fac98a52620e27 ef0503a60244391590b16042019032e91d7cc30d cyto <silverstone965@gmail.com> 1751872581 +0530 update by push
9
+ ef0503a60244391590b16042019032e91d7cc30d af80ddb5890f062e364ea8ade2d602df4e12de8c cyto <silverstone965@gmail.com> 1751896713 +0530 update by push
@@ -0,0 +1,3 @@
1
+ x��Kj1D��)z� ��ĴZ-{�h����>2�AVU��Qom��9��j�5�d ��,��#��tQF&T�J|��ۀ�t̙�(�T��E�
2
+ 9U��i�%� �>�}z�Dz��~��q����pYߩ�OP���s ޤ3R�v
3
+ �*�.��z���a��Ԟ�t��¯����B���k�U�
@@ -0,0 +1 @@
1
+ af80ddb5890f062e364ea8ade2d602df4e12de8c
@@ -0,0 +1 @@
1
+ af80ddb5890f062e364ea8ade2d602df4e12de8c
@@ -31,12 +31,14 @@ EXCEL_FILE_TYPES= [
31
31
  class Convertor():
32
32
 
33
33
 
34
- def __init__(self, myfile: Path | None= None, output_dir: Path | None= None, file_bytes: bytes | None= None, suffix: str | None= None, file_type: str | None= None):
34
+ def __init__(self, myfile: Path | None= None, output_dir: Path | None= None, file_bytes: bytes | None= None, suffix: str | None= None, file_type: str | None= None, model_name: str | None = None):
35
35
 
36
36
  self.output= ""
37
37
 
38
- # model_name= "gemini-2.5-flash"
39
- model_name= None
38
+ if model_name is None:
39
+ # model_name= "gemini-2.5-flash"
40
+ model_name= "Nanonets-OCR-s"
41
+
40
42
  # file_type can be pdf, excel, etc.
41
43
  if output_dir is None and myfile is None and file_bytes is not None and suffix is not None:
42
44
  with tempfile.TemporaryDirectory() as dp:
@@ -1,6 +1,6 @@
1
1
  """
2
2
  A Python Package to convert PEM blog content to usseful information by leveraging LLMs
3
3
  """
4
- __version__ = '0.0.6'
4
+ __version__ = '0.0.7'
5
5
  from .main import save_to_json_file, make_query
6
6
  __all__ = ["save_to_json_file", "make_query"]
@@ -2,4 +2,4 @@ OUTPUT_DIR: /home/cyto/dev/pembotdir
2
2
  PAGE_DELIMITER: ___________________________ NEXT PAGE ___________________________
3
3
  app:
4
4
  name: pembot
5
- version: 0.0.6
5
+ version: 0.0.7
@@ -0,0 +1 @@
1
+ handled the gpu errors non-gracefully so that it stops
@@ -2,3 +2,4 @@
2
2
  ffb759ee4605b232366a9ee58134532913c3f9e0 b8702320e56074e9680181d8b7897d6a0a552e2d cyto <silverstone965@gmail.com> 1750947962 +0530 commit: handled config loading errors gracefully; added gemini support, as an option; added huggingface nanonets transformers support (as an option); redesigned the extract markdown for captioning and image ocr (block image and full-page image);
3
3
  b8702320e56074e9680181d8b7897d6a0a552e2d 14251b198e0bac39a3dc3b42f9e57b20c01465fb cyto <silverstone965@gmail.com> 1751604763 +0530 commit: removed deps on torch and transformers; used gradio client for ocr through public spaces;
4
4
  14251b198e0bac39a3dc3b42f9e57b20c01465fb b48d697aa9fd97151eb2a84a1af5d408b7630232 cyto <silverstone965@gmail.com> 1751871887 +0530 commit: cyto/argument-list-bug-fix;authentication-used-in-gradio-client
5
+ b48d697aa9fd97151eb2a84a1af5d408b7630232 f3b2d76c75bbd50e04fc4c2ad17fc94ca6daed32 cyto <silverstone965@gmail.com> 1751896628 +0530 commit: handled the gpu errors non-gracefully so that it stops
@@ -2,3 +2,4 @@
2
2
  ffb759ee4605b232366a9ee58134532913c3f9e0 b8702320e56074e9680181d8b7897d6a0a552e2d cyto <silverstone965@gmail.com> 1750947962 +0530 commit: handled config loading errors gracefully; added gemini support, as an option; added huggingface nanonets transformers support (as an option); redesigned the extract markdown for captioning and image ocr (block image and full-page image);
3
3
  b8702320e56074e9680181d8b7897d6a0a552e2d 14251b198e0bac39a3dc3b42f9e57b20c01465fb cyto <silverstone965@gmail.com> 1751604763 +0530 commit: removed deps on torch and transformers; used gradio client for ocr through public spaces;
4
4
  14251b198e0bac39a3dc3b42f9e57b20c01465fb b48d697aa9fd97151eb2a84a1af5d408b7630232 cyto <silverstone965@gmail.com> 1751871887 +0530 commit: cyto/argument-list-bug-fix;authentication-used-in-gradio-client
5
+ b48d697aa9fd97151eb2a84a1af5d408b7630232 f3b2d76c75bbd50e04fc4c2ad17fc94ca6daed32 cyto <silverstone965@gmail.com> 1751896628 +0530 commit: handled the gpu errors non-gracefully so that it stops
@@ -1,3 +1,4 @@
1
1
  0000000000000000000000000000000000000000 b8702320e56074e9680181d8b7897d6a0a552e2d cyto <silverstone965@gmail.com> 1750948073 +0530 update by push
2
2
  b8702320e56074e9680181d8b7897d6a0a552e2d 14251b198e0bac39a3dc3b42f9e57b20c01465fb cyto <silverstone965@gmail.com> 1751604904 +0530 update by push
3
3
  14251b198e0bac39a3dc3b42f9e57b20c01465fb b48d697aa9fd97151eb2a84a1af5d408b7630232 cyto <silverstone965@gmail.com> 1751872077 +0530 update by push
4
+ b48d697aa9fd97151eb2a84a1af5d408b7630232 f3b2d76c75bbd50e04fc4c2ad17fc94ca6daed32 cyto <silverstone965@gmail.com> 1751896663 +0530 update by push
@@ -0,0 +1 @@
1
+ x��Kj!3vw� �l媷?`k�v��>�!�C'��:'Hk�f't:�lȺ�6g�u 2j�߈G�TV��ةN��gb�rp���F��ɚ���RI��<Z���
@@ -0,0 +1 @@
1
+ f3b2d76c75bbd50e04fc4c2ad17fc94ca6daed32
@@ -0,0 +1 @@
1
+ f3b2d76c75bbd50e04fc4c2ad17fc94ca6daed32
@@ -115,6 +115,10 @@ class MarkdownPDFExtractor(PDFExtractor):
115
115
  except Exception as e:
116
116
  self.logger.error(f"Error processing PDF: {e}")
117
117
  self.logger.exception(traceback.format_exc())
118
+
119
+ error_message= str(e).lower()
120
+ if "GPU" in error_message and "quota" in error_message:
121
+ return "GPU quota error", []
118
122
  return "", []
119
123
 
120
124
 
@@ -176,6 +180,12 @@ class MarkdownPDFExtractor(PDFExtractor):
176
180
  print("ocr'd: ", result[:100] + "...")
177
181
  except Exception as e:
178
182
  print("Error during nanonet inference", e)
183
+ error_message = str(e)
184
+ if "You have exceeded your Pro GPU quota" in error_message:
185
+ # print("\n\n\nFALLING BACK TO TESS\n\n\n")
186
+ # return pytesseract.image_to_string(pil_image)
187
+ raise e
188
+
179
189
 
180
190
  return result
181
191
  else:
@@ -262,6 +272,9 @@ class MarkdownPDFExtractor(PDFExtractor):
262
272
  except Exception as e:
263
273
  self.logger.error(f" Error processing embedded image block for OCR: {e}")
264
274
  current_page_markdown_blocks.append("\n\n![Image Processing Error](error_on_page_{page_num+1}_block_{block_num+1}.png)\n\n")
275
+ error_message= str(e).lower()
276
+ if "GPU" in error_message and "quota" in error_message:
277
+ raise e
265
278
 
266
279
 
267
280
  # Insert tables at their approximate positions (after blocks are processed for the page)
@@ -306,6 +319,9 @@ class MarkdownPDFExtractor(PDFExtractor):
306
319
  self.logger.info(f" Full-page OCR yielded no text for page {page_num+1}.")
307
320
  except Exception as e:
308
321
  self.logger.error(f" Error during full-page OCR on page {page_num+1}: {e}")
322
+ error_message= str(e).lower()
323
+ if "GPU" in error_message and "quota" in error_message:
324
+ raise e
309
325
  else:
310
326
  self.logger.info(f" Page {page_num + 1} has sufficient searchable text or embedded image OCR; skipping full-page OCR.")
311
327
 
@@ -329,7 +345,12 @@ class MarkdownPDFExtractor(PDFExtractor):
329
345
  except Exception as e:
330
346
  self.logger.critical(f"An unexpected error occurred during markdown extraction: {e}")
331
347
  self.logger.exception(traceback.format_exc())
332
- return "", []
348
+
349
+ error_message= str(e).lower()
350
+ if "GPU" in error_message and "quota" in error_message:
351
+ return "GPU quota error", []
352
+ else:
353
+ return "", []
333
354
 
334
355
  def extract_tables(self):
335
356
  """Extract tables from PDF using pdfplumber."""
@@ -412,6 +433,9 @@ class MarkdownPDFExtractor(PDFExtractor):
412
433
  except Exception as e:
413
434
  self.logger.error(f"Error captioning image: {e}")
414
435
  self.logger.exception(traceback.format_exc())
436
+ error_message= str(e)
437
+ if "GPU" in error_message and "quota" in error_message:
438
+ raise e
415
439
  return ""
416
440
 
417
441
  def clean_text(self, text):
@@ -726,6 +750,7 @@ class MarkdownPDFExtractor(PDFExtractor):
726
750
  self.logger.exception(traceback.format_exc())
727
751
  return ""
728
752
 
753
+
729
754
  def get_header_level(self, font_size):
730
755
  """Determine header level based on font size."""
731
756
  if font_size > 24:
@@ -1 +0,0 @@
1
- handled config loading errors gracefully; added gemini support, as an option; added huggingface nanonets transformers support (as an option); redesigned the extract markdown for captioning and image ocr (block image and full-page image);
@@ -1 +0,0 @@
1
- 9528bbccd167e3f4ad583a1ae9fac98a52620e27
@@ -1 +0,0 @@
1
- 9528bbccd167e3f4ad583a1ae9fac98a52620e27
@@ -1 +0,0 @@
1
- cyto/argument-list-bug-fix;authentication-used-in-gradio-client
@@ -1 +0,0 @@
1
- b48d697aa9fd97151eb2a84a1af5d408b7630232
@@ -1 +0,0 @@
1
- b48d697aa9fd97151eb2a84a1af5d408b7630232
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes