docp 0.1.0b1__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (293) hide show
  1. docp-0.2.0/.gitignore +6 -0
  2. docp-0.2.0/.readthedocs.yaml +24 -0
  3. docp-0.2.0/PKG-INFO +110 -0
  4. docp-0.2.0/README.md +78 -0
  5. docp-0.2.0/docp/.cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/cb202bfe2e3c98645018a6d12f182a434c9d3e02.lock +0 -0
  6. docp-0.2.0/docp/.cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/d1514c3162bbe87b343f565fadc62e6c06f04f03.lock +0 -0
  7. docp-0.2.0/docp/.cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/e7b0375001f109a6b8873d756ad4f7bbb15fbaa5.lock +0 -0
  8. docp-0.2.0/docp/.cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/fb140275c155a9c7c5a3b3e0e77a9e839594a938.lock +0 -0
  9. docp-0.2.0/docp/.cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/fd1b291129c607e5d49799f87cb219b27f98acdf.lock +0 -0
  10. docp-0.2.0/docp/__init__.py +40 -0
  11. docp-0.2.0/docp/dbs/__init__.py +0 -0
  12. {docp-0.1.0b1 → docp-0.2.0}/docp/dbs/chroma.py +19 -6
  13. docp-0.2.0/docp/libs/_version.py +1 -0
  14. docp-0.2.0/docp/libs/changelog.py +7 -0
  15. docp-0.2.0/docp/libs/utilities.py +107 -0
  16. docp-0.2.0/docp/loaders/__init__.py +38 -0
  17. docp-0.2.0/docp/loaders/_chromabaseloader.py +338 -0
  18. docp-0.1.0b1/docp/loaders/_chromabaseloader.py → docp-0.2.0/docp/loaders/_chromabaseloader.py.bak +17 -1
  19. docp-0.2.0/docp/loaders/_chromabasepdfloader.py +121 -0
  20. docp-0.2.0/docp/loaders/_chromabasepptxloader.py +123 -0
  21. docp-0.1.0b1/docp/loaders/chroma.py → docp-0.2.0/docp/loaders/chroma.py.bak +38 -8
  22. docp-0.2.0/docp/loaders/chromapdfloader.py +199 -0
  23. docp-0.2.0/docp/loaders/chromapptxloader.py +192 -0
  24. docp-0.2.0/docp/loaders/lutilities.py +52 -0
  25. docp-0.2.0/docp/objects/__init__.py +0 -0
  26. {docp-0.1.0b1 → docp-0.2.0}/docp/objects/_docbaseobject.py +7 -18
  27. docp-0.2.0/docp/objects/_imgobject.py +0 -0
  28. {docp-0.1.0b1 → docp-0.2.0}/docp/objects/_pageobject.py +3 -2
  29. docp-0.2.0/docp/objects/_slideobject.py +110 -0
  30. docp-0.2.0/docp/objects/_tableobject.py +0 -0
  31. docp-0.2.0/docp/objects/_textobject.py +64 -0
  32. {docp-0.1.0b1 → docp-0.2.0}/docp/objects/pdfobject.py +24 -2
  33. docp-0.2.0/docp/objects/pptxobject.py +46 -0
  34. docp-0.2.0/docp/parsers/__init__.py +0 -0
  35. {docp-0.1.0b1 → docp-0.2.0}/docp/parsers/_pdfbaseparser.py +36 -10
  36. {docp-0.1.0b1 → docp-0.2.0}/docp/parsers/_pdftableparser.py +6 -7
  37. {docp-0.1.0b1 → docp-0.2.0}/docp/parsers/_pdftextparser.py +23 -13
  38. docp-0.2.0/docp/parsers/_pptxbaseparser.py +93 -0
  39. docp-0.2.0/docp/parsers/_pptxtextparser.py +115 -0
  40. docp-0.2.0/docp/parsers/pptxparser.py +51 -0
  41. docp-0.2.0/docp/parsers/putilities.py +48 -0
  42. docp-0.2.0/docp.egg-info/PKG-INFO +110 -0
  43. docp-0.2.0/docp.egg-info/SOURCES.txt +282 -0
  44. {docp-0.1.0b1 → docp-0.2.0}/docp.egg-info/requires.txt +0 -1
  45. docp-0.2.0/docs/.constants.config +7 -0
  46. docp-0.2.0/docs/build/.buildinfo +4 -0
  47. docp-0.2.0/docs/build/.doctrees/changelog.doctree +0 -0
  48. docp-0.2.0/docs/build/.doctrees/contact.doctree +0 -0
  49. docp-0.2.0/docs/build/.doctrees/dbs_chroma.doctree +0 -0
  50. docp-0.2.0/docs/build/.doctrees/environment.pickle +0 -0
  51. docp-0.2.0/docs/build/.doctrees/index.doctree +0 -0
  52. docp-0.2.0/docs/build/.doctrees/library.doctree +0 -0
  53. docp-0.2.0/docs/build/.doctrees/libs_utilities.doctree +0 -0
  54. docp-0.2.0/docs/build/.doctrees/loaders__chromabaseloader.doctree +0 -0
  55. docp-0.2.0/docs/build/.doctrees/loaders__chromabasepdfloader.doctree +0 -0
  56. docp-0.2.0/docs/build/.doctrees/loaders__chromabasepptxloader.doctree +0 -0
  57. docp-0.2.0/docs/build/.doctrees/loaders_chromapdfloader.doctree +0 -0
  58. docp-0.2.0/docs/build/.doctrees/loaders_chromapptxloader.doctree +0 -0
  59. docp-0.2.0/docs/build/.doctrees/loaders_lutilities.doctree +0 -0
  60. docp-0.2.0/docs/build/.doctrees/objects__docbaseobject.doctree +0 -0
  61. docp-0.2.0/docs/build/.doctrees/objects__pageobject.doctree +0 -0
  62. docp-0.2.0/docs/build/.doctrees/objects__slideobject.doctree +0 -0
  63. docp-0.2.0/docs/build/.doctrees/objects__textobject.doctree +0 -0
  64. docp-0.2.0/docs/build/.doctrees/objects_pdfobject.doctree +0 -0
  65. docp-0.2.0/docs/build/.doctrees/objects_pptxobject.doctree +0 -0
  66. docp-0.2.0/docs/build/.doctrees/parsers__pdfbaseparser.doctree +0 -0
  67. docp-0.2.0/docs/build/.doctrees/parsers__pdftableparser.doctree +0 -0
  68. docp-0.2.0/docs/build/.doctrees/parsers__pdftextparser.doctree +0 -0
  69. docp-0.2.0/docs/build/.doctrees/parsers__pptxbaseparser.doctree +0 -0
  70. docp-0.2.0/docs/build/.doctrees/parsers__pptxtextparser.doctree +0 -0
  71. docp-0.2.0/docs/build/.doctrees/parsers_pdfparser.doctree +0 -0
  72. docp-0.2.0/docs/build/.doctrees/parsers_pptxparser.doctree +0 -0
  73. docp-0.2.0/docs/build/.doctrees/parsers_putilities.doctree +0 -0
  74. docp-0.2.0/docs/build/_modules/docp/dbs/chroma.html +325 -0
  75. docp-0.2.0/docs/build/_modules/docp/libs/utilities.html +226 -0
  76. docp-0.2.0/docs/build/_modules/docp/loaders/_chromabaseloader.html +487 -0
  77. docp-0.2.0/docs/build/_modules/docp/loaders/_chromabasepdfloader.html +237 -0
  78. docp-0.2.0/docs/build/_modules/docp/loaders/_chromabasepptxloader.html +239 -0
  79. docp-0.2.0/docs/build/_modules/docp/loaders/chromapdfloader.html +312 -0
  80. docp-0.2.0/docs/build/_modules/docp/loaders/chromapptxloader.html +305 -0
  81. docp-0.2.0/docs/build/_modules/docp/loaders/lutilities.html +162 -0
  82. docp-0.2.0/docs/build/_modules/docp/objects/_docbaseobject.html +172 -0
  83. docp-0.2.0/docs/build/_modules/docp/objects/_pageobject.html +237 -0
  84. docp-0.2.0/docs/build/_modules/docp/objects/_slideobject.html +217 -0
  85. docp-0.2.0/docs/build/_modules/docp/objects/_textobject.html +171 -0
  86. docp-0.2.0/docs/build/_modules/docp/objects/pdfobject.html +168 -0
  87. docp-0.2.0/docs/build/_modules/docp/objects/pptxobject.html +153 -0
  88. docp-0.2.0/docs/build/_modules/docp/parsers/_pdfbaseparser.html +358 -0
  89. docp-0.2.0/docs/build/_modules/docp/parsers/_pdftableparser.html +403 -0
  90. docp-0.2.0/docs/build/_modules/docp/parsers/_pdftextparser.html +385 -0
  91. docp-0.2.0/docs/build/_modules/docp/parsers/_pptxbaseparser.html +206 -0
  92. docp-0.2.0/docs/build/_modules/docp/parsers/_pptxtextparser.html +228 -0
  93. docp-0.2.0/docs/build/_modules/docp/parsers/pdfparser.html +169 -0
  94. docp-0.2.0/docs/build/_modules/docp/parsers/pptxparser.html +158 -0
  95. docp-0.2.0/docs/build/_modules/docp/parsers/putilities.html +158 -0
  96. docp-0.2.0/docs/build/_modules/index.html +125 -0
  97. docp-0.2.0/docs/build/_static/_sphinx_javascript_frameworks_compat.js +123 -0
  98. docp-0.2.0/docs/build/_static/basic.css +914 -0
  99. docp-0.2.0/docs/build/_static/check-solid.svg +4 -0
  100. docp-0.2.0/docs/build/_static/clipboard.min.js +7 -0
  101. docp-0.2.0/docs/build/_static/copy-button.svg +5 -0
  102. docp-0.2.0/docs/build/_static/copybutton.css +94 -0
  103. docp-0.2.0/docs/build/_static/copybutton.js +248 -0
  104. docp-0.2.0/docs/build/_static/copybutton_funcs.js +73 -0
  105. docp-0.2.0/docs/build/_static/css/badge_only.css +1 -0
  106. docp-0.2.0/docs/build/_static/css/fonts/Roboto-Slab-Bold.woff +0 -0
  107. docp-0.2.0/docs/build/_static/css/fonts/Roboto-Slab-Bold.woff2 +0 -0
  108. docp-0.2.0/docs/build/_static/css/fonts/Roboto-Slab-Regular.woff +0 -0
  109. docp-0.2.0/docs/build/_static/css/fonts/Roboto-Slab-Regular.woff2 +0 -0
  110. docp-0.2.0/docs/build/_static/css/fonts/fontawesome-webfont.eot +0 -0
  111. docp-0.2.0/docs/build/_static/css/fonts/fontawesome-webfont.svg +2671 -0
  112. docp-0.2.0/docs/build/_static/css/fonts/fontawesome-webfont.ttf +0 -0
  113. docp-0.2.0/docs/build/_static/css/fonts/fontawesome-webfont.woff +0 -0
  114. docp-0.2.0/docs/build/_static/css/fonts/fontawesome-webfont.woff2 +0 -0
  115. docp-0.2.0/docs/build/_static/css/fonts/lato-bold-italic.woff +0 -0
  116. docp-0.2.0/docs/build/_static/css/fonts/lato-bold-italic.woff2 +0 -0
  117. docp-0.2.0/docs/build/_static/css/fonts/lato-bold.woff +0 -0
  118. docp-0.2.0/docs/build/_static/css/fonts/lato-bold.woff2 +0 -0
  119. docp-0.2.0/docs/build/_static/css/fonts/lato-normal-italic.woff +0 -0
  120. docp-0.2.0/docs/build/_static/css/fonts/lato-normal-italic.woff2 +0 -0
  121. docp-0.2.0/docs/build/_static/css/fonts/lato-normal.woff +0 -0
  122. docp-0.2.0/docs/build/_static/css/fonts/lato-normal.woff2 +0 -0
  123. docp-0.2.0/docs/build/_static/css/s5defs-rules.css +134 -0
  124. docp-0.2.0/docs/build/_static/css/s5defs.txt +69 -0
  125. docp-0.2.0/docs/build/_static/css/theme.css +4 -0
  126. docp-0.2.0/docs/build/_static/doctools.js +149 -0
  127. docp-0.2.0/docs/build/_static/documentation_options.js +13 -0
  128. docp-0.2.0/docs/build/_static/file.png +0 -0
  129. docp-0.2.0/docs/build/_static/fonts/Lato/lato-bold.eot +0 -0
  130. docp-0.2.0/docs/build/_static/fonts/Lato/lato-bold.ttf +0 -0
  131. docp-0.2.0/docs/build/_static/fonts/Lato/lato-bold.woff +0 -0
  132. docp-0.2.0/docs/build/_static/fonts/Lato/lato-bold.woff2 +0 -0
  133. docp-0.2.0/docs/build/_static/fonts/Lato/lato-bolditalic.eot +0 -0
  134. docp-0.2.0/docs/build/_static/fonts/Lato/lato-bolditalic.ttf +0 -0
  135. docp-0.2.0/docs/build/_static/fonts/Lato/lato-bolditalic.woff +0 -0
  136. docp-0.2.0/docs/build/_static/fonts/Lato/lato-bolditalic.woff2 +0 -0
  137. docp-0.2.0/docs/build/_static/fonts/Lato/lato-italic.eot +0 -0
  138. docp-0.2.0/docs/build/_static/fonts/Lato/lato-italic.ttf +0 -0
  139. docp-0.2.0/docs/build/_static/fonts/Lato/lato-italic.woff +0 -0
  140. docp-0.2.0/docs/build/_static/fonts/Lato/lato-italic.woff2 +0 -0
  141. docp-0.2.0/docs/build/_static/fonts/Lato/lato-regular.eot +0 -0
  142. docp-0.2.0/docs/build/_static/fonts/Lato/lato-regular.ttf +0 -0
  143. docp-0.2.0/docs/build/_static/fonts/Lato/lato-regular.woff +0 -0
  144. docp-0.2.0/docs/build/_static/fonts/Lato/lato-regular.woff2 +0 -0
  145. docp-0.2.0/docs/build/_static/fonts/RobotoSlab/roboto-slab-v7-bold.eot +0 -0
  146. docp-0.2.0/docs/build/_static/fonts/RobotoSlab/roboto-slab-v7-bold.ttf +0 -0
  147. docp-0.2.0/docs/build/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff +0 -0
  148. docp-0.2.0/docs/build/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff2 +0 -0
  149. docp-0.2.0/docs/build/_static/fonts/RobotoSlab/roboto-slab-v7-regular.eot +0 -0
  150. docp-0.2.0/docs/build/_static/fonts/RobotoSlab/roboto-slab-v7-regular.ttf +0 -0
  151. docp-0.2.0/docs/build/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff +0 -0
  152. docp-0.2.0/docs/build/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff2 +0 -0
  153. docp-0.2.0/docs/build/_static/img/s3dev_tri_white_sm.png +0 -0
  154. docp-0.2.0/docs/build/_static/jquery.js +2 -0
  155. docp-0.2.0/docs/build/_static/js/badge_only.js +1 -0
  156. docp-0.2.0/docs/build/_static/js/mathjax.js +1 -0
  157. docp-0.2.0/docs/build/_static/js/theme.js +1 -0
  158. docp-0.2.0/docs/build/_static/js/versions.js +228 -0
  159. docp-0.2.0/docs/build/_static/language_data.js +192 -0
  160. docp-0.2.0/docs/build/_static/minus.png +0 -0
  161. docp-0.2.0/docs/build/_static/plus.png +0 -0
  162. docp-0.2.0/docs/build/_static/pygments.css +75 -0
  163. docp-0.2.0/docs/build/_static/s3dev_tri_white_sm.png +0 -0
  164. docp-0.2.0/docs/build/_static/searchtools.js +632 -0
  165. docp-0.2.0/docs/build/_static/sphinx_highlight.js +154 -0
  166. docp-0.2.0/docs/build/_static/tbl/contact.csv +4 -0
  167. docp-0.2.0/docs/build/changelog.html +180 -0
  168. docp-0.2.0/docs/build/contact.html +136 -0
  169. docp-0.2.0/docs/build/dbs_chroma.html +288 -0
  170. docp-0.2.0/docs/build/genindex.html +713 -0
  171. docp-0.2.0/docs/build/index.html +244 -0
  172. docp-0.2.0/docs/build/library.html +225 -0
  173. docp-0.2.0/docs/build/libs_utilities.html +253 -0
  174. docp-0.2.0/docs/build/libs_utilities.spelling +2 -0
  175. docp-0.2.0/docs/build/loaders__chromabaseloader.html +420 -0
  176. docp-0.2.0/docs/build/loaders__chromabaseloader.spelling +1 -0
  177. docp-0.2.0/docs/build/loaders__chromabasepdfloader.html +253 -0
  178. docp-0.2.0/docs/build/loaders__chromabasepdfloader.spelling +1 -0
  179. docp-0.2.0/docs/build/loaders__chromabasepptxloader.html +253 -0
  180. docp-0.2.0/docs/build/loaders__chromabasepptxloader.spelling +1 -0
  181. docp-0.2.0/docs/build/loaders_chromapdfloader.html +315 -0
  182. docp-0.2.0/docs/build/loaders_chromapdfloader.spelling +1 -0
  183. docp-0.2.0/docs/build/loaders_chromapptxloader.html +317 -0
  184. docp-0.2.0/docs/build/loaders_chromapptxloader.spelling +2 -0
  185. docp-0.2.0/docs/build/loaders_lutilities.html +191 -0
  186. docp-0.2.0/docs/build/loaders_lutilities.spelling +1 -0
  187. docp-0.2.0/docs/build/objects.inv +0 -0
  188. docp-0.2.0/docs/build/objects__docbaseobject.html +210 -0
  189. docp-0.2.0/docs/build/objects__pageobject.html +243 -0
  190. docp-0.2.0/docs/build/objects__slideobject.html +247 -0
  191. docp-0.2.0/docs/build/objects__textobject.html +194 -0
  192. docp-0.2.0/docs/build/objects_pdfobject.html +195 -0
  193. docp-0.2.0/docs/build/objects_pptxobject.html +184 -0
  194. docp-0.2.0/docs/build/parsers__pdfbaseparser.html +332 -0
  195. docp-0.2.0/docs/build/parsers__pdftableparser.html +363 -0
  196. docp-0.2.0/docs/build/parsers__pdftextparser.html +356 -0
  197. docp-0.2.0/docs/build/parsers__pptxbaseparser.html +223 -0
  198. docp-0.2.0/docs/build/parsers__pptxtextparser.html +241 -0
  199. docp-0.2.0/docs/build/parsers_pdfparser.html +199 -0
  200. docp-0.2.0/docs/build/parsers_pptxparser.html +189 -0
  201. docp-0.2.0/docs/build/parsers_putilities.html +187 -0
  202. docp-0.2.0/docs/build/parsers_putilities.spelling +1 -0
  203. docp-0.2.0/docs/build/py-modindex.html +238 -0
  204. docp-0.2.0/docs/build/search.html +123 -0
  205. docp-0.2.0/docs/build/searchindex.js +1 -0
  206. docp-0.2.0/docs/create.sh +13 -0
  207. docp-0.2.0/docs/requirements.txt +12 -0
  208. docp-0.2.0/docs/source/_static/css/s5defs-rules.css +134 -0
  209. docp-0.2.0/docs/source/_static/css/s5defs.txt +69 -0
  210. docp-0.2.0/docs/source/_static/img/s3dev_tri_white_sm.png +0 -0
  211. docp-0.2.0/docs/source/_static/js/mathjax.js +1 -0
  212. docp-0.2.0/docs/source/_static/tbl/contact.csv +4 -0
  213. docp-0.2.0/docs/source/changelog.rst +6 -0
  214. docp-0.2.0/docs/source/conf.py +104 -0
  215. docp-0.2.0/docs/source/contact.rst +21 -0
  216. docp-0.2.0/docs/source/dbs_chroma.rst +7 -0
  217. docp-0.2.0/docs/source/index.rst +161 -0
  218. docp-0.2.0/docs/source/library.rst +94 -0
  219. docp-0.2.0/docs/source/libs_utilities.rst +7 -0
  220. docp-0.2.0/docs/source/loaders__chromabaseloader.rst +7 -0
  221. docp-0.2.0/docs/source/loaders__chromabasepdfloader.rst +7 -0
  222. docp-0.2.0/docs/source/loaders__chromabasepptxloader.rst +7 -0
  223. docp-0.2.0/docs/source/loaders_chromapdfloader.rst +9 -0
  224. docp-0.2.0/docs/source/loaders_chromapptxloader.rst +9 -0
  225. docp-0.2.0/docs/source/loaders_lutilities.rst +7 -0
  226. docp-0.2.0/docs/source/objects__docbaseobject.rst +7 -0
  227. docp-0.2.0/docs/source/objects__pageobject.rst +7 -0
  228. docp-0.2.0/docs/source/objects__slideobject.rst +7 -0
  229. docp-0.2.0/docs/source/objects__textobject.rst +7 -0
  230. docp-0.2.0/docs/source/objects_pdfobject.rst +7 -0
  231. docp-0.2.0/docs/source/objects_pptxobject.rst +7 -0
  232. docp-0.2.0/docs/source/parsers__pdfbaseparser.rst +7 -0
  233. docp-0.2.0/docs/source/parsers__pdftableparser.rst +7 -0
  234. docp-0.2.0/docs/source/parsers__pdftextparser.rst +7 -0
  235. docp-0.2.0/docs/source/parsers__pptxbaseparser.rst +7 -0
  236. docp-0.2.0/docs/source/parsers__pptxtextparser.rst +7 -0
  237. docp-0.2.0/docs/source/parsers_pdfparser.rst +7 -0
  238. docp-0.2.0/docs/source/parsers_pptxparser.rst +7 -0
  239. docp-0.2.0/docs/source/parsers_putilities.rst +7 -0
  240. docp-0.2.0/docs/source/spelling_wordlist.txt +51 -0
  241. docp-0.2.0/docs/spellcheck.sh +31 -0
  242. docp-0.2.0/docs/update.sh +6 -0
  243. {docp-0.1.0b1 → docp-0.2.0}/pylintr/results/dbs_chroma.plr +6 -6
  244. docp-0.2.0/pylintr/results/libs_changelog.plr +81 -0
  245. docp-0.2.0/pylintr/results/libs_utilities.plr +85 -0
  246. {docp-0.1.0b1 → docp-0.2.0}/pylintr/results/loaders__chromabaseloader.plr +7 -7
  247. docp-0.2.0/pylintr/results/loaders__chromabasepdfloader.plr +97 -0
  248. docp-0.2.0/pylintr/results/loaders__chromabasepptxloader.plr +97 -0
  249. docp-0.1.0b1/pylintr/results/loaders_chroma.plr → docp-0.2.0/pylintr/results/loaders_chromapdfloader.plr +6 -6
  250. docp-0.2.0/pylintr/results/loaders_chromapptxloader.plr +85 -0
  251. {docp-0.1.0b1 → docp-0.2.0}/pylintr/results/objects__docbaseobject.plr +7 -7
  252. {docp-0.1.0b1 → docp-0.2.0}/pylintr/results/objects__pageobject.plr +5 -5
  253. docp-0.2.0/pylintr/results/objects__slideobject.plr +85 -0
  254. docp-0.2.0/pylintr/results/objects__textobject.plr +85 -0
  255. docp-0.2.0/pylintr/results/objects_pdfobject.plr +85 -0
  256. docp-0.1.0b1/pylintr/results/objects_pdfobject.plr → docp-0.2.0/pylintr/results/objects_pptxobject.plr +6 -6
  257. {docp-0.1.0b1 → docp-0.2.0}/pylintr/results/parsers__pdfbaseparser.plr +6 -6
  258. {docp-0.1.0b1 → docp-0.2.0}/pylintr/results/parsers__pdftableparser.plr +5 -5
  259. {docp-0.1.0b1 → docp-0.2.0}/pylintr/results/parsers__pdftextparser.plr +5 -5
  260. docp-0.2.0/pylintr/results/parsers__pptxbaseparser.plr +99 -0
  261. docp-0.2.0/pylintr/results/parsers__pptxtextparser.plr +93 -0
  262. docp-0.2.0/pylintr/results/parsers_pptxparser.plr +85 -0
  263. docp-0.2.0/pylintr/results/summary.plr +27 -0
  264. {docp-0.1.0b1 → docp-0.2.0}/pyproject.toml +4 -5
  265. docp-0.2.0/requirements.txt +11 -0
  266. docp-0.1.0b1/.gitignore +0 -2
  267. docp-0.1.0b1/PKG-INFO +0 -55
  268. docp-0.1.0b1/README.md +0 -22
  269. docp-0.1.0b1/docp/__init__.py +0 -31
  270. docp-0.1.0b1/docp/_version.py +0 -1
  271. docp-0.1.0b1/docp.egg-info/PKG-INFO +0 -55
  272. docp-0.1.0b1/docp.egg-info/SOURCES.txt +0 -46
  273. docp-0.1.0b1/pylintr/results/summary.plr +0 -16
  274. docp-0.1.0b1/requirements.txt +0 -11
  275. {docp-0.1.0b1 → docp-0.2.0}/.pylintrc +0 -0
  276. {docp-0.1.0b1 → docp-0.2.0}/LICENSE +0 -0
  277. {docp-0.1.0b1 → docp-0.2.0}/MANIFEST.in +0 -0
  278. {docp-0.1.0b1 → docp-0.2.0}/build.sh +0 -0
  279. /docp-0.1.0b1/docp/dbs/__init__.py → /docp-0.2.0/docp/.cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/53aa51172d142c89d9012cce15ae4d6cc0ca6895895114379cacb4fab128d9db.lock +0 -0
  280. /docp-0.1.0b1/docp/loaders/__init__.py → /docp-0.2.0/docp/.cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/59d594003bf59880a884c574bf88ef7555bb0202.lock +0 -0
  281. /docp-0.1.0b1/docp/objects/__init__.py → /docp-0.2.0/docp/.cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/72b987fd805cfa2b58c4c8c952b274a11bfd5a00.lock +0 -0
  282. /docp-0.1.0b1/docp/objects/_tableobject.py → /docp-0.2.0/docp/.cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/8cfec92309f5626a223304af2423e332f6d31887.lock +0 -0
  283. /docp-0.1.0b1/docp/objects/_textobject.py → /docp-0.2.0/docp/.cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/952a9b81c0bfd99800fabf352f69c7ccd46c5e43.lock +0 -0
  284. /docp-0.1.0b1/docp/parsers/__init__.py → /docp-0.2.0/docp/.cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/c79f2b6a0cea6f4b564fed1938984bace9d30ff0.lock +0 -0
  285. {docp-0.1.0b1 → docp-0.2.0}/docp/parsers/pdfparser.py +0 -0
  286. {docp-0.1.0b1 → docp-0.2.0}/docp.egg-info/dependency_links.txt +0 -0
  287. {docp-0.1.0b1 → docp-0.2.0}/docp.egg-info/top_level.txt +0 -0
  288. {docp-0.1.0b1 → docp-0.2.0}/pylintr/generate_rcfile.sh +0 -0
  289. {docp-0.1.0b1 → docp-0.2.0}/pylintr/pylintr.sh +0 -0
  290. /docp-0.1.0b1/pylintr/results/objects__tableobject.plr → /docp-0.2.0/pylintr/results/objects__imgobject.plr +0 -0
  291. /docp-0.1.0b1/pylintr/results/objects__textobject.plr → /docp-0.2.0/pylintr/results/objects__tableobject.plr +0 -0
  292. {docp-0.1.0b1 → docp-0.2.0}/pylintr/results/parsers_pdfparser.plr +0 -0
  293. {docp-0.1.0b1 → docp-0.2.0}/setup.cfg +0 -0
docp-0.2.0/.gitignore ADDED
@@ -0,0 +1,6 @@
1
+ *__pycache__*
2
+ *.pyc
3
+
4
+ # Ignore the development model caches.
5
+ docp/.cache/*
6
+
@@ -0,0 +1,24 @@
1
+ # .readthedocs.yaml
2
+ # Read the Docs configuration file
3
+ # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
4
+
5
+ # Required
6
+ version: 2
7
+
8
+ # Set the OS, Python version and other tools you might need
9
+ build:
10
+ os: ubuntu-lts-latest
11
+ tools:
12
+ python: "3.12"
13
+
14
+ # Build documentation in the "docs/" directory with Sphinx
15
+ sphinx:
16
+ builder: html
17
+ configuration: docs/source/conf.py
18
+
19
+ # Optional but recommended, declare the Python requirements required
20
+ # to build your documentation
21
+ # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
22
+ python:
23
+ install:
24
+ - requirements: docs/requirements.txt
docp-0.2.0/PKG-INFO ADDED
@@ -0,0 +1,110 @@
1
+ Metadata-Version: 2.2
2
+ Name: docp
3
+ Version: 0.2.0
4
+ Summary: A basic document parsing and loading utility.
5
+ Author-email: The Developers <development@s3dev.uk>
6
+ License: GNU GPL-3
7
+ Project-URL: Documentation, https://docp.readthedocs.io
8
+ Project-URL: Homepage, https://github.com/s3dev/docp
9
+ Project-URL: Repository, https://github.com/s3dev/docp
10
+ Keywords: document,library,parsing,utility,utilities
11
+ Classifier: Development Status :: 5 - Production/Stable
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Intended Audience :: End Users/Desktop
14
+ Classifier: Programming Language :: Python :: 3.7
15
+ Classifier: Programming Language :: Python :: 3.8
16
+ Classifier: Programming Language :: Python :: 3.9
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: Implementation :: CPython
21
+ Classifier: Operating System :: POSIX :: Linux
22
+ Classifier: Operating System :: Microsoft :: Windows
23
+ Classifier: Topic :: Software Development
24
+ Classifier: Topic :: Software Development :: Libraries
25
+ Classifier: Topic :: Utilities
26
+ Requires-Python: >=3.7
27
+ Description-Content-Type: text/markdown
28
+ License-File: LICENSE
29
+ Requires-Dist: pandas
30
+ Requires-Dist: unidecode
31
+ Requires-Dist: utils4
32
+
33
+
34
+ # A basic document parsing and loading utility.
35
+
36
+ [![PyPI - Version](https://img.shields.io/pypi/v/docp?style=flat-square)](https://pypi.org/project/docp)
37
+ [![PyPI - Implementation](https://img.shields.io/pypi/implementation/docp?style=flat-square)](https://pypi.org/project/docp)
38
+ [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/docp?style=flat-square)](https://pypi.org/project/docp)
39
+ [![PyPI - Status](https://img.shields.io/pypi/status/docp?style=flat-square)](https://pypi.org/project/docp)
40
+ [![Static Badge](https://img.shields.io/badge/tests-pending-orange?style=flat-square)](https://pypi.org/project/docp)
41
+ [![Static Badge](https://img.shields.io/badge/code_coverage-pending-orange?style=flat-square)](https://pypi.org/project/docp)
42
+ [![Static Badge](https://img.shields.io/badge/pylint_analysis-100%25-brightgreen?style=flat-square)](https://pypi.org/project/docp)
43
+ [![Documentation Status](https://readthedocs.org/projects/docp/badge/?version=latest&style=flat-square)](https://docp.readthedocs.io/en/latest/)
44
+ [![PyPI - License](https://img.shields.io/pypi/l/docp?style=flat-square)](https://opensource.org/license/gpl-3-0)
45
+ [![PyPI - Wheel](https://img.shields.io/pypi/wheel/docp?style=flat-square)](https://pypi.org/project/docp)
46
+
47
+ In its simplest form, the ``docp`` project is a (doc)ument \(p\)arsing library.
48
+
49
+ Written in CPython, the project wraps various lower-level libraries, helping to consolidate binary document structure parsing functionality into a single library. Additional functionality includes [document loaders](#loaders) which load a parsed document's embeddings into a Chroma vector database, for RAG-enabled LLM use.
50
+
51
+
52
+ ## Installation
53
+ The easiest way to install ``docp`` is using ``pip`` *after* activating your virtual environment:
54
+
55
+ pip install docp
56
+
57
+ Additional (older) releases can be found either at [PyPI](https://pypi.org/project/docp/#history) or in [GitHub Releases](https://github.com/s3dev/docp/releases).
58
+
59
+ ### A note on the installation of dependencies:
60
+ To keep the installation dependencies to a minimum, only core libraries are required for installation. Meaning, the parser-specific and loader libraries are *not* installed automatically, as part of the ``pip install`` command.
61
+
62
+ If a parser is imported and a library is required but not installed, you'll be notified with an easy-to-read message, listing the required dependenc(y|ies).
63
+
64
+ The rationale behind this design decision is that not all users will need the document *loading* capability, so ``torch``, ``langchain``, etc. should not be installed automatically. For example, if your project requires a simple PDF parser, you don't need to (and likely don't want to) 'clutter' your environment with something as heavy as ``torch``, nor make your project dependent on it.
65
+
66
+
67
+ ## The Toolset
68
+
69
+ ### Parsers
70
+ As of this release, parsers for the following binary document types are supported:
71
+
72
+ - PDF
73
+ - MS PowerPoint (PPTX)
74
+ - (more coming soon)
75
+
76
+ ### Loaders
77
+ In addition to document parsing, document *loading* functionality is built-in as well. Specifically, loading documents into a [Chroma](https://www.trychroma.com) vector database for RAG-enabled LLM ingestion.
78
+
79
+ For example, you may wish to load a series of PDF files into a vector database which serves as the backend for a RAG-enabled LLM chatbot. The ``ChromaLoader`` class is specifically designed for this. A single call to the class' loader method results in file retrieval, parsing, splitting, embedding and storage.
80
+
81
+ For further detail and usage examples, please refer to the project's [documentation](https://docp.readthedocs.io/).
82
+
83
+
84
+ ## Using the Library
85
+ The documentation suite contains detailed explanation and example usage for each of the library's importable modules. For detailed documentation, usage examples and links to the source code itself, please refer to the
86
+ [Library API](https://docp.readthedocs.io/en/latest/library.html) page in the documentation.
87
+
88
+ ### Quickstart
89
+ For convenience, here are a couple of examples of how to parse the supported document types.
90
+
91
+ **Extract text from a PDF file:**
92
+
93
+ >>> from docp import PDFParser
94
+
95
+ >>> pdf = PDFParser(path='/path/to/myfile.pdf')
96
+ >>> pdf.extract_text()
97
+
98
+ # Access the content of page 1.
99
+ >>> pg1 = pdf.doc.pages[1].content
100
+
101
+ **Extract text from a PowerPoint presentation:**
102
+
103
+ >>> from docp import PPTXParser
104
+
105
+ >>> pptx = PPTXParser(path='/path/to/myfile.pptx')
106
+ >>> pptx.extract_text()
107
+
108
+ # Access the text on slide 1.
109
+ >>> pg1 = pptx.doc.slides[1].content
110
+
docp-0.2.0/README.md ADDED
@@ -0,0 +1,78 @@
1
+
2
+ # A basic document parsing and loading utility.
3
+
4
+ [![PyPI - Version](https://img.shields.io/pypi/v/docp?style=flat-square)](https://pypi.org/project/docp)
5
+ [![PyPI - Implementation](https://img.shields.io/pypi/implementation/docp?style=flat-square)](https://pypi.org/project/docp)
6
+ [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/docp?style=flat-square)](https://pypi.org/project/docp)
7
+ [![PyPI - Status](https://img.shields.io/pypi/status/docp?style=flat-square)](https://pypi.org/project/docp)
8
+ [![Static Badge](https://img.shields.io/badge/tests-pending-orange?style=flat-square)](https://pypi.org/project/docp)
9
+ [![Static Badge](https://img.shields.io/badge/code_coverage-pending-orange?style=flat-square)](https://pypi.org/project/docp)
10
+ [![Static Badge](https://img.shields.io/badge/pylint_analysis-100%25-brightgreen?style=flat-square)](https://pypi.org/project/docp)
11
+ [![Documentation Status](https://readthedocs.org/projects/docp/badge/?version=latest&style=flat-square)](https://docp.readthedocs.io/en/latest/)
12
+ [![PyPI - License](https://img.shields.io/pypi/l/docp?style=flat-square)](https://opensource.org/license/gpl-3-0)
13
+ [![PyPI - Wheel](https://img.shields.io/pypi/wheel/docp?style=flat-square)](https://pypi.org/project/docp)
14
+
15
+ In its simplest form, the ``docp`` project is a (doc)ument \(p\)arsing library.
16
+
17
+ Written in CPython, the project wraps various lower-level libraries, helping to consolidate binary document structure parsing functionality into a single library. Additional functionality includes [document loaders](#loaders) which load a parsed document's embeddings into a Chroma vector database, for RAG-enabled LLM use.
18
+
19
+
20
+ ## Installation
21
+ The easiest way to install ``docp`` is using ``pip`` *after* activating your virtual environment:
22
+
23
+ pip install docp
24
+
25
+ Additional (older) releases can be found either at [PyPI](https://pypi.org/project/docp/#history) or in [GitHub Releases](https://github.com/s3dev/docp/releases).
26
+
27
+ ### A note on the installation of dependencies:
28
+ To keep the installation dependencies to a minimum, only core libraries are required for installation. Meaning, the parser-specific and loader libraries are *not* installed automatically, as part of the ``pip install`` command.
29
+
30
+ If a parser is imported and a library is required but not installed, you'll be notified with an easy-to-read message, listing the required dependenc(y|ies).
31
+
32
+ The rationale behind this design decision is that not all users will need the document *loading* capability, so ``torch``, ``langchain``, etc. should not be installed automatically. For example, if your project requires a simple PDF parser, you don't need to (and likely don't want to) 'clutter' your environment with something as heavy as ``torch``, nor make your project dependent on it.
33
+
34
+
35
+ ## The Toolset
36
+
37
+ ### Parsers
38
+ As of this release, parsers for the following binary document types are supported:
39
+
40
+ - PDF
41
+ - MS PowerPoint (PPTX)
42
+ - (more coming soon)
43
+
44
+ ### Loaders
45
+ In addition to document parsing, document *loading* functionality is built-in as well. Specifically, loading documents into a [Chroma](https://www.trychroma.com) vector database for RAG-enabled LLM ingestion.
46
+
47
+ For example, you may wish to load a series of PDF files into a vector database which serves as the backend for a RAG-enabled LLM chatbot. The ``ChromaLoader`` class is specifically designed for this. A single call to the class' loader method results in file retrieval, parsing, splitting, embedding and storage.
48
+
49
+ For further detail and usage examples, please refer to the project's [documentation](https://docp.readthedocs.io/).
50
+
51
+
52
+ ## Using the Library
53
+ The documentation suite contains detailed explanation and example usage for each of the library's importable modules. For detailed documentation, usage examples and links to the source code itself, please refer to the
54
+ [Library API](https://docp.readthedocs.io/en/latest/library.html) page in the documentation.
55
+
56
+ ### Quickstart
57
+ For convenience, here are a couple of examples of how to parse the supported document types.
58
+
59
+ **Extract text from a PDF file:**
60
+
61
+ >>> from docp import PDFParser
62
+
63
+ >>> pdf = PDFParser(path='/path/to/myfile.pdf')
64
+ >>> pdf.extract_text()
65
+
66
+ # Access the content of page 1.
67
+ >>> pg1 = pdf.doc.pages[1].content
68
+
69
+ **Extract text from a PowerPoint presentation:**
70
+
71
+ >>> from docp import PPTXParser
72
+
73
+ >>> pptx = PPTXParser(path='/path/to/myfile.pptx')
74
+ >>> pptx.extract_text()
75
+
76
+ # Access the text on slide 1.
77
+ >>> pg1 = pptx.doc.slides[1].content
78
+
@@ -0,0 +1,40 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ :Purpose: This module provides the project initialisation logic.
5
+
6
+ :Platform: Linux/Windows | Python 3.10+
7
+ :Developer: J Berendt
8
+ :Email: development@s3dev.uk
9
+
10
+ :Comments: The loader modules/classes have *not* been imported due to the
11
+ heavy dependency requirements. Refer to the loaders/__init__.py
12
+ module instead.
13
+
14
+ """
15
+
16
+ import os
17
+ import sys
18
+ sys.path.insert(0, os.path.dirname(os.path.realpath(__file__)))
19
+ from utils4.user_interface import ui
20
+ # locals
21
+ from .libs._version import __version__
22
+
23
+ # TODO: Change these to use logging.
24
+
25
+ # Bring entry-points to the surface.
26
+ try:
27
+ from .parsers.pdfparser import PDFParser
28
+ except ImportError as err:
29
+ msg = ( 'An error occurred while importing the PDF parser:\n'
30
+ f'- {err}\n'
31
+ ' - This can be ignored if the parser is not in use.\n')
32
+ ui.print_warning(f'\n[ImportError]: {msg}')
33
+
34
+ try:
35
+ from .parsers.pptxparser import PPTXParser
36
+ except ImportError as err:
37
+ msg = ( 'An error occurred while importing the PPTX parser:\n'
38
+ f'- {err}\n'
39
+ ' - This can be ignored if the parser is not in use.\n')
40
+ ui.print_warning(f'\n[ImportError]: {msg}')
File without changes
@@ -10,11 +10,18 @@
10
10
  :Developer: J Berendt
11
11
  :Email: development@s3dev.uk
12
12
 
13
- :Comments: n/a
13
+ :Comments: This module uses the
14
+ ``langchain_community.vectorstores.Chroma`` wrapper class,
15
+ rather than the base ``chromadb`` library as it provides the
16
+ ``add_texts`` method which supports GPU processing and
17
+ parallelisation; which is implemented by this module's
18
+ :meth:`~ChromaDB.add_documents` method.
14
19
 
15
20
  """
21
+ # pylint: disable=import-error
16
22
  # pylint: disable=wrong-import-order
17
23
 
24
+ from __future__ import annotations
18
25
  import chromadb
19
26
  import os
20
27
  import torch
@@ -81,19 +88,25 @@ class ChromaDB(_Chroma):
81
88
  """Accessor to the database's path."""
82
89
  return self._path
83
90
 
84
- def add_documents(self, docs: list):
91
+ def add_documents(self, docs: list[langchain_core.documents.base.Document]): # noqa # pylint: disable=undefined-variable
85
92
  """Add multiple documents to the collection.
86
93
 
87
- This method wraps ``Chroma.add_texts`` method which supports GPU
88
- processing and parallelisation. The ID is derived locally from
89
- the file's basename, page number and page content.
94
+ This method overrides the base class' ``add_documents`` method
95
+ to enable local ID derivation. Knowing *how* the IDs are derived
96
+ gives us greater understanding and querying ability of the
97
+ documents in the database. Each ID is derived locally by the
98
+ :meth:`_preproc` method from the file's basename, page number
99
+ and page content.
100
+
101
+ Additionally, this method wraps the
102
+ :func:`langchain_community.vectorstores.Chroma.add_texts`
103
+ method which supports GPU processing and parallelisation.
90
104
 
91
105
  Args:
92
106
  docs (list): A list of ``langchain_core.documents.base.Document``
93
107
  document objects.
94
108
 
95
109
  """
96
- # This method overrides the base class' add_documents method.
97
110
  # pylint: disable=arguments-differ
98
111
  # pylint: disable=arguments-renamed
99
112
  if not isinstance(docs, list):
@@ -0,0 +1 @@
1
+ __version__ = '0.2.0'
@@ -0,0 +1,7 @@
1
+ # Changed.
2
+ # ENABLE SPHINX TO ACCESS THE GIT LOG
3
+ """
4
+ .. git_changelog::
5
+ :revisions: 99
6
+ :detailed-message-pre: True
7
+ """
@@ -0,0 +1,107 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ :Purpose: This module provides utility-based functionality for the
5
+ project.
6
+
7
+ :Platform: Linux/Windows | Python 3.10+
8
+ :Developer: J Berendt
9
+ :Email: development@s3dev.uk
10
+
11
+ :Comments: n/a
12
+
13
+ """
14
+
15
+ import os
16
+ import sys
17
+ sys.path.insert(0, os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../'))
18
+ import re
19
+ from glob import glob
20
+ from utils4 import futils
21
+
22
+
23
class Utilities:
    """General (cross-project) utility functions."""

    @staticmethod
    def collect_files(path: str, ext: str, recursive: bool) -> list:
        """Collect all files for a given extension from a path.

        Args:
            path (str): Full path to the directory serving as the root
                for the search.
            ext (str): File extension to be collected. For example:
                ``ext = 'pdf'``.

                If anything other than ``'**'`` is provided, all
                alpha-characters are parsed from the string, and prefixed
                with ``*.``. Meaning, if ``'.pdf'`` is passed, the
                characters ``'pdf'`` are parsed and prefixed with ``*.``
                to create ``'*.pdf'``. However, if ``'things.foo'`` is
                passed, the derived extension will be ``'*.thingsfoo'``.

                If ``'**'`` is provided, all files are collected,
                regardless of their extension.

            recursive (bool): Instruct the search to recurse into
                sub-directories.

        Raises:
            ValueError: If ``ext`` is not ``'**'`` and does not contain
                any alpha-characters from which an extension can be
                derived.

        Returns:
            list: The list of full file paths returned by the ``glob``
            call. Any directory-only paths are removed.

        """
        if ext == '**':
            pattern = os.path.join(path, ext)
        else:
            # Join *all* alpha groups, as documented ('things.foo' -> 'thingsfoo').
            alpha = ''.join(re.findall('[a-zA-Z]+', ext))
            if not alpha:
                raise ValueError(f'A file extension cannot be derived from: {ext!r}')
            name = f'*.{alpha}'
            # glob only descends into sub-directories if the pattern itself
            # contains '**'; the recursive flag alone has no effect.
            if recursive:
                pattern = os.path.join(path, '**', name)
            else:
                pattern = os.path.join(path, name)
        return list(filter(os.path.isfile, glob(pattern, recursive=recursive)))

    # !!!: Replace this with utils4.futils when available.
    @staticmethod
    def ispdf(path: str) -> bool:
        """Test the file signature. Verify this is a valid PDF file.

        Only the first five bytes of the file are inspected and compared
        against the PDF signature (``%PDF-``). A file shorter than five
        bytes therefore tests as False.

        Args:
            path (str): Path to the file being tested.

        Returns:
            bool: True if the file starts with the PDF signature,
            otherwise False.

        """
        with open(path, 'rb') as f:
            sig = f.read(5)
        # b'%PDF-'
        return sig == b'\x25\x50\x44\x46\x2d'

    @staticmethod
    def iszip(path: str) -> bool:
        """Test the file signature. Verify this is a valid ZIP archive.

        This is a thin wrapper around ``utils4.futils.iszip``.

        Args:
            path (str): Path to the file being tested.

        Returns:
            bool: True if this is a valid ZIP archive, otherwise False.

        """
        return futils.iszip(path)

    @staticmethod
    def parse_to_keywords(resp: str) -> str:
        """Parse the bot's response into a string of keywords.

        The response is expected to list the keywords as asterisk bullet
        points or as a numbered list, one keyword (or phrase) per line.
        Hyphens and forward slashes are replaced by a space and the text
        is converted to lower case before the keywords are extracted.

        Args:
            resp (str): Text response directly from the bot.

        Returns:
            str: A comma-separated string of the keywords extracted from
            the response. An empty string is returned if no keywords are
            found.

        """
        # Capture asterisk bullet points or a numbered list. The final
        # item must be captured even if the response has no trailing newline.
        rexp = re.compile(r'(?:\*|[0-9]+\.)\s*(.*)(?:\n|$)')
        # Replace '-' (45) and '/' (47) with a space.
        trans = {45: ' ', 47: ' '}
        resp_ = resp.translate(trans).lower()
        kwds = rexp.findall(resp_)
        if kwds:
            return ', '.join(kwds)
        return ''
105
+
106
+
107
+ utilities = Utilities()
@@ -0,0 +1,38 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ :Purpose: This module provides the project initialisation logic.
5
+
6
+ :Platform: Linux/Windows | Python 3.10+
7
+ :Developer: J Berendt
8
+ :Email: development@s3dev.uk
9
+
10
+ :Comments: n/a
11
+
12
+ """
13
+
14
+ import os
15
+ import sys
16
+ sys.path.insert(0, os.path.dirname(os.path.realpath(__file__)))
17
+ from utils4.user_interface import ui
18
+
19
+ # TODO: Change these to use logging.
20
+
21
+ # Bring entry-points to the surface.
22
+ try:
23
+ from .chromapdfloader import ChromaPDFLoader
24
+ except ImportError as err:
25
+ # The chroma loader requires a lot of backend which is not required for the parser.
26
+ msg = ( 'An error occurred while importing the Chroma PDF loader:\n'
27
+ f'- {err}\n'
28
+ ' - This can be ignored if the loader is not in use.\n')
29
+ ui.print_warning(f'\n[ImportError]: {msg}')
30
+
31
+ try:
32
+ from .chromapptxloader import ChromaPPTXLoader
33
+ except ImportError as err:
34
+ # The chroma loader requires a lot of backend which is not required for the parser.
35
+ msg = ( 'An error occurred while importing the Chroma PPTX loader:\n'
36
+ f'- {err}\n'
37
+ ' - This can be ignored if the loader is not in use.\n')
38
+ ui.print_warning(f'\n[ImportError]: {msg}')