flowtask 5.8.4__cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (470) hide show
  1. flowtask/__init__.py +93 -0
  2. flowtask/__main__.py +38 -0
  3. flowtask/bots/__init__.py +6 -0
  4. flowtask/bots/check.py +93 -0
  5. flowtask/bots/codebot.py +51 -0
  6. flowtask/components/ASPX.py +148 -0
  7. flowtask/components/AddDataset.py +352 -0
  8. flowtask/components/Amazon.py +523 -0
  9. flowtask/components/AutoTask.py +314 -0
  10. flowtask/components/Azure.py +80 -0
  11. flowtask/components/AzureUsers.py +106 -0
  12. flowtask/components/BaseAction.py +91 -0
  13. flowtask/components/BaseLoop.py +198 -0
  14. flowtask/components/BestBuy.py +800 -0
  15. flowtask/components/CSVToGCS.py +120 -0
  16. flowtask/components/CompanyScraper/__init__.py +1 -0
  17. flowtask/components/CompanyScraper/parsers/__init__.py +6 -0
  18. flowtask/components/CompanyScraper/parsers/base.py +102 -0
  19. flowtask/components/CompanyScraper/parsers/explorium.py +192 -0
  20. flowtask/components/CompanyScraper/parsers/leadiq.py +206 -0
  21. flowtask/components/CompanyScraper/parsers/rocket.py +133 -0
  22. flowtask/components/CompanyScraper/parsers/siccode.py +109 -0
  23. flowtask/components/CompanyScraper/parsers/visualvisitor.py +130 -0
  24. flowtask/components/CompanyScraper/parsers/zoominfo.py +118 -0
  25. flowtask/components/CompanyScraper/scrapper.py +1054 -0
  26. flowtask/components/CopyTo.py +177 -0
  27. flowtask/components/CopyToBigQuery.py +243 -0
  28. flowtask/components/CopyToMongoDB.py +291 -0
  29. flowtask/components/CopyToPg.py +609 -0
  30. flowtask/components/CopyToRethink.py +207 -0
  31. flowtask/components/CreateGCSBucket.py +102 -0
  32. flowtask/components/CreateReport/CreateReport.py +228 -0
  33. flowtask/components/CreateReport/__init__.py +9 -0
  34. flowtask/components/CreateReport/charts/__init__.py +15 -0
  35. flowtask/components/CreateReport/charts/bar.py +51 -0
  36. flowtask/components/CreateReport/charts/base.py +66 -0
  37. flowtask/components/CreateReport/charts/pie.py +64 -0
  38. flowtask/components/CreateReport/utils.py +9 -0
  39. flowtask/components/CustomerSatisfaction.py +196 -0
  40. flowtask/components/DataInput.py +200 -0
  41. flowtask/components/DateList.py +255 -0
  42. flowtask/components/DbClient.py +163 -0
  43. flowtask/components/DialPad.py +146 -0
  44. flowtask/components/DocumentDBQuery.py +200 -0
  45. flowtask/components/DownloadFrom.py +371 -0
  46. flowtask/components/DownloadFromD2L.py +113 -0
  47. flowtask/components/DownloadFromFTP.py +181 -0
  48. flowtask/components/DownloadFromIMAP.py +315 -0
  49. flowtask/components/DownloadFromS3.py +198 -0
  50. flowtask/components/DownloadFromSFTP.py +265 -0
  51. flowtask/components/DownloadFromSharepoint.py +110 -0
  52. flowtask/components/DownloadFromSmartSheet.py +114 -0
  53. flowtask/components/DownloadS3File.py +229 -0
  54. flowtask/components/Dummy.py +59 -0
  55. flowtask/components/DuplicatePhoto.py +411 -0
  56. flowtask/components/EmployeeEvaluation.py +237 -0
  57. flowtask/components/ExecuteSQL.py +323 -0
  58. flowtask/components/ExtractHTML.py +178 -0
  59. flowtask/components/FileBase.py +178 -0
  60. flowtask/components/FileCopy.py +181 -0
  61. flowtask/components/FileDelete.py +82 -0
  62. flowtask/components/FileExists.py +146 -0
  63. flowtask/components/FileIteratorDelete.py +112 -0
  64. flowtask/components/FileList.py +194 -0
  65. flowtask/components/FileOpen.py +75 -0
  66. flowtask/components/FileRead.py +120 -0
  67. flowtask/components/FileRename.py +106 -0
  68. flowtask/components/FilterIf.py +284 -0
  69. flowtask/components/FilterRows/FilterRows.py +200 -0
  70. flowtask/components/FilterRows/__init__.py +10 -0
  71. flowtask/components/FilterRows/functions.py +4 -0
  72. flowtask/components/GCSToBigQuery.py +103 -0
  73. flowtask/components/GoogleA4.py +150 -0
  74. flowtask/components/GoogleGeoCoding.py +344 -0
  75. flowtask/components/GooglePlaces.py +315 -0
  76. flowtask/components/GoogleSearch.py +539 -0
  77. flowtask/components/HTTPClient.py +268 -0
  78. flowtask/components/ICIMS.py +146 -0
  79. flowtask/components/IF.py +179 -0
  80. flowtask/components/IcimsFolderCopy.py +173 -0
  81. flowtask/components/ImageFeatures/__init__.py +5 -0
  82. flowtask/components/ImageFeatures/process.py +233 -0
  83. flowtask/components/IteratorBase.py +251 -0
  84. flowtask/components/LangchainLoader/__init__.py +5 -0
  85. flowtask/components/LangchainLoader/loader.py +194 -0
  86. flowtask/components/LangchainLoader/loaders/__init__.py +22 -0
  87. flowtask/components/LangchainLoader/loaders/abstract.py +362 -0
  88. flowtask/components/LangchainLoader/loaders/basepdf.py +50 -0
  89. flowtask/components/LangchainLoader/loaders/docx.py +91 -0
  90. flowtask/components/LangchainLoader/loaders/html.py +119 -0
  91. flowtask/components/LangchainLoader/loaders/pdfblocks.py +146 -0
  92. flowtask/components/LangchainLoader/loaders/pdfmark.py +79 -0
  93. flowtask/components/LangchainLoader/loaders/pdftables.py +135 -0
  94. flowtask/components/LangchainLoader/loaders/qa.py +67 -0
  95. flowtask/components/LangchainLoader/loaders/txt.py +55 -0
  96. flowtask/components/LeadIQ.py +650 -0
  97. flowtask/components/Loop.py +253 -0
  98. flowtask/components/Lowes.py +334 -0
  99. flowtask/components/MS365Usage.py +156 -0
  100. flowtask/components/MSTeamsMessages.py +320 -0
  101. flowtask/components/MarketClustering.py +1051 -0
  102. flowtask/components/MergeFiles.py +362 -0
  103. flowtask/components/MilvusOutput.py +87 -0
  104. flowtask/components/NearByStores.py +175 -0
  105. flowtask/components/NetworkNinja/__init__.py +6 -0
  106. flowtask/components/NetworkNinja/models/__init__.py +52 -0
  107. flowtask/components/NetworkNinja/models/abstract.py +177 -0
  108. flowtask/components/NetworkNinja/models/account.py +39 -0
  109. flowtask/components/NetworkNinja/models/client.py +19 -0
  110. flowtask/components/NetworkNinja/models/district.py +14 -0
  111. flowtask/components/NetworkNinja/models/events.py +101 -0
  112. flowtask/components/NetworkNinja/models/forms.py +499 -0
  113. flowtask/components/NetworkNinja/models/market.py +16 -0
  114. flowtask/components/NetworkNinja/models/organization.py +34 -0
  115. flowtask/components/NetworkNinja/models/photos.py +125 -0
  116. flowtask/components/NetworkNinja/models/project.py +44 -0
  117. flowtask/components/NetworkNinja/models/region.py +28 -0
  118. flowtask/components/NetworkNinja/models/store.py +203 -0
  119. flowtask/components/NetworkNinja/models/user.py +151 -0
  120. flowtask/components/NetworkNinja/router.py +854 -0
  121. flowtask/components/Odoo.py +175 -0
  122. flowtask/components/OdooInjector.py +192 -0
  123. flowtask/components/OpenFromXML.py +126 -0
  124. flowtask/components/OpenWeather.py +41 -0
  125. flowtask/components/OpenWithBase.py +616 -0
  126. flowtask/components/OpenWithPandas.py +715 -0
  127. flowtask/components/PGPDecrypt.py +199 -0
  128. flowtask/components/PandasIterator.py +187 -0
  129. flowtask/components/PandasToFile.py +189 -0
  130. flowtask/components/Paradox.py +339 -0
  131. flowtask/components/ParamIterator.py +117 -0
  132. flowtask/components/ParseHTML.py +84 -0
  133. flowtask/components/PlacerStores.py +249 -0
  134. flowtask/components/Pokemon.py +507 -0
  135. flowtask/components/PositiveBot.py +62 -0
  136. flowtask/components/PowerPointSlide.py +400 -0
  137. flowtask/components/PrintMessage.py +127 -0
  138. flowtask/components/ProductCompetitors/__init__.py +5 -0
  139. flowtask/components/ProductCompetitors/parsers/__init__.py +7 -0
  140. flowtask/components/ProductCompetitors/parsers/base.py +72 -0
  141. flowtask/components/ProductCompetitors/parsers/bestbuy.py +86 -0
  142. flowtask/components/ProductCompetitors/parsers/lowes.py +103 -0
  143. flowtask/components/ProductCompetitors/scrapper.py +155 -0
  144. flowtask/components/ProductCompliant.py +169 -0
  145. flowtask/components/ProductInfo/__init__.py +1 -0
  146. flowtask/components/ProductInfo/parsers/__init__.py +5 -0
  147. flowtask/components/ProductInfo/parsers/base.py +83 -0
  148. flowtask/components/ProductInfo/parsers/brother.py +97 -0
  149. flowtask/components/ProductInfo/parsers/canon.py +167 -0
  150. flowtask/components/ProductInfo/parsers/epson.py +118 -0
  151. flowtask/components/ProductInfo/parsers/hp.py +131 -0
  152. flowtask/components/ProductInfo/parsers/samsung.py +97 -0
  153. flowtask/components/ProductInfo/scraper.py +319 -0
  154. flowtask/components/ProductPricing.py +118 -0
  155. flowtask/components/QS.py +261 -0
  156. flowtask/components/QSBase.py +201 -0
  157. flowtask/components/QueryIterator.py +273 -0
  158. flowtask/components/QueryToInsert.py +327 -0
  159. flowtask/components/QueryToPandas.py +432 -0
  160. flowtask/components/RESTClient.py +195 -0
  161. flowtask/components/RethinkDBQuery.py +189 -0
  162. flowtask/components/Rsync.py +74 -0
  163. flowtask/components/RunSSH.py +59 -0
  164. flowtask/components/RunShell.py +71 -0
  165. flowtask/components/SalesForce.py +20 -0
  166. flowtask/components/SaveImageBank/__init__.py +257 -0
  167. flowtask/components/SchedulingVisits.py +592 -0
  168. flowtask/components/ScrapPage.py +216 -0
  169. flowtask/components/ScrapSearch.py +79 -0
  170. flowtask/components/SendNotify.py +257 -0
  171. flowtask/components/SentimentAnalysis.py +694 -0
  172. flowtask/components/ServiceScrapper/__init__.py +5 -0
  173. flowtask/components/ServiceScrapper/parsers/__init__.py +1 -0
  174. flowtask/components/ServiceScrapper/parsers/base.py +94 -0
  175. flowtask/components/ServiceScrapper/parsers/costco.py +93 -0
  176. flowtask/components/ServiceScrapper/scrapper.py +199 -0
  177. flowtask/components/SetVariables.py +156 -0
  178. flowtask/components/SubTask.py +182 -0
  179. flowtask/components/SuiteCRM.py +48 -0
  180. flowtask/components/Switch.py +175 -0
  181. flowtask/components/TableBase.py +148 -0
  182. flowtask/components/TableDelete.py +312 -0
  183. flowtask/components/TableInput.py +143 -0
  184. flowtask/components/TableOutput/TableOutput.py +384 -0
  185. flowtask/components/TableOutput/__init__.py +3 -0
  186. flowtask/components/TableSchema.py +534 -0
  187. flowtask/components/Target.py +223 -0
  188. flowtask/components/ThumbnailGenerator.py +156 -0
  189. flowtask/components/ToPandas.py +67 -0
  190. flowtask/components/TransformRows/TransformRows.py +507 -0
  191. flowtask/components/TransformRows/__init__.py +9 -0
  192. flowtask/components/TransformRows/functions.py +559 -0
  193. flowtask/components/TransposeRows.py +176 -0
  194. flowtask/components/UPCDatabase.py +86 -0
  195. flowtask/components/UnGzip.py +171 -0
  196. flowtask/components/Uncompress.py +172 -0
  197. flowtask/components/UniqueRows.py +126 -0
  198. flowtask/components/Unzip.py +107 -0
  199. flowtask/components/UpdateOperationalVars.py +147 -0
  200. flowtask/components/UploadTo.py +299 -0
  201. flowtask/components/UploadToS3.py +136 -0
  202. flowtask/components/UploadToSFTP.py +160 -0
  203. flowtask/components/UploadToSharepoint.py +205 -0
  204. flowtask/components/UserFunc.py +122 -0
  205. flowtask/components/VivaTracker.py +140 -0
  206. flowtask/components/WSDLClient.py +123 -0
  207. flowtask/components/Wait.py +18 -0
  208. flowtask/components/Walmart.py +199 -0
  209. flowtask/components/Workplace.py +134 -0
  210. flowtask/components/XMLToPandas.py +267 -0
  211. flowtask/components/Zammad/__init__.py +41 -0
  212. flowtask/components/Zammad/models.py +0 -0
  213. flowtask/components/ZoomInfoScraper.py +409 -0
  214. flowtask/components/__init__.py +104 -0
  215. flowtask/components/abstract.py +18 -0
  216. flowtask/components/flow.py +530 -0
  217. flowtask/components/google.py +335 -0
  218. flowtask/components/group.py +221 -0
  219. flowtask/components/py.typed +0 -0
  220. flowtask/components/reviewscrap.py +132 -0
  221. flowtask/components/tAutoincrement.py +117 -0
  222. flowtask/components/tConcat.py +109 -0
  223. flowtask/components/tExplode.py +119 -0
  224. flowtask/components/tFilter.py +184 -0
  225. flowtask/components/tGroup.py +236 -0
  226. flowtask/components/tJoin.py +270 -0
  227. flowtask/components/tMap/__init__.py +9 -0
  228. flowtask/components/tMap/functions.py +54 -0
  229. flowtask/components/tMap/tMap.py +450 -0
  230. flowtask/components/tMelt.py +112 -0
  231. flowtask/components/tMerge.py +114 -0
  232. flowtask/components/tOrder.py +93 -0
  233. flowtask/components/tPandas.py +94 -0
  234. flowtask/components/tPivot.py +71 -0
  235. flowtask/components/tPluckCols.py +76 -0
  236. flowtask/components/tUnnest.py +82 -0
  237. flowtask/components/user.py +401 -0
  238. flowtask/conf.py +457 -0
  239. flowtask/download.py +102 -0
  240. flowtask/events/__init__.py +11 -0
  241. flowtask/events/events/__init__.py +20 -0
  242. flowtask/events/events/abstract.py +95 -0
  243. flowtask/events/events/alerts/__init__.py +362 -0
  244. flowtask/events/events/alerts/colfunctions.py +131 -0
  245. flowtask/events/events/alerts/functions.py +158 -0
  246. flowtask/events/events/dummy.py +12 -0
  247. flowtask/events/events/exec.py +124 -0
  248. flowtask/events/events/file/__init__.py +7 -0
  249. flowtask/events/events/file/base.py +51 -0
  250. flowtask/events/events/file/copy.py +23 -0
  251. flowtask/events/events/file/delete.py +16 -0
  252. flowtask/events/events/interfaces/__init__.py +9 -0
  253. flowtask/events/events/interfaces/client.py +67 -0
  254. flowtask/events/events/interfaces/credentials.py +28 -0
  255. flowtask/events/events/interfaces/notifications.py +58 -0
  256. flowtask/events/events/jira.py +122 -0
  257. flowtask/events/events/log.py +26 -0
  258. flowtask/events/events/logerr.py +52 -0
  259. flowtask/events/events/notify.py +59 -0
  260. flowtask/events/events/notify_event.py +160 -0
  261. flowtask/events/events/publish.py +54 -0
  262. flowtask/events/events/sendfile.py +104 -0
  263. flowtask/events/events/task.py +97 -0
  264. flowtask/events/events/teams.py +98 -0
  265. flowtask/events/events/webhook.py +58 -0
  266. flowtask/events/manager.py +287 -0
  267. flowtask/exceptions.c +39393 -0
  268. flowtask/exceptions.cpython-39-x86_64-linux-gnu.so +0 -0
  269. flowtask/extensions/__init__.py +3 -0
  270. flowtask/extensions/abstract.py +82 -0
  271. flowtask/extensions/logging/__init__.py +65 -0
  272. flowtask/hooks/__init__.py +9 -0
  273. flowtask/hooks/actions/__init__.py +22 -0
  274. flowtask/hooks/actions/abstract.py +66 -0
  275. flowtask/hooks/actions/dummy.py +23 -0
  276. flowtask/hooks/actions/jira.py +74 -0
  277. flowtask/hooks/actions/rest.py +320 -0
  278. flowtask/hooks/actions/sampledata.py +37 -0
  279. flowtask/hooks/actions/sensor.py +23 -0
  280. flowtask/hooks/actions/task.py +9 -0
  281. flowtask/hooks/actions/ticket.py +37 -0
  282. flowtask/hooks/actions/zammad.py +55 -0
  283. flowtask/hooks/hook.py +62 -0
  284. flowtask/hooks/models.py +17 -0
  285. flowtask/hooks/service.py +187 -0
  286. flowtask/hooks/step.py +91 -0
  287. flowtask/hooks/types/__init__.py +23 -0
  288. flowtask/hooks/types/base.py +129 -0
  289. flowtask/hooks/types/brokers/__init__.py +11 -0
  290. flowtask/hooks/types/brokers/base.py +54 -0
  291. flowtask/hooks/types/brokers/mqtt.py +35 -0
  292. flowtask/hooks/types/brokers/rabbitmq.py +82 -0
  293. flowtask/hooks/types/brokers/redis.py +83 -0
  294. flowtask/hooks/types/brokers/sqs.py +44 -0
  295. flowtask/hooks/types/fs.py +232 -0
  296. flowtask/hooks/types/http.py +49 -0
  297. flowtask/hooks/types/imap.py +200 -0
  298. flowtask/hooks/types/jira.py +279 -0
  299. flowtask/hooks/types/mail.py +205 -0
  300. flowtask/hooks/types/postgres.py +98 -0
  301. flowtask/hooks/types/responses/__init__.py +8 -0
  302. flowtask/hooks/types/responses/base.py +5 -0
  303. flowtask/hooks/types/sharepoint.py +288 -0
  304. flowtask/hooks/types/ssh.py +141 -0
  305. flowtask/hooks/types/tagged.py +59 -0
  306. flowtask/hooks/types/upload.py +85 -0
  307. flowtask/hooks/types/watch.py +71 -0
  308. flowtask/hooks/types/web.py +36 -0
  309. flowtask/interfaces/AzureClient.py +137 -0
  310. flowtask/interfaces/AzureGraph.py +839 -0
  311. flowtask/interfaces/Boto3Client.py +326 -0
  312. flowtask/interfaces/DropboxClient.py +173 -0
  313. flowtask/interfaces/ExcelHandler.py +94 -0
  314. flowtask/interfaces/FTPClient.py +131 -0
  315. flowtask/interfaces/GoogleCalendar.py +201 -0
  316. flowtask/interfaces/GoogleClient.py +133 -0
  317. flowtask/interfaces/GoogleDrive.py +127 -0
  318. flowtask/interfaces/GoogleGCS.py +89 -0
  319. flowtask/interfaces/GoogleGeocoding.py +93 -0
  320. flowtask/interfaces/GoogleLang.py +114 -0
  321. flowtask/interfaces/GooglePub.py +61 -0
  322. flowtask/interfaces/GoogleSheet.py +68 -0
  323. flowtask/interfaces/IMAPClient.py +137 -0
  324. flowtask/interfaces/O365Calendar.py +113 -0
  325. flowtask/interfaces/O365Client.py +220 -0
  326. flowtask/interfaces/OneDrive.py +284 -0
  327. flowtask/interfaces/Outlook.py +155 -0
  328. flowtask/interfaces/ParrotBot.py +130 -0
  329. flowtask/interfaces/SSHClient.py +378 -0
  330. flowtask/interfaces/Sharepoint.py +496 -0
  331. flowtask/interfaces/__init__.py +36 -0
  332. flowtask/interfaces/azureauth.py +119 -0
  333. flowtask/interfaces/cache.py +201 -0
  334. flowtask/interfaces/client.py +82 -0
  335. flowtask/interfaces/compress.py +525 -0
  336. flowtask/interfaces/credentials.py +124 -0
  337. flowtask/interfaces/d2l.py +239 -0
  338. flowtask/interfaces/databases/__init__.py +5 -0
  339. flowtask/interfaces/databases/db.py +223 -0
  340. flowtask/interfaces/databases/documentdb.py +55 -0
  341. flowtask/interfaces/databases/rethink.py +39 -0
  342. flowtask/interfaces/dataframes/__init__.py +11 -0
  343. flowtask/interfaces/dataframes/abstract.py +21 -0
  344. flowtask/interfaces/dataframes/arrow.py +71 -0
  345. flowtask/interfaces/dataframes/dt.py +69 -0
  346. flowtask/interfaces/dataframes/pandas.py +167 -0
  347. flowtask/interfaces/dataframes/polars.py +60 -0
  348. flowtask/interfaces/db.py +263 -0
  349. flowtask/interfaces/env.py +46 -0
  350. flowtask/interfaces/func.py +137 -0
  351. flowtask/interfaces/http.py +1780 -0
  352. flowtask/interfaces/locale.py +40 -0
  353. flowtask/interfaces/log.py +75 -0
  354. flowtask/interfaces/mask.py +143 -0
  355. flowtask/interfaces/notification.py +154 -0
  356. flowtask/interfaces/playwright.py +339 -0
  357. flowtask/interfaces/powerpoint.py +368 -0
  358. flowtask/interfaces/py.typed +0 -0
  359. flowtask/interfaces/qs.py +376 -0
  360. flowtask/interfaces/result.py +87 -0
  361. flowtask/interfaces/selenium_service.py +779 -0
  362. flowtask/interfaces/smartsheet.py +154 -0
  363. flowtask/interfaces/stat.py +39 -0
  364. flowtask/interfaces/task.py +96 -0
  365. flowtask/interfaces/template.py +118 -0
  366. flowtask/interfaces/vectorstores/__init__.py +1 -0
  367. flowtask/interfaces/vectorstores/abstract.py +133 -0
  368. flowtask/interfaces/vectorstores/milvus.py +669 -0
  369. flowtask/interfaces/zammad.py +107 -0
  370. flowtask/models.py +193 -0
  371. flowtask/parsers/__init__.py +15 -0
  372. flowtask/parsers/_yaml.c +11978 -0
  373. flowtask/parsers/_yaml.cpython-39-x86_64-linux-gnu.so +0 -0
  374. flowtask/parsers/argparser.py +235 -0
  375. flowtask/parsers/base.c +15155 -0
  376. flowtask/parsers/base.cpython-39-x86_64-linux-gnu.so +0 -0
  377. flowtask/parsers/json.c +11968 -0
  378. flowtask/parsers/json.cpython-39-x86_64-linux-gnu.so +0 -0
  379. flowtask/parsers/maps.py +49 -0
  380. flowtask/parsers/toml.c +11968 -0
  381. flowtask/parsers/toml.cpython-39-x86_64-linux-gnu.so +0 -0
  382. flowtask/plugins/__init__.py +16 -0
  383. flowtask/plugins/components/__init__.py +0 -0
  384. flowtask/plugins/handler/__init__.py +45 -0
  385. flowtask/plugins/importer.py +31 -0
  386. flowtask/plugins/sources/__init__.py +0 -0
  387. flowtask/runner.py +283 -0
  388. flowtask/scheduler/__init__.py +9 -0
  389. flowtask/scheduler/functions.py +493 -0
  390. flowtask/scheduler/handlers/__init__.py +8 -0
  391. flowtask/scheduler/handlers/manager.py +504 -0
  392. flowtask/scheduler/handlers/models.py +58 -0
  393. flowtask/scheduler/handlers/service.py +72 -0
  394. flowtask/scheduler/notifications.py +65 -0
  395. flowtask/scheduler/scheduler.py +993 -0
  396. flowtask/services/__init__.py +0 -0
  397. flowtask/services/bots/__init__.py +0 -0
  398. flowtask/services/bots/telegram.py +264 -0
  399. flowtask/services/files/__init__.py +11 -0
  400. flowtask/services/files/manager.py +522 -0
  401. flowtask/services/files/model.py +37 -0
  402. flowtask/services/files/service.py +767 -0
  403. flowtask/services/jira/__init__.py +3 -0
  404. flowtask/services/jira/jira_actions.py +191 -0
  405. flowtask/services/tasks/__init__.py +13 -0
  406. flowtask/services/tasks/launcher.py +213 -0
  407. flowtask/services/tasks/manager.py +323 -0
  408. flowtask/services/tasks/service.py +275 -0
  409. flowtask/services/tasks/task_manager.py +376 -0
  410. flowtask/services/tasks/tasks.py +155 -0
  411. flowtask/storages/__init__.py +16 -0
  412. flowtask/storages/exceptions.py +12 -0
  413. flowtask/storages/files/__init__.py +8 -0
  414. flowtask/storages/files/abstract.py +29 -0
  415. flowtask/storages/files/filesystem.py +66 -0
  416. flowtask/storages/tasks/__init__.py +19 -0
  417. flowtask/storages/tasks/abstract.py +26 -0
  418. flowtask/storages/tasks/database.py +33 -0
  419. flowtask/storages/tasks/filesystem.py +108 -0
  420. flowtask/storages/tasks/github.py +119 -0
  421. flowtask/storages/tasks/memory.py +45 -0
  422. flowtask/storages/tasks/row.py +25 -0
  423. flowtask/tasks/__init__.py +0 -0
  424. flowtask/tasks/abstract.py +526 -0
  425. flowtask/tasks/command.py +118 -0
  426. flowtask/tasks/pile.py +486 -0
  427. flowtask/tasks/py.typed +0 -0
  428. flowtask/tasks/task.py +778 -0
  429. flowtask/template/__init__.py +161 -0
  430. flowtask/tests.py +257 -0
  431. flowtask/types/__init__.py +8 -0
  432. flowtask/types/typedefs.c +11347 -0
  433. flowtask/types/typedefs.cpython-39-x86_64-linux-gnu.so +0 -0
  434. flowtask/utils/__init__.py +24 -0
  435. flowtask/utils/constants.py +117 -0
  436. flowtask/utils/encoders.py +21 -0
  437. flowtask/utils/executor.py +112 -0
  438. flowtask/utils/functions.cpp +14280 -0
  439. flowtask/utils/functions.cpython-39-x86_64-linux-gnu.so +0 -0
  440. flowtask/utils/json.cpp +13349 -0
  441. flowtask/utils/json.cpython-39-x86_64-linux-gnu.so +0 -0
  442. flowtask/utils/mail.py +63 -0
  443. flowtask/utils/parseqs.c +13324 -0
  444. flowtask/utils/parserqs.cpython-39-x86_64-linux-gnu.so +0 -0
  445. flowtask/utils/stats.py +308 -0
  446. flowtask/utils/transformations.py +74 -0
  447. flowtask/utils/uv.py +12 -0
  448. flowtask/utils/validators.py +97 -0
  449. flowtask/version.py +11 -0
  450. flowtask-5.8.4.dist-info/LICENSE +201 -0
  451. flowtask-5.8.4.dist-info/METADATA +209 -0
  452. flowtask-5.8.4.dist-info/RECORD +470 -0
  453. flowtask-5.8.4.dist-info/WHEEL +6 -0
  454. flowtask-5.8.4.dist-info/entry_points.txt +3 -0
  455. flowtask-5.8.4.dist-info/top_level.txt +2 -0
  456. plugins/components/CreateQR.py +39 -0
  457. plugins/components/TestComponent.py +28 -0
  458. plugins/components/Use1.py +13 -0
  459. plugins/components/Workplace.py +117 -0
  460. plugins/components/__init__.py +3 -0
  461. plugins/sources/__init__.py +0 -0
  462. plugins/sources/get_populartimes.py +78 -0
  463. plugins/sources/google.py +150 -0
  464. plugins/sources/hubspot.py +679 -0
  465. plugins/sources/icims.py +679 -0
  466. plugins/sources/mobileinsight.py +501 -0
  467. plugins/sources/newrelic.py +262 -0
  468. plugins/sources/uap.py +268 -0
  469. plugins/sources/venu.py +244 -0
  470. plugins/sources/vocinity.py +314 -0
@@ -0,0 +1,194 @@
1
+ import asyncio
2
+ from typing import List
3
+ from collections.abc import Callable
4
+ import importlib
5
+ from pathlib import Path, PurePath
6
+ from parrot.loaders import AbstractLoader, Document
7
+ from parrot.llms.vertex import VertexLLM
8
+ from ..flow import FlowComponent
9
+ from ...exceptions import ConfigError, ComponentError
10
+ from ...conf import (
11
+ DEFAULT_LLM_MODEL,
12
+ DEFAULT_LLM_TEMPERATURE
13
+ )
14
+
15
class LangchainLoader(FlowComponent):
    """LangchainLoader.

    Overview:

        Take a file (or a directory of files) and convert its content
        into Langchain Documents.

    Example:

    ```yaml
    LangchainLoader:
      path: /home/ubuntu/symbits/lg/bot/products_positive
      source_type: Product-Top-Reviews
      loader: HTMLLoader
      chunk_size: 2048
      elements:
        - div: .product
    ```

    """

    def __init__(
        self,
        loop: asyncio.AbstractEventLoop = None,
        job: Callable = None,
        stat: Callable = None,
        **kwargs,
    ):
        """Read the component options out of ``kwargs``.

        Options taken with ``pop`` are consumed here; ``encoding``,
        ``chunk_size``, ``device`` and ``cuda_device`` are read with ``get``
        and therefore are still forwarded to the parent constructor.
        """
        self.extensions: list = kwargs.pop('extensions', [])
        self.encoding: str = kwargs.get('encoding', 'utf-8')
        self.path: str = kwargs.pop('path', None)
        self.skip_directories: List[str] = kwargs.pop('skip_directories', [])
        self._chunk_size = kwargs.get('chunk_size', 2048)
        self._embed_size: int = kwargs.pop('embed_size', 768)
        self.source_type: str = kwargs.pop('source_type', 'document')
        self.doctype: str = kwargs.pop('doctype', 'document')
        # LLM (if required)
        self._llm = kwargs.pop('llm', None)
        super().__init__(
            loop=loop, job=job, stat=stat, **kwargs
        )
        self._device: str = kwargs.get('device', 'cpu')
        self._cuda_number: int = kwargs.get('cuda_device', 0)
        # Cache of already-built loaders, keyed by loader name or file suffix,
        # to avoid instantiating the same loader several times.
        self._caching_loaders: dict = {}

    async def close(self):
        """Release resources; currently nothing needs to be torn down."""
        # Destroy effectively all Models.
        pass

    def get_default_llm(self):
        """Return a VertexLLM instance built from the configured defaults."""
        return VertexLLM(
            model=DEFAULT_LLM_MODEL,
            temperature=DEFAULT_LLM_TEMPERATURE,
            top_k=30,
            top_p=0.5,
        )

    async def start(self, **kwargs):
        """Validate and normalise ``self.path`` before running.

        Raises:
            ConfigError: when no *path* attribute was provided.
            ComponentError: when the resolved path does not exist.
        """
        await super().start(**kwargs)
        if not self.path:
            raise ConfigError(
                "Provide at least one directory or filename in *path* attribute."
            )
        if isinstance(self.path, str):
            # Only plain strings can carry mask placeholders.
            self.path = self.mask_replacement_recursively(self.path)
        # NOTE(review): nesting reconstructed from a flattened source; the
        # path is always resolved because run() relies on Path methods.
        self.path = Path(self.path).resolve()
        if not self.path.exists():
            raise ComponentError(
                f"Langchain: {self.path} doesn't exists."
            )

    def _common_loader_args(self) -> dict:
        """Build the keyword arguments shared by every loader constructor."""
        return {
            # These two attributes are not assigned by this class; fall back
            # to None rather than failing with AttributeError when absent.
            "markdown_splitter": getattr(self, '_md_splitter', None),
            "summarization_model": getattr(self, 'summarization_model', None),
            "device": self._device,
            "cuda_number": self._cuda_number,
            "source_type": self.source_type,
            "encoding": self.encoding,
            "llm": self._llm,
        }

    def _get_loader(self, suffix: str, **kwargs):
        """
        Get a Document Loader based on the file suffix.

        No suffix-to-loader mapping exists yet, so this fails explicitly
        instead of returning ``None`` (which would later break on
        ``async with``).

        TODO: a more automated way using importlib.

        Raises:
            ComponentError: always, until a suffix mapping is implemented.
        """
        raise ComponentError(
            f"Langchain Loader: no loader available for suffix '{suffix}', "
            "declare the *loader* attribute explicitly."
        )

    def _load_loader(self, name: str, **kwargs):
        """Dynamically import and instantiate a loader from the loaders module.

        Args:
            name: The name of the loader class to import (e.g., 'QALoader').
            **kwargs: Extra constructor arguments; they override the common
                arguments on key collision.

        Returns:
            The loader instance (also stored in the loader cache under *name*).

        Raises:
            ImportError: when the module or the class cannot be resolved.
        """
        try:
            module = importlib.import_module(".loaders", package=__package__)
            cls = getattr(module, name)
            if not cls:
                # A falsy attribute is not a usable loader class.
                raise AttributeError(
                    f"'{name}' is not a loader class"
                )
            loader = cls(**{**self._common_loader_args(), **kwargs})
            self._caching_loaders[name] = loader
            return loader
        except (ModuleNotFoundError, AttributeError) as e:
            raise ImportError(
                f"Unable to load the loader '{name}': {e}"
            ) from e

    def _iter_files(self):
        """Yield the files directly under ``self.path`` that should be loaded.

        When ``self.extensions`` is set, one glob per extension is used;
        otherwise every entry matching ``*.*`` is considered. Files whose path
        contains any of ``self.skip_directories`` are skipped.
        """
        if self.extensions:
            patterns = [f'*{ext}' for ext in self.extensions]
        else:
            patterns = ['*.*']
        for pattern in patterns:
            for item in self.path.glob(pattern):
                if item.is_file() and set(item.parts).isdisjoint(self.skip_directories):
                    yield item

    async def _load_document(self, path: PurePath) -> List[Document]:
        """Load one file with the loader matching its suffix (cached)."""
        suffix = path.suffix
        loader = self._caching_loaders.get(suffix)
        if loader is None:
            loader = self._get_loader(suffix)
            self._caching_loaders[suffix] = loader
        async with loader as ld:
            documents = await ld.load(path)
        # split or not split?
        return documents

    async def run(self):
        """Load every selected file and store the documents in ``self._result``.

        Returns:
            True once the documents were collected.

        Raises:
            ValueError: when no explicit loader is set and the path is neither
                a directory nor a regular file.
        """
        documents = []
        if hasattr(self, 'loader'):
            # Explicit loader: a single instance handles every file.
            loader = self._load_loader(self.loader, **self._attrs)
            async with loader as ld:
                if self.path.is_dir():
                    for item in self._iter_files():
                        documents.extend(await ld.load(item))
                else:
                    documents = await ld.load(self.path)
        elif self.path.is_dir():
            for item in self._iter_files():
                documents.extend(await self._load_document(item))
        elif self.path.is_file():
            # An empty extension list means "no filter", as for directories.
            allowed = not self.extensions or self.path.suffix in self.extensions
            if allowed and set(self.path.parts).isdisjoint(self.skip_directories):
                documents = await self._load_document(self.path)
        else:
            raise ValueError(
                f"Langchain Loader: Invalid path: {self.path}"
            )
        self._result = documents
        self.add_metric('NUM_DOCUMENTS', len(documents))
        # return self._result
        return True
@@ -0,0 +1,22 @@
1
+ # """
2
+ # Langchain Loaders.
3
+
4
+ # Basic Documents Loaders, adapted to be used in Flowtask Tasks.
5
+ # """
6
+ # from .docx import MSWordLoader
7
+ # from .qa import QAFileLoader
8
+ # from .pdfmark import PDFMarkdown
9
+ # from .pdftables import PDFTables
10
+ # from .pdfblocks import PDFBlocks
11
+ # from .txt import TXTLoader
12
+ # from .html import HTMLLoader
13
+
14
+ # __all__ = (
15
+ # "MSWordLoader",
16
+ # "QAFileLoader",
17
+ # "PDFMarkdown",
18
+ # "PDFTables",
19
+ # "TXTLoader",
20
+ # "PDFBlocks",
21
+ # "HTMLLoader",
22
+ # )
@@ -0,0 +1,362 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import Union, List, Optional
3
+ from collections.abc import Callable
4
+ from datetime import datetime
5
+ from pathlib import Path, PurePath
6
+ import torch
7
+ from langchain.docstore.document import Document
8
+ from langchain.chains.summarize import load_summarize_chain
9
+ from langchain.text_splitter import (
10
+ TokenTextSplitter
11
+ )
12
+ from langchain_huggingface import HuggingFacePipeline
13
+ from transformers import (
14
+ AutoModelForSeq2SeqLM,
15
+ AutoTokenizer,
16
+ pipeline
17
+ )
18
+ from langchain_core.prompts import PromptTemplate
19
+ from navconfig.logging import logging
20
+ from navigator.libs.json import JSONContent # pylint: disable=E0611
21
+ from parrot.llms.vertex import VertexLLM
22
+ from ....conf import (
23
+ EMBEDDING_DEVICE,
24
+ DEFAULT_LLM_MODEL,
25
+ DEFAULT_LLM_TEMPERATURE,
26
+ )
27
+
28
+ class AbstractLoader(ABC):
29
+ """
30
+ Abstract class for Document loaders.
31
+ """
32
+ _extension: List[str] = []
33
+
34
def __init__(
    self,
    tokenizer: Union[str, Callable] = None,
    text_splitter: Union[str, Callable] = None,
    summarizer: Union[str, Callable] = None,
    markdown_splitter: Union[str, Callable] = None,
    source_type: str = 'file',
    doctype: Optional[str] = 'document',
    device: str = None,
    cuda_number: int = 0,
    llm: Callable = None,
    **kwargs
):
    """Store the loader configuration; no model or device is created here.

    Args:
        tokenizer: Tokenizer (name or callable) kept on ``self.tokenizer``.
        text_splitter: Text splitter kept on ``self.text_splitter``.
        summarizer: Pre-built summarization model, kept on
            ``self._summary_model``.
        markdown_splitter: Markdown splitter kept on
            ``self.markdown_splitter``.
        source_type: Stored as ``self._source_type``.
        doctype: Stored as ``self.doctype``.
        device: Requested device name; the torch device itself is only
            resolved in ``__aenter__``.
        cuda_number: CUDA device index used when a CUDA device is selected.
        llm: Optional LLM object, stored as ``self._llm``.
        **kwargs: Optional settings read here: ``path`` (removed from
            kwargs), ``encoding``, ``summarization_model``,
            ``no_summarization``, ``translation`` and ``category``.
    """
    # Text-processing collaborators supplied by the caller.
    self.tokenizer = tokenizer
    self._summary_model = summarizer
    self.text_splitter = text_splitter
    self.markdown_splitter = markdown_splitter
    self.doctype = doctype

    # One logger per concrete loader class.
    logger_name = f"Loader.{self.__class__.__name__}"
    self.logger = logging.getLogger(logger_name)

    # 'path' is the only option popped from kwargs; the rest are read
    # with .get() and therefore stay in the dict.
    self.path = kwargs.pop('path', None)
    self._source_type = source_type

    # LLM (if required) and JSON encoder.
    self._llm = llm
    self._encoder = JSONContent()

    # Device selection is deferred until the async context is entered.
    self.device_name = device
    self.cuda_number = cuda_number
    self._device = None

    # Optional behaviour switches with their defaults.
    self.encoding: str = kwargs.get('encoding', 'utf-8')
    default_summarizer = "facebook/bart-large-cnn"
    self.summarization_model = kwargs.get(
        'summarization_model', default_summarizer
    )
    self._no_summarization = kwargs.get('no_summarization', False)
    self._translation = kwargs.get('translation', False)
    self.category: str = kwargs.get('category', 'document')
72
+
73
async def __aenter__(self):
    """Enter the async context: resolve the torch device, return the loader."""
    device = self._get_device(self.device_name, self.cuda_number)
    self._device = device
    return self
80
+
81
def supported_extensions(self):
    """Return the suffix list declared in ``_extension`` (not a copy)."""
    extensions = self._extension
    return extensions
83
+
84
async def __aexit__(self, *exc_info):
    """Leave the async context by running ``post_load``.

    The exception details are accepted but not inspected, and nothing
    truthy is returned, so an in-flight exception keeps propagating.
    """
    self.post_load()
    return None
86
+
87
def post_load(self):
    """Release per-load resources once loading has finished.

    Drops the tokenizer and text-splitter references and, only when CUDA
    is usable, waits for pending kernels and frees cached GPU memory.
    """
    self.tokenizer = None  # Reset the tokenizer
    self.text_splitter = None  # Reset the text splitter
    # torch.cuda.synchronize() initialises CUDA and raises on hosts without
    # a usable GPU, which would make every context exit fail on CPU-only
    # machines; only touch the CUDA runtime when it is available.
    if torch.cuda.is_available():
        torch.cuda.synchronize()  # Wait for all kernels to finish
        torch.cuda.empty_cache()  # Clear unused memory
92
+
93
def _get_device(self, device_type: str = None, cuda_number: int = 0):
    """Pick the torch device used by this loader.

    Args:
        device_type: ``'cpu'`` or ``'cuda'`` to force a device; any other
            value (including ``None``) triggers auto-detection.
        cuda_number: Index of the CUDA device to use.

    Returns:
        torch.device: The forced device, otherwise the first available of
        CUDA and MPS, otherwise the device named by ``EMBEDDING_DEVICE``.
    """
    cuda_name = f'cuda:{cuda_number}'

    # Explicit requests win over detection.
    if device_type == 'cpu':
        return torch.device('cpu')
    if device_type == 'cuda':
        return torch.device(cuda_name)

    # Auto-detection: CUDA GPU first.
    if torch.cuda.is_available():
        return torch.device(cuda_name)
    # Then the MPS backend (Metal Performance Shaders on Apple hardware).
    if torch.backends.mps.is_available():
        return torch.device("mps")

    # Nothing detected: fall back to the configured embedding device.
    fallback = cuda_name if EMBEDDING_DEVICE == 'cuda' else EMBEDDING_DEVICE
    return torch.device(fallback)
112
+
113
def _check_path(
    self,
    path: PurePath,
    suffix: Optional[List[str]] = None
) -> bool:
    """Check that ``path`` is an existing file with an accepted suffix.

    Args:
        path (PurePath): The path to the file. A plain string is also
            accepted and is resolved to an absolute path first.
        suffix (Optional[List[str]]): Accepted suffixes, including the
            leading dot. When empty or omitted, the loader's
            ``_extension`` list is used.
    Returns:
        bool: True if the path is a regular file whose suffix is in
        ``suffix``, False otherwise.
    """
    if isinstance(path, str):
        path = Path(path).resolve()
    elif isinstance(path, PurePath) and not isinstance(path, Path):
        # Pure paths cannot touch the filesystem (no exists()/is_file()),
        # so turn them into a concrete Path before checking.
        path = Path(path)
    if not suffix:
        suffix = self._extension
    # is_file() is already False for a missing path, so it covers exists().
    return path.is_file() and path.suffix in suffix
129
+
130
def create_metadata(
    self,
    path: Union[str, PurePath],
    doctype: str = 'document',
    source_type: str = 'source',
    doc_metadata: Optional[dict] = None,
    summary: Optional[str] = '',
    **kwargs
):
    """Build the metadata dictionary attached to a loaded document.

    Args:
        path: Origin of the document. For a path object the file name is
            used as ``source`` and ``file://<name>`` as ``url``; any other
            value is used verbatim for both, and ``filename`` becomes
            ``file://<path>``.
        doctype: Stored under ``type``.
        source_type: Stored under ``source_type``.
        doc_metadata: Extra document metadata, shallow-copied into
            ``document_meta``.
        summary: Stored under ``summary``.
        **kwargs: Additional entries merged last, so they override the
            standard keys of the same name.

    Returns:
        dict: The metadata, stamped with the current local time under
        ``created_at`` and with the loader's ``category``.
    """
    if isinstance(path, PurePath):
        origin, url, filename = path.name, f'file://{path.name}', path
    else:
        origin, url, filename = path, path, f'file://{path}'

    metadata = dict(
        url=url,
        source=origin,
        filename=str(filename),
        type=doctype,
        question='',
        answer='',
        summary=summary,
        source_type=source_type,
        created_at=datetime.now().strftime("%Y-%m-%d, %H:%M:%S"),
        category=self.category,
        document_meta={**(doc_metadata or {})},
    )
    # Caller-supplied extras go last and may replace any key above.
    metadata.update(kwargs)
    return metadata
166
+
167
def get_default_llm(self):
    """Create a VertexLLM configured from the project defaults.

    The model name and temperature come from ``DEFAULT_LLM_MODEL`` and
    ``DEFAULT_LLM_TEMPERATURE``; sampling uses ``top_k=30``, ``top_p=0.5``.
    """
    settings = {
        "model": DEFAULT_LLM_MODEL,
        "temperature": DEFAULT_LLM_TEMPERATURE,
        "top_k": 30,
        "top_p": 0.5,
    }
    return VertexLLM(**settings)
175
+
176
def get_summary_from_text(self, text: str, use_gpu: bool = False) -> str:
    """Summarize ``text`` with a LangChain "refine" summarization chain.

    The chain always runs on the LLM returned by ``get_default_llm()``;
    the LLM passed to the constructor is not consulted here.

    Args:
        text (str): The text to summarize.
        use_gpu (bool): Currently unused; kept so existing callers that
            pass it keep working.

    Returns:
        str: The generated summary, or an empty string when ``text`` is
        empty or the chain fails (the failure is logged, not raised).
    """
    if not text:
        # No data to be summarized.
        return ''
    prompt = PromptTemplate.from_template(
        "Write a summary of the following, please also identify the main theme:\n"
        "{text}\n"
        "SUMMARY:"
    )
    # Adjacent literals are concatenated verbatim, so each fragment that
    # continues a sentence carries its own separating space.
    refine_template = (
        "Your job is to produce a final summary\n"
        "We have provided an existing summary up to a certain point: {existing_answer}\n"
        "We have the opportunity to refine the existing summary "
        "(only if needed) with some more context below.\n"
        "------------\n"
        "{text}\n"
        "------------\n"
        "Given the new context, refine the original summary adding more explanation. "
        "If the context isn't useful, return the original summary."
    )
    refine_prompt = PromptTemplate.from_template(refine_template)
    llm = self.get_default_llm().get_llm()
    summarize_chain = load_summarize_chain(
        llm=llm,
        chain_type="refine",
        question_prompt=prompt,
        refine_prompt=refine_prompt,
        return_intermediate_steps=False,
        input_key="input_documents",
        output_key="output_text",
    )
    doc = Document(page_content=text)
    try:
        summary = summarize_chain.invoke(
            {"input_documents": [doc]}, return_only_outputs=True
        )
        return summary.get('output_text', '')
    except Exception as e:  # best effort: a failed summary must not abort loading
        # Report through the loader's logger instead of stdout.
        self.logger.error("Error in get_summary_from_text: %s", e)
        return ""
232
+
233
def get_translator(self, model_name: str = 'Helsinki-NLP/opus-mt-en-es'):
    """Build a Hugging Face translation pipeline, if translation is enabled.

    Args:
        model_name: Name of the seq2seq translation model to load.

    Returns:
        The translation pipeline, or None when the loader was created
        without ``translation`` enabled.
    """
    if not self._translation:
        return None

    # NOTE(review): trust_remote_code=True allows code shipped with the
    # model repository to run; confirm this is intended for every model.
    model = AutoModelForSeq2SeqLM.from_pretrained(
        model_name,
        trust_remote_code=True
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # NOTE(review): batch_size is given as a boolean here; confirm whether
    # an explicit integer was intended.
    options = {
        "model": model,
        "tokenizer": tokenizer,
        "batch_size": True,
        "max_new_tokens": 500,
        "min_new_tokens": 300,
        "use_fast": True,
    }
    return pipeline("translation", **options)
253
+
254
def get_summarization_model(
    self,
    model_name: str = 'facebook/bart-large-cnn',
    use_gpu: bool = False
):
    """Return the summarization model, building and caching it on first use.

    Args:
        model_name: Hugging Face model used when no model is cached yet.
        use_gpu: Move the freshly loaded model to CUDA before wrapping it.

    Returns:
        The cached or newly built summarization model, or None when the
        loader's ``_no_summarization`` flag is exactly ``True``.
    """
    if self._no_summarization is True:
        return None
    if self._summary_model:
        # Already supplied by the caller or built by an earlier call.
        return self._summary_model

    model = AutoModelForSeq2SeqLM.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True
    )
    if use_gpu:
        model.cuda()
    tokenizer = AutoTokenizer.from_pretrained(
        model_name,
        padding_side="left"
    )
    summarizer = pipeline(
        "summarization",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=256,
        use_fast=True
    )
    # Wrap the pipeline for LangChain and keep it for later calls.
    self._summary_model = HuggingFacePipeline(
        model_id=model_name,
        pipeline=summarizer,
        verbose=True
    )
    return self._summary_model
291
+
292
def resolve_paths(self, path: Union[str, PurePath, List[PurePath]]) -> List[Path]:
    """
    Resolve the input path into a list of file paths.
    Handles lists, directories, glob patterns, and single file paths.

    A string containing ``*`` is treated as a glob pattern (relative
    patterns are expanded from the current directory, absolute ones from
    their root). Directories are expanded one level deep, keeping only
    regular files that match the loader's ``_extension`` list (or every
    file when that list is empty). Entries that are neither an existing
    file nor a directory are dropped.

    Args:
        path (Union[str, PurePath, List[PurePath]]): Input path(s).

    Returns:
        List[Path]: A list of resolved file paths.
    """
    if isinstance(path, str):
        if "*" in path:
            pattern = Path(path)
            if pattern.is_absolute():
                # Path.glob() rejects absolute patterns, so glob from the
                # pattern's anchor with the remainder as a relative pattern.
                anchor = pattern.anchor
                relative = str(pattern.relative_to(anchor))
                candidates = list(Path(anchor).glob(relative))
            else:
                candidates = list(Path().glob(path))
        else:
            # Single path as string
            candidates = [Path(path)]
    elif isinstance(path, PurePath):
        # Single Path
        candidates = [Path(path)]
    elif isinstance(path, list):
        # List of paths
        candidates = [Path(p) for p in path]
    else:
        candidates = []

    final_paths = []
    for candidate in candidates:
        if candidate.is_dir():
            if self._extension:
                patterns = [f"*{ext}" for ext in self._extension]
            else:
                patterns = ["*"]
            for pattern in patterns:
                # Keep files only: sub-directories are not loadable documents.
                final_paths.extend(
                    entry for entry in candidate.glob(pattern) if entry.is_file()
                )
        elif candidate.is_file():
            final_paths.append(candidate)

    return final_paths
332
+
333
async def load(self, path: Union[str, PurePath, List[PurePath]]) -> List[Document]:
    """Load every file resolved from ``path`` into Langchain Documents.

    Args:
        path (Union[str, PurePath, List[PurePath]]): The source of the data;
            expanded through ``resolve_paths``.

    Returns:
        List[Document]: The documents produced by ``_load_document`` for
        each resolved path that exists, in resolution order.
    """
    self.logger.info(
        f"Loading file: {path}"
    )
    documents = []
    for candidate in self.resolve_paths(path):
        if not candidate.exists():
            continue
        loaded = await self._load_document(candidate)
        documents.extend(loaded)
    return documents
351
+
352
async def _load_document(self, path: Path) -> List:
    """
    Hook for loading a single document; concrete loaders override it.

    The method is not decorated with ``@abstractmethod``, so overriding is
    not enforced; this base implementation does nothing and yields None.

    Args:
        path (Path): The path to the file.

    Returns:
        List: A list of Langchain documents (in overriding implementations).
    """
    return None
@@ -0,0 +1,50 @@
1
+ from abc import abstractmethod
2
+ from typing import List, Union
3
+ from pathlib import Path, PurePath
4
+ from markdownify import markdownify as md
5
+ from langchain.docstore.document import Document
6
+ from .abstract import AbstractLoader
7
+
8
+
9
class BasePDF(AbstractLoader):
    """
    Common base class for the PDF loaders.

    Subclasses implement ``_load_pdf``; ``load`` resolves the input into
    PDF paths and feeds each one to it.
    """
    # Only files with this suffix are picked up when expanding directories.
    _extension = ['.pdf']
    chunk_size = 768

    def __init__(self, **kwargs):
        # The language code is set before the parent initializer runs.
        self._lang = 'eng'
        super().__init__(**kwargs)

    @abstractmethod
    def _load_pdf(self, path: Path) -> list:
        """
        Turn one PDF file into Langchain Documents (subclass-provided).

        Args:
            path (Path): The path to the PDF file.

        Returns:
            list: A list of Langchain Documents.
        """

    async def load(self, path: Union[str, PurePath, List[PurePath]]) -> List[Document]:
        """Load every PDF resolved from ``path`` into Langchain Documents.

        Args:
            path (Union[str, PurePath, List[PurePath]]): The source of the data;
                expanded through ``resolve_paths``.

        Returns:
            List[Document]: The documents returned by ``_load_pdf`` for each
            resolved path that exists, in resolution order.
        """
        self.logger.info(
            f"Loading file: {path}"
        )
        candidates = self.resolve_paths(path)
        # _load_pdf is synchronous, so each file is parsed inline.
        return [
            document
            for pdf_path in candidates
            if pdf_path.exists()
            for document in self._load_pdf(pdf_path)
        ]