flowtask 5.8.4__cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (470):
  1. flowtask/__init__.py +93 -0
  2. flowtask/__main__.py +38 -0
  3. flowtask/bots/__init__.py +6 -0
  4. flowtask/bots/check.py +93 -0
  5. flowtask/bots/codebot.py +51 -0
  6. flowtask/components/ASPX.py +148 -0
  7. flowtask/components/AddDataset.py +352 -0
  8. flowtask/components/Amazon.py +523 -0
  9. flowtask/components/AutoTask.py +314 -0
  10. flowtask/components/Azure.py +80 -0
  11. flowtask/components/AzureUsers.py +106 -0
  12. flowtask/components/BaseAction.py +91 -0
  13. flowtask/components/BaseLoop.py +198 -0
  14. flowtask/components/BestBuy.py +800 -0
  15. flowtask/components/CSVToGCS.py +120 -0
  16. flowtask/components/CompanyScraper/__init__.py +1 -0
  17. flowtask/components/CompanyScraper/parsers/__init__.py +6 -0
  18. flowtask/components/CompanyScraper/parsers/base.py +102 -0
  19. flowtask/components/CompanyScraper/parsers/explorium.py +192 -0
  20. flowtask/components/CompanyScraper/parsers/leadiq.py +206 -0
  21. flowtask/components/CompanyScraper/parsers/rocket.py +133 -0
  22. flowtask/components/CompanyScraper/parsers/siccode.py +109 -0
  23. flowtask/components/CompanyScraper/parsers/visualvisitor.py +130 -0
  24. flowtask/components/CompanyScraper/parsers/zoominfo.py +118 -0
  25. flowtask/components/CompanyScraper/scrapper.py +1054 -0
  26. flowtask/components/CopyTo.py +177 -0
  27. flowtask/components/CopyToBigQuery.py +243 -0
  28. flowtask/components/CopyToMongoDB.py +291 -0
  29. flowtask/components/CopyToPg.py +609 -0
  30. flowtask/components/CopyToRethink.py +207 -0
  31. flowtask/components/CreateGCSBucket.py +102 -0
  32. flowtask/components/CreateReport/CreateReport.py +228 -0
  33. flowtask/components/CreateReport/__init__.py +9 -0
  34. flowtask/components/CreateReport/charts/__init__.py +15 -0
  35. flowtask/components/CreateReport/charts/bar.py +51 -0
  36. flowtask/components/CreateReport/charts/base.py +66 -0
  37. flowtask/components/CreateReport/charts/pie.py +64 -0
  38. flowtask/components/CreateReport/utils.py +9 -0
  39. flowtask/components/CustomerSatisfaction.py +196 -0
  40. flowtask/components/DataInput.py +200 -0
  41. flowtask/components/DateList.py +255 -0
  42. flowtask/components/DbClient.py +163 -0
  43. flowtask/components/DialPad.py +146 -0
  44. flowtask/components/DocumentDBQuery.py +200 -0
  45. flowtask/components/DownloadFrom.py +371 -0
  46. flowtask/components/DownloadFromD2L.py +113 -0
  47. flowtask/components/DownloadFromFTP.py +181 -0
  48. flowtask/components/DownloadFromIMAP.py +315 -0
  49. flowtask/components/DownloadFromS3.py +198 -0
  50. flowtask/components/DownloadFromSFTP.py +265 -0
  51. flowtask/components/DownloadFromSharepoint.py +110 -0
  52. flowtask/components/DownloadFromSmartSheet.py +114 -0
  53. flowtask/components/DownloadS3File.py +229 -0
  54. flowtask/components/Dummy.py +59 -0
  55. flowtask/components/DuplicatePhoto.py +411 -0
  56. flowtask/components/EmployeeEvaluation.py +237 -0
  57. flowtask/components/ExecuteSQL.py +323 -0
  58. flowtask/components/ExtractHTML.py +178 -0
  59. flowtask/components/FileBase.py +178 -0
  60. flowtask/components/FileCopy.py +181 -0
  61. flowtask/components/FileDelete.py +82 -0
  62. flowtask/components/FileExists.py +146 -0
  63. flowtask/components/FileIteratorDelete.py +112 -0
  64. flowtask/components/FileList.py +194 -0
  65. flowtask/components/FileOpen.py +75 -0
  66. flowtask/components/FileRead.py +120 -0
  67. flowtask/components/FileRename.py +106 -0
  68. flowtask/components/FilterIf.py +284 -0
  69. flowtask/components/FilterRows/FilterRows.py +200 -0
  70. flowtask/components/FilterRows/__init__.py +10 -0
  71. flowtask/components/FilterRows/functions.py +4 -0
  72. flowtask/components/GCSToBigQuery.py +103 -0
  73. flowtask/components/GoogleA4.py +150 -0
  74. flowtask/components/GoogleGeoCoding.py +344 -0
  75. flowtask/components/GooglePlaces.py +315 -0
  76. flowtask/components/GoogleSearch.py +539 -0
  77. flowtask/components/HTTPClient.py +268 -0
  78. flowtask/components/ICIMS.py +146 -0
  79. flowtask/components/IF.py +179 -0
  80. flowtask/components/IcimsFolderCopy.py +173 -0
  81. flowtask/components/ImageFeatures/__init__.py +5 -0
  82. flowtask/components/ImageFeatures/process.py +233 -0
  83. flowtask/components/IteratorBase.py +251 -0
  84. flowtask/components/LangchainLoader/__init__.py +5 -0
  85. flowtask/components/LangchainLoader/loader.py +194 -0
  86. flowtask/components/LangchainLoader/loaders/__init__.py +22 -0
  87. flowtask/components/LangchainLoader/loaders/abstract.py +362 -0
  88. flowtask/components/LangchainLoader/loaders/basepdf.py +50 -0
  89. flowtask/components/LangchainLoader/loaders/docx.py +91 -0
  90. flowtask/components/LangchainLoader/loaders/html.py +119 -0
  91. flowtask/components/LangchainLoader/loaders/pdfblocks.py +146 -0
  92. flowtask/components/LangchainLoader/loaders/pdfmark.py +79 -0
  93. flowtask/components/LangchainLoader/loaders/pdftables.py +135 -0
  94. flowtask/components/LangchainLoader/loaders/qa.py +67 -0
  95. flowtask/components/LangchainLoader/loaders/txt.py +55 -0
  96. flowtask/components/LeadIQ.py +650 -0
  97. flowtask/components/Loop.py +253 -0
  98. flowtask/components/Lowes.py +334 -0
  99. flowtask/components/MS365Usage.py +156 -0
  100. flowtask/components/MSTeamsMessages.py +320 -0
  101. flowtask/components/MarketClustering.py +1051 -0
  102. flowtask/components/MergeFiles.py +362 -0
  103. flowtask/components/MilvusOutput.py +87 -0
  104. flowtask/components/NearByStores.py +175 -0
  105. flowtask/components/NetworkNinja/__init__.py +6 -0
  106. flowtask/components/NetworkNinja/models/__init__.py +52 -0
  107. flowtask/components/NetworkNinja/models/abstract.py +177 -0
  108. flowtask/components/NetworkNinja/models/account.py +39 -0
  109. flowtask/components/NetworkNinja/models/client.py +19 -0
  110. flowtask/components/NetworkNinja/models/district.py +14 -0
  111. flowtask/components/NetworkNinja/models/events.py +101 -0
  112. flowtask/components/NetworkNinja/models/forms.py +499 -0
  113. flowtask/components/NetworkNinja/models/market.py +16 -0
  114. flowtask/components/NetworkNinja/models/organization.py +34 -0
  115. flowtask/components/NetworkNinja/models/photos.py +125 -0
  116. flowtask/components/NetworkNinja/models/project.py +44 -0
  117. flowtask/components/NetworkNinja/models/region.py +28 -0
  118. flowtask/components/NetworkNinja/models/store.py +203 -0
  119. flowtask/components/NetworkNinja/models/user.py +151 -0
  120. flowtask/components/NetworkNinja/router.py +854 -0
  121. flowtask/components/Odoo.py +175 -0
  122. flowtask/components/OdooInjector.py +192 -0
  123. flowtask/components/OpenFromXML.py +126 -0
  124. flowtask/components/OpenWeather.py +41 -0
  125. flowtask/components/OpenWithBase.py +616 -0
  126. flowtask/components/OpenWithPandas.py +715 -0
  127. flowtask/components/PGPDecrypt.py +199 -0
  128. flowtask/components/PandasIterator.py +187 -0
  129. flowtask/components/PandasToFile.py +189 -0
  130. flowtask/components/Paradox.py +339 -0
  131. flowtask/components/ParamIterator.py +117 -0
  132. flowtask/components/ParseHTML.py +84 -0
  133. flowtask/components/PlacerStores.py +249 -0
  134. flowtask/components/Pokemon.py +507 -0
  135. flowtask/components/PositiveBot.py +62 -0
  136. flowtask/components/PowerPointSlide.py +400 -0
  137. flowtask/components/PrintMessage.py +127 -0
  138. flowtask/components/ProductCompetitors/__init__.py +5 -0
  139. flowtask/components/ProductCompetitors/parsers/__init__.py +7 -0
  140. flowtask/components/ProductCompetitors/parsers/base.py +72 -0
  141. flowtask/components/ProductCompetitors/parsers/bestbuy.py +86 -0
  142. flowtask/components/ProductCompetitors/parsers/lowes.py +103 -0
  143. flowtask/components/ProductCompetitors/scrapper.py +155 -0
  144. flowtask/components/ProductCompliant.py +169 -0
  145. flowtask/components/ProductInfo/__init__.py +1 -0
  146. flowtask/components/ProductInfo/parsers/__init__.py +5 -0
  147. flowtask/components/ProductInfo/parsers/base.py +83 -0
  148. flowtask/components/ProductInfo/parsers/brother.py +97 -0
  149. flowtask/components/ProductInfo/parsers/canon.py +167 -0
  150. flowtask/components/ProductInfo/parsers/epson.py +118 -0
  151. flowtask/components/ProductInfo/parsers/hp.py +131 -0
  152. flowtask/components/ProductInfo/parsers/samsung.py +97 -0
  153. flowtask/components/ProductInfo/scraper.py +319 -0
  154. flowtask/components/ProductPricing.py +118 -0
  155. flowtask/components/QS.py +261 -0
  156. flowtask/components/QSBase.py +201 -0
  157. flowtask/components/QueryIterator.py +273 -0
  158. flowtask/components/QueryToInsert.py +327 -0
  159. flowtask/components/QueryToPandas.py +432 -0
  160. flowtask/components/RESTClient.py +195 -0
  161. flowtask/components/RethinkDBQuery.py +189 -0
  162. flowtask/components/Rsync.py +74 -0
  163. flowtask/components/RunSSH.py +59 -0
  164. flowtask/components/RunShell.py +71 -0
  165. flowtask/components/SalesForce.py +20 -0
  166. flowtask/components/SaveImageBank/__init__.py +257 -0
  167. flowtask/components/SchedulingVisits.py +592 -0
  168. flowtask/components/ScrapPage.py +216 -0
  169. flowtask/components/ScrapSearch.py +79 -0
  170. flowtask/components/SendNotify.py +257 -0
  171. flowtask/components/SentimentAnalysis.py +694 -0
  172. flowtask/components/ServiceScrapper/__init__.py +5 -0
  173. flowtask/components/ServiceScrapper/parsers/__init__.py +1 -0
  174. flowtask/components/ServiceScrapper/parsers/base.py +94 -0
  175. flowtask/components/ServiceScrapper/parsers/costco.py +93 -0
  176. flowtask/components/ServiceScrapper/scrapper.py +199 -0
  177. flowtask/components/SetVariables.py +156 -0
  178. flowtask/components/SubTask.py +182 -0
  179. flowtask/components/SuiteCRM.py +48 -0
  180. flowtask/components/Switch.py +175 -0
  181. flowtask/components/TableBase.py +148 -0
  182. flowtask/components/TableDelete.py +312 -0
  183. flowtask/components/TableInput.py +143 -0
  184. flowtask/components/TableOutput/TableOutput.py +384 -0
  185. flowtask/components/TableOutput/__init__.py +3 -0
  186. flowtask/components/TableSchema.py +534 -0
  187. flowtask/components/Target.py +223 -0
  188. flowtask/components/ThumbnailGenerator.py +156 -0
  189. flowtask/components/ToPandas.py +67 -0
  190. flowtask/components/TransformRows/TransformRows.py +507 -0
  191. flowtask/components/TransformRows/__init__.py +9 -0
  192. flowtask/components/TransformRows/functions.py +559 -0
  193. flowtask/components/TransposeRows.py +176 -0
  194. flowtask/components/UPCDatabase.py +86 -0
  195. flowtask/components/UnGzip.py +171 -0
  196. flowtask/components/Uncompress.py +172 -0
  197. flowtask/components/UniqueRows.py +126 -0
  198. flowtask/components/Unzip.py +107 -0
  199. flowtask/components/UpdateOperationalVars.py +147 -0
  200. flowtask/components/UploadTo.py +299 -0
  201. flowtask/components/UploadToS3.py +136 -0
  202. flowtask/components/UploadToSFTP.py +160 -0
  203. flowtask/components/UploadToSharepoint.py +205 -0
  204. flowtask/components/UserFunc.py +122 -0
  205. flowtask/components/VivaTracker.py +140 -0
  206. flowtask/components/WSDLClient.py +123 -0
  207. flowtask/components/Wait.py +18 -0
  208. flowtask/components/Walmart.py +199 -0
  209. flowtask/components/Workplace.py +134 -0
  210. flowtask/components/XMLToPandas.py +267 -0
  211. flowtask/components/Zammad/__init__.py +41 -0
  212. flowtask/components/Zammad/models.py +0 -0
  213. flowtask/components/ZoomInfoScraper.py +409 -0
  214. flowtask/components/__init__.py +104 -0
  215. flowtask/components/abstract.py +18 -0
  216. flowtask/components/flow.py +530 -0
  217. flowtask/components/google.py +335 -0
  218. flowtask/components/group.py +221 -0
  219. flowtask/components/py.typed +0 -0
  220. flowtask/components/reviewscrap.py +132 -0
  221. flowtask/components/tAutoincrement.py +117 -0
  222. flowtask/components/tConcat.py +109 -0
  223. flowtask/components/tExplode.py +119 -0
  224. flowtask/components/tFilter.py +184 -0
  225. flowtask/components/tGroup.py +236 -0
  226. flowtask/components/tJoin.py +270 -0
  227. flowtask/components/tMap/__init__.py +9 -0
  228. flowtask/components/tMap/functions.py +54 -0
  229. flowtask/components/tMap/tMap.py +450 -0
  230. flowtask/components/tMelt.py +112 -0
  231. flowtask/components/tMerge.py +114 -0
  232. flowtask/components/tOrder.py +93 -0
  233. flowtask/components/tPandas.py +94 -0
  234. flowtask/components/tPivot.py +71 -0
  235. flowtask/components/tPluckCols.py +76 -0
  236. flowtask/components/tUnnest.py +82 -0
  237. flowtask/components/user.py +401 -0
  238. flowtask/conf.py +457 -0
  239. flowtask/download.py +102 -0
  240. flowtask/events/__init__.py +11 -0
  241. flowtask/events/events/__init__.py +20 -0
  242. flowtask/events/events/abstract.py +95 -0
  243. flowtask/events/events/alerts/__init__.py +362 -0
  244. flowtask/events/events/alerts/colfunctions.py +131 -0
  245. flowtask/events/events/alerts/functions.py +158 -0
  246. flowtask/events/events/dummy.py +12 -0
  247. flowtask/events/events/exec.py +124 -0
  248. flowtask/events/events/file/__init__.py +7 -0
  249. flowtask/events/events/file/base.py +51 -0
  250. flowtask/events/events/file/copy.py +23 -0
  251. flowtask/events/events/file/delete.py +16 -0
  252. flowtask/events/events/interfaces/__init__.py +9 -0
  253. flowtask/events/events/interfaces/client.py +67 -0
  254. flowtask/events/events/interfaces/credentials.py +28 -0
  255. flowtask/events/events/interfaces/notifications.py +58 -0
  256. flowtask/events/events/jira.py +122 -0
  257. flowtask/events/events/log.py +26 -0
  258. flowtask/events/events/logerr.py +52 -0
  259. flowtask/events/events/notify.py +59 -0
  260. flowtask/events/events/notify_event.py +160 -0
  261. flowtask/events/events/publish.py +54 -0
  262. flowtask/events/events/sendfile.py +104 -0
  263. flowtask/events/events/task.py +97 -0
  264. flowtask/events/events/teams.py +98 -0
  265. flowtask/events/events/webhook.py +58 -0
  266. flowtask/events/manager.py +287 -0
  267. flowtask/exceptions.c +39393 -0
  268. flowtask/exceptions.cpython-312-x86_64-linux-gnu.so +0 -0
  269. flowtask/extensions/__init__.py +3 -0
  270. flowtask/extensions/abstract.py +82 -0
  271. flowtask/extensions/logging/__init__.py +65 -0
  272. flowtask/hooks/__init__.py +9 -0
  273. flowtask/hooks/actions/__init__.py +22 -0
  274. flowtask/hooks/actions/abstract.py +66 -0
  275. flowtask/hooks/actions/dummy.py +23 -0
  276. flowtask/hooks/actions/jira.py +74 -0
  277. flowtask/hooks/actions/rest.py +320 -0
  278. flowtask/hooks/actions/sampledata.py +37 -0
  279. flowtask/hooks/actions/sensor.py +23 -0
  280. flowtask/hooks/actions/task.py +9 -0
  281. flowtask/hooks/actions/ticket.py +37 -0
  282. flowtask/hooks/actions/zammad.py +55 -0
  283. flowtask/hooks/hook.py +62 -0
  284. flowtask/hooks/models.py +17 -0
  285. flowtask/hooks/service.py +187 -0
  286. flowtask/hooks/step.py +91 -0
  287. flowtask/hooks/types/__init__.py +23 -0
  288. flowtask/hooks/types/base.py +129 -0
  289. flowtask/hooks/types/brokers/__init__.py +11 -0
  290. flowtask/hooks/types/brokers/base.py +54 -0
  291. flowtask/hooks/types/brokers/mqtt.py +35 -0
  292. flowtask/hooks/types/brokers/rabbitmq.py +82 -0
  293. flowtask/hooks/types/brokers/redis.py +83 -0
  294. flowtask/hooks/types/brokers/sqs.py +44 -0
  295. flowtask/hooks/types/fs.py +232 -0
  296. flowtask/hooks/types/http.py +49 -0
  297. flowtask/hooks/types/imap.py +200 -0
  298. flowtask/hooks/types/jira.py +279 -0
  299. flowtask/hooks/types/mail.py +205 -0
  300. flowtask/hooks/types/postgres.py +98 -0
  301. flowtask/hooks/types/responses/__init__.py +8 -0
  302. flowtask/hooks/types/responses/base.py +5 -0
  303. flowtask/hooks/types/sharepoint.py +288 -0
  304. flowtask/hooks/types/ssh.py +141 -0
  305. flowtask/hooks/types/tagged.py +59 -0
  306. flowtask/hooks/types/upload.py +85 -0
  307. flowtask/hooks/types/watch.py +71 -0
  308. flowtask/hooks/types/web.py +36 -0
  309. flowtask/interfaces/AzureClient.py +137 -0
  310. flowtask/interfaces/AzureGraph.py +839 -0
  311. flowtask/interfaces/Boto3Client.py +326 -0
  312. flowtask/interfaces/DropboxClient.py +173 -0
  313. flowtask/interfaces/ExcelHandler.py +94 -0
  314. flowtask/interfaces/FTPClient.py +131 -0
  315. flowtask/interfaces/GoogleCalendar.py +201 -0
  316. flowtask/interfaces/GoogleClient.py +133 -0
  317. flowtask/interfaces/GoogleDrive.py +127 -0
  318. flowtask/interfaces/GoogleGCS.py +89 -0
  319. flowtask/interfaces/GoogleGeocoding.py +93 -0
  320. flowtask/interfaces/GoogleLang.py +114 -0
  321. flowtask/interfaces/GooglePub.py +61 -0
  322. flowtask/interfaces/GoogleSheet.py +68 -0
  323. flowtask/interfaces/IMAPClient.py +137 -0
  324. flowtask/interfaces/O365Calendar.py +113 -0
  325. flowtask/interfaces/O365Client.py +220 -0
  326. flowtask/interfaces/OneDrive.py +284 -0
  327. flowtask/interfaces/Outlook.py +155 -0
  328. flowtask/interfaces/ParrotBot.py +130 -0
  329. flowtask/interfaces/SSHClient.py +378 -0
  330. flowtask/interfaces/Sharepoint.py +496 -0
  331. flowtask/interfaces/__init__.py +36 -0
  332. flowtask/interfaces/azureauth.py +119 -0
  333. flowtask/interfaces/cache.py +201 -0
  334. flowtask/interfaces/client.py +82 -0
  335. flowtask/interfaces/compress.py +525 -0
  336. flowtask/interfaces/credentials.py +124 -0
  337. flowtask/interfaces/d2l.py +239 -0
  338. flowtask/interfaces/databases/__init__.py +5 -0
  339. flowtask/interfaces/databases/db.py +223 -0
  340. flowtask/interfaces/databases/documentdb.py +55 -0
  341. flowtask/interfaces/databases/rethink.py +39 -0
  342. flowtask/interfaces/dataframes/__init__.py +11 -0
  343. flowtask/interfaces/dataframes/abstract.py +21 -0
  344. flowtask/interfaces/dataframes/arrow.py +71 -0
  345. flowtask/interfaces/dataframes/dt.py +69 -0
  346. flowtask/interfaces/dataframes/pandas.py +167 -0
  347. flowtask/interfaces/dataframes/polars.py +60 -0
  348. flowtask/interfaces/db.py +263 -0
  349. flowtask/interfaces/env.py +46 -0
  350. flowtask/interfaces/func.py +137 -0
  351. flowtask/interfaces/http.py +1780 -0
  352. flowtask/interfaces/locale.py +40 -0
  353. flowtask/interfaces/log.py +75 -0
  354. flowtask/interfaces/mask.py +143 -0
  355. flowtask/interfaces/notification.py +154 -0
  356. flowtask/interfaces/playwright.py +339 -0
  357. flowtask/interfaces/powerpoint.py +368 -0
  358. flowtask/interfaces/py.typed +0 -0
  359. flowtask/interfaces/qs.py +376 -0
  360. flowtask/interfaces/result.py +87 -0
  361. flowtask/interfaces/selenium_service.py +779 -0
  362. flowtask/interfaces/smartsheet.py +154 -0
  363. flowtask/interfaces/stat.py +39 -0
  364. flowtask/interfaces/task.py +96 -0
  365. flowtask/interfaces/template.py +118 -0
  366. flowtask/interfaces/vectorstores/__init__.py +1 -0
  367. flowtask/interfaces/vectorstores/abstract.py +133 -0
  368. flowtask/interfaces/vectorstores/milvus.py +669 -0
  369. flowtask/interfaces/zammad.py +107 -0
  370. flowtask/models.py +193 -0
  371. flowtask/parsers/__init__.py +15 -0
  372. flowtask/parsers/_yaml.c +11978 -0
  373. flowtask/parsers/_yaml.cpython-312-x86_64-linux-gnu.so +0 -0
  374. flowtask/parsers/argparser.py +235 -0
  375. flowtask/parsers/base.c +15155 -0
  376. flowtask/parsers/base.cpython-312-x86_64-linux-gnu.so +0 -0
  377. flowtask/parsers/json.c +11968 -0
  378. flowtask/parsers/json.cpython-312-x86_64-linux-gnu.so +0 -0
  379. flowtask/parsers/maps.py +49 -0
  380. flowtask/parsers/toml.c +11968 -0
  381. flowtask/parsers/toml.cpython-312-x86_64-linux-gnu.so +0 -0
  382. flowtask/plugins/__init__.py +16 -0
  383. flowtask/plugins/components/__init__.py +0 -0
  384. flowtask/plugins/handler/__init__.py +45 -0
  385. flowtask/plugins/importer.py +31 -0
  386. flowtask/plugins/sources/__init__.py +0 -0
  387. flowtask/runner.py +283 -0
  388. flowtask/scheduler/__init__.py +9 -0
  389. flowtask/scheduler/functions.py +493 -0
  390. flowtask/scheduler/handlers/__init__.py +8 -0
  391. flowtask/scheduler/handlers/manager.py +504 -0
  392. flowtask/scheduler/handlers/models.py +58 -0
  393. flowtask/scheduler/handlers/service.py +72 -0
  394. flowtask/scheduler/notifications.py +65 -0
  395. flowtask/scheduler/scheduler.py +993 -0
  396. flowtask/services/__init__.py +0 -0
  397. flowtask/services/bots/__init__.py +0 -0
  398. flowtask/services/bots/telegram.py +264 -0
  399. flowtask/services/files/__init__.py +11 -0
  400. flowtask/services/files/manager.py +522 -0
  401. flowtask/services/files/model.py +37 -0
  402. flowtask/services/files/service.py +767 -0
  403. flowtask/services/jira/__init__.py +3 -0
  404. flowtask/services/jira/jira_actions.py +191 -0
  405. flowtask/services/tasks/__init__.py +13 -0
  406. flowtask/services/tasks/launcher.py +213 -0
  407. flowtask/services/tasks/manager.py +323 -0
  408. flowtask/services/tasks/service.py +275 -0
  409. flowtask/services/tasks/task_manager.py +376 -0
  410. flowtask/services/tasks/tasks.py +155 -0
  411. flowtask/storages/__init__.py +16 -0
  412. flowtask/storages/exceptions.py +12 -0
  413. flowtask/storages/files/__init__.py +8 -0
  414. flowtask/storages/files/abstract.py +29 -0
  415. flowtask/storages/files/filesystem.py +66 -0
  416. flowtask/storages/tasks/__init__.py +19 -0
  417. flowtask/storages/tasks/abstract.py +26 -0
  418. flowtask/storages/tasks/database.py +33 -0
  419. flowtask/storages/tasks/filesystem.py +108 -0
  420. flowtask/storages/tasks/github.py +119 -0
  421. flowtask/storages/tasks/memory.py +45 -0
  422. flowtask/storages/tasks/row.py +25 -0
  423. flowtask/tasks/__init__.py +0 -0
  424. flowtask/tasks/abstract.py +526 -0
  425. flowtask/tasks/command.py +118 -0
  426. flowtask/tasks/pile.py +486 -0
  427. flowtask/tasks/py.typed +0 -0
  428. flowtask/tasks/task.py +778 -0
  429. flowtask/template/__init__.py +161 -0
  430. flowtask/tests.py +257 -0
  431. flowtask/types/__init__.py +8 -0
  432. flowtask/types/typedefs.c +11347 -0
  433. flowtask/types/typedefs.cpython-312-x86_64-linux-gnu.so +0 -0
  434. flowtask/utils/__init__.py +24 -0
  435. flowtask/utils/constants.py +117 -0
  436. flowtask/utils/encoders.py +21 -0
  437. flowtask/utils/executor.py +112 -0
  438. flowtask/utils/functions.cpp +14280 -0
  439. flowtask/utils/functions.cpython-312-x86_64-linux-gnu.so +0 -0
  440. flowtask/utils/json.cpp +13349 -0
  441. flowtask/utils/json.cpython-312-x86_64-linux-gnu.so +0 -0
  442. flowtask/utils/mail.py +63 -0
  443. flowtask/utils/parseqs.c +13324 -0
  444. flowtask/utils/parserqs.cpython-312-x86_64-linux-gnu.so +0 -0
  445. flowtask/utils/stats.py +308 -0
  446. flowtask/utils/transformations.py +74 -0
  447. flowtask/utils/uv.py +12 -0
  448. flowtask/utils/validators.py +97 -0
  449. flowtask/version.py +11 -0
  450. flowtask-5.8.4.dist-info/LICENSE +201 -0
  451. flowtask-5.8.4.dist-info/METADATA +209 -0
  452. flowtask-5.8.4.dist-info/RECORD +470 -0
  453. flowtask-5.8.4.dist-info/WHEEL +6 -0
  454. flowtask-5.8.4.dist-info/entry_points.txt +3 -0
  455. flowtask-5.8.4.dist-info/top_level.txt +2 -0
  456. plugins/components/CreateQR.py +39 -0
  457. plugins/components/TestComponent.py +28 -0
  458. plugins/components/Use1.py +13 -0
  459. plugins/components/Workplace.py +117 -0
  460. plugins/components/__init__.py +3 -0
  461. plugins/sources/__init__.py +0 -0
  462. plugins/sources/get_populartimes.py +78 -0
  463. plugins/sources/google.py +150 -0
  464. plugins/sources/hubspot.py +679 -0
  465. plugins/sources/icims.py +679 -0
  466. plugins/sources/mobileinsight.py +501 -0
  467. plugins/sources/newrelic.py +262 -0
  468. plugins/sources/uap.py +268 -0
  469. plugins/sources/venu.py +244 -0
  470. plugins/sources/vocinity.py +314 -0
@@ -0,0 +1,91 @@
1
+ from typing import List
2
+ from pathlib import PurePath
3
+ import mammoth
4
+ import docx
5
+ from markdownify import markdownify as md
6
+ from langchain.docstore.document import Document
7
+ from .abstract import AbstractLoader
8
+
9
+
10
class MSWordLoader(AbstractLoader):
    """
    Load Microsoft Word (.docx) files as Langchain Documents.
    """

    def extract_text(self, path):
        """Extract the plain text of a docx file.

        Args:
            path (Path): Location of the .docx file.

        Returns:
            str: The text of every paragraph, joined with newlines.
        """
        doc = docx.Document(str(path))
        return "\n".join(paragraph.text for paragraph in doc.paragraphs)

    async def _load_document(self, path: PurePath) -> List[Document]:
        """Load a Word file and return it as a list of Langchain Documents.

        The file is converted to HTML with mammoth, the HTML is converted to
        Markdown, and the Markdown is split into chunks. Every chunk becomes
        one Document, prefixed with a short document-level context header.

        Args:
            path (PurePath): Location of the .docx file.

        Returns:
            List[Document]: One Document per Markdown chunk.
        """
        self.logger.info(
            f"Loading Word file: {path}"
        )
        # Only the conversion needs the open handle; release it before the
        # (potentially slow) Markdown conversion and splitting.
        with open(path, "rb") as docx_file:
            properties = docx.Document(str(path)).core_properties
            result = mammoth.convert_to_html(docx_file)
        html = result.value  # The generated HTML
        md_text = md(html)  # The generated Markdown

        # Diagnostics go through the logger instead of print(), so they do
        # not pollute stdout of whatever process embeds this loader.
        self.logger.debug(
            f"Converted {path} to HTML ({len(html)} characters)"
        )
        if result.messages:
            self.logger.debug(
                f"Messages from conversion: {result.messages}"
            )

        # TODO: add summarization and translation if requested
        summary = ''
        # TODO: "created" / "last_modified" timestamps are not exported yet.
        document_meta = {
            "author": properties.author,
            "version": properties.version,
            "title": properties.title,
        }
        metadata = self.create_metadata(
            path=path,
            doctype=self.doctype,
            source_type=self._source_type,
            summary=summary,
            doc_metadata=document_meta
        )
        # Document-level context prepended to every chunk.
        document_context = (
            f"File Name: {path.name}\n"
            f"Document Type: {self.doctype}\n"
            f"Source Type: {self._source_type}\n"
            f"Summary: {summary}\n"
            "======\n"
        )
        # Each Document receives its own shallow copy of the metadata.
        return [
            Document(
                page_content=document_context + chunk,
                metadata={**metadata}
            )
            for chunk in self.markdown_splitter.split_text(md_text)
        ]
@@ -0,0 +1,119 @@
1
+ from bs4 import BeautifulSoup
2
+ from langchain.docstore.document import Document
3
+ from .abstract import AbstractLoader
4
+ from pathlib import Path, PurePath
5
+ from markdownify import markdownify as md
6
+ from datetime import datetime
7
+
8
+
9
class HTMLLoader(AbstractLoader):
    """
    Turn HTML files into Langchain Documents.

    A file is parsed, the requested elements (or the whole body) are
    rendered as Markdown, and every produced document carries metadata.
    """

    _extension = ['.html', '.htm']

    def __init__(self, **kwargs):
        """Take the optional ``elements`` list out of kwargs, then init the base loader."""
        self.elements: list = kwargs.pop('elements', [])
        super().__init__(**kwargs)

    async def _load_document(self, path: PurePath) -> list[Document]:
        """
        Read one HTML file and build Langchain documents from it.

        For every selected element a full-content document is emitted,
        followed by one document per chunk of the element's plain text.

        Args:
            path (PurePath): Path to the HTML file.

        Returns:
            list[Document]: The documents with content and metadata.

        Raises:
            ValueError: If the path check fails or no top element is found.
        """
        # Guard clause: refuse paths the loader does not accept.
        if not self._check_path(path):
            raise ValueError(f"File {path} does not exist or is not a valid HTML file.")

        with open(path, 'r', encoding=self.encoding) as handle:
            raw_html = handle.read()

        soup = BeautifulSoup(raw_html, 'html.parser')

        # Prefer <body>; fall back to the parsed document itself.
        root = soup.body or soup
        if not root:
            raise ValueError(
                "The HTML file does not contain a <body> or Top element tag."
            )

        # Each entry of self.elements maps a tag name to a CSS class selector.
        selected = []
        if self.elements:
            selected = [
                node
                for spec in self.elements
                for tag, selector in spec.items()
                for node in root.find_all(tag, class_=selector.lstrip('.'))
            ]
        # Nothing requested or nothing matched: process the whole root.
        targets = selected or [root]

        documents = []
        for node in targets:
            plain_text = node.get_text(separator="\n", strip=True)

            # Summaries are best-effort; a failure must not abort the load.
            try:
                summary = self.get_summary_from_text(plain_text, use_gpu=True)
            except Exception as exc:
                if self.logger:
                    self.logger.error(f"Error generating summary: {exc}")
                summary = "Summary not available."

            # Context header shared by every document of this element.
            header = (
                f"File Name: {path.name}\n"
                f"Document Type: {self.doctype}\n"
                f"Source Type: {self._source_type}\n"
                f"Element: {node.name}\n"
                f"Summary: {summary}\n\n"
            )

            markdown_body = md(str(node))

            meta = self.create_metadata(
                path=path,
                doctype=self.doctype,
                source_type=self._source_type,
                summary=summary,
                doc_metadata={
                    "type": "html",
                    "category": self.category,
                }
            )

            # One document holding the element's full Markdown content ...
            documents.append(
                Document(page_content=header + markdown_body, metadata=meta)
            )

            # ... plus one document per chunk of the element's plain text,
            # each with its own copy of the metadata.
            documents.extend(
                Document(page_content=header + piece, metadata={**meta})
                for piece in self.markdown_splitter.split_text(plain_text)
            )
        return documents
@@ -0,0 +1,146 @@
1
+ from io import StringIO
2
+ from pathlib import Path
3
+ from datetime import datetime
4
+ import fitz
5
+ import pandas as pd
6
+ from langchain.docstore.document import Document
7
+ from .basepdf import BasePDF
8
+
9
+
10
class PDFBlocks(BasePDF):
    """
    Load the tables of a PDF file as blocks of Markdown text.

    Every table found by ``page.find_tables`` becomes one Langchain Document
    whose content is a short document-level header followed by the table
    rendered as Markdown.
    """
    _extension = ['.pdf']

    def __init__(
        self,
        table_settings: dict = None,
        **kwargs
    ):
        """
        Args:
            table_settings (dict, optional): Options forwarded to
                ``page.find_tables``; they are merged over the defaults below.
            **kwargs: Forwarded to ``BasePDF``. ``skiprows`` is consumed here
                and kept on ``self._skiprows`` (not read by this class).
        """
        self._skiprows = kwargs.pop('skiprows', None)
        super().__init__(**kwargs)
        # Table Settings:
        self.table_settings = {
            # "vertical_strategy": "text",
            # "horizontal_strategy": "text",
            "intersection_x_tolerance": 5,
            "intersection_y_tolerance": 5
        }
        if table_settings:
            self.table_settings.update(table_settings)

    def unique_columns(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Rename duplicate columns in the DataFrame to ensure they are unique.

        Repeated names get a numeric suffix (``col``, ``col_1``, ``col_2``...).
        The DataFrame is modified in place and also returned.

        Args:
            df (pd.DataFrame): The DataFrame with potential duplicate column names.

        Returns:
            pd.DataFrame: The same DataFrame with unique column names.
        """
        seen = {}
        new_columns = []
        for col in df.columns:
            new_col = col
            count = seen.get(col, 0)
            # Keep bumping the suffix until the candidate name is free.
            while new_col in new_columns:
                count += 1
                new_col = f"{col}_{count}"
            new_columns.append(new_col)
            seen[col] = count
        df.columns = new_columns
        return df

    def get_markdown(self, df: pd.DataFrame) -> str:
        """
        Convert a DataFrame to a Markdown string.

        Column names are made unique first (see ``unique_columns``), which
        renames the columns of ``df`` in place.

        Args:
            df (pd.DataFrame): The DataFrame to convert.

        Returns:
            str: The table rendered as Markdown.
        """
        buffer = StringIO()
        df = self.unique_columns(df)
        df.to_markdown(buffer)
        buffer.seek(0)
        return buffer.getvalue()

    def parse_table(self, table_idx, table, page_number, path) -> tuple:
        """
        Turn a detected table into a DataFrame plus its document metadata.

        Args:
            table_idx: Position of the table within its page.
            table: Table object exposing ``to_pandas()``.
            page_number: Zero-based page index; reported one-based in metadata.
            path (Path): The PDF file the table comes from.

        Returns:
            tuple: ``(df, table_meta)`` - the cleaned DataFrame (all-empty
            rows and columns dropped) and the metadata dictionary.
        """
        df = table.to_pandas()  # convert to pandas DataFrame
        df = df.dropna(axis=1, how='all')
        df = df.dropna(how='all', axis=0)  # Drop empty rows
        page = page_number + 1
        table_meta = {
            "url": '',
            "source": f"{path.name} Page.#{page} Table.#{table_idx}",
            "filename": path.name,
            "question": '',
            "answer": '',
            "type": 'table',
            "summary": '',
            "category": self.category,
            "source_type": self._source_type,
            "created_at": datetime.now().strftime("%Y-%m-%d, %H:%M:%S"),
            "document_meta": {
                "table_index": table_idx,
                "table_shape": df.shape,
                "table_columns": df.columns.tolist(),
                "description": f"Extracted from Page.#{page}."
            }
        }
        return df, table_meta

    def _log_table_sample(self, df: pd.DataFrame) -> None:
        """Write a debug dump of the table and one sample value per column."""
        self.logger.debug(f"::: Printing Table Information === \n{df}")
        self.logger.debug("::: Printing Column Information === ")
        for column, dtype in df.dtypes.items():
            # An empty table has no first row to sample.
            sample = df[column].iloc[0] if len(df) else None
            self.logger.debug(f"{column} -> {dtype} -> {sample}")

    def _table_to_markdown(self, df: pd.DataFrame) -> str:
        """Render the table as Markdown, falling back to a plain conversion."""
        try:
            return self.get_markdown(df)
        except Exception as exc:
            self.logger.warning(
                f"Markdown conversion with unique columns failed: {exc}"
            )
            return df.to_markdown()

    def _load_pdf(self, path: Path) -> list:
        """
        Load a PDF file using the Fitz library.

        Args:
            path (Path): The path to the PDF file.

        Returns:
            list: A list of Langchain Documents, one per table that could be
            rendered; empty when the path check fails or no table is found.
        """
        if not self._check_path(path):
            # Always hand back a list so callers can iterate the result.
            return []
        self.logger.info(f"Loading PDF file: {path}")
        pdf = fitz.open(str(path))  # Open the PDF file
        docs = []
        # Create document-level context
        document_context = f"File Name: {path.name}\n"
        document_context += f"Document Type: {self.doctype}\n"
        document_context += f"Source Type: {self._source_type}\n\n"
        try:
            for page_number in range(pdf.page_count):
                page = pdf[page_number]
                try:
                    tabs = page.find_tables(**self.table_settings)
                    for tab_idx, tab in enumerate(tabs):
                        df, _meta = self.parse_table(tab_idx, tab, page_number, path)
                        self._log_table_sample(df)
                        # One document per table: emitting the same table
                        # twice would only duplicate entries downstream.
                        txt = self._table_to_markdown(df)
                        if txt:
                            docs.append(
                                Document(
                                    page_content=document_context + txt,
                                    metadata=_meta
                                )
                            )
                except Exception as exc:
                    # Best effort: a broken page must not stop the others.
                    self.logger.error(
                        f"Error extracting tables from {path.name} "
                        f"page {page_number + 1}: {exc}"
                    )
                    continue
        finally:
            # Release the file handle even if extraction fails.
            pdf.close()
        return docs
@@ -0,0 +1,79 @@
1
+ from pathlib import Path
2
+ import fitz
3
+ from pdf4llm import to_markdown
4
+ from langchain.docstore.document import Document
5
+ from langchain.text_splitter import MarkdownTextSplitter
6
+ from .basepdf import BasePDF
7
+
8
+
9
class PDFMarkdown(BasePDF):
    """
    Loader for PDF files whose content is converted to Markdown.

    The whole document is rendered with ``pdf4llm.to_markdown`` and then
    split into chunks; each chunk becomes a Langchain Document.
    """
    def __init__(
        self,
        **kwargs
    ):
        """
        Args:
            **kwargs: Forwarded unchanged to ``BasePDF``.
        """
        super().__init__(**kwargs)
        self._splitter = MarkdownTextSplitter(chunk_size=2048, chunk_overlap=10)

    def _load_pdf(self, path: Path) -> list:
        """
        Load a PDF file with Fitz and convert it to Markdown chunks.

        Args:
            path (Path): The path to the PDF file.

        Returns:
            list: A list of Langchain Documents (one per Markdown chunk, plus
            one holding the summary when a summary is available); empty when
            the path check fails.
        """
        if not self._check_path(path):
            return []
        self.logger.info(f"Loading PDF file: {path}")
        docs = []
        pdf = fitz.open(str(path))
        try:
            md_text = to_markdown(pdf)
            pdf_info = pdf.metadata or {}
            document_meta = {
                "title": pdf_info.get("title", ""),
                "creationDate": pdf_info.get("creationDate", ""),
                "author": pdf_info.get("author", ""),
            }
        finally:
            # Everything needed has been read; release the file handle.
            pdf.close()
        try:
            summary = self.get_summary_from_text(md_text, use_gpu=True)
        except Exception as exc:
            # Summaries are optional; record the failure and carry on.
            self.logger.warning(f"Error generating summary: {exc}")
            summary = ''
        metadata = self.create_metadata(
            path=path,
            doctype=self.doctype,
            source_type=self._source_type,
            summary=summary,
            doc_metadata=document_meta
        )

        # Prepend document-level context
        document_context = f"Document Title: {document_meta.get('title', '')}\n"
        # document_context += f"Document Author: {document_meta.get('author', '')}\n"
        document_context += f"File Path: {str(path)}\n"
        document_context += f"Summary: {summary}\n\n"

        for chunk in self._splitter.split_text(md_text):
            docs.append(
                Document(
                    page_content=document_context + chunk,
                    # Per-document copy so the chunks do not share one dict.
                    metadata={**metadata}
                )
            )
        # also, creating a document for summary:
        if summary:
            # NOTE(review): keys unpacked from ``metadata`` win over the
            # literal "category" entry, so "Summary" only survives when
            # create_metadata() sets no category of its own - confirm intent.
            _info = {
                "category": "Summary",
                **metadata
            }
            docs.append(
                Document(
                    page_content=f"**Summary:** {summary}",
                    metadata=_info
                )
            )
        return docs
@@ -0,0 +1,135 @@
1
+ from io import StringIO
2
+ from pathlib import Path
3
+ from datetime import datetime
4
+ import fitz
5
+ import pandas as pd
6
+ from langchain.docstore.document import Document
7
+ from .basepdf import BasePDF
8
+
9
+
10
class PDFTables(BasePDF):
    """
    Loader for Tables present on PDF Files.

    Every table found by ``page.find_tables`` becomes one Langchain Document
    whose content is a short document-level header followed by the table
    rendered as Markdown.
    """
    _extension = ['.pdf']

    def __init__(
        self,
        table_settings: dict = None,
        **kwargs
    ):
        """
        Args:
            table_settings (dict, optional): Options forwarded to
                ``page.find_tables``; they are merged over the defaults below.
            **kwargs: Forwarded to ``BasePDF``. ``skiprows`` is consumed here
                and kept on ``self._skiprows`` (not read by this class).
        """
        self._skiprows = kwargs.pop('skiprows', None)
        super().__init__(**kwargs)
        # Table Settings:
        self.table_settings = {
            # "vertical_strategy": "text",
            # "horizontal_strategy": "text",
            "intersection_x_tolerance": 5,
            "intersection_y_tolerance": 5
        }
        if table_settings:
            self.table_settings.update(table_settings)

    def unique_columns(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Rename duplicate columns in the DataFrame to ensure they are unique.

        Repeated names get a numeric suffix (``col``, ``col_1``, ``col_2``...).
        The DataFrame is modified in place and also returned.

        Args:
            df (pd.DataFrame): The DataFrame with potential duplicate column names.

        Returns:
            pd.DataFrame: The same DataFrame with unique column names.
        """
        seen = {}
        new_columns = []
        for col in df.columns:
            new_col = col
            count = seen.get(col, 0)
            # Keep bumping the suffix until the candidate name is free.
            while new_col in new_columns:
                count += 1
                new_col = f"{col}_{count}"
            new_columns.append(new_col)
            seen[col] = count
        df.columns = new_columns
        return df

    def get_markdown(self, df: pd.DataFrame) -> str:
        """
        Convert a DataFrame to a Markdown string.

        Column names are made unique first (see ``unique_columns``), which
        renames the columns of ``df`` in place.

        Args:
            df (pd.DataFrame): The DataFrame to convert.

        Returns:
            str: The table rendered as Markdown.
        """
        buffer = StringIO()
        df = self.unique_columns(df)
        df.to_markdown(buffer)
        buffer.seek(0)
        return buffer.getvalue()

    def parse_table(self, table_idx, table, page_number, path) -> tuple:
        """
        Turn a detected table into a DataFrame plus its document metadata.

        Args:
            table_idx: Position of the table within its page.
            table: Table object exposing ``to_pandas()``.
            page_number: Zero-based page index; reported one-based in metadata.
            path (Path): The PDF file the table comes from.

        Returns:
            tuple: ``(df, table_meta)`` - the cleaned DataFrame (all-empty
            rows and columns dropped) and the metadata dictionary.
        """
        df = table.to_pandas()  # convert to pandas DataFrame
        df = df.dropna(axis=1, how='all')
        df = df.dropna(how='all', axis=0)  # Drop empty rows
        page = page_number + 1
        table_meta = {
            "url": '',
            "source": f"{path.name} Page.#{page} Table.#{table_idx}",
            "filename": path.name,
            "question": '',
            "answer": '',
            "type": 'table',
            "summary": '',
            "category": self.category,
            "source_type": self._source_type,
            "created_at": datetime.now().strftime("%Y-%m-%d, %H:%M:%S"),
            "document_meta": {
                "table_index": table_idx,
                "table_shape": df.shape,
                "table_columns": df.columns.tolist(),
                "description": f"Extracted from Page.#{page}."
            }
        }
        return df, table_meta

    def _log_table_sample(self, df: pd.DataFrame) -> None:
        """Write a debug dump of the table and one sample value per column."""
        self.logger.debug(f"::: Printing Table Information === \n{df}")
        self.logger.debug("::: Printing Column Information === ")
        for column, dtype in df.dtypes.items():
            # An empty table has no first row to sample.
            sample = df[column].iloc[0] if len(df) else None
            self.logger.debug(f"{column} -> {dtype} -> {sample}")

    def _load_pdf(self, path: Path) -> list:
        """
        Load a PDF file using the Fitz library.

        Args:
            path (Path): The path to the PDF file.

        Returns:
            list: A list of Langchain Documents, one per table that could be
            rendered; empty when the path check fails or no table is found.
        """
        if not self._check_path(path):
            # Always hand back a list so callers can iterate the result.
            return []
        self.logger.info(f"Loading PDF file: {path}")
        pdf = fitz.open(str(path))  # Open the PDF file
        docs = []
        document_context = f"File Name: {path.name}\n"
        document_context += f"Document Type: {self.doctype}\n"
        document_context += f"Source Type: {self._source_type}\n\n"
        try:
            for page_number in range(pdf.page_count):
                page = pdf[page_number]
                try:
                    tabs = page.find_tables(**self.table_settings)
                    for tab_idx, tab in enumerate(tabs):
                        df, _meta = self.parse_table(tab_idx, tab, page_number, path)
                        self._log_table_sample(df)
                        # convert into markdown:
                        txt = df.to_markdown()
                        if txt:
                            docs.append(
                                Document(page_content=document_context + txt, metadata=_meta)
                            )
                except Exception as exc:
                    # Best effort: a broken page must not stop the others.
                    self.logger.error(
                        f"Error extracting tables from {path.name} "
                        f"page {page_number + 1}: {exc}"
                    )
                    continue
        finally:
            # Release the file handle even if extraction fails.
            pdf.close()
        return docs
@@ -0,0 +1,67 @@
1
+
2
+ from pathlib import PurePath
3
+ from typing import List
4
+ import pandas as pd
5
+ from langchain.docstore.document import Document
6
+ from .abstract import AbstractLoader
7
+
8
+
9
class QAFileLoader(AbstractLoader):
    """
    Question and Answers file based on Excel, converted to Langchain Documents.

    Each spreadsheet row yields one Document combining its question and
    answer cells.
    """
    _extension = ['.xlsx']
    chunk_size = 768

    def __init__(
        self,
        columns: list = None,
        **kwargs
    ):
        """
        Args:
            columns (list, optional): Names of the question column and the
                answer column, in that order. Defaults to
                ``['Question', 'Answer']``.
            **kwargs: Forwarded unchanged to ``AbstractLoader``.
        """
        super().__init__(**kwargs)
        # Build a fresh list per instance: a list literal used as the default
        # argument would be shared by every loader created without `columns`.
        self._columns = ['Question', 'Answer'] if columns is None else list(columns)

    def _load_document(self, path: PurePath) -> list:
        """
        Read the Excel file and build one Document per row.

        Args:
            path (PurePath): Location of the Excel file.

        Returns:
            list: Langchain Documents, one for each row of the sheet.
        """
        df = pd.read_excel(path)
        q = self._columns[0]
        a = self._columns[1]
        docs = []
        for _, row in df.iterrows():
            question = row[q]
            answer = row[a]
            # Question Document
            document_meta = {
                "question": question,
                "answer": answer,
            }
            metadata = self.create_metadata(
                path=path,
                doctype=self.doctype,
                source_type=self._source_type,
                summary=f"Question: {question}?: **{answer}**",
                doc_metadata=document_meta,
                type="QA",
                question=question,
                answer=answer,
            )
            doc = Document(
                page_content=f"**Question:** {question}: **Answer:** {answer}",
                metadata=metadata,
            )
            docs.append(doc)
        return docs

    async def load(self, path: PurePath) -> List[Document]:
        """Load data from a source and return it as a Langchain Document.

        Args:
            path (Path): The source of the data. It must be a concrete path
                object: ``exists()`` is called on it, which a bare
                ``PurePath`` does not provide.

        Returns:
            List[Document]: A list of Langchain Documents; empty when the
            file does not exist.
        """
        self.logger.info(
            f"Loading Excel FAQ file: {path}"
        )
        docs = []
        if path.exists():
            docs = self._load_document(path)
        return docs
@@ -0,0 +1,55 @@
1
+ from typing import List
2
+ from pathlib import PurePath
3
+ from langchain.docstore.document import Document
4
+ from .abstract import AbstractLoader
5
+
6
+
7
class TXTLoader(AbstractLoader):
    """
    Loader for plain-text (TXT) files.
    """
    _extension = ['.txt']

    def _load_document(self, path: PurePath) -> List[Document]:
        """
        Load a TXT file.

        The text is summarised (best effort), split into chunks, and every
        chunk is prefixed with a short document-level header.

        Args:
            path (Path): The path to the TXT file.

        Returns:
            list: A list of Langchain Documents, one per chunk; empty when
            the path check fails.
        """
        docs = []
        if not self._check_path(path):
            return docs
        self.logger.info(f"Loading TXT file: {path}")
        # Explicit encoding: the platform default varies between systems.
        with open(path, 'r', encoding='utf-8') as file:
            text = file.read()
        try:
            summary = self.get_summary_from_text(text, use_gpu=True)
        except Exception as exc:
            # Summaries are optional; record the failure and carry on.
            self.logger.warning(f"Error generating summary: {exc}")
            summary = ''
        metadata = self.create_metadata(
            path=path,
            doctype=self.doctype,
            source_type=self._source_type,
            summary=summary,
            doc_metadata={}
        )
        # Create document-level context
        document_context = f"File Name: {path.name}\n"
        document_context += f"Document Type: {self.doctype}\n"
        document_context += f"Source Type: {self._source_type}\n"
        document_context += f"Summary: {summary}\n\n"
        # splitting the content:
        for chunk in self.markdown_splitter.split_text(text):
            # Per-document copy so the chunks do not share one dict.
            _idx = {
                **metadata
            }
            docs.append(
                Document(
                    page_content=document_context + chunk,
                    metadata=_idx
                )
            )
        return docs