flowtask 5.8.4__cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (470)
  1. flowtask/__init__.py +93 -0
  2. flowtask/__main__.py +38 -0
  3. flowtask/bots/__init__.py +6 -0
  4. flowtask/bots/check.py +93 -0
  5. flowtask/bots/codebot.py +51 -0
  6. flowtask/components/ASPX.py +148 -0
  7. flowtask/components/AddDataset.py +352 -0
  8. flowtask/components/Amazon.py +523 -0
  9. flowtask/components/AutoTask.py +314 -0
  10. flowtask/components/Azure.py +80 -0
  11. flowtask/components/AzureUsers.py +106 -0
  12. flowtask/components/BaseAction.py +91 -0
  13. flowtask/components/BaseLoop.py +198 -0
  14. flowtask/components/BestBuy.py +800 -0
  15. flowtask/components/CSVToGCS.py +120 -0
  16. flowtask/components/CompanyScraper/__init__.py +1 -0
  17. flowtask/components/CompanyScraper/parsers/__init__.py +6 -0
  18. flowtask/components/CompanyScraper/parsers/base.py +102 -0
  19. flowtask/components/CompanyScraper/parsers/explorium.py +192 -0
  20. flowtask/components/CompanyScraper/parsers/leadiq.py +206 -0
  21. flowtask/components/CompanyScraper/parsers/rocket.py +133 -0
  22. flowtask/components/CompanyScraper/parsers/siccode.py +109 -0
  23. flowtask/components/CompanyScraper/parsers/visualvisitor.py +130 -0
  24. flowtask/components/CompanyScraper/parsers/zoominfo.py +118 -0
  25. flowtask/components/CompanyScraper/scrapper.py +1054 -0
  26. flowtask/components/CopyTo.py +177 -0
  27. flowtask/components/CopyToBigQuery.py +243 -0
  28. flowtask/components/CopyToMongoDB.py +291 -0
  29. flowtask/components/CopyToPg.py +609 -0
  30. flowtask/components/CopyToRethink.py +207 -0
  31. flowtask/components/CreateGCSBucket.py +102 -0
  32. flowtask/components/CreateReport/CreateReport.py +228 -0
  33. flowtask/components/CreateReport/__init__.py +9 -0
  34. flowtask/components/CreateReport/charts/__init__.py +15 -0
  35. flowtask/components/CreateReport/charts/bar.py +51 -0
  36. flowtask/components/CreateReport/charts/base.py +66 -0
  37. flowtask/components/CreateReport/charts/pie.py +64 -0
  38. flowtask/components/CreateReport/utils.py +9 -0
  39. flowtask/components/CustomerSatisfaction.py +196 -0
  40. flowtask/components/DataInput.py +200 -0
  41. flowtask/components/DateList.py +255 -0
  42. flowtask/components/DbClient.py +163 -0
  43. flowtask/components/DialPad.py +146 -0
  44. flowtask/components/DocumentDBQuery.py +200 -0
  45. flowtask/components/DownloadFrom.py +371 -0
  46. flowtask/components/DownloadFromD2L.py +113 -0
  47. flowtask/components/DownloadFromFTP.py +181 -0
  48. flowtask/components/DownloadFromIMAP.py +315 -0
  49. flowtask/components/DownloadFromS3.py +198 -0
  50. flowtask/components/DownloadFromSFTP.py +265 -0
  51. flowtask/components/DownloadFromSharepoint.py +110 -0
  52. flowtask/components/DownloadFromSmartSheet.py +114 -0
  53. flowtask/components/DownloadS3File.py +229 -0
  54. flowtask/components/Dummy.py +59 -0
  55. flowtask/components/DuplicatePhoto.py +411 -0
  56. flowtask/components/EmployeeEvaluation.py +237 -0
  57. flowtask/components/ExecuteSQL.py +323 -0
  58. flowtask/components/ExtractHTML.py +178 -0
  59. flowtask/components/FileBase.py +178 -0
  60. flowtask/components/FileCopy.py +181 -0
  61. flowtask/components/FileDelete.py +82 -0
  62. flowtask/components/FileExists.py +146 -0
  63. flowtask/components/FileIteratorDelete.py +112 -0
  64. flowtask/components/FileList.py +194 -0
  65. flowtask/components/FileOpen.py +75 -0
  66. flowtask/components/FileRead.py +120 -0
  67. flowtask/components/FileRename.py +106 -0
  68. flowtask/components/FilterIf.py +284 -0
  69. flowtask/components/FilterRows/FilterRows.py +200 -0
  70. flowtask/components/FilterRows/__init__.py +10 -0
  71. flowtask/components/FilterRows/functions.py +4 -0
  72. flowtask/components/GCSToBigQuery.py +103 -0
  73. flowtask/components/GoogleA4.py +150 -0
  74. flowtask/components/GoogleGeoCoding.py +344 -0
  75. flowtask/components/GooglePlaces.py +315 -0
  76. flowtask/components/GoogleSearch.py +539 -0
  77. flowtask/components/HTTPClient.py +268 -0
  78. flowtask/components/ICIMS.py +146 -0
  79. flowtask/components/IF.py +179 -0
  80. flowtask/components/IcimsFolderCopy.py +173 -0
  81. flowtask/components/ImageFeatures/__init__.py +5 -0
  82. flowtask/components/ImageFeatures/process.py +233 -0
  83. flowtask/components/IteratorBase.py +251 -0
  84. flowtask/components/LangchainLoader/__init__.py +5 -0
  85. flowtask/components/LangchainLoader/loader.py +194 -0
  86. flowtask/components/LangchainLoader/loaders/__init__.py +22 -0
  87. flowtask/components/LangchainLoader/loaders/abstract.py +362 -0
  88. flowtask/components/LangchainLoader/loaders/basepdf.py +50 -0
  89. flowtask/components/LangchainLoader/loaders/docx.py +91 -0
  90. flowtask/components/LangchainLoader/loaders/html.py +119 -0
  91. flowtask/components/LangchainLoader/loaders/pdfblocks.py +146 -0
  92. flowtask/components/LangchainLoader/loaders/pdfmark.py +79 -0
  93. flowtask/components/LangchainLoader/loaders/pdftables.py +135 -0
  94. flowtask/components/LangchainLoader/loaders/qa.py +67 -0
  95. flowtask/components/LangchainLoader/loaders/txt.py +55 -0
  96. flowtask/components/LeadIQ.py +650 -0
  97. flowtask/components/Loop.py +253 -0
  98. flowtask/components/Lowes.py +334 -0
  99. flowtask/components/MS365Usage.py +156 -0
  100. flowtask/components/MSTeamsMessages.py +320 -0
  101. flowtask/components/MarketClustering.py +1051 -0
  102. flowtask/components/MergeFiles.py +362 -0
  103. flowtask/components/MilvusOutput.py +87 -0
  104. flowtask/components/NearByStores.py +175 -0
  105. flowtask/components/NetworkNinja/__init__.py +6 -0
  106. flowtask/components/NetworkNinja/models/__init__.py +52 -0
  107. flowtask/components/NetworkNinja/models/abstract.py +177 -0
  108. flowtask/components/NetworkNinja/models/account.py +39 -0
  109. flowtask/components/NetworkNinja/models/client.py +19 -0
  110. flowtask/components/NetworkNinja/models/district.py +14 -0
  111. flowtask/components/NetworkNinja/models/events.py +101 -0
  112. flowtask/components/NetworkNinja/models/forms.py +499 -0
  113. flowtask/components/NetworkNinja/models/market.py +16 -0
  114. flowtask/components/NetworkNinja/models/organization.py +34 -0
  115. flowtask/components/NetworkNinja/models/photos.py +125 -0
  116. flowtask/components/NetworkNinja/models/project.py +44 -0
  117. flowtask/components/NetworkNinja/models/region.py +28 -0
  118. flowtask/components/NetworkNinja/models/store.py +203 -0
  119. flowtask/components/NetworkNinja/models/user.py +151 -0
  120. flowtask/components/NetworkNinja/router.py +854 -0
  121. flowtask/components/Odoo.py +175 -0
  122. flowtask/components/OdooInjector.py +192 -0
  123. flowtask/components/OpenFromXML.py +126 -0
  124. flowtask/components/OpenWeather.py +41 -0
  125. flowtask/components/OpenWithBase.py +616 -0
  126. flowtask/components/OpenWithPandas.py +715 -0
  127. flowtask/components/PGPDecrypt.py +199 -0
  128. flowtask/components/PandasIterator.py +187 -0
  129. flowtask/components/PandasToFile.py +189 -0
  130. flowtask/components/Paradox.py +339 -0
  131. flowtask/components/ParamIterator.py +117 -0
  132. flowtask/components/ParseHTML.py +84 -0
  133. flowtask/components/PlacerStores.py +249 -0
  134. flowtask/components/Pokemon.py +507 -0
  135. flowtask/components/PositiveBot.py +62 -0
  136. flowtask/components/PowerPointSlide.py +400 -0
  137. flowtask/components/PrintMessage.py +127 -0
  138. flowtask/components/ProductCompetitors/__init__.py +5 -0
  139. flowtask/components/ProductCompetitors/parsers/__init__.py +7 -0
  140. flowtask/components/ProductCompetitors/parsers/base.py +72 -0
  141. flowtask/components/ProductCompetitors/parsers/bestbuy.py +86 -0
  142. flowtask/components/ProductCompetitors/parsers/lowes.py +103 -0
  143. flowtask/components/ProductCompetitors/scrapper.py +155 -0
  144. flowtask/components/ProductCompliant.py +169 -0
  145. flowtask/components/ProductInfo/__init__.py +1 -0
  146. flowtask/components/ProductInfo/parsers/__init__.py +5 -0
  147. flowtask/components/ProductInfo/parsers/base.py +83 -0
  148. flowtask/components/ProductInfo/parsers/brother.py +97 -0
  149. flowtask/components/ProductInfo/parsers/canon.py +167 -0
  150. flowtask/components/ProductInfo/parsers/epson.py +118 -0
  151. flowtask/components/ProductInfo/parsers/hp.py +131 -0
  152. flowtask/components/ProductInfo/parsers/samsung.py +97 -0
  153. flowtask/components/ProductInfo/scraper.py +319 -0
  154. flowtask/components/ProductPricing.py +118 -0
  155. flowtask/components/QS.py +261 -0
  156. flowtask/components/QSBase.py +201 -0
  157. flowtask/components/QueryIterator.py +273 -0
  158. flowtask/components/QueryToInsert.py +327 -0
  159. flowtask/components/QueryToPandas.py +432 -0
  160. flowtask/components/RESTClient.py +195 -0
  161. flowtask/components/RethinkDBQuery.py +189 -0
  162. flowtask/components/Rsync.py +74 -0
  163. flowtask/components/RunSSH.py +59 -0
  164. flowtask/components/RunShell.py +71 -0
  165. flowtask/components/SalesForce.py +20 -0
  166. flowtask/components/SaveImageBank/__init__.py +257 -0
  167. flowtask/components/SchedulingVisits.py +592 -0
  168. flowtask/components/ScrapPage.py +216 -0
  169. flowtask/components/ScrapSearch.py +79 -0
  170. flowtask/components/SendNotify.py +257 -0
  171. flowtask/components/SentimentAnalysis.py +694 -0
  172. flowtask/components/ServiceScrapper/__init__.py +5 -0
  173. flowtask/components/ServiceScrapper/parsers/__init__.py +1 -0
  174. flowtask/components/ServiceScrapper/parsers/base.py +94 -0
  175. flowtask/components/ServiceScrapper/parsers/costco.py +93 -0
  176. flowtask/components/ServiceScrapper/scrapper.py +199 -0
  177. flowtask/components/SetVariables.py +156 -0
  178. flowtask/components/SubTask.py +182 -0
  179. flowtask/components/SuiteCRM.py +48 -0
  180. flowtask/components/Switch.py +175 -0
  181. flowtask/components/TableBase.py +148 -0
  182. flowtask/components/TableDelete.py +312 -0
  183. flowtask/components/TableInput.py +143 -0
  184. flowtask/components/TableOutput/TableOutput.py +384 -0
  185. flowtask/components/TableOutput/__init__.py +3 -0
  186. flowtask/components/TableSchema.py +534 -0
  187. flowtask/components/Target.py +223 -0
  188. flowtask/components/ThumbnailGenerator.py +156 -0
  189. flowtask/components/ToPandas.py +67 -0
  190. flowtask/components/TransformRows/TransformRows.py +507 -0
  191. flowtask/components/TransformRows/__init__.py +9 -0
  192. flowtask/components/TransformRows/functions.py +559 -0
  193. flowtask/components/TransposeRows.py +176 -0
  194. flowtask/components/UPCDatabase.py +86 -0
  195. flowtask/components/UnGzip.py +171 -0
  196. flowtask/components/Uncompress.py +172 -0
  197. flowtask/components/UniqueRows.py +126 -0
  198. flowtask/components/Unzip.py +107 -0
  199. flowtask/components/UpdateOperationalVars.py +147 -0
  200. flowtask/components/UploadTo.py +299 -0
  201. flowtask/components/UploadToS3.py +136 -0
  202. flowtask/components/UploadToSFTP.py +160 -0
  203. flowtask/components/UploadToSharepoint.py +205 -0
  204. flowtask/components/UserFunc.py +122 -0
  205. flowtask/components/VivaTracker.py +140 -0
  206. flowtask/components/WSDLClient.py +123 -0
  207. flowtask/components/Wait.py +18 -0
  208. flowtask/components/Walmart.py +199 -0
  209. flowtask/components/Workplace.py +134 -0
  210. flowtask/components/XMLToPandas.py +267 -0
  211. flowtask/components/Zammad/__init__.py +41 -0
  212. flowtask/components/Zammad/models.py +0 -0
  213. flowtask/components/ZoomInfoScraper.py +409 -0
  214. flowtask/components/__init__.py +104 -0
  215. flowtask/components/abstract.py +18 -0
  216. flowtask/components/flow.py +530 -0
  217. flowtask/components/google.py +335 -0
  218. flowtask/components/group.py +221 -0
  219. flowtask/components/py.typed +0 -0
  220. flowtask/components/reviewscrap.py +132 -0
  221. flowtask/components/tAutoincrement.py +117 -0
  222. flowtask/components/tConcat.py +109 -0
  223. flowtask/components/tExplode.py +119 -0
  224. flowtask/components/tFilter.py +184 -0
  225. flowtask/components/tGroup.py +236 -0
  226. flowtask/components/tJoin.py +270 -0
  227. flowtask/components/tMap/__init__.py +9 -0
  228. flowtask/components/tMap/functions.py +54 -0
  229. flowtask/components/tMap/tMap.py +450 -0
  230. flowtask/components/tMelt.py +112 -0
  231. flowtask/components/tMerge.py +114 -0
  232. flowtask/components/tOrder.py +93 -0
  233. flowtask/components/tPandas.py +94 -0
  234. flowtask/components/tPivot.py +71 -0
  235. flowtask/components/tPluckCols.py +76 -0
  236. flowtask/components/tUnnest.py +82 -0
  237. flowtask/components/user.py +401 -0
  238. flowtask/conf.py +457 -0
  239. flowtask/download.py +102 -0
  240. flowtask/events/__init__.py +11 -0
  241. flowtask/events/events/__init__.py +20 -0
  242. flowtask/events/events/abstract.py +95 -0
  243. flowtask/events/events/alerts/__init__.py +362 -0
  244. flowtask/events/events/alerts/colfunctions.py +131 -0
  245. flowtask/events/events/alerts/functions.py +158 -0
  246. flowtask/events/events/dummy.py +12 -0
  247. flowtask/events/events/exec.py +124 -0
  248. flowtask/events/events/file/__init__.py +7 -0
  249. flowtask/events/events/file/base.py +51 -0
  250. flowtask/events/events/file/copy.py +23 -0
  251. flowtask/events/events/file/delete.py +16 -0
  252. flowtask/events/events/interfaces/__init__.py +9 -0
  253. flowtask/events/events/interfaces/client.py +67 -0
  254. flowtask/events/events/interfaces/credentials.py +28 -0
  255. flowtask/events/events/interfaces/notifications.py +58 -0
  256. flowtask/events/events/jira.py +122 -0
  257. flowtask/events/events/log.py +26 -0
  258. flowtask/events/events/logerr.py +52 -0
  259. flowtask/events/events/notify.py +59 -0
  260. flowtask/events/events/notify_event.py +160 -0
  261. flowtask/events/events/publish.py +54 -0
  262. flowtask/events/events/sendfile.py +104 -0
  263. flowtask/events/events/task.py +97 -0
  264. flowtask/events/events/teams.py +98 -0
  265. flowtask/events/events/webhook.py +58 -0
  266. flowtask/events/manager.py +287 -0
  267. flowtask/exceptions.c +39393 -0
  268. flowtask/exceptions.cpython-39-x86_64-linux-gnu.so +0 -0
  269. flowtask/extensions/__init__.py +3 -0
  270. flowtask/extensions/abstract.py +82 -0
  271. flowtask/extensions/logging/__init__.py +65 -0
  272. flowtask/hooks/__init__.py +9 -0
  273. flowtask/hooks/actions/__init__.py +22 -0
  274. flowtask/hooks/actions/abstract.py +66 -0
  275. flowtask/hooks/actions/dummy.py +23 -0
  276. flowtask/hooks/actions/jira.py +74 -0
  277. flowtask/hooks/actions/rest.py +320 -0
  278. flowtask/hooks/actions/sampledata.py +37 -0
  279. flowtask/hooks/actions/sensor.py +23 -0
  280. flowtask/hooks/actions/task.py +9 -0
  281. flowtask/hooks/actions/ticket.py +37 -0
  282. flowtask/hooks/actions/zammad.py +55 -0
  283. flowtask/hooks/hook.py +62 -0
  284. flowtask/hooks/models.py +17 -0
  285. flowtask/hooks/service.py +187 -0
  286. flowtask/hooks/step.py +91 -0
  287. flowtask/hooks/types/__init__.py +23 -0
  288. flowtask/hooks/types/base.py +129 -0
  289. flowtask/hooks/types/brokers/__init__.py +11 -0
  290. flowtask/hooks/types/brokers/base.py +54 -0
  291. flowtask/hooks/types/brokers/mqtt.py +35 -0
  292. flowtask/hooks/types/brokers/rabbitmq.py +82 -0
  293. flowtask/hooks/types/brokers/redis.py +83 -0
  294. flowtask/hooks/types/brokers/sqs.py +44 -0
  295. flowtask/hooks/types/fs.py +232 -0
  296. flowtask/hooks/types/http.py +49 -0
  297. flowtask/hooks/types/imap.py +200 -0
  298. flowtask/hooks/types/jira.py +279 -0
  299. flowtask/hooks/types/mail.py +205 -0
  300. flowtask/hooks/types/postgres.py +98 -0
  301. flowtask/hooks/types/responses/__init__.py +8 -0
  302. flowtask/hooks/types/responses/base.py +5 -0
  303. flowtask/hooks/types/sharepoint.py +288 -0
  304. flowtask/hooks/types/ssh.py +141 -0
  305. flowtask/hooks/types/tagged.py +59 -0
  306. flowtask/hooks/types/upload.py +85 -0
  307. flowtask/hooks/types/watch.py +71 -0
  308. flowtask/hooks/types/web.py +36 -0
  309. flowtask/interfaces/AzureClient.py +137 -0
  310. flowtask/interfaces/AzureGraph.py +839 -0
  311. flowtask/interfaces/Boto3Client.py +326 -0
  312. flowtask/interfaces/DropboxClient.py +173 -0
  313. flowtask/interfaces/ExcelHandler.py +94 -0
  314. flowtask/interfaces/FTPClient.py +131 -0
  315. flowtask/interfaces/GoogleCalendar.py +201 -0
  316. flowtask/interfaces/GoogleClient.py +133 -0
  317. flowtask/interfaces/GoogleDrive.py +127 -0
  318. flowtask/interfaces/GoogleGCS.py +89 -0
  319. flowtask/interfaces/GoogleGeocoding.py +93 -0
  320. flowtask/interfaces/GoogleLang.py +114 -0
  321. flowtask/interfaces/GooglePub.py +61 -0
  322. flowtask/interfaces/GoogleSheet.py +68 -0
  323. flowtask/interfaces/IMAPClient.py +137 -0
  324. flowtask/interfaces/O365Calendar.py +113 -0
  325. flowtask/interfaces/O365Client.py +220 -0
  326. flowtask/interfaces/OneDrive.py +284 -0
  327. flowtask/interfaces/Outlook.py +155 -0
  328. flowtask/interfaces/ParrotBot.py +130 -0
  329. flowtask/interfaces/SSHClient.py +378 -0
  330. flowtask/interfaces/Sharepoint.py +496 -0
  331. flowtask/interfaces/__init__.py +36 -0
  332. flowtask/interfaces/azureauth.py +119 -0
  333. flowtask/interfaces/cache.py +201 -0
  334. flowtask/interfaces/client.py +82 -0
  335. flowtask/interfaces/compress.py +525 -0
  336. flowtask/interfaces/credentials.py +124 -0
  337. flowtask/interfaces/d2l.py +239 -0
  338. flowtask/interfaces/databases/__init__.py +5 -0
  339. flowtask/interfaces/databases/db.py +223 -0
  340. flowtask/interfaces/databases/documentdb.py +55 -0
  341. flowtask/interfaces/databases/rethink.py +39 -0
  342. flowtask/interfaces/dataframes/__init__.py +11 -0
  343. flowtask/interfaces/dataframes/abstract.py +21 -0
  344. flowtask/interfaces/dataframes/arrow.py +71 -0
  345. flowtask/interfaces/dataframes/dt.py +69 -0
  346. flowtask/interfaces/dataframes/pandas.py +167 -0
  347. flowtask/interfaces/dataframes/polars.py +60 -0
  348. flowtask/interfaces/db.py +263 -0
  349. flowtask/interfaces/env.py +46 -0
  350. flowtask/interfaces/func.py +137 -0
  351. flowtask/interfaces/http.py +1780 -0
  352. flowtask/interfaces/locale.py +40 -0
  353. flowtask/interfaces/log.py +75 -0
  354. flowtask/interfaces/mask.py +143 -0
  355. flowtask/interfaces/notification.py +154 -0
  356. flowtask/interfaces/playwright.py +339 -0
  357. flowtask/interfaces/powerpoint.py +368 -0
  358. flowtask/interfaces/py.typed +0 -0
  359. flowtask/interfaces/qs.py +376 -0
  360. flowtask/interfaces/result.py +87 -0
  361. flowtask/interfaces/selenium_service.py +779 -0
  362. flowtask/interfaces/smartsheet.py +154 -0
  363. flowtask/interfaces/stat.py +39 -0
  364. flowtask/interfaces/task.py +96 -0
  365. flowtask/interfaces/template.py +118 -0
  366. flowtask/interfaces/vectorstores/__init__.py +1 -0
  367. flowtask/interfaces/vectorstores/abstract.py +133 -0
  368. flowtask/interfaces/vectorstores/milvus.py +669 -0
  369. flowtask/interfaces/zammad.py +107 -0
  370. flowtask/models.py +193 -0
  371. flowtask/parsers/__init__.py +15 -0
  372. flowtask/parsers/_yaml.c +11978 -0
  373. flowtask/parsers/_yaml.cpython-39-x86_64-linux-gnu.so +0 -0
  374. flowtask/parsers/argparser.py +235 -0
  375. flowtask/parsers/base.c +15155 -0
  376. flowtask/parsers/base.cpython-39-x86_64-linux-gnu.so +0 -0
  377. flowtask/parsers/json.c +11968 -0
  378. flowtask/parsers/json.cpython-39-x86_64-linux-gnu.so +0 -0
  379. flowtask/parsers/maps.py +49 -0
  380. flowtask/parsers/toml.c +11968 -0
  381. flowtask/parsers/toml.cpython-39-x86_64-linux-gnu.so +0 -0
  382. flowtask/plugins/__init__.py +16 -0
  383. flowtask/plugins/components/__init__.py +0 -0
  384. flowtask/plugins/handler/__init__.py +45 -0
  385. flowtask/plugins/importer.py +31 -0
  386. flowtask/plugins/sources/__init__.py +0 -0
  387. flowtask/runner.py +283 -0
  388. flowtask/scheduler/__init__.py +9 -0
  389. flowtask/scheduler/functions.py +493 -0
  390. flowtask/scheduler/handlers/__init__.py +8 -0
  391. flowtask/scheduler/handlers/manager.py +504 -0
  392. flowtask/scheduler/handlers/models.py +58 -0
  393. flowtask/scheduler/handlers/service.py +72 -0
  394. flowtask/scheduler/notifications.py +65 -0
  395. flowtask/scheduler/scheduler.py +993 -0
  396. flowtask/services/__init__.py +0 -0
  397. flowtask/services/bots/__init__.py +0 -0
  398. flowtask/services/bots/telegram.py +264 -0
  399. flowtask/services/files/__init__.py +11 -0
  400. flowtask/services/files/manager.py +522 -0
  401. flowtask/services/files/model.py +37 -0
  402. flowtask/services/files/service.py +767 -0
  403. flowtask/services/jira/__init__.py +3 -0
  404. flowtask/services/jira/jira_actions.py +191 -0
  405. flowtask/services/tasks/__init__.py +13 -0
  406. flowtask/services/tasks/launcher.py +213 -0
  407. flowtask/services/tasks/manager.py +323 -0
  408. flowtask/services/tasks/service.py +275 -0
  409. flowtask/services/tasks/task_manager.py +376 -0
  410. flowtask/services/tasks/tasks.py +155 -0
  411. flowtask/storages/__init__.py +16 -0
  412. flowtask/storages/exceptions.py +12 -0
  413. flowtask/storages/files/__init__.py +8 -0
  414. flowtask/storages/files/abstract.py +29 -0
  415. flowtask/storages/files/filesystem.py +66 -0
  416. flowtask/storages/tasks/__init__.py +19 -0
  417. flowtask/storages/tasks/abstract.py +26 -0
  418. flowtask/storages/tasks/database.py +33 -0
  419. flowtask/storages/tasks/filesystem.py +108 -0
  420. flowtask/storages/tasks/github.py +119 -0
  421. flowtask/storages/tasks/memory.py +45 -0
  422. flowtask/storages/tasks/row.py +25 -0
  423. flowtask/tasks/__init__.py +0 -0
  424. flowtask/tasks/abstract.py +526 -0
  425. flowtask/tasks/command.py +118 -0
  426. flowtask/tasks/pile.py +486 -0
  427. flowtask/tasks/py.typed +0 -0
  428. flowtask/tasks/task.py +778 -0
  429. flowtask/template/__init__.py +161 -0
  430. flowtask/tests.py +257 -0
  431. flowtask/types/__init__.py +8 -0
  432. flowtask/types/typedefs.c +11347 -0
  433. flowtask/types/typedefs.cpython-39-x86_64-linux-gnu.so +0 -0
  434. flowtask/utils/__init__.py +24 -0
  435. flowtask/utils/constants.py +117 -0
  436. flowtask/utils/encoders.py +21 -0
  437. flowtask/utils/executor.py +112 -0
  438. flowtask/utils/functions.cpp +14280 -0
  439. flowtask/utils/functions.cpython-39-x86_64-linux-gnu.so +0 -0
  440. flowtask/utils/json.cpp +13349 -0
  441. flowtask/utils/json.cpython-39-x86_64-linux-gnu.so +0 -0
  442. flowtask/utils/mail.py +63 -0
  443. flowtask/utils/parseqs.c +13324 -0
  444. flowtask/utils/parserqs.cpython-39-x86_64-linux-gnu.so +0 -0
  445. flowtask/utils/stats.py +308 -0
  446. flowtask/utils/transformations.py +74 -0
  447. flowtask/utils/uv.py +12 -0
  448. flowtask/utils/validators.py +97 -0
  449. flowtask/version.py +11 -0
  450. flowtask-5.8.4.dist-info/LICENSE +201 -0
  451. flowtask-5.8.4.dist-info/METADATA +209 -0
  452. flowtask-5.8.4.dist-info/RECORD +470 -0
  453. flowtask-5.8.4.dist-info/WHEEL +6 -0
  454. flowtask-5.8.4.dist-info/entry_points.txt +3 -0
  455. flowtask-5.8.4.dist-info/top_level.txt +2 -0
  456. plugins/components/CreateQR.py +39 -0
  457. plugins/components/TestComponent.py +28 -0
  458. plugins/components/Use1.py +13 -0
  459. plugins/components/Workplace.py +117 -0
  460. plugins/components/__init__.py +3 -0
  461. plugins/sources/__init__.py +0 -0
  462. plugins/sources/get_populartimes.py +78 -0
  463. plugins/sources/google.py +150 -0
  464. plugins/sources/hubspot.py +679 -0
  465. plugins/sources/icims.py +679 -0
  466. plugins/sources/mobileinsight.py +501 -0
  467. plugins/sources/newrelic.py +262 -0
  468. plugins/sources/uap.py +268 -0
  469. plugins/sources/venu.py +244 -0
  470. plugins/sources/vocinity.py +314 -0
@@ -0,0 +1,694 @@
1
+ import asyncio
2
+ from collections.abc import Callable
3
+ from typing import List
4
+ import contextlib
5
+ import numpy as np
6
+ import pandas as pd
7
+ from concurrent.futures import ThreadPoolExecutor
8
+ from transformers import (
9
+ AutoTokenizer,
10
+ AutoModelForSequenceClassification,
11
+ AutoModelForTokenClassification,
12
+ BertForSequenceClassification,
13
+ BertTokenizer,
14
+ BertweetTokenizer,
15
+ RobertaTokenizer,
16
+ RobertaForSequenceClassification,
17
+ pipeline
18
+ )
19
+ from nltk.tokenize import sent_tokenize
20
+ import torch
21
+ from ..exceptions import ComponentError
22
+ from .flow import FlowComponent
23
+
24
+
25
+ class ModelPrediction:
26
+ """
27
+ ModelPrediction
28
+
29
+ Overview
30
+
31
+ Performs sentiment analysis and emotion detection on text using Hugging Face Transformers.
32
+
33
+ This class utilizes pre-trained models for sentiment analysis and emotion detection.
34
+ It supports different model architectures like BERT, BERTweet, and RoBERTa.
35
+ The class handles text chunking for inputs exceeding the maximum token length
36
+ and provides detailed sentiment and emotion scores along with predicted labels.
37
+
38
+ Attributes:
39
+ sentiment_model (str): Name of the sentiment analysis model to use from Hugging Face.
40
+ Defaults to 'tabularisai/robust-sentiment-analysis'.
41
+ emotions_model (str): Name of the emotion detection model to use from Hugging Face.
42
+ Defaults to 'bhadresh-savani/distilbert-base-uncased-emotion'.
43
+ classification (str): Type of classification pipeline to use (e.g., 'sentiment-analysis').
44
+ Defaults to 'sentiment-analysis'.
45
+ levels (int): Number of sentiment levels for sentiment analysis (2, 3, or 5).
46
+ Default is 5.
47
+ max_length (int): Maximum token length for input texts. Defaults to 512.
48
+ use_bertweet (bool): If True, uses BERTweet model for sentiment analysis. Defaults to False.
49
+ use_bert (bool): If True, uses BERT model for sentiment analysis. Defaults to False.
50
+ use_roberta (bool): If True, uses RoBERTa model for sentiment analysis. Defaults to False.
51
+
52
+ Returns:
53
+ DataFrame: A DataFrame with sentiment and emotion analysis results.
54
+ Includes columns for sentiment scores, sentiment labels, emotion scores, and emotion labels.
55
+
56
+ Raises:
57
+ ComponentError: If there is an issue during text processing or data handling.
58
+
59
+
60
+ Example:
61
+
62
+ ```yaml
63
+ SentimentAnalysis:
64
+ text_column: text
65
+ sentiment_model: tabularisai/robust-sentiment-analysis
66
+ sentiment_levels: 5
67
+ emotions_model: bhadresh-savani/distilbert-base-uncased-emotion
68
+ ```
69
+
70
+ """ # noqa
71
+
72
+ def __init__(
73
+ self,
74
+ sentiment_model: str = "tabularisai/robust-sentiment-analysis",
75
+ emotions_model: str = "bhadresh-savani/distilbert-base-uncased-emotion",
76
+ classification: str = 'sentiment-analysis',
77
+ levels: int = 5,
78
+ max_length: int = 512,
79
+ use_bertweet: bool = False,
80
+ use_bert: bool = False,
81
+ use_roberta: bool = False
82
+ ):
83
+ """
84
+ Initializes the ModelPrediction component.
85
+
86
+ Sets up the sentiment analysis and emotion detection models and tokenizers
87
+ based on the provided configurations.
88
+ """
89
+ self.max_length = max_length
90
+ self.levels = levels
91
+ self.use_bertweet: bool = use_bertweet
92
+ if use_bert:
93
+ self.model = BertForSequenceClassification.from_pretrained(
94
+ sentiment_model,
95
+ num_labels=abs(levels),
96
+ ignore_mismatched_sizes=True
97
+ )
98
+ self.tokenizer = BertTokenizer.from_pretrained(sentiment_model)
99
+ elif use_roberta:
100
+ self.model = RobertaForSequenceClassification.from_pretrained(sentiment_model)
101
+ self.tokenizer = RobertaTokenizer.from_pretrained(sentiment_model)
102
+ elif use_bertweet:
103
+ self.model = AutoModelForSequenceClassification.from_pretrained(sentiment_model)
104
+ self.tokenizer = BertweetTokenizer.from_pretrained(sentiment_model)
105
+ else:
106
+ self.tokenizer = AutoTokenizer.from_pretrained(
107
+ sentiment_model,
108
+ truncation=True,
109
+ max_length=self.max_length
110
+ # normalization=True
111
+ )
112
+ self.model = AutoModelForSequenceClassification.from_pretrained(
113
+ sentiment_model,
114
+ )
115
+ # And the Emotional Model:
116
+ self.emotional_model = AutoModelForSequenceClassification.from_pretrained(
117
+ emotions_model
118
+ )
119
+ self.emo_tokenizer = AutoTokenizer.from_pretrained(
120
+ emotions_model,
121
+ truncation=True,
122
+ max_length=self.max_length
123
+ )
124
+ self._device = self._get_device()
125
+ self.emotion_classifier = pipeline(
126
+ classification,
127
+ model=self.emotional_model,
128
+ tokenizer=self.emo_tokenizer,
129
+ device=self._device,
130
+ return_all_scores=True,
131
+ # ensure the pipeline is forcibly truncating on re-tokenize
132
+ truncation=True,
133
+ max_length=512
134
+ )
135
+ # sentiment classifier:
136
+ self.sentiment_classifier = pipeline(
137
+ classification,
138
+ model=self.model,
139
+ tokenizer=self.tokenizer,
140
+ device=self._device,
141
+ return_all_scores=True,
142
+ # ensure the pipeline is forcibly truncating on re-tokenize
143
+ truncation=True,
144
+ max_length=512
145
+ )
146
+
147
+ def _get_device(self, use_device: str = 'cpu', cuda_number: int = 0):
148
+ """
149
+ Determines and returns the appropriate device (CPU, CUDA, MPS) for model execution.
150
+
151
+ Utilizes CUDA if available, then MPS, and defaults to CPU if neither is accessible or if specified.
152
+
153
+ Args:
154
+ use_device (str): Desired device to use ('cpu', 'cuda'). Defaults to 'cpu'.
155
+ cuda_number (int): CUDA device number to use, if CUDA is selected. Defaults to 0.
156
+
157
+ Returns:
158
+ torch.device: The device object representing the chosen execution environment.
159
+ """
160
+ torch.backends.cudnn.deterministic = True
161
+ if torch.cuda.is_available():
162
+ # Use CUDA GPU if available
163
+ device = torch.device(f'cuda:{cuda_number}')
164
+ elif torch.backends.mps.is_available():
165
+ # Use CUDA Multi-Processing Service if available
166
+ device = torch.device("mps")
167
+ elif use_device == 'cuda':
168
+ device = torch.device(f'cuda:{cuda_number}')
169
+ else:
170
+ device = torch.device(use_device)
171
+ return device
172
+
173
+ def predict_emotion(self, text: str) -> dict:
174
+ """
175
+ Predicts the emotion of the input text.
176
+
177
+ Handles text chunking for long texts to ensure they fit within the model's
178
+ token limit. Returns a dictionary containing emotion predictions.
179
+
180
+ Args:
181
+ text (str): The input text to predict emotion for.
182
+
183
+ Returns:
184
+ dict: A dictionary containing emotion predictions.
185
+ For example: {'emotions': [{'label': 'joy', 'score': 0.99}]}
186
+ Returns an empty dictionary if the input text is empty.
187
+ """
188
+ if not text:
189
+ return {}
190
+
191
+ # Tokenize the text to check its length
192
+ encoded_text = self.emo_tokenizer.encode(
193
+ str(text),
194
+ truncation=False,
195
+ add_special_tokens=True
196
+ )
197
+
198
+ # Handle long texts by splitting them into chunks if needed
199
+ if len(encoded_text) > self.max_length:
200
+ text_chunks = self._split_text(text, self.max_length)
201
+ return self._predict_multiple_emotion_chunks(text_chunks)
202
+
203
+ # Use the pipeline to predict emotion for shorter texts
204
+ prediction = self.emotion_classifier(str(text))
205
+
206
+ if len(prediction) > 0 and isinstance(prediction[0], list): # When return_all_scores=True
207
+ emotions = [emo_pred for emo_pred in prediction[0] if emo_pred['score'] >= 0.5] # Apply threshold
208
+ if not emotions:
209
+ emotions.append({"label": "neutral", "score": 0})
210
+ return {'emotions': emotions}
211
+
212
+ return {}
213
+
214
+ def _predict_multiple_emotion_chunks(self, chunks: list) -> dict:
215
+ """
216
+ Predicts emotions for multiple text chunks and aggregates the results.
217
+
218
+ Used for processing long texts that have been split into smaller chunks.
219
+ Aggregates emotion predictions from each chunk.
220
+
221
+ Args:
222
+ chunks (list): List of text chunks (strings) to predict emotions for.
223
+
224
+ Returns:
225
+ dict: A dictionary containing aggregated emotion predictions.
226
+ For example: {'emotions': [{'label': 'joy', 'score': 0.99}, {'label': 'surprise', 'score': 0.6}]}
227
+ Returns emotions with scores above a threshold (e.g., 0.5). If no emotion meets the threshold,
228
+ it returns neutral emotion with a score of 0.
229
+ """
230
+ all_emotions = []
231
+
232
+ for chunk in chunks:
233
+ predictions = self.emotion_classifier(chunk)
234
+ if len(predictions) > 0 and isinstance(predictions[0], list):
235
+ # Filter predictions for significant emotions
236
+ emotions = [emo_pred for emo_pred in predictions[0] if emo_pred['score'] >= 0.5]
237
+ if emotions:
238
+ all_emotions.extend(emotions)
239
+
240
+ # Aggregate emotions across all chunks
241
+ if not all_emotions:
242
+ return {'emotions': [{"label": "neutral", "score": 0}]}
243
+
244
+ # Optionally, you can further process and aggregate emotions, but this returns them all
245
+ return {'emotions': all_emotions}
246
+
247
+ def _get_sentiment_map(self) -> dict:
248
+ """
249
+ Provides a mapping of sentiment class indices to sentiment labels based on the configured levels.
250
+
251
+ Returns a dictionary that maps the numerical index of sentiment classes to
252
+ their corresponding descriptive labels (e.g., 'Positive', 'Negative', 'Neutral').
253
+ The mapping is determined by the `levels` attribute set during initialization.
254
+
255
+ Returns:
256
+ dict: A dictionary mapping sentiment class indices to sentiment labels.
257
+ For example, for 5 levels: {0: "Very Negative", 1: "Negative", 2: "Neutral", 3: "Positive", 4: "Very Positive"}.
258
+ """ # noqa
259
+ if self.levels == -3: # Inverted
260
+ return {
261
+ 0: "Neutral",
262
+ 1: "Positive",
263
+ 2: "Negative",
264
+ }
265
+ elif self.levels == 5:
266
+ return {
267
+ 0: "Very Negative",
268
+ 1: "Negative",
269
+ 2: "Neutral",
270
+ 3: "Positive",
271
+ 4: "Very Positive"
272
+ }
273
+ elif self.levels == 3:
274
+ return {
275
+ 0: "Negative",
276
+ 1: "Neutral",
277
+ 2: "Positive",
278
+ }
279
+ else:
280
+ return {
281
+ 0: "Negative",
282
+ 1: "Positive",
283
+ }
284
+
285
+ def predict_sentiment(self, text: str) -> dict:
286
+ """
287
+ Predicts the sentiment of the input text.
288
+
289
+ Utilizes the sentiment analysis pipeline to classify the text and returns
290
+ sentiment scores and the predicted sentiment label. Handles text chunking
291
+ for texts exceeding the maximum token length.
292
+
293
+ Args:
294
+ text (str): The text to analyze for sentiment.
295
+
296
+ Returns:
297
+ dict: A dictionary containing sentiment analysis results.
298
+ Includes 'score' (list of sentiment scores) and 'predicted_sentiment' (string label).
299
+ Returns None if the input text is empty.
300
+ """
301
+ if not text:
302
+ return None
303
+ if isinstance(text, float):
304
+ text = str(text)
305
+
306
+ # Tokenize the text to check its length
307
+ encoded_text = self.tokenizer.encode(text, truncation=False, add_special_tokens=True)
308
+
309
+ # Handle long texts by splitting them into chunks if needed
310
+ if len(encoded_text) > self.max_length:
311
+ text_chunks = self._split_text(text, self.max_length)
312
+ return self._predict_multiple_chunks_pipeline(text_chunks)
313
+
314
+ # Use the pipeline to predict sentiment for shorter texts
315
+ predictions = self.sentiment_classifier(text)
316
+
317
+ # Since return_all_scores=True, predictions is a list of lists
318
+ # Each inner list contains dicts with 'label' and 'score'
319
+ scores = predictions[0]
320
+
321
+ # Extract scores and labels
322
+ probabilities = [item['score'] for item in scores]
323
+ labels = [item['label'] for item in scores]
324
+
325
+ # Check if labels are descriptive (e.g., 'positive', 'neutral', 'negative')
326
+ if all(label.lower() in ['positive', 'neutral', 'negative'] for label in labels):
327
+ # If labels are descriptive, no need for custom mapping
328
+ predicted_label = max(scores, key=lambda x: x['score'])['label']
329
+ return {
330
+ "score": probabilities,
331
+ "predicted_sentiment": predicted_label.capitalize()
332
+ }
333
+
334
+ # Map labels to indices
335
+ label_to_index = {}
336
+ for _, label in enumerate(labels):
337
+ if label.startswith("LABEL_"):
338
+ label_idx = int(label.replace("LABEL_", ""))
339
+ label_to_index[label] = label_idx
340
+ if not label_to_index:
341
+ label_to_index = {label: idx for idx, label in enumerate(labels)}
342
+
343
+ predicted_label = max(scores, key=lambda x: x['score'])['label']
344
+ predicted_class = label_to_index[predicted_label]
345
+
346
+ # Map predicted_class to sentiment
347
+ sentiment_map = self._get_sentiment_map()
348
+
349
+ predicted_sentiment = sentiment_map.get(predicted_class, predicted_label)
350
+
351
+ return {
352
+ "score": probabilities,
353
+ "predicted_sentiment": predicted_sentiment
354
+ }
355
+
356
+ def _predict_multiple_chunks_pipeline(self, chunks: list) -> dict:
357
+ """
358
+ Predicts sentiment for multiple text chunks using the pipeline and aggregates the results.
359
+
360
+ Averages sentiment probabilities across all chunks to determine the overall sentiment.
361
+ This method is specifically designed for handling long texts split into smaller processable chunks.
362
+
363
+ Args:
364
+ chunks (list): A list of text chunks (strings) to analyze for sentiment.
365
+
366
+ Returns:
367
+ dict: A dictionary containing the aggregated sentiment analysis results.
368
+ Includes 'score' (list of averaged sentiment probabilities) and 'predicted_sentiment' (string label
369
+ of the overall predicted sentiment).
370
+ """ # noqa
371
+ all_probabilities = []
372
+ for chunk in chunks:
373
+ predictions = self.sentiment_classifier(chunk)
374
+ scores = predictions[0]
375
+ probabilities = [item['score'] for item in scores]
376
+ all_probabilities.append(torch.tensor(probabilities))
377
+
378
+ # Averaging probabilities across chunks
379
+ avg_probabilities = torch.mean(torch.stack(all_probabilities), dim=0)
380
+ predicted_class = torch.argmax(avg_probabilities).item()
381
+
382
+ sentiment_map = self._get_sentiment_map()
383
+ predicted_sentiment = sentiment_map.get(predicted_class, "Unknown")
384
+
385
+ return {
386
+ "score": avg_probabilities.tolist(),
387
+ "predicted_sentiment": predicted_sentiment
388
+ }
389
+
390
    def _split_text(self, text: str, max_length: int) -> List[str]:
        """
        Splits input text into processable chunks based on sentence boundaries and token count.

        Ensures that each chunk does not exceed the maximum token length limit of the model.
        It attempts to split text at sentence boundaries to maintain semantic integrity where
        possible. A single sentence longer than the limit is itself sliced into
        ``max_length``-sized token runs.

        Args:
            text (str): The input text to be split.
            max_length (int): The maximum token length allowed for each chunk.

        Returns:
            List[str]: A list of text chunks, each guaranteed to be within the token limit.
        """
        chunks = []
        current_chunk = []  # token ids accumulated for the chunk being built
        # Naive sentence split; token-count limits are enforced below.
        split_by_sentences = text.split(". ")

        for sentence in split_by_sentences:
            sentence_tokens = self.tokenizer.encode(sentence, add_special_tokens=False)
            # +1 for potential separator
            if len(current_chunk) + len(sentence_tokens) + 1 <= max_length:
                current_chunk.extend(sentence_tokens)
                # Add a separator between sentences
                current_chunk.append(self.tokenizer.sep_token_id)
            else:
                # Sentence is too long, add current chunk
                if current_chunk:
                    chunks.append(self.tokenizer.decode(current_chunk))
                    current_chunk = []
                # Handle long sentence: split it into smaller parts
                temp_sentence_chunks = []
                temp_sentence_chunks.extend(
                    sentence_tokens[i: i + max_length]
                    for i in range(0, len(sentence_tokens), max_length)
                )
                # If there are sentences shorter than the max_length:
                # emit every full slice; the (shorter) last slice seeds the next chunk.
                if len(temp_sentence_chunks) > 1:
                    for i, chunk in enumerate(temp_sentence_chunks):
                        if i < len(temp_sentence_chunks) - 1:
                            chunks.append(self.tokenizer.decode(chunk))
                        else:
                            current_chunk.extend(chunk)
                else:
                    current_chunk.extend(sentence_tokens)

                # Close this sentence with a separator before the next one.
                if current_chunk:
                    current_chunk.append(self.tokenizer.sep_token_id)

        # Flush the trailing, still-open chunk.
        if current_chunk:
            chunks.append(self.tokenizer.decode(current_chunk))

        # Remove extra sentence separators that are not required
        for i, chunk in enumerate(chunks):
            if chunk.endswith(self.tokenizer.sep_token):
                chunks[i] = chunk[:-len(self.tokenizer.sep_token)]

        return chunks
449
+
450
+ def split_into_sentences(self, text):
451
+ """
452
+ Splits a text into sentences using NLTK's sentence tokenizer.
453
+
454
+ Leverages nltk.tokenize.sent_tokenize for robust sentence splitting,
455
+ handling various sentence terminators and abbreviations.
456
+
457
+ Args:
458
+ text (str): The input text to be split into sentences.
459
+
460
+ Returns:
461
+ list: A list of strings, where each string is a sentence from the input text.
462
+ """
463
+ return sent_tokenize(text)
464
+
465
+ def aggregate_sentiments(self, sentiments, levels):
466
+ """
467
+ Aggregates sentiment predictions from multiple texts to produce a single overall sentiment.
468
+
469
+ Calculates the average sentiment score across a list of sentiment predictions
470
+ and determines the overall predicted sentiment based on these averages.
471
+
472
+ Args:
473
+ sentiments (list): A list of dictionaries, each containing sentiment prediction results
474
+ for a text (output from `predict_sentiment`).
475
+ levels (int): The number of sentiment levels used in the analysis, determining the sentiment map.
476
+
477
+ Returns:
478
+ str: The aggregated predicted sentiment label (e.g., 'Positive', 'Negative', 'Neutral').
479
+ """
480
+ # Initialize an array to hold cumulative scores
481
+ cumulative_scores = torch.zeros(levels)
482
+ for sentiment in sentiments:
483
+ scores = torch.tensor(sentiment['score'][0])
484
+ cumulative_scores += scores
485
+
486
+ # Calculate average scores
487
+ avg_scores = cumulative_scores / len(sentiments)
488
+ predicted_class = torch.argmax(avg_scores).item()
489
+
490
+ if levels == 5:
491
+ sentiment_map = {
492
+ 0: "Very Negative",
493
+ 1: "Negative",
494
+ 2: "Neutral",
495
+ 3: "Positive",
496
+ 4: "Very Positive"
497
+ }
498
+ elif levels == 3:
499
+ sentiment_map = {
500
+ 0: "Negative",
501
+ 1: "Neutral",
502
+ 2: "Positive",
503
+ }
504
+ else:
505
+ sentiment_map = {
506
+ 0: "Negative",
507
+ 1: "Positive",
508
+ }
509
+
510
+ return sentiment_map[predicted_class]
511
+
512
+
513
class SentimentAnalysis(FlowComponent):
    """
    Applies sentiment analysis and emotion detection to a DataFrame of text data.

    This component processes a DataFrame, applying Hugging Face Transformer models
    to analyze the sentiment and emotions expressed in a specified text column.
    It leverages the `ModelPrediction` class to perform the actual predictions
    and integrates these results back into the DataFrame.

    Properties:
        text_column (str): The name of the DataFrame column containing the text to analyze.
            Defaults to 'text'.
        sentiment_model (str): Model name for sentiment analysis.
            Defaults to 'tabularisai/robust-sentiment-analysis'.
        emotions_model (str): Model name for emotion detection.
            Defaults to 'cardiffnlp/twitter-roberta-base-emotion'.
        pipeline_classification (str): Classification type for the pipeline
            (e.g. 'sentiment-analysis'). Defaults to 'sentiment-analysis'.
        with_average (bool): Whether sentiment should be averaged across rows
            (if applicable). Defaults to True.
        sentiment_levels (int): Number of sentiment levels (2, 3, or 5). Default is 5.
        use_bert (bool): Use a BERT model for sentiment analysis. Defaults to False.
        use_roberta (bool): Use a RoBERTa model for sentiment analysis. Defaults to False.
        use_bertweet (bool): Use a BERTweet model for sentiment analysis. Defaults to False.

    Returns:
        DataFrame: The input DataFrame augmented with 'sentiment_scores',
        'sentiment_score', 'emotions_score', 'predicted_emotion', and
        'predicted_sentiment' columns.

    Raises:
        ComponentError: If input data is not a Pandas DataFrame or if no
            input data is available.
    """
    def __init__(
        self,
        loop: asyncio.AbstractEventLoop = None,
        job: Callable = None,
        stat: Callable = None,
        **kwargs,
    ):
        """Extract sentiment analysis."""
        self.text_column: str = kwargs.pop('text_column', 'text')
        self._sentiment_model: str = kwargs.pop(
            'sentiment_model',
            'tabularisai/robust-sentiment-analysis'
        )
        self._emotion_model: str = kwargs.pop(
            'emotions_model',
            "cardiffnlp/twitter-roberta-base-emotion"
        )
        self._classification: str = kwargs.pop(
            'pipeline_classification',
            'sentiment-analysis'
        )
        self.with_average: bool = kwargs.pop('with_average', True)
        self.sentiment_levels: int = kwargs.pop('sentiment_levels', 5)
        self._use_bert: bool = kwargs.pop('use_bert', False)
        self._use_roberta: bool = kwargs.pop('use_roberta', False)
        self._use_bertweet: bool = kwargs.pop('use_bertweet', False)
        # Rows per parallel work unit and thread-pool width for `run()`.
        self.chunk_size: int = 100
        self.max_workers: int = 5
        super().__init__(loop=loop, job=job, stat=stat, **kwargs)

    async def start(self, **kwargs):
        """Validate the incoming data and build the shared prediction model."""
        if self.previous:
            self.data = self.input
        else:
            raise ComponentError(
                "Data Not Found",
                status=404
            )
        if not isinstance(self.data, pd.DataFrame):
            raise ComponentError(
                "Incompatible Data, we need a Pandas Dataframe",
                status=404
            )
        # Instantiate the model (NOTE(review): `_analyze_chunk` also builds
        # its own per-thread ModelPrediction; this shared instance appears
        # unused by `run()` — presumably kept for subclasses/early failure).
        self._predictor = ModelPrediction(
            sentiment_model=self._sentiment_model,
            emotions_model=self._emotion_model,
            classification=self._classification,
            max_length=512,
            levels=self.sentiment_levels,
            use_bertweet=self._use_bertweet,
            use_bert=self._use_bert,
            use_roberta=self._use_roberta
        )
        return True

    async def close(self):
        """No resources to release."""
        pass

    def _analyze_chunk(self, chunk: pd.DataFrame):
        """
        Analyzes a chunk of the DataFrame to add sentiment and emotion predictions.

        Designed to run inside a thread pool: each worker builds its own
        `ModelPrediction` so pipelines are not shared across threads, applies
        sentiment and emotion prediction to the text column, and returns the
        chunk with 'sentiment' and 'emotions' columns added.

        Args:
            chunk (pd.DataFrame): A subset of the input DataFrame to analyze.

        Returns:
            pd.DataFrame: The processed chunk with prediction columns added.
        """
        # Per-thread model instance (see note above).
        predictor = ModelPrediction(
            sentiment_model=self._sentiment_model,
            emotions_model=self._emotion_model,
            classification=self._classification,
            max_length=512,
            levels=self.sentiment_levels,
            use_bertweet=self._use_bertweet,
            use_bert=self._use_bert,
            use_roberta=self._use_roberta
        )
        chunk['sentiment'] = chunk[self.text_column].apply(
            predictor.predict_sentiment
        )
        chunk['emotions'] = chunk[self.text_column].apply(
            predictor.predict_emotion
        )
        # Best-effort GPU memory release; harmless on CPU-only hosts.
        with contextlib.suppress(Exception):
            torch.cuda.empty_cache()
        return chunk

    async def run(self):
        """
        Executes the sentiment analysis and emotion detection process on the input DataFrame.

        Splits the DataFrame into chunks, processes them in parallel in a
        thread pool, concatenates the results, and extracts prediction scores
        and labels into flat columns.

        Returns:
            pd.DataFrame: The DataFrame with added sentiment and emotion analysis results.
        """
        # Split the dataframe into chunks. Guard against an empty DataFrame:
        # np.array_split raises when asked for zero sections.
        num_chunks = max(int(np.ceil(len(self.data) / self.chunk_size)), 1)
        chunks = np.array_split(self.data, num_chunks)

        # Run analysis in parallel using a thread pool
        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            processed_chunks = list(executor.map(self._analyze_chunk, chunks))

        # Concatenate all the chunks back into a single DataFrame
        df = pd.concat(processed_chunks)
        # Extract the predicted sentiment and emotion into flat columns.
        try:
            # Full per-class probability list from the sentiment prediction.
            df['sentiment_scores'] = df['sentiment'].apply(
                lambda x: x.get('score', []) if x and isinstance(x.get('score', []), list) else []
            )
            # Max value of sentiments
            df['sentiment_score'] = df['sentiment_scores'].apply(
                lambda x: max(x) if isinstance(x, list) and len(x) > 0 else None
            )
            # Score of the first detected emotion, if any.
            df['emotions_score'] = df['emotions'].apply(
                lambda x: x.get('emotions', [{'score': None}])[0]['score'] if x and isinstance(x.get('emotions', []), list) and len(x['emotions']) > 0 else None  # noqa
            )
            # Label of the first detected emotion, if any.
            df['predicted_emotion'] = df['emotions'].apply(
                lambda x: x.get('emotions', [{'label': None}])[0]['label'] if x and isinstance(x.get('emotions', []), list) and len(x.get('emotions', [])) > 0 else None  # noqa
            )
            df['predicted_sentiment'] = df['sentiment'].apply(
                lambda x: x.get('predicted_sentiment', None) if x else None
            )
        except Exception as e:
            # Best-effort extraction: keep the raw prediction columns when
            # flattening fails, but surface the error instead of hiding it.
            print(f"SentimentAnalysis: error extracting predictions: {e}")
        self._result = df
        if self._debug is True:
            print("== DATA PREVIEW ==")
            print(self._result)
            print()
            print("::: Printing Column Information === ")
            for column, t in df.dtypes.items():
                print(column, "->", t, "->", df[column].iloc[0])
        return self._result
@@ -0,0 +1,5 @@
1
# Package entry point: re-export the scraper service class.
from .scrapper import ServiceScrapper

# Explicit public API for `from <package> import *`.
__all__ = (
    'ServiceScrapper',
)