flowtask-5.8.4-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowtask/__init__.py +93 -0
- flowtask/__main__.py +38 -0
- flowtask/bots/__init__.py +6 -0
- flowtask/bots/check.py +93 -0
- flowtask/bots/codebot.py +51 -0
- flowtask/components/ASPX.py +148 -0
- flowtask/components/AddDataset.py +352 -0
- flowtask/components/Amazon.py +523 -0
- flowtask/components/AutoTask.py +314 -0
- flowtask/components/Azure.py +80 -0
- flowtask/components/AzureUsers.py +106 -0
- flowtask/components/BaseAction.py +91 -0
- flowtask/components/BaseLoop.py +198 -0
- flowtask/components/BestBuy.py +800 -0
- flowtask/components/CSVToGCS.py +120 -0
- flowtask/components/CompanyScraper/__init__.py +1 -0
- flowtask/components/CompanyScraper/parsers/__init__.py +6 -0
- flowtask/components/CompanyScraper/parsers/base.py +102 -0
- flowtask/components/CompanyScraper/parsers/explorium.py +192 -0
- flowtask/components/CompanyScraper/parsers/leadiq.py +206 -0
- flowtask/components/CompanyScraper/parsers/rocket.py +133 -0
- flowtask/components/CompanyScraper/parsers/siccode.py +109 -0
- flowtask/components/CompanyScraper/parsers/visualvisitor.py +130 -0
- flowtask/components/CompanyScraper/parsers/zoominfo.py +118 -0
- flowtask/components/CompanyScraper/scrapper.py +1054 -0
- flowtask/components/CopyTo.py +177 -0
- flowtask/components/CopyToBigQuery.py +243 -0
- flowtask/components/CopyToMongoDB.py +291 -0
- flowtask/components/CopyToPg.py +609 -0
- flowtask/components/CopyToRethink.py +207 -0
- flowtask/components/CreateGCSBucket.py +102 -0
- flowtask/components/CreateReport/CreateReport.py +228 -0
- flowtask/components/CreateReport/__init__.py +9 -0
- flowtask/components/CreateReport/charts/__init__.py +15 -0
- flowtask/components/CreateReport/charts/bar.py +51 -0
- flowtask/components/CreateReport/charts/base.py +66 -0
- flowtask/components/CreateReport/charts/pie.py +64 -0
- flowtask/components/CreateReport/utils.py +9 -0
- flowtask/components/CustomerSatisfaction.py +196 -0
- flowtask/components/DataInput.py +200 -0
- flowtask/components/DateList.py +255 -0
- flowtask/components/DbClient.py +163 -0
- flowtask/components/DialPad.py +146 -0
- flowtask/components/DocumentDBQuery.py +200 -0
- flowtask/components/DownloadFrom.py +371 -0
- flowtask/components/DownloadFromD2L.py +113 -0
- flowtask/components/DownloadFromFTP.py +181 -0
- flowtask/components/DownloadFromIMAP.py +315 -0
- flowtask/components/DownloadFromS3.py +198 -0
- flowtask/components/DownloadFromSFTP.py +265 -0
- flowtask/components/DownloadFromSharepoint.py +110 -0
- flowtask/components/DownloadFromSmartSheet.py +114 -0
- flowtask/components/DownloadS3File.py +229 -0
- flowtask/components/Dummy.py +59 -0
- flowtask/components/DuplicatePhoto.py +411 -0
- flowtask/components/EmployeeEvaluation.py +237 -0
- flowtask/components/ExecuteSQL.py +323 -0
- flowtask/components/ExtractHTML.py +178 -0
- flowtask/components/FileBase.py +178 -0
- flowtask/components/FileCopy.py +181 -0
- flowtask/components/FileDelete.py +82 -0
- flowtask/components/FileExists.py +146 -0
- flowtask/components/FileIteratorDelete.py +112 -0
- flowtask/components/FileList.py +194 -0
- flowtask/components/FileOpen.py +75 -0
- flowtask/components/FileRead.py +120 -0
- flowtask/components/FileRename.py +106 -0
- flowtask/components/FilterIf.py +284 -0
- flowtask/components/FilterRows/FilterRows.py +200 -0
- flowtask/components/FilterRows/__init__.py +10 -0
- flowtask/components/FilterRows/functions.py +4 -0
- flowtask/components/GCSToBigQuery.py +103 -0
- flowtask/components/GoogleA4.py +150 -0
- flowtask/components/GoogleGeoCoding.py +344 -0
- flowtask/components/GooglePlaces.py +315 -0
- flowtask/components/GoogleSearch.py +539 -0
- flowtask/components/HTTPClient.py +268 -0
- flowtask/components/ICIMS.py +146 -0
- flowtask/components/IF.py +179 -0
- flowtask/components/IcimsFolderCopy.py +173 -0
- flowtask/components/ImageFeatures/__init__.py +5 -0
- flowtask/components/ImageFeatures/process.py +233 -0
- flowtask/components/IteratorBase.py +251 -0
- flowtask/components/LangchainLoader/__init__.py +5 -0
- flowtask/components/LangchainLoader/loader.py +194 -0
- flowtask/components/LangchainLoader/loaders/__init__.py +22 -0
- flowtask/components/LangchainLoader/loaders/abstract.py +362 -0
- flowtask/components/LangchainLoader/loaders/basepdf.py +50 -0
- flowtask/components/LangchainLoader/loaders/docx.py +91 -0
- flowtask/components/LangchainLoader/loaders/html.py +119 -0
- flowtask/components/LangchainLoader/loaders/pdfblocks.py +146 -0
- flowtask/components/LangchainLoader/loaders/pdfmark.py +79 -0
- flowtask/components/LangchainLoader/loaders/pdftables.py +135 -0
- flowtask/components/LangchainLoader/loaders/qa.py +67 -0
- flowtask/components/LangchainLoader/loaders/txt.py +55 -0
- flowtask/components/LeadIQ.py +650 -0
- flowtask/components/Loop.py +253 -0
- flowtask/components/Lowes.py +334 -0
- flowtask/components/MS365Usage.py +156 -0
- flowtask/components/MSTeamsMessages.py +320 -0
- flowtask/components/MarketClustering.py +1051 -0
- flowtask/components/MergeFiles.py +362 -0
- flowtask/components/MilvusOutput.py +87 -0
- flowtask/components/NearByStores.py +175 -0
- flowtask/components/NetworkNinja/__init__.py +6 -0
- flowtask/components/NetworkNinja/models/__init__.py +52 -0
- flowtask/components/NetworkNinja/models/abstract.py +177 -0
- flowtask/components/NetworkNinja/models/account.py +39 -0
- flowtask/components/NetworkNinja/models/client.py +19 -0
- flowtask/components/NetworkNinja/models/district.py +14 -0
- flowtask/components/NetworkNinja/models/events.py +101 -0
- flowtask/components/NetworkNinja/models/forms.py +499 -0
- flowtask/components/NetworkNinja/models/market.py +16 -0
- flowtask/components/NetworkNinja/models/organization.py +34 -0
- flowtask/components/NetworkNinja/models/photos.py +125 -0
- flowtask/components/NetworkNinja/models/project.py +44 -0
- flowtask/components/NetworkNinja/models/region.py +28 -0
- flowtask/components/NetworkNinja/models/store.py +203 -0
- flowtask/components/NetworkNinja/models/user.py +151 -0
- flowtask/components/NetworkNinja/router.py +854 -0
- flowtask/components/Odoo.py +175 -0
- flowtask/components/OdooInjector.py +192 -0
- flowtask/components/OpenFromXML.py +126 -0
- flowtask/components/OpenWeather.py +41 -0
- flowtask/components/OpenWithBase.py +616 -0
- flowtask/components/OpenWithPandas.py +715 -0
- flowtask/components/PGPDecrypt.py +199 -0
- flowtask/components/PandasIterator.py +187 -0
- flowtask/components/PandasToFile.py +189 -0
- flowtask/components/Paradox.py +339 -0
- flowtask/components/ParamIterator.py +117 -0
- flowtask/components/ParseHTML.py +84 -0
- flowtask/components/PlacerStores.py +249 -0
- flowtask/components/Pokemon.py +507 -0
- flowtask/components/PositiveBot.py +62 -0
- flowtask/components/PowerPointSlide.py +400 -0
- flowtask/components/PrintMessage.py +127 -0
- flowtask/components/ProductCompetitors/__init__.py +5 -0
- flowtask/components/ProductCompetitors/parsers/__init__.py +7 -0
- flowtask/components/ProductCompetitors/parsers/base.py +72 -0
- flowtask/components/ProductCompetitors/parsers/bestbuy.py +86 -0
- flowtask/components/ProductCompetitors/parsers/lowes.py +103 -0
- flowtask/components/ProductCompetitors/scrapper.py +155 -0
- flowtask/components/ProductCompliant.py +169 -0
- flowtask/components/ProductInfo/__init__.py +1 -0
- flowtask/components/ProductInfo/parsers/__init__.py +5 -0
- flowtask/components/ProductInfo/parsers/base.py +83 -0
- flowtask/components/ProductInfo/parsers/brother.py +97 -0
- flowtask/components/ProductInfo/parsers/canon.py +167 -0
- flowtask/components/ProductInfo/parsers/epson.py +118 -0
- flowtask/components/ProductInfo/parsers/hp.py +131 -0
- flowtask/components/ProductInfo/parsers/samsung.py +97 -0
- flowtask/components/ProductInfo/scraper.py +319 -0
- flowtask/components/ProductPricing.py +118 -0
- flowtask/components/QS.py +261 -0
- flowtask/components/QSBase.py +201 -0
- flowtask/components/QueryIterator.py +273 -0
- flowtask/components/QueryToInsert.py +327 -0
- flowtask/components/QueryToPandas.py +432 -0
- flowtask/components/RESTClient.py +195 -0
- flowtask/components/RethinkDBQuery.py +189 -0
- flowtask/components/Rsync.py +74 -0
- flowtask/components/RunSSH.py +59 -0
- flowtask/components/RunShell.py +71 -0
- flowtask/components/SalesForce.py +20 -0
- flowtask/components/SaveImageBank/__init__.py +257 -0
- flowtask/components/SchedulingVisits.py +592 -0
- flowtask/components/ScrapPage.py +216 -0
- flowtask/components/ScrapSearch.py +79 -0
- flowtask/components/SendNotify.py +257 -0
- flowtask/components/SentimentAnalysis.py +694 -0
- flowtask/components/ServiceScrapper/__init__.py +5 -0
- flowtask/components/ServiceScrapper/parsers/__init__.py +1 -0
- flowtask/components/ServiceScrapper/parsers/base.py +94 -0
- flowtask/components/ServiceScrapper/parsers/costco.py +93 -0
- flowtask/components/ServiceScrapper/scrapper.py +199 -0
- flowtask/components/SetVariables.py +156 -0
- flowtask/components/SubTask.py +182 -0
- flowtask/components/SuiteCRM.py +48 -0
- flowtask/components/Switch.py +175 -0
- flowtask/components/TableBase.py +148 -0
- flowtask/components/TableDelete.py +312 -0
- flowtask/components/TableInput.py +143 -0
- flowtask/components/TableOutput/TableOutput.py +384 -0
- flowtask/components/TableOutput/__init__.py +3 -0
- flowtask/components/TableSchema.py +534 -0
- flowtask/components/Target.py +223 -0
- flowtask/components/ThumbnailGenerator.py +156 -0
- flowtask/components/ToPandas.py +67 -0
- flowtask/components/TransformRows/TransformRows.py +507 -0
- flowtask/components/TransformRows/__init__.py +9 -0
- flowtask/components/TransformRows/functions.py +559 -0
- flowtask/components/TransposeRows.py +176 -0
- flowtask/components/UPCDatabase.py +86 -0
- flowtask/components/UnGzip.py +171 -0
- flowtask/components/Uncompress.py +172 -0
- flowtask/components/UniqueRows.py +126 -0
- flowtask/components/Unzip.py +107 -0
- flowtask/components/UpdateOperationalVars.py +147 -0
- flowtask/components/UploadTo.py +299 -0
- flowtask/components/UploadToS3.py +136 -0
- flowtask/components/UploadToSFTP.py +160 -0
- flowtask/components/UploadToSharepoint.py +205 -0
- flowtask/components/UserFunc.py +122 -0
- flowtask/components/VivaTracker.py +140 -0
- flowtask/components/WSDLClient.py +123 -0
- flowtask/components/Wait.py +18 -0
- flowtask/components/Walmart.py +199 -0
- flowtask/components/Workplace.py +134 -0
- flowtask/components/XMLToPandas.py +267 -0
- flowtask/components/Zammad/__init__.py +41 -0
- flowtask/components/Zammad/models.py +0 -0
- flowtask/components/ZoomInfoScraper.py +409 -0
- flowtask/components/__init__.py +104 -0
- flowtask/components/abstract.py +18 -0
- flowtask/components/flow.py +530 -0
- flowtask/components/google.py +335 -0
- flowtask/components/group.py +221 -0
- flowtask/components/py.typed +0 -0
- flowtask/components/reviewscrap.py +132 -0
- flowtask/components/tAutoincrement.py +117 -0
- flowtask/components/tConcat.py +109 -0
- flowtask/components/tExplode.py +119 -0
- flowtask/components/tFilter.py +184 -0
- flowtask/components/tGroup.py +236 -0
- flowtask/components/tJoin.py +270 -0
- flowtask/components/tMap/__init__.py +9 -0
- flowtask/components/tMap/functions.py +54 -0
- flowtask/components/tMap/tMap.py +450 -0
- flowtask/components/tMelt.py +112 -0
- flowtask/components/tMerge.py +114 -0
- flowtask/components/tOrder.py +93 -0
- flowtask/components/tPandas.py +94 -0
- flowtask/components/tPivot.py +71 -0
- flowtask/components/tPluckCols.py +76 -0
- flowtask/components/tUnnest.py +82 -0
- flowtask/components/user.py +401 -0
- flowtask/conf.py +457 -0
- flowtask/download.py +102 -0
- flowtask/events/__init__.py +11 -0
- flowtask/events/events/__init__.py +20 -0
- flowtask/events/events/abstract.py +95 -0
- flowtask/events/events/alerts/__init__.py +362 -0
- flowtask/events/events/alerts/colfunctions.py +131 -0
- flowtask/events/events/alerts/functions.py +158 -0
- flowtask/events/events/dummy.py +12 -0
- flowtask/events/events/exec.py +124 -0
- flowtask/events/events/file/__init__.py +7 -0
- flowtask/events/events/file/base.py +51 -0
- flowtask/events/events/file/copy.py +23 -0
- flowtask/events/events/file/delete.py +16 -0
- flowtask/events/events/interfaces/__init__.py +9 -0
- flowtask/events/events/interfaces/client.py +67 -0
- flowtask/events/events/interfaces/credentials.py +28 -0
- flowtask/events/events/interfaces/notifications.py +58 -0
- flowtask/events/events/jira.py +122 -0
- flowtask/events/events/log.py +26 -0
- flowtask/events/events/logerr.py +52 -0
- flowtask/events/events/notify.py +59 -0
- flowtask/events/events/notify_event.py +160 -0
- flowtask/events/events/publish.py +54 -0
- flowtask/events/events/sendfile.py +104 -0
- flowtask/events/events/task.py +97 -0
- flowtask/events/events/teams.py +98 -0
- flowtask/events/events/webhook.py +58 -0
- flowtask/events/manager.py +287 -0
- flowtask/exceptions.c +39393 -0
- flowtask/exceptions.cpython-39-x86_64-linux-gnu.so +0 -0
- flowtask/extensions/__init__.py +3 -0
- flowtask/extensions/abstract.py +82 -0
- flowtask/extensions/logging/__init__.py +65 -0
- flowtask/hooks/__init__.py +9 -0
- flowtask/hooks/actions/__init__.py +22 -0
- flowtask/hooks/actions/abstract.py +66 -0
- flowtask/hooks/actions/dummy.py +23 -0
- flowtask/hooks/actions/jira.py +74 -0
- flowtask/hooks/actions/rest.py +320 -0
- flowtask/hooks/actions/sampledata.py +37 -0
- flowtask/hooks/actions/sensor.py +23 -0
- flowtask/hooks/actions/task.py +9 -0
- flowtask/hooks/actions/ticket.py +37 -0
- flowtask/hooks/actions/zammad.py +55 -0
- flowtask/hooks/hook.py +62 -0
- flowtask/hooks/models.py +17 -0
- flowtask/hooks/service.py +187 -0
- flowtask/hooks/step.py +91 -0
- flowtask/hooks/types/__init__.py +23 -0
- flowtask/hooks/types/base.py +129 -0
- flowtask/hooks/types/brokers/__init__.py +11 -0
- flowtask/hooks/types/brokers/base.py +54 -0
- flowtask/hooks/types/brokers/mqtt.py +35 -0
- flowtask/hooks/types/brokers/rabbitmq.py +82 -0
- flowtask/hooks/types/brokers/redis.py +83 -0
- flowtask/hooks/types/brokers/sqs.py +44 -0
- flowtask/hooks/types/fs.py +232 -0
- flowtask/hooks/types/http.py +49 -0
- flowtask/hooks/types/imap.py +200 -0
- flowtask/hooks/types/jira.py +279 -0
- flowtask/hooks/types/mail.py +205 -0
- flowtask/hooks/types/postgres.py +98 -0
- flowtask/hooks/types/responses/__init__.py +8 -0
- flowtask/hooks/types/responses/base.py +5 -0
- flowtask/hooks/types/sharepoint.py +288 -0
- flowtask/hooks/types/ssh.py +141 -0
- flowtask/hooks/types/tagged.py +59 -0
- flowtask/hooks/types/upload.py +85 -0
- flowtask/hooks/types/watch.py +71 -0
- flowtask/hooks/types/web.py +36 -0
- flowtask/interfaces/AzureClient.py +137 -0
- flowtask/interfaces/AzureGraph.py +839 -0
- flowtask/interfaces/Boto3Client.py +326 -0
- flowtask/interfaces/DropboxClient.py +173 -0
- flowtask/interfaces/ExcelHandler.py +94 -0
- flowtask/interfaces/FTPClient.py +131 -0
- flowtask/interfaces/GoogleCalendar.py +201 -0
- flowtask/interfaces/GoogleClient.py +133 -0
- flowtask/interfaces/GoogleDrive.py +127 -0
- flowtask/interfaces/GoogleGCS.py +89 -0
- flowtask/interfaces/GoogleGeocoding.py +93 -0
- flowtask/interfaces/GoogleLang.py +114 -0
- flowtask/interfaces/GooglePub.py +61 -0
- flowtask/interfaces/GoogleSheet.py +68 -0
- flowtask/interfaces/IMAPClient.py +137 -0
- flowtask/interfaces/O365Calendar.py +113 -0
- flowtask/interfaces/O365Client.py +220 -0
- flowtask/interfaces/OneDrive.py +284 -0
- flowtask/interfaces/Outlook.py +155 -0
- flowtask/interfaces/ParrotBot.py +130 -0
- flowtask/interfaces/SSHClient.py +378 -0
- flowtask/interfaces/Sharepoint.py +496 -0
- flowtask/interfaces/__init__.py +36 -0
- flowtask/interfaces/azureauth.py +119 -0
- flowtask/interfaces/cache.py +201 -0
- flowtask/interfaces/client.py +82 -0
- flowtask/interfaces/compress.py +525 -0
- flowtask/interfaces/credentials.py +124 -0
- flowtask/interfaces/d2l.py +239 -0
- flowtask/interfaces/databases/__init__.py +5 -0
- flowtask/interfaces/databases/db.py +223 -0
- flowtask/interfaces/databases/documentdb.py +55 -0
- flowtask/interfaces/databases/rethink.py +39 -0
- flowtask/interfaces/dataframes/__init__.py +11 -0
- flowtask/interfaces/dataframes/abstract.py +21 -0
- flowtask/interfaces/dataframes/arrow.py +71 -0
- flowtask/interfaces/dataframes/dt.py +69 -0
- flowtask/interfaces/dataframes/pandas.py +167 -0
- flowtask/interfaces/dataframes/polars.py +60 -0
- flowtask/interfaces/db.py +263 -0
- flowtask/interfaces/env.py +46 -0
- flowtask/interfaces/func.py +137 -0
- flowtask/interfaces/http.py +1780 -0
- flowtask/interfaces/locale.py +40 -0
- flowtask/interfaces/log.py +75 -0
- flowtask/interfaces/mask.py +143 -0
- flowtask/interfaces/notification.py +154 -0
- flowtask/interfaces/playwright.py +339 -0
- flowtask/interfaces/powerpoint.py +368 -0
- flowtask/interfaces/py.typed +0 -0
- flowtask/interfaces/qs.py +376 -0
- flowtask/interfaces/result.py +87 -0
- flowtask/interfaces/selenium_service.py +779 -0
- flowtask/interfaces/smartsheet.py +154 -0
- flowtask/interfaces/stat.py +39 -0
- flowtask/interfaces/task.py +96 -0
- flowtask/interfaces/template.py +118 -0
- flowtask/interfaces/vectorstores/__init__.py +1 -0
- flowtask/interfaces/vectorstores/abstract.py +133 -0
- flowtask/interfaces/vectorstores/milvus.py +669 -0
- flowtask/interfaces/zammad.py +107 -0
- flowtask/models.py +193 -0
- flowtask/parsers/__init__.py +15 -0
- flowtask/parsers/_yaml.c +11978 -0
- flowtask/parsers/_yaml.cpython-39-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/argparser.py +235 -0
- flowtask/parsers/base.c +15155 -0
- flowtask/parsers/base.cpython-39-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/json.c +11968 -0
- flowtask/parsers/json.cpython-39-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/maps.py +49 -0
- flowtask/parsers/toml.c +11968 -0
- flowtask/parsers/toml.cpython-39-x86_64-linux-gnu.so +0 -0
- flowtask/plugins/__init__.py +16 -0
- flowtask/plugins/components/__init__.py +0 -0
- flowtask/plugins/handler/__init__.py +45 -0
- flowtask/plugins/importer.py +31 -0
- flowtask/plugins/sources/__init__.py +0 -0
- flowtask/runner.py +283 -0
- flowtask/scheduler/__init__.py +9 -0
- flowtask/scheduler/functions.py +493 -0
- flowtask/scheduler/handlers/__init__.py +8 -0
- flowtask/scheduler/handlers/manager.py +504 -0
- flowtask/scheduler/handlers/models.py +58 -0
- flowtask/scheduler/handlers/service.py +72 -0
- flowtask/scheduler/notifications.py +65 -0
- flowtask/scheduler/scheduler.py +993 -0
- flowtask/services/__init__.py +0 -0
- flowtask/services/bots/__init__.py +0 -0
- flowtask/services/bots/telegram.py +264 -0
- flowtask/services/files/__init__.py +11 -0
- flowtask/services/files/manager.py +522 -0
- flowtask/services/files/model.py +37 -0
- flowtask/services/files/service.py +767 -0
- flowtask/services/jira/__init__.py +3 -0
- flowtask/services/jira/jira_actions.py +191 -0
- flowtask/services/tasks/__init__.py +13 -0
- flowtask/services/tasks/launcher.py +213 -0
- flowtask/services/tasks/manager.py +323 -0
- flowtask/services/tasks/service.py +275 -0
- flowtask/services/tasks/task_manager.py +376 -0
- flowtask/services/tasks/tasks.py +155 -0
- flowtask/storages/__init__.py +16 -0
- flowtask/storages/exceptions.py +12 -0
- flowtask/storages/files/__init__.py +8 -0
- flowtask/storages/files/abstract.py +29 -0
- flowtask/storages/files/filesystem.py +66 -0
- flowtask/storages/tasks/__init__.py +19 -0
- flowtask/storages/tasks/abstract.py +26 -0
- flowtask/storages/tasks/database.py +33 -0
- flowtask/storages/tasks/filesystem.py +108 -0
- flowtask/storages/tasks/github.py +119 -0
- flowtask/storages/tasks/memory.py +45 -0
- flowtask/storages/tasks/row.py +25 -0
- flowtask/tasks/__init__.py +0 -0
- flowtask/tasks/abstract.py +526 -0
- flowtask/tasks/command.py +118 -0
- flowtask/tasks/pile.py +486 -0
- flowtask/tasks/py.typed +0 -0
- flowtask/tasks/task.py +778 -0
- flowtask/template/__init__.py +161 -0
- flowtask/tests.py +257 -0
- flowtask/types/__init__.py +8 -0
- flowtask/types/typedefs.c +11347 -0
- flowtask/types/typedefs.cpython-39-x86_64-linux-gnu.so +0 -0
- flowtask/utils/__init__.py +24 -0
- flowtask/utils/constants.py +117 -0
- flowtask/utils/encoders.py +21 -0
- flowtask/utils/executor.py +112 -0
- flowtask/utils/functions.cpp +14280 -0
- flowtask/utils/functions.cpython-39-x86_64-linux-gnu.so +0 -0
- flowtask/utils/json.cpp +13349 -0
- flowtask/utils/json.cpython-39-x86_64-linux-gnu.so +0 -0
- flowtask/utils/mail.py +63 -0
- flowtask/utils/parseqs.c +13324 -0
- flowtask/utils/parserqs.cpython-39-x86_64-linux-gnu.so +0 -0
- flowtask/utils/stats.py +308 -0
- flowtask/utils/transformations.py +74 -0
- flowtask/utils/uv.py +12 -0
- flowtask/utils/validators.py +97 -0
- flowtask/version.py +11 -0
- flowtask-5.8.4.dist-info/LICENSE +201 -0
- flowtask-5.8.4.dist-info/METADATA +209 -0
- flowtask-5.8.4.dist-info/RECORD +470 -0
- flowtask-5.8.4.dist-info/WHEEL +6 -0
- flowtask-5.8.4.dist-info/entry_points.txt +3 -0
- flowtask-5.8.4.dist-info/top_level.txt +2 -0
- plugins/components/CreateQR.py +39 -0
- plugins/components/TestComponent.py +28 -0
- plugins/components/Use1.py +13 -0
- plugins/components/Workplace.py +117 -0
- plugins/components/__init__.py +3 -0
- plugins/sources/__init__.py +0 -0
- plugins/sources/get_populartimes.py +78 -0
- plugins/sources/google.py +150 -0
- plugins/sources/hubspot.py +679 -0
- plugins/sources/icims.py +679 -0
- plugins/sources/mobileinsight.py +501 -0
- plugins/sources/newrelic.py +262 -0
- plugins/sources/uap.py +268 -0
- plugins/sources/venu.py +244 -0
- plugins/sources/vocinity.py +314 -0
--- /dev/null
+++ flowtask/components/SentimentAnalysis.py
@@ -0,0 +1,694 @@
import asyncio
from collections.abc import Callable
from typing import List
import contextlib
import numpy as np
import pandas as pd
from concurrent.futures import ThreadPoolExecutor
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    AutoModelForTokenClassification,
    BertForSequenceClassification,
    BertTokenizer,
    BertweetTokenizer,
    RobertaTokenizer,
    RobertaForSequenceClassification,
    pipeline
)
from nltk.tokenize import sent_tokenize
import torch
from ..exceptions import ComponentError
from .flow import FlowComponent


class ModelPrediction:
    """
    ModelPrediction

    Overview

    Performs sentiment analysis and emotion detection on text using Hugging Face Transformers.

    This class uses pre-trained models for sentiment analysis and emotion detection.
    It supports different model architectures such as BERT, BERTweet, and RoBERTa.
    The class handles text chunking for inputs exceeding the maximum token length
    and provides detailed sentiment and emotion scores along with predicted labels.

    Attributes:
        sentiment_model (str): Name of the sentiment analysis model to use from Hugging Face.
            Defaults to 'tabularisai/robust-sentiment-analysis'.
        emotions_model (str): Name of the emotion detection model to use from Hugging Face.
            Defaults to 'bhadresh-savani/distilbert-base-uncased-emotion'.
        classification (str): Type of classification pipeline to use (e.g., 'sentiment-analysis').
            Defaults to 'sentiment-analysis'.
        levels (int): Number of sentiment levels for sentiment analysis (2, 3, or 5).
            Default is 5.
        max_length (int): Maximum token length for input texts. Defaults to 512.
        use_bertweet (bool): If True, uses a BERTweet model for sentiment analysis. Defaults to False.
        use_bert (bool): If True, uses a BERT model for sentiment analysis. Defaults to False.
        use_roberta (bool): If True, uses a RoBERTa model for sentiment analysis. Defaults to False.

    Returns:
        DataFrame: A DataFrame with sentiment and emotion analysis results.
        Includes columns for sentiment scores, sentiment labels, emotion scores, and emotion labels.

    Raises:
        ComponentError: If there is an issue during text processing or data handling.

    Example:

    ```yaml
    SentimentAnalysis:
      text_column: text
      sentiment_model: tabularisai/robust-sentiment-analysis
      sentiment_levels: 5
      emotions_model: bhadresh-savani/distilbert-base-uncased-emotion
    ```
    """  # noqa

    def __init__(
        self,
        sentiment_model: str = "tabularisai/robust-sentiment-analysis",
        emotions_model: str = "bhadresh-savani/distilbert-base-uncased-emotion",
        classification: str = 'sentiment-analysis',
        levels: int = 5,
        max_length: int = 512,
        use_bertweet: bool = False,
        use_bert: bool = False,
        use_roberta: bool = False
    ):
        """
        Initializes the ModelPrediction component.

        Sets up the sentiment analysis and emotion detection models and tokenizers
        based on the provided configuration.
        """
        self.max_length = max_length
        self.levels = levels
        self.use_bertweet: bool = use_bertweet
        if use_bert:
            self.model = BertForSequenceClassification.from_pretrained(
                sentiment_model,
                num_labels=abs(levels),
                ignore_mismatched_sizes=True
            )
            self.tokenizer = BertTokenizer.from_pretrained(sentiment_model)
        elif use_roberta:
            self.model = RobertaForSequenceClassification.from_pretrained(sentiment_model)
            self.tokenizer = RobertaTokenizer.from_pretrained(sentiment_model)
        elif use_bertweet:
            self.model = AutoModelForSequenceClassification.from_pretrained(sentiment_model)
            self.tokenizer = BertweetTokenizer.from_pretrained(sentiment_model)
        else:
            self.tokenizer = AutoTokenizer.from_pretrained(
                sentiment_model,
                truncation=True,
                max_length=self.max_length
                # normalization=True
            )
            self.model = AutoModelForSequenceClassification.from_pretrained(
                sentiment_model,
            )
        # And the emotion model:
        self.emotional_model = AutoModelForSequenceClassification.from_pretrained(
            emotions_model
        )
        self.emo_tokenizer = AutoTokenizer.from_pretrained(
            emotions_model,
            truncation=True,
            max_length=self.max_length
        )
        self._device = self._get_device()
        self.emotion_classifier = pipeline(
            classification,
            model=self.emotional_model,
            tokenizer=self.emo_tokenizer,
            device=self._device,
            return_all_scores=True,
            # ensure the pipeline forcibly truncates when it re-tokenizes
            truncation=True,
            max_length=512
        )
        # sentiment classifier:
        self.sentiment_classifier = pipeline(
            classification,
            model=self.model,
            tokenizer=self.tokenizer,
            device=self._device,
            return_all_scores=True,
            # ensure the pipeline forcibly truncates when it re-tokenizes
            truncation=True,
            max_length=512
        )

    def _get_device(self, use_device: str = 'cpu', cuda_number: int = 0):
        """
        Determines and returns the appropriate device (CPU, CUDA, MPS) for model execution.

        Prefers CUDA if available, then Apple MPS, and otherwise falls back to the
        requested device (CPU by default).

        Args:
            use_device (str): Desired device to use ('cpu', 'cuda'). Defaults to 'cpu'.
            cuda_number (int): CUDA device number to use, if CUDA is selected. Defaults to 0.

        Returns:
            torch.device: The device object representing the chosen execution environment.
        """
        torch.backends.cudnn.deterministic = True
        if torch.cuda.is_available():
            # Use a CUDA GPU if available
            device = torch.device(f'cuda:{cuda_number}')
        elif torch.backends.mps.is_available():
            # Use Apple Metal Performance Shaders (MPS) if available
            device = torch.device("mps")
        elif use_device == 'cuda':
            device = torch.device(f'cuda:{cuda_number}')
        else:
            device = torch.device(use_device)
        return device

    def predict_emotion(self, text: str) -> dict:
        """
        Predicts the emotion of the input text.

        Handles text chunking for long texts to ensure they fit within the model's
        token limit. Returns a dictionary containing emotion predictions.

        Args:
            text (str): The input text to predict emotion for.

        Returns:
            dict: A dictionary containing emotion predictions.
            For example: {'emotions': [{'label': 'joy', 'score': 0.99}]}
            Returns an empty dictionary if the input text is empty.
        """
        if not text:
            return {}

        # Tokenize the text to check its length
        encoded_text = self.emo_tokenizer.encode(
            str(text),
            truncation=False,
            add_special_tokens=True
        )

        # Handle long texts by splitting them into chunks if needed
        if len(encoded_text) > self.max_length:
            text_chunks = self._split_text(text, self.max_length)
            return self._predict_multiple_emotion_chunks(text_chunks)

        # Use the pipeline to predict emotion for shorter texts
        prediction = self.emotion_classifier(str(text))

        if len(prediction) > 0 and isinstance(prediction[0], list):  # When return_all_scores=True
            emotions = [emo_pred for emo_pred in prediction[0] if emo_pred['score'] >= 0.5]  # Apply threshold
            if not emotions:
                emotions.append({"label": "neutral", "score": 0})
            return {'emotions': emotions}

        return {}

    def _predict_multiple_emotion_chunks(self, chunks: list) -> dict:
        """
        Predicts emotions for multiple text chunks and aggregates the results.

        Used for processing long texts that have been split into smaller chunks.
        Aggregates emotion predictions from each chunk.

        Args:
            chunks (list): List of text chunks (strings) to predict emotions for.

        Returns:
            dict: A dictionary containing aggregated emotion predictions.
            For example: {'emotions': [{'label': 'joy', 'score': 0.99}, {'label': 'surprise', 'score': 0.6}]}
            Only emotions with scores above the 0.5 threshold are kept. If no emotion meets
            the threshold, a neutral emotion with a score of 0 is returned.
        """
        all_emotions = []

        for chunk in chunks:
            predictions = self.emotion_classifier(chunk)
            if len(predictions) > 0 and isinstance(predictions[0], list):
                # Filter predictions for significant emotions
                emotions = [emo_pred for emo_pred in predictions[0] if emo_pred['score'] >= 0.5]
                if emotions:
                    all_emotions.extend(emotions)

        # Aggregate emotions across all chunks
        if not all_emotions:
            return {'emotions': [{"label": "neutral", "score": 0}]}

        # Optionally, emotions could be aggregated further; this returns them all
        return {'emotions': all_emotions}

    def _get_sentiment_map(self) -> dict:
        """
        Provides a mapping of sentiment class indices to sentiment labels based on the configured levels.

        Returns a dictionary that maps the numerical index of sentiment classes to
        their corresponding descriptive labels (e.g., 'Positive', 'Negative', 'Neutral').
        The mapping is determined by the `levels` attribute set during initialization.

        Returns:
            dict: A dictionary mapping sentiment class indices to sentiment labels.
            For example, for 5 levels: {0: "Very Negative", 1: "Negative", 2: "Neutral", 3: "Positive", 4: "Very Positive"}.
        """  # noqa
        if self.levels == -3:  # Inverted
            return {
                0: "Neutral",
                1: "Positive",
                2: "Negative",
            }
        elif self.levels == 5:
            return {
                0: "Very Negative",
                1: "Negative",
                2: "Neutral",
                3: "Positive",
                4: "Very Positive"
            }
        elif self.levels == 3:
            return {
                0: "Negative",
                1: "Neutral",
                2: "Positive",
            }
        else:
            return {
                0: "Negative",
                1: "Positive",
            }

    def predict_sentiment(self, text: str) -> dict:
        """
        Predicts the sentiment of the input text.

        Uses the sentiment analysis pipeline to classify the text and returns
        sentiment scores and the predicted sentiment label. Handles text chunking
        for texts exceeding the maximum token length.

        Args:
            text (str): The text to analyze for sentiment.

        Returns:
            dict: A dictionary containing sentiment analysis results.
            Includes 'score' (list of sentiment scores) and 'predicted_sentiment' (string label).
            Returns None if the input text is empty.
        """
        if not text:
            return None
        if isinstance(text, float):
            text = str(text)

        # Tokenize the text to check its length
        encoded_text = self.tokenizer.encode(text, truncation=False, add_special_tokens=True)

        # Handle long texts by splitting them into chunks if needed
        if len(encoded_text) > self.max_length:
            text_chunks = self._split_text(text, self.max_length)
            return self._predict_multiple_chunks_pipeline(text_chunks)

        # Use the pipeline to predict sentiment for shorter texts
        predictions = self.sentiment_classifier(text)

        # Since return_all_scores=True, predictions is a list of lists;
        # each inner list contains dicts with 'label' and 'score'
        scores = predictions[0]

        # Extract scores and labels
        probabilities = [item['score'] for item in scores]
        labels = [item['label'] for item in scores]

        # Check if labels are descriptive (e.g., 'positive', 'neutral', 'negative')
        if all(label.lower() in ['positive', 'neutral', 'negative'] for label in labels):
            # If labels are descriptive, no custom mapping is needed
            predicted_label = max(scores, key=lambda x: x['score'])['label']
            return {
                "score": probabilities,
                "predicted_sentiment": predicted_label.capitalize()
            }

        # Map labels to indices
        label_to_index = {}
        for label in labels:
            if label.startswith("LABEL_"):
                label_idx = int(label.replace("LABEL_", ""))
                label_to_index[label] = label_idx
        if not label_to_index:
            label_to_index = {label: idx for idx, label in enumerate(labels)}

        predicted_label = max(scores, key=lambda x: x['score'])['label']
        predicted_class = label_to_index[predicted_label]

        # Map predicted_class to a sentiment label
        sentiment_map = self._get_sentiment_map()

        predicted_sentiment = sentiment_map.get(predicted_class, predicted_label)

        return {
            "score": probabilities,
            "predicted_sentiment": predicted_sentiment
        }

    def _predict_multiple_chunks_pipeline(self, chunks: list) -> dict:
        """
        Predicts sentiment for multiple text chunks using the pipeline and aggregates the results.

        Averages sentiment probabilities across all chunks to determine the overall sentiment.
        This method is specifically designed for handling long texts split into smaller processable chunks.

        Args:
            chunks (list): A list of text chunks (strings) to analyze for sentiment.

        Returns:
            dict: A dictionary containing the aggregated sentiment analysis results.
            Includes 'score' (list of averaged sentiment probabilities) and 'predicted_sentiment'
            (string label of the overall predicted sentiment).
        """  # noqa
        all_probabilities = []
        for chunk in chunks:
            predictions = self.sentiment_classifier(chunk)
            scores = predictions[0]
            probabilities = [item['score'] for item in scores]
            all_probabilities.append(torch.tensor(probabilities))

        # Average probabilities across chunks
        avg_probabilities = torch.mean(torch.stack(all_probabilities), dim=0)
        predicted_class = torch.argmax(avg_probabilities).item()

        sentiment_map = self._get_sentiment_map()
        predicted_sentiment = sentiment_map.get(predicted_class, "Unknown")

        return {
            "score": avg_probabilities.tolist(),
            "predicted_sentiment": predicted_sentiment
        }

    def _split_text(self, text: str, max_length: int) -> List[str]:
        """
        Splits input text into processable chunks based on sentence boundaries and token count.

        Ensures that each chunk does not exceed the maximum token length limit of the model.
        It attempts to split text at sentence boundaries to maintain semantic integrity where possible.
        Handles cases where sentences themselves are too long by further splitting them.

        Args:
            text (str): The input text to be split.
            max_length (int): The maximum token length allowed for each chunk.

        Returns:
            List[str]: A list of text chunks, each guaranteed to be within the token limit.
        """
        chunks = []
        current_chunk = []
        split_by_sentences = text.split(". ")

        for sentence in split_by_sentences:
            sentence_tokens = self.tokenizer.encode(sentence, add_special_tokens=False)
            # +1 for a potential separator
            if len(current_chunk) + len(sentence_tokens) + 1 <= max_length:
                current_chunk.extend(sentence_tokens)
                # Add a separator between sentences
                current_chunk.append(self.tokenizer.sep_token_id)
            else:
                # The sentence does not fit; flush the current chunk
                if current_chunk:
                    chunks.append(self.tokenizer.decode(current_chunk))
                    current_chunk = []
                # Handle a long sentence: split it into smaller parts
                temp_sentence_chunks = []
                temp_sentence_chunks.extend(
                    sentence_tokens[i: i + max_length]
                    for i in range(0, len(sentence_tokens), max_length)
                )
                # If the sentence spans more than one max_length window
                if len(temp_sentence_chunks) > 1:
                    for i, chunk in enumerate(temp_sentence_chunks):
                        if i < len(temp_sentence_chunks) - 1:
                            chunks.append(self.tokenizer.decode(chunk))
                        else:
                            current_chunk.extend(chunk)
                else:
                    current_chunk.extend(sentence_tokens)

        if current_chunk:
            current_chunk.append(self.tokenizer.sep_token_id)

        if current_chunk:
            chunks.append(self.tokenizer.decode(current_chunk))

        # Remove trailing sentence separators that are not required
        for i, chunk in enumerate(chunks):
            if chunk.endswith(self.tokenizer.sep_token):
                chunks[i] = chunk[:-len(self.tokenizer.sep_token)]

        return chunks

    def split_into_sentences(self, text):
        """
        Splits a text into sentences using NLTK's sentence tokenizer.

        Leverages nltk.tokenize.sent_tokenize for robust sentence splitting,
        handling various sentence terminators and abbreviations.

        Args:
            text (str): The input text to be split into sentences.

        Returns:
            list: A list of strings, where each string is a sentence from the input text.
        """
        return sent_tokenize(text)

    def aggregate_sentiments(self, sentiments, levels):
        """
        Aggregates sentiment predictions from multiple texts to produce a single overall sentiment.

        Calculates the average sentiment score across a list of sentiment predictions
        and determines the overall predicted sentiment based on these averages.

        Args:
            sentiments (list): A list of dictionaries, each containing sentiment prediction results
                for a text (output from `predict_sentiment`).
            levels (int): The number of sentiment levels used in the analysis, determining the sentiment map.

        Returns:
            str: The aggregated predicted sentiment label (e.g., 'Positive', 'Negative', 'Neutral').
        """
        # Initialize an array to hold cumulative scores
        cumulative_scores = torch.zeros(levels)
        for sentiment in sentiments:
            # 'score' is the full probability vector (one entry per sentiment level)
            scores = torch.tensor(sentiment['score'])
            cumulative_scores += scores

        # Calculate average scores
        avg_scores = cumulative_scores / len(sentiments)
        predicted_class = torch.argmax(avg_scores).item()

        if levels == 5:
            sentiment_map = {
                0: "Very Negative",
                1: "Negative",
                2: "Neutral",
                3: "Positive",
                4: "Very Positive"
            }
        elif levels == 3:
            sentiment_map = {
                0: "Negative",
                1: "Neutral",
                2: "Positive",
            }
        else:
            sentiment_map = {
                0: "Negative",
                1: "Positive",
            }

        return sentiment_map[predicted_class]


class SentimentAnalysis(FlowComponent):
    """
    Applies sentiment analysis and emotion detection to a DataFrame of text data.

    This component processes a DataFrame, applying Hugging Face Transformer models
    to analyze the sentiment and emotions expressed in a specified text column.
    It leverages the `ModelPrediction` class to perform the actual predictions
    and integrates these results back into the DataFrame.

    Properties:
        text_column (str): The name of the DataFrame column containing the text to analyze.
            Defaults to 'text'.
        sentiment_model (str): Model name for sentiment analysis.
            Defaults to 'tabularisai/robust-sentiment-analysis'.
        emotions_model (str): Model name for emotion detection.
            Defaults to 'cardiffnlp/twitter-roberta-base-emotion'.
        pipeline_classification (str): Classification type for the pipeline (e.g., 'sentiment-analysis').
            Defaults to 'sentiment-analysis'.
        with_average (bool): Whether sentiment should be averaged across rows (if applicable).
            Defaults to True.
        sentiment_levels (int): Number of sentiment levels (2, 3, or 5). Default is 5.
        use_bert (bool): Use a BERT model for sentiment analysis. Defaults to False.
        use_roberta (bool): Use a RoBERTa model for sentiment analysis. Defaults to False.
        use_bertweet (bool): Use a BERTweet model for sentiment analysis. Defaults to False.

    Returns:
        DataFrame: The input DataFrame augmented with new columns for sentiment scores,
        predicted sentiment, emotion scores, and predicted emotion.
        Specifically, it adds: 'sentiment_scores', 'sentiment_score', 'emotions_score',
        'predicted_emotion', and 'predicted_sentiment' columns.

    Raises:
        ComponentError: If the input data is not a pandas DataFrame or if the text column is not found.
    """
    def __init__(
        self,
        loop: asyncio.AbstractEventLoop = None,
        job: Callable = None,
        stat: Callable = None,
        **kwargs,
    ):
        """Extract sentiment analysis."""
        self.text_column: str = kwargs.pop('text_column', 'text')
        self._sentiment_model: str = kwargs.pop(
            'sentiment_model',
            'tabularisai/robust-sentiment-analysis'
        )
        self._emotion_model: str = kwargs.pop(
            'emotions_model',
            "cardiffnlp/twitter-roberta-base-emotion"
        )
        self._classification: str = kwargs.pop(
            'pipeline_classification',
            'sentiment-analysis'
        )
        self.with_average: bool = kwargs.pop('with_average', True)
        self.sentiment_levels: int = kwargs.pop('sentiment_levels', 5)
        self._use_bert: bool = kwargs.pop('use_bert', False)
        self._use_roberta: bool = kwargs.pop('use_roberta', False)
        self._use_bertweet: bool = kwargs.pop('use_bertweet', False)
        self.chunk_size: int = 100
        self.max_workers: int = 5
        super().__init__(loop=loop, job=job, stat=stat, **kwargs)

    async def start(self, **kwargs):
        if self.previous:
            self.data = self.input
        else:
            raise ComponentError(
                "Data Not Found",
                status=404
            )
        if not isinstance(self.data, pd.DataFrame):
            raise ComponentError(
                "Incompatible Data, we need a Pandas DataFrame",
                status=404
            )
        # instantiate the model:
        self._predictor = ModelPrediction(
            sentiment_model=self._sentiment_model,
            emotions_model=self._emotion_model,
            classification=self._classification,
            max_length=512,
            levels=self.sentiment_levels,
            use_bertweet=self._use_bertweet,
            use_bert=self._use_bert,
            use_roberta=self._use_roberta
        )
        return True

    async def close(self):
        pass

    def _analyze_chunk(self, chunk: pd.DataFrame):
        """
        Analyzes a chunk of the DataFrame to add sentiment and emotion predictions.

        This method is designed to be run in parallel using a thread pool to process
        DataFrame chunks. For each chunk, it applies sentiment and emotion prediction
        models to the text data and adds the results as new columns in the DataFrame.

        Args:
            chunk (pd.DataFrame): A subset of the input DataFrame to be analyzed.

        Returns:
            pd.DataFrame: The processed DataFrame chunk, now including sentiment and emotion analysis columns.
        """  # noqa
        # instantiate a predictor per worker (pipelines are not shared across threads):
        predictor = ModelPrediction(
            sentiment_model=self._sentiment_model,
            emotions_model=self._emotion_model,
            classification=self._classification,
            max_length=512,
            levels=self.sentiment_levels,
            use_bertweet=self._use_bertweet,
            use_bert=self._use_bert,
            use_roberta=self._use_roberta
        )
        chunk['sentiment'] = chunk[self.text_column].apply(
            predictor.predict_sentiment
        )
        chunk['emotions'] = chunk[self.text_column].apply(
            predictor.predict_emotion
        )
        with contextlib.suppress(Exception):
            torch.cuda.empty_cache()
        return chunk

    async def run(self):
        """
        Executes the sentiment analysis and emotion detection process on the input DataFrame.

        Splits the DataFrame into chunks and processes them in parallel using a thread pool.
        After processing, it concatenates the results, extracts relevant prediction scores and labels,
        and adds them as new columns to the DataFrame.

        Returns:
            pd.DataFrame: The DataFrame with added sentiment and emotion analysis results.
        """
        # Split the dataframe into chunks
        num_chunks = np.ceil(len(self.data) / self.chunk_size).astype(int)
        chunks = np.array_split(self.data, num_chunks)

        # Run the analysis in parallel using a thread pool
        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            processed_chunks = list(executor.map(self._analyze_chunk, chunks))

        # Concatenate all the chunks back into a single DataFrame
        df = pd.concat(processed_chunks)
        # extract the predicted sentiment and emotion
        try:
            # Extract the list of sentiment scores from the 'sentiment' column
            df['sentiment_scores'] = df['sentiment'].apply(
                lambda x: x.get('score', []) if x and isinstance(x.get('score', []), list) else []
            )
            # Max value of the sentiment scores
            df['sentiment_score'] = df['sentiment_scores'].apply(
                lambda x: max(x) if isinstance(x, list) and len(x) > 0 else None
            )
            # Extract 'emotions_score' from the 'emotions' column (score of the first emotion)
            df['emotions_score'] = df['emotions'].apply(
                lambda x: x.get('emotions', [{'score': None}])[0]['score'] if x and isinstance(x.get('emotions', []), list) and len(x['emotions']) > 0 else None  # noqa
            )
            # Expand the 'emotions' and 'sentiment' columns to extract the labels
            df['predicted_emotion'] = df['emotions'].apply(
                lambda x: x.get('emotions', [{'label': None}])[0]['label'] if x and isinstance(x.get('emotions', []), list) and len(x.get('emotions', [])) > 0 else None  # noqa
            )
            df['predicted_sentiment'] = df['sentiment'].apply(
                lambda x: x.get('predicted_sentiment', None) if x else None
            )
        except Exception as e:
            print(e)
        self._result = df
        if self._debug is True:
            print("== DATA PREVIEW ==")
            print(self._result)
            print()
            print("::: Printing Column Information === ")
            for column, t in df.dtypes.items():
                print(column, "->", t, "->", df[column].iloc[0])
        return self._result