flowtask 5.8.4__cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowtask/__init__.py +93 -0
- flowtask/__main__.py +38 -0
- flowtask/bots/__init__.py +6 -0
- flowtask/bots/check.py +93 -0
- flowtask/bots/codebot.py +51 -0
- flowtask/components/ASPX.py +148 -0
- flowtask/components/AddDataset.py +352 -0
- flowtask/components/Amazon.py +523 -0
- flowtask/components/AutoTask.py +314 -0
- flowtask/components/Azure.py +80 -0
- flowtask/components/AzureUsers.py +106 -0
- flowtask/components/BaseAction.py +91 -0
- flowtask/components/BaseLoop.py +198 -0
- flowtask/components/BestBuy.py +800 -0
- flowtask/components/CSVToGCS.py +120 -0
- flowtask/components/CompanyScraper/__init__.py +1 -0
- flowtask/components/CompanyScraper/parsers/__init__.py +6 -0
- flowtask/components/CompanyScraper/parsers/base.py +102 -0
- flowtask/components/CompanyScraper/parsers/explorium.py +192 -0
- flowtask/components/CompanyScraper/parsers/leadiq.py +206 -0
- flowtask/components/CompanyScraper/parsers/rocket.py +133 -0
- flowtask/components/CompanyScraper/parsers/siccode.py +109 -0
- flowtask/components/CompanyScraper/parsers/visualvisitor.py +130 -0
- flowtask/components/CompanyScraper/parsers/zoominfo.py +118 -0
- flowtask/components/CompanyScraper/scrapper.py +1054 -0
- flowtask/components/CopyTo.py +177 -0
- flowtask/components/CopyToBigQuery.py +243 -0
- flowtask/components/CopyToMongoDB.py +291 -0
- flowtask/components/CopyToPg.py +609 -0
- flowtask/components/CopyToRethink.py +207 -0
- flowtask/components/CreateGCSBucket.py +102 -0
- flowtask/components/CreateReport/CreateReport.py +228 -0
- flowtask/components/CreateReport/__init__.py +9 -0
- flowtask/components/CreateReport/charts/__init__.py +15 -0
- flowtask/components/CreateReport/charts/bar.py +51 -0
- flowtask/components/CreateReport/charts/base.py +66 -0
- flowtask/components/CreateReport/charts/pie.py +64 -0
- flowtask/components/CreateReport/utils.py +9 -0
- flowtask/components/CustomerSatisfaction.py +196 -0
- flowtask/components/DataInput.py +200 -0
- flowtask/components/DateList.py +255 -0
- flowtask/components/DbClient.py +163 -0
- flowtask/components/DialPad.py +146 -0
- flowtask/components/DocumentDBQuery.py +200 -0
- flowtask/components/DownloadFrom.py +371 -0
- flowtask/components/DownloadFromD2L.py +113 -0
- flowtask/components/DownloadFromFTP.py +181 -0
- flowtask/components/DownloadFromIMAP.py +315 -0
- flowtask/components/DownloadFromS3.py +198 -0
- flowtask/components/DownloadFromSFTP.py +265 -0
- flowtask/components/DownloadFromSharepoint.py +110 -0
- flowtask/components/DownloadFromSmartSheet.py +114 -0
- flowtask/components/DownloadS3File.py +229 -0
- flowtask/components/Dummy.py +59 -0
- flowtask/components/DuplicatePhoto.py +411 -0
- flowtask/components/EmployeeEvaluation.py +237 -0
- flowtask/components/ExecuteSQL.py +323 -0
- flowtask/components/ExtractHTML.py +178 -0
- flowtask/components/FileBase.py +178 -0
- flowtask/components/FileCopy.py +181 -0
- flowtask/components/FileDelete.py +82 -0
- flowtask/components/FileExists.py +146 -0
- flowtask/components/FileIteratorDelete.py +112 -0
- flowtask/components/FileList.py +194 -0
- flowtask/components/FileOpen.py +75 -0
- flowtask/components/FileRead.py +120 -0
- flowtask/components/FileRename.py +106 -0
- flowtask/components/FilterIf.py +284 -0
- flowtask/components/FilterRows/FilterRows.py +200 -0
- flowtask/components/FilterRows/__init__.py +10 -0
- flowtask/components/FilterRows/functions.py +4 -0
- flowtask/components/GCSToBigQuery.py +103 -0
- flowtask/components/GoogleA4.py +150 -0
- flowtask/components/GoogleGeoCoding.py +344 -0
- flowtask/components/GooglePlaces.py +315 -0
- flowtask/components/GoogleSearch.py +539 -0
- flowtask/components/HTTPClient.py +268 -0
- flowtask/components/ICIMS.py +146 -0
- flowtask/components/IF.py +179 -0
- flowtask/components/IcimsFolderCopy.py +173 -0
- flowtask/components/ImageFeatures/__init__.py +5 -0
- flowtask/components/ImageFeatures/process.py +233 -0
- flowtask/components/IteratorBase.py +251 -0
- flowtask/components/LangchainLoader/__init__.py +5 -0
- flowtask/components/LangchainLoader/loader.py +194 -0
- flowtask/components/LangchainLoader/loaders/__init__.py +22 -0
- flowtask/components/LangchainLoader/loaders/abstract.py +362 -0
- flowtask/components/LangchainLoader/loaders/basepdf.py +50 -0
- flowtask/components/LangchainLoader/loaders/docx.py +91 -0
- flowtask/components/LangchainLoader/loaders/html.py +119 -0
- flowtask/components/LangchainLoader/loaders/pdfblocks.py +146 -0
- flowtask/components/LangchainLoader/loaders/pdfmark.py +79 -0
- flowtask/components/LangchainLoader/loaders/pdftables.py +135 -0
- flowtask/components/LangchainLoader/loaders/qa.py +67 -0
- flowtask/components/LangchainLoader/loaders/txt.py +55 -0
- flowtask/components/LeadIQ.py +650 -0
- flowtask/components/Loop.py +253 -0
- flowtask/components/Lowes.py +334 -0
- flowtask/components/MS365Usage.py +156 -0
- flowtask/components/MSTeamsMessages.py +320 -0
- flowtask/components/MarketClustering.py +1051 -0
- flowtask/components/MergeFiles.py +362 -0
- flowtask/components/MilvusOutput.py +87 -0
- flowtask/components/NearByStores.py +175 -0
- flowtask/components/NetworkNinja/__init__.py +6 -0
- flowtask/components/NetworkNinja/models/__init__.py +52 -0
- flowtask/components/NetworkNinja/models/abstract.py +177 -0
- flowtask/components/NetworkNinja/models/account.py +39 -0
- flowtask/components/NetworkNinja/models/client.py +19 -0
- flowtask/components/NetworkNinja/models/district.py +14 -0
- flowtask/components/NetworkNinja/models/events.py +101 -0
- flowtask/components/NetworkNinja/models/forms.py +499 -0
- flowtask/components/NetworkNinja/models/market.py +16 -0
- flowtask/components/NetworkNinja/models/organization.py +34 -0
- flowtask/components/NetworkNinja/models/photos.py +125 -0
- flowtask/components/NetworkNinja/models/project.py +44 -0
- flowtask/components/NetworkNinja/models/region.py +28 -0
- flowtask/components/NetworkNinja/models/store.py +203 -0
- flowtask/components/NetworkNinja/models/user.py +151 -0
- flowtask/components/NetworkNinja/router.py +854 -0
- flowtask/components/Odoo.py +175 -0
- flowtask/components/OdooInjector.py +192 -0
- flowtask/components/OpenFromXML.py +126 -0
- flowtask/components/OpenWeather.py +41 -0
- flowtask/components/OpenWithBase.py +616 -0
- flowtask/components/OpenWithPandas.py +715 -0
- flowtask/components/PGPDecrypt.py +199 -0
- flowtask/components/PandasIterator.py +187 -0
- flowtask/components/PandasToFile.py +189 -0
- flowtask/components/Paradox.py +339 -0
- flowtask/components/ParamIterator.py +117 -0
- flowtask/components/ParseHTML.py +84 -0
- flowtask/components/PlacerStores.py +249 -0
- flowtask/components/Pokemon.py +507 -0
- flowtask/components/PositiveBot.py +62 -0
- flowtask/components/PowerPointSlide.py +400 -0
- flowtask/components/PrintMessage.py +127 -0
- flowtask/components/ProductCompetitors/__init__.py +5 -0
- flowtask/components/ProductCompetitors/parsers/__init__.py +7 -0
- flowtask/components/ProductCompetitors/parsers/base.py +72 -0
- flowtask/components/ProductCompetitors/parsers/bestbuy.py +86 -0
- flowtask/components/ProductCompetitors/parsers/lowes.py +103 -0
- flowtask/components/ProductCompetitors/scrapper.py +155 -0
- flowtask/components/ProductCompliant.py +169 -0
- flowtask/components/ProductInfo/__init__.py +1 -0
- flowtask/components/ProductInfo/parsers/__init__.py +5 -0
- flowtask/components/ProductInfo/parsers/base.py +83 -0
- flowtask/components/ProductInfo/parsers/brother.py +97 -0
- flowtask/components/ProductInfo/parsers/canon.py +167 -0
- flowtask/components/ProductInfo/parsers/epson.py +118 -0
- flowtask/components/ProductInfo/parsers/hp.py +131 -0
- flowtask/components/ProductInfo/parsers/samsung.py +97 -0
- flowtask/components/ProductInfo/scraper.py +319 -0
- flowtask/components/ProductPricing.py +118 -0
- flowtask/components/QS.py +261 -0
- flowtask/components/QSBase.py +201 -0
- flowtask/components/QueryIterator.py +273 -0
- flowtask/components/QueryToInsert.py +327 -0
- flowtask/components/QueryToPandas.py +432 -0
- flowtask/components/RESTClient.py +195 -0
- flowtask/components/RethinkDBQuery.py +189 -0
- flowtask/components/Rsync.py +74 -0
- flowtask/components/RunSSH.py +59 -0
- flowtask/components/RunShell.py +71 -0
- flowtask/components/SalesForce.py +20 -0
- flowtask/components/SaveImageBank/__init__.py +257 -0
- flowtask/components/SchedulingVisits.py +592 -0
- flowtask/components/ScrapPage.py +216 -0
- flowtask/components/ScrapSearch.py +79 -0
- flowtask/components/SendNotify.py +257 -0
- flowtask/components/SentimentAnalysis.py +694 -0
- flowtask/components/ServiceScrapper/__init__.py +5 -0
- flowtask/components/ServiceScrapper/parsers/__init__.py +1 -0
- flowtask/components/ServiceScrapper/parsers/base.py +94 -0
- flowtask/components/ServiceScrapper/parsers/costco.py +93 -0
- flowtask/components/ServiceScrapper/scrapper.py +199 -0
- flowtask/components/SetVariables.py +156 -0
- flowtask/components/SubTask.py +182 -0
- flowtask/components/SuiteCRM.py +48 -0
- flowtask/components/Switch.py +175 -0
- flowtask/components/TableBase.py +148 -0
- flowtask/components/TableDelete.py +312 -0
- flowtask/components/TableInput.py +143 -0
- flowtask/components/TableOutput/TableOutput.py +384 -0
- flowtask/components/TableOutput/__init__.py +3 -0
- flowtask/components/TableSchema.py +534 -0
- flowtask/components/Target.py +223 -0
- flowtask/components/ThumbnailGenerator.py +156 -0
- flowtask/components/ToPandas.py +67 -0
- flowtask/components/TransformRows/TransformRows.py +507 -0
- flowtask/components/TransformRows/__init__.py +9 -0
- flowtask/components/TransformRows/functions.py +559 -0
- flowtask/components/TransposeRows.py +176 -0
- flowtask/components/UPCDatabase.py +86 -0
- flowtask/components/UnGzip.py +171 -0
- flowtask/components/Uncompress.py +172 -0
- flowtask/components/UniqueRows.py +126 -0
- flowtask/components/Unzip.py +107 -0
- flowtask/components/UpdateOperationalVars.py +147 -0
- flowtask/components/UploadTo.py +299 -0
- flowtask/components/UploadToS3.py +136 -0
- flowtask/components/UploadToSFTP.py +160 -0
- flowtask/components/UploadToSharepoint.py +205 -0
- flowtask/components/UserFunc.py +122 -0
- flowtask/components/VivaTracker.py +140 -0
- flowtask/components/WSDLClient.py +123 -0
- flowtask/components/Wait.py +18 -0
- flowtask/components/Walmart.py +199 -0
- flowtask/components/Workplace.py +134 -0
- flowtask/components/XMLToPandas.py +267 -0
- flowtask/components/Zammad/__init__.py +41 -0
- flowtask/components/Zammad/models.py +0 -0
- flowtask/components/ZoomInfoScraper.py +409 -0
- flowtask/components/__init__.py +104 -0
- flowtask/components/abstract.py +18 -0
- flowtask/components/flow.py +530 -0
- flowtask/components/google.py +335 -0
- flowtask/components/group.py +221 -0
- flowtask/components/py.typed +0 -0
- flowtask/components/reviewscrap.py +132 -0
- flowtask/components/tAutoincrement.py +117 -0
- flowtask/components/tConcat.py +109 -0
- flowtask/components/tExplode.py +119 -0
- flowtask/components/tFilter.py +184 -0
- flowtask/components/tGroup.py +236 -0
- flowtask/components/tJoin.py +270 -0
- flowtask/components/tMap/__init__.py +9 -0
- flowtask/components/tMap/functions.py +54 -0
- flowtask/components/tMap/tMap.py +450 -0
- flowtask/components/tMelt.py +112 -0
- flowtask/components/tMerge.py +114 -0
- flowtask/components/tOrder.py +93 -0
- flowtask/components/tPandas.py +94 -0
- flowtask/components/tPivot.py +71 -0
- flowtask/components/tPluckCols.py +76 -0
- flowtask/components/tUnnest.py +82 -0
- flowtask/components/user.py +401 -0
- flowtask/conf.py +457 -0
- flowtask/download.py +102 -0
- flowtask/events/__init__.py +11 -0
- flowtask/events/events/__init__.py +20 -0
- flowtask/events/events/abstract.py +95 -0
- flowtask/events/events/alerts/__init__.py +362 -0
- flowtask/events/events/alerts/colfunctions.py +131 -0
- flowtask/events/events/alerts/functions.py +158 -0
- flowtask/events/events/dummy.py +12 -0
- flowtask/events/events/exec.py +124 -0
- flowtask/events/events/file/__init__.py +7 -0
- flowtask/events/events/file/base.py +51 -0
- flowtask/events/events/file/copy.py +23 -0
- flowtask/events/events/file/delete.py +16 -0
- flowtask/events/events/interfaces/__init__.py +9 -0
- flowtask/events/events/interfaces/client.py +67 -0
- flowtask/events/events/interfaces/credentials.py +28 -0
- flowtask/events/events/interfaces/notifications.py +58 -0
- flowtask/events/events/jira.py +122 -0
- flowtask/events/events/log.py +26 -0
- flowtask/events/events/logerr.py +52 -0
- flowtask/events/events/notify.py +59 -0
- flowtask/events/events/notify_event.py +160 -0
- flowtask/events/events/publish.py +54 -0
- flowtask/events/events/sendfile.py +104 -0
- flowtask/events/events/task.py +97 -0
- flowtask/events/events/teams.py +98 -0
- flowtask/events/events/webhook.py +58 -0
- flowtask/events/manager.py +287 -0
- flowtask/exceptions.c +39393 -0
- flowtask/exceptions.cpython-310-x86_64-linux-gnu.so +0 -0
- flowtask/extensions/__init__.py +3 -0
- flowtask/extensions/abstract.py +82 -0
- flowtask/extensions/logging/__init__.py +65 -0
- flowtask/hooks/__init__.py +9 -0
- flowtask/hooks/actions/__init__.py +22 -0
- flowtask/hooks/actions/abstract.py +66 -0
- flowtask/hooks/actions/dummy.py +23 -0
- flowtask/hooks/actions/jira.py +74 -0
- flowtask/hooks/actions/rest.py +320 -0
- flowtask/hooks/actions/sampledata.py +37 -0
- flowtask/hooks/actions/sensor.py +23 -0
- flowtask/hooks/actions/task.py +9 -0
- flowtask/hooks/actions/ticket.py +37 -0
- flowtask/hooks/actions/zammad.py +55 -0
- flowtask/hooks/hook.py +62 -0
- flowtask/hooks/models.py +17 -0
- flowtask/hooks/service.py +187 -0
- flowtask/hooks/step.py +91 -0
- flowtask/hooks/types/__init__.py +23 -0
- flowtask/hooks/types/base.py +129 -0
- flowtask/hooks/types/brokers/__init__.py +11 -0
- flowtask/hooks/types/brokers/base.py +54 -0
- flowtask/hooks/types/brokers/mqtt.py +35 -0
- flowtask/hooks/types/brokers/rabbitmq.py +82 -0
- flowtask/hooks/types/brokers/redis.py +83 -0
- flowtask/hooks/types/brokers/sqs.py +44 -0
- flowtask/hooks/types/fs.py +232 -0
- flowtask/hooks/types/http.py +49 -0
- flowtask/hooks/types/imap.py +200 -0
- flowtask/hooks/types/jira.py +279 -0
- flowtask/hooks/types/mail.py +205 -0
- flowtask/hooks/types/postgres.py +98 -0
- flowtask/hooks/types/responses/__init__.py +8 -0
- flowtask/hooks/types/responses/base.py +5 -0
- flowtask/hooks/types/sharepoint.py +288 -0
- flowtask/hooks/types/ssh.py +141 -0
- flowtask/hooks/types/tagged.py +59 -0
- flowtask/hooks/types/upload.py +85 -0
- flowtask/hooks/types/watch.py +71 -0
- flowtask/hooks/types/web.py +36 -0
- flowtask/interfaces/AzureClient.py +137 -0
- flowtask/interfaces/AzureGraph.py +839 -0
- flowtask/interfaces/Boto3Client.py +326 -0
- flowtask/interfaces/DropboxClient.py +173 -0
- flowtask/interfaces/ExcelHandler.py +94 -0
- flowtask/interfaces/FTPClient.py +131 -0
- flowtask/interfaces/GoogleCalendar.py +201 -0
- flowtask/interfaces/GoogleClient.py +133 -0
- flowtask/interfaces/GoogleDrive.py +127 -0
- flowtask/interfaces/GoogleGCS.py +89 -0
- flowtask/interfaces/GoogleGeocoding.py +93 -0
- flowtask/interfaces/GoogleLang.py +114 -0
- flowtask/interfaces/GooglePub.py +61 -0
- flowtask/interfaces/GoogleSheet.py +68 -0
- flowtask/interfaces/IMAPClient.py +137 -0
- flowtask/interfaces/O365Calendar.py +113 -0
- flowtask/interfaces/O365Client.py +220 -0
- flowtask/interfaces/OneDrive.py +284 -0
- flowtask/interfaces/Outlook.py +155 -0
- flowtask/interfaces/ParrotBot.py +130 -0
- flowtask/interfaces/SSHClient.py +378 -0
- flowtask/interfaces/Sharepoint.py +496 -0
- flowtask/interfaces/__init__.py +36 -0
- flowtask/interfaces/azureauth.py +119 -0
- flowtask/interfaces/cache.py +201 -0
- flowtask/interfaces/client.py +82 -0
- flowtask/interfaces/compress.py +525 -0
- flowtask/interfaces/credentials.py +124 -0
- flowtask/interfaces/d2l.py +239 -0
- flowtask/interfaces/databases/__init__.py +5 -0
- flowtask/interfaces/databases/db.py +223 -0
- flowtask/interfaces/databases/documentdb.py +55 -0
- flowtask/interfaces/databases/rethink.py +39 -0
- flowtask/interfaces/dataframes/__init__.py +11 -0
- flowtask/interfaces/dataframes/abstract.py +21 -0
- flowtask/interfaces/dataframes/arrow.py +71 -0
- flowtask/interfaces/dataframes/dt.py +69 -0
- flowtask/interfaces/dataframes/pandas.py +167 -0
- flowtask/interfaces/dataframes/polars.py +60 -0
- flowtask/interfaces/db.py +263 -0
- flowtask/interfaces/env.py +46 -0
- flowtask/interfaces/func.py +137 -0
- flowtask/interfaces/http.py +1780 -0
- flowtask/interfaces/locale.py +40 -0
- flowtask/interfaces/log.py +75 -0
- flowtask/interfaces/mask.py +143 -0
- flowtask/interfaces/notification.py +154 -0
- flowtask/interfaces/playwright.py +339 -0
- flowtask/interfaces/powerpoint.py +368 -0
- flowtask/interfaces/py.typed +0 -0
- flowtask/interfaces/qs.py +376 -0
- flowtask/interfaces/result.py +87 -0
- flowtask/interfaces/selenium_service.py +779 -0
- flowtask/interfaces/smartsheet.py +154 -0
- flowtask/interfaces/stat.py +39 -0
- flowtask/interfaces/task.py +96 -0
- flowtask/interfaces/template.py +118 -0
- flowtask/interfaces/vectorstores/__init__.py +1 -0
- flowtask/interfaces/vectorstores/abstract.py +133 -0
- flowtask/interfaces/vectorstores/milvus.py +669 -0
- flowtask/interfaces/zammad.py +107 -0
- flowtask/models.py +193 -0
- flowtask/parsers/__init__.py +15 -0
- flowtask/parsers/_yaml.c +11978 -0
- flowtask/parsers/_yaml.cpython-310-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/argparser.py +235 -0
- flowtask/parsers/base.c +15155 -0
- flowtask/parsers/base.cpython-310-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/json.c +11968 -0
- flowtask/parsers/json.cpython-310-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/maps.py +49 -0
- flowtask/parsers/toml.c +11968 -0
- flowtask/parsers/toml.cpython-310-x86_64-linux-gnu.so +0 -0
- flowtask/plugins/__init__.py +16 -0
- flowtask/plugins/components/__init__.py +0 -0
- flowtask/plugins/handler/__init__.py +45 -0
- flowtask/plugins/importer.py +31 -0
- flowtask/plugins/sources/__init__.py +0 -0
- flowtask/runner.py +283 -0
- flowtask/scheduler/__init__.py +9 -0
- flowtask/scheduler/functions.py +493 -0
- flowtask/scheduler/handlers/__init__.py +8 -0
- flowtask/scheduler/handlers/manager.py +504 -0
- flowtask/scheduler/handlers/models.py +58 -0
- flowtask/scheduler/handlers/service.py +72 -0
- flowtask/scheduler/notifications.py +65 -0
- flowtask/scheduler/scheduler.py +993 -0
- flowtask/services/__init__.py +0 -0
- flowtask/services/bots/__init__.py +0 -0
- flowtask/services/bots/telegram.py +264 -0
- flowtask/services/files/__init__.py +11 -0
- flowtask/services/files/manager.py +522 -0
- flowtask/services/files/model.py +37 -0
- flowtask/services/files/service.py +767 -0
- flowtask/services/jira/__init__.py +3 -0
- flowtask/services/jira/jira_actions.py +191 -0
- flowtask/services/tasks/__init__.py +13 -0
- flowtask/services/tasks/launcher.py +213 -0
- flowtask/services/tasks/manager.py +323 -0
- flowtask/services/tasks/service.py +275 -0
- flowtask/services/tasks/task_manager.py +376 -0
- flowtask/services/tasks/tasks.py +155 -0
- flowtask/storages/__init__.py +16 -0
- flowtask/storages/exceptions.py +12 -0
- flowtask/storages/files/__init__.py +8 -0
- flowtask/storages/files/abstract.py +29 -0
- flowtask/storages/files/filesystem.py +66 -0
- flowtask/storages/tasks/__init__.py +19 -0
- flowtask/storages/tasks/abstract.py +26 -0
- flowtask/storages/tasks/database.py +33 -0
- flowtask/storages/tasks/filesystem.py +108 -0
- flowtask/storages/tasks/github.py +119 -0
- flowtask/storages/tasks/memory.py +45 -0
- flowtask/storages/tasks/row.py +25 -0
- flowtask/tasks/__init__.py +0 -0
- flowtask/tasks/abstract.py +526 -0
- flowtask/tasks/command.py +118 -0
- flowtask/tasks/pile.py +486 -0
- flowtask/tasks/py.typed +0 -0
- flowtask/tasks/task.py +778 -0
- flowtask/template/__init__.py +161 -0
- flowtask/tests.py +257 -0
- flowtask/types/__init__.py +8 -0
- flowtask/types/typedefs.c +11347 -0
- flowtask/types/typedefs.cpython-310-x86_64-linux-gnu.so +0 -0
- flowtask/utils/__init__.py +24 -0
- flowtask/utils/constants.py +117 -0
- flowtask/utils/encoders.py +21 -0
- flowtask/utils/executor.py +112 -0
- flowtask/utils/functions.cpp +14280 -0
- flowtask/utils/functions.cpython-310-x86_64-linux-gnu.so +0 -0
- flowtask/utils/json.cpp +13349 -0
- flowtask/utils/json.cpython-310-x86_64-linux-gnu.so +0 -0
- flowtask/utils/mail.py +63 -0
- flowtask/utils/parseqs.c +13324 -0
- flowtask/utils/parserqs.cpython-310-x86_64-linux-gnu.so +0 -0
- flowtask/utils/stats.py +308 -0
- flowtask/utils/transformations.py +74 -0
- flowtask/utils/uv.py +12 -0
- flowtask/utils/validators.py +97 -0
- flowtask/version.py +11 -0
- flowtask-5.8.4.dist-info/LICENSE +201 -0
- flowtask-5.8.4.dist-info/METADATA +209 -0
- flowtask-5.8.4.dist-info/RECORD +470 -0
- flowtask-5.8.4.dist-info/WHEEL +6 -0
- flowtask-5.8.4.dist-info/entry_points.txt +3 -0
- flowtask-5.8.4.dist-info/top_level.txt +2 -0
- plugins/components/CreateQR.py +39 -0
- plugins/components/TestComponent.py +28 -0
- plugins/components/Use1.py +13 -0
- plugins/components/Workplace.py +117 -0
- plugins/components/__init__.py +3 -0
- plugins/sources/__init__.py +0 -0
- plugins/sources/get_populartimes.py +78 -0
- plugins/sources/google.py +150 -0
- plugins/sources/hubspot.py +679 -0
- plugins/sources/icims.py +679 -0
- plugins/sources/mobileinsight.py +501 -0
- plugins/sources/newrelic.py +262 -0
- plugins/sources/uap.py +268 -0
- plugins/sources/venu.py +244 -0
- plugins/sources/vocinity.py +314 -0
@@ -0,0 +1,411 @@
|
|
1
|
+
from typing import Callable, Dict, Any, Optional, List
|
2
|
+
import re
|
3
|
+
import asyncio
|
4
|
+
import asyncpg
|
5
|
+
from pgvector.asyncpg import register_vector
|
6
|
+
from navigator.libs.json import JSONContent
|
7
|
+
from .flow import FlowComponent
|
8
|
+
from ..exceptions import ConfigError, ComponentError
|
9
|
+
from ..conf import default_dsn
|
10
|
+
|
11
|
+
# Whitelist for SQL identifiers: one letter or underscore, followed by any
# number of letters, digits or underscores.
IDENT_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")


def qid(name: str) -> str:
    """
    Very small helper to quote SQL identifiers safely.

    :param name: Bare identifier (schema, table or column name).
    :return: The identifier wrapped in double quotes.
    :raises ValueError: If name is not a string, is empty, starts with a
        digit, or contains anything but letters, digits or '_'.
    """
    # fullmatch() rather than match(): with match(), the trailing '$' of
    # IDENT_RE also succeeds just before a final newline, so a value such as
    # "users\n" used to slip through the whitelist.
    if not isinstance(name, str) or not IDENT_RE.fullmatch(name):
        raise ValueError(
            f"illegal identifier: {name!r}"
        )
    return '"' + name + '"'
|
23
|
+
|
24
|
+
|
25
|
+
class DuplicatePhoto(FlowComponent):
|
26
|
+
"""DuplicatePhoto.
|
27
|
+
|
28
|
+
Check if Photo is Duplicated and add a column with the result.
|
29
|
+
This component is used to check if a photo is duplicated in the dataset.
|
30
|
+
It uses the image hash to check if the photo is duplicated.
|
31
|
+
The image hash is a unique identifier for the image.
|
32
|
+
The image hash is calculated using the image hash algorithm.
|
33
|
+
The image hash algorithm is a fast and efficient way to calculate the hash of an image.
|
34
|
+
saves a detailed information about matches based on perceptual hash and vector similarity.
|
35
|
+
"""
|
36
|
+
def __init__(
    self,
    loop: asyncio.AbstractEventLoop = None,
    job: Callable = None,
    stat: Callable = None,
    **kwargs,
) -> None:
    """
    Read the component options from ``kwargs`` and initialise the parent.

    Every option below is looked up in ``kwargs`` and falls back to the
    listed default. ``kwargs`` is not consumed: it is forwarded unchanged
    to the parent constructor together with ``loop``, ``job`` and ``stat``.
    """
    option_defaults = {
        # int
        "chunk_size": 100,
        # str: DataFrame / table column names
        "id_column": "photo_id",
        "hash_column": "image_hash",
        "vector_column": "image_vector",
        # int: strict perceptual-hash cutoff for duplicates
        # (author's suggested range: 4-6)
        "hamming_threshold": 4,
        # float: strict vector-distance cutoff for duplicates
        # (author's suggested range: 0.05-0.10)
        "vector_threshold": 0.05,
        # int: more lenient hash cutoff for "similar" images
        # (author's suggested range: 8-12)
        "similar_hamming_threshold": 8,
        # float: more lenient vector cutoff for "similar" images
        # (author's suggested range: 0.15-0.25)
        "similar_vector_threshold": 0.15,
        # str: target table and schema
        "tablename": "image_bank",
        "schema": "public",
        # str: names of the result columns
        "duplicate_column": "is_duplicated",
        "similar_column": "is_similar",
    }
    for option, fallback in option_defaults.items():
        setattr(self, option, kwargs.get(option, fallback))

    # No connection pool is opened here.
    self.pool: asyncpg.Pool | None = None
    super().__init__(loop=loop, job=job, stat=stat, **kwargs)
    # Semaphore with 10 permits; adjust the limit as needed.
    self._semaphore = asyncio.Semaphore(10)
|
63
|
+
|
64
|
+
def _qualified_tablename(self) -> str:
    """
    Return the target table as a quoted ``"schema"."table"`` reference.

    :raises ConfigError: If the schema or the table name is empty/unset.
    :raises ValueError: Propagated from ``qid`` for an illegal identifier.
    """
    problem = None
    if not self.schema:
        problem = "Schema is not set."
    elif not self.tablename:
        problem = "Table name is not set."
    if problem is not None:
        raise ConfigError(problem)

    quoted_schema = qid(self.schema)
    quoted_table = qid(self.tablename)
    return quoted_schema + "." + quoted_table
|
73
|
+
|
74
|
+
def _build_phash_sql(self) -> str:
    """
    Build the query returning the closest row by perceptual-hash distance.

    Placeholders: ``$1`` the hash to compare (cast via ``'x' || $1`` to
    ``bit(256)``, so presumably a hex string), ``$2`` the maximum distance
    (inclusive), ``$3`` an id to exclude from the search.

    :return: SQL text selecting the id column and ``distance``; at most one
        row, smallest distance first.
    """
    id_col = qid(self.id_column)
    # Number of differing bits between the two 256-bit values ('#' is XOR).
    hamming = (
        "bit_count(('x' || $1)::bit(256) # "
        f"('x' || {qid(self.hash_column)})::bit(256))"
    )
    table = self._qualified_tablename()
    clauses = (
        f"SELECT {id_col}, {hamming} as distance",
        f"FROM {table}",
        f"WHERE {id_col} IS DISTINCT FROM $3::bigint",
        f"AND {hamming} <= $2::integer",
        "ORDER BY distance ASC",
        "LIMIT 1;",
    )
    return " ".join(clauses)
|
84
|
+
|
85
|
+
def _build_vector_sql(self) -> str:
    """
    Build the query returning the closest row by vector distance.

    Placeholders: ``$1`` the vector to compare, ``$2`` the exclusive upper
    bound applied to the ``<->`` distance, ``$3`` an id to exclude.

    :return: SQL text selecting the id column, ``distance`` (``<->``) and
        ``similarity`` (``1 - (<=>)``); at most one row, smallest distance
        first.
    """
    id_col = qid(self.id_column)
    vec_col = qid(self.vector_column)
    table = self._qualified_tablename()
    gap = f"{vec_col} <-> $1::vector"
    clauses = (
        f"SELECT {id_col}, {gap} as distance,",
        f"1 - ({vec_col} <=> $1::vector) as similarity",
        f"FROM {table}",
        f"WHERE {id_col} IS DISTINCT FROM $3::bigint",
        f"AND {gap} < $2::float8",
        "ORDER BY distance ASC",
        "LIMIT 1;",
    )
    return " ".join(clauses)
|
96
|
+
|
97
|
+
def _build_similar_phash_sql(self) -> str:
    """
    Build the perceptual-hash query for the "similar" distance band.

    Placeholders: ``$1`` the hash to compare (cast via ``'x' || $1`` to
    ``bit(256)``, so presumably a hex string), ``$2`` the exclusive lower
    bound, ``$4`` the inclusive upper bound, ``$3`` an id to exclude.

    :return: SQL text selecting the id column and ``distance``; at most one
        row, smallest distance first.
    """
    id_col = qid(self.id_column)
    # Number of differing bits between the two 256-bit values ('#' is XOR).
    hamming = (
        "bit_count(('x' || $1)::bit(256) # "
        f"('x' || {qid(self.hash_column)})::bit(256))"
    )
    table = self._qualified_tablename()
    clauses = (
        f"SELECT {id_col}, {hamming} as distance",
        f"FROM {table}",
        f"WHERE {id_col} IS DISTINCT FROM $3::bigint",
        f"AND {hamming} > $2::integer",
        f"AND {hamming} <= $4::integer",
        "ORDER BY distance ASC",
        "LIMIT 1;",
    )
    return " ".join(clauses)
|
108
|
+
|
109
|
+
def _build_similar_vector_sql(self) -> str:
    """
    Build the vector-distance query for the "similar" distance band.

    Placeholders: ``$1`` the vector to compare, ``$2`` the inclusive lower
    bound and ``$4`` the exclusive upper bound applied to the ``<->``
    distance, ``$3`` an id to exclude.

    :return: SQL text selecting the id column, ``distance`` (``<->``) and
        ``similarity`` (``1 - (<=>)``); at most one row, smallest distance
        first.
    """
    id_col = qid(self.id_column)
    vec_col = qid(self.vector_column)
    table = self._qualified_tablename()
    gap = f"{vec_col} <-> $1::vector"
    clauses = (
        f"SELECT {id_col}, {gap} as distance,",
        f"1 - ({vec_col} <=> $1::vector) as similarity",
        f"FROM {table}",
        f"WHERE {id_col} IS DISTINCT FROM $3::bigint",
        f"AND {gap} >= $2::float8",
        f"AND {gap} < $4::float8",
        "ORDER BY distance ASC",
        "LIMIT 1;",
    )
    return " ".join(clauses)
|
121
|
+
|
122
|
+
async def pgvector_init(self, conn):
    """
    Prepare a connection: JSON/JSONB codecs plus pgvector registration.

    Passed as the ``init`` callback when the pool is created.

    :param conn: The connection to configure.
    """
    # NOTE(review): self._encoder is not assigned anywhere in the visible
    # part of this class; presumably the parent component provides it.
    def _to_json(payload):
        return self._encoder.dumps(payload)  # pylint: disable=E1120

    def _from_json(raw):
        return self._encoder.loads(raw)  # pylint: disable=E1120

    # Same codec pair for both JSON flavours.
    for pg_type in ("json", "jsonb"):
        await conn.set_type_codec(
            pg_type,
            encoder=_to_json,
            decoder=_from_json,
            schema="pg_catalog",
        )

    await register_vector(conn)
|
148
|
+
|
149
|
+
# ──────────────────────────────────────────────────────────────
|
150
|
+
# Setup / teardown
|
151
|
+
# ──────────────────────────────────────────────────────────────
|
152
|
+
async def start(self, **kwargs):
    """
    Validate the input, prepare the SQL, open the pool and seed the columns.

    Steps, in order:

    1. Take the input of the previous component as ``self.data``.
    2. Check that the id, hash and vector columns exist in the data.
    3. Build the four lookup queries (this also validates every identifier).
    4. Open the asyncpg pool and probe the target table.
    5. Add the duplicate/similar result columns when they are missing.

    :param kwargs: Accepted but not used by this method.
    :raises ConfigError: On a missing column, an unset schema/table name, a
        pool that could not be created, or a missing table/schema.
    :raises ValueError: If a configured identifier is not a legal name.
    """
    if self.previous:
        self.data = self.input

    # column checks
    for col in (self.id_column, self.hash_column, self.vector_column):
        if col not in self.data.columns:
            raise ConfigError(
                f"Column '{col}' missing from DataFrame"
            )

    # Prepare the SQL strings before touching the database, so an illegal
    # identifier fails fast without leaving an open pool behind.
    table = self._qualified_tablename()
    self._sql_phash = self._build_phash_sql()
    self._sql_vector = self._build_vector_sql()
    self._sql_similar_phash = self._build_similar_phash_sql()
    self._sql_similar_vector = self._build_similar_vector_sql()

    self.pool = await asyncpg.create_pool(
        dsn=default_dsn,
        min_size=1,
        max_size=4,
        max_queries=100,
        init=self.pgvector_init,
        timeout=10,
    )
    if not self.pool:
        raise ConfigError(
            "Database connection pool is not initialized."
        )

    # Check if the table exists. The probe uses the same validated, quoted
    # name as the lookup queries instead of interpolating the raw schema and
    # table strings, so both agree on which relation is addressed.
    async with self.pool.acquire() as conn:
        try:
            await conn.execute(
                f"SELECT 1 FROM {table} LIMIT 1"
            )
        except asyncpg.exceptions.UndefinedTableError as err:
            raise ConfigError(
                f"Table {self.schema}.{self.tablename} does not exist."
            ) from err
        except asyncpg.exceptions.UndefinedSchemaError as err:
            raise ConfigError(
                f"Schema {self.schema} does not exist."
            ) from err

    # Seed the result columns with one independent default dict per row.
    # Assumes self.data is a pandas DataFrame (see the error message above):
    # assigning a bare dict to a column is aligned on the index by pandas
    # rather than repeated for every row, hence the explicit list.
    row_count = len(self.data)
    if self.duplicate_column not in self.data.columns:
        self.data[self.duplicate_column] = [
            {
                "phash": None,
                "vector": None,
                "duplicate": False
            }
            for _ in range(row_count)
        ]
    if self.similar_column not in self.data.columns:
        self.data[self.similar_column] = [
            {
                "phash": None,
                "vector": None,
                "similar": False,
                "similarity_percentage": None
            }
            for _ in range(row_count)
        ]
|
206
|
+
|
207
|
+
async def close(self):
    """
    Close the connection pool if one is set; otherwise do nothing.
    """
    pool = self.pool
    if not pool:
        return
    await pool.close()
|
210
|
+
|
211
|
+
# --------------- duplicate test --------------------
|
212
|
+
async def _check_duplicates(
    self,
    conn,
    phash: str,
    vec: list[float],
    current_id: int
) -> tuple[Dict[str, Any], Dict[str, Any]]:
    """
    Check if the given hash and vector are duplicated in the database.
    Return detailed information about matches.

    The duplicate queries run first; if either one matches, the method
    returns immediately and the similarity queries are never issued.

    :param conn: Database connection exposing an awaitable ``fetchrow``.
    :param phash: Perceptual hash of the image. When falsy, both
        hash-based queries are skipped.
    :param vec: Vector representation of the image.
    :param current_id: Current photo ID, passed to every query as a
        parameter (presumably to exclude the row itself -- the SQL is built
        elsewhere, confirm there).
    :return: Tuple ``(duplicate_result, similar_result)``. Each dict has a
        ``"phash"`` and a ``"vector"`` entry (``None`` or a match-detail
        dict) plus an overall boolean (``"duplicate"`` / ``"similar"``).
        ``similar_result`` also carries ``"similarity_percentage"``.
    """
    duplicate_result = {
        "phash": None,
        "vector": None,
        "duplicate": False
    }
    similar_result = {
        "phash": None,
        "vector": None,
        "similar": False,
        "similarity_percentage": None
    }

    # --- duplicate detection -------------------------------------------
    if phash:
        phash_match = await conn.fetchrow(
            self._sql_phash,
            phash,
            self.hamming_threshold,  # Strict threshold for duplicates
            current_id
        )
        if phash_match:
            duplicate_result["phash"] = {
                "duplicate": True,
                self.id_column: phash_match[self.id_column],
                "threshold": int(phash_match["distance"])
            }

    # fetchrow (not fetch): the result is read below as ONE record by
    # column name. fetch() returns a list, which made every vector match
    # fail with "list indices must be integers".
    vector_match = await conn.fetchrow(
        self._sql_vector,
        vec,
        self.vector_threshold,
        current_id
    )
    if vector_match:
        distance = float(vector_match["distance"])
        # Fall back to 1 - distance when the query returns no "similarity".
        similarity = float(vector_match.get("similarity", 1 - distance))
        # NOTE(review): vector entries use the literal key "photo_id" while
        # phash entries use self.id_column; kept as-is so the output schema
        # does not change.
        duplicate_result["vector"] = {
            "duplicate": True,
            "photo_id": vector_match[self.id_column],
            "threshold": distance,
            "similarity": similarity,
            "similarity_percentage": similarity * 100
        }

    # Determine overall duplicate status
    phash_duplicate = (
        duplicate_result["phash"] is not None
        and duplicate_result["phash"].get("duplicate", False)
    )
    vector_duplicate = (
        duplicate_result["vector"] is not None
        and duplicate_result["vector"].get("duplicate", False)
    )
    if phash_duplicate or vector_duplicate:
        duplicate_result["duplicate"] = True
        # If it's a duplicate, don't check for similarity
        return duplicate_result, similar_result

    # --- similarity detection ------------------------------------------
    # Guarded like the duplicate hash check above: without a hash there is
    # nothing to compare, so do not send an empty value to the database.
    if phash:
        similar_phash_match = await conn.fetchrow(
            self._sql_similar_phash,
            phash,
            self.hamming_threshold,  # Duplicate threshold (lower bound)
            current_id,
            self.similar_hamming_threshold  # Similar threshold (upper bound)
        )
        if similar_phash_match:
            distance = int(similar_phash_match["distance"])
            # Hamming distance as a share of 256 bits
            # (assumes a 256-bit perceptual hash -- TODO confirm).
            hash_similarity_pct = 100 - (distance / 256 * 100)
            similar_result["phash"] = {
                "similar": True,
                self.id_column: similar_phash_match[self.id_column],
                "threshold": distance,
                "similarity_percentage": hash_similarity_pct
            }

    similar_vector_match = await conn.fetchrow(
        self._sql_similar_vector,
        vec,
        self.vector_threshold,  # Duplicate threshold (lower bound)
        current_id,
        self.similar_vector_threshold  # Similar threshold (upper bound)
    )
    if similar_vector_match:
        distance = float(similar_vector_match["distance"])
        similarity = float(similar_vector_match.get("similarity", 1 - distance))
        similar_result["vector"] = {
            "similar": True,
            "photo_id": similar_vector_match[self.id_column],
            "threshold": distance,
            "similarity": similarity,
            "similarity_percentage": similarity * 100
        }

    # Determine overall similarity status
    phash_similar = (
        similar_result["phash"] is not None
        and similar_result["phash"].get("similar", False)
    )
    vector_similar = (
        similar_result["vector"] is not None
        and similar_result["vector"].get("similar", False)
    )
    if phash_similar or vector_similar:
        similar_result["similar"] = True

        # Report the vector percentage when available, else the hash one.
        if vector_similar and similar_result["vector"].get("similarity_percentage") is not None:
            similar_result["similarity_percentage"] = similar_result["vector"]["similarity_percentage"]
        elif phash_similar and similar_result["phash"].get("similarity_percentage") is not None:
            similar_result["similarity_percentage"] = similar_result["phash"]["similarity_percentage"]

    return duplicate_result, similar_result
|
341
|
+
|
342
|
+
async def _process_row(self, conn, row) -> Dict[str, Any]:
    """
    Run the duplicate/similarity checks for one row and annotate it.

    :param conn: Database connection handed through to ``_check_duplicates``.
    :param row: Mapping holding the hash, vector and id columns; it is
        updated in place and also returned.
    :return: The same ``row`` with the duplicate and similar detail dicts
        stored under ``self.duplicate_column`` / ``self.similar_column``.
        ``is_duplicated``, ``is_similar`` and ``similarity_percentage`` are
        added only when they apply.
    """
    image_hash = row[self.hash_column]
    embedding = row[self.vector_column]
    current_id = row[self.id_column]

    # Log current processing information for debugging
    self._logger.debug(
        f"Processing photo_id: {current_id} with threshold: {self.vector_threshold}"
    )

    duplicate_info, similar_info = await self._check_duplicates(
        conn, image_hash, embedding, current_id
    )

    # Debug information about match results
    if duplicate_info["vector"]:
        self._logger.debug(f"Vector match found: {duplicate_info['vector']}")
    if duplicate_info["phash"]:
        self._logger.debug(f"Perceptual hash match found: {duplicate_info['phash']}")

    # Attach the detailed results to the row
    row[self.duplicate_column] = duplicate_info
    row[self.similar_column] = similar_info

    duplicate_flag = duplicate_info.get('duplicate', False)
    if duplicate_flag is True:
        row['is_duplicated'] = True

    # A row is flagged as similar only when it is not already a duplicate.
    if similar_info.get('similar', False) is True and not duplicate_flag:
        row['is_similar'] = True
        percentage = similar_info.get('similarity_percentage')
        if percentage is not None:
            row['similarity_percentage'] = percentage
    return row
|
378
|
+
|
379
|
+
async def run(self):
    """
    Check every row of ``self.data`` and write the findings back.

    One coroutine is created per index label; each waits on
    ``self._semaphore`` and on a pooled connection before calling
    ``_process_row``. After all of them finish, the detail dicts and the
    flat ``is_duplicated`` / ``is_similar`` / ``similarity_percentage``
    fields are stored in the DataFrame.

    :return: The updated DataFrame (also stored in ``self._result``).
    :raises ConfigError: If the connection pool has not been created.
    """
    if self.pool is None:
        raise ConfigError("Database connection pool is not initialized.")

    async def check_one(label):
        # Hold both the semaphore slot and a pooled connection for this row.
        async with self._semaphore, self.pool.acquire() as conn:
            record = self.data.loc[label].to_dict()
            processed = await self._process_row(conn, record)
            return (
                label,
                processed[self.duplicate_column],
                processed[self.similar_column],
            )

    pending = [check_one(label) for label in self.data.index]
    outcomes = await asyncio.gather(*pending)

    # Write back only after every check has completed.
    for label, dup_info, sim_info in outcomes:
        self.data.at[label, self.duplicate_column] = dup_info
        self.data.at[label, self.similar_column] = sim_info

        # Flat boolean fields for easier filtering
        self.data.at[label, 'is_duplicated'] = dup_info.get('duplicate', False)

        similar_flag = sim_info.get('similar', False) and not dup_info.get('duplicate', False)
        self.data.at[label, 'is_similar'] = similar_flag

        # Similarity percentage is recorded for similar rows only
        if similar_flag and sim_info.get('similarity_percentage') is not None:
            self.data.at[label, 'similarity_percentage'] = sim_info['similarity_percentage']

    self._result = self.data
    return self._result
|
@@ -0,0 +1,237 @@
|
|
1
|
+
from collections.abc import Callable
|
2
|
+
import asyncio
|
3
|
+
from typing import Any
|
4
|
+
import pandas as pd
|
5
|
+
# Bot Infrastructure:
|
6
|
+
from parrot.bots.basic import BasicBot
|
7
|
+
from .flow import FlowComponent
|
8
|
+
from ..exceptions import ComponentError, ConfigError
|
9
|
+
|
10
|
+
class EmployeeEvaluation(FlowComponent):
    """
    EmployeeEvaluation

    Overview

        The EmployeeEvaluation class is a component that asks an AI agent (bot) to
        evaluate employees based on their chat messages.

    .. table:: Properties
       :widths: auto

    +------------------+----------+----------------------------------------------------------------+
    | Name             | Required | Description                                                    |
    +------------------+----------+----------------------------------------------------------------+
    | output_column    |   Yes    | Column where the bot's answer for each employee is stored.     |
    +------------------+----------+----------------------------------------------------------------+
    | bot_name         |   No     | Name given to the bot (defaults to "EmployeeBot").             |
    +------------------+----------+----------------------------------------------------------------+
    | llm              |   Yes    | Mapping read for the keys "name" and "model_name".             |
    +------------------+----------+----------------------------------------------------------------+

    Return

        A Pandas Dataframe with one row per employee: the computed statistics,
        the list of chat texts and the bot's evaluation.

    """  # noqa

    def __init__(
        self,
        loop: asyncio.AbstractEventLoop = None,
        job: Callable = None,
        stat: Callable = None,
        **kwargs,
    ):
        """
        Set up the component and locate the task's ``prompts`` directory.

        :raises ConfigError: If the ``prompts`` directory does not exist.
        """
        super().__init__(
            loop=loop, job=job, stat=stat, **kwargs
        )

        self._bot_name = kwargs.get('bot_name', 'EmployeeBot')
        # TaskStorage
        # Find in the taskstorage, the "prompts" directory.
        prompt_path = self._taskstore.path.joinpath(self._program, 'prompts')
        if not prompt_path.exists():
            # Use the bot name here: the system prompt has not been loaded
            # yet, so referencing it would hide this error behind an
            # AttributeError.
            raise ConfigError(
                f"{self._bot_name} Prompts Path Not Found: {prompt_path}"
            )
        self.prompt_path = prompt_path
        # System Prompt:
        # is hardcoded to this particular Bot.
        self.system_prompt_file = 'employee.txt'
        # Bot Object (created in start()):
        self._bot: Any = None

    async def start(self, **kwargs):
        """
        start

        Overview

            Validate the input, load the system prompt file and configure the bot.

        Return

            True if the EmployeeEvaluation component started successfully.

        Raises

            ComponentError if there is no previous component or the bot cannot be
            configured; ConfigError if output_column or the prompt file is missing.

        """
        if self.previous:
            self.data = self.input
        else:
            raise ComponentError(
                "EmployeeBot: Data Was Not Found"
            )
        if not getattr(self, 'output_column', None):
            raise ConfigError(
                "Employee Evaluation: output_column is required"
            )
        # check if Prompt File exists
        prompt_file = self.prompt_path.joinpath(self.system_prompt_file)
        if not prompt_file.exists():
            raise ConfigError(
                f"{self._bot_name} Prompt File Not Found: {prompt_file}"
            )
        self.system_prompt_file = prompt_file.name
        # read the prompt file as text (explicit encoding so the result does
        # not depend on the platform locale):
        with open(prompt_file, 'r', encoding='utf-8') as f:
            self.system_prompt = f.read()
        # Set the Bot:
        try:
            self._bot = BasicBot(
                name=self._bot_name,
                system_prompt=self.system_prompt,
                goal="Employee Evaluation using chat messages",
                use_llm=self.llm.get('name', 'name'),
                model_name=self.llm.get('model_name', 'gemini-1.5-pro'),
            )
            # configure the bot:
            await self._bot.configure()
        except Exception as err:
            # Report the bot name, not the whole prompt text.
            raise ComponentError(
                f"{self._bot_name} Error Configuring Bot: {err}"
            ) from err
        return True

    def generate_prompt(self, df, sender_name, messages):
        """
        Build the question sent to the bot for one employee.

        :param df: DataFrame with a ``sender_name`` column, the statistics
            columns and a ``text`` column.
        :param sender_name: Employee whose first matching row is used.
        :param messages: Currently unused; the chat text is taken from the
            ``text`` column of ``df``. Kept for interface compatibility.
        :return: The formatted prompt string.
        """
        # Filter the dataframe for the given `sender_name`
        employee_df = df[df['sender_name'] == sender_name]

        # Extract the relevant statistics (first matching row)
        positive_count = employee_df['Positive_Count'].values[0]
        negative_count = employee_df['Negative_Count'].values[0]
        avg_message_length = employee_df['Avg_Message_Length'].values[0]
        percentage_of_messages = employee_df['Percentage_of_Messages'].values[0]
        rank_by_message_count = employee_df['Rank_by_Message_Count'].values[0]
        message_count = employee_df['Message_Count'].values[0]

        # Extract the chat messages
        chat_messages = employee_df['text'].values[0]

        # Define the prompt for the configured LLM
        prompt = f"""
        Please analyze the chat messages and provide insights and useful information about the employee's behavior,
        including their mood, feelings, and most relevant chat messages.

        Use the following statistics to support your analysis:
        - Positive_Count: {positive_count}
        - Negative_Count: {negative_count}
        - Avg_Message_Length: {avg_message_length}
        - Percentage_of_Messages: {percentage_of_messages}
        - Rank_by_Message_Count: {rank_by_message_count}
        - Message_Count: {message_count}

        Chat messages: {chat_messages}
        """

        return prompt

    async def run(self):
        """
        run

        Overview

            Compute per-employee chat statistics, ask the bot to evaluate each
            employee and store the answers in ``output_column``.

        Return

            A Pandas Dataframe with one row per employee: statistics, list of
            chat texts and the bot's answer.

        """
        if self._bot is None:
            raise ComponentError(
                "EmployeeBot: Bot is not configured, start() must run first"
            )
        # Create the summary statistics about employees conversations:
        # Aggregate the data by `sender_name` and count the number of messages
        message_counts = self.data.groupby('sender_name')['text'].count().reset_index(name='Message_Count')
        # Total number of messages across all employees
        total_messages = message_counts['Message_Count'].sum()
        # Calculate the percentage of messages sent by each employee
        message_counts['Percentage_of_Messages'] = (message_counts['Message_Count'] / total_messages) * 100
        # Rank the employees based on the number of messages sent
        message_counts['Rank_by_Message_Count'] = message_counts['Message_Count'].rank(ascending=False)
        # combine message_counts into self.data dataframe:
        self.data = self.data.merge(message_counts, on='sender_name')
        # Group the data by `sender_name` and calculate the average message length
        avg_message_length = self.data.groupby('sender_name')['text'].apply(
            lambda x: x.str.len().mean()
        ).reset_index(name='Avg_Message_Length')
        # combine avg_message_length into self.data dataframe:
        self.data = self.data.merge(avg_message_length, on='sender_name')

        # Filter for negative emotions and sentiments
        negative_interactions = self.data[
            self.data['predicted_emotion'].isin(['anger', 'disgust', 'fear', 'sadness'])
            | (self.data['predicted_sentiment'] == 'Negative')
        ]
        # Group by `sender_name` and count the number of negative messages
        negative_counts = negative_interactions.groupby('sender_name').size().reset_index(name='Negative_Count')

        # Filter for positive emotions and sentiments
        positive_interactions = self.data[
            self.data['predicted_emotion'].isin(['joy'])
            | (self.data['predicted_sentiment'] == 'Positive')
        ]
        # Group by `sender_name` and count the number of positive messages
        positive_counts = positive_interactions.groupby('sender_name').size().reset_index(name='Positive_Count')

        # Merge the negative and positive counts to self.data (a left merge
        # keeps the row order of self.data, so the counts need no sorting):
        self.data = self.data.merge(negative_counts, on='sender_name', how='left')
        self.data = self.data.merge(positive_counts, on='sender_name', how='left')
        # Fill NaN values with 0 (employees without negative/positive messages)
        self.data = self.data.fillna(0)

        columns = [
            "sender_name",
            'Positive_Count',
            'Negative_Count',
            'Avg_Message_Length',
            'Percentage_of_Messages',
            'Rank_by_Message_Count',
            'Message_Count'
        ]
        # Group by all created columns + sender name, and convert to a list the "text" column.
        grouped = self.data.groupby(columns)['text'].apply(list).reset_index()
        employee_evaluation = {}
        for _, row in grouped.iterrows():
            employee = row['sender_name']
            texts = row['text']
            formatted_question = self.generate_prompt(grouped, employee, texts)
            result = await self._bot.question(
                question=formatted_question,
                return_docs=False
            )
            employee_evaluation[employee] = {
                "answer": result.answer
            }
        # Join "grouped" dataset with employee evaluation based on sender_name
        grouped[self.output_column] = grouped['sender_name'].map(
            lambda x: employee_evaluation[x]['answer']
        )
        # return the grouped dataframe
        self._result = grouped
        return self._result

    async def close(self):
        """Nothing to release for this component."""
        pass
|