flowtask 5.8.4__cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowtask/__init__.py +93 -0
- flowtask/__main__.py +38 -0
- flowtask/bots/__init__.py +6 -0
- flowtask/bots/check.py +93 -0
- flowtask/bots/codebot.py +51 -0
- flowtask/components/ASPX.py +148 -0
- flowtask/components/AddDataset.py +352 -0
- flowtask/components/Amazon.py +523 -0
- flowtask/components/AutoTask.py +314 -0
- flowtask/components/Azure.py +80 -0
- flowtask/components/AzureUsers.py +106 -0
- flowtask/components/BaseAction.py +91 -0
- flowtask/components/BaseLoop.py +198 -0
- flowtask/components/BestBuy.py +800 -0
- flowtask/components/CSVToGCS.py +120 -0
- flowtask/components/CompanyScraper/__init__.py +1 -0
- flowtask/components/CompanyScraper/parsers/__init__.py +6 -0
- flowtask/components/CompanyScraper/parsers/base.py +102 -0
- flowtask/components/CompanyScraper/parsers/explorium.py +192 -0
- flowtask/components/CompanyScraper/parsers/leadiq.py +206 -0
- flowtask/components/CompanyScraper/parsers/rocket.py +133 -0
- flowtask/components/CompanyScraper/parsers/siccode.py +109 -0
- flowtask/components/CompanyScraper/parsers/visualvisitor.py +130 -0
- flowtask/components/CompanyScraper/parsers/zoominfo.py +118 -0
- flowtask/components/CompanyScraper/scrapper.py +1054 -0
- flowtask/components/CopyTo.py +177 -0
- flowtask/components/CopyToBigQuery.py +243 -0
- flowtask/components/CopyToMongoDB.py +291 -0
- flowtask/components/CopyToPg.py +609 -0
- flowtask/components/CopyToRethink.py +207 -0
- flowtask/components/CreateGCSBucket.py +102 -0
- flowtask/components/CreateReport/CreateReport.py +228 -0
- flowtask/components/CreateReport/__init__.py +9 -0
- flowtask/components/CreateReport/charts/__init__.py +15 -0
- flowtask/components/CreateReport/charts/bar.py +51 -0
- flowtask/components/CreateReport/charts/base.py +66 -0
- flowtask/components/CreateReport/charts/pie.py +64 -0
- flowtask/components/CreateReport/utils.py +9 -0
- flowtask/components/CustomerSatisfaction.py +196 -0
- flowtask/components/DataInput.py +200 -0
- flowtask/components/DateList.py +255 -0
- flowtask/components/DbClient.py +163 -0
- flowtask/components/DialPad.py +146 -0
- flowtask/components/DocumentDBQuery.py +200 -0
- flowtask/components/DownloadFrom.py +371 -0
- flowtask/components/DownloadFromD2L.py +113 -0
- flowtask/components/DownloadFromFTP.py +181 -0
- flowtask/components/DownloadFromIMAP.py +315 -0
- flowtask/components/DownloadFromS3.py +198 -0
- flowtask/components/DownloadFromSFTP.py +265 -0
- flowtask/components/DownloadFromSharepoint.py +110 -0
- flowtask/components/DownloadFromSmartSheet.py +114 -0
- flowtask/components/DownloadS3File.py +229 -0
- flowtask/components/Dummy.py +59 -0
- flowtask/components/DuplicatePhoto.py +411 -0
- flowtask/components/EmployeeEvaluation.py +237 -0
- flowtask/components/ExecuteSQL.py +323 -0
- flowtask/components/ExtractHTML.py +178 -0
- flowtask/components/FileBase.py +178 -0
- flowtask/components/FileCopy.py +181 -0
- flowtask/components/FileDelete.py +82 -0
- flowtask/components/FileExists.py +146 -0
- flowtask/components/FileIteratorDelete.py +112 -0
- flowtask/components/FileList.py +194 -0
- flowtask/components/FileOpen.py +75 -0
- flowtask/components/FileRead.py +120 -0
- flowtask/components/FileRename.py +106 -0
- flowtask/components/FilterIf.py +284 -0
- flowtask/components/FilterRows/FilterRows.py +200 -0
- flowtask/components/FilterRows/__init__.py +10 -0
- flowtask/components/FilterRows/functions.py +4 -0
- flowtask/components/GCSToBigQuery.py +103 -0
- flowtask/components/GoogleA4.py +150 -0
- flowtask/components/GoogleGeoCoding.py +344 -0
- flowtask/components/GooglePlaces.py +315 -0
- flowtask/components/GoogleSearch.py +539 -0
- flowtask/components/HTTPClient.py +268 -0
- flowtask/components/ICIMS.py +146 -0
- flowtask/components/IF.py +179 -0
- flowtask/components/IcimsFolderCopy.py +173 -0
- flowtask/components/ImageFeatures/__init__.py +5 -0
- flowtask/components/ImageFeatures/process.py +233 -0
- flowtask/components/IteratorBase.py +251 -0
- flowtask/components/LangchainLoader/__init__.py +5 -0
- flowtask/components/LangchainLoader/loader.py +194 -0
- flowtask/components/LangchainLoader/loaders/__init__.py +22 -0
- flowtask/components/LangchainLoader/loaders/abstract.py +362 -0
- flowtask/components/LangchainLoader/loaders/basepdf.py +50 -0
- flowtask/components/LangchainLoader/loaders/docx.py +91 -0
- flowtask/components/LangchainLoader/loaders/html.py +119 -0
- flowtask/components/LangchainLoader/loaders/pdfblocks.py +146 -0
- flowtask/components/LangchainLoader/loaders/pdfmark.py +79 -0
- flowtask/components/LangchainLoader/loaders/pdftables.py +135 -0
- flowtask/components/LangchainLoader/loaders/qa.py +67 -0
- flowtask/components/LangchainLoader/loaders/txt.py +55 -0
- flowtask/components/LeadIQ.py +650 -0
- flowtask/components/Loop.py +253 -0
- flowtask/components/Lowes.py +334 -0
- flowtask/components/MS365Usage.py +156 -0
- flowtask/components/MSTeamsMessages.py +320 -0
- flowtask/components/MarketClustering.py +1051 -0
- flowtask/components/MergeFiles.py +362 -0
- flowtask/components/MilvusOutput.py +87 -0
- flowtask/components/NearByStores.py +175 -0
- flowtask/components/NetworkNinja/__init__.py +6 -0
- flowtask/components/NetworkNinja/models/__init__.py +52 -0
- flowtask/components/NetworkNinja/models/abstract.py +177 -0
- flowtask/components/NetworkNinja/models/account.py +39 -0
- flowtask/components/NetworkNinja/models/client.py +19 -0
- flowtask/components/NetworkNinja/models/district.py +14 -0
- flowtask/components/NetworkNinja/models/events.py +101 -0
- flowtask/components/NetworkNinja/models/forms.py +499 -0
- flowtask/components/NetworkNinja/models/market.py +16 -0
- flowtask/components/NetworkNinja/models/organization.py +34 -0
- flowtask/components/NetworkNinja/models/photos.py +125 -0
- flowtask/components/NetworkNinja/models/project.py +44 -0
- flowtask/components/NetworkNinja/models/region.py +28 -0
- flowtask/components/NetworkNinja/models/store.py +203 -0
- flowtask/components/NetworkNinja/models/user.py +151 -0
- flowtask/components/NetworkNinja/router.py +854 -0
- flowtask/components/Odoo.py +175 -0
- flowtask/components/OdooInjector.py +192 -0
- flowtask/components/OpenFromXML.py +126 -0
- flowtask/components/OpenWeather.py +41 -0
- flowtask/components/OpenWithBase.py +616 -0
- flowtask/components/OpenWithPandas.py +715 -0
- flowtask/components/PGPDecrypt.py +199 -0
- flowtask/components/PandasIterator.py +187 -0
- flowtask/components/PandasToFile.py +189 -0
- flowtask/components/Paradox.py +339 -0
- flowtask/components/ParamIterator.py +117 -0
- flowtask/components/ParseHTML.py +84 -0
- flowtask/components/PlacerStores.py +249 -0
- flowtask/components/Pokemon.py +507 -0
- flowtask/components/PositiveBot.py +62 -0
- flowtask/components/PowerPointSlide.py +400 -0
- flowtask/components/PrintMessage.py +127 -0
- flowtask/components/ProductCompetitors/__init__.py +5 -0
- flowtask/components/ProductCompetitors/parsers/__init__.py +7 -0
- flowtask/components/ProductCompetitors/parsers/base.py +72 -0
- flowtask/components/ProductCompetitors/parsers/bestbuy.py +86 -0
- flowtask/components/ProductCompetitors/parsers/lowes.py +103 -0
- flowtask/components/ProductCompetitors/scrapper.py +155 -0
- flowtask/components/ProductCompliant.py +169 -0
- flowtask/components/ProductInfo/__init__.py +1 -0
- flowtask/components/ProductInfo/parsers/__init__.py +5 -0
- flowtask/components/ProductInfo/parsers/base.py +83 -0
- flowtask/components/ProductInfo/parsers/brother.py +97 -0
- flowtask/components/ProductInfo/parsers/canon.py +167 -0
- flowtask/components/ProductInfo/parsers/epson.py +118 -0
- flowtask/components/ProductInfo/parsers/hp.py +131 -0
- flowtask/components/ProductInfo/parsers/samsung.py +97 -0
- flowtask/components/ProductInfo/scraper.py +319 -0
- flowtask/components/ProductPricing.py +118 -0
- flowtask/components/QS.py +261 -0
- flowtask/components/QSBase.py +201 -0
- flowtask/components/QueryIterator.py +273 -0
- flowtask/components/QueryToInsert.py +327 -0
- flowtask/components/QueryToPandas.py +432 -0
- flowtask/components/RESTClient.py +195 -0
- flowtask/components/RethinkDBQuery.py +189 -0
- flowtask/components/Rsync.py +74 -0
- flowtask/components/RunSSH.py +59 -0
- flowtask/components/RunShell.py +71 -0
- flowtask/components/SalesForce.py +20 -0
- flowtask/components/SaveImageBank/__init__.py +257 -0
- flowtask/components/SchedulingVisits.py +592 -0
- flowtask/components/ScrapPage.py +216 -0
- flowtask/components/ScrapSearch.py +79 -0
- flowtask/components/SendNotify.py +257 -0
- flowtask/components/SentimentAnalysis.py +694 -0
- flowtask/components/ServiceScrapper/__init__.py +5 -0
- flowtask/components/ServiceScrapper/parsers/__init__.py +1 -0
- flowtask/components/ServiceScrapper/parsers/base.py +94 -0
- flowtask/components/ServiceScrapper/parsers/costco.py +93 -0
- flowtask/components/ServiceScrapper/scrapper.py +199 -0
- flowtask/components/SetVariables.py +156 -0
- flowtask/components/SubTask.py +182 -0
- flowtask/components/SuiteCRM.py +48 -0
- flowtask/components/Switch.py +175 -0
- flowtask/components/TableBase.py +148 -0
- flowtask/components/TableDelete.py +312 -0
- flowtask/components/TableInput.py +143 -0
- flowtask/components/TableOutput/TableOutput.py +384 -0
- flowtask/components/TableOutput/__init__.py +3 -0
- flowtask/components/TableSchema.py +534 -0
- flowtask/components/Target.py +223 -0
- flowtask/components/ThumbnailGenerator.py +156 -0
- flowtask/components/ToPandas.py +67 -0
- flowtask/components/TransformRows/TransformRows.py +507 -0
- flowtask/components/TransformRows/__init__.py +9 -0
- flowtask/components/TransformRows/functions.py +559 -0
- flowtask/components/TransposeRows.py +176 -0
- flowtask/components/UPCDatabase.py +86 -0
- flowtask/components/UnGzip.py +171 -0
- flowtask/components/Uncompress.py +172 -0
- flowtask/components/UniqueRows.py +126 -0
- flowtask/components/Unzip.py +107 -0
- flowtask/components/UpdateOperationalVars.py +147 -0
- flowtask/components/UploadTo.py +299 -0
- flowtask/components/UploadToS3.py +136 -0
- flowtask/components/UploadToSFTP.py +160 -0
- flowtask/components/UploadToSharepoint.py +205 -0
- flowtask/components/UserFunc.py +122 -0
- flowtask/components/VivaTracker.py +140 -0
- flowtask/components/WSDLClient.py +123 -0
- flowtask/components/Wait.py +18 -0
- flowtask/components/Walmart.py +199 -0
- flowtask/components/Workplace.py +134 -0
- flowtask/components/XMLToPandas.py +267 -0
- flowtask/components/Zammad/__init__.py +41 -0
- flowtask/components/Zammad/models.py +0 -0
- flowtask/components/ZoomInfoScraper.py +409 -0
- flowtask/components/__init__.py +104 -0
- flowtask/components/abstract.py +18 -0
- flowtask/components/flow.py +530 -0
- flowtask/components/google.py +335 -0
- flowtask/components/group.py +221 -0
- flowtask/components/py.typed +0 -0
- flowtask/components/reviewscrap.py +132 -0
- flowtask/components/tAutoincrement.py +117 -0
- flowtask/components/tConcat.py +109 -0
- flowtask/components/tExplode.py +119 -0
- flowtask/components/tFilter.py +184 -0
- flowtask/components/tGroup.py +236 -0
- flowtask/components/tJoin.py +270 -0
- flowtask/components/tMap/__init__.py +9 -0
- flowtask/components/tMap/functions.py +54 -0
- flowtask/components/tMap/tMap.py +450 -0
- flowtask/components/tMelt.py +112 -0
- flowtask/components/tMerge.py +114 -0
- flowtask/components/tOrder.py +93 -0
- flowtask/components/tPandas.py +94 -0
- flowtask/components/tPivot.py +71 -0
- flowtask/components/tPluckCols.py +76 -0
- flowtask/components/tUnnest.py +82 -0
- flowtask/components/user.py +401 -0
- flowtask/conf.py +457 -0
- flowtask/download.py +102 -0
- flowtask/events/__init__.py +11 -0
- flowtask/events/events/__init__.py +20 -0
- flowtask/events/events/abstract.py +95 -0
- flowtask/events/events/alerts/__init__.py +362 -0
- flowtask/events/events/alerts/colfunctions.py +131 -0
- flowtask/events/events/alerts/functions.py +158 -0
- flowtask/events/events/dummy.py +12 -0
- flowtask/events/events/exec.py +124 -0
- flowtask/events/events/file/__init__.py +7 -0
- flowtask/events/events/file/base.py +51 -0
- flowtask/events/events/file/copy.py +23 -0
- flowtask/events/events/file/delete.py +16 -0
- flowtask/events/events/interfaces/__init__.py +9 -0
- flowtask/events/events/interfaces/client.py +67 -0
- flowtask/events/events/interfaces/credentials.py +28 -0
- flowtask/events/events/interfaces/notifications.py +58 -0
- flowtask/events/events/jira.py +122 -0
- flowtask/events/events/log.py +26 -0
- flowtask/events/events/logerr.py +52 -0
- flowtask/events/events/notify.py +59 -0
- flowtask/events/events/notify_event.py +160 -0
- flowtask/events/events/publish.py +54 -0
- flowtask/events/events/sendfile.py +104 -0
- flowtask/events/events/task.py +97 -0
- flowtask/events/events/teams.py +98 -0
- flowtask/events/events/webhook.py +58 -0
- flowtask/events/manager.py +287 -0
- flowtask/exceptions.c +39393 -0
- flowtask/exceptions.cpython-310-x86_64-linux-gnu.so +0 -0
- flowtask/extensions/__init__.py +3 -0
- flowtask/extensions/abstract.py +82 -0
- flowtask/extensions/logging/__init__.py +65 -0
- flowtask/hooks/__init__.py +9 -0
- flowtask/hooks/actions/__init__.py +22 -0
- flowtask/hooks/actions/abstract.py +66 -0
- flowtask/hooks/actions/dummy.py +23 -0
- flowtask/hooks/actions/jira.py +74 -0
- flowtask/hooks/actions/rest.py +320 -0
- flowtask/hooks/actions/sampledata.py +37 -0
- flowtask/hooks/actions/sensor.py +23 -0
- flowtask/hooks/actions/task.py +9 -0
- flowtask/hooks/actions/ticket.py +37 -0
- flowtask/hooks/actions/zammad.py +55 -0
- flowtask/hooks/hook.py +62 -0
- flowtask/hooks/models.py +17 -0
- flowtask/hooks/service.py +187 -0
- flowtask/hooks/step.py +91 -0
- flowtask/hooks/types/__init__.py +23 -0
- flowtask/hooks/types/base.py +129 -0
- flowtask/hooks/types/brokers/__init__.py +11 -0
- flowtask/hooks/types/brokers/base.py +54 -0
- flowtask/hooks/types/brokers/mqtt.py +35 -0
- flowtask/hooks/types/brokers/rabbitmq.py +82 -0
- flowtask/hooks/types/brokers/redis.py +83 -0
- flowtask/hooks/types/brokers/sqs.py +44 -0
- flowtask/hooks/types/fs.py +232 -0
- flowtask/hooks/types/http.py +49 -0
- flowtask/hooks/types/imap.py +200 -0
- flowtask/hooks/types/jira.py +279 -0
- flowtask/hooks/types/mail.py +205 -0
- flowtask/hooks/types/postgres.py +98 -0
- flowtask/hooks/types/responses/__init__.py +8 -0
- flowtask/hooks/types/responses/base.py +5 -0
- flowtask/hooks/types/sharepoint.py +288 -0
- flowtask/hooks/types/ssh.py +141 -0
- flowtask/hooks/types/tagged.py +59 -0
- flowtask/hooks/types/upload.py +85 -0
- flowtask/hooks/types/watch.py +71 -0
- flowtask/hooks/types/web.py +36 -0
- flowtask/interfaces/AzureClient.py +137 -0
- flowtask/interfaces/AzureGraph.py +839 -0
- flowtask/interfaces/Boto3Client.py +326 -0
- flowtask/interfaces/DropboxClient.py +173 -0
- flowtask/interfaces/ExcelHandler.py +94 -0
- flowtask/interfaces/FTPClient.py +131 -0
- flowtask/interfaces/GoogleCalendar.py +201 -0
- flowtask/interfaces/GoogleClient.py +133 -0
- flowtask/interfaces/GoogleDrive.py +127 -0
- flowtask/interfaces/GoogleGCS.py +89 -0
- flowtask/interfaces/GoogleGeocoding.py +93 -0
- flowtask/interfaces/GoogleLang.py +114 -0
- flowtask/interfaces/GooglePub.py +61 -0
- flowtask/interfaces/GoogleSheet.py +68 -0
- flowtask/interfaces/IMAPClient.py +137 -0
- flowtask/interfaces/O365Calendar.py +113 -0
- flowtask/interfaces/O365Client.py +220 -0
- flowtask/interfaces/OneDrive.py +284 -0
- flowtask/interfaces/Outlook.py +155 -0
- flowtask/interfaces/ParrotBot.py +130 -0
- flowtask/interfaces/SSHClient.py +378 -0
- flowtask/interfaces/Sharepoint.py +496 -0
- flowtask/interfaces/__init__.py +36 -0
- flowtask/interfaces/azureauth.py +119 -0
- flowtask/interfaces/cache.py +201 -0
- flowtask/interfaces/client.py +82 -0
- flowtask/interfaces/compress.py +525 -0
- flowtask/interfaces/credentials.py +124 -0
- flowtask/interfaces/d2l.py +239 -0
- flowtask/interfaces/databases/__init__.py +5 -0
- flowtask/interfaces/databases/db.py +223 -0
- flowtask/interfaces/databases/documentdb.py +55 -0
- flowtask/interfaces/databases/rethink.py +39 -0
- flowtask/interfaces/dataframes/__init__.py +11 -0
- flowtask/interfaces/dataframes/abstract.py +21 -0
- flowtask/interfaces/dataframes/arrow.py +71 -0
- flowtask/interfaces/dataframes/dt.py +69 -0
- flowtask/interfaces/dataframes/pandas.py +167 -0
- flowtask/interfaces/dataframes/polars.py +60 -0
- flowtask/interfaces/db.py +263 -0
- flowtask/interfaces/env.py +46 -0
- flowtask/interfaces/func.py +137 -0
- flowtask/interfaces/http.py +1780 -0
- flowtask/interfaces/locale.py +40 -0
- flowtask/interfaces/log.py +75 -0
- flowtask/interfaces/mask.py +143 -0
- flowtask/interfaces/notification.py +154 -0
- flowtask/interfaces/playwright.py +339 -0
- flowtask/interfaces/powerpoint.py +368 -0
- flowtask/interfaces/py.typed +0 -0
- flowtask/interfaces/qs.py +376 -0
- flowtask/interfaces/result.py +87 -0
- flowtask/interfaces/selenium_service.py +779 -0
- flowtask/interfaces/smartsheet.py +154 -0
- flowtask/interfaces/stat.py +39 -0
- flowtask/interfaces/task.py +96 -0
- flowtask/interfaces/template.py +118 -0
- flowtask/interfaces/vectorstores/__init__.py +1 -0
- flowtask/interfaces/vectorstores/abstract.py +133 -0
- flowtask/interfaces/vectorstores/milvus.py +669 -0
- flowtask/interfaces/zammad.py +107 -0
- flowtask/models.py +193 -0
- flowtask/parsers/__init__.py +15 -0
- flowtask/parsers/_yaml.c +11978 -0
- flowtask/parsers/_yaml.cpython-310-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/argparser.py +235 -0
- flowtask/parsers/base.c +15155 -0
- flowtask/parsers/base.cpython-310-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/json.c +11968 -0
- flowtask/parsers/json.cpython-310-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/maps.py +49 -0
- flowtask/parsers/toml.c +11968 -0
- flowtask/parsers/toml.cpython-310-x86_64-linux-gnu.so +0 -0
- flowtask/plugins/__init__.py +16 -0
- flowtask/plugins/components/__init__.py +0 -0
- flowtask/plugins/handler/__init__.py +45 -0
- flowtask/plugins/importer.py +31 -0
- flowtask/plugins/sources/__init__.py +0 -0
- flowtask/runner.py +283 -0
- flowtask/scheduler/__init__.py +9 -0
- flowtask/scheduler/functions.py +493 -0
- flowtask/scheduler/handlers/__init__.py +8 -0
- flowtask/scheduler/handlers/manager.py +504 -0
- flowtask/scheduler/handlers/models.py +58 -0
- flowtask/scheduler/handlers/service.py +72 -0
- flowtask/scheduler/notifications.py +65 -0
- flowtask/scheduler/scheduler.py +993 -0
- flowtask/services/__init__.py +0 -0
- flowtask/services/bots/__init__.py +0 -0
- flowtask/services/bots/telegram.py +264 -0
- flowtask/services/files/__init__.py +11 -0
- flowtask/services/files/manager.py +522 -0
- flowtask/services/files/model.py +37 -0
- flowtask/services/files/service.py +767 -0
- flowtask/services/jira/__init__.py +3 -0
- flowtask/services/jira/jira_actions.py +191 -0
- flowtask/services/tasks/__init__.py +13 -0
- flowtask/services/tasks/launcher.py +213 -0
- flowtask/services/tasks/manager.py +323 -0
- flowtask/services/tasks/service.py +275 -0
- flowtask/services/tasks/task_manager.py +376 -0
- flowtask/services/tasks/tasks.py +155 -0
- flowtask/storages/__init__.py +16 -0
- flowtask/storages/exceptions.py +12 -0
- flowtask/storages/files/__init__.py +8 -0
- flowtask/storages/files/abstract.py +29 -0
- flowtask/storages/files/filesystem.py +66 -0
- flowtask/storages/tasks/__init__.py +19 -0
- flowtask/storages/tasks/abstract.py +26 -0
- flowtask/storages/tasks/database.py +33 -0
- flowtask/storages/tasks/filesystem.py +108 -0
- flowtask/storages/tasks/github.py +119 -0
- flowtask/storages/tasks/memory.py +45 -0
- flowtask/storages/tasks/row.py +25 -0
- flowtask/tasks/__init__.py +0 -0
- flowtask/tasks/abstract.py +526 -0
- flowtask/tasks/command.py +118 -0
- flowtask/tasks/pile.py +486 -0
- flowtask/tasks/py.typed +0 -0
- flowtask/tasks/task.py +778 -0
- flowtask/template/__init__.py +161 -0
- flowtask/tests.py +257 -0
- flowtask/types/__init__.py +8 -0
- flowtask/types/typedefs.c +11347 -0
- flowtask/types/typedefs.cpython-310-x86_64-linux-gnu.so +0 -0
- flowtask/utils/__init__.py +24 -0
- flowtask/utils/constants.py +117 -0
- flowtask/utils/encoders.py +21 -0
- flowtask/utils/executor.py +112 -0
- flowtask/utils/functions.cpp +14280 -0
- flowtask/utils/functions.cpython-310-x86_64-linux-gnu.so +0 -0
- flowtask/utils/json.cpp +13349 -0
- flowtask/utils/json.cpython-310-x86_64-linux-gnu.so +0 -0
- flowtask/utils/mail.py +63 -0
- flowtask/utils/parseqs.c +13324 -0
- flowtask/utils/parserqs.cpython-310-x86_64-linux-gnu.so +0 -0
- flowtask/utils/stats.py +308 -0
- flowtask/utils/transformations.py +74 -0
- flowtask/utils/uv.py +12 -0
- flowtask/utils/validators.py +97 -0
- flowtask/version.py +11 -0
- flowtask-5.8.4.dist-info/LICENSE +201 -0
- flowtask-5.8.4.dist-info/METADATA +209 -0
- flowtask-5.8.4.dist-info/RECORD +470 -0
- flowtask-5.8.4.dist-info/WHEEL +6 -0
- flowtask-5.8.4.dist-info/entry_points.txt +3 -0
- flowtask-5.8.4.dist-info/top_level.txt +2 -0
- plugins/components/CreateQR.py +39 -0
- plugins/components/TestComponent.py +28 -0
- plugins/components/Use1.py +13 -0
- plugins/components/Workplace.py +117 -0
- plugins/components/__init__.py +3 -0
- plugins/sources/__init__.py +0 -0
- plugins/sources/get_populartimes.py +78 -0
- plugins/sources/google.py +150 -0
- plugins/sources/hubspot.py +679 -0
- plugins/sources/icims.py +679 -0
- plugins/sources/mobileinsight.py +501 -0
- plugins/sources/newrelic.py +262 -0
- plugins/sources/uap.py +268 -0
- plugins/sources/venu.py +244 -0
- plugins/sources/vocinity.py +314 -0
@@ -0,0 +1,189 @@
|
|
1
|
+
|
2
|
+
import asyncio
|
3
|
+
from typing import List, Dict, Union
|
4
|
+
from collections.abc import Callable
|
5
|
+
import rethinkdb as r
|
6
|
+
from ..exceptions import ComponentError, DataNotFound, ConfigError
|
7
|
+
from .flow import FlowComponent
|
8
|
+
from ..interfaces.dataframes import PandasDataframe
|
9
|
+
from ..interfaces import TemplateSupport
|
10
|
+
from ..interfaces.databases.rethink import RethinkDBSupport
|
11
|
+
|
12
|
+
|
13
|
+
class RethinkDBQuery(
    RethinkDBSupport,
    FlowComponent,
    PandasDataframe,
    TemplateSupport,
):
    """
    RethinkDBQuery.

    Execute a query against a RethinkDB database and retrieve the results,
    using asyncDB as backend.

    The query is built by mapping component attributes to RethinkDB methods.
    The attributes "table", "filter", "order_by", "limit" and "pluck"
    (also available as "columns") are supported.

    Supported filter operators: "in", "gt", "eq", "lt", "ge", "le".

    Example:

    ```yaml
    RethinkDBQuery:
      table: stores_reviews
      schema: epson
      filter:
        - rating:
            gt: 4
        - rating:
            lt: 6
      order_by:
        - rating: desc
      limit: 50
      columns:
        - store_id
        - store_name
        - formatted_address
        - latitude
        - longitude
        - reviews
        - rating
        - user_ratings_total
      as_dataframe: true
    ```

    """
    def __init__(
        self,
        loop: asyncio.AbstractEventLoop = None,
        job: Callable = None,
        stat: Callable = None,
        **kwargs,
    ) -> None:
        """Init Method.

        Reads the optional ``table`` and ``schema`` keyword arguments and
        forwards everything to the parent classes.
        """
        self.table = kwargs.get('table', None)
        self.schema = kwargs.get('schema', None)
        super().__init__(loop=loop, job=job, stat=stat, **kwargs)
        self._db = None

    async def close(self):
        """Close the connection to the RethinkDB database."""
        if self._db:
            try:
                await self._db.close()
            except Exception:
                # Closing is best-effort: a failure while releasing the
                # connection must not mask the outcome of the component.
                pass
            finally:
                # Always drop the reference, even if close() was interrupted.
                self._db = None

    async def start(self, **kwargs):
        """Validate the configuration and prepare the query attributes.

        Raises:
            ConfigError: if ``table`` or ``schema`` were not provided.

        Returns:
            True when the component is ready to run.
        """
        await super().start(**kwargs)
        # __init__ always assigns both attributes (defaulting to None), so
        # the value has to be checked, not just the attribute's existence.
        if not getattr(self, 'table', None):
            raise ConfigError(
                "'table' attribute is required in the RethinkDBQuery."
            )
        if not getattr(self, 'schema', None):
            raise ConfigError(
                "'schema' attribute is required in the RethinkDBQuery."
            )
        # Replacing with Masking if needed.
        self.schema = self.mask_replacement(self.schema)
        self.table = self.mask_replacement(self.table)
        if hasattr(self, 'columns'):
            # "columns" is used as "pluck"
            self.pluck = self.columns
        return True

    def _filter_criteria(self, engine, cursor):
        """Apply every entry of ``self.filter`` to the query.

        Each entry is a mapping ``{field: {operator: value}}``; only the first
        field and the first operator of an entry are used.  "in" is applied
        immediately as its own filter, while the comparison operators are
        AND-ed together and applied as one final filter.

        Raises:
            ComponentError: if an operator is not supported.

        Returns:
            The query with the filters applied.
        """
        comparisons = ('gt', 'eq', 'lt', 'ge', 'le')
        result = engine.expr(True)
        for args in self.filter:
            field, inner_args = next(iter(args.items()))
            func, value = next(iter(inner_args.items()))
            if func == 'in':
                cursor = cursor.filter(
                    (
                        lambda exp: engine.expr(value)
                        .coerce_to("array")
                        .contains(exp[field])
                    )
                )
            elif func in comparisons:
                # engine.row[field].gt(value), .eq(value), ...
                result = result.and_(
                    getattr(engine.row[field], func)(value)
                )
            else:
                # Ignoring an unknown operator would silently return
                # unfiltered rows; fail the same way _order_by does.
                raise ComponentError(
                    f"Invalid filter operator: {func}"
                )
        cursor = cursor.filter(result)
        return cursor

    def _order_by(self, engine, cursor):
        """Apply the ``self.order_by`` clauses to the query.

        Each entry is a mapping ``{field: direction}`` where direction is
        "asc" or "desc" (case-insensitive).

        Raises:
            ComponentError: if a direction is neither "asc" nor "desc".

        Returns:
            The query with the ordering applied.
        """
        order_clauses = []
        for order in self.order_by:
            field, direction = next(iter(order.items()))
            if direction.lower() == 'asc':
                order_clauses.append(engine.asc(field))
            elif direction.lower() == 'desc':
                order_clauses.append(engine.desc(field))
            else:
                raise ComponentError(
                    f"Invalid order direction: {direction}"
                )
        if order_clauses:
            cursor = cursor.order_by(*order_clauses)
        return cursor

    async def run(self):
        """Execute the RethinkDB query and retrieve the results.

        Raises:
            ComponentError: wrapping any error raised while building or
                executing the query.

        Returns:
            A dataframe when ``as_dataframe`` is True, otherwise the list of
            retrieved documents.
        """
        if not self._db:
            # TODO: add support for datasources.
            self._db = self.default_connection()
        try:
            async with await self._db.connection() as conn:
                engine = conn.engine()
                # select the database and table to query
                cursor = engine.db(self.schema).table(self.table)
                if hasattr(self, 'filter'):
                    # Build a Filter functionality:
                    cursor = self._filter_criteria(engine, cursor)
                if hasattr(self, 'order_by'):
                    cursor = self._order_by(engine, cursor)
                if hasattr(self, 'limit'):
                    cursor = cursor.limit(self.limit)
                if hasattr(self, 'pluck'):
                    cursor = cursor.pluck(self.pluck)
                data = []
                cursor = await cursor.run(conn.get_connection())
                if isinstance(cursor, list):
                    # the query was already resolved to a list
                    data = cursor
                else:
                    # otherwise consume the cursor item by item
                    while await cursor.fetch_next():
                        item = await cursor.next()
                        data.append(item)
                # Check if return as Dataframe:
                if getattr(self, 'as_dataframe', False) is True:
                    self._result = await self.create_dataframe(data)
                else:
                    self._result = data
                return self._result
        except Exception as e:
            raise ComponentError(
                f"Error executing RethinkDB query: {e}"
            ) from e
|
@@ -0,0 +1,74 @@
|
|
1
|
+
from collections.abc import Callable
|
2
|
+
import asyncio
|
3
|
+
import asyncssh
|
4
|
+
from ..exceptions import ComponentError
|
5
|
+
from .RunSSH import RunSSH
|
6
|
+
|
7
|
+
|
8
|
+
class Rsync(RunSSH):
    """
    Rsync.

    Run an ``rsync`` command over the SSH connection opened by RunSSH,
    copying ``source["directory"]`` to ``destination["directory"]`` on
    ``destination["user"]@destination["server"]``.
    """

    def __init__(
        self,
        loop: asyncio.AbstractEventLoop = None,
        job: Callable = None,
        stat: Callable = None,
        **kwargs,
    ):
        """Init Method: sets the default rsync flags before the parent init."""
        self.flags: str = "azrv"
        super().__init__(loop=loop, job=job, stat=stat, **kwargs)

    async def start(self, **kwargs):
        """Resolve host, credentials, source and destination settings.

        Raises:
            ComponentError: if ``source`` has no "directory" key.

        Returns:
            True when the component is ready to run.
        """
        # RunSSH.start is a coroutine: without "await" the host and the
        # credentials would never be processed.
        await super().start(**kwargs)
        if hasattr(self, "source"):  # using the destination philosophy
            try:
                if hasattr(self, "masks"):
                    self.source_dir = self.mask_replacement(self.source["directory"])
                else:
                    self.source_dir = self.source["directory"]
            except KeyError as exc:
                raise ComponentError(
                    "Rsync Error: you must specify a source directory"
                ) from exc
        if hasattr(self, "destination"):
            if hasattr(self, "masks"):
                self.destination_dir = self.mask_replacement(
                    self.destination["directory"]
                )
            else:
                self.destination_dir = self.destination["directory"]
            # also, calculate the destination server:
            self.dest_server = self.mask_replacement(self.destination["server"])
            self.dest_user = self.mask_replacement(self.destination["user"])
            try:
                self.dest_port = self.mask_replacement(self.destination["port"])
            except KeyError:
                # the port is optional
                self.dest_port = None
        return True

    async def run(self):
        """Open the SSH connection and execute the rsync command.

        A failing command is logged and its exit details are returned
        instead of raising.

        Raises:
            ComponentError: if the source or destination settings are missing.

        Returns:
            dict with the "exit_status", "returncode" and "error" (stderr)
            of the rsync command.
        """
        required = ("source_dir", "destination_dir", "dest_server", "dest_user")
        missing = [name for name in required if not hasattr(self, name)]
        if missing:
            # fail before opening a connection that could not be used
            raise ComponentError(
                f"Rsync Error: missing configuration for {', '.join(missing)}"
            )
        await self.open(
            host=self.host,
            port=self.port,
            tunnel=self.tunnel,
            credentials=self.credentials,
        )
        destination = f"{self.dest_user}@{self.dest_server}:{self.destination_dir}"
        command = f"rsync -{self.flags} {self.source_dir} {destination}"
        dest_port = getattr(self, "dest_port", None)
        if dest_port is not None:
            # NOTE(review): rsync documents --port for daemon ("::") targets;
            # for user@host:dir an `-e "ssh -p PORT"` may be what is
            # intended -- confirm before changing.
            command = f"{command} --port={dest_port}"
        try:
            rst = await self._connection.run(command, check=True)
        except asyncssh.process.ProcessError as err:
            self._logger.error(f"Error executing command: {err}")
            # keep the failure details so the metrics and the returned
            # result are always defined
            result = {
                "exit_status": err.exit_status,
                "returncode": err.returncode,
                "error": err.stderr,
            }
        else:
            result = {
                "exit_status": rst.exit_status,
                "returncode": rst.returncode,
                "error": rst.stderr,
            }
        self.add_metric("SSH: COMMAND", command)
        self.add_metric("SSH: RESULT", result)
        self._result = result
        return result
|
@@ -0,0 +1,59 @@
|
|
1
|
+
import asyncio
|
2
|
+
from collections.abc import Callable
|
3
|
+
import logging
|
4
|
+
import asyncssh
|
5
|
+
from .flow import FlowComponent
|
6
|
+
from ..interfaces.SSHClient import SSHClient
|
7
|
+
|
8
|
+
|
9
|
+
class RunSSH(SSHClient, FlowComponent):
    """
    RunSSH.

    Run any arbitrary command into an SSH server.

    Every entry of ``commands`` is passed through ``mask_replacement`` and
    executed in order over a single connection.
    """

    def __init__(
        self,
        loop: asyncio.AbstractEventLoop = None,
        job: Callable = None,
        stat: Callable = None,
        **kwargs,
    ):
        """Init Method: forwards every argument to the parent classes."""
        super().__init__(loop=loop, job=job, stat=stat, **kwargs)

    async def start(self, **kwargs):
        """Start.

        Processing variables and credentials.

        Errors are logged and re-raised unchanged.

        Returns:
            True when the component is ready to run.
        """
        try:
            self.define_host()
            self.processing_credentials()
        except Exception as err:
            logging.error(err)
            raise
        return True

    async def run(self):
        """Open the SSH connection and execute every configured command.

        A failing command is logged and recorded with its exit details;
        the remaining commands are still executed.

        Returns:
            dict mapping each executed command to its "exit_status",
            "returncode" and "error" (stderr).
        """
        result = {}
        await self.open(
            host=self.host,
            port=self.port,
            tunnel=self.tunnel,
            credentials=self.credentials,
        )
        for command in self.commands:
            command = self.mask_replacement(command)
            try:
                rst = await self._connection.run(command, check=True)
            except asyncssh.process.ProcessError as err:
                logging.error(f"Error executing command: {err}")
                # record the failure so it is visible in the result and
                # metrics instead of being silently dropped
                result[command] = {
                    "exit_status": err.exit_status,
                    "returncode": err.returncode,
                    "error": err.stderr,
                }
            else:
                result[command] = {
                    "exit_status": rst.exit_status,
                    "returncode": rst.returncode,
                    "error": rst.stderr,
                    # "stdout": rst.stdout
                }
        self.add_metric("SSH: COMMAND", result)
        self._result = result
        return result
|
@@ -0,0 +1,71 @@
|
|
1
|
+
import subprocess
|
2
|
+
import asyncio
|
3
|
+
from typing import List
|
4
|
+
from collections.abc import Callable
|
5
|
+
from navconfig.logging import logging
|
6
|
+
from .flow import FlowComponent
|
7
|
+
|
8
|
+
|
9
|
+
class RunShell(FlowComponent):
    """
    RunShell.

    Overview

        Execute a list of shell commands on the local machine.

    The commands are read from ``self.commands`` (defaults to an empty list
    and may be replaced by the keyword arguments handled by the parent
    constructor -- presumably the task definition; TODO confirm).
    When the component has a ``masks`` attribute, every command goes through
    ``mask_replacement`` before being executed.
    """

    def __init__(
        self,
        loop: asyncio.AbstractEventLoop = None,
        job: Callable = None,
        stat: Callable = None,
        **kwargs,
    ):
        """Init Method."""
        # Set the default before delegating to the parent constructor.
        self.commands: List = []
        super().__init__(loop=loop, job=job, stat=stat, **kwargs)

    async def start(self, **kwargs):
        """Nothing to prepare; always returns True."""
        return True

    async def close(self):
        """Nothing to release."""

    async def run(self):
        """Run every command in ``self.commands`` in order.

        Returns:
            bool: True when all commands exit with status 0 (also when there
            are no commands); False as soon as one command exits with a
            non-zero status. Remaining commands are not executed after a
            failure.
        """
        for cmd in self.commands:
            if hasattr(self, "masks"):
                cmd = self.mask_replacement(cmd)
            logging.debug("> %s", cmd)
            try:
                # NOTE(review): shell=True executes the string through the
                # shell; commands must come from trusted task definitions.
                # check=True raises CalledProcessError on a non-zero exit,
                # which is what makes the handler below reachable.
                completed = subprocess.run(
                    cmd,
                    shell=True,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                    check=True,
                )
            except subprocess.CalledProcessError as e:
                logging.error(f"Error in command: {e}")
                return False
            logging.debug("%s", (completed.stdout, completed.stderr))
        return True
|
@@ -0,0 +1,20 @@
|
|
1
|
+
from navconfig.logging import logging
|
2
|
+
from querysource.exceptions import DataNotFound as QSNotFound
|
3
|
+
from ..exceptions import ComponentError, DataNotFound
|
4
|
+
from .QSBase import QSBase
|
5
|
+
|
6
|
+
|
7
|
+
class SalesForce(QSBase):
    """SalesForce Connector."""

    type = "report"
    _driver = "salesforce"

    async def report(self):
        """Fetch the report by delegating to ``self._qs.report()``.

        Raises:
            DataNotFound: when the underlying call raises the QuerySource
                ``DataNotFound`` error.
            ComponentError: for any other exception (logged with traceback
                before being wrapped).
        """
        try:
            data = await self._qs.report()
        except QSNotFound as exc:
            raise DataNotFound(f"SalesForce: Report Not Found: {exc}") from exc
        except Exception as exc:
            logging.exception(exc)
            raise ComponentError(f"SalesForce ERROR: {exc!s}") from exc
        return data
|
@@ -0,0 +1,257 @@
|
|
1
|
+
from typing import Optional
|
2
|
+
from collections.abc import Callable
|
3
|
+
import re
|
4
|
+
import asyncio
|
5
|
+
import asyncpg
|
6
|
+
from PIL.TiffImagePlugin import IFDRational
|
7
|
+
from pgvector.asyncpg import register_vector
|
8
|
+
from querysource.types.validators import Entity
|
9
|
+
from navigator.libs.json import JSONContent
|
10
|
+
from ..flow import FlowComponent
|
11
|
+
from ...exceptions import ConfigError, ComponentError
|
12
|
+
from ...conf import default_dsn
|
13
|
+
|
14
|
+
IDENT_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")


def qid(name: str) -> str:
    """
    Very small helper to quote SQL identifiers safely.

    The name must start with a letter or '_' and contain only letters,
    digits or '_'.

    :param name: Identifier (column, table or schema name) to quote.
    :return: The identifier wrapped in double quotes.
    :raises ValueError: If the name contains any other character or is empty.
    """
    # fullmatch() is required here: with match(), the trailing `$` also
    # matches just before a final newline, so "abc\n" would be accepted.
    if not IDENT_RE.fullmatch(name):
        raise ValueError(
            f"illegal identifier: {name!r}"
        )
    return '"' + name + '"'
|
26
|
+
|
27
|
+
class SaveImageBank(FlowComponent):
    """
    SaveImageBank.

    Save images into a postgreSQL Table, with UPSERT and optional evaluation for duplicates.

    Every row of the incoming DataFrame is checked against the table for
    duplicates (perceptual hash and embedding vector), flagged in a
    ``duplicated`` column and then upserted using ``id_column`` as the
    conflict key.
    """

    def __init__(
        self,
        loop: Optional[asyncio.AbstractEventLoop] = None,
        job: Callable | None = None,
        stat: Callable | None = None,
        **kwargs,
    ):
        # Column names in the DataFrame / table.
        self.id_column: str = kwargs.get("id_column", "photo_id")
        self.hash_column: str = kwargs.get("hash_column", "image_hash")
        self.vector_column: str = kwargs.get("vector_column", "image_vector")
        self.detections_column: str = kwargs.get("detections_column", "image_features")
        # Thresholds passed as query parameters to the duplicate checks.
        self.hamming_threshold: int = kwargs.get("hamming_threshold", 4)
        self.vector_threshold: float = kwargs.get("vector_threshold", 0.05)
        # Target table.
        self.tablename: str = kwargs.get("tablename", "image_bank")
        self.schema: str = kwargs.get("schema", "public")
        self.pool: asyncpg.Pool | None = None
        # Caps how many row handlers in run() proceed concurrently.
        self._semaphore = asyncio.Semaphore(16)
        self.drop_columns: list[str] = kwargs.get("drop_columns", [])
        super().__init__(loop=loop, job=job, stat=stat, **kwargs)
        # JSON encoder:
        self._encoder = JSONContent()

    def _qualified_tablename(self) -> str:
        """
        Get the qualified (quoted) table name.

        :raises ConfigError: If schema or table name is empty.
        :raises ValueError: If either name is not a legal identifier (see qid).
        """
        if not self.schema:
            raise ConfigError("Schema is not set.")
        if not self.tablename:
            raise ConfigError("Table name is not set.")
        return f"{qid(self.schema)}.{qid(self.tablename)}"

    def _build_insert_sql(self, columns: list[str]) -> str:
        """
        Produces something like:

        INSERT INTO schema.table (col1,col2,…) VALUES ($1,$2,…)
        ON CONFLICT (photo_id) DO UPDATE SET
        col1 = EXCLUDED.col1,
        ...
        """
        col_list = ", ".join(map(qid, columns))
        placeholders = ", ".join(f"${i}" for i in range(1, len(columns) + 1))
        # The conflict key itself is never updated.
        updates = ", ".join(
            f"{qid(c)} = EXCLUDED.{qid(c)}" for c in columns if c != self.id_column
        )

        return (
            f"INSERT INTO {self._qualified_tablename()} ({col_list}) "
            f"VALUES ({placeholders}) "
            f"ON CONFLICT ({qid(self.id_column)}) "
            f"DO UPDATE SET {updates};"
        )

    def _build_phash_sql(self) -> str:
        """
        Build the hash duplicate query.

        Parameters: $1 = hex hash, $2 = maximum bit difference, $3 = id of
        the current row (excluded from the search).
        """
        return (
            f"SELECT 1 FROM {self._qualified_tablename()} "
            f"WHERE {qid(self.id_column)} IS DISTINCT FROM $3 "
            f"AND bit_count(('x' || $1)::bit(256) # "
            f" ('x' || {qid(self.hash_column)})::bit(256)) "
            f" <= $2 "
            f"LIMIT 1;"
        )

    def _build_vector_sql(self) -> str:
        """
        Build the vector duplicate query.

        Parameters: $1 = vector, $2 = threshold, $3 = id of the current row
        (excluded from the search).
        """
        # NOTE(review): `<#>` looks like pgvector's negative inner product;
        # comparing it with a small positive threshold may match far more
        # rows than intended (cosine distance `<=>` may be what is wanted).
        # Left unchanged -- confirm against the table's data.
        return (
            f"SELECT 1 FROM {self._qualified_tablename()} "
            f"WHERE {qid(self.id_column)} IS DISTINCT FROM $3 "
            f"AND {qid(self.vector_column)} <#> $1::vector < $2 "
            f"LIMIT 1;"
        )

    async def pgvector_init(self, conn):
        """
        Initialize pgvector extension in PostgreSQL.

        Used as the ``init`` callback of the connection pool: registers the
        json/jsonb codecs and the vector type on the given connection.
        """
        # Setup jsonb encoder/decoder
        def _encoder(value):
            return self._encoder.dumps(value)  # pylint: disable=E1120

        def _decoder(value):
            return self._encoder.loads(value)  # pylint: disable=E1120

        await conn.set_type_codec(
            "json",
            encoder=_encoder,
            decoder=_decoder,
            schema="pg_catalog"
        )
        await conn.set_type_codec(
            "jsonb",
            encoder=_encoder,
            decoder=_decoder,
            schema="pg_catalog"
        )

        await register_vector(conn)

    # ──────────────────────────────────────────────────────────────
    # Setup / teardown
    # ──────────────────────────────────────────────────────────────
    async def start(self, **kwargs):
        """
        Validate the input, open the connection pool and verify the table.

        :raises ConfigError: If a required column is missing from the
            DataFrame, the pool cannot be created, or the schema/table does
            not exist.
        :raises ValueError: If schema or table name is not a legal identifier.
        """
        if self.previous:
            self.data = self.input

        # column checks
        for col in (self.id_column, self.hash_column,
                    self.vector_column, self.detections_column):
            if col not in self.data.columns:
                raise ConfigError(
                    f"Column '{col}' missing from DataFrame"
                )
        # Validate and quote the table name before opening any connection,
        # and probe with the same quoted name every other query uses.
        table = self._qualified_tablename()
        self.pool = await asyncpg.create_pool(
            dsn=default_dsn,
            min_size=1,
            max_size=4,
            max_queries=100,
            init=self.pgvector_init,
            timeout=10,
        )
        if not self.pool:
            raise ConfigError(
                "Database connection pool is not initialized."
            )
        # Check if the table exists
        async with self.pool.acquire() as conn:
            try:
                await conn.execute(f"SELECT 1 FROM {table} LIMIT 1")
            except asyncpg.exceptions.UndefinedTableError as err:
                raise ConfigError(
                    f"Table {self.schema}.{self.tablename} does not exist."
                ) from err
            except asyncpg.exceptions.UndefinedSchemaError as err:
                raise ConfigError(
                    f"Schema {self.schema} does not exist."
                ) from err
        if "duplicated" not in self.data.columns:
            self.data["duplicated"] = False
        # prepare SQL strings
        self._sql_phash = self._build_phash_sql()
        self._sql_vector = self._build_vector_sql()

    async def close(self):
        """Close the connection pool, if one was created."""
        if self.pool:
            await self.pool.close()

    # --------------- duplicate test --------------------
    async def _is_duplicated(self, conn, phash: str, vec: list[float], current_id: int) -> bool:
        """
        Check if the given hash and vector are duplicated in the database.
        :param conn: Database connection.
        :param phash: Perceptual hash of the image.
        :param vec: Vector representation of the image.
        :param current_id: Id of the row being checked; excluded from the search.
        :return: True only if BOTH the hash and the vector checks match.
        """
        # Both checks must match, so the vector query is only needed once
        # the hash check has found a candidate.
        if not phash:
            return False
        # phash first
        phash_dup = await conn.fetchval(
            self._sql_phash, phash, self.hamming_threshold, current_id
        )
        if not phash_dup:
            return False
        # vector second
        return bool(
            await conn.fetchval(self._sql_vector, vec, self.vector_threshold, current_id)
        )

    async def _upsert_row(self, conn, row) -> bool:
        """
        UPSERT a single row into the database.
        :param conn: Database connection.
        :param row: Row data to be inserted/updated; its "duplicated" key is
            overwritten with the result of the duplicate check.
        :return: True if the row was duplicated, False otherwise.
        """
        # --------------- UPSERT one row --------------------
        phash = row[self.hash_column]
        vec = row[self.vector_column]
        dup = await self._is_duplicated(
            conn,
            phash,
            vec,
            current_id=row[self.id_column]
        )

        # Add/overwrite duplicated flag in the in‑memory DF row
        row["duplicated"] = dup

        # Build VALUES array in the same order as self.data.columns
        values = [row[col] for col in self.data.columns]
        # asyncpg needs list/tuple for pgvector, ensure np → list
        idx_vec = self.data.columns.get_loc(self.vector_column)
        values[idx_vec] = list(values[idx_vec])

        await conn.execute(self._sql_insert, *values)
        return dup

    async def run(self):
        """
        Run the task.

        Drops ``drop_columns`` from the DataFrame, then checks and upserts
        every row concurrently, writing the duplicate flag back into the
        ``duplicated`` column.

        :return: The DataFrame with the ``duplicated`` column updated.
        :raises ConfigError: If the connection pool was not created.
        """
        if self.pool is None:
            raise ConfigError("Database connection pool is not initialized.")
        if self.drop_columns:
            # drop columns from dataframe:
            self.data.drop(
                columns=self.drop_columns,
                axis=1,
                inplace=True,
            )
        # The insert statement depends on the final set of columns.
        self._sql_insert = self._build_insert_sql(list(self.data.columns))

        # check for duplicates
        async def handle(idx):
            async with self._semaphore, self.pool.acquire() as conn:
                row = self.data.loc[idx].to_dict()
                await self._upsert_row(conn, row)
                # write duplicated flag back into DF
                self.data.at[idx, "duplicated"] = row["duplicated"]

        await asyncio.gather(*(handle(i) for i in self.data.index))

        self._result = self.data
        return self._result
|