flowtask 5.8.4__cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowtask/__init__.py +93 -0
- flowtask/__main__.py +38 -0
- flowtask/bots/__init__.py +6 -0
- flowtask/bots/check.py +93 -0
- flowtask/bots/codebot.py +51 -0
- flowtask/components/ASPX.py +148 -0
- flowtask/components/AddDataset.py +352 -0
- flowtask/components/Amazon.py +523 -0
- flowtask/components/AutoTask.py +314 -0
- flowtask/components/Azure.py +80 -0
- flowtask/components/AzureUsers.py +106 -0
- flowtask/components/BaseAction.py +91 -0
- flowtask/components/BaseLoop.py +198 -0
- flowtask/components/BestBuy.py +800 -0
- flowtask/components/CSVToGCS.py +120 -0
- flowtask/components/CompanyScraper/__init__.py +1 -0
- flowtask/components/CompanyScraper/parsers/__init__.py +6 -0
- flowtask/components/CompanyScraper/parsers/base.py +102 -0
- flowtask/components/CompanyScraper/parsers/explorium.py +192 -0
- flowtask/components/CompanyScraper/parsers/leadiq.py +206 -0
- flowtask/components/CompanyScraper/parsers/rocket.py +133 -0
- flowtask/components/CompanyScraper/parsers/siccode.py +109 -0
- flowtask/components/CompanyScraper/parsers/visualvisitor.py +130 -0
- flowtask/components/CompanyScraper/parsers/zoominfo.py +118 -0
- flowtask/components/CompanyScraper/scrapper.py +1054 -0
- flowtask/components/CopyTo.py +177 -0
- flowtask/components/CopyToBigQuery.py +243 -0
- flowtask/components/CopyToMongoDB.py +291 -0
- flowtask/components/CopyToPg.py +609 -0
- flowtask/components/CopyToRethink.py +207 -0
- flowtask/components/CreateGCSBucket.py +102 -0
- flowtask/components/CreateReport/CreateReport.py +228 -0
- flowtask/components/CreateReport/__init__.py +9 -0
- flowtask/components/CreateReport/charts/__init__.py +15 -0
- flowtask/components/CreateReport/charts/bar.py +51 -0
- flowtask/components/CreateReport/charts/base.py +66 -0
- flowtask/components/CreateReport/charts/pie.py +64 -0
- flowtask/components/CreateReport/utils.py +9 -0
- flowtask/components/CustomerSatisfaction.py +196 -0
- flowtask/components/DataInput.py +200 -0
- flowtask/components/DateList.py +255 -0
- flowtask/components/DbClient.py +163 -0
- flowtask/components/DialPad.py +146 -0
- flowtask/components/DocumentDBQuery.py +200 -0
- flowtask/components/DownloadFrom.py +371 -0
- flowtask/components/DownloadFromD2L.py +113 -0
- flowtask/components/DownloadFromFTP.py +181 -0
- flowtask/components/DownloadFromIMAP.py +315 -0
- flowtask/components/DownloadFromS3.py +198 -0
- flowtask/components/DownloadFromSFTP.py +265 -0
- flowtask/components/DownloadFromSharepoint.py +110 -0
- flowtask/components/DownloadFromSmartSheet.py +114 -0
- flowtask/components/DownloadS3File.py +229 -0
- flowtask/components/Dummy.py +59 -0
- flowtask/components/DuplicatePhoto.py +411 -0
- flowtask/components/EmployeeEvaluation.py +237 -0
- flowtask/components/ExecuteSQL.py +323 -0
- flowtask/components/ExtractHTML.py +178 -0
- flowtask/components/FileBase.py +178 -0
- flowtask/components/FileCopy.py +181 -0
- flowtask/components/FileDelete.py +82 -0
- flowtask/components/FileExists.py +146 -0
- flowtask/components/FileIteratorDelete.py +112 -0
- flowtask/components/FileList.py +194 -0
- flowtask/components/FileOpen.py +75 -0
- flowtask/components/FileRead.py +120 -0
- flowtask/components/FileRename.py +106 -0
- flowtask/components/FilterIf.py +284 -0
- flowtask/components/FilterRows/FilterRows.py +200 -0
- flowtask/components/FilterRows/__init__.py +10 -0
- flowtask/components/FilterRows/functions.py +4 -0
- flowtask/components/GCSToBigQuery.py +103 -0
- flowtask/components/GoogleA4.py +150 -0
- flowtask/components/GoogleGeoCoding.py +344 -0
- flowtask/components/GooglePlaces.py +315 -0
- flowtask/components/GoogleSearch.py +539 -0
- flowtask/components/HTTPClient.py +268 -0
- flowtask/components/ICIMS.py +146 -0
- flowtask/components/IF.py +179 -0
- flowtask/components/IcimsFolderCopy.py +173 -0
- flowtask/components/ImageFeatures/__init__.py +5 -0
- flowtask/components/ImageFeatures/process.py +233 -0
- flowtask/components/IteratorBase.py +251 -0
- flowtask/components/LangchainLoader/__init__.py +5 -0
- flowtask/components/LangchainLoader/loader.py +194 -0
- flowtask/components/LangchainLoader/loaders/__init__.py +22 -0
- flowtask/components/LangchainLoader/loaders/abstract.py +362 -0
- flowtask/components/LangchainLoader/loaders/basepdf.py +50 -0
- flowtask/components/LangchainLoader/loaders/docx.py +91 -0
- flowtask/components/LangchainLoader/loaders/html.py +119 -0
- flowtask/components/LangchainLoader/loaders/pdfblocks.py +146 -0
- flowtask/components/LangchainLoader/loaders/pdfmark.py +79 -0
- flowtask/components/LangchainLoader/loaders/pdftables.py +135 -0
- flowtask/components/LangchainLoader/loaders/qa.py +67 -0
- flowtask/components/LangchainLoader/loaders/txt.py +55 -0
- flowtask/components/LeadIQ.py +650 -0
- flowtask/components/Loop.py +253 -0
- flowtask/components/Lowes.py +334 -0
- flowtask/components/MS365Usage.py +156 -0
- flowtask/components/MSTeamsMessages.py +320 -0
- flowtask/components/MarketClustering.py +1051 -0
- flowtask/components/MergeFiles.py +362 -0
- flowtask/components/MilvusOutput.py +87 -0
- flowtask/components/NearByStores.py +175 -0
- flowtask/components/NetworkNinja/__init__.py +6 -0
- flowtask/components/NetworkNinja/models/__init__.py +52 -0
- flowtask/components/NetworkNinja/models/abstract.py +177 -0
- flowtask/components/NetworkNinja/models/account.py +39 -0
- flowtask/components/NetworkNinja/models/client.py +19 -0
- flowtask/components/NetworkNinja/models/district.py +14 -0
- flowtask/components/NetworkNinja/models/events.py +101 -0
- flowtask/components/NetworkNinja/models/forms.py +499 -0
- flowtask/components/NetworkNinja/models/market.py +16 -0
- flowtask/components/NetworkNinja/models/organization.py +34 -0
- flowtask/components/NetworkNinja/models/photos.py +125 -0
- flowtask/components/NetworkNinja/models/project.py +44 -0
- flowtask/components/NetworkNinja/models/region.py +28 -0
- flowtask/components/NetworkNinja/models/store.py +203 -0
- flowtask/components/NetworkNinja/models/user.py +151 -0
- flowtask/components/NetworkNinja/router.py +854 -0
- flowtask/components/Odoo.py +175 -0
- flowtask/components/OdooInjector.py +192 -0
- flowtask/components/OpenFromXML.py +126 -0
- flowtask/components/OpenWeather.py +41 -0
- flowtask/components/OpenWithBase.py +616 -0
- flowtask/components/OpenWithPandas.py +715 -0
- flowtask/components/PGPDecrypt.py +199 -0
- flowtask/components/PandasIterator.py +187 -0
- flowtask/components/PandasToFile.py +189 -0
- flowtask/components/Paradox.py +339 -0
- flowtask/components/ParamIterator.py +117 -0
- flowtask/components/ParseHTML.py +84 -0
- flowtask/components/PlacerStores.py +249 -0
- flowtask/components/Pokemon.py +507 -0
- flowtask/components/PositiveBot.py +62 -0
- flowtask/components/PowerPointSlide.py +400 -0
- flowtask/components/PrintMessage.py +127 -0
- flowtask/components/ProductCompetitors/__init__.py +5 -0
- flowtask/components/ProductCompetitors/parsers/__init__.py +7 -0
- flowtask/components/ProductCompetitors/parsers/base.py +72 -0
- flowtask/components/ProductCompetitors/parsers/bestbuy.py +86 -0
- flowtask/components/ProductCompetitors/parsers/lowes.py +103 -0
- flowtask/components/ProductCompetitors/scrapper.py +155 -0
- flowtask/components/ProductCompliant.py +169 -0
- flowtask/components/ProductInfo/__init__.py +1 -0
- flowtask/components/ProductInfo/parsers/__init__.py +5 -0
- flowtask/components/ProductInfo/parsers/base.py +83 -0
- flowtask/components/ProductInfo/parsers/brother.py +97 -0
- flowtask/components/ProductInfo/parsers/canon.py +167 -0
- flowtask/components/ProductInfo/parsers/epson.py +118 -0
- flowtask/components/ProductInfo/parsers/hp.py +131 -0
- flowtask/components/ProductInfo/parsers/samsung.py +97 -0
- flowtask/components/ProductInfo/scraper.py +319 -0
- flowtask/components/ProductPricing.py +118 -0
- flowtask/components/QS.py +261 -0
- flowtask/components/QSBase.py +201 -0
- flowtask/components/QueryIterator.py +273 -0
- flowtask/components/QueryToInsert.py +327 -0
- flowtask/components/QueryToPandas.py +432 -0
- flowtask/components/RESTClient.py +195 -0
- flowtask/components/RethinkDBQuery.py +189 -0
- flowtask/components/Rsync.py +74 -0
- flowtask/components/RunSSH.py +59 -0
- flowtask/components/RunShell.py +71 -0
- flowtask/components/SalesForce.py +20 -0
- flowtask/components/SaveImageBank/__init__.py +257 -0
- flowtask/components/SchedulingVisits.py +592 -0
- flowtask/components/ScrapPage.py +216 -0
- flowtask/components/ScrapSearch.py +79 -0
- flowtask/components/SendNotify.py +257 -0
- flowtask/components/SentimentAnalysis.py +694 -0
- flowtask/components/ServiceScrapper/__init__.py +5 -0
- flowtask/components/ServiceScrapper/parsers/__init__.py +1 -0
- flowtask/components/ServiceScrapper/parsers/base.py +94 -0
- flowtask/components/ServiceScrapper/parsers/costco.py +93 -0
- flowtask/components/ServiceScrapper/scrapper.py +199 -0
- flowtask/components/SetVariables.py +156 -0
- flowtask/components/SubTask.py +182 -0
- flowtask/components/SuiteCRM.py +48 -0
- flowtask/components/Switch.py +175 -0
- flowtask/components/TableBase.py +148 -0
- flowtask/components/TableDelete.py +312 -0
- flowtask/components/TableInput.py +143 -0
- flowtask/components/TableOutput/TableOutput.py +384 -0
- flowtask/components/TableOutput/__init__.py +3 -0
- flowtask/components/TableSchema.py +534 -0
- flowtask/components/Target.py +223 -0
- flowtask/components/ThumbnailGenerator.py +156 -0
- flowtask/components/ToPandas.py +67 -0
- flowtask/components/TransformRows/TransformRows.py +507 -0
- flowtask/components/TransformRows/__init__.py +9 -0
- flowtask/components/TransformRows/functions.py +559 -0
- flowtask/components/TransposeRows.py +176 -0
- flowtask/components/UPCDatabase.py +86 -0
- flowtask/components/UnGzip.py +171 -0
- flowtask/components/Uncompress.py +172 -0
- flowtask/components/UniqueRows.py +126 -0
- flowtask/components/Unzip.py +107 -0
- flowtask/components/UpdateOperationalVars.py +147 -0
- flowtask/components/UploadTo.py +299 -0
- flowtask/components/UploadToS3.py +136 -0
- flowtask/components/UploadToSFTP.py +160 -0
- flowtask/components/UploadToSharepoint.py +205 -0
- flowtask/components/UserFunc.py +122 -0
- flowtask/components/VivaTracker.py +140 -0
- flowtask/components/WSDLClient.py +123 -0
- flowtask/components/Wait.py +18 -0
- flowtask/components/Walmart.py +199 -0
- flowtask/components/Workplace.py +134 -0
- flowtask/components/XMLToPandas.py +267 -0
- flowtask/components/Zammad/__init__.py +41 -0
- flowtask/components/Zammad/models.py +0 -0
- flowtask/components/ZoomInfoScraper.py +409 -0
- flowtask/components/__init__.py +104 -0
- flowtask/components/abstract.py +18 -0
- flowtask/components/flow.py +530 -0
- flowtask/components/google.py +335 -0
- flowtask/components/group.py +221 -0
- flowtask/components/py.typed +0 -0
- flowtask/components/reviewscrap.py +132 -0
- flowtask/components/tAutoincrement.py +117 -0
- flowtask/components/tConcat.py +109 -0
- flowtask/components/tExplode.py +119 -0
- flowtask/components/tFilter.py +184 -0
- flowtask/components/tGroup.py +236 -0
- flowtask/components/tJoin.py +270 -0
- flowtask/components/tMap/__init__.py +9 -0
- flowtask/components/tMap/functions.py +54 -0
- flowtask/components/tMap/tMap.py +450 -0
- flowtask/components/tMelt.py +112 -0
- flowtask/components/tMerge.py +114 -0
- flowtask/components/tOrder.py +93 -0
- flowtask/components/tPandas.py +94 -0
- flowtask/components/tPivot.py +71 -0
- flowtask/components/tPluckCols.py +76 -0
- flowtask/components/tUnnest.py +82 -0
- flowtask/components/user.py +401 -0
- flowtask/conf.py +457 -0
- flowtask/download.py +102 -0
- flowtask/events/__init__.py +11 -0
- flowtask/events/events/__init__.py +20 -0
- flowtask/events/events/abstract.py +95 -0
- flowtask/events/events/alerts/__init__.py +362 -0
- flowtask/events/events/alerts/colfunctions.py +131 -0
- flowtask/events/events/alerts/functions.py +158 -0
- flowtask/events/events/dummy.py +12 -0
- flowtask/events/events/exec.py +124 -0
- flowtask/events/events/file/__init__.py +7 -0
- flowtask/events/events/file/base.py +51 -0
- flowtask/events/events/file/copy.py +23 -0
- flowtask/events/events/file/delete.py +16 -0
- flowtask/events/events/interfaces/__init__.py +9 -0
- flowtask/events/events/interfaces/client.py +67 -0
- flowtask/events/events/interfaces/credentials.py +28 -0
- flowtask/events/events/interfaces/notifications.py +58 -0
- flowtask/events/events/jira.py +122 -0
- flowtask/events/events/log.py +26 -0
- flowtask/events/events/logerr.py +52 -0
- flowtask/events/events/notify.py +59 -0
- flowtask/events/events/notify_event.py +160 -0
- flowtask/events/events/publish.py +54 -0
- flowtask/events/events/sendfile.py +104 -0
- flowtask/events/events/task.py +97 -0
- flowtask/events/events/teams.py +98 -0
- flowtask/events/events/webhook.py +58 -0
- flowtask/events/manager.py +287 -0
- flowtask/exceptions.c +39393 -0
- flowtask/exceptions.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/extensions/__init__.py +3 -0
- flowtask/extensions/abstract.py +82 -0
- flowtask/extensions/logging/__init__.py +65 -0
- flowtask/hooks/__init__.py +9 -0
- flowtask/hooks/actions/__init__.py +22 -0
- flowtask/hooks/actions/abstract.py +66 -0
- flowtask/hooks/actions/dummy.py +23 -0
- flowtask/hooks/actions/jira.py +74 -0
- flowtask/hooks/actions/rest.py +320 -0
- flowtask/hooks/actions/sampledata.py +37 -0
- flowtask/hooks/actions/sensor.py +23 -0
- flowtask/hooks/actions/task.py +9 -0
- flowtask/hooks/actions/ticket.py +37 -0
- flowtask/hooks/actions/zammad.py +55 -0
- flowtask/hooks/hook.py +62 -0
- flowtask/hooks/models.py +17 -0
- flowtask/hooks/service.py +187 -0
- flowtask/hooks/step.py +91 -0
- flowtask/hooks/types/__init__.py +23 -0
- flowtask/hooks/types/base.py +129 -0
- flowtask/hooks/types/brokers/__init__.py +11 -0
- flowtask/hooks/types/brokers/base.py +54 -0
- flowtask/hooks/types/brokers/mqtt.py +35 -0
- flowtask/hooks/types/brokers/rabbitmq.py +82 -0
- flowtask/hooks/types/brokers/redis.py +83 -0
- flowtask/hooks/types/brokers/sqs.py +44 -0
- flowtask/hooks/types/fs.py +232 -0
- flowtask/hooks/types/http.py +49 -0
- flowtask/hooks/types/imap.py +200 -0
- flowtask/hooks/types/jira.py +279 -0
- flowtask/hooks/types/mail.py +205 -0
- flowtask/hooks/types/postgres.py +98 -0
- flowtask/hooks/types/responses/__init__.py +8 -0
- flowtask/hooks/types/responses/base.py +5 -0
- flowtask/hooks/types/sharepoint.py +288 -0
- flowtask/hooks/types/ssh.py +141 -0
- flowtask/hooks/types/tagged.py +59 -0
- flowtask/hooks/types/upload.py +85 -0
- flowtask/hooks/types/watch.py +71 -0
- flowtask/hooks/types/web.py +36 -0
- flowtask/interfaces/AzureClient.py +137 -0
- flowtask/interfaces/AzureGraph.py +839 -0
- flowtask/interfaces/Boto3Client.py +326 -0
- flowtask/interfaces/DropboxClient.py +173 -0
- flowtask/interfaces/ExcelHandler.py +94 -0
- flowtask/interfaces/FTPClient.py +131 -0
- flowtask/interfaces/GoogleCalendar.py +201 -0
- flowtask/interfaces/GoogleClient.py +133 -0
- flowtask/interfaces/GoogleDrive.py +127 -0
- flowtask/interfaces/GoogleGCS.py +89 -0
- flowtask/interfaces/GoogleGeocoding.py +93 -0
- flowtask/interfaces/GoogleLang.py +114 -0
- flowtask/interfaces/GooglePub.py +61 -0
- flowtask/interfaces/GoogleSheet.py +68 -0
- flowtask/interfaces/IMAPClient.py +137 -0
- flowtask/interfaces/O365Calendar.py +113 -0
- flowtask/interfaces/O365Client.py +220 -0
- flowtask/interfaces/OneDrive.py +284 -0
- flowtask/interfaces/Outlook.py +155 -0
- flowtask/interfaces/ParrotBot.py +130 -0
- flowtask/interfaces/SSHClient.py +378 -0
- flowtask/interfaces/Sharepoint.py +496 -0
- flowtask/interfaces/__init__.py +36 -0
- flowtask/interfaces/azureauth.py +119 -0
- flowtask/interfaces/cache.py +201 -0
- flowtask/interfaces/client.py +82 -0
- flowtask/interfaces/compress.py +525 -0
- flowtask/interfaces/credentials.py +124 -0
- flowtask/interfaces/d2l.py +239 -0
- flowtask/interfaces/databases/__init__.py +5 -0
- flowtask/interfaces/databases/db.py +223 -0
- flowtask/interfaces/databases/documentdb.py +55 -0
- flowtask/interfaces/databases/rethink.py +39 -0
- flowtask/interfaces/dataframes/__init__.py +11 -0
- flowtask/interfaces/dataframes/abstract.py +21 -0
- flowtask/interfaces/dataframes/arrow.py +71 -0
- flowtask/interfaces/dataframes/dt.py +69 -0
- flowtask/interfaces/dataframes/pandas.py +167 -0
- flowtask/interfaces/dataframes/polars.py +60 -0
- flowtask/interfaces/db.py +263 -0
- flowtask/interfaces/env.py +46 -0
- flowtask/interfaces/func.py +137 -0
- flowtask/interfaces/http.py +1780 -0
- flowtask/interfaces/locale.py +40 -0
- flowtask/interfaces/log.py +75 -0
- flowtask/interfaces/mask.py +143 -0
- flowtask/interfaces/notification.py +154 -0
- flowtask/interfaces/playwright.py +339 -0
- flowtask/interfaces/powerpoint.py +368 -0
- flowtask/interfaces/py.typed +0 -0
- flowtask/interfaces/qs.py +376 -0
- flowtask/interfaces/result.py +87 -0
- flowtask/interfaces/selenium_service.py +779 -0
- flowtask/interfaces/smartsheet.py +154 -0
- flowtask/interfaces/stat.py +39 -0
- flowtask/interfaces/task.py +96 -0
- flowtask/interfaces/template.py +118 -0
- flowtask/interfaces/vectorstores/__init__.py +1 -0
- flowtask/interfaces/vectorstores/abstract.py +133 -0
- flowtask/interfaces/vectorstores/milvus.py +669 -0
- flowtask/interfaces/zammad.py +107 -0
- flowtask/models.py +193 -0
- flowtask/parsers/__init__.py +15 -0
- flowtask/parsers/_yaml.c +11978 -0
- flowtask/parsers/_yaml.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/argparser.py +235 -0
- flowtask/parsers/base.c +15155 -0
- flowtask/parsers/base.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/json.c +11968 -0
- flowtask/parsers/json.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/maps.py +49 -0
- flowtask/parsers/toml.c +11968 -0
- flowtask/parsers/toml.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/plugins/__init__.py +16 -0
- flowtask/plugins/components/__init__.py +0 -0
- flowtask/plugins/handler/__init__.py +45 -0
- flowtask/plugins/importer.py +31 -0
- flowtask/plugins/sources/__init__.py +0 -0
- flowtask/runner.py +283 -0
- flowtask/scheduler/__init__.py +9 -0
- flowtask/scheduler/functions.py +493 -0
- flowtask/scheduler/handlers/__init__.py +8 -0
- flowtask/scheduler/handlers/manager.py +504 -0
- flowtask/scheduler/handlers/models.py +58 -0
- flowtask/scheduler/handlers/service.py +72 -0
- flowtask/scheduler/notifications.py +65 -0
- flowtask/scheduler/scheduler.py +993 -0
- flowtask/services/__init__.py +0 -0
- flowtask/services/bots/__init__.py +0 -0
- flowtask/services/bots/telegram.py +264 -0
- flowtask/services/files/__init__.py +11 -0
- flowtask/services/files/manager.py +522 -0
- flowtask/services/files/model.py +37 -0
- flowtask/services/files/service.py +767 -0
- flowtask/services/jira/__init__.py +3 -0
- flowtask/services/jira/jira_actions.py +191 -0
- flowtask/services/tasks/__init__.py +13 -0
- flowtask/services/tasks/launcher.py +213 -0
- flowtask/services/tasks/manager.py +323 -0
- flowtask/services/tasks/service.py +275 -0
- flowtask/services/tasks/task_manager.py +376 -0
- flowtask/services/tasks/tasks.py +155 -0
- flowtask/storages/__init__.py +16 -0
- flowtask/storages/exceptions.py +12 -0
- flowtask/storages/files/__init__.py +8 -0
- flowtask/storages/files/abstract.py +29 -0
- flowtask/storages/files/filesystem.py +66 -0
- flowtask/storages/tasks/__init__.py +19 -0
- flowtask/storages/tasks/abstract.py +26 -0
- flowtask/storages/tasks/database.py +33 -0
- flowtask/storages/tasks/filesystem.py +108 -0
- flowtask/storages/tasks/github.py +119 -0
- flowtask/storages/tasks/memory.py +45 -0
- flowtask/storages/tasks/row.py +25 -0
- flowtask/tasks/__init__.py +0 -0
- flowtask/tasks/abstract.py +526 -0
- flowtask/tasks/command.py +118 -0
- flowtask/tasks/pile.py +486 -0
- flowtask/tasks/py.typed +0 -0
- flowtask/tasks/task.py +778 -0
- flowtask/template/__init__.py +161 -0
- flowtask/tests.py +257 -0
- flowtask/types/__init__.py +8 -0
- flowtask/types/typedefs.c +11347 -0
- flowtask/types/typedefs.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/__init__.py +24 -0
- flowtask/utils/constants.py +117 -0
- flowtask/utils/encoders.py +21 -0
- flowtask/utils/executor.py +112 -0
- flowtask/utils/functions.cpp +14280 -0
- flowtask/utils/functions.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/json.cpp +13349 -0
- flowtask/utils/json.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/mail.py +63 -0
- flowtask/utils/parseqs.c +13324 -0
- flowtask/utils/parserqs.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/stats.py +308 -0
- flowtask/utils/transformations.py +74 -0
- flowtask/utils/uv.py +12 -0
- flowtask/utils/validators.py +97 -0
- flowtask/version.py +11 -0
- flowtask-5.8.4.dist-info/LICENSE +201 -0
- flowtask-5.8.4.dist-info/METADATA +209 -0
- flowtask-5.8.4.dist-info/RECORD +470 -0
- flowtask-5.8.4.dist-info/WHEEL +6 -0
- flowtask-5.8.4.dist-info/entry_points.txt +3 -0
- flowtask-5.8.4.dist-info/top_level.txt +2 -0
- plugins/components/CreateQR.py +39 -0
- plugins/components/TestComponent.py +28 -0
- plugins/components/Use1.py +13 -0
- plugins/components/Workplace.py +117 -0
- plugins/components/__init__.py +3 -0
- plugins/sources/__init__.py +0 -0
- plugins/sources/get_populartimes.py +78 -0
- plugins/sources/google.py +150 -0
- plugins/sources/hubspot.py +679 -0
- plugins/sources/icims.py +679 -0
- plugins/sources/mobileinsight.py +501 -0
- plugins/sources/newrelic.py +262 -0
- plugins/sources/uap.py +268 -0
- plugins/sources/venu.py +244 -0
- plugins/sources/vocinity.py +314 -0
@@ -0,0 +1,284 @@
|
|
1
|
+
import asyncio
|
2
|
+
from collections.abc import Callable
|
3
|
+
import re
|
4
|
+
import pandas as pd
|
5
|
+
import numpy as np
|
6
|
+
from querysource.types.dt import filters as qsffunctions
|
7
|
+
from querysource.queries.multi.operators.filter.flt import (
|
8
|
+
create_filter,
|
9
|
+
valid_operators
|
10
|
+
)
|
11
|
+
from .FilterRows import functions as dffunctions
|
12
|
+
# create_filter
|
13
|
+
from ..exceptions import (
|
14
|
+
ConfigError,
|
15
|
+
ComponentError,
|
16
|
+
DataNotFound
|
17
|
+
)
|
18
|
+
from .flow import FlowComponent
|
19
|
+
from . import getComponent
|
20
|
+
|
21
|
+
|
22
|
+
class FilterIf(FlowComponent):
    """
    FilterIf.

    Overview

        FilterIf applies the configured filters to a Pandas DataFrame.
        Rows that satisfy the conditions are kept, the rest are discarded.
        When at least one row survives, the components listed under
        ``true_condition`` are executed, in order, against the filtered rows.

    .. table:: Properties
    :widths: auto

    +-----------------+----------+--------------------------------------------------------------------------+
    | Name            | Required | Summary                                                                  |
    +-----------------+----------+--------------------------------------------------------------------------+
    | operator        | No       | Operator used to join the filter expressions (default: ``&``).           |
    +-----------------+----------+--------------------------------------------------------------------------+
    | filter          | Yes      | List of conditions with columns, values, and expressions for filtering.  |
    |                 |          | Format: `{ "column": <col_name>, "value": <val>, "expression": <expr> }` |
    +-----------------+----------+--------------------------------------------------------------------------+
    | fields          | No       | Mapping of column name to the value (or list of values) rows must equal. |
    +-----------------+----------+--------------------------------------------------------------------------+
    | true_condition  | No       | List of components to execute when the filter leaves at least one row.   |
    +-----------------+----------+--------------------------------------------------------------------------+
    | false_condition | No       | List of components for the "not met" case. It is stored on the instance  |
    |                 |          | but this class does not execute it.                                      |
    +-----------------+----------+--------------------------------------------------------------------------+

    Returns

        The filtered Pandas DataFrame. If no row matches, a warning is logged
        and the original (unfiltered) input is returned instead.
        The component records the ``STARTED_ROWS``, ``FILTERED_ROWS`` and
        ``FILTERED_COLS`` metrics, and restricts the result to ``self.columns``
        when the instance has that attribute.
        Column information is printed when debugging is enabled.


    Example:

    ```yaml
    - FilterIf:
        operator: "&"
        filter:
          - column: previous_form_id
            expression: not_null
        true_condition:
          - TransformRows:
              replace_columns: true
              fields:
                form_id: previous_form_id
          - ExecuteSQL:
              file_sql: delete_previous_form.sql
              use_template: true
              use_dataframe: true
    ```

    """  # noqa

    def __init__(
        self,
        loop: asyncio.AbstractEventLoop = None,
        job: Callable = None,
        stat: Callable = None,
        **kwargs,
    ):
        """Pop the FilterIf-specific options from ``kwargs`` and init the base."""
        self.condition: str = ""
        self.fields: dict = kwargs.pop('fields', {})
        self.operator = kwargs.pop('operator', '&')
        self.filter = kwargs.pop('filter', [])
        self.true_condition = kwargs.pop('true_condition', [])
        # NOTE(review): false_condition is stored here but never executed by
        # any method of this class -- confirm whether that is intentional.
        self.false_condition = kwargs.pop('false_condition', [])
        self.filter_conditions: dict = {}
        # _filter_conditions() appends to this list, so it must exist.
        self._applied: list = []
        super().__init__(loop=loop, job=job, stat=stat, **kwargs)

    async def start(self, **kwargs):
        """
        Take the previous component's output as the input data.

        Raises:
            ComponentError: if there is no previous component, or if the
                input is not a Pandas DataFrame.
        """
        # The task is cancelled when the incoming data is not a Pandas DataFrame.
        if not self.previous:
            raise ComponentError(
                "Data Not Found"
            )
        self.data = self.input
        if not isinstance(self.data, pd.DataFrame):
            raise ComponentError(
                "Incompatible Pandas Dataframe"
            )
        return True

    async def close(self):
        """Nothing to release."""

    def _filter_conditions(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Apply every named filter in ``self.filter_conditions`` to a copy of ``df``.

        Each key is looked up as a function, first in ``qsffunctions``, then in
        ``dffunctions`` and finally in this module's globals; the value is
        passed to it as keyword arguments. A filter that fails is logged and
        skipped, leaving the data as it was before that filter.

        Raises:
            DataNotFound: if the resulting DataFrame is None or empty.
        """
        filtered = df.copy()
        for ft, args in self.filter_conditions.items():
            self._applied.append(f"Filter: {ft!s} args: {args}")
            try:
                try:
                    func = getattr(qsffunctions, ft)
                except AttributeError:
                    try:
                        func = getattr(dffunctions, ft)
                    except AttributeError:
                        func = globals()[ft]
                if callable(func):
                    filtered = func(filtered, **args)
            except Exception as err:
                # Best effort: report through the component logger and go on
                # with the remaining filters.
                self._logger.error(f"Error on {ft}: {err}")
        if filtered is None or filtered.empty:
            raise DataNotFound(
                "No Data was Found after Filtering."
            )
        return filtered

    def _filter_fields(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Keep only the rows whose columns equal the values in ``self.fields``.

        Columns that are not present in ``df`` are ignored.
        """
        for column, value in self.fields.items():
            if column not in df.columns:
                continue
            if isinstance(value, list):
                # NOTE(review): the values are applied one after another
                # (logical AND), so a list with two different values leaves no
                # rows. Confirm whether ``isin`` semantics were intended.
                for v in value:
                    df = df[df[column] == v]
            else:
                df = df[df[column] == value]
        return df

    def _define_step(self, cpobj: FlowComponent, params: dict) -> Callable:
        """
        Instantiate a nested component, sharing this component's context.

        Args:
            cpobj: the component class (or factory) to call.
            params: the arguments declared for that component; may be None.

        Returns:
            The object returned by ``cpobj``.

        Raises:
            ComponentError: if ``cpobj`` cannot be instantiated.
        """
        # Work on a copy: the caller's dict is the task definition and must not
        # accumulate runtime objects. ``or {}`` covers a component declared
        # without any argument.
        params = dict(params or {})
        params["ENV"] = self._environment
        if hasattr(self, "_program"):
            params["_program"] = self._program
        # params
        params["params"] = self._params
        # parameters
        params["parameters"] = self._parameters
        # useful to change variables in set var components
        params["_vars"] = self._vars
        # variables dictionary
        params["variables"] = self._variables
        params["_args"] = self._args
        # argument list for components (or tasks) that need argument lists
        params["arguments"] = self._arguments
        params["debug"] = self._debug
        params["argparser"] = self._argparser
        # the current in-memory connector
        params["memory"] = self._memory
        try:
            return cpobj(
                job=self,
                loop=self._loop,
                **params
            )
        except TypeError as err:
            raise ComponentError(
                f"Component {cpobj} is not callable: {err}"
            ) from err
        except AttributeError as err:
            raise ComponentError(
                f"Component {cpobj} not found: {err}"
            ) from err
        except Exception as err:
            raise ComponentError(
                f"Generic Component Error on {cpobj}, error: {err}"
            ) from err

    async def _execute_components(
        self,
        components: list,
        df: pd.DataFrame
    ) -> None:
        """
        Execute a list of components, chaining each result into the next one.

        Args:
            components (list): single-key dicts ``{component_name: arguments}``.
            df (pd.DataFrame): DataFrame handed to the first component.

        Raises:
            ComponentError: if a component cannot be resolved or instantiated.

        An error raised while a component is running is logged and the chain
        continues with the last good result.
        """
        inner_result = df
        for component in components:
            try:
                # component is a dict, split into key and values:
                component_name = list(component.keys())[0]
                args = component[component_name]
                cpobj = getComponent(component_name)
                # check if component is callable:
                if not callable(cpobj):
                    raise ComponentError(
                        f"Component {component_name} is not callable."
                    )
                step = self._define_step(cpobj, args)
                step.input = inner_result
                async with step as comp:
                    try:
                        result = await comp.run()
                        if isinstance(result, bool):
                            # a bare True/False carries no data: pass the
                            # step's input through unchanged.
                            result = step.input
                        if result is None or result.empty:
                            self._logger.warning(
                                f"No Data was Found after Executing {component_name}."
                            )
                        self._logger.notice(
                            f"Component {component_name} executed successfully."
                        )
                        inner_result = result
                    except Exception as e:
                        self._logger.error(
                            f"Error executing component {component_name}: {e}"
                        )
            except ComponentError as exc:
                raise ComponentError(
                    f"Component {component_name} not found: {exc}"
                ) from exc

    async def run(self):
        """
        Filter the input DataFrame and run ``true_condition`` on the result.

        Returns:
            The filtered DataFrame, or the original input when no row matches.

        Raises:
            ConfigError: if no ``filter`` was configured.
            DataNotFound: if the input is already empty before the ``filter``
                expressions are applied (raised by ``_filter_conditions``).
        """
        self.add_metric("STARTED_ROWS", len(self.data.index))
        if not self.filter:
            raise ConfigError(
                "No Filter Conditions were Found."
            )
        if not self.operator:
            self.operator = '&'
        # iterate over all filtering conditions (works on a copy of the data):
        df = self._filter_conditions(self.data)
        # Applying filter expressions by Column:
        if self.fields:
            df = self._filter_fields(df)
        conditions = create_filter(self.filter, df)
        # Joining all conditions
        self.condition = f" {self.operator} ".join(conditions)
        self._logger.notice(
            f"Filter conditions >> {self.condition}"
        )
        # SECURITY: the expression is built from the task definition and is
        # evaluated as Python code; task definitions must come from a trusted
        # source. The local name ``df`` must be kept, since the evaluated
        # expression runs in this scope.
        df = df.loc[
            eval(self.condition)  # pylint: disable=W0123
        ]
        if df is None or df.empty:
            self._logger.warning(
                "No Data was Found after Filtering."
            )
            # nothing matched: hand the untouched input to the next component.
            self._result = self.data
            return self._result
        # if the condition is met, execute true_condition
        if self.true_condition:
            await self._execute_components(
                self.true_condition,
                df
            )
        self._result = df
        self.add_metric(
            "FILTERED_ROWS", len(self._result.index)
        )
        if hasattr(self, "columns"):
            # returning only a subset of data
            self._result = self._result[self.columns]
        if self._debug is True:
            print("::: Printing Column Information === ")
            for column, t in self._result.dtypes.items():
                print(column, "->", t, "->", self._result[column].iloc[0])
        self.add_metric(
            "FILTERED_COLS", len(self._result.columns)
        )
        return self._result
|
@@ -0,0 +1,200 @@
|
|
1
|
+
import asyncio
|
2
|
+
from collections.abc import Callable
|
3
|
+
|
4
|
+
# logging system
|
5
|
+
import numpy as np
|
6
|
+
import pandas
|
7
|
+
from querysource.types.dt import filters as qsffunctions
|
8
|
+
from . import functions as dffunctions
|
9
|
+
from ..flow import FlowComponent
|
10
|
+
from ...utils.functions import check_empty
|
11
|
+
from ...exceptions import ComponentError, DataNotFound
|
12
|
+
|
13
|
+
|
14
|
+
class FilterRows(FlowComponent):
    r"""
    FilterRows

    Overview

        The FilterRows class is a component for removing or cleaning rows in a Pandas DataFrame based on specified criteria.
        It supports various cleaning and filtering operations and allows for the saving of rejected rows to a file.

    .. table:: Properties
       :widths: auto

    +-------------------+----------+---------------------------------------------------------------------------------------------+
    | Name              | Required | Description                                                                                 |
    +-------------------+----------+---------------------------------------------------------------------------------------------+
    | fields            | Yes      | A dictionary defining the fields and corresponding filtering conditions to be applied.     |
    +-------------------+----------+---------------------------------------------------------------------------------------------+
    | filter_conditions | Yes      | A dictionary defining the filter conditions for transformations.                           |
    +-------------------+----------+---------------------------------------------------------------------------------------------+
    | _applied          | No       | A list to store the applied filters.                                                        |
    +-------------------+----------+---------------------------------------------------------------------------------------------+
    | multi             | No       | A flag indicating if multiple DataFrame transformations are supported, defaults to False.  |
    +-------------------+----------+---------------------------------------------------------------------------------------------+

    Optional attributes read by ``run`` when present on the component:
    ``clean_strings``, ``clean_numbers``, ``clean_dates``, ``drop_empty``,
    ``dropna`` (columns used as the ``subset`` of ``DataFrame.dropna``) and
    ``save_rejected`` (a mapping that may carry a ``filename``).

    Return

        The methods in this class manage the filtering of rows in a Pandas DataFrame, including initialization, execution,
        and result handling.


    Example:

    ```yaml
    FilterRows:
      filter_conditions:
        clean_empty:
          columns:
          - updated
        drop_columns:
          columns:
          - legal_street_address_1
          - legal_street_address_2
          - work_location_address_1
          - work_location_address_2
          - birth_date
        suppress:
          columns:
          - payroll_id
          - reports_to_payroll_id
          pattern: (\.0)
      drop_empty: true
    ```

    """  # noqa

    def __init__(
        self,
        loop: asyncio.AbstractEventLoop = None,
        job: Callable = None,
        stat: Callable = None,
        **kwargs,
    ):
        """Set the component defaults and forward the remaining options.

        ``multi`` is always consumed from ``kwargs``. ``fields`` is consumed
        only when ``multi`` is false; otherwise it is left in ``kwargs`` and
        passed to the parent constructor unchanged.
        """
        self.filter_conditions: dict = {}
        self._applied: list = []
        self.multi = bool(kwargs.pop("multi", False))
        self.fields: dict = {} if self.multi else kwargs.pop("fields", {})
        super(FilterRows, self).__init__(loop=loop, job=job, stat=stat, **kwargs)

    async def start(self, **kwargs):
        """Obtain Pandas Dataframe.

        Raises:
            ComponentError: when there is no previous component.
            DataNotFound: when the received data is empty.
        """
        if not self.previous:
            raise ComponentError("a Previous Component was not found.")
        self.data = self.input
        if check_empty(self.data):
            raise DataNotFound("No data was found")

    def _resolve_filter(self, name):
        """Look up the filter called *name*.

        The lookup order is the local ``functions`` module, then the
        QuerySource filters module, then this module's globals.

        Raises:
            KeyError: when no namespace defines *name*.
        """
        try:
            return getattr(dffunctions, name)
        except AttributeError:
            pass
        try:
            return getattr(qsffunctions, name)
        except AttributeError:
            # Sibling ``except`` clauses never catch an error raised inside
            # another handler, so each fallback needs its own ``try``.
            return globals()[name]

    async def run(self):
        """Clean and filter ``self.data`` and publish the outcome.

        Returns:
            ``False`` when there is no data, otherwise ``self._result``
            (the filtered DataFrame when the input is a DataFrame).

        Raises:
            DataNotFound: when no rows are left after filtering.
        """
        if self.data is None:
            return False
        if not isinstance(self.data, pandas.DataFrame):
            return self._result

        # add first metrics
        self.add_metric("started_rows", self.data.shape[0])
        self.add_metric("started_columns", self.data.shape[1])

        # start filtering: the cleaning steps update ``self.data`` in place
        if hasattr(self, "clean_strings"):
            u = self.data.select_dtypes(include=["object", "string"])
            self.data[u.columns] = self.data[u.columns].fillna("")
        if hasattr(self, "clean_numbers"):
            u = self.data.select_dtypes(include=["Int64"])
            self.data[u.columns] = self.data[u.columns].replace(
                ["nan", np.nan], 0, regex=True
            )
            u = self.data.select_dtypes(include=["float64"])
            self.data[u.columns] = self.data[u.columns].replace(
                ["nan", np.nan], 0, regex=True
            )
        if hasattr(self, "clean_dates"):
            u = self.data.select_dtypes(include=["datetime64[ns]"])
            self.data[u.columns] = self.data[u.columns].replace({np.nan: None})

        # Filters work on a copy so ``self.data`` remains the reference for
        # counting (and optionally saving) the rejected rows.
        it = self.data.copy()

        # ``DataFrame.dropna`` returns a new frame; the result has to be
        # assigned, otherwise the call has no effect.
        if getattr(self, "drop_empty", False):
            # Drop rows without any data, then columns without any data.
            it = it.dropna(axis=0, how="all").dropna(axis=1, how="all")
        if hasattr(self, "dropna"):
            it = it.dropna(subset=self.dropna, how="all")

        # iterate over all filtering conditions:
        for ft, args in self.filter_conditions.items():
            self._applied.append(f"Filter: {ft!s} args: {args}")
            # TODO: create an expression builder
            try:
                func = self._resolve_filter(ft)
                if callable(func):
                    # A filter declared without arguments arrives as None.
                    it = func(it, **(args or {}))
            except Exception as err:
                # Best effort: a failing filter is reported and skipped, the
                # remaining filters still run on the last good frame.
                self._logger.error(f"Error on {ft}: {err}")
        df = it

        if df is None or df.empty:
            raise DataNotFound("No Data was Found after Filtering.")

        self._result = df
        passed = len(df.index)
        rejected = len(self.data.index) - passed
        # Legacy pandas flag kept from the original implementation.
        # NOTE(review): ``is_copy`` was removed from pandas; confirm this
        # assignment is still needed.
        self._result.is_copy = None

        self.add_metric("ended_rows", df.shape[0])
        self.add_metric("ended_columns", df.shape[1])
        self.add_metric("PASSED", passed)
        self.add_metric("REJECTED", rejected)
        self.add_metric("FILTERS", self._applied)
        self._variables[f"{self.StepName}_PASSED"] = passed
        self._variables[f"{self.StepName}_REJECTED"] = rejected

        save_rejected = getattr(self, "save_rejected", None)
        if save_rejected:
            # Rows whose index label did not survive the filters, kept in
            # their original order.
            rejected_rows = self.data.loc[~self.data.index.isin(df.index)]
            filename = save_rejected.get("filename", "rejected_rows.csv")
            try:
                rejected_rows.to_csv(filename, sep="|")
            except IOError:
                self._logger.warning(
                    f"Error writing Rejectd File: {filename}"
                )
            self.add_metric("rejected_file", filename)

        if self._debug:
            self._logger.verbose(
                f"PASSED: {passed}, REJECTED: {rejected}",
            )
            print("FILTERED ===")
            print(df)
            print("::: Printing Column Information === ")
            for column, t in df.dtypes.items():
                print(column, "->", t, "->", df[column].iloc[0])
        return self._result

    def close(self):
        """Nothing to release."""
        pass
|
@@ -0,0 +1,103 @@
|
|
1
|
+
import asyncio
|
2
|
+
from typing import Callable, Tuple, List
|
3
|
+
from asyncdb import AsyncDB
|
4
|
+
from querysource.datasources.drivers.bigquery import bigquery_default
|
5
|
+
from .flow import FlowComponent
|
6
|
+
from ..exceptions import ComponentError
|
7
|
+
|
8
|
+
|
9
|
+
class GCSToBigQuery(FlowComponent):
    """
    GCSToBigQuery.

    This component loads a CSV file from a specific GCS bucket into a BigQuery table.
    """

    def __init__(
        self,
        loop: asyncio.AbstractEventLoop = None,
        job: Callable = None,
        stat: Callable = None,
        **kwargs,
    ):
        """Read the component options from ``kwargs``.

        ``table_id`` and ``dataset_id`` are mandatory keys: a missing one
        raises ``KeyError`` here. The remaining options are optional.
        """
        # Receives the object_uri produced by CSVToGCS
        self.bucket_uri: str = kwargs.pop('bucket_uri', None)
        self.table_id: str = kwargs.pop('table_id')
        self.dataset_id: str = kwargs.pop('dataset_id')
        self.schema: List[dict] = kwargs.pop('schema', None)
        self.overwrite: bool = kwargs.pop('overwrite', False)
        self.delete_gcs: bool = kwargs.pop('delete_gcs', False)
        self.bq = None  # AsyncDB instance, created in start()
        super(GCSToBigQuery, self).__init__(loop=loop, job=job, stat=stat, **kwargs)

    async def start(self, **kwargs):
        """Initialise the component by configuring the AsyncDB connection.

        Raises:
            ComponentError: when a required parameter is missing, when
                ``bigquery_default`` is not configured, or when the AsyncDB
                instance cannot be created.
        """
        # Take bucket_uri from the previous component when not given directly
        if not self.bucket_uri:
            self.bucket_uri = self.getTaskVar('object_uri')  # Get object_uri set by CSVToGCS

        # Validate the required parameters
        if not self.bucket_uri or not self.table_id or not self.dataset_id:
            raise ComponentError("GCSToBigQuery: 'bucket_uri', 'table_id' y 'dataset_id' son parámetros requeridos.")

        if not bigquery_default:
            raise ComponentError("GCSToBigQuery: 'bigquery_default' no está configurado correctamente.")

        # Get the credentials and driver parameters
        credentials = bigquery_default.get_credentials()

        # Initialise AsyncDB with the BigQuery driver
        try:
            self.bq = AsyncDB("bigquery", params=credentials)
            self._logger.info("GCSToBigQuery: Instancia de AsyncDB creada exitosamente.")
        except Exception as e:
            raise ComponentError(f"GCSToBigQuery: Error al inicializar AsyncDB: {e}") from e

    async def run(self) -> Tuple[str, str]:
        """Load the CSV from GCS into BigQuery.

        Returns:
            A ``(bucket_uri, message)`` tuple on success.

        Raises:
            ComponentError: when ``start`` was not run first or when any
                step of the load fails.
        """
        if not self.bq:
            raise ComponentError("GCSToBigQuery: AsyncDB no está inicializado. Asegúrate de ejecutar 'start' antes de 'run'.")

        try:
            async with await self.bq.connection() as conn:
                # Truncate the table when overwrite=True
                if self.overwrite:
                    await conn.truncate_table(
                        dataset_id=self.dataset_id,
                        table_id=self.table_id
                    )
                    self._logger.info(f"GCSToBigQuery: Tabla '{self.dataset_id}.{self.table_id}' truncada exitosamente.")

                # Load the CSV from GCS into BigQuery
                load_result = await conn.read_csv_from_gcs(
                    bucket_uri=self.bucket_uri,
                    table_id=self.table_id,
                    dataset_id=self.dataset_id,
                    schema=self.schema  # May be None to autodetect
                )
                self._logger.info(f"GCSToBigQuery: {load_result}")

                # Keep the result for the next component (if needed)
                self.setTaskVar('bigquery_load_result', load_result)

                # Optionally delete the GCS object
                if self.delete_gcs:
                    await conn.delete_gcs_object(
                        bucket_uri=self.bucket_uri
                    )
                    self._logger.info(f"GCSToBigQuery: Objeto GCS '{self.bucket_uri}' eliminado exitosamente.")

                return self.bucket_uri, "Carga exitosa en BigQuery."

        except ComponentError:
            # Component-specific errors go through untouched; a bare
            # ``raise`` keeps the original traceback.
            raise
        except Exception as e:
            raise ComponentError(f"GCSToBigQuery: Error durante la carga a BigQuery: {e}") from e

    async def close(self):
        """Close the AsyncDB connection; failures are logged, not raised."""
        try:
            if self.bq:
                await self.bq.close()
                self._logger.info("GCSToBigQuery: AsyncDB cerrado exitosamente.")
        except Exception as e:
            self._logger.error(f"GCSToBigQuery: Error al cerrar AsyncDB: {e}")
|