flowtask 5.8.4__cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowtask/__init__.py +93 -0
- flowtask/__main__.py +38 -0
- flowtask/bots/__init__.py +6 -0
- flowtask/bots/check.py +93 -0
- flowtask/bots/codebot.py +51 -0
- flowtask/components/ASPX.py +148 -0
- flowtask/components/AddDataset.py +352 -0
- flowtask/components/Amazon.py +523 -0
- flowtask/components/AutoTask.py +314 -0
- flowtask/components/Azure.py +80 -0
- flowtask/components/AzureUsers.py +106 -0
- flowtask/components/BaseAction.py +91 -0
- flowtask/components/BaseLoop.py +198 -0
- flowtask/components/BestBuy.py +800 -0
- flowtask/components/CSVToGCS.py +120 -0
- flowtask/components/CompanyScraper/__init__.py +1 -0
- flowtask/components/CompanyScraper/parsers/__init__.py +6 -0
- flowtask/components/CompanyScraper/parsers/base.py +102 -0
- flowtask/components/CompanyScraper/parsers/explorium.py +192 -0
- flowtask/components/CompanyScraper/parsers/leadiq.py +206 -0
- flowtask/components/CompanyScraper/parsers/rocket.py +133 -0
- flowtask/components/CompanyScraper/parsers/siccode.py +109 -0
- flowtask/components/CompanyScraper/parsers/visualvisitor.py +130 -0
- flowtask/components/CompanyScraper/parsers/zoominfo.py +118 -0
- flowtask/components/CompanyScraper/scrapper.py +1054 -0
- flowtask/components/CopyTo.py +177 -0
- flowtask/components/CopyToBigQuery.py +243 -0
- flowtask/components/CopyToMongoDB.py +291 -0
- flowtask/components/CopyToPg.py +609 -0
- flowtask/components/CopyToRethink.py +207 -0
- flowtask/components/CreateGCSBucket.py +102 -0
- flowtask/components/CreateReport/CreateReport.py +228 -0
- flowtask/components/CreateReport/__init__.py +9 -0
- flowtask/components/CreateReport/charts/__init__.py +15 -0
- flowtask/components/CreateReport/charts/bar.py +51 -0
- flowtask/components/CreateReport/charts/base.py +66 -0
- flowtask/components/CreateReport/charts/pie.py +64 -0
- flowtask/components/CreateReport/utils.py +9 -0
- flowtask/components/CustomerSatisfaction.py +196 -0
- flowtask/components/DataInput.py +200 -0
- flowtask/components/DateList.py +255 -0
- flowtask/components/DbClient.py +163 -0
- flowtask/components/DialPad.py +146 -0
- flowtask/components/DocumentDBQuery.py +200 -0
- flowtask/components/DownloadFrom.py +371 -0
- flowtask/components/DownloadFromD2L.py +113 -0
- flowtask/components/DownloadFromFTP.py +181 -0
- flowtask/components/DownloadFromIMAP.py +315 -0
- flowtask/components/DownloadFromS3.py +198 -0
- flowtask/components/DownloadFromSFTP.py +265 -0
- flowtask/components/DownloadFromSharepoint.py +110 -0
- flowtask/components/DownloadFromSmartSheet.py +114 -0
- flowtask/components/DownloadS3File.py +229 -0
- flowtask/components/Dummy.py +59 -0
- flowtask/components/DuplicatePhoto.py +411 -0
- flowtask/components/EmployeeEvaluation.py +237 -0
- flowtask/components/ExecuteSQL.py +323 -0
- flowtask/components/ExtractHTML.py +178 -0
- flowtask/components/FileBase.py +178 -0
- flowtask/components/FileCopy.py +181 -0
- flowtask/components/FileDelete.py +82 -0
- flowtask/components/FileExists.py +146 -0
- flowtask/components/FileIteratorDelete.py +112 -0
- flowtask/components/FileList.py +194 -0
- flowtask/components/FileOpen.py +75 -0
- flowtask/components/FileRead.py +120 -0
- flowtask/components/FileRename.py +106 -0
- flowtask/components/FilterIf.py +284 -0
- flowtask/components/FilterRows/FilterRows.py +200 -0
- flowtask/components/FilterRows/__init__.py +10 -0
- flowtask/components/FilterRows/functions.py +4 -0
- flowtask/components/GCSToBigQuery.py +103 -0
- flowtask/components/GoogleA4.py +150 -0
- flowtask/components/GoogleGeoCoding.py +344 -0
- flowtask/components/GooglePlaces.py +315 -0
- flowtask/components/GoogleSearch.py +539 -0
- flowtask/components/HTTPClient.py +268 -0
- flowtask/components/ICIMS.py +146 -0
- flowtask/components/IF.py +179 -0
- flowtask/components/IcimsFolderCopy.py +173 -0
- flowtask/components/ImageFeatures/__init__.py +5 -0
- flowtask/components/ImageFeatures/process.py +233 -0
- flowtask/components/IteratorBase.py +251 -0
- flowtask/components/LangchainLoader/__init__.py +5 -0
- flowtask/components/LangchainLoader/loader.py +194 -0
- flowtask/components/LangchainLoader/loaders/__init__.py +22 -0
- flowtask/components/LangchainLoader/loaders/abstract.py +362 -0
- flowtask/components/LangchainLoader/loaders/basepdf.py +50 -0
- flowtask/components/LangchainLoader/loaders/docx.py +91 -0
- flowtask/components/LangchainLoader/loaders/html.py +119 -0
- flowtask/components/LangchainLoader/loaders/pdfblocks.py +146 -0
- flowtask/components/LangchainLoader/loaders/pdfmark.py +79 -0
- flowtask/components/LangchainLoader/loaders/pdftables.py +135 -0
- flowtask/components/LangchainLoader/loaders/qa.py +67 -0
- flowtask/components/LangchainLoader/loaders/txt.py +55 -0
- flowtask/components/LeadIQ.py +650 -0
- flowtask/components/Loop.py +253 -0
- flowtask/components/Lowes.py +334 -0
- flowtask/components/MS365Usage.py +156 -0
- flowtask/components/MSTeamsMessages.py +320 -0
- flowtask/components/MarketClustering.py +1051 -0
- flowtask/components/MergeFiles.py +362 -0
- flowtask/components/MilvusOutput.py +87 -0
- flowtask/components/NearByStores.py +175 -0
- flowtask/components/NetworkNinja/__init__.py +6 -0
- flowtask/components/NetworkNinja/models/__init__.py +52 -0
- flowtask/components/NetworkNinja/models/abstract.py +177 -0
- flowtask/components/NetworkNinja/models/account.py +39 -0
- flowtask/components/NetworkNinja/models/client.py +19 -0
- flowtask/components/NetworkNinja/models/district.py +14 -0
- flowtask/components/NetworkNinja/models/events.py +101 -0
- flowtask/components/NetworkNinja/models/forms.py +499 -0
- flowtask/components/NetworkNinja/models/market.py +16 -0
- flowtask/components/NetworkNinja/models/organization.py +34 -0
- flowtask/components/NetworkNinja/models/photos.py +125 -0
- flowtask/components/NetworkNinja/models/project.py +44 -0
- flowtask/components/NetworkNinja/models/region.py +28 -0
- flowtask/components/NetworkNinja/models/store.py +203 -0
- flowtask/components/NetworkNinja/models/user.py +151 -0
- flowtask/components/NetworkNinja/router.py +854 -0
- flowtask/components/Odoo.py +175 -0
- flowtask/components/OdooInjector.py +192 -0
- flowtask/components/OpenFromXML.py +126 -0
- flowtask/components/OpenWeather.py +41 -0
- flowtask/components/OpenWithBase.py +616 -0
- flowtask/components/OpenWithPandas.py +715 -0
- flowtask/components/PGPDecrypt.py +199 -0
- flowtask/components/PandasIterator.py +187 -0
- flowtask/components/PandasToFile.py +189 -0
- flowtask/components/Paradox.py +339 -0
- flowtask/components/ParamIterator.py +117 -0
- flowtask/components/ParseHTML.py +84 -0
- flowtask/components/PlacerStores.py +249 -0
- flowtask/components/Pokemon.py +507 -0
- flowtask/components/PositiveBot.py +62 -0
- flowtask/components/PowerPointSlide.py +400 -0
- flowtask/components/PrintMessage.py +127 -0
- flowtask/components/ProductCompetitors/__init__.py +5 -0
- flowtask/components/ProductCompetitors/parsers/__init__.py +7 -0
- flowtask/components/ProductCompetitors/parsers/base.py +72 -0
- flowtask/components/ProductCompetitors/parsers/bestbuy.py +86 -0
- flowtask/components/ProductCompetitors/parsers/lowes.py +103 -0
- flowtask/components/ProductCompetitors/scrapper.py +155 -0
- flowtask/components/ProductCompliant.py +169 -0
- flowtask/components/ProductInfo/__init__.py +1 -0
- flowtask/components/ProductInfo/parsers/__init__.py +5 -0
- flowtask/components/ProductInfo/parsers/base.py +83 -0
- flowtask/components/ProductInfo/parsers/brother.py +97 -0
- flowtask/components/ProductInfo/parsers/canon.py +167 -0
- flowtask/components/ProductInfo/parsers/epson.py +118 -0
- flowtask/components/ProductInfo/parsers/hp.py +131 -0
- flowtask/components/ProductInfo/parsers/samsung.py +97 -0
- flowtask/components/ProductInfo/scraper.py +319 -0
- flowtask/components/ProductPricing.py +118 -0
- flowtask/components/QS.py +261 -0
- flowtask/components/QSBase.py +201 -0
- flowtask/components/QueryIterator.py +273 -0
- flowtask/components/QueryToInsert.py +327 -0
- flowtask/components/QueryToPandas.py +432 -0
- flowtask/components/RESTClient.py +195 -0
- flowtask/components/RethinkDBQuery.py +189 -0
- flowtask/components/Rsync.py +74 -0
- flowtask/components/RunSSH.py +59 -0
- flowtask/components/RunShell.py +71 -0
- flowtask/components/SalesForce.py +20 -0
- flowtask/components/SaveImageBank/__init__.py +257 -0
- flowtask/components/SchedulingVisits.py +592 -0
- flowtask/components/ScrapPage.py +216 -0
- flowtask/components/ScrapSearch.py +79 -0
- flowtask/components/SendNotify.py +257 -0
- flowtask/components/SentimentAnalysis.py +694 -0
- flowtask/components/ServiceScrapper/__init__.py +5 -0
- flowtask/components/ServiceScrapper/parsers/__init__.py +1 -0
- flowtask/components/ServiceScrapper/parsers/base.py +94 -0
- flowtask/components/ServiceScrapper/parsers/costco.py +93 -0
- flowtask/components/ServiceScrapper/scrapper.py +199 -0
- flowtask/components/SetVariables.py +156 -0
- flowtask/components/SubTask.py +182 -0
- flowtask/components/SuiteCRM.py +48 -0
- flowtask/components/Switch.py +175 -0
- flowtask/components/TableBase.py +148 -0
- flowtask/components/TableDelete.py +312 -0
- flowtask/components/TableInput.py +143 -0
- flowtask/components/TableOutput/TableOutput.py +384 -0
- flowtask/components/TableOutput/__init__.py +3 -0
- flowtask/components/TableSchema.py +534 -0
- flowtask/components/Target.py +223 -0
- flowtask/components/ThumbnailGenerator.py +156 -0
- flowtask/components/ToPandas.py +67 -0
- flowtask/components/TransformRows/TransformRows.py +507 -0
- flowtask/components/TransformRows/__init__.py +9 -0
- flowtask/components/TransformRows/functions.py +559 -0
- flowtask/components/TransposeRows.py +176 -0
- flowtask/components/UPCDatabase.py +86 -0
- flowtask/components/UnGzip.py +171 -0
- flowtask/components/Uncompress.py +172 -0
- flowtask/components/UniqueRows.py +126 -0
- flowtask/components/Unzip.py +107 -0
- flowtask/components/UpdateOperationalVars.py +147 -0
- flowtask/components/UploadTo.py +299 -0
- flowtask/components/UploadToS3.py +136 -0
- flowtask/components/UploadToSFTP.py +160 -0
- flowtask/components/UploadToSharepoint.py +205 -0
- flowtask/components/UserFunc.py +122 -0
- flowtask/components/VivaTracker.py +140 -0
- flowtask/components/WSDLClient.py +123 -0
- flowtask/components/Wait.py +18 -0
- flowtask/components/Walmart.py +199 -0
- flowtask/components/Workplace.py +134 -0
- flowtask/components/XMLToPandas.py +267 -0
- flowtask/components/Zammad/__init__.py +41 -0
- flowtask/components/Zammad/models.py +0 -0
- flowtask/components/ZoomInfoScraper.py +409 -0
- flowtask/components/__init__.py +104 -0
- flowtask/components/abstract.py +18 -0
- flowtask/components/flow.py +530 -0
- flowtask/components/google.py +335 -0
- flowtask/components/group.py +221 -0
- flowtask/components/py.typed +0 -0
- flowtask/components/reviewscrap.py +132 -0
- flowtask/components/tAutoincrement.py +117 -0
- flowtask/components/tConcat.py +109 -0
- flowtask/components/tExplode.py +119 -0
- flowtask/components/tFilter.py +184 -0
- flowtask/components/tGroup.py +236 -0
- flowtask/components/tJoin.py +270 -0
- flowtask/components/tMap/__init__.py +9 -0
- flowtask/components/tMap/functions.py +54 -0
- flowtask/components/tMap/tMap.py +450 -0
- flowtask/components/tMelt.py +112 -0
- flowtask/components/tMerge.py +114 -0
- flowtask/components/tOrder.py +93 -0
- flowtask/components/tPandas.py +94 -0
- flowtask/components/tPivot.py +71 -0
- flowtask/components/tPluckCols.py +76 -0
- flowtask/components/tUnnest.py +82 -0
- flowtask/components/user.py +401 -0
- flowtask/conf.py +457 -0
- flowtask/download.py +102 -0
- flowtask/events/__init__.py +11 -0
- flowtask/events/events/__init__.py +20 -0
- flowtask/events/events/abstract.py +95 -0
- flowtask/events/events/alerts/__init__.py +362 -0
- flowtask/events/events/alerts/colfunctions.py +131 -0
- flowtask/events/events/alerts/functions.py +158 -0
- flowtask/events/events/dummy.py +12 -0
- flowtask/events/events/exec.py +124 -0
- flowtask/events/events/file/__init__.py +7 -0
- flowtask/events/events/file/base.py +51 -0
- flowtask/events/events/file/copy.py +23 -0
- flowtask/events/events/file/delete.py +16 -0
- flowtask/events/events/interfaces/__init__.py +9 -0
- flowtask/events/events/interfaces/client.py +67 -0
- flowtask/events/events/interfaces/credentials.py +28 -0
- flowtask/events/events/interfaces/notifications.py +58 -0
- flowtask/events/events/jira.py +122 -0
- flowtask/events/events/log.py +26 -0
- flowtask/events/events/logerr.py +52 -0
- flowtask/events/events/notify.py +59 -0
- flowtask/events/events/notify_event.py +160 -0
- flowtask/events/events/publish.py +54 -0
- flowtask/events/events/sendfile.py +104 -0
- flowtask/events/events/task.py +97 -0
- flowtask/events/events/teams.py +98 -0
- flowtask/events/events/webhook.py +58 -0
- flowtask/events/manager.py +287 -0
- flowtask/exceptions.c +39393 -0
- flowtask/exceptions.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/extensions/__init__.py +3 -0
- flowtask/extensions/abstract.py +82 -0
- flowtask/extensions/logging/__init__.py +65 -0
- flowtask/hooks/__init__.py +9 -0
- flowtask/hooks/actions/__init__.py +22 -0
- flowtask/hooks/actions/abstract.py +66 -0
- flowtask/hooks/actions/dummy.py +23 -0
- flowtask/hooks/actions/jira.py +74 -0
- flowtask/hooks/actions/rest.py +320 -0
- flowtask/hooks/actions/sampledata.py +37 -0
- flowtask/hooks/actions/sensor.py +23 -0
- flowtask/hooks/actions/task.py +9 -0
- flowtask/hooks/actions/ticket.py +37 -0
- flowtask/hooks/actions/zammad.py +55 -0
- flowtask/hooks/hook.py +62 -0
- flowtask/hooks/models.py +17 -0
- flowtask/hooks/service.py +187 -0
- flowtask/hooks/step.py +91 -0
- flowtask/hooks/types/__init__.py +23 -0
- flowtask/hooks/types/base.py +129 -0
- flowtask/hooks/types/brokers/__init__.py +11 -0
- flowtask/hooks/types/brokers/base.py +54 -0
- flowtask/hooks/types/brokers/mqtt.py +35 -0
- flowtask/hooks/types/brokers/rabbitmq.py +82 -0
- flowtask/hooks/types/brokers/redis.py +83 -0
- flowtask/hooks/types/brokers/sqs.py +44 -0
- flowtask/hooks/types/fs.py +232 -0
- flowtask/hooks/types/http.py +49 -0
- flowtask/hooks/types/imap.py +200 -0
- flowtask/hooks/types/jira.py +279 -0
- flowtask/hooks/types/mail.py +205 -0
- flowtask/hooks/types/postgres.py +98 -0
- flowtask/hooks/types/responses/__init__.py +8 -0
- flowtask/hooks/types/responses/base.py +5 -0
- flowtask/hooks/types/sharepoint.py +288 -0
- flowtask/hooks/types/ssh.py +141 -0
- flowtask/hooks/types/tagged.py +59 -0
- flowtask/hooks/types/upload.py +85 -0
- flowtask/hooks/types/watch.py +71 -0
- flowtask/hooks/types/web.py +36 -0
- flowtask/interfaces/AzureClient.py +137 -0
- flowtask/interfaces/AzureGraph.py +839 -0
- flowtask/interfaces/Boto3Client.py +326 -0
- flowtask/interfaces/DropboxClient.py +173 -0
- flowtask/interfaces/ExcelHandler.py +94 -0
- flowtask/interfaces/FTPClient.py +131 -0
- flowtask/interfaces/GoogleCalendar.py +201 -0
- flowtask/interfaces/GoogleClient.py +133 -0
- flowtask/interfaces/GoogleDrive.py +127 -0
- flowtask/interfaces/GoogleGCS.py +89 -0
- flowtask/interfaces/GoogleGeocoding.py +93 -0
- flowtask/interfaces/GoogleLang.py +114 -0
- flowtask/interfaces/GooglePub.py +61 -0
- flowtask/interfaces/GoogleSheet.py +68 -0
- flowtask/interfaces/IMAPClient.py +137 -0
- flowtask/interfaces/O365Calendar.py +113 -0
- flowtask/interfaces/O365Client.py +220 -0
- flowtask/interfaces/OneDrive.py +284 -0
- flowtask/interfaces/Outlook.py +155 -0
- flowtask/interfaces/ParrotBot.py +130 -0
- flowtask/interfaces/SSHClient.py +378 -0
- flowtask/interfaces/Sharepoint.py +496 -0
- flowtask/interfaces/__init__.py +36 -0
- flowtask/interfaces/azureauth.py +119 -0
- flowtask/interfaces/cache.py +201 -0
- flowtask/interfaces/client.py +82 -0
- flowtask/interfaces/compress.py +525 -0
- flowtask/interfaces/credentials.py +124 -0
- flowtask/interfaces/d2l.py +239 -0
- flowtask/interfaces/databases/__init__.py +5 -0
- flowtask/interfaces/databases/db.py +223 -0
- flowtask/interfaces/databases/documentdb.py +55 -0
- flowtask/interfaces/databases/rethink.py +39 -0
- flowtask/interfaces/dataframes/__init__.py +11 -0
- flowtask/interfaces/dataframes/abstract.py +21 -0
- flowtask/interfaces/dataframes/arrow.py +71 -0
- flowtask/interfaces/dataframes/dt.py +69 -0
- flowtask/interfaces/dataframes/pandas.py +167 -0
- flowtask/interfaces/dataframes/polars.py +60 -0
- flowtask/interfaces/db.py +263 -0
- flowtask/interfaces/env.py +46 -0
- flowtask/interfaces/func.py +137 -0
- flowtask/interfaces/http.py +1780 -0
- flowtask/interfaces/locale.py +40 -0
- flowtask/interfaces/log.py +75 -0
- flowtask/interfaces/mask.py +143 -0
- flowtask/interfaces/notification.py +154 -0
- flowtask/interfaces/playwright.py +339 -0
- flowtask/interfaces/powerpoint.py +368 -0
- flowtask/interfaces/py.typed +0 -0
- flowtask/interfaces/qs.py +376 -0
- flowtask/interfaces/result.py +87 -0
- flowtask/interfaces/selenium_service.py +779 -0
- flowtask/interfaces/smartsheet.py +154 -0
- flowtask/interfaces/stat.py +39 -0
- flowtask/interfaces/task.py +96 -0
- flowtask/interfaces/template.py +118 -0
- flowtask/interfaces/vectorstores/__init__.py +1 -0
- flowtask/interfaces/vectorstores/abstract.py +133 -0
- flowtask/interfaces/vectorstores/milvus.py +669 -0
- flowtask/interfaces/zammad.py +107 -0
- flowtask/models.py +193 -0
- flowtask/parsers/__init__.py +15 -0
- flowtask/parsers/_yaml.c +11978 -0
- flowtask/parsers/_yaml.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/argparser.py +235 -0
- flowtask/parsers/base.c +15155 -0
- flowtask/parsers/base.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/json.c +11968 -0
- flowtask/parsers/json.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/maps.py +49 -0
- flowtask/parsers/toml.c +11968 -0
- flowtask/parsers/toml.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/plugins/__init__.py +16 -0
- flowtask/plugins/components/__init__.py +0 -0
- flowtask/plugins/handler/__init__.py +45 -0
- flowtask/plugins/importer.py +31 -0
- flowtask/plugins/sources/__init__.py +0 -0
- flowtask/runner.py +283 -0
- flowtask/scheduler/__init__.py +9 -0
- flowtask/scheduler/functions.py +493 -0
- flowtask/scheduler/handlers/__init__.py +8 -0
- flowtask/scheduler/handlers/manager.py +504 -0
- flowtask/scheduler/handlers/models.py +58 -0
- flowtask/scheduler/handlers/service.py +72 -0
- flowtask/scheduler/notifications.py +65 -0
- flowtask/scheduler/scheduler.py +993 -0
- flowtask/services/__init__.py +0 -0
- flowtask/services/bots/__init__.py +0 -0
- flowtask/services/bots/telegram.py +264 -0
- flowtask/services/files/__init__.py +11 -0
- flowtask/services/files/manager.py +522 -0
- flowtask/services/files/model.py +37 -0
- flowtask/services/files/service.py +767 -0
- flowtask/services/jira/__init__.py +3 -0
- flowtask/services/jira/jira_actions.py +191 -0
- flowtask/services/tasks/__init__.py +13 -0
- flowtask/services/tasks/launcher.py +213 -0
- flowtask/services/tasks/manager.py +323 -0
- flowtask/services/tasks/service.py +275 -0
- flowtask/services/tasks/task_manager.py +376 -0
- flowtask/services/tasks/tasks.py +155 -0
- flowtask/storages/__init__.py +16 -0
- flowtask/storages/exceptions.py +12 -0
- flowtask/storages/files/__init__.py +8 -0
- flowtask/storages/files/abstract.py +29 -0
- flowtask/storages/files/filesystem.py +66 -0
- flowtask/storages/tasks/__init__.py +19 -0
- flowtask/storages/tasks/abstract.py +26 -0
- flowtask/storages/tasks/database.py +33 -0
- flowtask/storages/tasks/filesystem.py +108 -0
- flowtask/storages/tasks/github.py +119 -0
- flowtask/storages/tasks/memory.py +45 -0
- flowtask/storages/tasks/row.py +25 -0
- flowtask/tasks/__init__.py +0 -0
- flowtask/tasks/abstract.py +526 -0
- flowtask/tasks/command.py +118 -0
- flowtask/tasks/pile.py +486 -0
- flowtask/tasks/py.typed +0 -0
- flowtask/tasks/task.py +778 -0
- flowtask/template/__init__.py +161 -0
- flowtask/tests.py +257 -0
- flowtask/types/__init__.py +8 -0
- flowtask/types/typedefs.c +11347 -0
- flowtask/types/typedefs.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/__init__.py +24 -0
- flowtask/utils/constants.py +117 -0
- flowtask/utils/encoders.py +21 -0
- flowtask/utils/executor.py +112 -0
- flowtask/utils/functions.cpp +14280 -0
- flowtask/utils/functions.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/json.cpp +13349 -0
- flowtask/utils/json.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/mail.py +63 -0
- flowtask/utils/parseqs.c +13324 -0
- flowtask/utils/parserqs.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/stats.py +308 -0
- flowtask/utils/transformations.py +74 -0
- flowtask/utils/uv.py +12 -0
- flowtask/utils/validators.py +97 -0
- flowtask/version.py +11 -0
- flowtask-5.8.4.dist-info/LICENSE +201 -0
- flowtask-5.8.4.dist-info/METADATA +209 -0
- flowtask-5.8.4.dist-info/RECORD +470 -0
- flowtask-5.8.4.dist-info/WHEEL +6 -0
- flowtask-5.8.4.dist-info/entry_points.txt +3 -0
- flowtask-5.8.4.dist-info/top_level.txt +2 -0
- plugins/components/CreateQR.py +39 -0
- plugins/components/TestComponent.py +28 -0
- plugins/components/Use1.py +13 -0
- plugins/components/Workplace.py +117 -0
- plugins/components/__init__.py +3 -0
- plugins/sources/__init__.py +0 -0
- plugins/sources/get_populartimes.py +78 -0
- plugins/sources/google.py +150 -0
- plugins/sources/hubspot.py +679 -0
- plugins/sources/icims.py +679 -0
- plugins/sources/mobileinsight.py +501 -0
- plugins/sources/newrelic.py +262 -0
- plugins/sources/uap.py +268 -0
- plugins/sources/venu.py +244 -0
- plugins/sources/vocinity.py +314 -0
@@ -0,0 +1,236 @@
|
|
1
|
+
import asyncio
|
2
|
+
from collections.abc import Callable
|
3
|
+
import pandas as pd
|
4
|
+
import numpy as np
|
5
|
+
from ..exceptions import ComponentError, ConfigError
|
6
|
+
from .flow import FlowComponent
|
7
|
+
|
8
|
+
|
9
|
+
class tGroup(FlowComponent):
    """
    tGroup

    Overview

        The tGroup class is a component for performing a group-by operation on a DataFrame using specified columns.
        Without aggregations it returns the unique combinations of the group-by columns; with aggregations it
        returns one row per group holding the requested aggregated values.

    .. table:: Properties
       :widths: auto

    +----------------+----------+---------------------------------------------------------------+
    | Name           | Required | Summary                                                       |
    +----------------+----------+---------------------------------------------------------------+
    | group_by       | Yes      | List of columns (strings) to group by.                        |
    +----------------+----------+---------------------------------------------------------------+
    | columns        | No       | List of columns to retain in the result DataFrame. If not     |
    |                |          | set, every column of the grouped result is returned.          |
    +----------------+----------+---------------------------------------------------------------+
    | agg            | No       | List of aggregations to apply to the grouped data. Each item  |
    |                |          | is a dictionary whose first entry is ``column: function``,    |
    |                |          | with an optional ``alias`` for the output column (defaults to |
    |                |          | ``<column>_<function>``). Any remaining entries are passed as |
    |                |          | keyword arguments to the reductions that accept them (for     |
    |                |          | example ``q`` for ``quantile``). ``apply`` requires a         |
    |                |          | callable under the ``function`` key.                          |
    +----------------+----------+---------------------------------------------------------------+

    Returns

        This component returns a DataFrame with unique rows based on the specified `group_by` columns. If `columns`
        is defined, only those columns are included in the result. The component prints debugging information on
        column data types if enabled, and any error during grouping is raised as an exception.

    Example:

    ```
    - tGroup:
        group_by:
          - store_id
          - formatted_address
          - state_code
          - latitude
          - longitude
          - store_name
          - city
    ```

    - Aggregation Example:
    ```
    - tGroup:
        group_by:
          - store_id
          - formatted_address
          - state_code
          - latitude
          - longitude
          - store_name
          - city
        agg:
          - store_id: distinct
            alias: unique_store_ids
          - latitude: mean
            alias: avg_latitude
          - longitude: mean
            alias: avg_longitude
          - state_code: count
    ```
    """  # noqa

    condition = ""

    # Function names handed to pandas verbatim; extra options are ignored for
    # these.  NOTE(review): the transform-like names (pct_change, diff, cum*)
    # and ``mode`` are kept only because the original accepted them; they are
    # not reductions, so pandas will presumably reject them at aggregation time.
    _PASSTHROUGH = frozenset({
        "count", "first", "last", "unique", "mode",
        "pct_change", "diff", "cumsum", "cumprod", "cummax", "cummin",
    })

    # Reductions available under the same name on ``pandas.Series``; when the
    # aggregation carries extra options they are forwarded as keyword arguments.
    _REDUCERS = frozenset({
        "sum", "mean", "median", "min", "max", "std", "var",
        "nunique", "quantile", "skew", "sem",
    })

    # Aggregations implemented by the component itself (options are ignored).
    _CUSTOM = {
        "distinct": pd.Series.nunique,
        "count_nulls": lambda x: x.isnull().sum(),
        "count_not_nulls": lambda x: x.notnull().sum(),
        "count_unique": lambda x: x.nunique(),
        # Unlike ``count_unique`` this goes through ``unique()``, so a missing
        # value counts as one more distinct entry.
        "count_distinct": lambda x: x.unique().size,
        "count_zeros": lambda x: (x == 0).sum(),
        # Mean absolute deviation, computed by hand because ``Series.mad`` was
        # removed in pandas 2.0.
        "mad": lambda x: (x - x.mean()).abs().mean(),
    }

    def __init__(
        self,
        loop: asyncio.AbstractEventLoop = None,
        job: Callable = None,
        stat: Callable = None,
        **kwargs,
    ):
        """Validate ``group_by``/``agg`` and initialise the base component.

        Raises:
            ConfigError: if ``group_by`` is missing, empty, not a list, or
                contains anything other than strings.
        """
        self._columns: list = kwargs.pop("group_by", None)
        self.aggregations: list = kwargs.pop("agg", [])
        if not self._columns:
            raise ConfigError(
                "tGroup require a list of Columns for Group By => **group_by**"
            )
        if not isinstance(self._columns, list):
            raise ConfigError("Group By must be a list of columns")
        if not all(isinstance(col, str) for col in self._columns):
            raise ConfigError("All group_by columns must be strings")
        super().__init__(loop=loop, job=job, stat=stat, **kwargs)

    async def start(self, **kwargs):
        """Take the previous component's output as input data.

        Raises:
            ComponentError: if there is no previous component or its output
                is not a pandas DataFrame.
        """
        # If the incoming data is not a pandas DataFrame the task is aborted.
        if self.previous:
            self.data = self.input
        else:
            raise ComponentError("Data Not Found")
        if not isinstance(self.data, pd.DataFrame):
            raise ComponentError("Incompatible Pandas Dataframe")
        return True

    async def close(self):
        """Nothing to release."""
        pass

    @classmethod
    def _build_aggregation(cls, agg) -> tuple:
        """Translate one ``agg`` entry into ``(alias, (column, aggfunc))``.

        The returned pair is ready to be used as a pandas *named aggregation*,
        which only accepts 2-tuples of ``(column, aggfunc)``.

        Raises:
            ConfigError: if the entry is malformed or names an unsupported
                aggregation function.
        """
        # Validate before copying: a non-dict entry may not even have .copy().
        if not isinstance(agg, dict) or not agg:
            raise ConfigError(
                f"Aggregation must be a dict with column and agg function, got {agg} instead"
            )
        # Work on a copy so the task definition is never mutated.
        options = dict(agg)
        # The first entry of the dictionary is ``column: function``.
        col, func = next(iter(options.items()))
        if not isinstance(func, str):
            raise ConfigError(
                f"Aggregation function for column {col} must be a string, got {func!r} instead"
            )
        fn = func.lower()
        options.pop(col)
        # Output names are expanded as keyword arguments, so they must be
        # non-empty strings; fall back to the default when alias is null.
        alias = str(options.pop("alias", None) or f"{col}_{fn}")

        if fn in cls._PASSTHROUGH:
            aggfunc = fn
        elif fn in cls._REDUCERS:
            if options:
                aggfunc = lambda series: getattr(series, fn)(**options)  # noqa: E731
            else:
                aggfunc = fn
        elif fn == "kurt":
            # Always computed on the Series; NOTE(review): kurt does not seem
            # to be exposed as a GroupBy method on every pandas version.
            aggfunc = lambda series: series.kurt(**options)  # noqa: E731
        elif fn in cls._CUSTOM:
            aggfunc = cls._CUSTOM[fn]
        elif fn == "apply":
            if "function" not in options:
                raise ConfigError("Function must be specified for apply")
            aggfunc = options["function"]
            if not callable(aggfunc):
                raise ConfigError(f"Function {aggfunc} must be callable")
        elif callable(getattr(np, fn, None)):
            # Any other NumPy callable, e.g. ``prod`` or ``nanmean``.
            np_func = getattr(np, fn)
            aggfunc = lambda series: np_func(series, **options)  # noqa: E731
        else:
            raise ConfigError(f"Unsupported aggregation function: {fn}")
        return alias, (col, aggfunc)

    async def run(self):
        """Group the input DataFrame and return the result.

        Group-by columns that contain ``list`` values are left out because
        lists are unhashable.  Without aggregations the distinct combinations
        of the remaining columns are returned.

        Raises:
            ComponentError: on any failure while grouping or aggregating.
        """
        self._result = None
        try:
            missing = [col for col in self._columns if col not in self.data.columns]
            if missing:
                raise ComponentError(
                    f"tGroup: group_by columns not found in DataFrame: {missing}"
                )
            hashable_columns = [
                col for col in self._columns
                if not self.data[col].apply(lambda x: isinstance(x, list)).any()
            ]
            if self.aggregations:
                agg_dict = {}
                for agg in self.aggregations:
                    alias, spec = self._build_aggregation(agg)
                    agg_dict[alias] = spec
                # Perform group by with aggregation
                try:
                    df = self.data.groupby(hashable_columns).agg(**agg_dict).reset_index()
                except KeyError as err:
                    raise ConfigError(
                        f"Invalid columns for aggregation: {err}"
                    ) from err
                except Exception as err:
                    raise ComponentError(
                        f"Error during aggregation: {err}"
                    ) from err
            else:
                # Perform group by without aggregation, avoiding unhashable
                # types: keep only the unique combinations.
                df = self.data[hashable_columns].drop_duplicates().reset_index(drop=True)
        except ComponentError:
            # Already carries a descriptive message; do not wrap it again.
            raise
        except Exception as err:
            raise ComponentError(f"Generic Error on Data: error: {err}") from err
        if hasattr(self, "columns"):
            # returning only a subset of data
            df = df[self.columns]
        if self._debug is True:
            print("::: Printing Column Information === ")
            print("Grouped: ", df)
            for column, t in df.dtypes.items():
                # An empty result has no first row to show as a sample.
                sample = df[column].iloc[0] if len(df.index) > 0 else None
                print(column, "->", t, "->", sample)
        self._result = df
        return self._result
|
@@ -0,0 +1,270 @@
|
|
1
|
+
import asyncio
|
2
|
+
from typing import Any, Union
|
3
|
+
from collections.abc import Callable
|
4
|
+
import pandas
|
5
|
+
from pandas import DataFrame
|
6
|
+
from asyncdb.exceptions import NoDataFound
|
7
|
+
from ..exceptions import ComponentError, DataNotFound
|
8
|
+
from .flow import FlowComponent
|
9
|
+
|
10
|
+
|
11
|
+
class tJoin(FlowComponent):
    """
    tJoin

    Overview

        The tJoin class is a component for joining two Pandas DataFrames based on specified join conditions.
        It supports various join types (such as left, right, inner, and outer joins) and handles different
        scenarios like missing data, custom join conditions, and multi-source joins.

    .. table:: Properties
       :widths: auto

       +-----------+----------+--------------------------------------------------------------------------+
       | Name      | Required | Description                                                              |
       +-----------+----------+--------------------------------------------------------------------------+
       | df1       | Yes      | The left DataFrame to join.                                              |
       +-----------+----------+--------------------------------------------------------------------------+
       | df2       | Yes      | The right DataFrame to join.                                             |
       +-----------+----------+--------------------------------------------------------------------------+
       | type      | No       | The type of join to perform, defaults to "left". Supported values are    |
       |           |          | "left", "right", "inner", "outer" and "anti-join". An "anti-join"        |
       |           |          | returns A - B, i.e. all rows present in df1 but not in df2.              |
       +-----------+----------+--------------------------------------------------------------------------+
       | depends   | Yes      | A list of dependencies defining the sources for the join.                |
       +-----------+----------+--------------------------------------------------------------------------+
       | operator  | No       | The logical operator to use for join conditions, defaults to "and".      |
       +-----------+----------+--------------------------------------------------------------------------+
       | fk        | No       | The foreign key or list of keys to use for joining DataFrames.           |
       +-----------+----------+--------------------------------------------------------------------------+
       | no_copy   | No       | When set, its value is passed as the ``copy`` argument of pandas.merge.  |
       +-----------+----------+--------------------------------------------------------------------------+
       | join_with | No       | A list of additional keys to use for join conditions.                    |
       +-----------+----------+--------------------------------------------------------------------------+
       | args      | No       | Extra keyword arguments forwarded to pandas.merge (e.g. validate).       |
       +-----------+----------+--------------------------------------------------------------------------+

    Return

        The methods in this class manage the joining of two Pandas DataFrames, including initialization,
        execution, and result handling. It ensures proper handling of temporary columns and provides
        metrics on the joined rows.

    Example:

    ```yaml
    tJoin:
      depends:
      - TransformRows_2
      - QueryToPandas_3
      type: left
      fk:
      - store_number
      args:
        validate: many_to_many
    ```

    """  # noqa

    def __init__(
        self,
        loop: asyncio.AbstractEventLoop = None,
        job: Callable = None,
        stat: Callable = None,
        **kwargs,
    ) -> None:
        """Set the join defaults, then delegate to the FlowComponent constructor."""
        # Defaults are assigned before super().__init__ so that values arriving
        # through kwargs can still replace them (assumption: the base class maps
        # kwargs to attributes -- confirm against FlowComponent).
        self.type: str = "left"
        self.df1: Union[DataFrame, Any] = None
        self.df2: Union[DataFrame, Any] = None
        super().__init__(loop=loop, job=job, stat=stat, **kwargs)

    async def start(self, **kwargs):
        """Resolve the left (df1) and right (df2) DataFrames to be joined.

        With multiple dependencies (``self._multi``) the outputs of the first
        and second previous components are used.  Otherwise the optional
        ``left`` / ``right`` attributes name the tasks to read from.

        Returns:
            True when the sources were resolved.

        Raises:
            ComponentError: ``depends`` is missing, or a previous component
                is not available.
            DataNotFound: neither ``left`` nor ``right`` is declared, or the
                named task cannot be resolved.
        """
        if not hasattr(self, "depends"):
            raise ComponentError(
                "Missing Dependency (depends) Attribute for declaring Sources."
            )
        if self._multi:
            self.df1 = self._previous_output(0, "LEFT")
            self.df2 = self._previous_output(1, "RIGHT")
            return True
        has_left = hasattr(self, "left")
        has_right = hasattr(self, "right")
        if not (has_left or has_right):
            raise DataNotFound("Data Was Not Found for Join", status=404)
        # TODO: this not work:
        # think in a persistent structure to save every component after
        # execution, to get later
        # Both sides are resolved independently: declaring "left" must not
        # prevent "right" from being loaded.
        if has_left:
            self.df1 = self._task_output(self.left, "Left")
        if has_right:
            self.df2 = self._task_output(self.right, "Right")
        return True

    def _previous_output(self, position: int, side: str):
        """Return the output of the previous component at ``position``."""
        try:
            return self.previous[position].output()
        except IndexError as ex:
            # "depends" may be shorter than expected too; never let the lookup
            # of the name hide the real problem.
            try:
                name = self.depends[position]
            except (IndexError, KeyError, TypeError):
                name = "<unknown>"
            raise ComponentError(f"Missing {side} Dataframe: {name}") from ex

    def _task_output(self, task_name: str, side: str):
        """Return the output of the task referenced as ``<Name>_<number>``."""
        try:
            # Only the trailing number matters, so names containing
            # underscores are accepted as well.
            _, num = task_name.rsplit("_", 1)
            task = self.JobTask.getJobByID(int(num) - 1)
            return task["component"].output()
        except (KeyError, ValueError) as ex:
            raise DataNotFound(f"Failed {side} Task name: {task_name}") from ex

    def cleanup_temp_rows(self, df: pandas.DataFrame = None) -> None:
        """Remove the temporary ``_tmp_key_*`` columns added by :meth:`run`.

        The helper columns are dropped in place from ``self.df1`` and
        ``self.df2`` and, when given, from the joined ``df``.  Missing columns
        are ignored so the cleanup of one frame never skips the others.
        """
        sources = (
            (self.df1, "_tmp_key_df1"),
            (self.df2, "_tmp_key_df2"),
        )
        for frame, column in sources:
            if frame is not None:
                frame.drop(columns=[column], errors="ignore", inplace=True)
        if df is not None:
            # Remember to drop the temporary columns before finalizing the dataframe
            df.drop(
                columns=["_tmp_key_df1", "_tmp_key_df2"],
                errors="ignore",
                inplace=True,
            )

    def _merge_on_all_keys(self, args: dict) -> DataFrame:
        """Run a single pandas.merge (operator "and"), including anti-join."""
        if self.type != "anti-join":
            return pandas.merge(
                self.df1,
                self.df2,
                how=self.type,
                suffixes=("_left", "_right"),
                **args,
            )
        # Perform a left merge, adding an indicator column to track the merge source
        merged = pandas.merge(
            self.df1,
            self.df2,
            how="left",
            suffixes=("", "_right"),
            indicator=True,
            **args,
        )
        # Keep only the rows that appear exclusively in df1, then drop the
        # indicator column.
        left_only = merged[merged["_merge"] == "left_only"].drop(
            columns=["_merge"]
        )
        # Remove any columns that were suffixed with '_right', since we only
        # want columns from df1.
        return left_only.loc[:, ~left_only.columns.str.endswith("_right")]

    def _merge_on_any_key(self, args: dict) -> DataFrame:
        """Try every key in ``join_with`` in turn (operator other than "and").

        Rows of df1 that did not match on one key are retried with the next
        one; rows that never match are appended at the end.
        """
        args = dict(args)
        # pandas.merge rejects "on" combined with "left_on"/"right_on".
        args.pop("on", None)
        if hasattr(self, "fk"):
            args["left_on"] = self.fk
        else:
            args["left_index"] = True
        pending = self.df1
        right = self.df2.copy()
        matched = []
        for key in self.join_with:
            step = pandas.merge(
                pending,
                right,
                right_on=key,
                how=self.type,
                suffixes=("_left", None),
                **args,
            )
            suffixed = step.columns[step.columns.str.contains("_left")]
            # Non-inplace drops: the filtered frames are slices of "step".
            pending = step[step[key].isnull()].drop(columns=suffixed)
            matched.append(step[step[key].notnull()].drop(columns=suffixed))
        # merge the last (not matched) rows
        matched.append(pending)
        return pandas.concat(matched, axis=0).reset_index(drop=True)

    async def run(self):
        """Join df1 with df2 and store the outcome in ``self._result``.

        Returns:
            The joined DataFrame.  When the join type is "left" and df2 is
            missing or empty, df1 becomes the result and ``True`` is returned.

        Raises:
            DataNotFound: df1 is missing/empty, df2 is missing/empty for a
                non-left join, or the join produced no rows.
            ComponentError: pandas could not perform the merge.
        """
        if self.df1 is None:
            raise DataNotFound("Main data Not Found for Join", status=404)
        if self.df1.empty:
            raise DataNotFound("Data Was Not Found on Dataframe 1", status=404)
        if self.df2 is None or self.df2.empty:
            if self.type == "left":
                # Nothing to join against: a left join keeps df1 untouched.
                self._result = self.df1
                return True
            raise DataNotFound("Data Was Not Found on Dataframe 2", status=404)
        args = {}
        if hasattr(self, "no_copy"):
            args["copy"] = self.no_copy
        if not self.type:
            self.type = "inner"
            args["left_index"] = True
        if hasattr(self, "args") and isinstance(self.args, dict):
            args = {**args, **self.args}
        operator = getattr(self, "operator", "and")
        if hasattr(self, "fk"):
            args["on"] = self.fk
        else:
            args["left_index"] = True
        # making a Join between 2 dataframes
        # Add a unique identifier to both dataframes before the merge
        self.df1["_tmp_key_df1"] = range(1, len(self.df1) + 1)
        self.df2["_tmp_key_df2"] = range(1, len(self.df2) + 1)
        try:
            if operator == "and":
                df = self._merge_on_all_keys(args)
            else:
                df = self._merge_on_any_key(args)
        except (ValueError, KeyError) as err:
            self.cleanup_temp_rows()
            raise ComponentError(
                f"Cannot Join with missing Column: {err!s}"
            ) from err
        except Exception as err:
            # No joined frame exists at this point; only the sources need
            # their helper columns removed.
            self.cleanup_temp_rows()
            raise ComponentError(
                f"Unknown JOIN error {err!s}"
            ) from err
        try:
            numrows = len(df.index)
            if numrows == 0:
                raise DataNotFound(
                    "Cannot make any JOIN, returns zero coincidences"
                )
            self._variables[f"{self.StepName}_NUMROWS"] = numrows
            print("ON END> ", numrows)
            self.add_metric("TOTAL_ROWS", numrows)
            # After merge, count matched rows from each dataframe
            matched_rows_df1 = df["_tmp_key_df1"].nunique()
            matched_rows_df2 = df["_tmp_key_df2"].nunique()
            # Log or print the count of matched rows
            print(f"Matched Rows from df1: {matched_rows_df1}")
            print(f"Matched Rows from df2: {matched_rows_df2}")
            _rows = {
                "df1": matched_rows_df1,
                "df2": matched_rows_df2,
            }
            self.add_metric("JOINED_ROWS", _rows)
            if self._debug is True:
                print("::: Printing Column Information === ")
                for column, t in df.dtypes.items():
                    print(column, "->", t, "->", df[column].iloc[0])
        finally:
            # The helper columns must never leak into the upstream frames or
            # into the result, whatever happened above.
            self.cleanup_temp_rows(df)
        self._result = df
        return self._result

    async def close(self):
        """Nothing to release."""
        pass
|
@@ -0,0 +1,54 @@
|
|
1
|
+
"""
|
2
|
+
tMap Transformations functions based on Series.
|
3
|
+
"""
|
4
|
+
from typing import List
|
5
|
+
import pandas as pd
|
6
|
+
import numpy as np
|
7
|
+
|
8
|
+
|
9
|
+
def to_string(series: pd.Series, remove_nan: bool = False, **kwargs) -> pd.Series:
    """Cast a pandas Series to the ``string`` dtype.

    Args:
        series (pandas.Series): column to be converted.
        remove_nan (bool, optional): when exactly ``True``, missing values
            are replaced by an empty string. Defaults to False.
        **kwargs: accepted for signature compatibility and not used.

    Returns:
        pandas.Series: a new Series holding string values.
    """
    as_text = series.astype("string")
    # Identity check on purpose: only the literal True enables the cleanup.
    if remove_nan is not True:
        return as_text
    return as_text.replace(np.nan, "", regex=True)
|
24
|
+
|
25
|
+
|
26
|
+
def to_integer(series: pd.Series, **kwargs):
    """
    Convert a pandas Series to the nullable ``Int64`` dtype.

    Values that cannot be parsed as numbers are coerced to missing values.
    If any step raises, the error is printed and the Series is returned as
    it was after the last successful step.

    :param series: The pandas Series to be converted.
    :param kwargs: Additional keyword arguments (not used).
    :return: The converted pandas Series.
    """
    converted = series
    try:
        converted = pd.to_numeric(converted, errors="coerce")
        converted = converted.astype("Int64", copy=False)
    except Exception as err:
        print(f"Error on to_Integer: {err}")
    return converted
|
40
|
+
|
41
|
+
|
42
|
+
def concat(df: pd.DataFrame, columns: List[str], sep: str = " ") -> pd.Series:
    """
    Join the string form of several DataFrame columns, row by row.

    :param df: The input DataFrame
    :param columns: The columns to join, in order (at least one)
    :param sep: The text placed between consecutive values (default is a space)
    :return: A Series with the joined values
    """
    head, tail = columns[0], columns[1:]
    joined = df[head].astype(str)
    for name in tail:
        piece = df[name].astype(str)
        # Accumulate in place on the working Series.
        joined += sep + piece
    return joined
|