flowtask 5.8.4__cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowtask/__init__.py +93 -0
- flowtask/__main__.py +38 -0
- flowtask/bots/__init__.py +6 -0
- flowtask/bots/check.py +93 -0
- flowtask/bots/codebot.py +51 -0
- flowtask/components/ASPX.py +148 -0
- flowtask/components/AddDataset.py +352 -0
- flowtask/components/Amazon.py +523 -0
- flowtask/components/AutoTask.py +314 -0
- flowtask/components/Azure.py +80 -0
- flowtask/components/AzureUsers.py +106 -0
- flowtask/components/BaseAction.py +91 -0
- flowtask/components/BaseLoop.py +198 -0
- flowtask/components/BestBuy.py +800 -0
- flowtask/components/CSVToGCS.py +120 -0
- flowtask/components/CompanyScraper/__init__.py +1 -0
- flowtask/components/CompanyScraper/parsers/__init__.py +6 -0
- flowtask/components/CompanyScraper/parsers/base.py +102 -0
- flowtask/components/CompanyScraper/parsers/explorium.py +192 -0
- flowtask/components/CompanyScraper/parsers/leadiq.py +206 -0
- flowtask/components/CompanyScraper/parsers/rocket.py +133 -0
- flowtask/components/CompanyScraper/parsers/siccode.py +109 -0
- flowtask/components/CompanyScraper/parsers/visualvisitor.py +130 -0
- flowtask/components/CompanyScraper/parsers/zoominfo.py +118 -0
- flowtask/components/CompanyScraper/scrapper.py +1054 -0
- flowtask/components/CopyTo.py +177 -0
- flowtask/components/CopyToBigQuery.py +243 -0
- flowtask/components/CopyToMongoDB.py +291 -0
- flowtask/components/CopyToPg.py +609 -0
- flowtask/components/CopyToRethink.py +207 -0
- flowtask/components/CreateGCSBucket.py +102 -0
- flowtask/components/CreateReport/CreateReport.py +228 -0
- flowtask/components/CreateReport/__init__.py +9 -0
- flowtask/components/CreateReport/charts/__init__.py +15 -0
- flowtask/components/CreateReport/charts/bar.py +51 -0
- flowtask/components/CreateReport/charts/base.py +66 -0
- flowtask/components/CreateReport/charts/pie.py +64 -0
- flowtask/components/CreateReport/utils.py +9 -0
- flowtask/components/CustomerSatisfaction.py +196 -0
- flowtask/components/DataInput.py +200 -0
- flowtask/components/DateList.py +255 -0
- flowtask/components/DbClient.py +163 -0
- flowtask/components/DialPad.py +146 -0
- flowtask/components/DocumentDBQuery.py +200 -0
- flowtask/components/DownloadFrom.py +371 -0
- flowtask/components/DownloadFromD2L.py +113 -0
- flowtask/components/DownloadFromFTP.py +181 -0
- flowtask/components/DownloadFromIMAP.py +315 -0
- flowtask/components/DownloadFromS3.py +198 -0
- flowtask/components/DownloadFromSFTP.py +265 -0
- flowtask/components/DownloadFromSharepoint.py +110 -0
- flowtask/components/DownloadFromSmartSheet.py +114 -0
- flowtask/components/DownloadS3File.py +229 -0
- flowtask/components/Dummy.py +59 -0
- flowtask/components/DuplicatePhoto.py +411 -0
- flowtask/components/EmployeeEvaluation.py +237 -0
- flowtask/components/ExecuteSQL.py +323 -0
- flowtask/components/ExtractHTML.py +178 -0
- flowtask/components/FileBase.py +178 -0
- flowtask/components/FileCopy.py +181 -0
- flowtask/components/FileDelete.py +82 -0
- flowtask/components/FileExists.py +146 -0
- flowtask/components/FileIteratorDelete.py +112 -0
- flowtask/components/FileList.py +194 -0
- flowtask/components/FileOpen.py +75 -0
- flowtask/components/FileRead.py +120 -0
- flowtask/components/FileRename.py +106 -0
- flowtask/components/FilterIf.py +284 -0
- flowtask/components/FilterRows/FilterRows.py +200 -0
- flowtask/components/FilterRows/__init__.py +10 -0
- flowtask/components/FilterRows/functions.py +4 -0
- flowtask/components/GCSToBigQuery.py +103 -0
- flowtask/components/GoogleA4.py +150 -0
- flowtask/components/GoogleGeoCoding.py +344 -0
- flowtask/components/GooglePlaces.py +315 -0
- flowtask/components/GoogleSearch.py +539 -0
- flowtask/components/HTTPClient.py +268 -0
- flowtask/components/ICIMS.py +146 -0
- flowtask/components/IF.py +179 -0
- flowtask/components/IcimsFolderCopy.py +173 -0
- flowtask/components/ImageFeatures/__init__.py +5 -0
- flowtask/components/ImageFeatures/process.py +233 -0
- flowtask/components/IteratorBase.py +251 -0
- flowtask/components/LangchainLoader/__init__.py +5 -0
- flowtask/components/LangchainLoader/loader.py +194 -0
- flowtask/components/LangchainLoader/loaders/__init__.py +22 -0
- flowtask/components/LangchainLoader/loaders/abstract.py +362 -0
- flowtask/components/LangchainLoader/loaders/basepdf.py +50 -0
- flowtask/components/LangchainLoader/loaders/docx.py +91 -0
- flowtask/components/LangchainLoader/loaders/html.py +119 -0
- flowtask/components/LangchainLoader/loaders/pdfblocks.py +146 -0
- flowtask/components/LangchainLoader/loaders/pdfmark.py +79 -0
- flowtask/components/LangchainLoader/loaders/pdftables.py +135 -0
- flowtask/components/LangchainLoader/loaders/qa.py +67 -0
- flowtask/components/LangchainLoader/loaders/txt.py +55 -0
- flowtask/components/LeadIQ.py +650 -0
- flowtask/components/Loop.py +253 -0
- flowtask/components/Lowes.py +334 -0
- flowtask/components/MS365Usage.py +156 -0
- flowtask/components/MSTeamsMessages.py +320 -0
- flowtask/components/MarketClustering.py +1051 -0
- flowtask/components/MergeFiles.py +362 -0
- flowtask/components/MilvusOutput.py +87 -0
- flowtask/components/NearByStores.py +175 -0
- flowtask/components/NetworkNinja/__init__.py +6 -0
- flowtask/components/NetworkNinja/models/__init__.py +52 -0
- flowtask/components/NetworkNinja/models/abstract.py +177 -0
- flowtask/components/NetworkNinja/models/account.py +39 -0
- flowtask/components/NetworkNinja/models/client.py +19 -0
- flowtask/components/NetworkNinja/models/district.py +14 -0
- flowtask/components/NetworkNinja/models/events.py +101 -0
- flowtask/components/NetworkNinja/models/forms.py +499 -0
- flowtask/components/NetworkNinja/models/market.py +16 -0
- flowtask/components/NetworkNinja/models/organization.py +34 -0
- flowtask/components/NetworkNinja/models/photos.py +125 -0
- flowtask/components/NetworkNinja/models/project.py +44 -0
- flowtask/components/NetworkNinja/models/region.py +28 -0
- flowtask/components/NetworkNinja/models/store.py +203 -0
- flowtask/components/NetworkNinja/models/user.py +151 -0
- flowtask/components/NetworkNinja/router.py +854 -0
- flowtask/components/Odoo.py +175 -0
- flowtask/components/OdooInjector.py +192 -0
- flowtask/components/OpenFromXML.py +126 -0
- flowtask/components/OpenWeather.py +41 -0
- flowtask/components/OpenWithBase.py +616 -0
- flowtask/components/OpenWithPandas.py +715 -0
- flowtask/components/PGPDecrypt.py +199 -0
- flowtask/components/PandasIterator.py +187 -0
- flowtask/components/PandasToFile.py +189 -0
- flowtask/components/Paradox.py +339 -0
- flowtask/components/ParamIterator.py +117 -0
- flowtask/components/ParseHTML.py +84 -0
- flowtask/components/PlacerStores.py +249 -0
- flowtask/components/Pokemon.py +507 -0
- flowtask/components/PositiveBot.py +62 -0
- flowtask/components/PowerPointSlide.py +400 -0
- flowtask/components/PrintMessage.py +127 -0
- flowtask/components/ProductCompetitors/__init__.py +5 -0
- flowtask/components/ProductCompetitors/parsers/__init__.py +7 -0
- flowtask/components/ProductCompetitors/parsers/base.py +72 -0
- flowtask/components/ProductCompetitors/parsers/bestbuy.py +86 -0
- flowtask/components/ProductCompetitors/parsers/lowes.py +103 -0
- flowtask/components/ProductCompetitors/scrapper.py +155 -0
- flowtask/components/ProductCompliant.py +169 -0
- flowtask/components/ProductInfo/__init__.py +1 -0
- flowtask/components/ProductInfo/parsers/__init__.py +5 -0
- flowtask/components/ProductInfo/parsers/base.py +83 -0
- flowtask/components/ProductInfo/parsers/brother.py +97 -0
- flowtask/components/ProductInfo/parsers/canon.py +167 -0
- flowtask/components/ProductInfo/parsers/epson.py +118 -0
- flowtask/components/ProductInfo/parsers/hp.py +131 -0
- flowtask/components/ProductInfo/parsers/samsung.py +97 -0
- flowtask/components/ProductInfo/scraper.py +319 -0
- flowtask/components/ProductPricing.py +118 -0
- flowtask/components/QS.py +261 -0
- flowtask/components/QSBase.py +201 -0
- flowtask/components/QueryIterator.py +273 -0
- flowtask/components/QueryToInsert.py +327 -0
- flowtask/components/QueryToPandas.py +432 -0
- flowtask/components/RESTClient.py +195 -0
- flowtask/components/RethinkDBQuery.py +189 -0
- flowtask/components/Rsync.py +74 -0
- flowtask/components/RunSSH.py +59 -0
- flowtask/components/RunShell.py +71 -0
- flowtask/components/SalesForce.py +20 -0
- flowtask/components/SaveImageBank/__init__.py +257 -0
- flowtask/components/SchedulingVisits.py +592 -0
- flowtask/components/ScrapPage.py +216 -0
- flowtask/components/ScrapSearch.py +79 -0
- flowtask/components/SendNotify.py +257 -0
- flowtask/components/SentimentAnalysis.py +694 -0
- flowtask/components/ServiceScrapper/__init__.py +5 -0
- flowtask/components/ServiceScrapper/parsers/__init__.py +1 -0
- flowtask/components/ServiceScrapper/parsers/base.py +94 -0
- flowtask/components/ServiceScrapper/parsers/costco.py +93 -0
- flowtask/components/ServiceScrapper/scrapper.py +199 -0
- flowtask/components/SetVariables.py +156 -0
- flowtask/components/SubTask.py +182 -0
- flowtask/components/SuiteCRM.py +48 -0
- flowtask/components/Switch.py +175 -0
- flowtask/components/TableBase.py +148 -0
- flowtask/components/TableDelete.py +312 -0
- flowtask/components/TableInput.py +143 -0
- flowtask/components/TableOutput/TableOutput.py +384 -0
- flowtask/components/TableOutput/__init__.py +3 -0
- flowtask/components/TableSchema.py +534 -0
- flowtask/components/Target.py +223 -0
- flowtask/components/ThumbnailGenerator.py +156 -0
- flowtask/components/ToPandas.py +67 -0
- flowtask/components/TransformRows/TransformRows.py +507 -0
- flowtask/components/TransformRows/__init__.py +9 -0
- flowtask/components/TransformRows/functions.py +559 -0
- flowtask/components/TransposeRows.py +176 -0
- flowtask/components/UPCDatabase.py +86 -0
- flowtask/components/UnGzip.py +171 -0
- flowtask/components/Uncompress.py +172 -0
- flowtask/components/UniqueRows.py +126 -0
- flowtask/components/Unzip.py +107 -0
- flowtask/components/UpdateOperationalVars.py +147 -0
- flowtask/components/UploadTo.py +299 -0
- flowtask/components/UploadToS3.py +136 -0
- flowtask/components/UploadToSFTP.py +160 -0
- flowtask/components/UploadToSharepoint.py +205 -0
- flowtask/components/UserFunc.py +122 -0
- flowtask/components/VivaTracker.py +140 -0
- flowtask/components/WSDLClient.py +123 -0
- flowtask/components/Wait.py +18 -0
- flowtask/components/Walmart.py +199 -0
- flowtask/components/Workplace.py +134 -0
- flowtask/components/XMLToPandas.py +267 -0
- flowtask/components/Zammad/__init__.py +41 -0
- flowtask/components/Zammad/models.py +0 -0
- flowtask/components/ZoomInfoScraper.py +409 -0
- flowtask/components/__init__.py +104 -0
- flowtask/components/abstract.py +18 -0
- flowtask/components/flow.py +530 -0
- flowtask/components/google.py +335 -0
- flowtask/components/group.py +221 -0
- flowtask/components/py.typed +0 -0
- flowtask/components/reviewscrap.py +132 -0
- flowtask/components/tAutoincrement.py +117 -0
- flowtask/components/tConcat.py +109 -0
- flowtask/components/tExplode.py +119 -0
- flowtask/components/tFilter.py +184 -0
- flowtask/components/tGroup.py +236 -0
- flowtask/components/tJoin.py +270 -0
- flowtask/components/tMap/__init__.py +9 -0
- flowtask/components/tMap/functions.py +54 -0
- flowtask/components/tMap/tMap.py +450 -0
- flowtask/components/tMelt.py +112 -0
- flowtask/components/tMerge.py +114 -0
- flowtask/components/tOrder.py +93 -0
- flowtask/components/tPandas.py +94 -0
- flowtask/components/tPivot.py +71 -0
- flowtask/components/tPluckCols.py +76 -0
- flowtask/components/tUnnest.py +82 -0
- flowtask/components/user.py +401 -0
- flowtask/conf.py +457 -0
- flowtask/download.py +102 -0
- flowtask/events/__init__.py +11 -0
- flowtask/events/events/__init__.py +20 -0
- flowtask/events/events/abstract.py +95 -0
- flowtask/events/events/alerts/__init__.py +362 -0
- flowtask/events/events/alerts/colfunctions.py +131 -0
- flowtask/events/events/alerts/functions.py +158 -0
- flowtask/events/events/dummy.py +12 -0
- flowtask/events/events/exec.py +124 -0
- flowtask/events/events/file/__init__.py +7 -0
- flowtask/events/events/file/base.py +51 -0
- flowtask/events/events/file/copy.py +23 -0
- flowtask/events/events/file/delete.py +16 -0
- flowtask/events/events/interfaces/__init__.py +9 -0
- flowtask/events/events/interfaces/client.py +67 -0
- flowtask/events/events/interfaces/credentials.py +28 -0
- flowtask/events/events/interfaces/notifications.py +58 -0
- flowtask/events/events/jira.py +122 -0
- flowtask/events/events/log.py +26 -0
- flowtask/events/events/logerr.py +52 -0
- flowtask/events/events/notify.py +59 -0
- flowtask/events/events/notify_event.py +160 -0
- flowtask/events/events/publish.py +54 -0
- flowtask/events/events/sendfile.py +104 -0
- flowtask/events/events/task.py +97 -0
- flowtask/events/events/teams.py +98 -0
- flowtask/events/events/webhook.py +58 -0
- flowtask/events/manager.py +287 -0
- flowtask/exceptions.c +39393 -0
- flowtask/exceptions.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/extensions/__init__.py +3 -0
- flowtask/extensions/abstract.py +82 -0
- flowtask/extensions/logging/__init__.py +65 -0
- flowtask/hooks/__init__.py +9 -0
- flowtask/hooks/actions/__init__.py +22 -0
- flowtask/hooks/actions/abstract.py +66 -0
- flowtask/hooks/actions/dummy.py +23 -0
- flowtask/hooks/actions/jira.py +74 -0
- flowtask/hooks/actions/rest.py +320 -0
- flowtask/hooks/actions/sampledata.py +37 -0
- flowtask/hooks/actions/sensor.py +23 -0
- flowtask/hooks/actions/task.py +9 -0
- flowtask/hooks/actions/ticket.py +37 -0
- flowtask/hooks/actions/zammad.py +55 -0
- flowtask/hooks/hook.py +62 -0
- flowtask/hooks/models.py +17 -0
- flowtask/hooks/service.py +187 -0
- flowtask/hooks/step.py +91 -0
- flowtask/hooks/types/__init__.py +23 -0
- flowtask/hooks/types/base.py +129 -0
- flowtask/hooks/types/brokers/__init__.py +11 -0
- flowtask/hooks/types/brokers/base.py +54 -0
- flowtask/hooks/types/brokers/mqtt.py +35 -0
- flowtask/hooks/types/brokers/rabbitmq.py +82 -0
- flowtask/hooks/types/brokers/redis.py +83 -0
- flowtask/hooks/types/brokers/sqs.py +44 -0
- flowtask/hooks/types/fs.py +232 -0
- flowtask/hooks/types/http.py +49 -0
- flowtask/hooks/types/imap.py +200 -0
- flowtask/hooks/types/jira.py +279 -0
- flowtask/hooks/types/mail.py +205 -0
- flowtask/hooks/types/postgres.py +98 -0
- flowtask/hooks/types/responses/__init__.py +8 -0
- flowtask/hooks/types/responses/base.py +5 -0
- flowtask/hooks/types/sharepoint.py +288 -0
- flowtask/hooks/types/ssh.py +141 -0
- flowtask/hooks/types/tagged.py +59 -0
- flowtask/hooks/types/upload.py +85 -0
- flowtask/hooks/types/watch.py +71 -0
- flowtask/hooks/types/web.py +36 -0
- flowtask/interfaces/AzureClient.py +137 -0
- flowtask/interfaces/AzureGraph.py +839 -0
- flowtask/interfaces/Boto3Client.py +326 -0
- flowtask/interfaces/DropboxClient.py +173 -0
- flowtask/interfaces/ExcelHandler.py +94 -0
- flowtask/interfaces/FTPClient.py +131 -0
- flowtask/interfaces/GoogleCalendar.py +201 -0
- flowtask/interfaces/GoogleClient.py +133 -0
- flowtask/interfaces/GoogleDrive.py +127 -0
- flowtask/interfaces/GoogleGCS.py +89 -0
- flowtask/interfaces/GoogleGeocoding.py +93 -0
- flowtask/interfaces/GoogleLang.py +114 -0
- flowtask/interfaces/GooglePub.py +61 -0
- flowtask/interfaces/GoogleSheet.py +68 -0
- flowtask/interfaces/IMAPClient.py +137 -0
- flowtask/interfaces/O365Calendar.py +113 -0
- flowtask/interfaces/O365Client.py +220 -0
- flowtask/interfaces/OneDrive.py +284 -0
- flowtask/interfaces/Outlook.py +155 -0
- flowtask/interfaces/ParrotBot.py +130 -0
- flowtask/interfaces/SSHClient.py +378 -0
- flowtask/interfaces/Sharepoint.py +496 -0
- flowtask/interfaces/__init__.py +36 -0
- flowtask/interfaces/azureauth.py +119 -0
- flowtask/interfaces/cache.py +201 -0
- flowtask/interfaces/client.py +82 -0
- flowtask/interfaces/compress.py +525 -0
- flowtask/interfaces/credentials.py +124 -0
- flowtask/interfaces/d2l.py +239 -0
- flowtask/interfaces/databases/__init__.py +5 -0
- flowtask/interfaces/databases/db.py +223 -0
- flowtask/interfaces/databases/documentdb.py +55 -0
- flowtask/interfaces/databases/rethink.py +39 -0
- flowtask/interfaces/dataframes/__init__.py +11 -0
- flowtask/interfaces/dataframes/abstract.py +21 -0
- flowtask/interfaces/dataframes/arrow.py +71 -0
- flowtask/interfaces/dataframes/dt.py +69 -0
- flowtask/interfaces/dataframes/pandas.py +167 -0
- flowtask/interfaces/dataframes/polars.py +60 -0
- flowtask/interfaces/db.py +263 -0
- flowtask/interfaces/env.py +46 -0
- flowtask/interfaces/func.py +137 -0
- flowtask/interfaces/http.py +1780 -0
- flowtask/interfaces/locale.py +40 -0
- flowtask/interfaces/log.py +75 -0
- flowtask/interfaces/mask.py +143 -0
- flowtask/interfaces/notification.py +154 -0
- flowtask/interfaces/playwright.py +339 -0
- flowtask/interfaces/powerpoint.py +368 -0
- flowtask/interfaces/py.typed +0 -0
- flowtask/interfaces/qs.py +376 -0
- flowtask/interfaces/result.py +87 -0
- flowtask/interfaces/selenium_service.py +779 -0
- flowtask/interfaces/smartsheet.py +154 -0
- flowtask/interfaces/stat.py +39 -0
- flowtask/interfaces/task.py +96 -0
- flowtask/interfaces/template.py +118 -0
- flowtask/interfaces/vectorstores/__init__.py +1 -0
- flowtask/interfaces/vectorstores/abstract.py +133 -0
- flowtask/interfaces/vectorstores/milvus.py +669 -0
- flowtask/interfaces/zammad.py +107 -0
- flowtask/models.py +193 -0
- flowtask/parsers/__init__.py +15 -0
- flowtask/parsers/_yaml.c +11978 -0
- flowtask/parsers/_yaml.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/argparser.py +235 -0
- flowtask/parsers/base.c +15155 -0
- flowtask/parsers/base.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/json.c +11968 -0
- flowtask/parsers/json.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/maps.py +49 -0
- flowtask/parsers/toml.c +11968 -0
- flowtask/parsers/toml.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/plugins/__init__.py +16 -0
- flowtask/plugins/components/__init__.py +0 -0
- flowtask/plugins/handler/__init__.py +45 -0
- flowtask/plugins/importer.py +31 -0
- flowtask/plugins/sources/__init__.py +0 -0
- flowtask/runner.py +283 -0
- flowtask/scheduler/__init__.py +9 -0
- flowtask/scheduler/functions.py +493 -0
- flowtask/scheduler/handlers/__init__.py +8 -0
- flowtask/scheduler/handlers/manager.py +504 -0
- flowtask/scheduler/handlers/models.py +58 -0
- flowtask/scheduler/handlers/service.py +72 -0
- flowtask/scheduler/notifications.py +65 -0
- flowtask/scheduler/scheduler.py +993 -0
- flowtask/services/__init__.py +0 -0
- flowtask/services/bots/__init__.py +0 -0
- flowtask/services/bots/telegram.py +264 -0
- flowtask/services/files/__init__.py +11 -0
- flowtask/services/files/manager.py +522 -0
- flowtask/services/files/model.py +37 -0
- flowtask/services/files/service.py +767 -0
- flowtask/services/jira/__init__.py +3 -0
- flowtask/services/jira/jira_actions.py +191 -0
- flowtask/services/tasks/__init__.py +13 -0
- flowtask/services/tasks/launcher.py +213 -0
- flowtask/services/tasks/manager.py +323 -0
- flowtask/services/tasks/service.py +275 -0
- flowtask/services/tasks/task_manager.py +376 -0
- flowtask/services/tasks/tasks.py +155 -0
- flowtask/storages/__init__.py +16 -0
- flowtask/storages/exceptions.py +12 -0
- flowtask/storages/files/__init__.py +8 -0
- flowtask/storages/files/abstract.py +29 -0
- flowtask/storages/files/filesystem.py +66 -0
- flowtask/storages/tasks/__init__.py +19 -0
- flowtask/storages/tasks/abstract.py +26 -0
- flowtask/storages/tasks/database.py +33 -0
- flowtask/storages/tasks/filesystem.py +108 -0
- flowtask/storages/tasks/github.py +119 -0
- flowtask/storages/tasks/memory.py +45 -0
- flowtask/storages/tasks/row.py +25 -0
- flowtask/tasks/__init__.py +0 -0
- flowtask/tasks/abstract.py +526 -0
- flowtask/tasks/command.py +118 -0
- flowtask/tasks/pile.py +486 -0
- flowtask/tasks/py.typed +0 -0
- flowtask/tasks/task.py +778 -0
- flowtask/template/__init__.py +161 -0
- flowtask/tests.py +257 -0
- flowtask/types/__init__.py +8 -0
- flowtask/types/typedefs.c +11347 -0
- flowtask/types/typedefs.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/__init__.py +24 -0
- flowtask/utils/constants.py +117 -0
- flowtask/utils/encoders.py +21 -0
- flowtask/utils/executor.py +112 -0
- flowtask/utils/functions.cpp +14280 -0
- flowtask/utils/functions.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/json.cpp +13349 -0
- flowtask/utils/json.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/mail.py +63 -0
- flowtask/utils/parseqs.c +13324 -0
- flowtask/utils/parserqs.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/stats.py +308 -0
- flowtask/utils/transformations.py +74 -0
- flowtask/utils/uv.py +12 -0
- flowtask/utils/validators.py +97 -0
- flowtask/version.py +11 -0
- flowtask-5.8.4.dist-info/LICENSE +201 -0
- flowtask-5.8.4.dist-info/METADATA +209 -0
- flowtask-5.8.4.dist-info/RECORD +470 -0
- flowtask-5.8.4.dist-info/WHEEL +6 -0
- flowtask-5.8.4.dist-info/entry_points.txt +3 -0
- flowtask-5.8.4.dist-info/top_level.txt +2 -0
- plugins/components/CreateQR.py +39 -0
- plugins/components/TestComponent.py +28 -0
- plugins/components/Use1.py +13 -0
- plugins/components/Workplace.py +117 -0
- plugins/components/__init__.py +3 -0
- plugins/sources/__init__.py +0 -0
- plugins/sources/get_populartimes.py +78 -0
- plugins/sources/google.py +150 -0
- plugins/sources/hubspot.py +679 -0
- plugins/sources/icims.py +679 -0
- plugins/sources/mobileinsight.py +501 -0
- plugins/sources/newrelic.py +262 -0
- plugins/sources/uap.py +268 -0
- plugins/sources/venu.py +244 -0
- plugins/sources/vocinity.py +314 -0
@@ -0,0 +1,609 @@
|
|
1
|
+
import re
|
2
|
+
import asyncio
|
3
|
+
import multiprocessing
|
4
|
+
import gc
|
5
|
+
import csv
|
6
|
+
from decimal import Decimal
|
7
|
+
import datetime
|
8
|
+
from io import BytesIO
|
9
|
+
from typing import Sequence, Union
|
10
|
+
import pandas as pd
|
11
|
+
from pandas.api.types import (
|
12
|
+
is_datetime64_any_dtype,
|
13
|
+
is_datetime64tz_dtype,
|
14
|
+
is_integer_dtype,
|
15
|
+
is_float_dtype,
|
16
|
+
is_bool_dtype
|
17
|
+
)
|
18
|
+
import numpy as np
|
19
|
+
import orjson
|
20
|
+
import asyncpg
|
21
|
+
from asyncpg.exceptions import (
|
22
|
+
StringDataRightTruncationError,
|
23
|
+
UniqueViolationError,
|
24
|
+
ForeignKeyViolationError,
|
25
|
+
NotNullViolationError,
|
26
|
+
)
|
27
|
+
from pgvector.asyncpg import register_vector
|
28
|
+
from asyncdb.exceptions import StatementError, DataError
|
29
|
+
from asyncdb.models import Model
|
30
|
+
# Dataintegration components:
|
31
|
+
from ..exceptions import (
|
32
|
+
ComponentError,
|
33
|
+
DataNotFound,
|
34
|
+
)
|
35
|
+
from .CopyTo import CopyTo, dtypes
|
36
|
+
from ..utils.json import json_decoder, json_encoder
|
37
|
+
|
38
|
+
|
39
|
+
# adding support for primary keys on raw tables
|
40
|
+
# SQL templates used after creating a raw table. Both are filled in with
# str.format(schema=..., table=..., fields=...), where ``fields`` is a
# comma-separated list of column names.
# NOTE: these strings are sent to the database verbatim, so they must contain
# nothing but SQL (a usage example was previously embedded inside
# ``pk_sentence`` and ended up in the executed statement).
pk_sentence = """ALTER TABLE {schema}.{table}
ADD CONSTRAINT {schema}_{table}_pkey PRIMARY KEY({fields});"""
unique_sentence = """ALTER TABLE {schema}.{table}
ADD CONSTRAINT unq_{schema}_{table} UNIQUE({fields});"""

# Example component definition (YAML):
#
#   CopyToPg:
#     tablename: employees
#     schema: bacardi
#     truncate: true
|
55
|
+
|
56
|
+
|
57
|
+
class CopyToPg(CopyTo):
|
58
|
+
"""
|
59
|
+
CopyToPg
|
60
|
+
|
61
|
+
This component allows copy data into a Postgres table,
|
62
|
+
Copy into main postgres using copy_to_table functionality.
|
63
|
+
TODO: Design an Upsert feature with Copy to Pg.
|
64
|
+
.. table:: Properties
|
65
|
+
:widths: auto
|
66
|
+
|
67
|
+
+----------------+----------+----------------------------------------------------------------------------------+
|
68
|
+
| Name | Required | Summary |
|
69
|
+
+----------------+----------+----------------------------------------------------------------------------------+
|
70
|
+
| schema | Yes | Name of the schema where the table resides. |
|
71
|
+
+----------------+----------+----------------------------------------------------------------------------------+
|
72
|
+
| tablename | Yes | Name of the table to insert data into. |
|
73
|
+
+----------------+----------+----------------------------------------------------------------------------------+
|
74
|
+
| truncate | No | Boolean flag indicating whether to truncate the table before inserting. |
|
75
|
+
| | | Defaults to False. |
|
76
|
+
+----------------+----------+----------------------------------------------------------------------------------+
|
77
|
+
| use_chunks | No | Boolean flag indicating whether to insert data in chunks (for large datasets). |
|
78
|
+
| | | Defaults to False. |
|
79
|
+
| | | Requires specifying a `chunksize` property for chunk size determination. |
|
80
|
+
+----------------+----------+----------------------------------------------------------------------------------+
|
81
|
+
| chunksize | No | Integer value specifying the size of each data chunk when `use_chunks` is True. |
|
82
|
+
| | | Defaults to None (chunk size will be calculated based on CPU cores). |
|
83
|
+
+----------------+----------+----------------------------------------------------------------------------------+
|
84
|
+
| use_buffer | No | Boolean flag indicating whether to use a buffer for data insertion (optional). |
|
85
|
+
| | | Defaults to False. |
|
86
|
+
| | | Using a buffer can improve performance for large datasets. |
|
87
|
+
+----------------+----------+----------------------------------------------------------------------------------+
|
88
|
+
| array_columns | No | List of column names containing JSON arrays. These columns will be formatted |
|
89
|
+
| | | appropriately before insertion. |
|
90
|
+
| | | Requires `use_buffer` to be True. |
|
91
|
+
+----------------+----------+----------------------------------------------------------------------------------+
|
92
|
+
| use_quoting | No | Boolean flag indicating whether to use quoting for CSV data insertion (optional).|
|
93
|
+
| | | Defaults to False. |
|
94
|
+
| | | Using quoting can be helpful for data containing special characters. |
|
95
|
+
+----------------+----------+----------------------------------------------------------------------------------+
|
96
|
+
| datasource | No | Using a Datasource instead manual credentials |
|
97
|
+
+----------------+----------+----------------------------------------------------------------------------------+
|
98
|
+
| credentials | No | Supporting manual postgresql credentials |
|
99
|
+
+----------------+----------+----------------------------------------------------------------------------------+
|
100
|
+
|
101
|
+
Returns a dictionary containing metrics about the copy operation:
|
102
|
+
* ROWS_SAVED (int): The number of rows successfully inserted into the target table.
|
103
|
+
* NUM_ROWS (int): The total number of rows processed from the input data.
|
104
|
+
* NUM_COLUMNS (int): The number of columns found in the input data.
|
105
|
+
* (optional): Other metrics specific to the implementation.
|
106
|
+
"""
|
107
|
+
|
108
|
+
async def paralelize_insert(self, columns, tuples):
    """Copy one batch of rows into ``self.schema``.``self.tablename``.

    Args:
        columns: column names forwarded to ``copy_into_table``.
        tuples: the row source forwarded to ``copy_into_table``.

    Returns:
        Whatever ``self._connection.copy_into_table`` returns on success,
        or ``None`` when the copy raised (the error is logged, not
        propagated).
    """
    try:
        return await self._connection.copy_into_table(
            table=self.tablename,
            schema=self.schema,
            source=tuples,
            columns=columns
        )
    except Exception as exc:
        # Same precedence as separate handlers: statement errors first,
        # then data errors, then anything else.
        if isinstance(exc, StatementError):
            label = "Statement Error"
        elif isinstance(exc, DataError):
            label = "Data Error"
        else:
            label = "Pg Error"
        self._logger.exception(
            f"{label}: {exc}",
            stack_info=True
        )
    return None
|
133
|
+
|
134
|
+
def extract_copied(self, result: Union[str, Sequence[str], None]) -> int:
    """Return the number of rows reported by one or more ``COPY n`` statuses.

    Args:
        result: a single status string (e.g. ``"COPY 120"``), an iterable
            of status strings (row-by-row / chunked mode), or ``None``.

    Returns:
        The row count parsed from the status, summed over all items when an
        iterable is given. ``0`` when ``result`` is ``None`` or when no
        status could be parsed.
    """
    if result is None:
        return 0
    # One pattern for both branches so a single status and a list of
    # statuses are parsed identically.
    pattern = r"\bCOPY\s+(\d+)"
    if isinstance(result, str):
        found = re.search(pattern, result)
        if found:
            return int(found.group(1))
        # Do not fall through: iterating a string would scan it character
        # by character and can never match.
        self._logger.error(
            f"CopyToPg: unable to parse COPY status from {result!r}"
        )
        return 0
    # iterable of results (atomic, row-by-row mode)
    total = 0
    for item in result:
        # skip None/empty and anything that is not a status string
        if not item or not isinstance(item, str):
            continue
        found = re.search(pattern, item)
        if found:
            total += int(found.group(1))
    return total
|
151
|
+
|
152
|
+
async def _create_table(self):
    """Create the destination table from the columns of ``self.data``.

    Options are read from ``self.create_table``:

    * ``pk`` (required): column name, or list of column names, for the
      primary key.
    * ``unique`` (optional): column name, or list of column names, for a
      unique constraint.
    * ``drop`` (optional, default ``False``): when exactly ``True``, the
      table is dropped (``DROP TABLE IF EXISTS``) before being created.

    Column types are looked up in ``dtypes`` by the pandas dtype name and
    fall back to ``str`` for unknown dtypes.

    Raises:
        ComponentError: when no primary key is configured or when the
            table creation fails.
    """
    _pk = self.create_table.get("pk", None)
    _unq = self.create_table.get("unique", None)
    _drop = self.create_table.get("drop", False)
    if _pk is None:
        raise ComponentError(
            f"Error creating table: {self.schema}.{self.tablename}: PK not defined."
        )
    # A single column given as a plain string must stay one field:
    # ",".join("id") would otherwise produce "i,d".
    if isinstance(_pk, str):
        _pk = [_pk]
    if isinstance(_unq, str):
        _unq = [_unq]
    # extracting columns:
    cols = []
    for col in self.data.columns.tolist():
        datatype = self.data.dtypes[col]
        try:
            t = dtypes[str(datatype)]
        except KeyError:
            t = str
        cols.append((col, t))
    try:
        cls = Model.make_model(
            name=self.tablename, schema=self.schema, fields=cols
        )
        mdl = cls()  # empty model, only the schema is needed
        if sql := mdl.model(dialect="sql"):
            self._logger.debug(f"CopyToPg: Create Table SQL: {sql}")
            async with await self._connection.connection() as conn:
                if _drop is True:
                    result, error = await conn.execute(
                        sentence=f"DROP TABLE IF EXISTS {self.schema}.{self.tablename};"
                    )
                    self._logger.debug(f"DROP Table: {result}, {error}")
                result, error = await conn.execute(sentence=sql)
                self._logger.debug(f"Create Table: {result!s}")
                if error:
                    raise ComponentError(
                        f"Error on Table creation: {error}"
                    )
                # Add Primary Key(s):
                pk = pk_sentence.format(
                    schema=self.schema,
                    table=self.tablename,
                    fields=",".join(_pk),
                )
                _primary, error = await conn.execute(sentence=pk)
                self._logger.debug(
                    f"Create Table: PK creation: {_primary}, {error}"
                )
                if error:
                    # Kept non-fatal, but made visible outside debug logs.
                    self._logger.error(
                        f"Create Table: PK creation failed: {error}"
                    )
                # Add Unique (if required):
                if _unq is not None:
                    unique = unique_sentence.format(
                        schema=self.schema,
                        table=self.tablename,
                        fields=",".join(_unq),
                    )
                    _unique, error = await conn.execute(sentence=unique)
                    self._logger.debug(
                        f"Create Table: Unique creation: {_unique}, {error}"
                    )
                    if error:
                        self._logger.error(
                            f"Create Table: Unique creation failed: {error}"
                        )
    except ComponentError:
        # Already carries a precise message; do not wrap it a second time.
        raise
    except Exception as err:
        raise ComponentError(
            f"CopyToPg: Error on Table Creation {err}"
        ) from err
|
215
|
+
|
216
|
+
async def _truncate_table(self):
    """Truncate ``self.schema``.``self.tablename``, retrying transient errors.

    The statement is attempted up to ``max_retries + 1`` times. Only a
    statement timeout or an "another operation is in progress" error is
    retried; before each retry a new ``pg`` connection is created and the
    coroutine waits two seconds.

    Raises:
        ComponentError: when the TRUNCATE reports an error, when a
            non-retryable statement error is raised, or when the retries
            are exhausted.
    """
    # ---- SELECT pg_advisory_xact_lock(1);
    truncate = """TRUNCATE {}.{};"""
    truncate = truncate.format(self.schema, self.tablename)
    # Error messages that are worth another attempt.
    retryable = (
        "canceling statement due to statement timeout",
        "another operation is in progress",
    )
    retry_count = 0
    max_retries = 2
    while retry_count <= max_retries:
        try:
            async with await self._connection.connection() as conn:
                result, error = await conn.execute(truncate)
                if error is not None:
                    raise ComponentError(
                        f"CopyToPg Error truncating {self.schema}.{self.tablename}: {error}"
                    )
                await conn.execute(
                    "SELECT pg_advisory_unlock_all();"
                )
                self._logger.debug(
                    f"COPYTOPG TRUNCATED: {result}"
                )
                await asyncio.sleep(5e-3)
                break  # exit loop
        except (asyncpg.exceptions.QueryCanceledError, StatementError) as e:
            message = str(e)
            if not any(text in message for text in retryable):
                # An unrecognised error never advanced retry_count, so the
                # loop could repeat forever; fail instead.
                raise ComponentError(
                    f"CopyToPg Error truncating {self.schema}.{self.tablename}: {message}"
                ) from e
            retry_count += 1
            self._logger.warning(
                f"CopyToPg Error: {message}, Retrying... {retry_count}/{max_retries}"
            )
            if retry_count > max_retries:
                raise ComponentError(
                    f"CopyToPg Error: {message}, Max Retries reached"
                ) from e
            # Create a new connection and wait before repeating the operation:
            self._connection = await self.create_connection(
                driver='pg'
            )
            await asyncio.sleep(2)
|
254
|
+
|
255
|
+
async def _get_column_types(self, conn):
    """Describe the columns of the target table.

    Prepares ``SELECT * FROM schema.tablename LIMIT 0`` on the engine
    returned by ``conn.engine()`` and reads the prepared statement's
    attributes, so no rows are fetched.

    Args:
        conn: connection wrapper exposing ``engine()``.

    Returns:
        A list with one dict per column, in statement order, with the keys
        ``name``, ``pg_oid``, ``pg_type`` and ``schema``. An empty list is
        returned (and the error is logged) when the lookup fails, so the
        return type is the same on success and on failure.
    """
    try:
        engine = conn.engine()
        # LIMIT 0 forces Postgres to return only the RowDescription
        stmt = await engine.prepare(
            f'SELECT * FROM {self.schema}.{self.tablename} LIMIT 0'
        )
        return [
            {
                "name": attr.name,  # column name
                "pg_oid": attr.type.oid,  # numeric OID
                "pg_type": attr.type.name,  # text type name, e.g. "timestamp"
                "schema": attr.type.schema,  # type's schema, e.g. "pg_catalog"
            }
            for attr in stmt.get_attributes()  # tuple of Attribute objects
        ]
    except Exception as e:
        # Best effort: callers get an empty description instead of a crash.
        self._logger.error(f"Error getting column types: {e}")
        return []
|
279
|
+
|
280
|
+
async def _copy_dataframe(self):
    """Copy the DataFrame in ``self.data`` into ``schema.tablename``.

    One of three strategies is used:

    * ``use_chunks is True``: the frame is split into positional chunks,
      each serialized to CSV and sent with ``copy_to_table``. A chunk that
      raises ``StatementError``/``DataError`` is logged and skipped.
    * ``use_buffer`` attribute present: the whole frame is serialized to a
      single CSV buffer and sent with ``copy_to_table``.
    * otherwise: missing values are converted to ``None``, the frame is
      turned into tuples and sent with ``copy_into_table``. When
      ``self._atomic is True`` rows are copied one by one, each inside its
      own savepoint, and failing rows are collected as rejects.

    Returns:
        ``True`` when the frame is empty (nothing is copied), otherwise
        ``None``.

    Raises:
        ComponentError: on an invalid chunk size, or wrapping any error
            raised by the buffer / record strategies.
    """
    columns = list(self.data.columns)

    if self.data.empty:
        self._logger.info("Dataframe is empty, nothing to copy")
        return True

    if hasattr(self, "use_chunks") and self.use_chunks is True:
        self._logger.debug(":: Saving data using Chunks ::")
        # TODO: parallelize chunks
        total_rows = self.data.shape[0]
        if self.chunksize:
            chunk_size = self.chunksize
        else:
            # One chunk per core, but never smaller than a single row:
            # small frames used to yield a zero or negative chunk size.
            chunk_size = max(1, total_rows // multiprocessing.cpu_count())
        if chunk_size <= 0:
            raise ComponentError(
                "CopyToPG: Wrong ChunkSize or Empty Dataframe"
            )
        count = 0
        numrows = 0
        for start in range(0, total_rows, chunk_size):
            # Positional slicing: label-based selection would duplicate
            # rows whenever the index contains repeated labels.
            chunk = self.data.iloc[start:start + chunk_size]
            self._logger.debug(f"Iteration {count}")
            s_buf = BytesIO()
            chunk.to_csv(s_buf, index=None, header=None)
            s_buf.seek(0)
            try:
                async with await self._connection.connection() as conn:
                    result = await conn.engine().copy_to_table(
                        table_name=self.tablename,
                        schema_name=self.schema,
                        source=s_buf,
                        columns=columns,
                        format="csv",
                    )
                    numrows += self.extract_copied(result)
                    count += 1
            except StatementError as err:
                self._logger.error(f"Statement Error: {err}")
                continue
            except DataError as err:
                self._logger.error(f"Data Error: {err}")
                continue
            await asyncio.sleep(5e-3)
        self.add_metric("ROWS_SAVED", numrows)
        return None

    def _pg_array_literal(value):
        # Non-empty list/tuple -> Postgres array literal; anything else -> NaN.
        if isinstance(value, (list, tuple)) and len(value) > 0:
            return "{" + ",".join("'" + str(i) + "'" for i in value) + "}"
        return np.nan

    def _as_list(value):
        # Normalize an array cell to a Python list (None stays None).
        if value is None:
            return None
        if isinstance(value, list):
            return value
        if isinstance(value, str) and value.startswith('['):
            # SECURITY NOTE(review): eval() executes arbitrary code if cell
            # content is not trusted; consider ast.literal_eval. Kept as-is
            # to preserve the accepted input syntax.
            return eval(value)  # noqa
        return [value]

    def _jsonb_encode(data):
        # Binary jsonb payloads start with a one-byte version marker.
        return b"\x01" + orjson.dumps(data)

    def _jsonb_decode(data):
        return orjson.loads(data[1:])

    # Defined before branching so the reject report below is always safe.
    rejects = []
    try:
        result = None
        if hasattr(self, "use_buffer"):
            # ---------------- single CSV buffer ----------------
            if hasattr(self, "array_columns"):
                for col in self.array_columns:
                    self.data[col] = self.data[col].apply(_pg_array_literal)
            s_buf = BytesIO()
            kw = {}
            if hasattr(self, "use_quoting"):
                kw = {"quoting": csv.QUOTE_NONNUMERIC}
            self.data.to_csv(s_buf, index=None, header=None, **kw)
            s_buf.seek(0)
            if hasattr(self, "clean_df"):
                # Release the frame before the copy to lower peak memory.
                del self.data
                gc.collect()
                self.data = pd.DataFrame()
            async with await self._connection.connection() as conn:
                try:
                    engine = conn.engine()
                    await engine.set_type_codec(
                        "json",
                        encoder=orjson.dumps,
                        decoder=orjson.loads,
                        schema="pg_catalog",
                    )
                    # Same binary jsonb codec as the record-based path.
                    await engine.set_type_codec(
                        "jsonb",
                        encoder=_jsonb_encode,
                        decoder=_jsonb_decode,
                        schema="pg_catalog",
                        format="binary",
                    )
                    await register_vector(engine)
                    # Saving as CSV to the table
                    result = await engine.copy_to_table(
                        table_name=self.tablename,
                        schema_name=self.schema,
                        source=s_buf,
                        columns=columns,
                        format="csv",
                    )
                except (
                    StringDataRightTruncationError,
                    ForeignKeyViolationError,
                    NotNullViolationError,
                    UniqueViolationError,
                ) as exc:
                    column = getattr(exc, "column_name", None)
                    raise DataError(
                        f"Error: {exc}, details: {exc.detail}, column: {column}"
                    ) from exc
                except asyncpg.exceptions.DataError as e:
                    self._logger.error(f"Error message: {e}")
                    raise DataError(str(e)) from e
        else:
            # ---------------- record-based copy ----------------
            # Convert pd.NA -> None in nullable numeric / boolean columns.
            num_like = self.data.select_dtypes(
                include=[
                    "Int8", "Int16", "Int32", "Int64",
                    "UInt8", "UInt16", "UInt32", "UInt64",
                    "Float32", "Float64",
                    "boolean",  # pandas' nullable BooleanDtype
                ]
            )
            if not num_like.empty:
                # Cast to object so the column can hold None.
                self.data[num_like.columns] = (
                    num_like.astype(object)
                    .where(pd.notnull(num_like), None)
                )

            # Same treatment for pandas "string" dtype columns.
            u = self.data.select_dtypes(include=["string"])
            if not u.empty:
                self.data[u.columns] = u.astype(object).where(
                    pd.notnull(u), None
                )
            # NOTE(review): the original indentation was lost; this
            # statement is assumed to run unconditionally - confirm.
            self.data = (
                self.data
                .where(pd.notnull(self.data), None)
                .convert_dtypes()
                .astype({c: 'string' for c in u})
            )

            async with await self._connection.connection() as conn:
                # Get PostgreSQL column types
                pg_column_types = await self._get_column_types(conn)  # noqa
                # TODO: using the column types to refine the conversion

            # Handle datetime columns - replace NaT with None
            datetime_cols = self.data.select_dtypes(
                include=['datetime64[ns]', 'datetime64[ns, UTC]']
            )
            if not datetime_cols.empty:
                self.data[datetime_cols.columns] = (
                    self.data[datetime_cols.columns]
                    .astype(object)
                    .where(pd.notnull(datetime_cols), None)
                )

            if self._naive_columns:
                # Remove the timezone on columns declared tz-naive.
                for col in self._naive_columns:
                    if col not in self.data.columns:
                        continue
                    # Coerce non-datetime columns first; bad values -> NaT.
                    if not is_datetime64_any_dtype(self.data[col]):
                        self.data[col] = pd.to_datetime(
                            self.data[col],
                            errors="coerce",
                            utc=True,
                        )
                    # tz_localize(None) drops the tz but keeps the wall time,
                    # e.g. 2025-03-20 19:11:09+00:00 -> 2025-03-20 19:11:09
                    if isinstance(self.data[col].dtype, pd.DatetimeTZDtype):
                        self.data[col] = self.data[col].dt.tz_localize(None)

            if self._json_columns:
                for col in self._json_columns:
                    if col in self.data.columns:
                        # None becomes an empty dict; other values pass through.
                        self.data[col] = self.data[col].apply(
                            lambda x: {} if x is None else x
                        )

            if self._vector_columns:
                for col in self._vector_columns:
                    if col in self.data.columns:
                        # Ensure vector values are Python lists
                        self.data[col] = self.data[col].apply(
                            lambda x: list(x) if x is not None and hasattr(x, '__iter__') else x
                        )

            if self._array_columns:
                for col in self._array_columns:
                    if col in self.data.columns:
                        # Ensure array values are Python lists
                        self.data[col] = self.data[col].apply(_as_list)

            # Turn every remaining NaN / NA / NaT into Python None; this
            # also covers NaT values, so no per-column NaT pass is needed.
            self.data = self.data.astype(object).where(pd.notnull(self.data), None)
            tuples = list(zip(*map(self.data.get, self.data)))

            if self._debug:
                # Diagnostics only; safe for columns that are entirely null.
                for name in self.data.columns:
                    non_null = self.data[name].dropna()
                    sample = (
                        type(non_null.iat[0]).__name__
                        if not non_null.empty else "n/a"
                    )
                    self._logger.debug(
                        f"{name:>20}: {self.data[name].dtype}, sample={sample}"
                    )
                leftover = [
                    name for name in self.data.columns
                    if self.data[name].map(lambda v: v is pd.NA).any()
                ]
                if leftover:
                    self._logger.warning(f"found NAType in {leftover}")

            async with await self._connection.connection() as conn:
                engine = conn.engine()
                await engine.set_type_codec(
                    "json",
                    encoder=orjson.dumps,
                    decoder=json_decoder,
                    schema="pg_catalog",
                )
                await engine.set_type_codec(
                    "jsonb",
                    encoder=_jsonb_encode,
                    decoder=_jsonb_decode,
                    schema="pg_catalog",
                    format="binary"
                )
                await register_vector(engine)

                result = []
                if self._atomic is True:
                    # ---- slow path: row-level COPY with savepoints ----
                    copied = 0
                    async with engine.transaction():
                        for row in tuples:
                            try:
                                # Nested transaction = savepoint, so one bad
                                # row does not abort the outer transaction.
                                async with engine.transaction():
                                    # COPY wants an iterable of tuples.
                                    rs = await conn.copy_into_table(
                                        table=self.tablename,
                                        schema=self.schema,
                                        source=[row],
                                        columns=columns,
                                    )
                                copied += 1
                                # NOTE(review): assumes rs is iterable in the
                                # shape extract_copied() expects - confirm.
                                result.extend(rs)
                            except Exception as e:
                                self._logger.error(
                                    f"Error copying row: {row}, error: {e}"
                                )
                                rejects.append(row)
                else:
                    result = await conn.copy_into_table(
                        table=self.tablename,
                        schema=self.schema,
                        source=tuples,
                        columns=columns,
                    )
                    copied = len(tuples)
            self._logger.info(
                f"Copied {copied} rows to {self.schema}.{self.tablename}"
            )
        self.add_metric("ROWS_SAVED", self.extract_copied(result))
        if rejects:
            # build a DataFrame with the same column order
            rejects_df = pd.DataFrame(rejects, columns=columns)
            self._logger.warning(
                f"Rejected rows: {len(rejects_df)}"
            )
            self._logger.warning(
                f"Rejected rows: {rejects_df.to_dict(orient='records')}"
            )
        if self._debug:
            self._logger.debug(
                f"Saving results into: {self.schema}.{self.tablename}"
            )
    except StatementError as err:
        raise ComponentError(f"Statement error: {err}") from err
    except DataError as err:
        raise ComponentError(f"Data error: {err}") from err
    except Exception as err:
        raise ComponentError(f"{self.StepName} Error: {err!s}") from err
|
580
|
+
|
581
|
+
async def _copy_iterable(self):
    """Copy ``self.data`` (a sequence of mappings) into ``schema.tablename``.

    Column names are taken from the keys of the first record and every
    record is converted to a tuple of its values, so all records are
    expected to share the first record's keys and key order.

    Returns:
        ``True`` when ``self.data`` is empty (nothing is copied, mirroring
        the empty-DataFrame case), otherwise ``None``.

    Raises:
        ComponentError: wrapping any error raised while copying.
    """
    if not self.data:
        # Nothing to index into: self.data[0] would raise IndexError.
        self._logger.info("Iterable is empty, nothing to copy")
        return True
    tuples = [tuple(record.values()) for record in self.data]
    columns = list(self.data[0].keys())
    try:
        # TODO: iterate the data in chunks (to avoid killing the process)
        async with await self._connection.connection() as conn:
            result = await conn.copy_into_table(
                table=self.tablename,
                schema=self.schema,
                source=tuples,
                columns=columns,
            )
            self.add_metric(
                "ROWS_SAVED", self.extract_copied(result)
            )
            self._logger.debug(f"CopyToPg: {result}")
    except StatementError as err:
        raise ComponentError(
            f"Statement error: {err}"
        ) from err
    except DataError as err:
        raise ComponentError(
            f"Data error: {err}"
        ) from err
    except Exception as err:
        raise ComponentError(
            f"{self.StepName} Error: {err!s}"
        ) from err
|