flowtask 5.8.4__cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowtask/__init__.py +93 -0
- flowtask/__main__.py +38 -0
- flowtask/bots/__init__.py +6 -0
- flowtask/bots/check.py +93 -0
- flowtask/bots/codebot.py +51 -0
- flowtask/components/ASPX.py +148 -0
- flowtask/components/AddDataset.py +352 -0
- flowtask/components/Amazon.py +523 -0
- flowtask/components/AutoTask.py +314 -0
- flowtask/components/Azure.py +80 -0
- flowtask/components/AzureUsers.py +106 -0
- flowtask/components/BaseAction.py +91 -0
- flowtask/components/BaseLoop.py +198 -0
- flowtask/components/BestBuy.py +800 -0
- flowtask/components/CSVToGCS.py +120 -0
- flowtask/components/CompanyScraper/__init__.py +1 -0
- flowtask/components/CompanyScraper/parsers/__init__.py +6 -0
- flowtask/components/CompanyScraper/parsers/base.py +102 -0
- flowtask/components/CompanyScraper/parsers/explorium.py +192 -0
- flowtask/components/CompanyScraper/parsers/leadiq.py +206 -0
- flowtask/components/CompanyScraper/parsers/rocket.py +133 -0
- flowtask/components/CompanyScraper/parsers/siccode.py +109 -0
- flowtask/components/CompanyScraper/parsers/visualvisitor.py +130 -0
- flowtask/components/CompanyScraper/parsers/zoominfo.py +118 -0
- flowtask/components/CompanyScraper/scrapper.py +1054 -0
- flowtask/components/CopyTo.py +177 -0
- flowtask/components/CopyToBigQuery.py +243 -0
- flowtask/components/CopyToMongoDB.py +291 -0
- flowtask/components/CopyToPg.py +609 -0
- flowtask/components/CopyToRethink.py +207 -0
- flowtask/components/CreateGCSBucket.py +102 -0
- flowtask/components/CreateReport/CreateReport.py +228 -0
- flowtask/components/CreateReport/__init__.py +9 -0
- flowtask/components/CreateReport/charts/__init__.py +15 -0
- flowtask/components/CreateReport/charts/bar.py +51 -0
- flowtask/components/CreateReport/charts/base.py +66 -0
- flowtask/components/CreateReport/charts/pie.py +64 -0
- flowtask/components/CreateReport/utils.py +9 -0
- flowtask/components/CustomerSatisfaction.py +196 -0
- flowtask/components/DataInput.py +200 -0
- flowtask/components/DateList.py +255 -0
- flowtask/components/DbClient.py +163 -0
- flowtask/components/DialPad.py +146 -0
- flowtask/components/DocumentDBQuery.py +200 -0
- flowtask/components/DownloadFrom.py +371 -0
- flowtask/components/DownloadFromD2L.py +113 -0
- flowtask/components/DownloadFromFTP.py +181 -0
- flowtask/components/DownloadFromIMAP.py +315 -0
- flowtask/components/DownloadFromS3.py +198 -0
- flowtask/components/DownloadFromSFTP.py +265 -0
- flowtask/components/DownloadFromSharepoint.py +110 -0
- flowtask/components/DownloadFromSmartSheet.py +114 -0
- flowtask/components/DownloadS3File.py +229 -0
- flowtask/components/Dummy.py +59 -0
- flowtask/components/DuplicatePhoto.py +411 -0
- flowtask/components/EmployeeEvaluation.py +237 -0
- flowtask/components/ExecuteSQL.py +323 -0
- flowtask/components/ExtractHTML.py +178 -0
- flowtask/components/FileBase.py +178 -0
- flowtask/components/FileCopy.py +181 -0
- flowtask/components/FileDelete.py +82 -0
- flowtask/components/FileExists.py +146 -0
- flowtask/components/FileIteratorDelete.py +112 -0
- flowtask/components/FileList.py +194 -0
- flowtask/components/FileOpen.py +75 -0
- flowtask/components/FileRead.py +120 -0
- flowtask/components/FileRename.py +106 -0
- flowtask/components/FilterIf.py +284 -0
- flowtask/components/FilterRows/FilterRows.py +200 -0
- flowtask/components/FilterRows/__init__.py +10 -0
- flowtask/components/FilterRows/functions.py +4 -0
- flowtask/components/GCSToBigQuery.py +103 -0
- flowtask/components/GoogleA4.py +150 -0
- flowtask/components/GoogleGeoCoding.py +344 -0
- flowtask/components/GooglePlaces.py +315 -0
- flowtask/components/GoogleSearch.py +539 -0
- flowtask/components/HTTPClient.py +268 -0
- flowtask/components/ICIMS.py +146 -0
- flowtask/components/IF.py +179 -0
- flowtask/components/IcimsFolderCopy.py +173 -0
- flowtask/components/ImageFeatures/__init__.py +5 -0
- flowtask/components/ImageFeatures/process.py +233 -0
- flowtask/components/IteratorBase.py +251 -0
- flowtask/components/LangchainLoader/__init__.py +5 -0
- flowtask/components/LangchainLoader/loader.py +194 -0
- flowtask/components/LangchainLoader/loaders/__init__.py +22 -0
- flowtask/components/LangchainLoader/loaders/abstract.py +362 -0
- flowtask/components/LangchainLoader/loaders/basepdf.py +50 -0
- flowtask/components/LangchainLoader/loaders/docx.py +91 -0
- flowtask/components/LangchainLoader/loaders/html.py +119 -0
- flowtask/components/LangchainLoader/loaders/pdfblocks.py +146 -0
- flowtask/components/LangchainLoader/loaders/pdfmark.py +79 -0
- flowtask/components/LangchainLoader/loaders/pdftables.py +135 -0
- flowtask/components/LangchainLoader/loaders/qa.py +67 -0
- flowtask/components/LangchainLoader/loaders/txt.py +55 -0
- flowtask/components/LeadIQ.py +650 -0
- flowtask/components/Loop.py +253 -0
- flowtask/components/Lowes.py +334 -0
- flowtask/components/MS365Usage.py +156 -0
- flowtask/components/MSTeamsMessages.py +320 -0
- flowtask/components/MarketClustering.py +1051 -0
- flowtask/components/MergeFiles.py +362 -0
- flowtask/components/MilvusOutput.py +87 -0
- flowtask/components/NearByStores.py +175 -0
- flowtask/components/NetworkNinja/__init__.py +6 -0
- flowtask/components/NetworkNinja/models/__init__.py +52 -0
- flowtask/components/NetworkNinja/models/abstract.py +177 -0
- flowtask/components/NetworkNinja/models/account.py +39 -0
- flowtask/components/NetworkNinja/models/client.py +19 -0
- flowtask/components/NetworkNinja/models/district.py +14 -0
- flowtask/components/NetworkNinja/models/events.py +101 -0
- flowtask/components/NetworkNinja/models/forms.py +499 -0
- flowtask/components/NetworkNinja/models/market.py +16 -0
- flowtask/components/NetworkNinja/models/organization.py +34 -0
- flowtask/components/NetworkNinja/models/photos.py +125 -0
- flowtask/components/NetworkNinja/models/project.py +44 -0
- flowtask/components/NetworkNinja/models/region.py +28 -0
- flowtask/components/NetworkNinja/models/store.py +203 -0
- flowtask/components/NetworkNinja/models/user.py +151 -0
- flowtask/components/NetworkNinja/router.py +854 -0
- flowtask/components/Odoo.py +175 -0
- flowtask/components/OdooInjector.py +192 -0
- flowtask/components/OpenFromXML.py +126 -0
- flowtask/components/OpenWeather.py +41 -0
- flowtask/components/OpenWithBase.py +616 -0
- flowtask/components/OpenWithPandas.py +715 -0
- flowtask/components/PGPDecrypt.py +199 -0
- flowtask/components/PandasIterator.py +187 -0
- flowtask/components/PandasToFile.py +189 -0
- flowtask/components/Paradox.py +339 -0
- flowtask/components/ParamIterator.py +117 -0
- flowtask/components/ParseHTML.py +84 -0
- flowtask/components/PlacerStores.py +249 -0
- flowtask/components/Pokemon.py +507 -0
- flowtask/components/PositiveBot.py +62 -0
- flowtask/components/PowerPointSlide.py +400 -0
- flowtask/components/PrintMessage.py +127 -0
- flowtask/components/ProductCompetitors/__init__.py +5 -0
- flowtask/components/ProductCompetitors/parsers/__init__.py +7 -0
- flowtask/components/ProductCompetitors/parsers/base.py +72 -0
- flowtask/components/ProductCompetitors/parsers/bestbuy.py +86 -0
- flowtask/components/ProductCompetitors/parsers/lowes.py +103 -0
- flowtask/components/ProductCompetitors/scrapper.py +155 -0
- flowtask/components/ProductCompliant.py +169 -0
- flowtask/components/ProductInfo/__init__.py +1 -0
- flowtask/components/ProductInfo/parsers/__init__.py +5 -0
- flowtask/components/ProductInfo/parsers/base.py +83 -0
- flowtask/components/ProductInfo/parsers/brother.py +97 -0
- flowtask/components/ProductInfo/parsers/canon.py +167 -0
- flowtask/components/ProductInfo/parsers/epson.py +118 -0
- flowtask/components/ProductInfo/parsers/hp.py +131 -0
- flowtask/components/ProductInfo/parsers/samsung.py +97 -0
- flowtask/components/ProductInfo/scraper.py +319 -0
- flowtask/components/ProductPricing.py +118 -0
- flowtask/components/QS.py +261 -0
- flowtask/components/QSBase.py +201 -0
- flowtask/components/QueryIterator.py +273 -0
- flowtask/components/QueryToInsert.py +327 -0
- flowtask/components/QueryToPandas.py +432 -0
- flowtask/components/RESTClient.py +195 -0
- flowtask/components/RethinkDBQuery.py +189 -0
- flowtask/components/Rsync.py +74 -0
- flowtask/components/RunSSH.py +59 -0
- flowtask/components/RunShell.py +71 -0
- flowtask/components/SalesForce.py +20 -0
- flowtask/components/SaveImageBank/__init__.py +257 -0
- flowtask/components/SchedulingVisits.py +592 -0
- flowtask/components/ScrapPage.py +216 -0
- flowtask/components/ScrapSearch.py +79 -0
- flowtask/components/SendNotify.py +257 -0
- flowtask/components/SentimentAnalysis.py +694 -0
- flowtask/components/ServiceScrapper/__init__.py +5 -0
- flowtask/components/ServiceScrapper/parsers/__init__.py +1 -0
- flowtask/components/ServiceScrapper/parsers/base.py +94 -0
- flowtask/components/ServiceScrapper/parsers/costco.py +93 -0
- flowtask/components/ServiceScrapper/scrapper.py +199 -0
- flowtask/components/SetVariables.py +156 -0
- flowtask/components/SubTask.py +182 -0
- flowtask/components/SuiteCRM.py +48 -0
- flowtask/components/Switch.py +175 -0
- flowtask/components/TableBase.py +148 -0
- flowtask/components/TableDelete.py +312 -0
- flowtask/components/TableInput.py +143 -0
- flowtask/components/TableOutput/TableOutput.py +384 -0
- flowtask/components/TableOutput/__init__.py +3 -0
- flowtask/components/TableSchema.py +534 -0
- flowtask/components/Target.py +223 -0
- flowtask/components/ThumbnailGenerator.py +156 -0
- flowtask/components/ToPandas.py +67 -0
- flowtask/components/TransformRows/TransformRows.py +507 -0
- flowtask/components/TransformRows/__init__.py +9 -0
- flowtask/components/TransformRows/functions.py +559 -0
- flowtask/components/TransposeRows.py +176 -0
- flowtask/components/UPCDatabase.py +86 -0
- flowtask/components/UnGzip.py +171 -0
- flowtask/components/Uncompress.py +172 -0
- flowtask/components/UniqueRows.py +126 -0
- flowtask/components/Unzip.py +107 -0
- flowtask/components/UpdateOperationalVars.py +147 -0
- flowtask/components/UploadTo.py +299 -0
- flowtask/components/UploadToS3.py +136 -0
- flowtask/components/UploadToSFTP.py +160 -0
- flowtask/components/UploadToSharepoint.py +205 -0
- flowtask/components/UserFunc.py +122 -0
- flowtask/components/VivaTracker.py +140 -0
- flowtask/components/WSDLClient.py +123 -0
- flowtask/components/Wait.py +18 -0
- flowtask/components/Walmart.py +199 -0
- flowtask/components/Workplace.py +134 -0
- flowtask/components/XMLToPandas.py +267 -0
- flowtask/components/Zammad/__init__.py +41 -0
- flowtask/components/Zammad/models.py +0 -0
- flowtask/components/ZoomInfoScraper.py +409 -0
- flowtask/components/__init__.py +104 -0
- flowtask/components/abstract.py +18 -0
- flowtask/components/flow.py +530 -0
- flowtask/components/google.py +335 -0
- flowtask/components/group.py +221 -0
- flowtask/components/py.typed +0 -0
- flowtask/components/reviewscrap.py +132 -0
- flowtask/components/tAutoincrement.py +117 -0
- flowtask/components/tConcat.py +109 -0
- flowtask/components/tExplode.py +119 -0
- flowtask/components/tFilter.py +184 -0
- flowtask/components/tGroup.py +236 -0
- flowtask/components/tJoin.py +270 -0
- flowtask/components/tMap/__init__.py +9 -0
- flowtask/components/tMap/functions.py +54 -0
- flowtask/components/tMap/tMap.py +450 -0
- flowtask/components/tMelt.py +112 -0
- flowtask/components/tMerge.py +114 -0
- flowtask/components/tOrder.py +93 -0
- flowtask/components/tPandas.py +94 -0
- flowtask/components/tPivot.py +71 -0
- flowtask/components/tPluckCols.py +76 -0
- flowtask/components/tUnnest.py +82 -0
- flowtask/components/user.py +401 -0
- flowtask/conf.py +457 -0
- flowtask/download.py +102 -0
- flowtask/events/__init__.py +11 -0
- flowtask/events/events/__init__.py +20 -0
- flowtask/events/events/abstract.py +95 -0
- flowtask/events/events/alerts/__init__.py +362 -0
- flowtask/events/events/alerts/colfunctions.py +131 -0
- flowtask/events/events/alerts/functions.py +158 -0
- flowtask/events/events/dummy.py +12 -0
- flowtask/events/events/exec.py +124 -0
- flowtask/events/events/file/__init__.py +7 -0
- flowtask/events/events/file/base.py +51 -0
- flowtask/events/events/file/copy.py +23 -0
- flowtask/events/events/file/delete.py +16 -0
- flowtask/events/events/interfaces/__init__.py +9 -0
- flowtask/events/events/interfaces/client.py +67 -0
- flowtask/events/events/interfaces/credentials.py +28 -0
- flowtask/events/events/interfaces/notifications.py +58 -0
- flowtask/events/events/jira.py +122 -0
- flowtask/events/events/log.py +26 -0
- flowtask/events/events/logerr.py +52 -0
- flowtask/events/events/notify.py +59 -0
- flowtask/events/events/notify_event.py +160 -0
- flowtask/events/events/publish.py +54 -0
- flowtask/events/events/sendfile.py +104 -0
- flowtask/events/events/task.py +97 -0
- flowtask/events/events/teams.py +98 -0
- flowtask/events/events/webhook.py +58 -0
- flowtask/events/manager.py +287 -0
- flowtask/exceptions.c +39393 -0
- flowtask/exceptions.cpython-310-x86_64-linux-gnu.so +0 -0
- flowtask/extensions/__init__.py +3 -0
- flowtask/extensions/abstract.py +82 -0
- flowtask/extensions/logging/__init__.py +65 -0
- flowtask/hooks/__init__.py +9 -0
- flowtask/hooks/actions/__init__.py +22 -0
- flowtask/hooks/actions/abstract.py +66 -0
- flowtask/hooks/actions/dummy.py +23 -0
- flowtask/hooks/actions/jira.py +74 -0
- flowtask/hooks/actions/rest.py +320 -0
- flowtask/hooks/actions/sampledata.py +37 -0
- flowtask/hooks/actions/sensor.py +23 -0
- flowtask/hooks/actions/task.py +9 -0
- flowtask/hooks/actions/ticket.py +37 -0
- flowtask/hooks/actions/zammad.py +55 -0
- flowtask/hooks/hook.py +62 -0
- flowtask/hooks/models.py +17 -0
- flowtask/hooks/service.py +187 -0
- flowtask/hooks/step.py +91 -0
- flowtask/hooks/types/__init__.py +23 -0
- flowtask/hooks/types/base.py +129 -0
- flowtask/hooks/types/brokers/__init__.py +11 -0
- flowtask/hooks/types/brokers/base.py +54 -0
- flowtask/hooks/types/brokers/mqtt.py +35 -0
- flowtask/hooks/types/brokers/rabbitmq.py +82 -0
- flowtask/hooks/types/brokers/redis.py +83 -0
- flowtask/hooks/types/brokers/sqs.py +44 -0
- flowtask/hooks/types/fs.py +232 -0
- flowtask/hooks/types/http.py +49 -0
- flowtask/hooks/types/imap.py +200 -0
- flowtask/hooks/types/jira.py +279 -0
- flowtask/hooks/types/mail.py +205 -0
- flowtask/hooks/types/postgres.py +98 -0
- flowtask/hooks/types/responses/__init__.py +8 -0
- flowtask/hooks/types/responses/base.py +5 -0
- flowtask/hooks/types/sharepoint.py +288 -0
- flowtask/hooks/types/ssh.py +141 -0
- flowtask/hooks/types/tagged.py +59 -0
- flowtask/hooks/types/upload.py +85 -0
- flowtask/hooks/types/watch.py +71 -0
- flowtask/hooks/types/web.py +36 -0
- flowtask/interfaces/AzureClient.py +137 -0
- flowtask/interfaces/AzureGraph.py +839 -0
- flowtask/interfaces/Boto3Client.py +326 -0
- flowtask/interfaces/DropboxClient.py +173 -0
- flowtask/interfaces/ExcelHandler.py +94 -0
- flowtask/interfaces/FTPClient.py +131 -0
- flowtask/interfaces/GoogleCalendar.py +201 -0
- flowtask/interfaces/GoogleClient.py +133 -0
- flowtask/interfaces/GoogleDrive.py +127 -0
- flowtask/interfaces/GoogleGCS.py +89 -0
- flowtask/interfaces/GoogleGeocoding.py +93 -0
- flowtask/interfaces/GoogleLang.py +114 -0
- flowtask/interfaces/GooglePub.py +61 -0
- flowtask/interfaces/GoogleSheet.py +68 -0
- flowtask/interfaces/IMAPClient.py +137 -0
- flowtask/interfaces/O365Calendar.py +113 -0
- flowtask/interfaces/O365Client.py +220 -0
- flowtask/interfaces/OneDrive.py +284 -0
- flowtask/interfaces/Outlook.py +155 -0
- flowtask/interfaces/ParrotBot.py +130 -0
- flowtask/interfaces/SSHClient.py +378 -0
- flowtask/interfaces/Sharepoint.py +496 -0
- flowtask/interfaces/__init__.py +36 -0
- flowtask/interfaces/azureauth.py +119 -0
- flowtask/interfaces/cache.py +201 -0
- flowtask/interfaces/client.py +82 -0
- flowtask/interfaces/compress.py +525 -0
- flowtask/interfaces/credentials.py +124 -0
- flowtask/interfaces/d2l.py +239 -0
- flowtask/interfaces/databases/__init__.py +5 -0
- flowtask/interfaces/databases/db.py +223 -0
- flowtask/interfaces/databases/documentdb.py +55 -0
- flowtask/interfaces/databases/rethink.py +39 -0
- flowtask/interfaces/dataframes/__init__.py +11 -0
- flowtask/interfaces/dataframes/abstract.py +21 -0
- flowtask/interfaces/dataframes/arrow.py +71 -0
- flowtask/interfaces/dataframes/dt.py +69 -0
- flowtask/interfaces/dataframes/pandas.py +167 -0
- flowtask/interfaces/dataframes/polars.py +60 -0
- flowtask/interfaces/db.py +263 -0
- flowtask/interfaces/env.py +46 -0
- flowtask/interfaces/func.py +137 -0
- flowtask/interfaces/http.py +1780 -0
- flowtask/interfaces/locale.py +40 -0
- flowtask/interfaces/log.py +75 -0
- flowtask/interfaces/mask.py +143 -0
- flowtask/interfaces/notification.py +154 -0
- flowtask/interfaces/playwright.py +339 -0
- flowtask/interfaces/powerpoint.py +368 -0
- flowtask/interfaces/py.typed +0 -0
- flowtask/interfaces/qs.py +376 -0
- flowtask/interfaces/result.py +87 -0
- flowtask/interfaces/selenium_service.py +779 -0
- flowtask/interfaces/smartsheet.py +154 -0
- flowtask/interfaces/stat.py +39 -0
- flowtask/interfaces/task.py +96 -0
- flowtask/interfaces/template.py +118 -0
- flowtask/interfaces/vectorstores/__init__.py +1 -0
- flowtask/interfaces/vectorstores/abstract.py +133 -0
- flowtask/interfaces/vectorstores/milvus.py +669 -0
- flowtask/interfaces/zammad.py +107 -0
- flowtask/models.py +193 -0
- flowtask/parsers/__init__.py +15 -0
- flowtask/parsers/_yaml.c +11978 -0
- flowtask/parsers/_yaml.cpython-310-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/argparser.py +235 -0
- flowtask/parsers/base.c +15155 -0
- flowtask/parsers/base.cpython-310-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/json.c +11968 -0
- flowtask/parsers/json.cpython-310-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/maps.py +49 -0
- flowtask/parsers/toml.c +11968 -0
- flowtask/parsers/toml.cpython-310-x86_64-linux-gnu.so +0 -0
- flowtask/plugins/__init__.py +16 -0
- flowtask/plugins/components/__init__.py +0 -0
- flowtask/plugins/handler/__init__.py +45 -0
- flowtask/plugins/importer.py +31 -0
- flowtask/plugins/sources/__init__.py +0 -0
- flowtask/runner.py +283 -0
- flowtask/scheduler/__init__.py +9 -0
- flowtask/scheduler/functions.py +493 -0
- flowtask/scheduler/handlers/__init__.py +8 -0
- flowtask/scheduler/handlers/manager.py +504 -0
- flowtask/scheduler/handlers/models.py +58 -0
- flowtask/scheduler/handlers/service.py +72 -0
- flowtask/scheduler/notifications.py +65 -0
- flowtask/scheduler/scheduler.py +993 -0
- flowtask/services/__init__.py +0 -0
- flowtask/services/bots/__init__.py +0 -0
- flowtask/services/bots/telegram.py +264 -0
- flowtask/services/files/__init__.py +11 -0
- flowtask/services/files/manager.py +522 -0
- flowtask/services/files/model.py +37 -0
- flowtask/services/files/service.py +767 -0
- flowtask/services/jira/__init__.py +3 -0
- flowtask/services/jira/jira_actions.py +191 -0
- flowtask/services/tasks/__init__.py +13 -0
- flowtask/services/tasks/launcher.py +213 -0
- flowtask/services/tasks/manager.py +323 -0
- flowtask/services/tasks/service.py +275 -0
- flowtask/services/tasks/task_manager.py +376 -0
- flowtask/services/tasks/tasks.py +155 -0
- flowtask/storages/__init__.py +16 -0
- flowtask/storages/exceptions.py +12 -0
- flowtask/storages/files/__init__.py +8 -0
- flowtask/storages/files/abstract.py +29 -0
- flowtask/storages/files/filesystem.py +66 -0
- flowtask/storages/tasks/__init__.py +19 -0
- flowtask/storages/tasks/abstract.py +26 -0
- flowtask/storages/tasks/database.py +33 -0
- flowtask/storages/tasks/filesystem.py +108 -0
- flowtask/storages/tasks/github.py +119 -0
- flowtask/storages/tasks/memory.py +45 -0
- flowtask/storages/tasks/row.py +25 -0
- flowtask/tasks/__init__.py +0 -0
- flowtask/tasks/abstract.py +526 -0
- flowtask/tasks/command.py +118 -0
- flowtask/tasks/pile.py +486 -0
- flowtask/tasks/py.typed +0 -0
- flowtask/tasks/task.py +778 -0
- flowtask/template/__init__.py +161 -0
- flowtask/tests.py +257 -0
- flowtask/types/__init__.py +8 -0
- flowtask/types/typedefs.c +11347 -0
- flowtask/types/typedefs.cpython-310-x86_64-linux-gnu.so +0 -0
- flowtask/utils/__init__.py +24 -0
- flowtask/utils/constants.py +117 -0
- flowtask/utils/encoders.py +21 -0
- flowtask/utils/executor.py +112 -0
- flowtask/utils/functions.cpp +14280 -0
- flowtask/utils/functions.cpython-310-x86_64-linux-gnu.so +0 -0
- flowtask/utils/json.cpp +13349 -0
- flowtask/utils/json.cpython-310-x86_64-linux-gnu.so +0 -0
- flowtask/utils/mail.py +63 -0
- flowtask/utils/parseqs.c +13324 -0
- flowtask/utils/parserqs.cpython-310-x86_64-linux-gnu.so +0 -0
- flowtask/utils/stats.py +308 -0
- flowtask/utils/transformations.py +74 -0
- flowtask/utils/uv.py +12 -0
- flowtask/utils/validators.py +97 -0
- flowtask/version.py +11 -0
- flowtask-5.8.4.dist-info/LICENSE +201 -0
- flowtask-5.8.4.dist-info/METADATA +209 -0
- flowtask-5.8.4.dist-info/RECORD +470 -0
- flowtask-5.8.4.dist-info/WHEEL +6 -0
- flowtask-5.8.4.dist-info/entry_points.txt +3 -0
- flowtask-5.8.4.dist-info/top_level.txt +2 -0
- plugins/components/CreateQR.py +39 -0
- plugins/components/TestComponent.py +28 -0
- plugins/components/Use1.py +13 -0
- plugins/components/Workplace.py +117 -0
- plugins/components/__init__.py +3 -0
- plugins/sources/__init__.py +0 -0
- plugins/sources/get_populartimes.py +78 -0
- plugins/sources/google.py +150 -0
- plugins/sources/hubspot.py +679 -0
- plugins/sources/icims.py +679 -0
- plugins/sources/mobileinsight.py +501 -0
- plugins/sources/newrelic.py +262 -0
- plugins/sources/uap.py +268 -0
- plugins/sources/venu.py +244 -0
- plugins/sources/vocinity.py +314 -0
@@ -0,0 +1,715 @@
|
|
1
|
+
from typing import Any
|
2
|
+
from pathlib import PurePath
|
3
|
+
from io import BytesIO
|
4
|
+
import aiofiles
|
5
|
+
from xml.sax import parse
|
6
|
+
import warnings
|
7
|
+
import pandas
|
8
|
+
from pandas._libs.parsers import STR_NA_VALUES
|
9
|
+
import orjson
|
10
|
+
import xlrd
|
11
|
+
import numpy as np
|
12
|
+
from ..utils import check_empty
|
13
|
+
from ..exceptions import ComponentError, DataNotFound, EmptyFile
|
14
|
+
from .OpenWithBase import OpenWithBase, detect_encoding, excel_based, ExcelHandler
|
15
|
+
|
16
|
+
|
17
|
+
# Suppress every UserWarning process-wide (the filter below is not limited to one specific warning)
|
18
|
+
warnings.filterwarnings("ignore", category=UserWarning)
|
19
|
+
|
20
|
+
class OpenWithPandas(OpenWithBase):
|
21
|
+
"""
|
22
|
+
OpenWithPandas
|
23
|
+
|
24
|
+
Overview
|
25
|
+
|
26
|
+
Open a file and return a Dataframe type
|
27
|
+
|
28
|
+
.. table:: Properties
|
29
|
+
:widths: auto
|
30
|
+
|
31
|
+
|
32
|
+
+-------------+----------+-----------+-------------------------------------------------------+
|
33
|
+
| Name | Required | Summary |
|
34
|
+
+-------------+----------+-----------+-------------------------------------------------------+
|
35
|
+
| model | Yes | A model (json) representative of the data that I am going to |
|
36
|
+
| | | open * name of a DataModel (in-development) |
|
37
|
+
+-------------+----------+-----------+-------------------------------------------------------+
|
38
|
+
| map | Yes | Map the columns against the model |
|
39
|
+
+-------------+----------+-----------+-------------------------------------------------------+
|
40
|
+
| tablename | Yes | Join the data from the table in the postgres database |
|
41
|
+
+-------------+----------+-----------+-------------------------------------------------------+
|
42
|
+
| use_map | Yes | If true, then a MAP file is used instead of a table in postgresql |
|
43
|
+
+-------------+----------+-----------+-------------------------------------------------------+
|
44
|
+
| file_engine | Yes | Pandas different types of engines for different types of Excel |
|
45
|
+
| | | * xlrd (legacy, xls type) |
|
46
|
+
| | | * openpyxl (new xlsx files) |
|
47
|
+
| | | * pyxlsb (to open with macros and functions) |
|
48
|
+
+-------------+----------+-----------+-------------------------------------------------------+
|
49
|
+
| dtypes | No | force the data type of a column ex: { order_date: datetime } |
|
50
|
+
+-------------+----------+-----------+-------------------------------------------------------+
|
51
|
+
|
52
|
+
|
53
|
+
Returns a pandas DataFrame with the data read from the opened file.
|
54
|
+
|
55
|
+
|
56
|
+
Example:
|
57
|
+
|
58
|
+
```yaml
|
59
|
+
OpenWithPandas:
|
60
|
+
mime: text/csv
|
61
|
+
process: true
|
62
|
+
separator: '|'
|
63
|
+
drop_empty: true
|
64
|
+
trim: true
|
65
|
+
pk:
|
66
|
+
columns:
|
67
|
+
- associate_oid
|
68
|
+
- associate_id
|
69
|
+
append: false
|
70
|
+
verify_integrity: true
|
71
|
+
map:
|
72
|
+
tablename: employees
|
73
|
+
schema: bacardi
|
74
|
+
map: employees
|
75
|
+
replace: false
|
76
|
+
```
|
77
|
+
|
78
|
+
"""
|
79
|
+
"""
|
80
|
+
OpenWithPandas
|
81
|
+
|
82
|
+
Overview
|
83
|
+
|
84
|
+
This component opens various file types (CSV, Excel, HTML, JSON) into Pandas DataFrames.
|
85
|
+
|
86
|
+
.. table:: Properties
|
87
|
+
:widths: auto
|
88
|
+
|
89
|
+
|
90
|
+
+------------------------+----------+-----------+---------------------------------------------------------------+
|
91
|
+
| Name | Required | Summary |
|
92
|
+
+------------------------+----------+-----------+---------------------------------------------------------------+
|
93
|
+
| directory | No | The directory where the files are located. |
|
94
|
+
+------------------------+----------+-----------+---------------------------------------------------------------+
|
95
|
+
| filename | No | The name of the file to open. |
|
96
|
+
+------------------------+----------+-----------+---------------------------------------------------------------+
|
97
|
+
| file | No | Pattern or file to open. |
|
98
|
+
+------------------------+----------+-----------+---------------------------------------------------------------+
|
99
|
+
| mime | No | The MIME type of the file. Default is "text/csv". |
|
100
|
+
+------------------------+----------+-----------+---------------------------------------------------------------+
|
101
|
+
| separator | No | Separator for CSV files. Default is ",". |
|
102
|
+
+------------------------+----------+-----------+---------------------------------------------------------------+
|
103
|
+
| force_map | No | Force the use of a map file. Default is False. |
|
104
|
+
+------------------------+----------+-----------+---------------------------------------------------------------+
|
105
|
+
| parse_dates | No | Columns to parse as dates. Default is an empty dictionary. |
|
106
|
+
+------------------------+----------+-----------+---------------------------------------------------------------+
|
107
|
+
| filter_nan | No | Filter out NaN values. Default is True. |
|
108
|
+
+------------------------+----------+-----------+---------------------------------------------------------------+
|
109
|
+
| na_values | No | List of values to recognize as NaN. Default is ["NULL", "TBD"]. |
|
110
|
+
+------------------------+----------+-----------+---------------------------------------------------------------+
|
111
|
+
| remove_empty_strings | No | Remove empty strings. Default is True. |
|
112
|
+
+------------------------+----------+-----------+---------------------------------------------------------------+
|
113
|
+
| no_multi | No | Disable multi-file processing. Default is False. |
|
114
|
+
+------------------------+----------+-----------+---------------------------------------------------------------+
|
115
|
+
| clean_nat | No | Clean NaT values. Default is False. |
|
116
|
+
+------------------------+----------+-----------+---------------------------------------------------------------+
|
117
|
+
| flavor | No | The flavor of the database for column information. Default is "postgres". |
|
118
|
+
+------------------------+----------+-----------+---------------------------------------------------------------+
|
119
|
+
| pd_args | No | Additional arguments for pandas. Default is an empty dictionary. |
|
120
|
+
+------------------------+----------+-----------+---------------------------------------------------------------+
|
121
|
+
| model | Yes | A model (json) representative of the data that I am going to |
|
122
|
+
| | | open * name of a DataModel (in-development) |
|
123
|
+
+------------------------+----------+-----------+---------------------------------------------------------------+
|
124
|
+
| map | Yes | Map the columns against the model |
|
125
|
+
+------------------------+----------+-----------+---------------------------------------------------------------+
|
126
|
+
| tablename | Yes | Join the data from the table in the postgres database |
|
127
|
+
+------------------------+----------+-----------+---------------------------------------------------------------+
|
128
|
+
| use_map | Yes | If true, then a MAP file is used instead of a table in postgresql |
|
129
|
+
+------------------------+----------+-----------+---------------------------------------------------------------+
|
130
|
+
| file_engine | Yes | Pandas different types of engines for different types of Excel |
|
131
|
+
| | | * xlrd (legacy, xls type) |
|
132
|
+
| | | * openpyxl (new xlsx files) |
|
133
|
+
| | | * pyxlsb (to open with macros and functions) |
|
134
|
+
+------------------------+----------+-----------+---------------------------------------------------------------+
|
135
|
+
| dtypes | No | force the data type of a column ex: { order_date: datetime } |
|
136
|
+
+------------------------+----------+-----------+---------------------------------------------------------------+
|
137
|
+
|
138
|
+
Returns
|
139
|
+
|
140
|
+
This component returns a Pandas DataFrame containing the data from the opened file(s).
|
141
|
+
|
142
|
+
"""
|
143
|
+
def get_column_headers(self):
    """Collect the column names of every file in ``self._filenames``.

    Each file is parsed as delimited text with ``pandas.read_csv`` using
    ``self.separator``; only the first data row is loaded, since just the
    header is needed.

    Returns:
        list: one list of column names per file, in the same order as
        ``self._filenames``.
    """

    def _header_of(path):
        # Encoding detection is best-effort: any failure falls back to UTF-8.
        try:
            charset = self.check_encoding(path)
        except Exception:
            charset = "UTF-8"
        sample = pandas.read_csv(
            path,
            sep=self.separator,
            skipinitialspace=True,
            encoding=charset,
            engine="python",
            nrows=1,
        )
        return sample.columns.values.tolist()

    return [_header_of(path) for path in self._filenames]
|
160
|
+
|
161
|
+
def set_datatypes(self):
    """Translate the declared column datatypes into a pandas ``dtype`` map.

    Reads ``self.datatypes`` (a mapping of column name -> datatype name) and,
    when at least one column is declared, stores the resolved mapping in
    ``self.args["dtype"]``. Nothing is written when no datatypes are declared.

    Raises:
        ComponentError: if a column declares a datatype name that is not
            supported.
    """
    # Supported datatype names and the dtype stored for each one.
    known = {
        "uint8": np.uint8,
        "uint16": np.uint16,
        "uint32": np.uint32,
        "int8": np.int8,
        "int16": np.int16,
        "int32": np.int32,
        "float": float,
        # NOTE(review): "float32" resolves to the builtin float, exactly as
        # before this change; confirm whether np.float32 was intended.
        "float32": float,
        "string": "str",
        "varchar": "str",
        "str": "str",
        "object": object,
    }
    dtypes = {}
    for field, dtype in self.datatypes.items():
        try:
            dtypes[field] = known[dtype]
        except (KeyError, TypeError):
            # Unknown (or unhashable) datatype name. The message previously
            # had the column name and the datatype swapped.
            raise ComponentError(
                f"Invalid DataType value: {dtype} for field {field}"
            ) from None
    if dtypes:
        self.args["dtype"] = dtypes
|
191
|
+
|
192
|
+
async def open_excel(
    self, filename: str, add_columns: dict, encoding
) -> pandas.DataFrame:
    """Open a spreadsheet and return its content as a DataFrame.

    Two strategies are used depending on ``self.mime``:

    * ``text/xml``: the file is parsed with ``ExcelHandler`` and the first
      table found becomes the DataFrame.
    * anything else: the file is read with ``pandas.read_excel`` using an
      engine chosen from the MIME type (or from the file extension when the
      MIME type is not one of the two recognised here). The ``file_engine``
      entry of ``self._params`` overrides that choice.

    Args:
        filename: path of the file to open (``str`` or path object).
        add_columns: extra reader options; its ``names`` entry, when
            present, holds the replacement column names.
        encoding: only reported in the debug log.

    Raises:
        EmptyFile: when pandas reports that the file has no data.
        ComponentError: on any other parsing or reading failure.
    """
    self._logger.debug(
        f"Opening Excel file {filename} with Pandas, encoding: {encoding}"
    )
    if self.mime == "text/xml":
        xmlparser = ExcelHandler()
        parse(filename, xmlparser)
        # Position of the header row inside the first table; data starts
        # on the row right after it.
        if hasattr(self, "skiprows"):
            columns = self.skiprows + 1
        else:
            columns = 0
        start = columns + 1
        try:
            if (
                hasattr(self, "add_columns") and hasattr(self, "rename")
                and self.rename is True
            ):
                # The column labels live under "names" (as the other
                # readers of this component use them); passing the whole
                # dict would label the columns with the dict keys.
                if "names" in add_columns:
                    cols = add_columns["names"]
                else:
                    cols = add_columns
            else:
                cols = xmlparser.tables[0][columns]
            return pandas.DataFrame(
                data=xmlparser.tables[0][start:], columns=cols
            )
        except pandas.errors.EmptyDataError as err:
            raise EmptyFile(f"Empty File {filename}: {err}") from err
        except pandas.errors.ParserError as err:
            raise ComponentError(f"Parsing File {filename}: {err}") from err
        except Exception as err:
            raise ComponentError(
                f"Generic Error on file {filename}, error: {err}"
            ) from err

    if (
        self.mime == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    ):
        # xlsx or any openxml based document
        default_engine = "openpyxl"
    elif self.mime == "application/vnd.ms-excel.sheet.binary.macroEnabled.12":
        default_engine = "pyxlsb"
    else:
        try:
            # PurePath also accepts plain strings, which have no ``suffix``
            # attribute of their own; the comparison is case-insensitive.
            ext = PurePath(filename).suffix.lower()
        except (AttributeError, TypeError, ValueError) as e:
            self._logger.warning(f"Error detecting extension: {e}")
            ext = ".xls"
        default_engine = "xlrd" if ext == ".xls" else "calamine"
    file_engine = self._params.get("file_engine", default_engine)
    try:
        arguments = {**self.args, **add_columns, **self.parse_dates}
        if self._limit is not None and isinstance(self._limit, int):
            arguments["nrows"] = self._limit
        if self.sheet_name is not None:
            arguments["sheet_name"] = self.sheet_name
        # TODO: if sheet_name is None, then open all worksheets
        # work with the dictionary of dataframes.
        return pandas.read_excel(
            filename,
            na_values=self.na_values,
            na_filter=self.filter_nan,
            engine=file_engine,
            keep_default_na=False,
            **arguments,
        )
    except (IndexError, xlrd.biffh.XLRDError) as err:
        raise ComponentError(
            f"Excel Index error on File {filename}: {err}"
        ) from err
    except pandas.errors.EmptyDataError as err:
        raise EmptyFile(f"Empty File {filename}: {err}") from err
    except pandas.errors.ParserError as err:
        raise ComponentError(f"Error Parsing File {filename}: {err}") from err
    except Exception as err:
        raise ComponentError(
            f"Generic Error on file {filename}, error: {err}"
        ) from err
|
274
|
+
|
275
|
+
async def open_html(
    self, filename: str, add_columns: dict, encoding: str
) -> pandas.DataFrame:
    """Read the first table of an HTML document into a DataFrame.

    The ``dtype`` and ``skiprows`` entries are removed from ``self.args``
    before the remaining arguments are forwarded to ``pandas.read_html``.

    Args:
        filename: path of the HTML file.
        add_columns: reader options; only its ``names`` entry is used, to
            relabel the columns of the table that was read.
        encoding: encoding used to decode the document.

    Returns:
        The first table found, or ``None`` when pandas returned no table.

    Raises:
        EmptyFile: when pandas reports that the file has no data.
        ComponentError: on any other failure.
    """
    self._logger.debug(
        f"Opening an HTML file {filename} with Pandas, encoding={encoding}"
    )
    self.args.pop("dtype", None)
    self.args.pop("skiprows", None)
    try:
        dfs = pandas.read_html(
            filename,
            keep_default_na=False,
            flavor="html5lib",
            na_values=self.na_values,
            encoding=encoding,
            **self.parse_dates,
            **self.args,
        )
        df = dfs[0] if dfs else None
        # Without a table there is nothing to relabel; the caller decides
        # how to treat the missing result.
        if df is not None and "names" in add_columns:
            df.columns = add_columns["names"]
        return df
    except pandas.errors.EmptyDataError as err:
        raise EmptyFile(message=f"Empty File {filename}: {err}") from err
    except pandas.errors.ParserError as err:
        raise ComponentError(message=f"Parsing File {filename}: {err}") from err
    except Exception as err:
        raise ComponentError(
            message=f"Generic Error on file {filename}: {err}"
        ) from err
|
310
|
+
|
311
|
+
async def open_parquet(
    self, filename: str, add_columns: dict, encoding
) -> pandas.DataFrame:
    """Placeholder for a Parquet reader.

    Not implemented: the arguments are ignored and ``None`` is returned.
    """
    return None
|
315
|
+
|
316
|
+
async def open_sql(
    self, filename: str, add_columns: dict, encoding
) -> pandas.DataFrame:
    """Placeholder for a SQL reader.

    Not implemented: the arguments are ignored and ``None`` is returned.
    """
    return None
|
320
|
+
|
321
|
+
async def open_json(
    self, filename: str, add_columns: dict, encoding: str
) -> pandas.DataFrame:
    """Load a records-oriented JSON document into a DataFrame.

    Args:
        filename: path of the JSON file.
        add_columns: currently unused by this reader.
        encoding: encoding used to decode the file.

    Raises:
        EmptyFile: when pandas reports that the file has no data.
        ComponentError: on any other failure while reading the file.
    """
    self._logger.debug(
        f"Opening a JSON file {filename} with Pandas, encoding={encoding}"
    )
    # TODO: add columns functionality.
    try:
        frame = pandas.read_json(
            filename, orient="records", encoding=encoding, **self.args
        )
    except pandas.errors.EmptyDataError as empty:
        raise EmptyFile(message=f"Empty File {filename}: {empty}") from empty
    except pandas.errors.ParserError as malformed:
        raise ComponentError(
            message=f"Error Parsing File {filename}: {malformed}"
        ) from malformed
    except Exception as failure:
        raise ComponentError(
            message=f"Generic Error on file {filename}: {failure}"
        ) from failure
    else:
        return frame
|
343
|
+
|
344
|
+
async def open_csv(
    self, filename: str, add_columns: dict, encoding
) -> pandas.DataFrame:
    """Read a delimited text file into a DataFrame.

    The file is read with ``pandas.read_csv`` using ``self.separator`` as
    delimiter and ``","`` as decimal mark. Extra reader options come from
    ``add_columns``, ``self.parse_dates`` and ``self.args``; the ``engine``
    entry of ``self.args`` selects the parser (default ``"c"``).

    Recovery strategies of the regular (non ``bigfile``) path:

    * ``UnicodeDecodeError``: the file is retried with ``utf-8``,
      ``latin1`` and ``ascii``; when the component has a
      ``clean_null_bytes`` attribute, null bytes are stripped first.
    * any other ``ValueError``: the file is inspected as plain strings to
      detect surplus columns, then ``dtype`` is dropped from ``self.args``
      and the file is read again.

    Args:
        filename: path of the file to open.
        add_columns: reader options (e.g. ``names``); it is not modified.
        encoding: encoding declared for the file.

    Raises:
        ComponentError: when the file is empty, cannot be parsed, has more
            columns than declared, or cannot be read with any encoding.
    """
    self._logger.debug(
        f"Opening CSV file {filename} with Pandas, encoding={encoding}"
    )
    # The engine is read without removing it from self.args, otherwise
    # every file after the first one would silently fall back to "c".
    engine = self.args.get("engine", "c")
    # Work on a copy so the caller's dictionary is left untouched.
    options = dict(add_columns)
    if engine == "c":
        # These two options belong to the C parser; other engines reject
        # them when they differ from the defaults.
        options["low_memory"] = False
        options["float_precision"] = "high"
    if self._limit is not None and isinstance(self._limit, int):
        options["nrows"] = self._limit

    def _read(source, enc, **specific):
        # Every option group is expanded separately on purpose: an option
        # declared twice raises TypeError instead of being overridden.
        return pandas.read_csv(
            source,
            sep=self.separator,
            decimal=",",
            engine=engine,
            keep_default_na=False,
            na_values=self.na_values,
            na_filter=self.filter_nan,
            encoding=enc,
            **specific,
            **options,
            **self.parse_dates,
            **{k: v for k, v in self.args.items() if k != "engine"},
        )

    # try to fix the encoding problem on files:
    _, new_encoding = detect_encoding(filename, encoding)
    if new_encoding != encoding:
        self._logger.warning(
            f"Encoding on file: {new_encoding} and "
            f"declared by Task ({encoding}) are different"
        )
        # encoding = new_encoding
    # open file:
    if hasattr(self, "bigfile"):
        try:
            # The chunked reader is closed as soon as it was consumed.
            with _read(
                filename,
                encoding,
                skipinitialspace=True,
                iterator=True,
                chunksize=int(self.chunksize),
            ) as reader:
                return pandas.concat(reader, ignore_index=True)
        except pandas.errors.EmptyDataError as err:
            raise ComponentError(
                f"Empty Data File on: {filename}, error: {err}"
            ) from err
        except Exception as err:
            raise ComponentError(
                f"Generic Error on file: {filename}, error: {err}"
            ) from err

    try:
        return _read(filename, encoding, quotechar='"', skipinitialspace=True)
    except UnicodeDecodeError as exc:
        self._logger.warning(
            f"Invalid Encoding {encoding}: {exc}"
        )
        # fallback to a default unicode:
        _, encoding = detect_encoding(filename, encoding)
        self._logger.debug(f"Detected Encoding > {encoding!s}")
        source = filename
        if hasattr(self, 'clean_null_bytes'):
            async with aiofiles.open(filename, 'rb') as file:
                content = await file.read()
            # Removing all null bytes
            source = BytesIO(content.replace(b'\x00', b''))
        last_encoding = None
        for enc in ('utf-8', 'latin1', 'ascii'):
            last_encoding = enc
            if isinstance(source, BytesIO):
                # A failed attempt leaves the buffer partially consumed.
                source.seek(0)
            try:
                return _read(
                    source,
                    enc,
                    quotechar='"',
                    skipinitialspace=True,
                    on_bad_lines='warn',
                )
            except Exception as e:
                self._logger.warning(
                    f"Cannot open {filename} with encoding {enc}: {e}"
                )
        # No encoding match
        raise ComponentError(
            f"Cannot Open the file with encoding {last_encoding}"
        ) from exc
    # EmptyDataError and ParserError derive from ValueError, so they have
    # to be handled before the generic ValueError recovery below.
    except pandas.errors.EmptyDataError as err:
        raise ComponentError(
            f"Empty Data in file: {filename}, error: {err}"
        ) from err
    except pandas.errors.ParserError as err:
        raise ComponentError(
            f"Error parsing File: {filename}, error: {err}"
        ) from err
    except ValueError as exc:
        # Open Pandas with default settings for detect discrepancies
        df = pandas.read_csv(
            filename,
            sep=self.separator,
            quotechar='"',
            decimal=",",
            engine=engine,
            encoding=encoding,
            dtype=str,
            header=None,
        )
        # columns in Pandas:
        num_cols = int(df.shape[1])
        expected = len(add_columns.get('names', []))
        if expected > 0 and num_cols - expected > 0:
            # some extra columns were found:
            raise ComponentError(
                (
                    f"There are more columns in FILE than expected. "
                    f"There are {num_cols} in File received vs "
                    f"{expected} columns in Mapping definition."
                )
            ) from exc
        # The declared dtypes are dropped for the retry (and, as before,
        # for every later file handled by this component instance).
        self.args.pop('dtype', None)
        self._logger.error(
            (
                f"Some columns have wrong type in Model: {exc}, "
                "Opening file with default settings (str)"
            )
        )
        try:
            return _read(filename, encoding, quotechar='"')
        except Exception as ex:
            raise ComponentError(
                f"Invalid types of columns found on file {filename}, {ex}"
            ) from ex
    except Exception as err:
        raise ComponentError(
            f"Generic Error on file: {filename}, error: {err}"
        ) from err
|
523
|
+
|
524
|
+
async def run(self) -> Any:
    """Open the configured source(s) and publish one combined DataFrame.

    Steps performed:

    1. Build the set of NA markers from ``STR_NA_VALUES`` plus
       ``self.na_values`` and store it back in ``self.na_values``.
    2. Build one DataFrame per item of ``self._data`` (when no file names
       are set and data is available) or per entry of ``self._filenames``,
       choosing the reader from ``self.mime``.
    3. Combine the frames: a single frame is used as is, ``no_multi`` keeps
       only the last one, otherwise they are concatenated.
    4. Apply the optional post-processing driven by component attributes
       (``drop_empty``, ``dropna``, ``trim``, ``pk``, ``clean_nat``) and
       coerce column types following ``self._colinfo``.
    5. Store the result and row-count variables/metrics.

    Returns:
        The resulting DataFrame (also stored in ``self._result``).

    Raises:
        DataNotFound: when there is neither usable data nor a file list.
        EmptyFile: when a file yields no rows.
        ComponentError: on reader failures or unsupported MIME types.
    """
    await super(OpenWithPandas, self).run()
    add_columns = await self.colinfo()
    result = []
    df = None
    ## Define NA Values:
    default_missing = STR_NA_VALUES.copy()
    if self.remove_empty_strings is True:
        default_missing.discard("")
    for val in self.na_values:  # pylint: disable=E0203
        default_missing.add(val)
    self.na_values = default_missing
    if self._filenames is None and not check_empty(self._data):
        if isinstance(self._data, list):
            for file in self._data:
                try:
                    df = pandas.DataFrame(
                        data=file, **add_columns, **self.parse_dates, **self.args
                    )
                    result.append(df)
                except pandas.errors.EmptyDataError as err:
                    raise ComponentError(
                        f"Error on Empty Data: error: {err}"
                    ) from err
                except ValueError as err:
                    raise ComponentError(
                        f"Error parsing Data: error: {err}"
                    ) from err
                except Exception as err:
                    raise ComponentError(
                        f"Generic Error on Data: error: {err}"
                    ) from err
        if df is None or df.empty:
            raise DataNotFound("Dataframe is Empty: Data not found")
    else:
        # iterate over all files or data
        self._variables["FILENAMES"] = self._filenames
        if self._filenames is None:
            # Neither files nor data: report it instead of failing while
            # iterating over None.
            raise DataNotFound("Dataframe is Empty: Data not found")
        for filename in self._filenames:
            try:
                encoding = self.check_encoding(filename)
            except Exception:
                encoding = "UTF-8"
            if self.mime in ("text/csv", "text/plain"):
                try:
                    df = await self.open_csv(filename, add_columns, encoding)
                    if isinstance(filename, PurePath):
                        self.add_metric(f"{filename.name}", len(df.index))
                    else:
                        self.add_metric(f"{filename}", len(df.index))
                except Exception as err:
                    raise ComponentError(f"Encoding Error: {err}") from err
                if hasattr(self, "add_columns") and hasattr(self, "rename"):
                    # The first row is discarded when columns were renamed;
                    # an empty frame is left for the EmptyFile check below.
                    if self.rename is True and not df.empty:
                        df = df.drop(df.index[0])
            elif self.mime in excel_based:
                try:
                    df = await self.open_excel(filename, add_columns, encoding)
                except Exception as err:
                    raise ComponentError(
                        f"Error parsing Excel: {err}"
                    ) from err
            elif self.mime in ("text/html", "application/html"):
                try:
                    df = await self.open_html(filename, add_columns, encoding)
                except Exception as err:
                    raise ComponentError(f"Error parsing XML: {err}") from err
            elif self.mime == "application/json":
                try:
                    df = await self.open_json(filename, add_columns, encoding)
                except Exception as err:
                    raise ComponentError(f"Error parsing JSON: {err}") from err
            else:
                raise ComponentError(f"Try to Open invalid MIME Type: {self.mime}")
            if df is None or df.empty:
                raise EmptyFile(f"Empty File {filename}")
            result.append(df)
    # at the end, concat the sources:
    if len(result) == 1:
        df = result[0]
    else:
        ## fix Pandas Concat
        if self.no_multi is True:  # get only one element
            df = result.pop()
        else:
            try:
                df = pandas.concat(
                    result  # , ignore_index=True # , sort=False, axis=0,
                )  # .reindex(result[0].index)
            except Exception as err:
                raise ComponentError(
                    f"Error Combining Resultset Dataframes: {err}"
                ) from err
    # post-processing:
    if hasattr(self, "remove_scientific_notation"):
        # This changes a process-wide pandas display option.
        pandas.set_option("display.float_format", lambda x: "%.3f" % x)
    if hasattr(self, "drop_empty"):
        df.dropna(axis=1, how="all", inplace=True)
        df.dropna(axis=0, how="all", inplace=True)
        df = df.loc[:, ~df.columns.str.contains("^Unnamed")]
    if hasattr(self, "dropna"):
        df.dropna(subset=self.dropna, how="all", inplace=True)
    if hasattr(self, "trim"):
        # Iterating the selection yields the names of the text columns.
        cols = df.select_dtypes(include=["object", "string"])
        for col in cols:
            df[col] = df[col].astype(str).str.strip()
    # define the primary keys for DataFrame
    if hasattr(self, "pk"):
        try:
            columns = self.pk["columns"]
            del self.pk["columns"]
            # NOTE(review): reset_index() returns a new frame, so this
            # in-place set_index() does not modify ``df``. Left unchanged
            # because enabling it would alter the columns of the result;
            # confirm the intended behaviour before fixing.
            df.reset_index().set_index(columns, inplace=True, drop=False, **self.pk)
        except Exception as err:
            self._logger.error(f"OpenWith: Error setting index: {err}")
    if self.clean_nat is True:
        df.replace({pandas.NaT: None}, inplace=True)
    if self._colinfo:
        # fix the datatype for every column in dataframe (if needed)
        for column, dtype in self._colinfo.items():
            try:
                if dtype in (
                    "timestamp without time zone",
                    "timestamp with time zone",
                    "date",
                ):
                    if df[column].dtype != "datetime64[ns]":
                        df[column] = pandas.to_datetime(df[column], errors="coerce")
                        df[column] = df[column].astype("datetime64[ns]")
                elif dtype in ("character varying", "character", "text", "varchar"):
                    df[column] = df[column].replace([np.nan], "", regex=True)
                    df[column] = df[column].fillna("")
                    df[column] = df[column].astype("string", errors="raise")
                elif dtype == "smallint":
                    # smallint is a 16-bit integer; the nullable Int16 type
                    # keeps missing values as <NA> (filling them with ""
                    # made the integer conversion fail).
                    df[column] = pandas.to_numeric(df[column], errors="coerce")
                    df[column] = df[column].astype("Int16")
                elif dtype in ("integer", "bigint"):
                    try:
                        # Duplicated column names yield a DataFrame here.
                        ctype = df[column].dtypes[0].name
                    except (TypeError, KeyError):
                        ctype = df[column].dtype
                    if ctype not in ("Int8", "Int32", "Int64"):
                        df[column] = pandas.to_numeric(df[column], errors="raise")
                    df[column] = df[column].astype("Int64", errors="raise")
                elif dtype in ("numeric", "float", "double precision", "real"):
                    df[column] = pandas.to_numeric(df[column], errors="coerce")
                    df[column] = df[column].astype("float64")
                elif dtype == "jsonb":
                    df[column] = df[column].apply(orjson.loads)
                elif dtype == "object":
                    df[column] = df[column].replace([np.nan], "", regex=True)
            except Exception as err:
                # Type coercion is best effort: the column keeps whatever
                # type it had when the failure happened.
                self._logger.warning(
                    f"Cannot set data type for column {column}: {err}"
                )
    self._result = df
    numrows = len(df.index)
    self._variables["_numRows_"] = numrows
    self._variables[f"{self.StepName}_NUMROWS"] = numrows
    self.add_metric("NUMROWS", numrows)
    self.add_metric("OPENED_FILES", self._filenames)
    if self._debug is True:
        print(df)
        print("::: Printing Column Information === ")
        for column, t in df.dtypes.items():
            # Post-processing may have removed every row.
            sample = df[column].iloc[0] if numrows else None
            print(column, "->", t, "->", sample)
    self._logger.debug(f"Opened File(s) with Pandas {self._filenames}")
    return self._result
|