flowtask 5.8.4__cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowtask/__init__.py +93 -0
- flowtask/__main__.py +38 -0
- flowtask/bots/__init__.py +6 -0
- flowtask/bots/check.py +93 -0
- flowtask/bots/codebot.py +51 -0
- flowtask/components/ASPX.py +148 -0
- flowtask/components/AddDataset.py +352 -0
- flowtask/components/Amazon.py +523 -0
- flowtask/components/AutoTask.py +314 -0
- flowtask/components/Azure.py +80 -0
- flowtask/components/AzureUsers.py +106 -0
- flowtask/components/BaseAction.py +91 -0
- flowtask/components/BaseLoop.py +198 -0
- flowtask/components/BestBuy.py +800 -0
- flowtask/components/CSVToGCS.py +120 -0
- flowtask/components/CompanyScraper/__init__.py +1 -0
- flowtask/components/CompanyScraper/parsers/__init__.py +6 -0
- flowtask/components/CompanyScraper/parsers/base.py +102 -0
- flowtask/components/CompanyScraper/parsers/explorium.py +192 -0
- flowtask/components/CompanyScraper/parsers/leadiq.py +206 -0
- flowtask/components/CompanyScraper/parsers/rocket.py +133 -0
- flowtask/components/CompanyScraper/parsers/siccode.py +109 -0
- flowtask/components/CompanyScraper/parsers/visualvisitor.py +130 -0
- flowtask/components/CompanyScraper/parsers/zoominfo.py +118 -0
- flowtask/components/CompanyScraper/scrapper.py +1054 -0
- flowtask/components/CopyTo.py +177 -0
- flowtask/components/CopyToBigQuery.py +243 -0
- flowtask/components/CopyToMongoDB.py +291 -0
- flowtask/components/CopyToPg.py +609 -0
- flowtask/components/CopyToRethink.py +207 -0
- flowtask/components/CreateGCSBucket.py +102 -0
- flowtask/components/CreateReport/CreateReport.py +228 -0
- flowtask/components/CreateReport/__init__.py +9 -0
- flowtask/components/CreateReport/charts/__init__.py +15 -0
- flowtask/components/CreateReport/charts/bar.py +51 -0
- flowtask/components/CreateReport/charts/base.py +66 -0
- flowtask/components/CreateReport/charts/pie.py +64 -0
- flowtask/components/CreateReport/utils.py +9 -0
- flowtask/components/CustomerSatisfaction.py +196 -0
- flowtask/components/DataInput.py +200 -0
- flowtask/components/DateList.py +255 -0
- flowtask/components/DbClient.py +163 -0
- flowtask/components/DialPad.py +146 -0
- flowtask/components/DocumentDBQuery.py +200 -0
- flowtask/components/DownloadFrom.py +371 -0
- flowtask/components/DownloadFromD2L.py +113 -0
- flowtask/components/DownloadFromFTP.py +181 -0
- flowtask/components/DownloadFromIMAP.py +315 -0
- flowtask/components/DownloadFromS3.py +198 -0
- flowtask/components/DownloadFromSFTP.py +265 -0
- flowtask/components/DownloadFromSharepoint.py +110 -0
- flowtask/components/DownloadFromSmartSheet.py +114 -0
- flowtask/components/DownloadS3File.py +229 -0
- flowtask/components/Dummy.py +59 -0
- flowtask/components/DuplicatePhoto.py +411 -0
- flowtask/components/EmployeeEvaluation.py +237 -0
- flowtask/components/ExecuteSQL.py +323 -0
- flowtask/components/ExtractHTML.py +178 -0
- flowtask/components/FileBase.py +178 -0
- flowtask/components/FileCopy.py +181 -0
- flowtask/components/FileDelete.py +82 -0
- flowtask/components/FileExists.py +146 -0
- flowtask/components/FileIteratorDelete.py +112 -0
- flowtask/components/FileList.py +194 -0
- flowtask/components/FileOpen.py +75 -0
- flowtask/components/FileRead.py +120 -0
- flowtask/components/FileRename.py +106 -0
- flowtask/components/FilterIf.py +284 -0
- flowtask/components/FilterRows/FilterRows.py +200 -0
- flowtask/components/FilterRows/__init__.py +10 -0
- flowtask/components/FilterRows/functions.py +4 -0
- flowtask/components/GCSToBigQuery.py +103 -0
- flowtask/components/GoogleA4.py +150 -0
- flowtask/components/GoogleGeoCoding.py +344 -0
- flowtask/components/GooglePlaces.py +315 -0
- flowtask/components/GoogleSearch.py +539 -0
- flowtask/components/HTTPClient.py +268 -0
- flowtask/components/ICIMS.py +146 -0
- flowtask/components/IF.py +179 -0
- flowtask/components/IcimsFolderCopy.py +173 -0
- flowtask/components/ImageFeatures/__init__.py +5 -0
- flowtask/components/ImageFeatures/process.py +233 -0
- flowtask/components/IteratorBase.py +251 -0
- flowtask/components/LangchainLoader/__init__.py +5 -0
- flowtask/components/LangchainLoader/loader.py +194 -0
- flowtask/components/LangchainLoader/loaders/__init__.py +22 -0
- flowtask/components/LangchainLoader/loaders/abstract.py +362 -0
- flowtask/components/LangchainLoader/loaders/basepdf.py +50 -0
- flowtask/components/LangchainLoader/loaders/docx.py +91 -0
- flowtask/components/LangchainLoader/loaders/html.py +119 -0
- flowtask/components/LangchainLoader/loaders/pdfblocks.py +146 -0
- flowtask/components/LangchainLoader/loaders/pdfmark.py +79 -0
- flowtask/components/LangchainLoader/loaders/pdftables.py +135 -0
- flowtask/components/LangchainLoader/loaders/qa.py +67 -0
- flowtask/components/LangchainLoader/loaders/txt.py +55 -0
- flowtask/components/LeadIQ.py +650 -0
- flowtask/components/Loop.py +253 -0
- flowtask/components/Lowes.py +334 -0
- flowtask/components/MS365Usage.py +156 -0
- flowtask/components/MSTeamsMessages.py +320 -0
- flowtask/components/MarketClustering.py +1051 -0
- flowtask/components/MergeFiles.py +362 -0
- flowtask/components/MilvusOutput.py +87 -0
- flowtask/components/NearByStores.py +175 -0
- flowtask/components/NetworkNinja/__init__.py +6 -0
- flowtask/components/NetworkNinja/models/__init__.py +52 -0
- flowtask/components/NetworkNinja/models/abstract.py +177 -0
- flowtask/components/NetworkNinja/models/account.py +39 -0
- flowtask/components/NetworkNinja/models/client.py +19 -0
- flowtask/components/NetworkNinja/models/district.py +14 -0
- flowtask/components/NetworkNinja/models/events.py +101 -0
- flowtask/components/NetworkNinja/models/forms.py +499 -0
- flowtask/components/NetworkNinja/models/market.py +16 -0
- flowtask/components/NetworkNinja/models/organization.py +34 -0
- flowtask/components/NetworkNinja/models/photos.py +125 -0
- flowtask/components/NetworkNinja/models/project.py +44 -0
- flowtask/components/NetworkNinja/models/region.py +28 -0
- flowtask/components/NetworkNinja/models/store.py +203 -0
- flowtask/components/NetworkNinja/models/user.py +151 -0
- flowtask/components/NetworkNinja/router.py +854 -0
- flowtask/components/Odoo.py +175 -0
- flowtask/components/OdooInjector.py +192 -0
- flowtask/components/OpenFromXML.py +126 -0
- flowtask/components/OpenWeather.py +41 -0
- flowtask/components/OpenWithBase.py +616 -0
- flowtask/components/OpenWithPandas.py +715 -0
- flowtask/components/PGPDecrypt.py +199 -0
- flowtask/components/PandasIterator.py +187 -0
- flowtask/components/PandasToFile.py +189 -0
- flowtask/components/Paradox.py +339 -0
- flowtask/components/ParamIterator.py +117 -0
- flowtask/components/ParseHTML.py +84 -0
- flowtask/components/PlacerStores.py +249 -0
- flowtask/components/Pokemon.py +507 -0
- flowtask/components/PositiveBot.py +62 -0
- flowtask/components/PowerPointSlide.py +400 -0
- flowtask/components/PrintMessage.py +127 -0
- flowtask/components/ProductCompetitors/__init__.py +5 -0
- flowtask/components/ProductCompetitors/parsers/__init__.py +7 -0
- flowtask/components/ProductCompetitors/parsers/base.py +72 -0
- flowtask/components/ProductCompetitors/parsers/bestbuy.py +86 -0
- flowtask/components/ProductCompetitors/parsers/lowes.py +103 -0
- flowtask/components/ProductCompetitors/scrapper.py +155 -0
- flowtask/components/ProductCompliant.py +169 -0
- flowtask/components/ProductInfo/__init__.py +1 -0
- flowtask/components/ProductInfo/parsers/__init__.py +5 -0
- flowtask/components/ProductInfo/parsers/base.py +83 -0
- flowtask/components/ProductInfo/parsers/brother.py +97 -0
- flowtask/components/ProductInfo/parsers/canon.py +167 -0
- flowtask/components/ProductInfo/parsers/epson.py +118 -0
- flowtask/components/ProductInfo/parsers/hp.py +131 -0
- flowtask/components/ProductInfo/parsers/samsung.py +97 -0
- flowtask/components/ProductInfo/scraper.py +319 -0
- flowtask/components/ProductPricing.py +118 -0
- flowtask/components/QS.py +261 -0
- flowtask/components/QSBase.py +201 -0
- flowtask/components/QueryIterator.py +273 -0
- flowtask/components/QueryToInsert.py +327 -0
- flowtask/components/QueryToPandas.py +432 -0
- flowtask/components/RESTClient.py +195 -0
- flowtask/components/RethinkDBQuery.py +189 -0
- flowtask/components/Rsync.py +74 -0
- flowtask/components/RunSSH.py +59 -0
- flowtask/components/RunShell.py +71 -0
- flowtask/components/SalesForce.py +20 -0
- flowtask/components/SaveImageBank/__init__.py +257 -0
- flowtask/components/SchedulingVisits.py +592 -0
- flowtask/components/ScrapPage.py +216 -0
- flowtask/components/ScrapSearch.py +79 -0
- flowtask/components/SendNotify.py +257 -0
- flowtask/components/SentimentAnalysis.py +694 -0
- flowtask/components/ServiceScrapper/__init__.py +5 -0
- flowtask/components/ServiceScrapper/parsers/__init__.py +1 -0
- flowtask/components/ServiceScrapper/parsers/base.py +94 -0
- flowtask/components/ServiceScrapper/parsers/costco.py +93 -0
- flowtask/components/ServiceScrapper/scrapper.py +199 -0
- flowtask/components/SetVariables.py +156 -0
- flowtask/components/SubTask.py +182 -0
- flowtask/components/SuiteCRM.py +48 -0
- flowtask/components/Switch.py +175 -0
- flowtask/components/TableBase.py +148 -0
- flowtask/components/TableDelete.py +312 -0
- flowtask/components/TableInput.py +143 -0
- flowtask/components/TableOutput/TableOutput.py +384 -0
- flowtask/components/TableOutput/__init__.py +3 -0
- flowtask/components/TableSchema.py +534 -0
- flowtask/components/Target.py +223 -0
- flowtask/components/ThumbnailGenerator.py +156 -0
- flowtask/components/ToPandas.py +67 -0
- flowtask/components/TransformRows/TransformRows.py +507 -0
- flowtask/components/TransformRows/__init__.py +9 -0
- flowtask/components/TransformRows/functions.py +559 -0
- flowtask/components/TransposeRows.py +176 -0
- flowtask/components/UPCDatabase.py +86 -0
- flowtask/components/UnGzip.py +171 -0
- flowtask/components/Uncompress.py +172 -0
- flowtask/components/UniqueRows.py +126 -0
- flowtask/components/Unzip.py +107 -0
- flowtask/components/UpdateOperationalVars.py +147 -0
- flowtask/components/UploadTo.py +299 -0
- flowtask/components/UploadToS3.py +136 -0
- flowtask/components/UploadToSFTP.py +160 -0
- flowtask/components/UploadToSharepoint.py +205 -0
- flowtask/components/UserFunc.py +122 -0
- flowtask/components/VivaTracker.py +140 -0
- flowtask/components/WSDLClient.py +123 -0
- flowtask/components/Wait.py +18 -0
- flowtask/components/Walmart.py +199 -0
- flowtask/components/Workplace.py +134 -0
- flowtask/components/XMLToPandas.py +267 -0
- flowtask/components/Zammad/__init__.py +41 -0
- flowtask/components/Zammad/models.py +0 -0
- flowtask/components/ZoomInfoScraper.py +409 -0
- flowtask/components/__init__.py +104 -0
- flowtask/components/abstract.py +18 -0
- flowtask/components/flow.py +530 -0
- flowtask/components/google.py +335 -0
- flowtask/components/group.py +221 -0
- flowtask/components/py.typed +0 -0
- flowtask/components/reviewscrap.py +132 -0
- flowtask/components/tAutoincrement.py +117 -0
- flowtask/components/tConcat.py +109 -0
- flowtask/components/tExplode.py +119 -0
- flowtask/components/tFilter.py +184 -0
- flowtask/components/tGroup.py +236 -0
- flowtask/components/tJoin.py +270 -0
- flowtask/components/tMap/__init__.py +9 -0
- flowtask/components/tMap/functions.py +54 -0
- flowtask/components/tMap/tMap.py +450 -0
- flowtask/components/tMelt.py +112 -0
- flowtask/components/tMerge.py +114 -0
- flowtask/components/tOrder.py +93 -0
- flowtask/components/tPandas.py +94 -0
- flowtask/components/tPivot.py +71 -0
- flowtask/components/tPluckCols.py +76 -0
- flowtask/components/tUnnest.py +82 -0
- flowtask/components/user.py +401 -0
- flowtask/conf.py +457 -0
- flowtask/download.py +102 -0
- flowtask/events/__init__.py +11 -0
- flowtask/events/events/__init__.py +20 -0
- flowtask/events/events/abstract.py +95 -0
- flowtask/events/events/alerts/__init__.py +362 -0
- flowtask/events/events/alerts/colfunctions.py +131 -0
- flowtask/events/events/alerts/functions.py +158 -0
- flowtask/events/events/dummy.py +12 -0
- flowtask/events/events/exec.py +124 -0
- flowtask/events/events/file/__init__.py +7 -0
- flowtask/events/events/file/base.py +51 -0
- flowtask/events/events/file/copy.py +23 -0
- flowtask/events/events/file/delete.py +16 -0
- flowtask/events/events/interfaces/__init__.py +9 -0
- flowtask/events/events/interfaces/client.py +67 -0
- flowtask/events/events/interfaces/credentials.py +28 -0
- flowtask/events/events/interfaces/notifications.py +58 -0
- flowtask/events/events/jira.py +122 -0
- flowtask/events/events/log.py +26 -0
- flowtask/events/events/logerr.py +52 -0
- flowtask/events/events/notify.py +59 -0
- flowtask/events/events/notify_event.py +160 -0
- flowtask/events/events/publish.py +54 -0
- flowtask/events/events/sendfile.py +104 -0
- flowtask/events/events/task.py +97 -0
- flowtask/events/events/teams.py +98 -0
- flowtask/events/events/webhook.py +58 -0
- flowtask/events/manager.py +287 -0
- flowtask/exceptions.c +39393 -0
- flowtask/exceptions.cpython-310-x86_64-linux-gnu.so +0 -0
- flowtask/extensions/__init__.py +3 -0
- flowtask/extensions/abstract.py +82 -0
- flowtask/extensions/logging/__init__.py +65 -0
- flowtask/hooks/__init__.py +9 -0
- flowtask/hooks/actions/__init__.py +22 -0
- flowtask/hooks/actions/abstract.py +66 -0
- flowtask/hooks/actions/dummy.py +23 -0
- flowtask/hooks/actions/jira.py +74 -0
- flowtask/hooks/actions/rest.py +320 -0
- flowtask/hooks/actions/sampledata.py +37 -0
- flowtask/hooks/actions/sensor.py +23 -0
- flowtask/hooks/actions/task.py +9 -0
- flowtask/hooks/actions/ticket.py +37 -0
- flowtask/hooks/actions/zammad.py +55 -0
- flowtask/hooks/hook.py +62 -0
- flowtask/hooks/models.py +17 -0
- flowtask/hooks/service.py +187 -0
- flowtask/hooks/step.py +91 -0
- flowtask/hooks/types/__init__.py +23 -0
- flowtask/hooks/types/base.py +129 -0
- flowtask/hooks/types/brokers/__init__.py +11 -0
- flowtask/hooks/types/brokers/base.py +54 -0
- flowtask/hooks/types/brokers/mqtt.py +35 -0
- flowtask/hooks/types/brokers/rabbitmq.py +82 -0
- flowtask/hooks/types/brokers/redis.py +83 -0
- flowtask/hooks/types/brokers/sqs.py +44 -0
- flowtask/hooks/types/fs.py +232 -0
- flowtask/hooks/types/http.py +49 -0
- flowtask/hooks/types/imap.py +200 -0
- flowtask/hooks/types/jira.py +279 -0
- flowtask/hooks/types/mail.py +205 -0
- flowtask/hooks/types/postgres.py +98 -0
- flowtask/hooks/types/responses/__init__.py +8 -0
- flowtask/hooks/types/responses/base.py +5 -0
- flowtask/hooks/types/sharepoint.py +288 -0
- flowtask/hooks/types/ssh.py +141 -0
- flowtask/hooks/types/tagged.py +59 -0
- flowtask/hooks/types/upload.py +85 -0
- flowtask/hooks/types/watch.py +71 -0
- flowtask/hooks/types/web.py +36 -0
- flowtask/interfaces/AzureClient.py +137 -0
- flowtask/interfaces/AzureGraph.py +839 -0
- flowtask/interfaces/Boto3Client.py +326 -0
- flowtask/interfaces/DropboxClient.py +173 -0
- flowtask/interfaces/ExcelHandler.py +94 -0
- flowtask/interfaces/FTPClient.py +131 -0
- flowtask/interfaces/GoogleCalendar.py +201 -0
- flowtask/interfaces/GoogleClient.py +133 -0
- flowtask/interfaces/GoogleDrive.py +127 -0
- flowtask/interfaces/GoogleGCS.py +89 -0
- flowtask/interfaces/GoogleGeocoding.py +93 -0
- flowtask/interfaces/GoogleLang.py +114 -0
- flowtask/interfaces/GooglePub.py +61 -0
- flowtask/interfaces/GoogleSheet.py +68 -0
- flowtask/interfaces/IMAPClient.py +137 -0
- flowtask/interfaces/O365Calendar.py +113 -0
- flowtask/interfaces/O365Client.py +220 -0
- flowtask/interfaces/OneDrive.py +284 -0
- flowtask/interfaces/Outlook.py +155 -0
- flowtask/interfaces/ParrotBot.py +130 -0
- flowtask/interfaces/SSHClient.py +378 -0
- flowtask/interfaces/Sharepoint.py +496 -0
- flowtask/interfaces/__init__.py +36 -0
- flowtask/interfaces/azureauth.py +119 -0
- flowtask/interfaces/cache.py +201 -0
- flowtask/interfaces/client.py +82 -0
- flowtask/interfaces/compress.py +525 -0
- flowtask/interfaces/credentials.py +124 -0
- flowtask/interfaces/d2l.py +239 -0
- flowtask/interfaces/databases/__init__.py +5 -0
- flowtask/interfaces/databases/db.py +223 -0
- flowtask/interfaces/databases/documentdb.py +55 -0
- flowtask/interfaces/databases/rethink.py +39 -0
- flowtask/interfaces/dataframes/__init__.py +11 -0
- flowtask/interfaces/dataframes/abstract.py +21 -0
- flowtask/interfaces/dataframes/arrow.py +71 -0
- flowtask/interfaces/dataframes/dt.py +69 -0
- flowtask/interfaces/dataframes/pandas.py +167 -0
- flowtask/interfaces/dataframes/polars.py +60 -0
- flowtask/interfaces/db.py +263 -0
- flowtask/interfaces/env.py +46 -0
- flowtask/interfaces/func.py +137 -0
- flowtask/interfaces/http.py +1780 -0
- flowtask/interfaces/locale.py +40 -0
- flowtask/interfaces/log.py +75 -0
- flowtask/interfaces/mask.py +143 -0
- flowtask/interfaces/notification.py +154 -0
- flowtask/interfaces/playwright.py +339 -0
- flowtask/interfaces/powerpoint.py +368 -0
- flowtask/interfaces/py.typed +0 -0
- flowtask/interfaces/qs.py +376 -0
- flowtask/interfaces/result.py +87 -0
- flowtask/interfaces/selenium_service.py +779 -0
- flowtask/interfaces/smartsheet.py +154 -0
- flowtask/interfaces/stat.py +39 -0
- flowtask/interfaces/task.py +96 -0
- flowtask/interfaces/template.py +118 -0
- flowtask/interfaces/vectorstores/__init__.py +1 -0
- flowtask/interfaces/vectorstores/abstract.py +133 -0
- flowtask/interfaces/vectorstores/milvus.py +669 -0
- flowtask/interfaces/zammad.py +107 -0
- flowtask/models.py +193 -0
- flowtask/parsers/__init__.py +15 -0
- flowtask/parsers/_yaml.c +11978 -0
- flowtask/parsers/_yaml.cpython-310-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/argparser.py +235 -0
- flowtask/parsers/base.c +15155 -0
- flowtask/parsers/base.cpython-310-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/json.c +11968 -0
- flowtask/parsers/json.cpython-310-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/maps.py +49 -0
- flowtask/parsers/toml.c +11968 -0
- flowtask/parsers/toml.cpython-310-x86_64-linux-gnu.so +0 -0
- flowtask/plugins/__init__.py +16 -0
- flowtask/plugins/components/__init__.py +0 -0
- flowtask/plugins/handler/__init__.py +45 -0
- flowtask/plugins/importer.py +31 -0
- flowtask/plugins/sources/__init__.py +0 -0
- flowtask/runner.py +283 -0
- flowtask/scheduler/__init__.py +9 -0
- flowtask/scheduler/functions.py +493 -0
- flowtask/scheduler/handlers/__init__.py +8 -0
- flowtask/scheduler/handlers/manager.py +504 -0
- flowtask/scheduler/handlers/models.py +58 -0
- flowtask/scheduler/handlers/service.py +72 -0
- flowtask/scheduler/notifications.py +65 -0
- flowtask/scheduler/scheduler.py +993 -0
- flowtask/services/__init__.py +0 -0
- flowtask/services/bots/__init__.py +0 -0
- flowtask/services/bots/telegram.py +264 -0
- flowtask/services/files/__init__.py +11 -0
- flowtask/services/files/manager.py +522 -0
- flowtask/services/files/model.py +37 -0
- flowtask/services/files/service.py +767 -0
- flowtask/services/jira/__init__.py +3 -0
- flowtask/services/jira/jira_actions.py +191 -0
- flowtask/services/tasks/__init__.py +13 -0
- flowtask/services/tasks/launcher.py +213 -0
- flowtask/services/tasks/manager.py +323 -0
- flowtask/services/tasks/service.py +275 -0
- flowtask/services/tasks/task_manager.py +376 -0
- flowtask/services/tasks/tasks.py +155 -0
- flowtask/storages/__init__.py +16 -0
- flowtask/storages/exceptions.py +12 -0
- flowtask/storages/files/__init__.py +8 -0
- flowtask/storages/files/abstract.py +29 -0
- flowtask/storages/files/filesystem.py +66 -0
- flowtask/storages/tasks/__init__.py +19 -0
- flowtask/storages/tasks/abstract.py +26 -0
- flowtask/storages/tasks/database.py +33 -0
- flowtask/storages/tasks/filesystem.py +108 -0
- flowtask/storages/tasks/github.py +119 -0
- flowtask/storages/tasks/memory.py +45 -0
- flowtask/storages/tasks/row.py +25 -0
- flowtask/tasks/__init__.py +0 -0
- flowtask/tasks/abstract.py +526 -0
- flowtask/tasks/command.py +118 -0
- flowtask/tasks/pile.py +486 -0
- flowtask/tasks/py.typed +0 -0
- flowtask/tasks/task.py +778 -0
- flowtask/template/__init__.py +161 -0
- flowtask/tests.py +257 -0
- flowtask/types/__init__.py +8 -0
- flowtask/types/typedefs.c +11347 -0
- flowtask/types/typedefs.cpython-310-x86_64-linux-gnu.so +0 -0
- flowtask/utils/__init__.py +24 -0
- flowtask/utils/constants.py +117 -0
- flowtask/utils/encoders.py +21 -0
- flowtask/utils/executor.py +112 -0
- flowtask/utils/functions.cpp +14280 -0
- flowtask/utils/functions.cpython-310-x86_64-linux-gnu.so +0 -0
- flowtask/utils/json.cpp +13349 -0
- flowtask/utils/json.cpython-310-x86_64-linux-gnu.so +0 -0
- flowtask/utils/mail.py +63 -0
- flowtask/utils/parseqs.c +13324 -0
- flowtask/utils/parserqs.cpython-310-x86_64-linux-gnu.so +0 -0
- flowtask/utils/stats.py +308 -0
- flowtask/utils/transformations.py +74 -0
- flowtask/utils/uv.py +12 -0
- flowtask/utils/validators.py +97 -0
- flowtask/version.py +11 -0
- flowtask-5.8.4.dist-info/LICENSE +201 -0
- flowtask-5.8.4.dist-info/METADATA +209 -0
- flowtask-5.8.4.dist-info/RECORD +470 -0
- flowtask-5.8.4.dist-info/WHEEL +6 -0
- flowtask-5.8.4.dist-info/entry_points.txt +3 -0
- flowtask-5.8.4.dist-info/top_level.txt +2 -0
- plugins/components/CreateQR.py +39 -0
- plugins/components/TestComponent.py +28 -0
- plugins/components/Use1.py +13 -0
- plugins/components/Workplace.py +117 -0
- plugins/components/__init__.py +3 -0
- plugins/sources/__init__.py +0 -0
- plugins/sources/get_populartimes.py +78 -0
- plugins/sources/google.py +150 -0
- plugins/sources/hubspot.py +679 -0
- plugins/sources/icims.py +679 -0
- plugins/sources/mobileinsight.py +501 -0
- plugins/sources/newrelic.py +262 -0
- plugins/sources/uap.py +268 -0
- plugins/sources/venu.py +244 -0
- plugins/sources/vocinity.py +314 -0
@@ -0,0 +1,616 @@
|
|
1
|
+
import os
|
2
|
+
from typing import Any
|
3
|
+
from collections.abc import Callable
|
4
|
+
from abc import abstractmethod
|
5
|
+
import logging
|
6
|
+
from io import StringIO
|
7
|
+
import pandas
|
8
|
+
import asyncio
|
9
|
+
import codecs
|
10
|
+
import mimetypes
|
11
|
+
from pathlib import Path, PurePath, PosixPath
|
12
|
+
from xml.sax import ContentHandler
|
13
|
+
import magic
|
14
|
+
import chardet
|
15
|
+
from asyncdb import AsyncDB
|
16
|
+
from asyncdb.exceptions import NoDataFound
|
17
|
+
from querysource.conf import asyncpg_url
|
18
|
+
from ..exceptions import FileNotFound, ComponentError
|
19
|
+
from ..utils.mail import MailMessage
|
20
|
+
from ..utils import check_empty
|
21
|
+
from ..parsers.maps import open_map, open_model
|
22
|
+
from ..conf import TASK_PATH, DEFAULT_ENCODING
|
23
|
+
from .flow import FlowComponent
|
24
|
+
from ..utils.constants import excel_based
|
25
|
+
|
26
|
+
|
27
|
+
# File extensions this module lists as supported, grouped by family.
supported_extensions = (
    # Excel workbooks
    ".xls", ".xlsx", ".xlsm", ".xlsb",
    # Markup, plain-text and delimited data
    ".xml", ".txt", ".csv", ".json",
    # HTML pages
    ".htm", ".html",
)
|
39
|
+
|
40
|
+
|
41
|
+
def detect_encoding(filename, encoding: str = "utf-8"):
    """Guess the text encoding of ``filename`` from its first bytes.

    The first 256 bytes (or fewer, for a shorter file) are checked for a
    byte-order mark; when none is present the sample is handed to ``chardet``.

    Args:
        filename: Path of the file to inspect.
        encoding: Encoding used to decode the content when ``chardet``
            reports plain ASCII, and for the text-mode fallback read.

    Returns:
        A two-item list ``[buffer, encoding_name]``. ``buffer`` is ``None``
        except when the content was decoded here, in which case it is a
        ``StringIO`` positioned at offset 0 holding the decoded text.
    """
    sample_size = min(256, os.path.getsize(filename))
    # Context manager so the handle is closed even if the read fails.
    with open(filename, "rb") as fp:
        raw = fp.read(sample_size)

    # Order matters: the UTF-32 LE BOM (FF FE 00 00) begins with the UTF-16 LE
    # BOM (FF FE), so the UTF-32 marks have to be tested before the UTF-16 ones
    # or a UTF-32 file is misreported as UTF-16.
    known_boms = (
        (codecs.BOM_UTF8, "utf-8-sig"),
        (codecs.BOM_UTF32_LE, "utf-32"),
        (codecs.BOM_UTF32_BE, "utf-32"),
        (codecs.BOM_UTF16_LE, "utf-16le"),
        # Native-order BOM: identical to one of its neighbours depending on the
        # host byte order; kept so results do not change on big-endian hosts.
        (codecs.BOM_UTF16, "utf-16"),
        (codecs.BOM_UTF16_BE, "utf-16be"),
    )
    for bom, name in known_boms:
        if raw.startswith(bom):
            return [None, name]

    try:
        result = chardet.detect(raw)
        if result["encoding"] in ("ASCII", "ascii"):
            # The sample looked like ASCII: "repair" the file by decoding all of
            # it with the requested encoding, dropping undecodable bytes.
            with open(filename, "rb") as fp:
                content = fp.read()
            return [StringIO(content.decode(encoding, "ignore")), encoding]
        enc = result["encoding"]
    except UnicodeDecodeError:
        try:
            # Read-only text mode is enough here; no write access is required.
            with open(filename, "r", encoding=encoding) as fp:
                text = fp.read(sample_size)
            # Round-trip through UTF-8 to reject text that cannot be encoded.
            payload = text.encode("utf-8")
            return [StringIO(payload.decode("utf-8")), encoding]
        except UnicodeEncodeError:
            return [None, "iso-8859-1"]
    except Exception as exc:
        logging.warning(f"Unable to determine enconding of file: {exc}")
        return [None, DEFAULT_ENCODING]
    return [None, enc]
|
84
|
+
|
85
|
+
|
86
|
+
# Reference https://goo.gl/KaOBG3
|
87
|
+
class ExcelHandler(ContentHandler):
    """SAX handler that collects ``Table`` / ``Row`` / ``Cell`` text.

    After a parse, ``tables`` holds one entry per ``Table`` element; each
    entry is a list of rows and each row is a list of cell strings.
    """

    def __init__(self):
        super().__init__()
        self.chars = []   # text fragments seen since the last <Cell> opened
        self.cells = []   # cell strings of the row being read
        self.rows = []    # rows of the table being read
        self.tables = []  # every table completed so far

    def characters(self, content):
        """Buffer a fragment of character data."""
        self.chars.append(content)

    def startElement(self, name, attrs):
        """Start a fresh accumulator for the element being opened."""
        if name == "Cell":
            self.chars = []
        elif name == "Row":
            self.cells = []
        elif name == "Table":
            self.rows = []

    def endElement(self, name):
        """Store the finished cell, row or table in its parent container."""
        if name == "Cell":
            # A cell's text may arrive in several characters() calls.
            self.cells.append("".join(self.chars))
        elif name == "Row":
            self.rows.append(self.cells)
        elif name == "Table":
            self.tables.append(self.rows)
|
113
|
+
|
114
|
+
|
115
|
+
class OpenWithBase(FlowComponent):
|
116
|
+
"""
|
117
|
+
OpenWithBase
|
118
|
+
|
119
|
+
|
120
|
+
Overview
|
121
|
+
|
122
|
+
Abstract Component for Opening Files into DataFrames.
|
123
|
+
Supports various file types such as CSV, Excel, and XML.
|
124
|
+
|
125
|
+
.. table:: Properties
|
126
|
+
:widths: auto
|
127
|
+
|
128
|
+
|
129
|
+
+--------------+----------+-----------+-------------------------------------------------------+
|
130
|
+
| Name | Required | Summary |
|
131
|
+
+--------------+----------+-----------+-------------------------------------------------------+
|
132
|
+
| directory | No | The directory where the files are located. |
|
133
|
+
+--------------+----------+-----------+-------------------------------------------------------+
|
134
|
+
| filename | No | The name of the file to be opened. Supports glob patterns. |
|
135
|
+
+--------------+----------+-----------+-------------------------------------------------------+
|
136
|
+
| file | No | A dictionary containing the file patterns to be used. |
|
137
|
+
+--------------+----------+-----------+-------------------------------------------------------+
|
138
|
+
| mime | No | The MIME type of the file. Default is "text/csv". |
|
139
|
+
+--------------+----------+-----------+-------------------------------------------------------+
|
140
|
+
| separator | No | The delimiter to be used in CSV files. Default is ",". |
|
141
|
+
+--------------+----------+-----------+-------------------------------------------------------+
|
142
|
+
| encoding | No | The encoding of the file. |
|
143
|
+
+--------------+----------+-----------+-------------------------------------------------------+
|
144
|
+
| datatypes | No | Specifies the datatypes to be used for columns. |
|
145
|
+
+--------------+----------+-----------+-------------------------------------------------------+
|
146
|
+
| parse_dates | No | Specifies columns to be parsed as dates. |
|
147
|
+
+--------------+----------+-----------+-------------------------------------------------------+
|
148
|
+
| filter_nan | No | If True, filters out NaN values. Default is True. |
|
149
|
+
+--------------+----------+-----------+-------------------------------------------------------+
|
150
|
+
| na_values | No | List of strings to recognize as NaN. Default is ["NULL", "TBD"]. |
|
151
|
+
+--------------+----------+-----------+-------------------------------------------------------+
|
152
|
+
| clean_nat | No | If True, cleans Not-A-Time (NaT) values. |
|
153
|
+
+--------------+----------+-----------+-------------------------------------------------------+
|
154
|
+
| no_multi | No | If True, disables multi-threading. |
|
155
|
+
+--------------+----------+-----------+-------------------------------------------------------+
|
156
|
+
| flavor | No | Specifies the database flavor to be used for column information. |
|
157
|
+
+--------------+----------+-----------+-------------------------------------------------------+
|
158
|
+
| force_map | No | If True, forces the use of a mapping file. |
|
159
|
+
+--------------+----------+-----------+-------------------------------------------------------+
|
160
|
+
| skipcols | No | List of columns to be skipped. |
|
161
|
+
+--------------+----------+-----------+-------------------------------------------------------+
|
162
|
+
| pd_args | No | Additional arguments to be passed to pandas read functions. |
|
163
|
+
+--------------+----------+-----------+-------------------------------------------------------+
|
164
|
+
|
165
|
+
Returns
|
166
|
+
|
167
|
+
This component opens files and prepares them for further processing. The actual return type depends on the concrete
|
168
|
+
implementation, but typically it returns a list of filenames or file data.
|
169
|
+
""" # noqa
|
170
|
+
|
171
|
+
def __init__(
    self,
    loop: asyncio.AbstractEventLoop = None,
    job: Callable = None,
    stat: Callable = None,
    **kwargs,
):
    """Set the reader defaults, then run the parent constructor."""
    # NOTE(review): defaults are assigned before the parent constructor runs,
    # presumably so it can override them from kwargs - confirm in FlowComponent.

    # --- file location ---
    self._filenames: list[PurePath] = []
    # "filename" is read but left in kwargs, so the parent still receives it.
    self.filename: PurePath = kwargs.get("filename")
    self.directory: PurePath = None
    self._path: str = None

    # --- parsing defaults ---
    self.mime: str = "text/csv"  # Default Mime type
    self.separator: str = ","
    self._colinfo = None
    self._data = None
    self.force_map: bool = False
    self.parse_dates = {}
    self.filter_nan: bool = True
    self.na_values: list = ["NULL", "TBD"]
    self.remove_empty_strings: bool = True
    self.no_multi: bool = False
    self.sheet_name: str = None

    # --- options consumed here and not forwarded to the parent ---
    self.clean_nat = kwargs.pop("clean_nat", False)
    self._limit = kwargs.pop("limit", None)
    self._flavor: str = kwargs.pop("flavor", "postgres")

    super(OpenWithBase, self).__init__(loop=loop, job=job, stat=stat, **kwargs)

    # Extra pandas reader arguments, when a "pd_args" attribute exists.
    self.args = getattr(self, "pd_args", {})
|
205
|
+
|
206
|
+
async def close(self) -> None:
    """Do nothing; the base implementation has no teardown work."""
    return None
|
208
|
+
|
209
|
+
@abstractmethod
def set_datatypes(self):
    """Abstract hook with an empty base body that returns ``None``.

    NOTE(review): the name suggests subclasses configure column datatypes
    here - confirm against the concrete components.
    """
    return None
|
212
|
+
|
213
|
+
async def column_info(
    self, table: str, schema: str = "public", flavor: str = "postgres"
) -> "dict | None":
    """Return a ``{column_name: data_type}`` mapping for ``schema.table``.

    Unless ``self.force_map`` is set, the columns are first read from the
    database catalog (only the ``postgres`` flavor is supported). When that
    yields nothing, or when ``force_map`` is set, the definition is loaded
    through ``open_model`` instead.

    Args:
        table: Table name.
        schema: Schema that owns the table.
        flavor: Database flavor used for the catalog lookup.

    Returns:
        A dict mapping column names to data types, or ``None`` when neither
        the database nor the model provided a definition.

    Raises:
        ValueError: If ``flavor`` is not ``"postgres"`` (and ``force_map``
            is not set).
        ComponentError: If the catalog query reports an error.
    """
    if not self.force_map:
        if flavor != "postgres":
            raise ValueError(f"Column Info: Flavor not supported yet: {flavor}")
        tablename = f"{schema}.{table}"
        # NOTE(review): schema/table are interpolated into the statement;
        # confirm they never come from untrusted input.
        discover = f"""SELECT attname AS column_name,
            atttypid::regtype AS data_type, attnotnull::boolean as notnull
            FROM pg_attribute WHERE attrelid = '{tablename}'::regclass
            AND attnum > 0 AND NOT attisdropped ORDER BY attnum;
        """
        result = None
        try:
            # A coroutine always runs inside a running loop, so no fallback
            # to ``get_event_loop`` is needed here.
            event_loop = asyncio.get_running_loop()
            db = AsyncDB("pg", dsn=asyncpg_url, loop=event_loop)
            async with await db.connection() as conn:
                result, error = await conn.query(discover)
                if error:
                    raise ComponentError(f"Column Info Error {error}")
        except NoDataFound:
            # No rows: fall through to the model lookup below.
            pass
        if result:
            return {item["column_name"]: item["data_type"] for item in result}
    model = await open_model(table, schema)
    if model:
        fields = model["fields"]
        return {field: fields[field]["data_type"] for field in fields}
    if self.force_map:
        self._logger.debug(
            f"Open Map: Forcing using of Map File {schema}.{table}"
        )
    else:
        self._logger.error(f"Open Map: Table {schema}.{table} doesn't exist")
    return None
|
255
|
+
|
256
|
+
def previous_component(self):
    """Derive the files (or raw payloads) to open from ``self.input``.

    ``self._filenames`` is reset and then filled according to the input type:

    * a path: used as the single file (also stored in ``self.filename``);
    * a list: each item may be a ``MailMessage`` (its attachment file names
      are taken), a path, a string (masks are replaced; a ``*`` pattern is
      globbed inside ``self.directory``) or raw bytes (collected in
      ``self._data``);
    * a dict: either its ``"files"`` entry or its keys, keeping only names
      whose suffix is in ``supported_extensions``;
    * a ``pandas.DataFrame``: stored in ``self.data``, no file names;
    * anything else: ``self.chunkData`` when the component has a
      ``FileIterator`` attribute, raw bytes go to ``self._data``, and any
      other value is treated as a single file name.

    Raises:
        ValueError: If a list item has an unsupported type.
    """
    # TODO: check if input is a list of paths (purepath?)
    self._filenames = []
    if isinstance(self.input, (PosixPath, PurePath)):
        self.filename = self.input
        self._filenames.append(self.input)
    elif isinstance(self.input, list):
        self._data = []
        for filename in self.input:
            # check if is a MailMessage Object
            if isinstance(filename, MailMessage):
                for f in filename.attachments:
                    fname = f["filename"]
                    logging.debug(f"File: Detected Attachment: {fname}")
                    self._filenames.append(fname)
            elif isinstance(filename, (PosixPath, PurePath)):
                self.filename = filename
                self._filenames.append(filename)
            elif isinstance(filename, str):
                fname = self.mask_replacement(filename)
                if "*" in fname:
                    for match in self.directory.glob(fname):
                        logging.debug(f"Filename > {match}")
                        self._filenames.append(match)
                else:
                    # Only a literal name is added as-is; a glob pattern is
                    # represented by its matches alone.
                    self._filenames.append(PosixPath(fname))
            elif isinstance(filename, (bytes, bytearray)):
                self._data.append(filename)
            else:
                raise ValueError(
                    f"OpenWithBase: Unknown Input type {type(filename)}"
                )
    elif isinstance(self.input, dict):
        if "files" in self.input:
            for filename in self.input["files"]:
                if filename.suffix.lower() not in supported_extensions:
                    continue
                if filename.exists():
                    self._filenames.append(filename)
        else:
            for filename in list(self.input.keys()):
                if not isinstance(filename, PurePath):
                    filename = PosixPath(filename)
                if filename.suffix.lower() not in supported_extensions:
                    continue
                fname = self.mask_replacement(filename)
                self._filenames.append(fname)
    elif isinstance(self.input, pandas.DataFrame):
        # if is a dataframe, we don't have filenames information
        self.data = self.input
        self._data = None
        self._filenames = []
    else:
        if hasattr(self, "FileIterator"):
            self._data = self.chunkData
        elif isinstance(self.input, (bytes, bytearray)):
            self.filename = None
            self._data = self.input
        else:
            self.filename = self.input
            # list.append returns None, so its result must not be assigned
            # back to the list.
            self._filenames.append(self.input)
|
318
|
+
|
319
|
+
async def start(self, **kwargs):
    """Resolve the working directory and the list of files to open.

    Steps:

    1. Re-read the previous component's output when there is one (and again
       when the ``iterate`` parameter is ``True``).
    2. Validate ``self.directory`` or default it to
       ``TASK_PATH/<program>/files``.
    3. When no file names were collected yet, build them from
       ``self.filename`` (list, string with optional ``*`` glob, or path) or
       from the ``file`` pattern attribute.
    4. Call ``set_datatypes`` when a ``datatypes`` attribute exists.
    5. Check that every file exists and, when ``self.mime`` is empty, detect
       the MIME type (libmagic, then ``mimetypes``, then file extension).

    Returns:
        ``True`` on success.

    Raises:
        FileNotFound: If the directory does not exist, the ``file`` pattern
            matches nothing, or a listed file is missing.
    """
    if self.previous and not check_empty(self.input):
        self.previous_component()
    if "iterate" in self._params and self._params["iterate"] is True:
        ## re-evaluate previous data:
        self.previous_component()
    if self.directory:
        p = Path(self.directory)
        if p.exists() and p.is_dir():
            self.directory = p
        else:
            logging.error(f"Path doesn't exists: {self.directory}")
            raise FileNotFound(f"Path doesn't exists: {self.directory}")
    else:
        # TODO: using FileStorage
        self.directory = Path(TASK_PATH, self._program, "files")
    if not self._filenames:
        if self.filename:
            if isinstance(self.filename, list):
                for file in self.filename:
                    self._filenames.append(self.directory.joinpath(file))
            elif isinstance(self.filename, str):
                self.filename = self.mask_replacement(self.filename)
                if "*" in self.filename:
                    # is a glob list of files
                    for fname in self.directory.glob(self.filename):
                        logging.debug(f"Filename > {fname}")
                        self._filenames.append(fname)
                else:
                    self._path = self.directory.joinpath(self.filename)
                    self._filenames.append(self._path)
            elif isinstance(self.filename, PurePath):
                # A path object is resolved against the directory like a
                # plain name; previously it was silently ignored.
                self._path = self.directory.joinpath(self.filename)
                self._filenames.append(self._path)
        elif hasattr(self, "file"):
            filename = self.process_pattern("file")
            if hasattr(self, "masks"):
                filename = self.mask_replacement(filename)
            # path for file
            for fname in self.directory.glob(filename):
                logging.debug(f"Filename > {fname}")
                self._filenames.append(fname)
            if not self._filenames:
                raise FileNotFound("OpenWithPandas: File is empty or doesn't exists")
    # definition of data types:
    if hasattr(self, "datatypes"):
        # need to build a definition of datatypes
        self.set_datatypes()
    # check if data is not none:
    if self._filenames is None and self._data is not None:
        self._filenames = []
        if not isinstance(self._data, list):
            # convert into a list:
            self._data = [self._data]
    else:
        detector = None  # libmagic handle, created only when needed
        for file in self._filenames or []:
            if isinstance(file, str):
                file = PosixPath(file)
            if not (file.exists() and file.is_file()):
                raise FileNotFound(f"{__name__}: File doesn't Exists: {file}")
            if self.mime:
                continue
            # detecting the MIME type
            try:
                if detector is None:
                    detector = magic.Magic(mime=True)
                self.mime = detector.from_file(str(file))
                self._logger.debug(f":: Detected MIME IS: {self.mime}")
            except Exception as err:
                logging.error(err)
                self.mime = mimetypes.guess_type(str(file))[0]
            if not self.mime:
                # Last resort: decide from the extension (case-insensitive).
                ext = file.suffix.lower()
                if ext in (".xlsx", ".xls"):
                    self.mime = "application/vnd.ms-excel"
                elif ext in (".csv", ".txt"):
                    self.mime = "text/csv"
                else:
                    self.mime = "text/plain"
    return True
|
397
|
+
|
398
|
+
async def colinfo(self):
    """Build extra reader arguments from the component's column settings.

    Exactly one of these attributes is honoured, in this order:

    * ``model``: not implemented.
    * ``map``: column names/types are loaded from the table named by
      ``map["tablename"]`` (via ``column_info``) or, failing that, from the
      model/map named by ``map["map"]``. Optional keys: ``replace``,
      ``use_map``, ``schema``, ``ignore``, ``skipcols``, ``num_cols``.
      With ``replace`` the header row is skipped, the loaded names replace
      the file header, and ``self.args["dtype"]`` /
      ``self.parse_dates["parse_dates"]`` are filled from the column types.
    * ``add_columns``: explicit column names.
    * ``skipcols``: columns to drop, by name or by position.

    Returns:
        A dict of additional keyword arguments (possibly empty).

    Raises:
        NotImplementedError: If the component defines ``model``.
        ComponentError: If no column information could be loaded for ``map``.
    """
    add_columns = {}
    if hasattr(self, "model"):
        raise NotImplementedError("Using Models is not implemented yet.")
    elif hasattr(self, "map"):
        replace = self.map["replace"] if "replace" in self.map else False
        self.force_map = self.map["use_map"] if "use_map" in self.map else False
        ## schema name:
        schema = self.map["schema"] if "schema" in self.map else self._program
        ## first: check if Table exists:
        try:
            tablename = self.map["tablename"]
            colinfo = await self.column_info(
                table=tablename, schema=schema, flavor=self._flavor
            )
        except KeyError:
            mapping = self.map["map"]
            model = await open_model(mapping, schema)
            # open_model may return nothing (column_info guards the same
            # call), so do not index into a missing model.
            fields = model["fields"] if model else {}
            colinfo = {field: fields[field]["data_type"] for field in fields}
            if not colinfo:
                # last effort:
                colinfo = await open_map(mapping, schema)
        if colinfo is None:
            raise ComponentError("Failed to Load Column Information")
        if "ignore" in self.map:
            ignore = self.map["ignore"]
            colinfo = {k: v for k, v in colinfo.items() if k not in ignore}
        # skipcols
        if "skipcols" in self.map:
            # need to remove some columns
            if "num_cols" in self.map:
                cols = self.map["num_cols"]
            else:
                cols = len(colinfo.keys())
            remcols = self.map["skipcols"]
            # sorted() keeps the positional list in a deterministic order
            self.args["usecols"] = sorted(set(range(cols + 1)) - set(remcols))
        # change the functionality to use the columns and not first row
        self._colinfo = colinfo
        if replace:
            if (
                hasattr(self, "pd_args")
                and isinstance(self.pd_args, dict)
                and "skiprows" in self.pd_args
            ):
                # also skip the row right after the last configured one
                self.pd_args["skiprows"].append(
                    self.pd_args["skiprows"][-1] + 1
                )
                self.args["skiprows"] = self.pd_args["skiprows"]
            else:
                self.args["skiprows"] = [0]
            replace_columns = {"header": None, "names": list(colinfo.keys())}
            add_columns = {**add_columns, **replace_columns}
            # parse dates and dataTypes
            dates = []
            dtypes = {}
            mapped_types = self.args["dtype"] if "dtype" in self.args else {}
            is_excel = self.mime in excel_based
            # iterate over a copy: colinfo entries are overwritten below
            for column, dtype in colinfo.copy().items():
                if column in mapped_types:
                    ## is already overridden by datatypes:
                    colinfo[column] = mapped_types[column]
                    dtypes[column] = mapped_types[column]
                    continue
                if dtype in (
                    "timestamp without time zone",
                    "timestamp with time zone",
                    "date",
                    "time",
                    "time with time zone",
                    "time without time zone",
                ):
                    dates.append(column)
                elif dtype in ("varchar", "character varying", "str"):
                    dtypes[column] = "str"
                elif dtype in ("character", "text"):
                    dtypes[column] = "object"
                elif dtype in ("integer", "smallint", "bigint"):
                    dtypes[column] = "Int64"
                elif dtype in ("float", "double precision"):
                    dtypes[column] = float
                else:
                    dtypes[column] = "object"
                if is_excel:
                    # Excel-based sources override the generic choice above.
                    if dtype in ("numeric", "float", "real", "double precision"):
                        dtypes[column] = float
                    elif dtype == "integer":
                        dtypes[column] = "Int32"
                    elif dtype == "bigint":
                        dtypes[column] = "Int64"
                    else:
                        dtypes[column] = "object"
            if dates:
                self.parse_dates["parse_dates"] = dates
            if dtypes:
                self.args["dtype"] = dtypes
        elif not replace and hasattr(self, "no_header") and self.no_header:
            if self._data is not None:
                replace_columns = {"columns": list(colinfo.keys())}
            else:
                replace_columns = {
                    "header": None,
                    "names": list(colinfo.keys()),
                }
            add_columns = {**add_columns, **replace_columns}
    elif hasattr(self, "add_columns"):
        try:
            if self._data is not None:
                add_columns = {"columns": self.add_columns}
            else:
                if isinstance(self.add_columns, list):
                    add_columns = {"names": list(self.add_columns)}
                if hasattr(self, "replace_columns"):
                    add_columns["header"] = None
                    add_columns["skiprows"] = [0]
        except AttributeError:
            pass
    elif hasattr(self, "skipcols"):
        ## Skip Columns from Raw Table.
        skipcols = self.skipcols
        if not skipcols:
            # nothing listed: no extra arguments
            pass
        elif isinstance(skipcols[0], str):
            ### directly the name of a Column:
            add_columns = {"usecols": skipcols}
        elif isinstance(skipcols[0], int):
            ### Discover the number of Columns:
            headers = self.get_column_headers()
            ### because all files need to be equal, using first one:
            try:
                columns = headers[0]
                colrange = range(len(columns))
                add_columns = {"usecols": sorted(set(colrange) - set(skipcols))}
            except (IndexError, ValueError, KeyError, TypeError):
                # best effort: without headers no columns are dropped
                pass
    return add_columns
|
566
|
+
|
567
|
+
async def run(self) -> Any:
    """Print the list of files that are about to be opened.

    The base implementation only writes the message to standard output and
    returns ``None``.
    """
    message = f"Opening File(s): {self._filenames!r}"
    print(message)
|
569
|
+
|
570
|
+
def check_encoding(self, filename):
    """Return the text encoding to use for ``filename``.

    An explicit ``self.encoding`` attribute always wins. Otherwise the first
    bytes of the file (at most 32) are passed to ``chardet.detect``:

    * a result of ``ascii`` is reported as ``ISO-8859-1``;
    * a result with confidence below 0.8 falls back to ``utf-8``;
    * any other result is returned as detected.

    Any failure (missing file, detection error) and an empty file yield the
    default ``utf-8``.

    Args:
        filename: Path of the file to inspect.

    Returns:
        The encoding name.
    """
    encoding = "utf-8"  # default encoding
    try:
        if hasattr(self, "encoding"):
            return self.encoding
        # migrate to aiofile
        sample_size = min(32, os.path.getsize(filename))
        with open(filename, "rb") as f:
            sample = f.read(sample_size)
        if not sample:
            # nothing to analyse: keep the default
            return encoding
        # The sample never changes, so one detection pass is enough.
        result_charset = chardet.detect(sample)
        self._logger.debug(
            f"Detected Charset in file {filename} > {result_charset!s}"
        )
        confidence = result_charset["confidence"]
        detected = result_charset["encoding"]
        if detected in ("ascii", "ASCII"):
            # Plain hyphens: the en-dash spelling is not a valid codec name.
            # NOTE(review): presumably widened to Latin-1 because a short
            # ASCII sample says little about the rest of the file - confirm.
            encoding = "ISO-8859-1"
        elif confidence < 0.8:
            # failed confidence, need to use the default:
            encoding = "utf-8"
        else:
            encoding = detected
    except Exception as err:
        # Best effort: report the problem and return the default encoding.
        self._logger.warning(
            f"Unable to detect encoding of {filename}: {err}"
        )
        encoding = "utf-8"
    return encoding
|
609
|
+
|
610
|
+
@abstractmethod
async def open_csv(self, filename: str, add_columns: dict, encoding) -> Any:
    """Open a CSV file; abstract hook with no base behaviour.

    Args:
        filename: File to open.
        add_columns: Extra reader arguments (presumably the dict produced by
            ``colinfo`` - confirm against the implementing class).
        encoding: Text encoding of the file.
    """
    return None
|
613
|
+
|
614
|
+
@abstractmethod
async def open_excel(self, filename: str, add_columns: dict, encoding) -> Any:
    """Open an Excel file; abstract hook with no base behaviour.

    Args:
        filename: File to open.
        add_columns: Extra reader arguments (presumably the dict produced by
            ``colinfo`` - confirm against the implementing class).
        encoding: Text encoding passed by the caller.
    """
    return None
|