flowtask 5.8.4__cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowtask/__init__.py +93 -0
- flowtask/__main__.py +38 -0
- flowtask/bots/__init__.py +6 -0
- flowtask/bots/check.py +93 -0
- flowtask/bots/codebot.py +51 -0
- flowtask/components/ASPX.py +148 -0
- flowtask/components/AddDataset.py +352 -0
- flowtask/components/Amazon.py +523 -0
- flowtask/components/AutoTask.py +314 -0
- flowtask/components/Azure.py +80 -0
- flowtask/components/AzureUsers.py +106 -0
- flowtask/components/BaseAction.py +91 -0
- flowtask/components/BaseLoop.py +198 -0
- flowtask/components/BestBuy.py +800 -0
- flowtask/components/CSVToGCS.py +120 -0
- flowtask/components/CompanyScraper/__init__.py +1 -0
- flowtask/components/CompanyScraper/parsers/__init__.py +6 -0
- flowtask/components/CompanyScraper/parsers/base.py +102 -0
- flowtask/components/CompanyScraper/parsers/explorium.py +192 -0
- flowtask/components/CompanyScraper/parsers/leadiq.py +206 -0
- flowtask/components/CompanyScraper/parsers/rocket.py +133 -0
- flowtask/components/CompanyScraper/parsers/siccode.py +109 -0
- flowtask/components/CompanyScraper/parsers/visualvisitor.py +130 -0
- flowtask/components/CompanyScraper/parsers/zoominfo.py +118 -0
- flowtask/components/CompanyScraper/scrapper.py +1054 -0
- flowtask/components/CopyTo.py +177 -0
- flowtask/components/CopyToBigQuery.py +243 -0
- flowtask/components/CopyToMongoDB.py +291 -0
- flowtask/components/CopyToPg.py +609 -0
- flowtask/components/CopyToRethink.py +207 -0
- flowtask/components/CreateGCSBucket.py +102 -0
- flowtask/components/CreateReport/CreateReport.py +228 -0
- flowtask/components/CreateReport/__init__.py +9 -0
- flowtask/components/CreateReport/charts/__init__.py +15 -0
- flowtask/components/CreateReport/charts/bar.py +51 -0
- flowtask/components/CreateReport/charts/base.py +66 -0
- flowtask/components/CreateReport/charts/pie.py +64 -0
- flowtask/components/CreateReport/utils.py +9 -0
- flowtask/components/CustomerSatisfaction.py +196 -0
- flowtask/components/DataInput.py +200 -0
- flowtask/components/DateList.py +255 -0
- flowtask/components/DbClient.py +163 -0
- flowtask/components/DialPad.py +146 -0
- flowtask/components/DocumentDBQuery.py +200 -0
- flowtask/components/DownloadFrom.py +371 -0
- flowtask/components/DownloadFromD2L.py +113 -0
- flowtask/components/DownloadFromFTP.py +181 -0
- flowtask/components/DownloadFromIMAP.py +315 -0
- flowtask/components/DownloadFromS3.py +198 -0
- flowtask/components/DownloadFromSFTP.py +265 -0
- flowtask/components/DownloadFromSharepoint.py +110 -0
- flowtask/components/DownloadFromSmartSheet.py +114 -0
- flowtask/components/DownloadS3File.py +229 -0
- flowtask/components/Dummy.py +59 -0
- flowtask/components/DuplicatePhoto.py +411 -0
- flowtask/components/EmployeeEvaluation.py +237 -0
- flowtask/components/ExecuteSQL.py +323 -0
- flowtask/components/ExtractHTML.py +178 -0
- flowtask/components/FileBase.py +178 -0
- flowtask/components/FileCopy.py +181 -0
- flowtask/components/FileDelete.py +82 -0
- flowtask/components/FileExists.py +146 -0
- flowtask/components/FileIteratorDelete.py +112 -0
- flowtask/components/FileList.py +194 -0
- flowtask/components/FileOpen.py +75 -0
- flowtask/components/FileRead.py +120 -0
- flowtask/components/FileRename.py +106 -0
- flowtask/components/FilterIf.py +284 -0
- flowtask/components/FilterRows/FilterRows.py +200 -0
- flowtask/components/FilterRows/__init__.py +10 -0
- flowtask/components/FilterRows/functions.py +4 -0
- flowtask/components/GCSToBigQuery.py +103 -0
- flowtask/components/GoogleA4.py +150 -0
- flowtask/components/GoogleGeoCoding.py +344 -0
- flowtask/components/GooglePlaces.py +315 -0
- flowtask/components/GoogleSearch.py +539 -0
- flowtask/components/HTTPClient.py +268 -0
- flowtask/components/ICIMS.py +146 -0
- flowtask/components/IF.py +179 -0
- flowtask/components/IcimsFolderCopy.py +173 -0
- flowtask/components/ImageFeatures/__init__.py +5 -0
- flowtask/components/ImageFeatures/process.py +233 -0
- flowtask/components/IteratorBase.py +251 -0
- flowtask/components/LangchainLoader/__init__.py +5 -0
- flowtask/components/LangchainLoader/loader.py +194 -0
- flowtask/components/LangchainLoader/loaders/__init__.py +22 -0
- flowtask/components/LangchainLoader/loaders/abstract.py +362 -0
- flowtask/components/LangchainLoader/loaders/basepdf.py +50 -0
- flowtask/components/LangchainLoader/loaders/docx.py +91 -0
- flowtask/components/LangchainLoader/loaders/html.py +119 -0
- flowtask/components/LangchainLoader/loaders/pdfblocks.py +146 -0
- flowtask/components/LangchainLoader/loaders/pdfmark.py +79 -0
- flowtask/components/LangchainLoader/loaders/pdftables.py +135 -0
- flowtask/components/LangchainLoader/loaders/qa.py +67 -0
- flowtask/components/LangchainLoader/loaders/txt.py +55 -0
- flowtask/components/LeadIQ.py +650 -0
- flowtask/components/Loop.py +253 -0
- flowtask/components/Lowes.py +334 -0
- flowtask/components/MS365Usage.py +156 -0
- flowtask/components/MSTeamsMessages.py +320 -0
- flowtask/components/MarketClustering.py +1051 -0
- flowtask/components/MergeFiles.py +362 -0
- flowtask/components/MilvusOutput.py +87 -0
- flowtask/components/NearByStores.py +175 -0
- flowtask/components/NetworkNinja/__init__.py +6 -0
- flowtask/components/NetworkNinja/models/__init__.py +52 -0
- flowtask/components/NetworkNinja/models/abstract.py +177 -0
- flowtask/components/NetworkNinja/models/account.py +39 -0
- flowtask/components/NetworkNinja/models/client.py +19 -0
- flowtask/components/NetworkNinja/models/district.py +14 -0
- flowtask/components/NetworkNinja/models/events.py +101 -0
- flowtask/components/NetworkNinja/models/forms.py +499 -0
- flowtask/components/NetworkNinja/models/market.py +16 -0
- flowtask/components/NetworkNinja/models/organization.py +34 -0
- flowtask/components/NetworkNinja/models/photos.py +125 -0
- flowtask/components/NetworkNinja/models/project.py +44 -0
- flowtask/components/NetworkNinja/models/region.py +28 -0
- flowtask/components/NetworkNinja/models/store.py +203 -0
- flowtask/components/NetworkNinja/models/user.py +151 -0
- flowtask/components/NetworkNinja/router.py +854 -0
- flowtask/components/Odoo.py +175 -0
- flowtask/components/OdooInjector.py +192 -0
- flowtask/components/OpenFromXML.py +126 -0
- flowtask/components/OpenWeather.py +41 -0
- flowtask/components/OpenWithBase.py +616 -0
- flowtask/components/OpenWithPandas.py +715 -0
- flowtask/components/PGPDecrypt.py +199 -0
- flowtask/components/PandasIterator.py +187 -0
- flowtask/components/PandasToFile.py +189 -0
- flowtask/components/Paradox.py +339 -0
- flowtask/components/ParamIterator.py +117 -0
- flowtask/components/ParseHTML.py +84 -0
- flowtask/components/PlacerStores.py +249 -0
- flowtask/components/Pokemon.py +507 -0
- flowtask/components/PositiveBot.py +62 -0
- flowtask/components/PowerPointSlide.py +400 -0
- flowtask/components/PrintMessage.py +127 -0
- flowtask/components/ProductCompetitors/__init__.py +5 -0
- flowtask/components/ProductCompetitors/parsers/__init__.py +7 -0
- flowtask/components/ProductCompetitors/parsers/base.py +72 -0
- flowtask/components/ProductCompetitors/parsers/bestbuy.py +86 -0
- flowtask/components/ProductCompetitors/parsers/lowes.py +103 -0
- flowtask/components/ProductCompetitors/scrapper.py +155 -0
- flowtask/components/ProductCompliant.py +169 -0
- flowtask/components/ProductInfo/__init__.py +1 -0
- flowtask/components/ProductInfo/parsers/__init__.py +5 -0
- flowtask/components/ProductInfo/parsers/base.py +83 -0
- flowtask/components/ProductInfo/parsers/brother.py +97 -0
- flowtask/components/ProductInfo/parsers/canon.py +167 -0
- flowtask/components/ProductInfo/parsers/epson.py +118 -0
- flowtask/components/ProductInfo/parsers/hp.py +131 -0
- flowtask/components/ProductInfo/parsers/samsung.py +97 -0
- flowtask/components/ProductInfo/scraper.py +319 -0
- flowtask/components/ProductPricing.py +118 -0
- flowtask/components/QS.py +261 -0
- flowtask/components/QSBase.py +201 -0
- flowtask/components/QueryIterator.py +273 -0
- flowtask/components/QueryToInsert.py +327 -0
- flowtask/components/QueryToPandas.py +432 -0
- flowtask/components/RESTClient.py +195 -0
- flowtask/components/RethinkDBQuery.py +189 -0
- flowtask/components/Rsync.py +74 -0
- flowtask/components/RunSSH.py +59 -0
- flowtask/components/RunShell.py +71 -0
- flowtask/components/SalesForce.py +20 -0
- flowtask/components/SaveImageBank/__init__.py +257 -0
- flowtask/components/SchedulingVisits.py +592 -0
- flowtask/components/ScrapPage.py +216 -0
- flowtask/components/ScrapSearch.py +79 -0
- flowtask/components/SendNotify.py +257 -0
- flowtask/components/SentimentAnalysis.py +694 -0
- flowtask/components/ServiceScrapper/__init__.py +5 -0
- flowtask/components/ServiceScrapper/parsers/__init__.py +1 -0
- flowtask/components/ServiceScrapper/parsers/base.py +94 -0
- flowtask/components/ServiceScrapper/parsers/costco.py +93 -0
- flowtask/components/ServiceScrapper/scrapper.py +199 -0
- flowtask/components/SetVariables.py +156 -0
- flowtask/components/SubTask.py +182 -0
- flowtask/components/SuiteCRM.py +48 -0
- flowtask/components/Switch.py +175 -0
- flowtask/components/TableBase.py +148 -0
- flowtask/components/TableDelete.py +312 -0
- flowtask/components/TableInput.py +143 -0
- flowtask/components/TableOutput/TableOutput.py +384 -0
- flowtask/components/TableOutput/__init__.py +3 -0
- flowtask/components/TableSchema.py +534 -0
- flowtask/components/Target.py +223 -0
- flowtask/components/ThumbnailGenerator.py +156 -0
- flowtask/components/ToPandas.py +67 -0
- flowtask/components/TransformRows/TransformRows.py +507 -0
- flowtask/components/TransformRows/__init__.py +9 -0
- flowtask/components/TransformRows/functions.py +559 -0
- flowtask/components/TransposeRows.py +176 -0
- flowtask/components/UPCDatabase.py +86 -0
- flowtask/components/UnGzip.py +171 -0
- flowtask/components/Uncompress.py +172 -0
- flowtask/components/UniqueRows.py +126 -0
- flowtask/components/Unzip.py +107 -0
- flowtask/components/UpdateOperationalVars.py +147 -0
- flowtask/components/UploadTo.py +299 -0
- flowtask/components/UploadToS3.py +136 -0
- flowtask/components/UploadToSFTP.py +160 -0
- flowtask/components/UploadToSharepoint.py +205 -0
- flowtask/components/UserFunc.py +122 -0
- flowtask/components/VivaTracker.py +140 -0
- flowtask/components/WSDLClient.py +123 -0
- flowtask/components/Wait.py +18 -0
- flowtask/components/Walmart.py +199 -0
- flowtask/components/Workplace.py +134 -0
- flowtask/components/XMLToPandas.py +267 -0
- flowtask/components/Zammad/__init__.py +41 -0
- flowtask/components/Zammad/models.py +0 -0
- flowtask/components/ZoomInfoScraper.py +409 -0
- flowtask/components/__init__.py +104 -0
- flowtask/components/abstract.py +18 -0
- flowtask/components/flow.py +530 -0
- flowtask/components/google.py +335 -0
- flowtask/components/group.py +221 -0
- flowtask/components/py.typed +0 -0
- flowtask/components/reviewscrap.py +132 -0
- flowtask/components/tAutoincrement.py +117 -0
- flowtask/components/tConcat.py +109 -0
- flowtask/components/tExplode.py +119 -0
- flowtask/components/tFilter.py +184 -0
- flowtask/components/tGroup.py +236 -0
- flowtask/components/tJoin.py +270 -0
- flowtask/components/tMap/__init__.py +9 -0
- flowtask/components/tMap/functions.py +54 -0
- flowtask/components/tMap/tMap.py +450 -0
- flowtask/components/tMelt.py +112 -0
- flowtask/components/tMerge.py +114 -0
- flowtask/components/tOrder.py +93 -0
- flowtask/components/tPandas.py +94 -0
- flowtask/components/tPivot.py +71 -0
- flowtask/components/tPluckCols.py +76 -0
- flowtask/components/tUnnest.py +82 -0
- flowtask/components/user.py +401 -0
- flowtask/conf.py +457 -0
- flowtask/download.py +102 -0
- flowtask/events/__init__.py +11 -0
- flowtask/events/events/__init__.py +20 -0
- flowtask/events/events/abstract.py +95 -0
- flowtask/events/events/alerts/__init__.py +362 -0
- flowtask/events/events/alerts/colfunctions.py +131 -0
- flowtask/events/events/alerts/functions.py +158 -0
- flowtask/events/events/dummy.py +12 -0
- flowtask/events/events/exec.py +124 -0
- flowtask/events/events/file/__init__.py +7 -0
- flowtask/events/events/file/base.py +51 -0
- flowtask/events/events/file/copy.py +23 -0
- flowtask/events/events/file/delete.py +16 -0
- flowtask/events/events/interfaces/__init__.py +9 -0
- flowtask/events/events/interfaces/client.py +67 -0
- flowtask/events/events/interfaces/credentials.py +28 -0
- flowtask/events/events/interfaces/notifications.py +58 -0
- flowtask/events/events/jira.py +122 -0
- flowtask/events/events/log.py +26 -0
- flowtask/events/events/logerr.py +52 -0
- flowtask/events/events/notify.py +59 -0
- flowtask/events/events/notify_event.py +160 -0
- flowtask/events/events/publish.py +54 -0
- flowtask/events/events/sendfile.py +104 -0
- flowtask/events/events/task.py +97 -0
- flowtask/events/events/teams.py +98 -0
- flowtask/events/events/webhook.py +58 -0
- flowtask/events/manager.py +287 -0
- flowtask/exceptions.c +39393 -0
- flowtask/exceptions.cpython-39-x86_64-linux-gnu.so +0 -0
- flowtask/extensions/__init__.py +3 -0
- flowtask/extensions/abstract.py +82 -0
- flowtask/extensions/logging/__init__.py +65 -0
- flowtask/hooks/__init__.py +9 -0
- flowtask/hooks/actions/__init__.py +22 -0
- flowtask/hooks/actions/abstract.py +66 -0
- flowtask/hooks/actions/dummy.py +23 -0
- flowtask/hooks/actions/jira.py +74 -0
- flowtask/hooks/actions/rest.py +320 -0
- flowtask/hooks/actions/sampledata.py +37 -0
- flowtask/hooks/actions/sensor.py +23 -0
- flowtask/hooks/actions/task.py +9 -0
- flowtask/hooks/actions/ticket.py +37 -0
- flowtask/hooks/actions/zammad.py +55 -0
- flowtask/hooks/hook.py +62 -0
- flowtask/hooks/models.py +17 -0
- flowtask/hooks/service.py +187 -0
- flowtask/hooks/step.py +91 -0
- flowtask/hooks/types/__init__.py +23 -0
- flowtask/hooks/types/base.py +129 -0
- flowtask/hooks/types/brokers/__init__.py +11 -0
- flowtask/hooks/types/brokers/base.py +54 -0
- flowtask/hooks/types/brokers/mqtt.py +35 -0
- flowtask/hooks/types/brokers/rabbitmq.py +82 -0
- flowtask/hooks/types/brokers/redis.py +83 -0
- flowtask/hooks/types/brokers/sqs.py +44 -0
- flowtask/hooks/types/fs.py +232 -0
- flowtask/hooks/types/http.py +49 -0
- flowtask/hooks/types/imap.py +200 -0
- flowtask/hooks/types/jira.py +279 -0
- flowtask/hooks/types/mail.py +205 -0
- flowtask/hooks/types/postgres.py +98 -0
- flowtask/hooks/types/responses/__init__.py +8 -0
- flowtask/hooks/types/responses/base.py +5 -0
- flowtask/hooks/types/sharepoint.py +288 -0
- flowtask/hooks/types/ssh.py +141 -0
- flowtask/hooks/types/tagged.py +59 -0
- flowtask/hooks/types/upload.py +85 -0
- flowtask/hooks/types/watch.py +71 -0
- flowtask/hooks/types/web.py +36 -0
- flowtask/interfaces/AzureClient.py +137 -0
- flowtask/interfaces/AzureGraph.py +839 -0
- flowtask/interfaces/Boto3Client.py +326 -0
- flowtask/interfaces/DropboxClient.py +173 -0
- flowtask/interfaces/ExcelHandler.py +94 -0
- flowtask/interfaces/FTPClient.py +131 -0
- flowtask/interfaces/GoogleCalendar.py +201 -0
- flowtask/interfaces/GoogleClient.py +133 -0
- flowtask/interfaces/GoogleDrive.py +127 -0
- flowtask/interfaces/GoogleGCS.py +89 -0
- flowtask/interfaces/GoogleGeocoding.py +93 -0
- flowtask/interfaces/GoogleLang.py +114 -0
- flowtask/interfaces/GooglePub.py +61 -0
- flowtask/interfaces/GoogleSheet.py +68 -0
- flowtask/interfaces/IMAPClient.py +137 -0
- flowtask/interfaces/O365Calendar.py +113 -0
- flowtask/interfaces/O365Client.py +220 -0
- flowtask/interfaces/OneDrive.py +284 -0
- flowtask/interfaces/Outlook.py +155 -0
- flowtask/interfaces/ParrotBot.py +130 -0
- flowtask/interfaces/SSHClient.py +378 -0
- flowtask/interfaces/Sharepoint.py +496 -0
- flowtask/interfaces/__init__.py +36 -0
- flowtask/interfaces/azureauth.py +119 -0
- flowtask/interfaces/cache.py +201 -0
- flowtask/interfaces/client.py +82 -0
- flowtask/interfaces/compress.py +525 -0
- flowtask/interfaces/credentials.py +124 -0
- flowtask/interfaces/d2l.py +239 -0
- flowtask/interfaces/databases/__init__.py +5 -0
- flowtask/interfaces/databases/db.py +223 -0
- flowtask/interfaces/databases/documentdb.py +55 -0
- flowtask/interfaces/databases/rethink.py +39 -0
- flowtask/interfaces/dataframes/__init__.py +11 -0
- flowtask/interfaces/dataframes/abstract.py +21 -0
- flowtask/interfaces/dataframes/arrow.py +71 -0
- flowtask/interfaces/dataframes/dt.py +69 -0
- flowtask/interfaces/dataframes/pandas.py +167 -0
- flowtask/interfaces/dataframes/polars.py +60 -0
- flowtask/interfaces/db.py +263 -0
- flowtask/interfaces/env.py +46 -0
- flowtask/interfaces/func.py +137 -0
- flowtask/interfaces/http.py +1780 -0
- flowtask/interfaces/locale.py +40 -0
- flowtask/interfaces/log.py +75 -0
- flowtask/interfaces/mask.py +143 -0
- flowtask/interfaces/notification.py +154 -0
- flowtask/interfaces/playwright.py +339 -0
- flowtask/interfaces/powerpoint.py +368 -0
- flowtask/interfaces/py.typed +0 -0
- flowtask/interfaces/qs.py +376 -0
- flowtask/interfaces/result.py +87 -0
- flowtask/interfaces/selenium_service.py +779 -0
- flowtask/interfaces/smartsheet.py +154 -0
- flowtask/interfaces/stat.py +39 -0
- flowtask/interfaces/task.py +96 -0
- flowtask/interfaces/template.py +118 -0
- flowtask/interfaces/vectorstores/__init__.py +1 -0
- flowtask/interfaces/vectorstores/abstract.py +133 -0
- flowtask/interfaces/vectorstores/milvus.py +669 -0
- flowtask/interfaces/zammad.py +107 -0
- flowtask/models.py +193 -0
- flowtask/parsers/__init__.py +15 -0
- flowtask/parsers/_yaml.c +11978 -0
- flowtask/parsers/_yaml.cpython-39-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/argparser.py +235 -0
- flowtask/parsers/base.c +15155 -0
- flowtask/parsers/base.cpython-39-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/json.c +11968 -0
- flowtask/parsers/json.cpython-39-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/maps.py +49 -0
- flowtask/parsers/toml.c +11968 -0
- flowtask/parsers/toml.cpython-39-x86_64-linux-gnu.so +0 -0
- flowtask/plugins/__init__.py +16 -0
- flowtask/plugins/components/__init__.py +0 -0
- flowtask/plugins/handler/__init__.py +45 -0
- flowtask/plugins/importer.py +31 -0
- flowtask/plugins/sources/__init__.py +0 -0
- flowtask/runner.py +283 -0
- flowtask/scheduler/__init__.py +9 -0
- flowtask/scheduler/functions.py +493 -0
- flowtask/scheduler/handlers/__init__.py +8 -0
- flowtask/scheduler/handlers/manager.py +504 -0
- flowtask/scheduler/handlers/models.py +58 -0
- flowtask/scheduler/handlers/service.py +72 -0
- flowtask/scheduler/notifications.py +65 -0
- flowtask/scheduler/scheduler.py +993 -0
- flowtask/services/__init__.py +0 -0
- flowtask/services/bots/__init__.py +0 -0
- flowtask/services/bots/telegram.py +264 -0
- flowtask/services/files/__init__.py +11 -0
- flowtask/services/files/manager.py +522 -0
- flowtask/services/files/model.py +37 -0
- flowtask/services/files/service.py +767 -0
- flowtask/services/jira/__init__.py +3 -0
- flowtask/services/jira/jira_actions.py +191 -0
- flowtask/services/tasks/__init__.py +13 -0
- flowtask/services/tasks/launcher.py +213 -0
- flowtask/services/tasks/manager.py +323 -0
- flowtask/services/tasks/service.py +275 -0
- flowtask/services/tasks/task_manager.py +376 -0
- flowtask/services/tasks/tasks.py +155 -0
- flowtask/storages/__init__.py +16 -0
- flowtask/storages/exceptions.py +12 -0
- flowtask/storages/files/__init__.py +8 -0
- flowtask/storages/files/abstract.py +29 -0
- flowtask/storages/files/filesystem.py +66 -0
- flowtask/storages/tasks/__init__.py +19 -0
- flowtask/storages/tasks/abstract.py +26 -0
- flowtask/storages/tasks/database.py +33 -0
- flowtask/storages/tasks/filesystem.py +108 -0
- flowtask/storages/tasks/github.py +119 -0
- flowtask/storages/tasks/memory.py +45 -0
- flowtask/storages/tasks/row.py +25 -0
- flowtask/tasks/__init__.py +0 -0
- flowtask/tasks/abstract.py +526 -0
- flowtask/tasks/command.py +118 -0
- flowtask/tasks/pile.py +486 -0
- flowtask/tasks/py.typed +0 -0
- flowtask/tasks/task.py +778 -0
- flowtask/template/__init__.py +161 -0
- flowtask/tests.py +257 -0
- flowtask/types/__init__.py +8 -0
- flowtask/types/typedefs.c +11347 -0
- flowtask/types/typedefs.cpython-39-x86_64-linux-gnu.so +0 -0
- flowtask/utils/__init__.py +24 -0
- flowtask/utils/constants.py +117 -0
- flowtask/utils/encoders.py +21 -0
- flowtask/utils/executor.py +112 -0
- flowtask/utils/functions.cpp +14280 -0
- flowtask/utils/functions.cpython-39-x86_64-linux-gnu.so +0 -0
- flowtask/utils/json.cpp +13349 -0
- flowtask/utils/json.cpython-39-x86_64-linux-gnu.so +0 -0
- flowtask/utils/mail.py +63 -0
- flowtask/utils/parseqs.c +13324 -0
- flowtask/utils/parserqs.cpython-39-x86_64-linux-gnu.so +0 -0
- flowtask/utils/stats.py +308 -0
- flowtask/utils/transformations.py +74 -0
- flowtask/utils/uv.py +12 -0
- flowtask/utils/validators.py +97 -0
- flowtask/version.py +11 -0
- flowtask-5.8.4.dist-info/LICENSE +201 -0
- flowtask-5.8.4.dist-info/METADATA +209 -0
- flowtask-5.8.4.dist-info/RECORD +470 -0
- flowtask-5.8.4.dist-info/WHEEL +6 -0
- flowtask-5.8.4.dist-info/entry_points.txt +3 -0
- flowtask-5.8.4.dist-info/top_level.txt +2 -0
- plugins/components/CreateQR.py +39 -0
- plugins/components/TestComponent.py +28 -0
- plugins/components/Use1.py +13 -0
- plugins/components/Workplace.py +117 -0
- plugins/components/__init__.py +3 -0
- plugins/sources/__init__.py +0 -0
- plugins/sources/get_populartimes.py +78 -0
- plugins/sources/google.py +150 -0
- plugins/sources/hubspot.py +679 -0
- plugins/sources/icims.py +679 -0
- plugins/sources/mobileinsight.py +501 -0
- plugins/sources/newrelic.py +262 -0
- plugins/sources/uap.py +268 -0
- plugins/sources/venu.py +244 -0
- plugins/sources/vocinity.py +314 -0
@@ -0,0 +1,323 @@
|
|
1
|
+
import asyncio
|
2
|
+
from collections.abc import Callable
|
3
|
+
from pathlib import PurePath
|
4
|
+
from concurrent.futures import ThreadPoolExecutor
|
5
|
+
from functools import partial
|
6
|
+
import aiofiles
|
7
|
+
from asyncdb.drivers.pg import pg
|
8
|
+
from asyncdb.exceptions import StatementError, DataError
|
9
|
+
from querysource.conf import (
|
10
|
+
default_dsn,
|
11
|
+
DB_TIMEOUT,
|
12
|
+
DB_STATEMENT_TIMEOUT,
|
13
|
+
DB_SESSION_TIMEOUT,
|
14
|
+
DB_KEEPALIVE_IDLE
|
15
|
+
)
|
16
|
+
from navconfig.logging import logging
|
17
|
+
from ..exceptions import ComponentError, FileError
|
18
|
+
from ..utils import SafeDict
|
19
|
+
# TODO: migrate to FileStore component
|
20
|
+
from .flow import FlowComponent
|
21
|
+
from ..interfaces import TemplateSupport
|
22
|
+
from ..conf import TASK_PATH
|
23
|
+
from ..interfaces.qs import QSSupport
|
24
|
+
|
25
|
+
|
26
|
+
class ExecuteSQL(QSSupport, FlowComponent, TemplateSupport):
|
27
|
+
"""
|
28
|
+
ExecuteSQL
|
29
|
+
|
30
|
+
Overview
|
31
|
+
|
32
|
+
Executes one or more SQL queries against a PostgreSQL database; it can also execute SQL statements stored in a file.
|
33
|
+
|
34
|
+
**Properties** (inherited from FlowComponent)
|
35
|
+
|
36
|
+
.. table:: Properties
|
37
|
+
:widths: auto
|
38
|
+
|
39
|
+
+--------------+----------+-----------+-------------------------------------------------------+
|
40
|
+
| Name | Required | Summary |
|
41
|
+
+--------------+----------+-----------+-------------------------------------------------------+
|
42
|
+
| skipError | No | The name of the database schema to use (default: ""). |
|
43
|
+
+--------------+----------+-----------+-------------------------------------------------------+
|
44
|
+
| sql | No | A raw SQL query string to execute. |
|
45
|
+
+--------------+----------+-----------+-------------------------------------------------------+
|
46
|
+
| file_sql | No | A path (string) or list of paths (strings) to SQL files |
|
47
|
+
| | | containing the queries to execute. |
|
48
|
+
+--------------+----------+-----------+-------------------------------------------------------+
|
49
|
+
| pattern | No | A dictionary mapping variable names to functions that return |
|
50
|
+
| | | the corresponding values to be used in the SQL query. |
|
51
|
+
+--------------+----------+-----------+-------------------------------------------------------+
|
52
|
+
| use_template | No | Whether to treat the SQL string as a template and use the |
|
53
|
+
| | | `_templateparser` component to render it (default: False). |
|
54
|
+
+--------------+----------+-----------+-------------------------------------------------------+
|
55
|
+
| multi | No | Whether to treat the `sql` property as a list of multiple |
|
56
|
+
| | | queries to execute sequentially (default: False). |
|
57
|
+
+--------------+----------+-----------+-------------------------------------------------------+
|
58
|
+
| exec_timeout | No | The timeout value for executing a single SQL query |
|
59
|
+
| | | (default: 3600 seconds). |
|
60
|
+
+--------------+----------+-----------+-------------------------------------------------------+
|
61
|
+
**Methods**
|
62
|
+
|
63
|
+
* open_sqlfile(self, file: PurePath, **kwargs) -> str: Opens an SQL file and returns its content.
|
64
|
+
* get_connection(self, event_loop: asyncio.AbstractEventLoop): Creates a connection pool to the PostgreSQL database.
|
65
|
+
* _execute(self, query, event_loop): Executes a single SQL query asynchronously.
|
66
|
+
* execute_sql(self, query: str, event_loop: asyncio.AbstractEventLoop) -> str: Executes an SQL query and returns the result.
|
67
|
+
|
68
|
+
**Notes**
|
69
|
+
|
70
|
+
* This component uses asynchronous functions for non-blocking I/O operations.
|
71
|
+
* Error handling is implemented to catch exceptions during database connection, SQL execution, and file operations.
|
72
|
+
* Supports loading SQL queries from files.
|
73
|
+
* Supports using templates for dynamic SQL generation.
|
74
|
+
* Supports executing multiple queries sequentially.
|
75
|
+
|
76
|
+
|
77
|
+
Example:
|
78
|
+
|
79
|
+
```yaml
|
80
|
+
ExecuteSQL:
|
81
|
+
file_sql: fill_employees.sql
|
82
|
+
```
|
83
|
+
|
84
|
+
""" # noqa
|
85
|
+
def __init__(
    self,
    loop: asyncio.AbstractEventLoop = None,
    job: Callable = None,
    stat: Callable = None,
    **kwargs,
):
    """Set the component defaults, then delegate to the parent initializer.

    ``exec_timeout``, ``driver``, ``multi`` and ``credentials`` are removed
    from ``kwargs`` here. ``use_template`` and ``use_dataframe`` are only
    read, so they are still forwarded to ``super().__init__`` together with
    every remaining keyword argument.
    """
    # Plain state, initialised before the parent constructor runs.
    self.tablename: str = ""
    self.schema: str = ""
    self._connection: Callable = None
    self._queries = []
    # NOTE(review): the class documentation mentions a default of
    # 3600 seconds while the value used here is 3600000.0 -- confirm the unit.
    self.exec_timeout: float = kwargs.pop("exec_timeout", 3600000.0)
    # Options owned by this component: popped so the parent never sees them.
    self._driver: str = kwargs.pop('driver', 'pg')
    self.multi = bool(kwargs.pop('multi', False))
    self.credentials = kwargs.pop('credentials', {})
    # Flags that are peeked at (not popped) and therefore stay in kwargs.
    self.use_template: bool = bool(kwargs.get('use_template', False))
    self.use_dataframe: bool = bool(kwargs.get('use_dataframe', False))
    super().__init__(loop=loop, job=job, stat=stat, **kwargs)
    # Mirror ``program`` into ``_program`` when the attribute is present
    # after the parent initializer has run.
    if hasattr(self, "program"):
        self._program = self.program
|
108
|
+
|
109
|
+
async def close(self):
    """Closing hook for the database connection; currently does nothing."""
    return None
|
112
|
+
|
113
|
+
async def open_sqlfile(self, file: PurePath, **kwargs) -> str:
    """Read an SQL file and return its (optionally rendered) content.

    Args:
        file: path of the SQL file. ``exists()`` and ``is_file()`` are
            called on it, so it has to be a concrete path object.
        **kwargs: values handed to the template parser when
            ``self.use_template`` is ``True``.

    Returns:
        The file content after mask replacement and, when enabled,
        template rendering.

    Raises:
        FileError: if *file* does not exist or is not a regular file.
    """
    self._logger.info(f"Open SQL File: {file}")
    if not (file.exists() and file.is_file()):
        raise FileError(f"ExecuteSQL: Missing SQL File: {file}")
    # The file is only ever read, so open it read-only: the former "r+"
    # mode also required write permission and failed on read-only files.
    async with aiofiles.open(file, "r") as afp:
        content = await afp.read()
    # check if we need to replace masks
    if "{" in content:
        content = self.mask_replacement(content)
    if self.use_template is True:
        content = self._templateparser.from_string(content, kwargs)
    return content
|
128
|
+
|
129
|
+
async def start(self, **kwargs):
    """Collect the SQL statements to execute into ``self._queries``.

    Sources are handled in this order:

    * ``file_sql``: one file name or a list of them, each loaded through
      ``open_sqlfile`` from ``TASK_PATH/<program>/sql``.
    * ``pattern``: ``self.sql`` is formatted with the values produced by
      ``self.getFunc`` for every pattern entry.
    * ``sql``: a string or a list of strings used as given.

    In the ``pattern`` and ``sql`` cases every ``{name}`` placeholder that
    matches a key of ``self._variables`` is replaced by its value.

    Returns:
        ``True`` once the queries are prepared.

    Raises:
        ComponentError: if ``file_sql`` is neither a string nor a list, or
            if an SQL file cannot be loaded.
    """

    def _substitute(sql, name):
        """Replace the ``{name}`` placeholder in *sql* with its variable."""
        value = self._variables[name]
        if isinstance(value, list):
            # A list becomes a comma-separated sequence of quoted items.
            # The joined string is stored back into ``self._variables``,
            # exactly as this component has always done.
            value = ", ".join("'{}'".format(v) for v in value)
            self._variables[name] = value
        return sql.replace("{{{}}}".format(str(name)), str(value))

    self.processing_credentials()
    if self.previous:
        self.data = self.input
    # check if sql comes from a filename:
    if hasattr(self, "file_sql"):
        self._logger.debug(f"SQL File: {self.file_sql}")
        self._queries = []
        if isinstance(self.file_sql, str):
            sql_files = [self.file_sql]
        elif isinstance(self.file_sql, list):
            sql_files = self.file_sql
        else:
            raise ComponentError(
                "ExecuteSQL: Unknown type for *file_sql* attribute."
            )
        for fs in sql_files:
            self._logger.debug(f"Execute SQL File: {fs!s}")
            file_path = TASK_PATH.joinpath(self._program, "sql", fs)
            try:
                sql = await self.open_sqlfile(file_path)
                self._queries.append(sql)
            except Exception as err:
                raise ComponentError(f"{err}") from err
    if hasattr(self, "pattern"):
        # need to parse variables in SQL
        pattern = self.pattern
        self._queries = []
        try:
            variables = {}
            for field, val in pattern.items():
                variables[field] = self.getFunc(val)
        except (TypeError, AttributeError) as err:
            self._logger.error(err)
        # replace all occurrences on SQL
        try:
            # TODO: capture when sql is a list of queries
            sql = self.sql.format_map(SafeDict(**variables))
            # Replace variables
            for name in self._variables:
                sql = _substitute(sql, name)
            self._queries.append(sql)
        except Exception as err:
            logging.exception(err, stack_info=True)
    # NOTE(review): when both ``pattern`` and ``sql`` are set, the branch
    # below rebuilds ``self._queries`` from the raw ``self.sql`` and so
    # replaces what the pattern branch produced -- confirm this is intended.
    if hasattr(self, "sql"):
        if isinstance(self.sql, str):
            self._queries = [self.sql]
        elif isinstance(self.sql, list):
            self._queries = self.sql
        # Replace variables: one pass over every query per variable.
        for name in self._variables:
            rendered = []
            for sql in self._queries:
                sql = _substitute(sql, name)
                if hasattr(self, 'masks'):
                    sql = self.mask_replacement(sql)
                rendered.append(sql)
            self._queries = rendered
    return True
|
206
|
+
|
207
|
+
async def _execute(self, query, event_loop):
|
208
|
+
try:
|
209
|
+
connection = await self.create_connection(
|
210
|
+
driver=self._driver
|
211
|
+
)
|
212
|
+
async with await connection.connection() as conn:
|
213
|
+
if hasattr(self, "background"):
|
214
|
+
future = asyncio.create_task(conn.execute(query))
|
215
|
+
# query will be executed in background
|
216
|
+
_, pending = await asyncio.wait(
|
217
|
+
[future], timeout=self.exec_timeout, return_when="ALL_COMPLETED"
|
218
|
+
)
|
219
|
+
if future in pending:
|
220
|
+
## task reachs timeout
|
221
|
+
for t in pending:
|
222
|
+
t.cancel()
|
223
|
+
raise asyncio.TimeoutError(
|
224
|
+
f"Query {query!s} was cancelled due timeout."
|
225
|
+
)
|
226
|
+
result, error = future.result()
|
227
|
+
else:
|
228
|
+
try:
|
229
|
+
result, error = await conn.execute(query)
|
230
|
+
except asyncio.TimeoutError as exc:
|
231
|
+
raise asyncio.TimeoutError(
|
232
|
+
f"Query {query!s} was cancelled due Timeout."
|
233
|
+
) from exc
|
234
|
+
except Exception as exc:
|
235
|
+
raise ComponentError(f"ExecuteSQL Error: {exc!s}") from exc
|
236
|
+
if error:
|
237
|
+
raise ComponentError(
|
238
|
+
f"Execute SQL error: {result!s} err: {error!s}"
|
239
|
+
)
|
240
|
+
else:
|
241
|
+
if self._driver == 'bigquery':
|
242
|
+
return next(iter(result))
|
243
|
+
return result
|
244
|
+
except StatementError as err:
|
245
|
+
raise StatementError(f"Statement error: {err}") from err
|
246
|
+
except DataError as err:
|
247
|
+
raise DataError(f"Data error: {err}") from err
|
248
|
+
except ComponentError:
|
249
|
+
raise
|
250
|
+
except Exception as err:
|
251
|
+
raise ComponentError(f"ExecuteSQL error: {err}") from err
|
252
|
+
finally:
|
253
|
+
connection = None
|
254
|
+
|
255
|
+
def execute_sql(self, query: str, event_loop: asyncio.AbstractEventLoop) -> str:
    """Run one query to completion on ``event_loop`` and record a metric.

    The given loop is installed as the current event loop before the query
    is scheduled on it.

    Args:
        query: SQL text to execute.
        event_loop: loop on which ``_execute`` is scheduled and driven.

    Returns:
        Whatever ``_execute`` returned.  If running the query (or recording
        the success metric) raises, the error is recorded under
        ``QUERY_ERROR``, logged, and ``None`` is returned.
    """
    asyncio.set_event_loop(event_loop)
    if self._debug:
        self._logger.verbose(f"::: Exec SQL: {query}")
    task = event_loop.create_task(self._execute(query, event_loop))
    try:
        outcome = event_loop.run_until_complete(task)
        self.add_metric("EXECUTED", {"sql": query, "result": outcome})
        return outcome
    except Exception as err:
        # Best effort: the failure is reported, never propagated.
        self.add_metric("QUERY_ERROR", str(err))
        self._logger.error(f"{err}")
        return None
|
268
|
+
|
269
|
+
async def run(self):
    """Run Raw SQL functionality.

    Every entry of ``self._queries`` is pushed through ``execute_sql`` on a
    worker thread.  With ``use_dataframe`` set, each query is formatted once
    per row of ``self.data`` (row values fill the placeholders) and failures
    of individual rows are logged and skipped.

    Returns:
        ``self.data`` when it is not None (the input is passed through),
        otherwise the list of per-query results.

    Raises:
        ComponentError: for any error outside the per-row handling.
    """
    try:
        owns_loop = True
        event_loop = asyncio.new_event_loop()
    except RuntimeError:
        event_loop = asyncio.get_running_loop()
        owns_loop = False
    # The pool needs at least one worker even when there is nothing to run.
    workers = max(len(self._queries), 1)
    result = []
    try:
        loop = asyncio.get_event_loop()
        asyncio.set_event_loop(loop)
        with ThreadPoolExecutor(max_workers=workers) as executor:
            for query in self._queries:
                if self.use_dataframe is not True:
                    # Execute the SQL as-is.
                    call = partial(self.execute_sql, query, event_loop)
                    result.append(await loop.run_in_executor(executor, call))
                    continue
                if self.data is None:
                    continue
                # Execute the query once for every row in the dataframe.
                for _, row in self.data.iterrows():
                    # Replace variables in the SQL with the row's values.
                    sql = query.format_map(SafeDict(**row.to_dict()))
                    call = partial(self.execute_sql, sql, event_loop)
                    try:
                        result.append(await loop.run_in_executor(executor, call))
                    except Exception as err:
                        self._logger.error(
                            f"ExecuteSQL error on query {sql!s}: {err!s}"
                        )
    except ComponentError:
        raise
    except Exception as err:
        raise ComponentError(f"{err}") from err
    finally:
        try:
            if owns_loop:
                event_loop.close()
        except Exception:
            pass
    # Return the previous data when there is any, else the query results.
    self._result = self.data if self.data is not None else result
    return self._result
|
@@ -0,0 +1,178 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
import asyncio
|
4
|
+
from datetime import datetime
|
5
|
+
from typing import Optional
|
6
|
+
from collections.abc import Callable
|
7
|
+
from bs4 import BeautifulSoup
|
8
|
+
from lxml import etree
|
9
|
+
from .flow import FlowComponent
|
10
|
+
from ..interfaces.dataframes import PandasDataframe
|
11
|
+
|
12
|
+
|
13
|
+
class ExtractHTML(FlowComponent, PandasDataframe):
    """
    ExtractHTML

    Overview:
        Extract records from HTML using XPATH or BS CSS Selectors.

        The input is a dictionary whose values are dictionaries carrying an
        ``html`` entry.  Each ``html`` object is handed to the method named
        by the ``custom_parser`` attribute (for example
        ``trustpilot_reviews`` or ``consumeraffairs_reviews``), which returns
        a list of dictionaries.  When ``custom_parser`` is missing or does
        not name a method of this class, nothing is extracted.

    Example:

    ```yaml
    ExtractHTML:
      custom_parser: trustpilot_reviews
      as_dataframe: true
    ```

    """

    # Date layouts tried, in order, when parsing a review date.
    _DATE_FORMATS = (
        "%b. %d, %Y",
        "%B %d, %Y",
        "%b %d, %Y",
    )

    def __init__(
        self,
        loop: asyncio.AbstractEventLoop = None,
        job: Callable = None,
        stat: Callable = None,
        **kwargs,
    ):
        # Optional XPath expression taken from the component arguments.
        self._xpath: Optional[str] = kwargs.get('xpath', None)
        super().__init__(loop=loop, job=job, stat=stat, **kwargs)

    async def start(self, **kwargs):
        """Take the previous component's output as input and validate it.

        Raises:
            ValueError: when there is no input.
            TypeError: when the input is not a dictionary.
        """
        if self.previous:
            self._input = self.input

        if not self._input:
            raise ValueError("No input provided.")

        if not isinstance(self._input, dict):
            raise TypeError("Input must be a dictionary.")

    async def close(self):
        """Nothing to release."""

    def get_soup(self, content: str, parser: str = 'html.parser'):
        """Get a BeautifulSoup Object."""
        return BeautifulSoup(content, parser)

    def _parse_review_date(self, text: str) -> Optional[datetime]:
        """Parse ``text`` with the first matching entry of ``_DATE_FORMATS``.

        Returns None, after printing a diagnostic, when no layout matches.
        """
        for date_format in self._DATE_FORMATS:
            try:
                return datetime.strptime(text, date_format)
            except ValueError:
                continue
        print(f"Error to convert the date '{text}'")
        return None

    def trustpilot_reviews(self, xml_obj, **kwargs):
        """Extract Trustpilot review cards from ``xml_obj``.

        Args:
            xml_obj: object exposing ``xpath()`` whose matches can be
                serialized with ``etree.tostring``.

        Returns:
            A list of dictionaries with the keys ``origin``, ``username``,
            ``review_date``, ``rating`` and ``review``.  Cards without review
            text are skipped; any other missing piece is reported as None.
        """
        xpath = '//article[@data-service-review-card-paper="true"]'
        results = []
        # Extract using XPATH
        for el in xml_obj.xpath(xpath):
            soup = self.get_soup(etree.tostring(el))
            # Extract using BS CSS Selectors.  A card lacking the profile
            # link or the rating block must not abort the whole extraction.
            consumer_profile = soup.find('a', {"name": "consumer-profile"})
            name_span = (
                consumer_profile.find('span')
                if consumer_profile is not None else None
            )
            username = (
                name_span.get_text(strip=True) if name_span is not None else None
            )
            rating_div = soup.find('div', {"data-service-review-rating": True})
            rating = (
                rating_div.get('data-service-review-rating')
                if rating_div is not None else None
            )
            review_body = soup.find(
                'p', {"data-service-review-text-typography": "true"}
            )
            review = review_body.get_text(strip=True) if review_body else None
            review_date_p = soup.find(
                'p', {"data-service-review-date-of-experience-typography": "true"}
            )
            review_date_str = (
                review_date_p.get_text(strip=True) if review_date_p else None
            )
            review_date = None
            if review_date_str:
                if ":" in review_date_str:
                    # Keep only what follows the "label:" prefix.
                    review_date_str = review_date_str.split(":", 1)[1].strip()
                review_date = self._parse_review_date(review_date_str)
            if not review:
                continue
            results.append(
                {
                    "origin": "trustpilot",
                    "username": username,
                    "review_date": review_date,
                    "rating": rating,
                    "review": review
                }
            )
        return results

    def consumeraffairs_reviews(self, xml_obj, **kwargs):
        """Extract ConsumerAffairs reviews from ``xml_obj``.

        Args:
            xml_obj: object exposing ``xpath()`` whose matches can be
                serialized with ``etree.tostring``.

        Returns:
            A list of dictionaries with the keys ``origin``, ``username``,
            ``review_date``, ``rating`` and ``review``.  Entries without
            review text are skipped; any other missing piece is None.
        """
        xpath = '//div[@id="reviews-container"]//div[@itemprop="reviews"]'
        results = []
        for el in xml_obj.xpath(xpath):
            soup = self.get_soup(etree.tostring(el))
            consumer_profile = soup.find('span', class_='rvw__inf-nm')
            username = (
                consumer_profile.get_text(strip=True) if consumer_profile else None
            )
            rating_div = soup.find('div', class_='rvw__hdr-stat')
            rating_meta = (
                rating_div.find('meta', itemprop='ratingValue')
                if rating_div else None
            )
            rating = rating_meta.get('content') if rating_meta else None
            review_body_tag = soup.find('div', class_='rvw__top-text')
            review = (
                review_body_tag.get_text(strip=True) if review_body_tag else None
            )
            date_tag = soup.find('p', class_='rvw__rvd-dt')
            review_date_str = (
                date_tag.get_text(strip=True).replace("Reviewed ", "")
                if date_tag else None
            )

            review_date = None
            if review_date_str:
                # Strip the remaining labels that may precede the date.
                review_date_str = review_date_str.replace(
                    "Updated review: ", ""
                ).replace("Original Review: ", "")
                # "Sept." is rewritten so that the "%b." layout can match it.
                review_date_str = review_date_str.replace("Sept.", "Sep.")
                review_date = self._parse_review_date(review_date_str)

            if not review:
                continue
            results.append(
                {
                    "origin": "consumeraffairs",
                    "username": username,
                    "review_date": review_date,
                    "rating": rating,
                    "review": review
                }
            )
        return results

    async def run(self):
        """Run the configured parser over every HTML object of the input.

        Returns:
            The extracted records, as a dataframe when ``as_dataframe`` is
            True and as a list of dictionaries otherwise.

        Raises:
            ValueError: when an input entry has no ``html`` value.
        """
        results = []
        parser = None
        if hasattr(self, 'custom_parser'):
            parser = getattr(self, self.custom_parser, None)
        for result in self._input.values():
            html_obj = result.get('html', None)
            if html_obj is None:
                raise ValueError("No HTML object found.")
            if parser:
                results += parser(html_obj)
            # Without a custom parser there is no generic extraction yet
            # (Extract using BS CSS Selectors): the entry is skipped.
        if getattr(self, 'as_dataframe', False) is True:
            self._result = await self.create_dataframe(results)
        else:
            self._result = results
        return self._result
|
@@ -0,0 +1,178 @@
|
|
1
|
+
from abc import abstractmethod
|
2
|
+
import logging
|
3
|
+
import asyncio
|
4
|
+
import glob
|
5
|
+
from pathlib import PurePath, Path
|
6
|
+
from collections.abc import Callable
|
7
|
+
from ..exceptions import FileNotFound, ComponentError
|
8
|
+
from ..utils.mail import MailMessage
|
9
|
+
from ..utils import check_empty
|
10
|
+
from .flow import FlowComponent
|
11
|
+
|
12
|
+
|
13
|
+
class FileBase(FlowComponent):
    """
    FileBase

    **Overview**

    Abstract base class for file-based components.

    **Properties** (inherited from FlowComponent)

    .. table:: Properties
       :widths: auto

    +--------------------+----------+------------------------------------------------------------+
    | Name               | Required | Summary                                                    |
    +--------------------+----------+------------------------------------------------------------+
    | create_destination | No       | Boolean flag indicating whether to create the destination  |
    |                    |          | directory if it doesn't exist (default: True).             |
    +--------------------+----------+------------------------------------------------------------+
    | directory          | Yes      | The path to the directory containing the files.            |
    +--------------------+----------+------------------------------------------------------------+
    | filename           | No       | A filename (string), list of filenames (strings), or a     |
    |                    |          | glob pattern (string) to identify files.                   |
    +--------------------+----------+------------------------------------------------------------+
    | file               | No       | A pattern or filename (string) to identify files.          |
    |                    |          | (This property takes precedence over 'filename' if both    |
    |                    |          | are specified.)                                            |
    +--------------------+----------+------------------------------------------------------------+
    """

    def __init__(
        self,
        loop: asyncio.AbstractEventLoop = None,
        job: Callable = None,
        stat: Callable = None,
        **kwargs,
    ):
        """Init Method."""
        # Files discovered by start().
        self._filenames: list[PurePath] = []
        # Path built from 'filename' or 'file' by start(), when applicable.
        self._path: str = None
        super().__init__(loop=loop, job=job, stat=stat, **kwargs)

    async def start(self, **kwargs):
        """Check for File and Directory information.

        Resolves ``directory`` (relative paths go through the file store) and
        fills ``self._filenames`` from, in order of precedence: the output of
        the previous component, the ``filename`` attribute, or the ``file``
        pattern.

        Returns:
            True once the configuration has been processed.

        Raises:
            FileNotFound: when the ``file`` pattern matches nothing.
            ComponentError: for any other error while resolving arguments.
        """
        await super().start(**kwargs)
        try:
            if hasattr(self, "directory"):
                try:
                    directory = Path(self.directory)
                    if directory.is_absolute():
                        self.directory = directory
                    else:
                        self.directory = self._filestore.get_directory(
                            self.directory
                        )
                except TypeError:
                    # 'directory' cannot be turned into a path.
                    self.directory = self._filestore.default_directory("/")
            # check for filename: the previous component's output wins,
            # unless 'ignore_previous' is set or the previous component is
            # itself a FileBase (avoid chaining components).
            use_previous = (
                self.previous
                and not check_empty(self.input)
                and (
                    not hasattr(self, "ignore_previous")
                    or self.ignore_previous is False
                )
                and not isinstance(self.previous, FileBase)
            )
            if use_previous:
                if isinstance(self.input, list):
                    if isinstance(self.input[0], MailMessage):
                        # Collect the attachment names of every message.
                        self._filenames = []
                        for message in self.input:
                            for attachment in message.attachments:
                                fname = attachment["filename"]
                                logging.debug(
                                    f"File: Detected attachment: {fname}"
                                )
                                self._filenames.append(fname)
                    else:
                        # A list cannot be indexed by "files" (that lookup
                        # only makes sense for the dictionary case below):
                        # the list itself is the file list.
                        self._filenames = self.input
                    return True
                if isinstance(self.input, dict):
                    if "files" in self.input:
                        # there is a "files" attribute in dictionary:
                        self._filenames = self.input["files"]
                    else:
                        self._filenames = [self.input]
                    return True
            if hasattr(self, "filename"):
                if isinstance(self.filename, list):
                    self._filenames.extend(
                        self.directory.joinpath(file) for file in self.filename
                    )
                elif isinstance(self.filename, PurePath):
                    self._filenames.append(self.filename)
                elif isinstance(self.filename, str):
                    if "*" in self.filename:
                        # is a glob list of files
                        path = self.directory.joinpath(self.filename)
                        # TODO using glob from pathlib
                        for fname in glob.glob(str(path)):
                            logging.debug(f"Filename > {fname}")
                            self._filenames.append(fname)
                    else:
                        self.filename = self.mask_replacement(self.filename)
                        self._path = self.directory.joinpath(self.filename)
                        self._filenames.append(self._path)
            elif hasattr(self, "file"):
                filename = self.process_pattern("file")
                if hasattr(self, "masks"):
                    filename = self.mask_replacement(filename)
                # path for file
                self._path = self.directory.joinpath(filename)
                listing = glob.glob(str(self._path))
                if not listing:
                    raise FileNotFound(
                        f"FileExists: There are no files in {self._path}"
                    )
                for fname in listing:
                    logging.debug(f"Filename > {fname}")
                    self._filenames.append(fname)
                logging.debug(f" ::: Checking for Files: {self._filenames}")
            # Every successful path reports True, including the case where
            # neither 'filename' nor 'file' is configured.
            return True
        except (FileNotFound, ComponentError):
            raise
        except Exception as err:
            raise ComponentError(
                f"File: Invalid Arguments: {err!s}"
            ) from err

    def get_filelist(self) -> list[PurePath]:
        """
        Retrieves the files to process based on the component's configuration.

        The source is chosen from the component's attributes, in this order:
        'pattern' (after applying masks and variables), 'file' (through
        ``get_filepattern``), 'filename' (the files already discovered by
        ``start``), or every regular file of the component's directory.

        Returns:
            An iterable of paths.  Only the 'filename' case yields the list
            built by ``start``; the other cases yield lazy generators over
            the directory, so the result should be iterated only once.
        """
        if hasattr(self, "pattern"):
            value = self.pattern
            if hasattr(self, "masks"):
                for mask, replace in self._mask.items():
                    value = str(value).replace(mask, replace)
            if self._variables:
                value = value.format(**self._variables)
            files = (f for f in self.directory.glob(value))
        elif hasattr(self, "file"):
            # using pattern/file version
            value = self.get_filepattern()
            files = (f for f in self.directory.glob(value))
        elif hasattr(self, "filename"):
            # already discovered by start:
            files = self._filenames
        else:
            files = (f for f in self.directory.iterdir() if f.is_file())
        return files

    @abstractmethod
    async def run(self):
        """Run File checking."""

    @abstractmethod
    async def close(self):
        """Release whatever the component holds; defined by subclasses."""
|