flowtask 5.8.4__cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowtask/__init__.py +93 -0
- flowtask/__main__.py +38 -0
- flowtask/bots/__init__.py +6 -0
- flowtask/bots/check.py +93 -0
- flowtask/bots/codebot.py +51 -0
- flowtask/components/ASPX.py +148 -0
- flowtask/components/AddDataset.py +352 -0
- flowtask/components/Amazon.py +523 -0
- flowtask/components/AutoTask.py +314 -0
- flowtask/components/Azure.py +80 -0
- flowtask/components/AzureUsers.py +106 -0
- flowtask/components/BaseAction.py +91 -0
- flowtask/components/BaseLoop.py +198 -0
- flowtask/components/BestBuy.py +800 -0
- flowtask/components/CSVToGCS.py +120 -0
- flowtask/components/CompanyScraper/__init__.py +1 -0
- flowtask/components/CompanyScraper/parsers/__init__.py +6 -0
- flowtask/components/CompanyScraper/parsers/base.py +102 -0
- flowtask/components/CompanyScraper/parsers/explorium.py +192 -0
- flowtask/components/CompanyScraper/parsers/leadiq.py +206 -0
- flowtask/components/CompanyScraper/parsers/rocket.py +133 -0
- flowtask/components/CompanyScraper/parsers/siccode.py +109 -0
- flowtask/components/CompanyScraper/parsers/visualvisitor.py +130 -0
- flowtask/components/CompanyScraper/parsers/zoominfo.py +118 -0
- flowtask/components/CompanyScraper/scrapper.py +1054 -0
- flowtask/components/CopyTo.py +177 -0
- flowtask/components/CopyToBigQuery.py +243 -0
- flowtask/components/CopyToMongoDB.py +291 -0
- flowtask/components/CopyToPg.py +609 -0
- flowtask/components/CopyToRethink.py +207 -0
- flowtask/components/CreateGCSBucket.py +102 -0
- flowtask/components/CreateReport/CreateReport.py +228 -0
- flowtask/components/CreateReport/__init__.py +9 -0
- flowtask/components/CreateReport/charts/__init__.py +15 -0
- flowtask/components/CreateReport/charts/bar.py +51 -0
- flowtask/components/CreateReport/charts/base.py +66 -0
- flowtask/components/CreateReport/charts/pie.py +64 -0
- flowtask/components/CreateReport/utils.py +9 -0
- flowtask/components/CustomerSatisfaction.py +196 -0
- flowtask/components/DataInput.py +200 -0
- flowtask/components/DateList.py +255 -0
- flowtask/components/DbClient.py +163 -0
- flowtask/components/DialPad.py +146 -0
- flowtask/components/DocumentDBQuery.py +200 -0
- flowtask/components/DownloadFrom.py +371 -0
- flowtask/components/DownloadFromD2L.py +113 -0
- flowtask/components/DownloadFromFTP.py +181 -0
- flowtask/components/DownloadFromIMAP.py +315 -0
- flowtask/components/DownloadFromS3.py +198 -0
- flowtask/components/DownloadFromSFTP.py +265 -0
- flowtask/components/DownloadFromSharepoint.py +110 -0
- flowtask/components/DownloadFromSmartSheet.py +114 -0
- flowtask/components/DownloadS3File.py +229 -0
- flowtask/components/Dummy.py +59 -0
- flowtask/components/DuplicatePhoto.py +411 -0
- flowtask/components/EmployeeEvaluation.py +237 -0
- flowtask/components/ExecuteSQL.py +323 -0
- flowtask/components/ExtractHTML.py +178 -0
- flowtask/components/FileBase.py +178 -0
- flowtask/components/FileCopy.py +181 -0
- flowtask/components/FileDelete.py +82 -0
- flowtask/components/FileExists.py +146 -0
- flowtask/components/FileIteratorDelete.py +112 -0
- flowtask/components/FileList.py +194 -0
- flowtask/components/FileOpen.py +75 -0
- flowtask/components/FileRead.py +120 -0
- flowtask/components/FileRename.py +106 -0
- flowtask/components/FilterIf.py +284 -0
- flowtask/components/FilterRows/FilterRows.py +200 -0
- flowtask/components/FilterRows/__init__.py +10 -0
- flowtask/components/FilterRows/functions.py +4 -0
- flowtask/components/GCSToBigQuery.py +103 -0
- flowtask/components/GoogleA4.py +150 -0
- flowtask/components/GoogleGeoCoding.py +344 -0
- flowtask/components/GooglePlaces.py +315 -0
- flowtask/components/GoogleSearch.py +539 -0
- flowtask/components/HTTPClient.py +268 -0
- flowtask/components/ICIMS.py +146 -0
- flowtask/components/IF.py +179 -0
- flowtask/components/IcimsFolderCopy.py +173 -0
- flowtask/components/ImageFeatures/__init__.py +5 -0
- flowtask/components/ImageFeatures/process.py +233 -0
- flowtask/components/IteratorBase.py +251 -0
- flowtask/components/LangchainLoader/__init__.py +5 -0
- flowtask/components/LangchainLoader/loader.py +194 -0
- flowtask/components/LangchainLoader/loaders/__init__.py +22 -0
- flowtask/components/LangchainLoader/loaders/abstract.py +362 -0
- flowtask/components/LangchainLoader/loaders/basepdf.py +50 -0
- flowtask/components/LangchainLoader/loaders/docx.py +91 -0
- flowtask/components/LangchainLoader/loaders/html.py +119 -0
- flowtask/components/LangchainLoader/loaders/pdfblocks.py +146 -0
- flowtask/components/LangchainLoader/loaders/pdfmark.py +79 -0
- flowtask/components/LangchainLoader/loaders/pdftables.py +135 -0
- flowtask/components/LangchainLoader/loaders/qa.py +67 -0
- flowtask/components/LangchainLoader/loaders/txt.py +55 -0
- flowtask/components/LeadIQ.py +650 -0
- flowtask/components/Loop.py +253 -0
- flowtask/components/Lowes.py +334 -0
- flowtask/components/MS365Usage.py +156 -0
- flowtask/components/MSTeamsMessages.py +320 -0
- flowtask/components/MarketClustering.py +1051 -0
- flowtask/components/MergeFiles.py +362 -0
- flowtask/components/MilvusOutput.py +87 -0
- flowtask/components/NearByStores.py +175 -0
- flowtask/components/NetworkNinja/__init__.py +6 -0
- flowtask/components/NetworkNinja/models/__init__.py +52 -0
- flowtask/components/NetworkNinja/models/abstract.py +177 -0
- flowtask/components/NetworkNinja/models/account.py +39 -0
- flowtask/components/NetworkNinja/models/client.py +19 -0
- flowtask/components/NetworkNinja/models/district.py +14 -0
- flowtask/components/NetworkNinja/models/events.py +101 -0
- flowtask/components/NetworkNinja/models/forms.py +499 -0
- flowtask/components/NetworkNinja/models/market.py +16 -0
- flowtask/components/NetworkNinja/models/organization.py +34 -0
- flowtask/components/NetworkNinja/models/photos.py +125 -0
- flowtask/components/NetworkNinja/models/project.py +44 -0
- flowtask/components/NetworkNinja/models/region.py +28 -0
- flowtask/components/NetworkNinja/models/store.py +203 -0
- flowtask/components/NetworkNinja/models/user.py +151 -0
- flowtask/components/NetworkNinja/router.py +854 -0
- flowtask/components/Odoo.py +175 -0
- flowtask/components/OdooInjector.py +192 -0
- flowtask/components/OpenFromXML.py +126 -0
- flowtask/components/OpenWeather.py +41 -0
- flowtask/components/OpenWithBase.py +616 -0
- flowtask/components/OpenWithPandas.py +715 -0
- flowtask/components/PGPDecrypt.py +199 -0
- flowtask/components/PandasIterator.py +187 -0
- flowtask/components/PandasToFile.py +189 -0
- flowtask/components/Paradox.py +339 -0
- flowtask/components/ParamIterator.py +117 -0
- flowtask/components/ParseHTML.py +84 -0
- flowtask/components/PlacerStores.py +249 -0
- flowtask/components/Pokemon.py +507 -0
- flowtask/components/PositiveBot.py +62 -0
- flowtask/components/PowerPointSlide.py +400 -0
- flowtask/components/PrintMessage.py +127 -0
- flowtask/components/ProductCompetitors/__init__.py +5 -0
- flowtask/components/ProductCompetitors/parsers/__init__.py +7 -0
- flowtask/components/ProductCompetitors/parsers/base.py +72 -0
- flowtask/components/ProductCompetitors/parsers/bestbuy.py +86 -0
- flowtask/components/ProductCompetitors/parsers/lowes.py +103 -0
- flowtask/components/ProductCompetitors/scrapper.py +155 -0
- flowtask/components/ProductCompliant.py +169 -0
- flowtask/components/ProductInfo/__init__.py +1 -0
- flowtask/components/ProductInfo/parsers/__init__.py +5 -0
- flowtask/components/ProductInfo/parsers/base.py +83 -0
- flowtask/components/ProductInfo/parsers/brother.py +97 -0
- flowtask/components/ProductInfo/parsers/canon.py +167 -0
- flowtask/components/ProductInfo/parsers/epson.py +118 -0
- flowtask/components/ProductInfo/parsers/hp.py +131 -0
- flowtask/components/ProductInfo/parsers/samsung.py +97 -0
- flowtask/components/ProductInfo/scraper.py +319 -0
- flowtask/components/ProductPricing.py +118 -0
- flowtask/components/QS.py +261 -0
- flowtask/components/QSBase.py +201 -0
- flowtask/components/QueryIterator.py +273 -0
- flowtask/components/QueryToInsert.py +327 -0
- flowtask/components/QueryToPandas.py +432 -0
- flowtask/components/RESTClient.py +195 -0
- flowtask/components/RethinkDBQuery.py +189 -0
- flowtask/components/Rsync.py +74 -0
- flowtask/components/RunSSH.py +59 -0
- flowtask/components/RunShell.py +71 -0
- flowtask/components/SalesForce.py +20 -0
- flowtask/components/SaveImageBank/__init__.py +257 -0
- flowtask/components/SchedulingVisits.py +592 -0
- flowtask/components/ScrapPage.py +216 -0
- flowtask/components/ScrapSearch.py +79 -0
- flowtask/components/SendNotify.py +257 -0
- flowtask/components/SentimentAnalysis.py +694 -0
- flowtask/components/ServiceScrapper/__init__.py +5 -0
- flowtask/components/ServiceScrapper/parsers/__init__.py +1 -0
- flowtask/components/ServiceScrapper/parsers/base.py +94 -0
- flowtask/components/ServiceScrapper/parsers/costco.py +93 -0
- flowtask/components/ServiceScrapper/scrapper.py +199 -0
- flowtask/components/SetVariables.py +156 -0
- flowtask/components/SubTask.py +182 -0
- flowtask/components/SuiteCRM.py +48 -0
- flowtask/components/Switch.py +175 -0
- flowtask/components/TableBase.py +148 -0
- flowtask/components/TableDelete.py +312 -0
- flowtask/components/TableInput.py +143 -0
- flowtask/components/TableOutput/TableOutput.py +384 -0
- flowtask/components/TableOutput/__init__.py +3 -0
- flowtask/components/TableSchema.py +534 -0
- flowtask/components/Target.py +223 -0
- flowtask/components/ThumbnailGenerator.py +156 -0
- flowtask/components/ToPandas.py +67 -0
- flowtask/components/TransformRows/TransformRows.py +507 -0
- flowtask/components/TransformRows/__init__.py +9 -0
- flowtask/components/TransformRows/functions.py +559 -0
- flowtask/components/TransposeRows.py +176 -0
- flowtask/components/UPCDatabase.py +86 -0
- flowtask/components/UnGzip.py +171 -0
- flowtask/components/Uncompress.py +172 -0
- flowtask/components/UniqueRows.py +126 -0
- flowtask/components/Unzip.py +107 -0
- flowtask/components/UpdateOperationalVars.py +147 -0
- flowtask/components/UploadTo.py +299 -0
- flowtask/components/UploadToS3.py +136 -0
- flowtask/components/UploadToSFTP.py +160 -0
- flowtask/components/UploadToSharepoint.py +205 -0
- flowtask/components/UserFunc.py +122 -0
- flowtask/components/VivaTracker.py +140 -0
- flowtask/components/WSDLClient.py +123 -0
- flowtask/components/Wait.py +18 -0
- flowtask/components/Walmart.py +199 -0
- flowtask/components/Workplace.py +134 -0
- flowtask/components/XMLToPandas.py +267 -0
- flowtask/components/Zammad/__init__.py +41 -0
- flowtask/components/Zammad/models.py +0 -0
- flowtask/components/ZoomInfoScraper.py +409 -0
- flowtask/components/__init__.py +104 -0
- flowtask/components/abstract.py +18 -0
- flowtask/components/flow.py +530 -0
- flowtask/components/google.py +335 -0
- flowtask/components/group.py +221 -0
- flowtask/components/py.typed +0 -0
- flowtask/components/reviewscrap.py +132 -0
- flowtask/components/tAutoincrement.py +117 -0
- flowtask/components/tConcat.py +109 -0
- flowtask/components/tExplode.py +119 -0
- flowtask/components/tFilter.py +184 -0
- flowtask/components/tGroup.py +236 -0
- flowtask/components/tJoin.py +270 -0
- flowtask/components/tMap/__init__.py +9 -0
- flowtask/components/tMap/functions.py +54 -0
- flowtask/components/tMap/tMap.py +450 -0
- flowtask/components/tMelt.py +112 -0
- flowtask/components/tMerge.py +114 -0
- flowtask/components/tOrder.py +93 -0
- flowtask/components/tPandas.py +94 -0
- flowtask/components/tPivot.py +71 -0
- flowtask/components/tPluckCols.py +76 -0
- flowtask/components/tUnnest.py +82 -0
- flowtask/components/user.py +401 -0
- flowtask/conf.py +457 -0
- flowtask/download.py +102 -0
- flowtask/events/__init__.py +11 -0
- flowtask/events/events/__init__.py +20 -0
- flowtask/events/events/abstract.py +95 -0
- flowtask/events/events/alerts/__init__.py +362 -0
- flowtask/events/events/alerts/colfunctions.py +131 -0
- flowtask/events/events/alerts/functions.py +158 -0
- flowtask/events/events/dummy.py +12 -0
- flowtask/events/events/exec.py +124 -0
- flowtask/events/events/file/__init__.py +7 -0
- flowtask/events/events/file/base.py +51 -0
- flowtask/events/events/file/copy.py +23 -0
- flowtask/events/events/file/delete.py +16 -0
- flowtask/events/events/interfaces/__init__.py +9 -0
- flowtask/events/events/interfaces/client.py +67 -0
- flowtask/events/events/interfaces/credentials.py +28 -0
- flowtask/events/events/interfaces/notifications.py +58 -0
- flowtask/events/events/jira.py +122 -0
- flowtask/events/events/log.py +26 -0
- flowtask/events/events/logerr.py +52 -0
- flowtask/events/events/notify.py +59 -0
- flowtask/events/events/notify_event.py +160 -0
- flowtask/events/events/publish.py +54 -0
- flowtask/events/events/sendfile.py +104 -0
- flowtask/events/events/task.py +97 -0
- flowtask/events/events/teams.py +98 -0
- flowtask/events/events/webhook.py +58 -0
- flowtask/events/manager.py +287 -0
- flowtask/exceptions.c +39393 -0
- flowtask/exceptions.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/extensions/__init__.py +3 -0
- flowtask/extensions/abstract.py +82 -0
- flowtask/extensions/logging/__init__.py +65 -0
- flowtask/hooks/__init__.py +9 -0
- flowtask/hooks/actions/__init__.py +22 -0
- flowtask/hooks/actions/abstract.py +66 -0
- flowtask/hooks/actions/dummy.py +23 -0
- flowtask/hooks/actions/jira.py +74 -0
- flowtask/hooks/actions/rest.py +320 -0
- flowtask/hooks/actions/sampledata.py +37 -0
- flowtask/hooks/actions/sensor.py +23 -0
- flowtask/hooks/actions/task.py +9 -0
- flowtask/hooks/actions/ticket.py +37 -0
- flowtask/hooks/actions/zammad.py +55 -0
- flowtask/hooks/hook.py +62 -0
- flowtask/hooks/models.py +17 -0
- flowtask/hooks/service.py +187 -0
- flowtask/hooks/step.py +91 -0
- flowtask/hooks/types/__init__.py +23 -0
- flowtask/hooks/types/base.py +129 -0
- flowtask/hooks/types/brokers/__init__.py +11 -0
- flowtask/hooks/types/brokers/base.py +54 -0
- flowtask/hooks/types/brokers/mqtt.py +35 -0
- flowtask/hooks/types/brokers/rabbitmq.py +82 -0
- flowtask/hooks/types/brokers/redis.py +83 -0
- flowtask/hooks/types/brokers/sqs.py +44 -0
- flowtask/hooks/types/fs.py +232 -0
- flowtask/hooks/types/http.py +49 -0
- flowtask/hooks/types/imap.py +200 -0
- flowtask/hooks/types/jira.py +279 -0
- flowtask/hooks/types/mail.py +205 -0
- flowtask/hooks/types/postgres.py +98 -0
- flowtask/hooks/types/responses/__init__.py +8 -0
- flowtask/hooks/types/responses/base.py +5 -0
- flowtask/hooks/types/sharepoint.py +288 -0
- flowtask/hooks/types/ssh.py +141 -0
- flowtask/hooks/types/tagged.py +59 -0
- flowtask/hooks/types/upload.py +85 -0
- flowtask/hooks/types/watch.py +71 -0
- flowtask/hooks/types/web.py +36 -0
- flowtask/interfaces/AzureClient.py +137 -0
- flowtask/interfaces/AzureGraph.py +839 -0
- flowtask/interfaces/Boto3Client.py +326 -0
- flowtask/interfaces/DropboxClient.py +173 -0
- flowtask/interfaces/ExcelHandler.py +94 -0
- flowtask/interfaces/FTPClient.py +131 -0
- flowtask/interfaces/GoogleCalendar.py +201 -0
- flowtask/interfaces/GoogleClient.py +133 -0
- flowtask/interfaces/GoogleDrive.py +127 -0
- flowtask/interfaces/GoogleGCS.py +89 -0
- flowtask/interfaces/GoogleGeocoding.py +93 -0
- flowtask/interfaces/GoogleLang.py +114 -0
- flowtask/interfaces/GooglePub.py +61 -0
- flowtask/interfaces/GoogleSheet.py +68 -0
- flowtask/interfaces/IMAPClient.py +137 -0
- flowtask/interfaces/O365Calendar.py +113 -0
- flowtask/interfaces/O365Client.py +220 -0
- flowtask/interfaces/OneDrive.py +284 -0
- flowtask/interfaces/Outlook.py +155 -0
- flowtask/interfaces/ParrotBot.py +130 -0
- flowtask/interfaces/SSHClient.py +378 -0
- flowtask/interfaces/Sharepoint.py +496 -0
- flowtask/interfaces/__init__.py +36 -0
- flowtask/interfaces/azureauth.py +119 -0
- flowtask/interfaces/cache.py +201 -0
- flowtask/interfaces/client.py +82 -0
- flowtask/interfaces/compress.py +525 -0
- flowtask/interfaces/credentials.py +124 -0
- flowtask/interfaces/d2l.py +239 -0
- flowtask/interfaces/databases/__init__.py +5 -0
- flowtask/interfaces/databases/db.py +223 -0
- flowtask/interfaces/databases/documentdb.py +55 -0
- flowtask/interfaces/databases/rethink.py +39 -0
- flowtask/interfaces/dataframes/__init__.py +11 -0
- flowtask/interfaces/dataframes/abstract.py +21 -0
- flowtask/interfaces/dataframes/arrow.py +71 -0
- flowtask/interfaces/dataframes/dt.py +69 -0
- flowtask/interfaces/dataframes/pandas.py +167 -0
- flowtask/interfaces/dataframes/polars.py +60 -0
- flowtask/interfaces/db.py +263 -0
- flowtask/interfaces/env.py +46 -0
- flowtask/interfaces/func.py +137 -0
- flowtask/interfaces/http.py +1780 -0
- flowtask/interfaces/locale.py +40 -0
- flowtask/interfaces/log.py +75 -0
- flowtask/interfaces/mask.py +143 -0
- flowtask/interfaces/notification.py +154 -0
- flowtask/interfaces/playwright.py +339 -0
- flowtask/interfaces/powerpoint.py +368 -0
- flowtask/interfaces/py.typed +0 -0
- flowtask/interfaces/qs.py +376 -0
- flowtask/interfaces/result.py +87 -0
- flowtask/interfaces/selenium_service.py +779 -0
- flowtask/interfaces/smartsheet.py +154 -0
- flowtask/interfaces/stat.py +39 -0
- flowtask/interfaces/task.py +96 -0
- flowtask/interfaces/template.py +118 -0
- flowtask/interfaces/vectorstores/__init__.py +1 -0
- flowtask/interfaces/vectorstores/abstract.py +133 -0
- flowtask/interfaces/vectorstores/milvus.py +669 -0
- flowtask/interfaces/zammad.py +107 -0
- flowtask/models.py +193 -0
- flowtask/parsers/__init__.py +15 -0
- flowtask/parsers/_yaml.c +11978 -0
- flowtask/parsers/_yaml.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/argparser.py +235 -0
- flowtask/parsers/base.c +15155 -0
- flowtask/parsers/base.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/json.c +11968 -0
- flowtask/parsers/json.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/maps.py +49 -0
- flowtask/parsers/toml.c +11968 -0
- flowtask/parsers/toml.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/plugins/__init__.py +16 -0
- flowtask/plugins/components/__init__.py +0 -0
- flowtask/plugins/handler/__init__.py +45 -0
- flowtask/plugins/importer.py +31 -0
- flowtask/plugins/sources/__init__.py +0 -0
- flowtask/runner.py +283 -0
- flowtask/scheduler/__init__.py +9 -0
- flowtask/scheduler/functions.py +493 -0
- flowtask/scheduler/handlers/__init__.py +8 -0
- flowtask/scheduler/handlers/manager.py +504 -0
- flowtask/scheduler/handlers/models.py +58 -0
- flowtask/scheduler/handlers/service.py +72 -0
- flowtask/scheduler/notifications.py +65 -0
- flowtask/scheduler/scheduler.py +993 -0
- flowtask/services/__init__.py +0 -0
- flowtask/services/bots/__init__.py +0 -0
- flowtask/services/bots/telegram.py +264 -0
- flowtask/services/files/__init__.py +11 -0
- flowtask/services/files/manager.py +522 -0
- flowtask/services/files/model.py +37 -0
- flowtask/services/files/service.py +767 -0
- flowtask/services/jira/__init__.py +3 -0
- flowtask/services/jira/jira_actions.py +191 -0
- flowtask/services/tasks/__init__.py +13 -0
- flowtask/services/tasks/launcher.py +213 -0
- flowtask/services/tasks/manager.py +323 -0
- flowtask/services/tasks/service.py +275 -0
- flowtask/services/tasks/task_manager.py +376 -0
- flowtask/services/tasks/tasks.py +155 -0
- flowtask/storages/__init__.py +16 -0
- flowtask/storages/exceptions.py +12 -0
- flowtask/storages/files/__init__.py +8 -0
- flowtask/storages/files/abstract.py +29 -0
- flowtask/storages/files/filesystem.py +66 -0
- flowtask/storages/tasks/__init__.py +19 -0
- flowtask/storages/tasks/abstract.py +26 -0
- flowtask/storages/tasks/database.py +33 -0
- flowtask/storages/tasks/filesystem.py +108 -0
- flowtask/storages/tasks/github.py +119 -0
- flowtask/storages/tasks/memory.py +45 -0
- flowtask/storages/tasks/row.py +25 -0
- flowtask/tasks/__init__.py +0 -0
- flowtask/tasks/abstract.py +526 -0
- flowtask/tasks/command.py +118 -0
- flowtask/tasks/pile.py +486 -0
- flowtask/tasks/py.typed +0 -0
- flowtask/tasks/task.py +778 -0
- flowtask/template/__init__.py +161 -0
- flowtask/tests.py +257 -0
- flowtask/types/__init__.py +8 -0
- flowtask/types/typedefs.c +11347 -0
- flowtask/types/typedefs.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/__init__.py +24 -0
- flowtask/utils/constants.py +117 -0
- flowtask/utils/encoders.py +21 -0
- flowtask/utils/executor.py +112 -0
- flowtask/utils/functions.cpp +14280 -0
- flowtask/utils/functions.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/json.cpp +13349 -0
- flowtask/utils/json.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/mail.py +63 -0
- flowtask/utils/parseqs.c +13324 -0
- flowtask/utils/parserqs.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/stats.py +308 -0
- flowtask/utils/transformations.py +74 -0
- flowtask/utils/uv.py +12 -0
- flowtask/utils/validators.py +97 -0
- flowtask/version.py +11 -0
- flowtask-5.8.4.dist-info/LICENSE +201 -0
- flowtask-5.8.4.dist-info/METADATA +209 -0
- flowtask-5.8.4.dist-info/RECORD +470 -0
- flowtask-5.8.4.dist-info/WHEEL +6 -0
- flowtask-5.8.4.dist-info/entry_points.txt +3 -0
- flowtask-5.8.4.dist-info/top_level.txt +2 -0
- plugins/components/CreateQR.py +39 -0
- plugins/components/TestComponent.py +28 -0
- plugins/components/Use1.py +13 -0
- plugins/components/Workplace.py +117 -0
- plugins/components/__init__.py +3 -0
- plugins/sources/__init__.py +0 -0
- plugins/sources/get_populartimes.py +78 -0
- plugins/sources/google.py +150 -0
- plugins/sources/hubspot.py +679 -0
- plugins/sources/icims.py +679 -0
- plugins/sources/mobileinsight.py +501 -0
- plugins/sources/newrelic.py +262 -0
- plugins/sources/uap.py +268 -0
- plugins/sources/venu.py +244 -0
- plugins/sources/vocinity.py +314 -0
@@ -0,0 +1,335 @@
|
|
1
|
+
from typing import Union
|
2
|
+
from collections.abc import Callable
|
3
|
+
from functools import partial
|
4
|
+
import asyncio
|
5
|
+
import aiohttp
|
6
|
+
from aiohttp.resolver import AsyncResolver
|
7
|
+
import pandas as pd
|
8
|
+
import ssl
|
9
|
+
from datamodel.parsers.json import json_encoder
|
10
|
+
from proxylists import check_address
|
11
|
+
from proxylists.proxies import (
|
12
|
+
FreeProxy,
|
13
|
+
Oxylabs
|
14
|
+
)
|
15
|
+
from ..conf import GOOGLE_API_KEY, GOOGLE_PLACES_API_KEY
|
16
|
+
from ..exceptions import ComponentError
|
17
|
+
from ..components import FlowComponent
|
18
|
+
|
19
|
+
|
20
|
+
# Monkey-patch for TLS support on Python < 3.11: flag asyncio's SSL
# transport class as compatible with ``start_tls``.
asyncio.sslproto._SSLProtocolTransport._start_tls_compatible = True
|
25
|
+
|
26
|
+
class GoogleBase(FlowComponent):
|
27
|
+
"""
|
28
|
+
GoogleBase.
|
29
|
+
|
30
|
+
Overview: A base class for Google API components.
|
31
|
+
"""
|
32
|
+
def __init__(
    self,
    loop: asyncio.AbstractEventLoop = None,
    job: Callable = None,
    stat: Callable = None,
    **kwargs,
) -> None:
    """Extract the Google-specific options and initialise the parent component.

    Args:
        loop: Event loop, forwarded unchanged to the parent constructor.
        job: Forwarded unchanged to the parent constructor.
        stat: Forwarded unchanged to the parent constructor.
        **kwargs: Component options. ``type``, ``api_key``, ``use_proxies``
            and ``paid_proxy`` are removed from ``kwargs`` here;
            ``chunk_size`` is only read, so it still reaches the parent.
    """
    # Read without popping: the parent constructor also receives chunk_size.
    self.chunk_size = kwargs.get('chunk_size', 100)
    # (attribute name, option name, default) for every option consumed here.
    consumed_options = (
        ('_type', 'type', None),
        ('api_key', 'api_key', GOOGLE_API_KEY),
        ('use_proxies', 'use_proxies', False),
        ('paid_proxy', 'paid_proxy', False),
    )
    for attribute, option, default in consumed_options:
        setattr(self, attribute, kwargs.pop(option, default))
    super(GoogleBase, self).__init__(loop=loop, job=job, stat=stat, **kwargs)
    self.semaphore = asyncio.Semaphore(10)  # Adjust the limit as needed
|
46
|
+
|
47
|
+
async def close(self):
    """No-op coroutine: performs no cleanup and returns ``None``."""
    return None
|
49
|
+
|
50
|
+
def _evaluate_input(self):
|
51
|
+
if self.previous:
|
52
|
+
self.data = self.input
|
53
|
+
elif self.input is not None:
|
54
|
+
self.data = self.input
|
55
|
+
|
56
|
+
async def start(self, **kwargs):
    """Validate the component's configuration and input.

    Resets ``self._counter`` to zero, loads the input through
    ``_evaluate_input`` and then checks, in order, the configured type, the
    input data and the API key.

    Returns:
        ``True`` when every check passes.

    Raises:
        RuntimeError: if no ``type`` was configured.
        ComponentError: if ``self.data`` is not a pandas DataFrame, or if no
            API key is available even after falling back to
            ``GOOGLE_PLACES_API_KEY``.
    """
    self._counter = 0
    self._evaluate_input()

    if not self._type:
        raise RuntimeError('Google requires a Type Function')

    if not isinstance(self.data, pd.DataFrame):
        raise ComponentError("Incompatible Pandas Dataframe", status=404)

    key = self.api_key
    if not key:
        # No explicit/default key: fall back to the Places-specific one.
        key = self.api_key = GOOGLE_PLACES_API_KEY
    if not key:
        raise ComponentError("Google API Key is missing", status=404)

    return True
|
74
|
+
|
75
|
+
def _get_session_args(self) -> dict:
    """Build the keyword arguments used to create an ``aiohttp.ClientSession``.

    Returns:
        A dict with a new TCP connector (limit of 100, resolving through
        the 1.1.1.1 and 8.8.8.8 nameservers), a 20-second total timeout,
        the ``json_encoder`` serializer and ``trust_env`` enabled.
    """
    return {
        "connector": aiohttp.TCPConnector(
            limit=100,
            resolver=AsyncResolver(nameservers=["1.1.1.1", "8.8.8.8"]),
        ),
        # Total timeout for the request
        "timeout": aiohttp.ClientTimeout(total=20),
        "json_serialize": json_encoder,
        "trust_env": True,
    }
|
92
|
+
|
93
|
+
async def get_proxies(self):
    """Return one proxy to route a request through.

    With ``self.paid_proxy`` set, the ``'https'`` entry of the Oxylabs proxy
    list is returned as-is. Otherwise the free proxy list is scanned in
    order and the first address that ``check_address`` accepts is returned
    as an ``http://host:port`` URL.

    Returns:
        The proxy value to pass to the HTTP client.

    Raises:
        ComponentError: if no free proxy passes the check (previously this
            surfaced as a bare ``IndexError`` from indexing an empty list).
    """
    if self.paid_proxy is True:
        proxies = await Oxylabs().get_proxy_list()
        return proxies.get('https')

    for address in await FreeProxy().get_list() or []:
        parts = address.split(':')
        if len(parts) != 2:
            # Not a plain "host:port" entry; skip it instead of crashing
            # the whole scan on a failed tuple unpack.
            continue
        host, port = parts
        if await check_address(host=host, port=port) is True:
            # Only one proxy is ever used, so stop at the first valid one
            # rather than probing every remaining address.
            return f"http://{address}"

    raise ComponentError("Google: no working free proxy is available")
|
106
|
+
|
107
|
+
async def _google_session(
    self,
    url: str,
    session_args: dict,
    params: dict = None,
    method: str = 'GET',
    use_json: bool = False,
    as_json: bool = True,
    use_proxies: bool = False,
    google_search: bool = False,
    **kwargs
) -> Union[aiohttp.ClientResponse, dict]:
    """Make a Google API request using a fresh aiohttp session.

    Args:
        url: Endpoint to call.
        session_args: Keyword arguments for ``aiohttp.ClientSession``.
        params: Sent as the query string for GET; for other methods sent as
            a JSON body when ``use_json`` is true, else as ``data``.
        method: HTTP method (case-insensitive).
        use_json: For non-GET requests, send ``params`` as JSON.
        as_json: Decode a 200 response as JSON and require ``status == 'OK'``.
        use_proxies: Route this call through a proxy (also enabled by
            ``self.use_proxies``).
        google_search: Selects the raw body (``response.read()``) instead of
            text when ``as_json`` is false, and ``check_response_search``
            instead of ``google_response_code`` for non-200 responses.
        **kwargs: Extra keyword arguments for ``session.request``.

    Returns:
        The decoded JSON dict when ``as_json`` is true and its ``status`` is
        ``'OK'``; the body (raw or text) when ``as_json`` is false; ``None``
        in every other case.

    Raises:
        ComponentError: propagated from the response checkers on non-200
            responses.
    """
    proxy = None
    if use_proxies is True or self.use_proxies is True:
        proxy = await self.get_proxies()

    ssl_context = ssl.create_default_context()
    # Ensure at least TLS 1.2 is used
    ssl_context.minimum_version = ssl.TLSVersion.TLSv1_2
    # NOTE(review): hostname and certificate verification are switched off
    # for every request, which allows man-in-the-middle interception.
    # Kept as-is to preserve behaviour; confirm whether this is only
    # required for proxied traffic.
    ssl_context.check_hostname = False
    ssl_context.verify_mode = ssl.CERT_NONE

    verb = method.upper()
    # The three request flavours differ only in how ``params`` travels.
    if verb == 'GET':
        payload_key = 'params'
    elif use_json is True:
        payload_key = 'json'
    else:
        payload_key = 'data'

    async with aiohttp.ClientSession(**session_args) as session:
        async with session.request(
            verb,
            url,
            proxy=proxy,
            ssl=ssl_context,
            **{payload_key: params},
            **kwargs
        ) as response:
            if response.status != 200:
                if google_search is True:
                    await self.check_response_search(response)
                else:
                    await self.google_response_code(response)
                return None

            if as_json is True:
                result = await response.json()
                # TODO: Check if it's a premise or subpremise
                # A 200 body without a 'status' key (or one that is not a
                # JSON object) is treated like any other non-OK answer
                # instead of raising KeyError/TypeError.
                if isinstance(result, dict) and result.get('status') == 'OK':
                    return result
                return None

            if google_search is True:
                return await response.read()
            return await response.text()
|
178
|
+
|
179
|
+
async def check_response_search(self, response: aiohttp.ClientResponse):
    """Inspect a Google Search response and raise on error statuses.

    Returns:
        ``None`` for HTTP 429 (logged as "too many requests") and for any
        status up to 299.

    Raises:
        ComponentError: for any other status above 299; the response body is
            logged and attached to the error.
    """
    code = response.status
    if code == 429:
        # The body is read here as well, although it is not reported.
        await response.text()
        self._logger.error("Google Search: Too many requests")
        return None
    if code <= 299:
        return None

    body = await response.text()
    self._logger.error(f"Raw response Error: {body}")
    raise ComponentError(
        f"Google Places Error {response.status}",
        f"Error: {body}"
    )
|
195
|
+
|
196
|
+
async def google_response_code(self, response: aiohttp.ClientResponse):
    """Check a Google API response for quota problems and other errors.

    Args:
        response: The aiohttp response to inspect; its JSON body is read
            unless the HTTP status is 429.

    Returns:
        ``None`` when the HTTP status is 429 (logged only) or when the JSON
        ``status`` field is ``OK`` or ``ZERO_RESULTS``.

    Raises:
        ComponentError: for every other status, with a message specific to
            ``REQUEST_DENIED``, ``OVER_QUERY_LIMIT``, ``INVALID_REQUEST`` and
            ``NOT_FOUND`` and a generic message otherwise.
    """
    if response.status == 429:
        self._logger.error("Google Search: Too many requests")
        return None

    result = await response.json()
    status = result.get('status', 'Unknown')
    if status in ("OK", "ZERO_RESULTS"):
        return

    # Error: either a nested 'error' object or the top-level payload itself.
    error = result.get('error', result)
    if isinstance(error, dict):
        status = error.get('status', 'Unknown')
        # Legacy payloads carry the text in 'error_message'.
        message = error.get('message', error.get('error_message', error))
    else:
        # A plain (non-dict) error value: keep the top-level status rather
        # than failing on ``.get``.
        message = error

    self._logger.error(f"{status}: {message}: {error}")

    known_errors = {
        "REQUEST_DENIED": "Request was denied, maybe the API key is invalid.",
        "OVER_QUERY_LIMIT": (
            "You exceeded your Query Limit for Google Places API Web Service, "
            "check https://developers.google.com/places/web-service/usage "
            "to upgrade your quota."
        ),
        "INVALID_REQUEST": (
            "Invalid Request: "
            "The query string is malformed, "
            "check if your formatting for lat/lng and radius is correct. "
            f"Error: {error}"
        ),
        "NOT_FOUND": (
            "The place ID was not found and either does not exist or was retired."
        ),
    }
    fallback = (
        "Unidentified error with the Places API, please check the response code. "
        f"error: {error}"
    )
    raise ComponentError(
        f"Google Places {status}: " + known_errors.get(status, fallback)
    )
|
266
|
+
|
267
|
+
def column_exists(self, column: str):
    """Report whether ``column`` is in ``self.data``, creating it when absent.

    A missing column is logged as a warning and then added to the
    DataFrame filled with ``None``.

    Returns:
        ``True`` if the column was already present, ``False`` if it had to
        be created.
    """
    if column in self.data.columns:
        return True
    self._logger.warning(f"Column {column} does not exist in the dataframe")
    self.data[column] = None
    return False
|
276
|
+
|
277
|
+
def chunkify(self, lst, n):
    """Yield consecutive slices of ``lst``, each holding at most ``n`` items."""
    yield from (lst[start:start + n] for start in range(0, len(lst), n))
|
281
|
+
|
282
|
+
async def _processing_tasks(self, tasks: list) -> pd.DataFrame:
|
283
|
+
"""Process tasks concurrently."""
|
284
|
+
results = []
|
285
|
+
for chunk in self.chunkify(tasks, self.chunk_size):
|
286
|
+
result = await asyncio.gather(*chunk, return_exceptions=True)
|
287
|
+
if result:
|
288
|
+
for res in result:
|
289
|
+
if isinstance(res, Exception):
|
290
|
+
# Handle the exception
|
291
|
+
self._logger.error(
|
292
|
+
f"Task failed with exception: {res}. Type: {type(res)}"
|
293
|
+
)
|
294
|
+
self._logger.error(
|
295
|
+
f"Exception type: {type(res)}, Task input types: {type(chunk)}"
|
296
|
+
)
|
297
|
+
continue
|
298
|
+
results.append(res)
|
299
|
+
results_list = []
|
300
|
+
for idx, result in results:
|
301
|
+
if result:
|
302
|
+
result['idx'] = idx # Add the index to the result dictionary
|
303
|
+
results_list.append(result)
|
304
|
+
if results_list:
|
305
|
+
results_df = pd.DataFrame(results_list)
|
306
|
+
results_df.set_index('idx', inplace=True)
|
307
|
+
# If necessary, reindex results_df to match self.data
|
308
|
+
results_df = results_df.reindex(self.data.index)
|
309
|
+
# Directly assign columns from results_df to self.data
|
310
|
+
for column in results_df.columns:
|
311
|
+
mask = results_df[column].notnull()
|
312
|
+
indices = results_df.index[mask]
|
313
|
+
self.data.loc[indices, column] = results_df.loc[indices, column]
|
314
|
+
return self.data
|
315
|
+
|
316
|
+
async def run(self):
    """Run the Google Places API.

    Builds one task per row of ``self.data`` by calling the method named by
    ``self._type`` with ``(idx, row)``, awaits them through
    ``self._processing_tasks`` and stores the resulting frame in
    ``self._result``.

    Returns:
        The DataFrame returned by ``self._processing_tasks``.
    """
    fn = getattr(self, self._type)
    tasks = [fn(idx, row) for idx, row in self.data.iterrows()]
    # Execute tasks concurrently
    df = await self._processing_tasks(tasks)
    if self._debug is True:
        print(df)
        print("::: Printing Column Information === ")
        has_rows = len(df.index) > 0
        for column, t in df.dtypes.items():
            # An empty frame has no first row; .iloc[0] would raise IndexError.
            sample = df[column].iloc[0] if has_rows else None
            print(column, "->", t, "->", sample)
    self.add_metric("GOOGLE_PLACES_DOWNLOADED", self._counter)
    self._result = df
    return self._result
|
@@ -0,0 +1,221 @@
|
|
1
|
+
import asyncio
|
2
|
+
import copy
|
3
|
+
from collections.abc import Callable
|
4
|
+
from navconfig.logging import logging
|
5
|
+
from asyncdb.exceptions import NoDataFound, ProviderError
|
6
|
+
from ..utils.stats import StepMonitor
|
7
|
+
from ..interfaces.log import SkipErrors
|
8
|
+
from ..exceptions import DataNotFound, NotSupported, ComponentError
|
9
|
+
from ..utils import cPrint
|
10
|
+
from .flow import FlowComponent
|
11
|
+
|
12
|
+
|
13
|
+
class GroupComponent(FlowComponent):
    """
    GroupComponent

    Overview

        This component executes a group of other FlowTask components sequentially as a single unit.
        It allows chaining multiple tasks together and provides error handling for various scenarios.

    Properties

        component_list (list) -- Required
            List of step definitions to be executed in the group. Each step should provide:

            - "component": The FlowTask component class to be used.
            - "params": A dictionary containing parameters to be passed to the component. (Optional)
            - "conditions": A dictionary containing conditions that must be met before running
              the component. (Optional)

            NOTE: the implementation reads ``step.name``, ``step.component`` and ``step.params``
            as attributes of every step.

        stat (Callable) -- Optional
            Optional callback function for step-level monitoring and statistics collection.

        skipError -- Optional
            Defines the behavior when a component within the group raises an error.
            Valid options are:

            - SkipErrors: Skip. The group logs a warning and continues its execution.
            - SkipErrors: Raise. The error is raised and the execution is interrupted.

    Return

        The component modifies the data received from the previous component and returns the final output after
        all components in the group have been executed.

    """  # noqa

    def __init__(
        self,
        loop: asyncio.AbstractEventLoop = None,
        job: Callable = None,
        stat: Callable = None,
        component_list: list = None,
        **kwargs,
    ):
        """Init Method."""
        self._params = {}
        self._components = component_list
        self._conditions: dict = {}
        super(GroupComponent, self).__init__(loop=loop, job=job, stat=stat, **kwargs)

    async def start(self, **kwargs):
        """Take the output of the previous component (if any) as ``self.data``."""
        if self.previous:
            self.data = self.input
        return True

    async def close(self):
        """Nothing to release: every step is closed right after it runs."""
        pass

    def _should_skip_step(self, component, step_name) -> bool:
        """Return True (and log a warning) when ``component`` asks to skip errors."""
        if component.skipError == SkipErrors.SKIP:
            self._logger.warning(
                f"::: SKIPPING Error on {step_name} :::: "
            )
            return True
        return False

    async def _close_step(self, component):
        """Call ``component.close`` (sync or async); failures are only logged."""
        close = getattr(component, "close", None)
        if not callable(close):
            return
        try:
            if asyncio.iscoroutinefunction(close):
                await close()
            else:
                close()
        except Exception as e:  # pylint: disable=W0703
            logging.warning(e)

    async def run(self):
        """Build, start, run and close every step of the group, in order.

        Each step is deep-copied before use. The component built for a step
        becomes the ``previous`` of the next one; when a step is skipped
        because of an error, the next step is chained to the last component
        that was not skipped.

        Returns:
            The result of the last step that ran successfully, or ``None``
            when no step produced a result.

        Raises:
            ComponentError: a step could not be built, has no ``start``
                method, failed while starting, or failed with an unexpected
                error while running.
            DataNotFound: a step found no data and is not set to skip errors.
            NotSupported: a step raised ProviderError, ComponentError or
                NotSupported while running and is not set to skip errors.
        """
        prev = self.previous
        result = None
        # component_list defaults to None: treat it as an empty group.
        for step in self._components or []:
            step = copy.deepcopy(step)
            step_name = step.name
            # Remember the last good component so a skipped step can be unlinked.
            _prev = prev
            try:
                component = self.get_component(step=step, previous=prev)
            except Exception as e:
                raise ComponentError(f"{e!s}") from e
            prev = component
            # calling start method for component
            start = getattr(component, "start", None)
            if not callable(start):
                raise ComponentError(f"Group Error: missing Start on {step.name}")
            try:
                if asyncio.iscoroutinefunction(start):
                    st = await start()
                else:
                    st = start()
                logging.debug(f"{step_name} STARTED: {st}")
            except (NoDataFound, DataNotFound) as err:
                if self._should_skip_step(component, step_name):
                    prev = _prev
                    continue
                raise DataNotFound(
                    f'Data Not Found over {step_name}'
                ) from err
            except (ProviderError, ComponentError, NotSupported) as err:
                raise ComponentError(
                    f"Group Error: calling Start on {step.name}, error: {err}"
                ) from err
            # then, calling the run method:
            try:
                run = getattr(component, "run", None)
                if asyncio.iscoroutinefunction(run):
                    result = await run()
                else:
                    result = run()
            except (NoDataFound, DataNotFound) as err:
                if self._should_skip_step(component, step_name):
                    prev = _prev
                    continue
                raise DataNotFound(
                    f'Data Not Found over {step_name}'
                ) from err
            except (ProviderError, ComponentError, NotSupported) as err:
                if self._should_skip_step(component, step_name):
                    prev = _prev
                    continue
                raise NotSupported(
                    f"Group Error: Not Supported on {step.name}, error: {err}"
                ) from err
            except Exception as err:
                if self._should_skip_step(component, step_name):
                    prev = _prev
                    continue
                # This failure happened in run(), not in start().
                raise ComponentError(
                    f"Group Error: Calling Run on {step.name}, error: {err}"
                ) from err
            finally:
                # Runs on success, on error and on `continue`.
                await self._close_step(component)
        self._result = result
        return self._result

    def get_component(self, step, previous):
        """Instantiate the component described by ``step``.

        The step parameters are completed with the state of this group
        (environment, params, parameters, variables, arguments, conditions,
        attributes, task pile, debug flag, argparser and memory connector)
        before calling ``step.component``.

        Args:
            step: Step definition exposing ``name``, ``params`` and
                ``component``.
            previous: Component passed to the new one as ``job``.

        Returns:
            The instantiated component.

        Raises:
            ComponentError: when the component cannot be instantiated.
        """
        if self.stat:
            parent_stat = self.stat.parent()
            stat = StepMonitor(name=step.name, parent=parent_stat)
            parent_stat.add_step(stat)
        else:
            stat = None
        params = step.params
        own_conditions = params.get("conditions")
        if own_conditions:
            self._conditions[step.name] = own_conditions
        params["ENV"] = self._environment
        # params
        if self._params:
            try:
                params["params"] = {**params["params"], **self._params}
            except (KeyError, TypeError):
                # The step has no usable "params" mapping: leave it untouched.
                pass
        # parameters
        if self._parameters:
            # `or {}` also covers a step declaring "parameters": None.
            parameters = params.get("parameters") or {}
            params["parameters"] = {**parameters, **self._parameters}
        if hasattr(self, "_program"):
            params["_program"] = self._program
        # useful to change variables in set var components
        params["_vars"] = self._vars
        # variables dictionary
        params["variables"] = self._variables
        params["_args"] = self._args
        # argument list for components (or tasks) that need argument lists
        params["arguments"] = self._arguments
        # for components with conditions, we can add more conditions
        conditions = params.get("conditions") or {}
        step_conds = self._conditions.get(step.name, {})
        if self.conditions is not None:
            # Conditions declared by the step win over the group's conditions.
            step_conds = {**self.conditions, **step_conds}
        params["conditions"] = {**conditions, **step_conds}
        # attributes only usable component-only
        params["attributes"] = self._attributes
        # the current Pile of components
        params["TaskPile"] = self._TaskPile
        params["debug"] = self._debug
        params["argparser"] = self._argparser
        # the current in-memory connector
        params["memory"] = self._memory
        target = step.component
        try:
            job = target(job=previous, loop=self._loop, stat=stat, **params)
            job.SetPile(self._TaskPile)
            cPrint(
                f"LOADED STEP: {step.name}",
                level="DEBUG"
            )
            return job
        except Exception as err:
            raise ComponentError(
                f"Component Error on {target}, error: {err}"
            ) from err
|
File without changes
|
@@ -0,0 +1,132 @@
|
|
1
|
+
from typing import Any
|
2
|
+
from collections.abc import Callable
|
3
|
+
import asyncio
|
4
|
+
import httpx
|
5
|
+
from pandas import DataFrame
|
6
|
+
from seleniumwire import webdriver
|
7
|
+
from navconfig.logging import logging
|
8
|
+
from ..exceptions import (
|
9
|
+
ConfigError,
|
10
|
+
ComponentError,
|
11
|
+
NotSupported,
|
12
|
+
)
|
13
|
+
|
14
|
+
from .flow import FlowComponent
|
15
|
+
from ..interfaces import SeleniumService
|
16
|
+
from ..interfaces import HTTPService
|
17
|
+
|
18
|
+
# Keep third-party loggers quiet: only WARNING and above are emitted.
for _noisy_logger in ('selenium.webdriver', 'WDM', 'hpack', 'seleniumwire'):
    logging.getLogger(name=_noisy_logger).setLevel(logging.WARNING)
|
22
|
+
|
23
|
+
def on_backoff(details):
    """Log a warning describing a retry that is about to be attempted.

    Args:
        details: Mapping with the keys ``wait`` (seconds to wait), ``tries``
            (attempts made so far) and ``exception`` (the error that caused
            the retry).
    """
    wait = details['wait']
    tries = details['tries']
    error = details['exception']
    message = (
        f"Backing off {wait:0.1f} seconds after {tries} tries due to error: {error}"
    )
    logging.warning(message)
|
27
|
+
|
28
|
+
def bad_gateway_exception(exc):
    """Tell whether ``exc`` is an ``httpx.HTTPStatusError`` with status 502.

    Args:
        exc: Exception to inspect.

    Returns:
        True only for an ``httpx.HTTPStatusError`` whose response status
        code is 502 (Bad Gateway); False otherwise.
    """
    if not isinstance(exc, httpx.HTTPStatusError):
        return False
    return exc.response.status_code == 502
|
31
|
+
|
32
|
+
|
33
|
+
class ReviewScrapper(FlowComponent, SeleniumService, HTTPService):
    """Component that runs the scraping function selected by ``type``.

    The ``type`` keyword names a method of the component; ``run`` awaits
    that method and stores what it returns as the component result.
    """

    def __init__(
        self,
        loop: asyncio.AbstractEventLoop = None,
        job: Callable = None,
        stat: Callable = None,
        **kwargs,
    ):
        """Read the scraper settings from ``kwargs`` and initialise the bases.

        Keyword Args:
            type: Name of the method to call in ``run`` (required).
            chunk_size: Chunk size stored on the instance (default 100).
            task_parts: Number of groups used by ``_processing_tasks``
                (default 10).

        Raises:
            ConfigError: when ``type`` is missing or empty.
        """
        self._fn = kwargs.pop('type', None)
        self.chunk_size: int = kwargs.get('chunk_size', 100)
        self.task_parts: int = kwargs.get('task_parts', 10)
        if not self._fn:
            # The base initialiser has not run yet, so instance attributes such
            # as __name__ may not exist: take the name from the class instead.
            raise ConfigError(
                f"{type(self).__name__}: require a `type` Function to be called, ex: availability"
            )
        super().__init__(loop=loop, job=job, stat=stat, **kwargs)

    def _component_name(self) -> str:
        """Return the name used to prefix this component's error messages."""
        return getattr(self, "__name__", type(self).__name__)

    async def get_cookies(self, url: str) -> dict:
        """Open ``url`` in a headless Chrome and return its cookies.

        Args:
            url: Page to load.

        Returns:
            Mapping of cookie name -> cookie value.
        """
        options = webdriver.ChromeOptions()
        options.add_argument('--headless')  # Run in headless mode
        driver = webdriver.Chrome(options=options)
        try:
            driver.get(url)
            cookies = driver.get_cookies()
        finally:
            # Always release the browser, even when loading the page fails.
            driver.quit()
        return {cookie['name']: cookie['value'] for cookie in cookies}

    def chunkify(self, lst, n):
        """Split list lst into chunks of size n."""
        for i in range(0, len(lst), n):
            yield lst[i:i + n]

    def column_exists(self, column: str, default_val: Any = None):
        """Report whether ``column`` is present in ``self.data``.

        A missing column is logged and then created with ``default_val``.

        Returns:
            True if the column already existed, False if it was created.
        """
        if column not in self.data.columns:
            self._logger.warning(
                f"Column {column} does not exist in the Dataframe"
            )
            self.data[column] = default_val
            return False
        return True

    def split_parts(self, task_list, num_parts: int = 5) -> list:
        """Split ``task_list`` into ``num_parts`` contiguous, balanced parts.

        Args:
            task_list: Sequence to split.
            num_parts: Number of parts to produce; must be at least 1.

        Returns:
            A list with exactly ``num_parts`` slices. Sizes differ by at most
            one item; trailing parts are empty when ``task_list`` is shorter
            than ``num_parts``.

        Raises:
            ValueError: when ``num_parts`` is lower than 1. A negative value
                would otherwise return no parts and silently drop every task.
        """
        if num_parts < 1:
            raise ValueError(
                f"num_parts must be a positive integer, got {num_parts}"
            )
        part_size, remainder = divmod(len(task_list), num_parts)
        parts = []
        start = 0
        for i in range(num_parts):
            # Distribute the remainder across the first `remainder` parts
            end = start + part_size + (1 if i < remainder else 0)
            parts.append(task_list[start:end])
            start = end
        return parts

    async def _processing_tasks(self, tasks: list) -> list:
        """Await ``tasks`` in ``self.task_parts`` groups, one group at a time.

        Returns:
            The results of all tasks, in the order the tasks were given. The
            first exception raised by a task is propagated.
        """
        results = []
        for chunk in self.split_parts(tasks, self.task_parts):
            result = await asyncio.gather(*chunk, return_exceptions=False)
            results.extend(result)
        return results

    async def run(self):
        """Await the method selected by ``type`` and store its result.

        Returns:
            Whatever the selected method returns.

        Raises:
            ComponentError: when the selected method does not exist, is not
                callable, or fails with an unexpected error.
            TimeoutError, NotSupported: re-raised unchanged.
        """
        # we need to call the "function" for Services.
        # A default avoids AttributeError, so the ComponentError below is raised.
        fn = getattr(self, self._fn, None)
        result = None
        if not callable(fn):
            raise ComponentError(
                f"{self._component_name()}: Function {self._fn} doesn't exists."
            )
        try:
            result = await fn()
        except (ComponentError, TimeoutError, NotSupported):
            raise
        except Exception as exc:
            raise ComponentError(
                f"{self._component_name()}: Unknown Error: {exc}"
            ) from exc
        # Debug-only output, guarded so it cannot fail on empty or
        # non-DataFrame results.
        if getattr(self, "_debug", False) is True:
            print(result)
            if isinstance(result, DataFrame):
                print("::: Printing Column Information === ")
                has_rows = len(result.index) > 0
                for column, t in result.dtypes.items():
                    sample = result[column].iloc[0] if has_rows else None
                    print(column, "->", t, "->", sample)
        self._result = result
        return self._result

    async def close(self, **kwargs) -> bool:
        """Close the browser driver.

        ``close_driver`` is not defined in this class; presumably it is
        provided by SeleniumService.
        """
        self.close_driver()
        return True

    async def start(self, **kwargs) -> bool:
        """Validate the input and resolve the API token.

        Returns:
            True when the component is ready to run.

        Raises:
            ComponentError: when the input received from the previous
                component is not a pandas DataFrame.
            ConfigError: when the method named by ``type`` does not exist.
        """
        await super(ReviewScrapper, self).start(**kwargs)
        if self.previous:
            self.data = self.input
            if not isinstance(self.data, DataFrame):
                raise ComponentError(
                    "Incompatible Pandas Dataframe"
                )
        self.api_token = self.get_env_value(self.api_token) if hasattr(self, 'api_token') else self.get_env_value('TARGET_API_KEY')  # noqa
        if not hasattr(self, self._fn):
            raise ConfigError(
                f"{self._component_name()}: Unable to found Function {self._fn} in Component."
            )
        return True
|