flowtask 5.8.4__cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowtask/__init__.py +93 -0
- flowtask/__main__.py +38 -0
- flowtask/bots/__init__.py +6 -0
- flowtask/bots/check.py +93 -0
- flowtask/bots/codebot.py +51 -0
- flowtask/components/ASPX.py +148 -0
- flowtask/components/AddDataset.py +352 -0
- flowtask/components/Amazon.py +523 -0
- flowtask/components/AutoTask.py +314 -0
- flowtask/components/Azure.py +80 -0
- flowtask/components/AzureUsers.py +106 -0
- flowtask/components/BaseAction.py +91 -0
- flowtask/components/BaseLoop.py +198 -0
- flowtask/components/BestBuy.py +800 -0
- flowtask/components/CSVToGCS.py +120 -0
- flowtask/components/CompanyScraper/__init__.py +1 -0
- flowtask/components/CompanyScraper/parsers/__init__.py +6 -0
- flowtask/components/CompanyScraper/parsers/base.py +102 -0
- flowtask/components/CompanyScraper/parsers/explorium.py +192 -0
- flowtask/components/CompanyScraper/parsers/leadiq.py +206 -0
- flowtask/components/CompanyScraper/parsers/rocket.py +133 -0
- flowtask/components/CompanyScraper/parsers/siccode.py +109 -0
- flowtask/components/CompanyScraper/parsers/visualvisitor.py +130 -0
- flowtask/components/CompanyScraper/parsers/zoominfo.py +118 -0
- flowtask/components/CompanyScraper/scrapper.py +1054 -0
- flowtask/components/CopyTo.py +177 -0
- flowtask/components/CopyToBigQuery.py +243 -0
- flowtask/components/CopyToMongoDB.py +291 -0
- flowtask/components/CopyToPg.py +609 -0
- flowtask/components/CopyToRethink.py +207 -0
- flowtask/components/CreateGCSBucket.py +102 -0
- flowtask/components/CreateReport/CreateReport.py +228 -0
- flowtask/components/CreateReport/__init__.py +9 -0
- flowtask/components/CreateReport/charts/__init__.py +15 -0
- flowtask/components/CreateReport/charts/bar.py +51 -0
- flowtask/components/CreateReport/charts/base.py +66 -0
- flowtask/components/CreateReport/charts/pie.py +64 -0
- flowtask/components/CreateReport/utils.py +9 -0
- flowtask/components/CustomerSatisfaction.py +196 -0
- flowtask/components/DataInput.py +200 -0
- flowtask/components/DateList.py +255 -0
- flowtask/components/DbClient.py +163 -0
- flowtask/components/DialPad.py +146 -0
- flowtask/components/DocumentDBQuery.py +200 -0
- flowtask/components/DownloadFrom.py +371 -0
- flowtask/components/DownloadFromD2L.py +113 -0
- flowtask/components/DownloadFromFTP.py +181 -0
- flowtask/components/DownloadFromIMAP.py +315 -0
- flowtask/components/DownloadFromS3.py +198 -0
- flowtask/components/DownloadFromSFTP.py +265 -0
- flowtask/components/DownloadFromSharepoint.py +110 -0
- flowtask/components/DownloadFromSmartSheet.py +114 -0
- flowtask/components/DownloadS3File.py +229 -0
- flowtask/components/Dummy.py +59 -0
- flowtask/components/DuplicatePhoto.py +411 -0
- flowtask/components/EmployeeEvaluation.py +237 -0
- flowtask/components/ExecuteSQL.py +323 -0
- flowtask/components/ExtractHTML.py +178 -0
- flowtask/components/FileBase.py +178 -0
- flowtask/components/FileCopy.py +181 -0
- flowtask/components/FileDelete.py +82 -0
- flowtask/components/FileExists.py +146 -0
- flowtask/components/FileIteratorDelete.py +112 -0
- flowtask/components/FileList.py +194 -0
- flowtask/components/FileOpen.py +75 -0
- flowtask/components/FileRead.py +120 -0
- flowtask/components/FileRename.py +106 -0
- flowtask/components/FilterIf.py +284 -0
- flowtask/components/FilterRows/FilterRows.py +200 -0
- flowtask/components/FilterRows/__init__.py +10 -0
- flowtask/components/FilterRows/functions.py +4 -0
- flowtask/components/GCSToBigQuery.py +103 -0
- flowtask/components/GoogleA4.py +150 -0
- flowtask/components/GoogleGeoCoding.py +344 -0
- flowtask/components/GooglePlaces.py +315 -0
- flowtask/components/GoogleSearch.py +539 -0
- flowtask/components/HTTPClient.py +268 -0
- flowtask/components/ICIMS.py +146 -0
- flowtask/components/IF.py +179 -0
- flowtask/components/IcimsFolderCopy.py +173 -0
- flowtask/components/ImageFeatures/__init__.py +5 -0
- flowtask/components/ImageFeatures/process.py +233 -0
- flowtask/components/IteratorBase.py +251 -0
- flowtask/components/LangchainLoader/__init__.py +5 -0
- flowtask/components/LangchainLoader/loader.py +194 -0
- flowtask/components/LangchainLoader/loaders/__init__.py +22 -0
- flowtask/components/LangchainLoader/loaders/abstract.py +362 -0
- flowtask/components/LangchainLoader/loaders/basepdf.py +50 -0
- flowtask/components/LangchainLoader/loaders/docx.py +91 -0
- flowtask/components/LangchainLoader/loaders/html.py +119 -0
- flowtask/components/LangchainLoader/loaders/pdfblocks.py +146 -0
- flowtask/components/LangchainLoader/loaders/pdfmark.py +79 -0
- flowtask/components/LangchainLoader/loaders/pdftables.py +135 -0
- flowtask/components/LangchainLoader/loaders/qa.py +67 -0
- flowtask/components/LangchainLoader/loaders/txt.py +55 -0
- flowtask/components/LeadIQ.py +650 -0
- flowtask/components/Loop.py +253 -0
- flowtask/components/Lowes.py +334 -0
- flowtask/components/MS365Usage.py +156 -0
- flowtask/components/MSTeamsMessages.py +320 -0
- flowtask/components/MarketClustering.py +1051 -0
- flowtask/components/MergeFiles.py +362 -0
- flowtask/components/MilvusOutput.py +87 -0
- flowtask/components/NearByStores.py +175 -0
- flowtask/components/NetworkNinja/__init__.py +6 -0
- flowtask/components/NetworkNinja/models/__init__.py +52 -0
- flowtask/components/NetworkNinja/models/abstract.py +177 -0
- flowtask/components/NetworkNinja/models/account.py +39 -0
- flowtask/components/NetworkNinja/models/client.py +19 -0
- flowtask/components/NetworkNinja/models/district.py +14 -0
- flowtask/components/NetworkNinja/models/events.py +101 -0
- flowtask/components/NetworkNinja/models/forms.py +499 -0
- flowtask/components/NetworkNinja/models/market.py +16 -0
- flowtask/components/NetworkNinja/models/organization.py +34 -0
- flowtask/components/NetworkNinja/models/photos.py +125 -0
- flowtask/components/NetworkNinja/models/project.py +44 -0
- flowtask/components/NetworkNinja/models/region.py +28 -0
- flowtask/components/NetworkNinja/models/store.py +203 -0
- flowtask/components/NetworkNinja/models/user.py +151 -0
- flowtask/components/NetworkNinja/router.py +854 -0
- flowtask/components/Odoo.py +175 -0
- flowtask/components/OdooInjector.py +192 -0
- flowtask/components/OpenFromXML.py +126 -0
- flowtask/components/OpenWeather.py +41 -0
- flowtask/components/OpenWithBase.py +616 -0
- flowtask/components/OpenWithPandas.py +715 -0
- flowtask/components/PGPDecrypt.py +199 -0
- flowtask/components/PandasIterator.py +187 -0
- flowtask/components/PandasToFile.py +189 -0
- flowtask/components/Paradox.py +339 -0
- flowtask/components/ParamIterator.py +117 -0
- flowtask/components/ParseHTML.py +84 -0
- flowtask/components/PlacerStores.py +249 -0
- flowtask/components/Pokemon.py +507 -0
- flowtask/components/PositiveBot.py +62 -0
- flowtask/components/PowerPointSlide.py +400 -0
- flowtask/components/PrintMessage.py +127 -0
- flowtask/components/ProductCompetitors/__init__.py +5 -0
- flowtask/components/ProductCompetitors/parsers/__init__.py +7 -0
- flowtask/components/ProductCompetitors/parsers/base.py +72 -0
- flowtask/components/ProductCompetitors/parsers/bestbuy.py +86 -0
- flowtask/components/ProductCompetitors/parsers/lowes.py +103 -0
- flowtask/components/ProductCompetitors/scrapper.py +155 -0
- flowtask/components/ProductCompliant.py +169 -0
- flowtask/components/ProductInfo/__init__.py +1 -0
- flowtask/components/ProductInfo/parsers/__init__.py +5 -0
- flowtask/components/ProductInfo/parsers/base.py +83 -0
- flowtask/components/ProductInfo/parsers/brother.py +97 -0
- flowtask/components/ProductInfo/parsers/canon.py +167 -0
- flowtask/components/ProductInfo/parsers/epson.py +118 -0
- flowtask/components/ProductInfo/parsers/hp.py +131 -0
- flowtask/components/ProductInfo/parsers/samsung.py +97 -0
- flowtask/components/ProductInfo/scraper.py +319 -0
- flowtask/components/ProductPricing.py +118 -0
- flowtask/components/QS.py +261 -0
- flowtask/components/QSBase.py +201 -0
- flowtask/components/QueryIterator.py +273 -0
- flowtask/components/QueryToInsert.py +327 -0
- flowtask/components/QueryToPandas.py +432 -0
- flowtask/components/RESTClient.py +195 -0
- flowtask/components/RethinkDBQuery.py +189 -0
- flowtask/components/Rsync.py +74 -0
- flowtask/components/RunSSH.py +59 -0
- flowtask/components/RunShell.py +71 -0
- flowtask/components/SalesForce.py +20 -0
- flowtask/components/SaveImageBank/__init__.py +257 -0
- flowtask/components/SchedulingVisits.py +592 -0
- flowtask/components/ScrapPage.py +216 -0
- flowtask/components/ScrapSearch.py +79 -0
- flowtask/components/SendNotify.py +257 -0
- flowtask/components/SentimentAnalysis.py +694 -0
- flowtask/components/ServiceScrapper/__init__.py +5 -0
- flowtask/components/ServiceScrapper/parsers/__init__.py +1 -0
- flowtask/components/ServiceScrapper/parsers/base.py +94 -0
- flowtask/components/ServiceScrapper/parsers/costco.py +93 -0
- flowtask/components/ServiceScrapper/scrapper.py +199 -0
- flowtask/components/SetVariables.py +156 -0
- flowtask/components/SubTask.py +182 -0
- flowtask/components/SuiteCRM.py +48 -0
- flowtask/components/Switch.py +175 -0
- flowtask/components/TableBase.py +148 -0
- flowtask/components/TableDelete.py +312 -0
- flowtask/components/TableInput.py +143 -0
- flowtask/components/TableOutput/TableOutput.py +384 -0
- flowtask/components/TableOutput/__init__.py +3 -0
- flowtask/components/TableSchema.py +534 -0
- flowtask/components/Target.py +223 -0
- flowtask/components/ThumbnailGenerator.py +156 -0
- flowtask/components/ToPandas.py +67 -0
- flowtask/components/TransformRows/TransformRows.py +507 -0
- flowtask/components/TransformRows/__init__.py +9 -0
- flowtask/components/TransformRows/functions.py +559 -0
- flowtask/components/TransposeRows.py +176 -0
- flowtask/components/UPCDatabase.py +86 -0
- flowtask/components/UnGzip.py +171 -0
- flowtask/components/Uncompress.py +172 -0
- flowtask/components/UniqueRows.py +126 -0
- flowtask/components/Unzip.py +107 -0
- flowtask/components/UpdateOperationalVars.py +147 -0
- flowtask/components/UploadTo.py +299 -0
- flowtask/components/UploadToS3.py +136 -0
- flowtask/components/UploadToSFTP.py +160 -0
- flowtask/components/UploadToSharepoint.py +205 -0
- flowtask/components/UserFunc.py +122 -0
- flowtask/components/VivaTracker.py +140 -0
- flowtask/components/WSDLClient.py +123 -0
- flowtask/components/Wait.py +18 -0
- flowtask/components/Walmart.py +199 -0
- flowtask/components/Workplace.py +134 -0
- flowtask/components/XMLToPandas.py +267 -0
- flowtask/components/Zammad/__init__.py +41 -0
- flowtask/components/Zammad/models.py +0 -0
- flowtask/components/ZoomInfoScraper.py +409 -0
- flowtask/components/__init__.py +104 -0
- flowtask/components/abstract.py +18 -0
- flowtask/components/flow.py +530 -0
- flowtask/components/google.py +335 -0
- flowtask/components/group.py +221 -0
- flowtask/components/py.typed +0 -0
- flowtask/components/reviewscrap.py +132 -0
- flowtask/components/tAutoincrement.py +117 -0
- flowtask/components/tConcat.py +109 -0
- flowtask/components/tExplode.py +119 -0
- flowtask/components/tFilter.py +184 -0
- flowtask/components/tGroup.py +236 -0
- flowtask/components/tJoin.py +270 -0
- flowtask/components/tMap/__init__.py +9 -0
- flowtask/components/tMap/functions.py +54 -0
- flowtask/components/tMap/tMap.py +450 -0
- flowtask/components/tMelt.py +112 -0
- flowtask/components/tMerge.py +114 -0
- flowtask/components/tOrder.py +93 -0
- flowtask/components/tPandas.py +94 -0
- flowtask/components/tPivot.py +71 -0
- flowtask/components/tPluckCols.py +76 -0
- flowtask/components/tUnnest.py +82 -0
- flowtask/components/user.py +401 -0
- flowtask/conf.py +457 -0
- flowtask/download.py +102 -0
- flowtask/events/__init__.py +11 -0
- flowtask/events/events/__init__.py +20 -0
- flowtask/events/events/abstract.py +95 -0
- flowtask/events/events/alerts/__init__.py +362 -0
- flowtask/events/events/alerts/colfunctions.py +131 -0
- flowtask/events/events/alerts/functions.py +158 -0
- flowtask/events/events/dummy.py +12 -0
- flowtask/events/events/exec.py +124 -0
- flowtask/events/events/file/__init__.py +7 -0
- flowtask/events/events/file/base.py +51 -0
- flowtask/events/events/file/copy.py +23 -0
- flowtask/events/events/file/delete.py +16 -0
- flowtask/events/events/interfaces/__init__.py +9 -0
- flowtask/events/events/interfaces/client.py +67 -0
- flowtask/events/events/interfaces/credentials.py +28 -0
- flowtask/events/events/interfaces/notifications.py +58 -0
- flowtask/events/events/jira.py +122 -0
- flowtask/events/events/log.py +26 -0
- flowtask/events/events/logerr.py +52 -0
- flowtask/events/events/notify.py +59 -0
- flowtask/events/events/notify_event.py +160 -0
- flowtask/events/events/publish.py +54 -0
- flowtask/events/events/sendfile.py +104 -0
- flowtask/events/events/task.py +97 -0
- flowtask/events/events/teams.py +98 -0
- flowtask/events/events/webhook.py +58 -0
- flowtask/events/manager.py +287 -0
- flowtask/exceptions.c +39393 -0
- flowtask/exceptions.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/extensions/__init__.py +3 -0
- flowtask/extensions/abstract.py +82 -0
- flowtask/extensions/logging/__init__.py +65 -0
- flowtask/hooks/__init__.py +9 -0
- flowtask/hooks/actions/__init__.py +22 -0
- flowtask/hooks/actions/abstract.py +66 -0
- flowtask/hooks/actions/dummy.py +23 -0
- flowtask/hooks/actions/jira.py +74 -0
- flowtask/hooks/actions/rest.py +320 -0
- flowtask/hooks/actions/sampledata.py +37 -0
- flowtask/hooks/actions/sensor.py +23 -0
- flowtask/hooks/actions/task.py +9 -0
- flowtask/hooks/actions/ticket.py +37 -0
- flowtask/hooks/actions/zammad.py +55 -0
- flowtask/hooks/hook.py +62 -0
- flowtask/hooks/models.py +17 -0
- flowtask/hooks/service.py +187 -0
- flowtask/hooks/step.py +91 -0
- flowtask/hooks/types/__init__.py +23 -0
- flowtask/hooks/types/base.py +129 -0
- flowtask/hooks/types/brokers/__init__.py +11 -0
- flowtask/hooks/types/brokers/base.py +54 -0
- flowtask/hooks/types/brokers/mqtt.py +35 -0
- flowtask/hooks/types/brokers/rabbitmq.py +82 -0
- flowtask/hooks/types/brokers/redis.py +83 -0
- flowtask/hooks/types/brokers/sqs.py +44 -0
- flowtask/hooks/types/fs.py +232 -0
- flowtask/hooks/types/http.py +49 -0
- flowtask/hooks/types/imap.py +200 -0
- flowtask/hooks/types/jira.py +279 -0
- flowtask/hooks/types/mail.py +205 -0
- flowtask/hooks/types/postgres.py +98 -0
- flowtask/hooks/types/responses/__init__.py +8 -0
- flowtask/hooks/types/responses/base.py +5 -0
- flowtask/hooks/types/sharepoint.py +288 -0
- flowtask/hooks/types/ssh.py +141 -0
- flowtask/hooks/types/tagged.py +59 -0
- flowtask/hooks/types/upload.py +85 -0
- flowtask/hooks/types/watch.py +71 -0
- flowtask/hooks/types/web.py +36 -0
- flowtask/interfaces/AzureClient.py +137 -0
- flowtask/interfaces/AzureGraph.py +839 -0
- flowtask/interfaces/Boto3Client.py +326 -0
- flowtask/interfaces/DropboxClient.py +173 -0
- flowtask/interfaces/ExcelHandler.py +94 -0
- flowtask/interfaces/FTPClient.py +131 -0
- flowtask/interfaces/GoogleCalendar.py +201 -0
- flowtask/interfaces/GoogleClient.py +133 -0
- flowtask/interfaces/GoogleDrive.py +127 -0
- flowtask/interfaces/GoogleGCS.py +89 -0
- flowtask/interfaces/GoogleGeocoding.py +93 -0
- flowtask/interfaces/GoogleLang.py +114 -0
- flowtask/interfaces/GooglePub.py +61 -0
- flowtask/interfaces/GoogleSheet.py +68 -0
- flowtask/interfaces/IMAPClient.py +137 -0
- flowtask/interfaces/O365Calendar.py +113 -0
- flowtask/interfaces/O365Client.py +220 -0
- flowtask/interfaces/OneDrive.py +284 -0
- flowtask/interfaces/Outlook.py +155 -0
- flowtask/interfaces/ParrotBot.py +130 -0
- flowtask/interfaces/SSHClient.py +378 -0
- flowtask/interfaces/Sharepoint.py +496 -0
- flowtask/interfaces/__init__.py +36 -0
- flowtask/interfaces/azureauth.py +119 -0
- flowtask/interfaces/cache.py +201 -0
- flowtask/interfaces/client.py +82 -0
- flowtask/interfaces/compress.py +525 -0
- flowtask/interfaces/credentials.py +124 -0
- flowtask/interfaces/d2l.py +239 -0
- flowtask/interfaces/databases/__init__.py +5 -0
- flowtask/interfaces/databases/db.py +223 -0
- flowtask/interfaces/databases/documentdb.py +55 -0
- flowtask/interfaces/databases/rethink.py +39 -0
- flowtask/interfaces/dataframes/__init__.py +11 -0
- flowtask/interfaces/dataframes/abstract.py +21 -0
- flowtask/interfaces/dataframes/arrow.py +71 -0
- flowtask/interfaces/dataframes/dt.py +69 -0
- flowtask/interfaces/dataframes/pandas.py +167 -0
- flowtask/interfaces/dataframes/polars.py +60 -0
- flowtask/interfaces/db.py +263 -0
- flowtask/interfaces/env.py +46 -0
- flowtask/interfaces/func.py +137 -0
- flowtask/interfaces/http.py +1780 -0
- flowtask/interfaces/locale.py +40 -0
- flowtask/interfaces/log.py +75 -0
- flowtask/interfaces/mask.py +143 -0
- flowtask/interfaces/notification.py +154 -0
- flowtask/interfaces/playwright.py +339 -0
- flowtask/interfaces/powerpoint.py +368 -0
- flowtask/interfaces/py.typed +0 -0
- flowtask/interfaces/qs.py +376 -0
- flowtask/interfaces/result.py +87 -0
- flowtask/interfaces/selenium_service.py +779 -0
- flowtask/interfaces/smartsheet.py +154 -0
- flowtask/interfaces/stat.py +39 -0
- flowtask/interfaces/task.py +96 -0
- flowtask/interfaces/template.py +118 -0
- flowtask/interfaces/vectorstores/__init__.py +1 -0
- flowtask/interfaces/vectorstores/abstract.py +133 -0
- flowtask/interfaces/vectorstores/milvus.py +669 -0
- flowtask/interfaces/zammad.py +107 -0
- flowtask/models.py +193 -0
- flowtask/parsers/__init__.py +15 -0
- flowtask/parsers/_yaml.c +11978 -0
- flowtask/parsers/_yaml.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/argparser.py +235 -0
- flowtask/parsers/base.c +15155 -0
- flowtask/parsers/base.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/json.c +11968 -0
- flowtask/parsers/json.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/maps.py +49 -0
- flowtask/parsers/toml.c +11968 -0
- flowtask/parsers/toml.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/plugins/__init__.py +16 -0
- flowtask/plugins/components/__init__.py +0 -0
- flowtask/plugins/handler/__init__.py +45 -0
- flowtask/plugins/importer.py +31 -0
- flowtask/plugins/sources/__init__.py +0 -0
- flowtask/runner.py +283 -0
- flowtask/scheduler/__init__.py +9 -0
- flowtask/scheduler/functions.py +493 -0
- flowtask/scheduler/handlers/__init__.py +8 -0
- flowtask/scheduler/handlers/manager.py +504 -0
- flowtask/scheduler/handlers/models.py +58 -0
- flowtask/scheduler/handlers/service.py +72 -0
- flowtask/scheduler/notifications.py +65 -0
- flowtask/scheduler/scheduler.py +993 -0
- flowtask/services/__init__.py +0 -0
- flowtask/services/bots/__init__.py +0 -0
- flowtask/services/bots/telegram.py +264 -0
- flowtask/services/files/__init__.py +11 -0
- flowtask/services/files/manager.py +522 -0
- flowtask/services/files/model.py +37 -0
- flowtask/services/files/service.py +767 -0
- flowtask/services/jira/__init__.py +3 -0
- flowtask/services/jira/jira_actions.py +191 -0
- flowtask/services/tasks/__init__.py +13 -0
- flowtask/services/tasks/launcher.py +213 -0
- flowtask/services/tasks/manager.py +323 -0
- flowtask/services/tasks/service.py +275 -0
- flowtask/services/tasks/task_manager.py +376 -0
- flowtask/services/tasks/tasks.py +155 -0
- flowtask/storages/__init__.py +16 -0
- flowtask/storages/exceptions.py +12 -0
- flowtask/storages/files/__init__.py +8 -0
- flowtask/storages/files/abstract.py +29 -0
- flowtask/storages/files/filesystem.py +66 -0
- flowtask/storages/tasks/__init__.py +19 -0
- flowtask/storages/tasks/abstract.py +26 -0
- flowtask/storages/tasks/database.py +33 -0
- flowtask/storages/tasks/filesystem.py +108 -0
- flowtask/storages/tasks/github.py +119 -0
- flowtask/storages/tasks/memory.py +45 -0
- flowtask/storages/tasks/row.py +25 -0
- flowtask/tasks/__init__.py +0 -0
- flowtask/tasks/abstract.py +526 -0
- flowtask/tasks/command.py +118 -0
- flowtask/tasks/pile.py +486 -0
- flowtask/tasks/py.typed +0 -0
- flowtask/tasks/task.py +778 -0
- flowtask/template/__init__.py +161 -0
- flowtask/tests.py +257 -0
- flowtask/types/__init__.py +8 -0
- flowtask/types/typedefs.c +11347 -0
- flowtask/types/typedefs.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/__init__.py +24 -0
- flowtask/utils/constants.py +117 -0
- flowtask/utils/encoders.py +21 -0
- flowtask/utils/executor.py +112 -0
- flowtask/utils/functions.cpp +14280 -0
- flowtask/utils/functions.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/json.cpp +13349 -0
- flowtask/utils/json.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/mail.py +63 -0
- flowtask/utils/parseqs.c +13324 -0
- flowtask/utils/parserqs.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/stats.py +308 -0
- flowtask/utils/transformations.py +74 -0
- flowtask/utils/uv.py +12 -0
- flowtask/utils/validators.py +97 -0
- flowtask/version.py +11 -0
- flowtask-5.8.4.dist-info/LICENSE +201 -0
- flowtask-5.8.4.dist-info/METADATA +209 -0
- flowtask-5.8.4.dist-info/RECORD +470 -0
- flowtask-5.8.4.dist-info/WHEEL +6 -0
- flowtask-5.8.4.dist-info/entry_points.txt +3 -0
- flowtask-5.8.4.dist-info/top_level.txt +2 -0
- plugins/components/CreateQR.py +39 -0
- plugins/components/TestComponent.py +28 -0
- plugins/components/Use1.py +13 -0
- plugins/components/Workplace.py +117 -0
- plugins/components/__init__.py +3 -0
- plugins/sources/__init__.py +0 -0
- plugins/sources/get_populartimes.py +78 -0
- plugins/sources/google.py +150 -0
- plugins/sources/hubspot.py +679 -0
- plugins/sources/icims.py +679 -0
- plugins/sources/mobileinsight.py +501 -0
- plugins/sources/newrelic.py +262 -0
- plugins/sources/uap.py +268 -0
- plugins/sources/venu.py +244 -0
- plugins/sources/vocinity.py +314 -0
@@ -0,0 +1,779 @@
|
|
1
|
+
from abc import ABC
|
2
|
+
from typing import Optional
|
3
|
+
from collections.abc import Callable
|
4
|
+
import random
|
5
|
+
import time
|
6
|
+
# BeautifulSoup:
|
7
|
+
from bs4 import BeautifulSoup
|
8
|
+
from lxml import html, etree
|
9
|
+
# Undetected Chrome Driver:
|
10
|
+
import undetected_chromedriver as uc
|
11
|
+
# WebDriver Support:
|
12
|
+
from webdriver_manager.chrome import ChromeDriverManager
|
13
|
+
from webdriver_manager.firefox import GeckoDriverManager
|
14
|
+
from webdriver_manager.microsoft import EdgeChromiumDriverManager
|
15
|
+
from webdriver_manager.core.driver_cache import DriverCacheManager
|
16
|
+
# from selenium import webdriver
|
17
|
+
from seleniumwire import webdriver
|
18
|
+
# Selenium:
|
19
|
+
from selenium import webdriver as selenium_driver
|
20
|
+
# Selenium Proxy:
|
21
|
+
from selenium.webdriver import Proxy
|
22
|
+
# Chrome Support:
|
23
|
+
from selenium.webdriver.chrome.service import Service as ChromeService
|
24
|
+
from selenium.webdriver.chrome.options import Options as ChromeOptions
|
25
|
+
# Firefox Support:
|
26
|
+
from selenium.webdriver.firefox.service import Service as FirefoxService
|
27
|
+
from selenium.webdriver.firefox.options import Options as FirefoxOptions
|
28
|
+
# Edge Support:
|
29
|
+
from selenium.webdriver.edge.service import Service as EdgeService
|
30
|
+
from selenium.webdriver.edge.options import Options as EdgeOptions
|
31
|
+
# Safari Support:
|
32
|
+
from selenium.webdriver.safari.options import Options as SafariOptions
|
33
|
+
from selenium.webdriver.safari.service import Service as SafariService
|
34
|
+
# WebKitGTK Support:
|
35
|
+
from selenium.webdriver.webkitgtk.service import Service as WebKitGTKService
|
36
|
+
from selenium.webdriver.webkitgtk.options import Options as WebKitGTKOptions
|
37
|
+
# Selenium Options:
|
38
|
+
from selenium.webdriver.common.by import By
|
39
|
+
from selenium.webdriver.common.action_chains import ActionChains
|
40
|
+
from selenium.webdriver.support import expected_conditions as EC
|
41
|
+
from selenium.webdriver.support.ui import WebDriverWait
|
42
|
+
from selenium.common.exceptions import (
|
43
|
+
TimeoutException,
|
44
|
+
NoSuchElementException,
|
45
|
+
WebDriverException
|
46
|
+
)
|
47
|
+
from navconfig import BASE_DIR
|
48
|
+
from navconfig.logging import logging
|
49
|
+
from ..conf import (
|
50
|
+
### Oxylabs Proxy Support for Selenium
|
51
|
+
OXYLABS_USERNAME,
|
52
|
+
OXYLABS_PASSWORD,
|
53
|
+
OXYLABS_ENDPOINT,
|
54
|
+
GOOGLE_SEARCH_ENGINE_ID
|
55
|
+
)
|
56
|
+
from ..exceptions import (
|
57
|
+
NotSupported,
|
58
|
+
TimeOutError,
|
59
|
+
ComponentError
|
60
|
+
)
|
61
|
+
from .http import ua, mobile_ua
|
62
|
+
|
63
|
+
|
64
|
+
# Set the log level of each third-party logger used by the browser stack.
for _logger_name, _logger_level in (
    ('selenium.webdriver', logging.INFO),
    ('WDM', logging.WARNING),
    ('hpack', logging.WARNING),
    ('seleniumwire', logging.WARNING),
    ('undetected_chromedriver', logging.INFO),
):
    logging.getLogger(name=_logger_name).setLevel(_logger_level)
# Do not leave the loop variables behind in the module namespace.
del _logger_name, _logger_level
|
69
|
+
|
70
|
+
|
71
|
+
mobile_devices = [
|
72
|
+
'iPhone X',
|
73
|
+
'Google Nexus 7',
|
74
|
+
'Pixel 2',
|
75
|
+
'Samsung Galaxy Tab',
|
76
|
+
'Nexus 5',
|
77
|
+
]
|
78
|
+
|
79
|
+
|
80
|
+
class SeleniumService(ABC):
|
81
|
+
"""SeleniumService.
|
82
|
+
|
83
|
+
Interface for making HTTP connections using Selenium.
|
84
|
+
"""
|
85
|
+
chrome_options = [
|
86
|
+
# "--headless=new",
|
87
|
+
"--disable-gpu",
|
88
|
+
"--no-sandbox",
|
89
|
+
"--enable-automation",
|
90
|
+
"--lang=en",
|
91
|
+
"--disable-dev-shm-usage",
|
92
|
+
"--disable-features=VizDisplayCompositor",
|
93
|
+
"--disable-features=IsolateOrigins",
|
94
|
+
# "--disable-extensions",
|
95
|
+
# "--disable-features=NetworkService,NetworkServiceInProcess",
|
96
|
+
# "--ignore-certificate-errors-spki-list",
|
97
|
+
# "--allow-insecure-localhost",
|
98
|
+
# "--ignore-ssl-errors",
|
99
|
+
# "--disable-web-security",
|
100
|
+
# "--allow-running-insecure-content",
|
101
|
+
]
|
102
|
+
undetected_options = [
|
103
|
+
"--disable-gpu",
|
104
|
+
"--no-sandbox",
|
105
|
+
"--enable-automation",
|
106
|
+
"--disable-blink-features=AutomationControlled",
|
107
|
+
"--disable-features=NetworkService,NetworkServiceInProcess",
|
108
|
+
"--disable-dev-shm-usage",
|
109
|
+
]
|
110
|
+
firefox_options = [
|
111
|
+
"--no-sandbox",
|
112
|
+
"--disable-gpu",
|
113
|
+
# browser viewport size
|
114
|
+
"--width=1920",
|
115
|
+
"--height=1080"
|
116
|
+
]
|
117
|
+
accept: str = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9" # noqa
|
118
|
+
|
119
|
+
def __init__(self, *args, **kwargs):
    """Store the browser configuration passed as keyword arguments.

    Every setting is read from ``kwargs`` with a default; ``kwargs`` itself
    is not modified and is forwarded unchanged (together with ``args``) to
    the next ``__init__`` in the MRO.  Request headers and cookies are
    prepared after that parent call.
    """
    opt = kwargs.get
    # Driver handles; both start unset.
    self._driver: Callable = None
    self._wait: WebDriverWait = None
    # Tuple describing the "accept cookies" button, if any.
    self.accept_cookies: tuple = opt('accept_cookies')
    # Browser / driver selection flags.
    self.use_wire: bool = opt('use_wire', False)
    self.use_firefox: bool = opt('use_firefox', False)
    self.use_edge: bool = opt('use_edge', False)
    self.use_safari: bool = opt('use_safari', False)
    self.use_webkit: bool = opt('use_webkit', False)
    self.as_mobile: bool = opt('as_mobile', False)
    self.use_undetected: bool = opt('use_undetected', False)
    self.headless: bool = opt('headless', True)
    self.enable_http2: bool = opt('enable_http2', True)
    # Optional explicit paths.
    self._browser_binary: str = opt('browser_binary')
    self._driver_binary: str = opt('driver_binary')
    self._userdata: str = opt('userdata')
    # Device name used when emulating a mobile browser.
    # TODO: create a dictionary matching userAgent and Mobile Device.
    self.mobile_device: str = opt('mobile_device', 'Pixel 2')
    # Page-handling settings.
    self.default_tag: str = opt('default_tag', 'body')
    self.accept_is_clickable: bool = opt('accept_is_clickable', False)
    self.timeout: int = opt('timeout', 60)
    self.wait_until: tuple = opt('wait_until')
    self.inner_tag: tuple = opt('inner_tag')
    self._options = None
    super().__init__(*args, **kwargs)
    # Default headers first, so caller-supplied headers override them.
    default_headers = {
        "Accept": self.accept,
        "TE": "trailers",
        "Accept-Encoding": "gzip, deflate",
        "DNT": "1",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": random.choice(ua),
    }
    caller_headers = opt('headers', {})
    self.headers: dict = {**default_headers, **caller_headers}
    # Cookies may arrive as a dict or as a "k=v; k2=v2" string.
    self.cookies: dict = opt('cookies', {})
    raw_cookies = self.cookies
    if isinstance(raw_cookies, str):
        self.cookies = self.parse_cookies(raw_cookies)
|
163
|
+
|
164
|
+
def parse_cookies(self, cookie_pair: str) -> dict:
    """Turn a ``"name=value; other=value"`` cookie string into a dict.

    Segments are separated by ``;``.  Segments that are empty or carry no
    ``=`` are ignored.  Names and values are stripped of surrounding
    whitespace, values additionally of surrounding double quotes; only the
    first ``=`` in a segment separates name from value.  When a name is
    repeated, the last value wins.
    """
    parsed = {}
    for segment in cookie_pair.strip().split(';'):
        segment = segment.strip()
        if '=' not in segment:
            # Covers both empty segments and bare flags without a value.
            continue
        key, _, raw_value = segment.partition('=')
        parsed[key.strip()] = raw_value.strip().strip('"')
    return parsed
|
175
|
+
|
176
|
+
def check_by_attribute(self, attribute: tuple):
    """Convert a ``(kind, value)`` pair into a Selenium ``(By.*, value)`` locator.

    Args:
        attribute: two-item sequence whose first item names the lookup
            strategy and whose second item is the selector value.  A falsy
            ``attribute`` (``None``, empty tuple) yields ``None``.

    Returns:
        A ``(By.<STRATEGY>, value)`` tuple, or ``None`` for falsy input.

    Raises:
        NotSupported: if the strategy name is not one of the known aliases.
    """
    if not attribute:
        return None
    el = attribute[0]
    value = attribute[1]
    # Each row maps the accepted aliases to one Selenium strategy.  The
    # aliases include Selenium's own strategy strings ('class name',
    # 'tag name', 'css selector') so a locator that was already converted
    # can pass through this method again without raising.
    strategies = (
        (('id',), By.ID),
        (('class', 'class name'), By.CLASS_NAME),
        (('name',), By.NAME),
        (('xpath',), By.XPATH),
        (('css', 'css selector'), By.CSS_SELECTOR),
        (('tag', 'tag name', 'tagname', 'tag_name'), By.TAG_NAME),
    )
    for aliases, strategy in strategies:
        if el in aliases:
            return (strategy, value)
    raise NotSupported(
        f"Selenium: Attribute {el} is not supported."
    )
|
199
|
+
|
200
|
+
def driver(self):
    """Return the driver object currently stored on this instance."""
    current = self._driver
    return current
|
202
|
+
|
203
|
+
def close_driver(self):
    """Call ``quit()`` on the stored driver, if there is one."""
    if not self._driver:
        # Nothing was started (or the stored driver is falsy): no-op.
        return
    self._driver.quit()
|
206
|
+
|
207
|
+
async def start(self, **kwargs) -> bool:
    """Run the parent ``start`` and normalise the configured locators.

    After awaiting the parent implementation, ``accept_cookies``,
    ``inner_tag`` and (when the instance has a ``screenshot`` attribute)
    ``screenshot['portion']`` are each passed through
    ``check_by_attribute`` and stored back in place.

    Returns:
        Always ``True``.

    Raises:
        NotSupported: if ``accept_cookies`` is set but is not a tuple.
    """
    await super(SeleniumService, self).start(**kwargs)
    # "Accept cookies" button: must be a tuple before it can be converted.
    cookie_button = self.accept_cookies
    if cookie_button:
        if not isinstance(cookie_button, tuple):
            raise NotSupported(
                "Accept Cookies must be a Tuple with the Button to Accept Cookies."
            )
        self.accept_cookies = self.check_by_attribute(cookie_button)
    inner = self.inner_tag
    if inner:
        self.inner_tag = self.check_by_attribute(inner)
    if hasattr(self, 'screenshot'):
        try:
            portion = self.screenshot['portion']
            self.screenshot['portion'] = self.check_by_attribute(portion)
        except (KeyError, ValueError):
            # Best effort: a missing 'portion' entry is simply left alone.
            pass
    return True
|
226
|
+
|
227
|
+
def proxy_selenium(self, user: str, password: str, endpoint: str, only_http: bool = True) -> dict:
    """Build the ``{"proxy": {...}}`` options dict for an authenticated proxy.

    Args:
        user: proxy user name.
        password: proxy password.
        endpoint: proxy address placed after the ``@`` in the URL.
        only_http: when exactly ``True`` the ``https`` entry also uses an
            ``http://`` URL; for any other value it uses ``https://``.

    Returns:
        A dict with a single ``"proxy"`` key holding ``"http"`` and
        ``"https"`` proxy URLs.
    """
    authority = f"{user}:{password}@{endpoint}"
    # Identity check on purpose: only the literal True selects plain http.
    secure_scheme = "http" if only_http is True else "https"
    return {
        "proxy": {
            "http": f"http://{authority}",
            "https": f"{secure_scheme}://{authority}",
        }
    }
|
245
|
+
|
246
|
+
async def get_driver(self):
    """
    Create a Selenium WebDriver, store it on ``self._driver`` and return it.

    The browser and its configuration are chosen from instance flags:
    - self.use_wire selects the Selenium Wire ``webdriver`` module
    - self.use_firefox, self.use_edge, self.use_safari, self.use_webkit,
      self.use_undetected select the browser (Chrome is the default)
    - self.use_proxy / self._free_proxy configure the proxy
    - self.as_mobile enables mobile emulation (Chrome branch only)
    - self.enable_http2 toggles HTTP/2 related settings
    - self.headless toggles headless mode

    A ``WebDriverWait`` bound to the new driver is stored on ``self._wait``.

    Raises:
        ComponentError: if a free proxy was requested but none is available.

    Returns:
        A Selenium WebDriver instance.
    """
    proxies = None
    proxy = None
    # Define first which webdriver module to use:
    if self.use_wire is True:
        # Selenium Wire: extra keyword arguments are forwarded to the driver.
        self._webdriver = webdriver
        _options = {
            "seleniumwire_options": {
                "proxy": None,
                'http2': False  # Explicitly disable HTTP/2 in Selenium Wire
            }
        }
    else:
        self._webdriver = selenium_driver
        _options = {}
    # Selenium Options:
    # NOTE(review): the options class gives priority to Firefox/Edge/Safari/
    # WebKit over "undetected", while driver creation below checks
    # use_undetected first — confirm both flags are never combined.
    if self.use_firefox is True:
        self._options = FirefoxOptions()
    elif self.use_edge is True:
        self._options = EdgeOptions()
    elif self.use_safari is True:
        self._options = SafariOptions()
    elif self.use_webkit is True:
        self._options = WebKitGTKOptions()
    elif self.use_undetected is True:
        # Start an undetected Chrome instance
        self._options = uc.ChromeOptions()
    else:
        # used for Chrome by default
        self._options = ChromeOptions()
    # Add a random User-Agent:
    _ua = random.choice(ua)
    self._options.add_argument(f"user-agent={_ua}")
    # Configure Proxy Support
    if self.use_proxy is True:
        if self._free_proxy is False:
            # Oxylabs Proxy:
            if hasattr(self, 'us_proxy'):
                endpoint = "us-pr.oxylabs.io:10000"
            else:
                endpoint = OXYLABS_ENDPOINT
            customer = f"customer-{OXYLABS_USERNAME}-sesstime-1"
            proxies = self.proxy_selenium(
                customer, OXYLABS_PASSWORD, endpoint
            )
            # Plain-dict capability, later passed to set_capability("proxy", ...).
            proxy = {
                "proxyType": "manual",
                "httpProxy": f"http://{customer}:{OXYLABS_PASSWORD}@{endpoint}",
                "sslProxy": f"https://{customer}:{OXYLABS_PASSWORD}@{endpoint}",
            }
            # and using the simple config:
            # NOTE(review): this argument uses OXYLABS_USERNAME/OXYLABS_ENDPOINT
            # rather than `customer`/`endpoint` chosen above — confirm intended.
            self._options.add_argument(
                f"--proxy-server=http://{OXYLABS_USERNAME}:{OXYLABS_PASSWORD}@{OXYLABS_ENDPOINT}"
            )
            if self.use_wire is True:
                _options['seleniumwire_options']['proxy'] = proxies['proxy']
        else:
            proxies = await self.get_proxies()
            if not proxies:
                # Fail with a clear message instead of an IndexError below.
                raise ComponentError(
                    "Selenium: no free proxies available."
                )
            # proxies is a list of IP:port; only the first entry is used.
            http_proxy = f"http://{proxies[0]}"
            ssl_proxy = f"https://{proxies[0]}"
            # Same dict shape as the Oxylabs branch so the capability set
            # on the Chrome branch is a plain dict in both cases.
            proxy = {
                "proxyType": "manual",
                "httpProxy": http_proxy,
                "sslProxy": ssl_proxy,
            }
            self._options.add_argument(f"--proxy-server={http_proxy}")
            if self.use_wire is True:
                _options['seleniumwire_options']['proxy'] = {
                    "http": http_proxy,
                    "https": ssl_proxy
                }
    if self.use_undetected is True:
        for option in self.undetected_options:
            try:
                self._options.add_argument(option)
            except Exception as exc:
                # Best effort: skip options the browser cannot accept.
                self._logger.debug(
                    f"Skipping unsupported option {option!r}: {exc}"
                )
        # Start an undetected Chrome instance, honouring self.headless
        self._options.headless = self.headless
        self._driver = uc.Chrome(
            options=self._options,
            headless=self.headless,
            use_subprocess=False,
            advanced_elements=True,
            enable_cdp_events=True
        )
    elif self.use_firefox is True:
        # Use Firefox Browser
        self._options.headless = self.headless
        for option in self.firefox_options:
            self._options.add_argument(option)
        if self.headless is True:
            self._options.add_argument("--headless")
        self._options.set_preference("network.http.http2.enabled", self.enable_http2)
        if self.use_proxy is True:
            # NOTE(review): always uses the Oxylabs credentials and default
            # endpoint here, even when a free proxy was selected above.
            customer = f"customer-{OXYLABS_USERNAME}-sesstime-1"
            proxy = {
                "proxyType": "manual",
                "httpProxy": f"{customer}:{OXYLABS_PASSWORD}@{OXYLABS_ENDPOINT}",
                "sslProxy": f"{customer}:{OXYLABS_PASSWORD}@{OXYLABS_ENDPOINT}",
            }
            self._options.set_capability("proxy", proxy)
        if self._browser_binary:
            self._options.binary_location = self._browser_binary
            service = FirefoxService(
                GeckoDriverManager().install()
            )
        elif self._driver_binary:
            # Use the binary driver if available
            service = FirefoxService(
                executable_path=self._driver_binary
            )
        else:
            # Use the cached driver if available
            cache_manager = DriverCacheManager(valid_range=7)
            service = FirefoxService(
                GeckoDriverManager(
                    cache_manager=cache_manager
                ).install()
            )
        self._driver = self._webdriver.Firefox(
            service=service,
            options=self._options,
            **_options
        )
    elif self.use_edge is True:
        # Use Chromium Edge Browser
        self._options.headless = self.headless
        if self.headless is True:
            self._options.add_argument("--headless=new")
        # if self.use_proxy is True:
        #     self._options.set_capability("proxy", proxy)
        if self._browser_binary is not None:
            self._options.binary_location = self._browser_binary
            # NOTE(review): the browser binary is also passed as the driver
            # executable path — confirm this is not meant to be a driver path.
            service = EdgeService(
                executable_path=self._browser_binary
            )
        else:
            service = EdgeService(
                EdgeChromiumDriverManager().install()
            )
        self._options.set_capability("ms:edgeOptions", {"http2": self.enable_http2})
        self._driver = self._webdriver.Edge(
            service=service,
            options=self._options,
            **_options
        )
    elif self.use_safari is True:
        # Use Safari Browser
        self._driver = self._webdriver.Safari(
            service=SafariService(
                executable_path=self._browser_path
            ),
            options=self._options,
            **_options
        )
    elif self.use_webkit is True:
        # Use WebKitGTK Browser
        self._driver = self._webdriver.WebKitGTK(
            service=WebKitGTKService().install(),
            options=self._options,
            **_options
        )
    else:
        # Use Chrome Browser
        if self.use_proxy is True:
            self._options.set_capability("proxy", proxy)
        if self.headless is True:
            self._options.add_argument("--headless=new")
        if self._browser_binary:
            self._options.binary_location = self._browser_binary
        service = ChromeService(
            ChromeDriverManager().install()
        )
        for option in self.chrome_options:
            try:
                self._options.add_argument(option)
            except Exception as exc:
                # Best effort: skip options the browser cannot accept.
                self._logger.debug(
                    f"Skipping unsupported option {option!r}: {exc}"
                )
        if self.as_mobile is True:
            # Pick a random device and mobile User-Agent for emulation.
            self.mobile_device = random.choice(mobile_devices)
            mobile_emulation_options = {
                "deviceName": self.mobile_device,
                "userAgent": random.choice(mobile_ua)
            }
            self._options.add_experimental_option(
                "mobileEmulation",
                mobile_emulation_options
            )
            self._logger.debug(
                f"Running in mobile emulation mode as {self.mobile_device}"
            )
        # Explicitly disable HTTP/2
        if self.enable_http2 is False:
            self._options.add_experimental_option(
                "prefs", {"disable-http2": True}
            )
        self._driver = self._webdriver.Chrome(
            service=service,
            options=self._options,
            **_options
        )
    # Creating the WebDriverWait and Return the Driver:
    self._wait = WebDriverWait(self._driver, self.timeout)
    return self._driver
|
475
|
+
|
476
|
+
def _execute_scroll(self, scroll_pause_time=1.0, max_scrolls=5):
    """
    Scroll the page down in steps, then return to the top.

    Any exception raised while scrolling is logged as a warning and
    swallowed, so callers never fail because of a scroll problem.

    Args:
        scroll_pause_time (float): seconds to sleep after each scroll step
        max_scrolls (int): maximum number of scroll steps
    """
    height_js = "return document.body.scrollHeight"

    def _body_has_height(driver):
        return driver.execute_script(height_js) > 0

    try:
        # Block (up to 20s) until the document reports a non-zero height.
        WebDriverWait(self._driver, 20).until(_body_has_height)

        known_height = self._driver.execute_script(height_js)

        for step in range(1, max_scrolls + 1):
            # Move to the step-th fraction of the last known height.
            self._driver.execute_script(
                f"window.scrollTo(0, {step * known_height/max_scrolls});"
            )
            time.sleep(scroll_pause_time)

            current_height = self._driver.execute_script(height_js)
            # From the second step on, stop once the page stopped growing.
            if step > 1 and current_height == known_height:
                break
            known_height = current_height

            # Extra pause after the final step.
            if step == max_scrolls:
                time.sleep(scroll_pause_time * 1.5)

        self._driver.execute_script("window.scrollTo(0, 0);")
    except Exception as err:
        self._logger.warning(f"Error during scroll operation: {err}")
|
518
|
+
|
519
|
+
def save_screenshot(self, filename: str) -> None:
    """Save a screenshot of the page, or of a configured portion, to *filename*.

    The window is resized to the document's scroll width/height (falling
    back to 1920x1080 when the script returns a falsy value), the page is
    scrolled, and then either the element located by
    ``self.screenshot['portion']`` or the whole window is captured. The
    original window size is restored even when a step fails.

    Args:
        filename: destination path handed to Selenium's screenshot call.
    """
    original_size = self._driver.get_window_size()
    width = self._driver.execute_script(
        'return document.body.parentNode.scrollWidth'
    ) or 1920
    height = self._driver.execute_script(
        'return document.body.parentNode.scrollHeight'
    ) or 1080
    # A missing or empty screenshot config means "capture the full window".
    shot_config = getattr(self, 'screenshot', None) or {}
    try:
        self._driver.set_window_size(width, height)
        self._execute_scroll()

        # Ensure the page is fully loaded after resizing
        self._wait.until(
            lambda driver: driver.execute_script("return document.readyState") == "complete"
        )

        # Wait for specific elements to load
        if self.wait_until:
            WebDriverWait(self._driver, 20).until(
                EC.presence_of_all_elements_located(
                    self.wait_until
                )
            )
        if 'portion' in shot_config:
            element = self._driver.find_element(*shot_config['portion'])
            size = element.size
            if size['height'] == 0 or size['width'] == 0:
                # A zero-sized element cannot be captured yet; wait for it.
                self._logger.warning(
                    "Element to screenshot has zero dimension, waiting for it to render..."
                )
                WebDriverWait(self._driver, 20).until(
                    lambda driver: element.size['height'] > 0 and element.size['width'] > 0
                )
            element.screenshot(filename)
        else:
            # Capture the whole (resized) window
            self._driver.save_screenshot(filename)
    finally:
        # Always resize back to the original size
        self._driver.set_window_size(
            original_size['width'],
            original_size['height']
        )
|
568
|
+
|
569
|
+
def get_soup(self, content: str, parser: str = 'html.parser'):
    """Parse *content* with BeautifulSoup using the given *parser* name."""
    soup = BeautifulSoup(content, parser)
    return soup
|
572
|
+
|
573
|
+
def get_etree(self, content: str) -> tuple:
    """Parse *content* both as XML and as HTML.

    Returns:
        A ``(xml_tree, html_tree)`` tuple; each entry is ``None`` when the
        corresponding parser rejected the content.
    """
    # lxml reports failures through several exception types:
    # XMLSyntaxError for malformed markup, ParserError for e.g. an empty
    # document, ValueError for str input carrying an encoding declaration.
    parse_errors = (etree.XMLSyntaxError, etree.ParserError, ValueError)
    try:
        x = etree.fromstring(content)
    except parse_errors:
        x = None
    try:
        h = html.fromstring(content)
    except parse_errors:
        h = None
    return x, h
|
583
|
+
|
584
|
+
async def get_page(
    self,
    url: str,
    cookies: Optional[dict] = None,
    retries: int = 3,
    backoff_delay: int = 2
):
    """Load *url* in the Selenium driver, retrying on timeouts.

    A driver is created with ``get_driver`` when none exists. Each attempt
    clears cookies, loads the page, optionally injects *cookies* and
    refreshes, waits for the document and the expected elements, clicks the
    accept-cookies button when configured and finally scrolls the page.

    Args:
        url: page to load.
        cookies: optional name/value mapping; empty values are skipped.
        retries: maximum number of attempts.
        backoff_delay: seconds to sleep between attempts.

    Raises:
        TimeOutError: every attempt timed out.
        ComponentError: any other error while loading the page.
    """
    if not self._driver:
        await self.get_driver()
    attempt = 0
    while attempt < retries:
        try:
            try:
                self._driver.delete_all_cookies()
            except Exception:
                # Best effort: failing to clear cookies must not abort the load.
                pass
            self._driver.get(url)
            if cookies:
                for cookie_name, cookie_value in cookies.items():
                    if cookie_value:
                        self._driver.add_cookie({'name': cookie_name, 'value': cookie_value})
                # Refresh the page to apply the cookies
                self._driver.refresh()

            # Ensure the page is fully loaded before attempting to click
            self._wait.until(
                lambda driver: driver.execute_script("return document.readyState") == "complete"
            )

            if self.wait_until:
                # Wait for the configured elements to be present.
                WebDriverWait(self._driver, 20).until(
                    EC.presence_of_all_elements_located(
                        self.wait_until
                    )
                )
            else:
                # Wait for the default tag to appear in the page.
                self._wait.until(
                    EC.presence_of_element_located(
                        (By.TAG_NAME, self.default_tag)
                    )
                )
            # Accept Cookies if enabled.
            if self.accept_cookies:
                try:
                    if self.accept_is_clickable is True:
                        accept_button = self._wait.until(
                            EC.element_to_be_clickable(self.accept_cookies)
                        )
                        accept_button.click()
                    else:
                        # Click through JavaScript when the button only
                        # needs to be present, not clickable.
                        accept_button = self._wait.until(
                            EC.presence_of_element_located(
                                self.accept_cookies
                            )
                        )
                        self._driver.execute_script("arguments[0].click();", accept_button)
                except TimeoutException:
                    self._logger.warning(
                        'Accept Cookies Button not found'
                    )
            # Execute an scroll of the page:
            self._execute_scroll()
            return
        except TimeoutException:
            self._logger.warning("Page did not reach a complete readyState.")
            try:
                self._logger.debug(
                    f"Current Page Source:\n{self._driver.page_source}"
                )
            except Exception:
                # Logging the source is diagnostic only.
                pass
            # A press-and-hold challenge may be blocking the page.
            if self._solve_hold_challenge():
                return
            attempt += 1
            if attempt < retries:
                self._logger.warning(
                    f"TimeoutException occurred. Retrying ({attempt}/{retries}) in {backoff_delay}s..."
                )
                time.sleep(backoff_delay)
            else:
                raise TimeOutError(f"Timeout Error on URL {url} after {retries} attempts")
        except Exception as exc:
            raise ComponentError(
                f"Error running Scrapping Tool: {exc}"
            ) from exc

def _solve_hold_challenge(self) -> bool:
    """Try to pass the ``px-captcha`` press-and-hold challenge; True on success."""
    button_locator = (
        By.XPATH,
        "//p[contains(text(), 'Pulsar y mantener pulsado')]"
    )
    try:
        base = WebDriverWait(self._driver, 20).until(
            EC.presence_of_element_located((By.ID, "px-captcha"))
        )
        iframe = base.find_element(By.TAG_NAME, "iframe")
        self._driver.switch_to.frame(iframe)
        challenge_button = WebDriverWait(self._driver, 5).until(
            EC.presence_of_element_located(button_locator)
        )
        # Hold the button for 5 seconds, then release.
        actions = ActionChains(self._driver)
        actions.click_and_hold(challenge_button).pause(5).release().perform()
        self._driver.switch_to.default_content()
        # Wait again for the page to load after the challenge
        self._wait.until(
            lambda driver: driver.execute_script("return document.readyState") == "complete"
        )
    except WebDriverException:
        # No challenge, or it could not be solved: let the caller retry.
        try:
            self._driver.switch_to.default_content()
        except WebDriverException:
            pass
        return False
    self._execute_scroll()
    return True
|
710
|
+
|
711
|
+
async def search_google_cse(self, query: str, max_results: int = 5):
    """
    Search Google Custom Search Engine (CSE) using Selenium.

    A driver is obtained through ``get_driver`` and is always closed before
    returning, whether the search succeeded or not.

    Args:
        query (str): The search query.
        max_results (int, optional): Maximum number of search results to return.

    Returns:
        list[dict]: A list of search results with 'title' and 'link'. Empty
        when the results container never appears.

    Raises:
        RuntimeError: wrapping any Selenium or unexpected error.
    """
    try:
        # NOTE(review): query is interpolated without URL-encoding — confirm
        # whether callers pre-encode it.
        search_url = f"https://cse.google.com/cse?cx={GOOGLE_SEARCH_ENGINE_ID}#gsc.tab=0&gsc.q={query}&gsc.sort="
        driver = await self.get_driver()
        driver.get(search_url)

        # Wait for search results or the "No results" message
        try:
            WebDriverWait(driver, 5).until(
                EC.presence_of_element_located((By.CLASS_NAME, "gsc-results"))
            )
        except TimeoutException:
            try:
                WebDriverWait(driver, 3).until(
                    EC.presence_of_element_located((By.CLASS_NAME, "gs-no-results-result"))
                )
            except TimeoutException:
                self._logger.warning(
                    "CSE: No results found or page failed to load."
                )
            # Either way there is nothing to extract.
            return []

        time.sleep(2)  # Allow JS to finalize

        # find_elements returns an empty list instead of raising, so the
        # fallback selector is chosen on emptiness.
        search_results = driver.find_elements(By.CLASS_NAME, "gsc-webResult")
        if not search_results:
            search_results = driver.find_elements(By.CLASS_NAME, "gsc-expansionArea")

        results = []
        for result in search_results[:max_results]:
            try:
                title_element = result.find_element(By.CLASS_NAME, "gs-title")
                url_element = title_element.find_element(By.TAG_NAME, "a")
            except NoSuchElementException:
                continue  # Skip results without a title link
            title = title_element.text.strip()
            # get_attribute may return None when the anchor has no href.
            url = (url_element.get_attribute("href") or "").strip()
            if title and url:
                results.append({"title": title, "link": url})

        return results

    except NoSuchElementException as e:
        raise RuntimeError(f"CSE Error: Element not found ({e})") from e
    except TimeoutException as e:
        raise RuntimeError(f"CSE Timeout: {e}") from e
    except WebDriverException as e:
        raise RuntimeError(f"CSE WebDriver Error: {e}") from e
    except RuntimeError as e:
        raise RuntimeError(f"CSE Runtime Error: {e}") from e
    except Exception as e:
        raise RuntimeError(f"CSE Unexpected Error: {e}") from e
    finally:
        self.close_driver()  # Always close driver
|