flowtask 5.8.4__cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowtask/__init__.py +93 -0
- flowtask/__main__.py +38 -0
- flowtask/bots/__init__.py +6 -0
- flowtask/bots/check.py +93 -0
- flowtask/bots/codebot.py +51 -0
- flowtask/components/ASPX.py +148 -0
- flowtask/components/AddDataset.py +352 -0
- flowtask/components/Amazon.py +523 -0
- flowtask/components/AutoTask.py +314 -0
- flowtask/components/Azure.py +80 -0
- flowtask/components/AzureUsers.py +106 -0
- flowtask/components/BaseAction.py +91 -0
- flowtask/components/BaseLoop.py +198 -0
- flowtask/components/BestBuy.py +800 -0
- flowtask/components/CSVToGCS.py +120 -0
- flowtask/components/CompanyScraper/__init__.py +1 -0
- flowtask/components/CompanyScraper/parsers/__init__.py +6 -0
- flowtask/components/CompanyScraper/parsers/base.py +102 -0
- flowtask/components/CompanyScraper/parsers/explorium.py +192 -0
- flowtask/components/CompanyScraper/parsers/leadiq.py +206 -0
- flowtask/components/CompanyScraper/parsers/rocket.py +133 -0
- flowtask/components/CompanyScraper/parsers/siccode.py +109 -0
- flowtask/components/CompanyScraper/parsers/visualvisitor.py +130 -0
- flowtask/components/CompanyScraper/parsers/zoominfo.py +118 -0
- flowtask/components/CompanyScraper/scrapper.py +1054 -0
- flowtask/components/CopyTo.py +177 -0
- flowtask/components/CopyToBigQuery.py +243 -0
- flowtask/components/CopyToMongoDB.py +291 -0
- flowtask/components/CopyToPg.py +609 -0
- flowtask/components/CopyToRethink.py +207 -0
- flowtask/components/CreateGCSBucket.py +102 -0
- flowtask/components/CreateReport/CreateReport.py +228 -0
- flowtask/components/CreateReport/__init__.py +9 -0
- flowtask/components/CreateReport/charts/__init__.py +15 -0
- flowtask/components/CreateReport/charts/bar.py +51 -0
- flowtask/components/CreateReport/charts/base.py +66 -0
- flowtask/components/CreateReport/charts/pie.py +64 -0
- flowtask/components/CreateReport/utils.py +9 -0
- flowtask/components/CustomerSatisfaction.py +196 -0
- flowtask/components/DataInput.py +200 -0
- flowtask/components/DateList.py +255 -0
- flowtask/components/DbClient.py +163 -0
- flowtask/components/DialPad.py +146 -0
- flowtask/components/DocumentDBQuery.py +200 -0
- flowtask/components/DownloadFrom.py +371 -0
- flowtask/components/DownloadFromD2L.py +113 -0
- flowtask/components/DownloadFromFTP.py +181 -0
- flowtask/components/DownloadFromIMAP.py +315 -0
- flowtask/components/DownloadFromS3.py +198 -0
- flowtask/components/DownloadFromSFTP.py +265 -0
- flowtask/components/DownloadFromSharepoint.py +110 -0
- flowtask/components/DownloadFromSmartSheet.py +114 -0
- flowtask/components/DownloadS3File.py +229 -0
- flowtask/components/Dummy.py +59 -0
- flowtask/components/DuplicatePhoto.py +411 -0
- flowtask/components/EmployeeEvaluation.py +237 -0
- flowtask/components/ExecuteSQL.py +323 -0
- flowtask/components/ExtractHTML.py +178 -0
- flowtask/components/FileBase.py +178 -0
- flowtask/components/FileCopy.py +181 -0
- flowtask/components/FileDelete.py +82 -0
- flowtask/components/FileExists.py +146 -0
- flowtask/components/FileIteratorDelete.py +112 -0
- flowtask/components/FileList.py +194 -0
- flowtask/components/FileOpen.py +75 -0
- flowtask/components/FileRead.py +120 -0
- flowtask/components/FileRename.py +106 -0
- flowtask/components/FilterIf.py +284 -0
- flowtask/components/FilterRows/FilterRows.py +200 -0
- flowtask/components/FilterRows/__init__.py +10 -0
- flowtask/components/FilterRows/functions.py +4 -0
- flowtask/components/GCSToBigQuery.py +103 -0
- flowtask/components/GoogleA4.py +150 -0
- flowtask/components/GoogleGeoCoding.py +344 -0
- flowtask/components/GooglePlaces.py +315 -0
- flowtask/components/GoogleSearch.py +539 -0
- flowtask/components/HTTPClient.py +268 -0
- flowtask/components/ICIMS.py +146 -0
- flowtask/components/IF.py +179 -0
- flowtask/components/IcimsFolderCopy.py +173 -0
- flowtask/components/ImageFeatures/__init__.py +5 -0
- flowtask/components/ImageFeatures/process.py +233 -0
- flowtask/components/IteratorBase.py +251 -0
- flowtask/components/LangchainLoader/__init__.py +5 -0
- flowtask/components/LangchainLoader/loader.py +194 -0
- flowtask/components/LangchainLoader/loaders/__init__.py +22 -0
- flowtask/components/LangchainLoader/loaders/abstract.py +362 -0
- flowtask/components/LangchainLoader/loaders/basepdf.py +50 -0
- flowtask/components/LangchainLoader/loaders/docx.py +91 -0
- flowtask/components/LangchainLoader/loaders/html.py +119 -0
- flowtask/components/LangchainLoader/loaders/pdfblocks.py +146 -0
- flowtask/components/LangchainLoader/loaders/pdfmark.py +79 -0
- flowtask/components/LangchainLoader/loaders/pdftables.py +135 -0
- flowtask/components/LangchainLoader/loaders/qa.py +67 -0
- flowtask/components/LangchainLoader/loaders/txt.py +55 -0
- flowtask/components/LeadIQ.py +650 -0
- flowtask/components/Loop.py +253 -0
- flowtask/components/Lowes.py +334 -0
- flowtask/components/MS365Usage.py +156 -0
- flowtask/components/MSTeamsMessages.py +320 -0
- flowtask/components/MarketClustering.py +1051 -0
- flowtask/components/MergeFiles.py +362 -0
- flowtask/components/MilvusOutput.py +87 -0
- flowtask/components/NearByStores.py +175 -0
- flowtask/components/NetworkNinja/__init__.py +6 -0
- flowtask/components/NetworkNinja/models/__init__.py +52 -0
- flowtask/components/NetworkNinja/models/abstract.py +177 -0
- flowtask/components/NetworkNinja/models/account.py +39 -0
- flowtask/components/NetworkNinja/models/client.py +19 -0
- flowtask/components/NetworkNinja/models/district.py +14 -0
- flowtask/components/NetworkNinja/models/events.py +101 -0
- flowtask/components/NetworkNinja/models/forms.py +499 -0
- flowtask/components/NetworkNinja/models/market.py +16 -0
- flowtask/components/NetworkNinja/models/organization.py +34 -0
- flowtask/components/NetworkNinja/models/photos.py +125 -0
- flowtask/components/NetworkNinja/models/project.py +44 -0
- flowtask/components/NetworkNinja/models/region.py +28 -0
- flowtask/components/NetworkNinja/models/store.py +203 -0
- flowtask/components/NetworkNinja/models/user.py +151 -0
- flowtask/components/NetworkNinja/router.py +854 -0
- flowtask/components/Odoo.py +175 -0
- flowtask/components/OdooInjector.py +192 -0
- flowtask/components/OpenFromXML.py +126 -0
- flowtask/components/OpenWeather.py +41 -0
- flowtask/components/OpenWithBase.py +616 -0
- flowtask/components/OpenWithPandas.py +715 -0
- flowtask/components/PGPDecrypt.py +199 -0
- flowtask/components/PandasIterator.py +187 -0
- flowtask/components/PandasToFile.py +189 -0
- flowtask/components/Paradox.py +339 -0
- flowtask/components/ParamIterator.py +117 -0
- flowtask/components/ParseHTML.py +84 -0
- flowtask/components/PlacerStores.py +249 -0
- flowtask/components/Pokemon.py +507 -0
- flowtask/components/PositiveBot.py +62 -0
- flowtask/components/PowerPointSlide.py +400 -0
- flowtask/components/PrintMessage.py +127 -0
- flowtask/components/ProductCompetitors/__init__.py +5 -0
- flowtask/components/ProductCompetitors/parsers/__init__.py +7 -0
- flowtask/components/ProductCompetitors/parsers/base.py +72 -0
- flowtask/components/ProductCompetitors/parsers/bestbuy.py +86 -0
- flowtask/components/ProductCompetitors/parsers/lowes.py +103 -0
- flowtask/components/ProductCompetitors/scrapper.py +155 -0
- flowtask/components/ProductCompliant.py +169 -0
- flowtask/components/ProductInfo/__init__.py +1 -0
- flowtask/components/ProductInfo/parsers/__init__.py +5 -0
- flowtask/components/ProductInfo/parsers/base.py +83 -0
- flowtask/components/ProductInfo/parsers/brother.py +97 -0
- flowtask/components/ProductInfo/parsers/canon.py +167 -0
- flowtask/components/ProductInfo/parsers/epson.py +118 -0
- flowtask/components/ProductInfo/parsers/hp.py +131 -0
- flowtask/components/ProductInfo/parsers/samsung.py +97 -0
- flowtask/components/ProductInfo/scraper.py +319 -0
- flowtask/components/ProductPricing.py +118 -0
- flowtask/components/QS.py +261 -0
- flowtask/components/QSBase.py +201 -0
- flowtask/components/QueryIterator.py +273 -0
- flowtask/components/QueryToInsert.py +327 -0
- flowtask/components/QueryToPandas.py +432 -0
- flowtask/components/RESTClient.py +195 -0
- flowtask/components/RethinkDBQuery.py +189 -0
- flowtask/components/Rsync.py +74 -0
- flowtask/components/RunSSH.py +59 -0
- flowtask/components/RunShell.py +71 -0
- flowtask/components/SalesForce.py +20 -0
- flowtask/components/SaveImageBank/__init__.py +257 -0
- flowtask/components/SchedulingVisits.py +592 -0
- flowtask/components/ScrapPage.py +216 -0
- flowtask/components/ScrapSearch.py +79 -0
- flowtask/components/SendNotify.py +257 -0
- flowtask/components/SentimentAnalysis.py +694 -0
- flowtask/components/ServiceScrapper/__init__.py +5 -0
- flowtask/components/ServiceScrapper/parsers/__init__.py +1 -0
- flowtask/components/ServiceScrapper/parsers/base.py +94 -0
- flowtask/components/ServiceScrapper/parsers/costco.py +93 -0
- flowtask/components/ServiceScrapper/scrapper.py +199 -0
- flowtask/components/SetVariables.py +156 -0
- flowtask/components/SubTask.py +182 -0
- flowtask/components/SuiteCRM.py +48 -0
- flowtask/components/Switch.py +175 -0
- flowtask/components/TableBase.py +148 -0
- flowtask/components/TableDelete.py +312 -0
- flowtask/components/TableInput.py +143 -0
- flowtask/components/TableOutput/TableOutput.py +384 -0
- flowtask/components/TableOutput/__init__.py +3 -0
- flowtask/components/TableSchema.py +534 -0
- flowtask/components/Target.py +223 -0
- flowtask/components/ThumbnailGenerator.py +156 -0
- flowtask/components/ToPandas.py +67 -0
- flowtask/components/TransformRows/TransformRows.py +507 -0
- flowtask/components/TransformRows/__init__.py +9 -0
- flowtask/components/TransformRows/functions.py +559 -0
- flowtask/components/TransposeRows.py +176 -0
- flowtask/components/UPCDatabase.py +86 -0
- flowtask/components/UnGzip.py +171 -0
- flowtask/components/Uncompress.py +172 -0
- flowtask/components/UniqueRows.py +126 -0
- flowtask/components/Unzip.py +107 -0
- flowtask/components/UpdateOperationalVars.py +147 -0
- flowtask/components/UploadTo.py +299 -0
- flowtask/components/UploadToS3.py +136 -0
- flowtask/components/UploadToSFTP.py +160 -0
- flowtask/components/UploadToSharepoint.py +205 -0
- flowtask/components/UserFunc.py +122 -0
- flowtask/components/VivaTracker.py +140 -0
- flowtask/components/WSDLClient.py +123 -0
- flowtask/components/Wait.py +18 -0
- flowtask/components/Walmart.py +199 -0
- flowtask/components/Workplace.py +134 -0
- flowtask/components/XMLToPandas.py +267 -0
- flowtask/components/Zammad/__init__.py +41 -0
- flowtask/components/Zammad/models.py +0 -0
- flowtask/components/ZoomInfoScraper.py +409 -0
- flowtask/components/__init__.py +104 -0
- flowtask/components/abstract.py +18 -0
- flowtask/components/flow.py +530 -0
- flowtask/components/google.py +335 -0
- flowtask/components/group.py +221 -0
- flowtask/components/py.typed +0 -0
- flowtask/components/reviewscrap.py +132 -0
- flowtask/components/tAutoincrement.py +117 -0
- flowtask/components/tConcat.py +109 -0
- flowtask/components/tExplode.py +119 -0
- flowtask/components/tFilter.py +184 -0
- flowtask/components/tGroup.py +236 -0
- flowtask/components/tJoin.py +270 -0
- flowtask/components/tMap/__init__.py +9 -0
- flowtask/components/tMap/functions.py +54 -0
- flowtask/components/tMap/tMap.py +450 -0
- flowtask/components/tMelt.py +112 -0
- flowtask/components/tMerge.py +114 -0
- flowtask/components/tOrder.py +93 -0
- flowtask/components/tPandas.py +94 -0
- flowtask/components/tPivot.py +71 -0
- flowtask/components/tPluckCols.py +76 -0
- flowtask/components/tUnnest.py +82 -0
- flowtask/components/user.py +401 -0
- flowtask/conf.py +457 -0
- flowtask/download.py +102 -0
- flowtask/events/__init__.py +11 -0
- flowtask/events/events/__init__.py +20 -0
- flowtask/events/events/abstract.py +95 -0
- flowtask/events/events/alerts/__init__.py +362 -0
- flowtask/events/events/alerts/colfunctions.py +131 -0
- flowtask/events/events/alerts/functions.py +158 -0
- flowtask/events/events/dummy.py +12 -0
- flowtask/events/events/exec.py +124 -0
- flowtask/events/events/file/__init__.py +7 -0
- flowtask/events/events/file/base.py +51 -0
- flowtask/events/events/file/copy.py +23 -0
- flowtask/events/events/file/delete.py +16 -0
- flowtask/events/events/interfaces/__init__.py +9 -0
- flowtask/events/events/interfaces/client.py +67 -0
- flowtask/events/events/interfaces/credentials.py +28 -0
- flowtask/events/events/interfaces/notifications.py +58 -0
- flowtask/events/events/jira.py +122 -0
- flowtask/events/events/log.py +26 -0
- flowtask/events/events/logerr.py +52 -0
- flowtask/events/events/notify.py +59 -0
- flowtask/events/events/notify_event.py +160 -0
- flowtask/events/events/publish.py +54 -0
- flowtask/events/events/sendfile.py +104 -0
- flowtask/events/events/task.py +97 -0
- flowtask/events/events/teams.py +98 -0
- flowtask/events/events/webhook.py +58 -0
- flowtask/events/manager.py +287 -0
- flowtask/exceptions.c +39393 -0
- flowtask/exceptions.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/extensions/__init__.py +3 -0
- flowtask/extensions/abstract.py +82 -0
- flowtask/extensions/logging/__init__.py +65 -0
- flowtask/hooks/__init__.py +9 -0
- flowtask/hooks/actions/__init__.py +22 -0
- flowtask/hooks/actions/abstract.py +66 -0
- flowtask/hooks/actions/dummy.py +23 -0
- flowtask/hooks/actions/jira.py +74 -0
- flowtask/hooks/actions/rest.py +320 -0
- flowtask/hooks/actions/sampledata.py +37 -0
- flowtask/hooks/actions/sensor.py +23 -0
- flowtask/hooks/actions/task.py +9 -0
- flowtask/hooks/actions/ticket.py +37 -0
- flowtask/hooks/actions/zammad.py +55 -0
- flowtask/hooks/hook.py +62 -0
- flowtask/hooks/models.py +17 -0
- flowtask/hooks/service.py +187 -0
- flowtask/hooks/step.py +91 -0
- flowtask/hooks/types/__init__.py +23 -0
- flowtask/hooks/types/base.py +129 -0
- flowtask/hooks/types/brokers/__init__.py +11 -0
- flowtask/hooks/types/brokers/base.py +54 -0
- flowtask/hooks/types/brokers/mqtt.py +35 -0
- flowtask/hooks/types/brokers/rabbitmq.py +82 -0
- flowtask/hooks/types/brokers/redis.py +83 -0
- flowtask/hooks/types/brokers/sqs.py +44 -0
- flowtask/hooks/types/fs.py +232 -0
- flowtask/hooks/types/http.py +49 -0
- flowtask/hooks/types/imap.py +200 -0
- flowtask/hooks/types/jira.py +279 -0
- flowtask/hooks/types/mail.py +205 -0
- flowtask/hooks/types/postgres.py +98 -0
- flowtask/hooks/types/responses/__init__.py +8 -0
- flowtask/hooks/types/responses/base.py +5 -0
- flowtask/hooks/types/sharepoint.py +288 -0
- flowtask/hooks/types/ssh.py +141 -0
- flowtask/hooks/types/tagged.py +59 -0
- flowtask/hooks/types/upload.py +85 -0
- flowtask/hooks/types/watch.py +71 -0
- flowtask/hooks/types/web.py +36 -0
- flowtask/interfaces/AzureClient.py +137 -0
- flowtask/interfaces/AzureGraph.py +839 -0
- flowtask/interfaces/Boto3Client.py +326 -0
- flowtask/interfaces/DropboxClient.py +173 -0
- flowtask/interfaces/ExcelHandler.py +94 -0
- flowtask/interfaces/FTPClient.py +131 -0
- flowtask/interfaces/GoogleCalendar.py +201 -0
- flowtask/interfaces/GoogleClient.py +133 -0
- flowtask/interfaces/GoogleDrive.py +127 -0
- flowtask/interfaces/GoogleGCS.py +89 -0
- flowtask/interfaces/GoogleGeocoding.py +93 -0
- flowtask/interfaces/GoogleLang.py +114 -0
- flowtask/interfaces/GooglePub.py +61 -0
- flowtask/interfaces/GoogleSheet.py +68 -0
- flowtask/interfaces/IMAPClient.py +137 -0
- flowtask/interfaces/O365Calendar.py +113 -0
- flowtask/interfaces/O365Client.py +220 -0
- flowtask/interfaces/OneDrive.py +284 -0
- flowtask/interfaces/Outlook.py +155 -0
- flowtask/interfaces/ParrotBot.py +130 -0
- flowtask/interfaces/SSHClient.py +378 -0
- flowtask/interfaces/Sharepoint.py +496 -0
- flowtask/interfaces/__init__.py +36 -0
- flowtask/interfaces/azureauth.py +119 -0
- flowtask/interfaces/cache.py +201 -0
- flowtask/interfaces/client.py +82 -0
- flowtask/interfaces/compress.py +525 -0
- flowtask/interfaces/credentials.py +124 -0
- flowtask/interfaces/d2l.py +239 -0
- flowtask/interfaces/databases/__init__.py +5 -0
- flowtask/interfaces/databases/db.py +223 -0
- flowtask/interfaces/databases/documentdb.py +55 -0
- flowtask/interfaces/databases/rethink.py +39 -0
- flowtask/interfaces/dataframes/__init__.py +11 -0
- flowtask/interfaces/dataframes/abstract.py +21 -0
- flowtask/interfaces/dataframes/arrow.py +71 -0
- flowtask/interfaces/dataframes/dt.py +69 -0
- flowtask/interfaces/dataframes/pandas.py +167 -0
- flowtask/interfaces/dataframes/polars.py +60 -0
- flowtask/interfaces/db.py +263 -0
- flowtask/interfaces/env.py +46 -0
- flowtask/interfaces/func.py +137 -0
- flowtask/interfaces/http.py +1780 -0
- flowtask/interfaces/locale.py +40 -0
- flowtask/interfaces/log.py +75 -0
- flowtask/interfaces/mask.py +143 -0
- flowtask/interfaces/notification.py +154 -0
- flowtask/interfaces/playwright.py +339 -0
- flowtask/interfaces/powerpoint.py +368 -0
- flowtask/interfaces/py.typed +0 -0
- flowtask/interfaces/qs.py +376 -0
- flowtask/interfaces/result.py +87 -0
- flowtask/interfaces/selenium_service.py +779 -0
- flowtask/interfaces/smartsheet.py +154 -0
- flowtask/interfaces/stat.py +39 -0
- flowtask/interfaces/task.py +96 -0
- flowtask/interfaces/template.py +118 -0
- flowtask/interfaces/vectorstores/__init__.py +1 -0
- flowtask/interfaces/vectorstores/abstract.py +133 -0
- flowtask/interfaces/vectorstores/milvus.py +669 -0
- flowtask/interfaces/zammad.py +107 -0
- flowtask/models.py +193 -0
- flowtask/parsers/__init__.py +15 -0
- flowtask/parsers/_yaml.c +11978 -0
- flowtask/parsers/_yaml.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/argparser.py +235 -0
- flowtask/parsers/base.c +15155 -0
- flowtask/parsers/base.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/json.c +11968 -0
- flowtask/parsers/json.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/maps.py +49 -0
- flowtask/parsers/toml.c +11968 -0
- flowtask/parsers/toml.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/plugins/__init__.py +16 -0
- flowtask/plugins/components/__init__.py +0 -0
- flowtask/plugins/handler/__init__.py +45 -0
- flowtask/plugins/importer.py +31 -0
- flowtask/plugins/sources/__init__.py +0 -0
- flowtask/runner.py +283 -0
- flowtask/scheduler/__init__.py +9 -0
- flowtask/scheduler/functions.py +493 -0
- flowtask/scheduler/handlers/__init__.py +8 -0
- flowtask/scheduler/handlers/manager.py +504 -0
- flowtask/scheduler/handlers/models.py +58 -0
- flowtask/scheduler/handlers/service.py +72 -0
- flowtask/scheduler/notifications.py +65 -0
- flowtask/scheduler/scheduler.py +993 -0
- flowtask/services/__init__.py +0 -0
- flowtask/services/bots/__init__.py +0 -0
- flowtask/services/bots/telegram.py +264 -0
- flowtask/services/files/__init__.py +11 -0
- flowtask/services/files/manager.py +522 -0
- flowtask/services/files/model.py +37 -0
- flowtask/services/files/service.py +767 -0
- flowtask/services/jira/__init__.py +3 -0
- flowtask/services/jira/jira_actions.py +191 -0
- flowtask/services/tasks/__init__.py +13 -0
- flowtask/services/tasks/launcher.py +213 -0
- flowtask/services/tasks/manager.py +323 -0
- flowtask/services/tasks/service.py +275 -0
- flowtask/services/tasks/task_manager.py +376 -0
- flowtask/services/tasks/tasks.py +155 -0
- flowtask/storages/__init__.py +16 -0
- flowtask/storages/exceptions.py +12 -0
- flowtask/storages/files/__init__.py +8 -0
- flowtask/storages/files/abstract.py +29 -0
- flowtask/storages/files/filesystem.py +66 -0
- flowtask/storages/tasks/__init__.py +19 -0
- flowtask/storages/tasks/abstract.py +26 -0
- flowtask/storages/tasks/database.py +33 -0
- flowtask/storages/tasks/filesystem.py +108 -0
- flowtask/storages/tasks/github.py +119 -0
- flowtask/storages/tasks/memory.py +45 -0
- flowtask/storages/tasks/row.py +25 -0
- flowtask/tasks/__init__.py +0 -0
- flowtask/tasks/abstract.py +526 -0
- flowtask/tasks/command.py +118 -0
- flowtask/tasks/pile.py +486 -0
- flowtask/tasks/py.typed +0 -0
- flowtask/tasks/task.py +778 -0
- flowtask/template/__init__.py +161 -0
- flowtask/tests.py +257 -0
- flowtask/types/__init__.py +8 -0
- flowtask/types/typedefs.c +11347 -0
- flowtask/types/typedefs.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/__init__.py +24 -0
- flowtask/utils/constants.py +117 -0
- flowtask/utils/encoders.py +21 -0
- flowtask/utils/executor.py +112 -0
- flowtask/utils/functions.cpp +14280 -0
- flowtask/utils/functions.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/json.cpp +13349 -0
- flowtask/utils/json.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/mail.py +63 -0
- flowtask/utils/parseqs.c +13324 -0
- flowtask/utils/parserqs.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/stats.py +308 -0
- flowtask/utils/transformations.py +74 -0
- flowtask/utils/uv.py +12 -0
- flowtask/utils/validators.py +97 -0
- flowtask/version.py +11 -0
- flowtask-5.8.4.dist-info/LICENSE +201 -0
- flowtask-5.8.4.dist-info/METADATA +209 -0
- flowtask-5.8.4.dist-info/RECORD +470 -0
- flowtask-5.8.4.dist-info/WHEEL +6 -0
- flowtask-5.8.4.dist-info/entry_points.txt +3 -0
- flowtask-5.8.4.dist-info/top_level.txt +2 -0
- plugins/components/CreateQR.py +39 -0
- plugins/components/TestComponent.py +28 -0
- plugins/components/Use1.py +13 -0
- plugins/components/Workplace.py +117 -0
- plugins/components/__init__.py +3 -0
- plugins/sources/__init__.py +0 -0
- plugins/sources/get_populartimes.py +78 -0
- plugins/sources/google.py +150 -0
- plugins/sources/hubspot.py +679 -0
- plugins/sources/icims.py +679 -0
- plugins/sources/mobileinsight.py +501 -0
- plugins/sources/newrelic.py +262 -0
- plugins/sources/uap.py +268 -0
- plugins/sources/venu.py +244 -0
- plugins/sources/vocinity.py +314 -0
@@ -0,0 +1,1780 @@
|
|
1
|
+
from typing import Optional, Union, Dict, Any
|
2
|
+
import os
|
3
|
+
import asyncio
|
4
|
+
import random
|
5
|
+
import urllib.parse
|
6
|
+
from email.message import Message
|
7
|
+
from concurrent.futures import ThreadPoolExecutor
|
8
|
+
from functools import partial
|
9
|
+
from io import BytesIO
|
10
|
+
import ssl
|
11
|
+
from pathlib import Path
|
12
|
+
from urllib.parse import quote, urlencode, urlparse
|
13
|
+
import urllib3
|
14
|
+
import aiofiles
|
15
|
+
import requests
|
16
|
+
import backoff
|
17
|
+
from requests.auth import HTTPBasicAuth
|
18
|
+
from requests.exceptions import HTTPError
|
19
|
+
from requests.exceptions import Timeout as RequestTimeoutException
|
20
|
+
from googleapiclient.discovery import build
|
21
|
+
from googleapiclient.errors import HttpError
|
22
|
+
from duckduckgo_search import DDGS
|
23
|
+
from duckduckgo_search.exceptions import (
|
24
|
+
ConversationLimitException,
|
25
|
+
DuckDuckGoSearchException,
|
26
|
+
RatelimitException,
|
27
|
+
TimeoutException,
|
28
|
+
)
|
29
|
+
import primp
|
30
|
+
import aiohttp
|
31
|
+
from aiohttp import BasicAuth
|
32
|
+
import httpx
|
33
|
+
from bs4 import BeautifulSoup as bs
|
34
|
+
from lxml import html, etree
|
35
|
+
from navconfig.logging import logging
|
36
|
+
from proxylists.proxies import (
|
37
|
+
FreeProxy,
|
38
|
+
Oxylabs,
|
39
|
+
Decodo,
|
40
|
+
Geonode
|
41
|
+
)
|
42
|
+
from ..utils import cPrint, SafeDict
|
43
|
+
from ..utils.json import JSONContent
|
44
|
+
from ..conf import (
|
45
|
+
HTTPCLIENT_MAX_SEMAPHORE,
|
46
|
+
HTTPCLIENT_MAX_WORKERS,
|
47
|
+
GOOGLE_SEARCH_API_KEY,
|
48
|
+
GOOGLE_SEARCH_ENGINE_ID
|
49
|
+
)
|
50
|
+
from .dataframes import PandasDataframe
|
51
|
+
from ..exceptions import ComponentError
|
52
|
+
from .credentials import CredentialsInterface
|
53
|
+
|
54
|
+
|
55
|
+
# Quiet noisy third-party HTTP client libraries so task logs stay readable:
# only WARNING and above is emitted by these loggers.
logging.getLogger("urllib3").setLevel(logging.WARNING)
# Suppress urllib3's InsecureRequestWarning and friends globally.
urllib3.disable_warnings()
logging.getLogger("httpx").setLevel(logging.WARNING)
logging.getLogger("httpcore").setLevel(logging.WARNING)
logging.getLogger("aiohttp").setLevel(logging.WARNING)
# NOTE(review): "rquest" is kept at INFO (presumably the primp/rquest backend
# used for browser impersonation) — confirm this logger name is correct.
logging.getLogger("rquest").setLevel(logging.INFO)
|
61
|
+
|
62
|
+
|
63
|
+
# Pool of desktop/mobile User-Agent strings used for UA rotation.
# NOTE: ua[0] (Chrome on Windows) is the default when rotation is disabled,
# so keep it first. Every entry must be a single, complete UA string.
ua = [
    # Chrome - Desktop (Windows)
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36",
    # Chrome - Desktop (Mac)
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36",  # noqa
    # Safari - Desktop (Mac)
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Safari/605.1.15",  # noqa
    # Firefox - Desktop (Windows)
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/118.0",
    # Edge - Desktop (Windows)
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.46",  # noqa
    # Chrome - Mobile (Android)
    "Mozilla/5.0 (Linux; Android 13; SM-G991B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Mobile Safari/537.36",  # noqa
    # Safari - Mobile (iOS)
    "Mozilla/5.0 (iPhone; CPU iPhone OS 16_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1",  # noqa
    # Samsung Internet - Mobile (Android)
    "Mozilla/5.0 (Linux; Android 13; SAMSUNG SM-G991B) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/21.0 Chrome/118.0.0.0 Mobile Safari/537.36",  # noqa
    # Firefox - Mobile (Android)
    "Mozilla/5.0 (Android 13; Mobile; rv:118.0) Gecko/118.0 Firefox/118.0",
    # Opera - Desktop (Windows)
    # BUG FIX: the original was missing the trailing comma here, so Python's
    # implicit string concatenation silently merged this entry with the next
    # one (Firefox/Linux) into a single garbage User-Agent string.
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 OPR/104.0.0.0",  # noqa
    # Firefox - Desktop (Linux)
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:133.0) Gecko/20100101 Firefox/133.0",
    # Chrome - Desktop (Linux)
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36",
    # Other:
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",  # noqa
]
|
91
|
+
|
92
|
+
# Mobile-only User-Agent strings (older Android/iOS devices); used when a
# request must present itself as a phone/tablet browser.
mobile_ua = [
    "Mozilla/5.0 (Linux; Android 4.2.1; en-us; Nexus 5 Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19",  # noqa
    'Mozilla/5.0 (iPhone; CPU iPhone OS 12_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0 Mobile/15E148 Safari/604.1',  # noqa
    'Mozilla/5.0 (Linux; Android 9; SM-G973F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.119 Mobile Safari/537.36',  # noqa
    'Mozilla/5.0 (Linux; Android 8.0.0; Pixel 2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.93 Mobile Safari/537.36',  # noqa
    'Mozilla/5.0 (Linux; Android 10; HUAWEI VOG-L29) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Mobile Safari/537.36',  # noqa
    'Mozilla/5.0 (iPad; CPU OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0 Mobile/15E148 Safari/604.1',  # noqa
]
|
100
|
+
|
101
|
+
# Browser fingerprints accepted by the primp client's `impersonate=` option;
# values must match identifiers supported by the installed primp version.
impersonates = (
    "chrome_100", "chrome_101", "chrome_104", "chrome_105", "chrome_106", "chrome_107",
    "chrome_108", "chrome_109", "chrome_114", "chrome_116", "chrome_117", "chrome_118",
    "chrome_119", "chrome_120", "chrome_123", "chrome_124", "chrome_126", "chrome_127",
    "chrome_128", "chrome_129", "chrome_130", "chrome_131",
    "safari_ios_16.5", "safari_ios_17.2", "safari_ios_17.4.1", "safari_ios_18.1.1",
    "safari_15.3", "safari_15.5", "safari_15.6.1", "safari_16", "safari_16.5",
    "safari_17.0", "safari_17.2.1", "safari_17.4.1", "safari_17.5",
    "safari_18", "safari_18.2",
    "safari_ipad_18",
    "edge_101", "edge_122", "edge_127", "edge_131",
    "firefox_109", "firefox_117", "firefox_128", "firefox_133",
)  # fmt: skip

# Operating systems accepted by primp's `impersonate_os=` option.
impersonates_os = ("android", "ios", "linux", "macos", "windows")

# HTTP verbs this service will accept from callers.
valid_methods = ['GET', 'POST', 'PUT', 'DELETE', 'PATCH', 'HEAD', 'OPTIONS']
|
118
|
+
|
119
|
+
def bad_gateway_exception(exc):
    """Predicate: is *exc* an httpx status error carrying a 502 Bad Gateway?

    Intended as a backoff/retry filter: non-httpx exceptions and any other
    status code return False.
    """
    if not isinstance(exc, httpx.HTTPStatusError):
        return False
    return exc.response.status_code == 502
|
122
|
+
|
123
|
+
class HTTPService(CredentialsInterface, PandasDataframe):
|
124
|
+
"""
|
125
|
+
HTTPService.
|
126
|
+
|
127
|
+
Overview
|
128
|
+
|
129
|
+
Interface for making connections to HTTP services.
|
130
|
+
"""
|
131
|
+
accept: str = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9" # noqa
|
132
|
+
|
133
|
+
def __init__(self, *args, **kwargs):
    """Initialize the HTTP service from keyword configuration.

    Recognized kwargs (popped ones are consumed; get() ones are also left in
    ``self.arguments`` / forwarded to ``super().__init__``):
    url, accept, headers, cookies, method (via get) and use_proxy,
    proxy_type, use_free_proxy, rotate_ua, use_async, google_api_key,
    google_cse, as_binary, download, bs4_parser (via pop).
    """
    self.url: str = kwargs.get("url", None)
    # Accept header; falls back to the class-level default `accept` string.
    self.accept: str = kwargs.get(
        "accept",
        self.accept
    )
    # Proxy configuration: paid provider selection vs. free proxy lists.
    self.use_proxy: bool = kwargs.pop("use_proxy", False)
    self.proxy_type: str = kwargs.pop('proxy_type', 'decodo')
    self._free_proxy: bool = kwargs.pop('use_free_proxy', True)
    self._proxies: list = []
    self.rotate_ua: bool = kwargs.pop("rotate_ua", False)
    self.use_async: bool = bool(kwargs.pop("use_async", True))
    # Google Custom Search credentials default to module-level config values.
    self.google_api_key: str = kwargs.pop('google_api_key', GOOGLE_SEARCH_API_KEY)
    self.google_cse: str = kwargs.pop('google_cse', GOOGLE_SEARCH_ENGINE_ID)
    self.as_binary: bool = kwargs.pop('as_binary', False)
    self.download: bool = kwargs.pop('download', False)
    # Request timeout in seconds (fixed default; not kwargs-configurable here).
    self.timeout: int = 30
    # Caller-supplied headers; merged into the defaults below (caller wins).
    self.headers: dict = kwargs.get('headers', {})
    self.auth: dict = {}
    self.auth_type: Optional[str] = None
    self.token_type: str = "Bearer"
    self._user, self._pwd = None, None
    self.method: str = kwargs.get("method", "get")
    # BeautifulSoup parser backend used when parsing HTML responses.
    self._default_parser: str = kwargs.pop('bs4_parser', 'html.parser')
    self.parameters = {}
    # Pick the User-Agent: random from the pool when rotation is on,
    # otherwise the first (Chrome/Windows) entry.
    if self.rotate_ua is True:
        self._ua = random.choice(ua)
    else:
        self._ua: str = ua[0]
    # Rebuild self.headers: browser-like defaults first, then the
    # caller-supplied headers so explicit values override the defaults.
    self.headers = {
        "Accept": self.accept,
        "Accept-Encoding": "gzip, deflate",
        "DNT": "1",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": self._ua,
        **self.headers,
    }
    # potentially cookies to be used by request.
    self.cookies = kwargs.get('cookies', {})
    self._encoder = JSONContent()
    # other arguments: everything not popped above stays available here.
    self.arguments = kwargs
    # Executor: thread pool for offloading blocking HTTP work.
    self._executor = ThreadPoolExecutor(
        max_workers=int(HTTPCLIENT_MAX_WORKERS)
    )
    # Semaphore bounding concurrent in-flight requests.
    self._semaphore = asyncio.Semaphore(
        int(HTTPCLIENT_MAX_SEMAPHORE)
    )
    super().__init__(*args, **kwargs)
|
184
|
+
|
185
|
+
async def get_proxies(self, session_time: float = 0.40):
    """Fetch a list of proxy addresses.

    Free proxies take precedence when `_free_proxy` is enabled; otherwise
    the paid provider selected by `proxy_type` is queried. Unknown provider
    names yield an empty list.

    TODO: SELECT or rotate the free/paid proxies.
    """
    if self._free_proxy is True:
        return await FreeProxy().get_list()
    provider = self.proxy_type
    if provider == 'decodo':
        return await Decodo().get_list()
    if provider == 'oxylabs':
        paid = Oxylabs(
            session_time=session_time,
            timeout=10
        )
        return await paid.get_list()
    if provider == 'geonode':
        return await Geonode().get_list()
    # Unrecognized provider: no proxies.
    return []
|
204
|
+
|
205
|
+
async def refresh_proxies(self):
    """Re-populate the cached proxy list, but only when proxying is enabled."""
    if self.use_proxy is not True:
        return
    self._proxies = await self.get_proxies()
|
211
|
+
|
212
|
+
def build_url(self, url, queryparams: str = "", args=None):
    """
    Constructs a full URL with optional query parameters and arguments.

    :param url: The base URL to be formatted.
    :param queryparams: Additional query parameters to be appended to the URL.
    :param args: Arguments to format within the URL.
    :return: The fully constructed URL.
    """
    # First resolve task variables (missing keys are left intact by SafeDict).
    base = str(url).format_map(SafeDict(**self._variables))
    # Then resolve explicit arguments, if provided.
    resolved = base.format(**args) if args else base
    if queryparams:
        # Append with '&' when the URL already carries a query string.
        separator = "&" if "?" in resolved else "?"
        resolved = resolved + separator + queryparams
    logging.debug(
        f"Resource URL: {resolved!s}"
    )
    return resolved
def extract_host(self, url):
    """Return the network location (host[:port]) portion of *url*."""
    return urlparse(url).netloc
async def session(
    self,
    url: str,
    method: str = "get",
    data: dict = None,
    cookies: dict = None,
    headers: dict = None,
    use_json: bool = False,
    follow_redirects: bool = False,
    use_proxy: bool = False,
    accept: str = None,
    return_response: bool = False
):
    """
    Asynchronously sends an HTTP request using HTTPx.

    :param url: The URL to send the request to.
    :param method: The HTTP method to use (e.g., 'GET', 'POST').
    :param data: The data to send in the request body.
    :param cookies: A dictionary of cookies to send with the request.
    :param headers: Extra headers, merged on top of the instance headers.
    :param use_json: Whether to send the data as JSON.
    :param follow_redirects: Whether HTTPx should follow redirects.
    :param use_proxy: Whether to route the request through a proxy.
    :param accept: Optional override for the Accept header.
    :param return_response: When True, also return the raw response object.
    :return: A tuple containing the result and any error information,
        or (response, result, error) when *return_response* is True.
    """
    result = []
    error = {}
    auth = None
    proxies = None
    if accept is not None:
        self.headers["Accept"] = accept
    else:
        self.headers["Accept"] = self.accept
    if use_proxy is True:
        self._proxies = await self.get_proxies()
        if len(self._proxies) == 1:
            # Single proxy: passed as a plain URL string.
            proxies = self._proxies[0]
            if not proxies.startswith('http'):
                proxies = f"http://{proxies}"
        elif len(self._proxies) > 1:
            # Several proxies: pick one at random and mount per-scheme transports.
            proxy = random.choice(self._proxies)
            if not proxy.startswith('http'):
                proxy = f"http://{proxy}"
            proxies = {
                "http://": httpx.AsyncHTTPTransport(
                    proxy=f"http://{proxy}"
                ),
                "https://": httpx.AsyncHTTPTransport(
                    proxy=f"http://{proxy}"
                ),
            }
        else:
            self._proxies = None
    if self.credentials:
        if "apikey" in self.auth:
            self.headers[
                "Authorization"
            ] = f"{self.token_type} {self.auth['apikey']}"
        elif self.auth_type == "api_key":
            self.headers = {**self.headers, **self.credentials}
        elif self.auth_type == "key":
            # Credentials travel as query-string parameters.
            url = self.build_url(
                url, args=self.arguments, queryparams=urlencode(self.credentials)
            )
        elif self.auth_type in ["basic", "auth", "user"]:
            auth = (self.credentials["username"], self.credentials["password"])
    elif self._user and self.auth_type == "basic":
        auth = (self._user, self._pwd)
    cPrint(
        f"HTTP: Connecting to {url} using {method}",
        level="DEBUG"
    )
    if self.download is True:
        self.headers["Accept"] = "application/octet-stream"
        self.headers["Content-Type"] = "application/octet-stream"
        if self.use_streams is True:
            self.headers["Transfer-Encoding"] = "chunked"
    # FIX: the *headers* argument was previously overwritten with
    # ``self.headers`` before the merge, so caller-supplied headers were
    # silently discarded.  Merge the argument on top of the defaults instead.
    if headers is not None and isinstance(headers, dict):
        headers = {**self.headers, **headers}
    else:
        headers = self.headers
    timeout = httpx.Timeout(self.timeout)
    args = {"timeout": timeout, "headers": headers, "cookies": cookies}
    if auth is not None:
        args["auth"] = auth
    if proxies:
        if isinstance(proxies, dict):
            # Per-scheme AsyncHTTPTransport objects go in as mounts.
            args['mounts'] = proxies
        else:
            args["proxies"] = proxies
    # if self._debug is True:
    #     self.add_metric("HEADERS", headers)
    if proxies is not None:
        self.add_metric('Proxies', proxies)
    self.add_metric('URL', url)
    self.add_metric('METHOD', method)
    req_args = {
        "method": method.upper(),
        "url": url,
        "follow_redirects": follow_redirects,
        "json" if use_json else "data": data
    }
    # Process the response
    try:
        if hasattr(self, "_client"):
            # Use a client without context manager to keep the session alive
            # Remember to call `await self._client.aclose()` manually
            response = await self._client.request(**req_args)
        else:
            async with httpx.AsyncClient(**args) as client:
                response = await client.request(**req_args)

        result, error = await self.process_response(response, url)

        if return_response:
            return response, result, error

    except httpx.HTTPError as e:
        error = str(e)

    return (result, error)
async def async_request(
    self,
    url,
    method: str = 'GET',
    data: dict = None,
    use_json: bool = False,
    use_proxy: bool = False,
    accept: Optional[str] = None
):
    """
    Asynchronously sends an HTTP request using aiohttp.

    :param url: The URL to send the request to.
    :param method: The HTTP method to use (e.g., 'GET', 'POST').
    :param data: The data to send in the request body.
    :param use_json: Whether to send the data as JSON.
    :param use_proxy: force proxy usage.
    :param accept: Optional override for the Accept header.
    :return: A tuple containing the result and any error information.
    """
    result = []
    error = {}
    auth = None
    proxy = None
    if use_proxy is True:
        self._proxies = await self.get_proxies()
        if self._proxies:
            proxy = random.choice(self._proxies)
            self.add_metric("Proxies", proxy)
    if self.credentials:
        if "apikey" in self.auth:
            self.headers[
                "Authorization"
            ] = f"{self.token_type} {self.auth['apikey']}"
        elif self.auth_type == "api_key":
            self.headers = {**self.headers, **self.credentials}
        elif self.auth_type == "key":
            # Credentials travel as query-string parameters.
            url = self.build_url(
                url,
                args=self.arguments,
                queryparams=urlencode(self.credentials)
            )
        elif self.auth_type in ["basic", "auth", "user"]:
            auth = BasicAuth(
                self.credentials["username"],
                self.credentials["password"]
            )
        # FIX: removed an unreachable duplicated `elif "apikey" in self.auth`
        # branch (identical to the first condition of this chain).
        elif self.auth:
            # Generic token auth: first (type, token) pair from self.auth.
            token_type, token = list(self.auth.items())[0]
            self.headers["Authorization"] = f"{token_type} {token}"
    elif self._user and self.auth_type == "basic":
        auth = BasicAuth(self._user, self._pwd)
    cPrint(
        f"HTTP: Connecting to {url} using {method}",
        level="DEBUG"
    )
    if self._debug is True:
        self.add_metric("HEADERS", self.headers)
    self.add_metric("URL", url)
    self.add_metric("METHOD", method)
    if accept is not None:
        self.headers["Accept"] = accept
    else:
        self.headers["Accept"] = self.accept
    if self.download is True:
        self.headers["Accept"] = "application/octet-stream"
        self.headers["Content-Type"] = "application/octet-stream"
        if hasattr(self, "use_streams"):
            self.headers["Transfer-Encoding"] = "chunked"
            # FIX: a ``stream=True`` kwarg was previously injected into the
            # request call, but aiohttp has no such parameter and it raised
            # a TypeError; aiohttp responses stream by default.
    timeout = aiohttp.ClientTimeout(total=self.timeout)
    # FIX: ``auth`` was previously passed both to ClientSession *and* to
    # session.request(); aiohttp forbids combining the two and raised a
    # ValueError.  It is now applied at the session level only.
    async with aiohttp.ClientSession(
        headers=self.headers, timeout=timeout, auth=auth
    ) as session:
        payload = {"json": data} if use_json is True else {"data": data}
        try:
            async with session.request(
                method.upper(), url, proxy=proxy, **payload
            ) as response:
                # Process the response
                result, error = await self.process_response(response, url)
        except aiohttp.ClientError as e:
            error = str(e)
    return (result, error)
async def evaluate_error(
    self, response: Union[str, list], message: Union[str, list, dict]
) -> tuple:
    """evaluate_response.

    Check whether *message* matches an error contained in *response*.

    Args:
        response: error payload — a list of messages, a dict with an
            "errors" key, or a plain string.
        message: the error text (or value) to look for.

    Returns:
        True when a match is found, False otherwise.
    """
    # A list of potential error messages: substring/containment match.
    if isinstance(response, list):
        for item in response:
            if message in item:
                return True
    if isinstance(response, dict) and "errors" in response:
        errors = response["errors"]
        if isinstance(errors, list):
            for err in errors:
                try:
                    matched = message in err
                except TypeError:
                    # Non-container entry: fall back to equality.
                    matched = message == err
                if matched:
                    return True
        elif message == errors:
            return True
    elif message in response:
        return True
    return False
async def process_response(self, response, url: str) -> tuple:
    """
    Processes the response from an HTTP request.

    :param response: The response object from aiohttp (or httpx).
    :param url: The URL that was requested.
    :return: A tuple containing the processed result and any error information.
    :raises ConnectionError: for HTTP status >= 400, unless the status/message
        pair is whitelisted in ``self.no_errors``.
    """
    error = None
    result = None
    # Process the response
    status = self.response_status(response)

    if status >= 400:
        # Evaluate response body and headers.
        print(" == ERROR Headers == ")
        print(f"{response.headers}")
        content_type = response.headers.get("Content-Type", "").lower()
        if "application/json" in content_type:
            message = await self.response_json(response)
        elif "text/" in content_type:
            message = await self.response_text(response)
        elif "X-Error" in response.headers:
            message = response.headers["X-Error"]
        else:
            # Fallback to a unified read method for the raw body content
            message = await self.response_read(response)

        # Combine response headers and body for enriched logging
        error_context = {
            "status": status,
            "reason": await self.response_reason(response),
            "headers": response.headers,
            "body": message
        }

        # Log the detailed error context
        self._logger.error(f"Error: {error_context}")

        # Additional error handling or custom evaluation based on status
        if hasattr(self, 'no_errors'):
            for key, msg in self.no_errors.items():
                if int(key) == status and await self.evaluate_error(message, msg):
                    return response, status

        # Raise an exception if error handling does not continue
        raise ConnectionError(f"HTTP Error {status}: {message!s}")
    else:
        if self.download is True:
            filename = os.path.basename(url)
            # Get the filename from the response headers, if available
            content_disposition = response.headers.get("content-disposition")
            if content_disposition:
                # Parse RFC 6266 Content-Disposition via email.message.Message.
                msg = Message()
                msg["Content-Disposition"] = response.headers.get("content-disposition")
                filename = msg.get_param("filename", header="Content-Disposition")
                utf8_filename = msg.get_param("filename*", header="Content-Disposition")
                if utf8_filename:
                    # RFC 5987 extended value: charset''percent-encoded-name
                    _, utf8_filename = utf8_filename.split("''", 1)
                    filename = urllib.parse.unquote(utf8_filename)
            if "(unknown)" in str(self.filename):
                self.filename = str(self.filename).format_map(
                    SafeDict(filename=filename)
                )
            if "{" in str(self.filename):
                self.filename = str(self.filename).format_map(
                    SafeDict(**self.arguments)
                )
            if isinstance(self.filename, str):
                self.filename = Path(self.filename)
            # Saving File in Directory:
            total_length = response.headers.get("Content-Length")
            self._logger.info(
                f"HTTPClient: Saving File {self.filename}, size: {total_length}"
            )
            pathname = self.filename.parent.absolute()
            if not pathname.exists():
                # Create a new directory
                pathname.mkdir(parents=True, exist_ok=True)
            transfer = response.headers.get("transfer-encoding", None)
            # FIX: previously ``int(total_length)`` crashed with TypeError
            # when the Content-Length header was absent; fall back to 8192.
            if transfer is None and total_length:
                chunk_size = int(total_length)
            else:
                chunk_size = 8192
            # Asynchronous file writing
            if self.filename.exists() and self.filename.is_file():
                overwrite = self.destination.get("overwrite", True)
                if overwrite is False:
                    self._logger.warning(
                        f"HTTPClient: File Already exists: {self.filename}"
                    )
                    # Filename already exists
                    result = self.filename
                    return result, error
                else:
                    self._logger.warning(
                        f"HTTPClient: Overwriting File: {self.filename}"
                    )
                    # Delete the file before downloading again.
                    try:
                        self.filename.unlink()
                    except Exception as e:
                        self._logger.warning(
                            f"HTTPClient: Error Deleting File: {self.filename}, {e}"
                        )
            if hasattr(self, "use_streams") and self.use_streams is True:
                async with aiofiles.open(self.filename, "wb") as file:
                    async for chunk in response.content.iter_chunked(chunk_size):
                        await file.write(chunk)
            else:
                with open(self.filename, "wb") as fp:
                    try:
                        fp.write(await self.response_read(response))
                    except Exception:
                        # best-effort write: keep going even if the body
                        # could not be fully read
                        pass
            self._logger.debug(
                f"Filename Saved Successfully: {self.filename}"
            )
            result = self.filename
        else:
            if self.accept == 'application/octet-stream':
                data = await self.response_read(response)
                buffer = BytesIO(data)
                buffer.seek(0)
                result = buffer
            elif self.accept in ('text/html',):
                # FIX: ``self.accept in ('text/html')`` was a substring test
                # against a plain string (it also matched e.g. "html");
                # a one-element tuple restores exact membership.
                result = await self.response_read(response)
                try:
                    # html parser for lxml
                    self._parser = html.fromstring(result)
                    # BeautifulSoup parser
                    self._bs = bs(response.text, self._default_parser)
                    result = self._bs
                except Exception as e:
                    error = e
            elif self.accept in ('application/xhtml+xml', 'application/xml'):
                result = await self.response_read(response)
                try:
                    self._parser = etree.fromstring(result)
                except etree.XMLSyntaxError:
                    # Not strict XML: retry with the lenient HTML parser.
                    self._parser = html.fromstring(result)
                except Exception as e:
                    error = e
            elif self.accept == "application/json":
                try:
                    result = await self.response_json(response)
                except Exception as e:
                    logging.warning(e)
                    # is not an json, try first with beautiful soup:
                    try:
                        self._bs = bs(
                            await self.response_text(response),
                            self._default_parser
                        )
                        result = self._bs
                    except Exception:
                        error = e
            elif self.as_binary is True:
                result = await self.response_read(
                    response
                )
            else:
                result = await self.response_text(
                    response
                )
    return result, error
async def request(
    self,
    url: str,
    method: str = "GET",
    data: dict = None,
    use_proxy: bool = False,
    accept: Optional[str] = None
) -> tuple:
    """
    Sends an HTTP request using the requests library (run in a thread executor).

    :param url: The URL to send the request to.
    :param method: The HTTP method to use (e.g., 'GET', 'POST').
    :param data: The data to send in the request body.
    :param use_proxy: Whether to route the request through a proxy.
    :param accept: Optional override for the Accept header.
    :return: A tuple containing the result and any error information.
    :raises ComponentError: on proxy, timeout or unexpected request errors.
    """
    result = []
    error = {}
    auth = None
    proxies = None
    # FIX: the dispatch below compares against lowercase method names while
    # the default argument is "GET", so every default call previously fell
    # through to the POST branch.  Normalize case once, up front.
    method = method.lower()
    if use_proxy is True:
        self._proxies = await self.get_proxies()
        if self._proxies:
            proxy = random.choice(self._proxies)
            proxies = {"http": proxy, "https": proxy, "ftp": proxy}
            self.add_metric("Proxies", proxies)
    if self.credentials:
        if "apikey" in self.auth:
            self.headers[
                "Authorization"
            ] = f"{self.token_type} {self.auth['apikey']}"
        elif self.auth_type == "api_key":
            self.headers = {**self.headers, **self.credentials}
        elif self.auth_type == "key":
            # Credentials travel as query-string parameters.
            url = self.build_url(
                url, args=self.arguments, queryparams=urlencode(self.credentials)
            )
        elif self.auth_type == "basic":
            auth = HTTPBasicAuth(
                self.credentials["username"], self.credentials["password"]
            )
        else:
            # Default to basic auth when credentials are present but no
            # recognized auth_type is configured.
            auth = HTTPBasicAuth(
                self.credentials["username"], self.credentials["password"]
            )
    elif self._user and self.auth_type == "basic":
        auth = HTTPBasicAuth(self._user, self._pwd)
    cPrint(f"HTTP: Connecting to {url} using {method}", level="DEBUG")
    self.add_metric("URL", url)
    self.add_metric("METHOD", method)
    if auth is not None:
        args = {"auth": auth, "verify": False}
    else:
        args = {}
    if accept is not None:
        self.headers["Accept"] = accept
    else:
        self.headers["Accept"] = self.accept
    if self.download is True:
        self.headers["Accept"] = "application/octet-stream"
        self.headers["Content-Type"] = "application/octet-stream"
        if hasattr(self, "use_streams"):
            self.headers["Transfer-Encoding"] = "chunked"
            args["stream"] = True
    if self._debug is True:
        self.add_metric("HEADERS", self.headers)
    args["headers"] = self.headers
    args["timeout"] = self.timeout
    args["proxies"] = proxies
    if method == "get":
        my_request = partial(requests.get, **args)
    elif method == "post":
        my_request = partial(requests.post, data=data, **args)
    elif method == "put":
        my_request = partial(requests.put, data=data, **args)
    elif method == "delete":
        my_request = partial(requests.delete, data=data, **args)
    elif method == "patch":
        # FIX: was ``partial(requests.patch, data=data, *args)`` — the
        # keyword dict was unpacked positionally and the kwargs were lost.
        my_request = partial(requests.patch, data=data, **args)
    else:
        my_request = partial(requests.post, data=data, **args)
    try:
        # making request
        async with self._semaphore:
            loop = asyncio.get_running_loop()
            future = loop.run_in_executor(self._executor, my_request, url)
            result, error = await self.process_request(future, url)
        if error:
            if isinstance(error, BaseException):
                raise error
            else:
                raise ComponentError(f"{error!s}")
        return (result, error)
    except requests.exceptions.ReadTimeout as err:
        self._logger.warning(f"Timeout Error: {err!r}")
        # TODO: retrying
        raise ComponentError(f"Timeout: {err}") from err
    except Exception as err:
        self._logger.exception(str(err), stack_info=True)
        raise ComponentError(f"Error: {err}") from err
async def process_request(self, future, url: str):
    """
    Processes the result of an asynchronous HTTP request.

    :param future: The future object representing the asynchronous operation.
    :param url: The URL that was requested.
    :return: A tuple containing the result and any error information.
    :raises ComponentError: on proxy errors or unrecoverable HTTP errors.
    """
    # getting the result, based on the Accept logic
    error = None
    result = None
    loop = asyncio.get_running_loop()
    asyncio.set_event_loop(loop)
    done, _ = await asyncio.wait([future], return_when=asyncio.FIRST_COMPLETED)
    for f in done:
        response = f.result()
        # Check for HTTP errors
        try:
            response.raise_for_status()
        except HTTPError as http_err:
            # Handle HTTP errors here
            error = http_err
            # Log the error or perform other error handling
            self._logger.error(f"HTTP error occurred: {http_err}")
            # Skip body processing for this failed response.
            continue
        try:
            if self.download is True:
                # Filename:
                filename = os.path.basename(url)
                # Get the filename from the response headers, if available
                content_disposition = response.headers.get("content-disposition")
                if content_disposition:
                    _, params = content_disposition.split(";")
                    try:
                        key, value = params.strip().split("=")
                        if key == "filename":
                            filename = value.strip("'\"")
                    except ValueError:
                        pass
                if "(unknown)" in str(self.filename):
                    self.filename = str(self.filename).format_map(
                        SafeDict(filename=filename)
                    )
                if "{" in str(self.filename):
                    self.filename = str(self.filename).format_map(
                        SafeDict(**self.arguments)
                    )
                if isinstance(self.filename, str):
                    self.filename = Path(self.filename)
                # Saving File in Directory:
                total_length = response.headers.get("Content-Length")
                self._logger.info(
                    f"HTTPClient: Saving File {self.filename}, size: {total_length}"
                )
                pathname = self.filename.parent.absolute()
                if not pathname.exists():
                    # Create a new directory
                    pathname.mkdir(parents=True, exist_ok=True)
                response.raise_for_status()
                transfer = response.headers.get("transfer-encoding", None)
                # FIX: guard against a missing Content-Length header, which
                # previously crashed on ``int(None)``.
                if transfer is None and total_length:
                    chunk_size = int(total_length)
                else:
                    chunk_size = 8192
                if self.filename.exists() and self.filename.is_file():
                    overwrite = self.destination.get("overwrite", True)
                    if overwrite is False:
                        self._logger.warning(
                            f"HTTPClient: File Already exists: {self.filename}"
                        )
                        # Filename already exists
                        result = self.filename
                        continue
                    else:
                        self._logger.warning(
                            f"HTTPClient: Overwriting File: {self.filename}"
                        )
                        # Delete the file before downloading again.
                        try:
                            self.filename.unlink()
                        except Exception as e:
                            self._logger.warning(
                                f"HTTPClient: Error Deleting File: {self.filename}, {e}"
                            )
                with open(self.filename, "wb") as fp:
                    try:
                        for chunk in response.iter_content(chunk_size=chunk_size):
                            fp.write(chunk)
                        fp.flush()
                    except Exception:
                        # best-effort write: keep whatever was downloaded
                        pass
                self._logger.debug(f"Filename Saved Successfully: {self.filename}")
                result = self.filename
            elif self.accept in ("text/html",):
                # FIX: ``in ("text/html")`` was a substring test against a
                # plain string; a one-element tuple restores membership.
                result = response.content  # Get content of the response as bytes
                try:
                    # html parser for lxml
                    self._parser = html.fromstring(result)
                    # BeautifulSoup parser
                    self._bs = bs(response.text, self._default_parser)
                    result = self._bs
                except Exception as e:
                    error = e
            elif self.accept in ("application/xhtml+xml", "application/xml"):
                result = response.content  # Get content of the response as bytes
                try:
                    self._parser = etree.fromstring(result)
                except Exception as e:
                    error = e
            elif self.accept == "application/json":
                try:
                    result = response.json()
                except Exception as e:
                    logging.error(e)
                    # is not an json, try first with beautiful soup:
                    try:
                        self._bs = bs(response.text, self._default_parser)
                        result = self._bs
                    except Exception:
                        error = e
            else:
                result = response.text
        except requests.exceptions.ProxyError as err:
            raise ComponentError(f"Proxy Connection Error: {err!r}") from err
        except requests.ReadTimeout as err:
            return (result, err)
        except requests.exceptions.HTTPError as e:
            # FIX: previously logged/raised the stale ``error`` variable
            # (usually None) instead of the caught exception.
            self._logger.error(f"HTTP error occurred: {e}")
            raise ComponentError(f"HTTP Error: {e!s}") from e
        except Exception as e:
            logging.exception(e)
            return (result, e)
    # returning results
    return (result, error)
@staticmethod
|
901
|
+
async def response_read(response):
|
902
|
+
if hasattr(response, 'aread'):
|
903
|
+
return await response.aread()
|
904
|
+
|
905
|
+
return await response.read()
|
906
|
+
|
907
|
+
@staticmethod
|
908
|
+
async def response_json(response):
|
909
|
+
if asyncio.iscoroutinefunction(response.json):
|
910
|
+
return await response.json()
|
911
|
+
|
912
|
+
return response.json()
|
913
|
+
|
914
|
+
@staticmethod
|
915
|
+
def response_status(response):
|
916
|
+
if hasattr(response, 'status_code'):
|
917
|
+
return response.status_code
|
918
|
+
|
919
|
+
return response.status
|
920
|
+
|
921
|
+
@staticmethod
|
922
|
+
async def response_text(response):
|
923
|
+
if asyncio.iscoroutinefunction(response.text):
|
924
|
+
return await response.text()
|
925
|
+
|
926
|
+
return response.text
|
927
|
+
|
928
|
+
@staticmethod
|
929
|
+
async def response_reason(response):
|
930
|
+
# Attempt to retrieve `reason`, `reason_phrase`, or fallback to an empty string
|
931
|
+
reason = getattr(response, 'reason', getattr(response, 'reason_phrase', b''))
|
932
|
+
|
933
|
+
return f"{reason!s}"
|
934
|
+
|
935
|
+
@backoff.on_exception(
    backoff.expo,
    (httpx.HTTPStatusError, httpx.TimeoutException),  # Catch HTTP errors and timeouts
    max_tries=3,
    max_time=120,
    jitter=backoff.full_jitter,
    on_backoff=lambda details: print(
        f"Retrying HTTP Get: attempt {details['tries']} after {details['wait']:0.2f}s"
    ),
)
async def _get(
    self,
    url: str,
    cookies: httpx.Cookies = None,
    params: Dict[str, Any] = None,
    headers: Dict[str, str] = None,
    timeout: Union[int, float] = 30.0,
    use_proxy: bool = True,
    free_proxy: bool = False,
    connect_timeout: Union[int, float] = 5.0,
    read_timeout: Union[int, float] = 20.0,
    write_timeout: Union[int, float] = 5.0,
    pool_timeout: Union[int, float] = 20.0,
    num_retries: int = 2,
    **kwargs
) -> Dict[str, Any]:
    """
    Make an asynchronous HTTP GET request, returning the response object.

    Retried up to 3 times (with exponential backoff + jitter) on
    httpx.HTTPStatusError / httpx.TimeoutException via the decorator.

    Args:
        url (str): The URL to send the GET request to.
        cookies (httpx.Cookies): Cookies to include in the request.
        params (dict): Dictionary of query parameters to include in the URL.
        headers (dict): Headers for the request.
        timeout: overall request timeout; the connect/read/write/pool
            values below refine it per phase.
        use_proxy: when True, a proxy list is fetched via get_proxies().
        free_proxy: NOTE(review): accepted but never read in this body —
            provider choice appears to be driven by self._free_proxy instead.
        num_retries: transport-level connection retries (httpx transport),
            separate from the backoff retries of the decorator.

    Returns:
        Response: The response object from the httpx.

    Raises:
        httpx.TimeoutException, httpx.HTTPError, ComponentError.
    """
    proxies = None
    if use_proxy is True:
        self._proxies = await self.get_proxies()
        if len(self._proxies) == 1:
            # Single proxy: used as a plain URL string.
            proxies = self._proxies[0]
            if not proxies.startswith('http'):
                proxies = f"http://{proxies}"
        elif len(self._proxies) > 1:
            # Multiple proxies: pick one at random, build per-scheme transports.
            # NOTE(review): this dict of AsyncHTTPTransport objects is later
            # passed to AsyncClient(proxy=...), which expects a URL/Proxy, not
            # mounts — confirm whether `mounts=` was intended here.
            proxy = random.choice(self._proxies)
            if not proxy.startswith('http'):
                proxy = f"http://{proxy}"
            proxies = {
                "http://": httpx.AsyncHTTPTransport(
                    proxy=f"http://{proxy}"
                ),
                "https://": httpx.AsyncHTTPTransport(
                    proxy=f"http://{proxy}"
                ),
            }
        else:
            self._proxies = None

    # Define custom SSL context
    ssl_context = ssl.create_default_context()
    # Disable older protocols if needed
    ssl_context.options |= ssl.OP_NO_TLSv1 | ssl.OP_NO_TLSv1_1
    # Ensure at least TLS 1.2 is used
    ssl_context.minimum_version = ssl.TLSVersion.TLSv1_2
    # Certificate verification is disabled entirely (accepts any cert).
    ssl_context.check_hostname = False
    ssl_context.verify_mode = ssl.CERT_NONE

    # Use AsyncHTTPTransport to pass in SSL context explicitly
    transport = httpx.AsyncHTTPTransport(
        retries=num_retries,
        verify=ssl_context
    )
    timeout = httpx.Timeout(
        timeout=timeout,
        connect=connect_timeout,
        read=read_timeout,
        write=write_timeout,
        pool=pool_timeout
    )
    async with httpx.AsyncClient(
        cookies=cookies,
        proxy=proxies or None,
        transport=transport,
        headers=headers,
        timeout=timeout,
        http2=True,
        follow_redirects=True,
        **kwargs
    ) as client:
        try:
            response = await client.get(
                url,
                params=params  # Pass query parameters here
            )
            response.raise_for_status()
            return response
        except httpx.TimeoutException:
            print("Request timed out.")
            raise
        except httpx.HTTPError as ex:
            print(f"HTTP error occurred: {ex}")
            raise httpx.HTTPError(ex) from ex
        except Exception as exc:
            print('EXC > ', exc)
            raise ComponentError(
                f"An error occurred: {exc}"
            ) from exc
@backoff.on_exception(
    backoff.expo,
    (httpx.HTTPStatusError, httpx.TimeoutException),  # Catch HTTP errors and timeouts
    max_tries=3,
    max_time=120,
    jitter=backoff.full_jitter,
    on_backoff=lambda details: print(
        f"Retrying HTTP Get: attempt {details['tries']} after {details['wait']:0.2f}s"
    ),
)
async def _post(
    self,
    url: str,
    cookies: httpx.Cookies,
    params: Dict[str, Any] = None,
    headers: Dict[str, str] = None,
    data: Dict[str, str] = None,
    follow_redirects: bool = True,
    raise_for_status: bool = True,
    use_proxy: bool = True,
    free_proxy: bool = False,
) -> Dict[str, Any]:
    """
    Make an asynchronous HTTP POST request, returning the response object.

    Args:
        url (str): The URL to send the POST request to.
        cookies (httpx.Cookies): Cookies to include in the request.
        params (dict): Dictionary of query parameters to include in the URL.
        headers (dict): Extra HTTP headers for the request.
        data (dict): Form data sent as the request body.
        follow_redirects (bool): Whether redirects are followed automatically.
        raise_for_status (bool): Whether to raise on 4xx/5xx responses.
        use_proxy (bool): Route the request through a proxy from get_proxies().
        free_proxy (bool): Accepted for interface compatibility; not used here.

    Returns:
        Response: The response object from the httpx.

    Raises:
        httpx.TimeoutException: When the request times out.
        httpx.HTTPError: On HTTP-level failures (including status errors).
        ComponentError: On any other unexpected failure.
    """
    proxy_url = None
    mounts = None
    if use_proxy is True:
        self._proxies = await self.get_proxies()
        if len(self._proxies) == 1:
            proxy_url = self._proxies[0]
            if not proxy_url.startswith('http'):
                proxy_url = f"http://{proxy_url}"
        elif len(self._proxies) > 1:
            proxy = random.choice(self._proxies)
            if not proxy.startswith('http'):
                proxy = f"http://{proxy}"
            # FIX: a dict of transports must be passed via ``mounts=``;
            # handing it to ``proxy=`` raises a TypeError in httpx.
            # Also avoid the former double "http://http://..." prefix.
            mounts = {
                "http://": httpx.AsyncHTTPTransport(proxy=proxy),
                "https://": httpx.AsyncHTTPTransport(proxy=proxy),
            }
    else:
        self._proxies = None

    # Custom SSL context: require TLS >= 1.2 but skip certificate checks
    # (scraping-friendly, intentionally insecure).
    ssl_context = ssl.create_default_context()
    ssl_context.options |= ssl.OP_NO_TLSv1 | ssl.OP_NO_TLSv1_1
    ssl_context.minimum_version = ssl.TLSVersion.TLSv1_2
    ssl_context.check_hostname = False
    ssl_context.verify_mode = ssl.CERT_NONE

    # Use AsyncHTTPTransport to pass in SSL context explicitly
    transport = httpx.AsyncHTTPTransport(retries=2, verify=ssl_context)
    timeout = httpx.Timeout(connect=5.0, read=20.0, write=5.0, pool=20.0)

    async with httpx.AsyncClient(
        cookies=cookies,
        proxy=proxy_url,
        mounts=mounts,
        transport=transport,
        headers=headers,
        timeout=timeout
    ) as client:
        try:
            response = await client.post(
                url,
                data=data,
                params=params,
                follow_redirects=follow_redirects
            )
            if raise_for_status:
                response.raise_for_status()
            return response
        except httpx.TimeoutException:
            print("Request timed out.")
            raise
        except httpx.HTTPError as ex:
            print(f"HTTP error occurred: {ex}")
            # FIX: re-raise the original exception instead of wrapping it in a
            # base HTTPError — wrapping hid HTTPStatusError from the backoff
            # decorator above, so status errors were never retried.
            raise
        except Exception as exc:
            print('EXC > ', exc)
            raise ComponentError(
                f"An error occurred: {exc}"
            ) from exc
|
1140
|
+
|
1141
|
+
@backoff.on_exception(
    backoff.expo,
    (httpx.HTTPStatusError, httpx.TimeoutException),  # Catch HTTP errors and timeouts
    max_tries=3,
    max_time=120,
    jitter=backoff.full_jitter,
    on_backoff=lambda details: print(
        f"Retrying HTTP Get: attempt {details['tries']} after {details['wait']:0.2f}s"
    ),
)
async def api_get(
    self,
    url: str,
    cookies: httpx.Cookies = None,
    params: Dict[str, Any] = None,
    headers: Dict[str, str] = None,
    use_proxy: bool = True,
    free_proxy: bool = False,
) -> Dict[str, Any]:
    """
    Make an asynchronous HTTP GET request.

    Args:
        url (str): The URL to send the GET request to.
        cookies (httpx.Cookies): Cookies to include in the request.
        params (dict): Dictionary of query parameters to include in the URL.
        headers (dict): Extra HTTP headers for the request.
        use_proxy (bool): Route the request through a proxy from get_proxies().
        free_proxy (bool): Accepted for interface compatibility; not used here.

    Returns:
        dict: The JSON response from the API if the request is successful.
        Returns an empty dictionary if the request fails.
    """
    proxy_url = None
    mounts = None
    if use_proxy is True:
        self._proxies = await self.get_proxies()
        if len(self._proxies) == 1:
            proxy_url = self._proxies[0]
            if not proxy_url.startswith('http'):
                proxy_url = f"http://{proxy_url}"
        elif len(self._proxies) > 1:
            proxy = random.choice(self._proxies)
            if not proxy.startswith('http'):
                proxy = f"http://{proxy}"
            # FIX: per-scheme transports belong in ``mounts=``; passing this
            # dict to ``proxy=`` raises a TypeError in httpx.  Also drop the
            # old double "http://http://..." prefix.
            mounts = {
                "http://": httpx.AsyncHTTPTransport(proxy=proxy),
                "https://": httpx.AsyncHTTPTransport(proxy=proxy),
            }
    else:
        self._proxies = None

    # Custom SSL context: require TLS >= 1.2, no certificate validation.
    ssl_context = ssl.create_default_context()
    ssl_context.options |= ssl.OP_NO_TLSv1 | ssl.OP_NO_TLSv1_1
    ssl_context.minimum_version = ssl.TLSVersion.TLSv1_2
    ssl_context.check_hostname = False
    ssl_context.verify_mode = ssl.CERT_NONE

    # Use AsyncHTTPTransport to pass in SSL context explicitly
    transport = httpx.AsyncHTTPTransport(retries=2, verify=ssl_context)
    timeout = httpx.Timeout(connect=5.0, read=20.0, write=5.0, pool=20.0)

    async with httpx.AsyncClient(
        cookies=cookies,
        proxy=proxy_url,
        mounts=mounts,
        transport=transport,
        headers=headers,
        timeout=timeout
    ) as client:
        try:
            response = await client.get(
                url,
                params=params
            )
            response.raise_for_status()
            if response.status_code == 200:
                return response.json()
            # raise_for_status() already rejected 4xx/5xx, so this branch can
            # only see non-200 2xx codes (e.g. 204 No Content).
            print(
                f"API request failed with status code {response.status_code}"
            )
            return {}
        except httpx.TimeoutException:
            print("Request timed out.")
            raise
        except httpx.HTTPError as ex:
            print(f"HTTP error occurred: {ex}")
            # FIX: re-raise the original exception; wrapping it in a plain
            # HTTPError hid HTTPStatusError from the backoff retry decorator.
            raise
        except Exception as exc:
            print('EXC > ', exc)
            raise ComponentError(
                f"An error occurred: {exc}"
            ) from exc
|
1238
|
+
|
1239
|
+
@backoff.on_exception(
    backoff.expo,
    (httpx.HTTPStatusError, httpx.TimeoutException),  # Catch HTTP errors and timeouts
    max_tries=3,
    max_time=120,
    jitter=backoff.full_jitter,
    on_backoff=lambda details: print(
        f"Retrying HTTP Get: attempt {details['tries']} after {details['wait']:0.2f}s"
    ),
)
async def api_post(
    self,
    url: str,
    payload: Dict,
    cookies: httpx.Cookies = None,
    use_proxy: bool = True,
    free_proxy: bool = False,
    full_response: bool = False
) -> Dict[str, Any]:
    """
    Make an asynchronous HTTP POST request with a JSON payload.

    Args:
        url (str): The URL to send the POST request to.
        payload (dict): JSON-serializable body for the request.
        cookies (httpx.Cookies): Cookies to include in the request.
        use_proxy (bool): Route the request through a proxy from get_proxies().
        free_proxy (bool): Accepted for interface compatibility; not used here.
        full_response (bool): If True, return the raw response object instead
            of the decoded JSON body.

    Returns:
        dict: Decoded JSON body on HTTP 200, an empty dict on other 2xx
        codes, or the full response object when ``full_response`` is True.
    """
    proxy_url = None
    mounts = None
    if use_proxy is True:
        self._proxies = await self.get_proxies()
        if len(self._proxies) == 1:
            proxy_url = self._proxies[0]
            if not proxy_url.startswith('http'):
                proxy_url = f"http://{proxy_url}"
        elif len(self._proxies) > 1:
            proxy = random.choice(self._proxies)
            if not proxy.startswith('http'):
                proxy = f"http://{proxy}"
            # FIX: transports dict must go through ``mounts=``, not ``proxy=``
            # (httpx rejects a dict there); also drop the former double
            # "http://http://..." prefix.
            mounts = {
                "http://": httpx.AsyncHTTPTransport(proxy=proxy),
                "https://": httpx.AsyncHTTPTransport(proxy=proxy),
            }
    else:
        self._proxies = None

    # Custom SSL context: require TLS >= 1.2, no certificate validation.
    ssl_context = ssl.create_default_context()
    ssl_context.options |= ssl.OP_NO_TLSv1 | ssl.OP_NO_TLSv1_1
    ssl_context.minimum_version = ssl.TLSVersion.TLSv1_2
    ssl_context.check_hostname = False
    ssl_context.verify_mode = ssl.CERT_NONE

    # Use AsyncHTTPTransport to pass in SSL context explicitly
    transport = httpx.AsyncHTTPTransport(retries=2, verify=ssl_context)
    timeout = httpx.Timeout(connect=5.0, read=20.0, write=5.0, pool=20.0)

    async with httpx.AsyncClient(
        cookies=cookies,
        proxy=proxy_url,
        mounts=mounts,
        transport=transport
    ) as client:
        try:
            response = await client.post(
                url,
                json=payload,
                headers=self.headers,
                timeout=timeout
            )
            response.raise_for_status()
            if full_response:
                return response
            if response.status_code == 200:
                return response.json()
            # Only non-200 2xx codes can reach here after raise_for_status().
            print(
                f"API request failed with status code {response.status_code}"
            )
            return {}
        except httpx.TimeoutException:
            raise
        except httpx.HTTPError as ex:
            # FIX: re-raise the original exception; wrapping discarded the
            # HTTPStatusError subtype and broke the backoff retry decorator,
            # and the wrapper also lacked exception chaining.
            raise
        except Exception as exc:
            print('EXC > ', exc)
            # FIX: chain with ``from exc`` so the original traceback survives.
            raise ComponentError(
                f"An error occurred: {exc}"
            ) from exc
|
1323
|
+
|
1324
|
+
@backoff.on_exception(
    backoff.expo,
    (RatelimitException, TimeoutException, DuckDuckGoSearchException),
    max_tries=5,
    max_time=120,  # Extended max time to allow sufficient retries
    jitter=backoff.full_jitter,  # Introduces randomization in retry timing
    on_backoff=lambda details: print(
        f"Retrying DuckDuckGo search: attempt {details['tries']} after {details['wait']:0.2f}s"
    ),
)
async def _search_duckduckgo(
    self,
    query: str,
    max_results: int = 5,
    use_proxy: bool = True,
    timeout: int = 20,
    headers: dict = None,
    region: str = "wt-wt",
    backend: str = 'lite'
):
    """
    Search DuckDuckGo for a given query.

    Args:
        query (str): The search query.
        max_results (int): The maximum number of results to return.
        use_proxy (bool): Whether to use a proxy for the search.
        timeout (int): Request timeout in seconds, also forwarded to DDGS.
        headers (dict): Extra headers merged over ``self.headers``.
        region (str): DDG region code (default "wt-wt" = worldwide).
        backend (str): DDG backend to query (default 'lite').

    Returns:
        list: A list of search results.
    """
    # Select a proxy: a single configured proxy is used as a plain URL;
    # multiple proxies become a per-scheme transport mapping.
    proxies = None
    if use_proxy is True:
        self._proxies = await self.get_proxies()
        if len(self._proxies) == 1:
            proxies = self._proxies[0]
            if not proxies.startswith('http'):
                proxies = f"http://{proxies}"
        elif len(self._proxies) > 1:
            proxy = random.choice(self._proxies)
            if not proxy.startswith('http'):
                proxy = f"http://{proxy}"
            # NOTE(review): these transports embed f"http://{proxy}" on an
            # already-prefixed value, yielding "http://http://..." — and DDGS
            # expects a proxy *string*, not a dict of transports.  Confirm
            # whether the multi-proxy path is ever exercised.
            proxies = {
                "http://": httpx.AsyncHTTPTransport(
                    proxy=f"http://{proxy}"
                ),
                "https://": httpx.AsyncHTTPTransport(
                    proxy=f"http://{proxy}"
                ),
            }
    else:
        self._proxies = None
    # Merge caller headers over the instance defaults and randomize the UA
    # to reduce fingerprinting.
    if headers is None:
        headers = {}
    headers = {**self.headers, **headers}
    headers["User-Agent"] = random.choice(ua)
    try:
        with DDGS(
            headers=headers,
            proxy=proxies,
            timeout=timeout,
            verify=False
        ) as search:
            # 🐵 Monkey Patching Primp Client to avoid Rate-Limits issues:
            # replaces DDGS's internal HTTP client with a primp client that
            # impersonates a random browser/OS and keeps no cookies.
            search.client = primp.Client(
                headers=search.headers,
                proxy=proxies,
                timeout=timeout,
                cookie_store=False,  # 🚀 Disable cookie persistence dynamically
                referer=True,
                impersonate=random.choice(DDGS._impersonates),
                impersonate_os=random.choice(DDGS._impersonates_os),
                follow_redirects=False,
                verify=False,
            )
            # NOTE(review): DDGS.text's ``timelimit`` is a date filter
            # ('d'/'w'/'m'/'y'); passing the numeric ``timeout`` here looks
            # unintentional — confirm against duckduckgo_search docs.
            return search.text(
                keywords=query,
                timelimit=timeout,
                max_results=max_results,
                backend=backend,
                region=region
            )
    except DuckDuckGoSearchException as e:
        # Treat any DDG search failure as a rate-limit so the backoff
        # decorator above retries it.
        raise RatelimitException(
            f"Error on DuckDuckGo Search: {e}"
        ) from e
    except Exception as e:
        raise RuntimeError(
            f"DuckDuckGo Error: {e}"
        ) from e
|
1414
|
+
|
1415
|
+
@backoff.on_exception(
    backoff.expo,
    (httpx.HTTPStatusError, httpx.TimeoutException, httpx.RemoteProtocolError),  # Catch HTTP errors and timeouts
    max_tries=5,
    max_time=120,
    jitter=backoff.full_jitter,
    on_backoff=lambda details: print(
        f"Retrying Google Search: attempt {details['tries']} after {details['wait']:0.2f}s"
    ),
)
async def _search_google(
    self,
    query: str,
    exact_term: str = None,
    max_results: int = 5,
    use_proxy: bool = True,
    timeout: int = 20,
    headers: dict = None,
    region: str = None,
    country: str = None,
    language: str = None,
    use_primp: bool = False,
    **kwargs
):
    """
    Run a Google Custom Search for *query*.

    Two paths: when ``use_primp`` is True the rendered CSE page is fetched
    with a browser-impersonating primp client and scraped; otherwise the
    official Custom Search JSON API is called via httpx.

    Args:
        query (str): Search terms.
        exact_term (str): Optional phrase that must appear verbatim.
        max_results (int): Number of results requested.
        use_proxy (bool): Route the request through a proxy from get_proxies().
        timeout (int): Request timeout in seconds.
        headers (dict): Extra headers merged over ``self.headers``.
        region (str): Geolocation bias (``gl`` parameter).
        country (str): Country restriction (``cr`` parameter).
        language (str): Result language (``hl`` parameter).
        use_primp (bool): Scrape the CSE page instead of calling the API.
        **kwargs: Forwarded to the underlying GET call.

    Returns:
        dict | list: API JSON response, or a list of scraped results when
        ``use_primp`` is True.
    """
    # NOTE(review): headers are only merged when the caller passed some;
    # with headers=None the Referer is never set — confirm this is intended.
    if headers:
        headers = {
            **self.headers,
            **headers,
            "Referer": "https://www.google.com/",
        }
    proxies = None
    if use_proxy is True:
        self._proxies = await self.get_proxies()
        if len(self._proxies) == 1:
            proxies = self._proxies[0]
            if not proxies.startswith('http'):
                proxies = f"http://{proxies}"
        elif len(self._proxies) > 1:
            proxy = random.choice(self._proxies)
            if not proxy.startswith('http'):
                proxy = f"http://{proxy}"
            # NOTE(review): this builds "http://http://..." (proxy is already
            # prefixed) and hands a dict of transports to clients expecting a
            # proxy URL — verify the multi-proxy path.
            proxies = {
                "http://": httpx.AsyncHTTPTransport(
                    proxy=f"http://{proxy}"
                ),
                "https://": httpx.AsyncHTTPTransport(
                    proxy=f"http://{proxy}"
                ),
            }
    else:
        self._proxies = None
    # Query parameters for the Custom Search JSON API.
    args = {
        "q": query,
        "cx": str(GOOGLE_SEARCH_ENGINE_ID),
        "num": str(max_results),
        "key": str(self.google_api_key),
        "hl": "en",  # UI language in English
        "sort": "date",  # Prefer newer content
    }
    if region:
        args["gl"] = region  # Geolocation
    if country:
        args["cr"] = country  # Country restriction
    if language:
        args["hl"] = language  # Language preference (overrides the "en" default)
    if exact_term:
        args["exactTerms"] = exact_term
    if use_primp:
        # Use Primp Client instead httpx:
        client = primp.Client(
            headers=headers,
            proxy=proxies,  # Use proxy if enabled
            timeout=timeout,
            cookie_store=False,  # 🚀 Disable cookie persistence
            referer=True,
            impersonate=random.choice(impersonates),
            impersonate_os=random.choice(impersonates_os),
            follow_redirects=True,
            verify=False
        )
        try:
            # Scrape the public CSE page rather than calling the API.
            query = quote(query)
            search_url = f"https://cse.google.com/cse?cx={GOOGLE_SEARCH_ENGINE_ID}#gsc.tab=0&gsc.q={query}&gsc.sort="  # noqa
            response = client.get(
                search_url,
                **kwargs
            )
            if response.status_code != 200:
                raise RuntimeError(
                    f"Google Search API failed with status {response.status_code}: {response.text}"
                )
            return self._parse_google_cse_results(response.text, max_results)
        except Exception as e:
            print(f"Unexpected error: {e}")
            raise RuntimeError(
                f"Primp Unexpected error: {e}"
            ) from e
    else:
        t = httpx.Timeout(timeout, connect=5.0, read=20.0, write=5.0, pool=20.0)
        async with httpx.AsyncClient(
            proxy=proxies,
            timeout=t,
        ) as client:
            try:
                response = await client.get(
                    "https://customsearch.googleapis.com/customsearch/v1",
                    headers=headers,
                    params=args,
                    **kwargs
                )
                response.raise_for_status()
                if response.status_code == 200:
                    return response.json()
                else:
                    # Only non-200 2xx codes can reach here after
                    # raise_for_status().
                    raise RuntimeError(
                        f"Google Search API failed: {response.text}, status: {response.status_code}"
                    )
            except httpx.HTTPStatusError as e:
                print(f"Search Google: HTTP error: {e.response.status_code} - {e.response.text}")
                raise
            except httpx.TimeoutException:
                print("Search Google: Request timed out")
                raise
            except httpx.RemoteProtocolError:  # ✅ Catch server disconnection error
                print("Search Google: Server disconnected. Retrying with redirect enabled...")
                raise
            except Exception as e:
                print(f"Search Google: Unexpected error: {e}")
                raise
|
1544
|
+
|
1545
|
+
def get_httpx_cookies(self, domain: str = None, path: str = '/', cookies: dict = None):
    """
    Build an httpx.Cookies jar from a plain dict.

    Every entry is scoped to *domain* and *path*; a missing or empty
    *cookies* dict yields an empty jar.
    """
    jar = httpx.Cookies()
    for name, value in (cookies or {}).items():
        jar.set(name, value, domain=domain, path=path)
    return jar
|
1556
|
+
|
1557
|
+
def _parse_google_cse_results(self, html_content: str, max_results: int):
    """
    Extracts search results from the rendered HTML of `cse.google.com/cse`.

    Args:
        html_content (str): The HTML response from the search.
        max_results (int): Max number of results to return.

    Returns:
        list: List of dicts with "title" and "url" keys, at most
        ``max_results`` entries.
    """
    soup = bs(html_content, "html.parser")
    results = []

    # FIX: removed a leftover debug print() that dumped the entire HTML
    # payload to stdout on every call.

    # Extract results from the dynamically loaded content
    for item in soup.select(".gsc-webResult")[:max_results]:  # Adjust this selector if necessary
        title_tag = item.select_one(".gs-title")
        url_tag = item.select_one(".gs-title a")

        # Skip entries missing either the title node or the link node.
        if title_tag and url_tag:
            title = title_tag.get_text(strip=True)
            url = url_tag["href"]
            results.append({"title": title, "url": url})

    return results
|
1584
|
+
@backoff.on_exception(
    backoff.expo,
    (httpx.HTTPStatusError, httpx.TimeoutException),  # Catch HTTP errors and timeouts
    max_tries=3,
    max_time=120,
    jitter=backoff.full_jitter,
    on_backoff=lambda details: logging.warning(
        f"Retrying HTTP Get: attempt {details['tries']} after {details['wait']:0.2f}s"
    ),
    giveup=lambda e: isinstance(e, httpx.HTTPStatusError) and e.response.status_code not in [429, 500, 502, 503, 504]  # pylint: disable=C0301 # noqa
)
async def _request(
    self,
    url: str,
    method: str = 'get',
    cookies: Optional[httpx.Cookies] = None,
    params: Optional[Dict[str, Any]] = None,
    data: Optional[Dict[str, Any]] = None,
    headers: Optional[Dict[str, str]] = None,
    timeout: Union[int, float] = 30.0,
    use_proxy: bool = True,
    free_proxy: bool = False,
    use_ssl: bool = True,
    use_json: bool = False,
    follow_redirects: bool = True,
    raise_for_status: bool = True,
    full_response: bool = False,
    connect_timeout: Union[int, float] = 5.0,
    read_timeout: Union[int, float] = 20.0,
    write_timeout: Union[int, float] = 5.0,
    pool_timeout: Union[int, float] = 20.0,
    num_retries: int = 2,
    **kwargs
) -> Dict[str, Any]:
    """
    Make an asynchronous HTTPx request, returning the response object.

    Args:
        url (str): The URL to send the request to.
        method (str): The HTTP method to use (default: 'get').
        headers (dict, optional): Dictionary of HTTP headers to include in the request.
        cookies (httpx.Cookies, optional): Cookies to include in the request.
        params (dict, optional): Dictionary of query parameters to include in the URL.
        data (dict, optional): Dictionary of data to send in the request body.
        timeout (float, optional): Total timeout for the request in seconds.
        use_proxy (bool): Whether to use a proxy for the request.
        free_proxy (bool): Whether to use a free proxy.
        use_ssl (bool): Whether to use SSL for the request.
        use_json (bool): Whether to send data as JSON.
        follow_redirects (bool): Whether to follow redirects.
        raise_for_status (bool): Whether to raise an exception for HTTP errors.
        full_response (bool): Whether to return the full response object.
        connect_timeout (float): Timeout for connecting to the server.
        read_timeout (float): Timeout for reading from the server.
        write_timeout (float): Timeout for writing to the server.
        pool_timeout (float): Timeout for connection pool operations.
        num_retries (int): Number of retries to attempt at the transport level.
        **kwargs: Additional arguments to pass to httpx.AsyncClient.
            Recognized framework options (consumed here, NOT forwarded):
            ``verify_ssl``, ``use_http2``, ``download``, ``filename``,
            ``transport_options``.

    Returns:
        Tuple[Any, Optional[Dict[str, Any]]]: A tuple containing the result and any error information.

    Raises:
        httpx.TimeoutException: When the request times out.
        httpx.TooManyRedirects: When too many redirects are encountered.
        httpx.HTTPStatusError: When an HTTP error status is encountered.
        httpx.HTTPError: When an HTTP-related error occurs.
        ValueError: When the HTTP method is invalid.
        RuntimeError: When an unknown error occurs.
    """
    # FIX: pop framework-specific options out of kwargs BEFORE the remaining
    # kwargs are splatted into httpx.AsyncClient — previously these keys were
    # read with kwargs.get() but left in place, so AsyncClient raised a
    # TypeError whenever a caller actually used one of them.
    verify_ssl = kwargs.pop('verify_ssl', True)
    use_http2 = kwargs.pop('use_http2', True)
    download = kwargs.pop('download', False)
    filename = kwargs.pop('filename', None)

    proxy_url = None
    mounts = None
    if use_proxy is True:
        self._proxies = await self.get_proxies()
        if len(self._proxies) == 1:
            proxy_url = self._proxies[0]
            if not proxy_url.startswith('http'):
                proxy_url = f"http://{proxy_url}"
        elif len(self._proxies) > 1:
            proxy = random.choice(self._proxies)
            if not proxy.startswith('http'):
                proxy = f"http://{proxy}"
            # FIX: per-scheme transports go through ``mounts=``; passing the
            # dict to ``proxy=`` raises a TypeError in httpx.  Also avoid the
            # former double "http://http://..." prefix.
            mounts = {
                "http://": httpx.AsyncHTTPTransport(proxy=proxy),
                "https://": httpx.AsyncHTTPTransport(proxy=proxy),
            }
    else:
        self._proxies = None

    ssl_context = None
    if use_ssl:
        # Define custom SSL context
        ssl_context = ssl.create_default_context()
        # Disable older protocols if needed
        ssl_context.options |= ssl.OP_NO_TLSv1 | ssl.OP_NO_TLSv1_1
        # Ensure at least TLS 1.2 is used
        ssl_context.minimum_version = ssl.TLSVersion.TLSv1_2
        # Configurable rather than hardcoded to CERT_NONE
        if verify_ssl:
            ssl_context.check_hostname = True
            ssl_context.verify_mode = ssl.CERT_REQUIRED
        else:
            ssl_context.check_hostname = False
            ssl_context.verify_mode = ssl.CERT_NONE

    # Use AsyncHTTPTransport to pass in SSL context explicitly.
    # FIX: fall back to verify=False when no SSL context was built —
    # httpx does not accept verify=None.
    transport_options = {
        'retries': num_retries,
        'verify': ssl_context if ssl_context is not None else False
    }
    if 'transport_options' in kwargs:
        transport_options.update(kwargs.pop('transport_options'))
    transport = httpx.AsyncHTTPTransport(
        **transport_options
    )
    timeout = httpx.Timeout(
        timeout=timeout,
        connect=connect_timeout,
        read=read_timeout,
        write=write_timeout,
        pool=pool_timeout
    )
    method = method.upper()
    if method not in valid_methods:
        raise ValueError(
            f"Invalid HTTP method: {method}. Must be one of {valid_methods}"
        )
    async with httpx.AsyncClient(
        cookies=cookies,
        proxy=proxy_url,
        mounts=mounts,
        transport=transport,
        headers=headers,
        timeout=timeout,
        http2=use_http2,
        follow_redirects=follow_redirects,
        **kwargs
    ) as client:
        try:
            args = {
                "method": method,
                "url": url,
                "follow_redirects": follow_redirects
            }
            if data:
                if use_json:
                    args["json"] = data
                else:
                    args["data"] = data
            if params:
                args["params"] = params
            # FIX: collapsed the duplicated ``if self._httpclient`` branches —
            # both sides issued the identical client.request(**args) call.
            response = await client.request(**args)
            if raise_for_status:
                response.raise_for_status()
            if full_response:
                if self.logger.isEnabledFor(logging.DEBUG):
                    self.logger.debug(
                        f"Response from {url}: status={response.status_code}, headers={response.headers}"
                    )
                return response, None
            result, error = await self.process_response(
                response,
                url,
                download=download,
                filename=filename
            )
            return result, error
        except httpx.TimeoutException:
            self.logger.error("Request timed out.")
            raise
        except httpx.TooManyRedirects:
            self.logger.error("Too many redirects.")
            raise
        except httpx.HTTPStatusError as ex:
            self.logger.error(
                f"HTTP status error occurred: {ex.response.status_code} - {ex}"
            )
            raise
        except httpx.HTTPError as ex:
            self.logger.error(f"HTTP error occurred: {ex}")
            raise
        except AttributeError as e:
            self.logger.error(f"HTTPx Client doesn't have attribute {method}: {e}")
            raise
        except Exception as exc:
            self.logger.error(f'Unknown Error > {exc}')
            raise RuntimeError(
                f"An error occurred: {exc}"
            ) from exc