flowtask 5.8.4__cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (470)
  1. flowtask/__init__.py +93 -0
  2. flowtask/__main__.py +38 -0
  3. flowtask/bots/__init__.py +6 -0
  4. flowtask/bots/check.py +93 -0
  5. flowtask/bots/codebot.py +51 -0
  6. flowtask/components/ASPX.py +148 -0
  7. flowtask/components/AddDataset.py +352 -0
  8. flowtask/components/Amazon.py +523 -0
  9. flowtask/components/AutoTask.py +314 -0
  10. flowtask/components/Azure.py +80 -0
  11. flowtask/components/AzureUsers.py +106 -0
  12. flowtask/components/BaseAction.py +91 -0
  13. flowtask/components/BaseLoop.py +198 -0
  14. flowtask/components/BestBuy.py +800 -0
  15. flowtask/components/CSVToGCS.py +120 -0
  16. flowtask/components/CompanyScraper/__init__.py +1 -0
  17. flowtask/components/CompanyScraper/parsers/__init__.py +6 -0
  18. flowtask/components/CompanyScraper/parsers/base.py +102 -0
  19. flowtask/components/CompanyScraper/parsers/explorium.py +192 -0
  20. flowtask/components/CompanyScraper/parsers/leadiq.py +206 -0
  21. flowtask/components/CompanyScraper/parsers/rocket.py +133 -0
  22. flowtask/components/CompanyScraper/parsers/siccode.py +109 -0
  23. flowtask/components/CompanyScraper/parsers/visualvisitor.py +130 -0
  24. flowtask/components/CompanyScraper/parsers/zoominfo.py +118 -0
  25. flowtask/components/CompanyScraper/scrapper.py +1054 -0
  26. flowtask/components/CopyTo.py +177 -0
  27. flowtask/components/CopyToBigQuery.py +243 -0
  28. flowtask/components/CopyToMongoDB.py +291 -0
  29. flowtask/components/CopyToPg.py +609 -0
  30. flowtask/components/CopyToRethink.py +207 -0
  31. flowtask/components/CreateGCSBucket.py +102 -0
  32. flowtask/components/CreateReport/CreateReport.py +228 -0
  33. flowtask/components/CreateReport/__init__.py +9 -0
  34. flowtask/components/CreateReport/charts/__init__.py +15 -0
  35. flowtask/components/CreateReport/charts/bar.py +51 -0
  36. flowtask/components/CreateReport/charts/base.py +66 -0
  37. flowtask/components/CreateReport/charts/pie.py +64 -0
  38. flowtask/components/CreateReport/utils.py +9 -0
  39. flowtask/components/CustomerSatisfaction.py +196 -0
  40. flowtask/components/DataInput.py +200 -0
  41. flowtask/components/DateList.py +255 -0
  42. flowtask/components/DbClient.py +163 -0
  43. flowtask/components/DialPad.py +146 -0
  44. flowtask/components/DocumentDBQuery.py +200 -0
  45. flowtask/components/DownloadFrom.py +371 -0
  46. flowtask/components/DownloadFromD2L.py +113 -0
  47. flowtask/components/DownloadFromFTP.py +181 -0
  48. flowtask/components/DownloadFromIMAP.py +315 -0
  49. flowtask/components/DownloadFromS3.py +198 -0
  50. flowtask/components/DownloadFromSFTP.py +265 -0
  51. flowtask/components/DownloadFromSharepoint.py +110 -0
  52. flowtask/components/DownloadFromSmartSheet.py +114 -0
  53. flowtask/components/DownloadS3File.py +229 -0
  54. flowtask/components/Dummy.py +59 -0
  55. flowtask/components/DuplicatePhoto.py +411 -0
  56. flowtask/components/EmployeeEvaluation.py +237 -0
  57. flowtask/components/ExecuteSQL.py +323 -0
  58. flowtask/components/ExtractHTML.py +178 -0
  59. flowtask/components/FileBase.py +178 -0
  60. flowtask/components/FileCopy.py +181 -0
  61. flowtask/components/FileDelete.py +82 -0
  62. flowtask/components/FileExists.py +146 -0
  63. flowtask/components/FileIteratorDelete.py +112 -0
  64. flowtask/components/FileList.py +194 -0
  65. flowtask/components/FileOpen.py +75 -0
  66. flowtask/components/FileRead.py +120 -0
  67. flowtask/components/FileRename.py +106 -0
  68. flowtask/components/FilterIf.py +284 -0
  69. flowtask/components/FilterRows/FilterRows.py +200 -0
  70. flowtask/components/FilterRows/__init__.py +10 -0
  71. flowtask/components/FilterRows/functions.py +4 -0
  72. flowtask/components/GCSToBigQuery.py +103 -0
  73. flowtask/components/GoogleA4.py +150 -0
  74. flowtask/components/GoogleGeoCoding.py +344 -0
  75. flowtask/components/GooglePlaces.py +315 -0
  76. flowtask/components/GoogleSearch.py +539 -0
  77. flowtask/components/HTTPClient.py +268 -0
  78. flowtask/components/ICIMS.py +146 -0
  79. flowtask/components/IF.py +179 -0
  80. flowtask/components/IcimsFolderCopy.py +173 -0
  81. flowtask/components/ImageFeatures/__init__.py +5 -0
  82. flowtask/components/ImageFeatures/process.py +233 -0
  83. flowtask/components/IteratorBase.py +251 -0
  84. flowtask/components/LangchainLoader/__init__.py +5 -0
  85. flowtask/components/LangchainLoader/loader.py +194 -0
  86. flowtask/components/LangchainLoader/loaders/__init__.py +22 -0
  87. flowtask/components/LangchainLoader/loaders/abstract.py +362 -0
  88. flowtask/components/LangchainLoader/loaders/basepdf.py +50 -0
  89. flowtask/components/LangchainLoader/loaders/docx.py +91 -0
  90. flowtask/components/LangchainLoader/loaders/html.py +119 -0
  91. flowtask/components/LangchainLoader/loaders/pdfblocks.py +146 -0
  92. flowtask/components/LangchainLoader/loaders/pdfmark.py +79 -0
  93. flowtask/components/LangchainLoader/loaders/pdftables.py +135 -0
  94. flowtask/components/LangchainLoader/loaders/qa.py +67 -0
  95. flowtask/components/LangchainLoader/loaders/txt.py +55 -0
  96. flowtask/components/LeadIQ.py +650 -0
  97. flowtask/components/Loop.py +253 -0
  98. flowtask/components/Lowes.py +334 -0
  99. flowtask/components/MS365Usage.py +156 -0
  100. flowtask/components/MSTeamsMessages.py +320 -0
  101. flowtask/components/MarketClustering.py +1051 -0
  102. flowtask/components/MergeFiles.py +362 -0
  103. flowtask/components/MilvusOutput.py +87 -0
  104. flowtask/components/NearByStores.py +175 -0
  105. flowtask/components/NetworkNinja/__init__.py +6 -0
  106. flowtask/components/NetworkNinja/models/__init__.py +52 -0
  107. flowtask/components/NetworkNinja/models/abstract.py +177 -0
  108. flowtask/components/NetworkNinja/models/account.py +39 -0
  109. flowtask/components/NetworkNinja/models/client.py +19 -0
  110. flowtask/components/NetworkNinja/models/district.py +14 -0
  111. flowtask/components/NetworkNinja/models/events.py +101 -0
  112. flowtask/components/NetworkNinja/models/forms.py +499 -0
  113. flowtask/components/NetworkNinja/models/market.py +16 -0
  114. flowtask/components/NetworkNinja/models/organization.py +34 -0
  115. flowtask/components/NetworkNinja/models/photos.py +125 -0
  116. flowtask/components/NetworkNinja/models/project.py +44 -0
  117. flowtask/components/NetworkNinja/models/region.py +28 -0
  118. flowtask/components/NetworkNinja/models/store.py +203 -0
  119. flowtask/components/NetworkNinja/models/user.py +151 -0
  120. flowtask/components/NetworkNinja/router.py +854 -0
  121. flowtask/components/Odoo.py +175 -0
  122. flowtask/components/OdooInjector.py +192 -0
  123. flowtask/components/OpenFromXML.py +126 -0
  124. flowtask/components/OpenWeather.py +41 -0
  125. flowtask/components/OpenWithBase.py +616 -0
  126. flowtask/components/OpenWithPandas.py +715 -0
  127. flowtask/components/PGPDecrypt.py +199 -0
  128. flowtask/components/PandasIterator.py +187 -0
  129. flowtask/components/PandasToFile.py +189 -0
  130. flowtask/components/Paradox.py +339 -0
  131. flowtask/components/ParamIterator.py +117 -0
  132. flowtask/components/ParseHTML.py +84 -0
  133. flowtask/components/PlacerStores.py +249 -0
  134. flowtask/components/Pokemon.py +507 -0
  135. flowtask/components/PositiveBot.py +62 -0
  136. flowtask/components/PowerPointSlide.py +400 -0
  137. flowtask/components/PrintMessage.py +127 -0
  138. flowtask/components/ProductCompetitors/__init__.py +5 -0
  139. flowtask/components/ProductCompetitors/parsers/__init__.py +7 -0
  140. flowtask/components/ProductCompetitors/parsers/base.py +72 -0
  141. flowtask/components/ProductCompetitors/parsers/bestbuy.py +86 -0
  142. flowtask/components/ProductCompetitors/parsers/lowes.py +103 -0
  143. flowtask/components/ProductCompetitors/scrapper.py +155 -0
  144. flowtask/components/ProductCompliant.py +169 -0
  145. flowtask/components/ProductInfo/__init__.py +1 -0
  146. flowtask/components/ProductInfo/parsers/__init__.py +5 -0
  147. flowtask/components/ProductInfo/parsers/base.py +83 -0
  148. flowtask/components/ProductInfo/parsers/brother.py +97 -0
  149. flowtask/components/ProductInfo/parsers/canon.py +167 -0
  150. flowtask/components/ProductInfo/parsers/epson.py +118 -0
  151. flowtask/components/ProductInfo/parsers/hp.py +131 -0
  152. flowtask/components/ProductInfo/parsers/samsung.py +97 -0
  153. flowtask/components/ProductInfo/scraper.py +319 -0
  154. flowtask/components/ProductPricing.py +118 -0
  155. flowtask/components/QS.py +261 -0
  156. flowtask/components/QSBase.py +201 -0
  157. flowtask/components/QueryIterator.py +273 -0
  158. flowtask/components/QueryToInsert.py +327 -0
  159. flowtask/components/QueryToPandas.py +432 -0
  160. flowtask/components/RESTClient.py +195 -0
  161. flowtask/components/RethinkDBQuery.py +189 -0
  162. flowtask/components/Rsync.py +74 -0
  163. flowtask/components/RunSSH.py +59 -0
  164. flowtask/components/RunShell.py +71 -0
  165. flowtask/components/SalesForce.py +20 -0
  166. flowtask/components/SaveImageBank/__init__.py +257 -0
  167. flowtask/components/SchedulingVisits.py +592 -0
  168. flowtask/components/ScrapPage.py +216 -0
  169. flowtask/components/ScrapSearch.py +79 -0
  170. flowtask/components/SendNotify.py +257 -0
  171. flowtask/components/SentimentAnalysis.py +694 -0
  172. flowtask/components/ServiceScrapper/__init__.py +5 -0
  173. flowtask/components/ServiceScrapper/parsers/__init__.py +1 -0
  174. flowtask/components/ServiceScrapper/parsers/base.py +94 -0
  175. flowtask/components/ServiceScrapper/parsers/costco.py +93 -0
  176. flowtask/components/ServiceScrapper/scrapper.py +199 -0
  177. flowtask/components/SetVariables.py +156 -0
  178. flowtask/components/SubTask.py +182 -0
  179. flowtask/components/SuiteCRM.py +48 -0
  180. flowtask/components/Switch.py +175 -0
  181. flowtask/components/TableBase.py +148 -0
  182. flowtask/components/TableDelete.py +312 -0
  183. flowtask/components/TableInput.py +143 -0
  184. flowtask/components/TableOutput/TableOutput.py +384 -0
  185. flowtask/components/TableOutput/__init__.py +3 -0
  186. flowtask/components/TableSchema.py +534 -0
  187. flowtask/components/Target.py +223 -0
  188. flowtask/components/ThumbnailGenerator.py +156 -0
  189. flowtask/components/ToPandas.py +67 -0
  190. flowtask/components/TransformRows/TransformRows.py +507 -0
  191. flowtask/components/TransformRows/__init__.py +9 -0
  192. flowtask/components/TransformRows/functions.py +559 -0
  193. flowtask/components/TransposeRows.py +176 -0
  194. flowtask/components/UPCDatabase.py +86 -0
  195. flowtask/components/UnGzip.py +171 -0
  196. flowtask/components/Uncompress.py +172 -0
  197. flowtask/components/UniqueRows.py +126 -0
  198. flowtask/components/Unzip.py +107 -0
  199. flowtask/components/UpdateOperationalVars.py +147 -0
  200. flowtask/components/UploadTo.py +299 -0
  201. flowtask/components/UploadToS3.py +136 -0
  202. flowtask/components/UploadToSFTP.py +160 -0
  203. flowtask/components/UploadToSharepoint.py +205 -0
  204. flowtask/components/UserFunc.py +122 -0
  205. flowtask/components/VivaTracker.py +140 -0
  206. flowtask/components/WSDLClient.py +123 -0
  207. flowtask/components/Wait.py +18 -0
  208. flowtask/components/Walmart.py +199 -0
  209. flowtask/components/Workplace.py +134 -0
  210. flowtask/components/XMLToPandas.py +267 -0
  211. flowtask/components/Zammad/__init__.py +41 -0
  212. flowtask/components/Zammad/models.py +0 -0
  213. flowtask/components/ZoomInfoScraper.py +409 -0
  214. flowtask/components/__init__.py +104 -0
  215. flowtask/components/abstract.py +18 -0
  216. flowtask/components/flow.py +530 -0
  217. flowtask/components/google.py +335 -0
  218. flowtask/components/group.py +221 -0
  219. flowtask/components/py.typed +0 -0
  220. flowtask/components/reviewscrap.py +132 -0
  221. flowtask/components/tAutoincrement.py +117 -0
  222. flowtask/components/tConcat.py +109 -0
  223. flowtask/components/tExplode.py +119 -0
  224. flowtask/components/tFilter.py +184 -0
  225. flowtask/components/tGroup.py +236 -0
  226. flowtask/components/tJoin.py +270 -0
  227. flowtask/components/tMap/__init__.py +9 -0
  228. flowtask/components/tMap/functions.py +54 -0
  229. flowtask/components/tMap/tMap.py +450 -0
  230. flowtask/components/tMelt.py +112 -0
  231. flowtask/components/tMerge.py +114 -0
  232. flowtask/components/tOrder.py +93 -0
  233. flowtask/components/tPandas.py +94 -0
  234. flowtask/components/tPivot.py +71 -0
  235. flowtask/components/tPluckCols.py +76 -0
  236. flowtask/components/tUnnest.py +82 -0
  237. flowtask/components/user.py +401 -0
  238. flowtask/conf.py +457 -0
  239. flowtask/download.py +102 -0
  240. flowtask/events/__init__.py +11 -0
  241. flowtask/events/events/__init__.py +20 -0
  242. flowtask/events/events/abstract.py +95 -0
  243. flowtask/events/events/alerts/__init__.py +362 -0
  244. flowtask/events/events/alerts/colfunctions.py +131 -0
  245. flowtask/events/events/alerts/functions.py +158 -0
  246. flowtask/events/events/dummy.py +12 -0
  247. flowtask/events/events/exec.py +124 -0
  248. flowtask/events/events/file/__init__.py +7 -0
  249. flowtask/events/events/file/base.py +51 -0
  250. flowtask/events/events/file/copy.py +23 -0
  251. flowtask/events/events/file/delete.py +16 -0
  252. flowtask/events/events/interfaces/__init__.py +9 -0
  253. flowtask/events/events/interfaces/client.py +67 -0
  254. flowtask/events/events/interfaces/credentials.py +28 -0
  255. flowtask/events/events/interfaces/notifications.py +58 -0
  256. flowtask/events/events/jira.py +122 -0
  257. flowtask/events/events/log.py +26 -0
  258. flowtask/events/events/logerr.py +52 -0
  259. flowtask/events/events/notify.py +59 -0
  260. flowtask/events/events/notify_event.py +160 -0
  261. flowtask/events/events/publish.py +54 -0
  262. flowtask/events/events/sendfile.py +104 -0
  263. flowtask/events/events/task.py +97 -0
  264. flowtask/events/events/teams.py +98 -0
  265. flowtask/events/events/webhook.py +58 -0
  266. flowtask/events/manager.py +287 -0
  267. flowtask/exceptions.c +39393 -0
  268. flowtask/exceptions.cpython-312-x86_64-linux-gnu.so +0 -0
  269. flowtask/extensions/__init__.py +3 -0
  270. flowtask/extensions/abstract.py +82 -0
  271. flowtask/extensions/logging/__init__.py +65 -0
  272. flowtask/hooks/__init__.py +9 -0
  273. flowtask/hooks/actions/__init__.py +22 -0
  274. flowtask/hooks/actions/abstract.py +66 -0
  275. flowtask/hooks/actions/dummy.py +23 -0
  276. flowtask/hooks/actions/jira.py +74 -0
  277. flowtask/hooks/actions/rest.py +320 -0
  278. flowtask/hooks/actions/sampledata.py +37 -0
  279. flowtask/hooks/actions/sensor.py +23 -0
  280. flowtask/hooks/actions/task.py +9 -0
  281. flowtask/hooks/actions/ticket.py +37 -0
  282. flowtask/hooks/actions/zammad.py +55 -0
  283. flowtask/hooks/hook.py +62 -0
  284. flowtask/hooks/models.py +17 -0
  285. flowtask/hooks/service.py +187 -0
  286. flowtask/hooks/step.py +91 -0
  287. flowtask/hooks/types/__init__.py +23 -0
  288. flowtask/hooks/types/base.py +129 -0
  289. flowtask/hooks/types/brokers/__init__.py +11 -0
  290. flowtask/hooks/types/brokers/base.py +54 -0
  291. flowtask/hooks/types/brokers/mqtt.py +35 -0
  292. flowtask/hooks/types/brokers/rabbitmq.py +82 -0
  293. flowtask/hooks/types/brokers/redis.py +83 -0
  294. flowtask/hooks/types/brokers/sqs.py +44 -0
  295. flowtask/hooks/types/fs.py +232 -0
  296. flowtask/hooks/types/http.py +49 -0
  297. flowtask/hooks/types/imap.py +200 -0
  298. flowtask/hooks/types/jira.py +279 -0
  299. flowtask/hooks/types/mail.py +205 -0
  300. flowtask/hooks/types/postgres.py +98 -0
  301. flowtask/hooks/types/responses/__init__.py +8 -0
  302. flowtask/hooks/types/responses/base.py +5 -0
  303. flowtask/hooks/types/sharepoint.py +288 -0
  304. flowtask/hooks/types/ssh.py +141 -0
  305. flowtask/hooks/types/tagged.py +59 -0
  306. flowtask/hooks/types/upload.py +85 -0
  307. flowtask/hooks/types/watch.py +71 -0
  308. flowtask/hooks/types/web.py +36 -0
  309. flowtask/interfaces/AzureClient.py +137 -0
  310. flowtask/interfaces/AzureGraph.py +839 -0
  311. flowtask/interfaces/Boto3Client.py +326 -0
  312. flowtask/interfaces/DropboxClient.py +173 -0
  313. flowtask/interfaces/ExcelHandler.py +94 -0
  314. flowtask/interfaces/FTPClient.py +131 -0
  315. flowtask/interfaces/GoogleCalendar.py +201 -0
  316. flowtask/interfaces/GoogleClient.py +133 -0
  317. flowtask/interfaces/GoogleDrive.py +127 -0
  318. flowtask/interfaces/GoogleGCS.py +89 -0
  319. flowtask/interfaces/GoogleGeocoding.py +93 -0
  320. flowtask/interfaces/GoogleLang.py +114 -0
  321. flowtask/interfaces/GooglePub.py +61 -0
  322. flowtask/interfaces/GoogleSheet.py +68 -0
  323. flowtask/interfaces/IMAPClient.py +137 -0
  324. flowtask/interfaces/O365Calendar.py +113 -0
  325. flowtask/interfaces/O365Client.py +220 -0
  326. flowtask/interfaces/OneDrive.py +284 -0
  327. flowtask/interfaces/Outlook.py +155 -0
  328. flowtask/interfaces/ParrotBot.py +130 -0
  329. flowtask/interfaces/SSHClient.py +378 -0
  330. flowtask/interfaces/Sharepoint.py +496 -0
  331. flowtask/interfaces/__init__.py +36 -0
  332. flowtask/interfaces/azureauth.py +119 -0
  333. flowtask/interfaces/cache.py +201 -0
  334. flowtask/interfaces/client.py +82 -0
  335. flowtask/interfaces/compress.py +525 -0
  336. flowtask/interfaces/credentials.py +124 -0
  337. flowtask/interfaces/d2l.py +239 -0
  338. flowtask/interfaces/databases/__init__.py +5 -0
  339. flowtask/interfaces/databases/db.py +223 -0
  340. flowtask/interfaces/databases/documentdb.py +55 -0
  341. flowtask/interfaces/databases/rethink.py +39 -0
  342. flowtask/interfaces/dataframes/__init__.py +11 -0
  343. flowtask/interfaces/dataframes/abstract.py +21 -0
  344. flowtask/interfaces/dataframes/arrow.py +71 -0
  345. flowtask/interfaces/dataframes/dt.py +69 -0
  346. flowtask/interfaces/dataframes/pandas.py +167 -0
  347. flowtask/interfaces/dataframes/polars.py +60 -0
  348. flowtask/interfaces/db.py +263 -0
  349. flowtask/interfaces/env.py +46 -0
  350. flowtask/interfaces/func.py +137 -0
  351. flowtask/interfaces/http.py +1780 -0
  352. flowtask/interfaces/locale.py +40 -0
  353. flowtask/interfaces/log.py +75 -0
  354. flowtask/interfaces/mask.py +143 -0
  355. flowtask/interfaces/notification.py +154 -0
  356. flowtask/interfaces/playwright.py +339 -0
  357. flowtask/interfaces/powerpoint.py +368 -0
  358. flowtask/interfaces/py.typed +0 -0
  359. flowtask/interfaces/qs.py +376 -0
  360. flowtask/interfaces/result.py +87 -0
  361. flowtask/interfaces/selenium_service.py +779 -0
  362. flowtask/interfaces/smartsheet.py +154 -0
  363. flowtask/interfaces/stat.py +39 -0
  364. flowtask/interfaces/task.py +96 -0
  365. flowtask/interfaces/template.py +118 -0
  366. flowtask/interfaces/vectorstores/__init__.py +1 -0
  367. flowtask/interfaces/vectorstores/abstract.py +133 -0
  368. flowtask/interfaces/vectorstores/milvus.py +669 -0
  369. flowtask/interfaces/zammad.py +107 -0
  370. flowtask/models.py +193 -0
  371. flowtask/parsers/__init__.py +15 -0
  372. flowtask/parsers/_yaml.c +11978 -0
  373. flowtask/parsers/_yaml.cpython-312-x86_64-linux-gnu.so +0 -0
  374. flowtask/parsers/argparser.py +235 -0
  375. flowtask/parsers/base.c +15155 -0
  376. flowtask/parsers/base.cpython-312-x86_64-linux-gnu.so +0 -0
  377. flowtask/parsers/json.c +11968 -0
  378. flowtask/parsers/json.cpython-312-x86_64-linux-gnu.so +0 -0
  379. flowtask/parsers/maps.py +49 -0
  380. flowtask/parsers/toml.c +11968 -0
  381. flowtask/parsers/toml.cpython-312-x86_64-linux-gnu.so +0 -0
  382. flowtask/plugins/__init__.py +16 -0
  383. flowtask/plugins/components/__init__.py +0 -0
  384. flowtask/plugins/handler/__init__.py +45 -0
  385. flowtask/plugins/importer.py +31 -0
  386. flowtask/plugins/sources/__init__.py +0 -0
  387. flowtask/runner.py +283 -0
  388. flowtask/scheduler/__init__.py +9 -0
  389. flowtask/scheduler/functions.py +493 -0
  390. flowtask/scheduler/handlers/__init__.py +8 -0
  391. flowtask/scheduler/handlers/manager.py +504 -0
  392. flowtask/scheduler/handlers/models.py +58 -0
  393. flowtask/scheduler/handlers/service.py +72 -0
  394. flowtask/scheduler/notifications.py +65 -0
  395. flowtask/scheduler/scheduler.py +993 -0
  396. flowtask/services/__init__.py +0 -0
  397. flowtask/services/bots/__init__.py +0 -0
  398. flowtask/services/bots/telegram.py +264 -0
  399. flowtask/services/files/__init__.py +11 -0
  400. flowtask/services/files/manager.py +522 -0
  401. flowtask/services/files/model.py +37 -0
  402. flowtask/services/files/service.py +767 -0
  403. flowtask/services/jira/__init__.py +3 -0
  404. flowtask/services/jira/jira_actions.py +191 -0
  405. flowtask/services/tasks/__init__.py +13 -0
  406. flowtask/services/tasks/launcher.py +213 -0
  407. flowtask/services/tasks/manager.py +323 -0
  408. flowtask/services/tasks/service.py +275 -0
  409. flowtask/services/tasks/task_manager.py +376 -0
  410. flowtask/services/tasks/tasks.py +155 -0
  411. flowtask/storages/__init__.py +16 -0
  412. flowtask/storages/exceptions.py +12 -0
  413. flowtask/storages/files/__init__.py +8 -0
  414. flowtask/storages/files/abstract.py +29 -0
  415. flowtask/storages/files/filesystem.py +66 -0
  416. flowtask/storages/tasks/__init__.py +19 -0
  417. flowtask/storages/tasks/abstract.py +26 -0
  418. flowtask/storages/tasks/database.py +33 -0
  419. flowtask/storages/tasks/filesystem.py +108 -0
  420. flowtask/storages/tasks/github.py +119 -0
  421. flowtask/storages/tasks/memory.py +45 -0
  422. flowtask/storages/tasks/row.py +25 -0
  423. flowtask/tasks/__init__.py +0 -0
  424. flowtask/tasks/abstract.py +526 -0
  425. flowtask/tasks/command.py +118 -0
  426. flowtask/tasks/pile.py +486 -0
  427. flowtask/tasks/py.typed +0 -0
  428. flowtask/tasks/task.py +778 -0
  429. flowtask/template/__init__.py +161 -0
  430. flowtask/tests.py +257 -0
  431. flowtask/types/__init__.py +8 -0
  432. flowtask/types/typedefs.c +11347 -0
  433. flowtask/types/typedefs.cpython-312-x86_64-linux-gnu.so +0 -0
  434. flowtask/utils/__init__.py +24 -0
  435. flowtask/utils/constants.py +117 -0
  436. flowtask/utils/encoders.py +21 -0
  437. flowtask/utils/executor.py +112 -0
  438. flowtask/utils/functions.cpp +14280 -0
  439. flowtask/utils/functions.cpython-312-x86_64-linux-gnu.so +0 -0
  440. flowtask/utils/json.cpp +13349 -0
  441. flowtask/utils/json.cpython-312-x86_64-linux-gnu.so +0 -0
  442. flowtask/utils/mail.py +63 -0
  443. flowtask/utils/parseqs.c +13324 -0
  444. flowtask/utils/parserqs.cpython-312-x86_64-linux-gnu.so +0 -0
  445. flowtask/utils/stats.py +308 -0
  446. flowtask/utils/transformations.py +74 -0
  447. flowtask/utils/uv.py +12 -0
  448. flowtask/utils/validators.py +97 -0
  449. flowtask/version.py +11 -0
  450. flowtask-5.8.4.dist-info/LICENSE +201 -0
  451. flowtask-5.8.4.dist-info/METADATA +209 -0
  452. flowtask-5.8.4.dist-info/RECORD +470 -0
  453. flowtask-5.8.4.dist-info/WHEEL +6 -0
  454. flowtask-5.8.4.dist-info/entry_points.txt +3 -0
  455. flowtask-5.8.4.dist-info/top_level.txt +2 -0
  456. plugins/components/CreateQR.py +39 -0
  457. plugins/components/TestComponent.py +28 -0
  458. plugins/components/Use1.py +13 -0
  459. plugins/components/Workplace.py +117 -0
  460. plugins/components/__init__.py +3 -0
  461. plugins/sources/__init__.py +0 -0
  462. plugins/sources/get_populartimes.py +78 -0
  463. plugins/sources/google.py +150 -0
  464. plugins/sources/hubspot.py +679 -0
  465. plugins/sources/icims.py +679 -0
  466. plugins/sources/mobileinsight.py +501 -0
  467. plugins/sources/newrelic.py +262 -0
  468. plugins/sources/uap.py +268 -0
  469. plugins/sources/venu.py +244 -0
  470. plugins/sources/vocinity.py +314 -0
@@ -0,0 +1,1780 @@
1
+ from typing import Optional, Union, Dict, Any
2
+ import os
3
+ import asyncio
4
+ import random
5
+ import urllib.parse
6
+ from email.message import Message
7
+ from concurrent.futures import ThreadPoolExecutor
8
+ from functools import partial
9
+ from io import BytesIO
10
+ import ssl
11
+ from pathlib import Path
12
+ from urllib.parse import quote, urlencode, urlparse
13
+ import urllib3
14
+ import aiofiles
15
+ import requests
16
+ import backoff
17
+ from requests.auth import HTTPBasicAuth
18
+ from requests.exceptions import HTTPError
19
+ from requests.exceptions import Timeout as RequestTimeoutException
20
+ from googleapiclient.discovery import build
21
+ from googleapiclient.errors import HttpError
22
+ from duckduckgo_search import DDGS
23
+ from duckduckgo_search.exceptions import (
24
+ ConversationLimitException,
25
+ DuckDuckGoSearchException,
26
+ RatelimitException,
27
+ TimeoutException,
28
+ )
29
+ import primp
30
+ import aiohttp
31
+ from aiohttp import BasicAuth
32
+ import httpx
33
+ from bs4 import BeautifulSoup as bs
34
+ from lxml import html, etree
35
+ from navconfig.logging import logging
36
+ from proxylists.proxies import (
37
+ FreeProxy,
38
+ Oxylabs,
39
+ Decodo,
40
+ Geonode
41
+ )
42
+ from ..utils import cPrint, SafeDict
43
+ from ..utils.json import JSONContent
44
+ from ..conf import (
45
+ HTTPCLIENT_MAX_SEMAPHORE,
46
+ HTTPCLIENT_MAX_WORKERS,
47
+ GOOGLE_SEARCH_API_KEY,
48
+ GOOGLE_SEARCH_ENGINE_ID
49
+ )
50
+ from .dataframes import PandasDataframe
51
+ from ..exceptions import ComponentError
52
+ from .credentials import CredentialsInterface
53
+
54
+
55
# Quiet down chatty third-party HTTP stacks; keep the rquest logger at INFO.
for _noisy in ("urllib3", "httpx", "httpcore", "aiohttp"):
    logging.getLogger(_noisy).setLevel(logging.WARNING)
urllib3.disable_warnings()
logging.getLogger("rquest").setLevel(logging.INFO)
61
+
62
+
63
# Desktop and mobile User-Agent strings used for UA rotation.
# NOTE: every entry must end with a comma — Python silently concatenates
# adjacent string literals, which previously fused two entries (see below).
ua = [
    # Chrome - Desktop (Windows)
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36",
    # Chrome - Desktop (Mac)
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36",  # noqa
    # Safari - Desktop (Mac)
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Safari/605.1.15",  # noqa
    # Firefox - Desktop (Windows)
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/118.0",
    # Edge - Desktop (Windows)
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.46",  # noqa
    # Chrome - Mobile (Android)
    "Mozilla/5.0 (Linux; Android 13; SM-G991B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Mobile Safari/537.36",  # noqa
    # Safari - Mobile (iOS)
    "Mozilla/5.0 (iPhone; CPU iPhone OS 16_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1",  # noqa
    # Samsung Internet - Mobile (Android)
    "Mozilla/5.0 (Linux; Android 13; SAMSUNG SM-G991B) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/21.0 Chrome/118.0.0.0 Mobile Safari/537.36",  # noqa
    # Firefox - Mobile (Android)
    "Mozilla/5.0 (Android 13; Mobile; rv:118.0) Gecko/118.0 Firefox/118.0",
    # Opera - Desktop (Windows)
    # FIX: a trailing comma was missing here, so this literal and the next one
    # were implicitly concatenated into a single, invalid User-Agent string.
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 OPR/104.0.0.0",  # noqa
    # Firefox - Desktop (Linux)
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:133.0) Gecko/20100101 Firefox/133.0",
    # Chrome - Desktop (Linux)
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36",
    # Other:
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",  # noqa
]
91
+
92
# User-Agent strings restricted to mobile devices (phones and tablets).
mobile_ua = [
    # Chrome - Mobile (Nexus 5, Android 4.2.1)
    "Mozilla/5.0 (Linux; Android 4.2.1; en-us; Nexus 5 Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19",  # noqa
    # Safari - Mobile (iPhone, iOS 12)
    "Mozilla/5.0 (iPhone; CPU iPhone OS 12_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0 Mobile/15E148 Safari/604.1",  # noqa
    # Chrome - Mobile (Samsung Galaxy S10, Android 9)
    "Mozilla/5.0 (Linux; Android 9; SM-G973F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.119 Mobile Safari/537.36",  # noqa
    # Chrome - Mobile (Google Pixel 2, Android 8)
    "Mozilla/5.0 (Linux; Android 8.0.0; Pixel 2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.93 Mobile Safari/537.36",  # noqa
    # Chrome - Mobile (Huawei, Android 10)
    "Mozilla/5.0 (Linux; Android 10; HUAWEI VOG-L29) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Mobile Safari/537.36",  # noqa
    # Safari - Tablet (iPad, iOS 13)
    "Mozilla/5.0 (iPad; CPU OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0 Mobile/15E148 Safari/604.1",  # noqa
]
100
+
101
# Browser fingerprints accepted by primp's `impersonate` option,
# grouped by browser family.
impersonates = (
    # Chrome (desktop)
    "chrome_100", "chrome_101", "chrome_104", "chrome_105",
    "chrome_106", "chrome_107", "chrome_108", "chrome_109",
    "chrome_114", "chrome_116", "chrome_117", "chrome_118",
    "chrome_119", "chrome_120", "chrome_123", "chrome_124",
    "chrome_126", "chrome_127", "chrome_128", "chrome_129",
    "chrome_130", "chrome_131",
    # Safari (iOS)
    "safari_ios_16.5", "safari_ios_17.2", "safari_ios_17.4.1", "safari_ios_18.1.1",
    # Safari (desktop)
    "safari_15.3", "safari_15.5", "safari_15.6.1", "safari_16", "safari_16.5",
    "safari_17.0", "safari_17.2.1", "safari_17.4.1", "safari_17.5",
    "safari_18", "safari_18.2",
    # Safari (iPad)
    "safari_ipad_18",
    # Edge
    "edge_101", "edge_122", "edge_127", "edge_131",
    # Firefox
    "firefox_109", "firefox_117", "firefox_128", "firefox_133",
)  # fmt: skip
114
+
115
# Operating-system fingerprints paired with `impersonates` above
# (primp's `impersonate_os` option).
impersonates_os = ("android", "ios", "linux", "macos", "windows")

# HTTP verbs this client accepts as a request method.
valid_methods = ['GET', 'POST', 'PUT', 'DELETE', 'PATCH', 'HEAD', 'OPTIONS']
118
+
119
def bad_gateway_exception(exc) -> bool:
    """Predicate (e.g. for retry/backoff): True only for an httpx 502 Bad Gateway."""
    if not isinstance(exc, httpx.HTTPStatusError):
        return False
    return exc.response.status_code == 502
122
+
123
+ class HTTPService(CredentialsInterface, PandasDataframe):
124
+ """
125
+ HTTPService.
126
+
127
+ Overview
128
+
129
+ Interface for making connections to HTTP services.
130
+ """
131
+ accept: str = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9" # noqa
132
+
133
    def __init__(self, *args, **kwargs):
        """
        Configure the HTTP service from keyword arguments.

        Keyword arguments consumed with ``kwargs.pop`` (use_proxy, proxy_type,
        use_free_proxy, rotate_ua, use_async, google_api_key, google_cse,
        as_binary, download, bs4_parser) are removed before the remainder is
        stored in ``self.arguments`` and forwarded to ``super().__init__``.
        Arguments read with ``kwargs.get`` (url, accept, headers, method,
        cookies) stay visible to the parent classes as well.
        """
        # Target URL; may contain {placeholders} resolved later by build_url().
        self.url: Optional[str] = kwargs.get("url", None)
        # HTTP Accept header; falls back to the class-level default.
        self.accept: str = kwargs.get(
            "accept",
            self.accept
        )
        # Proxy configuration: free proxies are preferred unless disabled.
        self.use_proxy: bool = kwargs.pop("use_proxy", False)
        self.proxy_type: str = kwargs.pop('proxy_type', 'decodo')
        self._free_proxy: bool = kwargs.pop('use_free_proxy', True)
        self._proxies: list = []
        # When True, pick a random User-Agent from `ua` instead of the first.
        self.rotate_ua: bool = kwargs.pop("rotate_ua", False)
        self.use_async: bool = bool(kwargs.pop("use_async", True))
        # Google Custom Search credentials (default from package conf).
        self.google_api_key: str = kwargs.pop('google_api_key', GOOGLE_SEARCH_API_KEY)
        self.google_cse: str = kwargs.pop('google_cse', GOOGLE_SEARCH_ENGINE_ID)
        # Response handling: return raw bytes / download-to-file modes.
        self.as_binary: bool = kwargs.pop('as_binary', False)
        self.download: bool = kwargs.pop('download', False)
        # Request timeout in seconds (fixed; not kwarg-configurable here).
        self.timeout: int = 30
        # Caller-supplied headers; merged over the defaults built below.
        self.headers: dict = kwargs.get('headers', {})
        self.auth: dict = {}
        self.auth_type: Optional[str] = None
        self.token_type: str = "Bearer"
        self._user, self._pwd = None, None
        self.method: str = kwargs.get("method", "get")
        # Parser backend handed to BeautifulSoup (default: html.parser).
        self._default_parser: str = kwargs.pop('bs4_parser', 'html.parser')
        self.parameters = {}
        if self.rotate_ua is True:
            self._ua = random.choice(ua)
        else:
            self._ua: str = ua[0]
        # Browser-like defaults; user headers (self.headers) win on conflict
        # because they are spread last.
        self.headers = {
            "Accept": self.accept,
            "Accept-Encoding": "gzip, deflate",
            "DNT": "1",
            "Connection": "keep-alive",
            "Upgrade-Insecure-Requests": "1",
            "User-Agent": self._ua,
            **self.headers,
        }
        # potentially cookies to be used by request.
        self.cookies = kwargs.get('cookies', {})
        self._encoder = JSONContent()
        # other arguments:
        self.arguments = kwargs
        # Executor:
        # Thread pool for offloading blocking HTTP work; size from conf.
        self._executor = ThreadPoolExecutor(
            max_workers=int(HTTPCLIENT_MAX_WORKERS)
        )
        # Bounds the number of concurrent in-flight requests.
        self._semaphore = asyncio.Semaphore(
            int(HTTPCLIENT_MAX_SEMAPHORE)
        )
        super().__init__(*args, **kwargs)
184
+
185
async def get_proxies(self, session_time: float = 0.40):
    """
    Fetch a fresh list of proxies.

    Free proxies take precedence when enabled; otherwise dispatch on the
    configured ``proxy_type`` provider, returning an empty list for
    unknown providers.
    TODO: SELECT or rotate the free/paid proxies.
    """
    if self._free_proxy is True:
        return await FreeProxy().get_list()
    # Provider dispatch table: each entry builds the provider lazily.
    providers = {
        'decodo': lambda: Decodo().get_list(),
        'oxylabs': lambda: Oxylabs(
            session_time=session_time,
            timeout=10
        ).get_list(),
        'geonode': lambda: Geonode().get_list(),
    }
    factory = providers.get(self.proxy_type)
    if factory is None:
        return []
    return await factory()
204
+
205
async def refresh_proxies(self):
    """
    Asynchronously refreshes the list of proxies if proxy usage is enabled.
    """
    # No-op when proxy usage is disabled for this instance.
    if self.use_proxy is True:
        self._proxies = await self.get_proxies()
211
+
212
def build_url(self, url, queryparams: str = "", args=None):
    """
    Constructs a full URL with optional query parameters and arguments.

    :param url: The base URL to be formatted.
    :param queryparams: Additional query parameters to be appended to the URL.
    :param args: Arguments to format within the URL.
    :return: The fully constructed URL.
    """
    # Substitute task variables first; SafeDict leaves unknown keys intact.
    base = str(url).format_map(SafeDict(**self._variables))
    full_url = base.format(**args) if args else base
    if queryparams:
        # Append with '&' when the URL already has a query string.
        separator = "&" if "?" in full_url else "?"
        full_url = f"{full_url}{separator}{queryparams}"
    logging.debug(
        f"Resource URL: {full_url!s}"
    )
    return full_url
237
+
238
def extract_host(self, url):
    """Return the network location (host[:port]) portion of *url*."""
    return urlparse(url).netloc
241
+
242
async def session(
    self,
    url: str,
    method: str = "get",
    data: dict = None,
    cookies: dict = None,
    headers: dict = None,
    use_json: bool = False,
    follow_redirects: bool = False,
    use_proxy: bool = False,
    accept: str = None,
    return_response: bool = False
):
    """
    Asynchronously sends an HTTP request using HTTPx.

    :param url: The URL to send the request to.
    :param method: The HTTP method to use (e.g., 'GET', 'POST').
    :param data: The data to send in the request body.
    :param use_json: Whether to send the data as JSON.
    :param cookies: A dictionary of cookies to send with the request.
    :param headers: A dictionary of headers to send with the request.
    :param follow_redirects: Whether httpx should follow 3xx redirects.
    :param use_proxy: Fetch and route through a proxy for this request.
    :param accept: Per-request override for the Accept header.
    :param return_response: Also return the raw response object.
    :return: A tuple containing the result and any error information,
        or ``(response, result, error)`` when *return_response* is True.
    """
    result = []
    error = {}
    auth = None
    proxies = None
    # Per-request Accept override; falls back to the instance default.
    if accept is not None:
        self.headers["Accept"] = accept
    else:
        self.headers["Accept"] = self.accept
    if use_proxy is True:
        self._proxies = await self.get_proxies()
        if len(self._proxies) == 1:
            # Single proxy: passed as a plain URL string.
            proxies = self._proxies[0]
            if not proxies.startswith('http'):
                proxies = f"http://{proxies}"
        elif len(self._proxies) > 1:
            # Several proxies: pick one at random, mount it for both schemes.
            proxy = random.choice(self._proxies)
            if not proxy.startswith('http'):
                proxy = f"http://{proxy}"
            proxies = {
                "http://": httpx.AsyncHTTPTransport(
                    proxy=f"http://{proxy}"
                ),
                "https://": httpx.AsyncHTTPTransport(
                    proxy=f"http://{proxy}"
                ),
            }
        else:
            self._proxies = None
    # Credentials: API keys go into headers, "key" auth into the query
    # string, basic/user credentials into an httpx auth tuple.
    if self.credentials:
        if "apikey" in self.auth:
            self.headers[
                "Authorization"
            ] = f"{self.token_type} {self.auth['apikey']}"
        elif self.auth_type == "api_key":
            self.headers = {**self.headers, **self.credentials}
        elif self.auth_type == "key":
            url = self.build_url(
                url, args=self.arguments, queryparams=urlencode(self.credentials)
            )
        elif self.auth_type in ["basic", "auth", "user"]:
            auth = (self.credentials["username"], self.credentials["password"])
    elif self._user and self.auth_type == "basic":
        auth = (self._user, self._pwd)
    cPrint(
        f"HTTP: Connecting to {url} using {method}",
        level="DEBUG"
    )
    if self.download is True:
        # Binary download: force octet-stream semantics.
        self.headers["Accept"] = "application/octet-stream"
        self.headers["Content-Type"] = "application/octet-stream"
        if self.use_streams is True:
            # NOTE(review): the sibling methods guard this attribute with
            # hasattr(self, "use_streams"); here a missing attribute would
            # raise AttributeError — confirm the attribute is always set.
            self.headers["Transfer-Encoding"] = "chunked"
        headers = self.headers
    if headers is not None and isinstance(headers, dict):
        # Merge caller-supplied headers over the instance defaults.
        headers = {**self.headers, **headers}
    timeout = httpx.Timeout(self.timeout)
    args = {"timeout": timeout, "headers": headers, "cookies": cookies}
    if auth is not None:
        args["auth"] = auth
    if proxies:
        if isinstance(proxies, dict):
            # Per-scheme transport mounts.
            args['mounts'] = proxies
        else:
            args["proxies"] = proxies
    # if self._debug is True:
    #     self.add_metric("HEADERS", headers)
    if proxies is not None:
        self.add_metric('Proxies', proxies)
    self.add_metric('URL', url)
    self.add_metric('METHOD', method)
    # Payload goes in "json" or "data" depending on use_json.
    req_args = {
        "method": method.upper(),
        "url": url,
        "follow_redirects": follow_redirects,
        "json" if use_json else "data": data
    }
    # Process the response
    try:
        if hasattr(self, "_client"):
            # Use a client without context manager to keep the session alive
            # Remember to call `await self._client.aclose()` manually
            response = await self._client.request(**req_args)
        else:
            async with httpx.AsyncClient(**args) as client:
                response = await client.request(**req_args)

        result, error = await self.process_response(response, url)

        if return_response:
            return response, result, error

    except httpx.HTTPError as e:
        error = str(e)

    return (result, error)
361
+
362
async def async_request(
    self,
    url,
    method: str = 'GET',
    data: dict = None,
    use_json: bool = False,
    use_proxy: bool = False,
    accept: Optional[str] = None
):
    """
    Asynchronously sends an HTTP request using aiohttp.

    :param url: The URL to send the request to.
    :param method: The HTTP method to use (e.g., 'GET', 'POST').
    :param data: The data to send in the request body.
    :param use_json: Whether to send the data as JSON.
    :param use_proxy: force proxy usage.
    :param accept: Per-request override for the Accept header.
    :return: A tuple containing the result and any error information.
    """
    result = []
    error = {}
    auth = None
    proxy = None
    if use_proxy is True:
        # Pick a single random proxy for the whole request.
        self._proxies = await self.get_proxies()
        if self._proxies:
            proxy = random.choice(self._proxies)
            self.add_metric("Proxies", proxy)
    # Credential resolution: credentials dict first, then raw auth
    # mapping, then user/password pair.
    if self.credentials:
        if "apikey" in self.auth:
            self.headers[
                "Authorization"
            ] = f"{self.token_type} {self.auth['apikey']}"
        elif self.auth_type == "api_key":
            self.headers = {**self.headers, **self.credentials}
        elif self.auth_type == "key":
            url = self.build_url(
                url,
                args=self.arguments,
                queryparams=urlencode(self.credentials)
            )
        elif self.auth_type in ["basic", "auth", "user"]:
            auth = BasicAuth(
                self.credentials["username"],
                self.credentials["password"]
            )
    elif "apikey" in self.auth:
        self.headers["Authorization"] = f"{self.token_type} {self.auth['apikey']}"
    elif self.auth:
        # First (token_type, token) pair from the auth mapping.
        token_type, token = list(self.auth.items())[0]
        self.headers["Authorization"] = f"{token_type} {token}"
    elif self._user and self.auth_type == "basic":
        auth = BasicAuth(self._user, self._pwd)
    cPrint(
        f"HTTP: Connecting to {url} using {method}",
        level="DEBUG"
    )
    if self._debug is True:
        self.add_metric("HEADERS", self.headers)
    self.add_metric("URL", url)
    self.add_metric("METHOD", method)
    if auth is not None:
        args = {"auth": auth}
    else:
        args = {}
    if accept is not None:
        self.headers["Accept"] = accept
    else:
        self.headers["Accept"] = self.accept
    if self.download is True:
        self.headers["Accept"] = "application/octet-stream"
        self.headers["Content-Type"] = "application/octet-stream"
        if hasattr(self, "use_streams"):
            self.headers["Transfer-Encoding"] = "chunked"
            # NOTE(review): aiohttp's session.request() has no `stream`
            # keyword; this would raise TypeError when streams are enabled
            # and auth is set — confirm against aiohttp's API.
            args["stream"] = True
    timeout = aiohttp.ClientTimeout(total=self.timeout)
    # NOTE(review): `auth` is passed to both ClientSession and, when not
    # None, again to session.request() via **args; aiohttp rejects a
    # duplicate auth argument — confirm.
    async with aiohttp.ClientSession(
        headers=self.headers, timeout=timeout, auth=auth
    ) as session:
        try:
            if use_json is True:
                async with session.request(
                    method.upper(), url, json=data, proxy=proxy, **args
                ) as response:
                    result, error = await self.process_response(response, url)
            else:
                async with session.request(
                    method.upper(), url, data=data, proxy=proxy, **args
                ) as response:
                    # Process the response
                    result, error = await self.process_response(response, url)
        except aiohttp.ClientError as e:
            error = str(e)
    return (result, error)
456
+
457
async def evaluate_error(
    self, response: Union[str, list], message: Union[str, list, dict]
) -> bool:
    """evaluate_response.

    Check whether *message* (an expected/whitelisted error) appears in the
    error payload *response*.

    Args:
        response: error payload — a string, a list of payloads, or a dict
            possibly carrying an "errors" key.
        message: the error text/object we are looking for.

    Returns:
        bool: True when the message matches the payload, False otherwise.
    """
    if isinstance(response, list):
        # a list of potential errors:
        for msg in response:
            if message in msg:
                return True
    if isinstance(response, dict) and "errors" in response:
        errors = response["errors"]
        if isinstance(errors, list):
            for error in errors:
                try:
                    if message in error:
                        return True
                except TypeError:
                    # `error` is not a container; fall back to equality.
                    if message == error:
                        return True
        else:
            if message == errors:
                return True
    else:
        # Non-dict payloads (including lists with no match above) fall
        # through to a plain membership test.
        if message in response:
            return True
    return False
492
+
493
async def process_response(self, response, url: str) -> tuple:
    """
    Processes the response from an HTTP request.

    Handles error statuses (>= 400), optional file downloads, and content
    negotiation driven by the configured ``self.accept`` type. Works with
    both httpx and aiohttp response objects via the ``response_*`` helpers.

    :param response: The response object from httpx or aiohttp.
    :param url: The URL that was requested.
    :return: A tuple containing the processed result and any error information.
    :raises ConnectionError: when the status is an error and is not
        whitelisted in ``self.no_errors``.
    """
    error = None
    result = None
    # Works for both httpx (.status_code) and aiohttp (.status).
    status = self.response_status(response)

    if status >= 400:
        # Evaluate response body and headers.
        print(" == ERROR Headers == ")
        print(f"{response.headers}")
        content_type = response.headers.get("Content-Type", "").lower()
        if "application/json" in content_type:
            message = await self.response_json(response)
        elif "text/" in content_type:
            message = await self.response_text(response)
        elif "X-Error" in response.headers:
            message = response.headers["X-Error"]
        else:
            # Fallback to a unified read method for the raw body content
            message = await self.response_read(response)

        # Combine response headers and body for enriched logging
        error_context = {
            "status": status,
            "reason": await self.response_reason(response),
            "headers": response.headers,
            "body": message
        }

        # Log the detailed error context
        self._logger.error(f"Error: {error_context}")

        # Whitelisted "errors" (self.no_errors) are returned to the caller
        # instead of raised. NOTE: this branch returns (response, status)
        # rather than the usual (result, error) shape.
        if hasattr(self, 'no_errors'):
            for key, msg in self.no_errors.items():
                if int(key) == status and await self.evaluate_error(message, msg):
                    return response, status

        # Raise an exception if error handling does not continue
        raise ConnectionError(f"HTTP Error {status}: {message!s}")
    else:
        if self.download is True:
            filename = os.path.basename(url)
            # Get the filename from the response headers, if available
            content_disposition = response.headers.get("content-disposition")
            if content_disposition:
                # Parse RFC 6266 Content-Disposition (including the RFC 5987
                # "filename*" UTF-8 form) via the email Message machinery.
                msg = Message()
                msg["Content-Disposition"] = response.headers.get("content-disposition")
                filename = msg.get_param("filename", header="Content-Disposition")
                utf8_filename = msg.get_param("filename*", header="Content-Disposition")
                if utf8_filename:
                    # filename* is "charset''percent-encoded-name".
                    _, utf8_filename = utf8_filename.split("''", 1)
                    filename = urllib.parse.unquote(utf8_filename)
            # NOTE(review): this token looks like it should be the
            # "{filename}" placeholder consumed by format_map below —
            # confirm against the original template.
            if "(unknown)" in str(self.filename):
                self.filename = str(self.filename).format_map(
                    SafeDict(filename=filename)
                )
            if "{" in str(self.filename):
                self.filename = str(self.filename).format_map(
                    SafeDict(**self.arguments)
                )
            if isinstance(self.filename, str):
                self.filename = Path(self.filename)
            # Saving File in Directory:
            total_length = response.headers.get("Content-Length")
            self._logger.info(
                f"HTTPClient: Saving File {self.filename}, size: {total_length}"
            )
            pathname = self.filename.parent.absolute()
            if not pathname.exists():
                # Create a new directory
                pathname.mkdir(parents=True, exist_ok=True)
            transfer = response.headers.get("transfer-encoding", None)
            if transfer is None:
                # Non-chunked transfer: read the whole declared length at once.
                chunk_size = int(total_length)
            else:
                chunk_size = 8192
            # Asynchronous file writing
            if self.filename.exists() and self.filename.is_file():
                overwrite = self.destination.get("overwrite", True)
                if overwrite is False:
                    self._logger.warning(
                        f"HTTPClient: File Already exists: {self.filename}"
                    )
                    # Filename already exists
                    result = self.filename
                    return result, error
                else:
                    self._logger.warning(
                        f"HTTPClient: Overwriting File: {self.filename}"
                    )
                    # Delete the file before downloading again.
                    try:
                        self.filename.unlink()
                    except Exception as e:
                        self._logger.warning(
                            f"HTTPClient: Error Deleting File: {self.filename}, {e}"
                        )
            if hasattr(self, "use_streams") and self.use_streams is True:
                async with aiofiles.open(self.filename, "wb") as file:
                    async for chunk in response.content.iter_chunked(chunk_size):
                        await file.write(chunk)
            else:
                with open(self.filename, "wb") as fp:
                    try:
                        fp.write(await self.response_read(response))
                    except Exception:
                        # best-effort write: an empty file signals the failure
                        pass
            self._logger.debug(
                f"Filename Saved Successfully: {self.filename}"
            )
            result = self.filename
        else:
            if self.accept == 'application/octet-stream':
                # Raw bytes, wrapped in a rewound BytesIO buffer.
                data = await self.response_read(response)
                buffer = BytesIO(data)
                buffer.seek(0)
                result = buffer
            # FIX: was `in ('text/html')` — a plain string, i.e. a substring
            # test — instead of a one-element tuple.
            elif self.accept in ('text/html',):
                result = await self.response_read(response)
                try:
                    # html parser for lxml
                    self._parser = html.fromstring(result)
                    # BeautifulSoup parser.
                    # NOTE(review): response.text is a coroutine method on
                    # aiohttp responses; this path works for httpx-style
                    # responses — confirm.
                    self._bs = bs(response.text, self._default_parser)
                    result = self._bs
                except Exception as e:
                    error = e
            elif self.accept in ('application/xhtml+xml', 'application/xml'):
                result = await self.response_read(response)
                try:
                    self._parser = etree.fromstring(result)
                except etree.XMLSyntaxError:
                    # Malformed XML: fall back to the tolerant HTML parser.
                    self._parser = html.fromstring(result)
                except Exception as e:
                    error = e
            elif self.accept == "application/json":
                try:
                    result = await self.response_json(response)
                except Exception as e:
                    logging.warning(e)
                    # is not an json, try first with beautiful soup:
                    try:
                        self._bs = bs(
                            await self.response_text(response),
                            self._default_parser
                        )
                        result = self._bs
                    except Exception:
                        error = e
            elif self.as_binary is True:
                result = await self.response_read(
                    response
                )
            else:
                result = await self.response_text(
                    response
                )
    return result, error
659
+
660
async def request(
    self,
    url: str,
    method: str = "GET",
    data: dict = None,
    use_proxy: bool = False,
    accept: Optional[str] = None
) -> tuple:
    """
    Sends an HTTP request using the requests library, run in the
    instance's thread-pool executor and bounded by the semaphore.

    :param url: The URL to send the request to.
    :param method: The HTTP method to use (case-insensitive).
    :param data: The data to send in the request body.
    :param use_proxy: Route the request through a randomly chosen proxy.
    :param accept: Optional per-request override for the Accept header.
    :return: A tuple containing the result and any error information.
    :raises ComponentError: on timeouts or request failures.
    """
    result = []
    error = {}
    auth = None
    proxies = None
    if use_proxy is True:
        self._proxies = await self.get_proxies()
        if self._proxies:
            # One proxy serves all schemes for this request.
            proxy = random.choice(self._proxies)
            proxies = {"http": proxy, "https": proxy, "ftp": proxy}
            self.add_metric("Proxies", proxies)
    # Credentials: API keys go into headers, "key" auth into the query
    # string, everything else into an HTTPBasicAuth object.
    if self.credentials:
        if "apikey" in self.auth:
            self.headers[
                "Authorization"
            ] = f"{self.token_type} {self.auth['apikey']}"
        elif self.auth_type == "api_key":
            self.headers = {**self.headers, **self.credentials}
        elif self.auth_type == "key":
            url = self.build_url(
                url, args=self.arguments, queryparams=urlencode(self.credentials)
            )
        elif self.auth_type == "basic":
            auth = HTTPBasicAuth(
                self.credentials["username"], self.credentials["password"]
            )
        else:
            auth = HTTPBasicAuth(
                self.credentials["username"], self.credentials["password"]
            )
    elif self._user and self.auth_type == "basic":
        auth = HTTPBasicAuth(self._user, self._pwd)
    cPrint(f"HTTP: Connecting to {url} using {method}", level="DEBUG")
    self.add_metric("URL", url)
    self.add_metric("METHOD", method)
    if auth is not None:
        args = {"auth": auth, "verify": False}
    else:
        args = {}
    if accept is not None:
        self.headers["Accept"] = accept
    else:
        self.headers["Accept"] = self.accept
    if self.download is True:
        self.headers["Accept"] = "application/octet-stream"
        self.headers["Content-Type"] = "application/octet-stream"
        if hasattr(self, "use_streams"):
            self.headers["Transfer-Encoding"] = "chunked"
            args["stream"] = True
    if self._debug is True:
        self.add_metric("HEADERS", self.headers)
    args["headers"] = self.headers
    args["timeout"] = self.timeout
    args["proxies"] = proxies
    # FIX: normalize the verb before dispatching. The default is "GET" but
    # the branches below compare lowercase names, so any upper-case method
    # (including the default) previously fell through to the POST fallback.
    method = method.lower()
    if method == "get":
        my_request = partial(requests.get, **args)
    elif method == "post":
        my_request = partial(requests.post, data=data, **args)
    elif method == "put":
        my_request = partial(requests.put, data=data, **args)
    elif method == "delete":
        my_request = partial(requests.delete, data=data, **args)
    elif method == "patch":
        # FIX: was `*args`, which unpacked the dict's keys as positional
        # arguments; keyword-expand the prepared arguments instead.
        my_request = partial(requests.patch, data=data, **args)
    else:
        my_request = partial(requests.post, data=data, **args)
    try:
        # making request
        async with self._semaphore:
            loop = asyncio.get_running_loop()
            future = loop.run_in_executor(self._executor, my_request, url)
            result, error = await self.process_request(future, url)
            if error:
                if isinstance(error, BaseException):
                    raise error
                else:
                    raise ComponentError(f"{error!s}")
            return (result, error)
    except requests.exceptions.ReadTimeout as err:
        self._logger.warning(f"Timeout Error: {err!r}")
        # TODO: retrying
        raise ComponentError(f"Timeout: {err}") from err
    except Exception as err:
        self._logger.exception(str(err), stack_info=True)
        raise ComponentError(f"Error: {err}") from err
760
+
761
async def process_request(self, future, url: str):
    """
    Processes the result of an asynchronous HTTP request.

    Waits for the executor-backed `requests` call, then applies the same
    download / content-negotiation logic as :meth:`process_response`, but
    against a synchronous ``requests`` response object.

    :param future: The future object representing the asynchronous operation.
    :param url: The URL that was requested.
    :return: A tuple containing the result and any error information.
    """
    # getting the result, based on the Accept logic
    error = None
    result = None
    loop = asyncio.get_running_loop()
    asyncio.set_event_loop(loop)
    done, _ = await asyncio.wait([future], return_when=asyncio.FIRST_COMPLETED)
    for f in done:
        response = f.result()
        # for response in await asyncio.gather(*future):
        # Check for HTTP errors
        try:
            response.raise_for_status()
        except HTTPError as http_err:
            # Handle HTTP errors here
            error = http_err
            # Log the error or perform other error handling
            self._logger.error(f"HTTP error occurred: {http_err}")
            # You can choose to continue, break, or return based on your logic
            continue
        try:
            if self.download is True:
                # Filename:
                filename = os.path.basename(url)
                # Get the filename from the response headers, if available
                content_disposition = response.headers.get("content-disposition")
                if content_disposition:
                    _, params = content_disposition.split(";")
                    try:
                        key, value = params.strip().split("=")
                        if key == "filename":
                            filename = value.strip("'\"")
                    except ValueError:
                        # Malformed parameter; keep the URL-derived name.
                        pass
                # NOTE(review): this token looks like it should be the
                # "{filename}" placeholder consumed by format_map below —
                # confirm against the original template.
                if "(unknown)" in str(self.filename):
                    self.filename = str(self.filename).format_map(
                        SafeDict(filename=filename)
                    )
                if "{" in str(self.filename):
                    self.filename = str(self.filename).format_map(
                        SafeDict(**self.arguments)
                    )
                if isinstance(self.filename, str):
                    self.filename = Path(self.filename)
                # Saving File in Directory:
                total_length = response.headers.get("Content-Length")
                self._logger.info(
                    f"HTTPClient: Saving File {self.filename}, size: {total_length}"
                )
                pathname = self.filename.parent.absolute()
                if not pathname.exists():
                    # Create a new directory
                    pathname.mkdir(parents=True, exist_ok=True)
                response.raise_for_status()
                transfer = response.headers.get("transfer-encoding", None)
                if transfer is None:
                    # Non-chunked: read the whole declared length at once.
                    chunk_size = int(total_length)
                else:
                    chunk_size = 8192
                if self.filename.exists() and self.filename.is_file():
                    overwrite = self.destination.get("overwrite", True)
                    if overwrite is False:
                        self._logger.warning(
                            f"HTTPClient: File Already exists: {self.filename}"
                        )
                        # Filename already exists
                        result = self.filename
                        continue
                    else:
                        self._logger.warning(
                            f"HTTPClient: Overwriting File: {self.filename}"
                        )
                        # Delete the file before downloading again.
                        try:
                            self.filename.unlink()
                        except Exception as e:
                            self._logger.warning(
                                f"HTTPClient: Error Deleting File: {self.filename}, {e}"
                            )
                with open(self.filename, "wb") as fp:
                    try:
                        for chunk in response.iter_content(chunk_size=chunk_size):
                            fp.write(chunk)
                        fp.flush()
                    except Exception:
                        # best-effort write; partial file signals the failure
                        pass
                self._logger.debug(f"Filename Saved Successfully: {self.filename}")
                result = self.filename
            elif self.accept in ("text/html"):
                # NOTE(review): ("text/html") is a plain string, so this is
                # a substring test, not a one-element tuple — confirm intent.
                result = response.content  # Get content of the response as bytes
                try:
                    # html parser for lxml
                    self._parser = html.fromstring(result)
                    # BeautifulSoup parser

                    self._bs = bs(response.text, self._default_parser)
                    result = self._bs
                except Exception as e:
                    error = e
            elif self.accept in ("application/xhtml+xml", "application/xml"):
                result = response.content  # Get content of the response as bytes
                try:
                    self._parser = etree.fromstring(result)
                except Exception as e:
                    error = e
            elif self.accept == "application/json":
                try:
                    result = response.json()
                except Exception as e:
                    logging.error(e)
                    # is not an json, try first with beautiful soup:
                    try:
                        self._bs = bs(response.text, self._default_parser)
                        result = self._bs
                    except Exception:
                        error = e
            else:
                result = response.text
        except requests.exceptions.ProxyError as err:
            raise ComponentError(f"Proxy Connection Error: {err!r}") from err
        except requests.ReadTimeout as err:
            return (result, err)
        except requests.exceptions.HTTPError as e:
            # Log the error or perform other error handling
            self._logger.error(f"HTTP error occurred: {error}")
            raise ComponentError(f"HTTP Error: {error!r}, ex: {e!s}") from e
        except Exception as e:
            logging.exception(e)
            return (result, e)
    # returning results
    return (result, error)
899
+
900
+ @staticmethod
901
+ async def response_read(response):
902
+ if hasattr(response, 'aread'):
903
+ return await response.aread()
904
+
905
+ return await response.read()
906
+
907
+ @staticmethod
908
+ async def response_json(response):
909
+ if asyncio.iscoroutinefunction(response.json):
910
+ return await response.json()
911
+
912
+ return response.json()
913
+
914
+ @staticmethod
915
+ def response_status(response):
916
+ if hasattr(response, 'status_code'):
917
+ return response.status_code
918
+
919
+ return response.status
920
+
921
+ @staticmethod
922
+ async def response_text(response):
923
+ if asyncio.iscoroutinefunction(response.text):
924
+ return await response.text()
925
+
926
+ return response.text
927
+
928
+ @staticmethod
929
+ async def response_reason(response):
930
+ # Attempt to retrieve `reason`, `reason_phrase`, or fallback to an empty string
931
+ reason = getattr(response, 'reason', getattr(response, 'reason_phrase', b''))
932
+
933
+ return f"{reason!s}"
934
+
935
@backoff.on_exception(
    backoff.expo,
    (httpx.HTTPStatusError, httpx.TimeoutException),  # Catch HTTP errors and timeouts
    max_tries=3,
    max_time=120,
    jitter=backoff.full_jitter,
    on_backoff=lambda details: print(
        f"Retrying HTTP Get: attempt {details['tries']} after {details['wait']:0.2f}s"
    ),
)
async def _get(
    self,
    url: str,
    cookies: httpx.Cookies = None,
    params: Dict[str, Any] = None,
    headers: Dict[str, str] = None,
    timeout: Union[int, float] = 30.0,
    use_proxy: bool = True,
    free_proxy: bool = False,
    connect_timeout: Union[int, float] = 5.0,
    read_timeout: Union[int, float] = 20.0,
    write_timeout: Union[int, float] = 5.0,
    pool_timeout: Union[int, float] = 20.0,
    num_retries: int = 2,
    **kwargs
) -> Dict[str, Any]:
    """
    Make an asynchronous HTTP GET request, returning the response object.

    Retried up to 3 times (with jittered exponential backoff) on HTTP
    status errors and timeouts via the decorator above.

    Args:
        url (str): The URL to send the GET request to.
        cookies (httpx.Cookies): Cookies to include in the request.
        params (dict): Dictionary of query parameters to include in the URL.
        headers (dict): Extra headers for this request.
        timeout: Overall request timeout in seconds.
        use_proxy (bool): Route through a proxy from `get_proxies()`.
        free_proxy (bool): Unused here — proxy source is decided by
            instance configuration (``self._free_proxy``).
        connect_timeout/read_timeout/write_timeout/pool_timeout:
            Fine-grained httpx timeout phases.
        num_retries (int): Transport-level connection retries.

    Returns:
        Response: The response object from the httpx.
    """
    proxies = None
    if use_proxy is True:
        self._proxies = await self.get_proxies()
        if len(self._proxies) == 1:
            # Single proxy: plain URL string.
            proxies = self._proxies[0]
            if not proxies.startswith('http'):
                proxies = f"http://{proxies}"
        elif len(self._proxies) > 1:
            # Several proxies: random choice mounted per scheme.
            proxy = random.choice(self._proxies)
            if not proxy.startswith('http'):
                proxy = f"http://{proxy}"
            proxies = {
                "http://": httpx.AsyncHTTPTransport(
                    proxy=f"http://{proxy}"
                ),
                "https://": httpx.AsyncHTTPTransport(
                    proxy=f"http://{proxy}"
                ),
            }
        else:
            self._proxies = None

    # Define custom SSL context
    ssl_context = ssl.create_default_context()
    # Disable older protocols if needed
    ssl_context.options |= ssl.OP_NO_TLSv1 | ssl.OP_NO_TLSv1_1
    # Ensure at least TLS 1.2 is used
    ssl_context.minimum_version = ssl.TLSVersion.TLSv1_2
    # NOTE(review): hostname checks and certificate verification are
    # disabled — confirm this is intentional for scraping targets.
    ssl_context.check_hostname = False
    ssl_context.verify_mode = ssl.CERT_NONE

    # Use AsyncHTTPTransport to pass in SSL context explicitly
    transport = httpx.AsyncHTTPTransport(
        retries=num_retries,
        verify=ssl_context
    )
    # Rebinds the numeric `timeout` argument as an httpx.Timeout object.
    timeout = httpx.Timeout(
        timeout=timeout,
        connect=connect_timeout,
        read=read_timeout,
        write=write_timeout,
        pool=pool_timeout
    )
    # NOTE(review): when multiple proxies were found, `proxies` is a dict
    # of per-scheme transport mounts, which the `proxy=` argument does not
    # accept (it expects a URL) — confirm against httpx's AsyncClient API.
    async with httpx.AsyncClient(
        cookies=cookies,
        proxy=proxies or None,
        transport=transport,
        headers=headers,
        timeout=timeout,
        http2=True,
        follow_redirects=True,
        **kwargs
    ) as client:
        try:
            response = await client.get(
                url,
                params=params  # Pass query parameters here
            )
            response.raise_for_status()
            return response
        except httpx.TimeoutException:
            # Re-raised so the backoff decorator can retry.
            print("Request timed out.")
            raise
        except httpx.HTTPError as ex:
            print(f"HTTP error occurred: {ex}")
            raise httpx.HTTPError(ex) from ex
        except Exception as exc:
            print('EXC > ', exc)
            raise ComponentError(
                f"An error occurred: {exc}"
            ) from exc
1043
+
1044
@backoff.on_exception(
    backoff.expo,
    (httpx.HTTPStatusError, httpx.TimeoutException),  # Catch HTTP errors and timeouts
    max_tries=3,
    max_time=120,
    jitter=backoff.full_jitter,
    on_backoff=lambda details: print(
        f"Retrying HTTP Get: attempt {details['tries']} after {details['wait']:0.2f}s"
    ),
)
async def _post(
    self,
    url: str,
    cookies: httpx.Cookies,
    params: Dict[str, Any] = None,
    headers: Dict[str, str] = None,
    data: Dict[str, str] = None,
    follow_redirects: bool = True,
    raise_for_status: bool = True,
    use_proxy: bool = True,
    free_proxy: bool = False,
) -> Dict[str, Any]:
    """
    Make an asynchronous HTTP POST request, returning the response object.

    Retried up to 3 times (with jittered exponential backoff) on HTTP
    status errors and timeouts via the decorator above.

    Args:
        url (str): The URL to send the POST request to.
        cookies (httpx.Cookies): Cookies to include in the request.
        params (dict): Dictionary of query parameters to include in the URL.
        headers (dict): Extra headers for this request.
        data (dict): Form payload sent as the request body.
        follow_redirects (bool): Whether to follow 3xx redirects.
        raise_for_status (bool): Raise on error statuses before returning.
        use_proxy (bool): Route through a proxy from `get_proxies()`.
        free_proxy (bool): Unused here — proxy source is decided by
            instance configuration (``self._free_proxy``).

    Returns:
        Response: The response object from the httpx.
    """
    proxies = None
    if use_proxy is True:
        self._proxies = await self.get_proxies()
        if len(self._proxies) == 1:
            # Single proxy: plain URL string.
            proxies = self._proxies[0]
            if not proxies.startswith('http'):
                proxies = f"http://{proxies}"
        elif len(self._proxies) > 1:
            # Several proxies: random choice mounted per scheme.
            proxy = random.choice(self._proxies)
            if not proxy.startswith('http'):
                proxy = f"http://{proxy}"
            proxies = {
                "http://": httpx.AsyncHTTPTransport(
                    proxy=f"http://{proxy}"
                ),
                "https://": httpx.AsyncHTTPTransport(
                    proxy=f"http://{proxy}"
                ),
            }
        else:
            self._proxies = None

    # Define custom SSL context
    ssl_context = ssl.create_default_context()
    # Disable older protocols if needed
    ssl_context.options |= ssl.OP_NO_TLSv1 | ssl.OP_NO_TLSv1_1
    # Ensure at least TLS 1.2 is used
    ssl_context.minimum_version = ssl.TLSVersion.TLSv1_2
    # NOTE(review): certificate verification disabled — confirm intended.
    ssl_context.check_hostname = False
    ssl_context.verify_mode = ssl.CERT_NONE

    # Use AsyncHTTPTransport to pass in SSL context explicitly
    transport = httpx.AsyncHTTPTransport(retries=2, verify=ssl_context)
    timeout = httpx.Timeout(connect=5.0, read=20.0, write=5.0, pool=20.0)

    # NOTE(review): as in `_get`, a mounts dict in `proxies` is not a valid
    # value for the `proxy=` argument — confirm against httpx's API.
    async with httpx.AsyncClient(
        cookies=cookies,
        proxy=proxies or None,
        transport=transport,
        headers=headers,
        timeout=timeout
    ) as client:
        try:
            response = await client.post(
                url,
                data=data,
                params=params,
                follow_redirects=follow_redirects
            )
            if raise_for_status:
                response.raise_for_status()
            return response
        except httpx.TimeoutException:
            # Re-raised so the backoff decorator can retry.
            print("Request timed out.")
            raise
        except httpx.HTTPError as ex:
            print(f"HTTP error occurred: {ex}")
            raise httpx.HTTPError(ex) from ex
        except Exception as exc:
            print('EXC > ', exc)
            raise ComponentError(
                f"An error occurred: {exc}"
            ) from exc
1140
+
1141
+ @backoff.on_exception(
1142
+ backoff.expo,
1143
+ (httpx.HTTPStatusError, httpx.TimeoutException), # Catch HTTP errors and timeouts
1144
+ max_tries=3,
1145
+ max_time=120,
1146
+ jitter=backoff.full_jitter,
1147
+ on_backoff=lambda details: print(
1148
+ f"Retrying HTTP Get: attempt {details['tries']} after {details['wait']:0.2f}s"
1149
+ ),
1150
+ )
1151
+ async def api_get(
1152
+ self,
1153
+ url: str,
1154
+ cookies: httpx.Cookies = None,
1155
+ params: Dict[str, Any] = None,
1156
+ headers: Dict[str, str] = None,
1157
+ use_proxy: bool = True,
1158
+ free_proxy: bool = False,
1159
+ ) -> Dict[str, Any]:
1160
+ """
1161
+ Make an asynchronous HTTP GET request.
1162
+
1163
+ Args:
1164
+ url (str): The URL to send the GET request to.
1165
+ cookies (httpx.Cookies): Cookies to include in the request.
1166
+ params (dict): Dictionary of query parameters to include in the URL.
1167
+
1168
+ Returns:
1169
+ dict: The JSON response from the API if the request is successful.
1170
+ Returns an empty dictionary if the request fails.
1171
+ """
1172
+ proxies = None
1173
+ if use_proxy is True:
1174
+ self._proxies = await self.get_proxies()
1175
+ if len(self._proxies) == 1:
1176
+ proxies = self._proxies[0]
1177
+ if not proxies.startswith('http'):
1178
+ proxies = f"http://{proxies}"
1179
+ elif len(self._proxies) > 1:
1180
+ proxy = random.choice(self._proxies)
1181
+ if not proxy.startswith('http'):
1182
+ proxy = f"http://{proxy}"
1183
+ proxies = {
1184
+ "http://": httpx.AsyncHTTPTransport(
1185
+ proxy=f"http://{proxy}"
1186
+ ),
1187
+ "https://": httpx.AsyncHTTPTransport(
1188
+ proxy=f"http://{proxy}"
1189
+ ),
1190
+ }
1191
+ else:
1192
+ self._proxies = None
1193
+
1194
+ # Define custom SSL context
1195
+ ssl_context = ssl.create_default_context()
1196
+ # Disable older protocols if needed
1197
+ ssl_context.options |= ssl.OP_NO_TLSv1 | ssl.OP_NO_TLSv1_1
1198
+ # Ensure at least TLS 1.2 is used
1199
+ ssl_context.minimum_version = ssl.TLSVersion.TLSv1_2
1200
+ ssl_context.check_hostname = False
1201
+ ssl_context.verify_mode = ssl.CERT_NONE
1202
+
1203
+ # Use AsyncHTTPTransport to pass in SSL context explicitly
1204
+ transport = httpx.AsyncHTTPTransport(retries=2, verify=ssl_context)
1205
+ timeout = httpx.Timeout(connect=5.0, read=20.0, write=5.0, pool=20.0)
1206
+
1207
+ async with httpx.AsyncClient(
1208
+ cookies=cookies,
1209
+ proxy=proxies or None,
1210
+ transport=transport,
1211
+ headers=headers,
1212
+ timeout=timeout
1213
+ ) as client:
1214
+ try:
1215
+ response = await client.get(
1216
+ url,
1217
+ params=params
1218
+ )
1219
+ response.raise_for_status()
1220
+ if response.status_code == 200:
1221
+ return response.json()
1222
+ else:
1223
+ print(
1224
+ f"API request failed with status code {response.status_code}"
1225
+ )
1226
+ return {}
1227
+ except httpx.TimeoutException:
1228
+ print("Request timed out.")
1229
+ raise
1230
+ except httpx.HTTPError as ex:
1231
+ print(f"HTTP error occurred: {ex}")
1232
+ raise httpx.HTTPError(ex) from ex
1233
+ except Exception as exc:
1234
+ print('EXC > ', exc)
1235
+ raise ComponentError(
1236
+ f"An error occurred: {exc}"
1237
+ ) from exc
1238
+
1239
+ @backoff.on_exception(
1240
+ backoff.expo,
1241
+ (httpx.HTTPStatusError, httpx.TimeoutException), # Catch HTTP errors and timeouts
1242
+ max_tries=3,
1243
+ max_time=120,
1244
+ jitter=backoff.full_jitter,
1245
+ on_backoff=lambda details: print(
1246
+ f"Retrying HTTP Get: attempt {details['tries']} after {details['wait']:0.2f}s"
1247
+ ),
1248
+ )
1249
+ async def api_post(
1250
+ self,
1251
+ url: str,
1252
+ payload: Dict,
1253
+ cookies: httpx.Cookies = None,
1254
+ use_proxy: bool = True,
1255
+ free_proxy: bool = False,
1256
+ full_response: bool = False
1257
+ ) -> Dict[str, Any]:
1258
+ proxies = None
1259
+ if use_proxy is True:
1260
+ self._proxies = await self.get_proxies()
1261
+ if len(self._proxies) == 1:
1262
+ proxies = self._proxies[0]
1263
+ if not proxies.startswith('http'):
1264
+ proxies = f"http://{proxies}"
1265
+ elif len(self._proxies) > 1:
1266
+ proxy = random.choice(self._proxies)
1267
+ if not proxy.startswith('http'):
1268
+ proxy = f"http://{proxy}"
1269
+ proxies = {
1270
+ "http://": httpx.AsyncHTTPTransport(
1271
+ proxy=f"http://{proxy}"
1272
+ ),
1273
+ "https://": httpx.AsyncHTTPTransport(
1274
+ proxy=f"http://{proxy}"
1275
+ ),
1276
+ }
1277
+ else:
1278
+ self._proxies = None
1279
+ # Define custom SSL context
1280
+ ssl_context = ssl.create_default_context()
1281
+ # Disable older protocols if needed
1282
+ ssl_context.options |= ssl.OP_NO_TLSv1 | ssl.OP_NO_TLSv1_1
1283
+ # Ensure at least TLS 1.2 is used
1284
+ ssl_context.minimum_version = ssl.TLSVersion.TLSv1_2
1285
+ ssl_context.check_hostname = False
1286
+ ssl_context.verify_mode = ssl.CERT_NONE
1287
+
1288
+ # Use AsyncHTTPTransport to pass in SSL context explicitly
1289
+ transport = httpx.AsyncHTTPTransport(retries=2, verify=ssl_context)
1290
+ timeout = httpx.Timeout(connect=5.0, read=20.0, write=5.0, pool=20.0)
1291
+
1292
+ async with httpx.AsyncClient(
1293
+ cookies=cookies,
1294
+ proxy=proxies,
1295
+ transport=transport
1296
+ ) as client:
1297
+ try:
1298
+ response = await client.post(
1299
+ url,
1300
+ json=payload,
1301
+ headers=self.headers,
1302
+ timeout=timeout
1303
+ )
1304
+ response.raise_for_status()
1305
+ if full_response:
1306
+ return response
1307
+ if response.status_code == 200:
1308
+ return response.json()
1309
+ else:
1310
+ print(
1311
+ f"API request failed with status code {response.status_code}"
1312
+ )
1313
+ return {}
1314
+ except httpx.TimeoutException:
1315
+ raise
1316
+ except (httpx.HTTPError) as ex:
1317
+ raise httpx.HTTPError(ex)
1318
+ except Exception as exc:
1319
+ print('EXC > ', exc)
1320
+ raise ComponentError(
1321
+ f"An error occurred: {exc}"
1322
+ )
1323
+
1324
+ @backoff.on_exception(
1325
+ backoff.expo,
1326
+ (RatelimitException, TimeoutException, DuckDuckGoSearchException),
1327
+ max_tries=5,
1328
+ max_time=120, # Extended max time to allow sufficient retries
1329
+ jitter=backoff.full_jitter, # Introduces randomization in retry timing
1330
+ on_backoff=lambda details: print(
1331
+ f"Retrying DuckDuckGo search: attempt {details['tries']} after {details['wait']:0.2f}s"
1332
+ ),
1333
+ )
1334
+ async def _search_duckduckgo(
1335
+ self,
1336
+ query: str,
1337
+ max_results: int = 5,
1338
+ use_proxy: bool = True,
1339
+ timeout: int = 20,
1340
+ headers: dict = None,
1341
+ region: str = "wt-wt",
1342
+ backend: str = 'lite'
1343
+ ):
1344
+ """
1345
+ Search DuckDuckGo for a given query.
1346
+
1347
+ Args:
1348
+ query (str): The search query.
1349
+ max_results (int): The maximum number of results to return.
1350
+ use_proxy (bool): Whether to use a proxy for the search.
1351
+
1352
+ Returns:
1353
+ list: A list of search results.
1354
+ """
1355
+ proxies = None
1356
+ if use_proxy is True:
1357
+ self._proxies = await self.get_proxies()
1358
+ if len(self._proxies) == 1:
1359
+ proxies = self._proxies[0]
1360
+ if not proxies.startswith('http'):
1361
+ proxies = f"http://{proxies}"
1362
+ elif len(self._proxies) > 1:
1363
+ proxy = random.choice(self._proxies)
1364
+ if not proxy.startswith('http'):
1365
+ proxy = f"http://{proxy}"
1366
+ proxies = {
1367
+ "http://": httpx.AsyncHTTPTransport(
1368
+ proxy=f"http://{proxy}"
1369
+ ),
1370
+ "https://": httpx.AsyncHTTPTransport(
1371
+ proxy=f"http://{proxy}"
1372
+ ),
1373
+ }
1374
+ else:
1375
+ self._proxies = None
1376
+ if headers is None:
1377
+ headers = {}
1378
+ headers = {**self.headers, **headers}
1379
+ headers["User-Agent"] = random.choice(ua)
1380
+ try:
1381
+ with DDGS(
1382
+ headers=headers,
1383
+ proxy=proxies,
1384
+ timeout=timeout,
1385
+ verify=False
1386
+ ) as search:
1387
+ # 🐵 Monkey Patching Primp Client to avoid Rate-Limits issues:
1388
+ search.client = primp.Client(
1389
+ headers=search.headers,
1390
+ proxy=proxies,
1391
+ timeout=timeout,
1392
+ cookie_store=False, # 🚀 Disable cookie persistence dynamically
1393
+ referer=True,
1394
+ impersonate=random.choice(DDGS._impersonates),
1395
+ impersonate_os=random.choice(DDGS._impersonates_os),
1396
+ follow_redirects=False,
1397
+ verify=False,
1398
+ )
1399
+ return search.text(
1400
+ keywords=query,
1401
+ timelimit=timeout,
1402
+ max_results=max_results,
1403
+ backend=backend,
1404
+ region=region
1405
+ )
1406
+ except DuckDuckGoSearchException as e:
1407
+ raise RatelimitException(
1408
+ f"Error on DuckDuckGo Search: {e}"
1409
+ ) from e
1410
+ except Exception as e:
1411
+ raise RuntimeError(
1412
+ f"DuckDuckGo Error: {e}"
1413
+ ) from e
1414
+
1415
+ @backoff.on_exception(
1416
+ backoff.expo,
1417
+ (httpx.HTTPStatusError, httpx.TimeoutException, httpx.RemoteProtocolError), # Catch HTTP errors and timeouts
1418
+ max_tries=5,
1419
+ max_time=120,
1420
+ jitter=backoff.full_jitter,
1421
+ on_backoff=lambda details: print(
1422
+ f"Retrying Google Search: attempt {details['tries']} after {details['wait']:0.2f}s"
1423
+ ),
1424
+ )
1425
+ async def _search_google(
1426
+ self,
1427
+ query: str,
1428
+ exact_term: str = None,
1429
+ max_results: int = 5,
1430
+ use_proxy: bool = True,
1431
+ timeout: int = 20,
1432
+ headers: dict = None,
1433
+ region: str = None,
1434
+ country: str = None,
1435
+ language: str = None,
1436
+ use_primp: bool = False,
1437
+ **kwargs
1438
+ ):
1439
+ if headers:
1440
+ headers = {
1441
+ **self.headers,
1442
+ **headers,
1443
+ "Referer": "https://www.google.com/",
1444
+ }
1445
+ proxies = None
1446
+ if use_proxy is True:
1447
+ self._proxies = await self.get_proxies()
1448
+ if len(self._proxies) == 1:
1449
+ proxies = self._proxies[0]
1450
+ if not proxies.startswith('http'):
1451
+ proxies = f"http://{proxies}"
1452
+ elif len(self._proxies) > 1:
1453
+ proxy = random.choice(self._proxies)
1454
+ if not proxy.startswith('http'):
1455
+ proxy = f"http://{proxy}"
1456
+ proxies = {
1457
+ "http://": httpx.AsyncHTTPTransport(
1458
+ proxy=f"http://{proxy}"
1459
+ ),
1460
+ "https://": httpx.AsyncHTTPTransport(
1461
+ proxy=f"http://{proxy}"
1462
+ ),
1463
+ }
1464
+ else:
1465
+ self._proxies = None
1466
+ args = {
1467
+ "q": query,
1468
+ "cx": str(GOOGLE_SEARCH_ENGINE_ID),
1469
+ "num": str(max_results),
1470
+ "key": str(self.google_api_key),
1471
+ "hl": "en", # UI language in English
1472
+ "sort": "date", # Prefer newer content
1473
+ }
1474
+ if region:
1475
+ args["gl"] = region # Geolocation
1476
+ if country:
1477
+ args["cr"] = country # Country restriction
1478
+ if language:
1479
+ args["hl"] = language # Language preference
1480
+ if exact_term:
1481
+ args["exactTerms"] = exact_term
1482
+ if use_primp:
1483
+ # Use Primp Client instead httpx:
1484
+ client = primp.Client(
1485
+ headers=headers,
1486
+ proxy=proxies, # Use proxy if enabled
1487
+ timeout=timeout,
1488
+ cookie_store=False, # 🚀 Disable cookie persistence
1489
+ referer=True,
1490
+ impersonate=random.choice(impersonates),
1491
+ impersonate_os=random.choice(impersonates_os),
1492
+ follow_redirects=True,
1493
+ verify=False
1494
+ )
1495
+ try:
1496
+ query = quote(query)
1497
+ search_url = f"https://cse.google.com/cse?cx={GOOGLE_SEARCH_ENGINE_ID}#gsc.tab=0&gsc.q={query}&gsc.sort=" # noqa
1498
+ response = client.get(
1499
+ search_url,
1500
+ **kwargs
1501
+ )
1502
+ if response.status_code != 200:
1503
+ raise RuntimeError(
1504
+ f"Google Search API failed with status {response.status_code}: {response.text}"
1505
+ )
1506
+ return self._parse_google_cse_results(response.text, max_results)
1507
+ except Exception as e:
1508
+ print(f"Unexpected error: {e}")
1509
+ raise RuntimeError(
1510
+ f"Primp Unexpected error: {e}"
1511
+ ) from e
1512
+ else:
1513
+ t = httpx.Timeout(timeout, connect=5.0, read=20.0, write=5.0, pool=20.0)
1514
+ async with httpx.AsyncClient(
1515
+ proxy=proxies,
1516
+ timeout=t,
1517
+ ) as client:
1518
+ try:
1519
+ response = await client.get(
1520
+ "https://customsearch.googleapis.com/customsearch/v1",
1521
+ headers=headers,
1522
+ params=args,
1523
+ **kwargs
1524
+ )
1525
+ response.raise_for_status()
1526
+ if response.status_code == 200:
1527
+ return response.json()
1528
+ else:
1529
+ raise RuntimeError(
1530
+ f"Google Search API failed: {response.text}, status: {response.status_code}"
1531
+ )
1532
+ except httpx.HTTPStatusError as e:
1533
+ print(f"Search Google: HTTP error: {e.response.status_code} - {e.response.text}")
1534
+ raise
1535
+ except httpx.TimeoutException:
1536
+ print("Search Google: Request timed out")
1537
+ raise
1538
+ except httpx.RemoteProtocolError: # ✅ Catch server disconnection error
1539
+ print("Search Google: Server disconnected. Retrying with redirect enabled...")
1540
+ raise
1541
+ except Exception as e:
1542
+ print(f"Search Google: Unexpected error: {e}")
1543
+ raise
1544
+
1545
+ def get_httpx_cookies(self, domain: str = None, path: str = '/', cookies: dict = None):
1546
+ httpx_cookies = httpx.Cookies()
1547
+ if cookies is None:
1548
+ cookies = {}
1549
+ for key, value in cookies.items():
1550
+ httpx_cookies.set(
1551
+ key, value,
1552
+ domain=domain,
1553
+ path=path
1554
+ )
1555
+ return httpx_cookies
1556
+
1557
+ def _parse_google_cse_results(self, html_content: str, max_results: int):
1558
+ """
1559
+ Extracts search results from the rendered HTML of `cse.google.com/cse`.
1560
+
1561
+ Args:
1562
+ html_content (str): The HTML response from the search.
1563
+ max_results (int): Max number of results to return.
1564
+
1565
+ Returns:
1566
+ list: List of extracted URLs and titles.
1567
+ """
1568
+ soup = bs(html_content, "html.parser")
1569
+ results = []
1570
+
1571
+ print('CONTENT > ', html_content)
1572
+
1573
+ # Extract results from the dynamically loaded content
1574
+ for item in soup.select(".gsc-webResult")[:max_results]: # Adjust this selector if necessary
1575
+ title_tag = item.select_one(".gs-title")
1576
+ url_tag = item.select_one(".gs-title a")
1577
+
1578
+ if title_tag and url_tag:
1579
+ title = title_tag.get_text(strip=True)
1580
+ url = url_tag["href"]
1581
+ results.append({"title": title, "url": url})
1582
+
1583
+ return results
1584
+ @backoff.on_exception(
1585
+ backoff.expo,
1586
+ (httpx.HTTPStatusError, httpx.TimeoutException), # Catch HTTP errors and timeouts
1587
+ max_tries=3,
1588
+ max_time=120,
1589
+ jitter=backoff.full_jitter,
1590
+ on_backoff=lambda details: logging.warning(
1591
+ f"Retrying HTTP Get: attempt {details['tries']} after {details['wait']:0.2f}s"
1592
+ ),
1593
+ giveup=lambda e: isinstance(e, httpx.HTTPStatusError) and e.response.status_code not in [429, 500, 502, 503, 504] # pylint: disable=C0301 # noqa
1594
+ )
1595
+ async def _request(
1596
+ self,
1597
+ url: str,
1598
+ method: str = 'get',
1599
+ cookies: Optional[httpx.Cookies] = None,
1600
+ params: Optional[Dict[str, Any]] = None,
1601
+ data: Optional[Dict[str, Any]] = None,
1602
+ headers: Optional[Dict[str, str]] = None,
1603
+ timeout: Union[int, float] = 30.0,
1604
+ use_proxy: bool = True,
1605
+ free_proxy: bool = False,
1606
+ use_ssl: bool = True,
1607
+ use_json: bool = False,
1608
+ follow_redirects: bool = True,
1609
+ raise_for_status: bool = True,
1610
+ full_response: bool = False,
1611
+ connect_timeout: Union[int, float] = 5.0,
1612
+ read_timeout: Union[int, float] = 20.0,
1613
+ write_timeout: Union[int, float] = 5.0,
1614
+ pool_timeout: Union[int, float] = 20.0,
1615
+ num_retries: int = 2,
1616
+ **kwargs
1617
+ ) -> Dict[str, Any]:
1618
+ """
1619
+ Make an asynchronous HTTPx request, returning the response object.
1620
+
1621
+ Args:
1622
+ url (str): The URL to send the request to.
1623
+ method (str): The HTTP method to use (default: 'get').
1624
+ headers (dict, optional): Dictionary of HTTP headers to include in the request.
1625
+ cookies (httpx.Cookies, optional): Cookies to include in the request.
1626
+ params (dict, optional): Dictionary of query parameters to include in the URL.
1627
+ data (dict, optional): Dictionary of data to send in the request body.
1628
+ timeout (float, optional): Total timeout for the request in seconds.
1629
+ use_proxy (bool): Whether to use a proxy for the request.
1630
+ free_proxy (bool): Whether to use a free proxy.
1631
+ use_ssl (bool): Whether to use SSL for the request.
1632
+ use_json (bool): Whether to send data as JSON.
1633
+ follow_redirects (bool): Whether to follow redirects.
1634
+ raise_for_status (bool): Whether to raise an exception for HTTP errors.
1635
+ full_response (bool): Whether to return the full response object.
1636
+ connect_timeout (float): Timeout for connecting to the server.
1637
+ read_timeout (float): Timeout for reading from the server.
1638
+ write_timeout (float): Timeout for writing to the server.
1639
+ pool_timeout (float): Timeout for connection pool operations.
1640
+ num_retries (int): Number of retries to attempt at the transport level.
1641
+ **kwargs: Additional arguments to pass to httpx.AsyncClient.
1642
+
1643
+ Returns:
1644
+ Tuple[Any, Optional[Dict[str, Any]]]: A tuple containing the result and any error information.
1645
+
1646
+ Raises:
1647
+ httpx.TimeoutException: When the request times out.
1648
+ httpx.TooManyRedirects: When too many redirects are encountered.
1649
+ httpx.HTTPStatusError: When an HTTP error status is encountered.
1650
+ httpx.HTTPError: When an HTTP-related error occurs.
1651
+ AttributeError: When the HTTP method is invalid.
1652
+ RuntimeError: When an unknown error occurs.
1653
+ """
1654
+ proxies = None
1655
+ if use_proxy is True:
1656
+ self._proxies = await self.get_proxies()
1657
+ if len(self._proxies) == 1:
1658
+ proxies = self._proxies[0]
1659
+ if not proxies.startswith('http'):
1660
+ proxies = f"http://{proxies}"
1661
+ elif len(self._proxies) > 1:
1662
+ proxy = random.choice(self._proxies)
1663
+ if not proxy.startswith('http'):
1664
+ proxy = f"http://{proxy}"
1665
+ proxies = {
1666
+ "http://": httpx.AsyncHTTPTransport(
1667
+ proxy=f"http://{proxy}"
1668
+ ),
1669
+ "https://": httpx.AsyncHTTPTransport(
1670
+ proxy=f"http://{proxy}"
1671
+ ),
1672
+ }
1673
+ else:
1674
+ self._proxies = None
1675
+
1676
+ ssl_context = None
1677
+ if use_ssl:
1678
+ # Define custom SSL context
1679
+ ssl_context = ssl.create_default_context()
1680
+ # Disable older protocols if needed
1681
+ ssl_context.options |= ssl.OP_NO_TLSv1 | ssl.OP_NO_TLSv1_1
1682
+ # Ensure at least TLS 1.2 is used
1683
+ ssl_context.minimum_version = ssl.TLSVersion.TLSv1_2
1684
+ # Make this configurable rather than hardcoded to CERT_NONE
1685
+ if kwargs.get('verify_ssl', True):
1686
+ ssl_context.check_hostname = True
1687
+ ssl_context.verify_mode = ssl.CERT_REQUIRED
1688
+ else:
1689
+ ssl_context.check_hostname = False
1690
+ ssl_context.verify_mode = ssl.CERT_NONE
1691
+
1692
+ # Use AsyncHTTPTransport to pass in SSL context explicitly
1693
+ transport_options = {
1694
+ 'retries': num_retries,
1695
+ 'verify': ssl_context
1696
+ }
1697
+ if 'transport_options' in kwargs:
1698
+ transport_options.update(kwargs.pop('transport_options'))
1699
+ transport = httpx.AsyncHTTPTransport(
1700
+ **transport_options
1701
+ )
1702
+ timeout = httpx.Timeout(
1703
+ timeout=timeout,
1704
+ connect=connect_timeout,
1705
+ read=read_timeout,
1706
+ write=write_timeout,
1707
+ pool=pool_timeout
1708
+ )
1709
+ method = method.upper()
1710
+ if method not in valid_methods:
1711
+ raise ValueError(
1712
+ f"Invalid HTTP method: {method}. Must be one of {valid_methods}"
1713
+ )
1714
+ async with httpx.AsyncClient(
1715
+ cookies=cookies,
1716
+ proxy=proxies or None,
1717
+ transport=transport,
1718
+ headers=headers,
1719
+ timeout=timeout,
1720
+ http2=kwargs.get('use_http2', True),
1721
+ follow_redirects=follow_redirects,
1722
+ **kwargs
1723
+ ) as client:
1724
+ try:
1725
+ args = {
1726
+ "method": method.upper(),
1727
+ "url": url,
1728
+ "follow_redirects": follow_redirects
1729
+ }
1730
+ if data:
1731
+ if use_json:
1732
+ args["json"] = data
1733
+ else:
1734
+ args["data"] = data
1735
+ if params:
1736
+ args["params"] = params
1737
+ if self._httpclient:
1738
+ # keep session alive.
1739
+ response = await client.request(
1740
+ **args
1741
+ )
1742
+ else:
1743
+ response = await client.request(**args)
1744
+ if raise_for_status:
1745
+ response.raise_for_status()
1746
+ if full_response:
1747
+ if self.logger.isEnabledFor(logging.DEBUG):
1748
+ self.logger.debug(
1749
+ f"Response from {url}: status={response.status_code}, headers={response.headers}"
1750
+ )
1751
+ return response, None
1752
+ result, error = await self.process_response(
1753
+ response,
1754
+ url,
1755
+ download=kwargs.get('download', False),
1756
+ filename=kwargs.get('filename', None)
1757
+ )
1758
+ return result, error
1759
+ except httpx.TimeoutException:
1760
+ self.logger.error("Request timed out.")
1761
+ raise
1762
+ except httpx.TooManyRedirects:
1763
+ self.logger.error("Too many redirects.")
1764
+ raise
1765
+ except httpx.HTTPStatusError as ex:
1766
+ self.logger.error(
1767
+ f"HTTP status error occurred: {ex.response.status_code} - {ex}"
1768
+ )
1769
+ raise
1770
+ except httpx.HTTPError as ex:
1771
+ self.logger.error(f"HTTP error occurred: {ex}")
1772
+ raise
1773
+ except AttributeError as e:
1774
+ self.logger.error(f"HTTPx Client doesn't have attribute {method}: {e}")
1775
+ raise
1776
+ except Exception as exc:
1777
+ self.logger.error(f'Unknown Error > {exc}')
1778
+ raise RuntimeError(
1779
+ f"An error occurred: {exc}"
1780
+ ) from exc