flowtask 5.8.4__cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (470) hide show
  1. flowtask/__init__.py +93 -0
  2. flowtask/__main__.py +38 -0
  3. flowtask/bots/__init__.py +6 -0
  4. flowtask/bots/check.py +93 -0
  5. flowtask/bots/codebot.py +51 -0
  6. flowtask/components/ASPX.py +148 -0
  7. flowtask/components/AddDataset.py +352 -0
  8. flowtask/components/Amazon.py +523 -0
  9. flowtask/components/AutoTask.py +314 -0
  10. flowtask/components/Azure.py +80 -0
  11. flowtask/components/AzureUsers.py +106 -0
  12. flowtask/components/BaseAction.py +91 -0
  13. flowtask/components/BaseLoop.py +198 -0
  14. flowtask/components/BestBuy.py +800 -0
  15. flowtask/components/CSVToGCS.py +120 -0
  16. flowtask/components/CompanyScraper/__init__.py +1 -0
  17. flowtask/components/CompanyScraper/parsers/__init__.py +6 -0
  18. flowtask/components/CompanyScraper/parsers/base.py +102 -0
  19. flowtask/components/CompanyScraper/parsers/explorium.py +192 -0
  20. flowtask/components/CompanyScraper/parsers/leadiq.py +206 -0
  21. flowtask/components/CompanyScraper/parsers/rocket.py +133 -0
  22. flowtask/components/CompanyScraper/parsers/siccode.py +109 -0
  23. flowtask/components/CompanyScraper/parsers/visualvisitor.py +130 -0
  24. flowtask/components/CompanyScraper/parsers/zoominfo.py +118 -0
  25. flowtask/components/CompanyScraper/scrapper.py +1054 -0
  26. flowtask/components/CopyTo.py +177 -0
  27. flowtask/components/CopyToBigQuery.py +243 -0
  28. flowtask/components/CopyToMongoDB.py +291 -0
  29. flowtask/components/CopyToPg.py +609 -0
  30. flowtask/components/CopyToRethink.py +207 -0
  31. flowtask/components/CreateGCSBucket.py +102 -0
  32. flowtask/components/CreateReport/CreateReport.py +228 -0
  33. flowtask/components/CreateReport/__init__.py +9 -0
  34. flowtask/components/CreateReport/charts/__init__.py +15 -0
  35. flowtask/components/CreateReport/charts/bar.py +51 -0
  36. flowtask/components/CreateReport/charts/base.py +66 -0
  37. flowtask/components/CreateReport/charts/pie.py +64 -0
  38. flowtask/components/CreateReport/utils.py +9 -0
  39. flowtask/components/CustomerSatisfaction.py +196 -0
  40. flowtask/components/DataInput.py +200 -0
  41. flowtask/components/DateList.py +255 -0
  42. flowtask/components/DbClient.py +163 -0
  43. flowtask/components/DialPad.py +146 -0
  44. flowtask/components/DocumentDBQuery.py +200 -0
  45. flowtask/components/DownloadFrom.py +371 -0
  46. flowtask/components/DownloadFromD2L.py +113 -0
  47. flowtask/components/DownloadFromFTP.py +181 -0
  48. flowtask/components/DownloadFromIMAP.py +315 -0
  49. flowtask/components/DownloadFromS3.py +198 -0
  50. flowtask/components/DownloadFromSFTP.py +265 -0
  51. flowtask/components/DownloadFromSharepoint.py +110 -0
  52. flowtask/components/DownloadFromSmartSheet.py +114 -0
  53. flowtask/components/DownloadS3File.py +229 -0
  54. flowtask/components/Dummy.py +59 -0
  55. flowtask/components/DuplicatePhoto.py +411 -0
  56. flowtask/components/EmployeeEvaluation.py +237 -0
  57. flowtask/components/ExecuteSQL.py +323 -0
  58. flowtask/components/ExtractHTML.py +178 -0
  59. flowtask/components/FileBase.py +178 -0
  60. flowtask/components/FileCopy.py +181 -0
  61. flowtask/components/FileDelete.py +82 -0
  62. flowtask/components/FileExists.py +146 -0
  63. flowtask/components/FileIteratorDelete.py +112 -0
  64. flowtask/components/FileList.py +194 -0
  65. flowtask/components/FileOpen.py +75 -0
  66. flowtask/components/FileRead.py +120 -0
  67. flowtask/components/FileRename.py +106 -0
  68. flowtask/components/FilterIf.py +284 -0
  69. flowtask/components/FilterRows/FilterRows.py +200 -0
  70. flowtask/components/FilterRows/__init__.py +10 -0
  71. flowtask/components/FilterRows/functions.py +4 -0
  72. flowtask/components/GCSToBigQuery.py +103 -0
  73. flowtask/components/GoogleA4.py +150 -0
  74. flowtask/components/GoogleGeoCoding.py +344 -0
  75. flowtask/components/GooglePlaces.py +315 -0
  76. flowtask/components/GoogleSearch.py +539 -0
  77. flowtask/components/HTTPClient.py +268 -0
  78. flowtask/components/ICIMS.py +146 -0
  79. flowtask/components/IF.py +179 -0
  80. flowtask/components/IcimsFolderCopy.py +173 -0
  81. flowtask/components/ImageFeatures/__init__.py +5 -0
  82. flowtask/components/ImageFeatures/process.py +233 -0
  83. flowtask/components/IteratorBase.py +251 -0
  84. flowtask/components/LangchainLoader/__init__.py +5 -0
  85. flowtask/components/LangchainLoader/loader.py +194 -0
  86. flowtask/components/LangchainLoader/loaders/__init__.py +22 -0
  87. flowtask/components/LangchainLoader/loaders/abstract.py +362 -0
  88. flowtask/components/LangchainLoader/loaders/basepdf.py +50 -0
  89. flowtask/components/LangchainLoader/loaders/docx.py +91 -0
  90. flowtask/components/LangchainLoader/loaders/html.py +119 -0
  91. flowtask/components/LangchainLoader/loaders/pdfblocks.py +146 -0
  92. flowtask/components/LangchainLoader/loaders/pdfmark.py +79 -0
  93. flowtask/components/LangchainLoader/loaders/pdftables.py +135 -0
  94. flowtask/components/LangchainLoader/loaders/qa.py +67 -0
  95. flowtask/components/LangchainLoader/loaders/txt.py +55 -0
  96. flowtask/components/LeadIQ.py +650 -0
  97. flowtask/components/Loop.py +253 -0
  98. flowtask/components/Lowes.py +334 -0
  99. flowtask/components/MS365Usage.py +156 -0
  100. flowtask/components/MSTeamsMessages.py +320 -0
  101. flowtask/components/MarketClustering.py +1051 -0
  102. flowtask/components/MergeFiles.py +362 -0
  103. flowtask/components/MilvusOutput.py +87 -0
  104. flowtask/components/NearByStores.py +175 -0
  105. flowtask/components/NetworkNinja/__init__.py +6 -0
  106. flowtask/components/NetworkNinja/models/__init__.py +52 -0
  107. flowtask/components/NetworkNinja/models/abstract.py +177 -0
  108. flowtask/components/NetworkNinja/models/account.py +39 -0
  109. flowtask/components/NetworkNinja/models/client.py +19 -0
  110. flowtask/components/NetworkNinja/models/district.py +14 -0
  111. flowtask/components/NetworkNinja/models/events.py +101 -0
  112. flowtask/components/NetworkNinja/models/forms.py +499 -0
  113. flowtask/components/NetworkNinja/models/market.py +16 -0
  114. flowtask/components/NetworkNinja/models/organization.py +34 -0
  115. flowtask/components/NetworkNinja/models/photos.py +125 -0
  116. flowtask/components/NetworkNinja/models/project.py +44 -0
  117. flowtask/components/NetworkNinja/models/region.py +28 -0
  118. flowtask/components/NetworkNinja/models/store.py +203 -0
  119. flowtask/components/NetworkNinja/models/user.py +151 -0
  120. flowtask/components/NetworkNinja/router.py +854 -0
  121. flowtask/components/Odoo.py +175 -0
  122. flowtask/components/OdooInjector.py +192 -0
  123. flowtask/components/OpenFromXML.py +126 -0
  124. flowtask/components/OpenWeather.py +41 -0
  125. flowtask/components/OpenWithBase.py +616 -0
  126. flowtask/components/OpenWithPandas.py +715 -0
  127. flowtask/components/PGPDecrypt.py +199 -0
  128. flowtask/components/PandasIterator.py +187 -0
  129. flowtask/components/PandasToFile.py +189 -0
  130. flowtask/components/Paradox.py +339 -0
  131. flowtask/components/ParamIterator.py +117 -0
  132. flowtask/components/ParseHTML.py +84 -0
  133. flowtask/components/PlacerStores.py +249 -0
  134. flowtask/components/Pokemon.py +507 -0
  135. flowtask/components/PositiveBot.py +62 -0
  136. flowtask/components/PowerPointSlide.py +400 -0
  137. flowtask/components/PrintMessage.py +127 -0
  138. flowtask/components/ProductCompetitors/__init__.py +5 -0
  139. flowtask/components/ProductCompetitors/parsers/__init__.py +7 -0
  140. flowtask/components/ProductCompetitors/parsers/base.py +72 -0
  141. flowtask/components/ProductCompetitors/parsers/bestbuy.py +86 -0
  142. flowtask/components/ProductCompetitors/parsers/lowes.py +103 -0
  143. flowtask/components/ProductCompetitors/scrapper.py +155 -0
  144. flowtask/components/ProductCompliant.py +169 -0
  145. flowtask/components/ProductInfo/__init__.py +1 -0
  146. flowtask/components/ProductInfo/parsers/__init__.py +5 -0
  147. flowtask/components/ProductInfo/parsers/base.py +83 -0
  148. flowtask/components/ProductInfo/parsers/brother.py +97 -0
  149. flowtask/components/ProductInfo/parsers/canon.py +167 -0
  150. flowtask/components/ProductInfo/parsers/epson.py +118 -0
  151. flowtask/components/ProductInfo/parsers/hp.py +131 -0
  152. flowtask/components/ProductInfo/parsers/samsung.py +97 -0
  153. flowtask/components/ProductInfo/scraper.py +319 -0
  154. flowtask/components/ProductPricing.py +118 -0
  155. flowtask/components/QS.py +261 -0
  156. flowtask/components/QSBase.py +201 -0
  157. flowtask/components/QueryIterator.py +273 -0
  158. flowtask/components/QueryToInsert.py +327 -0
  159. flowtask/components/QueryToPandas.py +432 -0
  160. flowtask/components/RESTClient.py +195 -0
  161. flowtask/components/RethinkDBQuery.py +189 -0
  162. flowtask/components/Rsync.py +74 -0
  163. flowtask/components/RunSSH.py +59 -0
  164. flowtask/components/RunShell.py +71 -0
  165. flowtask/components/SalesForce.py +20 -0
  166. flowtask/components/SaveImageBank/__init__.py +257 -0
  167. flowtask/components/SchedulingVisits.py +592 -0
  168. flowtask/components/ScrapPage.py +216 -0
  169. flowtask/components/ScrapSearch.py +79 -0
  170. flowtask/components/SendNotify.py +257 -0
  171. flowtask/components/SentimentAnalysis.py +694 -0
  172. flowtask/components/ServiceScrapper/__init__.py +5 -0
  173. flowtask/components/ServiceScrapper/parsers/__init__.py +1 -0
  174. flowtask/components/ServiceScrapper/parsers/base.py +94 -0
  175. flowtask/components/ServiceScrapper/parsers/costco.py +93 -0
  176. flowtask/components/ServiceScrapper/scrapper.py +199 -0
  177. flowtask/components/SetVariables.py +156 -0
  178. flowtask/components/SubTask.py +182 -0
  179. flowtask/components/SuiteCRM.py +48 -0
  180. flowtask/components/Switch.py +175 -0
  181. flowtask/components/TableBase.py +148 -0
  182. flowtask/components/TableDelete.py +312 -0
  183. flowtask/components/TableInput.py +143 -0
  184. flowtask/components/TableOutput/TableOutput.py +384 -0
  185. flowtask/components/TableOutput/__init__.py +3 -0
  186. flowtask/components/TableSchema.py +534 -0
  187. flowtask/components/Target.py +223 -0
  188. flowtask/components/ThumbnailGenerator.py +156 -0
  189. flowtask/components/ToPandas.py +67 -0
  190. flowtask/components/TransformRows/TransformRows.py +507 -0
  191. flowtask/components/TransformRows/__init__.py +9 -0
  192. flowtask/components/TransformRows/functions.py +559 -0
  193. flowtask/components/TransposeRows.py +176 -0
  194. flowtask/components/UPCDatabase.py +86 -0
  195. flowtask/components/UnGzip.py +171 -0
  196. flowtask/components/Uncompress.py +172 -0
  197. flowtask/components/UniqueRows.py +126 -0
  198. flowtask/components/Unzip.py +107 -0
  199. flowtask/components/UpdateOperationalVars.py +147 -0
  200. flowtask/components/UploadTo.py +299 -0
  201. flowtask/components/UploadToS3.py +136 -0
  202. flowtask/components/UploadToSFTP.py +160 -0
  203. flowtask/components/UploadToSharepoint.py +205 -0
  204. flowtask/components/UserFunc.py +122 -0
  205. flowtask/components/VivaTracker.py +140 -0
  206. flowtask/components/WSDLClient.py +123 -0
  207. flowtask/components/Wait.py +18 -0
  208. flowtask/components/Walmart.py +199 -0
  209. flowtask/components/Workplace.py +134 -0
  210. flowtask/components/XMLToPandas.py +267 -0
  211. flowtask/components/Zammad/__init__.py +41 -0
  212. flowtask/components/Zammad/models.py +0 -0
  213. flowtask/components/ZoomInfoScraper.py +409 -0
  214. flowtask/components/__init__.py +104 -0
  215. flowtask/components/abstract.py +18 -0
  216. flowtask/components/flow.py +530 -0
  217. flowtask/components/google.py +335 -0
  218. flowtask/components/group.py +221 -0
  219. flowtask/components/py.typed +0 -0
  220. flowtask/components/reviewscrap.py +132 -0
  221. flowtask/components/tAutoincrement.py +117 -0
  222. flowtask/components/tConcat.py +109 -0
  223. flowtask/components/tExplode.py +119 -0
  224. flowtask/components/tFilter.py +184 -0
  225. flowtask/components/tGroup.py +236 -0
  226. flowtask/components/tJoin.py +270 -0
  227. flowtask/components/tMap/__init__.py +9 -0
  228. flowtask/components/tMap/functions.py +54 -0
  229. flowtask/components/tMap/tMap.py +450 -0
  230. flowtask/components/tMelt.py +112 -0
  231. flowtask/components/tMerge.py +114 -0
  232. flowtask/components/tOrder.py +93 -0
  233. flowtask/components/tPandas.py +94 -0
  234. flowtask/components/tPivot.py +71 -0
  235. flowtask/components/tPluckCols.py +76 -0
  236. flowtask/components/tUnnest.py +82 -0
  237. flowtask/components/user.py +401 -0
  238. flowtask/conf.py +457 -0
  239. flowtask/download.py +102 -0
  240. flowtask/events/__init__.py +11 -0
  241. flowtask/events/events/__init__.py +20 -0
  242. flowtask/events/events/abstract.py +95 -0
  243. flowtask/events/events/alerts/__init__.py +362 -0
  244. flowtask/events/events/alerts/colfunctions.py +131 -0
  245. flowtask/events/events/alerts/functions.py +158 -0
  246. flowtask/events/events/dummy.py +12 -0
  247. flowtask/events/events/exec.py +124 -0
  248. flowtask/events/events/file/__init__.py +7 -0
  249. flowtask/events/events/file/base.py +51 -0
  250. flowtask/events/events/file/copy.py +23 -0
  251. flowtask/events/events/file/delete.py +16 -0
  252. flowtask/events/events/interfaces/__init__.py +9 -0
  253. flowtask/events/events/interfaces/client.py +67 -0
  254. flowtask/events/events/interfaces/credentials.py +28 -0
  255. flowtask/events/events/interfaces/notifications.py +58 -0
  256. flowtask/events/events/jira.py +122 -0
  257. flowtask/events/events/log.py +26 -0
  258. flowtask/events/events/logerr.py +52 -0
  259. flowtask/events/events/notify.py +59 -0
  260. flowtask/events/events/notify_event.py +160 -0
  261. flowtask/events/events/publish.py +54 -0
  262. flowtask/events/events/sendfile.py +104 -0
  263. flowtask/events/events/task.py +97 -0
  264. flowtask/events/events/teams.py +98 -0
  265. flowtask/events/events/webhook.py +58 -0
  266. flowtask/events/manager.py +287 -0
  267. flowtask/exceptions.c +39393 -0
  268. flowtask/exceptions.cpython-312-x86_64-linux-gnu.so +0 -0
  269. flowtask/extensions/__init__.py +3 -0
  270. flowtask/extensions/abstract.py +82 -0
  271. flowtask/extensions/logging/__init__.py +65 -0
  272. flowtask/hooks/__init__.py +9 -0
  273. flowtask/hooks/actions/__init__.py +22 -0
  274. flowtask/hooks/actions/abstract.py +66 -0
  275. flowtask/hooks/actions/dummy.py +23 -0
  276. flowtask/hooks/actions/jira.py +74 -0
  277. flowtask/hooks/actions/rest.py +320 -0
  278. flowtask/hooks/actions/sampledata.py +37 -0
  279. flowtask/hooks/actions/sensor.py +23 -0
  280. flowtask/hooks/actions/task.py +9 -0
  281. flowtask/hooks/actions/ticket.py +37 -0
  282. flowtask/hooks/actions/zammad.py +55 -0
  283. flowtask/hooks/hook.py +62 -0
  284. flowtask/hooks/models.py +17 -0
  285. flowtask/hooks/service.py +187 -0
  286. flowtask/hooks/step.py +91 -0
  287. flowtask/hooks/types/__init__.py +23 -0
  288. flowtask/hooks/types/base.py +129 -0
  289. flowtask/hooks/types/brokers/__init__.py +11 -0
  290. flowtask/hooks/types/brokers/base.py +54 -0
  291. flowtask/hooks/types/brokers/mqtt.py +35 -0
  292. flowtask/hooks/types/brokers/rabbitmq.py +82 -0
  293. flowtask/hooks/types/brokers/redis.py +83 -0
  294. flowtask/hooks/types/brokers/sqs.py +44 -0
  295. flowtask/hooks/types/fs.py +232 -0
  296. flowtask/hooks/types/http.py +49 -0
  297. flowtask/hooks/types/imap.py +200 -0
  298. flowtask/hooks/types/jira.py +279 -0
  299. flowtask/hooks/types/mail.py +205 -0
  300. flowtask/hooks/types/postgres.py +98 -0
  301. flowtask/hooks/types/responses/__init__.py +8 -0
  302. flowtask/hooks/types/responses/base.py +5 -0
  303. flowtask/hooks/types/sharepoint.py +288 -0
  304. flowtask/hooks/types/ssh.py +141 -0
  305. flowtask/hooks/types/tagged.py +59 -0
  306. flowtask/hooks/types/upload.py +85 -0
  307. flowtask/hooks/types/watch.py +71 -0
  308. flowtask/hooks/types/web.py +36 -0
  309. flowtask/interfaces/AzureClient.py +137 -0
  310. flowtask/interfaces/AzureGraph.py +839 -0
  311. flowtask/interfaces/Boto3Client.py +326 -0
  312. flowtask/interfaces/DropboxClient.py +173 -0
  313. flowtask/interfaces/ExcelHandler.py +94 -0
  314. flowtask/interfaces/FTPClient.py +131 -0
  315. flowtask/interfaces/GoogleCalendar.py +201 -0
  316. flowtask/interfaces/GoogleClient.py +133 -0
  317. flowtask/interfaces/GoogleDrive.py +127 -0
  318. flowtask/interfaces/GoogleGCS.py +89 -0
  319. flowtask/interfaces/GoogleGeocoding.py +93 -0
  320. flowtask/interfaces/GoogleLang.py +114 -0
  321. flowtask/interfaces/GooglePub.py +61 -0
  322. flowtask/interfaces/GoogleSheet.py +68 -0
  323. flowtask/interfaces/IMAPClient.py +137 -0
  324. flowtask/interfaces/O365Calendar.py +113 -0
  325. flowtask/interfaces/O365Client.py +220 -0
  326. flowtask/interfaces/OneDrive.py +284 -0
  327. flowtask/interfaces/Outlook.py +155 -0
  328. flowtask/interfaces/ParrotBot.py +130 -0
  329. flowtask/interfaces/SSHClient.py +378 -0
  330. flowtask/interfaces/Sharepoint.py +496 -0
  331. flowtask/interfaces/__init__.py +36 -0
  332. flowtask/interfaces/azureauth.py +119 -0
  333. flowtask/interfaces/cache.py +201 -0
  334. flowtask/interfaces/client.py +82 -0
  335. flowtask/interfaces/compress.py +525 -0
  336. flowtask/interfaces/credentials.py +124 -0
  337. flowtask/interfaces/d2l.py +239 -0
  338. flowtask/interfaces/databases/__init__.py +5 -0
  339. flowtask/interfaces/databases/db.py +223 -0
  340. flowtask/interfaces/databases/documentdb.py +55 -0
  341. flowtask/interfaces/databases/rethink.py +39 -0
  342. flowtask/interfaces/dataframes/__init__.py +11 -0
  343. flowtask/interfaces/dataframes/abstract.py +21 -0
  344. flowtask/interfaces/dataframes/arrow.py +71 -0
  345. flowtask/interfaces/dataframes/dt.py +69 -0
  346. flowtask/interfaces/dataframes/pandas.py +167 -0
  347. flowtask/interfaces/dataframes/polars.py +60 -0
  348. flowtask/interfaces/db.py +263 -0
  349. flowtask/interfaces/env.py +46 -0
  350. flowtask/interfaces/func.py +137 -0
  351. flowtask/interfaces/http.py +1780 -0
  352. flowtask/interfaces/locale.py +40 -0
  353. flowtask/interfaces/log.py +75 -0
  354. flowtask/interfaces/mask.py +143 -0
  355. flowtask/interfaces/notification.py +154 -0
  356. flowtask/interfaces/playwright.py +339 -0
  357. flowtask/interfaces/powerpoint.py +368 -0
  358. flowtask/interfaces/py.typed +0 -0
  359. flowtask/interfaces/qs.py +376 -0
  360. flowtask/interfaces/result.py +87 -0
  361. flowtask/interfaces/selenium_service.py +779 -0
  362. flowtask/interfaces/smartsheet.py +154 -0
  363. flowtask/interfaces/stat.py +39 -0
  364. flowtask/interfaces/task.py +96 -0
  365. flowtask/interfaces/template.py +118 -0
  366. flowtask/interfaces/vectorstores/__init__.py +1 -0
  367. flowtask/interfaces/vectorstores/abstract.py +133 -0
  368. flowtask/interfaces/vectorstores/milvus.py +669 -0
  369. flowtask/interfaces/zammad.py +107 -0
  370. flowtask/models.py +193 -0
  371. flowtask/parsers/__init__.py +15 -0
  372. flowtask/parsers/_yaml.c +11978 -0
  373. flowtask/parsers/_yaml.cpython-312-x86_64-linux-gnu.so +0 -0
  374. flowtask/parsers/argparser.py +235 -0
  375. flowtask/parsers/base.c +15155 -0
  376. flowtask/parsers/base.cpython-312-x86_64-linux-gnu.so +0 -0
  377. flowtask/parsers/json.c +11968 -0
  378. flowtask/parsers/json.cpython-312-x86_64-linux-gnu.so +0 -0
  379. flowtask/parsers/maps.py +49 -0
  380. flowtask/parsers/toml.c +11968 -0
  381. flowtask/parsers/toml.cpython-312-x86_64-linux-gnu.so +0 -0
  382. flowtask/plugins/__init__.py +16 -0
  383. flowtask/plugins/components/__init__.py +0 -0
  384. flowtask/plugins/handler/__init__.py +45 -0
  385. flowtask/plugins/importer.py +31 -0
  386. flowtask/plugins/sources/__init__.py +0 -0
  387. flowtask/runner.py +283 -0
  388. flowtask/scheduler/__init__.py +9 -0
  389. flowtask/scheduler/functions.py +493 -0
  390. flowtask/scheduler/handlers/__init__.py +8 -0
  391. flowtask/scheduler/handlers/manager.py +504 -0
  392. flowtask/scheduler/handlers/models.py +58 -0
  393. flowtask/scheduler/handlers/service.py +72 -0
  394. flowtask/scheduler/notifications.py +65 -0
  395. flowtask/scheduler/scheduler.py +993 -0
  396. flowtask/services/__init__.py +0 -0
  397. flowtask/services/bots/__init__.py +0 -0
  398. flowtask/services/bots/telegram.py +264 -0
  399. flowtask/services/files/__init__.py +11 -0
  400. flowtask/services/files/manager.py +522 -0
  401. flowtask/services/files/model.py +37 -0
  402. flowtask/services/files/service.py +767 -0
  403. flowtask/services/jira/__init__.py +3 -0
  404. flowtask/services/jira/jira_actions.py +191 -0
  405. flowtask/services/tasks/__init__.py +13 -0
  406. flowtask/services/tasks/launcher.py +213 -0
  407. flowtask/services/tasks/manager.py +323 -0
  408. flowtask/services/tasks/service.py +275 -0
  409. flowtask/services/tasks/task_manager.py +376 -0
  410. flowtask/services/tasks/tasks.py +155 -0
  411. flowtask/storages/__init__.py +16 -0
  412. flowtask/storages/exceptions.py +12 -0
  413. flowtask/storages/files/__init__.py +8 -0
  414. flowtask/storages/files/abstract.py +29 -0
  415. flowtask/storages/files/filesystem.py +66 -0
  416. flowtask/storages/tasks/__init__.py +19 -0
  417. flowtask/storages/tasks/abstract.py +26 -0
  418. flowtask/storages/tasks/database.py +33 -0
  419. flowtask/storages/tasks/filesystem.py +108 -0
  420. flowtask/storages/tasks/github.py +119 -0
  421. flowtask/storages/tasks/memory.py +45 -0
  422. flowtask/storages/tasks/row.py +25 -0
  423. flowtask/tasks/__init__.py +0 -0
  424. flowtask/tasks/abstract.py +526 -0
  425. flowtask/tasks/command.py +118 -0
  426. flowtask/tasks/pile.py +486 -0
  427. flowtask/tasks/py.typed +0 -0
  428. flowtask/tasks/task.py +778 -0
  429. flowtask/template/__init__.py +161 -0
  430. flowtask/tests.py +257 -0
  431. flowtask/types/__init__.py +8 -0
  432. flowtask/types/typedefs.c +11347 -0
  433. flowtask/types/typedefs.cpython-312-x86_64-linux-gnu.so +0 -0
  434. flowtask/utils/__init__.py +24 -0
  435. flowtask/utils/constants.py +117 -0
  436. flowtask/utils/encoders.py +21 -0
  437. flowtask/utils/executor.py +112 -0
  438. flowtask/utils/functions.cpp +14280 -0
  439. flowtask/utils/functions.cpython-312-x86_64-linux-gnu.so +0 -0
  440. flowtask/utils/json.cpp +13349 -0
  441. flowtask/utils/json.cpython-312-x86_64-linux-gnu.so +0 -0
  442. flowtask/utils/mail.py +63 -0
  443. flowtask/utils/parseqs.c +13324 -0
  444. flowtask/utils/parserqs.cpython-312-x86_64-linux-gnu.so +0 -0
  445. flowtask/utils/stats.py +308 -0
  446. flowtask/utils/transformations.py +74 -0
  447. flowtask/utils/uv.py +12 -0
  448. flowtask/utils/validators.py +97 -0
  449. flowtask/version.py +11 -0
  450. flowtask-5.8.4.dist-info/LICENSE +201 -0
  451. flowtask-5.8.4.dist-info/METADATA +209 -0
  452. flowtask-5.8.4.dist-info/RECORD +470 -0
  453. flowtask-5.8.4.dist-info/WHEEL +6 -0
  454. flowtask-5.8.4.dist-info/entry_points.txt +3 -0
  455. flowtask-5.8.4.dist-info/top_level.txt +2 -0
  456. plugins/components/CreateQR.py +39 -0
  457. plugins/components/TestComponent.py +28 -0
  458. plugins/components/Use1.py +13 -0
  459. plugins/components/Workplace.py +117 -0
  460. plugins/components/__init__.py +3 -0
  461. plugins/sources/__init__.py +0 -0
  462. plugins/sources/get_populartimes.py +78 -0
  463. plugins/sources/google.py +150 -0
  464. plugins/sources/hubspot.py +679 -0
  465. plugins/sources/icims.py +679 -0
  466. plugins/sources/mobileinsight.py +501 -0
  467. plugins/sources/newrelic.py +262 -0
  468. plugins/sources/uap.py +268 -0
  469. plugins/sources/venu.py +244 -0
  470. plugins/sources/vocinity.py +314 -0
@@ -0,0 +1,779 @@
1
+ from abc import ABC
2
+ from typing import Optional
3
+ from collections.abc import Callable
4
+ import random
5
+ import time
6
+ # BeautifulSoup:
7
+ from bs4 import BeautifulSoup
8
+ from lxml import html, etree
9
+ # Undetected Chrome Driver:
10
+ import undetected_chromedriver as uc
11
+ # WebDriver Support:
12
+ from webdriver_manager.chrome import ChromeDriverManager
13
+ from webdriver_manager.firefox import GeckoDriverManager
14
+ from webdriver_manager.microsoft import EdgeChromiumDriverManager
15
+ from webdriver_manager.core.driver_cache import DriverCacheManager
16
+ # from selenium import webdriver
17
+ from seleniumwire import webdriver
18
+ # Selenium:
19
+ from selenium import webdriver as selenium_driver
20
+ # Selenium Proxy:
21
+ from selenium.webdriver import Proxy
22
+ # Chrome Support:
23
+ from selenium.webdriver.chrome.service import Service as ChromeService
24
+ from selenium.webdriver.chrome.options import Options as ChromeOptions
25
+ # Firefox Support:
26
+ from selenium.webdriver.firefox.service import Service as FirefoxService
27
+ from selenium.webdriver.firefox.options import Options as FirefoxOptions
28
+ # Edge Support:
29
+ from selenium.webdriver.edge.service import Service as EdgeService
30
+ from selenium.webdriver.edge.options import Options as EdgeOptions
31
+ # Safari Support:
32
+ from selenium.webdriver.safari.options import Options as SafariOptions
33
+ from selenium.webdriver.safari.service import Service as SafariService
34
+ # WebKitGTK Support:
35
+ from selenium.webdriver.webkitgtk.service import Service as WebKitGTKService
36
+ from selenium.webdriver.webkitgtk.options import Options as WebKitGTKOptions
37
+ # Selenium Options:
38
+ from selenium.webdriver.common.by import By
39
+ from selenium.webdriver.common.action_chains import ActionChains
40
+ from selenium.webdriver.support import expected_conditions as EC
41
+ from selenium.webdriver.support.ui import WebDriverWait
42
+ from selenium.common.exceptions import (
43
+ TimeoutException,
44
+ NoSuchElementException,
45
+ WebDriverException
46
+ )
47
+ from navconfig import BASE_DIR
48
+ from navconfig.logging import logging
49
+ from ..conf import (
50
+ ### Oxylabs Proxy Support for Selenium
51
+ OXYLABS_USERNAME,
52
+ OXYLABS_PASSWORD,
53
+ OXYLABS_ENDPOINT,
54
+ GOOGLE_SEARCH_ENGINE_ID
55
+ )
56
+ from ..exceptions import (
57
+ NotSupported,
58
+ TimeOutError,
59
+ ComponentError
60
+ )
61
+ from .http import ua, mobile_ua
62
+
63
+
64
# Pin the log level of each third-party logger used by this module.
for _logger_name, _logger_level in (
    ('selenium.webdriver', logging.INFO),
    ('WDM', logging.WARNING),
    ('hpack', logging.WARNING),
    ('seleniumwire', logging.WARNING),
    ('undetected_chromedriver', logging.INFO),
):
    logging.getLogger(name=_logger_name).setLevel(_logger_level)
# Do not leave the loop variables behind as module attributes.
del _logger_name, _logger_level
69
+
70
+
71
# Known mobile device names.
# NOTE(review): presumably these are Chrome mobile-emulation presets matching
# the ``mobile_device`` setting of SeleniumService -- confirm against get_driver.
mobile_devices = [
    'iPhone X', 'Google Nexus 7', 'Pixel 2', 'Samsung Galaxy Tab', 'Nexus 5',
]
78
+
79
+
80
+ class SeleniumService(ABC):
81
+ """SeleniumService.
82
+
83
+ Interface for making HTTP connections using Selenium.
84
+ """
85
+ chrome_options = [
86
+ # "--headless=new",
87
+ "--disable-gpu",
88
+ "--no-sandbox",
89
+ "--enable-automation",
90
+ "--lang=en",
91
+ "--disable-dev-shm-usage",
92
+ "--disable-features=VizDisplayCompositor",
93
+ "--disable-features=IsolateOrigins",
94
+ # "--disable-extensions",
95
+ # "--disable-features=NetworkService,NetworkServiceInProcess",
96
+ # "--ignore-certificate-errors-spki-list",
97
+ # "--allow-insecure-localhost",
98
+ # "--ignore-ssl-errors",
99
+ # "--disable-web-security",
100
+ # "--allow-running-insecure-content",
101
+ ]
102
+ undetected_options = [
103
+ "--disable-gpu",
104
+ "--no-sandbox",
105
+ "--enable-automation",
106
+ "--disable-blink-features=AutomationControlled",
107
+ "--disable-features=NetworkService,NetworkServiceInProcess",
108
+ "--disable-dev-shm-usage",
109
+ ]
110
+ firefox_options = [
111
+ "--no-sandbox",
112
+ "--disable-gpu",
113
+ # browser viewport size
114
+ "--width=1920",
115
+ "--height=1080"
116
+ ]
117
+ accept: str = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9" # noqa
118
+
119
def __init__(self, *args, **kwargs):
    """Read the Selenium settings from ``kwargs`` and build default headers.

    Every setting is optional and falls back to the default shown:

    - accept_cookies (None): tuple with the button used to accept cookies.
    - use_wire, use_firefox, use_edge, use_safari, use_webkit,
      as_mobile, use_undetected (all False): driver selection flags.
    - headless (True), enable_http2 (True).
    - browser_binary, driver_binary, userdata (None).
    - mobile_device ('Pixel 2'), default_tag ('body').
    - accept_is_clickable (False), timeout (60).
    - wait_until, inner_tag (None).
    - headers ({}): merged over the default request headers.
    - cookies ({}): a dict, or a ``name=value; ...`` string that is
      parsed with ``parse_cookies``.

    All positional and keyword arguments are forwarded unchanged to the
    next ``__init__`` in the MRO.
    """
    option = kwargs.get
    # Driver and wait helper start out empty.
    self._driver: Callable = None
    self._wait: WebDriverWait = None
    # Tuple with the button used for accepting cookies.
    self.accept_cookies: tuple = option('accept_cookies')
    # Driver selection flags.
    self.use_wire: bool = option('use_wire', False)
    self.use_firefox: bool = option('use_firefox', False)
    self.use_edge: bool = option('use_edge', False)
    self.use_safari: bool = option('use_safari', False)
    self.use_webkit: bool = option('use_webkit', False)
    self.as_mobile: bool = option('as_mobile', False)
    self.use_undetected: bool = option('use_undetected', False)
    self.headless: bool = option('headless', True)
    self.enable_http2: bool = option('enable_http2', True)
    # Optional explicit paths / profile.
    self._browser_binary: str = option('browser_binary')
    self._driver_binary: str = option('driver_binary')
    self._userdata: str = option('userdata')
    # Device type for mobile sessions.
    # TODO: create a dictionary matching userAgent and Mobile Device.
    self.mobile_device: str = option('mobile_device', 'Pixel 2')
    self.default_tag: str = option('default_tag', 'body')
    self.accept_is_clickable: bool = option('accept_is_clickable', False)
    self.timeout: int = option('timeout', 60)
    self.wait_until: tuple = option('wait_until')
    self.inner_tag: tuple = option('inner_tag')
    self._options = None
    super().__init__(*args, **kwargs)
    # Caller-supplied headers override the defaults below.
    extra_headers = option('headers', {})
    self.headers: dict = {
        "Accept": self.accept,
        "TE": "trailers",
        "Accept-Encoding": "gzip, deflate",
        "DNT": "1",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": random.choice(ua),
        **extra_headers
    }
    # Cookies may arrive as a dict or as a raw "name=value; ..." string.
    jar = option('cookies', {})
    self.cookies: dict = jar
    if isinstance(jar, str):
        self.cookies = self.parse_cookies(jar)
163
+
164
def parse_cookies(self, cookie_pair: str) -> dict:
    """Turn a ``name=value; name2=value2`` string into a dict.

    Segments are separated by ``;``. Empty segments and segments without
    an ``=`` are ignored. Names and values are stripped of surrounding
    whitespace, and values additionally lose surrounding double quotes.
    Only the first ``=`` of a segment separates name from value.
    """
    parsed = {}
    for segment in cookie_pair.strip().split(';'):
        segment = segment.strip()
        if not segment or '=' not in segment:
            continue
        key, _, raw_value = segment.partition('=')
        parsed[key.strip()] = raw_value.strip().strip('"')  # drop quotes if any
    return parsed
175
+
176
def check_by_attribute(self, attribute: tuple):
    """Translate a ``(kind, value)`` pair into a Selenium ``(By.*, value)`` locator.

    Args:
        attribute: two-item sequence whose first item names the lookup
            strategy ('id', 'class'/'class name', 'name', 'xpath', 'css',
            'tag'/'tag name'/'tagname'/'tag_name') and whose second item
            is the value to look up.

    Returns:
        ``None`` when ``attribute`` is empty/falsy, otherwise a tuple of
        the matching ``By`` constant and the value.

    Raises:
        NotSupported: when the strategy name is not one of those above.
    """
    if not attribute:
        return None
    kind = attribute[0]
    value = attribute[1]
    # Checked in order with equality, exactly like an if/elif chain.
    strategies = (
        (('id',), By.ID),
        (('class', 'class name'), By.CLASS_NAME),
        (('name',), By.NAME),
        (('xpath',), By.XPATH),
        (('css',), By.CSS_SELECTOR),
        (('tag', 'tag name', 'tagname', 'tag_name'), By.TAG_NAME),
    )
    for aliases, strategy in strategies:
        if kind in aliases:
            return (strategy, value)
    raise NotSupported(
        f"Selenium: Attribute {kind} is not supported."
    )
199
+
200
def driver(self):
    """Return the driver object currently stored on this service."""
    current = self._driver
    return current
202
+
203
def close_driver(self):
    """Call ``quit()`` on the stored driver; do nothing when there is none."""
    if not self._driver:
        return
    self._driver.quit()
206
+
207
async def start(self, **kwargs) -> bool:
    """Run the parent ``start`` and normalise the configured locators.

    After awaiting the parent implementation, ``accept_cookies``,
    ``inner_tag`` and (when the instance has a ``screenshot`` mapping)
    ``screenshot['portion']`` are each converted with
    ``check_by_attribute``.

    Returns:
        Always ``True``.

    Raises:
        NotSupported: when ``accept_cookies`` is set but is not a tuple,
            or when a locator names an unsupported strategy.
    """
    await super(SeleniumService, self).start(**kwargs)
    # Accept-cookies button: must be a tuple before it can be converted.
    cookies_button = self.accept_cookies
    if cookies_button:
        if not isinstance(cookies_button, tuple):
            raise NotSupported(
                "Accept Cookies must be a Tuple with the Button to Accept Cookies."
            )
        self.accept_cookies = self.check_by_attribute(cookies_button)
    inner = self.inner_tag
    if inner:
        self.inner_tag = self.check_by_attribute(inner)
    if hasattr(self, 'screenshot'):
        try:
            portion = self.screenshot['portion']
            self.screenshot['portion'] = self.check_by_attribute(portion)
        except (KeyError, ValueError):
            # No (usable) portion configured: leave the screenshot settings alone.
            pass
    return True
226
+
227
def proxy_selenium(self, user: str, password: str, endpoint: str, only_http: bool = True) -> dict:
    """Build the ``{"proxy": {...}}`` mapping for an authenticated proxy.

    Args:
        user: Proxy user name.
        password: Proxy password.
        endpoint: Proxy ``host:port``.
        only_http: When exactly ``True`` the ``https`` entry also uses the
            ``http://`` scheme; otherwise it uses ``https://``.

    Returns:
        A dict with a single ``"proxy"`` key holding ``http`` and ``https`` URLs.
    """
    authority = f"{user}:{password}@{endpoint}"
    secure_scheme = "http" if only_http is True else "https"
    return {
        "proxy": {
            "http": f"http://{authority}",
            "https": f"{secure_scheme}://{authority}",
        }
    }
245
+
246
async def get_driver(self):
    """
    Build, store and return a Selenium WebDriver for the configured browser.

    The browser is chosen from the instance flags ``use_undetected``,
    ``use_firefox``, ``use_edge``, ``use_safari`` and ``use_webkit``; when
    none of them is ``True`` a regular Chrome driver is created. Other flags
    read here:

    - ``use_wire``: selects which webdriver module is used and adds the
      ``seleniumwire_options`` keyword argument.
    - ``use_proxy`` / ``_free_proxy``: configure an Oxylabs proxy or the
      first proxy returned by ``get_proxies()``.
    - ``headless``, ``enable_http2``, ``as_mobile`` (Chrome branch only).
    - ``_browser_binary`` / ``_driver_binary``: explicit binary locations.

    Browser-specific argument lists are applied per branch:
    ``undetected_options``, ``firefox_options`` and ``chrome_options``.

    Side effects: sets ``self._webdriver``, ``self._options``,
    ``self._driver`` and ``self._wait`` (and ``self.mobile_device`` when
    mobile emulation is enabled).

    Returns:
        The newly created Selenium WebDriver instance.
    """
    proxies = None
    proxy = None
    # Define first which webdriver to use:
    if self.use_wire is True:
        # Use Wire protocol for Selenium
        # (presumably `webdriver` is selenium-wire's module — confirm against the imports)
        self._webdriver = webdriver
        # Extra keyword arguments forwarded to the driver constructor below.
        _options = {
            "seleniumwire_options": {
                "proxy": None,
                'http2': False  # Explicitly disable HTTP/2 in Selenium Wire
            }
        }
    else:
        self._webdriver = selenium_driver
        _options = {}
    # Selenium Options: pick the options class matching the browser flag.
    # NOTE(review): this chain tests use_undetected last, while the driver
    # creation chain below tests it first; with use_undetected combined with
    # another browser flag the options object and the driver would not match.
    if self.use_firefox is True:
        self._options = FirefoxOptions()
    elif self.use_edge is True:
        self._options = EdgeOptions()
    elif self.use_safari is True:
        self._options = SafariOptions()
    elif self.use_webkit is True:
        self._options = WebKitGTKOptions()
    elif self.use_undetected is True:
        # Start an undetected Chrome instance
        self._options = uc.ChromeOptions()
    else:
        # used for Chrome by default
        self._options = ChromeOptions()
    # Add a randomly chosen User-Agent as a browser argument:
    _ua = random.choice(ua)
    self._options.add_argument(f"user-agent={_ua}")
    # Configure Proxy Support
    if self.use_proxy is True:
        proxy = Proxy()
        if self._free_proxy is False:
            # Oxylabs Proxy: the mere presence of a `us_proxy` attribute
            # selects the US endpoint.
            if hasattr(self, 'us_proxy'):
                endpoint = "us-pr.oxylabs.io:10000"
            else:
                endpoint = OXYLABS_ENDPOINT
            customer = f"customer-{OXYLABS_USERNAME}-sesstime-1"
            proxies = self.proxy_selenium(
                customer, OXYLABS_PASSWORD, endpoint
            )
            proxy.http_proxy = f"http://{customer}:{OXYLABS_PASSWORD}@{endpoint}"
            proxy.ssl_proxy = f"https://{customer}:{OXYLABS_PASSWORD}@{endpoint}"
            # From here on `proxy` is a plain capability dict, not a Proxy object.
            proxy = {
                "proxyType": "manual",
                "httpProxy": proxy.http_proxy,
                "sslProxy": proxy.ssl_proxy,
            }
            # and using the simple config:
            # NOTE(review): this argument uses OXYLABS_USERNAME/OXYLABS_ENDPOINT
            # rather than the `customer`/`endpoint` values computed above, so
            # the `us_proxy` selection is not reflected here — confirm intent.
            self._options.add_argument(
                f"--proxy-server=http://{OXYLABS_USERNAME}:{OXYLABS_PASSWORD}@{OXYLABS_ENDPOINT}"
            )
            if self.use_wire is True:
                _options['seleniumwire_options']['proxy'] = proxies['proxy']
        else:
            proxies = await self.get_proxies()
            # proxies is a list of IP:port, so we need to convert it to a dict
            # NOTE(review): only the first entry is used; an empty list would
            # raise IndexError here. In this branch `proxy` stays a Proxy
            # object (the Oxylabs branch rebinds it to a dict).
            proxy.http_proxy = f"http://{proxies[0]}"
            proxy.ssl_proxy = f"https://{proxies[0]}"
            self._options.add_argument(f"--proxy-server={proxy.http_proxy}")
            if self.use_wire is True:
                _options['seleniumwire_options']['proxy'] = {
                    "http": f"http://{proxies[0]}",
                    "https": f"https://{proxies[0]}"
                }
    # Create the driver for the selected browser.
    if self.use_undetected is True:
        for option in self.undetected_options:
            try:
                self._options.add_argument(option)
            except Exception:
                # If the browser does not support add_argument, skip it.
                pass
        # Start an undetected Chrome instance; the headless flag is mirrored
        # on the options object and also passed to the constructor.
        self._options.headless = self.headless
        self._driver = uc.Chrome(
            options=self._options,
            headless=self.headless,
            use_subprocess=False,
            advanced_elements=True,
            enable_cdp_events=True
        )
    elif self.use_firefox is True:
        # Use Firefox Browser
        self._options.headless = self.headless
        for option in self.firefox_options:
            self._options.add_argument(option)
        if self.headless is True:
            self._options.add_argument("--headless")
        self._options.set_preference("network.http.http2.enabled", self.enable_http2)
        if self.use_proxy is True:
            # NOTE(review): the proxy capability is rebuilt from the Oxylabs
            # constants here even when `_free_proxy` selected a free proxy
            # above — confirm this is intended.
            customer = f"customer-{OXYLABS_USERNAME}-sesstime-1"
            proxy = {
                "proxyType": "manual",
                "httpProxy": f"{customer}:{OXYLABS_PASSWORD}@{OXYLABS_ENDPOINT}",
                "sslProxy": f"{customer}:{OXYLABS_PASSWORD}@{OXYLABS_ENDPOINT}",
            }
            self._options.set_capability("proxy", proxy)
        if self._browser_binary:
            # Custom browser binary, driver resolved by the driver manager.
            self._options.binary_location = self._browser_binary
            service = FirefoxService(
                GeckoDriverManager().install()
            )
        elif self._driver_binary:
            # Use the binary driver if available
            service = FirefoxService(
                executable_path=self._driver_binary
            )
        else:
            # Use the cached driver if available
            # (valid_range=7 is presumably the cache validity in days — confirm)
            cache_manager = DriverCacheManager(valid_range=7)
            service = FirefoxService(
                GeckoDriverManager(
                    cache_manager=cache_manager
                ).install()
            )
        self._driver = self._webdriver.Firefox(
            service=service,
            options=self._options,
            **_options
        )
    elif self.use_edge is True:
        # Use Chromium Edge Browser
        self._options.headless = self.headless
        if self.headless is True:
            self._options.add_argument("--headless=new")
        # if self.use_proxy is True:
        #     self._options.set_capability("proxy", proxy)
        if self._browser_binary is not None:
            self._options.binary_location = self._browser_binary
            # NOTE(review): the *browser* binary path is also passed as the
            # driver executable; the Firefox branch uses `_driver_binary`
            # for that purpose — confirm.
            service = EdgeService(
                executable_path=self._browser_binary
            )
        else:
            service = EdgeService(
                EdgeChromiumDriverManager().install()
            )
        self._options.set_capability("ms:edgeOptions", {"http2": self.enable_http2})
        self._driver = self._webdriver.Edge(
            service=service,
            options=self._options,
            **_options
        )
    elif self.use_safari is True:
        # Use Safari Browser
        # NOTE(review): reads `self._browser_path`, whereas the other
        # branches read `self._browser_binary` — confirm the attribute exists.
        self._driver = self._webdriver.Safari(
            service=SafariService(
                executable_path=self._browser_path
            ),
            options=self._options,
            **_options
        )
    elif self.use_webkit is True:
        # Use WebKitGTK Browser
        # NOTE(review): the result of `WebKitGTKService().install()` is passed
        # as the service; `.install()` is what driver managers expose in the
        # other branches — verify that the service class provides it.
        self._driver = self._webdriver.WebKitGTK(
            service=WebKitGTKService().install(),
            options=self._options,
            **_options
        )
    else:
        # Use Chrome Browser
        if self.use_proxy is True:
            self._options.set_capability("proxy", proxy)
        if self.headless is True:
            self._options.add_argument("--headless=new")
        if self._browser_binary:
            self._options.binary_location = self._browser_binary
            # self._options.add_argument(
            #     f"--user-data-dir={self.self._userdata}"
            # )
        service = ChromeService(
            ChromeDriverManager().install()
        )
        for option in self.chrome_options:
            try:
                self._options.add_argument(option)
            except Exception:
                # If the browser does not support add_argument, skip it.
                pass
        if self.as_mobile is True:
            # Mobile Device: pick a random device and a random mobile UA.
            self.mobile_device = random.choice(mobile_devices)
            # Use Chrome mobile emulation options
            mobile_emulation_options = {
                "deviceName": self.mobile_device,
                "userAgent": random.choice(mobile_ua)
            }
            self._options.add_experimental_option(
                "mobileEmulation",
                mobile_emulation_options
            )
            self._logger.debug(
                f"Running in mobile emulation mode as {self.mobile_device}"
            )
        # Explicitly disable HTTP/2
        # NOTE(review): verify that "disable-http2" is a preference Chrome
        # honours when set through "prefs".
        if self.enable_http2 is False:
            self._options.add_experimental_option(
                "prefs", {"disable-http2": True}
            )
        self._driver = self._webdriver.Chrome(
            service=service,
            options=self._options,
            **_options
        )
    # Creating the WebDriverWait (bound to self.timeout) and Return the Driver:
    self._wait = WebDriverWait(self._driver, self.timeout)
    return self._driver
475
+
476
def _execute_scroll(self, scroll_pause_time=1.0, max_scrolls=5):
    """
    Scroll down the page in equal steps so lazily loaded content can render.

    The page height is re-read after every step; scrolling stops early when
    a step after the first one did not change it. The view is finally moved
    back to the top. Any error is logged as a warning and swallowed.

    Args:
        scroll_pause_time (float): Seconds to sleep after each scroll step.
        max_scrolls (int): Maximum number of scroll steps.
    """
    height_script = "return document.body.scrollHeight"
    try:
        browser = self._driver
        # Do nothing until the document reports a non-zero height.
        WebDriverWait(browser, 20).until(
            lambda drv: drv.execute_script(height_script) > 0
        )
        known_height = browser.execute_script(height_script)
        final_step = max_scrolls - 1

        for step in range(max_scrolls):
            # Move to the next fraction of the currently known height.
            browser.execute_script(f"window.scrollTo(0, {(step+1) * known_height/max_scrolls});")
            time.sleep(scroll_pause_time)

            current_height = browser.execute_script(height_script)
            if step > 0 and current_height == known_height:
                # The page stopped growing: nothing more to load.
                break
            known_height = current_height

            if step == final_step:
                # Extra pause after the last step for pending requests.
                time.sleep(scroll_pause_time * 1.5)

        # Leave the page at the top.
        browser.execute_script("window.scrollTo(0, 0);")
    except Exception as e:
        # Scrolling is best effort: report and carry on.
        self._logger.warning(f"Error during scroll operation: {e}")
518
+
519
def save_screenshot(self, filename: str) -> None:
    """Save a screenshot of the whole page, or of the configured portion.

    The window is temporarily resized to the document's scroll size
    (falling back to 1920x1080 when the page reports nothing), the page is
    scrolled to trigger lazy content, and the method waits for the document
    to be complete and for ``self.wait_until`` when it is set. If
    ``self.screenshot['portion']`` holds a locator only that element is
    captured; otherwise the full page is saved.

    The original window size is always restored, even when taking the
    screenshot fails.

    Args:
        filename: Destination path of the image file.
    """
    original_size = self._driver.get_window_size()
    width = self._driver.execute_script(
        'return document.body.parentNode.scrollWidth'
    ) or 1920
    height = self._driver.execute_script(
        'return document.body.parentNode.scrollHeight'
    ) or 1080
    try:
        self._driver.set_window_size(width, height)
        self._execute_scroll()

        # Ensure the page is fully loaded after resizing
        self._wait.until(
            lambda driver: driver.execute_script("return document.readyState") == "complete"
        )

        # Wait for specific elements to load
        if self.wait_until:
            WebDriverWait(self._driver, 20).until(
                EC.presence_of_all_elements_located(
                    self.wait_until
                )
            )
        # The portion may be present but empty (e.g. normalised to None):
        # treat that like "no portion" instead of failing on unpacking.
        portion = self.screenshot['portion'] if 'portion' in self.screenshot else None
        if portion:
            element = self._driver.find_element(*portion)
            size = element.size
            if size['height'] == 0 or size['width'] == 0:
                # Not rendered yet: wait until the element has a real size.
                self._logger.warning(
                    "Element to screenshot has zero dimension, waiting for it to render..."
                )
                WebDriverWait(self._driver, 20).until(
                    lambda driver: element.size['height'] > 0 and element.size['width'] > 0
                )
            element.screenshot(filename)
        else:
            # Take a full-page screenshot
            self._driver.save_screenshot(filename)
    finally:
        # Resize back to the original size whatever happened above.
        self._driver.set_window_size(
            original_size['width'],
            original_size['height']
        )
568
+
569
def get_soup(self, content: str, parser: str = 'html.parser'):
    """Parse ``content`` with BeautifulSoup using ``parser`` and return the soup."""
    soup = BeautifulSoup(content, parser)
    return soup
572
+
573
def get_etree(self, content: str) -> tuple:
    """Parse ``content`` both as XML and as HTML.

    Args:
        content: Markup to parse.

    Returns:
        A ``(xml_root, html_root)`` tuple; each item is ``None`` when the
        corresponding parser rejected the input.
    """
    # Besides syntax errors, the parsers signal unusable input with
    # ParserError (e.g. an empty document) and ValueError (e.g. a text
    # string carrying an XML encoding declaration).
    recoverable = (etree.XMLSyntaxError, etree.ParserError, ValueError)
    try:
        x = etree.fromstring(content)
    except recoverable:
        x = None
    try:
        h = html.fromstring(content)
    except recoverable:
        h = None
    return x, h
583
+
584
async def get_page(
    self,
    url: str,
    cookies: Optional[dict] = None,
    retries: int = 3,
    backoff_delay: int = 2
):
    """Load ``url`` in the Selenium driver, retrying when the page times out.

    Each attempt clears the browser cookies, opens the URL, optionally
    installs ``cookies`` and refreshes, waits for ``document.readyState`` to
    be ``complete`` and for ``self.wait_until`` (or ``self.default_tag``) to
    be present, clicks the accept-cookies button when one is configured and
    finally scrolls through the page.

    When an attempt times out, a press-and-hold challenge is looked for and
    answered; if that does not succeed the attempt is retried after
    ``backoff_delay`` seconds.

    Args:
        url: Address to load.
        cookies: Optional cookie name/value pairs to set after the first load.
        retries: Maximum number of attempts.
        backoff_delay: Seconds to wait between attempts.

    Raises:
        TimeOutError: If every attempt timed out.
        ComponentError: If any other error happens while loading the page.
    """
    # Function-scope import: only needed for the non-blocking back-off below.
    import asyncio

    if not self._driver:
        await self.get_driver()
    attempt = 0
    while attempt < retries:
        try:
            try:
                self._driver.delete_all_cookies()
            except Exception:
                # Best effort: failing to clear cookies must not abort the load.
                pass
            self._driver.get(url)
            if cookies:
                for cookie_name, cookie_value in cookies.items():
                    if cookie_value:
                        self._driver.add_cookie({'name': cookie_name, 'value': cookie_value})
                # Refresh the page to apply the cookies
                self._driver.refresh()

            # Ensure the page is fully loaded before interacting with it
            self._wait.until(
                lambda driver: driver.execute_script("return document.readyState") == "complete"
            )

            if self.wait_until:
                # Wait for the configured elements to be present.
                WebDriverWait(self._driver, 20).until(
                    EC.presence_of_all_elements_located(
                        self.wait_until
                    )
                )
            else:
                # Wait for the default tag to appear in the page.
                self._wait.until(
                    EC.presence_of_element_located(
                        (By.TAG_NAME, self.default_tag)
                    )
                )
            # Accept Cookies if enabled.
            if self.accept_cookies:
                try:
                    if self.accept_is_clickable is True:
                        accept_button = self._wait.until(
                            EC.element_to_be_clickable(self.accept_cookies)
                        )
                        accept_button.click()
                    else:
                        # Button may not be clickable: click it through JavaScript.
                        accept_button = self._wait.until(
                            EC.presence_of_element_located(
                                self.accept_cookies
                            )
                        )
                        self._driver.execute_script("arguments[0].click();", accept_button)
                except TimeoutException:
                    self._logger.warning(
                        'Accept Cookies Button not found'
                    )
            # Execute an scroll of the page:
            self._execute_scroll()
            return
        except TimeoutException as timeout_exc:
            self._logger.warning("Page did not reach a complete readyState.")
            self._logger.debug(f"Current Page Source: {self._driver.page_source}")
            # A challenge page may be what kept the page from loading.
            if self._solve_hold_challenge():
                return
            attempt += 1
            if attempt >= retries:
                raise TimeOutError(
                    f"Timeout Error on URL {url} after {retries} attempts"
                ) from timeout_exc
            self._logger.warning(
                f"TimeoutException occurred. Retrying ({attempt}/{retries}) in {backoff_delay}s..."
            )
            # Do not block the event loop while backing off.
            await asyncio.sleep(backoff_delay)
        except Exception as exc:
            raise ComponentError(
                f"Error running Scrapping Tool: {exc}"
            ) from exc

def _solve_hold_challenge(self, hold_seconds: int = 5) -> bool:
    """Answer a press-and-hold challenge; return True only if it was answered.

    Looks for the ``px-captcha`` container, enters its iframe, presses and
    holds the challenge text for ``hold_seconds`` seconds and waits for the
    page to finish loading. Any WebDriver error (challenge missing, element
    gone, timeout) is logged and reported as ``False`` so the caller can
    carry on with its retry logic.
    """
    try:
        container = WebDriverWait(self._driver, 20).until(
            EC.presence_of_element_located((By.ID, "px-captcha"))
        )
        iframe = container.find_element(By.TAG_NAME, "iframe")
        self._driver.switch_to.frame(iframe)
        try:
            # The expected condition needs a locator tuple, not an element.
            challenge_button = WebDriverWait(self._driver, 5).until(
                EC.presence_of_element_located(
                    (By.XPATH, "//p[contains(text(), 'Pulsar y mantener pulsado')]")
                )
            )
            actions = ActionChains(self._driver)
            actions.click_and_hold(challenge_button).pause(hold_seconds).release().perform()
        finally:
            # Never leave the driver focused inside the challenge iframe.
            self._driver.switch_to.default_content()
        # Wait again for the page to load after the challenge
        self._wait.until(
            lambda driver: driver.execute_script("return document.readyState") == "complete"
        )
    except WebDriverException as exc:
        self._logger.debug(f"Challenge not solved: {exc}")
        return False
    self._execute_scroll()
    return True
710
+
711
async def search_google_cse(self, query: str, max_results: int = 5):
    """
    Search Google Custom Search Engine (CSE) using Selenium.

    A new driver is created for the search and is always closed afterwards.

    Args:
        query (str): The search query (plain text; it is URL-encoded here).
        max_results (int, optional): Maximum number of search results to return.

    Returns:
        list[dict]: A list of search results with 'title' and 'link'. Empty
        when the engine reports no results or the results never show up.

    Raises:
        RuntimeError: On any WebDriver or unexpected error during the search.
    """
    # Function-scope imports keep this block independent of the module header.
    import asyncio
    from urllib.parse import quote

    try:
        # Encode the query so characters such as "&", "#" or spaces cannot
        # break the URL fragment.
        search_url = (
            f"https://cse.google.com/cse?cx={GOOGLE_SEARCH_ENGINE_ID}"
            f"#gsc.tab=0&gsc.q={quote(query, safe='')}&gsc.sort="
        )
        driver = await self.get_driver()
        driver.get(search_url)

        # Wait for search results or the "No results" message
        try:
            WebDriverWait(driver, 5).until(
                EC.presence_of_element_located((By.CLASS_NAME, "gsc-results"))
            )
        except TimeoutException:
            try:
                WebDriverWait(driver, 3).until(
                    EC.presence_of_element_located((By.CLASS_NAME, "gs-no-results-result"))
                )
            except TimeoutException:
                self._logger.warning(
                    "CSE: No results found or page failed to load."
                )
            # Either an explicit "no results" or nothing usable was rendered.
            return []

        await asyncio.sleep(2)  # Allow JS to finalize without blocking the loop

        # find_elements returns an empty list (it does not raise) when
        # nothing matches, so the fallback selector is chosen with `or`.
        search_results = (
            driver.find_elements(By.CLASS_NAME, "gsc-webResult")
            or driver.find_elements(By.CLASS_NAME, "gsc-expansionArea")
        )

        results = []
        for result in search_results[:max_results]:
            try:
                title_element = result.find_element(By.CLASS_NAME, "gs-title")
                url_element = title_element.find_element(By.TAG_NAME, "a")
            except NoSuchElementException:
                continue  # Skip entries without a title link
            title = title_element.text.strip()
            # The href attribute may be missing (None).
            link = (url_element.get_attribute("href") or "").strip()
            if title and link:
                results.append({"title": title, "link": link})

        return results

    except NoSuchElementException as e:
        raise RuntimeError(f"CSE Error: Element not found ({e})") from e
    except TimeoutException as e:
        raise RuntimeError(f"CSE Timeout: {e}") from e
    except WebDriverException as e:
        raise RuntimeError(f"CSE WebDriver Error: {e}") from e
    except RuntimeError as e:
        raise RuntimeError(f"CSE Runtime Error: {e}") from e
    except Exception as e:
        raise RuntimeError(f"CSE Unexpected Error: {e}") from e
    finally:
        self.close_driver()  # Always close driver