flowtask 5.8.4__cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (470) hide show
  1. flowtask/__init__.py +93 -0
  2. flowtask/__main__.py +38 -0
  3. flowtask/bots/__init__.py +6 -0
  4. flowtask/bots/check.py +93 -0
  5. flowtask/bots/codebot.py +51 -0
  6. flowtask/components/ASPX.py +148 -0
  7. flowtask/components/AddDataset.py +352 -0
  8. flowtask/components/Amazon.py +523 -0
  9. flowtask/components/AutoTask.py +314 -0
  10. flowtask/components/Azure.py +80 -0
  11. flowtask/components/AzureUsers.py +106 -0
  12. flowtask/components/BaseAction.py +91 -0
  13. flowtask/components/BaseLoop.py +198 -0
  14. flowtask/components/BestBuy.py +800 -0
  15. flowtask/components/CSVToGCS.py +120 -0
  16. flowtask/components/CompanyScraper/__init__.py +1 -0
  17. flowtask/components/CompanyScraper/parsers/__init__.py +6 -0
  18. flowtask/components/CompanyScraper/parsers/base.py +102 -0
  19. flowtask/components/CompanyScraper/parsers/explorium.py +192 -0
  20. flowtask/components/CompanyScraper/parsers/leadiq.py +206 -0
  21. flowtask/components/CompanyScraper/parsers/rocket.py +133 -0
  22. flowtask/components/CompanyScraper/parsers/siccode.py +109 -0
  23. flowtask/components/CompanyScraper/parsers/visualvisitor.py +130 -0
  24. flowtask/components/CompanyScraper/parsers/zoominfo.py +118 -0
  25. flowtask/components/CompanyScraper/scrapper.py +1054 -0
  26. flowtask/components/CopyTo.py +177 -0
  27. flowtask/components/CopyToBigQuery.py +243 -0
  28. flowtask/components/CopyToMongoDB.py +291 -0
  29. flowtask/components/CopyToPg.py +609 -0
  30. flowtask/components/CopyToRethink.py +207 -0
  31. flowtask/components/CreateGCSBucket.py +102 -0
  32. flowtask/components/CreateReport/CreateReport.py +228 -0
  33. flowtask/components/CreateReport/__init__.py +9 -0
  34. flowtask/components/CreateReport/charts/__init__.py +15 -0
  35. flowtask/components/CreateReport/charts/bar.py +51 -0
  36. flowtask/components/CreateReport/charts/base.py +66 -0
  37. flowtask/components/CreateReport/charts/pie.py +64 -0
  38. flowtask/components/CreateReport/utils.py +9 -0
  39. flowtask/components/CustomerSatisfaction.py +196 -0
  40. flowtask/components/DataInput.py +200 -0
  41. flowtask/components/DateList.py +255 -0
  42. flowtask/components/DbClient.py +163 -0
  43. flowtask/components/DialPad.py +146 -0
  44. flowtask/components/DocumentDBQuery.py +200 -0
  45. flowtask/components/DownloadFrom.py +371 -0
  46. flowtask/components/DownloadFromD2L.py +113 -0
  47. flowtask/components/DownloadFromFTP.py +181 -0
  48. flowtask/components/DownloadFromIMAP.py +315 -0
  49. flowtask/components/DownloadFromS3.py +198 -0
  50. flowtask/components/DownloadFromSFTP.py +265 -0
  51. flowtask/components/DownloadFromSharepoint.py +110 -0
  52. flowtask/components/DownloadFromSmartSheet.py +114 -0
  53. flowtask/components/DownloadS3File.py +229 -0
  54. flowtask/components/Dummy.py +59 -0
  55. flowtask/components/DuplicatePhoto.py +411 -0
  56. flowtask/components/EmployeeEvaluation.py +237 -0
  57. flowtask/components/ExecuteSQL.py +323 -0
  58. flowtask/components/ExtractHTML.py +178 -0
  59. flowtask/components/FileBase.py +178 -0
  60. flowtask/components/FileCopy.py +181 -0
  61. flowtask/components/FileDelete.py +82 -0
  62. flowtask/components/FileExists.py +146 -0
  63. flowtask/components/FileIteratorDelete.py +112 -0
  64. flowtask/components/FileList.py +194 -0
  65. flowtask/components/FileOpen.py +75 -0
  66. flowtask/components/FileRead.py +120 -0
  67. flowtask/components/FileRename.py +106 -0
  68. flowtask/components/FilterIf.py +284 -0
  69. flowtask/components/FilterRows/FilterRows.py +200 -0
  70. flowtask/components/FilterRows/__init__.py +10 -0
  71. flowtask/components/FilterRows/functions.py +4 -0
  72. flowtask/components/GCSToBigQuery.py +103 -0
  73. flowtask/components/GoogleA4.py +150 -0
  74. flowtask/components/GoogleGeoCoding.py +344 -0
  75. flowtask/components/GooglePlaces.py +315 -0
  76. flowtask/components/GoogleSearch.py +539 -0
  77. flowtask/components/HTTPClient.py +268 -0
  78. flowtask/components/ICIMS.py +146 -0
  79. flowtask/components/IF.py +179 -0
  80. flowtask/components/IcimsFolderCopy.py +173 -0
  81. flowtask/components/ImageFeatures/__init__.py +5 -0
  82. flowtask/components/ImageFeatures/process.py +233 -0
  83. flowtask/components/IteratorBase.py +251 -0
  84. flowtask/components/LangchainLoader/__init__.py +5 -0
  85. flowtask/components/LangchainLoader/loader.py +194 -0
  86. flowtask/components/LangchainLoader/loaders/__init__.py +22 -0
  87. flowtask/components/LangchainLoader/loaders/abstract.py +362 -0
  88. flowtask/components/LangchainLoader/loaders/basepdf.py +50 -0
  89. flowtask/components/LangchainLoader/loaders/docx.py +91 -0
  90. flowtask/components/LangchainLoader/loaders/html.py +119 -0
  91. flowtask/components/LangchainLoader/loaders/pdfblocks.py +146 -0
  92. flowtask/components/LangchainLoader/loaders/pdfmark.py +79 -0
  93. flowtask/components/LangchainLoader/loaders/pdftables.py +135 -0
  94. flowtask/components/LangchainLoader/loaders/qa.py +67 -0
  95. flowtask/components/LangchainLoader/loaders/txt.py +55 -0
  96. flowtask/components/LeadIQ.py +650 -0
  97. flowtask/components/Loop.py +253 -0
  98. flowtask/components/Lowes.py +334 -0
  99. flowtask/components/MS365Usage.py +156 -0
  100. flowtask/components/MSTeamsMessages.py +320 -0
  101. flowtask/components/MarketClustering.py +1051 -0
  102. flowtask/components/MergeFiles.py +362 -0
  103. flowtask/components/MilvusOutput.py +87 -0
  104. flowtask/components/NearByStores.py +175 -0
  105. flowtask/components/NetworkNinja/__init__.py +6 -0
  106. flowtask/components/NetworkNinja/models/__init__.py +52 -0
  107. flowtask/components/NetworkNinja/models/abstract.py +177 -0
  108. flowtask/components/NetworkNinja/models/account.py +39 -0
  109. flowtask/components/NetworkNinja/models/client.py +19 -0
  110. flowtask/components/NetworkNinja/models/district.py +14 -0
  111. flowtask/components/NetworkNinja/models/events.py +101 -0
  112. flowtask/components/NetworkNinja/models/forms.py +499 -0
  113. flowtask/components/NetworkNinja/models/market.py +16 -0
  114. flowtask/components/NetworkNinja/models/organization.py +34 -0
  115. flowtask/components/NetworkNinja/models/photos.py +125 -0
  116. flowtask/components/NetworkNinja/models/project.py +44 -0
  117. flowtask/components/NetworkNinja/models/region.py +28 -0
  118. flowtask/components/NetworkNinja/models/store.py +203 -0
  119. flowtask/components/NetworkNinja/models/user.py +151 -0
  120. flowtask/components/NetworkNinja/router.py +854 -0
  121. flowtask/components/Odoo.py +175 -0
  122. flowtask/components/OdooInjector.py +192 -0
  123. flowtask/components/OpenFromXML.py +126 -0
  124. flowtask/components/OpenWeather.py +41 -0
  125. flowtask/components/OpenWithBase.py +616 -0
  126. flowtask/components/OpenWithPandas.py +715 -0
  127. flowtask/components/PGPDecrypt.py +199 -0
  128. flowtask/components/PandasIterator.py +187 -0
  129. flowtask/components/PandasToFile.py +189 -0
  130. flowtask/components/Paradox.py +339 -0
  131. flowtask/components/ParamIterator.py +117 -0
  132. flowtask/components/ParseHTML.py +84 -0
  133. flowtask/components/PlacerStores.py +249 -0
  134. flowtask/components/Pokemon.py +507 -0
  135. flowtask/components/PositiveBot.py +62 -0
  136. flowtask/components/PowerPointSlide.py +400 -0
  137. flowtask/components/PrintMessage.py +127 -0
  138. flowtask/components/ProductCompetitors/__init__.py +5 -0
  139. flowtask/components/ProductCompetitors/parsers/__init__.py +7 -0
  140. flowtask/components/ProductCompetitors/parsers/base.py +72 -0
  141. flowtask/components/ProductCompetitors/parsers/bestbuy.py +86 -0
  142. flowtask/components/ProductCompetitors/parsers/lowes.py +103 -0
  143. flowtask/components/ProductCompetitors/scrapper.py +155 -0
  144. flowtask/components/ProductCompliant.py +169 -0
  145. flowtask/components/ProductInfo/__init__.py +1 -0
  146. flowtask/components/ProductInfo/parsers/__init__.py +5 -0
  147. flowtask/components/ProductInfo/parsers/base.py +83 -0
  148. flowtask/components/ProductInfo/parsers/brother.py +97 -0
  149. flowtask/components/ProductInfo/parsers/canon.py +167 -0
  150. flowtask/components/ProductInfo/parsers/epson.py +118 -0
  151. flowtask/components/ProductInfo/parsers/hp.py +131 -0
  152. flowtask/components/ProductInfo/parsers/samsung.py +97 -0
  153. flowtask/components/ProductInfo/scraper.py +319 -0
  154. flowtask/components/ProductPricing.py +118 -0
  155. flowtask/components/QS.py +261 -0
  156. flowtask/components/QSBase.py +201 -0
  157. flowtask/components/QueryIterator.py +273 -0
  158. flowtask/components/QueryToInsert.py +327 -0
  159. flowtask/components/QueryToPandas.py +432 -0
  160. flowtask/components/RESTClient.py +195 -0
  161. flowtask/components/RethinkDBQuery.py +189 -0
  162. flowtask/components/Rsync.py +74 -0
  163. flowtask/components/RunSSH.py +59 -0
  164. flowtask/components/RunShell.py +71 -0
  165. flowtask/components/SalesForce.py +20 -0
  166. flowtask/components/SaveImageBank/__init__.py +257 -0
  167. flowtask/components/SchedulingVisits.py +592 -0
  168. flowtask/components/ScrapPage.py +216 -0
  169. flowtask/components/ScrapSearch.py +79 -0
  170. flowtask/components/SendNotify.py +257 -0
  171. flowtask/components/SentimentAnalysis.py +694 -0
  172. flowtask/components/ServiceScrapper/__init__.py +5 -0
  173. flowtask/components/ServiceScrapper/parsers/__init__.py +1 -0
  174. flowtask/components/ServiceScrapper/parsers/base.py +94 -0
  175. flowtask/components/ServiceScrapper/parsers/costco.py +93 -0
  176. flowtask/components/ServiceScrapper/scrapper.py +199 -0
  177. flowtask/components/SetVariables.py +156 -0
  178. flowtask/components/SubTask.py +182 -0
  179. flowtask/components/SuiteCRM.py +48 -0
  180. flowtask/components/Switch.py +175 -0
  181. flowtask/components/TableBase.py +148 -0
  182. flowtask/components/TableDelete.py +312 -0
  183. flowtask/components/TableInput.py +143 -0
  184. flowtask/components/TableOutput/TableOutput.py +384 -0
  185. flowtask/components/TableOutput/__init__.py +3 -0
  186. flowtask/components/TableSchema.py +534 -0
  187. flowtask/components/Target.py +223 -0
  188. flowtask/components/ThumbnailGenerator.py +156 -0
  189. flowtask/components/ToPandas.py +67 -0
  190. flowtask/components/TransformRows/TransformRows.py +507 -0
  191. flowtask/components/TransformRows/__init__.py +9 -0
  192. flowtask/components/TransformRows/functions.py +559 -0
  193. flowtask/components/TransposeRows.py +176 -0
  194. flowtask/components/UPCDatabase.py +86 -0
  195. flowtask/components/UnGzip.py +171 -0
  196. flowtask/components/Uncompress.py +172 -0
  197. flowtask/components/UniqueRows.py +126 -0
  198. flowtask/components/Unzip.py +107 -0
  199. flowtask/components/UpdateOperationalVars.py +147 -0
  200. flowtask/components/UploadTo.py +299 -0
  201. flowtask/components/UploadToS3.py +136 -0
  202. flowtask/components/UploadToSFTP.py +160 -0
  203. flowtask/components/UploadToSharepoint.py +205 -0
  204. flowtask/components/UserFunc.py +122 -0
  205. flowtask/components/VivaTracker.py +140 -0
  206. flowtask/components/WSDLClient.py +123 -0
  207. flowtask/components/Wait.py +18 -0
  208. flowtask/components/Walmart.py +199 -0
  209. flowtask/components/Workplace.py +134 -0
  210. flowtask/components/XMLToPandas.py +267 -0
  211. flowtask/components/Zammad/__init__.py +41 -0
  212. flowtask/components/Zammad/models.py +0 -0
  213. flowtask/components/ZoomInfoScraper.py +409 -0
  214. flowtask/components/__init__.py +104 -0
  215. flowtask/components/abstract.py +18 -0
  216. flowtask/components/flow.py +530 -0
  217. flowtask/components/google.py +335 -0
  218. flowtask/components/group.py +221 -0
  219. flowtask/components/py.typed +0 -0
  220. flowtask/components/reviewscrap.py +132 -0
  221. flowtask/components/tAutoincrement.py +117 -0
  222. flowtask/components/tConcat.py +109 -0
  223. flowtask/components/tExplode.py +119 -0
  224. flowtask/components/tFilter.py +184 -0
  225. flowtask/components/tGroup.py +236 -0
  226. flowtask/components/tJoin.py +270 -0
  227. flowtask/components/tMap/__init__.py +9 -0
  228. flowtask/components/tMap/functions.py +54 -0
  229. flowtask/components/tMap/tMap.py +450 -0
  230. flowtask/components/tMelt.py +112 -0
  231. flowtask/components/tMerge.py +114 -0
  232. flowtask/components/tOrder.py +93 -0
  233. flowtask/components/tPandas.py +94 -0
  234. flowtask/components/tPivot.py +71 -0
  235. flowtask/components/tPluckCols.py +76 -0
  236. flowtask/components/tUnnest.py +82 -0
  237. flowtask/components/user.py +401 -0
  238. flowtask/conf.py +457 -0
  239. flowtask/download.py +102 -0
  240. flowtask/events/__init__.py +11 -0
  241. flowtask/events/events/__init__.py +20 -0
  242. flowtask/events/events/abstract.py +95 -0
  243. flowtask/events/events/alerts/__init__.py +362 -0
  244. flowtask/events/events/alerts/colfunctions.py +131 -0
  245. flowtask/events/events/alerts/functions.py +158 -0
  246. flowtask/events/events/dummy.py +12 -0
  247. flowtask/events/events/exec.py +124 -0
  248. flowtask/events/events/file/__init__.py +7 -0
  249. flowtask/events/events/file/base.py +51 -0
  250. flowtask/events/events/file/copy.py +23 -0
  251. flowtask/events/events/file/delete.py +16 -0
  252. flowtask/events/events/interfaces/__init__.py +9 -0
  253. flowtask/events/events/interfaces/client.py +67 -0
  254. flowtask/events/events/interfaces/credentials.py +28 -0
  255. flowtask/events/events/interfaces/notifications.py +58 -0
  256. flowtask/events/events/jira.py +122 -0
  257. flowtask/events/events/log.py +26 -0
  258. flowtask/events/events/logerr.py +52 -0
  259. flowtask/events/events/notify.py +59 -0
  260. flowtask/events/events/notify_event.py +160 -0
  261. flowtask/events/events/publish.py +54 -0
  262. flowtask/events/events/sendfile.py +104 -0
  263. flowtask/events/events/task.py +97 -0
  264. flowtask/events/events/teams.py +98 -0
  265. flowtask/events/events/webhook.py +58 -0
  266. flowtask/events/manager.py +287 -0
  267. flowtask/exceptions.c +39393 -0
  268. flowtask/exceptions.cpython-312-x86_64-linux-gnu.so +0 -0
  269. flowtask/extensions/__init__.py +3 -0
  270. flowtask/extensions/abstract.py +82 -0
  271. flowtask/extensions/logging/__init__.py +65 -0
  272. flowtask/hooks/__init__.py +9 -0
  273. flowtask/hooks/actions/__init__.py +22 -0
  274. flowtask/hooks/actions/abstract.py +66 -0
  275. flowtask/hooks/actions/dummy.py +23 -0
  276. flowtask/hooks/actions/jira.py +74 -0
  277. flowtask/hooks/actions/rest.py +320 -0
  278. flowtask/hooks/actions/sampledata.py +37 -0
  279. flowtask/hooks/actions/sensor.py +23 -0
  280. flowtask/hooks/actions/task.py +9 -0
  281. flowtask/hooks/actions/ticket.py +37 -0
  282. flowtask/hooks/actions/zammad.py +55 -0
  283. flowtask/hooks/hook.py +62 -0
  284. flowtask/hooks/models.py +17 -0
  285. flowtask/hooks/service.py +187 -0
  286. flowtask/hooks/step.py +91 -0
  287. flowtask/hooks/types/__init__.py +23 -0
  288. flowtask/hooks/types/base.py +129 -0
  289. flowtask/hooks/types/brokers/__init__.py +11 -0
  290. flowtask/hooks/types/brokers/base.py +54 -0
  291. flowtask/hooks/types/brokers/mqtt.py +35 -0
  292. flowtask/hooks/types/brokers/rabbitmq.py +82 -0
  293. flowtask/hooks/types/brokers/redis.py +83 -0
  294. flowtask/hooks/types/brokers/sqs.py +44 -0
  295. flowtask/hooks/types/fs.py +232 -0
  296. flowtask/hooks/types/http.py +49 -0
  297. flowtask/hooks/types/imap.py +200 -0
  298. flowtask/hooks/types/jira.py +279 -0
  299. flowtask/hooks/types/mail.py +205 -0
  300. flowtask/hooks/types/postgres.py +98 -0
  301. flowtask/hooks/types/responses/__init__.py +8 -0
  302. flowtask/hooks/types/responses/base.py +5 -0
  303. flowtask/hooks/types/sharepoint.py +288 -0
  304. flowtask/hooks/types/ssh.py +141 -0
  305. flowtask/hooks/types/tagged.py +59 -0
  306. flowtask/hooks/types/upload.py +85 -0
  307. flowtask/hooks/types/watch.py +71 -0
  308. flowtask/hooks/types/web.py +36 -0
  309. flowtask/interfaces/AzureClient.py +137 -0
  310. flowtask/interfaces/AzureGraph.py +839 -0
  311. flowtask/interfaces/Boto3Client.py +326 -0
  312. flowtask/interfaces/DropboxClient.py +173 -0
  313. flowtask/interfaces/ExcelHandler.py +94 -0
  314. flowtask/interfaces/FTPClient.py +131 -0
  315. flowtask/interfaces/GoogleCalendar.py +201 -0
  316. flowtask/interfaces/GoogleClient.py +133 -0
  317. flowtask/interfaces/GoogleDrive.py +127 -0
  318. flowtask/interfaces/GoogleGCS.py +89 -0
  319. flowtask/interfaces/GoogleGeocoding.py +93 -0
  320. flowtask/interfaces/GoogleLang.py +114 -0
  321. flowtask/interfaces/GooglePub.py +61 -0
  322. flowtask/interfaces/GoogleSheet.py +68 -0
  323. flowtask/interfaces/IMAPClient.py +137 -0
  324. flowtask/interfaces/O365Calendar.py +113 -0
  325. flowtask/interfaces/O365Client.py +220 -0
  326. flowtask/interfaces/OneDrive.py +284 -0
  327. flowtask/interfaces/Outlook.py +155 -0
  328. flowtask/interfaces/ParrotBot.py +130 -0
  329. flowtask/interfaces/SSHClient.py +378 -0
  330. flowtask/interfaces/Sharepoint.py +496 -0
  331. flowtask/interfaces/__init__.py +36 -0
  332. flowtask/interfaces/azureauth.py +119 -0
  333. flowtask/interfaces/cache.py +201 -0
  334. flowtask/interfaces/client.py +82 -0
  335. flowtask/interfaces/compress.py +525 -0
  336. flowtask/interfaces/credentials.py +124 -0
  337. flowtask/interfaces/d2l.py +239 -0
  338. flowtask/interfaces/databases/__init__.py +5 -0
  339. flowtask/interfaces/databases/db.py +223 -0
  340. flowtask/interfaces/databases/documentdb.py +55 -0
  341. flowtask/interfaces/databases/rethink.py +39 -0
  342. flowtask/interfaces/dataframes/__init__.py +11 -0
  343. flowtask/interfaces/dataframes/abstract.py +21 -0
  344. flowtask/interfaces/dataframes/arrow.py +71 -0
  345. flowtask/interfaces/dataframes/dt.py +69 -0
  346. flowtask/interfaces/dataframes/pandas.py +167 -0
  347. flowtask/interfaces/dataframes/polars.py +60 -0
  348. flowtask/interfaces/db.py +263 -0
  349. flowtask/interfaces/env.py +46 -0
  350. flowtask/interfaces/func.py +137 -0
  351. flowtask/interfaces/http.py +1780 -0
  352. flowtask/interfaces/locale.py +40 -0
  353. flowtask/interfaces/log.py +75 -0
  354. flowtask/interfaces/mask.py +143 -0
  355. flowtask/interfaces/notification.py +154 -0
  356. flowtask/interfaces/playwright.py +339 -0
  357. flowtask/interfaces/powerpoint.py +368 -0
  358. flowtask/interfaces/py.typed +0 -0
  359. flowtask/interfaces/qs.py +376 -0
  360. flowtask/interfaces/result.py +87 -0
  361. flowtask/interfaces/selenium_service.py +779 -0
  362. flowtask/interfaces/smartsheet.py +154 -0
  363. flowtask/interfaces/stat.py +39 -0
  364. flowtask/interfaces/task.py +96 -0
  365. flowtask/interfaces/template.py +118 -0
  366. flowtask/interfaces/vectorstores/__init__.py +1 -0
  367. flowtask/interfaces/vectorstores/abstract.py +133 -0
  368. flowtask/interfaces/vectorstores/milvus.py +669 -0
  369. flowtask/interfaces/zammad.py +107 -0
  370. flowtask/models.py +193 -0
  371. flowtask/parsers/__init__.py +15 -0
  372. flowtask/parsers/_yaml.c +11978 -0
  373. flowtask/parsers/_yaml.cpython-312-x86_64-linux-gnu.so +0 -0
  374. flowtask/parsers/argparser.py +235 -0
  375. flowtask/parsers/base.c +15155 -0
  376. flowtask/parsers/base.cpython-312-x86_64-linux-gnu.so +0 -0
  377. flowtask/parsers/json.c +11968 -0
  378. flowtask/parsers/json.cpython-312-x86_64-linux-gnu.so +0 -0
  379. flowtask/parsers/maps.py +49 -0
  380. flowtask/parsers/toml.c +11968 -0
  381. flowtask/parsers/toml.cpython-312-x86_64-linux-gnu.so +0 -0
  382. flowtask/plugins/__init__.py +16 -0
  383. flowtask/plugins/components/__init__.py +0 -0
  384. flowtask/plugins/handler/__init__.py +45 -0
  385. flowtask/plugins/importer.py +31 -0
  386. flowtask/plugins/sources/__init__.py +0 -0
  387. flowtask/runner.py +283 -0
  388. flowtask/scheduler/__init__.py +9 -0
  389. flowtask/scheduler/functions.py +493 -0
  390. flowtask/scheduler/handlers/__init__.py +8 -0
  391. flowtask/scheduler/handlers/manager.py +504 -0
  392. flowtask/scheduler/handlers/models.py +58 -0
  393. flowtask/scheduler/handlers/service.py +72 -0
  394. flowtask/scheduler/notifications.py +65 -0
  395. flowtask/scheduler/scheduler.py +993 -0
  396. flowtask/services/__init__.py +0 -0
  397. flowtask/services/bots/__init__.py +0 -0
  398. flowtask/services/bots/telegram.py +264 -0
  399. flowtask/services/files/__init__.py +11 -0
  400. flowtask/services/files/manager.py +522 -0
  401. flowtask/services/files/model.py +37 -0
  402. flowtask/services/files/service.py +767 -0
  403. flowtask/services/jira/__init__.py +3 -0
  404. flowtask/services/jira/jira_actions.py +191 -0
  405. flowtask/services/tasks/__init__.py +13 -0
  406. flowtask/services/tasks/launcher.py +213 -0
  407. flowtask/services/tasks/manager.py +323 -0
  408. flowtask/services/tasks/service.py +275 -0
  409. flowtask/services/tasks/task_manager.py +376 -0
  410. flowtask/services/tasks/tasks.py +155 -0
  411. flowtask/storages/__init__.py +16 -0
  412. flowtask/storages/exceptions.py +12 -0
  413. flowtask/storages/files/__init__.py +8 -0
  414. flowtask/storages/files/abstract.py +29 -0
  415. flowtask/storages/files/filesystem.py +66 -0
  416. flowtask/storages/tasks/__init__.py +19 -0
  417. flowtask/storages/tasks/abstract.py +26 -0
  418. flowtask/storages/tasks/database.py +33 -0
  419. flowtask/storages/tasks/filesystem.py +108 -0
  420. flowtask/storages/tasks/github.py +119 -0
  421. flowtask/storages/tasks/memory.py +45 -0
  422. flowtask/storages/tasks/row.py +25 -0
  423. flowtask/tasks/__init__.py +0 -0
  424. flowtask/tasks/abstract.py +526 -0
  425. flowtask/tasks/command.py +118 -0
  426. flowtask/tasks/pile.py +486 -0
  427. flowtask/tasks/py.typed +0 -0
  428. flowtask/tasks/task.py +778 -0
  429. flowtask/template/__init__.py +161 -0
  430. flowtask/tests.py +257 -0
  431. flowtask/types/__init__.py +8 -0
  432. flowtask/types/typedefs.c +11347 -0
  433. flowtask/types/typedefs.cpython-312-x86_64-linux-gnu.so +0 -0
  434. flowtask/utils/__init__.py +24 -0
  435. flowtask/utils/constants.py +117 -0
  436. flowtask/utils/encoders.py +21 -0
  437. flowtask/utils/executor.py +112 -0
  438. flowtask/utils/functions.cpp +14280 -0
  439. flowtask/utils/functions.cpython-312-x86_64-linux-gnu.so +0 -0
  440. flowtask/utils/json.cpp +13349 -0
  441. flowtask/utils/json.cpython-312-x86_64-linux-gnu.so +0 -0
  442. flowtask/utils/mail.py +63 -0
  443. flowtask/utils/parseqs.c +13324 -0
  444. flowtask/utils/parserqs.cpython-312-x86_64-linux-gnu.so +0 -0
  445. flowtask/utils/stats.py +308 -0
  446. flowtask/utils/transformations.py +74 -0
  447. flowtask/utils/uv.py +12 -0
  448. flowtask/utils/validators.py +97 -0
  449. flowtask/version.py +11 -0
  450. flowtask-5.8.4.dist-info/LICENSE +201 -0
  451. flowtask-5.8.4.dist-info/METADATA +209 -0
  452. flowtask-5.8.4.dist-info/RECORD +470 -0
  453. flowtask-5.8.4.dist-info/WHEEL +6 -0
  454. flowtask-5.8.4.dist-info/entry_points.txt +3 -0
  455. flowtask-5.8.4.dist-info/top_level.txt +2 -0
  456. plugins/components/CreateQR.py +39 -0
  457. plugins/components/TestComponent.py +28 -0
  458. plugins/components/Use1.py +13 -0
  459. plugins/components/Workplace.py +117 -0
  460. plugins/components/__init__.py +3 -0
  461. plugins/sources/__init__.py +0 -0
  462. plugins/sources/get_populartimes.py +78 -0
  463. plugins/sources/google.py +150 -0
  464. plugins/sources/hubspot.py +679 -0
  465. plugins/sources/icims.py +679 -0
  466. plugins/sources/mobileinsight.py +501 -0
  467. plugins/sources/newrelic.py +262 -0
  468. plugins/sources/uap.py +268 -0
  469. plugins/sources/venu.py +244 -0
  470. plugins/sources/vocinity.py +314 -0
@@ -0,0 +1,523 @@
1
+ """
2
+ Scraping a Web Page Using Selenium + ChromeDriver + BeautifulSoup.
3
+
4
+
5
+ Example:
6
+
7
+ ```yaml
8
+ Amazon:
9
+ type: product_info
10
+ use_proxies: true
11
+ paid_proxy: true
12
+ ```
13
+
14
+ """
15
+ from typing import Any
16
+ import asyncio
17
+ from collections.abc import Callable
18
+ import re
19
+ from urllib.parse import urljoin
20
+ from bs4 import BeautifulSoup
21
+ import random
22
+ import httpx
23
+ import pandas as pd
24
+ import backoff
25
+ # Internals
26
+ from ..exceptions import (
27
+ ComponentError,
28
+ ConfigError,
29
+ NotSupported,
30
+ DataNotFound,
31
+ DataError
32
+ )
33
+ from ..interfaces.http import ua
34
+ from .reviewscrap import ReviewScrapper, on_backoff, bad_gateway_exception
35
+
36
+
37
+ class Amazon(ReviewScrapper):
38
+ """Amazon.
39
+
40
+ Combining API Key and Web Scraping, this component will be able to extract
41
+ Amazon Product Information (reviews, etc).
42
+ """
43
def __init__(
    self,
    loop: asyncio.AbstractEventLoop = None,
    job: Callable = None,
    stat: Callable = None,
    **kwargs,
):
    """Initialise the component: proxy flags, cookies, headers and a semaphore.

    Args:
        loop: Passed through to the parent constructor.
        job: Passed through to the parent constructor.
        stat: Passed through to the parent constructor.
        **kwargs: Any remaining options, passed through to the parent
            constructor.
    """
    super(Amazon, self).__init__(loop=loop, job=job, stat=stat, **kwargs)

    # Proxy usage is switched on unconditionally once the parent is set up.
    self.use_proxy: bool = True
    # presumably selects paid over free proxies -- confirm against the parent class
    self._free_proxy: bool = False

    # The cookie jar starts out empty.
    self.cookies = {}

    # Browser-like headers sent with every request made by this component.
    # NOTE(review): 'sec-fetch-site' and "Sec-Fetch-Site" are distinct dict
    # keys here but carry different values ('none' vs 'same-origin');
    # confirm which one is intended.
    browser_headers: dict = {
        'authority': 'www.amazon.com',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
        "Accept-Encoding": "gzip, deflate, br, zstd",
        "Accept-Language": "es-US,es;q=0.9,en-US;q=0.8,en;q=0.7,es-419;q=0.6",
        "Origin": "https://www.amazon.com",
        "Referer": "https://www.amazon.com/dp/",
        "Sec-CH-UA": '"Not A(Brand";v="8", "Chromium";v="132", "Google Chrome";v="132"',
        "Sec-CH-UA-Mobile": "?0",
        "Sec-CH-UA-Platform": '"Linux"',
        'sec-fetch-site': 'none',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-dest': 'document',
        "Sec-Fetch-Site": "same-origin",
        # One user agent is picked at random per component instance.
        "User-Agent": random.choice(ua),
        "Connection": "keep-alive",
        'dnt': '1',
        'upgrade-insecure-requests': '1',
    }
    self.headers: dict = browser_headers

    # At most 10 coroutines may hold this semaphore at the same time.
    self.semaphore = asyncio.Semaphore(10)
82
+
83
def _extract_reviews_from_page(self, soup: BeautifulSoup) -> list:
    """Extract the individual reviews from a parsed Amazon reviews page.

    Args:
        soup: Parsed HTML of a reviews page.

    Returns:
        A list with one dict per review that could be parsed, each holding
        the keys ``title``, ``review``, ``location_and_date``, ``rating``
        and ``verified``. The list is empty when neither the
        ``cm-cr-review_list`` list nor the ``reviews-medley-widget`` div is
        present.
    """
    # Prefer the dedicated review list; otherwise use the medley widget.
    reviews_container = soup.find("ul", id="cm-cr-review_list") or soup.find(
        "div", {"data-hook": "reviews-medley-widget"}
    )
    if not reviews_container:
        return []

    reviews = []
    # Each review is an <li> element carrying data-hook="review".
    for review_el in reviews_container.find_all("li", {"data-hook": "review"}):
        try:
            title_el = review_el.select_one("[data-hook=review-title] > span")
            title = title_el.get_text(strip=True) if title_el else None

            body_el = review_el.select_one("[data-hook=review-body]")
            body = " ".join(body_el.stripped_strings) if body_el else None

            # This element's text is stored verbatim as "location_and_date".
            date_el = review_el.select_one("[data-hook=review-date]")
            date_text = date_el.get_text(strip=True) if date_el else None

            # The pattern matches text such as "4.5 out of 5 stars"; the
            # leading number is kept as a string. Uses the module-level
            # ``re`` import rather than re-importing it per review.
            rating = None
            rating_el = review_el.select_one(
                "[data-hook*='review-star-rating'] span.a-icon-alt"
            )
            if rating_el:
                rating_match = re.search(
                    r"(\d+\.?\d*) out", rating_el.get_text(strip=True)
                )
                if rating_match:
                    rating = rating_match.group(1)

            # True when the "avp-badge" element is present on the review.
            verified = bool(review_el.select_one("[data-hook=avp-badge]"))

            reviews.append(
                {
                    "title": title,
                    "review": body,
                    "location_and_date": date_text,
                    "rating": rating,
                    "verified": verified,
                }
            )
        except Exception as e:
            # Best effort: one malformed review must not discard the rest.
            self._logger.error(
                f"Failed to parse a review: {e}"
            )
    return reviews
131
+
132
def _extract_next_page_url(self, soup: BeautifulSoup, base_url: str) -> str:
    """Resolve the URL of the next page from the pagination links.

    Args:
        soup: Parsed HTML of the current page.
        base_url: URL against which a relative ``href`` is resolved.

    Returns:
        The absolute URL of the next page, or ``None`` when there is no
        ``.a-pagination .a-last > a`` link or the link has no usable
        ``href``.
    """
    next_link = soup.select_one(".a-pagination .a-last > a")
    if not next_link:
        return None

    href = next_link.get("href")
    if not href:
        return None

    return urljoin(base_url, href)
141
+
142
@backoff.on_exception(
    backoff.expo,
    (httpx.TimeoutException, httpx.ConnectTimeout, httpx.HTTPStatusError, httpx.HTTPError),
    max_tries=3,
    jitter=backoff.full_jitter,
    on_backoff=on_backoff,
    giveup=lambda e: not bad_gateway_exception(e) and not isinstance(e, httpx.ConnectTimeout)
)
async def _fetch_product_page(self, asin: str, cookies: httpx.Cookies, for_reviews: bool = False) -> tuple:
    """Download and parse the Amazon product page of an ASIN.

    The call is wrapped by ``backoff`` (exponential wait, full jitter, at
    most 3 tries) for httpx errors. The ``giveup`` predicate stops retrying
    unless ``bad_gateway_exception(e)`` is true or the error is an
    ``httpx.ConnectTimeout``.

    Args:
        asin: Amazon product identifier appended to ``/dp/``.
        cookies: Cookies sent with the request.
        for_reviews: When true, the page must contain the ``reviewsMedley``
            div; otherwise it must contain the ``title_feature_div`` div.

    Returns:
        A ``(product_page_url, html, soup)`` tuple.

    Raises:
        DataError: The response status code is not 200.
        httpx.HTTPError: The required section is missing from the page.
    """
    product_page_url = f"https://www.amazon.com/dp/{asin}"
    response = await self._get(url=product_page_url, cookies=cookies, headers=self.headers)
    if response.status_code != 200:
        raise DataError(
            f"Failed to fetch product page, status code: {response.status_code}"
        )
    html = response.text
    soup = BeautifulSoup(html, "html.parser")

    if for_reviews:
        if soup.find("div", id="reviewsMedley"):
            return product_page_url, html, soup
        # Raise instead of returning None implicitly, so callers that unpack
        # the result get an explicit error when the reviews are missing.
        missing = "reviews section"
    else:
        title_div = soup.find("div", id="title_feature_div")
        if title_div:
            product_name = title_div.get_text(separator=" ", strip=True)
            self._logger.info(f"Extracted product name: {product_name} from {product_page_url}")
            return product_page_url, html, soup
        missing = "product name"

    # Short pause before reporting the failure to the backoff wrapper.
    await asyncio.sleep(1.5)
    raise httpx.HTTPError(
        f"Failed to find {missing} on product page: {product_page_url}"
    )
171
+
172
def _extract_reviews_from_product_page(self, url: str, row: Any, soup: BeautifulSoup) -> list:
    """Collect the reviews embedded in a product page (fallback path).

    Args:
        url: Product page URL, stored under ``url`` in every review.
        row: Input record; the result of its ``to_dict()`` is merged into
            every review and overrides extracted keys of the same name.
        soup: Parsed HTML of the product page.

    Returns:
        A list with one dict per review that could be parsed; empty when
        the page has no ``reviewsMedley`` div.
    """
    medley = soup.find("div", id="reviewsMedley")
    if not medley:
        return []

    collected = []
    for item in medley.find_all("li", {"data-hook": "review"}):
        try:
            # A review without the profile block raises here and is skipped.
            profile = item.find("div", {"class": "a-profile-content"})
            name_el = profile.find("span", {"class": "a-profile-name"})
            user_name = name_el.get_text(strip=True)

            # The title anchor gives both the review link and the title text.
            reviews_link = ""
            title_text = ""
            title_link = item.find("a", {"data-hook": "review-title"})
            if title_link:
                reviews_link = title_link["href"]
                title_text = title_link.find_all("span")[-1].text.strip()

            body_el = item.select_one("[data-hook=review-body]")
            body_text = " ".join(body_el.stripped_strings) if body_el else None

            when_el = item.select_one("[data-hook=review-date]")
            when_text = when_el.get_text(strip=True) if when_el else None

            # Keep the number in front of " out" as a string, if there is one.
            rating = None
            stars_el = item.select_one("[data-hook*='review-star-rating'] span.a-icon-alt")
            if stars_el:
                found = re.search(r"(\d+\.?\d*) out", stars_el.get_text(strip=True))
                if found:
                    rating = found.group(1)

            is_verified = bool(item.select_one("[data-hook=avp-badge]"))
            extra = row.to_dict()
            collected.append(
                {
                    "url": url,
                    "user": user_name,
                    "customer_reviews": reviews_link,
                    "title": title_text,
                    "review": body_text,
                    "location_and_date": when_text,
                    "rating": rating,
                    "verified": is_verified,
                    **extra,
                }
            )
        except Exception as e:
            # Best effort: log the failure and continue with the next review.
            self._logger.error(f"Error parsing a fallback review: {e}")
    return collected
213
+
214
async def _fetch_review_page(self, url: str, cookies: httpx.Cookies) -> str:
    """Fetch the review page HTML for a given URL.

    Args:
        url: Address of the reviews page.
        cookies: Cookie jar sent with the request.

    Returns:
        The response body as text.

    Raises:
        DataError: when the request fails or the response status is not 200.
    """
    try:
        response = await self._get(url=url, cookies=cookies, headers=self.headers)
        if response.status_code != 200:
            raise DataError(f"Failed to fetch reviews page (status code: {response.status_code})")
        return response.text
    except DataError:
        # Already the error we want to surface: do not wrap it a second time.
        raise
    except Exception as e:
        raise DataError(f"Failed to fetch reviews page: {e}") from e
226
+
227
+ async def _product_reviews(self, idx, row, cookies, max_pages: int = 5) -> list:
228
+ async with self.semaphore:
229
+ # Prepare payload for the API request
230
+ asin = row['asin']
231
+ reviews = []
232
+ # base_review_url = f"https://www.amazon.com/product-reviews/{asin}/"
233
+ #
234
+ # try:
235
+ # # Try fetching the reviews page
236
+ # html = await self._fetch_review_page(base_review_url, cookies)
237
+ # soup = BeautifulSoup(html, "html.parser")
238
+ # reviews.extend(self._extract_reviews_from_page(soup))
239
+ # self._logger.info(f"Fetched reviews from reviews URL for ASIN {asin}")
240
+ # except DataError as e:
241
+ # # If a redirect (or other error) is detected, log and fall back to the product page.
242
+ # self._logger.warning(
243
+ # f"Direct reviews page fetch failed ({e}); falling back to product page for ASIN {asin}"
244
+ # )
245
+ try:
246
+ url, _, soup = await self._fetch_product_page(asin, cookies=cookies, for_reviews=True)
247
+ reviews.extend(
248
+ self._extract_reviews_from_product_page(url, row, soup)
249
+ )
250
+ except Exception as ee:
251
+ self._logger.error(
252
+ f"Fallback product page review extraction failed: {ee}"
253
+ )
254
+ return []
255
+ self._logger.info(
256
+ f"Fetched {len(reviews)} reviews for ASIN {asin}."
257
+ )
258
+ await asyncio.sleep(random.randint(3, 5))
259
+ return reviews
260
+
261
async def reviews(self):
    """reviews.

    Amazon Product Reviews.

    Builds an httpx cookie jar from ``self.cookies``, schedules one
    ``_product_reviews`` call per row of ``self.data`` and flattens the
    per-product review lists into a single DataFrame, which replaces
    ``self.data``.

    Returns:
        The reviews DataFrame, one row per review, with an ``origin``
        column set to ``'amazon'``.
    """
    httpx_cookies = httpx.Cookies()
    for key, value in self.cookies.items():
        httpx_cookies.set(
            key, value,
            domain='.amazon.com',
            path='/'
        )

    # Progress goes through the component logger instead of stdout.
    self._logger.debug('starting ...')
    tasks = [
        self._product_reviews(
            idx,
            row,
            httpx_cookies,
            max_pages=2
        ) for idx, row in self.data.iterrows()
    ]
    # Gather results concurrently
    all_reviews_nested = await self._processing_tasks(tasks)
    # Flatten the nested list: one item per review, and add the asin as reference.
    # NOTE(review): this assumes _processing_tasks returns results in task
    # order, so position idx maps back to row idx -- confirm.
    reviews_flat = []
    for idx, review_list in enumerate(all_reviews_nested):
        asin = self.data.iloc[idx]['asin']
        for review in review_list:
            review['asin'] = asin
            reviews_flat.append(review)

    reviews_df = pd.DataFrame(reviews_flat)
    self._logger.notice(f"Extracted total {len(reviews_df)} reviews.")

    # at the end, adding a column for origin of reviews:
    reviews_df['origin'] = 'amazon'
    self.data = reviews_df  # or store separately
    return self.data
301
+
302
def _extract_product_name(self, soup: BeautifulSoup) -> str:
    """Return the text of the ``title_feature_div`` block, or None if absent."""
    heading = soup.find("div", id="title_feature_div")
    if not heading:
        return None
    return heading.get_text(separator=" ", strip=True)
306
+
307
def _extract_price(self, soup: BeautifulSoup) -> str:
    """Return the text of the first ``span.a-offscreen`` element, or None."""
    price_tag = soup.select_one("span.a-offscreen")
    if price_tag:
        return price_tag.get_text(strip=True)
    return None
310
+
311
def _extract_product_description(self, soup: BeautifulSoup) -> str:
    """Return the product description text, or None when the section is absent.

    When ``productDescription_feature_div`` holds an inner
    ``productDescription`` div, its non-empty ``<p>`` texts are joined with
    newlines; otherwise the whole outer block's text is returned.
    """
    outer = soup.find("div", id="productDescription_feature_div")
    if not outer:
        return None

    inner = outer.find("div", id="productDescription")
    if not inner:
        return outer.get_text(separator=" ", strip=True)

    # One line per non-empty paragraph.
    texts = (p.get_text(separator=" ", strip=True) for p in inner.find_all("p"))
    return "\n".join(filter(None, texts))
322
+
323
def _extract_rating(self, soup: BeautifulSoup) -> tuple:
    """
    Extract the average rating and review count from an Amazon product page.

    Args:
        soup (BeautifulSoup): Parsed HTML of an Amazon product page.

    Returns:
        tuple: A tuple containing two elements:
            - review_rating (str or None): Text of the ``a-color-base`` span
              inside ``acrPopover``, or None if not found.
            - review_count (str or None): Text of ``acrCustomerReviewText``
              with the word "ratings"/"rating" removed (e.g., "1,234"),
              or None if not found.
    """
    review_rating = None
    review_count = None
    if acr_div := soup.find("div", id="averageCustomerReviews_feature_div"):
        # The star rating is contained inside a span within the "acrPopover"
        if acr_popover := acr_div.find("span", id="acrPopover"):
            if rating_span := acr_popover.find("span", class_="a-color-base"):
                review_rating = rating_span.get_text(strip=True)
        # The review count is extracted from the anchor "acrCustomerReviewLink"
        if review_link := acr_div.find("a", id="acrCustomerReviewLink"):
            if count_span := review_link.find("span", id="acrCustomerReviewText"):
                count_text = count_span.get_text(strip=True)
                # Strip the plural first, then the singular ("1 rating").
                review_count = count_text.replace('ratings', '').replace('rating', '').strip()
    return review_rating, review_count
354
+
355
def _extract_product_overview(self, soup: BeautifulSoup) -> dict:
    """Build a label -> value mapping from the product overview section.

    The ``productOverview_hoc_view_div`` container is preferred: each of its
    ``a-row`` divs contributes the text of its first two ``a-column`` divs,
    provided both are non-empty. Only when that container is absent is the
    ``a-spacing-micro`` table inside ``productOverview_feature_div`` read,
    one ``th``/``td`` pair per table row.

    Returns:
        The collected mapping; empty when neither layout is present.
    """
    specs = {}

    hoc_container = soup.find("div", id="productOverview_hoc_view_div")
    if hoc_container:
        for entry in hoc_container.find_all("div", class_="a-row"):
            cells = entry.find_all("div", class_="a-column")
            if len(cells) < 2:
                continue
            # First column is the label, second column is the value.
            name = cells[0].get_text(separator=" ", strip=True)
            content = cells[1].get_text(separator=" ", strip=True)
            if name and content:
                specs[name] = content
        return specs

    feature_div = soup.find("div", id="productOverview_feature_div")
    if not feature_div:
        return specs

    spec_table = feature_div.find("table", {"class": "a-spacing-micro"})
    if not spec_table:
        return specs

    for line in spec_table.find_all("tr"):
        header = line.find("th")
        cell = line.find("td")
        if not (header and cell):
            continue
        name = header.get_text(separator=" ", strip=True)
        content = cell.get_text(separator=" ", strip=True)
        specs[name] = content
    return specs
380
+
381
def _extract_product_details(self, soup: BeautifulSoup) -> tuple:
    """Read the two product-details tables of the page.

    Returns:
        A ``(tech_details, additional_details)`` pair of dictionaries taken
        from the tables ``productDetails_techSpec_section_1`` and
        ``productDetails_detailBullets_sections1``. Each maps the ``th`` text
        of a row to its ``td`` text; a missing table gives an empty dict.
    """

    def read_table(table_id: str) -> dict:
        # Collect th -> td text for every row that has both cells.
        pairs = {}
        table = soup.find("table", id=table_id)
        if not table:
            return pairs
        for line in table.find_all("tr"):
            header = line.find("th")
            cell = line.find("td")
            if header and cell:
                name = header.get_text(separator=" ", strip=True)
                content = cell.get_text(separator=" ", strip=True)
                pairs[name] = content
        return pairs

    technical = read_table("productDetails_techSpec_section_1")
    additional = read_table("productDetails_detailBullets_sections1")
    return technical, additional
405
+
406
def _extract_product_info(self, url: str, row: Any, soup: BeautifulSoup) -> dict:
    """
    Assemble the product record for one Amazon product page.

    Delegates to the ``_extract_*`` helpers for name, price, rating,
    overview, details and description, and reads the "about this item"
    bullets from ``featurebullets_feature_div``.

    Returns a dictionary with the keys ``product_name``, ``price``, ``url``,
    ``about_this_item``, ``rating``, ``review_count``, ``overview``,
    ``description``, ``tech_details`` and ``additional_details``, followed by
    every field of ``row.to_dict()`` (row fields win on key clashes).
    """
    name = self._extract_product_name(soup)
    price = self._extract_price(soup)
    stars, ratings_total = self._extract_rating(soup)
    overview = self._extract_product_overview(soup)
    tech_specs, extra_specs = self._extract_product_details(soup)
    description = self._extract_product_description(soup)

    # Feature bullets: keep only the non-empty list items.
    bullets = []
    bullets_div = soup.find("div", id="featurebullets_feature_div")
    bullet_list = bullets_div.find("ul", {"class": "a-unordered-list"}) if bullets_div else None
    if bullet_list:
        item_texts = (
            item.get_text(separator=" ", strip=True)
            for item in bullet_list.find_all("li")
        )
        bullets = [text for text in item_texts if text]

    record = {
        "product_name": name,
        "price": price,
        "url": url,
        "about_this_item": bullets,
        "rating": stars,
        "review_count": ratings_total,
        "overview": overview,
        "description": description,
        "tech_details": tech_specs,
        "additional_details": extra_specs,
    }
    record.update(row.to_dict())
    return record
458
+
459
+ async def _product_information(self, idx, row, cookies):
460
+ async with self.semaphore:
461
+ # Prepare payload for the API request
462
+ asin = row['asin']
463
+ try:
464
+ # Fetch the product page
465
+ url, html, soup = await self._fetch_product_page(asin, cookies=cookies, for_reviews=False)
466
+ if not html:
467
+ self._logger.warning(
468
+ f"No Product Information found for {asin}."
469
+ )
470
+ return {}
471
+ except (httpx.TimeoutException, httpx.HTTPError) as ex:
472
+ self._logger.warning(f"Request failed: {ex}")
473
+ return []
474
+ except Exception as ex:
475
+ self._logger.error(f"An error occurred: {ex}")
476
+ return []
477
+
478
+ # Extract the product information using BeautifulSoup
479
+ if product_info := self._extract_product_info(url, row, soup):
480
+ return product_info
481
+ raise DataNotFound(
482
+ f"Failed to extract product information for {asin}"
483
+ )
484
+
485
async def product_info(self):
    """product_info.

    Product Information.

    Builds an httpx cookie jar from ``self.cookies``, schedules one
    ``_product_information`` call per row of ``self.data`` and stores the
    collected records as a new DataFrame in ``self.data``.

    Returns:
        The product DataFrame, one row per product that produced a
        non-empty record.
    """
    httpx_cookies = httpx.Cookies()
    for key, value in self.cookies.items():
        httpx_cookies.set(
            key, value,
            domain='.amazon.com',
            path='/'
        )

    # Progress goes through the component logger instead of stdout.
    self._logger.debug('starting ...')

    tasks = [
        self._product_information(
            idx,
            row,
            httpx_cookies
        ) for idx, row in self.data.iterrows()
    ]
    # Gather results concurrently
    all_products = await self._processing_tasks(tasks)

    # Failed lookups come back as empty placeholders; drop them so they
    # neither create all-NaN rows nor break DataFrame construction.
    records = [product for product in all_products if product]

    # Convert to DataFrame
    df = pd.DataFrame(records)

    # show the num of rows in final dataframe:
    self._logger.notice(
        f"Ending Product Info: {len(df)}"
    )

    # Override previous dataframe:
    self.data = df

    # return existing data
    return self.data