flowtask 5.8.4__cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (470)
  1. flowtask/__init__.py +93 -0
  2. flowtask/__main__.py +38 -0
  3. flowtask/bots/__init__.py +6 -0
  4. flowtask/bots/check.py +93 -0
  5. flowtask/bots/codebot.py +51 -0
  6. flowtask/components/ASPX.py +148 -0
  7. flowtask/components/AddDataset.py +352 -0
  8. flowtask/components/Amazon.py +523 -0
  9. flowtask/components/AutoTask.py +314 -0
  10. flowtask/components/Azure.py +80 -0
  11. flowtask/components/AzureUsers.py +106 -0
  12. flowtask/components/BaseAction.py +91 -0
  13. flowtask/components/BaseLoop.py +198 -0
  14. flowtask/components/BestBuy.py +800 -0
  15. flowtask/components/CSVToGCS.py +120 -0
  16. flowtask/components/CompanyScraper/__init__.py +1 -0
  17. flowtask/components/CompanyScraper/parsers/__init__.py +6 -0
  18. flowtask/components/CompanyScraper/parsers/base.py +102 -0
  19. flowtask/components/CompanyScraper/parsers/explorium.py +192 -0
  20. flowtask/components/CompanyScraper/parsers/leadiq.py +206 -0
  21. flowtask/components/CompanyScraper/parsers/rocket.py +133 -0
  22. flowtask/components/CompanyScraper/parsers/siccode.py +109 -0
  23. flowtask/components/CompanyScraper/parsers/visualvisitor.py +130 -0
  24. flowtask/components/CompanyScraper/parsers/zoominfo.py +118 -0
  25. flowtask/components/CompanyScraper/scrapper.py +1054 -0
  26. flowtask/components/CopyTo.py +177 -0
  27. flowtask/components/CopyToBigQuery.py +243 -0
  28. flowtask/components/CopyToMongoDB.py +291 -0
  29. flowtask/components/CopyToPg.py +609 -0
  30. flowtask/components/CopyToRethink.py +207 -0
  31. flowtask/components/CreateGCSBucket.py +102 -0
  32. flowtask/components/CreateReport/CreateReport.py +228 -0
  33. flowtask/components/CreateReport/__init__.py +9 -0
  34. flowtask/components/CreateReport/charts/__init__.py +15 -0
  35. flowtask/components/CreateReport/charts/bar.py +51 -0
  36. flowtask/components/CreateReport/charts/base.py +66 -0
  37. flowtask/components/CreateReport/charts/pie.py +64 -0
  38. flowtask/components/CreateReport/utils.py +9 -0
  39. flowtask/components/CustomerSatisfaction.py +196 -0
  40. flowtask/components/DataInput.py +200 -0
  41. flowtask/components/DateList.py +255 -0
  42. flowtask/components/DbClient.py +163 -0
  43. flowtask/components/DialPad.py +146 -0
  44. flowtask/components/DocumentDBQuery.py +200 -0
  45. flowtask/components/DownloadFrom.py +371 -0
  46. flowtask/components/DownloadFromD2L.py +113 -0
  47. flowtask/components/DownloadFromFTP.py +181 -0
  48. flowtask/components/DownloadFromIMAP.py +315 -0
  49. flowtask/components/DownloadFromS3.py +198 -0
  50. flowtask/components/DownloadFromSFTP.py +265 -0
  51. flowtask/components/DownloadFromSharepoint.py +110 -0
  52. flowtask/components/DownloadFromSmartSheet.py +114 -0
  53. flowtask/components/DownloadS3File.py +229 -0
  54. flowtask/components/Dummy.py +59 -0
  55. flowtask/components/DuplicatePhoto.py +411 -0
  56. flowtask/components/EmployeeEvaluation.py +237 -0
  57. flowtask/components/ExecuteSQL.py +323 -0
  58. flowtask/components/ExtractHTML.py +178 -0
  59. flowtask/components/FileBase.py +178 -0
  60. flowtask/components/FileCopy.py +181 -0
  61. flowtask/components/FileDelete.py +82 -0
  62. flowtask/components/FileExists.py +146 -0
  63. flowtask/components/FileIteratorDelete.py +112 -0
  64. flowtask/components/FileList.py +194 -0
  65. flowtask/components/FileOpen.py +75 -0
  66. flowtask/components/FileRead.py +120 -0
  67. flowtask/components/FileRename.py +106 -0
  68. flowtask/components/FilterIf.py +284 -0
  69. flowtask/components/FilterRows/FilterRows.py +200 -0
  70. flowtask/components/FilterRows/__init__.py +10 -0
  71. flowtask/components/FilterRows/functions.py +4 -0
  72. flowtask/components/GCSToBigQuery.py +103 -0
  73. flowtask/components/GoogleA4.py +150 -0
  74. flowtask/components/GoogleGeoCoding.py +344 -0
  75. flowtask/components/GooglePlaces.py +315 -0
  76. flowtask/components/GoogleSearch.py +539 -0
  77. flowtask/components/HTTPClient.py +268 -0
  78. flowtask/components/ICIMS.py +146 -0
  79. flowtask/components/IF.py +179 -0
  80. flowtask/components/IcimsFolderCopy.py +173 -0
  81. flowtask/components/ImageFeatures/__init__.py +5 -0
  82. flowtask/components/ImageFeatures/process.py +233 -0
  83. flowtask/components/IteratorBase.py +251 -0
  84. flowtask/components/LangchainLoader/__init__.py +5 -0
  85. flowtask/components/LangchainLoader/loader.py +194 -0
  86. flowtask/components/LangchainLoader/loaders/__init__.py +22 -0
  87. flowtask/components/LangchainLoader/loaders/abstract.py +362 -0
  88. flowtask/components/LangchainLoader/loaders/basepdf.py +50 -0
  89. flowtask/components/LangchainLoader/loaders/docx.py +91 -0
  90. flowtask/components/LangchainLoader/loaders/html.py +119 -0
  91. flowtask/components/LangchainLoader/loaders/pdfblocks.py +146 -0
  92. flowtask/components/LangchainLoader/loaders/pdfmark.py +79 -0
  93. flowtask/components/LangchainLoader/loaders/pdftables.py +135 -0
  94. flowtask/components/LangchainLoader/loaders/qa.py +67 -0
  95. flowtask/components/LangchainLoader/loaders/txt.py +55 -0
  96. flowtask/components/LeadIQ.py +650 -0
  97. flowtask/components/Loop.py +253 -0
  98. flowtask/components/Lowes.py +334 -0
  99. flowtask/components/MS365Usage.py +156 -0
  100. flowtask/components/MSTeamsMessages.py +320 -0
  101. flowtask/components/MarketClustering.py +1051 -0
  102. flowtask/components/MergeFiles.py +362 -0
  103. flowtask/components/MilvusOutput.py +87 -0
  104. flowtask/components/NearByStores.py +175 -0
  105. flowtask/components/NetworkNinja/__init__.py +6 -0
  106. flowtask/components/NetworkNinja/models/__init__.py +52 -0
  107. flowtask/components/NetworkNinja/models/abstract.py +177 -0
  108. flowtask/components/NetworkNinja/models/account.py +39 -0
  109. flowtask/components/NetworkNinja/models/client.py +19 -0
  110. flowtask/components/NetworkNinja/models/district.py +14 -0
  111. flowtask/components/NetworkNinja/models/events.py +101 -0
  112. flowtask/components/NetworkNinja/models/forms.py +499 -0
  113. flowtask/components/NetworkNinja/models/market.py +16 -0
  114. flowtask/components/NetworkNinja/models/organization.py +34 -0
  115. flowtask/components/NetworkNinja/models/photos.py +125 -0
  116. flowtask/components/NetworkNinja/models/project.py +44 -0
  117. flowtask/components/NetworkNinja/models/region.py +28 -0
  118. flowtask/components/NetworkNinja/models/store.py +203 -0
  119. flowtask/components/NetworkNinja/models/user.py +151 -0
  120. flowtask/components/NetworkNinja/router.py +854 -0
  121. flowtask/components/Odoo.py +175 -0
  122. flowtask/components/OdooInjector.py +192 -0
  123. flowtask/components/OpenFromXML.py +126 -0
  124. flowtask/components/OpenWeather.py +41 -0
  125. flowtask/components/OpenWithBase.py +616 -0
  126. flowtask/components/OpenWithPandas.py +715 -0
  127. flowtask/components/PGPDecrypt.py +199 -0
  128. flowtask/components/PandasIterator.py +187 -0
  129. flowtask/components/PandasToFile.py +189 -0
  130. flowtask/components/Paradox.py +339 -0
  131. flowtask/components/ParamIterator.py +117 -0
  132. flowtask/components/ParseHTML.py +84 -0
  133. flowtask/components/PlacerStores.py +249 -0
  134. flowtask/components/Pokemon.py +507 -0
  135. flowtask/components/PositiveBot.py +62 -0
  136. flowtask/components/PowerPointSlide.py +400 -0
  137. flowtask/components/PrintMessage.py +127 -0
  138. flowtask/components/ProductCompetitors/__init__.py +5 -0
  139. flowtask/components/ProductCompetitors/parsers/__init__.py +7 -0
  140. flowtask/components/ProductCompetitors/parsers/base.py +72 -0
  141. flowtask/components/ProductCompetitors/parsers/bestbuy.py +86 -0
  142. flowtask/components/ProductCompetitors/parsers/lowes.py +103 -0
  143. flowtask/components/ProductCompetitors/scrapper.py +155 -0
  144. flowtask/components/ProductCompliant.py +169 -0
  145. flowtask/components/ProductInfo/__init__.py +1 -0
  146. flowtask/components/ProductInfo/parsers/__init__.py +5 -0
  147. flowtask/components/ProductInfo/parsers/base.py +83 -0
  148. flowtask/components/ProductInfo/parsers/brother.py +97 -0
  149. flowtask/components/ProductInfo/parsers/canon.py +167 -0
  150. flowtask/components/ProductInfo/parsers/epson.py +118 -0
  151. flowtask/components/ProductInfo/parsers/hp.py +131 -0
  152. flowtask/components/ProductInfo/parsers/samsung.py +97 -0
  153. flowtask/components/ProductInfo/scraper.py +319 -0
  154. flowtask/components/ProductPricing.py +118 -0
  155. flowtask/components/QS.py +261 -0
  156. flowtask/components/QSBase.py +201 -0
  157. flowtask/components/QueryIterator.py +273 -0
  158. flowtask/components/QueryToInsert.py +327 -0
  159. flowtask/components/QueryToPandas.py +432 -0
  160. flowtask/components/RESTClient.py +195 -0
  161. flowtask/components/RethinkDBQuery.py +189 -0
  162. flowtask/components/Rsync.py +74 -0
  163. flowtask/components/RunSSH.py +59 -0
  164. flowtask/components/RunShell.py +71 -0
  165. flowtask/components/SalesForce.py +20 -0
  166. flowtask/components/SaveImageBank/__init__.py +257 -0
  167. flowtask/components/SchedulingVisits.py +592 -0
  168. flowtask/components/ScrapPage.py +216 -0
  169. flowtask/components/ScrapSearch.py +79 -0
  170. flowtask/components/SendNotify.py +257 -0
  171. flowtask/components/SentimentAnalysis.py +694 -0
  172. flowtask/components/ServiceScrapper/__init__.py +5 -0
  173. flowtask/components/ServiceScrapper/parsers/__init__.py +1 -0
  174. flowtask/components/ServiceScrapper/parsers/base.py +94 -0
  175. flowtask/components/ServiceScrapper/parsers/costco.py +93 -0
  176. flowtask/components/ServiceScrapper/scrapper.py +199 -0
  177. flowtask/components/SetVariables.py +156 -0
  178. flowtask/components/SubTask.py +182 -0
  179. flowtask/components/SuiteCRM.py +48 -0
  180. flowtask/components/Switch.py +175 -0
  181. flowtask/components/TableBase.py +148 -0
  182. flowtask/components/TableDelete.py +312 -0
  183. flowtask/components/TableInput.py +143 -0
  184. flowtask/components/TableOutput/TableOutput.py +384 -0
  185. flowtask/components/TableOutput/__init__.py +3 -0
  186. flowtask/components/TableSchema.py +534 -0
  187. flowtask/components/Target.py +223 -0
  188. flowtask/components/ThumbnailGenerator.py +156 -0
  189. flowtask/components/ToPandas.py +67 -0
  190. flowtask/components/TransformRows/TransformRows.py +507 -0
  191. flowtask/components/TransformRows/__init__.py +9 -0
  192. flowtask/components/TransformRows/functions.py +559 -0
  193. flowtask/components/TransposeRows.py +176 -0
  194. flowtask/components/UPCDatabase.py +86 -0
  195. flowtask/components/UnGzip.py +171 -0
  196. flowtask/components/Uncompress.py +172 -0
  197. flowtask/components/UniqueRows.py +126 -0
  198. flowtask/components/Unzip.py +107 -0
  199. flowtask/components/UpdateOperationalVars.py +147 -0
  200. flowtask/components/UploadTo.py +299 -0
  201. flowtask/components/UploadToS3.py +136 -0
  202. flowtask/components/UploadToSFTP.py +160 -0
  203. flowtask/components/UploadToSharepoint.py +205 -0
  204. flowtask/components/UserFunc.py +122 -0
  205. flowtask/components/VivaTracker.py +140 -0
  206. flowtask/components/WSDLClient.py +123 -0
  207. flowtask/components/Wait.py +18 -0
  208. flowtask/components/Walmart.py +199 -0
  209. flowtask/components/Workplace.py +134 -0
  210. flowtask/components/XMLToPandas.py +267 -0
  211. flowtask/components/Zammad/__init__.py +41 -0
  212. flowtask/components/Zammad/models.py +0 -0
  213. flowtask/components/ZoomInfoScraper.py +409 -0
  214. flowtask/components/__init__.py +104 -0
  215. flowtask/components/abstract.py +18 -0
  216. flowtask/components/flow.py +530 -0
  217. flowtask/components/google.py +335 -0
  218. flowtask/components/group.py +221 -0
  219. flowtask/components/py.typed +0 -0
  220. flowtask/components/reviewscrap.py +132 -0
  221. flowtask/components/tAutoincrement.py +117 -0
  222. flowtask/components/tConcat.py +109 -0
  223. flowtask/components/tExplode.py +119 -0
  224. flowtask/components/tFilter.py +184 -0
  225. flowtask/components/tGroup.py +236 -0
  226. flowtask/components/tJoin.py +270 -0
  227. flowtask/components/tMap/__init__.py +9 -0
  228. flowtask/components/tMap/functions.py +54 -0
  229. flowtask/components/tMap/tMap.py +450 -0
  230. flowtask/components/tMelt.py +112 -0
  231. flowtask/components/tMerge.py +114 -0
  232. flowtask/components/tOrder.py +93 -0
  233. flowtask/components/tPandas.py +94 -0
  234. flowtask/components/tPivot.py +71 -0
  235. flowtask/components/tPluckCols.py +76 -0
  236. flowtask/components/tUnnest.py +82 -0
  237. flowtask/components/user.py +401 -0
  238. flowtask/conf.py +457 -0
  239. flowtask/download.py +102 -0
  240. flowtask/events/__init__.py +11 -0
  241. flowtask/events/events/__init__.py +20 -0
  242. flowtask/events/events/abstract.py +95 -0
  243. flowtask/events/events/alerts/__init__.py +362 -0
  244. flowtask/events/events/alerts/colfunctions.py +131 -0
  245. flowtask/events/events/alerts/functions.py +158 -0
  246. flowtask/events/events/dummy.py +12 -0
  247. flowtask/events/events/exec.py +124 -0
  248. flowtask/events/events/file/__init__.py +7 -0
  249. flowtask/events/events/file/base.py +51 -0
  250. flowtask/events/events/file/copy.py +23 -0
  251. flowtask/events/events/file/delete.py +16 -0
  252. flowtask/events/events/interfaces/__init__.py +9 -0
  253. flowtask/events/events/interfaces/client.py +67 -0
  254. flowtask/events/events/interfaces/credentials.py +28 -0
  255. flowtask/events/events/interfaces/notifications.py +58 -0
  256. flowtask/events/events/jira.py +122 -0
  257. flowtask/events/events/log.py +26 -0
  258. flowtask/events/events/logerr.py +52 -0
  259. flowtask/events/events/notify.py +59 -0
  260. flowtask/events/events/notify_event.py +160 -0
  261. flowtask/events/events/publish.py +54 -0
  262. flowtask/events/events/sendfile.py +104 -0
  263. flowtask/events/events/task.py +97 -0
  264. flowtask/events/events/teams.py +98 -0
  265. flowtask/events/events/webhook.py +58 -0
  266. flowtask/events/manager.py +287 -0
  267. flowtask/exceptions.c +39393 -0
  268. flowtask/exceptions.cpython-39-x86_64-linux-gnu.so +0 -0
  269. flowtask/extensions/__init__.py +3 -0
  270. flowtask/extensions/abstract.py +82 -0
  271. flowtask/extensions/logging/__init__.py +65 -0
  272. flowtask/hooks/__init__.py +9 -0
  273. flowtask/hooks/actions/__init__.py +22 -0
  274. flowtask/hooks/actions/abstract.py +66 -0
  275. flowtask/hooks/actions/dummy.py +23 -0
  276. flowtask/hooks/actions/jira.py +74 -0
  277. flowtask/hooks/actions/rest.py +320 -0
  278. flowtask/hooks/actions/sampledata.py +37 -0
  279. flowtask/hooks/actions/sensor.py +23 -0
  280. flowtask/hooks/actions/task.py +9 -0
  281. flowtask/hooks/actions/ticket.py +37 -0
  282. flowtask/hooks/actions/zammad.py +55 -0
  283. flowtask/hooks/hook.py +62 -0
  284. flowtask/hooks/models.py +17 -0
  285. flowtask/hooks/service.py +187 -0
  286. flowtask/hooks/step.py +91 -0
  287. flowtask/hooks/types/__init__.py +23 -0
  288. flowtask/hooks/types/base.py +129 -0
  289. flowtask/hooks/types/brokers/__init__.py +11 -0
  290. flowtask/hooks/types/brokers/base.py +54 -0
  291. flowtask/hooks/types/brokers/mqtt.py +35 -0
  292. flowtask/hooks/types/brokers/rabbitmq.py +82 -0
  293. flowtask/hooks/types/brokers/redis.py +83 -0
  294. flowtask/hooks/types/brokers/sqs.py +44 -0
  295. flowtask/hooks/types/fs.py +232 -0
  296. flowtask/hooks/types/http.py +49 -0
  297. flowtask/hooks/types/imap.py +200 -0
  298. flowtask/hooks/types/jira.py +279 -0
  299. flowtask/hooks/types/mail.py +205 -0
  300. flowtask/hooks/types/postgres.py +98 -0
  301. flowtask/hooks/types/responses/__init__.py +8 -0
  302. flowtask/hooks/types/responses/base.py +5 -0
  303. flowtask/hooks/types/sharepoint.py +288 -0
  304. flowtask/hooks/types/ssh.py +141 -0
  305. flowtask/hooks/types/tagged.py +59 -0
  306. flowtask/hooks/types/upload.py +85 -0
  307. flowtask/hooks/types/watch.py +71 -0
  308. flowtask/hooks/types/web.py +36 -0
  309. flowtask/interfaces/AzureClient.py +137 -0
  310. flowtask/interfaces/AzureGraph.py +839 -0
  311. flowtask/interfaces/Boto3Client.py +326 -0
  312. flowtask/interfaces/DropboxClient.py +173 -0
  313. flowtask/interfaces/ExcelHandler.py +94 -0
  314. flowtask/interfaces/FTPClient.py +131 -0
  315. flowtask/interfaces/GoogleCalendar.py +201 -0
  316. flowtask/interfaces/GoogleClient.py +133 -0
  317. flowtask/interfaces/GoogleDrive.py +127 -0
  318. flowtask/interfaces/GoogleGCS.py +89 -0
  319. flowtask/interfaces/GoogleGeocoding.py +93 -0
  320. flowtask/interfaces/GoogleLang.py +114 -0
  321. flowtask/interfaces/GooglePub.py +61 -0
  322. flowtask/interfaces/GoogleSheet.py +68 -0
  323. flowtask/interfaces/IMAPClient.py +137 -0
  324. flowtask/interfaces/O365Calendar.py +113 -0
  325. flowtask/interfaces/O365Client.py +220 -0
  326. flowtask/interfaces/OneDrive.py +284 -0
  327. flowtask/interfaces/Outlook.py +155 -0
  328. flowtask/interfaces/ParrotBot.py +130 -0
  329. flowtask/interfaces/SSHClient.py +378 -0
  330. flowtask/interfaces/Sharepoint.py +496 -0
  331. flowtask/interfaces/__init__.py +36 -0
  332. flowtask/interfaces/azureauth.py +119 -0
  333. flowtask/interfaces/cache.py +201 -0
  334. flowtask/interfaces/client.py +82 -0
  335. flowtask/interfaces/compress.py +525 -0
  336. flowtask/interfaces/credentials.py +124 -0
  337. flowtask/interfaces/d2l.py +239 -0
  338. flowtask/interfaces/databases/__init__.py +5 -0
  339. flowtask/interfaces/databases/db.py +223 -0
  340. flowtask/interfaces/databases/documentdb.py +55 -0
  341. flowtask/interfaces/databases/rethink.py +39 -0
  342. flowtask/interfaces/dataframes/__init__.py +11 -0
  343. flowtask/interfaces/dataframes/abstract.py +21 -0
  344. flowtask/interfaces/dataframes/arrow.py +71 -0
  345. flowtask/interfaces/dataframes/dt.py +69 -0
  346. flowtask/interfaces/dataframes/pandas.py +167 -0
  347. flowtask/interfaces/dataframes/polars.py +60 -0
  348. flowtask/interfaces/db.py +263 -0
  349. flowtask/interfaces/env.py +46 -0
  350. flowtask/interfaces/func.py +137 -0
  351. flowtask/interfaces/http.py +1780 -0
  352. flowtask/interfaces/locale.py +40 -0
  353. flowtask/interfaces/log.py +75 -0
  354. flowtask/interfaces/mask.py +143 -0
  355. flowtask/interfaces/notification.py +154 -0
  356. flowtask/interfaces/playwright.py +339 -0
  357. flowtask/interfaces/powerpoint.py +368 -0
  358. flowtask/interfaces/py.typed +0 -0
  359. flowtask/interfaces/qs.py +376 -0
  360. flowtask/interfaces/result.py +87 -0
  361. flowtask/interfaces/selenium_service.py +779 -0
  362. flowtask/interfaces/smartsheet.py +154 -0
  363. flowtask/interfaces/stat.py +39 -0
  364. flowtask/interfaces/task.py +96 -0
  365. flowtask/interfaces/template.py +118 -0
  366. flowtask/interfaces/vectorstores/__init__.py +1 -0
  367. flowtask/interfaces/vectorstores/abstract.py +133 -0
  368. flowtask/interfaces/vectorstores/milvus.py +669 -0
  369. flowtask/interfaces/zammad.py +107 -0
  370. flowtask/models.py +193 -0
  371. flowtask/parsers/__init__.py +15 -0
  372. flowtask/parsers/_yaml.c +11978 -0
  373. flowtask/parsers/_yaml.cpython-39-x86_64-linux-gnu.so +0 -0
  374. flowtask/parsers/argparser.py +235 -0
  375. flowtask/parsers/base.c +15155 -0
  376. flowtask/parsers/base.cpython-39-x86_64-linux-gnu.so +0 -0
  377. flowtask/parsers/json.c +11968 -0
  378. flowtask/parsers/json.cpython-39-x86_64-linux-gnu.so +0 -0
  379. flowtask/parsers/maps.py +49 -0
  380. flowtask/parsers/toml.c +11968 -0
  381. flowtask/parsers/toml.cpython-39-x86_64-linux-gnu.so +0 -0
  382. flowtask/plugins/__init__.py +16 -0
  383. flowtask/plugins/components/__init__.py +0 -0
  384. flowtask/plugins/handler/__init__.py +45 -0
  385. flowtask/plugins/importer.py +31 -0
  386. flowtask/plugins/sources/__init__.py +0 -0
  387. flowtask/runner.py +283 -0
  388. flowtask/scheduler/__init__.py +9 -0
  389. flowtask/scheduler/functions.py +493 -0
  390. flowtask/scheduler/handlers/__init__.py +8 -0
  391. flowtask/scheduler/handlers/manager.py +504 -0
  392. flowtask/scheduler/handlers/models.py +58 -0
  393. flowtask/scheduler/handlers/service.py +72 -0
  394. flowtask/scheduler/notifications.py +65 -0
  395. flowtask/scheduler/scheduler.py +993 -0
  396. flowtask/services/__init__.py +0 -0
  397. flowtask/services/bots/__init__.py +0 -0
  398. flowtask/services/bots/telegram.py +264 -0
  399. flowtask/services/files/__init__.py +11 -0
  400. flowtask/services/files/manager.py +522 -0
  401. flowtask/services/files/model.py +37 -0
  402. flowtask/services/files/service.py +767 -0
  403. flowtask/services/jira/__init__.py +3 -0
  404. flowtask/services/jira/jira_actions.py +191 -0
  405. flowtask/services/tasks/__init__.py +13 -0
  406. flowtask/services/tasks/launcher.py +213 -0
  407. flowtask/services/tasks/manager.py +323 -0
  408. flowtask/services/tasks/service.py +275 -0
  409. flowtask/services/tasks/task_manager.py +376 -0
  410. flowtask/services/tasks/tasks.py +155 -0
  411. flowtask/storages/__init__.py +16 -0
  412. flowtask/storages/exceptions.py +12 -0
  413. flowtask/storages/files/__init__.py +8 -0
  414. flowtask/storages/files/abstract.py +29 -0
  415. flowtask/storages/files/filesystem.py +66 -0
  416. flowtask/storages/tasks/__init__.py +19 -0
  417. flowtask/storages/tasks/abstract.py +26 -0
  418. flowtask/storages/tasks/database.py +33 -0
  419. flowtask/storages/tasks/filesystem.py +108 -0
  420. flowtask/storages/tasks/github.py +119 -0
  421. flowtask/storages/tasks/memory.py +45 -0
  422. flowtask/storages/tasks/row.py +25 -0
  423. flowtask/tasks/__init__.py +0 -0
  424. flowtask/tasks/abstract.py +526 -0
  425. flowtask/tasks/command.py +118 -0
  426. flowtask/tasks/pile.py +486 -0
  427. flowtask/tasks/py.typed +0 -0
  428. flowtask/tasks/task.py +778 -0
  429. flowtask/template/__init__.py +161 -0
  430. flowtask/tests.py +257 -0
  431. flowtask/types/__init__.py +8 -0
  432. flowtask/types/typedefs.c +11347 -0
  433. flowtask/types/typedefs.cpython-39-x86_64-linux-gnu.so +0 -0
  434. flowtask/utils/__init__.py +24 -0
  435. flowtask/utils/constants.py +117 -0
  436. flowtask/utils/encoders.py +21 -0
  437. flowtask/utils/executor.py +112 -0
  438. flowtask/utils/functions.cpp +14280 -0
  439. flowtask/utils/functions.cpython-39-x86_64-linux-gnu.so +0 -0
  440. flowtask/utils/json.cpp +13349 -0
  441. flowtask/utils/json.cpython-39-x86_64-linux-gnu.so +0 -0
  442. flowtask/utils/mail.py +63 -0
  443. flowtask/utils/parseqs.c +13324 -0
  444. flowtask/utils/parserqs.cpython-39-x86_64-linux-gnu.so +0 -0
  445. flowtask/utils/stats.py +308 -0
  446. flowtask/utils/transformations.py +74 -0
  447. flowtask/utils/uv.py +12 -0
  448. flowtask/utils/validators.py +97 -0
  449. flowtask/version.py +11 -0
  450. flowtask-5.8.4.dist-info/LICENSE +201 -0
  451. flowtask-5.8.4.dist-info/METADATA +209 -0
  452. flowtask-5.8.4.dist-info/RECORD +470 -0
  453. flowtask-5.8.4.dist-info/WHEEL +6 -0
  454. flowtask-5.8.4.dist-info/entry_points.txt +3 -0
  455. flowtask-5.8.4.dist-info/top_level.txt +2 -0
  456. plugins/components/CreateQR.py +39 -0
  457. plugins/components/TestComponent.py +28 -0
  458. plugins/components/Use1.py +13 -0
  459. plugins/components/Workplace.py +117 -0
  460. plugins/components/__init__.py +3 -0
  461. plugins/sources/__init__.py +0 -0
  462. plugins/sources/get_populartimes.py +78 -0
  463. plugins/sources/google.py +150 -0
  464. plugins/sources/hubspot.py +679 -0
  465. plugins/sources/icims.py +679 -0
  466. plugins/sources/mobileinsight.py +501 -0
  467. plugins/sources/newrelic.py +262 -0
  468. plugins/sources/uap.py +268 -0
  469. plugins/sources/venu.py +244 -0
  470. plugins/sources/vocinity.py +314 -0
@@ -0,0 +1,800 @@
1
+ import asyncio
2
+ import aiohttp
3
+ from typing import Any, Dict
4
+ from collections.abc import Callable
5
+ import random
6
+ from concurrent.futures import ThreadPoolExecutor
7
+ from bs4 import BeautifulSoup
8
+ import httpx
9
+ import pandas as pd
10
+ import backoff
11
+ import ssl
12
+ from navconfig.logging import logging
13
+ # Internals
14
+ from ..exceptions import (
15
+ ComponentError,
16
+ DataNotFound,
17
+ NotSupported,
18
+ ConfigError
19
+ )
20
+ from .flow import FlowComponent
21
+ from ..interfaces import HTTPService, SeleniumService
22
+ from ..interfaces.http import ua
23
+
24
+
25
# Raise the threshold of the chatty browser-automation / HTTP2 loggers so
# only WARNING and above reach the output.
for _noisy_logger in ('selenium.webdriver', 'WDM', 'hpack', 'seleniumwire'):
    logging.getLogger(name=_noisy_logger).setLevel(logging.WARNING)
29
+
30
+
31
# Template request payload. `locationId`, `zipCode` and the item's `sku` /
# `condition` are left as None here; presumably they are filled in per request
# (the code consuming this template is not visible in this part of the file).
ProductPayload = {
    "locationId": None,
    "zipCode": None,
    "showOnShelf": True,
    "lookupInStoreQuantity": True,
    "xboxAllAccess": False,
    "consolidated": True,
    "showOnlyOnShelf": False,
    "showInStore": True,
    "pickupTypes": ["UPS_ACCESS_POINT", "FEDEX_HAL"],
    "onlyBestBuyLocations": True,
    "items": [
        {
            "sku": None,
            "condition": None,
            "quantity": 1,
            "itemSeqNumber": "1",
            "reservationToken": None,
            "selectedServices": [],
            "requiredAccessories": [],
            "isTradeIn": False,
            "isLeased": False,
        },
    ],
}
59
+
60
+
61
def bad_gateway_exception(exc):
    """Return True when *exc* is an httpx status error carrying HTTP 502."""
    if not isinstance(exc, httpx.HTTPStatusError):
        return False
    return exc.response.status_code == 502
64
+
65
+
66
+ class BestBuy(FlowComponent, SeleniumService, HTTPService):
67
+ """BestBuy.
68
+
69
+ Combining API Key and Web Scraping, this component will be able to extract
70
+ Best Buy Information (stores, products, Product Availability, etc).
71
+
72
+
73
+ Example:
74
+
75
+ ```yaml
76
+ BestBuy:
77
+ type: availability
78
+ product_info: false
79
+ brand: Bose
80
+ ```
81
+
82
+ """
83
def __init__(
    self,
    loop: asyncio.AbstractEventLoop = None,
    job: Callable = None,
    stat: Callable = None,
    **kwargs,
):
    """Configure the component, its cookies and its default HTTP headers.

    Keyword options read here:
        type: name of the method to run (required; removed from kwargs).
        chunk_size: defaults to 100.
        task_parts: defaults to 10.
        product_info: defaults to False.

    Raises:
        ConfigError: when no ``type`` was supplied.
    """
    # `type` is consumed here; every other option stays in kwargs and is
    # forwarded to the parent constructor.
    self._fn = kwargs.pop('type', None)
    self.chunk_size: int = kwargs.get('chunk_size', 100)
    self.task_parts: int = kwargs.get('task_parts', 10)
    self.product_info: bool = kwargs.get('product_info', False)
    if not self._fn:
        raise ConfigError(
            "BestBuy: require a `type` Function to be called, ex: availability"
        )
    super(BestBuy, self).__init__(loop=loop, job=job, stat=stat, **kwargs)

    # Always use proxies:
    self.use_proxy: bool = True
    self._free_proxy: bool = False

    # Fixed pools of cookie values; one of each is picked at random.
    ctt_pool: list = [
        "f3dbf688e45146555bb2b8604a993601",
        "06f4dfe367e87866397ef32302f5042e",
        "4e07e03ff03f5debc4e09ac4db9239ac",
    ]
    sid_pool: list = [
        "d4fa1142-2998-4b68-af78-46d821bb3e1f",
        "9627390e-b423-459f-83ee-7964dd05c9a8",
    ]
    chosen_ctt = random.choice(ctt_pool)
    chosen_sid = random.choice(sid_pool)
    self.cookies = {
        "CTT": chosen_ctt,
        "SID": chosen_sid,
        "bby_rdp": "l",
        "bm_sz": "9F5ED0110AF18594E2347A89BB4AB998~YAAQxm1lX6EqYHGSAQAAw+apmhkhXIeGYEc4KnzUMsjeac3xEoQmTNz5+of62i3RXQL6fUI+0FvCb/jgSjiVQOcfaSF+LdLkOXP1F4urgeIcqp/dBAhu5MvZXaCQsT06bwr7j21ozhFfTTWhjz1HmZN8wecsE6WGbK6wXp/33ODKlLaGWkTutqHbkzvMiiHXBCs9hT8jVny0REfita4AfqTK85Y6/M6Uq4IaDLPBLnTtJ0cTlPHk1HmkG5EsnI46llghcx1KZnCGnvZfHdb2ME9YZJ2GmC2b7dNmAgyL/gSVpoNdCJOj5Jk6z/MCVhZ81OZfX4S01E2F1mBGq4uV5/1oK2KR4YgZP4dsTN8izEEPybUKGY3CyM1gOUc=~3556420~4277810",  # noqa
        "bby_cbc_lb": "p-browse-e",
        "intl_splash": "false",
    }

    # Component defaults come first so that headers already present on the
    # instance after the parent constructor take precedence.
    default_headers = {
        "Host": "www.bestbuy.com",
        "Referer": "https://www.bestbuy.com/",
        "X-Requested-With": "XMLHttpRequest",
        "TE": "trailers",
        "Accept-Language": "en-US,en;q=0.5",
        "Accept-Encoding": "gzip, deflate",
        "DNT": "1",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": random.choice(ua),
    }
    self.headers: dict = {**default_headers, **self.headers}

    # Caps the number of coroutines inside `async with self.semaphore` at 10.
    self.semaphore = asyncio.Semaphore(10)
140
+
141
async def close(self, **kwargs) -> bool:
    """Release the browser driver via ``close_driver`` and report success.

    Extra keyword arguments are accepted and ignored.
    """
    self.close_driver()
    return True
144
+
145
async def start(self, **kwargs) -> bool:
    """Validate the input and configuration before the component runs.

    Runs the parent ``start``, adopts the previous component's output as
    ``self.data`` when there is one, resolves the API token and checks that
    the configured ``type`` names an attribute of this component.

    Returns:
        True once every check has passed.

    Raises:
        ComponentError: the previous component's output is not a DataFrame.
        ConfigError: ``type`` does not name an attribute of this component.
    """
    await super(BestBuy, self).start(**kwargs)
    if self.previous:
        self.data = self.input
        if not isinstance(self.data, pd.DataFrame):
            raise ComponentError(
                "Incompatible Pandas Dataframe"
            )
    # Having no previous component is tolerated: nothing is raised here.

    # An `api_token` set on the instance is passed to get_env_value;
    # otherwise the BEST_BUY_API_KEY name is used.
    token_ref = getattr(self, 'api_token', 'BEST_BUY_API_KEY')
    self.api_token = self.get_env_value(token_ref)

    if not hasattr(self, self._fn):
        raise ConfigError(
            f"BestBuy: Unable to found Function {self._fn} in BBY Component."
        )
    # The signature promises a bool; previously this fell through to None.
    return True
168
+
169
+ def _get_search_url(self, brand: str, sku: str) -> str:
170
+ front_url = "https://www.bestbuy.com/site/searchpage.jsp?cp="
171
+ middle_url = "&searchType=search&st="
172
+ page_count = 1
173
+ # TODO: Get the Brand and Model from the Component.
174
+ search_term = f'{sku}'
175
+ end_url = "&_dyncharset=UTF-8&id=pcat17071&type=page&sc=Global&nrp=&sp=&qp=&list=n&af=true&iht=y&usc=All%20Categories&ks=960&keys=keys" # noqa
176
+ url = front_url + str(page_count) + middle_url + search_term + end_url
177
+ print('SEARCH URL: ', url)
178
+ return url
179
+
180
async def _extract_product_info(self, product_element):
    """Extract product information from a single search-result element.

    Args:
        product_element: parsed HTML element of one product entry; it must
            offer ``get`` and ``select_one`` (NOTE(review): presumably a
            BeautifulSoup Tag - confirm against the caller).

    Returns:
        A dict with the keys ``sku``, ``brand``, ``product_name``,
        ``image_url``, ``price`` and ``url``; or None when no title link is
        found or any error occurs (the error is logged, not raised).
    """
    try:
        # Primary source of the SKU is the data-testid attribute.
        sku_id = product_element.get("data-testid")
        if not sku_id:
            # Fallback: look for the SKU inside the attribute div.
            sku_element = product_element.select_one(
                'div.attribute:-soup-contains("SKU") span.value'
            )
            if sku_element:
                sku_id = sku_element.text.strip()

        # The title link gives both the product name and its URL. The
        # selector list already covers `a.product-list-item-link`, so no
        # second lookup is needed.
        title_element = product_element.select_one(
            '.product-list-item-title a, h4.sku-title a, a.product-list-item-link'
        )
        if not title_element:
            return None
        title = title_element.text.strip()

        price_element = product_element.select_one(
            'div[data-testid="medium-customer-price"], div.customer-price.medium, .priceView-customer-price span, .price-block span'
        )
        price = price_element.text.strip() if price_element else "N/A"

        image_element = product_element.select_one('img.product-image, img')
        image = image_element.get('src') if image_element else None

        # href is returned as found in the page (None when absent).
        url = title_element.get('href')
        self._logger.notice(f':: Product URL: {url}')

        return {
            "sku": sku_id,
            "brand": self.brand,
            "product_name": title,
            "image_url": image,
            "price": price,
            "url": url
        }
    except Exception as e:
        # Best effort: one malformed entry must not abort the whole scrape.
        self._logger.error(f"Error extracting product info: {e}")
        return None
233
+
234
async def _product_info(self, idx, row):
    """Search for one product and copy its details into ``self.data``.

    Builds the search URL for the row's brand and model, loads and scrolls
    the results page, then scans the listed products for one whose SKU or
    model matches the row. On a match the extracted fields are written to
    ``self.data`` at ``idx`` and ``enabled`` is set to True; otherwise
    ``enabled`` stays False.

    Args:
        idx: Index label of the row inside ``self.data``.
        row: Mapping-like row providing ``model`` and ``brand`` and,
            optionally, ``sku``.

    Returns:
        The ``row`` that was passed in, in every outcome.
    """
    def _as_text(value):
        """Return a cell value as stripped text, or None when it is missing."""
        if value is None:
            return None
        if isinstance(value, float):
            if value != value:  # NaN marks a missing cell
                return None
            if value.is_integer():
                # 6512345.0 must compare equal to the page's "6512345"
                value = int(value)
        text = str(value).strip()
        return text or None

    async with self.semaphore:
        # Extract model, brand and SKU from the row
        model = row['model']
        brand = row['brand']
        sku = row['sku'] if 'sku' in row else None

        # Kept on the instance because _extract_product_info reads self.brand.
        self.brand = brand
        self.sku = sku
        self.model = model

        # The page always yields strings, while DataFrame cells may hold
        # ints, floats or NaN; normalize once so comparisons are type-safe
        # and do not depend on instance state another task may overwrite.
        wanted_sku = _as_text(sku)
        wanted_model = _as_text(model)

        try:
            # Generate the search URL
            url = self._get_search_url(brand, model)

            # Initialize Selenium driver if it doesn't exist
            if not self._driver:
                await self.get_driver()

            # Navigate to the URL
            await self.get_page(url)

            # Initially assume we won't find the product
            self.data.loc[idx, 'enabled'] = False

            # Progressive scroll with long pauses so dynamic content loads
            self._execute_scroll(scroll_pause_time=4.0, max_scrolls=10)

            # Additional wait after completing all scrolls
            await asyncio.sleep(3)

            # Parse the HTML content of the page after scrolling
            soup = BeautifulSoup(self._driver.page_source, 'html.parser')

            # Find all product items, falling back to the alternate class name
            product_items = soup.find_all('li', {'class': ['product-list-item']})
            if not product_items:
                product_items = soup.find_all('li', {'class': ['sku-item']})

            # Debug: print how many products were found
            print(f"Found {len(product_items)} products in search results")
            print(f"Searching for: Model={model}, SKU={sku}")

            for item in product_items:
                try:
                    # Get the SKU from data-testid
                    sku_id = item.get("data-testid")

                    # Try to get the model
                    model_element = item.select_one('div.attribute:-soup-contains("Model") span.value')
                    model_value = model_element.text.strip() if model_element else None

                    # Debug information
                    if model_value or sku_id:
                        print(f"Found product: SKU={sku_id}, Model={model_value}")
                        if model:
                            print(f"Comparing with our Model={model}")
                        if sku:
                            print(f"Comparing with our SKU={sku}")

                    # SKU must match exactly (after text normalization)
                    sku_match = (
                        wanted_sku is not None
                        and sku_id is not None
                        and str(sku_id).strip() == wanted_sku
                    )

                    # Model comparison is deliberately lenient: exact,
                    # case-insensitive, whitespace-insensitive or containment.
                    model_match = False
                    if model_value and wanted_model:
                        model_match = (
                            model_value == wanted_model
                            or model_value.lower() == wanted_model.lower()
                            or model_value.replace(" ", "") == wanted_model.replace(" ", "")
                            or wanted_model in model_value
                            or model_value in wanted_model
                        )

                    if sku_match or model_match:
                        self._logger.info(f"Found matching product - SKU: {sku_id}, Model: {model_value}")

                        # Extract product information from this element
                        product_info = await self._extract_product_info(item)

                        if product_info:
                            # Update the DataFrame with product information
                            for key, value in product_info.items():
                                if key in self.data.columns:
                                    self.data.loc[idx, key] = value
                                else:
                                    self.data.at[idx, key] = value

                            # Mark as enabled since we found a match
                            self.data.loc[idx, 'enabled'] = True

                            # We found our product, no need to continue
                            return row
                except Exception as e:
                    # One broken entry must not stop the scan of the others.
                    print(f"Error processing product: {e}")

            # If we got here, no match was found
            self._logger.warning(f"No matching product found for {brand} {model} / {sku}")
            return row

        except Exception as exc:
            self._logger.error(f"Error during product search for {brand} {model}: {exc}")
            return row
343
+
344
def chunkify(self, lst, n):
    """Lazily yield consecutive slices of ``lst`` holding at most ``n`` items.

    The final slice is shorter when ``len(lst)`` is not a multiple of ``n``.
    """
    yield from (lst[start:start + n] for start in range(0, len(lst), n))
348
+
349
@backoff.on_exception(
    backoff.expo,
    (httpx.ConnectTimeout, httpx.HTTPStatusError),
    max_tries=2,
    giveup=lambda e: not bad_gateway_exception(e) and not isinstance(e, httpx.ConnectTimeout)
)
async def _check_store_availability(self, idx, row, cookies):
    """Query store availability for one row and record it in ``self.data``.

    Posts the row's location, zip code and SKU to the store-availability
    endpoint and, for every location in the response that matches a
    (location_code, sku) pair in ``self.data``, stores the returned fields
    and flags that row as ``checked``.

    Args:
        idx: Index label of the row inside ``self.data``.
        row: Row providing ``zipcode``, ``location_code``, ``sku`` and
            ``brand``.
        cookies: Cookie jar forwarded to ``self.api_post``.

    Returns:
        The ``row`` that was passed in, in every outcome.
    """
    # Local import so this method does not depend on the file's import block.
    import copy

    # NOTE(review): the request errors are caught and swallowed below, so the
    # backoff decorator presumably never sees an exception to retry -- confirm.
    async with self.semaphore:
        # Prepare payload for the API request
        zipcode = row['zipcode']
        location_code = str(row['location_code'])
        sku = row['sku']
        brand = row['brand']

        # Deep copy: a shallow dict.copy() shares the nested "items" dicts
        # with the module-level template, so writing the SKU would leak into
        # the template and into payloads built by concurrent tasks.
        payload = copy.deepcopy(ProductPayload)
        payload["locationId"] = location_code
        payload["zipCode"] = zipcode
        for item in payload["items"]:
            item["sku"] = sku

        # Skip the HTTP call when this store/SKU pair is already checked.
        # bool() is required: values read from a pandas bool column are
        # numpy.bool_, for which "is True" is always False.
        matching_store = self.data[
            (self.data['location_code'] == location_code) & (self.data['sku'] == sku)
        ]
        if not matching_store.empty and bool(matching_store.iloc[0]['checked']):
            # exit without making any HTTP call.
            return row
        try:
            result = await self.api_post(
                url="https://www.bestbuy.com/productfulfillment/c/api/2.0/storeAvailability",
                cookies=cookies,
                payload=payload
            )
            self._num_iterations += 1
        except (httpx.TimeoutException, httpx.HTTPError) as ex:
            self._logger.warning(f"Request failed: {ex}")
            return row
        except Exception as ex:
            self._logger.error(f"An error occurred: {ex}")
            return row

        if not result:
            self._logger.warning(
                f"No availability data found for {sku} at zipcode {zipcode}"
            )
            return row

        # Extract the availability data from the API response
        items = result.get('ispu', {}).get('items', [])
        for item in items:
            locations = item.get('locations', [])
            for location in locations:
                # NOTE(review): idx is rebound further down, so from the second
                # location on this writes to the previously matched row --
                # confirm that is intended.
                self.data.loc[idx, 'enabled'] = (
                    result.get('consolidatedButtonState', {}).get('buttonState', '') != 'NOT_AVAILABLE'
                )
                lid = location.get('locationId')
                # Find matching store and SKU in DataFrame
                matching_store = self.data[
                    (self.data['location_code'] == lid) & (self.data['sku'] == sku)
                ]
                if not matching_store.empty:
                    idx = matching_store.index[0]
                    if bool(self.data.loc[idx, 'checked']):
                        print('Already checked, continue ...')
                        continue  # Skip this row if it's already marked as checked
                    if 'availability' not in location:
                        self.data.loc[idx, 'locationId'] = lid
                        self.data.loc[idx, 'checked'] = True
                        continue  # This store doesn't have availability
                    print(f'Found matching store {lid} for sku {sku}')

                    # Update the DataFrame row with new availability data
                    self.data.loc[idx, ['brand', 'location_data']] = [brand, location]
                    for key, val in location.items():
                        if key in self.data.columns:
                            self.data.at[idx, key] = val
                        else:
                            self.data.at[idx, key] = None
                        if key == 'inStoreAvailability':
                            # dict.get never raises KeyError; guard against a
                            # non-dict value (e.g. null) instead.
                            quantity = (
                                val.get('availableInStoreQuantity', 0)
                                if isinstance(val, dict) else None
                            )
                            self.data.loc[idx, 'availableInStoreQuantity'] = quantity
                    # Mark the row as checked
                    self.data.loc[idx, 'checked'] = True
        return row
434
+
435
def column_exists(self, column: str, default_val: Any = None):
    """Report whether ``column`` is in ``self.data``, creating it if absent.

    A missing column is logged as a warning and then added to the DataFrame
    with every row set to ``default_val``.

    Returns:
        True when the column was already present, False when it was created.
    """
    if column in self.data.columns:
        return True

    self._logger.warning(
        f"Column {column} does not exist in the Dataframe"
    )
    self.data[column] = default_val
    return False
443
+
444
async def availability(self):
    """availability.

    Best Buy Product Availability.

    Copies ``self.cookies`` into an httpx cookie jar, makes sure the result
    columns exist, resets the ``checked`` flag and runs one store
    availability check per row of ``self.data``.

    Returns:
        ``self.data`` after all checks have run.
    """
    jar = httpx.Cookies()
    for name, content in self.cookies.items():
        jar.set(name, content, domain='.bestbuy.com', path='/')

    # Columns written by the checks, each with the default used on creation.
    result_columns = (
        ('brand', None),
        ('location_data', None),
        ('locationId', None),
        ('availability', None),
        ('inStoreAvailability', None),
        ('onShelfDisplay', False),
        ('availableInStoreQuantity', 0),
        ('enabled', False),
    )
    for column_name, default in result_columns:
        self.column_exists(column_name, default)

    # Every row starts out unchecked.
    self.data['checked'] = False

    print('starting ...')

    # One coroutine per row; nothing runs until they are awaited below.
    pending = []
    for idx, row in self.data.iterrows():
        pending.append(self._check_store_availability(idx, row, jar))

    self._num_iterations = 0
    await self._processing_tasks(pending)

    self.add_metric('NUM_HTTP_CALLS', self._num_iterations)

    self._logger.notice(
        "Ending Checking Availability."
    )

    # return existing data
    return self.data
493
+
494
async def products(self):
    """
    Fetch all products from the Best Buy API by paginating through all pages.

    Pagination stops at the last page reported by the API, or as soon as a
    page carries no products.

    Returns:
        pandas.DataFrame: One row per product collected from all pages.
        On any error an empty list is returned instead (kept for backward
        compatibility with existing callers).
    """
    all_products = []
    current_page = 1
    total_pages = None
    show = 'sku,upc,modelNumber,name,manufacturer,type,salePrice,url,productTemplate,classId,class,subclassId,subclass,department,image,longDescription,customerReviewCount,customerReviewAverage'
    self._num_iterations = 0
    try:
        # One session for the whole crawl instead of a new one per page.
        async with aiohttp.ClientSession() as session:
            while True:
                url = f"https://api.bestbuy.com/v1/products?page={current_page}&pageSize=100&apiKey={self.api_token}&show={show}&format=json"
                async with session.get(url) as result:
                    response = await result.json()
                self._num_iterations += 1

                # Extract products from the response
                products = response.get("products", [])
                if not products:
                    # An empty page (or an error payload without "products")
                    # ends the crawl. The previous `continue` re-requested
                    # the same page forever because the counter never moved.
                    break
                all_products.extend(products)

                # Pagination control: total_pages is fixed by the first page.
                current_page = response.get("currentPage", current_page)
                if total_pages is None:
                    total_pages = response.get("totalPages", current_page)
                self._logger.debug(f"{url}\n Current Page: {current_page}, Total Pages: {total_pages}, Products: {len(all_products)}")

                # Break if we've processed all pages
                if current_page >= total_pages:
                    break

                # Increment page for the next request
                current_page += 1

        self.add_metric('NUM_HTTP_CALLS', self._num_iterations)
        return pd.DataFrame(all_products)

    except Exception as exc:
        self._logger.error(f"Error while fetching products: {exc}")
        return []
540
+
541
async def stores(self):
    """
    Fetch all stores from the Best Buy API by paginating through all pages.

    Pagination stops at the last page reported by the API, or as soon as a
    page carries no stores.

    Returns:
        pandas.DataFrame: One row per store collected from all pages.
        On any error an empty list is returned instead (kept for backward
        compatibility with existing callers).
    """
    all_stores = []
    current_page = 1
    total_pages = None
    self._num_iterations = 0
    try:
        # One session for the whole crawl instead of a new one per page.
        async with aiohttp.ClientSession() as session:
            while True:
                url = f"https://api.bestbuy.com/v1/stores?page={current_page}&pageSize=100&apiKey={self.api_token}&format=json"
                async with session.get(url) as result:
                    response = await result.json()
                self._num_iterations += 1

                # Extract stores from the response
                stores = response.get("stores", [])
                if not stores:
                    # An empty page (or an error payload without "stores")
                    # ends the crawl. The previous `continue` re-requested
                    # the same page forever because the counter never moved.
                    break
                all_stores.extend(stores)

                # Pagination control: total_pages is fixed by the first page.
                current_page = response.get("currentPage", current_page)
                if total_pages is None:
                    total_pages = response.get("totalPages", current_page)
                self._logger.debug(f"{url}\n Current Page: {current_page}, Total Pages: {total_pages}, Stores: {len(all_stores)}")

                # Break if we've processed all pages
                if current_page >= total_pages:
                    break

                # Increment page for the next request
                current_page += 1

        self.add_metric('NUM_HTTP_CALLS', self._num_iterations)
        return pd.DataFrame(all_stores)

    except Exception as exc:
        self._logger.error(f"Error while fetching stores: {exc}")
        return []
583
+
584
@backoff.on_exception(
    backoff.expo,
    (httpx.TimeoutException, httpx.ConnectTimeout, httpx.HTTPStatusError),
    max_tries=3,
    jitter=backoff.full_jitter,
    giveup=lambda e: not bad_gateway_exception(e) and not isinstance(e, httpx.ConnectTimeout)
)
async def _product_reviews(self, idx, row, cookies):
    """Fetch the most recent reviews for the SKU of one row.

    Requests up to 20 pages of 20 reviews each and stops early on an empty
    response, an empty page or the last page reported by the API.

    Args:
        idx: Index label of the row (not used by this method).
        row: Row providing ``sku``; it must also offer ``to_dict()``, whose
            content is merged into every returned review.
        cookies: Cookie jar forwarded to ``self.api_get``.

    Returns:
        list[dict]: One dict per review combining the row data, a
        ``total_reviews`` entry and the review fields. An empty list when
        a request fails.
    """
    # NOTE(review): the request errors are caught and swallowed below, so the
    # backoff decorator presumably never sees an exception to retry -- confirm.
    async with self.semaphore:
        sku = row['sku']
        pagesize = 20
        max_pages = 20  # Maximum number of pages to fetch
        current_page = 1
        all_reviews = []
        total_reviews = 0
        try:
            while current_page <= max_pages:
                payload = {
                    "page": current_page,
                    "pageSize": pagesize,
                    "sort": "MOST_RECENT",
                    "sku": sku
                }
                result = await self.api_get(
                    url="https://www.bestbuy.com/ugc/v2/reviews",
                    cookies=cookies,
                    params=payload
                )
                await asyncio.sleep(0.1)
                # Guard BEFORE touching the response: calling .get on a None
                # result used to raise and discard the pages already fetched.
                if not result:
                    self._logger.warning(
                        f"No Product Reviews found for {sku}."
                    )
                    break
                total_reviews = result.get('totalResults', 0)

                # Extract the reviews data from the API response
                items = result.get('topics', [])
                if not items:
                    break

                all_reviews.extend(items)

                # Determine if we've reached the last page
                total_pages = result.get('totalPages', max_pages)
                if current_page >= total_pages:
                    break
                current_page += 1  # Move to the next page
        except (httpx.TimeoutException, httpx.HTTPError) as ex:
            self._logger.warning(f"Request failed: {ex}")
            return []
        except Exception as ex:
            self._logger.error(f"An error occurred: {ex}")
            return []

        # Keys dropped from every review before it is merged with the row.
        excluded = ('brandResponses', 'badges', 'photos', 'secondaryRatings')
        reviews = []
        for item in all_reviews:
            filtered_item = {k: v for k, v in item.items() if k not in excluded}
            # Combine with original row data; review fields win on key clashes.
            review_data = row.to_dict()
            review_data['total_reviews'] = total_reviews
            review_data.update(filtered_item)
            reviews.append(review_data)
        self._logger.info(
            f"Fetched {len(reviews)} reviews for SKU {sku}."
        )
        # Random pause before returning.
        await asyncio.sleep(random.randint(1, 3))
        return reviews
656
+
657
async def reviews(self):
    """reviews.

    Best Buy Product Reviews.

    Runs one review lookup per row of ``self.data``, merges all reviews into
    a single DataFrame, de-duplicates it by review ``id`` and replaces
    ``self.data`` with the outcome.

    Returns:
        The new ``self.data`` holding the reviews.
    """
    jar = httpx.Cookies()
    for name, content in self.cookies.items():
        jar.set(name, content, domain='.bestbuy.com', path='/')

    # Every row starts out unchecked.
    self.data['checked'] = False

    print('starting ...')

    # One coroutine per row; nothing runs until they are awaited below.
    pending = []
    for idx, row in self.data.iterrows():
        pending.append(self._product_reviews(idx, row, jar))

    per_row_reviews = await self._processing_tasks(pending)

    # Each task yields a list of reviews; merge them into one flat list.
    flat_reviews = []
    for batch in per_row_reviews:
        flat_reviews.extend(batch)

    reviews_df = pd.DataFrame(flat_reviews)

    # Drop repeated reviews, identified by the review 'id' column.
    if 'id' in reviews_df.columns:
        reviews_df = reviews_df.drop_duplicates(subset=['id'])

    # "text" becomes "review" and "id" becomes "reviewid".
    reviews_df = reviews_df.rename(columns={'text': 'review', 'id': 'reviewid'})

    # Tag every review with its origin.
    reviews_df['origin'] = 'bestbuy'

    self._logger.notice(
        f"Ending Product Reviews: {len(reviews_df)}"
    )

    # The reviews replace the input DataFrame.
    self.data = reviews_df
    return self.data
712
+
713
async def product(self):
    """product.

    Best Buy Product Information.

    Makes sure the input/output columns exist, starts the Selenium driver in
    headless desktop mode when none is running, looks up every row of
    ``self.data`` and tags the result with its origin.

    Returns:
        ``self.data`` with the product columns filled in.
    """
    # Ensure required columns exist in the DataFrame
    self.column_exists('model')
    self.column_exists('brand')
    self.column_exists('sku')
    self.column_exists('product_name')
    self.column_exists('image_url')
    self.column_exists('price')
    self.column_exists('url')
    self.column_exists('enabled', False)

    # Run the browser headless (set to False when debugging).
    self.headless = True

    # Always set as_mobile to False to ensure desktop mode
    self.as_mobile = False

    # Initialize Selenium driver
    if not self._driver:
        await self.get_driver()

    try:
        # Create tasks to process each row in the DataFrame
        tasks = [
            self._product_info(
                idx,
                row
            ) for idx, row in self.data.iterrows()
        ]

        # Process tasks concurrently
        await self._processing_tasks(tasks)

        # Add origin column
        self.data['origin'] = 'bestbuy'
    finally:
        # Close the driver even when a lookup fails, so a failed run does not
        # leave a browser process behind.
        self.close_driver()

    # Return the updated DataFrame
    return self.data
757
+
758
async def run(self):
    """Run the Best Buy service function selected by ``self._fn``.

    The result is printed together with a per-column summary (dtype and
    first value), stored in ``self._result`` and returned.

    Raises:
        ComponentError: When ``self._fn`` does not name a callable attribute
            or when the function fails with an unexpected error.
        TimeoutError, NotSupported: Re-raised unchanged.
    """
    # Default of None so an unknown name reaches the ComponentError below
    # instead of surfacing as a bare AttributeError.
    fn = getattr(self, self._fn, None)
    result = None
    if not callable(fn):
        raise ComponentError(
            f"Best Buy: Function {self._fn} doesn't exists."
        )
    try:
        result = await fn()
    except (ComponentError, TimeoutError, NotSupported):
        raise
    except Exception as exc:
        raise ComponentError(
            f"BestBuy: Unknown Error: {exc}"
        ) from exc
    # Print results
    print(result)
    print("::: Printing Column Information === ")
    # products()/stores() return a plain list on error, and a DataFrame may
    # be empty; neither case may crash the debug summary.
    if isinstance(result, pd.DataFrame):
        has_rows = len(result) > 0
        for column, t in result.dtypes.items():
            sample = result[column].iloc[0] if has_rows else None
            print(column, "->", t, "->", sample)
    self._result = result
    return self._result
781
+
782
def split_parts(self, task_list, num_parts: int = 5) -> list:
    """Cut ``task_list`` into ``num_parts`` consecutive slices of near-equal size.

    When the length is not evenly divisible, the leading slices each receive
    one extra element. Slices may be empty when there are fewer elements than
    parts.
    """
    base, extra = divmod(len(task_list), num_parts)
    chunks = []
    cursor = 0
    for position in range(num_parts):
        # The first `extra` slices absorb the remainder, one element each.
        width = base + 1 if position < extra else base
        chunks.append(task_list[cursor:cursor + width])
        cursor += width
    return chunks
793
+
794
+ async def _processing_tasks(self, tasks: list) -> pd.DataFrame:
795
+ """Process tasks concurrently."""
796
+ results = []
797
+ for chunk in self.split_parts(tasks, self.task_parts):
798
+ result = await asyncio.gather(*chunk, return_exceptions=False)
799
+ results.extend(result)
800
+ return results