flowtask 5.8.4__cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (470) hide show
  1. flowtask/__init__.py +93 -0
  2. flowtask/__main__.py +38 -0
  3. flowtask/bots/__init__.py +6 -0
  4. flowtask/bots/check.py +93 -0
  5. flowtask/bots/codebot.py +51 -0
  6. flowtask/components/ASPX.py +148 -0
  7. flowtask/components/AddDataset.py +352 -0
  8. flowtask/components/Amazon.py +523 -0
  9. flowtask/components/AutoTask.py +314 -0
  10. flowtask/components/Azure.py +80 -0
  11. flowtask/components/AzureUsers.py +106 -0
  12. flowtask/components/BaseAction.py +91 -0
  13. flowtask/components/BaseLoop.py +198 -0
  14. flowtask/components/BestBuy.py +800 -0
  15. flowtask/components/CSVToGCS.py +120 -0
  16. flowtask/components/CompanyScraper/__init__.py +1 -0
  17. flowtask/components/CompanyScraper/parsers/__init__.py +6 -0
  18. flowtask/components/CompanyScraper/parsers/base.py +102 -0
  19. flowtask/components/CompanyScraper/parsers/explorium.py +192 -0
  20. flowtask/components/CompanyScraper/parsers/leadiq.py +206 -0
  21. flowtask/components/CompanyScraper/parsers/rocket.py +133 -0
  22. flowtask/components/CompanyScraper/parsers/siccode.py +109 -0
  23. flowtask/components/CompanyScraper/parsers/visualvisitor.py +130 -0
  24. flowtask/components/CompanyScraper/parsers/zoominfo.py +118 -0
  25. flowtask/components/CompanyScraper/scrapper.py +1054 -0
  26. flowtask/components/CopyTo.py +177 -0
  27. flowtask/components/CopyToBigQuery.py +243 -0
  28. flowtask/components/CopyToMongoDB.py +291 -0
  29. flowtask/components/CopyToPg.py +609 -0
  30. flowtask/components/CopyToRethink.py +207 -0
  31. flowtask/components/CreateGCSBucket.py +102 -0
  32. flowtask/components/CreateReport/CreateReport.py +228 -0
  33. flowtask/components/CreateReport/__init__.py +9 -0
  34. flowtask/components/CreateReport/charts/__init__.py +15 -0
  35. flowtask/components/CreateReport/charts/bar.py +51 -0
  36. flowtask/components/CreateReport/charts/base.py +66 -0
  37. flowtask/components/CreateReport/charts/pie.py +64 -0
  38. flowtask/components/CreateReport/utils.py +9 -0
  39. flowtask/components/CustomerSatisfaction.py +196 -0
  40. flowtask/components/DataInput.py +200 -0
  41. flowtask/components/DateList.py +255 -0
  42. flowtask/components/DbClient.py +163 -0
  43. flowtask/components/DialPad.py +146 -0
  44. flowtask/components/DocumentDBQuery.py +200 -0
  45. flowtask/components/DownloadFrom.py +371 -0
  46. flowtask/components/DownloadFromD2L.py +113 -0
  47. flowtask/components/DownloadFromFTP.py +181 -0
  48. flowtask/components/DownloadFromIMAP.py +315 -0
  49. flowtask/components/DownloadFromS3.py +198 -0
  50. flowtask/components/DownloadFromSFTP.py +265 -0
  51. flowtask/components/DownloadFromSharepoint.py +110 -0
  52. flowtask/components/DownloadFromSmartSheet.py +114 -0
  53. flowtask/components/DownloadS3File.py +229 -0
  54. flowtask/components/Dummy.py +59 -0
  55. flowtask/components/DuplicatePhoto.py +411 -0
  56. flowtask/components/EmployeeEvaluation.py +237 -0
  57. flowtask/components/ExecuteSQL.py +323 -0
  58. flowtask/components/ExtractHTML.py +178 -0
  59. flowtask/components/FileBase.py +178 -0
  60. flowtask/components/FileCopy.py +181 -0
  61. flowtask/components/FileDelete.py +82 -0
  62. flowtask/components/FileExists.py +146 -0
  63. flowtask/components/FileIteratorDelete.py +112 -0
  64. flowtask/components/FileList.py +194 -0
  65. flowtask/components/FileOpen.py +75 -0
  66. flowtask/components/FileRead.py +120 -0
  67. flowtask/components/FileRename.py +106 -0
  68. flowtask/components/FilterIf.py +284 -0
  69. flowtask/components/FilterRows/FilterRows.py +200 -0
  70. flowtask/components/FilterRows/__init__.py +10 -0
  71. flowtask/components/FilterRows/functions.py +4 -0
  72. flowtask/components/GCSToBigQuery.py +103 -0
  73. flowtask/components/GoogleA4.py +150 -0
  74. flowtask/components/GoogleGeoCoding.py +344 -0
  75. flowtask/components/GooglePlaces.py +315 -0
  76. flowtask/components/GoogleSearch.py +539 -0
  77. flowtask/components/HTTPClient.py +268 -0
  78. flowtask/components/ICIMS.py +146 -0
  79. flowtask/components/IF.py +179 -0
  80. flowtask/components/IcimsFolderCopy.py +173 -0
  81. flowtask/components/ImageFeatures/__init__.py +5 -0
  82. flowtask/components/ImageFeatures/process.py +233 -0
  83. flowtask/components/IteratorBase.py +251 -0
  84. flowtask/components/LangchainLoader/__init__.py +5 -0
  85. flowtask/components/LangchainLoader/loader.py +194 -0
  86. flowtask/components/LangchainLoader/loaders/__init__.py +22 -0
  87. flowtask/components/LangchainLoader/loaders/abstract.py +362 -0
  88. flowtask/components/LangchainLoader/loaders/basepdf.py +50 -0
  89. flowtask/components/LangchainLoader/loaders/docx.py +91 -0
  90. flowtask/components/LangchainLoader/loaders/html.py +119 -0
  91. flowtask/components/LangchainLoader/loaders/pdfblocks.py +146 -0
  92. flowtask/components/LangchainLoader/loaders/pdfmark.py +79 -0
  93. flowtask/components/LangchainLoader/loaders/pdftables.py +135 -0
  94. flowtask/components/LangchainLoader/loaders/qa.py +67 -0
  95. flowtask/components/LangchainLoader/loaders/txt.py +55 -0
  96. flowtask/components/LeadIQ.py +650 -0
  97. flowtask/components/Loop.py +253 -0
  98. flowtask/components/Lowes.py +334 -0
  99. flowtask/components/MS365Usage.py +156 -0
  100. flowtask/components/MSTeamsMessages.py +320 -0
  101. flowtask/components/MarketClustering.py +1051 -0
  102. flowtask/components/MergeFiles.py +362 -0
  103. flowtask/components/MilvusOutput.py +87 -0
  104. flowtask/components/NearByStores.py +175 -0
  105. flowtask/components/NetworkNinja/__init__.py +6 -0
  106. flowtask/components/NetworkNinja/models/__init__.py +52 -0
  107. flowtask/components/NetworkNinja/models/abstract.py +177 -0
  108. flowtask/components/NetworkNinja/models/account.py +39 -0
  109. flowtask/components/NetworkNinja/models/client.py +19 -0
  110. flowtask/components/NetworkNinja/models/district.py +14 -0
  111. flowtask/components/NetworkNinja/models/events.py +101 -0
  112. flowtask/components/NetworkNinja/models/forms.py +499 -0
  113. flowtask/components/NetworkNinja/models/market.py +16 -0
  114. flowtask/components/NetworkNinja/models/organization.py +34 -0
  115. flowtask/components/NetworkNinja/models/photos.py +125 -0
  116. flowtask/components/NetworkNinja/models/project.py +44 -0
  117. flowtask/components/NetworkNinja/models/region.py +28 -0
  118. flowtask/components/NetworkNinja/models/store.py +203 -0
  119. flowtask/components/NetworkNinja/models/user.py +151 -0
  120. flowtask/components/NetworkNinja/router.py +854 -0
  121. flowtask/components/Odoo.py +175 -0
  122. flowtask/components/OdooInjector.py +192 -0
  123. flowtask/components/OpenFromXML.py +126 -0
  124. flowtask/components/OpenWeather.py +41 -0
  125. flowtask/components/OpenWithBase.py +616 -0
  126. flowtask/components/OpenWithPandas.py +715 -0
  127. flowtask/components/PGPDecrypt.py +199 -0
  128. flowtask/components/PandasIterator.py +187 -0
  129. flowtask/components/PandasToFile.py +189 -0
  130. flowtask/components/Paradox.py +339 -0
  131. flowtask/components/ParamIterator.py +117 -0
  132. flowtask/components/ParseHTML.py +84 -0
  133. flowtask/components/PlacerStores.py +249 -0
  134. flowtask/components/Pokemon.py +507 -0
  135. flowtask/components/PositiveBot.py +62 -0
  136. flowtask/components/PowerPointSlide.py +400 -0
  137. flowtask/components/PrintMessage.py +127 -0
  138. flowtask/components/ProductCompetitors/__init__.py +5 -0
  139. flowtask/components/ProductCompetitors/parsers/__init__.py +7 -0
  140. flowtask/components/ProductCompetitors/parsers/base.py +72 -0
  141. flowtask/components/ProductCompetitors/parsers/bestbuy.py +86 -0
  142. flowtask/components/ProductCompetitors/parsers/lowes.py +103 -0
  143. flowtask/components/ProductCompetitors/scrapper.py +155 -0
  144. flowtask/components/ProductCompliant.py +169 -0
  145. flowtask/components/ProductInfo/__init__.py +1 -0
  146. flowtask/components/ProductInfo/parsers/__init__.py +5 -0
  147. flowtask/components/ProductInfo/parsers/base.py +83 -0
  148. flowtask/components/ProductInfo/parsers/brother.py +97 -0
  149. flowtask/components/ProductInfo/parsers/canon.py +167 -0
  150. flowtask/components/ProductInfo/parsers/epson.py +118 -0
  151. flowtask/components/ProductInfo/parsers/hp.py +131 -0
  152. flowtask/components/ProductInfo/parsers/samsung.py +97 -0
  153. flowtask/components/ProductInfo/scraper.py +319 -0
  154. flowtask/components/ProductPricing.py +118 -0
  155. flowtask/components/QS.py +261 -0
  156. flowtask/components/QSBase.py +201 -0
  157. flowtask/components/QueryIterator.py +273 -0
  158. flowtask/components/QueryToInsert.py +327 -0
  159. flowtask/components/QueryToPandas.py +432 -0
  160. flowtask/components/RESTClient.py +195 -0
  161. flowtask/components/RethinkDBQuery.py +189 -0
  162. flowtask/components/Rsync.py +74 -0
  163. flowtask/components/RunSSH.py +59 -0
  164. flowtask/components/RunShell.py +71 -0
  165. flowtask/components/SalesForce.py +20 -0
  166. flowtask/components/SaveImageBank/__init__.py +257 -0
  167. flowtask/components/SchedulingVisits.py +592 -0
  168. flowtask/components/ScrapPage.py +216 -0
  169. flowtask/components/ScrapSearch.py +79 -0
  170. flowtask/components/SendNotify.py +257 -0
  171. flowtask/components/SentimentAnalysis.py +694 -0
  172. flowtask/components/ServiceScrapper/__init__.py +5 -0
  173. flowtask/components/ServiceScrapper/parsers/__init__.py +1 -0
  174. flowtask/components/ServiceScrapper/parsers/base.py +94 -0
  175. flowtask/components/ServiceScrapper/parsers/costco.py +93 -0
  176. flowtask/components/ServiceScrapper/scrapper.py +199 -0
  177. flowtask/components/SetVariables.py +156 -0
  178. flowtask/components/SubTask.py +182 -0
  179. flowtask/components/SuiteCRM.py +48 -0
  180. flowtask/components/Switch.py +175 -0
  181. flowtask/components/TableBase.py +148 -0
  182. flowtask/components/TableDelete.py +312 -0
  183. flowtask/components/TableInput.py +143 -0
  184. flowtask/components/TableOutput/TableOutput.py +384 -0
  185. flowtask/components/TableOutput/__init__.py +3 -0
  186. flowtask/components/TableSchema.py +534 -0
  187. flowtask/components/Target.py +223 -0
  188. flowtask/components/ThumbnailGenerator.py +156 -0
  189. flowtask/components/ToPandas.py +67 -0
  190. flowtask/components/TransformRows/TransformRows.py +507 -0
  191. flowtask/components/TransformRows/__init__.py +9 -0
  192. flowtask/components/TransformRows/functions.py +559 -0
  193. flowtask/components/TransposeRows.py +176 -0
  194. flowtask/components/UPCDatabase.py +86 -0
  195. flowtask/components/UnGzip.py +171 -0
  196. flowtask/components/Uncompress.py +172 -0
  197. flowtask/components/UniqueRows.py +126 -0
  198. flowtask/components/Unzip.py +107 -0
  199. flowtask/components/UpdateOperationalVars.py +147 -0
  200. flowtask/components/UploadTo.py +299 -0
  201. flowtask/components/UploadToS3.py +136 -0
  202. flowtask/components/UploadToSFTP.py +160 -0
  203. flowtask/components/UploadToSharepoint.py +205 -0
  204. flowtask/components/UserFunc.py +122 -0
  205. flowtask/components/VivaTracker.py +140 -0
  206. flowtask/components/WSDLClient.py +123 -0
  207. flowtask/components/Wait.py +18 -0
  208. flowtask/components/Walmart.py +199 -0
  209. flowtask/components/Workplace.py +134 -0
  210. flowtask/components/XMLToPandas.py +267 -0
  211. flowtask/components/Zammad/__init__.py +41 -0
  212. flowtask/components/Zammad/models.py +0 -0
  213. flowtask/components/ZoomInfoScraper.py +409 -0
  214. flowtask/components/__init__.py +104 -0
  215. flowtask/components/abstract.py +18 -0
  216. flowtask/components/flow.py +530 -0
  217. flowtask/components/google.py +335 -0
  218. flowtask/components/group.py +221 -0
  219. flowtask/components/py.typed +0 -0
  220. flowtask/components/reviewscrap.py +132 -0
  221. flowtask/components/tAutoincrement.py +117 -0
  222. flowtask/components/tConcat.py +109 -0
  223. flowtask/components/tExplode.py +119 -0
  224. flowtask/components/tFilter.py +184 -0
  225. flowtask/components/tGroup.py +236 -0
  226. flowtask/components/tJoin.py +270 -0
  227. flowtask/components/tMap/__init__.py +9 -0
  228. flowtask/components/tMap/functions.py +54 -0
  229. flowtask/components/tMap/tMap.py +450 -0
  230. flowtask/components/tMelt.py +112 -0
  231. flowtask/components/tMerge.py +114 -0
  232. flowtask/components/tOrder.py +93 -0
  233. flowtask/components/tPandas.py +94 -0
  234. flowtask/components/tPivot.py +71 -0
  235. flowtask/components/tPluckCols.py +76 -0
  236. flowtask/components/tUnnest.py +82 -0
  237. flowtask/components/user.py +401 -0
  238. flowtask/conf.py +457 -0
  239. flowtask/download.py +102 -0
  240. flowtask/events/__init__.py +11 -0
  241. flowtask/events/events/__init__.py +20 -0
  242. flowtask/events/events/abstract.py +95 -0
  243. flowtask/events/events/alerts/__init__.py +362 -0
  244. flowtask/events/events/alerts/colfunctions.py +131 -0
  245. flowtask/events/events/alerts/functions.py +158 -0
  246. flowtask/events/events/dummy.py +12 -0
  247. flowtask/events/events/exec.py +124 -0
  248. flowtask/events/events/file/__init__.py +7 -0
  249. flowtask/events/events/file/base.py +51 -0
  250. flowtask/events/events/file/copy.py +23 -0
  251. flowtask/events/events/file/delete.py +16 -0
  252. flowtask/events/events/interfaces/__init__.py +9 -0
  253. flowtask/events/events/interfaces/client.py +67 -0
  254. flowtask/events/events/interfaces/credentials.py +28 -0
  255. flowtask/events/events/interfaces/notifications.py +58 -0
  256. flowtask/events/events/jira.py +122 -0
  257. flowtask/events/events/log.py +26 -0
  258. flowtask/events/events/logerr.py +52 -0
  259. flowtask/events/events/notify.py +59 -0
  260. flowtask/events/events/notify_event.py +160 -0
  261. flowtask/events/events/publish.py +54 -0
  262. flowtask/events/events/sendfile.py +104 -0
  263. flowtask/events/events/task.py +97 -0
  264. flowtask/events/events/teams.py +98 -0
  265. flowtask/events/events/webhook.py +58 -0
  266. flowtask/events/manager.py +287 -0
  267. flowtask/exceptions.c +39393 -0
  268. flowtask/exceptions.cpython-39-x86_64-linux-gnu.so +0 -0
  269. flowtask/extensions/__init__.py +3 -0
  270. flowtask/extensions/abstract.py +82 -0
  271. flowtask/extensions/logging/__init__.py +65 -0
  272. flowtask/hooks/__init__.py +9 -0
  273. flowtask/hooks/actions/__init__.py +22 -0
  274. flowtask/hooks/actions/abstract.py +66 -0
  275. flowtask/hooks/actions/dummy.py +23 -0
  276. flowtask/hooks/actions/jira.py +74 -0
  277. flowtask/hooks/actions/rest.py +320 -0
  278. flowtask/hooks/actions/sampledata.py +37 -0
  279. flowtask/hooks/actions/sensor.py +23 -0
  280. flowtask/hooks/actions/task.py +9 -0
  281. flowtask/hooks/actions/ticket.py +37 -0
  282. flowtask/hooks/actions/zammad.py +55 -0
  283. flowtask/hooks/hook.py +62 -0
  284. flowtask/hooks/models.py +17 -0
  285. flowtask/hooks/service.py +187 -0
  286. flowtask/hooks/step.py +91 -0
  287. flowtask/hooks/types/__init__.py +23 -0
  288. flowtask/hooks/types/base.py +129 -0
  289. flowtask/hooks/types/brokers/__init__.py +11 -0
  290. flowtask/hooks/types/brokers/base.py +54 -0
  291. flowtask/hooks/types/brokers/mqtt.py +35 -0
  292. flowtask/hooks/types/brokers/rabbitmq.py +82 -0
  293. flowtask/hooks/types/brokers/redis.py +83 -0
  294. flowtask/hooks/types/brokers/sqs.py +44 -0
  295. flowtask/hooks/types/fs.py +232 -0
  296. flowtask/hooks/types/http.py +49 -0
  297. flowtask/hooks/types/imap.py +200 -0
  298. flowtask/hooks/types/jira.py +279 -0
  299. flowtask/hooks/types/mail.py +205 -0
  300. flowtask/hooks/types/postgres.py +98 -0
  301. flowtask/hooks/types/responses/__init__.py +8 -0
  302. flowtask/hooks/types/responses/base.py +5 -0
  303. flowtask/hooks/types/sharepoint.py +288 -0
  304. flowtask/hooks/types/ssh.py +141 -0
  305. flowtask/hooks/types/tagged.py +59 -0
  306. flowtask/hooks/types/upload.py +85 -0
  307. flowtask/hooks/types/watch.py +71 -0
  308. flowtask/hooks/types/web.py +36 -0
  309. flowtask/interfaces/AzureClient.py +137 -0
  310. flowtask/interfaces/AzureGraph.py +839 -0
  311. flowtask/interfaces/Boto3Client.py +326 -0
  312. flowtask/interfaces/DropboxClient.py +173 -0
  313. flowtask/interfaces/ExcelHandler.py +94 -0
  314. flowtask/interfaces/FTPClient.py +131 -0
  315. flowtask/interfaces/GoogleCalendar.py +201 -0
  316. flowtask/interfaces/GoogleClient.py +133 -0
  317. flowtask/interfaces/GoogleDrive.py +127 -0
  318. flowtask/interfaces/GoogleGCS.py +89 -0
  319. flowtask/interfaces/GoogleGeocoding.py +93 -0
  320. flowtask/interfaces/GoogleLang.py +114 -0
  321. flowtask/interfaces/GooglePub.py +61 -0
  322. flowtask/interfaces/GoogleSheet.py +68 -0
  323. flowtask/interfaces/IMAPClient.py +137 -0
  324. flowtask/interfaces/O365Calendar.py +113 -0
  325. flowtask/interfaces/O365Client.py +220 -0
  326. flowtask/interfaces/OneDrive.py +284 -0
  327. flowtask/interfaces/Outlook.py +155 -0
  328. flowtask/interfaces/ParrotBot.py +130 -0
  329. flowtask/interfaces/SSHClient.py +378 -0
  330. flowtask/interfaces/Sharepoint.py +496 -0
  331. flowtask/interfaces/__init__.py +36 -0
  332. flowtask/interfaces/azureauth.py +119 -0
  333. flowtask/interfaces/cache.py +201 -0
  334. flowtask/interfaces/client.py +82 -0
  335. flowtask/interfaces/compress.py +525 -0
  336. flowtask/interfaces/credentials.py +124 -0
  337. flowtask/interfaces/d2l.py +239 -0
  338. flowtask/interfaces/databases/__init__.py +5 -0
  339. flowtask/interfaces/databases/db.py +223 -0
  340. flowtask/interfaces/databases/documentdb.py +55 -0
  341. flowtask/interfaces/databases/rethink.py +39 -0
  342. flowtask/interfaces/dataframes/__init__.py +11 -0
  343. flowtask/interfaces/dataframes/abstract.py +21 -0
  344. flowtask/interfaces/dataframes/arrow.py +71 -0
  345. flowtask/interfaces/dataframes/dt.py +69 -0
  346. flowtask/interfaces/dataframes/pandas.py +167 -0
  347. flowtask/interfaces/dataframes/polars.py +60 -0
  348. flowtask/interfaces/db.py +263 -0
  349. flowtask/interfaces/env.py +46 -0
  350. flowtask/interfaces/func.py +137 -0
  351. flowtask/interfaces/http.py +1780 -0
  352. flowtask/interfaces/locale.py +40 -0
  353. flowtask/interfaces/log.py +75 -0
  354. flowtask/interfaces/mask.py +143 -0
  355. flowtask/interfaces/notification.py +154 -0
  356. flowtask/interfaces/playwright.py +339 -0
  357. flowtask/interfaces/powerpoint.py +368 -0
  358. flowtask/interfaces/py.typed +0 -0
  359. flowtask/interfaces/qs.py +376 -0
  360. flowtask/interfaces/result.py +87 -0
  361. flowtask/interfaces/selenium_service.py +779 -0
  362. flowtask/interfaces/smartsheet.py +154 -0
  363. flowtask/interfaces/stat.py +39 -0
  364. flowtask/interfaces/task.py +96 -0
  365. flowtask/interfaces/template.py +118 -0
  366. flowtask/interfaces/vectorstores/__init__.py +1 -0
  367. flowtask/interfaces/vectorstores/abstract.py +133 -0
  368. flowtask/interfaces/vectorstores/milvus.py +669 -0
  369. flowtask/interfaces/zammad.py +107 -0
  370. flowtask/models.py +193 -0
  371. flowtask/parsers/__init__.py +15 -0
  372. flowtask/parsers/_yaml.c +11978 -0
  373. flowtask/parsers/_yaml.cpython-39-x86_64-linux-gnu.so +0 -0
  374. flowtask/parsers/argparser.py +235 -0
  375. flowtask/parsers/base.c +15155 -0
  376. flowtask/parsers/base.cpython-39-x86_64-linux-gnu.so +0 -0
  377. flowtask/parsers/json.c +11968 -0
  378. flowtask/parsers/json.cpython-39-x86_64-linux-gnu.so +0 -0
  379. flowtask/parsers/maps.py +49 -0
  380. flowtask/parsers/toml.c +11968 -0
  381. flowtask/parsers/toml.cpython-39-x86_64-linux-gnu.so +0 -0
  382. flowtask/plugins/__init__.py +16 -0
  383. flowtask/plugins/components/__init__.py +0 -0
  384. flowtask/plugins/handler/__init__.py +45 -0
  385. flowtask/plugins/importer.py +31 -0
  386. flowtask/plugins/sources/__init__.py +0 -0
  387. flowtask/runner.py +283 -0
  388. flowtask/scheduler/__init__.py +9 -0
  389. flowtask/scheduler/functions.py +493 -0
  390. flowtask/scheduler/handlers/__init__.py +8 -0
  391. flowtask/scheduler/handlers/manager.py +504 -0
  392. flowtask/scheduler/handlers/models.py +58 -0
  393. flowtask/scheduler/handlers/service.py +72 -0
  394. flowtask/scheduler/notifications.py +65 -0
  395. flowtask/scheduler/scheduler.py +993 -0
  396. flowtask/services/__init__.py +0 -0
  397. flowtask/services/bots/__init__.py +0 -0
  398. flowtask/services/bots/telegram.py +264 -0
  399. flowtask/services/files/__init__.py +11 -0
  400. flowtask/services/files/manager.py +522 -0
  401. flowtask/services/files/model.py +37 -0
  402. flowtask/services/files/service.py +767 -0
  403. flowtask/services/jira/__init__.py +3 -0
  404. flowtask/services/jira/jira_actions.py +191 -0
  405. flowtask/services/tasks/__init__.py +13 -0
  406. flowtask/services/tasks/launcher.py +213 -0
  407. flowtask/services/tasks/manager.py +323 -0
  408. flowtask/services/tasks/service.py +275 -0
  409. flowtask/services/tasks/task_manager.py +376 -0
  410. flowtask/services/tasks/tasks.py +155 -0
  411. flowtask/storages/__init__.py +16 -0
  412. flowtask/storages/exceptions.py +12 -0
  413. flowtask/storages/files/__init__.py +8 -0
  414. flowtask/storages/files/abstract.py +29 -0
  415. flowtask/storages/files/filesystem.py +66 -0
  416. flowtask/storages/tasks/__init__.py +19 -0
  417. flowtask/storages/tasks/abstract.py +26 -0
  418. flowtask/storages/tasks/database.py +33 -0
  419. flowtask/storages/tasks/filesystem.py +108 -0
  420. flowtask/storages/tasks/github.py +119 -0
  421. flowtask/storages/tasks/memory.py +45 -0
  422. flowtask/storages/tasks/row.py +25 -0
  423. flowtask/tasks/__init__.py +0 -0
  424. flowtask/tasks/abstract.py +526 -0
  425. flowtask/tasks/command.py +118 -0
  426. flowtask/tasks/pile.py +486 -0
  427. flowtask/tasks/py.typed +0 -0
  428. flowtask/tasks/task.py +778 -0
  429. flowtask/template/__init__.py +161 -0
  430. flowtask/tests.py +257 -0
  431. flowtask/types/__init__.py +8 -0
  432. flowtask/types/typedefs.c +11347 -0
  433. flowtask/types/typedefs.cpython-39-x86_64-linux-gnu.so +0 -0
  434. flowtask/utils/__init__.py +24 -0
  435. flowtask/utils/constants.py +117 -0
  436. flowtask/utils/encoders.py +21 -0
  437. flowtask/utils/executor.py +112 -0
  438. flowtask/utils/functions.cpp +14280 -0
  439. flowtask/utils/functions.cpython-39-x86_64-linux-gnu.so +0 -0
  440. flowtask/utils/json.cpp +13349 -0
  441. flowtask/utils/json.cpython-39-x86_64-linux-gnu.so +0 -0
  442. flowtask/utils/mail.py +63 -0
  443. flowtask/utils/parseqs.c +13324 -0
  444. flowtask/utils/parserqs.cpython-39-x86_64-linux-gnu.so +0 -0
  445. flowtask/utils/stats.py +308 -0
  446. flowtask/utils/transformations.py +74 -0
  447. flowtask/utils/uv.py +12 -0
  448. flowtask/utils/validators.py +97 -0
  449. flowtask/version.py +11 -0
  450. flowtask-5.8.4.dist-info/LICENSE +201 -0
  451. flowtask-5.8.4.dist-info/METADATA +209 -0
  452. flowtask-5.8.4.dist-info/RECORD +470 -0
  453. flowtask-5.8.4.dist-info/WHEEL +6 -0
  454. flowtask-5.8.4.dist-info/entry_points.txt +3 -0
  455. flowtask-5.8.4.dist-info/top_level.txt +2 -0
  456. plugins/components/CreateQR.py +39 -0
  457. plugins/components/TestComponent.py +28 -0
  458. plugins/components/Use1.py +13 -0
  459. plugins/components/Workplace.py +117 -0
  460. plugins/components/__init__.py +3 -0
  461. plugins/sources/__init__.py +0 -0
  462. plugins/sources/get_populartimes.py +78 -0
  463. plugins/sources/google.py +150 -0
  464. plugins/sources/hubspot.py +679 -0
  465. plugins/sources/icims.py +679 -0
  466. plugins/sources/mobileinsight.py +501 -0
  467. plugins/sources/newrelic.py +262 -0
  468. plugins/sources/uap.py +268 -0
  469. plugins/sources/venu.py +244 -0
  470. plugins/sources/vocinity.py +314 -0
@@ -0,0 +1,335 @@
1
+ from typing import Union
2
+ from collections.abc import Callable
3
+ from functools import partial
4
+ import asyncio
5
+ import aiohttp
6
+ from aiohttp.resolver import AsyncResolver
7
+ import pandas as pd
8
+ import ssl
9
+ from datamodel.parsers.json import json_encoder
10
+ from proxylists import check_address
11
+ from proxylists.proxies import (
12
+ FreeProxy,
13
+ Oxylabs
14
+ )
15
+ from ..conf import GOOGLE_API_KEY, GOOGLE_PLACES_API_KEY
16
+ from ..exceptions import ComponentError
17
+ from ..components import FlowComponent
18
+
19
+
20
# Compatibility patch (per the original note: TLS support on Python < 3.11):
# mark asyncio's SSL transport class as compatible with start_tls.
asyncio.sslproto._SSLProtocolTransport._start_tls_compatible = True
25
+
26
class GoogleBase(FlowComponent):
    """
    GoogleBase.

    Overview: A base class for Google API components.

    The method named by the ``type`` keyword argument is looked up on the
    instance in :meth:`run` and called once per DataFrame row as
    ``fn(idx, row)``. :meth:`_processing_tasks` expects each of those calls
    to resolve to an ``(idx, result_dict)`` pair.
    """
    def __init__(
        self,
        loop: asyncio.AbstractEventLoop = None,
        job: Callable = None,
        stat: Callable = None,
        **kwargs,
    ) -> None:
        """Read component options from ``kwargs`` and initialize the base class.

        Options consumed here: ``type``, ``api_key``, ``use_proxies`` and
        ``paid_proxy``. ``chunk_size`` is read but left in ``kwargs``, so it
        is also forwarded to the parent constructor.
        """
        self.chunk_size: int = kwargs.get('chunk_size', 100)
        self._type: str = kwargs.pop('type', None)
        self.api_key: str = kwargs.pop('api_key', GOOGLE_API_KEY)
        self.use_proxies: bool = kwargs.pop('use_proxies', False)
        self.paid_proxy: bool = kwargs.pop('paid_proxy', False)
        super(GoogleBase, self).__init__(loop=loop, job=job, stat=stat, **kwargs)
        # Not used inside this class; presumably available to subclasses to
        # throttle their requests -- TODO confirm.
        self.semaphore = asyncio.Semaphore(10)  # Adjust the limit as needed

    async def close(self):
        """Nothing to release: HTTP sessions are opened and closed per request."""
        pass

    def _evaluate_input(self):
        """Copy ``self.input`` into ``self.data`` when there is input to use."""
        if self.previous or self.input is not None:
            self.data = self.input

    async def start(self, **kwargs):
        """Validate the configuration before running.

        Returns:
            True when the component is ready to run.

        Raises:
            RuntimeError: no ``type`` function was configured.
            ComponentError: the input is not a pandas DataFrame, or no API
                key is available.
        """
        self._counter: int = 0
        self._evaluate_input()
        if not self._type:
            raise RuntimeError(
                'Google requires a Type Function'
            )
        if not isinstance(self.data, pd.DataFrame):
            raise ComponentError(
                "Incompatible Pandas Dataframe", status=404
            )
        if not self.api_key:
            # Fall back to the Places key when no generic key was provided.
            self.api_key = GOOGLE_PLACES_API_KEY
            if not self.api_key:
                raise ComponentError(
                    "Google API Key is missing", status=404
                )
        return True

    def _get_session_args(self) -> dict:
        """Get aiohttp Session arguments."""
        # Total timeout for the request
        timeout = aiohttp.ClientTimeout(total=20)
        resolver = AsyncResolver(
            nameservers=["1.1.1.1", "8.8.8.8"]
        )
        connector = aiohttp.TCPConnector(
            limit=100,
            resolver=resolver
        )
        return {
            "connector": connector,
            "timeout": timeout,
            "json_serialize": json_encoder,
            "trust_env": True
        }

    async def get_proxies(self):
        """Return a proxy to route requests through.

        With ``paid_proxy`` enabled, the ``'https'`` entry of the Oxylabs
        proxy list is returned. Otherwise the free proxy list is probed in
        order and the first address that passes ``check_address`` is
        returned as ``http://host:port``.

        Raises:
            ComponentError: no free proxy passed the connectivity check.
        """
        if self.paid_proxy is True:
            proxies = await Oxylabs().get_proxy_list()
            return proxies.get('https')
        proxies = await FreeProxy().get_list()
        for address in proxies:
            host, port = address.split(':')
            if await check_address(host=host, port=port) is True:
                # Only one proxy is ever used, so stop at the first that
                # works instead of probing the whole list.
                return f"http://{address}"
        raise ComponentError(
            "Google: no working free proxy was found"
        )

    async def _google_session(
        self,
        url: str,
        session_args: dict,
        params: dict = None,
        method: str = 'GET',
        use_json: bool = False,
        as_json: bool = True,
        use_proxies: bool = False,
        google_search: bool = False,
        **kwargs
    ) -> Union[dict, bytes, str, None]:
        """Make a Google API request using aiohttp Session.

        Args:
            url: Endpoint to call.
            session_args: Keyword arguments for ``aiohttp.ClientSession``
                (see :meth:`_get_session_args`).
            params: Sent as query string for GET; for other methods sent as
                JSON body when ``use_json`` is True, else as form data.
            method: HTTP method (case-insensitive).
            use_json: For non-GET methods, send ``params`` as JSON.
            as_json: Decode a 200 response as JSON and return it only when
                its ``status`` field is ``'OK'``.
            use_proxies: Route this request through a proxy (also enabled
                by ``self.use_proxies``).
            google_search: With ``as_json=False`` return raw bytes instead
                of text; on HTTP errors use :meth:`check_response_search`.
            **kwargs: Extra arguments forwarded to ``session.request``.

        Returns:
            The decoded JSON dict, the body as ``bytes``/``str``, or ``None``
            when the response carries no usable result.

        Raises:
            ComponentError: raised by the response checkers on HTTP errors.
        """
        _proxies = None
        if use_proxies is True or self.use_proxies is True:
            _proxies = await self.get_proxies()

        ssl_context = ssl.create_default_context()
        # Ensure at least TLS 1.2 is used
        ssl_context.minimum_version = ssl.TLSVersion.TLSv1_2
        # NOTE(security): hostname checking and certificate verification are
        # disabled here, so the peer is not authenticated. Kept as-is to
        # preserve behavior -- confirm this is intentional.
        ssl_context.check_hostname = False
        ssl_context.verify_mode = ssl.CERT_NONE

        verb = method.upper()
        # Pick the keyword under which ``params`` travels for this request.
        if verb == 'GET':
            payload_arg = 'params'
        elif use_json is True:
            payload_arg = 'json'
        else:
            payload_arg = 'data'

        async with aiohttp.ClientSession(**session_args) as session:
            async with session.request(
                verb,
                url,
                proxy=_proxies,
                ssl=ssl_context,
                **{payload_arg: params},
                **kwargs
            ) as response:
                if response.status != 200:
                    if google_search is True:
                        await self.check_response_search(response)
                    else:
                        await self.google_response_code(response)
                    return None
                if as_json is not True:
                    if google_search is True:
                        return await response.read()
                    return await response.text()
                result = await response.json()
                status = result['status']
                if status == 'OK':
                    # TODO: Check if it's a premise or subpremise
                    return result
                if status != 'ZERO_RESULTS':
                    # The body can carry an error status even on HTTP 200;
                    # leave a trace instead of dropping it silently.
                    self._logger.warning(
                        f"Google API returned status {status!r} "
                        "with HTTP 200; result discarded"
                    )
                return None

    async def check_response_search(self, response: aiohttp.ClientResponse):
        """Handle a non-200 response from a Google Search request.

        HTTP 429 is logged and ignored; any other status above 299 is logged
        and raised as a ComponentError.
        """
        if response.status == 429:
            self._logger.error(
                "Google Search: Too many requests"
            )
            return None
        elif response.status > 299:
            error = await response.text()
            self._logger.error(
                f"Raw response Error: {error}"
            )
            raise ComponentError(
                f"Google Places Error {response.status}",
                f"Error: {error}"
            )

    async def google_response_code(self, response: aiohttp.ClientResponse):
        """
        check if query quota has been surpassed or other errors that can happen.
        :param response: aiohttp response whose JSON body carries the status
        :return: None when the status is OK/ZERO_RESULTS or on HTTP 429
        :raises ComponentError: for any other status reported in the body
        """
        if response.status == 429:
            self._logger.error(
                "Google Search: Too many requests"
            )
            return None

        try:
            result = await response.json()
        except (aiohttp.ContentTypeError, ValueError) as exc:
            # Error bodies are not always JSON; report the raw body rather
            # than failing with an unrelated decoding error.
            body = await response.text()
            self._logger.error(
                f"Raw response Error: {body}"
            )
            raise ComponentError(
                f"Google Places Error {response.status}",
                f"Error: {body}"
            ) from exc

        status = result.get('status', 'Unknown')
        if status == "OK" or status == "ZERO_RESULTS":
            return
        # Error:
        error = result.get('error', result)
        if isinstance(error, dict):
            status = error.get('status', 'Unknown')
            message = error.get('message', error)
        else:
            # Non-dict error payload: keep the top-level status.
            message = error

        self._logger.error(
            f"{status}: {message}: {error}"
        )

        if status == "REQUEST_DENIED":
            raise ComponentError(
                (
                    f"Google Places {status}: "
                    "Request was denied, maybe the API key is invalid."
                )
            )

        if status == "OVER_QUERY_LIMIT":
            raise ComponentError(
                (
                    f"Google Places {status}: "
                    "You exceeded your Query Limit for Google Places API Web Service, "
                    "check https://developers.google.com/places/web-service/usage "
                    "to upgrade your quota."
                )
            )

        if status == "INVALID_REQUEST":
            raise ComponentError(
                (
                    f"Google Places {status}: "
                    "Invalid Request: "
                    "The query string is malformed, "
                    "check if your formatting for lat/lng and radius is correct. "
                    f"Error: {error}"
                )
            )

        if status == "NOT_FOUND":
            raise ComponentError(
                (
                    f"Google Places {status}: "
                    "The place ID was not found and either does not exist or was retired."
                )
            )

        raise ComponentError(
            (
                f"Google Places {status}: "
                "Unidentified error with the Places API, please check the response code, "
                f"error: {error}"
            )
        )

    def column_exists(self, column: str):
        """Returns True if the column exists in the DataFrame.

        When the column is missing, a warning is logged, the column is
        created filled with None, and False is returned.
        """
        if column not in self.data.columns:
            self._logger.warning(
                f"Column {column} does not exist in the dataframe"
            )
            self.data[column] = None
            return False
        return True

    def chunkify(self, lst, n):
        """Split list lst into chunks of size n."""
        for i in range(0, len(lst), n):
            yield lst[i:i + n]

    async def _processing_tasks(self, tasks: list) -> pd.DataFrame:
        """Process tasks concurrently.

        Tasks are awaited in chunks of ``self.chunk_size``. Each task is
        expected to resolve to an ``(idx, result_dict)`` pair; non-null
        values from ``result_dict`` are written into ``self.data`` at row
        ``idx``. Failed tasks and unusable results are logged and skipped.

        Returns:
            ``self.data`` with the collected columns merged in.
        """
        results = []
        for chunk in self.chunkify(tasks, self.chunk_size):
            result = await asyncio.gather(*chunk, return_exceptions=True)
            for res in result:
                # BaseException also covers asyncio.CancelledError, which
                # gather() hands back as a result as well.
                if isinstance(res, BaseException):
                    # Handle the exception
                    self._logger.error(
                        f"Task failed with exception: {res}. Type: {type(res)}"
                    )
                    self._logger.error(
                        f"Exception type: {type(res)}, Task input types: {type(chunk)}"
                    )
                    continue
                results.append(res)
        results_list = []
        for res in results:
            try:
                idx, result = res
            except (TypeError, ValueError):
                # A task that returns None (or anything that is not a pair)
                # must not abort the whole batch.
                self._logger.warning(
                    f"Ignoring unexpected task result of type {type(res)}"
                )
                continue
            if result:
                result['idx'] = idx  # Add the index to the result dictionary
                results_list.append(result)
        if results_list:
            results_df = pd.DataFrame(results_list)
            results_df.set_index('idx', inplace=True)
            # If necessary, reindex results_df to match self.data
            results_df = results_df.reindex(self.data.index)
            # Directly assign columns from results_df to self.data
            for column in results_df.columns:
                mask = results_df[column].notnull()
                indices = results_df.index[mask]
                self.data.loc[indices, column] = results_df.loc[indices, column]
        return self.data

    async def run(self):
        """Run the Google Places API.

        Calls the method named by ``self._type`` once per row of
        ``self.data``, merges the results back, and returns the DataFrame.
        """
        fn = getattr(self, self._type)
        tasks = [
            fn(
                idx,
                row,
            ) for idx, row in self.data.iterrows()
        ]
        # Execute tasks concurrently
        df = await self._processing_tasks(tasks)
        if self._debug is True:
            print(df)
            print("::: Printing Column Information === ")
            has_rows = len(df) > 0
            for column, t in df.dtypes.items():
                # An empty frame has no first row to sample.
                sample = df[column].iloc[0] if has_rows else None
                print(column, "->", t, "->", sample)
        self.add_metric("GOOGLE_PLACES_DOWNLOADED", self._counter)
        self._result = df
        return self._result
@@ -0,0 +1,221 @@
1
+ import asyncio
2
+ import copy
3
+ from collections.abc import Callable
4
+ from navconfig.logging import logging
5
+ from asyncdb.exceptions import NoDataFound, ProviderError
6
+ from ..utils.stats import StepMonitor
7
+ from ..interfaces.log import SkipErrors
8
+ from ..exceptions import DataNotFound, NotSupported, ComponentError
9
+ from ..utils import cPrint
10
+ from .flow import FlowComponent
11
+
12
+
13
class GroupComponent(FlowComponent):
    """
    GroupComponent

    Overview

        This component executes a group of other FlowTask components sequentially as a single unit.
        It allows chaining multiple tasks together and provides error handling for various scenarios.

    .. table:: Properties
        :widths: auto

        +------------------------+----------+------------------------------------------------------------------------------------------+
        | Name                   | Required | Description                                                                              |
        +------------------------+----------+------------------------------------------------------------------------------------------+
        | component_list (list)  | Yes      | List of dictionaries defining the components to be executed in the group.                |
        |                        |          | Each dictionary should contain the following keys:                                       |
        |                        |          |   - "component": The FlowTask component class to be used.                                |
        |                        |          |   - "params": A dictionary containing parameters to be passed to the component.          |
        |                        |          |     (Optional)                                                                           |
        |                        |          |   - "conditions": A dictionary containing conditions that must be met before running     |
        |                        |          |     the component. (Optional)                                                            |
        +------------------------+----------+------------------------------------------------------------------------------------------+
        | stat (Callable)        | No       | Optional callback function for step-level monitoring and statistics collection.          |
        +------------------------+----------+------------------------------------------------------------------------------------------+
        | skipError              | No       | Defines the behavior when a component within the group raises an error.                  |
        |                        |          | Valid options are:                                                                       |
        |                        |          |   - Skip: the failing component is skipped and execution continues.                      |
        |                        |          |   - Raise: the error is raised and execution is interrupted.                             |
        +------------------------+----------+------------------------------------------------------------------------------------------+

    Return

        The component modifies the data received from the previous component and returns the final output after
        all components in the group have been executed.

    """  # noqa

    def __init__(
        self,
        loop: asyncio.AbstractEventLoop = None,
        job: Callable = None,
        stat: Callable = None,
        component_list: list = None,
        **kwargs,
    ):
        """Init Method."""
        # These attributes are set before the base initialiser runs.
        self._params = {}
        self._components = component_list
        self._conditions: dict = {}
        super(GroupComponent, self).__init__(loop=loop, job=job, stat=stat, **kwargs)

    async def start(self, **kwargs):
        """Take the previous component's output as this group's data."""
        if self.previous:
            self.data = self.input
        return True

    async def close(self):
        """Nothing to release: every child component is closed inside ``run``."""
        pass

    def _group_skip(self, component, step_name) -> bool:
        """Return True (after logging) when ``component`` asks for errors to be skipped."""
        if component.skipError == SkipErrors.SKIP:
            self._logger.warning(
                f"::: SKIPPING Error on {step_name} :::: "
            )
            return True
        return False

    async def _group_close(self, component) -> None:
        """Close a child component, logging (never raising) any failure."""
        try:
            close = getattr(component, "close", None)
            if asyncio.iscoroutinefunction(close):
                await close()
            else:
                close()
        except Exception as e:  # pylint: disable=W0703
            logging.warning(e)

    async def run(self):
        """Build, start, run and close every step of the group, in order.

        Each step receives the last successfully executed component as its
        ``previous``. When a step fails and its own ``skipError`` is
        ``SkipErrors.SKIP`` the step is dropped from the chain and the loop
        moves on; otherwise the error is re-raised.

        Returns:
            The result of the last step that ran successfully (``None`` when
            no step did).

        Raises:
            DataNotFound: a step reported no data and does not skip errors.
            NotSupported: a step's ``run`` raised ProviderError,
                ComponentError or NotSupported and does not skip errors.
            ComponentError: a step could not be built or started, or its
                ``run`` failed with any other exception.
        """
        if self._components is None:
            raise ComponentError(
                "Group Error: missing component_list"
            )
        prev = self.previous
        result = None
        for step in self._components:
            # Work on a copy: get_component() mutates the step parameters.
            step = copy.deepcopy(step)
            step_name = step.name
            # Remember the last good component so that a skipped step never
            # becomes the "previous" of the next one.
            _prev = prev
            try:
                component = self.get_component(step=step, previous=prev)
            except Exception as e:
                raise ComponentError(f"{e!s}") from e
            prev = component
            # calling start method for component
            start = getattr(component, "start", None)
            if not callable(start):
                raise ComponentError(f"Group Error: missing Start on {step.name}")
            try:
                if asyncio.iscoroutinefunction(start):
                    st = await component.start()
                else:
                    st = component.start()
                logging.debug(f"{step_name} STARTED: {st}")
            except (NoDataFound, DataNotFound) as err:
                if self._group_skip(component, step_name):
                    prev = _prev
                    continue
                raise DataNotFound(
                    f'Data Not Found over {step_name}'
                ) from err
            except (ProviderError, ComponentError, NotSupported) as err:
                raise ComponentError(
                    f"Group Error: calling Start on {step.name}, error: {err}"
                ) from err
            # then, calling the run method:
            try:
                run = getattr(component, "run", None)
                if asyncio.iscoroutinefunction(run):
                    result = await run()
                else:
                    result = run()
            except (NoDataFound, DataNotFound) as err:
                if self._group_skip(component, step_name):
                    prev = _prev
                    continue
                raise DataNotFound(
                    f'Data Not Found over {step_name}'
                ) from err
            except (ProviderError, ComponentError, NotSupported) as err:
                if self._group_skip(component, step_name):
                    prev = _prev
                    continue
                raise NotSupported(
                    f"Group Error: Not Supported on {step.name}, error: {err}"
                ) from err
            except Exception as err:
                if self._group_skip(component, step_name):
                    prev = _prev
                    continue
                # This is the run phase, so the message names Run, not Start.
                raise ComponentError(
                    f"Group Error: Calling Run on {step.name}, error: {err}"
                ) from err
            finally:
                # Close the child whether its run succeeded, was skipped or raised.
                await self._group_close(component)
        self._result = result
        return self._result

    def get_component(self, step, previous):
        """Instantiate the component described by ``step``.

        The step parameters are enriched with this group's environment,
        variables, arguments, conditions and shared objects before the
        component class is called.

        Args:
            step: object exposing ``name``, ``params`` (a dict) and
                ``component`` (the callable that builds the component).
            previous: component passed to the new one as ``job``.

        Returns:
            The newly created component.

        Raises:
            ComponentError: if building the component fails.
        """
        if self.stat:
            parent_stat = self.stat.parent()
            stat = StepMonitor(name=step.name, parent=parent_stat)
            parent_stat.add_step(stat)
        else:
            stat = None
        params = step.params
        step_conditions = params.get("conditions")
        if step_conditions:
            self._conditions[step.name] = step_conditions
        params["ENV"] = self._environment
        # params
        if self._params:
            try:
                params["params"] = {**params["params"], **self._params}
            except (KeyError, TypeError):
                # No usable "params" entry on the step: leave it untouched.
                pass
        # parameters
        if self._parameters:
            parameters = params.get("parameters") or {}
            params["parameters"] = {**parameters, **self._parameters}
        if hasattr(self, "_program"):
            params["_program"] = self._program
        # useful to change variables in set var components
        params["_vars"] = self._vars
        # variables dictionary
        params["variables"] = self._variables
        params["_args"] = self._args
        # argument list for components (or tasks) that need argument lists
        params["arguments"] = self._arguments
        # for components with conditions, we can add more conditions
        conditions = params.get("conditions") or {}
        step_conds = self._conditions.get(step.name, {})
        if self.conditions is not None:
            # Conditions declared on the step win over the group-wide ones.
            step_conds = {**self.conditions, **step_conds}
        params["conditions"] = {**conditions, **step_conds}
        # attributes only usable component-only
        params["attributes"] = self._attributes
        # the current Pile of components
        params["TaskPile"] = self._TaskPile
        params["debug"] = self._debug
        params["argparser"] = self._argparser
        # the current in-memory connector
        params["memory"] = self._memory
        target = step.component
        try:
            job = target(job=previous, loop=self._loop, stat=stat, **params)
            job.SetPile(self._TaskPile)
            cPrint(
                f"LOADED STEP: {step.name}",
                level="DEBUG"
            )
            return job
        except Exception as err:
            raise ComponentError(
                f"Component Error on {target}, error: {err}"
            ) from err
File without changes
@@ -0,0 +1,132 @@
1
+ from typing import Any
2
+ from collections.abc import Callable
3
+ import asyncio
4
+ import httpx
5
+ from pandas import DataFrame
6
+ from seleniumwire import webdriver
7
+ from navconfig.logging import logging
8
+ from ..exceptions import (
9
+ ConfigError,
10
+ ComponentError,
11
+ NotSupported,
12
+ )
13
+
14
+ from .flow import FlowComponent
15
+ from ..interfaces import SeleniumService
16
+ from ..interfaces import HTTPService
17
+
18
# Raise the threshold of these third-party loggers to WARNING.
for _logger_name in ('selenium.webdriver', 'WDM', 'hpack', 'seleniumwire'):
    logging.getLogger(name=_logger_name).setLevel(logging.WARNING)
22
+
23
def on_backoff(details):
    """Log a warning describing a retry that is about to be attempted.

    Args:
        details: mapping with ``wait`` (seconds until the next attempt) and
            ``tries`` (attempts made so far). ``exception`` is reported when
            present; a missing key is logged as ``None`` instead of raising,
            so this logging hook can never break the retry itself.
            NOTE(review): presumably the ``details`` dict of a ``backoff``
            event handler — confirm against the decorator that uses it.
    """
    try:
        error = details['exception']
    except KeyError:
        error = None
    logging.warning(
        f"Backing off {details['wait']:0.1f} seconds after {details['tries']} tries due to error: {error}"
    )
27
+
28
def bad_gateway_exception(exc):
    """Tell whether ``exc`` is an ``httpx.HTTPStatusError`` whose response status is 502."""
    if not isinstance(exc, httpx.HTTPStatusError):
        return False
    status = exc.response.status_code
    return status == 502
31
+
32
+
33
class ReviewScrapper(FlowComponent, SeleniumService, HTTPService):
    """Component that runs the scraping coroutine selected by its ``type`` argument.

    ``type`` names a method of the component; ``run`` looks it up and awaits
    it, and its return value becomes the component result.
    NOTE(review): none of those methods is defined in this class — presumably
    they come from subclasses or mixins; confirm.

    Keyword Args:
        type (str): name of the method to call. Required.
        chunk_size (int): chunk size, stored as ``self.chunk_size`` (default 100).
        task_parts (int): number of parts the task list is split into by
            ``_processing_tasks`` (default 10).
    """

    def __init__(
        self,
        loop: asyncio.AbstractEventLoop = None,
        job: Callable = None,
        stat: Callable = None,
        **kwargs,
    ):
        self._fn = kwargs.pop('type', None)
        # ``get`` (not ``pop``): both options are still forwarded to the base classes.
        self.chunk_size: int = kwargs.get('chunk_size', 100)
        self.task_parts: int = kwargs.get('task_parts', 10)
        if not self._fn:
            # The base initialiser has not run yet, so an instance-level
            # ``__name__`` cannot be relied upon here; fall back to the
            # class name rather than dying with AttributeError.
            name = getattr(self, "__name__", type(self).__name__)
            raise ConfigError(
                f"{name}: require a `type` Function to be called, ex: availability"
            )
        super().__init__(loop=loop, job=job, stat=stat, **kwargs)

    async def get_cookies(self, url: str) -> dict:
        """Load ``url`` in a headless Chrome session and return its cookies.

        NOTE(review): the driver calls below are not awaited, so this
        coroutine appears to block the event loop while the page loads.

        Returns:
            A ``{cookie name: cookie value}`` dictionary.
        """
        options = webdriver.ChromeOptions()
        options.add_argument('--headless')  # Run in headless mode
        driver = webdriver.Chrome(options=options)
        try:
            driver.get(url)
            cookies = driver.get_cookies()
        finally:
            # Always release the browser, even when the page fails to load.
            driver.quit()
        return {cookie['name']: cookie['value'] for cookie in cookies}

    def chunkify(self, lst, n):
        """Split list lst into chunks of size n."""
        for i in range(0, len(lst), n):
            yield lst[i:i + n]

    def column_exists(self, column: str, default_val: Any = None):
        """Make sure ``column`` exists in ``self.data``.

        A missing column is created and filled with ``default_val``.

        Returns:
            True when the column was already present, False when it had to
            be created.
        """
        if column not in self.data.columns:
            self._logger.warning(
                f"Column {column} does not exist in the Dataframe"
            )
            self.data[column] = default_val
            return False
        return True

    def split_parts(self, task_list, num_parts: int = 5) -> list:
        """Split ``task_list`` into ``num_parts`` consecutive slices.

        Slice sizes differ by at most one item; when there are fewer items
        than parts the trailing slices are empty.
        """
        part_size, remainder = divmod(len(task_list), num_parts)
        parts = []
        start = 0
        for i in range(num_parts):
            # Distribute the remainder across the first `remainder` parts
            end = start + part_size + (1 if i < remainder else 0)
            parts.append(task_list[start:end])
            start = end
        return parts

    async def _processing_tasks(self, tasks: list) -> list:
        """Await ``tasks`` one part at a time and return all their results.

        Tasks are grouped with ``split_parts`` into ``self.task_parts`` parts;
        the tasks of one part run concurrently. The first exception raised by
        any task propagates to the caller.

        Returns:
            A flat list with the results of every task, in task order.
        """
        results = []
        for chunk in self.split_parts(tasks, self.task_parts):
            result = await asyncio.gather(*chunk, return_exceptions=False)
            results.extend(result)
        return results

    async def run(self):
        """Call the configured function and store its result.

        Returns:
            Whatever the configured function returned (also kept in
            ``self._result``).

        Raises:
            ComponentError: the function is missing or not callable, or it
                failed with an unexpected error.
            TimeoutError, NotSupported: re-raised untouched.
        """
        # we need to call the "function" for Services.
        fn = getattr(self, self._fn, None)
        result = None
        if not callable(fn):
            raise ComponentError(
                f"{self.__name__}: Function {self._fn} doesn't exists."
            )
        try:
            result = await fn()
        except (ComponentError, TimeoutError, NotSupported):
            raise
        except Exception as exc:
            raise ComponentError(
                f"{self.__name__}: Unknown Error: {exc}"
            ) from exc
        # Print results
        print(result)
        if isinstance(result, DataFrame):
            print("::: Printing Column Information === ")
            has_rows = not result.empty
            for column, t in result.dtypes.items():
                # An empty frame has no first row to show as a sample value.
                sample = result[column].iloc[0] if has_rows else None
                print(column, "->", t, "->", sample)
        self._result = result
        return self._result

    async def close(self, **kwargs) -> bool:
        """Close the driver through ``close_driver``."""
        self.close_driver()
        return True

    async def start(self, **kwargs) -> bool:
        """Validate the input and the configuration before ``run``.

        Returns:
            True once the component is ready to run.

        Raises:
            ComponentError: the previous component's output is not a DataFrame.
            ConfigError: the configured function does not exist on the component.
        """
        await super(ReviewScrapper, self).start(**kwargs)
        if self.previous:
            self.data = self.input
            if not isinstance(self.data, DataFrame):
                raise ComponentError(
                    "Incompatible Pandas Dataframe"
                )
        self.api_token = self.get_env_value(self.api_token) if hasattr(self, 'api_token') else self.get_env_value('TARGET_API_KEY')  # noqa
        if not hasattr(self, self._fn):
            raise ConfigError(
                f"{self.__name__}: Unable to found Function {self._fn} in Component."
            )
        return True