flowtask 5.8.4__cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (470) hide show
  1. flowtask/__init__.py +93 -0
  2. flowtask/__main__.py +38 -0
  3. flowtask/bots/__init__.py +6 -0
  4. flowtask/bots/check.py +93 -0
  5. flowtask/bots/codebot.py +51 -0
  6. flowtask/components/ASPX.py +148 -0
  7. flowtask/components/AddDataset.py +352 -0
  8. flowtask/components/Amazon.py +523 -0
  9. flowtask/components/AutoTask.py +314 -0
  10. flowtask/components/Azure.py +80 -0
  11. flowtask/components/AzureUsers.py +106 -0
  12. flowtask/components/BaseAction.py +91 -0
  13. flowtask/components/BaseLoop.py +198 -0
  14. flowtask/components/BestBuy.py +800 -0
  15. flowtask/components/CSVToGCS.py +120 -0
  16. flowtask/components/CompanyScraper/__init__.py +1 -0
  17. flowtask/components/CompanyScraper/parsers/__init__.py +6 -0
  18. flowtask/components/CompanyScraper/parsers/base.py +102 -0
  19. flowtask/components/CompanyScraper/parsers/explorium.py +192 -0
  20. flowtask/components/CompanyScraper/parsers/leadiq.py +206 -0
  21. flowtask/components/CompanyScraper/parsers/rocket.py +133 -0
  22. flowtask/components/CompanyScraper/parsers/siccode.py +109 -0
  23. flowtask/components/CompanyScraper/parsers/visualvisitor.py +130 -0
  24. flowtask/components/CompanyScraper/parsers/zoominfo.py +118 -0
  25. flowtask/components/CompanyScraper/scrapper.py +1054 -0
  26. flowtask/components/CopyTo.py +177 -0
  27. flowtask/components/CopyToBigQuery.py +243 -0
  28. flowtask/components/CopyToMongoDB.py +291 -0
  29. flowtask/components/CopyToPg.py +609 -0
  30. flowtask/components/CopyToRethink.py +207 -0
  31. flowtask/components/CreateGCSBucket.py +102 -0
  32. flowtask/components/CreateReport/CreateReport.py +228 -0
  33. flowtask/components/CreateReport/__init__.py +9 -0
  34. flowtask/components/CreateReport/charts/__init__.py +15 -0
  35. flowtask/components/CreateReport/charts/bar.py +51 -0
  36. flowtask/components/CreateReport/charts/base.py +66 -0
  37. flowtask/components/CreateReport/charts/pie.py +64 -0
  38. flowtask/components/CreateReport/utils.py +9 -0
  39. flowtask/components/CustomerSatisfaction.py +196 -0
  40. flowtask/components/DataInput.py +200 -0
  41. flowtask/components/DateList.py +255 -0
  42. flowtask/components/DbClient.py +163 -0
  43. flowtask/components/DialPad.py +146 -0
  44. flowtask/components/DocumentDBQuery.py +200 -0
  45. flowtask/components/DownloadFrom.py +371 -0
  46. flowtask/components/DownloadFromD2L.py +113 -0
  47. flowtask/components/DownloadFromFTP.py +181 -0
  48. flowtask/components/DownloadFromIMAP.py +315 -0
  49. flowtask/components/DownloadFromS3.py +198 -0
  50. flowtask/components/DownloadFromSFTP.py +265 -0
  51. flowtask/components/DownloadFromSharepoint.py +110 -0
  52. flowtask/components/DownloadFromSmartSheet.py +114 -0
  53. flowtask/components/DownloadS3File.py +229 -0
  54. flowtask/components/Dummy.py +59 -0
  55. flowtask/components/DuplicatePhoto.py +411 -0
  56. flowtask/components/EmployeeEvaluation.py +237 -0
  57. flowtask/components/ExecuteSQL.py +323 -0
  58. flowtask/components/ExtractHTML.py +178 -0
  59. flowtask/components/FileBase.py +178 -0
  60. flowtask/components/FileCopy.py +181 -0
  61. flowtask/components/FileDelete.py +82 -0
  62. flowtask/components/FileExists.py +146 -0
  63. flowtask/components/FileIteratorDelete.py +112 -0
  64. flowtask/components/FileList.py +194 -0
  65. flowtask/components/FileOpen.py +75 -0
  66. flowtask/components/FileRead.py +120 -0
  67. flowtask/components/FileRename.py +106 -0
  68. flowtask/components/FilterIf.py +284 -0
  69. flowtask/components/FilterRows/FilterRows.py +200 -0
  70. flowtask/components/FilterRows/__init__.py +10 -0
  71. flowtask/components/FilterRows/functions.py +4 -0
  72. flowtask/components/GCSToBigQuery.py +103 -0
  73. flowtask/components/GoogleA4.py +150 -0
  74. flowtask/components/GoogleGeoCoding.py +344 -0
  75. flowtask/components/GooglePlaces.py +315 -0
  76. flowtask/components/GoogleSearch.py +539 -0
  77. flowtask/components/HTTPClient.py +268 -0
  78. flowtask/components/ICIMS.py +146 -0
  79. flowtask/components/IF.py +179 -0
  80. flowtask/components/IcimsFolderCopy.py +173 -0
  81. flowtask/components/ImageFeatures/__init__.py +5 -0
  82. flowtask/components/ImageFeatures/process.py +233 -0
  83. flowtask/components/IteratorBase.py +251 -0
  84. flowtask/components/LangchainLoader/__init__.py +5 -0
  85. flowtask/components/LangchainLoader/loader.py +194 -0
  86. flowtask/components/LangchainLoader/loaders/__init__.py +22 -0
  87. flowtask/components/LangchainLoader/loaders/abstract.py +362 -0
  88. flowtask/components/LangchainLoader/loaders/basepdf.py +50 -0
  89. flowtask/components/LangchainLoader/loaders/docx.py +91 -0
  90. flowtask/components/LangchainLoader/loaders/html.py +119 -0
  91. flowtask/components/LangchainLoader/loaders/pdfblocks.py +146 -0
  92. flowtask/components/LangchainLoader/loaders/pdfmark.py +79 -0
  93. flowtask/components/LangchainLoader/loaders/pdftables.py +135 -0
  94. flowtask/components/LangchainLoader/loaders/qa.py +67 -0
  95. flowtask/components/LangchainLoader/loaders/txt.py +55 -0
  96. flowtask/components/LeadIQ.py +650 -0
  97. flowtask/components/Loop.py +253 -0
  98. flowtask/components/Lowes.py +334 -0
  99. flowtask/components/MS365Usage.py +156 -0
  100. flowtask/components/MSTeamsMessages.py +320 -0
  101. flowtask/components/MarketClustering.py +1051 -0
  102. flowtask/components/MergeFiles.py +362 -0
  103. flowtask/components/MilvusOutput.py +87 -0
  104. flowtask/components/NearByStores.py +175 -0
  105. flowtask/components/NetworkNinja/__init__.py +6 -0
  106. flowtask/components/NetworkNinja/models/__init__.py +52 -0
  107. flowtask/components/NetworkNinja/models/abstract.py +177 -0
  108. flowtask/components/NetworkNinja/models/account.py +39 -0
  109. flowtask/components/NetworkNinja/models/client.py +19 -0
  110. flowtask/components/NetworkNinja/models/district.py +14 -0
  111. flowtask/components/NetworkNinja/models/events.py +101 -0
  112. flowtask/components/NetworkNinja/models/forms.py +499 -0
  113. flowtask/components/NetworkNinja/models/market.py +16 -0
  114. flowtask/components/NetworkNinja/models/organization.py +34 -0
  115. flowtask/components/NetworkNinja/models/photos.py +125 -0
  116. flowtask/components/NetworkNinja/models/project.py +44 -0
  117. flowtask/components/NetworkNinja/models/region.py +28 -0
  118. flowtask/components/NetworkNinja/models/store.py +203 -0
  119. flowtask/components/NetworkNinja/models/user.py +151 -0
  120. flowtask/components/NetworkNinja/router.py +854 -0
  121. flowtask/components/Odoo.py +175 -0
  122. flowtask/components/OdooInjector.py +192 -0
  123. flowtask/components/OpenFromXML.py +126 -0
  124. flowtask/components/OpenWeather.py +41 -0
  125. flowtask/components/OpenWithBase.py +616 -0
  126. flowtask/components/OpenWithPandas.py +715 -0
  127. flowtask/components/PGPDecrypt.py +199 -0
  128. flowtask/components/PandasIterator.py +187 -0
  129. flowtask/components/PandasToFile.py +189 -0
  130. flowtask/components/Paradox.py +339 -0
  131. flowtask/components/ParamIterator.py +117 -0
  132. flowtask/components/ParseHTML.py +84 -0
  133. flowtask/components/PlacerStores.py +249 -0
  134. flowtask/components/Pokemon.py +507 -0
  135. flowtask/components/PositiveBot.py +62 -0
  136. flowtask/components/PowerPointSlide.py +400 -0
  137. flowtask/components/PrintMessage.py +127 -0
  138. flowtask/components/ProductCompetitors/__init__.py +5 -0
  139. flowtask/components/ProductCompetitors/parsers/__init__.py +7 -0
  140. flowtask/components/ProductCompetitors/parsers/base.py +72 -0
  141. flowtask/components/ProductCompetitors/parsers/bestbuy.py +86 -0
  142. flowtask/components/ProductCompetitors/parsers/lowes.py +103 -0
  143. flowtask/components/ProductCompetitors/scrapper.py +155 -0
  144. flowtask/components/ProductCompliant.py +169 -0
  145. flowtask/components/ProductInfo/__init__.py +1 -0
  146. flowtask/components/ProductInfo/parsers/__init__.py +5 -0
  147. flowtask/components/ProductInfo/parsers/base.py +83 -0
  148. flowtask/components/ProductInfo/parsers/brother.py +97 -0
  149. flowtask/components/ProductInfo/parsers/canon.py +167 -0
  150. flowtask/components/ProductInfo/parsers/epson.py +118 -0
  151. flowtask/components/ProductInfo/parsers/hp.py +131 -0
  152. flowtask/components/ProductInfo/parsers/samsung.py +97 -0
  153. flowtask/components/ProductInfo/scraper.py +319 -0
  154. flowtask/components/ProductPricing.py +118 -0
  155. flowtask/components/QS.py +261 -0
  156. flowtask/components/QSBase.py +201 -0
  157. flowtask/components/QueryIterator.py +273 -0
  158. flowtask/components/QueryToInsert.py +327 -0
  159. flowtask/components/QueryToPandas.py +432 -0
  160. flowtask/components/RESTClient.py +195 -0
  161. flowtask/components/RethinkDBQuery.py +189 -0
  162. flowtask/components/Rsync.py +74 -0
  163. flowtask/components/RunSSH.py +59 -0
  164. flowtask/components/RunShell.py +71 -0
  165. flowtask/components/SalesForce.py +20 -0
  166. flowtask/components/SaveImageBank/__init__.py +257 -0
  167. flowtask/components/SchedulingVisits.py +592 -0
  168. flowtask/components/ScrapPage.py +216 -0
  169. flowtask/components/ScrapSearch.py +79 -0
  170. flowtask/components/SendNotify.py +257 -0
  171. flowtask/components/SentimentAnalysis.py +694 -0
  172. flowtask/components/ServiceScrapper/__init__.py +5 -0
  173. flowtask/components/ServiceScrapper/parsers/__init__.py +1 -0
  174. flowtask/components/ServiceScrapper/parsers/base.py +94 -0
  175. flowtask/components/ServiceScrapper/parsers/costco.py +93 -0
  176. flowtask/components/ServiceScrapper/scrapper.py +199 -0
  177. flowtask/components/SetVariables.py +156 -0
  178. flowtask/components/SubTask.py +182 -0
  179. flowtask/components/SuiteCRM.py +48 -0
  180. flowtask/components/Switch.py +175 -0
  181. flowtask/components/TableBase.py +148 -0
  182. flowtask/components/TableDelete.py +312 -0
  183. flowtask/components/TableInput.py +143 -0
  184. flowtask/components/TableOutput/TableOutput.py +384 -0
  185. flowtask/components/TableOutput/__init__.py +3 -0
  186. flowtask/components/TableSchema.py +534 -0
  187. flowtask/components/Target.py +223 -0
  188. flowtask/components/ThumbnailGenerator.py +156 -0
  189. flowtask/components/ToPandas.py +67 -0
  190. flowtask/components/TransformRows/TransformRows.py +507 -0
  191. flowtask/components/TransformRows/__init__.py +9 -0
  192. flowtask/components/TransformRows/functions.py +559 -0
  193. flowtask/components/TransposeRows.py +176 -0
  194. flowtask/components/UPCDatabase.py +86 -0
  195. flowtask/components/UnGzip.py +171 -0
  196. flowtask/components/Uncompress.py +172 -0
  197. flowtask/components/UniqueRows.py +126 -0
  198. flowtask/components/Unzip.py +107 -0
  199. flowtask/components/UpdateOperationalVars.py +147 -0
  200. flowtask/components/UploadTo.py +299 -0
  201. flowtask/components/UploadToS3.py +136 -0
  202. flowtask/components/UploadToSFTP.py +160 -0
  203. flowtask/components/UploadToSharepoint.py +205 -0
  204. flowtask/components/UserFunc.py +122 -0
  205. flowtask/components/VivaTracker.py +140 -0
  206. flowtask/components/WSDLClient.py +123 -0
  207. flowtask/components/Wait.py +18 -0
  208. flowtask/components/Walmart.py +199 -0
  209. flowtask/components/Workplace.py +134 -0
  210. flowtask/components/XMLToPandas.py +267 -0
  211. flowtask/components/Zammad/__init__.py +41 -0
  212. flowtask/components/Zammad/models.py +0 -0
  213. flowtask/components/ZoomInfoScraper.py +409 -0
  214. flowtask/components/__init__.py +104 -0
  215. flowtask/components/abstract.py +18 -0
  216. flowtask/components/flow.py +530 -0
  217. flowtask/components/google.py +335 -0
  218. flowtask/components/group.py +221 -0
  219. flowtask/components/py.typed +0 -0
  220. flowtask/components/reviewscrap.py +132 -0
  221. flowtask/components/tAutoincrement.py +117 -0
  222. flowtask/components/tConcat.py +109 -0
  223. flowtask/components/tExplode.py +119 -0
  224. flowtask/components/tFilter.py +184 -0
  225. flowtask/components/tGroup.py +236 -0
  226. flowtask/components/tJoin.py +270 -0
  227. flowtask/components/tMap/__init__.py +9 -0
  228. flowtask/components/tMap/functions.py +54 -0
  229. flowtask/components/tMap/tMap.py +450 -0
  230. flowtask/components/tMelt.py +112 -0
  231. flowtask/components/tMerge.py +114 -0
  232. flowtask/components/tOrder.py +93 -0
  233. flowtask/components/tPandas.py +94 -0
  234. flowtask/components/tPivot.py +71 -0
  235. flowtask/components/tPluckCols.py +76 -0
  236. flowtask/components/tUnnest.py +82 -0
  237. flowtask/components/user.py +401 -0
  238. flowtask/conf.py +457 -0
  239. flowtask/download.py +102 -0
  240. flowtask/events/__init__.py +11 -0
  241. flowtask/events/events/__init__.py +20 -0
  242. flowtask/events/events/abstract.py +95 -0
  243. flowtask/events/events/alerts/__init__.py +362 -0
  244. flowtask/events/events/alerts/colfunctions.py +131 -0
  245. flowtask/events/events/alerts/functions.py +158 -0
  246. flowtask/events/events/dummy.py +12 -0
  247. flowtask/events/events/exec.py +124 -0
  248. flowtask/events/events/file/__init__.py +7 -0
  249. flowtask/events/events/file/base.py +51 -0
  250. flowtask/events/events/file/copy.py +23 -0
  251. flowtask/events/events/file/delete.py +16 -0
  252. flowtask/events/events/interfaces/__init__.py +9 -0
  253. flowtask/events/events/interfaces/client.py +67 -0
  254. flowtask/events/events/interfaces/credentials.py +28 -0
  255. flowtask/events/events/interfaces/notifications.py +58 -0
  256. flowtask/events/events/jira.py +122 -0
  257. flowtask/events/events/log.py +26 -0
  258. flowtask/events/events/logerr.py +52 -0
  259. flowtask/events/events/notify.py +59 -0
  260. flowtask/events/events/notify_event.py +160 -0
  261. flowtask/events/events/publish.py +54 -0
  262. flowtask/events/events/sendfile.py +104 -0
  263. flowtask/events/events/task.py +97 -0
  264. flowtask/events/events/teams.py +98 -0
  265. flowtask/events/events/webhook.py +58 -0
  266. flowtask/events/manager.py +287 -0
  267. flowtask/exceptions.c +39393 -0
  268. flowtask/exceptions.cpython-39-x86_64-linux-gnu.so +0 -0
  269. flowtask/extensions/__init__.py +3 -0
  270. flowtask/extensions/abstract.py +82 -0
  271. flowtask/extensions/logging/__init__.py +65 -0
  272. flowtask/hooks/__init__.py +9 -0
  273. flowtask/hooks/actions/__init__.py +22 -0
  274. flowtask/hooks/actions/abstract.py +66 -0
  275. flowtask/hooks/actions/dummy.py +23 -0
  276. flowtask/hooks/actions/jira.py +74 -0
  277. flowtask/hooks/actions/rest.py +320 -0
  278. flowtask/hooks/actions/sampledata.py +37 -0
  279. flowtask/hooks/actions/sensor.py +23 -0
  280. flowtask/hooks/actions/task.py +9 -0
  281. flowtask/hooks/actions/ticket.py +37 -0
  282. flowtask/hooks/actions/zammad.py +55 -0
  283. flowtask/hooks/hook.py +62 -0
  284. flowtask/hooks/models.py +17 -0
  285. flowtask/hooks/service.py +187 -0
  286. flowtask/hooks/step.py +91 -0
  287. flowtask/hooks/types/__init__.py +23 -0
  288. flowtask/hooks/types/base.py +129 -0
  289. flowtask/hooks/types/brokers/__init__.py +11 -0
  290. flowtask/hooks/types/brokers/base.py +54 -0
  291. flowtask/hooks/types/brokers/mqtt.py +35 -0
  292. flowtask/hooks/types/brokers/rabbitmq.py +82 -0
  293. flowtask/hooks/types/brokers/redis.py +83 -0
  294. flowtask/hooks/types/brokers/sqs.py +44 -0
  295. flowtask/hooks/types/fs.py +232 -0
  296. flowtask/hooks/types/http.py +49 -0
  297. flowtask/hooks/types/imap.py +200 -0
  298. flowtask/hooks/types/jira.py +279 -0
  299. flowtask/hooks/types/mail.py +205 -0
  300. flowtask/hooks/types/postgres.py +98 -0
  301. flowtask/hooks/types/responses/__init__.py +8 -0
  302. flowtask/hooks/types/responses/base.py +5 -0
  303. flowtask/hooks/types/sharepoint.py +288 -0
  304. flowtask/hooks/types/ssh.py +141 -0
  305. flowtask/hooks/types/tagged.py +59 -0
  306. flowtask/hooks/types/upload.py +85 -0
  307. flowtask/hooks/types/watch.py +71 -0
  308. flowtask/hooks/types/web.py +36 -0
  309. flowtask/interfaces/AzureClient.py +137 -0
  310. flowtask/interfaces/AzureGraph.py +839 -0
  311. flowtask/interfaces/Boto3Client.py +326 -0
  312. flowtask/interfaces/DropboxClient.py +173 -0
  313. flowtask/interfaces/ExcelHandler.py +94 -0
  314. flowtask/interfaces/FTPClient.py +131 -0
  315. flowtask/interfaces/GoogleCalendar.py +201 -0
  316. flowtask/interfaces/GoogleClient.py +133 -0
  317. flowtask/interfaces/GoogleDrive.py +127 -0
  318. flowtask/interfaces/GoogleGCS.py +89 -0
  319. flowtask/interfaces/GoogleGeocoding.py +93 -0
  320. flowtask/interfaces/GoogleLang.py +114 -0
  321. flowtask/interfaces/GooglePub.py +61 -0
  322. flowtask/interfaces/GoogleSheet.py +68 -0
  323. flowtask/interfaces/IMAPClient.py +137 -0
  324. flowtask/interfaces/O365Calendar.py +113 -0
  325. flowtask/interfaces/O365Client.py +220 -0
  326. flowtask/interfaces/OneDrive.py +284 -0
  327. flowtask/interfaces/Outlook.py +155 -0
  328. flowtask/interfaces/ParrotBot.py +130 -0
  329. flowtask/interfaces/SSHClient.py +378 -0
  330. flowtask/interfaces/Sharepoint.py +496 -0
  331. flowtask/interfaces/__init__.py +36 -0
  332. flowtask/interfaces/azureauth.py +119 -0
  333. flowtask/interfaces/cache.py +201 -0
  334. flowtask/interfaces/client.py +82 -0
  335. flowtask/interfaces/compress.py +525 -0
  336. flowtask/interfaces/credentials.py +124 -0
  337. flowtask/interfaces/d2l.py +239 -0
  338. flowtask/interfaces/databases/__init__.py +5 -0
  339. flowtask/interfaces/databases/db.py +223 -0
  340. flowtask/interfaces/databases/documentdb.py +55 -0
  341. flowtask/interfaces/databases/rethink.py +39 -0
  342. flowtask/interfaces/dataframes/__init__.py +11 -0
  343. flowtask/interfaces/dataframes/abstract.py +21 -0
  344. flowtask/interfaces/dataframes/arrow.py +71 -0
  345. flowtask/interfaces/dataframes/dt.py +69 -0
  346. flowtask/interfaces/dataframes/pandas.py +167 -0
  347. flowtask/interfaces/dataframes/polars.py +60 -0
  348. flowtask/interfaces/db.py +263 -0
  349. flowtask/interfaces/env.py +46 -0
  350. flowtask/interfaces/func.py +137 -0
  351. flowtask/interfaces/http.py +1780 -0
  352. flowtask/interfaces/locale.py +40 -0
  353. flowtask/interfaces/log.py +75 -0
  354. flowtask/interfaces/mask.py +143 -0
  355. flowtask/interfaces/notification.py +154 -0
  356. flowtask/interfaces/playwright.py +339 -0
  357. flowtask/interfaces/powerpoint.py +368 -0
  358. flowtask/interfaces/py.typed +0 -0
  359. flowtask/interfaces/qs.py +376 -0
  360. flowtask/interfaces/result.py +87 -0
  361. flowtask/interfaces/selenium_service.py +779 -0
  362. flowtask/interfaces/smartsheet.py +154 -0
  363. flowtask/interfaces/stat.py +39 -0
  364. flowtask/interfaces/task.py +96 -0
  365. flowtask/interfaces/template.py +118 -0
  366. flowtask/interfaces/vectorstores/__init__.py +1 -0
  367. flowtask/interfaces/vectorstores/abstract.py +133 -0
  368. flowtask/interfaces/vectorstores/milvus.py +669 -0
  369. flowtask/interfaces/zammad.py +107 -0
  370. flowtask/models.py +193 -0
  371. flowtask/parsers/__init__.py +15 -0
  372. flowtask/parsers/_yaml.c +11978 -0
  373. flowtask/parsers/_yaml.cpython-39-x86_64-linux-gnu.so +0 -0
  374. flowtask/parsers/argparser.py +235 -0
  375. flowtask/parsers/base.c +15155 -0
  376. flowtask/parsers/base.cpython-39-x86_64-linux-gnu.so +0 -0
  377. flowtask/parsers/json.c +11968 -0
  378. flowtask/parsers/json.cpython-39-x86_64-linux-gnu.so +0 -0
  379. flowtask/parsers/maps.py +49 -0
  380. flowtask/parsers/toml.c +11968 -0
  381. flowtask/parsers/toml.cpython-39-x86_64-linux-gnu.so +0 -0
  382. flowtask/plugins/__init__.py +16 -0
  383. flowtask/plugins/components/__init__.py +0 -0
  384. flowtask/plugins/handler/__init__.py +45 -0
  385. flowtask/plugins/importer.py +31 -0
  386. flowtask/plugins/sources/__init__.py +0 -0
  387. flowtask/runner.py +283 -0
  388. flowtask/scheduler/__init__.py +9 -0
  389. flowtask/scheduler/functions.py +493 -0
  390. flowtask/scheduler/handlers/__init__.py +8 -0
  391. flowtask/scheduler/handlers/manager.py +504 -0
  392. flowtask/scheduler/handlers/models.py +58 -0
  393. flowtask/scheduler/handlers/service.py +72 -0
  394. flowtask/scheduler/notifications.py +65 -0
  395. flowtask/scheduler/scheduler.py +993 -0
  396. flowtask/services/__init__.py +0 -0
  397. flowtask/services/bots/__init__.py +0 -0
  398. flowtask/services/bots/telegram.py +264 -0
  399. flowtask/services/files/__init__.py +11 -0
  400. flowtask/services/files/manager.py +522 -0
  401. flowtask/services/files/model.py +37 -0
  402. flowtask/services/files/service.py +767 -0
  403. flowtask/services/jira/__init__.py +3 -0
  404. flowtask/services/jira/jira_actions.py +191 -0
  405. flowtask/services/tasks/__init__.py +13 -0
  406. flowtask/services/tasks/launcher.py +213 -0
  407. flowtask/services/tasks/manager.py +323 -0
  408. flowtask/services/tasks/service.py +275 -0
  409. flowtask/services/tasks/task_manager.py +376 -0
  410. flowtask/services/tasks/tasks.py +155 -0
  411. flowtask/storages/__init__.py +16 -0
  412. flowtask/storages/exceptions.py +12 -0
  413. flowtask/storages/files/__init__.py +8 -0
  414. flowtask/storages/files/abstract.py +29 -0
  415. flowtask/storages/files/filesystem.py +66 -0
  416. flowtask/storages/tasks/__init__.py +19 -0
  417. flowtask/storages/tasks/abstract.py +26 -0
  418. flowtask/storages/tasks/database.py +33 -0
  419. flowtask/storages/tasks/filesystem.py +108 -0
  420. flowtask/storages/tasks/github.py +119 -0
  421. flowtask/storages/tasks/memory.py +45 -0
  422. flowtask/storages/tasks/row.py +25 -0
  423. flowtask/tasks/__init__.py +0 -0
  424. flowtask/tasks/abstract.py +526 -0
  425. flowtask/tasks/command.py +118 -0
  426. flowtask/tasks/pile.py +486 -0
  427. flowtask/tasks/py.typed +0 -0
  428. flowtask/tasks/task.py +778 -0
  429. flowtask/template/__init__.py +161 -0
  430. flowtask/tests.py +257 -0
  431. flowtask/types/__init__.py +8 -0
  432. flowtask/types/typedefs.c +11347 -0
  433. flowtask/types/typedefs.cpython-39-x86_64-linux-gnu.so +0 -0
  434. flowtask/utils/__init__.py +24 -0
  435. flowtask/utils/constants.py +117 -0
  436. flowtask/utils/encoders.py +21 -0
  437. flowtask/utils/executor.py +112 -0
  438. flowtask/utils/functions.cpp +14280 -0
  439. flowtask/utils/functions.cpython-39-x86_64-linux-gnu.so +0 -0
  440. flowtask/utils/json.cpp +13349 -0
  441. flowtask/utils/json.cpython-39-x86_64-linux-gnu.so +0 -0
  442. flowtask/utils/mail.py +63 -0
  443. flowtask/utils/parseqs.c +13324 -0
  444. flowtask/utils/parserqs.cpython-39-x86_64-linux-gnu.so +0 -0
  445. flowtask/utils/stats.py +308 -0
  446. flowtask/utils/transformations.py +74 -0
  447. flowtask/utils/uv.py +12 -0
  448. flowtask/utils/validators.py +97 -0
  449. flowtask/version.py +11 -0
  450. flowtask-5.8.4.dist-info/LICENSE +201 -0
  451. flowtask-5.8.4.dist-info/METADATA +209 -0
  452. flowtask-5.8.4.dist-info/RECORD +470 -0
  453. flowtask-5.8.4.dist-info/WHEEL +6 -0
  454. flowtask-5.8.4.dist-info/entry_points.txt +3 -0
  455. flowtask-5.8.4.dist-info/top_level.txt +2 -0
  456. plugins/components/CreateQR.py +39 -0
  457. plugins/components/TestComponent.py +28 -0
  458. plugins/components/Use1.py +13 -0
  459. plugins/components/Workplace.py +117 -0
  460. plugins/components/__init__.py +3 -0
  461. plugins/sources/__init__.py +0 -0
  462. plugins/sources/get_populartimes.py +78 -0
  463. plugins/sources/google.py +150 -0
  464. plugins/sources/hubspot.py +679 -0
  465. plugins/sources/icims.py +679 -0
  466. plugins/sources/mobileinsight.py +501 -0
  467. plugins/sources/newrelic.py +262 -0
  468. plugins/sources/uap.py +268 -0
  469. plugins/sources/venu.py +244 -0
  470. plugins/sources/vocinity.py +314 -0
@@ -0,0 +1,97 @@
1
+ import re
2
+ from typing import Dict, Any, Optional, List
3
+ from bs4 import BeautifulSoup
4
+ from .base import ParserBase
5
+
6
class BrotherParser(ParserBase):
    """
    Parser for Brother product information.

    Extracts product details from Brother's USA website using Selenium.
    """
    domain = "brother-usa.com"
    search_format = "site:brother-usa.com {}"
    product_url_pattern = "brother-usa.com/products/"

    async def parse(self, url: str, search_term: str, retailer: Optional[str] = None) -> Dict[str, Any]:
        """
        Parse product information from a Brother URL using Selenium.

        The page is loaded through the Selenium driver and its rendered
        source is parsed with BeautifulSoup. Any exception raised while
        loading or parsing is logged and reported through ``parse_status``
        instead of being propagated; the driver is closed in every case.

        Args:
            url: Brother product URL
            search_term: Original search term
            retailer: Optional retailer information (not used for Brother)

        Returns:
            Dictionary with product information. ``parse_status`` is
            ``"success"`` or ``"error: <message>"``.
        """
        # Imported locally so this block does not depend on the module's
        # import header.
        from urllib.parse import urljoin

        result = {
            "source_url": url,
            "search_term": search_term,
            "model_code": None,
            "product_name": None,
            "price": None,
            "specs": None,
            "images": None,
            "parse_status": "pending"
        }

        try:
            self.headless = True
            driver = await self.get_driver()
            await self.get_page(url)

            page_content = driver.page_source
            soup = BeautifulSoup(page_content, "html.parser")

            # Extract model code - selector taken from a capture of the page
            model_elem = soup.select_one("h1.janus-model-number")
            if model_elem:
                result["model_code"] = model_elem.text.strip()

            # Extract product name - selector taken from a capture of the page
            name_elem = soup.select_one("h2.janus-product-title")
            if name_elem:
                result["product_name"] = name_elem.text.strip()

            # Extract price (TODO: adjust the selector to the real site)
            price_elem = soup.select_one("span.price")
            if price_elem:
                result["price"] = price_elem.text.strip()

            # Extract image (TODO: adjust the selector to the real site)
            main_img = soup.select_one("img.product-image")
            if main_img:
                src = main_img.get("src")
                if src:
                    # urljoin leaves absolute URLs untouched, resolves
                    # protocol-relative sources ("//cdn.example/x.png") to
                    # https, and anchors relative paths at the site root.
                    # Plain prefixing used to turn "//host/x" into
                    # "https://<domain>//host/x".
                    result["images"] = [urljoin(f"https://{self.domain}/", src)]

            result["parse_status"] = "success"

        except Exception as e:
            self._logger.error(f"Error parsing Brother product: {str(e)}")
            result["parse_status"] = f"error: {str(e)}"
        finally:
            self.close_driver()

        return result

    def get_product_urls(self, search_results: List[Dict[str, str]], max_urls: int = 5) -> List[str]:
        """
        Extract relevant product URLs from search results.

        Results are filtered first and capped afterwards, so non-product
        hits near the top of the list do not consume the ``max_urls`` budget.

        Args:
            search_results: List of search result dictionaries; the URL is
                read from the first non-empty of the 'link', 'href' and
                'url' keys
            max_urls: Maximum number of URLs to return

        Returns:
            List of product URLs that match the Brother product pattern
        """
        urls: List[str] = []
        for result in search_results or []:
            if len(urls) >= max_urls:
                break
            url = result.get('link') or result.get('href') or result.get('url')
            if url and self.domain in url and self.product_url_pattern in url:
                urls.append(url)
        return urls
@@ -0,0 +1,167 @@
1
+ import re
2
+ from typing import Dict, Any, Optional, List
3
+ from bs4 import BeautifulSoup
4
+ from .base import ParserBase
5
+
6
class CanonParser(ParserBase):
    """
    Parser for Canon product information.

    Extracts product details from Canon's USA and Canada websites using Selenium.
    """
    domain_us = "usa.canon.com"
    domain_ca = "canon.ca"
    product_url_pattern_us = "usa.canon.com/shop/p/"
    product_url_pattern_ca = "canon.ca/en/product"

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.region = "us"  # Default region
        self.retailer = None  # Store retailer info

    def determine_region(self, retailer: Optional[str]) -> str:
        """
        Determine region based on retailer information.

        Args:
            retailer: Retailer string that may contain region information

        Returns:
            'ca' when the retailer mentions "canada" (case-insensitive),
            'us' otherwise (including when no retailer is given)
        """
        if retailer and 'canada' in retailer.lower():
            return 'ca'
        return 'us'  # Default to US if no Canadian marker is found

    def create_search_query(self, term: str) -> str:
        """
        Create region-specific search query.

        Also refreshes ``self.region`` from the stored retailer.

        Args:
            term: Search term (typically product model)

        Returns:
            Formatted search query for the appropriate region
        """
        # Determine region based on stored retailer info
        self.region = self.determine_region(self.retailer)
        domain = self.domain_ca if self.region == 'ca' else self.domain_us
        return f"site:{domain} {term}"

    async def parse(self, url: str, search_term: str, retailer: Optional[str] = None) -> Dict[str, Any]:
        """
        Parse product information from a Canon URL using Selenium.

        The page is loaded through the Selenium driver and its rendered
        source is parsed with BeautifulSoup, using region-specific selectors
        for model code and product name. Any exception raised while loading
        or parsing is logged and reported through ``parse_status`` instead
        of being propagated; the driver is closed in every case.

        Args:
            url: Canon product URL
            search_term: Original search term
            retailer: Optional retailer information to determine region

        Returns:
            Dictionary with product information. ``parse_status`` is
            ``"success"`` or ``"error: <message>"``.
        """
        # Imported locally so this block does not depend on the module's
        # import header.
        from urllib.parse import urljoin

        self.retailer = retailer  # Store retailer info for use in other methods
        self.region = self.determine_region(retailer)

        result = {
            "source_url": url,
            "search_term": search_term,
            "model_code": None,
            "product_name": None,
            "price": None,
            "specs": None,
            "images": None,
            "parse_status": "pending",
            "region": self.region
        }

        try:
            self.headless = True
            driver = await self.get_driver()
            await self.get_page(url)

            page_content = driver.page_source
            soup = BeautifulSoup(page_content, "html.parser")

            if self.region == 'ca':
                # Selectors for the Canadian site.
                # Model code: any <p> whose class contains 'ItemCode'.
                model_elem = soup.select_one("p[class*='ItemCode']")
                if not model_elem:
                    # Fallback: locate the "Item Code" label text and use
                    # the element that contains it.
                    label = soup.find(string=lambda text: text and "Item Code" in text)
                    if label:
                        model_elem = label.parent

                if model_elem:
                    model_text = model_elem.text.strip()
                    # Drop the "Item Code:" label whether or not a space
                    # follows the colon, then trim what is left.
                    result["model_code"] = model_text.replace("Item Code:", "").strip()

                # Extract product name (TODO: adjust the selector to the real site)
                name_elem = soup.select_one("h1.ProductName")
                if name_elem:
                    result["product_name"] = name_elem.text.strip()

            else:
                # Selectors for the US site.
                product_name_elem = soup.select_one("span.base[data-ui-id='page-title-wrapper'][itemprop='name']")
                if product_name_elem:
                    result["product_name"] = product_name_elem.text.strip()

                sku_elem = soup.select_one("div.value[itemprop='sku']")
                if sku_elem:
                    result["model_code"] = sku_elem.text.strip()

            # Extract price (shared by both sites; adjust if necessary)
            price_elem = soup.select_one("[data-price-type='finalPrice'] .price")
            if price_elem:
                result["price"] = price_elem.text.strip()

            # Extract image (shared by both sites; adjust if necessary)
            main_img = soup.select_one("img[data-role='product-image']")
            if main_img:
                src = main_img.get("src")
                if src:
                    domain = self.domain_ca if self.region == 'ca' else self.domain_us
                    # urljoin leaves absolute URLs untouched, resolves
                    # protocol-relative sources ("//cdn.example/x.png") to
                    # https, and anchors relative paths at the site root.
                    # Plain prefixing used to turn "//host/x" into
                    # "https://<domain>//host/x".
                    result["images"] = [urljoin(f"https://{domain}/", src)]

            result["parse_status"] = "success"

        except Exception as e:
            self._logger.error(f"Error parsing Canon product: {str(e)}")
            result["parse_status"] = f"error: {str(e)}"
        finally:
            self.close_driver()

        return result

    def get_product_urls(self, search_results: List[Dict[str, str]], max_urls: int = 5) -> List[str]:
        """
        Extract relevant product URLs from search results.

        The domain and URL pattern are chosen from the current
        ``self.region``. Results are filtered first and capped afterwards,
        so non-product hits near the top of the list do not consume the
        ``max_urls`` budget.

        Args:
            search_results: List of search result dictionaries; the URL is
                read from the first non-empty of the 'link', 'href' and
                'url' keys
            max_urls: Maximum number of URLs to return

        Returns:
            List of product URLs that match the Canon product pattern
        """
        pattern = self.product_url_pattern_ca if self.region == 'ca' else self.product_url_pattern_us
        domain = self.domain_ca if self.region == 'ca' else self.domain_us

        urls: List[str] = []
        for result in search_results or []:
            if len(urls) >= max_urls:
                break
            url = result.get('link') or result.get('href') or result.get('url')
            if url and domain in url and pattern in url:
                urls.append(url)
        return urls
@@ -0,0 +1,118 @@
1
+ import re
2
+ from typing import Dict, Any, Optional
3
+ import httpx
4
+ from bs4 import BeautifulSoup
5
+ from .base import ParserBase
6
+
7
class EpsonParser(ParserBase):
    """
    Parser for Epson product information.

    Extracts product details from Epson's website.
    """
    domain = "epson.com"
    search_format = "site:epson.com {} product"
    # Captures whatever follows a "/p/" or "/s/" path component, stopping at
    # the query string or fragment. The character class is "[ps]" (not
    # "[p|s]", which would also accept a literal "|").
    model_pattern = r"^.*\/\b[ps]\/([^?#]+)"

    @staticmethod
    def _to_absolute_url(domain: str, src: str) -> str:
        """
        Return ``src`` as an absolute URL.

        Args:
            domain: Host used to anchor relative paths
            src: Image URL as found in the HTML attribute

        Returns:
            ``src`` unchanged when it already has an http(s) scheme, with
            ``https:`` prepended when it is protocol-relative (``//host/...``),
            otherwise rooted at ``https://<domain>/``
        """
        if src.startswith(("http://", "https://")):
            return src
        if src.startswith("//"):
            # Protocol-relative URL: the host is already present, only the
            # scheme is missing. Prefixing the domain would corrupt it.
            return f"https:{src}"
        if src.startswith("/"):
            return f"https://{domain}{src}"
        return f"https://{domain}/{src}"

    def extract_model_code(self, url: str) -> Optional[str]:
        """
        Extract model code from URL using the regex pattern and clean it.

        Args:
            url: URL to extract model code from

        Returns:
            Cleaned model code (trailing slashes and a leading "SPT_" prefix
            removed) or None if not found
        """
        match = re.search(self.model_pattern, url)
        if not match:
            return None

        # A trailing "/" belongs to the URL, not to the model code.
        model_code = match.group(1).rstrip("/")
        if not model_code:
            return None

        # Drop the "SPT_" prefix when present.
        if model_code.startswith("SPT_"):
            model_code = model_code[len("SPT_"):]

        return model_code

    async def parse(self, url: str, search_term: str, retailer: Optional[str] = None) -> Dict[str, Any]:
        """
        Parse product information from an Epson URL.

        Args:
            url: Epson product URL
            search_term: Original search term
            retailer: Optional retailer information (not used by this parser)

        Returns:
            Dictionary with product information. ``parse_status`` is
            "success" when the page was fetched and parsed, otherwise a
            string starting with "error:". Errors are reported through this
            field rather than raised.
        """
        result = {
            "source_url": url,
            "search_term": search_term,
            "model_code": self.extract_model_code(url),
            "product_name": None,
            "price": None,
            "description": None,
            "specs": None,
            "images": None,
            "parse_status": "pending"
        }

        try:
            # Get the page content
            response = await self._get(url, headers=self.headers, use_proxy=True)
            if not response or response.status_code != 200:
                result["parse_status"] = f"error: HTTP {response.status_code if response else 'no response'}"
                return result

            # Parse the HTML
            soup = BeautifulSoup(response.text, "html.parser")

            # Extract product name
            product_name_elem = soup.select_one("h1.product-name, h1.product-title")
            if product_name_elem:
                result["product_name"] = product_name_elem.text.strip()

            # Extract price
            price_elem = soup.select_one("span.price, div.product-price")
            if price_elem:
                result["price"] = price_elem.text.strip()

            # Extract description
            desc_elem = soup.select_one("div.product-description, div.product-overview")
            if desc_elem:
                result["description"] = desc_elem.text.strip()

            # Extract specifications as a {label: value} mapping
            specs = {}
            specs_section = soup.select_one("div.product-specifications, div.tech-specs")
            if specs_section:
                for row in specs_section.select("tr, div.spec-row"):
                    label = row.select_one("th, .spec-label")
                    value = row.select_one("td, .spec-value")
                    if label and value:
                        specs[label.text.strip()] = value.text.strip()
            # NOTE(review): specs is stored even when empty (no section found);
            # confirm callers expect {} rather than None in that case.
            result["specs"] = specs

            # Extract images; "data-src" is the fallback when "src" is absent
            image_urls = []
            for img in soup.select("div.product-gallery img, img.product-image"):
                src = img.get("src") or img.get("data-src")
                if src:
                    image_urls.append(self._to_absolute_url(self.domain, src))
            if image_urls:
                result["images"] = image_urls

            result["parse_status"] = "success"

        except httpx.RequestError as e:
            result["parse_status"] = f"error: request failed - {str(e)}"
        except Exception as e:
            self._logger.error(f"Error parsing Epson product: {str(e)}")
            result["parse_status"] = f"error: {str(e)}"

        return result
@@ -0,0 +1,131 @@
1
+ import re
2
+ import asyncio
3
+ from typing import Dict, Any, Optional, List
4
+ import httpx
5
+ from bs4 import BeautifulSoup
6
+ from .base import ParserBase
7
+
8
class HPParser(ParserBase):
    """
    Parser for HP product information.

    Extracts product details from HP's website using Selenium for dynamic content.
    """
    domain = "hp.com"
    search_format = "site:hp.com {}"  # Without the word 'product', unlike Epson
    product_url_pattern = "hp.com/us-en/shop/pdp/"  # Pattern for valid product URLs

    @staticmethod
    def _to_absolute_url(domain: str, src: str) -> str:
        """
        Return ``src`` as an absolute URL.

        Args:
            domain: Host used to anchor relative paths
            src: Image URL as found in the HTML attribute

        Returns:
            ``src`` unchanged when it already has an http(s) scheme, with
            ``https:`` prepended when it is protocol-relative (``//host/...``),
            otherwise rooted at ``https://<domain>/``
        """
        if src.startswith(("http://", "https://")):
            return src
        if src.startswith("//"):
            # Protocol-relative URL: the host is already present, only the
            # scheme is missing. Prefixing the domain would corrupt it.
            return f"https:{src}"
        if src.startswith("/"):
            return f"https://{domain}{src}"
        return f"https://{domain}/{src}"

    async def parse(self, url: str, search_term: str, retailer: Optional[str] = None) -> Dict[str, Any]:
        """
        Parse product information from an HP URL using Selenium.

        Args:
            url: HP product URL
            search_term: Original search term
            retailer: Optional retailer information (not used by this parser)

        Returns:
            Dictionary with product information. ``parse_status`` is
            "success" when the page was loaded and parsed, otherwise a
            string starting with "error:". Errors are reported through this
            field rather than raised.
        """
        result = {
            "source_url": url,
            "search_term": search_term,
            "model_code": None,
            "product_name": None,
            "price": None,
            "specs": None,
            "images": None,
            "parse_status": "pending"
        }

        try:
            # Selenium is used because the HP page has dynamic content
            driver = await self.get_driver()
            await self.get_page(url)

            # Scroll so that all the content gets loaded
            self._execute_scroll(scroll_pause_time=1.5, max_scrolls=5)

            # Grab the rendered page
            page_content = driver.page_source
            soup = BeautifulSoup(page_content, "html.parser")

            # 1. Product name
            product_name_elem = soup.select_one("h1[data-test-hook='@hpstellar/core/typography']")
            if product_name_elem:
                result["product_name"] = product_name_elem.text.strip()

            # 2. Price - read directly from the data-widget-item-price attribute
            price_attr_elem = soup.select_one("[data-widget-item-price]")
            if price_attr_elem:
                price_value = price_attr_elem.get("data-widget-item-price")
                if price_value:
                    result["price"] = f"${price_value}"
            # Fall back to the visible price element whenever the attribute
            # did not yield a price (element missing OR attribute empty).
            if not result["price"]:
                price_elem = soup.select_one("span[data-test-hook='@hpstellar/core/typography'][class*='sale-subscription-price']")
                if price_elem:
                    result["price"] = price_elem.text.strip()

            # 3. Model code - read directly from the data-widget-item-sku attribute
            sku_elem = soup.select_one("[data-widget-item-sku]")
            if sku_elem:
                result["model_code"] = sku_elem.get("data-widget-item-sku")

            # If the attribute gave nothing, fall back to span.sku
            if not result["model_code"]:
                model_elem = soup.select_one("span.sku")
                if model_elem:
                    # Remove the "Product #" label regardless of the
                    # whitespace around the "#".
                    result["model_code"] = re.sub(r"Product\s*#\s*", "", model_elem.text).strip()

            # 4. A SINGLE product image - try the gallery button first
            main_img = soup.select_one("button[data-gtm-category='linkClick'][data-gtm-id='gallery'] img")

            # Otherwise try the image-with-placeholder wrapper
            if not main_img:
                main_img = soup.select_one("[data-test-hook='@hpstellar/core/image-with-placeholder'] img")

            if main_img:
                src = main_img.get("src") or main_img.get("data-src")
                if src:
                    # Make sure the URL is absolute; stored as a one-item list
                    result["images"] = [self._to_absolute_url(self.domain, src)]

            result["parse_status"] = "success"

        except httpx.RequestError as e:
            result["parse_status"] = f"error: request failed - {str(e)}"
        except Exception as e:
            self._logger.error(f"Error parsing HP product: {str(e)}")
            result["parse_status"] = f"error: {str(e)}"
        finally:
            # Always close the driver
            self.close_driver()

        return result

    def get_product_urls(self, search_results: List[Dict[str, str]], max_urls: int = 5) -> List[str]:
        """
        Extract relevant product URLs from search results.

        Only the first ``max_urls`` search results are inspected.

        Args:
            search_results: List of search result dictionaries; the URL is
                read from the 'link', 'href' or 'url' key
            max_urls: Number of leading search results to inspect, which is
                also the maximum number of URLs returned

        Returns:
            List of product URLs that match the HP product pattern
        """
        urls = []
        for result in search_results[:max_urls]:
            url = result.get('link') or result.get('href') or result.get('url')
            # Keep only URLs that point at an HP product page
            if url and self.domain in url and self.product_url_pattern in url:
                urls.append(url)
        return urls
@@ -0,0 +1,97 @@
1
+ import re
2
+ from typing import Dict, Any, Optional, List
3
+ from bs4 import BeautifulSoup
4
+ from .base import ParserBase
5
+
6
class SamsungParser(ParserBase):
    """
    Parser for Samsung product information.

    Extracts product details from Samsung's website using Selenium.
    """
    domain = "samsung.com"
    search_format = "site:samsung.com {}"
    product_url_pattern = "samsung.com/products/"  # Adjust to the real URL structure

    @staticmethod
    def _to_absolute_url(domain: str, src: str) -> str:
        """
        Return ``src`` as an absolute URL.

        Args:
            domain: Host used to anchor relative paths
            src: Image URL as found in the HTML attribute

        Returns:
            ``src`` unchanged when it already has an http(s) scheme, with
            ``https:`` prepended when it is protocol-relative (``//host/...``),
            otherwise rooted at ``https://<domain>/``
        """
        if src.startswith(("http://", "https://")):
            return src
        if src.startswith("//"):
            # Protocol-relative URL: the host is already present, only the
            # scheme is missing. Prefixing the domain would corrupt it.
            return f"https:{src}"
        if src.startswith("/"):
            return f"https://{domain}{src}"
        return f"https://{domain}/{src}"

    async def parse(self, url: str, search_term: str, retailer: Optional[str] = None) -> Dict[str, Any]:
        """
        Parse product information from a Samsung URL using Selenium.

        Args:
            url: Samsung product URL
            search_term: Original search term
            retailer: Optional retailer information (not used for Samsung)

        Returns:
            Dictionary with product information. ``parse_status`` is
            "success" when the page was loaded and parsed, otherwise a
            string starting with "error:". Errors are reported through this
            field rather than raised.
        """
        result = {
            "source_url": url,
            "search_term": search_term,
            "model_code": None,
            "product_name": None,
            "price": None,
            "specs": None,
            "images": None,
            "parse_status": "pending"
        }

        try:
            self.headless = True
            driver = await self.get_driver()
            await self.get_page(url)

            page_content = driver.page_source
            soup = BeautifulSoup(page_content, "html.parser")

            # Extract model code
            model_elem = soup.select_one('span[data-testid="atom_label"]')
            if model_elem:
                result["model_code"] = model_elem.text.strip()

            # Extract product name
            name_elem = soup.select_one("div[class*='ProductTitle_product']")
            if name_elem:
                result["product_name"] = name_elem.text.strip()

            # Extract price (selector is a placeholder; adjust to the real site)
            price_elem = soup.select_one("span.price")
            if price_elem:
                result["price"] = price_elem.text.strip()

            # Extract image (selector is a placeholder; adjust to the real site)
            main_img = soup.select_one("img.product-image")
            if main_img:
                src = main_img.get("src")
                if src:
                    result["images"] = [self._to_absolute_url(self.domain, src)]

            result["parse_status"] = "success"

        except Exception as e:
            self._logger.error(f"Error parsing Samsung product: {str(e)}")
            result["parse_status"] = f"error: {str(e)}"
        finally:
            self.close_driver()

        return result

    def get_product_urls(self, search_results: List[Dict[str, str]], max_urls: int = 1) -> List[str]:
        """
        Extract relevant product URLs from search results.

        The default of 1 means only the first search result is considered.
        Unlike the class-level ``product_url_pattern`` suggests, only the
        domain is checked here, which keeps the filter deliberately loose.

        Args:
            search_results: List of search result dictionaries; the URL is
                read from the 'link', 'href' or 'url' key
            max_urls: Number of leading search results to inspect, which is
                also the maximum number of URLs returned (default: 1)

        Returns:
            List of URLs from the inspected results that contain the Samsung domain
        """
        urls = []
        for result in search_results[:max_urls]:
            url = result.get('link') or result.get('href') or result.get('url')
            # Less restrictive than the URL pattern: the domain alone is enough
            if url and self.domain in url:
                urls.append(url)
        return urls