flowtask 5.8.4__cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (470) hide show
  1. flowtask/__init__.py +93 -0
  2. flowtask/__main__.py +38 -0
  3. flowtask/bots/__init__.py +6 -0
  4. flowtask/bots/check.py +93 -0
  5. flowtask/bots/codebot.py +51 -0
  6. flowtask/components/ASPX.py +148 -0
  7. flowtask/components/AddDataset.py +352 -0
  8. flowtask/components/Amazon.py +523 -0
  9. flowtask/components/AutoTask.py +314 -0
  10. flowtask/components/Azure.py +80 -0
  11. flowtask/components/AzureUsers.py +106 -0
  12. flowtask/components/BaseAction.py +91 -0
  13. flowtask/components/BaseLoop.py +198 -0
  14. flowtask/components/BestBuy.py +800 -0
  15. flowtask/components/CSVToGCS.py +120 -0
  16. flowtask/components/CompanyScraper/__init__.py +1 -0
  17. flowtask/components/CompanyScraper/parsers/__init__.py +6 -0
  18. flowtask/components/CompanyScraper/parsers/base.py +102 -0
  19. flowtask/components/CompanyScraper/parsers/explorium.py +192 -0
  20. flowtask/components/CompanyScraper/parsers/leadiq.py +206 -0
  21. flowtask/components/CompanyScraper/parsers/rocket.py +133 -0
  22. flowtask/components/CompanyScraper/parsers/siccode.py +109 -0
  23. flowtask/components/CompanyScraper/parsers/visualvisitor.py +130 -0
  24. flowtask/components/CompanyScraper/parsers/zoominfo.py +118 -0
  25. flowtask/components/CompanyScraper/scrapper.py +1054 -0
  26. flowtask/components/CopyTo.py +177 -0
  27. flowtask/components/CopyToBigQuery.py +243 -0
  28. flowtask/components/CopyToMongoDB.py +291 -0
  29. flowtask/components/CopyToPg.py +609 -0
  30. flowtask/components/CopyToRethink.py +207 -0
  31. flowtask/components/CreateGCSBucket.py +102 -0
  32. flowtask/components/CreateReport/CreateReport.py +228 -0
  33. flowtask/components/CreateReport/__init__.py +9 -0
  34. flowtask/components/CreateReport/charts/__init__.py +15 -0
  35. flowtask/components/CreateReport/charts/bar.py +51 -0
  36. flowtask/components/CreateReport/charts/base.py +66 -0
  37. flowtask/components/CreateReport/charts/pie.py +64 -0
  38. flowtask/components/CreateReport/utils.py +9 -0
  39. flowtask/components/CustomerSatisfaction.py +196 -0
  40. flowtask/components/DataInput.py +200 -0
  41. flowtask/components/DateList.py +255 -0
  42. flowtask/components/DbClient.py +163 -0
  43. flowtask/components/DialPad.py +146 -0
  44. flowtask/components/DocumentDBQuery.py +200 -0
  45. flowtask/components/DownloadFrom.py +371 -0
  46. flowtask/components/DownloadFromD2L.py +113 -0
  47. flowtask/components/DownloadFromFTP.py +181 -0
  48. flowtask/components/DownloadFromIMAP.py +315 -0
  49. flowtask/components/DownloadFromS3.py +198 -0
  50. flowtask/components/DownloadFromSFTP.py +265 -0
  51. flowtask/components/DownloadFromSharepoint.py +110 -0
  52. flowtask/components/DownloadFromSmartSheet.py +114 -0
  53. flowtask/components/DownloadS3File.py +229 -0
  54. flowtask/components/Dummy.py +59 -0
  55. flowtask/components/DuplicatePhoto.py +411 -0
  56. flowtask/components/EmployeeEvaluation.py +237 -0
  57. flowtask/components/ExecuteSQL.py +323 -0
  58. flowtask/components/ExtractHTML.py +178 -0
  59. flowtask/components/FileBase.py +178 -0
  60. flowtask/components/FileCopy.py +181 -0
  61. flowtask/components/FileDelete.py +82 -0
  62. flowtask/components/FileExists.py +146 -0
  63. flowtask/components/FileIteratorDelete.py +112 -0
  64. flowtask/components/FileList.py +194 -0
  65. flowtask/components/FileOpen.py +75 -0
  66. flowtask/components/FileRead.py +120 -0
  67. flowtask/components/FileRename.py +106 -0
  68. flowtask/components/FilterIf.py +284 -0
  69. flowtask/components/FilterRows/FilterRows.py +200 -0
  70. flowtask/components/FilterRows/__init__.py +10 -0
  71. flowtask/components/FilterRows/functions.py +4 -0
  72. flowtask/components/GCSToBigQuery.py +103 -0
  73. flowtask/components/GoogleA4.py +150 -0
  74. flowtask/components/GoogleGeoCoding.py +344 -0
  75. flowtask/components/GooglePlaces.py +315 -0
  76. flowtask/components/GoogleSearch.py +539 -0
  77. flowtask/components/HTTPClient.py +268 -0
  78. flowtask/components/ICIMS.py +146 -0
  79. flowtask/components/IF.py +179 -0
  80. flowtask/components/IcimsFolderCopy.py +173 -0
  81. flowtask/components/ImageFeatures/__init__.py +5 -0
  82. flowtask/components/ImageFeatures/process.py +233 -0
  83. flowtask/components/IteratorBase.py +251 -0
  84. flowtask/components/LangchainLoader/__init__.py +5 -0
  85. flowtask/components/LangchainLoader/loader.py +194 -0
  86. flowtask/components/LangchainLoader/loaders/__init__.py +22 -0
  87. flowtask/components/LangchainLoader/loaders/abstract.py +362 -0
  88. flowtask/components/LangchainLoader/loaders/basepdf.py +50 -0
  89. flowtask/components/LangchainLoader/loaders/docx.py +91 -0
  90. flowtask/components/LangchainLoader/loaders/html.py +119 -0
  91. flowtask/components/LangchainLoader/loaders/pdfblocks.py +146 -0
  92. flowtask/components/LangchainLoader/loaders/pdfmark.py +79 -0
  93. flowtask/components/LangchainLoader/loaders/pdftables.py +135 -0
  94. flowtask/components/LangchainLoader/loaders/qa.py +67 -0
  95. flowtask/components/LangchainLoader/loaders/txt.py +55 -0
  96. flowtask/components/LeadIQ.py +650 -0
  97. flowtask/components/Loop.py +253 -0
  98. flowtask/components/Lowes.py +334 -0
  99. flowtask/components/MS365Usage.py +156 -0
  100. flowtask/components/MSTeamsMessages.py +320 -0
  101. flowtask/components/MarketClustering.py +1051 -0
  102. flowtask/components/MergeFiles.py +362 -0
  103. flowtask/components/MilvusOutput.py +87 -0
  104. flowtask/components/NearByStores.py +175 -0
  105. flowtask/components/NetworkNinja/__init__.py +6 -0
  106. flowtask/components/NetworkNinja/models/__init__.py +52 -0
  107. flowtask/components/NetworkNinja/models/abstract.py +177 -0
  108. flowtask/components/NetworkNinja/models/account.py +39 -0
  109. flowtask/components/NetworkNinja/models/client.py +19 -0
  110. flowtask/components/NetworkNinja/models/district.py +14 -0
  111. flowtask/components/NetworkNinja/models/events.py +101 -0
  112. flowtask/components/NetworkNinja/models/forms.py +499 -0
  113. flowtask/components/NetworkNinja/models/market.py +16 -0
  114. flowtask/components/NetworkNinja/models/organization.py +34 -0
  115. flowtask/components/NetworkNinja/models/photos.py +125 -0
  116. flowtask/components/NetworkNinja/models/project.py +44 -0
  117. flowtask/components/NetworkNinja/models/region.py +28 -0
  118. flowtask/components/NetworkNinja/models/store.py +203 -0
  119. flowtask/components/NetworkNinja/models/user.py +151 -0
  120. flowtask/components/NetworkNinja/router.py +854 -0
  121. flowtask/components/Odoo.py +175 -0
  122. flowtask/components/OdooInjector.py +192 -0
  123. flowtask/components/OpenFromXML.py +126 -0
  124. flowtask/components/OpenWeather.py +41 -0
  125. flowtask/components/OpenWithBase.py +616 -0
  126. flowtask/components/OpenWithPandas.py +715 -0
  127. flowtask/components/PGPDecrypt.py +199 -0
  128. flowtask/components/PandasIterator.py +187 -0
  129. flowtask/components/PandasToFile.py +189 -0
  130. flowtask/components/Paradox.py +339 -0
  131. flowtask/components/ParamIterator.py +117 -0
  132. flowtask/components/ParseHTML.py +84 -0
  133. flowtask/components/PlacerStores.py +249 -0
  134. flowtask/components/Pokemon.py +507 -0
  135. flowtask/components/PositiveBot.py +62 -0
  136. flowtask/components/PowerPointSlide.py +400 -0
  137. flowtask/components/PrintMessage.py +127 -0
  138. flowtask/components/ProductCompetitors/__init__.py +5 -0
  139. flowtask/components/ProductCompetitors/parsers/__init__.py +7 -0
  140. flowtask/components/ProductCompetitors/parsers/base.py +72 -0
  141. flowtask/components/ProductCompetitors/parsers/bestbuy.py +86 -0
  142. flowtask/components/ProductCompetitors/parsers/lowes.py +103 -0
  143. flowtask/components/ProductCompetitors/scrapper.py +155 -0
  144. flowtask/components/ProductCompliant.py +169 -0
  145. flowtask/components/ProductInfo/__init__.py +1 -0
  146. flowtask/components/ProductInfo/parsers/__init__.py +5 -0
  147. flowtask/components/ProductInfo/parsers/base.py +83 -0
  148. flowtask/components/ProductInfo/parsers/brother.py +97 -0
  149. flowtask/components/ProductInfo/parsers/canon.py +167 -0
  150. flowtask/components/ProductInfo/parsers/epson.py +118 -0
  151. flowtask/components/ProductInfo/parsers/hp.py +131 -0
  152. flowtask/components/ProductInfo/parsers/samsung.py +97 -0
  153. flowtask/components/ProductInfo/scraper.py +319 -0
  154. flowtask/components/ProductPricing.py +118 -0
  155. flowtask/components/QS.py +261 -0
  156. flowtask/components/QSBase.py +201 -0
  157. flowtask/components/QueryIterator.py +273 -0
  158. flowtask/components/QueryToInsert.py +327 -0
  159. flowtask/components/QueryToPandas.py +432 -0
  160. flowtask/components/RESTClient.py +195 -0
  161. flowtask/components/RethinkDBQuery.py +189 -0
  162. flowtask/components/Rsync.py +74 -0
  163. flowtask/components/RunSSH.py +59 -0
  164. flowtask/components/RunShell.py +71 -0
  165. flowtask/components/SalesForce.py +20 -0
  166. flowtask/components/SaveImageBank/__init__.py +257 -0
  167. flowtask/components/SchedulingVisits.py +592 -0
  168. flowtask/components/ScrapPage.py +216 -0
  169. flowtask/components/ScrapSearch.py +79 -0
  170. flowtask/components/SendNotify.py +257 -0
  171. flowtask/components/SentimentAnalysis.py +694 -0
  172. flowtask/components/ServiceScrapper/__init__.py +5 -0
  173. flowtask/components/ServiceScrapper/parsers/__init__.py +1 -0
  174. flowtask/components/ServiceScrapper/parsers/base.py +94 -0
  175. flowtask/components/ServiceScrapper/parsers/costco.py +93 -0
  176. flowtask/components/ServiceScrapper/scrapper.py +199 -0
  177. flowtask/components/SetVariables.py +156 -0
  178. flowtask/components/SubTask.py +182 -0
  179. flowtask/components/SuiteCRM.py +48 -0
  180. flowtask/components/Switch.py +175 -0
  181. flowtask/components/TableBase.py +148 -0
  182. flowtask/components/TableDelete.py +312 -0
  183. flowtask/components/TableInput.py +143 -0
  184. flowtask/components/TableOutput/TableOutput.py +384 -0
  185. flowtask/components/TableOutput/__init__.py +3 -0
  186. flowtask/components/TableSchema.py +534 -0
  187. flowtask/components/Target.py +223 -0
  188. flowtask/components/ThumbnailGenerator.py +156 -0
  189. flowtask/components/ToPandas.py +67 -0
  190. flowtask/components/TransformRows/TransformRows.py +507 -0
  191. flowtask/components/TransformRows/__init__.py +9 -0
  192. flowtask/components/TransformRows/functions.py +559 -0
  193. flowtask/components/TransposeRows.py +176 -0
  194. flowtask/components/UPCDatabase.py +86 -0
  195. flowtask/components/UnGzip.py +171 -0
  196. flowtask/components/Uncompress.py +172 -0
  197. flowtask/components/UniqueRows.py +126 -0
  198. flowtask/components/Unzip.py +107 -0
  199. flowtask/components/UpdateOperationalVars.py +147 -0
  200. flowtask/components/UploadTo.py +299 -0
  201. flowtask/components/UploadToS3.py +136 -0
  202. flowtask/components/UploadToSFTP.py +160 -0
  203. flowtask/components/UploadToSharepoint.py +205 -0
  204. flowtask/components/UserFunc.py +122 -0
  205. flowtask/components/VivaTracker.py +140 -0
  206. flowtask/components/WSDLClient.py +123 -0
  207. flowtask/components/Wait.py +18 -0
  208. flowtask/components/Walmart.py +199 -0
  209. flowtask/components/Workplace.py +134 -0
  210. flowtask/components/XMLToPandas.py +267 -0
  211. flowtask/components/Zammad/__init__.py +41 -0
  212. flowtask/components/Zammad/models.py +0 -0
  213. flowtask/components/ZoomInfoScraper.py +409 -0
  214. flowtask/components/__init__.py +104 -0
  215. flowtask/components/abstract.py +18 -0
  216. flowtask/components/flow.py +530 -0
  217. flowtask/components/google.py +335 -0
  218. flowtask/components/group.py +221 -0
  219. flowtask/components/py.typed +0 -0
  220. flowtask/components/reviewscrap.py +132 -0
  221. flowtask/components/tAutoincrement.py +117 -0
  222. flowtask/components/tConcat.py +109 -0
  223. flowtask/components/tExplode.py +119 -0
  224. flowtask/components/tFilter.py +184 -0
  225. flowtask/components/tGroup.py +236 -0
  226. flowtask/components/tJoin.py +270 -0
  227. flowtask/components/tMap/__init__.py +9 -0
  228. flowtask/components/tMap/functions.py +54 -0
  229. flowtask/components/tMap/tMap.py +450 -0
  230. flowtask/components/tMelt.py +112 -0
  231. flowtask/components/tMerge.py +114 -0
  232. flowtask/components/tOrder.py +93 -0
  233. flowtask/components/tPandas.py +94 -0
  234. flowtask/components/tPivot.py +71 -0
  235. flowtask/components/tPluckCols.py +76 -0
  236. flowtask/components/tUnnest.py +82 -0
  237. flowtask/components/user.py +401 -0
  238. flowtask/conf.py +457 -0
  239. flowtask/download.py +102 -0
  240. flowtask/events/__init__.py +11 -0
  241. flowtask/events/events/__init__.py +20 -0
  242. flowtask/events/events/abstract.py +95 -0
  243. flowtask/events/events/alerts/__init__.py +362 -0
  244. flowtask/events/events/alerts/colfunctions.py +131 -0
  245. flowtask/events/events/alerts/functions.py +158 -0
  246. flowtask/events/events/dummy.py +12 -0
  247. flowtask/events/events/exec.py +124 -0
  248. flowtask/events/events/file/__init__.py +7 -0
  249. flowtask/events/events/file/base.py +51 -0
  250. flowtask/events/events/file/copy.py +23 -0
  251. flowtask/events/events/file/delete.py +16 -0
  252. flowtask/events/events/interfaces/__init__.py +9 -0
  253. flowtask/events/events/interfaces/client.py +67 -0
  254. flowtask/events/events/interfaces/credentials.py +28 -0
  255. flowtask/events/events/interfaces/notifications.py +58 -0
  256. flowtask/events/events/jira.py +122 -0
  257. flowtask/events/events/log.py +26 -0
  258. flowtask/events/events/logerr.py +52 -0
  259. flowtask/events/events/notify.py +59 -0
  260. flowtask/events/events/notify_event.py +160 -0
  261. flowtask/events/events/publish.py +54 -0
  262. flowtask/events/events/sendfile.py +104 -0
  263. flowtask/events/events/task.py +97 -0
  264. flowtask/events/events/teams.py +98 -0
  265. flowtask/events/events/webhook.py +58 -0
  266. flowtask/events/manager.py +287 -0
  267. flowtask/exceptions.c +39393 -0
  268. flowtask/exceptions.cpython-312-x86_64-linux-gnu.so +0 -0
  269. flowtask/extensions/__init__.py +3 -0
  270. flowtask/extensions/abstract.py +82 -0
  271. flowtask/extensions/logging/__init__.py +65 -0
  272. flowtask/hooks/__init__.py +9 -0
  273. flowtask/hooks/actions/__init__.py +22 -0
  274. flowtask/hooks/actions/abstract.py +66 -0
  275. flowtask/hooks/actions/dummy.py +23 -0
  276. flowtask/hooks/actions/jira.py +74 -0
  277. flowtask/hooks/actions/rest.py +320 -0
  278. flowtask/hooks/actions/sampledata.py +37 -0
  279. flowtask/hooks/actions/sensor.py +23 -0
  280. flowtask/hooks/actions/task.py +9 -0
  281. flowtask/hooks/actions/ticket.py +37 -0
  282. flowtask/hooks/actions/zammad.py +55 -0
  283. flowtask/hooks/hook.py +62 -0
  284. flowtask/hooks/models.py +17 -0
  285. flowtask/hooks/service.py +187 -0
  286. flowtask/hooks/step.py +91 -0
  287. flowtask/hooks/types/__init__.py +23 -0
  288. flowtask/hooks/types/base.py +129 -0
  289. flowtask/hooks/types/brokers/__init__.py +11 -0
  290. flowtask/hooks/types/brokers/base.py +54 -0
  291. flowtask/hooks/types/brokers/mqtt.py +35 -0
  292. flowtask/hooks/types/brokers/rabbitmq.py +82 -0
  293. flowtask/hooks/types/brokers/redis.py +83 -0
  294. flowtask/hooks/types/brokers/sqs.py +44 -0
  295. flowtask/hooks/types/fs.py +232 -0
  296. flowtask/hooks/types/http.py +49 -0
  297. flowtask/hooks/types/imap.py +200 -0
  298. flowtask/hooks/types/jira.py +279 -0
  299. flowtask/hooks/types/mail.py +205 -0
  300. flowtask/hooks/types/postgres.py +98 -0
  301. flowtask/hooks/types/responses/__init__.py +8 -0
  302. flowtask/hooks/types/responses/base.py +5 -0
  303. flowtask/hooks/types/sharepoint.py +288 -0
  304. flowtask/hooks/types/ssh.py +141 -0
  305. flowtask/hooks/types/tagged.py +59 -0
  306. flowtask/hooks/types/upload.py +85 -0
  307. flowtask/hooks/types/watch.py +71 -0
  308. flowtask/hooks/types/web.py +36 -0
  309. flowtask/interfaces/AzureClient.py +137 -0
  310. flowtask/interfaces/AzureGraph.py +839 -0
  311. flowtask/interfaces/Boto3Client.py +326 -0
  312. flowtask/interfaces/DropboxClient.py +173 -0
  313. flowtask/interfaces/ExcelHandler.py +94 -0
  314. flowtask/interfaces/FTPClient.py +131 -0
  315. flowtask/interfaces/GoogleCalendar.py +201 -0
  316. flowtask/interfaces/GoogleClient.py +133 -0
  317. flowtask/interfaces/GoogleDrive.py +127 -0
  318. flowtask/interfaces/GoogleGCS.py +89 -0
  319. flowtask/interfaces/GoogleGeocoding.py +93 -0
  320. flowtask/interfaces/GoogleLang.py +114 -0
  321. flowtask/interfaces/GooglePub.py +61 -0
  322. flowtask/interfaces/GoogleSheet.py +68 -0
  323. flowtask/interfaces/IMAPClient.py +137 -0
  324. flowtask/interfaces/O365Calendar.py +113 -0
  325. flowtask/interfaces/O365Client.py +220 -0
  326. flowtask/interfaces/OneDrive.py +284 -0
  327. flowtask/interfaces/Outlook.py +155 -0
  328. flowtask/interfaces/ParrotBot.py +130 -0
  329. flowtask/interfaces/SSHClient.py +378 -0
  330. flowtask/interfaces/Sharepoint.py +496 -0
  331. flowtask/interfaces/__init__.py +36 -0
  332. flowtask/interfaces/azureauth.py +119 -0
  333. flowtask/interfaces/cache.py +201 -0
  334. flowtask/interfaces/client.py +82 -0
  335. flowtask/interfaces/compress.py +525 -0
  336. flowtask/interfaces/credentials.py +124 -0
  337. flowtask/interfaces/d2l.py +239 -0
  338. flowtask/interfaces/databases/__init__.py +5 -0
  339. flowtask/interfaces/databases/db.py +223 -0
  340. flowtask/interfaces/databases/documentdb.py +55 -0
  341. flowtask/interfaces/databases/rethink.py +39 -0
  342. flowtask/interfaces/dataframes/__init__.py +11 -0
  343. flowtask/interfaces/dataframes/abstract.py +21 -0
  344. flowtask/interfaces/dataframes/arrow.py +71 -0
  345. flowtask/interfaces/dataframes/dt.py +69 -0
  346. flowtask/interfaces/dataframes/pandas.py +167 -0
  347. flowtask/interfaces/dataframes/polars.py +60 -0
  348. flowtask/interfaces/db.py +263 -0
  349. flowtask/interfaces/env.py +46 -0
  350. flowtask/interfaces/func.py +137 -0
  351. flowtask/interfaces/http.py +1780 -0
  352. flowtask/interfaces/locale.py +40 -0
  353. flowtask/interfaces/log.py +75 -0
  354. flowtask/interfaces/mask.py +143 -0
  355. flowtask/interfaces/notification.py +154 -0
  356. flowtask/interfaces/playwright.py +339 -0
  357. flowtask/interfaces/powerpoint.py +368 -0
  358. flowtask/interfaces/py.typed +0 -0
  359. flowtask/interfaces/qs.py +376 -0
  360. flowtask/interfaces/result.py +87 -0
  361. flowtask/interfaces/selenium_service.py +779 -0
  362. flowtask/interfaces/smartsheet.py +154 -0
  363. flowtask/interfaces/stat.py +39 -0
  364. flowtask/interfaces/task.py +96 -0
  365. flowtask/interfaces/template.py +118 -0
  366. flowtask/interfaces/vectorstores/__init__.py +1 -0
  367. flowtask/interfaces/vectorstores/abstract.py +133 -0
  368. flowtask/interfaces/vectorstores/milvus.py +669 -0
  369. flowtask/interfaces/zammad.py +107 -0
  370. flowtask/models.py +193 -0
  371. flowtask/parsers/__init__.py +15 -0
  372. flowtask/parsers/_yaml.c +11978 -0
  373. flowtask/parsers/_yaml.cpython-312-x86_64-linux-gnu.so +0 -0
  374. flowtask/parsers/argparser.py +235 -0
  375. flowtask/parsers/base.c +15155 -0
  376. flowtask/parsers/base.cpython-312-x86_64-linux-gnu.so +0 -0
  377. flowtask/parsers/json.c +11968 -0
  378. flowtask/parsers/json.cpython-312-x86_64-linux-gnu.so +0 -0
  379. flowtask/parsers/maps.py +49 -0
  380. flowtask/parsers/toml.c +11968 -0
  381. flowtask/parsers/toml.cpython-312-x86_64-linux-gnu.so +0 -0
  382. flowtask/plugins/__init__.py +16 -0
  383. flowtask/plugins/components/__init__.py +0 -0
  384. flowtask/plugins/handler/__init__.py +45 -0
  385. flowtask/plugins/importer.py +31 -0
  386. flowtask/plugins/sources/__init__.py +0 -0
  387. flowtask/runner.py +283 -0
  388. flowtask/scheduler/__init__.py +9 -0
  389. flowtask/scheduler/functions.py +493 -0
  390. flowtask/scheduler/handlers/__init__.py +8 -0
  391. flowtask/scheduler/handlers/manager.py +504 -0
  392. flowtask/scheduler/handlers/models.py +58 -0
  393. flowtask/scheduler/handlers/service.py +72 -0
  394. flowtask/scheduler/notifications.py +65 -0
  395. flowtask/scheduler/scheduler.py +993 -0
  396. flowtask/services/__init__.py +0 -0
  397. flowtask/services/bots/__init__.py +0 -0
  398. flowtask/services/bots/telegram.py +264 -0
  399. flowtask/services/files/__init__.py +11 -0
  400. flowtask/services/files/manager.py +522 -0
  401. flowtask/services/files/model.py +37 -0
  402. flowtask/services/files/service.py +767 -0
  403. flowtask/services/jira/__init__.py +3 -0
  404. flowtask/services/jira/jira_actions.py +191 -0
  405. flowtask/services/tasks/__init__.py +13 -0
  406. flowtask/services/tasks/launcher.py +213 -0
  407. flowtask/services/tasks/manager.py +323 -0
  408. flowtask/services/tasks/service.py +275 -0
  409. flowtask/services/tasks/task_manager.py +376 -0
  410. flowtask/services/tasks/tasks.py +155 -0
  411. flowtask/storages/__init__.py +16 -0
  412. flowtask/storages/exceptions.py +12 -0
  413. flowtask/storages/files/__init__.py +8 -0
  414. flowtask/storages/files/abstract.py +29 -0
  415. flowtask/storages/files/filesystem.py +66 -0
  416. flowtask/storages/tasks/__init__.py +19 -0
  417. flowtask/storages/tasks/abstract.py +26 -0
  418. flowtask/storages/tasks/database.py +33 -0
  419. flowtask/storages/tasks/filesystem.py +108 -0
  420. flowtask/storages/tasks/github.py +119 -0
  421. flowtask/storages/tasks/memory.py +45 -0
  422. flowtask/storages/tasks/row.py +25 -0
  423. flowtask/tasks/__init__.py +0 -0
  424. flowtask/tasks/abstract.py +526 -0
  425. flowtask/tasks/command.py +118 -0
  426. flowtask/tasks/pile.py +486 -0
  427. flowtask/tasks/py.typed +0 -0
  428. flowtask/tasks/task.py +778 -0
  429. flowtask/template/__init__.py +161 -0
  430. flowtask/tests.py +257 -0
  431. flowtask/types/__init__.py +8 -0
  432. flowtask/types/typedefs.c +11347 -0
  433. flowtask/types/typedefs.cpython-312-x86_64-linux-gnu.so +0 -0
  434. flowtask/utils/__init__.py +24 -0
  435. flowtask/utils/constants.py +117 -0
  436. flowtask/utils/encoders.py +21 -0
  437. flowtask/utils/executor.py +112 -0
  438. flowtask/utils/functions.cpp +14280 -0
  439. flowtask/utils/functions.cpython-312-x86_64-linux-gnu.so +0 -0
  440. flowtask/utils/json.cpp +13349 -0
  441. flowtask/utils/json.cpython-312-x86_64-linux-gnu.so +0 -0
  442. flowtask/utils/mail.py +63 -0
  443. flowtask/utils/parseqs.c +13324 -0
  444. flowtask/utils/parserqs.cpython-312-x86_64-linux-gnu.so +0 -0
  445. flowtask/utils/stats.py +308 -0
  446. flowtask/utils/transformations.py +74 -0
  447. flowtask/utils/uv.py +12 -0
  448. flowtask/utils/validators.py +97 -0
  449. flowtask/version.py +11 -0
  450. flowtask-5.8.4.dist-info/LICENSE +201 -0
  451. flowtask-5.8.4.dist-info/METADATA +209 -0
  452. flowtask-5.8.4.dist-info/RECORD +470 -0
  453. flowtask-5.8.4.dist-info/WHEEL +6 -0
  454. flowtask-5.8.4.dist-info/entry_points.txt +3 -0
  455. flowtask-5.8.4.dist-info/top_level.txt +2 -0
  456. plugins/components/CreateQR.py +39 -0
  457. plugins/components/TestComponent.py +28 -0
  458. plugins/components/Use1.py +13 -0
  459. plugins/components/Workplace.py +117 -0
  460. plugins/components/__init__.py +3 -0
  461. plugins/sources/__init__.py +0 -0
  462. plugins/sources/get_populartimes.py +78 -0
  463. plugins/sources/google.py +150 -0
  464. plugins/sources/hubspot.py +679 -0
  465. plugins/sources/icims.py +679 -0
  466. plugins/sources/mobileinsight.py +501 -0
  467. plugins/sources/newrelic.py +262 -0
  468. plugins/sources/uap.py +268 -0
  469. plugins/sources/venu.py +244 -0
  470. plugins/sources/vocinity.py +314 -0
@@ -0,0 +1,120 @@
1
+ import asyncio
2
+ from pathlib import Path
3
+ from typing import Callable, Tuple
4
+ from asyncdb import AsyncDB
5
+ from querysource.datasources.drivers.bigquery import bigquery_default
6
+ from .flow import FlowComponent
7
+ from ..exceptions import ComponentError
8
+
9
class CSVToGCS(FlowComponent):
    """
    CSVToGCS.

    Upload a CSV file from the local filesystem to a Google Cloud Storage
    (GCS) bucket, optionally creating the bucket when it does not exist.

    Keyword arguments consumed by the constructor:
        csv_path: path of the local CSV file (required).
        bucket_uri: destination bucket URI (``gs://<bucket>``); optional.
        bucket_name: destination bucket name; used when ``bucket_uri`` is
            not given.
        object_name: name of the uploaded object; defaults to the CSV
            file name.
        overwrite: forwarded to the upload call; defaults to ``False``.
        create_bucket: create the bucket if missing; defaults to ``False``.
        storage_class: storage class for a newly created bucket
            (default ``'STANDARD'``).
        location: location for a newly created bucket (default ``'US'``).
        delete_local: remove the local CSV after the upload
            (default ``False``).
    """

    def __init__(
        self,
        loop: asyncio.AbstractEventLoop = None,
        job: Callable = None,
        stat: Callable = None,
        **kwargs,
    ):
        # Component options are popped from kwargs before the remaining
        # keyword arguments are handed to the parent constructor.
        self.csv_path: Path = Path(kwargs.pop('csv_path'))
        # May be given directly or derived later from ``bucket_name``.
        self.bucket_uri: str = kwargs.pop('bucket_uri', None)
        self.object_name: str = kwargs.pop('object_name', self.csv_path.name)
        self.overwrite: bool = kwargs.pop('overwrite', False)
        self.create_bucket: bool = kwargs.pop('create_bucket', False)
        self.storage_class: str = kwargs.pop('storage_class', 'STANDARD')
        self.location: str = kwargs.pop('location', 'US')
        self.delete_local: bool = kwargs.pop('delete_local', False)
        self.bq = None  # AsyncDB instance, created in start()
        # Needed when ``bucket_uri`` is not provided.
        self.bucket_name: str = kwargs.pop('bucket_name', None)
        super(CSVToGCS, self).__init__(loop=loop, job=job, stat=stat, **kwargs)

    @staticmethod
    def _bucket_name_from_uri(bucket_uri: str) -> str:
        """Return the bucket name contained in a ``gs://<bucket>[/...]`` URI."""
        return bucket_uri.removeprefix('gs://').split('/', 1)[0]

    async def start(self, **kwargs):
        """Validate the inputs and create the AsyncDB (BigQuery driver) instance.

        Raises:
            ComponentError: if the CSV file does not exist, if
                ``bigquery_default`` is falsy, or if AsyncDB cannot be
                instantiated.
        """
        # Validate required parameters.
        if not self.csv_path.exists():
            raise ComponentError(f"CSVToGCS: El archivo CSV '{self.csv_path}' no existe.")

        if not bigquery_default:
            raise ComponentError("CSVToGCS: 'bigquery_default' no está configurado correctamente.")

        # Credentials / driver parameters come from the default datasource.
        credentials = bigquery_default.get_credentials()

        # Initialise AsyncDB with the BigQuery driver.
        try:
            self.bq = AsyncDB("bigquery", params=credentials)
            self._logger.info("CSVToGCS: Instancia de AsyncDB creada exitosamente.")
        except Exception as e:
            raise ComponentError(f"CSVToGCS: Error al inicializar AsyncDB: {e}") from e

    async def run(self) -> Tuple[str, str]:
        """Upload the CSV file to GCS.

        Returns:
            A ``(bucket_uri, object_uri)`` tuple. Both values are also
            stored as task variables for later components.

        Raises:
            ComponentError: if ``start`` was not executed, if no bucket can
                be determined, if the bucket is missing and
                ``create_bucket`` is False, or if any step of the upload
                fails.
        """
        if not self.bq:
            raise ComponentError("CSVToGCS: AsyncDB no está inicializado. Asegúrate de ejecutar 'start' antes de 'run'.")

        try:
            async with await self.bq.connection() as conn:
                # Fall back to task variables when the values were not given
                # (presumably set by a previous component - verify against
                # the task definition).
                if not self.bucket_uri:
                    self.bucket_uri = self.getTaskVar('bucket_uri')
                if not self.bucket_name:
                    self.bucket_name = self.getTaskVar('bucket_name')

                if not self.bucket_uri:
                    if not self.bucket_name:
                        raise ComponentError("CSVToGCS: 'bucket_uri' o 'bucket_name' deben ser proporcionados.")
                    self.bucket_uri = f"gs://{self.bucket_name}"
                elif not self.bucket_name:
                    # Only the URI is known: derive the name so the existence
                    # check and bucket creation below do not operate on None.
                    self.bucket_name = self._bucket_name_from_uri(self.bucket_uri)
                    if not self.bucket_name:
                        raise ComponentError(
                            f"CSVToGCS: No se pudo determinar el nombre del bucket a partir de 'bucket_uri' ('{self.bucket_uri}')."
                        )

                # Check whether the bucket exists.
                bucket_exists = await conn.bucket_exists(self.bucket_name)
                if not bucket_exists:
                    if self.create_bucket:
                        await conn.create_bucket(
                            bucket_name=self.bucket_name,
                            location=self.location,
                            storage_class=self.storage_class
                        )
                        self._logger.info(f"CSVToGCS: Bucket '{self.bucket_name}' creado exitosamente en la región '{self.location}' con clase de almacenamiento '{self.storage_class}'.")
                    else:
                        raise ComponentError(f"CSVToGCS: El bucket '{self.bucket_name}' no existe y 'create_bucket' está establecido en False.")
                else:
                    self._logger.info(f"CSVToGCS: Bucket '{self.bucket_name}' ya existe.")

                # Upload the CSV file to GCS.
                object_uri, message = await conn.create_gcs_from_csv(
                    bucket_uri=self.bucket_uri,
                    object_name=self.object_name,
                    csv_data=self.csv_path,
                    overwrite=self.overwrite
                )
                self._logger.info(f"CSVToGCS: {message}")

                # Store bucket_uri and object_uri for the next component.
                self.setTaskVar('bucket_uri', self.bucket_uri)
                self.setTaskVar('object_uri', object_uri)

                # Optionally delete the local file, only after an upload that
                # returned an object URI.
                if self.delete_local and object_uri:
                    self.csv_path.unlink()
                    self._logger.info(f"CSVToGCS: Archivo local '{self.csv_path}' eliminado exitosamente después de la carga.")

                return self.bucket_uri, object_uri

        except ComponentError:
            # Component-specific errors propagate untouched (bare raise keeps
            # the original traceback).
            raise
        except Exception as e:
            raise ComponentError(f"CSVToGCS: Error durante la carga a GCS: {e}") from e

    async def close(self):
        """Close the AsyncDB instance; failures are logged, not raised."""
        try:
            if self.bq:
                await self.bq.close()
                self._logger.info("CSVToGCS: AsyncDB cerrado exitosamente.")
        except Exception as e:
            self._logger.error(f"CSVToGCS: Error al cerrar AsyncDB: {e}")
@@ -0,0 +1 @@
1
+ from .scrapper import CompanyScraper
@@ -0,0 +1,6 @@
1
+ from .leadiq import LeadiqScrapper
2
+ from .explorium import ExploriumScrapper
3
+ from .zoominfo import ZoomInfoScrapper
4
+ from .siccode import SicCodeScrapper
5
+ from .rocket import RocketReachScrapper
6
+ from .visualvisitor import VisualVisitorScrapper
@@ -0,0 +1,102 @@
1
+ from typing import Any, List, Dict
2
+ from bs4 import BeautifulSoup as bs
3
+ from abc import abstractmethod
4
+ from ....interfaces import SeleniumService, HTTPService
5
+ import re
6
+ import logging
7
+
8
class ScrapperBase(SeleniumService, HTTPService):
    """
    ScrapperBase Model.

    Defines how scrappers should work: subclasses provide ``scrapping``
    and ``define_search_term``; this base supplies a proxied GET helper,
    address parsing and name standardization.
    """
    domain: str
    search_term: str
    cookies: Any
    keywords: List[str]

    def __init__(self, *args, **kwargs):
        """Set scrapper state, then delegate to the parent constructors.

        ``cookies`` is read from ``kwargs`` without being removed, so it is
        still forwarded to the parent classes.
        """
        self.cookies = kwargs.get('cookies')
        self._logger = logging.getLogger(type(self).__name__)
        self._counter: int = 0
        self.search_term_used: str = ''
        super().__init__(*args, **kwargs)

    @abstractmethod
    async def scrapping(self, document: bs, idx: int, row: dict):
        """Extract data from ``document`` for the row ``row`` at index ``idx``."""

    @abstractmethod
    def define_search_term(self, term: str):
        """Build the search term used for ``term``."""

    async def get(self, url, headers: dict):
        """Fetch ``url`` through ``_get`` with the given headers, using a proxy."""
        response = await self._get(url, headers=headers, use_proxy=True)
        return response

    def _parse_address(self, address: str) -> Dict[str, str]:
        """
        Parse an address string to extract state, zipcode and country.

        Args:
            address (str): Raw address string

        Returns:
            Dict with the keys ``address``, ``state``, ``zipcode`` and
            ``country``. ``address`` carries the input unchanged; the other
            three are ``None`` when nothing matches or parsing fails. All
            four are ``None`` for an empty address.
        """
        if not address:
            return dict.fromkeys(('address', 'state', 'zipcode', 'country'))

        # Keep the original address in the result.
        parsed = {'address': address}
        not_found = {
            'state': None,
            'zipcode': None,
            'country': None
        }

        # First regex: full format.
        full_format = r'^.*,\s+([^,]+?)\s+([\w\s-]+)\s+([A-Z]{2})$'
        # Second regex: fallback.
        loose_format = r'^.*,\s*([^,]+?),\s+([\w\s-]+?)\s*([A-Z]{2})'

        try:
            # The fallback pattern is only evaluated when the first one fails.
            found = re.search(full_format, address) or re.search(loose_format, address)
            if found is None:
                self._logger.warning(f"Could not parse address: {address}")
                parsed.update(not_found)
            else:
                state, zipcode, country = found.groups()
                parsed['state'] = state.strip()
                parsed['zipcode'] = zipcode.strip()
                parsed['country'] = country.strip()
        except Exception as e:
            self._logger.error(f"Error parsing address {address}: {str(e)}")
            parsed.update(not_found)

        return parsed

    def _standardize_name(self, text: str) -> str:
        """Standardize text: trimmed, lowercase, hyphens instead of spaces, wrapped in single quotes."""
        slug = text.strip().lower().replace(' ', '-')
        return "'" + slug + "'"
@@ -0,0 +1,192 @@
1
+ from bs4 import BeautifulSoup as bs
2
+ from .base import ScrapperBase
3
+ import json
4
+
5
+
6
class ExploriumScrapper(ScrapperBase):
    """
    Scraper for company pages on explorium.ai.
    """
    domain: str = 'explorium.ai'
    search_term: str = 'site:explorium.ai {}'
    keywords: list = [
        'overview - services',
    ]

    def define_search_term(self, term: str):
        """Return the site-restricted search query for ``term``."""
        cleaned = term.strip().lower()
        return self.search_term.format(cleaned)

    @staticmethod
    def _merge_found(result: dict, extracted: dict) -> None:
        """Copy ``extracted`` into ``result`` without erasing known values.

        A ``None`` in ``extracted`` only creates a missing key; it never
        overwrites a value already present in ``result``.
        """
        for key, value in extracted.items():
            if value is not None or key not in result:
                result[key] = value

    async def scrapping(self, document: bs, idx: int, row: dict):
        """
        Scrape company information from an Explorium page.

        Works on a copy of ``row`` and adds whatever the page provides.
        Values the page does not provide are left as they were, so data
        found earlier (in the row or in another section of the page) is
        never replaced by ``None``.

        Args:
            document (bs): Parsed HTML of the company page.
            idx (int): Row index, returned unchanged.
            row (dict): Existing row data.

        Returns:
            tuple: ``(idx, result)``. ``result['scrape_status']`` is
            'success', 'no_data' or 'error: <message>'.
        """
        # Start with the existing row data
        result = row.copy()

        # Only the Explorium-specific bookkeeping fields are reset here
        result.update({
            'source_platform': 'explorium',
            'scrape_status': 'pending',
            'search_term': self.search_term_used
        })

        try:
            company_info = document.find('div', {'class': 'company-info'})
            if company_info:
                # Company name
                company_name = company_info.find('h1', {'class': 'company-name'})
                if company_name:
                    result['company_name'] = company_name.text.strip()

                # Address
                address = company_info.find('div', {'class': 'address'})
                if address:
                    address_info = self._parse_address(address.text.strip())
                    result.update(address_info)

                # Remaining label/value details
                details = company_info.find_all('div', {'class': 'detail-item'})
                for detail in details:
                    label = detail.find('span', {'class': 'label'})
                    value = detail.find('span', {'class': 'value'})
                    if label and value:
                        field = label.text.strip().lower()
                        val = value.text.strip()

                        if 'phone' in field:
                            result['phone_number'] = val
                        elif 'website' in field:
                            result['website'] = val
                        elif 'employees' in field:
                            result['employee_count'] = val
                        elif 'revenue' in field:
                            result['revenue_range'] = val
                        elif 'naics' in field:
                            result['naics_code'] = val
                        elif 'sic' in field:
                            result['sic_code'] = val

            # NAICS & SIC codes and industry descriptions. Merged rather
            # than update()d so that the helper's None placeholders do not
            # wipe the codes read from the detail items above.
            self._merge_found(result, self._extract_naics_sic(document))

            # Logo, headquarters, country and description; same reasoning,
            # e.g. the country obtained from _parse_address is kept.
            self._merge_found(result, self._extract_company_info(document))

            # Check whether any data field ended up populated
            data_fields = (
                'company_name',
                'headquarters',
                'country',
                'phone_number',
                'website',
                'stock_symbol',
                'naics_code',
                'sic_code',
                'employee_count',
                'revenue_range',
                'company_description',
                'logo_url',
            )
            has_data = any(result.get(key) for key in data_fields)

            result['scrape_status'] = 'success' if has_data else 'no_data'

            # The result is always returned, with or without data
            return idx, result

        except Exception as e:
            # Boundary handler: record the failure on the row instead of
            # letting one bad page abort the caller.
            self._logger.error(f"Error parsing Explorium data: {str(e)}")
            result['scrape_status'] = f'error: {str(e)[:50]}'
            return idx, result

    @staticmethod
    def _collect_codes(document: bs, data_id: str):
        """Return ``(codes, descriptions)`` found in one code section.

        The section is the ``div`` whose ``data-id`` equals ``data_id``.
        Each ``p.ExpTypography-root`` inside it contributes its text as a
        code and its ``aria-label`` as a description; empty values are
        skipped.
        """
        codes = []
        descriptions = []
        section = document.find('div', {'data-id': data_id})
        if section:
            for entry in section.find_all('p', {'class': 'ExpTypography-root'}):
                code = entry.text.strip().strip(',')
                description = entry.get('aria-label', '').strip()
                if code:
                    codes.append(code)
                if description:
                    descriptions.append(description)
        return codes, descriptions

    def _extract_naics_sic(self, document: bs):
        """
        Extract NAICS & SIC codes along with their industry descriptions.

        Returns:
            dict: 'naics_code', 'sic_code' and 'industry', each a
            comma-separated string or ``None`` when nothing was found.
            'industry' lists NAICS descriptions first, then SIC ones.
        """
        naics_codes, naics_industries = self._collect_codes(
            document, 'company-stat-naics'
        )
        sic_codes, sic_industries = self._collect_codes(
            document, 'company-stat-sic'
        )
        industries = naics_industries + sic_industries

        return {
            'naics_code': ', '.join(naics_codes) if naics_codes else None,
            'sic_code': ', '.join(sic_codes) if sic_codes else None,
            'industry': ', '.join(industries) if industries else None,
        }

    def _extract_company_info(self, document: bs):
        """
        Extract headquarters, country, company description, and logo.

        Returns:
            dict: 'headquarters', 'country', 'company_description' and
            'logo_url'; a key is ``None`` when its element is missing.
        """
        result = {
            'headquarters': None,
            'country': None,
            'company_description': None,
            'logo_url': None
        }

        # Headquarters address is read from the aria-label attribute
        address_section = document.find('div', {'data-id': 'info-address'})
        if address_section:
            address_element = address_section.find('p', {'aria-label': True})
            if address_element:
                address_text = address_element.get('aria-label', '').strip()
                result['headquarters'] = address_text

                # Country is the last comma-separated segment of the address
                country = address_text.split(',')[-1].strip()
                result['country'] = country if country else None

        # Description is stored as "<company name>: <description>"
        name_element = document.find('h1', {'data-id': 'txt-company-name'})
        description_element = document.find('p', {'class': 'ExpTypography-root ExpTypography-body1'})
        if name_element and description_element:
            company_name = name_element.text.strip()
            company_desc = description_element.text.strip()
            result['company_description'] = f"{company_name}: {company_desc}"

        # Logo: first <img> in the document that has both alt and src
        logo_element = document.find('img', {'alt': True, 'src': True})
        if logo_element:
            result['logo_url'] = logo_element['src']

        return result
@@ -0,0 +1,206 @@
1
+ from bs4 import BeautifulSoup as bs
2
+ from .base import ScrapperBase
3
+ import json
4
+
5
+
6
class LeadiqScrapper(ScrapperBase):
    """
    Scraper for company pages on leadiq.com.
    """
    domain: str = 'leadiq.com'
    search_term: str = "site:leadiq.com {}"
    keywords: list = [
        'Email Formats & Email Address',
        'Company Overview',
        'Employee Directory',
        'Contact Details & Competitors',
        'Email Format'
    ]

    def define_search_term(self, term: str):
        """Return the site-restricted search query for ``term``."""
        standardized_term = self._standardize_name(term)
        search_term = self.search_term.format(standardized_term)
        return search_term

    async def scrapping(self, document: bs, idx: int, row: dict):
        """
        Scrape company information from a LeadIQ page.

        Works on a copy of ``row`` and adds whatever the page provides.
        Each page section is optional: a missing section is skipped and
        the remaining ones are still processed, so the final status is
        always computed.

        Args:
            document (bs): Parsed HTML of the company page.
            idx (int): Row index, returned unchanged.
            row (dict): Existing row data.

        Returns:
            tuple: ``(idx, result)``. ``result['scrape_status']`` is
            'success', 'no_data' or 'error: <message>'.
        """
        # Start with the existing row data
        result = row.copy()

        # Only the LeadIQ-specific bookkeeping fields are reset here
        result.update({
            'source_platform': 'leadiq',
            'scrape_status': 'pending',
            'search_term': self.search_term_used
        })

        try:
            self._extract_logo_and_revenue(document, result)
            self._extract_overview(document, result)
            self._extract_hero(document, result)

            similar_companies = self._extract_similar_companies(document)
            if similar_companies:
                try:
                    result['similar_companies'] = json.dumps(
                        similar_companies,
                        ensure_ascii=False,
                        allow_nan=False,
                        separators=(',', ':')
                    )
                except Exception as e:
                    self._logger.error(
                        f"Error formatting similar companies JSON: {str(e)}"
                    )
                    result['similar_companies'] = None

            # One more page processed
            self._counter += 1

            # Check whether any data field ended up populated
            data_fields = (
                'company_name',
                'logo_url',
                'address',
                'phone_number',
                'website',
                'stock_symbol',
                'naics_code',
                'sic_code',
                'employee_count',
                'revenue_range',
                'company_description',
                'similar_companies',
            )
            has_data = any(result.get(key) for key in data_fields)

            result['scrape_status'] = 'success' if has_data else 'no_data'
            # The result is always returned, with or without data
            return idx, result

        except Exception as e:
            # Boundary handler: record the failure on the row instead of
            # letting one bad page abort the caller.
            self._logger.error(f"Error parsing LeadIQ data: {str(e)}")
            result['scrape_status'] = f'error: {str(e)[:50]}'
            return idx, result

    @staticmethod
    def _format_range(start_tag, end_tag) -> str:
        """Return "start - end", or just the start text when no end tag."""
        start_value = start_tag.text.strip()
        if end_tag:
            return f"{start_value} - {end_tag.text.strip()}"
        return start_value

    def _extract_logo_and_revenue(self, document: bs, result: dict) -> None:
        """Fill company name/logo from the logo image and the revenue range."""
        # The logo is located by its alt attribute plus this exact width
        logo = document.find('img', {'alt': True, 'width': '76.747'})
        if logo:
            result['company_name'] = logo.get('alt')
            result['logo_url'] = logo.get('src')

        highlight_right = document.find('div', {'class': 'highlight-right'})
        if highlight_right:
            revenue_span = highlight_right.find('span', {'class': 'start'})
            if revenue_span:
                end_span = revenue_span.find_next_sibling('span', {'class': 'end'})
                result['revenue_range'] = self._format_range(revenue_span, end_span)

    def _extract_overview(self, document: bs, result: dict) -> None:
        """Fill the fields listed in the highlight-left overview card."""
        highlight_left = document.find('div', {'class': 'highlight-left'})
        if not highlight_left:
            self._logger.warning("Could not find highlight-left section")
            return

        overview_section = highlight_left.find('div', {'class': 'card span'})
        if not overview_section:
            return

        dl_element = overview_section.find('dl')
        if not dl_element:
            return

        for item in dl_element.find_all('div', {'class': 'item'}):
            dt = item.find('dt')
            dd = item.find('dd')
            if not (dt and dd):
                continue

            field = dt.text.strip().lower()
            value = dd.text.strip()

            # Map page labels to our column names
            if field == 'headquarters':
                address_info = self._parse_address(value)
                result.update(address_info)
                # Country is the last whitespace-separated token of the
                # headquarters text; it replaces the parsed country
                parts = value.split()
                result['country'] = parts[-1] if len(parts) > 1 else None
            elif field == 'phone number':
                phone = value.replace('****', '0000')
                result['phone_number'] = phone
            elif field == 'website':
                website = dd.find('a')
                # Fall back to the visible text when there is no link or
                # the link has no href
                href = website.get('href') if website else None
                result['website'] = href or value
            elif field == 'stock symbol':
                result['stock_symbol'] = value
            elif field == 'naics code':
                result['naics_code'] = value
            elif field == 'employees':
                result['employee_count'] = value
            elif field == 'sic code':
                result['sic_code'] = value

    def _extract_hero(self, document: bs, result: dict) -> None:
        """Fill name, industry, location, headcount and description."""
        hero_section = document.find('div', {'class': 'card hero snug'})
        if not hero_section:
            return

        # Company name
        company_name_element = hero_section.find('h1')
        if company_name_element:
            result['company_name'] = company_name_element.text.strip()

        # Industry, location, and number of employees
        info_p = hero_section.find('p', {'class': 'info'})
        if info_p:
            spans = info_p.find_all('span')
            if len(spans) >= 3:
                result['industry'] = spans[0].text.strip()
                result['location'] = spans[1].text.strip()
                result['number_employees'] = spans[2].text.strip()

        # Company description lives in a <pre> element
        description_p = hero_section.find('pre')
        if description_p:
            result['company_description'] = description_p.text.strip()

    def _extract_similar_companies(self, document: bs) -> list:
        """Return one dict per entry in the "similar" section, or ``[]``."""
        similar_companies = []
        similar_section = document.find('div', {'id': 'similar'})
        if not similar_section:
            return similar_companies

        for company in similar_section.find_all('li'):
            company_link = company.find('a')
            if not company_link:
                continue

            company_name = company_link.find('h3')
            if not company_name:
                continue

            company_logo = company_link.find('img')
            similar_company = {
                'name': company_name.text.strip(),  # quotes are not escaped
                'leadiq_url': company_link.get('href'),
                'logo_url': company_logo.get('src') if company_logo else None,
            }

            # The revenue wrapper is the first span holding a "start" span
            revenue_span = next(
                (
                    span for span in company_link.find_all('span')
                    if span.find('span', {'class': 'start'})
                ),
                None,
            )
            if revenue_span:
                start = revenue_span.find('span', {'class': 'start'})
                end = revenue_span.find('span', {'class': 'end'})
                if start:
                    similar_company['revenue_range'] = self._format_range(start, end)

            similar_companies.append(similar_company)

        return similar_companies