flowtask 5.8.4__cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (470) hide show
  1. flowtask/__init__.py +93 -0
  2. flowtask/__main__.py +38 -0
  3. flowtask/bots/__init__.py +6 -0
  4. flowtask/bots/check.py +93 -0
  5. flowtask/bots/codebot.py +51 -0
  6. flowtask/components/ASPX.py +148 -0
  7. flowtask/components/AddDataset.py +352 -0
  8. flowtask/components/Amazon.py +523 -0
  9. flowtask/components/AutoTask.py +314 -0
  10. flowtask/components/Azure.py +80 -0
  11. flowtask/components/AzureUsers.py +106 -0
  12. flowtask/components/BaseAction.py +91 -0
  13. flowtask/components/BaseLoop.py +198 -0
  14. flowtask/components/BestBuy.py +800 -0
  15. flowtask/components/CSVToGCS.py +120 -0
  16. flowtask/components/CompanyScraper/__init__.py +1 -0
  17. flowtask/components/CompanyScraper/parsers/__init__.py +6 -0
  18. flowtask/components/CompanyScraper/parsers/base.py +102 -0
  19. flowtask/components/CompanyScraper/parsers/explorium.py +192 -0
  20. flowtask/components/CompanyScraper/parsers/leadiq.py +206 -0
  21. flowtask/components/CompanyScraper/parsers/rocket.py +133 -0
  22. flowtask/components/CompanyScraper/parsers/siccode.py +109 -0
  23. flowtask/components/CompanyScraper/parsers/visualvisitor.py +130 -0
  24. flowtask/components/CompanyScraper/parsers/zoominfo.py +118 -0
  25. flowtask/components/CompanyScraper/scrapper.py +1054 -0
  26. flowtask/components/CopyTo.py +177 -0
  27. flowtask/components/CopyToBigQuery.py +243 -0
  28. flowtask/components/CopyToMongoDB.py +291 -0
  29. flowtask/components/CopyToPg.py +609 -0
  30. flowtask/components/CopyToRethink.py +207 -0
  31. flowtask/components/CreateGCSBucket.py +102 -0
  32. flowtask/components/CreateReport/CreateReport.py +228 -0
  33. flowtask/components/CreateReport/__init__.py +9 -0
  34. flowtask/components/CreateReport/charts/__init__.py +15 -0
  35. flowtask/components/CreateReport/charts/bar.py +51 -0
  36. flowtask/components/CreateReport/charts/base.py +66 -0
  37. flowtask/components/CreateReport/charts/pie.py +64 -0
  38. flowtask/components/CreateReport/utils.py +9 -0
  39. flowtask/components/CustomerSatisfaction.py +196 -0
  40. flowtask/components/DataInput.py +200 -0
  41. flowtask/components/DateList.py +255 -0
  42. flowtask/components/DbClient.py +163 -0
  43. flowtask/components/DialPad.py +146 -0
  44. flowtask/components/DocumentDBQuery.py +200 -0
  45. flowtask/components/DownloadFrom.py +371 -0
  46. flowtask/components/DownloadFromD2L.py +113 -0
  47. flowtask/components/DownloadFromFTP.py +181 -0
  48. flowtask/components/DownloadFromIMAP.py +315 -0
  49. flowtask/components/DownloadFromS3.py +198 -0
  50. flowtask/components/DownloadFromSFTP.py +265 -0
  51. flowtask/components/DownloadFromSharepoint.py +110 -0
  52. flowtask/components/DownloadFromSmartSheet.py +114 -0
  53. flowtask/components/DownloadS3File.py +229 -0
  54. flowtask/components/Dummy.py +59 -0
  55. flowtask/components/DuplicatePhoto.py +411 -0
  56. flowtask/components/EmployeeEvaluation.py +237 -0
  57. flowtask/components/ExecuteSQL.py +323 -0
  58. flowtask/components/ExtractHTML.py +178 -0
  59. flowtask/components/FileBase.py +178 -0
  60. flowtask/components/FileCopy.py +181 -0
  61. flowtask/components/FileDelete.py +82 -0
  62. flowtask/components/FileExists.py +146 -0
  63. flowtask/components/FileIteratorDelete.py +112 -0
  64. flowtask/components/FileList.py +194 -0
  65. flowtask/components/FileOpen.py +75 -0
  66. flowtask/components/FileRead.py +120 -0
  67. flowtask/components/FileRename.py +106 -0
  68. flowtask/components/FilterIf.py +284 -0
  69. flowtask/components/FilterRows/FilterRows.py +200 -0
  70. flowtask/components/FilterRows/__init__.py +10 -0
  71. flowtask/components/FilterRows/functions.py +4 -0
  72. flowtask/components/GCSToBigQuery.py +103 -0
  73. flowtask/components/GoogleA4.py +150 -0
  74. flowtask/components/GoogleGeoCoding.py +344 -0
  75. flowtask/components/GooglePlaces.py +315 -0
  76. flowtask/components/GoogleSearch.py +539 -0
  77. flowtask/components/HTTPClient.py +268 -0
  78. flowtask/components/ICIMS.py +146 -0
  79. flowtask/components/IF.py +179 -0
  80. flowtask/components/IcimsFolderCopy.py +173 -0
  81. flowtask/components/ImageFeatures/__init__.py +5 -0
  82. flowtask/components/ImageFeatures/process.py +233 -0
  83. flowtask/components/IteratorBase.py +251 -0
  84. flowtask/components/LangchainLoader/__init__.py +5 -0
  85. flowtask/components/LangchainLoader/loader.py +194 -0
  86. flowtask/components/LangchainLoader/loaders/__init__.py +22 -0
  87. flowtask/components/LangchainLoader/loaders/abstract.py +362 -0
  88. flowtask/components/LangchainLoader/loaders/basepdf.py +50 -0
  89. flowtask/components/LangchainLoader/loaders/docx.py +91 -0
  90. flowtask/components/LangchainLoader/loaders/html.py +119 -0
  91. flowtask/components/LangchainLoader/loaders/pdfblocks.py +146 -0
  92. flowtask/components/LangchainLoader/loaders/pdfmark.py +79 -0
  93. flowtask/components/LangchainLoader/loaders/pdftables.py +135 -0
  94. flowtask/components/LangchainLoader/loaders/qa.py +67 -0
  95. flowtask/components/LangchainLoader/loaders/txt.py +55 -0
  96. flowtask/components/LeadIQ.py +650 -0
  97. flowtask/components/Loop.py +253 -0
  98. flowtask/components/Lowes.py +334 -0
  99. flowtask/components/MS365Usage.py +156 -0
  100. flowtask/components/MSTeamsMessages.py +320 -0
  101. flowtask/components/MarketClustering.py +1051 -0
  102. flowtask/components/MergeFiles.py +362 -0
  103. flowtask/components/MilvusOutput.py +87 -0
  104. flowtask/components/NearByStores.py +175 -0
  105. flowtask/components/NetworkNinja/__init__.py +6 -0
  106. flowtask/components/NetworkNinja/models/__init__.py +52 -0
  107. flowtask/components/NetworkNinja/models/abstract.py +177 -0
  108. flowtask/components/NetworkNinja/models/account.py +39 -0
  109. flowtask/components/NetworkNinja/models/client.py +19 -0
  110. flowtask/components/NetworkNinja/models/district.py +14 -0
  111. flowtask/components/NetworkNinja/models/events.py +101 -0
  112. flowtask/components/NetworkNinja/models/forms.py +499 -0
  113. flowtask/components/NetworkNinja/models/market.py +16 -0
  114. flowtask/components/NetworkNinja/models/organization.py +34 -0
  115. flowtask/components/NetworkNinja/models/photos.py +125 -0
  116. flowtask/components/NetworkNinja/models/project.py +44 -0
  117. flowtask/components/NetworkNinja/models/region.py +28 -0
  118. flowtask/components/NetworkNinja/models/store.py +203 -0
  119. flowtask/components/NetworkNinja/models/user.py +151 -0
  120. flowtask/components/NetworkNinja/router.py +854 -0
  121. flowtask/components/Odoo.py +175 -0
  122. flowtask/components/OdooInjector.py +192 -0
  123. flowtask/components/OpenFromXML.py +126 -0
  124. flowtask/components/OpenWeather.py +41 -0
  125. flowtask/components/OpenWithBase.py +616 -0
  126. flowtask/components/OpenWithPandas.py +715 -0
  127. flowtask/components/PGPDecrypt.py +199 -0
  128. flowtask/components/PandasIterator.py +187 -0
  129. flowtask/components/PandasToFile.py +189 -0
  130. flowtask/components/Paradox.py +339 -0
  131. flowtask/components/ParamIterator.py +117 -0
  132. flowtask/components/ParseHTML.py +84 -0
  133. flowtask/components/PlacerStores.py +249 -0
  134. flowtask/components/Pokemon.py +507 -0
  135. flowtask/components/PositiveBot.py +62 -0
  136. flowtask/components/PowerPointSlide.py +400 -0
  137. flowtask/components/PrintMessage.py +127 -0
  138. flowtask/components/ProductCompetitors/__init__.py +5 -0
  139. flowtask/components/ProductCompetitors/parsers/__init__.py +7 -0
  140. flowtask/components/ProductCompetitors/parsers/base.py +72 -0
  141. flowtask/components/ProductCompetitors/parsers/bestbuy.py +86 -0
  142. flowtask/components/ProductCompetitors/parsers/lowes.py +103 -0
  143. flowtask/components/ProductCompetitors/scrapper.py +155 -0
  144. flowtask/components/ProductCompliant.py +169 -0
  145. flowtask/components/ProductInfo/__init__.py +1 -0
  146. flowtask/components/ProductInfo/parsers/__init__.py +5 -0
  147. flowtask/components/ProductInfo/parsers/base.py +83 -0
  148. flowtask/components/ProductInfo/parsers/brother.py +97 -0
  149. flowtask/components/ProductInfo/parsers/canon.py +167 -0
  150. flowtask/components/ProductInfo/parsers/epson.py +118 -0
  151. flowtask/components/ProductInfo/parsers/hp.py +131 -0
  152. flowtask/components/ProductInfo/parsers/samsung.py +97 -0
  153. flowtask/components/ProductInfo/scraper.py +319 -0
  154. flowtask/components/ProductPricing.py +118 -0
  155. flowtask/components/QS.py +261 -0
  156. flowtask/components/QSBase.py +201 -0
  157. flowtask/components/QueryIterator.py +273 -0
  158. flowtask/components/QueryToInsert.py +327 -0
  159. flowtask/components/QueryToPandas.py +432 -0
  160. flowtask/components/RESTClient.py +195 -0
  161. flowtask/components/RethinkDBQuery.py +189 -0
  162. flowtask/components/Rsync.py +74 -0
  163. flowtask/components/RunSSH.py +59 -0
  164. flowtask/components/RunShell.py +71 -0
  165. flowtask/components/SalesForce.py +20 -0
  166. flowtask/components/SaveImageBank/__init__.py +257 -0
  167. flowtask/components/SchedulingVisits.py +592 -0
  168. flowtask/components/ScrapPage.py +216 -0
  169. flowtask/components/ScrapSearch.py +79 -0
  170. flowtask/components/SendNotify.py +257 -0
  171. flowtask/components/SentimentAnalysis.py +694 -0
  172. flowtask/components/ServiceScrapper/__init__.py +5 -0
  173. flowtask/components/ServiceScrapper/parsers/__init__.py +1 -0
  174. flowtask/components/ServiceScrapper/parsers/base.py +94 -0
  175. flowtask/components/ServiceScrapper/parsers/costco.py +93 -0
  176. flowtask/components/ServiceScrapper/scrapper.py +199 -0
  177. flowtask/components/SetVariables.py +156 -0
  178. flowtask/components/SubTask.py +182 -0
  179. flowtask/components/SuiteCRM.py +48 -0
  180. flowtask/components/Switch.py +175 -0
  181. flowtask/components/TableBase.py +148 -0
  182. flowtask/components/TableDelete.py +312 -0
  183. flowtask/components/TableInput.py +143 -0
  184. flowtask/components/TableOutput/TableOutput.py +384 -0
  185. flowtask/components/TableOutput/__init__.py +3 -0
  186. flowtask/components/TableSchema.py +534 -0
  187. flowtask/components/Target.py +223 -0
  188. flowtask/components/ThumbnailGenerator.py +156 -0
  189. flowtask/components/ToPandas.py +67 -0
  190. flowtask/components/TransformRows/TransformRows.py +507 -0
  191. flowtask/components/TransformRows/__init__.py +9 -0
  192. flowtask/components/TransformRows/functions.py +559 -0
  193. flowtask/components/TransposeRows.py +176 -0
  194. flowtask/components/UPCDatabase.py +86 -0
  195. flowtask/components/UnGzip.py +171 -0
  196. flowtask/components/Uncompress.py +172 -0
  197. flowtask/components/UniqueRows.py +126 -0
  198. flowtask/components/Unzip.py +107 -0
  199. flowtask/components/UpdateOperationalVars.py +147 -0
  200. flowtask/components/UploadTo.py +299 -0
  201. flowtask/components/UploadToS3.py +136 -0
  202. flowtask/components/UploadToSFTP.py +160 -0
  203. flowtask/components/UploadToSharepoint.py +205 -0
  204. flowtask/components/UserFunc.py +122 -0
  205. flowtask/components/VivaTracker.py +140 -0
  206. flowtask/components/WSDLClient.py +123 -0
  207. flowtask/components/Wait.py +18 -0
  208. flowtask/components/Walmart.py +199 -0
  209. flowtask/components/Workplace.py +134 -0
  210. flowtask/components/XMLToPandas.py +267 -0
  211. flowtask/components/Zammad/__init__.py +41 -0
  212. flowtask/components/Zammad/models.py +0 -0
  213. flowtask/components/ZoomInfoScraper.py +409 -0
  214. flowtask/components/__init__.py +104 -0
  215. flowtask/components/abstract.py +18 -0
  216. flowtask/components/flow.py +530 -0
  217. flowtask/components/google.py +335 -0
  218. flowtask/components/group.py +221 -0
  219. flowtask/components/py.typed +0 -0
  220. flowtask/components/reviewscrap.py +132 -0
  221. flowtask/components/tAutoincrement.py +117 -0
  222. flowtask/components/tConcat.py +109 -0
  223. flowtask/components/tExplode.py +119 -0
  224. flowtask/components/tFilter.py +184 -0
  225. flowtask/components/tGroup.py +236 -0
  226. flowtask/components/tJoin.py +270 -0
  227. flowtask/components/tMap/__init__.py +9 -0
  228. flowtask/components/tMap/functions.py +54 -0
  229. flowtask/components/tMap/tMap.py +450 -0
  230. flowtask/components/tMelt.py +112 -0
  231. flowtask/components/tMerge.py +114 -0
  232. flowtask/components/tOrder.py +93 -0
  233. flowtask/components/tPandas.py +94 -0
  234. flowtask/components/tPivot.py +71 -0
  235. flowtask/components/tPluckCols.py +76 -0
  236. flowtask/components/tUnnest.py +82 -0
  237. flowtask/components/user.py +401 -0
  238. flowtask/conf.py +457 -0
  239. flowtask/download.py +102 -0
  240. flowtask/events/__init__.py +11 -0
  241. flowtask/events/events/__init__.py +20 -0
  242. flowtask/events/events/abstract.py +95 -0
  243. flowtask/events/events/alerts/__init__.py +362 -0
  244. flowtask/events/events/alerts/colfunctions.py +131 -0
  245. flowtask/events/events/alerts/functions.py +158 -0
  246. flowtask/events/events/dummy.py +12 -0
  247. flowtask/events/events/exec.py +124 -0
  248. flowtask/events/events/file/__init__.py +7 -0
  249. flowtask/events/events/file/base.py +51 -0
  250. flowtask/events/events/file/copy.py +23 -0
  251. flowtask/events/events/file/delete.py +16 -0
  252. flowtask/events/events/interfaces/__init__.py +9 -0
  253. flowtask/events/events/interfaces/client.py +67 -0
  254. flowtask/events/events/interfaces/credentials.py +28 -0
  255. flowtask/events/events/interfaces/notifications.py +58 -0
  256. flowtask/events/events/jira.py +122 -0
  257. flowtask/events/events/log.py +26 -0
  258. flowtask/events/events/logerr.py +52 -0
  259. flowtask/events/events/notify.py +59 -0
  260. flowtask/events/events/notify_event.py +160 -0
  261. flowtask/events/events/publish.py +54 -0
  262. flowtask/events/events/sendfile.py +104 -0
  263. flowtask/events/events/task.py +97 -0
  264. flowtask/events/events/teams.py +98 -0
  265. flowtask/events/events/webhook.py +58 -0
  266. flowtask/events/manager.py +287 -0
  267. flowtask/exceptions.c +39393 -0
  268. flowtask/exceptions.cpython-312-x86_64-linux-gnu.so +0 -0
  269. flowtask/extensions/__init__.py +3 -0
  270. flowtask/extensions/abstract.py +82 -0
  271. flowtask/extensions/logging/__init__.py +65 -0
  272. flowtask/hooks/__init__.py +9 -0
  273. flowtask/hooks/actions/__init__.py +22 -0
  274. flowtask/hooks/actions/abstract.py +66 -0
  275. flowtask/hooks/actions/dummy.py +23 -0
  276. flowtask/hooks/actions/jira.py +74 -0
  277. flowtask/hooks/actions/rest.py +320 -0
  278. flowtask/hooks/actions/sampledata.py +37 -0
  279. flowtask/hooks/actions/sensor.py +23 -0
  280. flowtask/hooks/actions/task.py +9 -0
  281. flowtask/hooks/actions/ticket.py +37 -0
  282. flowtask/hooks/actions/zammad.py +55 -0
  283. flowtask/hooks/hook.py +62 -0
  284. flowtask/hooks/models.py +17 -0
  285. flowtask/hooks/service.py +187 -0
  286. flowtask/hooks/step.py +91 -0
  287. flowtask/hooks/types/__init__.py +23 -0
  288. flowtask/hooks/types/base.py +129 -0
  289. flowtask/hooks/types/brokers/__init__.py +11 -0
  290. flowtask/hooks/types/brokers/base.py +54 -0
  291. flowtask/hooks/types/brokers/mqtt.py +35 -0
  292. flowtask/hooks/types/brokers/rabbitmq.py +82 -0
  293. flowtask/hooks/types/brokers/redis.py +83 -0
  294. flowtask/hooks/types/brokers/sqs.py +44 -0
  295. flowtask/hooks/types/fs.py +232 -0
  296. flowtask/hooks/types/http.py +49 -0
  297. flowtask/hooks/types/imap.py +200 -0
  298. flowtask/hooks/types/jira.py +279 -0
  299. flowtask/hooks/types/mail.py +205 -0
  300. flowtask/hooks/types/postgres.py +98 -0
  301. flowtask/hooks/types/responses/__init__.py +8 -0
  302. flowtask/hooks/types/responses/base.py +5 -0
  303. flowtask/hooks/types/sharepoint.py +288 -0
  304. flowtask/hooks/types/ssh.py +141 -0
  305. flowtask/hooks/types/tagged.py +59 -0
  306. flowtask/hooks/types/upload.py +85 -0
  307. flowtask/hooks/types/watch.py +71 -0
  308. flowtask/hooks/types/web.py +36 -0
  309. flowtask/interfaces/AzureClient.py +137 -0
  310. flowtask/interfaces/AzureGraph.py +839 -0
  311. flowtask/interfaces/Boto3Client.py +326 -0
  312. flowtask/interfaces/DropboxClient.py +173 -0
  313. flowtask/interfaces/ExcelHandler.py +94 -0
  314. flowtask/interfaces/FTPClient.py +131 -0
  315. flowtask/interfaces/GoogleCalendar.py +201 -0
  316. flowtask/interfaces/GoogleClient.py +133 -0
  317. flowtask/interfaces/GoogleDrive.py +127 -0
  318. flowtask/interfaces/GoogleGCS.py +89 -0
  319. flowtask/interfaces/GoogleGeocoding.py +93 -0
  320. flowtask/interfaces/GoogleLang.py +114 -0
  321. flowtask/interfaces/GooglePub.py +61 -0
  322. flowtask/interfaces/GoogleSheet.py +68 -0
  323. flowtask/interfaces/IMAPClient.py +137 -0
  324. flowtask/interfaces/O365Calendar.py +113 -0
  325. flowtask/interfaces/O365Client.py +220 -0
  326. flowtask/interfaces/OneDrive.py +284 -0
  327. flowtask/interfaces/Outlook.py +155 -0
  328. flowtask/interfaces/ParrotBot.py +130 -0
  329. flowtask/interfaces/SSHClient.py +378 -0
  330. flowtask/interfaces/Sharepoint.py +496 -0
  331. flowtask/interfaces/__init__.py +36 -0
  332. flowtask/interfaces/azureauth.py +119 -0
  333. flowtask/interfaces/cache.py +201 -0
  334. flowtask/interfaces/client.py +82 -0
  335. flowtask/interfaces/compress.py +525 -0
  336. flowtask/interfaces/credentials.py +124 -0
  337. flowtask/interfaces/d2l.py +239 -0
  338. flowtask/interfaces/databases/__init__.py +5 -0
  339. flowtask/interfaces/databases/db.py +223 -0
  340. flowtask/interfaces/databases/documentdb.py +55 -0
  341. flowtask/interfaces/databases/rethink.py +39 -0
  342. flowtask/interfaces/dataframes/__init__.py +11 -0
  343. flowtask/interfaces/dataframes/abstract.py +21 -0
  344. flowtask/interfaces/dataframes/arrow.py +71 -0
  345. flowtask/interfaces/dataframes/dt.py +69 -0
  346. flowtask/interfaces/dataframes/pandas.py +167 -0
  347. flowtask/interfaces/dataframes/polars.py +60 -0
  348. flowtask/interfaces/db.py +263 -0
  349. flowtask/interfaces/env.py +46 -0
  350. flowtask/interfaces/func.py +137 -0
  351. flowtask/interfaces/http.py +1780 -0
  352. flowtask/interfaces/locale.py +40 -0
  353. flowtask/interfaces/log.py +75 -0
  354. flowtask/interfaces/mask.py +143 -0
  355. flowtask/interfaces/notification.py +154 -0
  356. flowtask/interfaces/playwright.py +339 -0
  357. flowtask/interfaces/powerpoint.py +368 -0
  358. flowtask/interfaces/py.typed +0 -0
  359. flowtask/interfaces/qs.py +376 -0
  360. flowtask/interfaces/result.py +87 -0
  361. flowtask/interfaces/selenium_service.py +779 -0
  362. flowtask/interfaces/smartsheet.py +154 -0
  363. flowtask/interfaces/stat.py +39 -0
  364. flowtask/interfaces/task.py +96 -0
  365. flowtask/interfaces/template.py +118 -0
  366. flowtask/interfaces/vectorstores/__init__.py +1 -0
  367. flowtask/interfaces/vectorstores/abstract.py +133 -0
  368. flowtask/interfaces/vectorstores/milvus.py +669 -0
  369. flowtask/interfaces/zammad.py +107 -0
  370. flowtask/models.py +193 -0
  371. flowtask/parsers/__init__.py +15 -0
  372. flowtask/parsers/_yaml.c +11978 -0
  373. flowtask/parsers/_yaml.cpython-312-x86_64-linux-gnu.so +0 -0
  374. flowtask/parsers/argparser.py +235 -0
  375. flowtask/parsers/base.c +15155 -0
  376. flowtask/parsers/base.cpython-312-x86_64-linux-gnu.so +0 -0
  377. flowtask/parsers/json.c +11968 -0
  378. flowtask/parsers/json.cpython-312-x86_64-linux-gnu.so +0 -0
  379. flowtask/parsers/maps.py +49 -0
  380. flowtask/parsers/toml.c +11968 -0
  381. flowtask/parsers/toml.cpython-312-x86_64-linux-gnu.so +0 -0
  382. flowtask/plugins/__init__.py +16 -0
  383. flowtask/plugins/components/__init__.py +0 -0
  384. flowtask/plugins/handler/__init__.py +45 -0
  385. flowtask/plugins/importer.py +31 -0
  386. flowtask/plugins/sources/__init__.py +0 -0
  387. flowtask/runner.py +283 -0
  388. flowtask/scheduler/__init__.py +9 -0
  389. flowtask/scheduler/functions.py +493 -0
  390. flowtask/scheduler/handlers/__init__.py +8 -0
  391. flowtask/scheduler/handlers/manager.py +504 -0
  392. flowtask/scheduler/handlers/models.py +58 -0
  393. flowtask/scheduler/handlers/service.py +72 -0
  394. flowtask/scheduler/notifications.py +65 -0
  395. flowtask/scheduler/scheduler.py +993 -0
  396. flowtask/services/__init__.py +0 -0
  397. flowtask/services/bots/__init__.py +0 -0
  398. flowtask/services/bots/telegram.py +264 -0
  399. flowtask/services/files/__init__.py +11 -0
  400. flowtask/services/files/manager.py +522 -0
  401. flowtask/services/files/model.py +37 -0
  402. flowtask/services/files/service.py +767 -0
  403. flowtask/services/jira/__init__.py +3 -0
  404. flowtask/services/jira/jira_actions.py +191 -0
  405. flowtask/services/tasks/__init__.py +13 -0
  406. flowtask/services/tasks/launcher.py +213 -0
  407. flowtask/services/tasks/manager.py +323 -0
  408. flowtask/services/tasks/service.py +275 -0
  409. flowtask/services/tasks/task_manager.py +376 -0
  410. flowtask/services/tasks/tasks.py +155 -0
  411. flowtask/storages/__init__.py +16 -0
  412. flowtask/storages/exceptions.py +12 -0
  413. flowtask/storages/files/__init__.py +8 -0
  414. flowtask/storages/files/abstract.py +29 -0
  415. flowtask/storages/files/filesystem.py +66 -0
  416. flowtask/storages/tasks/__init__.py +19 -0
  417. flowtask/storages/tasks/abstract.py +26 -0
  418. flowtask/storages/tasks/database.py +33 -0
  419. flowtask/storages/tasks/filesystem.py +108 -0
  420. flowtask/storages/tasks/github.py +119 -0
  421. flowtask/storages/tasks/memory.py +45 -0
  422. flowtask/storages/tasks/row.py +25 -0
  423. flowtask/tasks/__init__.py +0 -0
  424. flowtask/tasks/abstract.py +526 -0
  425. flowtask/tasks/command.py +118 -0
  426. flowtask/tasks/pile.py +486 -0
  427. flowtask/tasks/py.typed +0 -0
  428. flowtask/tasks/task.py +778 -0
  429. flowtask/template/__init__.py +161 -0
  430. flowtask/tests.py +257 -0
  431. flowtask/types/__init__.py +8 -0
  432. flowtask/types/typedefs.c +11347 -0
  433. flowtask/types/typedefs.cpython-312-x86_64-linux-gnu.so +0 -0
  434. flowtask/utils/__init__.py +24 -0
  435. flowtask/utils/constants.py +117 -0
  436. flowtask/utils/encoders.py +21 -0
  437. flowtask/utils/executor.py +112 -0
  438. flowtask/utils/functions.cpp +14280 -0
  439. flowtask/utils/functions.cpython-312-x86_64-linux-gnu.so +0 -0
  440. flowtask/utils/json.cpp +13349 -0
  441. flowtask/utils/json.cpython-312-x86_64-linux-gnu.so +0 -0
  442. flowtask/utils/mail.py +63 -0
  443. flowtask/utils/parseqs.c +13324 -0
  444. flowtask/utils/parserqs.cpython-312-x86_64-linux-gnu.so +0 -0
  445. flowtask/utils/stats.py +308 -0
  446. flowtask/utils/transformations.py +74 -0
  447. flowtask/utils/uv.py +12 -0
  448. flowtask/utils/validators.py +97 -0
  449. flowtask/version.py +11 -0
  450. flowtask-5.8.4.dist-info/LICENSE +201 -0
  451. flowtask-5.8.4.dist-info/METADATA +209 -0
  452. flowtask-5.8.4.dist-info/RECORD +470 -0
  453. flowtask-5.8.4.dist-info/WHEEL +6 -0
  454. flowtask-5.8.4.dist-info/entry_points.txt +3 -0
  455. flowtask-5.8.4.dist-info/top_level.txt +2 -0
  456. plugins/components/CreateQR.py +39 -0
  457. plugins/components/TestComponent.py +28 -0
  458. plugins/components/Use1.py +13 -0
  459. plugins/components/Workplace.py +117 -0
  460. plugins/components/__init__.py +3 -0
  461. plugins/sources/__init__.py +0 -0
  462. plugins/sources/get_populartimes.py +78 -0
  463. plugins/sources/google.py +150 -0
  464. plugins/sources/hubspot.py +679 -0
  465. plugins/sources/icims.py +679 -0
  466. plugins/sources/mobileinsight.py +501 -0
  467. plugins/sources/newrelic.py +262 -0
  468. plugins/sources/uap.py +268 -0
  469. plugins/sources/venu.py +244 -0
  470. plugins/sources/vocinity.py +314 -0
@@ -0,0 +1,189 @@
1
+
2
+ import asyncio
3
+ from typing import List, Dict, Union
4
+ from collections.abc import Callable
5
+ import rethinkdb as r
6
+ from ..exceptions import ComponentError, DataNotFound, ConfigError
7
+ from .flow import FlowComponent
8
+ from ..interfaces.dataframes import PandasDataframe
9
+ from ..interfaces import TemplateSupport
10
+ from ..interfaces.databases.rethink import RethinkDBSupport
11
+
12
+
13
class RethinkDBQuery(
    RethinkDBSupport,
    FlowComponent,
    PandasDataframe,
    TemplateSupport,
):
    """
    RethinkDBQuery.

    Class to execute queries against a RethinkDB database and retrieve results,
    using asyncDB as backend.

    RethinkDB Query can support queries by mapping RethinkDB methods as attributes.
    Methods as "table", "filter", "order_by", "limit", "pluck" are supported.

    Supported filter operators are "in", "gt", "eq", "lt", "ge" and "le";
    any other operator found in a filter entry is ignored.

    Example:

    ```yaml
    RethinkDBQuery:
      table: stores_reviews
      schema: epson
      filter:
        - rating:
            gt: 4
        - rating:
            lt: 6
      order_by:
        - rating: desc
      limit: 50
      columns:
        - store_id
        - store_name
        - formatted_address
        - latitude
        - longitude
        - reviews
        - rating
        - user_ratings_total
      as_dataframe: true
    ```

    """
    def __init__(
        self,
        loop: asyncio.AbstractEventLoop = None,
        job: Callable = None,
        stat: Callable = None,
        **kwargs,
    ) -> None:
        """Init Method."""
        # Both attributes always exist after this point (possibly as None),
        # which is why start() validates their value and not their presence.
        self.table = kwargs.get('table', None)
        self.schema = kwargs.get('schema', None)
        super().__init__(loop=loop, job=job, stat=stat, **kwargs)
        self._db = None

    async def close(self):
        """Close the connection to the RethinkDB database.

        Closing is best-effort: a failure while closing is logged at debug
        level and never propagated, and the handle is always dropped.
        """
        if self._db:
            try:
                await self._db.close()
            except Exception as exc:
                import logging

                logging.getLogger(__name__).debug(
                    "Error closing RethinkDB connection: %s", exc
                )
            self._db = None

    async def start(self, **kwargs):
        """Validate the configuration and resolve masks.

        Raises:
            ConfigError: if 'table' or 'schema' is missing or empty.

        Returns:
            True once the component is ready to run.
        """
        await super().start(**kwargs)
        # __init__ always defines these attributes, so a plain hasattr()
        # check could never detect a missing value.
        if not getattr(self, 'table', None):
            raise ConfigError(
                "'table' attribute is required in the RethinkDBQuery."
            )
        if not getattr(self, 'schema', None):
            raise ConfigError(
                "'schema' attribute is required in the RethinkDBQuery."
            )
        # Replacing with Masking if needed.
        self.schema = self.mask_replacement(self.schema)
        self.table = self.mask_replacement(self.table)
        if hasattr(self, 'columns'):
            # used as "pluck"
            self.pluck = self.columns
        return True

    def _filter_criteria(self, engine, cursor):
        """Apply every entry of ``self.filter`` to ``cursor``.

        Each entry is read as a one-key mapping ``{field: {operator: value}}``;
        only the first key of each mapping is used. "in" entries are applied
        as their own filter step, comparison entries are AND-ed together into
        a single predicate that is applied at the end.
        """
        comparators = ('gt', 'eq', 'lt', 'ge', 'le')
        result = engine.expr(True)
        for args in self.filter:
            field, inner_args = next(iter(args.items()))
            func, value = next(iter(inner_args.items()))
            if func == 'in':
                # NOTE(review): keep this a one-argument lambda; binding
                # field/value as default arguments would change its arity,
                # which the driver may inspect - TODO confirm.
                cursor = cursor.filter(
                    (
                        lambda exp: engine.expr(value)
                        .coerce_to("array")
                        .contains(exp[field])
                    )
                )
            elif func in comparators:
                # The operator name matches the ReQL method name.
                result = result.and_(
                    getattr(engine.row[field], func)(value)
                )
        cursor = cursor.filter(result)
        return cursor

    def _order_by(self, engine, cursor):
        """Apply the ``self.order_by`` clauses to ``cursor``.

        Each entry is read as a one-key mapping ``{field: direction}`` where
        direction is "asc" or "desc" (case-insensitive).

        Raises:
            ComponentError: if a direction is neither "asc" nor "desc".
        """
        order_clauses = []
        for order in self.order_by:
            field, direction = next(iter(order.items()))
            normalized = direction.lower()
            if normalized == 'asc':
                order_clauses.append(engine.asc(field))
            elif normalized == 'desc':
                order_clauses.append(engine.desc(field))
            else:
                raise ComponentError(
                    f"Invalid order direction: {direction}"
                )
        if order_clauses:
            cursor = cursor.order_by(*order_clauses)
        return cursor

    async def run(self):
        """Execute the RethinkDB query and retrieve the results.

        Returns:
            The rows as a list, or the output of ``create_dataframe`` when
            ``as_dataframe`` is True.

        Raises:
            ComponentError: wrapping any error raised while building or
                running the query.
        """
        import logging

        if not self._db:
            # TODO: add support for datasources.
            self._db = self.default_connection()
        try:
            async with await self._db.connection() as conn:
                engine = conn.engine()
                # changing to active database
                cursor = engine.db(self.schema).table(self.table)
                if hasattr(self, 'filter'):
                    # Build a Filter functionality:
                    cursor = self._filter_criteria(engine, cursor)
                if hasattr(self, 'order_by'):
                    cursor = self._order_by(engine, cursor)
                if hasattr(self, 'limit'):
                    cursor = cursor.limit(self.limit)
                if hasattr(self, 'pluck'):
                    cursor = cursor.pluck(self.pluck)
                data = []
                # Debug aid only; emitted as a log record instead of stdout.
                logging.getLogger(__name__).debug('CURSOR > %s', cursor)
                cursor = await cursor.run(conn.get_connection())
                if isinstance(cursor, list):
                    # The query already produced a materialized list.
                    data = cursor
                else:
                    # Otherwise drain the cursor row by row.
                    while await cursor.fetch_next():
                        item = await cursor.next()
                        data.append(item)
                # Check if return as Dataframe:
                if self.as_dataframe is True:
                    self._result = await self.create_dataframe(data)
                else:
                    self._result = data
                return self._result
        except Exception as e:
            raise ComponentError(
                f"Error executing RethinkDB query: {e}"
            ) from e
@@ -0,0 +1,74 @@
1
+ from collections.abc import Callable
2
+ import asyncio
3
+ import asyncssh
4
+ from ..exceptions import ComponentError
5
+ from .RunSSH import RunSSH
6
+
7
+
8
class Rsync(RunSSH):
    """
    Rsync.

    Run an ``rsync`` command on an SSH server, copying ``source`` to a
    ``destination`` given as user, server and directory.
    """

    def __init__(
        self,
        loop: asyncio.AbstractEventLoop = None,
        job: Callable = None,
        stat: Callable = None,
        **kwargs,
    ):
        # Default rsync flags; assigned before the parent constructor runs.
        self.flags: str = "azrv"
        super().__init__(loop=loop, job=job, stat=stat, **kwargs)

    async def start(self, **kwargs):
        """Resolve source and destination settings before running.

        Raises:
            ComponentError: if ``source`` has no "directory" key.

        Returns:
            True once the settings have been processed.
        """
        # The parent start() is a coroutine: without ``await`` it would never
        # execute and host/credentials processing would be skipped.
        await super().start(**kwargs)
        if hasattr(self, "source"):  # using the destination philosophy
            try:
                if hasattr(self, "masks"):
                    self.source_dir = self.mask_replacement(self.source["directory"])
                else:
                    self.source_dir = self.source["directory"]
            except KeyError as exc:
                raise ComponentError(
                    "Rsync Error: you must specify a source directory"
                ) from exc
        if hasattr(self, "destination"):
            if hasattr(self, "masks"):
                self.destination_dir = self.mask_replacement(
                    self.destination["directory"]
                )
            else:
                self.destination_dir = self.destination["directory"]
            # also, calculate the destination server:
            self.dest_server = self.mask_replacement(self.destination["server"])
            self.dest_user = self.mask_replacement(self.destination["user"])
            try:
                self.dest_port = self.mask_replacement(self.destination["port"])
            except KeyError:
                self.dest_port = None
        if not hasattr(self, "dest_port"):
            # run() reads dest_port unconditionally; make sure it exists.
            self.dest_port = None
        return True

    async def run(self):
        """Open the SSH connection and execute the rsync command.

        Returns:
            A dict with "exit_status", "returncode" and "error" (stderr) of
            the command, whether it succeeded or failed.
        """
        await self.open(
            host=self.host,
            port=self.port,
            tunnel=self.tunnel,
            credentials=self.credentials,
        )
        destination = f"{self.dest_user}@{self.dest_server}:{self.destination_dir}"
        command = f"rsync -{self.flags} {self.source_dir} {destination}"
        if self.dest_port is not None:
            # NOTE(review): rsync's --port is the rsync-daemon port; for a
            # non-default SSH port `-e "ssh -p N"` may be intended - confirm.
            command = command + f" --port={self.dest_port}"
        # NOTE(review): paths are interpolated unquoted into the remote
        # command line; confirm they never come from untrusted input.
        result = {}
        try:
            rst = await self._connection.run(command, check=True)
            result = {
                "exit_status": rst.exit_status,
                "returncode": rst.returncode,
                "error": rst.stderr,
            }
        except asyncssh.process.ProcessError as err:
            self._logger.error(f"Error executing command: {err}")
            # Report the failure instead of leaving ``result`` unbound.
            result = {
                "exit_status": err.exit_status,
                "returncode": err.returncode,
                "error": err.stderr,
            }
        self.add_metric("SSH: COMMAND", command)
        self.add_metric("SSH: RESULT", result)
        self._result = result
        return result
@@ -0,0 +1,59 @@
1
+ import asyncio
2
+ from collections.abc import Callable
3
+ import logging
4
+ import asyncssh
5
+ from .flow import FlowComponent
6
+ from ..interfaces.SSHClient import SSHClient
7
+
8
+
9
class RunSSH(SSHClient, FlowComponent):
    """
    RunSSH.

    Run any arbitrary command into an SSH server.

    The commands are read from ``self.commands``; each one goes through
    ``mask_replacement`` before being executed on the open connection.
    """

    def __init__(
        self,
        loop: asyncio.AbstractEventLoop = None,
        job: Callable = None,
        stat: Callable = None,
        **kwargs,
    ):
        """Forward all arguments to the parent initializers."""
        super().__init__(loop=loop, job=job, stat=stat, **kwargs)

    async def start(self, **kwargs):
        """Start.

        Processing variables and credentials.

        Any exception raised by ``define_host`` or
        ``processing_credentials`` is logged and re-raised unchanged.
        """
        try:
            self.define_host()
            self.processing_credentials()
        except Exception as err:
            logging.error(err)
            raise

    async def run(self):
        """Execute every configured command over the SSH connection.

        Returns:
            dict: maps each (mask-replaced) command to a dict with
            ``exit_status``, ``returncode`` and ``error`` (stderr). A command
            that fails is logged and recorded with the values carried by the
            raised ``ProcessError``; the remaining commands still run.
        """
        result = {}
        await self.open(
            host=self.host,
            port=self.port,
            tunnel=self.tunnel,
            credentials=self.credentials,
        )
        for command in self.commands:
            command = self.mask_replacement(command)
            try:
                rst = await self._connection.run(command, check=True)
                result[command] = {
                    "exit_status": rst.exit_status,
                    "returncode": rst.returncode,
                    "error": rst.stderr,
                }
            except asyncssh.process.ProcessError as err:
                logging.error(f"Error executing command: {err}")
                # Keep a trace of the failure: without this entry a failed
                # command was indistinguishable from one that never ran.
                result[command] = {
                    "exit_status": err.exit_status,
                    "returncode": err.returncode,
                    "error": err.stderr,
                }
        self.add_metric("SSH: COMMAND", result)
        self._result = result
        return result
@@ -0,0 +1,71 @@
1
+ import subprocess
2
+ import asyncio
3
+ from typing import List
4
+ from collections.abc import Callable
5
+ from navconfig.logging import logging
6
+ from .flow import FlowComponent
7
+
8
+
9
class RunShell(FlowComponent):
    """
    RunShell.


    Overview

        Execute a Command to run a task

    .. table:: Properties
       :widths: auto


    +--------------+----------+--------------------------------+
    | Name         | Required | Summary                        |
    +--------------+----------+--------------------------------+
    | name         | Yes      | Name of task                   |
    +--------------+----------+--------------------------------+
    | description  | Yes      | Task description               |
    +--------------+----------+--------------------------------+
    | steps        | Yes      | Not assigned steps             |
    +--------------+----------+--------------------------------+
    | runtask      | Yes      | This method runs the task      |
    +--------------+----------+--------------------------------+
    | program      | Yes      | Program name                   |
    +--------------+----------+--------------------------------+
    | task         | Yes      | Assign the run shell attribute |
    +--------------+----------+--------------------------------+

    The commands to execute are read from the ``commands`` attribute
    (a list of shell command strings, empty by default).

    ``run`` returns ``True`` when every command exits with status 0 and
    ``False`` as soon as one command fails.
    """

    def __init__(
        self,
        loop: asyncio.AbstractEventLoop = None,
        job: Callable = None,
        stat: Callable = None,
        **kwargs,
    ):
        """Init Method."""
        # Set before the parent initializer runs, as in the original flow.
        self.commands: List = []
        super(RunShell, self).__init__(loop=loop, job=job, stat=stat, **kwargs)

    async def start(self, **kwargs):
        """Nothing to prepare; always returns ``True``."""
        return True

    async def close(self):
        """Nothing to release."""

    async def run(self):
        """Run every command in ``self.commands`` through the shell, in order.

        Each command goes through ``mask_replacement`` when the component
        has a ``masks`` attribute.

        Returns:
            bool: ``True`` if all commands exited with status 0 (also when
            there are no commands); ``False`` at the first command that
            cannot be started or exits with a non-zero status.
        """
        for cmd in self.commands:
            if hasattr(self, "masks"):
                cmd = self.mask_replacement(cmd)
            logging.debug("> %s", cmd)
            try:
                # NOTE(security): the command string is handed to the shell
                # verbatim; it must come from trusted task definitions only.
                process = await asyncio.create_subprocess_shell(
                    cmd,
                    stdout=asyncio.subprocess.PIPE,
                    stderr=asyncio.subprocess.PIPE,
                )
                stdout, stderr = await process.communicate()
            except OSError as exc:
                logging.error("Error in command: %s", exc)
                return False
            logging.debug((stdout, stderr))
            if process.returncode != 0:
                # A non-zero exit status is the only failure signal a shell
                # command gives; it was previously ignored.
                logging.error(
                    "Error in command: %r exited with status %s: %s",
                    cmd,
                    process.returncode,
                    stderr.decode(errors="replace").strip(),
                )
                return False
        return True
@@ -0,0 +1,20 @@
1
+ from navconfig.logging import logging
2
+ from querysource.exceptions import DataNotFound as QSNotFound
3
+ from ..exceptions import ComponentError, DataNotFound
4
+ from .QSBase import QSBase
5
+
6
+
7
class SalesForce(QSBase):
    """SalesForce Connector.

    Declares the ``report`` type and the ``salesforce`` driver for the
    ``QSBase`` machinery.
    """

    type = "report"
    _driver = "salesforce"

    async def report(self):
        """Return whatever ``self._qs.report()`` produces.

        Raises:
            DataNotFound: when the underlying call raises the QuerySource
                ``DataNotFound`` exception.
            ComponentError: on any other exception (logged with traceback
                before being wrapped).
        """
        try:
            outcome = await self._qs.report()
        except QSNotFound as not_found:
            raise DataNotFound(
                f"SalesForce: Report Not Found: {not_found}"
            ) from not_found
        except Exception as failure:
            logging.exception(failure)
            raise ComponentError(f"SalesForce ERROR: {failure!s}") from failure
        return outcome
@@ -0,0 +1,257 @@
1
+ from typing import Optional
2
+ from collections.abc import Callable
3
+ import re
4
+ import asyncio
5
+ import asyncpg
6
+ from PIL.TiffImagePlugin import IFDRational
7
+ from pgvector.asyncpg import register_vector
8
+ from querysource.types.validators import Entity
9
+ from navigator.libs.json import JSONContent
10
+ from ..flow import FlowComponent
11
+ from ...exceptions import ConfigError, ComponentError
12
+ from ...conf import default_dsn
13
+
14
# ``\Z`` (not ``$``) anchors at the very end of the string: ``$`` would also
# match just before a trailing newline and let "name\n" through.
IDENT_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*\Z")


def qid(name: str) -> str:
    """
    Very small helper to quote SQL identifiers safely.

    Returns ``name`` wrapped in double quotes.
    Raises ValueError if name is empty, starts with a digit, or contains
    anything but ASCII letters, digits or '_' (a trailing newline included).
    """
    if not IDENT_RE.fullmatch(name):
        raise ValueError(
            f"illegal identifier: {name!r}"
        )
    return '"' + name + '"'
26
+
27
class SaveImageBank(FlowComponent):
    """
    SaveImageBank.

    Save images into a postgreSQL Table, with UPSERT and optional evaluation for duplicates.

    Every row of the incoming DataFrame is checked against the table with a
    hash query and a vector query, flagged in a ``duplicated`` column, and
    then inserted or updated (conflict target: the id column).

    Keyword options (with defaults): ``id_column`` ("photo_id"),
    ``hash_column`` ("image_hash"), ``vector_column`` ("image_vector"),
    ``detections_column`` ("image_features"), ``hamming_threshold`` (4),
    ``vector_threshold`` (0.05), ``tablename`` ("image_bank"),
    ``schema`` ("public"), ``drop_columns`` ([]).
    """

    def __init__(
        self,
        loop: Optional[asyncio.AbstractEventLoop] = None,
        job: Callable | None = None,
        stat: Callable | None = None,
        **kwargs,
    ):
        self.id_column: str = kwargs.get("id_column", "photo_id")
        self.hash_column: str = kwargs.get("hash_column", "image_hash")
        self.vector_column: str = kwargs.get("vector_column", "image_vector")
        self.detections_column: str = kwargs.get("detections_column", "image_features")
        self.hamming_threshold: int = kwargs.get("hamming_threshold", 4)
        self.vector_threshold: float = kwargs.get("vector_threshold", 0.05)
        self.tablename: str = kwargs.get("tablename", "image_bank")
        self.schema: str = kwargs.get("schema", "public")
        self.pool: asyncpg.Pool | None = None
        # caps how many rows ``run`` processes concurrently
        self._semaphore = asyncio.Semaphore(16)
        self.drop_columns: list[str] = kwargs.get("drop_columns", [])
        super().__init__(loop=loop, job=job, stat=stat, **kwargs)
        # JSON encoder:
        self._encoder = JSONContent()

    def _qualified_tablename(self) -> str:
        """
        Get the qualified table name (``"schema"."table"``, both quoted).

        Raises ConfigError when schema or table name is empty; ``qid``
        raises ValueError for an illegal identifier.
        """
        if not self.schema:
            raise ConfigError("Schema is not set.")
        if not self.tablename:
            raise ConfigError("Table name is not set.")
        return f"{qid(self.schema)}.{qid(self.tablename)}"

    def _build_insert_sql(self, columns: list[str]) -> str:
        """
        Produces something like:

        INSERT INTO schema.table (col1,col2,…) VALUES ($1,$2,…)
        ON CONFLICT (photo_id) DO UPDATE SET
        col1 = EXCLUDED.col1,
        ...

        When the id column is the only column there is nothing to update,
        so the conflict action becomes ``DO NOTHING``.
        """
        col_list = ", ".join(map(qid, columns))
        placeholders = ", ".join(f"${i}" for i in range(1, len(columns) + 1))
        updates = ", ".join(
            f"{qid(c)} = EXCLUDED.{qid(c)}"
            for c in columns
            if c != self.id_column
        )
        # An empty SET list is a syntax error in PostgreSQL.
        conflict_action = f"DO UPDATE SET {updates}" if updates else "DO NOTHING"

        return (
            f"INSERT INTO {self._qualified_tablename()} ({col_list}) "
            f"VALUES ({placeholders}) "
            f"ON CONFLICT ({qid(self.id_column)}) "
            f"{conflict_action};"
        )

    def _build_phash_sql(self) -> str:
        """
        Build the hash duplicate query.

        Parameters: $1 hash (hex string), $2 maximum bit difference,
        $3 id of the row being checked (excluded from the search).
        """
        return (
            f"SELECT 1 FROM {self._qualified_tablename()} "
            f"WHERE {qid(self.id_column)} IS DISTINCT FROM $3 "
            f"AND bit_count(('x' || $1)::bit(256) # "
            f" ('x' || {qid(self.hash_column)})::bit(256)) "
            f" <= $2 "
            f"LIMIT 1;"
        )

    def _build_vector_sql(self) -> str:
        """
        Build the vector duplicate query.

        Parameters: $1 vector, $2 threshold compared against the pgvector
        ``<#>`` operator result, $3 id of the row being checked (excluded).
        """
        return (
            f"SELECT 1 FROM {self._qualified_tablename()} "
            f"WHERE {qid(self.id_column)} IS DISTINCT FROM $3 "
            f"AND {qid(self.vector_column)} <#> $1::vector < $2 "
            f"LIMIT 1;"
        )

    async def pgvector_init(self, conn):
        """
        Per-connection setup used as the pool's ``init`` callback.

        Registers JSON/JSONB codecs backed by ``self._encoder`` and the
        pgvector type codec on ``conn``.
        """
        # Setup jsonb encoder/decoder
        def _encoder(value):
            return self._encoder.dumps(value)  # pylint: disable=E1120

        def _decoder(value):
            return self._encoder.loads(value)  # pylint: disable=E1120

        for type_name in ("json", "jsonb"):
            await conn.set_type_codec(
                type_name,
                encoder=_encoder,
                decoder=_decoder,
                schema="pg_catalog"
            )

        await register_vector(conn)

    # ──────────────────────────────────────────────────────────────
    # Setup / teardown
    # ──────────────────────────────────────────────────────────────
    async def start(self, **kwargs):
        """
        Validate the input DataFrame, open the pool and check the table.

        Raises ConfigError when a required column is missing from the
        DataFrame, when the pool cannot be created, or when the target
        table or schema does not exist.
        """
        if self.previous:
            self.data = self.input

        # column checks
        for col in (self.id_column, self.hash_column,
                    self.vector_column, self.detections_column):
            if col not in self.data.columns:
                raise ConfigError(
                    f"Column '{col}' missing from DataFrame"
                )
        # Validate and quote the identifiers before any SQL is built, so the
        # existence check hits exactly the table the later queries use.
        qualified = self._qualified_tablename()
        self.pool = await asyncpg.create_pool(
            dsn=default_dsn,
            min_size=1,
            max_size=4,
            max_queries=100,
            init=self.pgvector_init,
            timeout=10,
        )
        if not self.pool:
            raise ConfigError(
                "Database connection pool is not initialized."
            )
        # Check if the table exists
        try:
            async with self.pool.acquire() as conn:
                await conn.execute(f"SELECT 1 FROM {qualified} LIMIT 1")
        except asyncpg.exceptions.UndefinedTableError as exc:
            await self.close()  # do not leave the pool open on failure
            raise ConfigError(
                f"Table {self.schema}.{self.tablename} does not exist."
            ) from exc
        except asyncpg.exceptions.InvalidSchemaNameError as exc:
            # asyncpg reports a missing schema as InvalidSchemaNameError;
            # it has no ``UndefinedSchemaError`` class.
            await self.close()
            raise ConfigError(
                f"Schema {self.schema} does not exist."
            ) from exc
        if "duplicated" not in self.data.columns:
            self.data["duplicated"] = False
        # prepare SQL strings
        self._sql_phash = self._build_phash_sql()
        self._sql_vector = self._build_vector_sql()

    async def close(self):
        """Close the connection pool, if one is open."""
        if self.pool:
            await self.pool.close()
            self.pool = None

    # --------------- duplicate test --------------------
    async def _is_duplicated(self, conn, phash: str, vec: list[float], current_id: int) -> bool:
        """
        Check if the given hash and vector are duplicated in the database.
        :param conn: Database connection.
        :param phash: Perceptual hash of the image.
        :param vec: Vector representation of the image.
        :param current_id: Id of the row being checked; excluded from the search.
        :return: True only if both the hash query and the vector query find a match.
        """
        # phash first: both tests must match, so a missing hash or a hash
        # miss already decides the answer and the vector query is skipped.
        if not phash:
            return False
        phash_hit = await conn.fetchval(
            self._sql_phash, phash, self.hamming_threshold, current_id
        )
        if not phash_hit:
            return False
        # vector second
        return bool(
            await conn.fetchval(self._sql_vector, vec, self.vector_threshold, current_id)
        )

    async def _upsert_row(self, conn, row) -> bool:
        """
        UPSERT a single row into the database.
        :param conn: Database connection.
        :param row: Row data to be inserted/updated; its "duplicated" key is overwritten.
        :return: True if the row was duplicated, False otherwise.
        """
        # --------------- UPSERT one row --------------------
        dup = await self._is_duplicated(
            conn,
            row[self.hash_column],
            row[self.vector_column],
            current_id=row[self.id_column]
        )

        # Add/overwrite duplicated flag in the in‑memory DF row
        row["duplicated"] = dup

        # Build VALUES array in the same order as self.data.columns
        values = [row[col] for col in self.data.columns]
        # asyncpg needs list/tuple for pgvector, ensure np → list
        idx_vec = self.data.columns.get_loc(self.vector_column)
        values[idx_vec] = list(values[idx_vec])

        await conn.execute(self._sql_insert, *values)
        return dup

    async def run(self):
        """
        Run the task.

        Drops ``drop_columns`` from the DataFrame, then checks and upserts
        every row concurrently, writing the duplicate flag back into the
        ``duplicated`` column.

        :return: the processed DataFrame (also stored in ``self._result``).
        """
        if self.pool is None:
            raise ConfigError("Database connection pool is not initialized.")
        if self.drop_columns:
            # drop columns from dataframe:
            self.data.drop(
                columns=self.drop_columns,
                axis=1,
                inplace=True,
            )
        # built here because the column list is only final after the drop
        self._sql_insert = self._build_insert_sql(list(self.data.columns))

        # check for duplicates
        async def handle(idx):
            async with self._semaphore, self.pool.acquire() as conn:
                row = self.data.loc[idx].to_dict()
                await self._upsert_row(conn, row)
                # write duplicated flag back into DF
                self.data.at[idx, "duplicated"] = row["duplicated"]

        await asyncio.gather(*(handle(i) for i in self.data.index))

        self._result = self.data
        return self._result