flowtask 5.8.4__cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (470) hide show
  1. flowtask/__init__.py +93 -0
  2. flowtask/__main__.py +38 -0
  3. flowtask/bots/__init__.py +6 -0
  4. flowtask/bots/check.py +93 -0
  5. flowtask/bots/codebot.py +51 -0
  6. flowtask/components/ASPX.py +148 -0
  7. flowtask/components/AddDataset.py +352 -0
  8. flowtask/components/Amazon.py +523 -0
  9. flowtask/components/AutoTask.py +314 -0
  10. flowtask/components/Azure.py +80 -0
  11. flowtask/components/AzureUsers.py +106 -0
  12. flowtask/components/BaseAction.py +91 -0
  13. flowtask/components/BaseLoop.py +198 -0
  14. flowtask/components/BestBuy.py +800 -0
  15. flowtask/components/CSVToGCS.py +120 -0
  16. flowtask/components/CompanyScraper/__init__.py +1 -0
  17. flowtask/components/CompanyScraper/parsers/__init__.py +6 -0
  18. flowtask/components/CompanyScraper/parsers/base.py +102 -0
  19. flowtask/components/CompanyScraper/parsers/explorium.py +192 -0
  20. flowtask/components/CompanyScraper/parsers/leadiq.py +206 -0
  21. flowtask/components/CompanyScraper/parsers/rocket.py +133 -0
  22. flowtask/components/CompanyScraper/parsers/siccode.py +109 -0
  23. flowtask/components/CompanyScraper/parsers/visualvisitor.py +130 -0
  24. flowtask/components/CompanyScraper/parsers/zoominfo.py +118 -0
  25. flowtask/components/CompanyScraper/scrapper.py +1054 -0
  26. flowtask/components/CopyTo.py +177 -0
  27. flowtask/components/CopyToBigQuery.py +243 -0
  28. flowtask/components/CopyToMongoDB.py +291 -0
  29. flowtask/components/CopyToPg.py +609 -0
  30. flowtask/components/CopyToRethink.py +207 -0
  31. flowtask/components/CreateGCSBucket.py +102 -0
  32. flowtask/components/CreateReport/CreateReport.py +228 -0
  33. flowtask/components/CreateReport/__init__.py +9 -0
  34. flowtask/components/CreateReport/charts/__init__.py +15 -0
  35. flowtask/components/CreateReport/charts/bar.py +51 -0
  36. flowtask/components/CreateReport/charts/base.py +66 -0
  37. flowtask/components/CreateReport/charts/pie.py +64 -0
  38. flowtask/components/CreateReport/utils.py +9 -0
  39. flowtask/components/CustomerSatisfaction.py +196 -0
  40. flowtask/components/DataInput.py +200 -0
  41. flowtask/components/DateList.py +255 -0
  42. flowtask/components/DbClient.py +163 -0
  43. flowtask/components/DialPad.py +146 -0
  44. flowtask/components/DocumentDBQuery.py +200 -0
  45. flowtask/components/DownloadFrom.py +371 -0
  46. flowtask/components/DownloadFromD2L.py +113 -0
  47. flowtask/components/DownloadFromFTP.py +181 -0
  48. flowtask/components/DownloadFromIMAP.py +315 -0
  49. flowtask/components/DownloadFromS3.py +198 -0
  50. flowtask/components/DownloadFromSFTP.py +265 -0
  51. flowtask/components/DownloadFromSharepoint.py +110 -0
  52. flowtask/components/DownloadFromSmartSheet.py +114 -0
  53. flowtask/components/DownloadS3File.py +229 -0
  54. flowtask/components/Dummy.py +59 -0
  55. flowtask/components/DuplicatePhoto.py +411 -0
  56. flowtask/components/EmployeeEvaluation.py +237 -0
  57. flowtask/components/ExecuteSQL.py +323 -0
  58. flowtask/components/ExtractHTML.py +178 -0
  59. flowtask/components/FileBase.py +178 -0
  60. flowtask/components/FileCopy.py +181 -0
  61. flowtask/components/FileDelete.py +82 -0
  62. flowtask/components/FileExists.py +146 -0
  63. flowtask/components/FileIteratorDelete.py +112 -0
  64. flowtask/components/FileList.py +194 -0
  65. flowtask/components/FileOpen.py +75 -0
  66. flowtask/components/FileRead.py +120 -0
  67. flowtask/components/FileRename.py +106 -0
  68. flowtask/components/FilterIf.py +284 -0
  69. flowtask/components/FilterRows/FilterRows.py +200 -0
  70. flowtask/components/FilterRows/__init__.py +10 -0
  71. flowtask/components/FilterRows/functions.py +4 -0
  72. flowtask/components/GCSToBigQuery.py +103 -0
  73. flowtask/components/GoogleA4.py +150 -0
  74. flowtask/components/GoogleGeoCoding.py +344 -0
  75. flowtask/components/GooglePlaces.py +315 -0
  76. flowtask/components/GoogleSearch.py +539 -0
  77. flowtask/components/HTTPClient.py +268 -0
  78. flowtask/components/ICIMS.py +146 -0
  79. flowtask/components/IF.py +179 -0
  80. flowtask/components/IcimsFolderCopy.py +173 -0
  81. flowtask/components/ImageFeatures/__init__.py +5 -0
  82. flowtask/components/ImageFeatures/process.py +233 -0
  83. flowtask/components/IteratorBase.py +251 -0
  84. flowtask/components/LangchainLoader/__init__.py +5 -0
  85. flowtask/components/LangchainLoader/loader.py +194 -0
  86. flowtask/components/LangchainLoader/loaders/__init__.py +22 -0
  87. flowtask/components/LangchainLoader/loaders/abstract.py +362 -0
  88. flowtask/components/LangchainLoader/loaders/basepdf.py +50 -0
  89. flowtask/components/LangchainLoader/loaders/docx.py +91 -0
  90. flowtask/components/LangchainLoader/loaders/html.py +119 -0
  91. flowtask/components/LangchainLoader/loaders/pdfblocks.py +146 -0
  92. flowtask/components/LangchainLoader/loaders/pdfmark.py +79 -0
  93. flowtask/components/LangchainLoader/loaders/pdftables.py +135 -0
  94. flowtask/components/LangchainLoader/loaders/qa.py +67 -0
  95. flowtask/components/LangchainLoader/loaders/txt.py +55 -0
  96. flowtask/components/LeadIQ.py +650 -0
  97. flowtask/components/Loop.py +253 -0
  98. flowtask/components/Lowes.py +334 -0
  99. flowtask/components/MS365Usage.py +156 -0
  100. flowtask/components/MSTeamsMessages.py +320 -0
  101. flowtask/components/MarketClustering.py +1051 -0
  102. flowtask/components/MergeFiles.py +362 -0
  103. flowtask/components/MilvusOutput.py +87 -0
  104. flowtask/components/NearByStores.py +175 -0
  105. flowtask/components/NetworkNinja/__init__.py +6 -0
  106. flowtask/components/NetworkNinja/models/__init__.py +52 -0
  107. flowtask/components/NetworkNinja/models/abstract.py +177 -0
  108. flowtask/components/NetworkNinja/models/account.py +39 -0
  109. flowtask/components/NetworkNinja/models/client.py +19 -0
  110. flowtask/components/NetworkNinja/models/district.py +14 -0
  111. flowtask/components/NetworkNinja/models/events.py +101 -0
  112. flowtask/components/NetworkNinja/models/forms.py +499 -0
  113. flowtask/components/NetworkNinja/models/market.py +16 -0
  114. flowtask/components/NetworkNinja/models/organization.py +34 -0
  115. flowtask/components/NetworkNinja/models/photos.py +125 -0
  116. flowtask/components/NetworkNinja/models/project.py +44 -0
  117. flowtask/components/NetworkNinja/models/region.py +28 -0
  118. flowtask/components/NetworkNinja/models/store.py +203 -0
  119. flowtask/components/NetworkNinja/models/user.py +151 -0
  120. flowtask/components/NetworkNinja/router.py +854 -0
  121. flowtask/components/Odoo.py +175 -0
  122. flowtask/components/OdooInjector.py +192 -0
  123. flowtask/components/OpenFromXML.py +126 -0
  124. flowtask/components/OpenWeather.py +41 -0
  125. flowtask/components/OpenWithBase.py +616 -0
  126. flowtask/components/OpenWithPandas.py +715 -0
  127. flowtask/components/PGPDecrypt.py +199 -0
  128. flowtask/components/PandasIterator.py +187 -0
  129. flowtask/components/PandasToFile.py +189 -0
  130. flowtask/components/Paradox.py +339 -0
  131. flowtask/components/ParamIterator.py +117 -0
  132. flowtask/components/ParseHTML.py +84 -0
  133. flowtask/components/PlacerStores.py +249 -0
  134. flowtask/components/Pokemon.py +507 -0
  135. flowtask/components/PositiveBot.py +62 -0
  136. flowtask/components/PowerPointSlide.py +400 -0
  137. flowtask/components/PrintMessage.py +127 -0
  138. flowtask/components/ProductCompetitors/__init__.py +5 -0
  139. flowtask/components/ProductCompetitors/parsers/__init__.py +7 -0
  140. flowtask/components/ProductCompetitors/parsers/base.py +72 -0
  141. flowtask/components/ProductCompetitors/parsers/bestbuy.py +86 -0
  142. flowtask/components/ProductCompetitors/parsers/lowes.py +103 -0
  143. flowtask/components/ProductCompetitors/scrapper.py +155 -0
  144. flowtask/components/ProductCompliant.py +169 -0
  145. flowtask/components/ProductInfo/__init__.py +1 -0
  146. flowtask/components/ProductInfo/parsers/__init__.py +5 -0
  147. flowtask/components/ProductInfo/parsers/base.py +83 -0
  148. flowtask/components/ProductInfo/parsers/brother.py +97 -0
  149. flowtask/components/ProductInfo/parsers/canon.py +167 -0
  150. flowtask/components/ProductInfo/parsers/epson.py +118 -0
  151. flowtask/components/ProductInfo/parsers/hp.py +131 -0
  152. flowtask/components/ProductInfo/parsers/samsung.py +97 -0
  153. flowtask/components/ProductInfo/scraper.py +319 -0
  154. flowtask/components/ProductPricing.py +118 -0
  155. flowtask/components/QS.py +261 -0
  156. flowtask/components/QSBase.py +201 -0
  157. flowtask/components/QueryIterator.py +273 -0
  158. flowtask/components/QueryToInsert.py +327 -0
  159. flowtask/components/QueryToPandas.py +432 -0
  160. flowtask/components/RESTClient.py +195 -0
  161. flowtask/components/RethinkDBQuery.py +189 -0
  162. flowtask/components/Rsync.py +74 -0
  163. flowtask/components/RunSSH.py +59 -0
  164. flowtask/components/RunShell.py +71 -0
  165. flowtask/components/SalesForce.py +20 -0
  166. flowtask/components/SaveImageBank/__init__.py +257 -0
  167. flowtask/components/SchedulingVisits.py +592 -0
  168. flowtask/components/ScrapPage.py +216 -0
  169. flowtask/components/ScrapSearch.py +79 -0
  170. flowtask/components/SendNotify.py +257 -0
  171. flowtask/components/SentimentAnalysis.py +694 -0
  172. flowtask/components/ServiceScrapper/__init__.py +5 -0
  173. flowtask/components/ServiceScrapper/parsers/__init__.py +1 -0
  174. flowtask/components/ServiceScrapper/parsers/base.py +94 -0
  175. flowtask/components/ServiceScrapper/parsers/costco.py +93 -0
  176. flowtask/components/ServiceScrapper/scrapper.py +199 -0
  177. flowtask/components/SetVariables.py +156 -0
  178. flowtask/components/SubTask.py +182 -0
  179. flowtask/components/SuiteCRM.py +48 -0
  180. flowtask/components/Switch.py +175 -0
  181. flowtask/components/TableBase.py +148 -0
  182. flowtask/components/TableDelete.py +312 -0
  183. flowtask/components/TableInput.py +143 -0
  184. flowtask/components/TableOutput/TableOutput.py +384 -0
  185. flowtask/components/TableOutput/__init__.py +3 -0
  186. flowtask/components/TableSchema.py +534 -0
  187. flowtask/components/Target.py +223 -0
  188. flowtask/components/ThumbnailGenerator.py +156 -0
  189. flowtask/components/ToPandas.py +67 -0
  190. flowtask/components/TransformRows/TransformRows.py +507 -0
  191. flowtask/components/TransformRows/__init__.py +9 -0
  192. flowtask/components/TransformRows/functions.py +559 -0
  193. flowtask/components/TransposeRows.py +176 -0
  194. flowtask/components/UPCDatabase.py +86 -0
  195. flowtask/components/UnGzip.py +171 -0
  196. flowtask/components/Uncompress.py +172 -0
  197. flowtask/components/UniqueRows.py +126 -0
  198. flowtask/components/Unzip.py +107 -0
  199. flowtask/components/UpdateOperationalVars.py +147 -0
  200. flowtask/components/UploadTo.py +299 -0
  201. flowtask/components/UploadToS3.py +136 -0
  202. flowtask/components/UploadToSFTP.py +160 -0
  203. flowtask/components/UploadToSharepoint.py +205 -0
  204. flowtask/components/UserFunc.py +122 -0
  205. flowtask/components/VivaTracker.py +140 -0
  206. flowtask/components/WSDLClient.py +123 -0
  207. flowtask/components/Wait.py +18 -0
  208. flowtask/components/Walmart.py +199 -0
  209. flowtask/components/Workplace.py +134 -0
  210. flowtask/components/XMLToPandas.py +267 -0
  211. flowtask/components/Zammad/__init__.py +41 -0
  212. flowtask/components/Zammad/models.py +0 -0
  213. flowtask/components/ZoomInfoScraper.py +409 -0
  214. flowtask/components/__init__.py +104 -0
  215. flowtask/components/abstract.py +18 -0
  216. flowtask/components/flow.py +530 -0
  217. flowtask/components/google.py +335 -0
  218. flowtask/components/group.py +221 -0
  219. flowtask/components/py.typed +0 -0
  220. flowtask/components/reviewscrap.py +132 -0
  221. flowtask/components/tAutoincrement.py +117 -0
  222. flowtask/components/tConcat.py +109 -0
  223. flowtask/components/tExplode.py +119 -0
  224. flowtask/components/tFilter.py +184 -0
  225. flowtask/components/tGroup.py +236 -0
  226. flowtask/components/tJoin.py +270 -0
  227. flowtask/components/tMap/__init__.py +9 -0
  228. flowtask/components/tMap/functions.py +54 -0
  229. flowtask/components/tMap/tMap.py +450 -0
  230. flowtask/components/tMelt.py +112 -0
  231. flowtask/components/tMerge.py +114 -0
  232. flowtask/components/tOrder.py +93 -0
  233. flowtask/components/tPandas.py +94 -0
  234. flowtask/components/tPivot.py +71 -0
  235. flowtask/components/tPluckCols.py +76 -0
  236. flowtask/components/tUnnest.py +82 -0
  237. flowtask/components/user.py +401 -0
  238. flowtask/conf.py +457 -0
  239. flowtask/download.py +102 -0
  240. flowtask/events/__init__.py +11 -0
  241. flowtask/events/events/__init__.py +20 -0
  242. flowtask/events/events/abstract.py +95 -0
  243. flowtask/events/events/alerts/__init__.py +362 -0
  244. flowtask/events/events/alerts/colfunctions.py +131 -0
  245. flowtask/events/events/alerts/functions.py +158 -0
  246. flowtask/events/events/dummy.py +12 -0
  247. flowtask/events/events/exec.py +124 -0
  248. flowtask/events/events/file/__init__.py +7 -0
  249. flowtask/events/events/file/base.py +51 -0
  250. flowtask/events/events/file/copy.py +23 -0
  251. flowtask/events/events/file/delete.py +16 -0
  252. flowtask/events/events/interfaces/__init__.py +9 -0
  253. flowtask/events/events/interfaces/client.py +67 -0
  254. flowtask/events/events/interfaces/credentials.py +28 -0
  255. flowtask/events/events/interfaces/notifications.py +58 -0
  256. flowtask/events/events/jira.py +122 -0
  257. flowtask/events/events/log.py +26 -0
  258. flowtask/events/events/logerr.py +52 -0
  259. flowtask/events/events/notify.py +59 -0
  260. flowtask/events/events/notify_event.py +160 -0
  261. flowtask/events/events/publish.py +54 -0
  262. flowtask/events/events/sendfile.py +104 -0
  263. flowtask/events/events/task.py +97 -0
  264. flowtask/events/events/teams.py +98 -0
  265. flowtask/events/events/webhook.py +58 -0
  266. flowtask/events/manager.py +287 -0
  267. flowtask/exceptions.c +39393 -0
  268. flowtask/exceptions.cpython-310-x86_64-linux-gnu.so +0 -0
  269. flowtask/extensions/__init__.py +3 -0
  270. flowtask/extensions/abstract.py +82 -0
  271. flowtask/extensions/logging/__init__.py +65 -0
  272. flowtask/hooks/__init__.py +9 -0
  273. flowtask/hooks/actions/__init__.py +22 -0
  274. flowtask/hooks/actions/abstract.py +66 -0
  275. flowtask/hooks/actions/dummy.py +23 -0
  276. flowtask/hooks/actions/jira.py +74 -0
  277. flowtask/hooks/actions/rest.py +320 -0
  278. flowtask/hooks/actions/sampledata.py +37 -0
  279. flowtask/hooks/actions/sensor.py +23 -0
  280. flowtask/hooks/actions/task.py +9 -0
  281. flowtask/hooks/actions/ticket.py +37 -0
  282. flowtask/hooks/actions/zammad.py +55 -0
  283. flowtask/hooks/hook.py +62 -0
  284. flowtask/hooks/models.py +17 -0
  285. flowtask/hooks/service.py +187 -0
  286. flowtask/hooks/step.py +91 -0
  287. flowtask/hooks/types/__init__.py +23 -0
  288. flowtask/hooks/types/base.py +129 -0
  289. flowtask/hooks/types/brokers/__init__.py +11 -0
  290. flowtask/hooks/types/brokers/base.py +54 -0
  291. flowtask/hooks/types/brokers/mqtt.py +35 -0
  292. flowtask/hooks/types/brokers/rabbitmq.py +82 -0
  293. flowtask/hooks/types/brokers/redis.py +83 -0
  294. flowtask/hooks/types/brokers/sqs.py +44 -0
  295. flowtask/hooks/types/fs.py +232 -0
  296. flowtask/hooks/types/http.py +49 -0
  297. flowtask/hooks/types/imap.py +200 -0
  298. flowtask/hooks/types/jira.py +279 -0
  299. flowtask/hooks/types/mail.py +205 -0
  300. flowtask/hooks/types/postgres.py +98 -0
  301. flowtask/hooks/types/responses/__init__.py +8 -0
  302. flowtask/hooks/types/responses/base.py +5 -0
  303. flowtask/hooks/types/sharepoint.py +288 -0
  304. flowtask/hooks/types/ssh.py +141 -0
  305. flowtask/hooks/types/tagged.py +59 -0
  306. flowtask/hooks/types/upload.py +85 -0
  307. flowtask/hooks/types/watch.py +71 -0
  308. flowtask/hooks/types/web.py +36 -0
  309. flowtask/interfaces/AzureClient.py +137 -0
  310. flowtask/interfaces/AzureGraph.py +839 -0
  311. flowtask/interfaces/Boto3Client.py +326 -0
  312. flowtask/interfaces/DropboxClient.py +173 -0
  313. flowtask/interfaces/ExcelHandler.py +94 -0
  314. flowtask/interfaces/FTPClient.py +131 -0
  315. flowtask/interfaces/GoogleCalendar.py +201 -0
  316. flowtask/interfaces/GoogleClient.py +133 -0
  317. flowtask/interfaces/GoogleDrive.py +127 -0
  318. flowtask/interfaces/GoogleGCS.py +89 -0
  319. flowtask/interfaces/GoogleGeocoding.py +93 -0
  320. flowtask/interfaces/GoogleLang.py +114 -0
  321. flowtask/interfaces/GooglePub.py +61 -0
  322. flowtask/interfaces/GoogleSheet.py +68 -0
  323. flowtask/interfaces/IMAPClient.py +137 -0
  324. flowtask/interfaces/O365Calendar.py +113 -0
  325. flowtask/interfaces/O365Client.py +220 -0
  326. flowtask/interfaces/OneDrive.py +284 -0
  327. flowtask/interfaces/Outlook.py +155 -0
  328. flowtask/interfaces/ParrotBot.py +130 -0
  329. flowtask/interfaces/SSHClient.py +378 -0
  330. flowtask/interfaces/Sharepoint.py +496 -0
  331. flowtask/interfaces/__init__.py +36 -0
  332. flowtask/interfaces/azureauth.py +119 -0
  333. flowtask/interfaces/cache.py +201 -0
  334. flowtask/interfaces/client.py +82 -0
  335. flowtask/interfaces/compress.py +525 -0
  336. flowtask/interfaces/credentials.py +124 -0
  337. flowtask/interfaces/d2l.py +239 -0
  338. flowtask/interfaces/databases/__init__.py +5 -0
  339. flowtask/interfaces/databases/db.py +223 -0
  340. flowtask/interfaces/databases/documentdb.py +55 -0
  341. flowtask/interfaces/databases/rethink.py +39 -0
  342. flowtask/interfaces/dataframes/__init__.py +11 -0
  343. flowtask/interfaces/dataframes/abstract.py +21 -0
  344. flowtask/interfaces/dataframes/arrow.py +71 -0
  345. flowtask/interfaces/dataframes/dt.py +69 -0
  346. flowtask/interfaces/dataframes/pandas.py +167 -0
  347. flowtask/interfaces/dataframes/polars.py +60 -0
  348. flowtask/interfaces/db.py +263 -0
  349. flowtask/interfaces/env.py +46 -0
  350. flowtask/interfaces/func.py +137 -0
  351. flowtask/interfaces/http.py +1780 -0
  352. flowtask/interfaces/locale.py +40 -0
  353. flowtask/interfaces/log.py +75 -0
  354. flowtask/interfaces/mask.py +143 -0
  355. flowtask/interfaces/notification.py +154 -0
  356. flowtask/interfaces/playwright.py +339 -0
  357. flowtask/interfaces/powerpoint.py +368 -0
  358. flowtask/interfaces/py.typed +0 -0
  359. flowtask/interfaces/qs.py +376 -0
  360. flowtask/interfaces/result.py +87 -0
  361. flowtask/interfaces/selenium_service.py +779 -0
  362. flowtask/interfaces/smartsheet.py +154 -0
  363. flowtask/interfaces/stat.py +39 -0
  364. flowtask/interfaces/task.py +96 -0
  365. flowtask/interfaces/template.py +118 -0
  366. flowtask/interfaces/vectorstores/__init__.py +1 -0
  367. flowtask/interfaces/vectorstores/abstract.py +133 -0
  368. flowtask/interfaces/vectorstores/milvus.py +669 -0
  369. flowtask/interfaces/zammad.py +107 -0
  370. flowtask/models.py +193 -0
  371. flowtask/parsers/__init__.py +15 -0
  372. flowtask/parsers/_yaml.c +11978 -0
  373. flowtask/parsers/_yaml.cpython-310-x86_64-linux-gnu.so +0 -0
  374. flowtask/parsers/argparser.py +235 -0
  375. flowtask/parsers/base.c +15155 -0
  376. flowtask/parsers/base.cpython-310-x86_64-linux-gnu.so +0 -0
  377. flowtask/parsers/json.c +11968 -0
  378. flowtask/parsers/json.cpython-310-x86_64-linux-gnu.so +0 -0
  379. flowtask/parsers/maps.py +49 -0
  380. flowtask/parsers/toml.c +11968 -0
  381. flowtask/parsers/toml.cpython-310-x86_64-linux-gnu.so +0 -0
  382. flowtask/plugins/__init__.py +16 -0
  383. flowtask/plugins/components/__init__.py +0 -0
  384. flowtask/plugins/handler/__init__.py +45 -0
  385. flowtask/plugins/importer.py +31 -0
  386. flowtask/plugins/sources/__init__.py +0 -0
  387. flowtask/runner.py +283 -0
  388. flowtask/scheduler/__init__.py +9 -0
  389. flowtask/scheduler/functions.py +493 -0
  390. flowtask/scheduler/handlers/__init__.py +8 -0
  391. flowtask/scheduler/handlers/manager.py +504 -0
  392. flowtask/scheduler/handlers/models.py +58 -0
  393. flowtask/scheduler/handlers/service.py +72 -0
  394. flowtask/scheduler/notifications.py +65 -0
  395. flowtask/scheduler/scheduler.py +993 -0
  396. flowtask/services/__init__.py +0 -0
  397. flowtask/services/bots/__init__.py +0 -0
  398. flowtask/services/bots/telegram.py +264 -0
  399. flowtask/services/files/__init__.py +11 -0
  400. flowtask/services/files/manager.py +522 -0
  401. flowtask/services/files/model.py +37 -0
  402. flowtask/services/files/service.py +767 -0
  403. flowtask/services/jira/__init__.py +3 -0
  404. flowtask/services/jira/jira_actions.py +191 -0
  405. flowtask/services/tasks/__init__.py +13 -0
  406. flowtask/services/tasks/launcher.py +213 -0
  407. flowtask/services/tasks/manager.py +323 -0
  408. flowtask/services/tasks/service.py +275 -0
  409. flowtask/services/tasks/task_manager.py +376 -0
  410. flowtask/services/tasks/tasks.py +155 -0
  411. flowtask/storages/__init__.py +16 -0
  412. flowtask/storages/exceptions.py +12 -0
  413. flowtask/storages/files/__init__.py +8 -0
  414. flowtask/storages/files/abstract.py +29 -0
  415. flowtask/storages/files/filesystem.py +66 -0
  416. flowtask/storages/tasks/__init__.py +19 -0
  417. flowtask/storages/tasks/abstract.py +26 -0
  418. flowtask/storages/tasks/database.py +33 -0
  419. flowtask/storages/tasks/filesystem.py +108 -0
  420. flowtask/storages/tasks/github.py +119 -0
  421. flowtask/storages/tasks/memory.py +45 -0
  422. flowtask/storages/tasks/row.py +25 -0
  423. flowtask/tasks/__init__.py +0 -0
  424. flowtask/tasks/abstract.py +526 -0
  425. flowtask/tasks/command.py +118 -0
  426. flowtask/tasks/pile.py +486 -0
  427. flowtask/tasks/py.typed +0 -0
  428. flowtask/tasks/task.py +778 -0
  429. flowtask/template/__init__.py +161 -0
  430. flowtask/tests.py +257 -0
  431. flowtask/types/__init__.py +8 -0
  432. flowtask/types/typedefs.c +11347 -0
  433. flowtask/types/typedefs.cpython-310-x86_64-linux-gnu.so +0 -0
  434. flowtask/utils/__init__.py +24 -0
  435. flowtask/utils/constants.py +117 -0
  436. flowtask/utils/encoders.py +21 -0
  437. flowtask/utils/executor.py +112 -0
  438. flowtask/utils/functions.cpp +14280 -0
  439. flowtask/utils/functions.cpython-310-x86_64-linux-gnu.so +0 -0
  440. flowtask/utils/json.cpp +13349 -0
  441. flowtask/utils/json.cpython-310-x86_64-linux-gnu.so +0 -0
  442. flowtask/utils/mail.py +63 -0
  443. flowtask/utils/parseqs.c +13324 -0
  444. flowtask/utils/parserqs.cpython-310-x86_64-linux-gnu.so +0 -0
  445. flowtask/utils/stats.py +308 -0
  446. flowtask/utils/transformations.py +74 -0
  447. flowtask/utils/uv.py +12 -0
  448. flowtask/utils/validators.py +97 -0
  449. flowtask/version.py +11 -0
  450. flowtask-5.8.4.dist-info/LICENSE +201 -0
  451. flowtask-5.8.4.dist-info/METADATA +209 -0
  452. flowtask-5.8.4.dist-info/RECORD +470 -0
  453. flowtask-5.8.4.dist-info/WHEEL +6 -0
  454. flowtask-5.8.4.dist-info/entry_points.txt +3 -0
  455. flowtask-5.8.4.dist-info/top_level.txt +2 -0
  456. plugins/components/CreateQR.py +39 -0
  457. plugins/components/TestComponent.py +28 -0
  458. plugins/components/Use1.py +13 -0
  459. plugins/components/Workplace.py +117 -0
  460. plugins/components/__init__.py +3 -0
  461. plugins/sources/__init__.py +0 -0
  462. plugins/sources/get_populartimes.py +78 -0
  463. plugins/sources/google.py +150 -0
  464. plugins/sources/hubspot.py +679 -0
  465. plugins/sources/icims.py +679 -0
  466. plugins/sources/mobileinsight.py +501 -0
  467. plugins/sources/newrelic.py +262 -0
  468. plugins/sources/uap.py +268 -0
  469. plugins/sources/venu.py +244 -0
  470. plugins/sources/vocinity.py +314 -0
@@ -0,0 +1,609 @@
1
+ import re
2
+ import asyncio
3
+ import multiprocessing
4
+ import gc
5
+ import csv
6
+ from decimal import Decimal
7
+ import datetime
8
+ from io import BytesIO
9
+ from typing import Sequence, Union
10
+ import pandas as pd
11
+ from pandas.api.types import (
12
+ is_datetime64_any_dtype,
13
+ is_datetime64tz_dtype,
14
+ is_integer_dtype,
15
+ is_float_dtype,
16
+ is_bool_dtype
17
+ )
18
+ import numpy as np
19
+ import orjson
20
+ import asyncpg
21
+ from asyncpg.exceptions import (
22
+ StringDataRightTruncationError,
23
+ UniqueViolationError,
24
+ ForeignKeyViolationError,
25
+ NotNullViolationError,
26
+ )
27
+ from pgvector.asyncpg import register_vector
28
+ from asyncdb.exceptions import StatementError, DataError
29
+ from asyncdb.models import Model
30
+ # Dataintegration components:
31
+ from ..exceptions import (
32
+ ComponentError,
33
+ DataNotFound,
34
+ )
35
+ from .CopyTo import CopyTo, dtypes
36
+ from ..utils.json import json_decoder, json_encoder
37
+
38
+
39
# Adding support for primary keys on raw tables.
#
# Usage example for the CopyToPg component (YAML task definition):
#
#   CopyToPg:
#     tablename: employees
#     schema: bacardi
#     truncate: true
#
# NOTE: this example text was previously embedded inside the ``pk_sentence``
# string itself, which turned the formatted statement into invalid SQL.
# It is documentation only and must stay out of the SQL template.

# Template used to add a PRIMARY KEY constraint; ``fields`` is a
# comma-separated list of column names.
pk_sentence = """ALTER TABLE {schema}.{table}
ADD CONSTRAINT {schema}_{table}_pkey PRIMARY KEY({fields});"""

# Template used to add a UNIQUE constraint; ``fields`` is a
# comma-separated list of column names.
unique_sentence = """ALTER TABLE {schema}.{table}
ADD CONSTRAINT unq_{schema}_{table} UNIQUE({fields});"""
55
+
56
+
57
+ class CopyToPg(CopyTo):
58
+ """
59
+ CopyToPg
60
+
61
+ This component copies data into a Postgres table.
62
+ It copies into the main Postgres database using the copy_into_table functionality.
63
+ TODO: Design an Upsert feature with Copy to Pg.
64
+ .. table:: Properties
65
+ :widths: auto
66
+
67
+ +----------------+----------+----------------------------------------------------------------------------------+
68
+ | Name | Required | Summary |
69
+ +----------------+----------+----------------------------------------------------------------------------------+
70
+ | schema | Yes | Name of the schema where the table resides. |
71
+ +----------------+----------+----------------------------------------------------------------------------------+
72
+ | tablename | Yes | Name of the table to insert data into. |
73
+ +----------------+----------+----------------------------------------------------------------------------------+
74
+ | truncate | No | Boolean flag indicating whether to truncate the table before inserting. |
75
+ | | | Defaults to False. |
76
+ +----------------+----------+----------------------------------------------------------------------------------+
77
+ | use_chunks | No | Boolean flag indicating whether to insert data in chunks (for large datasets). |
78
+ | | | Defaults to False. |
79
+ | | | Requires specifying a `chunksize` property for chunk size determination. |
80
+ +----------------+----------+----------------------------------------------------------------------------------+
81
+ | chunksize | No | Integer value specifying the size of each data chunk when `use_chunks` is True. |
82
+ | | | Defaults to None (chunk size will be calculated based on CPU cores). |
83
+ +----------------+----------+----------------------------------------------------------------------------------+
84
+ | use_buffer | No | Boolean flag indicating whether to use a buffer for data insertion (optional). |
85
+ | | | Defaults to False. |
86
+ | | | Using a buffer can improve performance for large datasets. |
87
+ +----------------+----------+----------------------------------------------------------------------------------+
88
+ | array_columns | No | List of column names containing JSON arrays. These columns will be formatted |
89
+ | | | appropriately before insertion. |
90
+ | | | Requires `use_buffer` to be True. |
91
+ +----------------+----------+----------------------------------------------------------------------------------+
92
+ | use_quoting | No | Boolean flag indicating whether to use quoting for CSV data insertion (optional).|
93
+ | | | Defaults to False. |
94
+ | | | Using quoting can be helpful for data containing special characters. |
95
+ +----------------+----------+----------------------------------------------------------------------------------+
96
+ | datasource | No | Use a datasource instead of manual credentials |
97
+ +----------------+----------+----------------------------------------------------------------------------------+
98
+ | credentials | No | Supporting manual postgresql credentials |
99
+ +----------------+----------+----------------------------------------------------------------------------------+
100
+
101
+ Returns a dictionary containing metrics about the copy operation:
102
+ * ROWS_SAVED (int): The number of rows successfully inserted into the target table.
103
+ * NUM_ROWS (int): The total number of rows processed from the input data.
104
+ * NUM_COLUMNS (int): The number of columns found in the input data.
105
+ * (optional): Other metrics specific to the implementation.
106
+ """
107
+
108
async def paralelize_insert(self, columns, tuples):
    """Copy a batch of rows into the target table (best effort).

    Delegates to ``self._connection.copy_into_table`` using the component's
    ``tablename`` and ``schema``.

    Args:
        columns: column names passed through as ``columns``.
        tuples: row data passed through as ``source``.

    Returns:
        Whatever ``copy_into_table`` returns on success; ``None`` when any
        exception was raised (the error is logged, not propagated).
    """
    try:
        return await self._connection.copy_into_table(
            table=self.tablename,
            schema=self.schema,
            source=tuples,
            columns=columns,
        )
    except StatementError as exc:
        self._logger.exception(f"Statement Error: {exc}", stack_info=True)
    except DataError as exc:
        self._logger.exception(f"Data Error: {exc}", stack_info=True)
    except Exception as exc:
        # Catch-all boundary: a failed batch is logged and reported as None.
        self._logger.exception(f"Pg Error: {exc}", stack_info=True)
    return None
133
+
134
def extract_copied(self, result: Union[str, Sequence[str], None]) -> int:
    """Return the number of rows reported by one or more COPY status strings.

    A status string is expected to contain ``COPY <n>``; the first such
    occurrence in each string is used.

    Args:
        result: a single status string, an iterable of status strings
            (falsy items such as ``None`` or ``""`` are skipped), or ``None``.

    Returns:
        The row count for a single string, the sum of the row counts for an
        iterable, or ``0`` when ``result`` is ``None`` or nothing matched.
    """
    if result is None:
        return 0
    # One pattern for both branches (tolerates any amount of whitespace).
    pattern = r"\bCOPY\s+(\d+)"
    if isinstance(result, str):
        match = re.search(pattern, result)
        if match:
            return int(match.group(1))
        # Unparseable status: report it and stop here instead of falling
        # through and iterating the string character by character.
        self._logger.error(
            f"CopyToPg: unable to extract copied rows from {result!r}"
        )
        return 0
    # iterable of results (atomic, row-by-row mode)
    total = 0
    for item in result:
        if not item:  # skip None/empty
            continue
        match = re.search(pattern, item)
        if match:
            total += int(match.group(1))
    return total
151
+
152
async def _create_table(self):
    """Create the target table from the columns of ``self.data``.

    Reads three options from ``self.create_table``:

    * ``pk`` (required): column name(s) for the primary key.
    * ``unique`` (optional): column name(s) for a UNIQUE constraint.
    * ``drop`` (optional, default False): drop the table first when True.

    Column types are looked up in ``dtypes`` by the string name of each
    column's dtype, falling back to ``str`` for unknown dtypes.

    Raises:
        ComponentError: when ``pk`` is missing, when the CREATE TABLE
            statement reports an error, or when any other exception occurs
            during creation (wrapped, with the original as ``__cause__``).
    """
    _pk = self.create_table.get("pk", None)
    _unq = self.create_table.get("unique", None)
    _drop = self.create_table.get("drop", False)
    if _pk is None:
        raise ComponentError(
            f"Error creating table: {self.schema}.{self.tablename}: PK not defined."
        )
    # A single column given as a plain string must not be split into
    # characters by ``",".join``.
    if isinstance(_pk, str):
        _pk = [_pk]
    if isinstance(_unq, str):
        _unq = [_unq]
    # extracting columns:
    cols = []
    for col in self.data.columns.tolist():
        datatype = self.data.dtypes[col]
        try:
            t = dtypes[str(datatype)]
        except KeyError:
            t = str
        cols.append((col, t))
    try:
        cls = Model.make_model(
            name=self.tablename, schema=self.schema, fields=cols
        )
        mdl = cls()  # empty model, I only need the schema
        sql = mdl.model(dialect="sql")
        if not sql:
            self._logger.warning(
                f"CopyToPg: no DDL generated for {self.schema}.{self.tablename}"
            )
            return
        self._logger.debug(f"Create Table SQL: {sql}")
        async with await self._connection.connection() as conn:
            if _drop is True:
                result, error = await conn.execute(
                    sentence=f"DROP TABLE IF EXISTS {self.schema}.{self.tablename};"
                )
                self._logger.debug(f"DROP Table: {result}, {error}")
            result, error = await conn.execute(sentence=sql)
            self._logger.debug(f"Create Table: {result!s}")
            if error:
                raise ComponentError(
                    f"Error on Table creation: {error}"
                )
            # Add Primary Key(s):
            pk = pk_sentence.format(
                schema=self.schema,
                table=self.tablename,
                fields=",".join(_pk),
            )
            _primary, error = await conn.execute(sentence=pk)
            self._logger.debug(
                f"Create Table: PK creation: {_primary}, {error}"
            )
            if error:
                # Not fatal (kept best-effort), but must be visible.
                self._logger.warning(
                    f"CopyToPg: PK creation failed: {error}"
                )
            # Add Unique (if required):
            if _unq is not None:
                unique = unique_sentence.format(
                    schema=self.schema,
                    table=self.tablename,
                    fields=",".join(_unq),
                )
                _unique, error = await conn.execute(sentence=unique)
                self._logger.debug(
                    f"Create Table: Unique creation: {_unique}, {error}"
                )
                if error:
                    self._logger.warning(
                        f"CopyToPg: Unique creation failed: {error}"
                    )
    except Exception as err:
        raise ComponentError(
            f"CopyToPg: Error on Table Creation {err}"
        ) from err
215
+
216
async def _truncate_table(self):
    """Truncate ``schema.tablename``, retrying transient failures.

    The TRUNCATE is attempted up to ``max_retries + 1`` times. A retry
    happens only when the caught error message reports a statement timeout
    or "another operation is in progress"; before retrying, a new
    connection is created with ``self.create_connection(driver='pg')`` and
    the coroutine sleeps for two seconds.

    Raises:
        ComponentError: when the statement reports an error, when a
            non-retryable driver error is caught, or when the retries are
            exhausted.
    """
    # ---- SELECT pg_advisory_xact_lock(1);
    truncate = """TRUNCATE {}.{};"""
    truncate = truncate.format(self.schema, self.tablename)
    retryable = (
        "canceling statement due to statement timeout",
        "another operation is in progress",
    )
    retry_count = 0
    max_retries = 2
    while retry_count <= max_retries:
        try:
            async with await self._connection.connection() as conn:
                result, error = await conn.execute(truncate)
                if error is not None:
                    raise ComponentError(
                        f"CopyToPg Error truncating {self.schema}.{self.tablename}: {error}"
                    )
                await conn.execute(
                    "SELECT pg_advisory_unlock_all();"
                )
                self._logger.debug(
                    f"COPYTOPG TRUNCATED: {result}"
                )
                await asyncio.sleep(5e-3)
                break  # exit loop
        except (asyncpg.exceptions.QueryCanceledError, StatementError) as e:
            message = str(e)
            if not any(text in message for text in retryable):
                # Previously this case was swallowed without counting the
                # attempt, so a persistent error looped forever.
                raise ComponentError(
                    f"CopyToPg Error truncating {self.schema}.{self.tablename}: {message}"
                ) from e
            retry_count += 1
            self._logger.warning(
                f"CopyToPg Error: {str(e)}, Retrying... {retry_count}/{max_retries}"
            )
            if retry_count > max_retries:
                raise ComponentError(
                    f"CopyToPg Error: {str(e)}, Max Retries reached"
                ) from e
            # Create a new connection and wait before repeating the operation:
            self._connection = await self.create_connection(
                driver='pg'
            )
            await asyncio.sleep(2)
254
+
255
async def _get_column_types(self, conn):
    """
    Describe the columns of the target table.

    A ``SELECT * ... LIMIT 0`` statement is prepared on ``conn.engine()``
    and only the attributes of the prepared statement are read, so no rows
    are fetched.

    Returns a list with one dictionary per column, in statement order,
    with the keys ``name``, ``pg_oid``, ``pg_type`` and ``schema``.
    On any error the problem is logged and an empty list is returned, so
    the return type is the same on both paths.
    """
    try:
        engine = conn.engine()
        # LIMIT 0 forces Postgres to return only the RowDescription
        stmt = await engine.prepare(
            f'SELECT * FROM {self.schema}.{self.tablename} LIMIT 0'
        )
        return [
            {
                "name": attr.name,  # column name
                "pg_oid": attr.type.oid,  # numeric OID
                "pg_type": attr.type.name,  # text type name, e.g. "timestamp"
                "schema": attr.type.schema,  # type's schema, e.g. "pg_catalog"
            }
            for attr in stmt.get_attributes()  # tuple of Attribute objects
        ]
    except Exception as e:
        self._logger.error(f"Error getting column types: {e}")
        return []
279
+
280
async def _copy_dataframe(self):
    """Copy the DataFrame in ``self.data`` into ``schema.tablename``.

    Three strategies are used, chosen from instance attributes:

    * ``use_chunks is True``: the frame is split into chunks, each one is
      serialised to CSV and sent with ``copy_to_table``. Chunks that fail
      with ``StatementError``/``DataError`` are logged and skipped.
    * ``use_buffer`` attribute present: the whole frame is serialised to a
      single CSV buffer and sent with ``copy_to_table``.
    * otherwise: missing values are normalised to ``None``, the frame is
      turned into tuples and sent with ``copy_into_table``. When
      ``self._atomic is True`` the rows are copied one by one and failing
      rows are collected as rejects instead of aborting the load.

    The ``ROWS_SAVED`` metric is recorded through ``self.add_metric``.

    Returns:
        ``True`` when the DataFrame is empty (nothing is copied),
        otherwise ``None``.

    Raises:
        ComponentError: on an invalid chunk size or on any failure of the
            non-chunked strategies.
    """
    # insert data directly into table
    columns = list(self.data.columns)

    if self.data.empty:
        self._logger.info("Dataframe is empty, nothing to copy")
        return True

    if hasattr(self, "use_chunks") and self.use_chunks is True:
        self._logger.debug(":: Saving data using Chunks ::")
        # TODO: parallelize CHUNKS
        total_rows = self.data.shape[0]
        if not self.chunksize:
            num_cores = multiprocessing.cpu_count()
            # With fewer rows than cores the computed size was 0 (error)
            # or negative (empty range, nothing copied): clamp it to 1.
            chunk_size = max(1, int(total_rows / num_cores) - 1)
        else:
            chunk_size = self.chunksize
        if chunk_size <= 0:
            raise ComponentError(
                "CopyToPG: Wrong ChunkSize or Empty Dataframe"
            )
        # Positional slicing: label-based .loc repeats rows when the index
        # contains duplicated labels.
        chunks = (
            self.data.iloc[i: i + chunk_size]
            for i in range(0, total_rows, chunk_size)
        )
        count = 0
        numrows = 0
        for chunk in chunks:
            self._logger.debug(f"Iteration {count}")
            s_buf = BytesIO()
            chunk.to_csv(s_buf, index=None, header=None)
            s_buf.seek(0)
            try:
                async with await self._connection.connection() as conn:
                    result = await conn.engine().copy_to_table(
                        table_name=self.tablename,
                        schema_name=self.schema,
                        source=s_buf,
                        columns=columns,
                        format="csv",
                    )
                    rows = self.extract_copied(result)
                    numrows += rows
                    count += 1
            except StatementError as err:
                self._logger.error(f"Statement Error: {err}")
                continue
            except DataError as err:
                self._logger.error(f"Data Error: {err}")
                continue
            await asyncio.sleep(5e-3)
        self.add_metric("ROWS_SAVED", numrows)
    else:
        try:
            result = None
            # insert data directly into table
            if hasattr(self, "use_buffer"):
                if hasattr(self, "array_columns"):

                    def _to_pg_array(x):
                        # non-empty list/tuple -> '{'a','b'}' literal,
                        # anything else -> NaN (empty CSV field)
                        if isinstance(x, (list, tuple)) and len(x) > 0:
                            return (
                                "{"
                                + ",".join("'" + str(i) + "'" for i in x)
                                + "}"
                            )
                        return np.nan

                    for col in self.array_columns:
                        self.data[col] = self.data[col].apply(_to_pg_array)
                s_buf = BytesIO()
                kw = {}
                if hasattr(self, "use_quoting"):
                    kw = {"quoting": csv.QUOTE_NONNUMERIC}
                self.data.to_csv(s_buf, index=None, header=None, **kw)
                s_buf.seek(0)
                if hasattr(self, "clean_df"):
                    # the CSV buffer holds the data now: free the frame
                    del self.data
                    gc.collect()
                    self.data = pd.DataFrame()
                async with await self._connection.connection() as conn:
                    try:
                        await conn.engine().set_type_codec(
                            "json",
                            encoder=orjson.dumps,
                            decoder=orjson.loads,
                            schema="pg_catalog",
                        )
                        await conn.engine().set_type_codec(
                            "jsonb",
                            encoder=orjson.dumps,
                            decoder=orjson.loads,
                            schema="pg_catalog",
                            format="binary",
                        )
                        await register_vector(conn.engine())
                        # Saving as CSV to the table
                        result = await conn.engine().copy_to_table(
                            table_name=self.tablename,
                            schema_name=self.schema,
                            source=s_buf,
                            columns=columns,
                            format="csv",
                        )
                    except (
                        StringDataRightTruncationError,
                        ForeignKeyViolationError,
                        NotNullViolationError,
                        UniqueViolationError,
                    ) as exc:
                        try:
                            column = exc.column_name
                        except AttributeError:
                            column = None
                        raise DataError(
                            f"Error: {exc}, details: {exc.detail}, column: {column}"
                        ) from exc
                    except asyncpg.exceptions.DataError as e:
                        self._logger.error(f"Error message: {e}")
                        raise DataError(str(e)) from e
                if result is not None:
                    # the other two strategies already report this metric
                    self.add_metric(
                        "ROWS_SAVED", self.extract_copied(result)
                    )
            else:
                # --- convert pd.NA -> None in any nullable *numeric*
                # (or boolean) column ----
                num_like = self.data.select_dtypes(
                    include=[
                        "Int8", "Int16", "Int32", "Int64",
                        "UInt8", "UInt16", "UInt32", "UInt64",
                        "Float32", "Float64",
                        "boolean"  # pandas' nullable BooleanDtype
                    ]
                )

                if not num_like.empty:
                    # cast to 'object' so the column can hold mixed Python
                    # objects, then replace every missing value with None
                    self.data[num_like.columns] = (
                        num_like.astype(object)
                        .where(pd.notnull(num_like), None)
                    )

                # can remove NAT from str fields:
                u = self.data.select_dtypes(include=["string"])
                if not u.empty:
                    self.data[u.columns] = u.astype(object).where(
                        pd.notnull(u), None
                    )
                self.data = (
                    self.data
                    .where(pd.notnull(self.data), None)  # nulls -> None
                    .convert_dtypes()  # uses pandas' logical dtypes
                    .astype({c: 'string' for c in u})
                )

                async with await self._connection.connection() as conn:
                    # Get PostgreSQL column types
                    # TODO: use the column types to refine the conversion
                    # (the value is not consumed yet)
                    pg_column_types = await self._get_column_types(conn)  # noqa

                # Handle datetime columns - replace NaT with None
                datetime_cols = self.data.select_dtypes(
                    include=['datetime64[ns]', 'datetime64[ns, UTC]']
                )
                if not datetime_cols.empty:
                    self.data[datetime_cols.columns] = (
                        self.data[datetime_cols.columns]
                        .astype(object)
                        .where(pd.notnull(datetime_cols), None)
                    )

                if self._naive_columns:
                    # Remove the timezone on tz-naive columns:
                    for col in self._naive_columns:
                        if col not in self.data.columns:
                            continue

                        # If the column is not datetime-like, coerce it first
                        if not is_datetime64_any_dtype(self.data[col]):
                            self.data[col] = pd.to_datetime(
                                self.data[col],
                                errors="coerce",  # bad values -> NaT
                                utc=True  # parse as UTC if a tz is present
                            )
                        # After the coercion, act only on tz-aware columns
                        if is_datetime64tz_dtype(self.data[col]):
                            # tz_localize(None) drops the tz info but keeps
                            # the *wall time*, e.g.
                            # 2025-03-20 19:11:09+00:00 -> 2025-03-20 19:11:09
                            self.data[col] = self.data[col].dt.tz_localize(None)
                if self._json_columns:
                    for col in self._json_columns:
                        if col in self.data.columns:
                            # Convert None (and empty containers) to empty
                            # dicts/lists as appropriate
                            self.data[col] = self.data[col].apply(
                                lambda x: {} if x is None else
                                {} if isinstance(x, dict) and not x else
                                [] if isinstance(x, list) and not x else x
                            )
                if self._vector_columns:
                    for col in self._vector_columns:
                        if col in self.data.columns:
                            # Ensure vector values are Python lists
                            self.data[col] = self.data[col].apply(
                                lambda x: list(x) if x is not None and hasattr(x, '__iter__') else x
                            )
                if self._array_columns:

                    def _as_list(x):
                        if x is None:
                            return None
                        if isinstance(x, list):
                            return x
                        if isinstance(x, str) and x.startswith('['):
                            # SECURITY(review): eval() runs arbitrary code
                            # if the cell content is untrusted; consider
                            # ast.literal_eval once confirmed that only
                            # list literals arrive here.
                            return eval(x)  # noqa
                        return [x]

                    for col in self._array_columns:
                        if col in self.data.columns:
                            # Ensure array values are Python lists
                            self.data[col] = self.data[col].apply(_as_list)
                # Final NaT cleanup for all columns - ensure there are no
                # NaT values before sending to PostgreSQL
                for col in self.data.columns:
                    if self.data[col].apply(lambda x: x is pd.NaT).any():
                        self.data[col] = self.data[col].apply(
                            lambda x: None if pd.isna(x) else x
                        )

                # Turn every NaN / NA / NaT into Python None
                self.data = self.data.astype(object).where(
                    pd.notnull(self.data), None
                )
                tuples = list(zip(*map(self.data.get, self.data)))

                rejects = []
                result = []
                async with await self._connection.connection() as conn:
                    engine = conn.engine()
                    await engine.set_type_codec(
                        "json",
                        encoder=orjson.dumps,
                        decoder=json_decoder,
                        schema="pg_catalog",
                    )
                    await engine.set_type_codec(
                        "jsonb",
                        # binary jsonb is prefixed with a version byte
                        encoder=lambda data: b"\x01" + orjson.dumps(data),
                        decoder=lambda data: orjson.loads(data[1:]),
                        schema="pg_catalog",
                        format="binary"
                    )
                    await register_vector(engine)

                    if self._debug:
                        # dtype and sample Python type of every column; an
                        # all-null column has no sample to show
                        for c in self.data.columns:
                            non_null = self.data[c].dropna()
                            sample = (
                                type(non_null.iat[0]).__name__
                                if not non_null.empty else "n/a"
                            )
                            self._logger.debug(
                                f"{c:>20}: {self.data[c].dtype}, sample={sample}"
                            )

                    # report any cell that is still NAType after the scrub
                    bad = self.data.apply(
                        lambda s: s.map(
                            lambda v: isinstance(v, type(pd.NA))
                        ).any()
                    )
                    if bad.any():
                        self._logger.warning(
                            f"found NAType in {bad.index[bad].tolist()}"
                        )

                    if self._atomic is True:
                        # ---- slow path: row-level COPY with savepoints ----
                        copied = 0
                        async with engine.transaction():
                            for row in tuples:
                                try:
                                    # The nested transaction acts as a
                                    # savepoint: a rejected row is rolled
                                    # back alone instead of aborting the
                                    # outer transaction.
                                    async with engine.transaction():
                                        # COPY wants an *iterable* of tuples
                                        rs = await conn.copy_into_table(
                                            table=self.tablename,
                                            schema=self.schema,
                                            source=[row],
                                            columns=columns,
                                        )
                                except Exception as e:
                                    self._logger.error(
                                        f"Error copying row: {row}, error: {e}"
                                    )
                                    # Handle the error and add to rejects
                                    rejects.append(row)
                                    continue
                                copied += 1
                                # A status such as "COPY 1" is a string:
                                # extend() would split it into characters.
                                if isinstance(rs, str):
                                    result.append(rs)
                                elif rs:
                                    result.extend(rs)
                    else:
                        result = await conn.copy_into_table(
                            table=self.tablename,
                            schema=self.schema,
                            source=tuples,
                            columns=columns,
                        )
                        copied = len(self.data)
                self._logger.info(
                    f"Copied {copied} rows to {self.schema}.{self.tablename}"
                )
                self.add_metric("ROWS_SAVED", self.extract_copied(result))
                if rejects:
                    # build a DataFrame with the same column order
                    rejects_df = pd.DataFrame(rejects, columns=columns)
                    self._logger.warning(
                        f"Rejected rows: {len(rejects_df)}"
                    )
                    self._logger.warning(
                        f"Rejected rows: {rejects_df.to_dict(orient='records')}"
                    )
            if self._debug:
                self._logger.debug(
                    f"Saving results into: {self.schema}.{self.tablename}"
                )
        except StatementError as err:
            raise ComponentError(f"Statement error: {err}") from err
        except DataError as err:
            raise ComponentError(f"Data error: {err}") from err
        except Exception as err:
            raise ComponentError(f"{self.StepName} Error: {err!s}") from err
580
+
581
async def _copy_iterable(self):
    """Copy a sequence of mapping rows (``self.data``) into the table.

    The column list is taken from the keys of the first row, and every row
    is converted to a tuple in that same column order before being sent
    with ``copy_into_table``. The ``ROWS_SAVED`` metric is recorded from
    the returned status.

    Returns:
        ``True`` when ``self.data`` is empty (nothing is copied),
        otherwise ``None``.

    Raises:
        ComponentError: on any failure while building or copying the rows
            (the original error is chained).
    """
    if len(self.data) == 0:
        # self.data[0] would raise a bare IndexError here
        self._logger.info("Iterable is empty, nothing to copy")
        return True
    columns = list(self.data[0].keys())
    try:
        # Values are picked by column name: relying on .values() misaligns
        # the data when a row has its keys in a different order.
        tuples = [
            tuple(item[col] for col in columns) for item in self.data
        ]
        # TODO: iterate the data into chunks (to avoid kill the process)
        async with await self._connection.connection() as conn:
            result = await conn.copy_into_table(
                table=self.tablename,
                schema=self.schema,
                source=tuples,
                columns=columns,
            )
            self.add_metric(
                "ROWS_SAVED", self.extract_copied(result)
            )
            self._logger.debug(f"CopyToPg: {result}")
    except StatementError as err:
        raise ComponentError(
            f"Statement error: {err}"
        ) from err
    except DataError as err:
        raise ComponentError(
            f"Data error: {err}"
        ) from err
    except Exception as err:
        raise ComponentError(
            f"{self.StepName} Error: {err!s}"
        ) from err