flowtask 5.8.4__cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (470) hide show
  1. flowtask/__init__.py +93 -0
  2. flowtask/__main__.py +38 -0
  3. flowtask/bots/__init__.py +6 -0
  4. flowtask/bots/check.py +93 -0
  5. flowtask/bots/codebot.py +51 -0
  6. flowtask/components/ASPX.py +148 -0
  7. flowtask/components/AddDataset.py +352 -0
  8. flowtask/components/Amazon.py +523 -0
  9. flowtask/components/AutoTask.py +314 -0
  10. flowtask/components/Azure.py +80 -0
  11. flowtask/components/AzureUsers.py +106 -0
  12. flowtask/components/BaseAction.py +91 -0
  13. flowtask/components/BaseLoop.py +198 -0
  14. flowtask/components/BestBuy.py +800 -0
  15. flowtask/components/CSVToGCS.py +120 -0
  16. flowtask/components/CompanyScraper/__init__.py +1 -0
  17. flowtask/components/CompanyScraper/parsers/__init__.py +6 -0
  18. flowtask/components/CompanyScraper/parsers/base.py +102 -0
  19. flowtask/components/CompanyScraper/parsers/explorium.py +192 -0
  20. flowtask/components/CompanyScraper/parsers/leadiq.py +206 -0
  21. flowtask/components/CompanyScraper/parsers/rocket.py +133 -0
  22. flowtask/components/CompanyScraper/parsers/siccode.py +109 -0
  23. flowtask/components/CompanyScraper/parsers/visualvisitor.py +130 -0
  24. flowtask/components/CompanyScraper/parsers/zoominfo.py +118 -0
  25. flowtask/components/CompanyScraper/scrapper.py +1054 -0
  26. flowtask/components/CopyTo.py +177 -0
  27. flowtask/components/CopyToBigQuery.py +243 -0
  28. flowtask/components/CopyToMongoDB.py +291 -0
  29. flowtask/components/CopyToPg.py +609 -0
  30. flowtask/components/CopyToRethink.py +207 -0
  31. flowtask/components/CreateGCSBucket.py +102 -0
  32. flowtask/components/CreateReport/CreateReport.py +228 -0
  33. flowtask/components/CreateReport/__init__.py +9 -0
  34. flowtask/components/CreateReport/charts/__init__.py +15 -0
  35. flowtask/components/CreateReport/charts/bar.py +51 -0
  36. flowtask/components/CreateReport/charts/base.py +66 -0
  37. flowtask/components/CreateReport/charts/pie.py +64 -0
  38. flowtask/components/CreateReport/utils.py +9 -0
  39. flowtask/components/CustomerSatisfaction.py +196 -0
  40. flowtask/components/DataInput.py +200 -0
  41. flowtask/components/DateList.py +255 -0
  42. flowtask/components/DbClient.py +163 -0
  43. flowtask/components/DialPad.py +146 -0
  44. flowtask/components/DocumentDBQuery.py +200 -0
  45. flowtask/components/DownloadFrom.py +371 -0
  46. flowtask/components/DownloadFromD2L.py +113 -0
  47. flowtask/components/DownloadFromFTP.py +181 -0
  48. flowtask/components/DownloadFromIMAP.py +315 -0
  49. flowtask/components/DownloadFromS3.py +198 -0
  50. flowtask/components/DownloadFromSFTP.py +265 -0
  51. flowtask/components/DownloadFromSharepoint.py +110 -0
  52. flowtask/components/DownloadFromSmartSheet.py +114 -0
  53. flowtask/components/DownloadS3File.py +229 -0
  54. flowtask/components/Dummy.py +59 -0
  55. flowtask/components/DuplicatePhoto.py +411 -0
  56. flowtask/components/EmployeeEvaluation.py +237 -0
  57. flowtask/components/ExecuteSQL.py +323 -0
  58. flowtask/components/ExtractHTML.py +178 -0
  59. flowtask/components/FileBase.py +178 -0
  60. flowtask/components/FileCopy.py +181 -0
  61. flowtask/components/FileDelete.py +82 -0
  62. flowtask/components/FileExists.py +146 -0
  63. flowtask/components/FileIteratorDelete.py +112 -0
  64. flowtask/components/FileList.py +194 -0
  65. flowtask/components/FileOpen.py +75 -0
  66. flowtask/components/FileRead.py +120 -0
  67. flowtask/components/FileRename.py +106 -0
  68. flowtask/components/FilterIf.py +284 -0
  69. flowtask/components/FilterRows/FilterRows.py +200 -0
  70. flowtask/components/FilterRows/__init__.py +10 -0
  71. flowtask/components/FilterRows/functions.py +4 -0
  72. flowtask/components/GCSToBigQuery.py +103 -0
  73. flowtask/components/GoogleA4.py +150 -0
  74. flowtask/components/GoogleGeoCoding.py +344 -0
  75. flowtask/components/GooglePlaces.py +315 -0
  76. flowtask/components/GoogleSearch.py +539 -0
  77. flowtask/components/HTTPClient.py +268 -0
  78. flowtask/components/ICIMS.py +146 -0
  79. flowtask/components/IF.py +179 -0
  80. flowtask/components/IcimsFolderCopy.py +173 -0
  81. flowtask/components/ImageFeatures/__init__.py +5 -0
  82. flowtask/components/ImageFeatures/process.py +233 -0
  83. flowtask/components/IteratorBase.py +251 -0
  84. flowtask/components/LangchainLoader/__init__.py +5 -0
  85. flowtask/components/LangchainLoader/loader.py +194 -0
  86. flowtask/components/LangchainLoader/loaders/__init__.py +22 -0
  87. flowtask/components/LangchainLoader/loaders/abstract.py +362 -0
  88. flowtask/components/LangchainLoader/loaders/basepdf.py +50 -0
  89. flowtask/components/LangchainLoader/loaders/docx.py +91 -0
  90. flowtask/components/LangchainLoader/loaders/html.py +119 -0
  91. flowtask/components/LangchainLoader/loaders/pdfblocks.py +146 -0
  92. flowtask/components/LangchainLoader/loaders/pdfmark.py +79 -0
  93. flowtask/components/LangchainLoader/loaders/pdftables.py +135 -0
  94. flowtask/components/LangchainLoader/loaders/qa.py +67 -0
  95. flowtask/components/LangchainLoader/loaders/txt.py +55 -0
  96. flowtask/components/LeadIQ.py +650 -0
  97. flowtask/components/Loop.py +253 -0
  98. flowtask/components/Lowes.py +334 -0
  99. flowtask/components/MS365Usage.py +156 -0
  100. flowtask/components/MSTeamsMessages.py +320 -0
  101. flowtask/components/MarketClustering.py +1051 -0
  102. flowtask/components/MergeFiles.py +362 -0
  103. flowtask/components/MilvusOutput.py +87 -0
  104. flowtask/components/NearByStores.py +175 -0
  105. flowtask/components/NetworkNinja/__init__.py +6 -0
  106. flowtask/components/NetworkNinja/models/__init__.py +52 -0
  107. flowtask/components/NetworkNinja/models/abstract.py +177 -0
  108. flowtask/components/NetworkNinja/models/account.py +39 -0
  109. flowtask/components/NetworkNinja/models/client.py +19 -0
  110. flowtask/components/NetworkNinja/models/district.py +14 -0
  111. flowtask/components/NetworkNinja/models/events.py +101 -0
  112. flowtask/components/NetworkNinja/models/forms.py +499 -0
  113. flowtask/components/NetworkNinja/models/market.py +16 -0
  114. flowtask/components/NetworkNinja/models/organization.py +34 -0
  115. flowtask/components/NetworkNinja/models/photos.py +125 -0
  116. flowtask/components/NetworkNinja/models/project.py +44 -0
  117. flowtask/components/NetworkNinja/models/region.py +28 -0
  118. flowtask/components/NetworkNinja/models/store.py +203 -0
  119. flowtask/components/NetworkNinja/models/user.py +151 -0
  120. flowtask/components/NetworkNinja/router.py +854 -0
  121. flowtask/components/Odoo.py +175 -0
  122. flowtask/components/OdooInjector.py +192 -0
  123. flowtask/components/OpenFromXML.py +126 -0
  124. flowtask/components/OpenWeather.py +41 -0
  125. flowtask/components/OpenWithBase.py +616 -0
  126. flowtask/components/OpenWithPandas.py +715 -0
  127. flowtask/components/PGPDecrypt.py +199 -0
  128. flowtask/components/PandasIterator.py +187 -0
  129. flowtask/components/PandasToFile.py +189 -0
  130. flowtask/components/Paradox.py +339 -0
  131. flowtask/components/ParamIterator.py +117 -0
  132. flowtask/components/ParseHTML.py +84 -0
  133. flowtask/components/PlacerStores.py +249 -0
  134. flowtask/components/Pokemon.py +507 -0
  135. flowtask/components/PositiveBot.py +62 -0
  136. flowtask/components/PowerPointSlide.py +400 -0
  137. flowtask/components/PrintMessage.py +127 -0
  138. flowtask/components/ProductCompetitors/__init__.py +5 -0
  139. flowtask/components/ProductCompetitors/parsers/__init__.py +7 -0
  140. flowtask/components/ProductCompetitors/parsers/base.py +72 -0
  141. flowtask/components/ProductCompetitors/parsers/bestbuy.py +86 -0
  142. flowtask/components/ProductCompetitors/parsers/lowes.py +103 -0
  143. flowtask/components/ProductCompetitors/scrapper.py +155 -0
  144. flowtask/components/ProductCompliant.py +169 -0
  145. flowtask/components/ProductInfo/__init__.py +1 -0
  146. flowtask/components/ProductInfo/parsers/__init__.py +5 -0
  147. flowtask/components/ProductInfo/parsers/base.py +83 -0
  148. flowtask/components/ProductInfo/parsers/brother.py +97 -0
  149. flowtask/components/ProductInfo/parsers/canon.py +167 -0
  150. flowtask/components/ProductInfo/parsers/epson.py +118 -0
  151. flowtask/components/ProductInfo/parsers/hp.py +131 -0
  152. flowtask/components/ProductInfo/parsers/samsung.py +97 -0
  153. flowtask/components/ProductInfo/scraper.py +319 -0
  154. flowtask/components/ProductPricing.py +118 -0
  155. flowtask/components/QS.py +261 -0
  156. flowtask/components/QSBase.py +201 -0
  157. flowtask/components/QueryIterator.py +273 -0
  158. flowtask/components/QueryToInsert.py +327 -0
  159. flowtask/components/QueryToPandas.py +432 -0
  160. flowtask/components/RESTClient.py +195 -0
  161. flowtask/components/RethinkDBQuery.py +189 -0
  162. flowtask/components/Rsync.py +74 -0
  163. flowtask/components/RunSSH.py +59 -0
  164. flowtask/components/RunShell.py +71 -0
  165. flowtask/components/SalesForce.py +20 -0
  166. flowtask/components/SaveImageBank/__init__.py +257 -0
  167. flowtask/components/SchedulingVisits.py +592 -0
  168. flowtask/components/ScrapPage.py +216 -0
  169. flowtask/components/ScrapSearch.py +79 -0
  170. flowtask/components/SendNotify.py +257 -0
  171. flowtask/components/SentimentAnalysis.py +694 -0
  172. flowtask/components/ServiceScrapper/__init__.py +5 -0
  173. flowtask/components/ServiceScrapper/parsers/__init__.py +1 -0
  174. flowtask/components/ServiceScrapper/parsers/base.py +94 -0
  175. flowtask/components/ServiceScrapper/parsers/costco.py +93 -0
  176. flowtask/components/ServiceScrapper/scrapper.py +199 -0
  177. flowtask/components/SetVariables.py +156 -0
  178. flowtask/components/SubTask.py +182 -0
  179. flowtask/components/SuiteCRM.py +48 -0
  180. flowtask/components/Switch.py +175 -0
  181. flowtask/components/TableBase.py +148 -0
  182. flowtask/components/TableDelete.py +312 -0
  183. flowtask/components/TableInput.py +143 -0
  184. flowtask/components/TableOutput/TableOutput.py +384 -0
  185. flowtask/components/TableOutput/__init__.py +3 -0
  186. flowtask/components/TableSchema.py +534 -0
  187. flowtask/components/Target.py +223 -0
  188. flowtask/components/ThumbnailGenerator.py +156 -0
  189. flowtask/components/ToPandas.py +67 -0
  190. flowtask/components/TransformRows/TransformRows.py +507 -0
  191. flowtask/components/TransformRows/__init__.py +9 -0
  192. flowtask/components/TransformRows/functions.py +559 -0
  193. flowtask/components/TransposeRows.py +176 -0
  194. flowtask/components/UPCDatabase.py +86 -0
  195. flowtask/components/UnGzip.py +171 -0
  196. flowtask/components/Uncompress.py +172 -0
  197. flowtask/components/UniqueRows.py +126 -0
  198. flowtask/components/Unzip.py +107 -0
  199. flowtask/components/UpdateOperationalVars.py +147 -0
  200. flowtask/components/UploadTo.py +299 -0
  201. flowtask/components/UploadToS3.py +136 -0
  202. flowtask/components/UploadToSFTP.py +160 -0
  203. flowtask/components/UploadToSharepoint.py +205 -0
  204. flowtask/components/UserFunc.py +122 -0
  205. flowtask/components/VivaTracker.py +140 -0
  206. flowtask/components/WSDLClient.py +123 -0
  207. flowtask/components/Wait.py +18 -0
  208. flowtask/components/Walmart.py +199 -0
  209. flowtask/components/Workplace.py +134 -0
  210. flowtask/components/XMLToPandas.py +267 -0
  211. flowtask/components/Zammad/__init__.py +41 -0
  212. flowtask/components/Zammad/models.py +0 -0
  213. flowtask/components/ZoomInfoScraper.py +409 -0
  214. flowtask/components/__init__.py +104 -0
  215. flowtask/components/abstract.py +18 -0
  216. flowtask/components/flow.py +530 -0
  217. flowtask/components/google.py +335 -0
  218. flowtask/components/group.py +221 -0
  219. flowtask/components/py.typed +0 -0
  220. flowtask/components/reviewscrap.py +132 -0
  221. flowtask/components/tAutoincrement.py +117 -0
  222. flowtask/components/tConcat.py +109 -0
  223. flowtask/components/tExplode.py +119 -0
  224. flowtask/components/tFilter.py +184 -0
  225. flowtask/components/tGroup.py +236 -0
  226. flowtask/components/tJoin.py +270 -0
  227. flowtask/components/tMap/__init__.py +9 -0
  228. flowtask/components/tMap/functions.py +54 -0
  229. flowtask/components/tMap/tMap.py +450 -0
  230. flowtask/components/tMelt.py +112 -0
  231. flowtask/components/tMerge.py +114 -0
  232. flowtask/components/tOrder.py +93 -0
  233. flowtask/components/tPandas.py +94 -0
  234. flowtask/components/tPivot.py +71 -0
  235. flowtask/components/tPluckCols.py +76 -0
  236. flowtask/components/tUnnest.py +82 -0
  237. flowtask/components/user.py +401 -0
  238. flowtask/conf.py +457 -0
  239. flowtask/download.py +102 -0
  240. flowtask/events/__init__.py +11 -0
  241. flowtask/events/events/__init__.py +20 -0
  242. flowtask/events/events/abstract.py +95 -0
  243. flowtask/events/events/alerts/__init__.py +362 -0
  244. flowtask/events/events/alerts/colfunctions.py +131 -0
  245. flowtask/events/events/alerts/functions.py +158 -0
  246. flowtask/events/events/dummy.py +12 -0
  247. flowtask/events/events/exec.py +124 -0
  248. flowtask/events/events/file/__init__.py +7 -0
  249. flowtask/events/events/file/base.py +51 -0
  250. flowtask/events/events/file/copy.py +23 -0
  251. flowtask/events/events/file/delete.py +16 -0
  252. flowtask/events/events/interfaces/__init__.py +9 -0
  253. flowtask/events/events/interfaces/client.py +67 -0
  254. flowtask/events/events/interfaces/credentials.py +28 -0
  255. flowtask/events/events/interfaces/notifications.py +58 -0
  256. flowtask/events/events/jira.py +122 -0
  257. flowtask/events/events/log.py +26 -0
  258. flowtask/events/events/logerr.py +52 -0
  259. flowtask/events/events/notify.py +59 -0
  260. flowtask/events/events/notify_event.py +160 -0
  261. flowtask/events/events/publish.py +54 -0
  262. flowtask/events/events/sendfile.py +104 -0
  263. flowtask/events/events/task.py +97 -0
  264. flowtask/events/events/teams.py +98 -0
  265. flowtask/events/events/webhook.py +58 -0
  266. flowtask/events/manager.py +287 -0
  267. flowtask/exceptions.c +39393 -0
  268. flowtask/exceptions.cpython-312-x86_64-linux-gnu.so +0 -0
  269. flowtask/extensions/__init__.py +3 -0
  270. flowtask/extensions/abstract.py +82 -0
  271. flowtask/extensions/logging/__init__.py +65 -0
  272. flowtask/hooks/__init__.py +9 -0
  273. flowtask/hooks/actions/__init__.py +22 -0
  274. flowtask/hooks/actions/abstract.py +66 -0
  275. flowtask/hooks/actions/dummy.py +23 -0
  276. flowtask/hooks/actions/jira.py +74 -0
  277. flowtask/hooks/actions/rest.py +320 -0
  278. flowtask/hooks/actions/sampledata.py +37 -0
  279. flowtask/hooks/actions/sensor.py +23 -0
  280. flowtask/hooks/actions/task.py +9 -0
  281. flowtask/hooks/actions/ticket.py +37 -0
  282. flowtask/hooks/actions/zammad.py +55 -0
  283. flowtask/hooks/hook.py +62 -0
  284. flowtask/hooks/models.py +17 -0
  285. flowtask/hooks/service.py +187 -0
  286. flowtask/hooks/step.py +91 -0
  287. flowtask/hooks/types/__init__.py +23 -0
  288. flowtask/hooks/types/base.py +129 -0
  289. flowtask/hooks/types/brokers/__init__.py +11 -0
  290. flowtask/hooks/types/brokers/base.py +54 -0
  291. flowtask/hooks/types/brokers/mqtt.py +35 -0
  292. flowtask/hooks/types/brokers/rabbitmq.py +82 -0
  293. flowtask/hooks/types/brokers/redis.py +83 -0
  294. flowtask/hooks/types/brokers/sqs.py +44 -0
  295. flowtask/hooks/types/fs.py +232 -0
  296. flowtask/hooks/types/http.py +49 -0
  297. flowtask/hooks/types/imap.py +200 -0
  298. flowtask/hooks/types/jira.py +279 -0
  299. flowtask/hooks/types/mail.py +205 -0
  300. flowtask/hooks/types/postgres.py +98 -0
  301. flowtask/hooks/types/responses/__init__.py +8 -0
  302. flowtask/hooks/types/responses/base.py +5 -0
  303. flowtask/hooks/types/sharepoint.py +288 -0
  304. flowtask/hooks/types/ssh.py +141 -0
  305. flowtask/hooks/types/tagged.py +59 -0
  306. flowtask/hooks/types/upload.py +85 -0
  307. flowtask/hooks/types/watch.py +71 -0
  308. flowtask/hooks/types/web.py +36 -0
  309. flowtask/interfaces/AzureClient.py +137 -0
  310. flowtask/interfaces/AzureGraph.py +839 -0
  311. flowtask/interfaces/Boto3Client.py +326 -0
  312. flowtask/interfaces/DropboxClient.py +173 -0
  313. flowtask/interfaces/ExcelHandler.py +94 -0
  314. flowtask/interfaces/FTPClient.py +131 -0
  315. flowtask/interfaces/GoogleCalendar.py +201 -0
  316. flowtask/interfaces/GoogleClient.py +133 -0
  317. flowtask/interfaces/GoogleDrive.py +127 -0
  318. flowtask/interfaces/GoogleGCS.py +89 -0
  319. flowtask/interfaces/GoogleGeocoding.py +93 -0
  320. flowtask/interfaces/GoogleLang.py +114 -0
  321. flowtask/interfaces/GooglePub.py +61 -0
  322. flowtask/interfaces/GoogleSheet.py +68 -0
  323. flowtask/interfaces/IMAPClient.py +137 -0
  324. flowtask/interfaces/O365Calendar.py +113 -0
  325. flowtask/interfaces/O365Client.py +220 -0
  326. flowtask/interfaces/OneDrive.py +284 -0
  327. flowtask/interfaces/Outlook.py +155 -0
  328. flowtask/interfaces/ParrotBot.py +130 -0
  329. flowtask/interfaces/SSHClient.py +378 -0
  330. flowtask/interfaces/Sharepoint.py +496 -0
  331. flowtask/interfaces/__init__.py +36 -0
  332. flowtask/interfaces/azureauth.py +119 -0
  333. flowtask/interfaces/cache.py +201 -0
  334. flowtask/interfaces/client.py +82 -0
  335. flowtask/interfaces/compress.py +525 -0
  336. flowtask/interfaces/credentials.py +124 -0
  337. flowtask/interfaces/d2l.py +239 -0
  338. flowtask/interfaces/databases/__init__.py +5 -0
  339. flowtask/interfaces/databases/db.py +223 -0
  340. flowtask/interfaces/databases/documentdb.py +55 -0
  341. flowtask/interfaces/databases/rethink.py +39 -0
  342. flowtask/interfaces/dataframes/__init__.py +11 -0
  343. flowtask/interfaces/dataframes/abstract.py +21 -0
  344. flowtask/interfaces/dataframes/arrow.py +71 -0
  345. flowtask/interfaces/dataframes/dt.py +69 -0
  346. flowtask/interfaces/dataframes/pandas.py +167 -0
  347. flowtask/interfaces/dataframes/polars.py +60 -0
  348. flowtask/interfaces/db.py +263 -0
  349. flowtask/interfaces/env.py +46 -0
  350. flowtask/interfaces/func.py +137 -0
  351. flowtask/interfaces/http.py +1780 -0
  352. flowtask/interfaces/locale.py +40 -0
  353. flowtask/interfaces/log.py +75 -0
  354. flowtask/interfaces/mask.py +143 -0
  355. flowtask/interfaces/notification.py +154 -0
  356. flowtask/interfaces/playwright.py +339 -0
  357. flowtask/interfaces/powerpoint.py +368 -0
  358. flowtask/interfaces/py.typed +0 -0
  359. flowtask/interfaces/qs.py +376 -0
  360. flowtask/interfaces/result.py +87 -0
  361. flowtask/interfaces/selenium_service.py +779 -0
  362. flowtask/interfaces/smartsheet.py +154 -0
  363. flowtask/interfaces/stat.py +39 -0
  364. flowtask/interfaces/task.py +96 -0
  365. flowtask/interfaces/template.py +118 -0
  366. flowtask/interfaces/vectorstores/__init__.py +1 -0
  367. flowtask/interfaces/vectorstores/abstract.py +133 -0
  368. flowtask/interfaces/vectorstores/milvus.py +669 -0
  369. flowtask/interfaces/zammad.py +107 -0
  370. flowtask/models.py +193 -0
  371. flowtask/parsers/__init__.py +15 -0
  372. flowtask/parsers/_yaml.c +11978 -0
  373. flowtask/parsers/_yaml.cpython-312-x86_64-linux-gnu.so +0 -0
  374. flowtask/parsers/argparser.py +235 -0
  375. flowtask/parsers/base.c +15155 -0
  376. flowtask/parsers/base.cpython-312-x86_64-linux-gnu.so +0 -0
  377. flowtask/parsers/json.c +11968 -0
  378. flowtask/parsers/json.cpython-312-x86_64-linux-gnu.so +0 -0
  379. flowtask/parsers/maps.py +49 -0
  380. flowtask/parsers/toml.c +11968 -0
  381. flowtask/parsers/toml.cpython-312-x86_64-linux-gnu.so +0 -0
  382. flowtask/plugins/__init__.py +16 -0
  383. flowtask/plugins/components/__init__.py +0 -0
  384. flowtask/plugins/handler/__init__.py +45 -0
  385. flowtask/plugins/importer.py +31 -0
  386. flowtask/plugins/sources/__init__.py +0 -0
  387. flowtask/runner.py +283 -0
  388. flowtask/scheduler/__init__.py +9 -0
  389. flowtask/scheduler/functions.py +493 -0
  390. flowtask/scheduler/handlers/__init__.py +8 -0
  391. flowtask/scheduler/handlers/manager.py +504 -0
  392. flowtask/scheduler/handlers/models.py +58 -0
  393. flowtask/scheduler/handlers/service.py +72 -0
  394. flowtask/scheduler/notifications.py +65 -0
  395. flowtask/scheduler/scheduler.py +993 -0
  396. flowtask/services/__init__.py +0 -0
  397. flowtask/services/bots/__init__.py +0 -0
  398. flowtask/services/bots/telegram.py +264 -0
  399. flowtask/services/files/__init__.py +11 -0
  400. flowtask/services/files/manager.py +522 -0
  401. flowtask/services/files/model.py +37 -0
  402. flowtask/services/files/service.py +767 -0
  403. flowtask/services/jira/__init__.py +3 -0
  404. flowtask/services/jira/jira_actions.py +191 -0
  405. flowtask/services/tasks/__init__.py +13 -0
  406. flowtask/services/tasks/launcher.py +213 -0
  407. flowtask/services/tasks/manager.py +323 -0
  408. flowtask/services/tasks/service.py +275 -0
  409. flowtask/services/tasks/task_manager.py +376 -0
  410. flowtask/services/tasks/tasks.py +155 -0
  411. flowtask/storages/__init__.py +16 -0
  412. flowtask/storages/exceptions.py +12 -0
  413. flowtask/storages/files/__init__.py +8 -0
  414. flowtask/storages/files/abstract.py +29 -0
  415. flowtask/storages/files/filesystem.py +66 -0
  416. flowtask/storages/tasks/__init__.py +19 -0
  417. flowtask/storages/tasks/abstract.py +26 -0
  418. flowtask/storages/tasks/database.py +33 -0
  419. flowtask/storages/tasks/filesystem.py +108 -0
  420. flowtask/storages/tasks/github.py +119 -0
  421. flowtask/storages/tasks/memory.py +45 -0
  422. flowtask/storages/tasks/row.py +25 -0
  423. flowtask/tasks/__init__.py +0 -0
  424. flowtask/tasks/abstract.py +526 -0
  425. flowtask/tasks/command.py +118 -0
  426. flowtask/tasks/pile.py +486 -0
  427. flowtask/tasks/py.typed +0 -0
  428. flowtask/tasks/task.py +778 -0
  429. flowtask/template/__init__.py +161 -0
  430. flowtask/tests.py +257 -0
  431. flowtask/types/__init__.py +8 -0
  432. flowtask/types/typedefs.c +11347 -0
  433. flowtask/types/typedefs.cpython-312-x86_64-linux-gnu.so +0 -0
  434. flowtask/utils/__init__.py +24 -0
  435. flowtask/utils/constants.py +117 -0
  436. flowtask/utils/encoders.py +21 -0
  437. flowtask/utils/executor.py +112 -0
  438. flowtask/utils/functions.cpp +14280 -0
  439. flowtask/utils/functions.cpython-312-x86_64-linux-gnu.so +0 -0
  440. flowtask/utils/json.cpp +13349 -0
  441. flowtask/utils/json.cpython-312-x86_64-linux-gnu.so +0 -0
  442. flowtask/utils/mail.py +63 -0
  443. flowtask/utils/parseqs.c +13324 -0
  444. flowtask/utils/parserqs.cpython-312-x86_64-linux-gnu.so +0 -0
  445. flowtask/utils/stats.py +308 -0
  446. flowtask/utils/transformations.py +74 -0
  447. flowtask/utils/uv.py +12 -0
  448. flowtask/utils/validators.py +97 -0
  449. flowtask/version.py +11 -0
  450. flowtask-5.8.4.dist-info/LICENSE +201 -0
  451. flowtask-5.8.4.dist-info/METADATA +209 -0
  452. flowtask-5.8.4.dist-info/RECORD +470 -0
  453. flowtask-5.8.4.dist-info/WHEEL +6 -0
  454. flowtask-5.8.4.dist-info/entry_points.txt +3 -0
  455. flowtask-5.8.4.dist-info/top_level.txt +2 -0
  456. plugins/components/CreateQR.py +39 -0
  457. plugins/components/TestComponent.py +28 -0
  458. plugins/components/Use1.py +13 -0
  459. plugins/components/Workplace.py +117 -0
  460. plugins/components/__init__.py +3 -0
  461. plugins/sources/__init__.py +0 -0
  462. plugins/sources/get_populartimes.py +78 -0
  463. plugins/sources/google.py +150 -0
  464. plugins/sources/hubspot.py +679 -0
  465. plugins/sources/icims.py +679 -0
  466. plugins/sources/mobileinsight.py +501 -0
  467. plugins/sources/newrelic.py +262 -0
  468. plugins/sources/uap.py +268 -0
  469. plugins/sources/venu.py +244 -0
  470. plugins/sources/vocinity.py +314 -0
@@ -0,0 +1,715 @@
1
+ from typing import Any
2
+ from pathlib import PurePath
3
+ from io import BytesIO
4
+ import aiofiles
5
+ from xml.sax import parse
6
+ import warnings
7
+ import pandas
8
+ from pandas._libs.parsers import STR_NA_VALUES
9
+ import orjson
10
+ import xlrd
11
+ import numpy as np
12
+ from ..utils import check_empty
13
+ from ..exceptions import ComponentError, DataNotFound, EmptyFile
14
+ from .OpenWithBase import OpenWithBase, detect_encoding, excel_based, ExcelHandler
15
+
16
+
17
+ # Suppress every UserWarning globally (the filter below is not limited to one specific warning)
18
+ warnings.filterwarnings("ignore", category=UserWarning)
19
+
20
+ class OpenWithPandas(OpenWithBase):
21
+ """
22
+ OpenWithPandas
23
+
24
+ Overview
25
+
26
+ Open a file and return a Dataframe type
27
+
28
+ .. table:: Properties
29
+ :widths: auto
30
+
31
+
32
+ +-------------+----------+-----------+-------------------------------------------------------+
33
+ | Name | Required | Summary |
34
+ +-------------+----------+-----------+-------------------------------------------------------+
35
+ | model | Yes | A model (json) representative of the data that I am going to |
36
+ | | | open * name of a DataModel (in-development) |
37
+ +-------------+----------+-----------+-------------------------------------------------------+
38
+ | map | Yes | Map the columns against the model |
39
+ +-------------+----------+-----------+-------------------------------------------------------+
40
+ | tablename | Yes | Join the data from the table in the postgres database |
41
+ +-------------+----------+-----------+-------------------------------------------------------+
42
+ | use_map | Yes | If true, then a MAP file is used instead of a table in postgresql |
43
+ +-------------+----------+-----------+-------------------------------------------------------+
44
+ | file_engine | Yes | Pandas different types of engines for different types of Excel |
45
+ | | | * xlrd (legacy, xls type) |
46
+ | | | * openpyxl (new xlsx files) |
47
+ | | | * pyxlsb (to open with macros and functions) |
48
+ +-------------+----------+-----------+-------------------------------------------------------+
49
+ | dtypes | No | force the data type of a column ex: { order_date: datetime } |
50
+ +-------------+----------+-----------+-------------------------------------------------------+
51
+
52
+
53
+ Returns a Pandas DataFrame containing the data from the opened file.
54
+
55
+
56
+ Example:
57
+
58
+ ```yaml
59
+ OpenWithPandas:
60
+ mime: text/csv
61
+ process: true
62
+ separator: '|'
63
+ drop_empty: true
64
+ trim: true
65
+ pk:
66
+ columns:
67
+ - associate_oid
68
+ - associate_id
69
+ append: false
70
+ verify_integrity: true
71
+ map:
72
+ tablename: employees
73
+ schema: bacardi
74
+ map: employees
75
+ replace: false
76
+ ```
77
+
78
+ """
79
+ """
80
+ OpenWithPandas
81
+
82
+ Overview
83
+
84
+ This component opens various file types (CSV, Excel, HTML, JSON) into Pandas DataFrames.
85
+
86
+ .. table:: Properties
87
+ :widths: auto
88
+
89
+
90
+ +------------------------+----------+-----------+---------------------------------------------------------------+
91
+ | Name | Required | Summary |
92
+ +------------------------+----------+-----------+---------------------------------------------------------------+
93
+ | directory | No | The directory where the files are located. |
94
+ +------------------------+----------+-----------+---------------------------------------------------------------+
95
+ | filename | No | The name of the file to open. |
96
+ +------------------------+----------+-----------+---------------------------------------------------------------+
97
+ | file | No | Pattern or file to open. |
98
+ +------------------------+----------+-----------+---------------------------------------------------------------+
99
+ | mime | No | The MIME type of the file. Default is "text/csv". |
100
+ +------------------------+----------+-----------+---------------------------------------------------------------+
101
+ | separator | No | Separator for CSV files. Default is ",". |
102
+ +------------------------+----------+-----------+---------------------------------------------------------------+
103
+ | force_map | No | Force the use of a map file. Default is False. |
104
+ +------------------------+----------+-----------+---------------------------------------------------------------+
105
+ | parse_dates | No | Columns to parse as dates. Default is an empty dictionary. |
106
+ +------------------------+----------+-----------+---------------------------------------------------------------+
107
+ | filter_nan | No | Filter out NaN values. Default is True. |
108
+ +------------------------+----------+-----------+---------------------------------------------------------------+
109
+ | na_values | No | List of values to recognize as NaN. Default is ["NULL", "TBD"]. |
110
+ +------------------------+----------+-----------+---------------------------------------------------------------+
111
+ | remove_empty_strings | No | Remove empty strings. Default is True. |
112
+ +------------------------+----------+-----------+---------------------------------------------------------------+
113
+ | no_multi | No | Disable multi-file processing. Default is False. |
114
+ +------------------------+----------+-----------+---------------------------------------------------------------+
115
+ | clean_nat | No | Clean NaT values. Default is False. |
116
+ +------------------------+----------+-----------+---------------------------------------------------------------+
117
+ | flavor | No | The flavor of the database for column information. Default is "postgres". |
118
+ +------------------------+----------+-----------+---------------------------------------------------------------+
119
+ | pd_args | No | Additional arguments for pandas. Default is an empty dictionary. |
120
+ +------------------------+----------+-----------+---------------------------------------------------------------+
121
+ | model | Yes | A model (json) representative of the data that I am going to |
122
+ | | | open * name of a DataModel (in-development) |
123
+ +------------------------+----------+-----------+---------------------------------------------------------------+
124
+ | map | Yes | Map the columns against the model |
125
+ +------------------------+----------+-----------+---------------------------------------------------------------+
126
+ | tablename | Yes | Join the data from the table in the postgres database |
127
+ +------------------------+----------+-----------+---------------------------------------------------------------+
128
+ | use_map | Yes | If true, then a MAP file is used instead of a table in postgresql |
129
+ +------------------------+----------+-----------+---------------------------------------------------------------+
130
+ | file_engine | Yes | Pandas different types of engines for different types of Excel |
131
+ | | | * xlrd (legacy, xls type) |
132
+ | | | * openpyxl (new xlsx files) |
133
+ | | | * pyxlsb (to open with macros and functions) |
134
+ +------------------------+----------+-----------+---------------------------------------------------------------+
135
+ | dtypes | No | force the data type of a column ex: { order_date: datetime } |
136
+ +------------------------+----------+-----------+---------------------------------------------------------------+
137
+
138
+ Returns
139
+
140
+ This component returns a Pandas DataFrame containing the data from the opened file(s).
141
+
142
+ """
143
def get_column_headers(self):
    """Return the column labels of every file listed in ``self._filenames``.

    Each file is parsed as CSV (python engine, ``self.separator`` as the
    delimiter) and only its first data row is read, which is enough for
    pandas to resolve the header. The encoding comes from
    ``self.check_encoding``; when that call fails, ``"UTF-8"`` is used.

    Returns:
        list: one list of column labels per file, in ``self._filenames``
        order.
    """

    def _resolve_encoding(path):
        # Any failure while sniffing the encoding falls back to UTF-8.
        try:
            return self.check_encoding(path)
        except Exception:
            return "UTF-8"

    collected = []
    for path in self._filenames:
        preview = pandas.read_csv(
            path,
            sep=self.separator,
            skipinitialspace=True,
            encoding=_resolve_encoding(path),
            engine="python",
            nrows=1,
        )
        labels = preview.columns.tolist()
        collected.append(labels)
    return collected
160
+
161
def set_datatypes(self):
    """Translate ``self.datatypes`` into a pandas ``dtype`` mapping.

    ``self.datatypes`` maps a column name to a type name. Every type name
    is converted to the object pandas expects and the resulting mapping is
    stored under ``self.args["dtype"]``. Nothing is stored when
    ``self.datatypes`` is empty.

    Raises:
        ComponentError: if a type name is not one of the supported names.
    """
    # NOTE(review): "float32" resolves to the builtin ``float`` (64-bit),
    # exactly as before; switch to ``np.float32`` only if callers expect it.
    supported = {
        "uint8": np.uint8,
        "uint16": np.uint16,
        "uint32": np.uint32,
        "int8": np.int8,
        "int16": np.int16,
        "int32": np.int32,
        "float": float,
        "float32": float,
        "string": "str",
        "varchar": "str",
        "str": "str",
        "object": object,
    }
    dtypes = {}
    for field, dtype in self.datatypes.items():
        try:
            dtypes[field] = supported[dtype]
        except (KeyError, TypeError) as err:
            # invalid (or unhashable) datatype; report the offending type
            # name first and the column it was declared for second.
            raise ComponentError(
                f"Invalid DataType value: {dtype} for field {field}"
            ) from err
    if dtypes:
        self.args["dtype"] = dtypes
191
+
192
async def open_excel(
    self, filename: str, add_columns: dict, encoding
) -> pandas.DataFrame:
    """Open an Excel-like workbook and return it as a DataFrame.

    Two paths exist, selected by ``self.mime``:

    * ``text/xml``: the file is run through ``parse()`` with an
      ``ExcelHandler`` and the first table it collected becomes the
      DataFrame.
    * any other MIME type: the file is read with ``pandas.read_excel``.
      The engine defaults to ``openpyxl`` or ``pyxlsb`` for the two MIME
      types handled explicitly, otherwise to ``xlrd`` for ``.xls`` files
      and ``calamine`` for everything else. A ``file_engine`` entry in
      ``self._params`` overrides the default.

    Args:
        filename: path of the workbook.
        add_columns: extra keyword arguments merged into the
            ``read_excel`` call.
        encoding: encoding reported for the file; only logged here.

    Returns:
        pandas.DataFrame: the content of the workbook.

    Raises:
        EmptyFile: pandas reported empty data.
        ComponentError: any other failure while reading the file.
    """
    self._logger.debug(
        f"Opening Excel file {filename} with Pandas, encoding: {encoding}"
    )
    if self.mime == "text/xml":
        xmlparser = ExcelHandler()
        parse(filename, xmlparser)
        # Row holding the column labels; data starts on the row after it.
        header_row = self.skiprows + 1 if hasattr(self, "skiprows") else 0
        first_data_row = header_row + 1
        try:
            if (
                hasattr(self, "add_columns") and hasattr(self, "rename")
                and self.rename is True
            ):
                # NOTE(review): the whole ``add_columns`` mapping is handed
                # over as column labels (pandas iterates its keys); confirm
                # this is intended rather than ``add_columns["names"]``.
                cols = add_columns
            else:
                cols = xmlparser.tables[0][header_row]
            return pandas.DataFrame(
                data=xmlparser.tables[0][first_data_row:], columns=cols
            )
        except pandas.errors.EmptyDataError as err:
            raise EmptyFile(f"Empty File {filename}: {err}") from err
        except pandas.errors.ParserError as err:
            raise ComponentError(f"Parsing File {filename}: {err}") from err
        except Exception as err:
            raise ComponentError(
                f"Generic Error on file {filename}, error: {err}"
            ) from err

    if (
        self.mime == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    ):
        # xlsx or any openxml based document
        default_engine = "openpyxl"
    elif self.mime == "application/vnd.ms-excel.sheet.binary.macroEnabled.12":
        default_engine = "pyxlsb"
    else:
        try:
            # PurePath() accepts both str and path objects, so plain string
            # paths no longer fall into the error branch.
            ext = PurePath(filename).suffix.lower()
        except TypeError as e:
            self._logger.warning(f"Error detecting extension: {e}")
            ext = ".xls"
        default_engine = "xlrd" if ext == ".xls" else "calamine"
    file_engine = self._params.get("file_engine", default_engine)
    try:
        arguments = {**self.args, **add_columns, **self.parse_dates}
        if self._limit is not None and isinstance(self._limit, int):
            arguments["nrows"] = self._limit
        if self.sheet_name is not None:
            arguments["sheet_name"] = self.sheet_name
        # TODO: if sheet_name is None, then open all worksheets
        # work with the dictionary of dataframes.
        return pandas.read_excel(
            filename,
            na_values=self.na_values,
            na_filter=self.filter_nan,
            engine=file_engine,
            keep_default_na=False,
            **arguments,
        )
    except (IndexError, xlrd.biffh.XLRDError) as err:
        raise ComponentError(
            f"Excel Index error on File {filename}: {err}"
        ) from err
    except pandas.errors.EmptyDataError as err:
        raise EmptyFile(f"Empty File {filename}: {err}") from err
    except pandas.errors.ParserError as err:
        raise ComponentError(f"Error Parsing File {filename}: {err}") from err
    except Exception as err:
        raise ComponentError(
            f"Generic Error on file {filename}, error: {err}"
        ) from err
274
+
275
async def open_html(
    self, filename: str, add_columns: dict, encoding: str
) -> pandas.DataFrame:
    """Read the first table of an HTML document into a DataFrame.

    The document is parsed by ``pandas.read_html`` with the ``html5lib``
    flavor. ``dtype`` and ``skiprows`` entries of ``self.args`` are not
    forwarded; they are filtered out of a copy, so ``self.args`` itself is
    left untouched.

    Args:
        filename: path of the HTML document.
        add_columns: when it holds a ``"names"`` entry, that list replaces
            the column labels of the table.
        encoding: encoding used to decode the document.

    Returns:
        pandas.DataFrame | None: the first table found, or ``None`` when
        pandas returned no table.

    Raises:
        EmptyFile: pandas reported empty data.
        ComponentError: any other failure while reading the file.
    """
    self._logger.debug(
        f"Opening an HTML file {filename} with Pandas, encoding={encoding}"
    )
    options = {
        key: value
        for key, value in self.args.items()
        if key not in ("dtype", "skiprows")
    }
    try:
        dfs = pandas.read_html(
            filename,
            keep_default_na=False,
            flavor="html5lib",
            na_values=self.na_values,
            encoding=encoding,
            **self.parse_dates,
            **options,
        )
        if not dfs:
            # Nothing to rename: let the caller decide how to treat it.
            return None
        df = dfs[0]
        if "names" in add_columns:
            df.columns = add_columns["names"]
        return df
    except pandas.errors.EmptyDataError as err:
        raise EmptyFile(message=f"Empty File {filename}: {err}") from err
    except pandas.errors.ParserError as err:
        raise ComponentError(message=f"Parsing File {filename}: {err}") from err
    except Exception as err:
        raise ComponentError(
            message=f"Generic Error on file {filename}: {err}"
        ) from err
310
+
311
async def open_parquet(
    self, filename: str, add_columns: dict, encoding
) -> pandas.DataFrame:
    """Placeholder for a Parquet reader; not implemented, returns ``None``."""
    return None
315
+
316
async def open_sql(
    self, filename: str, add_columns: dict, encoding
) -> pandas.DataFrame:
    """Placeholder for a SQL-file reader; not implemented, returns ``None``."""
    return None
320
+
321
async def open_json(
    self, filename: str, add_columns: dict, encoding: str
) -> pandas.DataFrame:
    """Load a JSON document (``records`` orientation) into a DataFrame.

    Args:
        filename: path of the JSON document.
        add_columns: currently unused (see the TODO below).
        encoding: encoding used to decode the document.

    Returns:
        pandas.DataFrame: the parsed records.

    Raises:
        EmptyFile: pandas reported empty data.
        ComponentError: any other failure while reading the file.
    """
    log_line = f"Opening a JSON file {filename} with Pandas, encoding={encoding}"
    self._logger.debug(log_line)
    # TODO: add columns functionality.
    try:
        frame = pandas.read_json(
            filename, orient="records", encoding=encoding, **self.args
        )
    except pandas.errors.EmptyDataError as err:
        raise EmptyFile(message=f"Empty File {filename}: {err}") from err
    except pandas.errors.ParserError as err:
        raise ComponentError(
            message=f"Error Parsing File {filename}: {err}"
        ) from err
    except Exception as err:
        raise ComponentError(
            message=f"Generic Error on file {filename}: {err}"
        ) from err
    else:
        return frame
343
+
344
async def open_csv(
    self, filename: str, add_columns: dict, encoding
) -> pandas.DataFrame:
    """Open a CSV/plain-text file with pandas and return a DataFrame.

    Reader options are assembled from ``add_columns``, ``self.parse_dates``
    and ``self.args``; the ``engine`` entry of ``self.args`` selects the
    pandas engine (default ``"c"``). All option dictionaries are copied per
    call, so neither ``add_columns`` nor ``self.args`` is modified and every
    file of a multi-file run is read with the same configuration.

    Reading strategy:

    * when the component has a ``bigfile`` attribute the file is read in
      chunks of ``self.chunksize`` rows and the chunks are concatenated;
    * otherwise the file is read in one go. On ``UnicodeDecodeError`` the
      read is retried with ``utf-8``, ``latin1`` and ``ascii`` (optionally
      on a copy of the content with NUL bytes removed, when the component
      has a ``clean_null_bytes`` attribute). On any other ``ValueError``
      the file is re-read without the ``dtype`` option.

    Args:
        filename: path of the file.
        add_columns: extra keyword arguments for ``pandas.read_csv``
            (e.g. ``names``).
        encoding: encoding declared for the file.

    Returns:
        pandas.DataFrame: the content of the file.

    Raises:
        ComponentError: the file is empty, cannot be parsed, holds more
            columns than declared in ``names``, or any other read failure.
    """
    self._logger.debug(
        f"Opening CSV file {filename} with Pandas, encoding={encoding}"
    )
    csv_options = {**add_columns, "low_memory": False, "float_precision": "high"}
    reader_args = dict(self.args)
    # can we use pyarrow.
    engine = reader_args.pop("engine", "c")
    if self._limit is not None and isinstance(self._limit, int):
        csv_options["nrows"] = self._limit
    # try to fix the encoding problem on files:
    _, new_encoding = detect_encoding(filename, encoding)
    if new_encoding != encoding:
        self._logger.warning(
            f"Encoding on file: {new_encoding} and "
            f"declared by Task ({encoding}) are different"
        )
        # encoding = new_encoding
    # Options shared by every full read of the file.
    shared = {
        "sep": self.separator,
        "decimal": ",",
        "engine": engine,
        "keep_default_na": False,
        "na_values": self.na_values,
        "na_filter": self.filter_nan,
    }
    # open file:
    if hasattr(self, "bigfile"):
        try:
            # The context manager closes the underlying file handle once
            # all chunks were consumed.
            with pandas.read_csv(
                filename,
                encoding=encoding,
                skipinitialspace=True,
                iterator=True,
                chunksize=int(self.chunksize),
                **shared,
                **csv_options,
                **self.parse_dates,
                **reader_args,
            ) as reader:
                return pandas.concat(reader, ignore_index=True)
        except pandas.errors.EmptyDataError as err:
            raise ComponentError(
                f"Empty Data File on: {filename}, error: {err}"
            ) from err
        except Exception as err:
            raise ComponentError(
                f"Generic Error on file: {filename}, error: {err}"
            ) from err

    try:
        return pandas.read_csv(
            filename,
            quotechar='"',
            encoding=encoding,
            skipinitialspace=True,
            **shared,
            **csv_options,
            **self.parse_dates,
            **reader_args,
        )
    except UnicodeDecodeError as exc:
        self._logger.warning(
            f"Invalid Encoding {encoding}: {exc}"
        )
        # fallback to a default unicode:
        _, encoding = detect_encoding(filename, encoding)
        self._logger.debug(f"Detected Encoding > {encoding!s}")
        content = None
        if hasattr(self, 'clean_null_bytes'):
            async with aiofiles.open(filename, 'rb') as file:
                # Removing all null bytes
                content = (await file.read()).replace(b'\x00', b'')
        last_encoding = None
        for enc in ('utf-8', 'latin1', 'ascii'):
            last_encoding = enc
            # A fresh buffer per attempt: a failed read leaves a BytesIO
            # positioned somewhere past the start.
            source = filename if content is None else BytesIO(content)
            try:
                return pandas.read_csv(
                    source,
                    quotechar='"',
                    encoding=enc,
                    skipinitialspace=True,
                    on_bad_lines='warn',
                    **shared,
                    **csv_options,
                    **self.parse_dates,
                    **reader_args,
                )
            except Exception as e:
                self._logger.warning(
                    f"Cannot open {filename} with encoding {enc}: {e}"
                )
                continue
        # No encoding match
        raise ComponentError(
            f"Cannot Open the file with encoding {last_encoding}"
        ) from exc
    # EmptyDataError and ParserError derive from ValueError, so they must
    # be handled before the generic ValueError recovery below.
    except pandas.errors.EmptyDataError as err:
        raise ComponentError(
            f"Empty Data in file: {filename}, error: {err}"
        ) from err
    except pandas.errors.ParserError as err:
        raise ComponentError(
            f"Error parsing File: {filename}, error: {err}"
        ) from err
    except ValueError as exc:
        # Open Pandas with default settings for detect discrepancies
        probe = pandas.read_csv(
            filename,
            sep=self.separator,
            quotechar='"',
            decimal=",",
            engine=engine,
            encoding=encoding,
            dtype=str,
            header=None,
        )
        # columns in Pandas:
        num_cols = int(probe.shape[1])
        expected = len(csv_options.get('names', []))
        if expected > 0 and num_cols - expected > 0:
            # some extra columns were found:
            raise ComponentError(
                (
                    f"There are more columns in FILE than expected. "
                    f"There are {num_cols} in File received vs "
                    f"{expected} columns in Mapping definition."
                )
            ) from exc
        # Retry without the declared dtypes (this call only).
        reader_args.pop('dtype', None)
        self._logger.error(
            (
                f"Some columns have wrong type in Model: {exc}, "
                "Opening file with default settings (str)"
            )
        )
        try:
            return pandas.read_csv(
                filename,
                quotechar='"',
                encoding=encoding,
                **shared,
                **csv_options,
                **self.parse_dates,
                **reader_args,
            )
        except Exception as ex:
            raise ComponentError(
                f"Invalid types of columns found on file {filename}, {ex}"
            ) from ex
    except Exception as err:
        raise ComponentError(
            f"Generic Error on file: {filename}, error: {err}"
        ) from err
523
+
524
async def run(self) -> Any:
    """Open every source with pandas and publish the combined DataFrame.

    Steps:

    1. Build the NA-value set: a copy of ``STR_NA_VALUES`` (minus the empty
       string when ``self.remove_empty_strings`` is true) plus
       ``self.na_values``.
    2. Build one DataFrame per source: from ``self._data`` when no
       filenames are set, otherwise from each file in ``self._filenames``
       using the reader that matches ``self.mime``.
    3. Combine the frames (single frame, last frame when ``self.no_multi``
       is true, or ``pandas.concat``).
    4. Post-process (``drop_empty``, ``dropna``, ``trim``, ``clean_nat``)
       and coerce columns to the types listed in ``self._colinfo``.
    5. Store the frame in ``self._result`` and record row-count variables
       and metrics.

    Returns:
        pandas.DataFrame: the resulting frame (also kept in
        ``self._result``).

    Raises:
        DataNotFound: no usable frame could be built from ``self._data``.
        EmptyFile: a file produced no rows.
        ComponentError: a source could not be read or combined, or the
            MIME type is not supported.
    """
    await super(OpenWithPandas, self).run()
    add_columns = await self.colinfo()
    result = []
    df = None
    ## Define NA Values:
    default_missing = STR_NA_VALUES.copy()
    if self.remove_empty_strings is True:
        default_missing.discard("")
    default_missing.update(self.na_values)  # pylint: disable=E0203
    self.na_values = default_missing
    if self._filenames is None and not check_empty(self._data):
        if isinstance(self._data, list):
            for file in self._data:
                try:
                    df = pandas.DataFrame(
                        data=file, **add_columns, **self.parse_dates, **self.args
                    )
                    result.append(df)
                except pandas.errors.EmptyDataError as err:
                    raise ComponentError(
                        f"Error on Empty Data: error: {err}"
                    ) from err
                except ValueError as err:
                    raise ComponentError(
                        f"Error parsing Data: error: {err}"
                    ) from err
                except Exception as err:
                    raise ComponentError(
                        f"Generic Error on Data: error: {err}"
                    ) from err
                if df is None or df.empty:
                    raise DataNotFound("Dataframe is Empty: Data not found")
        if not result:
            # Nothing was built (e.g. the data is not a list): stop here
            # instead of failing later while combining an empty list.
            raise DataNotFound("Dataframe is Empty: Data not found")
    else:
        # iterate over all files or data
        self._variables["FILENAMES"] = self._filenames
        for filename in self._filenames:
            try:
                encoding = self.check_encoding(filename)
            except Exception:
                encoding = "UTF-8"
            if self.mime == "text/csv" or self.mime == "text/plain":
                try:
                    df = await self.open_csv(filename, add_columns, encoding)
                    if isinstance(filename, PurePath):
                        self.add_metric(f"{filename.name}", len(df.index))
                    else:
                        self.add_metric(f"{filename}", len(df.index))
                except Exception as err:
                    raise ComponentError(f"Encoding Error: {err}") from err
                if hasattr(self, "add_columns") and hasattr(self, "rename"):
                    # The first row is dropped when ``rename`` is on; the
                    # emptiness guard avoids an IndexError on empty frames.
                    if self.rename is True and not df.empty:
                        df = df.drop(df.index[0])
            elif self.mime in excel_based:
                try:
                    df = await self.open_excel(filename, add_columns, encoding)
                except Exception as err:
                    raise ComponentError(
                        f"Error parsing Excel: {err}"
                    ) from err
            elif self.mime == "text/html" or self.mime == "application/html":
                try:
                    df = await self.open_html(filename, add_columns, encoding)
                except Exception as err:
                    raise ComponentError(f"Error parsing HTML: {err}") from err
            elif self.mime == "application/json":
                try:
                    df = await self.open_json(filename, add_columns, encoding)
                except Exception as err:
                    raise ComponentError(f"Error parsing JSON: {err}") from err
            else:
                raise ComponentError(f"Try to Open invalid MIME Type: {self.mime}")
            if df is None or df.empty:
                raise EmptyFile(f"Empty File {filename}")
            result.append(df)
    # at the end, concat the sources:
    if len(result) == 1:
        df = result[0]
    elif self.no_multi is True:  # get only one element
        df = result.pop()
    else:
        ## fix Pandas Concat
        try:
            df = pandas.concat(result)
        except Exception as err:
            raise ComponentError(
                f"Error Combining Resultset Dataframes: {err}"
            ) from err
    # post-processing:
    if hasattr(self, "remove_scientific_notation"):
        pandas.set_option("display.float_format", lambda x: "%.3f" % x)
    if hasattr(self, "drop_empty"):
        df.dropna(axis=1, how="all", inplace=True)
        df.dropna(axis=0, how="all", inplace=True)
        df = df.loc[:, ~df.columns.str.contains("^Unnamed")]
    if hasattr(self, "dropna"):
        df.dropna(subset=self.dropna, how="all", inplace=True)
    if hasattr(self, "trim"):
        cols = df.select_dtypes(include=["object", "string"])
        for col in cols:
            df[col] = df[col].astype(str).str.strip()
    # define the primary keys for DataFrame
    if hasattr(self, "pk"):
        try:
            # Copy so the component configuration is not altered.
            pk_options = dict(self.pk)
            columns = pk_options.pop("columns")
            # NOTE(review): reset_index() returns a new frame, so this
            # in-place set_index never reaches ``df``. Kept as-is because
            # making it effective changes the index of the returned frame;
            # confirm the intended behavior before enabling it.
            df.reset_index().set_index(
                columns, inplace=True, drop=False, **pk_options
            )
        except Exception as err:
            self._logger.error(f"OpenWith: Error setting index: {err}")
    if self.clean_nat is True:
        df.replace({pandas.NaT: None}, inplace=True)
    if self._colinfo:
        # fix the datatype for every column in dataframe (if needed)
        for column, dtype in self._colinfo.items():
            try:
                if dtype in (
                    "timestamp without time zone",
                    "timestamp with time zone",
                    "date",
                ):
                    if df[column].dtype != "datetime64[ns]":
                        df[column] = pandas.to_datetime(df[column], errors="coerce")
                        df[column] = df[column].astype("datetime64[ns]")
                elif dtype in ("character varying", "character", "text", "varchar"):
                    df[column] = df[column].replace([np.nan], "", regex=True)
                    df[column] = df[column].fillna("")
                    df[column] = df[column].astype("string", errors="raise")
                elif dtype == "smallint":
                    # smallint is a 16-bit integer; the nullable Int16 keeps
                    # values that failed the numeric coercion as <NA>.
                    df[column] = pandas.to_numeric(df[column], errors="coerce")
                    df[column] = df[column].astype("Int16")
                elif dtype == "integer" or dtype == "bigint":
                    try:
                        ctype = df[column].dtypes[0].name
                    except (TypeError, KeyError):
                        ctype = df[column].dtype
                    if ctype not in ("Int8", "Int32", "Int64"):
                        df[column] = pandas.to_numeric(df[column], errors="raise")
                    df[column] = df[column].astype("Int64", errors="raise")
                elif dtype in ("numeric", "float", "double precision", "real"):
                    df[column] = pandas.to_numeric(df[column], errors="coerce")
                    df[column] = df[column].astype("float64")
                elif dtype == "jsonb":
                    df[column] = df[column].apply(orjson.loads)
                elif dtype == "object":
                    df[column] = df[column].replace([np.nan], "", regex=True)
            except Exception as err:
                # Best effort: a column that cannot be converted keeps its
                # current dtype and the problem is only logged.
                self._logger.warning(
                    f"Cannot set data type for column {column}: {err}"
                )
                continue
    self._result = df
    numrows = len(df.index)
    self._variables["_numRows_"] = numrows
    self._variables[f"{self.StepName}_NUMROWS"] = numrows
    self.add_metric("NUMROWS", numrows)
    self.add_metric("OPENED_FILES", self._filenames)
    if self._debug is True:
        print(df)
        print("::: Printing Column Information === ")
        for column, t in df.dtypes.items():
            # Post-processing may have removed every row.
            sample = df[column].iloc[0] if numrows else None
            print(column, "->", t, "->", sample)
    self._logger.debug(f"Opened File(s) with Pandas {self._filenames}")
    return self._result