flowtask 5.8.4__cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowtask/__init__.py +93 -0
- flowtask/__main__.py +38 -0
- flowtask/bots/__init__.py +6 -0
- flowtask/bots/check.py +93 -0
- flowtask/bots/codebot.py +51 -0
- flowtask/components/ASPX.py +148 -0
- flowtask/components/AddDataset.py +352 -0
- flowtask/components/Amazon.py +523 -0
- flowtask/components/AutoTask.py +314 -0
- flowtask/components/Azure.py +80 -0
- flowtask/components/AzureUsers.py +106 -0
- flowtask/components/BaseAction.py +91 -0
- flowtask/components/BaseLoop.py +198 -0
- flowtask/components/BestBuy.py +800 -0
- flowtask/components/CSVToGCS.py +120 -0
- flowtask/components/CompanyScraper/__init__.py +1 -0
- flowtask/components/CompanyScraper/parsers/__init__.py +6 -0
- flowtask/components/CompanyScraper/parsers/base.py +102 -0
- flowtask/components/CompanyScraper/parsers/explorium.py +192 -0
- flowtask/components/CompanyScraper/parsers/leadiq.py +206 -0
- flowtask/components/CompanyScraper/parsers/rocket.py +133 -0
- flowtask/components/CompanyScraper/parsers/siccode.py +109 -0
- flowtask/components/CompanyScraper/parsers/visualvisitor.py +130 -0
- flowtask/components/CompanyScraper/parsers/zoominfo.py +118 -0
- flowtask/components/CompanyScraper/scrapper.py +1054 -0
- flowtask/components/CopyTo.py +177 -0
- flowtask/components/CopyToBigQuery.py +243 -0
- flowtask/components/CopyToMongoDB.py +291 -0
- flowtask/components/CopyToPg.py +609 -0
- flowtask/components/CopyToRethink.py +207 -0
- flowtask/components/CreateGCSBucket.py +102 -0
- flowtask/components/CreateReport/CreateReport.py +228 -0
- flowtask/components/CreateReport/__init__.py +9 -0
- flowtask/components/CreateReport/charts/__init__.py +15 -0
- flowtask/components/CreateReport/charts/bar.py +51 -0
- flowtask/components/CreateReport/charts/base.py +66 -0
- flowtask/components/CreateReport/charts/pie.py +64 -0
- flowtask/components/CreateReport/utils.py +9 -0
- flowtask/components/CustomerSatisfaction.py +196 -0
- flowtask/components/DataInput.py +200 -0
- flowtask/components/DateList.py +255 -0
- flowtask/components/DbClient.py +163 -0
- flowtask/components/DialPad.py +146 -0
- flowtask/components/DocumentDBQuery.py +200 -0
- flowtask/components/DownloadFrom.py +371 -0
- flowtask/components/DownloadFromD2L.py +113 -0
- flowtask/components/DownloadFromFTP.py +181 -0
- flowtask/components/DownloadFromIMAP.py +315 -0
- flowtask/components/DownloadFromS3.py +198 -0
- flowtask/components/DownloadFromSFTP.py +265 -0
- flowtask/components/DownloadFromSharepoint.py +110 -0
- flowtask/components/DownloadFromSmartSheet.py +114 -0
- flowtask/components/DownloadS3File.py +229 -0
- flowtask/components/Dummy.py +59 -0
- flowtask/components/DuplicatePhoto.py +411 -0
- flowtask/components/EmployeeEvaluation.py +237 -0
- flowtask/components/ExecuteSQL.py +323 -0
- flowtask/components/ExtractHTML.py +178 -0
- flowtask/components/FileBase.py +178 -0
- flowtask/components/FileCopy.py +181 -0
- flowtask/components/FileDelete.py +82 -0
- flowtask/components/FileExists.py +146 -0
- flowtask/components/FileIteratorDelete.py +112 -0
- flowtask/components/FileList.py +194 -0
- flowtask/components/FileOpen.py +75 -0
- flowtask/components/FileRead.py +120 -0
- flowtask/components/FileRename.py +106 -0
- flowtask/components/FilterIf.py +284 -0
- flowtask/components/FilterRows/FilterRows.py +200 -0
- flowtask/components/FilterRows/__init__.py +10 -0
- flowtask/components/FilterRows/functions.py +4 -0
- flowtask/components/GCSToBigQuery.py +103 -0
- flowtask/components/GoogleA4.py +150 -0
- flowtask/components/GoogleGeoCoding.py +344 -0
- flowtask/components/GooglePlaces.py +315 -0
- flowtask/components/GoogleSearch.py +539 -0
- flowtask/components/HTTPClient.py +268 -0
- flowtask/components/ICIMS.py +146 -0
- flowtask/components/IF.py +179 -0
- flowtask/components/IcimsFolderCopy.py +173 -0
- flowtask/components/ImageFeatures/__init__.py +5 -0
- flowtask/components/ImageFeatures/process.py +233 -0
- flowtask/components/IteratorBase.py +251 -0
- flowtask/components/LangchainLoader/__init__.py +5 -0
- flowtask/components/LangchainLoader/loader.py +194 -0
- flowtask/components/LangchainLoader/loaders/__init__.py +22 -0
- flowtask/components/LangchainLoader/loaders/abstract.py +362 -0
- flowtask/components/LangchainLoader/loaders/basepdf.py +50 -0
- flowtask/components/LangchainLoader/loaders/docx.py +91 -0
- flowtask/components/LangchainLoader/loaders/html.py +119 -0
- flowtask/components/LangchainLoader/loaders/pdfblocks.py +146 -0
- flowtask/components/LangchainLoader/loaders/pdfmark.py +79 -0
- flowtask/components/LangchainLoader/loaders/pdftables.py +135 -0
- flowtask/components/LangchainLoader/loaders/qa.py +67 -0
- flowtask/components/LangchainLoader/loaders/txt.py +55 -0
- flowtask/components/LeadIQ.py +650 -0
- flowtask/components/Loop.py +253 -0
- flowtask/components/Lowes.py +334 -0
- flowtask/components/MS365Usage.py +156 -0
- flowtask/components/MSTeamsMessages.py +320 -0
- flowtask/components/MarketClustering.py +1051 -0
- flowtask/components/MergeFiles.py +362 -0
- flowtask/components/MilvusOutput.py +87 -0
- flowtask/components/NearByStores.py +175 -0
- flowtask/components/NetworkNinja/__init__.py +6 -0
- flowtask/components/NetworkNinja/models/__init__.py +52 -0
- flowtask/components/NetworkNinja/models/abstract.py +177 -0
- flowtask/components/NetworkNinja/models/account.py +39 -0
- flowtask/components/NetworkNinja/models/client.py +19 -0
- flowtask/components/NetworkNinja/models/district.py +14 -0
- flowtask/components/NetworkNinja/models/events.py +101 -0
- flowtask/components/NetworkNinja/models/forms.py +499 -0
- flowtask/components/NetworkNinja/models/market.py +16 -0
- flowtask/components/NetworkNinja/models/organization.py +34 -0
- flowtask/components/NetworkNinja/models/photos.py +125 -0
- flowtask/components/NetworkNinja/models/project.py +44 -0
- flowtask/components/NetworkNinja/models/region.py +28 -0
- flowtask/components/NetworkNinja/models/store.py +203 -0
- flowtask/components/NetworkNinja/models/user.py +151 -0
- flowtask/components/NetworkNinja/router.py +854 -0
- flowtask/components/Odoo.py +175 -0
- flowtask/components/OdooInjector.py +192 -0
- flowtask/components/OpenFromXML.py +126 -0
- flowtask/components/OpenWeather.py +41 -0
- flowtask/components/OpenWithBase.py +616 -0
- flowtask/components/OpenWithPandas.py +715 -0
- flowtask/components/PGPDecrypt.py +199 -0
- flowtask/components/PandasIterator.py +187 -0
- flowtask/components/PandasToFile.py +189 -0
- flowtask/components/Paradox.py +339 -0
- flowtask/components/ParamIterator.py +117 -0
- flowtask/components/ParseHTML.py +84 -0
- flowtask/components/PlacerStores.py +249 -0
- flowtask/components/Pokemon.py +507 -0
- flowtask/components/PositiveBot.py +62 -0
- flowtask/components/PowerPointSlide.py +400 -0
- flowtask/components/PrintMessage.py +127 -0
- flowtask/components/ProductCompetitors/__init__.py +5 -0
- flowtask/components/ProductCompetitors/parsers/__init__.py +7 -0
- flowtask/components/ProductCompetitors/parsers/base.py +72 -0
- flowtask/components/ProductCompetitors/parsers/bestbuy.py +86 -0
- flowtask/components/ProductCompetitors/parsers/lowes.py +103 -0
- flowtask/components/ProductCompetitors/scrapper.py +155 -0
- flowtask/components/ProductCompliant.py +169 -0
- flowtask/components/ProductInfo/__init__.py +1 -0
- flowtask/components/ProductInfo/parsers/__init__.py +5 -0
- flowtask/components/ProductInfo/parsers/base.py +83 -0
- flowtask/components/ProductInfo/parsers/brother.py +97 -0
- flowtask/components/ProductInfo/parsers/canon.py +167 -0
- flowtask/components/ProductInfo/parsers/epson.py +118 -0
- flowtask/components/ProductInfo/parsers/hp.py +131 -0
- flowtask/components/ProductInfo/parsers/samsung.py +97 -0
- flowtask/components/ProductInfo/scraper.py +319 -0
- flowtask/components/ProductPricing.py +118 -0
- flowtask/components/QS.py +261 -0
- flowtask/components/QSBase.py +201 -0
- flowtask/components/QueryIterator.py +273 -0
- flowtask/components/QueryToInsert.py +327 -0
- flowtask/components/QueryToPandas.py +432 -0
- flowtask/components/RESTClient.py +195 -0
- flowtask/components/RethinkDBQuery.py +189 -0
- flowtask/components/Rsync.py +74 -0
- flowtask/components/RunSSH.py +59 -0
- flowtask/components/RunShell.py +71 -0
- flowtask/components/SalesForce.py +20 -0
- flowtask/components/SaveImageBank/__init__.py +257 -0
- flowtask/components/SchedulingVisits.py +592 -0
- flowtask/components/ScrapPage.py +216 -0
- flowtask/components/ScrapSearch.py +79 -0
- flowtask/components/SendNotify.py +257 -0
- flowtask/components/SentimentAnalysis.py +694 -0
- flowtask/components/ServiceScrapper/__init__.py +5 -0
- flowtask/components/ServiceScrapper/parsers/__init__.py +1 -0
- flowtask/components/ServiceScrapper/parsers/base.py +94 -0
- flowtask/components/ServiceScrapper/parsers/costco.py +93 -0
- flowtask/components/ServiceScrapper/scrapper.py +199 -0
- flowtask/components/SetVariables.py +156 -0
- flowtask/components/SubTask.py +182 -0
- flowtask/components/SuiteCRM.py +48 -0
- flowtask/components/Switch.py +175 -0
- flowtask/components/TableBase.py +148 -0
- flowtask/components/TableDelete.py +312 -0
- flowtask/components/TableInput.py +143 -0
- flowtask/components/TableOutput/TableOutput.py +384 -0
- flowtask/components/TableOutput/__init__.py +3 -0
- flowtask/components/TableSchema.py +534 -0
- flowtask/components/Target.py +223 -0
- flowtask/components/ThumbnailGenerator.py +156 -0
- flowtask/components/ToPandas.py +67 -0
- flowtask/components/TransformRows/TransformRows.py +507 -0
- flowtask/components/TransformRows/__init__.py +9 -0
- flowtask/components/TransformRows/functions.py +559 -0
- flowtask/components/TransposeRows.py +176 -0
- flowtask/components/UPCDatabase.py +86 -0
- flowtask/components/UnGzip.py +171 -0
- flowtask/components/Uncompress.py +172 -0
- flowtask/components/UniqueRows.py +126 -0
- flowtask/components/Unzip.py +107 -0
- flowtask/components/UpdateOperationalVars.py +147 -0
- flowtask/components/UploadTo.py +299 -0
- flowtask/components/UploadToS3.py +136 -0
- flowtask/components/UploadToSFTP.py +160 -0
- flowtask/components/UploadToSharepoint.py +205 -0
- flowtask/components/UserFunc.py +122 -0
- flowtask/components/VivaTracker.py +140 -0
- flowtask/components/WSDLClient.py +123 -0
- flowtask/components/Wait.py +18 -0
- flowtask/components/Walmart.py +199 -0
- flowtask/components/Workplace.py +134 -0
- flowtask/components/XMLToPandas.py +267 -0
- flowtask/components/Zammad/__init__.py +41 -0
- flowtask/components/Zammad/models.py +0 -0
- flowtask/components/ZoomInfoScraper.py +409 -0
- flowtask/components/__init__.py +104 -0
- flowtask/components/abstract.py +18 -0
- flowtask/components/flow.py +530 -0
- flowtask/components/google.py +335 -0
- flowtask/components/group.py +221 -0
- flowtask/components/py.typed +0 -0
- flowtask/components/reviewscrap.py +132 -0
- flowtask/components/tAutoincrement.py +117 -0
- flowtask/components/tConcat.py +109 -0
- flowtask/components/tExplode.py +119 -0
- flowtask/components/tFilter.py +184 -0
- flowtask/components/tGroup.py +236 -0
- flowtask/components/tJoin.py +270 -0
- flowtask/components/tMap/__init__.py +9 -0
- flowtask/components/tMap/functions.py +54 -0
- flowtask/components/tMap/tMap.py +450 -0
- flowtask/components/tMelt.py +112 -0
- flowtask/components/tMerge.py +114 -0
- flowtask/components/tOrder.py +93 -0
- flowtask/components/tPandas.py +94 -0
- flowtask/components/tPivot.py +71 -0
- flowtask/components/tPluckCols.py +76 -0
- flowtask/components/tUnnest.py +82 -0
- flowtask/components/user.py +401 -0
- flowtask/conf.py +457 -0
- flowtask/download.py +102 -0
- flowtask/events/__init__.py +11 -0
- flowtask/events/events/__init__.py +20 -0
- flowtask/events/events/abstract.py +95 -0
- flowtask/events/events/alerts/__init__.py +362 -0
- flowtask/events/events/alerts/colfunctions.py +131 -0
- flowtask/events/events/alerts/functions.py +158 -0
- flowtask/events/events/dummy.py +12 -0
- flowtask/events/events/exec.py +124 -0
- flowtask/events/events/file/__init__.py +7 -0
- flowtask/events/events/file/base.py +51 -0
- flowtask/events/events/file/copy.py +23 -0
- flowtask/events/events/file/delete.py +16 -0
- flowtask/events/events/interfaces/__init__.py +9 -0
- flowtask/events/events/interfaces/client.py +67 -0
- flowtask/events/events/interfaces/credentials.py +28 -0
- flowtask/events/events/interfaces/notifications.py +58 -0
- flowtask/events/events/jira.py +122 -0
- flowtask/events/events/log.py +26 -0
- flowtask/events/events/logerr.py +52 -0
- flowtask/events/events/notify.py +59 -0
- flowtask/events/events/notify_event.py +160 -0
- flowtask/events/events/publish.py +54 -0
- flowtask/events/events/sendfile.py +104 -0
- flowtask/events/events/task.py +97 -0
- flowtask/events/events/teams.py +98 -0
- flowtask/events/events/webhook.py +58 -0
- flowtask/events/manager.py +287 -0
- flowtask/exceptions.c +39393 -0
- flowtask/exceptions.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/extensions/__init__.py +3 -0
- flowtask/extensions/abstract.py +82 -0
- flowtask/extensions/logging/__init__.py +65 -0
- flowtask/hooks/__init__.py +9 -0
- flowtask/hooks/actions/__init__.py +22 -0
- flowtask/hooks/actions/abstract.py +66 -0
- flowtask/hooks/actions/dummy.py +23 -0
- flowtask/hooks/actions/jira.py +74 -0
- flowtask/hooks/actions/rest.py +320 -0
- flowtask/hooks/actions/sampledata.py +37 -0
- flowtask/hooks/actions/sensor.py +23 -0
- flowtask/hooks/actions/task.py +9 -0
- flowtask/hooks/actions/ticket.py +37 -0
- flowtask/hooks/actions/zammad.py +55 -0
- flowtask/hooks/hook.py +62 -0
- flowtask/hooks/models.py +17 -0
- flowtask/hooks/service.py +187 -0
- flowtask/hooks/step.py +91 -0
- flowtask/hooks/types/__init__.py +23 -0
- flowtask/hooks/types/base.py +129 -0
- flowtask/hooks/types/brokers/__init__.py +11 -0
- flowtask/hooks/types/brokers/base.py +54 -0
- flowtask/hooks/types/brokers/mqtt.py +35 -0
- flowtask/hooks/types/brokers/rabbitmq.py +82 -0
- flowtask/hooks/types/brokers/redis.py +83 -0
- flowtask/hooks/types/brokers/sqs.py +44 -0
- flowtask/hooks/types/fs.py +232 -0
- flowtask/hooks/types/http.py +49 -0
- flowtask/hooks/types/imap.py +200 -0
- flowtask/hooks/types/jira.py +279 -0
- flowtask/hooks/types/mail.py +205 -0
- flowtask/hooks/types/postgres.py +98 -0
- flowtask/hooks/types/responses/__init__.py +8 -0
- flowtask/hooks/types/responses/base.py +5 -0
- flowtask/hooks/types/sharepoint.py +288 -0
- flowtask/hooks/types/ssh.py +141 -0
- flowtask/hooks/types/tagged.py +59 -0
- flowtask/hooks/types/upload.py +85 -0
- flowtask/hooks/types/watch.py +71 -0
- flowtask/hooks/types/web.py +36 -0
- flowtask/interfaces/AzureClient.py +137 -0
- flowtask/interfaces/AzureGraph.py +839 -0
- flowtask/interfaces/Boto3Client.py +326 -0
- flowtask/interfaces/DropboxClient.py +173 -0
- flowtask/interfaces/ExcelHandler.py +94 -0
- flowtask/interfaces/FTPClient.py +131 -0
- flowtask/interfaces/GoogleCalendar.py +201 -0
- flowtask/interfaces/GoogleClient.py +133 -0
- flowtask/interfaces/GoogleDrive.py +127 -0
- flowtask/interfaces/GoogleGCS.py +89 -0
- flowtask/interfaces/GoogleGeocoding.py +93 -0
- flowtask/interfaces/GoogleLang.py +114 -0
- flowtask/interfaces/GooglePub.py +61 -0
- flowtask/interfaces/GoogleSheet.py +68 -0
- flowtask/interfaces/IMAPClient.py +137 -0
- flowtask/interfaces/O365Calendar.py +113 -0
- flowtask/interfaces/O365Client.py +220 -0
- flowtask/interfaces/OneDrive.py +284 -0
- flowtask/interfaces/Outlook.py +155 -0
- flowtask/interfaces/ParrotBot.py +130 -0
- flowtask/interfaces/SSHClient.py +378 -0
- flowtask/interfaces/Sharepoint.py +496 -0
- flowtask/interfaces/__init__.py +36 -0
- flowtask/interfaces/azureauth.py +119 -0
- flowtask/interfaces/cache.py +201 -0
- flowtask/interfaces/client.py +82 -0
- flowtask/interfaces/compress.py +525 -0
- flowtask/interfaces/credentials.py +124 -0
- flowtask/interfaces/d2l.py +239 -0
- flowtask/interfaces/databases/__init__.py +5 -0
- flowtask/interfaces/databases/db.py +223 -0
- flowtask/interfaces/databases/documentdb.py +55 -0
- flowtask/interfaces/databases/rethink.py +39 -0
- flowtask/interfaces/dataframes/__init__.py +11 -0
- flowtask/interfaces/dataframes/abstract.py +21 -0
- flowtask/interfaces/dataframes/arrow.py +71 -0
- flowtask/interfaces/dataframes/dt.py +69 -0
- flowtask/interfaces/dataframes/pandas.py +167 -0
- flowtask/interfaces/dataframes/polars.py +60 -0
- flowtask/interfaces/db.py +263 -0
- flowtask/interfaces/env.py +46 -0
- flowtask/interfaces/func.py +137 -0
- flowtask/interfaces/http.py +1780 -0
- flowtask/interfaces/locale.py +40 -0
- flowtask/interfaces/log.py +75 -0
- flowtask/interfaces/mask.py +143 -0
- flowtask/interfaces/notification.py +154 -0
- flowtask/interfaces/playwright.py +339 -0
- flowtask/interfaces/powerpoint.py +368 -0
- flowtask/interfaces/py.typed +0 -0
- flowtask/interfaces/qs.py +376 -0
- flowtask/interfaces/result.py +87 -0
- flowtask/interfaces/selenium_service.py +779 -0
- flowtask/interfaces/smartsheet.py +154 -0
- flowtask/interfaces/stat.py +39 -0
- flowtask/interfaces/task.py +96 -0
- flowtask/interfaces/template.py +118 -0
- flowtask/interfaces/vectorstores/__init__.py +1 -0
- flowtask/interfaces/vectorstores/abstract.py +133 -0
- flowtask/interfaces/vectorstores/milvus.py +669 -0
- flowtask/interfaces/zammad.py +107 -0
- flowtask/models.py +193 -0
- flowtask/parsers/__init__.py +15 -0
- flowtask/parsers/_yaml.c +11978 -0
- flowtask/parsers/_yaml.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/argparser.py +235 -0
- flowtask/parsers/base.c +15155 -0
- flowtask/parsers/base.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/json.c +11968 -0
- flowtask/parsers/json.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/maps.py +49 -0
- flowtask/parsers/toml.c +11968 -0
- flowtask/parsers/toml.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/plugins/__init__.py +16 -0
- flowtask/plugins/components/__init__.py +0 -0
- flowtask/plugins/handler/__init__.py +45 -0
- flowtask/plugins/importer.py +31 -0
- flowtask/plugins/sources/__init__.py +0 -0
- flowtask/runner.py +283 -0
- flowtask/scheduler/__init__.py +9 -0
- flowtask/scheduler/functions.py +493 -0
- flowtask/scheduler/handlers/__init__.py +8 -0
- flowtask/scheduler/handlers/manager.py +504 -0
- flowtask/scheduler/handlers/models.py +58 -0
- flowtask/scheduler/handlers/service.py +72 -0
- flowtask/scheduler/notifications.py +65 -0
- flowtask/scheduler/scheduler.py +993 -0
- flowtask/services/__init__.py +0 -0
- flowtask/services/bots/__init__.py +0 -0
- flowtask/services/bots/telegram.py +264 -0
- flowtask/services/files/__init__.py +11 -0
- flowtask/services/files/manager.py +522 -0
- flowtask/services/files/model.py +37 -0
- flowtask/services/files/service.py +767 -0
- flowtask/services/jira/__init__.py +3 -0
- flowtask/services/jira/jira_actions.py +191 -0
- flowtask/services/tasks/__init__.py +13 -0
- flowtask/services/tasks/launcher.py +213 -0
- flowtask/services/tasks/manager.py +323 -0
- flowtask/services/tasks/service.py +275 -0
- flowtask/services/tasks/task_manager.py +376 -0
- flowtask/services/tasks/tasks.py +155 -0
- flowtask/storages/__init__.py +16 -0
- flowtask/storages/exceptions.py +12 -0
- flowtask/storages/files/__init__.py +8 -0
- flowtask/storages/files/abstract.py +29 -0
- flowtask/storages/files/filesystem.py +66 -0
- flowtask/storages/tasks/__init__.py +19 -0
- flowtask/storages/tasks/abstract.py +26 -0
- flowtask/storages/tasks/database.py +33 -0
- flowtask/storages/tasks/filesystem.py +108 -0
- flowtask/storages/tasks/github.py +119 -0
- flowtask/storages/tasks/memory.py +45 -0
- flowtask/storages/tasks/row.py +25 -0
- flowtask/tasks/__init__.py +0 -0
- flowtask/tasks/abstract.py +526 -0
- flowtask/tasks/command.py +118 -0
- flowtask/tasks/pile.py +486 -0
- flowtask/tasks/py.typed +0 -0
- flowtask/tasks/task.py +778 -0
- flowtask/template/__init__.py +161 -0
- flowtask/tests.py +257 -0
- flowtask/types/__init__.py +8 -0
- flowtask/types/typedefs.c +11347 -0
- flowtask/types/typedefs.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/__init__.py +24 -0
- flowtask/utils/constants.py +117 -0
- flowtask/utils/encoders.py +21 -0
- flowtask/utils/executor.py +112 -0
- flowtask/utils/functions.cpp +14280 -0
- flowtask/utils/functions.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/json.cpp +13349 -0
- flowtask/utils/json.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/mail.py +63 -0
- flowtask/utils/parseqs.c +13324 -0
- flowtask/utils/parserqs.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/stats.py +308 -0
- flowtask/utils/transformations.py +74 -0
- flowtask/utils/uv.py +12 -0
- flowtask/utils/validators.py +97 -0
- flowtask/version.py +11 -0
- flowtask-5.8.4.dist-info/LICENSE +201 -0
- flowtask-5.8.4.dist-info/METADATA +209 -0
- flowtask-5.8.4.dist-info/RECORD +470 -0
- flowtask-5.8.4.dist-info/WHEEL +6 -0
- flowtask-5.8.4.dist-info/entry_points.txt +3 -0
- flowtask-5.8.4.dist-info/top_level.txt +2 -0
- plugins/components/CreateQR.py +39 -0
- plugins/components/TestComponent.py +28 -0
- plugins/components/Use1.py +13 -0
- plugins/components/Workplace.py +117 -0
- plugins/components/__init__.py +3 -0
- plugins/sources/__init__.py +0 -0
- plugins/sources/get_populartimes.py +78 -0
- plugins/sources/google.py +150 -0
- plugins/sources/hubspot.py +679 -0
- plugins/sources/icims.py +679 -0
- plugins/sources/mobileinsight.py +501 -0
- plugins/sources/newrelic.py +262 -0
- plugins/sources/uap.py +268 -0
- plugins/sources/venu.py +244 -0
- plugins/sources/vocinity.py +314 -0
@@ -0,0 +1,559 @@
|
|
1
|
+
"""
|
2
|
+
Functions.
|
3
|
+
|
4
|
+
Tree of TransformRows functions.
|
5
|
+
|
6
|
+
"""
|
7
|
+
from typing import List, Optional
|
8
|
+
import re
|
9
|
+
import requests
|
10
|
+
import numpy as np
|
11
|
+
from numba import njit
|
12
|
+
from datetime import datetime
|
13
|
+
import pytz
|
14
|
+
from zoneinfo import ZoneInfo
|
15
|
+
from dateutil import parser
|
16
|
+
import pandas
|
17
|
+
from ...conf import BARCODELOOKUP_API_KEY
|
18
|
+
from ...utils.executor import getFunction
|
19
|
+
|
20
|
+
|
21
|
+
def apply_function(
    df: pandas.DataFrame,
    field: str,
    fname: str,
    column: Optional[str] = None,
    **kwargs
) -> pandas.DataFrame:
    """
    Run a named scalar function over one column and store the output in `field`.

    Parameters:
    - df: pandas DataFrame, modified in place and returned.
    - field: Name of the column that receives the result.
    - fname: Name handed to `getFunction` to resolve the callable.
    - column: Column whose values are fed to the function. When None the
      function is applied to `field` itself (created as all-None if missing).
    - **kwargs: Extra keyword arguments forwarded to every function call.

    Errors raised while resolving `fname` propagate to the caller; errors
    raised while applying the function are printed and `df` is returned as is.
    """
    # Resolution failures are intentionally not handled here.
    func = getFunction(fname)

    def _call(value):
        return func(value, **kwargs)

    try:
        if column is None:
            if field not in df.columns:
                # Target column doesn't exist yet: seed it so .apply has input.
                df[field] = None
            source = field
        else:
            # Read from `column`, but still write into `field`.
            source = column
        df[field] = df[source].apply(_call)
    except Exception as err:
        print(
            f"Error in apply_function for field {field}:", err
        )
    return df
|
61
|
+
|
62
|
+
|
63
|
+
def get_product(row, field, columns):
    """
    Retrieves product information from the Barcode Lookup API based on a barcode.

    The request is sent with an explicit timeout and the barcode/key are passed
    as query parameters so they are URL-encoded. A non-success HTTP status
    raises `requests.HTTPError`. When the response carries no products, every
    requested column is set to None instead of failing on an empty list.

    :param row: The DataFrame row containing the barcode.
    :param field: The name of the field containing the barcode.
    :param columns: The list of columns to extract from the API response.
    :return: The DataFrame row with the product information.
    :raises requests.RequestException: on connection errors, timeouts or
        non-success HTTP statuses.
    """
    barcode = row[field]
    response = requests.get(
        'https://api.barcodelookup.com/v3/products',
        # str() mirrors how the value was previously formatted into the URL.
        params={'barcode': str(barcode), 'key': BARCODELOOKUP_API_KEY},
        # Without a timeout a stalled connection would block the whole apply().
        timeout=30,
    )
    response.raise_for_status()
    products = response.json().get('products') or []
    # Only the first match is used; an empty result yields None for each column.
    result = products[0] if products else {}
    for col in columns:
        row[col] = result.get(col)
    return row
|
83
|
+
|
84
|
+
|
85
|
+
def upc_to_product(
    df: pandas.DataFrame,
    field: str,
    columns: list = ['barcode_formats', 'mpn', 'asin', 'title', 'category', 'model', 'brand']
) -> pandas.DataFrame:
    """
    Enrich every row of a DataFrame with product data looked up by UPC code.

    Each row is passed to `get_product`, which fills the requested columns.

    :param df: The DataFrame containing the UPC codes.
    :param field: The name of the field containing the UPC codes.
    :param columns: The product attributes to copy into the row. The default
        list is shared between calls and is only read here, never mutated.
    :return: The enriched DataFrame, or the original `df` untouched when any
        row lookup raises (the error is printed).
    """
    def _enrich(record):
        return get_product(record, field, columns)

    try:
        enriched = df.apply(_enrich, axis=1)
    except Exception as err:
        # A single failing row aborts the whole enrichment.
        print(f"Error on upc_to_product {field}:", err)
        return df
    return enriched
|
104
|
+
|
105
|
+
def day_of_week(
    df: pandas.DataFrame,
    field: str,
    column: str,
    locale: str = 'en_US.utf8'
) -> pandas.DataFrame:
    """
    Store the weekday name of a datetime column in `field`.

    :param df: The DataFrame containing the date column (modified in place).
    :param field: The name of the field to store the day of the week.
    :param column: The name of the date column.
    :param locale: Locale passed to `Series.dt.day_name` for the day names.
    :return: The DataFrame with the day of the week; if the conversion fails
        the error is printed and `df` is returned without the new values.
    """
    try:
        weekday_names = df[column].dt.day_name(locale=locale)
        df[field] = weekday_names
    except Exception as err:
        print(f"Error on day_of_week {field}:", err)
    return df
|
125
|
+
|
126
|
+
def duration(
    df: pandas.DataFrame,
    field: str,
    columns: List[str],
    unit: str = 's'
) -> pandas.DataFrame:
    """
    Computes the elapsed time between two datetime columns in a given unit.

    :param df: The DataFrame containing the datetime columns (modified in place).
    :param field: The name of the field to store the computed duration.
    :param columns: Two column names, `[start, end]`; the result is
        `end - start`.
    :param unit: The unit of the result: 's' (seconds), 'm' (minutes),
        'h' (hours) or 'd' (days).
    :return: The DataFrame with the duration as float. On any error
        (unsupported unit, missing column, non-datetime data) the error is
        printed and `df` is returned without the new values.
    """
    # Number of seconds contained in one of each supported unit.
    seconds_per_unit = {'s': 1.0, 'm': 60.0, 'h': 3600.0, 'd': 86400.0}
    try:
        if unit not in seconds_per_unit:
            # Fail with an explicit message instead of an unbound-variable error.
            raise ValueError(
                f"Unsupported unit {unit!r}; expected one of 's', 'm', 'h', 'd'"
            )
        start_col, end_col = columns[0], columns[1]
        elapsed_seconds = (df[end_col] - df[start_col]).dt.total_seconds()
        df[field] = elapsed_seconds / seconds_per_unit[unit]
        return df
    except Exception as err:
        print(f"Error on duration {field}:", err)
        return df
|
158
|
+
|
159
|
+
|
160
|
+
def get_moment(
    df: pandas.DataFrame,
    field: str,
    column: str,
    moments: List[tuple] = None,
) -> pandas.DataFrame:
    """
    Label each row with the moment of the day its `column` value falls into.

    df: pandas DataFrame (modified in place)
    field: name of the column that receives the label
    column: name of the column to compare (e.g. "updated_hour")
    moments: list of tuples [(label, (start, end)), ...] where a value
             matches when start <= value < end,
             e.g. [("night",(0,7)), ("morning",(7,10)), ...].
             When empty or None a default day partition is used.
    returns: the DataFrame with the new `field` column; rows matching no
             range get None, and the first matching range wins.
    """
    if not moments:
        moments = [
            ("night", (0, 7)),        # 0 <= value < 7
            ("morning", (7, 10)),     # 7 <= value < 10
            ("afternoon", (10, 16)),  # 10 <= value < 16
            ("evening", (16, 20)),    # 16 <= value < 20
            ("night", (20, 24)),      # 20 <= value < 24
        ]
    labels = []
    conditions = []
    for label, (start, end) in moments:
        labels.append(label)
        # Half-open interval: lower bound included, upper bound excluded.
        conditions.append((df[column] >= start) & (df[column] < end))
    df[field] = np.select(conditions, labels, default=None)
    return df
|
187
|
+
|
188
|
+
|
189
|
+
def fully_geoloc(
    df: pandas.DataFrame,
    field: str,
    columns: List[tuple],
    inverse: bool = False
) -> pandas.DataFrame:
    """
    Flag rows whose listed columns are all filled (or, inverted, all missing).

    Parameters:
        df (pd.DataFrame): The DataFrame (modified in place).
        field (str): The name of the boolean output column.
        columns (list of tuple of str): Groups of column names to inspect;
            every column of every group takes part in the check.
            Example: [("start_lat", "start_long"), ("end_lat", "end_long")]
        inverse (bool): When False, a row is True if every column is neither
            NaN nor the empty string. When True, a row is True if every
            column is NaN or the empty string.

    Returns:
        pd.DataFrame: The original DataFrame with the new `field` column.
    """
    # Begin with every row accepted, then narrow it down column by column.
    flags = pandas.Series(True, index=df.index)

    # The grouping has no effect on the outcome, so walk the columns flat.
    for name in (col for group in columns for col in group):
        values = df[name]
        if inverse:
            check = values.isna() | (values == "")
        else:
            check = values.notna() & (values != "")
        flags = flags & check

    df[field] = flags
    return df
|
222
|
+
|
223
|
+
|
224
|
+
def any_tuple_valid(
    df: pandas.DataFrame,
    field: str,
    columns: List[tuple]
) -> pandas.DataFrame:
    """
    Flag rows for which at least one column group is completely filled.

    Parameters:
        df (pd.DataFrame): The DataFrame (modified in place).
        field (str): The name of the boolean output column.
        columns (list of tuple of str): Groups of column names. A group
            counts as valid for a row when all of its columns are neither
            NaN nor the empty string.
            Example: [("start_lat", "start_long"), ("end_lat", "end_long")]

    Returns:
        pd.DataFrame: The original DataFrame with the new `field` column.
    """
    def _filled(name):
        # True where the column holds a real, non-blank value.
        series = df[name]
        return series.notna() & (series != "")

    # No group has matched yet, so every row starts out False.
    outcome = pandas.Series(False, index=df.index)

    for group in columns:
        # AND across the columns of one group ...
        complete = pandas.Series(True, index=df.index)
        for name in group:
            complete = complete & _filled(name)
        # ... and OR across the groups.
        outcome = outcome | complete

    df[field] = outcome
    return df
|
260
|
+
|
261
|
+
|
262
|
+
@njit
def haversine_distance(
    lat1: float,
    lon1: float,
    lat2: float,
    lon2: float,
    unit: str = 'km'
) -> float:
    """Great-circle (haversine) distance between two points given in decimal degrees.

    The result is expressed in `unit`: 'km' (kilometers), 'm' (meters) or
    'mi' (miles). Any other value falls back to kilometers.
    """
    # Work in radians.
    phi1 = np.radians(lat1)
    lam1 = np.radians(lon1)
    phi2 = np.radians(lat2)
    lam2 = np.radians(lon2)

    delta_lam = lam2 - lam1
    delta_phi = phi2 - phi1

    # Haversine formula: central angle between the two points.
    a = np.sin(delta_phi / 2)**2 + np.cos(phi1) * np.cos(phi2) * np.sin(delta_lam / 2)**2
    angle = 2 * np.arcsin(np.sqrt(a))

    # Earth radius in the requested unit; kilometers is also the fallback.
    radius = 6371.0
    if unit == 'm':
        radius = 6371000.0
    elif unit == 'mi':
        # NOTE(review): 3956 mi is not the exact equivalent of 6371 km
        # (about 3958.8 mi) -- confirm which value is intended.
        radius = 3956.0

    return angle * radius
|
292
|
+
|
293
|
+
def calculate_distance(
    df: pandas.DataFrame,
    field: str,
    columns: List[tuple],
    unit: str = 'km',
    chunk_size: int = 1000
) -> pandas.DataFrame:
    """
    Add a haversine distance column to a copy of a dataframe.

    Args:
        df: pandas DataFrame holding the coordinate columns named in `columns`.
        field: name of the column that receives the computed distance.
        columns: list of tuples with column names for coordinates
            - First tuple: [latitude1, longitude1]
            - Second tuple: [latitude2, longitude2]
        unit: unit of distance ('km' for kilometers, 'm' for meters, 'mi' for miles)
        chunk_size: number of rows to process at once for large datasets

    Returns:
        A copy of `df` with the additional `field` column. Rows that could not
        be computed (an error is printed, not raised) are left as NaN.
    """
    result = df.copy()
    # Unpack column names
    (lat1_col, lon1_col), (lat2_col, lon2_col) = columns

    # Distances are collected positionally. Writing through
    # `result.loc[chunk.index, field]` is label-based and breaks when the
    # index contains duplicate labels (e.g. after a concat).
    distances = np.full(len(df), np.nan, dtype=np.float64)
    try:
        for start in range(0, len(df), chunk_size):
            stop = start + chunk_size
            chunk = df.iloc[start:stop]
            # Convert to standard NumPy arrays before passing to haversine_distance
            distances[start:stop] = haversine_distance(
                chunk[lat1_col].to_numpy(dtype=np.float64),
                chunk[lon1_col].to_numpy(dtype=np.float64),
                chunk[lat2_col].to_numpy(dtype=np.float64),
                chunk[lon2_col].to_numpy(dtype=np.float64),
                unit=unit
            )
    except Exception as err:
        # Best effort: chunks computed before the failure are kept.
        print(f"Error on calculate_distance {field}:", err)
    result[field] = distances
    return result
|
336
|
+
|
337
|
+
|
338
|
+
def drop_timezone(
    df: pandas.DataFrame,
    field: str,
    column: Optional[str] = None
) -> pandas.DataFrame:
    """
    Drop the timezone information from a datetime column.

    The wall-clock time is kept as-is; only the timezone is removed.

    Args:
        df: pandas DataFrame with a datetime column
        field: name of the column that receives the timezone-free values
        column: name of the column to read from (defaults to `field`)

    Returns:
        The same DataFrame, modified in place. On error a message is printed
        and the DataFrame is returned as it was.
    """
    try:
        source = field if column is None else column
        series = df[source]

        # `pandas.api.types.is_datetime64tz_dtype` is deprecated; checking the
        # dtype class is the supported replacement.
        if isinstance(series.dtype, pandas.DatetimeTZDtype):
            # This is a regular tz-aware pandas Series
            df[field] = series.dt.tz_localize(None)

        elif series.dtype == 'object':
            # Object-dtype: strip tzinfo from each element individually
            def remove_tz(x):
                if isinstance(x, (pandas.Timestamp, datetime)) and x.tzinfo is not None:
                    return x.replace(tzinfo=None)
                return x  # leave as-is (could be NaT, None, or already naive)

            df[field] = series.apply(remove_tz).astype('datetime64[ns]')

        else:
            # already naive or not datetime
            df[field] = series
    except Exception as err:
        print(f"Error on drop_timezone {field}:", err)
    return df
|
380
|
+
|
381
|
+
def convert_timezone(
|
382
|
+
df: pandas.DataFrame,
|
383
|
+
field: str,
|
384
|
+
*,
|
385
|
+
column: str | None = None,
|
386
|
+
from_tz: str = "UTC",
|
387
|
+
to_tz: str | None = None,
|
388
|
+
tz_column: str | None = None,
|
389
|
+
default_timezone: str = "UTC",
|
390
|
+
) -> pandas.DataFrame:
|
391
|
+
"""
|
392
|
+
Convert `field` to a target time‑zone.
|
393
|
+
|
394
|
+
Parameters
|
395
|
+
----------
|
396
|
+
df : DataFrame
|
397
|
+
field : name of an existing datetime column
|
398
|
+
column : name of the output column (defaults to `field`)
|
399
|
+
from_tz : timezone used to localise *naive* timestamps
|
400
|
+
to_tz : target timezone (ignored if `tz_column` is given)
|
401
|
+
tz_column : optional column that contains a timezone per row
|
402
|
+
default_tz: fallback when a row's `tz_column` is null/NaN
|
403
|
+
|
404
|
+
Returns:
|
405
|
+
df with converted datetime column
|
406
|
+
"""
|
407
|
+
if column is None:
|
408
|
+
column = field
|
409
|
+
|
410
|
+
try:
|
411
|
+
# --- 1. make a working copy of current column
|
412
|
+
out = df[column].copy()
|
413
|
+
out = pandas.to_datetime(out, errors="coerce") # force datetime dtype
|
414
|
+
|
415
|
+
# --- 2. give tz‑naive stamps a timezone --------------------------------
|
416
|
+
if out.dt.tz is None:
|
417
|
+
out = out.dt.tz_localize(from_tz, ambiguous="infer", nonexistent="raise")
|
418
|
+
|
419
|
+
# --- 3. convert ---------------------------------------------------------
|
420
|
+
if tz_column is None:
|
421
|
+
# same tz for every row
|
422
|
+
target = to_tz or default_timezone
|
423
|
+
out = out.dt.tz_convert(target)
|
424
|
+
else:
|
425
|
+
# using the timezone declared on column:
|
426
|
+
timezones = (
|
427
|
+
df[tz_column]
|
428
|
+
.fillna(default_timezone)
|
429
|
+
.astype("string")
|
430
|
+
)
|
431
|
+
|
432
|
+
# First, convert all timestamps to UTC to have a common base
|
433
|
+
utc_times = out.dt.tz_convert('UTC')
|
434
|
+
|
435
|
+
# Create a list to store the converted datetimes
|
436
|
+
converted_times = []
|
437
|
+
|
438
|
+
# Apply timezone conversion row by row
|
439
|
+
for idx in df.index:
|
440
|
+
try:
|
441
|
+
tz_name = timezones.loc[idx]
|
442
|
+
# Convert the UTC time to the target timezone
|
443
|
+
converted_dt = utc_times.loc[idx].tz_convert(ZoneInfo(tz_name))
|
444
|
+
converted_times.append(converted_dt)
|
445
|
+
except Exception as e:
|
446
|
+
# Handle invalid timezones gracefully
|
447
|
+
converted_dt = utc_times.loc[idx].tz_convert(ZoneInfo(default_timezone))
|
448
|
+
converted_times.append(converted_dt)
|
449
|
+
|
450
|
+
# Create a new Series with the converted values
|
451
|
+
out = pandas.Series(converted_times, index=df.index)
|
452
|
+
|
453
|
+
df[field] = out
|
454
|
+
except Exception as err:
|
455
|
+
print(f"Error on convert_timezone {field}:", err)
|
456
|
+
|
457
|
+
return df
|
458
|
+
|
459
|
+
|
460
|
+
def add_timestamp_to_time(df: pandas.DataFrame, field: str, date: str, time: str):
    """
    Build a timestamp column out of a date column and a time column.

    Both source columns are cast to strings, joined with a single space and
    parsed as datetimes. If parsing fails, the error is printed and the
    DataFrame is returned without the new column.

    :param df: pandas DataFrame to be modified (in place).
    :param field: Name of the column that receives the combined timestamp.
    :param date: Name of the column in df holding the date values.
    :param time: Name of the column in df holding the time values.
    :return: The same DataFrame, with the combined timestamp stored in `field`.
    """
    try:
        date_text = df[date].astype(str)
        time_text = df[time].astype(str)
        df[field] = pandas.to_datetime(date_text + " " + time_text)
    except Exception as e:
        print(f"Error adding timestamp to time: {str(e)}")
    return df
|
477
|
+
|
478
|
+
def _convert_string_to_vector(vector_string):
|
479
|
+
"""
|
480
|
+
Converts a string representation of a list into an actual list.
|
481
|
+
|
482
|
+
:param vector_string: The string representation of the list.
|
483
|
+
:return: The converted list.
|
484
|
+
"""
|
485
|
+
try:
|
486
|
+
# Extract the numbers from the string representation
|
487
|
+
numbers = re.findall(r'-?\d+\.\d+', vector_string)
|
488
|
+
# Convert the extracted strings to float values
|
489
|
+
float_values = [float(num) for num in numbers]
|
490
|
+
# Return as numpy array
|
491
|
+
return np.array(float_values, dtype=np.float32)
|
492
|
+
except Exception as err:
|
493
|
+
print(
|
494
|
+
f"Error converting string to vector: {err}"
|
495
|
+
)
|
496
|
+
return vector_string
|
497
|
+
|
498
|
+
def string_to_vector(df: pandas.DataFrame, field: str) -> pandas.DataFrame:
    """
    Replace the string values of a column with their parsed vector form.

    Every value of `field` is passed through `_convert_string_to_vector`.
    If that fails, the error is printed and the DataFrame is returned
    without the conversion.

    :param df: The DataFrame containing the string representation.
    :param field: The name of the field to convert.
    :return: The same DataFrame, with the converted field.
    """
    try:
        vectors = df[field].apply(_convert_string_to_vector)
        df[field] = vectors
    except Exception as err:
        print(f"Error on vector_string_to_array {field}:", err)
    return df
|
512
|
+
|
513
|
+
def extract_from_dictionary(
    df: pandas.DataFrame,
    field: str,
    column: str,
    key: str,
    conditions: Optional[dict] = None,
    as_timestamp: bool = False
) -> pandas.DataFrame:
    """
    Extracts a value from a JSON-like column in the DataFrame.

    Each cell of `column` may hold a list of dictionaries or a single
    dictionary. The value of `key` is taken from the last dictionary that
    satisfies `conditions`. Cells of any other type, empty cells and cells
    without a matching dictionary yield None.

    :param df: The DataFrame containing the JSON column (modified in place).
    :param field: The name of the field to store the extracted value.
    :param column: The name of the JSON column.
    :param key: The key to extract from the JSON object.
    :param conditions: Optional dictionary of key/value pairs an item must
        match exactly to be considered.
    :param as_timestamp: If True, converts the extracted value to a timestamp
        (None when the value cannot be parsed).
    :return: The DataFrame with the extracted value.
    """
    def extract_from_dict(row, key, conditions=None, as_timestamp=False):
        if isinstance(row, dict):
            # A single object is treated like a one-element list.
            items = [row]
        elif isinstance(row, (list, tuple)):
            # Skip non-dict elements so one malformed entry does not abort
            # the extraction for the whole column.
            items = [item for item in row if isinstance(item, dict)]
        else:
            return None
        # Apply filtering
        if conditions:
            items = [
                item for item in items
                if all(item.get(k) == v for k, v in conditions.items())
            ]
        if not items:
            return None
        # Take last item if multiple
        value = items[-1].get(key)
        if as_timestamp and value:
            try:
                return pandas.to_datetime(value)
            except Exception:
                return None
        return value

    try:
        df[field] = df[column].apply(
            extract_from_dict, args=(key, conditions, as_timestamp)
        )
    except Exception as err:
        print(f"Error on extract_from_json {field}:", err)
    return df
|