flowtask 5.8.4__cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowtask/__init__.py +93 -0
- flowtask/__main__.py +38 -0
- flowtask/bots/__init__.py +6 -0
- flowtask/bots/check.py +93 -0
- flowtask/bots/codebot.py +51 -0
- flowtask/components/ASPX.py +148 -0
- flowtask/components/AddDataset.py +352 -0
- flowtask/components/Amazon.py +523 -0
- flowtask/components/AutoTask.py +314 -0
- flowtask/components/Azure.py +80 -0
- flowtask/components/AzureUsers.py +106 -0
- flowtask/components/BaseAction.py +91 -0
- flowtask/components/BaseLoop.py +198 -0
- flowtask/components/BestBuy.py +800 -0
- flowtask/components/CSVToGCS.py +120 -0
- flowtask/components/CompanyScraper/__init__.py +1 -0
- flowtask/components/CompanyScraper/parsers/__init__.py +6 -0
- flowtask/components/CompanyScraper/parsers/base.py +102 -0
- flowtask/components/CompanyScraper/parsers/explorium.py +192 -0
- flowtask/components/CompanyScraper/parsers/leadiq.py +206 -0
- flowtask/components/CompanyScraper/parsers/rocket.py +133 -0
- flowtask/components/CompanyScraper/parsers/siccode.py +109 -0
- flowtask/components/CompanyScraper/parsers/visualvisitor.py +130 -0
- flowtask/components/CompanyScraper/parsers/zoominfo.py +118 -0
- flowtask/components/CompanyScraper/scrapper.py +1054 -0
- flowtask/components/CopyTo.py +177 -0
- flowtask/components/CopyToBigQuery.py +243 -0
- flowtask/components/CopyToMongoDB.py +291 -0
- flowtask/components/CopyToPg.py +609 -0
- flowtask/components/CopyToRethink.py +207 -0
- flowtask/components/CreateGCSBucket.py +102 -0
- flowtask/components/CreateReport/CreateReport.py +228 -0
- flowtask/components/CreateReport/__init__.py +9 -0
- flowtask/components/CreateReport/charts/__init__.py +15 -0
- flowtask/components/CreateReport/charts/bar.py +51 -0
- flowtask/components/CreateReport/charts/base.py +66 -0
- flowtask/components/CreateReport/charts/pie.py +64 -0
- flowtask/components/CreateReport/utils.py +9 -0
- flowtask/components/CustomerSatisfaction.py +196 -0
- flowtask/components/DataInput.py +200 -0
- flowtask/components/DateList.py +255 -0
- flowtask/components/DbClient.py +163 -0
- flowtask/components/DialPad.py +146 -0
- flowtask/components/DocumentDBQuery.py +200 -0
- flowtask/components/DownloadFrom.py +371 -0
- flowtask/components/DownloadFromD2L.py +113 -0
- flowtask/components/DownloadFromFTP.py +181 -0
- flowtask/components/DownloadFromIMAP.py +315 -0
- flowtask/components/DownloadFromS3.py +198 -0
- flowtask/components/DownloadFromSFTP.py +265 -0
- flowtask/components/DownloadFromSharepoint.py +110 -0
- flowtask/components/DownloadFromSmartSheet.py +114 -0
- flowtask/components/DownloadS3File.py +229 -0
- flowtask/components/Dummy.py +59 -0
- flowtask/components/DuplicatePhoto.py +411 -0
- flowtask/components/EmployeeEvaluation.py +237 -0
- flowtask/components/ExecuteSQL.py +323 -0
- flowtask/components/ExtractHTML.py +178 -0
- flowtask/components/FileBase.py +178 -0
- flowtask/components/FileCopy.py +181 -0
- flowtask/components/FileDelete.py +82 -0
- flowtask/components/FileExists.py +146 -0
- flowtask/components/FileIteratorDelete.py +112 -0
- flowtask/components/FileList.py +194 -0
- flowtask/components/FileOpen.py +75 -0
- flowtask/components/FileRead.py +120 -0
- flowtask/components/FileRename.py +106 -0
- flowtask/components/FilterIf.py +284 -0
- flowtask/components/FilterRows/FilterRows.py +200 -0
- flowtask/components/FilterRows/__init__.py +10 -0
- flowtask/components/FilterRows/functions.py +4 -0
- flowtask/components/GCSToBigQuery.py +103 -0
- flowtask/components/GoogleA4.py +150 -0
- flowtask/components/GoogleGeoCoding.py +344 -0
- flowtask/components/GooglePlaces.py +315 -0
- flowtask/components/GoogleSearch.py +539 -0
- flowtask/components/HTTPClient.py +268 -0
- flowtask/components/ICIMS.py +146 -0
- flowtask/components/IF.py +179 -0
- flowtask/components/IcimsFolderCopy.py +173 -0
- flowtask/components/ImageFeatures/__init__.py +5 -0
- flowtask/components/ImageFeatures/process.py +233 -0
- flowtask/components/IteratorBase.py +251 -0
- flowtask/components/LangchainLoader/__init__.py +5 -0
- flowtask/components/LangchainLoader/loader.py +194 -0
- flowtask/components/LangchainLoader/loaders/__init__.py +22 -0
- flowtask/components/LangchainLoader/loaders/abstract.py +362 -0
- flowtask/components/LangchainLoader/loaders/basepdf.py +50 -0
- flowtask/components/LangchainLoader/loaders/docx.py +91 -0
- flowtask/components/LangchainLoader/loaders/html.py +119 -0
- flowtask/components/LangchainLoader/loaders/pdfblocks.py +146 -0
- flowtask/components/LangchainLoader/loaders/pdfmark.py +79 -0
- flowtask/components/LangchainLoader/loaders/pdftables.py +135 -0
- flowtask/components/LangchainLoader/loaders/qa.py +67 -0
- flowtask/components/LangchainLoader/loaders/txt.py +55 -0
- flowtask/components/LeadIQ.py +650 -0
- flowtask/components/Loop.py +253 -0
- flowtask/components/Lowes.py +334 -0
- flowtask/components/MS365Usage.py +156 -0
- flowtask/components/MSTeamsMessages.py +320 -0
- flowtask/components/MarketClustering.py +1051 -0
- flowtask/components/MergeFiles.py +362 -0
- flowtask/components/MilvusOutput.py +87 -0
- flowtask/components/NearByStores.py +175 -0
- flowtask/components/NetworkNinja/__init__.py +6 -0
- flowtask/components/NetworkNinja/models/__init__.py +52 -0
- flowtask/components/NetworkNinja/models/abstract.py +177 -0
- flowtask/components/NetworkNinja/models/account.py +39 -0
- flowtask/components/NetworkNinja/models/client.py +19 -0
- flowtask/components/NetworkNinja/models/district.py +14 -0
- flowtask/components/NetworkNinja/models/events.py +101 -0
- flowtask/components/NetworkNinja/models/forms.py +499 -0
- flowtask/components/NetworkNinja/models/market.py +16 -0
- flowtask/components/NetworkNinja/models/organization.py +34 -0
- flowtask/components/NetworkNinja/models/photos.py +125 -0
- flowtask/components/NetworkNinja/models/project.py +44 -0
- flowtask/components/NetworkNinja/models/region.py +28 -0
- flowtask/components/NetworkNinja/models/store.py +203 -0
- flowtask/components/NetworkNinja/models/user.py +151 -0
- flowtask/components/NetworkNinja/router.py +854 -0
- flowtask/components/Odoo.py +175 -0
- flowtask/components/OdooInjector.py +192 -0
- flowtask/components/OpenFromXML.py +126 -0
- flowtask/components/OpenWeather.py +41 -0
- flowtask/components/OpenWithBase.py +616 -0
- flowtask/components/OpenWithPandas.py +715 -0
- flowtask/components/PGPDecrypt.py +199 -0
- flowtask/components/PandasIterator.py +187 -0
- flowtask/components/PandasToFile.py +189 -0
- flowtask/components/Paradox.py +339 -0
- flowtask/components/ParamIterator.py +117 -0
- flowtask/components/ParseHTML.py +84 -0
- flowtask/components/PlacerStores.py +249 -0
- flowtask/components/Pokemon.py +507 -0
- flowtask/components/PositiveBot.py +62 -0
- flowtask/components/PowerPointSlide.py +400 -0
- flowtask/components/PrintMessage.py +127 -0
- flowtask/components/ProductCompetitors/__init__.py +5 -0
- flowtask/components/ProductCompetitors/parsers/__init__.py +7 -0
- flowtask/components/ProductCompetitors/parsers/base.py +72 -0
- flowtask/components/ProductCompetitors/parsers/bestbuy.py +86 -0
- flowtask/components/ProductCompetitors/parsers/lowes.py +103 -0
- flowtask/components/ProductCompetitors/scrapper.py +155 -0
- flowtask/components/ProductCompliant.py +169 -0
- flowtask/components/ProductInfo/__init__.py +1 -0
- flowtask/components/ProductInfo/parsers/__init__.py +5 -0
- flowtask/components/ProductInfo/parsers/base.py +83 -0
- flowtask/components/ProductInfo/parsers/brother.py +97 -0
- flowtask/components/ProductInfo/parsers/canon.py +167 -0
- flowtask/components/ProductInfo/parsers/epson.py +118 -0
- flowtask/components/ProductInfo/parsers/hp.py +131 -0
- flowtask/components/ProductInfo/parsers/samsung.py +97 -0
- flowtask/components/ProductInfo/scraper.py +319 -0
- flowtask/components/ProductPricing.py +118 -0
- flowtask/components/QS.py +261 -0
- flowtask/components/QSBase.py +201 -0
- flowtask/components/QueryIterator.py +273 -0
- flowtask/components/QueryToInsert.py +327 -0
- flowtask/components/QueryToPandas.py +432 -0
- flowtask/components/RESTClient.py +195 -0
- flowtask/components/RethinkDBQuery.py +189 -0
- flowtask/components/Rsync.py +74 -0
- flowtask/components/RunSSH.py +59 -0
- flowtask/components/RunShell.py +71 -0
- flowtask/components/SalesForce.py +20 -0
- flowtask/components/SaveImageBank/__init__.py +257 -0
- flowtask/components/SchedulingVisits.py +592 -0
- flowtask/components/ScrapPage.py +216 -0
- flowtask/components/ScrapSearch.py +79 -0
- flowtask/components/SendNotify.py +257 -0
- flowtask/components/SentimentAnalysis.py +694 -0
- flowtask/components/ServiceScrapper/__init__.py +5 -0
- flowtask/components/ServiceScrapper/parsers/__init__.py +1 -0
- flowtask/components/ServiceScrapper/parsers/base.py +94 -0
- flowtask/components/ServiceScrapper/parsers/costco.py +93 -0
- flowtask/components/ServiceScrapper/scrapper.py +199 -0
- flowtask/components/SetVariables.py +156 -0
- flowtask/components/SubTask.py +182 -0
- flowtask/components/SuiteCRM.py +48 -0
- flowtask/components/Switch.py +175 -0
- flowtask/components/TableBase.py +148 -0
- flowtask/components/TableDelete.py +312 -0
- flowtask/components/TableInput.py +143 -0
- flowtask/components/TableOutput/TableOutput.py +384 -0
- flowtask/components/TableOutput/__init__.py +3 -0
- flowtask/components/TableSchema.py +534 -0
- flowtask/components/Target.py +223 -0
- flowtask/components/ThumbnailGenerator.py +156 -0
- flowtask/components/ToPandas.py +67 -0
- flowtask/components/TransformRows/TransformRows.py +507 -0
- flowtask/components/TransformRows/__init__.py +9 -0
- flowtask/components/TransformRows/functions.py +559 -0
- flowtask/components/TransposeRows.py +176 -0
- flowtask/components/UPCDatabase.py +86 -0
- flowtask/components/UnGzip.py +171 -0
- flowtask/components/Uncompress.py +172 -0
- flowtask/components/UniqueRows.py +126 -0
- flowtask/components/Unzip.py +107 -0
- flowtask/components/UpdateOperationalVars.py +147 -0
- flowtask/components/UploadTo.py +299 -0
- flowtask/components/UploadToS3.py +136 -0
- flowtask/components/UploadToSFTP.py +160 -0
- flowtask/components/UploadToSharepoint.py +205 -0
- flowtask/components/UserFunc.py +122 -0
- flowtask/components/VivaTracker.py +140 -0
- flowtask/components/WSDLClient.py +123 -0
- flowtask/components/Wait.py +18 -0
- flowtask/components/Walmart.py +199 -0
- flowtask/components/Workplace.py +134 -0
- flowtask/components/XMLToPandas.py +267 -0
- flowtask/components/Zammad/__init__.py +41 -0
- flowtask/components/Zammad/models.py +0 -0
- flowtask/components/ZoomInfoScraper.py +409 -0
- flowtask/components/__init__.py +104 -0
- flowtask/components/abstract.py +18 -0
- flowtask/components/flow.py +530 -0
- flowtask/components/google.py +335 -0
- flowtask/components/group.py +221 -0
- flowtask/components/py.typed +0 -0
- flowtask/components/reviewscrap.py +132 -0
- flowtask/components/tAutoincrement.py +117 -0
- flowtask/components/tConcat.py +109 -0
- flowtask/components/tExplode.py +119 -0
- flowtask/components/tFilter.py +184 -0
- flowtask/components/tGroup.py +236 -0
- flowtask/components/tJoin.py +270 -0
- flowtask/components/tMap/__init__.py +9 -0
- flowtask/components/tMap/functions.py +54 -0
- flowtask/components/tMap/tMap.py +450 -0
- flowtask/components/tMelt.py +112 -0
- flowtask/components/tMerge.py +114 -0
- flowtask/components/tOrder.py +93 -0
- flowtask/components/tPandas.py +94 -0
- flowtask/components/tPivot.py +71 -0
- flowtask/components/tPluckCols.py +76 -0
- flowtask/components/tUnnest.py +82 -0
- flowtask/components/user.py +401 -0
- flowtask/conf.py +457 -0
- flowtask/download.py +102 -0
- flowtask/events/__init__.py +11 -0
- flowtask/events/events/__init__.py +20 -0
- flowtask/events/events/abstract.py +95 -0
- flowtask/events/events/alerts/__init__.py +362 -0
- flowtask/events/events/alerts/colfunctions.py +131 -0
- flowtask/events/events/alerts/functions.py +158 -0
- flowtask/events/events/dummy.py +12 -0
- flowtask/events/events/exec.py +124 -0
- flowtask/events/events/file/__init__.py +7 -0
- flowtask/events/events/file/base.py +51 -0
- flowtask/events/events/file/copy.py +23 -0
- flowtask/events/events/file/delete.py +16 -0
- flowtask/events/events/interfaces/__init__.py +9 -0
- flowtask/events/events/interfaces/client.py +67 -0
- flowtask/events/events/interfaces/credentials.py +28 -0
- flowtask/events/events/interfaces/notifications.py +58 -0
- flowtask/events/events/jira.py +122 -0
- flowtask/events/events/log.py +26 -0
- flowtask/events/events/logerr.py +52 -0
- flowtask/events/events/notify.py +59 -0
- flowtask/events/events/notify_event.py +160 -0
- flowtask/events/events/publish.py +54 -0
- flowtask/events/events/sendfile.py +104 -0
- flowtask/events/events/task.py +97 -0
- flowtask/events/events/teams.py +98 -0
- flowtask/events/events/webhook.py +58 -0
- flowtask/events/manager.py +287 -0
- flowtask/exceptions.c +39393 -0
- flowtask/exceptions.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/extensions/__init__.py +3 -0
- flowtask/extensions/abstract.py +82 -0
- flowtask/extensions/logging/__init__.py +65 -0
- flowtask/hooks/__init__.py +9 -0
- flowtask/hooks/actions/__init__.py +22 -0
- flowtask/hooks/actions/abstract.py +66 -0
- flowtask/hooks/actions/dummy.py +23 -0
- flowtask/hooks/actions/jira.py +74 -0
- flowtask/hooks/actions/rest.py +320 -0
- flowtask/hooks/actions/sampledata.py +37 -0
- flowtask/hooks/actions/sensor.py +23 -0
- flowtask/hooks/actions/task.py +9 -0
- flowtask/hooks/actions/ticket.py +37 -0
- flowtask/hooks/actions/zammad.py +55 -0
- flowtask/hooks/hook.py +62 -0
- flowtask/hooks/models.py +17 -0
- flowtask/hooks/service.py +187 -0
- flowtask/hooks/step.py +91 -0
- flowtask/hooks/types/__init__.py +23 -0
- flowtask/hooks/types/base.py +129 -0
- flowtask/hooks/types/brokers/__init__.py +11 -0
- flowtask/hooks/types/brokers/base.py +54 -0
- flowtask/hooks/types/brokers/mqtt.py +35 -0
- flowtask/hooks/types/brokers/rabbitmq.py +82 -0
- flowtask/hooks/types/brokers/redis.py +83 -0
- flowtask/hooks/types/brokers/sqs.py +44 -0
- flowtask/hooks/types/fs.py +232 -0
- flowtask/hooks/types/http.py +49 -0
- flowtask/hooks/types/imap.py +200 -0
- flowtask/hooks/types/jira.py +279 -0
- flowtask/hooks/types/mail.py +205 -0
- flowtask/hooks/types/postgres.py +98 -0
- flowtask/hooks/types/responses/__init__.py +8 -0
- flowtask/hooks/types/responses/base.py +5 -0
- flowtask/hooks/types/sharepoint.py +288 -0
- flowtask/hooks/types/ssh.py +141 -0
- flowtask/hooks/types/tagged.py +59 -0
- flowtask/hooks/types/upload.py +85 -0
- flowtask/hooks/types/watch.py +71 -0
- flowtask/hooks/types/web.py +36 -0
- flowtask/interfaces/AzureClient.py +137 -0
- flowtask/interfaces/AzureGraph.py +839 -0
- flowtask/interfaces/Boto3Client.py +326 -0
- flowtask/interfaces/DropboxClient.py +173 -0
- flowtask/interfaces/ExcelHandler.py +94 -0
- flowtask/interfaces/FTPClient.py +131 -0
- flowtask/interfaces/GoogleCalendar.py +201 -0
- flowtask/interfaces/GoogleClient.py +133 -0
- flowtask/interfaces/GoogleDrive.py +127 -0
- flowtask/interfaces/GoogleGCS.py +89 -0
- flowtask/interfaces/GoogleGeocoding.py +93 -0
- flowtask/interfaces/GoogleLang.py +114 -0
- flowtask/interfaces/GooglePub.py +61 -0
- flowtask/interfaces/GoogleSheet.py +68 -0
- flowtask/interfaces/IMAPClient.py +137 -0
- flowtask/interfaces/O365Calendar.py +113 -0
- flowtask/interfaces/O365Client.py +220 -0
- flowtask/interfaces/OneDrive.py +284 -0
- flowtask/interfaces/Outlook.py +155 -0
- flowtask/interfaces/ParrotBot.py +130 -0
- flowtask/interfaces/SSHClient.py +378 -0
- flowtask/interfaces/Sharepoint.py +496 -0
- flowtask/interfaces/__init__.py +36 -0
- flowtask/interfaces/azureauth.py +119 -0
- flowtask/interfaces/cache.py +201 -0
- flowtask/interfaces/client.py +82 -0
- flowtask/interfaces/compress.py +525 -0
- flowtask/interfaces/credentials.py +124 -0
- flowtask/interfaces/d2l.py +239 -0
- flowtask/interfaces/databases/__init__.py +5 -0
- flowtask/interfaces/databases/db.py +223 -0
- flowtask/interfaces/databases/documentdb.py +55 -0
- flowtask/interfaces/databases/rethink.py +39 -0
- flowtask/interfaces/dataframes/__init__.py +11 -0
- flowtask/interfaces/dataframes/abstract.py +21 -0
- flowtask/interfaces/dataframes/arrow.py +71 -0
- flowtask/interfaces/dataframes/dt.py +69 -0
- flowtask/interfaces/dataframes/pandas.py +167 -0
- flowtask/interfaces/dataframes/polars.py +60 -0
- flowtask/interfaces/db.py +263 -0
- flowtask/interfaces/env.py +46 -0
- flowtask/interfaces/func.py +137 -0
- flowtask/interfaces/http.py +1780 -0
- flowtask/interfaces/locale.py +40 -0
- flowtask/interfaces/log.py +75 -0
- flowtask/interfaces/mask.py +143 -0
- flowtask/interfaces/notification.py +154 -0
- flowtask/interfaces/playwright.py +339 -0
- flowtask/interfaces/powerpoint.py +368 -0
- flowtask/interfaces/py.typed +0 -0
- flowtask/interfaces/qs.py +376 -0
- flowtask/interfaces/result.py +87 -0
- flowtask/interfaces/selenium_service.py +779 -0
- flowtask/interfaces/smartsheet.py +154 -0
- flowtask/interfaces/stat.py +39 -0
- flowtask/interfaces/task.py +96 -0
- flowtask/interfaces/template.py +118 -0
- flowtask/interfaces/vectorstores/__init__.py +1 -0
- flowtask/interfaces/vectorstores/abstract.py +133 -0
- flowtask/interfaces/vectorstores/milvus.py +669 -0
- flowtask/interfaces/zammad.py +107 -0
- flowtask/models.py +193 -0
- flowtask/parsers/__init__.py +15 -0
- flowtask/parsers/_yaml.c +11978 -0
- flowtask/parsers/_yaml.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/argparser.py +235 -0
- flowtask/parsers/base.c +15155 -0
- flowtask/parsers/base.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/json.c +11968 -0
- flowtask/parsers/json.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/maps.py +49 -0
- flowtask/parsers/toml.c +11968 -0
- flowtask/parsers/toml.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/plugins/__init__.py +16 -0
- flowtask/plugins/components/__init__.py +0 -0
- flowtask/plugins/handler/__init__.py +45 -0
- flowtask/plugins/importer.py +31 -0
- flowtask/plugins/sources/__init__.py +0 -0
- flowtask/runner.py +283 -0
- flowtask/scheduler/__init__.py +9 -0
- flowtask/scheduler/functions.py +493 -0
- flowtask/scheduler/handlers/__init__.py +8 -0
- flowtask/scheduler/handlers/manager.py +504 -0
- flowtask/scheduler/handlers/models.py +58 -0
- flowtask/scheduler/handlers/service.py +72 -0
- flowtask/scheduler/notifications.py +65 -0
- flowtask/scheduler/scheduler.py +993 -0
- flowtask/services/__init__.py +0 -0
- flowtask/services/bots/__init__.py +0 -0
- flowtask/services/bots/telegram.py +264 -0
- flowtask/services/files/__init__.py +11 -0
- flowtask/services/files/manager.py +522 -0
- flowtask/services/files/model.py +37 -0
- flowtask/services/files/service.py +767 -0
- flowtask/services/jira/__init__.py +3 -0
- flowtask/services/jira/jira_actions.py +191 -0
- flowtask/services/tasks/__init__.py +13 -0
- flowtask/services/tasks/launcher.py +213 -0
- flowtask/services/tasks/manager.py +323 -0
- flowtask/services/tasks/service.py +275 -0
- flowtask/services/tasks/task_manager.py +376 -0
- flowtask/services/tasks/tasks.py +155 -0
- flowtask/storages/__init__.py +16 -0
- flowtask/storages/exceptions.py +12 -0
- flowtask/storages/files/__init__.py +8 -0
- flowtask/storages/files/abstract.py +29 -0
- flowtask/storages/files/filesystem.py +66 -0
- flowtask/storages/tasks/__init__.py +19 -0
- flowtask/storages/tasks/abstract.py +26 -0
- flowtask/storages/tasks/database.py +33 -0
- flowtask/storages/tasks/filesystem.py +108 -0
- flowtask/storages/tasks/github.py +119 -0
- flowtask/storages/tasks/memory.py +45 -0
- flowtask/storages/tasks/row.py +25 -0
- flowtask/tasks/__init__.py +0 -0
- flowtask/tasks/abstract.py +526 -0
- flowtask/tasks/command.py +118 -0
- flowtask/tasks/pile.py +486 -0
- flowtask/tasks/py.typed +0 -0
- flowtask/tasks/task.py +778 -0
- flowtask/template/__init__.py +161 -0
- flowtask/tests.py +257 -0
- flowtask/types/__init__.py +8 -0
- flowtask/types/typedefs.c +11347 -0
- flowtask/types/typedefs.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/__init__.py +24 -0
- flowtask/utils/constants.py +117 -0
- flowtask/utils/encoders.py +21 -0
- flowtask/utils/executor.py +112 -0
- flowtask/utils/functions.cpp +14280 -0
- flowtask/utils/functions.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/json.cpp +13349 -0
- flowtask/utils/json.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/mail.py +63 -0
- flowtask/utils/parseqs.c +13324 -0
- flowtask/utils/parserqs.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/stats.py +308 -0
- flowtask/utils/transformations.py +74 -0
- flowtask/utils/uv.py +12 -0
- flowtask/utils/validators.py +97 -0
- flowtask/version.py +11 -0
- flowtask-5.8.4.dist-info/LICENSE +201 -0
- flowtask-5.8.4.dist-info/METADATA +209 -0
- flowtask-5.8.4.dist-info/RECORD +470 -0
- flowtask-5.8.4.dist-info/WHEEL +6 -0
- flowtask-5.8.4.dist-info/entry_points.txt +3 -0
- flowtask-5.8.4.dist-info/top_level.txt +2 -0
- plugins/components/CreateQR.py +39 -0
- plugins/components/TestComponent.py +28 -0
- plugins/components/Use1.py +13 -0
- plugins/components/Workplace.py +117 -0
- plugins/components/__init__.py +3 -0
- plugins/sources/__init__.py +0 -0
- plugins/sources/get_populartimes.py +78 -0
- plugins/sources/google.py +150 -0
- plugins/sources/hubspot.py +679 -0
- plugins/sources/icims.py +679 -0
- plugins/sources/mobileinsight.py +501 -0
- plugins/sources/newrelic.py +262 -0
- plugins/sources/uap.py +268 -0
- plugins/sources/venu.py +244 -0
- plugins/sources/vocinity.py +314 -0
@@ -0,0 +1,1051 @@
|
|
1
|
+
import asyncio
|
2
|
+
import math
|
3
|
+
import osmnx as ox
|
4
|
+
from osmnx import graph as ox_graph
|
5
|
+
from osmnx import distance as ox_distance
|
6
|
+
import networkx as nx
|
7
|
+
from pyrosm import OSM
|
8
|
+
import numpy as np
|
9
|
+
import pandas as pd
|
10
|
+
from geopy.distance import geodesic
|
11
|
+
from collections.abc import Callable
|
12
|
+
from typing import List, Dict, Optional, Any, Union
|
13
|
+
from navconfig import BASE_DIR
|
14
|
+
from ortools.constraint_solver import pywrapcp, routing_enums_pb2
|
15
|
+
from shapely.geometry import Polygon
|
16
|
+
from scipy.spatial.distance import pdist, squareform
|
17
|
+
from sklearn import metrics
|
18
|
+
from sklearn.cluster import KMeans
|
19
|
+
from sklearn.neighbors import BallTree
|
20
|
+
from .flow import FlowComponent
|
21
|
+
from ..exceptions import DataNotFound, ConfigError, ComponentError
|
22
|
+
|
23
|
+
|
24
|
+
# -----------------------------
|
25
|
+
# Utility Functions
|
26
|
+
# -----------------------------
|
27
|
+
def meters_to_miles(m):
    """Convert a length expressed in meters to statute miles.

    Args:
        m: Length in meters (any value that supports ``*`` with a float).

    Returns:
        ``m`` multiplied by the miles-per-meter factor 0.000621371.
    """
    miles_per_meter = 0.000621371
    return m * miles_per_meter
|
29
|
+
|
30
|
+
|
31
|
+
def miles_to_radians(miles):
    """Convert a great-circle distance in miles to an angle in radians.

    The angle subtended by an arc is ``arc_length / radius``. The Earth
    radius is held in kilometers, so the input is first converted from
    miles to kilometers and then divided by that radius. (Multiplying the
    radius by ``km_per_mi`` instead would yield an angle roughly 2.59x too
    small and would not round-trip with ``radians_to_miles``.)

    Args:
        miles: Distance along the Earth's surface, in statute miles.

    Returns:
        The equivalent central angle in radians.
    """
    earth_radius_km = 6371.0087714150598
    km_per_mi = 1.609344
    # miles -> kilometers -> radians
    return (miles * km_per_mi) / earth_radius_km
|
35
|
+
|
36
|
+
def degrees_to_radians(row):
    """Convert the first two entries of ``row`` from degrees to radians.

    Args:
        row: Indexable whose element 0 and element 1 are angles in degrees
            (the local names treat them as latitude and longitude).

    Returns:
        A ``(lat, lon)`` tuple with both angles converted via ``np.deg2rad``.
    """
    return np.deg2rad(row[0]), np.deg2rad(row[1])
|
41
|
+
|
42
|
+
|
43
|
+
def radians_to_miles(rad):
    """Convert a central angle in radians to a surface distance in miles.

    Args:
        rad: Angle in radians.

    Returns:
        ``rad`` times the Earth radius (km) times the miles-per-km factor.
    """
    # Options here: https://geopy.readthedocs.io/en/stable/#module-geopy.distance
    earth_radius_km = 6371.0087714150598
    miles_per_km = 0.62137119

    arc_km = rad * earth_radius_km
    return arc_km * miles_per_km
|
49
|
+
|
50
|
+
|
51
|
+
def create_data_model(distance_matrix, num_vehicles, depot=0, max_distance=150, max_stores_per_vehicle=3):
    """Bundle the VRP inputs into the dictionary consumed by the solver.

    Args:
        distance_matrix: 2D list or numpy array of pairwise distances.
        num_vehicles: Number of vehicles (routes) to plan.
        depot: Index of the start/end node in ``distance_matrix``.
        max_distance: Upper bound on the distance travelled per vehicle.
        max_stores_per_vehicle: Upper bound on nodes served per vehicle.

    Returns:
        A dict holding the arguments under the keys ``distance_matrix``,
        ``num_vehicles``, ``depot``, ``max_distance`` and
        ``max_stores_per_vehicle``.
    """
    return {
        'distance_matrix': distance_matrix,  # 2D list or numpy array
        'num_vehicles': num_vehicles,
        'depot': depot,
        'max_distance': max_distance,
        'max_stores_per_vehicle': max_stores_per_vehicle,
    }
|
60
|
+
|
61
|
+
|
62
|
+
def solve_vrp(data):
    """Solve a vehicle routing problem with OR-Tools and return the routes.

    Args:
        data: Dict with the keys ``distance_matrix``, ``num_vehicles``,
            ``depot``, ``max_distance`` and ``max_stores_per_vehicle``
            (the shape produced by ``create_data_model``).

    Returns:
        A list with one route per vehicle; each route is a list of node
        indices starting at the vehicle's start node and ending with its
        end node. An empty list is returned (after printing a message)
        when the solver finds no solution.
    """
    # Create the routing index manager
    manager = pywrapcp.RoutingIndexManager(
        len(data['distance_matrix']),
        data['num_vehicles'], data['depot']
    )

    # Create Routing Model
    routing = pywrapcp.RoutingModel(manager)

    # Create and register a transit callback
    def distance_callback(from_index, to_index):
        """Return the matrix distance between two routing indices, scaled by 1000 and truncated to int."""
        # Routing indices are mapped back to distance-matrix node indices.
        from_node = manager.IndexToNode(from_index)
        to_node = manager.IndexToNode(to_index)
        return int(data['distance_matrix'][from_node][to_node] * 1000)  # Convert to integer

    transit_callback_index = routing.RegisterTransitCallback(distance_callback)

    # Define cost of each arc
    routing.SetArcCostEvaluatorOfAllVehicles(transit_callback_index)

    # Add Distance constraint
    # max_distance is scaled by the same factor of 1000 as the callback values.
    routing.AddDimension(
        transit_callback_index,
        0,  # no slack
        int(data['max_distance'] * 1000),  # maximum distance per vehicle
        True,  # start cumul to zero
        'Distance')
    distance_dimension = routing.GetDimensionOrDie('Distance')
    distance_dimension.SetGlobalSpanCostCoefficient(100)

    # Add Constraint: Maximum number of stores per vehicle
    def demand_callback(from_index):
        """Return a constant demand of 1 for whatever index is passed."""
        # NOTE(review): the same value is returned for every index, so the
        # depot/start node presumably also consumes one unit of the
        # max_stores_per_vehicle capacity -- confirm this is intended.
        return 1  # Each store is a demand of 1

    demand_callback_index = routing.RegisterUnaryTransitCallback(demand_callback)
    routing.AddDimensionWithVehicleCapacity(
        demand_callback_index,
        0,  # null capacity slack
        [data['max_stores_per_vehicle']] * data['num_vehicles'],  # vehicle maximum capacities
        True,  # start cumul to zero
        'Capacity')

    # Setting first solution heuristic
    search_parameters = pywrapcp.DefaultRoutingSearchParameters()
    search_parameters.first_solution_strategy = (
        routing_enums_pb2.FirstSolutionStrategy.PATH_CHEAPEST_ARC)

    # Solve the problem
    solution = routing.SolveWithParameters(search_parameters)

    # If no solution found, return empty routes
    if not solution:
        print("No solution found!")
        return []

    # Extract routes
    routes = []
    for vehicle_id in range(data['num_vehicles']):
        index = routing.Start(vehicle_id)
        route = []
        # Follow the NextVar chain from the start index until the end index.
        while not routing.IsEnd(index):
            node = manager.IndexToNode(index)
            route.append(node)
            index = solution.Value(routing.NextVar(index))
        # The end index is appended too, so each route closes on its end node.
        route.append(manager.IndexToNode(index))
        routes.append(route)
    return routes
|
133
|
+
|
134
|
+
|
135
|
+
def print_routes(routes, store_ids):
    """Print every route as a ``->``-joined chain of store ids.

    For each route a header line is printed, then the ids of the visited
    nodes, then a blank line. Any node whose store id equals the id of the
    route's first node (the depot) is left out of the chain.

    Args:
        routes: Iterable of routes, each a sequence of node indices.
        store_ids: Sequence mapping a node index to its store id.
    """
    for i, route in enumerate(routes):
        print(f"Route for ghost employee {i+1}:")
        # Exclude depot if it's part of the route
        visited = []
        for node in route:
            if store_ids[node] != store_ids[route[0]]:
                visited.append(store_ids[node])
        print(" -> ".join(str(store_id) for store_id in visited))
        print()
|
143
|
+
|
144
|
+
|
145
|
+
class MarketClustering(FlowComponent):
|
146
|
+
"""
|
147
|
+
Offline clustering of stores using BallTree+DBSCAN (in miles or km),
|
148
|
+
then generating a fixed number of ghost employees for each cluster,
|
149
|
+
refining if store-to-ghost distance > threshold,
|
150
|
+
and optionally checking daily route constraints.
|
151
|
+
|
152
|
+
Steps:
|
153
|
+
1) Clustering with DBSCAN (haversine + approximate).
|
154
|
+
2) Create ghost employees at cluster centroid (random offset).
|
155
|
+
3) Remove 'unreachable' stores if no ghost employee can reach them within a threshold (e.g. 25 miles).
|
156
|
+
4) Check if a single ghost can cover up to `max_stores_per_day` in a route < `day_hours` or `max_distance_by_day`.
|
157
|
+
If not, we mark that store as 'rejected' too.
|
158
|
+
5) Return two DataFrames: final assignment + rejected stores.
|
159
|
+
"""
|
160
|
+
|
161
|
+
def __init__(
    self,
    loop: asyncio.AbstractEventLoop = None,
    job: Callable = None,
    stat: Callable = None,
    **kwargs,
):
    """Read the component configuration from ``kwargs`` and set up state.

    Most options are removed from ``kwargs`` with ``pop`` so they are not
    forwarded to the base-class constructor; ``custom_filter`` and
    ``network_type`` are read with ``get`` and therefore stay in
    ``kwargs`` and are forwarded as well.

    Args:
        loop: Event loop passed through to the base class.
        job: Passed through to the base class.
        stat: Passed through to the base class.
        **kwargs: Component options (see the assignments below for the
            recognised keys and their defaults); the remainder is passed
            to the base class.
    """
    # DBSCAN config
    self.max_cluster_distance = kwargs.pop('max_cluster_distance', 50.0)
    self.cluster_radius = kwargs.pop('cluster_radius', 150.0)
    self.max_cluster_size: int = kwargs.pop('max_cluster_size', 25)  # number of items in cluster
    self.min_cluster_size: int = kwargs.pop('min_cluster_size', 5)  # minimum number of items in cluster
    self.distance_unit = kwargs.pop('distance_unit', 'miles')  # or 'km'
    self.min_samples = kwargs.pop('min_samples', 1)
    # Names of the cluster id / cluster name fields (defaults: 'market_id', 'market').
    self._cluster_id: str = kwargs.pop('cluster_id', 'market_id')
    self._cluster_name: str = kwargs.pop('cluster_name', 'market')
    # degrees around min/max lat/lon
    self.buffer_deg = kwargs.pop('buffer_deg', 0.01)
    # OSMnx config
    # NOTE: read with get (not pop), so these two keys remain in kwargs
    # and are also passed to super().__init__ below.
    self.custom_filter = kwargs.get(
        "custom_filter",
        '["highway"~"motorway|trunk|primary|secondary|tertiary"]'
    )
    self.network_type = kwargs.get("network_type", "drive")
    # Ghost employees config
    self.num_ghosts_per_cluster = kwargs.pop('num_ghosts_per_cluster', 2)
    self.ghost_distance_threshold = kwargs.pop('ghost_distance_threshold', 50.0)
    # Distance (default 50.0; presumably in distance_unit -- confirm) within
    # which a store is considered "reachable" from a ghost employee.

    # Daily route constraints
    self.max_stores_per_day = kwargs.pop('max_stores_per_day', 3)
    self.day_hours = kwargs.pop('day_hours', 8.0)
    self.max_distance_by_day = kwargs.pop('max_distance_by_day', 150.0)
    # e.g. 150 miles, or if using km, adapt accordingly

    # Refinement with OSMnx route-based distances?
    self.borderline_threshold = kwargs.pop('borderline_threshold', 2.5)

    # bounding box or place
    self.bounding_box = kwargs.pop('bounding_box', None)
    self.place_name = kwargs.pop('place_name', None)

    # Internals
    self._data: pd.DataFrame = pd.DataFrame()
    self._result: Optional[pd.DataFrame] = None
    self._rejected: pd.DataFrame = pd.DataFrame()  # for stores that get dropped
    self._ghosts: List[Dict[str, Any]] = []
    self._graphs: dict = {}
    # Called last so that only the options not popped above reach the base class.
    super().__init__(loop=loop, job=job, stat=stat, **kwargs)
|
210
|
+
|
211
|
+
async def start(self, **kwargs):
|
212
|
+
"""Validate input DataFrame and columns."""
|
213
|
+
if self.previous:
|
214
|
+
self._data = self.input
|
215
|
+
if not isinstance(self._data, pd.DataFrame):
|
216
|
+
raise ConfigError("Incompatible input: Must be a Pandas DataFrame.")
|
217
|
+
else:
|
218
|
+
raise DataNotFound("No input DataFrame found.")
|
219
|
+
|
220
|
+
required_cols = {'store_id', 'latitude', 'longitude'}
|
221
|
+
missing = required_cols - set(self._data.columns)
|
222
|
+
if missing:
|
223
|
+
raise ComponentError(f"DataFrame missing required columns: {missing}")
|
224
|
+
|
225
|
+
return True
|
226
|
+
|
227
|
+
async def close(self):
|
228
|
+
pass
|
229
|
+
|
230
|
+
def get_rejected_stores(self) -> pd.DataFrame:
|
231
|
+
"""Return the DataFrame of rejected stores (those removed from any final market)."""
|
232
|
+
return self._rejected
|
233
|
+
|
234
|
+
# ------------------------------------------------------------------
|
235
|
+
# BallTree + Haversine
|
236
|
+
# ------------------------------------------------------------------
|
237
|
+
|
238
|
+
def _detect_outliers(
|
239
|
+
self,
|
240
|
+
stores: pd.DataFrame,
|
241
|
+
cluster_label: int,
|
242
|
+
cluster_indices: List[int]
|
243
|
+
) -> List[int]:
|
244
|
+
"""
|
245
|
+
1) Compute centroid of all stores in 'cluster_indices'.
|
246
|
+
2) Check each store in that cluster: if dist(store -> centroid) >
|
247
|
+
self.max_cluster_distance, mark as outlier.
|
248
|
+
3) Return a list of outlier indices.
|
249
|
+
"""
|
250
|
+
if not cluster_indices:
|
251
|
+
return []
|
252
|
+
|
253
|
+
# coordinates of cluster
|
254
|
+
arr = stores.loc[cluster_indices, ['latitude', 'longitude']].values
|
255
|
+
|
256
|
+
# Simple approach: K-Means with n_clusters=1
|
257
|
+
# This basically finds the centroid that minimizes sum of squares.
|
258
|
+
km = KMeans(n_clusters=1, random_state=42).fit(arr)
|
259
|
+
centroid = km.cluster_centers_[0] # [lat, lon]
|
260
|
+
|
261
|
+
outliers = []
|
262
|
+
for idx in cluster_indices:
|
263
|
+
store_lat = stores.at[idx, 'latitude']
|
264
|
+
store_lon = stores.at[idx, 'longitude']
|
265
|
+
d = self._haversine_miles(centroid[0], centroid[1], store_lat, store_lon)
|
266
|
+
if d > (self.cluster_radius + self.borderline_threshold):
|
267
|
+
outliers.append(idx)
|
268
|
+
|
269
|
+
return outliers
|
270
|
+
|
271
|
+
def _validate_distance(self, stores, cluster_stores: pd.DataFrame):
|
272
|
+
"""
|
273
|
+
Validates distances between neighbors using precomputed distances.
|
274
|
+
Args:
|
275
|
+
coords_rad (ndarray): Array of [latitude, longitude] in radians.
|
276
|
+
neighbors (ndarray): Array of indices of neighbors.
|
277
|
+
distances (ndarray): Distances from the query point to each neighbor.
|
278
|
+
"""
|
279
|
+
# Convert max_cluster_distance (in miles) to radians
|
280
|
+
max_distance_radians = miles_to_radians(
|
281
|
+
self.max_cluster_distance + self.borderline_threshold
|
282
|
+
)
|
283
|
+
|
284
|
+
# Extract coordinates of the stores in the cluster
|
285
|
+
cluster_coords = cluster_stores[['latitude', 'longitude']].values
|
286
|
+
cluster_indices = cluster_stores.index.tolist()
|
287
|
+
|
288
|
+
# Iterate through each store in the cluster
|
289
|
+
outliers = []
|
290
|
+
for idx, (store_lat, store_lon) in zip(cluster_indices, cluster_coords):
|
291
|
+
# Compute the traveled distance using OSMnx to all other stores in the cluster
|
292
|
+
traveled_distances = []
|
293
|
+
for neighbor_idx, (neighbor_lat, neighbor_lon) in zip(cluster_indices, cluster_coords):
|
294
|
+
if idx == neighbor_idx:
|
295
|
+
continue # Skip self-distance
|
296
|
+
try:
|
297
|
+
# Calculate the traveled distance using OSMnx (network distance)
|
298
|
+
traveled_distance = self._osmnx_travel_distance(
|
299
|
+
store_lat, store_lon, neighbor_lat, neighbor_lon
|
300
|
+
)
|
301
|
+
traveled_distances.append(traveled_distance)
|
302
|
+
except Exception as e:
|
303
|
+
print(f"Error calculating distance for {idx} -> {neighbor_idx}: {e}")
|
304
|
+
|
305
|
+
# Check if the maximum traveled distance exceeds the threshold
|
306
|
+
if traveled_distances and max(traveled_distances) > max_distance_radians:
|
307
|
+
outliers.append(idx)
|
308
|
+
# Mark store as unassigned
|
309
|
+
stores.at[idx, self._cluster_id] = -1
|
310
|
+
|
311
|
+
return outliers
|
312
|
+
|
313
|
+
def _post_process_outliers(self, stores: pd.DataFrame, unassigned: set):
|
314
|
+
"""
|
315
|
+
Assign unassigned stores to the nearest cluster using relaxed distance criteria.
|
316
|
+
"""
|
317
|
+
if not unassigned:
|
318
|
+
return
|
319
|
+
|
320
|
+
# Get cluster centroids
|
321
|
+
clusters = stores[stores[self._cluster_id] != -1].groupby(self._cluster_id)
|
322
|
+
centroids = {
|
323
|
+
cluster_id: cluster_df[['latitude', 'longitude']].mean().values
|
324
|
+
for cluster_id, cluster_df in clusters
|
325
|
+
}
|
326
|
+
|
327
|
+
# Relaxed distance threshold
|
328
|
+
relaxed_threshold = self.cluster_radius + self.relaxed_threshold
|
329
|
+
|
330
|
+
for outlier_idx in list(unassigned):
|
331
|
+
outlier_lat = stores.at[outlier_idx, 'latitude']
|
332
|
+
outlier_lon = stores.at[outlier_idx, 'longitude']
|
333
|
+
|
334
|
+
# Find nearest cluster within relaxed threshold
|
335
|
+
nearest_cluster = None
|
336
|
+
min_distance = float('inf')
|
337
|
+
|
338
|
+
for cluster_id, centroid in centroids.items():
|
339
|
+
distance = self._haversine_miles(centroid[0], centroid[1], outlier_lat, outlier_lon)
|
340
|
+
if distance < relaxed_threshold and distance < min_distance:
|
341
|
+
nearest_cluster = cluster_id
|
342
|
+
min_distance = distance
|
343
|
+
|
344
|
+
# Assign to the nearest cluster if valid
|
345
|
+
if nearest_cluster is not None:
|
346
|
+
stores.at[outlier_idx, self._cluster_id] = nearest_cluster
|
347
|
+
unassigned.remove(outlier_idx)
|
348
|
+
|
349
|
+
print(f"Post-processing completed. Remaining unassigned: {len(unassigned)}")
|
350
|
+
|
351
|
+
def _create_cluster(self, stores: pd.DataFrame):
    """
    Build provisional markets by radius-limited neighbor expansion.

    1) Sort the stores by latitude/longitude and reset the index, so index
       labels equal row positions.
    2) Repeatedly take the first unassigned store as a seed and grow a
       cluster by visiting neighbors found with a haversine BallTree within
       ``self.cluster_radius`` miles, up to ``self.max_cluster_size``
       members. While the cluster is below ``self.min_cluster_size`` and a
       visited store has no free neighbor, the search radius is widened by 10%.
    3) Validate each cluster with ``_detect_outliers``; rejected members go
       back to the unassigned pool and are retried in later clusters.
    4) Stores that could not be kept in any cluster stay labeled -1 and are
       handed to ``_post_process_outliers``.

    Args:
        stores: DataFrame with 'latitude' and 'longitude' columns.

    Returns:
        pd.DataFrame: sorted copy of the input with a 'rad' column
        ([lat, lon] in radians), the cluster-id column and the market-name
        column filled in.
    """
    cluster_col = self._cluster_id

    # 1) Sort by latitude and longitude to ensure spatial proximity in clustering
    stores = stores.sort_values(by=['latitude', 'longitude']).reset_index(drop=True)

    coords_rad = np.radians(stores[['latitude', 'longitude']].to_numpy(dtype=float))

    # Keep the per-row 'rad' column in the output. A 1-D object array is
    # filled explicitly so pandas stores one array per row.
    rad_column = np.empty(len(stores), dtype=object)
    for position, pair in enumerate(coords_rad):
        rad_column[position] = pair
    stores['rad'] = rad_column

    # Initialize cluster labels to -1 (unassigned)
    stores[cluster_col] = -1
    unassigned = set(range(len(stores)))
    leftover = set()  # stores that no cluster could retain
    cluster_label = 0

    # Convert self.cluster_radius (in miles) to radians for BallTree search
    radius_radians = miles_to_radians(self.cluster_radius)
    expanded_radius = radius_radians * 1.1  # Slightly larger radius

    while unassigned:
        # Rebuild the tree over the stores that are still free.
        unassigned_list = sorted(unassigned)
        local_index = {global_idx: pos for pos, global_idx in enumerate(unassigned_list)}
        unassigned_coords = coords_rad[unassigned_list]
        tree = BallTree(unassigned_coords, leaf_size=50, metric='haversine')

        # Start a new cluster from the first unassigned store.
        seed = unassigned_list[0]
        cluster_indices = [seed]
        stores.at[seed, cluster_col] = cluster_label
        unassigned.remove(seed)

        # Stores whose neighborhood still has to be explored (LIFO order).
        frontier = [seed]

        while frontier and len(cluster_indices) < self.max_cluster_size:
            current = frontier.pop()
            point = unassigned_coords[local_index[current]]

            new_candidates = self._unassigned_within(
                tree, point, radius_radians, unassigned_list, unassigned
            )
            if not new_candidates and len(cluster_indices) < self.min_cluster_size:
                # Expand search radius for small clusters. The widened
                # result is used here; it was previously discarded.
                new_candidates = self._unassigned_within(
                    tree, point, expanded_radius, unassigned_list, unassigned
                )
            if not new_candidates:
                continue

            # Limit number of stores to add to not exceed max_cluster_size
            num_needed = self.max_cluster_size - len(cluster_indices)
            new_candidates = new_candidates[:num_needed]

            for cand_idx in new_candidates:
                stores.at[cand_idx, cluster_col] = cluster_label
                unassigned.remove(cand_idx)

            frontier.extend(new_candidates)
            cluster_indices.extend(new_candidates)

        # Validate cluster
        outliers = self._detect_outliers(stores, cluster_label, cluster_indices)
        for out_idx in outliers:
            stores.at[out_idx, cluster_col] = -1

        if len(outliers) == len(cluster_indices):
            # No member survived validation. Re-queueing all of them would
            # rebuild the same cluster forever, so the seed is set aside for
            # post-processing and only the other members are retried.
            leftover.add(seed)
            unassigned.update(idx for idx in outliers if idx != seed)
            continue

        unassigned.update(outliers)
        cluster_label += 1

    # Post-process the stores that no cluster could retain.
    self._logger.info(
        "Starting post-processing for %d unassigned stores.", len(leftover)
    )
    self._post_process_outliers(stores, leftover)

    self._logger.info("Final clusters formed: %d", cluster_label)
    self._logger.info("Total outliers: %d", len(leftover))

    # Map cluster -> market names
    self._apply_market_labels(stores, stores[cluster_col].values)
    return stores

def _unassigned_within(self, tree, point, radius, unassigned_list, unassigned):
    """Return the still-unassigned global indices within ``radius`` radians of ``point``."""
    hits = tree.query_radius(point.reshape(1, -1), r=radius)[0]
    # Tree positions are local to unassigned_list; map them back to global indices.
    return [
        unassigned_list[pos] for pos in hits
        if unassigned_list[pos] in unassigned
    ]
|
474
|
+
|
475
|
+
def _build_haversine_matrix(self, coords_rad, tree: BallTree) -> np.ndarray:
    """
    Build the full NxN matrix of haversine distances (in radians).

    Args:
        coords_rad: N points as [latitude, longitude] in radians.
        tree: BallTree queried for the distances; it must hold at least N points.

    Returns:
        np.ndarray: (N, N) array whose entry [i, j] is the distance from
        point i to the tree point with index j.
    """
    n = len(coords_rad)
    dist_matrix = np.zeros((n, n), dtype=float)
    if n == 0:
        return dist_matrix

    # One batched k=n query returns, for every point, all neighbors with
    # their distances; the distances are scattered back by neighbor index.
    dist, idx = tree.query(np.asarray(coords_rad), k=n)
    row_ids = np.arange(n)[:, None]
    dist_matrix[row_ids, idx] = dist

    return dist_matrix
|
489
|
+
|
490
|
+
def _convert_to_radians(self, value: float, unit: str) -> float:
|
491
|
+
"""
|
492
|
+
Convert value in miles or km to radians (on Earth).
|
493
|
+
Earth radius ~ 6371 km or 3959 miles.
|
494
|
+
"""
|
495
|
+
if unit.lower().startswith('mile'):
|
496
|
+
# miles
|
497
|
+
earth_radius = 3959.0
|
498
|
+
else:
|
499
|
+
# kilometers
|
500
|
+
earth_radius = 6371.0
|
501
|
+
|
502
|
+
return value / earth_radius
|
503
|
+
|
504
|
+
def _apply_market_labels(self, df: pd.DataFrame, labels: np.ndarray):
|
505
|
+
"""Map cluster_id => Market1, Market2, etc."""
|
506
|
+
cluster_map = {}
|
507
|
+
cluster_ids = sorted(set(labels))
|
508
|
+
market_idx = 0
|
509
|
+
for cid in cluster_ids:
|
510
|
+
if cid == -1:
|
511
|
+
cluster_map[cid] = "Outlier"
|
512
|
+
else:
|
513
|
+
cluster_map[cid] = f"Market-{market_idx}"
|
514
|
+
market_idx += 1
|
515
|
+
df[self._cluster_name] = df[self._cluster_id].map(cluster_map)
|
516
|
+
|
517
|
+
# ------------------------------------------------------------------
|
518
|
+
# OSMnx-based refinement
|
519
|
+
# ------------------------------------------------------------------
|
520
|
+
|
521
|
+
def load_graph_from_pbf(self, pbf_path, bounding_box: list) -> nx.MultiDiGraph:
    """
    Load the driving road network of a bounding box from a PBF file.

    Args:
        pbf_path: path to the PBF file (converted with ``str``).
        bounding_box (list): bounding box forwarded unchanged to ``OSM``.
            # NOTE(review): coordinate order is whatever OSM expects - confirm.

    Returns:
        nx.MultiDiGraph: the driving network converted with
        ``graph_type="networkx"``.
    """
    reader = OSM(str(pbf_path), bounding_box=bounding_box)

    # Extract the driving network, then hand it over as a NetworkX graph.
    driving_network = reader.get_network(network_type="driving")
    return reader.to_graph(driving_network, graph_type="networkx")
|
538
|
+
|
539
|
+
def _build_osmnx_graph_for_point(self, lat: float, lon: float) -> nx.MultiDiGraph:
    """
    Build a simplified OSMnx graph around the point (lat, lon).

    The graph is requested with ``dist=50000`` (presumably meters, per the
    OSMnx convention - confirm), using ``self.network_type`` and
    ``self.custom_filter``.

    Returns:
        nx.MultiDiGraph: the graph returned by ``ox.graph_from_point``.
    """
    center = (lat, lon)
    options = {
        'dist': 50000,
        'network_type': self.network_type,
        'simplify': True,
        'custom_filter': self.custom_filter,
    }
    return ox.graph_from_point(center, **options)
|
552
|
+
|
553
|
+
def _build_osmnx_graph_for_bbox(self, north, south, east, west) -> nx.MultiDiGraph:
    """
    Build a local OSMnx graph for a bounding box and ``self.network_type``.

    The box is widened by 0.005 degrees on every side before the request.

    Args:
        north, south, east, west: bounding box limits in degrees.

    Returns:
        nx.MultiDiGraph: the graph returned by ``ox.graph_from_bbox``.
    """
    buffer = 0.005  # Degrees (~0.5 km buffer)
    # NOTE(review): the tuple is ordered (north, south, east, west); newer
    # OSMnx releases document (left, bottom, right, top) - confirm against
    # the OSMnx version pinned by this project.
    bbox = (north + buffer, south - buffer, east + buffer, west - buffer)
    self._logger.debug("Building OSMnx graph for bbox %s", bbox)

    # The graph is no longer plotted here: rendering a figure from a data
    # pipeline step is a debugging side effect.
    return ox.graph_from_bbox(
        bbox=bbox,
        network_type=self.network_type,
    )
|
571
|
+
|
572
|
+
def _find_borderline_stores(self, df: pd.DataFrame) -> List[int]:
    """
    Find clustered stores that sit close to a store of a different cluster.

    A store is borderline when some store with another cluster id lies
    closer than ``max_cluster_distance`` (converted to radians with
    ``self.distance_unit``) multiplied by ``self.borderline_threshold``.
    Stores labeled -1 are never reported, but they do count as neighbors of
    a different cluster.

    Args:
        df: DataFrame with 'latitude', 'longitude' and the cluster-id column.

    Returns:
        List[int]: ascending row positions of the borderline stores.
    """
    cluster_of = df[self._cluster_id].values

    # Pairwise haversine distances (radians) between all stores.
    coords_rad = np.radians(df[['latitude', 'longitude']].values)
    tree = BallTree(coords_rad, metric='haversine')
    dist_matrix = self._build_haversine_matrix(coords_rad, tree)

    max_dist_radians = self._convert_to_radians(self.max_cluster_distance, self.distance_unit)
    threshold = max_dist_radians * self.borderline_threshold

    borderline = []
    for position in range(len(df)):
        own_cluster = cluster_of[position]
        if own_cluster == -1:
            continue
        # Any store of another cluster closer than the threshold?
        foreign_and_close = (cluster_of != own_cluster) & (dist_matrix[position] < threshold)
        if foreign_and_close.any():
            borderline.append(position)

    return borderline
|
602
|
+
|
603
|
+
def _compute_cluster_representatives(self):
|
604
|
+
"""
|
605
|
+
For each cluster, pick a "representative" store (e.g., the first one).
|
606
|
+
Then record the OSMnx node after we build the graph.
|
607
|
+
"""
|
608
|
+
info = {}
|
609
|
+
for cid, grp in self._data.groupby(self._cluster_id):
|
610
|
+
if cid == -1:
|
611
|
+
info[cid] = {"index": None, "latitude": None, "longitude": None, "node": None}
|
612
|
+
continue
|
613
|
+
first_idx = grp.index[0]
|
614
|
+
lat = grp.at[first_idx, 'latitude']
|
615
|
+
lon = grp.at[first_idx, 'longitude']
|
616
|
+
info[cid] = {"index": first_idx, "latitude": lat, "longitude": lon, "node": None}
|
617
|
+
|
618
|
+
# We can fill 'node' after we have the graph if needed
|
619
|
+
lat_array = self._data['latitude'].values
|
620
|
+
lon_array = self._data['longitude'].values
|
621
|
+
# But we do that in _refine_border_stores to ensure we only do nearest_nodes once
|
622
|
+
return info
|
623
|
+
|
624
|
+
# ------------------------------------------------------------------
|
625
|
+
# Ghost Employees
|
626
|
+
# ------------------------------------------------------------------
|
627
|
+
def _haversine_distance_km(self, lat1, lon1, lat2, lon2):
    """
    Distance in kilometers between two latitude/longitude points.

    Despite the method name, the value comes from the ``geodesic`` helper
    rather than from a haversine formula.
    """
    origin = (lat1, lon1)
    destination = (lat2, lon2)
    return geodesic(origin, destination).kilometers
|
632
|
+
|
633
|
+
def _create_ghost_employees(self, cid, df: pd.DataFrame) -> List[Dict[str, Any]]:
|
634
|
+
"""
|
635
|
+
Create self.num_ghosts_per_cluster employees around each cluster's centroid.
|
636
|
+
Ensure no ghost is more than 5 km from the centroid.
|
637
|
+
Spread ghosts within the cluster to maximize coverage.
|
638
|
+
"""
|
639
|
+
ghosts = []
|
640
|
+
cluster_rows = df[df[self._cluster_id] == cid]
|
641
|
+
if cluster_rows.empty:
|
642
|
+
return ghosts
|
643
|
+
|
644
|
+
if len(cluster_rows) == 1:
|
645
|
+
# Only one store in this cluster, no need for ghosts
|
646
|
+
return ghosts
|
647
|
+
|
648
|
+
# Centroid of this Cluster
|
649
|
+
lat_mean = cluster_rows['latitude'].mean()
|
650
|
+
lon_mean = cluster_rows['longitude'].mean()
|
651
|
+
|
652
|
+
max_offset_lat = 0.002 # ~5 km
|
653
|
+
max_offset_lon = 0.002 # ~5 km at 40° latitude
|
654
|
+
max_offset_miles = 50.0 # Maximum distance from centroid
|
655
|
+
min_distance_km = 10.0 # Minimum distance between ghosts to prevent overlapping
|
656
|
+
|
657
|
+
# Optimal number of ghost employees:
|
658
|
+
num_stores = len(cluster_rows)
|
659
|
+
num_ghosts = max(2, math.ceil(num_stores / 10)) # At least 2 ghosts per cluster
|
660
|
+
|
661
|
+
if num_ghosts < self.num_ghosts_per_cluster:
|
662
|
+
num_ghosts = self.num_ghosts_per_cluster
|
663
|
+
|
664
|
+
for i in range(num_ghosts):
|
665
|
+
attempt = 0
|
666
|
+
while True:
|
667
|
+
# lat_offset = np.random.uniform(-max_offset_lat, max_offset_lat)
|
668
|
+
# lon_offset = np.random.uniform(-max_offset_lon, max_offset_lon)
|
669
|
+
|
670
|
+
# ghost_lat = lat_mean + lat_offset
|
671
|
+
# ghost_lon = lon_mean + lon_offset
|
672
|
+
|
673
|
+
# # Calculate distance to centroid using geodesic distance for precision
|
674
|
+
# distance_km = self._haversine_distance_km(lat_mean, lon_mean, ghost_lat, ghost_lon)
|
675
|
+
# if distance_km > 5.0:
|
676
|
+
# attempt += 1
|
677
|
+
# if attempt >= 100:
|
678
|
+
# self._logger.warning(
|
679
|
+
# f"Could not place ghost {i+1} within 5 km after 100 attempts in cluster {cid}."
|
680
|
+
# )
|
681
|
+
# break
|
682
|
+
# continue # Exceeds maximum distance, retry
|
683
|
+
|
684
|
+
# Generate a random point within a circle of radius 50 miles from the centroid
|
685
|
+
angle = np.random.uniform(0, 2 * np.pi)
|
686
|
+
distance = np.random.uniform(0, max_offset_miles)
|
687
|
+
delta_lat = (distance * math.cos(angle)) / 69.0 # Approx. degrees per mile
|
688
|
+
delta_lon = (distance * math.sin(angle)) / (69.0 * math.cos(math.radians(lat_mean)))
|
689
|
+
|
690
|
+
ghost_lat = lat_mean + delta_lat
|
691
|
+
ghost_lon = lon_mean + delta_lon
|
692
|
+
|
693
|
+
# Ensure ghosts are not too close to each other
|
694
|
+
too_close = False
|
695
|
+
for existing_ghost in ghosts:
|
696
|
+
existing_distance = self._haversine_distance_km(
|
697
|
+
existing_ghost['latitude'],
|
698
|
+
existing_ghost['longitude'],
|
699
|
+
ghost_lat,
|
700
|
+
ghost_lon
|
701
|
+
)
|
702
|
+
if existing_distance < min_distance_km:
|
703
|
+
too_close = True
|
704
|
+
break
|
705
|
+
if not too_close:
|
706
|
+
break # Valid position found
|
707
|
+
if too_close:
|
708
|
+
attempt += 1
|
709
|
+
if attempt >= 100:
|
710
|
+
self._logger.warning(
|
711
|
+
f"Ghost {i+1} in cluster {cid} is too close to existing ghosts after 100 attempts."
|
712
|
+
)
|
713
|
+
break
|
714
|
+
continue # Ghost too close to existing, retry
|
715
|
+
|
716
|
+
# Valid position found
|
717
|
+
break
|
718
|
+
|
719
|
+
ghost_id = f"Ghost-{cid}-{i+1}"
|
720
|
+
ghost = {
|
721
|
+
'ghost_id': ghost_id,
|
722
|
+
self._cluster_id: cid,
|
723
|
+
'latitude': ghost_lat,
|
724
|
+
'longitude': ghost_lon
|
725
|
+
}
|
726
|
+
ghosts.append(ghost)
|
727
|
+
|
728
|
+
return ghosts
|
729
|
+
|
730
|
+
# ------------------------------------------------------------------
|
731
|
+
# Filter stores unreachable from any ghost
|
732
|
+
# ------------------------------------------------------------------
|
733
|
+
def _filter_unreachable_stores(
    self,
    cid: int,
    employees: List[Dict[str, Any]],
    cluster_stores: pd.DataFrame
) -> List[int]:
    """
    List the stores of a cluster that no employee can reach.

    A store is reachable when at least one employee lies within
    ``self.ghost_distance_threshold`` miles of it.

    Args:
        cid: cluster id of ``cluster_stores``.
        employees: dicts with 'latitude' and 'longitude' keys.
        cluster_stores: rows of the cluster, with 'latitude' and 'longitude'.

    Returns:
        List[int]: index labels of the unreachable stores. Every store is
        returned when ``employees`` is empty; otherwise nothing is returned
        for the outlier cluster (-1) or for a single-store cluster.
    """
    # If no employees for this cluster, everything is unreachable
    if not employees:
        return cluster_stores.index.tolist()

    if cid == -1 or len(cluster_stores) == 1:
        return []

    unreachable_indices = []
    # Only the coordinates are read. The previous per-row lookups of
    # 'market_id' and 'store_id' were unused and failed whenever the
    # cluster-id column had a different name.
    store_points = zip(
        cluster_stores.index,
        cluster_stores['latitude'],
        cluster_stores['longitude']
    )
    for idx, store_lat, store_lon in store_points:
        reachable = any(
            meters_to_miles(
                self._haversine_distance_km(
                    store_lat, store_lon, ghost['latitude'], ghost['longitude']
                ) * 1000
            ) <= self.ghost_distance_threshold
            for ghost in employees
        )
        if not reachable:
            unreachable_indices.append(idx)

    return unreachable_indices
|
774
|
+
|
775
|
+
def _haversine_miles(self, lat1, lon1, lat2, lon2):
|
776
|
+
"""
|
777
|
+
Simple haversine formula returning miles between two lat/lon points.
|
778
|
+
Earth radius ~3959 miles.
|
779
|
+
"""
|
780
|
+
R = 3959.0 # Earth radius in miles
|
781
|
+
dlat = np.radians(lat2 - lat1)
|
782
|
+
dlon = np.radians(lon2 - lon1)
|
783
|
+
a = np.sin(dlat / 2)**2 + np.cos(np.radians(lat1)) * np.cos(np.radians(lat2)) * np.sin(dlon / 2)**2
|
784
|
+
c = 2 * np.arcsin(np.sqrt(a))
|
785
|
+
return R * c
|
786
|
+
|
787
|
+
def _nearest_osm_node(self, G: nx.MultiDiGraph, lat: float, lon: float) -> int:
    """
    Look up the node of graph ``G`` closest to (lat, lon).

    The coordinates are passed as one-element lists (X=longitude,
    Y=latitude); when the lookup answers with a NumPy array, its first
    element is returned, otherwise the answer is returned unchanged.
    """
    nearest = ox_distance.nearest_nodes(G, X=[lon], Y=[lat])
    return nearest[0] if isinstance(nearest, np.ndarray) else nearest
|
796
|
+
|
797
|
+
def _road_distance_miles(
    self, G: nx.MultiDiGraph,
    center_lat: float,
    center_lon: float,
    lat: float,
    lon: float
) -> Optional[float]:
    """
    Route distance in miles from (center_lat, center_lon) to (lat, lon) in ``G``.

    Both points are snapped to their nearest graph node, the shortest path
    is measured with the 'length' edge weight and the result is multiplied
    by 0.000621371 (meters -> miles).

    Returns:
        Optional[float]: the distance in miles, or None when the graph has
        no path between the two nodes.
    """
    source_node = self._nearest_osm_node(G, center_lat, center_lon)
    target_node = self._nearest_osm_node(G, lat, lon)

    try:
        length_m = nx.shortest_path_length(G, source_node, target_node, weight='length')
    except nx.NetworkXNoPath:
        return None

    return length_m * 0.000621371
|
820
|
+
|
821
|
+
def _compute_distance_matrix(self, cluster_df: pd.DataFrame, G_local: nx.MultiDiGraph, depot_lat: float, depot_lon: float) -> np.ndarray:
    """
    Compute the road-based distance matrix (miles) for a cluster.

    The depot is node 0; the stores of ``cluster_df`` follow in row order.

    Args:
        cluster_df: stores of the cluster, with 'latitude' and 'longitude'.
        G_local: road graph whose edges carry a 'length' weight.
        depot_lat, depot_lon: coordinates of the depot.

    Returns:
        np.ndarray: square matrix of size len(cluster_df) + 1; the diagonal
        is 0 and pairs without a connecting path hold ``np.inf``.
    """
    all_coords = [(depot_lat, depot_lon)] + list(cluster_df[['latitude', 'longitude']].values)
    size = len(all_coords)

    # Precompute nearest nodes
    nodes = ox_distance.nearest_nodes(
        G_local,
        X=[lon for _, lon in all_coords],
        Y=[lat for lat, _ in all_coords]
    )

    # Start from "no path" everywhere and fill in what is reachable.
    distance_matrix = np.full((size, size), np.inf, dtype=float)

    for i, source in enumerate(nodes):
        # One single-source search per row replaces one point-to-point
        # search per pair.
        reachable = nx.single_source_dijkstra_path_length(G_local, source, weight='length')
        for j, target in enumerate(nodes):
            if i == j:
                distance_matrix[i, j] = 0.0
            elif target in reachable:
                distance_matrix[i, j] = reachable[target] * 0.000621371  # meters to miles

    return distance_matrix
|
846
|
+
|
847
|
+
def _assign_routes_vrp(self, cluster_df: pd.DataFrame, G_local: nx.MultiDiGraph, depot_lat: float, depot_lon: float) -> Dict[int, List[int]]:
    """
    Distribute the stores of a cluster among ghost employees by solving a VRP.

    The road distance matrix (depot first) is built, unreachable pairs are
    replaced by 1e6, and the problem is solved for
    ``self.num_ghosts_per_cluster`` vehicles limited by
    ``self.max_distance_by_day`` and ``self.max_stores_per_day``.

    Returns:
        Dict[int, List[int]]: vehicle number (position of the route in the
        solver output) -> index labels of the stores on that route, with the
        first and last stop of each route (the depot) removed.
    """
    store_ids = cluster_df.index.tolist()
    num_vehicles = self.num_ghosts_per_cluster

    # Distance matrix with the depot as node 0; infinite entries become a
    # large finite penalty.
    road_matrix = self._compute_distance_matrix(cluster_df, G_local, depot_lat, depot_lon)
    road_matrix = np.where(np.isinf(road_matrix), 1e6, road_matrix)

    data = create_data_model(
        distance_matrix=road_matrix.tolist(),  # OR-Tools requires lists
        num_vehicles=num_vehicles,
        depot=0,
        max_distance=self.max_distance_by_day,
        max_stores_per_vehicle=self.max_stores_per_day
    )
    routes = solve_vrp(data)

    # Node k of a route is store k-1 because the depot occupies node 0.
    return {
        vehicle_id: [store_ids[node - 1] for node in route[1:-1]]
        for vehicle_id, route in enumerate(routes)
    }
|
881
|
+
|
882
|
+
def _validate_clusters_by_vrp(self):
|
883
|
+
"""
|
884
|
+
For each cluster, assign stores to ghost employees using VRP.
|
885
|
+
Remove any stores that cannot be assigned within constraints.
|
886
|
+
"""
|
887
|
+
df = self._data
|
888
|
+
clusters = df[self._cluster_id].unique()
|
889
|
+
to_remove = []
|
890
|
+
assignment_dict = {} # To store assignments per cluster
|
891
|
+
|
892
|
+
for cid in clusters:
|
893
|
+
if cid == -1:
|
894
|
+
continue # Skip outliers
|
895
|
+
|
896
|
+
cluster_df = df[df[self._cluster_id] == cid]
|
897
|
+
if cluster_df.empty:
|
898
|
+
continue
|
899
|
+
|
900
|
+
# 1) Compute bounding box with buffer
|
901
|
+
lat_min = cluster_df['latitude'].min()
|
902
|
+
lat_max = cluster_df['latitude'].max()
|
903
|
+
lon_min = cluster_df['longitude'].min()
|
904
|
+
lon_max = cluster_df['longitude'].max()
|
905
|
+
|
906
|
+
buffer_deg = 0.1
|
907
|
+
north = lat_max + buffer_deg
|
908
|
+
south = lat_min - buffer_deg
|
909
|
+
east = lon_max + buffer_deg
|
910
|
+
west = lon_min - buffer_deg
|
911
|
+
|
912
|
+
# 2) Build local OSMnx graph for the cluster
|
913
|
+
G_local = self._build_osmnx_graph_for_bbox(north, south, east, west)
|
914
|
+
|
915
|
+
# 3) Define depot (cluster centroid)
|
916
|
+
centroid_lat = cluster_df['latitude'].mean()
|
917
|
+
centroid_lon = cluster_df['longitude'].mean()
|
918
|
+
|
919
|
+
# 4) Assign routes using VRP
|
920
|
+
assignment = self._assign_routes_vrp(cluster_df, G_local, centroid_lat, centroid_lon)
|
921
|
+
|
922
|
+
# 5) Assign ghost IDs to stores
|
923
|
+
for vehicle_id, store_ids in assignment.items():
|
924
|
+
ghost_id = f"Ghost-{cid}-{vehicle_id + 1}"
|
925
|
+
df.loc[store_ids, 'ghost_id'] = ghost_id
|
926
|
+
|
927
|
+
# 6) Identify unassigned stores (if any)
|
928
|
+
assigned_store_ids = set()
|
929
|
+
for route in assignment.values():
|
930
|
+
assigned_store_ids.update(route)
|
931
|
+
|
932
|
+
all_store_ids = set(cluster_df.index.tolist())
|
933
|
+
unassigned_store_ids = all_store_ids - assigned_store_ids
|
934
|
+
|
935
|
+
if unassigned_store_ids:
|
936
|
+
to_remove.extend(list(unassigned_store_ids))
|
937
|
+
|
938
|
+
# 6) Remove unassigned stores
|
939
|
+
to_remove = list(set(to_remove))
|
940
|
+
if to_remove:
|
941
|
+
self._logger.info(
|
942
|
+
f"Removing {len(to_remove)} stores that could not be assigned via VRP."
|
943
|
+
)
|
944
|
+
self._rejected = pd.concat([self._rejected, self._data.loc[to_remove]]).drop_duplicates()
|
945
|
+
self._data.drop(index=to_remove, inplace=True)
|
946
|
+
|
947
|
+
# 8) Update DataFrame with assignments
|
948
|
+
self._data = df.copy()
|
949
|
+
|
950
|
+
# 9) Apply market labels again if needed
|
951
|
+
self._apply_market_labels(self._data, self._data[self._cluster_id].values)
|
952
|
+
|
953
|
+
def _reassign_rejected_stores(self):
|
954
|
+
"""
|
955
|
+
Attempt to reassign rejected stores to existing clusters if within the borderline threshold.
|
956
|
+
"""
|
957
|
+
if self._rejected.empty:
|
958
|
+
return
|
959
|
+
|
960
|
+
borderline_threshold = self.borderline_threshold
|
961
|
+
to_remove = []
|
962
|
+
df = self._rejected.copy()
|
963
|
+
|
964
|
+
for idx, row in df.iterrows():
|
965
|
+
# Find the nearest cluster centroid
|
966
|
+
min_distance = np.inf
|
967
|
+
assigned_cid = -1
|
968
|
+
|
969
|
+
for cid in self._data[self._cluster_id].unique():
|
970
|
+
if cid == -1:
|
971
|
+
continue
|
972
|
+
centroid_lat = self._data[self._cluster_id == cid]['latitude'].mean()
|
973
|
+
centroid_lon = self._data[self._cluster_id == cid]['longitude'].mean()
|
974
|
+
distance = self._haversine_miles(centroid_lat, centroid_lon, row['latitude'], row['longitude'])
|
975
|
+
if distance < min_distance:
|
976
|
+
min_distance = distance
|
977
|
+
assigned_cid = cid
|
978
|
+
|
979
|
+
# Check if within the borderline threshold
|
980
|
+
if min_distance <= self.max_cluster_distance * borderline_threshold:
|
981
|
+
# Assign to this cluster
|
982
|
+
self._data.at[idx, self._cluster_id] = assigned_cid
|
983
|
+
self._data.at[idx, 'ghost_id'] = f"Ghost-{assigned_cid}-1" # Assign to the first ghost for simplicity
|
984
|
+
to_remove.append(idx)
|
985
|
+
|
986
|
+
# Remove reassigned stores from rejected
|
987
|
+
if to_remove:
|
988
|
+
self._rejected.drop(index=to_remove, inplace=True)
|
989
|
+
self._logger.info(
|
990
|
+
f"Reassigned {len(to_remove)} rejected stores to existing clusters."
|
991
|
+
)
|
992
|
+
|
993
|
+
async def run(self):
    """
    Cluster the stores and separate the ones that cannot be reached.

    1) Cluster the stores in haversine space with ``_create_cluster``.
    2) For every cluster holding more than one store (the outlier id ``-1``
       is skipped) create its ghost employees and collect the stores
       reported by ``_filter_unreachable_stores``.
    3) Move those unreachable stores from ``self._data`` to ``self._rejected``.
    4) Return the remaining assignment, also stored in ``self._result``.

    NOTE: the road-based VRP validation (``_validate_clusters_by_vrp``) and
    the re-assignment of rejected stores (``_reassign_rejected_stores``) are
    not invoked by this method at present.
    """
    self._logger.info(
        "=== Running MarketClustering ==="
    )

    # --- create cluster in haversine space (balltree)
    self._data = self._create_cluster(self._data)

    unreachable_stores = []  # gather all unreachable store indices globally
    for cid, cluster_stores in self._data.groupby(self._cluster_id):
        if cid == -1 or len(cluster_stores) <= 1:
            continue  # skip outliers and single-store clusters

        # Create the ghost employees for this Cluster:
        employees = self._create_ghost_employees(cid, self._data)
        cluster_unreachable = self._filter_unreachable_stores(
            cid=cid,
            employees=employees,
            cluster_stores=cluster_stores
        )
        unreachable_stores.extend(cluster_unreachable)

    # De-duplicate while keeping first-seen order, so the row order of
    # self._rejected is deterministic.
    unreachable_stores = list(dict.fromkeys(unreachable_stores))
    self._rejected = self._data.loc[unreachable_stores].copy()
    self._data.drop(index=unreachable_stores, inplace=True)
    self._logger.info(
        f"Unreachable stores: {len(unreachable_stores)}"
    )

    # Count real clusters only; blindly subtracting one for the outlier id
    # under-reports when no row carries the -1 label.
    cluster_ids = self._data[self._cluster_id]
    n_clusters = cluster_ids[cluster_ids != -1].nunique()
    self._logger.info(
        f"Final clusters formed: {n_clusters} (excluding Outliers)"
    )
    self._logger.info(
        f"Total rejected stores: {len(self._rejected)}"
    )

    self._result = self._data
    return self._result
|