flowtask 5.8.4__cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowtask/__init__.py +93 -0
- flowtask/__main__.py +38 -0
- flowtask/bots/__init__.py +6 -0
- flowtask/bots/check.py +93 -0
- flowtask/bots/codebot.py +51 -0
- flowtask/components/ASPX.py +148 -0
- flowtask/components/AddDataset.py +352 -0
- flowtask/components/Amazon.py +523 -0
- flowtask/components/AutoTask.py +314 -0
- flowtask/components/Azure.py +80 -0
- flowtask/components/AzureUsers.py +106 -0
- flowtask/components/BaseAction.py +91 -0
- flowtask/components/BaseLoop.py +198 -0
- flowtask/components/BestBuy.py +800 -0
- flowtask/components/CSVToGCS.py +120 -0
- flowtask/components/CompanyScraper/__init__.py +1 -0
- flowtask/components/CompanyScraper/parsers/__init__.py +6 -0
- flowtask/components/CompanyScraper/parsers/base.py +102 -0
- flowtask/components/CompanyScraper/parsers/explorium.py +192 -0
- flowtask/components/CompanyScraper/parsers/leadiq.py +206 -0
- flowtask/components/CompanyScraper/parsers/rocket.py +133 -0
- flowtask/components/CompanyScraper/parsers/siccode.py +109 -0
- flowtask/components/CompanyScraper/parsers/visualvisitor.py +130 -0
- flowtask/components/CompanyScraper/parsers/zoominfo.py +118 -0
- flowtask/components/CompanyScraper/scrapper.py +1054 -0
- flowtask/components/CopyTo.py +177 -0
- flowtask/components/CopyToBigQuery.py +243 -0
- flowtask/components/CopyToMongoDB.py +291 -0
- flowtask/components/CopyToPg.py +609 -0
- flowtask/components/CopyToRethink.py +207 -0
- flowtask/components/CreateGCSBucket.py +102 -0
- flowtask/components/CreateReport/CreateReport.py +228 -0
- flowtask/components/CreateReport/__init__.py +9 -0
- flowtask/components/CreateReport/charts/__init__.py +15 -0
- flowtask/components/CreateReport/charts/bar.py +51 -0
- flowtask/components/CreateReport/charts/base.py +66 -0
- flowtask/components/CreateReport/charts/pie.py +64 -0
- flowtask/components/CreateReport/utils.py +9 -0
- flowtask/components/CustomerSatisfaction.py +196 -0
- flowtask/components/DataInput.py +200 -0
- flowtask/components/DateList.py +255 -0
- flowtask/components/DbClient.py +163 -0
- flowtask/components/DialPad.py +146 -0
- flowtask/components/DocumentDBQuery.py +200 -0
- flowtask/components/DownloadFrom.py +371 -0
- flowtask/components/DownloadFromD2L.py +113 -0
- flowtask/components/DownloadFromFTP.py +181 -0
- flowtask/components/DownloadFromIMAP.py +315 -0
- flowtask/components/DownloadFromS3.py +198 -0
- flowtask/components/DownloadFromSFTP.py +265 -0
- flowtask/components/DownloadFromSharepoint.py +110 -0
- flowtask/components/DownloadFromSmartSheet.py +114 -0
- flowtask/components/DownloadS3File.py +229 -0
- flowtask/components/Dummy.py +59 -0
- flowtask/components/DuplicatePhoto.py +411 -0
- flowtask/components/EmployeeEvaluation.py +237 -0
- flowtask/components/ExecuteSQL.py +323 -0
- flowtask/components/ExtractHTML.py +178 -0
- flowtask/components/FileBase.py +178 -0
- flowtask/components/FileCopy.py +181 -0
- flowtask/components/FileDelete.py +82 -0
- flowtask/components/FileExists.py +146 -0
- flowtask/components/FileIteratorDelete.py +112 -0
- flowtask/components/FileList.py +194 -0
- flowtask/components/FileOpen.py +75 -0
- flowtask/components/FileRead.py +120 -0
- flowtask/components/FileRename.py +106 -0
- flowtask/components/FilterIf.py +284 -0
- flowtask/components/FilterRows/FilterRows.py +200 -0
- flowtask/components/FilterRows/__init__.py +10 -0
- flowtask/components/FilterRows/functions.py +4 -0
- flowtask/components/GCSToBigQuery.py +103 -0
- flowtask/components/GoogleA4.py +150 -0
- flowtask/components/GoogleGeoCoding.py +344 -0
- flowtask/components/GooglePlaces.py +315 -0
- flowtask/components/GoogleSearch.py +539 -0
- flowtask/components/HTTPClient.py +268 -0
- flowtask/components/ICIMS.py +146 -0
- flowtask/components/IF.py +179 -0
- flowtask/components/IcimsFolderCopy.py +173 -0
- flowtask/components/ImageFeatures/__init__.py +5 -0
- flowtask/components/ImageFeatures/process.py +233 -0
- flowtask/components/IteratorBase.py +251 -0
- flowtask/components/LangchainLoader/__init__.py +5 -0
- flowtask/components/LangchainLoader/loader.py +194 -0
- flowtask/components/LangchainLoader/loaders/__init__.py +22 -0
- flowtask/components/LangchainLoader/loaders/abstract.py +362 -0
- flowtask/components/LangchainLoader/loaders/basepdf.py +50 -0
- flowtask/components/LangchainLoader/loaders/docx.py +91 -0
- flowtask/components/LangchainLoader/loaders/html.py +119 -0
- flowtask/components/LangchainLoader/loaders/pdfblocks.py +146 -0
- flowtask/components/LangchainLoader/loaders/pdfmark.py +79 -0
- flowtask/components/LangchainLoader/loaders/pdftables.py +135 -0
- flowtask/components/LangchainLoader/loaders/qa.py +67 -0
- flowtask/components/LangchainLoader/loaders/txt.py +55 -0
- flowtask/components/LeadIQ.py +650 -0
- flowtask/components/Loop.py +253 -0
- flowtask/components/Lowes.py +334 -0
- flowtask/components/MS365Usage.py +156 -0
- flowtask/components/MSTeamsMessages.py +320 -0
- flowtask/components/MarketClustering.py +1051 -0
- flowtask/components/MergeFiles.py +362 -0
- flowtask/components/MilvusOutput.py +87 -0
- flowtask/components/NearByStores.py +175 -0
- flowtask/components/NetworkNinja/__init__.py +6 -0
- flowtask/components/NetworkNinja/models/__init__.py +52 -0
- flowtask/components/NetworkNinja/models/abstract.py +177 -0
- flowtask/components/NetworkNinja/models/account.py +39 -0
- flowtask/components/NetworkNinja/models/client.py +19 -0
- flowtask/components/NetworkNinja/models/district.py +14 -0
- flowtask/components/NetworkNinja/models/events.py +101 -0
- flowtask/components/NetworkNinja/models/forms.py +499 -0
- flowtask/components/NetworkNinja/models/market.py +16 -0
- flowtask/components/NetworkNinja/models/organization.py +34 -0
- flowtask/components/NetworkNinja/models/photos.py +125 -0
- flowtask/components/NetworkNinja/models/project.py +44 -0
- flowtask/components/NetworkNinja/models/region.py +28 -0
- flowtask/components/NetworkNinja/models/store.py +203 -0
- flowtask/components/NetworkNinja/models/user.py +151 -0
- flowtask/components/NetworkNinja/router.py +854 -0
- flowtask/components/Odoo.py +175 -0
- flowtask/components/OdooInjector.py +192 -0
- flowtask/components/OpenFromXML.py +126 -0
- flowtask/components/OpenWeather.py +41 -0
- flowtask/components/OpenWithBase.py +616 -0
- flowtask/components/OpenWithPandas.py +715 -0
- flowtask/components/PGPDecrypt.py +199 -0
- flowtask/components/PandasIterator.py +187 -0
- flowtask/components/PandasToFile.py +189 -0
- flowtask/components/Paradox.py +339 -0
- flowtask/components/ParamIterator.py +117 -0
- flowtask/components/ParseHTML.py +84 -0
- flowtask/components/PlacerStores.py +249 -0
- flowtask/components/Pokemon.py +507 -0
- flowtask/components/PositiveBot.py +62 -0
- flowtask/components/PowerPointSlide.py +400 -0
- flowtask/components/PrintMessage.py +127 -0
- flowtask/components/ProductCompetitors/__init__.py +5 -0
- flowtask/components/ProductCompetitors/parsers/__init__.py +7 -0
- flowtask/components/ProductCompetitors/parsers/base.py +72 -0
- flowtask/components/ProductCompetitors/parsers/bestbuy.py +86 -0
- flowtask/components/ProductCompetitors/parsers/lowes.py +103 -0
- flowtask/components/ProductCompetitors/scrapper.py +155 -0
- flowtask/components/ProductCompliant.py +169 -0
- flowtask/components/ProductInfo/__init__.py +1 -0
- flowtask/components/ProductInfo/parsers/__init__.py +5 -0
- flowtask/components/ProductInfo/parsers/base.py +83 -0
- flowtask/components/ProductInfo/parsers/brother.py +97 -0
- flowtask/components/ProductInfo/parsers/canon.py +167 -0
- flowtask/components/ProductInfo/parsers/epson.py +118 -0
- flowtask/components/ProductInfo/parsers/hp.py +131 -0
- flowtask/components/ProductInfo/parsers/samsung.py +97 -0
- flowtask/components/ProductInfo/scraper.py +319 -0
- flowtask/components/ProductPricing.py +118 -0
- flowtask/components/QS.py +261 -0
- flowtask/components/QSBase.py +201 -0
- flowtask/components/QueryIterator.py +273 -0
- flowtask/components/QueryToInsert.py +327 -0
- flowtask/components/QueryToPandas.py +432 -0
- flowtask/components/RESTClient.py +195 -0
- flowtask/components/RethinkDBQuery.py +189 -0
- flowtask/components/Rsync.py +74 -0
- flowtask/components/RunSSH.py +59 -0
- flowtask/components/RunShell.py +71 -0
- flowtask/components/SalesForce.py +20 -0
- flowtask/components/SaveImageBank/__init__.py +257 -0
- flowtask/components/SchedulingVisits.py +592 -0
- flowtask/components/ScrapPage.py +216 -0
- flowtask/components/ScrapSearch.py +79 -0
- flowtask/components/SendNotify.py +257 -0
- flowtask/components/SentimentAnalysis.py +694 -0
- flowtask/components/ServiceScrapper/__init__.py +5 -0
- flowtask/components/ServiceScrapper/parsers/__init__.py +1 -0
- flowtask/components/ServiceScrapper/parsers/base.py +94 -0
- flowtask/components/ServiceScrapper/parsers/costco.py +93 -0
- flowtask/components/ServiceScrapper/scrapper.py +199 -0
- flowtask/components/SetVariables.py +156 -0
- flowtask/components/SubTask.py +182 -0
- flowtask/components/SuiteCRM.py +48 -0
- flowtask/components/Switch.py +175 -0
- flowtask/components/TableBase.py +148 -0
- flowtask/components/TableDelete.py +312 -0
- flowtask/components/TableInput.py +143 -0
- flowtask/components/TableOutput/TableOutput.py +384 -0
- flowtask/components/TableOutput/__init__.py +3 -0
- flowtask/components/TableSchema.py +534 -0
- flowtask/components/Target.py +223 -0
- flowtask/components/ThumbnailGenerator.py +156 -0
- flowtask/components/ToPandas.py +67 -0
- flowtask/components/TransformRows/TransformRows.py +507 -0
- flowtask/components/TransformRows/__init__.py +9 -0
- flowtask/components/TransformRows/functions.py +559 -0
- flowtask/components/TransposeRows.py +176 -0
- flowtask/components/UPCDatabase.py +86 -0
- flowtask/components/UnGzip.py +171 -0
- flowtask/components/Uncompress.py +172 -0
- flowtask/components/UniqueRows.py +126 -0
- flowtask/components/Unzip.py +107 -0
- flowtask/components/UpdateOperationalVars.py +147 -0
- flowtask/components/UploadTo.py +299 -0
- flowtask/components/UploadToS3.py +136 -0
- flowtask/components/UploadToSFTP.py +160 -0
- flowtask/components/UploadToSharepoint.py +205 -0
- flowtask/components/UserFunc.py +122 -0
- flowtask/components/VivaTracker.py +140 -0
- flowtask/components/WSDLClient.py +123 -0
- flowtask/components/Wait.py +18 -0
- flowtask/components/Walmart.py +199 -0
- flowtask/components/Workplace.py +134 -0
- flowtask/components/XMLToPandas.py +267 -0
- flowtask/components/Zammad/__init__.py +41 -0
- flowtask/components/Zammad/models.py +0 -0
- flowtask/components/ZoomInfoScraper.py +409 -0
- flowtask/components/__init__.py +104 -0
- flowtask/components/abstract.py +18 -0
- flowtask/components/flow.py +530 -0
- flowtask/components/google.py +335 -0
- flowtask/components/group.py +221 -0
- flowtask/components/py.typed +0 -0
- flowtask/components/reviewscrap.py +132 -0
- flowtask/components/tAutoincrement.py +117 -0
- flowtask/components/tConcat.py +109 -0
- flowtask/components/tExplode.py +119 -0
- flowtask/components/tFilter.py +184 -0
- flowtask/components/tGroup.py +236 -0
- flowtask/components/tJoin.py +270 -0
- flowtask/components/tMap/__init__.py +9 -0
- flowtask/components/tMap/functions.py +54 -0
- flowtask/components/tMap/tMap.py +450 -0
- flowtask/components/tMelt.py +112 -0
- flowtask/components/tMerge.py +114 -0
- flowtask/components/tOrder.py +93 -0
- flowtask/components/tPandas.py +94 -0
- flowtask/components/tPivot.py +71 -0
- flowtask/components/tPluckCols.py +76 -0
- flowtask/components/tUnnest.py +82 -0
- flowtask/components/user.py +401 -0
- flowtask/conf.py +457 -0
- flowtask/download.py +102 -0
- flowtask/events/__init__.py +11 -0
- flowtask/events/events/__init__.py +20 -0
- flowtask/events/events/abstract.py +95 -0
- flowtask/events/events/alerts/__init__.py +362 -0
- flowtask/events/events/alerts/colfunctions.py +131 -0
- flowtask/events/events/alerts/functions.py +158 -0
- flowtask/events/events/dummy.py +12 -0
- flowtask/events/events/exec.py +124 -0
- flowtask/events/events/file/__init__.py +7 -0
- flowtask/events/events/file/base.py +51 -0
- flowtask/events/events/file/copy.py +23 -0
- flowtask/events/events/file/delete.py +16 -0
- flowtask/events/events/interfaces/__init__.py +9 -0
- flowtask/events/events/interfaces/client.py +67 -0
- flowtask/events/events/interfaces/credentials.py +28 -0
- flowtask/events/events/interfaces/notifications.py +58 -0
- flowtask/events/events/jira.py +122 -0
- flowtask/events/events/log.py +26 -0
- flowtask/events/events/logerr.py +52 -0
- flowtask/events/events/notify.py +59 -0
- flowtask/events/events/notify_event.py +160 -0
- flowtask/events/events/publish.py +54 -0
- flowtask/events/events/sendfile.py +104 -0
- flowtask/events/events/task.py +97 -0
- flowtask/events/events/teams.py +98 -0
- flowtask/events/events/webhook.py +58 -0
- flowtask/events/manager.py +287 -0
- flowtask/exceptions.c +39393 -0
- flowtask/exceptions.cpython-39-x86_64-linux-gnu.so +0 -0
- flowtask/extensions/__init__.py +3 -0
- flowtask/extensions/abstract.py +82 -0
- flowtask/extensions/logging/__init__.py +65 -0
- flowtask/hooks/__init__.py +9 -0
- flowtask/hooks/actions/__init__.py +22 -0
- flowtask/hooks/actions/abstract.py +66 -0
- flowtask/hooks/actions/dummy.py +23 -0
- flowtask/hooks/actions/jira.py +74 -0
- flowtask/hooks/actions/rest.py +320 -0
- flowtask/hooks/actions/sampledata.py +37 -0
- flowtask/hooks/actions/sensor.py +23 -0
- flowtask/hooks/actions/task.py +9 -0
- flowtask/hooks/actions/ticket.py +37 -0
- flowtask/hooks/actions/zammad.py +55 -0
- flowtask/hooks/hook.py +62 -0
- flowtask/hooks/models.py +17 -0
- flowtask/hooks/service.py +187 -0
- flowtask/hooks/step.py +91 -0
- flowtask/hooks/types/__init__.py +23 -0
- flowtask/hooks/types/base.py +129 -0
- flowtask/hooks/types/brokers/__init__.py +11 -0
- flowtask/hooks/types/brokers/base.py +54 -0
- flowtask/hooks/types/brokers/mqtt.py +35 -0
- flowtask/hooks/types/brokers/rabbitmq.py +82 -0
- flowtask/hooks/types/brokers/redis.py +83 -0
- flowtask/hooks/types/brokers/sqs.py +44 -0
- flowtask/hooks/types/fs.py +232 -0
- flowtask/hooks/types/http.py +49 -0
- flowtask/hooks/types/imap.py +200 -0
- flowtask/hooks/types/jira.py +279 -0
- flowtask/hooks/types/mail.py +205 -0
- flowtask/hooks/types/postgres.py +98 -0
- flowtask/hooks/types/responses/__init__.py +8 -0
- flowtask/hooks/types/responses/base.py +5 -0
- flowtask/hooks/types/sharepoint.py +288 -0
- flowtask/hooks/types/ssh.py +141 -0
- flowtask/hooks/types/tagged.py +59 -0
- flowtask/hooks/types/upload.py +85 -0
- flowtask/hooks/types/watch.py +71 -0
- flowtask/hooks/types/web.py +36 -0
- flowtask/interfaces/AzureClient.py +137 -0
- flowtask/interfaces/AzureGraph.py +839 -0
- flowtask/interfaces/Boto3Client.py +326 -0
- flowtask/interfaces/DropboxClient.py +173 -0
- flowtask/interfaces/ExcelHandler.py +94 -0
- flowtask/interfaces/FTPClient.py +131 -0
- flowtask/interfaces/GoogleCalendar.py +201 -0
- flowtask/interfaces/GoogleClient.py +133 -0
- flowtask/interfaces/GoogleDrive.py +127 -0
- flowtask/interfaces/GoogleGCS.py +89 -0
- flowtask/interfaces/GoogleGeocoding.py +93 -0
- flowtask/interfaces/GoogleLang.py +114 -0
- flowtask/interfaces/GooglePub.py +61 -0
- flowtask/interfaces/GoogleSheet.py +68 -0
- flowtask/interfaces/IMAPClient.py +137 -0
- flowtask/interfaces/O365Calendar.py +113 -0
- flowtask/interfaces/O365Client.py +220 -0
- flowtask/interfaces/OneDrive.py +284 -0
- flowtask/interfaces/Outlook.py +155 -0
- flowtask/interfaces/ParrotBot.py +130 -0
- flowtask/interfaces/SSHClient.py +378 -0
- flowtask/interfaces/Sharepoint.py +496 -0
- flowtask/interfaces/__init__.py +36 -0
- flowtask/interfaces/azureauth.py +119 -0
- flowtask/interfaces/cache.py +201 -0
- flowtask/interfaces/client.py +82 -0
- flowtask/interfaces/compress.py +525 -0
- flowtask/interfaces/credentials.py +124 -0
- flowtask/interfaces/d2l.py +239 -0
- flowtask/interfaces/databases/__init__.py +5 -0
- flowtask/interfaces/databases/db.py +223 -0
- flowtask/interfaces/databases/documentdb.py +55 -0
- flowtask/interfaces/databases/rethink.py +39 -0
- flowtask/interfaces/dataframes/__init__.py +11 -0
- flowtask/interfaces/dataframes/abstract.py +21 -0
- flowtask/interfaces/dataframes/arrow.py +71 -0
- flowtask/interfaces/dataframes/dt.py +69 -0
- flowtask/interfaces/dataframes/pandas.py +167 -0
- flowtask/interfaces/dataframes/polars.py +60 -0
- flowtask/interfaces/db.py +263 -0
- flowtask/interfaces/env.py +46 -0
- flowtask/interfaces/func.py +137 -0
- flowtask/interfaces/http.py +1780 -0
- flowtask/interfaces/locale.py +40 -0
- flowtask/interfaces/log.py +75 -0
- flowtask/interfaces/mask.py +143 -0
- flowtask/interfaces/notification.py +154 -0
- flowtask/interfaces/playwright.py +339 -0
- flowtask/interfaces/powerpoint.py +368 -0
- flowtask/interfaces/py.typed +0 -0
- flowtask/interfaces/qs.py +376 -0
- flowtask/interfaces/result.py +87 -0
- flowtask/interfaces/selenium_service.py +779 -0
- flowtask/interfaces/smartsheet.py +154 -0
- flowtask/interfaces/stat.py +39 -0
- flowtask/interfaces/task.py +96 -0
- flowtask/interfaces/template.py +118 -0
- flowtask/interfaces/vectorstores/__init__.py +1 -0
- flowtask/interfaces/vectorstores/abstract.py +133 -0
- flowtask/interfaces/vectorstores/milvus.py +669 -0
- flowtask/interfaces/zammad.py +107 -0
- flowtask/models.py +193 -0
- flowtask/parsers/__init__.py +15 -0
- flowtask/parsers/_yaml.c +11978 -0
- flowtask/parsers/_yaml.cpython-39-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/argparser.py +235 -0
- flowtask/parsers/base.c +15155 -0
- flowtask/parsers/base.cpython-39-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/json.c +11968 -0
- flowtask/parsers/json.cpython-39-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/maps.py +49 -0
- flowtask/parsers/toml.c +11968 -0
- flowtask/parsers/toml.cpython-39-x86_64-linux-gnu.so +0 -0
- flowtask/plugins/__init__.py +16 -0
- flowtask/plugins/components/__init__.py +0 -0
- flowtask/plugins/handler/__init__.py +45 -0
- flowtask/plugins/importer.py +31 -0
- flowtask/plugins/sources/__init__.py +0 -0
- flowtask/runner.py +283 -0
- flowtask/scheduler/__init__.py +9 -0
- flowtask/scheduler/functions.py +493 -0
- flowtask/scheduler/handlers/__init__.py +8 -0
- flowtask/scheduler/handlers/manager.py +504 -0
- flowtask/scheduler/handlers/models.py +58 -0
- flowtask/scheduler/handlers/service.py +72 -0
- flowtask/scheduler/notifications.py +65 -0
- flowtask/scheduler/scheduler.py +993 -0
- flowtask/services/__init__.py +0 -0
- flowtask/services/bots/__init__.py +0 -0
- flowtask/services/bots/telegram.py +264 -0
- flowtask/services/files/__init__.py +11 -0
- flowtask/services/files/manager.py +522 -0
- flowtask/services/files/model.py +37 -0
- flowtask/services/files/service.py +767 -0
- flowtask/services/jira/__init__.py +3 -0
- flowtask/services/jira/jira_actions.py +191 -0
- flowtask/services/tasks/__init__.py +13 -0
- flowtask/services/tasks/launcher.py +213 -0
- flowtask/services/tasks/manager.py +323 -0
- flowtask/services/tasks/service.py +275 -0
- flowtask/services/tasks/task_manager.py +376 -0
- flowtask/services/tasks/tasks.py +155 -0
- flowtask/storages/__init__.py +16 -0
- flowtask/storages/exceptions.py +12 -0
- flowtask/storages/files/__init__.py +8 -0
- flowtask/storages/files/abstract.py +29 -0
- flowtask/storages/files/filesystem.py +66 -0
- flowtask/storages/tasks/__init__.py +19 -0
- flowtask/storages/tasks/abstract.py +26 -0
- flowtask/storages/tasks/database.py +33 -0
- flowtask/storages/tasks/filesystem.py +108 -0
- flowtask/storages/tasks/github.py +119 -0
- flowtask/storages/tasks/memory.py +45 -0
- flowtask/storages/tasks/row.py +25 -0
- flowtask/tasks/__init__.py +0 -0
- flowtask/tasks/abstract.py +526 -0
- flowtask/tasks/command.py +118 -0
- flowtask/tasks/pile.py +486 -0
- flowtask/tasks/py.typed +0 -0
- flowtask/tasks/task.py +778 -0
- flowtask/template/__init__.py +161 -0
- flowtask/tests.py +257 -0
- flowtask/types/__init__.py +8 -0
- flowtask/types/typedefs.c +11347 -0
- flowtask/types/typedefs.cpython-39-x86_64-linux-gnu.so +0 -0
- flowtask/utils/__init__.py +24 -0
- flowtask/utils/constants.py +117 -0
- flowtask/utils/encoders.py +21 -0
- flowtask/utils/executor.py +112 -0
- flowtask/utils/functions.cpp +14280 -0
- flowtask/utils/functions.cpython-39-x86_64-linux-gnu.so +0 -0
- flowtask/utils/json.cpp +13349 -0
- flowtask/utils/json.cpython-39-x86_64-linux-gnu.so +0 -0
- flowtask/utils/mail.py +63 -0
- flowtask/utils/parseqs.c +13324 -0
- flowtask/utils/parserqs.cpython-39-x86_64-linux-gnu.so +0 -0
- flowtask/utils/stats.py +308 -0
- flowtask/utils/transformations.py +74 -0
- flowtask/utils/uv.py +12 -0
- flowtask/utils/validators.py +97 -0
- flowtask/version.py +11 -0
- flowtask-5.8.4.dist-info/LICENSE +201 -0
- flowtask-5.8.4.dist-info/METADATA +209 -0
- flowtask-5.8.4.dist-info/RECORD +470 -0
- flowtask-5.8.4.dist-info/WHEEL +6 -0
- flowtask-5.8.4.dist-info/entry_points.txt +3 -0
- flowtask-5.8.4.dist-info/top_level.txt +2 -0
- plugins/components/CreateQR.py +39 -0
- plugins/components/TestComponent.py +28 -0
- plugins/components/Use1.py +13 -0
- plugins/components/Workplace.py +117 -0
- plugins/components/__init__.py +3 -0
- plugins/sources/__init__.py +0 -0
- plugins/sources/get_populartimes.py +78 -0
- plugins/sources/google.py +150 -0
- plugins/sources/hubspot.py +679 -0
- plugins/sources/icims.py +679 -0
- plugins/sources/mobileinsight.py +501 -0
- plugins/sources/newrelic.py +262 -0
- plugins/sources/uap.py +268 -0
- plugins/sources/venu.py +244 -0
- plugins/sources/vocinity.py +314 -0
@@ -0,0 +1,800 @@
|
|
1
|
+
import asyncio
|
2
|
+
import aiohttp
|
3
|
+
from typing import Any, Dict
|
4
|
+
from collections.abc import Callable
|
5
|
+
import random
|
6
|
+
from concurrent.futures import ThreadPoolExecutor
|
7
|
+
from bs4 import BeautifulSoup
|
8
|
+
import httpx
|
9
|
+
import pandas as pd
|
10
|
+
import backoff
|
11
|
+
import ssl
|
12
|
+
from navconfig.logging import logging
|
13
|
+
# Internals
|
14
|
+
from ..exceptions import (
|
15
|
+
ComponentError,
|
16
|
+
DataNotFound,
|
17
|
+
NotSupported,
|
18
|
+
ConfigError
|
19
|
+
)
|
20
|
+
from .flow import FlowComponent
|
21
|
+
from ..interfaces import HTTPService, SeleniumService
|
22
|
+
from ..interfaces.http import ua
|
23
|
+
|
24
|
+
|
25
|
+
# Raise the threshold of the chatty Selenium-related loggers to WARNING so
# their DEBUG/INFO output does not drown the component's own messages.
for _quiet_logger in ('selenium.webdriver', 'WDM', 'hpack', 'seleniumwire'):
    logging.getLogger(name=_quiet_logger).setLevel(logging.WARNING)
del _quiet_logger
|
29
|
+
|
30
|
+
|
31
|
+
# Template request payload for a product lookup. The ``None`` entries
# (locationId, zipCode and the item's sku/condition/reservationToken) are
# placeholders left unset here.
ProductPayload = {
    "locationId": None,
    "zipCode": None,
    "showOnShelf": True,
    "lookupInStoreQuantity": True,
    "xboxAllAccess": False,
    "consolidated": True,
    "showOnlyOnShelf": False,
    "showInStore": True,
    "pickupTypes": ["UPS_ACCESS_POINT", "FEDEX_HAL"],
    "onlyBestBuyLocations": True,
    # A single item entry with a quantity of one.
    "items": [
        {
            "sku": None,
            "condition": None,
            "quantity": 1,
            "itemSeqNumber": "1",
            "reservationToken": None,
            "selectedServices": [],
            "requiredAccessories": [],
            "isTradeIn": False,
            "isLeased": False,
        },
    ],
}
|
59
|
+
|
60
|
+
|
61
|
+
def bad_gateway_exception(exc):
    """Tell whether ``exc`` is an httpx status error carrying HTTP 502.

    Returns:
        True only when ``exc`` is an ``httpx.HTTPStatusError`` whose response
        status code is 502; False for every other exception.
    """
    if not isinstance(exc, httpx.HTTPStatusError):
        return False
    return exc.response.status_code == 502
|
64
|
+
|
65
|
+
|
66
|
+
class BestBuy(FlowComponent, SeleniumService, HTTPService):
|
67
|
+
"""BestBuy.
|
68
|
+
|
69
|
+
Combining an API key and web scraping, this component is able to extract
|
70
|
+
Best Buy Information (stores, products, Product Availability, etc).
|
71
|
+
|
72
|
+
|
73
|
+
Example:
|
74
|
+
|
75
|
+
```yaml
|
76
|
+
BestBuy:
|
77
|
+
type: availability
|
78
|
+
product_info: false
|
79
|
+
brand: Bose
|
80
|
+
```
|
81
|
+
|
82
|
+
"""
|
83
|
+
def __init__(
    self,
    loop: asyncio.AbstractEventLoop = None,
    job: Callable = None,
    stat: Callable = None,
    **kwargs,
):
    """Configure the component and prepare request cookies and headers.

    Args:
        loop: Forwarded to the parent initializer.
        job: Forwarded to the parent initializer.
        stat: Forwarded to the parent initializer.
        **kwargs: Component options. ``type`` (required) names the function
            to run and is removed before the remaining options are passed
            on; ``chunk_size`` (default 100), ``task_parts`` (default 10)
            and ``product_info`` (default False) are read and left in place.

    Raises:
        ConfigError: If ``type`` is missing or empty.
    """
    # The function name must be known before the parents are initialised.
    self._fn = kwargs.pop('type', None)
    self.chunk_size: int = kwargs.get('chunk_size', 100)
    self.task_parts: int = kwargs.get('task_parts', 10)
    self.product_info: bool = kwargs.get('product_info', False)
    if not self._fn:
        raise ConfigError(
            "BestBuy: require a `type` Function to be called, ex: availability"
        )
    super(BestBuy, self).__init__(loop=loop, job=job, stat=stat, **kwargs)

    # Always use proxies:
    self.use_proxy: bool = True
    self._free_proxy: bool = False

    # Pools of hard-coded cookie values; one of each is picked at random.
    ctt_pool: list = [
        "f3dbf688e45146555bb2b8604a993601",
        "06f4dfe367e87866397ef32302f5042e",
        "4e07e03ff03f5debc4e09ac4db9239ac",
    ]
    sid_pool: list = [
        "d4fa1142-2998-4b68-af78-46d821bb3e1f",
        "9627390e-b423-459f-83ee-7964dd05c9a8",
    ]
    picked_ctt = random.choice(ctt_pool)
    picked_sid = random.choice(sid_pool)
    self.cookies = {
        "CTT": picked_ctt,
        "SID": picked_sid,
        "bby_rdp": "l",
        "bm_sz": "9F5ED0110AF18594E2347A89BB4AB998~YAAQxm1lX6EqYHGSAQAAw+apmhkhXIeGYEc4KnzUMsjeac3xEoQmTNz5+of62i3RXQL6fUI+0FvCb/jgSjiVQOcfaSF+LdLkOXP1F4urgeIcqp/dBAhu5MvZXaCQsT06bwr7j21ozhFfTTWhjz1HmZN8wecsE6WGbK6wXp/33ODKlLaGWkTutqHbkzvMiiHXBCs9hT8jVny0REfita4AfqTK85Y6/M6Uq4IaDLPBLnTtJ0cTlPHk1HmkG5EsnI46llghcx1KZnCGnvZfHdb2ME9YZJ2GmC2b7dNmAgyL/gSVpoNdCJOj5Jk6z/MCVhZ81OZfX4S01E2F1mBGq4uV5/1oK2KR4YgZP4dsTN8izEEPybUKGY3CyM1gOUc=~3556420~4277810",  # noqa
        "bby_cbc_lb": "p-browse-e",
        "intl_splash": "false",
    }

    # Component defaults first; headers already present on the instance
    # (set by the parent initializer) are applied last and therefore win.
    request_headers: dict = {
        "Host": "www.bestbuy.com",
        "Referer": "https://www.bestbuy.com/",
        "X-Requested-With": "XMLHttpRequest",
        "TE": "trailers",
        "Accept-Language": "en-US,en;q=0.5",
        "Accept-Encoding": "gzip, deflate",
        "DNT": "1",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": random.choice(ua),
    }
    request_headers.update(self.headers)
    self.headers = request_headers

    # Ten-slot semaphore; ``_product_info`` acquires it around each lookup.
    self.semaphore = asyncio.Semaphore(10)
|
140
|
+
|
141
|
+
async def close(self, **kwargs) -> bool:
    """Release the Selenium driver.

    Calls ``close_driver()`` and reports success; ``kwargs`` are accepted
    but not used.

    Returns:
        Always True.
    """
    self.close_driver()
    return True
|
144
|
+
|
145
|
+
async def start(self, **kwargs) -> bool:
    """Validate the input and configuration before the component runs.

    Runs the parent ``start``, adopts the previous component's output as
    ``self.data`` when there is one, resolves the API token and checks that
    the configured function exists on this component.

    Returns:
        True when every check passed (the original fell through and
        returned None despite the ``bool`` annotation).

    Raises:
        ComponentError: If the previous component's output is not a pandas
            DataFrame.
        ConfigError: If no attribute named after the configured ``type``
            exists on this component.
    """
    await super(BestBuy, self).start(**kwargs)

    if self.previous:
        self.data = self.input
        # NOTE(review): nesting reconstructed from a flattened listing; the
        # type check is assumed to apply only when a previous component
        # supplied data - confirm against the packaged file.
        if not isinstance(self.data, pd.DataFrame):
            raise ComponentError(
                "Incompatible Pandas Dataframe"
            )
    # A missing previous component is tolerated: no error is raised here.

    # Resolve the token through get_env_value, starting from the configured
    # ``api_token`` attribute when present, else from 'BEST_BUY_API_KEY'.
    if hasattr(self, 'api_token'):
        self.api_token = self.get_env_value(self.api_token)
    else:
        self.api_token = self.get_env_value('BEST_BUY_API_KEY')

    # The configured ``type`` must name an attribute of this component.
    if not hasattr(self, self._fn):
        raise ConfigError(
            f"BestBuy: Unable to found Function {self._fn} in BBY Component."
        )
    return True
|
168
|
+
|
169
|
+
def _get_search_url(self, brand: str, sku: str) -> str:
|
170
|
+
front_url = "https://www.bestbuy.com/site/searchpage.jsp?cp="
|
171
|
+
middle_url = "&searchType=search&st="
|
172
|
+
page_count = 1
|
173
|
+
# TODO: Get the Brand and Model from the Component.
|
174
|
+
search_term = f'{sku}'
|
175
|
+
end_url = "&_dyncharset=UTF-8&id=pcat17071&type=page&sc=Global&nrp=&sp=&qp=&list=n&af=true&iht=y&usc=All%20Categories&ks=960&keys=keys" # noqa
|
176
|
+
url = front_url + str(page_count) + middle_url + search_term + end_url
|
177
|
+
print('SEARCH URL: ', url)
|
178
|
+
return url
|
179
|
+
|
180
|
+
async def _extract_product_info(self, product_element):
|
181
|
+
"""Extract product information from a specific product element"""
|
182
|
+
try:
|
183
|
+
# Get the SKU from data-testid attribute (primary method)
|
184
|
+
sku_id = product_element.get("data-testid")
|
185
|
+
|
186
|
+
# Fallback: Try to find SKU in the attribute div
|
187
|
+
if not sku_id:
|
188
|
+
sku_element = product_element.select_one('div.attribute:-soup-contains("SKU") span.value')
|
189
|
+
if sku_element:
|
190
|
+
sku_id = sku_element.text.strip()
|
191
|
+
|
192
|
+
# Get product title through the product link
|
193
|
+
title_element = product_element.select_one('.product-list-item-title a, h4.sku-title a, a.product-list-item-link')
|
194
|
+
if not title_element:
|
195
|
+
title_element = product_element.select_one('a.product-list-item-link')
|
196
|
+
|
197
|
+
if not title_element:
|
198
|
+
return None
|
199
|
+
|
200
|
+
title = title_element.text.strip()
|
201
|
+
|
202
|
+
# Find price
|
203
|
+
price_element = product_element.select_one('div[data-testid="medium-customer-price"], div.customer-price.medium, .priceView-customer-price span, .price-block span')
|
204
|
+
price = price_element.text.strip() if price_element else "N/A"
|
205
|
+
|
206
|
+
# Image
|
207
|
+
image_element = product_element.select_one('img.product-image, img')
|
208
|
+
image = image_element['src'] if image_element and 'src' in image_element.attrs else None
|
209
|
+
|
210
|
+
# URL
|
211
|
+
url_element = title_element
|
212
|
+
url = "{url}".format(
|
213
|
+
url=url_element['href']
|
214
|
+
) if url_element and 'href' in url_element.attrs else None
|
215
|
+
|
216
|
+
self._logger.notice(f':: Product URL: {url}')
|
217
|
+
|
218
|
+
# Get model
|
219
|
+
model_element = product_element.select_one('div.attribute:-soup-contains("Model") span.value')
|
220
|
+
model_value = model_element.text.strip() if model_element else self.model
|
221
|
+
|
222
|
+
return {
|
223
|
+
"sku": sku_id,
|
224
|
+
"brand": self.brand,
|
225
|
+
"product_name": title,
|
226
|
+
"image_url": image,
|
227
|
+
"price": price,
|
228
|
+
"url": url
|
229
|
+
}
|
230
|
+
except Exception as e:
|
231
|
+
self._logger.error(f"Error extracting product info: {e}")
|
232
|
+
return None
|
233
|
+
|
234
|
+
async def _product_info(self, idx, row):
    """Search Best Buy for the product in ``row`` and store its details.

    The search-results page for the row's brand/model is loaded through the
    Selenium driver, scrolled, and parsed with BeautifulSoup. Every listed
    product is compared against the row's SKU and model; on the first match
    the fields returned by ``_extract_product_info`` are written into
    ``self.data`` at ``idx`` and ``enabled`` is set to True. When nothing
    matches, ``enabled`` stays False.

    Args:
        idx: Index label of the row inside ``self.data``.
        row: Row being processed; must provide ``model`` and ``brand`` and
            may provide ``sku``.

    Returns:
        The ``row`` object that was passed in (updates go to ``self.data``).
    """
    def _as_text(value):
        # Normalise a cell/attribute to a stripped string so that values
        # coming from the DataFrame (possibly int/float/NaN) can be compared
        # with the strings scraped from the HTML. None/NaN/blank -> None.
        if value is None:
            return None
        if isinstance(value, float):
            if value != value:  # NaN is the only value not equal to itself
                return None
            if value.is_integer():
                value = int(value)  # 6418599.0 -> "6418599"
        text = str(value).strip()
        return text or None

    async with self.semaphore:
        # Extract model, brand and SKU from the row
        model = row['model']
        brand = row['brand']
        sku = row['sku'] if 'sku' in row else None

        # Other code on this instance reads these attributes.
        self.brand = brand
        self.sku = sku
        self.model = model

        # The comparisons below use these locals instead of self.model /
        # self.sku: the attributes are shared by every task running on this
        # instance and may be overwritten while this task is awaiting.
        wanted_sku = _as_text(sku)
        wanted_model = _as_text(model)

        try:
            # Generate the search URL
            url = self._get_search_url(brand, model)

            # Initialize Selenium driver if it doesn't exist
            if not self._driver:
                await self.get_driver()

            # Navigate to the URL
            await self.get_page(url)

            # Initially assume we won't find the product
            self.data.loc[idx, 'enabled'] = False

            # Progressive scroll with long pauses so dynamic content loads
            self._execute_scroll(scroll_pause_time=4.0, max_scrolls=10)

            # Additional wait after completing all scrolls
            await asyncio.sleep(3)

            # Parse the HTML content of the page after scrolling
            page_content = self._driver.page_source
            soup = BeautifulSoup(page_content, 'html.parser')

            # Find all product items, falling back to the alternate class name
            product_items = soup.find_all('li', {'class': ['product-list-item']})
            if not product_items:
                product_items = soup.find_all('li', {'class': ['sku-item']})

            self._logger.debug(
                f"Found {len(product_items)} products in search results"
            )
            self._logger.debug(
                f"Searching for: Model={wanted_model}, SKU={wanted_sku}"
            )

            # Process each product item
            for item in product_items:
                try:
                    # The listing exposes its SKU through data-testid
                    sku_id = item.get("data-testid")

                    # Try to get the model
                    model_element = item.select_one(
                        'div.attribute:-soup-contains("Model") span.value'
                    )
                    model_value = model_element.text.strip() if model_element else None

                    if model_value or sku_id:
                        self._logger.debug(
                            f"Found product: SKU={sku_id}, Model={model_value}"
                        )

                    # SKU: compare as text, since the scraped value is a
                    # string while the DataFrame cell may be numeric.
                    sku_match = (
                        wanted_sku is not None and _as_text(sku_id) == wanted_sku
                    )

                    # Model: flexible comparison (case-insensitive, ignoring
                    # spaces, or one value contained in the other; the
                    # containment checks also cover plain equality).
                    model_match = False
                    if model_value and wanted_model:
                        model_match = (
                            model_value.lower() == wanted_model.lower()
                            or model_value.replace(" ", "") == wanted_model.replace(" ", "")
                            or wanted_model in model_value
                            or model_value in wanted_model
                        )

                    if not (sku_match or model_match):
                        continue

                    self._logger.info(
                        f"Found matching product - SKU: {sku_id}, Model: {model_value}"
                    )

                    # Re-pin this row's values before extraction in case
                    # another task replaced them during an earlier await.
                    self.brand = brand
                    self.sku = sku
                    self.model = model

                    # Extract product information from this element
                    product_info = await self._extract_product_info(item)

                    if product_info:
                        # Update the DataFrame with product information
                        for key, value in product_info.items():
                            if key in self.data.columns:
                                self.data.loc[idx, key] = value
                            else:
                                self.data.at[idx, key] = value

                        # Mark as enabled since we found a match
                        self.data.loc[idx, 'enabled'] = True

                        # We found our product, no need to continue
                        return row
                except Exception as e:
                    # Best effort: one malformed listing must not abort the
                    # scan of the remaining results.
                    self._logger.warning(f"Error processing product: {e}")

            # If we got here, no match was found
            self._logger.warning(
                f"No matching product found for {brand} {model} / {sku}"
            )
            return row

        except Exception as exc:
            self._logger.error(
                f"Error during product search for {brand} {model}: {exc}"
            )
            return row
|
343
|
+
|
344
|
+
def chunkify(self, lst, n):
    """Lazily yield consecutive slices of ``lst`` holding at most ``n`` items."""
    yield from (lst[start:start + n] for start in range(0, len(lst), n))
|
348
|
+
|
349
|
+
@backoff.on_exception(
    backoff.expo,
    (httpx.ConnectTimeout, httpx.HTTPStatusError),
    max_tries=2,
    giveup=lambda e: not bad_gateway_exception(e) and not isinstance(e, httpx.ConnectTimeout)
)
async def _check_store_availability(self, idx, row, cookies):
    """Query store availability for one (store, SKU) row and update ``self.data``.

    Posts the row's SKU, zipcode and location to the Best Buy
    ``storeAvailability`` endpoint. Every location in the response that
    matches a (``location_code``, ``sku``) row of ``self.data`` has that row
    updated and flagged ``checked`` so it is not requested again.

    Args:
        idx: Index label in ``self.data`` of the row that triggered the call.
        row: Row providing ``zipcode``, ``location_code``, ``sku``, ``brand``.
        cookies: Cookies forwarded to ``self.api_post``.

    Returns:
        The ``row`` object that was passed in (updates go to ``self.data``).

    NOTE(review): every exception raised by ``api_post`` is caught inside
    this body, so the ``backoff`` decorator never sees one and never
    retries - confirm whether retries are expected here.
    """
    # Local import: the payload template is a module-level object and must
    # be copied deeply before its nested items are edited.
    from copy import deepcopy

    async with self.semaphore:
        # Prepare payload for the API request
        zipcode = row['zipcode']
        location_code = str(row['location_code'])
        sku = row['sku']
        brand = row['brand']
        # A shallow .copy() would share the nested "items" dicts with the
        # template (and with every other concurrent task), so writing the
        # SKU below would leak into other requests.
        payload = deepcopy(ProductPayload)
        payload["locationId"] = location_code
        payload["zipCode"] = zipcode
        for item in payload["items"]:
            item["sku"] = sku

        # Skip the HTTP call if this store/SKU was already filled in by an
        # earlier response.
        matching_store = self.data[
            (self.data['location_code'] == location_code) & (self.data['sku'] == sku)
        ]
        if not matching_store.empty and bool(matching_store.iloc[0]['checked']):
            return row

        try:
            result = await self.api_post(
                url="https://www.bestbuy.com/productfulfillment/c/api/2.0/storeAvailability",
                cookies=cookies,
                payload=payload
            )
            self._num_iterations += 1
        except (httpx.TimeoutException, httpx.HTTPError) as ex:
            self._logger.warning(f"Request failed: {ex}")
            return row
        except Exception as ex:
            self._logger.error(f"An error occurred: {ex}")
            return row

        if not result:
            self._logger.warning(
                f"No availability data found for {sku} at zipcode {zipcode}"
            )
            return row

        # The button state belongs to the response as a whole, so compute it
        # once instead of once per location.
        button_state = result.get('consolidatedButtonState', {}).get('buttonState', '')
        is_enabled = button_state != 'NOT_AVAILABLE'

        # Extract the availability data from the API response
        items = result.get('ispu', {}).get('items', [])
        for item in items:
            locations = item.get('locations', [])
            for location in locations:
                # Always written to the row that triggered this request.
                # (Reusing ``idx`` for the matched store would redirect this
                # write to another row on later iterations.)
                self.data.loc[idx, 'enabled'] = is_enabled
                lid = location.get('locationId')
                # Find matching store and SKU in DataFrame
                matching_store = self.data[
                    (self.data['location_code'] == lid) & (self.data['sku'] == sku)
                ]
                if matching_store.empty:
                    continue
                store_idx = matching_store.index[0]
                # bool(): the cell comes back as a NumPy bool, for which an
                # identity test against True is never satisfied.
                if bool(self.data.loc[store_idx, 'checked']):
                    print('Already checked, continue ...')
                    continue  # Skip this row if it's already marked as checked
                if 'availability' not in location:
                    self.data.loc[store_idx, 'locationId'] = lid
                    self.data.loc[store_idx, 'checked'] = True
                    continue  # This store doesn't have availability
                print(f'Found matching store {lid} for sku {sku}')

                # Update the DataFrame row with new availability data
                self.data.loc[store_idx, ['brand', 'location_data']] = [brand, location]
                for key, val in location.items():
                    if key in self.data.columns:
                        self.data.at[store_idx, key] = val
                    else:
                        self.data.at[store_idx, key] = None
                    if key == 'inStoreAvailability':
                        # A non-dict payload (e.g. null) yields no quantity
                        # instead of raising.
                        quantity = (
                            val.get('availableInStoreQuantity', 0)
                            if isinstance(val, dict) else None
                        )
                        self.data.loc[store_idx, 'availableInStoreQuantity'] = quantity
                # Mark the row as checked
                self.data.loc[store_idx, 'checked'] = True
        return row
|
434
|
+
|
435
|
+
def column_exists(self, column: str, default_val: Any = None):
    """Ensure ``column`` is present in ``self.data``.

    Args:
        column: Name of the column to look for.
        default_val: Value used to fill the column when it has to be created.

    Returns:
        True when the column was already there; False when it was missing
        and has just been created (a warning is logged in that case).
    """
    if column in self.data.columns:
        return True

    # Missing: report it, then create the column filled with the default.
    self._logger.warning(
        f"Column {column} does not exist in the Dataframe"
    )
    self.data[column] = default_val
    return False
|
443
|
+
|
444
|
+
async def availability(self):
    """Check Best Buy store availability for every row of ``self.data``.

    Builds an httpx cookie jar from ``self.cookies``, makes sure the output
    columns exist, runs ``_check_store_availability`` for each row through
    ``_processing_tasks`` and records the number of HTTP calls as a metric.

    Returns:
        ``self.data`` after the per-row checks have run.
    """
    cookie_jar = httpx.Cookies()
    for name, content in self.cookies.items():
        cookie_jar.set(name, content, domain='.bestbuy.com', path='/')

    # Output columns and the default used when one has to be created.
    output_columns = (
        ('brand', None),
        ('location_data', None),
        ('locationId', None),
        ('availability', None),
        ('inStoreAvailability', None),
        ('onShelfDisplay', False),
        ('availableInStoreQuantity', 0),
        ('enabled', False),
    )
    for column_name, default in output_columns:
        self.column_exists(column_name, default)

    # Fresh 'checked' flag: no store has been queried yet.
    self.data['checked'] = False

    print('starting ...')

    # One coroutine per DataFrame row.
    pending = []
    for idx, row in self.data.iterrows():
        pending.append(self._check_store_availability(idx, row, cookie_jar))

    self._num_iterations = 0
    await self._processing_tasks(pending)

    self.add_metric('NUM_HTTP_CALLS', self._num_iterations)

    self._logger.notice("Ending Checking Availability.")

    return self.data
|
493
|
+
|
494
|
+
async def products(self):
    """Fetch all products from the Best Buy API, following the pagination.

    Pages are requested one after another (100 products per page) until
    the reported last page is reached or a page comes back without
    products. The number of HTTP calls is recorded as ``NUM_HTTP_CALLS``.

    Returns:
        pandas.DataFrame: one row per product; an empty DataFrame when the
        download fails (the error is logged).
    """
    all_products = []
    current_page = 1
    total_pages = None
    show = 'sku,upc,modelNumber,name,manufacturer,type,salePrice,url,productTemplate,classId,class,subclassId,subclass,department,image,longDescription,customerReviewCount,customerReviewAverage'
    self._num_iterations = 0
    try:
        # One session for the whole pagination instead of one per page.
        async with aiohttp.ClientSession() as session:
            while True:
                url = f"https://api.bestbuy.com/v1/products?page={current_page}&pageSize=100&apiKey={self.api_token}&show={show}&format=json"
                async with session.get(url) as result:
                    response = await result.json()
                self._num_iterations += 1

                # Extract products from the response
                products = response.get("products", [])
                if not products:
                    # Stop here: retrying the same page (what ``continue``
                    # did) would loop forever on an empty or error payload.
                    break
                all_products.extend(products)

                # Pagination control
                current_page = response.get("currentPage", current_page)
                total_pages = response.get("totalPages", current_page) if total_pages is None else total_pages
                self._logger.debug(f"{url}\n Current Page: {current_page}, Total Pages: {total_pages}, Products: {len(all_products)}")

                # Break if we've processed all pages
                if current_page >= total_pages:
                    break

                # Increment page for the next request
                current_page += 1

        self.add_metric('NUM_HTTP_CALLS', self._num_iterations)
        return pd.DataFrame(all_products)

    except Exception as exc:
        self._logger.error(f"Error while fetching products: {exc}")
        # Same type as the success path, so callers that expect a
        # DataFrame do not break on the failure path.
        return pd.DataFrame()
|
540
|
+
|
541
|
+
async def stores(self):
    """Fetch all stores from the Best Buy API, following the pagination.

    Pages are requested one after another (100 stores per page) until the
    reported last page is reached or a page comes back without stores.
    The number of HTTP calls is recorded as ``NUM_HTTP_CALLS``.

    Returns:
        pandas.DataFrame: one row per store; an empty DataFrame when the
        download fails (the error is logged).
    """
    all_stores = []
    current_page = 1
    total_pages = None
    self._num_iterations = 0
    try:
        # One session for the whole pagination instead of one per page.
        async with aiohttp.ClientSession() as session:
            while True:
                url = f"https://api.bestbuy.com/v1/stores?page={current_page}&pageSize=100&apiKey={self.api_token}&format=json"
                async with session.get(url) as result:
                    response = await result.json()
                self._num_iterations += 1

                # Extract stores from the response
                stores = response.get("stores", [])
                if not stores:
                    # Stop here: retrying the same page (what ``continue``
                    # did) would loop forever on an empty or error payload.
                    break
                all_stores.extend(stores)

                # Pagination control
                current_page = response.get("currentPage", current_page)
                total_pages = response.get("totalPages", current_page) if total_pages is None else total_pages
                self._logger.debug(f"{url}\n Current Page: {current_page}, Total Pages: {total_pages}, Stores: {len(all_stores)}")

                # Break if we've processed all pages
                if current_page >= total_pages:
                    break

                # Increment page for the next request
                current_page += 1

        self.add_metric('NUM_HTTP_CALLS', self._num_iterations)
        return pd.DataFrame(all_stores)

    except Exception as exc:
        self._logger.error(f"Error while fetching stores: {exc}")
        # Same type as the success path, so callers that expect a
        # DataFrame do not break on the failure path.
        return pd.DataFrame()
|
583
|
+
|
584
|
+
@backoff.on_exception(
    backoff.expo,
    (httpx.TimeoutException, httpx.ConnectTimeout, httpx.HTTPStatusError),
    max_tries=3,
    jitter=backoff.full_jitter,
    giveup=lambda e: not bad_gateway_exception(e) and not isinstance(e, httpx.ConnectTimeout)
)
async def _product_reviews(self, idx, row, cookies):
    """Download the most recent reviews for the SKU in ``row``.

    Requests up to 20 pages of 20 reviews each from the Best Buy reviews
    endpoint and merges each review with the row's own data.

    Args:
        idx: Index label of the row (not used by this method).
        row: Row providing ``sku``; its values are copied into every review.
        cookies: Cookies forwarded to ``self.api_get``.

    Returns:
        list[dict]: one dict per review (row data, ``total_reviews`` and
        the review fields); an empty list when a request raises.

    NOTE(review): every exception raised inside the loop is caught in this
    body, so the ``backoff`` decorator never sees one and never retries -
    confirm whether retries are expected here.
    """
    async with self.semaphore:
        sku = row['sku']
        pagesize = 20
        max_pages = 20  # Maximum number of pages to fetch
        current_page = 1
        all_reviews = []
        total_reviews = 0
        try:
            while current_page <= max_pages:
                payload = {
                    "page": current_page,
                    "pageSize": pagesize,
                    "sort": "MOST_RECENT",
                    "sku": sku
                }
                result = await self.api_get(
                    url="https://www.bestbuy.com/ugc/v2/reviews",
                    cookies=cookies,
                    params=payload
                )
                await asyncio.sleep(0.1)
                # Guard BEFORE touching the response: reading it first
                # raised on an empty result and threw away the pages that
                # had already been collected.
                if not result:
                    self._logger.warning(
                        f"No Product Reviews found for {sku}."
                    )
                    break
                total_reviews = result.get('totalResults', 0)
                # Extract the reviews data from the API response
                items = result.get('topics', [])
                if len(items) == 0:
                    break

                all_reviews.extend(items)

                # Determine if we've reached the last page
                total_pages = result.get('totalPages', max_pages)
                if current_page >= total_pages:
                    break
                current_page += 1  # Move to the next page
        except (httpx.TimeoutException, httpx.HTTPError) as ex:
            self._logger.warning(f"Request failed: {ex}")
            return []
        except Exception as ex:
            self._logger.error(f"An error occurred: {ex}")
            return []

        # Merge every review with the originating row
        reviews = []
        for item in all_reviews:
            # Exclude certain keys
            filtered_item = {k: v for k, v in item.items() if k not in ('brandResponses', 'badges', 'photos', 'secondaryRatings')}
            # Combine with original row data; review fields win on clashes
            review_data = row.to_dict()
            review_data['total_reviews'] = total_reviews
            review_data.update(filtered_item)
            reviews.append(review_data)
        self._logger.info(
            f"Fetched {len(reviews)} reviews for SKU {sku}."
        )
        # Random pause of 1-3 s while still holding the semaphore
        await asyncio.sleep(random.randint(1, 3))
        return reviews
|
656
|
+
|
657
|
+
async def reviews(self):
    """Collect Best Buy product reviews for every row of ``self.data``.

    Runs ``_product_reviews`` for each row, flattens the per-product lists
    into one DataFrame, drops duplicated review ids, renames ``text`` to
    ``review`` and ``id`` to ``reviewid``, tags the origin, and replaces
    ``self.data`` with the result.

    Returns:
        The new ``self.data`` holding one row per review.
    """
    cookie_jar = httpx.Cookies()
    for name, content in self.cookies.items():
        cookie_jar.set(name, content, domain='.bestbuy.com', path='/')

    # Add the 'checked' flag column before the rows are handed out.
    self.data['checked'] = False

    print('starting ...')

    # One coroutine per DataFrame row.
    pending = []
    for idx, row in self.data.iterrows():
        pending.append(self._product_reviews(idx, row, cookie_jar))

    per_product = await self._processing_tasks(pending)

    # Flatten the list of per-product lists.
    flat_reviews = []
    for batch in per_product:
        flat_reviews.extend(batch)

    reviews_df = pd.DataFrame(flat_reviews)

    # De-duplicate on the review id when that column is present.
    if 'id' in reviews_df.columns:
        reviews_df = reviews_df.drop_duplicates(subset=['id'])

    # "text" becomes "review" and "id" becomes "reviewid".
    reviews_df = reviews_df.rename(columns={'text': 'review', 'id': 'reviewid'})

    # Tag where the reviews came from.
    reviews_df['origin'] = 'bestbuy'

    self._logger.notice(
        f"Ending Product Reviews: {len(reviews_df)}"
    )

    # The reviews replace the previous dataframe.
    self.data = reviews_df
    return self.data
|
712
|
+
|
713
|
+
async def product(self):
    """Look up Best Buy product information for every row of ``self.data``.

    Ensures the expected columns exist, starts the Selenium driver in
    headless desktop mode if needed, runs ``_product_info`` for each row and
    tags the rows with their origin. The driver is closed even when the
    processing fails.

    Returns:
        ``self.data`` with the product columns filled in.
    """
    # Ensure required columns exist in the DataFrame
    for column in (
        'model', 'brand', 'sku', 'product_name', 'image_url', 'price', 'url'
    ):
        self.column_exists(column)
    self.column_exists('enabled', False)

    # Run headless (switch to False when debugging)
    self.headless = True

    # Always set as_mobile to False to ensure desktop mode
    self.as_mobile = False

    # Initialize Selenium driver
    if not self._driver:
        await self.get_driver()

    try:
        # Create tasks to process each row in the DataFrame
        tasks = [
            self._product_info(
                idx,
                row
            ) for idx, row in self.data.iterrows()
        ]

        # Process tasks concurrently
        await self._processing_tasks(tasks)

        # Add origin column
        self.data['origin'] = 'bestbuy'
    finally:
        # Release the driver on the failure path too, so a failed run does
        # not leave it open.
        self.close_driver()

    # Return the updated DataFrame
    return self.data
|
757
|
+
|
758
|
+
async def run(self):
    """Run the Best Buy service named by ``self._fn`` and store its result.

    Returns:
        Whatever the selected service coroutine returned (also kept in
        ``self._result``).

    Raises:
        ComponentError: when ``self._fn`` does not name a callable attribute
            or when the service fails with an unexpected error.
        TimeoutError, NotSupported: re-raised unchanged from the service.
    """
    # Default of None: a missing attribute must reach the ComponentError
    # below instead of escaping as a bare AttributeError.
    fn = getattr(self, self._fn, None)
    result = None
    if not callable(fn):
        raise ComponentError(
            f"Best Buy: Function {self._fn} doesn't exists."
        )
    try:
        result = await fn()
    except (ComponentError, TimeoutError, NotSupported):
        raise
    except Exception as exc:
        raise ComponentError(
            f"BestBuy: Unknown Error: {exc}"
        ) from exc
    # Print results
    print(result)
    # The column dump only makes sense for a DataFrame; an empty one has no
    # first row to sample.
    if isinstance(result, pd.DataFrame):
        print("::: Printing Column Information === ")
        has_rows = len(result) > 0
        for column, t in result.dtypes.items():
            sample = result[column].iloc[0] if has_rows else None
            print(column, "->", t, "->", sample)
    self._result = result
    return self._result
|
781
|
+
|
782
|
+
def split_parts(self, task_list, num_parts: int = 5) -> list:
    """Cut ``task_list`` into ``num_parts`` consecutive slices.

    Slice sizes differ by at most one: the first ``len % num_parts`` slices
    get the extra element. Slices may be empty when there are fewer items
    than parts.
    """
    base, extra = divmod(len(task_list), num_parts)
    chunks = []
    cursor = 0
    for position in range(num_parts):
        # The leading `extra` slices are one element longer.
        width = base + 1 if position < extra else base
        chunks.append(task_list[cursor:cursor + width])
        cursor += width
    return chunks
|
793
|
+
|
794
|
+
async def _processing_tasks(self, tasks: list) -> list:
    """Await ``tasks`` in batches and return their results in order.

    The awaitables are divided with ``self.split_parts`` into
    ``self.task_parts`` batches; each batch is awaited with
    ``asyncio.gather`` before the next one starts.

    Args:
        tasks: Awaitables to run.

    Returns:
        list: the individual results, in the same order as ``tasks``.

    Raises:
        Any exception raised by one of the awaitables
        (``return_exceptions=False``).
    """
    results = []
    for chunk in self.split_parts(tasks, self.task_parts):
        results.extend(await asyncio.gather(*chunk, return_exceptions=False))
    return results
|