flowtask 5.8.4__cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowtask/__init__.py +93 -0
- flowtask/__main__.py +38 -0
- flowtask/bots/__init__.py +6 -0
- flowtask/bots/check.py +93 -0
- flowtask/bots/codebot.py +51 -0
- flowtask/components/ASPX.py +148 -0
- flowtask/components/AddDataset.py +352 -0
- flowtask/components/Amazon.py +523 -0
- flowtask/components/AutoTask.py +314 -0
- flowtask/components/Azure.py +80 -0
- flowtask/components/AzureUsers.py +106 -0
- flowtask/components/BaseAction.py +91 -0
- flowtask/components/BaseLoop.py +198 -0
- flowtask/components/BestBuy.py +800 -0
- flowtask/components/CSVToGCS.py +120 -0
- flowtask/components/CompanyScraper/__init__.py +1 -0
- flowtask/components/CompanyScraper/parsers/__init__.py +6 -0
- flowtask/components/CompanyScraper/parsers/base.py +102 -0
- flowtask/components/CompanyScraper/parsers/explorium.py +192 -0
- flowtask/components/CompanyScraper/parsers/leadiq.py +206 -0
- flowtask/components/CompanyScraper/parsers/rocket.py +133 -0
- flowtask/components/CompanyScraper/parsers/siccode.py +109 -0
- flowtask/components/CompanyScraper/parsers/visualvisitor.py +130 -0
- flowtask/components/CompanyScraper/parsers/zoominfo.py +118 -0
- flowtask/components/CompanyScraper/scrapper.py +1054 -0
- flowtask/components/CopyTo.py +177 -0
- flowtask/components/CopyToBigQuery.py +243 -0
- flowtask/components/CopyToMongoDB.py +291 -0
- flowtask/components/CopyToPg.py +609 -0
- flowtask/components/CopyToRethink.py +207 -0
- flowtask/components/CreateGCSBucket.py +102 -0
- flowtask/components/CreateReport/CreateReport.py +228 -0
- flowtask/components/CreateReport/__init__.py +9 -0
- flowtask/components/CreateReport/charts/__init__.py +15 -0
- flowtask/components/CreateReport/charts/bar.py +51 -0
- flowtask/components/CreateReport/charts/base.py +66 -0
- flowtask/components/CreateReport/charts/pie.py +64 -0
- flowtask/components/CreateReport/utils.py +9 -0
- flowtask/components/CustomerSatisfaction.py +196 -0
- flowtask/components/DataInput.py +200 -0
- flowtask/components/DateList.py +255 -0
- flowtask/components/DbClient.py +163 -0
- flowtask/components/DialPad.py +146 -0
- flowtask/components/DocumentDBQuery.py +200 -0
- flowtask/components/DownloadFrom.py +371 -0
- flowtask/components/DownloadFromD2L.py +113 -0
- flowtask/components/DownloadFromFTP.py +181 -0
- flowtask/components/DownloadFromIMAP.py +315 -0
- flowtask/components/DownloadFromS3.py +198 -0
- flowtask/components/DownloadFromSFTP.py +265 -0
- flowtask/components/DownloadFromSharepoint.py +110 -0
- flowtask/components/DownloadFromSmartSheet.py +114 -0
- flowtask/components/DownloadS3File.py +229 -0
- flowtask/components/Dummy.py +59 -0
- flowtask/components/DuplicatePhoto.py +411 -0
- flowtask/components/EmployeeEvaluation.py +237 -0
- flowtask/components/ExecuteSQL.py +323 -0
- flowtask/components/ExtractHTML.py +178 -0
- flowtask/components/FileBase.py +178 -0
- flowtask/components/FileCopy.py +181 -0
- flowtask/components/FileDelete.py +82 -0
- flowtask/components/FileExists.py +146 -0
- flowtask/components/FileIteratorDelete.py +112 -0
- flowtask/components/FileList.py +194 -0
- flowtask/components/FileOpen.py +75 -0
- flowtask/components/FileRead.py +120 -0
- flowtask/components/FileRename.py +106 -0
- flowtask/components/FilterIf.py +284 -0
- flowtask/components/FilterRows/FilterRows.py +200 -0
- flowtask/components/FilterRows/__init__.py +10 -0
- flowtask/components/FilterRows/functions.py +4 -0
- flowtask/components/GCSToBigQuery.py +103 -0
- flowtask/components/GoogleA4.py +150 -0
- flowtask/components/GoogleGeoCoding.py +344 -0
- flowtask/components/GooglePlaces.py +315 -0
- flowtask/components/GoogleSearch.py +539 -0
- flowtask/components/HTTPClient.py +268 -0
- flowtask/components/ICIMS.py +146 -0
- flowtask/components/IF.py +179 -0
- flowtask/components/IcimsFolderCopy.py +173 -0
- flowtask/components/ImageFeatures/__init__.py +5 -0
- flowtask/components/ImageFeatures/process.py +233 -0
- flowtask/components/IteratorBase.py +251 -0
- flowtask/components/LangchainLoader/__init__.py +5 -0
- flowtask/components/LangchainLoader/loader.py +194 -0
- flowtask/components/LangchainLoader/loaders/__init__.py +22 -0
- flowtask/components/LangchainLoader/loaders/abstract.py +362 -0
- flowtask/components/LangchainLoader/loaders/basepdf.py +50 -0
- flowtask/components/LangchainLoader/loaders/docx.py +91 -0
- flowtask/components/LangchainLoader/loaders/html.py +119 -0
- flowtask/components/LangchainLoader/loaders/pdfblocks.py +146 -0
- flowtask/components/LangchainLoader/loaders/pdfmark.py +79 -0
- flowtask/components/LangchainLoader/loaders/pdftables.py +135 -0
- flowtask/components/LangchainLoader/loaders/qa.py +67 -0
- flowtask/components/LangchainLoader/loaders/txt.py +55 -0
- flowtask/components/LeadIQ.py +650 -0
- flowtask/components/Loop.py +253 -0
- flowtask/components/Lowes.py +334 -0
- flowtask/components/MS365Usage.py +156 -0
- flowtask/components/MSTeamsMessages.py +320 -0
- flowtask/components/MarketClustering.py +1051 -0
- flowtask/components/MergeFiles.py +362 -0
- flowtask/components/MilvusOutput.py +87 -0
- flowtask/components/NearByStores.py +175 -0
- flowtask/components/NetworkNinja/__init__.py +6 -0
- flowtask/components/NetworkNinja/models/__init__.py +52 -0
- flowtask/components/NetworkNinja/models/abstract.py +177 -0
- flowtask/components/NetworkNinja/models/account.py +39 -0
- flowtask/components/NetworkNinja/models/client.py +19 -0
- flowtask/components/NetworkNinja/models/district.py +14 -0
- flowtask/components/NetworkNinja/models/events.py +101 -0
- flowtask/components/NetworkNinja/models/forms.py +499 -0
- flowtask/components/NetworkNinja/models/market.py +16 -0
- flowtask/components/NetworkNinja/models/organization.py +34 -0
- flowtask/components/NetworkNinja/models/photos.py +125 -0
- flowtask/components/NetworkNinja/models/project.py +44 -0
- flowtask/components/NetworkNinja/models/region.py +28 -0
- flowtask/components/NetworkNinja/models/store.py +203 -0
- flowtask/components/NetworkNinja/models/user.py +151 -0
- flowtask/components/NetworkNinja/router.py +854 -0
- flowtask/components/Odoo.py +175 -0
- flowtask/components/OdooInjector.py +192 -0
- flowtask/components/OpenFromXML.py +126 -0
- flowtask/components/OpenWeather.py +41 -0
- flowtask/components/OpenWithBase.py +616 -0
- flowtask/components/OpenWithPandas.py +715 -0
- flowtask/components/PGPDecrypt.py +199 -0
- flowtask/components/PandasIterator.py +187 -0
- flowtask/components/PandasToFile.py +189 -0
- flowtask/components/Paradox.py +339 -0
- flowtask/components/ParamIterator.py +117 -0
- flowtask/components/ParseHTML.py +84 -0
- flowtask/components/PlacerStores.py +249 -0
- flowtask/components/Pokemon.py +507 -0
- flowtask/components/PositiveBot.py +62 -0
- flowtask/components/PowerPointSlide.py +400 -0
- flowtask/components/PrintMessage.py +127 -0
- flowtask/components/ProductCompetitors/__init__.py +5 -0
- flowtask/components/ProductCompetitors/parsers/__init__.py +7 -0
- flowtask/components/ProductCompetitors/parsers/base.py +72 -0
- flowtask/components/ProductCompetitors/parsers/bestbuy.py +86 -0
- flowtask/components/ProductCompetitors/parsers/lowes.py +103 -0
- flowtask/components/ProductCompetitors/scrapper.py +155 -0
- flowtask/components/ProductCompliant.py +169 -0
- flowtask/components/ProductInfo/__init__.py +1 -0
- flowtask/components/ProductInfo/parsers/__init__.py +5 -0
- flowtask/components/ProductInfo/parsers/base.py +83 -0
- flowtask/components/ProductInfo/parsers/brother.py +97 -0
- flowtask/components/ProductInfo/parsers/canon.py +167 -0
- flowtask/components/ProductInfo/parsers/epson.py +118 -0
- flowtask/components/ProductInfo/parsers/hp.py +131 -0
- flowtask/components/ProductInfo/parsers/samsung.py +97 -0
- flowtask/components/ProductInfo/scraper.py +319 -0
- flowtask/components/ProductPricing.py +118 -0
- flowtask/components/QS.py +261 -0
- flowtask/components/QSBase.py +201 -0
- flowtask/components/QueryIterator.py +273 -0
- flowtask/components/QueryToInsert.py +327 -0
- flowtask/components/QueryToPandas.py +432 -0
- flowtask/components/RESTClient.py +195 -0
- flowtask/components/RethinkDBQuery.py +189 -0
- flowtask/components/Rsync.py +74 -0
- flowtask/components/RunSSH.py +59 -0
- flowtask/components/RunShell.py +71 -0
- flowtask/components/SalesForce.py +20 -0
- flowtask/components/SaveImageBank/__init__.py +257 -0
- flowtask/components/SchedulingVisits.py +592 -0
- flowtask/components/ScrapPage.py +216 -0
- flowtask/components/ScrapSearch.py +79 -0
- flowtask/components/SendNotify.py +257 -0
- flowtask/components/SentimentAnalysis.py +694 -0
- flowtask/components/ServiceScrapper/__init__.py +5 -0
- flowtask/components/ServiceScrapper/parsers/__init__.py +1 -0
- flowtask/components/ServiceScrapper/parsers/base.py +94 -0
- flowtask/components/ServiceScrapper/parsers/costco.py +93 -0
- flowtask/components/ServiceScrapper/scrapper.py +199 -0
- flowtask/components/SetVariables.py +156 -0
- flowtask/components/SubTask.py +182 -0
- flowtask/components/SuiteCRM.py +48 -0
- flowtask/components/Switch.py +175 -0
- flowtask/components/TableBase.py +148 -0
- flowtask/components/TableDelete.py +312 -0
- flowtask/components/TableInput.py +143 -0
- flowtask/components/TableOutput/TableOutput.py +384 -0
- flowtask/components/TableOutput/__init__.py +3 -0
- flowtask/components/TableSchema.py +534 -0
- flowtask/components/Target.py +223 -0
- flowtask/components/ThumbnailGenerator.py +156 -0
- flowtask/components/ToPandas.py +67 -0
- flowtask/components/TransformRows/TransformRows.py +507 -0
- flowtask/components/TransformRows/__init__.py +9 -0
- flowtask/components/TransformRows/functions.py +559 -0
- flowtask/components/TransposeRows.py +176 -0
- flowtask/components/UPCDatabase.py +86 -0
- flowtask/components/UnGzip.py +171 -0
- flowtask/components/Uncompress.py +172 -0
- flowtask/components/UniqueRows.py +126 -0
- flowtask/components/Unzip.py +107 -0
- flowtask/components/UpdateOperationalVars.py +147 -0
- flowtask/components/UploadTo.py +299 -0
- flowtask/components/UploadToS3.py +136 -0
- flowtask/components/UploadToSFTP.py +160 -0
- flowtask/components/UploadToSharepoint.py +205 -0
- flowtask/components/UserFunc.py +122 -0
- flowtask/components/VivaTracker.py +140 -0
- flowtask/components/WSDLClient.py +123 -0
- flowtask/components/Wait.py +18 -0
- flowtask/components/Walmart.py +199 -0
- flowtask/components/Workplace.py +134 -0
- flowtask/components/XMLToPandas.py +267 -0
- flowtask/components/Zammad/__init__.py +41 -0
- flowtask/components/Zammad/models.py +0 -0
- flowtask/components/ZoomInfoScraper.py +409 -0
- flowtask/components/__init__.py +104 -0
- flowtask/components/abstract.py +18 -0
- flowtask/components/flow.py +530 -0
- flowtask/components/google.py +335 -0
- flowtask/components/group.py +221 -0
- flowtask/components/py.typed +0 -0
- flowtask/components/reviewscrap.py +132 -0
- flowtask/components/tAutoincrement.py +117 -0
- flowtask/components/tConcat.py +109 -0
- flowtask/components/tExplode.py +119 -0
- flowtask/components/tFilter.py +184 -0
- flowtask/components/tGroup.py +236 -0
- flowtask/components/tJoin.py +270 -0
- flowtask/components/tMap/__init__.py +9 -0
- flowtask/components/tMap/functions.py +54 -0
- flowtask/components/tMap/tMap.py +450 -0
- flowtask/components/tMelt.py +112 -0
- flowtask/components/tMerge.py +114 -0
- flowtask/components/tOrder.py +93 -0
- flowtask/components/tPandas.py +94 -0
- flowtask/components/tPivot.py +71 -0
- flowtask/components/tPluckCols.py +76 -0
- flowtask/components/tUnnest.py +82 -0
- flowtask/components/user.py +401 -0
- flowtask/conf.py +457 -0
- flowtask/download.py +102 -0
- flowtask/events/__init__.py +11 -0
- flowtask/events/events/__init__.py +20 -0
- flowtask/events/events/abstract.py +95 -0
- flowtask/events/events/alerts/__init__.py +362 -0
- flowtask/events/events/alerts/colfunctions.py +131 -0
- flowtask/events/events/alerts/functions.py +158 -0
- flowtask/events/events/dummy.py +12 -0
- flowtask/events/events/exec.py +124 -0
- flowtask/events/events/file/__init__.py +7 -0
- flowtask/events/events/file/base.py +51 -0
- flowtask/events/events/file/copy.py +23 -0
- flowtask/events/events/file/delete.py +16 -0
- flowtask/events/events/interfaces/__init__.py +9 -0
- flowtask/events/events/interfaces/client.py +67 -0
- flowtask/events/events/interfaces/credentials.py +28 -0
- flowtask/events/events/interfaces/notifications.py +58 -0
- flowtask/events/events/jira.py +122 -0
- flowtask/events/events/log.py +26 -0
- flowtask/events/events/logerr.py +52 -0
- flowtask/events/events/notify.py +59 -0
- flowtask/events/events/notify_event.py +160 -0
- flowtask/events/events/publish.py +54 -0
- flowtask/events/events/sendfile.py +104 -0
- flowtask/events/events/task.py +97 -0
- flowtask/events/events/teams.py +98 -0
- flowtask/events/events/webhook.py +58 -0
- flowtask/events/manager.py +287 -0
- flowtask/exceptions.c +39393 -0
- flowtask/exceptions.cpython-39-x86_64-linux-gnu.so +0 -0
- flowtask/extensions/__init__.py +3 -0
- flowtask/extensions/abstract.py +82 -0
- flowtask/extensions/logging/__init__.py +65 -0
- flowtask/hooks/__init__.py +9 -0
- flowtask/hooks/actions/__init__.py +22 -0
- flowtask/hooks/actions/abstract.py +66 -0
- flowtask/hooks/actions/dummy.py +23 -0
- flowtask/hooks/actions/jira.py +74 -0
- flowtask/hooks/actions/rest.py +320 -0
- flowtask/hooks/actions/sampledata.py +37 -0
- flowtask/hooks/actions/sensor.py +23 -0
- flowtask/hooks/actions/task.py +9 -0
- flowtask/hooks/actions/ticket.py +37 -0
- flowtask/hooks/actions/zammad.py +55 -0
- flowtask/hooks/hook.py +62 -0
- flowtask/hooks/models.py +17 -0
- flowtask/hooks/service.py +187 -0
- flowtask/hooks/step.py +91 -0
- flowtask/hooks/types/__init__.py +23 -0
- flowtask/hooks/types/base.py +129 -0
- flowtask/hooks/types/brokers/__init__.py +11 -0
- flowtask/hooks/types/brokers/base.py +54 -0
- flowtask/hooks/types/brokers/mqtt.py +35 -0
- flowtask/hooks/types/brokers/rabbitmq.py +82 -0
- flowtask/hooks/types/brokers/redis.py +83 -0
- flowtask/hooks/types/brokers/sqs.py +44 -0
- flowtask/hooks/types/fs.py +232 -0
- flowtask/hooks/types/http.py +49 -0
- flowtask/hooks/types/imap.py +200 -0
- flowtask/hooks/types/jira.py +279 -0
- flowtask/hooks/types/mail.py +205 -0
- flowtask/hooks/types/postgres.py +98 -0
- flowtask/hooks/types/responses/__init__.py +8 -0
- flowtask/hooks/types/responses/base.py +5 -0
- flowtask/hooks/types/sharepoint.py +288 -0
- flowtask/hooks/types/ssh.py +141 -0
- flowtask/hooks/types/tagged.py +59 -0
- flowtask/hooks/types/upload.py +85 -0
- flowtask/hooks/types/watch.py +71 -0
- flowtask/hooks/types/web.py +36 -0
- flowtask/interfaces/AzureClient.py +137 -0
- flowtask/interfaces/AzureGraph.py +839 -0
- flowtask/interfaces/Boto3Client.py +326 -0
- flowtask/interfaces/DropboxClient.py +173 -0
- flowtask/interfaces/ExcelHandler.py +94 -0
- flowtask/interfaces/FTPClient.py +131 -0
- flowtask/interfaces/GoogleCalendar.py +201 -0
- flowtask/interfaces/GoogleClient.py +133 -0
- flowtask/interfaces/GoogleDrive.py +127 -0
- flowtask/interfaces/GoogleGCS.py +89 -0
- flowtask/interfaces/GoogleGeocoding.py +93 -0
- flowtask/interfaces/GoogleLang.py +114 -0
- flowtask/interfaces/GooglePub.py +61 -0
- flowtask/interfaces/GoogleSheet.py +68 -0
- flowtask/interfaces/IMAPClient.py +137 -0
- flowtask/interfaces/O365Calendar.py +113 -0
- flowtask/interfaces/O365Client.py +220 -0
- flowtask/interfaces/OneDrive.py +284 -0
- flowtask/interfaces/Outlook.py +155 -0
- flowtask/interfaces/ParrotBot.py +130 -0
- flowtask/interfaces/SSHClient.py +378 -0
- flowtask/interfaces/Sharepoint.py +496 -0
- flowtask/interfaces/__init__.py +36 -0
- flowtask/interfaces/azureauth.py +119 -0
- flowtask/interfaces/cache.py +201 -0
- flowtask/interfaces/client.py +82 -0
- flowtask/interfaces/compress.py +525 -0
- flowtask/interfaces/credentials.py +124 -0
- flowtask/interfaces/d2l.py +239 -0
- flowtask/interfaces/databases/__init__.py +5 -0
- flowtask/interfaces/databases/db.py +223 -0
- flowtask/interfaces/databases/documentdb.py +55 -0
- flowtask/interfaces/databases/rethink.py +39 -0
- flowtask/interfaces/dataframes/__init__.py +11 -0
- flowtask/interfaces/dataframes/abstract.py +21 -0
- flowtask/interfaces/dataframes/arrow.py +71 -0
- flowtask/interfaces/dataframes/dt.py +69 -0
- flowtask/interfaces/dataframes/pandas.py +167 -0
- flowtask/interfaces/dataframes/polars.py +60 -0
- flowtask/interfaces/db.py +263 -0
- flowtask/interfaces/env.py +46 -0
- flowtask/interfaces/func.py +137 -0
- flowtask/interfaces/http.py +1780 -0
- flowtask/interfaces/locale.py +40 -0
- flowtask/interfaces/log.py +75 -0
- flowtask/interfaces/mask.py +143 -0
- flowtask/interfaces/notification.py +154 -0
- flowtask/interfaces/playwright.py +339 -0
- flowtask/interfaces/powerpoint.py +368 -0
- flowtask/interfaces/py.typed +0 -0
- flowtask/interfaces/qs.py +376 -0
- flowtask/interfaces/result.py +87 -0
- flowtask/interfaces/selenium_service.py +779 -0
- flowtask/interfaces/smartsheet.py +154 -0
- flowtask/interfaces/stat.py +39 -0
- flowtask/interfaces/task.py +96 -0
- flowtask/interfaces/template.py +118 -0
- flowtask/interfaces/vectorstores/__init__.py +1 -0
- flowtask/interfaces/vectorstores/abstract.py +133 -0
- flowtask/interfaces/vectorstores/milvus.py +669 -0
- flowtask/interfaces/zammad.py +107 -0
- flowtask/models.py +193 -0
- flowtask/parsers/__init__.py +15 -0
- flowtask/parsers/_yaml.c +11978 -0
- flowtask/parsers/_yaml.cpython-39-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/argparser.py +235 -0
- flowtask/parsers/base.c +15155 -0
- flowtask/parsers/base.cpython-39-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/json.c +11968 -0
- flowtask/parsers/json.cpython-39-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/maps.py +49 -0
- flowtask/parsers/toml.c +11968 -0
- flowtask/parsers/toml.cpython-39-x86_64-linux-gnu.so +0 -0
- flowtask/plugins/__init__.py +16 -0
- flowtask/plugins/components/__init__.py +0 -0
- flowtask/plugins/handler/__init__.py +45 -0
- flowtask/plugins/importer.py +31 -0
- flowtask/plugins/sources/__init__.py +0 -0
- flowtask/runner.py +283 -0
- flowtask/scheduler/__init__.py +9 -0
- flowtask/scheduler/functions.py +493 -0
- flowtask/scheduler/handlers/__init__.py +8 -0
- flowtask/scheduler/handlers/manager.py +504 -0
- flowtask/scheduler/handlers/models.py +58 -0
- flowtask/scheduler/handlers/service.py +72 -0
- flowtask/scheduler/notifications.py +65 -0
- flowtask/scheduler/scheduler.py +993 -0
- flowtask/services/__init__.py +0 -0
- flowtask/services/bots/__init__.py +0 -0
- flowtask/services/bots/telegram.py +264 -0
- flowtask/services/files/__init__.py +11 -0
- flowtask/services/files/manager.py +522 -0
- flowtask/services/files/model.py +37 -0
- flowtask/services/files/service.py +767 -0
- flowtask/services/jira/__init__.py +3 -0
- flowtask/services/jira/jira_actions.py +191 -0
- flowtask/services/tasks/__init__.py +13 -0
- flowtask/services/tasks/launcher.py +213 -0
- flowtask/services/tasks/manager.py +323 -0
- flowtask/services/tasks/service.py +275 -0
- flowtask/services/tasks/task_manager.py +376 -0
- flowtask/services/tasks/tasks.py +155 -0
- flowtask/storages/__init__.py +16 -0
- flowtask/storages/exceptions.py +12 -0
- flowtask/storages/files/__init__.py +8 -0
- flowtask/storages/files/abstract.py +29 -0
- flowtask/storages/files/filesystem.py +66 -0
- flowtask/storages/tasks/__init__.py +19 -0
- flowtask/storages/tasks/abstract.py +26 -0
- flowtask/storages/tasks/database.py +33 -0
- flowtask/storages/tasks/filesystem.py +108 -0
- flowtask/storages/tasks/github.py +119 -0
- flowtask/storages/tasks/memory.py +45 -0
- flowtask/storages/tasks/row.py +25 -0
- flowtask/tasks/__init__.py +0 -0
- flowtask/tasks/abstract.py +526 -0
- flowtask/tasks/command.py +118 -0
- flowtask/tasks/pile.py +486 -0
- flowtask/tasks/py.typed +0 -0
- flowtask/tasks/task.py +778 -0
- flowtask/template/__init__.py +161 -0
- flowtask/tests.py +257 -0
- flowtask/types/__init__.py +8 -0
- flowtask/types/typedefs.c +11347 -0
- flowtask/types/typedefs.cpython-39-x86_64-linux-gnu.so +0 -0
- flowtask/utils/__init__.py +24 -0
- flowtask/utils/constants.py +117 -0
- flowtask/utils/encoders.py +21 -0
- flowtask/utils/executor.py +112 -0
- flowtask/utils/functions.cpp +14280 -0
- flowtask/utils/functions.cpython-39-x86_64-linux-gnu.so +0 -0
- flowtask/utils/json.cpp +13349 -0
- flowtask/utils/json.cpython-39-x86_64-linux-gnu.so +0 -0
- flowtask/utils/mail.py +63 -0
- flowtask/utils/parseqs.c +13324 -0
- flowtask/utils/parserqs.cpython-39-x86_64-linux-gnu.so +0 -0
- flowtask/utils/stats.py +308 -0
- flowtask/utils/transformations.py +74 -0
- flowtask/utils/uv.py +12 -0
- flowtask/utils/validators.py +97 -0
- flowtask/version.py +11 -0
- flowtask-5.8.4.dist-info/LICENSE +201 -0
- flowtask-5.8.4.dist-info/METADATA +209 -0
- flowtask-5.8.4.dist-info/RECORD +470 -0
- flowtask-5.8.4.dist-info/WHEEL +6 -0
- flowtask-5.8.4.dist-info/entry_points.txt +3 -0
- flowtask-5.8.4.dist-info/top_level.txt +2 -0
- plugins/components/CreateQR.py +39 -0
- plugins/components/TestComponent.py +28 -0
- plugins/components/Use1.py +13 -0
- plugins/components/Workplace.py +117 -0
- plugins/components/__init__.py +3 -0
- plugins/sources/__init__.py +0 -0
- plugins/sources/get_populartimes.py +78 -0
- plugins/sources/google.py +150 -0
- plugins/sources/hubspot.py +679 -0
- plugins/sources/icims.py +679 -0
- plugins/sources/mobileinsight.py +501 -0
- plugins/sources/newrelic.py +262 -0
- plugins/sources/uap.py +268 -0
- plugins/sources/venu.py +244 -0
- plugins/sources/vocinity.py +314 -0
@@ -0,0 +1,86 @@
|
|
1
|
+
from .base import ProductCompetitorsBase
|
2
|
+
|
3
|
+
|
4
|
+
class BestBuyScrapper(ProductCompetitorsBase):
    """Competitor parser for BestBuy product pages.

    Reads the product carousel of a page and copies the first product found
    for each brand listed in ``self.competitors`` into the row being built.
    """

    domain: str = 'bestbuy.com'

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Browser options read by the base class / driver factory
        # (NOTE(review): exact effect lives outside this file - confirm).
        self.headless = False
        self.use_wire = False

    async def product_information(self, response: object, idx: int, row: dict) -> tuple:
        """Get the product information from BestBuy.

        Args:
            response: Page response, handed as-is to ``self.get_bs``.
            idx: Index of the row being processed; returned unchanged.
            row: Mapping updated in place with ``competitor_*_<brand>`` keys.

        Returns:
            The ``(idx, row)`` pair. Errors are logged, never raised, so the
            row is always returned even when parsing fails.
        """
        # Function-scope import so this block does not depend on the
        # module-level import list.
        from urllib.parse import urljoin

        try:
            document = self.get_bs(response)
            competitors_found = set()

            # Initialise empty values for every competitor
            # (presumably creates the competitor_* keys; see the base class).
            for competitor in self.competitors:
                self.set_empty_values(row, competitor)

            # Find all product cards in the carousel
            carousel = document.find('ul', {'class': 'c-carousel-list'})
            if carousel:
                for product in carousel.find_all('li', {'class': 'product-carousel-v2_brix-item'}):
                    try:
                        # Extract product info
                        product_link = product.find('a', {'data-testid': 'product-link'})
                        if not product_link:
                            continue

                        # Get product name and brand
                        title_div = product_link.find('div', {'class': 'title-block__title'})
                        if not title_div:
                            continue

                        full_name = title_div.text.strip()
                        # The title is expected to look like "<brand> - <name>".
                        brand = full_name.split(' - ')[0] if ' - ' in full_name else ''

                        # Keep only wanted brands, and only the first hit per brand.
                        if brand not in self.competitors or brand in competitors_found:
                            continue

                        # Get product URL and SKU. urljoin copes with relative
                        # and absolute hrefs; a missing href is stored as None
                        # instead of the literal text "...comNone".
                        href = product_link.get('href')
                        url = urljoin('https://www.bestbuy.com', href) if href else None
                        sku = product_link.get('data-cy', '').replace('product-link-', '')

                        # Get price
                        price_div = product.find('div', {'class': 'priceView-hero-price'})
                        price = price_div.text.strip() if price_div else None

                        # Get rating and reviews from the screen-reader text,
                        # e.g. "... rating, 4.5 out of 5 stars with 120 reviews".
                        rating_p = product.find('p', {'class': 'visually-hidden'})
                        rating = None
                        num_reviews = None
                        if rating_p:
                            rating_text = rating_p.text
                            try:
                                rating = rating_text.split('rating,')[1].split('out')[0].strip() if 'rating,' in rating_text else None
                                num_reviews = rating_text.split('with')[1].split('reviews')[0].strip() if 'with' in rating_text else None
                            except (IndexError, AttributeError):
                                # Best effort: keep whatever was parsed so far.
                                pass

                        # Store competitor info
                        row.update({
                            f'competitor_brand_{brand}': brand,
                            f'competitor_name_{brand}': full_name,
                            f'competitor_url_{brand}': url,
                            f'competitor_sku_{brand}': sku,
                            f'competitor_price_{brand}': price,
                            f'competitor_rating_{brand}': rating,
                            f'competitor_reviews_{brand}': num_reviews
                        })
                        competitors_found.add(brand)

                        # Once every competitor has been found we can stop.
                        if len(competitors_found) == len(self.competitors):
                            break
                    except Exception as err:
                        # One malformed card must not abort the whole page.
                        self._logger.warning(f'Error processing product in carousel: {err}')
                        continue

            return idx, row
        except Exception as err:
            self._logger.error(f'Error getting product information from BestBuy: {err}')
            return idx, row
|
@@ -0,0 +1,103 @@
|
|
1
|
+
from .base import ProductCompetitorsBase
|
2
|
+
|
3
|
+
|
4
|
+
class LowesScrapper(ProductCompetitorsBase):
    """Competitor parser for Lowe's product pages.

    Reads the product carousel of a page and copies the first product found
    for each brand listed in ``self.competitors`` into the row being built.
    """

    domain: str = 'lowes.com'

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.expected_columns = [
            'price',
            'availability',
            'product_name',
            'product_description',
            'product_id'
        ]
        # Browser options read by the base class / driver factory
        # (NOTE(review): exact effect lives outside this file - confirm).
        self.headless = False
        self.use_wire = False

    async def connect(self):
        """Creates the Driver and Connects to the Site."""
        self._driver = await self.get_driver()
        await self.start()

    async def disconnect(self):
        """Disconnects the Driver and closes the Connection."""
        if self._driver:
            self.close_driver()

    async def product_information(self, response: object, idx: int, row: dict) -> tuple:
        """Get the product information from Lowes.

        Args:
            response: Page response, handed as-is to ``self.get_bs``.
            idx: Index of the row being processed; returned unchanged.
            row: Mapping updated in place with ``competitor_*_<brand>`` keys.

        Returns:
            The ``(idx, row)`` pair. Errors are logged, never raised, so the
            row is always returned even when parsing fails.
        """
        # Function-scope import so this block does not depend on the
        # module-level import list.
        from urllib.parse import urljoin

        try:
            document = self.get_bs(response)
            competitors_found = set()

            # Initialise empty values for every competitor
            # (presumably creates the competitor_* keys; see the base class).
            for competitor in self.competitors:
                self.set_empty_values(row, competitor)

            # Find all product cards in the carousel
            carousel = document.find('div', {'class': 'carousel-inner-container'})
            if carousel:
                for product in carousel.find_all('div', {'class': 'product-card-wrapper'}):
                    try:
                        # Extract product info
                        product_link = product.find('a', {'class': 'carousel-container'})
                        if not product_link:
                            continue

                        # Get product brand
                        title_div = product_link.find('span', {'class': 'brand-name'})
                        if not title_div:
                            continue

                        brand = title_div.text.strip()

                        # Keep only wanted brands, and only the first hit per
                        # brand. Done before the description lookup so that
                        # unrelated cards are skipped cheaply and quietly.
                        if brand not in self.competitors or brand in competitors_found:
                            continue

                        # The description span may be absent; do not lose the
                        # whole competitor because of a missing name.
                        desc_span = title_div.find_next('span', {'class': 'product-desc'})
                        full_name = desc_span.text.strip() if desc_span else None

                        # Get product URL and SKU. urljoin copes with relative
                        # and absolute hrefs; a missing href is stored as None
                        # instead of the literal text "...comNone".
                        href = product_link.get('href')
                        url = urljoin('https://www.lowes.com', href) if href else None
                        sku = product_link.get('data-productid', '')

                        # Get price
                        price_div = product.find('span', {'class': 'final-price'})
                        price = price_div.text.strip() if price_div else None

                        # Get rating and reviews
                        rating_div = product.find('span', {'class': 'rating'})
                        rating = None
                        num_reviews = None
                        if rating_div:
                            try:
                                # First word of the aria-label; an absent or
                                # empty label yields None rather than ''.
                                rating = rating_div.get('aria-label', '').split(' ')[0] or None
                                reviews_span = product.find('span', {'class': 'rating-count'})
                                num_reviews = reviews_span.text.strip() if reviews_span else None
                            except (IndexError, AttributeError):
                                # Best effort: keep whatever was parsed so far.
                                pass

                        # Store competitor info
                        row.update({
                            f'competitor_brand_{brand}': brand,
                            f'competitor_name_{brand}': full_name,
                            f'competitor_url_{brand}': url,
                            f'competitor_sku_{brand}': sku,
                            f'competitor_price_{brand}': price,
                            f'competitor_rating_{brand}': rating,
                            f'competitor_reviews_{brand}': num_reviews
                        })
                        competitors_found.add(brand)

                        # Once every competitor has been found we can stop.
                        if len(competitors_found) == len(self.competitors):
                            break
                    except Exception as err:
                        # One malformed card must not abort the whole page.
                        self._logger.warning(f'Error processing product in carousel: {err}')
                        continue

            return idx, row
        except Exception as err:
            self._logger.error(f'Error getting product information from Lowes: {err}')
            return idx, row
|
@@ -0,0 +1,155 @@
|
|
1
|
+
import asyncio
import random
from collections.abc import Callable
from typing import List

import pandas as pd
from tqdm.asyncio import tqdm

from ...exceptions import ComponentError, ConfigError
from ...interfaces import HTTPService, SeleniumService
from ...interfaces.http import ua
from ..flow import FlowComponent
from .parsers import (
    BestBuyScrapper,
    LowesScrapper,
)
|
14
|
+
|
15
|
+
|
16
|
+
class ProductCompetitors(FlowComponent, SeleniumService, HTTPService):
|
17
|
+
"""
|
18
|
+
Product Competitors Scraper Component
|
19
|
+
|
20
|
+
Overview:
|
21
|
+
Pluggable component for scraping product information from competitors (BestBuy and Lowes).
|
22
|
+
|
23
|
+
Properties:
|
24
|
+
- url_column (str): Name of the column containing URLs to scrape (default: 'url')
|
25
|
+
- account_name_column (str): Name of the column containing retailer name (default: 'account_name')
|
26
|
+
- product_id_column (str): Name of the column containing product IDs (default: 'product_id')
|
27
|
+
- competitors (list): List of competitor brands to search for (e.g. ['Insignia', 'TCL', 'LG', 'Sony', 'Samsung'])
|
28
|
+
"""
|
29
|
+
|
30
|
+
def __init__(
    self,
    loop: asyncio.AbstractEventLoop = None,
    job: Callable = None,
    stat: Callable = None,
    **kwargs,
) -> None:
    """Read the component options from ``kwargs`` and set up request headers.

    Options are read before the parent initialisers run, and the headers are
    built afterwards because they use ``self.accept``.
    """
    option = kwargs.get

    # Column names used to read each input row.
    self.info_column: str = option('url_column', 'url')
    self.account_column: str = option('account_name_column', 'account_name')
    self.product_id_column: str = option('product_id_column', 'product_id')

    # Brands to look for, plus one empty bucket per brand.
    self.competitors: List[str] = option(
        'competitors',
        ['Insignia', 'TCL', 'LG', 'Sony', 'Samsung'],
    )
    self.competitors_bucket = {brand: [] for brand in self.competitors}

    self.scrapper_class: Callable = None
    self._scrapper_func: str = 'product_information'

    # Proxy switches.
    self.use_proxy = True
    self._free_proxy = False
    self.paid_proxy = True

    # Batching / concurrency options.
    self.chunk_size: int = option('chunk_size', 100)
    self.concurrently: bool = option('concurrently', False)
    self.task_parts: int = option('task_parts', 10)

    super().__init__(loop=loop, job=job, stat=stat, **kwargs)

    # Default headers first; caller-supplied headers override them.
    request_headers = {
        "Accept": self.accept,
        "TE": "trailers",
        "Accept-Encoding": "gzip, deflate",
        "DNT": "1",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": random.choice(ua),
    }
    request_headers.update(kwargs.get('headers', {}))
    self.headers: dict = request_headers
|
61
|
+
|
62
|
+
def _get_scrapper(self, account_name: str):
|
63
|
+
"""Get the appropriate scrapper based on account name."""
|
64
|
+
try:
|
65
|
+
if account_name == "Best Buy":
|
66
|
+
return BestBuyScrapper()
|
67
|
+
elif account_name == "Lowe's":
|
68
|
+
return LowesScrapper()
|
69
|
+
else:
|
70
|
+
return None
|
71
|
+
except Exception as err:
|
72
|
+
self._logger.error(f"Error while getting scrapper: {err}")
|
73
|
+
raise ConfigError(f"Error while getting scrapper: {err}")
|
74
|
+
|
75
|
+
async def start(self, **kwargs) -> bool:
    """Bind the input data and check that it has every column the scraper reads.

    When ``self.previous`` is truthy, ``self.input`` becomes ``self.data``.

    Returns:
        ``True`` when ``self.data`` is a DataFrame containing the URL,
        account-name and product-id columns.

    Raises:
        ComponentError: If ``self.data`` is not a pandas DataFrame.
        ConfigError: If one of the three configured columns is absent; the
            message names the first missing column.
    """
    if self.previous:
        self.data = self.input

    frame = self.data
    if not isinstance(frame, pd.DataFrame):
        raise ComponentError("Input must be a DataFrame", status=404)

    expected = (self.info_column, self.account_column, self.product_id_column)
    absent = [name for name in expected if name not in frame.columns]
    if absent:
        # Report only the first missing column, in the order listed above.
        raise ConfigError(f"Column {absent[0]} not found in DataFrame")

    return True
|
89
|
+
|
90
|
+
async def _start_scrapping(self, idx, row):
|
91
|
+
"""Handle scraping for a single row."""
|
92
|
+
try:
|
93
|
+
async with self._semaphore:
|
94
|
+
url = row[self.info_column]
|
95
|
+
account_name = row[self.account_column]
|
96
|
+
|
97
|
+
self._logger.debug(f"Scraping URL: {url} for {account_name}")
|
98
|
+
|
99
|
+
scrapper = self._get_scrapper(account_name)
|
100
|
+
if not scrapper:
|
101
|
+
self._logger.error(f"No scrapper found for {account_name}")
|
102
|
+
return idx, row
|
103
|
+
|
104
|
+
async with scrapper as s:
|
105
|
+
response = await s.get(url, headers=self.headers)
|
106
|
+
if response:
|
107
|
+
try:
|
108
|
+
idx, row = await s.product_information(response, idx, row)
|
109
|
+
except Exception as err:
|
110
|
+
self._logger.error(f"Scraping error: {err}")
|
111
|
+
return idx, row
|
112
|
+
return idx, row
|
113
|
+
except Exception as err:
|
114
|
+
self._logger.error(f"Error while scraping: {err}")
|
115
|
+
raise ComponentError(f"Error while scraping: {err}")
|
116
|
+
|
117
|
+
async def run(self):
    """Scrape every row of ``self.data`` and collect the rows into a DataFrame.

    One ``_start_scrapping`` coroutine is created per row. Depending on
    ``self.concurrently`` they are either awaited one by one, with a random
    0.25-1.5 second pause after each successful row, or gathered in the
    chunks produced by ``self.split_parts(tasks, self.task_parts)``.

    In both modes a row whose task raised is logged and left out of the
    result, so one failing row does not abort the whole run.

    Returns:
        A DataFrame built from the collected rows and indexed by their
        original index labels; it is also stored in ``self._result``. When no
        row was collected an empty DataFrame is returned and ``self._result``
        is not assigned here.
    """
    tasks = [
        self._start_scrapping(idx, row)
        for idx, row in self.data.iterrows()
    ]
    results = []

    with tqdm(total=len(tasks), desc="Scraping Progress", unit="task") as pbar:
        if not self.concurrently:
            for task in tasks:
                try:
                    idx, row = await task
                    results.append((idx, row))
                    await asyncio.sleep(random.uniform(0.25, 1.5))
                except Exception as e:
                    self._logger.error(f"Task error: {str(e)}")
                finally:
                    pbar.update(1)
        else:
            for chunk in self.split_parts(tasks, self.task_parts):
                outcomes = await asyncio.gather(*chunk, return_exceptions=True)
                for outcome in outcomes:
                    # With return_exceptions=True failures come back as
                    # exception objects; they cannot be unpacked into
                    # (idx, row) below, so log them and keep only successes.
                    if isinstance(outcome, BaseException):
                        self._logger.error(f"Task error: {outcome}")
                    else:
                        results.append(outcome)
                pbar.update(len(chunk))

    if not results:
        return pd.DataFrame()

    indices, data_dicts = zip(*results)
    self._result = pd.DataFrame(data_dicts, index=indices)
    self._print_data_(self._result, 'Competitor Scraping Results')
    return self._result
|
152
|
+
|
153
|
+
async def close(self):
    """Finish the component; this method releases nothing itself.

    Returns:
        Always ``True``.
    """
    return True
|
@@ -0,0 +1,169 @@
|
|
1
|
+
from collections.abc import Callable
|
2
|
+
import asyncio
|
3
|
+
from typing import Any
|
4
|
+
# Bot Infrastructure:
|
5
|
+
from parrot.bots.basic import BasicBot
|
6
|
+
from .flow import FlowComponent
|
7
|
+
from ..exceptions import ComponentError, ConfigError
|
8
|
+
from ..conf import TASK_STORAGES
|
9
|
+
|
10
|
+
class ProductCompliant(FlowComponent):
    """
    ProductCompliant

    Overview

        Component that sends the negative reviews of each product to an AI agent
        (a ``BasicBot``) and stores the agent's summary of the customers' complaints.
        It extends the FlowComponent class.

    .. table:: Properties
    :widths: auto

    +------------------+----------+--------------------------------------------------------------------------------------------------+
    | Name             | Required | Description                                                                                      |
    +------------------+----------+--------------------------------------------------------------------------------------------------+
    | output_column    | Yes      | Column for saving the bot's answer about each product.                                           |
    +------------------+----------+--------------------------------------------------------------------------------------------------+
    | bot_name         | No       | Name given to the bot (default: 'CompliantBot').                                                 |
    +------------------+----------+--------------------------------------------------------------------------------------------------+

    ``run`` additionally reads ``self.product_column``, ``self.review_column`` and
    ``self.columns``, and aggregates a column literally named ``rating``.
    ``start`` reads the LLM name and model from ``self.llm``.

    Return

        A Pandas Dataframe grouped by ``self.columns`` with the number of reviews,
        the average rating and the bot's answer for each product.

    """  # noqa

    # Prefix used in error messages. It is kept separate from ``system_prompt``
    # because ``start`` replaces that attribute with the prompt file's text.
    _error_prefix = "Product Compliant: "

    def __init__(
        self,
        loop: asyncio.AbstractEventLoop = None,
        job: Callable = None,
        stat: Callable = None,
        **kwargs,
    ):
        """
        Initialise the component and locate the task's ``prompts`` directory.

        Parameters

            loop, job, stat:
                Forwarded unchanged to the parent initialiser.
            kwargs: dict
                Component options; ``bot_name`` is read here and the whole
                mapping is forwarded to the parent initialiser.

        Raises

            ConfigError if the ``prompts`` directory does not exist.
        """
        super().__init__(
            loop=loop, job=job, stat=stat, **kwargs
        )
        # System Prompt: placeholder until start() loads the prompt file.
        self.system_prompt = self._error_prefix
        self._bot_name = kwargs.get('bot_name', 'CompliantBot')
        # TaskStorage
        # Find in the taskstorage, the "prompts" directory.
        prompt_path = self._taskstore.path.joinpath(self._program, 'prompts')
        if not prompt_path.exists():
            raise ConfigError(
                f"{self._error_prefix} Prompts Path Not Found: {prompt_path}"
            )
        self.prompt_path = prompt_path
        # The prompt file name is hardcoded to this particular Bot.
        self.system_prompt_file = 'compliantbot.txt'
        # Bot Object:
        self._bot: Any = None

    async def start(self, **kwargs):
        """
        start

        Overview

            Takes the previous component's output as data, loads the system
            prompt from the prompt file and creates and configures the bot.

        Parameters

            kwargs: dict
                Not used by this method.

        Return

            True if the ProductCompliant component started successfully.

        Raises

            ComponentError if there is no previous component, or if creating
            or configuring the bot fails.
            ConfigError if ``output_column`` is missing or empty, or if the
            prompt file does not exist.
        """
        if self.previous:
            self.data = self.input
        else:
            raise ComponentError(
                "CompliantBot: Data Was Not Found"
            )
        # getattr: an attribute that was never set must be reported as the
        # same configuration error as an empty one.
        if not getattr(self, 'output_column', None):
            raise ConfigError(
                "ProductCompliant: output_column is required"
            )
        # check if Prompt File exists
        prompt_file = self.prompt_path.joinpath(self.system_prompt_file)
        if not prompt_file.exists():
            raise ConfigError(
                f"{self._error_prefix} Prompt File Not Found: {prompt_file}"
            )
        self.system_prompt_file = prompt_file.name
        # read the prompt file as text; the encoding is explicit so the result
        # does not depend on the locale of the machine running the task.
        with open(prompt_file, 'r', encoding='utf-8') as f:
            self.system_prompt = f.read()
        # Set the Bot:
        try:
            self._bot = BasicBot(
                name=self._bot_name,
                system_prompt=self.system_prompt,
                goal="Your task is to provide a concise and insightful analysis on negative reviews of products",
                use_llm=self.llm.get('name', 'name'),
                model_name=self.llm.get('model_name', 'gemini-2.0-pro'),
            )
            # configure the bot:
            await self._bot.configure()
        except Exception as err:
            # Use the short prefix here: system_prompt now holds the whole
            # prompt text and must not be dumped into the error message.
            raise ComponentError(
                f"{self._error_prefix} Error Configuring Bot: {err}"
            ) from err
        return True

    def format_question(self, product_name, reviews):
        """
        Build the question sent to the bot for one product.

        Parameters

            product_name:
                Name of the product, inserted twice into the question text.
            reviews:
                Iterable of review texts; each becomes one ``* `` bullet line.

        Return

            The question text followed by one bullet line per review.
        """
        header = (
            "\n"
            f"Product: {product_name}\n"
            "\n"
            "Question:\n"
            f'"What are the primary customer concerns, problems, and issues based on these negative product reviews for {product_name}?"\n'
            "\n"
            "Negative Customer Reviews:\n"
            "\n"
        )
        # Join once instead of growing the string review by review.
        return header + "".join(f"* {review}\n" for review in reviews)

    async def run(self):
        """
        run

        Overview

            Asks the bot one question per product, using all the reviews of that
            product, then builds a summary Dataframe grouped by ``self.columns``.

        Return

            A Pandas Dataframe with ``num_reviews``, ``avg_rating`` and the
            bot's answer in ``self.output_column``; also stored in ``self._result``.

        """
        # Group reviews by product_name and aggregate them into a list
        grouped = (
            self.data.groupby(self.product_column)[self.review_column]
            .apply(list)
            .reset_index()
        )
        # product name -> answer returned by the bot
        answers = {}
        for _, row in grouped.iterrows():
            product_name = row[self.product_column]
            reviews = row[self.review_column]
            result = await self._bot.question(
                question=self.format_question(product_name, reviews),
                return_docs=False
            )
            answers[product_name] = result.answer
        # Then, create a dataframe only with the columns in "self.columns" grouped.
        grouped_df = self.data.groupby(self.columns).agg(
            num_reviews=(self.review_column, "count"),
            avg_rating=("rating", "mean")
        ).reset_index()
        # Add the Product Compliant column, matching each row by its product name.
        # A plain lookup is used so a product without an answer raises KeyError.
        grouped_df[self.output_column] = grouped_df[self.product_column].map(
            lambda name: answers[name]
        )
        # return the grouped dataframe
        self._result = grouped_df
        return self._result

    async def close(self):
        """Nothing to release; present to complete the component interface."""
        pass
|
@@ -0,0 +1 @@
|
|
1
|
+
from .scraper import ProductInfo
|
@@ -0,0 +1,83 @@
|
|
1
|
+
from abc import abstractmethod
|
2
|
+
import re
|
3
|
+
import logging
|
4
|
+
from typing import Dict, Any, Optional, List
|
5
|
+
from bs4 import BeautifulSoup
|
6
|
+
from ....interfaces import HTTPService, SeleniumService
|
7
|
+
|
8
|
+
class ParserBase(HTTPService, SeleniumService):
    """
    Shared foundation for the product information parsers.

    Declares the attributes the helper methods read (``domain``,
    ``search_format`` and the optional ``model_pattern``) and the ``parse``
    coroutine that concrete parsers implement.
    """
    domain: str
    search_format: str
    model_pattern: Optional[str] = None  # Optional; defaults to None

    def __init__(self, *args, **kwargs):
        """Store the optional cookies, create the logger and initialise the parents."""
        self.cookies = kwargs.get('cookies')
        self._logger = logging.getLogger(type(self).__name__)
        super().__init__(*args, **kwargs)

    @abstractmethod
    async def parse(self, url: str, search_term: str) -> Dict[str, Any]:
        """
        Parse product information from a URL.

        Args:
            url: URL to parse
            search_term: Original search term

        Returns:
            Dictionary with extracted product information
        """

    def create_search_query(self, term: str) -> str:
        """
        Fill ``search_format`` with the given term.

        Args:
            term: Search term (typically product model)

        Returns:
            ``self.search_format`` formatted with ``term``
        """
        template = self.search_format
        return template.format(term)

    def extract_model_code(self, url: str) -> Optional[str]:
        """
        Extract the model code from a URL with ``model_pattern``.

        Args:
            url: URL to extract model code from

        Returns:
            The first capture group of the pattern, or None when no pattern
            is defined, the URL does not match, or the group is empty
        """
        pattern = getattr(self, 'model_pattern', None)
        if pattern is None:
            # No pattern defined, so there is nothing to extract.
            return None

        found = re.search(pattern, url)
        if not found:
            return None
        code = found.group(1)
        return code if code else None

    def get_product_urls(self, search_results: List[Dict[str, str]], max_urls: int = 5) -> List[str]:
        """
        Pick the URLs that belong to this parser's domain from search results.

        Only the first ``max_urls`` entries of ``search_results`` are
        inspected, so fewer than ``max_urls`` URLs may be returned.

        Args:
            search_results: List of search result dictionaries; the URL is
                taken from the first non-empty of ``link``, ``href``, ``url``
            max_urls: Number of leading search results to inspect

        Returns:
            List of URLs that contain ``self.domain``
        """
        candidates = (
            entry.get('link') or entry.get('href') or entry.get('url')
            for entry in search_results[:max_urls]
        )
        return [link for link in candidates if link and self.domain in link]
|