flowtask 5.8.4__cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowtask/__init__.py +93 -0
- flowtask/__main__.py +38 -0
- flowtask/bots/__init__.py +6 -0
- flowtask/bots/check.py +93 -0
- flowtask/bots/codebot.py +51 -0
- flowtask/components/ASPX.py +148 -0
- flowtask/components/AddDataset.py +352 -0
- flowtask/components/Amazon.py +523 -0
- flowtask/components/AutoTask.py +314 -0
- flowtask/components/Azure.py +80 -0
- flowtask/components/AzureUsers.py +106 -0
- flowtask/components/BaseAction.py +91 -0
- flowtask/components/BaseLoop.py +198 -0
- flowtask/components/BestBuy.py +800 -0
- flowtask/components/CSVToGCS.py +120 -0
- flowtask/components/CompanyScraper/__init__.py +1 -0
- flowtask/components/CompanyScraper/parsers/__init__.py +6 -0
- flowtask/components/CompanyScraper/parsers/base.py +102 -0
- flowtask/components/CompanyScraper/parsers/explorium.py +192 -0
- flowtask/components/CompanyScraper/parsers/leadiq.py +206 -0
- flowtask/components/CompanyScraper/parsers/rocket.py +133 -0
- flowtask/components/CompanyScraper/parsers/siccode.py +109 -0
- flowtask/components/CompanyScraper/parsers/visualvisitor.py +130 -0
- flowtask/components/CompanyScraper/parsers/zoominfo.py +118 -0
- flowtask/components/CompanyScraper/scrapper.py +1054 -0
- flowtask/components/CopyTo.py +177 -0
- flowtask/components/CopyToBigQuery.py +243 -0
- flowtask/components/CopyToMongoDB.py +291 -0
- flowtask/components/CopyToPg.py +609 -0
- flowtask/components/CopyToRethink.py +207 -0
- flowtask/components/CreateGCSBucket.py +102 -0
- flowtask/components/CreateReport/CreateReport.py +228 -0
- flowtask/components/CreateReport/__init__.py +9 -0
- flowtask/components/CreateReport/charts/__init__.py +15 -0
- flowtask/components/CreateReport/charts/bar.py +51 -0
- flowtask/components/CreateReport/charts/base.py +66 -0
- flowtask/components/CreateReport/charts/pie.py +64 -0
- flowtask/components/CreateReport/utils.py +9 -0
- flowtask/components/CustomerSatisfaction.py +196 -0
- flowtask/components/DataInput.py +200 -0
- flowtask/components/DateList.py +255 -0
- flowtask/components/DbClient.py +163 -0
- flowtask/components/DialPad.py +146 -0
- flowtask/components/DocumentDBQuery.py +200 -0
- flowtask/components/DownloadFrom.py +371 -0
- flowtask/components/DownloadFromD2L.py +113 -0
- flowtask/components/DownloadFromFTP.py +181 -0
- flowtask/components/DownloadFromIMAP.py +315 -0
- flowtask/components/DownloadFromS3.py +198 -0
- flowtask/components/DownloadFromSFTP.py +265 -0
- flowtask/components/DownloadFromSharepoint.py +110 -0
- flowtask/components/DownloadFromSmartSheet.py +114 -0
- flowtask/components/DownloadS3File.py +229 -0
- flowtask/components/Dummy.py +59 -0
- flowtask/components/DuplicatePhoto.py +411 -0
- flowtask/components/EmployeeEvaluation.py +237 -0
- flowtask/components/ExecuteSQL.py +323 -0
- flowtask/components/ExtractHTML.py +178 -0
- flowtask/components/FileBase.py +178 -0
- flowtask/components/FileCopy.py +181 -0
- flowtask/components/FileDelete.py +82 -0
- flowtask/components/FileExists.py +146 -0
- flowtask/components/FileIteratorDelete.py +112 -0
- flowtask/components/FileList.py +194 -0
- flowtask/components/FileOpen.py +75 -0
- flowtask/components/FileRead.py +120 -0
- flowtask/components/FileRename.py +106 -0
- flowtask/components/FilterIf.py +284 -0
- flowtask/components/FilterRows/FilterRows.py +200 -0
- flowtask/components/FilterRows/__init__.py +10 -0
- flowtask/components/FilterRows/functions.py +4 -0
- flowtask/components/GCSToBigQuery.py +103 -0
- flowtask/components/GoogleA4.py +150 -0
- flowtask/components/GoogleGeoCoding.py +344 -0
- flowtask/components/GooglePlaces.py +315 -0
- flowtask/components/GoogleSearch.py +539 -0
- flowtask/components/HTTPClient.py +268 -0
- flowtask/components/ICIMS.py +146 -0
- flowtask/components/IF.py +179 -0
- flowtask/components/IcimsFolderCopy.py +173 -0
- flowtask/components/ImageFeatures/__init__.py +5 -0
- flowtask/components/ImageFeatures/process.py +233 -0
- flowtask/components/IteratorBase.py +251 -0
- flowtask/components/LangchainLoader/__init__.py +5 -0
- flowtask/components/LangchainLoader/loader.py +194 -0
- flowtask/components/LangchainLoader/loaders/__init__.py +22 -0
- flowtask/components/LangchainLoader/loaders/abstract.py +362 -0
- flowtask/components/LangchainLoader/loaders/basepdf.py +50 -0
- flowtask/components/LangchainLoader/loaders/docx.py +91 -0
- flowtask/components/LangchainLoader/loaders/html.py +119 -0
- flowtask/components/LangchainLoader/loaders/pdfblocks.py +146 -0
- flowtask/components/LangchainLoader/loaders/pdfmark.py +79 -0
- flowtask/components/LangchainLoader/loaders/pdftables.py +135 -0
- flowtask/components/LangchainLoader/loaders/qa.py +67 -0
- flowtask/components/LangchainLoader/loaders/txt.py +55 -0
- flowtask/components/LeadIQ.py +650 -0
- flowtask/components/Loop.py +253 -0
- flowtask/components/Lowes.py +334 -0
- flowtask/components/MS365Usage.py +156 -0
- flowtask/components/MSTeamsMessages.py +320 -0
- flowtask/components/MarketClustering.py +1051 -0
- flowtask/components/MergeFiles.py +362 -0
- flowtask/components/MilvusOutput.py +87 -0
- flowtask/components/NearByStores.py +175 -0
- flowtask/components/NetworkNinja/__init__.py +6 -0
- flowtask/components/NetworkNinja/models/__init__.py +52 -0
- flowtask/components/NetworkNinja/models/abstract.py +177 -0
- flowtask/components/NetworkNinja/models/account.py +39 -0
- flowtask/components/NetworkNinja/models/client.py +19 -0
- flowtask/components/NetworkNinja/models/district.py +14 -0
- flowtask/components/NetworkNinja/models/events.py +101 -0
- flowtask/components/NetworkNinja/models/forms.py +499 -0
- flowtask/components/NetworkNinja/models/market.py +16 -0
- flowtask/components/NetworkNinja/models/organization.py +34 -0
- flowtask/components/NetworkNinja/models/photos.py +125 -0
- flowtask/components/NetworkNinja/models/project.py +44 -0
- flowtask/components/NetworkNinja/models/region.py +28 -0
- flowtask/components/NetworkNinja/models/store.py +203 -0
- flowtask/components/NetworkNinja/models/user.py +151 -0
- flowtask/components/NetworkNinja/router.py +854 -0
- flowtask/components/Odoo.py +175 -0
- flowtask/components/OdooInjector.py +192 -0
- flowtask/components/OpenFromXML.py +126 -0
- flowtask/components/OpenWeather.py +41 -0
- flowtask/components/OpenWithBase.py +616 -0
- flowtask/components/OpenWithPandas.py +715 -0
- flowtask/components/PGPDecrypt.py +199 -0
- flowtask/components/PandasIterator.py +187 -0
- flowtask/components/PandasToFile.py +189 -0
- flowtask/components/Paradox.py +339 -0
- flowtask/components/ParamIterator.py +117 -0
- flowtask/components/ParseHTML.py +84 -0
- flowtask/components/PlacerStores.py +249 -0
- flowtask/components/Pokemon.py +507 -0
- flowtask/components/PositiveBot.py +62 -0
- flowtask/components/PowerPointSlide.py +400 -0
- flowtask/components/PrintMessage.py +127 -0
- flowtask/components/ProductCompetitors/__init__.py +5 -0
- flowtask/components/ProductCompetitors/parsers/__init__.py +7 -0
- flowtask/components/ProductCompetitors/parsers/base.py +72 -0
- flowtask/components/ProductCompetitors/parsers/bestbuy.py +86 -0
- flowtask/components/ProductCompetitors/parsers/lowes.py +103 -0
- flowtask/components/ProductCompetitors/scrapper.py +155 -0
- flowtask/components/ProductCompliant.py +169 -0
- flowtask/components/ProductInfo/__init__.py +1 -0
- flowtask/components/ProductInfo/parsers/__init__.py +5 -0
- flowtask/components/ProductInfo/parsers/base.py +83 -0
- flowtask/components/ProductInfo/parsers/brother.py +97 -0
- flowtask/components/ProductInfo/parsers/canon.py +167 -0
- flowtask/components/ProductInfo/parsers/epson.py +118 -0
- flowtask/components/ProductInfo/parsers/hp.py +131 -0
- flowtask/components/ProductInfo/parsers/samsung.py +97 -0
- flowtask/components/ProductInfo/scraper.py +319 -0
- flowtask/components/ProductPricing.py +118 -0
- flowtask/components/QS.py +261 -0
- flowtask/components/QSBase.py +201 -0
- flowtask/components/QueryIterator.py +273 -0
- flowtask/components/QueryToInsert.py +327 -0
- flowtask/components/QueryToPandas.py +432 -0
- flowtask/components/RESTClient.py +195 -0
- flowtask/components/RethinkDBQuery.py +189 -0
- flowtask/components/Rsync.py +74 -0
- flowtask/components/RunSSH.py +59 -0
- flowtask/components/RunShell.py +71 -0
- flowtask/components/SalesForce.py +20 -0
- flowtask/components/SaveImageBank/__init__.py +257 -0
- flowtask/components/SchedulingVisits.py +592 -0
- flowtask/components/ScrapPage.py +216 -0
- flowtask/components/ScrapSearch.py +79 -0
- flowtask/components/SendNotify.py +257 -0
- flowtask/components/SentimentAnalysis.py +694 -0
- flowtask/components/ServiceScrapper/__init__.py +5 -0
- flowtask/components/ServiceScrapper/parsers/__init__.py +1 -0
- flowtask/components/ServiceScrapper/parsers/base.py +94 -0
- flowtask/components/ServiceScrapper/parsers/costco.py +93 -0
- flowtask/components/ServiceScrapper/scrapper.py +199 -0
- flowtask/components/SetVariables.py +156 -0
- flowtask/components/SubTask.py +182 -0
- flowtask/components/SuiteCRM.py +48 -0
- flowtask/components/Switch.py +175 -0
- flowtask/components/TableBase.py +148 -0
- flowtask/components/TableDelete.py +312 -0
- flowtask/components/TableInput.py +143 -0
- flowtask/components/TableOutput/TableOutput.py +384 -0
- flowtask/components/TableOutput/__init__.py +3 -0
- flowtask/components/TableSchema.py +534 -0
- flowtask/components/Target.py +223 -0
- flowtask/components/ThumbnailGenerator.py +156 -0
- flowtask/components/ToPandas.py +67 -0
- flowtask/components/TransformRows/TransformRows.py +507 -0
- flowtask/components/TransformRows/__init__.py +9 -0
- flowtask/components/TransformRows/functions.py +559 -0
- flowtask/components/TransposeRows.py +176 -0
- flowtask/components/UPCDatabase.py +86 -0
- flowtask/components/UnGzip.py +171 -0
- flowtask/components/Uncompress.py +172 -0
- flowtask/components/UniqueRows.py +126 -0
- flowtask/components/Unzip.py +107 -0
- flowtask/components/UpdateOperationalVars.py +147 -0
- flowtask/components/UploadTo.py +299 -0
- flowtask/components/UploadToS3.py +136 -0
- flowtask/components/UploadToSFTP.py +160 -0
- flowtask/components/UploadToSharepoint.py +205 -0
- flowtask/components/UserFunc.py +122 -0
- flowtask/components/VivaTracker.py +140 -0
- flowtask/components/WSDLClient.py +123 -0
- flowtask/components/Wait.py +18 -0
- flowtask/components/Walmart.py +199 -0
- flowtask/components/Workplace.py +134 -0
- flowtask/components/XMLToPandas.py +267 -0
- flowtask/components/Zammad/__init__.py +41 -0
- flowtask/components/Zammad/models.py +0 -0
- flowtask/components/ZoomInfoScraper.py +409 -0
- flowtask/components/__init__.py +104 -0
- flowtask/components/abstract.py +18 -0
- flowtask/components/flow.py +530 -0
- flowtask/components/google.py +335 -0
- flowtask/components/group.py +221 -0
- flowtask/components/py.typed +0 -0
- flowtask/components/reviewscrap.py +132 -0
- flowtask/components/tAutoincrement.py +117 -0
- flowtask/components/tConcat.py +109 -0
- flowtask/components/tExplode.py +119 -0
- flowtask/components/tFilter.py +184 -0
- flowtask/components/tGroup.py +236 -0
- flowtask/components/tJoin.py +270 -0
- flowtask/components/tMap/__init__.py +9 -0
- flowtask/components/tMap/functions.py +54 -0
- flowtask/components/tMap/tMap.py +450 -0
- flowtask/components/tMelt.py +112 -0
- flowtask/components/tMerge.py +114 -0
- flowtask/components/tOrder.py +93 -0
- flowtask/components/tPandas.py +94 -0
- flowtask/components/tPivot.py +71 -0
- flowtask/components/tPluckCols.py +76 -0
- flowtask/components/tUnnest.py +82 -0
- flowtask/components/user.py +401 -0
- flowtask/conf.py +457 -0
- flowtask/download.py +102 -0
- flowtask/events/__init__.py +11 -0
- flowtask/events/events/__init__.py +20 -0
- flowtask/events/events/abstract.py +95 -0
- flowtask/events/events/alerts/__init__.py +362 -0
- flowtask/events/events/alerts/colfunctions.py +131 -0
- flowtask/events/events/alerts/functions.py +158 -0
- flowtask/events/events/dummy.py +12 -0
- flowtask/events/events/exec.py +124 -0
- flowtask/events/events/file/__init__.py +7 -0
- flowtask/events/events/file/base.py +51 -0
- flowtask/events/events/file/copy.py +23 -0
- flowtask/events/events/file/delete.py +16 -0
- flowtask/events/events/interfaces/__init__.py +9 -0
- flowtask/events/events/interfaces/client.py +67 -0
- flowtask/events/events/interfaces/credentials.py +28 -0
- flowtask/events/events/interfaces/notifications.py +58 -0
- flowtask/events/events/jira.py +122 -0
- flowtask/events/events/log.py +26 -0
- flowtask/events/events/logerr.py +52 -0
- flowtask/events/events/notify.py +59 -0
- flowtask/events/events/notify_event.py +160 -0
- flowtask/events/events/publish.py +54 -0
- flowtask/events/events/sendfile.py +104 -0
- flowtask/events/events/task.py +97 -0
- flowtask/events/events/teams.py +98 -0
- flowtask/events/events/webhook.py +58 -0
- flowtask/events/manager.py +287 -0
- flowtask/exceptions.c +39393 -0
- flowtask/exceptions.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/extensions/__init__.py +3 -0
- flowtask/extensions/abstract.py +82 -0
- flowtask/extensions/logging/__init__.py +65 -0
- flowtask/hooks/__init__.py +9 -0
- flowtask/hooks/actions/__init__.py +22 -0
- flowtask/hooks/actions/abstract.py +66 -0
- flowtask/hooks/actions/dummy.py +23 -0
- flowtask/hooks/actions/jira.py +74 -0
- flowtask/hooks/actions/rest.py +320 -0
- flowtask/hooks/actions/sampledata.py +37 -0
- flowtask/hooks/actions/sensor.py +23 -0
- flowtask/hooks/actions/task.py +9 -0
- flowtask/hooks/actions/ticket.py +37 -0
- flowtask/hooks/actions/zammad.py +55 -0
- flowtask/hooks/hook.py +62 -0
- flowtask/hooks/models.py +17 -0
- flowtask/hooks/service.py +187 -0
- flowtask/hooks/step.py +91 -0
- flowtask/hooks/types/__init__.py +23 -0
- flowtask/hooks/types/base.py +129 -0
- flowtask/hooks/types/brokers/__init__.py +11 -0
- flowtask/hooks/types/brokers/base.py +54 -0
- flowtask/hooks/types/brokers/mqtt.py +35 -0
- flowtask/hooks/types/brokers/rabbitmq.py +82 -0
- flowtask/hooks/types/brokers/redis.py +83 -0
- flowtask/hooks/types/brokers/sqs.py +44 -0
- flowtask/hooks/types/fs.py +232 -0
- flowtask/hooks/types/http.py +49 -0
- flowtask/hooks/types/imap.py +200 -0
- flowtask/hooks/types/jira.py +279 -0
- flowtask/hooks/types/mail.py +205 -0
- flowtask/hooks/types/postgres.py +98 -0
- flowtask/hooks/types/responses/__init__.py +8 -0
- flowtask/hooks/types/responses/base.py +5 -0
- flowtask/hooks/types/sharepoint.py +288 -0
- flowtask/hooks/types/ssh.py +141 -0
- flowtask/hooks/types/tagged.py +59 -0
- flowtask/hooks/types/upload.py +85 -0
- flowtask/hooks/types/watch.py +71 -0
- flowtask/hooks/types/web.py +36 -0
- flowtask/interfaces/AzureClient.py +137 -0
- flowtask/interfaces/AzureGraph.py +839 -0
- flowtask/interfaces/Boto3Client.py +326 -0
- flowtask/interfaces/DropboxClient.py +173 -0
- flowtask/interfaces/ExcelHandler.py +94 -0
- flowtask/interfaces/FTPClient.py +131 -0
- flowtask/interfaces/GoogleCalendar.py +201 -0
- flowtask/interfaces/GoogleClient.py +133 -0
- flowtask/interfaces/GoogleDrive.py +127 -0
- flowtask/interfaces/GoogleGCS.py +89 -0
- flowtask/interfaces/GoogleGeocoding.py +93 -0
- flowtask/interfaces/GoogleLang.py +114 -0
- flowtask/interfaces/GooglePub.py +61 -0
- flowtask/interfaces/GoogleSheet.py +68 -0
- flowtask/interfaces/IMAPClient.py +137 -0
- flowtask/interfaces/O365Calendar.py +113 -0
- flowtask/interfaces/O365Client.py +220 -0
- flowtask/interfaces/OneDrive.py +284 -0
- flowtask/interfaces/Outlook.py +155 -0
- flowtask/interfaces/ParrotBot.py +130 -0
- flowtask/interfaces/SSHClient.py +378 -0
- flowtask/interfaces/Sharepoint.py +496 -0
- flowtask/interfaces/__init__.py +36 -0
- flowtask/interfaces/azureauth.py +119 -0
- flowtask/interfaces/cache.py +201 -0
- flowtask/interfaces/client.py +82 -0
- flowtask/interfaces/compress.py +525 -0
- flowtask/interfaces/credentials.py +124 -0
- flowtask/interfaces/d2l.py +239 -0
- flowtask/interfaces/databases/__init__.py +5 -0
- flowtask/interfaces/databases/db.py +223 -0
- flowtask/interfaces/databases/documentdb.py +55 -0
- flowtask/interfaces/databases/rethink.py +39 -0
- flowtask/interfaces/dataframes/__init__.py +11 -0
- flowtask/interfaces/dataframes/abstract.py +21 -0
- flowtask/interfaces/dataframes/arrow.py +71 -0
- flowtask/interfaces/dataframes/dt.py +69 -0
- flowtask/interfaces/dataframes/pandas.py +167 -0
- flowtask/interfaces/dataframes/polars.py +60 -0
- flowtask/interfaces/db.py +263 -0
- flowtask/interfaces/env.py +46 -0
- flowtask/interfaces/func.py +137 -0
- flowtask/interfaces/http.py +1780 -0
- flowtask/interfaces/locale.py +40 -0
- flowtask/interfaces/log.py +75 -0
- flowtask/interfaces/mask.py +143 -0
- flowtask/interfaces/notification.py +154 -0
- flowtask/interfaces/playwright.py +339 -0
- flowtask/interfaces/powerpoint.py +368 -0
- flowtask/interfaces/py.typed +0 -0
- flowtask/interfaces/qs.py +376 -0
- flowtask/interfaces/result.py +87 -0
- flowtask/interfaces/selenium_service.py +779 -0
- flowtask/interfaces/smartsheet.py +154 -0
- flowtask/interfaces/stat.py +39 -0
- flowtask/interfaces/task.py +96 -0
- flowtask/interfaces/template.py +118 -0
- flowtask/interfaces/vectorstores/__init__.py +1 -0
- flowtask/interfaces/vectorstores/abstract.py +133 -0
- flowtask/interfaces/vectorstores/milvus.py +669 -0
- flowtask/interfaces/zammad.py +107 -0
- flowtask/models.py +193 -0
- flowtask/parsers/__init__.py +15 -0
- flowtask/parsers/_yaml.c +11978 -0
- flowtask/parsers/_yaml.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/argparser.py +235 -0
- flowtask/parsers/base.c +15155 -0
- flowtask/parsers/base.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/json.c +11968 -0
- flowtask/parsers/json.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/maps.py +49 -0
- flowtask/parsers/toml.c +11968 -0
- flowtask/parsers/toml.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/plugins/__init__.py +16 -0
- flowtask/plugins/components/__init__.py +0 -0
- flowtask/plugins/handler/__init__.py +45 -0
- flowtask/plugins/importer.py +31 -0
- flowtask/plugins/sources/__init__.py +0 -0
- flowtask/runner.py +283 -0
- flowtask/scheduler/__init__.py +9 -0
- flowtask/scheduler/functions.py +493 -0
- flowtask/scheduler/handlers/__init__.py +8 -0
- flowtask/scheduler/handlers/manager.py +504 -0
- flowtask/scheduler/handlers/models.py +58 -0
- flowtask/scheduler/handlers/service.py +72 -0
- flowtask/scheduler/notifications.py +65 -0
- flowtask/scheduler/scheduler.py +993 -0
- flowtask/services/__init__.py +0 -0
- flowtask/services/bots/__init__.py +0 -0
- flowtask/services/bots/telegram.py +264 -0
- flowtask/services/files/__init__.py +11 -0
- flowtask/services/files/manager.py +522 -0
- flowtask/services/files/model.py +37 -0
- flowtask/services/files/service.py +767 -0
- flowtask/services/jira/__init__.py +3 -0
- flowtask/services/jira/jira_actions.py +191 -0
- flowtask/services/tasks/__init__.py +13 -0
- flowtask/services/tasks/launcher.py +213 -0
- flowtask/services/tasks/manager.py +323 -0
- flowtask/services/tasks/service.py +275 -0
- flowtask/services/tasks/task_manager.py +376 -0
- flowtask/services/tasks/tasks.py +155 -0
- flowtask/storages/__init__.py +16 -0
- flowtask/storages/exceptions.py +12 -0
- flowtask/storages/files/__init__.py +8 -0
- flowtask/storages/files/abstract.py +29 -0
- flowtask/storages/files/filesystem.py +66 -0
- flowtask/storages/tasks/__init__.py +19 -0
- flowtask/storages/tasks/abstract.py +26 -0
- flowtask/storages/tasks/database.py +33 -0
- flowtask/storages/tasks/filesystem.py +108 -0
- flowtask/storages/tasks/github.py +119 -0
- flowtask/storages/tasks/memory.py +45 -0
- flowtask/storages/tasks/row.py +25 -0
- flowtask/tasks/__init__.py +0 -0
- flowtask/tasks/abstract.py +526 -0
- flowtask/tasks/command.py +118 -0
- flowtask/tasks/pile.py +486 -0
- flowtask/tasks/py.typed +0 -0
- flowtask/tasks/task.py +778 -0
- flowtask/template/__init__.py +161 -0
- flowtask/tests.py +257 -0
- flowtask/types/__init__.py +8 -0
- flowtask/types/typedefs.c +11347 -0
- flowtask/types/typedefs.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/__init__.py +24 -0
- flowtask/utils/constants.py +117 -0
- flowtask/utils/encoders.py +21 -0
- flowtask/utils/executor.py +112 -0
- flowtask/utils/functions.cpp +14280 -0
- flowtask/utils/functions.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/json.cpp +13349 -0
- flowtask/utils/json.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/mail.py +63 -0
- flowtask/utils/parseqs.c +13324 -0
- flowtask/utils/parserqs.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/stats.py +308 -0
- flowtask/utils/transformations.py +74 -0
- flowtask/utils/uv.py +12 -0
- flowtask/utils/validators.py +97 -0
- flowtask/version.py +11 -0
- flowtask-5.8.4.dist-info/LICENSE +201 -0
- flowtask-5.8.4.dist-info/METADATA +209 -0
- flowtask-5.8.4.dist-info/RECORD +470 -0
- flowtask-5.8.4.dist-info/WHEEL +6 -0
- flowtask-5.8.4.dist-info/entry_points.txt +3 -0
- flowtask-5.8.4.dist-info/top_level.txt +2 -0
- plugins/components/CreateQR.py +39 -0
- plugins/components/TestComponent.py +28 -0
- plugins/components/Use1.py +13 -0
- plugins/components/Workplace.py +117 -0
- plugins/components/__init__.py +3 -0
- plugins/sources/__init__.py +0 -0
- plugins/sources/get_populartimes.py +78 -0
- plugins/sources/google.py +150 -0
- plugins/sources/hubspot.py +679 -0
- plugins/sources/icims.py +679 -0
- plugins/sources/mobileinsight.py +501 -0
- plugins/sources/newrelic.py +262 -0
- plugins/sources/uap.py +268 -0
- plugins/sources/venu.py +244 -0
- plugins/sources/vocinity.py +314 -0
@@ -0,0 +1,97 @@
|
|
1
|
+
import re
|
2
|
+
from typing import Dict, Any, Optional, List
|
3
|
+
from bs4 import BeautifulSoup
|
4
|
+
from .base import ParserBase
|
5
|
+
|
6
|
+
class BrotherParser(ParserBase):
    """
    Parser for Brother product information.

    Extracts product details from Brother's USA website using Selenium
    (driver managed by :class:`ParserBase`) and BeautifulSoup for the
    HTML extraction itself.
    """
    # Site constants used by the scraper driver and search-query builder.
    domain = "brother-usa.com"
    search_format = "site:brother-usa.com {}"
    product_url_pattern = "brother-usa.com/products/"

    async def parse(self, url: str, search_term: str, retailer: Optional[str] = None) -> Dict[str, Any]:
        """
        Parse product information from a Brother URL using Selenium.

        Args:
            url: Brother product URL
            search_term: Original search term
            retailer: Optional retailer information (not used for Brother)

        Returns:
            Dictionary with product information; ``parse_status`` is
            ``"success"`` on a clean run or ``"error: <message>"`` when
            the page could not be processed.
        """
        result = {
            "source_url": url,
            "search_term": search_term,
            "model_code": None,
            "product_name": None,
            "price": None,
            "specs": None,
            "images": None,
            "parse_status": "pending",
        }

        try:
            # Force headless mode before acquiring the driver.
            self.headless = True
            driver = await self.get_driver()
            await self.get_page(url)

            page_content = driver.page_source
            soup = BeautifulSoup(page_content, "html.parser")

            # Extract model code — selector taken from the captured page markup.
            model_elem = soup.select_one("h1.janus-model-number")
            if model_elem:
                result["model_code"] = model_elem.text.strip()

            # Extract product name — selector taken from the captured page markup.
            name_elem = soup.select_one("h2.janus-product-title")
            if name_elem:
                result["product_name"] = name_elem.text.strip()

            # Extract price — placeholder selector; adjust to the live site.
            price_elem = soup.select_one("span.price")
            if price_elem:
                result["price"] = price_elem.text.strip()

            # Extract main image — placeholder selector; adjust to the live site.
            main_img = soup.select_one("img.product-image")
            if main_img:
                src = main_img.get("src")
                if src:
                    # Make relative image URLs absolute against the Brother domain.
                    if not src.startswith(("http://", "https://")):
                        src = f"https://{self.domain}{src}" if src.startswith("/") else f"https://{self.domain}/{src}"
                    result["images"] = [src]

            result["parse_status"] = "success"

        except Exception as e:
            self._logger.error(f"Error parsing Brother product: {str(e)}")
            result["parse_status"] = f"error: {str(e)}"
        finally:
            # Always release the Selenium driver, even on failure.
            self.close_driver()

        return result

    def get_product_urls(self, search_results: List[Dict[str, str]], max_urls: int = 5) -> List[str]:
        """
        Extract relevant product URLs from search results.

        Args:
            search_results: List of search result dictionaries
            max_urls: Maximum number of URLs to return

        Returns:
            List of up to ``max_urls`` product URLs that match the Brother
            product pattern.
        """
        urls: List[str] = []
        # Fix: scan ALL results and cap the MATCHES at max_urls. The previous
        # code sliced search_results[:max_urls] before filtering, so it could
        # return fewer than max_urls product URLs even when later results matched.
        for result in search_results:
            url = result.get('link') or result.get('href') or result.get('url')
            if url and self.domain in url and self.product_url_pattern in url:
                urls.append(url)
                if len(urls) >= max_urls:
                    break
        return urls
@@ -0,0 +1,167 @@
|
|
1
|
+
import re
|
2
|
+
from typing import Dict, Any, Optional, List
|
3
|
+
from bs4 import BeautifulSoup
|
4
|
+
from .base import ParserBase
|
5
|
+
|
6
|
+
class CanonParser(ParserBase):
    """
    Parser for Canon product information.

    Extracts product details from Canon's USA and Canada websites using
    Selenium. The region ('us' or 'ca') is derived from the retailer string
    and drives both the search-query domain and the CSS selectors used.
    """
    domain_us = "usa.canon.com"
    domain_ca = "canon.ca"
    product_url_pattern_us = "usa.canon.com/shop/p/"
    product_url_pattern_ca = "canon.ca/en/product"

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.region = "us"    # default region until a retailer hints otherwise
        self.retailer = None  # remembered so create_search_query() can re-derive the region

    def determine_region(self, retailer: Optional[str]) -> str:
        """
        Determine region based on retailer information.

        Args:
            retailer: Retailer string that may contain region information

        Returns:
            'ca' for Canada, 'us' for United States (default)
        """
        if retailer:
            retailer_lower = retailer.lower()
            if 'canada' in retailer_lower:
                return 'ca'
            elif 'us' in retailer_lower:
                # NOTE(review): substring match is broad — 'us' also occurs in
                # words like "Australia"; confirm expected retailer values
                # before tightening this check.
                return 'us'
        return 'us'  # Default to US if no region found

    def create_search_query(self, term: str) -> str:
        """
        Create region-specific search query.

        Args:
            term: Search term (typically product model)

        Returns:
            Formatted ``site:`` search query for the appropriate region
        """
        # Re-derive region from the retailer info stored by parse().
        self.region = self.determine_region(self.retailer)
        domain = self.domain_ca if self.region == 'ca' else self.domain_us
        return f"site:{domain} {term}"

    async def parse(self, url: str, search_term: str, retailer: Optional[str] = None) -> Dict[str, Any]:
        """
        Parse product information from a Canon URL using Selenium.

        Args:
            url: Canon product URL
            search_term: Original search term
            retailer: Optional retailer information to determine region

        Returns:
            Dictionary with product information; includes the resolved
            ``region`` and a ``parse_status`` of ``"success"`` or
            ``"error: <message>"``.
        """
        self.retailer = retailer  # Store retailer info for use in other methods
        self.region = self.determine_region(retailer)

        result = {
            "source_url": url,
            "search_term": search_term,
            "model_code": None,
            "product_name": None,
            "price": None,
            "specs": None,
            "images": None,
            "parse_status": "pending",
            "region": self.region,
        }

        try:
            # Force headless mode before acquiring the driver.
            self.headless = True
            driver = await self.get_driver()
            await self.get_page(url)

            page_content = driver.page_source
            soup = BeautifulSoup(page_content, "html.parser")

            if self.region == 'ca':
                # Selectors for the Canadian site.
                # Model code — match any <p> whose class contains 'ItemCode'.
                model_elem = soup.select_one("p[class*='ItemCode']")
                if not model_elem:
                    # Fallback: locate the "Item Code" text node and use its parent.
                    model_elems = soup.find_all(string=lambda text: text and "Item Code" in text)
                    if model_elems:
                        model_elem = model_elems[0].parent

                if model_elem:
                    # Strip the "Item Code:" label and surrounding whitespace.
                    model_text = model_elem.text.strip()
                    model_text = model_text.replace("Item Code: ", "").strip()
                    result["model_code"] = model_text

                # Product name — placeholder selector; adjust to the live site.
                name_elem = soup.select_one("h1.ProductName")
                if name_elem:
                    result["product_name"] = name_elem.text.strip()

            else:
                # Selectors for the USA site.
                product_name_elem = soup.select_one("span.base[data-ui-id='page-title-wrapper'][itemprop='name']")
                if product_name_elem:
                    result["product_name"] = product_name_elem.text.strip()

                sku_elem = soup.select_one("div.value[itemprop='sku']")
                if sku_elem:
                    result["model_code"] = sku_elem.text.strip()

            # Price — shared between both sites; adjust if the markup diverges.
            price_elem = soup.select_one("[data-price-type='finalPrice'] .price")
            if price_elem:
                result["price"] = price_elem.text.strip()

            # Main image — shared between both sites; adjust if the markup diverges.
            main_img = soup.select_one("img[data-role='product-image']")
            if main_img:
                src = main_img.get("src")
                if src:
                    domain = self.domain_ca if self.region == 'ca' else self.domain_us
                    # Make relative image URLs absolute against the regional domain.
                    if not src.startswith(("http://", "https://")):
                        src = f"https://{domain}{src}" if src.startswith("/") else f"https://{domain}/{src}"
                    result["images"] = [src]

            result["parse_status"] = "success"

        except Exception as e:
            self._logger.error(f"Error parsing Canon product: {str(e)}")
            result["parse_status"] = f"error: {str(e)}"
        finally:
            # Always release the Selenium driver, even on failure.
            self.close_driver()

        return result

    def get_product_urls(self, search_results: List[Dict[str, str]], max_urls: int = 5) -> List[str]:
        """
        Extract relevant product URLs from search results.

        Args:
            search_results: List of search result dictionaries
            max_urls: Maximum number of URLs to return

        Returns:
            List of up to ``max_urls`` product URLs that match the Canon
            product pattern for the current region.
        """
        urls: List[str] = []
        pattern = self.product_url_pattern_ca if self.region == 'ca' else self.product_url_pattern_us
        domain = self.domain_ca if self.region == 'ca' else self.domain_us

        # Fix: scan ALL results and cap the MATCHES at max_urls. The previous
        # code sliced search_results[:max_urls] before filtering, so it could
        # return fewer than max_urls product URLs even when later results matched.
        for result in search_results:
            url = result.get('link') or result.get('href') or result.get('url')
            if url and domain in url and pattern in url:
                urls.append(url)
                if len(urls) >= max_urls:
                    break
        return urls
|
@@ -0,0 +1,118 @@
|
|
1
|
+
import re
|
2
|
+
from typing import Dict, Any, Optional
|
3
|
+
import httpx
|
4
|
+
from bs4 import BeautifulSoup
|
5
|
+
from .base import ParserBase
|
6
|
+
|
7
|
+
class EpsonParser(ParserBase):
    """
    Parser for Epson product information.

    Extracts product details (model code, name, price, description,
    specifications and images) from Epson's website.
    """
    domain = "epson.com"
    search_format = "site:epson.com {} product"
    # Capture the model segment that follows "/p/" or "/s/" in the URL path,
    # stopping before any query string.
    # FIX: the previous class [p|s] also matched a literal "|" character;
    # [ps] expresses the intended "p or s" alternative.
    model_pattern = r"^.*\/\b[ps]\/([^?]+)"

    def extract_model_code(self, url: str) -> Optional[str]:
        """
        Extract model code from URL using the regex pattern and clean it.

        Args:
            url: URL to extract model code from

        Returns:
            Cleaned model code or None if not found
        """
        match = re.search(self.model_pattern, url)
        if match and match.group(1):
            # Extract the raw code from the URL
            model_code = match.group(1)

            # Strip the SPT_ prefix when present
            if model_code.startswith("SPT_"):
                model_code = model_code.replace("SPT_", "", 1)

            return model_code
        return None

    async def parse(self, url: str, search_term: str, retailer: str = None) -> Dict[str, Any]:
        """
        Parse product information from an Epson URL.

        Args:
            url: Epson product URL
            search_term: Original search term
            retailer: Optional retailer information (unused for Epson)

        Returns:
            Dictionary with product information; "parse_status" is "success"
            on a full parse or an "error: ..." string on failure.
        """
        result = {
            "source_url": url,
            "search_term": search_term,
            "model_code": self.extract_model_code(url),
            "product_name": None,
            "price": None,
            "description": None,
            "specs": None,
            "images": None,
            "parse_status": "pending"
        }

        try:
            # Get the page content
            response = await self._get(url, headers=self.headers, use_proxy=True)
            if not response or response.status_code != 200:
                result["parse_status"] = f"error: HTTP {response.status_code if response else 'no response'}"
                return result

            # Parse the HTML
            soup = BeautifulSoup(response.text, "html.parser")

            # Extract product name
            product_name_elem = soup.select_one("h1.product-name, h1.product-title")
            if product_name_elem:
                result["product_name"] = product_name_elem.text.strip()

            # Extract price
            price_elem = soup.select_one("span.price, div.product-price")
            if price_elem:
                result["price"] = price_elem.text.strip()

            # Extract description
            desc_elem = soup.select_one("div.product-description, div.product-overview")
            if desc_elem:
                result["description"] = desc_elem.text.strip()

            # Extract specifications as a label -> value mapping
            specs = {}
            specs_section = soup.select_one("div.product-specifications, div.tech-specs")
            if specs_section:
                for row in specs_section.select("tr, div.spec-row"):
                    label = row.select_one("th, .spec-label")
                    value = row.select_one("td, .spec-value")
                    if label and value:
                        specs[label.text.strip()] = value.text.strip()
                result["specs"] = specs

            # Extract images, normalizing relative paths to absolute URLs
            image_urls = []
            for img in soup.select("div.product-gallery img, img.product-image"):
                src = img.get("src") or img.get("data-src")
                if src:
                    if not src.startswith(("http://", "https://")):
                        src = f"https://{self.domain}{src}" if src.startswith("/") else f"https://{self.domain}/{src}"
                    image_urls.append(src)
            if image_urls:
                result["images"] = image_urls

            result["parse_status"] = "success"

        except httpx.RequestError as e:
            result["parse_status"] = f"error: request failed - {str(e)}"
        except Exception as e:
            self._logger.error(f"Error parsing Epson product: {str(e)}")
            result["parse_status"] = f"error: {str(e)}"

        return result
|
@@ -0,0 +1,131 @@
|
|
1
|
+
import re
|
2
|
+
import asyncio
|
3
|
+
from typing import Dict, Any, Optional, List
|
4
|
+
import httpx
|
5
|
+
from bs4 import BeautifulSoup
|
6
|
+
from .base import ParserBase
|
7
|
+
|
8
|
+
class HPParser(ParserBase):
    """
    Parser for HP product information.

    Extracts product details from HP's website using Selenium for dynamic content.
    """
    domain = "hp.com"
    search_format = "site:hp.com {}"  # Without the word 'product', unlike Epson
    product_url_pattern = "hp.com/us-en/shop/pdp/"  # Pattern for valid product URLs

    async def parse(self, url: str, search_term: str, retailer: str = None) -> Dict[str, Any]:
        """
        Parse product information from an HP URL using Selenium.

        Args:
            url: HP product URL
            search_term: Original search term
            retailer: Optional retailer information (unused for HP)

        Returns:
            Dictionary with product information; "parse_status" is "success"
            on a full parse or an "error: ..." string on failure.
        """
        result = {
            "source_url": url,
            "search_term": search_term,
            "model_code": None,
            "product_name": None,
            "price": None,
            "specs": None,
            "images": None,
            "parse_status": "pending"
        }

        try:
            # Use Selenium because the HP page renders content dynamically
            driver = await self.get_driver()
            await self.get_page(url)

            # Scroll down so lazy-loaded content gets rendered
            self._execute_scroll(scroll_pause_time=1.5, max_scrolls=5)

            # Extract the rendered page content
            page_content = driver.page_source
            soup = BeautifulSoup(page_content, "html.parser")

            # 1. Extract product name - exact selector taken from the page markup
            product_name_elem = soup.select_one("h1[data-test-hook='@hpstellar/core/typography']")
            if product_name_elem:
                result["product_name"] = product_name_elem.text.strip()

            # 2. Extract price - preferably from the data-widget-item-price attribute
            price_attr_elem = soup.select_one("[data-widget-item-price]")
            if price_attr_elem:
                price_value = price_attr_elem.get("data-widget-item-price")
                if price_value:
                    result["price"] = f"${price_value}"
            # Fallback selector when no price was found above.
            # FIX: this was an `elif not result["price"]` chained to the
            # attribute check, so the fallback never ran when the element
            # existed but carried no usable value.
            if not result["price"]:
                price_elem = soup.select_one("span[data-test-hook='@hpstellar/core/typography'][class*='sale-subscription-price']")
                if price_elem:
                    result["price"] = price_elem.text.strip()

            # 4. Extract model code - directly from the data-widget-item-sku attribute
            sku_elem = soup.select_one("[data-widget-item-sku]")
            if sku_elem:
                result["model_code"] = sku_elem.get("data-widget-item-sku")

            # If no code was found in the attribute, fall back to span.sku
            if not result["model_code"]:
                model_elem = soup.select_one("span.sku")
                if model_elem:
                    text = model_elem.text.strip()
                    # Drop the "Product #" prefix when present
                    if "Product #" in text:
                        result["model_code"] = text.replace("Product # ", "").strip()
                    else:
                        result["model_code"] = text

            # 5. Extract a SINGLE product image - try the gallery button first
            main_img = soup.select_one("button[data-gtm-category='linkClick'][data-gtm-id='gallery'] img")

            # Otherwise try the image-with-placeholder wrapper
            if not main_img:
                main_img = soup.select_one("[data-test-hook='@hpstellar/core/image-with-placeholder'] img")

            if main_img:
                src = main_img.get("src") or main_img.get("data-src")
                if src:
                    # Make sure the URL is absolute
                    if not src.startswith(("http://", "https://")):
                        src = f"https://{self.domain}{src}" if src.startswith("/") else f"https://{self.domain}/{src}"
                    result["images"] = [src]  # Store a single image as a list

            result["parse_status"] = "success"

        except httpx.RequestError as e:
            result["parse_status"] = f"error: request failed - {str(e)}"
        except Exception as e:
            self._logger.error(f"Error parsing HP product: {str(e)}")
            result["parse_status"] = f"error: {str(e)}"
        finally:
            # Always release the Selenium driver
            self.close_driver()

        return result

    def get_product_urls(self, search_results: List[Dict[str, str]], max_urls: int = 5) -> List[str]:
        """
        Extract relevant product URLs from search results.

        Args:
            search_results: List of search result dictionaries
            max_urls: Maximum number of URLs to return

        Returns:
            List of product URLs that match the HP product pattern
        """
        urls: List[str] = []
        # Filter first, then cap at max_urls: slicing the raw results before
        # filtering could return fewer matches than exist in the result set.
        for result in search_results:
            url = result.get('link') or result.get('href') or result.get('url')
            # Keep only URLs that point to an HP product page
            if url and self.domain in url and self.product_url_pattern in url:
                urls.append(url)
                if len(urls) >= max_urls:
                    break
        return urls
|
@@ -0,0 +1,97 @@
|
|
1
|
+
import re
|
2
|
+
from typing import Dict, Any, Optional, List
|
3
|
+
from bs4 import BeautifulSoup
|
4
|
+
from .base import ParserBase
|
5
|
+
|
6
|
+
class SamsungParser(ParserBase):
    """
    Parser for Samsung product information.

    Renders Samsung product pages with Selenium and scrapes the model code,
    product name, price and main image out of the resulting HTML.
    """
    domain = "samsung.com"
    search_format = "site:samsung.com {}"
    product_url_pattern = "samsung.com/products/"  # Adjust to the site's real URL layout

    async def parse(self, url: str, search_term: str, retailer: Optional[str] = None) -> Dict[str, Any]:
        """
        Parse product information from a Samsung URL using Selenium.

        Args:
            url: Samsung product URL
            search_term: Original search term
            retailer: Optional retailer information (not used for Samsung)

        Returns:
            Dictionary with product information; "parse_status" is "success"
            on a full parse or an "error: ..." string on failure.
        """
        result = {
            "source_url": url,
            "search_term": search_term,
            "model_code": None,
            "product_name": None,
            "price": None,
            "specs": None,
            "images": None,
            "parse_status": "pending",
        }

        try:
            # Render the page with a headless Selenium driver
            self.headless = True
            driver = await self.get_driver()
            await self.get_page(url)

            soup = BeautifulSoup(driver.page_source, "html.parser")

            # Simple text fields as (result key, CSS selector) pairs; the
            # price selector is a placeholder — adjust to the live site.
            text_fields = (
                ("model_code", 'span[data-testid="atom_label"]'),
                ("product_name", "div[class*='ProductTitle_product']"),
                ("price", "span.price"),
            )
            for key, selector in text_fields:
                node = soup.select_one(selector)
                if node:
                    result[key] = node.text.strip()

            # Main product image; normalize relative paths to absolute URLs
            main_img = soup.select_one("img.product-image")  # Adjust to the live site
            if main_img:
                src = main_img.get("src")
                if src:
                    if not src.startswith(("http://", "https://")):
                        prefix = f"https://{self.domain}"
                        src = prefix + src if src.startswith("/") else f"{prefix}/{src}"
                    result["images"] = [src]

            result["parse_status"] = "success"

        except Exception as e:
            self._logger.error(f"Error parsing Samsung product: {str(e)}")
            result["parse_status"] = f"error: {str(e)}"
        finally:
            self.close_driver()

        return result

    def get_product_urls(self, search_results: List[Dict[str, str]], max_urls: int = 1) -> List[str]:
        """
        Extract relevant product URLs from search results.

        Args:
            search_results: List of search result dictionaries
            max_urls: Maximum number of URLs to return (default: 1, so only
                the top search result is considered)

        Returns:
            List of product URLs that match the Samsung product pattern
        """
        urls: List[str] = []
        # Only the first max_urls results are inspected; matching on the bare
        # domain is deliberately less restrictive than product_url_pattern.
        for entry in search_results[:max_urls]:
            link = entry.get('link') or entry.get('href') or entry.get('url')
            if link and self.domain in link:
                urls.append(link)
        return urls
|