flowtask 5.8.4__cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowtask/__init__.py +93 -0
- flowtask/__main__.py +38 -0
- flowtask/bots/__init__.py +6 -0
- flowtask/bots/check.py +93 -0
- flowtask/bots/codebot.py +51 -0
- flowtask/components/ASPX.py +148 -0
- flowtask/components/AddDataset.py +352 -0
- flowtask/components/Amazon.py +523 -0
- flowtask/components/AutoTask.py +314 -0
- flowtask/components/Azure.py +80 -0
- flowtask/components/AzureUsers.py +106 -0
- flowtask/components/BaseAction.py +91 -0
- flowtask/components/BaseLoop.py +198 -0
- flowtask/components/BestBuy.py +800 -0
- flowtask/components/CSVToGCS.py +120 -0
- flowtask/components/CompanyScraper/__init__.py +1 -0
- flowtask/components/CompanyScraper/parsers/__init__.py +6 -0
- flowtask/components/CompanyScraper/parsers/base.py +102 -0
- flowtask/components/CompanyScraper/parsers/explorium.py +192 -0
- flowtask/components/CompanyScraper/parsers/leadiq.py +206 -0
- flowtask/components/CompanyScraper/parsers/rocket.py +133 -0
- flowtask/components/CompanyScraper/parsers/siccode.py +109 -0
- flowtask/components/CompanyScraper/parsers/visualvisitor.py +130 -0
- flowtask/components/CompanyScraper/parsers/zoominfo.py +118 -0
- flowtask/components/CompanyScraper/scrapper.py +1054 -0
- flowtask/components/CopyTo.py +177 -0
- flowtask/components/CopyToBigQuery.py +243 -0
- flowtask/components/CopyToMongoDB.py +291 -0
- flowtask/components/CopyToPg.py +609 -0
- flowtask/components/CopyToRethink.py +207 -0
- flowtask/components/CreateGCSBucket.py +102 -0
- flowtask/components/CreateReport/CreateReport.py +228 -0
- flowtask/components/CreateReport/__init__.py +9 -0
- flowtask/components/CreateReport/charts/__init__.py +15 -0
- flowtask/components/CreateReport/charts/bar.py +51 -0
- flowtask/components/CreateReport/charts/base.py +66 -0
- flowtask/components/CreateReport/charts/pie.py +64 -0
- flowtask/components/CreateReport/utils.py +9 -0
- flowtask/components/CustomerSatisfaction.py +196 -0
- flowtask/components/DataInput.py +200 -0
- flowtask/components/DateList.py +255 -0
- flowtask/components/DbClient.py +163 -0
- flowtask/components/DialPad.py +146 -0
- flowtask/components/DocumentDBQuery.py +200 -0
- flowtask/components/DownloadFrom.py +371 -0
- flowtask/components/DownloadFromD2L.py +113 -0
- flowtask/components/DownloadFromFTP.py +181 -0
- flowtask/components/DownloadFromIMAP.py +315 -0
- flowtask/components/DownloadFromS3.py +198 -0
- flowtask/components/DownloadFromSFTP.py +265 -0
- flowtask/components/DownloadFromSharepoint.py +110 -0
- flowtask/components/DownloadFromSmartSheet.py +114 -0
- flowtask/components/DownloadS3File.py +229 -0
- flowtask/components/Dummy.py +59 -0
- flowtask/components/DuplicatePhoto.py +411 -0
- flowtask/components/EmployeeEvaluation.py +237 -0
- flowtask/components/ExecuteSQL.py +323 -0
- flowtask/components/ExtractHTML.py +178 -0
- flowtask/components/FileBase.py +178 -0
- flowtask/components/FileCopy.py +181 -0
- flowtask/components/FileDelete.py +82 -0
- flowtask/components/FileExists.py +146 -0
- flowtask/components/FileIteratorDelete.py +112 -0
- flowtask/components/FileList.py +194 -0
- flowtask/components/FileOpen.py +75 -0
- flowtask/components/FileRead.py +120 -0
- flowtask/components/FileRename.py +106 -0
- flowtask/components/FilterIf.py +284 -0
- flowtask/components/FilterRows/FilterRows.py +200 -0
- flowtask/components/FilterRows/__init__.py +10 -0
- flowtask/components/FilterRows/functions.py +4 -0
- flowtask/components/GCSToBigQuery.py +103 -0
- flowtask/components/GoogleA4.py +150 -0
- flowtask/components/GoogleGeoCoding.py +344 -0
- flowtask/components/GooglePlaces.py +315 -0
- flowtask/components/GoogleSearch.py +539 -0
- flowtask/components/HTTPClient.py +268 -0
- flowtask/components/ICIMS.py +146 -0
- flowtask/components/IF.py +179 -0
- flowtask/components/IcimsFolderCopy.py +173 -0
- flowtask/components/ImageFeatures/__init__.py +5 -0
- flowtask/components/ImageFeatures/process.py +233 -0
- flowtask/components/IteratorBase.py +251 -0
- flowtask/components/LangchainLoader/__init__.py +5 -0
- flowtask/components/LangchainLoader/loader.py +194 -0
- flowtask/components/LangchainLoader/loaders/__init__.py +22 -0
- flowtask/components/LangchainLoader/loaders/abstract.py +362 -0
- flowtask/components/LangchainLoader/loaders/basepdf.py +50 -0
- flowtask/components/LangchainLoader/loaders/docx.py +91 -0
- flowtask/components/LangchainLoader/loaders/html.py +119 -0
- flowtask/components/LangchainLoader/loaders/pdfblocks.py +146 -0
- flowtask/components/LangchainLoader/loaders/pdfmark.py +79 -0
- flowtask/components/LangchainLoader/loaders/pdftables.py +135 -0
- flowtask/components/LangchainLoader/loaders/qa.py +67 -0
- flowtask/components/LangchainLoader/loaders/txt.py +55 -0
- flowtask/components/LeadIQ.py +650 -0
- flowtask/components/Loop.py +253 -0
- flowtask/components/Lowes.py +334 -0
- flowtask/components/MS365Usage.py +156 -0
- flowtask/components/MSTeamsMessages.py +320 -0
- flowtask/components/MarketClustering.py +1051 -0
- flowtask/components/MergeFiles.py +362 -0
- flowtask/components/MilvusOutput.py +87 -0
- flowtask/components/NearByStores.py +175 -0
- flowtask/components/NetworkNinja/__init__.py +6 -0
- flowtask/components/NetworkNinja/models/__init__.py +52 -0
- flowtask/components/NetworkNinja/models/abstract.py +177 -0
- flowtask/components/NetworkNinja/models/account.py +39 -0
- flowtask/components/NetworkNinja/models/client.py +19 -0
- flowtask/components/NetworkNinja/models/district.py +14 -0
- flowtask/components/NetworkNinja/models/events.py +101 -0
- flowtask/components/NetworkNinja/models/forms.py +499 -0
- flowtask/components/NetworkNinja/models/market.py +16 -0
- flowtask/components/NetworkNinja/models/organization.py +34 -0
- flowtask/components/NetworkNinja/models/photos.py +125 -0
- flowtask/components/NetworkNinja/models/project.py +44 -0
- flowtask/components/NetworkNinja/models/region.py +28 -0
- flowtask/components/NetworkNinja/models/store.py +203 -0
- flowtask/components/NetworkNinja/models/user.py +151 -0
- flowtask/components/NetworkNinja/router.py +854 -0
- flowtask/components/Odoo.py +175 -0
- flowtask/components/OdooInjector.py +192 -0
- flowtask/components/OpenFromXML.py +126 -0
- flowtask/components/OpenWeather.py +41 -0
- flowtask/components/OpenWithBase.py +616 -0
- flowtask/components/OpenWithPandas.py +715 -0
- flowtask/components/PGPDecrypt.py +199 -0
- flowtask/components/PandasIterator.py +187 -0
- flowtask/components/PandasToFile.py +189 -0
- flowtask/components/Paradox.py +339 -0
- flowtask/components/ParamIterator.py +117 -0
- flowtask/components/ParseHTML.py +84 -0
- flowtask/components/PlacerStores.py +249 -0
- flowtask/components/Pokemon.py +507 -0
- flowtask/components/PositiveBot.py +62 -0
- flowtask/components/PowerPointSlide.py +400 -0
- flowtask/components/PrintMessage.py +127 -0
- flowtask/components/ProductCompetitors/__init__.py +5 -0
- flowtask/components/ProductCompetitors/parsers/__init__.py +7 -0
- flowtask/components/ProductCompetitors/parsers/base.py +72 -0
- flowtask/components/ProductCompetitors/parsers/bestbuy.py +86 -0
- flowtask/components/ProductCompetitors/parsers/lowes.py +103 -0
- flowtask/components/ProductCompetitors/scrapper.py +155 -0
- flowtask/components/ProductCompliant.py +169 -0
- flowtask/components/ProductInfo/__init__.py +1 -0
- flowtask/components/ProductInfo/parsers/__init__.py +5 -0
- flowtask/components/ProductInfo/parsers/base.py +83 -0
- flowtask/components/ProductInfo/parsers/brother.py +97 -0
- flowtask/components/ProductInfo/parsers/canon.py +167 -0
- flowtask/components/ProductInfo/parsers/epson.py +118 -0
- flowtask/components/ProductInfo/parsers/hp.py +131 -0
- flowtask/components/ProductInfo/parsers/samsung.py +97 -0
- flowtask/components/ProductInfo/scraper.py +319 -0
- flowtask/components/ProductPricing.py +118 -0
- flowtask/components/QS.py +261 -0
- flowtask/components/QSBase.py +201 -0
- flowtask/components/QueryIterator.py +273 -0
- flowtask/components/QueryToInsert.py +327 -0
- flowtask/components/QueryToPandas.py +432 -0
- flowtask/components/RESTClient.py +195 -0
- flowtask/components/RethinkDBQuery.py +189 -0
- flowtask/components/Rsync.py +74 -0
- flowtask/components/RunSSH.py +59 -0
- flowtask/components/RunShell.py +71 -0
- flowtask/components/SalesForce.py +20 -0
- flowtask/components/SaveImageBank/__init__.py +257 -0
- flowtask/components/SchedulingVisits.py +592 -0
- flowtask/components/ScrapPage.py +216 -0
- flowtask/components/ScrapSearch.py +79 -0
- flowtask/components/SendNotify.py +257 -0
- flowtask/components/SentimentAnalysis.py +694 -0
- flowtask/components/ServiceScrapper/__init__.py +5 -0
- flowtask/components/ServiceScrapper/parsers/__init__.py +1 -0
- flowtask/components/ServiceScrapper/parsers/base.py +94 -0
- flowtask/components/ServiceScrapper/parsers/costco.py +93 -0
- flowtask/components/ServiceScrapper/scrapper.py +199 -0
- flowtask/components/SetVariables.py +156 -0
- flowtask/components/SubTask.py +182 -0
- flowtask/components/SuiteCRM.py +48 -0
- flowtask/components/Switch.py +175 -0
- flowtask/components/TableBase.py +148 -0
- flowtask/components/TableDelete.py +312 -0
- flowtask/components/TableInput.py +143 -0
- flowtask/components/TableOutput/TableOutput.py +384 -0
- flowtask/components/TableOutput/__init__.py +3 -0
- flowtask/components/TableSchema.py +534 -0
- flowtask/components/Target.py +223 -0
- flowtask/components/ThumbnailGenerator.py +156 -0
- flowtask/components/ToPandas.py +67 -0
- flowtask/components/TransformRows/TransformRows.py +507 -0
- flowtask/components/TransformRows/__init__.py +9 -0
- flowtask/components/TransformRows/functions.py +559 -0
- flowtask/components/TransposeRows.py +176 -0
- flowtask/components/UPCDatabase.py +86 -0
- flowtask/components/UnGzip.py +171 -0
- flowtask/components/Uncompress.py +172 -0
- flowtask/components/UniqueRows.py +126 -0
- flowtask/components/Unzip.py +107 -0
- flowtask/components/UpdateOperationalVars.py +147 -0
- flowtask/components/UploadTo.py +299 -0
- flowtask/components/UploadToS3.py +136 -0
- flowtask/components/UploadToSFTP.py +160 -0
- flowtask/components/UploadToSharepoint.py +205 -0
- flowtask/components/UserFunc.py +122 -0
- flowtask/components/VivaTracker.py +140 -0
- flowtask/components/WSDLClient.py +123 -0
- flowtask/components/Wait.py +18 -0
- flowtask/components/Walmart.py +199 -0
- flowtask/components/Workplace.py +134 -0
- flowtask/components/XMLToPandas.py +267 -0
- flowtask/components/Zammad/__init__.py +41 -0
- flowtask/components/Zammad/models.py +0 -0
- flowtask/components/ZoomInfoScraper.py +409 -0
- flowtask/components/__init__.py +104 -0
- flowtask/components/abstract.py +18 -0
- flowtask/components/flow.py +530 -0
- flowtask/components/google.py +335 -0
- flowtask/components/group.py +221 -0
- flowtask/components/py.typed +0 -0
- flowtask/components/reviewscrap.py +132 -0
- flowtask/components/tAutoincrement.py +117 -0
- flowtask/components/tConcat.py +109 -0
- flowtask/components/tExplode.py +119 -0
- flowtask/components/tFilter.py +184 -0
- flowtask/components/tGroup.py +236 -0
- flowtask/components/tJoin.py +270 -0
- flowtask/components/tMap/__init__.py +9 -0
- flowtask/components/tMap/functions.py +54 -0
- flowtask/components/tMap/tMap.py +450 -0
- flowtask/components/tMelt.py +112 -0
- flowtask/components/tMerge.py +114 -0
- flowtask/components/tOrder.py +93 -0
- flowtask/components/tPandas.py +94 -0
- flowtask/components/tPivot.py +71 -0
- flowtask/components/tPluckCols.py +76 -0
- flowtask/components/tUnnest.py +82 -0
- flowtask/components/user.py +401 -0
- flowtask/conf.py +457 -0
- flowtask/download.py +102 -0
- flowtask/events/__init__.py +11 -0
- flowtask/events/events/__init__.py +20 -0
- flowtask/events/events/abstract.py +95 -0
- flowtask/events/events/alerts/__init__.py +362 -0
- flowtask/events/events/alerts/colfunctions.py +131 -0
- flowtask/events/events/alerts/functions.py +158 -0
- flowtask/events/events/dummy.py +12 -0
- flowtask/events/events/exec.py +124 -0
- flowtask/events/events/file/__init__.py +7 -0
- flowtask/events/events/file/base.py +51 -0
- flowtask/events/events/file/copy.py +23 -0
- flowtask/events/events/file/delete.py +16 -0
- flowtask/events/events/interfaces/__init__.py +9 -0
- flowtask/events/events/interfaces/client.py +67 -0
- flowtask/events/events/interfaces/credentials.py +28 -0
- flowtask/events/events/interfaces/notifications.py +58 -0
- flowtask/events/events/jira.py +122 -0
- flowtask/events/events/log.py +26 -0
- flowtask/events/events/logerr.py +52 -0
- flowtask/events/events/notify.py +59 -0
- flowtask/events/events/notify_event.py +160 -0
- flowtask/events/events/publish.py +54 -0
- flowtask/events/events/sendfile.py +104 -0
- flowtask/events/events/task.py +97 -0
- flowtask/events/events/teams.py +98 -0
- flowtask/events/events/webhook.py +58 -0
- flowtask/events/manager.py +287 -0
- flowtask/exceptions.c +39393 -0
- flowtask/exceptions.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/extensions/__init__.py +3 -0
- flowtask/extensions/abstract.py +82 -0
- flowtask/extensions/logging/__init__.py +65 -0
- flowtask/hooks/__init__.py +9 -0
- flowtask/hooks/actions/__init__.py +22 -0
- flowtask/hooks/actions/abstract.py +66 -0
- flowtask/hooks/actions/dummy.py +23 -0
- flowtask/hooks/actions/jira.py +74 -0
- flowtask/hooks/actions/rest.py +320 -0
- flowtask/hooks/actions/sampledata.py +37 -0
- flowtask/hooks/actions/sensor.py +23 -0
- flowtask/hooks/actions/task.py +9 -0
- flowtask/hooks/actions/ticket.py +37 -0
- flowtask/hooks/actions/zammad.py +55 -0
- flowtask/hooks/hook.py +62 -0
- flowtask/hooks/models.py +17 -0
- flowtask/hooks/service.py +187 -0
- flowtask/hooks/step.py +91 -0
- flowtask/hooks/types/__init__.py +23 -0
- flowtask/hooks/types/base.py +129 -0
- flowtask/hooks/types/brokers/__init__.py +11 -0
- flowtask/hooks/types/brokers/base.py +54 -0
- flowtask/hooks/types/brokers/mqtt.py +35 -0
- flowtask/hooks/types/brokers/rabbitmq.py +82 -0
- flowtask/hooks/types/brokers/redis.py +83 -0
- flowtask/hooks/types/brokers/sqs.py +44 -0
- flowtask/hooks/types/fs.py +232 -0
- flowtask/hooks/types/http.py +49 -0
- flowtask/hooks/types/imap.py +200 -0
- flowtask/hooks/types/jira.py +279 -0
- flowtask/hooks/types/mail.py +205 -0
- flowtask/hooks/types/postgres.py +98 -0
- flowtask/hooks/types/responses/__init__.py +8 -0
- flowtask/hooks/types/responses/base.py +5 -0
- flowtask/hooks/types/sharepoint.py +288 -0
- flowtask/hooks/types/ssh.py +141 -0
- flowtask/hooks/types/tagged.py +59 -0
- flowtask/hooks/types/upload.py +85 -0
- flowtask/hooks/types/watch.py +71 -0
- flowtask/hooks/types/web.py +36 -0
- flowtask/interfaces/AzureClient.py +137 -0
- flowtask/interfaces/AzureGraph.py +839 -0
- flowtask/interfaces/Boto3Client.py +326 -0
- flowtask/interfaces/DropboxClient.py +173 -0
- flowtask/interfaces/ExcelHandler.py +94 -0
- flowtask/interfaces/FTPClient.py +131 -0
- flowtask/interfaces/GoogleCalendar.py +201 -0
- flowtask/interfaces/GoogleClient.py +133 -0
- flowtask/interfaces/GoogleDrive.py +127 -0
- flowtask/interfaces/GoogleGCS.py +89 -0
- flowtask/interfaces/GoogleGeocoding.py +93 -0
- flowtask/interfaces/GoogleLang.py +114 -0
- flowtask/interfaces/GooglePub.py +61 -0
- flowtask/interfaces/GoogleSheet.py +68 -0
- flowtask/interfaces/IMAPClient.py +137 -0
- flowtask/interfaces/O365Calendar.py +113 -0
- flowtask/interfaces/O365Client.py +220 -0
- flowtask/interfaces/OneDrive.py +284 -0
- flowtask/interfaces/Outlook.py +155 -0
- flowtask/interfaces/ParrotBot.py +130 -0
- flowtask/interfaces/SSHClient.py +378 -0
- flowtask/interfaces/Sharepoint.py +496 -0
- flowtask/interfaces/__init__.py +36 -0
- flowtask/interfaces/azureauth.py +119 -0
- flowtask/interfaces/cache.py +201 -0
- flowtask/interfaces/client.py +82 -0
- flowtask/interfaces/compress.py +525 -0
- flowtask/interfaces/credentials.py +124 -0
- flowtask/interfaces/d2l.py +239 -0
- flowtask/interfaces/databases/__init__.py +5 -0
- flowtask/interfaces/databases/db.py +223 -0
- flowtask/interfaces/databases/documentdb.py +55 -0
- flowtask/interfaces/databases/rethink.py +39 -0
- flowtask/interfaces/dataframes/__init__.py +11 -0
- flowtask/interfaces/dataframes/abstract.py +21 -0
- flowtask/interfaces/dataframes/arrow.py +71 -0
- flowtask/interfaces/dataframes/dt.py +69 -0
- flowtask/interfaces/dataframes/pandas.py +167 -0
- flowtask/interfaces/dataframes/polars.py +60 -0
- flowtask/interfaces/db.py +263 -0
- flowtask/interfaces/env.py +46 -0
- flowtask/interfaces/func.py +137 -0
- flowtask/interfaces/http.py +1780 -0
- flowtask/interfaces/locale.py +40 -0
- flowtask/interfaces/log.py +75 -0
- flowtask/interfaces/mask.py +143 -0
- flowtask/interfaces/notification.py +154 -0
- flowtask/interfaces/playwright.py +339 -0
- flowtask/interfaces/powerpoint.py +368 -0
- flowtask/interfaces/py.typed +0 -0
- flowtask/interfaces/qs.py +376 -0
- flowtask/interfaces/result.py +87 -0
- flowtask/interfaces/selenium_service.py +779 -0
- flowtask/interfaces/smartsheet.py +154 -0
- flowtask/interfaces/stat.py +39 -0
- flowtask/interfaces/task.py +96 -0
- flowtask/interfaces/template.py +118 -0
- flowtask/interfaces/vectorstores/__init__.py +1 -0
- flowtask/interfaces/vectorstores/abstract.py +133 -0
- flowtask/interfaces/vectorstores/milvus.py +669 -0
- flowtask/interfaces/zammad.py +107 -0
- flowtask/models.py +193 -0
- flowtask/parsers/__init__.py +15 -0
- flowtask/parsers/_yaml.c +11978 -0
- flowtask/parsers/_yaml.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/argparser.py +235 -0
- flowtask/parsers/base.c +15155 -0
- flowtask/parsers/base.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/json.c +11968 -0
- flowtask/parsers/json.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/maps.py +49 -0
- flowtask/parsers/toml.c +11968 -0
- flowtask/parsers/toml.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/plugins/__init__.py +16 -0
- flowtask/plugins/components/__init__.py +0 -0
- flowtask/plugins/handler/__init__.py +45 -0
- flowtask/plugins/importer.py +31 -0
- flowtask/plugins/sources/__init__.py +0 -0
- flowtask/runner.py +283 -0
- flowtask/scheduler/__init__.py +9 -0
- flowtask/scheduler/functions.py +493 -0
- flowtask/scheduler/handlers/__init__.py +8 -0
- flowtask/scheduler/handlers/manager.py +504 -0
- flowtask/scheduler/handlers/models.py +58 -0
- flowtask/scheduler/handlers/service.py +72 -0
- flowtask/scheduler/notifications.py +65 -0
- flowtask/scheduler/scheduler.py +993 -0
- flowtask/services/__init__.py +0 -0
- flowtask/services/bots/__init__.py +0 -0
- flowtask/services/bots/telegram.py +264 -0
- flowtask/services/files/__init__.py +11 -0
- flowtask/services/files/manager.py +522 -0
- flowtask/services/files/model.py +37 -0
- flowtask/services/files/service.py +767 -0
- flowtask/services/jira/__init__.py +3 -0
- flowtask/services/jira/jira_actions.py +191 -0
- flowtask/services/tasks/__init__.py +13 -0
- flowtask/services/tasks/launcher.py +213 -0
- flowtask/services/tasks/manager.py +323 -0
- flowtask/services/tasks/service.py +275 -0
- flowtask/services/tasks/task_manager.py +376 -0
- flowtask/services/tasks/tasks.py +155 -0
- flowtask/storages/__init__.py +16 -0
- flowtask/storages/exceptions.py +12 -0
- flowtask/storages/files/__init__.py +8 -0
- flowtask/storages/files/abstract.py +29 -0
- flowtask/storages/files/filesystem.py +66 -0
- flowtask/storages/tasks/__init__.py +19 -0
- flowtask/storages/tasks/abstract.py +26 -0
- flowtask/storages/tasks/database.py +33 -0
- flowtask/storages/tasks/filesystem.py +108 -0
- flowtask/storages/tasks/github.py +119 -0
- flowtask/storages/tasks/memory.py +45 -0
- flowtask/storages/tasks/row.py +25 -0
- flowtask/tasks/__init__.py +0 -0
- flowtask/tasks/abstract.py +526 -0
- flowtask/tasks/command.py +118 -0
- flowtask/tasks/pile.py +486 -0
- flowtask/tasks/py.typed +0 -0
- flowtask/tasks/task.py +778 -0
- flowtask/template/__init__.py +161 -0
- flowtask/tests.py +257 -0
- flowtask/types/__init__.py +8 -0
- flowtask/types/typedefs.c +11347 -0
- flowtask/types/typedefs.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/__init__.py +24 -0
- flowtask/utils/constants.py +117 -0
- flowtask/utils/encoders.py +21 -0
- flowtask/utils/executor.py +112 -0
- flowtask/utils/functions.cpp +14280 -0
- flowtask/utils/functions.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/json.cpp +13349 -0
- flowtask/utils/json.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/mail.py +63 -0
- flowtask/utils/parseqs.c +13324 -0
- flowtask/utils/parserqs.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/stats.py +308 -0
- flowtask/utils/transformations.py +74 -0
- flowtask/utils/uv.py +12 -0
- flowtask/utils/validators.py +97 -0
- flowtask/version.py +11 -0
- flowtask-5.8.4.dist-info/LICENSE +201 -0
- flowtask-5.8.4.dist-info/METADATA +209 -0
- flowtask-5.8.4.dist-info/RECORD +470 -0
- flowtask-5.8.4.dist-info/WHEEL +6 -0
- flowtask-5.8.4.dist-info/entry_points.txt +3 -0
- flowtask-5.8.4.dist-info/top_level.txt +2 -0
- plugins/components/CreateQR.py +39 -0
- plugins/components/TestComponent.py +28 -0
- plugins/components/Use1.py +13 -0
- plugins/components/Workplace.py +117 -0
- plugins/components/__init__.py +3 -0
- plugins/sources/__init__.py +0 -0
- plugins/sources/get_populartimes.py +78 -0
- plugins/sources/google.py +150 -0
- plugins/sources/hubspot.py +679 -0
- plugins/sources/icims.py +679 -0
- plugins/sources/mobileinsight.py +501 -0
- plugins/sources/newrelic.py +262 -0
- plugins/sources/uap.py +268 -0
- plugins/sources/venu.py +244 -0
- plugins/sources/vocinity.py +314 -0
@@ -0,0 +1,133 @@
|
|
1
|
+
import re
|
2
|
+
from bs4 import BeautifulSoup as bs
|
3
|
+
from .base import ScrapperBase
|
4
|
+
|
5
|
+
|
6
|
+
class RocketReachScrapper(ScrapperBase):
    """
    RocketReachScrapper Model.

    Parses a RocketReach company page into the flat ``result`` dictionary
    returned by :meth:`scrapping`.
    """
    domain: str = 'https://rocketreach.co/'
    search_term: str = "site:rocketreach.co '{}'"
    keywords: list = [
        ' Information',
        ' Information - ',
        ' Information - RocketReach',
        ': Contact Details'
    ]

    def define_search_term(self, term: str):
        """Return the site-restricted search query for ``term``.

        Only surrounding whitespace is trimmed; name standardization
        (``self._standardize_name``) is currently disabled here.
        """
        standardized_term = term.strip()
        return self.search_term.format(standardized_term)

    def _extract_codes(self, value):
        """
        Extracts NAICS/SIC codes from RocketReach company info.

        Args:
            value: Table cell element whose ``<a>`` children carry the codes.

        Returns:
            list of str: the first whole-number token of every link that
            contains one, in document order.
        """
        codes = []
        for link in value.find_all("a"):
            match = re.search(r"\b\d+\b", link.text)
            if match:
                codes.append(match.group())
        return codes

    def _apply_info_row(self, result: dict, key_text: str, value) -> None:
        """Store one row of the summary table into ``result``.

        Args:
            result: Dictionary being filled in; updated in place.
            key_text: Lower-cased, stripped label of the row.
            value: Element holding the row's value cell.

        Labels are matched by substring, so the order of the checks below
        matters and is kept as-is.
        """
        value_text = value.text.strip()
        # Looked up once; several fields prefer the link over the raw text.
        link = value.select_one("a")

        if "website" in key_text:
            # .get() so a link without an href falls back to the cell text
            # instead of raising KeyError.
            href = link.get("href") if link else None
            result["website"] = href or value_text
        elif "ticker" in key_text:
            result["stock_symbol"] = value_text
        elif "revenue" in key_text:
            result["revenue_range"] = value_text
        elif "funding" in key_text:
            result["funding"] = value_text
        elif "employees" in key_text:
            # An empty cell has no first token; skip the count rather than
            # failing the whole parse with IndexError.
            tokens = value_text.split()
            if tokens:
                result["employee_count"] = tokens[0]
            result['number_employees'] = value_text
        elif "founded" in key_text:
            result["founded"] = value_text
        elif "address" in key_text:
            result["headquarters"] = link.text.strip() if link else value_text
        elif "phone" in key_text:
            result["phone_number"] = link.text.strip() if link else value_text
        elif "industry" in key_text:
            result["industry"] = [i.strip() for i in value_text.split(",")]
        elif "keywords" in key_text:
            result["keywords"] = [i.strip() for i in value_text.split(",")]
        elif "sic" in key_text:
            result["sic_code"] = self._extract_codes(value)
        elif "naics" in key_text:
            result["naics_code"] = self._extract_codes(value)

    async def scrapping(self, document: bs, idx: int, row: dict):
        """
        Scrape company information from a RocketReach page.

        Args:
            document: Parsed HTML of the company page.
            idx: Index of the row being processed; returned unchanged.
            row: Existing row data. It is shallow-copied, and the copy is
                what gets updated and returned.

        Returns:
            tuple: ``(idx, result)``. ``result['scrape_status']`` is
            ``'success'`` when at least one of the checked fields is
            non-empty, ``'no_data'`` when none is, and
            ``'error: <first 50 chars of the message>'`` if parsing raised.
        """
        # Start with the existing row data
        result = row.copy()

        # Update only the fields specific to this source
        result.update({
            'source_platform': 'rocketreach',
            'scrape_status': 'pending',
            'search_term': self.search_term_used
        })
        try:
            # Extract `company-header` details
            company_header = document.select_one(".company-header")
            if company_header:
                # Company logo; .get() tolerates a tag without `src`.
                img_tag = company_header.select_one(".company-logo")
                result["logo_url"] = img_tag.get("src") if img_tag else None

                # Company name, without the trailing " Information" suffix.
                title_tag = company_header.select_one(".company-title")
                if title_tag:
                    result["company_name"] = title_tag.text.replace(" Information", "").strip()

            # Extract company description from `headline-summary`
            headline_summary = document.select_one(".headline-summary p")
            result["company_description"] = headline_summary.text.strip() if headline_summary else None

            # Extract details from the information table. The loop variable
            # is deliberately not called `row`, to avoid shadowing the
            # parameter.
            for table_row in document.select(".headline-summary table tbody tr"):
                key = table_row.select_one("td strong")
                value = table_row.select_one("td:nth-of-type(2)")
                if key and value:
                    self._apply_info_row(result, key.text.strip().lower(), value)

            # Validate if any meaningful data was found
            checked_fields = (
                'company_name',
                'logo_url',
                'headquarters',
                'phone_number',
                'website',
                'stock_symbol',
                'naics_code',
                'sic_code',
                'employee_count',
                'revenue_range',
                'company_description',
            )
            has_data = any(result.get(field) for field in checked_fields)
            result['scrape_status'] = 'success' if has_data else 'no_data'
        except Exception as e:
            # Boundary handler: a parsing failure is reported through
            # `scrape_status` instead of propagating to the caller.
            self._logger.error(f"Error parsing RocketReach data: {str(e)}")
            result['scrape_status'] = f'error: {str(e)[:50]}'
        return idx, result
|
@@ -0,0 +1,109 @@
|
|
1
|
+
import time
|
2
|
+
from bs4 import BeautifulSoup as bs
|
3
|
+
from selenium.webdriver.common.by import By
|
4
|
+
from selenium.webdriver.support import expected_conditions as EC
|
5
|
+
from selenium.webdriver.support.ui import WebDriverWait
|
6
|
+
from selenium.common.exceptions import (
|
7
|
+
TimeoutException,
|
8
|
+
NoSuchElementException,
|
9
|
+
WebDriverException
|
10
|
+
)
|
11
|
+
from .base import ScrapperBase
|
12
|
+
|
13
|
+
|
14
|
+
class SicCodeScrapper(ScrapperBase):
    """
    SicCodeScrapper Model.

    Parses a siccode.com company page into the flat ``result`` dictionary
    returned by :meth:`scrapping`.
    """
    domain: str = 'siccode.com'
    search_term: str = "site:siccode.com '{}' +NAICS"
    keywords: list = [
        ' - ZIP',
        ' - ZIP '
    ]

    def define_search_term(self, term: str):
        """Return the site-restricted search query for ``term``.

        The term is stripped of surrounding whitespace and lower-cased
        before being substituted into ``search_term``.
        """
        cleaned = term.strip().lower()
        return self.search_term.format(cleaned)

    # NOTE: disabled Selenium-based page fetch, kept for reference.
    # async def get(self, url, headers: dict):
    #     self.use_proxy = True
    #     self._free_proxy = False
    #     driver = await self.get_driver()
    #     try:
    #         try:
    #             driver.get(url)
    #             # WebDriverWait(driver, 2).until(
    #             #     EC.presence_of_element_located((By.ID, "main"))
    #             # )
    #             return bs(driver.page_source, 'html.parser')
    #         except TimeoutException:
    #             return None
    #     finally:
    #         self.close_driver()

    @staticmethod
    def _node_text(node, selector: str):
        """Return the stripped text of the first ``selector`` match, or None.

        ``node`` itself may be ``None`` (section missing from the page), in
        which case ``None`` is returned as well.
        """
        if node is None:
            return None
        found = node.select_one(selector)
        return found.text.strip() if found is not None else None

    @staticmethod
    def _split_code_label(text: str, label: str):
        """Split ``"<label> <code> - <description>"`` into its two parts.

        Only the first ``' - '`` separates code from description, so a
        description that itself contains ``' - '`` is kept whole.

        Returns:
            tuple: ``(code, description)``; a part that is empty is ``None``.
        """
        remainder = text.split(label)[-1].strip()
        code, _, description = remainder.partition(' - ')
        return code.strip() or None, description.strip() or None

    async def scrapping(self, document: bs, idx: int, row: dict):
        """
        Scrapes company information from siccode.com and updates the row.

        Args:
            document: Parsed HTML of the company page.
            idx: Index of the row being processed; returned unchanged.
            row: Existing row data. It is shallow-copied, and the copy is
                what gets updated and returned.

        Returns:
            tuple: ``(idx, result)``. ``result['scrape_status']`` is
            ``'success'`` when at least one of the checked fields is
            non-empty, ``'no_data'`` when none is, and
            ``'error: <first 50 chars of the message>'`` if parsing raised.
        """
        result = row.copy()
        result.update({
            'source_platform': 'siccode',
            'scrape_status': 'pending',
            'search_term': self.search_term_used
        })

        try:
            # Title block: company name and industry category. A missing
            # block or element yields None rather than an AttributeError.
            header = document.select_one("div.main-title")
            result["company_name"] = self._node_text(header, "h1.size-h2 a span")
            result["industry_category"] = self._node_text(header, "b.p-category")

            # SIC and NAICS codes. Each pair is only written when its link
            # exists, so values already present in the row are not replaced
            # by None.
            desc = document.find('div', {'id': 'description'})
            sic_code_elem = desc.select_one("a.sic") if desc is not None else None
            naics_code_elem = desc.select_one("a.naics") if desc is not None else None

            if sic_code_elem is not None:
                result["sic_code"], result["industry"] = self._split_code_label(
                    sic_code_elem.text, "SIC CODE"
                )
            if naics_code_elem is not None:
                result["naics_code"], result["category"] = self._split_code_label(
                    naics_code_elem.text, "NAICS CODE"
                )

            # Description and location details from the overview section.
            overview = document.find('div', {'id': 'overview'})
            result['company_description'] = self._node_text(overview, "p.p-note")

            result["city"] = self._node_text(overview, ".p-locality")
            result["state"] = self._node_text(overview, ".p-region")
            result["zip_code"] = self._node_text(overview, ".p-postal-code")
            result["country"] = self._node_text(overview, ".p-country-name")
            result["metro_area"] = self._node_text(overview, "div[title]")

            # Construct Headquarters Address from whichever parts were found.
            result["headquarters"] = ", ".join(
                filter(None, [result.get("city"), result.get("state"), result.get("zip_code"), result.get("country")])
            )

            # Check if we found any meaningful data. `revenue_range`,
            # `years_in_business` and `company_size` are never set in this
            # method, so they only count when already present in `row`.
            has_data = any([
                result.get("company_name"),
                result.get("category"),
                result.get("sic_code"),
                result.get("naics_code"),
                result.get("headquarters"),
                result.get("revenue_range"),
                result.get("years_in_business"),
                result.get("company_size"),
            ])

            result['scrape_status'] = 'success' if has_data else 'no_data'
        except Exception as e:
            # Boundary handler: a parsing failure is reported through
            # `scrape_status` instead of propagating to the caller.
            self._logger.error(f"Error parsing SICCode data: {str(e)}")
            result['scrape_status'] = f'error: {str(e)[:50]}'
        return idx, result
|
@@ -0,0 +1,130 @@
|
|
1
|
+
import re
|
2
|
+
from bs4 import BeautifulSoup as bs
|
3
|
+
from .base import ScrapperBase
|
4
|
+
|
5
|
+
|
6
|
+
class VisualVisitorScrapper(ScrapperBase):
    """
    VisualVisitorScrapper Model.

    Parses a company profile page from visualvisitor.com into a flat
    result dictionary.
    """
    # Site targeted by this scraper.
    domain: str = 'https://www.visualvisitor.com/'
    # Search query template; ``{}`` receives the normalized company name.
    search_term: str = "site:visualvisitor.com '{}'"
    # Title fragments; presumably consumed by ScrapperBase when matching
    # search-result titles -- TODO confirm against the base class.
    keywords: list = [
        ' Phone',
        ' - Phone',
    ]

    def define_search_term(self, term: str):
        """Return the search query for ``term`` (stripped and lower-cased)."""
        standardized_term = term.strip().lower()
        return self.search_term.format(standardized_term)

    def _extract_codes(self, value):
        """
        Extract NAICS/SIC codes from a table cell.

        Args:
            value: element exposing ``find_all("a")``; the text of every
                link is searched for its first standalone run of digits.

        Returns:
            list[str]: the codes found, in document order. Links whose
            text contains no standalone number are skipped.
        """
        codes = []
        for link in value.find_all("a"):
            match = re.search(r"\b\d+\b", link.text)
            if match:
                # Keep only the numeric part of the link text.
                codes.append(match.group())
        return codes

    async def scrapping(self, document: bs, idx: int, row: dict):
        """
        Scrape company information from a VisualVisitor page.

        Args:
            document: parsed HTML of the company page.
            idx: index of the row being processed; returned unchanged.
            row: existing row data. It is copied, never mutated.

        Returns:
            tuple: ``(idx, result)`` where ``result`` is the copied row
            updated with the scraped fields and ``result['scrape_status']``
            is ``'success'``, ``'no_data'`` or ``'error: <message>'``.
        """
        # Start with the existing row data.
        result = row.copy()

        # Update only the fields specific to this platform.
        result.update({
            'source_platform': 'visualvisitor',
            'scrape_status': 'pending',
            'search_term': self.search_term_used
        })
        try:
            # Extract `company-header` details.
            company_header = document.select_one(".company-header")
            if company_header:
                # Company logo; ``.get`` avoids a KeyError when the tag
                # has no ``src`` attribute.
                img_tag = company_header.select_one(".company-logo")
                result["logo_url"] = img_tag.get("src") if img_tag else None

                # Company name, without the trailing " Information" label.
                title_tag = company_header.select_one(".company-title")
                if title_tag:
                    result["company_name"] = title_tag.text.replace(" Information", "").strip()

            # Company description from `headline-summary`.
            headline_summary = document.select_one(".headline-summary p")
            result["company_description"] = headline_summary.text.strip() if headline_summary else None

            # Details from the information table. The loop variable is
            # deliberately not called ``row`` so the parameter is not shadowed.
            for table_row in document.select(".headline-summary table tbody tr"):
                key = table_row.select_one("td strong")
                value = table_row.select_one("td:nth-of-type(2)")
                if key is None or value is None:
                    continue

                key_text = key.text.strip().lower()
                value_text = value.text.strip()
                # Several fields prefer the anchor inside the cell; look it up once.
                link = value.select_one("a")

                if "website" in key_text:
                    # Fall back to the cell text when there is no usable href.
                    result["website"] = (link.get("href") if link else None) or value_text

                elif "ticker" in key_text:
                    result["stock_symbol"] = value_text

                elif "revenue" in key_text:
                    result["revenue_range"] = value_text

                elif "funding" in key_text:
                    result["funding"] = value_text

                elif "employees" in key_text:
                    # An empty cell must not abort the whole scrape.
                    tokens = value_text.split()
                    result["employee_count"] = tokens[0] if tokens else None
                    result['number_employees'] = value_text

                elif "founded" in key_text:
                    result["founded"] = value_text

                elif "address" in key_text:
                    result["headquarters"] = link.text.strip() if link else value_text

                elif "phone" in key_text:
                    result["phone_number"] = link.text.strip() if link else value_text

                elif "industry" in key_text:
                    result["industry"] = [i.strip() for i in value_text.split(",")]

                elif "keywords" in key_text:
                    result["keywords"] = [i.strip() for i in value_text.split(",")]

                elif "sic" in key_text:
                    result["sic_code"] = self._extract_codes(value)

                elif "naics" in key_text:
                    result["naics_code"] = self._extract_codes(value)

            # Validate if any meaningful data was found.
            has_data = any([
                result.get('company_name'),
                result.get('logo_url'),
                result.get('headquarters'),
                result.get('phone_number'),
                result.get('website'),
                result.get('stock_symbol'),
                result.get('naics_code'),
                result.get('sic_code'),
                result.get('employee_count'),
                result.get('revenue_range'),
                result.get('company_description')
            ])
            result['scrape_status'] = 'success' if has_data else 'no_data'
            return idx, result

        except Exception as e:
            # Best-effort boundary: report the failure in the row itself
            # instead of propagating it to the caller.
            self._logger.error(f"Error parsing VisualVisitor data: {str(e)}")
            result['scrape_status'] = f'error: {str(e)[:50]}'
            return idx, result
|
@@ -0,0 +1,118 @@
|
|
1
|
+
import time
|
2
|
+
from bs4 import BeautifulSoup as bs
|
3
|
+
from selenium.webdriver.common.by import By
|
4
|
+
from selenium.webdriver.support import expected_conditions as EC
|
5
|
+
from selenium.webdriver.support.ui import WebDriverWait
|
6
|
+
from selenium.common.exceptions import (
|
7
|
+
TimeoutException,
|
8
|
+
NoSuchElementException,
|
9
|
+
WebDriverException
|
10
|
+
)
|
11
|
+
from .base import ScrapperBase
|
12
|
+
|
13
|
+
|
14
|
+
class ZoomInfoScrapper(ScrapperBase):
    """
    ZoomInfo Model.

    Fetches a zoominfo.com company page through a browser driver and
    parses it into a flat result dictionary.
    """
    # Site targeted by this scraper.
    domain: str = 'zoominfo.com'
    # Search query template; ``{}`` receives the normalized company name.
    search_term: str = 'site:zoominfo.com {} Overview'
    # Title fragments; presumably consumed by ScrapperBase when matching
    # search-result titles -- TODO confirm against the base class.
    keywords: list = [
        ' - Overview, News',
        'Overview, News'
    ]

    def define_search_term(self, term: str):
        """Return the search query for ``term`` (stripped and lower-cased)."""
        cleaned = term.strip().lower()
        return self.search_term.format(cleaned)

    async def get(self, url, headers: dict):
        """
        Load ``url`` in a browser driver and return the page source.

        Args:
            url: address of the page to load.
            headers: accepted for interface compatibility; not used here.

        Returns:
            The driver's page source, or ``None`` when loading the page
            raises ``TimeoutException``.
        """
        # Driver configuration flags; presumably read by ``get_driver`` in
        # the base class -- TODO confirm.
        self.use_proxy = True
        self._free_proxy = False
        self.use_undetected = True
        driver = await self.get_driver()
        try:
            self._logger.debug("URL > %s", url)
            driver.get(url)
            return driver.page_source
        except TimeoutException:
            return None
        finally:
            # Always release the driver, including on unexpected errors.
            self.close_driver()

    async def scrapping(self, document: bs, idx: int, row: dict):
        """
        Scrape company information from Zoominfo.

        Args:
            document: parsed HTML of the company page.
            idx: index of the row being processed; returned unchanged.
            row: existing row data. It is copied, never mutated.

        Returns:
            tuple: ``(idx, result)`` where ``result`` is the copied row
            updated with the scraped fields and ``result['scrape_status']``
            is ``'success'``, ``'no_data'`` or ``'error: <message>'``.
        """
        # Start with the existing row data.
        result = row.copy()

        # Update only the fields specific to Zoominfo.
        result.update({
            'source_platform': 'zoominfo',
            'scrape_status': 'pending',
            'search_term': self.search_term_used
        })

        def text_of(selector: str):
            """Stripped text of the first match for ``selector``, or None."""
            node = document.select_one(selector)
            return node.text.strip() if node else None

        def labelled(label: str, target: str = ".content") -> str:
            """Selector for the ``target`` element right after an icon label."""
            return f".icon-label:-soup-contains('{label}') + {target}"

        try:
            # ``.get`` avoids a KeyError when the website anchor has no href.
            website_link = document.select_one(labelled("Website", "a"))

            # Extract the company information.
            result.update({
                "company_name": text_of("h2#company-description-text-header"),
                "headquarters": text_of(labelled("Headquarters")),
                "phone_number": text_of(labelled("Phone Number")),
                "website": website_link.get("href") if website_link else None,
                "revenue_range": text_of(labelled("Revenue")),
                "stock_symbol": text_of(labelled("Stock Symbol")),
                "industry": [i.text.strip() for i in document.select("#company-chips-wrapper a")],
                "company_description": text_of("#company-description-text-content .company-desc"),
            })

            # Extracting NAICS and SIC codes.
            result["naics_code"], result["sic_code"] = None, None  # Default to None
            for code in document.select("#codes-wrapper .codes-content"):
                text = code.text.strip()
                if "NAICS Code" in text:
                    result["naics_code"] = text.replace("NAICS Code", "").strip()
                elif "SIC Code" in text:
                    result["sic_code"] = text.replace("SIC Code", "").strip()

            # Extract executives. A missing job title or profile link yields
            # None for that field instead of failing the whole scrape.
            executives = []
            for person in document.select(".org-chart .person-right-content"):
                name_tag = person.select_one(".person-name")
                if not name_tag:
                    continue
                title_tag = person.select_one(".job-title")
                executives.append({
                    "name": name_tag.text.strip(),
                    "title": title_tag.text.strip() if title_tag else None,
                    "profile_link": name_tag.get("href"),
                })
            result["executives"] = executives

            # Check whether any data was found.
            has_data = any([
                result.get('company_name'),
                result.get('headquarters'),
                result.get('country'),
                result.get('phone_number'),
                result.get('website'),
                result.get('stock_symbol'),
                result.get('naics_code'),
                result.get('sic_code'),
                result.get('employee_count'),
                result.get('revenue_range'),
                result.get('company_description'),
            ])

            # Set the status according to whether data was found.
            result['scrape_status'] = 'success' if has_data else 'no_data'

            # The result is always returned, with or without data.
            return idx, result
        except Exception as e:
            # Best-effort boundary: report the failure in the row itself
            # instead of propagating it to the caller.
            self._logger.error(f"Error parsing Zoominfo data: {str(e)}")
            result['scrape_status'] = f'error: {str(e)[:50]}'
            return idx, result
|