flowtask 5.8.4__cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowtask/__init__.py +93 -0
- flowtask/__main__.py +38 -0
- flowtask/bots/__init__.py +6 -0
- flowtask/bots/check.py +93 -0
- flowtask/bots/codebot.py +51 -0
- flowtask/components/ASPX.py +148 -0
- flowtask/components/AddDataset.py +352 -0
- flowtask/components/Amazon.py +523 -0
- flowtask/components/AutoTask.py +314 -0
- flowtask/components/Azure.py +80 -0
- flowtask/components/AzureUsers.py +106 -0
- flowtask/components/BaseAction.py +91 -0
- flowtask/components/BaseLoop.py +198 -0
- flowtask/components/BestBuy.py +800 -0
- flowtask/components/CSVToGCS.py +120 -0
- flowtask/components/CompanyScraper/__init__.py +1 -0
- flowtask/components/CompanyScraper/parsers/__init__.py +6 -0
- flowtask/components/CompanyScraper/parsers/base.py +102 -0
- flowtask/components/CompanyScraper/parsers/explorium.py +192 -0
- flowtask/components/CompanyScraper/parsers/leadiq.py +206 -0
- flowtask/components/CompanyScraper/parsers/rocket.py +133 -0
- flowtask/components/CompanyScraper/parsers/siccode.py +109 -0
- flowtask/components/CompanyScraper/parsers/visualvisitor.py +130 -0
- flowtask/components/CompanyScraper/parsers/zoominfo.py +118 -0
- flowtask/components/CompanyScraper/scrapper.py +1054 -0
- flowtask/components/CopyTo.py +177 -0
- flowtask/components/CopyToBigQuery.py +243 -0
- flowtask/components/CopyToMongoDB.py +291 -0
- flowtask/components/CopyToPg.py +609 -0
- flowtask/components/CopyToRethink.py +207 -0
- flowtask/components/CreateGCSBucket.py +102 -0
- flowtask/components/CreateReport/CreateReport.py +228 -0
- flowtask/components/CreateReport/__init__.py +9 -0
- flowtask/components/CreateReport/charts/__init__.py +15 -0
- flowtask/components/CreateReport/charts/bar.py +51 -0
- flowtask/components/CreateReport/charts/base.py +66 -0
- flowtask/components/CreateReport/charts/pie.py +64 -0
- flowtask/components/CreateReport/utils.py +9 -0
- flowtask/components/CustomerSatisfaction.py +196 -0
- flowtask/components/DataInput.py +200 -0
- flowtask/components/DateList.py +255 -0
- flowtask/components/DbClient.py +163 -0
- flowtask/components/DialPad.py +146 -0
- flowtask/components/DocumentDBQuery.py +200 -0
- flowtask/components/DownloadFrom.py +371 -0
- flowtask/components/DownloadFromD2L.py +113 -0
- flowtask/components/DownloadFromFTP.py +181 -0
- flowtask/components/DownloadFromIMAP.py +315 -0
- flowtask/components/DownloadFromS3.py +198 -0
- flowtask/components/DownloadFromSFTP.py +265 -0
- flowtask/components/DownloadFromSharepoint.py +110 -0
- flowtask/components/DownloadFromSmartSheet.py +114 -0
- flowtask/components/DownloadS3File.py +229 -0
- flowtask/components/Dummy.py +59 -0
- flowtask/components/DuplicatePhoto.py +411 -0
- flowtask/components/EmployeeEvaluation.py +237 -0
- flowtask/components/ExecuteSQL.py +323 -0
- flowtask/components/ExtractHTML.py +178 -0
- flowtask/components/FileBase.py +178 -0
- flowtask/components/FileCopy.py +181 -0
- flowtask/components/FileDelete.py +82 -0
- flowtask/components/FileExists.py +146 -0
- flowtask/components/FileIteratorDelete.py +112 -0
- flowtask/components/FileList.py +194 -0
- flowtask/components/FileOpen.py +75 -0
- flowtask/components/FileRead.py +120 -0
- flowtask/components/FileRename.py +106 -0
- flowtask/components/FilterIf.py +284 -0
- flowtask/components/FilterRows/FilterRows.py +200 -0
- flowtask/components/FilterRows/__init__.py +10 -0
- flowtask/components/FilterRows/functions.py +4 -0
- flowtask/components/GCSToBigQuery.py +103 -0
- flowtask/components/GoogleA4.py +150 -0
- flowtask/components/GoogleGeoCoding.py +344 -0
- flowtask/components/GooglePlaces.py +315 -0
- flowtask/components/GoogleSearch.py +539 -0
- flowtask/components/HTTPClient.py +268 -0
- flowtask/components/ICIMS.py +146 -0
- flowtask/components/IF.py +179 -0
- flowtask/components/IcimsFolderCopy.py +173 -0
- flowtask/components/ImageFeatures/__init__.py +5 -0
- flowtask/components/ImageFeatures/process.py +233 -0
- flowtask/components/IteratorBase.py +251 -0
- flowtask/components/LangchainLoader/__init__.py +5 -0
- flowtask/components/LangchainLoader/loader.py +194 -0
- flowtask/components/LangchainLoader/loaders/__init__.py +22 -0
- flowtask/components/LangchainLoader/loaders/abstract.py +362 -0
- flowtask/components/LangchainLoader/loaders/basepdf.py +50 -0
- flowtask/components/LangchainLoader/loaders/docx.py +91 -0
- flowtask/components/LangchainLoader/loaders/html.py +119 -0
- flowtask/components/LangchainLoader/loaders/pdfblocks.py +146 -0
- flowtask/components/LangchainLoader/loaders/pdfmark.py +79 -0
- flowtask/components/LangchainLoader/loaders/pdftables.py +135 -0
- flowtask/components/LangchainLoader/loaders/qa.py +67 -0
- flowtask/components/LangchainLoader/loaders/txt.py +55 -0
- flowtask/components/LeadIQ.py +650 -0
- flowtask/components/Loop.py +253 -0
- flowtask/components/Lowes.py +334 -0
- flowtask/components/MS365Usage.py +156 -0
- flowtask/components/MSTeamsMessages.py +320 -0
- flowtask/components/MarketClustering.py +1051 -0
- flowtask/components/MergeFiles.py +362 -0
- flowtask/components/MilvusOutput.py +87 -0
- flowtask/components/NearByStores.py +175 -0
- flowtask/components/NetworkNinja/__init__.py +6 -0
- flowtask/components/NetworkNinja/models/__init__.py +52 -0
- flowtask/components/NetworkNinja/models/abstract.py +177 -0
- flowtask/components/NetworkNinja/models/account.py +39 -0
- flowtask/components/NetworkNinja/models/client.py +19 -0
- flowtask/components/NetworkNinja/models/district.py +14 -0
- flowtask/components/NetworkNinja/models/events.py +101 -0
- flowtask/components/NetworkNinja/models/forms.py +499 -0
- flowtask/components/NetworkNinja/models/market.py +16 -0
- flowtask/components/NetworkNinja/models/organization.py +34 -0
- flowtask/components/NetworkNinja/models/photos.py +125 -0
- flowtask/components/NetworkNinja/models/project.py +44 -0
- flowtask/components/NetworkNinja/models/region.py +28 -0
- flowtask/components/NetworkNinja/models/store.py +203 -0
- flowtask/components/NetworkNinja/models/user.py +151 -0
- flowtask/components/NetworkNinja/router.py +854 -0
- flowtask/components/Odoo.py +175 -0
- flowtask/components/OdooInjector.py +192 -0
- flowtask/components/OpenFromXML.py +126 -0
- flowtask/components/OpenWeather.py +41 -0
- flowtask/components/OpenWithBase.py +616 -0
- flowtask/components/OpenWithPandas.py +715 -0
- flowtask/components/PGPDecrypt.py +199 -0
- flowtask/components/PandasIterator.py +187 -0
- flowtask/components/PandasToFile.py +189 -0
- flowtask/components/Paradox.py +339 -0
- flowtask/components/ParamIterator.py +117 -0
- flowtask/components/ParseHTML.py +84 -0
- flowtask/components/PlacerStores.py +249 -0
- flowtask/components/Pokemon.py +507 -0
- flowtask/components/PositiveBot.py +62 -0
- flowtask/components/PowerPointSlide.py +400 -0
- flowtask/components/PrintMessage.py +127 -0
- flowtask/components/ProductCompetitors/__init__.py +5 -0
- flowtask/components/ProductCompetitors/parsers/__init__.py +7 -0
- flowtask/components/ProductCompetitors/parsers/base.py +72 -0
- flowtask/components/ProductCompetitors/parsers/bestbuy.py +86 -0
- flowtask/components/ProductCompetitors/parsers/lowes.py +103 -0
- flowtask/components/ProductCompetitors/scrapper.py +155 -0
- flowtask/components/ProductCompliant.py +169 -0
- flowtask/components/ProductInfo/__init__.py +1 -0
- flowtask/components/ProductInfo/parsers/__init__.py +5 -0
- flowtask/components/ProductInfo/parsers/base.py +83 -0
- flowtask/components/ProductInfo/parsers/brother.py +97 -0
- flowtask/components/ProductInfo/parsers/canon.py +167 -0
- flowtask/components/ProductInfo/parsers/epson.py +118 -0
- flowtask/components/ProductInfo/parsers/hp.py +131 -0
- flowtask/components/ProductInfo/parsers/samsung.py +97 -0
- flowtask/components/ProductInfo/scraper.py +319 -0
- flowtask/components/ProductPricing.py +118 -0
- flowtask/components/QS.py +261 -0
- flowtask/components/QSBase.py +201 -0
- flowtask/components/QueryIterator.py +273 -0
- flowtask/components/QueryToInsert.py +327 -0
- flowtask/components/QueryToPandas.py +432 -0
- flowtask/components/RESTClient.py +195 -0
- flowtask/components/RethinkDBQuery.py +189 -0
- flowtask/components/Rsync.py +74 -0
- flowtask/components/RunSSH.py +59 -0
- flowtask/components/RunShell.py +71 -0
- flowtask/components/SalesForce.py +20 -0
- flowtask/components/SaveImageBank/__init__.py +257 -0
- flowtask/components/SchedulingVisits.py +592 -0
- flowtask/components/ScrapPage.py +216 -0
- flowtask/components/ScrapSearch.py +79 -0
- flowtask/components/SendNotify.py +257 -0
- flowtask/components/SentimentAnalysis.py +694 -0
- flowtask/components/ServiceScrapper/__init__.py +5 -0
- flowtask/components/ServiceScrapper/parsers/__init__.py +1 -0
- flowtask/components/ServiceScrapper/parsers/base.py +94 -0
- flowtask/components/ServiceScrapper/parsers/costco.py +93 -0
- flowtask/components/ServiceScrapper/scrapper.py +199 -0
- flowtask/components/SetVariables.py +156 -0
- flowtask/components/SubTask.py +182 -0
- flowtask/components/SuiteCRM.py +48 -0
- flowtask/components/Switch.py +175 -0
- flowtask/components/TableBase.py +148 -0
- flowtask/components/TableDelete.py +312 -0
- flowtask/components/TableInput.py +143 -0
- flowtask/components/TableOutput/TableOutput.py +384 -0
- flowtask/components/TableOutput/__init__.py +3 -0
- flowtask/components/TableSchema.py +534 -0
- flowtask/components/Target.py +223 -0
- flowtask/components/ThumbnailGenerator.py +156 -0
- flowtask/components/ToPandas.py +67 -0
- flowtask/components/TransformRows/TransformRows.py +507 -0
- flowtask/components/TransformRows/__init__.py +9 -0
- flowtask/components/TransformRows/functions.py +559 -0
- flowtask/components/TransposeRows.py +176 -0
- flowtask/components/UPCDatabase.py +86 -0
- flowtask/components/UnGzip.py +171 -0
- flowtask/components/Uncompress.py +172 -0
- flowtask/components/UniqueRows.py +126 -0
- flowtask/components/Unzip.py +107 -0
- flowtask/components/UpdateOperationalVars.py +147 -0
- flowtask/components/UploadTo.py +299 -0
- flowtask/components/UploadToS3.py +136 -0
- flowtask/components/UploadToSFTP.py +160 -0
- flowtask/components/UploadToSharepoint.py +205 -0
- flowtask/components/UserFunc.py +122 -0
- flowtask/components/VivaTracker.py +140 -0
- flowtask/components/WSDLClient.py +123 -0
- flowtask/components/Wait.py +18 -0
- flowtask/components/Walmart.py +199 -0
- flowtask/components/Workplace.py +134 -0
- flowtask/components/XMLToPandas.py +267 -0
- flowtask/components/Zammad/__init__.py +41 -0
- flowtask/components/Zammad/models.py +0 -0
- flowtask/components/ZoomInfoScraper.py +409 -0
- flowtask/components/__init__.py +104 -0
- flowtask/components/abstract.py +18 -0
- flowtask/components/flow.py +530 -0
- flowtask/components/google.py +335 -0
- flowtask/components/group.py +221 -0
- flowtask/components/py.typed +0 -0
- flowtask/components/reviewscrap.py +132 -0
- flowtask/components/tAutoincrement.py +117 -0
- flowtask/components/tConcat.py +109 -0
- flowtask/components/tExplode.py +119 -0
- flowtask/components/tFilter.py +184 -0
- flowtask/components/tGroup.py +236 -0
- flowtask/components/tJoin.py +270 -0
- flowtask/components/tMap/__init__.py +9 -0
- flowtask/components/tMap/functions.py +54 -0
- flowtask/components/tMap/tMap.py +450 -0
- flowtask/components/tMelt.py +112 -0
- flowtask/components/tMerge.py +114 -0
- flowtask/components/tOrder.py +93 -0
- flowtask/components/tPandas.py +94 -0
- flowtask/components/tPivot.py +71 -0
- flowtask/components/tPluckCols.py +76 -0
- flowtask/components/tUnnest.py +82 -0
- flowtask/components/user.py +401 -0
- flowtask/conf.py +457 -0
- flowtask/download.py +102 -0
- flowtask/events/__init__.py +11 -0
- flowtask/events/events/__init__.py +20 -0
- flowtask/events/events/abstract.py +95 -0
- flowtask/events/events/alerts/__init__.py +362 -0
- flowtask/events/events/alerts/colfunctions.py +131 -0
- flowtask/events/events/alerts/functions.py +158 -0
- flowtask/events/events/dummy.py +12 -0
- flowtask/events/events/exec.py +124 -0
- flowtask/events/events/file/__init__.py +7 -0
- flowtask/events/events/file/base.py +51 -0
- flowtask/events/events/file/copy.py +23 -0
- flowtask/events/events/file/delete.py +16 -0
- flowtask/events/events/interfaces/__init__.py +9 -0
- flowtask/events/events/interfaces/client.py +67 -0
- flowtask/events/events/interfaces/credentials.py +28 -0
- flowtask/events/events/interfaces/notifications.py +58 -0
- flowtask/events/events/jira.py +122 -0
- flowtask/events/events/log.py +26 -0
- flowtask/events/events/logerr.py +52 -0
- flowtask/events/events/notify.py +59 -0
- flowtask/events/events/notify_event.py +160 -0
- flowtask/events/events/publish.py +54 -0
- flowtask/events/events/sendfile.py +104 -0
- flowtask/events/events/task.py +97 -0
- flowtask/events/events/teams.py +98 -0
- flowtask/events/events/webhook.py +58 -0
- flowtask/events/manager.py +287 -0
- flowtask/exceptions.c +39393 -0
- flowtask/exceptions.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/extensions/__init__.py +3 -0
- flowtask/extensions/abstract.py +82 -0
- flowtask/extensions/logging/__init__.py +65 -0
- flowtask/hooks/__init__.py +9 -0
- flowtask/hooks/actions/__init__.py +22 -0
- flowtask/hooks/actions/abstract.py +66 -0
- flowtask/hooks/actions/dummy.py +23 -0
- flowtask/hooks/actions/jira.py +74 -0
- flowtask/hooks/actions/rest.py +320 -0
- flowtask/hooks/actions/sampledata.py +37 -0
- flowtask/hooks/actions/sensor.py +23 -0
- flowtask/hooks/actions/task.py +9 -0
- flowtask/hooks/actions/ticket.py +37 -0
- flowtask/hooks/actions/zammad.py +55 -0
- flowtask/hooks/hook.py +62 -0
- flowtask/hooks/models.py +17 -0
- flowtask/hooks/service.py +187 -0
- flowtask/hooks/step.py +91 -0
- flowtask/hooks/types/__init__.py +23 -0
- flowtask/hooks/types/base.py +129 -0
- flowtask/hooks/types/brokers/__init__.py +11 -0
- flowtask/hooks/types/brokers/base.py +54 -0
- flowtask/hooks/types/brokers/mqtt.py +35 -0
- flowtask/hooks/types/brokers/rabbitmq.py +82 -0
- flowtask/hooks/types/brokers/redis.py +83 -0
- flowtask/hooks/types/brokers/sqs.py +44 -0
- flowtask/hooks/types/fs.py +232 -0
- flowtask/hooks/types/http.py +49 -0
- flowtask/hooks/types/imap.py +200 -0
- flowtask/hooks/types/jira.py +279 -0
- flowtask/hooks/types/mail.py +205 -0
- flowtask/hooks/types/postgres.py +98 -0
- flowtask/hooks/types/responses/__init__.py +8 -0
- flowtask/hooks/types/responses/base.py +5 -0
- flowtask/hooks/types/sharepoint.py +288 -0
- flowtask/hooks/types/ssh.py +141 -0
- flowtask/hooks/types/tagged.py +59 -0
- flowtask/hooks/types/upload.py +85 -0
- flowtask/hooks/types/watch.py +71 -0
- flowtask/hooks/types/web.py +36 -0
- flowtask/interfaces/AzureClient.py +137 -0
- flowtask/interfaces/AzureGraph.py +839 -0
- flowtask/interfaces/Boto3Client.py +326 -0
- flowtask/interfaces/DropboxClient.py +173 -0
- flowtask/interfaces/ExcelHandler.py +94 -0
- flowtask/interfaces/FTPClient.py +131 -0
- flowtask/interfaces/GoogleCalendar.py +201 -0
- flowtask/interfaces/GoogleClient.py +133 -0
- flowtask/interfaces/GoogleDrive.py +127 -0
- flowtask/interfaces/GoogleGCS.py +89 -0
- flowtask/interfaces/GoogleGeocoding.py +93 -0
- flowtask/interfaces/GoogleLang.py +114 -0
- flowtask/interfaces/GooglePub.py +61 -0
- flowtask/interfaces/GoogleSheet.py +68 -0
- flowtask/interfaces/IMAPClient.py +137 -0
- flowtask/interfaces/O365Calendar.py +113 -0
- flowtask/interfaces/O365Client.py +220 -0
- flowtask/interfaces/OneDrive.py +284 -0
- flowtask/interfaces/Outlook.py +155 -0
- flowtask/interfaces/ParrotBot.py +130 -0
- flowtask/interfaces/SSHClient.py +378 -0
- flowtask/interfaces/Sharepoint.py +496 -0
- flowtask/interfaces/__init__.py +36 -0
- flowtask/interfaces/azureauth.py +119 -0
- flowtask/interfaces/cache.py +201 -0
- flowtask/interfaces/client.py +82 -0
- flowtask/interfaces/compress.py +525 -0
- flowtask/interfaces/credentials.py +124 -0
- flowtask/interfaces/d2l.py +239 -0
- flowtask/interfaces/databases/__init__.py +5 -0
- flowtask/interfaces/databases/db.py +223 -0
- flowtask/interfaces/databases/documentdb.py +55 -0
- flowtask/interfaces/databases/rethink.py +39 -0
- flowtask/interfaces/dataframes/__init__.py +11 -0
- flowtask/interfaces/dataframes/abstract.py +21 -0
- flowtask/interfaces/dataframes/arrow.py +71 -0
- flowtask/interfaces/dataframes/dt.py +69 -0
- flowtask/interfaces/dataframes/pandas.py +167 -0
- flowtask/interfaces/dataframes/polars.py +60 -0
- flowtask/interfaces/db.py +263 -0
- flowtask/interfaces/env.py +46 -0
- flowtask/interfaces/func.py +137 -0
- flowtask/interfaces/http.py +1780 -0
- flowtask/interfaces/locale.py +40 -0
- flowtask/interfaces/log.py +75 -0
- flowtask/interfaces/mask.py +143 -0
- flowtask/interfaces/notification.py +154 -0
- flowtask/interfaces/playwright.py +339 -0
- flowtask/interfaces/powerpoint.py +368 -0
- flowtask/interfaces/py.typed +0 -0
- flowtask/interfaces/qs.py +376 -0
- flowtask/interfaces/result.py +87 -0
- flowtask/interfaces/selenium_service.py +779 -0
- flowtask/interfaces/smartsheet.py +154 -0
- flowtask/interfaces/stat.py +39 -0
- flowtask/interfaces/task.py +96 -0
- flowtask/interfaces/template.py +118 -0
- flowtask/interfaces/vectorstores/__init__.py +1 -0
- flowtask/interfaces/vectorstores/abstract.py +133 -0
- flowtask/interfaces/vectorstores/milvus.py +669 -0
- flowtask/interfaces/zammad.py +107 -0
- flowtask/models.py +193 -0
- flowtask/parsers/__init__.py +15 -0
- flowtask/parsers/_yaml.c +11978 -0
- flowtask/parsers/_yaml.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/argparser.py +235 -0
- flowtask/parsers/base.c +15155 -0
- flowtask/parsers/base.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/json.c +11968 -0
- flowtask/parsers/json.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/maps.py +49 -0
- flowtask/parsers/toml.c +11968 -0
- flowtask/parsers/toml.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/plugins/__init__.py +16 -0
- flowtask/plugins/components/__init__.py +0 -0
- flowtask/plugins/handler/__init__.py +45 -0
- flowtask/plugins/importer.py +31 -0
- flowtask/plugins/sources/__init__.py +0 -0
- flowtask/runner.py +283 -0
- flowtask/scheduler/__init__.py +9 -0
- flowtask/scheduler/functions.py +493 -0
- flowtask/scheduler/handlers/__init__.py +8 -0
- flowtask/scheduler/handlers/manager.py +504 -0
- flowtask/scheduler/handlers/models.py +58 -0
- flowtask/scheduler/handlers/service.py +72 -0
- flowtask/scheduler/notifications.py +65 -0
- flowtask/scheduler/scheduler.py +993 -0
- flowtask/services/__init__.py +0 -0
- flowtask/services/bots/__init__.py +0 -0
- flowtask/services/bots/telegram.py +264 -0
- flowtask/services/files/__init__.py +11 -0
- flowtask/services/files/manager.py +522 -0
- flowtask/services/files/model.py +37 -0
- flowtask/services/files/service.py +767 -0
- flowtask/services/jira/__init__.py +3 -0
- flowtask/services/jira/jira_actions.py +191 -0
- flowtask/services/tasks/__init__.py +13 -0
- flowtask/services/tasks/launcher.py +213 -0
- flowtask/services/tasks/manager.py +323 -0
- flowtask/services/tasks/service.py +275 -0
- flowtask/services/tasks/task_manager.py +376 -0
- flowtask/services/tasks/tasks.py +155 -0
- flowtask/storages/__init__.py +16 -0
- flowtask/storages/exceptions.py +12 -0
- flowtask/storages/files/__init__.py +8 -0
- flowtask/storages/files/abstract.py +29 -0
- flowtask/storages/files/filesystem.py +66 -0
- flowtask/storages/tasks/__init__.py +19 -0
- flowtask/storages/tasks/abstract.py +26 -0
- flowtask/storages/tasks/database.py +33 -0
- flowtask/storages/tasks/filesystem.py +108 -0
- flowtask/storages/tasks/github.py +119 -0
- flowtask/storages/tasks/memory.py +45 -0
- flowtask/storages/tasks/row.py +25 -0
- flowtask/tasks/__init__.py +0 -0
- flowtask/tasks/abstract.py +526 -0
- flowtask/tasks/command.py +118 -0
- flowtask/tasks/pile.py +486 -0
- flowtask/tasks/py.typed +0 -0
- flowtask/tasks/task.py +778 -0
- flowtask/template/__init__.py +161 -0
- flowtask/tests.py +257 -0
- flowtask/types/__init__.py +8 -0
- flowtask/types/typedefs.c +11347 -0
- flowtask/types/typedefs.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/__init__.py +24 -0
- flowtask/utils/constants.py +117 -0
- flowtask/utils/encoders.py +21 -0
- flowtask/utils/executor.py +112 -0
- flowtask/utils/functions.cpp +14280 -0
- flowtask/utils/functions.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/json.cpp +13349 -0
- flowtask/utils/json.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/mail.py +63 -0
- flowtask/utils/parseqs.c +13324 -0
- flowtask/utils/parserqs.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/stats.py +308 -0
- flowtask/utils/transformations.py +74 -0
- flowtask/utils/uv.py +12 -0
- flowtask/utils/validators.py +97 -0
- flowtask/version.py +11 -0
- flowtask-5.8.4.dist-info/LICENSE +201 -0
- flowtask-5.8.4.dist-info/METADATA +209 -0
- flowtask-5.8.4.dist-info/RECORD +470 -0
- flowtask-5.8.4.dist-info/WHEEL +6 -0
- flowtask-5.8.4.dist-info/entry_points.txt +3 -0
- flowtask-5.8.4.dist-info/top_level.txt +2 -0
- plugins/components/CreateQR.py +39 -0
- plugins/components/TestComponent.py +28 -0
- plugins/components/Use1.py +13 -0
- plugins/components/Workplace.py +117 -0
- plugins/components/__init__.py +3 -0
- plugins/sources/__init__.py +0 -0
- plugins/sources/get_populartimes.py +78 -0
- plugins/sources/google.py +150 -0
- plugins/sources/hubspot.py +679 -0
- plugins/sources/icims.py +679 -0
- plugins/sources/mobileinsight.py +501 -0
- plugins/sources/newrelic.py +262 -0
- plugins/sources/uap.py +268 -0
- plugins/sources/venu.py +244 -0
- plugins/sources/vocinity.py +314 -0
@@ -0,0 +1,539 @@
|
|
1
|
+
from collections.abc import Callable
|
2
|
+
import asyncio
|
3
|
+
import logging
|
4
|
+
from typing import Optional, List, Dict, Any
|
5
|
+
import pandas as pd
|
6
|
+
from googleapiclient.discovery import build
|
7
|
+
from googleapiclient.errors import HttpError
|
8
|
+
from fuzzywuzzy import fuzz
|
9
|
+
from ..conf import (
|
10
|
+
GOOGLE_SEARCH_ENGINE_ID,
|
11
|
+
GOOGLE_SEARCH_API_KEY,
|
12
|
+
OXYLABS_USERNAME,
|
13
|
+
OXYLABS_PASSWORD,
|
14
|
+
OXYLABS_ENDPOINT
|
15
|
+
)
|
16
|
+
from ..exceptions import ComponentError
|
17
|
+
from .google import GoogleBase
|
18
|
+
from bs4 import BeautifulSoup
|
19
|
+
import random
|
20
|
+
from urllib.parse import quote
|
21
|
+
import time
|
22
|
+
from duckduckgo_search import DDGS
|
23
|
+
from ..interfaces.http import ua
|
24
|
+
|
25
|
+
|
26
|
+
class GoogleSearch(GoogleBase):
|
27
|
+
"""
|
28
|
+
Google Custom Search Component
|
29
|
+
|
30
|
+
Overview:
|
31
|
+
|
32
|
+
This component performs Google Custom Search queries using the Google Custom Search API.
|
33
|
+
It can search for specific queries and return results including URLs, titles, and snippets.
|
34
|
+
The component can receive search terms either from a previous component or a list of terms
|
35
|
+
specified in the configuration.
|
36
|
+
|
37
|
+
.. table:: Properties
|
38
|
+
:widths: auto
|
39
|
+
|
40
|
+
+-----------------------+----------+------------------------------------------------------------------------------------------------------+
|
41
|
+
| Name | Required | Description |
|
42
|
+
+-----------------------+----------+------------------------------------------------------------------------------------------------------+
|
43
|
+
| terms (list) | No | List of search terms to use. Required if no previous component provided |
|
44
|
+
+-----------------------+----------+------------------------------------------------------------------------------------------------------+
|
45
|
+
| column (str) | No | Name of the column in the DataFrame when using a previous component |
|
46
|
+
+-----------------------+----------+------------------------------------------------------------------------------------------------------+
|
47
|
+
| site (str) | No | Optional site restriction for the search, given without the 'site:' prefix (e.g., 'example.com'; default: 'leadiq.com') |
|
48
|
+
+-----------------------+----------+------------------------------------------------------------------------------------------------------+
|
49
|
+
| max_results (int) | No | Maximum number of results to return per search (default: 10) |
|
50
|
+
+-----------------------+----------+------------------------------------------------------------------------------------------------------+
|
51
|
+
|
52
|
+
Return:
|
53
|
+
|
54
|
+
The component returns a DataFrame with columns:
|
55
|
+
'search_url', 'search_term', 'search_status', 'search_engine', 'match_score' containing the best result found for each search term.
|
56
|
+
"""
|
57
|
+
|
58
|
+
_type: str = 'search' # Define type at class level
|
59
|
+
|
60
|
+
def __init__(
    self,
    loop: asyncio.AbstractEventLoop = None,
    job: Callable = None,
    stat: Callable = None,
    **kwargs,
) -> None:
    """Read the search options from ``kwargs`` and set up per-instance state.

    Options taken from ``kwargs`` (with their defaults):
        site: site restriction for queries (``'leadiq.com'``).
        terms: explicit list of search terms (``None``).
        max_results: maximum results per search (``10``).
        engine: primary engine name (``'google'``); ``search`` treats the
            value ``'duckduckgo'`` as DuckDuckGo and anything else as Google.
        fallback_search: whether ``search`` may try the other engine
            (``False``).

    All attributes are assigned before ``super().__init__`` is called, and
    the same ``kwargs`` are forwarded to it unchanged.
    """
    self.site: Optional[str] = kwargs.get('site', 'leadiq.com')  # Default value
    self.terms: Optional[List[str]] = kwargs.get('terms', None)
    self.max_results: int = kwargs.get('max_results', 10)
    self.engine = kwargs.get('engine', 'google')
    self.fallback_search: bool = kwargs.get('fallback_search', False)  # New option
    self.api_key = GOOGLE_SEARCH_API_KEY
    # Search clients are created lazily by get_search_service / get_ddg_service.
    self._search_service = None
    self._ddg_service = None
    # Held by search() for the duration of each lookup: at most 30 at a time.
    self._semaphore = asyncio.Semaphore(30)
    # NOTE(review): not read anywhere in the visible part of the class —
    # presumably a timestamp used for request throttling; confirm.
    self._last_request = 0
    # Proxy configuration
    self.use_proxy: bool = True
    # NOTE(review): not read in the visible code; presumably consumed by the
    # proxy helpers of a base class — confirm.
    self._free_proxy: bool = False
    # Request headers; the User-Agent is picked at random once per instance.
    self.headers: dict = {
        "Accept-Language": "en-US,en;q=0.5",
        "Accept-Encoding": "gzip, deflate",
        "DNT": "1",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": random.choice(ua)
    }
    super().__init__(loop=loop, job=job, stat=stat, **kwargs)
|
89
|
+
|
90
|
+
def get_search_service(self):
    """Return the Google Custom Search client, building it on first use.

    The client is cached on ``self._search_service``; later calls return
    the cached object without rebuilding it.
    """
    cached = self._search_service
    if cached:
        return cached

    # First call (or a falsy cached value): build the client with the API key.
    self._search_service = build(
        "customsearch",
        "v1",
        developerKey=self.api_key,
    )
    return self._search_service
|
99
|
+
|
100
|
+
async def get_ddg_service(self):
    """Create and return a fresh DuckDuckGo search client.

    Any previously stored client reference is dropped first. When
    ``self.use_proxy`` is set and ``self.get_proxies()`` returns a non-empty
    value, the client is routed through the Oxylabs proxy; otherwise it
    connects directly.

    Returns:
        The new ``DDGS`` instance, or ``None`` if anything raised while
        creating it (the error is logged, not propagated).
    """
    try:
        # Always start from a fresh instance; forget any previous one.
        self._ddg_service = None

        proxy = None
        if self.use_proxy:
            proxies = await self.get_proxies()
            if proxies:
                proxy = f"http://{OXYLABS_USERNAME}:{OXYLABS_PASSWORD}@{OXYLABS_ENDPOINT}"
                # The proxy URL embeds the account credentials, so only the
                # endpoint is written to the log.
                self._logger.info(f"Using proxy: {OXYLABS_ENDPOINT}")
            else:
                self._logger.warning("No proxies available, continuing without proxy")

        self._ddg_service = DDGS(proxy=proxy)
        return self._ddg_service

    except Exception as e:
        # Best effort: callers receive None instead of an exception.
        self._logger.error(f"Error creating DDGS instance: {str(e)}")
        return None
|
122
|
+
|
123
|
+
async def start(self, **kwargs) -> bool:
    """Prepare the component for a run and validate its configuration.

    Resets the completed-search counter, runs the inherited input
    evaluation, builds the input DataFrame from ``self.terms`` when there
    is no previous component, and makes sure the result columns exist.

    Returns:
        ``True`` when every check passes.

    Raises:
        ComponentError: ``terms`` is not a list, ``self.data`` is not a
            DataFrame, or a Google credential is missing.
        RuntimeError: no ``column`` attribute is available.
    """
    self._counter = 0
    # Input evaluation is provided outside this block (the original comment
    # attributes it to GoogleBase).
    self._evaluate_input()

    # Without a previous component the configured terms become the input.
    if not self.previous and self.terms:
        if not isinstance(self.terms, list):
            raise ComponentError(
                "Terms must be a list of strings"
            )
        self.data = pd.DataFrame({'search_term': self.terms})
        self.column = 'search_term'

    if not hasattr(self, 'column'):
        raise RuntimeError(
            'Column attribute is required'
        )

    if not isinstance(self.data, pd.DataFrame):
        raise ComponentError(
            "Incompatible Pandas DataFrame"
        )

    # Credential checks, in order: API key first, then the engine ID.
    credential_checks = (
        (self.api_key, "Google Search API Key is missing"),
        (GOOGLE_SEARCH_ENGINE_ID, "Google Custom Search Engine ID is missing"),
    )
    for value, message in credential_checks:
        if not value:
            raise ComponentError(message)

    # Add any result column the incoming frame does not have yet.
    absent = [
        name
        for name in ('search_url', 'search_title', 'search_snippet')
        if name not in self.data.columns
    ]
    for name in absent:
        self.data[name] = None

    return True
|
164
|
+
|
165
|
+
def _build_query(self, query: str) -> str:
|
166
|
+
"""Build the search query with optional site restriction."""
|
167
|
+
|
168
|
+
if self.site:
|
169
|
+
return f"site:{self.site} {query}"
|
170
|
+
return query
|
171
|
+
|
172
|
+
def _standardize_name(self, text: str) -> str:
|
173
|
+
"""Estandariza el formato del texto: lowercase y guiones en lugar de espacios."""
|
174
|
+
# Primero limpiamos caracteres especiales y espacios extras
|
175
|
+
cleaned = text.strip().lower()
|
176
|
+
# Reemplazamos espacios por guiones
|
177
|
+
return cleaned.replace(' ', '-')
|
178
|
+
|
179
|
+
def _clean_company_name(self, title: str) -> str:
|
180
|
+
"""Extrae y estandariza el nombre de la compañía del título."""
|
181
|
+
decorators = [
|
182
|
+
'Email Formats & Email Address',
|
183
|
+
'Company Overview',
|
184
|
+
'Employee Directory',
|
185
|
+
'Contact Details & Competitors',
|
186
|
+
'Email Format'
|
187
|
+
]
|
188
|
+
|
189
|
+
# Tomar la parte antes del primer decorador que encuentre
|
190
|
+
clean_name = title
|
191
|
+
for decorator in decorators:
|
192
|
+
if decorator.lower() in title.lower(): # Hacer la comparación case-insensitive
|
193
|
+
clean_name = title.split(decorator)[0]
|
194
|
+
break
|
195
|
+
|
196
|
+
return self._standardize_name(clean_name)
|
197
|
+
|
198
|
+
async def search(self, idx: int, row: pd.Series) -> tuple[int, Optional[Dict[str, Any]]]:
|
199
|
+
"""Perform search using selected engine and return results."""
|
200
|
+
async with self._semaphore:
|
201
|
+
try:
|
202
|
+
search_term = row[self.column]
|
203
|
+
|
204
|
+
if pd.isna(search_term):
|
205
|
+
return idx, {
|
206
|
+
'search_url': None,
|
207
|
+
'search_term': search_term,
|
208
|
+
'search_status': 'invalid_term',
|
209
|
+
'search_engine': self.engine,
|
210
|
+
'is_best_match': False,
|
211
|
+
'match_score': 0
|
212
|
+
}
|
213
|
+
|
214
|
+
# Primer intento con el motor seleccionado
|
215
|
+
try:
|
216
|
+
if self.engine == 'duckduckgo':
|
217
|
+
results = await self._search_with_ddg(search_term)
|
218
|
+
else:
|
219
|
+
results = await self._search_with_google(search_term)
|
220
|
+
except Exception as e:
|
221
|
+
if not self.fallback_search:
|
222
|
+
self._logger.error(f"Search failed with {self.engine} for {search_term}: {str(e)}")
|
223
|
+
return idx, {
|
224
|
+
'search_url': None,
|
225
|
+
'search_term': search_term,
|
226
|
+
'search_status': f'error_{self.engine}',
|
227
|
+
'search_engine': self.engine,
|
228
|
+
'is_best_match': False,
|
229
|
+
'match_score': 0
|
230
|
+
}
|
231
|
+
|
232
|
+
self._logger.warning(f"Error with {self.engine} for {search_term}: {str(e)}, trying alternative engine...")
|
233
|
+
try:
|
234
|
+
if self.engine == 'duckduckgo':
|
235
|
+
results = await self._search_with_google(search_term)
|
236
|
+
else:
|
237
|
+
results = await self._search_with_ddg(search_term)
|
238
|
+
except Exception as e2:
|
239
|
+
self._logger.error(f"Both engines failed for {search_term}. Errors: {str(e)}, {str(e2)}")
|
240
|
+
return idx, {
|
241
|
+
'search_url': None,
|
242
|
+
'search_term': search_term,
|
243
|
+
'search_status': f'error_both_engines',
|
244
|
+
'search_engine': 'both',
|
245
|
+
'is_best_match': False,
|
246
|
+
'match_score': 0
|
247
|
+
}
|
248
|
+
|
249
|
+
# Si hay resultados pero el score es bajo, intentar con el otro motor solo si fallback_search está activo
|
250
|
+
if self.fallback_search and (not results or (isinstance(results, list) and len(results) > 0 and results[0]['match_score'] < 60)):
|
251
|
+
self._logger.info(f"No good results with {self.engine} for {search_term}, trying alternative engine...")
|
252
|
+
if self.engine == 'duckduckgo':
|
253
|
+
results = await self._search_with_google(search_term)
|
254
|
+
else:
|
255
|
+
results = await self._search_with_ddg(search_term)
|
256
|
+
|
257
|
+
if results:
|
258
|
+
self._counter += 1
|
259
|
+
return idx, results
|
260
|
+
|
261
|
+
return idx, {
|
262
|
+
'search_url': None,
|
263
|
+
'search_term': search_term,
|
264
|
+
'search_status': 'no_results',
|
265
|
+
'search_engine': self.engine if not self.fallback_search else 'both',
|
266
|
+
'is_best_match': False,
|
267
|
+
'match_score': 0
|
268
|
+
}
|
269
|
+
|
270
|
+
except Exception as e:
|
271
|
+
self._logger.error(f"Unexpected error for {search_term}: {str(e)}")
|
272
|
+
return idx, {
|
273
|
+
'search_url': None,
|
274
|
+
'search_term': search_term,
|
275
|
+
'search_status': f'error_unexpected',
|
276
|
+
'search_engine': self.engine if not self.fallback_search else 'both',
|
277
|
+
'is_best_match': False,
|
278
|
+
'match_score': 0
|
279
|
+
}
|
280
|
+
|
281
|
+
async def run(self):
    """Run one search per row of ``self.data`` and keep the best hit of each.

    Every row is passed to ``self.search`` and the resulting coroutines are
    awaited together with ``asyncio.gather``. Each search yields either a
    non-empty list of hit dictionaries or a single status dictionary:

    * for a list, the hit with the highest ``match_score`` is kept; when that
      score is below 60 the hit is kept (URL included) but its status is
      rewritten to ``'low_match'``;
    * anything else is stored as returned.

    Returns:
        pandas.DataFrame: one row per search with the columns ``search_url``,
        ``search_term``, ``search_status``, ``search_engine`` and
        ``match_score``. The same frame is stored in ``self._result``.
    """
    columns_to_keep = ['search_url', 'search_term', 'search_status', 'search_engine', 'match_score']

    tasks = [self.search(idx, row) for idx, row in self.data.iterrows()]
    results = await asyncio.gather(*tasks)

    flattened_results = []
    for _idx, result_list in results:
        if isinstance(result_list, list) and result_list:
            # max() returns the first of several equal scores, which is the
            # element a stable descending sort would have put in front.
            best_result = max(result_list, key=lambda hit: hit['match_score'])

            if best_result['match_score'] < 60:
                # Keep the URL even for a weak match, but flag it as such.
                flattened_results.append({
                    'search_url': best_result['search_url'],
                    'search_term': best_result['search_term'],
                    'search_status': 'low_match',
                    'search_engine': best_result['search_engine'],
                    'match_score': best_result['match_score']
                })
            else:
                flattened_results.append(best_result)
        else:
            flattened_results.append(result_list)

    # Passing ``columns`` keeps only the needed columns and, unlike
    # ``df[columns_to_keep]``, still yields a well-formed (empty) frame
    # when there was nothing to search.
    df = pd.DataFrame(flattened_results, columns=columns_to_keep)

    self.add_metric("SEARCHES_COMPLETED", self._counter)

    if self._debug is True:
        print(df)
        print("::: Printing Column Information === ")
        for column, t in df.dtypes.items():
            # An empty frame has no first row to show as a sample.
            sample = df[column].iloc[0] if not df.empty else None
            print(column, "->", t, "->", sample)

    self._result = df
    return self._result
|
325
|
+
|
326
|
+
async def close(self):
    """Drop the references to the search clients held by this instance.

    Sets ``self._ddg_service`` and ``self._search_service`` to ``None`` when
    they currently hold a truthy value.

    Returns:
        bool: always ``True``.
    """
    # Rebinding an attribute to None cannot fail, so the bare
    # ``except: pass`` guards that used to wrap these assignments were dead
    # code and have been removed.
    if self._ddg_service:
        self._ddg_service = None

    if self._search_service:
        self._search_service = None

    return True
|
343
|
+
|
344
|
+
async def search_company(self, idx: int, row: pd.Series) -> tuple[int, Optional[str]]:
    """Look up the LeadIQ company page for one row through a Google results page.

    The company name is read from ``row[self.company_column]``. A
    ``site:{self.site} {company}`` query is opened with ``self.get_page``,
    and after a random 2-5 second pause the page source of ``self._driver``
    is parsed. The first link inside a ``div.g`` result whose ``href``
    contains ``leadiq.com/c/`` is returned, with any ``/email-format`` suffix
    removed. Each successful lookup increments ``self._counter``.

    The whole lookup runs under ``self.semaphore``.

    Args:
        idx: Identifier of the row, returned unchanged.
        row: Row holding the company name.

    Returns:
        ``(idx, url)`` when a LeadIQ URL is found; ``(idx, None)`` when the
        company name is missing/NaN, nothing matches, or any error occurs
        (errors are logged, never raised).
    """
    async with self.semaphore:
        # Bound before the ``try`` so the error handler can always format its
        # message, even when reading the column itself is what failed.
        company_name = None
        try:
            company_name = row[self.company_column]
            if pd.isna(company_name):
                return idx, None

            query = f'site:{self.site} {company_name}'
            self._logger.notice(f"Searching for: {query}")

            search_url = f'https://www.google.com/search?q={quote(query)}'
            await self.get_page(search_url)
            # Random pause after loading the results page.
            await asyncio.sleep(random.uniform(2, 5))

            content = self._driver.page_source
            soup = BeautifulSoup(content, 'html.parser')

            # Find the first LeadIQ URL among the result containers.
            for result in soup.find_all('div', class_='g'):
                link = result.find('a')
                if not link:
                    continue

                href = link.get('href', '')
                if 'leadiq.com/c/' in href:
                    # Remove /email-format if present
                    href = href.split('/email-format')[0]
                    self._logger.info(f"Found LeadIQ URL for {company_name}: {href}")
                    self._counter += 1
                    return idx, href

            self._logger.warning(f"No LeadIQ URL found for {company_name}")
            return idx, None

        except Exception as e:
            self._logger.error(f"Error searching for {company_name}: {str(e)}")
            return idx, None
|
382
|
+
|
383
|
+
async def _search_with_ddg(self, search_term: str) -> List[Dict]:
    """Search DuckDuckGo for ``search_term`` and score every hit.

    The term is normalised with ``self._standardize_name`` and searched as
    ``site:<self.site or 'leadiq.com'> <term>``. Each hit's URL is trimmed at
    ``/email-format`` and ``/employee-directory``, and its title (cleaned
    with ``self._clean_company_name``) is compared with the normalised term
    via ``fuzz.ratio`` to produce ``match_score``.

    Up to three attempts are made. An empty result set is retried, and an
    error whose text contains ``"403 Ratelimit"`` is retried after a growing
    delay (5 s, then 10 s). Any other error ends the search.

    Args:
        search_term: Raw name to search for; reported back unchanged in
            ``search_term`` of every result.

    Returns:
        Result dictionaries sorted by descending ``match_score``; the first
        one additionally carries ``is_best_match=True``. An empty list when
        nothing was found or the search failed (failures are logged).
    """
    max_retries = 3
    base_delay = 5  # seconds

    for attempt in range(max_retries):
        try:
            # Keep at least 2 seconds between consecutive requests.
            now = time.time()
            elapsed = now - self._last_request
            if elapsed < 2.0:
                await asyncio.sleep(2.0 - elapsed)

            original_term = search_term
            standardized_term = self._standardize_name(search_term)
            query = f"site:{self.site or 'leadiq.com'} {standardized_term}"
            self._logger.info(f"DDG Query: {query}")

            # Build a new proxied client on every attempt.
            if self.use_proxy:
                proxies = await self.get_proxies()
                if proxies:
                    proxy = f"http://{OXYLABS_USERNAME}:{OXYLABS_PASSWORD}@{OXYLABS_ENDPOINT}"
                    # Log only the endpoint: the full proxy URL embeds the
                    # account credentials and must not reach the logs.
                    self._logger.info(f"Using proxy (attempt {attempt + 1}): {OXYLABS_ENDPOINT}")
                    self._ddg_service = DDGS(proxy=proxy)

            if self._ddg_service is None:
                # The rate-limit handler below discards the client; without
                # a fresh one the retry would fail on ``None.text``.
                self._ddg_service = DDGS()

            results = list(self._ddg_service.text(
                keywords=query,
                region="wt-wt",
                max_results=self.max_results,
                backend='html'
            ))

            self._last_request = time.time()

            # Make sure there are results before processing them.
            if not results:
                if attempt < max_retries - 1:
                    self._logger.warning(f"No results found for {search_term}, retrying...")
                    continue
                else:
                    self._logger.warning(f"No results found for {search_term} after {max_retries} attempts")
                    return []

            # From here on there is at least one result.
            formatted_results = []
            for item in results:
                title = item.get('title', '')
                url = item.get('href', '')

                if '/email-format' in url:
                    url = url.split('/email-format')[0]
                if '/employee-directory' in url:
                    url = url.split('/employee-directory')[0]

                clean_company_name = self._clean_company_name(title)
                score = fuzz.ratio(standardized_term, clean_company_name)

                formatted_results.append({
                    'search_url': url,
                    'search_term': original_term,
                    'search_status': 'success',
                    'match_score': score,
                    'search_engine': 'duckduckgo'
                })

            if formatted_results:
                formatted_results.sort(key=lambda x: -x['match_score'])
                formatted_results[0]['is_best_match'] = True
                return formatted_results

            return []

        except Exception as e:
            error_msg = str(e)
            if "403 Ratelimit" in error_msg and attempt < max_retries - 1:
                delay = base_delay * (attempt + 1)
                self._logger.warning(f"Rate limit hit, waiting {delay} seconds before retry {attempt + 1}")
                await asyncio.sleep(delay)
                # Drop the rate-limited client; the next attempt builds a new one.
                self._ddg_service = None
                continue
            else:
                self._logger.error(f"DuckDuckGo search error for {search_term}: {error_msg}")
                return []

    return []
|
469
|
+
|
470
|
+
async def _search_with_google(self, search_term: str) -> List[Dict]:
    """Search Google Custom Search for ``search_term`` and score every hit.

    The term is normalised with ``self._standardize_name`` and turned into a
    query by ``self._build_query``. A response with HTTP status 429 is
    retried once after a 30 second pause; any other HTTP error is treated as
    a failure. Each hit's link is trimmed at ``/email-format`` and
    ``/employee-directory``, and its title (cleaned with
    ``self._clean_company_name``) is compared with the normalised term via
    ``fuzz.ratio`` to produce ``match_score``.

    Args:
        search_term: Raw name to search for; reported back unchanged in
            ``search_term`` of every result.

    Returns:
        Result dictionaries sorted by descending ``match_score``; the first
        one additionally carries ``is_best_match=True``. An empty list when
        the response has no items or the search failed (failures are logged).
    """
    try:
        # Keep at least 1 second between consecutive requests.
        since_last = time.time() - self._last_request
        if since_last < 1.0:
            await asyncio.sleep(1.0 - since_last)

        standardized_term = self._standardize_name(search_term)
        search_query = self._build_query(standardized_term)
        self._logger.notice(f"Searching for: {search_query}")

        def _run_query():
            # One request against the custom search engine.
            service = self.get_search_service()
            return service.cse().list(
                q=search_query,
                cx=GOOGLE_SEARCH_ENGINE_ID,
                num=self.max_results
            ).execute()

        try:
            response = _run_query()
            self._last_request = time.time()
        except HttpError as http_err:
            if http_err.resp.status != 429:
                raise
            # Rate limited: wait, then try exactly once more.
            self._logger.warning(f"Rate limit exceeded for '{search_term}', waiting 30 seconds")
            await asyncio.sleep(30)
            response = _run_query()
            self._last_request = time.time()

        if not response or 'items' not in response:
            return []

        hits = []
        for entry in response['items']:
            title = entry.get('title', '')
            link = entry.get('link', '')

            # Cut sub-page suffixes so only the base company URL remains.
            for suffix in ('/email-format', '/employee-directory'):
                if suffix in link:
                    link = link.split(suffix)[0]

            cleaned_title = self._clean_company_name(title)
            hits.append({
                'search_url': link,
                'search_term': search_term,
                'search_status': 'success',
                'match_score': fuzz.ratio(standardized_term, cleaned_title),
                'search_engine': 'google'
            })

        if hits:
            hits.sort(key=lambda hit: -hit['match_score'])
            hits[0]['is_best_match'] = True

        return hits

    except Exception as e:
        self._logger.error(f"Google search error for {search_term}: {str(e)}")
        return []
|