flowtask 5.8.4__cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowtask/__init__.py +93 -0
- flowtask/__main__.py +38 -0
- flowtask/bots/__init__.py +6 -0
- flowtask/bots/check.py +93 -0
- flowtask/bots/codebot.py +51 -0
- flowtask/components/ASPX.py +148 -0
- flowtask/components/AddDataset.py +352 -0
- flowtask/components/Amazon.py +523 -0
- flowtask/components/AutoTask.py +314 -0
- flowtask/components/Azure.py +80 -0
- flowtask/components/AzureUsers.py +106 -0
- flowtask/components/BaseAction.py +91 -0
- flowtask/components/BaseLoop.py +198 -0
- flowtask/components/BestBuy.py +800 -0
- flowtask/components/CSVToGCS.py +120 -0
- flowtask/components/CompanyScraper/__init__.py +1 -0
- flowtask/components/CompanyScraper/parsers/__init__.py +6 -0
- flowtask/components/CompanyScraper/parsers/base.py +102 -0
- flowtask/components/CompanyScraper/parsers/explorium.py +192 -0
- flowtask/components/CompanyScraper/parsers/leadiq.py +206 -0
- flowtask/components/CompanyScraper/parsers/rocket.py +133 -0
- flowtask/components/CompanyScraper/parsers/siccode.py +109 -0
- flowtask/components/CompanyScraper/parsers/visualvisitor.py +130 -0
- flowtask/components/CompanyScraper/parsers/zoominfo.py +118 -0
- flowtask/components/CompanyScraper/scrapper.py +1054 -0
- flowtask/components/CopyTo.py +177 -0
- flowtask/components/CopyToBigQuery.py +243 -0
- flowtask/components/CopyToMongoDB.py +291 -0
- flowtask/components/CopyToPg.py +609 -0
- flowtask/components/CopyToRethink.py +207 -0
- flowtask/components/CreateGCSBucket.py +102 -0
- flowtask/components/CreateReport/CreateReport.py +228 -0
- flowtask/components/CreateReport/__init__.py +9 -0
- flowtask/components/CreateReport/charts/__init__.py +15 -0
- flowtask/components/CreateReport/charts/bar.py +51 -0
- flowtask/components/CreateReport/charts/base.py +66 -0
- flowtask/components/CreateReport/charts/pie.py +64 -0
- flowtask/components/CreateReport/utils.py +9 -0
- flowtask/components/CustomerSatisfaction.py +196 -0
- flowtask/components/DataInput.py +200 -0
- flowtask/components/DateList.py +255 -0
- flowtask/components/DbClient.py +163 -0
- flowtask/components/DialPad.py +146 -0
- flowtask/components/DocumentDBQuery.py +200 -0
- flowtask/components/DownloadFrom.py +371 -0
- flowtask/components/DownloadFromD2L.py +113 -0
- flowtask/components/DownloadFromFTP.py +181 -0
- flowtask/components/DownloadFromIMAP.py +315 -0
- flowtask/components/DownloadFromS3.py +198 -0
- flowtask/components/DownloadFromSFTP.py +265 -0
- flowtask/components/DownloadFromSharepoint.py +110 -0
- flowtask/components/DownloadFromSmartSheet.py +114 -0
- flowtask/components/DownloadS3File.py +229 -0
- flowtask/components/Dummy.py +59 -0
- flowtask/components/DuplicatePhoto.py +411 -0
- flowtask/components/EmployeeEvaluation.py +237 -0
- flowtask/components/ExecuteSQL.py +323 -0
- flowtask/components/ExtractHTML.py +178 -0
- flowtask/components/FileBase.py +178 -0
- flowtask/components/FileCopy.py +181 -0
- flowtask/components/FileDelete.py +82 -0
- flowtask/components/FileExists.py +146 -0
- flowtask/components/FileIteratorDelete.py +112 -0
- flowtask/components/FileList.py +194 -0
- flowtask/components/FileOpen.py +75 -0
- flowtask/components/FileRead.py +120 -0
- flowtask/components/FileRename.py +106 -0
- flowtask/components/FilterIf.py +284 -0
- flowtask/components/FilterRows/FilterRows.py +200 -0
- flowtask/components/FilterRows/__init__.py +10 -0
- flowtask/components/FilterRows/functions.py +4 -0
- flowtask/components/GCSToBigQuery.py +103 -0
- flowtask/components/GoogleA4.py +150 -0
- flowtask/components/GoogleGeoCoding.py +344 -0
- flowtask/components/GooglePlaces.py +315 -0
- flowtask/components/GoogleSearch.py +539 -0
- flowtask/components/HTTPClient.py +268 -0
- flowtask/components/ICIMS.py +146 -0
- flowtask/components/IF.py +179 -0
- flowtask/components/IcimsFolderCopy.py +173 -0
- flowtask/components/ImageFeatures/__init__.py +5 -0
- flowtask/components/ImageFeatures/process.py +233 -0
- flowtask/components/IteratorBase.py +251 -0
- flowtask/components/LangchainLoader/__init__.py +5 -0
- flowtask/components/LangchainLoader/loader.py +194 -0
- flowtask/components/LangchainLoader/loaders/__init__.py +22 -0
- flowtask/components/LangchainLoader/loaders/abstract.py +362 -0
- flowtask/components/LangchainLoader/loaders/basepdf.py +50 -0
- flowtask/components/LangchainLoader/loaders/docx.py +91 -0
- flowtask/components/LangchainLoader/loaders/html.py +119 -0
- flowtask/components/LangchainLoader/loaders/pdfblocks.py +146 -0
- flowtask/components/LangchainLoader/loaders/pdfmark.py +79 -0
- flowtask/components/LangchainLoader/loaders/pdftables.py +135 -0
- flowtask/components/LangchainLoader/loaders/qa.py +67 -0
- flowtask/components/LangchainLoader/loaders/txt.py +55 -0
- flowtask/components/LeadIQ.py +650 -0
- flowtask/components/Loop.py +253 -0
- flowtask/components/Lowes.py +334 -0
- flowtask/components/MS365Usage.py +156 -0
- flowtask/components/MSTeamsMessages.py +320 -0
- flowtask/components/MarketClustering.py +1051 -0
- flowtask/components/MergeFiles.py +362 -0
- flowtask/components/MilvusOutput.py +87 -0
- flowtask/components/NearByStores.py +175 -0
- flowtask/components/NetworkNinja/__init__.py +6 -0
- flowtask/components/NetworkNinja/models/__init__.py +52 -0
- flowtask/components/NetworkNinja/models/abstract.py +177 -0
- flowtask/components/NetworkNinja/models/account.py +39 -0
- flowtask/components/NetworkNinja/models/client.py +19 -0
- flowtask/components/NetworkNinja/models/district.py +14 -0
- flowtask/components/NetworkNinja/models/events.py +101 -0
- flowtask/components/NetworkNinja/models/forms.py +499 -0
- flowtask/components/NetworkNinja/models/market.py +16 -0
- flowtask/components/NetworkNinja/models/organization.py +34 -0
- flowtask/components/NetworkNinja/models/photos.py +125 -0
- flowtask/components/NetworkNinja/models/project.py +44 -0
- flowtask/components/NetworkNinja/models/region.py +28 -0
- flowtask/components/NetworkNinja/models/store.py +203 -0
- flowtask/components/NetworkNinja/models/user.py +151 -0
- flowtask/components/NetworkNinja/router.py +854 -0
- flowtask/components/Odoo.py +175 -0
- flowtask/components/OdooInjector.py +192 -0
- flowtask/components/OpenFromXML.py +126 -0
- flowtask/components/OpenWeather.py +41 -0
- flowtask/components/OpenWithBase.py +616 -0
- flowtask/components/OpenWithPandas.py +715 -0
- flowtask/components/PGPDecrypt.py +199 -0
- flowtask/components/PandasIterator.py +187 -0
- flowtask/components/PandasToFile.py +189 -0
- flowtask/components/Paradox.py +339 -0
- flowtask/components/ParamIterator.py +117 -0
- flowtask/components/ParseHTML.py +84 -0
- flowtask/components/PlacerStores.py +249 -0
- flowtask/components/Pokemon.py +507 -0
- flowtask/components/PositiveBot.py +62 -0
- flowtask/components/PowerPointSlide.py +400 -0
- flowtask/components/PrintMessage.py +127 -0
- flowtask/components/ProductCompetitors/__init__.py +5 -0
- flowtask/components/ProductCompetitors/parsers/__init__.py +7 -0
- flowtask/components/ProductCompetitors/parsers/base.py +72 -0
- flowtask/components/ProductCompetitors/parsers/bestbuy.py +86 -0
- flowtask/components/ProductCompetitors/parsers/lowes.py +103 -0
- flowtask/components/ProductCompetitors/scrapper.py +155 -0
- flowtask/components/ProductCompliant.py +169 -0
- flowtask/components/ProductInfo/__init__.py +1 -0
- flowtask/components/ProductInfo/parsers/__init__.py +5 -0
- flowtask/components/ProductInfo/parsers/base.py +83 -0
- flowtask/components/ProductInfo/parsers/brother.py +97 -0
- flowtask/components/ProductInfo/parsers/canon.py +167 -0
- flowtask/components/ProductInfo/parsers/epson.py +118 -0
- flowtask/components/ProductInfo/parsers/hp.py +131 -0
- flowtask/components/ProductInfo/parsers/samsung.py +97 -0
- flowtask/components/ProductInfo/scraper.py +319 -0
- flowtask/components/ProductPricing.py +118 -0
- flowtask/components/QS.py +261 -0
- flowtask/components/QSBase.py +201 -0
- flowtask/components/QueryIterator.py +273 -0
- flowtask/components/QueryToInsert.py +327 -0
- flowtask/components/QueryToPandas.py +432 -0
- flowtask/components/RESTClient.py +195 -0
- flowtask/components/RethinkDBQuery.py +189 -0
- flowtask/components/Rsync.py +74 -0
- flowtask/components/RunSSH.py +59 -0
- flowtask/components/RunShell.py +71 -0
- flowtask/components/SalesForce.py +20 -0
- flowtask/components/SaveImageBank/__init__.py +257 -0
- flowtask/components/SchedulingVisits.py +592 -0
- flowtask/components/ScrapPage.py +216 -0
- flowtask/components/ScrapSearch.py +79 -0
- flowtask/components/SendNotify.py +257 -0
- flowtask/components/SentimentAnalysis.py +694 -0
- flowtask/components/ServiceScrapper/__init__.py +5 -0
- flowtask/components/ServiceScrapper/parsers/__init__.py +1 -0
- flowtask/components/ServiceScrapper/parsers/base.py +94 -0
- flowtask/components/ServiceScrapper/parsers/costco.py +93 -0
- flowtask/components/ServiceScrapper/scrapper.py +199 -0
- flowtask/components/SetVariables.py +156 -0
- flowtask/components/SubTask.py +182 -0
- flowtask/components/SuiteCRM.py +48 -0
- flowtask/components/Switch.py +175 -0
- flowtask/components/TableBase.py +148 -0
- flowtask/components/TableDelete.py +312 -0
- flowtask/components/TableInput.py +143 -0
- flowtask/components/TableOutput/TableOutput.py +384 -0
- flowtask/components/TableOutput/__init__.py +3 -0
- flowtask/components/TableSchema.py +534 -0
- flowtask/components/Target.py +223 -0
- flowtask/components/ThumbnailGenerator.py +156 -0
- flowtask/components/ToPandas.py +67 -0
- flowtask/components/TransformRows/TransformRows.py +507 -0
- flowtask/components/TransformRows/__init__.py +9 -0
- flowtask/components/TransformRows/functions.py +559 -0
- flowtask/components/TransposeRows.py +176 -0
- flowtask/components/UPCDatabase.py +86 -0
- flowtask/components/UnGzip.py +171 -0
- flowtask/components/Uncompress.py +172 -0
- flowtask/components/UniqueRows.py +126 -0
- flowtask/components/Unzip.py +107 -0
- flowtask/components/UpdateOperationalVars.py +147 -0
- flowtask/components/UploadTo.py +299 -0
- flowtask/components/UploadToS3.py +136 -0
- flowtask/components/UploadToSFTP.py +160 -0
- flowtask/components/UploadToSharepoint.py +205 -0
- flowtask/components/UserFunc.py +122 -0
- flowtask/components/VivaTracker.py +140 -0
- flowtask/components/WSDLClient.py +123 -0
- flowtask/components/Wait.py +18 -0
- flowtask/components/Walmart.py +199 -0
- flowtask/components/Workplace.py +134 -0
- flowtask/components/XMLToPandas.py +267 -0
- flowtask/components/Zammad/__init__.py +41 -0
- flowtask/components/Zammad/models.py +0 -0
- flowtask/components/ZoomInfoScraper.py +409 -0
- flowtask/components/__init__.py +104 -0
- flowtask/components/abstract.py +18 -0
- flowtask/components/flow.py +530 -0
- flowtask/components/google.py +335 -0
- flowtask/components/group.py +221 -0
- flowtask/components/py.typed +0 -0
- flowtask/components/reviewscrap.py +132 -0
- flowtask/components/tAutoincrement.py +117 -0
- flowtask/components/tConcat.py +109 -0
- flowtask/components/tExplode.py +119 -0
- flowtask/components/tFilter.py +184 -0
- flowtask/components/tGroup.py +236 -0
- flowtask/components/tJoin.py +270 -0
- flowtask/components/tMap/__init__.py +9 -0
- flowtask/components/tMap/functions.py +54 -0
- flowtask/components/tMap/tMap.py +450 -0
- flowtask/components/tMelt.py +112 -0
- flowtask/components/tMerge.py +114 -0
- flowtask/components/tOrder.py +93 -0
- flowtask/components/tPandas.py +94 -0
- flowtask/components/tPivot.py +71 -0
- flowtask/components/tPluckCols.py +76 -0
- flowtask/components/tUnnest.py +82 -0
- flowtask/components/user.py +401 -0
- flowtask/conf.py +457 -0
- flowtask/download.py +102 -0
- flowtask/events/__init__.py +11 -0
- flowtask/events/events/__init__.py +20 -0
- flowtask/events/events/abstract.py +95 -0
- flowtask/events/events/alerts/__init__.py +362 -0
- flowtask/events/events/alerts/colfunctions.py +131 -0
- flowtask/events/events/alerts/functions.py +158 -0
- flowtask/events/events/dummy.py +12 -0
- flowtask/events/events/exec.py +124 -0
- flowtask/events/events/file/__init__.py +7 -0
- flowtask/events/events/file/base.py +51 -0
- flowtask/events/events/file/copy.py +23 -0
- flowtask/events/events/file/delete.py +16 -0
- flowtask/events/events/interfaces/__init__.py +9 -0
- flowtask/events/events/interfaces/client.py +67 -0
- flowtask/events/events/interfaces/credentials.py +28 -0
- flowtask/events/events/interfaces/notifications.py +58 -0
- flowtask/events/events/jira.py +122 -0
- flowtask/events/events/log.py +26 -0
- flowtask/events/events/logerr.py +52 -0
- flowtask/events/events/notify.py +59 -0
- flowtask/events/events/notify_event.py +160 -0
- flowtask/events/events/publish.py +54 -0
- flowtask/events/events/sendfile.py +104 -0
- flowtask/events/events/task.py +97 -0
- flowtask/events/events/teams.py +98 -0
- flowtask/events/events/webhook.py +58 -0
- flowtask/events/manager.py +287 -0
- flowtask/exceptions.c +39393 -0
- flowtask/exceptions.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/extensions/__init__.py +3 -0
- flowtask/extensions/abstract.py +82 -0
- flowtask/extensions/logging/__init__.py +65 -0
- flowtask/hooks/__init__.py +9 -0
- flowtask/hooks/actions/__init__.py +22 -0
- flowtask/hooks/actions/abstract.py +66 -0
- flowtask/hooks/actions/dummy.py +23 -0
- flowtask/hooks/actions/jira.py +74 -0
- flowtask/hooks/actions/rest.py +320 -0
- flowtask/hooks/actions/sampledata.py +37 -0
- flowtask/hooks/actions/sensor.py +23 -0
- flowtask/hooks/actions/task.py +9 -0
- flowtask/hooks/actions/ticket.py +37 -0
- flowtask/hooks/actions/zammad.py +55 -0
- flowtask/hooks/hook.py +62 -0
- flowtask/hooks/models.py +17 -0
- flowtask/hooks/service.py +187 -0
- flowtask/hooks/step.py +91 -0
- flowtask/hooks/types/__init__.py +23 -0
- flowtask/hooks/types/base.py +129 -0
- flowtask/hooks/types/brokers/__init__.py +11 -0
- flowtask/hooks/types/brokers/base.py +54 -0
- flowtask/hooks/types/brokers/mqtt.py +35 -0
- flowtask/hooks/types/brokers/rabbitmq.py +82 -0
- flowtask/hooks/types/brokers/redis.py +83 -0
- flowtask/hooks/types/brokers/sqs.py +44 -0
- flowtask/hooks/types/fs.py +232 -0
- flowtask/hooks/types/http.py +49 -0
- flowtask/hooks/types/imap.py +200 -0
- flowtask/hooks/types/jira.py +279 -0
- flowtask/hooks/types/mail.py +205 -0
- flowtask/hooks/types/postgres.py +98 -0
- flowtask/hooks/types/responses/__init__.py +8 -0
- flowtask/hooks/types/responses/base.py +5 -0
- flowtask/hooks/types/sharepoint.py +288 -0
- flowtask/hooks/types/ssh.py +141 -0
- flowtask/hooks/types/tagged.py +59 -0
- flowtask/hooks/types/upload.py +85 -0
- flowtask/hooks/types/watch.py +71 -0
- flowtask/hooks/types/web.py +36 -0
- flowtask/interfaces/AzureClient.py +137 -0
- flowtask/interfaces/AzureGraph.py +839 -0
- flowtask/interfaces/Boto3Client.py +326 -0
- flowtask/interfaces/DropboxClient.py +173 -0
- flowtask/interfaces/ExcelHandler.py +94 -0
- flowtask/interfaces/FTPClient.py +131 -0
- flowtask/interfaces/GoogleCalendar.py +201 -0
- flowtask/interfaces/GoogleClient.py +133 -0
- flowtask/interfaces/GoogleDrive.py +127 -0
- flowtask/interfaces/GoogleGCS.py +89 -0
- flowtask/interfaces/GoogleGeocoding.py +93 -0
- flowtask/interfaces/GoogleLang.py +114 -0
- flowtask/interfaces/GooglePub.py +61 -0
- flowtask/interfaces/GoogleSheet.py +68 -0
- flowtask/interfaces/IMAPClient.py +137 -0
- flowtask/interfaces/O365Calendar.py +113 -0
- flowtask/interfaces/O365Client.py +220 -0
- flowtask/interfaces/OneDrive.py +284 -0
- flowtask/interfaces/Outlook.py +155 -0
- flowtask/interfaces/ParrotBot.py +130 -0
- flowtask/interfaces/SSHClient.py +378 -0
- flowtask/interfaces/Sharepoint.py +496 -0
- flowtask/interfaces/__init__.py +36 -0
- flowtask/interfaces/azureauth.py +119 -0
- flowtask/interfaces/cache.py +201 -0
- flowtask/interfaces/client.py +82 -0
- flowtask/interfaces/compress.py +525 -0
- flowtask/interfaces/credentials.py +124 -0
- flowtask/interfaces/d2l.py +239 -0
- flowtask/interfaces/databases/__init__.py +5 -0
- flowtask/interfaces/databases/db.py +223 -0
- flowtask/interfaces/databases/documentdb.py +55 -0
- flowtask/interfaces/databases/rethink.py +39 -0
- flowtask/interfaces/dataframes/__init__.py +11 -0
- flowtask/interfaces/dataframes/abstract.py +21 -0
- flowtask/interfaces/dataframes/arrow.py +71 -0
- flowtask/interfaces/dataframes/dt.py +69 -0
- flowtask/interfaces/dataframes/pandas.py +167 -0
- flowtask/interfaces/dataframes/polars.py +60 -0
- flowtask/interfaces/db.py +263 -0
- flowtask/interfaces/env.py +46 -0
- flowtask/interfaces/func.py +137 -0
- flowtask/interfaces/http.py +1780 -0
- flowtask/interfaces/locale.py +40 -0
- flowtask/interfaces/log.py +75 -0
- flowtask/interfaces/mask.py +143 -0
- flowtask/interfaces/notification.py +154 -0
- flowtask/interfaces/playwright.py +339 -0
- flowtask/interfaces/powerpoint.py +368 -0
- flowtask/interfaces/py.typed +0 -0
- flowtask/interfaces/qs.py +376 -0
- flowtask/interfaces/result.py +87 -0
- flowtask/interfaces/selenium_service.py +779 -0
- flowtask/interfaces/smartsheet.py +154 -0
- flowtask/interfaces/stat.py +39 -0
- flowtask/interfaces/task.py +96 -0
- flowtask/interfaces/template.py +118 -0
- flowtask/interfaces/vectorstores/__init__.py +1 -0
- flowtask/interfaces/vectorstores/abstract.py +133 -0
- flowtask/interfaces/vectorstores/milvus.py +669 -0
- flowtask/interfaces/zammad.py +107 -0
- flowtask/models.py +193 -0
- flowtask/parsers/__init__.py +15 -0
- flowtask/parsers/_yaml.c +11978 -0
- flowtask/parsers/_yaml.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/argparser.py +235 -0
- flowtask/parsers/base.c +15155 -0
- flowtask/parsers/base.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/json.c +11968 -0
- flowtask/parsers/json.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/maps.py +49 -0
- flowtask/parsers/toml.c +11968 -0
- flowtask/parsers/toml.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/plugins/__init__.py +16 -0
- flowtask/plugins/components/__init__.py +0 -0
- flowtask/plugins/handler/__init__.py +45 -0
- flowtask/plugins/importer.py +31 -0
- flowtask/plugins/sources/__init__.py +0 -0
- flowtask/runner.py +283 -0
- flowtask/scheduler/__init__.py +9 -0
- flowtask/scheduler/functions.py +493 -0
- flowtask/scheduler/handlers/__init__.py +8 -0
- flowtask/scheduler/handlers/manager.py +504 -0
- flowtask/scheduler/handlers/models.py +58 -0
- flowtask/scheduler/handlers/service.py +72 -0
- flowtask/scheduler/notifications.py +65 -0
- flowtask/scheduler/scheduler.py +993 -0
- flowtask/services/__init__.py +0 -0
- flowtask/services/bots/__init__.py +0 -0
- flowtask/services/bots/telegram.py +264 -0
- flowtask/services/files/__init__.py +11 -0
- flowtask/services/files/manager.py +522 -0
- flowtask/services/files/model.py +37 -0
- flowtask/services/files/service.py +767 -0
- flowtask/services/jira/__init__.py +3 -0
- flowtask/services/jira/jira_actions.py +191 -0
- flowtask/services/tasks/__init__.py +13 -0
- flowtask/services/tasks/launcher.py +213 -0
- flowtask/services/tasks/manager.py +323 -0
- flowtask/services/tasks/service.py +275 -0
- flowtask/services/tasks/task_manager.py +376 -0
- flowtask/services/tasks/tasks.py +155 -0
- flowtask/storages/__init__.py +16 -0
- flowtask/storages/exceptions.py +12 -0
- flowtask/storages/files/__init__.py +8 -0
- flowtask/storages/files/abstract.py +29 -0
- flowtask/storages/files/filesystem.py +66 -0
- flowtask/storages/tasks/__init__.py +19 -0
- flowtask/storages/tasks/abstract.py +26 -0
- flowtask/storages/tasks/database.py +33 -0
- flowtask/storages/tasks/filesystem.py +108 -0
- flowtask/storages/tasks/github.py +119 -0
- flowtask/storages/tasks/memory.py +45 -0
- flowtask/storages/tasks/row.py +25 -0
- flowtask/tasks/__init__.py +0 -0
- flowtask/tasks/abstract.py +526 -0
- flowtask/tasks/command.py +118 -0
- flowtask/tasks/pile.py +486 -0
- flowtask/tasks/py.typed +0 -0
- flowtask/tasks/task.py +778 -0
- flowtask/template/__init__.py +161 -0
- flowtask/tests.py +257 -0
- flowtask/types/__init__.py +8 -0
- flowtask/types/typedefs.c +11347 -0
- flowtask/types/typedefs.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/__init__.py +24 -0
- flowtask/utils/constants.py +117 -0
- flowtask/utils/encoders.py +21 -0
- flowtask/utils/executor.py +112 -0
- flowtask/utils/functions.cpp +14280 -0
- flowtask/utils/functions.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/json.cpp +13349 -0
- flowtask/utils/json.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/mail.py +63 -0
- flowtask/utils/parseqs.c +13324 -0
- flowtask/utils/parserqs.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/stats.py +308 -0
- flowtask/utils/transformations.py +74 -0
- flowtask/utils/uv.py +12 -0
- flowtask/utils/validators.py +97 -0
- flowtask/version.py +11 -0
- flowtask-5.8.4.dist-info/LICENSE +201 -0
- flowtask-5.8.4.dist-info/METADATA +209 -0
- flowtask-5.8.4.dist-info/RECORD +470 -0
- flowtask-5.8.4.dist-info/WHEEL +6 -0
- flowtask-5.8.4.dist-info/entry_points.txt +3 -0
- flowtask-5.8.4.dist-info/top_level.txt +2 -0
- plugins/components/CreateQR.py +39 -0
- plugins/components/TestComponent.py +28 -0
- plugins/components/Use1.py +13 -0
- plugins/components/Workplace.py +117 -0
- plugins/components/__init__.py +3 -0
- plugins/sources/__init__.py +0 -0
- plugins/sources/get_populartimes.py +78 -0
- plugins/sources/google.py +150 -0
- plugins/sources/hubspot.py +679 -0
- plugins/sources/icims.py +679 -0
- plugins/sources/mobileinsight.py +501 -0
- plugins/sources/newrelic.py +262 -0
- plugins/sources/uap.py +268 -0
- plugins/sources/venu.py +244 -0
- plugins/sources/vocinity.py +314 -0
@@ -0,0 +1,339 @@
|
|
1
|
+
import asyncio
|
2
|
+
from collections.abc import Callable
|
3
|
+
from typing import Optional, Dict, Any
|
4
|
+
import pandas as pd
|
5
|
+
from .flow import FlowComponent
|
6
|
+
from ..interfaces.http import HTTPService
|
7
|
+
from ..interfaces.cache import CacheSupport
|
8
|
+
from ..exceptions import ComponentError
|
9
|
+
from ..conf import PARADOX_ACCOUNT_ID, PARADOX_API_SECRET
|
10
|
+
|
11
|
+
class Paradox(HTTPService, CacheSupport, FlowComponent):
    """
    Paradox Component

    **Overview**

    This component interacts with the Paradox API to perform various operations.
    The first step is to handle authentication and obtain an access token.
    The token is cached in Redis to avoid requesting a new one on each execution.

    .. table:: Properties
       :widths: auto

    +--------------+----------+--------------------------------------------------------------+
    | Name         | Required | Summary                                                      |
    +--------------+----------+--------------------------------------------------------------+
    | type         | Yes      | Operation (method name) to execute, e.g. ``candidates``      |
    +--------------+----------+--------------------------------------------------------------+
    | max_pages    | No       | Maximum number of pages to fetch (default: no limit)         |
    +--------------+----------+--------------------------------------------------------------+
    | limit        | No       | Page size used by ``candidates`` (default: 50)               |
    +--------------+----------+--------------------------------------------------------------+
    | offset_start | No       | Initial pagination offset for ``candidates`` (default: 0)    |
    +--------------+----------+--------------------------------------------------------------+
    """

    # Response content type requested from the API.
    accept: str = "application/json"
    # Root URL every endpoint path is appended to.
    BASE_URL = "https://api.paradox.ai"
    # Cache key under which the access token is stored.
    CACHE_KEY = "_paradox_authentication"
|
34
|
+
|
35
|
+
def __init__(
    self,
    loop: Optional[asyncio.AbstractEventLoop] = None,
    job: Optional[Callable] = None,
    stat: Optional[Callable] = None,
    **kwargs,
):
    """
    Store the component options and delegate the rest to the base classes.

    Args:
        loop: Event loop forwarded to the parent constructor.
        job: Forwarded unchanged to the parent constructor.
        stat: Forwarded unchanged to the parent constructor.
        **kwargs: Component options. ``type`` names the operation that
            ``run()`` dispatches to; ``max_pages`` optionally caps the
            number of pages fetched by paginated operations.
    """
    # Both options are read with .get(), so either may legitimately be None.
    self.type: Optional[str] = kwargs.get('type')
    self._access_token: Optional[str] = None
    self.max_pages: Optional[int] = kwargs.get('max_pages')
    super().__init__(
        loop=loop, job=job, stat=stat, **kwargs
    )
|
48
|
+
|
49
|
+
async def get_cached_token(self) -> Optional[str]:
    """
    Retrieve the cached authentication token, if a usable one exists.

    The lookup is best-effort: any error raised while talking to the cache
    is logged as a warning and treated as a cache miss.

    Returns:
        The cached token as ``str`` when it is longer than 10 characters,
        otherwise ``None``.
    """
    try:
        async with self as cache:
            token = await cache._redis.get(self.CACHE_KEY)
    except Exception as err:
        self._logger.warning(f"Error getting cached token: {str(err)}")
        return None

    # The cache client may hand back raw bytes; normalise to text so a valid
    # token is not discarded by the type check below.
    if isinstance(token, (bytes, bytearray)):
        try:
            token = token.decode("utf-8")
        except UnicodeDecodeError:
            token = None

    if isinstance(token, str) and len(token) > 10:
        # Never write token material (not even a prefix) to the logs.
        self._logger.info("Using cached authentication token")
        return token

    self._logger.debug("Invalid or no token in cache")
    return None
|
64
|
+
|
65
|
+
def set_auth_headers(self, token: str) -> None:
    """
    Remember the access token and install the ``Authorization`` header.

    Existing headers are preserved; only ``Authorization`` is added or
    replaced. The headers are rebuilt as a fresh dict owned by this instance
    so the token is never written into a mapping shared with other objects.

    Args:
        token: Bearer token returned by the authentication endpoint.
    """
    self._access_token = token
    # headers may be unset or None before the first call.
    current = getattr(self, "headers", None) or {}
    self.headers = {**current, "Authorization": f"Bearer {token}"}
    # Log only the fact, never the header values (they contain the token).
    self._logger.debug("Authorization header set")
|
72
|
+
|
73
|
+
async def start(self, **kwargs):
    """
    Authenticate against the Paradox API before the component runs.

    A token found in the cache is reused. Otherwise a new one is requested
    with the client-credentials grant, stored in the cache for the lifetime
    reported by the API (``expires_in``, default 86400 seconds) and installed
    in the request headers.

    Returns:
        True once authentication headers are in place.

    Raises:
        ComponentError: If credentials are missing, the authentication
            request fails, the response has no access token, or the token
            cannot be stored in the cache.
    """
    if not PARADOX_ACCOUNT_ID or not PARADOX_API_SECRET:
        raise ComponentError(f"{__name__}: Missing required credentials")

    if token := await self.get_cached_token():
        self.set_auth_headers(token)
        self._logger.debug("Using cached authentication token")
        return True

    try:
        auth_url = f"{self.BASE_URL}/api/v1/public/auth/token"

        payload = {
            'client_id': PARADOX_ACCOUNT_ID,
            'client_secret': PARADOX_API_SECRET,
            'grant_type': 'client_credentials'
        }

        headers = {
            'Content-Type': 'application/x-www-form-urlencoded',
            'Accept': 'application/json'
        }

        result, error = await self.session(
            url=auth_url,
            method="post",
            data=payload,
            headers=headers,
            use_json=True,
            follow_redirects=True
        )

        if error:
            raise ComponentError(f"Authentication request failed: {error}")

        # Guard against an empty or non-mapping body so the failure is
        # reported as a missing token rather than an opaque TypeError.
        if not isinstance(result, dict) or 'access_token' not in result:
            raise ComponentError("No access token in authentication response")

        token = result['access_token']

        # Store the token in the cache first ...
        async with self as cache:
            await cache.setex(
                self.CACHE_KEY,
                token,
                timeout=f"{result.get('expires_in', 86400)}s"
            )
            # ... and read it back to confirm the write actually landed.
            cached_token = await cache._redis.get(self.CACHE_KEY)
            if not cached_token:
                raise ComponentError("Failed to store token in cache")

        self._logger.debug("Token successfully stored in cache")

        # Only then install the headers. They are deliberately not logged:
        # they carry the bearer token.
        self.set_auth_headers(token)

        # Final sanity check.
        if not self._access_token or "Authorization" not in self.headers:
            raise ComponentError("Authentication headers not properly set")

        self._logger.info("Successfully authenticated with Paradox API")
        return True

    except Exception as e:
        self._logger.error(f"Authentication failed: {str(e)}")
        raise ComponentError(f"Authentication failed: {str(e)}") from e
|
144
|
+
|
145
|
+
async def run(self):
    """
    Execute the operation named by ``self.type`` and return its result.

    The operation is resolved to a method of this component and awaited
    without arguments. For DataFrame results the ``NUMROWS`` and ``NUMCOLS``
    metrics are recorded and, in debug mode, a preview is printed.

    Returns:
        Whatever the selected operation returns (also stored in
        ``self._result``).

    Raises:
        ComponentError: If the component is not authenticated or
            ``self.type`` does not name a public, callable operation.
        Exception: Any error raised by the operation is logged and re-raised.
    """
    headers = getattr(self, "headers", None) or {}
    if not self._access_token or "Authorization" not in headers:
        self._logger.error(f"{__name__}: Not authenticated or missing Authorization header")
        raise ComponentError(f"{__name__}: Not authenticated. Call start() first")

    operation = self.type
    # Reject missing names, private attributes and the lifecycle methods
    # themselves (dispatching to run() would recurse forever).
    if (
        not isinstance(operation, str)
        or not operation
        or operation.startswith("_")
        or operation in ("run", "start", "close")
    ):
        raise ComponentError(f"{__name__}: Invalid operation type: {self.type}")

    handler = getattr(self, operation, None)
    if not callable(handler):
        raise ComponentError(f"{__name__}: Invalid operation type: {self.type}")

    try:
        result = await handler()

        if isinstance(result, pd.DataFrame):
            self.add_metric("NUMROWS", len(result.index))
            self.add_metric("NUMCOLS", len(result.columns))

            if self._debug:
                print("\n=== DataFrame Info ===")
                print(result.head())
                print("\n=== Column Information ===")
                for column, dtype in result.dtypes.items():
                    print(f"{column} -> {dtype} -> {result[column].iloc[0] if not result.empty else 'N/A'}")

        self._result = result
        return self._result

    except Exception as e:
        self._logger.error(f"Error executing {self.type}: {str(e)}")
        raise
|
178
|
+
|
179
|
+
async def close(self):
    """
    Drop the credentials held by this component.

    Clears the stored access token and removes the ``Authorization`` header
    so the bearer token does not outlive the component.

    Returns:
        True, always.
    """
    self._access_token = None
    headers = getattr(self, "headers", None)
    if isinstance(headers, dict):
        headers.pop("Authorization", None)
    return True
|
183
|
+
|
184
|
+
async def candidates(self) -> pd.DataFrame:
    """
    Retrieve candidates from the Paradox API and flatten them into a DataFrame.

    Pages are requested from ``/api/v1/public/candidates`` starting at
    ``self.offset_start`` (default 0) with ``self.limit`` records per page
    (default 50). Paging stops when a page comes back empty, when the last
    offset derived from the reported ``count`` is reached, or when
    ``self.max_pages`` pages have been processed. Each request is attempted
    up to three times with an exponentially growing delay that starts at
    two seconds for every page.

    Every record gets ``response_offset`` and ``global_order`` columns; the
    nested ``candidate`` and ``stage`` objects are expanded into columns,
    ``note`` is exposed as ``notes`` and a fixed set of fields is pulled out
    of ``attributes``.

    Returns:
        pd.DataFrame: One row per candidate; empty when nothing was returned.

    Raises:
        ComponentError: If a request keeps failing or the data cannot be
            processed.
    """
    try:
        offset = getattr(self, 'offset_start', 0)
        limit = getattr(self, 'limit', 50)
        max_retries = 3
        initial_retry_delay = 2.0
        endpoint = self.BASE_URL + "/api/v1/public/candidates"

        base_params = {
            'limit': limit,
            'note': 'true',
            'include_attributes': 'Yes'
        }

        count = 0
        max_offset = 0
        pages_processed = 0
        all_candidates_data = []

        while True:
            params = {
                **base_params,
                'offset': offset,
            }

            self._logger.debug(
                f"Fetching candidates page {pages_processed + 1} with offset {offset}"
            )

            # Retry each page independently: the backoff starts over for
            # every page instead of growing across the whole pagination.
            data = None
            retry_delay = initial_retry_delay
            for attempt in range(1, max_retries + 1):
                try:
                    data = await self.api_get(
                        url=endpoint,
                        params=params,
                        headers=self.headers,
                        use_proxy=False
                    )
                except Exception as e:
                    if attempt == max_retries:
                        raise
                    self._logger.warning(
                        f"Attempt {attempt} failed, retrying in {retry_delay} seconds... Error: {str(e)}"
                    )
                else:
                    if isinstance(data, dict) and 'candidates' in data:
                        break
                    if attempt == max_retries:
                        break
                    self._logger.warning(
                        f"Attempt {attempt} returned no candidates payload, "
                        f"retrying in {retry_delay} seconds..."
                    )
                await asyncio.sleep(retry_delay)
                retry_delay *= 2

            # A missing or malformed response is treated as an empty page.
            records = data.get('candidates') if isinstance(data, dict) else None
            if not records:
                self._logger.warning(f"No candidates found for offset {offset}")
                break

            # Use the offset echoed by the API; fall back to the requested
            # one so global_order keeps increasing across pages.
            response_offset = data.get('offset')
            if response_offset is None:
                response_offset = offset
            # Tag every candidate with the page offset and its global position.
            for idx, candidate in enumerate(records, 1):
                candidate['response_offset'] = response_offset
                candidate['global_order'] = response_offset + idx

            if count == 0:
                count = data.get('count', 0)
                # Last valid offset: the largest multiple of limit that
                # still points at an existing record.
                max_offset = ((count - 1) // limit) * limit
                self._logger.info(
                    f"Total candidates: {count}, Max offset: {max_offset}, Current offset: {offset}"
                )
                if self.max_pages:
                    self._logger.info(f"Will retrieve maximum {self.max_pages} pages")

            all_candidates_data.extend(records)
            pages_processed += 1

            self._logger.debug(
                f"Retrieved {len(all_candidates_data)} candidates so far (Offset: {offset}, "
                f"Page: {pages_processed})"
            )

            if offset >= max_offset:
                break

            if self.max_pages and pages_processed >= self.max_pages:
                self._logger.info(f"Reached configured page limit: {self.max_pages}")
                break

            offset += limit

        # Convert to DataFrame and process using pandas operations
        df = pd.DataFrame(all_candidates_data)

        if df.empty:
            self._logger.warning("No candidates data found")
            return df

        # Expand the nested objects into their own columns.
        candidate_cols = df['candidate'].apply(pd.Series)
        stage_cols = df['stage'].apply(pd.Series)
        # 'note' is optional in the payload; expose it as 'notes' either way.
        notes = df.pop('note') if 'note' in df.columns else None

        # Remove processed columns and join the extracted data
        df = df.drop(columns=['candidate', 'stage'])
        df = df.join(candidate_cols).join(stage_cols)
        df['notes'] = notes

        # Output column -> key inside the candidate's attributes mapping.
        attribute_fields = {
            "first_name": 'first_name',
            "last_name": 'last_name',
            "address": 'address',
            "address_2": 'address_2',
            "city": 'city',
            "state": 'state',
            "zipcode": 'zip_code',
            "birth_date": '__birthdate',
            "gender": '__gender',
            "offer_created_date": 'offer_created_date',
            "offer_accepted_date": 'offer_accepted_date',
            "current_employee": 'current_employee',
            "previously_employed_at_troc": 'previously_employed_at_troc',
        }

        def _extract_attributes(attrs):
            # Candidates without attributes (null/NaN) yield empty fields.
            source = attrs if isinstance(attrs, dict) else {}
            return pd.Series(
                {column: source.get(key) for column, key in attribute_fields.items()}
            )

        if 'attributes' in df.columns:
            raw_attributes = df['attributes']
        else:
            raw_attributes = pd.Series([None] * len(df), index=df.index, dtype=object)
        attribute_cols = raw_attributes.apply(_extract_attributes)
        df = pd.concat([df, attribute_cols], axis=1)

        self._logger.info(
            f"Retrieved total of {len(df)} candidates out of {count} (Pages: {pages_processed})"
        )
        return df

    except Exception as e:
        self._logger.error(
            f"Error fetching candidates: {str(e)}"
        )
        raise ComponentError(
            f"Failed to fetch candidates: {str(e)}"
        ) from e
|
@@ -0,0 +1,117 @@
|
|
1
|
+
from asyncdb.exceptions import ProviderError
|
2
|
+
from ..exceptions import ComponentError, NotSupported
|
3
|
+
from .IteratorBase import IteratorBase
|
4
|
+
|
5
|
+
|
6
|
+
class ParamIterator(IteratorBase):
    """
    ParamIterator

    Overview

        This component iterates over a set of parameters and executes a job
        once for every parameter/value pair.

    .. table:: Properties
       :widths: auto

    +----------+----------+--------------------------------------------------------+
    | Name     | Required | Summary                                                |
    +----------+----------+--------------------------------------------------------+
    | params   | Yes      | Dictionary mapping each parameter name to the iterable |
    |          |          | of values the job is executed with.                    |
    +----------+----------+--------------------------------------------------------+

    Returns

        The status returned by the last executed job (``False`` when no job
        was executed).

    Example:

    ```yaml
    ParamIterator:
      params:
        formid:
          - 2552
          - 2567
          - 2569
    ```

    """

    async def start(self, **kwargs):
        """Take the previous component's output (if any) as this component's data."""
        # NOTE(review): if IteratorBase.start is a coroutine function, this call
        # only creates a coroutine that is never awaited -- confirm against the
        # base class and add ``await`` if so.
        super(ParamIterator, self).start()
        if self.previous:
            self.data = self.input
        return True

    def get_iterator(self):
        """Expand ``self.params`` into a flat list of single-key dictionaries.

        ``{"formid": [1, 2]}`` becomes ``[{"formid": 1}, {"formid": 2}]``.

        Raises:
            ComponentError: when no parameters are defined, or when the
                parameters cannot be iterated.
        """
        params = getattr(self, "params", None)
        if not params:
            # Raised outside the ``try`` below so the message is not re-wrapped
            # by the generic handler.
            raise ComponentError("Error: Doesnt exists Parameters!")
        try:
            return [
                {name: value}
                for name, values in params.items()
                for value in values
            ]
        except Exception as err:
            raise ComponentError(f"Error: Generating Iterator: {err}") from err

    async def run(self):
        """Run the next step once per parameter/value pair.

        Returns:
            The status of the last job that was executed, or ``False`` when
            no job ran.

        Raises:
            NotSupported: when the job fails with ProviderError,
                ComponentError or NotSupported.
            ComponentError: when the job fails with any other exception, or
                when the iterator cannot be generated.
        """
        status = False
        iterator = self.get_iterator()
        step, target, params = self.get_step()
        step_name = step.name
        for item in iterator:
            # The same ``params`` mapping is reused; only "parameters" changes
            # on every iteration.
            params["parameters"] = item
            self._result = item
            job = self.get_job(target, **params)
            if not job:
                continue
            try:
                status = await self.async_job(job, step_name)
            except (ProviderError, ComponentError, NotSupported) as err:
                raise NotSupported(
                    f"Error running Component {step_name}, error: {err}"
                ) from err
            except Exception as err:
                raise ComponentError(
                    f"Generic Component Error on {step_name}, error: {err}"
                ) from err
        return status

    async def close(self):
        """Nothing to release."""
        pass
|
@@ -0,0 +1,84 @@
|
|
1
|
+
from typing import Union
|
2
|
+
from pathlib import PurePath, Path
|
3
|
+
# BeautifulSoup:
|
4
|
+
from bs4 import BeautifulSoup
|
5
|
+
from lxml import html, etree
|
6
|
+
# aiofiles:
|
7
|
+
import aiofiles
|
8
|
+
|
9
|
+
from .flow import FlowComponent
|
10
|
+
|
11
|
+
|
12
|
+
class ParseHTML(FlowComponent):
    """ParseHTML.

    Parse HTML Content using lxml etree and BeautifulSoup.

    For every input filename the result holds the raw content, a
    BeautifulSoup object and an ``lxml.html`` tree; when the ``xml``
    attribute is ``True`` an ``lxml.etree`` tree is added as well.

    Example:

    ```yaml
    ParseHTML:
      xml: true
    ```

    """

    async def open_html(self, filename: Union[str, PurePath]) -> bytes:
        """Read the file and return its raw (binary) content.

        Raises:
            FileNotFoundError: when the file does not exist.
        """
        # Always build a concrete Path: a PurePath has no ``exists()``.
        path = Path(filename)
        if isinstance(filename, str):
            path = path.resolve()
        if not path.exists():
            raise FileNotFoundError(
                f"File not found: {path}"
            )
        # Read-only binary mode: the file is never written, and an update
        # mode would fail on read-only files.
        async with aiofiles.open(path, 'rb') as fp:
            return await fp.read()

    async def start(self, **kwargs) -> bool:
        """Take the list of filenames from the previous component.

        Raises:
            TypeError: when no list of filenames is available.
        """
        if self.previous:
            self._filelist = self.input
        # TODO: parsing from a directory provided instead.
        # ``getattr`` so a missing file list reaches the TypeError below
        # instead of failing with AttributeError.
        if not isinstance(getattr(self, '_filelist', None), list):
            raise TypeError(
                "Input must be a list of filenames"
            )
        return True

    def get_soup(self, content: Union[str, bytes], parser: str = 'html.parser'):
        """Get a BeautifulSoup Object."""
        return BeautifulSoup(content, parser)

    def get_etree(self, content: Union[str, bytes]) -> tuple:
        """Parse the content as XML and as HTML.

        Returns:
            A ``(xml_tree, html_tree)`` tuple; each element is ``None`` when
            the corresponding parser rejected the content.
        """
        # lxml.html.fromstring reports empty documents with ParserError,
        # which is not an XMLSyntaxError.
        parse_errors = (etree.XMLSyntaxError, etree.ParserError)
        try:
            x = etree.fromstring(content)
        except parse_errors:
            x = None
        try:
            h = html.fromstring(content)
        except parse_errors:
            h = None
        return x, h

    async def run(self):
        """
        Open all Filenames and convert them into BeautifulSoup and etree objects.
        """
        self._result = {}
        include_xml = getattr(self, 'xml', False) is True
        for filename in self._filelist:
            content = await self.open_html(filename)
            etree_obj, html_obj = self.get_etree(content)
            parsed = {
                'soup': self.get_soup(content),
                'html': html_obj,
                'content': content
            }
            if include_xml:
                parsed['xml'] = etree_obj
            self._result[filename] = parsed

        return self._result

    async def close(self):
        """Nothing to release."""
        pass
|