flowtask 5.8.4__cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowtask/__init__.py +93 -0
- flowtask/__main__.py +38 -0
- flowtask/bots/__init__.py +6 -0
- flowtask/bots/check.py +93 -0
- flowtask/bots/codebot.py +51 -0
- flowtask/components/ASPX.py +148 -0
- flowtask/components/AddDataset.py +352 -0
- flowtask/components/Amazon.py +523 -0
- flowtask/components/AutoTask.py +314 -0
- flowtask/components/Azure.py +80 -0
- flowtask/components/AzureUsers.py +106 -0
- flowtask/components/BaseAction.py +91 -0
- flowtask/components/BaseLoop.py +198 -0
- flowtask/components/BestBuy.py +800 -0
- flowtask/components/CSVToGCS.py +120 -0
- flowtask/components/CompanyScraper/__init__.py +1 -0
- flowtask/components/CompanyScraper/parsers/__init__.py +6 -0
- flowtask/components/CompanyScraper/parsers/base.py +102 -0
- flowtask/components/CompanyScraper/parsers/explorium.py +192 -0
- flowtask/components/CompanyScraper/parsers/leadiq.py +206 -0
- flowtask/components/CompanyScraper/parsers/rocket.py +133 -0
- flowtask/components/CompanyScraper/parsers/siccode.py +109 -0
- flowtask/components/CompanyScraper/parsers/visualvisitor.py +130 -0
- flowtask/components/CompanyScraper/parsers/zoominfo.py +118 -0
- flowtask/components/CompanyScraper/scrapper.py +1054 -0
- flowtask/components/CopyTo.py +177 -0
- flowtask/components/CopyToBigQuery.py +243 -0
- flowtask/components/CopyToMongoDB.py +291 -0
- flowtask/components/CopyToPg.py +609 -0
- flowtask/components/CopyToRethink.py +207 -0
- flowtask/components/CreateGCSBucket.py +102 -0
- flowtask/components/CreateReport/CreateReport.py +228 -0
- flowtask/components/CreateReport/__init__.py +9 -0
- flowtask/components/CreateReport/charts/__init__.py +15 -0
- flowtask/components/CreateReport/charts/bar.py +51 -0
- flowtask/components/CreateReport/charts/base.py +66 -0
- flowtask/components/CreateReport/charts/pie.py +64 -0
- flowtask/components/CreateReport/utils.py +9 -0
- flowtask/components/CustomerSatisfaction.py +196 -0
- flowtask/components/DataInput.py +200 -0
- flowtask/components/DateList.py +255 -0
- flowtask/components/DbClient.py +163 -0
- flowtask/components/DialPad.py +146 -0
- flowtask/components/DocumentDBQuery.py +200 -0
- flowtask/components/DownloadFrom.py +371 -0
- flowtask/components/DownloadFromD2L.py +113 -0
- flowtask/components/DownloadFromFTP.py +181 -0
- flowtask/components/DownloadFromIMAP.py +315 -0
- flowtask/components/DownloadFromS3.py +198 -0
- flowtask/components/DownloadFromSFTP.py +265 -0
- flowtask/components/DownloadFromSharepoint.py +110 -0
- flowtask/components/DownloadFromSmartSheet.py +114 -0
- flowtask/components/DownloadS3File.py +229 -0
- flowtask/components/Dummy.py +59 -0
- flowtask/components/DuplicatePhoto.py +411 -0
- flowtask/components/EmployeeEvaluation.py +237 -0
- flowtask/components/ExecuteSQL.py +323 -0
- flowtask/components/ExtractHTML.py +178 -0
- flowtask/components/FileBase.py +178 -0
- flowtask/components/FileCopy.py +181 -0
- flowtask/components/FileDelete.py +82 -0
- flowtask/components/FileExists.py +146 -0
- flowtask/components/FileIteratorDelete.py +112 -0
- flowtask/components/FileList.py +194 -0
- flowtask/components/FileOpen.py +75 -0
- flowtask/components/FileRead.py +120 -0
- flowtask/components/FileRename.py +106 -0
- flowtask/components/FilterIf.py +284 -0
- flowtask/components/FilterRows/FilterRows.py +200 -0
- flowtask/components/FilterRows/__init__.py +10 -0
- flowtask/components/FilterRows/functions.py +4 -0
- flowtask/components/GCSToBigQuery.py +103 -0
- flowtask/components/GoogleA4.py +150 -0
- flowtask/components/GoogleGeoCoding.py +344 -0
- flowtask/components/GooglePlaces.py +315 -0
- flowtask/components/GoogleSearch.py +539 -0
- flowtask/components/HTTPClient.py +268 -0
- flowtask/components/ICIMS.py +146 -0
- flowtask/components/IF.py +179 -0
- flowtask/components/IcimsFolderCopy.py +173 -0
- flowtask/components/ImageFeatures/__init__.py +5 -0
- flowtask/components/ImageFeatures/process.py +233 -0
- flowtask/components/IteratorBase.py +251 -0
- flowtask/components/LangchainLoader/__init__.py +5 -0
- flowtask/components/LangchainLoader/loader.py +194 -0
- flowtask/components/LangchainLoader/loaders/__init__.py +22 -0
- flowtask/components/LangchainLoader/loaders/abstract.py +362 -0
- flowtask/components/LangchainLoader/loaders/basepdf.py +50 -0
- flowtask/components/LangchainLoader/loaders/docx.py +91 -0
- flowtask/components/LangchainLoader/loaders/html.py +119 -0
- flowtask/components/LangchainLoader/loaders/pdfblocks.py +146 -0
- flowtask/components/LangchainLoader/loaders/pdfmark.py +79 -0
- flowtask/components/LangchainLoader/loaders/pdftables.py +135 -0
- flowtask/components/LangchainLoader/loaders/qa.py +67 -0
- flowtask/components/LangchainLoader/loaders/txt.py +55 -0
- flowtask/components/LeadIQ.py +650 -0
- flowtask/components/Loop.py +253 -0
- flowtask/components/Lowes.py +334 -0
- flowtask/components/MS365Usage.py +156 -0
- flowtask/components/MSTeamsMessages.py +320 -0
- flowtask/components/MarketClustering.py +1051 -0
- flowtask/components/MergeFiles.py +362 -0
- flowtask/components/MilvusOutput.py +87 -0
- flowtask/components/NearByStores.py +175 -0
- flowtask/components/NetworkNinja/__init__.py +6 -0
- flowtask/components/NetworkNinja/models/__init__.py +52 -0
- flowtask/components/NetworkNinja/models/abstract.py +177 -0
- flowtask/components/NetworkNinja/models/account.py +39 -0
- flowtask/components/NetworkNinja/models/client.py +19 -0
- flowtask/components/NetworkNinja/models/district.py +14 -0
- flowtask/components/NetworkNinja/models/events.py +101 -0
- flowtask/components/NetworkNinja/models/forms.py +499 -0
- flowtask/components/NetworkNinja/models/market.py +16 -0
- flowtask/components/NetworkNinja/models/organization.py +34 -0
- flowtask/components/NetworkNinja/models/photos.py +125 -0
- flowtask/components/NetworkNinja/models/project.py +44 -0
- flowtask/components/NetworkNinja/models/region.py +28 -0
- flowtask/components/NetworkNinja/models/store.py +203 -0
- flowtask/components/NetworkNinja/models/user.py +151 -0
- flowtask/components/NetworkNinja/router.py +854 -0
- flowtask/components/Odoo.py +175 -0
- flowtask/components/OdooInjector.py +192 -0
- flowtask/components/OpenFromXML.py +126 -0
- flowtask/components/OpenWeather.py +41 -0
- flowtask/components/OpenWithBase.py +616 -0
- flowtask/components/OpenWithPandas.py +715 -0
- flowtask/components/PGPDecrypt.py +199 -0
- flowtask/components/PandasIterator.py +187 -0
- flowtask/components/PandasToFile.py +189 -0
- flowtask/components/Paradox.py +339 -0
- flowtask/components/ParamIterator.py +117 -0
- flowtask/components/ParseHTML.py +84 -0
- flowtask/components/PlacerStores.py +249 -0
- flowtask/components/Pokemon.py +507 -0
- flowtask/components/PositiveBot.py +62 -0
- flowtask/components/PowerPointSlide.py +400 -0
- flowtask/components/PrintMessage.py +127 -0
- flowtask/components/ProductCompetitors/__init__.py +5 -0
- flowtask/components/ProductCompetitors/parsers/__init__.py +7 -0
- flowtask/components/ProductCompetitors/parsers/base.py +72 -0
- flowtask/components/ProductCompetitors/parsers/bestbuy.py +86 -0
- flowtask/components/ProductCompetitors/parsers/lowes.py +103 -0
- flowtask/components/ProductCompetitors/scrapper.py +155 -0
- flowtask/components/ProductCompliant.py +169 -0
- flowtask/components/ProductInfo/__init__.py +1 -0
- flowtask/components/ProductInfo/parsers/__init__.py +5 -0
- flowtask/components/ProductInfo/parsers/base.py +83 -0
- flowtask/components/ProductInfo/parsers/brother.py +97 -0
- flowtask/components/ProductInfo/parsers/canon.py +167 -0
- flowtask/components/ProductInfo/parsers/epson.py +118 -0
- flowtask/components/ProductInfo/parsers/hp.py +131 -0
- flowtask/components/ProductInfo/parsers/samsung.py +97 -0
- flowtask/components/ProductInfo/scraper.py +319 -0
- flowtask/components/ProductPricing.py +118 -0
- flowtask/components/QS.py +261 -0
- flowtask/components/QSBase.py +201 -0
- flowtask/components/QueryIterator.py +273 -0
- flowtask/components/QueryToInsert.py +327 -0
- flowtask/components/QueryToPandas.py +432 -0
- flowtask/components/RESTClient.py +195 -0
- flowtask/components/RethinkDBQuery.py +189 -0
- flowtask/components/Rsync.py +74 -0
- flowtask/components/RunSSH.py +59 -0
- flowtask/components/RunShell.py +71 -0
- flowtask/components/SalesForce.py +20 -0
- flowtask/components/SaveImageBank/__init__.py +257 -0
- flowtask/components/SchedulingVisits.py +592 -0
- flowtask/components/ScrapPage.py +216 -0
- flowtask/components/ScrapSearch.py +79 -0
- flowtask/components/SendNotify.py +257 -0
- flowtask/components/SentimentAnalysis.py +694 -0
- flowtask/components/ServiceScrapper/__init__.py +5 -0
- flowtask/components/ServiceScrapper/parsers/__init__.py +1 -0
- flowtask/components/ServiceScrapper/parsers/base.py +94 -0
- flowtask/components/ServiceScrapper/parsers/costco.py +93 -0
- flowtask/components/ServiceScrapper/scrapper.py +199 -0
- flowtask/components/SetVariables.py +156 -0
- flowtask/components/SubTask.py +182 -0
- flowtask/components/SuiteCRM.py +48 -0
- flowtask/components/Switch.py +175 -0
- flowtask/components/TableBase.py +148 -0
- flowtask/components/TableDelete.py +312 -0
- flowtask/components/TableInput.py +143 -0
- flowtask/components/TableOutput/TableOutput.py +384 -0
- flowtask/components/TableOutput/__init__.py +3 -0
- flowtask/components/TableSchema.py +534 -0
- flowtask/components/Target.py +223 -0
- flowtask/components/ThumbnailGenerator.py +156 -0
- flowtask/components/ToPandas.py +67 -0
- flowtask/components/TransformRows/TransformRows.py +507 -0
- flowtask/components/TransformRows/__init__.py +9 -0
- flowtask/components/TransformRows/functions.py +559 -0
- flowtask/components/TransposeRows.py +176 -0
- flowtask/components/UPCDatabase.py +86 -0
- flowtask/components/UnGzip.py +171 -0
- flowtask/components/Uncompress.py +172 -0
- flowtask/components/UniqueRows.py +126 -0
- flowtask/components/Unzip.py +107 -0
- flowtask/components/UpdateOperationalVars.py +147 -0
- flowtask/components/UploadTo.py +299 -0
- flowtask/components/UploadToS3.py +136 -0
- flowtask/components/UploadToSFTP.py +160 -0
- flowtask/components/UploadToSharepoint.py +205 -0
- flowtask/components/UserFunc.py +122 -0
- flowtask/components/VivaTracker.py +140 -0
- flowtask/components/WSDLClient.py +123 -0
- flowtask/components/Wait.py +18 -0
- flowtask/components/Walmart.py +199 -0
- flowtask/components/Workplace.py +134 -0
- flowtask/components/XMLToPandas.py +267 -0
- flowtask/components/Zammad/__init__.py +41 -0
- flowtask/components/Zammad/models.py +0 -0
- flowtask/components/ZoomInfoScraper.py +409 -0
- flowtask/components/__init__.py +104 -0
- flowtask/components/abstract.py +18 -0
- flowtask/components/flow.py +530 -0
- flowtask/components/google.py +335 -0
- flowtask/components/group.py +221 -0
- flowtask/components/py.typed +0 -0
- flowtask/components/reviewscrap.py +132 -0
- flowtask/components/tAutoincrement.py +117 -0
- flowtask/components/tConcat.py +109 -0
- flowtask/components/tExplode.py +119 -0
- flowtask/components/tFilter.py +184 -0
- flowtask/components/tGroup.py +236 -0
- flowtask/components/tJoin.py +270 -0
- flowtask/components/tMap/__init__.py +9 -0
- flowtask/components/tMap/functions.py +54 -0
- flowtask/components/tMap/tMap.py +450 -0
- flowtask/components/tMelt.py +112 -0
- flowtask/components/tMerge.py +114 -0
- flowtask/components/tOrder.py +93 -0
- flowtask/components/tPandas.py +94 -0
- flowtask/components/tPivot.py +71 -0
- flowtask/components/tPluckCols.py +76 -0
- flowtask/components/tUnnest.py +82 -0
- flowtask/components/user.py +401 -0
- flowtask/conf.py +457 -0
- flowtask/download.py +102 -0
- flowtask/events/__init__.py +11 -0
- flowtask/events/events/__init__.py +20 -0
- flowtask/events/events/abstract.py +95 -0
- flowtask/events/events/alerts/__init__.py +362 -0
- flowtask/events/events/alerts/colfunctions.py +131 -0
- flowtask/events/events/alerts/functions.py +158 -0
- flowtask/events/events/dummy.py +12 -0
- flowtask/events/events/exec.py +124 -0
- flowtask/events/events/file/__init__.py +7 -0
- flowtask/events/events/file/base.py +51 -0
- flowtask/events/events/file/copy.py +23 -0
- flowtask/events/events/file/delete.py +16 -0
- flowtask/events/events/interfaces/__init__.py +9 -0
- flowtask/events/events/interfaces/client.py +67 -0
- flowtask/events/events/interfaces/credentials.py +28 -0
- flowtask/events/events/interfaces/notifications.py +58 -0
- flowtask/events/events/jira.py +122 -0
- flowtask/events/events/log.py +26 -0
- flowtask/events/events/logerr.py +52 -0
- flowtask/events/events/notify.py +59 -0
- flowtask/events/events/notify_event.py +160 -0
- flowtask/events/events/publish.py +54 -0
- flowtask/events/events/sendfile.py +104 -0
- flowtask/events/events/task.py +97 -0
- flowtask/events/events/teams.py +98 -0
- flowtask/events/events/webhook.py +58 -0
- flowtask/events/manager.py +287 -0
- flowtask/exceptions.c +39393 -0
- flowtask/exceptions.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/extensions/__init__.py +3 -0
- flowtask/extensions/abstract.py +82 -0
- flowtask/extensions/logging/__init__.py +65 -0
- flowtask/hooks/__init__.py +9 -0
- flowtask/hooks/actions/__init__.py +22 -0
- flowtask/hooks/actions/abstract.py +66 -0
- flowtask/hooks/actions/dummy.py +23 -0
- flowtask/hooks/actions/jira.py +74 -0
- flowtask/hooks/actions/rest.py +320 -0
- flowtask/hooks/actions/sampledata.py +37 -0
- flowtask/hooks/actions/sensor.py +23 -0
- flowtask/hooks/actions/task.py +9 -0
- flowtask/hooks/actions/ticket.py +37 -0
- flowtask/hooks/actions/zammad.py +55 -0
- flowtask/hooks/hook.py +62 -0
- flowtask/hooks/models.py +17 -0
- flowtask/hooks/service.py +187 -0
- flowtask/hooks/step.py +91 -0
- flowtask/hooks/types/__init__.py +23 -0
- flowtask/hooks/types/base.py +129 -0
- flowtask/hooks/types/brokers/__init__.py +11 -0
- flowtask/hooks/types/brokers/base.py +54 -0
- flowtask/hooks/types/brokers/mqtt.py +35 -0
- flowtask/hooks/types/brokers/rabbitmq.py +82 -0
- flowtask/hooks/types/brokers/redis.py +83 -0
- flowtask/hooks/types/brokers/sqs.py +44 -0
- flowtask/hooks/types/fs.py +232 -0
- flowtask/hooks/types/http.py +49 -0
- flowtask/hooks/types/imap.py +200 -0
- flowtask/hooks/types/jira.py +279 -0
- flowtask/hooks/types/mail.py +205 -0
- flowtask/hooks/types/postgres.py +98 -0
- flowtask/hooks/types/responses/__init__.py +8 -0
- flowtask/hooks/types/responses/base.py +5 -0
- flowtask/hooks/types/sharepoint.py +288 -0
- flowtask/hooks/types/ssh.py +141 -0
- flowtask/hooks/types/tagged.py +59 -0
- flowtask/hooks/types/upload.py +85 -0
- flowtask/hooks/types/watch.py +71 -0
- flowtask/hooks/types/web.py +36 -0
- flowtask/interfaces/AzureClient.py +137 -0
- flowtask/interfaces/AzureGraph.py +839 -0
- flowtask/interfaces/Boto3Client.py +326 -0
- flowtask/interfaces/DropboxClient.py +173 -0
- flowtask/interfaces/ExcelHandler.py +94 -0
- flowtask/interfaces/FTPClient.py +131 -0
- flowtask/interfaces/GoogleCalendar.py +201 -0
- flowtask/interfaces/GoogleClient.py +133 -0
- flowtask/interfaces/GoogleDrive.py +127 -0
- flowtask/interfaces/GoogleGCS.py +89 -0
- flowtask/interfaces/GoogleGeocoding.py +93 -0
- flowtask/interfaces/GoogleLang.py +114 -0
- flowtask/interfaces/GooglePub.py +61 -0
- flowtask/interfaces/GoogleSheet.py +68 -0
- flowtask/interfaces/IMAPClient.py +137 -0
- flowtask/interfaces/O365Calendar.py +113 -0
- flowtask/interfaces/O365Client.py +220 -0
- flowtask/interfaces/OneDrive.py +284 -0
- flowtask/interfaces/Outlook.py +155 -0
- flowtask/interfaces/ParrotBot.py +130 -0
- flowtask/interfaces/SSHClient.py +378 -0
- flowtask/interfaces/Sharepoint.py +496 -0
- flowtask/interfaces/__init__.py +36 -0
- flowtask/interfaces/azureauth.py +119 -0
- flowtask/interfaces/cache.py +201 -0
- flowtask/interfaces/client.py +82 -0
- flowtask/interfaces/compress.py +525 -0
- flowtask/interfaces/credentials.py +124 -0
- flowtask/interfaces/d2l.py +239 -0
- flowtask/interfaces/databases/__init__.py +5 -0
- flowtask/interfaces/databases/db.py +223 -0
- flowtask/interfaces/databases/documentdb.py +55 -0
- flowtask/interfaces/databases/rethink.py +39 -0
- flowtask/interfaces/dataframes/__init__.py +11 -0
- flowtask/interfaces/dataframes/abstract.py +21 -0
- flowtask/interfaces/dataframes/arrow.py +71 -0
- flowtask/interfaces/dataframes/dt.py +69 -0
- flowtask/interfaces/dataframes/pandas.py +167 -0
- flowtask/interfaces/dataframes/polars.py +60 -0
- flowtask/interfaces/db.py +263 -0
- flowtask/interfaces/env.py +46 -0
- flowtask/interfaces/func.py +137 -0
- flowtask/interfaces/http.py +1780 -0
- flowtask/interfaces/locale.py +40 -0
- flowtask/interfaces/log.py +75 -0
- flowtask/interfaces/mask.py +143 -0
- flowtask/interfaces/notification.py +154 -0
- flowtask/interfaces/playwright.py +339 -0
- flowtask/interfaces/powerpoint.py +368 -0
- flowtask/interfaces/py.typed +0 -0
- flowtask/interfaces/qs.py +376 -0
- flowtask/interfaces/result.py +87 -0
- flowtask/interfaces/selenium_service.py +779 -0
- flowtask/interfaces/smartsheet.py +154 -0
- flowtask/interfaces/stat.py +39 -0
- flowtask/interfaces/task.py +96 -0
- flowtask/interfaces/template.py +118 -0
- flowtask/interfaces/vectorstores/__init__.py +1 -0
- flowtask/interfaces/vectorstores/abstract.py +133 -0
- flowtask/interfaces/vectorstores/milvus.py +669 -0
- flowtask/interfaces/zammad.py +107 -0
- flowtask/models.py +193 -0
- flowtask/parsers/__init__.py +15 -0
- flowtask/parsers/_yaml.c +11978 -0
- flowtask/parsers/_yaml.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/argparser.py +235 -0
- flowtask/parsers/base.c +15155 -0
- flowtask/parsers/base.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/json.c +11968 -0
- flowtask/parsers/json.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/parsers/maps.py +49 -0
- flowtask/parsers/toml.c +11968 -0
- flowtask/parsers/toml.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/plugins/__init__.py +16 -0
- flowtask/plugins/components/__init__.py +0 -0
- flowtask/plugins/handler/__init__.py +45 -0
- flowtask/plugins/importer.py +31 -0
- flowtask/plugins/sources/__init__.py +0 -0
- flowtask/runner.py +283 -0
- flowtask/scheduler/__init__.py +9 -0
- flowtask/scheduler/functions.py +493 -0
- flowtask/scheduler/handlers/__init__.py +8 -0
- flowtask/scheduler/handlers/manager.py +504 -0
- flowtask/scheduler/handlers/models.py +58 -0
- flowtask/scheduler/handlers/service.py +72 -0
- flowtask/scheduler/notifications.py +65 -0
- flowtask/scheduler/scheduler.py +993 -0
- flowtask/services/__init__.py +0 -0
- flowtask/services/bots/__init__.py +0 -0
- flowtask/services/bots/telegram.py +264 -0
- flowtask/services/files/__init__.py +11 -0
- flowtask/services/files/manager.py +522 -0
- flowtask/services/files/model.py +37 -0
- flowtask/services/files/service.py +767 -0
- flowtask/services/jira/__init__.py +3 -0
- flowtask/services/jira/jira_actions.py +191 -0
- flowtask/services/tasks/__init__.py +13 -0
- flowtask/services/tasks/launcher.py +213 -0
- flowtask/services/tasks/manager.py +323 -0
- flowtask/services/tasks/service.py +275 -0
- flowtask/services/tasks/task_manager.py +376 -0
- flowtask/services/tasks/tasks.py +155 -0
- flowtask/storages/__init__.py +16 -0
- flowtask/storages/exceptions.py +12 -0
- flowtask/storages/files/__init__.py +8 -0
- flowtask/storages/files/abstract.py +29 -0
- flowtask/storages/files/filesystem.py +66 -0
- flowtask/storages/tasks/__init__.py +19 -0
- flowtask/storages/tasks/abstract.py +26 -0
- flowtask/storages/tasks/database.py +33 -0
- flowtask/storages/tasks/filesystem.py +108 -0
- flowtask/storages/tasks/github.py +119 -0
- flowtask/storages/tasks/memory.py +45 -0
- flowtask/storages/tasks/row.py +25 -0
- flowtask/tasks/__init__.py +0 -0
- flowtask/tasks/abstract.py +526 -0
- flowtask/tasks/command.py +118 -0
- flowtask/tasks/pile.py +486 -0
- flowtask/tasks/py.typed +0 -0
- flowtask/tasks/task.py +778 -0
- flowtask/template/__init__.py +161 -0
- flowtask/tests.py +257 -0
- flowtask/types/__init__.py +8 -0
- flowtask/types/typedefs.c +11347 -0
- flowtask/types/typedefs.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/__init__.py +24 -0
- flowtask/utils/constants.py +117 -0
- flowtask/utils/encoders.py +21 -0
- flowtask/utils/executor.py +112 -0
- flowtask/utils/functions.cpp +14280 -0
- flowtask/utils/functions.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/json.cpp +13349 -0
- flowtask/utils/json.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/mail.py +63 -0
- flowtask/utils/parseqs.c +13324 -0
- flowtask/utils/parserqs.cpython-312-x86_64-linux-gnu.so +0 -0
- flowtask/utils/stats.py +308 -0
- flowtask/utils/transformations.py +74 -0
- flowtask/utils/uv.py +12 -0
- flowtask/utils/validators.py +97 -0
- flowtask/version.py +11 -0
- flowtask-5.8.4.dist-info/LICENSE +201 -0
- flowtask-5.8.4.dist-info/METADATA +209 -0
- flowtask-5.8.4.dist-info/RECORD +470 -0
- flowtask-5.8.4.dist-info/WHEEL +6 -0
- flowtask-5.8.4.dist-info/entry_points.txt +3 -0
- flowtask-5.8.4.dist-info/top_level.txt +2 -0
- plugins/components/CreateQR.py +39 -0
- plugins/components/TestComponent.py +28 -0
- plugins/components/Use1.py +13 -0
- plugins/components/Workplace.py +117 -0
- plugins/components/__init__.py +3 -0
- plugins/sources/__init__.py +0 -0
- plugins/sources/get_populartimes.py +78 -0
- plugins/sources/google.py +150 -0
- plugins/sources/hubspot.py +679 -0
- plugins/sources/icims.py +679 -0
- plugins/sources/mobileinsight.py +501 -0
- plugins/sources/newrelic.py +262 -0
- plugins/sources/uap.py +268 -0
- plugins/sources/venu.py +244 -0
- plugins/sources/vocinity.py +314 -0
@@ -0,0 +1,669 @@
|
|
1
|
+
from typing import List, Union, Optional, Any
|
2
|
+
from dataclasses import fields, is_dataclass
|
3
|
+
import asyncio
|
4
|
+
from pathlib import Path, PurePath
|
5
|
+
from fastavro import writer, reader, parse_schema
|
6
|
+
from pymilvus import (
|
7
|
+
MilvusClient,
|
8
|
+
Collection,
|
9
|
+
FieldSchema,
|
10
|
+
CollectionSchema,
|
11
|
+
DataType,
|
12
|
+
connections,
|
13
|
+
db
|
14
|
+
)
|
15
|
+
from pymilvus.exceptions import MilvusException
|
16
|
+
from langchain_milvus import Milvus # pylint: disable=import-error, E0611
|
17
|
+
from langchain.schema import Document
|
18
|
+
from ..credentials import CredentialsInterface
|
19
|
+
from ...conf import (
|
20
|
+
MILVUS_URL,
|
21
|
+
MILVUS_HOST,
|
22
|
+
MILVUS_PORT,
|
23
|
+
MILVUS_DATABASE,
|
24
|
+
MILVUS_PROTOCOL,
|
25
|
+
MILVUS_USER,
|
26
|
+
MILVUS_PASSWORD,
|
27
|
+
MILVUS_TOKEN,
|
28
|
+
MILVUS_SECURE,
|
29
|
+
MILVUS_SERVER_NAME,
|
30
|
+
MILVUS_CA_CERT,
|
31
|
+
MILVUS_SERVER_CERT,
|
32
|
+
MILVUS_SERVER_KEY,
|
33
|
+
MILVUS_USE_TLSv2
|
34
|
+
)
|
35
|
+
from .abstract import AbstractStore
|
36
|
+
|
37
|
+
|
38
|
+
class MilvusStore(AbstractStore, CredentialsInterface):
|
39
|
+
"""
|
40
|
+
Interface for managing document storage in Milvus using LangChain.
|
41
|
+
"""
|
42
|
+
_credentials: dict = {
|
43
|
+
"uri": str,
|
44
|
+
"host": str,
|
45
|
+
"port": int,
|
46
|
+
"user": str,
|
47
|
+
"password": str,
|
48
|
+
"token": str,
|
49
|
+
"db_name": str,
|
50
|
+
"collection_name": str,
|
51
|
+
}
|
52
|
+
|
53
|
+
def __init__(self, *args, **kwargs):
    """Pull the Milvus-specific options out of ``kwargs`` and store them.

    Every recognised option is popped from ``kwargs`` so that only the
    remaining keyword arguments reach the parent initialisers.

    Keyword Args:
        host: Server host; falls back to ``MILVUS_HOST``.
        port: Server port; falls back to ``MILVUS_PORT``.
        protocol: URL scheme; falls back to ``MILVUS_PROTOCOL``.
        secure: Stored as ``self._secure``; falls back to ``MILVUS_SECURE``.
        create_database: Stored as ``self.create_database``; defaults to True.
        collection_name: Default collection name; defaults to None.
        consistency_level: Defaults to ``'Session'``.
    """
    # (attribute on self, key in kwargs, fallback) -- applied in this order.
    option_table = (
        ("host", "host", MILVUS_HOST),
        ("port", "port", MILVUS_PORT),
        ("protocol", "protocol", MILVUS_PROTOCOL),
        ("_secure", "secure", MILVUS_SECURE),
        # NOTE(review): this attribute shares its name with the
        # ``create_database`` method of this class -- confirm that is intended.
        ("create_database", "create_database", True),
        ("collection_name", "collection_name", None),
        ("consistency_level", "consistency_level", "Session"),
    )
    for attribute, option, fallback in option_table:
        setattr(self, attribute, kwargs.pop(option, fallback))
    super().__init__(*args, **kwargs)
|
62
|
+
|
63
|
+
def processing_credentials(self):
    """Resolve endpoint, authentication and TLS settings into ``self.credentials``.

    Runs the parent hook first, then:

    * resolves ``self.url`` / ``self.host`` / ``self.port`` from the
      credentials, the values captured in ``__init__`` and the ``MILVUS_*``
      configuration defaults;
    * pops ``protocol``, ``collection_name``, ``token``, ``user``,
      ``password`` and the certificate paths out of ``self.credentials``;
    * writes back a ``token`` entry and, when ``self._secure`` is True, the
      TLS arguments.

    Raises:
        ValueError: If the URL carries a port that is not a valid number.
    """
    # Function-scope import: keeps this block independent of the file header.
    from urllib.parse import urlparse

    super().processing_credentials()
    self.url = self.credentials.get('uri', MILVUS_URL) or MILVUS_URL
    self.host = self.credentials.get('host', self.host)
    self.protocol = self.credentials.pop('protocol', self.protocol)
    self.port = self.credentials.get('port', self.port)
    self.collection_name = self.credentials.pop('collection_name', self.collection_name)
    if not self.url:
        # No URL configured anywhere: build one from its parts.
        self.url = f"{self.protocol}://{self.host}:{self.port}"
        self.credentials['uri'] = self.url
    else:
        # Extract host and port from URL.  ``urlparse`` only recognises the
        # network location after ``//``, so scheme-less values get one
        # prepended.  Unlike splitting on ":", this tolerates URLs without a
        # port, with a path, or with an IPv6 host.
        target = self.url if "://" in self.url else f"//{self.url}"
        parsed = urlparse(target)
        if not self.host and parsed.hostname:
            self.host = parsed.hostname
        if not self.port and parsed.port is not None:
            self.port = parsed.port
    self.token = self.credentials.pop("token", MILVUS_TOKEN)
    # user and password (if required)
    self.user = self.credentials.pop("user", MILVUS_USER)
    self.password = self.credentials.pop("password", MILVUS_PASSWORD)
    # Database:
    self.database = self.credentials.get('db_name', MILVUS_DATABASE)
    # SSL/TLS
    self._server_name: str = self.credentials.get('server_name', MILVUS_SERVER_NAME)
    self._cert: str = self.credentials.pop('server_pem_path', MILVUS_SERVER_CERT)
    self._ca_cert: str = self.credentials.pop('ca_pem_path', MILVUS_CA_CERT)
    self._cert_key: str = self.credentials.pop('client_key_path', MILVUS_SERVER_KEY)

    if self.token:
        self.credentials['token'] = self.token
    if self.user:
        # A configured user overwrites the token entry written just above.
        self.credentials['token'] = f"{self.user}:{self.password}"
    if self._secure is True:
        args = {
            "secure": self._secure,
            "server_name": self._server_name
        }
        if self._cert:
            if MILVUS_USE_TLSv2 is True:
                args['client_pem_path'] = self._cert
                args['client_key_path'] = self._cert_key
            else:
                args["server_pem_path"] = self._cert
        if self._ca_cert:
            args['ca_pem_path'] = self._ca_cert
        # TLS arguments win over same-named entries already in credentials.
        self.credentials = {**self.credentials, **args}
|
111
|
+
|
112
|
+
async def connect(self, alias: str = None) -> "MilvusStore":
    """Connects to the Milvus database.

    Registers a pymilvus connection under ``alias``, runs
    ``use_database`` when a database name is configured, and finally
    creates the ``MilvusClient`` stored in ``self._connection``.

    Args:
        alias: Connection alias; ``'default'`` is used when it is falsy.

    Returns:
        This store instance, as declared by the return annotation.
    """
    self._client_id = alias or 'default'
    connections.connect(
        alias=self._client_id,
        **self.credentials
    )
    if self.database:
        self.use_database(
            self.database,
            alias=self._client_id,
            create=self.create_database
        )
    self._connection = MilvusClient(
        **self.credentials
    )
    return self
|
131
|
+
|
132
|
+
async def disconnect(self, alias: str = 'default'):
    """Drop the pymilvus connection ``alias`` and close the client.

    Args:
        alias: Connection alias handed to ``connections.disconnect``.
    """
    try:
        connections.disconnect(alias=alias)
        client = self._connection
        client.close()
    except AttributeError:
        # Raised, for instance, when ``self._connection`` is None and so
        # has no ``close``; there is nothing left to release in that case.
        pass
    finally:
        # The client handle is cleared whether or not closing succeeded.
        self._connection = None
|
140
|
+
|
141
|
+
def use_database(
    self,
    db_name: str,
    alias: str = 'default',
    create: bool = False
) -> None:
    """Make sure ``db_name`` exists on the server, creating it when allowed.

    Creation is allowed when either the instance flag set in ``__init__``
    (``self.create_database``) or the ``create`` argument is True.  The
    ``alias`` connection is always disconnected before returning.

    Args:
        db_name: Database that must exist.
        alias: pymilvus connection alias to use.
        create: Allow creating the database for this call.

    Raises:
        ValueError: If the database is missing and creation is not allowed,
            or if creating it fails.
    """
    allow_create = self.create_database is True or create is True
    try:
        connections.connect(alias, **self.credentials)
    except MilvusException as exc:
        if "database not found" in exc.message:
            if not allow_create:
                raise ValueError(
                    f"Database {db_name} does not exist."
                ) from exc
            args = self.credentials.copy()
            # ``db_name`` may be absent when the name came from configuration.
            args.pop('db_name', None)
            # ``__init__`` stores a bool under ``self.create_database``, which
            # hides the method on the instance; go through the class instead.
            type(self).create_database(self, db_name, alias=alias, **args)
        # Any other error is not handled here: the re-connect below raises
        # again if the problem persists.
    # re-connect:
    try:
        connections.connect(alias, **self.credentials)
        if db_name not in db.list_database(using=alias):
            if not allow_create:
                raise ValueError(
                    f"Database {db_name} does not exist."
                )
            try:
                db.create_database(db_name, using=alias, timeout=10)
                self.logger.notice(
                    f"Database {db_name} created successfully."
                )
            except Exception as e:
                raise ValueError(
                    f"Error creating database: {e}"
                ) from e
    finally:
        connections.disconnect(alias=alias)
|
174
|
+
|
175
|
+
def create_database(self, db_name: str, alias: str = 'default', **kwargs) -> bool:
    """Create ``db_name`` over a temporary connection registered as ``alias``.

    Args:
        db_name: Name of the database to create.
        alias: pymilvus connection alias used for the operation.
        **kwargs: Extra connection arguments; they override the ``uri``,
            ``host`` and ``port`` taken from this instance.

    Returns:
        True when the database was created.

    Raises:
        ValueError: If connecting or creating the database fails.
    """
    args = {
        "uri": self.url,
        "host": self.host,
        "port": self.port,
        **kwargs
    }
    try:
        connections.connect(alias, **args)
        # Target the connection opened above rather than the default alias.
        db.create_database(db_name, using=alias)
        self.logger.notice(
            f"Database {db_name} created successfully."
        )
        return True
    except Exception as e:
        raise ValueError(
            f"Error creating database: {e}"
        ) from e
    finally:
        connections.disconnect(alias=alias)
|
194
|
+
|
195
|
+
async def delete_documents_by_attr(
    self,
    collection_name: str,
    attribute_name: str,
    attribute_value: str
):
    """
    Deletes documents in the Milvus collection that match a specific attribute.

    The filter sent to Milvus is ``<attribute_name> == "<attribute_value>"``.
    Backslashes and double quotes in the value are escaped so that the value
    is always read as a single string literal.

    Args:
        collection_name (str): The name of the Milvus collection to delete from.
        attribute_name (str): The name of the attribute to filter on.
        attribute_value (str): The value of the attribute to match for deletion.

    Raises:
        Exception: If the deletion operation fails, the error is logged and re-raised.

    Returns:
        None

    Note:
        On success the result returned by the client's ``delete`` call is
        logged as a notice.  When no client is available an error is logged
        and nothing is deleted.
    """
    # Escape backslashes first so the quote escapes are not doubled.
    literal = str(attribute_value).replace('\\', '\\\\').replace('"', '\\"')
    try:
        async with self:
            if self._connection is None:
                self.logger.error(
                    "Error: Not connected to Milvus. Please call connect() first."
                )
                return
            deleted = self._connection.delete(
                collection_name=collection_name,
                filter=f'{attribute_name} == "{literal}"'
            )
            self.logger.notice(
                f"Documents with {attribute_name} = {attribute_value} deleted: {deleted}"
            )
    except Exception as e:
        self.logger.error(f"Failed to delete documents: {e}")
        raise
|
236
|
+
|
237
|
+
async def load_documents(
    self,
    documents: List[Document],
    upsert: Optional[bool] = True,
    collection: str = None,
    pk: str = 'source_type',
    dimension: int = 768,
    index_type: str = 'HNSW',
    metric_type: str = 'L2',
    **kwargs
):
    """
    Loads LangChain documents into the Milvus collection.

    The collection is created through ``create_default_collection`` when it
    does not exist yet.  With ``upsert`` enabled, documents whose ``pk``
    attribute matches one of the incoming values are deleted first.

    Args:
        documents (List[Document]): List of LangChain Document objects.
        upsert (bool): If True, delete existing documents with matching attributes before inserting.
        collection (str): Name of the collection; ``self.collection`` when empty.
        pk (str): If upsert True, metadata key used for deleting documents before inserting.
        dimension (int): Vector dimension used if the collection is created.
        index_type (str): Index type used if the collection is created.
        metric_type (str): Metric type used if the collection is created.
        **kwargs: Forwarded to ``Milvus.afrom_documents``.

    Returns:
        A ``(docstore, documents)`` tuple, or None when no client is available.
    """
    if not self._connection:
        await self.connect()

    if not collection:
        # NOTE(review): ``__init__`` stores the default name under
        # ``collection_name``; ``self.collection`` is not assigned in the
        # visible part of this class -- confirm a base class provides it.
        collection = self.collection

    # Add possibility of creating the Collection
    # Ensure the collection exists before attempting deletes or inserts
    if not await self.collection_exists(collection):
        # Attempt to create the collection
        await self.create_default_collection(
            collection_name=collection,
            dimension=dimension,
            index_type=index_type,
            metric_type=metric_type
        )

    if upsert is True:
        # Delete documents with matching `pk` value.  The delete filter is
        # built from the value's string form, so one delete per distinct
        # string form issues exactly the same set of filters as one delete
        # per document, without the repeats.
        handled = set()
        for doc in documents:
            category = doc.metadata.get(pk)
            if not category:
                continue
            marker = str(category)
            if marker in handled:
                continue
            handled.add(marker)
            await self.delete_documents_by_attr(collection, pk, category)

    # Insert documents asynchronously
    async with self:
        if self._connection is None:
            self.logger.error(
                "Error: Not connected to Milvus. Please call connect() first."
            )
            return
        if documents:
            # Guarded: indexing an empty list would raise IndexError.
            self.logger.debug(f"Inserting documents {documents[0]}")
        docstore = await Milvus.afrom_documents(
            documents,
            embedding=self._embed_,
            connection_args={**self.credentials},
            collection_name=collection,
            drop_old=False,
            consistency_level=self.consistency_level,
            primary_field='pk',
            text_field=self.text_field,
            vector_field=self.vector_field,
            **kwargs
        )
        self.logger.info(
            f"{len(documents)} Docs loaded into Milvus collection '{collection}': {docstore}"
        )
        return docstore, documents
|
303
|
+
|
304
|
+
async def collection_exists(self, collection_name: str) -> bool:
    """Return True when ``collection_name`` is among the listed collections.

    The lookup runs inside ``async with self`` and uses
    ``self._connection.list_collections()``.
    """
    async with self:
        return collection_name in self._connection.list_collections()
|
308
|
+
|
309
|
+
def check_state(self, collection_name: str) -> dict:
    """Return whatever ``get_load_state`` reports for ``collection_name``."""
    load_state = self._connection.get_load_state(
        collection_name=collection_name
    )
    return load_state
|
311
|
+
|
312
|
+
async def delete_collection(self, collection: str = None) -> dict:
    """Drop ``collection`` through the current connection.

    The value of ``collection`` is forwarded as-is (including ``None``) and
    the result of ``drop_collection`` is discarded, so this coroutine
    resolves to ``None``.
    """
    drop = self._connection.drop_collection
    drop(collection_name=collection)
|
316
|
+
|
317
|
+
async def create_default_collection(
    self,
    collection_name: str,
    document: Any = None,
    dimension: int = 768,
    index_type: str = None,
    metric_type: str = None,
    schema_type: str = 'default',
    database: Optional[str] = None,
    metadata_field: str = None,
    **kwargs
) -> dict:
    """create_collection.

    Create a Schema (Milvus Collection) on the Current Database.

    Args:
        collection_name (str): Collection Name.
        document (Any): Sample document used to build the collection when
            ``schema_type`` is not 'default'.
        dimension (int, optional): Vector Dimension. Defaults to 768.
        index_type (str, optional): Index type of the vector field. Falls
            back to ``self._index_type`` when empty.
        metric_type (str, optional): Metric for the vector index. Falls back
            to ``self._metric_type`` when empty.
        schema_type (str, optional): 'default' builds the fixed loader
            schema; any other value builds the collection from ``document``.
        database (str, optional): Accepted for compatibility; not used by
            this method.
        metadata_field (str, optional): Forwarded as ``metadata_field`` to
            ``Milvus.from_documents`` in the document-based branch.
        **kwargs: Extra arguments for ``Milvus.from_documents``
            (document-based branch only).

    Returns:
        ``None`` when the collection already exists or after creating the
        default schema; the ``Milvus`` docstore in the document-based branch.
    """
    # Check if collection exists:
    if await self.collection_exists(collection_name):
        self.logger.warning(
            f"Collection {collection_name} already exists."
        )
        return None

    if not index_type:
        index_type = self._index_type
    # Build parameters per index family. Equality / tuple membership is used
    # on purpose: `x in ('IVF_PQ')` would be a substring test on a string.
    if index_type == 'HNSW':
        idx_params = {
            "M": 36,
            "efConstruction": 1024
        }
    elif index_type in ('IVF_FLAT', 'SCANN', 'IVF_SQ8'):
        idx_params = {
            "nlist": 1024
        }
    elif index_type == 'IVF_PQ':
        idx_params = {
            "nlist": 1024,
            "m": 16
        }
    else:
        idx_params = {}
    if not metric_type:
        metric_type = self._metric_type  # default metric type

    async with self:
        if schema_type == 'default':
            # Default Collection for all loaders:
            schema = MilvusClient.create_schema(
                auto_id=False,
                enable_dynamic_field=True,
                description=collection_name
            )
            schema.add_field(
                field_name="pk",
                datatype=DataType.INT64,
                is_primary=True,
                auto_id=True,
                max_length=100
            )
            # VARCHAR columns in declaration order: (name, max_length, description)
            varchar_columns = (
                ("url", 65535, None),
                ("source", 65535, None),
                ("filename", 65535, None),
                ("question", 65535, None),
                ("answer", 65535, None),
                ("source_type", 128, None),
                ("type", 65535, None),
                ("category", 65535, None),
                ("text", 65535, "Text"),
                ("summary", 65535, "Summary (refine resume)"),
            )
            for name, max_length, description in varchar_columns:
                extra = {} if description is None else {"description": description}
                schema.add_field(
                    field_name=name,
                    datatype=DataType.VARCHAR,
                    max_length=max_length,
                    **extra
                )
            schema.add_field(
                field_name="vector",
                datatype=DataType.FLOAT_VECTOR,
                dim=dimension,
                description="vector"
            )
            schema.add_field(
                field_name="document_meta",
                datatype=DataType.JSON,
                description="Custom Metadata information"
            )
            index_params = self._connection.prepare_index_params()
            index_params.add_index(
                field_name="pk",
                index_type="STL_SORT"
            )
            for name in ("text", "summary"):
                index_params.add_index(
                    field_name=name,
                    index_type="marisa-trie"
                )
            index_params.add_index(
                field_name="vector",
                index_type=index_type,
                metric_type=metric_type,
                params=idx_params
            )
            self._connection.create_collection(
                collection_name=collection_name,
                schema=schema,
                index_params=index_params,
                num_shards=2
            )
            # Short pause before querying the load state; the state itself
            # is not used by this method.
            await asyncio.sleep(2)
            self._connection.get_load_state(
                collection_name=collection_name
            )
            return None
        else:
            # Create a Collection based on a Document
            self._connection.create_collection(
                collection_name=collection_name,
                dimension=dimension
            )
            if metadata_field:
                kwargs['metadata_field'] = metadata_field
            # Here using drop_old=True to force recreate based on the first document
            # NOTE(review): connection_args comes from `self.kwargs` here,
            # while load_documents uses `self.credentials` -- confirm intended.
            docstore = Milvus.from_documents(
                [document],  # Only the first document
                self._embed_,
                connection_args={**self.kwargs},
                collection_name=collection_name,
                drop_old=True,
                consistency_level='Session',
                primary_field='pk',
                text_field='text',
                vector_field='vector',
                **kwargs
            )
            return docstore
|
503
|
+
|
504
|
+
def _minimal_schema(self, dimension: int) -> List[FieldSchema]:
    """Build the field list for a minimal collection.

    Fields, in order: auto-id INT64 primary key ``pk``, ``text``,
    ``source_type``, ``category`` and a FLOAT_VECTOR ``vector`` of size
    ``dimension``.
    """
    primary_key = FieldSchema(
        name="pk", dtype=DataType.INT64, is_primary=True, auto_id=True
    )
    text = FieldSchema(name="text", dtype=DataType.VARCHAR, max_length=65535)
    source_type = FieldSchema(
        name="source_type", dtype=DataType.VARCHAR, max_length=128
    )
    category = FieldSchema(
        name="category", dtype=DataType.VARCHAR, max_length=128
    )
    embedding = FieldSchema(
        name="vector", dtype=DataType.FLOAT_VECTOR, dim=dimension
    )
    return [primary_key, text, source_type, category, embedding]
|
513
|
+
|
514
|
+
async def create_collection(
    self,
    collection_name: str,
    origin: Union[str, Path, Any] = None,
    dimension: int = 768,
    index_type: str = "HNSW",
    metric_type: str = "L2",
    **kwargs
) -> dict:
    """
    Create a Milvus collection based on an origin.

    Args:
        collection_name (str): Name of the Milvus collection.
        origin (Union[str, Path, Any]): None for minimal schema, Path (or
            string path) for Avro file, or dataclass for schema.
        dimension (int, optional): Dimension of the vector field. Defaults to 768.
        index_type (str, optional): Index type for vector field. Defaults to
            "HNSW"; an empty value falls back to ``self._index_type``.
        metric_type (str, optional): Metric type for vector index. Defaults
            to "L2"; an empty value falls back to ``self._metric_type``.
        **kwargs: Accepted for compatibility; not used by this method.

    Returns:
        dict: ``{"collection_name": ..., "status": "created"}``, or ``None``
        when the collection already exists.
    """
    if await self.collection_exists(collection_name):
        self.logger.warning(
            f"Collection {collection_name} already exists."
        )
        return None

    if not index_type:
        index_type = self._index_type
    if not metric_type:
        # Same fallback the default-collection builder applies.
        metric_type = self._metric_type
    # Equality / tuple membership on purpose: `x in ('IVF_PQ')` would be a
    # substring test against the string 'IVF_PQ'.
    if index_type == 'HNSW':
        idx_params = {
            "M": 36,
            "efConstruction": 1024
        }
    elif index_type in ('IVF_FLAT', 'SCANN', 'IVF_SQ8'):
        idx_params = {
            "nlist": 1024
        }
    elif index_type == 'IVF_PQ':
        idx_params = {
            "nlist": 1024,
            "m": 16
        }
    else:
        idx_params = {}

    _fields = []

    if origin is None:
        # Define minimal schema with basic fields
        _fields = self._minimal_schema(dimension)
    elif is_dataclass(origin):
        # Define schema based on dataclass fields
        _fields = self._as_dataclass_schema(origin)
    elif isinstance(origin, (PurePath, str)):
        if isinstance(origin, str):
            origin = Path(origin).resolve()
        # NOTE(review): `_as_avro_schema` is not defined in the visible part
        # of this class (only the async `_parse_avro_schema(avro_file,
        # dimension)` is) -- confirm this helper exists.
        _fields = self._as_avro_schema(origin)

    # Create the collection schema and collection
    schema = CollectionSchema(
        _fields,
        description=f"Schema for {collection_name}"
    )
    await self._create_milvus_collection(
        collection_name,
        schema,
        index_type,
        metric_type,
        idx_params
    )
    return {
        "collection_name": collection_name,
        "status": "created"
    }
|
584
|
+
|
585
|
+
def _as_dataclass_schema(self, dataclass_type: Any) -> List[FieldSchema]:
|
586
|
+
"""Defines fields for a Milvus collection based on dataclass attributes."""
|
587
|
+
_fields = []
|
588
|
+
for field in fields(dataclass_type):
|
589
|
+
field_name = field.name
|
590
|
+
field_type = field.type
|
591
|
+
if field_type == str:
|
592
|
+
size = getattr(field, 'metadata', {}).get('size', 65535)
|
593
|
+
_fields.append(
|
594
|
+
FieldSchema(name=field_name, dtype=DataType.VARCHAR, max_length=size)
|
595
|
+
)
|
596
|
+
elif field_type == int:
|
597
|
+
_fields.append(
|
598
|
+
FieldSchema(name=field_name, dtype=DataType.INT64)
|
599
|
+
)
|
600
|
+
elif field_type == float:
|
601
|
+
_fields.append(FieldSchema(name=field_name, dtype=DataType.FLOAT))
|
602
|
+
elif field_type == bytes:
|
603
|
+
# Assume bytes field indicates a vector; specify dimension in metadata
|
604
|
+
dim = getattr(field, 'metadata', {}).get('dim', 768)
|
605
|
+
_fields.append(
|
606
|
+
FieldSchema(name=field_name, dtype=DataType.FLOAT_VECTOR, dim=dim)
|
607
|
+
)
|
608
|
+
elif field_type == bool:
|
609
|
+
_fields.append(
|
610
|
+
FieldSchema(name=field_name, dtype=DataType.BOOL)
|
611
|
+
)
|
612
|
+
else:
|
613
|
+
print(
|
614
|
+
f"Unsupported field type for dataclass field {field_name}: {field_type}"
|
615
|
+
)
|
616
|
+
return _fields
|
617
|
+
|
618
|
+
async def _parse_avro_schema(self, avro_file: Path, dimension: int) -> List[FieldSchema]:
|
619
|
+
"""Parses an Avro schema file to define Milvus collection fields."""
|
620
|
+
fields = []
|
621
|
+
try:
|
622
|
+
schema = parse_schema(open(avro_file, "r").read())
|
623
|
+
for field in schema.fields:
|
624
|
+
field_name = field.name
|
625
|
+
field_type = field.type
|
626
|
+
if field_type == "string":
|
627
|
+
fields.append(FieldSchema(name=field_name, dtype=DataType.VARCHAR, max_length=65535))
|
628
|
+
elif field_type == "int" or field_type == "long":
|
629
|
+
fields.append(FieldSchema(name=field_name, dtype=DataType.INT64))
|
630
|
+
elif field_type == "float" or field_type == "double":
|
631
|
+
fields.append(FieldSchema(name=field_name, dtype=DataType.FLOAT))
|
632
|
+
elif field_type == "bytes":
|
633
|
+
fields.append(FieldSchema(name=field_name, dtype=DataType.FLOAT_VECTOR, dim=dimension))
|
634
|
+
elif field_type == "boolean":
|
635
|
+
fields.append(FieldSchema(name=field_name, dtype=DataType.BOOL))
|
636
|
+
else:
|
637
|
+
print(f"Unsupported field type: {field_type}")
|
638
|
+
except Exception as e:
|
639
|
+
print(f"Failed to parse Avro schema: {e}")
|
640
|
+
return fields
|
641
|
+
|
642
|
+
async def _create_milvus_collection(
|
643
|
+
self,
|
644
|
+
collection_name: str,
|
645
|
+
schema: CollectionSchema,
|
646
|
+
index_type: str,
|
647
|
+
metric_type: str,
|
648
|
+
idx_params: dict
|
649
|
+
):
|
650
|
+
"""Creates a collection with the given schema in Milvus."""
|
651
|
+
index_params = {
|
652
|
+
"index_type": index_type,
|
653
|
+
"metric_type": metric_type,
|
654
|
+
"params": idx_params
|
655
|
+
}
|
656
|
+
try:
|
657
|
+
collection = Collection(
|
658
|
+
name=collection_name,
|
659
|
+
schema=schema,
|
660
|
+
num_shards=2
|
661
|
+
)
|
662
|
+
collection.create_index(field_name="vector", index_params=index_params)
|
663
|
+
self.logger.debug(
|
664
|
+
f"Created collection '{collection_name}' with schema: {schema}"
|
665
|
+
)
|
666
|
+
except Exception as e:
|
667
|
+
self.logger.error(
|
668
|
+
f"Failed to create collection '{collection_name}': {e}"
|
669
|
+
)
|