PraisonAI 3.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (393) hide show
  1. praisonai/__init__.py +54 -0
  2. praisonai/__main__.py +15 -0
  3. praisonai/acp/__init__.py +54 -0
  4. praisonai/acp/config.py +159 -0
  5. praisonai/acp/server.py +587 -0
  6. praisonai/acp/session.py +219 -0
  7. praisonai/adapters/__init__.py +50 -0
  8. praisonai/adapters/readers.py +395 -0
  9. praisonai/adapters/rerankers.py +315 -0
  10. praisonai/adapters/retrievers.py +394 -0
  11. praisonai/adapters/vector_stores.py +409 -0
  12. praisonai/agent_scheduler.py +337 -0
  13. praisonai/agents_generator.py +903 -0
  14. praisonai/api/call.py +292 -0
  15. praisonai/auto.py +1197 -0
  16. praisonai/capabilities/__init__.py +275 -0
  17. praisonai/capabilities/a2a.py +140 -0
  18. praisonai/capabilities/assistants.py +283 -0
  19. praisonai/capabilities/audio.py +320 -0
  20. praisonai/capabilities/batches.py +469 -0
  21. praisonai/capabilities/completions.py +336 -0
  22. praisonai/capabilities/container_files.py +155 -0
  23. praisonai/capabilities/containers.py +93 -0
  24. praisonai/capabilities/embeddings.py +158 -0
  25. praisonai/capabilities/files.py +467 -0
  26. praisonai/capabilities/fine_tuning.py +293 -0
  27. praisonai/capabilities/guardrails.py +182 -0
  28. praisonai/capabilities/images.py +330 -0
  29. praisonai/capabilities/mcp.py +190 -0
  30. praisonai/capabilities/messages.py +270 -0
  31. praisonai/capabilities/moderations.py +154 -0
  32. praisonai/capabilities/ocr.py +217 -0
  33. praisonai/capabilities/passthrough.py +204 -0
  34. praisonai/capabilities/rag.py +207 -0
  35. praisonai/capabilities/realtime.py +160 -0
  36. praisonai/capabilities/rerank.py +165 -0
  37. praisonai/capabilities/responses.py +266 -0
  38. praisonai/capabilities/search.py +109 -0
  39. praisonai/capabilities/skills.py +133 -0
  40. praisonai/capabilities/vector_store_files.py +334 -0
  41. praisonai/capabilities/vector_stores.py +304 -0
  42. praisonai/capabilities/videos.py +141 -0
  43. praisonai/chainlit_ui.py +304 -0
  44. praisonai/chat/__init__.py +106 -0
  45. praisonai/chat/app.py +125 -0
  46. praisonai/cli/__init__.py +26 -0
  47. praisonai/cli/app.py +213 -0
  48. praisonai/cli/commands/__init__.py +75 -0
  49. praisonai/cli/commands/acp.py +70 -0
  50. praisonai/cli/commands/completion.py +333 -0
  51. praisonai/cli/commands/config.py +166 -0
  52. praisonai/cli/commands/debug.py +142 -0
  53. praisonai/cli/commands/diag.py +55 -0
  54. praisonai/cli/commands/doctor.py +166 -0
  55. praisonai/cli/commands/environment.py +179 -0
  56. praisonai/cli/commands/lsp.py +112 -0
  57. praisonai/cli/commands/mcp.py +210 -0
  58. praisonai/cli/commands/profile.py +457 -0
  59. praisonai/cli/commands/run.py +228 -0
  60. praisonai/cli/commands/schedule.py +150 -0
  61. praisonai/cli/commands/serve.py +97 -0
  62. praisonai/cli/commands/session.py +212 -0
  63. praisonai/cli/commands/traces.py +145 -0
  64. praisonai/cli/commands/version.py +101 -0
  65. praisonai/cli/configuration/__init__.py +18 -0
  66. praisonai/cli/configuration/loader.py +353 -0
  67. praisonai/cli/configuration/paths.py +114 -0
  68. praisonai/cli/configuration/schema.py +164 -0
  69. praisonai/cli/features/__init__.py +268 -0
  70. praisonai/cli/features/acp.py +236 -0
  71. praisonai/cli/features/action_orchestrator.py +546 -0
  72. praisonai/cli/features/agent_scheduler.py +773 -0
  73. praisonai/cli/features/agent_tools.py +474 -0
  74. praisonai/cli/features/agents.py +375 -0
  75. praisonai/cli/features/at_mentions.py +471 -0
  76. praisonai/cli/features/auto_memory.py +182 -0
  77. praisonai/cli/features/autonomy_mode.py +490 -0
  78. praisonai/cli/features/background.py +356 -0
  79. praisonai/cli/features/base.py +168 -0
  80. praisonai/cli/features/capabilities.py +1326 -0
  81. praisonai/cli/features/checkpoints.py +338 -0
  82. praisonai/cli/features/code_intelligence.py +652 -0
  83. praisonai/cli/features/compaction.py +294 -0
  84. praisonai/cli/features/compare.py +534 -0
  85. praisonai/cli/features/cost_tracker.py +514 -0
  86. praisonai/cli/features/debug.py +810 -0
  87. praisonai/cli/features/deploy.py +517 -0
  88. praisonai/cli/features/diag.py +289 -0
  89. praisonai/cli/features/doctor/__init__.py +63 -0
  90. praisonai/cli/features/doctor/checks/__init__.py +24 -0
  91. praisonai/cli/features/doctor/checks/acp_checks.py +240 -0
  92. praisonai/cli/features/doctor/checks/config_checks.py +366 -0
  93. praisonai/cli/features/doctor/checks/db_checks.py +366 -0
  94. praisonai/cli/features/doctor/checks/env_checks.py +543 -0
  95. praisonai/cli/features/doctor/checks/lsp_checks.py +199 -0
  96. praisonai/cli/features/doctor/checks/mcp_checks.py +349 -0
  97. praisonai/cli/features/doctor/checks/memory_checks.py +268 -0
  98. praisonai/cli/features/doctor/checks/network_checks.py +251 -0
  99. praisonai/cli/features/doctor/checks/obs_checks.py +328 -0
  100. praisonai/cli/features/doctor/checks/performance_checks.py +235 -0
  101. praisonai/cli/features/doctor/checks/permissions_checks.py +259 -0
  102. praisonai/cli/features/doctor/checks/selftest_checks.py +322 -0
  103. praisonai/cli/features/doctor/checks/serve_checks.py +426 -0
  104. praisonai/cli/features/doctor/checks/skills_checks.py +231 -0
  105. praisonai/cli/features/doctor/checks/tools_checks.py +371 -0
  106. praisonai/cli/features/doctor/engine.py +266 -0
  107. praisonai/cli/features/doctor/formatters.py +310 -0
  108. praisonai/cli/features/doctor/handler.py +397 -0
  109. praisonai/cli/features/doctor/models.py +264 -0
  110. praisonai/cli/features/doctor/registry.py +239 -0
  111. praisonai/cli/features/endpoints.py +1019 -0
  112. praisonai/cli/features/eval.py +560 -0
  113. praisonai/cli/features/external_agents.py +231 -0
  114. praisonai/cli/features/fast_context.py +410 -0
  115. praisonai/cli/features/flow_display.py +566 -0
  116. praisonai/cli/features/git_integration.py +651 -0
  117. praisonai/cli/features/guardrail.py +171 -0
  118. praisonai/cli/features/handoff.py +185 -0
  119. praisonai/cli/features/hooks.py +583 -0
  120. praisonai/cli/features/image.py +384 -0
  121. praisonai/cli/features/interactive_runtime.py +585 -0
  122. praisonai/cli/features/interactive_tools.py +380 -0
  123. praisonai/cli/features/interactive_tui.py +603 -0
  124. praisonai/cli/features/jobs.py +632 -0
  125. praisonai/cli/features/knowledge.py +531 -0
  126. praisonai/cli/features/lite.py +244 -0
  127. praisonai/cli/features/lsp_cli.py +225 -0
  128. praisonai/cli/features/mcp.py +169 -0
  129. praisonai/cli/features/message_queue.py +587 -0
  130. praisonai/cli/features/metrics.py +211 -0
  131. praisonai/cli/features/n8n.py +673 -0
  132. praisonai/cli/features/observability.py +293 -0
  133. praisonai/cli/features/ollama.py +361 -0
  134. praisonai/cli/features/output_style.py +273 -0
  135. praisonai/cli/features/package.py +631 -0
  136. praisonai/cli/features/performance.py +308 -0
  137. praisonai/cli/features/persistence.py +636 -0
  138. praisonai/cli/features/profile.py +226 -0
  139. praisonai/cli/features/profiler/__init__.py +81 -0
  140. praisonai/cli/features/profiler/core.py +558 -0
  141. praisonai/cli/features/profiler/optimizations.py +652 -0
  142. praisonai/cli/features/profiler/suite.py +386 -0
  143. praisonai/cli/features/profiling.py +350 -0
  144. praisonai/cli/features/queue/__init__.py +73 -0
  145. praisonai/cli/features/queue/manager.py +395 -0
  146. praisonai/cli/features/queue/models.py +286 -0
  147. praisonai/cli/features/queue/persistence.py +564 -0
  148. praisonai/cli/features/queue/scheduler.py +484 -0
  149. praisonai/cli/features/queue/worker.py +372 -0
  150. praisonai/cli/features/recipe.py +1723 -0
  151. praisonai/cli/features/recipes.py +449 -0
  152. praisonai/cli/features/registry.py +229 -0
  153. praisonai/cli/features/repo_map.py +860 -0
  154. praisonai/cli/features/router.py +466 -0
  155. praisonai/cli/features/sandbox_executor.py +515 -0
  156. praisonai/cli/features/serve.py +829 -0
  157. praisonai/cli/features/session.py +222 -0
  158. praisonai/cli/features/skills.py +856 -0
  159. praisonai/cli/features/slash_commands.py +650 -0
  160. praisonai/cli/features/telemetry.py +179 -0
  161. praisonai/cli/features/templates.py +1384 -0
  162. praisonai/cli/features/thinking.py +305 -0
  163. praisonai/cli/features/todo.py +334 -0
  164. praisonai/cli/features/tools.py +680 -0
  165. praisonai/cli/features/tui/__init__.py +83 -0
  166. praisonai/cli/features/tui/app.py +580 -0
  167. praisonai/cli/features/tui/cli.py +566 -0
  168. praisonai/cli/features/tui/debug.py +511 -0
  169. praisonai/cli/features/tui/events.py +99 -0
  170. praisonai/cli/features/tui/mock_provider.py +328 -0
  171. praisonai/cli/features/tui/orchestrator.py +652 -0
  172. praisonai/cli/features/tui/screens/__init__.py +50 -0
  173. praisonai/cli/features/tui/screens/main.py +245 -0
  174. praisonai/cli/features/tui/screens/queue.py +174 -0
  175. praisonai/cli/features/tui/screens/session.py +124 -0
  176. praisonai/cli/features/tui/screens/settings.py +148 -0
  177. praisonai/cli/features/tui/widgets/__init__.py +56 -0
  178. praisonai/cli/features/tui/widgets/chat.py +261 -0
  179. praisonai/cli/features/tui/widgets/composer.py +224 -0
  180. praisonai/cli/features/tui/widgets/queue_panel.py +200 -0
  181. praisonai/cli/features/tui/widgets/status.py +167 -0
  182. praisonai/cli/features/tui/widgets/tool_panel.py +248 -0
  183. praisonai/cli/features/workflow.py +720 -0
  184. praisonai/cli/legacy.py +236 -0
  185. praisonai/cli/main.py +5559 -0
  186. praisonai/cli/schedule_cli.py +54 -0
  187. praisonai/cli/state/__init__.py +31 -0
  188. praisonai/cli/state/identifiers.py +161 -0
  189. praisonai/cli/state/sessions.py +313 -0
  190. praisonai/code/__init__.py +93 -0
  191. praisonai/code/agent_tools.py +344 -0
  192. praisonai/code/diff/__init__.py +21 -0
  193. praisonai/code/diff/diff_strategy.py +432 -0
  194. praisonai/code/tools/__init__.py +27 -0
  195. praisonai/code/tools/apply_diff.py +221 -0
  196. praisonai/code/tools/execute_command.py +275 -0
  197. praisonai/code/tools/list_files.py +274 -0
  198. praisonai/code/tools/read_file.py +206 -0
  199. praisonai/code/tools/search_replace.py +248 -0
  200. praisonai/code/tools/write_file.py +217 -0
  201. praisonai/code/utils/__init__.py +46 -0
  202. praisonai/code/utils/file_utils.py +307 -0
  203. praisonai/code/utils/ignore_utils.py +308 -0
  204. praisonai/code/utils/text_utils.py +276 -0
  205. praisonai/db/__init__.py +64 -0
  206. praisonai/db/adapter.py +531 -0
  207. praisonai/deploy/__init__.py +62 -0
  208. praisonai/deploy/api.py +231 -0
  209. praisonai/deploy/docker.py +454 -0
  210. praisonai/deploy/doctor.py +367 -0
  211. praisonai/deploy/main.py +327 -0
  212. praisonai/deploy/models.py +179 -0
  213. praisonai/deploy/providers/__init__.py +33 -0
  214. praisonai/deploy/providers/aws.py +331 -0
  215. praisonai/deploy/providers/azure.py +358 -0
  216. praisonai/deploy/providers/base.py +101 -0
  217. praisonai/deploy/providers/gcp.py +314 -0
  218. praisonai/deploy/schema.py +208 -0
  219. praisonai/deploy.py +185 -0
  220. praisonai/endpoints/__init__.py +53 -0
  221. praisonai/endpoints/a2u_server.py +410 -0
  222. praisonai/endpoints/discovery.py +165 -0
  223. praisonai/endpoints/providers/__init__.py +28 -0
  224. praisonai/endpoints/providers/a2a.py +253 -0
  225. praisonai/endpoints/providers/a2u.py +208 -0
  226. praisonai/endpoints/providers/agents_api.py +171 -0
  227. praisonai/endpoints/providers/base.py +231 -0
  228. praisonai/endpoints/providers/mcp.py +263 -0
  229. praisonai/endpoints/providers/recipe.py +206 -0
  230. praisonai/endpoints/providers/tools_mcp.py +150 -0
  231. praisonai/endpoints/registry.py +131 -0
  232. praisonai/endpoints/server.py +161 -0
  233. praisonai/inbuilt_tools/__init__.py +24 -0
  234. praisonai/inbuilt_tools/autogen_tools.py +117 -0
  235. praisonai/inc/__init__.py +2 -0
  236. praisonai/inc/config.py +96 -0
  237. praisonai/inc/models.py +155 -0
  238. praisonai/integrations/__init__.py +56 -0
  239. praisonai/integrations/base.py +303 -0
  240. praisonai/integrations/claude_code.py +270 -0
  241. praisonai/integrations/codex_cli.py +255 -0
  242. praisonai/integrations/cursor_cli.py +195 -0
  243. praisonai/integrations/gemini_cli.py +222 -0
  244. praisonai/jobs/__init__.py +67 -0
  245. praisonai/jobs/executor.py +425 -0
  246. praisonai/jobs/models.py +230 -0
  247. praisonai/jobs/router.py +314 -0
  248. praisonai/jobs/server.py +186 -0
  249. praisonai/jobs/store.py +203 -0
  250. praisonai/llm/__init__.py +66 -0
  251. praisonai/llm/registry.py +382 -0
  252. praisonai/mcp_server/__init__.py +152 -0
  253. praisonai/mcp_server/adapters/__init__.py +74 -0
  254. praisonai/mcp_server/adapters/agents.py +128 -0
  255. praisonai/mcp_server/adapters/capabilities.py +168 -0
  256. praisonai/mcp_server/adapters/cli_tools.py +568 -0
  257. praisonai/mcp_server/adapters/extended_capabilities.py +462 -0
  258. praisonai/mcp_server/adapters/knowledge.py +93 -0
  259. praisonai/mcp_server/adapters/memory.py +104 -0
  260. praisonai/mcp_server/adapters/prompts.py +306 -0
  261. praisonai/mcp_server/adapters/resources.py +124 -0
  262. praisonai/mcp_server/adapters/tools_bridge.py +280 -0
  263. praisonai/mcp_server/auth/__init__.py +48 -0
  264. praisonai/mcp_server/auth/api_key.py +291 -0
  265. praisonai/mcp_server/auth/oauth.py +460 -0
  266. praisonai/mcp_server/auth/oidc.py +289 -0
  267. praisonai/mcp_server/auth/scopes.py +260 -0
  268. praisonai/mcp_server/cli.py +852 -0
  269. praisonai/mcp_server/elicitation.py +445 -0
  270. praisonai/mcp_server/icons.py +302 -0
  271. praisonai/mcp_server/recipe_adapter.py +573 -0
  272. praisonai/mcp_server/recipe_cli.py +824 -0
  273. praisonai/mcp_server/registry.py +703 -0
  274. praisonai/mcp_server/sampling.py +422 -0
  275. praisonai/mcp_server/server.py +490 -0
  276. praisonai/mcp_server/tasks.py +443 -0
  277. praisonai/mcp_server/transports/__init__.py +18 -0
  278. praisonai/mcp_server/transports/http_stream.py +376 -0
  279. praisonai/mcp_server/transports/stdio.py +132 -0
  280. praisonai/persistence/__init__.py +84 -0
  281. praisonai/persistence/config.py +238 -0
  282. praisonai/persistence/conversation/__init__.py +25 -0
  283. praisonai/persistence/conversation/async_mysql.py +427 -0
  284. praisonai/persistence/conversation/async_postgres.py +410 -0
  285. praisonai/persistence/conversation/async_sqlite.py +371 -0
  286. praisonai/persistence/conversation/base.py +151 -0
  287. praisonai/persistence/conversation/json_store.py +250 -0
  288. praisonai/persistence/conversation/mysql.py +387 -0
  289. praisonai/persistence/conversation/postgres.py +401 -0
  290. praisonai/persistence/conversation/singlestore.py +240 -0
  291. praisonai/persistence/conversation/sqlite.py +341 -0
  292. praisonai/persistence/conversation/supabase.py +203 -0
  293. praisonai/persistence/conversation/surrealdb.py +287 -0
  294. praisonai/persistence/factory.py +301 -0
  295. praisonai/persistence/hooks/__init__.py +18 -0
  296. praisonai/persistence/hooks/agent_hooks.py +297 -0
  297. praisonai/persistence/knowledge/__init__.py +26 -0
  298. praisonai/persistence/knowledge/base.py +144 -0
  299. praisonai/persistence/knowledge/cassandra.py +232 -0
  300. praisonai/persistence/knowledge/chroma.py +295 -0
  301. praisonai/persistence/knowledge/clickhouse.py +242 -0
  302. praisonai/persistence/knowledge/cosmosdb_vector.py +438 -0
  303. praisonai/persistence/knowledge/couchbase.py +286 -0
  304. praisonai/persistence/knowledge/lancedb.py +216 -0
  305. praisonai/persistence/knowledge/langchain_adapter.py +291 -0
  306. praisonai/persistence/knowledge/lightrag_adapter.py +212 -0
  307. praisonai/persistence/knowledge/llamaindex_adapter.py +256 -0
  308. praisonai/persistence/knowledge/milvus.py +277 -0
  309. praisonai/persistence/knowledge/mongodb_vector.py +306 -0
  310. praisonai/persistence/knowledge/pgvector.py +335 -0
  311. praisonai/persistence/knowledge/pinecone.py +253 -0
  312. praisonai/persistence/knowledge/qdrant.py +301 -0
  313. praisonai/persistence/knowledge/redis_vector.py +291 -0
  314. praisonai/persistence/knowledge/singlestore_vector.py +299 -0
  315. praisonai/persistence/knowledge/surrealdb_vector.py +309 -0
  316. praisonai/persistence/knowledge/upstash_vector.py +266 -0
  317. praisonai/persistence/knowledge/weaviate.py +223 -0
  318. praisonai/persistence/migrations/__init__.py +10 -0
  319. praisonai/persistence/migrations/manager.py +251 -0
  320. praisonai/persistence/orchestrator.py +406 -0
  321. praisonai/persistence/state/__init__.py +21 -0
  322. praisonai/persistence/state/async_mongodb.py +200 -0
  323. praisonai/persistence/state/base.py +107 -0
  324. praisonai/persistence/state/dynamodb.py +226 -0
  325. praisonai/persistence/state/firestore.py +175 -0
  326. praisonai/persistence/state/gcs.py +155 -0
  327. praisonai/persistence/state/memory.py +245 -0
  328. praisonai/persistence/state/mongodb.py +158 -0
  329. praisonai/persistence/state/redis.py +190 -0
  330. praisonai/persistence/state/upstash.py +144 -0
  331. praisonai/persistence/tests/__init__.py +3 -0
  332. praisonai/persistence/tests/test_all_backends.py +633 -0
  333. praisonai/profiler.py +1214 -0
  334. praisonai/recipe/__init__.py +134 -0
  335. praisonai/recipe/bridge.py +278 -0
  336. praisonai/recipe/core.py +893 -0
  337. praisonai/recipe/exceptions.py +54 -0
  338. praisonai/recipe/history.py +402 -0
  339. praisonai/recipe/models.py +266 -0
  340. praisonai/recipe/operations.py +440 -0
  341. praisonai/recipe/policy.py +422 -0
  342. praisonai/recipe/registry.py +849 -0
  343. praisonai/recipe/runtime.py +214 -0
  344. praisonai/recipe/security.py +711 -0
  345. praisonai/recipe/serve.py +859 -0
  346. praisonai/recipe/server.py +613 -0
  347. praisonai/scheduler/__init__.py +45 -0
  348. praisonai/scheduler/agent_scheduler.py +552 -0
  349. praisonai/scheduler/base.py +124 -0
  350. praisonai/scheduler/daemon_manager.py +225 -0
  351. praisonai/scheduler/state_manager.py +155 -0
  352. praisonai/scheduler/yaml_loader.py +193 -0
  353. praisonai/scheduler.py +194 -0
  354. praisonai/setup/__init__.py +1 -0
  355. praisonai/setup/build.py +21 -0
  356. praisonai/setup/post_install.py +23 -0
  357. praisonai/setup/setup_conda_env.py +25 -0
  358. praisonai/setup.py +16 -0
  359. praisonai/templates/__init__.py +116 -0
  360. praisonai/templates/cache.py +364 -0
  361. praisonai/templates/dependency_checker.py +358 -0
  362. praisonai/templates/discovery.py +391 -0
  363. praisonai/templates/loader.py +564 -0
  364. praisonai/templates/registry.py +511 -0
  365. praisonai/templates/resolver.py +206 -0
  366. praisonai/templates/security.py +327 -0
  367. praisonai/templates/tool_override.py +498 -0
  368. praisonai/templates/tools_doctor.py +256 -0
  369. praisonai/test.py +105 -0
  370. praisonai/train.py +562 -0
  371. praisonai/train_vision.py +306 -0
  372. praisonai/ui/agents.py +824 -0
  373. praisonai/ui/callbacks.py +57 -0
  374. praisonai/ui/chainlit_compat.py +246 -0
  375. praisonai/ui/chat.py +532 -0
  376. praisonai/ui/code.py +717 -0
  377. praisonai/ui/colab.py +474 -0
  378. praisonai/ui/colab_chainlit.py +81 -0
  379. praisonai/ui/components/aicoder.py +284 -0
  380. praisonai/ui/context.py +283 -0
  381. praisonai/ui/database_config.py +56 -0
  382. praisonai/ui/db.py +294 -0
  383. praisonai/ui/realtime.py +488 -0
  384. praisonai/ui/realtimeclient/__init__.py +756 -0
  385. praisonai/ui/realtimeclient/tools.py +242 -0
  386. praisonai/ui/sql_alchemy.py +710 -0
  387. praisonai/upload_vision.py +140 -0
  388. praisonai/version.py +1 -0
  389. praisonai-3.0.0.dist-info/METADATA +3493 -0
  390. praisonai-3.0.0.dist-info/RECORD +393 -0
  391. praisonai-3.0.0.dist-info/WHEEL +5 -0
  392. praisonai-3.0.0.dist-info/entry_points.txt +4 -0
  393. praisonai-3.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,438 @@
1
+ """
2
+ Azure Cosmos DB Vector implementation of KnowledgeStore.
3
+
4
+ Supports both MongoDB API and SQL API modes.
5
+
6
+ Requires: pymongo (for MongoDB API) or azure-cosmos (for SQL API)
7
+ Install: pip install pymongo # or pip install azure-cosmos
8
+ """
9
+
10
+ import logging
11
+ from typing import Any, Dict, List, Optional
12
+
13
+ from .base import KnowledgeStore, KnowledgeDocument
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
class CosmosDBVectorKnowledgeStore(KnowledgeStore):
    """
    Azure Cosmos DB vector store for knowledge/RAG.

    Two API modes are supported, selected by ``api_mode``:

    * ``"mongodb"`` - uses ``pymongo`` and the ``cosmosSearch`` aggregation
      operator for vector search.
    * any other value (conventionally ``"sql"``) - uses ``azure-cosmos`` and
      the ``VectorDistance`` query function.

    The client is created lazily on first use, so constructing the store
    performs no I/O.

    Example:
        store = CosmosDBVectorKnowledgeStore(
            connection_string="mongodb+srv://...",
            database="praisonai",
            collection="vectors"
        )
    """

    # Translates common distance names into the similarity identifiers used
    # by ``cosmosSearchOptions``. Values that are not listed here are passed
    # through unchanged so callers may still supply a native identifier.
    _SIMILARITY_ALIASES = {
        "cosine": "COS",
        "cos": "COS",
        "euclidean": "L2",
        "l2": "L2",
        "dot": "IP",
        "dot_product": "IP",
        "inner_product": "IP",
        "ip": "IP",
    }

    def __init__(
        self,
        connection_string: Optional[str] = None,
        database: str = "praisonai",
        collection: str = "vectors",
        index_name: str = "vector_index",
        embedding_field: str = "embedding",
        text_field: str = "content",
        embedding_dim: int = 1536,
        api_mode: str = "mongodb",  # "mongodb" or "sql"
    ):
        """
        Initialize Cosmos DB Vector store.

        No connection is opened here; see ``_init_client``.

        Args:
            connection_string: Azure Cosmos DB connection string. When omitted,
                the ``COSMOS_CONNECTION_STRING`` environment variable is read
                on first use.
            database: Database name
            collection: Collection name
            index_name: Vector search index name
            embedding_field: Field name for embeddings
            text_field: Field name for text content
            embedding_dim: Embedding dimension
            api_mode: API mode ("mongodb" or "sql")
        """
        self.connection_string = connection_string
        self.database_name = database
        self.collection_name = collection
        self.index_name = index_name
        self.embedding_field = embedding_field
        self.text_field = text_field
        self.embedding_dim = embedding_dim
        self.api_mode = api_mode

        # Populated by _init_client() on first use.
        self._client = None
        self._db = None
        self._collection = None
        self._initialized = False

    def _init_client(self):
        """
        Initialize Cosmos DB client lazily.

        Raises:
            ValueError: If no connection string was given and the
                ``COSMOS_CONNECTION_STRING`` environment variable is unset.
            ImportError: If the driver for the selected API mode is missing.
        """
        if self._initialized:
            return

        if not self.connection_string:
            import os
            self.connection_string = os.getenv("COSMOS_CONNECTION_STRING")
            if not self.connection_string:
                raise ValueError(
                    "connection_string required. Set COSMOS_CONNECTION_STRING env var."
                )

        if self.api_mode == "mongodb":
            self._init_mongodb_client()
        else:
            self._init_sql_client()

        # Only mark as initialized once a client was created successfully, so
        # a failed attempt is retried on the next call.
        self._initialized = True

    def _init_mongodb_client(self):
        """Initialize MongoDB API client."""
        try:
            from pymongo import MongoClient
        except ImportError as exc:
            raise ImportError(
                "pymongo is required for Cosmos DB MongoDB API. "
                "Install with: pip install pymongo"
            ) from exc

        self._client = MongoClient(self.connection_string)
        self._db = self._client[self.database_name]
        self._collection = self._db[self.collection_name]

    def _init_sql_client(self):
        """Initialize SQL API client."""
        try:
            from azure.cosmos import CosmosClient
        except ImportError as exc:
            raise ImportError(
                "azure-cosmos is required for Cosmos DB SQL API. "
                "Install with: pip install azure-cosmos"
            ) from exc

        self._client = CosmosClient.from_connection_string(self.connection_string)
        self._db = self._client.get_database_client(self.database_name)
        self._collection = self._db.get_container_client(self.collection_name)

    def _get_collection(self, collection: str):
        """Return the MongoDB collection or SQL container handle for ``collection``."""
        if self.api_mode == "mongodb":
            return self._db[collection]
        return self._db.get_container_client(collection)

    def _to_item(self, doc: KnowledgeDocument) -> Dict[str, Any]:
        """Convert a document into the stored item layout for the active API mode."""
        # MongoDB uses "_id" as the primary key field; the SQL API uses "id".
        id_key = "_id" if self.api_mode == "mongodb" else "id"
        return {
            id_key: doc.id,
            self.text_field: doc.content,
            self.embedding_field: doc.embedding,
            "metadata": doc.metadata or {},
            "content_hash": doc.content_hash,
            "created_at": doc.created_at,
        }

    @staticmethod
    def _below_threshold(score: Any, score_threshold: Optional[float]) -> bool:
        """Return True when a threshold is set and ``score`` falls below it."""
        # Compare against None explicitly so that a threshold of 0.0 is honored.
        return score_threshold is not None and score < score_threshold

    def create_collection(
        self,
        name: str,
        dimension: int = 1536,
        distance: str = "cosine",
        metadata: Optional[Dict[str, Any]] = None
    ) -> None:
        """
        Create collection with vector index.

        In MongoDB mode this issues a ``createIndexes`` command for a
        ``vector-ivf`` index on the embedding field. Failures are logged as a
        warning rather than raised. In SQL mode nothing is done.

        Args:
            name: Collection to create the index on.
            dimension: Embedding dimension for the index.
            distance: Distance metric; common names such as "cosine",
                "euclidean" and "dot" are translated to "COS", "L2" and "IP".
            metadata: Accepted for interface compatibility; unused here.
        """
        self._init_client()

        if self.api_mode == "mongodb":
            similarity = self._SIMILARITY_ALIASES.get(str(distance).lower(), distance)
            # Create vector search index via MongoDB command
            try:
                self._db.command({
                    "createIndexes": name,
                    "indexes": [{
                        "name": self.index_name,
                        "key": {self.embedding_field: "cosmosSearch"},
                        "cosmosSearchOptions": {
                            "kind": "vector-ivf",
                            "numLists": 100,
                            "similarity": similarity,
                            "dimensions": dimension
                        }
                    }]
                })
            except Exception as e:
                logger.warning("Index creation may require manual setup: %s", e)

    def delete_collection(self, name: str) -> bool:
        """
        Delete collection.

        Returns:
            True when the drop/delete call completed, False if it raised.
        """
        self._init_client()
        try:
            if self.api_mode == "mongodb":
                self._db.drop_collection(name)
            else:
                self._db.delete_container(name)
            return True
        except Exception as e:
            logger.error("Failed to delete collection: %s", e)
            return False

    def collection_exists(self, name: str) -> bool:
        """Check if collection exists."""
        self._init_client()
        if self.api_mode == "mongodb":
            return name in self._db.list_collection_names()
        # Enumerate containers the same way list_collections() does instead of
        # assuming the container is always present.
        return any(c["id"] == name for c in self._db.list_containers())

    def list_collections(self) -> List[str]:
        """List collections."""
        self._init_client()
        if self.api_mode == "mongodb":
            return self._db.list_collection_names()
        return [c["id"] for c in self._db.list_containers()]

    def insert(
        self,
        collection: str,
        documents: List[KnowledgeDocument]
    ) -> List[str]:
        """
        Insert documents.

        Each document is written individually; a failure is logged as a
        warning and the remaining documents are still attempted.

        Returns:
            IDs of the documents that were written successfully.
        """
        self._init_client()

        coll = self._get_collection(collection)
        ids = []

        for doc in documents:
            item = self._to_item(doc)
            try:
                if self.api_mode == "mongodb":
                    coll.insert_one(item)
                else:
                    coll.create_item(item)
                ids.append(doc.id)
            except Exception as e:
                logger.warning("Failed to insert %s: %s", doc.id, e)

        return ids

    def upsert(
        self,
        collection: str,
        documents: List[KnowledgeDocument]
    ) -> List[str]:
        """
        Upsert documents.

        Each document is written individually; a failure is logged as a
        warning and the remaining documents are still attempted.

        Returns:
            IDs of the documents that were written successfully.
        """
        self._init_client()

        coll = self._get_collection(collection)
        ids = []

        for doc in documents:
            item = self._to_item(doc)
            try:
                if self.api_mode == "mongodb":
                    coll.replace_one({"_id": doc.id}, item, upsert=True)
                else:
                    coll.upsert_item(item)
                ids.append(doc.id)
            except Exception as e:
                logger.warning("Failed to upsert %s: %s", doc.id, e)

        return ids

    def search(
        self,
        collection: str,
        query_embedding: List[float],
        limit: int = 5,
        filters: Optional[Dict[str, Any]] = None,
        score_threshold: Optional[float] = None
    ) -> List[KnowledgeDocument]:
        """
        Search for similar documents using vector search.

        Dispatches to the MongoDB or SQL implementation based on ``api_mode``.
        Both implementations return an empty list if the query raises.
        """
        self._init_client()

        if self.api_mode == "mongodb":
            return self._search_mongodb(collection, query_embedding, limit, filters, score_threshold)
        return self._search_sql(collection, query_embedding, limit, filters, score_threshold)

    def _search_mongodb(
        self,
        collection: str,
        query_embedding: List[float],
        limit: int,
        filters: Optional[Dict[str, Any]],
        score_threshold: Optional[float]
    ) -> List[KnowledgeDocument]:
        """
        MongoDB API vector search.

        The returned documents carry no embedding; the similarity score is
        added to each document's metadata under the "score" key.
        """
        coll = self._db[collection]

        pipeline = [
            {
                "$search": {
                    "cosmosSearch": {
                        "vector": query_embedding,
                        "path": self.embedding_field,
                        "k": limit
                    },
                    "returnStoredSource": True
                }
            },
            {
                "$project": {
                    "similarityScore": {"$meta": "searchScore"},
                    self.text_field: 1,
                    "metadata": 1,
                    "content_hash": 1,
                    "created_at": 1
                }
            }
        ]

        if filters:
            # The $match stage runs after $search, so it narrows the k nearest
            # hits and fewer than ``limit`` documents may come back.
            pipeline.insert(1, {"$match": filters})

        try:
            results = list(coll.aggregate(pipeline))

            documents = []
            for doc in results:
                score = doc.get("similarityScore", 0)
                if self._below_threshold(score, score_threshold):
                    continue

                documents.append(KnowledgeDocument(
                    id=str(doc.get("_id", "")),
                    content=doc.get(self.text_field, ""),
                    metadata={**(doc.get("metadata") or {}), "score": score},
                    content_hash=doc.get("content_hash"),
                    created_at=doc.get("created_at", 0)
                ))

            return documents
        except Exception as e:
            logger.error("Vector search failed: %s", e)
            return []

    def _search_sql(
        self,
        collection: str,
        query_embedding: List[float],
        limit: int,
        filters: Optional[Dict[str, Any]],
        score_threshold: Optional[float]
    ) -> List[KnowledgeDocument]:
        """
        SQL API vector search (requires DiskANN index).

        ``filters`` are not translated into the query; a warning is logged
        when they are supplied. The similarity score is added to each
        document's metadata under the "score" key.
        """
        coll = self._db.get_container_client(collection)

        if filters:
            logger.warning("filters are not supported in SQL API mode and were ignored")

        try:
            # Coerce to int so only a number is ever interpolated into the
            # query text; the embedding itself travels as a bound parameter.
            top_n = int(limit)

            # SQL API uses VectorDistance function
            query = f"""
            SELECT TOP {top_n} c.id, c.{self.text_field}, c.metadata, c.content_hash, c.created_at,
            VectorDistance(c.{self.embedding_field}, @embedding) AS score
            FROM c
            ORDER BY VectorDistance(c.{self.embedding_field}, @embedding)
            """

            results = list(coll.query_items(
                query=query,
                parameters=[{"name": "@embedding", "value": query_embedding}],
                enable_cross_partition_query=True
            ))

            documents = []
            for doc in results:
                score = doc.get("score", 0)
                if self._below_threshold(score, score_threshold):
                    continue

                documents.append(KnowledgeDocument(
                    id=doc.get("id", ""),
                    content=doc.get(self.text_field, ""),
                    metadata={**(doc.get("metadata") or {}), "score": score},
                    content_hash=doc.get("content_hash"),
                    created_at=doc.get("created_at", 0)
                ))

            return documents
        except Exception as e:
            logger.error("Vector search failed: %s", e)
            return []

    def get(
        self,
        collection: str,
        ids: List[str]
    ) -> List[KnowledgeDocument]:
        """
        Get documents by IDs.

        IDs that cannot be read are logged as a warning and skipped, so the
        result may be shorter than ``ids``.
        """
        self._init_client()

        coll = self._get_collection(collection)
        is_mongodb = self.api_mode == "mongodb"
        id_key = "_id" if is_mongodb else "id"
        documents = []

        for doc_id in ids:
            try:
                if is_mongodb:
                    doc = coll.find_one({"_id": doc_id})
                else:
                    # NOTE(review): passing the id as partition key presumably
                    # assumes the container is partitioned on /id - confirm.
                    doc = coll.read_item(item=doc_id, partition_key=doc_id)

                if doc:
                    documents.append(KnowledgeDocument(
                        id=str(doc.get(id_key, "")),
                        content=doc.get(self.text_field, ""),
                        embedding=doc.get(self.embedding_field),
                        metadata=doc.get("metadata"),
                        content_hash=doc.get("content_hash"),
                        created_at=doc.get("created_at", 0)
                    ))
            except Exception as e:
                logger.warning("Failed to get %s: %s", doc_id, e)

        return documents

    def delete(
        self,
        collection: str,
        ids: Optional[List[str]] = None,
        filters: Optional[Dict[str, Any]] = None
    ) -> int:
        """
        Delete documents.

        When ``ids`` is given it takes precedence and ``filters`` is not
        consulted. A non-empty ``filters`` on its own is applied in MongoDB
        mode only; an empty or missing filter never deletes anything.

        Returns:
            Number of documents actually deleted.
        """
        self._init_client()

        coll = self._get_collection(collection)
        is_mongodb = self.api_mode == "mongodb"

        if ids:
            count = 0
            for doc_id in ids:
                try:
                    if is_mongodb:
                        # delete_one succeeds even when nothing matched, so
                        # rely on the reported count rather than on no error.
                        count += coll.delete_one({"_id": doc_id}).deleted_count
                    else:
                        # NOTE(review): presumably assumes the container is
                        # partitioned on /id - confirm.
                        coll.delete_item(item=doc_id, partition_key=doc_id)
                        count += 1
                except Exception as e:
                    logger.warning("Failed to delete %s: %s", doc_id, e)
            return count

        if filters:
            if is_mongodb:
                try:
                    return coll.delete_many(filters).deleted_count
                except Exception as e:
                    logger.warning("Failed to delete by filters: %s", e)
                    return 0
            logger.warning("Delete by filters is not supported in SQL API mode")

        return 0

    def count(self, collection: str) -> int:
        """Count documents."""
        self._init_client()

        if self.api_mode == "mongodb":
            return self._db[collection].count_documents({})

        # SQL API count
        coll = self._db.get_container_client(collection)
        result = list(coll.query_items(
            query="SELECT VALUE COUNT(1) FROM c",
            enable_cross_partition_query=True
        ))
        return result[0] if result else 0

    def close(self) -> None:
        """
        Close the connection.

        Only the MongoDB client is explicitly closed. In both modes the cached
        handles are dropped so the next operation re-initializes the client.
        """
        if self._client:
            if self.api_mode == "mongodb":
                self._client.close()
            self._client = None
            self._db = None
            self._collection = None
            self._initialized = False