camel-ai 0.2.59__py3-none-any.whl → 0.2.82__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic. Click here for more details.
- camel/__init__.py +3 -3
- camel/agents/__init__.py +2 -2
- camel/agents/_types.py +9 -4
- camel/agents/_utils.py +40 -2
- camel/agents/base.py +2 -2
- camel/agents/chat_agent.py +5012 -902
- camel/agents/critic_agent.py +2 -2
- camel/agents/deductive_reasoner_agent.py +56 -56
- camel/agents/embodied_agent.py +2 -2
- camel/agents/knowledge_graph_agent.py +20 -20
- camel/agents/mcp_agent.py +39 -36
- camel/agents/multi_hop_generator_agent.py +3 -3
- camel/agents/programmed_agent_instruction.py +2 -2
- camel/agents/repo_agent.py +4 -3
- camel/agents/role_assignment_agent.py +2 -2
- camel/agents/search_agent.py +2 -2
- camel/agents/task_agent.py +2 -2
- camel/agents/tool_agents/__init__.py +2 -2
- camel/agents/tool_agents/base.py +2 -2
- camel/agents/tool_agents/hugging_face_tool_agent.py +3 -3
- camel/benchmarks/__init__.py +2 -2
- camel/benchmarks/apibank.py +5 -5
- camel/benchmarks/apibench.py +2 -2
- camel/benchmarks/base.py +2 -2
- camel/benchmarks/browsecomp.py +44 -33
- camel/benchmarks/gaia.py +17 -13
- camel/benchmarks/mock_website/README.md +94 -0
- camel/benchmarks/mock_website/mock_web.py +299 -0
- camel/benchmarks/mock_website/requirements.txt +3 -0
- camel/benchmarks/mock_website/shopping_mall/app.py +465 -0
- camel/benchmarks/mock_website/task.json +104 -0
- camel/benchmarks/nexus.py +3 -3
- camel/benchmarks/ragbench.py +2 -2
- camel/bots/__init__.py +2 -2
- camel/bots/discord/__init__.py +2 -2
- camel/bots/discord/discord_app.py +2 -2
- camel/bots/discord/discord_installation.py +2 -2
- camel/bots/discord/discord_store.py +3 -3
- camel/bots/slack/__init__.py +2 -2
- camel/bots/slack/models.py +4 -4
- camel/bots/slack/slack_app.py +2 -2
- camel/bots/telegram_bot.py +2 -2
- camel/configs/__init__.py +26 -2
- camel/configs/aihubmix_config.py +90 -0
- camel/configs/aiml_config.py +2 -2
- camel/configs/amd_config.py +70 -0
- camel/configs/anthropic_config.py +8 -7
- camel/configs/base_config.py +2 -2
- camel/configs/bedrock_config.py +5 -3
- camel/configs/cerebras_config.py +98 -0
- camel/configs/cohere_config.py +3 -3
- camel/configs/cometapi_config.py +106 -0
- camel/configs/crynux_config.py +94 -0
- camel/configs/deepseek_config.py +9 -8
- camel/configs/gemini_config.py +6 -4
- camel/configs/groq_config.py +6 -4
- camel/configs/internlm_config.py +6 -4
- camel/configs/litellm_config.py +2 -2
- camel/configs/lmstudio_config.py +6 -4
- camel/configs/minimax_config.py +95 -0
- camel/configs/mistral_config.py +3 -3
- camel/configs/modelscope_config.py +5 -3
- camel/configs/moonshot_config.py +2 -2
- camel/configs/nebius_config.py +105 -0
- camel/configs/netmind_config.py +2 -2
- camel/configs/novita_config.py +2 -2
- camel/configs/nvidia_config.py +2 -2
- camel/configs/ollama_config.py +2 -2
- camel/configs/openai_config.py +8 -3
- camel/configs/openrouter_config.py +6 -4
- camel/configs/ppio_config.py +2 -2
- camel/configs/qianfan_config.py +85 -0
- camel/configs/qwen_config.py +2 -2
- camel/configs/reka_config.py +3 -3
- camel/configs/samba_config.py +8 -6
- camel/configs/sglang_config.py +2 -2
- camel/configs/siliconflow_config.py +2 -2
- camel/configs/togetherai_config.py +2 -2
- camel/configs/vllm_config.py +4 -2
- camel/configs/watsonx_config.py +2 -2
- camel/configs/yi_config.py +6 -4
- camel/configs/zhipuai_config.py +6 -4
- camel/{data_collector → data_collectors}/__init__.py +2 -2
- camel/{data_collector → data_collectors}/alpaca_collector.py +19 -10
- camel/{data_collector → data_collectors}/base.py +2 -2
- camel/{data_collector → data_collectors}/sharegpt_collector.py +3 -3
- camel/datagen/__init__.py +2 -2
- camel/datagen/cot_datagen.py +32 -37
- camel/datagen/evol_instruct/__init__.py +2 -2
- camel/datagen/evol_instruct/evol_instruct.py +2 -2
- camel/datagen/evol_instruct/scorer.py +24 -25
- camel/datagen/evol_instruct/templates.py +48 -48
- camel/datagen/self_improving_cot.py +5 -5
- camel/datagen/self_instruct/__init__.py +2 -2
- camel/datagen/self_instruct/filter/__init__.py +2 -2
- camel/datagen/self_instruct/filter/filter_function.py +2 -2
- camel/datagen/self_instruct/filter/filter_registry.py +2 -2
- camel/datagen/self_instruct/filter/instruction_filter.py +2 -2
- camel/datagen/self_instruct/self_instruct.py +2 -2
- camel/datagen/self_instruct/templates.py +47 -47
- camel/datagen/source2synth/__init__.py +2 -2
- camel/datagen/source2synth/data_processor.py +2 -2
- camel/datagen/source2synth/models.py +2 -2
- camel/datagen/source2synth/user_data_processor_config.py +2 -2
- camel/datahubs/__init__.py +2 -2
- camel/datahubs/base.py +2 -2
- camel/datahubs/huggingface.py +2 -2
- camel/datahubs/models.py +2 -2
- camel/datasets/__init__.py +2 -2
- camel/datasets/base_generator.py +41 -12
- camel/datasets/few_shot_generator.py +18 -18
- camel/datasets/models.py +3 -3
- camel/datasets/self_instruct_generator.py +2 -2
- camel/datasets/static_dataset.py +152 -2
- camel/embeddings/__init__.py +2 -2
- camel/embeddings/azure_embedding.py +2 -2
- camel/embeddings/base.py +2 -2
- camel/embeddings/gemini_embedding.py +2 -2
- camel/embeddings/jina_embedding.py +10 -3
- camel/embeddings/mistral_embedding.py +2 -2
- camel/embeddings/openai_compatible_embedding.py +2 -2
- camel/embeddings/openai_embedding.py +2 -2
- camel/embeddings/sentence_transformers_embeddings.py +4 -4
- camel/embeddings/together_embedding.py +2 -2
- camel/embeddings/vlm_embedding.py +11 -4
- camel/environments/__init__.py +14 -2
- camel/environments/models.py +2 -2
- camel/environments/multi_step.py +2 -2
- camel/environments/rlcards_env.py +860 -0
- camel/environments/single_step.py +30 -5
- camel/environments/tic_tac_toe.py +3 -3
- camel/extractors/__init__.py +2 -2
- camel/extractors/base.py +2 -2
- camel/extractors/python_strategies.py +2 -2
- camel/generators.py +2 -2
- camel/human.py +2 -2
- camel/interpreters/__init__.py +4 -2
- camel/interpreters/base.py +16 -3
- camel/interpreters/docker/Dockerfile +53 -7
- camel/interpreters/docker_interpreter.py +70 -11
- camel/interpreters/e2b_interpreter.py +59 -11
- camel/interpreters/internal_python_interpreter.py +81 -4
- camel/interpreters/interpreter_error.py +2 -2
- camel/interpreters/ipython_interpreter.py +23 -5
- camel/interpreters/microsandbox_interpreter.py +395 -0
- camel/interpreters/subprocess_interpreter.py +36 -4
- camel/loaders/__init__.py +17 -5
- camel/loaders/apify_reader.py +2 -2
- camel/loaders/base_io.py +2 -2
- camel/loaders/base_loader.py +85 -0
- camel/loaders/chunkr_reader.py +128 -93
- camel/loaders/crawl4ai_reader.py +2 -2
- camel/loaders/firecrawl_reader.py +6 -6
- camel/loaders/jina_url_reader.py +2 -2
- camel/loaders/markitdown.py +2 -2
- camel/loaders/mineru_extractor.py +2 -2
- camel/loaders/mistral_reader.py +148 -0
- camel/loaders/scrapegraph_reader.py +2 -2
- camel/loaders/unstructured_io.py +2 -2
- camel/logger.py +5 -5
- camel/memories/__init__.py +2 -2
- camel/memories/agent_memories.py +86 -3
- camel/memories/base.py +36 -2
- camel/memories/blocks/__init__.py +2 -2
- camel/memories/blocks/chat_history_block.py +126 -9
- camel/memories/blocks/vectordb_block.py +10 -3
- camel/memories/context_creators/__init__.py +2 -2
- camel/memories/context_creators/score_based.py +31 -239
- camel/memories/records.py +98 -13
- camel/messages/__init__.py +2 -2
- camel/messages/base.py +193 -46
- camel/messages/conversion/__init__.py +2 -2
- camel/messages/conversion/alpaca.py +2 -2
- camel/messages/conversion/conversation_models.py +2 -2
- camel/messages/conversion/sharegpt/__init__.py +2 -2
- camel/messages/conversion/sharegpt/function_call_formatter.py +2 -2
- camel/messages/conversion/sharegpt/hermes/__init__.py +2 -2
- camel/messages/conversion/sharegpt/hermes/hermes_function_formatter.py +2 -2
- camel/messages/func_message.py +54 -17
- camel/models/__init__.py +18 -2
- camel/models/_utils.py +3 -3
- camel/models/aihubmix_model.py +83 -0
- camel/models/aiml_model.py +11 -18
- camel/models/amd_model.py +101 -0
- camel/models/anthropic_model.py +127 -20
- camel/models/aws_bedrock_model.py +12 -35
- camel/models/azure_openai_model.py +263 -63
- camel/models/base_audio_model.py +5 -3
- camel/models/base_model.py +195 -26
- camel/models/cerebras_model.py +83 -0
- camel/models/cohere_model.py +81 -21
- camel/models/cometapi_model.py +83 -0
- camel/models/crynux_model.py +87 -0
- camel/models/deepseek_model.py +61 -59
- camel/models/fish_audio_model.py +8 -2
- camel/models/gemini_model.py +439 -30
- camel/models/groq_model.py +11 -19
- camel/models/internlm_model.py +11 -18
- camel/models/litellm_model.py +94 -34
- camel/models/lmstudio_model.py +17 -20
- camel/models/minimax_model.py +83 -0
- camel/models/mistral_model.py +84 -19
- camel/models/model_factory.py +49 -6
- camel/models/model_manager.py +33 -11
- camel/models/modelscope_model.py +13 -193
- camel/models/moonshot_model.py +195 -21
- camel/models/nebius_model.py +83 -0
- camel/models/nemotron_model.py +19 -9
- camel/models/netmind_model.py +11 -18
- camel/models/novita_model.py +11 -18
- camel/models/nvidia_model.py +11 -18
- camel/models/ollama_model.py +14 -21
- camel/models/openai_audio_models.py +2 -2
- camel/models/openai_compatible_model.py +234 -27
- camel/models/openai_model.py +255 -39
- camel/models/openrouter_model.py +11 -19
- camel/models/ppio_model.py +11 -18
- camel/models/qianfan_model.py +89 -0
- camel/models/qwen_model.py +13 -193
- camel/models/reka_model.py +90 -21
- camel/models/reward/__init__.py +2 -2
- camel/models/reward/base_reward_model.py +2 -2
- camel/models/reward/evaluator.py +2 -2
- camel/models/reward/nemotron_model.py +2 -2
- camel/models/reward/skywork_model.py +2 -2
- camel/models/samba_model.py +117 -49
- camel/models/sglang_model.py +162 -42
- camel/models/siliconflow_model.py +12 -35
- camel/models/stub_model.py +10 -7
- camel/models/togetherai_model.py +11 -18
- camel/models/vllm_model.py +10 -18
- camel/models/volcano_model.py +16 -20
- camel/models/watsonx_model.py +69 -19
- camel/models/yi_model.py +11 -18
- camel/models/zhipuai_model.py +70 -18
- camel/parsers/__init__.py +18 -0
- camel/parsers/mcp_tool_call_parser.py +176 -0
- camel/personas/__init__.py +2 -2
- camel/personas/persona.py +2 -2
- camel/personas/persona_hub.py +2 -2
- camel/prompts/__init__.py +2 -2
- camel/prompts/ai_society.py +2 -2
- camel/prompts/base.py +2 -2
- camel/prompts/code.py +2 -2
- camel/prompts/evaluation.py +2 -2
- camel/prompts/generate_text_embedding_data.py +2 -2
- camel/prompts/image_craft.py +2 -2
- camel/prompts/misalignment.py +2 -2
- camel/prompts/multi_condition_image_craft.py +2 -2
- camel/prompts/object_recognition.py +2 -2
- camel/prompts/persona_hub.py +3 -3
- camel/prompts/prompt_templates.py +2 -2
- camel/prompts/role_description_prompt_template.py +2 -2
- camel/prompts/solution_extraction.py +8 -8
- camel/prompts/task_prompt_template.py +2 -2
- camel/prompts/translation.py +2 -2
- camel/prompts/video_description_prompt.py +3 -3
- camel/responses/__init__.py +2 -2
- camel/responses/agent_responses.py +2 -2
- camel/retrievers/__init__.py +2 -2
- camel/retrievers/auto_retriever.py +23 -3
- camel/retrievers/base.py +2 -2
- camel/retrievers/bm25_retriever.py +3 -4
- camel/retrievers/cohere_rerank_retriever.py +2 -2
- camel/retrievers/hybrid_retrival.py +4 -4
- camel/retrievers/vector_retriever.py +2 -2
- camel/runtimes/Dockerfile.multi-toolkit +90 -0
- camel/{runtime → runtimes}/__init__.py +2 -2
- camel/runtimes/api.py +153 -0
- camel/{runtime → runtimes}/base.py +2 -2
- camel/{runtime → runtimes}/configs.py +13 -13
- camel/{runtime → runtimes}/daytona_runtime.py +18 -19
- camel/{runtime → runtimes}/docker_runtime.py +13 -13
- camel/{runtime → runtimes}/llm_guard_runtime.py +28 -28
- camel/{runtime → runtimes}/remote_http_runtime.py +12 -12
- camel/{runtime → runtimes}/ubuntu_docker_runtime.py +3 -3
- camel/{runtime → runtimes}/utils/__init__.py +2 -2
- camel/{runtime → runtimes}/utils/function_risk_toolkit.py +2 -2
- camel/{runtime → runtimes}/utils/ignore_risk_toolkit.py +2 -2
- camel/schemas/__init__.py +2 -2
- camel/schemas/base.py +2 -2
- camel/schemas/openai_converter.py +3 -3
- camel/schemas/outlines_converter.py +2 -2
- camel/services/agent_openapi_server.py +380 -0
- camel/societies/__init__.py +4 -2
- camel/societies/babyagi_playing.py +2 -2
- camel/societies/role_playing.py +201 -80
- camel/societies/workforce/__init__.py +10 -3
- camel/societies/workforce/base.py +9 -5
- camel/societies/workforce/events.py +143 -0
- camel/societies/workforce/prompts.py +258 -33
- camel/societies/workforce/role_playing_worker.py +95 -30
- camel/societies/workforce/single_agent_worker.py +659 -30
- camel/societies/workforce/structured_output_handler.py +512 -0
- camel/societies/workforce/task_channel.py +182 -38
- camel/societies/workforce/utils.py +784 -18
- camel/societies/workforce/worker.py +96 -28
- camel/societies/workforce/workflow_memory_manager.py +1746 -0
- camel/societies/workforce/workforce.py +5730 -366
- camel/societies/workforce/workforce_callback.py +103 -0
- camel/societies/workforce/workforce_logger.py +647 -0
- camel/societies/workforce/workforce_metrics.py +33 -0
- camel/storages/__init__.py +10 -2
- camel/storages/graph_storages/__init__.py +2 -2
- camel/storages/graph_storages/base.py +2 -2
- camel/storages/graph_storages/graph_element.py +2 -2
- camel/storages/graph_storages/nebula_graph.py +4 -4
- camel/storages/graph_storages/neo4j_graph.py +7 -7
- camel/storages/key_value_storages/__init__.py +2 -2
- camel/storages/key_value_storages/base.py +2 -2
- camel/storages/key_value_storages/in_memory.py +2 -2
- camel/storages/key_value_storages/json.py +17 -4
- camel/storages/key_value_storages/mem0_cloud.py +50 -49
- camel/storages/key_value_storages/redis.py +2 -2
- camel/storages/object_storages/__init__.py +2 -2
- camel/storages/object_storages/amazon_s3.py +2 -2
- camel/storages/object_storages/azure_blob.py +2 -2
- camel/storages/object_storages/base.py +2 -2
- camel/storages/object_storages/google_cloud.py +3 -3
- camel/storages/vectordb_storages/__init__.py +12 -2
- camel/storages/vectordb_storages/base.py +2 -2
- camel/storages/vectordb_storages/chroma.py +731 -0
- camel/storages/vectordb_storages/faiss.py +712 -0
- camel/storages/vectordb_storages/milvus.py +2 -2
- camel/storages/vectordb_storages/oceanbase.py +16 -17
- camel/storages/vectordb_storages/pgvector.py +349 -0
- camel/storages/vectordb_storages/qdrant.py +6 -6
- camel/storages/vectordb_storages/surreal.py +372 -0
- camel/storages/vectordb_storages/tidb.py +11 -8
- camel/storages/vectordb_storages/weaviate.py +714 -0
- camel/tasks/__init__.py +2 -2
- camel/tasks/task.py +366 -27
- camel/tasks/task_prompt.py +3 -3
- camel/terminators/__init__.py +2 -2
- camel/terminators/base.py +2 -2
- camel/terminators/response_terminator.py +2 -2
- camel/terminators/token_limit_terminator.py +2 -2
- camel/toolkits/__init__.py +58 -10
- camel/toolkits/aci_toolkit.py +66 -21
- camel/toolkits/arxiv_toolkit.py +8 -8
- camel/toolkits/ask_news_toolkit.py +2 -2
- camel/toolkits/async_browser_toolkit.py +174 -575
- camel/toolkits/audio_analysis_toolkit.py +3 -3
- camel/toolkits/base.py +65 -7
- camel/toolkits/bohrium_toolkit.py +318 -0
- camel/toolkits/browser_toolkit.py +306 -566
- camel/toolkits/browser_toolkit_commons.py +568 -0
- camel/toolkits/code_execution.py +67 -11
- camel/toolkits/context_summarizer_toolkit.py +684 -0
- camel/toolkits/craw4ai_toolkit.py +93 -0
- camel/toolkits/dappier_toolkit.py +12 -8
- camel/toolkits/data_commons_toolkit.py +2 -2
- camel/toolkits/dingtalk.py +1135 -0
- camel/toolkits/earth_science_toolkit.py +5367 -0
- camel/toolkits/edgeone_pages_mcp_toolkit.py +49 -0
- camel/toolkits/excel_toolkit.py +910 -70
- camel/toolkits/file_toolkit.py +1402 -0
- camel/toolkits/function_tool.py +128 -20
- camel/toolkits/github_toolkit.py +148 -43
- camel/toolkits/gmail_toolkit.py +1839 -0
- camel/toolkits/google_calendar_toolkit.py +40 -6
- camel/toolkits/google_drive_mcp_toolkit.py +54 -0
- camel/toolkits/google_maps_toolkit.py +2 -2
- camel/toolkits/google_scholar_toolkit.py +2 -2
- camel/toolkits/human_toolkit.py +36 -12
- camel/toolkits/hybrid_browser_toolkit/__init__.py +18 -0
- camel/toolkits/hybrid_browser_toolkit/config_loader.py +185 -0
- camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +246 -0
- camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit_ts.py +1973 -0
- camel/toolkits/hybrid_browser_toolkit/installer.py +203 -0
- camel/toolkits/hybrid_browser_toolkit/ts/package-lock.json +4589 -0
- camel/toolkits/hybrid_browser_toolkit/ts/package.json +33 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/browser-scripts.js +125 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/browser-session.ts +1929 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/config-loader.ts +233 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/hybrid-browser-toolkit.ts +589 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/index.ts +7 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/parent-child-filter.ts +226 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/snapshot-parser.ts +219 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/som-screenshot-injected.ts +543 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/types.ts +129 -0
- camel/toolkits/hybrid_browser_toolkit/ts/tsconfig.json +27 -0
- camel/toolkits/hybrid_browser_toolkit/ts/websocket-server.js +319 -0
- camel/toolkits/hybrid_browser_toolkit/ws_wrapper.py +1037 -0
- camel/toolkits/hybrid_browser_toolkit_py/__init__.py +17 -0
- camel/toolkits/hybrid_browser_toolkit_py/actions.py +575 -0
- camel/toolkits/hybrid_browser_toolkit_py/agent.py +311 -0
- camel/toolkits/hybrid_browser_toolkit_py/browser_session.py +787 -0
- camel/toolkits/hybrid_browser_toolkit_py/config_loader.py +490 -0
- camel/toolkits/hybrid_browser_toolkit_py/hybrid_browser_toolkit.py +2390 -0
- camel/toolkits/hybrid_browser_toolkit_py/snapshot.py +233 -0
- camel/toolkits/hybrid_browser_toolkit_py/stealth_script.js +0 -0
- camel/toolkits/hybrid_browser_toolkit_py/unified_analyzer.js +1043 -0
- camel/toolkits/image_analysis_toolkit.py +3 -3
- camel/toolkits/image_generation_toolkit.py +390 -0
- camel/toolkits/jina_reranker_toolkit.py +195 -79
- camel/toolkits/klavis_toolkit.py +7 -3
- camel/toolkits/linkedin_toolkit.py +2 -2
- camel/toolkits/markitdown_toolkit.py +104 -0
- camel/toolkits/math_toolkit.py +66 -12
- camel/toolkits/mcp_toolkit.py +841 -600
- camel/toolkits/memory_toolkit.py +7 -3
- camel/toolkits/meshy_toolkit.py +2 -2
- camel/toolkits/message_agent_toolkit.py +608 -0
- camel/toolkits/message_integration.py +724 -0
- camel/toolkits/mineru_toolkit.py +2 -2
- camel/toolkits/minimax_mcp_toolkit.py +195 -0
- camel/toolkits/networkx_toolkit.py +2 -2
- camel/toolkits/note_taking_toolkit.py +277 -0
- camel/toolkits/notion_mcp_toolkit.py +224 -0
- camel/toolkits/notion_toolkit.py +2 -2
- camel/toolkits/open_api_specs/biztoc/__init__.py +2 -2
- camel/toolkits/open_api_specs/biztoc/ai-plugin.json +1 -1
- camel/toolkits/open_api_specs/coursera/__init__.py +2 -2
- camel/toolkits/open_api_specs/create_qr_code/__init__.py +2 -2
- camel/toolkits/open_api_specs/klarna/__init__.py +2 -2
- camel/toolkits/open_api_specs/nasa_apod/__init__.py +2 -2
- camel/toolkits/open_api_specs/outschool/__init__.py +2 -2
- camel/toolkits/open_api_specs/outschool/ai-plugin.json +1 -1
- camel/toolkits/open_api_specs/outschool/openapi.yaml +1 -1
- camel/toolkits/open_api_specs/outschool/paths/__init__.py +2 -2
- camel/toolkits/open_api_specs/outschool/paths/get_classes.py +2 -2
- camel/toolkits/open_api_specs/outschool/paths/search_teachers.py +2 -2
- camel/toolkits/open_api_specs/security_config.py +2 -2
- camel/toolkits/open_api_specs/speak/__init__.py +2 -2
- camel/toolkits/open_api_specs/web_scraper/__init__.py +2 -2
- camel/toolkits/open_api_specs/web_scraper/ai-plugin.json +1 -1
- camel/toolkits/open_api_specs/web_scraper/paths/__init__.py +2 -2
- camel/toolkits/open_api_specs/web_scraper/paths/scraper.py +2 -2
- camel/toolkits/open_api_toolkit.py +2 -2
- camel/toolkits/openbb_toolkit.py +7 -3
- camel/toolkits/origene_mcp_toolkit.py +56 -0
- camel/toolkits/page_script.js +86 -74
- camel/toolkits/playwright_mcp_toolkit.py +27 -32
- camel/toolkits/pptx_toolkit.py +790 -0
- camel/toolkits/pubmed_toolkit.py +2 -2
- camel/toolkits/pulse_mcp_search_toolkit.py +2 -2
- camel/toolkits/pyautogui_toolkit.py +2 -2
- camel/toolkits/reddit_toolkit.py +2 -2
- camel/toolkits/resend_toolkit.py +168 -0
- camel/toolkits/retrieval_toolkit.py +2 -2
- camel/toolkits/screenshot_toolkit.py +213 -0
- camel/toolkits/search_toolkit.py +539 -146
- camel/toolkits/searxng_toolkit.py +2 -2
- camel/toolkits/semantic_scholar_toolkit.py +2 -2
- camel/toolkits/slack_toolkit.py +108 -58
- camel/toolkits/sql_toolkit.py +712 -0
- camel/toolkits/stripe_toolkit.py +2 -2
- camel/toolkits/sympy_toolkit.py +3 -3
- camel/toolkits/task_planning_toolkit.py +134 -0
- camel/toolkits/terminal_toolkit/__init__.py +18 -0
- camel/toolkits/terminal_toolkit/terminal_toolkit.py +1070 -0
- camel/toolkits/terminal_toolkit/utils.py +532 -0
- camel/toolkits/thinking_toolkit.py +3 -3
- camel/toolkits/twitter_toolkit.py +8 -3
- camel/toolkits/vertex_ai_veo_toolkit.py +590 -0
- camel/toolkits/video_analysis_toolkit.py +112 -29
- camel/toolkits/video_download_toolkit.py +22 -16
- camel/toolkits/weather_toolkit.py +2 -2
- camel/toolkits/web_deploy_toolkit.py +1219 -0
- camel/toolkits/wechat_official_toolkit.py +483 -0
- camel/toolkits/whatsapp_toolkit.py +2 -2
- camel/toolkits/wolfram_alpha_toolkit.py +53 -25
- camel/toolkits/zapier_toolkit.py +7 -3
- camel/types/__init__.py +4 -4
- camel/types/agents/__init__.py +2 -2
- camel/types/agents/tool_calling_record.py +6 -3
- camel/types/enums.py +454 -35
- camel/types/mcp_registries.py +2 -2
- camel/types/openai_types.py +4 -4
- camel/types/unified_model_type.py +43 -6
- camel/utils/__init__.py +20 -2
- camel/utils/async_func.py +2 -2
- camel/utils/chunker/__init__.py +2 -2
- camel/utils/chunker/base.py +2 -2
- camel/utils/chunker/code_chunker.py +2 -2
- camel/utils/chunker/uio_chunker.py +2 -2
- camel/utils/commons.py +65 -7
- camel/utils/constants.py +5 -2
- camel/utils/context_utils.py +1134 -0
- camel/utils/deduplication.py +2 -2
- camel/utils/filename.py +2 -2
- camel/utils/langfuse.py +258 -0
- camel/utils/mcp.py +140 -6
- camel/utils/mcp_client.py +1056 -0
- camel/utils/message_summarizer.py +148 -0
- camel/utils/response_format.py +2 -2
- camel/utils/token_counting.py +45 -22
- camel/utils/tool_result.py +44 -0
- camel/verifiers/__init__.py +2 -2
- camel/verifiers/base.py +2 -2
- camel/verifiers/math_verifier.py +2 -2
- camel/verifiers/models.py +2 -2
- camel/verifiers/physics_verifier.py +2 -2
- camel/verifiers/python_verifier.py +2 -2
- {camel_ai-0.2.59.dist-info → camel_ai-0.2.82.dist-info}/METADATA +349 -108
- camel_ai-0.2.82.dist-info/RECORD +507 -0
- {camel_ai-0.2.59.dist-info → camel_ai-0.2.82.dist-info}/WHEEL +1 -1
- {camel_ai-0.2.59.dist-info → camel_ai-0.2.82.dist-info}/licenses/LICENSE +1 -1
- camel/loaders/pandas_reader.py +0 -368
- camel/runtime/api.py +0 -97
- camel/toolkits/dalle_toolkit.py +0 -171
- camel/toolkits/file_write_toolkit.py +0 -395
- camel/toolkits/openai_agent_toolkit.py +0 -135
- camel/toolkits/terminal_toolkit.py +0 -1037
- camel_ai-0.2.59.dist-info/RECORD +0 -410
|
@@ -0,0 +1,712 @@
|
|
|
1
|
+
# ========= Copyright 2023-2025 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
# ========= Copyright 2023-2025 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
14
|
+
|
|
15
|
+
import os
|
|
16
|
+
import pickle
|
|
17
|
+
from datetime import datetime
|
|
18
|
+
from typing import TYPE_CHECKING, Any, Dict, List, Optional, cast
|
|
19
|
+
|
|
20
|
+
from camel.logger import get_logger
|
|
21
|
+
from camel.storages.vectordb_storages import (
|
|
22
|
+
BaseVectorStorage,
|
|
23
|
+
VectorDBQuery,
|
|
24
|
+
VectorDBQueryResult,
|
|
25
|
+
VectorDBStatus,
|
|
26
|
+
VectorRecord,
|
|
27
|
+
)
|
|
28
|
+
from camel.types import VectorDistance
|
|
29
|
+
from camel.utils import dependencies_required
|
|
30
|
+
|
|
31
|
+
if TYPE_CHECKING:
|
|
32
|
+
from numpy import ndarray
|
|
33
|
+
|
|
34
|
+
logger = get_logger(__name__)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class FaissStorage(BaseVectorStorage):
|
|
38
|
+
r"""An implementation of the `BaseVectorStorage` using FAISS,
|
|
39
|
+
Facebook AI's Similarity Search library for efficient vector search.
|
|
40
|
+
|
|
41
|
+
The detailed information about FAISS is available at:
|
|
42
|
+
`FAISS <https://github.com/facebookresearch/faiss>`_
|
|
43
|
+
|
|
44
|
+
Args:
|
|
45
|
+
vector_dim (int): The dimension of storing vectors.
|
|
46
|
+
index_type (str, optional): Type of FAISS index to create.
|
|
47
|
+
Options include 'Flat', 'IVF', 'HNSW', etc. (default:
|
|
48
|
+
:obj:`'Flat'`)
|
|
49
|
+
collection_name (Optional[str], optional): Name for the collection.
|
|
50
|
+
If not provided, set it to the current time with iso format.
|
|
51
|
+
(default: :obj:`None`)
|
|
52
|
+
storage_path (Optional[str], optional): Path to directory where
|
|
53
|
+
the index will be stored. If None, index will only exist in memory.
|
|
54
|
+
(default: :obj:`None`)
|
|
55
|
+
distance (VectorDistance, optional): The distance metric for vector
|
|
56
|
+
comparison (default: :obj:`VectorDistance.COSINE`)
|
|
57
|
+
nlist (int, optional): Number of cluster centroids for IVF indexes.
|
|
58
|
+
Only used if index_type includes 'IVF'. (default: :obj:`100`)
|
|
59
|
+
m (int, optional): HNSW parameter. Number of connections per node.
|
|
60
|
+
Only used if index_type includes 'HNSW'. (default: :obj:`16`)
|
|
61
|
+
**kwargs (Any): Additional keyword arguments.
|
|
62
|
+
|
|
63
|
+
Notes:
|
|
64
|
+
- FAISS offers various index types optimized for different use cases:
|
|
65
|
+
- 'Flat': Exact search, but slowest for large datasets
|
|
66
|
+
- 'IVF': Inverted file index, good balance of speed and recall
|
|
67
|
+
- 'HNSW': Hierarchical Navigable Small World, fast with high recall
|
|
68
|
+
- 'PQ': Product Quantization for memory-efficient storage
|
|
69
|
+
- The choice of index should be based on your specific requirements
|
|
70
|
+
for search speed, memory usage, and accuracy.
|
|
71
|
+
"""
|
|
72
|
+
|
|
73
|
+
@dependencies_required('faiss')
def __init__(
    self,
    vector_dim: int,
    index_type: str = 'Flat',
    collection_name: Optional[str] = None,
    storage_path: Optional[str] = None,
    distance: VectorDistance = VectorDistance.COSINE,
    nlist: int = 100,
    m: int = 16,
    **kwargs: Any,
) -> None:
    r"""Set up a FAISS-backed vector storage.

    Args:
        vector_dim (int): Dimension of the vectors to be stored.
        index_type (str, optional): FAISS index type, forwarded to
            :meth:`_create_index`. (default: :obj:`'Flat'`)
        collection_name (Optional[str], optional): Name of the
            collection. A falsy value triggers a generated, timestamp
            based name. (default: :obj:`None`)
        storage_path (Optional[str], optional): Directory used to persist
            the index. The directory is created when it is not
            :obj:`None`. (default: :obj:`None`)
        distance (VectorDistance, optional): Vector distance metric.
            (default: :obj:`VectorDistance.COSINE`)
        nlist (int, optional): Number of clusters for IVF indexes.
            (default: :obj:`100`)
        m (int, optional): HNSW connections per node.
            (default: :obj:`16`)
        **kwargs (Any): Accepted but not read by this constructor.
    """
    import faiss
    import numpy as np

    # Plain configuration values.
    self.vector_dim = vector_dim
    self.index_type = index_type
    if collection_name:
        self.collection_name = collection_name
    else:
        self.collection_name = self._generate_collection_name()
    self.storage_path = storage_path
    self.distance = distance
    self.nlist, self.m = nlist, m

    # The lazily imported faiss module doubles as the "client" handle.
    self._faiss_client = faiss

    # Make sure the target directory is there before anything is written.
    if self.storage_path is not None:
        os.makedirs(self.storage_path, exist_ok=True)

    # Fresh, empty index built from the configuration above.
    self._index = self._create_index()

    # FAISS keeps vectors only, so IDs, payloads and the raw vectors are
    # tracked in plain dictionaries alongside the index.
    self._id_to_index: Dict[str, int] = {}
    self._index_to_id: Dict[int, str] = {}
    self._payloads: Dict[str, Dict[str, Any]] = {}
    self._vectors: Dict[str, np.ndarray] = {}

    # Pick up previously persisted state, if a storage path was given.
    if self.storage_path:
        self._load_from_disk()
|
|
127
|
+
|
|
128
|
+
def _generate_collection_name(self) -> str:
|
|
129
|
+
r"""Generates a collection name if user doesn't provide"""
|
|
130
|
+
return f"faiss_index_{datetime.now().isoformat()}"
|
|
131
|
+
|
|
132
|
+
def _get_index_path(self) -> str:
|
|
133
|
+
r"""Returns the path to the index file"""
|
|
134
|
+
if self.storage_path is None:
|
|
135
|
+
raise ValueError("Storage path is not set.")
|
|
136
|
+
return os.path.join(self.storage_path, f"{self.collection_name}.index")
|
|
137
|
+
|
|
138
|
+
def _get_metadata_path(self) -> str:
|
|
139
|
+
r"""Returns the path to the metadata file"""
|
|
140
|
+
if self.storage_path is None:
|
|
141
|
+
raise ValueError("Storage path is not set.")
|
|
142
|
+
return os.path.join(
|
|
143
|
+
self.storage_path, f"{self.collection_name}.metadata"
|
|
144
|
+
)
|
|
145
|
+
|
|
146
|
+
def _create_index(self):
    r"""Creates a new FAISS index based on the configured parameters.

    Supported values of ``self.index_type``:

    - ``'Flat'``: ``IndexFlatIP`` or ``IndexFlatL2`` depending on the
      metric.
    - ``'IVF'``, or a value starting with ``'IVF'`` that contains
      ``'Flat'``: ``IndexIVFFlat`` with ``self.nlist`` clusters.
    - A value starting with ``'IVF'`` that contains ``'PQ'``:
      ``IndexIVFPQ`` with ``self.nlist`` clusters and 8 bits per code.
    - ``'HNSW'``: ``IndexHNSWFlat`` with ``self.m`` connections per node.

    Returns:
        A FAISS index object configured according to the parameters.

    Raises:
        ValueError: If ``self.distance`` or ``self.index_type`` is not
            supported.
    """
    import faiss

    # Cosine and dot product both map to the inner-product metric. For
    # cosine the vectors have to be normalized; that is not done here.
    if self.distance in (VectorDistance.COSINE, VectorDistance.DOT):
        metric = faiss.METRIC_INNER_PRODUCT
    elif self.distance == VectorDistance.EUCLIDEAN:
        metric = faiss.METRIC_L2
    else:
        raise ValueError(f"Unsupported distance metric: {self.distance}")

    if self.index_type == 'Flat':
        if metric == faiss.METRIC_INNER_PRODUCT:
            index = faiss.IndexFlatIP(self.vector_dim)
        else:
            index = faiss.IndexFlatL2(self.vector_dim)
    elif self.index_type.startswith('IVF'):
        # IVF requires a coarse quantizer (a flat index here).
        # NOTE(review): the quantizer is L2 even for inner-product
        # metrics; confirm whether a matching quantizer is wanted.
        quantizer = faiss.IndexFlatL2(self.vector_dim)
        if self.index_type == 'IVF' or 'Flat' in self.index_type:
            # A bare 'IVF' (the documented option) means IVF over flat
            # codes; previously it fell through to the error branch.
            index = faiss.IndexIVFFlat(
                quantizer, self.vector_dim, self.nlist, metric
            )
        elif 'PQ' in self.index_type:
            # Number of PQ sub-quantizers. FAISS requires it to divide
            # vector_dim, so start from vector_dim // 4 and step down to
            # the nearest divisor (at least 1). Unchanged whenever
            # vector_dim // 4 already divides vector_dim.
            pq_m = max(1, self.vector_dim // 4)
            while self.vector_dim % pq_m != 0:
                pq_m -= 1
            nbits = 8  # typically 8 bits per sub-vector
            # Pass the metric explicitly; without it IndexIVFPQ falls
            # back to L2 regardless of the configured distance.
            index = faiss.IndexIVFPQ(
                quantizer, self.vector_dim, self.nlist, pq_m, nbits, metric
            )
        else:
            raise ValueError(
                f"Unsupported IVF index type: {self.index_type}"
            )

        # IVF indexes need to be trained before use; training needs data
        # and therefore does not happen in this method.
        # Number of clusters to search (trade-off between speed and
        # accuracy).
        index.nprobe = 10
    elif self.index_type == 'HNSW':
        index = faiss.IndexHNSWFlat(self.vector_dim, self.m, metric)
    else:
        raise ValueError(f"Unsupported index type: {self.index_type}")

    return index
|
|
201
|
+
|
|
202
|
+
def _save_to_disk(self) -> None:
    r"""Persists the FAISS index and its bookkeeping state.

    Does nothing when ``storage_path`` is ``None``. Otherwise the index is
    written with ``faiss.write_index`` and the ID mappings, payloads,
    stored vectors, vector dimension and distance are pickled into the
    metadata file.
    """
    if self.storage_path is None:
        return

    import faiss

    # The index file is written first, the metadata pickle second.
    faiss.write_index(self._index, self._get_index_path())

    state = dict(
        id_to_index=self._id_to_index,
        index_to_id=self._index_to_id,
        payloads=self._payloads,
        vectors=self._vectors,
        vector_dim=self.vector_dim,
        distance=self.distance,
    )
    with open(self._get_metadata_path(), 'wb') as handle:
        pickle.dump(state, handle)

    logger.info(f"Saved FAISS index and metadata to {self.storage_path}")
|
|
226
|
+
|
|
227
|
+
def _load_from_disk(self) -> None:
    r"""Loads the index and metadata from disk if they exist.

    Nothing happens when ``storage_path`` is ``None`` or when either the
    index file or the metadata file is missing. The index and the metadata
    are read and validated first and only then committed together, so a
    failing load can never pair a freshly read index with mappings that
    belong to a different state. If reading or validation fails, the error
    is logged and the store falls back to a new, empty index with empty
    mappings.
    """
    if self.storage_path is None:
        return

    import faiss

    index_path = self._get_index_path()
    metadata_path = self._get_metadata_path()

    if not (os.path.exists(index_path) and os.path.exists(metadata_path)):
        logger.info("No existing index found. Creating new one.")
        return

    required_keys = (
        "id_to_index",
        "index_to_id",
        "payloads",
        "vectors",
        "vector_dim",
    )

    try:
        loaded_index = faiss.read_index(index_path)

        # NOTE(security): pickle.load can execute arbitrary code. Only
        # point storage_path at files this process (or a trusted party)
        # has written.
        with open(metadata_path, 'rb') as f:
            metadata = pickle.load(f)

        # Verify metadata structure before anything is assigned
        missing_keys = [key for key in required_keys if key not in metadata]
        if missing_keys:
            raise ValueError(
                f"Metadata is missing required keys: {missing_keys}"
            )
    except Exception as e:
        logger.error(f"Failed to load index from disk: {e}")
        # Start over with an empty index; the mappings are cleared as well
        # so they cannot refer to positions the new index does not have.
        self._index = self._create_index()
        self._id_to_index = {}
        self._index_to_id = {}
        self._payloads = {}
        self._vectors = {}
        return

    # Commit the validated state in one go
    self._index = loaded_index
    self._id_to_index = metadata["id_to_index"]
    self._index_to_id = metadata["index_to_id"]
    self._payloads = metadata["payloads"]
    self._vectors = metadata["vectors"]

    # A dimension mismatch is only reported; the loaded index is kept
    if metadata["vector_dim"] != self.vector_dim:
        logger.warning(
            f"Loaded index has different vector dimension "
            f"({metadata['vector_dim']}) than specified "
            f"({self.vector_dim})."
        )

    logger.info(f"Loaded FAISS index and metadata from {self.storage_path}")
|
|
284
|
+
|
|
285
|
+
def add(
    self,
    records: List[VectorRecord],
    **kwargs,
) -> None:
    r"""Adds a list of vectors to the index.

    Vectors are converted to ``float32`` and, for cosine distance,
    normalized before they are added. An untrained IVF index is trained on
    the incoming batch first. Each record's position is taken from the
    index's own ``ntotal`` counter, so the ID mappings stay aligned with
    the index even when an ID is added more than once.

    Args:
        records (List[VectorRecord]): List of vector records to be added.
        **kwargs (Any): Additional keyword arguments.

    Raises:
        RuntimeError: If there was an error in the addition process.
    """
    import numpy as np

    if not records:
        return

    # Check if the index needs training (for IVF indexes)
    if self.index_type.startswith('IVF') and not self._index.is_trained:
        training_vectors = np.array(
            [record.vector for record in records], dtype=np.float32
        )
        # Train on the same representation that is added below
        if self.distance == VectorDistance.COSINE:
            training_vectors = self._normalize_vector(training_vectors)
        try:
            self._index.train(training_vectors)
        except Exception as e:
            raise RuntimeError(f"Failed to train FAISS index: {e}") from e

    for record in records:
        vector = np.array(record.vector, dtype=np.float32).reshape(1, -1)
        if self.distance == VectorDistance.COSINE:
            vector = self._normalize_vector(vector)

        # The position the index gives the next vector is its current
        # size. Counting the known IDs instead drifts as soon as an ID is
        # re-added.
        idx = self._index.ntotal

        try:
            self._index.add(vector)
            # Store mapping between ID and index position
            self._id_to_index[record.id] = idx
            self._index_to_id[idx] = record.id
            # Store payload
            if record.payload is not None:
                self._payloads[record.id] = record.payload.copy()
            else:
                self._payloads[record.id] = {}
            # Keep the (possibly normalized) vector for later retrieval
            self._vectors[record.id] = vector.flatten().copy()
        except Exception as e:
            # Roll back whatever bookkeeping was written for this record
            self._id_to_index.pop(record.id, None)
            self._index_to_id.pop(idx, None)
            self._payloads.pop(record.id, None)
            self._vectors.pop(record.id, None)
            raise RuntimeError(
                f"Failed to add vector to FAISS index: {e}"
            ) from e

    # Save to disk if storage path is provided
    self._save_to_disk()
|
|
352
|
+
|
|
353
|
+
def update_payload(
    self, ids: List[str], payload: Dict[str, Any], **kwargs: Any
) -> None:
    r"""Updates the payload of the vectors identified by their IDs.

    The given payload is merged into the existing payload of every listed
    ID. All IDs are checked before anything is modified, so an unknown ID
    cannot leave some payloads updated in memory without being saved.

    Args:
        ids (List[str]): List of unique identifiers for the vectors to be
            updated.
        payload (Dict[str, Any]): Payload to be updated for all specified
            IDs.
        **kwargs (Any): Additional keyword arguments.

    Raises:
        KeyError: If any of the provided IDs does not exist in the index.
    """
    # Validate first: fail before any payload has been touched
    for vector_id in ids:
        if vector_id not in self._payloads:
            raise KeyError(
                f"Vector with ID {vector_id} not found in the index."
            )

    # Merge the new values into each existing payload
    for vector_id in ids:
        self._payloads[vector_id].update(payload)

    # Save to disk if storage path is provided
    self._save_to_disk()
|
|
380
|
+
|
|
381
|
+
def delete_collection(self) -> None:
    r"""Drops every stored vector together with its persisted files.

    The index is replaced by a freshly created one and all ID mappings,
    payloads and stored vectors are emptied. When a storage path is set,
    the index file and the metadata file are removed if they exist.
    """
    # Swap in an empty index and forget all bookkeeping state
    self._index = self._create_index()
    self._id_to_index, self._index_to_id = {}, {}
    self._payloads, self._vectors = {}, {}

    if not self.storage_path:
        return

    # Both paths are resolved before any file is touched
    stale_files = (self._get_index_path(), self._get_metadata_path())
    for path in stale_files:
        if os.path.exists(path):
            os.remove(path)

    logger.info(
        f"Deleted FAISS index and metadata from {self.storage_path}"
    )
|
|
406
|
+
|
|
407
|
+
def delete(
    self,
    ids: Optional[List[str]] = None,
    payload_filter: Optional[Dict[str, Any]] = None,
    **kwargs: Any,
) -> None:
    r"""Deletes vectors from the index based on either IDs or payload
    filters.

    Args:
        ids (Optional[List[str]], optional): List of unique identifiers
            for the vectors to be deleted.
        payload_filter (Optional[Dict[str, Any]], optional): A filter for
            the payload to delete points matching specific conditions.
        **kwargs (Any): Additional keyword arguments.

    Raises:
        ValueError: If neither `ids` nor `payload_filter` is provided.
        RuntimeError: If the index cannot be rebuilt without the deleted
            vectors.

    Notes:
        - FAISS does not support efficient single vector removal for most
          index types. This implementation recreates the index without the
          deleted vectors, which can be inefficient for large datasets.
          Rebuilding also renumbers the remaining vectors contiguously, so
          the ID mappings always agree with the positions in the index.
        - If both `ids` and `payload_filter` are provided, both filters
          will be applied (vectors matching either will be deleted).
        - IDs that are not present in the index are ignored.
    """
    import numpy as np

    if not ids and not payload_filter:
        raise ValueError(
            "You must provide either `ids` or `payload_filter` to delete "
            "vectors."
        )

    # Explicit IDs plus every ID whose payload matches all filter entries
    ids_to_delete = set(ids or [])
    if payload_filter:
        ids_to_delete.update(
            vector_id
            for vector_id, payload in self._payloads.items()
            if all(
                payload.get(key) == value
                for key, value in payload_filter.items()
            )
        )

    # Nothing to do when none of the requested IDs is actually stored
    if not any(
        vector_id in self._id_to_index for vector_id in ids_to_delete
    ):
        return

    keep_ids = [
        vector_id
        for vector_id in self._id_to_index
        if vector_id not in ids_to_delete
    ]

    # Build the replacement index from the stored vectors. These were
    # stored in the form they were added to the index, so they are added
    # back unchanged.
    new_index = self._create_index()
    if keep_ids:
        kept_vectors = np.vstack(
            [
                np.asarray(self._vectors[vector_id], dtype=np.float32)
                .reshape(1, -1)
                for vector_id in keep_ids
            ]
        )
        try:
            # IVF indexes have to be trained before vectors can be added
            if self.index_type.startswith('IVF'):
                new_index.train(kept_vectors)
            new_index.add(kept_vectors)
        except Exception as e:
            raise RuntimeError(
                f"Failed to remove vectors from FAISS index: {e}"
            ) from e

    # Replace the old index and mappings only after the rebuild succeeded
    self._index = new_index
    self._id_to_index = {
        vector_id: position for position, vector_id in enumerate(keep_ids)
    }
    self._index_to_id = dict(enumerate(keep_ids))

    # Remove deleted IDs from payloads and vectors
    for vector_id in ids_to_delete:
        self._payloads.pop(vector_id, None)
        self._vectors.pop(vector_id, None)

    # Save to disk if storage path is provided
    self._save_to_disk()
|
|
550
|
+
|
|
551
|
+
def status(self) -> VectorDBStatus:
    r"""Reports the vector dimension and the number of indexed vectors.

    Returns:
        VectorDBStatus: Status built from ``vector_dim`` and the index's
            ``ntotal`` counter.
    """
    dimension = self.vector_dim
    stored_count = self._index.ntotal
    return VectorDBStatus(vector_dim=dimension, vector_count=stored_count)
|
|
561
|
+
|
|
562
|
+
def query(
    self,
    query: VectorDBQuery,
    filter_conditions: Optional[Dict[str, Any]] = None,
    **kwargs: Any,
) -> List[VectorDBQueryResult]:
    r"""Searches for similar vectors in the storage based on the provided
    query.

    Args:
        query (VectorDBQuery): The query object containing the search
            vector and the number of top similar vectors to retrieve.
        filter_conditions (Optional[Dict[str, Any]], optional): A
            dictionary specifying conditions to filter the query results.
        **kwargs (Any): Additional keyword arguments. ``nprobe``
            (default 10) is applied to indexes that expose an ``nprobe``
            attribute; ``fetch_factor`` (default 2) scales how many
            candidates are fetched when ``filter_conditions`` is given.

    Returns:
        List[VectorDBQueryResult]: A list of query results ordered by
            similarity. The similarity is always a Python ``float``; for
            Euclidean distance it is ``1 / (1 + distance)``, otherwise the
            raw score returned by the index.
    """
    import numpy as np

    if self._index.ntotal == 0:
        return []

    # Prepare the query vector as a single-row float32 matrix
    query_vector = np.array(query.query_vector, dtype=np.float32)
    if query_vector.ndim == 1:
        query_vector = query_vector.reshape(1, -1)

    # Normalize if using cosine similarity
    if self.distance == VectorDistance.COSINE:
        query_vector = self._normalize_vector(query_vector)

    # For indexes with an nprobe setting, set the number of clusters to
    # probe (higher = more accurate but slower)
    if hasattr(self._index, 'nprobe'):
        self._index.nprobe = kwargs.get('nprobe', 10)

    k = query.top_k
    if filter_conditions:
        # Over-fetch so that enough candidates survive the filter. Never
        # fetch fewer than k, even if a fetch_factor below 1 is passed.
        fetch_factor = kwargs.get('fetch_factor', 2)
        fetch_k = min(max(k, int(k * fetch_factor)), self._index.ntotal)
    else:
        fetch_k = min(k, self._index.ntotal)

    # Perform the search
    distances, indices = self._index.search(query_vector, fetch_k)

    results = []
    for raw_score, idx in zip(distances[0], indices[0]):
        if idx == -1:  # FAISS returns -1 for empty slots
            continue

        # Only skip positions without a mapping; a falsy ID is still an ID
        vector_id = self._index_to_id.get(int(idx))
        if vector_id is None:
            continue

        if filter_conditions and not self._matches_filter(
            vector_id, filter_conditions
        ):
            continue

        if self.distance == VectorDistance.EUCLIDEAN:
            # Smaller distance is better, so it is inverted
            similarity = 1.0 / (1.0 + float(raw_score))
        else:
            # For inner product/cosine, higher is better
            similarity = float(raw_score)

        stored_vector = self._vectors.get(vector_id)
        vector = stored_vector.tolist() if stored_vector is not None else []

        results.append(
            VectorDBQueryResult.create(
                similarity=similarity,
                id=vector_id,
                payload=self._payloads.get(vector_id, {}),
                vector=cast(List[float], vector),
            )
        )

        # Stop once we have enough results
        if len(results) >= k:
            break

    return results
|
|
661
|
+
|
|
662
|
+
def clear(self) -> None:
    r"""Empties the storage by delegating to ``delete_collection``."""
    self.delete_collection()
|
|
665
|
+
|
|
666
|
+
def load(self) -> None:
    r"""Reloads the index from disk when a storage path is configured.

    Without a storage path only a warning is logged.
    """
    if not self.storage_path:
        logger.warning("No storage path provided. Cannot load index.")
        return

    self._load_from_disk()
|
|
672
|
+
|
|
673
|
+
@property
def client(self) -> Any:
    r"""Exposes the FAISS client object held in ``_faiss_client``."""
    faiss_client = self._faiss_client
    return faiss_client
|
|
677
|
+
|
|
678
|
+
def _matches_filter(
    self, vector_id: str, filter_conditions: Dict[str, Any]
) -> bool:
    r"""Tells whether the payload stored for a vector satisfies a filter.

    A vector without a stored payload is treated as having an empty one.

    Args:
        vector_id (str): ID of the vector whose payload is inspected.
        filter_conditions (Dict[str, Any]): Key/value pairs that must all
            be present in the payload with equal values.

    Returns:
        bool: True if every condition is met (also for an empty filter),
            False as soon as a key is missing or its value differs.
    """
    payload = self._payloads.get(vector_id, {})
    has_mismatch = any(
        key not in payload or payload[key] != expected
        for key, expected in filter_conditions.items()
    )
    return not has_mismatch
|
|
695
|
+
|
|
696
|
+
def _normalize_vector(self, vector: "ndarray") -> "ndarray":
    r"""Normalizes a vector to unit length for cosine similarity.

    A 2D input is normalized row by row. A 1D input is treated as a single
    row and handed back as a 1D array, so the result always has the shape
    of the input.

    Args:
        vector (ndarray): Vector to normalize, either 1D or 2D array.

    Returns:
        ndarray: Normalized vector with the same shape as input. Rows with
            (near) zero length are divided by ``1e-10`` instead of their
            norm, so an all-zero row stays all-zero.
    """
    import numpy as np

    input_shape = vector.shape
    rows = vector.reshape(1, -1) if vector.ndim == 1 else vector

    norm = np.linalg.norm(rows, axis=1, keepdims=True)
    # Avoid division by zero
    norm = np.maximum(norm, 1e-10)

    # Restore the caller's shape (a no-op for 2D input)
    return (rows / norm).reshape(input_shape)
|