camel-ai 0.2.37__py3-none-any.whl → 0.2.38__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic. Click here for more details.
- camel/__init__.py +1 -1
- camel/datagen/evol_instruct/__init__.py +20 -0
- camel/datagen/evol_instruct/evol_instruct.py +424 -0
- camel/datagen/evol_instruct/scorer.py +166 -0
- camel/datagen/evol_instruct/templates.py +268 -0
- camel/environments/models.py +10 -4
- camel/environments/single_step.py +91 -17
- camel/interpreters/docker_interpreter.py +1 -1
- camel/interpreters/e2b_interpreter.py +1 -1
- camel/interpreters/subprocess_interpreter.py +1 -1
- camel/loaders/__init__.py +2 -2
- camel/loaders/{panda_reader.py → pandas_reader.py} +61 -30
- camel/memories/context_creators/score_based.py +198 -67
- camel/models/aiml_model.py +9 -3
- camel/models/anthropic_model.py +11 -3
- camel/models/azure_openai_model.py +9 -3
- camel/models/base_audio_model.py +6 -0
- camel/models/base_model.py +4 -0
- camel/models/deepseek_model.py +9 -3
- camel/models/gemini_model.py +9 -3
- camel/models/groq_model.py +9 -3
- camel/models/internlm_model.py +8 -2
- camel/models/model_factory.py +4 -0
- camel/models/moonshot_model.py +8 -2
- camel/models/nemotron_model.py +9 -3
- camel/models/nvidia_model.py +9 -3
- camel/models/ollama_model.py +9 -3
- camel/models/openai_audio_models.py +5 -3
- camel/models/openai_compatible_model.py +9 -3
- camel/models/openai_model.py +9 -3
- camel/models/openrouter_model.py +9 -3
- camel/models/qwen_model.py +9 -3
- camel/models/samba_model.py +9 -3
- camel/models/sglang_model.py +11 -4
- camel/models/siliconflow_model.py +8 -2
- camel/models/stub_model.py +2 -1
- camel/models/togetherai_model.py +9 -3
- camel/models/vllm_model.py +9 -3
- camel/models/yi_model.py +9 -3
- camel/models/zhipuai_model.py +9 -3
- camel/retrievers/auto_retriever.py +14 -0
- camel/storages/__init__.py +2 -0
- camel/storages/vectordb_storages/__init__.py +2 -0
- camel/storages/vectordb_storages/tidb.py +332 -0
- camel/toolkits/__init__.py +5 -0
- camel/toolkits/browser_toolkit.py +84 -61
- camel/toolkits/openai_agent_toolkit.py +131 -0
- camel/toolkits/searxng_toolkit.py +207 -0
- camel/toolkits/thinking_toolkit.py +168 -12
- camel/types/enums.py +1 -0
- camel/verifiers/python_verifier.py +12 -4
- {camel_ai-0.2.37.dist-info → camel_ai-0.2.38.dist-info}/METADATA +52 -4
- {camel_ai-0.2.37.dist-info → camel_ai-0.2.38.dist-info}/RECORD +55 -48
- {camel_ai-0.2.37.dist-info → camel_ai-0.2.38.dist-info}/WHEEL +0 -0
- {camel_ai-0.2.37.dist-info → camel_ai-0.2.38.dist-info}/licenses/LICENSE +0 -0
camel/models/sglang_model.py
CHANGED
|
@@ -12,6 +12,7 @@
|
|
|
12
12
|
# limitations under the License.
|
|
13
13
|
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
14
14
|
import logging
|
|
15
|
+
import os
|
|
15
16
|
import subprocess
|
|
16
17
|
import threading
|
|
17
18
|
import time
|
|
@@ -51,6 +52,10 @@ class SGLangModel(BaseModelBackend):
|
|
|
51
52
|
use for the model. If not provided, :obj:`OpenAITokenCounter(
|
|
52
53
|
ModelType.GPT_4O_MINI)` will be used.
|
|
53
54
|
(default: :obj:`None`)
|
|
55
|
+
timeout (Optional[float], optional): The timeout value in seconds for
|
|
56
|
+
API calls. If not provided, will fall back to the MODEL_TIMEOUT
|
|
57
|
+
environment variable or default to 180 seconds.
|
|
58
|
+
(default: :obj:`None`)
|
|
54
59
|
|
|
55
60
|
Reference: https://sgl-project.github.io/backend/openai_api_completions.html
|
|
56
61
|
"""
|
|
@@ -62,6 +67,7 @@ class SGLangModel(BaseModelBackend):
|
|
|
62
67
|
api_key: Optional[str] = None,
|
|
63
68
|
url: Optional[str] = None,
|
|
64
69
|
token_counter: Optional[BaseTokenCounter] = None,
|
|
70
|
+
timeout: Optional[float] = None,
|
|
65
71
|
) -> None:
|
|
66
72
|
if model_config_dict is None:
|
|
67
73
|
model_config_dict = SGLangConfig().as_dict()
|
|
@@ -73,8 +79,9 @@ class SGLangModel(BaseModelBackend):
|
|
|
73
79
|
self._lock = threading.Lock()
|
|
74
80
|
self._inactivity_thread: Optional[threading.Thread] = None
|
|
75
81
|
|
|
82
|
+
timeout = timeout or float(os.environ.get("MODEL_TIMEOUT", 180))
|
|
76
83
|
super().__init__(
|
|
77
|
-
model_type, model_config_dict, api_key, url, token_counter
|
|
84
|
+
model_type, model_config_dict, api_key, url, token_counter, timeout
|
|
78
85
|
)
|
|
79
86
|
|
|
80
87
|
self._client = None
|
|
@@ -82,13 +89,13 @@ class SGLangModel(BaseModelBackend):
|
|
|
82
89
|
if self._url:
|
|
83
90
|
# Initialize the client if an existing URL is provided
|
|
84
91
|
self._client = OpenAI(
|
|
85
|
-
timeout=
|
|
92
|
+
timeout=self._timeout,
|
|
86
93
|
max_retries=3,
|
|
87
94
|
api_key="Set-but-ignored", # required but ignored
|
|
88
95
|
base_url=self._url,
|
|
89
96
|
)
|
|
90
97
|
self._async_client = AsyncOpenAI(
|
|
91
|
-
timeout=
|
|
98
|
+
timeout=self._timeout,
|
|
92
99
|
max_retries=3,
|
|
93
100
|
api_key="Set-but-ignored", # required but ignored
|
|
94
101
|
base_url=self._url,
|
|
@@ -123,7 +130,7 @@ class SGLangModel(BaseModelBackend):
|
|
|
123
130
|
self.last_run_time = time.time()
|
|
124
131
|
# Initialize the client after the server starts
|
|
125
132
|
self._client = OpenAI(
|
|
126
|
-
timeout=
|
|
133
|
+
timeout=self._timeout,
|
|
127
134
|
max_retries=3,
|
|
128
135
|
api_key="Set-but-ignored", # required but ignored
|
|
129
136
|
base_url=self._url,
|
|
@@ -51,6 +51,10 @@ class SiliconFlowModel(BaseModelBackend):
|
|
|
51
51
|
use for the model. If not provided, :obj:`OpenAITokenCounter(
|
|
52
52
|
ModelType.GPT_4O_MINI)` will be used.
|
|
53
53
|
(default: :obj:`None`)
|
|
54
|
+
timeout (Optional[float], optional): The timeout value in seconds for
|
|
55
|
+
API calls. If not provided, will fall back to the MODEL_TIMEOUT
|
|
56
|
+
environment variable or default to 180 seconds.
|
|
57
|
+
(default: :obj:`None`)
|
|
54
58
|
"""
|
|
55
59
|
|
|
56
60
|
@api_keys_required(
|
|
@@ -65,6 +69,7 @@ class SiliconFlowModel(BaseModelBackend):
|
|
|
65
69
|
api_key: Optional[str] = None,
|
|
66
70
|
url: Optional[str] = None,
|
|
67
71
|
token_counter: Optional[BaseTokenCounter] = None,
|
|
72
|
+
timeout: Optional[float] = None,
|
|
68
73
|
) -> None:
|
|
69
74
|
if model_config_dict is None:
|
|
70
75
|
model_config_dict = SiliconFlowConfig().as_dict()
|
|
@@ -73,11 +78,12 @@ class SiliconFlowModel(BaseModelBackend):
|
|
|
73
78
|
"SILICONFLOW_API_BASE_URL",
|
|
74
79
|
"https://api.siliconflow.cn/v1/",
|
|
75
80
|
)
|
|
81
|
+
timeout = timeout or float(os.environ.get("MODEL_TIMEOUT", 180))
|
|
76
82
|
super().__init__(
|
|
77
|
-
model_type, model_config_dict, api_key, url, token_counter
|
|
83
|
+
model_type, model_config_dict, api_key, url, token_counter, timeout
|
|
78
84
|
)
|
|
79
85
|
self._client = OpenAI(
|
|
80
|
-
timeout=
|
|
86
|
+
timeout=self._timeout,
|
|
81
87
|
max_retries=3,
|
|
82
88
|
api_key=self._api_key,
|
|
83
89
|
base_url=self._url,
|
camel/models/stub_model.py
CHANGED
|
@@ -82,10 +82,11 @@ class StubModel(BaseModelBackend):
|
|
|
82
82
|
api_key: Optional[str] = None,
|
|
83
83
|
url: Optional[str] = None,
|
|
84
84
|
token_counter: Optional[BaseTokenCounter] = None,
|
|
85
|
+
timeout: Optional[float] = None,
|
|
85
86
|
) -> None:
|
|
86
87
|
r"""All arguments are unused for the dummy model."""
|
|
87
88
|
super().__init__(
|
|
88
|
-
model_type, model_config_dict, api_key, url, token_counter
|
|
89
|
+
model_type, model_config_dict, api_key, url, token_counter, timeout
|
|
89
90
|
)
|
|
90
91
|
|
|
91
92
|
@property
|
camel/models/togetherai_model.py
CHANGED
|
@@ -52,6 +52,10 @@ class TogetherAIModel(BaseModelBackend):
|
|
|
52
52
|
token_counter (Optional[BaseTokenCounter], optional): Token counter to
|
|
53
53
|
use for the model. If not provided, :obj:`OpenAITokenCounter(
|
|
54
54
|
ModelType.GPT_4O_MINI)` will be used.
|
|
55
|
+
timeout (Optional[float], optional): The timeout value in seconds for
|
|
56
|
+
API calls. If not provided, will fall back to the MODEL_TIMEOUT
|
|
57
|
+
environment variable or default to 180 seconds.
|
|
58
|
+
(default: :obj:`None`)
|
|
55
59
|
"""
|
|
56
60
|
|
|
57
61
|
@api_keys_required(
|
|
@@ -66,6 +70,7 @@ class TogetherAIModel(BaseModelBackend):
|
|
|
66
70
|
api_key: Optional[str] = None,
|
|
67
71
|
url: Optional[str] = None,
|
|
68
72
|
token_counter: Optional[BaseTokenCounter] = None,
|
|
73
|
+
timeout: Optional[float] = None,
|
|
69
74
|
) -> None:
|
|
70
75
|
if model_config_dict is None:
|
|
71
76
|
model_config_dict = TogetherAIConfig().as_dict()
|
|
@@ -73,18 +78,19 @@ class TogetherAIModel(BaseModelBackend):
|
|
|
73
78
|
url = url or os.environ.get(
|
|
74
79
|
"TOGETHER_API_BASE_URL", "https://api.together.xyz/v1"
|
|
75
80
|
)
|
|
81
|
+
timeout = timeout or float(os.environ.get("MODEL_TIMEOUT", 180))
|
|
76
82
|
super().__init__(
|
|
77
|
-
model_type, model_config_dict, api_key, url, token_counter
|
|
83
|
+
model_type, model_config_dict, api_key, url, token_counter, timeout
|
|
78
84
|
)
|
|
79
85
|
|
|
80
86
|
self._client = OpenAI(
|
|
81
|
-
timeout=
|
|
87
|
+
timeout=self._timeout,
|
|
82
88
|
max_retries=3,
|
|
83
89
|
api_key=self._api_key,
|
|
84
90
|
base_url=self._url,
|
|
85
91
|
)
|
|
86
92
|
self._async_client = AsyncOpenAI(
|
|
87
|
-
timeout=
|
|
93
|
+
timeout=self._timeout,
|
|
88
94
|
max_retries=3,
|
|
89
95
|
api_key=self._api_key,
|
|
90
96
|
base_url=self._url,
|
camel/models/vllm_model.py
CHANGED
|
@@ -50,6 +50,10 @@ class VLLMModel(BaseModelBackend):
|
|
|
50
50
|
use for the model. If not provided, :obj:`OpenAITokenCounter(
|
|
51
51
|
ModelType.GPT_4O_MINI)` will be used.
|
|
52
52
|
(default: :obj:`None`)
|
|
53
|
+
timeout (Optional[float], optional): The timeout value in seconds for
|
|
54
|
+
API calls. If not provided, will fall back to the MODEL_TIMEOUT
|
|
55
|
+
environment variable or default to 180 seconds.
|
|
56
|
+
(default: :obj:`None`)
|
|
53
57
|
|
|
54
58
|
References:
|
|
55
59
|
https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html
|
|
@@ -62,24 +66,26 @@ class VLLMModel(BaseModelBackend):
|
|
|
62
66
|
api_key: Optional[str] = None,
|
|
63
67
|
url: Optional[str] = None,
|
|
64
68
|
token_counter: Optional[BaseTokenCounter] = None,
|
|
69
|
+
timeout: Optional[float] = None,
|
|
65
70
|
) -> None:
|
|
66
71
|
if model_config_dict is None:
|
|
67
72
|
model_config_dict = VLLMConfig().as_dict()
|
|
68
73
|
url = url or os.environ.get("VLLM_BASE_URL")
|
|
74
|
+
timeout = timeout or float(os.environ.get("MODEL_TIMEOUT", 180))
|
|
69
75
|
super().__init__(
|
|
70
|
-
model_type, model_config_dict, api_key, url, token_counter
|
|
76
|
+
model_type, model_config_dict, api_key, url, token_counter, timeout
|
|
71
77
|
)
|
|
72
78
|
if not self._url:
|
|
73
79
|
self._start_server()
|
|
74
80
|
# Use the OpenAI client as the interface for calling vLLM
|
|
75
81
|
self._client = OpenAI(
|
|
76
|
-
timeout=
|
|
82
|
+
timeout=self._timeout,
|
|
77
83
|
max_retries=3,
|
|
78
84
|
api_key="EMPTY", # required but ignored
|
|
79
85
|
base_url=self._url,
|
|
80
86
|
)
|
|
81
87
|
self._async_client = AsyncOpenAI(
|
|
82
|
-
timeout=
|
|
88
|
+
timeout=self._timeout,
|
|
83
89
|
max_retries=3,
|
|
84
90
|
api_key="EMPTY", # required but ignored
|
|
85
91
|
base_url=self._url,
|
camel/models/yi_model.py
CHANGED
|
@@ -51,6 +51,10 @@ class YiModel(BaseModelBackend):
|
|
|
51
51
|
use for the model. If not provided, :obj:`OpenAITokenCounter(
|
|
52
52
|
ModelType.GPT_4O_MINI)` will be used.
|
|
53
53
|
(default: :obj:`None`)
|
|
54
|
+
timeout (Optional[float], optional): The timeout value in seconds for
|
|
55
|
+
API calls. If not provided, will fall back to the MODEL_TIMEOUT
|
|
56
|
+
environment variable or default to 180 seconds.
|
|
57
|
+
(default: :obj:`None`)
|
|
54
58
|
"""
|
|
55
59
|
|
|
56
60
|
@api_keys_required(
|
|
@@ -65,6 +69,7 @@ class YiModel(BaseModelBackend):
|
|
|
65
69
|
api_key: Optional[str] = None,
|
|
66
70
|
url: Optional[str] = None,
|
|
67
71
|
token_counter: Optional[BaseTokenCounter] = None,
|
|
72
|
+
timeout: Optional[float] = None,
|
|
68
73
|
) -> None:
|
|
69
74
|
if model_config_dict is None:
|
|
70
75
|
model_config_dict = YiConfig().as_dict()
|
|
@@ -72,17 +77,18 @@ class YiModel(BaseModelBackend):
|
|
|
72
77
|
url = url or os.environ.get(
|
|
73
78
|
"YI_API_BASE_URL", "https://api.lingyiwanwu.com/v1"
|
|
74
79
|
)
|
|
80
|
+
timeout = timeout or float(os.environ.get("MODEL_TIMEOUT", 180))
|
|
75
81
|
super().__init__(
|
|
76
|
-
model_type, model_config_dict, api_key, url, token_counter
|
|
82
|
+
model_type, model_config_dict, api_key, url, token_counter, timeout
|
|
77
83
|
)
|
|
78
84
|
self._client = OpenAI(
|
|
79
|
-
timeout=
|
|
85
|
+
timeout=self._timeout,
|
|
80
86
|
max_retries=3,
|
|
81
87
|
api_key=self._api_key,
|
|
82
88
|
base_url=self._url,
|
|
83
89
|
)
|
|
84
90
|
self._async_client = AsyncOpenAI(
|
|
85
|
-
timeout=
|
|
91
|
+
timeout=self._timeout,
|
|
86
92
|
max_retries=3,
|
|
87
93
|
api_key=self._api_key,
|
|
88
94
|
base_url=self._url,
|
camel/models/zhipuai_model.py
CHANGED
|
@@ -51,6 +51,10 @@ class ZhipuAIModel(BaseModelBackend):
|
|
|
51
51
|
use for the model. If not provided, :obj:`OpenAITokenCounter(
|
|
52
52
|
ModelType.GPT_4O_MINI)` will be used.
|
|
53
53
|
(default: :obj:`None`)
|
|
54
|
+
timeout (Optional[float], optional): The timeout value in seconds for
|
|
55
|
+
API calls. If not provided, will fall back to the MODEL_TIMEOUT
|
|
56
|
+
environment variable or default to 180 seconds.
|
|
57
|
+
(default: :obj:`None`)
|
|
54
58
|
"""
|
|
55
59
|
|
|
56
60
|
@api_keys_required(
|
|
@@ -65,6 +69,7 @@ class ZhipuAIModel(BaseModelBackend):
|
|
|
65
69
|
api_key: Optional[str] = None,
|
|
66
70
|
url: Optional[str] = None,
|
|
67
71
|
token_counter: Optional[BaseTokenCounter] = None,
|
|
72
|
+
timeout: Optional[float] = None,
|
|
68
73
|
) -> None:
|
|
69
74
|
if model_config_dict is None:
|
|
70
75
|
model_config_dict = ZhipuAIConfig().as_dict()
|
|
@@ -72,17 +77,18 @@ class ZhipuAIModel(BaseModelBackend):
|
|
|
72
77
|
url = url or os.environ.get(
|
|
73
78
|
"ZHIPUAI_API_BASE_URL", "https://open.bigmodel.cn/api/paas/v4/"
|
|
74
79
|
)
|
|
80
|
+
timeout = timeout or float(os.environ.get("MODEL_TIMEOUT", 180))
|
|
75
81
|
super().__init__(
|
|
76
|
-
model_type, model_config_dict, api_key, url, token_counter
|
|
82
|
+
model_type, model_config_dict, api_key, url, token_counter, timeout
|
|
77
83
|
)
|
|
78
84
|
self._client = OpenAI(
|
|
79
|
-
timeout=
|
|
85
|
+
timeout=self._timeout,
|
|
80
86
|
max_retries=3,
|
|
81
87
|
api_key=self._api_key,
|
|
82
88
|
base_url=self._url,
|
|
83
89
|
)
|
|
84
90
|
self._async_client = AsyncOpenAI(
|
|
85
|
-
timeout=
|
|
91
|
+
timeout=self._timeout,
|
|
86
92
|
max_retries=3,
|
|
87
93
|
api_key=self._api_key,
|
|
88
94
|
base_url=self._url,
|
|
@@ -29,6 +29,7 @@ from camel.storages import (
|
|
|
29
29
|
BaseVectorStorage,
|
|
30
30
|
MilvusStorage,
|
|
31
31
|
QdrantStorage,
|
|
32
|
+
TiDBStorage,
|
|
32
33
|
)
|
|
33
34
|
from camel.types import StorageType
|
|
34
35
|
from camel.utils import Constants
|
|
@@ -90,6 +91,19 @@ class AutoRetriever:
|
|
|
90
91
|
url_and_api_key=self.url_and_api_key,
|
|
91
92
|
)
|
|
92
93
|
|
|
94
|
+
if self.storage_type == StorageType.TIDB:
|
|
95
|
+
if self.url_and_api_key is None:
|
|
96
|
+
raise ValueError(
|
|
97
|
+
"URL (database url) and API key required for TiDB storage "
|
|
98
|
+
"are not provided. Format: "
|
|
99
|
+
"mysql+pymysql://<username>:<password>@<host>:4000/test"
|
|
100
|
+
)
|
|
101
|
+
return TiDBStorage(
|
|
102
|
+
vector_dim=self.embedding_model.get_output_dim(),
|
|
103
|
+
collection_name=collection_name,
|
|
104
|
+
url_and_api_key=self.url_and_api_key,
|
|
105
|
+
)
|
|
106
|
+
|
|
93
107
|
if self.storage_type == StorageType.QDRANT:
|
|
94
108
|
return QdrantStorage(
|
|
95
109
|
vector_dim=self.embedding_model.get_output_dim(),
|
camel/storages/__init__.py
CHANGED
|
@@ -28,6 +28,7 @@ from .vectordb_storages.base import (
|
|
|
28
28
|
)
|
|
29
29
|
from .vectordb_storages.milvus import MilvusStorage
|
|
30
30
|
from .vectordb_storages.qdrant import QdrantStorage
|
|
31
|
+
from .vectordb_storages.tidb import TiDBStorage
|
|
31
32
|
|
|
32
33
|
__all__ = [
|
|
33
34
|
'BaseKeyValueStorage',
|
|
@@ -40,6 +41,7 @@ __all__ = [
|
|
|
40
41
|
'VectorDBQueryResult',
|
|
41
42
|
'QdrantStorage',
|
|
42
43
|
'MilvusStorage',
|
|
44
|
+
"TiDBStorage",
|
|
43
45
|
'BaseGraphStorage',
|
|
44
46
|
'Neo4jGraph',
|
|
45
47
|
'NebulaGraph',
|
|
@@ -21,6 +21,7 @@ from .base import (
|
|
|
21
21
|
)
|
|
22
22
|
from .milvus import MilvusStorage
|
|
23
23
|
from .qdrant import QdrantStorage
|
|
24
|
+
from .tidb import TiDBStorage
|
|
24
25
|
|
|
25
26
|
__all__ = [
|
|
26
27
|
'BaseVectorStorage',
|
|
@@ -28,6 +29,7 @@ __all__ = [
|
|
|
28
29
|
'VectorDBQueryResult',
|
|
29
30
|
'QdrantStorage',
|
|
30
31
|
'MilvusStorage',
|
|
32
|
+
"TiDBStorage",
|
|
31
33
|
'VectorRecord',
|
|
32
34
|
'VectorDBStatus',
|
|
33
35
|
]
|
|
@@ -0,0 +1,332 @@
|
|
|
1
|
+
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
14
|
+
import json
|
|
15
|
+
import logging
|
|
16
|
+
import re
|
|
17
|
+
from datetime import datetime
|
|
18
|
+
from enum import Enum
|
|
19
|
+
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
|
|
20
|
+
|
|
21
|
+
from camel.storages.vectordb_storages import (
|
|
22
|
+
BaseVectorStorage,
|
|
23
|
+
VectorDBQuery,
|
|
24
|
+
VectorDBQueryResult,
|
|
25
|
+
VectorDBStatus,
|
|
26
|
+
VectorRecord,
|
|
27
|
+
)
|
|
28
|
+
from camel.utils import dependencies_required
|
|
29
|
+
|
|
30
|
+
if TYPE_CHECKING:
|
|
31
|
+
from pytidb import Table, TiDBClient
|
|
32
|
+
|
|
33
|
+
logger = logging.getLogger(__name__)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class EnumEncoder(json.JSONEncoder):
|
|
37
|
+
def default(self, obj):
|
|
38
|
+
if isinstance(obj, Enum):
|
|
39
|
+
return obj.value
|
|
40
|
+
return super().default(obj)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class TiDBStorage(BaseVectorStorage):
|
|
44
|
+
r"""An implementation of the `BaseVectorStorage` for interacting with TiDB.
|
|
45
|
+
|
|
46
|
+
The detailed information about TiDB is available at:
|
|
47
|
+
`TiDB Vector Search <https://ai.pingcap.com/>`_
|
|
48
|
+
|
|
49
|
+
Args:
|
|
50
|
+
vector_dim (int): The dimension of the stored vectors.
|
|
51
|
+
url_and_api_key (Optional[Union[Tuple[str, str], str]]): A tuple
|
|
52
|
+
containing the database url and API key for connecting to a TiDB
|
|
53
|
+
cluster. The URL should be in the format:
|
|
54
|
+
"mysql+pymysql://<username>:<password>@<host>:<port>/<db_name>".
|
|
55
|
+
TiDB will not use the API Key, but retains the definition for
|
|
56
|
+
interface compatibility.
|
|
57
|
+
collection_name (Optional[str]): Name of the collection.
|
|
58
|
+
The collection name will be used as the table name in TiDB. If not
|
|
59
|
+
provided, it is generated from the current time in ISO format.
|
|
60
|
+
**kwargs (Any): Additional keyword arguments for initializing
|
|
61
|
+
TiDB connection.
|
|
62
|
+
|
|
63
|
+
Raises:
|
|
64
|
+
ImportError: If `pytidb` package is not installed.
|
|
65
|
+
"""
|
|
66
|
+
|
|
67
|
+
@dependencies_required('pytidb')
|
|
68
|
+
def __init__(
|
|
69
|
+
self,
|
|
70
|
+
vector_dim: int,
|
|
71
|
+
collection_name: Optional[str] = None,
|
|
72
|
+
url_and_api_key: Optional[Union[Tuple[str, str], str]] = None,
|
|
73
|
+
**kwargs: Any,
|
|
74
|
+
) -> None:
|
|
75
|
+
from pytidb import TiDBClient
|
|
76
|
+
|
|
77
|
+
self._client: TiDBClient
|
|
78
|
+
database_url = None
|
|
79
|
+
if isinstance(url_and_api_key, str):
|
|
80
|
+
database_url = url_and_api_key
|
|
81
|
+
elif isinstance(url_and_api_key, tuple):
|
|
82
|
+
database_url = url_and_api_key[0]
|
|
83
|
+
self._create_client(database_url, **kwargs)
|
|
84
|
+
self.vector_dim = vector_dim
|
|
85
|
+
self.collection_name = collection_name or self._generate_table_name()
|
|
86
|
+
self._table = self._open_and_create_table()
|
|
87
|
+
self._table_model = self._table.table_model
|
|
88
|
+
self._check_table()
|
|
89
|
+
|
|
90
|
+
def _create_client(
|
|
91
|
+
self,
|
|
92
|
+
database_url: Optional[str] = None,
|
|
93
|
+
**kwargs: Any,
|
|
94
|
+
) -> None:
|
|
95
|
+
r"""Initializes the TiDB client with the provided connection details.
|
|
96
|
+
|
|
97
|
+
Args:
|
|
98
|
+
database_url (Optional[str]): The database connection string for
|
|
99
|
+
the TiDB server.
|
|
100
|
+
**kwargs: Additional keyword arguments passed to the TiDB client.
|
|
101
|
+
"""
|
|
102
|
+
from pytidb import TiDBClient
|
|
103
|
+
|
|
104
|
+
self._client = TiDBClient.connect(
|
|
105
|
+
database_url,
|
|
106
|
+
**kwargs,
|
|
107
|
+
)
|
|
108
|
+
|
|
109
|
+
def _get_table_model(self, collection_name: str) -> Any:
|
|
110
|
+
from pytidb.schema import Field, TableModel, VectorField
|
|
111
|
+
from sqlalchemy import JSON
|
|
112
|
+
|
|
113
|
+
class VectorDBRecord(TableModel):
|
|
114
|
+
id: Optional[str] = Field(None, primary_key=True)
|
|
115
|
+
vector: list[float] = VectorField(self.vector_dim)
|
|
116
|
+
payload: Optional[dict[str, Any]] = Field(None, sa_type=JSON)
|
|
117
|
+
|
|
118
|
+
# Notice: Avoid repeated definition warnings by dynamically generating
|
|
119
|
+
# class names.
|
|
120
|
+
return type(
|
|
121
|
+
f"VectorDBRecord_{collection_name}",
|
|
122
|
+
(VectorDBRecord,),
|
|
123
|
+
{"__tablename__": collection_name},
|
|
124
|
+
table=True,
|
|
125
|
+
)
|
|
126
|
+
|
|
127
|
+
def _open_and_create_table(self) -> "Table[Any]":
|
|
128
|
+
r"""Opens an existing table or creates a new table in TiDB."""
|
|
129
|
+
table = self._client.open_table(self.collection_name)
|
|
130
|
+
if table is None:
|
|
131
|
+
table = self._client.create_table(
|
|
132
|
+
schema=self._get_table_model(self.collection_name)
|
|
133
|
+
)
|
|
134
|
+
return table
|
|
135
|
+
|
|
136
|
+
def _check_table(self):
|
|
137
|
+
r"""Ensuring the specified table matches the specified vector
|
|
138
|
+
dimensionality.
|
|
139
|
+
"""
|
|
140
|
+
in_dim = self._get_table_info()["vector_dim"]
|
|
141
|
+
if in_dim != self.vector_dim:
|
|
142
|
+
raise ValueError(
|
|
143
|
+
"Vector dimension of the existing table "
|
|
144
|
+
f'"{self.collection_name}" ({in_dim}) is different from '
|
|
145
|
+
f"the given embedding dim ({self.vector_dim})."
|
|
146
|
+
)
|
|
147
|
+
|
|
148
|
+
def _generate_table_name(self) -> str:
|
|
149
|
+
r"""Generates a unique name for a new table based on the current
|
|
150
|
+
timestamp. TiDB table names can only contain alphanumeric
|
|
151
|
+
characters and underscores.
|
|
152
|
+
|
|
153
|
+
Returns:
|
|
154
|
+
str: A unique, valid table name.
|
|
155
|
+
"""
|
|
156
|
+
timestamp = datetime.now().isoformat()
|
|
157
|
+
transformed_name = re.sub(r'[^a-zA-Z0-9_]', '_', timestamp)
|
|
158
|
+
valid_name = "vectors_" + transformed_name
|
|
159
|
+
return valid_name
|
|
160
|
+
|
|
161
|
+
def _get_table_info(self) -> Dict[str, Any]:
|
|
162
|
+
r"""Retrieves details of an existing table.
|
|
163
|
+
|
|
164
|
+
Returns:
|
|
165
|
+
Dict[str, Any]: A dictionary containing details about the
|
|
166
|
+
table.
|
|
167
|
+
"""
|
|
168
|
+
vector_count = self._table.rows()
|
|
169
|
+
# Get vector dimension from table schema
|
|
170
|
+
columns = self._table.columns()
|
|
171
|
+
dim_value = None
|
|
172
|
+
for col in columns:
|
|
173
|
+
match = re.search(r'vector\((\d+)\)', col.column_type)
|
|
174
|
+
if match:
|
|
175
|
+
dim_value = int(match.group(1))
|
|
176
|
+
break
|
|
177
|
+
|
|
178
|
+
# If no vector column found, log a warning
|
|
179
|
+
if dim_value is None:
|
|
180
|
+
logger.warning(
|
|
181
|
+
f"No vector column found in table {self.collection_name}. "
|
|
182
|
+
"This may indicate an incompatible table schema."
|
|
183
|
+
)
|
|
184
|
+
|
|
185
|
+
return {
|
|
186
|
+
"vector_count": vector_count,
|
|
187
|
+
"vector_dim": dim_value,
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
def _validate_and_convert_vectors(
|
|
191
|
+
self, records: List[VectorRecord]
|
|
192
|
+
) -> List[Any]:
|
|
193
|
+
r"""Validates and converts VectorRecord instances to VectorDBRecord
|
|
194
|
+
instances.
|
|
195
|
+
|
|
196
|
+
Args:
|
|
197
|
+
records (List[VectorRecord]): List of vector records to validate
|
|
198
|
+
and convert.
|
|
199
|
+
|
|
200
|
+
Returns:
|
|
201
|
+
List[VectorDBRecord]: A list of VectorDBRecord instances.
|
|
202
|
+
"""
|
|
203
|
+
db_records = []
|
|
204
|
+
for record in records:
|
|
205
|
+
payload = record.payload
|
|
206
|
+
if isinstance(payload, str):
|
|
207
|
+
payload = json.loads(payload)
|
|
208
|
+
elif isinstance(payload, dict):
|
|
209
|
+
payload = json.loads(json.dumps(payload, cls=EnumEncoder))
|
|
210
|
+
else:
|
|
211
|
+
payload = None
|
|
212
|
+
|
|
213
|
+
db_records.append(
|
|
214
|
+
self._table_model(
|
|
215
|
+
id=record.id,
|
|
216
|
+
vector=record.vector,
|
|
217
|
+
payload=payload,
|
|
218
|
+
)
|
|
219
|
+
)
|
|
220
|
+
return db_records
|
|
221
|
+
|
|
222
|
+
def add(
|
|
223
|
+
self,
|
|
224
|
+
records: List[VectorRecord],
|
|
225
|
+
**kwargs,
|
|
226
|
+
) -> None:
|
|
227
|
+
r"""Adds a list of vectors to the specified table.
|
|
228
|
+
|
|
229
|
+
Args:
|
|
230
|
+
records (List[VectorRecord]): List of vectors to be added.
|
|
231
|
+
**kwargs (Any): Additional keyword arguments (currently unused).
|
|
232
|
+
|
|
233
|
+
Raises:
|
|
234
|
+
RuntimeError: If there was an error in the addition process.
|
|
235
|
+
"""
|
|
236
|
+
|
|
237
|
+
db_records = self._validate_and_convert_vectors(records)
|
|
238
|
+
if len(db_records) == 0:
|
|
239
|
+
return
|
|
240
|
+
self._table.bulk_insert(db_records)
|
|
241
|
+
|
|
242
|
+
logger.debug(
|
|
243
|
+
f"Successfully added vectors to TiDB table: {self.collection_name}"
|
|
244
|
+
)
|
|
245
|
+
|
|
246
|
+
def delete(
|
|
247
|
+
self,
|
|
248
|
+
ids: List[str],
|
|
249
|
+
**kwargs: Any,
|
|
250
|
+
) -> None:
|
|
251
|
+
r"""Deletes a list of vectors identified by their IDs from the
|
|
252
|
+
storage.
|
|
253
|
+
|
|
254
|
+
Args:
|
|
255
|
+
ids (List[str]): List of unique identifiers for the vectors to be
|
|
256
|
+
deleted.
|
|
257
|
+
**kwargs (Any): Additional keyword arguments (currently unused).
|
|
258
|
+
|
|
259
|
+
Raises:
|
|
260
|
+
RuntimeError: If there is an error during the deletion process.
|
|
261
|
+
"""
|
|
262
|
+
self._table.delete({"id": {"$in": ids}})
|
|
263
|
+
logger.debug(
|
|
264
|
+
f"Successfully deleted vectors from TiDB table "
|
|
265
|
+
f"<{self.collection_name}>"
|
|
266
|
+
)
|
|
267
|
+
|
|
268
|
+
def status(self) -> VectorDBStatus:
|
|
269
|
+
r"""Retrieves the current status of the TiDB table.
|
|
270
|
+
|
|
271
|
+
Returns:
|
|
272
|
+
VectorDBStatus: An object containing information about the
|
|
273
|
+
table's status.
|
|
274
|
+
"""
|
|
275
|
+
status = self._get_table_info()
|
|
276
|
+
return VectorDBStatus(
|
|
277
|
+
vector_dim=status["vector_dim"],
|
|
278
|
+
vector_count=status["vector_count"],
|
|
279
|
+
)
|
|
280
|
+
|
|
281
|
+
def query(
|
|
282
|
+
self,
|
|
283
|
+
query: VectorDBQuery,
|
|
284
|
+
**kwargs: Any,
|
|
285
|
+
) -> List[VectorDBQueryResult]:
|
|
286
|
+
r"""Searches for similar vectors in the storage based on the provided
|
|
287
|
+
query.
|
|
288
|
+
|
|
289
|
+
Args:
|
|
290
|
+
query (VectorDBQuery): The query object containing the search
|
|
291
|
+
vector and the number of top similar vectors to retrieve.
|
|
292
|
+
**kwargs (Any): Additional keyword arguments (currently unused).
|
|
293
|
+
|
|
294
|
+
Returns:
|
|
295
|
+
List[VectorDBQueryResult]: A list of vectors retrieved from the
|
|
296
|
+
storage based on similarity to the query vector.
|
|
297
|
+
"""
|
|
298
|
+
rows = (
|
|
299
|
+
self._table.search(query.query_vector).limit(query.top_k).to_list()
|
|
300
|
+
)
|
|
301
|
+
|
|
302
|
+
query_results = []
|
|
303
|
+
for row in rows:
|
|
304
|
+
query_results.append(
|
|
305
|
+
VectorDBQueryResult.create(
|
|
306
|
+
similarity=float(row['similarity_score']),
|
|
307
|
+
id=str(row['id']),
|
|
308
|
+
payload=row['payload'],
|
|
309
|
+
vector=row['vector'],
|
|
310
|
+
)
|
|
311
|
+
)
|
|
312
|
+
return query_results
|
|
313
|
+
|
|
314
|
+
def clear(self) -> None:
|
|
315
|
+
r"""Removes all vectors from the TiDB table. This method
|
|
316
|
+
truncates the existing table (it is not dropped and recreated),
|
|
317
|
+
removing all stored vectors.
|
|
318
|
+
"""
|
|
319
|
+
self._table.truncate()
|
|
320
|
+
|
|
321
|
+
def load(self) -> None:
|
|
322
|
+
r"""Load the collection hosted on cloud service."""
|
|
323
|
+
pass
|
|
324
|
+
|
|
325
|
+
@property
|
|
326
|
+
def client(self) -> "TiDBClient":
|
|
327
|
+
r"""Provides direct access to the TiDB client.
|
|
328
|
+
|
|
329
|
+
Returns:
|
|
330
|
+
TiDBClient: The TiDB client instance.
|
|
331
|
+
"""
|
|
332
|
+
return self._client
|
camel/toolkits/__init__.py
CHANGED
|
@@ -61,6 +61,9 @@ from .file_write_toolkit import FileWriteToolkit
|
|
|
61
61
|
from .terminal_toolkit import TerminalToolkit
|
|
62
62
|
from .pubmed_toolkit import PubMedToolkit
|
|
63
63
|
from .thinking_toolkit import ThinkingToolkit
|
|
64
|
+
from .openai_agent_toolkit import OpenAIAgentToolkit
|
|
65
|
+
from .searxng_toolkit import SearxNGToolkit
|
|
66
|
+
|
|
64
67
|
|
|
65
68
|
__all__ = [
|
|
66
69
|
'BaseToolkit',
|
|
@@ -110,4 +113,6 @@ __all__ = [
|
|
|
110
113
|
'TerminalToolkit',
|
|
111
114
|
'PubMedToolkit',
|
|
112
115
|
'ThinkingToolkit',
|
|
116
|
+
'OpenAIAgentToolkit',
|
|
117
|
+
'SearxNGToolkit',
|
|
113
118
|
]
|