camel-ai 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic. Click here for more details.
- camel/__init__.py +1 -1
- camel/agents/__init__.py +2 -0
- camel/agents/chat_agent.py +40 -53
- camel/agents/knowledge_graph_agent.py +221 -0
- camel/configs/__init__.py +29 -0
- camel/configs/anthropic_config.py +73 -0
- camel/configs/base_config.py +22 -0
- camel/configs/openai_config.py +132 -0
- camel/embeddings/openai_embedding.py +7 -2
- camel/functions/__init__.py +13 -8
- camel/functions/open_api_function.py +380 -0
- camel/functions/open_api_specs/coursera/__init__.py +13 -0
- camel/functions/open_api_specs/coursera/openapi.yaml +82 -0
- camel/functions/open_api_specs/klarna/__init__.py +13 -0
- camel/functions/open_api_specs/klarna/openapi.yaml +87 -0
- camel/functions/open_api_specs/speak/__init__.py +13 -0
- camel/functions/open_api_specs/speak/openapi.yaml +151 -0
- camel/functions/openai_function.py +3 -1
- camel/functions/retrieval_functions.py +61 -0
- camel/functions/slack_functions.py +275 -0
- camel/models/__init__.py +2 -0
- camel/models/anthropic_model.py +16 -2
- camel/models/base_model.py +8 -2
- camel/models/model_factory.py +7 -3
- camel/models/openai_audio_models.py +251 -0
- camel/models/openai_model.py +12 -4
- camel/models/stub_model.py +5 -1
- camel/retrievers/__init__.py +2 -0
- camel/retrievers/auto_retriever.py +47 -36
- camel/retrievers/base.py +42 -37
- camel/retrievers/bm25_retriever.py +10 -19
- camel/retrievers/cohere_rerank_retriever.py +108 -0
- camel/retrievers/vector_retriever.py +43 -26
- camel/storages/vectordb_storages/qdrant.py +3 -1
- camel/toolkits/__init__.py +21 -0
- camel/toolkits/base.py +22 -0
- camel/toolkits/github_toolkit.py +245 -0
- camel/types/__init__.py +6 -0
- camel/types/enums.py +44 -3
- camel/utils/__init__.py +4 -2
- camel/utils/commons.py +97 -173
- {camel_ai-0.1.3.dist-info → camel_ai-0.1.4.dist-info}/METADATA +9 -3
- {camel_ai-0.1.3.dist-info → camel_ai-0.1.4.dist-info}/RECORD +44 -26
- camel/configs.py +0 -271
- {camel_ai-0.1.3.dist-info → camel_ai-0.1.4.dist-info}/WHEEL +0 -0
|
@@ -33,8 +33,8 @@ class BM25Retriever(BaseRetriever):
|
|
|
33
33
|
calculating document scores.
|
|
34
34
|
content_input_path (str): The path to the content that has been
|
|
35
35
|
processed and stored.
|
|
36
|
-
|
|
37
|
-
|
|
36
|
+
unstructured_modules (UnstructuredIO): A module for parsing files and
|
|
37
|
+
URLs and chunking content based on specified parameters.
|
|
38
38
|
|
|
39
39
|
References:
|
|
40
40
|
https://github.com/dorianbrown/rank_bm25
|
|
@@ -47,13 +47,12 @@ class BM25Retriever(BaseRetriever):
|
|
|
47
47
|
from rank_bm25 import BM25Okapi
|
|
48
48
|
except ImportError as e:
|
|
49
49
|
raise ImportError(
|
|
50
|
-
"Package `rank_bm25` not installed, install by running"
|
|
51
|
-
" 'pip install rank_bm25'"
|
|
50
|
+
"Package `rank_bm25` not installed, install by running 'pip install rank_bm25'"
|
|
52
51
|
) from e
|
|
53
52
|
|
|
54
53
|
self.bm25: BM25Okapi = None
|
|
55
54
|
self.content_input_path: str = ""
|
|
56
|
-
self.
|
|
55
|
+
self.unstructured_modules: UnstructuredIO = UnstructuredIO()
|
|
57
56
|
|
|
58
57
|
def process(
|
|
59
58
|
self,
|
|
@@ -76,11 +75,10 @@ class BM25Retriever(BaseRetriever):
|
|
|
76
75
|
|
|
77
76
|
# Load and preprocess documents
|
|
78
77
|
self.content_input_path = content_input_path
|
|
79
|
-
|
|
80
|
-
elements = unstructured_modules.parse_file_or_url(
|
|
78
|
+
elements = self.unstructured_modules.parse_file_or_url(
|
|
81
79
|
content_input_path, **kwargs
|
|
82
80
|
)
|
|
83
|
-
self.chunks = unstructured_modules.chunk_elements(
|
|
81
|
+
self.chunks = self.unstructured_modules.chunk_elements(
|
|
84
82
|
chunk_type=chunk_type, elements=elements
|
|
85
83
|
)
|
|
86
84
|
|
|
@@ -88,7 +86,7 @@ class BM25Retriever(BaseRetriever):
|
|
|
88
86
|
tokenized_corpus = [str(chunk).split(" ") for chunk in self.chunks]
|
|
89
87
|
self.bm25 = BM25Okapi(tokenized_corpus)
|
|
90
88
|
|
|
91
|
-
def query(
|
|
89
|
+
def query(
|
|
92
90
|
self,
|
|
93
91
|
query: str,
|
|
94
92
|
top_k: int = DEFAULT_TOP_K_RESULTS,
|
|
@@ -106,22 +104,15 @@ class BM25Retriever(BaseRetriever):
|
|
|
106
104
|
|
|
107
105
|
Raises:
|
|
108
106
|
ValueError: If `top_k` is less than or equal to 0, if the BM25
|
|
109
|
-
model has not been initialized by calling `
|
|
107
|
+
model has not been initialized by calling `process`
|
|
110
108
|
first.
|
|
111
|
-
|
|
112
|
-
Note:
|
|
113
|
-
`storage` and `kwargs` parameters are included to maintain
|
|
114
|
-
compatibility with the `BaseRetriever` interface but are not used
|
|
115
|
-
in this implementation.
|
|
116
109
|
"""
|
|
117
110
|
|
|
118
111
|
if top_k <= 0:
|
|
119
112
|
raise ValueError("top_k must be a positive integer.")
|
|
120
|
-
|
|
121
|
-
if self.bm25 is None:
|
|
113
|
+
if self.bm25 is None or not self.chunks:
|
|
122
114
|
raise ValueError(
|
|
123
|
-
"BM25 model is not initialized. Call `
|
|
124
|
-
" first."
|
|
115
|
+
"BM25 model is not initialized. Call `process` first."
|
|
125
116
|
)
|
|
126
117
|
|
|
127
118
|
# Preprocess query similarly to how documents were processed
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the “License”);
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an “AS IS” BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
14
|
+
import os
|
|
15
|
+
from typing import Any, Dict, List, Optional
|
|
16
|
+
|
|
17
|
+
from camel.retrievers import BaseRetriever
|
|
18
|
+
|
|
19
|
+
DEFAULT_TOP_K_RESULTS = 1
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class CohereRerankRetriever(BaseRetriever):
    r"""An implementation of the `BaseRetriever` using the `Cohere Re-ranking`
    model.

    Attributes:
        model_name (str): The model name to use for re-ranking.
        api_key (Optional[str]): The API key for authenticating with the
            Cohere service.
        co: The `cohere.Client` instance used to issue re-rank requests.

    References:
        https://txt.cohere.com/rerank/
    """

    def __init__(
        self,
        model_name: str = "rerank-multilingual-v2.0",
        api_key: Optional[str] = None,
    ) -> None:
        r"""Initializes an instance of the CohereRerankRetriever. This
        constructor sets up a client for interacting with the Cohere API using
        the specified model name and API key. If the API key is not provided,
        it attempts to retrieve it from the COHERE_API_KEY environment
        variable.

        Args:
            model_name (str): The name of the model to be used for re-ranking.
                Defaults to 'rerank-multilingual-v2.0'.
            api_key (Optional[str]): The API key for authenticating requests
                to the Cohere API. If not provided, the method will attempt to
                retrieve the key from the environment variable
                'COHERE_API_KEY'.

        Raises:
            ImportError: If the 'cohere' package is not installed.
            ValueError: If the API key is neither passed as an argument nor
                set in the environment variable.
        """

        try:
            import cohere
        except ImportError as e:
            raise ImportError("Package 'cohere' is not installed") from e

        # A missing environment variable surfaces as `KeyError` when indexing
        # `os.environ`, so look it up with `.get` and raise the documented
        # `ValueError` explicitly instead of relying on an `except` clause.
        resolved_key = api_key or os.environ.get("COHERE_API_KEY")
        if not resolved_key:
            raise ValueError(
                "Must pass in cohere api key or specify via COHERE_API_KEY environment variable."
            )
        self.api_key = resolved_key

        self.co = cohere.Client(self.api_key)
        self.model_name = model_name

    def query(
        self,
        query: str,
        retrieved_result: List[Dict[str, Any]],
        top_k: int = DEFAULT_TOP_K_RESULTS,
    ) -> List[Dict[str, Any]]:
        r"""Queries and compiles results using the Cohere re-ranking model.

        Args:
            query (str): Query string for information retriever.
            retrieved_result (List[Dict[str, Any]]): The content to be
                re-ranked, should be the output from `BaseRetriever` like
                `VectorRetriever`.
            top_k (int, optional): The number of top results to return during
                retriever. Must be a positive integer. Defaults to
                `DEFAULT_TOP_K_RESULTS`.

        Returns:
            List[Dict[str, Any]]: The re-ranked chunks, in the order returned
                by the re-ranking service. Each entry is a shallow copy of
                the corresponding input dict with its 'similarity score' set
                to the relevance score reported for it.

        Raises:
            ValueError: If `top_k` is less than or equal to 0.
        """
        if top_k <= 0:
            raise ValueError("top_k must be a positive integer.")
        # Nothing to re-rank: skip the remote call entirely.
        if not retrieved_result:
            return []

        rerank_results = self.co.rerank(
            query=query,
            documents=retrieved_result,
            top_n=top_k,
            model=self.model_name,
        )

        formatted_results = []
        # Read the hits from `.results` consistently rather than indexing the
        # response object itself.
        for hit in rerank_results.results:
            # Copy so the caller's input dicts are not modified in place.
            selected_chunk = dict(retrieved_result[hit.index])
            selected_chunk['similarity score'] = hit.relevance_score
            formatted_results.append(selected_chunk)
        return formatted_results
|
|
@@ -14,9 +14,14 @@
|
|
|
14
14
|
from typing import Any, Dict, List, Optional
|
|
15
15
|
|
|
16
16
|
from camel.embeddings import BaseEmbedding, OpenAIEmbedding
|
|
17
|
-
from camel.
|
|
17
|
+
from camel.loaders import UnstructuredIO
|
|
18
18
|
from camel.retrievers.base import BaseRetriever
|
|
19
|
-
from camel.storages import
|
|
19
|
+
from camel.storages import (
|
|
20
|
+
BaseVectorStorage,
|
|
21
|
+
QdrantStorage,
|
|
22
|
+
VectorDBQuery,
|
|
23
|
+
VectorRecord,
|
|
24
|
+
)
|
|
20
25
|
|
|
21
26
|
DEFAULT_TOP_K_RESULTS = 1
|
|
22
27
|
DEFAULT_SIMILARITY_THRESHOLD = 0.75
|
|
@@ -32,21 +37,41 @@ class VectorRetriever(BaseRetriever):
|
|
|
32
37
|
Attributes:
|
|
33
38
|
embedding_model (BaseEmbedding): Embedding model used to generate
|
|
34
39
|
vector embeddings.
|
|
40
|
+
storage (BaseVectorStorage): Vector storage to query.
|
|
41
|
+
similarity_threshold (float, optional): The similarity threshold
|
|
42
|
+
for filtering results. Defaults to `DEFAULT_SIMILARITY_THRESHOLD`.
|
|
43
|
+
unstructured_modules (UnstructuredIO): A module for parsing files and
|
|
44
|
+
URLs and chunking content based on specified parameters.
|
|
35
45
|
"""
|
|
36
46
|
|
|
37
|
-
def __init__(
|
|
47
|
+
def __init__(
|
|
48
|
+
self,
|
|
49
|
+
similarity_threshold: float = DEFAULT_SIMILARITY_THRESHOLD,
|
|
50
|
+
embedding_model: Optional[BaseEmbedding] = None,
|
|
51
|
+
storage: Optional[BaseVectorStorage] = None,
|
|
52
|
+
) -> None:
|
|
38
53
|
r"""Initializes the retriever class with an optional embedding model.
|
|
39
54
|
|
|
40
55
|
Args:
|
|
56
|
+
similarity_threshold (float, optional): The similarity threshold
|
|
57
|
+
for filtering results. Defaults to
|
|
58
|
+
`DEFAULT_SIMILARITY_THRESHOLD`.
|
|
41
59
|
embedding_model (Optional[BaseEmbedding]): The embedding model
|
|
42
60
|
instance. Defaults to `OpenAIEmbedding` if not provided.
|
|
61
|
+
storage (BaseVectorStorage): Vector storage to query.
|
|
43
62
|
"""
|
|
44
63
|
self.embedding_model = embedding_model or OpenAIEmbedding()
|
|
64
|
+
self.storage = (
|
|
65
|
+
storage
|
|
66
|
+
if storage is not None
|
|
67
|
+
else QdrantStorage(vector_dim=self.embedding_model.get_output_dim())
|
|
68
|
+
)
|
|
69
|
+
self.similarity_threshold = similarity_threshold
|
|
70
|
+
self.unstructured_modules: UnstructuredIO = UnstructuredIO()
|
|
45
71
|
|
|
46
|
-
def process(
|
|
72
|
+
def process(
|
|
47
73
|
self,
|
|
48
74
|
content_input_path: str,
|
|
49
|
-
storage: BaseVectorStorage,
|
|
50
75
|
chunk_type: str = "chunk_by_title",
|
|
51
76
|
**kwargs: Any,
|
|
52
77
|
) -> None:
|
|
@@ -59,12 +84,13 @@ class VectorRetriever(BaseRetriever):
|
|
|
59
84
|
processed.
|
|
60
85
|
chunk_type (str): Type of chunking going to apply. Defaults to
|
|
61
86
|
"chunk_by_title".
|
|
62
|
-
**kwargs (Any): Additional keyword arguments for
|
|
87
|
+
**kwargs (Any): Additional keyword arguments for content parsing.
|
|
63
88
|
"""
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
89
|
+
elements = self.unstructured_modules.parse_file_or_url(
|
|
90
|
+
content_input_path, **kwargs
|
|
91
|
+
)
|
|
92
|
+
chunks = self.unstructured_modules.chunk_elements(
|
|
93
|
+
chunk_type=chunk_type, elements=elements
|
|
68
94
|
)
|
|
69
95
|
# Iterate to process and store embeddings, set batch of 50
|
|
70
96
|
for i in range(0, len(chunks), 50):
|
|
@@ -90,28 +116,20 @@ class VectorRetriever(BaseRetriever):
|
|
|
90
116
|
VectorRecord(vector=vector, payload=combined_dict)
|
|
91
117
|
)
|
|
92
118
|
|
|
93
|
-
storage.add(records=records)
|
|
119
|
+
self.storage.add(records=records)
|
|
94
120
|
|
|
95
|
-
def query(
|
|
121
|
+
def query(
|
|
96
122
|
self,
|
|
97
123
|
query: str,
|
|
98
|
-
storage: BaseVectorStorage,
|
|
99
124
|
top_k: int = DEFAULT_TOP_K_RESULTS,
|
|
100
|
-
similarity_threshold: float = DEFAULT_SIMILARITY_THRESHOLD,
|
|
101
|
-
**kwargs: Any,
|
|
102
125
|
) -> List[Dict[str, Any]]:
|
|
103
126
|
r"""Executes a query in vector storage and compiles the retrieved
|
|
104
127
|
results into a dictionary.
|
|
105
128
|
|
|
106
129
|
Args:
|
|
107
130
|
query (str): Query string for information retriever.
|
|
108
|
-
storage (BaseVectorStorage): Vector storage to query.
|
|
109
131
|
top_k (int, optional): The number of top results to return during
|
|
110
132
|
retriever. Must be a positive integer. Defaults to 1.
|
|
111
|
-
similarity_threshold (float, optional): The similarity threshold
|
|
112
|
-
for filtering results. Defaults to 0.75.
|
|
113
|
-
**kwargs (Any): Additional keyword arguments for vector storage
|
|
114
|
-
query.
|
|
115
133
|
|
|
116
134
|
Returns:
|
|
117
135
|
List[Dict[str, Any]]: Concatenated list of the query results.
|
|
@@ -125,23 +143,22 @@ class VectorRetriever(BaseRetriever):
|
|
|
125
143
|
raise ValueError("top_k must be a positive integer.")
|
|
126
144
|
|
|
127
145
|
# Load the storage incase it's hosted remote
|
|
128
|
-
storage.load()
|
|
146
|
+
self.storage.load()
|
|
129
147
|
|
|
130
148
|
query_vector = self.embedding_model.embed(obj=query)
|
|
131
149
|
db_query = VectorDBQuery(query_vector=query_vector, top_k=top_k)
|
|
132
|
-
query_results = storage.query(query=db_query
|
|
150
|
+
query_results = self.storage.query(query=db_query)
|
|
133
151
|
|
|
134
152
|
if query_results[0].record.payload is None:
|
|
135
153
|
raise ValueError(
|
|
136
|
-
"Payload of vector storage is None, please check"
|
|
137
|
-
" the collection."
|
|
154
|
+
"Payload of vector storage is None, please check the collection."
|
|
138
155
|
)
|
|
139
156
|
|
|
140
157
|
# format the results
|
|
141
158
|
formatted_results = []
|
|
142
159
|
for result in query_results:
|
|
143
160
|
if (
|
|
144
|
-
result.similarity >= similarity_threshold
|
|
161
|
+
result.similarity >= self.similarity_threshold
|
|
145
162
|
and result.record.payload is not None
|
|
146
163
|
):
|
|
147
164
|
result_dict = {
|
|
@@ -160,7 +177,7 @@ class VectorRetriever(BaseRetriever):
|
|
|
160
177
|
return [
|
|
161
178
|
{
|
|
162
179
|
'text': f"""No suitable information retrieved from {content_path} \
|
|
163
|
-
with similarity_threshold = {similarity_threshold}."""
|
|
180
|
+
with similarity_threshold = {self.similarity_threshold}."""
|
|
164
181
|
}
|
|
165
182
|
]
|
|
166
183
|
return formatted_results
|
|
@@ -185,7 +185,9 @@ class QdrantStorage(BaseVectorStorage):
|
|
|
185
185
|
VectorDistance.COSINE: Distance.COSINE,
|
|
186
186
|
VectorDistance.EUCLIDEAN: Distance.EUCLID,
|
|
187
187
|
}
|
|
188
|
-
|
|
188
|
+
# Since `recreate_collection` method will be removed in the future
|
|
189
|
+
# by Qdrant, `create_collection` is recommended instead.
|
|
190
|
+
self._client.create_collection(
|
|
189
191
|
collection_name=collection_name,
|
|
190
192
|
vectors_config=VectorParams(
|
|
191
193
|
size=size,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the “License”);
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an “AS IS” BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
14
|
+
|
|
15
|
+
from .base import BaseToolkit
|
|
16
|
+
from .github_toolkit import GithubToolkit
|
|
17
|
+
|
|
18
|
+
__all__ = [
|
|
19
|
+
'BaseToolkit',
|
|
20
|
+
'GithubToolkit',
|
|
21
|
+
]
|
camel/toolkits/base.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the “License”);
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an “AS IS” BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
14
|
+
|
|
15
|
+
from typing import List
|
|
16
|
+
|
|
17
|
+
from camel.functions import OpenAIFunction
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class BaseToolkit:
    r"""Base class for toolkits that expose their functionality as a list of
    `OpenAIFunction` objects.
    """

    def get_tools(self) -> List[OpenAIFunction]:
        r"""Returns the tools provided by the toolkit.

        Returns:
            List[OpenAIFunction]: The functions offered by the toolkit.

        Raises:
            NotImplementedError: Always, in this base class; subclasses are
                expected to override the method.
        """
        raise NotImplementedError("Subclasses must implement this method.")
|
|
@@ -0,0 +1,245 @@
|
|
|
1
|
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the “License”);
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an “AS IS” BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
14
|
+
|
|
15
|
+
import os
|
|
16
|
+
from dataclasses import dataclass
|
|
17
|
+
from typing import List, Optional
|
|
18
|
+
|
|
19
|
+
from camel.functions import OpenAIFunction
|
|
20
|
+
|
|
21
|
+
from .base import BaseToolkit
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass
class GithubIssue:
    r"""Represents a GitHub issue.

    The attributes are declared as dataclass fields so that the generated
    `__init__`, `__repr__` and `__eq__` all operate on the actual issue data.

    Attributes:
        title (str): The title of the issue.
        body (str): The body/content of the issue.
        number (int): The issue number.
        file_path (str): The path of the file associated with the issue.
        file_content (str): The content of the file associated with the issue.
    """

    title: str
    body: str
    number: int
    file_path: str
    file_content: str

    def summary(self) -> str:
        r"""Returns a summary of the issue.

        Returns:
            str: A string containing the title, body, number, file path, and
                file content of the issue, one labelled entry per line.
        """
        return (
            f"Title: {self.title}\n"
            f"Body: {self.body}\n"
            f"Number: {self.number}\n"
            f"File Path: {self.file_path}\n"
            f"File Content: {self.file_content}"
        )
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class GithubToolkit(BaseToolkit):
    r"""A class representing a toolkit for interacting with GitHub repositories.

    This class provides methods for retrieving open issues, retrieving specific
    issues, and creating pull requests in a GitHub repository.

    Args:
        repo_name (str): The name of the GitHub repository.
        access_token (str, optional): The access token to authenticate with
            GitHub. If not provided, it will be obtained using the
            `get_github_access_token` method.
    """

    def __init__(
        self, repo_name: str, access_token: Optional[str] = None
    ) -> None:
        r"""Initializes a new instance of the GitHubToolkit class.

        Args:
            repo_name (str): The name of the GitHub repository.
            access_token (str, optional): The access token to authenticate
                with GitHub. If not provided, it will be obtained using the
                `get_github_access_token` method.

        Raises:
            ImportError: If the `github` package cannot be imported.
            ValueError: If no token is passed and `GITHUB_ACCESS_TOKEN` is
                not set in the environment.
        """
        if access_token is None:
            access_token = self.get_github_access_token()

        try:
            from github import Auth, Github
        except ImportError as e:
            # The `github` module is distributed on PyPI as `PyGithub`.
            raise ImportError(
                "Please install `github` first. You can install it by running "
                "`pip install PyGithub`."
            ) from e
        self.github = Github(auth=Auth.Token(access_token))
        self.repo = self.github.get_repo(repo_name)

    def get_tools(self) -> List[OpenAIFunction]:
        r"""Returns a list of OpenAIFunction objects representing the
        functions in the toolkit.

        Returns:
            List[OpenAIFunction]: A list of OpenAIFunction objects wrapping
                `retrieve_issue_list`, `retrieve_issue` and
                `create_pull_request`.
        """
        return [
            OpenAIFunction(self.retrieve_issue_list),
            OpenAIFunction(self.retrieve_issue),
            OpenAIFunction(self.create_pull_request),
        ]

    def get_github_access_token(self) -> str:
        r"""Retrieve the GitHub access token from environment variables.

        Returns:
            str: A string containing the GitHub access token.

        Raises:
            ValueError: If `GITHUB_ACCESS_TOKEN` is missing or empty in the
                environment variables.
        """
        # Get `GITHUB_ACCESS_TOKEN` here: https://github.com/settings/tokens
        GITHUB_ACCESS_TOKEN = os.environ.get("GITHUB_ACCESS_TOKEN")

        if not GITHUB_ACCESS_TOKEN:
            raise ValueError(
                "`GITHUB_ACCESS_TOKEN` not found in environment variables. Get it "
                "here: `https://github.com/settings/tokens`."
            )
        return GITHUB_ACCESS_TOKEN

    def retrieve_issue_list(self) -> List[GithubIssue]:
        r"""Retrieve a list of open issues from the repository.

        Entries that are pull requests are skipped. Issues without any label
        are skipped as well, because the first label is used as the path of
        the file the issue refers to.

        Returns:
            A list of GithubIssue objects representing the open issues.
        """
        open_issues = []
        for issue in self.repo.get_issues(state='open'):
            if issue.pull_request:
                continue
            labels = issue.labels
            if not labels:
                # No label means no associated file path; skip the issue
                # instead of failing the whole listing with an IndexError.
                continue
            # For now the file path is required to be the first label of the
            # issue.
            file_path = labels[0].name
            open_issues.append(
                GithubIssue(
                    title=issue.title,
                    body=issue.body,
                    number=issue.number,
                    file_path=file_path,
                    file_content=self.retrieve_file_content(file_path),
                )
            )
        return open_issues

    def retrieve_issue(self, issue_number: int) -> Optional[str]:
        r"""Retrieves an issue from a GitHub repository.

        This function looks the issue up by number among the issues returned
        by `retrieve_issue_list`.

        Args:
            issue_number (int): The number of the issue to retrieve.

        Returns:
            Optional[str]: A formatted report of the retrieved issue, or
                `None` if no listed issue has the given number.
        """
        for issue in self.retrieve_issue_list():
            if issue.number == issue_number:
                return issue.summary()
        return None

    def create_pull_request(
        self,
        file_path: str,
        new_content: str,
        pr_title: str,
        body: str,
        branch_name: str,
    ) -> str:
        r"""Creates a pull request.

        This function creates a new branch from the repository's default
        branch, updates the file at the given path with the new content on
        that branch, and opens a pull request against the default branch.

        Args:
            file_path (str): The path of the file to be updated in the
                repository.
            new_content (str): The specified new content of the specified
                file.
            pr_title (str): The title of the pull request.
            body (str): Text used both as the commit message of the file
                update and as the body of the pull request.
            branch_name (str): The name of the branch to create and submit
                the pull request from.

        Returns:
            str: A formatted report of whether the pull request was created
                successfully or not.

        Raises:
            ValueError: If `file_path` does not resolve to a single file.
        """
        from github.ContentFile import ContentFile

        # Validate the target before touching the repository so that a bad
        # path does not leave a freshly created branch behind.
        file = self.repo.get_contents(file_path)
        if not isinstance(file, ContentFile):
            raise ValueError("PRs with multiple files aren't supported yet.")

        sb = self.repo.get_branch(self.repo.default_branch)
        self.repo.create_git_ref(
            ref=f"refs/heads/{branch_name}", sha=sb.commit.sha
        )

        self.repo.update_file(
            file.path, body, new_content, file.sha, branch=branch_name
        )
        pr = self.repo.create_pull(
            title=pr_title,
            body=body,
            head=branch_name,
            base=self.repo.default_branch,
        )

        if pr is not None:
            return f"Title: {pr.title}\n" f"Body: {pr.body}\n"
        else:
            return "Failed to create pull request."

    def retrieve_file_content(self, file_path: str) -> str:
        r"""Retrieves the content of a file from the GitHub repository.

        Args:
            file_path (str): The path of the file to retrieve.

        Returns:
            str: The decoded content of the file.

        Raises:
            ValueError: If `file_path` does not resolve to a single file.
        """
        file_content = self.repo.get_contents(file_path)

        from github.ContentFile import ContentFile

        if isinstance(file_content, ContentFile):
            return file_content.decoded_content.decode()
        else:
            raise ValueError("PRs with multiple files aren't supported yet.")
|
camel/types/__init__.py
CHANGED
|
@@ -12,16 +12,19 @@
|
|
|
12
12
|
# limitations under the License.
|
|
13
13
|
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
14
14
|
from .enums import (
|
|
15
|
+
AudioModelType,
|
|
15
16
|
EmbeddingModelType,
|
|
16
17
|
ModelType,
|
|
17
18
|
OpenAIBackendRole,
|
|
18
19
|
OpenAIImageDetailType,
|
|
19
20
|
OpenAIImageType,
|
|
21
|
+
OpenAPIName,
|
|
20
22
|
RoleType,
|
|
21
23
|
StorageType,
|
|
22
24
|
TaskType,
|
|
23
25
|
TerminationMode,
|
|
24
26
|
VectorDistance,
|
|
27
|
+
VoiceType,
|
|
25
28
|
)
|
|
26
29
|
from .openai_types import (
|
|
27
30
|
ChatCompletion,
|
|
@@ -57,4 +60,7 @@ __all__ = [
|
|
|
57
60
|
'CompletionUsage',
|
|
58
61
|
'OpenAIImageType',
|
|
59
62
|
'OpenAIImageDetailType',
|
|
63
|
+
'OpenAPIName',
|
|
64
|
+
'AudioModelType',
|
|
65
|
+
'VoiceType',
|
|
60
66
|
]
|