camel-ai 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic. Click here for more details.
- camel/__init__.py +30 -0
- camel/agents/__init__.py +40 -0
- camel/agents/base.py +29 -0
- camel/agents/chat_agent.py +539 -0
- camel/agents/critic_agent.py +179 -0
- camel/agents/embodied_agent.py +138 -0
- camel/agents/role_assignment_agent.py +117 -0
- camel/agents/task_agent.py +382 -0
- camel/agents/tool_agents/__init__.py +20 -0
- camel/agents/tool_agents/base.py +40 -0
- camel/agents/tool_agents/hugging_face_tool_agent.py +203 -0
- camel/configs.py +159 -0
- camel/embeddings/__init__.py +20 -0
- camel/embeddings/base.py +65 -0
- camel/embeddings/openai_embedding.py +74 -0
- camel/functions/__init__.py +27 -0
- camel/functions/base_io_functions.py +261 -0
- camel/functions/math_functions.py +61 -0
- camel/functions/openai_function.py +88 -0
- camel/functions/search_functions.py +309 -0
- camel/functions/unstructured_io_fuctions.py +616 -0
- camel/functions/weather_functions.py +136 -0
- camel/generators.py +263 -0
- camel/human.py +130 -0
- camel/memories/__init__.py +28 -0
- camel/memories/base.py +75 -0
- camel/memories/chat_history_memory.py +111 -0
- camel/memories/context_creators/__init__.py +18 -0
- camel/memories/context_creators/base.py +72 -0
- camel/memories/context_creators/score_based.py +130 -0
- camel/memories/records.py +92 -0
- camel/messages/__init__.py +38 -0
- camel/messages/base.py +223 -0
- camel/messages/func_message.py +106 -0
- camel/models/__init__.py +26 -0
- camel/models/base_model.py +110 -0
- camel/models/model_factory.py +59 -0
- camel/models/open_source_model.py +144 -0
- camel/models/openai_model.py +103 -0
- camel/models/stub_model.py +106 -0
- camel/prompts/__init__.py +38 -0
- camel/prompts/ai_society.py +121 -0
- camel/prompts/base.py +227 -0
- camel/prompts/code.py +111 -0
- camel/prompts/evaluation.py +40 -0
- camel/prompts/misalignment.py +84 -0
- camel/prompts/prompt_templates.py +117 -0
- camel/prompts/role_description_prompt_template.py +53 -0
- camel/prompts/solution_extraction.py +44 -0
- camel/prompts/task_prompt_template.py +56 -0
- camel/prompts/translation.py +42 -0
- camel/responses/__init__.py +18 -0
- camel/responses/agent_responses.py +42 -0
- camel/societies/__init__.py +20 -0
- camel/societies/babyagi_playing.py +254 -0
- camel/societies/role_playing.py +456 -0
- camel/storages/__init__.py +23 -0
- camel/storages/key_value_storages/__init__.py +23 -0
- camel/storages/key_value_storages/base.py +57 -0
- camel/storages/key_value_storages/in_memory.py +51 -0
- camel/storages/key_value_storages/json.py +97 -0
- camel/terminators/__init__.py +23 -0
- camel/terminators/base.py +44 -0
- camel/terminators/response_terminator.py +118 -0
- camel/terminators/token_limit_terminator.py +55 -0
- camel/types/__init__.py +54 -0
- camel/types/enums.py +176 -0
- camel/types/openai_types.py +39 -0
- camel/utils/__init__.py +47 -0
- camel/utils/commons.py +243 -0
- camel/utils/python_interpreter.py +435 -0
- camel/utils/token_counting.py +220 -0
- camel_ai-0.1.1.dist-info/METADATA +311 -0
- camel_ai-0.1.1.dist-info/RECORD +75 -0
- camel_ai-0.1.1.dist-info/WHEEL +4 -0
camel/configs.py
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the “License”);
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an “AS IS” BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
14
|
+
from abc import ABC
|
|
15
|
+
from dataclasses import asdict, dataclass, field
|
|
16
|
+
from typing import Any, Dict, List, Optional, Sequence, Union
|
|
17
|
+
|
|
18
|
+
from camel.functions import OpenAIFunction
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass(frozen=True)
class BaseConfig(ABC):
    r"""Common immutable base type for the configuration classes.

    It declares no fields of its own; concrete configurations subclass it
    and add their parameters as frozen dataclass fields.
    """
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass(frozen=True)
class ChatGPTConfig(BaseConfig):
    r"""Immutable bundle of parameters for OpenAI chat completions.

    Args:
        temperature (float, optional): Sampling temperature, between
            :obj:`0` and :obj:`2`. Larger values give more random output,
            smaller values give more focused and deterministic output.
            (default: :obj:`0.2`)
        top_p (float, optional): Nucleus-sampling threshold, an alternative
            to sampling with temperature. Only the tokens making up the
            top ``top_p`` probability mass are considered, so :obj:`0.1`
            keeps the top 10% probability mass. (default: :obj:`1.0`)
        n (int, optional): Number of chat completion choices generated for
            each input message. (default: :obj:`1`)
        stream (bool, optional): If :obj:`True`, partial message deltas are
            sent as data-only server-sent events as they become available.
            (default: :obj:`False`)
        stop (str or list, optional): Up to :obj:`4` sequences at which the
            API stops generating further tokens. (default: :obj:`None`)
        max_tokens (int, optional): Maximum number of tokens to generate in
            the chat completion. Input tokens plus generated tokens are
            limited by the model's context length. (default: :obj:`None`)
        presence_penalty (float, optional): Number between :obj:`-2.0` and
            :obj:`2.0`. Positive values penalize tokens that already appear
            in the text so far, making the model more likely to talk about
            new topics. (default: :obj:`0.0`)
        frequency_penalty (float, optional): Number between :obj:`-2.0` and
            :obj:`2.0`. Positive values penalize tokens according to their
            existing frequency in the text so far, making the model less
            likely to repeat the same line verbatim. (default: :obj:`0.0`)
        logit_bias (dict, optional): Changes the likelihood of specified
            tokens appearing in the completion. Maps tokens (given by
            their token ID in the tokenizer) to a bias value from
            :obj:`-100` to :obj:`100`, which is added to the logits
            generated by the model prior to sampling. The exact effect
            varies per model: values between :obj:`-1` and :obj:`1` should
            decrease or increase the likelihood of selection, while values
            like :obj:`-100` or :obj:`100` should result in a ban or
            exclusive selection of the token. (default: :obj:`{}`)
        user (str, optional): Unique identifier representing the end-user,
            which can help OpenAI to monitor and detect abuse.
            (default: :obj:`""`)
    """
    temperature: float = 0.2  # openai default: 1.0
    top_p: float = 1.0
    n: int = 1
    stream: bool = False
    stop: Optional[Union[str, Sequence[str]]] = None
    max_tokens: Optional[int] = None
    presence_penalty: float = 0.0
    frequency_penalty: float = 0.0
    # A fresh dict per instance, so instances never share bias entries.
    logit_bias: Dict = field(default_factory=dict)
    user: str = ""
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
@dataclass(frozen=True)
class FunctionCallingConfig(ChatGPTConfig):
    r"""Chat completion parameters extended with function-calling options.

    Args:
        functions (List[Dict[str, Any]]): The functions the model may
            generate JSON inputs for, each given as a dictionary.
        function_call (Union[Dict[str, str], str], optional): Controls how
            the model responds to function calls. :obj:`"none"` means the
            model does not call a function and responds to the end-user.
            :obj:`"auto"` means the model can pick between responding to
            the end-user and calling a function. Passing
            :obj:`{"name": "my_function"}` forces the model to call that
            function. (default: :obj:`"auto"`)
    """
    functions: List[Dict[str, Any]] = field(default_factory=list)
    function_call: Union[Dict[str, str], str] = "auto"

    @classmethod
    def from_openai_function_list(
        cls,
        function_list: List[OpenAIFunction],
        function_call: Union[Dict[str, str], str] = "auto",
        kwargs: Optional[Dict[str, Any]] = None,
    ):
        r"""Builds a configuration from a list of function objects.

        Args:
            function_list (List[OpenAIFunction]): The function objects to
                load into this configuration; each one is converted with
                its :obj:`as_dict` method.
            function_call (Union[Dict[str, str], str], optional): Controls
                how the model responds to function calls, as described in
                the class documentation. (default: :obj:`"auto"`)
            kwargs (Optional[Dict[str, Any]]): Extra keyword arguments
                forwarded to the constructor, overriding the settings
                defined in :obj:`ChatGPTConfig`. (default: :obj:`None`)

        Returns:
            FunctionCallingConfig: A new instance holding the converted
                function list and the given :obj:`function_call` value.
        """
        # A missing or empty mapping means "no overrides".
        overrides = kwargs if kwargs else {}
        schemas = []
        for func in function_list:
            schemas.append(func.as_dict())
        return cls(
            functions=schemas,
            function_call=function_call,
            **overrides,
        )
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
@dataclass(frozen=True)
class OpenSourceConfig(BaseConfig):
    r"""Defines parameters for setting up open-source models and includes
    parameters to be passed to chat completion function of OpenAI API.

    Args:
        model_path (str): The path to a local folder containing the model
            files or the model card in HuggingFace hub.
        server_url (str): The URL to the server running the model inference
            which will be used as the API base of OpenAI API.
        api_params (ChatGPTConfig): An instance of :obj:`ChatGPTConfig` to
            contain the arguments to be passed to OpenAI API. A new
            default-valued instance is created for each configuration when
            omitted. (default: :obj:`ChatGPTConfig()`)
    """
    model_path: str
    server_url: str
    # Built per instance: a single class-level ChatGPTConfig() default would
    # be shared by every OpenSourceConfig, including its mutable
    # ``logit_bias`` dict.
    api_params: ChatGPTConfig = field(default_factory=ChatGPTConfig)
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
# Field names of a default ChatGPTConfig, collected as a set.
OPENAI_API_PARAMS = set(asdict(ChatGPTConfig()))
# Field names of a default FunctionCallingConfig, collected as a set.
OPENAI_API_PARAMS_WITH_FUNCTIONS = set(asdict(FunctionCallingConfig()))
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the “License”);
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an “AS IS” BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
14
|
+
from .base import BaseEmbedding
|
|
15
|
+
from .openai_embedding import OpenAIEmbedding
|
|
16
|
+
|
|
17
|
+
__all__ = [
|
|
18
|
+
"BaseEmbedding",
|
|
19
|
+
"OpenAIEmbedding",
|
|
20
|
+
]
|
camel/embeddings/base.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the “License”);
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an “AS IS” BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
14
|
+
from abc import ABC, abstractmethod
|
|
15
|
+
from typing import Any, Generic, List, TypeVar
|
|
16
|
+
|
|
17
|
+
T = TypeVar('T')


class BaseEmbedding(ABC, Generic[T]):
    r"""Abstract interface for turning objects into embedding vectors.

    Subclasses implement :meth:`embed_list` and :meth:`get_output_dim`;
    :meth:`embed` is derived from :meth:`embed_list`.
    """

    @abstractmethod
    def embed_list(
        self,
        objs: List[T],
        **kwargs: Any,
    ) -> List[List[float]]:
        r"""Generates embeddings for the given objects.

        Args:
            objs (List[T]): The objects for which to generate embeddings.
            **kwargs (Any): Extra kwargs passed to the embedding API.

        Returns:
            List[List[float]]: The generated embeddings, each one a list
                of floating-point numbers.
        """

    def embed(
        self,
        obj: T,
        **kwargs: Any,
    ) -> List[float]:
        r"""Generates the embedding of a single object.

        Args:
            obj (T): The object for which to generate the embedding.
            **kwargs (Any): Extra kwargs passed to the embedding API.

        Returns:
            List[float]: The first embedding returned by
                :meth:`embed_list` for a one-element list holding
                :obj:`obj`.
        """
        batch = self.embed_list([obj], **kwargs)
        return batch[0]

    @abstractmethod
    def get_output_dim(self) -> int:
        r"""Returns the output dimension of the embeddings.

        Returns:
            int: The dimensionality of the embedding for the current model.
        """
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the “License”);
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an “AS IS” BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
14
|
+
from typing import Any, List
|
|
15
|
+
|
|
16
|
+
from openai import OpenAI
|
|
17
|
+
|
|
18
|
+
from camel.embeddings import BaseEmbedding
|
|
19
|
+
from camel.types import EmbeddingModelType
|
|
20
|
+
from camel.utils import openai_api_key_required
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class OpenAIEmbedding(BaseEmbedding[str]):
    r"""Provides text embedding functionalities using OpenAI's models.

    Args:
        model_type (EmbeddingModelType, optional): The model type to be
            used for generating embeddings.
            (default: :obj:`EmbeddingModelType.ADA_2`)

    Raises:
        ValueError: If the given model type is not an OpenAI embedding
            model type.
    """

    def __init__(
        self,
        model_type: EmbeddingModelType = EmbeddingModelType.ADA_2,
    ) -> None:
        if not model_type.is_openai:
            raise ValueError("Invalid OpenAI embedding model type.")
        self.model_type = model_type
        # Cached so that get_output_dim() is a plain attribute read.
        self.output_dim = model_type.output_dim
        self.client = OpenAI()

    @openai_api_key_required
    def embed_list(
        self,
        objs: List[str],
        **kwargs: Any,
    ) -> List[List[float]]:
        r"""Generates embeddings for the given texts.

        Args:
            objs (List[str]): The texts for which to generate the embeddings.
            **kwargs (Any): Extra kwargs passed to the embedding API.

        Returns:
            List[List[float]]: One embedding, as a list of floating-point
                numbers, per entry of the API response's :obj:`data`.
        """
        # TODO: count tokens
        response = self.client.embeddings.create(
            input=objs,
            model=self.model_type.value,
            **kwargs,
        )
        return [data.embedding for data in response.data]

    def get_output_dim(self) -> int:
        r"""Returns the output dimension of the embeddings.

        Returns:
            int: The dimensionality of the embedding for the current model.
        """
        return self.output_dim
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the “License”);
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an “AS IS” BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
14
|
+
|
|
15
|
+
from .math_functions import MATH_FUNCS
|
|
16
|
+
from .openai_function import OpenAIFunction
|
|
17
|
+
from .search_functions import SEARCH_FUNCS
|
|
18
|
+
from .weather_functions import WEATHER_FUNCS
|
|
19
|
+
from .unstructured_io_fuctions import UnstructuredModules
|
|
20
|
+
|
|
21
|
+
__all__ = [
|
|
22
|
+
'OpenAIFunction',
|
|
23
|
+
'MATH_FUNCS',
|
|
24
|
+
'SEARCH_FUNCS',
|
|
25
|
+
'WEATHER_FUNCS',
|
|
26
|
+
'UnstructuredModules',
|
|
27
|
+
]
|
|
@@ -0,0 +1,261 @@
|
|
|
1
|
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the “License”);
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an “AS IS” BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
14
|
+
import json
|
|
15
|
+
import re
|
|
16
|
+
from abc import ABC, abstractmethod
|
|
17
|
+
from copy import deepcopy
|
|
18
|
+
from hashlib import md5
|
|
19
|
+
from io import BytesIO
|
|
20
|
+
from typing import Any, Dict, List, Optional
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class File(ABC):
    r"""An uploaded file together with the documents extracted from it."""

    def __init__(
        self,
        name: str,
        id: str,
        metadata: Optional[Dict[str, Any]] = None,
        docs: Optional[List[Dict[str, Any]]] = None,
    ):
        r"""Stores the file attributes.

        Args:
            name (str): The name of the file.
            id (str): The unique identifier of the file.
            metadata (Dict[str, Any], optional):
                Additional metadata associated with the file. A falsy value
                is replaced by an empty dict. Defaults to None.
            docs (List[Dict[str, Any]], optional):
                A list of documents contained within the file. A falsy
                value is replaced by an empty list. Defaults to None.
        """
        self.name = name
        self.id = id
        self.metadata = metadata if metadata else {}
        self.docs = docs if docs else []

    @classmethod
    @abstractmethod
    def from_bytes(cls, file: BytesIO) -> "File":
        r"""Creates a File object from a BytesIO object.

        Args:
            file (BytesIO):
                A BytesIO object representing the contents of the file.

        Returns:
            File: A File object.
        """

    def __repr__(self) -> str:
        head = f"File(name={self.name}, id={self.id}, "
        tail = f"metadata={self.metadata}, docs={self.docs})"
        return head + tail

    def __str__(self) -> str:
        # Same as __repr__ but without the documents.
        summary = (
            f"File(name={self.name}, id={self.id}, metadata={self.metadata})")
        return summary

    def copy(self) -> "File":
        r"""Create a deep copy of this File"""
        metadata_clone = deepcopy(self.metadata)
        docs_clone = deepcopy(self.docs)
        return self.__class__(
            name=self.name,
            id=self.id,
            metadata=metadata_clone,
            docs=docs_clone,
        )
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def strip_consecutive_newlines(text: str) -> str:
    r"""Collapses each whitespace run that contains a newline into a
    single newline.

    Args:
        text (str): The string to process.

    Returns:
        str: The string in which every run of whitespace containing at
            least one newline has been replaced by one newline character.
    """
    newline_run = re.compile(r"\s*\n\s*")
    return newline_run.sub("\n", text)
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class DocxFile(File):
    r"""A :obj:`File` whose single document is the text of a docx file."""

    @classmethod
    def from_bytes(cls, file: BytesIO) -> "DocxFile":
        r"""Creates a DocxFile object from a BytesIO object.

        Args:
            file (BytesIO):
                A BytesIO object representing the contents of the docx
                file. Its :obj:`name` attribute becomes the file name.

        Returns:
            DocxFile: A DocxFile object with one document and the MD5 hex
                digest of the buffer contents as its id.

        Raises:
            ImportError: If `docx2txt` is not installed.
        """
        # Text extraction is delegated to the optional docx2txt package.
        try:
            import docx2txt
        except ImportError:
            raise ImportError("Please install `docx2txt` first. "
                              "You can install it by running "
                              "`pip install docx2txt`.")
        raw_text = docx2txt.process(file)
        page_content = strip_consecutive_newlines(raw_text).strip()
        # The identifier is derived from the whole buffer.
        digest = md5(file.getvalue()).hexdigest()
        # Leave the stream rewound for the caller.
        file.seek(0)
        return cls(
            name=file.name,
            id=digest,
            docs=[{"page_content": page_content}],
        )
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
class PdfFile(File):
    r"""A :obj:`File` holding one document per page of a pdf file."""

    @classmethod
    def from_bytes(cls, file: BytesIO) -> "PdfFile":
        r"""Creates a PdfFile object from a BytesIO object.

        Args:
            file (BytesIO):
                A BytesIO object representing the contents of the pdf file.
                Its :obj:`name` attribute becomes the file name.

        Returns:
            PdfFile: A PdfFile object whose documents each carry the keys
                ``"page_content"`` and ``"page"`` (1-based page number).

        Raises:
            ImportError: If `PyMuPDF` is not installed.
        """
        # Use fitz to extract text from pdf files
        try:
            import fitz
        except ImportError:
            raise ImportError("Please install `PyMuPDF` first. "
                              "You can install it by running "
                              "`pip install PyMuPDF`.")
        pdf = fitz.open(stream=file.read(), filetype="pdf")
        docs = []
        try:
            for i, page in enumerate(pdf):
                text = page.get_text(sort=True)
                text = strip_consecutive_newlines(text)
                # Create a dictionary with the extracted text
                doc = {"page_content": text.strip(), "page": i + 1}
                docs.append(doc)
        finally:
            # Release the document even when extraction fails part-way;
            # the original never closed it.
            pdf.close()
        # Calculate a unique identifier for the file
        file_id = md5(file.getvalue()).hexdigest()
        # Reset the file pointer to the beginning
        file.seek(0)
        return cls(name=file.name, id=file_id, docs=docs)
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
class TxtFile(File):
    r"""A :obj:`File` whose single document is the text of a txt file."""

    @classmethod
    def from_bytes(cls, file: BytesIO) -> "TxtFile":
        r"""Creates a TxtFile object from a BytesIO object.

        Args:
            file (BytesIO):
                A BytesIO object representing the contents of the txt file,
                decoded as UTF-8. Its :obj:`name` attribute becomes the
                file name.

        Returns:
            TxtFile: A TxtFile object with one document and the MD5 hex
                digest of the buffer contents as its id.
        """
        decoded = file.read().decode("utf-8")
        page_content = strip_consecutive_newlines(decoded).strip()
        # The identifier is derived from the whole buffer.
        digest = md5(file.getvalue()).hexdigest()
        # Leave the stream rewound for the caller.
        file.seek(0)
        return cls(
            name=file.name,
            id=digest,
            docs=[{"page_content": page_content}],
        )
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
class JsonFile(File):
    r"""A :obj:`File` whose single document is re-serialized JSON data."""

    @classmethod
    def from_bytes(cls, file: BytesIO) -> "JsonFile":
        r"""Creates a JsonFile object from a BytesIO object.

        Args:
            file (BytesIO):
                A BytesIO object representing the contents of the json
                file. Its :obj:`name` attribute becomes the file name.

        Returns:
            JsonFile: A JsonFile object whose only document holds the
                parsed data dumped back to a JSON string, and whose id is
                the MD5 hex digest of the buffer contents.
        """
        parsed = json.load(file)
        page_content = json.dumps(parsed)
        # The identifier is derived from the whole buffer.
        digest = md5(file.getvalue()).hexdigest()
        # Leave the stream rewound for the caller.
        file.seek(0)
        return cls(
            name=file.name,
            id=digest,
            docs=[{"page_content": page_content}],
        )
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
class HtmlFile(File):
    r"""A :obj:`File` whose single document is the text of an html file."""

    @classmethod
    def from_bytes(cls, file: BytesIO) -> "HtmlFile":
        r"""Creates a HtmlFile object from a BytesIO object.

        Args:
            file (BytesIO):
                A BytesIO object representing the contents of the html
                file. Its :obj:`name` attribute becomes the file name.

        Returns:
            HtmlFile: A HtmlFile object with one document and the MD5 hex
                digest of the buffer contents as its id.

        Raises:
            ImportError: If `beautifulsoup4` is not installed.
        """
        # Parsing is delegated to the optional beautifulsoup4 package.
        try:
            from bs4 import BeautifulSoup
        except ImportError:
            raise ImportError("Please install `beautifulsoup4` first. "
                              "You can install it by running "
                              "`pip install beautifulsoup4`.")
        extracted = BeautifulSoup(file, "html.parser").get_text()
        page_content = strip_consecutive_newlines(extracted).strip()
        # The identifier is derived from the whole buffer.
        digest = md5(file.getvalue()).hexdigest()
        # Leave the stream rewound for the caller.
        file.seek(0)
        return cls(
            name=file.name,
            id=digest,
            docs=[{"page_content": page_content}],
        )
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def read_file(file: BytesIO) -> File:
    r"""Reads an uploaded file and returns a File object.

    The concrete :obj:`File` subclass is chosen from the extension of
    :obj:`file.name`, compared case-insensitively.

    Args:
        file (BytesIO): A BytesIO object representing the contents of the
            file.

    Returns:
        File: A File object.

    Raises:
        NotImplementedError: If the extension is none of ``.docx``,
            ``.pdf``, ``.txt``, ``.json`` or ``.html``.
    """
    lowered_name = file.name.lower()
    # Checked in order; the first matching suffix wins.
    loaders = (
        (".docx", DocxFile),
        (".pdf", PdfFile),
        (".txt", TxtFile),
        (".json", JsonFile),
        (".html", HtmlFile),
    )
    for suffix, file_cls in loaders:
        if lowered_name.endswith(suffix):
            return file_cls.from_bytes(file)
    raise NotImplementedError(
        f"File type {file.name.split('.')[-1]} not supported")
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the “License”);
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an “AS IS” BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
14
|
+
|
|
15
|
+
from typing import List
|
|
16
|
+
|
|
17
|
+
from .openai_function import OpenAIFunction
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# NOTE(review): this function is wrapped in OpenAIFunction by MATH_FUNCS in
# this module. Presumably the docstring (including the JSON-style `integer`
# type names) is read to build the function description -- confirm before
# rewording it.
def add(a: int, b: int) -> int:
    r"""Adds two numbers.

    Args:
        a (integer): The first number to be added.
        b (integer): The second number to be added.

    Returns:
        integer: The sum of the two numbers.
    """
    return a + b
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# NOTE(review): this function is wrapped in OpenAIFunction by MATH_FUNCS in
# this module. Presumably the docstring (including the JSON-style `integer`
# type names) is read to build the function description -- confirm before
# rewording it.
def sub(a: int, b: int) -> int:
    r"""Do subtraction between two numbers.

    Args:
        a (integer): The minuend in subtraction.
        b (integer): The subtrahend in subtraction.

    Returns:
        integer: The result of subtracting :obj:`b` from :obj:`a`.
    """
    return a - b
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
# NOTE(review): this function is wrapped in OpenAIFunction by MATH_FUNCS in
# this module. Presumably the docstring (including the JSON-style `integer`
# type names) is read to build the function description -- confirm before
# rewording it.
def mul(a: int, b: int) -> int:
    r"""Multiplies two integers.

    Args:
        a (integer): The multiplier in the multiplication.
        b (integer): The multiplicand in the multiplication.

    Returns:
        integer: The product of the two numbers.
    """
    return a * b
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
# The arithmetic helpers of this module, each wrapped as an OpenAIFunction,
# in the order add, sub, mul.
MATH_FUNCS: List[OpenAIFunction] = [
    OpenAIFunction(add),
    OpenAIFunction(sub),
    OpenAIFunction(mul),
]
|