camel-ai 0.2.41__py3-none-any.whl → 0.2.43__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic.
- camel/__init__.py +1 -1
- camel/agents/chat_agent.py +24 -4
- camel/configs/__init__.py +3 -0
- camel/configs/anthropic_config.py +2 -24
- camel/configs/ppio_config.py +102 -0
- camel/configs/reka_config.py +1 -7
- camel/configs/samba_config.py +1 -7
- camel/configs/togetherai_config.py +1 -7
- camel/embeddings/__init__.py +4 -0
- camel/embeddings/azure_embedding.py +119 -0
- camel/embeddings/together_embedding.py +136 -0
- camel/environments/__init__.py +3 -0
- camel/environments/multi_step.py +12 -10
- camel/environments/single_step.py +28 -11
- camel/environments/tic_tac_toe.py +518 -0
- camel/loaders/__init__.py +2 -0
- camel/loaders/crawl4ai_reader.py +230 -0
- camel/models/__init__.py +2 -0
- camel/models/azure_openai_model.py +10 -2
- camel/models/base_model.py +111 -28
- camel/models/cohere_model.py +5 -1
- camel/models/deepseek_model.py +4 -0
- camel/models/gemini_model.py +8 -2
- camel/models/model_factory.py +3 -0
- camel/models/ollama_model.py +8 -2
- camel/models/openai_compatible_model.py +8 -2
- camel/models/openai_model.py +16 -4
- camel/models/ppio_model.py +184 -0
- camel/models/vllm_model.py +147 -48
- camel/societies/workforce/workforce.py +26 -3
- camel/toolkits/__init__.py +2 -0
- camel/toolkits/browser_toolkit.py +7 -3
- camel/toolkits/google_calendar_toolkit.py +432 -0
- camel/toolkits/search_toolkit.py +119 -1
- camel/toolkits/terminal_toolkit.py +729 -115
- camel/types/enums.py +68 -3
- camel/types/unified_model_type.py +5 -0
- camel/verifiers/python_verifier.py +93 -9
- {camel_ai-0.2.41.dist-info → camel_ai-0.2.43.dist-info}/METADATA +21 -2
- {camel_ai-0.2.41.dist-info → camel_ai-0.2.43.dist-info}/RECORD +42 -35
- {camel_ai-0.2.41.dist-info → camel_ai-0.2.43.dist-info}/WHEEL +0 -0
- {camel_ai-0.2.41.dist-info → camel_ai-0.2.43.dist-info}/licenses/LICENSE +0 -0
camel/environments/single_step.py
CHANGED

@@ -224,7 +224,7 @@ class SingleStepEnv:
         raise TypeError(f"Unsupported dataset type: {type(self.dataset)}")

     async def step(
-        self, action: Union[Action, List[Action], str]
+        self, action: Union[Action, List[Action], str, Dict[int, str]]
     ) -> Union[
         Tuple[Observation, float, bool, Dict[str, Any]],
         List[Tuple[Observation, float, bool, Dict[str, Any]]],
@@ -242,13 +242,15 @@ class SingleStepEnv:
             the observation will not change.

         Args:
-            action (Union[Action, List[Action], str]):
+            action (Union[Action, List[Action], str, Dict[int, str]]):
                 The action(s) taken by the agent,
                 which should contain the response(s)
                 to the observation(s). Can be:
                 - A single `Action` object (for batch size 1),
                 - A list of `Action` objects (for batched evaluation),
                 - A raw string (only allowed when batch size is 1).
+                - A dict that maps indices to their `llm_response`
+                  (for batched evaluation)

         Returns:
             Union[Tuple[Observation, float, bool, Dict[str, Any]], List[...]]:
@@ -293,6 +295,7 @@ class SingleStepEnv:
                 f"total batch size ({self.current_batch_size})"
             )

+        indices = [act.index for act in actions]
         proposed_solutions = [act.llm_response for act in actions]
         ground_truths: List[str] = []
         for idx in indices:
@@ -334,21 +337,22 @@ class SingleStepEnv:
             ).as_tuple()
             for i in range(len(actions))
         ]
+
         for _, idx in enumerate(indices):
             self._states_done[idx] = True

         return step_results[0] if len(step_results) == 1 else step_results

     def _normalize_actions(
-        self, action: Union[Action, List[Action], str]
+        self, action: Union[Action, List[Action], str, Dict[int, str]]
     ) -> List[Action]:
         r"""Normalize the user-provided action(s) into a validated list
         of `Action` objects.

         This method handles flexibility in input format by converting
-        raw strings (only allowed when batch size is 1) and
-        all necessary structure and integrity checks on
-        (e.g., index bounds, duplicates).
+        raw strings (only allowed when batch size is 1) and dictionaries,
+        ensuring all necessary structure and integrity checks on
+        actions (e.g., index bounds, duplicates).

         Args:
             action (Union[Action, List[Action], str]):
@@ -357,6 +361,7 @@ class SingleStepEnv:
                 - A list of `Action` objects.
                 - A raw string (if `batch_size == 1`), auto-wrapped
                   in an `Action`.
+                - A dict mapping int indices to str responses

         Returns:
             List[Action]: A list of validated `Action` instances
@@ -368,8 +373,9 @@ class SingleStepEnv:
                 - Action list is empty,
                 - Index mismatches expected values
                   (e.g., 0 for batch size 1),
-                - Wrong structure is used
-
+                - Wrong structure is used (e.g.,
+                  string used with batch size > 1,
+                  dict used with batch size == 1).
             TypeError: If the action is of an unsupported type.
         """

@@ -380,9 +386,20 @@ class SingleStepEnv:
                 " when batch_size == 1"
             )
             logger.warning("Auto-converting from str to Action", stacklevel=2)
-
+            actions = [Action(index=0, llm_response=action)]
+
+        elif isinstance(action, dict):
+            if not all(isinstance(k, int) for k in action.keys()):
+                raise ValueError("All dictionary keys must be integers")

-
+            if self.current_batch_size == 1 and list(action.keys()) != [0]:
+                raise ValueError(
+                    "For batch_size=1, dict input must have exactly one key: 0"
+                )
+            actions = [
+                Action(index=k, llm_response=v) for k, v in action.items()
+            ]
+        elif isinstance(action, Action):
             actions = [action]
         elif isinstance(action, list):
             if not action:
@@ -397,7 +414,7 @@ class SingleStepEnv:

         if self.current_batch_size == 1 and len(actions) != 1:
             raise ValueError(
-                "For batch_size=1, expect a single Action or a "
+                "For batch_size=1, expect a single Action, a dictionary or a "
                 "list containing exactly one Action"
             )
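The net effect of these hunks is that `step()` and `_normalize_actions()` now accept a fourth input shape: a plain dict mapping state indices to response strings, which is converted into `Action` objects internally. A minimal usage sketch, assuming an already-reset `SingleStepEnv` named `env` with a batch size of 2 (dataset and reset code are elided and hypothetical; runs inside an async function):

    from camel.environments.models import Action

    # New Dict[int, str] form: keys are int state indices, values are the
    # llm_response strings. Keys must all be ints, and for batch_size == 1
    # the only valid key is 0.
    results = await env.step({0: "The answer is 42.", 1: "The answer is 7."})

    # Equivalent to the pre-existing explicit form:
    results = await env.step(
        [
            Action(index=0, llm_response="The answer is 42."),
            Action(index=1, llm_response="The answer is 7."),
        ]
    )

Either way, each element of the batched result is an `(Observation, reward, done, info)` tuple, as described in the `Returns` section above.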
camel/environments/tic_tac_toe.py
ADDED (new file, 518 lines)

@@ -0,0 +1,518 @@
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
import math
import random
import re
from typing import Any, ClassVar, Dict, List, Literal, Optional, Tuple

from camel.environments.models import Action, Observation
from camel.environments.multi_step import MultiStepEnv
from camel.extractors import BaseExtractor, BaseExtractorStrategy


class MoveExtractor(BaseExtractorStrategy):
    r"""A strategy for extracting Tic Tac Toe actions from text."""

    async def extract(self, text: str) -> Optional[str]:
        r"""Extract a valid Tic Tac Toe move from text.

        Looks for a pattern '<Action> n' where n is a digit between 1 and 9.

        Args:
            text (str): The text to extract the action from.

        Returns:
            Optional[str]: The extracted move as a string, or None if no valid
                move is found.
        """
        match = re.search(r"<Action>\s*(\d+)", text)
        if match:
            move = match.group(1)
            # Validate that the move is in range 1-9
            if move.isdigit() and 1 <= int(move) <= 9:
                return move
        return None


class Opponent:
    r"""AI opponent for the Tic Tac Toe game.

    This class implements different playing strategies for the AI opponent,
    including an optimal strategy using the minimax algorithm with alpha-beta
    pruning, and a random strategy.
    """

    def __init__(
        self, play_style: Literal["optimal", "random"] = "optimal"
    ) -> None:
        r"""Initialize the opponent with a specific play style.

        Args:
            play_style (Literal["optimal", "random"]): The strategy to use,
                either "optimal" or "random". (default: :obj:`"optimal"`)
        """
        self.play_style = play_style

    def select_move(self, board: List[str]) -> Optional[int]:
        r"""Select a move based on the opponent's play style.

        Args:
            board (List[str]): The current game board as a list of strings.

        Returns:
            Optional[int]: The index of the selected move, or None if no move
                is available.
        """
        if self.play_style == "optimal":
            return self.get_optimal_move(board)
        elif self.play_style == "random":
            moves = TicTacToeEnv.available_moves(board)
            if not moves:
                return None  # Consistent with optimal strategy
            return random.choice(moves)

    def get_optimal_move(self, board: List[str]) -> Optional[int]:
        r"""Get the optimal move using the minimax algorithm.

        Args:
            board (List[str]): The current game board as a list of strings.

        Returns:
            Optional[int]: The index of the optimal move, or None if no move
                is available.
        """
        _, move = self.minimax(board, is_maximizing=True)
        return move

    def minimax(
        self,
        board: List[str],
        is_maximizing: bool,
        depth: int = 0,
        alpha: float = -math.inf,
        beta: float = math.inf,
    ) -> Tuple[float, Optional[int]]:
        r"""Minimax algorithm with alpha-beta pruning for optimal move
        selection.

        Recursively evaluates all possible moves to find the best one.
        Uses alpha-beta pruning to reduce the search space.

        Args:
            board (List[str]): The current game board as a list of strings.
            is_maximizing (bool): True if maximizing player (O), False if
                minimizing (X).
            depth (int): Current depth in the search tree. (default: :obj:`0`)
            alpha (float): Alpha value for pruning. (default: :obj:`-math.inf`)
            beta (float): Beta value for pruning. (default: :obj:`math.inf`)

        Returns:
            Tuple[float, Optional[int]]: A tuple containing:
                - float: The score of the best move (1 for O win, -1 for X
                  win, 0 for draw)
                - Optional[int]: The index of the best move, or None if
                  terminal state
        """
        winner = TicTacToeEnv.check_winner(board)
        if winner == "O":
            return (1, None)
        elif winner == "X":
            return (-1, None)
        elif winner == "draw":
            return (0, None)

        moves = TicTacToeEnv.available_moves(board)
        # Add depth limit to prevent stack overflow (9 is max depth for
        # tic-tac-toe)
        if depth >= 9:
            # Evaluate current position
            return (0, None)

        if is_maximizing:
            best_score = -math.inf
            best_move = None
            for move in moves:
                board[move] = "O"
                score, _ = self.minimax(
                    board,
                    is_maximizing=False,
                    depth=depth + 1,
                    alpha=alpha,
                    beta=beta,
                )
                board[move] = " "
                if score > best_score:
                    best_score = score
                    best_move = move
                alpha = max(alpha, best_score)
                if beta <= alpha:
                    break  # Beta cutoff
            return best_score, best_move
        else:
            best_score = math.inf
            best_move = None
            for move in moves:
                board[move] = "X"
                score, _ = self.minimax(
                    board,
                    is_maximizing=True,
                    depth=depth + 1,
                    alpha=alpha,
                    beta=beta,
                )
                board[move] = " "
                if score < best_score:
                    best_score = score
                    best_move = move
                beta = min(beta, best_score)
                if beta <= alpha:
                    break  # Alpha cutoff
            return best_score, best_move


class TicTacToeEnv(MultiStepEnv):
    r"""A Tic Tac Toe environment for reinforcement learning with LLMs.

    This environment implements a standard Tic Tac Toe game where the LLM agent
    plays as 'X' against an AI opponent that plays as 'O'. The opponent can use
    either an optimal strategy (minimax with alpha-beta pruning) or a random
    strategy.
    """

    WIN_COMBINATIONS: ClassVar = [
        (0, 1, 2),  # Top row
        (3, 4, 5),  # Middle row
        (6, 7, 8),  # Bottom row
        (0, 3, 6),  # Left column
        (1, 4, 7),  # Middle column
        (2, 5, 8),  # Right column
        (0, 4, 8),  # Diagonal from top-left
        (2, 4, 6),  # Diagonal from top-right
    ]

    def __init__(
        self,
        extractor: Optional[BaseExtractor] = None,
        max_steps: Optional[int] = None,
        play_style: Literal["optimal", "random"] = "optimal",
        **kwargs,
    ) -> None:
        r"""Initialize the Tic Tac Toe environment.

        Args:
            extractor (Optional[BaseExtractor]): Extractor to process LLM
                responses. If None, a default extractor with
                MoveExtractor will be used. (default: :obj:`None`)
            max_steps (Optional[int]): Maximum steps per episode.
                (default: :obj:`None`)
            play_style (Literal["optimal", "random"]): The strategy for the
                opponent to use, either "optimal" or "random". (default:
                :obj:`"optimal"`)
            **kwargs: Additional environment parameters.
        """
        if extractor is None:
            extractor = BaseExtractor(pipeline=[[MoveExtractor()]])
        super().__init__(extractor, max_steps, **kwargs)
        self.opponent = Opponent(play_style=play_style)

    def _get_initial_state(self) -> Dict[str, Any]:
        r"""Get the initial state of the environment.

        Returns:
            Dict[str, Any]: A dictionary containing the initial state with an
                empty board, game status flags, and move history.
        """
        # State includes the board (9 cells), game_over flag, and winner info.
        return {
            "board": [" " for _ in range(9)],
            "game_over": False,
            "winner": None,
            "last_move_illegal": False,
            "last_move": None,
        }

    async def _update_state(self, action: Action) -> None:
        r"""Update the environment state based on the agent's action.

        This method processes the agent's move, updates the board, checks for
        a winner, and if the game is not over, makes a move for the opponent.

        Args:
            action (Action): The action containing the LLM's response with the
                chosen move.

        Returns:
            None
        """
        board = self._state["board"]

        # Attempt to parse the agent's chosen move
        extraction_result = await self.extractor.extract(action.llm_response)
        if not extraction_result:
            # Handle extraction failure gracefully
            self._state["last_move_illegal"] = True
            self._state["last_move"] = None
            self._state["extraction_error"] = "Could not extract a valid move"
            return

        try:
            move = int(extraction_result)
            self._state["last_move"] = move
            self._state["extraction_error"] = None
        except ValueError:
            # Handle invalid move format gracefully
            self._state["last_move_illegal"] = True
            self._state["last_move"] = extraction_result
            self._state["extraction_error"] = (
                f"'{extraction_result}' is not a valid number"
            )
            return

        # Convert 1-indexed move to 0-indexed board position.
        index = move - 1
        if index < 0 or index > 8 or board[index] != " ":
            self._state["last_move_illegal"] = True
            self._state["extraction_error"] = (
                f"Position {move} is not a valid or available move"
            )
            return

        # Reset the flag
        self._state["last_move_illegal"] = False

        # Agent (X) makes the move.
        board[index] = "X"

        # Check if agent wins (or draw) right after its move.
        winner = self.check_winner(board)
        if winner is not None:
            self._state["game_over"] = True
            self._state["winner"] = winner
            return

        # Opponent (O) plays using the opponent class.
        opponent_move = self.opponent.select_move(board)
        if opponent_move is not None:
            board[opponent_move] = "O"

        # Check if the game ended after opponent's move.
        winner = self.check_winner(board)
        if winner is not None:
            self._state["game_over"] = True
            self._state["winner"] = winner

    def _get_next_observation(self) -> Observation:
        r"""Get the next observation based on the current state.

        This method generates a text observation describing the current state
        of the game board and prompting the agent to make a move.

        Returns:
            Observation: An Observation object containing the game state
                description.
        """
        board = self._state["board"]
        if self._state["last_move_illegal"]:
            obs = (
                "You are playing Tic Tac Toe with standard rules.\n"
                "You are the player with X.\n"
                "Your last move was illegal.\n"
                f"You chose the move {self._state['last_move']}."
                "Choose another number between 1 and 9 to place an X.\n"
                "The field must still be available.\n"
                "This is the current state of the board:\n"
                f"{self.render_board(board)}\n"
                "Each number that you can see is still an empty field "
                "that you can place your 'X' in. Please end your response "
                "with <Action> [a number from 1 to 9]"
            )
        else:
            obs = (
                "You are playing Tic Tac Toe with standard rules.\n"
                "You are the player with X.\n"
                "Choose a number between 1 and 9 to place an X.\n"
                "This is the current state of the board:\n"
                f"{self.render_board(board)}\n"
                "Each number that you can see is still an empty field "
                "that you can place your 'X' in. Please end your response "
                "with <Action> [a number from 1 to 9]"
            )

        return Observation(question=obs, context={}, metadata={})

    def _get_terminal_observation(self) -> Observation:
        r"""Get the final observation when the game is over.

        This method generates a text observation describing the final state
        of the game board and the game result (win, loss, or draw).

        Returns:
            Observation: An Observation object containing the final game state
                description.
        """
        board = self._state["board"]
        result_message = ""
        if self._state["winner"] == "X":
            result_message = "Congratulations, you won!"
        elif self._state["winner"] == "O":
            result_message = "Sorry, you lost!"
        else:
            result_message = "It's a draw!"

        obs = f"{self.render_board(board)}\nGame Over. {result_message}"

        return Observation(question=obs, context={}, metadata={})

    async def compute_reward(self) -> Tuple[float, Dict[str, float]]:
        r"""Compute the reward for the current state.

        Returns:
            Tuple[float, Dict[str, float]]: A tuple containing the total
                reward and a dictionary of reward components:
                - 1.0 for a win
                - 0.0 for a loss or illegal move
                - 0.5 for a draw
                - For ongoing games, returns an evaluation of the position
        """
        # Simple reward: 1 for win, 0 for loss, 0.5 for draw or ongoing.
        if self._state["game_over"]:
            if self._state["winner"] == "X":
                return 1.0, {"win": 1.0}
            elif self._state["winner"] == "O":
                return 0.0, {"loss": 0.0}
            else:
                return 0.5, {"draw": 0.5}

        elif self._state["last_move_illegal"]:
            return 0.0, {"illegal_move": 0.0}

        else:
            board = self._state["board"]
            value = TicTacToeEnv.evaluate_position_for_x(board, is_x_turn=True)
            return value, {"x_non_loss_value": value}

    @staticmethod
    def evaluate_position_for_x(
        board: List[str], is_x_turn: bool, depth: int = 0, max_depth: int = 10
    ) -> float:
        r"""Evaluate the current board position from X's perspective.

        Uses minimax to determine the value of the position.

        Args:
            board (List[str]): The current game board as a list of strings.
            is_x_turn (bool): True if it's X's turn to move, False otherwise.

        Returns:
            float: A float value representing the position evaluation:
                - 1.0 if X has a winning position
                - 0.0 if O has a winning position
                - 0.5 for a draw
                - For ongoing positions, returns the expected outcome with
                  perfect play
        """
        winner = TicTacToeEnv.check_winner(board)
        if winner == "X":
            return 1.0  # X wins
        elif winner == "O":
            return 0.0  # X loses
        elif winner == "draw":
            return 0.5  # draw

        # Add depth limit to prevent potential stack overflow
        if depth >= max_depth:
            return 0.5  # Return draw evaluation at max depth

        moves = TicTacToeEnv.available_moves(board)
        values = []
        # Create a copy of the board to avoid side effects
        for move in moves:
            board_copy = board.copy()
            board_copy[move] = "X" if is_x_turn else "O"
            value = TicTacToeEnv.evaluate_position_for_x(
                board_copy, not is_x_turn, depth + 1, max_depth
            )
            values.append(value)

        return max(values) if is_x_turn else min(values)

    def _is_done(self) -> bool:
        r"""Check if the episode is done.

        Returns:
            True if the game is over, False otherwise.
        """
        return self._state["game_over"]

    @staticmethod
    def available_moves(board: List[str]) -> List[int]:
        r"""Get all available moves on the board.

        Args:
            board (List[str]): The current game board as a list of strings.

        Returns:
            List[int]: A list of indices representing empty cells on the board.
        """
        # Return list of indices that are free.
        return [i for i, cell in enumerate(board) if cell == " "]

    @staticmethod
    def check_winner(board: List[str]) -> Optional[Literal["X", "O", "draw"]]:
        r"""Check if there is a winner or a draw on the board.

        Args:
            board (List[str]): The current game board as a list of strings.

        Returns:
            Optional[Literal["X", "O", "draw"]]: "X" if X has won, "O" if O
                has won, "draw" if the game is a draw, or None if the game is
                still ongoing.
        """
        # Check all win combinations.
        for a, b, c in TicTacToeEnv.WIN_COMBINATIONS:
            if board[a] != " " and board[a] == board[b] == board[c]:
                return board[a]
        # Check for draw.
        if all(cell != " " for cell in board):
            return "draw"
        return None

    def render_board(self, board: List[str]) -> str:
        r"""Render the board as a string for display.

        Args:
            board (List[str]): The current game board as a list of strings.

        Returns:
            str: A formatted string representation of the board.
        """

        # Create a nice formatted board.
        def cell_value(i: int) -> str:
            r"""Get the display value for a cell.

            Args:
                i (int): The index of the cell.

            Returns:
                str: The cell content ("X" or "O") or the cell number if empty.
            """
            return board[i] if board[i] != " " else str(i + 1)

        rows = []
        for i in range(0, 9, 3):
            row = " | ".join(cell_value(j) for j in range(i, i + 3))
            rows.append(row)
        return "\n---------\n".join(rows)
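Because `available_moves`, `check_winner`, and the `Opponent` minimax above are self-contained, they can be exercised without spinning up the full environment. A small sketch (the board position is made up for illustration):

    from camel.environments.tic_tac_toe import Opponent, TicTacToeEnv

    # X threatens the top row (indices 0, 1, 2); O holds only the center.
    board = ["X", "X", " ",
             " ", "O", " ",
             " ", " ", " "]

    print(TicTacToeEnv.available_moves(board))  # [2, 3, 5, 6, 7, 8]
    print(TicTacToeEnv.check_winner(board))     # None -- game still ongoing

    # The optimal opponent must block at index 2 (square 3 in the 1-9
    # numbering shown to the agent); any other move lets X complete the row.
    opponent = Opponent(play_style="optimal")
    print(opponent.select_move(board))          # 2

Note the two numbering schemes: the agent-facing protocol uses squares 1-9 (e.g. "<Action> 5" for the center), while the board list and these helpers are 0-indexed; `_update_state` performs the `move - 1` conversion.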
camel/loaders/__init__.py
CHANGED
@@ -15,6 +15,7 @@
 from .apify_reader import Apify
 from .base_io import File, create_file, create_file_from_raw_bytes
 from .chunkr_reader import ChunkrReader
+from .crawl4ai_reader import Crawl4AI
 from .firecrawl_reader import Firecrawl
 from .jina_url_reader import JinaURLReader
 from .mineru_extractor import MinerU
@@ -32,4 +33,5 @@ __all__ = [
     'ChunkrReader',
     'PandasReader',
     'MinerU',
+    'Crawl4AI',
 ]
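With the re-export in place, the new reader is importable from the package root; its constructor and crawling API live in the new crawl4ai_reader.py (230 lines, not shown in this hunk):

    # Only the import path is established by this hunk; see crawl4ai_reader.py
    # for the reader's actual constructor and methods.
    from camel.loaders import Crawl4AI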