camel-ai 0.2.37__py3-none-any.whl → 0.2.39__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic. Click here for more details.
- camel/__init__.py +1 -1
- camel/agents/chat_agent.py +4 -0
- camel/agents/repo_agent.py +2 -2
- camel/benchmarks/apibank.py +1 -1
- camel/benchmarks/apibench.py +1 -1
- camel/configs/__init__.py +3 -0
- camel/configs/modelscope_config.py +59 -0
- camel/datagen/evol_instruct/__init__.py +20 -0
- camel/datagen/evol_instruct/evol_instruct.py +424 -0
- camel/datagen/evol_instruct/scorer.py +166 -0
- camel/datagen/evol_instruct/templates.py +268 -0
- camel/datagen/self_improving_cot.py +1 -1
- camel/datasets/__init__.py +2 -0
- camel/datasets/base_generator.py +22 -9
- camel/datasets/few_shot_generator.py +2 -3
- camel/datasets/self_instruct_generator.py +415 -0
- camel/embeddings/openai_compatible_embedding.py +13 -5
- camel/environments/models.py +10 -4
- camel/environments/single_step.py +181 -41
- camel/interpreters/docker_interpreter.py +2 -2
- camel/interpreters/e2b_interpreter.py +1 -1
- camel/interpreters/internal_python_interpreter.py +1 -1
- camel/interpreters/subprocess_interpreter.py +1 -1
- camel/loaders/__init__.py +2 -2
- camel/loaders/{panda_reader.py → pandas_reader.py} +61 -30
- camel/loaders/unstructured_io.py +2 -1
- camel/memories/blocks/chat_history_block.py +1 -1
- camel/memories/context_creators/score_based.py +198 -67
- camel/models/__init__.py +2 -0
- camel/models/aiml_model.py +9 -3
- camel/models/anthropic_model.py +11 -3
- camel/models/azure_openai_model.py +9 -3
- camel/models/base_audio_model.py +6 -0
- camel/models/base_model.py +4 -0
- camel/models/deepseek_model.py +9 -3
- camel/models/gemini_model.py +9 -3
- camel/models/groq_model.py +9 -3
- camel/models/internlm_model.py +8 -2
- camel/models/model_factory.py +123 -0
- camel/models/modelscope_model.py +208 -0
- camel/models/moonshot_model.py +8 -2
- camel/models/nemotron_model.py +9 -3
- camel/models/nvidia_model.py +9 -3
- camel/models/ollama_model.py +9 -3
- camel/models/openai_audio_models.py +7 -5
- camel/models/openai_compatible_model.py +9 -3
- camel/models/openai_model.py +58 -5
- camel/models/openrouter_model.py +9 -3
- camel/models/qwen_model.py +9 -3
- camel/models/samba_model.py +9 -3
- camel/models/sglang_model.py +11 -4
- camel/models/siliconflow_model.py +8 -2
- camel/models/stub_model.py +2 -1
- camel/models/togetherai_model.py +11 -5
- camel/models/vllm_model.py +10 -4
- camel/models/yi_model.py +9 -3
- camel/models/zhipuai_model.py +11 -5
- camel/retrievers/auto_retriever.py +14 -0
- camel/retrievers/vector_retriever.py +1 -1
- camel/storages/__init__.py +2 -0
- camel/storages/graph_storages/neo4j_graph.py +1 -1
- camel/storages/vectordb_storages/__init__.py +2 -0
- camel/storages/vectordb_storages/base.py +2 -2
- camel/storages/vectordb_storages/milvus.py +2 -2
- camel/storages/vectordb_storages/qdrant.py +2 -2
- camel/storages/vectordb_storages/tidb.py +332 -0
- camel/tasks/task.py +2 -2
- camel/toolkits/__init__.py +9 -1
- camel/toolkits/arxiv_toolkit.py +2 -1
- camel/toolkits/ask_news_toolkit.py +11 -3
- camel/toolkits/audio_analysis_toolkit.py +2 -0
- camel/toolkits/base.py +3 -0
- camel/toolkits/browser_toolkit.py +84 -61
- camel/toolkits/code_execution.py +3 -1
- camel/toolkits/dappier_toolkit.py +2 -1
- camel/toolkits/data_commons_toolkit.py +2 -0
- camel/toolkits/excel_toolkit.py +2 -0
- camel/toolkits/file_write_toolkit.py +2 -0
- camel/toolkits/github_toolkit.py +6 -4
- camel/toolkits/google_scholar_toolkit.py +2 -0
- camel/toolkits/human_toolkit.py +17 -1
- camel/toolkits/image_analysis_toolkit.py +2 -0
- camel/toolkits/linkedin_toolkit.py +2 -1
- camel/toolkits/math_toolkit.py +2 -0
- camel/toolkits/mcp_toolkit.py +42 -52
- camel/toolkits/meshy_toolkit.py +20 -2
- camel/toolkits/networkx_toolkit.py +2 -0
- camel/toolkits/notion_toolkit.py +7 -0
- camel/toolkits/openai_agent_toolkit.py +131 -0
- camel/toolkits/openbb_toolkit.py +2 -1
- camel/toolkits/pubmed_toolkit.py +2 -0
- camel/toolkits/reddit_toolkit.py +2 -1
- camel/toolkits/retrieval_toolkit.py +2 -1
- camel/toolkits/search_toolkit.py +2 -1
- camel/toolkits/searxng_toolkit.py +207 -0
- camel/toolkits/semantic_scholar_toolkit.py +2 -0
- camel/toolkits/slack_toolkit.py +2 -0
- camel/toolkits/stripe_toolkit.py +2 -1
- camel/toolkits/sympy_toolkit.py +2 -0
- camel/toolkits/terminal_toolkit.py +2 -0
- camel/toolkits/thinking_toolkit.py +168 -12
- camel/toolkits/twitter_toolkit.py +2 -1
- camel/toolkits/video_analysis_toolkit.py +2 -1
- camel/toolkits/video_download_toolkit.py +2 -1
- camel/toolkits/weather_toolkit.py +2 -0
- camel/toolkits/whatsapp_toolkit.py +2 -1
- camel/toolkits/zapier_toolkit.py +2 -1
- camel/types/enums.py +66 -0
- camel/types/unified_model_type.py +5 -0
- camel/utils/__init__.py +2 -0
- camel/utils/chunker/code_chunker.py +9 -9
- camel/utils/commons.py +50 -30
- camel/utils/constants.py +2 -2
- camel/utils/mcp.py +79 -0
- camel/verifiers/__init__.py +2 -0
- camel/verifiers/base.py +15 -15
- camel/verifiers/math_verifier.py +182 -0
- camel/verifiers/python_verifier.py +28 -28
- {camel_ai-0.2.37.dist-info → camel_ai-0.2.39.dist-info}/METADATA +54 -4
- {camel_ai-0.2.37.dist-info → camel_ai-0.2.39.dist-info}/RECORD +122 -110
- {camel_ai-0.2.37.dist-info → camel_ai-0.2.39.dist-info}/WHEEL +0 -0
- {camel_ai-0.2.37.dist-info → camel_ai-0.2.39.dist-info}/licenses/LICENSE +0 -0
|
@@ -19,6 +19,7 @@ from camel.datasets import BaseGenerator, DataPoint, StaticDataset
|
|
|
19
19
|
from camel.logger import get_logger
|
|
20
20
|
from camel.verifiers.base import (
|
|
21
21
|
BaseVerifier,
|
|
22
|
+
VerificationOutcome,
|
|
22
23
|
VerificationResult,
|
|
23
24
|
)
|
|
24
25
|
|
|
@@ -51,7 +52,7 @@ class SingleStepEnv:
|
|
|
51
52
|
question="Episode ended. This is just a placeholder."
|
|
52
53
|
)
|
|
53
54
|
|
|
54
|
-
ACCURACY_REWARD =
|
|
55
|
+
ACCURACY_REWARD = 1
|
|
55
56
|
|
|
56
57
|
def __init__(
|
|
57
58
|
self,
|
|
@@ -126,6 +127,7 @@ class SingleStepEnv:
|
|
|
126
127
|
await self.verifier.cleanup()
|
|
127
128
|
self._states = []
|
|
128
129
|
self._states_done = []
|
|
130
|
+
self.current_batch_size = 0
|
|
129
131
|
logger.info('Environment closed successfully')
|
|
130
132
|
except Exception as e:
|
|
131
133
|
logger.error(f'Failed to close environment: {e}')
|
|
@@ -157,6 +159,8 @@ class SingleStepEnv:
|
|
|
157
159
|
ValueError: If batch size exceeds dataset size.
|
|
158
160
|
TypeError: If the dataset is of an unsupported type.
|
|
159
161
|
"""
|
|
162
|
+
if batch_size <= 0:
|
|
163
|
+
raise ValueError("Batch size must be positive")
|
|
160
164
|
|
|
161
165
|
if not self._is_setup:
|
|
162
166
|
logger.warning(
|
|
@@ -203,35 +207,66 @@ class SingleStepEnv:
|
|
|
203
207
|
return observations[0] if batch_size == 1 else observations
|
|
204
208
|
|
|
205
209
|
elif isinstance(self.dataset, BaseGenerator):
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
210
|
+
self._states = [
|
|
211
|
+
await self.dataset.async_sample() for _ in range(batch_size)
|
|
212
|
+
]
|
|
213
|
+
self.current_batch_size = batch_size
|
|
214
|
+
self._states_done = [False] * batch_size
|
|
215
|
+
|
|
216
|
+
observations = [
|
|
217
|
+
Observation(question=sample.question, context={}, metadata={})
|
|
218
|
+
for sample in self._states
|
|
219
|
+
]
|
|
220
|
+
|
|
221
|
+
return observations[0] if batch_size == 1 else observations
|
|
209
222
|
|
|
210
223
|
else:
|
|
211
224
|
raise TypeError(f"Unsupported dataset type: {type(self.dataset)}")
|
|
212
225
|
|
|
213
226
|
async def step(
|
|
214
|
-
self, action: Union[Action, List[Action]]
|
|
215
|
-
) -> Union[
|
|
216
|
-
|
|
217
|
-
|
|
227
|
+
self, action: Union[Action, List[Action], str]
|
|
228
|
+
) -> Union[
|
|
229
|
+
Tuple[Observation, float, bool, Dict[str, Any]],
|
|
230
|
+
List[Tuple[Observation, float, bool, Dict[str, Any]]],
|
|
231
|
+
]:
|
|
232
|
+
r"""Execute one interaction step in the environment using the
|
|
233
|
+
proposed solution.
|
|
234
|
+
|
|
235
|
+
This method processes the agent's response(s) to the current
|
|
236
|
+
observation(s), verifies the correctness of the responses using
|
|
237
|
+
the verifier, computes rewards, and returns the resulting
|
|
238
|
+
state transition(s).
|
|
239
|
+
|
|
240
|
+
The environment is strictly single-step. Once an action is
|
|
241
|
+
submitted for a state, that state is marked as done, and
|
|
242
|
+
the observation will not change.
|
|
218
243
|
|
|
219
244
|
Args:
|
|
220
|
-
action
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
245
|
+
action (Union[Action, List[Action], str]):
|
|
246
|
+
The action(s) taken by the agent,
|
|
247
|
+
which should contain the response(s)
|
|
248
|
+
to the observation(s). Can be:
|
|
249
|
+
- A single `Action` object (for batch size 1),
|
|
250
|
+
- A list of `Action` objects (for batched evaluation),
|
|
251
|
+
- A raw string (only allowed when batch size is 1).
|
|
225
252
|
|
|
226
253
|
Returns:
|
|
227
|
-
Union[
|
|
228
|
-
|
|
254
|
+
Union[Tuple[Observation, float, bool, Dict[str, Any]], List[...]]:
|
|
255
|
+
A tuple or list of tuples containing:
|
|
256
|
+
- `Observation`: Placeholder indicating episode end.
|
|
257
|
+
- `float`: The reward for the response.
|
|
258
|
+
- `bool`: Whether the episode is done
|
|
259
|
+
(always `True` in this case).
|
|
260
|
+
- `dict`: Additional info including the proposed solution,
|
|
261
|
+
verification result, and original data point.
|
|
229
262
|
|
|
230
263
|
Raises:
|
|
231
|
-
RuntimeError: If environment
|
|
232
|
-
|
|
233
|
-
|
|
264
|
+
RuntimeError: If the environment has not been set up,
|
|
265
|
+
or if `reset()` has not been called.
|
|
266
|
+
ValueError: If invalid action format, duplicate indices,
|
|
267
|
+
or out-of-bounds indices are detected.
|
|
234
268
|
"""
|
|
269
|
+
|
|
235
270
|
if not self._is_setup:
|
|
236
271
|
raise RuntimeError("Environment not set up. Call setup() first.")
|
|
237
272
|
if self._batch_done():
|
|
@@ -241,12 +276,10 @@ class SingleStepEnv:
|
|
|
241
276
|
if not self._states:
|
|
242
277
|
raise RuntimeError("No current observation. Call reset() first.")
|
|
243
278
|
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
indices = [
|
|
279
|
+
actions = self._normalize_actions(action)
|
|
280
|
+
|
|
281
|
+
indices = [a.index for a in actions]
|
|
247
282
|
|
|
248
|
-
if len(set(indices)) != len(indices):
|
|
249
|
-
raise ValueError("Duplicate state indices in actions.")
|
|
250
283
|
for idx in indices:
|
|
251
284
|
if idx < 0 or idx >= len(self._states):
|
|
252
285
|
raise ValueError(f"Invalid state index {idx}.")
|
|
@@ -254,7 +287,6 @@ class SingleStepEnv:
|
|
|
254
287
|
raise ValueError(f"State at index {idx} is already finished.")
|
|
255
288
|
|
|
256
289
|
num_actions = len(actions)
|
|
257
|
-
|
|
258
290
|
if self.current_batch_size % num_actions != 0:
|
|
259
291
|
logger.warning(
|
|
260
292
|
f"Number of actions ({num_actions}) is not a divisor of "
|
|
@@ -262,25 +294,34 @@ class SingleStepEnv:
|
|
|
262
294
|
)
|
|
263
295
|
|
|
264
296
|
proposed_solutions = [act.llm_response for act in actions]
|
|
265
|
-
ground_truths: List[str] = [
|
|
266
|
-
|
|
267
|
-
|
|
297
|
+
ground_truths: List[str] = []
|
|
298
|
+
for idx in indices:
|
|
299
|
+
ground_truths.append(self._states[idx].final_answer)
|
|
268
300
|
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
301
|
+
try:
|
|
302
|
+
verification_results = await self.verifier.verify_batch(
|
|
303
|
+
solutions=proposed_solutions,
|
|
304
|
+
reference_answers=ground_truths, # type: ignore [arg-type]
|
|
305
|
+
raise_on_error=True,
|
|
306
|
+
)
|
|
307
|
+
except Exception as e:
|
|
308
|
+
logger.error(f"Verification failed: {e}")
|
|
309
|
+
# Return failed verification results with status=FAILURE
|
|
310
|
+
verification_results = [
|
|
311
|
+
VerificationResult(
|
|
312
|
+
result="",
|
|
313
|
+
status=VerificationOutcome.FAILURE,
|
|
314
|
+
error_message=f"Verification error: {e}",
|
|
315
|
+
)
|
|
316
|
+
for _ in range(len(proposed_solutions))
|
|
317
|
+
]
|
|
274
318
|
|
|
275
319
|
total_rewards, rewards_dicts = await self._compute_reward_batch(
|
|
276
320
|
proposed_solutions, verification_results
|
|
277
321
|
)
|
|
278
|
-
|
|
279
|
-
step_results = [
|
|
280
|
-
|
|
281
|
-
for i, action in enumerate(actions):
|
|
282
|
-
idx = action.index
|
|
283
|
-
step_result = StepResult(
|
|
322
|
+
# Create and return step results in batch
|
|
323
|
+
step_results = [
|
|
324
|
+
StepResult(
|
|
284
325
|
observation=self.PLACEHOLDER_OBS,
|
|
285
326
|
reward=total_rewards[i],
|
|
286
327
|
rewards_dict=rewards_dicts[i],
|
|
@@ -288,14 +329,96 @@ class SingleStepEnv:
|
|
|
288
329
|
info={
|
|
289
330
|
"proposed_solution": proposed_solutions[i],
|
|
290
331
|
"verification_result": verification_results[i],
|
|
291
|
-
"state": self._states[
|
|
332
|
+
"state": self._states[indices[i]],
|
|
292
333
|
},
|
|
293
|
-
)
|
|
294
|
-
|
|
334
|
+
).as_tuple()
|
|
335
|
+
for i in range(len(actions))
|
|
336
|
+
]
|
|
337
|
+
for _, idx in enumerate(indices):
|
|
295
338
|
self._states_done[idx] = True
|
|
296
339
|
|
|
297
340
|
return step_results[0] if len(step_results) == 1 else step_results
|
|
298
341
|
|
|
342
|
+
def _normalize_actions(
|
|
343
|
+
self, action: Union[Action, List[Action], str]
|
|
344
|
+
) -> List[Action]:
|
|
345
|
+
r"""Normalize the user-provided action(s) into a validated list
|
|
346
|
+
of `Action` objects.
|
|
347
|
+
|
|
348
|
+
This method handles flexibility in input format by converting
|
|
349
|
+
raw strings (only allowed when batch size is 1) and ensuring
|
|
350
|
+
all necessary structure and integrity checks on actions
|
|
351
|
+
(e.g., index bounds, duplicates).
|
|
352
|
+
|
|
353
|
+
Args:
|
|
354
|
+
action (Union[Action, List[Action], str]):
|
|
355
|
+
The raw input action(s) provided by the agent. Can be:
|
|
356
|
+
- A single `Action` object.
|
|
357
|
+
- A list of `Action` objects.
|
|
358
|
+
- A raw string (if `batch_size == 1`), auto-wrapped
|
|
359
|
+
in an `Action`.
|
|
360
|
+
|
|
361
|
+
Returns:
|
|
362
|
+
List[Action]: A list of validated `Action` instances
|
|
363
|
+
ready for evaluation.
|
|
364
|
+
|
|
365
|
+
Raises:
|
|
366
|
+
ValueError: If:
|
|
367
|
+
- Action indices are invalid or duplicated,
|
|
368
|
+
- Action list is empty,
|
|
369
|
+
- Index mismatches expected values
|
|
370
|
+
(e.g., 0 for batch size 1),
|
|
371
|
+
- Wrong structure is used
|
|
372
|
+
(e.g., string used with batch size > 1).
|
|
373
|
+
TypeError: If the action is of an unsupported type.
|
|
374
|
+
"""
|
|
375
|
+
|
|
376
|
+
if isinstance(action, str):
|
|
377
|
+
if self.current_batch_size != 1:
|
|
378
|
+
raise ValueError(
|
|
379
|
+
"String input for action is only allowed"
|
|
380
|
+
" when batch_size == 1"
|
|
381
|
+
)
|
|
382
|
+
logger.warning("Auto-converting from str to Action", stacklevel=2)
|
|
383
|
+
action = Action(index=0, llm_response=action)
|
|
384
|
+
|
|
385
|
+
if isinstance(action, Action):
|
|
386
|
+
actions = [action]
|
|
387
|
+
elif isinstance(action, list):
|
|
388
|
+
if not action:
|
|
389
|
+
raise ValueError("Action list cannot be empty")
|
|
390
|
+
if not all(isinstance(a, Action) for a in action):
|
|
391
|
+
raise ValueError(
|
|
392
|
+
"All elements in the list must be Action objects"
|
|
393
|
+
)
|
|
394
|
+
actions = action
|
|
395
|
+
else:
|
|
396
|
+
raise TypeError("Action must be a str, Action, or list of Actions")
|
|
397
|
+
|
|
398
|
+
if self.current_batch_size == 1 and len(actions) != 1:
|
|
399
|
+
raise ValueError(
|
|
400
|
+
"For batch_size=1, expect a single Action or a "
|
|
401
|
+
"list containing exactly one Action"
|
|
402
|
+
)
|
|
403
|
+
|
|
404
|
+
# Validate indices
|
|
405
|
+
for a in actions:
|
|
406
|
+
if not isinstance(a.index, int):
|
|
407
|
+
raise ValueError(
|
|
408
|
+
f"Action index must be an integer, got {a.index}"
|
|
409
|
+
)
|
|
410
|
+
if self.current_batch_size == 1:
|
|
411
|
+
if a.index != 0:
|
|
412
|
+
raise ValueError(
|
|
413
|
+
"For batch_size=1, Action index must be 0"
|
|
414
|
+
)
|
|
415
|
+
|
|
416
|
+
indices = [a.index for a in actions]
|
|
417
|
+
if len(set(indices)) != len(indices):
|
|
418
|
+
raise ValueError("Duplicate state indices in actions.")
|
|
419
|
+
|
|
420
|
+
return actions
|
|
421
|
+
|
|
299
422
|
async def _compute_reward_batch(
|
|
300
423
|
self,
|
|
301
424
|
proposed_solutions: List[str],
|
|
@@ -315,6 +438,12 @@ class SingleStepEnv:
|
|
|
315
438
|
- List of total rewards for each solution.
|
|
316
439
|
- List of reward component dictionaries for each solution.
|
|
317
440
|
"""
|
|
441
|
+
if len(proposed_solutions) != len(verification_results):
|
|
442
|
+
raise ValueError(
|
|
443
|
+
f"Length mismatch: {len(proposed_solutions)} solutions vs "
|
|
444
|
+
f"{len(verification_results)} verification results"
|
|
445
|
+
)
|
|
446
|
+
|
|
318
447
|
total_rewards = []
|
|
319
448
|
rewards_dicts = []
|
|
320
449
|
|
|
@@ -355,9 +484,20 @@ class SingleStepEnv:
|
|
|
355
484
|
return {}
|
|
356
485
|
|
|
357
486
|
def _batch_done(self) -> bool:
|
|
487
|
+
r"""Check if all states in the current batch are done.
|
|
488
|
+
|
|
489
|
+
Returns:
|
|
490
|
+
bool: True if all states are marked as done, False otherwise.
|
|
491
|
+
"""
|
|
358
492
|
return all(self._states_done)
|
|
359
493
|
|
|
360
494
|
def _batch_started(self) -> bool:
|
|
495
|
+
r"""Check if the batch processing has started.
|
|
496
|
+
|
|
497
|
+
Returns:
|
|
498
|
+
bool: True if at least one state is marked as done, False
|
|
499
|
+
otherwise.
|
|
500
|
+
"""
|
|
361
501
|
return any(self._states_done)
|
|
362
502
|
|
|
363
503
|
@property
|
|
@@ -185,7 +185,7 @@ class DockerInterpreter(BaseInterpreter):
|
|
|
185
185
|
code: str,
|
|
186
186
|
code_type: str,
|
|
187
187
|
) -> str:
|
|
188
|
-
r"""Executes the given code in the
|
|
188
|
+
r"""Executes the given code in the container attached to the
|
|
189
189
|
interpreter, and captures the stdout and stderr streams.
|
|
190
190
|
|
|
191
191
|
Args:
|
|
@@ -210,7 +210,7 @@ class DockerInterpreter(BaseInterpreter):
|
|
|
210
210
|
if self.require_confirm:
|
|
211
211
|
logger.info(
|
|
212
212
|
f"The following {code_type} code will run on your "
|
|
213
|
-
"computer: {code}"
|
|
213
|
+
f"computer: {code}"
|
|
214
214
|
)
|
|
215
215
|
while True:
|
|
216
216
|
choice = input("Running code? [Y/n]:").lower()
|
|
@@ -99,7 +99,7 @@ class E2BInterpreter(BaseInterpreter):
|
|
|
99
99
|
if self.require_confirm:
|
|
100
100
|
logger.info(
|
|
101
101
|
f"The following {code_type} code will run on your "
|
|
102
|
-
"e2b sandbox: {code}"
|
|
102
|
+
f"e2b sandbox: {code}"
|
|
103
103
|
)
|
|
104
104
|
while True:
|
|
105
105
|
choice = input("Running code? [Y/n]:").lower()
|
|
@@ -421,7 +421,7 @@ class InternalPythonInterpreter(BaseInterpreter):
|
|
|
421
421
|
result = None
|
|
422
422
|
if not isinstance(if_statement.test, ast.Compare):
|
|
423
423
|
raise InterpreterError(
|
|
424
|
-
"Only
|
|
424
|
+
"Only Compare expr supported in if statement, get"
|
|
425
425
|
f" {if_statement.test.__class__.__name__}"
|
|
426
426
|
)
|
|
427
427
|
if self._execute_condition(if_statement.test):
|
|
@@ -292,7 +292,7 @@ class SubprocessInterpreter(BaseInterpreter):
|
|
|
292
292
|
if self.require_confirm:
|
|
293
293
|
logger.info(
|
|
294
294
|
f"The following {code_type} code will run on your "
|
|
295
|
-
"computer: {code}"
|
|
295
|
+
f"computer: {code}"
|
|
296
296
|
)
|
|
297
297
|
while True:
|
|
298
298
|
choice = input("Running code? [Y/n]:").lower().strip()
|
camel/loaders/__init__.py
CHANGED
|
@@ -18,7 +18,7 @@ from .chunkr_reader import ChunkrReader
|
|
|
18
18
|
from .firecrawl_reader import Firecrawl
|
|
19
19
|
from .jina_url_reader import JinaURLReader
|
|
20
20
|
from .mineru_extractor import MinerU
|
|
21
|
-
from .
|
|
21
|
+
from .pandas_reader import PandasReader
|
|
22
22
|
from .unstructured_io import UnstructuredIO
|
|
23
23
|
|
|
24
24
|
__all__ = [
|
|
@@ -30,6 +30,6 @@ __all__ = [
|
|
|
30
30
|
'Firecrawl',
|
|
31
31
|
'Apify',
|
|
32
32
|
'ChunkrReader',
|
|
33
|
-
'
|
|
33
|
+
'PandasReader',
|
|
34
34
|
'MinerU',
|
|
35
35
|
]
|
|
@@ -11,13 +11,10 @@
|
|
|
11
11
|
# See the License for the specific language governing permissions and
|
|
12
12
|
# limitations under the License.
|
|
13
13
|
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
14
|
-
import os
|
|
15
14
|
from functools import wraps
|
|
16
15
|
from pathlib import Path
|
|
17
16
|
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Union
|
|
18
17
|
|
|
19
|
-
import pandas as pd
|
|
20
|
-
|
|
21
18
|
if TYPE_CHECKING:
|
|
22
19
|
from pandas import DataFrame
|
|
23
20
|
from pandasai import SmartDataframe
|
|
@@ -50,25 +47,18 @@ def check_suffix(valid_suffixs: List[str]) -> Callable:
|
|
|
50
47
|
return decorator
|
|
51
48
|
|
|
52
49
|
|
|
53
|
-
class
|
|
50
|
+
class PandasReader:
|
|
54
51
|
def __init__(self, config: Optional[Dict[str, Any]] = None) -> None:
|
|
55
|
-
r"""Initializes the
|
|
52
|
+
r"""Initializes the PandasReader class.
|
|
56
53
|
|
|
57
54
|
Args:
|
|
58
55
|
config (Optional[Dict[str, Any]], optional): The configuration
|
|
59
56
|
dictionary that can include LLM API settings for LLM-based
|
|
60
|
-
processing. If not provided,
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
the config dictionary. (default: :obj:`None`)
|
|
57
|
+
processing. If not provided, no LLM will be configured by
|
|
58
|
+
default. You can customize the LLM configuration by providing
|
|
59
|
+
a 'llm' key in the config dictionary. (default: :obj:`None`)
|
|
64
60
|
"""
|
|
65
|
-
from pandasai.llm import OpenAI # type: ignore[import-untyped]
|
|
66
|
-
|
|
67
61
|
self.config = config or {}
|
|
68
|
-
if "llm" not in self.config:
|
|
69
|
-
self.config["llm"] = OpenAI(
|
|
70
|
-
api_token=os.getenv("OPENAI_API_KEY"),
|
|
71
|
-
)
|
|
72
62
|
|
|
73
63
|
self.__LOADER = {
|
|
74
64
|
".csv": self.read_csv,
|
|
@@ -91,8 +81,13 @@ class PandaReader:
|
|
|
91
81
|
data: Union["DataFrame", str],
|
|
92
82
|
*args: Any,
|
|
93
83
|
**kwargs: Dict[str, Any],
|
|
94
|
-
) -> "SmartDataframe":
|
|
95
|
-
r"""Loads a file or DataFrame and returns a
|
|
84
|
+
) -> Union["DataFrame", "SmartDataframe"]:
|
|
85
|
+
r"""Loads a file or DataFrame and returns a DataFrame or
|
|
86
|
+
SmartDataframe object.
|
|
87
|
+
|
|
88
|
+
If an LLM is configured in the config dictionary, a SmartDataframe
|
|
89
|
+
will be returned, otherwise a regular pandas DataFrame will be
|
|
90
|
+
returned.
|
|
96
91
|
|
|
97
92
|
args:
|
|
98
93
|
data (Union[DataFrame, str]): The data to load.
|
|
@@ -100,24 +95,32 @@ class PandaReader:
|
|
|
100
95
|
**kwargs (Dict[str, Any]): Additional keyword arguments.
|
|
101
96
|
|
|
102
97
|
Returns:
|
|
103
|
-
SmartDataframe: The SmartDataframe
|
|
98
|
+
Union[DataFrame, SmartDataframe]: The DataFrame or SmartDataframe
|
|
99
|
+
object.
|
|
104
100
|
"""
|
|
105
101
|
from pandas import DataFrame
|
|
106
|
-
from pandasai import SmartDataframe
|
|
107
102
|
|
|
103
|
+
# Load the data into a pandas DataFrame
|
|
108
104
|
if isinstance(data, DataFrame):
|
|
109
|
-
|
|
110
|
-
file_path = str(data)
|
|
111
|
-
path = Path(file_path)
|
|
112
|
-
if not file_path.startswith("http") and not path.exists():
|
|
113
|
-
raise FileNotFoundError(f"File {file_path} not found")
|
|
114
|
-
if path.suffix in self.__LOADER:
|
|
115
|
-
return SmartDataframe(
|
|
116
|
-
self.__LOADER[path.suffix](file_path, *args, **kwargs), # type: ignore[operator]
|
|
117
|
-
config=self.config,
|
|
118
|
-
)
|
|
105
|
+
df = data
|
|
119
106
|
else:
|
|
120
|
-
|
|
107
|
+
file_path = str(data)
|
|
108
|
+
path = Path(file_path)
|
|
109
|
+
if not file_path.startswith("http") and not path.exists():
|
|
110
|
+
raise FileNotFoundError(f"File {file_path} not found")
|
|
111
|
+
if path.suffix in self.__LOADER:
|
|
112
|
+
df = self.__LOADER[path.suffix](file_path, *args, **kwargs) # type: ignore[operator]
|
|
113
|
+
else:
|
|
114
|
+
raise ValueError(f"Unsupported file format: {path.suffix}")
|
|
115
|
+
|
|
116
|
+
# If an LLM is configured, return a SmartDataframe, otherwise return a
|
|
117
|
+
# regular DataFrame
|
|
118
|
+
if "llm" in self.config:
|
|
119
|
+
from pandasai import SmartDataframe
|
|
120
|
+
|
|
121
|
+
return SmartDataframe(df, config=self.config)
|
|
122
|
+
else:
|
|
123
|
+
return df
|
|
121
124
|
|
|
122
125
|
@check_suffix([".csv"])
|
|
123
126
|
def read_csv(
|
|
@@ -133,6 +136,8 @@ class PandaReader:
|
|
|
133
136
|
Returns:
|
|
134
137
|
DataFrame: The DataFrame object.
|
|
135
138
|
"""
|
|
139
|
+
import pandas as pd
|
|
140
|
+
|
|
136
141
|
return pd.read_csv(file_path, *args, **kwargs)
|
|
137
142
|
|
|
138
143
|
@check_suffix([".xlsx", ".xls"])
|
|
@@ -149,6 +154,8 @@ class PandaReader:
|
|
|
149
154
|
Returns:
|
|
150
155
|
DataFrame: The DataFrame object.
|
|
151
156
|
"""
|
|
157
|
+
import pandas as pd
|
|
158
|
+
|
|
152
159
|
return pd.read_excel(file_path, *args, **kwargs)
|
|
153
160
|
|
|
154
161
|
@check_suffix([".json"])
|
|
@@ -165,6 +172,8 @@ class PandaReader:
|
|
|
165
172
|
Returns:
|
|
166
173
|
DataFrame: The DataFrame object.
|
|
167
174
|
"""
|
|
175
|
+
import pandas as pd
|
|
176
|
+
|
|
168
177
|
return pd.read_json(file_path, *args, **kwargs)
|
|
169
178
|
|
|
170
179
|
@check_suffix([".parquet"])
|
|
@@ -181,6 +190,8 @@ class PandaReader:
|
|
|
181
190
|
Returns:
|
|
182
191
|
DataFrame: The DataFrame object.
|
|
183
192
|
"""
|
|
193
|
+
import pandas as pd
|
|
194
|
+
|
|
184
195
|
return pd.read_parquet(file_path, *args, **kwargs)
|
|
185
196
|
|
|
186
197
|
def read_sql(self, *args: Any, **kwargs: Dict[str, Any]) -> "DataFrame":
|
|
@@ -193,6 +204,8 @@ class PandaReader:
|
|
|
193
204
|
Returns:
|
|
194
205
|
DataFrame: The DataFrame object.
|
|
195
206
|
"""
|
|
207
|
+
import pandas as pd
|
|
208
|
+
|
|
196
209
|
return pd.read_sql(*args, **kwargs)
|
|
197
210
|
|
|
198
211
|
def read_table(
|
|
@@ -208,6 +221,8 @@ class PandaReader:
|
|
|
208
221
|
Returns:
|
|
209
222
|
DataFrame: The DataFrame object.
|
|
210
223
|
"""
|
|
224
|
+
import pandas as pd
|
|
225
|
+
|
|
211
226
|
return pd.read_table(file_path, *args, **kwargs)
|
|
212
227
|
|
|
213
228
|
def read_clipboard(
|
|
@@ -222,6 +237,8 @@ class PandaReader:
|
|
|
222
237
|
Returns:
|
|
223
238
|
DataFrame: The DataFrame object.
|
|
224
239
|
"""
|
|
240
|
+
import pandas as pd
|
|
241
|
+
|
|
225
242
|
return pd.read_clipboard(*args, **kwargs)
|
|
226
243
|
|
|
227
244
|
@check_suffix([".html"])
|
|
@@ -238,6 +255,8 @@ class PandaReader:
|
|
|
238
255
|
Returns:
|
|
239
256
|
DataFrame: The DataFrame object.
|
|
240
257
|
"""
|
|
258
|
+
import pandas as pd
|
|
259
|
+
|
|
241
260
|
return pd.read_html(file_path, *args, **kwargs)
|
|
242
261
|
|
|
243
262
|
@check_suffix([".feather"])
|
|
@@ -254,6 +273,8 @@ class PandaReader:
|
|
|
254
273
|
Returns:
|
|
255
274
|
DataFrame: The DataFrame object.
|
|
256
275
|
"""
|
|
276
|
+
import pandas as pd
|
|
277
|
+
|
|
257
278
|
return pd.read_feather(file_path, *args, **kwargs)
|
|
258
279
|
|
|
259
280
|
@check_suffix([".dta"])
|
|
@@ -270,6 +291,8 @@ class PandaReader:
|
|
|
270
291
|
Returns:
|
|
271
292
|
DataFrame: The DataFrame object.
|
|
272
293
|
"""
|
|
294
|
+
import pandas as pd
|
|
295
|
+
|
|
273
296
|
return pd.read_stata(file_path, *args, **kwargs)
|
|
274
297
|
|
|
275
298
|
@check_suffix([".sas"])
|
|
@@ -286,6 +309,8 @@ class PandaReader:
|
|
|
286
309
|
Returns:
|
|
287
310
|
DataFrame: The DataFrame object.
|
|
288
311
|
"""
|
|
312
|
+
import pandas as pd
|
|
313
|
+
|
|
289
314
|
return pd.read_sas(file_path, *args, **kwargs)
|
|
290
315
|
|
|
291
316
|
@check_suffix([".pkl"])
|
|
@@ -302,6 +327,8 @@ class PandaReader:
|
|
|
302
327
|
Returns:
|
|
303
328
|
DataFrame: The DataFrame object.
|
|
304
329
|
"""
|
|
330
|
+
import pandas as pd
|
|
331
|
+
|
|
305
332
|
return pd.read_pickle(file_path, *args, **kwargs)
|
|
306
333
|
|
|
307
334
|
@check_suffix([".h5"])
|
|
@@ -318,6 +345,8 @@ class PandaReader:
|
|
|
318
345
|
Returns:
|
|
319
346
|
DataFrame: The DataFrame object.
|
|
320
347
|
"""
|
|
348
|
+
import pandas as pd
|
|
349
|
+
|
|
321
350
|
return pd.read_hdf(file_path, *args, **kwargs)
|
|
322
351
|
|
|
323
352
|
@check_suffix([".orc"])
|
|
@@ -334,4 +363,6 @@ class PandaReader:
|
|
|
334
363
|
Returns:
|
|
335
364
|
DataFrame: The DataFrame object.
|
|
336
365
|
"""
|
|
366
|
+
import pandas as pd
|
|
367
|
+
|
|
337
368
|
return pd.read_orc(file_path, *args, **kwargs)
|
camel/loaders/unstructured_io.py
CHANGED
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
# See the License for the specific language governing permissions and
|
|
12
12
|
# limitations under the License.
|
|
13
13
|
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
14
|
+
import traceback
|
|
14
15
|
import uuid
|
|
15
16
|
import warnings
|
|
16
17
|
from typing import (
|
|
@@ -151,7 +152,7 @@ class UnstructuredIO:
|
|
|
151
152
|
elements = partition(file=f, **kwargs)
|
|
152
153
|
return elements
|
|
153
154
|
except Exception:
|
|
154
|
-
warnings.warn(
|
|
155
|
+
warnings.warn(traceback.format_exc())
|
|
155
156
|
return None
|
|
156
157
|
|
|
157
158
|
@staticmethod
|
|
@@ -38,7 +38,7 @@ class ChatHistoryBlock(MemoryBlock):
|
|
|
38
38
|
keep_rate (float, optional): In historical messages, the score of the
|
|
39
39
|
last message is 1.0, and with each step taken backward, the score
|
|
40
40
|
of the message is multiplied by the `keep_rate`. Higher `keep_rate`
|
|
41
|
-
leads to high
|
|
41
|
+
leads to high possibility to keep history messages during context
|
|
42
42
|
creation.
|
|
43
43
|
"""
|
|
44
44
|
|