camel-ai 0.2.37__py3-none-any.whl → 0.2.39__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of camel-ai might be problematic; see the registry's advisory page for more details.

Files changed (122)
  1. camel/__init__.py +1 -1
  2. camel/agents/chat_agent.py +4 -0
  3. camel/agents/repo_agent.py +2 -2
  4. camel/benchmarks/apibank.py +1 -1
  5. camel/benchmarks/apibench.py +1 -1
  6. camel/configs/__init__.py +3 -0
  7. camel/configs/modelscope_config.py +59 -0
  8. camel/datagen/evol_instruct/__init__.py +20 -0
  9. camel/datagen/evol_instruct/evol_instruct.py +424 -0
  10. camel/datagen/evol_instruct/scorer.py +166 -0
  11. camel/datagen/evol_instruct/templates.py +268 -0
  12. camel/datagen/self_improving_cot.py +1 -1
  13. camel/datasets/__init__.py +2 -0
  14. camel/datasets/base_generator.py +22 -9
  15. camel/datasets/few_shot_generator.py +2 -3
  16. camel/datasets/self_instruct_generator.py +415 -0
  17. camel/embeddings/openai_compatible_embedding.py +13 -5
  18. camel/environments/models.py +10 -4
  19. camel/environments/single_step.py +181 -41
  20. camel/interpreters/docker_interpreter.py +2 -2
  21. camel/interpreters/e2b_interpreter.py +1 -1
  22. camel/interpreters/internal_python_interpreter.py +1 -1
  23. camel/interpreters/subprocess_interpreter.py +1 -1
  24. camel/loaders/__init__.py +2 -2
  25. camel/loaders/{panda_reader.py → pandas_reader.py} +61 -30
  26. camel/loaders/unstructured_io.py +2 -1
  27. camel/memories/blocks/chat_history_block.py +1 -1
  28. camel/memories/context_creators/score_based.py +198 -67
  29. camel/models/__init__.py +2 -0
  30. camel/models/aiml_model.py +9 -3
  31. camel/models/anthropic_model.py +11 -3
  32. camel/models/azure_openai_model.py +9 -3
  33. camel/models/base_audio_model.py +6 -0
  34. camel/models/base_model.py +4 -0
  35. camel/models/deepseek_model.py +9 -3
  36. camel/models/gemini_model.py +9 -3
  37. camel/models/groq_model.py +9 -3
  38. camel/models/internlm_model.py +8 -2
  39. camel/models/model_factory.py +123 -0
  40. camel/models/modelscope_model.py +208 -0
  41. camel/models/moonshot_model.py +8 -2
  42. camel/models/nemotron_model.py +9 -3
  43. camel/models/nvidia_model.py +9 -3
  44. camel/models/ollama_model.py +9 -3
  45. camel/models/openai_audio_models.py +7 -5
  46. camel/models/openai_compatible_model.py +9 -3
  47. camel/models/openai_model.py +58 -5
  48. camel/models/openrouter_model.py +9 -3
  49. camel/models/qwen_model.py +9 -3
  50. camel/models/samba_model.py +9 -3
  51. camel/models/sglang_model.py +11 -4
  52. camel/models/siliconflow_model.py +8 -2
  53. camel/models/stub_model.py +2 -1
  54. camel/models/togetherai_model.py +11 -5
  55. camel/models/vllm_model.py +10 -4
  56. camel/models/yi_model.py +9 -3
  57. camel/models/zhipuai_model.py +11 -5
  58. camel/retrievers/auto_retriever.py +14 -0
  59. camel/retrievers/vector_retriever.py +1 -1
  60. camel/storages/__init__.py +2 -0
  61. camel/storages/graph_storages/neo4j_graph.py +1 -1
  62. camel/storages/vectordb_storages/__init__.py +2 -0
  63. camel/storages/vectordb_storages/base.py +2 -2
  64. camel/storages/vectordb_storages/milvus.py +2 -2
  65. camel/storages/vectordb_storages/qdrant.py +2 -2
  66. camel/storages/vectordb_storages/tidb.py +332 -0
  67. camel/tasks/task.py +2 -2
  68. camel/toolkits/__init__.py +9 -1
  69. camel/toolkits/arxiv_toolkit.py +2 -1
  70. camel/toolkits/ask_news_toolkit.py +11 -3
  71. camel/toolkits/audio_analysis_toolkit.py +2 -0
  72. camel/toolkits/base.py +3 -0
  73. camel/toolkits/browser_toolkit.py +84 -61
  74. camel/toolkits/code_execution.py +3 -1
  75. camel/toolkits/dappier_toolkit.py +2 -1
  76. camel/toolkits/data_commons_toolkit.py +2 -0
  77. camel/toolkits/excel_toolkit.py +2 -0
  78. camel/toolkits/file_write_toolkit.py +2 -0
  79. camel/toolkits/github_toolkit.py +6 -4
  80. camel/toolkits/google_scholar_toolkit.py +2 -0
  81. camel/toolkits/human_toolkit.py +17 -1
  82. camel/toolkits/image_analysis_toolkit.py +2 -0
  83. camel/toolkits/linkedin_toolkit.py +2 -1
  84. camel/toolkits/math_toolkit.py +2 -0
  85. camel/toolkits/mcp_toolkit.py +42 -52
  86. camel/toolkits/meshy_toolkit.py +20 -2
  87. camel/toolkits/networkx_toolkit.py +2 -0
  88. camel/toolkits/notion_toolkit.py +7 -0
  89. camel/toolkits/openai_agent_toolkit.py +131 -0
  90. camel/toolkits/openbb_toolkit.py +2 -1
  91. camel/toolkits/pubmed_toolkit.py +2 -0
  92. camel/toolkits/reddit_toolkit.py +2 -1
  93. camel/toolkits/retrieval_toolkit.py +2 -1
  94. camel/toolkits/search_toolkit.py +2 -1
  95. camel/toolkits/searxng_toolkit.py +207 -0
  96. camel/toolkits/semantic_scholar_toolkit.py +2 -0
  97. camel/toolkits/slack_toolkit.py +2 -0
  98. camel/toolkits/stripe_toolkit.py +2 -1
  99. camel/toolkits/sympy_toolkit.py +2 -0
  100. camel/toolkits/terminal_toolkit.py +2 -0
  101. camel/toolkits/thinking_toolkit.py +168 -12
  102. camel/toolkits/twitter_toolkit.py +2 -1
  103. camel/toolkits/video_analysis_toolkit.py +2 -1
  104. camel/toolkits/video_download_toolkit.py +2 -1
  105. camel/toolkits/weather_toolkit.py +2 -0
  106. camel/toolkits/whatsapp_toolkit.py +2 -1
  107. camel/toolkits/zapier_toolkit.py +2 -1
  108. camel/types/enums.py +66 -0
  109. camel/types/unified_model_type.py +5 -0
  110. camel/utils/__init__.py +2 -0
  111. camel/utils/chunker/code_chunker.py +9 -9
  112. camel/utils/commons.py +50 -30
  113. camel/utils/constants.py +2 -2
  114. camel/utils/mcp.py +79 -0
  115. camel/verifiers/__init__.py +2 -0
  116. camel/verifiers/base.py +15 -15
  117. camel/verifiers/math_verifier.py +182 -0
  118. camel/verifiers/python_verifier.py +28 -28
  119. {camel_ai-0.2.37.dist-info → camel_ai-0.2.39.dist-info}/METADATA +54 -4
  120. {camel_ai-0.2.37.dist-info → camel_ai-0.2.39.dist-info}/RECORD +122 -110
  121. {camel_ai-0.2.37.dist-info → camel_ai-0.2.39.dist-info}/WHEEL +0 -0
  122. {camel_ai-0.2.37.dist-info → camel_ai-0.2.39.dist-info}/licenses/LICENSE +0 -0
@@ -19,6 +19,7 @@ from camel.datasets import BaseGenerator, DataPoint, StaticDataset
19
19
  from camel.logger import get_logger
20
20
  from camel.verifiers.base import (
21
21
  BaseVerifier,
22
+ VerificationOutcome,
22
23
  VerificationResult,
23
24
  )
24
25
 
@@ -51,7 +52,7 @@ class SingleStepEnv:
51
52
  question="Episode ended. This is just a placeholder."
52
53
  )
53
54
 
54
- ACCURACY_REWARD = 10
55
+ ACCURACY_REWARD = 1
55
56
 
56
57
  def __init__(
57
58
  self,
@@ -126,6 +127,7 @@ class SingleStepEnv:
126
127
  await self.verifier.cleanup()
127
128
  self._states = []
128
129
  self._states_done = []
130
+ self.current_batch_size = 0
129
131
  logger.info('Environment closed successfully')
130
132
  except Exception as e:
131
133
  logger.error(f'Failed to close environment: {e}')
@@ -157,6 +159,8 @@ class SingleStepEnv:
157
159
  ValueError: If batch size exceeds dataset size.
158
160
  TypeError: If the dataset is of an unsupported type.
159
161
  """
162
+ if batch_size <= 0:
163
+ raise ValueError("Batch size must be positive")
160
164
 
161
165
  if not self._is_setup:
162
166
  logger.warning(
@@ -203,35 +207,66 @@ class SingleStepEnv:
203
207
  return observations[0] if batch_size == 1 else observations
204
208
 
205
209
  elif isinstance(self.dataset, BaseGenerator):
206
- raise NotImplementedError(
207
- "Reset not yet implemented for BaseGenerator datasets."
208
- )
210
+ self._states = [
211
+ await self.dataset.async_sample() for _ in range(batch_size)
212
+ ]
213
+ self.current_batch_size = batch_size
214
+ self._states_done = [False] * batch_size
215
+
216
+ observations = [
217
+ Observation(question=sample.question, context={}, metadata={})
218
+ for sample in self._states
219
+ ]
220
+
221
+ return observations[0] if batch_size == 1 else observations
209
222
 
210
223
  else:
211
224
  raise TypeError(f"Unsupported dataset type: {type(self.dataset)}")
212
225
 
213
226
  async def step(
214
- self, action: Union[Action, List[Action]]
215
- ) -> Union[StepResult, List[StepResult]]:
216
- r"""Process actions for a subset of states and update their
217
- finished status.
227
+ self, action: Union[Action, List[Action], str]
228
+ ) -> Union[
229
+ Tuple[Observation, float, bool, Dict[str, Any]],
230
+ List[Tuple[Observation, float, bool, Dict[str, Any]]],
231
+ ]:
232
+ r"""Execute one interaction step in the environment using the
233
+ proposed solution.
234
+
235
+ This method processes the agent's response(s) to the current
236
+ observation(s), verifies the correctness of the responses using
237
+ the verifier, computes rewards, and returns the resulting
238
+ state transition(s).
239
+
240
+ The environment is strictly single-step. Once an action is
241
+ submitted for a state, that state is marked as done, and
242
+ the observation will not change.
218
243
 
219
244
  Args:
220
- action: Single action or list of actions, where each action
221
- contains an index indicating which state it corresponds to.
222
- The index must be a valid position in the internal _states list
223
- that was populated during the reset() call.
224
-
245
+ action (Union[Action, List[Action], str]):
246
+ The action(s) taken by the agent,
247
+ which should contain the response(s)
248
+ to the observation(s). Can be:
249
+ - A single `Action` object (for batch size 1),
250
+ - A list of `Action` objects (for batched evaluation),
251
+ - A raw string (only allowed when batch size is 1).
225
252
 
226
253
  Returns:
227
- Union[StepResult, List[StepResult]]: StepResult or list of
228
- StepResults for the processed states.
254
+ Union[Tuple[Observation, float, bool, Dict[str, Any]], List[...]]:
255
+ A tuple or list of tuples containing:
256
+ - `Observation`: Placeholder indicating episode end.
257
+ - `float`: The reward for the response.
258
+ - `bool`: Whether the episode is done
259
+ (always `True` in this case).
260
+ - `dict`: Additional info including the proposed solution,
261
+ verification result, and original data point.
229
262
 
230
263
  Raises:
231
- RuntimeError: If environment isn't set up or episode has ended.
232
- ValueError: If indices are invalid, duplicate, or correspond to
233
- finished states.
264
+ RuntimeError: If the environment has not been set up,
265
+ or if `reset()` has not been called.
266
+ ValueError: If invalid action format, duplicate indices,
267
+ or out-of-bounds indices are detected.
234
268
  """
269
+
235
270
  if not self._is_setup:
236
271
  raise RuntimeError("Environment not set up. Call setup() first.")
237
272
  if self._batch_done():
@@ -241,12 +276,10 @@ class SingleStepEnv:
241
276
  if not self._states:
242
277
  raise RuntimeError("No current observation. Call reset() first.")
243
278
 
244
- # Normalize everything to list
245
- actions = [action] if isinstance(action, Action) else action
246
- indices = [act.index for act in actions]
279
+ actions = self._normalize_actions(action)
280
+
281
+ indices = [a.index for a in actions]
247
282
 
248
- if len(set(indices)) != len(indices):
249
- raise ValueError("Duplicate state indices in actions.")
250
283
  for idx in indices:
251
284
  if idx < 0 or idx >= len(self._states):
252
285
  raise ValueError(f"Invalid state index {idx}.")
@@ -254,7 +287,6 @@ class SingleStepEnv:
254
287
  raise ValueError(f"State at index {idx} is already finished.")
255
288
 
256
289
  num_actions = len(actions)
257
-
258
290
  if self.current_batch_size % num_actions != 0:
259
291
  logger.warning(
260
292
  f"Number of actions ({num_actions}) is not a divisor of "
@@ -262,25 +294,34 @@ class SingleStepEnv:
262
294
  )
263
295
 
264
296
  proposed_solutions = [act.llm_response for act in actions]
265
- ground_truths: List[str] = [
266
- self._states[idx].final_answer for idx in indices
267
- ]
297
+ ground_truths: List[str] = []
298
+ for idx in indices:
299
+ ground_truths.append(self._states[idx].final_answer)
268
300
 
269
- verification_results = await self.verifier.verify_batch(
270
- solutions=proposed_solutions,
271
- ground_truths=ground_truths, # type: ignore [arg-type]
272
- raise_on_error=True,
273
- )
301
+ try:
302
+ verification_results = await self.verifier.verify_batch(
303
+ solutions=proposed_solutions,
304
+ reference_answers=ground_truths, # type: ignore [arg-type]
305
+ raise_on_error=True,
306
+ )
307
+ except Exception as e:
308
+ logger.error(f"Verification failed: {e}")
309
+ # Return failed verification results with status=FAILURE
310
+ verification_results = [
311
+ VerificationResult(
312
+ result="",
313
+ status=VerificationOutcome.FAILURE,
314
+ error_message=f"Verification error: {e}",
315
+ )
316
+ for _ in range(len(proposed_solutions))
317
+ ]
274
318
 
275
319
  total_rewards, rewards_dicts = await self._compute_reward_batch(
276
320
  proposed_solutions, verification_results
277
321
  )
278
-
279
- step_results = []
280
- # TODO: batch this
281
- for i, action in enumerate(actions):
282
- idx = action.index
283
- step_result = StepResult(
322
+ # Create and return step results in batch
323
+ step_results = [
324
+ StepResult(
284
325
  observation=self.PLACEHOLDER_OBS,
285
326
  reward=total_rewards[i],
286
327
  rewards_dict=rewards_dicts[i],
@@ -288,14 +329,96 @@ class SingleStepEnv:
288
329
  info={
289
330
  "proposed_solution": proposed_solutions[i],
290
331
  "verification_result": verification_results[i],
291
- "state": self._states[idx],
332
+ "state": self._states[indices[i]],
292
333
  },
293
- )
294
- step_results.append(step_result)
334
+ ).as_tuple()
335
+ for i in range(len(actions))
336
+ ]
337
+ for _, idx in enumerate(indices):
295
338
  self._states_done[idx] = True
296
339
 
297
340
  return step_results[0] if len(step_results) == 1 else step_results
298
341
 
342
+ def _normalize_actions(
343
+ self, action: Union[Action, List[Action], str]
344
+ ) -> List[Action]:
345
+ r"""Normalize the user-provided action(s) into a validated list
346
+ of `Action` objects.
347
+
348
+ This method handles flexibility in input format by converting
349
+ raw strings (only allowed when batch size is 1) and ensuring
350
+ all necessary structure and integrity checks on actions
351
+ (e.g., index bounds, duplicates).
352
+
353
+ Args:
354
+ action (Union[Action, List[Action], str]):
355
+ The raw input action(s) provided by the agent. Can be:
356
+ - A single `Action` object.
357
+ - A list of `Action` objects.
358
+ - A raw string (if `batch_size == 1`), auto-wrapped
359
+ in an `Action`.
360
+
361
+ Returns:
362
+ List[Action]: A list of validated `Action` instances
363
+ ready for evaluation.
364
+
365
+ Raises:
366
+ ValueError: If:
367
+ - Action indices are invalid or duplicated,
368
+ - Action list is empty,
369
+ - Index mismatches expected values
370
+ (e.g., 0 for batch size 1),
371
+ - Wrong structure is used
372
+ (e.g., string used with batch size > 1).
373
+ TypeError: If the action is of an unsupported type.
374
+ """
375
+
376
+ if isinstance(action, str):
377
+ if self.current_batch_size != 1:
378
+ raise ValueError(
379
+ "String input for action is only allowed"
380
+ " when batch_size == 1"
381
+ )
382
+ logger.warning("Auto-converting from str to Action", stacklevel=2)
383
+ action = Action(index=0, llm_response=action)
384
+
385
+ if isinstance(action, Action):
386
+ actions = [action]
387
+ elif isinstance(action, list):
388
+ if not action:
389
+ raise ValueError("Action list cannot be empty")
390
+ if not all(isinstance(a, Action) for a in action):
391
+ raise ValueError(
392
+ "All elements in the list must be Action objects"
393
+ )
394
+ actions = action
395
+ else:
396
+ raise TypeError("Action must be a str, Action, or list of Actions")
397
+
398
+ if self.current_batch_size == 1 and len(actions) != 1:
399
+ raise ValueError(
400
+ "For batch_size=1, expect a single Action or a "
401
+ "list containing exactly one Action"
402
+ )
403
+
404
+ # Validate indices
405
+ for a in actions:
406
+ if not isinstance(a.index, int):
407
+ raise ValueError(
408
+ f"Action index must be an integer, got {a.index}"
409
+ )
410
+ if self.current_batch_size == 1:
411
+ if a.index != 0:
412
+ raise ValueError(
413
+ "For batch_size=1, Action index must be 0"
414
+ )
415
+
416
+ indices = [a.index for a in actions]
417
+ if len(set(indices)) != len(indices):
418
+ raise ValueError("Duplicate state indices in actions.")
419
+
420
+ return actions
421
+
299
422
  async def _compute_reward_batch(
300
423
  self,
301
424
  proposed_solutions: List[str],
@@ -315,6 +438,12 @@ class SingleStepEnv:
315
438
  - List of total rewards for each solution.
316
439
  - List of reward component dictionaries for each solution.
317
440
  """
441
+ if len(proposed_solutions) != len(verification_results):
442
+ raise ValueError(
443
+ f"Length mismatch: {len(proposed_solutions)} solutions vs "
444
+ f"{len(verification_results)} verification results"
445
+ )
446
+
318
447
  total_rewards = []
319
448
  rewards_dicts = []
320
449
 
@@ -355,9 +484,20 @@ class SingleStepEnv:
355
484
  return {}
356
485
 
357
486
  def _batch_done(self) -> bool:
487
+ r"""Check if all states in the current batch are done.
488
+
489
+ Returns:
490
+ bool: True if all states are marked as done, False otherwise.
491
+ """
358
492
  return all(self._states_done)
359
493
 
360
494
  def _batch_started(self) -> bool:
495
+ r"""Check if the batch processing has started.
496
+
497
+ Returns:
498
+ bool: True if at least one state is marked as done, False
499
+ otherwise.
500
+ """
361
501
  return any(self._states_done)
362
502
 
363
503
  @property
@@ -185,7 +185,7 @@ class DockerInterpreter(BaseInterpreter):
185
185
  code: str,
186
186
  code_type: str,
187
187
  ) -> str:
188
- r"""Executes the given code in the conatiner attached to the
188
+ r"""Executes the given code in the container attached to the
189
189
  interpreter, and captures the stdout and stderr streams.
190
190
 
191
191
  Args:
@@ -210,7 +210,7 @@ class DockerInterpreter(BaseInterpreter):
210
210
  if self.require_confirm:
211
211
  logger.info(
212
212
  f"The following {code_type} code will run on your "
213
- "computer: {code}"
213
+ f"computer: {code}"
214
214
  )
215
215
  while True:
216
216
  choice = input("Running code? [Y/n]:").lower()
@@ -99,7 +99,7 @@ class E2BInterpreter(BaseInterpreter):
99
99
  if self.require_confirm:
100
100
  logger.info(
101
101
  f"The following {code_type} code will run on your "
102
- "e2b sandbox: {code}"
102
+ f"e2b sandbox: {code}"
103
103
  )
104
104
  while True:
105
105
  choice = input("Running code? [Y/n]:").lower()
@@ -421,7 +421,7 @@ class InternalPythonInterpreter(BaseInterpreter):
421
421
  result = None
422
422
  if not isinstance(if_statement.test, ast.Compare):
423
423
  raise InterpreterError(
424
- "Only Campare expr supported in if statement, get"
424
+ "Only Compare expr supported in if statement, get"
425
425
  f" {if_statement.test.__class__.__name__}"
426
426
  )
427
427
  if self._execute_condition(if_statement.test):
@@ -292,7 +292,7 @@ class SubprocessInterpreter(BaseInterpreter):
292
292
  if self.require_confirm:
293
293
  logger.info(
294
294
  f"The following {code_type} code will run on your "
295
- "computer: {code}"
295
+ f"computer: {code}"
296
296
  )
297
297
  while True:
298
298
  choice = input("Running code? [Y/n]:").lower().strip()
camel/loaders/__init__.py CHANGED
@@ -18,7 +18,7 @@ from .chunkr_reader import ChunkrReader
18
18
  from .firecrawl_reader import Firecrawl
19
19
  from .jina_url_reader import JinaURLReader
20
20
  from .mineru_extractor import MinerU
21
- from .panda_reader import PandaReader
21
+ from .pandas_reader import PandasReader
22
22
  from .unstructured_io import UnstructuredIO
23
23
 
24
24
  __all__ = [
@@ -30,6 +30,6 @@ __all__ = [
30
30
  'Firecrawl',
31
31
  'Apify',
32
32
  'ChunkrReader',
33
- 'PandaReader',
33
+ 'PandasReader',
34
34
  'MinerU',
35
35
  ]
@@ -11,13 +11,10 @@
11
11
  # See the License for the specific language governing permissions and
12
12
  # limitations under the License.
13
13
  # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
- import os
15
14
  from functools import wraps
16
15
  from pathlib import Path
17
16
  from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Union
18
17
 
19
- import pandas as pd
20
-
21
18
  if TYPE_CHECKING:
22
19
  from pandas import DataFrame
23
20
  from pandasai import SmartDataframe
@@ -50,25 +47,18 @@ def check_suffix(valid_suffixs: List[str]) -> Callable:
50
47
  return decorator
51
48
 
52
49
 
53
- class PandaReader:
50
+ class PandasReader:
54
51
  def __init__(self, config: Optional[Dict[str, Any]] = None) -> None:
55
- r"""Initializes the PandaReader class.
52
+ r"""Initializes the PandasReader class.
56
53
 
57
54
  Args:
58
55
  config (Optional[Dict[str, Any]], optional): The configuration
59
56
  dictionary that can include LLM API settings for LLM-based
60
- processing. If not provided, it will use OpenAI with the API
61
- key from the OPENAI_API_KEY environment variable. You can
62
- customize the LLM configuration by providing a 'llm' key in
63
- the config dictionary. (default: :obj:`None`)
57
+ processing. If not provided, no LLM will be configured by
58
+ default. You can customize the LLM configuration by providing
59
+ a 'llm' key in the config dictionary. (default: :obj:`None`)
64
60
  """
65
- from pandasai.llm import OpenAI # type: ignore[import-untyped]
66
-
67
61
  self.config = config or {}
68
- if "llm" not in self.config:
69
- self.config["llm"] = OpenAI(
70
- api_token=os.getenv("OPENAI_API_KEY"),
71
- )
72
62
 
73
63
  self.__LOADER = {
74
64
  ".csv": self.read_csv,
@@ -91,8 +81,13 @@ class PandaReader:
91
81
  data: Union["DataFrame", str],
92
82
  *args: Any,
93
83
  **kwargs: Dict[str, Any],
94
- ) -> "SmartDataframe":
95
- r"""Loads a file or DataFrame and returns a SmartDataframe object.
84
+ ) -> Union["DataFrame", "SmartDataframe"]:
85
+ r"""Loads a file or DataFrame and returns a DataFrame or
86
+ SmartDataframe object.
87
+
88
+ If an LLM is configured in the config dictionary, a SmartDataframe
89
+ will be returned, otherwise a regular pandas DataFrame will be
90
+ returned.
96
91
 
97
92
  args:
98
93
  data (Union[DataFrame, str]): The data to load.
@@ -100,24 +95,32 @@ class PandaReader:
100
95
  **kwargs (Dict[str, Any]): Additional keyword arguments.
101
96
 
102
97
  Returns:
103
- SmartDataframe: The SmartDataframe object.
98
+ Union[DataFrame, SmartDataframe]: The DataFrame or SmartDataframe
99
+ object.
104
100
  """
105
101
  from pandas import DataFrame
106
- from pandasai import SmartDataframe
107
102
 
103
+ # Load the data into a pandas DataFrame
108
104
  if isinstance(data, DataFrame):
109
- return SmartDataframe(data, config=self.config)
110
- file_path = str(data)
111
- path = Path(file_path)
112
- if not file_path.startswith("http") and not path.exists():
113
- raise FileNotFoundError(f"File {file_path} not found")
114
- if path.suffix in self.__LOADER:
115
- return SmartDataframe(
116
- self.__LOADER[path.suffix](file_path, *args, **kwargs), # type: ignore[operator]
117
- config=self.config,
118
- )
105
+ df = data
119
106
  else:
120
- raise ValueError(f"Unsupported file format: {path.suffix}")
107
+ file_path = str(data)
108
+ path = Path(file_path)
109
+ if not file_path.startswith("http") and not path.exists():
110
+ raise FileNotFoundError(f"File {file_path} not found")
111
+ if path.suffix in self.__LOADER:
112
+ df = self.__LOADER[path.suffix](file_path, *args, **kwargs) # type: ignore[operator]
113
+ else:
114
+ raise ValueError(f"Unsupported file format: {path.suffix}")
115
+
116
+ # If an LLM is configured, return a SmartDataframe, otherwise return a
117
+ # regular DataFrame
118
+ if "llm" in self.config:
119
+ from pandasai import SmartDataframe
120
+
121
+ return SmartDataframe(df, config=self.config)
122
+ else:
123
+ return df
121
124
 
122
125
  @check_suffix([".csv"])
123
126
  def read_csv(
@@ -133,6 +136,8 @@ class PandaReader:
133
136
  Returns:
134
137
  DataFrame: The DataFrame object.
135
138
  """
139
+ import pandas as pd
140
+
136
141
  return pd.read_csv(file_path, *args, **kwargs)
137
142
 
138
143
  @check_suffix([".xlsx", ".xls"])
@@ -149,6 +154,8 @@ class PandaReader:
149
154
  Returns:
150
155
  DataFrame: The DataFrame object.
151
156
  """
157
+ import pandas as pd
158
+
152
159
  return pd.read_excel(file_path, *args, **kwargs)
153
160
 
154
161
  @check_suffix([".json"])
@@ -165,6 +172,8 @@ class PandaReader:
165
172
  Returns:
166
173
  DataFrame: The DataFrame object.
167
174
  """
175
+ import pandas as pd
176
+
168
177
  return pd.read_json(file_path, *args, **kwargs)
169
178
 
170
179
  @check_suffix([".parquet"])
@@ -181,6 +190,8 @@ class PandaReader:
181
190
  Returns:
182
191
  DataFrame: The DataFrame object.
183
192
  """
193
+ import pandas as pd
194
+
184
195
  return pd.read_parquet(file_path, *args, **kwargs)
185
196
 
186
197
  def read_sql(self, *args: Any, **kwargs: Dict[str, Any]) -> "DataFrame":
@@ -193,6 +204,8 @@ class PandaReader:
193
204
  Returns:
194
205
  DataFrame: The DataFrame object.
195
206
  """
207
+ import pandas as pd
208
+
196
209
  return pd.read_sql(*args, **kwargs)
197
210
 
198
211
  def read_table(
@@ -208,6 +221,8 @@ class PandaReader:
208
221
  Returns:
209
222
  DataFrame: The DataFrame object.
210
223
  """
224
+ import pandas as pd
225
+
211
226
  return pd.read_table(file_path, *args, **kwargs)
212
227
 
213
228
  def read_clipboard(
@@ -222,6 +237,8 @@ class PandaReader:
222
237
  Returns:
223
238
  DataFrame: The DataFrame object.
224
239
  """
240
+ import pandas as pd
241
+
225
242
  return pd.read_clipboard(*args, **kwargs)
226
243
 
227
244
  @check_suffix([".html"])
@@ -238,6 +255,8 @@ class PandaReader:
238
255
  Returns:
239
256
  DataFrame: The DataFrame object.
240
257
  """
258
+ import pandas as pd
259
+
241
260
  return pd.read_html(file_path, *args, **kwargs)
242
261
 
243
262
  @check_suffix([".feather"])
@@ -254,6 +273,8 @@ class PandaReader:
254
273
  Returns:
255
274
  DataFrame: The DataFrame object.
256
275
  """
276
+ import pandas as pd
277
+
257
278
  return pd.read_feather(file_path, *args, **kwargs)
258
279
 
259
280
  @check_suffix([".dta"])
@@ -270,6 +291,8 @@ class PandaReader:
270
291
  Returns:
271
292
  DataFrame: The DataFrame object.
272
293
  """
294
+ import pandas as pd
295
+
273
296
  return pd.read_stata(file_path, *args, **kwargs)
274
297
 
275
298
  @check_suffix([".sas"])
@@ -286,6 +309,8 @@ class PandaReader:
286
309
  Returns:
287
310
  DataFrame: The DataFrame object.
288
311
  """
312
+ import pandas as pd
313
+
289
314
  return pd.read_sas(file_path, *args, **kwargs)
290
315
 
291
316
  @check_suffix([".pkl"])
@@ -302,6 +327,8 @@ class PandaReader:
302
327
  Returns:
303
328
  DataFrame: The DataFrame object.
304
329
  """
330
+ import pandas as pd
331
+
305
332
  return pd.read_pickle(file_path, *args, **kwargs)
306
333
 
307
334
  @check_suffix([".h5"])
@@ -318,6 +345,8 @@ class PandaReader:
318
345
  Returns:
319
346
  DataFrame: The DataFrame object.
320
347
  """
348
+ import pandas as pd
349
+
321
350
  return pd.read_hdf(file_path, *args, **kwargs)
322
351
 
323
352
  @check_suffix([".orc"])
@@ -334,4 +363,6 @@ class PandaReader:
334
363
  Returns:
335
364
  DataFrame: The DataFrame object.
336
365
  """
366
+ import pandas as pd
367
+
337
368
  return pd.read_orc(file_path, *args, **kwargs)
@@ -11,6 +11,7 @@
11
11
  # See the License for the specific language governing permissions and
12
12
  # limitations under the License.
13
13
  # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+ import traceback
14
15
  import uuid
15
16
  import warnings
16
17
  from typing import (
@@ -151,7 +152,7 @@ class UnstructuredIO:
151
152
  elements = partition(file=f, **kwargs)
152
153
  return elements
153
154
  except Exception:
154
- warnings.warn(f"Failed to partition the file: {input_path}")
155
+ warnings.warn(traceback.format_exc())
155
156
  return None
156
157
 
157
158
  @staticmethod
@@ -38,7 +38,7 @@ class ChatHistoryBlock(MemoryBlock):
38
38
  keep_rate (float, optional): In historical messages, the score of the
39
39
  last message is 1.0, and with each step taken backward, the score
40
40
  of the message is multiplied by the `keep_rate`. Higher `keep_rate`
41
- leads to high possiblity to keep history messages during context
41
+ leads to high possibility to keep history messages during context
42
42
  creation.
43
43
  """
44
44