autobyteus 1.1.5__py3-none-any.whl → 1.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103) hide show
  1. autobyteus/agent/context/agent_config.py +6 -1
  2. autobyteus/agent/context/agent_runtime_state.py +7 -1
  3. autobyteus/agent/handlers/llm_user_message_ready_event_handler.py +30 -7
  4. autobyteus/agent/handlers/tool_result_event_handler.py +100 -88
  5. autobyteus/agent/handlers/user_input_message_event_handler.py +22 -25
  6. autobyteus/agent/llm_response_processor/provider_aware_tool_usage_processor.py +7 -1
  7. autobyteus/agent/message/__init__.py +7 -5
  8. autobyteus/agent/message/agent_input_user_message.py +6 -16
  9. autobyteus/agent/message/context_file.py +24 -24
  10. autobyteus/agent/message/context_file_type.py +29 -8
  11. autobyteus/agent/message/multimodal_message_builder.py +47 -0
  12. autobyteus/agent/streaming/stream_event_payloads.py +23 -4
  13. autobyteus/agent/system_prompt_processor/tool_manifest_injector_processor.py +6 -2
  14. autobyteus/agent/tool_invocation.py +27 -2
  15. autobyteus/agent_team/agent_team_builder.py +22 -1
  16. autobyteus/agent_team/bootstrap_steps/agent_configuration_preparation_step.py +9 -2
  17. autobyteus/agent_team/context/agent_team_config.py +1 -0
  18. autobyteus/agent_team/context/agent_team_runtime_state.py +0 -2
  19. autobyteus/llm/api/autobyteus_llm.py +33 -33
  20. autobyteus/llm/api/bedrock_llm.py +13 -5
  21. autobyteus/llm/api/claude_llm.py +13 -27
  22. autobyteus/llm/api/gemini_llm.py +108 -42
  23. autobyteus/llm/api/groq_llm.py +4 -3
  24. autobyteus/llm/api/mistral_llm.py +97 -51
  25. autobyteus/llm/api/nvidia_llm.py +6 -5
  26. autobyteus/llm/api/ollama_llm.py +37 -12
  27. autobyteus/llm/api/openai_compatible_llm.py +91 -91
  28. autobyteus/llm/autobyteus_provider.py +1 -1
  29. autobyteus/llm/base_llm.py +42 -139
  30. autobyteus/llm/extensions/base_extension.py +6 -6
  31. autobyteus/llm/extensions/token_usage_tracking_extension.py +3 -2
  32. autobyteus/llm/llm_factory.py +131 -61
  33. autobyteus/llm/ollama_provider_resolver.py +1 -0
  34. autobyteus/llm/providers.py +1 -0
  35. autobyteus/llm/token_counter/token_counter_factory.py +3 -1
  36. autobyteus/llm/user_message.py +43 -35
  37. autobyteus/llm/utils/llm_config.py +34 -18
  38. autobyteus/llm/utils/media_payload_formatter.py +99 -0
  39. autobyteus/llm/utils/messages.py +32 -25
  40. autobyteus/llm/utils/response_types.py +9 -3
  41. autobyteus/llm/utils/token_usage.py +6 -5
  42. autobyteus/multimedia/__init__.py +31 -0
  43. autobyteus/multimedia/audio/__init__.py +11 -0
  44. autobyteus/multimedia/audio/api/__init__.py +4 -0
  45. autobyteus/multimedia/audio/api/autobyteus_audio_client.py +59 -0
  46. autobyteus/multimedia/audio/api/gemini_audio_client.py +219 -0
  47. autobyteus/multimedia/audio/audio_client_factory.py +120 -0
  48. autobyteus/multimedia/audio/audio_model.py +97 -0
  49. autobyteus/multimedia/audio/autobyteus_audio_provider.py +108 -0
  50. autobyteus/multimedia/audio/base_audio_client.py +40 -0
  51. autobyteus/multimedia/image/__init__.py +11 -0
  52. autobyteus/multimedia/image/api/__init__.py +9 -0
  53. autobyteus/multimedia/image/api/autobyteus_image_client.py +97 -0
  54. autobyteus/multimedia/image/api/gemini_image_client.py +188 -0
  55. autobyteus/multimedia/image/api/openai_image_client.py +142 -0
  56. autobyteus/multimedia/image/autobyteus_image_provider.py +109 -0
  57. autobyteus/multimedia/image/base_image_client.py +67 -0
  58. autobyteus/multimedia/image/image_client_factory.py +118 -0
  59. autobyteus/multimedia/image/image_model.py +97 -0
  60. autobyteus/multimedia/providers.py +5 -0
  61. autobyteus/multimedia/runtimes.py +8 -0
  62. autobyteus/multimedia/utils/__init__.py +10 -0
  63. autobyteus/multimedia/utils/api_utils.py +19 -0
  64. autobyteus/multimedia/utils/multimedia_config.py +29 -0
  65. autobyteus/multimedia/utils/response_types.py +13 -0
  66. autobyteus/task_management/tools/publish_task_plan.py +4 -16
  67. autobyteus/task_management/tools/update_task_status.py +4 -19
  68. autobyteus/tools/__init__.py +5 -4
  69. autobyteus/tools/base_tool.py +98 -29
  70. autobyteus/tools/browser/standalone/__init__.py +0 -1
  71. autobyteus/tools/google_search.py +149 -0
  72. autobyteus/tools/mcp/schema_mapper.py +29 -71
  73. autobyteus/tools/multimedia/__init__.py +8 -0
  74. autobyteus/tools/multimedia/audio_tools.py +116 -0
  75. autobyteus/tools/multimedia/image_tools.py +186 -0
  76. autobyteus/tools/parameter_schema.py +82 -89
  77. autobyteus/tools/pydantic_schema_converter.py +81 -0
  78. autobyteus/tools/tool_category.py +1 -0
  79. autobyteus/tools/usage/formatters/default_json_example_formatter.py +89 -20
  80. autobyteus/tools/usage/formatters/default_xml_example_formatter.py +115 -41
  81. autobyteus/tools/usage/formatters/default_xml_schema_formatter.py +50 -20
  82. autobyteus/tools/usage/formatters/gemini_json_example_formatter.py +55 -22
  83. autobyteus/tools/usage/formatters/google_json_example_formatter.py +54 -21
  84. autobyteus/tools/usage/formatters/openai_json_example_formatter.py +53 -23
  85. autobyteus/tools/usage/parsers/default_xml_tool_usage_parser.py +270 -94
  86. autobyteus/tools/usage/parsers/provider_aware_tool_usage_parser.py +5 -2
  87. autobyteus/tools/usage/providers/tool_manifest_provider.py +43 -16
  88. autobyteus/tools/usage/registries/tool_formatting_registry.py +9 -2
  89. autobyteus/tools/usage/registries/tool_usage_parser_registry.py +9 -2
  90. autobyteus-1.1.7.dist-info/METADATA +204 -0
  91. {autobyteus-1.1.5.dist-info → autobyteus-1.1.7.dist-info}/RECORD +98 -71
  92. examples/run_browser_agent.py +1 -1
  93. examples/run_google_slides_agent.py +2 -2
  94. examples/run_mcp_google_slides_client.py +1 -1
  95. examples/run_sqlite_agent.py +1 -1
  96. autobyteus/llm/utils/image_payload_formatter.py +0 -89
  97. autobyteus/tools/ask_user_input.py +0 -40
  98. autobyteus/tools/browser/standalone/factory/google_search_factory.py +0 -25
  99. autobyteus/tools/browser/standalone/google_search_ui.py +0 -126
  100. autobyteus-1.1.5.dist-info/METADATA +0 -161
  101. {autobyteus-1.1.5.dist-info → autobyteus-1.1.7.dist-info}/WHEEL +0 -0
  102. {autobyteus-1.1.5.dist-info → autobyteus-1.1.7.dist-info}/licenses/LICENSE +0 -0
  103. {autobyteus-1.1.5.dist-info → autobyteus-1.1.7.dist-info}/top_level.txt +0 -0
@@ -2,18 +2,17 @@
2
2
 
3
3
  import logging
4
4
  from abc import ABC, abstractmethod
5
- from typing import Optional, Any, TYPE_CHECKING, List as TypingList, Dict
6
- import xml.sax.saxutils
5
+ from typing import Optional, Any, TYPE_CHECKING, List as TypingList, Dict, Union
7
6
 
8
7
  from autobyteus.events.event_emitter import EventEmitter
9
- from autobyteus.events.event_types import EventType
8
+ from autobyteus.tools.parameter_schema import ParameterType
10
9
 
11
10
  from .tool_meta import ToolMeta
12
11
  from .tool_state import ToolState
13
12
 
14
13
  if TYPE_CHECKING:
15
14
  from autobyteus.agent.context import AgentContext
16
- from autobyteus.tools.parameter_schema import ParameterSchema
15
+ from autobyteus.tools.parameter_schema import ParameterSchema, ParameterDefinition
17
16
  from autobyteus.tools.tool_config import ToolConfig
18
17
  from .tool_state import ToolState
19
18
  from autobyteus.tools.registry import ToolDefinition
@@ -27,41 +26,115 @@ class BaseTool(ABC, EventEmitter, metaclass=ToolMeta):
27
26
  def __init__(self, config: Optional['ToolConfig'] = None):
28
27
  super().__init__()
29
28
  self.agent_id: Optional[str] = None
30
- self.definition: Optional['ToolDefinition'] = None # Link back to its definition
31
- # The config is stored primarily for potential use by subclasses or future base features.
29
+ self.definition: Optional['ToolDefinition'] = None
32
30
  self._config = config
33
- # Add a dedicated state dictionary for the tool instance
34
- # CHANGED: Use ToolState class for explicit state management.
35
31
  self.tool_state: 'ToolState' = ToolState()
36
32
  logger.debug(f"BaseTool instance initializing for potential class {self.__class__.__name__}. tool_state initialized.")
37
33
 
38
34
  @classmethod
39
35
  def get_name(cls) -> str:
40
- """Returns the registered name of the tool."""
41
36
  return cls.__name__
42
37
 
43
38
  @classmethod
44
39
  @abstractmethod
45
40
  def get_description(cls) -> str:
46
- """Returns the description of the tool."""
47
41
  raise NotImplementedError("Subclasses must implement get_description().")
48
42
 
49
43
  @classmethod
50
44
  @abstractmethod
51
45
  def get_argument_schema(cls) -> Optional['ParameterSchema']:
52
- """
53
- Return a ParameterSchema defining the arguments this tool accepts for execution.
54
- Return None if the tool accepts no arguments.
55
- """
56
46
  raise NotImplementedError("Subclasses must implement get_argument_schema().")
57
47
 
58
48
  @classmethod
59
49
  def get_config_schema(cls) -> Optional['ParameterSchema']:
50
+ return None
51
+
52
+ def _coerce_argument_types(self, kwargs: Dict[str, Any]) -> Dict[str, Any]:
60
53
  """
61
- Return the ParameterSchema for tool *instantiation* parameters.
62
- This is optional. By default, tools have no instantiation config.
54
+ Coerces argument values from the parser (often strings) to their proper
55
+ Python types based on the tool's argument schema.
56
+ This method is fully recursive to handle nested objects and arrays.
63
57
  """
64
- return None
58
+ arg_schema = self.get_argument_schema()
59
+ if not arg_schema:
60
+ return kwargs
61
+
62
+ return self._coerce_object_recursively(kwargs, arg_schema)
63
+
64
+ def _coerce_object_recursively(self, data: Dict[str, Any], schema: 'ParameterSchema') -> Dict[str, Any]:
65
+ """ Helper to recursively coerce values in an object based on a ParameterSchema. """
66
+ coerced_data = data.copy()
67
+ for name, value in data.items():
68
+ param_def = schema.get_parameter(name)
69
+ if param_def:
70
+ coerced_data[name] = self._coerce_value_recursively(value, param_def)
71
+ return coerced_data
72
+
73
def _coerce_value_recursively(self, value: Any, param_def: 'ParameterDefinition') -> Any:
    """
    Coerce a single value according to its ParameterDefinition.

    Rules, applied in order:
      1. ``None`` is returned unchanged.
      2. An empty string supplied for an ARRAY parameter becomes ``[]``
         (a common parser artifact).
      3. A dict supplied for an OBJECT parameter that carries an
         ``object_schema`` is coerced field by field.
      4. A list supplied for an ARRAY parameter whose item schema is a dict
         with ``"type": "object"`` has each dict item coerced against that
         item schema. Items that are not dicts are kept as they are (they
         are NOT dropped), so the list length is preserved and later
         validation still sees them. Lists with any other item schema are
         returned unchanged.
      5. A string supplied for an INTEGER, FLOAT or BOOLEAN parameter is
         converted; a string that cannot be converted is passed through
         unchanged and a warning is logged.

    Args:
        value: The raw value as produced by the tool-call parser.
        param_def: The definition describing the expected type of ``value``.

    Returns:
        The coerced value, or ``value`` itself when no rule applies.
    """
    if value is None:
        return None

    param_type = param_def.param_type

    # 1. Empty string for an ARRAY parameter is a parser artifact for "no items".
    if param_type == ParameterType.ARRAY and value == "":
        return []

    # 2. Recurse into objects that come with a nested schema.
    if param_type == ParameterType.OBJECT and param_def.object_schema and isinstance(value, dict):
        return self._coerce_object_recursively(value, param_def.object_schema)

    # 3. Recurse into arrays of objects.
    if param_type == ParameterType.ARRAY and isinstance(value, list):
        item_schema_dict = param_def.array_item_schema
        if not (isinstance(item_schema_dict, dict) and item_schema_dict.get("type") == "object"):
            return value  # list of primitives (or unknown items): returned as is

        # Build a throwaway ParameterSchema for the item type so the object
        # recursion can be reused. This is a simplified conversion that only
        # carries what coercion needs.
        from .parameter_schema import ParameterSchema as TempSchema
        from .parameter_schema import ParameterDefinition as TempDef

        item_param_schema = TempSchema()
        props = item_schema_dict.get("properties") or {}
        reqs = item_schema_dict.get("required") or []
        for prop_name, prop_details in props.items():
            if not isinstance(prop_details, dict):
                # Nothing to coerce against; the property stays uncoerced.
                continue
            try:
                prop_type = ParameterType(prop_details.get("type", "string"))
            except ValueError:
                prop_type = ParameterType.STRING

            try:
                item_param_schema.add_parameter(TempDef(
                    name=prop_name,
                    param_type=prop_type,
                    description=prop_details.get("description", ""),
                    required=prop_name in reqs,
                    array_item_schema=prop_details.get("items"),  # pass down nested array schemas
                ))
            except ValueError as e:
                # A definition that cannot be built must not abort tool execution;
                # the property is simply left uncoerced.
                logger.warning(
                    f"Skipping coercion for item property '{prop_name}' of argument "
                    f"'{param_def.name}': {e}"
                )

        return [
            self._coerce_object_recursively(item, item_param_schema) if isinstance(item, dict) else item
            for item in value
        ]

    # 4. Coerce primitives that arrived as strings.
    if isinstance(value, str):
        try:
            if param_type == ParameterType.INTEGER:
                return int(value)
            if param_type == ParameterType.FLOAT:
                return float(value)
            if param_type == ParameterType.BOOLEAN:
                lower_val = value.lower()
                if lower_val in ("true", "1", "yes"):
                    return True
                if lower_val in ("false", "0", "no"):
                    return False
                # Unrecognised spelling: handled like a failed numeric conversion.
                raise ValueError(f"unrecognised boolean literal '{value}'")
        except (ValueError, TypeError):
            logger.warning(f"Could not coerce argument '{param_def.name}' with value '{value}' to type {param_def.param_type}. "
                           f"Passing string value to tool.")

    return value
65
138
 
66
139
  def set_agent_id(self, agent_id: str):
67
140
  if not isinstance(agent_id, str) or not agent_id:
@@ -71,30 +144,26 @@ class BaseTool(ABC, EventEmitter, metaclass=ToolMeta):
71
144
  logger.debug(f"Agent ID '{agent_id}' set for tool instance '{self.__class__.get_name()}'")
72
145
 
73
146
  async def execute(self, context: 'AgentContext', **kwargs):
74
- # In this context, self.get_name() will call the instance-specific method if it exists.
75
147
  tool_name = self.get_name()
76
148
  if self.agent_id is None:
77
149
  self.set_agent_id(context.agent_id)
78
- elif self.agent_id != context.agent_id:
79
- logger.warning(
80
- f"Tool '{tool_name}' current agent_id '{self.agent_id}' differs from "
81
- f"calling context's agent_id '{context.agent_id}'. Updating tool's agent_id."
82
- )
83
- self.set_agent_id(context.agent_id)
150
+
151
+ # Coerce types before validation and execution
152
+ coerced_kwargs = self._coerce_argument_types(kwargs)
84
153
 
85
154
  arg_schema = self.get_argument_schema()
86
155
  if arg_schema:
87
- is_valid, errors = arg_schema.validate_config(kwargs)
156
+ is_valid, errors = arg_schema.validate_config(coerced_kwargs)
88
157
  if not is_valid:
89
158
  error_message = f"Invalid arguments for tool '{tool_name}': {'; '.join(errors)}"
90
159
  logger.error(error_message)
91
160
  raise ValueError(error_message)
92
- elif kwargs:
93
- logger.warning(f"Tool '{tool_name}' does not define an argument schema but received arguments: {kwargs}. These will be passed to _execute.")
161
+ elif coerced_kwargs:
162
+ logger.warning(f"Tool '{tool_name}' does not define an argument schema but received arguments: {coerced_kwargs}. These will be passed to _execute.")
94
163
 
95
- logger.info(f"Executing tool '{tool_name}' for agent '{self.agent_id}' with args: {kwargs}")
164
+ logger.info(f"Executing tool '{tool_name}' for agent '{self.agent_id}' with args: {coerced_kwargs}")
96
165
  try:
97
- result = await self._execute(context=context, **kwargs)
166
+ result = await self._execute(context=context, **coerced_kwargs)
98
167
  logger.info(f"Tool '{tool_name}' execution completed successfully for agent '{self.agent_id}'.")
99
168
  return result
100
169
  except Exception as e:
@@ -1,5 +1,4 @@
1
1
  # This file makes 'standalone' a package under 'browser'.
2
- # from .google_search_ui import GoogleSearch
3
2
  # from .webpage_reader import WebPageReader
4
3
  # from .webpage_screenshot_taker import WebPageScreenshotTaker
5
4
  # from .navigate_to import NavigateTo
@@ -0,0 +1,149 @@
1
+ import os
2
+ import json
3
+ import logging
4
+ import aiohttp
5
+ from typing import Optional, TYPE_CHECKING, Any, Dict, List
6
+
7
+ from autobyteus.tools.base_tool import BaseTool
8
+ from autobyteus.tools.tool_config import ToolConfig
9
+ from autobyteus.tools.parameter_schema import ParameterSchema, ParameterDefinition, ParameterType
10
+ from autobyteus.tools.tool_category import ToolCategory
11
+
12
+ if TYPE_CHECKING:
13
+ from autobyteus.agent.context import AgentContext
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
class GoogleSearch(BaseTool):
    """
    Performs a Google search using the Serper.dev API and returns a structured summary of the results.
    This tool requires a Serper API key, which should be set in the SERPER_API_KEY environment variable.
    """
    CATEGORY = ToolCategory.WEB
    API_URL = "https://google.serper.dev/search"

    def __init__(self, config: Optional[ToolConfig] = None):
        """
        Resolve the Serper API key.

        The key is taken from the ``api_key`` entry of ``config`` when one is
        given, otherwise from the ``SERPER_API_KEY`` environment variable.

        Raises:
            ValueError: If no API key can be found in either place.
        """
        super().__init__(config=config)
        self.api_key: Optional[str] = None

        if config:
            self.api_key = config.get('api_key')

        if not self.api_key:
            self.api_key = os.getenv("SERPER_API_KEY")

        if not self.api_key:
            raise ValueError(
                "GoogleSearch tool requires a Serper API key. "
                "Please provide it via the 'api_key' config parameter or set the 'SERPER_API_KEY' environment variable."
            )
        logger.debug("GoogleSearch (API-based) tool initialized.")

    @classmethod
    def get_name(cls) -> str:
        """Return the name under which this tool is exposed."""
        return "GoogleSearch"

    @classmethod
    def get_description(cls) -> str:
        """Return the human/LLM-readable description of the tool."""
        return (
            "Searches Google for a given query using the Serper API. "
            "Returns a concise, structured summary of search results, including direct answers and top organic links."
        )

    @classmethod
    def get_argument_schema(cls) -> Optional[ParameterSchema]:
        """Describe the execution arguments: required ``query`` and optional ``num_results`` (1-10, default 5)."""
        schema = ParameterSchema()
        schema.add_parameter(ParameterDefinition(
            name="query",
            param_type=ParameterType.STRING,
            description="The search query string.",
            required=True
        ))
        schema.add_parameter(ParameterDefinition(
            name="num_results",
            param_type=ParameterType.INTEGER,
            description="The number of organic search results to return.",
            required=False,
            default_value=5,
            min_value=1,
            max_value=10
        ))
        return schema

    @classmethod
    def get_config_schema(cls) -> Optional[ParameterSchema]:
        """Describe the instantiation config: an optional ``api_key`` string."""
        schema = ParameterSchema()
        schema.add_parameter(ParameterDefinition(
            name="api_key",
            param_type=ParameterType.STRING,
            description="The API key for the Serper.dev service. Overrides the SERPER_API_KEY environment variable.",
            required=False
        ))
        return schema

    def _format_results(self, data: Dict[str, Any]) -> str:
        """
        Format the JSON response from Serper into a clean string for an LLM.

        Up to three sections are produced, in this order: the answer box,
        the knowledge graph, and the organic results. A section is only
        emitted when it actually has text to show, so a missing snippet or
        description never appears as the literal string "None".

        Args:
            data: The decoded JSON body of the Serper response.

        Returns:
            The sections joined by a ``---`` separator, or a fixed
            "nothing found" message when no section has content.
        """
        summary_parts = []

        # 1. Answer Box (most important for direct questions)
        answer_box = data.get("answerBox")
        if isinstance(answer_box, dict):
            snippet = answer_box.get("snippet") or answer_box.get("answer")
            if snippet:
                title = answer_box.get("title", "")
                summary_parts.append(f"Direct Answer for '{title}':\n{snippet}")

        # 2. Knowledge Graph (for entity information)
        kg = data.get("knowledgeGraph")
        if isinstance(kg, dict):
            description = kg.get("description")
            if description:
                title = kg.get("title", "")
                summary_parts.append(f"Summary for '{title}':\n{description}")

        # 3. Organic Results (the main search links)
        organic_results = data.get("organic")
        if organic_results:
            results_str = "\n".join(
                f"{i+1}. {result.get('title', 'No Title')}\n"
                f"   Link: {result.get('link', 'No Link')}\n"
                f"   Snippet: {result.get('snippet', 'No Snippet')}"
                for i, result in enumerate(organic_results)
            )
            summary_parts.append(f"Search Results:\n{results_str}")

        if not summary_parts:
            return "No relevant information found for the query."

        return "\n\n---\n\n".join(summary_parts)

    async def _execute(self, context: 'AgentContext', query: str, num_results: int = 5) -> str:
        """
        Run the search against the Serper API and return the formatted summary.

        Args:
            context: The calling agent's context (only ``agent_id`` is read, for logging).
            query: The search query string.
            num_results: Number of results requested from the API.

        Returns:
            The text produced by ``_format_results``.

        Raises:
            RuntimeError: On a non-200 response, or on an ``aiohttp.ClientError``
                (chained to the original error).
            Exception: Any other error is logged and re-raised unchanged.
        """
        logger.info(f"Executing GoogleSearch (API) for agent {context.agent_id} with query: '{query}'")

        headers = {
            'X-API-KEY': self.api_key,
            'Content-Type': 'application/json'
        }
        payload = json.dumps({
            "q": query,
            "num": num_results
        })

        try:
            async with aiohttp.ClientSession() as session:
                async with session.post(self.API_URL, headers=headers, data=payload) as response:
                    if response.status == 200:
                        data = await response.json()
                        return self._format_results(data)
                    status = response.status
                    error_text = await response.text()
        except aiohttp.ClientError as e:
            logger.error(f"Network error during GoogleSearch API call: {e}", exc_info=True)
            raise RuntimeError(f"A network error occurred: {e}") from e
        except Exception as e:
            logger.error(f"An unexpected error occurred in GoogleSearch tool: {e}", exc_info=True)
            raise

        # Reported outside the try block so this expected failure is logged once,
        # not a second time as an "unexpected error".
        logger.error(
            f"Serper API returned a non-200 status code: {status}. "
            f"Response: {error_text}"
        )
        raise RuntimeError(f"API request failed with status {status}: {error_text}")
@@ -1,4 +1,4 @@
1
- # file: autobyteus/autobyteus/mcp/schema_mapper.py
1
+ # file: autobyteus/autobyteus/tools/mcp/schema_mapper.py
2
2
  import logging
3
3
  from typing import Dict, Any, List, Optional
4
4
 
@@ -8,7 +8,8 @@ logger = logging.getLogger(__name__)
8
8
 
9
9
  class McpSchemaMapper:
10
10
  """
11
- Converts MCP tool JSON schemas to AutoByteUs ParameterSchema.
11
+ Converts MCP tool JSON schemas to AutoByteUs ParameterSchema,
12
+ handling nested object structures recursively.
12
13
  """
13
14
 
14
15
  _MCP_TYPE_TO_AUTOBYTEUS_TYPE_MAP = {
@@ -19,12 +20,6 @@ class McpSchemaMapper:
19
20
  "object": ParameterType.OBJECT,
20
21
  "array": ParameterType.ARRAY,
21
22
  }
22
-
23
- # REMOVED: _FILE_PATH_NAMES, _DIR_PATH_PARAM_NAMES, _URI_FORMATS
24
- # as FILE_PATH and DIRECTORY_PATH types are removed.
25
- # All string-based path parameters will now be ParameterType.STRING.
26
- # The 'format' hint from MCP schema (e.g., "uri", "url") will still be available
27
- # on the ParameterDefinition if it includes 'pattern', but it won't change the type from STRING.
28
23
 
29
24
  def map_to_autobyteus_schema(self, mcp_json_schema: Dict[str, Any]) -> ParameterSchema:
30
25
  if not isinstance(mcp_json_schema, dict):
@@ -37,95 +32,58 @@ class McpSchemaMapper:
37
32
 
38
33
  schema_type = mcp_json_schema.get("type")
39
34
  if schema_type != "object":
40
- logger.warning(f"MCP JSON schema root 'type' is '{schema_type}', not 'object'. "
41
- "Mapping may be incomplete or incorrect for non-object root schemas.")
42
- if schema_type in McpSchemaMapper._MCP_TYPE_TO_AUTOBYTEUS_TYPE_MAP:
43
- param_type_enum = McpSchemaMapper._MCP_TYPE_TO_AUTOBYTEUS_TYPE_MAP[schema_type]
44
- array_item_schema_for_root: Optional[Dict[str, Any]] = None
45
- if param_type_enum == ParameterType.ARRAY:
46
- array_item_schema_for_root = mcp_json_schema.get("items", True)
47
-
48
- param_def = ParameterDefinition(
49
- name="input_value",
50
- param_type=param_type_enum,
51
- description=mcp_json_schema.get("description", "Input value for the tool."),
52
- required=True,
53
- default_value=mcp_json_schema.get("default"),
54
- enum_values=mcp_json_schema.get("enum") if schema_type == "string" else None,
55
- array_item_schema=array_item_schema_for_root
56
- )
57
- autobyteus_schema.add_parameter(param_def)
58
- return autobyteus_schema
59
- else:
60
- logger.error(f"Unsupported root schema type '{schema_type}' for direct mapping to ParameterSchema properties.")
61
- raise ValueError(f"MCP JSON schema root 'type' must be 'object' for typical mapping, got '{schema_type}'.")
62
-
35
+ logger.error(f"Unsupported root schema type '{schema_type}' for mapping to ParameterSchema. Must be 'object'.")
36
+ raise ValueError(f"MCP JSON schema root 'type' must be 'object', got '{schema_type}'.")
63
37
 
64
38
  properties = mcp_json_schema.get("properties")
65
39
  if not isinstance(properties, dict):
66
- logger.warning("MCP JSON schema of type 'object' has no 'properties' or 'properties' is not a dict. Resulting ParameterSchema will be empty.")
40
+ logger.warning("MCP JSON schema of type 'object' has no 'properties'. Resulting ParameterSchema will be empty.")
67
41
  return autobyteus_schema
68
-
69
- required_params: List[str] = mcp_json_schema.get("required", [])
70
- if not isinstance(required_params, list) or not all(isinstance(p, str) for p in required_params):
71
- logger.warning("MCP JSON schema 'required' field is not a list of strings. Treating all params as optional.")
72
- required_params = []
42
+
43
+ # FIX: The 'required' list is specific to its own schema level.
44
+ required_params_at_this_level: List[str] = mcp_json_schema.get("required", [])
73
45
 
74
46
  for param_name, param_mcp_schema in properties.items():
75
47
  if not isinstance(param_mcp_schema, dict):
76
- logger.warning(f"Property '{param_name}' in MCP schema is not a dictionary. Skipping this parameter.")
48
+ logger.warning(f"Property '{param_name}' in MCP schema is not a dictionary. Skipping.")
77
49
  continue
78
50
 
79
51
  mcp_param_type_str = param_mcp_schema.get("type")
80
52
  description = param_mcp_schema.get("description", f"Parameter '{param_name}'.")
81
- default_value = param_mcp_schema.get("default")
82
- enum_values = param_mcp_schema.get("enum")
83
- # format_hint is still read but won't be used to change type to FILE_PATH/DIR_PATH
84
- # format_hint = param_mcp_schema.get("format", "").lower()
85
53
 
54
+ nested_object_schema: Optional[ParameterSchema] = None
86
55
  item_schema_for_array: Optional[Dict[str, Any]] = None
87
- if mcp_param_type_str == "array":
88
- item_schema_for_array = param_mcp_schema.get("items")
89
- if item_schema_for_array is None:
90
- item_schema_for_array = True
91
- logger.debug(f"MCP parameter '{param_name}' is 'array' type with no 'items' schema. Defaulting to generic items (true).")
92
-
93
- autobyteus_param_type: Optional[ParameterType] = None
94
- # REMOVED: Logic block that inferred FILE_PATH or DIRECTORY_PATH based on format_hint or param_name_lower.
95
- # All string types from MCP will now map to STRING or ENUM.
96
56
 
97
- if mcp_param_type_str in McpSchemaMapper._MCP_TYPE_TO_AUTOBYTEUS_TYPE_MAP:
98
- autobyteus_param_type = McpSchemaMapper._MCP_TYPE_TO_AUTOBYTEUS_TYPE_MAP[mcp_param_type_str]
99
- if autobyteus_param_type == ParameterType.STRING and enum_values:
100
- autobyteus_param_type = ParameterType.ENUM
101
- elif mcp_param_type_str:
102
- logger.warning(f"Unsupported MCP parameter type '{mcp_param_type_str}' for parameter '{param_name}'. Defaulting to STRING.")
103
- autobyteus_param_type = ParameterType.STRING
104
- else:
105
- logger.warning(f"MCP parameter '{param_name}' has no 'type' specified. Defaulting to STRING.")
106
- autobyteus_param_type = ParameterType.STRING
57
+ if mcp_param_type_str == "object" and "properties" in param_mcp_schema:
58
+ # Recursively map the nested object schema. The recursive call will handle its own 'required' list.
59
+ nested_object_schema = self.map_to_autobyteus_schema(param_mcp_schema)
107
60
 
108
- if autobyteus_param_type == ParameterType.ENUM:
109
- if not enum_values or not isinstance(enum_values, list) or not all(isinstance(ev, str) for ev in enum_values):
110
- logger.warning(f"Parameter '{param_name}' is ENUM type but 'enum' field is missing, not a list, or not list of strings in MCP schema. Problematic. Schema: {enum_values}")
61
+ elif mcp_param_type_str == "array":
62
+ item_schema_for_array = param_mcp_schema.get("items", True)
63
+
64
+ autobyteus_param_type = self._MCP_TYPE_TO_AUTOBYTEUS_TYPE_MAP.get(mcp_param_type_str, ParameterType.STRING)
65
+ enum_values = param_mcp_schema.get("enum")
66
+ if autobyteus_param_type == ParameterType.STRING and enum_values:
67
+ autobyteus_param_type = ParameterType.ENUM
111
68
 
112
69
  try:
113
70
  param_def = ParameterDefinition(
114
71
  name=param_name,
115
- param_type=autobyteus_param_type, # This will now be STRING for former path types
72
+ param_type=autobyteus_param_type,
116
73
  description=description,
117
- required=(param_name in required_params),
118
- default_value=default_value,
74
+ required=(param_name in required_params_at_this_level), # FIX: Use the list for the current level.
75
+ default_value=param_mcp_schema.get("default"),
119
76
  enum_values=enum_values if autobyteus_param_type == ParameterType.ENUM else None,
120
77
  min_value=param_mcp_schema.get("minimum"),
121
78
  max_value=param_mcp_schema.get("maximum"),
122
- pattern=param_mcp_schema.get("pattern") if mcp_param_type_str == "string" else None,
123
- array_item_schema=item_schema_for_array
79
+ pattern=param_mcp_schema.get("pattern"),
80
+ array_item_schema=item_schema_for_array,
81
+ object_schema=nested_object_schema,
124
82
  )
125
83
  autobyteus_schema.add_parameter(param_def)
126
84
  except ValueError as e:
127
- logger.error(f"Failed to create ParameterDefinition for '{param_name}': {e}. MCP schema for param: {param_mcp_schema}")
85
+ logger.error(f"Failed to create ParameterDefinition for '{param_name}': {e}.")
128
86
  continue
129
87
 
130
- logger.debug(f"Successfully mapped MCP schema to AutoByteUs ParameterSchema with {len(autobyteus_schema.parameters)} parameters.")
88
+ logger.debug(f"Successfully mapped MCP schema to ParameterSchema with {len(autobyteus_schema.parameters)} params.")
131
89
  return autobyteus_schema
@@ -0,0 +1,8 @@
1
+ from .image_tools import GenerateImageTool, EditImageTool
2
+ from .audio_tools import GenerateSpeechTool
3
+
4
+ __all__ = [
5
+ "GenerateImageTool",
6
+ "EditImageTool",
7
+ "GenerateSpeechTool",
8
+ ]
@@ -0,0 +1,116 @@
1
+ import os
2
+ import logging
3
+ from typing import Optional, List
4
+
5
+ from autobyteus.tools.base_tool import BaseTool
6
+ from autobyteus.tools.parameter_schema import ParameterSchema, ParameterDefinition, ParameterType
7
+ from autobyteus.tools.tool_category import ToolCategory
8
+ from autobyteus.multimedia.audio import audio_client_factory, AudioModel, AudioClientFactory
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
+ def _get_configured_model_identifier(env_var: str, default_model: Optional[str] = None) -> str:
14
+ """
15
+ Retrieves a model identifier from an environment variable.
16
+ """
17
+ model_identifier = os.getenv(env_var)
18
+ if not model_identifier:
19
+ if default_model:
20
+ return default_model
21
+ raise ValueError(f"The '{env_var}' environment variable is not set. Please configure it.")
22
+ return model_identifier
23
+
24
+
25
def _build_dynamic_audio_schema(base_params: List[ParameterDefinition], model_env_var: str, default_model: str) -> ParameterSchema:
    """
    Build a tool argument schema dynamically from the configured audio model.

    The resulting schema contains ``base_params`` and, when the model
    declares any parameters of its own, an optional ``generation_config``
    OBJECT parameter whose nested schema mirrors them.

    Args:
        base_params: Parameter definitions every variant of the tool accepts.
        model_env_var: Environment variable naming the model to use.
        default_model: Model identifier used when the variable is not set.

    Returns:
        The assembled ParameterSchema.

    Raises:
        RuntimeError: If the model identifier cannot be resolved or is not
            in the model registry; the underlying error is chained as the cause.
    """
    try:
        model_identifier = _get_configured_model_identifier(model_env_var, default_model)
        AudioClientFactory.ensure_initialized()
        model = AudioModel[model_identifier]
    except (ValueError, KeyError) as e:
        logger.error(f"Cannot generate audio tool schema. Check environment and model registry. Error: {e}")
        raise RuntimeError(f"Failed to configure audio tool. Error: {e}") from e

    config_schema = ParameterSchema()
    if model.parameter_schema:
        for name, meta in model.parameter_schema.items():
            # A missing, None or non-string "type" falls back to STRING,
            # the same as an unknown type name does.
            param_type_str = str(meta.get("type") or "string").upper()
            param_type = getattr(ParameterType, param_type_str, ParameterType.STRING)

            # A string restricted to a fixed set of values is exposed as an ENUM.
            allowed_values = meta.get("allowed_values")
            if param_type == ParameterType.STRING and allowed_values:
                param_type = ParameterType.ENUM

            config_schema.add_parameter(ParameterDefinition(
                name=name,
                param_type=param_type,
                description=meta.get("description", ""),
                required=False,
                default_value=meta.get("default"),
                enum_values=allowed_values
            ))

    schema = ParameterSchema()
    for param in base_params:
        schema.add_parameter(param)

    if config_schema.parameters:
        schema.add_parameter(ParameterDefinition(
            name="generation_config",
            param_type=ParameterType.OBJECT,
            description=f"Model-specific parameters for the configured '{model_identifier}' model.",
            required=False,
            object_schema=config_schema
        ))
    return schema
69
+
70
+
71
class GenerateSpeechTool(BaseTool):
    """
    Agent tool that turns text into spoken audio through a Text-to-Speech (TTS) model.

    The model is chosen via the DEFAULT_SPEECH_GENERATION_MODEL environment
    variable, falling back to DEFAULT_MODEL when that variable is not set.
    """
    CATEGORY = ToolCategory.MULTIMEDIA
    MODEL_ENV_VAR = "DEFAULT_SPEECH_GENERATION_MODEL"
    DEFAULT_MODEL = "gemini-2.5-flash-tts"

    @classmethod
    def get_name(cls) -> str:
        """Return the name under which this tool is exposed."""
        return "GenerateSpeech"

    @classmethod
    def get_description(cls) -> str:
        """Return the description of what the tool does and what it returns."""
        summary = "Generates spoken audio from text using the system's default Text-to-Speech (TTS) model. "
        outcome = "Returns a list of local file paths to the generated audio files (.wav) upon success."
        return summary + outcome

    @classmethod
    def get_argument_schema(cls) -> Optional[ParameterSchema]:
        """Build the argument schema: a required ``prompt`` plus any model-specific options."""
        prompt_param = ParameterDefinition(
            name="prompt",
            param_type=ParameterType.STRING,
            description="The text to be converted into spoken audio.",
            required=True
        )
        return _build_dynamic_audio_schema([prompt_param], cls.MODEL_ENV_VAR, cls.DEFAULT_MODEL)

    async def _execute(self, context, prompt: str, generation_config: Optional[dict] = None) -> List[str]:
        """
        Generate speech for ``prompt`` with the configured model.

        Returns the audio paths reported by the client and raises ValueError
        when the client reports none. The client is cleaned up afterwards
        whether or not generation succeeded.
        """
        model_identifier = _get_configured_model_identifier(self.MODEL_ENV_VAR, self.DEFAULT_MODEL)
        logger.info(f"GenerateSpeechTool executing with configured model '{model_identifier}'.")

        # If client creation fails there is nothing to clean up, so it sits outside the try.
        client = audio_client_factory.create_audio_client(model_identifier=model_identifier)
        try:
            response = await client.generate_speech(prompt=prompt, generation_config=generation_config)
            if response.audio_urls:
                return response.audio_urls
            raise ValueError("Speech generation failed to return any audio file paths.")
        finally:
            if client:
                await client.cleanup()