PyPI - datarobot-genai - Versions diffs - 0.2.42__tar.gz → 0.3.0__tar.gz - Mend

datarobot-genai 0.2.42__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (131) hide show

{datarobot_genai-0.2.42 → datarobot_genai-0.3.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: datarobot-genai
-Version: 0.2.42
+Version: 0.3.0
 Summary: Generic helpers for GenAI
 Project-URL: Homepage, https://github.com/datarobot-oss/datarobot-genai
 Author: DataRobot, Inc.
@@ -43,9 +43,11 @@ Requires-Dist: opentelemetry-api<2.0.0,>=1.22.0; extra == 'drmcp'
 Requires-Dist: opentelemetry-exporter-otlp-proto-http<2.0.0,>=1.22.0; extra == 'drmcp'
 Requires-Dist: opentelemetry-exporter-otlp<2.0.0,>=1.22.0; extra == 'drmcp'
 Requires-Dist: opentelemetry-sdk<2.0.0,>=1.22.0; extra == 'drmcp'
+Requires-Dist: perplexityai<1.0,>=0.27; extra == 'drmcp'
 Requires-Dist: pydantic-settings<3.0.0,>=2.1.0; extra == 'drmcp'
 Requires-Dist: pydantic<3.0.0,>=2.6.1; extra == 'drmcp'
 Requires-Dist: python-dotenv<2.0.0,>=1.1.0; extra == 'drmcp'
+Requires-Dist: tavily-python<1.0.0,>=0.7.20; extra == 'drmcp'
 Provides-Extra: langgraph
 Requires-Dist: langchain-mcp-adapters<0.2.0,>=0.1.12; extra == 'langgraph'
 Requires-Dist: langgraph-prebuilt<0.7.0,>=0.2.3; extra == 'langgraph'

{datarobot_genai-0.2.42 → datarobot_genai-0.3.0}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "datarobot-genai"
-version = "0.2.42"
+version = "0.3.0"
 description = "Generic helpers for GenAI"
 readme = "README.md"
 requires-python = ">=3.10, <3.13"
@@ -84,6 +84,8 @@ drmcp = [
   "python-dotenv>=1.1.0,<2.0.0",
   "boto3>=1.34.0,<2.0.0",
   "httpx>=0.28.1,<1.0.0",
+  "tavily-python>=0.7.20,<1.0.0",
+  "perplexityai>=0.27,<1.0",
   "pydantic>=2.6.1,<3.0.0",
   "pydantic-settings>=2.1.0,<3.0.0",
   "opentelemetry-api>=1.22.0,<2.0.0",

{datarobot_genai-0.2.42 → datarobot_genai-0.3.0}/src/datarobot_genai/core/agents/__init__.py RENAMED Viewed

@@ -17,7 +17,7 @@ This package provides:
 - BaseAgent: common initialization for agent env/config fields
 - Common helpers: make_system_prompt, extract_user_prompt_content
 - Framework utilities (optional extras):
-  - crewai: build_llm, create_pipeline_interactions_from_messages
+  - crewai: create_pipeline_interactions_from_messages
   - langgraph: create_pipeline_interactions_from_events
   - llamaindex: DataRobotLiteLLM, create_pipeline_interactions_from_events
 """

{datarobot_genai-0.2.42 → datarobot_genai-0.3.0}/src/datarobot_genai/crewai/__init__.py RENAMED Viewed

@@ -2,22 +2,19 @@
 Public API:
 - mcp_tools_context: Context manager returning available MCP tools for CrewAI.
-- build_llm: Construct a CrewAI LLM configured for DataRobot endpoints.
 - create_pipeline_interactions_from_messages: Convert messages to MultiTurnSample.
 """
 from datarobot_genai.core.mcp.common import MCPConfig
-from .agent import build_llm
+from .agent import CrewAIAgent
 from .agent import create_pipeline_interactions_from_messages
-from .base import CrewAIAgent
 from .events import CrewAIEventListener
 from .mcp import mcp_tools_context
 __all__ = [
     "mcp_tools_context",
     "CrewAIAgent",
-    "build_llm",
     "create_pipeline_interactions_from_messages",
     "CrewAIEventListener",
     "MCPConfig",

datarobot_genai-0.2.42/src/datarobot_genai/crewai/base.py → datarobot_genai-0.3.0/src/datarobot_genai/crewai/agent.py RENAMED Viewed

@@ -41,11 +41,24 @@ from datarobot_genai.core.agents.base import default_usage_metrics
 from datarobot_genai.core.agents.base import extract_user_prompt_content
 from datarobot_genai.core.agents.base import is_streaming
-from .agent import create_pipeline_interactions_from_messages
 from .mcp import mcp_tools_context
 if TYPE_CHECKING:
     from ragas import MultiTurnSample
+    from ragas.messages import AIMessage
+    from ragas.messages import HumanMessage
+    from ragas.messages import ToolMessage
+def create_pipeline_interactions_from_messages(
+    messages: list[HumanMessage | AIMessage | ToolMessage] | None,
+) -> MultiTurnSample | None:
+    """Wrap a list of ragas messages in a ``MultiTurnSample``.
+    Args:
+        messages: Conversation messages, or ``None``.
+    Returns
+    -------
+        A ``MultiTurnSample`` whose ``user_input`` is ``messages``, or ``None``
+        when ``messages`` is ``None`` or empty.
+    """
+    if messages:
+        # ragas is imported only on this path so it is not loaded when unused.
+        from ragas import MultiTurnSample
+        return MultiTurnSample(user_input=messages)
+    return None
 class CrewAIAgent(BaseAgent[BaseTool], abc.ABC):

{datarobot_genai-0.2.42 → datarobot_genai-0.3.0}/src/datarobot_genai/drmcp/core/config.py RENAMED Viewed

@@ -76,7 +76,25 @@ class MCPToolConfig(BaseSettings):
             RUNTIME_PARAM_ENV_VAR_NAME_PREFIX + "ENABLE_MICROSOFT_GRAPH_TOOLS",
             "ENABLE_MICROSOFT_GRAPH_TOOLS",
         ),
-        description="Enable/disable Sharepoint tools",
+        description="Enable/disable Microsoft Graph (Sharepoint/OneDrive) tools",
+    )
+    enable_perplexity_tools: bool = Field(
+        default=False,
+        validation_alias=AliasChoices(
+            RUNTIME_PARAM_ENV_VAR_NAME_PREFIX + "ENABLE_PERPLEXITY_TOOLS",
+            "ENABLE_PERPLEXITY_TOOLS",
+        ),
+        description="Enable/disable Perplexity tools",
+    )
+    enable_tavily_tools: bool = Field(
+        default=False,
+        validation_alias=AliasChoices(
+            RUNTIME_PARAM_ENV_VAR_NAME_PREFIX + "ENABLE_TAVILY_TOOLS",
+            "ENABLE_TAVILY_TOOLS",
+        ),
+        description="Enable/disable Tavily search tools",
     )
     is_atlassian_oauth_provider_configured: bool = Field(
@@ -131,6 +149,8 @@ class MCPToolConfig(BaseSettings):
         "enable_confluence_tools",
         "enable_gdrive_tools",
         "enable_microsoft_graph_tools",
+        "enable_perplexity_tools",
+        "enable_tavily_tools",
         "is_atlassian_oauth_provider_configured",
         "is_google_oauth_provider_configured",
         "is_microsoft_oauth_provider_configured",

{datarobot_genai-0.2.42 → datarobot_genai-0.3.0}/src/datarobot_genai/drmcp/core/tool_config.py RENAMED Viewed

@@ -31,6 +31,8 @@ class ToolType(str, Enum):
     CONFLUENCE = "confluence"
     GDRIVE = "gdrive"
     MICROSOFT_GRAPH = "microsoft_graph"
+    PERPLEXITY = "perplexity"
+    TAVILY = "tavily"
 class ToolConfig(TypedDict):
@@ -80,6 +82,20 @@ TOOL_CONFIGS: dict[ToolType, ToolConfig] = {
         package_prefix="datarobot_genai.drmcp.tools.microsoft_graph",
         config_field_name="enable_microsoft_graph_tools",
     ),
+    ToolType.PERPLEXITY: ToolConfig(
+        name="perplexity",
+        oauth_check=None,  # OAuth for Perplexity is not supported
+        directory="perplexity",
+        package_prefix="datarobot_genai.drmcp.tools.perplexity",
+        config_field_name="enable_perplexity_tools",
+    ),
+    ToolType.TAVILY: ToolConfig(
+        name="tavily",
+        oauth_check=None,
+        directory="tavily",
+        package_prefix="datarobot_genai.drmcp.tools.tavily",
+        config_field_name="enable_tavily_tools",
+    ),
 }

datarobot_genai-0.3.0/src/datarobot_genai/drmcp/tools/clients/perplexity.py ADDED Viewed

@@ -0,0 +1,173 @@
+# Copyright 2025 DataRobot, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+from typing import Any
+from typing import Literal
+from fastmcp.exceptions import ToolError
+from fastmcp.server.dependencies import get_http_headers
+from perplexity import AsyncPerplexity
+from perplexity.types import search_create_response
+from pydantic import BaseModel
+from pydantic import ConfigDict
+logger = logging.getLogger(__name__)
+MAX_QUERIES: int = 5
+MAX_RESULTS: int = 20
+MAX_TOKENS_PER_PAGE: int = 8192
+MAX_SEARCH_DOMAIN_FILTER: int = 20
+MAX_RESULTS_DEFAULT: int = 10
+MAX_TOKENS_PER_PAGE_DEFAULT: int = 2048
+async def get_perplexity_access_token() -> str | ToolError:
+    """
+    Read the Perplexity API key from the current request's HTTP headers.
+    Only the ``x-perplexity-api-key`` header is consulted; no OAuth flow is
+    performed here (API-key authorization only).
+    Failures are returned as a ToolError instance instead of being raised, so
+    the caller must check the type of the result before using it.
+    Returns
+    -------
+        The API key string on success, a ToolError instance on failure.
+    Example:
+        ```python
+        token = await get_perplexity_access_token()
+        if isinstance(token, ToolError):
+            raise token
+        # token is a plain string from here on
+        ```
+    """
+    try:
+        api_key = get_http_headers().get("x-perplexity-api-key")
+        if not api_key:
+            # Missing or empty header: report it to the caller as a ToolError value.
+            logger.warning("Perplexity API key not found in headers.")
+            return ToolError(
+                "Perplexity API key not found in headers. "
+                "Please provide it via 'x-perplexity-api-key' header."
+            )
+        return api_key
+    except Exception as e:
+        # Any failure while reading the headers is logged and converted to a ToolError value.
+        logger.error(f"Unexpected error obtaining Perplexity API key: {e}.", exc_info=e)
+        return ToolError("An unexpected error occured while obtaining Perplexity API key.")
+class PerplexityError(Exception):
+    """Error raised by the Perplexity client, e.g. for invalid search arguments.
+    Args:
+        message: Human-readable description of the failure.
+    """
+    def __init__(self, message: str) -> None:
+        Exception.__init__(self, message)
+class PerplexitySearchResult(BaseModel):
+    # One search hit returned by Perplexity: snippet, title and url are required,
+    # date and last_updated are optional strings.
+    model_config = ConfigDict(populate_by_name=True)
+    snippet: str
+    title: str
+    url: str
+    date: str | None = None
+    last_updated: str | None = None
+    @classmethod
+    def from_perplexity_sdk(cls, result: search_create_response.Result) -> "PerplexitySearchResult":
+        """Build an instance from the fields of an SDK result's ``model_dump()``."""
+        sdk_fields = result.model_dump()
+        return cls(**sdk_fields)
+    def as_flat_dict(self) -> dict[str, Any]:
+        """Dump this result to a plain dictionary (``model_dump(by_alias=True)``)."""
+        flat = self.model_dump(by_alias=True)
+        return flat
+class PerplexityClient:
+    """Client for interacting with Perplexity API.
+    It is a thin async wrapper around the Perplexity Python SDK (``AsyncPerplexity``).
+    It can be used as an async context manager; leaving the context closes the
+    underlying SDK client.
+    """
+    def __init__(self, access_token: str) -> None:
+        self._client = AsyncPerplexity(api_key=access_token)
+    @staticmethod
+    def _validate_search_args(
+        query: str | list[str],
+        search_domain_filter: list[str] | None,
+        max_results: int,
+        max_tokens_per_page: int,
+    ) -> None:
+        """Raise PerplexityError if any search argument is empty or outside its limit."""
+        if not query:
+            raise PerplexityError("Error: query cannot be empty.")
+        if isinstance(query, str):
+            if not query.strip():
+                raise PerplexityError("Error: query cannot be empty.")
+        elif isinstance(query, list):
+            if len(query) > MAX_QUERIES:
+                raise PerplexityError(f"Error: query list cannot be bigger than {MAX_QUERIES}.")
+            if not all(q.strip() for q in query):
+                raise PerplexityError("Error: query cannot contain empty str.")
+        # Limits in the messages come from the same constants they are checked against,
+        # so the text cannot drift from the enforced value.
+        if search_domain_filter and len(search_domain_filter) > MAX_SEARCH_DOMAIN_FILTER:
+            raise PerplexityError(
+                f"Error: maximum number of search domain filters is {MAX_SEARCH_DOMAIN_FILTER}."
+            )
+        if max_results <= 0:
+            raise PerplexityError("Error: max_results must be greater than 0.")
+        if max_results > MAX_RESULTS:
+            raise PerplexityError(
+                f"Error: max_results must be smaller than or equal to {MAX_RESULTS}."
+            )
+        if max_tokens_per_page <= 0:
+            raise PerplexityError("Error: max_tokens_per_page must be greater than 0.")
+        if max_tokens_per_page > MAX_TOKENS_PER_PAGE:
+            raise PerplexityError(
+                "Error: max_tokens_per_page must be smaller than or equal to "
+                f"{MAX_TOKENS_PER_PAGE}."
+            )
+    async def search(
+        self,
+        query: str | list[str],
+        search_domain_filter: list[str] | None = None,
+        recency: Literal["hour", "day", "week", "month", "year"] | None = None,
+        max_results: int = MAX_RESULTS_DEFAULT,
+        max_tokens_per_page: int = MAX_TOKENS_PER_PAGE_DEFAULT,
+    ) -> list[PerplexitySearchResult]:
+        """
+        Search using Perplexity.
+        Args:
+            query: A single query string, or a list of at most MAX_QUERIES query strings.
+            search_domain_filter: Up to MAX_SEARCH_DOMAIN_FILTER domains/URLs to allowlist
+                or denylist.
+            recency: Filter results by time period.
+            max_results: Number of ranked results to return (1 to MAX_RESULTS).
+            max_tokens_per_page: Context extraction cap per page (1 to MAX_TOKENS_PER_PAGE).
+        Returns
+        -------
+            List of Perplexity search results.
+        Raises
+        ------
+            PerplexityError: If any argument fails validation.
+        """
+        # Out-of-range values are rejected, not clamped, so the request below always
+        # carries exactly what the caller asked for.
+        self._validate_search_args(query, search_domain_filter, max_results, max_tokens_per_page)
+        search_result = await self._client.search.create(
+            query=query,
+            search_domain_filter=search_domain_filter,
+            search_recency_filter=recency,
+            max_results=max_results,
+            max_tokens_per_page=max_tokens_per_page,
+        )
+        return [
+            PerplexitySearchResult.from_perplexity_sdk(result) for result in search_result.results
+        ]
+    async def __aenter__(self) -> "PerplexityClient":
+        """Async context manager entry."""
+        return self
+    async def __aexit__(
+        self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: Any
+    ) -> None:
+        """Async context manager exit; closes the underlying SDK client."""
+        await self._client.close()

datarobot_genai-0.3.0/src/datarobot_genai/drmcp/tools/clients/tavily.py ADDED Viewed

@@ -0,0 +1,199 @@
+# Copyright 2025 DataRobot, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tavily API Client and utilities for API key authentication."""
+import logging
+from typing import Any
+from typing import Literal
+from fastmcp.exceptions import ToolError
+from fastmcp.server.dependencies import get_http_headers
+from pydantic import BaseModel
+from pydantic import ConfigDict
+from tavily import AsyncTavilyClient
+logger = logging.getLogger(__name__)
+MAX_RESULTS: int = 20
+MAX_CHUNKS_PER_SOURCE: int = 3
+MAX_RESULTS_DEFAULT: int = 5
+CHUNKS_PER_SOURCE_DEFAULT: int = 1
+async def get_tavily_access_token() -> str:
+    """
+    Read the Tavily API key from the current request's HTTP headers.
+    Only the ``x-tavily-api-key`` header is consulted.
+    Returns
+    -------
+        The API key string.
+    Raises
+    ------
+        ToolError: If the header is missing or empty.
+    """
+    api_key = get_http_headers().get("x-tavily-api-key")
+    if not api_key:
+        logger.warning("Tavily API key not found in headers")
+        raise ToolError(
+            "Tavily API key not found in headers. Please provide it via 'x-tavily-api-key' header."
+        )
+    return api_key
+class TavilySearchResult(BaseModel):
+    """A single search result from Tavily API."""
+    model_config = ConfigDict(populate_by_name=True)
+    title: str
+    url: str
+    content: str
+    score: float
+    @classmethod
+    def from_tavily_sdk(cls, result: dict[str, Any]) -> "TavilySearchResult":
+        """Build an instance from one result dict, defaulting any missing key."""
+        # Fallback used for each field when the SDK dict does not contain it.
+        fallbacks: dict[str, Any] = {"title": "", "url": "", "content": "", "score": 0.0}
+        return cls(**{name: result.get(name, default) for name, default in fallbacks.items()})
+    def as_flat_dict(self) -> dict[str, Any]:
+        """Dump this result to a plain dictionary (``model_dump(by_alias=True)``)."""
+        flat = self.model_dump(by_alias=True)
+        return flat
+class TavilyImage(BaseModel):
+    """An image result from Tavily API."""
+    model_config = ConfigDict(populate_by_name=True)
+    url: str
+    description: str | None = None
+    @classmethod
+    def from_tavily_sdk(cls, image: dict[str, Any] | str) -> "TavilyImage":
+        """Build an instance from either a bare URL string or an image dict."""
+        if not isinstance(image, str):
+            # Dict form: the url falls back to "" and the description to None when absent.
+            return cls(url=image.get("url", ""), description=image.get("description"))
+        return cls(url=image)
+class TavilyClient:
+    """Client for interacting with Tavily Search API.
+    This is a wrapper around the official tavily-python SDK.
+    It can be used as an async context manager; entering and leaving the context
+    perform no work and exist for consistency with other clients.
+    """
+    def __init__(self, api_key: str) -> None:
+        self._client = AsyncTavilyClient(api_key=api_key)
+    async def search(
+        self,
+        query: str,
+        *,
+        topic: Literal["general", "news", "finance"] = "general",
+        search_depth: Literal["basic", "advanced"] = "basic",
+        max_results: int = MAX_RESULTS_DEFAULT,
+        time_range: Literal["day", "week", "month", "year"] | None = None,
+        include_images: bool = False,
+        include_image_descriptions: bool = False,
+        chunks_per_source: int = CHUNKS_PER_SOURCE_DEFAULT,
+        include_answer: bool = False,
+    ) -> dict[str, Any]:
+        """
+        Perform a web search using Tavily API.
+        Args:
+            query: The search query to execute.
+            topic: The category of search ("general", "news", or "finance").
+            search_depth: The depth of search ("basic" or "advanced").
+            max_results: Maximum number of results to return (1 to MAX_RESULTS).
+            time_range: Time range filter ("day", "week", "month", "year"); omitted from
+                the request when not set.
+            include_images: Whether to include images in results.
+            include_image_descriptions: Whether to include image descriptions.
+            chunks_per_source: Maximum content snippets per URL (1 to MAX_CHUNKS_PER_SOURCE).
+            include_answer: Whether to include an AI-generated answer.
+        Returns
+        -------
+            Dict with search results from Tavily API.
+        Raises
+        ------
+            ValueError: If validation fails.
+            TavilyInvalidAPIKeyError: If the API key is invalid.
+            TavilyUsageLimitExceededError: If usage limit is exceeded.
+            TavilyForbiddenError: If access is forbidden.
+            TavilyBadRequestError: If the request is malformed.
+        """
+        # Out-of-range values are rejected rather than clamped, so no further
+        # adjustment of max_results / chunks_per_source is needed after these checks.
+        if not query:
+            raise ValueError("query cannot be empty.")
+        if isinstance(query, str) and not query.strip():
+            raise ValueError("query cannot be empty.")
+        if max_results <= 0:
+            raise ValueError("max_results must be greater than 0.")
+        if max_results > MAX_RESULTS:
+            raise ValueError(f"max_results must be smaller than or equal to {MAX_RESULTS}.")
+        if chunks_per_source <= 0:
+            raise ValueError("chunks_per_source must be greater than 0.")
+        if chunks_per_source > MAX_CHUNKS_PER_SOURCE:
+            raise ValueError(
+                f"chunks_per_source must be smaller than or equal to {MAX_CHUNKS_PER_SOURCE}."
+            )
+        search_kwargs: dict[str, Any] = {
+            "query": query,
+            "topic": topic,
+            "search_depth": search_depth,
+            "max_results": max_results,
+            "include_images": include_images,
+            "include_image_descriptions": include_image_descriptions,
+            "chunks_per_source": chunks_per_source,
+            "include_answer": include_answer,
+        }
+        # time_range is only sent when the caller supplied one.
+        if time_range:
+            search_kwargs["time_range"] = time_range
+        return await self._client.search(**search_kwargs)
+    async def __aenter__(self) -> "TavilyClient":
+        """Async context manager entry."""
+        return self
+    async def __aexit__(
+        self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: Any
+    ) -> None:
+        """Async context manager exit."""
+        # AsyncTavilyClient doesn't have a close method, but we keep the context manager
+        # pattern for consistency with other clients
+        return None

{datarobot_genai-0.2.42 → datarobot_genai-0.3.0}/src/datarobot_genai/drmcp/tools/gdrive/tools.py RENAMED Viewed

@@ -33,9 +33,7 @@ from datarobot_genai.drmcp.tools.clients.gdrive import get_gdrive_access_token
 logger = logging.getLogger(__name__)
-@dr_mcp_tool(
-    tags={"google", "gdrive", "list", "search", "files", "find", "contents"}, enabled=False
-)
+@dr_mcp_tool(tags={"google", "gdrive", "list", "search", "files", "find", "contents"})
 async def gdrive_find_contents(
     *,
     page_size: Annotated[
@@ -317,7 +315,7 @@ async def gdrive_update_metadata(
     )
-@dr_mcp_tool(tags={"google", "gdrive", "manage", "access", "acl"})
+@dr_mcp_tool(tags={"google", "gdrive", "manage", "access", "acl"}, enabled=False)
 async def gdrive_manage_access(
     *,
     file_id: Annotated[str, "The ID of the file or folder."],

datarobot_genai-0.3.0/src/datarobot_genai/drmcp/tools/perplexity/tools.py ADDED Viewed

@@ -0,0 +1,121 @@
+# Copyright 2026 DataRobot, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Perplexity MCP tools."""
+import logging
+from typing import Annotated
+from typing import Literal
+from fastmcp.exceptions import ToolError
+from fastmcp.tools.tool import ToolResult
+from datarobot_genai.drmcp.core.mcp_instance import dr_mcp_tool
+from datarobot_genai.drmcp.tools.clients.perplexity import MAX_QUERIES
+from datarobot_genai.drmcp.tools.clients.perplexity import MAX_RESULTS
+from datarobot_genai.drmcp.tools.clients.perplexity import MAX_RESULTS_DEFAULT
+from datarobot_genai.drmcp.tools.clients.perplexity import MAX_SEARCH_DOMAIN_FILTER
+from datarobot_genai.drmcp.tools.clients.perplexity import MAX_TOKENS_PER_PAGE
+from datarobot_genai.drmcp.tools.clients.perplexity import MAX_TOKENS_PER_PAGE_DEFAULT
+from datarobot_genai.drmcp.tools.clients.perplexity import PerplexityClient
+from datarobot_genai.drmcp.tools.clients.perplexity import get_perplexity_access_token
+logger = logging.getLogger(__name__)
+@dr_mcp_tool(tags={"perplexity", "web", "search", "websearch"})
+async def perplexity_search(
+    *,
+    # The first argument of Annotated is the type; everything after it is metadata.
+    # The union must therefore be spelled as a single type so a list of sub-queries
+    # is part of the declared parameter type.
+    query: Annotated[
+        str | list[str],
+        f"The search query string OR "
+        f"a list of up to {MAX_QUERIES} sub-queries for multi-query research.",
+    ],
+    search_domain_filter: Annotated[
+        list[str] | None,
+        f"Up to {MAX_SEARCH_DOMAIN_FILTER} domains/URLs "
+        f"to allowlist or denylist (prefix with '-').",
+    ] = None,
+    recency: Annotated[
+        Literal["day", "week", "month", "year"] | None, "Filter results by time period."
+    ] = None,
+    max_results: Annotated[
+        int, f"Number of ranked results to return (1-{MAX_RESULTS})."
+    ] = MAX_RESULTS_DEFAULT,
+    max_tokens_per_page: Annotated[
+        int,
+        f"Content extraction cap per page (1-{MAX_TOKENS_PER_PAGE}) "
+        f"(default {MAX_TOKENS_PER_PAGE_DEFAULT}).",
+    ] = MAX_TOKENS_PER_PAGE_DEFAULT,
+) -> ToolResult:
+    """Perplexity web search tool combining multi-query research and content extraction control."""
+    # Arguments are validated here first so that failures are reported as ToolError
+    # before any API key lookup or network call happens.
+    if not query:
+        raise ToolError("Argument validation error: query cannot be empty.")
+    if isinstance(query, str) and not query.strip():
+        raise ToolError("Argument validation error: query cannot be empty.")
+    if isinstance(query, list) and len(query) > MAX_QUERIES:
+        raise ToolError(
+            f"Argument validation error: query list cannot be bigger than {MAX_QUERIES}."
+        )
+    if isinstance(query, list) and not all(q.strip() for q in query):
+        raise ToolError("Argument validation error: query cannot contain empty str.")
+    if search_domain_filter and len(search_domain_filter) > MAX_SEARCH_DOMAIN_FILTER:
+        raise ToolError(
+            f"Argument validation error: "
+            f"maximum number of search domain filters is {MAX_SEARCH_DOMAIN_FILTER}."
+        )
+    if max_results <= 0:
+        raise ToolError("Argument validation error: max_results must be greater than 0.")
+    if max_results > MAX_RESULTS:
+        raise ToolError(
+            f"Argument validation error: "
+            f"max_results must be smaller than or equal to {MAX_RESULTS}."
+        )
+    if max_tokens_per_page <= 0:
+        raise ToolError("Argument validation error: max_tokens_per_page must be greater than 0.")
+    if max_tokens_per_page > MAX_TOKENS_PER_PAGE:
+        raise ToolError(
+            f"Argument validation error: "
+            f"max_tokens_per_page must be smaller than or equal to {MAX_TOKENS_PER_PAGE}."
+        )
+    # The token helper returns (rather than raises) a ToolError on failure; re-raise it here.
+    access_token = await get_perplexity_access_token()
+    if isinstance(access_token, ToolError):
+        raise access_token
+    async with PerplexityClient(access_token=access_token) as perplexity_client:
+        results = await perplexity_client.search(
+            query=query,
+            search_domain_filter=search_domain_filter,
+            recency=recency,
+            max_results=max_results,
+            max_tokens_per_page=max_tokens_per_page,
+        )
+    query_txt = f"query '{query}'" if isinstance(query, str) else f"queries '{', '.join(query)}'"
+    n = len(results)
+    return ToolResult(
+        content=f"Successfully executed search for {query_txt}. Found {n} result(s).",
+        structured_content={
+            "results": results,
+            "count": n,
+            "metadata": {
+                "queriesExecuted": len(query) if isinstance(query, list) else 1,
+                "filtersApplied": {"domains": search_domain_filter, "recency": recency},
+                "extractionLimit": max_tokens_per_page,
+            },
+        },
+    )

datarobot-genai 0.2.42__tar.gz → 0.3.0__tar.gz

datarobot-genai 0.2.42__tar.gz → 0.3.0__tar.gz