PyPI - exa-py - Versions diffs - 1.13.1__tar.gz → 1.13.2__tar.gz - Mend

exa-py 1.13.1__tar.gz → 1.13.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of exa-py might be problematic. Click here for more details.

Files changed (28) hide show

{exa_py-1.13.1 → exa_py-1.13.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: exa-py
-Version: 1.13.1
+Version: 1.13.2
 Summary: Python SDK for Exa API.
 License: MIT
 Author: Exa AI
@@ -45,6 +45,7 @@ exa = Exa(api_key="your-api-key")
 ```
 ## Common requests
 ```python
   # basic search
@@ -63,9 +64,9 @@ exa = Exa(api_key="your-api-key")
   results = exa.search_and_contents("This is a Exa query:")
   # search and get contents with contents options
-  results = exa.search_and_contents("This is a Exa query:",
+  results = exa.search_and_contents("This is a Exa query:",
                                     text={"include_html_tags": True, "max_characters": 1000})
   # find similar documents
   results = exa.find_similar("https://example.com")
@@ -79,7 +80,7 @@ exa = Exa(api_key="your-api-key")
   results = exa.get_contents(["tesla.com"])
   # get contents with contents options
-  results = exa.get_contents(["urls"],
+  results = exa.get_contents(["urls"],
                              text={"include_html_tags": True, "max_characters": 1000})
   # basic answer
@@ -95,6 +96,38 @@ exa = Exa(api_key="your-api-key")
   for chunk in response:
     print(chunk, end='', flush=True)
+  # research task example – answer a question with citations
+  # Example prompt & schema inspired by the TypeScript example.
+  QUESTION = (
+      "Summarize the history of San Francisco highlighting one or two major events "
+      "for each decade from 1850 to 1950"
+  )
+  OUTPUT_SCHEMA: Dict[str, Any] = {
+      "type": "object",
+      "required": ["timeline"],
+      "properties": {
+          "timeline": {
+              "type": "array",
+              "items": {
+                  "type": "object",
+                  "required": ["decade", "notableEvents"],
+                  "properties": {
+                      "decade": {
+                          "type": "string",
+                          "description": 'Decade label e.g. "1850s"',
+                      },
+                      "notableEvents": {
+                          "type": "string",
+                          "description": "A summary of notable events.",
+                      },
+                  },
+              },
+          },
+      },
+  }
+  resp = exa.research.create_task(
+      input_instructions=QUESTION,
+      output_schema=OUTPUT_SCHEMA,
+  )
 ```

{exa_py-1.13.1 → exa_py-1.13.2}/README.md RENAMED Viewed

@@ -22,6 +22,7 @@ exa = Exa(api_key="your-api-key")
 ```
 ## Common requests
 ```python
   # basic search
@@ -40,9 +41,9 @@ exa = Exa(api_key="your-api-key")
   results = exa.search_and_contents("This is a Exa query:")
   # search and get contents with contents options
-  results = exa.search_and_contents("This is a Exa query:",
+  results = exa.search_and_contents("This is a Exa query:",
                                     text={"include_html_tags": True, "max_characters": 1000})
   # find similar documents
   results = exa.find_similar("https://example.com")
@@ -56,7 +57,7 @@ exa = Exa(api_key="your-api-key")
   results = exa.get_contents(["tesla.com"])
   # get contents with contents options
-  results = exa.get_contents(["urls"],
+  results = exa.get_contents(["urls"],
                              text={"include_html_tags": True, "max_characters": 1000})
   # basic answer
@@ -72,5 +73,37 @@ exa = Exa(api_key="your-api-key")
   for chunk in response:
     print(chunk, end='', flush=True)
+  # research task example – answer a question with citations
+  # Example prompt & schema inspired by the TypeScript example.
+  QUESTION = (
+      "Summarize the history of San Francisco highlighting one or two major events "
+      "for each decade from 1850 to 1950"
+  )
+  OUTPUT_SCHEMA: Dict[str, Any] = {
+      "type": "object",
+      "required": ["timeline"],
+      "properties": {
+          "timeline": {
+              "type": "array",
+              "items": {
+                  "type": "object",
+                  "required": ["decade", "notableEvents"],
+                  "properties": {
+                      "decade": {
+                          "type": "string",
+                          "description": 'Decade label e.g. "1850s"',
+                      },
+                      "notableEvents": {
+                          "type": "string",
+                          "description": "A summary of notable events.",
+                      },
+                  },
+              },
+          },
+      },
+  }
+  resp = exa.research.create_task(
+      input_instructions=QUESTION,
+      output_schema=OUTPUT_SCHEMA,
+  )
 ```

{exa_py-1.13.1 → exa_py-1.13.2}/exa_py/api.py RENAMED Viewed

@@ -38,6 +38,7 @@ from exa_py.utils import (
 )
 from .websets import WebsetsClient
 from .websets.core.base import ExaJSONEncoder
+from .research.client import ResearchClient, AsyncResearchClient
 is_beta = os.getenv("IS_BETA") == "True"
@@ -837,37 +838,6 @@ def nest_fields(original_dict: Dict, fields_to_nest: List[str], new_key: str):
     return original_dict
-@dataclass
-class ResearchTaskResponse:
-    """A class representing the response for a research task.
-    Attributes:
-        id (str): The unique identifier for the research request.
-        status (str): Status of the research request.
-        output (Optional[Dict[str, Any]]): The answer structured as JSON, if available.
-        citations (Optional[Dict[str, List[_Result]]]): List of citations used to generate the answer, grouped by root field in the output schema.
-    """
-    id: str
-    status: str
-    output: Optional[Dict[str, Any]]
-    citations: Dict[str, List[_Result]]
-    def __str__(self):
-        output_repr = (
-            json.dumps(self.output, indent=2, ensure_ascii=False)
-            if self.output is not None
-            else "None"
-        )
-        citations_str = "\n\n".join(str(src) for src in self.citations)
-        return (
-            f"ID: {self.id}\n"
-            f"Status: {self.status}\n"
-            f"Output: {output_repr}\n\n"
-            f"Citations:\n{citations_str}"
-        )
 class Exa:
     """A client for interacting with Exa API."""
@@ -875,7 +845,7 @@ class Exa:
         self,
         api_key: Optional[str],
         base_url: str = "https://api.exa.ai",
-        user_agent: str = "exa-py 1.12.1",
+        user_agent: str = "exa-py 1.12.4",
     ):
         """Initialize the Exa client with the provided API key and optional base URL and user agent.
@@ -898,6 +868,8 @@ class Exa:
             "Content-Type": "application/json",
         }
         self.websets = WebsetsClient(self)
+        # Research tasks client (new, mirrors Websets design)
+        self.research = ResearchClient(self)
     def request(
         self,
@@ -1952,40 +1924,12 @@ class Exa:
         raw_response = self.request("/answer", options)
         return StreamAnswerResponse(raw_response)
-    def researchTask(
-        self,
-        *,
-        input_instructions: str,
-        output_schema: Dict[str, Any],
-    ) -> ResearchTaskResponse:
-        """Submit a research request to Exa.
-        Args:
-            input_instructions (str): The instructions for the research task.
-            output_schema (Dict[str, Any]): JSON schema describing the desired answer structure.
-        """
-        # Build the request payload expected by the Exa API
-        options = {
-            "input": {"instructions": input_instructions},
-            "output": {"schema": output_schema},
-        }
-        response = self.request("/research/tasks", options)
-        return ResearchTaskResponse(
-            id=response["id"],
-            status=response["status"],
-            output=response.get("output"),
-            citations={
-                key: [_Result(**to_snake_case(citation)) for citation in citations_list]
-                for key, citations_list in response.get("citations", {}).items()
-            },
-        )
 class AsyncExa(Exa):
     def __init__(self, api_key: str, api_base: str = "https://api.exa.ai"):
         super().__init__(api_key, api_base)
+        # Override the synchronous ResearchClient with its async counterpart.
+        self.research = AsyncResearchClient(self)
         self._client = None
     @property
@@ -2316,36 +2260,3 @@ class AsyncExa(Exa):
         options["stream"] = True
         raw_response = await self.async_request("/answer", options)
         return AsyncStreamAnswerResponse(raw_response)
-    async def researchTask(
-        self,
-        *,
-        input_instructions: str,
-        output_schema: Dict[str, Any],
-    ) -> ResearchTaskResponse:
-        """Asynchronously submit a research request to Exa.
-        Args:
-            input_instructions (str): The instructions for the research task.
-            output_schema (Dict[str, Any]): JSON schema describing the desired answer structure.
-        Returns:
-            ResearchTaskResponse: The parsed response from the Exa API.
-        """
-        # Build the request payload expected by the Exa API
-        options = {
-            "input": {"instructions": input_instructions},
-            "output": {"schema": output_schema},
-        }
-        response = await self.async_request("/research/tasks", options)
-        return ResearchTaskResponse(
-            id=response["id"],
-            status=response["status"],
-            output=response.get("output"),
-            citations={
-                key: [_Result(**to_snake_case(citation)) for citation in citations_list]
-                for key, citations_list in response.get("citations", {}).items()
-            },
-        )

exa_py-1.13.2/exa_py/research/__init__.py ADDED Viewed

@@ -0,0 +1,9 @@
+from .client import ResearchClient, AsyncResearchClient
+from .models import ResearchTask
+__all__ = [
+    "ResearchClient",
+    "AsyncResearchClient",
+    "ResearchTaskId",
+    "ResearchTask",
+]

exa_py-1.13.2/exa_py/research/client.py ADDED Viewed

@@ -0,0 +1,232 @@
+"""Lightweight research client wrappers for the Exa REST API.
+This module purposefully keeps its import surface minimal to avoid circular
+import problems with :pymod:`exa_py.api`.  Any heavy dependencies (including
+`exa_py.api` itself) are imported lazily **inside** functions.  This means
+that type-checkers still see the full, precise types via the ``TYPE_CHECKING``
+block, but at runtime we only pay the cost if/when a helper is actually used.
+"""
+from __future__ import annotations
+from typing import TYPE_CHECKING, Any, Dict
+if TYPE_CHECKING:  # pragma: no cover – only for static analysers
+    # Import with full type info when static type-checking.  `_Result` still
+    # lives in ``exa_py.api`` but the response model moved to
+    # ``exa_py.research.models``.
+    from ..api import _Result  # noqa: F401
+    from .models import ResearchTask, ResearchTaskId  # noqa: F401
+# ---------------------------------------------------------------------------
+# Public, user-facing clients
+# ---------------------------------------------------------------------------
+class ResearchClient:
+    """Synchronous helper namespace accessed via :pyattr:`Exa.research`."""
+    def __init__(self, parent_client):
+        # A reference to the *already-constructed* ``Exa`` instance so that we
+        # can piggy-back on its HTTP plumbing (headers, base URL, retries, …).
+        self._client = parent_client
+    def create_task(
+        self,
+        *,
+        input_instructions: str,
+        output_schema: Dict[str, Any],
+    ) -> "ResearchTaskId":
+        """Submit a research request and return the *task identifier*."""
+        payload = {
+            "input": {"instructions": input_instructions},
+            "output": {"schema": output_schema},
+        }
+        raw_response: Dict[str, Any] = self._client.request("/research/tasks", payload)
+        # Defensive checks so that we fail loudly if the contract changes.
+        if not isinstance(raw_response, dict) or "id" not in raw_response:
+            raise RuntimeError(
+                f"Unexpected response while creating research task: {raw_response}"
+            )
+        # Lazily import to avoid circular deps at runtime.
+        from .models import ResearchTaskId  # noqa: WPS433 – runtime import
+        return ResearchTaskId(id=raw_response["id"])
+    def get_task(
+        self, id: str
+    ) -> "ResearchTask":  # noqa: D401 – imperative mood is fine
+        """Fetch the current status / result for a research task."""
+        endpoint = f"/research/tasks/{id}"
+        # The new endpoint is a simple GET.
+        raw_response: Dict[str, Any] = self._client.request(endpoint, method="GET")
+        return _build_research_task(raw_response)
+    # ------------------------------------------------------------------
+    # Convenience helpers
+    # ------------------------------------------------------------------
+    def poll_task(
+        self,
+        id: str,
+        *,
+        poll_interval: float = 1.0,
+        timeout_seconds: int = 15 * 60,
+    ) -> "ResearchTask":
+        """Blocking helper that polls until task completes or fails.
+        Parameters
+        ----------
+        id:
+            The ID of the research task to poll.
+        poll_interval:
+            Seconds to wait between successive polls (default 1s).
+        timeout_seconds:
+            Maximum time to wait before raising :class:`TimeoutError` (default 15 min).
+        """
+        import time
+        deadline = time.monotonic() + timeout_seconds
+        while True:
+            task = self.get_task(id)
+            status = task.status.lower() if isinstance(task.status, str) else ""
+            if status in {"completed", "failed", "complete", "finished", "done"}:
+                return task
+            if time.monotonic() > deadline:
+                raise TimeoutError(
+                    f"Research task {id} did not finish within {timeout_seconds} seconds"
+                )
+            time.sleep(poll_interval)
+class AsyncResearchClient:
+    """Async counterpart used via :pyattr:`AsyncExa.research`."""
+    def __init__(self, parent_client):
+        self._client = parent_client
+    async def create_task(
+        self,
+        *,
+        input_instructions: str,
+        output_schema: Dict[str, Any],
+    ) -> "ResearchTaskId":
+        """Submit a research request and return the *task identifier* (async)."""
+        payload = {
+            "input": {"instructions": input_instructions},
+            "output": {"schema": output_schema},
+        }
+        raw_response: Dict[str, Any] = await self._client.async_request(
+            "/research/tasks", payload
+        )
+        # Defensive checks so that we fail loudly if the contract changes.
+        if not isinstance(raw_response, dict) or "id" not in raw_response:
+            raise RuntimeError(
+                f"Unexpected response while creating research task: {raw_response}"
+            )
+        # Lazily import to avoid circular deps at runtime.
+        from .models import ResearchTaskId  # noqa: WPS433 – runtime import
+        return ResearchTaskId(id=raw_response["id"])
+    async def get_task(self, id: str) -> "ResearchTask":  # noqa: D401
+        """Fetch the current status / result for a research task (async)."""
+        endpoint = f"/research/tasks/{id}"
+        # Perform GET using the underlying HTTP client because `async_request`
+        # only supports POST semantics.
+        resp = await self._client.client.get(
+            self._client.base_url + endpoint, headers=self._client.headers
+        )
+        if resp.status_code >= 400:
+            raise RuntimeError(
+                f"Request failed with status code {resp.status_code}: {resp.text}"
+            )
+        raw_response: Dict[str, Any] = resp.json()
+        return _build_research_task(raw_response)
+    # ------------------------------------------------------------------
+    # Convenience helpers
+    # ------------------------------------------------------------------
+    async def poll_task(
+        self,
+        id: str,
+        *,
+        poll_interval: float = 1.0,
+        timeout_seconds: int = 15 * 60,
+    ) -> "ResearchTask":
+        """Async helper that polls until task completes or fails.
+        Mirrors :py:meth:`ResearchClient.poll_task` but uses ``await`` and
+        :pyfunc:`asyncio.sleep`.  Raises :class:`TimeoutError` on timeout.
+        """
+        import asyncio
+        import time
+        deadline = time.monotonic() + timeout_seconds
+        while True:
+            task = await self.get_task(id)
+            status = task.status.lower() if isinstance(task.status, str) else ""
+            if status in {"completed", "failed", "complete", "finished", "done"}:
+                return task
+            if time.monotonic() > deadline:
+                raise TimeoutError(
+                    f"Research task {id} did not finish within {timeout_seconds} seconds"
+                )
+            await asyncio.sleep(poll_interval)
+# ---------------------------------------------------------------------------
+# Internal helpers (lazy imports to avoid cycles)
+# ---------------------------------------------------------------------------
+def _build_research_task(raw: Dict[str, Any]):
+    """Convert raw API response into a :class:`ResearchTask` instance."""
+    # Defensive check – fail loudly if the API contract changes.
+    if not isinstance(raw, dict) or "id" not in raw:
+        raise RuntimeError(f"Unexpected response while fetching research task: {raw}")
+    # Lazily import heavy deps to avoid cycles and unnecessary startup cost.
+    from .models import ResearchTask  # noqa: WPS433 – runtime import
+    from ..api import _Result, to_snake_case  # noqa: WPS433 – runtime import
+    citations_raw = raw.get("citations", {}) or {}
+    citations_parsed = {
+        key: [_Result(**to_snake_case(c)) for c in cites]
+        for key, cites in citations_raw.items()
+    }
+    return ResearchTask(
+        id=raw["id"],
+        status=raw["status"],
+        instructions=raw.get("instructions", ""),
+        schema=raw.get("schema", {}),
+        data=raw.get("data"),
+        citations=citations_parsed,
+    )

exa_py-1.13.2/exa_py/research/models.py ADDED Viewed

@@ -0,0 +1,98 @@
+from __future__ import annotations
+import json
+from dataclasses import dataclass
+from typing import Any, Dict, List, Optional
+# Local import placed inside TYPE_CHECKING block to avoid runtime cycles.
+from typing import TYPE_CHECKING
+if TYPE_CHECKING:  # pragma: no cover – for static analysers only
+    from ..api import _Result  # noqa: F401
+@dataclass
+class ResearchTaskId:
+    """Structured research task ID.
+    Attributes
+    ----------
+    id:
+        Unique identifier for the research task.
+    """
+    id: str
+    # ---------------------------------------------------------------------
+    # Pretty representation helpers
+    # ---------------------------------------------------------------------
+    def __str__(self) -> str:  # pragma: no cover – convenience only
+        return f"ID: {self.id}\n"
+@dataclass
+class ResearchTask:
+    """Structured research task.
+    Attributes
+    ----------
+    id:
+        Unique identifier for the research task.
+    status:
+        Current task status
+    instructions:
+        Instructions for the task
+    schema:
+        Output schema defining the task
+    data:
+        JSON-serialisable answer generated by Exa (may be ``None`` until the task
+        completes).
+    citations:
+        Mapping from *root field* in the output schema to the list of search
+        results that were used to generate that part of the answer.
+    """
+    id: str
+    status: str
+    instructions: str
+    schema: Dict[str, Any]
+    data: Optional[Dict[str, Any]]
+    citations: Dict[str, List["_Result"]]
+    # ---------------------------------------------------------------------
+    # Pretty representation helpers
+    # ---------------------------------------------------------------------
+    def __str__(self) -> str:  # pragma: no cover – convenience only
+        """Human-readable representation including *all* relevant fields."""
+        schema_repr = json.dumps(self.schema, indent=2, ensure_ascii=False)
+        data_repr = (
+            json.dumps(self.data, indent=2, ensure_ascii=False)
+            if self.data is not None
+            else "None"
+        )
+        # Render citations grouped by the root field they belong to.
+        if self.citations:
+            # Each key is a root field, each value is a list of _Result objects.
+            citations_lines = []
+            for field, sources in self.citations.items():
+                rendered_sources = "\n    ".join(str(src) for src in sources)
+                citations_lines.append(f"{field}:\n    {rendered_sources}")
+            citations_str = "\n\n".join(citations_lines)
+        else:
+            citations_str = "None"
+        return (
+            f"ID: {self.id}\n"
+            f"Status: {self.status}\n"
+            f"Instructions: {self.instructions}\n"
+            f"Schema:\n{schema_repr}\n"
+            f"Data:\n{data_repr}\n\n"
+            f"Citations:\n{citations_str}"
+        )
+__all__ = [
+    "ResearchTaskId",
+    "ResearchTask",
+]

{exa_py-1.13.1 → exa_py-1.13.2}/exa_py/websets/client.py RENAMED Viewed

@@ -1,7 +1,6 @@
 from __future__ import annotations
 import time
-from datetime import datetime
 from typing import List, Optional, Literal, Dict, Any, Union
 from .types import (
@@ -17,6 +16,7 @@ from .items import WebsetItemsClient
 from .searches import WebsetSearchesClient
 from .enrichments import WebsetEnrichmentsClient
 from .webhooks import WebsetWebhooksClient
+from .streams import StreamsClient
 class WebsetsClient(WebsetsBaseClient):
     """Client for managing Websets."""
@@ -27,6 +27,7 @@ class WebsetsClient(WebsetsBaseClient):
         self.searches = WebsetSearchesClient(client)
         self.enrichments = WebsetEnrichmentsClient(client)
         self.webhooks = WebsetWebhooksClient(client)
+        self.streams = StreamsClient(client)
     def create(self, params: Union[Dict[str, Any], CreateWebsetParameters]) -> Webset:
         """Create a new Webset.

{exa_py-1.13.1 → exa_py-1.13.2}/exa_py/websets/core/base.py RENAMED Viewed

@@ -29,7 +29,7 @@ class ExaBaseModel(BaseModel):
         str_to_upper=False,  # Don't convert strings to uppercase
         from_attributes=True,  # Allow initialization from attributes
         validate_assignment=True,  # Validate on assignment
-        extra='forbid',  # Forbid extra fields
+        extra='allow',
         json_encoders={AnyUrl: str}  # Convert AnyUrl to string when serializing to JSON
     )
@@ -92,5 +92,9 @@ class WebsetsBaseClient:
             # If data is a model instance, convert it to a dict
             data = data.model_dump(by_alias=True, exclude_none=True)
+        # Ensure proper URL construction by removing leading slash from endpoint if present
+        if endpoint.startswith("/"):
+            endpoint = endpoint[1:]
         return self._client.request("/websets/" + endpoint, data=data, method=method, params=params)

exa_py-1.13.2/exa_py/websets/streams/__init__.py ADDED Viewed

@@ -0,0 +1,4 @@
+from .client import StreamsClient
+from .runs import StreamRunsClient
+__all__ = ["StreamsClient", "StreamRunsClient"]

exa-py 1.13.1__tar.gz → 1.13.2__tar.gz

Potentially problematic release.

exa-py 1.13.1__tar.gz → 1.13.2__tar.gz