exa-py 1.13.0__py3-none-any.whl → 1.13.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of exa-py might be problematic. Click here for more details.
- exa_py/api.py +47 -125
- exa_py/research/__init__.py +9 -0
- exa_py/research/client.py +232 -0
- exa_py/research/models.py +98 -0
- exa_py/websets/_generator/pydantic/BaseModel.jinja2 +42 -0
- exa_py/websets/client.py +2 -1
- exa_py/websets/core/base.py +6 -2
- exa_py/websets/streams/__init__.py +4 -0
- exa_py/websets/streams/client.py +96 -0
- exa_py/websets/streams/runs/__init__.py +3 -0
- exa_py/websets/streams/runs/client.py +38 -0
- exa_py/websets/types.py +302 -49
- {exa_py-1.13.0.dist-info → exa_py-1.13.2.dist-info}/METADATA +54 -16
- exa_py-1.13.2.dist-info/RECORD +28 -0
- {exa_py-1.13.0.dist-info → exa_py-1.13.2.dist-info}/WHEEL +1 -2
- exa_py-1.13.0.dist-info/RECORD +0 -21
- exa_py-1.13.0.dist-info/top_level.txt +0 -1
exa_py/api.py
CHANGED
|
@@ -38,6 +38,7 @@ from exa_py.utils import (
|
|
|
38
38
|
)
|
|
39
39
|
from .websets import WebsetsClient
|
|
40
40
|
from .websets.core.base import ExaJSONEncoder
|
|
41
|
+
from .research.client import ResearchClient, AsyncResearchClient
|
|
41
42
|
|
|
42
43
|
is_beta = os.getenv("IS_BETA") == "True"
|
|
43
44
|
|
|
@@ -56,7 +57,7 @@ def snake_to_camel(snake_str: str) -> str:
|
|
|
56
57
|
return "$schema"
|
|
57
58
|
if snake_str == "not_":
|
|
58
59
|
return "not"
|
|
59
|
-
|
|
60
|
+
|
|
60
61
|
components = snake_str.split("_")
|
|
61
62
|
return components[0] + "".join(x.title() for x in components[1:])
|
|
62
63
|
|
|
@@ -261,6 +262,7 @@ class JSONSchema(TypedDict, total=False):
|
|
|
261
262
|
"""Represents a JSON Schema definition used for structured summary output.
|
|
262
263
|
To learn more visit https://json-schema.org/overview/what-is-jsonschema.
|
|
263
264
|
"""
|
|
265
|
+
|
|
264
266
|
schema_: str # This will be converted to "$schema" in JSON
|
|
265
267
|
title: str
|
|
266
268
|
description: str
|
|
@@ -288,7 +290,7 @@ class SummaryContentsOptions(TypedDict, total=False):
|
|
|
288
290
|
|
|
289
291
|
query: str
|
|
290
292
|
schema: JSONSchema
|
|
291
|
-
|
|
293
|
+
|
|
292
294
|
|
|
293
295
|
class ExtrasOptions(TypedDict, total=False):
|
|
294
296
|
"""A class representing additional extraction fields (e.g. links, images)"""
|
|
@@ -669,7 +671,7 @@ class AnswerResponse:
|
|
|
669
671
|
citations (List[AnswerResult]): A list of citations used to generate the answer.
|
|
670
672
|
"""
|
|
671
673
|
|
|
672
|
-
answer: str
|
|
674
|
+
answer: Union[str, dict[str, Any]]
|
|
673
675
|
citations: List[AnswerResult]
|
|
674
676
|
|
|
675
677
|
def __str__(self):
|
|
@@ -765,9 +767,9 @@ class AsyncStreamAnswerResponse:
|
|
|
765
767
|
content = chunk["choices"][0]["delta"].get("content")
|
|
766
768
|
|
|
767
769
|
if (
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
770
|
+
"citations" in chunk
|
|
771
|
+
and chunk["citations"]
|
|
772
|
+
and chunk["citations"] != "null"
|
|
771
773
|
):
|
|
772
774
|
citations = [
|
|
773
775
|
AnswerResult(**to_snake_case(s)) for s in chunk["citations"]
|
|
@@ -776,6 +778,7 @@ class AsyncStreamAnswerResponse:
|
|
|
776
778
|
stream_chunk = StreamChunk(content=content, citations=citations)
|
|
777
779
|
if stream_chunk.has_data():
|
|
778
780
|
yield stream_chunk
|
|
781
|
+
|
|
779
782
|
return generator()
|
|
780
783
|
|
|
781
784
|
def close(self) -> None:
|
|
@@ -834,36 +837,6 @@ def nest_fields(original_dict: Dict, fields_to_nest: List[str], new_key: str):
|
|
|
834
837
|
|
|
835
838
|
return original_dict
|
|
836
839
|
|
|
837
|
-
@dataclass
|
|
838
|
-
class ResearchTaskResponse:
|
|
839
|
-
"""A class representing the response for a research task.
|
|
840
|
-
|
|
841
|
-
Attributes:
|
|
842
|
-
id (str): The unique identifier for the research request.
|
|
843
|
-
status (str): Status of the research request.
|
|
844
|
-
output (Optional[Dict[str, Any]]): The answer structured as JSON, if available.
|
|
845
|
-
citations (Optional[Dict[str, List[_Result]]]): List of citations used to generate the answer, grouped by root field in the output schema.
|
|
846
|
-
"""
|
|
847
|
-
|
|
848
|
-
id: str
|
|
849
|
-
status: str
|
|
850
|
-
output: Optional[Dict[str, Any]]
|
|
851
|
-
citations: Dict[str, List[_Result]]
|
|
852
|
-
|
|
853
|
-
def __str__(self):
|
|
854
|
-
output_repr = (
|
|
855
|
-
json.dumps(self.output, indent=2, ensure_ascii=False)
|
|
856
|
-
if self.output is not None
|
|
857
|
-
else "None"
|
|
858
|
-
)
|
|
859
|
-
citations_str = "\n\n".join(str(src) for src in self.citations)
|
|
860
|
-
return (
|
|
861
|
-
f"ID: {self.id}\n"
|
|
862
|
-
f"Status: {self.status}\n"
|
|
863
|
-
f"Output: {output_repr}\n\n"
|
|
864
|
-
f"Citations:\n{citations_str}"
|
|
865
|
-
)
|
|
866
|
-
|
|
867
840
|
|
|
868
841
|
class Exa:
|
|
869
842
|
"""A client for interacting with Exa API."""
|
|
@@ -872,7 +845,7 @@ class Exa:
|
|
|
872
845
|
self,
|
|
873
846
|
api_key: Optional[str],
|
|
874
847
|
base_url: str = "https://api.exa.ai",
|
|
875
|
-
user_agent: str = "exa-py 1.12.
|
|
848
|
+
user_agent: str = "exa-py 1.12.4",
|
|
876
849
|
):
|
|
877
850
|
"""Initialize the Exa client with the provided API key and optional base URL and user agent.
|
|
878
851
|
|
|
@@ -889,10 +862,22 @@ class Exa:
|
|
|
889
862
|
"API key must be provided as an argument or in EXA_API_KEY environment variable"
|
|
890
863
|
)
|
|
891
864
|
self.base_url = base_url
|
|
892
|
-
self.headers = {
|
|
865
|
+
self.headers = {
|
|
866
|
+
"x-api-key": api_key,
|
|
867
|
+
"User-Agent": user_agent,
|
|
868
|
+
"Content-Type": "application/json",
|
|
869
|
+
}
|
|
893
870
|
self.websets = WebsetsClient(self)
|
|
871
|
+
# Research tasks client (new, mirrors Websets design)
|
|
872
|
+
self.research = ResearchClient(self)
|
|
894
873
|
|
|
895
|
-
def request(
|
|
874
|
+
def request(
|
|
875
|
+
self,
|
|
876
|
+
endpoint: str,
|
|
877
|
+
data: Optional[Union[Dict[str, Any], str]] = None,
|
|
878
|
+
method: str = "POST",
|
|
879
|
+
params: Optional[Dict[str, Any]] = None,
|
|
880
|
+
) -> Union[Dict[str, Any], requests.Response]:
|
|
896
881
|
"""Send a request to the Exa API, optionally streaming if data['stream'] is True.
|
|
897
882
|
|
|
898
883
|
Args:
|
|
@@ -915,13 +900,13 @@ class Exa:
|
|
|
915
900
|
else:
|
|
916
901
|
# Otherwise, serialize the dictionary to JSON if it exists
|
|
917
902
|
json_data = json.dumps(data, cls=ExaJSONEncoder) if data else None
|
|
918
|
-
|
|
903
|
+
|
|
919
904
|
if data and data.get("stream"):
|
|
920
905
|
res = requests.post(
|
|
921
|
-
self.base_url + endpoint,
|
|
906
|
+
self.base_url + endpoint,
|
|
922
907
|
data=json_data,
|
|
923
|
-
headers=self.headers,
|
|
924
|
-
stream=True
|
|
908
|
+
headers=self.headers,
|
|
909
|
+
stream=True,
|
|
925
910
|
)
|
|
926
911
|
return res
|
|
927
912
|
|
|
@@ -931,20 +916,14 @@ class Exa:
|
|
|
931
916
|
)
|
|
932
917
|
elif method.upper() == "POST":
|
|
933
918
|
res = requests.post(
|
|
934
|
-
self.base_url + endpoint,
|
|
935
|
-
data=json_data,
|
|
936
|
-
headers=self.headers
|
|
919
|
+
self.base_url + endpoint, data=json_data, headers=self.headers
|
|
937
920
|
)
|
|
938
921
|
elif method.upper() == "PATCH":
|
|
939
922
|
res = requests.patch(
|
|
940
|
-
self.base_url + endpoint,
|
|
941
|
-
data=json_data,
|
|
942
|
-
headers=self.headers
|
|
923
|
+
self.base_url + endpoint, data=json_data, headers=self.headers
|
|
943
924
|
)
|
|
944
925
|
elif method.upper() == "DELETE":
|
|
945
|
-
res = requests.delete(
|
|
946
|
-
self.base_url + endpoint, headers=self.headers
|
|
947
|
-
)
|
|
926
|
+
res = requests.delete(self.base_url + endpoint, headers=self.headers)
|
|
948
927
|
else:
|
|
949
928
|
raise ValueError(f"Unsupported HTTP method: {method}")
|
|
950
929
|
|
|
@@ -1875,6 +1854,7 @@ class Exa:
|
|
|
1875
1854
|
text: Optional[bool] = False,
|
|
1876
1855
|
system_prompt: Optional[str] = None,
|
|
1877
1856
|
model: Optional[Literal["exa", "exa-pro"]] = None,
|
|
1857
|
+
output_schema: Optional[dict[str, Any]] = None,
|
|
1878
1858
|
) -> Union[AnswerResponse, StreamAnswerResponse]: ...
|
|
1879
1859
|
|
|
1880
1860
|
def answer(
|
|
@@ -1885,6 +1865,7 @@ class Exa:
|
|
|
1885
1865
|
text: Optional[bool] = False,
|
|
1886
1866
|
system_prompt: Optional[str] = None,
|
|
1887
1867
|
model: Optional[Literal["exa", "exa-pro"]] = None,
|
|
1868
|
+
output_schema: Optional[dict[str, Any]] = None,
|
|
1888
1869
|
) -> Union[AnswerResponse, StreamAnswerResponse]:
|
|
1889
1870
|
"""Generate an answer to a query using Exa's search and LLM capabilities.
|
|
1890
1871
|
|
|
@@ -1893,6 +1874,7 @@ class Exa:
|
|
|
1893
1874
|
text (bool, optional): Whether to include full text in the results. Defaults to False.
|
|
1894
1875
|
system_prompt (str, optional): A system prompt to guide the LLM's behavior when generating the answer.
|
|
1895
1876
|
model (str, optional): The model to use for answering. Either "exa" or "exa-pro". Defaults to None.
|
|
1877
|
+
output_schema (dict[str, Any], optional): JSON schema describing the desired answer structure.
|
|
1896
1878
|
|
|
1897
1879
|
Returns:
|
|
1898
1880
|
AnswerResponse: An object containing the answer and citations.
|
|
@@ -1922,6 +1904,7 @@ class Exa:
|
|
|
1922
1904
|
text: bool = False,
|
|
1923
1905
|
system_prompt: Optional[str] = None,
|
|
1924
1906
|
model: Optional[Literal["exa", "exa-pro"]] = None,
|
|
1907
|
+
output_schema: Optional[dict[str, Any]] = None,
|
|
1925
1908
|
) -> StreamAnswerResponse:
|
|
1926
1909
|
"""Generate a streaming answer response.
|
|
1927
1910
|
|
|
@@ -1930,7 +1913,7 @@ class Exa:
|
|
|
1930
1913
|
text (bool): Whether to include full text in the results. Defaults to False.
|
|
1931
1914
|
system_prompt (str, optional): A system prompt to guide the LLM's behavior when generating the answer.
|
|
1932
1915
|
model (str, optional): The model to use for answering. Either "exa" or "exa-pro". Defaults to None.
|
|
1933
|
-
|
|
1916
|
+
output_schema (dict[str, Any], optional): JSON schema describing the desired answer structure.
|
|
1934
1917
|
Returns:
|
|
1935
1918
|
StreamAnswerResponse: An object that can be iterated over to retrieve (partial text, partial citations).
|
|
1936
1919
|
Each iteration yields a tuple of (Optional[str], Optional[List[AnswerResult]]).
|
|
@@ -1941,40 +1924,12 @@ class Exa:
|
|
|
1941
1924
|
raw_response = self.request("/answer", options)
|
|
1942
1925
|
return StreamAnswerResponse(raw_response)
|
|
1943
1926
|
|
|
1944
|
-
def researchTask(
|
|
1945
|
-
self,
|
|
1946
|
-
*,
|
|
1947
|
-
input_instructions: str,
|
|
1948
|
-
output_schema: Dict[str, Any],
|
|
1949
|
-
) -> ResearchTaskResponse:
|
|
1950
|
-
"""Submit a research request to Exa.
|
|
1951
|
-
|
|
1952
|
-
Args:
|
|
1953
|
-
input_instructions (str): The instructions for the research task.
|
|
1954
|
-
output_schema (Dict[str, Any]): JSON schema describing the desired answer structure.
|
|
1955
|
-
"""
|
|
1956
|
-
# Build the request payload expected by the Exa API
|
|
1957
|
-
options = {
|
|
1958
|
-
"input": {"instructions": input_instructions},
|
|
1959
|
-
"output": {"schema": output_schema},
|
|
1960
|
-
}
|
|
1961
|
-
|
|
1962
|
-
response = self.request("/research/tasks", options)
|
|
1963
|
-
|
|
1964
|
-
return ResearchTaskResponse(
|
|
1965
|
-
id=response["id"],
|
|
1966
|
-
status=response["status"],
|
|
1967
|
-
output=response.get("output"),
|
|
1968
|
-
citations={
|
|
1969
|
-
key: [_Result(**to_snake_case(citation)) for citation in citations_list]
|
|
1970
|
-
for key, citations_list in response.get("citations", {}).items()
|
|
1971
|
-
},
|
|
1972
|
-
)
|
|
1973
|
-
|
|
1974
1927
|
|
|
1975
1928
|
class AsyncExa(Exa):
|
|
1976
1929
|
def __init__(self, api_key: str, api_base: str = "https://api.exa.ai"):
|
|
1977
1930
|
super().__init__(api_key, api_base)
|
|
1931
|
+
# Override the synchronous ResearchClient with its async counterpart.
|
|
1932
|
+
self.research = AsyncResearchClient(self)
|
|
1978
1933
|
self._client = None
|
|
1979
1934
|
|
|
1980
1935
|
@property
|
|
@@ -1982,9 +1937,7 @@ class AsyncExa(Exa):
|
|
|
1982
1937
|
# this may only be a
|
|
1983
1938
|
if self._client is None:
|
|
1984
1939
|
self._client = httpx.AsyncClient(
|
|
1985
|
-
base_url=self.base_url,
|
|
1986
|
-
headers=self.headers,
|
|
1987
|
-
timeout=60
|
|
1940
|
+
base_url=self.base_url, headers=self.headers, timeout=60
|
|
1988
1941
|
)
|
|
1989
1942
|
return self._client
|
|
1990
1943
|
|
|
@@ -2004,15 +1957,14 @@ class AsyncExa(Exa):
|
|
|
2004
1957
|
"""
|
|
2005
1958
|
if data.get("stream"):
|
|
2006
1959
|
request = httpx.Request(
|
|
2007
|
-
|
|
2008
|
-
self.base_url + endpoint,
|
|
2009
|
-
json=data,
|
|
2010
|
-
headers=self.headers
|
|
1960
|
+
"POST", self.base_url + endpoint, json=data, headers=self.headers
|
|
2011
1961
|
)
|
|
2012
1962
|
res = await self.client.send(request, stream=True)
|
|
2013
1963
|
return res
|
|
2014
1964
|
|
|
2015
|
-
res = await self.client.post(
|
|
1965
|
+
res = await self.client.post(
|
|
1966
|
+
self.base_url + endpoint, json=data, headers=self.headers
|
|
1967
|
+
)
|
|
2016
1968
|
if res.status_code != 200:
|
|
2017
1969
|
raise ValueError(
|
|
2018
1970
|
f"Request failed with status code {res.status_code}: {res.text}"
|
|
@@ -2250,6 +2202,7 @@ class AsyncExa(Exa):
|
|
|
2250
2202
|
text: Optional[bool] = False,
|
|
2251
2203
|
system_prompt: Optional[str] = None,
|
|
2252
2204
|
model: Optional[Literal["exa", "exa-pro"]] = None,
|
|
2205
|
+
output_schema: Optional[dict[str, Any]] = None,
|
|
2253
2206
|
) -> Union[AnswerResponse, StreamAnswerResponse]:
|
|
2254
2207
|
"""Generate an answer to a query using Exa's search and LLM capabilities.
|
|
2255
2208
|
|
|
@@ -2258,6 +2211,7 @@ class AsyncExa(Exa):
|
|
|
2258
2211
|
text (bool, optional): Whether to include full text in the results. Defaults to False.
|
|
2259
2212
|
system_prompt (str, optional): A system prompt to guide the LLM's behavior when generating the answer.
|
|
2260
2213
|
model (str, optional): The model to use for answering. Either "exa" or "exa-pro". Defaults to None.
|
|
2214
|
+
output_schema (dict[str, Any], optional): JSON schema describing the desired answer structure.
|
|
2261
2215
|
|
|
2262
2216
|
Returns:
|
|
2263
2217
|
AnswerResponse: An object containing the answer and citations.
|
|
@@ -2287,6 +2241,7 @@ class AsyncExa(Exa):
|
|
|
2287
2241
|
text: bool = False,
|
|
2288
2242
|
system_prompt: Optional[str] = None,
|
|
2289
2243
|
model: Optional[Literal["exa", "exa-pro"]] = None,
|
|
2244
|
+
output_schema: Optional[dict[str, Any]] = None,
|
|
2290
2245
|
) -> AsyncStreamAnswerResponse:
|
|
2291
2246
|
"""Generate a streaming answer response.
|
|
2292
2247
|
|
|
@@ -2295,7 +2250,7 @@ class AsyncExa(Exa):
|
|
|
2295
2250
|
text (bool): Whether to include full text in the results. Defaults to False.
|
|
2296
2251
|
system_prompt (str, optional): A system prompt to guide the LLM's behavior when generating the answer.
|
|
2297
2252
|
model (str, optional): The model to use for answering. Either "exa" or "exa-pro". Defaults to None.
|
|
2298
|
-
|
|
2253
|
+
output_schema (dict[str, Any], optional): JSON schema describing the desired answer structure.
|
|
2299
2254
|
Returns:
|
|
2300
2255
|
AsyncStreamAnswerResponse: An object that can be iterated over to retrieve (partial text, partial citations).
|
|
2301
2256
|
Each iteration yields a tuple of (Optional[str], Optional[List[AnswerResult]]).
|
|
@@ -2305,36 +2260,3 @@ class AsyncExa(Exa):
|
|
|
2305
2260
|
options["stream"] = True
|
|
2306
2261
|
raw_response = await self.async_request("/answer", options)
|
|
2307
2262
|
return AsyncStreamAnswerResponse(raw_response)
|
|
2308
|
-
|
|
2309
|
-
async def researchTask(
|
|
2310
|
-
self,
|
|
2311
|
-
*,
|
|
2312
|
-
input_instructions: str,
|
|
2313
|
-
output_schema: Dict[str, Any],
|
|
2314
|
-
) -> ResearchTaskResponse:
|
|
2315
|
-
"""Asynchronously submit a research request to Exa.
|
|
2316
|
-
|
|
2317
|
-
Args:
|
|
2318
|
-
input_instructions (str): The instructions for the research task.
|
|
2319
|
-
output_schema (Dict[str, Any]): JSON schema describing the desired answer structure.
|
|
2320
|
-
|
|
2321
|
-
Returns:
|
|
2322
|
-
ResearchTaskResponse: The parsed response from the Exa API.
|
|
2323
|
-
"""
|
|
2324
|
-
# Build the request payload expected by the Exa API
|
|
2325
|
-
options = {
|
|
2326
|
-
"input": {"instructions": input_instructions},
|
|
2327
|
-
"output": {"schema": output_schema},
|
|
2328
|
-
}
|
|
2329
|
-
|
|
2330
|
-
response = await self.async_request("/research/tasks", options)
|
|
2331
|
-
|
|
2332
|
-
return ResearchTaskResponse(
|
|
2333
|
-
id=response["id"],
|
|
2334
|
-
status=response["status"],
|
|
2335
|
-
output=response.get("output"),
|
|
2336
|
-
citations={
|
|
2337
|
-
key: [_Result(**to_snake_case(citation)) for citation in citations_list]
|
|
2338
|
-
for key, citations_list in response.get("citations", {}).items()
|
|
2339
|
-
},
|
|
2340
|
-
)
|
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
"""Lightweight research client wrappers for the Exa REST API.
|
|
2
|
+
|
|
3
|
+
This module purposefully keeps its import surface minimal to avoid circular
|
|
4
|
+
import problems with :py:mod:`exa_py.api`. Any heavy dependencies (including
|
|
5
|
+
`exa_py.api` itself) are imported lazily **inside** functions. This means
|
|
6
|
+
that type-checkers still see the full, precise types via the ``TYPE_CHECKING``
|
|
7
|
+
block, but at runtime we only pay the cost if/when a helper is actually used.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from typing import TYPE_CHECKING, Any, Dict
|
|
13
|
+
|
|
14
|
+
if TYPE_CHECKING: # pragma: no cover – only for static analysers
|
|
15
|
+
# Import with full type info when static type-checking. `_Result` still
|
|
16
|
+
# lives in ``exa_py.api`` but the response model moved to
|
|
17
|
+
# ``exa_py.research.models``.
|
|
18
|
+
from ..api import _Result # noqa: F401
|
|
19
|
+
from .models import ResearchTask, ResearchTaskId # noqa: F401
|
|
20
|
+
|
|
21
|
+
# ---------------------------------------------------------------------------
|
|
22
|
+
# Public, user-facing clients
|
|
23
|
+
# ---------------------------------------------------------------------------
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class ResearchClient:
    """Synchronous helper namespace accessed via :py:attr:`Exa.research`.

    All HTTP traffic is delegated to the parent client's ``request`` method.
    """

    def __init__(self, parent_client):
        # Keep a reference to the already-constructed parent client so that
        # every call can reuse its ``request`` method.
        self._client = parent_client

    def create_task(
        self,
        *,
        input_instructions: str,
        output_schema: Dict[str, Any],
    ) -> "ResearchTaskId":
        """Submit a research request and return the *task identifier*.

        Parameters
        ----------
        input_instructions:
            Sent as ``input.instructions`` in the request payload.
        output_schema:
            Sent as ``output.schema`` in the request payload.

        Raises
        ------
        RuntimeError
            If the response is not a dict containing an ``id`` key.
        """
        payload = {
            "input": {"instructions": input_instructions},
            "output": {"schema": output_schema},
        }

        raw_response: Dict[str, Any] = self._client.request("/research/tasks", payload)

        # Defensive checks so that we fail loudly if the contract changes.
        if not isinstance(raw_response, dict) or "id" not in raw_response:
            raise RuntimeError(
                f"Unexpected response while creating research task: {raw_response}"
            )

        # Lazily import to avoid circular deps at runtime.
        from .models import ResearchTaskId  # noqa: WPS433 – runtime import

        return ResearchTaskId(id=raw_response["id"])

    def get_task(
        self, id: str
    ) -> "ResearchTask":  # noqa: D401 – imperative mood is fine
        """Fetch the current status / result for a research task.

        Issues a GET to ``/research/tasks/{id}`` and converts the response
        with ``_build_research_task``.
        """
        endpoint = f"/research/tasks/{id}"

        # The new endpoint is a simple GET.
        raw_response: Dict[str, Any] = self._client.request(endpoint, method="GET")

        return _build_research_task(raw_response)

    # ------------------------------------------------------------------
    # Convenience helpers
    # ------------------------------------------------------------------

    def poll_task(
        self,
        id: str,
        *,
        poll_interval: float = 1.0,
        timeout_seconds: int = 15 * 60,
    ) -> "ResearchTask":
        """Blocking helper that polls until task completes or fails.

        Parameters
        ----------
        id:
            The ID of the research task to poll.
        poll_interval:
            Seconds to wait between successive polls (default 1s).
        timeout_seconds:
            Maximum time to wait before raising :class:`TimeoutError` (default 15 min).

        Returns
        -------
        ResearchTask
            The first fetched task whose lower-cased status is one of
            ``completed``, ``failed``, ``complete``, ``finished`` or ``done``.

        Raises
        ------
        TimeoutError
            If no terminal status was observed before the deadline.
        """

        import time

        deadline = time.monotonic() + timeout_seconds

        while True:
            task = self.get_task(id)
            # A non-string status is treated as "not finished yet".
            status = task.status.lower() if isinstance(task.status, str) else ""

            if status in {"completed", "failed", "complete", "finished", "done"}:
                return task

            remaining = deadline - time.monotonic()
            if remaining <= 0:
                raise TimeoutError(
                    f"Research task {id} did not finish within {timeout_seconds} seconds"
                )

            # Never sleep past the deadline: a long poll interval must not
            # stretch the effective timeout. The task is still fetched one
            # last time after this (possibly shortened) sleep.
            time.sleep(min(poll_interval, remaining))
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
class AsyncResearchClient:
    """Async counterpart used via :py:attr:`AsyncExa.research`."""

    def __init__(self, parent_client):
        # Parent async client; its ``async_request`` method and ``client``
        # attribute are used to perform the HTTP calls.
        self._client = parent_client

    async def create_task(
        self,
        *,
        input_instructions: str,
        output_schema: Dict[str, Any],
    ) -> "ResearchTaskId":
        """Submit a research request and return the *task identifier* (async).

        Parameters
        ----------
        input_instructions:
            Sent as ``input.instructions`` in the request payload.
        output_schema:
            Sent as ``output.schema`` in the request payload.

        Raises
        ------
        RuntimeError
            If the response is not a dict containing an ``id`` key.
        """

        payload = {
            "input": {"instructions": input_instructions},
            "output": {"schema": output_schema},
        }

        raw_response: Dict[str, Any] = await self._client.async_request(
            "/research/tasks", payload
        )

        # Defensive checks so that we fail loudly if the contract changes.
        if not isinstance(raw_response, dict) or "id" not in raw_response:
            raise RuntimeError(
                f"Unexpected response while creating research task: {raw_response}"
            )

        # Lazily import to avoid circular deps at runtime.
        from .models import ResearchTaskId  # noqa: WPS433 – runtime import

        return ResearchTaskId(id=raw_response["id"])

    async def get_task(self, id: str) -> "ResearchTask":  # noqa: D401
        """Fetch the current status / result for a research task (async).

        Raises
        ------
        RuntimeError
            If the HTTP status code is 400 or higher.
        """

        endpoint = f"/research/tasks/{id}"

        # Perform GET using the underlying HTTP client because `async_request`
        # only supports POST semantics.
        resp = await self._client.client.get(
            self._client.base_url + endpoint, headers=self._client.headers
        )

        if resp.status_code >= 400:
            raise RuntimeError(
                f"Request failed with status code {resp.status_code}: {resp.text}"
            )

        raw_response: Dict[str, Any] = resp.json()

        return _build_research_task(raw_response)

    # ------------------------------------------------------------------
    # Convenience helpers
    # ------------------------------------------------------------------

    async def poll_task(
        self,
        id: str,
        *,
        poll_interval: float = 1.0,
        timeout_seconds: int = 15 * 60,
    ) -> "ResearchTask":
        """Async helper that polls until task completes or fails.

        Mirrors :py:meth:`ResearchClient.poll_task` but uses ``await`` and
        :py:func:`asyncio.sleep`. Raises :class:`TimeoutError` on timeout.

        Parameters
        ----------
        id:
            The ID of the research task to poll.
        poll_interval:
            Seconds to wait between successive polls (default 1s).
        timeout_seconds:
            Maximum time to wait before raising :class:`TimeoutError` (default 15 min).
        """

        import asyncio
        import time

        deadline = time.monotonic() + timeout_seconds

        while True:
            task = await self.get_task(id)
            # A non-string status is treated as "not finished yet".
            status = task.status.lower() if isinstance(task.status, str) else ""

            if status in {"completed", "failed", "complete", "finished", "done"}:
                return task

            remaining = deadline - time.monotonic()
            if remaining <= 0:
                raise TimeoutError(
                    f"Research task {id} did not finish within {timeout_seconds} seconds"
                )

            # Never sleep past the deadline: a long poll interval must not
            # stretch the effective timeout. The task is still fetched one
            # last time after this (possibly shortened) sleep.
            await asyncio.sleep(min(poll_interval, remaining))
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
# ---------------------------------------------------------------------------
|
|
204
|
+
# Internal helpers (lazy imports to avoid cycles)
|
|
205
|
+
# ---------------------------------------------------------------------------
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def _build_research_task(raw: Dict[str, Any]):
|
|
209
|
+
"""Convert raw API response into a :class:`ResearchTask` instance."""
|
|
210
|
+
|
|
211
|
+
# Defensive check – fail loudly if the API contract changes.
|
|
212
|
+
if not isinstance(raw, dict) or "id" not in raw:
|
|
213
|
+
raise RuntimeError(f"Unexpected response while fetching research task: {raw}")
|
|
214
|
+
|
|
215
|
+
# Lazily import heavy deps to avoid cycles and unnecessary startup cost.
|
|
216
|
+
from .models import ResearchTask # noqa: WPS433 – runtime import
|
|
217
|
+
from ..api import _Result, to_snake_case # noqa: WPS433 – runtime import
|
|
218
|
+
|
|
219
|
+
citations_raw = raw.get("citations", {}) or {}
|
|
220
|
+
citations_parsed = {
|
|
221
|
+
key: [_Result(**to_snake_case(c)) for c in cites]
|
|
222
|
+
for key, cites in citations_raw.items()
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
return ResearchTask(
|
|
226
|
+
id=raw["id"],
|
|
227
|
+
status=raw["status"],
|
|
228
|
+
instructions=raw.get("instructions", ""),
|
|
229
|
+
schema=raw.get("schema", {}),
|
|
230
|
+
data=raw.get("data"),
|
|
231
|
+
citations=citations_parsed,
|
|
232
|
+
)
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from typing import Any, Dict, List, Optional
|
|
6
|
+
|
|
7
|
+
# Local import placed inside TYPE_CHECKING block to avoid runtime cycles.
|
|
8
|
+
from typing import TYPE_CHECKING
|
|
9
|
+
|
|
10
|
+
if TYPE_CHECKING: # pragma: no cover – for static analysers only
|
|
11
|
+
from ..api import _Result # noqa: F401
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
class ResearchTaskId:
    """Identifier returned when a research task is created.

    Attributes
    ----------
    id:
        Unique identifier for the research task.
    """

    id: str

    # ---------------------------------------------------------------------
    # Pretty representation helpers
    # ---------------------------------------------------------------------
    def __str__(self) -> str:  # pragma: no cover – convenience only
        """Render the identifier as a single newline-terminated line."""
        return "ID: {}\n".format(self.id)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass
class ResearchTask:
    """Snapshot of a research task.

    Attributes
    ----------
    id:
        Unique identifier for the research task.
    status:
        Current task status.
    instructions:
        Instructions for the task.
    schema:
        Output schema defining the task.
    data:
        JSON-serialisable answer (may be ``None`` until the task completes).
    citations:
        Mapping from *root field* in the output schema to the list of search
        results that were used to generate that part of the answer.
    """

    id: str
    status: str
    instructions: str
    schema: Dict[str, Any]
    data: Optional[Dict[str, Any]]
    citations: Dict[str, List["_Result"]]

    # ---------------------------------------------------------------------
    # Pretty representation helpers
    # ---------------------------------------------------------------------
    def __str__(self) -> str:  # pragma: no cover – convenience only
        """Return a multi-line, human-readable dump of every field."""
        schema_text = json.dumps(self.schema, indent=2, ensure_ascii=False)

        if self.data is None:
            data_text = "None"
        else:
            data_text = json.dumps(self.data, indent=2, ensure_ascii=False)

        if not self.citations:
            citations_text = "None"
        else:
            # One section per root field, each listing its sources.
            citations_text = "\n\n".join(
                f"{root}:\n " + "\n ".join(str(src) for src in sources)
                for root, sources in self.citations.items()
            )

        # The empty entry produces the blank line before "Citations:".
        return "\n".join(
            [
                f"ID: {self.id}",
                f"Status: {self.status}",
                f"Instructions: {self.instructions}",
                "Schema:",
                schema_text,
                "Data:",
                data_text,
                "",
                "Citations:",
                citations_text,
            ]
        )
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
__all__ = [
|
|
96
|
+
"ResearchTaskId",
|
|
97
|
+
"ResearchTask",
|
|
98
|
+
]
|