exa-py 1.12.1__py3-none-any.whl → 1.12.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- exa_py/api.py +48 -32
- exa_py/research/__init__.py +9 -0
- exa_py/research/client.py +232 -0
- exa_py/research/models.py +98 -0
- {exa_py-1.12.1.dist-info → exa_py-1.12.4.dist-info}/METADATA +42 -20
- {exa_py-1.12.1.dist-info → exa_py-1.12.4.dist-info}/RECORD +7 -4
- {exa_py-1.12.1.dist-info → exa_py-1.12.4.dist-info}/WHEEL +1 -1
exa_py/api.py
CHANGED
@@ -38,6 +38,7 @@ from exa_py.utils import (
 )
 from .websets import WebsetsClient
 from .websets.core.base import ExaJSONEncoder
+from .research.client import ResearchClient, AsyncResearchClient
 
 is_beta = os.getenv("IS_BETA") == "True"
 
@@ -56,7 +57,7 @@ def snake_to_camel(snake_str: str) -> str:
         return "$schema"
     if snake_str == "not_":
         return "not"
-
+
     components = snake_str.split("_")
     return components[0] + "".join(x.title() for x in components[1:])
 
@@ -261,6 +262,7 @@ class JSONSchema(TypedDict, total=False):
     """Represents a JSON Schema definition used for structured summary output.
     To learn more visit https://json-schema.org/overview/what-is-jsonschema.
     """
+
     schema_: str  # This will be converted to "$schema" in JSON
     title: str
     description: str
@@ -288,7 +290,7 @@ class SummaryContentsOptions(TypedDict, total=False):
 
     query: str
     schema: JSONSchema
-
+
 
 class ExtrasOptions(TypedDict, total=False):
     """A class representing additional extraction fields (e.g. links, images)"""
@@ -669,7 +671,7 @@ class AnswerResponse:
         citations (List[AnswerResult]): A list of citations used to generate the answer.
     """
 
-    answer: str
+    answer: Union[str, dict[str, Any]]
     citations: List[AnswerResult]
 
     def __str__(self):
@@ -765,9 +767,9 @@ class AsyncStreamAnswerResponse:
                 content = chunk["choices"][0]["delta"].get("content")
 
                 if (
-
-
-
+                    "citations" in chunk
+                    and chunk["citations"]
+                    and chunk["citations"] != "null"
                 ):
                     citations = [
                         AnswerResult(**to_snake_case(s)) for s in chunk["citations"]
@@ -776,6 +778,7 @@ class AsyncStreamAnswerResponse:
                 stream_chunk = StreamChunk(content=content, citations=citations)
                 if stream_chunk.has_data():
                     yield stream_chunk
+
         return generator()
 
     def close(self) -> None:
@@ -842,7 +845,7 @@ class Exa:
         self,
         api_key: Optional[str],
         base_url: str = "https://api.exa.ai",
-        user_agent: str = "exa-py 1.12.
+        user_agent: str = "exa-py 1.12.4",
     ):
         """Initialize the Exa client with the provided API key and optional base URL and user agent.
 
@@ -859,10 +862,22 @@ class Exa:
                 "API key must be provided as an argument or in EXA_API_KEY environment variable"
             )
         self.base_url = base_url
-        self.headers = {
+        self.headers = {
+            "x-api-key": api_key,
+            "User-Agent": user_agent,
+            "Content-Type": "application/json",
+        }
         self.websets = WebsetsClient(self)
+        # Research tasks client (new, mirrors Websets design)
+        self.research = ResearchClient(self)
 
-    def request(
+    def request(
+        self,
+        endpoint: str,
+        data: Optional[Union[Dict[str, Any], str]] = None,
+        method: str = "POST",
+        params: Optional[Dict[str, Any]] = None,
+    ) -> Union[Dict[str, Any], requests.Response]:
         """Send a request to the Exa API, optionally streaming if data['stream'] is True.
 
         Args:
@@ -885,13 +900,13 @@ class Exa:
         else:
             # Otherwise, serialize the dictionary to JSON if it exists
             json_data = json.dumps(data, cls=ExaJSONEncoder) if data else None
-
+
         if data and data.get("stream"):
             res = requests.post(
-                self.base_url + endpoint,
+                self.base_url + endpoint,
                 data=json_data,
-                headers=self.headers,
-                stream=True
+                headers=self.headers,
+                stream=True,
             )
             return res
 
@@ -901,20 +916,14 @@ class Exa:
             )
         elif method.upper() == "POST":
             res = requests.post(
-                self.base_url + endpoint,
-                data=json_data,
-                headers=self.headers
+                self.base_url + endpoint, data=json_data, headers=self.headers
             )
         elif method.upper() == "PATCH":
             res = requests.patch(
-                self.base_url + endpoint,
-                data=json_data,
-                headers=self.headers
+                self.base_url + endpoint, data=json_data, headers=self.headers
            )
         elif method.upper() == "DELETE":
-            res = requests.delete(
-                self.base_url + endpoint, headers=self.headers
-            )
+            res = requests.delete(self.base_url + endpoint, headers=self.headers)
         else:
             raise ValueError(f"Unsupported HTTP method: {method}")
 
@@ -1845,6 +1854,7 @@ class Exa:
         text: Optional[bool] = False,
         system_prompt: Optional[str] = None,
         model: Optional[Literal["exa", "exa-pro"]] = None,
+        output_schema: Optional[dict[str, Any]] = None,
     ) -> Union[AnswerResponse, StreamAnswerResponse]: ...
 
     def answer(
@@ -1855,6 +1865,7 @@ class Exa:
         text: Optional[bool] = False,
         system_prompt: Optional[str] = None,
         model: Optional[Literal["exa", "exa-pro"]] = None,
+        output_schema: Optional[dict[str, Any]] = None,
     ) -> Union[AnswerResponse, StreamAnswerResponse]:
         """Generate an answer to a query using Exa's search and LLM capabilities.
 
@@ -1863,6 +1874,7 @@ class Exa:
             text (bool, optional): Whether to include full text in the results. Defaults to False.
             system_prompt (str, optional): A system prompt to guide the LLM's behavior when generating the answer.
            model (str, optional): The model to use for answering. Either "exa" or "exa-pro". Defaults to None.
+            output_schema (dict[str, Any], optional): JSON schema describing the desired answer structure.
 
         Returns:
             AnswerResponse: An object containing the answer and citations.
@@ -1892,6 +1904,7 @@ class Exa:
         text: bool = False,
         system_prompt: Optional[str] = None,
         model: Optional[Literal["exa", "exa-pro"]] = None,
+        output_schema: Optional[dict[str, Any]] = None,
     ) -> StreamAnswerResponse:
         """Generate a streaming answer response.
 
@@ -1900,7 +1913,7 @@ class Exa:
             text (bool): Whether to include full text in the results. Defaults to False.
             system_prompt (str, optional): A system prompt to guide the LLM's behavior when generating the answer.
             model (str, optional): The model to use for answering. Either "exa" or "exa-pro". Defaults to None.
-
+            output_schema (dict[str, Any], optional): JSON schema describing the desired answer structure.
         Returns:
             StreamAnswerResponse: An object that can be iterated over to retrieve (partial text, partial citations).
             Each iteration yields a tuple of (Optional[str], Optional[List[AnswerResult]]).
@@ -1911,9 +1924,12 @@ class Exa:
         raw_response = self.request("/answer", options)
         return StreamAnswerResponse(raw_response)
 
+
 class AsyncExa(Exa):
     def __init__(self, api_key: str, api_base: str = "https://api.exa.ai"):
         super().__init__(api_key, api_base)
+        # Override the synchronous ResearchClient with its async counterpart.
+        self.research = AsyncResearchClient(self)
         self._client = None
 
     @property
@@ -1921,9 +1937,7 @@ class AsyncExa(Exa):
         # this may only be a
         if self._client is None:
             self._client = httpx.AsyncClient(
-                base_url=self.base_url,
-                headers=self.headers,
-                timeout=60
+                base_url=self.base_url, headers=self.headers, timeout=60
             )
         return self._client
 
@@ -1943,15 +1957,14 @@ class AsyncExa(Exa):
         """
         if data.get("stream"):
             request = httpx.Request(
-
-                self.base_url + endpoint,
-                json=data,
-                headers=self.headers
+                "POST", self.base_url + endpoint, json=data, headers=self.headers
             )
             res = await self.client.send(request, stream=True)
             return res
 
-        res = await self.client.post(
+        res = await self.client.post(
+            self.base_url + endpoint, json=data, headers=self.headers
+        )
         if res.status_code != 200:
             raise ValueError(
                 f"Request failed with status code {res.status_code}: {res.text}"
@@ -2189,6 +2202,7 @@ class AsyncExa(Exa):
         text: Optional[bool] = False,
         system_prompt: Optional[str] = None,
         model: Optional[Literal["exa", "exa-pro"]] = None,
+        output_schema: Optional[dict[str, Any]] = None,
     ) -> Union[AnswerResponse, StreamAnswerResponse]:
         """Generate an answer to a query using Exa's search and LLM capabilities.
 
@@ -2197,6 +2211,7 @@ class AsyncExa(Exa):
             text (bool, optional): Whether to include full text in the results. Defaults to False.
             system_prompt (str, optional): A system prompt to guide the LLM's behavior when generating the answer.
             model (str, optional): The model to use for answering. Either "exa" or "exa-pro". Defaults to None.
+            output_schema (dict[str, Any], optional): JSON schema describing the desired answer structure.
 
         Returns:
             AnswerResponse: An object containing the answer and citations.
@@ -2226,6 +2241,7 @@ class AsyncExa(Exa):
         text: bool = False,
         system_prompt: Optional[str] = None,
         model: Optional[Literal["exa", "exa-pro"]] = None,
+        output_schema: Optional[dict[str, Any]] = None,
     ) -> AsyncStreamAnswerResponse:
         """Generate a streaming answer response.
 
@@ -2234,7 +2250,7 @@ class AsyncExa(Exa):
             text (bool): Whether to include full text in the results. Defaults to False.
             system_prompt (str, optional): A system prompt to guide the LLM's behavior when generating the answer.
             model (str, optional): The model to use for answering. Either "exa" or "exa-pro". Defaults to None.
-
+            output_schema (dict[str, Any], optional): JSON schema describing the desired answer structure.
         Returns:
             AsyncStreamAnswerResponse: An object that can be iterated over to retrieve (partial text, partial citations).
             Each iteration yields a tuple of (Optional[str], Optional[List[AnswerResult]]).
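Beyond formatting cleanups, the api.py hunks above do two things: wire a new `research` attribute to `ResearchClient`/`AsyncResearchClient`, and add an `output_schema` parameter to `answer`/`stream_answer`, widening `AnswerResponse.answer` to `Union[str, dict[str, Any]]`. A minimal sketch of the structured-answer path follows; the query and schema are illustrative, and the exact shape of the returned dict is whatever your schema requests:

```python
# Sketch only: the schema and query are illustrative, not from the package.
from exa_py import Exa

exa = Exa(api_key="your-api-key")

population_schema = {
    "type": "object",
    "required": ["population"],
    "properties": {
        "population": {"type": "string", "description": "Latest population estimate."}
    },
}

response = exa.answer(
    "What is the population of San Francisco?",
    output_schema=population_schema,
)

# With output_schema supplied, response.answer may be a dict rather than a str,
# matching the widened Union[str, dict[str, Any]] annotation above.
print(response.answer)
for citation in response.citations:
    print(citation)
```

Per the hunks above, `stream_answer` on both `Exa` and `AsyncExa` accepts the same `output_schema` keyword.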
exa_py/research/client.py
ADDED
@@ -0,0 +1,232 @@
+"""Lightweight research client wrappers for the Exa REST API.
+
+This module purposefully keeps its import surface minimal to avoid circular
+import problems with :pymod:`exa_py.api`. Any heavy dependencies (including
+`exa_py.api` itself) are imported lazily **inside** functions. This means
+that type-checkers still see the full, precise types via the ``TYPE_CHECKING``
+block, but at runtime we only pay the cost if/when a helper is actually used.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any, Dict
+
+if TYPE_CHECKING:  # pragma: no cover – only for static analysers
+    # Import with full type info when static type-checking. `_Result` still
+    # lives in ``exa_py.api`` but the response model moved to
+    # ``exa_py.research.models``.
+    from ..api import _Result  # noqa: F401
+    from .models import ResearchTask, ResearchTaskId  # noqa: F401
+
+# ---------------------------------------------------------------------------
+# Public, user-facing clients
+# ---------------------------------------------------------------------------
+
+
+class ResearchClient:
+    """Synchronous helper namespace accessed via :pyattr:`Exa.research`."""
+
+    def __init__(self, parent_client):
+        # A reference to the *already-constructed* ``Exa`` instance so that we
+        # can piggy-back on its HTTP plumbing (headers, base URL, retries, …).
+        self._client = parent_client
+
+    def create_task(
+        self,
+        *,
+        input_instructions: str,
+        output_schema: Dict[str, Any],
+    ) -> "ResearchTaskId":
+        """Submit a research request and return the *task identifier*."""
+        payload = {
+            "input": {"instructions": input_instructions},
+            "output": {"schema": output_schema},
+        }
+
+        raw_response: Dict[str, Any] = self._client.request("/research/tasks", payload)
+
+        # Defensive checks so that we fail loudly if the contract changes.
+        if not isinstance(raw_response, dict) or "id" not in raw_response:
+            raise RuntimeError(
+                f"Unexpected response while creating research task: {raw_response}"
+            )
+
+        # Lazily import to avoid circular deps at runtime.
+        from .models import ResearchTaskId  # noqa: WPS433 – runtime import
+
+        return ResearchTaskId(id=raw_response["id"])
+
+    def get_task(
+        self, id: str
+    ) -> "ResearchTask":  # noqa: D401 – imperative mood is fine
+        """Fetch the current status / result for a research task."""
+        endpoint = f"/research/tasks/{id}"
+
+        # The new endpoint is a simple GET.
+        raw_response: Dict[str, Any] = self._client.request(endpoint, method="GET")
+
+        return _build_research_task(raw_response)
+
+    # ------------------------------------------------------------------
+    # Convenience helpers
+    # ------------------------------------------------------------------
+
+    def poll_task(
+        self,
+        id: str,
+        *,
+        poll_interval: float = 1.0,
+        timeout_seconds: int = 15 * 60,
+    ) -> "ResearchTask":
+        """Blocking helper that polls until task completes or fails.
+
+        Parameters
+        ----------
+        id:
+            The ID of the research task to poll.
+        poll_interval:
+            Seconds to wait between successive polls (default 1s).
+        timeout_seconds:
+            Maximum time to wait before raising :class:`TimeoutError` (default 15 min).
+        """
+
+        import time
+
+        deadline = time.monotonic() + timeout_seconds
+
+        while True:
+            task = self.get_task(id)
+            status = task.status.lower() if isinstance(task.status, str) else ""
+
+            if status in {"completed", "failed", "complete", "finished", "done"}:
+                return task
+
+            if time.monotonic() > deadline:
+                raise TimeoutError(
+                    f"Research task {id} did not finish within {timeout_seconds} seconds"
+                )
+
+            time.sleep(poll_interval)
+
+
+class AsyncResearchClient:
+    """Async counterpart used via :pyattr:`AsyncExa.research`."""
+
+    def __init__(self, parent_client):
+        self._client = parent_client
+
+    async def create_task(
+        self,
+        *,
+        input_instructions: str,
+        output_schema: Dict[str, Any],
+    ) -> "ResearchTaskId":
+        """Submit a research request and return the *task identifier* (async)."""
+
+        payload = {
+            "input": {"instructions": input_instructions},
+            "output": {"schema": output_schema},
+        }
+
+        raw_response: Dict[str, Any] = await self._client.async_request(
+            "/research/tasks", payload
+        )
+
+        # Defensive checks so that we fail loudly if the contract changes.
+        if not isinstance(raw_response, dict) or "id" not in raw_response:
+            raise RuntimeError(
+                f"Unexpected response while creating research task: {raw_response}"
+            )
+
+        # Lazily import to avoid circular deps at runtime.
+        from .models import ResearchTaskId  # noqa: WPS433 – runtime import
+
+        return ResearchTaskId(id=raw_response["id"])
+
+    async def get_task(self, id: str) -> "ResearchTask":  # noqa: D401
+        """Fetch the current status / result for a research task (async)."""
+
+        endpoint = f"/research/tasks/{id}"
+
+        # Perform GET using the underlying HTTP client because `async_request`
+        # only supports POST semantics.
+        resp = await self._client.client.get(
+            self._client.base_url + endpoint, headers=self._client.headers
+        )
+
+        if resp.status_code >= 400:
+            raise RuntimeError(
+                f"Request failed with status code {resp.status_code}: {resp.text}"
+            )
+
+        raw_response: Dict[str, Any] = resp.json()
+
+        return _build_research_task(raw_response)
+
+    # ------------------------------------------------------------------
+    # Convenience helpers
+    # ------------------------------------------------------------------
+
+    async def poll_task(
+        self,
+        id: str,
+        *,
+        poll_interval: float = 1.0,
+        timeout_seconds: int = 15 * 60,
+    ) -> "ResearchTask":
+        """Async helper that polls until task completes or fails.
+
+        Mirrors :py:meth:`ResearchClient.poll_task` but uses ``await`` and
+        :pyfunc:`asyncio.sleep`. Raises :class:`TimeoutError` on timeout.
+        """
+
+        import asyncio
+        import time
+
+        deadline = time.monotonic() + timeout_seconds
+
+        while True:
+            task = await self.get_task(id)
+            status = task.status.lower() if isinstance(task.status, str) else ""
+
+            if status in {"completed", "failed", "complete", "finished", "done"}:
+                return task
+
+            if time.monotonic() > deadline:
+                raise TimeoutError(
+                    f"Research task {id} did not finish within {timeout_seconds} seconds"
+                )
+
+            await asyncio.sleep(poll_interval)
+
+
+# ---------------------------------------------------------------------------
+# Internal helpers (lazy imports to avoid cycles)
+# ---------------------------------------------------------------------------
+
+
+def _build_research_task(raw: Dict[str, Any]):
+    """Convert raw API response into a :class:`ResearchTask` instance."""
+
+    # Defensive check – fail loudly if the API contract changes.
+    if not isinstance(raw, dict) or "id" not in raw:
+        raise RuntimeError(f"Unexpected response while fetching research task: {raw}")
+
+    # Lazily import heavy deps to avoid cycles and unnecessary startup cost.
+    from .models import ResearchTask  # noqa: WPS433 – runtime import
+    from ..api import _Result, to_snake_case  # noqa: WPS433 – runtime import
+
+    citations_raw = raw.get("citations", {}) or {}
+    citations_parsed = {
+        key: [_Result(**to_snake_case(c)) for c in cites]
+        for key, cites in citations_raw.items()
+    }
+
+    return ResearchTask(
+        id=raw["id"],
+        status=raw["status"],
+        instructions=raw.get("instructions", ""),
+        schema=raw.get("schema", {}),
+        data=raw.get("data"),
+        citations=citations_parsed,
+    )
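Taken together: `create_task` POSTs to `/research/tasks`, `get_task` GETs `/research/tasks/{id}`, and `poll_task` loops on `get_task` until a terminal status or a `TimeoutError`. A sketch of the blocking workflow, with illustrative instructions and schema:

```python
# Illustrative synchronous research-task workflow using the client above.
from exa_py import Exa

exa = Exa(api_key="your-api-key")

created = exa.research.create_task(
    input_instructions="List three peer-reviewed papers on tidal energy.",
    output_schema={
        "type": "object",
        "required": ["papers"],
        "properties": {"papers": {"type": "array", "items": {"type": "string"}}},
    },
)

# Blocks until the task reaches a terminal status or 10 minutes elapse.
task = exa.research.poll_task(created.id, poll_interval=2.0, timeout_seconds=600)

print(task.status)     # e.g. "completed" or "failed"
print(task.data)       # dict shaped by the schema above (None until finished)
print(task.citations)  # {root schema field: [search results used]}
```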
exa_py/research/models.py
ADDED
@@ -0,0 +1,98 @@
+from __future__ import annotations
+
+import json
+from dataclasses import dataclass
+from typing import Any, Dict, List, Optional
+
+# Local import placed inside TYPE_CHECKING block to avoid runtime cycles.
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:  # pragma: no cover – for static analysers only
+    from ..api import _Result  # noqa: F401
+
+
+@dataclass
+class ResearchTaskId:
+    """Structured research task ID.
+
+    Attributes
+    ----------
+    id:
+        Unique identifier for the research task.
+    """
+
+    id: str
+
+    # ---------------------------------------------------------------------
+    # Pretty representation helpers
+    # ---------------------------------------------------------------------
+    def __str__(self) -> str:  # pragma: no cover – convenience only
+        return f"ID: {self.id}\n"
+
+
+@dataclass
+class ResearchTask:
+    """Structured research task.
+
+    Attributes
+    ----------
+    id:
+        Unique identifier for the research task.
+    status:
+        Current task status
+    instructions:
+        Instructions for the task
+    schema:
+        Output schema defining the task
+    data:
+        JSON-serialisable answer generated by Exa (may be ``None`` until the task
+        completes).
+    citations:
+        Mapping from *root field* in the output schema to the list of search
+        results that were used to generate that part of the answer.
+    """
+
+    id: str
+    status: str
+    instructions: str
+    schema: Dict[str, Any]
+    data: Optional[Dict[str, Any]]
+    citations: Dict[str, List["_Result"]]
+
+    # ---------------------------------------------------------------------
+    # Pretty representation helpers
+    # ---------------------------------------------------------------------
+    def __str__(self) -> str:  # pragma: no cover – convenience only
+        """Human-readable representation including *all* relevant fields."""
+        schema_repr = json.dumps(self.schema, indent=2, ensure_ascii=False)
+        data_repr = (
+            json.dumps(self.data, indent=2, ensure_ascii=False)
+            if self.data is not None
+            else "None"
+        )
+
+        # Render citations grouped by the root field they belong to.
+        if self.citations:
+            # Each key is a root field, each value is a list of _Result objects.
+            citations_lines = []
+            for field, sources in self.citations.items():
+                rendered_sources = "\n ".join(str(src) for src in sources)
+                citations_lines.append(f"{field}:\n {rendered_sources}")
+            citations_str = "\n\n".join(citations_lines)
+        else:
+            citations_str = "None"
+
+        return (
+            f"ID: {self.id}\n"
+            f"Status: {self.status}\n"
+            f"Instructions: {self.instructions}\n"
+            f"Schema:\n{schema_repr}\n"
+            f"Data:\n{data_repr}\n\n"
+            f"Citations:\n{citations_str}"
+        )
+
+
+__all__ = [
+    "ResearchTaskId",
+    "ResearchTask",
+]
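`ResearchTask` and `ResearchTaskId` are plain dataclasses, so results can be inspected field by field or printed via the `__str__` helpers above. A small sketch with made-up values (in practice `_build_research_task` in client.py constructs these for you):

```python
# Made-up values, just to show the dataclass surface; citations would normally
# map each root schema field to a list of _Result search results.
from exa_py.research.models import ResearchTask, ResearchTaskId

task = ResearchTask(
    id="task_123",
    status="completed",
    instructions="Summarize recent fusion-energy milestones.",
    schema={"type": "object", "properties": {"summary": {"type": "string"}}},
    data={"summary": "..."},
    citations={},
)

print(ResearchTaskId(id="task_123"))  # -> "ID: task_123"
print(task)  # multi-line report: ID, Status, Instructions, Schema, Data, Citations
```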
{exa_py-1.12.1.dist-info → exa_py-1.12.4.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: exa-py
-Version: 1.12.
+Version: 1.12.4
 Summary: Python SDK for Exa API.
 License: MIT
 Author: Exa AI
@@ -45,14 +45,12 @@ exa = Exa(api_key="your-api-key")
 ```
 
 ## Common requests
+
 ```python
 
 # basic search
 results = exa.search("This is a Exa query:")
 
-# autoprompted search
-results = exa.search("autopromptable query", use_autoprompt=True)
-
 # keyword search (non-neural)
 results = exa.search("Google-style query", type="keyword")
 
@@ -65,14 +63,10 @@ exa = Exa(api_key="your-api-key")
 # search and get text contents
 results = exa.search_and_contents("This is a Exa query:")
 
-# search and get highlights
-results = exa.search_and_contents("This is a Exa query:", highlights=True)
-
 # search and get contents with contents options
-results = exa.search_and_contents("This is a Exa query:",
-    text={"include_html_tags": True, "max_characters": 1000}
-
-
+results = exa.search_and_contents("This is a Exa query:",
+    text={"include_html_tags": True, "max_characters": 1000})
+
 # find similar documents
 results = exa.find_similar("https://example.com")
 
@@ -80,18 +74,14 @@ exa = Exa(api_key="your-api-key")
 results = exa.find_similar("https://example.com", exclude_source_domain=True)
 
 # find similar with contents
-results = exa.find_similar_and_contents("https://example.com", text=True
+results = exa.find_similar_and_contents("https://example.com", text=True)
 
 # get text contents
-results = exa.get_contents(["
-
-# get highlights
-results = exa.get_contents(["urls"], highlights=True)
+results = exa.get_contents(["tesla.com"])
 
 # get contents with contents options
-results = exa.get_contents(["urls"],
-    text={"include_html_tags": True, "max_characters": 1000}
-    highlights={"highlights_per_url": 2, "num_sentences": 1, "query": "This is the highlight query:"})
+results = exa.get_contents(["urls"],
+    text={"include_html_tags": True, "max_characters": 1000})
 
 # basic answer
 response = exa.answer("This is a query to answer a question")
@@ -106,6 +96,38 @@ exa = Exa(api_key="your-api-key")
 for chunk in response:
     print(chunk, end='', flush=True)
 
+# research task example – answer a question with citations
+# Example prompt & schema inspired by the TypeScript example.
+QUESTION = (
+    "Summarize the history of San Francisco highlighting one or two major events "
+    "for each decade from 1850 to 1950"
+)
+OUTPUT_SCHEMA: Dict[str, Any] = {
+    "type": "object",
+    "required": ["timeline"],
+    "properties": {
+        "timeline": {
+            "type": "array",
+            "items": {
+                "type": "object",
+                "required": ["decade", "notableEvents"],
+                "properties": {
+                    "decade": {
+                        "type": "string",
+                        "description": 'Decade label e.g. "1850s"',
+                    },
+                    "notableEvents": {
+                        "type": "string",
+                        "description": "A summary of notable events.",
+                    },
+                },
+            },
+        },
+    },
+}
+resp = exa.research.create_task(
+    input_instructions=QUESTION,
+    output_schema=OUTPUT_SCHEMA,
+)
 ```
 
-
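The README's new research example is synchronous and its `OUTPUT_SCHEMA: Dict[str, Any]` annotation assumes `Dict` and `Any` are imported from `typing`. For completeness, a hedged async sketch of the same flow via `AsyncExa`, relying on the `AsyncResearchClient` wiring shown in the api.py hunks above; the instructions and schema here are illustrative:

```python
# Async sketch (illustrative values); mirrors the README research example.
import asyncio

from exa_py.api import AsyncExa


async def main() -> None:
    exa = AsyncExa(api_key="your-api-key")
    created = await exa.research.create_task(
        input_instructions="Name two major earthquakes that affected San Francisco.",
        output_schema={
            "type": "object",
            "required": ["earthquakes"],
            "properties": {
                "earthquakes": {"type": "array", "items": {"type": "string"}},
            },
        },
    )
    task = await exa.research.poll_task(created.id)
    print(task.data)


asyncio.run(main())
```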
{exa_py-1.12.1.dist-info → exa_py-1.12.4.dist-info}/RECORD
CHANGED
@@ -1,6 +1,9 @@
 exa_py/__init__.py,sha256=M2GC9oSdoV6m2msboW0vMWWl8wrth4o6gmEV4MYLGG8,66
-exa_py/api.py,sha256=
+exa_py/api.py,sha256=Bn7h_eRvXmwBUmJi2B2JpHAQPrHfbwKf0A-XVXLjqa0,84876
 exa_py/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+exa_py/research/__init__.py,sha256=D1xgm4VlbWtRb1cMgshcW4dIyR7IXhOW2s7ihxcE1Jc,195
+exa_py/research/client.py,sha256=Zno5xblfwhX8gWgc4OvI24a-ZS7_g1b32_tr6j7C7Jg,8217
+exa_py/research/models.py,sha256=WXTnALhM9FcVQ95Tzzc5EDKU48hyPhu8RSMmipqCjOk,2982
 exa_py/utils.py,sha256=Rc1FJjoR9LQ7L_OJM91Sd1GNkbHjcLyEvJENhRix6gc,2405
 exa_py/websets/__init__.py,sha256=uOBAb9VrIHrPKoddGOp2ai2KgWlyUVCLMZqfbGOlboA,70
 exa_py/websets/_generator/pydantic/BaseModel.jinja2,sha256=RUDCmPZVamoVx1WudylscYFfDhGoNNtRYlpTvKjAiuA,1276
@@ -16,6 +19,6 @@ exa_py/websets/searches/client.py,sha256=X3f7axWGfecmxf-2tBTX0Yf_--xToz1X8ZHbbud
 exa_py/websets/types.py,sha256=jKnJFAHTFN55EzsusgDce-yux71zVbdSJ1m8utR4EjU,28096
 exa_py/websets/webhooks/__init__.py,sha256=iTPBCxFd73z4RifLQMX6iRECx_6pwlI5qscLNjMOUHE,77
 exa_py/websets/webhooks/client.py,sha256=zsIRMTeJU65yj-zo7Zz-gG02Prtzgcx6utGFSoY4HQQ,4222
-exa_py-1.12.
-exa_py-1.12.
-exa_py-1.12.
+exa_py-1.12.4.dist-info/METADATA,sha256=DYYA35UrWmW9ND3x5L5YcDr1tPJN05x64UV4nJsRg1k,4098
+exa_py-1.12.4.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+exa_py-1.12.4.dist-info/RECORD,,