exa-py 1.12.1__py3-none-any.whl → 1.12.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of exa-py might be problematic. Click here for more details.
- exa_py/api.py +86 -35
- exa_py/research/__init__.py +8 -0
- exa_py/research/client.py +257 -0
- exa_py/research/models.py +57 -0
- {exa_py-1.12.1.dist-info → exa_py-1.12.3.dist-info}/METADATA +42 -20
- {exa_py-1.12.1.dist-info → exa_py-1.12.3.dist-info}/RECORD +7 -4
- {exa_py-1.12.1.dist-info → exa_py-1.12.3.dist-info}/WHEEL +1 -1
exa_py/api.py
CHANGED
|
@@ -38,6 +38,8 @@ from exa_py.utils import (
|
|
|
38
38
|
)
|
|
39
39
|
from .websets import WebsetsClient
|
|
40
40
|
from .websets.core.base import ExaJSONEncoder
|
|
41
|
+
from .research.client import ResearchClient, AsyncResearchClient
|
|
42
|
+
from .research.models import ResearchTaskResponse # noqa: E402,F401
|
|
41
43
|
|
|
42
44
|
is_beta = os.getenv("IS_BETA") == "True"
|
|
43
45
|
|
|
@@ -56,7 +58,7 @@ def snake_to_camel(snake_str: str) -> str:
|
|
|
56
58
|
return "$schema"
|
|
57
59
|
if snake_str == "not_":
|
|
58
60
|
return "not"
|
|
59
|
-
|
|
61
|
+
|
|
60
62
|
components = snake_str.split("_")
|
|
61
63
|
return components[0] + "".join(x.title() for x in components[1:])
|
|
62
64
|
|
|
@@ -261,6 +263,7 @@ class JSONSchema(TypedDict, total=False):
|
|
|
261
263
|
"""Represents a JSON Schema definition used for structured summary output.
|
|
262
264
|
To learn more visit https://json-schema.org/overview/what-is-jsonschema.
|
|
263
265
|
"""
|
|
266
|
+
|
|
264
267
|
schema_: str # This will be converted to "$schema" in JSON
|
|
265
268
|
title: str
|
|
266
269
|
description: str
|
|
@@ -288,7 +291,7 @@ class SummaryContentsOptions(TypedDict, total=False):
|
|
|
288
291
|
|
|
289
292
|
query: str
|
|
290
293
|
schema: JSONSchema
|
|
291
|
-
|
|
294
|
+
|
|
292
295
|
|
|
293
296
|
class ExtrasOptions(TypedDict, total=False):
|
|
294
297
|
"""A class representing additional extraction fields (e.g. links, images)"""
|
|
@@ -669,7 +672,7 @@ class AnswerResponse:
|
|
|
669
672
|
citations (List[AnswerResult]): A list of citations used to generate the answer.
|
|
670
673
|
"""
|
|
671
674
|
|
|
672
|
-
answer: str
|
|
675
|
+
answer: Union[str, dict[str, Any]]
|
|
673
676
|
citations: List[AnswerResult]
|
|
674
677
|
|
|
675
678
|
def __str__(self):
|
|
@@ -765,9 +768,9 @@ class AsyncStreamAnswerResponse:
|
|
|
765
768
|
content = chunk["choices"][0]["delta"].get("content")
|
|
766
769
|
|
|
767
770
|
if (
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
+
"citations" in chunk
|
|
772
|
+
and chunk["citations"]
|
|
773
|
+
and chunk["citations"] != "null"
|
|
771
774
|
):
|
|
772
775
|
citations = [
|
|
773
776
|
AnswerResult(**to_snake_case(s)) for s in chunk["citations"]
|
|
@@ -776,6 +779,7 @@ class AsyncStreamAnswerResponse:
|
|
|
776
779
|
stream_chunk = StreamChunk(content=content, citations=citations)
|
|
777
780
|
if stream_chunk.has_data():
|
|
778
781
|
yield stream_chunk
|
|
782
|
+
|
|
779
783
|
return generator()
|
|
780
784
|
|
|
781
785
|
def close(self) -> None:
|
|
@@ -835,6 +839,37 @@ def nest_fields(original_dict: Dict, fields_to_nest: List[str], new_key: str):
|
|
|
835
839
|
return original_dict
|
|
836
840
|
|
|
837
841
|
|
|
842
|
+
@dataclass
|
|
843
|
+
class ResearchTaskResponse:
|
|
844
|
+
"""A class representing the response for a research task.
|
|
845
|
+
|
|
846
|
+
Attributes:
|
|
847
|
+
id (str): The unique identifier for the research request.
|
|
848
|
+
status (str): Status of the research request.
|
|
849
|
+
output (Optional[Dict[str, Any]]): The answer structured as JSON, if available.
|
|
850
|
+
citations (Dict[str, List[_Result]]): Citations used to generate the answer, keyed by root field in the output schema.
|
|
851
|
+
"""
|
|
852
|
+
|
|
853
|
+
id: str
|
|
854
|
+
status: str
|
|
855
|
+
output: Optional[Dict[str, Any]]
|
|
856
|
+
citations: Dict[str, List[_Result]]
|
|
857
|
+
|
|
858
|
+
def __str__(self):
|
|
859
|
+
output_repr = (
|
|
860
|
+
json.dumps(self.output, indent=2, ensure_ascii=False)
|
|
861
|
+
if self.output is not None
|
|
862
|
+
else "None"
|
|
863
|
+
)
|
|
864
|
+
citations_str = "\n\n".join(str(src) for src in self.citations)
|
|
865
|
+
return (
|
|
866
|
+
f"ID: {self.id}\n"
|
|
867
|
+
f"Status: {self.status}\n"
|
|
868
|
+
f"Output: {output_repr}\n\n"
|
|
869
|
+
f"Citations:\n{citations_str}"
|
|
870
|
+
)
|
|
871
|
+
|
|
872
|
+
|
|
838
873
|
class Exa:
|
|
839
874
|
"""A client for interacting with Exa API."""
|
|
840
875
|
|
|
@@ -842,7 +877,7 @@ class Exa:
|
|
|
842
877
|
self,
|
|
843
878
|
api_key: Optional[str],
|
|
844
879
|
base_url: str = "https://api.exa.ai",
|
|
845
|
-
user_agent: str = "exa-py 1.12.
|
|
880
|
+
user_agent: str = "exa-py 1.12.3",
|
|
846
881
|
):
|
|
847
882
|
"""Initialize the Exa client with the provided API key and optional base URL and user agent.
|
|
848
883
|
|
|
@@ -859,10 +894,23 @@ class Exa:
|
|
|
859
894
|
"API key must be provided as an argument or in EXA_API_KEY environment variable"
|
|
860
895
|
)
|
|
861
896
|
self.base_url = base_url
|
|
862
|
-
self.headers = {
|
|
897
|
+
self.headers = {
|
|
898
|
+
"x-api-key": api_key,
|
|
899
|
+
"User-Agent": user_agent,
|
|
900
|
+
"Content-Type": "application/json",
|
|
901
|
+
}
|
|
863
902
|
self.websets = WebsetsClient(self)
|
|
903
|
+
# Research tasks client (new, mirrors Websets design)
|
|
904
|
+
self.research = ResearchClient(self)
|
|
864
905
|
|
|
865
|
-
def request(
|
|
906
|
+
def request(
|
|
907
|
+
self,
|
|
908
|
+
endpoint: str,
|
|
909
|
+
data: Optional[Union[Dict[str, Any], str]] = None,
|
|
910
|
+
method: str = "POST",
|
|
911
|
+
params: Optional[Dict[str, Any]] = None,
|
|
912
|
+
force_stream: Optional[bool] = False,
|
|
913
|
+
) -> Union[Dict[str, Any], requests.Response]:
|
|
866
914
|
"""Send a request to the Exa API, streaming if data['stream'] is True or force_stream is set.
|
|
867
915
|
|
|
868
916
|
Args:
|
|
@@ -885,13 +933,13 @@ class Exa:
|
|
|
885
933
|
else:
|
|
886
934
|
# Otherwise, serialize the dictionary to JSON if it exists
|
|
887
935
|
json_data = json.dumps(data, cls=ExaJSONEncoder) if data else None
|
|
888
|
-
|
|
889
|
-
if data and data.get("stream"):
|
|
936
|
+
|
|
937
|
+
if (data and data.get("stream")) or force_stream:
|
|
890
938
|
res = requests.post(
|
|
891
|
-
self.base_url + endpoint,
|
|
939
|
+
self.base_url + endpoint,
|
|
892
940
|
data=json_data,
|
|
893
|
-
headers=self.headers,
|
|
894
|
-
stream=True
|
|
941
|
+
headers=self.headers,
|
|
942
|
+
stream=True,
|
|
895
943
|
)
|
|
896
944
|
return res
|
|
897
945
|
|
|
@@ -901,20 +949,14 @@ class Exa:
|
|
|
901
949
|
)
|
|
902
950
|
elif method.upper() == "POST":
|
|
903
951
|
res = requests.post(
|
|
904
|
-
self.base_url + endpoint,
|
|
905
|
-
data=json_data,
|
|
906
|
-
headers=self.headers
|
|
952
|
+
self.base_url + endpoint, data=json_data, headers=self.headers
|
|
907
953
|
)
|
|
908
954
|
elif method.upper() == "PATCH":
|
|
909
955
|
res = requests.patch(
|
|
910
|
-
self.base_url + endpoint,
|
|
911
|
-
data=json_data,
|
|
912
|
-
headers=self.headers
|
|
956
|
+
self.base_url + endpoint, data=json_data, headers=self.headers
|
|
913
957
|
)
|
|
914
958
|
elif method.upper() == "DELETE":
|
|
915
|
-
res = requests.delete(
|
|
916
|
-
self.base_url + endpoint, headers=self.headers
|
|
917
|
-
)
|
|
959
|
+
res = requests.delete(self.base_url + endpoint, headers=self.headers)
|
|
918
960
|
else:
|
|
919
961
|
raise ValueError(f"Unsupported HTTP method: {method}")
|
|
920
962
|
|
|
@@ -1845,6 +1887,7 @@ class Exa:
|
|
|
1845
1887
|
text: Optional[bool] = False,
|
|
1846
1888
|
system_prompt: Optional[str] = None,
|
|
1847
1889
|
model: Optional[Literal["exa", "exa-pro"]] = None,
|
|
1890
|
+
output_schema: Optional[dict[str, Any]] = None,
|
|
1848
1891
|
) -> Union[AnswerResponse, StreamAnswerResponse]: ...
|
|
1849
1892
|
|
|
1850
1893
|
def answer(
|
|
@@ -1855,6 +1898,7 @@ class Exa:
|
|
|
1855
1898
|
text: Optional[bool] = False,
|
|
1856
1899
|
system_prompt: Optional[str] = None,
|
|
1857
1900
|
model: Optional[Literal["exa", "exa-pro"]] = None,
|
|
1901
|
+
output_schema: Optional[dict[str, Any]] = None,
|
|
1858
1902
|
) -> Union[AnswerResponse, StreamAnswerResponse]:
|
|
1859
1903
|
"""Generate an answer to a query using Exa's search and LLM capabilities.
|
|
1860
1904
|
|
|
@@ -1863,6 +1907,7 @@ class Exa:
|
|
|
1863
1907
|
text (bool, optional): Whether to include full text in the results. Defaults to False.
|
|
1864
1908
|
system_prompt (str, optional): A system prompt to guide the LLM's behavior when generating the answer.
|
|
1865
1909
|
model (str, optional): The model to use for answering. Either "exa" or "exa-pro". Defaults to None.
|
|
1910
|
+
output_schema (dict[str, Any], optional): JSON schema describing the desired answer structure.
|
|
1866
1911
|
|
|
1867
1912
|
Returns:
|
|
1868
1913
|
AnswerResponse: An object containing the answer and citations.
|
|
@@ -1892,6 +1937,7 @@ class Exa:
|
|
|
1892
1937
|
text: bool = False,
|
|
1893
1938
|
system_prompt: Optional[str] = None,
|
|
1894
1939
|
model: Optional[Literal["exa", "exa-pro"]] = None,
|
|
1940
|
+
output_schema: Optional[dict[str, Any]] = None,
|
|
1895
1941
|
) -> StreamAnswerResponse:
|
|
1896
1942
|
"""Generate a streaming answer response.
|
|
1897
1943
|
|
|
@@ -1900,7 +1946,7 @@ class Exa:
|
|
|
1900
1946
|
text (bool): Whether to include full text in the results. Defaults to False.
|
|
1901
1947
|
system_prompt (str, optional): A system prompt to guide the LLM's behavior when generating the answer.
|
|
1902
1948
|
model (str, optional): The model to use for answering. Either "exa" or "exa-pro". Defaults to None.
|
|
1903
|
-
|
|
1949
|
+
output_schema (dict[str, Any], optional): JSON schema describing the desired answer structure.
|
|
1904
1950
|
Returns:
|
|
1905
1951
|
StreamAnswerResponse: An object that can be iterated over to retrieve (partial text, partial citations).
|
|
1906
1952
|
Each iteration yields a tuple of (Optional[str], Optional[List[AnswerResult]]).
|
|
@@ -1911,9 +1957,12 @@ class Exa:
|
|
|
1911
1957
|
raw_response = self.request("/answer", options)
|
|
1912
1958
|
return StreamAnswerResponse(raw_response)
|
|
1913
1959
|
|
|
1960
|
+
|
|
1914
1961
|
class AsyncExa(Exa):
|
|
1915
1962
|
def __init__(self, api_key: str, api_base: str = "https://api.exa.ai"):
|
|
1916
1963
|
super().__init__(api_key, api_base)
|
|
1964
|
+
# Override the synchronous ResearchClient with its async counterpart.
|
|
1965
|
+
self.research = AsyncResearchClient(self)
|
|
1917
1966
|
self._client = None
|
|
1918
1967
|
|
|
1919
1968
|
@property
|
|
@@ -1921,13 +1970,13 @@ class AsyncExa(Exa):
|
|
|
1921
1970
|
# this may only be a
|
|
1922
1971
|
if self._client is None:
|
|
1923
1972
|
self._client = httpx.AsyncClient(
|
|
1924
|
-
base_url=self.base_url,
|
|
1925
|
-
headers=self.headers,
|
|
1926
|
-
timeout=60
|
|
1973
|
+
base_url=self.base_url, headers=self.headers, timeout=60
|
|
1927
1974
|
)
|
|
1928
1975
|
return self._client
|
|
1929
1976
|
|
|
1930
|
-
async def async_request(
|
|
1977
|
+
async def async_request(
|
|
1978
|
+
self, endpoint: str, data, force_stream: Optional[bool] = False
|
|
1979
|
+
):
|
|
1931
1980
|
"""Send a POST request to the Exa API, streaming if data['stream'] is True or force_stream is set.
|
|
1932
1981
|
|
|
1933
1982
|
Args:
|
|
@@ -1941,17 +1990,16 @@ class AsyncExa(Exa):
|
|
|
1941
1990
|
Raises:
|
|
1942
1991
|
ValueError: If the request fails (non-200 status code).
|
|
1943
1992
|
"""
|
|
1944
|
-
if data.get("stream"):
|
|
1993
|
+
if data.get("stream") or force_stream:
|
|
1945
1994
|
request = httpx.Request(
|
|
1946
|
-
|
|
1947
|
-
self.base_url + endpoint,
|
|
1948
|
-
json=data,
|
|
1949
|
-
headers=self.headers
|
|
1995
|
+
"POST", self.base_url + endpoint, json=data, headers=self.headers
|
|
1950
1996
|
)
|
|
1951
1997
|
res = await self.client.send(request, stream=True)
|
|
1952
1998
|
return res
|
|
1953
1999
|
|
|
1954
|
-
res = await self.client.post(
|
|
2000
|
+
res = await self.client.post(
|
|
2001
|
+
self.base_url + endpoint, json=data, headers=self.headers
|
|
2002
|
+
)
|
|
1955
2003
|
if res.status_code != 200:
|
|
1956
2004
|
raise ValueError(
|
|
1957
2005
|
f"Request failed with status code {res.status_code}: {res.text}"
|
|
@@ -2189,6 +2237,7 @@ class AsyncExa(Exa):
|
|
|
2189
2237
|
text: Optional[bool] = False,
|
|
2190
2238
|
system_prompt: Optional[str] = None,
|
|
2191
2239
|
model: Optional[Literal["exa", "exa-pro"]] = None,
|
|
2240
|
+
output_schema: Optional[dict[str, Any]] = None,
|
|
2192
2241
|
) -> Union[AnswerResponse, StreamAnswerResponse]:
|
|
2193
2242
|
"""Generate an answer to a query using Exa's search and LLM capabilities.
|
|
2194
2243
|
|
|
@@ -2197,6 +2246,7 @@ class AsyncExa(Exa):
|
|
|
2197
2246
|
text (bool, optional): Whether to include full text in the results. Defaults to False.
|
|
2198
2247
|
system_prompt (str, optional): A system prompt to guide the LLM's behavior when generating the answer.
|
|
2199
2248
|
model (str, optional): The model to use for answering. Either "exa" or "exa-pro". Defaults to None.
|
|
2249
|
+
output_schema (dict[str, Any], optional): JSON schema describing the desired answer structure.
|
|
2200
2250
|
|
|
2201
2251
|
Returns:
|
|
2202
2252
|
AnswerResponse: An object containing the answer and citations.
|
|
@@ -2226,6 +2276,7 @@ class AsyncExa(Exa):
|
|
|
2226
2276
|
text: bool = False,
|
|
2227
2277
|
system_prompt: Optional[str] = None,
|
|
2228
2278
|
model: Optional[Literal["exa", "exa-pro"]] = None,
|
|
2279
|
+
output_schema: Optional[dict[str, Any]] = None,
|
|
2229
2280
|
) -> AsyncStreamAnswerResponse:
|
|
2230
2281
|
"""Generate a streaming answer response.
|
|
2231
2282
|
|
|
@@ -2234,7 +2285,7 @@ class AsyncExa(Exa):
|
|
|
2234
2285
|
text (bool): Whether to include full text in the results. Defaults to False.
|
|
2235
2286
|
system_prompt (str, optional): A system prompt to guide the LLM's behavior when generating the answer.
|
|
2236
2287
|
model (str, optional): The model to use for answering. Either "exa" or "exa-pro". Defaults to None.
|
|
2237
|
-
|
|
2288
|
+
output_schema (dict[str, Any], optional): JSON schema describing the desired answer structure.
|
|
2238
2289
|
Returns:
|
|
2239
2290
|
AsyncStreamAnswerResponse: An object that can be iterated over to retrieve (partial text, partial citations).
|
|
2240
2291
|
Each iteration yields a tuple of (Optional[str], Optional[List[AnswerResult]]).
|
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
"""Lightweight research client wrappers for the Exa REST API.
|
|
2
|
+
|
|
3
|
+
This module purposefully keeps its import surface minimal to avoid circular
|
|
4
|
+
import problems with :py:mod:`exa_py.api`. Any heavy dependencies (including
|
|
5
|
+
`exa_py.api` itself) are imported lazily **inside** functions. This means
|
|
6
|
+
that type-checkers still see the full, precise types via the ``TYPE_CHECKING``
|
|
7
|
+
block, but at runtime we only pay the cost if/when a helper is actually used.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple
|
|
13
|
+
|
|
14
|
+
if TYPE_CHECKING: # pragma: no cover – only for static analysers
|
|
15
|
+
# Import with full type info when static type-checking. `_Result` still
|
|
16
|
+
# lives in ``exa_py.api`` but the response model moved to
|
|
17
|
+
# ``exa_py.research.models``.
|
|
18
|
+
from ..api import _Result # noqa: F401
|
|
19
|
+
from .models import ResearchTaskResponse # noqa: F401
|
|
20
|
+
|
|
21
|
+
# ---------------------------------------------------------------------------
|
|
22
|
+
# Public, user-facing clients
|
|
23
|
+
# ---------------------------------------------------------------------------
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class ResearchClient:
|
|
27
|
+
"""Synchronous helper namespace accessed via :py:attr:`Exa.research`."""
|
|
28
|
+
|
|
29
|
+
def __init__(self, parent_client):
|
|
30
|
+
# A reference to the *already-constructed* ``Exa`` instance so that we
|
|
31
|
+
# can piggy-back on its HTTP plumbing (headers, base URL, retries, …).
|
|
32
|
+
self._client = parent_client
|
|
33
|
+
|
|
34
|
+
# ------------------------------------------------------------------
|
|
35
|
+
# API surface
|
|
36
|
+
# ------------------------------------------------------------------
|
|
37
|
+
def create_task(
|
|
38
|
+
self,
|
|
39
|
+
*,
|
|
40
|
+
input_instructions: str,
|
|
41
|
+
output_schema: Dict[str, Any],
|
|
42
|
+
) -> "ResearchTaskResponse":
|
|
43
|
+
"""Submit a research request to the Exa backend.
|
|
44
|
+
|
|
45
|
+
The public API remains synchronous – the function only returns once
|
|
46
|
+
the task has finished and the final structured answer is available.
|
|
47
|
+
Internally, however, the endpoint now streams *progress* updates via
|
|
48
|
+
Server-Sent Events (SSE). We therefore initiate a streaming request
|
|
49
|
+
and keep reading until we receive the terminal ``{"tag": "complete"}``
|
|
50
|
+
chunk, which carries the exact same payload shape that the blocking
|
|
51
|
+
variant returned previously. Any ``{"tag": "progress"}`` chunks are
|
|
52
|
+
ignored, while ``{"tag": "error"}`` chunks result in an exception.
|
|
53
|
+
|
|
54
|
+
Parameters
|
|
55
|
+
----------
|
|
56
|
+
input_instructions:
|
|
57
|
+
Natural-language instructions that describe *what* should be
|
|
58
|
+
researched or extracted.
|
|
59
|
+
output_schema:
|
|
60
|
+
JSON-schema describing the desired structured output format.
|
|
61
|
+
"""
|
|
62
|
+
|
|
63
|
+
import json
|
|
64
|
+
|
|
65
|
+
payload = {
|
|
66
|
+
"input": {"instructions": input_instructions},
|
|
67
|
+
"output": {"schema": output_schema},
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
raw_response = self._client.request(
|
|
71
|
+
"/research/tasks", payload, force_stream=True
|
|
72
|
+
)
|
|
73
|
+
|
|
74
|
+
def _handle_payload(tag: Optional[str], payload_dict: Dict[str, Any]):
|
|
75
|
+
"""Inner helper handling decoded JSON chunks."""
|
|
76
|
+
if tag is None:
|
|
77
|
+
tag_local = payload_dict.get("tag")
|
|
78
|
+
else:
|
|
79
|
+
tag_local = tag
|
|
80
|
+
|
|
81
|
+
if tag_local == "progress":
|
|
82
|
+
return None # ignore
|
|
83
|
+
if tag_local == "error":
|
|
84
|
+
msg = payload_dict.get("error", {}).get("message", "Unknown error")
|
|
85
|
+
raise RuntimeError(f"Research task failed: {msg}")
|
|
86
|
+
if tag_local == "complete":
|
|
87
|
+
data_obj = payload_dict.get("data")
|
|
88
|
+
if data_obj is None:
|
|
89
|
+
raise RuntimeError("Malformed 'complete' chunk with no data")
|
|
90
|
+
return _parse_research_response(data_obj)
|
|
91
|
+
|
|
92
|
+
# Fallback: if looks like final object
|
|
93
|
+
if {"id", "status"}.issubset(payload_dict.keys()):
|
|
94
|
+
return _parse_research_response(payload_dict)
|
|
95
|
+
return None
|
|
96
|
+
|
|
97
|
+
# ------------------------------------------------------------------
|
|
98
|
+
# Minimal SSE parser (sync)
|
|
99
|
+
# ------------------------------------------------------------------
|
|
100
|
+
event_name: Optional[str] = None
|
|
101
|
+
data_buf: str = ""
|
|
102
|
+
|
|
103
|
+
for raw_line in raw_response.iter_lines(decode_unicode=True):
|
|
104
|
+
line = raw_line
|
|
105
|
+
if line == "":
|
|
106
|
+
if data_buf:
|
|
107
|
+
try:
|
|
108
|
+
payload_dict = json.loads(data_buf)
|
|
109
|
+
except json.JSONDecodeError:
|
|
110
|
+
data_buf = ""
|
|
111
|
+
event_name = None
|
|
112
|
+
continue
|
|
113
|
+
maybe_resp = _handle_payload(event_name, payload_dict)
|
|
114
|
+
if maybe_resp is not None:
|
|
115
|
+
raw_response.close()
|
|
116
|
+
return maybe_resp
|
|
117
|
+
# reset after event
|
|
118
|
+
data_buf = ""
|
|
119
|
+
event_name = None
|
|
120
|
+
continue
|
|
121
|
+
|
|
122
|
+
if line.startswith("event:"):
|
|
123
|
+
event_name = line[len("event:") :].strip()
|
|
124
|
+
elif line.startswith("data:"):
|
|
125
|
+
data_buf += line[len("data:") :].strip()
|
|
126
|
+
|
|
127
|
+
# Process any remaining buffer (in case stream closed without blank line)
|
|
128
|
+
if data_buf:
|
|
129
|
+
try:
|
|
130
|
+
payload_dict = json.loads(data_buf)
|
|
131
|
+
maybe_resp = _handle_payload(event_name, payload_dict)
|
|
132
|
+
if maybe_resp is not None:
|
|
133
|
+
raw_response.close()
|
|
134
|
+
return maybe_resp
|
|
135
|
+
except json.JSONDecodeError:
|
|
136
|
+
pass
|
|
137
|
+
|
|
138
|
+
raise RuntimeError("Stream ended before completion of research task")
|
|
139
|
+
|
|
140
|
+
def get_task(self, id: str): # noqa: D401 – imperative mood is fine
|
|
141
|
+
"""Placeholder endpoint – not yet implemented on the server side."""
|
|
142
|
+
raise NotImplementedError(
|
|
143
|
+
"`exa.research.get_task` is not available yet. Please open an "
|
|
144
|
+
"issue if you need this sooner."
|
|
145
|
+
)
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
class AsyncResearchClient:
|
|
149
|
+
"""Async counterpart used via :py:attr:`AsyncExa.research`."""
|
|
150
|
+
|
|
151
|
+
def __init__(self, parent_client):
|
|
152
|
+
self._client = parent_client
|
|
153
|
+
|
|
154
|
+
async def create_task(
|
|
155
|
+
self,
|
|
156
|
+
*,
|
|
157
|
+
input_instructions: str,
|
|
158
|
+
output_schema: Dict[str, Any],
|
|
159
|
+
) -> "ResearchTaskResponse":
|
|
160
|
+
"""Async variant mirroring the synchronous implementation above."""
|
|
161
|
+
|
|
162
|
+
import json
|
|
163
|
+
|
|
164
|
+
payload = {
|
|
165
|
+
"input": {"instructions": input_instructions},
|
|
166
|
+
"output": {"schema": output_schema},
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
raw_response = await self._client.async_request(
|
|
170
|
+
"/research/tasks", payload, force_stream=True
|
|
171
|
+
)
|
|
172
|
+
|
|
173
|
+
async def _handle_payload_async(
|
|
174
|
+
tag: Optional[str], payload_dict: Dict[str, Any]
|
|
175
|
+
):
|
|
176
|
+
if tag is None:
|
|
177
|
+
tag_local = payload_dict.get("tag")
|
|
178
|
+
else:
|
|
179
|
+
tag_local = tag
|
|
180
|
+
|
|
181
|
+
if tag_local == "progress":
|
|
182
|
+
return None
|
|
183
|
+
if tag_local == "error":
|
|
184
|
+
msg = payload_dict.get("error", {}).get("message", "Unknown error")
|
|
185
|
+
raise RuntimeError(f"Research task failed: {msg}")
|
|
186
|
+
if tag_local == "complete":
|
|
187
|
+
data_obj = payload_dict.get("data")
|
|
188
|
+
if data_obj is None:
|
|
189
|
+
raise RuntimeError("Malformed 'complete' chunk with no data")
|
|
190
|
+
return _parse_research_response(data_obj)
|
|
191
|
+
if {"id", "status"}.issubset(payload_dict.keys()):
|
|
192
|
+
return _parse_research_response(payload_dict)
|
|
193
|
+
return None
|
|
194
|
+
|
|
195
|
+
event_name: Optional[str] = None
|
|
196
|
+
data_buf: str = ""
|
|
197
|
+
|
|
198
|
+
async for line in raw_response.aiter_lines():
|
|
199
|
+
if line == "":
|
|
200
|
+
if data_buf:
|
|
201
|
+
try:
|
|
202
|
+
payload_dict = json.loads(data_buf)
|
|
203
|
+
except json.JSONDecodeError:
|
|
204
|
+
data_buf = ""
|
|
205
|
+
event_name = None
|
|
206
|
+
continue
|
|
207
|
+
maybe_resp = await _handle_payload_async(event_name, payload_dict)
|
|
208
|
+
if maybe_resp is not None:
|
|
209
|
+
await raw_response.aclose()
|
|
210
|
+
return maybe_resp
|
|
211
|
+
data_buf = ""
|
|
212
|
+
event_name = None
|
|
213
|
+
continue
|
|
214
|
+
|
|
215
|
+
if line.startswith("event:"):
|
|
216
|
+
event_name = line[len("event:") :].strip()
|
|
217
|
+
elif line.startswith("data:"):
|
|
218
|
+
data_buf += line[len("data:") :].strip()
|
|
219
|
+
|
|
220
|
+
if data_buf:
|
|
221
|
+
try:
|
|
222
|
+
payload_dict = json.loads(data_buf)
|
|
223
|
+
maybe_resp = await _handle_payload_async(event_name, payload_dict)
|
|
224
|
+
if maybe_resp is not None:
|
|
225
|
+
await raw_response.aclose()
|
|
226
|
+
return maybe_resp
|
|
227
|
+
except json.JSONDecodeError:
|
|
228
|
+
pass
|
|
229
|
+
|
|
230
|
+
raise RuntimeError("Stream ended before completion of research task")
|
|
231
|
+
|
|
232
|
+
async def get_task(self, id: str): # noqa: D401
|
|
233
|
+
raise NotImplementedError(
|
|
234
|
+
"`exa.research.get_task` is not available yet. Please open an "
|
|
235
|
+
"issue if you need this sooner."
|
|
236
|
+
)
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
# ---------------------------------------------------------------------------
|
|
240
|
+
# Internal helpers (lazy imports to avoid cycles)
|
|
241
|
+
# ---------------------------------------------------------------------------
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def _parse_research_response(raw: Dict[str, Any]):
|
|
245
|
+
"""Transform camel-case API payload into rich Python objects."""
|
|
246
|
+
from .models import ResearchTaskResponse
|
|
247
|
+
from ..api import _Result, to_snake_case
|
|
248
|
+
|
|
249
|
+
return ResearchTaskResponse(
|
|
250
|
+
id=raw["id"],
|
|
251
|
+
status=raw["status"],
|
|
252
|
+
output=raw.get("output"),
|
|
253
|
+
citations={
|
|
254
|
+
key: [_Result(**to_snake_case(c)) for c in citations]
|
|
255
|
+
for key, citations in raw.get("citations", {}).items()
|
|
256
|
+
},
|
|
257
|
+
)
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from typing import Any, Dict, List, Optional
|
|
6
|
+
|
|
7
|
+
# Local import placed inside TYPE_CHECKING block to avoid runtime cycles.
|
|
8
|
+
from typing import TYPE_CHECKING
|
|
9
|
+
|
|
10
|
+
if TYPE_CHECKING: # pragma: no cover – for static analysers only
|
|
11
|
+
from ..api import _Result # noqa: F401
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
|
|
15
|
+
class ResearchTaskResponse:
|
|
16
|
+
"""Structured response returned from the /research/tasks endpoint.
|
|
17
|
+
|
|
18
|
+
Attributes
|
|
19
|
+
----------
|
|
20
|
+
id:
|
|
21
|
+
Unique identifier for the research task.
|
|
22
|
+
status:
|
|
23
|
+
Current task status
|
|
24
|
+
output:
|
|
25
|
+
JSON-serialisable answer generated by Exa (may be ``None`` until the task
|
|
26
|
+
completes).
|
|
27
|
+
citations:
|
|
28
|
+
Mapping from *root field* in the output schema to the list of search
|
|
29
|
+
results that were used to generate that part of the answer.
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
id: str
|
|
33
|
+
status: str
|
|
34
|
+
output: Optional[Dict[str, Any]]
|
|
35
|
+
citations: Dict[str, List["_Result"]]
|
|
36
|
+
|
|
37
|
+
# ---------------------------------------------------------------------
|
|
38
|
+
# Pretty representation helpers
|
|
39
|
+
# ---------------------------------------------------------------------
|
|
40
|
+
def __str__(self) -> str: # pragma: no cover – convenience only
|
|
41
|
+
output_repr = (
|
|
42
|
+
json.dumps(self.output, indent=2, ensure_ascii=False)
|
|
43
|
+
if self.output is not None
|
|
44
|
+
else "None"
|
|
45
|
+
)
|
|
46
|
+
citations_str = "\n\n".join(str(src) for src in self.citations)
|
|
47
|
+
return (
|
|
48
|
+
f"ID: {self.id}\n"
|
|
49
|
+
f"Status: {self.status}\n"
|
|
50
|
+
f"Output: {output_repr}\n\n"
|
|
51
|
+
f"Citations:\n{citations_str}"
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
__all__ = [
|
|
56
|
+
"ResearchTaskResponse",
|
|
57
|
+
]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: exa-py
|
|
3
|
-
Version: 1.12.
|
|
3
|
+
Version: 1.12.3
|
|
4
4
|
Summary: Python SDK for Exa API.
|
|
5
5
|
License: MIT
|
|
6
6
|
Author: Exa AI
|
|
@@ -45,14 +45,12 @@ exa = Exa(api_key="your-api-key")
|
|
|
45
45
|
```
|
|
46
46
|
|
|
47
47
|
## Common requests
|
|
48
|
+
|
|
48
49
|
```python
|
|
49
50
|
|
|
50
51
|
# basic search
|
|
51
52
|
results = exa.search("This is a Exa query:")
|
|
52
53
|
|
|
53
|
-
# autoprompted search
|
|
54
|
-
results = exa.search("autopromptable query", use_autoprompt=True)
|
|
55
|
-
|
|
56
54
|
# keyword search (non-neural)
|
|
57
55
|
results = exa.search("Google-style query", type="keyword")
|
|
58
56
|
|
|
@@ -65,14 +63,10 @@ exa = Exa(api_key="your-api-key")
|
|
|
65
63
|
# search and get text contents
|
|
66
64
|
results = exa.search_and_contents("This is a Exa query:")
|
|
67
65
|
|
|
68
|
-
# search and get highlights
|
|
69
|
-
results = exa.search_and_contents("This is a Exa query:", highlights=True)
|
|
70
|
-
|
|
71
66
|
# search and get contents with contents options
|
|
72
|
-
results = exa.search_and_contents("This is a Exa query:",
|
|
73
|
-
text={"include_html_tags": True, "max_characters": 1000}
|
|
74
|
-
|
|
75
|
-
|
|
67
|
+
results = exa.search_and_contents("This is a Exa query:",
|
|
68
|
+
text={"include_html_tags": True, "max_characters": 1000})
|
|
69
|
+
|
|
76
70
|
# find similar documents
|
|
77
71
|
results = exa.find_similar("https://example.com")
|
|
78
72
|
|
|
@@ -80,18 +74,14 @@ exa = Exa(api_key="your-api-key")
|
|
|
80
74
|
results = exa.find_similar("https://example.com", exclude_source_domain=True)
|
|
81
75
|
|
|
82
76
|
# find similar with contents
|
|
83
|
-
results = exa.find_similar_and_contents("https://example.com", text=True
|
|
77
|
+
results = exa.find_similar_and_contents("https://example.com", text=True)
|
|
84
78
|
|
|
85
79
|
# get text contents
|
|
86
|
-
results = exa.get_contents(["
|
|
87
|
-
|
|
88
|
-
# get highlights
|
|
89
|
-
results = exa.get_contents(["urls"], highlights=True)
|
|
80
|
+
results = exa.get_contents(["tesla.com"])
|
|
90
81
|
|
|
91
82
|
# get contents with contents options
|
|
92
|
-
results = exa.get_contents(["urls"],
|
|
93
|
-
text={"include_html_tags": True, "max_characters": 1000}
|
|
94
|
-
highlights={"highlights_per_url": 2, "num_sentences": 1, "query": "This is the highlight query:"})
|
|
83
|
+
results = exa.get_contents(["urls"],
|
|
84
|
+
text={"include_html_tags": True, "max_characters": 1000})
|
|
95
85
|
|
|
96
86
|
# basic answer
|
|
97
87
|
response = exa.answer("This is a query to answer a question")
|
|
@@ -106,6 +96,38 @@ exa = Exa(api_key="your-api-key")
|
|
|
106
96
|
for chunk in response:
|
|
107
97
|
print(chunk, end='', flush=True)
|
|
108
98
|
|
|
99
|
+
# research task example – answer a question with citations
|
|
100
|
+
# Example prompt & schema inspired by the TypeScript example.
|
|
101
|
+
QUESTION = (
|
|
102
|
+
"Summarize the history of San Francisco highlighting one or two major events "
|
|
103
|
+
"for each decade from 1850 to 1950"
|
|
104
|
+
)
|
|
105
|
+
OUTPUT_SCHEMA: Dict[str, Any] = {
|
|
106
|
+
"type": "object",
|
|
107
|
+
"required": ["timeline"],
|
|
108
|
+
"properties": {
|
|
109
|
+
"timeline": {
|
|
110
|
+
"type": "array",
|
|
111
|
+
"items": {
|
|
112
|
+
"type": "object",
|
|
113
|
+
"required": ["decade", "notableEvents"],
|
|
114
|
+
"properties": {
|
|
115
|
+
"decade": {
|
|
116
|
+
"type": "string",
|
|
117
|
+
"description": 'Decade label e.g. "1850s"',
|
|
118
|
+
},
|
|
119
|
+
"notableEvents": {
|
|
120
|
+
"type": "string",
|
|
121
|
+
"description": "A summary of notable events.",
|
|
122
|
+
},
|
|
123
|
+
},
|
|
124
|
+
},
|
|
125
|
+
},
|
|
126
|
+
},
|
|
127
|
+
}
|
|
128
|
+
resp = exa.research.create_task(
|
|
129
|
+
input_instructions=QUESTION,
|
|
130
|
+
output_schema=OUTPUT_SCHEMA,
|
|
131
|
+
)
|
|
109
132
|
```
|
|
110
133
|
|
|
111
|
-
|
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
exa_py/__init__.py,sha256=M2GC9oSdoV6m2msboW0vMWWl8wrth4o6gmEV4MYLGG8,66
|
|
2
|
-
exa_py/api.py,sha256=
|
|
2
|
+
exa_py/api.py,sha256=CNDOERNAWjzp_XMmSCjH59mPe_8ePOXSJ82qIAWf60g,86106
|
|
3
3
|
exa_py/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
+
exa_py/research/__init__.py,sha256=tFahd_P8Gqd9IkPQPk8GXag4AbxulRPpe8u44RhnB3w,189
|
|
5
|
+
exa_py/research/client.py,sha256=rJZf7iFnIAUBYwx0xqx76TbWx-yX1CKIwCr-t2P7RvA,9930
|
|
6
|
+
exa_py/research/models.py,sha256=n6_E3Kog6Pg-sLnXTsHocyWqz_qdzTckJW3aCAR71Vk,1698
|
|
4
7
|
exa_py/utils.py,sha256=Rc1FJjoR9LQ7L_OJM91Sd1GNkbHjcLyEvJENhRix6gc,2405
|
|
5
8
|
exa_py/websets/__init__.py,sha256=uOBAb9VrIHrPKoddGOp2ai2KgWlyUVCLMZqfbGOlboA,70
|
|
6
9
|
exa_py/websets/_generator/pydantic/BaseModel.jinja2,sha256=RUDCmPZVamoVx1WudylscYFfDhGoNNtRYlpTvKjAiuA,1276
|
|
@@ -16,6 +19,6 @@ exa_py/websets/searches/client.py,sha256=X3f7axWGfecmxf-2tBTX0Yf_--xToz1X8ZHbbud
|
|
|
16
19
|
exa_py/websets/types.py,sha256=jKnJFAHTFN55EzsusgDce-yux71zVbdSJ1m8utR4EjU,28096
|
|
17
20
|
exa_py/websets/webhooks/__init__.py,sha256=iTPBCxFd73z4RifLQMX6iRECx_6pwlI5qscLNjMOUHE,77
|
|
18
21
|
exa_py/websets/webhooks/client.py,sha256=zsIRMTeJU65yj-zo7Zz-gG02Prtzgcx6utGFSoY4HQQ,4222
|
|
19
|
-
exa_py-1.12.
|
|
20
|
-
exa_py-1.12.
|
|
21
|
-
exa_py-1.12.
|
|
22
|
+
exa_py-1.12.3.dist-info/METADATA,sha256=-OzwAZigBPwXnt9uRSV9XYsGioPwr55DR8WrnASkymk,4098
|
|
23
|
+
exa_py-1.12.3.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
|
24
|
+
exa_py-1.12.3.dist-info/RECORD,,
|