exa-py 1.12.0__tar.gz → 1.12.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of exa-py might be problematic. Click here for more details.

Files changed (24) hide show
  1. {exa_py-1.12.0 → exa_py-1.12.3}/PKG-INFO +42 -20
  2. {exa_py-1.12.0 → exa_py-1.12.3}/README.md +41 -19
  3. {exa_py-1.12.0 → exa_py-1.12.3}/exa_py/api.py +86 -35
  4. exa_py-1.12.3/exa_py/research/__init__.py +8 -0
  5. exa_py-1.12.3/exa_py/research/client.py +257 -0
  6. exa_py-1.12.3/exa_py/research/models.py +57 -0
  7. {exa_py-1.12.0 → exa_py-1.12.3}/exa_py/websets/types.py +8 -8
  8. {exa_py-1.12.0 → exa_py-1.12.3}/pyproject.toml +4 -6
  9. {exa_py-1.12.0 → exa_py-1.12.3}/exa_py/__init__.py +0 -0
  10. {exa_py-1.12.0 → exa_py-1.12.3}/exa_py/py.typed +0 -0
  11. {exa_py-1.12.0 → exa_py-1.12.3}/exa_py/utils.py +0 -0
  12. {exa_py-1.12.0 → exa_py-1.12.3}/exa_py/websets/__init__.py +0 -0
  13. {exa_py-1.12.0 → exa_py-1.12.3}/exa_py/websets/_generator/pydantic/BaseModel.jinja2 +0 -0
  14. {exa_py-1.12.0 → exa_py-1.12.3}/exa_py/websets/client.py +0 -0
  15. {exa_py-1.12.0 → exa_py-1.12.3}/exa_py/websets/core/__init__.py +0 -0
  16. {exa_py-1.12.0 → exa_py-1.12.3}/exa_py/websets/core/base.py +0 -0
  17. {exa_py-1.12.0 → exa_py-1.12.3}/exa_py/websets/enrichments/__init__.py +0 -0
  18. {exa_py-1.12.0 → exa_py-1.12.3}/exa_py/websets/enrichments/client.py +0 -0
  19. {exa_py-1.12.0 → exa_py-1.12.3}/exa_py/websets/items/__init__.py +0 -0
  20. {exa_py-1.12.0 → exa_py-1.12.3}/exa_py/websets/items/client.py +0 -0
  21. {exa_py-1.12.0 → exa_py-1.12.3}/exa_py/websets/searches/__init__.py +0 -0
  22. {exa_py-1.12.0 → exa_py-1.12.3}/exa_py/websets/searches/client.py +0 -0
  23. {exa_py-1.12.0 → exa_py-1.12.3}/exa_py/websets/webhooks/__init__.py +0 -0
  24. {exa_py-1.12.0 → exa_py-1.12.3}/exa_py/websets/webhooks/client.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: exa-py
3
- Version: 1.12.0
3
+ Version: 1.12.3
4
4
  Summary: Python SDK for Exa API.
5
5
  License: MIT
6
6
  Author: Exa AI
@@ -45,14 +45,12 @@ exa = Exa(api_key="your-api-key")
45
45
  ```
46
46
 
47
47
  ## Common requests
48
+
48
49
  ```python
49
50
 
50
51
  # basic search
51
52
  results = exa.search("This is a Exa query:")
52
53
 
53
- # autoprompted search
54
- results = exa.search("autopromptable query", use_autoprompt=True)
55
-
56
54
  # keyword search (non-neural)
57
55
  results = exa.search("Google-style query", type="keyword")
58
56
 
@@ -65,14 +63,10 @@ exa = Exa(api_key="your-api-key")
65
63
  # search and get text contents
66
64
  results = exa.search_and_contents("This is a Exa query:")
67
65
 
68
- # search and get highlights
69
- results = exa.search_and_contents("This is a Exa query:", highlights=True)
70
-
71
66
  # search and get contents with contents options
72
- results = exa.search_and_contents("This is a Exa query:",
73
- text={"include_html_tags": True, "max_characters": 1000},
74
- highlights={"highlights_per_url": 2, "num_sentences": 1, "query": "This is the highlight query:"})
75
-
67
+ results = exa.search_and_contents("This is a Exa query:",
68
+ text={"include_html_tags": True, "max_characters": 1000})
69
+
76
70
  # find similar documents
77
71
  results = exa.find_similar("https://example.com")
78
72
 
@@ -80,18 +74,14 @@ exa = Exa(api_key="your-api-key")
80
74
  results = exa.find_similar("https://example.com", exclude_source_domain=True)
81
75
 
82
76
  # find similar with contents
83
- results = exa.find_similar_and_contents("https://example.com", text=True, highlights=True)
77
+ results = exa.find_similar_and_contents("https://example.com", text=True)
84
78
 
85
79
  # get text contents
86
- results = exa.get_contents(["urls"])
87
-
88
- # get highlights
89
- results = exa.get_contents(["urls"], highlights=True)
80
+ results = exa.get_contents(["tesla.com"])
90
81
 
91
82
  # get contents with contents options
92
- results = exa.get_contents(["urls"],
93
- text={"include_html_tags": True, "max_characters": 1000},
94
- highlights={"highlights_per_url": 2, "num_sentences": 1, "query": "This is the highlight query:"})
83
+ results = exa.get_contents(["urls"],
84
+ text={"include_html_tags": True, "max_characters": 1000})
95
85
 
96
86
  # basic answer
97
87
  response = exa.answer("This is a query to answer a question")
@@ -106,6 +96,38 @@ exa = Exa(api_key="your-api-key")
106
96
  for chunk in response:
107
97
  print(chunk, end='', flush=True)
108
98
 
99
+ # research task example – answer a question with citations
100
+ # Example prompt & schema inspired by the TypeScript example.
101
+ QUESTION = (
102
+ "Summarize the history of San Francisco highlighting one or two major events "
103
+ "for each decade from 1850 to 1950"
104
+ )
105
+ OUTPUT_SCHEMA: Dict[str, Any] = {
106
+ "type": "object",
107
+ "required": ["timeline"],
108
+ "properties": {
109
+ "timeline": {
110
+ "type": "array",
111
+ "items": {
112
+ "type": "object",
113
+ "required": ["decade", "notableEvents"],
114
+ "properties": {
115
+ "decade": {
116
+ "type": "string",
117
+ "description": 'Decade label e.g. "1850s"',
118
+ },
119
+ "notableEvents": {
120
+ "type": "string",
121
+ "description": "A summary of notable events.",
122
+ },
123
+ },
124
+ },
125
+ },
126
+ },
127
+ }
128
+ resp = exa.research.create_task(
129
+ input_instructions=QUESTION,
130
+ output_schema=OUTPUT_SCHEMA,
131
+ )
109
132
  ```
110
133
 
111
-
@@ -22,14 +22,12 @@ exa = Exa(api_key="your-api-key")
22
22
  ```
23
23
 
24
24
  ## Common requests
25
+
25
26
  ```python
26
27
 
27
28
  # basic search
28
29
  results = exa.search("This is a Exa query:")
29
30
 
30
- # autoprompted search
31
- results = exa.search("autopromptable query", use_autoprompt=True)
32
-
33
31
  # keyword search (non-neural)
34
32
  results = exa.search("Google-style query", type="keyword")
35
33
 
@@ -42,14 +40,10 @@ exa = Exa(api_key="your-api-key")
42
40
  # search and get text contents
43
41
  results = exa.search_and_contents("This is a Exa query:")
44
42
 
45
- # search and get highlights
46
- results = exa.search_and_contents("This is a Exa query:", highlights=True)
47
-
48
43
  # search and get contents with contents options
49
- results = exa.search_and_contents("This is a Exa query:",
50
- text={"include_html_tags": True, "max_characters": 1000},
51
- highlights={"highlights_per_url": 2, "num_sentences": 1, "query": "This is the highlight query:"})
52
-
44
+ results = exa.search_and_contents("This is a Exa query:",
45
+ text={"include_html_tags": True, "max_characters": 1000})
46
+
53
47
  # find similar documents
54
48
  results = exa.find_similar("https://example.com")
55
49
 
@@ -57,18 +51,14 @@ exa = Exa(api_key="your-api-key")
57
51
  results = exa.find_similar("https://example.com", exclude_source_domain=True)
58
52
 
59
53
  # find similar with contents
60
- results = exa.find_similar_and_contents("https://example.com", text=True, highlights=True)
54
+ results = exa.find_similar_and_contents("https://example.com", text=True)
61
55
 
62
56
  # get text contents
63
- results = exa.get_contents(["urls"])
64
-
65
- # get highlights
66
- results = exa.get_contents(["urls"], highlights=True)
57
+ results = exa.get_contents(["tesla.com"])
67
58
 
68
59
  # get contents with contents options
69
- results = exa.get_contents(["urls"],
70
- text={"include_html_tags": True, "max_characters": 1000},
71
- highlights={"highlights_per_url": 2, "num_sentences": 1, "query": "This is the highlight query:"})
60
+ results = exa.get_contents(["urls"],
61
+ text={"include_html_tags": True, "max_characters": 1000})
72
62
 
73
63
  # basic answer
74
64
  response = exa.answer("This is a query to answer a question")
@@ -83,5 +73,37 @@ exa = Exa(api_key="your-api-key")
83
73
  for chunk in response:
84
74
  print(chunk, end='', flush=True)
85
75
 
76
+ # research task example – answer a question with citations
77
+ # Example prompt & schema inspired by the TypeScript example.
78
+ QUESTION = (
79
+ "Summarize the history of San Francisco highlighting one or two major events "
80
+ "for each decade from 1850 to 1950"
81
+ )
82
+ OUTPUT_SCHEMA: Dict[str, Any] = {
83
+ "type": "object",
84
+ "required": ["timeline"],
85
+ "properties": {
86
+ "timeline": {
87
+ "type": "array",
88
+ "items": {
89
+ "type": "object",
90
+ "required": ["decade", "notableEvents"],
91
+ "properties": {
92
+ "decade": {
93
+ "type": "string",
94
+ "description": 'Decade label e.g. "1850s"',
95
+ },
96
+ "notableEvents": {
97
+ "type": "string",
98
+ "description": "A summary of notable events.",
99
+ },
100
+ },
101
+ },
102
+ },
103
+ },
104
+ }
105
+ resp = exa.research.create_task(
106
+ input_instructions=QUESTION,
107
+ output_schema=OUTPUT_SCHEMA,
108
+ )
86
109
  ```
87
-
@@ -38,6 +38,8 @@ from exa_py.utils import (
38
38
  )
39
39
  from .websets import WebsetsClient
40
40
  from .websets.core.base import ExaJSONEncoder
41
+ from .research.client import ResearchClient, AsyncResearchClient
42
+ from .research.models import ResearchTaskResponse # noqa: E402,F401
41
43
 
42
44
  is_beta = os.getenv("IS_BETA") == "True"
43
45
 
@@ -56,7 +58,7 @@ def snake_to_camel(snake_str: str) -> str:
56
58
  return "$schema"
57
59
  if snake_str == "not_":
58
60
  return "not"
59
-
61
+
60
62
  components = snake_str.split("_")
61
63
  return components[0] + "".join(x.title() for x in components[1:])
62
64
 
@@ -261,6 +263,7 @@ class JSONSchema(TypedDict, total=False):
261
263
  """Represents a JSON Schema definition used for structured summary output.
262
264
  To learn more visit https://json-schema.org/overview/what-is-jsonschema.
263
265
  """
266
+
264
267
  schema_: str # This will be converted to "$schema" in JSON
265
268
  title: str
266
269
  description: str
@@ -288,7 +291,7 @@ class SummaryContentsOptions(TypedDict, total=False):
288
291
 
289
292
  query: str
290
293
  schema: JSONSchema
291
-
294
+
292
295
 
293
296
  class ExtrasOptions(TypedDict, total=False):
294
297
  """A class representing additional extraction fields (e.g. links, images)"""
@@ -669,7 +672,7 @@ class AnswerResponse:
669
672
  citations (List[AnswerResult]): A list of citations used to generate the answer.
670
673
  """
671
674
 
672
- answer: str
675
+ answer: Union[str, dict[str, Any]]
673
676
  citations: List[AnswerResult]
674
677
 
675
678
  def __str__(self):
@@ -765,9 +768,9 @@ class AsyncStreamAnswerResponse:
765
768
  content = chunk["choices"][0]["delta"].get("content")
766
769
 
767
770
  if (
768
- "citations" in chunk
769
- and chunk["citations"]
770
- and chunk["citations"] != "null"
771
+ "citations" in chunk
772
+ and chunk["citations"]
773
+ and chunk["citations"] != "null"
771
774
  ):
772
775
  citations = [
773
776
  AnswerResult(**to_snake_case(s)) for s in chunk["citations"]
@@ -776,6 +779,7 @@ class AsyncStreamAnswerResponse:
776
779
  stream_chunk = StreamChunk(content=content, citations=citations)
777
780
  if stream_chunk.has_data():
778
781
  yield stream_chunk
782
+
779
783
  return generator()
780
784
 
781
785
  def close(self) -> None:
@@ -835,6 +839,37 @@ def nest_fields(original_dict: Dict, fields_to_nest: List[str], new_key: str):
835
839
  return original_dict
836
840
 
837
841
 
842
@dataclass
class ResearchTaskResponse:
    """A class representing the response for a research task.

    Attributes:
        id (str): The unique identifier for the research request.
        status (str): Status of the research request.
        output (Optional[Dict[str, Any]]): The answer structured as JSON, if available.
        citations (Dict[str, List[_Result]]): Citations used to generate the answer,
            grouped by root field in the output schema.
    """

    id: str
    status: str
    output: Optional[Dict[str, Any]]
    citations: Dict[str, List["_Result"]]

    def __str__(self):
        output_repr = (
            json.dumps(self.output, indent=2, ensure_ascii=False)
            if self.output is not None
            else "None"
        )
        # Iterate .items() — iterating the dict directly would yield only the
        # schema field names and silently drop the citation results.
        citations_str = "\n\n".join(
            f"{field}:\n" + "\n".join(str(src) for src in sources)
            for field, sources in self.citations.items()
        )
        return (
            f"ID: {self.id}\n"
            f"Status: {self.status}\n"
            f"Output: {output_repr}\n\n"
            f"Citations:\n{citations_str}"
        )
+
872
+
838
873
  class Exa:
839
874
  """A client for interacting with Exa API."""
840
875
 
@@ -842,7 +877,7 @@ class Exa:
842
877
  self,
843
878
  api_key: Optional[str],
844
879
  base_url: str = "https://api.exa.ai",
845
- user_agent: str = "exa-py 1.12.0",
880
+ user_agent: str = "exa-py 1.12.3",
846
881
  ):
847
882
  """Initialize the Exa client with the provided API key and optional base URL and user agent.
848
883
 
@@ -859,10 +894,23 @@ class Exa:
859
894
  "API key must be provided as an argument or in EXA_API_KEY environment variable"
860
895
  )
861
896
  self.base_url = base_url
862
- self.headers = {"x-api-key": api_key, "User-Agent": user_agent, "Content-Type": "application/json"}
897
+ self.headers = {
898
+ "x-api-key": api_key,
899
+ "User-Agent": user_agent,
900
+ "Content-Type": "application/json",
901
+ }
863
902
  self.websets = WebsetsClient(self)
903
+ # Research tasks client (new, mirrors Websets design)
904
+ self.research = ResearchClient(self)
864
905
 
865
- def request(self, endpoint: str, data: Optional[Union[Dict[str, Any], str]] = None, method: str = "POST", params: Optional[Dict[str, Any]] = None) -> Union[Dict[str, Any], requests.Response]:
906
+ def request(
907
+ self,
908
+ endpoint: str,
909
+ data: Optional[Union[Dict[str, Any], str]] = None,
910
+ method: str = "POST",
911
+ params: Optional[Dict[str, Any]] = None,
912
+ force_stream: Optional[bool] = False,
913
+ ) -> Union[Dict[str, Any], requests.Response]:
866
914
  """Send a request to the Exa API, optionally streaming if data['stream'] is True.
867
915
 
868
916
  Args:
@@ -885,13 +933,13 @@ class Exa:
885
933
  else:
886
934
  # Otherwise, serialize the dictionary to JSON if it exists
887
935
  json_data = json.dumps(data, cls=ExaJSONEncoder) if data else None
888
-
889
- if data and data.get("stream"):
936
+
937
+ if (data and data.get("stream")) or force_stream:
890
938
  res = requests.post(
891
- self.base_url + endpoint,
939
+ self.base_url + endpoint,
892
940
  data=json_data,
893
- headers=self.headers,
894
- stream=True
941
+ headers=self.headers,
942
+ stream=True,
895
943
  )
896
944
  return res
897
945
 
@@ -901,20 +949,14 @@ class Exa:
901
949
  )
902
950
  elif method.upper() == "POST":
903
951
  res = requests.post(
904
- self.base_url + endpoint,
905
- data=json_data,
906
- headers=self.headers
952
+ self.base_url + endpoint, data=json_data, headers=self.headers
907
953
  )
908
954
  elif method.upper() == "PATCH":
909
955
  res = requests.patch(
910
- self.base_url + endpoint,
911
- data=json_data,
912
- headers=self.headers
956
+ self.base_url + endpoint, data=json_data, headers=self.headers
913
957
  )
914
958
  elif method.upper() == "DELETE":
915
- res = requests.delete(
916
- self.base_url + endpoint, headers=self.headers
917
- )
959
+ res = requests.delete(self.base_url + endpoint, headers=self.headers)
918
960
  else:
919
961
  raise ValueError(f"Unsupported HTTP method: {method}")
920
962
 
@@ -1845,6 +1887,7 @@ class Exa:
1845
1887
  text: Optional[bool] = False,
1846
1888
  system_prompt: Optional[str] = None,
1847
1889
  model: Optional[Literal["exa", "exa-pro"]] = None,
1890
+ output_schema: Optional[dict[str, Any]] = None,
1848
1891
  ) -> Union[AnswerResponse, StreamAnswerResponse]: ...
1849
1892
 
1850
1893
  def answer(
@@ -1855,6 +1898,7 @@ class Exa:
1855
1898
  text: Optional[bool] = False,
1856
1899
  system_prompt: Optional[str] = None,
1857
1900
  model: Optional[Literal["exa", "exa-pro"]] = None,
1901
+ output_schema: Optional[dict[str, Any]] = None,
1858
1902
  ) -> Union[AnswerResponse, StreamAnswerResponse]:
1859
1903
  """Generate an answer to a query using Exa's search and LLM capabilities.
1860
1904
 
@@ -1863,6 +1907,7 @@ class Exa:
1863
1907
  text (bool, optional): Whether to include full text in the results. Defaults to False.
1864
1908
  system_prompt (str, optional): A system prompt to guide the LLM's behavior when generating the answer.
1865
1909
  model (str, optional): The model to use for answering. Either "exa" or "exa-pro". Defaults to None.
1910
+ output_schema (dict[str, Any], optional): JSON schema describing the desired answer structure.
1866
1911
 
1867
1912
  Returns:
1868
1913
  AnswerResponse: An object containing the answer and citations.
@@ -1892,6 +1937,7 @@ class Exa:
1892
1937
  text: bool = False,
1893
1938
  system_prompt: Optional[str] = None,
1894
1939
  model: Optional[Literal["exa", "exa-pro"]] = None,
1940
+ output_schema: Optional[dict[str, Any]] = None,
1895
1941
  ) -> StreamAnswerResponse:
1896
1942
  """Generate a streaming answer response.
1897
1943
 
@@ -1900,7 +1946,7 @@ class Exa:
1900
1946
  text (bool): Whether to include full text in the results. Defaults to False.
1901
1947
  system_prompt (str, optional): A system prompt to guide the LLM's behavior when generating the answer.
1902
1948
  model (str, optional): The model to use for answering. Either "exa" or "exa-pro". Defaults to None.
1903
-
1949
+ output_schema (dict[str, Any], optional): JSON schema describing the desired answer structure.
1904
1950
  Returns:
1905
1951
  StreamAnswerResponse: An object that can be iterated over to retrieve (partial text, partial citations).
1906
1952
  Each iteration yields a tuple of (Optional[str], Optional[List[AnswerResult]]).
@@ -1911,9 +1957,12 @@ class Exa:
1911
1957
  raw_response = self.request("/answer", options)
1912
1958
  return StreamAnswerResponse(raw_response)
1913
1959
 
1960
+
1914
1961
  class AsyncExa(Exa):
1915
1962
  def __init__(self, api_key: str, api_base: str = "https://api.exa.ai"):
1916
1963
  super().__init__(api_key, api_base)
1964
+ # Override the synchronous ResearchClient with its async counterpart.
1965
+ self.research = AsyncResearchClient(self)
1917
1966
  self._client = None
1918
1967
 
1919
1968
  @property
@@ -1921,13 +1970,13 @@ class AsyncExa(Exa):
1921
1970
  # this may only be a
1922
1971
  if self._client is None:
1923
1972
  self._client = httpx.AsyncClient(
1924
- base_url=self.base_url,
1925
- headers=self.headers,
1926
- timeout=60
1973
+ base_url=self.base_url, headers=self.headers, timeout=60
1927
1974
  )
1928
1975
  return self._client
1929
1976
 
1930
- async def async_request(self, endpoint: str, data):
1977
+ async def async_request(
1978
+ self, endpoint: str, data, force_stream: Optional[bool] = False
1979
+ ):
1931
1980
  """Send a POST request to the Exa API, optionally streaming if data['stream'] is True.
1932
1981
 
1933
1982
  Args:
@@ -1941,17 +1990,16 @@ class AsyncExa(Exa):
1941
1990
  Raises:
1942
1991
  ValueError: If the request fails (non-200 status code).
1943
1992
  """
1944
- if data.get("stream"):
1993
+ if data.get("stream") or force_stream:
1945
1994
  request = httpx.Request(
1946
- 'POST',
1947
- self.base_url + endpoint,
1948
- json=data,
1949
- headers=self.headers
1995
+ "POST", self.base_url + endpoint, json=data, headers=self.headers
1950
1996
  )
1951
1997
  res = await self.client.send(request, stream=True)
1952
1998
  return res
1953
1999
 
1954
- res = await self.client.post(self.base_url + endpoint, json=data, headers=self.headers)
2000
+ res = await self.client.post(
2001
+ self.base_url + endpoint, json=data, headers=self.headers
2002
+ )
1955
2003
  if res.status_code != 200:
1956
2004
  raise ValueError(
1957
2005
  f"Request failed with status code {res.status_code}: {res.text}"
@@ -2189,6 +2237,7 @@ class AsyncExa(Exa):
2189
2237
  text: Optional[bool] = False,
2190
2238
  system_prompt: Optional[str] = None,
2191
2239
  model: Optional[Literal["exa", "exa-pro"]] = None,
2240
+ output_schema: Optional[dict[str, Any]] = None,
2192
2241
  ) -> Union[AnswerResponse, StreamAnswerResponse]:
2193
2242
  """Generate an answer to a query using Exa's search and LLM capabilities.
2194
2243
 
@@ -2197,6 +2246,7 @@ class AsyncExa(Exa):
2197
2246
  text (bool, optional): Whether to include full text in the results. Defaults to False.
2198
2247
  system_prompt (str, optional): A system prompt to guide the LLM's behavior when generating the answer.
2199
2248
  model (str, optional): The model to use for answering. Either "exa" or "exa-pro". Defaults to None.
2249
+ output_schema (dict[str, Any], optional): JSON schema describing the desired answer structure.
2200
2250
 
2201
2251
  Returns:
2202
2252
  AnswerResponse: An object containing the answer and citations.
@@ -2226,6 +2276,7 @@ class AsyncExa(Exa):
2226
2276
  text: bool = False,
2227
2277
  system_prompt: Optional[str] = None,
2228
2278
  model: Optional[Literal["exa", "exa-pro"]] = None,
2279
+ output_schema: Optional[dict[str, Any]] = None,
2229
2280
  ) -> AsyncStreamAnswerResponse:
2230
2281
  """Generate a streaming answer response.
2231
2282
 
@@ -2234,7 +2285,7 @@ class AsyncExa(Exa):
2234
2285
  text (bool): Whether to include full text in the results. Defaults to False.
2235
2286
  system_prompt (str, optional): A system prompt to guide the LLM's behavior when generating the answer.
2236
2287
  model (str, optional): The model to use for answering. Either "exa" or "exa-pro". Defaults to None.
2237
-
2288
+ output_schema (dict[str, Any], optional): JSON schema describing the desired answer structure.
2238
2289
  Returns:
2239
2290
  AsyncStreamAnswerResponse: An object that can be iterated over to retrieve (partial text, partial citations).
2240
2291
  Each iteration yields a tuple of (Optional[str], Optional[List[AnswerResult]]).
@@ -0,0 +1,8 @@
1
+ from .client import ResearchClient, AsyncResearchClient
2
+ from .models import ResearchTaskResponse
3
+
4
+ __all__ = [
5
+ "ResearchClient",
6
+ "AsyncResearchClient",
7
+ "ResearchTaskResponse",
8
+ ]
@@ -0,0 +1,257 @@
1
+ """Lightweight research client wrappers for the Exa REST API.
2
+
3
+ This module purposefully keeps its import surface minimal to avoid circular
4
+ import problems with :pymod:`exa_py.api`. Any heavy dependencies (including
5
+ `exa_py.api` itself) are imported lazily **inside** functions. This means
6
+ that type-checkers still see the full, precise types via the ``TYPE_CHECKING``
7
+ block, but at runtime we only pay the cost if/when a helper is actually used.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple
13
+
14
+ if TYPE_CHECKING: # pragma: no cover – only for static analysers
15
+ # Import with full type info when static type-checking. `_Result` still
16
+ # lives in ``exa_py.api`` but the response model moved to
17
+ # ``exa_py.research.models``.
18
+ from ..api import _Result # noqa: F401
19
+ from .models import ResearchTaskResponse # noqa: F401
20
+
21
+ # ---------------------------------------------------------------------------
22
+ # Public, user-facing clients
23
+ # ---------------------------------------------------------------------------
24
+
25
+
26
class ResearchClient:
    """Synchronous helper namespace accessed via :pyattr:`Exa.research`."""

    def __init__(self, parent_client):
        # A reference to the *already-constructed* ``Exa`` instance so that we
        # can piggy-back on its HTTP plumbing (headers, base URL, retries, …).
        self._client = parent_client

    # ------------------------------------------------------------------
    # API surface
    # ------------------------------------------------------------------
    def create_task(
        self,
        *,
        input_instructions: str,
        output_schema: Dict[str, Any],
    ) -> "ResearchTaskResponse":
        """Submit a research request to the Exa backend.

        The public API remains synchronous – the function only returns once
        the task has finished and the final structured answer is available.
        Internally, however, the endpoint now streams *progress* updates via
        Server-Sent Events (SSE).  We therefore initiate a streaming request
        and keep reading until we receive the terminal ``{"tag": "complete"}``
        chunk, which carries the exact same payload shape that the blocking
        variant returned previously.  Any ``{"tag": "progress"}`` chunks are
        ignored, while ``{"tag": "error"}`` chunks result in an exception.

        Parameters
        ----------
        input_instructions:
            Natural-language instructions that describe *what* should be
            researched or extracted.
        output_schema:
            JSON-schema describing the desired structured output format.

        Raises
        ------
        RuntimeError
            If the server reports an error chunk, a ``complete`` chunk
            arrives with no data, or the stream ends before completion.
        """

        # Local import: keeps module import surface minimal (see module docstring).
        import json

        payload = {
            "input": {"instructions": input_instructions},
            "output": {"schema": output_schema},
        }

        # force_stream=True makes Exa.request return the raw requests.Response
        # so we can consume the SSE body incrementally.
        raw_response = self._client.request(
            "/research/tasks", payload, force_stream=True
        )

        def _handle_payload(tag: Optional[str], payload_dict: Dict[str, Any]):
            """Inner helper handling decoded JSON chunks.

            Returns a parsed ResearchTaskResponse for terminal chunks,
            ``None`` for chunks that should be ignored.
            """
            # The tag may arrive either as the SSE "event:" field or embedded
            # in the JSON payload itself; the explicit field wins.
            if tag is None:
                tag_local = payload_dict.get("tag")
            else:
                tag_local = tag

            if tag_local == "progress":
                return None  # ignore
            if tag_local == "error":
                msg = payload_dict.get("error", {}).get("message", "Unknown error")
                raise RuntimeError(f"Research task failed: {msg}")
            if tag_local == "complete":
                data_obj = payload_dict.get("data")
                if data_obj is None:
                    raise RuntimeError("Malformed 'complete' chunk with no data")
                return _parse_research_response(data_obj)

            # Fallback: if looks like final object
            if {"id", "status"}.issubset(payload_dict.keys()):
                return _parse_research_response(payload_dict)
            return None

        # ------------------------------------------------------------------
        # Minimal SSE parser (sync)
        # ------------------------------------------------------------------
        # SSE framing: "event:"/"data:" field lines accumulate until a blank
        # line, which terminates the event and triggers dispatch.
        event_name: Optional[str] = None
        data_buf: str = ""

        for raw_line in raw_response.iter_lines(decode_unicode=True):
            line = raw_line
            if line == "":
                # Blank line = end of one SSE event; dispatch the buffer.
                if data_buf:
                    try:
                        payload_dict = json.loads(data_buf)
                    except json.JSONDecodeError:
                        # Malformed event: drop it and keep reading the stream.
                        data_buf = ""
                        event_name = None
                        continue
                    maybe_resp = _handle_payload(event_name, payload_dict)
                    if maybe_resp is not None:
                        raw_response.close()
                        return maybe_resp
                    # reset after event
                    data_buf = ""
                    event_name = None
                continue

            if line.startswith("event:"):
                event_name = line[len("event:") :].strip()
            elif line.startswith("data:"):
                # NOTE(review): multi-line "data:" fields are concatenated
                # without a separator rather than joined with "\n" as the SSE
                # spec prescribes; harmless for the JSON payloads sent here,
                # since valid JSON cannot contain raw newlines inside tokens.
                data_buf += line[len("data:") :].strip()

        # Process any remaining buffer (in case stream closed without blank line)
        if data_buf:
            try:
                payload_dict = json.loads(data_buf)
                maybe_resp = _handle_payload(event_name, payload_dict)
                if maybe_resp is not None:
                    raw_response.close()
                    return maybe_resp
            except json.JSONDecodeError:
                pass

        raise RuntimeError("Stream ended before completion of research task")

    def get_task(self, id: str):  # noqa: D401 – imperative mood is fine
        """Placeholder endpoint – not yet implemented on the server side."""
        raise NotImplementedError(
            "`exa.research.get_task` is not available yet. Please open an "
            "issue if you need this sooner."
        )
146
+
147
+
148
class AsyncResearchClient:
    """Async counterpart used via :pyattr:`AsyncExa.research`."""

    def __init__(self, parent_client):
        # The already-constructed ``AsyncExa`` instance whose HTTP plumbing
        # (headers, base URL, httpx client) this namespace reuses.
        self._client = parent_client

    async def create_task(
        self,
        *,
        input_instructions: str,
        output_schema: Dict[str, Any],
    ) -> "ResearchTaskResponse":
        """Async variant mirroring the synchronous implementation above.

        Streams SSE chunks from ``/research/tasks`` and returns once the
        terminal ``complete`` chunk arrives; raises ``RuntimeError`` on an
        ``error`` chunk, a dataless ``complete`` chunk, or premature EOF.
        """

        # Local import: keeps module import surface minimal (see module docstring).
        import json

        payload = {
            "input": {"instructions": input_instructions},
            "output": {"schema": output_schema},
        }

        # force_stream=True yields the raw httpx streaming response.
        raw_response = await self._client.async_request(
            "/research/tasks", payload, force_stream=True
        )

        # NOTE(review): declared async for interface symmetry with the outer
        # coroutine even though it currently contains no awaits.
        async def _handle_payload_async(
            tag: Optional[str], payload_dict: Dict[str, Any]
        ):
            # The tag may come from the SSE "event:" field or from the JSON
            # payload itself; the explicit field wins.
            if tag is None:
                tag_local = payload_dict.get("tag")
            else:
                tag_local = tag

            if tag_local == "progress":
                return None
            if tag_local == "error":
                msg = payload_dict.get("error", {}).get("message", "Unknown error")
                raise RuntimeError(f"Research task failed: {msg}")
            if tag_local == "complete":
                data_obj = payload_dict.get("data")
                if data_obj is None:
                    raise RuntimeError("Malformed 'complete' chunk with no data")
                return _parse_research_response(data_obj)
            # Fallback: a chunk that already looks like the final task object.
            if {"id", "status"}.issubset(payload_dict.keys()):
                return _parse_research_response(payload_dict)
            return None

        # SSE framing: field lines accumulate until a blank line terminates
        # the event and triggers dispatch.
        event_name: Optional[str] = None
        data_buf: str = ""

        async for line in raw_response.aiter_lines():
            if line == "":
                if data_buf:
                    try:
                        payload_dict = json.loads(data_buf)
                    except json.JSONDecodeError:
                        # Malformed event: drop it and keep reading.
                        data_buf = ""
                        event_name = None
                        continue
                    maybe_resp = await _handle_payload_async(event_name, payload_dict)
                    if maybe_resp is not None:
                        await raw_response.aclose()
                        return maybe_resp
                    data_buf = ""
                    event_name = None
                continue

            if line.startswith("event:"):
                event_name = line[len("event:") :].strip()
            elif line.startswith("data:"):
                # NOTE(review): multi-line "data:" fields are concatenated
                # without the "\n" separator the SSE spec prescribes; fine for
                # the JSON payloads sent here.
                data_buf += line[len("data:") :].strip()

        # Flush any trailing event if the stream closed without a blank line.
        if data_buf:
            try:
                payload_dict = json.loads(data_buf)
                maybe_resp = await _handle_payload_async(event_name, payload_dict)
                if maybe_resp is not None:
                    await raw_response.aclose()
                    return maybe_resp
            except json.JSONDecodeError:
                pass

        raise RuntimeError("Stream ended before completion of research task")

    async def get_task(self, id: str):  # noqa: D401
        """Placeholder endpoint – not yet implemented on the server side."""
        raise NotImplementedError(
            "`exa.research.get_task` is not available yet. Please open an "
            "issue if you need this sooner."
        )
237
+
238
+
239
+ # ---------------------------------------------------------------------------
240
+ # Internal helpers (lazy imports to avoid cycles)
241
+ # ---------------------------------------------------------------------------
242
+
243
+
244
+ def _parse_research_response(raw: Dict[str, Any]):
245
+ """Transform camel-case API payload into rich Python objects."""
246
+ from .models import ResearchTaskResponse
247
+ from ..api import _Result, to_snake_case
248
+
249
+ return ResearchTaskResponse(
250
+ id=raw["id"],
251
+ status=raw["status"],
252
+ output=raw.get("output"),
253
+ citations={
254
+ key: [_Result(**to_snake_case(c)) for c in citations]
255
+ for key, citations in raw.get("citations", {}).items()
256
+ },
257
+ )
@@ -0,0 +1,57 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from dataclasses import dataclass
5
+ from typing import Any, Dict, List, Optional
6
+
7
+ # Local import placed inside TYPE_CHECKING block to avoid runtime cycles.
8
+ from typing import TYPE_CHECKING
9
+
10
+ if TYPE_CHECKING: # pragma: no cover – for static analysers only
11
+ from ..api import _Result # noqa: F401
12
+
13
+
14
@dataclass
class ResearchTaskResponse:
    """Structured response returned from the /research/tasks endpoint.

    Attributes
    ----------
    id:
        Unique identifier for the research task.
    status:
        Current task status.
    output:
        JSON-serialisable answer generated by Exa (may be ``None`` until the
        task completes).
    citations:
        Mapping from *root field* in the output schema to the list of search
        results that were used to generate that part of the answer.
    """

    id: str
    status: str
    output: Optional[Dict[str, Any]]
    citations: Dict[str, List["_Result"]]

    # ---------------------------------------------------------------------
    # Pretty representation helpers
    # ---------------------------------------------------------------------
    def __str__(self) -> str:  # pragma: no cover – convenience only
        output_repr = (
            json.dumps(self.output, indent=2, ensure_ascii=False)
            if self.output is not None
            else "None"
        )
        # Iterate key/value pairs explicitly: bare iteration over the dict
        # would yield only the field names and silently drop the cited
        # search results themselves.
        citations_str = "\n\n".join(
            f"{field}:\n" + "\n".join(str(src) for src in sources)
            for field, sources in self.citations.items()
        )
        return (
            f"ID: {self.id}\n"
            f"Status: {self.status}\n"
            f"Output: {output_repr}\n\n"
            f"Citations:\n{citations_str}"
        )
53
+
54
+
55
# Public names exported by ``from exa_py.research.models import *``.
__all__ = ["ResearchTaskResponse"]
@@ -18,14 +18,14 @@ class CanceledReason(Enum):
18
18
 
19
19
 
20
20
  class CreateCriterionParameters(ExaBaseModel):
21
- description: constr(min_length=1, max_length=300)
21
+ description: constr(min_length=1)
22
22
  """
23
23
  The description of the criterion
24
24
  """
25
25
 
26
26
 
27
27
  class CreateEnrichmentParameters(ExaBaseModel):
28
- description: constr(min_length=1, max_length=5000)
28
+ description: constr(min_length=1)
29
29
  """
30
30
  Provide a description of the enrichment task you want to perform to each Webset Item.
31
31
  """
@@ -88,7 +88,7 @@ class CreateWebsetSearchParameters(ExaBaseModel):
88
88
 
89
89
  The actual number of Items found may be less than this number depending on the query complexity.
90
90
  """
91
- query: constr(min_length=1, max_length=5000) = Field(
91
+ query: constr(min_length=1) = Field(
92
92
  ...,
93
93
  examples=[
94
94
  'Marketing agencies based in the US, that focus on consumer products. Get brands worked with and city'
@@ -136,7 +136,7 @@ class CreateWebsetSearchParameters(ExaBaseModel):
136
136
 
137
137
 
138
138
  class Criterion(ExaBaseModel):
139
- description: constr(min_length=1, max_length=300)
139
+ description: constr(min_length=1)
140
140
  """
141
141
  The description of the criterion
142
142
  """
@@ -338,7 +338,7 @@ class Search(ExaBaseModel):
338
338
  Create initial search for the Webset.
339
339
  """
340
340
 
341
- query: constr(min_length=1, max_length=5000) = Field(
341
+ query: constr(min_length=1) = Field(
342
342
  ...,
343
343
  examples=[
344
344
  'Marketing agencies based in the US, that focus on consumer products.'
@@ -405,7 +405,7 @@ class UpdateWebhookParameters(ExaBaseModel):
405
405
 
406
406
 
407
407
  class UpdateWebsetRequest(ExaBaseModel):
408
- metadata: Optional[Dict[str, constr(max_length=1000)]] = None
408
+ metadata: Optional[Dict[str, str]] = None
409
409
  """
410
410
  Set of key-value pairs you want to associate with this object.
411
411
  """
@@ -564,7 +564,7 @@ class WebsetCreatedEvent(ExaBaseModel):
564
564
 
565
565
  class WebsetCustomEntity(ExaBaseModel):
566
566
  type: Literal['custom']
567
- description: constr(min_length=2, max_length=200)
567
+ description: constr(min_length=2)
568
568
  """
569
569
  When you decide to use a custom entity, this is the description of the entity.
570
570
 
@@ -972,7 +972,7 @@ class WebsetSearch(ExaBaseModel):
972
972
  """
973
973
  The status of the search
974
974
  """
975
- query: constr(min_length=1, max_length=5000)
975
+ query: constr(min_length=1)
976
976
  """
977
977
  The query used to create the search.
978
978
  """
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "exa-py"
3
- version = "1.12.0"
3
+ version = "1.13.0"
4
4
  description = "Python SDK for Exa API."
5
5
  authors = ["Exa AI <hello@exa.ai>"]
6
6
  readme = "README.md"
@@ -32,14 +32,12 @@ in-project = true
32
32
 
33
33
  [project]
34
34
  name = "exa-py"
35
- version = "1.12.0"
35
+ version = "1.12.3"
36
36
  description = "Python SDK for Exa API."
37
37
  readme = "README.md"
38
38
  requires-python = ">=3.9"
39
- license = {text = "MIT"}
40
- authors = [
41
- {name = "Exa AI", email = "hello@exa.ai"}
42
- ]
39
+ license = { text = "MIT" }
40
+ authors = [{ name = "Exa AI", email = "hello@exa.ai" }]
43
41
  dependencies = [
44
42
  "requests>=2.32.3",
45
43
  "typing-extensions>=4.12.2",
File without changes
File without changes
File without changes