dhisana 0.0.1.dev85__py3-none-any.whl → 0.0.1.dev236__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. dhisana/schemas/common.py +33 -0
  2. dhisana/schemas/sales.py +224 -23
  3. dhisana/utils/add_mapping.py +72 -63
  4. dhisana/utils/apollo_tools.py +739 -109
  5. dhisana/utils/built_with_api_tools.py +4 -2
  6. dhisana/utils/cache_output_tools.py +23 -23
  7. dhisana/utils/check_email_validity_tools.py +456 -458
  8. dhisana/utils/check_for_intent_signal.py +1 -2
  9. dhisana/utils/check_linkedin_url_validity.py +34 -8
  10. dhisana/utils/clay_tools.py +3 -2
  11. dhisana/utils/clean_properties.py +3 -1
  12. dhisana/utils/compose_salesnav_query.py +0 -1
  13. dhisana/utils/compose_search_query.py +7 -3
  14. dhisana/utils/composite_tools.py +0 -1
  15. dhisana/utils/dataframe_tools.py +2 -2
  16. dhisana/utils/email_body_utils.py +72 -0
  17. dhisana/utils/email_provider.py +375 -0
  18. dhisana/utils/enrich_lead_information.py +585 -85
  19. dhisana/utils/fetch_openai_config.py +129 -0
  20. dhisana/utils/field_validators.py +1 -1
  21. dhisana/utils/g2_tools.py +0 -1
  22. dhisana/utils/generate_content.py +0 -1
  23. dhisana/utils/generate_email.py +69 -16
  24. dhisana/utils/generate_email_response.py +298 -41
  25. dhisana/utils/generate_flow.py +0 -1
  26. dhisana/utils/generate_linkedin_connect_message.py +19 -6
  27. dhisana/utils/generate_linkedin_response_message.py +156 -65
  28. dhisana/utils/generate_structured_output_internal.py +351 -131
  29. dhisana/utils/google_custom_search.py +150 -44
  30. dhisana/utils/google_oauth_tools.py +721 -0
  31. dhisana/utils/google_workspace_tools.py +391 -25
  32. dhisana/utils/hubspot_clearbit.py +3 -1
  33. dhisana/utils/hubspot_crm_tools.py +771 -167
  34. dhisana/utils/instantly_tools.py +3 -1
  35. dhisana/utils/lusha_tools.py +10 -7
  36. dhisana/utils/mailgun_tools.py +150 -0
  37. dhisana/utils/microsoft365_tools.py +447 -0
  38. dhisana/utils/openai_assistant_and_file_utils.py +121 -177
  39. dhisana/utils/openai_helpers.py +19 -16
  40. dhisana/utils/parse_linkedin_messages_txt.py +2 -3
  41. dhisana/utils/profile.py +37 -0
  42. dhisana/utils/proxy_curl_tools.py +507 -206
  43. dhisana/utils/proxycurl_search_leads.py +426 -0
  44. dhisana/utils/research_lead.py +121 -68
  45. dhisana/utils/sales_navigator_crawler.py +1 -6
  46. dhisana/utils/salesforce_crm_tools.py +323 -50
  47. dhisana/utils/search_router.py +131 -0
  48. dhisana/utils/search_router_jobs.py +51 -0
  49. dhisana/utils/sendgrid_tools.py +126 -91
  50. dhisana/utils/serarch_router_local_business.py +75 -0
  51. dhisana/utils/serpapi_additional_tools.py +290 -0
  52. dhisana/utils/serpapi_google_jobs.py +117 -0
  53. dhisana/utils/serpapi_google_search.py +188 -0
  54. dhisana/utils/serpapi_local_business_search.py +129 -0
  55. dhisana/utils/serpapi_search_tools.py +363 -432
  56. dhisana/utils/serperdev_google_jobs.py +125 -0
  57. dhisana/utils/serperdev_local_business.py +154 -0
  58. dhisana/utils/serperdev_search.py +233 -0
  59. dhisana/utils/smtp_email_tools.py +576 -0
  60. dhisana/utils/test_connect.py +1765 -92
  61. dhisana/utils/trasform_json.py +95 -16
  62. dhisana/utils/web_download_parse_tools.py +0 -1
  63. dhisana/utils/zoominfo_tools.py +2 -3
  64. dhisana/workflow/test.py +1 -1
  65. {dhisana-0.0.1.dev85.dist-info → dhisana-0.0.1.dev236.dist-info}/METADATA +5 -2
  66. dhisana-0.0.1.dev236.dist-info/RECORD +100 -0
  67. {dhisana-0.0.1.dev85.dist-info → dhisana-0.0.1.dev236.dist-info}/WHEEL +1 -1
  68. dhisana-0.0.1.dev85.dist-info/RECORD +0 -81
  69. {dhisana-0.0.1.dev85.dist-info → dhisana-0.0.1.dev236.dist-info}/entry_points.txt +0 -0
  70. {dhisana-0.0.1.dev85.dist-info → dhisana-0.0.1.dev236.dist-info}/top_level.txt +0 -0
dhisana/utils/generate_structured_output_internal.py
@@ -1,217 +1,443 @@
  import asyncio
  import hashlib
  import json
- import os
- import re
- import time
  import logging
- import uuid
- from typing import Any, Dict, List, Optional, Tuple
+ import random

  from fastapi import HTTPException
- from pydantic import BaseModel, TypeAdapter
+ from pydantic import BaseModel
+
+ from openai import OpenAIError, RateLimitError
+ from openai.lib._parsing._completions import type_to_response_format_param
+
+ from json_repair import repair_json

- import openai
  from dhisana.utils import cache_output_tools
- from dhisana.utils.openai_helpers import get_openai_access_token
+ from dhisana.utils.fetch_openai_config import (
+     _extract_config,
+     create_async_openai_client,
+ )
+ from typing import Any, Dict, List, Optional, Tuple, Union
+
+ from openai import OpenAIError, RateLimitError
+ from pydantic import BaseModel
+
+

+ # ──────────────────────────────────────────────────────────────────────────────
+ # 2. Vector-store utilities (unchanged logic, new client factory)
+ # ──────────────────────────────────────────────────────────────────────────────

- # --------------------------------------------------------------------------
- # Utility: retrieve Vector Store and list its files (unchanged)
- # --------------------------------------------------------------------------

  async def get_vector_store_object(
-     vector_store_id: str,
-     tool_config: Optional[List[Dict]] = None
+     vector_store_id: str, tool_config: Optional[List[Dict]] = None
  ) -> Dict:
-     openai_key = get_openai_access_token(tool_config)
-     client_async = openai.AsyncOpenAI(api_key=openai_key)
-     return await client_async.vector_stores.retrieve(vector_store_id=vector_store_id)
-
+     client_async = create_async_openai_client(tool_config)
+     try:
+         return await client_async.vector_stores.retrieve(vector_store_id=vector_store_id)
+     except OpenAIError as e:
+         logging.error(f"Error retrieving vector store {vector_store_id}: {e}")
+         return None

  async def list_vector_store_files(
-     vector_store_id: str,
-     tool_config: Optional[List[Dict]] = None
+     vector_store_id: str, tool_config: Optional[List[Dict]] = None
  ) -> List:
-     openai_key = get_openai_access_token(tool_config)
-     client_async = openai.AsyncOpenAI(api_key=openai_key)
+     client_async = create_async_openai_client(tool_config)
      page = await client_async.vector_stores.files.list(vector_store_id=vector_store_id)
      return page.data


- # --------------------------------------------------------------------------
- # get_structured_output_internal:
- #   Updated to include search for first output with .type == 'message'
- # --------------------------------------------------------------------------
+ # ──────────────────────────────────────────────────────────────────────────────
+ # 3. Core logic – only the client initialisation lines changed
+ # ──────────────────────────────────────────────────────────────────────────────

  async def get_structured_output_internal(
      prompt: str,
      response_format: BaseModel,
      effort: str = "low",
-     tool_config: Optional[List[Dict]] = None
+     use_web_search: bool = False,
+     model: str = "gpt-5.1-chat",
+     tool_config: Optional[List[Dict]] = None,
+     use_cache: bool = True
  ):
      """
-     Makes a direct call to the new Responses API for structured output,
-     bypassing file_search. No vector store usage, no chain-of-thought.
+     Makes a direct call to the new Responses API for structured output.
+
+     On a 429 (rate-limit) error the call is retried once after
+     20 s + random exponential back-off.
      """
      try:
-         # For caching
+         # ─── caching bookkeeping ────────────────────────────────────────────
          response_type_str = response_format.__name__
-         message_hash = hashlib.md5(prompt.encode('utf-8')).hexdigest()
-         response_type_hash = hashlib.md5(response_type_str.encode('utf-8')).hexdigest()
+         message_hash = hashlib.md5(prompt.encode("utf-8")).hexdigest()
+         response_type_hash = hashlib.md5(response_type_str.encode("utf-8")).hexdigest()
          cache_key = f"{message_hash}:{response_type_hash}"
-         cached_response = cache_output_tools.retrieve_output(
-             "get_structured_output_internal",
-             cache_key
-         )
-         if cached_response is not None:
-             parsed_cached_response = response_format.parse_raw(cached_response)
-             return parsed_cached_response, "SUCCESS"

-         schema = response_format.model_json_schema()
-         schema["additionalProperties"] = False
-         schema["title"] = response_type_str
+         if use_cache:
+             cached_response = cache_output_tools.retrieve_output(
+                 "get_structured_output_internal", cache_key
+             )
+             if cached_response is not None:
+                 parsed_cached_response = response_format.parse_raw(cached_response)
+                 return parsed_cached_response, "SUCCESS"

+         # ─── JSON schema for function calling ───────────────────────────────
+         schema = type_to_response_format_param(response_format)
          json_schema_format = {
              "name": response_type_str,
              "type": "json_schema",
-             "schema": schema
+             "schema": schema["json_schema"]["schema"],
          }

-         openai_key = get_openai_access_token(tool_config)
-         client_async = openai.AsyncOpenAI(api_key=openai_key)
+         # ─── client initialisation (NEW) ────────────────────────────────────
+         client_async = create_async_openai_client(tool_config)

-         # Make the new "Responses" API call
-         completion = await client_async.responses.create(
-             input=[
-                 {"role": "system", "content": "You are a helpful AI. Output JSON only."},
-                 {"role": "user", "content": prompt}
-             ],
-             model="o3-mini",
-             reasoning={"effort": effort},
-             text={"format": json_schema_format},
-             store=False,
-         )
+         openai_cfg = _extract_config(tool_config, "openai")
+         # TODO: Azure OpenAI does not support web_search yet
+         if not openai_cfg:
+             use_web_search = False

-         if completion.output and len(completion.output) > 0:
+         # -------------------------------------------------------------------
+         # Internal helper to perform ONE attempt
+         # -------------------------------------------------------------------
+         async def _make_request():
+             if use_web_search and model.startswith("gpt-"):
+                 return await client_async.responses.create(
+                     input=[
+                         {"role": "system", "content": "You are a helpful AI. Output JSON only."},
+                         {"role": "user", "content": prompt},
+                     ],
+                     model=model,
+                     text={"format": json_schema_format},
+                     tool_choice="required",
+                     tools=[{"type": "web_search_preview"}],
+                     store=False,
+                 )
+             if model.startswith("o"):  # reasoning param only for "o" family
+                 return await client_async.responses.create(
+                     input=[
+                         {"role": "system", "content": "You are a helpful AI. Output JSON only."},
+                         {"role": "user", "content": prompt},
+                     ],
+                     model=model,
+                     reasoning={"effort": effort},
+                     text={"format": json_schema_format},
+                     store=False,
+                 )
+             return await client_async.responses.create(
+                 input=[
+                     {"role": "system", "content": "You are a helpful AI. Output JSON only."},
+                     {"role": "user", "content": prompt},
+                 ],
+                 model=model,
+                 text={"format": json_schema_format},
+                 store=False,
+             )
+
+         # -------------------------------------------------------------------
+         # Call with one retry on 429
+         # -------------------------------------------------------------------
+         max_retries = 1
+         attempt = 0
+         while True:
+             try:
+                 completion = await _make_request()
+                 break  # success → exit loop
+             except (RateLimitError, OpenAIError) as e:
+                 # Detect 429 / rate-limit
+                 is_rl = (
+                     isinstance(e, RateLimitError)
+                     or getattr(e, "status_code", None) == 429
+                     or "rate_limit" in str(e).lower()
+                 )
+                 if is_rl and attempt < max_retries:
+                     attempt += 1
+                     # 20 s base + exponential jitter
+                     wait_time = 20 + random.uniform(0, 2 ** attempt)
+                     logging.warning(
+                         f"Rate-limit hit (429). Waiting {wait_time:.2f}s then retrying "
+                         f"({attempt}/{max_retries})."
+                     )
+                     await asyncio.sleep(wait_time)
+                     continue  # retry once
+                 logging.error(f"OpenAI API error: {e}")
+                 raise HTTPException(status_code=502, detail="Error communicating with the OpenAI API.")
+
+         # ─── handle model output (unchanged) ────────────────────────────────
+         if completion and completion.output and len(completion.output) > 0:
              raw_text = None
-             # Find the first output whose type is 'message'
              for out in completion.output:
-                 if out.type == 'message' and out.content and len(out.content) > 0:
-                     raw_text = out.content[0].text
-                     break
+                 if out.type == "message" and out.content:
+                     for content_item in out.content:
+                         if hasattr(content_item, "text"):
+                             raw_text = content_item.text
+                             break
+                         else:
+                             logging.warning("request refused: %s", str(content_item))
+                             return "Request refused.", "FAIL"
+                     if raw_text:
+                         break

              if not raw_text or not raw_text.strip():
-                 return "No text returned", "FAIL"
+                 return "No text returned (possibly refusal or empty response)", "FAIL"

              try:
                  parsed_obj = response_format.parse_raw(raw_text)
-                 # Cache the successful result
                  cache_output_tools.cache_output(
-                     "get_structured_output_internal",
-                     cache_key,
-                     parsed_obj.json()
+                     "get_structured_output_internal", cache_key, parsed_obj.json()
                  )
                  return parsed_obj, "SUCCESS"
-             except json.JSONDecodeError:
+
+             except Exception:
                  logging.warning("ERROR: Could not parse JSON from model output.")
-                 return raw_text, "FAIL"
+                 try:
+                     fixed_json = repair_json(raw_text)
+                     parsed_obj = response_format.parse_raw(fixed_json)
+                     cache_output_tools.cache_output(
+                         "get_structured_output_internal", cache_key, parsed_obj.json()
+                     )
+                     return parsed_obj, "SUCCESS"
+                 except Exception as e2:
+                     logging.warning("JSON repair failed: %s", str(e2))
+                     return raw_text, "FAIL"
          else:
              return "No output returned", "FAIL"

-     except openai.OpenAIError as e:
+     except OpenAIError as e:
          logging.error(f"OpenAI API error: {e}")
-         raise HTTPException(
-             status_code=502,
-             detail="Error communicating with the OpenAI API."
-         )
+         raise HTTPException(status_code=502, detail="Error communicating with the OpenAI API.")
      except Exception as e:
          logging.error(f"Unexpected error: {e}")
-         raise HTTPException(
-             status_code=500,
-             detail="An unexpected error occurred while processing your request."
-         )
+         raise HTTPException(status_code=500, detail="Unexpected server error.")


- # --------------------------------------------------------------------------
- # get_structured_output_with_assistant_and_vector_store:
- #   Similarly updated to provide search for first output .type == 'message'
- # --------------------------------------------------------------------------
+
+ async def get_structured_output_with_mcp(
+     prompt: str,
+     response_format: BaseModel,
+     effort: str = "low",
+     use_web_search: bool = False,
+     model: str = "gpt-5.1-chat",
+     tool_config: Optional[List[Dict[str, Any]]] = None,
+ ) -> Tuple[Union[BaseModel, str], str]:
+     """
+     Sends a JSON-schema-constrained prompt to an OpenAI model, with an MCP
+     server configured as a `tool`.
+
+     * If the model returns a tool call that *requires approval*, the function
+       immediately returns a minimal object that satisfies `response_format`
+       with `"APPROVAL_PENDING"` in `response_summary`, along with the status
+       string ``"PENDING_APPROVAL"``.
+     * Once the tool has executed (the provider returns `mcp_tool_result`) or
+       the model replies directly with the JSON payload, the parsed object is
+       cached and returned with status ``"SUCCESS"``.
+     * Any MCP tool-listing messages are ignored.
+     """
+     # ─── Validate MCP configuration ────────────────────────────────────────────
+     mcp_cfg = _extract_config(tool_config, "mcpServer") or {}
+     server_label: str = mcp_cfg.get("serverLabel", "")
+     server_url: str | None = mcp_cfg.get("serverUrl")
+     api_key_header_name: str | None = mcp_cfg.get("apiKeyHeaderName")
+     api_key_header_value: str | None = mcp_cfg.get("apiKeyHeaderValue")
+
+     if not (server_url and api_key_header_name and api_key_header_value):
+         raise HTTPException(400, detail="MCP server configuration incomplete.")
+
+     # ─── Cache key (prompt + schema) ──────────────────────────────────────────
+     response_type_str = response_format.__name__
+     cache_key = (
+         f"{hashlib.md5(prompt.encode()).hexdigest()}:"
+         f"{hashlib.md5(response_type_str.encode()).hexdigest()}"
+     )
+     if (cached := cache_output_tools.retrieve_output("get_structured_output_with_mcp", cache_key)):
+         return response_format.parse_raw(cached), "SUCCESS"
+
+     # ─── JSON-schema format for `text` param ──────────────────────────────────
+     schema_cfg = type_to_response_format_param(response_format)
+     json_schema_format = {
+         "name": response_type_str,
+         "type": "json_schema",
+         "schema": schema_cfg["json_schema"]["schema"],
+     }
+
+     # ─── Build tool list ──────────────────────────────────────────────────────
+     tools: List[Dict[str, Any]] = [
+         {
+             "type": "mcp",
+             "server_label": server_label,
+             "server_url": server_url,
+             "headers": {api_key_header_name: api_key_header_value},
+             "require_approval": "never"
+         }
+     ]
+     if use_web_search and model.startswith("gpt-"):
+         tools.append({"type": "web_search_preview"})
+
+     # ─── Async OpenAI client ──────────────────────────────────────────────────
+     client_async = create_async_openai_client(tool_config)
+
+     async def _make_request():
+         kwargs: Dict[str, Any] = {
+             "input": [
+                 {"role": "system", "content": "You are a helpful AI. Output JSON only."},
+                 {"role": "user", "content": prompt},
+             ],
+             "model": model,
+             "text": {"format": json_schema_format},
+             "store": False,
+             "tools": tools,
+             "tool_choice": "required",
+         }
+         if model.startswith("o"):
+             kwargs["reasoning"] = {"effort": effort}
+         return await client_async.responses.create(**kwargs)
+
+     # ─── Retry once for 429s ──────────────────────────────────────────────────
+     for attempt in range(2):
+         try:
+             completion = await _make_request()
+             break
+         except (RateLimitError, OpenAIError) as exc:
+             if attempt == 0 and (
+                 isinstance(exc, RateLimitError)
+                 or getattr(exc, "status_code", None) == 429
+                 or "rate_limit" in str(exc).lower()
+             ):
+                 sleep_for = 20 + random.uniform(0, 2.0)
+                 logging.warning("429 rate-limit hit; retrying in %.1fs", sleep_for)
+                 await asyncio.sleep(sleep_for)
+                 continue
+             logging.error("OpenAI API error: %s", exc)
+             raise HTTPException(502, detail="Error communicating with the OpenAI API.") from exc
+     else:  # pragma: no cover
+         raise HTTPException(502, detail="OpenAI request retry loop failed.")
+
+     # ─── Parse the model's structured output ──────────────────────────────────
+     if not (completion and completion.output):
+         return "No output returned", "FAIL"
+
+     raw_text: str | None = None
+     status: str = "SUCCESS"
+
+     for out in completion.output:
+         # 1️⃣ Human approval required
+         if out.type == "mcp_approval_request":
+             logging.info("Tool call '%s' awaiting approval", out.name)
+             placeholder_obj = response_format.parse_obj({"response_summary": "APPROVAL_PENDING"})
+             return placeholder_obj, "PENDING_APPROVAL"
+
+         # 2️⃣ Ignore capability listings
+         if out.type == "mcp_list_tools":
+             continue
+
+         # 3️⃣ Tool finished: provider returned result object
+         if out.type == "mcp_tool_result":
+             try:
+                 # If result already matches schema, emit directly
+                 raw_text = (
+                     json.dumps(out.result)
+                     if isinstance(out.result, (dict, list))
+                     else json.dumps({"response_summary": str(out.result)})
+                 )
+             except Exception:  # pragma: no cover
+                 raw_text = json.dumps({"response_summary": "TOOL_EXECUTION_COMPLETE"})
+             break
+
+         # 4️⃣ Regular assistant message
+         if out.type == "message" and out.content:
+             for c in out.content:
+                 if hasattr(c, "text") and c.text:
+                     raw_text = c.text
+                     break
+             if raw_text:
+                 break
+
+         # 5️⃣ Anything else
+         logging.debug("Unhandled output type: %s", out.type)
+
+     if not raw_text or not raw_text.strip():
+         return "No response", status
+
+     # ─── Convert JSON -> pydantic object, with repair fallback ────────────────
+     try:
+         parsed_obj = response_format.parse_raw(raw_text)
+     except Exception:
+         logging.warning("Initial parse failed; attempting JSON repair")
+         parsed_obj = response_format.parse_raw(repair_json(raw_text))
+
+     # ─── Cache & return ───────────────────────────────────────────────────────
+     cache_output_tools.cache_output(
+         "get_structured_output_with_mcp", cache_key, parsed_obj.json()
+     )
+     return parsed_obj, status

  async def get_structured_output_with_assistant_and_vector_store(
      prompt: str,
      response_format: BaseModel,
      vector_store_id: str,
      effort: str = "low",
-     tool_config: Optional[List[Dict]] = None
+     model="gpt-5.1-chat",
+     tool_config: Optional[List[Dict]] = None,
+     use_cache: bool = True
  ):
      """
-     If the vector store has NO files, call get_structured_output_internal directly.
-     Otherwise, do a single call to the new Responses API with a 'file_search' tool
-     to incorporate vector-store knowledge.
+     Same logic, now uses create_async_openai_client().
      """
      try:
-         # 1. Ensure vector store exists
-         await get_vector_store_object(vector_store_id, tool_config)
-
-         # 2. Check if the vector store contains any files
+         vector_store = await get_vector_store_object(vector_store_id, tool_config)
+         if not vector_store:
+             return await get_structured_output_internal(
+                 prompt, response_format, tool_config=tool_config
+             )
+
          files = await list_vector_store_files(vector_store_id, tool_config)
          if not files:
-             # No files => just do the internal structured approach
-             return await get_structured_output_internal(prompt, response_format, tool_config=tool_config)
+             return await get_structured_output_internal(
+                 prompt, response_format, tool_config=tool_config
+             )

-         # 3. If files exist => do a single "Responses" call with file_search
          response_type_str = response_format.__name__
-         message_hash = hashlib.md5(prompt.encode('utf-8')).hexdigest()
-         response_type_hash = hashlib.md5(response_type_str.encode('utf-8')).hexdigest()
+         message_hash = hashlib.md5(prompt.encode("utf-8")).hexdigest()
+         response_type_hash = hashlib.md5(response_type_str.encode("utf-8")).hexdigest()
          cache_key = f"{message_hash}:{response_type_hash}"
-         cached_response = cache_output_tools.retrieve_output(
-             "get_structured_output_with_assistant_and_vector_store",
-             cache_key
-         )
-         if cached_response is not None:
-             parsed_cached_response = response_format.parse_raw(cached_response)
-             return parsed_cached_response, "SUCCESS"

-         schema = response_format.model_json_schema()
-         schema["additionalProperties"] = False
-         schema["title"] = response_type_str
+         if use_cache:
+             cached_response = cache_output_tools.retrieve_output(
+                 "get_structured_output_with_assistant_and_vector_store", cache_key
+             )
+             if cached_response is not None:
+                 parsed_cached_response = response_format.model_validate_json(cached_response)
+                 return parsed_cached_response, "SUCCESS"

+         schema = type_to_response_format_param(response_format)
          json_schema_format = {
              "name": response_type_str,
              "type": "json_schema",
-             "schema": schema
+             "schema": schema["json_schema"]["schema"],
          }

-         openai_key = get_openai_access_token(tool_config)
-         client_async = openai.AsyncOpenAI(api_key=openai_key)
+         client_async = create_async_openai_client(tool_config)

-         # Single call to the new Responses API
          completion = await client_async.responses.create(
              input=[
                  {"role": "system", "content": "You are a helpful AI. Output JSON only."},
-                 {"role": "user", "content": prompt}
+                 {"role": "user", "content": prompt},
              ],
-             model="o3-mini",
+             model=model,
              text={"format": json_schema_format},
-             tools=[{
-                 "type": "file_search",
-                 "vector_store_ids": [vector_store_id],
-             }],
-             reasoning={"effort": effort},
-             store=False
+             tools=[{"type": "file_search", "vector_store_ids": [vector_store_id]}],
+             tool_choice="required",
+             store=False,
          )

-         if completion.output and len(completion.output) > 0:
+         if completion and completion.output and len(completion.output) > 0:
              raw_text = None
-             # Find the first output whose type is 'message'
              for out in completion.output:
-                 if out.type == 'message' and out.content and len(out.content) > 0:
+                 if out.type == "message" and out.content and len(out.content) > 0:
                      raw_text = out.content[0].text
                      break

              if not raw_text or not raw_text.strip():
-                 logging.error("No response text from the model.")
                  raise HTTPException(status_code=502, detail="No response from the model.")

              try:
@@ -219,24 +445,18 @@ async def get_structured_output_with_assistant_and_vector_store(
                  cache_output_tools.cache_output(
                      "get_structured_output_with_assistant_and_vector_store",
                      cache_key,
-                     parsed_obj.json()
+                     parsed_obj.json(),
                  )
                  return parsed_obj, "SUCCESS"
-             except json.JSONDecodeError:
-                 logging.warning("ERROR: Model returned invalid JSON.")
+             except Exception:
+                 logging.warning("Model returned invalid JSON.")
                  return raw_text, "FAIL"
          else:
              return "No output returned", "FAIL"

-     except openai.OpenAIError as e:
+     except OpenAIError as e:
          logging.error(f"OpenAI API error: {e}")
-         raise HTTPException(
-             status_code=502,
-             detail="Error communicating with the OpenAI API."
-         )
+         raise HTTPException(status_code=502, detail="Error communicating with the OpenAI API.")
      except Exception as e:
          logging.error(f"Unexpected error: {e}")
-         raise HTTPException(
-             status_code=500,
-             detail="An unexpected error occurred while processing your request."
-         )
+         raise HTTPException(status_code=500, detail="Unexpected server error.")
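
Usage note: a minimal sketch of calling the updated get_structured_output_internal() shown in this diff. The LeadSummary model and the tool_config payload below are hypothetical placeholders; only the function name, its new parameters (use_web_search, model, use_cache), and the (object, status) return shape come from the diff itself, so treat the config layout as an assumption rather than the package's documented API.

    import asyncio
    from pydantic import BaseModel

    from dhisana.utils.generate_structured_output_internal import (
        get_structured_output_internal,
    )

    class LeadSummary(BaseModel):
        # Hypothetical schema for illustration only.
        response_summary: str

    async def main() -> None:
        # Assumed shape: _extract_config(tool_config, "openai") suggests a list
        # of named provider configs, but the exact layout is not shown in the diff.
        tool_config = [{"name": "openai", "configuration": {"apiKey": "sk-..."}}]
        parsed, status = await get_structured_output_internal(
            prompt="Summarize this lead: ACME Corp just raised a Series B.",
            response_format=LeadSummary,
            model="gpt-5.1-chat",   # default model per the diff
            use_web_search=False,   # forced off when no plain-OpenAI config is present
            use_cache=True,         # results keyed on md5(prompt) + md5(schema name)
            tool_config=tool_config,
        )
        if status == "SUCCESS":
            print(parsed.response_summary)

    asyncio.run(main())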
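The new MCP variant reads its connection settings via _extract_config(tool_config, "mcpServer") and rejects the call with HTTP 400 unless serverUrl, apiKeyHeaderName, and apiKeyHeaderValue are all present (serverLabel defaults to ""). A hedged sketch of such an entry and the approval-pending flow follows; the key names come from the diff, while the surrounding list/dict wrapper is an assumption:

    # Hypothetical tool_config entry for get_structured_output_with_mcp().
    mcp_tool_config = [
        {
            "name": "mcpServer",
            "configuration": {
                "serverLabel": "crm-tools",                  # passed as server_label
                "serverUrl": "https://mcp.example.com/sse",  # required, else HTTP 400
                "apiKeyHeaderName": "X-Api-Key",             # header name sent to the server
                "apiKeyHeaderValue": "secret-token",         # header value sent to the server
            },
        }
    ]

    # Per the docstring, callers should branch on the returned status string:
    #   parsed, status = await get_structured_output_with_mcp(
    #       prompt, MySchema, tool_config=mcp_tool_config)
    #   if status == "PENDING_APPROVAL":
    #       ...  # the tool call awaits human approval; retry once it is granted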