dhisana 0.0.1.dev266__py3-none-any.whl → 0.0.1.dev268__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,6 +19,126 @@ from dhisana.utils.fetch_openai_config import (
19
19
  create_async_openai_client,
20
20
  )
21
21
 
22
# Optional web-search dependencies.
#
# If either helper cannot be imported, an async stand-in with a compatible
# call shape is defined so this module still imports. The failure is logged so
# that empty search results / empty page content can be traced back to the
# missing dependency instead of degrading silently.
try:
    from dhisana.utils.search_router import search_google_with_tools
except Exception as _search_import_error:  # broad on purpose: any import-time failure
    logging.warning(
        "search_google_with_tools could not be imported; "
        "web search will return no results: %s",
        _search_import_error,
    )

    async def search_google_with_tools(*a, **k):
        """Fallback used when the search router is unavailable; returns no results."""
        return []

try:
    from dhisana.utils.web_download_parse_tools import get_text_content_from_url
except Exception as _scrape_import_error:  # broad on purpose: any import-time failure
    logging.warning(
        "get_text_content_from_url could not be imported; "
        "URL fetching will return empty content: %s",
        _scrape_import_error,
    )

    async def get_text_content_from_url(url: str) -> str:
        """Fallback used when the scraper is unavailable; returns an empty string."""
        return ""
34
+
35
+
36
+ # ──────────────────────────────────────────────────────────────────────────────
37
+ # Web search tool definitions for the Responses API
38
+ # ──────────────────────────────────────────────────────────────────────────────
39
+
40
# Function-tool schema advertising the "search_google" tool to the model.
# Only "query" is required; "num_results" is optional.
SEARCH_GOOGLE_TOOL = {
    "type": "function",
    "name": "search_google",
    "description": (
        "Search Google for information. "
        "Returns a list of search results with titles, links, and snippets."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "query": {
                "type": "string",
                "description": "The search query to look up on Google",
            },
            "num_results": {
                "type": "integer",
                "description": "Number of results to return (default: 5, max: 10)",
            },
        },
        "required": ["query"],
        "additionalProperties": False,
    },
}
60
+
61
# Function-tool schema advertising the "fetch_url_content" tool to the model.
# Its single required argument is the URL to read.
FETCH_URL_CONTENT_TOOL = {
    "type": "function",
    "name": "fetch_url_content",
    "description": (
        "Fetch and extract text content from a URL. "
        "Use this to read the full content of a webpage."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "url": {
                "type": "string",
                "description": "The URL to fetch content from",
            },
        },
        "required": ["url"],
        "additionalProperties": False,
    },
}
77
+
78
+
79
async def _execute_search_google(
    query: str, num_results: int, tool_config: Optional[List[Dict]]
) -> str:
    """Run a Google search via ``search_google_with_tools`` and return JSON text.

    Args:
        query: Search string, forwarded unchanged.
        num_results: Requested number of results. Coerced to ``int`` and
            clamped to the range 1..10; a value that cannot be converted
            falls back to 5.
        tool_config: Forwarded unchanged to ``search_google_with_tools``.

    Returns:
        A JSON array of ``{"title", "link", "snippet"}`` objects on success
        (empty when the search returned something other than a list), or a
        JSON object ``{"error": "<message>"}`` if the search raised.
    """
    try:
        # The value comes from model-generated JSON, so it may arrive as
        # None, a string or a float rather than an int.
        try:
            requested = int(num_results)
        except (TypeError, ValueError, OverflowError):
            requested = 5
        num_results = min(max(requested, 1), 10)

        raw = await search_google_with_tools(
            query, number_of_results=num_results, offset=0, tool_config=tool_config
        )

        results = []
        if isinstance(raw, list):
            for item in raw:
                try:
                    # Items may be JSON-encoded strings or already-decoded objects.
                    data = json.loads(item) if isinstance(item, str) else item
                    results.append({
                        "title": data.get("title", ""),
                        "link": data.get("link", ""),
                        "snippet": data.get("snippet", ""),
                    })
                except (ValueError, TypeError, AttributeError):
                    # Not valid JSON, or not a mapping-like object: skip it.
                    continue
        return json.dumps(results, default=str)
    except Exception as e:
        # Tool failures are reported back to the model instead of raised.
        logging.warning("search_google tool failed: %s", e)
        return json.dumps({"error": str(e)})
104
+
105
+
106
async def _execute_fetch_url_content(url: str) -> str:
    """Download the text of ``url`` via ``get_text_content_from_url``.

    Returns the page text, cut to 15000 characters with a truncation marker
    appended when it is longer. If nothing was fetched, or if fetching raised,
    a short human-readable failure message is returned instead of raising.
    """
    truncation_limit = 15000
    try:
        page_text = await get_text_content_from_url(url)
        if not page_text:
            return "Failed to fetch content from URL"
        if len(page_text) <= truncation_limit:
            return page_text
        clipped = page_text[:truncation_limit]
        return clipped + "\n... [content truncated]"
    except Exception as e:
        logging.warning("fetch_url_content tool failed for %s: %s", url, e)
        return f"Error fetching URL: {str(e)}"
119
+
120
+
121
async def _execute_web_search_tool(
    tool_name: str, args: dict, tool_config: Optional[List[Dict]]
) -> str:
    """Dispatch one model-requested tool call and return its result as text.

    Args:
        tool_name: ``"search_google"`` or ``"fetch_url_content"``.
        args: Decoded tool arguments. Anything that is not a dict is treated
            as an empty argument set, because the arguments originate from
            model-generated JSON and may decode to a list, string or null.
        tool_config: Forwarded to the Google search helper.

    Returns:
        The tool's string result, or a JSON object ``{"error": "<message>"}``
        when a required argument is missing or the tool name is unknown.
    """
    if not isinstance(args, dict):
        args = {}

    if tool_name == "search_google":
        query = args.get("query", "")
        if not query:
            return json.dumps({"error": "Missing required parameter: query"})
        # num_results is optional and model-supplied: fall back to the
        # advertised default of 5 when it is absent or not a number.
        try:
            num_results = int(args.get("num_results", 5))
        except (TypeError, ValueError, OverflowError):
            num_results = 5
        return await _execute_search_google(query, num_results, tool_config)

    if tool_name == "fetch_url_content":
        url = args.get("url", "")
        if not url:
            return json.dumps({"error": "Missing required parameter: url"})
        return await _execute_fetch_url_content(url)

    logging.warning("Unknown tool requested: %s", tool_name)
    return json.dumps({"error": f"Unknown tool: {tool_name}"})
141
+
22
142
 
23
143
  # ──────────────────────────────────────────────────────────────────────────────
24
144
  # 1. Helper functions
@@ -71,6 +191,9 @@ async def get_structured_output_internal(
71
191
 
72
192
  On a 429 (rate-limit) error the call is retried once after
73
193
  20 s + random exponential back-off.
194
+
195
+ If use_web_search=True, uses Google search and URL scraping tools
196
+ to enable web research (works with both OpenAI and Azure OpenAI).
74
197
  """
75
198
  try:
76
199
  # ─── caching bookkeeping ────────────────────────────────────────────
@@ -95,30 +218,24 @@ async def get_structured_output_internal(
95
218
  "schema": schema["json_schema"]["schema"],
96
219
  }
97
220
 
98
- # ─── client initialisation (NEW) ────────────────────────────────────
221
+ # ─── client initialisation ──────────────────────────────────────────
99
222
  client_async = create_async_openai_client(tool_config)
100
223
 
101
- openai_cfg = _extract_config(tool_config, "openai")
102
- # TODO: Azure OpenAI does not support web_search yet
103
- if not openai_cfg:
104
- use_web_search = False
224
+ # ─── Web search path (uses Google search + URL scraping tools) ──────
225
+ if use_web_search:
226
+ return await _get_structured_output_with_web_search(
227
+ client_async=client_async,
228
+ prompt=prompt,
229
+ response_format=response_format,
230
+ json_schema_format=json_schema_format,
231
+ model=model,
232
+ effort=effort,
233
+ tool_config=tool_config,
234
+ cache_key=cache_key,
235
+ )
105
236
 
106
- # -------------------------------------------------------------------
107
- # Internal helper to perform ONE attempt
108
- # -------------------------------------------------------------------
237
+ # ─── Standard path (no web search) ──────────────────────────────────
109
238
  async def _make_request():
110
- if use_web_search and model.startswith("gpt-"):
111
- return await client_async.responses.create(
112
- input=[
113
- {"role": "system", "content": "You are a helpful AI. Output JSON only."},
114
- {"role": "user", "content": prompt},
115
- ],
116
- model=model,
117
- text={"format": json_schema_format},
118
- tool_choice="required",
119
- tools=[{"type": "web_search_preview"}],
120
- store=False,
121
- )
122
239
  if model.startswith("o"): # reasoning param only for "o" family
123
240
  return await client_async.responses.create(
124
241
  input=[
@@ -175,45 +292,8 @@ async def get_structured_output_internal(
175
292
  logging.error(f"OpenAI API error: {e}")
176
293
  return f"OpenAI API error: {str(e)}", "API_ERROR"
177
294
 
178
- # ─── handle model output (unchanged) ────────────────────────────────
179
- if completion and completion.output and len(completion.output) > 0:
180
- raw_text = None
181
- for out in completion.output:
182
- if out.type == "message" and out.content:
183
- for content_item in out.content:
184
- if hasattr(content_item, "text"):
185
- raw_text = content_item.text
186
- break
187
- else:
188
- logging.warning("request refused: %s", str(content_item))
189
- return "Request refused.", "FAIL"
190
- if raw_text:
191
- break
192
-
193
- if not raw_text or not raw_text.strip():
194
- return "No text returned (possibly refusal or empty response)", "FAIL"
195
-
196
- try:
197
- parsed_obj = response_format.parse_raw(raw_text)
198
- cache_output_tools.cache_output(
199
- "get_structured_output_internal", cache_key, parsed_obj.json()
200
- )
201
- return parsed_obj, "SUCCESS"
202
-
203
- except Exception:
204
- logging.warning("ERROR: Could not parse JSON from model output.")
205
- try:
206
- fixed_json = repair_json(raw_text)
207
- parsed_obj = response_format.parse_raw(fixed_json)
208
- cache_output_tools.cache_output(
209
- "get_structured_output_internal", cache_key, parsed_obj.json()
210
- )
211
- return parsed_obj, "SUCCESS"
212
- except Exception as e2:
213
- logging.warning("JSON repair failed: %s", str(e2))
214
- return raw_text, "FAIL"
215
- else:
216
- return "No output returned", "FAIL"
295
+ # ─── handle model output ────────────────────────────────────────────
296
+ return _parse_completion_response(completion, response_format, cache_key)
217
297
 
218
298
  # Safety fallback: catch any OpenAI errors not caught by inner retry loop
219
299
  except OpenAIError as e:
@@ -226,6 +306,168 @@ async def get_structured_output_internal(
226
306
  return f"Unexpected error: {str(e)}", "ERROR"
227
307
 
228
308
 
309
async def _get_structured_output_with_web_search(
    client_async,
    prompt: str,
    response_format: BaseModel,
    json_schema_format: Dict,
    model: str,
    effort: str,
    tool_config: Optional[List[Dict]],
    cache_key: str,
):
    """Produce structured output while letting the model call web-search tools.

    The model is offered ``search_google`` and ``fetch_url_content`` as
    function tools. Each round sends the accumulated conversation, executes
    any requested tool calls locally, appends the calls and their outputs to
    the conversation, and repeats until the model answers without requesting
    a tool or 10 requests have been made.

    Args:
        client_async: Client exposing ``responses.create(**kwargs)`` as a coroutine.
        prompt: User prompt.
        response_format: Model class handed to ``_parse_completion_response``.
        json_schema_format: Value sent as ``text.format`` in each request.
        model: Model name; names starting with ``"o"`` also receive a
            ``reasoning`` effort setting.
        effort: Reasoning effort used for ``"o"`` models.
        tool_config: Forwarded to the tool executors.
        cache_key: Forwarded to ``_parse_completion_response``.

    Returns:
        Whatever ``_parse_completion_response`` returns for the final response.

    Raises:
        HTTPException: status 502 when the API call fails (after one retry on
            a rate limit) or yields no response object.
    """
    tools = [SEARCH_GOOGLE_TOOL, FETCH_URL_CONTENT_TOOL]

    system_content = (
        "You are a helpful AI. Output JSON only.\n\n"
        "Web Search Instructions:\n"
        "- Use search_google to find relevant information on the web.\n"
        "- Use fetch_url_content to read the full content of relevant URLs.\n"
        "- After gathering information, provide your response in the required JSON format."
    )

    # Conversation history, extended with tool calls and their results.
    conversation_history = [
        {"role": "system", "content": system_content},
        {"role": "user", "content": prompt},
    ]

    max_tool_iterations = 10

    async def _create_with_retry(request: dict):
        """Send one request, retrying once after a rate-limit error."""
        for attempt in range(2):
            try:
                return await client_async.responses.create(**request)
            except (RateLimitError, OpenAIError) as e:
                is_rl = (
                    isinstance(e, RateLimitError)
                    or getattr(e, "status_code", None) == 429
                    or "rate_limit" in str(e).lower()
                )
                if attempt == 0 and is_rl:
                    wait_time = 20 + random.uniform(0, 2.0)
                    logging.warning(
                        "Rate-limit hit (429). Waiting %.2fs then retrying.", wait_time
                    )
                    await asyncio.sleep(wait_time)
                    continue
                logging.error("OpenAI API error: %s", e)
                raise HTTPException(
                    status_code=502,
                    detail="Error communicating with the OpenAI API.",
                ) from e
        return None

    completion = None
    for tool_iteration in range(1, max_tool_iterations + 1):
        request = {
            "input": conversation_history,
            "model": model,
            "text": {"format": json_schema_format},
            "tools": tools,
            "store": False,
        }
        if model.startswith("o"):
            request["reasoning"] = {"effort": effort}
        if tool_iteration == max_tool_iterations:
            # Last permitted request: forbid further tool calls so the model
            # must answer from what it has gathered. Otherwise the final
            # response could consist solely of tool calls with no text.
            request["tool_choice"] = "none"

        completion = await _create_with_retry(request)
        if not completion:
            raise HTTPException(status_code=502, detail="OpenAI request failed.")

        tool_calls = [
            item
            for item in (completion.output or [])
            if getattr(item, "type", None) == "function_call"
        ]
        if not tool_calls:
            # No tool calls: this is the final response.
            break

        logging.info(
            "Processing %d web search tool call(s) in iteration %d",
            len(tool_calls),
            tool_iteration,
        )

        for tc in tool_calls:
            func_name = getattr(tc, "name", "")
            call_id = getattr(tc, "call_id", "")
            args_str = getattr(tc, "arguments", "{}")

            try:
                args = json.loads(args_str) if args_str else {}
            except (json.JSONDecodeError, TypeError):
                args = {}
            if not isinstance(args, dict):
                # Arguments are model-generated; only JSON objects are usable.
                args = {}

            # Echo the call back into the history. The item "id" is left out
            # on purpose: it is optional, and call_id is a different
            # identifier from the item id.
            # NOTE(review): the API is believed to reject a call_id-shaped
            # value in "id" - confirm against the Responses API reference.
            conversation_history.append({
                "type": "function_call",
                "call_id": call_id,
                "name": func_name,
                "arguments": args_str,
            })

            tool_result = await _execute_web_search_tool(func_name, args, tool_config)

            conversation_history.append({
                "type": "function_call_output",
                "call_id": call_id,
                "output": tool_result,
            })

            logging.info(
                "Executed web search tool %s, result length: %d",
                func_name,
                len(tool_result),
            )

    return _parse_completion_response(completion, response_format, cache_key)
427
+
428
+
429
def _parse_completion_response(completion, response_format: BaseModel, cache_key: str):
    """Extract the model's text from ``completion`` and parse it into ``response_format``.

    Args:
        completion: Response object whose ``output`` is a sequence of items
            with a ``type`` attribute; ``"message"`` items carry ``content``.
        response_format: Object providing ``parse_raw(text)``; the parsed
            result must provide ``json()``.
        cache_key: Key under which a successfully parsed result is cached.

    Returns:
        ``(parsed_obj, "SUCCESS")`` when the text parses, directly or after
        ``repair_json``. Otherwise ``(message_or_raw_text, "FAIL")``.
    """
    if not completion or not completion.output:
        return "No output returned", "FAIL"

    raw_text = None
    for out in completion.output:
        if out.type != "message" or not out.content:
            continue
        for content_item in out.content:
            if hasattr(content_item, "text"):
                raw_text = content_item.text
                break
            # A leading content item without text is treated as a refusal.
            logging.warning("request refused: %s", str(content_item))
            return "Request refused.", "FAIL"
        if raw_text:
            break

    if not raw_text or not raw_text.strip():
        return "No text returned (possibly refusal or empty response)", "FAIL"

    try:
        parsed_obj = response_format.parse_raw(raw_text)
    except Exception:
        logging.warning("ERROR: Could not parse JSON from model output.")
        try:
            parsed_obj = response_format.parse_raw(repair_json(raw_text))
        except Exception as e2:
            logging.warning("JSON repair failed: %s", str(e2))
            return raw_text, "FAIL"

    # Caching is best-effort and kept outside the parse handling, so a cache
    # failure is not mistaken for a parse failure.
    try:
        cache_output_tools.cache_output(
            "get_structured_output_internal", cache_key, parsed_obj.json()
        )
    except Exception as cache_err:
        logging.warning("Could not cache structured output: %s", cache_err)

    return parsed_obj, "SUCCESS"
469
+
470
+
229
471
 
230
472
  async def get_structured_output_with_mcp(
231
473
  prompt: str,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dhisana
3
- Version: 0.0.1.dev266
3
+ Version: 0.0.1.dev268
4
4
  Summary: A Python SDK for Dhisana AI Platform
5
5
  Home-page: https://github.com/dhisana-ai/dhisana-python-sdk
6
6
  Author: Admin
@@ -45,7 +45,7 @@ dhisana/utils/generate_flow.py,sha256=QMn6bWo0nH0fBvy2Ebub1XfH5udnVAqsPsbIqCtQPX
45
45
  dhisana/utils/generate_leads_salesnav.py,sha256=FG7q6GSm9IywZ9TgQnn5_N3QNfiI-Qk2gaO_3GS99nY,12236
46
46
  dhisana/utils/generate_linkedin_connect_message.py,sha256=QxsxDiT-3eQOqAAbW13d0HGJXV36WYPvC-7Zsw_2VTI,10208
47
47
  dhisana/utils/generate_linkedin_response_message.py,sha256=mWoSs5p2JSTIoFZFGm86x1kgs67J7dHPvGKZPzcdGdU,14569
48
- dhisana/utils/generate_structured_output_internal.py,sha256=k6w5zaaMigp7cUgFALr-TRBsoEQTlzyGfH4R8HIIfGU,22116
48
+ dhisana/utils/generate_structured_output_internal.py,sha256=g3g685JxOnxoObvY_ILSiJ38584QmZJ9WofDBGBcSJ8,31056
49
49
  dhisana/utils/google_custom_search.py,sha256=5rQ4uAF-hjFpd9ooJkd6CjRvSmhZHhqM0jfHItsbpzk,10071
50
50
  dhisana/utils/google_oauth_tools.py,sha256=ReG5lCpXL3_e_s0yn6ai4U7B4-feOWHJVtbv_c0g0rE,28525
51
51
  dhisana/utils/google_workspace_tools.py,sha256=fuV0UcvAqF9drLzj7-p6D5zh7d5jMXl1jNJTICk4XOo,50224
@@ -95,8 +95,8 @@ dhisana/workflow/agent.py,sha256=esv7_i_XuMkV2j1nz_UlsHov_m6X5WZZiZm_tG4OBHU,565
95
95
  dhisana/workflow/flow.py,sha256=xWE3qQbM7j2B3FH8XnY3zOL_QXX4LbTW4ArndnEYJE0,1638
96
96
  dhisana/workflow/task.py,sha256=HlWz9mtrwLYByoSnePOemBUBrMEcj7KbgNjEE1oF5wo,1830
97
97
  dhisana/workflow/test.py,sha256=E7lRnXK0PguTNzyasHytLzTJdkqIPxG5_4qk4hMEeKc,3399
98
- dhisana-0.0.1.dev266.dist-info/METADATA,sha256=KEVAlP8-K5O412xaBb3WeJWM6S1thO8dRYiEu3PQKvw,1190
99
- dhisana-0.0.1.dev266.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
100
- dhisana-0.0.1.dev266.dist-info/entry_points.txt,sha256=jujxteZmNI9EkEaK-pOCoWuBujU8TCevdkfl9ZcKHek,49
101
- dhisana-0.0.1.dev266.dist-info/top_level.txt,sha256=NETTHt6YifG_P7XtRHbQiXZlgSFk9Qh9aR-ng1XTf4s,8
102
- dhisana-0.0.1.dev266.dist-info/RECORD,,
98
+ dhisana-0.0.1.dev268.dist-info/METADATA,sha256=8qz6BDU9YaSqUcBCWrW30PB5Mc6io2BCalfA58uQA2s,1190
99
+ dhisana-0.0.1.dev268.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
100
+ dhisana-0.0.1.dev268.dist-info/entry_points.txt,sha256=jujxteZmNI9EkEaK-pOCoWuBujU8TCevdkfl9ZcKHek,49
101
+ dhisana-0.0.1.dev268.dist-info/top_level.txt,sha256=NETTHt6YifG_P7XtRHbQiXZlgSFk9Qh9aR-ng1XTf4s,8
102
+ dhisana-0.0.1.dev268.dist-info/RECORD,,