deepparallel 0.5.2__tar.gz → 0.5.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. {deepparallel-0.5.2 → deepparallel-0.5.3}/PKG-INFO +1 -1
  2. {deepparallel-0.5.2 → deepparallel-0.5.3}/deepparallel/__init__.py +1 -1
  3. {deepparallel-0.5.2 → deepparallel-0.5.3}/deepparallel/system_prompt.txt +4 -0
  4. deepparallel-0.5.3/deepparallel/tools/web.py +187 -0
  5. {deepparallel-0.5.2 → deepparallel-0.5.3}/deepparallel.egg-info/PKG-INFO +1 -1
  6. {deepparallel-0.5.2 → deepparallel-0.5.3}/pyproject.toml +1 -1
  7. deepparallel-0.5.3/tests/test_tools_web.py +140 -0
  8. deepparallel-0.5.2/deepparallel/tools/web.py +0 -82
  9. deepparallel-0.5.2/tests/test_tools_web.py +0 -97
  10. {deepparallel-0.5.2 → deepparallel-0.5.3}/README.md +0 -0
  11. {deepparallel-0.5.2 → deepparallel-0.5.3}/deepparallel/agent.py +0 -0
  12. {deepparallel-0.5.2 → deepparallel-0.5.3}/deepparallel/backend.py +0 -0
  13. {deepparallel-0.5.2 → deepparallel-0.5.3}/deepparallel/branding.py +0 -0
  14. {deepparallel-0.5.2 → deepparallel-0.5.3}/deepparallel/cli.py +0 -0
  15. {deepparallel-0.5.2 → deepparallel-0.5.3}/deepparallel/config.py +0 -0
  16. {deepparallel-0.5.2 → deepparallel-0.5.3}/deepparallel/crowe_id.py +0 -0
  17. {deepparallel-0.5.2 → deepparallel-0.5.3}/deepparallel/dsml.py +0 -0
  18. {deepparallel-0.5.2 → deepparallel-0.5.3}/deepparallel/fusion.py +0 -0
  19. {deepparallel-0.5.2 → deepparallel-0.5.3}/deepparallel/licensing.py +0 -0
  20. {deepparallel-0.5.2 → deepparallel-0.5.3}/deepparallel/registry.json +0 -0
  21. {deepparallel-0.5.2 → deepparallel-0.5.3}/deepparallel/renderer.py +0 -0
  22. {deepparallel-0.5.2 → deepparallel-0.5.3}/deepparallel/research/__init__.py +0 -0
  23. {deepparallel-0.5.2 → deepparallel-0.5.3}/deepparallel/research/conduit.py +0 -0
  24. {deepparallel-0.5.2 → deepparallel-0.5.3}/deepparallel/research/provider.py +0 -0
  25. {deepparallel-0.5.2 → deepparallel-0.5.3}/deepparallel/routing.example.json +0 -0
  26. {deepparallel-0.5.2 → deepparallel-0.5.3}/deepparallel/routing.py +0 -0
  27. {deepparallel-0.5.2 → deepparallel-0.5.3}/deepparallel/serve.py +0 -0
  28. {deepparallel-0.5.2 → deepparallel-0.5.3}/deepparallel/supply_chain.py +0 -0
  29. {deepparallel-0.5.2 → deepparallel-0.5.3}/deepparallel/tools/__init__.py +0 -0
  30. {deepparallel-0.5.2 → deepparallel-0.5.3}/deepparallel/tools/codeast.py +0 -0
  31. {deepparallel-0.5.2 → deepparallel-0.5.3}/deepparallel/tools/edit.py +0 -0
  32. {deepparallel-0.5.2 → deepparallel-0.5.3}/deepparallel/tools/files.py +0 -0
  33. {deepparallel-0.5.2 → deepparallel-0.5.3}/deepparallel/tools/mcp.py +0 -0
  34. {deepparallel-0.5.2 → deepparallel-0.5.3}/deepparallel/tools/registry.py +0 -0
  35. {deepparallel-0.5.2 → deepparallel-0.5.3}/deepparallel/tools/sandbox.py +0 -0
  36. {deepparallel-0.5.2 → deepparallel-0.5.3}/deepparallel/tools/search.py +0 -0
  37. {deepparallel-0.5.2 → deepparallel-0.5.3}/deepparallel/tools/shell.py +0 -0
  38. {deepparallel-0.5.2 → deepparallel-0.5.3}/deepparallel/tools/vision.py +0 -0
  39. {deepparallel-0.5.2 → deepparallel-0.5.3}/deepparallel/userinput.py +0 -0
  40. {deepparallel-0.5.2 → deepparallel-0.5.3}/deepparallel.egg-info/SOURCES.txt +0 -0
  41. {deepparallel-0.5.2 → deepparallel-0.5.3}/deepparallel.egg-info/dependency_links.txt +0 -0
  42. {deepparallel-0.5.2 → deepparallel-0.5.3}/deepparallel.egg-info/entry_points.txt +0 -0
  43. {deepparallel-0.5.2 → deepparallel-0.5.3}/deepparallel.egg-info/requires.txt +0 -0
  44. {deepparallel-0.5.2 → deepparallel-0.5.3}/deepparallel.egg-info/top_level.txt +0 -0
  45. {deepparallel-0.5.2 → deepparallel-0.5.3}/setup.cfg +0 -0
  46. {deepparallel-0.5.2 → deepparallel-0.5.3}/tests/test_agent.py +0 -0
  47. {deepparallel-0.5.2 → deepparallel-0.5.3}/tests/test_backend.py +0 -0
  48. {deepparallel-0.5.2 → deepparallel-0.5.3}/tests/test_backend_chat.py +0 -0
  49. {deepparallel-0.5.2 → deepparallel-0.5.3}/tests/test_backend_stream.py +0 -0
  50. {deepparallel-0.5.2 → deepparallel-0.5.3}/tests/test_branding.py +0 -0
  51. {deepparallel-0.5.2 → deepparallel-0.5.3}/tests/test_cli.py +0 -0
  52. {deepparallel-0.5.2 → deepparallel-0.5.3}/tests/test_config.py +0 -0
  53. {deepparallel-0.5.2 → deepparallel-0.5.3}/tests/test_crowe_backend.py +0 -0
  54. {deepparallel-0.5.2 → deepparallel-0.5.3}/tests/test_crowe_gateway_backend.py +0 -0
  55. {deepparallel-0.5.2 → deepparallel-0.5.3}/tests/test_crowe_id_auth.py +0 -0
  56. {deepparallel-0.5.2 → deepparallel-0.5.3}/tests/test_crowe_payment_required.py +0 -0
  57. {deepparallel-0.5.2 → deepparallel-0.5.3}/tests/test_dsml.py +0 -0
  58. {deepparallel-0.5.2 → deepparallel-0.5.3}/tests/test_fusion.py +0 -0
  59. {deepparallel-0.5.2 → deepparallel-0.5.3}/tests/test_issuer_signer.py +0 -0
  60. {deepparallel-0.5.2 → deepparallel-0.5.3}/tests/test_licensing.py +0 -0
  61. {deepparallel-0.5.2 → deepparallel-0.5.3}/tests/test_renderer.py +0 -0
  62. {deepparallel-0.5.2 → deepparallel-0.5.3}/tests/test_research.py +0 -0
  63. {deepparallel-0.5.2 → deepparallel-0.5.3}/tests/test_research_provider.py +0 -0
  64. {deepparallel-0.5.2 → deepparallel-0.5.3}/tests/test_routing.py +0 -0
  65. {deepparallel-0.5.2 → deepparallel-0.5.3}/tests/test_serve.py +0 -0
  66. {deepparallel-0.5.2 → deepparallel-0.5.3}/tests/test_spinner_color.py +0 -0
  67. {deepparallel-0.5.2 → deepparallel-0.5.3}/tests/test_supply_chain.py +0 -0
  68. {deepparallel-0.5.2 → deepparallel-0.5.3}/tests/test_tool_registry.py +0 -0
  69. {deepparallel-0.5.2 → deepparallel-0.5.3}/tests/test_tools_codeast.py +0 -0
  70. {deepparallel-0.5.2 → deepparallel-0.5.3}/tests/test_tools_edit.py +0 -0
  71. {deepparallel-0.5.2 → deepparallel-0.5.3}/tests/test_tools_files.py +0 -0
  72. {deepparallel-0.5.2 → deepparallel-0.5.3}/tests/test_tools_mcp.py +0 -0
  73. {deepparallel-0.5.2 → deepparallel-0.5.3}/tests/test_tools_sandbox.py +0 -0
  74. {deepparallel-0.5.2 → deepparallel-0.5.3}/tests/test_tools_search.py +0 -0
  75. {deepparallel-0.5.2 → deepparallel-0.5.3}/tests/test_tools_shell.py +0 -0
  76. {deepparallel-0.5.2 → deepparallel-0.5.3}/tests/test_tools_vision.py +0 -0
  77. {deepparallel-0.5.2 → deepparallel-0.5.3}/tests/test_userinput.py +0 -0
  78. {deepparallel-0.5.2 → deepparallel-0.5.3}/tests/test_userinput_paste.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deepparallel
3
- Version: 0.5.2
3
+ Version: 0.5.3
4
4
  Summary: DeepParallel - a multi-model agentic coding CLI with cross-model Guardian review, served via Crowe Logic.
5
5
  Author-email: Michael Crowe <michael@crowelogic.com>
6
6
  License: Apache-2.0
@@ -1,3 +1,3 @@
1
1
  """DeepParallel CLI package."""
2
2
 
3
- __version__ = "0.5.2"
3
+ __version__ = "0.5.3"
@@ -12,3 +12,7 @@ Engineering discipline — how you build:
12
12
  - Treat warnings as signal, not noise. A Guardian "risky/bug" verdict, a supply-chain flag, a parser or valence error, or a low similarity-to-reference score is evidence that something is wrong. Investigate and fix the cause; never rationalize it away or approve through it.
13
13
  - Validate against known-good references. When generating structured artifacts (molecules, schemas, configs, queries), check a sample against a known-correct example before trusting the whole batch. If your output does not resemble the references you expect, the generator is wrong, not the references.
14
14
  - Label honestly. Never emit an output whose name, ID, or label does not match what it actually is. If you cannot represent something correctly, say so rather than silently substituting a near-miss.
15
+
16
+ Where to write files: write into the current working directory using relative paths (e.g. `analysis/report.md`), or a path the user explicitly gave you. Do NOT invent absolute system paths like `/home/user/...`; the run_code sandbox is a SEPARATE environment from where write_file and edit_file act, and a host path like `/home/user` may not exist (writes there fail). Create any folders you need under the working directory.
17
+
18
+ Grounding is mandatory, not optional. Before writing substantive domain content (a protocol, analysis, dataset, literature claim, or any statement of fact a reader would trust), you MUST ground it first: web_search (works with no API key), web_fetch on a specific source, mcp_search to find a domain data server (try a single keyword like "pubmed", "clinicaltrials", "pubchem"), or read_file/grep for local truth. If one grounding tool errors, try another route before proceeding — never conclude "search is down, I'll rely on my knowledge" and generate from memory. When you genuinely cannot ground a claim, state that explicitly and label it unverified rather than presenting recall as established fact. Cite the sources you used.
@@ -0,0 +1,187 @@
1
+ """Web tools: fetch a page's text, and search.
2
+
3
+ web_search needs NO API key: providers are tried in order of quality and the
4
+ keyless DuckDuckGo scrape is the always-available floor, so search never dies
5
+ for lack of a key.
6
+ - PERPLEXITY_API_KEY -> Perplexity Sonar: synthesized, cited answers (best for
7
+ research; Enterprise does not train on your data).
8
+ - DEEPPARALLEL_SEARCH_API_KEY -> Brave Search: ranked link results.
9
+ - (no key) -> DuckDuckGo HTML scrape.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import json
15
+ import os
16
+ import re
17
+ from urllib.parse import parse_qs, urlparse
18
+
19
+ import httpx
20
+
21
+ from deepparallel.tools import tool
22
+
23
+ _SCRIPT_STYLE = re.compile(r"<(script|style)\b[^>]*>.*?</\1>", re.IGNORECASE | re.DOTALL)
24
+ _TAG = re.compile(r"<[^>]+>")
25
+ _TITLE = re.compile(r"<title[^>]*>(.*?)</title>", re.IGNORECASE | re.DOTALL)
26
+ _WS = re.compile(r"\s+")
27
+ _TIMEOUT = 15.0
28
+ _RESEARCH_TIMEOUT = 40.0 # Perplexity synthesizes; it is slower than a link API
29
+ _UA = "DeepParallel/0.1"
30
+ # DuckDuckGo's HTML endpoint rejects non-browser agents, so the fallback needs
31
+ # a realistic UA. Kept separate from _UA (used for plain fetches).
32
+ _BROWSER_UA = (
33
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 "
34
+ "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
35
+ )
36
+ _DDG_LINK = re.compile(r'<a[^>]+class="result__a"[^>]+href="([^"]+)"[^>]*>(.*?)</a>', re.I | re.S)
37
+ _DDG_SNIP = re.compile(r'<a[^>]+class="result__snippet"[^>]*>(.*?)</a>', re.I | re.S)
38
+
39
+
40
@tool(dangerous=False)
def web_fetch(url: str, max_chars: int = 8000) -> str:
    """Fetch a web page and return its readable text (HTML stripped).

    :param url: The URL to fetch.
    :param max_chars: Maximum characters of text to return.
    """
    # Returns a JSON string: {"url", "title", "text"} on success, or
    # {"error": ...} when the request raises or the status is an HTTP error.
    from html import unescape  # local import keeps this block self-contained

    # Coerce the limit defensively: a negative value would turn the final
    # slice into "everything except the tail", and a non-integer would raise
    # outside the try block below.
    try:
        limit = max(0, int(max_chars))
    except (TypeError, ValueError):
        limit = 8000
    try:
        r = httpx.get(url, timeout=_TIMEOUT, follow_redirects=True, headers={"user-agent": _UA})
        r.raise_for_status()
    except Exception as e:  # noqa: BLE001 - surface fetch failure to the model
        return json.dumps({"error": f"fetch failed: {type(e).__name__}: {e}"})
    page = r.text or ""
    title_m = _TITLE.search(page)
    # Decode character references only after tags are gone, so an escaped
    # "&lt;b&gt;" stays literal text instead of being stripped as markup.
    title = _WS.sub(" ", unescape(_TAG.sub("", title_m.group(1)))).strip() if title_m else ""
    # Drop script/style bodies first; their contents are not readable text.
    text = _WS.sub(" ", unescape(_TAG.sub(" ", _SCRIPT_STYLE.sub(" ", page)))).strip()
    return json.dumps({"url": url, "title": title, "text": text[:limit]})
57
+
58
+
59
def _strip_html(fragment: str) -> str:
    """Reduce an HTML fragment to plain text.

    Tags are removed, character references such as ``&amp;`` or ``&#x27;``
    are decoded, and runs of whitespace collapse to a single space.
    """
    from html import unescape  # local import keeps this block self-contained

    # Decode after tag removal so escaped markup ("&lt;b&gt;") is preserved
    # as literal text rather than being stripped as a tag.
    return _WS.sub(" ", unescape(_TAG.sub("", fragment))).strip()
61
+
62
+
63
def _ddg_decode(href: str) -> str:
    """Return the real destination of a DuckDuckGo result link.

    DuckDuckGo wraps result links as //duckduckgo.com/l/?uddg=<encoded-url>.
    The wrapped destination is pulled back out; anything already direct is
    passed through, normalised so it can be fetched as-is:

    * semicolon-terminated character references (``&amp;``) are decoded,
      because the value is captured from raw HTML attribute text;
    * a protocol-relative ``//host/path`` gains an ``https:`` scheme.

    Only links with no host or a duckduckgo.com host are unwrapped, so a
    third-party URL that happens to carry its own ``uddg=`` parameter is
    left alone.
    """
    from html import unescape  # local import keeps this block self-contained

    # Decode only references that end in ";". html.unescape on the whole
    # string would also rewrite legacy forms without one, corrupting raw
    # query strings such as "&section=2".
    link = re.sub(
        r"&(?:#[0-9]+|#[xX][0-9a-fA-F]+|[A-Za-z][A-Za-z0-9]*);",
        lambda m: unescape(m.group(0)),
        href,
    ).strip()
    if link.startswith("//"):
        link = "https:" + link
    if "uddg=" in link:
        parsed = urlparse(link if link.startswith("http") else "https:" + link)
        host = (parsed.hostname or "").lower()
        if host in ("", "duckduckgo.com") or host.endswith(".duckduckgo.com"):
            target = parse_qs(parsed.query).get("uddg")
            if target:
                return target[0]
    return link
72
+
73
+
74
def _perplexity_search(query: str, count: int, key: str):
    """(ok, payload | error-string) from Perplexity Sonar: a synthesized,
    citation-backed answer plus its source URLs.

    On success the payload is
    ``{"provider": "perplexity", "answer": str, "results": [...]}``. Every
    failure, including a malformed or empty response, comes back as
    ``(False, message)`` so the caller can fall through to the next provider.

    :param query: The search query, sent as the single user message.
    :param count: Maximum number of source entries to return.
    :param key: Perplexity API key, sent as a Bearer token.
    """
    try:
        r = httpx.post(
            "https://api.perplexity.ai/chat/completions",
            headers={"authorization": f"Bearer {key}", "content-type": "application/json"},
            json={
                "model": os.environ.get("PERPLEXITY_MODEL", "sonar"),
                "messages": [{"role": "user", "content": query}],
            },
            timeout=_RESEARCH_TIMEOUT,
        )
        if r.status_code >= 400:
            return False, f"HTTP {r.status_code}: {(r.text or '')[:200]}"
        data = r.json()
    except Exception as e:  # noqa: BLE001 - surface to caller for fallback
        return False, f"{type(e).__name__}: {e}"
    # A 2xx body that is not a JSON object (null, list, string) must become a
    # reported failure rather than an AttributeError that escapes the caller.
    if not isinstance(data, dict):
        return False, f"unexpected response type: {type(data).__name__}"
    limit = max(0, count)  # a negative count must not slice from the tail
    choices = data.get("choices")
    first = choices[0] if isinstance(choices, list) and choices else None
    message = first.get("message") if isinstance(first, dict) else None
    answer = (message.get("content") if isinstance(message, dict) else None) or ""
    sources = data.get("search_results")
    if isinstance(sources, list) and sources:
        # The source's "date" field is what fills the snippet slot here.
        results = [
            {"title": s.get("title", ""), "url": s.get("url", ""), "snippet": s.get("date", "")}
            for s in sources[:limit]
            if isinstance(s, dict)
        ]
    else:
        # No structured sources: fall back to the bare citation URL list.
        citations = data.get("citations")
        urls = citations if isinstance(citations, list) else []
        results = [{"title": "", "url": u, "snippet": ""} for u in urls[:limit]]
    if not answer and not results:
        # Nothing usable came back; report failure so fallback can continue.
        return False, "empty response"
    return True, {"provider": "perplexity", "answer": answer, "results": results}
102
+
103
+
104
def _brave_search(query: str, count: int, key: str):
    """(ok, results | error-string) from the Brave Search API.

    The endpoint defaults to Brave's web-search URL and can be overridden
    with DEEPPARALLEL_SEARCH_URL. Results are a list of
    ``{"title", "url", "snippet"}`` dicts; every failure, including a
    malformed response body, comes back as ``(False, message)``.

    :param query: The search query.
    :param count: Maximum number of results to request and return.
    :param key: API key, sent in the X-Subscription-Token header.
    """
    url = os.environ.get(
        "DEEPPARALLEL_SEARCH_URL", "https://api.search.brave.com/res/v1/web/search"
    )
    try:
        r = httpx.get(
            url,
            params={"q": query, "count": count},
            headers={"X-Subscription-Token": key, "accept": "application/json"},
            timeout=_TIMEOUT,
        )
        if r.status_code >= 400:
            return False, f"HTTP {r.status_code}: {(r.text or '')[:200]}"
        data = r.json()
    except Exception as e:  # noqa: BLE001 - surface to caller for fallback
        return False, f"{type(e).__name__}: {e}"
    # Guard the shape before indexing: a non-object body or '"web": null'
    # would otherwise raise AttributeError outside the try block above.
    if not isinstance(data, dict):
        return False, f"unexpected response type: {type(data).__name__}"
    web = data.get("web")
    items = web.get("results") if isinstance(web, dict) else None
    if not isinstance(items, list):
        items = []
    results = [
        {"title": it.get("title", ""), "url": it.get("url", ""), "snippet": it.get("description", "")}
        for it in items[: max(0, count)]
        if isinstance(it, dict)
    ]
    return True, results
126
+
127
+
128
def _duckduckgo_search(query: str, count: int):
    """(ok, results | error-string) by scraping DuckDuckGo's keyless HTML endpoint.

    Result anchors and snippet anchors are extracted separately and paired
    by position; a result with no snippet at its position gets an empty one.
    An HTTP error, a request exception, or a page with no parsable results
    all yield ``(False, message)``.

    :param query: The search query, posted as form field ``q``.
    :param count: Maximum number of results to return.
    """
    try:
        resp = httpx.post(
            "https://html.duckduckgo.com/html/",
            data={"q": query},
            headers={"user-agent": _BROWSER_UA},
            timeout=_TIMEOUT,
            follow_redirects=True,
        )
        if resp.status_code >= 400:
            return False, f"HTTP {resp.status_code}"
        page = resp.text or ""
    except Exception as exc:  # noqa: BLE001 - surface to caller
        return False, f"{type(exc).__name__}: {exc}"
    anchors = _DDG_LINK.findall(page)
    blurbs = _DDG_SNIP.findall(page)
    hits = [
        {
            "title": _strip_html(label),
            "url": _ddg_decode(target),
            "snippet": _strip_html(blurbs[pos]) if pos < len(blurbs) else "",
        }
        for pos, (target, label) in enumerate(anchors[:count])
    ]
    if hits:
        return True, hits
    return False, "no results parsed"
157
+
158
+
159
@tool(dangerous=False)
def web_search(query: str, count: int = 5) -> str:
    """Search the web and return result titles, URLs, and snippets.

    Works with no API key (DuckDuckGo fallback). Providers are tried best-first:
    Perplexity Sonar (PERPLEXITY_API_KEY) for cited, synthesized answers, then
    Brave (DEEPPARALLEL_SEARCH_API_KEY), then keyless DuckDuckGo.

    :param query: The search query.
    :param count: Maximum number of results.
    """
    # Returns a JSON string: the first successful provider's payload tagged
    # with "provider", or {"error": ...} naming every provider that failed.

    # Reject a blank query up front instead of spending three network round
    # trips on a request that cannot produce a useful result.
    query = str(query or "").strip()
    if not query:
        return json.dumps({"error": "search failed: empty query"})
    # Coerce the limit defensively: zero or a negative value would make the
    # providers' result slices empty or tail-trimmed, and a non-integer
    # would raise inside them.
    try:
        count = max(1, int(count))
    except (TypeError, ValueError):
        count = 5
    errors = []
    pplx = (os.environ.get("PERPLEXITY_API_KEY") or "").strip()
    if pplx:
        ok, payload = _perplexity_search(query, count, pplx)
        if ok:
            # Perplexity's payload already carries its own "provider" tag.
            return json.dumps(payload)
        errors.append(f"perplexity: {payload}")
    brave = (os.environ.get("DEEPPARALLEL_SEARCH_API_KEY") or "").strip()
    if brave:
        ok, payload = _brave_search(query, count, brave)
        if ok:
            return json.dumps({"provider": "brave", "results": payload})
        errors.append(f"brave: {payload}")
    # Keyless floor: always attempted when nothing above succeeded.
    ok, payload = _duckduckgo_search(query, count)
    if ok:
        return json.dumps({"provider": "duckduckgo", "results": payload})
    errors.append(f"duckduckgo: {payload}")
    return json.dumps({"error": "search failed: " + "; ".join(errors)})
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deepparallel
3
- Version: 0.5.2
3
+ Version: 0.5.3
4
4
  Summary: DeepParallel - a multi-model agentic coding CLI with cross-model Guardian review, served via Crowe Logic.
5
5
  Author-email: Michael Crowe <michael@crowelogic.com>
6
6
  License: Apache-2.0
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "deepparallel"
7
- version = "0.5.2"
7
+ version = "0.5.3"
8
8
  description = "DeepParallel - a multi-model agentic coding CLI with cross-model Guardian review, served via Crowe Logic."
9
9
  readme = "README.md"
10
10
  license = { text = "Apache-2.0" }
@@ -0,0 +1,140 @@
1
+ import json
2
+
3
+ import httpx
4
+
5
+ import deepparallel.tools.web as web_mod
6
+ from deepparallel.tools import get_registry
7
+
8
+ _HTML = """<html><head><title>Hello Page</title>
9
+ <style>.x{color:red}</style><script>var a=1;</script></head>
10
+ <body><h1>Welcome</h1><p>This is the <b>main</b> content.</p></body></html>"""
11
+
12
+ _DDG_HTML = """
13
+ <div class="result results_links">
14
+ <a class="result__a" href="//duckduckgo.com/l/?uddg=https%3A%2F%2Fpubmed.ncbi.nlm.nih.gov%2F123%2F&rut=x">Psilocybin PTSD trial</a>
15
+ <a class="result__snippet">A randomized controlled trial of psilocybin for PTSD.</a>
16
+ </div>
17
+ <div class="result results_links">
18
+ <a class="result__a" href="//duckduckgo.com/l/?uddg=https%3A%2F%2Fwww.thelancet.com%2Fa&rut=y">Lancet review</a>
19
+ <a class="result__snippet">Systematic review of psychedelics.</a>
20
+ </div>
21
+ """
22
+
23
+ _PPLX_JSON = {
24
+ "choices": [{"message": {"content": "Yes, psilocybin is studied for PTSD [1][2]."}}],
25
+ "search_results": [
26
+ {"title": "PMC article", "url": "https://pmc.ncbi.nlm.nih.gov/x", "date": "2025"},
27
+ {"title": "Lancet", "url": "https://thelancet.com/y", "date": "2025"},
28
+ ],
29
+ }
30
+
31
+ _BRAVE_JSON = {
32
+ "web": {"results": [
33
+ {"title": "T1", "url": "https://a", "description": "d1"},
34
+ {"title": "T2", "url": "https://b", "description": "d2"},
35
+ ]}
36
+ }
37
+
38
+
39
class _Resp:
    """Minimal stand-in for an httpx response, returned by the fake get/post."""

    def __init__(self, text="", payload=None, status=200):
        self.status_code = status
        self.text = text
        self._payload = payload

    def json(self):
        """Return the canned payload given at construction."""
        return self._payload

    def raise_for_status(self):
        """Raise httpx.HTTPStatusError when the status code is 400 or above."""
        if self.status_code < 400:
            return
        raise httpx.HTTPStatusError("err", request=None, response=self)
51
+
52
+
53
def test_web_fetch_strips_html_to_text(monkeypatch):
    """web_fetch keeps visible text and the title; script, style and tags go."""
    monkeypatch.setattr(httpx, "get", lambda url, **kw: _Resp(text=_HTML))
    result = json.loads(web_mod.web_fetch("https://example.com"))
    body = result["text"]
    for kept in ("Welcome", "main"):
        assert kept in body
    for dropped in ("var a=1", "color:red", "<"):
        assert dropped not in body
    assert result["title"] == "Hello Page"
60
+
61
+
62
def test_web_fetch_truncates(monkeypatch):
    """max_chars caps the length of the returned text."""
    oversized = "<p>{}</p>".format("word " * 5000)

    def fake_get(url, **kw):
        return _Resp(text=oversized)

    monkeypatch.setattr(httpx, "get", fake_get)
    result = json.loads(web_mod.web_fetch("https://example.com", max_chars=100))
    assert len(result["text"]) <= 100
67
+
68
+
69
def test_web_fetch_is_non_dangerous():
    """web_fetch is registered with dangerous set to False."""
    entry = get_registry().get("web_fetch")
    assert entry.dangerous is False
71
+
72
+
73
def test_web_search_is_non_dangerous():
    """web_search is registered with dangerous set to False."""
    entry = get_registry().get("web_search")
    assert entry.dangerous is False
75
+
76
+
77
def test_ddg_decode_extracts_real_url():
    """A wrapped DuckDuckGo link is unwrapped; a direct link passes through."""
    wrapped = "//duckduckgo.com/l/?uddg=https%3A%2F%2Fpubmed.ncbi.nlm.nih.gov%2F999%2F&rut=z"
    direct = "https://direct.example/x"
    assert web_mod._ddg_decode(wrapped) == "https://pubmed.ncbi.nlm.nih.gov/999/"
    assert web_mod._ddg_decode(direct) == direct
81
+
82
+
83
def test_web_search_perplexity_first_when_keyed(monkeypatch):
    """With a Perplexity key set, Perplexity answers and httpx.get is never called."""
    monkeypatch.setenv("PERPLEXITY_API_KEY", "pplx-test")
    monkeypatch.delenv("DEEPPARALLEL_SEARCH_API_KEY", raising=False)
    get_calls = []

    def fake_get(*args, **kwargs):
        get_calls.append(args)
        return _Resp()

    monkeypatch.setattr(httpx, "post", lambda url, **kw: _Resp(payload=_PPLX_JSON))
    monkeypatch.setattr(httpx, "get", fake_get)
    result = json.loads(web_mod.web_search("psilocybin PTSD", count=2))
    assert result["provider"] == "perplexity"
    assert "psilocybin" in result["answer"].lower()
    assert result["results"][0]["url"].startswith("https://")
    assert not get_calls  # never fell through to Brave
94
+
95
+
96
def test_web_search_brave_when_only_brave_keyed(monkeypatch):
    """With only the Brave key set, Brave is queried with the query and token."""
    monkeypatch.delenv("PERPLEXITY_API_KEY", raising=False)
    monkeypatch.setenv("DEEPPARALLEL_SEARCH_API_KEY", "k")
    seen = {}

    def fake_get(url, **kw):
        seen.update(params=kw.get("params", {}), headers=kw.get("headers", {}))
        return _Resp(payload=_BRAVE_JSON)

    monkeypatch.setattr(httpx, "get", fake_get)
    result = json.loads(web_mod.web_search("python testing"))
    titles = [hit["title"] for hit in result["results"]]
    assert result["provider"] == "brave"
    assert titles == ["T1", "T2"]
    assert seen["params"]["q"] == "python testing"
    assert seen["headers"]["X-Subscription-Token"] == "k"
112
+
113
+
114
def test_web_search_duckduckgo_without_any_key(monkeypatch):
    """With neither key set, the keyless DuckDuckGo scrape answers the query."""
    for var in ("PERPLEXITY_API_KEY", "DEEPPARALLEL_SEARCH_API_KEY"):
        monkeypatch.delenv(var, raising=False)
    monkeypatch.setattr(httpx, "post", lambda url, **kw: _Resp(text=_DDG_HTML))
    result = json.loads(web_mod.web_search("psilocybin PTSD", count=2))
    top = result["results"][0]
    assert result["provider"] == "duckduckgo"
    assert top["url"] == "https://pubmed.ncbi.nlm.nih.gov/123/"
    assert "randomized" in top["snippet"].lower()
122
+
123
+
124
def test_web_search_recovers_to_ddg_when_brave_fails(monkeypatch):
    """A Brave HTTP 422 does not end the search; DuckDuckGo takes over."""
    monkeypatch.delenv("PERPLEXITY_API_KEY", raising=False)
    monkeypatch.setenv("DEEPPARALLEL_SEARCH_API_KEY", "k")
    brave_rejects = _Resp(text="bad", status=422)
    ddg_page = _Resp(text=_DDG_HTML)
    monkeypatch.setattr(httpx, "get", lambda url, **kw: brave_rejects)
    monkeypatch.setattr(httpx, "post", lambda url, **kw: ddg_page)
    result = json.loads(web_mod.web_search("psilocybin"))
    assert result["provider"] == "duckduckgo"  # recovered after Brave 422
131
+
132
+
133
def test_web_search_reports_all_errors_when_all_fail(monkeypatch):
    """When every provider fails, the error names each provider that was tried."""
    monkeypatch.setenv("PERPLEXITY_API_KEY", "pplx-test")
    monkeypatch.setenv("DEEPPARALLEL_SEARCH_API_KEY", "k")
    # post serves Perplexity and DuckDuckGo, get serves Brave: all return 500.
    monkeypatch.setattr(httpx, "post", lambda url, **kw: _Resp(text="down", status=500))
    monkeypatch.setattr(httpx, "get", lambda url, **kw: _Resp(text="down", status=500))
    out = json.loads(web_mod.web_search("x"))
    assert "error" in out
    # Check all three providers, Brave included, so a dropped middle error
    # cannot go unnoticed.
    for provider in ("perplexity", "brave", "duckduckgo"):
        assert provider in out["error"]
@@ -1,82 +0,0 @@
1
- """Web tools: fetch a page's text, and search (key-gated)."""
2
-
3
- from __future__ import annotations
4
-
5
- import json
6
- import os
7
- import re
8
-
9
- import httpx
10
-
11
- from deepparallel.tools import tool
12
-
13
- _SCRIPT_STYLE = re.compile(r"<(script|style)\b[^>]*>.*?</\1>", re.IGNORECASE | re.DOTALL)
14
- _TAG = re.compile(r"<[^>]+>")
15
- _TITLE = re.compile(r"<title[^>]*>(.*?)</title>", re.IGNORECASE | re.DOTALL)
16
- _WS = re.compile(r"\s+")
17
- _TIMEOUT = 15.0
18
- _UA = "DeepParallel/0.1"
19
-
20
-
21
- @tool(dangerous=False)
22
- def web_fetch(url: str, max_chars: int = 8000) -> str:
23
- """Fetch a web page and return its readable text (HTML stripped).
24
-
25
- :param url: The URL to fetch.
26
- :param max_chars: Maximum characters of text to return.
27
- """
28
- try:
29
- r = httpx.get(url, timeout=_TIMEOUT, follow_redirects=True, headers={"user-agent": _UA})
30
- r.raise_for_status()
31
- except Exception as e: # noqa: BLE001 - surface fetch failure to the model
32
- return json.dumps({"error": f"fetch failed: {type(e).__name__}: {e}"})
33
- html = r.text or ""
34
- title_m = _TITLE.search(html)
35
- title = _WS.sub(" ", _TAG.sub("", title_m.group(1))).strip() if title_m else ""
36
- text = _WS.sub(" ", _TAG.sub(" ", _SCRIPT_STYLE.sub(" ", html))).strip()
37
- return json.dumps({"url": url, "title": title, "text": text[:max_chars]})
38
-
39
-
40
- @tool(dangerous=False)
41
- def web_search(query: str, count: int = 5) -> str:
42
- """Search the web and return result titles, URLs, and snippets.
43
-
44
- Requires DEEPPARALLEL_SEARCH_API_KEY (Brave Search API by default).
45
-
46
- :param query: The search query.
47
- :param count: Maximum number of results.
48
- """
49
- key = (os.environ.get("DEEPPARALLEL_SEARCH_API_KEY") or "").strip()
50
- if not key:
51
- return json.dumps(
52
- {"error": "search not configured: set DEEPPARALLEL_SEARCH_API_KEY (Brave Search API key)"}
53
- )
54
- url = os.environ.get(
55
- "DEEPPARALLEL_SEARCH_URL", "https://api.search.brave.com/res/v1/web/search"
56
- )
57
- try:
58
- r = httpx.get(
59
- url,
60
- params={"q": query, "count": count},
61
- headers={"X-Subscription-Token": key, "accept": "application/json"},
62
- timeout=_TIMEOUT,
63
- )
64
- if r.status_code >= 400:
65
- # Surface the provider's error body: a bare "422" hides the reason
66
- # (missing key header, over-long query, plan limit). The body tells
67
- # the model and the user exactly what to fix.
68
- return json.dumps(
69
- {"error": f"search failed: HTTP {r.status_code}: {(r.text or '')[:300]}"}
70
- )
71
- data = r.json()
72
- except Exception as e: # noqa: BLE001 - surface search failure to the model
73
- return json.dumps({"error": f"search failed: {type(e).__name__}: {e}"})
74
- results = [
75
- {
76
- "title": item.get("title", ""),
77
- "url": item.get("url", ""),
78
- "snippet": item.get("description", ""),
79
- }
80
- for item in (data.get("web", {}).get("results") or [])[:count]
81
- ]
82
- return json.dumps({"results": results})
@@ -1,97 +0,0 @@
1
- import json
2
-
3
- import httpx
4
-
5
- import deepparallel.tools.web as web_mod
6
- from deepparallel.tools import get_registry
7
-
8
- _HTML = """<html><head><title>Hello Page</title>
9
- <style>.x{color:red}</style><script>var a=1;</script></head>
10
- <body><h1>Welcome</h1><p>This is the <b>main</b> content.</p></body></html>"""
11
-
12
-
13
- class _Resp:
14
- def __init__(self, text="", payload=None, status=200):
15
- self.text = text
16
- self._payload = payload
17
- self.status_code = status
18
-
19
- def raise_for_status(self):
20
- if self.status_code >= 400:
21
- raise httpx.HTTPStatusError("err", request=None, response=self)
22
-
23
- def json(self):
24
- return self._payload
25
-
26
-
27
- def test_web_fetch_strips_html_to_text(monkeypatch):
28
- monkeypatch.setattr(httpx, "get", lambda url, **kw: _Resp(text=_HTML))
29
- out = json.loads(web_mod.web_fetch("https://example.com"))
30
- assert "Welcome" in out["text"]
31
- assert "main" in out["text"]
32
- assert "var a=1" not in out["text"] # script stripped
33
- assert "color:red" not in out["text"] # style stripped
34
- assert "<" not in out["text"] # tags stripped
35
- assert out["title"] == "Hello Page"
36
-
37
-
38
- def test_web_fetch_truncates(monkeypatch):
39
- big = "<p>" + ("word " * 5000) + "</p>"
40
- monkeypatch.setattr(httpx, "get", lambda url, **kw: _Resp(text=big))
41
- out = json.loads(web_mod.web_fetch("https://example.com", max_chars=100))
42
- assert len(out["text"]) <= 100
43
-
44
-
45
- def test_web_fetch_is_non_dangerous():
46
- assert get_registry().get("web_fetch").dangerous is False
47
-
48
-
49
- def test_web_search_unconfigured_returns_error(monkeypatch):
50
- monkeypatch.delenv("DEEPPARALLEL_SEARCH_API_KEY", raising=False)
51
- out = json.loads(web_mod.web_search("anything"))
52
- assert "error" in out
53
- assert "DEEPPARALLEL_SEARCH_API_KEY" in out["error"]
54
-
55
-
56
- def test_web_search_parses_results(monkeypatch):
57
- monkeypatch.setenv("DEEPPARALLEL_SEARCH_API_KEY", "k")
58
- payload = {
59
- "web": {
60
- "results": [
61
- {"title": "T1", "url": "https://a", "description": "d1"},
62
- {"title": "T2", "url": "https://b", "description": "d2"},
63
- ]
64
- }
65
- }
66
- captured = {}
67
-
68
- def fake_get(url, **kw):
69
- captured["url"] = url
70
- captured["headers"] = kw.get("headers", {})
71
- captured["params"] = kw.get("params", {})
72
- return _Resp(payload=payload)
73
-
74
- monkeypatch.setattr(httpx, "get", fake_get)
75
- out = json.loads(web_mod.web_search("python testing"))
76
- assert [r["title"] for r in out["results"]] == ["T1", "T2"]
77
- assert captured["params"]["q"] == "python testing"
78
- assert captured["headers"]["X-Subscription-Token"] == "k"
79
-
80
-
81
- def test_web_search_is_non_dangerous():
82
- assert get_registry().get("web_search").dangerous is False
83
-
84
-
85
- def test_web_search_surfaces_http_error_body(monkeypatch):
86
- monkeypatch.setenv("DEEPPARALLEL_SEARCH_API_KEY", "k")
87
- body = '{"error":{"detail":"x-subscription-token Field required"}}'
88
- monkeypatch.setattr(httpx, "get", lambda url, **kw: _Resp(text=body, status=422))
89
- out = json.loads(web_mod.web_search("anything"))
90
- assert "HTTP 422" in out["error"]
91
- assert "x-subscription-token" in out["error"] # the real reason, not a bare 422
92
-
93
-
94
- def test_web_search_blank_key_treated_as_unconfigured(monkeypatch):
95
- monkeypatch.setenv("DEEPPARALLEL_SEARCH_API_KEY", " ")
96
- out = json.loads(web_mod.web_search("anything"))
97
- assert "DEEPPARALLEL_SEARCH_API_KEY" in out["error"]
File without changes
File without changes