deepparallel 0.5.1__tar.gz → 0.5.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {deepparallel-0.5.1 → deepparallel-0.5.3}/PKG-INFO +1 -1
- {deepparallel-0.5.1 → deepparallel-0.5.3}/deepparallel/__init__.py +1 -1
- {deepparallel-0.5.1 → deepparallel-0.5.3}/deepparallel/agent.py +33 -1
- {deepparallel-0.5.1 → deepparallel-0.5.3}/deepparallel/supply_chain.py +19 -8
- deepparallel-0.5.3/deepparallel/system_prompt.txt +18 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/deepparallel/tools/mcp.py +52 -27
- deepparallel-0.5.3/deepparallel/tools/web.py +187 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/deepparallel.egg-info/PKG-INFO +1 -1
- {deepparallel-0.5.1 → deepparallel-0.5.3}/pyproject.toml +1 -1
- {deepparallel-0.5.1 → deepparallel-0.5.3}/tests/test_supply_chain.py +18 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/tests/test_tools_mcp.py +35 -0
- deepparallel-0.5.3/tests/test_tools_web.py +140 -0
- deepparallel-0.5.1/deepparallel/system_prompt.txt +0 -7
- deepparallel-0.5.1/deepparallel/tools/web.py +0 -76
- deepparallel-0.5.1/tests/test_tools_web.py +0 -82
- {deepparallel-0.5.1 → deepparallel-0.5.3}/README.md +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/deepparallel/backend.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/deepparallel/branding.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/deepparallel/cli.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/deepparallel/config.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/deepparallel/crowe_id.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/deepparallel/dsml.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/deepparallel/fusion.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/deepparallel/licensing.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/deepparallel/registry.json +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/deepparallel/renderer.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/deepparallel/research/__init__.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/deepparallel/research/conduit.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/deepparallel/research/provider.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/deepparallel/routing.example.json +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/deepparallel/routing.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/deepparallel/serve.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/deepparallel/tools/__init__.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/deepparallel/tools/codeast.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/deepparallel/tools/edit.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/deepparallel/tools/files.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/deepparallel/tools/registry.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/deepparallel/tools/sandbox.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/deepparallel/tools/search.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/deepparallel/tools/shell.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/deepparallel/tools/vision.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/deepparallel/userinput.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/deepparallel.egg-info/SOURCES.txt +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/deepparallel.egg-info/dependency_links.txt +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/deepparallel.egg-info/entry_points.txt +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/deepparallel.egg-info/requires.txt +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/deepparallel.egg-info/top_level.txt +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/setup.cfg +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/tests/test_agent.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/tests/test_backend.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/tests/test_backend_chat.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/tests/test_backend_stream.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/tests/test_branding.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/tests/test_cli.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/tests/test_config.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/tests/test_crowe_backend.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/tests/test_crowe_gateway_backend.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/tests/test_crowe_id_auth.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/tests/test_crowe_payment_required.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/tests/test_dsml.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/tests/test_fusion.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/tests/test_issuer_signer.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/tests/test_licensing.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/tests/test_renderer.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/tests/test_research.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/tests/test_research_provider.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/tests/test_routing.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/tests/test_serve.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/tests/test_spinner_color.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/tests/test_tool_registry.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/tests/test_tools_codeast.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/tests/test_tools_edit.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/tests/test_tools_files.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/tests/test_tools_sandbox.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/tests/test_tools_search.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/tests/test_tools_shell.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/tests/test_tools_vision.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/tests/test_userinput.py +0 -0
- {deepparallel-0.5.1 → deepparallel-0.5.3}/tests/test_userinput_paste.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: deepparallel
|
|
3
|
-
Version: 0.5.1
|
|
3
|
+
Version: 0.5.3
|
|
4
4
|
Summary: DeepParallel - a multi-model agentic coding CLI with cross-model Guardian review, served via Crowe Logic.
|
|
5
5
|
Author-email: Michael Crowe <michael@crowelogic.com>
|
|
6
6
|
License: Apache-2.0
|
|
@@ -204,6 +204,38 @@ def _guardian_verdict(guardian, name: str, args: dict) -> str | None:
|
|
|
204
204
|
return guardian_review(guardian, _guardian_review_content(name, args))
|
|
205
205
|
|
|
206
206
|
|
|
207
|
+
def _local_module_names(target_path: str) -> set[str]:
|
|
208
|
+
"""Module names that resolve to files in the workspace, so a sibling import
|
|
209
|
+
like `from compound_library import ...` is never flagged as a hallucinated
|
|
210
|
+
PyPI package. Scans the target file's directory and the cwd (capped)."""
|
|
211
|
+
names: set[str] = set()
|
|
212
|
+
roots = []
|
|
213
|
+
try:
|
|
214
|
+
roots.append(Path(target_path).expanduser().resolve().parent)
|
|
215
|
+
except Exception: # noqa: BLE001
|
|
216
|
+
pass
|
|
217
|
+
try:
|
|
218
|
+
roots.append(Path.cwd().resolve())
|
|
219
|
+
except Exception: # noqa: BLE001
|
|
220
|
+
pass
|
|
221
|
+
seen_roots: set[Path] = set()
|
|
222
|
+
for root in roots:
|
|
223
|
+
if root in seen_roots:
|
|
224
|
+
continue
|
|
225
|
+
seen_roots.add(root)
|
|
226
|
+
try:
|
|
227
|
+
for i, p in enumerate(root.rglob("*.py")):
|
|
228
|
+
names.add(p.stem)
|
|
229
|
+
if p.name == "__init__.py":
|
|
230
|
+
names.add(p.parent.name)
|
|
231
|
+
if i >= 5000:
|
|
232
|
+
break
|
|
233
|
+
except Exception: # noqa: BLE001
|
|
234
|
+
pass
|
|
235
|
+
names.discard("__init__")
|
|
236
|
+
return names
|
|
237
|
+
|
|
238
|
+
|
|
207
239
|
def _supply_chain_note(name: str, args: dict) -> str | None:
|
|
208
240
|
"""Best-effort: flag hallucinated/slopsquatted deps an edit introduces."""
|
|
209
241
|
content = args.get("new_source") or args.get("new_string") or args.get("content") or ""
|
|
@@ -213,7 +245,7 @@ def _supply_chain_note(name: str, args: dict) -> str | None:
|
|
|
213
245
|
try:
|
|
214
246
|
from deepparallel import supply_chain
|
|
215
247
|
|
|
216
|
-
result = supply_chain.audit(content, path)
|
|
248
|
+
result = supply_chain.audit(content, path, _local_module_names(path))
|
|
217
249
|
except Exception: # noqa: BLE001 - supply-chain check is best-effort
|
|
218
250
|
return None
|
|
219
251
|
if result["hallucinated"]:
|
|
@@ -44,23 +44,30 @@ _IMPORT_RE = re.compile(r"^\s*(?:import\s+([a-zA-Z0-9_.]+)|from\s+([a-zA-Z0-9_.]
|
|
|
44
44
|
_REQ_RE = re.compile(r"^\s*([A-Za-z0-9][A-Za-z0-9._-]*)")
|
|
45
45
|
|
|
46
46
|
|
|
47
|
-
def extract_dependencies(content: str, filename: str) -> list[dict]:
|
|
48
|
-
|
|
47
|
+
def extract_dependencies(
|
|
48
|
+
content: str, filename: str, local_modules: set[str] | None = None
|
|
49
|
+
) -> list[dict]:
|
|
50
|
+
"""Return [{name, ecosystem, raw}] introduced by this content.
|
|
51
|
+
|
|
52
|
+
`local_modules` names resolve to files in the workspace (sibling modules,
|
|
53
|
+
local packages) and are never treated as third-party dependencies.
|
|
54
|
+
"""
|
|
49
55
|
fn = filename.rsplit("/", 1)[-1].lower()
|
|
50
56
|
if fn == "package.json":
|
|
51
57
|
return _from_package_json(content)
|
|
52
58
|
if fn in ("requirements.txt",) or fn.startswith("requirements"):
|
|
53
59
|
return _from_requirements(content)
|
|
54
60
|
if fn.endswith(".py"):
|
|
55
|
-
return _from_python(content)
|
|
61
|
+
return _from_python(content, local_modules)
|
|
56
62
|
return []
|
|
57
63
|
|
|
58
64
|
|
|
59
|
-
def _from_python(content: str) -> list[dict]:
|
|
65
|
+
def _from_python(content: str, local: set[str] | None = None) -> list[dict]:
|
|
66
|
+
local = local or set()
|
|
60
67
|
out, seen = [], set()
|
|
61
68
|
for imp, frm in _IMPORT_RE.findall(content):
|
|
62
69
|
mod = (imp or frm).split(".")[0]
|
|
63
|
-
if not mod or mod.startswith("_") or mod in _STDLIB or mod in seen:
|
|
70
|
+
if not mod or mod.startswith("_") or mod in _STDLIB or mod in seen or mod in local:
|
|
64
71
|
continue
|
|
65
72
|
seen.add(mod)
|
|
66
73
|
dist = _PYPI_ALIASES.get(mod, mod)
|
|
@@ -117,11 +124,15 @@ def check_exists(name: str, ecosystem: str) -> bool | None:
|
|
|
117
124
|
return None
|
|
118
125
|
|
|
119
126
|
|
|
120
|
-
def audit(content: str, filename: str) -> dict:
|
|
121
|
-
"""Audit a change's dependencies. Returns findings + the hallucinated list.
|
|
127
|
+
def audit(content: str, filename: str, local_modules: set[str] | None = None) -> dict:
|
|
128
|
+
"""Audit a change's dependencies. Returns findings + the hallucinated list.
|
|
129
|
+
|
|
130
|
+
`local_modules` are workspace-local module names (sibling files, local
|
|
131
|
+
packages) that must not be checked against PyPI/npm.
|
|
132
|
+
"""
|
|
122
133
|
findings = []
|
|
123
134
|
hallucinated = []
|
|
124
|
-
for dep in extract_dependencies(content, filename):
|
|
135
|
+
for dep in extract_dependencies(content, filename, local_modules):
|
|
125
136
|
exists = check_exists(dep["name"], dep["ecosystem"])
|
|
126
137
|
status = "ok" if exists is True else "missing" if exists is False else "unknown"
|
|
127
138
|
findings.append({"name": dep["name"], "ecosystem": dep["ecosystem"], "status": status})
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
You are DeepParallel, a precise coding assistant from Crowe Logic.
|
|
2
|
+
|
|
3
|
+
Voice: direct and concise. Lead with the answer, not a preamble — never open with "I'd be happy to", "Great question", or by restating the request. Give depth only when asked or when the problem genuinely needs it. Stop when the answer is complete; don't pad with summaries or follow-up offers unless they add real value.
|
|
4
|
+
|
|
5
|
+
Formatting: clean Markdown. Single blank lines between paragraphs — never double. Fenced code blocks with a language tag for code, inline backticks for identifiers and paths, tight lists (no blank line between items). Prefer a short prose answer over a bulleted list when a sentence will do.
|
|
6
|
+
|
|
7
|
+
You can use tools to read, search, analyze, edit, open, and run code. Use them when they help; do not call them speculatively. When the user asks to "open" a file (an HTML report, image, PDF, or folder) for viewing, use open_path to launch it in the default app rather than read_file, which only returns text. When asked to run something with different parameters, prefer non-destructive approaches (CLI arguments, environment variables, or a temporary copy) over editing the user's source files. Only edit a source file when changing it is the actual goal, and explain what you changed.
|
|
8
|
+
|
|
9
|
+
Engineering discipline — how you build:
|
|
10
|
+
- Build the smallest unit that can be verified, and verify it before scaling up. Before generating a multi-file system, write the single most fundamental primitive and run it (or a one-line check) to confirm it works. Never emit hundreds of lines across several files before executing anything — a wrong assumption then costs many rounds of debugging instead of one.
|
|
11
|
+
- Ground before you generate. For domain work (chemistry, biology, finance, an API or library you are not certain of), prefer retrieving real data or references over inventing them. Use mcp_search with a single keyword (e.g. "pubchem") to find a domain MCP server, web_fetch for documentation, and read_file/grep for local truth. Reach for a database or a reference before reconstructing it from memory.
|
|
12
|
+
- Treat warnings as signal, not noise. A Guardian "risky/bug" verdict, a supply-chain flag, a parser or valence error, or a low similarity-to-reference score is evidence that something is wrong. Investigate and fix the cause; never rationalize it away or approve through it.
|
|
13
|
+
- Validate against known-good references. When generating structured artifacts (molecules, schemas, configs, queries), check a sample against a known-correct example before trusting the whole batch. If your output does not resemble the references you expect, the generator is wrong, not the references.
|
|
14
|
+
- Label honestly. Never emit an output whose name, ID, or label does not match what it actually is. If you cannot represent something correctly, say so rather than silently substituting a near-miss.
|
|
15
|
+
|
|
16
|
+
Where to write files: write into the current working directory using relative paths (e.g. `analysis/report.md`), or a path the user explicitly gave you. Do NOT invent absolute system paths like `/home/user/...`; the run_code sandbox is a SEPARATE environment from where write_file and edit_file act, and a host path like `/home/user` may not exist (writes there fail). Create any folders you need under the working directory.
|
|
17
|
+
|
|
18
|
+
Grounding is mandatory, not optional. Before writing substantive domain content (a protocol, analysis, dataset, literature claim, or any statement of fact a reader would trust), you MUST ground it first: web_search (works with no API key), web_fetch on a specific source, mcp_search to find a domain data server (try a single keyword like "pubmed", "clinicaltrials", "pubchem"), or read_file/grep for local truth. If one grounding tool errors, try another route before proceeding — never conclude "search is down, I'll rely on my knowledge" and generate from memory. When you genuinely cannot ground a claim, state that explicitly and label it unverified rather than presenting recall as established fact. Cite the sources you used.
|
|
@@ -10,6 +10,7 @@ from __future__ import annotations
|
|
|
10
10
|
|
|
11
11
|
import json
|
|
12
12
|
import os
|
|
13
|
+
import re
|
|
13
14
|
import subprocess
|
|
14
15
|
import sys
|
|
15
16
|
import threading
|
|
@@ -159,6 +160,35 @@ def _reap_idle() -> None:
|
|
|
159
160
|
del _pool[key]
|
|
160
161
|
|
|
161
162
|
|
|
163
|
+
def _registry_query(query: str, limit: int) -> list[dict]:
|
|
164
|
+
"""One registry search call; returns raw server entries (possibly empty)."""
|
|
165
|
+
r = httpx.get(
|
|
166
|
+
_REGISTRY_API,
|
|
167
|
+
params={"search": query, "limit": limit},
|
|
168
|
+
timeout=_TIMEOUT,
|
|
169
|
+
headers={"user-agent": _UA},
|
|
170
|
+
)
|
|
171
|
+
r.raise_for_status()
|
|
172
|
+
return r.json().get("servers", [])
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def _format_server(entry: dict) -> dict:
|
|
176
|
+
server = entry.get("server", entry)
|
|
177
|
+
return {
|
|
178
|
+
"name": server.get("name", "unknown"),
|
|
179
|
+
"description": server.get("description", ""),
|
|
180
|
+
"version": server.get("version", ""),
|
|
181
|
+
"packages": [
|
|
182
|
+
{
|
|
183
|
+
"type": p.get("registryType", ""),
|
|
184
|
+
"package": p.get("identifier", ""),
|
|
185
|
+
"transport": p.get("transport", {}).get("type", "unknown"),
|
|
186
|
+
}
|
|
187
|
+
for p in server.get("packages", [])
|
|
188
|
+
],
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
|
|
162
192
|
@tool(dangerous=False)
|
|
163
193
|
def mcp_search(query: str, limit: int = 10) -> str:
|
|
164
194
|
"""Search the MCP server registry (5,800+ servers) for a capability.
|
|
@@ -166,39 +196,34 @@ def mcp_search(query: str, limit: int = 10) -> str:
|
|
|
166
196
|
Returns matching server names, descriptions, and package install info.
|
|
167
197
|
Use this first to discover what exists, then mcp_list_tools to connect.
|
|
168
198
|
|
|
169
|
-
:param query:
|
|
199
|
+
:param query: A single capability keyword works best (e.g. "pubchem",
|
|
200
|
+
"postgres", "slack"); multi-word phrases are split and merged automatically.
|
|
170
201
|
:param limit: Maximum number of results (max 50).
|
|
171
202
|
"""
|
|
172
203
|
limit = min(int(limit), 50)
|
|
173
204
|
try:
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
205
|
+
servers = _registry_query(query, limit)
|
|
206
|
+
# The registry matches substrings, not phrases or meaning: a natural-
|
|
207
|
+
# language query like "chemistry molecular docking" matches nothing,
|
|
208
|
+
# while the single word "chemistry" finds servers. When a multi-word
|
|
209
|
+
# query comes back empty, retry each significant word and merge, so the
|
|
210
|
+
# model discovers servers without having to guess the exact term.
|
|
211
|
+
if not servers:
|
|
212
|
+
words = [w for w in re.findall(r"[A-Za-z0-9]+", query.lower()) if len(w) > 2]
|
|
213
|
+
seen: set[str] = set()
|
|
214
|
+
merged: list[dict] = []
|
|
215
|
+
for word in dict.fromkeys(words):
|
|
216
|
+
for entry in _registry_query(word, limit):
|
|
217
|
+
name = entry.get("server", entry).get("name", "")
|
|
218
|
+
if name and name not in seen:
|
|
219
|
+
seen.add(name)
|
|
220
|
+
merged.append(entry)
|
|
221
|
+
if len(merged) >= limit:
|
|
222
|
+
break
|
|
223
|
+
servers = merged[:limit]
|
|
182
224
|
except Exception as e: # noqa: BLE001 - surface registry failure to the model
|
|
183
225
|
return json.dumps({"error": f"registry search failed: {type(e).__name__}: {e}"})
|
|
184
|
-
results = []
|
|
185
|
-
for entry in data.get("servers", []):
|
|
186
|
-
server = entry.get("server", entry)
|
|
187
|
-
results.append(
|
|
188
|
-
{
|
|
189
|
-
"name": server.get("name", "unknown"),
|
|
190
|
-
"description": server.get("description", ""),
|
|
191
|
-
"version": server.get("version", ""),
|
|
192
|
-
"packages": [
|
|
193
|
-
{
|
|
194
|
-
"type": p.get("registryType", ""),
|
|
195
|
-
"package": p.get("identifier", ""),
|
|
196
|
-
"transport": p.get("transport", {}).get("type", "unknown"),
|
|
197
|
-
}
|
|
198
|
-
for p in server.get("packages", [])
|
|
199
|
-
],
|
|
200
|
-
}
|
|
201
|
-
)
|
|
226
|
+
results = [_format_server(entry) for entry in servers]
|
|
202
227
|
return json.dumps({"query": query, "count": len(results), "servers": results})
|
|
203
228
|
|
|
204
229
|
|
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
"""Web tools: fetch a page's text, and search.
|
|
2
|
+
|
|
3
|
+
web_search needs NO API key: providers are tried in order of quality and the
|
|
4
|
+
keyless DuckDuckGo scrape is the always-available floor, so search never dies
|
|
5
|
+
for lack of a key.
|
|
6
|
+
- PERPLEXITY_API_KEY -> Perplexity Sonar: synthesized, cited answers (best for
|
|
7
|
+
research; Enterprise does not train on your data).
|
|
8
|
+
- DEEPPARALLEL_SEARCH_API_KEY -> Brave Search: ranked link results.
|
|
9
|
+
- (no key) -> DuckDuckGo HTML scrape.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import json
|
|
15
|
+
import os
|
|
16
|
+
import re
|
|
17
|
+
from urllib.parse import parse_qs, urlparse
|
|
18
|
+
|
|
19
|
+
import httpx
|
|
20
|
+
|
|
21
|
+
from deepparallel.tools import tool
|
|
22
|
+
|
|
23
|
+
_SCRIPT_STYLE = re.compile(r"<(script|style)\b[^>]*>.*?</\1>", re.IGNORECASE | re.DOTALL)
|
|
24
|
+
_TAG = re.compile(r"<[^>]+>")
|
|
25
|
+
_TITLE = re.compile(r"<title[^>]*>(.*?)</title>", re.IGNORECASE | re.DOTALL)
|
|
26
|
+
_WS = re.compile(r"\s+")
|
|
27
|
+
_TIMEOUT = 15.0
|
|
28
|
+
_RESEARCH_TIMEOUT = 40.0 # Perplexity synthesizes; it is slower than a link API
|
|
29
|
+
_UA = "DeepParallel/0.1"
|
|
30
|
+
# DuckDuckGo's HTML endpoint rejects non-browser agents, so the fallback needs
|
|
31
|
+
# a realistic UA. Kept separate from _UA (used for plain fetches).
|
|
32
|
+
_BROWSER_UA = (
|
|
33
|
+
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 "
|
|
34
|
+
"(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
|
|
35
|
+
)
|
|
36
|
+
_DDG_LINK = re.compile(r'<a[^>]+class="result__a"[^>]+href="([^"]+)"[^>]*>(.*?)</a>', re.I | re.S)
|
|
37
|
+
_DDG_SNIP = re.compile(r'<a[^>]+class="result__snippet"[^>]*>(.*?)</a>', re.I | re.S)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@tool(dangerous=False)
|
|
41
|
+
def web_fetch(url: str, max_chars: int = 8000) -> str:
|
|
42
|
+
"""Fetch a web page and return its readable text (HTML stripped).
|
|
43
|
+
|
|
44
|
+
:param url: The URL to fetch.
|
|
45
|
+
:param max_chars: Maximum characters of text to return.
|
|
46
|
+
"""
|
|
47
|
+
try:
|
|
48
|
+
r = httpx.get(url, timeout=_TIMEOUT, follow_redirects=True, headers={"user-agent": _UA})
|
|
49
|
+
r.raise_for_status()
|
|
50
|
+
except Exception as e: # noqa: BLE001 - surface fetch failure to the model
|
|
51
|
+
return json.dumps({"error": f"fetch failed: {type(e).__name__}: {e}"})
|
|
52
|
+
html = r.text or ""
|
|
53
|
+
title_m = _TITLE.search(html)
|
|
54
|
+
title = _WS.sub(" ", _TAG.sub("", title_m.group(1))).strip() if title_m else ""
|
|
55
|
+
text = _WS.sub(" ", _TAG.sub(" ", _SCRIPT_STYLE.sub(" ", html))).strip()
|
|
56
|
+
return json.dumps({"url": url, "title": title, "text": text[:max_chars]})
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _strip_html(fragment: str) -> str:
|
|
60
|
+
return _WS.sub(" ", _TAG.sub("", fragment)).strip()
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _ddg_decode(href: str) -> str:
|
|
64
|
+
"""DuckDuckGo wraps result links as //duckduckgo.com/l/?uddg=<encoded-url>.
|
|
65
|
+
Pull the real destination back out; pass through anything already direct."""
|
|
66
|
+
if "uddg=" in href:
|
|
67
|
+
query = urlparse(href if href.startswith("http") else "https:" + href).query
|
|
68
|
+
target = parse_qs(query).get("uddg")
|
|
69
|
+
if target:
|
|
70
|
+
return target[0]
|
|
71
|
+
return href
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _perplexity_search(query: str, count: int, key: str):
|
|
75
|
+
"""(ok, payload | error-string) from Perplexity Sonar: a synthesized,
|
|
76
|
+
citation-backed answer plus its source URLs."""
|
|
77
|
+
try:
|
|
78
|
+
r = httpx.post(
|
|
79
|
+
"https://api.perplexity.ai/chat/completions",
|
|
80
|
+
headers={"authorization": f"Bearer {key}", "content-type": "application/json"},
|
|
81
|
+
json={
|
|
82
|
+
"model": os.environ.get("PERPLEXITY_MODEL", "sonar"),
|
|
83
|
+
"messages": [{"role": "user", "content": query}],
|
|
84
|
+
},
|
|
85
|
+
timeout=_RESEARCH_TIMEOUT,
|
|
86
|
+
)
|
|
87
|
+
if r.status_code >= 400:
|
|
88
|
+
return False, f"HTTP {r.status_code}: {(r.text or '')[:200]}"
|
|
89
|
+
data = r.json()
|
|
90
|
+
except Exception as e: # noqa: BLE001 - surface to caller for fallback
|
|
91
|
+
return False, f"{type(e).__name__}: {e}"
|
|
92
|
+
answer = (data.get("choices") or [{}])[0].get("message", {}).get("content", "")
|
|
93
|
+
sources = data.get("search_results") or []
|
|
94
|
+
if sources:
|
|
95
|
+
results = [
|
|
96
|
+
{"title": s.get("title", ""), "url": s.get("url", ""), "snippet": s.get("date", "")}
|
|
97
|
+
for s in sources[:count]
|
|
98
|
+
]
|
|
99
|
+
else:
|
|
100
|
+
results = [{"title": "", "url": u, "snippet": ""} for u in (data.get("citations") or [])[:count]]
|
|
101
|
+
return True, {"provider": "perplexity", "answer": answer, "results": results}
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def _brave_search(query: str, count: int, key: str):
|
|
105
|
+
"""(ok, results | error-string) from the Brave Search API."""
|
|
106
|
+
url = os.environ.get(
|
|
107
|
+
"DEEPPARALLEL_SEARCH_URL", "https://api.search.brave.com/res/v1/web/search"
|
|
108
|
+
)
|
|
109
|
+
try:
|
|
110
|
+
r = httpx.get(
|
|
111
|
+
url,
|
|
112
|
+
params={"q": query, "count": count},
|
|
113
|
+
headers={"X-Subscription-Token": key, "accept": "application/json"},
|
|
114
|
+
timeout=_TIMEOUT,
|
|
115
|
+
)
|
|
116
|
+
if r.status_code >= 400:
|
|
117
|
+
return False, f"HTTP {r.status_code}: {(r.text or '')[:200]}"
|
|
118
|
+
data = r.json()
|
|
119
|
+
except Exception as e: # noqa: BLE001 - surface to caller for fallback
|
|
120
|
+
return False, f"{type(e).__name__}: {e}"
|
|
121
|
+
results = [
|
|
122
|
+
{"title": it.get("title", ""), "url": it.get("url", ""), "snippet": it.get("description", "")}
|
|
123
|
+
for it in (data.get("web", {}).get("results") or [])[:count]
|
|
124
|
+
]
|
|
125
|
+
return True, results
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def _duckduckgo_search(query: str, count: int):
|
|
129
|
+
"""(ok, results | error-string) by scraping DuckDuckGo's keyless HTML endpoint."""
|
|
130
|
+
try:
|
|
131
|
+
r = httpx.post(
|
|
132
|
+
"https://html.duckduckgo.com/html/",
|
|
133
|
+
data={"q": query},
|
|
134
|
+
headers={"user-agent": _BROWSER_UA},
|
|
135
|
+
timeout=_TIMEOUT,
|
|
136
|
+
follow_redirects=True,
|
|
137
|
+
)
|
|
138
|
+
if r.status_code >= 400:
|
|
139
|
+
return False, f"HTTP {r.status_code}"
|
|
140
|
+
html = r.text or ""
|
|
141
|
+
except Exception as e: # noqa: BLE001 - surface to caller
|
|
142
|
+
return False, f"{type(e).__name__}: {e}"
|
|
143
|
+
links = _DDG_LINK.findall(html)
|
|
144
|
+
snippets = _DDG_SNIP.findall(html)
|
|
145
|
+
results = []
|
|
146
|
+
for i, (href, title) in enumerate(links[:count]):
|
|
147
|
+
results.append(
|
|
148
|
+
{
|
|
149
|
+
"title": _strip_html(title),
|
|
150
|
+
"url": _ddg_decode(href),
|
|
151
|
+
"snippet": _strip_html(snippets[i]) if i < len(snippets) else "",
|
|
152
|
+
}
|
|
153
|
+
)
|
|
154
|
+
if not results:
|
|
155
|
+
return False, "no results parsed"
|
|
156
|
+
return True, results
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
@tool(dangerous=False)
|
|
160
|
+
def web_search(query: str, count: int = 5) -> str:
|
|
161
|
+
"""Search the web and return result titles, URLs, and snippets.
|
|
162
|
+
|
|
163
|
+
Works with no API key (DuckDuckGo fallback). Providers are tried best-first:
|
|
164
|
+
Perplexity Sonar (PERPLEXITY_API_KEY) for cited, synthesized answers, then
|
|
165
|
+
Brave (DEEPPARALLEL_SEARCH_API_KEY), then keyless DuckDuckGo.
|
|
166
|
+
|
|
167
|
+
:param query: The search query.
|
|
168
|
+
:param count: Maximum number of results.
|
|
169
|
+
"""
|
|
170
|
+
errors = []
|
|
171
|
+
pplx = (os.environ.get("PERPLEXITY_API_KEY") or "").strip()
|
|
172
|
+
if pplx:
|
|
173
|
+
ok, payload = _perplexity_search(query, count, pplx)
|
|
174
|
+
if ok:
|
|
175
|
+
return json.dumps(payload)
|
|
176
|
+
errors.append(f"perplexity: {payload}")
|
|
177
|
+
brave = (os.environ.get("DEEPPARALLEL_SEARCH_API_KEY") or "").strip()
|
|
178
|
+
if brave:
|
|
179
|
+
ok, payload = _brave_search(query, count, brave)
|
|
180
|
+
if ok:
|
|
181
|
+
return json.dumps({"provider": "brave", "results": payload})
|
|
182
|
+
errors.append(f"brave: {payload}")
|
|
183
|
+
ok, payload = _duckduckgo_search(query, count)
|
|
184
|
+
if ok:
|
|
185
|
+
return json.dumps({"provider": "duckduckgo", "results": payload})
|
|
186
|
+
errors.append(f"duckduckgo: {payload}")
|
|
187
|
+
return json.dumps({"error": "search failed: " + "; ".join(errors)})
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: deepparallel
|
|
3
|
-
Version: 0.5.1
|
|
3
|
+
Version: 0.5.3
|
|
4
4
|
Summary: DeepParallel - a multi-model agentic coding CLI with cross-model Guardian review, served via Crowe Logic.
|
|
5
5
|
Author-email: Michael Crowe <michael@crowelogic.com>
|
|
6
6
|
License: Apache-2.0
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "deepparallel"
|
|
7
|
-
version = "0.5.1"
|
|
7
|
+
version = "0.5.3"
|
|
8
8
|
description = "DeepParallel - a multi-model agentic coding CLI with cross-model Guardian review, served via Crowe Logic."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = { text = "Apache-2.0" }
|
|
@@ -69,3 +69,21 @@ def test_check_pypi_existence(monkeypatch):
|
|
|
69
69
|
def test_audit_empty_when_no_deps():
|
|
70
70
|
out = sc.audit("x = 1\nprint(x)\n", "app.py")
|
|
71
71
|
assert out["findings"] == [] and out["hallucinated"] == []
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def test_audit_skips_workspace_local_modules(monkeypatch):
|
|
75
|
+
# 200 for requests, 404 for everything else (so a local module would be
|
|
76
|
+
# falsely flagged if it were checked).
|
|
77
|
+
def fake_get(url, **kw):
|
|
78
|
+
code = 200 if "/requests/" in url else 404
|
|
79
|
+
return httpx.Response(code, request=httpx.Request("GET", url))
|
|
80
|
+
|
|
81
|
+
monkeypatch.setattr(httpx, "get", fake_get)
|
|
82
|
+
code = "from compound_library import build\nimport receptor_analysis\nimport requests\n"
|
|
83
|
+
out = sc.audit(code, "scripts/workflow.py",
|
|
84
|
+
local_modules={"compound_library", "receptor_analysis"})
|
|
85
|
+
names = {f["name"] for f in out["findings"]}
|
|
86
|
+
assert "compound_library" not in names # local, never checked
|
|
87
|
+
assert "receptor_analysis" not in names
|
|
88
|
+
assert "requests" in names
|
|
89
|
+
assert out["hallucinated"] == [] # the false positive is gone
|
|
@@ -45,3 +45,38 @@ def test_call_tool_rejects_bad_arguments_json():
|
|
|
45
45
|
def test_stop_server_not_running():
|
|
46
46
|
out = json.loads(mcp_mod.mcp_stop_server("never-started"))
|
|
47
47
|
assert out["note"] == "never-started was not running"
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def test_mcp_search_multiword_falls_back_to_keywords(monkeypatch):
|
|
51
|
+
# The registry matches substrings: a phrase returns nothing, but a single
|
|
52
|
+
# keyword finds a server. mcp_search should split and merge automatically.
|
|
53
|
+
calls = []
|
|
54
|
+
|
|
55
|
+
def fake_query(query, limit):
|
|
56
|
+
calls.append(query)
|
|
57
|
+
if " " in query:
|
|
58
|
+
return [] # phrase matches nothing, like the real registry
|
|
59
|
+
if query == "pubchem":
|
|
60
|
+
return [{"server": {"name": "io.github.cyanheads/pubchem-mcp-server",
|
|
61
|
+
"description": "Search compounds.", "packages": []}}]
|
|
62
|
+
return []
|
|
63
|
+
|
|
64
|
+
monkeypatch.setattr(mcp_mod, "_registry_query", fake_query)
|
|
65
|
+
out = json.loads(mcp_mod.mcp_search("pubchem chembl bioassay"))
|
|
66
|
+
assert out["count"] >= 1
|
|
67
|
+
assert any("pubchem" in s["name"] for s in out["servers"])
|
|
68
|
+
assert "pubchem chembl bioassay" in calls # tried the phrase first
|
|
69
|
+
assert "pubchem" in calls # then fell back to the keyword
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def test_mcp_search_single_word_skips_fallback(monkeypatch):
|
|
73
|
+
calls = []
|
|
74
|
+
|
|
75
|
+
def fake_query(query, limit):
|
|
76
|
+
calls.append(query)
|
|
77
|
+
return [{"server": {"name": "ai.waystation/postgres", "packages": []}}]
|
|
78
|
+
|
|
79
|
+
monkeypatch.setattr(mcp_mod, "_registry_query", fake_query)
|
|
80
|
+
out = json.loads(mcp_mod.mcp_search("postgres"))
|
|
81
|
+
assert out["count"] == 1
|
|
82
|
+
assert calls == ["postgres"] # primary hit, no fallback fan-out
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
import json
|
|
2
|
+
|
|
3
|
+
import httpx
|
|
4
|
+
|
|
5
|
+
import deepparallel.tools.web as web_mod
|
|
6
|
+
from deepparallel.tools import get_registry
|
|
7
|
+
|
|
8
|
+
_HTML = """<html><head><title>Hello Page</title>
|
|
9
|
+
<style>.x{color:red}</style><script>var a=1;</script></head>
|
|
10
|
+
<body><h1>Welcome</h1><p>This is the <b>main</b> content.</p></body></html>"""
|
|
11
|
+
|
|
12
|
+
_DDG_HTML = """
|
|
13
|
+
<div class="result results_links">
|
|
14
|
+
<a class="result__a" href="//duckduckgo.com/l/?uddg=https%3A%2F%2Fpubmed.ncbi.nlm.nih.gov%2F123%2F&rut=x">Psilocybin PTSD trial</a>
|
|
15
|
+
<a class="result__snippet">A randomized controlled trial of psilocybin for PTSD.</a>
|
|
16
|
+
</div>
|
|
17
|
+
<div class="result results_links">
|
|
18
|
+
<a class="result__a" href="//duckduckgo.com/l/?uddg=https%3A%2F%2Fwww.thelancet.com%2Fa&rut=y">Lancet review</a>
|
|
19
|
+
<a class="result__snippet">Systematic review of psychedelics.</a>
|
|
20
|
+
</div>
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
_PPLX_JSON = {
|
|
24
|
+
"choices": [{"message": {"content": "Yes, psilocybin is studied for PTSD [1][2]."}}],
|
|
25
|
+
"search_results": [
|
|
26
|
+
{"title": "PMC article", "url": "https://pmc.ncbi.nlm.nih.gov/x", "date": "2025"},
|
|
27
|
+
{"title": "Lancet", "url": "https://thelancet.com/y", "date": "2025"},
|
|
28
|
+
],
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
_BRAVE_JSON = {
|
|
32
|
+
"web": {"results": [
|
|
33
|
+
{"title": "T1", "url": "https://a", "description": "d1"},
|
|
34
|
+
{"title": "T2", "url": "https://b", "description": "d2"},
|
|
35
|
+
]}
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class _Resp:
|
|
40
|
+
def __init__(self, text="", payload=None, status=200):
|
|
41
|
+
self.text = text
|
|
42
|
+
self._payload = payload
|
|
43
|
+
self.status_code = status
|
|
44
|
+
|
|
45
|
+
def raise_for_status(self):
|
|
46
|
+
if self.status_code >= 400:
|
|
47
|
+
raise httpx.HTTPStatusError("err", request=None, response=self)
|
|
48
|
+
|
|
49
|
+
def json(self):
|
|
50
|
+
return self._payload
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def test_web_fetch_strips_html_to_text(monkeypatch):
|
|
54
|
+
monkeypatch.setattr(httpx, "get", lambda url, **kw: _Resp(text=_HTML))
|
|
55
|
+
out = json.loads(web_mod.web_fetch("https://example.com"))
|
|
56
|
+
assert "Welcome" in out["text"] and "main" in out["text"]
|
|
57
|
+
assert "var a=1" not in out["text"] and "color:red" not in out["text"]
|
|
58
|
+
assert "<" not in out["text"]
|
|
59
|
+
assert out["title"] == "Hello Page"
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def test_web_fetch_truncates(monkeypatch):
|
|
63
|
+
big = "<p>" + ("word " * 5000) + "</p>"
|
|
64
|
+
monkeypatch.setattr(httpx, "get", lambda url, **kw: _Resp(text=big))
|
|
65
|
+
out = json.loads(web_mod.web_fetch("https://example.com", max_chars=100))
|
|
66
|
+
assert len(out["text"]) <= 100
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def test_web_fetch_is_non_dangerous():
|
|
70
|
+
assert get_registry().get("web_fetch").dangerous is False
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def test_web_search_is_non_dangerous():
|
|
74
|
+
assert get_registry().get("web_search").dangerous is False
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def test_ddg_decode_extracts_real_url():
|
|
78
|
+
href = "//duckduckgo.com/l/?uddg=https%3A%2F%2Fpubmed.ncbi.nlm.nih.gov%2F999%2F&rut=z"
|
|
79
|
+
assert web_mod._ddg_decode(href) == "https://pubmed.ncbi.nlm.nih.gov/999/"
|
|
80
|
+
assert web_mod._ddg_decode("https://direct.example/x") == "https://direct.example/x"
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def test_web_search_perplexity_first_when_keyed(monkeypatch):
|
|
84
|
+
monkeypatch.setenv("PERPLEXITY_API_KEY", "pplx-test")
|
|
85
|
+
monkeypatch.delenv("DEEPPARALLEL_SEARCH_API_KEY", raising=False)
|
|
86
|
+
called = {"get": False}
|
|
87
|
+
monkeypatch.setattr(httpx, "post", lambda url, **kw: _Resp(payload=_PPLX_JSON))
|
|
88
|
+
monkeypatch.setattr(httpx, "get", lambda *a, **k: called.__setitem__("get", True) or _Resp())
|
|
89
|
+
out = json.loads(web_mod.web_search("psilocybin PTSD", count=2))
|
|
90
|
+
assert out["provider"] == "perplexity"
|
|
91
|
+
assert "psilocybin" in out["answer"].lower()
|
|
92
|
+
assert out["results"][0]["url"].startswith("https://")
|
|
93
|
+
assert called["get"] is False # never fell through to Brave
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def test_web_search_brave_when_only_brave_keyed(monkeypatch):
|
|
97
|
+
monkeypatch.delenv("PERPLEXITY_API_KEY", raising=False)
|
|
98
|
+
monkeypatch.setenv("DEEPPARALLEL_SEARCH_API_KEY", "k")
|
|
99
|
+
captured = {}
|
|
100
|
+
|
|
101
|
+
def fake_get(url, **kw):
|
|
102
|
+
captured["params"] = kw.get("params", {})
|
|
103
|
+
captured["headers"] = kw.get("headers", {})
|
|
104
|
+
return _Resp(payload=_BRAVE_JSON)
|
|
105
|
+
|
|
106
|
+
monkeypatch.setattr(httpx, "get", fake_get)
|
|
107
|
+
out = json.loads(web_mod.web_search("python testing"))
|
|
108
|
+
assert out["provider"] == "brave"
|
|
109
|
+
assert [r["title"] for r in out["results"]] == ["T1", "T2"]
|
|
110
|
+
assert captured["params"]["q"] == "python testing"
|
|
111
|
+
assert captured["headers"]["X-Subscription-Token"] == "k"
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def test_web_search_duckduckgo_without_any_key(monkeypatch):
|
|
115
|
+
monkeypatch.delenv("PERPLEXITY_API_KEY", raising=False)
|
|
116
|
+
monkeypatch.delenv("DEEPPARALLEL_SEARCH_API_KEY", raising=False)
|
|
117
|
+
monkeypatch.setattr(httpx, "post", lambda url, **kw: _Resp(text=_DDG_HTML))
|
|
118
|
+
out = json.loads(web_mod.web_search("psilocybin PTSD", count=2))
|
|
119
|
+
assert out["provider"] == "duckduckgo"
|
|
120
|
+
assert out["results"][0]["url"] == "https://pubmed.ncbi.nlm.nih.gov/123/"
|
|
121
|
+
assert "randomized" in out["results"][0]["snippet"].lower()
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def test_web_search_recovers_to_ddg_when_brave_fails(monkeypatch):
|
|
125
|
+
monkeypatch.delenv("PERPLEXITY_API_KEY", raising=False)
|
|
126
|
+
monkeypatch.setenv("DEEPPARALLEL_SEARCH_API_KEY", "k")
|
|
127
|
+
monkeypatch.setattr(httpx, "get", lambda url, **kw: _Resp(text="bad", status=422))
|
|
128
|
+
monkeypatch.setattr(httpx, "post", lambda url, **kw: _Resp(text=_DDG_HTML))
|
|
129
|
+
out = json.loads(web_mod.web_search("psilocybin"))
|
|
130
|
+
assert out["provider"] == "duckduckgo" # recovered after Brave 422
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def test_web_search_reports_all_errors_when_all_fail(monkeypatch):
|
|
134
|
+
monkeypatch.setenv("PERPLEXITY_API_KEY", "pplx-test")
|
|
135
|
+
monkeypatch.setenv("DEEPPARALLEL_SEARCH_API_KEY", "k")
|
|
136
|
+
monkeypatch.setattr(httpx, "post", lambda url, **kw: _Resp(text="down", status=500))
|
|
137
|
+
monkeypatch.setattr(httpx, "get", lambda url, **kw: _Resp(text="down", status=500))
|
|
138
|
+
out = json.loads(web_mod.web_search("x"))
|
|
139
|
+
assert "error" in out
|
|
140
|
+
assert "perplexity" in out["error"] and "duckduckgo" in out["error"]
|
|
@@ -1,7 +0,0 @@
|
|
|
1
|
-
You are DeepParallel, a precise coding assistant from Crowe Logic.
|
|
2
|
-
|
|
3
|
-
Voice: direct and concise. Lead with the answer, not a preamble — never open with "I'd be happy to", "Great question", or by restating the request. Give depth only when asked or when the problem genuinely needs it. Stop when the answer is complete; don't pad with summaries or follow-up offers unless they add real value.
|
|
4
|
-
|
|
5
|
-
Formatting: clean Markdown. Single blank lines between paragraphs — never double. Fenced code blocks with a language tag for code, inline backticks for identifiers and paths, tight lists (no blank line between items). Prefer a short prose answer over a bulleted list when a sentence will do.
|
|
6
|
-
|
|
7
|
-
You can use tools to read, search, analyze, edit, open, and run code. Use them when they help; do not call them speculatively. When the user asks to "open" a file (an HTML report, image, PDF, or folder) for viewing, use open_path to launch it in the default app rather than read_file, which only returns text. When asked to run something with different parameters, prefer non-destructive approaches (CLI arguments, environment variables, or a temporary copy) over editing the user's source files. Only edit a source file when changing it is the actual goal, and explain what you changed.
|
|
@@ -1,76 +0,0 @@
|
|
|
1
|
-
"""Web tools: fetch a page's text, and search (key-gated)."""
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
import json
|
|
6
|
-
import os
|
|
7
|
-
import re
|
|
8
|
-
|
|
9
|
-
import httpx
|
|
10
|
-
|
|
11
|
-
from deepparallel.tools import tool
|
|
12
|
-
|
|
13
|
-
_SCRIPT_STYLE = re.compile(r"<(script|style)\b[^>]*>.*?</\1>", re.IGNORECASE | re.DOTALL)
|
|
14
|
-
_TAG = re.compile(r"<[^>]+>")
|
|
15
|
-
_TITLE = re.compile(r"<title[^>]*>(.*?)</title>", re.IGNORECASE | re.DOTALL)
|
|
16
|
-
_WS = re.compile(r"\s+")
|
|
17
|
-
_TIMEOUT = 15.0
|
|
18
|
-
_UA = "DeepParallel/0.1"
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
@tool(dangerous=False)
|
|
22
|
-
def web_fetch(url: str, max_chars: int = 8000) -> str:
|
|
23
|
-
"""Fetch a web page and return its readable text (HTML stripped).
|
|
24
|
-
|
|
25
|
-
:param url: The URL to fetch.
|
|
26
|
-
:param max_chars: Maximum characters of text to return.
|
|
27
|
-
"""
|
|
28
|
-
try:
|
|
29
|
-
r = httpx.get(url, timeout=_TIMEOUT, follow_redirects=True, headers={"user-agent": _UA})
|
|
30
|
-
r.raise_for_status()
|
|
31
|
-
except Exception as e: # noqa: BLE001 - surface fetch failure to the model
|
|
32
|
-
return json.dumps({"error": f"fetch failed: {type(e).__name__}: {e}"})
|
|
33
|
-
html = r.text or ""
|
|
34
|
-
title_m = _TITLE.search(html)
|
|
35
|
-
title = _WS.sub(" ", _TAG.sub("", title_m.group(1))).strip() if title_m else ""
|
|
36
|
-
text = _WS.sub(" ", _TAG.sub(" ", _SCRIPT_STYLE.sub(" ", html))).strip()
|
|
37
|
-
return json.dumps({"url": url, "title": title, "text": text[:max_chars]})
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
@tool(dangerous=False)
|
|
41
|
-
def web_search(query: str, count: int = 5) -> str:
|
|
42
|
-
"""Search the web and return result titles, URLs, and snippets.
|
|
43
|
-
|
|
44
|
-
Requires DEEPPARALLEL_SEARCH_API_KEY (Brave Search API by default).
|
|
45
|
-
|
|
46
|
-
:param query: The search query.
|
|
47
|
-
:param count: Maximum number of results.
|
|
48
|
-
"""
|
|
49
|
-
key = os.environ.get("DEEPPARALLEL_SEARCH_API_KEY")
|
|
50
|
-
if not key:
|
|
51
|
-
return json.dumps(
|
|
52
|
-
{"error": "search not configured: set DEEPPARALLEL_SEARCH_API_KEY (Brave Search API)"}
|
|
53
|
-
)
|
|
54
|
-
url = os.environ.get(
|
|
55
|
-
"DEEPPARALLEL_SEARCH_URL", "https://api.search.brave.com/res/v1/web/search"
|
|
56
|
-
)
|
|
57
|
-
try:
|
|
58
|
-
r = httpx.get(
|
|
59
|
-
url,
|
|
60
|
-
params={"q": query, "count": count},
|
|
61
|
-
headers={"X-Subscription-Token": key, "accept": "application/json"},
|
|
62
|
-
timeout=_TIMEOUT,
|
|
63
|
-
)
|
|
64
|
-
r.raise_for_status()
|
|
65
|
-
data = r.json()
|
|
66
|
-
except Exception as e: # noqa: BLE001 - surface search failure to the model
|
|
67
|
-
return json.dumps({"error": f"search failed: {type(e).__name__}: {e}"})
|
|
68
|
-
results = [
|
|
69
|
-
{
|
|
70
|
-
"title": item.get("title", ""),
|
|
71
|
-
"url": item.get("url", ""),
|
|
72
|
-
"snippet": item.get("description", ""),
|
|
73
|
-
}
|
|
74
|
-
for item in (data.get("web", {}).get("results") or [])[:count]
|
|
75
|
-
]
|
|
76
|
-
return json.dumps({"results": results})
|
|
@@ -1,82 +0,0 @@
|
|
|
1
|
-
import json
|
|
2
|
-
|
|
3
|
-
import httpx
|
|
4
|
-
|
|
5
|
-
import deepparallel.tools.web as web_mod
|
|
6
|
-
from deepparallel.tools import get_registry
|
|
7
|
-
|
|
8
|
-
_HTML = """<html><head><title>Hello Page</title>
|
|
9
|
-
<style>.x{color:red}</style><script>var a=1;</script></head>
|
|
10
|
-
<body><h1>Welcome</h1><p>This is the <b>main</b> content.</p></body></html>"""
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
class _Resp:
|
|
14
|
-
def __init__(self, text="", payload=None, status=200):
|
|
15
|
-
self.text = text
|
|
16
|
-
self._payload = payload
|
|
17
|
-
self.status_code = status
|
|
18
|
-
|
|
19
|
-
def raise_for_status(self):
|
|
20
|
-
if self.status_code >= 400:
|
|
21
|
-
raise httpx.HTTPStatusError("err", request=None, response=self)
|
|
22
|
-
|
|
23
|
-
def json(self):
|
|
24
|
-
return self._payload
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
def test_web_fetch_strips_html_to_text(monkeypatch):
|
|
28
|
-
monkeypatch.setattr(httpx, "get", lambda url, **kw: _Resp(text=_HTML))
|
|
29
|
-
out = json.loads(web_mod.web_fetch("https://example.com"))
|
|
30
|
-
assert "Welcome" in out["text"]
|
|
31
|
-
assert "main" in out["text"]
|
|
32
|
-
assert "var a=1" not in out["text"] # script stripped
|
|
33
|
-
assert "color:red" not in out["text"] # style stripped
|
|
34
|
-
assert "<" not in out["text"] # tags stripped
|
|
35
|
-
assert out["title"] == "Hello Page"
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
def test_web_fetch_truncates(monkeypatch):
|
|
39
|
-
big = "<p>" + ("word " * 5000) + "</p>"
|
|
40
|
-
monkeypatch.setattr(httpx, "get", lambda url, **kw: _Resp(text=big))
|
|
41
|
-
out = json.loads(web_mod.web_fetch("https://example.com", max_chars=100))
|
|
42
|
-
assert len(out["text"]) <= 100
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
def test_web_fetch_is_non_dangerous():
|
|
46
|
-
assert get_registry().get("web_fetch").dangerous is False
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
def test_web_search_unconfigured_returns_error(monkeypatch):
|
|
50
|
-
monkeypatch.delenv("DEEPPARALLEL_SEARCH_API_KEY", raising=False)
|
|
51
|
-
out = json.loads(web_mod.web_search("anything"))
|
|
52
|
-
assert "error" in out
|
|
53
|
-
assert "DEEPPARALLEL_SEARCH_API_KEY" in out["error"]
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
def test_web_search_parses_results(monkeypatch):
|
|
57
|
-
monkeypatch.setenv("DEEPPARALLEL_SEARCH_API_KEY", "k")
|
|
58
|
-
payload = {
|
|
59
|
-
"web": {
|
|
60
|
-
"results": [
|
|
61
|
-
{"title": "T1", "url": "https://a", "description": "d1"},
|
|
62
|
-
{"title": "T2", "url": "https://b", "description": "d2"},
|
|
63
|
-
]
|
|
64
|
-
}
|
|
65
|
-
}
|
|
66
|
-
captured = {}
|
|
67
|
-
|
|
68
|
-
def fake_get(url, **kw):
|
|
69
|
-
captured["url"] = url
|
|
70
|
-
captured["headers"] = kw.get("headers", {})
|
|
71
|
-
captured["params"] = kw.get("params", {})
|
|
72
|
-
return _Resp(payload=payload)
|
|
73
|
-
|
|
74
|
-
monkeypatch.setattr(httpx, "get", fake_get)
|
|
75
|
-
out = json.loads(web_mod.web_search("python testing"))
|
|
76
|
-
assert [r["title"] for r in out["results"]] == ["T1", "T2"]
|
|
77
|
-
assert captured["params"]["q"] == "python testing"
|
|
78
|
-
assert captured["headers"]["X-Subscription-Token"] == "k"
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
def test_web_search_is_non_dangerous():
|
|
82
|
-
assert get_registry().get("web_search").dangerous is False
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|