ultimate-pi 0.18.1 → 0.19.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/harness-debate-plan/SKILL.md +1 -1
- package/.agents/skills/harness-decisions/SKILL.md +1 -2
- package/.agents/skills/harness-governor/SKILL.md +6 -5
- package/.agents/skills/web-retrieval/SKILL.md +163 -0
- package/.agents/skills/wiki-autoresearch/SKILL.md +6 -6
- package/.pi/PACKAGING.md +4 -4
- package/.pi/SYSTEM.md +75 -123
- package/.pi/agents/harness/incident-recorder.md +0 -1
- package/.pi/agents/harness/planning/decompose.md +0 -2
- package/.pi/agents/harness/planning/execution-plan-author.md +0 -2
- package/.pi/agents/harness/planning/hypothesis-validator.md +0 -2
- package/.pi/agents/harness/planning/hypothesis.md +0 -2
- package/.pi/agents/harness/planning/implementation-researcher.md +1 -3
- package/.pi/agents/harness/planning/plan-adversary.md +0 -2
- package/.pi/agents/harness/planning/plan-evaluator.md +1 -3
- package/.pi/agents/harness/planning/planning-context.md +0 -2
- package/.pi/agents/harness/planning/review-integrator.md +0 -2
- package/.pi/agents/harness/planning/sprint-contract-auditor.md +0 -2
- package/.pi/agents/harness/planning/stack-researcher.md +5 -3
- package/.pi/agents/harness/reviewing/adversary.md +0 -2
- package/.pi/agents/harness/reviewing/evaluator.md +0 -2
- package/.pi/agents/harness/reviewing/tie-breaker.md +0 -2
- package/.pi/agents/harness/running/executor.md +0 -2
- package/.pi/agents/harness/sentrux-bootstrap.md +0 -1
- package/.pi/agents/harness/sentrux-steward.md +0 -2
- package/.pi/agents/harness/trace-librarian.md +0 -1
- package/.pi/agents/harness/web-retrieval/web-answerer.md +35 -0
- package/.pi/agents/harness/web-retrieval/web-criteria-verifier.md +28 -0
- package/.pi/agents/harness/web-retrieval/web-gap-analyzer.md +31 -0
- package/.pi/agents/harness/web-retrieval/web-query-expander-fast.md +34 -0
- package/.pi/agents/harness/web-retrieval/web-query-expander.md +60 -0
- package/.pi/agents/harness/web-retrieval/web-summarizer.md +18 -0
- package/.pi/extensions/agt-kill-switch.ts +57 -0
- package/.pi/extensions/agt-prompt-guard.ts +32 -0
- package/.pi/extensions/custom-footer.ts +46 -145
- package/.pi/extensions/custom-header.ts +1 -1
- package/.pi/extensions/custom-system-prompt.ts +1 -1
- package/.pi/extensions/debate-orchestrator.ts +6 -6
- package/.pi/extensions/harness-ask-user.ts +7 -7
- package/.pi/extensions/harness-debate-tools.ts +26 -42
- package/.pi/extensions/harness-lens.ts +94 -0
- package/.pi/extensions/harness-plan-approval.ts +11 -11
- package/.pi/extensions/harness-run-context.ts +1070 -876
- package/.pi/extensions/harness-subagent-governance.ts +8 -0
- package/.pi/extensions/harness-subagent-submit.ts +34 -163
- package/.pi/extensions/harness-subagents.ts +3 -3
- package/.pi/extensions/harness-telemetry.ts +2 -2
- package/.pi/extensions/harness-web-guard.ts +2 -1
- package/.pi/extensions/harness-web-tools.ts +691 -53
- package/.pi/extensions/policy-gate.ts +25 -5
- package/.pi/extensions/sentrux-rules-sync.ts +1 -1
- package/.pi/extensions/subagent-governance.ts +92 -0
- package/.pi/extensions/trace-recorder.ts +1 -1
- package/.pi/extensions/{ultimate-pi-vcc.ts → vcc-compaction.ts} +1 -1
- package/.pi/harness/README.md +6 -2
- package/.pi/harness/agents.manifest.json +46 -25
- package/.pi/harness/agents.policy.yaml +309 -0
- package/.pi/harness/docs/adrs/0030-inhouse-vcc-compaction.md +1 -1
- package/.pi/harness/docs/adrs/0035-plan-phase-review-gate.md +1 -1
- package/.pi/harness/docs/adrs/0045-harness-lens-minimal-contract.md +49 -0
- package/.pi/harness/docs/adrs/0046-agt-policy-engine.md +51 -0
- package/.pi/harness/docs/adrs/0047-agt-layered-security.md +39 -0
- package/.pi/harness/docs/adrs/0048-tool-call-hook-order.md +25 -0
- package/.pi/harness/docs/adrs/0049-agents-policy-manifest.md +36 -0
- package/.pi/harness/docs/adrs/0050-agentic-web-retrieval-stack.md +46 -0
- package/.pi/harness/docs/adrs/README.md +5 -0
- package/.pi/harness/docs/harness-web-search.md +97 -0
- package/.pi/harness/env.harness.template +9 -1
- package/.pi/harness/evolution/README.md +1 -2
- package/.pi/harness/examples/agents.policy.project.yaml +19 -0
- package/.pi/harness/examples/policies/custom-deny-bash.yaml +9 -0
- package/.pi/harness/examples/web-heuristic-angles.project.yaml +22 -0
- package/.pi/harness/policies/bash-denylists.yaml +5 -0
- package/.pi/harness/policies/defaults.yaml +51 -0
- package/.pi/harness/policies/orchestrator.yaml +18 -0
- package/.pi/harness/policies/phases.yaml +10 -0
- package/.pi/harness/policies/roles.yaml +5 -0
- package/.pi/harness/policies/web-guard.yaml +5 -0
- package/.pi/harness/policies/workflow-sequences.yaml +9 -0
- package/.pi/harness/sentrux/architecture.manifest.json +26 -4
- package/.pi/harness/specs/observation.schema.json +2 -1
- package/.pi/harness/web-heuristic-angles.json +278 -0
- package/.pi/harness/web-heuristic-angles.yaml +182 -0
- package/.pi/lib/agents-policy.d.mts +70 -0
- package/.pi/lib/agents-policy.mjs +331 -0
- package/.pi/lib/agents-policy.ts +19 -0
- package/.pi/lib/agt/audit-run-sink.ts +52 -0
- package/.pi/lib/agt/build-evaluation-context.ts +285 -0
- package/.pi/lib/agt/config.ts +28 -0
- package/.pi/lib/agt/delegation.ts +69 -0
- package/.pi/lib/agt/evaluate-policy.ts +56 -0
- package/.pi/lib/agt/identity-registry.ts +41 -0
- package/.pi/lib/agt/index.ts +55 -0
- package/.pi/lib/agt/kill-switch-state.ts +11 -0
- package/.pi/lib/agt/legacy-evaluate.ts +101 -0
- package/.pi/lib/agt/policy-engine.ts +154 -0
- package/.pi/lib/agt/rings.ts +21 -0
- package/.pi/lib/agt/sre-hooks.ts +45 -0
- package/.pi/lib/agt/trust-run-store.ts +26 -0
- package/.pi/lib/agt/workflow-history.ts +29 -0
- package/.pi/lib/agt-governance-active.ts +14 -0
- package/.pi/lib/agt-tool-guard.ts +78 -0
- package/.pi/lib/ask-user/dialog.ts +314 -0
- package/.pi/{extensions/lib → lib}/debate-bus-core.ts +10 -10
- package/.pi/{extensions/lib → lib}/debate-bus-state.ts +1 -1
- package/.pi/{extensions/lib → lib}/extension-load-guard.ts +13 -2
- package/.pi/lib/harness-agt-tool-guard.ts +5 -0
- package/.pi/{extensions/lib → lib}/harness-artifact-gate.ts +1 -1
- package/.pi/lib/harness-debate-core-deps.ts +14 -0
- package/.pi/lib/harness-debate-workflow-deps.ts +43 -0
- package/.pi/lib/harness-lens/.gitattributes +1 -0
- package/.pi/lib/harness-lens/clients/edit-autopatch.ts +88 -0
- package/.pi/lib/harness-lens/clients/file-kinds.ts +380 -0
- package/.pi/lib/harness-lens/clients/file-time.ts +215 -0
- package/.pi/lib/harness-lens/clients/file-utils.ts +484 -0
- package/.pi/lib/harness-lens/clients/format-service.ts +276 -0
- package/.pi/lib/harness-lens/clients/formatters.ts +1000 -0
- package/.pi/lib/harness-lens/clients/git-guard.ts +31 -0
- package/.pi/lib/harness-lens/clients/indent-retarget.ts +90 -0
- package/.pi/lib/harness-lens/clients/installer/index.ts +2368 -0
- package/.pi/lib/harness-lens/clients/latency-logger.ts +80 -0
- package/.pi/lib/harness-lens/clients/lens-config.ts +43 -0
- package/.pi/lib/harness-lens/clients/lens-events.ts +164 -0
- package/.pi/lib/harness-lens/clients/lsp/aggregation.ts +91 -0
- package/.pi/lib/harness-lens/clients/lsp/client.ts +1466 -0
- package/.pi/lib/harness-lens/clients/lsp/config.ts +216 -0
- package/.pi/lib/harness-lens/clients/lsp/edits.ts +297 -0
- package/.pi/lib/harness-lens/clients/lsp/index.ts +1355 -0
- package/.pi/lib/harness-lens/clients/lsp/interactive-install.ts +424 -0
- package/.pi/lib/harness-lens/clients/lsp/language.ts +223 -0
- package/.pi/lib/harness-lens/clients/lsp/launch.ts +939 -0
- package/.pi/lib/harness-lens/clients/lsp/lsp-index.ts +11 -0
- package/.pi/lib/harness-lens/clients/lsp/path-utils.ts +12 -0
- package/.pi/lib/harness-lens/clients/lsp/server-strategies.ts +81 -0
- package/.pi/lib/harness-lens/clients/lsp/server.ts +1971 -0
- package/.pi/lib/harness-lens/clients/path-utils.ts +182 -0
- package/.pi/lib/harness-lens/clients/pipeline.ts +360 -0
- package/.pi/lib/harness-lens/clients/project-profile.ts +117 -0
- package/.pi/lib/harness-lens/clients/runtime-agent-end.ts +112 -0
- package/.pi/lib/harness-lens/clients/runtime-config.ts +33 -0
- package/.pi/lib/harness-lens/clients/runtime-coordinator.ts +186 -0
- package/.pi/lib/harness-lens/clients/runtime-tool-result.ts +171 -0
- package/.pi/lib/harness-lens/clients/safe-spawn.ts +339 -0
- package/.pi/lib/harness-lens/clients/secrets-scanner.ts +214 -0
- package/.pi/lib/harness-lens/clients/tool-policy.ts +2072 -0
- package/.pi/lib/harness-lens/clients/types.ts +59 -0
- package/.pi/lib/harness-lens/clients/widget-state.ts +283 -0
- package/.pi/lib/harness-lens/index.ts +532 -0
- package/.pi/lib/harness-lens/tools/lsp-diagnostics.ts +706 -0
- package/.pi/lib/harness-lens/tools/lsp-navigation.ts +1246 -0
- package/.pi/{extensions/lib → lib}/harness-posthog.ts +3 -0
- package/.pi/lib/harness-run-context-responses.ts +9 -0
- package/.pi/lib/harness-run-context.ts +0 -2
- package/.pi/{extensions/lib/spawn-policy.ts → lib/harness-spawn-policy.ts} +1 -0
- package/.pi/{extensions/lib → lib}/harness-spawn-topology.ts +1 -1
- package/.pi/lib/harness-subagent-auth.ts +81 -0
- package/.pi/{extensions/lib → lib}/harness-subagent-precheck.ts +10 -7
- package/.pi/{extensions/lib → lib}/harness-subagent-submit-pipeline.ts +3 -3
- package/.pi/lib/harness-subagent-submit-register.ts +163 -0
- package/.pi/{extensions/lib → lib}/harness-subagent-submit-registry.ts +1 -37
- package/.pi/{extensions/lib → lib}/harness-subagents-bridge.ts +74 -14
- package/.pi/{extensions/lib → lib}/harness-subprocess-bootstrap.ts +1 -1
- package/.pi/lib/harness-web/artifacts.ts +200 -0
- package/.pi/lib/harness-web/cache.ts +369 -0
- package/.pi/{extensions/lib → lib}/harness-web/run-cli.ts +42 -2
- package/.pi/{extensions/lib → lib}/plan-approval/create-plan.ts +2 -2
- package/.pi/{extensions/lib → lib}/plan-approval/format-plan.ts +2 -2
- package/.pi/{extensions/lib → lib}/plan-approval/plan-review.ts +162 -201
- package/.pi/{extensions/lib → lib}/plan-approval/render.ts +1 -1
- package/.pi/{extensions/lib → lib}/plan-approval/resolve-disk.ts +2 -2
- package/.pi/{extensions/lib → lib}/plan-approval/types.ts +1 -1
- package/.pi/{extensions/lib → lib}/plan-approval/validate.ts +3 -3
- package/.pi/{extensions/lib → lib}/plan-debate-envelope.ts +1 -1
- package/.pi/{extensions/lib → lib}/plan-debate-gate.ts +1 -1
- package/.pi/{extensions/lib → lib}/plan-debate-lane.ts +1 -4
- package/.pi/{extensions/lib → lib}/plan-messenger.ts +1 -1
- package/.pi/prompts/harness-plan.md +2 -1
- package/.pi/prompts/harness-setup.md +40 -65
- package/.pi/scripts/README.md +2 -5
- package/.pi/scripts/gen-web-heuristic-angles-json.mjs +24 -0
- package/.pi/scripts/generate-agents-policy-yaml.mjs +148 -0
- package/.pi/scripts/harness-agents-manifest.mjs +60 -3
- package/.pi/scripts/harness-agt-doctor.ts +36 -0
- package/.pi/scripts/harness-cli-verify.sh +14 -2
- package/.pi/scripts/harness-verify.mjs +191 -39
- package/.pi/scripts/harness-web-policy-guard.mjs +3 -3
- package/.pi/scripts/harness-web.py +218 -15
- package/.pi/scripts/harness_web/deep_search.py +55 -0
- package/.pi/scripts/harness_web/evidence_bundle.py +47 -0
- package/.pi/scripts/harness_web/find_similar.py +88 -0
- package/.pi/scripts/harness_web/heuristic_angles_shipped.py +85 -0
- package/.pi/scripts/harness_web/heuristic_config.py +251 -0
- package/.pi/scripts/harness_web/highlights.py +47 -0
- package/.pi/scripts/harness_web/multi_search.py +59 -0
- package/.pi/scripts/harness_web/output.py +24 -0
- package/.pi/scripts/harness_web/query_angles.py +116 -0
- package/.pi/scripts/harness_web/rank.py +163 -0
- package/.pi/scripts/harness_web/scrape.py +30 -0
- package/.pi/scripts/tests/test_harness_web_heuristic_config.py +132 -0
- package/.pi/scripts/tests/test_harness_web_query_angles.py +45 -0
- package/.pi/scripts/tests/test_harness_web_rank.py +56 -0
- package/.pi/scripts/validate-plan-dag.mjs +65 -74
- package/.pi/scripts/vendor-pi-vcc-settings.stub.ts +2 -2
- package/.pi/scripts/vendor-sync-pi-vcc.sh +1 -1
- package/.pi/skills/architecture/broker-domain/SKILL.md +65 -0
- package/.pi/skills/architecture/cqrs/SKILL.md +63 -0
- package/.pi/skills/architecture/event-driven/SKILL.md +60 -0
- package/.pi/skills/architecture/hexagonal-ports-adapters/SKILL.md +66 -0
- package/.pi/skills/architecture/layered/SKILL.md +68 -0
- package/.pi/skills/architecture/microkernel/SKILL.md +62 -0
- package/.pi/skills/architecture/microservices/SKILL.md +64 -0
- package/.pi/skills/architecture/modular-monolith/SKILL.md +65 -0
- package/.pi/skills/architecture/orchestration-driven-soa/SKILL.md +61 -0
- package/.pi/skills/architecture/pipeline/SKILL.md +63 -0
- package/.pi/skills/architecture/service-based/SKILL.md +64 -0
- package/.pi/skills/architecture/service-mesh/SKILL.md +60 -0
- package/.pi/skills/architecture/space-based/SKILL.md +60 -0
- package/.pi/skills/ast-grep/SKILL.md +40 -321
- package/.pi/skills/delivery/debugging-discipline/SKILL.md +36 -0
- package/.pi/skills/delivery/documentation-update/SKILL.md +33 -0
- package/.pi/skills/delivery/requirements-to-implementation/SKILL.md +34 -0
- package/.pi/skills/delivery/risk-based-verification/SKILL.md +43 -0
- package/.pi/skills/delivery/tradeoff-analysis/SKILL.md +34 -0
- package/.pi/skills/engineering/api-contract-design/SKILL.md +38 -0
- package/.pi/skills/engineering/cohesion-coupling/SKILL.md +43 -0
- package/.pi/skills/engineering/complexity-control/SKILL.md +31 -0
- package/.pi/skills/engineering/defensive-programming/SKILL.md +38 -0
- package/.pi/skills/engineering/dependency-management/SKILL.md +29 -0
- package/.pi/skills/engineering/domain-modeling/SKILL.md +32 -0
- package/.pi/skills/engineering/error-handling/SKILL.md +37 -0
- package/.pi/skills/engineering/legacy-code-seams/SKILL.md +35 -0
- package/.pi/skills/engineering/naming-and-intent/SKILL.md +29 -0
- package/.pi/skills/engineering/refactoring-safe-evolution/SKILL.md +35 -0
- package/.pi/skills/engineering/routine-function-design/SKILL.md +34 -0
- package/.pi/skills/engineering/small-change-discipline/SKILL.md +35 -0
- package/.pi/skills/lsp-navigation/SKILL.md +89 -0
- package/.pi/skills/quality/code-review-self-check/SKILL.md +35 -0
- package/.pi/skills/quality/privacy-data-handling/SKILL.md +26 -0
- package/.pi/skills/quality/security-review/SKILL.md +34 -0
- package/.pi/skills/quality/test-strategy/SKILL.md +33 -0
- package/.pi/skills/quality/testability-design/SKILL.md +33 -0
- package/.pi/skills/systems/concurrency-safety/SKILL.md +32 -0
- package/.pi/skills/systems/data-modeling-migrations/SKILL.md +31 -0
- package/.pi/skills/systems/observability-instrumentation/SKILL.md +32 -0
- package/.pi/skills/systems/performance-measurement/SKILL.md +35 -0
- package/.pi/skills/systems/reliability-design/SKILL.md +32 -0
- package/.sentrux/rules.toml +20 -4
- package/AGENTS.md +7 -2
- package/CHANGELOG.md +20 -0
- package/README.md +3 -12
- package/THIRD_PARTY_NOTICES.md +12 -21
- package/package.json +17 -7
- package/vendor/pi-subagents/src/agents.ts +45 -1
- package/vendor/pi-subagents/src/subagents.ts +866 -811
- package/vendor/pi-vcc/src/core/brief.ts +68 -99
- package/vendor/pi-vcc/src/core/settings.ts +2 -2
- package/.agents/skills/caveman/SKILL.md +0 -67
- package/.agents/skills/scrapling-web/SKILL.md +0 -98
- package/.pi/agents/harness/meta-optimizer.md +0 -36
- package/.pi/extensions/00-posthog-network-bootstrap.ts +0 -11
- package/.pi/extensions/lib/ask-user/dialog.ts +0 -260
- package/.pi/extensions/lib/harness-subagent-auth.ts +0 -207
- package/.pi/extensions/lib/harness-subagent-policy.ts +0 -236
- package/.pi/extensions/pi-model-router-harness.ts +0 -42
- package/.pi/harness/evolution/meta-optimizer.mjs +0 -99
- package/.pi/harness/specs/router-tuning-proposal.schema.json +0 -114
- package/.pi/model-router.example.json +0 -36
- package/.pi/prompts/harness-critic.md +0 -10
- package/.pi/prompts/harness-eval.md +0 -10
- package/.pi/prompts/harness-router-tune.md +0 -52
- package/.pi/scripts/harness-generate-model-router.mjs +0 -327
- package/.pi/scripts/harness-model-router-routing.test.mjs +0 -97
- package/.pi/scripts/harness-sync-model-router.mjs +0 -97
- package/.pi/scripts/harness_web/__pycache__/__init__.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/config.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/output.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/scrape.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/search.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/search_ddg.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/search_searxng.cpython-314.pyc +0 -0
- package/.pi/scripts/vendor-sync-pi-model-router.sh +0 -47
- package/vendor/pi-model-router/.prettierignore +0 -4
- package/vendor/pi-model-router/.prettierrc +0 -5
- package/vendor/pi-model-router/AGENTS.md +0 -39
- package/vendor/pi-model-router/LICENSE +0 -21
- package/vendor/pi-model-router/README.md +0 -99
- package/vendor/pi-model-router/UPSTREAM_PIN.md +0 -10
- package/vendor/pi-model-router/docs/ARCHITECTURE.md +0 -54
- package/vendor/pi-model-router/extensions/commands.ts +0 -720
- package/vendor/pi-model-router/extensions/config.ts +0 -348
- package/vendor/pi-model-router/extensions/constants.ts +0 -1
- package/vendor/pi-model-router/extensions/index.ts +0 -478
- package/vendor/pi-model-router/extensions/provider.ts +0 -580
- package/vendor/pi-model-router/extensions/routing.ts +0 -564
- package/vendor/pi-model-router/extensions/state.ts +0 -52
- package/vendor/pi-model-router/extensions/types.ts +0 -95
- package/vendor/pi-model-router/extensions/ui.ts +0 -144
- package/vendor/pi-model-router/model-router.example.json +0 -48
- package/vendor/pi-model-router/package.json +0 -48
- package/vendor/pi-model-router/tsconfig.json +0 -16
- /package/.pi/{prompts → harness/docs}/planning-rubrics.md +0 -0
- /package/.pi/{extensions/lib → lib}/ask-user/fallback.ts +0 -0
- /package/.pi/{extensions/lib → lib}/ask-user/render.ts +0 -0
- /package/.pi/{extensions/lib → lib}/ask-user/schema.ts +0 -0
- /package/.pi/{extensions/lib → lib}/ask-user/types.ts +0 -0
- /package/.pi/{extensions/lib → lib}/ask-user/validate-core.mjs +0 -0
- /package/.pi/{extensions/lib → lib}/ask-user/validate.ts +0 -0
- /package/.pi/{extensions/lib → lib}/harness-cocoindex-refresh.ts +0 -0
- /package/.pi/{extensions/lib → lib}/harness-paths.ts +0 -0
- /package/.pi/{extensions/lib → lib}/harness-spawn-budget.ts +0 -0
- /package/.pi/{extensions/lib → lib}/harness-vcc-settings.ts +0 -0
- /package/.pi/{extensions/lib → lib}/plan-approval/dialog.ts +0 -0
- /package/.pi/{extensions/lib → lib}/plan-approval/schema.ts +0 -0
- /package/.pi/{extensions/lib → lib}/plan-approval-readiness.ts +0 -0
- /package/.pi/{extensions/lib → lib}/plan-debate-eligibility.ts +0 -0
- /package/.pi/{extensions/lib → lib}/plan-debate-focus.ts +0 -0
- /package/.pi/{extensions/lib → lib}/plan-debate-id.ts +0 -0
- /package/.pi/{extensions/lib → lib}/plan-debate-lanes.ts +0 -0
- /package/.pi/{extensions/lib → lib}/plan-debate-round-status.ts +0 -0
- /package/.pi/{extensions/lib → lib}/plan-debate-write-guard.ts +0 -0
- /package/.pi/{extensions/lib → lib}/plan-review-gate.ts +0 -0
- /package/.pi/{extensions/lib → lib}/plan-review-integrator-rules.ts +0 -0
- /package/.pi/{extensions/lib → lib}/plan-scope-guard.ts +0 -0
- /package/.pi/{extensions/lib → lib}/posthog-client.ts +0 -0
- /package/.pi/{extensions/lib → lib}/posthog-node.d.ts +0 -0
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"""Build evidence-bundle.json from search-deep + optional highlight fetches."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def build_evidence_bundle(
    search_deep_path: Path,
    *,
    highlight_files: dict[str, Path] | None = None,
    query: str = "",
) -> dict[str, Any]:
    """Assemble an evidence bundle from a search-deep JSON result file.

    Args:
        search_deep_path: JSON file whose top-level object may carry
            ``query``, ``mode``, ``engine`` and a ``data.web`` list of hits.
        highlight_files: Optional mapping of hit URL to a JSON file holding
            highlights for that URL.
        query: Intent to use when the search-deep payload has no ``query``.

    Returns:
        A dict with ``intent``, ``mode`` (default ``"deep"``), ``engine``
        (default ``""``) and ``sources``: one entry per hit with ``url``,
        ``title``, ``description``, ``score`` and ``angle_ids``, plus
        ``highlights`` when the matching highlight file could be loaded.

    Raises:
        OSError: If ``search_deep_path`` cannot be read.
        json.JSONDecodeError: If ``search_deep_path`` is not valid JSON.
    """
    data = json.loads(search_deep_path.read_text(encoding="utf-8"))
    intent = data.get("query") or query
    # ``or`` also covers an explicit JSON null for "data" / "web", which the
    # plain ``.get(key, default)`` form would pass through and crash on.
    hits = (data.get("data") or {}).get("web") or []
    sources: list[dict[str, Any]] = []
    for hit in hits:
        url = hit.get("url", "")
        entry: dict[str, Any] = {
            "url": url,
            "title": hit.get("title", ""),
            "description": hit.get("description", ""),
            "score": hit.get("score"),
            "angle_ids": hit.get("angle_ids", []),
        }
        highlight_path = highlight_files.get(url) if highlight_files else None
        if highlight_path is not None:
            try:
                entry["highlights"] = json.loads(
                    highlight_path.read_text(encoding="utf-8")
                )
            except (OSError, ValueError):
                # Highlights are optional: a missing, unreadable, non-UTF-8 or
                # malformed file (JSONDecodeError and UnicodeDecodeError are
                # both ValueError subclasses) must not abort the whole bundle.
                pass
        sources.append(entry)
    return {
        "intent": intent,
        "mode": data.get("mode", "deep"),
        "engine": data.get("engine", ""),
        "sources": sources,
    }
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def write_evidence_bundle(path: Path, payload: dict[str, Any]) -> None:
    """Serialize *payload* as pretty-printed UTF-8 JSON at *path*.

    Missing parent directories are created first. The output uses a two-space
    indent, keeps non-ASCII characters unescaped, and ends with a newline.
    """
    target_dir = path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    rendered = json.dumps(payload, indent=2, ensure_ascii=False)
    path.write_text(f"{rendered}\n", encoding="utf-8")
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
"""Seed-URL discovery (Exa findSimilar analog)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from .config import HarnessWebConfig
|
|
9
|
+
from .deep_search import run_deep_search
|
|
10
|
+
from .query_angles import AnglesPlan, SearchAngle
|
|
11
|
+
from .rank import RankedHit, fuse_angle_results, normalize_url, tokenize
|
|
12
|
+
from .scrape import fetch_page
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _extract_seed_phrases(url: str, *, config: HarnessWebConfig, fast: bool) -> list[str]:
    """Derive up to three search phrases from the seed page at *url*.

    The page is fetched via ``fetch_page``. Its title is the text of the first
    non-empty ``title`` or ``h1`` node; if neither yields text, the first 200
    characters of the page text are used instead. Whitespace runs in the title
    are collapsed to single spaces.

    Args:
        url: Seed page to fetch.
        config: Harness web configuration forwarded to ``fetch_page``.
        fast: Forwarded to ``fetch_page`` as its ``fast`` flag.

    Returns:
        At most three non-blank phrases, in this order: the title (capped at
        120 characters), the five longest title tokens joined by spaces, and
        ``"similar to <title>"``. When no title could be extracted the only
        phrase is ``"related pages <url>"``.
    """
    page = fetch_page(url, config=config, fast=fast, wait_ms=None)
    title = ""
    if hasattr(page, "css"):
        for sel in ("title", "h1"):
            nodes = page.css(sel)
            if nodes:
                title = (nodes[0].get_all_text(strip=True) or "").strip()
                if title:
                    break
    if not title and hasattr(page, "get_all_text"):
        title = (page.get_all_text(strip=True) or "")[:200].strip()
    title = re.sub(r"\s+", " ", title).strip()
    phrases: list[str] = []
    if title:
        phrases.append(title[:120])
        # Longest tokens first. Ties are broken alphabetically: tokenize()
        # looks set-valued (its result is intersected with ``&`` elsewhere in
        # this module), and sorting a set of strings by length alone leaves
        # equal-length tokens in hash-randomized order, so the generated
        # query could differ from one process to the next.
        tokens = sorted(tokenize(title), key=lambda tok: (-len(tok), tok))[:5]
        if tokens:
            phrases.append(" ".join(tokens))
    phrases.append(f"similar to {title[:80]}" if title else f"related pages {url}")
    return [p for p in phrases if p.strip()][:3]
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def run_find_similar(
    seed_url: str,
    *,
    config: HarnessWebConfig,
    final_limit: int = 10,
    per_angle_limit: int = 6,
    fast_fetch: bool = True,
) -> tuple[AnglesPlan, list[dict]]:
    """Find pages similar to *seed_url* by searching phrases taken from it.

    Phrases extracted from the seed page become one search angle each
    (``similar_1``, ``similar_2``, ...). The per-angle results are fused, the
    seed URL itself is dropped, and every remaining hit gets a score bonus of
    up to 0.2 proportional to the share of seed tokens found in its title and
    description.

    Args:
        seed_url: Page to use as the similarity seed.
        config: Harness web configuration passed to the fetch and search steps.
        final_limit: Maximum number of hits returned.
        per_angle_limit: Result limit handed to ``multi_search`` for each angle.
        fast_fetch: Passed to the seed-page fetch as its ``fast`` flag.

    Returns:
        The generated plan and the top hits as web dicts, best score first.
    """
    seed_phrases = _extract_seed_phrases(seed_url, config=config, fast=fast_fetch)
    rationale = f"Derived from seed {seed_url}"
    seed_angles = tuple(
        SearchAngle(f"similar_{number}", phrase, rationale)
        for number, phrase in enumerate(seed_phrases, start=1)
    )
    plan = AnglesPlan(intent=f"pages similar to {seed_url}", angles=seed_angles)
    from .multi_search import multi_search

    raw_by_angle = multi_search(plan, per_angle_limit=per_angle_limit, config=config)
    # Keep only the three text fields the fusion step is given.
    trimmed: dict[str, list[dict[str, str]]] = {
        angle_id: [
            {
                "url": row.get("url", ""),
                "title": row.get("title", ""),
                "description": row.get("description", ""),
            }
            for row in rows
        ]
        for angle_id, rows in raw_by_angle.items()
    }
    # Ask for twice the final limit so filtering out the seed leaves headroom.
    fused = fuse_angle_results(trimmed, final_limit=final_limit * 2, intent=plan.intent)

    canonical_seed = normalize_url(seed_url)
    seed_vocab = tokenize(" ".join(seed_phrases))
    vocab_size = max(len(seed_vocab), 1)  # guard against division by zero
    boosted: list[RankedHit] = []
    for hit in fused:
        if normalize_url(hit.url) == canonical_seed:
            continue
        text = f"{hit.title} {hit.description}".lower()
        shared = len(seed_vocab & tokenize(text)) / vocab_size
        boosted.append(
            RankedHit(
                url=hit.url,
                title=hit.title,
                description=hit.description,
                score=hit.score + 0.2 * shared,
                angle_ids=hit.angle_ids,
                ranks=hit.ranks,
            )
        )
    best_first = sorted(boosted, key=lambda hit: -hit.score)
    return plan, [hit.to_web_dict() for hit in best_first[:final_limit]]
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
"""Shipped WRS heuristic angles (stdlib-only). Keep in sync with web-heuristic-angles.yaml."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
# fmt: off
|
|
8
|
+
# Built-in angle templates as a plain dict.
#
# Layout: "version" and "max_angles" are integers; "base" is a list of angle
# templates; "categories" maps a category name ("code", "paper", "news",
# "company", "people", "security", "default") to its own list of templates.
# Every template is a dict with "id", "query" and "rationale", and every
# "query" contains the literal placeholder "{query}".
# NOTE(review): the placeholder is presumably replaced with the user's query
# by the consumer of this table — confirm where the substitution happens.
# NOTE(review): ids are unique within a category but not across categories
# ("wikipedia" appears under both "people" and "default").
SHIPPED_HEURISTIC_ANGLES: dict[str, Any] = {
    "version": 1,
    "max_angles": 8,
    "base": [
        {"id": "definitional", "query": "{query}", "rationale": "Core intent phrasing"},
        {
            "id": "authoritative",
            "query": "{query} official documentation OR specification OR RFC",
            "rationale": "Primary specs and vendor docs",
        },
    ],
    "categories": {
        "code": [
            {"id": "github", "query": "{query} site:github.com", "rationale": "Source, issues, discussions"},
            {"id": "stackoverflow", "query": "{query} site:stackoverflow.com", "rationale": "Debugging and API usage Q&A"},
            {"id": "stackexchange", "query": "{query} site:stackexchange.com", "rationale": "Broader SE network (Super User, Server Fault, etc.)"},
            {"id": "readthedocs", "query": "{query} site:readthedocs.io", "rationale": "OSS library documentation"},
            {"id": "mdn", "query": "{query} site:developer.mozilla.org", "rationale": "Web platform and browser APIs"},
            {"id": "package_registries", "query": "{query} site:npmjs.com OR site:pypi.org OR site:pkg.go.dev OR site:crates.io", "rationale": "Package metadata across major ecosystems"},
            {"id": "microsoft_learn", "query": "{query} site:learn.microsoft.com", "rationale": ".NET, Azure, Windows, and enterprise stacks"},
            {"id": "hacker_news", "query": "{query} site:news.ycombinator.com", "rationale": "High-signal practitioner discussion"},
            {"id": "gitlab", "query": "{query} site:gitlab.com", "rationale": "Alternate host and CI-visible code"},
            {"id": "devto", "query": "{query} site:dev.to OR site:medium.com", "rationale": "Tutorials and implementation writeups"},
        ],
        "paper": [
            {"id": "arxiv", "query": "{query} site:arxiv.org", "rationale": "Preprints and latest ML/CS uploads"},
            {"id": "semantic_scholar", "query": "{query} site:semanticscholar.org", "rationale": "Citations, influences, and PDF links"},
            {"id": "google_scholar", "query": "{query} site:scholar.google.com", "rationale": "Broad academic discovery"},
            {"id": "papers_with_code", "query": "{query} site:paperswithcode.com", "rationale": "Benchmarks tied to implementations"},
            {"id": "openreview", "query": "{query} site:openreview.net", "rationale": "Peer reviews and ML conference submissions"},
            {"id": "acl_anthology", "query": "{query} site:aclanthology.org", "rationale": "NLP and computational linguistics"},
            {"id": "acm_dl", "query": "{query} site:dl.acm.org", "rationale": "ACM proceedings and journals"},
            {"id": "pubmed", "query": "{query} site:pubmed.ncbi.nlm.nih.gov", "rationale": "Biomedical and life-sciences literature"},
        ],
        "news": [
            {"id": "recent", "query": "{query} news 2025 2026", "rationale": "Recency-biased open web"},
            {"id": "wire_reuters", "query": "{query} site:reuters.com", "rationale": "Wire-service reporting"},
            {"id": "wire_ap", "query": "{query} site:apnews.com", "rationale": "Associated Press coverage"},
            {"id": "tech_press", "query": "{query} site:techcrunch.com OR site:theverge.com OR site:arstechnica.com", "rationale": "Technology industry news"},
            {"id": "business_press", "query": "{query} site:bloomberg.com OR site:ft.com OR site:wsj.com", "rationale": "Markets and business context"},
            {"id": "analysis", "query": "{query} in-depth analysis explainer", "rationale": "Long-form journalism and explainers"},
            {"id": "bbc", "query": "{query} site:bbc.com/news", "rationale": "International general news desk"},
        ],
        "company": [
            {"id": "official_site", "query": "{query} official website", "rationale": "Company-controlled messaging"},
            {"id": "crunchbase", "query": "{query} site:crunchbase.com", "rationale": "Funding, investors, and competitors"},
            {"id": "linkedin_company", "query": "{query} site:linkedin.com/company", "rationale": "Headcount, hiring, and positioning"},
            {"id": "sec_filings", "query": "{query} site:sec.gov 10-K OR 10-Q OR S-1", "rationale": "US public-company disclosures"},
            {"id": "g2_reviews", "query": "{query} site:g2.com OR site:capterra.com", "rationale": "B2B software reviews and comparisons"},
            {"id": "company_news", "query": "{query} company announcement press release", "rationale": "Launches, partnerships, and earnings"},
            {"id": "glassdoor", "query": "{query} site:glassdoor.com", "rationale": "Employee sentiment and culture signals"},
        ],
        "people": [
            {"id": "linkedin", "query": "{query} site:linkedin.com/in", "rationale": "Professional profiles"},
            {"id": "github_person", "query": "{query} site:github.com", "rationale": "Open-source footprint for builders"},
            {"id": "wikipedia", "query": "{query} site:en.wikipedia.org", "rationale": "Neutral biographical baseline"},
            {"id": "scholar_person", "query": "{query} site:scholar.google.com", "rationale": "Publication record for researchers"},
            {"id": "interviews", "query": "{query} interview podcast keynote", "rationale": "First-person statements and talks"},
            {"id": "twitter_x", "query": "{query} site:x.com OR site:twitter.com", "rationale": "Public statements and discourse"},
        ],
        "security": [
            {"id": "cve_nvd", "query": "{query} CVE site:nvd.nist.gov", "rationale": "National Vulnerability Database"},
            {"id": "owasp", "query": "{query} site:owasp.org", "rationale": "AppSec standards and cheat sheets"},
            {"id": "cwe", "query": "{query} site:cwe.mitre.org", "rationale": "Weakness taxonomy"},
            {"id": "github_advisories", "query": "{query} site:github.com/advisories OR dependabot", "rationale": "Ecosystem security advisories"},
            {"id": "snyk_blog", "query": "{query} site:snyk.io/blog OR vulnerability", "rationale": "Practitioner security writeups"},
        ],
        "default": [
            {"id": "technical", "query": "{query} how it works architecture internals", "rationale": "Mechanism and design"},
            {"id": "criticism", "query": "{query} limitations criticism drawbacks", "rationale": "Counterpoints and failure modes"},
            {"id": "wikipedia", "query": "{query} site:en.wikipedia.org", "rationale": "Structured overview"},
            {"id": "comparison", "query": "{query} vs alternatives comparison benchmark", "rationale": "Competitive landscape"},
            {"id": "reddit", "query": "{query} site:reddit.com", "rationale": "Community experience reports"},
            {"id": "hn_default", "query": "{query} site:news.ycombinator.com", "rationale": "Practitioner threads when category unknown"},
        ],
    },
}
|
|
85
|
+
# fmt: on
|
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
"""Load and merge WRS heuristic angle templates from YAML."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import os
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from functools import lru_cache
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
from .query_angles import SearchAngle
|
|
13
|
+
|
|
14
|
+
try:
|
|
15
|
+
import yaml # type: ignore[import-untyped]
|
|
16
|
+
except ImportError:
|
|
17
|
+
yaml = None # type: ignore[assignment]
|
|
18
|
+
|
|
19
|
+
CONFIG_BASENAME = "web-heuristic-angles.yaml"
|
|
20
|
+
CONFIG_JSON_BASENAME = "web-heuristic-angles.json"
|
|
21
|
+
ENV_CONFIG_FILE = "HARNESS_WEB_HEURISTIC_ANGLES_FILE"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass(frozen=True)
class HeuristicAnglesConfig:
    """Immutable, parsed form of the merged heuristic-angle configuration."""

    # Cap applied to the number of angles returned by build_heuristic_angles().
    max_angles: int
    # Angle templates emitted ahead of any category-specific templates.
    base: tuple[SearchAngle, ...]
    # Category name -> angle templates for that category
    # (names are lower-cased when built by heuristic_config_from_merged()).
    categories: dict[str, tuple[SearchAngle, ...]]
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _project_root() -> Path:
|
|
32
|
+
env = os.environ.get("HARNESS_PROJECT_ROOT", "").strip()
|
|
33
|
+
if env:
|
|
34
|
+
return Path(env).resolve()
|
|
35
|
+
return Path.cwd().resolve()
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _package_root() -> Path | None:
|
|
39
|
+
env = os.environ.get("HARNESS_PKG_ROOT", "").strip()
|
|
40
|
+
if env:
|
|
41
|
+
return Path(env).resolve()
|
|
42
|
+
here = Path(__file__).resolve()
|
|
43
|
+
for anc in here.parents:
|
|
44
|
+
if (anc / ".pi" / "harness" / CONFIG_BASENAME).is_file():
|
|
45
|
+
return anc
|
|
46
|
+
return None
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def discover_heuristic_config_paths() -> list[Path]:
    """Return existing config files in load order.

    Order: packaged YAML, packaged JSON, the project's YAML override, and
    finally the file named by the ENV_CONFIG_FILE environment variable.
    Missing files and paths already collected are skipped.
    """
    found: list[Path] = []

    def _collect(candidate: Path) -> None:
        # Only keep files that exist and were not already collected.
        if candidate.is_file() and candidate not in found:
            found.append(candidate)

    pkg_root = _package_root()
    if pkg_root is not None:
        harness_dir = pkg_root / ".pi" / "harness"
        _collect(harness_dir / CONFIG_BASENAME)
        _collect(harness_dir / CONFIG_JSON_BASENAME)

    _collect(_project_root() / ".pi" / "harness" / CONFIG_BASENAME)

    explicit = os.environ.get(ENV_CONFIG_FILE, "").strip()
    if explicit:
        _collect(Path(explicit).expanduser().resolve())
    return found
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _format_query(template: str, query: str) -> str:
|
|
72
|
+
return template.replace("{query}", query.strip())
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _parse_angle_list(raw: Any, *, source: str) -> list[SearchAngle]:
|
|
76
|
+
if raw is None:
|
|
77
|
+
return []
|
|
78
|
+
if not isinstance(raw, list):
|
|
79
|
+
raise ValueError(f"{source}: expected list of angle objects")
|
|
80
|
+
out: list[SearchAngle] = []
|
|
81
|
+
for i, item in enumerate(raw):
|
|
82
|
+
if not isinstance(item, dict):
|
|
83
|
+
raise ValueError(f"{source}[{i}]: expected object")
|
|
84
|
+
aid = str(item.get("id") or item.get("name") or f"angle_{i + 1}").strip()
|
|
85
|
+
qtpl = str(item.get("query") or "").strip()
|
|
86
|
+
if not aid or not qtpl:
|
|
87
|
+
raise ValueError(f"{source}[{i}]: id and query required")
|
|
88
|
+
rationale = str(item.get("rationale") or item.get("reason") or "").strip()
|
|
89
|
+
out.append(
|
|
90
|
+
SearchAngle(
|
|
91
|
+
id=aid,
|
|
92
|
+
query=qtpl,
|
|
93
|
+
rationale=rationale,
|
|
94
|
+
)
|
|
95
|
+
)
|
|
96
|
+
return out
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def _merge_config_dict(accum: dict[str, Any], layer: dict[str, Any]) -> dict[str, Any]:
|
|
100
|
+
out = dict(accum)
|
|
101
|
+
if "max_angles" in layer:
|
|
102
|
+
out["max_angles"] = layer["max_angles"]
|
|
103
|
+
if "version" in layer:
|
|
104
|
+
out["version"] = layer["version"]
|
|
105
|
+
base_acc = list(out.get("base") or [])
|
|
106
|
+
base_acc.extend(layer.get("base") or [])
|
|
107
|
+
out["base"] = base_acc
|
|
108
|
+
cats: dict[str, list[Any]] = dict(out.get("categories") or {})
|
|
109
|
+
layer_cats = layer.get("categories")
|
|
110
|
+
if isinstance(layer_cats, dict):
|
|
111
|
+
for key, angles in layer_cats.items():
|
|
112
|
+
cat = str(key).strip().lower()
|
|
113
|
+
if not cat:
|
|
114
|
+
continue
|
|
115
|
+
existing = list(cats.get(cat) or [])
|
|
116
|
+
if isinstance(angles, list):
|
|
117
|
+
existing.extend(angles)
|
|
118
|
+
cats[cat] = existing
|
|
119
|
+
out["categories"] = cats
|
|
120
|
+
return out
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def _load_config_file(path: Path) -> dict[str, Any]:
|
|
124
|
+
text = path.read_text(encoding="utf-8")
|
|
125
|
+
if path.suffix.lower() == ".json":
|
|
126
|
+
data = json.loads(text)
|
|
127
|
+
elif yaml is not None:
|
|
128
|
+
data = yaml.safe_load(text)
|
|
129
|
+
else:
|
|
130
|
+
raise ValueError(f"PyYAML required to load {path} (or use .json)")
|
|
131
|
+
if not isinstance(data, dict):
|
|
132
|
+
raise ValueError(f"{path}: root must be a mapping")
|
|
133
|
+
return data
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def _embedded_builtin_dict() -> dict[str, Any]:
    """Return default raw config for when no discovered config file loaded.

    Tries the packaged JSON file first, then the packaged YAML file. If
    neither exists or loads, returns a shallow copy of the
    SHIPPED_HEURISTIC_ANGLES mapping bundled with the code.
    """
    root = _package_root()
    harness_dir = None if root is None else root / ".pi" / "harness"
    if harness_dir is not None:
        for basename in (CONFIG_JSON_BASENAME, CONFIG_BASENAME):
            candidate = harness_dir / basename
            if candidate.is_file():
                try:
                    return _load_config_file(candidate)
                except (ValueError, json.JSONDecodeError, OSError):
                    # Unreadable packaged file: try the next format.
                    continue

    from .heuristic_angles_shipped import SHIPPED_HEURISTIC_ANGLES

    return dict(SHIPPED_HEURISTIC_ANGLES)
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def heuristic_config_from_merged(merged: dict[str, Any]) -> HeuristicAnglesConfig:
    """Build a typed HeuristicAnglesConfig from a merged raw mapping.

    ``max_angles`` defaults to 5 when missing or falsy and is clamped to the
    range 2..8. Category names are lower-cased and stripped. When no
    ``default`` category is configured, a two-angle fallback is installed.

    Raises:
        ValueError: If an angle list is malformed or ``max_angles`` cannot be
            converted to an int.
    """
    requested = int(merged.get("max_angles") or 5)
    limit = min(max(requested, 2), 8)

    base = tuple(_parse_angle_list(merged.get("base"), source="base"))

    raw_categories = merged.get("categories")
    category_items = raw_categories.items() if isinstance(raw_categories, dict) else ()
    categories: dict[str, tuple[SearchAngle, ...]] = {}
    for raw_name, raw_angles in category_items:
        name = str(raw_name).strip().lower()
        if name:
            categories[name] = tuple(
                _parse_angle_list(raw_angles, source=f"categories.{name}")
            )

    if "default" not in categories:
        categories["default"] = (
            SearchAngle("technical", "{query} how it works architecture", "Technical"),
            SearchAngle("criticism", "{query} limitations criticism", "Counterpoints"),
        )

    return HeuristicAnglesConfig(max_angles=limit, base=base, categories=categories)
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
@lru_cache(maxsize=8)
def load_heuristic_angles_config_cached(paths_key: tuple[str, ...]) -> HeuristicAnglesConfig:
    """Load, merge and parse the config layers named by ``paths_key``.

    Results are memoized per ``paths_key``. An empty key triggers path
    discovery. Files that fail to load are skipped; if none load, the
    built-in defaults are used instead.
    """
    if paths_key:
        candidates = [Path(entry) for entry in paths_key]
    else:
        candidates = discover_heuristic_config_paths()

    def _try_load(candidate: Path) -> dict[str, Any] | None:
        # An unreadable or malformed layer is ignored rather than fatal.
        try:
            return _load_config_file(candidate)
        except (ValueError, json.JSONDecodeError, OSError):
            return None

    layers = [layer for layer in map(_try_load, candidates) if layer is not None]
    if not layers:
        return heuristic_config_from_merged(_embedded_builtin_dict())

    merged: dict[str, Any] = {}
    for layer in layers:
        merged = _merge_config_dict(merged, layer)
    return heuristic_config_from_merged(merged)
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def load_heuristic_angles_config() -> HeuristicAnglesConfig:
    """Discover the config files and return the (cached) parsed configuration."""
    cache_key = tuple(map(str, discover_heuristic_config_paths()))
    return load_heuristic_angles_config_cached(cache_key)
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def clear_heuristic_config_cache() -> None:
    """Discard all memoized configurations held by the cached loader."""
    load_heuristic_angles_config_cached.cache_clear()
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def build_heuristic_angles(
    query: str,
    *,
    category: str | None = None,
    config: HeuristicAnglesConfig | None = None,
) -> tuple[SearchAngle, ...]:
    """Expand angle templates into concrete search angles for ``query``.

    Base templates come first, followed by the templates of ``category``
    (matched case-insensitively) or of the ``default`` category when the
    requested one is missing or empty. Duplicate ids keep their first
    occurrence, and the result is truncated to ``max_angles``.

    Args:
        query: User query substituted into each template.
        category: Optional category name.
        config: Parsed configuration; loaded on demand when omitted.
    """
    cfg = config or load_heuristic_angles_config()
    needle = query.strip()
    wanted = (category or "").strip().lower()

    chosen = cfg.categories.get(wanted) if wanted else None
    if not chosen:
        chosen = cfg.categories.get("default", ())

    picked: list[SearchAngle] = []
    taken_ids: set[str] = set()
    for template in (*cfg.base, *chosen):
        # First occurrence of an id wins, so base templates beat category ones.
        if template.id in taken_ids:
            continue
        taken_ids.add(template.id)
        picked.append(
            SearchAngle(
                id=template.id,
                query=_format_query(template.query, needle),
                rationale=template.rationale,
            )
        )

    return tuple(picked[: cfg.max_angles])
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"""Query-aligned excerpt extraction from page markdown."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _tokenize(text: str) -> set[str]:
|
|
10
|
+
return {t for t in re.findall(r"[a-z0-9]{3,}", text.lower()) if len(t) >= 3}
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def extract_highlights(
    markdown: str,
    query: str,
    *,
    max_spans: int = 5,
    max_chars_per_span: int = 400,
) -> list[dict[str, Any]]:
    """Pick the paragraphs of ``markdown`` that best overlap with ``query``.

    Paragraphs are split on blank lines; those shorter than 40 characters are
    ignored. Each is scored by the fraction of query tokens it contains.
    The top ``max_spans`` candidates are considered, best score first and
    earlier paragraph first on ties; zero-score ones are dropped.

    Returns:
        Dicts with ``score`` (rounded to 4 places), ``paragraph_index`` and
        ``text`` (cut to ``max_chars_per_span`` with a trailing ellipsis when
        truncated). Empty when the query yields no tokens.
    """
    wanted = _tokenize(query)
    if not wanted:
        return []

    blocks = [chunk.strip() for chunk in re.split(r"\n\s*\n", markdown) if chunk.strip()]
    if not blocks:
        blocks = [line.strip() for line in markdown.splitlines() if line.strip()]

    ranked: list[tuple[float, int, str]] = []
    for position, block in enumerate(blocks):
        if len(block) < 40:
            continue
        present = _tokenize(block)
        if present:
            coverage = len(wanted & present) / max(len(wanted), 1)
            ranked.append((coverage, position, block))
    ranked.sort(key=lambda item: (-item[0], item[1]))

    highlights: list[dict[str, Any]] = []
    for coverage, position, block in ranked[:max_spans]:
        if coverage <= 0:
            continue
        if len(block) > max_chars_per_span:
            excerpt = block[:max_chars_per_span] + "…"
        else:
            excerpt = block
        highlights.append(
            {"score": round(coverage, 4), "paragraph_index": position, "text": excerpt}
        )
    return highlights
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
"""Parallel SERP queries per search angle."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import time
|
|
7
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
8
|
+
|
|
9
|
+
from .config import HarnessWebConfig
|
|
10
|
+
from .query_angles import AnglesPlan, SearchAngle
|
|
11
|
+
from .search import search
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _concurrency() -> int:
|
|
15
|
+
raw = os.environ.get("HARNESS_WEB_DEEP_CONCURRENCY", "4").strip()
|
|
16
|
+
try:
|
|
17
|
+
return max(1, min(8, int(raw)))
|
|
18
|
+
except ValueError:
|
|
19
|
+
return 4
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def multi_search(
    plan: AnglesPlan,
    *,
    per_angle_limit: int,
    config: HarnessWebConfig,
    rate_limit_ms: int | None = None,
) -> dict[str, list[dict[str, str]]]:
    """Run search() for each angle; return angle_id -> hits.

    Each hit is copied and tagged with ``_angle_id`` and a 1-based
    ``_angle_rank`` (stored as a string). A single angle runs inline; several
    angles run on a thread pool sized by ``_concurrency()``.

    Args:
        plan: Plan whose ``angles`` are searched.
        per_angle_limit: Maximum hits requested per angle.
        config: Web config passed to ``search``; also supplies the default
            rate limit.
        rate_limit_ms: Optional override for ``config.rate_limit_ms``.

    Returns:
        Mapping of angle id to tagged hits; empty when the plan has no angles.
    """
    results: dict[str, list[dict[str, str]]] = {}
    angles = list(plan.angles)
    if not angles:
        # ThreadPoolExecutor rejects max_workers=0, so an empty plan must
        # short-circuit instead of reaching the pool.
        return results

    sleep_sec = (rate_limit_ms if rate_limit_ms is not None else config.rate_limit_ms) / 1000.0

    def run_one(angle: SearchAngle) -> tuple[str, list[dict[str, str]]]:
        hits = search(angle.query, limit=per_angle_limit, config=config)
        tagged = []
        for rank, hit in enumerate(hits, start=1):
            row = dict(hit)
            row["_angle_id"] = angle.id
            row["_angle_rank"] = str(rank)
            tagged.append(row)
        return angle.id, tagged

    if len(angles) == 1:
        aid, hits = run_one(angles[0])
        results[aid] = hits
        return results

    with ThreadPoolExecutor(max_workers=min(_concurrency(), len(angles))) as pool:
        futures = [pool.submit(run_one, angle) for angle in angles]
        for done, fut in enumerate(as_completed(futures), start=1):
            aid, hits = fut.result()
            results[aid] = hits
            # The pause happens between collecting finished results (not
            # between submissions) and is skipped after the last one.
            if done < len(angles) and sleep_sec > 0:
                time.sleep(sleep_sec)

    return results
|
|
@@ -24,6 +24,7 @@ def write_search_results(
|
|
|
24
24
|
query: str,
|
|
25
25
|
*,
|
|
26
26
|
engine: str,
|
|
27
|
+
tier: str = "standard",
|
|
27
28
|
) -> None:
|
|
28
29
|
"""Firecrawl-compatible envelope: data.web[].url|title|description."""
|
|
29
30
|
write_json(
|
|
@@ -31,6 +32,7 @@ def write_search_results(
|
|
|
31
32
|
{
|
|
32
33
|
"query": query,
|
|
33
34
|
"engine": engine,
|
|
35
|
+
"tier": tier,
|
|
34
36
|
"data": {
|
|
35
37
|
"web": [
|
|
36
38
|
{
|
|
@@ -45,6 +47,28 @@ def write_search_results(
|
|
|
45
47
|
)
|
|
46
48
|
|
|
47
49
|
|
|
50
|
+
def write_deep_search_results(
    path: Path,
    *,
    query: str,
    engine: str,
    tier: str,
    plan_angles: list[dict],
    ranked_web: list[dict],
) -> None:
    """Write the deep-search result envelope to ``path`` as JSON.

    The envelope records the query, engine, the planned angles and the ranked
    hits under ``data.web``. ``tier`` is stored under both ``mode`` and
    ``tier``.
    """
    envelope = {
        "query": query,
        "engine": engine,
        "mode": tier,
        "tier": tier,
        "angles": plan_angles,
        "data": {"web": ranked_web},
    }
    write_json(path, envelope)
|
|
70
|
+
|
|
71
|
+
|
|
48
72
|
def write_page_markdown(path: Path, page: Any, *, main_content_only: bool = True) -> None:
|
|
49
73
|
ensure_parent(path)
|
|
50
74
|
try:
|