ultimate-pi 0.18.1 → 0.19.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/harness-debate-plan/SKILL.md +1 -1
- package/.agents/skills/harness-decisions/SKILL.md +1 -2
- package/.agents/skills/harness-governor/SKILL.md +6 -5
- package/.agents/skills/web-retrieval/SKILL.md +163 -0
- package/.agents/skills/wiki-autoresearch/SKILL.md +6 -6
- package/.pi/PACKAGING.md +4 -4
- package/.pi/SYSTEM.md +75 -123
- package/.pi/agents/harness/incident-recorder.md +0 -1
- package/.pi/agents/harness/planning/decompose.md +0 -2
- package/.pi/agents/harness/planning/execution-plan-author.md +0 -2
- package/.pi/agents/harness/planning/hypothesis-validator.md +0 -2
- package/.pi/agents/harness/planning/hypothesis.md +0 -2
- package/.pi/agents/harness/planning/implementation-researcher.md +1 -3
- package/.pi/agents/harness/planning/plan-adversary.md +0 -2
- package/.pi/agents/harness/planning/plan-evaluator.md +1 -3
- package/.pi/agents/harness/planning/planning-context.md +0 -2
- package/.pi/agents/harness/planning/review-integrator.md +0 -2
- package/.pi/agents/harness/planning/sprint-contract-auditor.md +0 -2
- package/.pi/agents/harness/planning/stack-researcher.md +5 -3
- package/.pi/agents/harness/reviewing/adversary.md +0 -2
- package/.pi/agents/harness/reviewing/evaluator.md +0 -2
- package/.pi/agents/harness/reviewing/tie-breaker.md +0 -2
- package/.pi/agents/harness/running/executor.md +0 -2
- package/.pi/agents/harness/sentrux-bootstrap.md +0 -1
- package/.pi/agents/harness/sentrux-steward.md +0 -2
- package/.pi/agents/harness/trace-librarian.md +0 -1
- package/.pi/agents/harness/web-retrieval/web-answerer.md +35 -0
- package/.pi/agents/harness/web-retrieval/web-criteria-verifier.md +28 -0
- package/.pi/agents/harness/web-retrieval/web-gap-analyzer.md +31 -0
- package/.pi/agents/harness/web-retrieval/web-query-expander-fast.md +34 -0
- package/.pi/agents/harness/web-retrieval/web-query-expander.md +60 -0
- package/.pi/agents/harness/web-retrieval/web-summarizer.md +18 -0
- package/.pi/extensions/agt-kill-switch.ts +57 -0
- package/.pi/extensions/agt-prompt-guard.ts +32 -0
- package/.pi/extensions/custom-footer.ts +46 -145
- package/.pi/extensions/custom-header.ts +1 -1
- package/.pi/extensions/custom-system-prompt.ts +1 -1
- package/.pi/extensions/debate-orchestrator.ts +6 -6
- package/.pi/extensions/harness-ask-user.ts +7 -7
- package/.pi/extensions/harness-debate-tools.ts +26 -42
- package/.pi/extensions/harness-lens.ts +94 -0
- package/.pi/extensions/harness-plan-approval.ts +11 -11
- package/.pi/extensions/harness-run-context.ts +1070 -876
- package/.pi/extensions/harness-subagent-governance.ts +8 -0
- package/.pi/extensions/harness-subagent-submit.ts +34 -163
- package/.pi/extensions/harness-subagents.ts +3 -3
- package/.pi/extensions/harness-telemetry.ts +2 -2
- package/.pi/extensions/harness-web-guard.ts +2 -1
- package/.pi/extensions/harness-web-tools.ts +691 -53
- package/.pi/extensions/policy-gate.ts +25 -5
- package/.pi/extensions/sentrux-rules-sync.ts +1 -1
- package/.pi/extensions/subagent-governance.ts +92 -0
- package/.pi/extensions/trace-recorder.ts +1 -1
- package/.pi/extensions/{ultimate-pi-vcc.ts → vcc-compaction.ts} +1 -1
- package/.pi/harness/README.md +6 -2
- package/.pi/harness/agents.manifest.json +46 -25
- package/.pi/harness/agents.policy.yaml +309 -0
- package/.pi/harness/docs/adrs/0030-inhouse-vcc-compaction.md +1 -1
- package/.pi/harness/docs/adrs/0035-plan-phase-review-gate.md +1 -1
- package/.pi/harness/docs/adrs/0045-harness-lens-minimal-contract.md +49 -0
- package/.pi/harness/docs/adrs/0046-agt-policy-engine.md +51 -0
- package/.pi/harness/docs/adrs/0047-agt-layered-security.md +39 -0
- package/.pi/harness/docs/adrs/0048-tool-call-hook-order.md +25 -0
- package/.pi/harness/docs/adrs/0049-agents-policy-manifest.md +36 -0
- package/.pi/harness/docs/adrs/0050-agentic-web-retrieval-stack.md +46 -0
- package/.pi/harness/docs/adrs/README.md +5 -0
- package/.pi/harness/docs/harness-web-search.md +97 -0
- package/.pi/harness/env.harness.template +9 -1
- package/.pi/harness/evolution/README.md +1 -2
- package/.pi/harness/examples/agents.policy.project.yaml +19 -0
- package/.pi/harness/examples/policies/custom-deny-bash.yaml +9 -0
- package/.pi/harness/examples/web-heuristic-angles.project.yaml +22 -0
- package/.pi/harness/policies/bash-denylists.yaml +5 -0
- package/.pi/harness/policies/defaults.yaml +51 -0
- package/.pi/harness/policies/orchestrator.yaml +18 -0
- package/.pi/harness/policies/phases.yaml +10 -0
- package/.pi/harness/policies/roles.yaml +5 -0
- package/.pi/harness/policies/web-guard.yaml +5 -0
- package/.pi/harness/policies/workflow-sequences.yaml +9 -0
- package/.pi/harness/sentrux/architecture.manifest.json +26 -4
- package/.pi/harness/specs/observation.schema.json +2 -1
- package/.pi/harness/web-heuristic-angles.json +278 -0
- package/.pi/harness/web-heuristic-angles.yaml +182 -0
- package/.pi/lib/agents-policy.d.mts +70 -0
- package/.pi/lib/agents-policy.mjs +331 -0
- package/.pi/lib/agents-policy.ts +19 -0
- package/.pi/lib/agt/audit-run-sink.ts +52 -0
- package/.pi/lib/agt/build-evaluation-context.ts +285 -0
- package/.pi/lib/agt/config.ts +28 -0
- package/.pi/lib/agt/delegation.ts +69 -0
- package/.pi/lib/agt/evaluate-policy.ts +56 -0
- package/.pi/lib/agt/identity-registry.ts +41 -0
- package/.pi/lib/agt/index.ts +55 -0
- package/.pi/lib/agt/kill-switch-state.ts +11 -0
- package/.pi/lib/agt/legacy-evaluate.ts +101 -0
- package/.pi/lib/agt/policy-engine.ts +154 -0
- package/.pi/lib/agt/rings.ts +21 -0
- package/.pi/lib/agt/sre-hooks.ts +45 -0
- package/.pi/lib/agt/trust-run-store.ts +26 -0
- package/.pi/lib/agt/workflow-history.ts +29 -0
- package/.pi/lib/agt-governance-active.ts +14 -0
- package/.pi/lib/agt-tool-guard.ts +78 -0
- package/.pi/lib/ask-user/dialog.ts +314 -0
- package/.pi/{extensions/lib → lib}/debate-bus-core.ts +10 -10
- package/.pi/{extensions/lib → lib}/debate-bus-state.ts +1 -1
- package/.pi/{extensions/lib → lib}/extension-load-guard.ts +13 -2
- package/.pi/lib/harness-agt-tool-guard.ts +5 -0
- package/.pi/{extensions/lib → lib}/harness-artifact-gate.ts +1 -1
- package/.pi/lib/harness-debate-core-deps.ts +14 -0
- package/.pi/lib/harness-debate-workflow-deps.ts +43 -0
- package/.pi/lib/harness-lens/.gitattributes +1 -0
- package/.pi/lib/harness-lens/clients/edit-autopatch.ts +88 -0
- package/.pi/lib/harness-lens/clients/file-kinds.ts +380 -0
- package/.pi/lib/harness-lens/clients/file-time.ts +215 -0
- package/.pi/lib/harness-lens/clients/file-utils.ts +484 -0
- package/.pi/lib/harness-lens/clients/format-service.ts +276 -0
- package/.pi/lib/harness-lens/clients/formatters.ts +1000 -0
- package/.pi/lib/harness-lens/clients/git-guard.ts +31 -0
- package/.pi/lib/harness-lens/clients/indent-retarget.ts +90 -0
- package/.pi/lib/harness-lens/clients/installer/index.ts +2368 -0
- package/.pi/lib/harness-lens/clients/latency-logger.ts +80 -0
- package/.pi/lib/harness-lens/clients/lens-config.ts +43 -0
- package/.pi/lib/harness-lens/clients/lens-events.ts +164 -0
- package/.pi/lib/harness-lens/clients/lsp/aggregation.ts +91 -0
- package/.pi/lib/harness-lens/clients/lsp/client.ts +1466 -0
- package/.pi/lib/harness-lens/clients/lsp/config.ts +216 -0
- package/.pi/lib/harness-lens/clients/lsp/edits.ts +297 -0
- package/.pi/lib/harness-lens/clients/lsp/index.ts +1355 -0
- package/.pi/lib/harness-lens/clients/lsp/interactive-install.ts +424 -0
- package/.pi/lib/harness-lens/clients/lsp/language.ts +223 -0
- package/.pi/lib/harness-lens/clients/lsp/launch.ts +939 -0
- package/.pi/lib/harness-lens/clients/lsp/lsp-index.ts +11 -0
- package/.pi/lib/harness-lens/clients/lsp/path-utils.ts +12 -0
- package/.pi/lib/harness-lens/clients/lsp/server-strategies.ts +81 -0
- package/.pi/lib/harness-lens/clients/lsp/server.ts +1971 -0
- package/.pi/lib/harness-lens/clients/path-utils.ts +182 -0
- package/.pi/lib/harness-lens/clients/pipeline.ts +360 -0
- package/.pi/lib/harness-lens/clients/project-profile.ts +117 -0
- package/.pi/lib/harness-lens/clients/runtime-agent-end.ts +112 -0
- package/.pi/lib/harness-lens/clients/runtime-config.ts +33 -0
- package/.pi/lib/harness-lens/clients/runtime-coordinator.ts +186 -0
- package/.pi/lib/harness-lens/clients/runtime-tool-result.ts +171 -0
- package/.pi/lib/harness-lens/clients/safe-spawn.ts +339 -0
- package/.pi/lib/harness-lens/clients/secrets-scanner.ts +214 -0
- package/.pi/lib/harness-lens/clients/tool-policy.ts +2072 -0
- package/.pi/lib/harness-lens/clients/types.ts +59 -0
- package/.pi/lib/harness-lens/clients/widget-state.ts +283 -0
- package/.pi/lib/harness-lens/index.ts +532 -0
- package/.pi/lib/harness-lens/tools/lsp-diagnostics.ts +706 -0
- package/.pi/lib/harness-lens/tools/lsp-navigation.ts +1246 -0
- package/.pi/{extensions/lib → lib}/harness-posthog.ts +3 -0
- package/.pi/lib/harness-run-context-responses.ts +9 -0
- package/.pi/lib/harness-run-context.ts +0 -2
- package/.pi/{extensions/lib/spawn-policy.ts → lib/harness-spawn-policy.ts} +1 -0
- package/.pi/{extensions/lib → lib}/harness-spawn-topology.ts +1 -1
- package/.pi/lib/harness-subagent-auth.ts +81 -0
- package/.pi/{extensions/lib → lib}/harness-subagent-precheck.ts +10 -7
- package/.pi/{extensions/lib → lib}/harness-subagent-submit-pipeline.ts +3 -3
- package/.pi/lib/harness-subagent-submit-register.ts +163 -0
- package/.pi/{extensions/lib → lib}/harness-subagent-submit-registry.ts +1 -37
- package/.pi/{extensions/lib → lib}/harness-subagents-bridge.ts +74 -14
- package/.pi/{extensions/lib → lib}/harness-subprocess-bootstrap.ts +1 -1
- package/.pi/lib/harness-web/artifacts.ts +200 -0
- package/.pi/lib/harness-web/cache.ts +369 -0
- package/.pi/{extensions/lib → lib}/harness-web/run-cli.ts +42 -2
- package/.pi/{extensions/lib → lib}/plan-approval/create-plan.ts +2 -2
- package/.pi/{extensions/lib → lib}/plan-approval/format-plan.ts +2 -2
- package/.pi/{extensions/lib → lib}/plan-approval/plan-review.ts +162 -201
- package/.pi/{extensions/lib → lib}/plan-approval/render.ts +1 -1
- package/.pi/{extensions/lib → lib}/plan-approval/resolve-disk.ts +2 -2
- package/.pi/{extensions/lib → lib}/plan-approval/types.ts +1 -1
- package/.pi/{extensions/lib → lib}/plan-approval/validate.ts +3 -3
- package/.pi/{extensions/lib → lib}/plan-debate-envelope.ts +1 -1
- package/.pi/{extensions/lib → lib}/plan-debate-gate.ts +1 -1
- package/.pi/{extensions/lib → lib}/plan-debate-lane.ts +1 -4
- package/.pi/{extensions/lib → lib}/plan-messenger.ts +1 -1
- package/.pi/prompts/harness-plan.md +2 -1
- package/.pi/prompts/harness-setup.md +40 -65
- package/.pi/scripts/README.md +2 -5
- package/.pi/scripts/gen-web-heuristic-angles-json.mjs +24 -0
- package/.pi/scripts/generate-agents-policy-yaml.mjs +148 -0
- package/.pi/scripts/harness-agents-manifest.mjs +60 -3
- package/.pi/scripts/harness-agt-doctor.ts +36 -0
- package/.pi/scripts/harness-cli-verify.sh +14 -2
- package/.pi/scripts/harness-verify.mjs +191 -39
- package/.pi/scripts/harness-web-policy-guard.mjs +3 -3
- package/.pi/scripts/harness-web.py +218 -15
- package/.pi/scripts/harness_web/deep_search.py +55 -0
- package/.pi/scripts/harness_web/evidence_bundle.py +47 -0
- package/.pi/scripts/harness_web/find_similar.py +88 -0
- package/.pi/scripts/harness_web/heuristic_angles_shipped.py +85 -0
- package/.pi/scripts/harness_web/heuristic_config.py +251 -0
- package/.pi/scripts/harness_web/highlights.py +47 -0
- package/.pi/scripts/harness_web/multi_search.py +59 -0
- package/.pi/scripts/harness_web/output.py +24 -0
- package/.pi/scripts/harness_web/query_angles.py +116 -0
- package/.pi/scripts/harness_web/rank.py +163 -0
- package/.pi/scripts/harness_web/scrape.py +30 -0
- package/.pi/scripts/tests/test_harness_web_heuristic_config.py +132 -0
- package/.pi/scripts/tests/test_harness_web_query_angles.py +45 -0
- package/.pi/scripts/tests/test_harness_web_rank.py +56 -0
- package/.pi/scripts/validate-plan-dag.mjs +65 -74
- package/.pi/scripts/vendor-pi-vcc-settings.stub.ts +2 -2
- package/.pi/scripts/vendor-sync-pi-vcc.sh +1 -1
- package/.pi/skills/architecture/broker-domain/SKILL.md +65 -0
- package/.pi/skills/architecture/cqrs/SKILL.md +63 -0
- package/.pi/skills/architecture/event-driven/SKILL.md +60 -0
- package/.pi/skills/architecture/hexagonal-ports-adapters/SKILL.md +66 -0
- package/.pi/skills/architecture/layered/SKILL.md +68 -0
- package/.pi/skills/architecture/microkernel/SKILL.md +62 -0
- package/.pi/skills/architecture/microservices/SKILL.md +64 -0
- package/.pi/skills/architecture/modular-monolith/SKILL.md +65 -0
- package/.pi/skills/architecture/orchestration-driven-soa/SKILL.md +61 -0
- package/.pi/skills/architecture/pipeline/SKILL.md +63 -0
- package/.pi/skills/architecture/service-based/SKILL.md +64 -0
- package/.pi/skills/architecture/service-mesh/SKILL.md +60 -0
- package/.pi/skills/architecture/space-based/SKILL.md +60 -0
- package/.pi/skills/ast-grep/SKILL.md +40 -321
- package/.pi/skills/delivery/debugging-discipline/SKILL.md +36 -0
- package/.pi/skills/delivery/documentation-update/SKILL.md +33 -0
- package/.pi/skills/delivery/requirements-to-implementation/SKILL.md +34 -0
- package/.pi/skills/delivery/risk-based-verification/SKILL.md +43 -0
- package/.pi/skills/delivery/tradeoff-analysis/SKILL.md +34 -0
- package/.pi/skills/engineering/api-contract-design/SKILL.md +38 -0
- package/.pi/skills/engineering/cohesion-coupling/SKILL.md +43 -0
- package/.pi/skills/engineering/complexity-control/SKILL.md +31 -0
- package/.pi/skills/engineering/defensive-programming/SKILL.md +38 -0
- package/.pi/skills/engineering/dependency-management/SKILL.md +29 -0
- package/.pi/skills/engineering/domain-modeling/SKILL.md +32 -0
- package/.pi/skills/engineering/error-handling/SKILL.md +37 -0
- package/.pi/skills/engineering/legacy-code-seams/SKILL.md +35 -0
- package/.pi/skills/engineering/naming-and-intent/SKILL.md +29 -0
- package/.pi/skills/engineering/refactoring-safe-evolution/SKILL.md +35 -0
- package/.pi/skills/engineering/routine-function-design/SKILL.md +34 -0
- package/.pi/skills/engineering/small-change-discipline/SKILL.md +35 -0
- package/.pi/skills/lsp-navigation/SKILL.md +89 -0
- package/.pi/skills/quality/code-review-self-check/SKILL.md +35 -0
- package/.pi/skills/quality/privacy-data-handling/SKILL.md +26 -0
- package/.pi/skills/quality/security-review/SKILL.md +34 -0
- package/.pi/skills/quality/test-strategy/SKILL.md +33 -0
- package/.pi/skills/quality/testability-design/SKILL.md +33 -0
- package/.pi/skills/systems/concurrency-safety/SKILL.md +32 -0
- package/.pi/skills/systems/data-modeling-migrations/SKILL.md +31 -0
- package/.pi/skills/systems/observability-instrumentation/SKILL.md +32 -0
- package/.pi/skills/systems/performance-measurement/SKILL.md +35 -0
- package/.pi/skills/systems/reliability-design/SKILL.md +32 -0
- package/.sentrux/rules.toml +20 -4
- package/AGENTS.md +7 -2
- package/CHANGELOG.md +20 -0
- package/README.md +3 -12
- package/THIRD_PARTY_NOTICES.md +12 -21
- package/package.json +17 -7
- package/vendor/pi-subagents/src/agents.ts +45 -1
- package/vendor/pi-subagents/src/subagents.ts +866 -811
- package/vendor/pi-vcc/src/core/brief.ts +68 -99
- package/vendor/pi-vcc/src/core/settings.ts +2 -2
- package/.agents/skills/caveman/SKILL.md +0 -67
- package/.agents/skills/scrapling-web/SKILL.md +0 -98
- package/.pi/agents/harness/meta-optimizer.md +0 -36
- package/.pi/extensions/00-posthog-network-bootstrap.ts +0 -11
- package/.pi/extensions/lib/ask-user/dialog.ts +0 -260
- package/.pi/extensions/lib/harness-subagent-auth.ts +0 -207
- package/.pi/extensions/lib/harness-subagent-policy.ts +0 -236
- package/.pi/extensions/pi-model-router-harness.ts +0 -42
- package/.pi/harness/evolution/meta-optimizer.mjs +0 -99
- package/.pi/harness/specs/router-tuning-proposal.schema.json +0 -114
- package/.pi/model-router.example.json +0 -36
- package/.pi/prompts/harness-critic.md +0 -10
- package/.pi/prompts/harness-eval.md +0 -10
- package/.pi/prompts/harness-router-tune.md +0 -52
- package/.pi/scripts/harness-generate-model-router.mjs +0 -327
- package/.pi/scripts/harness-model-router-routing.test.mjs +0 -97
- package/.pi/scripts/harness-sync-model-router.mjs +0 -97
- package/.pi/scripts/harness_web/__pycache__/__init__.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/config.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/output.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/scrape.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/search.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/search_ddg.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/search_searxng.cpython-314.pyc +0 -0
- package/.pi/scripts/vendor-sync-pi-model-router.sh +0 -47
- package/vendor/pi-model-router/.prettierignore +0 -4
- package/vendor/pi-model-router/.prettierrc +0 -5
- package/vendor/pi-model-router/AGENTS.md +0 -39
- package/vendor/pi-model-router/LICENSE +0 -21
- package/vendor/pi-model-router/README.md +0 -99
- package/vendor/pi-model-router/UPSTREAM_PIN.md +0 -10
- package/vendor/pi-model-router/docs/ARCHITECTURE.md +0 -54
- package/vendor/pi-model-router/extensions/commands.ts +0 -720
- package/vendor/pi-model-router/extensions/config.ts +0 -348
- package/vendor/pi-model-router/extensions/constants.ts +0 -1
- package/vendor/pi-model-router/extensions/index.ts +0 -478
- package/vendor/pi-model-router/extensions/provider.ts +0 -580
- package/vendor/pi-model-router/extensions/routing.ts +0 -564
- package/vendor/pi-model-router/extensions/state.ts +0 -52
- package/vendor/pi-model-router/extensions/types.ts +0 -95
- package/vendor/pi-model-router/extensions/ui.ts +0 -144
- package/vendor/pi-model-router/model-router.example.json +0 -48
- package/vendor/pi-model-router/package.json +0 -48
- package/vendor/pi-model-router/tsconfig.json +0 -16
- /package/.pi/{prompts → harness/docs}/planning-rubrics.md +0 -0
- /package/.pi/{extensions/lib → lib}/ask-user/fallback.ts +0 -0
- /package/.pi/{extensions/lib → lib}/ask-user/render.ts +0 -0
- /package/.pi/{extensions/lib → lib}/ask-user/schema.ts +0 -0
- /package/.pi/{extensions/lib → lib}/ask-user/types.ts +0 -0
- /package/.pi/{extensions/lib → lib}/ask-user/validate-core.mjs +0 -0
- /package/.pi/{extensions/lib → lib}/ask-user/validate.ts +0 -0
- /package/.pi/{extensions/lib → lib}/harness-cocoindex-refresh.ts +0 -0
- /package/.pi/{extensions/lib → lib}/harness-paths.ts +0 -0
- /package/.pi/{extensions/lib → lib}/harness-spawn-budget.ts +0 -0
- /package/.pi/{extensions/lib → lib}/harness-vcc-settings.ts +0 -0
- /package/.pi/{extensions/lib → lib}/plan-approval/dialog.ts +0 -0
- /package/.pi/{extensions/lib → lib}/plan-approval/schema.ts +0 -0
- /package/.pi/{extensions/lib → lib}/plan-approval-readiness.ts +0 -0
- /package/.pi/{extensions/lib → lib}/plan-debate-eligibility.ts +0 -0
- /package/.pi/{extensions/lib → lib}/plan-debate-focus.ts +0 -0
- /package/.pi/{extensions/lib → lib}/plan-debate-id.ts +0 -0
- /package/.pi/{extensions/lib → lib}/plan-debate-lanes.ts +0 -0
- /package/.pi/{extensions/lib → lib}/plan-debate-round-status.ts +0 -0
- /package/.pi/{extensions/lib → lib}/plan-debate-write-guard.ts +0 -0
- /package/.pi/{extensions/lib → lib}/plan-review-gate.ts +0 -0
- /package/.pi/{extensions/lib → lib}/plan-review-integrator-rules.ts +0 -0
- /package/.pi/{extensions/lib → lib}/plan-scope-guard.ts +0 -0
- /package/.pi/{extensions/lib → lib}/posthog-client.ts +0 -0
- /package/.pi/{extensions/lib → lib}/posthog-node.d.ts +0 -0
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
"""Parse and validate WRS search angles (YAML/JSON)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import re
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
try:
|
|
12
|
+
import yaml # type: ignore[import-untyped]
|
|
13
|
+
except ImportError:
|
|
14
|
+
yaml = None # type: ignore[assignment]
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass(frozen=True)
class SearchAngle:
    """A single search query variant, identified by ``id``.

    Instances are immutable (frozen dataclass).
    """

    # Identifier of this angle.
    id: str
    # Query text for this angle.
    query: str
    # Optional free-text explanation; defaults to the empty string.
    rationale: str = ""
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass(frozen=True)
class AnglesPlan:
    """Immutable bundle of an intent string, its search angles and a category.

    ``category`` is optional and defaults to ``None``.
    """

    # Intent text the angles were derived for.
    intent: str
    # Angles belonging to this plan.
    angles: tuple[SearchAngle, ...]
    # Optional category label.
    category: str | None = None
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _heuristic_angles(query: str, *, category: str | None = None) -> AnglesPlan:
    """Build a fallback plan from the heuristic-angle configuration.

    The stripped *query* becomes the plan intent. Angles come from
    ``build_heuristic_angles``; when that yields fewer than two, a fixed pair
    ("definitional" and "official") is used instead. A falsy *category* is
    stored as ``None``.
    """
    from .heuristic_config import build_heuristic_angles, load_heuristic_angles_config

    intent = query.strip()
    angles = build_heuristic_angles(
        intent,
        category=category,
        config=load_heuristic_angles_config(),
    )
    if len(angles) < 2:
        # Not enough configured angles: fall back to two generic ones.
        angles = (
            SearchAngle("definitional", intent, "Core intent phrasing"),
            SearchAngle("official", f"{intent} official documentation", "Authoritative sources"),
        )
    return AnglesPlan(intent=intent, angles=angles, category=category or None)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _parse_angle_item(raw: Any, idx: int) -> SearchAngle:
    """Convert one raw ``angles`` entry into a ``SearchAngle``.

    A string entry is used as the query and gets the id ``angle_<idx + 1>``.
    A dict entry reads ``id`` (or ``name``), ``query`` and ``rationale``
    (or ``reason``); a missing or blank id falls back to ``angle_<idx + 1>``.

    Raises:
        ValueError: if the entry is neither a string nor a dict, or if its
            query is empty.
    """
    default_id = f"angle_{idx + 1}"

    if isinstance(raw, str):
        text = raw.strip()
        if text:
            return SearchAngle(id=default_id, query=text)
        raise ValueError(f"angles[{idx}]: empty query string")

    if not isinstance(raw, dict):
        raise ValueError(f"angles[{idx}]: expected object or string")

    angle_id = str(raw.get("id") or raw.get("name") or default_id).strip()
    query_text = str(raw.get("query") or "").strip()
    if not query_text:
        raise ValueError(f"angles[{idx}]: missing query")

    why = str(raw.get("rationale") or raw.get("reason") or "").strip()
    # A whitespace-only id strips to "", so fall back to the positional id.
    return SearchAngle(id=angle_id or default_id, query=query_text, rationale=why)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _load_structured(data: dict[str, Any]) -> AnglesPlan:
    """Validate a decoded angles mapping and turn it into an ``AnglesPlan``.

    Reads ``intent`` (falling back to ``query``, then to the first angle's
    query), ``angles`` and the optional ``category``. Only the first 8 angles
    are kept.

    Raises:
        ValueError: if ``angles`` is not a non-empty list, holds fewer than
            two entries, or contains an invalid entry.
    """
    declared_intent = str(data.get("intent") or data.get("query") or "").strip()

    entries = data.get("angles")
    if not (isinstance(entries, list) and entries):
        raise ValueError("angles: expected non-empty list")

    parsed = tuple(_parse_angle_item(entry, pos) for pos, entry in enumerate(entries))
    if len(parsed) < 2:
        raise ValueError("angles: need at least 2 entries for deep search")
    # Cap the plan at eight angles; extra entries are silently dropped.
    parsed = parsed[:8]

    raw_category = data.get("category")
    if raw_category:
        category_label = str(raw_category).strip()
    else:
        category_label = None

    return AnglesPlan(
        intent=declared_intent or parsed[0].query,
        angles=parsed,
        category=category_label,
    )
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def load_angles_file(path: Path) -> AnglesPlan:
    """Load an angles plan from a JSON or YAML file.

    If the file contains a Markdown code fence (optionally tagged ``yaml``,
    ``yml`` or ``json``), only the content of the first fence is parsed.
    Content starting with ``{`` is decoded as JSON; anything else is handed
    to ``yaml.safe_load`` when PyYAML is importable.

    Args:
        path: Location of the angles file, read as UTF-8.

    Returns:
        The validated plan built by ``_load_structured``.

    Raises:
        ValueError: if the file is empty, its root is not a mapping, the JSON
            is malformed, or the angles content is invalid.
        SystemExit: if the content is not JSON-object-shaped and PyYAML is
            not installed.
    """
    # "utf-8-sig" drops a leading byte-order mark. With plain "utf-8" the BOM
    # stays in the text, str.strip() does not remove it, and a JSON file would
    # fail the startswith("{") check below and be misrouted to the YAML branch.
    text = path.read_text(encoding="utf-8-sig")
    # Strip markdown fences if present
    fenced = re.search(r"```(?:ya?ml|json)?\s*\n([\s\S]*?)```", text)
    if fenced:
        text = fenced.group(1)
    text = text.strip()
    if not text:
        raise ValueError(f"empty angles file: {path}")

    data: Any
    if text.startswith("{"):
        data = json.loads(text)
    elif yaml is not None:
        data = yaml.safe_load(text)
    else:
        raise SystemExit(
            "angles file is YAML but PyYAML is not installed. "
            "Use JSON angles or: pip install pyyaml"
        )
    if not isinstance(data, dict):
        raise ValueError("angles file root must be an object")
    return _load_structured(data)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def resolve_angles(
    query: str,
    *,
    angles_file: Path | None = None,
    expand_heuristic: bool = False,
    category: str | None = None,
) -> AnglesPlan:
    """Pick the source of search angles for a deep search.

    An explicit *angles_file* takes precedence; otherwise the heuristic
    fallback is used when *expand_heuristic* is set.

    Raises:
        SystemExit: if neither an angles file nor the heuristic fallback
            was requested.
    """
    if angles_file is None and not expand_heuristic:
        raise SystemExit(
            "deep search requires --angles-file (.web/angles.yaml from web-query-expander) "
            "or --expand-heuristic for emergency fallback"
        )
    if angles_file is not None:
        return load_angles_file(angles_file)
    return _heuristic_angles(query, category=category)
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
"""URL normalization and RRF fusion for multi-angle SERP results."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from typing import Any
|
|
8
|
+
from urllib.parse import parse_qs, urlparse, urlunparse
|
|
9
|
+
|
|
10
|
+
# Constant added to the 1-based rank in the reciprocal-rank term 1 / (RRF_K + rank).
RRF_K = 60

# Query-string keys that normalize_url drops (matched against the lower-cased key).
_TRACKING_PARAMS = frozenset(
    (
        "utm_source",
        "utm_medium",
        "utm_campaign",
        "utm_term",
        "utm_content",
        "fbclid",
        "gclid",
        "mc_cid",
        "mc_eid",
    )
)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass
class RankedHit:
    """One fused search result with its score and per-angle provenance."""

    url: str
    title: str
    description: str
    score: float
    # Ids of the angles whose result lists contained this hit.
    angle_ids: list[str] = field(default_factory=list)
    # Angle id -> 1-based rank of this hit in that angle's result list.
    ranks: dict[str, int] = field(default_factory=dict)

    def to_web_dict(self) -> dict[str, Any]:
        """Return a plain-dict view of the hit.

        The score is rounded to 6 decimal places; ``angle_ids`` and ``ranks``
        are shallow copies, so mutating the result leaves the hit untouched.
        """
        payload: dict[str, Any] = {
            "url": self.url,
            "title": self.title,
            "description": self.description,
        }
        payload["score"] = round(self.score, 6)
        payload["angle_ids"] = [*self.angle_ids]
        payload["ranks"] = {**self.ranks}
        return payload
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def normalize_url(url: str) -> str:
    """Return a canonical form of *url* suitable for de-duplication.

    Normalization steps:
      * surrounding whitespace is stripped; a blank input yields ``""``;
      * scheme and host are lower-cased (a missing scheme becomes ``https``);
      * user-info is dropped, as are default ports (80 for http, 443 for https);
      * trailing slashes are removed from non-root paths;
      * query keys are sorted, keys listed in ``_TRACKING_PARAMS`` and
        blank-valued parameters are removed;
      * URL params and the fragment are discarded.

    Inputs that cannot be normalized (no host, or a malformed authority such
    as a non-numeric port) are returned stripped but otherwise unchanged
    instead of raising.

    Args:
        url: Raw URL text.

    Returns:
        The normalized URL, the stripped input when it cannot be parsed,
        or ``""`` for blank input.
    """
    u = url.strip()
    if not u:
        return ""
    try:
        parsed = urlparse(u)
        # Accessing .port validates it and raises ValueError for values that
        # are non-numeric or out of range; urlparse itself can raise on
        # unbalanced IPv6 brackets.
        port = parsed.port
    except ValueError:
        return u
    scheme = (parsed.scheme or "https").lower()
    host = (parsed.hostname or "").lower()
    if not host:
        return u
    # .hostname strips the brackets of an IPv6 literal; restore them so the
    # rebuilt netloc stays a valid authority (and "host:port" is unambiguous).
    netloc = f"[{host}]" if ":" in host else host
    if port and not ((scheme == "http" and port == 80) or (scheme == "https" and port == 443)):
        netloc = f"{netloc}:{port}"
    path = parsed.path or "/"
    if path != "/" and path.endswith("/"):
        # An all-slash path ("//") would strip down to "", which would no
        # longer match the root path "/"; keep it as the root.
        path = path.rstrip("/") or "/"
    qs = parse_qs(parsed.query, keep_blank_values=False)
    filtered = []
    for key in sorted(qs):
        if key.lower() in _TRACKING_PARAMS:
            continue
        # NOTE(review): parse_qs percent-decodes values and they are not
        # re-encoded here, so the result is best treated as a comparison key.
        filtered.extend(f"{key}={val}" for val in qs[key])
    query = "&".join(filtered)
    return urlunparse((scheme, netloc, path, "", query, ""))
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def tokenize(text: str) -> set[str]:
    """Return the distinct runs of 3+ ASCII letters/digits in lower-cased *text*."""
    # The pattern already enforces the minimum length of three characters.
    return set(re.findall(r"[a-z0-9]{3,}", text.lower()))
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def lexical_rerank(hits: list[RankedHit], intent: str) -> list[RankedHit]:
    """Re-order *hits* by adding a token-overlap bonus to each score.

    The bonus is ``0.15 * overlap``, where ``overlap`` is the fraction of the
    intent's tokens found in the hit's title and description. When *intent*
    yields no tokens the input list is returned unchanged; otherwise a new
    list of new ``RankedHit`` objects is returned, highest adjusted score
    first, with ties keeping their incoming order.
    """
    wanted = tokenize(intent)
    if not wanted:
        return hits

    def overlap_ratio(hit: RankedHit) -> float:
        found = tokenize(f"{hit.title} {hit.description}".lower())
        if not found:
            return 0.0
        return len(wanted & found) / max(len(wanted), 1)

    # NOTE(review): scores produced by fuse_angle_results are sums of
    # 1 / (RRF_K + rank) terms, so the 0.15 weight may outweigh them - confirm
    # that this is the intended balance.
    boosted = sorted(
        ((hit, hit.score + 0.15 * overlap_ratio(hit)) for hit in hits),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return [
        RankedHit(
            url=hit.url,
            title=hit.title,
            description=hit.description,
            score=adjusted,
            angle_ids=hit.angle_ids,
            ranks=hit.ranks,
        )
        for hit, adjusted in boosted
    ]
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def fuse_angle_results(
    per_angle: dict[str, list[dict[str, str]]],
    *,
    final_limit: int = 10,
    intent: str = "",
    rerank_mode: str = "off",
) -> list[RankedHit]:
    """Merge per-angle result lists with Reciprocal Rank Fusion.

    Results are grouped by normalized URL. Each angle contributes
    ``1 / (RRF_K + rank)`` to a URL's score, where ``rank`` is the 1-based
    position of the URL's first occurrence in that angle's list. Later
    duplicates within the same list add no score, but may still supply a
    title or a longer description.

    Args:
        per_angle: Angle id mapped to its ordered results; each result is read
            through its ``url``, ``title`` and ``description`` keys.
        final_limit: Maximum number of hits returned; values below zero are
            treated as zero.
        intent: Text used for the lexical re-rank.
        rerank_mode: ``"lexical"`` applies ``lexical_rerank`` when *intent*
            is non-empty; any other value leaves the fused order untouched.

    Returns:
        Hits ordered by score (descending), then by number of contributing
        angles (descending), then by best rank (ascending), truncated to
        *final_limit*. Each hit carries the raw URL of its first occurrence.
    """
    accum: dict[str, dict[str, Any]] = {}

    for angle_id, results in per_angle.items():
        for rank_1based, item in enumerate(results, start=1):
            raw_url = (item.get("url") or "").strip()
            norm = normalize_url(raw_url)
            # Skip results without a usable http(s) URL.
            if not norm or not norm.startswith("http"):
                continue
            entry = accum.setdefault(
                norm,
                {
                    "url": raw_url,
                    "title": "",
                    "description": "",
                    "score": 0.0,
                    "angle_ids": [],
                    "ranks": {},
                },
            )
            # Count a URL once per angle, at its best (first seen) rank. Without
            # this guard a duplicate in one list would be scored twice and its
            # recorded rank overwritten by the worse position.
            if angle_id not in entry["ranks"]:
                entry["score"] += 1.0 / (RRF_K + rank_1based)
                entry["angle_ids"].append(angle_id)
                entry["ranks"][angle_id] = rank_1based
            title = (item.get("title") or "").strip()
            desc = (item.get("description") or "").strip()
            # Keep the first non-empty title and the longest description seen.
            if title and not entry["title"]:
                entry["title"] = title
            if desc and (not entry["description"] or len(desc) > len(entry["description"])):
                entry["description"] = desc

    hits = [
        RankedHit(
            url=e["url"],
            title=e["title"],
            description=e["description"],
            score=e["score"],
            angle_ids=e["angle_ids"],
            ranks=e["ranks"],
        )
        for e in accum.values()
    ]
    hits.sort(key=lambda h: (-h.score, -len(h.angle_ids), min(h.ranks.values()) if h.ranks else 999))

    if rerank_mode == "lexical" and intent:
        hits = lexical_rerank(hits, intent)

    # A negative limit would otherwise slice from the end of the list.
    return hits[: max(final_limit, 0)]
|
|
@@ -41,6 +41,36 @@ def scrape_url(
|
|
|
41
41
|
write_page_markdown(Path(output), page, main_content_only=True)
|
|
42
42
|
|
|
43
43
|
|
|
44
|
+
def scrape_url_with_highlights(
    url: str,
    markdown_output: str,
    highlights_output: str | None,
    *,
    config: HarnessWebConfig,
    fast: bool,
    wait_ms: int | None,
    highlight_query: str,
) -> None:
    """Fetch *url*, write its markdown, and optionally write a highlights file.

    The page is always written to *markdown_output*. When *highlights_output*
    is given and *highlight_query* is not blank, the written markdown is read
    back, passed to ``extract_highlights`` and the result is saved as JSON
    (keys ``url``, ``query``, ``highlights``) at *highlights_output*, creating
    parent directories as needed.
    """
    import json
    from pathlib import Path

    from .highlights import extract_highlights

    fetched = fetch_page(url, config=config, fast=fast, wait_ms=wait_ms)
    markdown_path = Path(markdown_output)
    write_page_markdown(markdown_path, fetched, main_content_only=True)

    if not highlights_output or not highlight_query.strip():
        return

    # Highlights are extracted from the markdown as it was written to disk.
    spans = extract_highlights(markdown_path.read_text(encoding="utf-8"), highlight_query)
    payload = {"url": url, "query": highlight_query, "highlights": spans}
    highlights_path = Path(highlights_output)
    highlights_path.parent.mkdir(parents=True, exist_ok=True)
    highlights_path.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")
|
|
72
|
+
|
|
73
|
+
|
|
44
74
|
def map_url(
|
|
45
75
|
url: str,
|
|
46
76
|
output: str,
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
"""Unit tests for harness_web.heuristic_config."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import os
|
|
7
|
+
import tempfile
|
|
8
|
+
import unittest
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
from harness_web.heuristic_config import (
|
|
12
|
+
_embedded_builtin_dict,
|
|
13
|
+
_merge_config_dict,
|
|
14
|
+
build_heuristic_angles,
|
|
15
|
+
clear_heuristic_config_cache,
|
|
16
|
+
heuristic_config_from_merged,
|
|
17
|
+
load_heuristic_angles_config_cached,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class TestHeuristicConfig(unittest.TestCase):
    """Covers builtin, shipped, merged and project-file heuristic angle configs."""

    def tearDown(self) -> None:
        # Reset the config cache after every test so loaded files do not leak across tests.
        clear_heuristic_config_cache()

    def _load_shipped_config(self):
        # Loads harness/web-heuristic-angles.yaml located two directories above
        # this file; the calling test is skipped when that file is absent.
        shipped_yaml = Path(__file__).resolve().parents[2] / "harness" / "web-heuristic-angles.yaml"
        if not shipped_yaml.is_file():
            self.skipTest("package yaml missing")
        clear_heuristic_config_cache()
        return load_heuristic_angles_config_cached((str(shipped_yaml),))

    def test_builtin_code_includes_stackoverflow(self) -> None:
        # Uses only the embedded builtin dict, no files on disk.
        builtin_cfg = heuristic_config_from_merged(_embedded_builtin_dict())
        built = build_heuristic_angles("rust async", category="code", config=builtin_cfg)
        angle_ids = {angle.id for angle in built}
        for expected_id in ("stackoverflow", "github"):
            self.assertIn(expected_id, angle_ids)
        self.assertTrue(any("site:stackoverflow.com" in angle.query for angle in built))

    def test_shipped_code_includes_mdn_and_registries(self) -> None:
        shipped_cfg = self._load_shipped_config()
        built = build_heuristic_angles("websocket api", category="code", config=shipped_cfg)
        angle_ids = {angle.id for angle in built}
        for expected_id in ("mdn", "package_registries"):
            self.assertIn(expected_id, angle_ids)
        # The built list must respect the configured cap.
        self.assertLessEqual(len(built), shipped_cfg.max_angles)

    def test_shipped_security_category(self) -> None:
        shipped_cfg = self._load_shipped_config()
        built = build_heuristic_angles("jwt validation", category="security", config=shipped_cfg)
        angle_ids = {angle.id for angle in built}
        for expected_id in ("owasp", "cve_nvd"):
            self.assertIn(expected_id, angle_ids)

    def test_merge_extends_code_category(self) -> None:
        docs_rs_angle = {
            "id": "docs_rs",
            "query": "{query} site:docs.rs",
            "rationale": "Rust docs",
        }
        override = {
            "max_angles": 12,
            "categories": {"code": [docs_rs_angle]},
        }
        merged = _merge_config_dict(_embedded_builtin_dict(), override)
        merged_cfg = heuristic_config_from_merged(merged)

        # The override entry must appear in the merged "code" category ...
        merged_code_ids = [entry["id"] for entry in merged["categories"]["code"]]
        self.assertIn("docs_rs", merged_code_ids)

        # ... while the builtin entries are still produced when angles are built.
        built = build_heuristic_angles("tokio", category="code", config=merged_cfg)
        angle_ids = {angle.id for angle in built}
        for expected_id in ("stackoverflow", "github"):
            self.assertIn(expected_id, angle_ids)

    def test_merge_adds_new_category(self) -> None:
        security_angles = [
            {"id": "cve", "query": "{query} CVE", "rationale": "vulns"},
            {
                "id": "owasp",
                "query": "{query} site:owasp.org",
                "rationale": "guidance",
            },
        ]
        merged = _merge_config_dict(
            _embedded_builtin_dict(),
            {"categories": {"security": security_angles}},
        )
        merged_cfg = heuristic_config_from_merged(merged)
        built = build_heuristic_angles("jwt auth", category="security", config=merged_cfg)
        angle_ids = {angle.id for angle in built}
        for expected_id in ("cve", "owasp"):
            self.assertIn(expected_id, angle_ids)

    def test_json_project_file_merges(self) -> None:
        project_payload = {
            "categories": {
                "code": [
                    {
                        "id": "crates_io",
                        "query": "{query} site:crates.io",
                        "rationale": "crates",
                    },
                ],
            },
        }
        with tempfile.TemporaryDirectory() as tmp:
            # Write the JSON config under <tmp>/.pi/harness/.
            config_dir = Path(tmp) / ".pi" / "harness"
            config_dir.mkdir(parents=True)
            config_file = config_dir / "web-heuristic-angles.json"
            config_file.write_text(json.dumps(project_payload), encoding="utf-8")

            clear_heuristic_config_cache()
            project_cfg = load_heuristic_angles_config_cached((str(config_file),))
            built = build_heuristic_angles("serde", category="code", config=project_cfg)
            angle_ids = {angle.id for angle in built}
            self.assertIn("crates_io", angle_ids)
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
# Run the tests in this module when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
"""Unit tests for harness_web.query_angles (no network)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import tempfile
|
|
7
|
+
import unittest
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
from harness_web.query_angles import AnglesPlan, load_angles_file, resolve_angles
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class TestResolveAngles(unittest.TestCase):
    """Checks heuristic expansion performed by ``resolve_angles``."""

    def test_heuristic_code_category(self) -> None:
        # Heuristic expansion for the "code" category must include a "github"
        # angle and yield between 2 and 5 angles in total.
        resolved = resolve_angles("rust async", expand_heuristic=True, category="code")
        angle_ids = {angle.id for angle in resolved.angles}
        self.assertIn("github", angle_ids)
        angle_count = len(resolved.angles)
        self.assertGreaterEqual(angle_count, 2)
        self.assertLessEqual(angle_count, 5)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class TestLoadFile(unittest.TestCase):
    """Checks that ``load_angles_file`` reads a JSON angles plan from disk."""

    def test_load_json_file(self) -> None:
        plan_payload = {
            "intent": "load test",
            "angles": [
                {"id": "a", "query": "first angle query"},
                {"id": "b", "query": "second angle query"},
            ],
        }
        with tempfile.TemporaryDirectory() as tmp:
            angles_path = Path(tmp) / "angles.json"
            angles_path.write_text(json.dumps(plan_payload), encoding="utf-8")

            # Load while the temporary directory still exists.
            loaded = load_angles_file(angles_path)
            self.assertIsInstance(loaded, AnglesPlan)
            self.assertEqual(loaded.intent, "load test")
            self.assertEqual(len(loaded.angles), 2)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
# Run the tests in this module when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
"""Unit tests for harness_web.rank (no network)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import unittest
|
|
6
|
+
|
|
7
|
+
from harness_web.rank import RankedHit, fuse_angle_results, lexical_rerank, normalize_url, tokenize
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class TestNormalizeUrl(unittest.TestCase):
    """Checks that ``normalize_url`` maps equivalent URLs to the same value."""

    def test_strips_tracking(self) -> None:
        # The inputs differ only in host letter case and a utm_source parameter.
        with_tracking = normalize_url("https://Example.com/path?utm_source=x&id=1")
        without_tracking = normalize_url("https://example.com/path?id=1")
        self.assertEqual(with_tracking, without_tracking)

    def test_trailing_slash(self) -> None:
        # A trailing slash on the path must not change the normalized form.
        with_slash = normalize_url("https://example.com/foo/")
        without_slash = normalize_url("https://example.com/foo")
        self.assertEqual(with_slash, without_slash)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class TestRrfFusion(unittest.TestCase):
    """Checks how ``fuse_angle_results`` merges hits from several angles."""

    def test_merges_duplicate_urls(self) -> None:
        # https://x.com/1 is returned by both angles; https://x.com/2 only by "a".
        hits_for_a = [
            {"url": "https://x.com/1", "title": "T1", "description": "d1"},
            {"url": "https://x.com/2", "title": "T2", "description": "d2"},
        ]
        hits_for_b = [
            {"url": "https://x.com/1", "title": "T1b", "description": "d1b"},
        ]
        fused = fuse_angle_results({"a": hits_for_a, "b": hits_for_b}, final_limit=5)

        # The shared URL collapses into a single hit, leaving two results.
        self.assertEqual(len(fused), 2)
        best, runner_up = fused[0], fused[1]
        self.assertEqual(best.url, "https://x.com/1")
        for angle_id in ("a", "b"):
            self.assertIn(angle_id, best.angle_ids)
        self.assertGreater(best.score, runner_up.score)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class TestLexicalRerank(unittest.TestCase):
    """Checks that ``lexical_rerank`` reorders hits by overlap with the intent."""

    def test_boosts_intent_overlap(self) -> None:
        # The off-topic hit starts with the higher score (0.52 vs 0.50).
        off_topic = RankedHit("https://a", "unrelated", "noise", 0.52, ["a"])
        on_topic = RankedHit(
            "https://b",
            "kubernetes architecture",
            "how kubernetes works",
            0.50,
            ["b"],
        )
        reranked = lexical_rerank([off_topic, on_topic], "kubernetes architecture")

        # After reranking, the hit matching the intent must lead with a strictly higher score.
        first, second = reranked[0], reranked[1]
        self.assertEqual(first.url, "https://b")
        self.assertGreater(first.score, second.score)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class TestTokenize(unittest.TestCase):
    """Checks the output of ``tokenize`` for a short input string."""

    def test_min_length(self) -> None:
        # Only the presence of the longer word is asserted here.
        tokens = tokenize("hello hi")
        self.assertIn("hello", tokens)
|