bosun 0.42.6 → 0.43.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +26 -15
- package/README.md +1 -11
- package/agent/agent-event-bus.mjs +33 -2
- package/agent/agent-hooks.mjs +4 -52
- package/agent/agent-launcher.mjs +6210 -0
- package/agent/agent-pool.mjs +7 -4182
- package/agent/agent-prompt-catalog.mjs +3 -4
- package/agent/agent-sdk.mjs +1 -4
- package/agent/agent-supervisor.mjs +30 -6
- package/agent/auth/_shared.mjs +129 -0
- package/agent/auth/anthropic-api-key.mjs +13 -0
- package/agent/auth/azure-openai.mjs +17 -0
- package/agent/auth/cerebras.mjs +14 -0
- package/agent/auth/chatgpt-codex-subscription.mjs +15 -0
- package/agent/auth/claude-subscription.mjs +15 -0
- package/agent/auth/copilot-oauth.mjs +13 -0
- package/agent/auth/deepinfra.mjs +14 -0
- package/agent/auth/fireworks.mjs +14 -0
- package/agent/auth/gemini-api-key.mjs +14 -0
- package/agent/auth/groq.mjs +14 -0
- package/agent/auth/index.mjs +85 -0
- package/agent/auth/nebius.mjs +14 -0
- package/agent/auth/ollama.mjs +14 -0
- package/agent/auth/openai-api-key.mjs +13 -0
- package/agent/auth/openai-compatible.mjs +15 -0
- package/agent/auth/openrouter.mjs +14 -0
- package/agent/auth/perplexity.mjs +14 -0
- package/agent/auth/sambanova.mjs +14 -0
- package/agent/auth/together.mjs +14 -0
- package/agent/auth/xai.mjs +14 -0
- package/agent/bosun-skills.mjs +177 -68
- package/agent/fleet-coordinator.mjs +153 -37
- package/agent/harness/agent-loop.mjs +26 -0
- package/agent/harness/event-contract.mjs +125 -0
- package/agent/harness/followup-queue.mjs +33 -0
- package/agent/harness/message-normalizer.mjs +43 -0
- package/agent/harness/module-boundaries.md +73 -0
- package/agent/harness/run-contract.mjs +122 -0
- package/agent/harness/runtime-config.mjs +132 -0
- package/agent/harness/session-state.mjs +80 -0
- package/agent/harness/steering-queue.mjs +35 -0
- package/agent/harness/tool-runner.mjs +95 -0
- package/agent/harness/turn-runner.mjs +135 -0
- package/agent/harness-agent-service.mjs +852 -0
- package/agent/harness-executor-config.mjs +384 -0
- package/agent/hook-library.mjs +139 -0
- package/agent/hook-profiles.mjs +46 -108
- package/agent/internal-harness-control-plane.mjs +672 -0
- package/agent/internal-harness-profile.mjs +519 -0
- package/agent/internal-harness-runtime.mjs +1219 -0
- package/agent/lineage-graph.mjs +141 -0
- package/agent/primary-agent.mjs +593 -646
- package/agent/provider-auth-manager.mjs +830 -0
- package/agent/provider-auth-state.mjs +440 -0
- package/agent/provider-capabilities.mjs +116 -0
- package/agent/provider-kernel.mjs +596 -0
- package/agent/provider-message-transform.mjs +583 -0
- package/agent/provider-model-catalog.mjs +163 -0
- package/agent/provider-registry.mjs +657 -0
- package/agent/provider-runtime-discovery.mjs +147 -0
- package/agent/provider-session.mjs +767 -0
- package/agent/providers/_shared.mjs +397 -0
- package/agent/providers/anthropic-messages.mjs +64 -0
- package/agent/providers/azure-openai-responses.mjs +69 -0
- package/agent/providers/cerebras.mjs +66 -0
- package/agent/providers/claude-subscription-shim.mjs +68 -0
- package/agent/providers/copilot-oauth.mjs +66 -0
- package/agent/providers/deepinfra.mjs +66 -0
- package/agent/providers/fireworks.mjs +66 -0
- package/agent/providers/gemini-generate-content.mjs +66 -0
- package/agent/providers/groq.mjs +66 -0
- package/agent/providers/index.mjs +208 -0
- package/agent/providers/nebius.mjs +66 -0
- package/agent/providers/ollama.mjs +66 -0
- package/agent/providers/openai-codex-subscription.mjs +75 -0
- package/agent/providers/openai-compatible.mjs +65 -0
- package/agent/providers/openai-responses.mjs +67 -0
- package/agent/providers/openrouter.mjs +66 -0
- package/agent/providers/perplexity.mjs +66 -0
- package/agent/providers/provider-contract.mjs +138 -0
- package/agent/providers/provider-errors.mjs +63 -0
- package/agent/providers/provider-model-pricing.mjs +246 -0
- package/agent/providers/provider-stream-normalizer.mjs +7 -0
- package/agent/providers/provider-usage-normalizer.mjs +48 -0
- package/agent/providers/sambanova.mjs +66 -0
- package/agent/providers/together.mjs +66 -0
- package/agent/providers/xai.mjs +66 -0
- package/agent/query-engine.mjs +260 -0
- package/agent/retry-queue.mjs +1 -0
- package/agent/session-contract.mjs +127 -0
- package/agent/session-manager.mjs +1859 -0
- package/agent/session-replay.mjs +617 -0
- package/agent/session-snapshot-store.mjs +379 -0
- package/agent/skills/agent-coordination.md +6 -0
- package/agent/skills/background-task-execution.md +6 -0
- package/agent/skills/bosun-agent-api.md +6 -0
- package/agent/skills/code-quality-anti-patterns.md +7 -0
- package/agent/skills/commit-conventions.md +6 -0
- package/agent/skills/custom-tool-creation.md +6 -0
- package/agent/skills/error-recovery.md +6 -0
- package/agent/skills/pr-workflow.md +6 -0
- package/agent/skills/tdd-pattern.md +6 -0
- package/agent/subagent-contract.mjs +104 -0
- package/agent/subagent-control.mjs +633 -0
- package/agent/subagent-pool.mjs +260 -0
- package/agent/thread-contract.mjs +88 -0
- package/agent/thread-registry.mjs +552 -0
- package/agent/tool-approval-manager.mjs +259 -0
- package/agent/tool-builtin-catalog.mjs +855 -0
- package/agent/tool-contract.mjs +101 -0
- package/agent/tool-event-contract.mjs +99 -0
- package/agent/tool-execution-ledger.mjs +32 -0
- package/agent/tool-network-policy.mjs +86 -0
- package/agent/tool-orchestrator.mjs +382 -0
- package/agent/tool-output-truncation.mjs +70 -0
- package/agent/tool-registry.mjs +200 -0
- package/agent/tool-retry-policy.mjs +57 -0
- package/agent/tool-runtime-context.mjs +220 -0
- package/bench/harness-load-bench.mjs +281 -0
- package/bench/harness-parity-bench.mjs +214 -0
- package/bench/swebench/bosun-swebench.mjs +1 -0
- package/bosun-tui.mjs +59 -13
- package/bosun.schema.json +358 -1
- package/cli.mjs +641 -190
- package/config/config-doctor.mjs +78 -4
- package/config/config-editor.mjs +417 -0
- package/config/config.mjs +217 -117
- package/config/repo-config.mjs +78 -10
- package/config/repo-root.mjs +33 -1
- package/desktop/main.mjs +438 -99
- package/desktop/package.json +1 -1
- package/git/diff-stats.mjs +7 -5
- package/infra/anomaly-detector.mjs +115 -15
- package/infra/approval-projection-store.mjs +75 -0
- package/infra/config-reload-bus.mjs +33 -0
- package/infra/container-runner.mjs +37 -3
- package/infra/error-detector.mjs +109 -34
- package/infra/event-schema.mjs +353 -0
- package/infra/guardrails.mjs +383 -0
- package/infra/heartbeat-monitor.mjs +432 -0
- package/infra/library-manager.mjs +359 -233
- package/infra/live-event-projector.mjs +197 -0
- package/infra/maintenance.mjs +176 -37
- package/infra/monitor.mjs +1280 -194
- package/infra/preflight.mjs +71 -5
- package/infra/presence.mjs +33 -9
- package/infra/projection-contract.mjs +27 -0
- package/infra/provider-usage-ledger.mjs +73 -0
- package/infra/replay-reader.mjs +140 -0
- package/infra/runtime-accumulator.mjs +303 -8
- package/infra/runtime-metrics.mjs +156 -0
- package/infra/session-projection-store.mjs +169 -0
- package/infra/session-telemetry-runtime.mjs +580 -0
- package/infra/session-telemetry.mjs +338 -0
- package/infra/session-tracker.mjs +1407 -174
- package/infra/startup-service.mjs +0 -2
- package/infra/storage-janitor.mjs +1046 -0
- package/infra/subagent-projection-store.mjs +89 -0
- package/infra/test-runtime.mjs +53 -20
- package/infra/trace-export.mjs +103 -0
- package/infra/tracing.mjs +13 -125
- package/infra/tui-bridge.mjs +607 -5
- package/infra/update-check.mjs +7 -8
- package/infra/windows-hidden-child-processes.mjs +99 -0
- package/infra/worktree-recovery-state.mjs +15 -5
- package/kanban/kanban-adapter.mjs +674 -41
- package/kanban/repo-mirror-projection-store.mjs +871 -0
- package/lib/agent-configuration-guide.mjs +280 -0
- package/lib/hot-path-runtime.mjs +1061 -0
- package/lib/integrations-registry.mjs +294 -0
- package/lib/log-tail.mjs +101 -0
- package/lib/mojibake-repair.mjs +40 -0
- package/lib/repo-map.mjs +137 -24
- package/lib/request-json-api.mjs +59 -0
- package/lib/safe-box.mjs +56 -0
- package/lib/session-insights.mjs +3 -65
- package/lib/state-ledger-sqlite.mjs +4462 -0
- package/lib/vault-keychain.mjs +259 -0
- package/lib/vault.mjs +374 -0
- package/lib/workflow-flowchart-utils.mjs +326 -0
- package/native/bosun-telemetry/Cargo.toml +9 -0
- package/native/bosun-telemetry/src/export.rs +151 -0
- package/native/bosun-telemetry/src/main.rs +76 -0
- package/native/bosun-telemetry/src/metrics.rs +114 -0
- package/native/bosun-telemetry/src/session_telemetry.rs +178 -0
- package/native/bosun-unified-exec/Cargo.lock +107 -0
- package/native/bosun-unified-exec/Cargo.toml +8 -0
- package/native/bosun-unified-exec/src/async_watcher.rs +145 -0
- package/native/bosun-unified-exec/src/head_tail_buffer.rs +241 -0
- package/native/bosun-unified-exec/src/main.rs +86 -0
- package/native/bosun-unified-exec/src/process_manager.rs +308 -0
- package/native/bosun-unified-exec/src/tool_orchestrator.rs +187 -0
- package/package.json +214 -36
- package/postinstall.mjs +182 -12
- package/server/bosun-mcp-server.mjs +333 -3
- package/server/routes/harness-agent-bridge.mjs +128 -0
- package/server/routes/harness-approvals.mjs +290 -0
- package/server/routes/harness-events.mjs +469 -0
- package/server/routes/harness-providers.mjs +385 -0
- package/server/routes/harness-sessions.mjs +2230 -0
- package/server/routes/harness-subagents.mjs +138 -0
- package/server/routes/harness-surface-payload.mjs +74 -0
- package/server/setup-web-server.mjs +199 -24
- package/server/ui-server.mjs +10983 -3421
- package/setup.mjs +25 -2
- package/shell/anthropic-native-adapter.mjs +1218 -0
- package/shell/auth-resolver.mjs +247 -0
- package/shell/claude-shell.mjs +85 -2
- package/shell/codex-config-file.mjs +9 -0
- package/shell/codex-config.mjs +49 -5
- package/shell/codex-model-profiles.mjs +29 -59
- package/shell/codex-sdk-import.mjs +7 -0
- package/shell/codex-shell.mjs +574 -159
- package/shell/context-compaction.mjs +898 -0
- package/shell/copilot-shell.mjs +362 -130
- package/shell/gemini-native-adapter.mjs +411 -0
- package/shell/gemini-shell.mjs +121 -13
- package/shell/mcp-client.mjs +401 -0
- package/shell/mcp-registry.mjs +72 -0
- package/shell/message-pruner.mjs +248 -0
- package/shell/openai-native-adapter.mjs +1975 -0
- package/shell/opencode-providers.mjs +16 -816
- package/shell/opencode-shell.mjs +180 -9
- package/shell/provider-transform.mjs +386 -0
- package/shell/retry-fetch.mjs +244 -0
- package/shell/session-resume.mjs +97 -0
- package/shell/session-store.mjs +215 -0
- package/shell/shell-adapter-registry.mjs +346 -0
- package/shell/shell-session-compat.mjs +442 -0
- package/shell/smooth-stream.mjs +233 -0
- package/shell/stop-condition.mjs +238 -0
- package/shell/tool-call-repair.mjs +345 -0
- package/shell/tool-executor.mjs +571 -0
- package/task/pipeline.mjs +3 -1
- package/task/task-assessment.mjs +312 -6
- package/task/task-claims.mjs +311 -47
- package/task/task-cli.mjs +79 -4
- package/task/task-context.mjs +37 -0
- package/task/task-debt-ledger.mjs +110 -0
- package/task/task-executor.mjs +1093 -107
- package/task/task-replanner.mjs +553 -0
- package/task/task-simulate-cli.mjs +1481 -0
- package/task/task-store.mjs +960 -64
- package/telegram/executor-health-region-cache.mjs +75 -0
- package/telegram/harness-api-client.mjs +124 -0
- package/telegram/sticky-menu-state.mjs +384 -0
- package/telegram/telegram-bot.mjs +418 -812
- package/telegram/telegram-sentinel.mjs +24 -13
- package/telegram/telegram-surface-runtime.mjs +53 -0
- package/tools/generate-demo-defaults.mjs +23 -4
- package/tools/harness-hotpath-bench.mjs +246 -0
- package/tools/import-check.mjs +56 -11
- package/tools/install-git-hooks.mjs +96 -20
- package/tools/native-rust.mjs +124 -0
- package/tools/packed-cli-smoke.mjs +139 -53
- package/tools/prepublish-check.mjs +53 -1
- package/tools/run-workflow-guaranteed-suite.mjs +56 -0
- package/tools/sync-demo-ui.mjs +188 -0
- package/tools/syntax-check.mjs +32 -28
- package/tools/vite-windows-realpath-shim.mjs +274 -0
- package/tools/vitest-esbuild-shim.mjs +45 -0
- package/tools/vitest-full-suite.mjs +310 -0
- package/tools/vitest-runner.mjs +474 -7
- package/tools/workflow-orphan-worktree-recovery.mjs +24 -7
- package/tui/CommandPalette.js +87 -0
- package/tui/app.mjs +422 -36
- package/tui/components/status-header.mjs +39 -0
- package/tui/lib/command-palette.mjs +191 -0
- package/tui/lib/connection-target.mjs +577 -0
- package/tui/lib/navigation.mjs +6 -2
- package/tui/lib/ws-bridge.mjs +141 -51
- package/tui/screens/agents-screen-helpers.mjs +87 -3
- package/tui/screens/agents.mjs +1068 -200
- package/tui/screens/connection-setup.mjs +363 -0
- package/tui/screens/harness-approvals.mjs +7 -0
- package/tui/screens/harness-sessions.mjs +109 -0
- package/tui/screens/harness-subagents.mjs +18 -0
- package/tui/screens/harness-telemetry.mjs +67 -0
- package/tui/screens/logs.mjs +1 -5
- package/tui/screens/settings-screen-helpers.mjs +75 -0
- package/tui/screens/settings.mjs +397 -0
- package/tui/screens/status.mjs +126 -4
- package/tui/screens/telemetry-screen-helpers.mjs +158 -0
- package/tui/screens/telemetry.mjs +246 -0
- package/tui/screens/workflows.mjs +984 -0
- package/ui/app.js +450 -183
- package/ui/assets/toastui-editor-all.min.js +24 -0
- package/ui/components/agent-selector.js +706 -49
- package/ui/components/charts.js +16 -12
- package/ui/components/chat-view.js +536 -35
- package/ui/components/context-menu.js +89 -0
- package/ui/components/diff-viewer.js +83 -34
- package/ui/components/kanban-board.js +513 -94
- package/ui/components/session-list.js +292 -65
- package/ui/components/task-markdown.js +272 -0
- package/ui/components/workspace-switcher.js +11 -2
- package/ui/demo-defaults.js +13806 -3965
- package/ui/demo.html +817 -3
- package/ui/index.html +32 -6
- package/ui/modules/agent-events.js +309 -36
- package/ui/modules/api.js +236 -13
- package/ui/modules/chat-turn-groups.js +101 -0
- package/ui/modules/harness-client.js +56 -0
- package/ui/modules/icons.js +18 -2
- package/ui/modules/router.js +2 -0
- package/ui/modules/session-api.js +158 -14
- package/ui/modules/session-insights-worker.js +28 -0
- package/ui/modules/session-insights.js +173 -4
- package/ui/modules/session-surface.js +221 -0
- package/ui/modules/settings-schema.js +146 -26
- package/ui/modules/state.js +56 -3
- package/ui/modules/streaming.js +196 -60
- package/ui/modules/structured-values.js +47 -0
- package/ui/modules/task-hierarchy.js +374 -0
- package/ui/modules/worktree-recovery.js +10 -1
- package/ui/setup.html +174 -6
- package/ui/styles/components.css +982 -130
- package/ui/styles/kanban.css +229 -0
- package/ui/styles/layout.css +404 -144
- package/ui/styles/toastui-editor-dark.css +1 -0
- package/ui/styles/toastui-editor-viewer.css +6 -0
- package/ui/styles/toastui-editor.css +6 -0
- package/ui/styles/variables.css +14 -2
- package/ui/styles/workspace-switcher.css +22 -0
- package/ui/styles.css +19 -0
- package/ui/tabs/agents.js +1032 -96
- package/ui/tabs/chat.js +588 -146
- package/ui/tabs/context-compression-lab.js +962 -0
- package/ui/tabs/control.js +121 -0
- package/ui/tabs/dashboard.js +302 -86
- package/ui/tabs/guardrails.js +1140 -0
- package/ui/tabs/integrations.js +388 -0
- package/ui/tabs/library.js +19 -4
- package/ui/tabs/manual-flows.js +268 -52
- package/ui/tabs/settings.js +1604 -104
- package/ui/tabs/tasks.js +2544 -300
- package/ui/tabs/telemetry.js +102 -6
- package/ui/tabs/workflow-canvas-utils.mjs +172 -15
- package/ui/tabs/workflows.js +2421 -196
- package/ui/tui/App.js +117 -117
- package/ui/tui/HelpScreen.js +201 -0
- package/ui/tui/SettingsScreen.js +388 -0
- package/ui/tui/TasksScreen.js +25 -11
- package/ui/tui/TelemetryScreen.js +155 -0
- package/ui/tui/WorkflowsScreen.js +350 -0
- package/ui/tui/config-events.js +13 -0
- package/ui/tui/constants.js +1 -1
- package/ui/tui/tasks-screen-helpers.js +52 -0
- package/ui/tui/telemetry-helpers.js +158 -0
- package/ui/tui/useTasks.js +1 -6
- package/ui/tui/useWebSocket.js +1 -7
- package/ui/tui/useWorkflows.js +120 -5
- package/ui/tui/workflows-screen-helpers.js +220 -0
- package/utils.mjs +9 -22
- package/voice/vision-session-state.mjs +257 -0
- package/voice/voice-action-dispatcher.mjs +57 -12
- package/voice/voice-agents-sdk.mjs +1 -1
- package/voice/voice-auth-manager.mjs +102 -123
- package/voice/voice-tool-definitions.mjs +7 -7
- package/voice/voice-tools.mjs +837 -101
- package/workflow/action-approval.mjs +415 -0
- package/workflow/approval-queue.mjs +1254 -0
- package/workflow/credential-store.mjs +553 -0
- package/workflow/cron-scheduler.mjs +512 -0
- package/workflow/declarative-workflows.mjs +21 -2
- package/workflow/delegation-runtime.mjs +557 -0
- package/workflow/execution-ledger.mjs +1317 -33
- package/workflow/harness-approval-node.mjs +237 -0
- package/workflow/harness-output-contract.mjs +86 -0
- package/workflow/harness-session-node.mjs +160 -0
- package/workflow/harness-subagent-node.mjs +483 -0
- package/workflow/harness-tool-node.mjs +176 -0
- package/workflow/heavy-runner-pool.mjs +71 -4
- package/workflow/manual-flows.mjs +969 -28
- package/workflow/mcp-discovery-proxy.mjs +349 -137
- package/workflow/mcp-registry.mjs +331 -27
- package/workflow/meeting-workflow-service.mjs +24 -12
- package/workflow/pipeline-workflows.mjs +44 -2
- package/workflow/pipeline.mjs +72 -28
- package/workflow/project-detection.mjs +31 -6
- package/workflow/research-evidence-sidecar.mjs +1246 -0
- package/workflow/run-evaluator.mjs +2155 -0
- package/workflow/workflow-cli.mjs +229 -2
- package/workflow/workflow-contract.mjs +130 -2
- package/workflow/workflow-engine.mjs +5520 -298
- package/workflow/workflow-nodes/actions.mjs +15526 -0
- package/workflow/workflow-nodes/agent.mjs +1863 -0
- package/workflow/workflow-nodes/conditions.mjs +307 -0
- package/workflow/workflow-nodes/definitions.mjs +176 -15
- package/workflow/workflow-nodes/flow.mjs +749 -0
- package/workflow/workflow-nodes/loop.mjs +449 -0
- package/workflow/workflow-nodes/meetings.mjs +456 -0
- package/workflow/workflow-nodes/notifications.mjs +169 -0
- package/workflow/workflow-nodes/transforms.mjs +50 -24
- package/workflow/workflow-nodes/triggers.mjs +1405 -0
- package/workflow/workflow-nodes/validation.mjs +722 -0
- package/workflow/workflow-nodes.mjs +38 -15781
- package/workflow/workflow-serializer.mjs +294 -0
- package/workflow/workflow-templates.mjs +197 -12
- package/workflow-templates/_helpers.mjs +1 -3
- package/workflow-templates/agents.mjs +235 -27
- package/workflow-templates/bosun-native.mjs +3 -1
- package/workflow-templates/code-quality.mjs +1 -217
- package/workflow-templates/continuation-loop.mjs +22 -7
- package/workflow-templates/coverage.mjs +6 -2
- package/workflow-templates/github.mjs +2263 -136
- package/workflow-templates/planning.mjs +1 -22
- package/workflow-templates/reliability.mjs +383 -12
- package/workflow-templates/research-evidence.mjs +389 -0
- package/workflow-templates/security.mjs +75 -62
- package/workflow-templates/sub-workflows.mjs +11 -3
- package/workflow-templates/task-batch.mjs +100 -10
- package/workflow-templates/task-lifecycle.mjs +313 -49
- package/workspace/context-cache.mjs +934 -102
- package/workspace/context-indexer.mjs +915 -9
- package/workspace/context-injector.mjs +144 -0
- package/workspace/execution-journal.mjs +255 -0
- package/workspace/scope-locks.mjs +481 -0
- package/workspace/shared-knowledge.mjs +496 -91
- package/workspace/shared-state-manager.mjs +344 -1
- package/workspace/skillbook-store.mjs +681 -0
- package/workspace/workspace-manager.mjs +14 -8
- package/workspace/workspace-monitor.mjs +3 -3
- package/workspace/worktree-manager.mjs +54 -12
- package/workspace/worktree-setup.mjs +634 -43
- package/agent/rotate-agent-logs.sh +0 -134
|
@@ -0,0 +1,1246 @@
|
|
|
1
|
+
import { createHash } from "node:crypto";
|
|
2
|
+
import { existsSync, mkdirSync, readFileSync, statSync, writeFileSync } from "node:fs";
|
|
3
|
+
import { readdir } from "node:fs/promises";
|
|
4
|
+
import { basename, extname, isAbsolute, relative, resolve } from "node:path";
|
|
5
|
+
import { spawn, spawnSync } from "node:child_process";
|
|
6
|
+
import { fileURLToPath } from "node:url";
|
|
7
|
+
import { inflateSync } from "node:zlib";
|
|
8
|
+
import { getResearchToolBundle } from "./mcp-registry.mjs";
|
|
9
|
+
|
|
10
|
+
// Version string of this sidecar module.
const SIDECAR_VERSION = "1.2.0";

// Path segments that toArtifactDir() resolves against the repository root.
const ARTIFACT_DIR = [".bosun", "research-evidence"];

const DEFAULT_MAX_SOURCES = 6;

// Byte ceilings (NOTE(review): the consumers are outside this excerpt).
const MAX_TEXT_FILE_BYTES = 262_144; // 256 KiB
const MAX_PDF_FILE_BYTES = 12_582_912; // 12 MiB

// Default `limit` used by walkCandidateFiles().
const MAX_LOCAL_FILES = 32;

const MAX_RUNTIME_LOG_BYTES = 262_144; // 256 KiB

// Frozen list of repo-relative log file paths.
const DEFAULT_RUNTIME_LOG_FILES = Object.freeze(
  ["monitor", "error", "daemon"].map((name) => `.bosun/logs/${name}.log`),
);

// Lower-case file extensions (leading dot included), grouped by kind.
const TEXT_FILE_EXTENSIONS = new Set([
  // Documentation and prose
  ".md", ".markdown", ".txt", ".rst",
  // Structured data
  ".json", ".jsonl", ".yaml", ".yml", ".csv", ".tsv",
  // Markup
  ".html", ".htm", ".xml",
  // JavaScript / TypeScript
  ".mjs", ".js", ".cjs", ".ts", ".tsx", ".jsx",
  // Other source code
  ".py", ".go", ".rs", ".java",
  ".c", ".cc", ".cpp", ".h", ".hpp",
  ".sql", ".sh",
]);

const PDF_FILE_EXTENSIONS = new Set([".pdf"]);

// Executable name that extractPdfText() launches.
const PDFTOTEXT_COMMAND = "pdftotext";
|
|
56
|
+
|
|
57
|
+
/**
 * Coerces any value to a trimmed string.
 *
 * @param {unknown} value - Value to stringify; `null`/`undefined` become "".
 * @returns {string} The string form of `value` without surrounding whitespace.
 */
function normalizeString(value) {
  const text = value == null ? "" : String(value);
  return text.trim();
}
|
|
60
|
+
|
|
61
|
+
/**
 * Collapses whitespace runs to single spaces and shortens the result to at
 * most `maxLength` characters, ending a shortened string with "…".
 *
 * @param {unknown} text - Value to shorten (stringified and trimmed first).
 * @param {number} [maxLength=320] - Length above which the text is cut.
 * @returns {string} The collapsed text, possibly cut and suffixed with "…".
 */
function truncate(text, maxLength = 320) {
  const collapsed = normalizeString(text).replace(/\s+/g, " ");
  if (collapsed.length <= maxLength) {
    return collapsed;
  }
  // Keep one slot for the ellipsis and avoid a dangling space before it.
  const keep = Math.max(0, maxLength - 1);
  const head = collapsed.slice(0, keep).trimEnd();
  return `${head}…`;
}
|
|
66
|
+
|
|
67
|
+
/**
 * Normalizes each entry and removes blanks and case-insensitive duplicates.
 * The first spelling of each value wins and input order is preserved.
 *
 * @param {Iterable<unknown>} [values=[]] - Entries to normalize.
 * @returns {string[]} Unique, trimmed, non-empty strings.
 */
function dedupeStrings(values = []) {
  // Map keeps insertion order, so the first occurrence fixes the position.
  const firstByKey = new Map();
  for (const raw of values) {
    const text = normalizeString(raw);
    if (!text) continue;
    const key = text.toLowerCase();
    if (!firstByKey.has(key)) {
      firstByKey.set(key, text);
    }
  }
  return [...firstByKey.values()];
}
|
|
80
|
+
|
|
81
|
+
/**
 * Interprets a loosely typed flag.
 *
 * @param {unknown} value - Boolean, or a token such as "yes"/"off"/"1".
 * @param {boolean} [fallback=false] - Returned for blank or unknown input.
 * @returns {boolean} The parsed flag, or `fallback`.
 */
function parseBoolean(value, fallback = false) {
  if (typeof value === "boolean") return value;
  switch (normalizeString(value).toLowerCase()) {
    case "1":
    case "true":
    case "yes":
    case "on":
    case "y":
      return true;
    case "0":
    case "false":
    case "no":
    case "off":
    case "n":
      return false;
    default:
      // Covers the empty string as well as unknown tokens.
      return fallback;
  }
}
|
|
89
|
+
|
|
90
|
+
/**
 * Parses a loosely typed integer setting and clamps it to `[min, max]`.
 *
 * @param {unknown} value - Number or numeric string.
 * @param {number} fallback - Returned when `value` is missing, blank, or not
 *   a finite number.
 * @param {number} [min=1] - Lower clamp bound.
 * @param {number} [max=50] - Upper clamp bound.
 * @returns {number} The floored, clamped integer, or `fallback`.
 */
function parseInteger(value, fallback, min = 1, max = 50) {
  // Number(null) and Number("") are both 0, which would otherwise be
  // clamped to `min` instead of being treated as "not provided".
  if (value == null) return fallback;
  if (typeof value === "string" && value.trim() === "") return fallback;
  const parsed = Number(value);
  if (!Number.isFinite(parsed)) return fallback;
  return Math.max(min, Math.min(max, Math.floor(parsed)));
}
|
|
95
|
+
|
|
96
|
+
/**
 * Decodes the body of a PDF literal string (the text between the outer
 * parentheses) by resolving backslash escapes.
 *
 * Handled escapes: `\n \r \t \b \f \( \) \\`, one- to three-digit octal codes,
 * and backslash + end-of-line (line continuation, which produces no output).
 * For any other escaped character the backslash is dropped and the character
 * kept. A lone trailing backslash is discarded.
 *
 * @param {string} value - Literal string body without the enclosing parens.
 * @returns {string} Decoded text, one character per decoded byte.
 */
function decodePdfLiteralString(value) {
  let out = "";
  for (let index = 0; index < value.length; index += 1) {
    const char = value[index];
    if (char !== "\\") {
      out += char;
      continue;
    }
    const next = value[index + 1];
    if (next == null) break;
    if (/[0-7]/.test(next)) {
      let octal = next;
      for (let offset = 2; offset <= 3; offset += 1) {
        const candidate = value[index + offset];
        if (candidate == null || !/[0-7]/.test(candidate)) break;
        octal += candidate;
      }
      // An octal escape denotes a single byte; per the PDF specification
      // high-order overflow (e.g. \400) is ignored, hence the 8-bit mask.
      out += String.fromCharCode(Number.parseInt(octal, 8) & 0xff);
      // Skip the digits; the loop increment then skips the backslash.
      index += octal.length;
      continue;
    }
    switch (next) {
      case "n":
        out += "\n";
        break;
      case "r":
        out += "\r";
        break;
      case "t":
        out += "\t";
        break;
      case "b":
        out += "\b";
        break;
      case "f":
        out += "\f";
        break;
      case "(":
      case ")":
      case "\\":
        out += next;
        break;
      case "\r":
        // Line continuation: swallow CR and, when present, the following LF.
        if (value[index + 2] === "\n") index += 1;
        break;
      case "\n":
        // Line continuation: swallow the LF.
        break;
      default:
        out += next;
        break;
    }
    // Step over the escaped character.
    index += 1;
  }
  return out;
}
|
|
151
|
+
|
|
152
|
+
/**
 * Decodes the body of a PDF hexadecimal string (the text between `<` and `>`).
 * Whitespace is ignored and an odd digit count is padded with a trailing "0".
 *
 * @param {string} value - Hex digits, optionally separated by whitespace.
 * @returns {string} One character per decoded byte (latin1).
 */
function decodePdfHexString(value) {
  const digits = value.replace(/\s+/g, "");
  if (digits.length === 0) return "";
  const padded = digits.length % 2 === 1 ? `${digits}0` : digits;
  const pairs = padded.match(/.{2}/g) ?? [];
  const bytes = pairs
    .map((pair) => Number.parseInt(pair, 16))
    // Pairs that do not start with a hex digit parse to NaN and are skipped.
    .filter((byte) => Number.isFinite(byte));
  return Buffer.from(bytes).toString("latin1");
}
|
|
163
|
+
|
|
164
|
+
/**
 * Pulls the operands of PDF text-showing operators out of a decoded content
 * stream: `(literal) Tj`, `<hex> Tj` and `[ ... ] TJ` arrays.
 *
 * Tokens are returned in the order their operators appear in the stream, so
 * text that mixes the three forms is not reshuffled. Each token has its
 * whitespace collapsed; empty tokens are dropped.
 *
 * @param {string} content - Content stream text.
 * @returns {string} Extracted tokens joined with "\n" ("" when none found).
 */
function extractTextFromPdfOperators(content) {
  const found = [];
  const record = (index, value) => {
    const normalized = normalizeString(value).replace(/\s+/g, " ");
    if (normalized) found.push({ index, text: normalized });
  };

  // `\\[\s\S]` (rather than `\\.`) lets an escaped line break inside the
  // string match, so line continuations reach decodePdfLiteralString.
  const literalPattern = /\(((?:\\[\s\S]|[^\\()])*)\)\s*Tj\b/g;
  for (const match of content.matchAll(literalPattern)) {
    record(match.index, decodePdfLiteralString(match[1]));
  }

  const hexPattern = /<([0-9a-fA-F\s]+)>\s*Tj\b/g;
  for (const match of content.matchAll(hexPattern)) {
    record(match.index, decodePdfHexString(match[1]));
  }

  const arrayPattern = /\[((?:.|\r|\n)*?)\]\s*TJ\b/g;
  const entryPattern = /\(((?:\\[\s\S]|[^\\()])*)\)|<([0-9a-fA-F\s]+)>/g;
  for (const match of content.matchAll(arrayPattern)) {
    // Only the string entries matter; anything else in the array is skipped.
    const fragments = [];
    for (const entry of match[1].matchAll(entryPattern)) {
      if (entry[1] !== undefined) {
        fragments.push(decodePdfLiteralString(entry[1]));
      } else {
        fragments.push(decodePdfHexString(entry[2] || ""));
      }
    }
    record(match.index, fragments.join(""));
  }

  // The three scans run independently; restore stream order by offset.
  return found
    .sort((left, right) => left.index - right.index)
    .map((entry) => entry.text)
    .join("\n");
}
|
|
197
|
+
|
|
198
|
+
/**
 * Best-effort text extraction from raw PDF bytes without external tools.
 *
 * Every `stream … endstream` section is scanned. A section whose preceding
 * 256 characters mention `/FlateDecode` is inflated first, and sections that
 * fail to inflate are skipped. Text operators are then read from each section.
 *
 * @param {Buffer} buffer - Raw PDF file contents.
 * @returns {{ text: string, pageCount: number, ingestionMethod: string }}
 *   Whitespace-collapsed text, the number of `/Type /Page` markers (at least
 *   1), and the fixed method tag "pdf-inline-parser".
 * @throws {Error} When no text could be extracted from any stream.
 */
export function extractPdfTextHeuristically(buffer) {
  // latin1 maps bytes to characters 1:1, so string offsets equal byte offsets.
  const binary = buffer.toString("latin1");
  const pageCount = (binary.match(/\/Type\s*\/Page\b/g) ?? []).length;

  const texts = [];
  for (const match of binary.matchAll(/stream\r?\n([\s\S]*?)endstream/g)) {
    const [whole, body] = match;
    const start = match.index + whole.indexOf(body);
    let streamBuffer = buffer.subarray(start, start + body.length);

    const dictPreview = binary.slice(Math.max(0, match.index - 256), match.index);
    if (/\/FlateDecode\b/.test(dictPreview)) {
      try {
        streamBuffer = inflateSync(streamBuffer);
      } catch {
        // Not a valid deflate stream; ignore this section.
        continue;
      }
    }

    const extracted = extractTextFromPdfOperators(streamBuffer.toString("latin1"));
    if (normalizeString(extracted)) {
      texts.push(extracted);
    }
  }

  const text = normalizeString(texts.join("\n")).replace(/\s+/g, " ");
  if (!text) {
    throw new Error("No extractable text operators found in PDF content streams.");
  }
  return {
    text,
    pageCount: Math.max(1, pageCount),
    ingestionMethod: "pdf-inline-parser",
  };
}
|
|
231
|
+
|
|
232
|
+
/**
 * Extracts text from a PDF file, running the `pdftotext` executable first and
 * falling back to the inline parser.
 *
 * @param {string} filePath - Path of the PDF to read.
 * @returns {{ text: string, pageCount: number, ingestionMethod: string }}
 *   Whitespace-collapsed text, page count (at least 1), and either
 *   "pdftotext" or the method reported by the inline parser.
 * @throws {Error} When the file cannot be read, or when the fallback parser
 *   finds no text.
 */
export function extractPdfText(filePath) {
  const pdfBytes = readFileSync(filePath);
  try {
    const { status, stdout: rawStdout } = spawnSync(
      PDFTOTEXT_COMMAND,
      ["-q", "-enc", "UTF-8", filePath, "-"],
      {
        encoding: "utf8",
        stdio: ["ignore", "pipe", "pipe"],
        windowsHide: true,
      },
    );
    const stdout = normalizeString(rawStdout || "").replace(/\s+/g, " ");
    // Any other exit status, or empty output, falls through to the fallback.
    if (status === 0 && stdout) {
      const pageMarkers = pdfBytes.toString("latin1").match(/\/Type\s*\/Page\b/g);
      return {
        text: stdout,
        pageCount: Math.max(1, pageMarkers?.length || 0),
        ingestionMethod: "pdftotext",
      };
    }
  } catch {
    // Fall through to the inline parser when Poppler is unavailable.
  }
  return extractPdfTextHeuristically(pdfBytes);
}
|
|
255
|
+
|
|
256
|
+
/**
 * Splits text into unique lower-case word tokens of at least three characters.
 *
 * Words are runs of Unicode letters, combining marks and digits, so accented
 * and non-Latin words stay intact instead of being cut at every non-ASCII
 * character. ASCII-only input yields the same tokens as an `[a-z0-9]` split.
 *
 * @param {unknown} text - Value to tokenize (stringified and trimmed first).
 * @returns {string[]} Distinct tokens in first-seen order.
 */
function tokenize(text) {
  const words = normalizeString(text)
    .toLowerCase()
    .split(/[^\p{L}\p{M}\p{N}]+/u);
  return [...new Set(words.filter((word) => word.length >= 3))];
}
|
|
267
|
+
|
|
268
|
+
/**
 * Counts how many problem tokens occur among the candidate text's tokens.
 *
 * @param {Iterable<string>} problemTokens - Tokens to look for; repeated
 *   entries are counted each time they match.
 * @param {unknown} candidateText - Text that is tokenized and searched.
 * @returns {number} Number of entries of `problemTokens` found.
 */
function scoreEvidenceCandidate(problemTokens, candidateText) {
  const candidateTokens = new Set(tokenize(candidateText));
  const hits = [...problemTokens].filter((token) => candidateTokens.has(token));
  return hits.length;
}
|
|
276
|
+
|
|
277
|
+
/**
 * Builds a lower-case, dash-separated slug of at most 48 characters.
 *
 * @param {unknown} text - Value to slugify (stringified and trimmed first).
 * @returns {string} The slug, or "research" when nothing usable remains.
 */
function slugify(text) {
  const slug = normalizeString(text)
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, "-")
    .replace(/^-+|-+$/g, "")
    .slice(0, 48)
    // The length cut can land right after a separator; drop that dash so
    // slugs never end in "-".
    .replace(/-+$/, "");
  return slug || "research";
}
|
|
284
|
+
|
|
285
|
+
/**
 * Derives a short deterministic identifier from a seed.
 *
 * @param {string} seed - Data to hash.
 * @returns {string} First 12 hex characters of the seed's SHA-1 digest.
 */
function makeSourceId(seed) {
  const hash = createHash("sha1");
  hash.update(seed);
  return hash.digest("hex").substring(0, 12);
}
|
|
288
|
+
|
|
289
|
+
/**
 * Resolves the artifact directory for a repository.
 *
 * @param {string} repoRoot - Repository root path.
 * @returns {string} Absolute path of `repoRoot` joined with ARTIFACT_DIR.
 */
function toArtifactDir(repoRoot) {
  const segments = [repoRoot, ...ARTIFACT_DIR];
  return resolve(...segments);
}
|
|
292
|
+
|
|
293
|
+
/**
 * Trims a path and converts every backslash to a forward slash.
 *
 * @param {unknown} filePath - Path to normalize (stringified first).
 * @returns {string} The path written with "/" separators only.
 */
function normalizeRepoRelativePath(filePath) {
  const trimmed = normalizeString(filePath);
  return trimmed.split("\\").join("/");
}
|
|
296
|
+
|
|
297
|
+
/**
 * Wraps a value in an array unless it already is one.
 *
 * @param {unknown} value - Any value.
 * @returns {unknown[]} `value` itself when it is an array, `[]` for
 *   `null`/`undefined`/"", otherwise a one-element array.
 */
function asArray(value) {
  if (value == null || value === "") return [];
  return Array.isArray(value) ? value : [value];
}
|
|
302
|
+
|
|
303
|
+
/**
 * Flattens a path specification into a list of unique paths.
 *
 * Arrays are processed recursively. Any other value is stringified and split
 * on line breaks, commas and semicolons. Blanks and case-insensitive
 * duplicates are removed by dedupeStrings().
 *
 * @param {unknown} value - A delimited string, or a (nested) array of them.
 * @returns {string[]} Unique, trimmed paths in first-seen order.
 */
function parseCorpusPaths(value) {
  const pieces = Array.isArray(value)
    ? value.flatMap((entry) => parseCorpusPaths(entry))
    : normalizeString(value)
        .split(/[\r\n,;]+/)
        .map((entry) => entry.trim());
  return dedupeStrings(pieces);
}
|
|
315
|
+
|
|
316
|
+
/**
 * Parses a generic path list; behaves exactly like parseCorpusPaths().
 *
 * @param {unknown} value - A delimited string, or a (nested) array of them.
 * @returns {string[]} Unique, trimmed paths.
 */
function parsePathList(value) {
  const paths = parseCorpusPaths(value);
  return paths;
}
|
|
319
|
+
|
|
320
|
+
/**
 * Converts one raw search hit into the module's evidence-source shape.
 *
 * @param {object} raw - Search hit; title, link and snippet are read from
 *   several alternative property names.
 * @param {number} index - Zero-based position of the hit in its result list.
 * @returns {object|null} The normalized source with origin
 *   "literature-search", or `null` when `raw` is not an object.
 */
function normalizeLiteratureResult(raw, index) {
  if (!raw || typeof raw !== "object") {
    return null;
  }
  const rank = index + 1;
  const title = normalizeString(raw.title || raw.name || `Web Result ${rank}`);
  const url = normalizeString(raw.url || raw.link || raw.href);
  const snippet = normalizeString(raw.snippet || raw.description || raw.body || raw.text);
  // The id is a hash of the normalized fields, so identical hits share an id.
  const id = `web-${makeSourceId(`${title}|${url}|${snippet}`)}`;
  const metadata = {
    rank,
    domain: normalizeString(raw.domain || ""),
  };
  return {
    id,
    title,
    citation: title,
    locator: url || null,
    origin: "literature-search",
    excerpt: truncate(snippet, 420),
    score: 0,
    metadata,
  };
}
|
|
339
|
+
|
|
340
|
+
/**
 * Convert one source reported by an external sidecar into the evidence-source
 * shape used by this module.
 *
 * Missing fields fall back as follows: the title falls back to the citation and
 * then to a positional placeholder; the id falls back to a hash of
 * title/locator/excerpt; the origin falls back to "external-sidecar"; a
 * non-finite score becomes 0; non-object metadata becomes `{}`.
 *
 * @param {object} raw - Raw source object from the sidecar.
 * @param {number} index - Zero-based position, used only for the placeholder title.
 * @returns {object|null} The normalized source, or `null` when `raw` is not an object.
 */
function normalizeExternalSource(raw, index) {
  const isRecord = Boolean(raw) && typeof raw === "object";
  if (!isRecord) return null;

  const title = normalizeString(raw.title || raw.citation || `External Source ${index + 1}`);
  const locator = normalizeString(raw.locator || raw.url || raw.path);
  const excerpt = truncate(raw.excerpt || raw.summary || raw.text || raw.snippet, 420);

  // Only hash when the sidecar did not supply its own identifier.
  const explicitId = normalizeString(raw.id);
  const id = explicitId || `external-${makeSourceId(`${title}|${locator}|${excerpt}`)}`;

  const numericScore = Number(raw.score);
  const hasMetadata = Boolean(raw.metadata) && typeof raw.metadata === "object";

  return {
    id,
    title,
    citation: normalizeString(raw.citation) || title,
    locator: locator || null,
    origin: normalizeString(raw.origin || "external-sidecar"),
    excerpt,
    score: Number.isFinite(numericScore) ? numericScore : 0,
    metadata: hasMetadata ? raw.metadata : {},
  };
}
|
|
356
|
+
|
|
357
|
+
/**
 * Recursively collect corpus files under `targetPath`.
 *
 * Directories are walked in sorted name order. A file is appended to
 * `collected` only when its extension is in `TEXT_FILE_EXTENSIONS` or
 * `PDF_FILE_EXTENSIONS` and its size does not exceed the matching byte limit.
 * Everything that is skipped is reported through `warnings` rather than thrown.
 *
 * Filesystem failures while inspecting or listing a path (for example
 * permission errors, or an entry that disappears between the existence check
 * and the stat call) are also turned into warnings, so one bad entry does not
 * abort the whole walk.
 *
 * @param {string} targetPath - Absolute path of the file or directory to inspect.
 * @param {string[]} collected - Output list; accepted file paths are pushed onto it.
 * @param {string[]} warnings - Output list; human-readable skip/failure notes are pushed onto it.
 * @param {number} [limit=MAX_LOCAL_FILES] - Stop once `collected` holds this many paths.
 * @returns {Promise<void>}
 */
async function walkCandidateFiles(targetPath, collected, warnings, limit = MAX_LOCAL_FILES) {
  if (collected.length >= limit) return;
  if (!existsSync(targetPath)) {
    warnings.push(`Corpus path not found: ${targetPath}`);
    return;
  }

  let stats;
  try {
    stats = statSync(targetPath);
  } catch (error) {
    warnings.push(`Failed to inspect corpus path ${targetPath}: ${error.message}`);
    return;
  }

  if (stats.isDirectory()) {
    let names;
    try {
      names = await readdir(targetPath);
    } catch (error) {
      warnings.push(`Failed to list corpus directory ${targetPath}: ${error.message}`);
      return;
    }
    // Sorted traversal keeps the collected order deterministic.
    for (const name of names.sort()) {
      if (collected.length >= limit) break;
      await walkCandidateFiles(resolve(targetPath, name), collected, warnings, limit);
    }
    return;
  }

  if (!stats.isFile()) return;

  const extension = extname(targetPath).toLowerCase();
  const isTextFile = TEXT_FILE_EXTENSIONS.has(extension);
  const isPdfFile = PDF_FILE_EXTENSIONS.has(extension);
  if (!isTextFile && !isPdfFile) {
    warnings.push(`Skipped non-text corpus file: ${targetPath}`);
    return;
  }

  const byteLimit = isPdfFile ? MAX_PDF_FILE_BYTES : MAX_TEXT_FILE_BYTES;
  if (stats.size > byteLimit) {
    warnings.push(`Skipped large corpus file (${stats.size} bytes): ${targetPath}`);
    return;
  }

  collected.push(targetPath);
}
|
|
387
|
+
|
|
388
|
+
/**
 * Read a text file and return a bounded excerpt of it.
 *
 * @param {string} filePath - File to read as UTF-8.
 * @returns {string} The file content with NUL characters removed and outer
 *   whitespace trimmed, passed through `truncate` with a limit of 1200.
 */
function readTextExcerpt(filePath) {
  const contents = readFileSync(filePath, "utf8");
  const withoutNuls = contents.replace(/\0/g, "");
  return truncate(withoutNuls.trim(), 1200);
}
|
|
393
|
+
|
|
394
|
+
/**
 * Read at most the last `maxBytes` bytes of a file as UTF-8 text.
 *
 * When the file is longer than `maxBytes`, the cut is made at a byte offset,
 * which may fall inside a multi-byte UTF-8 sequence. To avoid decoding a
 * broken leading character into U+FFFD, up to three leading continuation
 * bytes (`10xxxxxx`) are skipped so the tail starts on a character boundary.
 * NUL characters are removed from the result.
 *
 * @param {string} filePath - File to read.
 * @param {number} [maxBytes=MAX_RUNTIME_LOG_BYTES] - Maximum number of trailing bytes to keep.
 * @returns {string} Decoded tail of the file without NUL characters.
 */
function readTailUtf8(filePath, maxBytes = MAX_RUNTIME_LOG_BYTES) {
  const buffer = readFileSync(filePath);
  let start = Math.max(0, buffer.length - maxBytes);
  if (start > 0) {
    // A UTF-8 character has at most three continuation bytes; never skip more.
    let skipped = 0;
    while (skipped < 3 && start < buffer.length && (buffer[start] & 0xc0) === 0x80) {
      start += 1;
      skipped += 1;
    }
  }
  return buffer.subarray(start).toString("utf8").replace(/\0/g, "");
}
|
|
401
|
+
|
|
402
|
+
/**
 * Pick the log lines that best match the problem tokens.
 *
 * The text is split into trimmed, non-empty lines. A line matches when its
 * lower-cased form contains at least one problem token as a substring; its
 * score is the number of tokens it contains. Matches are ordered by score
 * (highest first) and then alphabetically.
 *
 * Identical lines are collapsed before the `maxLines` cut is applied, so a
 * message repeated many times in the log cannot crowd distinct matches out of
 * the excerpt.
 *
 * @param {unknown} text - Raw log text.
 * @param {string[]} problemTokens - Tokens to look for in each lower-cased line.
 * @param {number} [maxLines=8] - Maximum number of lines to return.
 * @returns {string[]} Up to `maxLines` distinct matching lines.
 */
function collectMatchedLogLines(text, problemTokens, maxLines = 8) {
  const lines = String(text || "")
    .split(/\r?\n/)
    .map((line) => line.trim())
    .filter(Boolean);
  if (lines.length === 0) return [];

  const seenLines = new Set();
  const matched = [];
  for (const line of lines) {
    if (seenLines.has(line)) continue;
    seenLines.add(line);
    const haystack = line.toLowerCase();
    const score = problemTokens.filter((token) => haystack.includes(token)).length;
    if (score === 0) continue;
    matched.push({ line, score });
  }

  matched.sort((left, right) => right.score - left.score || left.line.localeCompare(right.line));
  const excerptLines = matched.slice(0, maxLines).map((entry) => entry.line);
  // Kept as a final pass in case dedupeStrings applies extra normalization
  // beyond exact-match de-duplication (its definition is outside this block).
  return dedupeStrings(excerptLines);
}
|
|
423
|
+
|
|
424
|
+
/**
 * Map a file extension to a MIME content type.
 *
 * @param {string} extension - Extension including the leading dot; callers in
 *   this file pass it already lower-cased.
 * @returns {string} "application/pdf" for extensions in `PDF_FILE_EXTENSIONS`,
 *   a specific type for the known structured-text extensions, otherwise
 *   "text/plain".
 */
function resolveContentType(extension) {
  if (PDF_FILE_EXTENSIONS.has(extension)) return "application/pdf";
  switch (extension) {
    case ".json":
      return "application/json";
    case ".jsonl":
      return "application/x-ndjson";
    case ".yaml":
    case ".yml":
      return "application/yaml";
    case ".html":
    case ".htm":
      return "text/html";
    case ".xml":
      return "application/xml";
    case ".csv":
      return "text/csv";
    case ".tsv":
      return "text/tab-separated-values";
    default:
      return "text/plain";
  }
}
|
|
435
|
+
|
|
436
|
+
/**
 * Read one corpus file and describe how it was ingested.
 *
 * PDF files (by extension) go through `extractPdfText`; every other file is
 * read as UTF-8 text via `readTextExcerpt`.
 *
 * @param {string} filePath - File to read.
 * @returns {{ excerpt: string, metadata: object }} The excerpt plus metadata
 *   holding the extension, size, content type, source kind and ingestion
 *   method (and, for PDFs, page count and extracted character count).
 */
function readCorpusSource(filePath) {
  const { size } = statSync(filePath);
  const extension = extname(filePath).toLowerCase();
  const sharedMetadata = {
    fileExtension: extension,
    fileSizeBytes: size,
    contentType: resolveContentType(extension),
  };

  if (!PDF_FILE_EXTENSIONS.has(extension)) {
    return {
      excerpt: readTextExcerpt(filePath),
      metadata: {
        ...sharedMetadata,
        sourceKind: "text",
        ingestionMethod: "utf8-read",
      },
    };
  }

  const pdf = extractPdfText(filePath);
  return {
    excerpt: truncate(pdf.text, 1200),
    metadata: {
      ...sharedMetadata,
      sourceKind: "pdf",
      ingestionMethod: pdf.ingestionMethod,
      pageCount: pdf.pageCount,
      extractedCharacters: pdf.text.length,
    },
  };
}
|
|
466
|
+
|
|
467
|
+
/**
 * Build evidence sources from the configured corpus paths.
 *
 * Relative corpus paths are resolved against `config.repoRoot`. Each path is
 * walked with `walkCandidateFiles`, and every accepted file is read and scored
 * against the tokens of `config.problem`. A file that cannot be read adds a
 * warning and is otherwise ignored.
 *
 * @param {object} config - Sidecar configuration; reads `corpusPaths`, `repoRoot` and `problem`.
 * @returns {Promise<{ sources: object[], warnings: string[] }>} Collected sources and warnings.
 */
async function collectCorpusSources(config) {
  const warnings = [];
  const candidateFiles = [];

  // Sequential on purpose: every walk shares the same output list and file limit.
  for (const rawPath of config.corpusPaths) {
    const startPath = isAbsolute(rawPath) ? rawPath : resolve(config.repoRoot, rawPath);
    await walkCandidateFiles(startPath, candidateFiles, warnings);
  }

  const problemTokens = tokenize(config.problem);

  const describeFile = (filePath) => {
    const { excerpt, metadata } = readCorpusSource(filePath);
    const relPath = normalizeRepoRelativePath(relative(config.repoRoot, filePath) || basename(filePath));
    return {
      id: `corpus-${makeSourceId(`${filePath}|${excerpt}`)}`,
      title: relPath,
      citation: relPath,
      locator: filePath,
      origin: "corpus",
      excerpt,
      score: scoreEvidenceCandidate(problemTokens, `${relPath}\n${excerpt}`),
      metadata: {
        relativePath: relPath,
        ...metadata,
      },
    };
  };

  const sources = [];
  for (const filePath of candidateFiles) {
    try {
      sources.push(describeFile(filePath));
    } catch (error) {
      warnings.push(`Failed to read corpus file ${filePath}: ${error.message}`);
    }
  }

  return { sources, warnings };
}
|
|
501
|
+
|
|
502
|
+
/**
 * Build evidence sources from runtime log files.
 *
 * Does nothing unless `config.includeRuntimeLogEvidence` is exactly `true`.
 * Otherwise it inspects `config.runtimeLogPaths` (or `DEFAULT_RUNTIME_LOG_FILES`
 * when that list is empty), reads the tail of every existing regular file and
 * keeps the lines that match the problem tokens. Files without matching lines
 * produce no source; read failures are reported as warnings.
 *
 * @param {object} config - Sidecar configuration; reads `includeRuntimeLogEvidence`,
 *   `runtimeLogPaths`, `repoRoot` and `problem`.
 * @returns {Promise<{ sources: object[], warnings: string[] }>} Collected sources and warnings.
 */
async function collectRuntimeLogSources(config) {
  const sources = [];
  const warnings = [];
  if (config.includeRuntimeLogEvidence !== true) {
    return { sources, warnings };
  }

  const problemTokens = tokenize(config.problem);
  const requestedPaths = config.runtimeLogPaths.length > 0
    ? config.runtimeLogPaths
    : DEFAULT_RUNTIME_LOG_FILES;
  const candidatePaths = dedupeStrings(
    requestedPaths.map((rawPath) => (isAbsolute(rawPath) ? rawPath : resolve(config.repoRoot, rawPath))),
  );

  for (const filePath of candidatePaths) {
    if (!existsSync(filePath)) continue;
    try {
      const stats = statSync(filePath);
      if (!stats.isFile()) continue;

      const tailText = readTailUtf8(filePath, MAX_RUNTIME_LOG_BYTES);
      const matchedLines = collectMatchedLogLines(tailText, problemTokens);
      if (matchedLines.length === 0) continue;

      const relPath = normalizeRepoRelativePath(relative(config.repoRoot, filePath) || basename(filePath));
      const excerpt = truncate(matchedLines.join("\n"), 1200);
      const metadata = {
        relativePath: relPath,
        sourceKind: "runtime-log",
        contentType: "text/plain",
        ingestionMethod: "tail-line-query",
        fileSizeBytes: stats.size,
        matchedLineCount: matchedLines.length,
      };
      sources.push({
        id: `runtime-log-${makeSourceId(`${filePath}|${excerpt}`)}`,
        title: relPath,
        citation: relPath,
        locator: filePath,
        origin: "runtime-log",
        excerpt,
        score: scoreEvidenceCandidate(problemTokens, `${relPath}\n${excerpt}`),
        metadata,
      });
    } catch (error) {
      warnings.push(`Failed to read runtime log ${filePath}: ${error.message}`);
    }
  }

  return { sources, warnings };
}
|
|
546
|
+
|
|
547
|
+
/**
 * Return the one-sentence analysis instruction for an evidence mode.
 *
 * @param {string} mode - One of "contradictions", "summarize", "evidence-only" or "answer".
 * @returns {string} The hint for that mode; any other value (including
 *   "answer") yields the default "ground the answer" hint.
 */
function buildAnalysisPromptHint(mode) {
  // A Map (not a plain object) so inherited keys such as "constructor" never match.
  const hintsByMode = new Map([
    ["contradictions", "Prioritize conflicting findings, disputed assumptions, and explicit disagreement between sources."],
    ["summarize", "Prioritize concise evidence synthesis with citations rather than exploratory speculation."],
    ["evidence-only", "Return an evidence inventory and uncertainty summary without inventing unsupported conclusions."],
  ]);
  if (hintsByMode.has(mode)) {
    return hintsByMode.get(mode);
  }
  return "Ground the answer in the strongest cited evidence and state uncertainty when support is incomplete.";
}
|
|
560
|
+
|
|
561
|
+
/**
 * Return the entries of `left` that also occur in `right`.
 *
 * Order and duplicates follow `left`.
 *
 * @param {Array} [left=[]] - Values to filter.
 * @param {Iterable} [right=[]] - Values to test membership against.
 * @returns {Array} Entries of `left` that are present in `right`.
 */
function intersection(left = [], right = []) {
  const lookup = new Set(right);
  const shared = [];
  left.forEach((value) => {
    if (lookup.has(value)) shared.push(value);
  });
  return shared;
}
|
|
565
|
+
|
|
566
|
+
/**
 * Split text into sentences.
 *
 * The split happens on whitespace that follows ".", "!" or "?". Each fragment
 * is passed through `normalizeString`, and fragments that come back empty are
 * dropped.
 *
 * @param {unknown} text - Text to split.
 * @returns {string[]} Non-empty normalized sentences, in their original order.
 */
function splitIntoSentences(text) {
  const sentences = [];
  for (const fragment of normalizeString(text).split(/(?<=[.!?])\s+/)) {
    const sentence = normalizeString(fragment);
    if (sentence) sentences.push(sentence);
  }
  return sentences;
}
|
|
572
|
+
|
|
573
|
+
/**
 * Label how strongly a source supports the problem statement.
 *
 * The decision uses the larger of the numeric source score and the overlap
 * count; missing or non-numeric inputs count as 0.
 *
 * @param {unknown} score - Source score (coerced with `Number`).
 * @param {number} overlapCount - Number of overlapping problem tokens.
 * @returns {"strong"|"moderate"|"limited"} "strong" from 5, "moderate" from 3, otherwise "limited".
 */
function classifySupportStrength(score, overlapCount) {
  const numericScore = Number(score) || 0;
  const strength = Math.max(numericScore, overlapCount || 0);
  if (strength >= 5) return "strong";
  return strength >= 3 ? "moderate" : "limited";
}
|
|
579
|
+
|
|
580
|
+
/**
 * Pick the sentence of a source excerpt that best matches the problem.
 *
 * Sentences are ranked by how many problem tokens they share; among equally
 * ranked sentences the strictly longer one wins, and earlier sentences win
 * remaining ties. When the excerpt yields no sentences, the truncated excerpt
 * itself is returned with no overlap.
 *
 * @param {{ excerpt: string }} source - Evidence source whose excerpt is analysed.
 * @param {string[]} problemTokens - Tokens taken from the problem statement.
 * @returns {{ sentence: string, overlapTokens: string[] }} The chosen sentence
 *   (via `truncate` with a limit of 220) and the problem tokens it shares.
 */
function chooseRepresentativeSentence(source, problemTokens) {
  const sentences = splitIntoSentences(source.excerpt);
  if (sentences.length === 0) {
    return { sentence: truncate(source.excerpt, 220), overlapTokens: [] };
  }

  const winner = sentences
    .map((sentence) => ({
      sentence,
      overlapTokens: intersection(problemTokens, tokenize(sentence)),
    }))
    .reduce((best, candidate) => {
      const candidateScore = candidate.overlapTokens.length;
      const bestScore = best.overlapTokens.length;
      const hasMoreOverlap = candidateScore > bestScore;
      const isLongerTie = candidateScore === bestScore
        && candidate.sentence.length > best.sentence.length;
      return hasMoreOverlap || isLongerTie ? candidate : best;
    });

  return {
    sentence: truncate(winner.sentence, 220),
    overlapTokens: winner.overlapTokens,
  };
}
|
|
607
|
+
|
|
608
|
+
/**
 * Produce one highlight per evidence source.
 *
 * @param {object[]} sources - Evidence sources, in citation order.
 * @param {string[]} problemTokens - Tokens taken from the problem statement.
 * @returns {object[]} For each source: its positional citation key (`[E1]`,
 *   `[E2]`, ...), the representative sentence as `finding`, a support-strength
 *   label, the overlapping problem tokens and the source title.
 */
function buildSourceHighlights(sources, problemTokens) {
  return sources.map((source, position) => {
    const { sentence, overlapTokens } = chooseRepresentativeSentence(source, problemTokens);
    const supportStrength = classifySupportStrength(source.score, overlapTokens.length);
    return {
      citationKey: `[E${position + 1}]`,
      finding: sentence,
      supportStrength,
      overlapTokens,
      title: source.title,
    };
  });
}
|
|
621
|
+
|
|
622
|
+
// Case-insensitive phrases counted as evidence *for* a claim.
// classifyEvidenceStance scores a text with scorePatternMatches, which adds
// one point for every pattern in this list that matches at least once.
const SUPPORT_PATTERNS = [
  /\bimprov(?:e|es|ed|ing)\b/i,
  /\beffective\b/i,
  /\bbenefit(?:s|ed|ing)?\b/i,
  /\brecommend(?:ed|s|ing)?\b/i,
  /\bsupport(?:s|ed|ing)?\b/i,
  /\breduc(?:e|es|ed|ing)\b/i,
  /\blower(?:s|ed|ing)?\b/i,
  /\boutperform(?:s|ed|ing)?\b/i,
];
|
|
632
|
+
|
|
633
|
+
// Case-insensitive phrases counted as evidence *against* a claim.
// Each pattern that matches adds one point (see scorePatternMatches).
// Several patterns overlap on purpose or by accident: for example "no evidence"
// matches both the generic negation pattern (first entry) and its own entry,
// and "does not support" matches three entries, so such phrases score more
// than one point.
const REFUTE_PATTERNS = [
  // Generic negation: "does not" / "do not" / "did not" / "no" followed by any word.
  /\b(?:does not|do not|did not|no)\s+\w+/i,
  /\bineffective\b/i,
  /\bnot recommended\b/i,
  /\bno evidence\b/i,
  /\bfail(?:s|ed|ing)?\s+to\b/i,
  /\bdoes not support\b/i,
  /\bnot support(?:ed|ing)?\b/i,
  /\bworsen(?:s|ed|ing)?\b/i,
  // "increase ... risk" anywhere later in the same text.
  /\bincreas(?:e|es|ed|ing)\b.*\brisk\b/i,
  /\bmay increase\b/i,
];
|
|
645
|
+
|
|
646
|
+
// Case-insensitive hedging words. classifyEvidenceStance only consults this
// score when neither the support nor the refute score is strictly ahead; any
// match then yields the "uncertain" stance.
const UNCERTAINTY_PATTERNS = [
  /\buncertain\b/i,
  /\bmixed\b/i,
  /\blimited\b/i,
  /\bpreliminary\b/i,
  /\binconclusive\b/i,
  /\bmay\b/i,
  /\bmight\b/i,
];
|
|
655
|
+
|
|
656
|
+
/**
 * Count how many patterns match a text.
 *
 * Each pattern contributes at most one point, no matter how often it matches.
 *
 * @param {string} text - Text to test.
 * @param {RegExp[]} patterns - Patterns to try.
 * @returns {number} Number of patterns with at least one match.
 */
function scorePatternMatches(text, patterns) {
  return patterns.filter((pattern) => pattern.test(text)).length;
}
|
|
659
|
+
|
|
660
|
+
/**
 * Classify the stance a piece of text takes, using keyword heuristics.
 *
 * The text is scored against the support, refute and uncertainty pattern
 * lists. The stance is "support" or "refute" when that score is strictly
 * higher than the other; on a tie it is "uncertain" if any uncertainty
 * pattern matched, otherwise "neutral".
 *
 * @param {unknown} text - Text to classify (passed through `normalizeString`).
 * @returns {"support"|"refute"|"uncertain"|"neutral"} The detected stance.
 */
function classifyEvidenceStance(text) {
  const normalized = normalizeString(text);
  const [supportScore, refuteScore, uncertaintyScore] = [
    SUPPORT_PATTERNS,
    REFUTE_PATTERNS,
    UNCERTAINTY_PATTERNS,
  ].map((patterns) => scorePatternMatches(normalized, patterns));

  // Scores are counts (never negative), so being strictly ahead implies being above zero.
  if (supportScore > refuteScore) return "support";
  if (refuteScore > supportScore) return "refute";
  return uncertaintyScore > 0 ? "uncertain" : "neutral";
}
|
|
671
|
+
|
|
672
|
+
/**
 * Find pairs of sources that take opposite stances on a shared topic.
 *
 * Two sources conflict when (a) at least one problem token occurs in both
 * (tokens come from each source's title and excerpt) and (b) one excerpt is
 * classified "support" while the other is classified "refute".
 *
 * Tokenization and stance classification depend only on the individual
 * source, so both are computed once per source instead of once per pair.
 *
 * @param {object[]} sources - Evidence sources; reads `title`, `excerpt` and optional `citationKey`.
 * @param {string[]} problemTokens - Tokens taken from the problem statement.
 * @returns {object[]} One entry per conflicting pair with `left`/`right`
 *   citation keys, a `reason`, a `severity` ("high" when two or more topic
 *   tokens are shared, otherwise "medium") and up to four `topicTokens`.
 */
function detectEvidenceConflicts(sources, problemTokens) {
  const conflicts = [];
  if (sources.length < 2) return conflicts;

  const wantedTokens = problemTokens ?? [];
  const tokenSets = sources.map(
    (source) => new Set(tokenize(`${source.title}\n${source.excerpt}`)),
  );

  // Stance is resolved lazily: sources that never share a topic are never classified.
  const stanceByIndex = new Map();
  const stanceOf = (index) => {
    if (!stanceByIndex.has(index)) {
      stanceByIndex.set(index, classifyEvidenceStance(sources[index].excerpt));
    }
    return stanceByIndex.get(index);
  };

  for (let leftIndex = 0; leftIndex < sources.length; leftIndex += 1) {
    for (let rightIndex = leftIndex + 1; rightIndex < sources.length; rightIndex += 1) {
      const topicTokens = wantedTokens
        .filter((token) => tokenSets[leftIndex].has(token) && tokenSets[rightIndex].has(token))
        .slice(0, 4);
      if (topicTokens.length === 0) continue;

      const leftStance = stanceOf(leftIndex);
      const rightStance = stanceOf(rightIndex);
      const isOpposed = (leftStance === "support" && rightStance === "refute")
        || (leftStance === "refute" && rightStance === "support");
      if (!isOpposed) continue;

      conflicts.push({
        left: sources[leftIndex].citationKey || `[E${leftIndex + 1}]`,
        right: sources[rightIndex].citationKey || `[E${rightIndex + 1}]`,
        reason: `Sources disagree about ${topicTokens.join(", ")}.`,
        severity: topicTokens.length >= 2 ? "high" : "medium",
        topicTokens,
      });
    }
  }
  return conflicts;
}
|
|
700
|
+
|
|
701
|
+
/**
 * Measure how much of the problem statement the retained sources cover.
 *
 * A problem token is covered when it occurs among the tokens of any source's
 * title or excerpt. Membership checks use a Set, so the cost grows with the
 * total number of source tokens rather than with tokens x problem size.
 *
 * @param {string[]} problemTokens - Tokens taken from the problem statement.
 * @param {object[]} sources - Evidence sources; reads `title` and `excerpt`.
 * @returns {{ coveredTokens: string[], missingTokens: string[], coverageRatio: number }}
 *   Covered and missing tokens in problem order, and the covered fraction
 *   (1 when there are no problem tokens).
 */
function computeEvidenceCoverage(problemTokens, sources) {
  if (problemTokens.length === 0) {
    return { coveredTokens: [], missingTokens: [], coverageRatio: 1 };
  }

  const wanted = new Set(problemTokens);
  const covered = new Set();
  for (const source of sources) {
    // Nothing left to find once every distinct problem token is covered.
    if (covered.size === wanted.size) break;
    for (const token of tokenize(`${source.title}\n${source.excerpt}`)) {
      if (wanted.has(token)) covered.add(token);
    }
  }

  const coveredTokens = problemTokens.filter((token) => covered.has(token));
  return {
    coveredTokens,
    missingTokens: problemTokens.filter((token) => !covered.has(token)),
    coverageRatio: coveredTokens.length / problemTokens.length,
  };
}
|
|
718
|
+
|
|
719
|
+
/**
 * Describe, in plain sentences, why the retained evidence may be unreliable.
 *
 * @param {object} input
 * @param {object[]} input.sources - Retained evidence sources.
 * @param {object[]} input.conflicts - Detected conflicts between sources.
 * @param {{ coverageRatio?: number }} [input.coverage] - Problem-token coverage;
 *   a missing ratio is treated as 0.
 * @returns {string} Space-joined caveats (no/one source, coverage below 0.45,
 *   conflicts present), or an empty string when none apply.
 */
function buildUncertaintySummary({ sources, conflicts, coverage }) {
  const sourceCount = sources.length;
  const coverageRatio = coverage?.coverageRatio || 0;

  let sourceCaveat = "";
  if (sourceCount === 0) {
    sourceCaveat = "No retained evidence sources were available for synthesis.";
  } else if (sourceCount === 1) {
    sourceCaveat = "Only one retained source was available, so cross-source corroboration is weak.";
  }

  const caveats = [
    sourceCaveat,
    coverageRatio < 0.45
      ? "The retained evidence only partially overlaps the problem statement."
      : "",
    conflicts.length > 0
      ? "Some retained sources point in different directions and need careful verification."
      : "",
  ];
  return caveats.filter(Boolean).join(" ").trim();
}
|
|
734
|
+
|
|
735
|
+
/**
 * Build reviewer guidance from the locally computed evidence statistics.
 *
 * Hints are added for: no or a single retained source, problem-token coverage
 * below 0.45, every retained source scoring below 2, detected conflicts, and
 * collection warnings.
 *
 * Two edge cases are handled explicitly:
 * - `coverage` may be missing; it is then reported as 0/0 instead of throwing.
 * - The "indirect evidence" hint needs at least one source, because `every()`
 *   is vacuously true for an empty list and the no-sources hint already covers
 *   that situation.
 *
 * @param {object} input
 * @param {object[]} input.sources - Retained evidence sources (reads `score`).
 * @param {object[]} input.conflicts - Detected conflicts (reads `left` and `right`).
 * @param {{ coverageRatio?: number, coveredTokens?: string[], missingTokens?: string[] }} [input.coverage]
 *   Problem-token coverage.
 * @param {string[]} input.warnings - Collection warnings.
 * @returns {string} Space-joined hints, or an empty string when none apply.
 */
function buildLocalReviewHints({ sources, conflicts, coverage, warnings }) {
  const hints = [];

  if (sources.length === 0) {
    hints.push("No retained evidence sources are available. The answer should stay at limitations and next-data-needed only.");
  } else if (sources.length === 1) {
    hints.push("Only one retained evidence source is available. Treat any conclusion as provisional until corroborated.");
  }

  if ((coverage?.coverageRatio || 0) < 0.45) {
    const coveredCount = coverage?.coveredTokens?.length ?? 0;
    const missingCount = coverage?.missingTokens?.length ?? 0;
    hints.push(
      `Problem-token coverage is weak (${coveredCount}/${coveredCount + missingCount}). Check for unsupported leaps and note the missing direct evidence.`,
    );
  }

  const hasOnlyWeakSources = sources.length > 0
    && sources.every((source) => (Number(source.score) || 0) < 2);
  if (hasOnlyWeakSources) {
    hints.push("The retained evidence is indirect relative to the question. Avoid strong causal claims.");
  }

  if (conflicts.length > 0) {
    const pairs = conflicts.map((conflict) => `${conflict.left}/${conflict.right}`).join(", ");
    hints.push(`Conflicting evidence detected across ${pairs}. Verify the disagreement explicitly before promoting findings.`);
  }

  if (warnings.length > 0) {
    hints.push(`Collection warnings: ${warnings.join(" | ")}`);
  }

  return hints.join(" ").trim();
}
|
|
758
|
+
|
|
759
|
+
/**
 * Compose a short narrative summary of the retained evidence.
 *
 * Only the first three highlights are used. The wording depends on `mode`:
 * "contradictions" contrasts the first two highlights and names the first
 * conflict, "summarize" lists the highlights in one sentence, "evidence-only"
 * lists them as an inventory, and any other mode (including "answer") leads
 * with the first highlight and appends the rest as additional evidence.
 * When a highlight is embedded mid-sentence, a leading capital A-Z is lower-cased.
 *
 * @param {object} input
 * @param {string} input.problem - Problem statement, quoted verbatim in the summary.
 * @param {string} input.mode - Evidence mode.
 * @param {object[]} input.highlights - Highlights (reads `finding` and `citationKey`).
 * @param {object[]} input.conflicts - Conflicts; only read in "contradictions" mode.
 * @param {string} input.uncertaintySummary - Caveats appended to the summary when non-empty.
 * @returns {string} The trimmed summary text.
 */
function summarizeEvidenceSupport({ problem, mode, highlights, conflicts, uncertaintySummary }) {
  if (highlights.length === 0) {
    return `The retained evidence does not yet answer "${problem}" directly. ${uncertaintySummary}`.trim();
  }

  const topHighlights = highlights.slice(0, 3);
  const withCitation = (highlight) => `${highlight.finding} ${highlight.citationKey}`.trim();
  const midSentence = (highlight) => withCitation(highlight)
    .replace(/^[A-Z]/, (match) => match.toLowerCase());
  const uncertaintyTail = uncertaintySummary ? ` ${uncertaintySummary}` : "";

  if (mode === "contradictions") {
    const leadText = withCitation(topHighlights[0]);
    const opening = conflicts.length > 0
      ? `The retained evidence is mixed for "${problem}". ${leadText}`
      : `The retained evidence does not show a strong contradiction for "${problem}", but it remains mixed. ${leadText}`;
    const contrast = topHighlights[1] ? ` In contrast, ${midSentence(topHighlights[1])}` : "";
    const firstConflict = conflicts[0];
    const conflictLine = firstConflict
      ? ` ${firstConflict.left} and ${firstConflict.right} disagree: ${firstConflict.reason}`
      : "";
    return `${opening}${contrast}${conflictLine}${uncertaintyTail}`.trim();
  }

  if (mode === "summarize") {
    const listed = topHighlights.map(midSentence).join("; ");
    return `Across the retained evidence for "${problem}", ${listed}.${uncertaintyTail}`.trim();
  }

  if (mode === "evidence-only") {
    const inventory = topHighlights
      .map((highlight) => `${highlight.citationKey} ${highlight.finding}`)
      .join("; ");
    return `Evidence inventory for "${problem}": ${inventory}.${uncertaintyTail}`.trim();
  }

  // "answer" and every unrecognized mode share the default wording.
  const [lead, ...rest] = topHighlights;
  const supporting = rest.map(midSentence);
  const supportLine = supporting.length > 0
    ? ` Additional retained evidence indicates ${supporting.join("; ")}.`
    : "";
  return `The strongest retained evidence for "${problem}" indicates that ${midSentence(lead)}.${supportLine}${uncertaintyTail}`.trim();
}
|
|
790
|
+
|
|
791
|
+
/**
 * Render an evidence bundle as a plain-text brief.
 *
 * The brief starts with the mode and the number of retained sources, followed
 * by the optional tool-bundle and warning lines, a blank line, and then two
 * lines per source: the positional citation with its locator (when present)
 * and a shortened excerpt.
 *
 * @param {object} bundle - Evidence bundle; reads `mode`, `sources`,
 *   `toolBundleBrief` and `warnings`.
 * @returns {string} The newline-joined, trimmed brief.
 */
function buildEvidenceBrief(bundle) {
  const { sources } = bundle;
  const lines = [
    `Mode: ${bundle.mode}`,
    `Evidence sources retained: ${sources.length}`,
  ];
  if (bundle.toolBundleBrief) lines.push(`Tool bundle: ${bundle.toolBundleBrief}`);
  if (bundle.warnings.length > 0) lines.push(`Warnings: ${bundle.warnings.join(" | ")}`);
  lines.push("");

  for (let index = 0; index < sources.length; index += 1) {
    const source = sources[index];
    const locatorSuffix = source.locator ? ` (${source.locator})` : "";
    lines.push(
      `[E${index + 1}] ${source.citation}${locatorSuffix}`,
      `Excerpt: ${truncate(source.excerpt, 260)}`,
    );
  }
  return lines.join("\n").trim();
}
|
|
810
|
+
|
|
811
|
+
/**
 * Summarize a tool bundle on a single line.
 *
 * @param {object|null|undefined} bundle - Tool bundle; reads `name`, `id`,
 *   `recommendedCatalogEntries`, `optionalCatalogEntries` and `nativeCapabilities`.
 * @returns {string} "name (id)" followed by the non-empty id lists, joined
 *   with " | "; an empty string when no bundle is given.
 */
function summarizeToolBundle(bundle) {
  if (!bundle) return "";

  // Non-array fields count as "no entries".
  const idsOf = (entries) => (Array.isArray(entries) ? entries.map((entry) => entry.id) : []);
  const recommended = idsOf(bundle.recommendedCatalogEntries);
  const optional = idsOf(bundle.optionalCatalogEntries);
  const nativeCaps = idsOf(bundle.nativeCapabilities);

  const parts = [`${bundle.name} (${bundle.id})`];
  if (recommended.length > 0) parts.push(`recommended MCP: ${recommended.join(", ")}`);
  if (optional.length > 0) parts.push(`optional MCP: ${optional.join(", ")}`);
  if (nativeCaps.length > 0) parts.push(`native: ${nativeCaps.join(", ")}`);
  return parts.join(" | ");
}
|
|
829
|
+
|
|
830
|
+
/**
 * Normalize the JSON payload returned by an external evidence sidecar.
 *
 * Text fields go through `normalizeString`, list fields through
 * `asArray`/`dedupeStrings`, and sources through `normalizeExternalSource`
 * (entries it rejects are dropped). The untouched payload is kept under `raw`.
 *
 * @param {object} raw - Parsed sidecar output.
 * @param {object} config - Sidecar configuration; `evidenceMode` is the fallback mode.
 * @returns {object|null} The normalized bundle, or `null` when `raw` is not an object.
 */
function normalizeExternalBundle(raw, config) {
  const isRecord = Boolean(raw) && typeof raw === "object";
  if (!isRecord) return null;

  const sources = [];
  asArray(raw.sources).forEach((entry, index) => {
    const normalized = normalizeExternalSource(entry, index);
    if (normalized) sources.push(normalized);
  });

  const listOrEmpty = (value) => (Array.isArray(value) ? value : []);
  const hasMetrics = Boolean(raw.metrics) && typeof raw.metrics === "object";

  return {
    mode: normalizeString(raw.mode) || config.evidenceMode,
    summary: normalizeString(raw.summary || raw.answer || ""),
    citations: dedupeStrings(asArray(raw.citations)),
    reviewHints: normalizeString(raw.reviewHints || raw.verificationHints || ""),
    warnings: dedupeStrings(asArray(raw.warnings)),
    sources,
    conflicts: listOrEmpty(raw.conflicts),
    highlights: listOrEmpty(raw.highlights),
    uncertaintySummary: normalizeString(raw.uncertaintySummary),
    metrics: hasMetrics ? raw.metrics : {},
    raw,
  };
}
|
|
849
|
+
|
|
850
|
+
/**
 * Merge locally collected evidence with an optional external sidecar bundle.
 *
 * Steps:
 * 1. Concatenate external and local sources (external first) and drop
 *    duplicates that share the same citation, locator and excerpt
 *    (compared case-insensitively).
 * 2. Give every source the higher of its own score and its token-overlap
 *    score against the problem statement.
 * 3. Sort by score (descending), then by citation, and keep the first
 *    `config.maxEvidenceSources` entries, numbering them `[E1]`, `[E2]`, ...
 * 4. Derive coverage, highlights, conflicts, uncertainty, review hints and a
 *    summary locally; text and list fields supplied by the external bundle
 *    take precedence when they are non-empty.
 *
 * The input bundles are not modified: rescored sources are copies, so the
 * caller's source objects keep their original `score`.
 *
 * @param {object} localBundle - Local evidence; reads `sources`, `warnings` and `metrics`.
 * @param {object|null|undefined} externalBundle - Normalized external bundle, if any.
 * @param {object} config - Sidecar configuration; reads `problem`, `domain`,
 *   `evidenceMode` and `maxEvidenceSources`.
 * @returns {object} The merged evidence bundle (schema version 1).
 */
function mergeEvidenceBundles(localBundle, externalBundle, config) {
  const problemTokens = tokenize(config.problem);

  const seen = new Set();
  const merged = [];
  for (const source of [...(externalBundle?.sources || []), ...(localBundle.sources || [])]) {
    if (!source) continue;
    const key = `${normalizeString(source.citation).toLowerCase()}|${normalizeString(source.locator).toLowerCase()}|${normalizeString(source.excerpt).toLowerCase()}`;
    if (seen.has(key)) continue;
    seen.add(key);
    const intrinsic = scoreEvidenceCandidate(problemTokens, `${source.title}\n${source.excerpt}`);
    // Copy instead of assigning to source.score so caller-owned objects stay untouched.
    merged.push({ ...source, score: Math.max(Number(source.score) || 0, intrinsic) });
  }

  merged.sort((left, right) => {
    if ((right.score || 0) !== (left.score || 0)) return (right.score || 0) - (left.score || 0);
    return normalizeString(left.citation).localeCompare(normalizeString(right.citation));
  });

  const sources = merged.slice(0, config.maxEvidenceSources).map((source, index) => ({
    ...source,
    citationKey: `[E${index + 1}]`,
  }));

  const warnings = dedupeStrings([
    ...(localBundle.warnings || []),
    ...(externalBundle?.warnings || []),
  ]);
  const coverage = computeEvidenceCoverage(problemTokens, sources);
  const localHighlights = buildSourceHighlights(sources, problemTokens);
  const localConflicts = detectEvidenceConflicts(sources, problemTokens);
  const localUncertaintySummary = buildUncertaintySummary({
    sources,
    conflicts: localConflicts,
    coverage,
  });
  const localReviewHints = buildLocalReviewHints({
    sources,
    conflicts: localConflicts,
    coverage,
    warnings,
  });
  const localSummary = summarizeEvidenceSupport({
    problem: config.problem,
    mode: config.evidenceMode,
    highlights: localHighlights,
    conflicts: localConflicts,
    uncertaintySummary: localUncertaintySummary,
  });

  // Guarded like the other localBundle fields so a bundle without metrics still merges.
  const localMetrics = localBundle.metrics || {};

  return {
    schemaVersion: 1,
    sidecarVersion: SIDECAR_VERSION,
    problem: config.problem,
    domain: config.domain,
    mode: config.evidenceMode,
    analysisPromptHint: buildAnalysisPromptHint(config.evidenceMode),
    summary: normalizeString(externalBundle?.summary) || localSummary,
    citations: dedupeStrings([
      ...(externalBundle?.citations || []),
      ...sources.map((source, index) => `[E${index + 1}] ${source.citation}`),
    ]),
    reviewHints: normalizeString(externalBundle?.reviewHints) || localReviewHints,
    warnings,
    sources,
    highlights: externalBundle?.highlights?.length ? externalBundle.highlights : localHighlights,
    conflicts: externalBundle?.conflicts?.length ? externalBundle.conflicts : localConflicts,
    uncertaintySummary: normalizeString(externalBundle?.uncertaintySummary) || localUncertaintySummary,
    metrics: {
      literatureSearchSourceCount: localMetrics.literatureSearchSourceCount || 0,
      corpusSourceCount: localMetrics.corpusSourceCount || 0,
      runtimeLogSourceCount: localMetrics.runtimeLogSourceCount || 0,
      retainedSourceCount: sources.length,
      delegationUsed: externalBundle != null,
      unsupportedCorpusCount: (localMetrics.unsupportedCorpusCount || 0),
      externalMetrics: externalBundle?.metrics || {},
      problemTokenCoverageRatio: Number(coverage.coverageRatio.toFixed(3)),
      coveredProblemTokenCount: coverage.coveredTokens.length,
      conflictingSourcePairs: (externalBundle?.conflicts?.length || localConflicts.length),
    },
  };
}
|
|
935
|
+
|
|
936
|
+
/**
 * Split a command line into arguments with minimal shell-like quoting.
 *
 * Rules:
 * - Unquoted whitespace separates arguments.
 * - Single and double quotes group text; the quote characters are removed.
 * - Inside double quotes, `\"` and `\\` are unescaped; every other backslash
 *   (and every backslash outside double quotes) is kept literally.
 * - An explicitly quoted empty string (`""` or `''`) yields an empty
 *   argument instead of being dropped.
 *
 * @param {string} command - Command line to split; non-string input is treated as empty.
 * @returns {string[]|null} The arguments, or `null` when a quote is left unterminated.
 */
function splitCommandArguments(command) {
  const text = typeof command === "string" ? command : "";
  const tokens = [];
  let current = "";
  // True once the current argument has started, even if it is still empty ("").
  let hasToken = false;
  let quote = null;

  for (let index = 0; index < text.length; index += 1) {
    const char = text[index];

    if (quote) {
      if (char === quote) {
        quote = null;
        continue;
      }
      if (char === "\\" && quote === '"' && index + 1 < text.length) {
        const next = text[index + 1];
        if (next === '"' || next === "\\") {
          current += next;
          index += 1;
          continue;
        }
      }
      current += char;
      continue;
    }

    if (char === '"' || char === "'") {
      quote = char;
      hasToken = true;
      continue;
    }

    if (/\s/.test(char)) {
      if (hasToken) {
        tokens.push(current);
        current = "";
        hasToken = false;
      }
      continue;
    }

    current += char;
    hasToken = true;
  }

  if (quote) return null;
  if (hasToken) tokens.push(current);
  return tokens;
}
|
|
975
|
+
|
|
976
|
+
/**
 * Decide whether a command has to be run through a shell.
 *
 * A shell is required when the command could not be tokenized, when it
 * contains any of the characters `| & ; < > ( )` or a backtick, or when the
 * executable (first token) ends in ".cmd" or ".bat".
 *
 * @param {string} command - The full command line.
 * @param {string[]|null} tokens - Result of `splitCommandArguments(command)`.
 * @returns {boolean} `true` when the command must be run through a shell.
 */
function commandRequiresShell(command, tokens) {
  const hasTokens = Boolean(tokens?.length);
  if (!hasTokens) return true;

  const usesShellSyntax = /[|&;<>()`]/.test(command);
  if (usesShellSyntax) return true;

  const executable = normalizeString(tokens[0]).toLowerCase();
  return [".cmd", ".bat"].some((suffix) => executable.endsWith(suffix));
}
|
|
982
|
+
|
|
983
|
+
/**
 * Run the configured external evidence sidecar and collect its JSON output.
 *
 * The command is started directly when it can be tokenized and needs no shell;
 * otherwise it runs through `cmd.exe /d /s /c` on Windows or `sh -lc`
 * elsewhere. The request is written to the child's stdin as JSON and the
 * response is read from its stdout.
 *
 * The returned promise resolves for spawn failures and non-zero exits; those
 * are reported through `success: false` and `error`.
 *
 * Robustness notes:
 * - stdout/stderr are decoded as UTF-8 streams so multi-byte characters split
 *   across chunks are not corrupted.
 * - stdin has an error handler: a child that exits before reading its input
 *   would otherwise raise an unhandled EPIPE stream error.
 * - Output that is not valid JSON gets an explicit error message even when
 *   the exit code is 0, and termination by signal is reported as such.
 *
 * @param {object} config - Sidecar configuration; reads `sidecarCommand`,
 *   `repoRoot`, `evidenceMode`, `domain`, `problem`, `maxEvidenceSources`,
 *   `corpusPaths` and `literatureResults`.
 * @returns {Promise<object|null>} `null` when no command is configured,
 *   otherwise `{ success, exitCode, parsed, stdout, stderr, error }`.
 */
async function runExternalResearchEvidenceSidecar(config) {
  const command = normalizeString(config.sidecarCommand);
  if (!command) return null;

  return new Promise((resolvePromise) => {
    const tokens = splitCommandArguments(command);
    const useShell = commandRequiresShell(command, tokens);
    const isWindows = process.platform === "win32";
    const executable = useShell ? (isWindows ? "cmd.exe" : "sh") : tokens[0];
    const args = useShell
      ? (isWindows ? ["/d", "/s", "/c", command] : ["-lc", command])
      : tokens.slice(1);

    const child = spawn(executable, args, {
      cwd: config.repoRoot,
      env: {
        ...process.env,
        BOSUN_RESEARCH_SIDECAR_MODE: config.evidenceMode,
        BOSUN_RESEARCH_SIDECAR_DOMAIN: config.domain,
      },
      stdio: ["pipe", "pipe", "pipe"],
      windowsHide: true,
    });

    let stdout = "";
    let stderr = "";
    // Stream-level decoding keeps multi-byte characters intact across chunk boundaries.
    child.stdout.setEncoding("utf8");
    child.stderr.setEncoding("utf8");
    child.stdout.on("data", (chunk) => {
      stdout += String(chunk);
    });
    child.stderr.on("data", (chunk) => {
      stderr += String(chunk);
    });

    // Deliberately ignored: a broken stdin pipe means the child is gone, which
    // is already reported through the "error"/"close" handlers below.
    child.stdin.on("error", () => {});

    child.on("error", (error) => {
      resolvePromise({
        success: false,
        exitCode: null,
        parsed: null,
        error: error.message,
        stdout,
        stderr,
      });
    });

    child.on("close", (exitCode, signal) => {
      let parsed = null;
      try {
        parsed = JSON.parse(stdout.trim() || "{}");
      } catch {
        parsed = null;
      }

      let error = null;
      if (exitCode === null && signal) {
        error = `External sidecar was terminated by signal ${signal}`;
      } else if (exitCode !== 0) {
        error = `External sidecar exited with code ${exitCode}`;
      } else if (parsed == null) {
        error = "External sidecar returned output that is not valid JSON";
      }

      resolvePromise({
        success: exitCode === 0 && parsed != null,
        exitCode,
        parsed,
        stdout: truncate(stdout, 1600),
        stderr: truncate(stderr, 1200),
        error,
      });
    });

    child.stdin.end(JSON.stringify({
      schemaVersion: 1,
      problem: config.problem,
      domain: config.domain,
      evidenceMode: config.evidenceMode,
      maxEvidenceSources: config.maxEvidenceSources,
      corpusPaths: config.corpusPaths,
      literatureResults: config.literatureResults,
      repoRoot: config.repoRoot,
    }));
  });
}
|
|
1052
|
+
|
|
1053
|
+
/**
 * Persist an evidence payload as a pretty-printed JSON file.
 *
 * The file is created inside the directory returned by `toArtifactDir(repoRoot)`
 * (created recursively if missing) and is named
 * `<ISO timestamp with ":" and "." replaced by "-">-<slugified problem>.json`.
 *
 * @param {string} repoRoot - Root used to derive the artifact directory.
 * @param {string} problem - Problem statement used for the file-name slug.
 * @param {*} payload - Value serialized with two-space indentation.
 * @returns {string} Path of the written file.
 */
function writeArtifact(repoRoot, problem, payload) {
  const targetDirectory = toArtifactDir(repoRoot);
  mkdirSync(targetDirectory, { recursive: true });

  const stamp = new Date().toISOString().replace(/[:.]/g, "-");
  const fileName = `${stamp}-${slugify(problem)}.json`;
  const artifactPath = resolve(targetDirectory, fileName);

  const serialized = `${JSON.stringify(payload, null, 2)}\n`;
  writeFileSync(artifactPath, serialized, "utf8");
  return artifactPath;
}
|
|
1061
|
+
|
|
1062
|
+
/**
 * Normalize raw sidecar input into a complete configuration object.
 *
 * Accepts several aliases (`question` for `problem`, `toolBundleId` for
 * `researchToolBundleId`, `mode` for `evidenceMode`, `maxSources` for
 * `maxEvidenceSources`) and fills in defaults for anything missing.
 *
 * @param {object} [input={}] - Raw options; non-object values (for example a
 *   JSON `null` read from the CLI) are treated as an empty object.
 * @returns {object} Resolved configuration.
 * @throws {Error} When neither `problem` nor `question` yields a non-empty string.
 */
export function resolveResearchEvidenceSidecarConfig(input = {}) {
  // Guard against null / primitives so the caller gets the descriptive
  // "requires a problem statement" error instead of a raw TypeError.
  const options = input !== null && typeof input === "object" ? input : {};

  const repoRoot = resolve(normalizeString(options.repoRoot) || process.cwd());
  const problem = normalizeString(options.problem || options.question);
  if (!problem) {
    throw new Error("Research evidence sidecar requires a problem statement.");
  }

  return {
    repoRoot,
    workspaceId: normalizeString(options.workspaceId),
    domain: normalizeString(options.domain || "computer-science") || "computer-science",
    problem,
    researchToolBundleId: normalizeString(options.researchToolBundleId || options.toolBundleId || "scientific-evidence") || "scientific-evidence",
    evidenceMode: normalizeString(options.evidenceMode || options.mode || "answer") || "answer",
    maxEvidenceSources: parseInteger(options.maxEvidenceSources || options.maxSources, DEFAULT_MAX_SOURCES, 1, 20),
    searchLiterature: parseBoolean(options.searchLiterature, true),
    promoteReviewedFindings: parseBoolean(options.promoteReviewedFindings, true),
    includeRuntimeLogEvidence: parseBoolean(options.includeRuntimeLogEvidence, true),
    corpusPaths: parseCorpusPaths(options.corpusPaths),
    runtimeLogPaths: parsePathList(options.runtimeLogPaths),
    literatureResults: asArray(options.literatureResults),
    // Same double fallback as domain / evidenceMode: a whitespace-only value
    // normalizes to "" and must still end up as the default.
    triggerSource: normalizeString(options.triggerSource || "manual") || "manual",
    sidecarCommand: normalizeString(options.sidecarCommand),
  };
}
|
|
1087
|
+
|
|
1088
|
+
/**
 * Gather research evidence for a problem, optionally delegate to an external
 * sidecar command, persist the merged result and return a summary.
 *
 * Steps, in order: resolve the configuration, look up the research tool
 * bundle, normalize supplied literature results (when `searchLiterature` is
 * enabled), collect corpus and runtime-log sources, run the external sidecar
 * when `sidecarCommand` is set (a failed delegation only adds a warning to
 * the local bundle), merge local and external bundles, fill in the summary,
 * and write the artifact file.
 *
 * @param {object} [input={}] - Raw sidecar options.
 * @returns {Promise<object>} `{ success, artifactPath, bundle, evidenceBrief,
 *   citationsMarkdown, researchToolBundle, toolBundleBrief, delegation }`.
 */
export async function runResearchEvidenceSidecar(input = {}) {
  const config = resolveResearchEvidenceSidecarConfig(input);
  const researchToolBundle = getResearchToolBundle(config.researchToolBundleId, {
    includeExperimental: true,
  });

  let literatureSources = [];
  if (config.searchLiterature) {
    literatureSources = config.literatureResults
      .map((entry, index) => normalizeLiteratureResult(entry, index))
      .filter(Boolean);
  }
  const { sources: corpusSources, warnings: corpusWarnings } = await collectCorpusSources(config);
  const { sources: runtimeLogSources, warnings: runtimeLogWarnings } = await collectRuntimeLogSources(config);

  const skippedCorpusWarnings = corpusWarnings.filter((warning) => warning.startsWith("Skipped"));
  const localBundle = {
    sources: [...literatureSources, ...corpusSources, ...runtimeLogSources],
    warnings: [...corpusWarnings, ...runtimeLogWarnings],
    metrics: {
      literatureSearchSourceCount: literatureSources.length,
      corpusSourceCount: corpusSources.length,
      runtimeLogSourceCount: runtimeLogSources.length,
      unsupportedCorpusCount: skippedCorpusWarnings.length,
    },
  };

  // Optional delegation: a usable external reply becomes a second bundle,
  // anything else is recorded as a warning and local evidence is used alone.
  let externalResult = null;
  let externalBundle = null;
  if (config.sidecarCommand) {
    externalResult = await runExternalResearchEvidenceSidecar(config);
    const delegationUsable = externalResult?.success && externalResult.parsed;
    if (delegationUsable) {
      externalBundle = normalizeExternalBundle(externalResult.parsed, config);
    } else if (externalResult) {
      const failureDetail = externalResult.error || externalResult.stderr || "unknown delegation error";
      localBundle.warnings.push(`External sidecar fallback engaged: ${failureDetail}`);
    }
  }

  const bundle = mergeEvidenceBundles(localBundle, externalBundle, config);
  bundle.researchToolBundle = researchToolBundle;
  bundle.toolBundleBrief = summarizeToolBundle(researchToolBundle);
  if (!bundle.summary) {
    const supportSummary = summarizeEvidenceSupport({
      problem: config.problem,
      mode: config.evidenceMode,
      highlights: bundle.highlights || [],
      conflicts: bundle.conflicts || [],
      uncertaintySummary: bundle.uncertaintySummary || "",
    });
    bundle.summary = supportSummary || buildEvidenceBrief(bundle);
  }

  let delegation = null;
  if (externalResult) {
    delegation = {
      command: config.sidecarCommand,
      success: externalResult.success === true,
      exitCode: externalResult.exitCode ?? null,
      stderr: externalResult.stderr || "",
    };
  }
  const artifactPayload = {
    schemaVersion: 1,
    createdAt: new Date().toISOString(),
    config,
    bundle,
    delegation,
  };
  const artifactPath = writeArtifact(config.repoRoot, config.problem, artifactPayload);

  const evidenceBrief = buildEvidenceBrief(bundle);
  const citationsMarkdown = bundle.citations.join("\n");
  return {
    success: true,
    artifactPath,
    bundle,
    evidenceBrief,
    citationsMarkdown,
    researchToolBundle,
    toolBundleBrief: bundle.toolBundleBrief,
    delegation: artifactPayload.delegation,
  };
}
|
|
1160
|
+
|
|
1161
|
+
/**
 * Reduce a free-form verifier verdict to a normalized label.
 *
 * Checks run in priority order: "critical", then "minor", then negated
 * correctness ("incorrect" / "not correct"), then "correct". Anything else is
 * returned as the lower-cased, normalized text, or "unknown" when empty.
 *
 * The negation check matters because "incorrect" contains the substring
 * "correct": a plain substring test would label a failed verification as
 * "correct" and let it be promoted.
 *
 * @param {*} value - Verdict text or verifier output.
 * @returns {string} "critical", "minor", "incorrect", "correct", the
 *   normalized text, or "unknown".
 */
function normalizeVerdict(value) {
  const text = normalizeString(value).toLowerCase();
  if (text.includes("critical")) return "critical";
  if (text.includes("minor")) return "minor";
  if (/\bincorrect|\bnot\s+correct/.test(text)) return "incorrect";
  // Leading word boundary: accepts "correct" / "correctly" but not words that
  // merely end in "...correct".
  if (/\bcorrect/.test(text)) return "correct";
  return text || "unknown";
}
|
|
1168
|
+
|
|
1169
|
+
/**
 * Build a knowledge-base candidate from a reviewed research finding.
 *
 * Promotion is refused unless the normalized verdict (taken from `verdict`,
 * falling back to `verifierOutput`) is exactly "correct". On success the
 * content lists the problem, the conclusion, up to three evidence sources, up
 * to four citation keys, the verifier output and the artifact path.
 *
 * @param {object} [input={}] - Finding details: `bundle`, `verdict`,
 *   `verifierOutput`, `problem`, `domain`, `finalAnswer` / `answer`,
 *   `artifactPath`, `evidenceMode`. Non-object values are treated as empty.
 * @returns {object} `{ success: false, promote: false, reason }` when
 *   rejected, otherwise `{ success, promote, content, scope, category, tags,
 *   artifactPath }`.
 */
export function buildReviewedKnowledgeCandidate(input = {}) {
  // The input can come straight from CLI JSON, so tolerate null / primitives.
  const params = input !== null && typeof input === "object" ? input : {};
  const bundle = params.bundle && typeof params.bundle === "object" ? params.bundle : null;
  const verdict = normalizeVerdict(params.verdict || params.verifierOutput);
  if (verdict !== "correct") {
    return {
      success: false,
      promote: false,
      reason: `Reviewed finding promotion requires a correct verdict, received: ${verdict || "unknown"}`,
    };
  }

  const problem = normalizeString(params.problem || bundle?.problem);
  const domain = normalizeString(params.domain || bundle?.domain || "computer-science");
  const finalAnswer = truncate(params.finalAnswer || params.answer || "", 900);
  const verifierOutput = truncate(params.verifierOutput || "", 280);
  const artifactPath = normalizeString(params.artifactPath);
  const citations = asArray(bundle?.citations).slice(0, 4);
  const topSources = asArray(bundle?.sources)
    // Drop malformed entries first: a null entry would throw, and an entry
    // without a citation would be rendered as the literal text "undefined".
    .filter((entry) => entry !== null && typeof entry === "object" && entry.citation)
    .slice(0, 3)
    .map((entry, index) => `[E${index + 1}] ${entry.citation}${entry.locator ? ` (${entry.locator})` : ""}`);

  const lines = [
    `Reviewed research finding in ${domain}: ${problem}`,
    `Conclusion: ${finalAnswer}`,
  ];
  if (topSources.length > 0) {
    lines.push(`Evidence: ${topSources.join("; ")}`);
  }
  if (citations.length > 0) {
    lines.push(`Citation keys: ${citations.join(", ")}`);
  }
  if (verifierOutput) {
    lines.push(`Verifier: ${verifierOutput}`);
  }
  if (artifactPath) {
    lines.push(`Artifact: ${artifactPath}`);
  }

  return {
    success: true,
    promote: true,
    content: truncate(lines.join("\n"), 1800),
    scope: `research/${domain}`,
    category: "tip",
    tags: dedupeStrings(["research", "evidence-sidecar", domain, normalizeString(bundle?.mode || params.evidenceMode)]),
    artifactPath,
  };
}
|
|
1217
|
+
|
|
1218
|
+
/**
 * Load the CLI request payload.
 *
 * A non-empty `BOSUN_RESEARCH_SIDECAR_INPUT` environment variable wins and is
 * parsed as JSON; otherwise stdin (file descriptor 0) is read synchronously.
 * Blank stdin produces an empty object.
 *
 * @returns {*} Parsed JSON payload, or `{}` when stdin is blank.
 * @throws {SyntaxError} When the selected input is not valid JSON.
 */
function readCliInput() {
  const inlinePayload = process.env.BOSUN_RESEARCH_SIDECAR_INPUT;
  if (inlinePayload) {
    return JSON.parse(inlinePayload);
  }

  const piped = readFileSync(0, "utf8").trim();
  return piped ? JSON.parse(piped) : {};
}
|
|
1226
|
+
|
|
1227
|
+
/**
 * CLI entry point: read the request payload, run the selected sub-command and
 * print its result to stdout as a single JSON document.
 *
 * The sub-command is `process.argv[2]` (case-insensitive, default "run").
 * "promote" builds a reviewed-knowledge candidate; every other value runs the
 * research evidence sidecar.
 *
 * @returns {Promise<void>}
 */
async function runCli() {
  const subcommand = normalizeString(process.argv[2] || "run").toLowerCase();
  const payload = readCliInput();

  const result = subcommand === "promote"
    ? buildReviewedKnowledgeCandidate(payload)
    : await runResearchEvidenceSidecar(payload);

  process.stdout.write(JSON.stringify(result));
}
|
|
1238
|
+
|
|
1239
|
+
// Filesystem path of this module, derived from its module URL.
const __filename = fileURLToPath(import.meta.url);

// Run the CLI only when this file is the script Node was started with
// (process.argv[1] resolves to this module's path), not when it is imported.
if (process.argv[1] && resolve(process.argv[1]) === __filename) {
  const reportFailure = (error) => {
    process.stderr.write(`${error.message}\n`);
    process.exitCode = 1;
  };
  runCli().catch(reportFailure);
}
|