@howlil/ez-agents 3.5.0 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +735 -537
- package/agents/ez-architect-agent.md +267 -0
- package/agents/ez-backend-agent.md +303 -0
- package/agents/ez-chief-strategist.md +271 -0
- package/agents/ez-codebase-mapper.md +770 -770
- package/agents/ez-context-manager.md +319 -0
- package/agents/ez-debugger.md +1255 -1255
- package/agents/ez-design-expert.md +347 -0
- package/agents/ez-devops-agent.md +331 -0
- package/agents/ez-executor.md +487 -487
- package/agents/ez-frontend-agent.md +322 -0
- package/agents/ez-phase-researcher.md +553 -553
- package/agents/ez-planner.md +1307 -1307
- package/agents/ez-product-engineer.md +435 -0
- package/agents/ez-project-researcher.md +629 -629
- package/agents/ez-qa-agent.md +320 -0
- package/agents/ez-release-agent.md +333 -333
- package/agents/ez-requirements-agent.md +377 -377
- package/agents/ez-roadmapper.md +650 -650
- package/agents/ez-technical-writer.md +551 -0
- package/agents/ez-ux-expert.md +393 -0
- package/agents/ez-verifier.md +579 -579
- package/bin/guards/autonomy-guard.cjs +346 -0
- package/bin/guards/context-budget-guard.cjs +278 -0
- package/bin/guards/hallucination-guard.cjs +380 -0
- package/bin/guards/hidden-state-guard.cjs +182 -0
- package/bin/guards/team-overhead-guard.cjs +266 -0
- package/bin/guards/tool-sprawl-guard.cjs +271 -0
- package/bin/lib/analytics/analytics-collector.cjs +86 -0
- package/bin/lib/analytics/analytics-reporter.cjs +130 -0
- package/bin/lib/analytics/cohort-analyzer.cjs +138 -0
- package/bin/lib/analytics/funnel-analyzer.cjs +147 -0
- package/bin/lib/analytics/nps-tracker.cjs +147 -0
- package/bin/lib/archetype-detector.cjs +289 -0
- package/bin/lib/assistant-adapter.cjs +361 -0
- package/bin/lib/audit-exec.cjs +175 -0
- package/bin/lib/auth.cjs +176 -0
- package/bin/lib/backup-service.cjs +422 -0
- package/bin/lib/bdd-validator.cjs +622 -0
- package/bin/lib/business-flow-mapper.cjs +429 -0
- package/bin/lib/circuit-breaker.cjs +276 -0
- package/bin/lib/code-complexity-analyzer.cjs +360 -0
- package/bin/lib/codebase-analyzer.cjs +241 -0
- package/bin/lib/commands.cjs +691 -0
- package/bin/lib/config.cjs +236 -0
- package/bin/lib/constraint-extractor.cjs +526 -0
- package/bin/lib/content-scanner.cjs +238 -0
- package/bin/lib/context-cache.cjs +154 -0
- package/bin/lib/context-compressor.cjs +102 -0
- package/bin/lib/context-deduplicator.cjs +105 -0
- package/bin/lib/context-errors.cjs +78 -0
- package/bin/lib/context-manager.cjs +338 -0
- package/bin/lib/context-metadata-tracker.cjs +140 -0
- package/bin/lib/context-relevance-scorer.cjs +99 -0
- package/bin/lib/core.cjs +507 -0
- package/bin/lib/cost-alerts.cjs +174 -0
- package/bin/lib/cost-tracker.cjs +275 -0
- package/bin/lib/crash-recovery.cjs +220 -0
- package/bin/lib/dependency-graph.cjs +319 -0
- package/bin/lib/deploy/deploy-audit-log.cjs +76 -0
- package/bin/lib/deploy/deploy-detector.cjs +69 -0
- package/bin/lib/deploy/deploy-env-manager.cjs +109 -0
- package/bin/lib/deploy/deploy-health-check.cjs +88 -0
- package/bin/lib/deploy/deploy-pre-flight.cjs +57 -0
- package/bin/lib/deploy/deploy-rollback.cjs +72 -0
- package/bin/lib/deploy/deploy-runner.cjs +97 -0
- package/bin/lib/deploy/deploy-status.cjs +74 -0
- package/bin/lib/discussion-synthesizer.cjs +439 -0
- package/bin/lib/error-cache.cjs +114 -0
- package/bin/lib/error-registry.cjs +177 -0
- package/bin/lib/file-access.cjs +207 -0
- package/bin/lib/file-lock.cjs +236 -0
- package/bin/lib/finops/budget-enforcer.cjs +126 -0
- package/bin/lib/finops/cost-reporter.cjs +132 -0
- package/bin/lib/finops/finops-analyzer.cjs +112 -0
- package/bin/lib/finops/spot-manager.cjs +118 -0
- package/bin/lib/framework-detector.cjs +396 -0
- package/bin/lib/frontmatter.cjs +313 -0
- package/bin/lib/fs-utils.cjs +153 -0
- package/bin/lib/gate-executor.cjs +272 -0
- package/bin/lib/gates/README.md +374 -0
- package/bin/lib/gates/gate-01-requirement.cjs +303 -0
- package/bin/lib/gates/gate-02-architecture.cjs +555 -0
- package/bin/lib/gates/gate-03-code.cjs +635 -0
- package/bin/lib/gates/gate-04-security.cjs +829 -0
- package/bin/lib/git-errors.cjs +83 -0
- package/bin/lib/git-utils.cjs +321 -0
- package/bin/lib/git-workflow-engine.cjs +1157 -0
- package/bin/lib/health-check.cjs +227 -0
- package/bin/lib/index.cjs +279 -0
- package/bin/lib/init.cjs +725 -0
- package/bin/lib/lock-logger.cjs +194 -0
- package/bin/lib/lock-state.cjs +263 -0
- package/bin/lib/lockfile-validator.cjs +227 -0
- package/bin/lib/log-rotation.cjs +71 -0
- package/bin/lib/logger.cjs +125 -0
- package/bin/lib/memory-compression.cjs +256 -0
- package/bin/lib/milestone.cjs +247 -0
- package/bin/lib/model-provider.cjs +241 -0
- package/bin/lib/package-manager-detector.cjs +203 -0
- package/bin/lib/package-manager-executor.cjs +385 -0
- package/bin/lib/package-manager-service.cjs +216 -0
- package/bin/lib/perf/api-monitor.cjs +88 -0
- package/bin/lib/perf/db-optimizer.cjs +78 -0
- package/bin/lib/perf/frontend-performance.cjs +56 -0
- package/bin/lib/perf/perf-analyzer.cjs +77 -0
- package/bin/lib/perf/perf-baseline.cjs +102 -0
- package/bin/lib/perf/perf-reporter.cjs +117 -0
- package/bin/lib/perf/regression-detector.cjs +92 -0
- package/bin/lib/phase.cjs +963 -0
- package/bin/lib/planning-write.cjs +123 -0
- package/bin/lib/project-reporter.cjs +565 -0
- package/bin/lib/quality-gate.cjs +332 -0
- package/bin/lib/quality-metrics.cjs +324 -0
- package/bin/lib/recovery-manager.cjs +98 -0
- package/bin/lib/release-validator.cjs +617 -0
- package/bin/lib/retry.cjs +119 -0
- package/bin/lib/roadmap.cjs +309 -0
- package/bin/lib/safe-exec.cjs +173 -0
- package/bin/lib/safe-path.cjs +130 -0
- package/bin/lib/security-errors.cjs +62 -0
- package/bin/lib/session-chain.cjs +304 -0
- package/bin/lib/session-errors.cjs +81 -0
- package/bin/lib/session-export.cjs +251 -0
- package/bin/lib/session-import.cjs +262 -0
- package/bin/lib/session-manager.cjs +280 -0
- package/bin/lib/skill-context.cjs +148 -0
- package/bin/lib/skill-matcher.cjs +236 -0
- package/bin/lib/skill-registry.cjs +360 -0
- package/bin/lib/skill-resolver.cjs +449 -0
- package/bin/lib/skill-triggers.cjs +90 -0
- package/bin/lib/skill-validator.cjs +270 -0
- package/bin/lib/skill-versioning.cjs +355 -0
- package/bin/lib/stack-detector.cjs +399 -0
- package/bin/lib/state.cjs +736 -0
- package/bin/lib/tech-debt-analyzer.cjs +309 -0
- package/bin/lib/temp-file.cjs +239 -0
- package/bin/lib/template.cjs +223 -0
- package/bin/lib/test-file-lock.cjs +112 -0
- package/bin/lib/test-graceful.cjs +93 -0
- package/bin/lib/test-logger.cjs +60 -0
- package/bin/lib/test-safe-exec.cjs +38 -0
- package/bin/lib/test-safe-path.cjs +33 -0
- package/bin/lib/test-temp-file.cjs +125 -0
- package/bin/lib/tier-manager.cjs +428 -0
- package/bin/lib/timeout-exec.cjs +63 -0
- package/bin/lib/tradeoff-analyzer.cjs +284 -0
- package/bin/lib/url-fetch.cjs +170 -0
- package/bin/lib/verify.cjs +863 -0
- package/bin/update.js +217 -214
- package/commands/deploy.cjs +53 -0
- package/commands/ez/add-tests.md +41 -41
- package/commands/ez/audit-milestone.md +36 -36
- package/commands/ez/complete-milestone.md +136 -136
- package/commands/ez/discuss-phase.md +90 -90
- package/commands/ez/execute-phase.md +52 -52
- package/commands/ez/help.md +22 -22
- package/commands/ez/map-codebase.md +71 -71
- package/commands/ez/new-milestone.md +44 -44
- package/commands/ez/new-project.md +51 -42
- package/commands/ez/plan-phase.md +53 -53
- package/commands/ez/progress.md +36 -36
- package/commands/ez/quick.md +45 -45
- package/commands/ez/resume-work.md +40 -40
- package/commands/ez/run-phase.md +580 -0
- package/commands/ez/settings.md +36 -36
- package/commands/ez/update.md +37 -37
- package/commands/ez/verify-work.md +402 -38
- package/commands/health-check.cjs +44 -0
- package/commands/rollback.cjs +47 -0
- package/ez-agents/bin/ez-tools.cjs +599 -2
- package/ez-agents/bin/guards/autonomy-guard.cjs +346 -0
- package/ez-agents/bin/guards/context-budget-guard.cjs +247 -0
- package/ez-agents/bin/guards/hallucination-guard.cjs +271 -0
- package/ez-agents/bin/guards/hidden-state-guard.cjs +182 -0
- package/ez-agents/bin/guards/team-overhead-guard.cjs +266 -0
- package/ez-agents/bin/guards/tool-sprawl-guard.cjs +271 -0
- package/ez-agents/bin/lib/analytics/analytics-collector.cjs +86 -0
- package/ez-agents/bin/lib/analytics/analytics-reporter.cjs +130 -0
- package/ez-agents/bin/lib/analytics/cohort-analyzer.cjs +138 -0
- package/ez-agents/bin/lib/analytics/funnel-analyzer.cjs +147 -0
- package/ez-agents/bin/lib/analytics/nps-tracker.cjs +147 -0
- package/ez-agents/bin/lib/archetype-detector.cjs +289 -0
- package/ez-agents/bin/lib/audit-exec.cjs +166 -167
- package/ez-agents/bin/lib/auth.cjs +176 -176
- package/ez-agents/bin/lib/backup-service.cjs +422 -0
- package/ez-agents/bin/lib/bdd-validator.cjs +622 -622
- package/ez-agents/bin/lib/business-flow-mapper.cjs +429 -0
- package/ez-agents/bin/lib/code-complexity-analyzer.cjs +360 -0
- package/ez-agents/bin/lib/codebase-analyzer.cjs +241 -0
- package/ez-agents/bin/lib/commands.cjs +685 -685
- package/ez-agents/bin/lib/config.cjs +41 -1
- package/ez-agents/bin/lib/constraint-extractor.cjs +526 -0
- package/ez-agents/bin/lib/content-scanner.cjs +238 -238
- package/ez-agents/bin/lib/context-cache.cjs +154 -154
- package/ez-agents/bin/lib/context-errors.cjs +71 -71
- package/ez-agents/bin/lib/context-manager.cjs +220 -220
- package/ez-agents/bin/lib/core.cjs +507 -512
- package/ez-agents/bin/lib/cost-tracker.cjs +243 -0
- package/ez-agents/bin/lib/crash-recovery.cjs +172 -0
- package/ez-agents/bin/lib/dependency-graph.cjs +319 -0
- package/ez-agents/bin/lib/deploy/deploy-audit-log.cjs +76 -0
- package/ez-agents/bin/lib/deploy/deploy-detector.cjs +69 -0
- package/ez-agents/bin/lib/deploy/deploy-env-manager.cjs +109 -0
- package/ez-agents/bin/lib/deploy/deploy-health-check.cjs +88 -0
- package/ez-agents/bin/lib/deploy/deploy-pre-flight.cjs +57 -0
- package/ez-agents/bin/lib/deploy/deploy-rollback.cjs +72 -0
- package/ez-agents/bin/lib/deploy/deploy-runner.cjs +97 -0
- package/ez-agents/bin/lib/deploy/deploy-status.cjs +74 -0
- package/ez-agents/bin/lib/file-access.cjs +207 -207
- package/ez-agents/bin/lib/finops/budget-enforcer.cjs +126 -0
- package/ez-agents/bin/lib/finops/cost-reporter.cjs +132 -0
- package/ez-agents/bin/lib/finops/finops-analyzer.cjs +112 -0
- package/ez-agents/bin/lib/finops/spot-manager.cjs +118 -0
- package/ez-agents/bin/lib/framework-detector.cjs +396 -0
- package/ez-agents/bin/lib/frontmatter.cjs +3 -1
- package/ez-agents/bin/lib/gates/README.md +374 -0
- package/ez-agents/bin/lib/gates/gate-01-requirement.cjs +303 -0
- package/ez-agents/bin/lib/gates/gate-02-architecture.cjs +555 -0
- package/ez-agents/bin/lib/gates/gate-03-code.cjs +635 -0
- package/ez-agents/bin/lib/gates/gate-04-security.cjs +829 -0
- package/ez-agents/bin/lib/git-errors.cjs +83 -83
- package/ez-agents/bin/lib/git-utils.cjs +321 -321
- package/ez-agents/bin/lib/git-workflow-engine.cjs +1157 -1157
- package/ez-agents/bin/lib/health-check.cjs +162 -162
- package/ez-agents/bin/lib/index.cjs +2 -8
- package/ez-agents/bin/lib/init.cjs +0 -2
- package/ez-agents/bin/lib/lockfile-validator.cjs +227 -227
- package/ez-agents/bin/lib/log-rotation.cjs +71 -0
- package/ez-agents/bin/lib/logger.cjs +22 -47
- package/ez-agents/bin/lib/memory-compression.cjs +256 -256
- package/ez-agents/bin/lib/package-manager-detector.cjs +203 -203
- package/ez-agents/bin/lib/package-manager-executor.cjs +385 -385
- package/ez-agents/bin/lib/package-manager-service.cjs +216 -216
- package/ez-agents/bin/lib/perf/api-monitor.cjs +88 -0
- package/ez-agents/bin/lib/perf/db-optimizer.cjs +78 -0
- package/ez-agents/bin/lib/perf/frontend-performance.cjs +56 -0
- package/ez-agents/bin/lib/perf/perf-analyzer.cjs +77 -0
- package/ez-agents/bin/lib/perf/perf-baseline.cjs +102 -0
- package/ez-agents/bin/lib/perf/perf-reporter.cjs +117 -0
- package/ez-agents/bin/lib/perf/regression-detector.cjs +92 -0
- package/ez-agents/bin/lib/project-reporter.cjs +502 -0
- package/ez-agents/bin/lib/quality-gate.cjs +332 -0
- package/ez-agents/bin/lib/recovery-manager.cjs +98 -0
- package/ez-agents/bin/lib/release-validator.cjs +617 -614
- package/ez-agents/bin/lib/security-errors.cjs +62 -0
- package/ez-agents/bin/lib/session-chain.cjs +304 -304
- package/ez-agents/bin/lib/session-errors.cjs +81 -81
- package/ez-agents/bin/lib/session-export.cjs +251 -251
- package/ez-agents/bin/lib/session-import.cjs +262 -262
- package/ez-agents/bin/lib/session-manager.cjs +280 -280
- package/ez-agents/bin/lib/skill-context.cjs +148 -0
- package/ez-agents/bin/lib/skill-matcher.cjs +236 -0
- package/ez-agents/bin/lib/skill-registry.cjs +341 -0
- package/ez-agents/bin/lib/skill-resolver.cjs +449 -0
- package/ez-agents/bin/lib/skill-triggers.cjs +90 -0
- package/ez-agents/bin/lib/skill-validator.cjs +270 -0
- package/ez-agents/bin/lib/skill-versioning.cjs +355 -0
- package/ez-agents/bin/lib/stack-detector.cjs +399 -0
- package/ez-agents/bin/lib/tech-debt-analyzer.cjs +309 -0
- package/ez-agents/bin/lib/tier-manager.cjs +428 -428
- package/ez-agents/bin/lib/tradeoff-analyzer.cjs +284 -0
- package/ez-agents/bin/lib/url-fetch.cjs +170 -170
- package/ez-agents/bin/lib/verify.cjs +863 -863
- package/ez-agents/references/decimal-phase-calculation.md +65 -65
- package/ez-agents/references/git-integration.md +248 -248
- package/ez-agents/references/git-planning-commit.md +38 -38
- package/ez-agents/references/metrics-schema.md +118 -118
- package/ez-agents/references/model-profile-resolution.md +34 -34
- package/ez-agents/references/model-profiles.md +93 -93
- package/ez-agents/references/phase-argument-parsing.md +61 -61
- package/ez-agents/references/planning-config.md +340 -340
- package/ez-agents/references/tier-strategy.md +103 -103
- package/ez-agents/references/ui-brand.md +160 -160
- package/ez-agents/references/verification-patterns.md +612 -612
- package/ez-agents/templates/DEBUG.md +164 -164
- package/ez-agents/templates/UAT.md +247 -247
- package/ez-agents/templates/agent-output-format.md +404 -0
- package/ez-agents/templates/bdd-feature.md +173 -173
- package/ez-agents/templates/codebase/architecture.md +255 -255
- package/ez-agents/templates/codebase/structure.md +285 -285
- package/ez-agents/templates/copilot-instructions.md +7 -7
- package/ez-agents/templates/debug-subagent-prompt.md +91 -91
- package/ez-agents/templates/discovery.md +146 -146
- package/ez-agents/templates/discussion.md +68 -68
- package/ez-agents/templates/handoff-protocol.md +294 -0
- package/ez-agents/templates/incident-runbook.md +205 -205
- package/ez-agents/templates/mode-workflow-templates.md +301 -0
- package/ez-agents/templates/phase-prompt.md +610 -610
- package/ez-agents/templates/planner-subagent-prompt.md +117 -117
- package/ez-agents/templates/project.md +184 -184
- package/ez-agents/templates/release-checklist.md +136 -133
- package/ez-agents/templates/research.md +552 -552
- package/ez-agents/templates/rollback-plan.md +201 -201
- package/ez-agents/templates/security-user-setup.md +244 -0
- package/ez-agents/templates/skill-validation-rules.md +476 -0
- package/ez-agents/templates/state.md +180 -176
- package/ez-agents/templates/summary-complex.md +59 -59
- package/ez-agents/tests/gates/gate-01-02.test.cjs +812 -0
- package/ez-agents/tests/gates/gate-03-04.test.cjs +762 -0
- package/ez-agents/tests/gates/gate-05-validator.test.cjs +145 -0
- package/ez-agents/tests/gates/gate-06-docs-validator.test.cjs +244 -0
- package/ez-agents/tests/gates/gate-07-release-validator.test.cjs +219 -0
- package/ez-agents/tests/guards/context-budget-guard.test.cjs +145 -0
- package/ez-agents/tests/guards/edge-case-guards.test.cjs +238 -0
- package/ez-agents/tests/guards/hallucination-guard.test.cjs +124 -0
- package/ez-agents/workflows/audit-milestone.md +1 -1
- package/ez-agents/workflows/autonomous.md +844 -844
- package/ez-agents/workflows/complete-milestone.md +1 -1
- package/ez-agents/workflows/discuss-phase.md +1 -1
- package/ez-agents/workflows/execute-phase.md +124 -3
- package/ez-agents/workflows/help.md +42 -181
- package/ez-agents/workflows/hotfix.md +291 -291
- package/ez-agents/workflows/new-milestone.md +713 -713
- package/ez-agents/workflows/new-project.md +1089 -1107
- package/ez-agents/workflows/plan-phase.md +0 -40
- package/ez-agents/workflows/release.md +253 -253
- package/ez-agents/workflows/resume-session.md +215 -215
- package/ez-agents/workflows/run-phase.md +531 -0
- package/ez-agents/workflows/settings.md +2 -35
- package/hooks/dist/ez-check-update.js +81 -81
- package/hooks/dist/ez-context-monitor.js +148 -141
- package/hooks/dist/ez-statusline.js +115 -115
- package/package.json +78 -71
- package/scripts/fix-qwen-installation.js +144 -144
- package/agents/ez-integration-checker.md +0 -443
- package/agents/ez-nyquist-auditor.md +0 -176
- package/agents/ez-observer-agent.md +0 -260
- package/agents/ez-plan-checker.md +0 -706
- package/agents/ez-research-synthesizer.md +0 -247
- package/agents/ez-scrum-master-agent.md +0 -242
- package/agents/ez-tech-lead-agent.md +0 -267
- package/agents/ez-ui-auditor.md +0 -439
- package/agents/ez-ui-checker.md +0 -300
- package/agents/ez-ui-researcher.md +0 -353
- package/commands/ez/add-phase.md +0 -43
- package/commands/ez/add-todo.md +0 -47
- package/commands/ez/arch-review.md +0 -102
- package/commands/ez/auth.md +0 -87
- package/commands/ez/autonomous.md +0 -41
- package/commands/ez/check-todos.md +0 -45
- package/commands/ez/cleanup.md +0 -18
- package/commands/ez/debug.md +0 -168
- package/commands/ez/export-session.md +0 -79
- package/commands/ez/gather-requirements.md +0 -117
- package/commands/ez/git-workflow.md +0 -72
- package/commands/ez/health.md +0 -22
- package/commands/ez/hotfix.md +0 -120
- package/commands/ez/import-session.md +0 -82
- package/commands/ez/insert-phase.md +0 -32
- package/commands/ez/join-discord.md +0 -18
- package/commands/ez/list-phase-assumptions.md +0 -46
- package/commands/ez/list-sessions.md +0 -96
- package/commands/ez/package-manager.md +0 -316
- package/commands/ez/pause-work.md +0 -38
- package/commands/ez/plan-milestone-gaps.md +0 -34
- package/commands/ez/preflight.md +0 -79
- package/commands/ez/reapply-patches.md +0 -124
- package/commands/ez/release.md +0 -153
- package/commands/ez/remove-phase.md +0 -31
- package/commands/ez/research-phase.md +0 -190
- package/commands/ez/resume.md +0 -107
- package/commands/ez/set-profile.md +0 -34
- package/commands/ez/standup.md +0 -85
- package/commands/ez/stats.md +0 -18
- package/commands/ez/ui-phase.md +0 -34
- package/commands/ez/ui-review.md +0 -32
- package/commands/ez/validate-phase.md +0 -35
- package/ez-agents/bin/lib/metrics-tracker.cjs +0 -406
- package/ez-agents/templates/UI-SPEC.md +0 -100
- package/ez-agents/templates/VALIDATION.md +0 -76
- package/ez-agents/templates/context.md +0 -352
- package/ez-agents/templates/verification-report.md +0 -322
- package/ez-agents/workflows/arch-review.md +0 -54
- package/ez-agents/workflows/export-session.md +0 -255
- package/ez-agents/workflows/gather-requirements.md +0 -206
- package/ez-agents/workflows/import-session.md +0 -303
- package/ez-agents/workflows/research-phase.md +0 -74
- package/ez-agents/workflows/standup.md +0 -64
- package/ez-agents/workflows/ui-phase.md +0 -290
- package/ez-agents/workflows/ui-review.md +0 -157
- package/ez-agents/workflows/validate-phase.md +0 -167
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Context Budget Guard Tests
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
const { describe, it } = require('vitest');
|
|
6
|
+
const { strict: assert } = require('assert');
|
|
7
|
+
const fs = require('fs');
|
|
8
|
+
const path = require('path');
|
|
9
|
+
const os = require('os');
|
|
10
|
+
|
|
11
|
+
// Import the guard
|
|
12
|
+
const {
|
|
13
|
+
getTokenUsage,
|
|
14
|
+
checkContextBudget,
|
|
15
|
+
shouldStop,
|
|
16
|
+
getRecommendedAction,
|
|
17
|
+
checkBudget,
|
|
18
|
+
THRESHOLDS
|
|
19
|
+
} = require('../../ez-agents/bin/guards/context-budget-guard.cjs');
|
|
20
|
+
|
|
21
|
+
describe('Context Budget Guard', () => {
|
|
22
|
+
describe('checkContextBudget', () => {
|
|
23
|
+
it('should return no warnings for low usage', () => {
|
|
24
|
+
const result = checkContextBudget(10000, 100000);
|
|
25
|
+
assert.strictEqual(result.percent, 10);
|
|
26
|
+
assert.strictEqual(result.warnings.length, 0);
|
|
27
|
+
assert.strictEqual(result.shouldStop, false);
|
|
28
|
+
});
|
|
29
|
+
|
|
30
|
+
it('should return info warning at 50%', () => {
|
|
31
|
+
const result = checkContextBudget(50000, 100000);
|
|
32
|
+
assert.strictEqual(result.percent, 50);
|
|
33
|
+
assert.strictEqual(result.warnings.length, 1);
|
|
34
|
+
assert.strictEqual(result.warnings[0].level, 'info');
|
|
35
|
+
assert.strictEqual(result.shouldStop, false);
|
|
36
|
+
});
|
|
37
|
+
|
|
38
|
+
it('should return warning at 70%', () => {
|
|
39
|
+
const result = checkContextBudget(70000, 100000);
|
|
40
|
+
assert.strictEqual(result.percent, 70);
|
|
41
|
+
assert.strictEqual(result.warnings.length, 2);
|
|
42
|
+
assert.strictEqual(result.warnings[1].level, 'warning');
|
|
43
|
+
assert.strictEqual(result.shouldStop, false);
|
|
44
|
+
});
|
|
45
|
+
|
|
46
|
+
it('should return error and shouldStop at 80%', () => {
|
|
47
|
+
const result = checkContextBudget(80000, 100000);
|
|
48
|
+
assert.strictEqual(result.percent, 80);
|
|
49
|
+
assert.strictEqual(result.warnings.length, 3);
|
|
50
|
+
assert.strictEqual(result.warnings[2].level, 'error');
|
|
51
|
+
assert.strictEqual(result.shouldStop, true);
|
|
52
|
+
});
|
|
53
|
+
|
|
54
|
+
it('should return all warnings at 95%', () => {
|
|
55
|
+
const result = checkContextBudget(95000, 100000);
|
|
56
|
+
assert.strictEqual(result.percent, 95);
|
|
57
|
+
assert.strictEqual(result.warnings.length, 3);
|
|
58
|
+
assert.strictEqual(result.shouldStop, true);
|
|
59
|
+
assert.strictEqual(result.remaining, 5000);
|
|
60
|
+
});
|
|
61
|
+
});
|
|
62
|
+
|
|
63
|
+
describe('shouldStop', () => {
|
|
64
|
+
it('should return false below 80%', () => {
|
|
65
|
+
assert.strictEqual(shouldStop(79000, 100000), false);
|
|
66
|
+
assert.strictEqual(shouldStop(50000, 100000), false);
|
|
67
|
+
});
|
|
68
|
+
|
|
69
|
+
it('should return true at 80% and above', () => {
|
|
70
|
+
assert.strictEqual(shouldStop(80000, 100000), true);
|
|
71
|
+
assert.strictEqual(shouldStop(90000, 100000), true);
|
|
72
|
+
});
|
|
73
|
+
});
|
|
74
|
+
|
|
75
|
+
describe('getRecommendedAction', () => {
|
|
76
|
+
it('should return OK for low usage', () => {
|
|
77
|
+
const action = getRecommendedAction(30000, 100000);
|
|
78
|
+
assert.ok(action.includes('healthy'));
|
|
79
|
+
});
|
|
80
|
+
|
|
81
|
+
it('should return INFO for 50-69%', () => {
|
|
82
|
+
const action = getRecommendedAction(60000, 100000);
|
|
83
|
+
assert.ok(action.includes('INFO'));
|
|
84
|
+
assert.ok(action.includes('Monitor'));
|
|
85
|
+
});
|
|
86
|
+
|
|
87
|
+
it('should return WARNING for 70-79%', () => {
|
|
88
|
+
const action = getRecommendedAction(75000, 100000);
|
|
89
|
+
assert.ok(action.includes('WARNING'));
|
|
90
|
+
assert.ok(action.includes('split'));
|
|
91
|
+
});
|
|
92
|
+
|
|
93
|
+
it('should return STOP for 80%+', () => {
|
|
94
|
+
const action = getRecommendedAction(85000, 100000);
|
|
95
|
+
assert.ok(action.includes('STOP'));
|
|
96
|
+
assert.ok(action.includes('Split'));
|
|
97
|
+
});
|
|
98
|
+
});
|
|
99
|
+
|
|
100
|
+
describe('getTokenUsage', () => {
|
|
101
|
+
it('should return default when no context file', () => {
|
|
102
|
+
const result = getTokenUsage(null);
|
|
103
|
+
assert.strictEqual(result.current, 0);
|
|
104
|
+
assert.strictEqual(result.max, 100000);
|
|
105
|
+
assert.strictEqual(result.model, 'default');
|
|
106
|
+
});
|
|
107
|
+
|
|
108
|
+
it('should return default when file does not exist', () => {
|
|
109
|
+
const result = getTokenUsage('/nonexistent/file.md');
|
|
110
|
+
assert.strictEqual(result.current, 0);
|
|
111
|
+
assert.strictEqual(result.max, 100000);
|
|
112
|
+
});
|
|
113
|
+
|
|
114
|
+
it('should parse token usage from context file', () => {
|
|
115
|
+
const tempFile = path.join(os.tmpdir(), `context-${Date.now()}.md`);
|
|
116
|
+
const content = `
|
|
117
|
+
# Context
|
|
118
|
+
tokens_used: 50000
|
|
119
|
+
token_limit: 100000
|
|
120
|
+
model: claude-3-sonnet
|
|
121
|
+
`;
|
|
122
|
+
fs.writeFileSync(tempFile, content);
|
|
123
|
+
|
|
124
|
+
try {
|
|
125
|
+
const result = getTokenUsage(tempFile);
|
|
126
|
+
assert.strictEqual(result.current, 50000);
|
|
127
|
+
assert.strictEqual(result.max, 100000);
|
|
128
|
+
assert.strictEqual(result.model, 'claude-3-sonnet');
|
|
129
|
+
} finally {
|
|
130
|
+
fs.unlinkSync(tempFile);
|
|
131
|
+
}
|
|
132
|
+
});
|
|
133
|
+
});
|
|
134
|
+
|
|
135
|
+
describe('checkBudget', () => {
|
|
136
|
+
it('should return complete budget status', () => {
|
|
137
|
+
const result = checkBudget(null, 'gpt-4');
|
|
138
|
+
assert.ok(result.model);
|
|
139
|
+
assert.ok(result.limit);
|
|
140
|
+
assert.ok('percent' in result);
|
|
141
|
+
assert.ok('warnings' in result);
|
|
142
|
+
assert.ok('action' in result);
|
|
143
|
+
});
|
|
144
|
+
});
|
|
145
|
+
});
|
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Remaining Edge Case Guards Tests
|
|
3
|
+
* Tests for EDGE-03, EDGE-04, EDGE-05, EDGE-06
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { describe, it } from 'vitest';
|
|
7
|
+
import { strict as assert } from 'assert';
|
|
8
|
+
import fs from 'fs';
|
|
9
|
+
import path from 'path';
|
|
10
|
+
import os from 'os';
|
|
11
|
+
import { fileURLToPath } from 'url';
|
|
12
|
+
import { dirname, join } from 'path';
|
|
13
|
+
|
|
14
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
15
|
+
const __dirname = dirname(__filename);
|
|
16
|
+
|
|
17
|
+
// Import the guards
|
|
18
|
+
import {
|
|
19
|
+
checkHiddenState,
|
|
20
|
+
listStateFiles,
|
|
21
|
+
validatePersistence
|
|
22
|
+
} from '../../../bin/guards/hidden-state-guard.cjs';
|
|
23
|
+
|
|
24
|
+
import {
|
|
25
|
+
checkIrreversibleOps,
|
|
26
|
+
requiresHumanApproval,
|
|
27
|
+
checkAutonomy
|
|
28
|
+
} from '../../../bin/guards/autonomy-guard.cjs';
|
|
29
|
+
|
|
30
|
+
import {
|
|
31
|
+
checkToolCount,
|
|
32
|
+
getActiveTools,
|
|
33
|
+
checkToolSprawl
|
|
34
|
+
} from '../../../bin/guards/tool-sprawl-guard.cjs';
|
|
35
|
+
|
|
36
|
+
import {
|
|
37
|
+
detectOrgChanges,
|
|
38
|
+
checkTeamOverhead,
|
|
39
|
+
flagTeamRestructure
|
|
40
|
+
} from '../../../bin/guards/team-overhead-guard.cjs';
|
|
41
|
+
|
|
42
|
+
describe('EDGE-03: Hidden State Guard', () => {
|
|
43
|
+
describe('listStateFiles', () => {
|
|
44
|
+
it('should return empty array when no .planning directory', () => {
|
|
45
|
+
const result = listStateFiles(os.tmpdir());
|
|
46
|
+
assert.ok(Array.isArray(result));
|
|
47
|
+
});
|
|
48
|
+
|
|
49
|
+
it('should find markdown files in .planning directory', () => {
|
|
50
|
+
const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'state-test-'));
|
|
51
|
+
const planningDir = path.join(tempDir, '.planning');
|
|
52
|
+
fs.mkdirSync(planningDir);
|
|
53
|
+
fs.writeFileSync(path.join(planningDir, 'test.md'), '# Test');
|
|
54
|
+
fs.writeFileSync(path.join(planningDir, 'other.txt'), 'not markdown');
|
|
55
|
+
|
|
56
|
+
try {
|
|
57
|
+
const result = listStateFiles(tempDir);
|
|
58
|
+
assert.strictEqual(result.length, 1);
|
|
59
|
+
assert.ok(result[0].endsWith('test.md'));
|
|
60
|
+
} finally {
|
|
61
|
+
fs.rmSync(tempDir, { recursive: true, force: true });
|
|
62
|
+
}
|
|
63
|
+
});
|
|
64
|
+
});
|
|
65
|
+
|
|
66
|
+
describe('checkHiddenState', () => {
|
|
67
|
+
it('should return no hidden state for empty output', () => {
|
|
68
|
+
const result = checkHiddenState('', os.tmpdir());
|
|
69
|
+
assert.strictEqual(result.hasHiddenState, false);
|
|
70
|
+
});
|
|
71
|
+
|
|
72
|
+
it('should detect state references in output', () => {
|
|
73
|
+
const output = 'Phase 1 is complete. Task 5-02 is done. Status: BLOCKED';
|
|
74
|
+
const result = checkHiddenState(output, os.tmpdir());
|
|
75
|
+
assert.ok(result.stateReferences.length > 0);
|
|
76
|
+
});
|
|
77
|
+
});
|
|
78
|
+
|
|
79
|
+
describe('validatePersistence', () => {
|
|
80
|
+
it('should validate persistence correctly', () => {
|
|
81
|
+
const output = 'All state is documented';
|
|
82
|
+
const result = validatePersistence(output, os.tmpdir());
|
|
83
|
+
assert.ok('valid' in result);
|
|
84
|
+
assert.ok('stateFiles' in result);
|
|
85
|
+
});
|
|
86
|
+
});
|
|
87
|
+
});
|
|
88
|
+
|
|
89
|
+
describe('EDGE-04: Autonomy Guard', () => {
|
|
90
|
+
describe('checkIrreversibleOps', () => {
|
|
91
|
+
it('should detect database drop as irreversible', () => {
|
|
92
|
+
const result = checkIrreversibleOps('Drop the database and recreate it');
|
|
93
|
+
assert.strictEqual(result.irreversible, true);
|
|
94
|
+
assert.strictEqual(result.requiresApproval, true);
|
|
95
|
+
});
|
|
96
|
+
|
|
97
|
+
it('should detect production deploy as irreversible', () => {
|
|
98
|
+
const result = checkIrreversibleOps('Deploy to production server');
|
|
99
|
+
assert.strictEqual(result.irreversible, true);
|
|
100
|
+
assert.strictEqual(result.requiresApproval, true);
|
|
101
|
+
});
|
|
102
|
+
|
|
103
|
+
it('should allow safe operations', () => {
|
|
104
|
+
const result = checkIrreversibleOps('Read the configuration file');
|
|
105
|
+
assert.strictEqual(result.irreversible, false);
|
|
106
|
+
assert.strictEqual(result.requiresApproval, false);
|
|
107
|
+
});
|
|
108
|
+
|
|
109
|
+
it('should classify delete operations as irreversible', () => {
|
|
110
|
+
const result = checkIrreversibleOps('Delete all records from users table');
|
|
111
|
+
assert.strictEqual(result.irreversible, true);
|
|
112
|
+
});
|
|
113
|
+
});
|
|
114
|
+
|
|
115
|
+
describe('requiresHumanApproval', () => {
|
|
116
|
+
it('should require approval for irreversible ops', () => {
|
|
117
|
+
assert.strictEqual(requiresHumanApproval('DROP DATABASE'), true);
|
|
118
|
+
assert.strictEqual(requiresHumanApproval('truncate table'), true);
|
|
119
|
+
});
|
|
120
|
+
|
|
121
|
+
it('should not require approval for safe ops', () => {
|
|
122
|
+
assert.strictEqual(requiresHumanApproval('SELECT * FROM users'), false);
|
|
123
|
+
assert.strictEqual(requiresHumanApproval('read file'), false);
|
|
124
|
+
});
|
|
125
|
+
});
|
|
126
|
+
|
|
127
|
+
describe('checkAutonomy', () => {
|
|
128
|
+
it('should flag irreversible operations in output', () => {
|
|
129
|
+
const output = 'Step 1: Drop the database. Step 2: Recreate schema.';
|
|
130
|
+
const result = checkAutonomy(output, os.tmpdir());
|
|
131
|
+
assert.strictEqual(result.requiresApproval, true);
|
|
132
|
+
assert.ok(result.flaggedOperations.length > 0);
|
|
133
|
+
});
|
|
134
|
+
});
|
|
135
|
+
});
|
|
136
|
+
|
|
137
|
+
describe('EDGE-05: Tool Sprawl Guard', () => {
|
|
138
|
+
describe('getActiveTools', () => {
|
|
139
|
+
it('should extract tools from context', () => {
|
|
140
|
+
const context = 'Using express for server and react for UI';
|
|
141
|
+
const tools = getActiveTools(context);
|
|
142
|
+
assert.ok(tools.includes('express'));
|
|
143
|
+
assert.ok(tools.includes('react'));
|
|
144
|
+
});
|
|
145
|
+
|
|
146
|
+
it('should extract npm packages', () => {
|
|
147
|
+
const context = 'Install @scope/package and another-lib';
|
|
148
|
+
const tools = getActiveTools(context);
|
|
149
|
+
assert.ok(tools.some(t => t.includes('@scope/package') || t === 'another-lib'));
|
|
150
|
+
});
|
|
151
|
+
});
|
|
152
|
+
|
|
153
|
+
// checkToolCount: classifies a tool list as optimal, exceeded, or below-recommended.
describe('checkToolCount', () => {
  it('should return optimal for 3-7 tools', () => {
    const { status, withinLimit } = checkToolCount(['tool1', 'tool2', 'tool3', 'tool4', 'tool5']);

    assert.strictEqual(status, 'optimal');
    assert.strictEqual(withinLimit, true);
  });

  it('should return exceeded for >7 tools', () => {
    // Ten tools are expected to be reported as three over the limit.
    const tenTools = ['t1', 't2', 't3', 't4', 't5', 't6', 't7', 't8', 't9', 't10'];
    const { status, withinLimit, exceeded } = checkToolCount(tenTools);

    assert.strictEqual(status, 'exceeded');
    assert.strictEqual(withinLimit, false);
    assert.strictEqual(exceeded, 3);
  });

  it('should return below-recommended for <3 tools', () => {
    const { status } = checkToolCount(['tool1']);

    assert.strictEqual(status, 'below-recommended');
  });
});
|
|
175
|
+
|
|
176
|
+
// checkToolSprawl: the report must expose the count, tools, and summary fields.
describe('checkToolSprawl', () => {
  it('should analyze tool usage', () => {
    const report = checkToolSprawl('Using vitest, playwright, and typescript for this task');

    // Only the presence of each field is checked, not its value.
    for (const field of ['count', 'tools', 'summary']) {
      assert.ok(field in report);
    }
  });
});
|
|
185
|
+
});
|
|
186
|
+
|
|
187
|
+
describe('EDGE-06: Team Overhead Guard', () => {
|
|
188
|
+
// detectOrgChanges: organizational advice is flagged, purely technical advice is not.
describe('detectOrgChanges', () => {
  it('should detect team structure suggestions', () => {
    const { hasOrgChanges, suggestions } = detectOrgChanges('You should reorganize team into squads');

    assert.strictEqual(hasOrgChanges, true);
    assert.ok(suggestions.length > 0);
  });

  it('should detect meeting suggestions', () => {
    const { hasOrgChanges } = detectOrgChanges('Add a daily standup meeting');

    assert.strictEqual(hasOrgChanges, true);
  });

  it('should detect role suggestions', () => {
    const { hasOrgChanges } = detectOrgChanges('Create a new tech lead role');

    assert.strictEqual(hasOrgChanges, true);
  });

  it('should not flag technical suggestions', () => {
    // Negative case: a testing recommendation carries no organizational change.
    const { hasOrgChanges } = detectOrgChanges('Add tests for the new feature');

    assert.strictEqual(hasOrgChanges, false);
  });
});
|
|
214
|
+
|
|
215
|
+
// flagTeamRestructure: restructuring advice is flagged and given a severity.
describe('flagTeamRestructure', () => {
  it('should flag restructuring suggestions', () => {
    const { flagged } = flagTeamRestructure('Reorganize the team structure');

    assert.strictEqual(flagged, true);
  });

  it('should assign severity based on keyword count', () => {
    // A suggestion touching structure, roles, and meetings is expected to rate as high.
    const { severity } = flagTeamRestructure(
      'Reorganize team structure and create new roles and add meetings',
    );

    assert.strictEqual(severity, 'high');
  });
});
|
|
228
|
+
|
|
229
|
+
// checkTeamOverhead: the combined analysis reports org changes plus summary fields.
describe('checkTeamOverhead', () => {
  it('should provide complete analysis', () => {
    const analysis = checkTeamOverhead('You should hire a new engineer and add sprint planning');

    assert.strictEqual(analysis.hasOrgChanges, true);
    // Only the presence of these fields is checked, not their content.
    for (const field of ['summary', 'actionable']) {
      assert.ok(field in analysis);
    }
  });
});
|
|
238
|
+
});
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Hallucination Guard Tests
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
const { describe, it } = require('vitest');
|
|
6
|
+
const { strict: assert } = require('assert');
|
|
7
|
+
const fs = require('fs');
|
|
8
|
+
const path = require('path');
|
|
9
|
+
const os = require('os');
|
|
10
|
+
|
|
11
|
+
// Import the guard
|
|
12
|
+
const {
|
|
13
|
+
checkCitation,
|
|
14
|
+
verifyClaim,
|
|
15
|
+
flagUncertainty,
|
|
16
|
+
checkHallucination
|
|
17
|
+
} = require('../../ez-agents/bin/guards/hallucination-guard.cjs');
|
|
18
|
+
|
|
19
|
+
describe('Hallucination Guard', () => {
|
|
20
|
+
// checkCitation: looks for a claim under a directory and reports whether it is cited.
describe('checkCitation', () => {
  it('should return no citation for non-existent claims', () => {
    const result = checkCitation('nonexistent_claim_xyz123', os.tmpdir());

    assert.strictEqual(result.cited, false);
    assert.strictEqual(result.uncertainty, true);
    assert.strictEqual(result.citations.length, 0);
  });

  it('should find citation for existing content', () => {
    const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'citation-test-'));

    try {
      // The fixture is written inside the try so that a failed write still
      // reaches the finally block and the temp directory is removed.
      const testFile = path.join(tempDir, 'test.txt');
      fs.writeFileSync(testFile, 'This is a test claim that should be found');

      const result = checkCitation('test claim', tempDir);
      assert.strictEqual(result.cited, true);
      assert.strictEqual(result.uncertainty, false);
      assert.ok(result.citations.length > 0);
    } finally {
      fs.rmSync(tempDir, { recursive: true, force: true });
    }
  });
});
|
|
44
|
+
|
|
45
|
+
// verifyClaim: checks a library claim and reports which source confirmed it.
describe('verifyClaim', () => {
  it('should verify library in package.json', () => {
    const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'verify-test-'));

    try {
      // The manifest is written inside the try so that a failed write still
      // reaches the finally block and the temp directory is removed.
      const pkgPath = path.join(tempDir, 'package.json');
      fs.writeFileSync(pkgPath, JSON.stringify({
        dependencies: {
          'express': '^4.18.0',
          'vitest': '^1.0.0'
        }
      }));

      const result = verifyClaim('library: express', tempDir);
      assert.strictEqual(result.verified, true);
      assert.strictEqual(result.source, 'package.json');
      assert.strictEqual(result.details.name, 'express');
    } finally {
      fs.rmSync(tempDir, { recursive: true, force: true });
    }
  });

  it('should verify known libraries', () => {
    const result = verifyClaim('using vitest for testing');

    assert.strictEqual(result.verified, true);
    assert.strictEqual(result.source, 'known-library');
  });

  it('should not verify unknown libraries', () => {
    const result = verifyClaim('using unknown-library-xyz123');

    assert.strictEqual(result.verified, false);
    assert.strictEqual(result.source, 'none');
  });
});
|
|
78
|
+
|
|
79
|
+
// flagUncertainty: hedging phrases and citation claims are flagged, confident text is not.
describe('flagUncertainty', () => {
  it('should flag uncertain language', () => {
    const verdict = flagUncertainty('This might work, but I am not sure');

    assert.strictEqual(verdict.flagged, true);
    for (const phrase of ['might', 'not sure']) {
      assert.ok(verdict.uncertainPhrases.includes(phrase));
    }
    assert.strictEqual(verdict.confidence, 'low');
  });

  it('should not flag confident language', () => {
    const verdict = flagUncertainty('This is the correct approach');

    assert.strictEqual(verdict.flagged, false);
    assert.strictEqual(verdict.uncertainPhrases.length, 0);
    assert.strictEqual(verdict.confidence, 'high');
  });

  it('should detect citation claims', () => {
    const verdict = flagUncertainty('According to the documentation, this works');

    assert.strictEqual(verdict.flagged, true);
    // A citation claim is reported as an entry containing the text 'citation claim'.
    const mentionsCitation = (p) => p.includes('citation claim');
    assert.ok(verdict.uncertainPhrases.some(mentionsCitation));
  });
});
|
|
101
|
+
|
|
102
|
+
// checkHallucination: combines library verification and uncertainty flagging for an output.
describe('checkHallucination', () => {
  it('should detect unverified library claims', () => {
    const report = checkHallucination('You should use the library unknown-lib-xyz for this');

    assert.strictEqual(report.hallucinationRisk, true);
    assert.ok(report.unverifiedClaims.length > 0);
  });

  it('should verify known library claims', () => {
    const report = checkHallucination('You should use express for the server');

    assert.strictEqual(report.hallucinationRisk, false);
    assert.ok(report.verifiedClaims.length > 0);
  });

  it('should flag uncertainty in output', () => {
    const report = checkHallucination('This might work, I think it should be correct');

    // The nested uncertainty result is flagged and the overall confidence is 'medium'.
    assert.strictEqual(report.uncertainty.flagged, true);
    assert.strictEqual(report.confidence, 'medium');
  });
});
|
|
124
|
+
});
|
|
@@ -74,7 +74,7 @@ Milestone Requirements:
|
|
|
74
74
|
MUST map each integration finding to affected requirement IDs where applicable.
|
|
75
75
|
|
|
76
76
|
Verify cross-phase wiring and E2E user flows.",
|
|
77
|
-
subagent_type="ez-
|
|
77
|
+
subagent_type="ez-verifier",
|
|
78
78
|
model="{integration_checker_model}"
|
|
79
79
|
)
|
|
80
80
|
```
|