cognitive-core 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.json +111 -2
- package/.sessionlog/settings.json +4 -0
- package/dist/atlas.d.ts +10 -0
- package/dist/atlas.d.ts.map +1 -1
- package/dist/atlas.js +65 -0
- package/dist/atlas.js.map +1 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +5 -1
- package/dist/index.js.map +1 -1
- package/dist/learning/index.d.ts +1 -1
- package/dist/learning/index.d.ts.map +1 -1
- package/dist/learning/index.js.map +1 -1
- package/dist/learning/pipeline.d.ts +4 -31
- package/dist/learning/pipeline.d.ts.map +1 -1
- package/dist/learning/pipeline.js +12 -64
- package/dist/learning/pipeline.js.map +1 -1
- package/dist/learning/unified-pipeline.d.ts +30 -0
- package/dist/learning/unified-pipeline.d.ts.map +1 -1
- package/dist/learning/unified-pipeline.js +207 -0
- package/dist/learning/unified-pipeline.js.map +1 -1
- package/dist/memory/candidate-retrieval.d.ts.map +1 -1
- package/dist/memory/candidate-retrieval.js +3 -1
- package/dist/memory/candidate-retrieval.js.map +1 -1
- package/dist/memory/curated-loader.d.ts +21 -4
- package/dist/memory/curated-loader.d.ts.map +1 -1
- package/dist/memory/curated-loader.js +53 -16
- package/dist/memory/curated-loader.js.map +1 -1
- package/dist/memory/index.d.ts +2 -1
- package/dist/memory/index.d.ts.map +1 -1
- package/dist/memory/index.js +3 -1
- package/dist/memory/index.js.map +1 -1
- package/dist/memory/playbook.d.ts +6 -0
- package/dist/memory/playbook.d.ts.map +1 -1
- package/dist/memory/playbook.js +15 -0
- package/dist/memory/playbook.js.map +1 -1
- package/dist/memory/source-resolver.d.ts +120 -0
- package/dist/memory/source-resolver.d.ts.map +1 -0
- package/dist/memory/source-resolver.js +300 -0
- package/dist/memory/source-resolver.js.map +1 -0
- package/dist/types/config.d.ts +141 -0
- package/dist/types/config.d.ts.map +1 -1
- package/dist/types/config.js +40 -0
- package/dist/types/config.js.map +1 -1
- package/dist/types/index.d.ts +1 -1
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/index.js +1 -1
- package/dist/types/index.js.map +1 -1
- package/dist/utils/error-classifier.js +8 -8
- package/dist/utils/error-classifier.js.map +1 -1
- package/dist/workspace/efficacy-toolkit.d.ts +164 -0
- package/dist/workspace/efficacy-toolkit.d.ts.map +1 -0
- package/dist/workspace/efficacy-toolkit.js +281 -0
- package/dist/workspace/efficacy-toolkit.js.map +1 -0
- package/dist/workspace/index.d.ts +2 -1
- package/dist/workspace/index.d.ts.map +1 -1
- package/dist/workspace/index.js +3 -1
- package/dist/workspace/index.js.map +1 -1
- package/dist/workspace/templates/index.d.ts +3 -0
- package/dist/workspace/templates/index.d.ts.map +1 -1
- package/dist/workspace/templates/index.js +6 -0
- package/dist/workspace/templates/index.js.map +1 -1
- package/dist/workspace/templates/playbook-decay-detection.d.ts +46 -0
- package/dist/workspace/templates/playbook-decay-detection.d.ts.map +1 -0
- package/dist/workspace/templates/playbook-decay-detection.js +197 -0
- package/dist/workspace/templates/playbook-decay-detection.js.map +1 -0
- package/dist/workspace/templates/playbook-efficacy-audit.d.ts +46 -0
- package/dist/workspace/templates/playbook-efficacy-audit.d.ts.map +1 -0
- package/dist/workspace/templates/playbook-efficacy-audit.js +160 -0
- package/dist/workspace/templates/playbook-efficacy-audit.js.map +1 -0
- package/dist/workspace/templates/playbook-lifecycle-review.d.ts +51 -0
- package/dist/workspace/templates/playbook-lifecycle-review.d.ts.map +1 -0
- package/dist/workspace/templates/playbook-lifecycle-review.js +187 -0
- package/dist/workspace/templates/playbook-lifecycle-review.js.map +1 -0
- package/dist/workspace/types.d.ts +12 -54
- package/dist/workspace/types.d.ts.map +1 -1
- package/dist/workspace/types.js.map +1 -1
- package/package.json +8 -2
- package/playbooks/compound-engineering/adversarial-review.json +51 -0
- package/playbooks/compound-engineering/agent-native-architecture.json +59 -0
- package/playbooks/compound-engineering/agent-native-review.json +54 -0
- package/playbooks/compound-engineering/api-contract-review.json +52 -0
- package/playbooks/compound-engineering/brainstorm-requirements.json +55 -0
- package/playbooks/compound-engineering/bug-reproduction.json +62 -0
- package/playbooks/compound-engineering/confidence-calibration.json +49 -0
- package/playbooks/compound-engineering/correctness-review.json +49 -0
- package/playbooks/compound-engineering/data-migration-safety.json +59 -0
- package/playbooks/compound-engineering/deployment-verification.json +63 -0
- package/playbooks/compound-engineering/error-recovery-patterns.json +53 -0
- package/playbooks/compound-engineering/implementation-planning.json +64 -0
- package/playbooks/compound-engineering/issue-pattern-analysis.json +53 -0
- package/playbooks/compound-engineering/knowledge-compounding.json +63 -0
- package/playbooks/compound-engineering/learnings-research.json +54 -0
- package/playbooks/compound-engineering/maintainability-review.json +49 -0
- package/playbooks/compound-engineering/performance-review.json +54 -0
- package/playbooks/compound-engineering/plan-adversarial-review.json +56 -0
- package/playbooks/compound-engineering/plan-feasibility-review.json +56 -0
- package/playbooks/compound-engineering/project-standards-review.json +52 -0
- package/playbooks/compound-engineering/reliability-review.json +53 -0
- package/playbooks/compound-engineering/review-orchestration.json +64 -0
- package/playbooks/compound-engineering/security-review.json +54 -0
- package/playbooks/compound-engineering/systematic-execution.json +64 -0
- package/playbooks/compound-engineering/testing-review.json +50 -0
- package/src/atlas.ts +96 -0
- package/src/index.ts +27 -0
- package/src/learning/index.ts +1 -0
- package/src/learning/unified-pipeline.ts +271 -1
- package/src/memory/candidate-retrieval.ts +2 -1
- package/src/memory/curated-loader.ts +69 -16
- package/src/memory/index.ts +16 -0
- package/src/memory/playbook.ts +19 -0
- package/src/memory/source-resolver.ts +422 -0
- package/src/types/config.ts +46 -0
- package/src/types/index.ts +4 -0
- package/src/utils/error-classifier.ts +8 -8
- package/src/workspace/efficacy-toolkit.ts +496 -0
- package/src/workspace/index.ts +29 -0
- package/src/workspace/templates/index.ts +24 -0
- package/src/workspace/templates/playbook-decay-detection.ts +272 -0
- package/src/workspace/templates/playbook-efficacy-audit.ts +246 -0
- package/src/workspace/templates/playbook-lifecycle-review.ts +274 -0
- package/src/workspace/types.ts +22 -78
- package/tests/fixtures/behavioral-trajectories.ts +210 -0
- package/tests/integration/curated-sources-e2e.test.ts +502 -0
- package/tests/integration/pipeline-data-correctness.test.ts +794 -0
- package/tests/learning/meta-learner.test.ts +418 -0
- package/tests/learning/pipeline-memory-updates.test.ts +721 -0
- package/tests/learning/unified-pipeline-efficacy.test.ts +232 -0
- package/tests/memory/candidate-retrieval.test.ts +167 -0
- package/tests/memory/compound-engineering-seed.test.ts +338 -0
- package/tests/memory/curated-loader-extended.test.ts +225 -0
- package/tests/memory/meta.test.ts +399 -0
- package/tests/memory/playbook-quality-validation.test.ts +430 -0
- package/tests/memory/source-resolver.test.ts +700 -0
- package/tests/search/evaluator.test.ts +257 -0
- package/tests/search/verification-runner.test.ts +357 -0
- package/tests/utils/error-classifier.test.ts +149 -0
- package/tests/utils/trajectory-helpers.test.ts +163 -0
- package/tests/workspace/efficacy-toolkit.test.ts +404 -0
- package/tests/workspace/templates/playbook-efficacy.test.ts +377 -0
- package/.claude/settings.local.json +0 -11
- package/dist/learning/llm-extractor.d.ts +0 -88
- package/dist/learning/llm-extractor.d.ts.map +0 -1
- package/dist/learning/llm-extractor.js +0 -372
- package/dist/learning/llm-extractor.js.map +0 -1
- package/dist/learning/loop-coordinator.d.ts +0 -61
- package/dist/learning/loop-coordinator.d.ts.map +0 -1
- package/dist/learning/loop-coordinator.js +0 -96
- package/dist/learning/loop-coordinator.js.map +0 -1
- package/references/agent-workspace/CLAUDE.md +0 -74
- package/references/agent-workspace/README.md +0 -587
- package/references/agent-workspace/media/banner.png +0 -0
- package/references/agent-workspace/package-lock.json +0 -2061
- package/references/agent-workspace/package.json +0 -54
- package/references/agent-workspace/src/handle.ts +0 -122
- package/references/agent-workspace/src/index.ts +0 -32
- package/references/agent-workspace/src/manager.ts +0 -102
- package/references/agent-workspace/src/readers/json.ts +0 -71
- package/references/agent-workspace/src/readers/markdown.ts +0 -37
- package/references/agent-workspace/src/readers/raw.ts +0 -27
- package/references/agent-workspace/src/types.ts +0 -68
- package/references/agent-workspace/src/validation.ts +0 -93
- package/references/agent-workspace/src/writers/json.ts +0 -17
- package/references/agent-workspace/src/writers/markdown.ts +0 -27
- package/references/agent-workspace/src/writers/raw.ts +0 -22
- package/references/agent-workspace/tests/errors.test.ts +0 -652
- package/references/agent-workspace/tests/handle.test.ts +0 -144
- package/references/agent-workspace/tests/manager.test.ts +0 -124
- package/references/agent-workspace/tests/readers.test.ts +0 -205
- package/references/agent-workspace/tests/validation.test.ts +0 -196
- package/references/agent-workspace/tests/writers.test.ts +0 -108
- package/references/agent-workspace/tsconfig.json +0 -20
- package/references/agent-workspace/tsup.config.ts +0 -9
- package/references/minimem/.claude/settings.json +0 -7
- package/references/minimem/.sudocode/issues.jsonl +0 -18
- package/references/minimem/.sudocode/specs.jsonl +0 -1
- package/references/minimem/CLAUDE.md +0 -310
- package/references/minimem/README.md +0 -556
- package/references/minimem/claude-plugin/.claude-plugin/plugin.json +0 -10
- package/references/minimem/claude-plugin/.mcp.json +0 -7
- package/references/minimem/claude-plugin/README.md +0 -158
- package/references/minimem/claude-plugin/commands/recall.md +0 -47
- package/references/minimem/claude-plugin/commands/remember.md +0 -41
- package/references/minimem/claude-plugin/hooks/__tests__/hooks.test.ts +0 -272
- package/references/minimem/claude-plugin/hooks/hooks.json +0 -27
- package/references/minimem/claude-plugin/hooks/session-end.sh +0 -86
- package/references/minimem/claude-plugin/hooks/session-start.sh +0 -85
- package/references/minimem/claude-plugin/skills/memory/SKILL.md +0 -108
- package/references/minimem/package-lock.json +0 -5373
- package/references/minimem/package.json +0 -60
- package/references/minimem/scripts/postbuild.js +0 -35
- package/references/minimem/src/__tests__/edge-cases.test.ts +0 -371
- package/references/minimem/src/__tests__/errors.test.ts +0 -265
- package/references/minimem/src/__tests__/helpers.ts +0 -199
- package/references/minimem/src/__tests__/internal.test.ts +0 -407
- package/references/minimem/src/__tests__/knowledge.test.ts +0 -287
- package/references/minimem/src/__tests__/minimem.integration.test.ts +0 -1127
- package/references/minimem/src/__tests__/session.test.ts +0 -190
- package/references/minimem/src/cli/__tests__/commands.test.ts +0 -759
- package/references/minimem/src/cli/commands/__tests__/conflicts.test.ts +0 -141
- package/references/minimem/src/cli/commands/append.ts +0 -76
- package/references/minimem/src/cli/commands/config.ts +0 -262
- package/references/minimem/src/cli/commands/conflicts.ts +0 -413
- package/references/minimem/src/cli/commands/daemon.ts +0 -169
- package/references/minimem/src/cli/commands/index.ts +0 -12
- package/references/minimem/src/cli/commands/init.ts +0 -88
- package/references/minimem/src/cli/commands/mcp.ts +0 -177
- package/references/minimem/src/cli/commands/push-pull.ts +0 -213
- package/references/minimem/src/cli/commands/search.ts +0 -158
- package/references/minimem/src/cli/commands/status.ts +0 -84
- package/references/minimem/src/cli/commands/sync-init.ts +0 -290
- package/references/minimem/src/cli/commands/sync.ts +0 -70
- package/references/minimem/src/cli/commands/upsert.ts +0 -197
- package/references/minimem/src/cli/config.ts +0 -584
- package/references/minimem/src/cli/index.ts +0 -264
- package/references/minimem/src/cli/shared.ts +0 -161
- package/references/minimem/src/cli/sync/__tests__/central.test.ts +0 -152
- package/references/minimem/src/cli/sync/__tests__/conflicts.test.ts +0 -209
- package/references/minimem/src/cli/sync/__tests__/daemon.test.ts +0 -118
- package/references/minimem/src/cli/sync/__tests__/detection.test.ts +0 -207
- package/references/minimem/src/cli/sync/__tests__/integration.test.ts +0 -476
- package/references/minimem/src/cli/sync/__tests__/registry.test.ts +0 -363
- package/references/minimem/src/cli/sync/__tests__/state.test.ts +0 -255
- package/references/minimem/src/cli/sync/__tests__/validation.test.ts +0 -193
- package/references/minimem/src/cli/sync/__tests__/watcher.test.ts +0 -178
- package/references/minimem/src/cli/sync/central.ts +0 -292
- package/references/minimem/src/cli/sync/conflicts.ts +0 -204
- package/references/minimem/src/cli/sync/daemon.ts +0 -407
- package/references/minimem/src/cli/sync/detection.ts +0 -138
- package/references/minimem/src/cli/sync/index.ts +0 -107
- package/references/minimem/src/cli/sync/operations.ts +0 -373
- package/references/minimem/src/cli/sync/registry.ts +0 -279
- package/references/minimem/src/cli/sync/state.ts +0 -355
- package/references/minimem/src/cli/sync/validation.ts +0 -206
- package/references/minimem/src/cli/sync/watcher.ts +0 -234
- package/references/minimem/src/cli/version.ts +0 -34
- package/references/minimem/src/core/index.ts +0 -9
- package/references/minimem/src/core/indexer.ts +0 -628
- package/references/minimem/src/core/searcher.ts +0 -221
- package/references/minimem/src/db/schema.ts +0 -183
- package/references/minimem/src/db/sqlite-vec.ts +0 -24
- package/references/minimem/src/embeddings/__tests__/embeddings.test.ts +0 -431
- package/references/minimem/src/embeddings/batch-gemini.ts +0 -392
- package/references/minimem/src/embeddings/batch-openai.ts +0 -409
- package/references/minimem/src/embeddings/embeddings.ts +0 -434
- package/references/minimem/src/index.ts +0 -109
- package/references/minimem/src/internal.ts +0 -299
- package/references/minimem/src/minimem.ts +0 -1276
- package/references/minimem/src/search/__tests__/hybrid.test.ts +0 -247
- package/references/minimem/src/search/graph.ts +0 -234
- package/references/minimem/src/search/hybrid.ts +0 -151
- package/references/minimem/src/search/search.ts +0 -256
- package/references/minimem/src/server/__tests__/mcp.test.ts +0 -341
- package/references/minimem/src/server/__tests__/tools.test.ts +0 -364
- package/references/minimem/src/server/mcp.ts +0 -326
- package/references/minimem/src/server/tools.ts +0 -720
- package/references/minimem/src/session.ts +0 -460
- package/references/minimem/tsconfig.json +0 -19
- package/references/minimem/tsup.config.ts +0 -26
- package/references/minimem/vitest.config.ts +0 -24
- package/references/sessionlog/.husky/pre-commit +0 -1
- package/references/sessionlog/.lintstagedrc.json +0 -4
- package/references/sessionlog/.prettierignore +0 -4
- package/references/sessionlog/.prettierrc.json +0 -11
- package/references/sessionlog/LICENSE +0 -21
- package/references/sessionlog/README.md +0 -453
- package/references/sessionlog/eslint.config.js +0 -58
- package/references/sessionlog/package-lock.json +0 -3672
- package/references/sessionlog/package.json +0 -65
- package/references/sessionlog/src/__tests__/agent-hooks.test.ts +0 -570
- package/references/sessionlog/src/__tests__/agent-registry.test.ts +0 -127
- package/references/sessionlog/src/__tests__/claude-code-hooks.test.ts +0 -225
- package/references/sessionlog/src/__tests__/claude-generator.test.ts +0 -46
- package/references/sessionlog/src/__tests__/commit-msg.test.ts +0 -86
- package/references/sessionlog/src/__tests__/cursor-agent.test.ts +0 -224
- package/references/sessionlog/src/__tests__/e2e-live.test.ts +0 -890
- package/references/sessionlog/src/__tests__/event-log.test.ts +0 -183
- package/references/sessionlog/src/__tests__/flush-sentinel.test.ts +0 -105
- package/references/sessionlog/src/__tests__/gemini-agent.test.ts +0 -375
- package/references/sessionlog/src/__tests__/git-hooks.test.ts +0 -78
- package/references/sessionlog/src/__tests__/hook-managers.test.ts +0 -121
- package/references/sessionlog/src/__tests__/lifecycle-tasks.test.ts +0 -759
- package/references/sessionlog/src/__tests__/opencode-agent.test.ts +0 -338
- package/references/sessionlog/src/__tests__/redaction.test.ts +0 -136
- package/references/sessionlog/src/__tests__/session-repo.test.ts +0 -353
- package/references/sessionlog/src/__tests__/session-store.test.ts +0 -166
- package/references/sessionlog/src/__tests__/setup-ccweb.test.ts +0 -466
- package/references/sessionlog/src/__tests__/skill-live.test.ts +0 -461
- package/references/sessionlog/src/__tests__/summarize.test.ts +0 -348
- package/references/sessionlog/src/__tests__/task-plan-e2e.test.ts +0 -610
- package/references/sessionlog/src/__tests__/task-plan-live.test.ts +0 -632
- package/references/sessionlog/src/__tests__/transcript-timestamp.test.ts +0 -121
- package/references/sessionlog/src/__tests__/types.test.ts +0 -166
- package/references/sessionlog/src/__tests__/utils.test.ts +0 -333
- package/references/sessionlog/src/__tests__/validation.test.ts +0 -103
- package/references/sessionlog/src/__tests__/worktree.test.ts +0 -57
- package/references/sessionlog/src/agent/agents/claude-code.ts +0 -1089
- package/references/sessionlog/src/agent/agents/cursor.ts +0 -361
- package/references/sessionlog/src/agent/agents/gemini-cli.ts +0 -632
- package/references/sessionlog/src/agent/agents/opencode.ts +0 -540
- package/references/sessionlog/src/agent/registry.ts +0 -143
- package/references/sessionlog/src/agent/session-types.ts +0 -113
- package/references/sessionlog/src/agent/types.ts +0 -220
- package/references/sessionlog/src/cli.ts +0 -597
- package/references/sessionlog/src/commands/clean.ts +0 -133
- package/references/sessionlog/src/commands/disable.ts +0 -84
- package/references/sessionlog/src/commands/doctor.ts +0 -145
- package/references/sessionlog/src/commands/enable.ts +0 -202
- package/references/sessionlog/src/commands/explain.ts +0 -261
- package/references/sessionlog/src/commands/reset.ts +0 -105
- package/references/sessionlog/src/commands/resume.ts +0 -180
- package/references/sessionlog/src/commands/rewind.ts +0 -195
- package/references/sessionlog/src/commands/setup-ccweb.ts +0 -275
- package/references/sessionlog/src/commands/status.ts +0 -172
- package/references/sessionlog/src/config.ts +0 -165
- package/references/sessionlog/src/events/event-log.ts +0 -126
- package/references/sessionlog/src/git-operations.ts +0 -558
- package/references/sessionlog/src/hooks/git-hooks.ts +0 -165
- package/references/sessionlog/src/hooks/lifecycle.ts +0 -391
- package/references/sessionlog/src/index.ts +0 -650
- package/references/sessionlog/src/security/redaction.ts +0 -283
- package/references/sessionlog/src/session/state-machine.ts +0 -452
- package/references/sessionlog/src/store/checkpoint-store.ts +0 -509
- package/references/sessionlog/src/store/native-store.ts +0 -173
- package/references/sessionlog/src/store/provider-types.ts +0 -99
- package/references/sessionlog/src/store/session-store.ts +0 -266
- package/references/sessionlog/src/strategy/attribution.ts +0 -296
- package/references/sessionlog/src/strategy/common.ts +0 -207
- package/references/sessionlog/src/strategy/content-overlap.ts +0 -228
- package/references/sessionlog/src/strategy/manual-commit.ts +0 -988
- package/references/sessionlog/src/strategy/types.ts +0 -279
- package/references/sessionlog/src/summarize/claude-generator.ts +0 -115
- package/references/sessionlog/src/summarize/summarize.ts +0 -432
- package/references/sessionlog/src/types.ts +0 -508
- package/references/sessionlog/src/utils/chunk-files.ts +0 -49
- package/references/sessionlog/src/utils/commit-message.ts +0 -65
- package/references/sessionlog/src/utils/detect-agent.ts +0 -36
- package/references/sessionlog/src/utils/hook-managers.ts +0 -125
- package/references/sessionlog/src/utils/ide-tags.ts +0 -32
- package/references/sessionlog/src/utils/paths.ts +0 -79
- package/references/sessionlog/src/utils/preview-rewind.ts +0 -80
- package/references/sessionlog/src/utils/rewind-conflict.ts +0 -121
- package/references/sessionlog/src/utils/shadow-branch.ts +0 -109
- package/references/sessionlog/src/utils/string-utils.ts +0 -46
- package/references/sessionlog/src/utils/todo-extract.ts +0 -188
- package/references/sessionlog/src/utils/trailers.ts +0 -187
- package/references/sessionlog/src/utils/transcript-parse.ts +0 -177
- package/references/sessionlog/src/utils/transcript-timestamp.ts +0 -59
- package/references/sessionlog/src/utils/tree-ops.ts +0 -219
- package/references/sessionlog/src/utils/tty.ts +0 -72
- package/references/sessionlog/src/utils/validation.ts +0 -65
- package/references/sessionlog/src/utils/worktree.ts +0 -58
- package/references/sessionlog/src/wire-types.ts +0 -59
- package/references/sessionlog/templates/setup-env.sh +0 -153
- package/references/sessionlog/tsconfig.json +0 -18
- package/references/sessionlog/vitest.config.ts +0 -12
- package/references/skill-tree/.claude/settings.json +0 -6
- package/references/skill-tree/.sudocode/issues.jsonl +0 -19
- package/references/skill-tree/.sudocode/specs.jsonl +0 -3
- package/references/skill-tree/CLAUDE.md +0 -126
- package/references/skill-tree/README.md +0 -372
- package/references/skill-tree/docs/GAPS_v1.md +0 -221
- package/references/skill-tree/docs/INTEGRATION_PLAN.md +0 -467
- package/references/skill-tree/docs/TODOS.md +0 -91
- package/references/skill-tree/docs/anthropic_skill_guide.md +0 -1364
- package/references/skill-tree/docs/design/federated-skill-trees.md +0 -524
- package/references/skill-tree/docs/design/multi-agent-sync.md +0 -759
- package/references/skill-tree/docs/scraper/BRAINSTORM.md +0 -583
- package/references/skill-tree/docs/scraper/POC_PLAN.md +0 -420
- package/references/skill-tree/docs/scraper/README.md +0 -170
- package/references/skill-tree/examples/basic-usage.ts +0 -164
- package/references/skill-tree/package-lock.json +0 -1852
- package/references/skill-tree/package.json +0 -66
- package/references/skill-tree/scraper/README.md +0 -123
- package/references/skill-tree/scraper/docs/DESIGN.md +0 -683
- package/references/skill-tree/scraper/docs/PLAN.md +0 -336
- package/references/skill-tree/scraper/drizzle.config.ts +0 -10
- package/references/skill-tree/scraper/package-lock.json +0 -6329
- package/references/skill-tree/scraper/package.json +0 -68
- package/references/skill-tree/scraper/test/fixtures/invalid-skill/missing-description.md +0 -7
- package/references/skill-tree/scraper/test/fixtures/invalid-skill/missing-name.md +0 -7
- package/references/skill-tree/scraper/test/fixtures/minimal-skill/SKILL.md +0 -27
- package/references/skill-tree/scraper/test/fixtures/skill-json/SKILL.json +0 -21
- package/references/skill-tree/scraper/test/fixtures/skill-with-meta/SKILL.md +0 -54
- package/references/skill-tree/scraper/test/fixtures/skill-with-meta/_meta.json +0 -24
- package/references/skill-tree/scraper/test/fixtures/valid-skill/SKILL.md +0 -93
- package/references/skill-tree/scraper/test/fixtures/valid-skill/_meta.json +0 -22
- package/references/skill-tree/scraper/tsup.config.ts +0 -14
- package/references/skill-tree/scraper/vitest.config.ts +0 -17
- package/references/skill-tree/scripts/convert-to-vitest.ts +0 -166
- package/references/skill-tree/skills/skill-writer/SKILL.md +0 -339
- package/references/skill-tree/skills/skill-writer/references/examples.md +0 -326
- package/references/skill-tree/skills/skill-writer/references/patterns.md +0 -210
- package/references/skill-tree/skills/skill-writer/references/quality-checklist.md +0 -123
- package/references/skill-tree/test/run-all.ts +0 -106
- package/references/skill-tree/test/utils.ts +0 -128
- package/references/skill-tree/vitest.config.ts +0 -16
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
import { describe, it, expect, beforeEach } from 'vitest';
|
|
2
|
+
import {
|
|
3
|
+
SolutionEvaluator,
|
|
4
|
+
createSolutionEvaluator,
|
|
5
|
+
type VerificationResult,
|
|
6
|
+
} from '../../src/search/evaluator.js';
|
|
7
|
+
import { createTrajectory } from '../../src/types/trajectory.js';
|
|
8
|
+
import { createTask } from '../../src/types/task.js';
|
|
9
|
+
import { createStep } from '../../src/types/step.js';
|
|
10
|
+
import { successOutcome, failureOutcome } from '../../src/types/outcome.js';
|
|
11
|
+
import type { Trajectory } from '../../src/types/trajectory.js';
|
|
12
|
+
import type { Task } from '../../src/types/task.js';
|
|
13
|
+
|
|
14
|
+
/**
 * Builds a Task fixture for the evaluator tests.
 *
 * The fixture starts from a 'code'-domain task with a fixed description;
 * any field supplied in `overrides` replaces the corresponding default.
 *
 * @param overrides - Optional task fields that take precedence over the defaults.
 * @returns The task produced by `createTask`.
 */
function makeTask(overrides?: Partial<Task>): Task {
  const task = createTask({
    domain: 'code',
    description: 'fix a bug in the login module',
    // Spread last so caller-provided fields win over the defaults above.
    ...overrides,
  });
  return task;
}
|
|
21
|
+
|
|
22
|
+
/**
 * Builds a Trajectory fixture for the evaluator tests.
 *
 * @param opts - Optional knobs for the generated trajectory:
 *   - `success`: selects a success outcome ('done') or a failure outcome
 *     ('failed'); defaults to `true`.
 *   - `stepCount`: number of steps to generate; defaults to 3.
 *   - `errorSteps`: how many of the leading steps carry an error observation;
 *     defaults to 0.
 *   - `withAttribution`: when truthy, every step gets an attribution score of
 *     0.2; otherwise the score is left undefined.
 * @returns The trajectory produced by `createTrajectory` for agent 'agent-1'.
 */
function makeTrajectory(opts?: {
  success?: boolean;
  stepCount?: number;
  errorSteps?: number;
  withAttribution?: boolean;
}): Trajectory {
  const totalSteps = opts?.stepCount ?? 3;
  const failingSteps = opts?.errorSteps ?? 0;
  const succeeded = opts?.success ?? true;

  const steps: ReturnType<typeof createStep>[] = [];
  let index = 0;
  while (index < totalSteps) {
    // The first `failingSteps` steps report an error; the rest report 'ok'.
    const observation = index < failingSteps ? 'error: something failed' : 'ok';
    const attributionScore = opts?.withAttribution ? 0.2 : undefined;

    steps.push(
      createStep({
        action: `Step ${index + 1}`,
        observation,
        attributionScore,
      })
    );
    index += 1;
  }

  return createTrajectory({
    task: makeTask(),
    steps,
    outcome: succeeded ? successOutcome('done') : failureOutcome('failed'),
    agentId: 'agent-1',
  });
}
|
|
49
|
+
|
|
50
|
+
describe('SolutionEvaluator', () => {
|
|
51
|
+
// Evaluator under test; reassigned before every test case so that verifiers
// registered by one test are not carried over on the same instance.
let evaluator: SolutionEvaluator;

beforeEach(() => {
  // NOTE(review): `null` is presumably "no agent configured" — confirm
  // against createSolutionEvaluator's signature.
  const freshEvaluator = createSolutionEvaluator(null);
  evaluator = freshEvaluator;
});
|
|
56
|
+
|
|
57
|
+
// Exercises the evaluator with no verifier registered; the first test pins
// that the reported method in this setup is 'heuristic'.
describe('heuristic evaluation (fallback)', () => {
  it('should rate successful trajectory with few steps well', async () => {
    const trajectory = makeTrajectory({ success: true, stepCount: 3 });
    const task = makeTask();

    const result = await evaluator.evaluate(trajectory, task);

    expect(result.method).toBe('heuristic');
    expect(result.score).toBeGreaterThanOrEqual(0.7);
    expect(result.acceptable).toBe(true);
    expect(result.issues).toHaveLength(0);
  });

  it('should penalize failed trajectory', async () => {
    const failed = makeTrajectory({ success: false });
    const task = makeTask();

    const result = await evaluator.evaluate(failed, task);

    expect(result.score).toBeLessThan(0.6);
    expect(result.acceptable).toBe(false);
    expect(result.issues.length).toBeGreaterThan(0);
    // The first reported issue is expected to be the failure itself.
    expect(result.issues[0].type).toBe('error');
  });

  it('should penalize empty trajectory', async () => {
    const emptyTrajectory = createTrajectory({
      task: makeTask(),
      steps: [],
      outcome: failureOutcome('no steps taken'),
      agentId: 'a',
    });

    const result = await evaluator.evaluate(emptyTrajectory, makeTask());

    expect(result.score).toBeLessThan(0.3);
    const hasIncompleteIssue = result.issues.some((issue) => issue.type === 'incomplete');
    expect(hasIncompleteIssue).toBe(true);
  });

  it('should penalize very long trajectories', async () => {
    const longTrajectory = makeTrajectory({ success: true, stepCount: 25 });
    const task = makeTask();

    const result = await evaluator.evaluate(longTrajectory, task);

    const hasInefficientIssue = result.issues.some((issue) => issue.type === 'inefficient');
    expect(hasInefficientIssue).toBe(true);
  });

  it('should penalize trajectories with error steps', async () => {
    const withErrors = await evaluator.evaluate(
      makeTrajectory({ success: true, stepCount: 5, errorSteps: 3 }),
      makeTask()
    );

    // Baseline: same length and outcome, but no error observations.
    const clean = await evaluator.evaluate(
      makeTrajectory({ success: true, stepCount: 5 }),
      makeTask()
    );

    expect(withErrors.score).toBeLessThan(clean.score);
  });

  it('should boost score for steps with high attribution', async () => {
    const attributed = makeTrajectory({ success: true, withAttribution: true });
    const withAttr = await evaluator.evaluate(attributed, makeTask());

    const plain = makeTrajectory({ success: true });
    const withoutAttr = await evaluator.evaluate(plain, makeTask());

    expect(withAttr.score).toBeGreaterThanOrEqual(withoutAttr.score);
  });

  it('should clamp score between 0 and 1', async () => {
    // Worst case built here: a failed outcome with no steps at all.
    const worstCase = createTrajectory({
      task: makeTask(),
      steps: [],
      outcome: failureOutcome('total failure'),
      agentId: 'a',
    });

    const { score } = await evaluator.evaluate(worstCase, makeTask());

    expect(score).toBeGreaterThanOrEqual(0);
    expect(score).toBeLessThanOrEqual(1);
  });
});
|
|
143
|
+
|
|
144
|
+
// Exercises the evaluator with a verifier registered for a domain, covering
// the cases where the verifier result is used and where evaluation reports
// the 'heuristic' method instead.
describe('verification-based evaluation', () => {
  it('should use registered verifier for matching domain', async () => {
    async function verifier(_t: Trajectory, _task: Task): Promise<VerificationResult> {
      return {
        passed: true,
        confidence: 0.95,
        issues: [],
        details: 'all tests pass',
      };
    }
    evaluator.registerVerifier('code', verifier);

    const trajectory = makeTrajectory();
    const task = makeTask({ domain: 'code' });
    const result = await evaluator.evaluate(trajectory, task);

    expect(result.method).toBe('verification');
    expect(result.acceptable).toBe(true);
    expect(result.score).toBeGreaterThanOrEqual(0.7);
  });

  it('should fall through to heuristic when verification confidence too low', async () => {
    async function verifier(): Promise<VerificationResult> {
      return {
        passed: true,
        // 0.3 sits under the default confidence threshold of 0.8.
        confidence: 0.3,
      };
    }
    evaluator.registerVerifier('code', verifier);

    const trajectory = makeTrajectory();
    const task = makeTask({ domain: 'code' });
    const result = await evaluator.evaluate(trajectory, task);

    expect(result.method).toBe('heuristic');
  });

  it('should fall through when verifier throws', async () => {
    async function verifier(): Promise<VerificationResult> {
      throw new Error('verification crashed');
    }
    evaluator.registerVerifier('code', verifier);

    const trajectory = makeTrajectory();
    const task = makeTask({ domain: 'code' });
    const result = await evaluator.evaluate(trajectory, task);

    expect(result.method).toBe('heuristic');
  });

  it('should handle failed verification with issues', async () => {
    async function verifier(): Promise<VerificationResult> {
      return {
        passed: false,
        confidence: 0.9,
        issues: [
          { type: 'incorrect', description: 'test failed: should return 42', severity: 'major' },
        ],
      };
    }
    evaluator.registerVerifier('code', verifier);

    const trajectory = makeTrajectory();
    const task = makeTask({ domain: 'code' });
    const result = await evaluator.evaluate(trajectory, task);

    expect(result.method).toBe('verification');
    expect(result.acceptable).toBe(false);
    expect(result.score).toBeLessThan(0.5);
    expect(result.issues).toHaveLength(1);
  });

  it('should not use verifier for wrong domain', async () => {
    async function verifier(): Promise<VerificationResult> {
      return {
        passed: true,
        confidence: 0.95,
      };
    }
    evaluator.registerVerifier('python', verifier);

    // The verifier is registered for 'python' while the task below is 'code'.
    const trajectory = makeTrajectory();
    const task = makeTask({ domain: 'code' });
    const result = await evaluator.evaluate(trajectory, task);

    expect(result.method).toBe('heuristic');
  });
});
|
|
217
|
+
|
|
218
|
+
describe('alwaysUseAgent config', () => {
  it('should skip verification shortcut when alwaysUseAgent=true', async () => {
    // Dedicated evaluator: null first argument, alwaysUseAgent switched on.
    const agentOnlyEvaluator = createSolutionEvaluator(null, { alwaysUseAgent: true });

    // A near-certain passing verifier is registered for the task's domain, yet
    // the test expects its verdict not to be used as the evaluation method.
    async function confidentVerifier(): Promise<VerificationResult> {
      return { passed: true, confidence: 0.99 };
    }
    agentOnlyEvaluator.registerVerifier('code', confidentVerifier);

    // With no agent or taskRunner, falls through to heuristic
    const codeTask = makeTask({ domain: 'code' });
    const evaluation = await agentOnlyEvaluator.evaluate(makeTrajectory(), codeTask);

    expect(evaluation.method).toBe('heuristic');
  });
});
|
|
233
|
+
|
|
234
|
+
describe('parseQuality edge cases', () => {
  // NOTE(review): this case evaluates with the shared evaluator and default
  // fixtures only; it checks that the reported quality is one of the four
  // known levels. Presumably no agent is configured on the shared evaluator,
  // so the heuristic path is what gets exercised - confirm against the setup.
  it('should handle unknown quality strings gracefully', async () => {
    const knownQualities = ['excellent', 'good', 'needs_work', 'poor'];

    const evaluation = await evaluator.evaluate(makeTrajectory(), makeTask());

    expect(knownQualities).toContain(evaluation.quality);
  });
});
|
|
242
|
+
});
|
|
243
|
+
|
|
244
|
+
describe('scoreToQuality (used by evaluator)', () => {
  it('should map scores to correct quality levels', async () => {
    const refinementTypes = await import('../../src/search/refinement-types.js');
    const scoreToQuality = refinementTypes.scoreToQuality;

    // [score, expected quality], checked in this order. The pairs sit on and
    // around the boundaries the assertions imply: 0.85, 0.7 and 0.4.
    const expectations: Array<[number, string]> = [
      [0.9, 'excellent'],
      [0.85, 'excellent'],
      [0.75, 'good'],
      [0.7, 'good'],
      [0.5, 'needs_work'],
      [0.4, 'needs_work'],
      [0.3, 'poor'],
      [0.0, 'poor'],
    ];

    for (const [score, quality] of expectations) {
      expect(scoreToQuality(score)).toBe(quality);
    }
  });
});
|
|
@@ -0,0 +1,357 @@
|
|
|
1
|
+
import { describe, it, expect, beforeEach } from 'vitest';
|
|
2
|
+
import {
|
|
3
|
+
VerificationRunner,
|
|
4
|
+
createVerificationRunner,
|
|
5
|
+
TestRunners,
|
|
6
|
+
type CommandVerificationConfig,
|
|
7
|
+
} from '../../src/search/verification-runner.js';
|
|
8
|
+
import { createTrajectory } from '../../src/types/trajectory.js';
|
|
9
|
+
import { createTask } from '../../src/types/task.js';
|
|
10
|
+
import { createStep } from '../../src/types/step.js';
|
|
11
|
+
import { successOutcome } from '../../src/types/outcome.js';
|
|
12
|
+
import type { Trajectory } from '../../src/types/trajectory.js';
|
|
13
|
+
import type { Task } from '../../src/types/task.js';
|
|
14
|
+
|
|
15
|
+
/**
 * Build the task fixture shared by the tests in this file.
 *
 * @returns A new task created with domain 'test-domain' and description 'test task'.
 */
function makeTask(): Task {
  const fixture = createTask({
    domain: 'test-domain',
    description: 'test task',
  });
  return fixture;
}
|
|
21
|
+
|
|
22
|
+
/**
 * Build the trajectory fixture shared by the tests in this file.
 *
 * @returns A new trajectory for a fresh `makeTask()` task, with a single
 *   'echo hello' step, a success outcome of 'hello world' and agent id 'agent'.
 */
function makeTrajectory(): Trajectory {
  const task = makeTask();
  const steps = [createStep({ action: 'echo hello', observation: 'hello' })];
  const outcome = successOutcome('hello world');

  return createTrajectory({ task, steps, outcome, agentId: 'agent' });
}
|
|
30
|
+
|
|
31
|
+
describe('VerificationRunner', () => {
|
|
32
|
+
let runner: VerificationRunner;
|
|
33
|
+
|
|
34
|
+
beforeEach(() => {
  // Each test gets its own runner, configured with cwd '/tmp' and a timeout
  // of 5000 (presumably milliseconds - confirm against the runner's options).
  const runnerOptions = { cwd: '/tmp', timeout: 5000 };
  runner = createVerificationRunner(runnerOptions);
});
|
|
37
|
+
|
|
38
|
+
describe('runCommand', () => {
  it('should execute a simple command and capture stdout', async () => {
    const { stdout, exitCode, timedOut } = await runner.runCommand('echo hello', {});

    expect(stdout.trim()).toBe('hello');
    expect(exitCode).toBe(0);
    expect(timedOut).toBe(false);
  });

  it('should capture stderr', async () => {
    // The shell redirect sends the echoed text to stderr instead of stdout.
    const { stderr } = await runner.runCommand('echo error >&2', {});

    expect(stderr.trim()).toBe('error');
  });

  it('should return non-zero exit code for failing commands', async () => {
    const { exitCode } = await runner.runCommand('exit 42', {});

    expect(exitCode).toBe(42);
  });

  it('should handle command timeout', { timeout: 20000 }, async () => {
    // Use a bash loop that responds to SIGTERM better than plain sleep
    const endlessLoop = 'while true; do sleep 0.1; done';

    const { timedOut } = await runner.runCommand(endlessLoop, { timeout: 500 });

    expect(timedOut).toBe(true);
  });

  it('should handle command not found', async () => {
    const { exitCode } = await runner.runCommand('nonexistent_command_xyz_12345', {});

    expect(exitCode).not.toBe(0);
  });

  it('should pass environment variables', async () => {
    const env = { MY_TEST_VAR: 'hello123' };

    const { stdout } = await runner.runCommand('echo $MY_TEST_VAR', { env });

    expect(stdout.trim()).toBe('hello123');
  });

  it('should use specified working directory', async () => {
    const { stdout } = await runner.runCommand('pwd', { cwd: '/tmp' });

    // Pattern match rather than equality: only requires '/tmp' to appear
    // somewhere in the reported directory.
    expect(stdout.trim()).toMatch(/\/tmp/);
  });
});
|
|
79
|
+
|
|
80
|
+
describe('runCommandSync', () => {
  it('should execute command synchronously', () => {
    const { stdout, exitCode } = runner.runCommandSync('echo sync-test');

    expect(stdout.trim()).toBe('sync-test');
    expect(exitCode).toBe(0);
  });

  it('should handle sync command failure', () => {
    const { exitCode } = runner.runCommandSync('exit 1');

    expect(exitCode).toBe(1);
  });

  it('should detect sync command failure on timeout', () => {
    const { exitCode } = runner.runCommandSync('sleep 30', { timeout: 500 });

    // execSync throws on timeout; killed + SIGTERM detection may vary by platform.
    // The only thing asserted here is that the command is not reported as a success.
    expect(exitCode).not.toBe(0);
  });
});
|
|
99
|
+
|
|
100
|
+
describe('createVerifier', () => {
  // Build a verifier from `config` and run it once against fresh fixtures.
  const verifyWith = (config: CommandVerificationConfig) =>
    runner.createVerifier(config)(makeTrajectory(), makeTask());

  it('should create verifier that passes on exit code 0', async () => {
    const verdict = await verifyWith({
      command: 'echo "all tests pass"',
      timeout: 5000,
    });

    expect(verdict.passed).toBe(true);
    // No pattern/extractor → 0.7
    expect(verdict.confidence).toBe(0.7);
  });

  it('should create verifier that fails on non-zero exit', async () => {
    const verdict = await verifyWith({
      command: 'exit 1',
      timeout: 5000,
    });

    expect(verdict.passed).toBe(false);
    expect(verdict.issues!.length).toBeGreaterThan(0);
  });

  it('should check success pattern when provided', async () => {
    const verdict = await verifyWith({
      command: 'echo "Tests: 5 passed, 0 failed"',
      successPattern: /\d+ passed, 0 failed/,
      timeout: 5000,
    });

    expect(verdict.passed).toBe(true);
    // Has successPattern → 0.9
    expect(verdict.confidence).toBe(0.9);
  });

  it('should check failure pattern when provided', async () => {
    // The command exits 0; the failure is expected to come from the output
    // matching failurePattern.
    const verdict = await verifyWith({
      command: 'echo "FAIL test_login"',
      failurePattern: /FAIL/,
      successOnZeroExit: false,
      timeout: 5000,
    });

    expect(verdict.passed).toBe(false);
  });

  it('should use custom issue extractor', async () => {
    const verdict = await verifyWith({
      command: 'echo "ERROR: missing return"',
      timeout: 5000,
      successOnZeroExit: false,
      issueExtractor: (output) => {
        // One critical issue per "ERROR: ..." line found, otherwise none.
        const found = /ERROR:\s*(.+)/.exec(output);
        if (!found) {
          return [];
        }
        return [{ type: 'error' as const, description: found[1], severity: 'critical' as const }];
      },
    });

    // Has issueExtractor
    expect(verdict.confidence).toBe(0.9);
  });

  it('should handle timeout in verifier', { timeout: 20000 }, async () => {
    const verdict = await verifyWith({
      command: 'while true; do sleep 0.1; done',
      timeout: 500,
    });

    expect(verdict.passed).toBe(false);
    // Timeout → 0.5
    expect(verdict.confidence).toBe(0.5);
    expect(verdict.details).toContain('timed out');
  });

  it('should interpolate {{taskId}} and {{trajectoryId}} in command', async () => {
    const templated: CommandVerificationConfig = {
      command: 'echo "task={{taskId}} traj={{trajectoryId}}"',
      timeout: 5000,
    };

    // Fixtures are kept in variables so their ids can be compared with the
    // text the command echoed back.
    const trajectory = makeTrajectory();
    const task = makeTask();
    const verify = runner.createVerifier(templated);
    const verdict = await verify(trajectory, task);

    expect(verdict.details).toContain(`task=${task.id}`);
    expect(verdict.details).toContain(`traj=${trajectory.id}`);
  });
});
|
|
203
|
+
|
|
204
|
+
describe('createCompositeVerifier', () => {
  it('should pass when all verifiers pass', async () => {
    const composite = runner.createCompositeVerifier([
      { command: 'echo pass1', timeout: 5000 },
      { command: 'echo pass2', timeout: 5000 },
    ]);

    const result = await composite(makeTrajectory(), makeTask());
    expect(result.passed).toBe(true);
  });

  it('should fail when any verifier fails', async () => {
    const composite = runner.createCompositeVerifier([
      { command: 'echo pass', timeout: 5000 },
      { command: 'exit 1', timeout: 5000 },
    ]);

    const result = await composite(makeTrajectory(), makeTask());
    expect(result.passed).toBe(false);
  });

  it('should aggregate issues from all verifiers', async () => {
    const composite = runner.createCompositeVerifier([
      { command: 'exit 1', timeout: 5000 },
      { command: 'exit 2', timeout: 5000 },
    ]);

    const result = await composite(makeTrajectory(), makeTask());
    // Explicit fallback instead of a non-null assertion: a missing issues
    // list fails the expectation rather than throwing a TypeError.
    expect((result.issues ?? []).length).toBeGreaterThanOrEqual(1);
  });

  it('should fail fast on critical issues', async () => {
    let secondRan = false;
    const configs: CommandVerificationConfig[] = [
      {
        // Fails and reports a critical issue, which should stop the composite.
        command: 'exit 1',
        timeout: 5000,
        issueExtractor: () => [
          { type: 'error', description: 'critical failure', severity: 'critical' },
        ],
      },
      {
        // The probe fails on purpose. The flag is set from issueExtractor, and
        // this test already relies on extractors being invoked for failing
        // commands (see the first config).
        // NOTE(review): presumably the runner does not call issueExtractor for
        // a passing command; if so, a succeeding probe would leave the flag
        // false even when the second verifier did run - confirm.
        command: 'echo should-not-run && exit 1',
        timeout: 5000,
        issueExtractor: () => {
          secondRan = true;
          return [];
        },
      },
    ];

    const composite = runner.createCompositeVerifier(configs);
    const result = await composite(makeTrajectory(), makeTask());

    // The first verifier failed, so the composite as a whole must fail.
    expect(result.passed).toBe(false);
    // Second verifier should not have run due to fail-fast
    expect(secondRan).toBe(false);
  });

  it('should average confidence across verifiers', async () => {
    const composite = runner.createCompositeVerifier([
      { command: 'echo pass', timeout: 5000 }, // confidence 0.7
      { command: 'echo pass', timeout: 5000 }, // confidence 0.7
    ]);

    const result = await composite(makeTrajectory(), makeTask());
    expect(result.confidence).toBeCloseTo(0.7, 1);
  });
});
|
|
272
|
+
|
|
273
|
+
describe('TestRunners presets', () => {
  // Each case only inspects the configuration object a preset returns;
  // none of the generated commands is executed here.

  it('should create vitest configuration', () => {
    const targetFile = 'tests/foo.test.ts';
    const preset = TestRunners.vitest(targetFile);

    expect(preset.command).toContain('vitest');
    expect(preset.command).toContain('tests/foo.test.ts');
    expect(preset.timeout).toBe(60000);
    expect(preset.issueExtractor).toBeDefined();
  });

  it('should create jest configuration', () => {
    const preset = TestRunners.jest();

    expect(preset.command).toContain('jest');
    expect(preset.issueExtractor).toBeDefined();
  });

  it('should create typescript configuration', () => {
    const preset = TestRunners.typescript();

    expect(preset.command).toContain('tsc --noEmit');
    expect(preset.issueExtractor).toBeDefined();
  });

  it('should create eslint configuration', () => {
    const { command } = TestRunners.eslint('src/');

    expect(command).toContain('eslint');
    expect(command).toContain('src/');
  });

  it('should create pytest configuration', () => {
    const preset = TestRunners.pytest();

    expect(preset.command).toContain('pytest');
    expect(preset.failurePattern).toBeDefined();
  });

  it('should create go test configuration', () => {
    const { command } = TestRunners.goTest();

    expect(command).toContain('go test');
  });

  it('should create cargo test configuration', () => {
    const preset = TestRunners.cargoTest();

    expect(preset.command).toContain('cargo test');
    expect(preset.timeout).toBe(120000);
  });
});
|
|
317
|
+
|
|
318
|
+
describe('default issue extraction', () => {
  // None of these configs supplies an issueExtractor, so the reported issues
  // come from whatever the runner produces by default for a failing command.
  const runFailingCommand = (command: string) => {
    const config: CommandVerificationConfig = { command, timeout: 5000 };
    return runner.createVerifier(config)(makeTrajectory(), makeTask());
  };

  it('should extract error messages from output', async () => {
    const verdict = await runFailingCommand('echo "Error: cannot read file" && exit 1');

    expect(verdict.issues!.length).toBeGreaterThan(0);
    expect(verdict.issues![0].description).toContain('cannot read file');
  });

  it('should extract fail messages from output', async () => {
    const verdict = await runFailingCommand('echo "Fail: test_login" && exit 1');

    expect(verdict.issues!.length).toBeGreaterThan(0);
  });

  it('should generate generic issue when no pattern matches', async () => {
    // No output at all: the issue text is expected to mention the exit code.
    const verdict = await runFailingCommand('exit 99');

    expect(verdict.issues!.length).toBeGreaterThan(0);
    expect(verdict.issues![0].description).toContain('exit code 99');
  });
});
|
|
357
|
+
});
|