@chongyan/autospec 1.0.2 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +240 -21
- package/README.md +47 -608
- package/dist/README.md +47 -0
- package/dist/adapters/claude-code/README.md.enc +6 -0
- package/dist/adapters/claude-code/agents.js +1 -0
- package/dist/adapters/claude-code/commands.config.js +1 -0
- package/dist/adapters/claude-code/commands.js +1 -0
- package/dist/adapters/claude-code/hooks.config.js +2 -0
- package/dist/adapters/claude-code/hooks.js +1 -0
- package/dist/adapters/claude-code/install.js +1 -0
- package/dist/adapters/claude-code/skills.js +1 -0
- package/dist/adapters/codex/README.md.enc +6 -0
- package/dist/adapters/codex/hooks/pre-commit.sh +10 -0
- package/dist/adapters/codex/install.js +1 -0
- package/dist/adapters/codex/prompts/env-learn.md.enc +6 -0
- package/dist/adapters/codex/prompts/review.md.enc +6 -0
- package/dist/adapters/codex/wrappers/autospec-cli.sh +118 -0
- package/dist/adapters/codex/wrappers/parallel.sh +20 -0
- package/dist/adapters/codex/wrappers/post-task.sh +138 -0
- package/dist/bin/autospec.js +2 -0
- package/dist/knowledge/README.md.enc +6 -0
- package/dist/knowledge/change-management.md.enc +6 -0
- package/dist/knowledge/cognition-engine.md.enc +6 -0
- package/dist/knowledge/config/baseline-permissions.md.enc +6 -0
- package/dist/knowledge/config/external-mounts.yaml.enc +6 -0
- package/dist/knowledge/config/model-profiles.yaml.enc +6 -0
- package/dist/knowledge/config/role-composition.yaml.enc +6 -0
- package/dist/knowledge/config/token-optimization.yaml.enc +6 -0
- package/dist/knowledge/config/validation-patterns.yaml.enc +6 -0
- package/dist/knowledge/constitution.md.enc +6 -0
- package/dist/knowledge/core-rules.md.enc +6 -0
- package/dist/knowledge/environment/adapters/README.md.enc +6 -0
- package/dist/knowledge/environment/detection-patterns.yaml.enc +6 -0
- package/dist/knowledge/environment/failure-patterns.json +223 -0
- package/dist/knowledge/environment/repair-strategies.json +206 -0
- package/dist/knowledge/memory/README.md.enc +6 -0
- package/dist/knowledge/memory/field/README.md.enc +6 -0
- package/dist/knowledge/memory/project/decisions/README.md.enc +6 -0
- package/dist/knowledge/memory/project/evolution-log.md.enc +6 -0
- package/dist/knowledge/memory/project/health-metrics.md.enc +6 -0
- package/dist/knowledge/memory/team/best-practices.md.enc +6 -0
- package/dist/knowledge/pipeline/code.md.enc +6 -0
- package/dist/knowledge/pipeline/explore.md.enc +6 -0
- package/dist/knowledge/pipeline/plan.md.enc +6 -0
- package/dist/knowledge/pipeline/protocol.md.enc +6 -0
- package/dist/knowledge/protocol/capabilities.yaml.enc +6 -0
- package/dist/knowledge/protocol/evolve-integration.md.enc +6 -0
- package/dist/knowledge/skills/README.md.enc +6 -0
- package/dist/knowledge/skills/adversarial-review.md.enc +6 -0
- package/dist/knowledge/skills/analyze-requirement.md.enc +6 -0
- package/dist/knowledge/skills/channel-operations.md.enc +6 -0
- package/dist/knowledge/skills/content-operations.md.enc +6 -0
- package/dist/knowledge/skills/content-prompts.md.enc +6 -0
- package/dist/knowledge/skills/conversion-optimization.md.enc +6 -0
- package/dist/knowledge/skills/data-operations.md.enc +6 -0
- package/dist/knowledge/skills/design-solution.md.enc +6 -0
- package/dist/knowledge/skills/growth-strategies.md.enc +6 -0
- package/dist/knowledge/skills/implement-code.md.enc +6 -0
- package/dist/knowledge/skills/knowledge-distill.md.enc +6 -0
- package/dist/knowledge/skills/parallel-dev.md.enc +6 -0
- package/dist/knowledge/skills/private-domain-traffic.md.enc +6 -0
- package/dist/knowledge/skills/skill-format.md.enc +6 -0
- package/dist/knowledge/skills/social-commerce.md.enc +6 -0
- package/dist/knowledge/skills/team-orchestration.md.enc +6 -0
- package/dist/knowledge/skills/unified-review.md.enc +6 -0
- package/dist/knowledge/skills/user-operations.md.enc +6 -0
- package/dist/knowledge/templates/autospec-config.yaml.enc +6 -0
- package/dist/knowledge/templates/smoke-test.md.enc +6 -0
- package/dist/knowledge/templates/spec/SPEC.md.enc +6 -0
- package/dist/knowledge/templates/spec/layers/delta.md.enc +6 -0
- package/dist/knowledge/templates/spec/layers/how.md.enc +6 -0
- package/dist/knowledge/templates/spec/layers/plan.md.enc +6 -0
- package/dist/knowledge/templates/spec/layers/what.md.enc +6 -0
- package/dist/knowledge/templates/spec/layers/why.md.enc +6 -0
- package/dist/knowledge/templates/wiki/catalog.yaml.enc +6 -0
- package/dist/knowledge/templates/wiki/content.md.enc +6 -0
- package/dist/knowledge/templates/wiki/meta.yaml.enc +6 -0
- package/dist/package.json +54 -0
- package/{plugins → dist/plugins}/.claude-plugin/plugin.json +259 -101
- package/dist/plugins/agents/roles/ai-engineer.md.enc +6 -0
- package/dist/plugins/agents/roles/backend-engineer.md.enc +6 -0
- package/dist/plugins/agents/roles/ceo.md.enc +6 -0
- package/dist/plugins/agents/roles/channel-ops.md.enc +6 -0
- package/dist/plugins/agents/roles/content-ops.md.enc +6 -0
- package/dist/plugins/agents/roles/conversion-ops.md.enc +6 -0
- package/dist/plugins/agents/roles/data-engineer.md.enc +6 -0
- package/dist/plugins/agents/roles/data-ops.md.enc +6 -0
- package/dist/plugins/agents/roles/devops-engineer.md.enc +6 -0
- package/dist/plugins/agents/roles/frontend-engineer.md.enc +6 -0
- package/dist/plugins/agents/roles/marketing-director.md.enc +6 -0
- package/dist/plugins/agents/roles/operations-director.md.enc +6 -0
- package/dist/plugins/agents/roles/private-traffic.md.enc +6 -0
- package/dist/plugins/agents/roles/product-owner.md.enc +6 -0
- package/dist/plugins/agents/roles/quality-engineer.md.enc +6 -0
- package/dist/plugins/agents/roles/security-engineer.md.enc +6 -0
- package/dist/plugins/agents/roles/tech-lead.md.enc +6 -0
- package/dist/plugins/agents/roles/user-ops.md.enc +6 -0
- package/dist/plugins/agents/support/blind-comparator.md.enc +6 -0
- package/dist/plugins/agents/support/consistency-checker.md.enc +6 -0
- package/dist/plugins/agents/support/experiment-evaluator.md.enc +6 -0
- package/dist/plugins/agents/support/failure-diagnostician.md.enc +6 -0
- package/dist/plugins/agents/support/independent-reviewer.md.enc +6 -0
- package/dist/plugins/agents/support/memory-curator.md.enc +6 -0
- package/dist/plugins/agents/support/monitoring-agent.md.enc +6 -0
- package/dist/plugins/agents/support/safety-auditor.md.enc +6 -0
- package/dist/plugins/agents/support/skill-benchmarker.md.enc +6 -0
- package/dist/plugins/agents/support/skill-forger.md.enc +6 -0
- package/dist/plugins/agents/support/stage-gate-evaluator.md.enc +6 -0
- package/dist/plugins/agents/support/team-orchestrator.md.enc +6 -0
- package/dist/plugins/agents/support/test-coverage-reviewer.md.enc +6 -0
- package/dist/plugins/benchmarks/templates/README.md.enc +6 -0
- package/dist/plugins/benchmarks/templates/commands/code-template.yaml.enc +6 -0
- package/dist/plugins/benchmarks/templates/commands/explore-template.yaml.enc +6 -0
- package/dist/plugins/benchmarks/templates/commands/field-evolve-template.yaml.enc +6 -0
- package/dist/plugins/benchmarks/templates/commands/plan-template.yaml.enc +6 -0
- package/dist/plugins/benchmarks/templates/commands/project-evolve-template.yaml.enc +6 -0
- package/dist/plugins/benchmarks/templates/commands/review-template.yaml.enc +6 -0
- package/dist/plugins/benchmarks/templates/commands/run-template.yaml.enc +6 -0
- package/dist/plugins/benchmarks/templates/skills/benchmark-executor-template.yaml.enc +6 -0
- package/dist/plugins/benchmarks/templates/skills/benchmark-generator-template.yaml.enc +6 -0
- package/dist/plugins/benchmarks/templates/skills/delivery-stage-template.yaml.enc +6 -0
- package/dist/plugins/benchmarks/templates/skills/design-stage-template.yaml.enc +6 -0
- package/dist/plugins/benchmarks/templates/skills/exploration-phase-template.yaml.enc +6 -0
- package/dist/plugins/benchmarks/templates/skills/field-evolve-analyzer-template.yaml.enc +6 -0
- package/dist/plugins/benchmarks/templates/skills/field-evolve-distiller-template.yaml.enc +6 -0
- package/dist/plugins/benchmarks/templates/skills/field-evolve-executor-template.yaml.enc +6 -0
- package/dist/plugins/benchmarks/templates/skills/field-evolve-fixer-template.yaml.enc +6 -0
- package/dist/plugins/benchmarks/templates/skills/field-evolve-learner-template.yaml.enc +6 -0
- package/dist/plugins/benchmarks/templates/skills/field-evolve-scanner-template.yaml.enc +6 -0
- package/dist/plugins/benchmarks/templates/skills/field-evolve-template.yaml.enc +6 -0
- package/dist/plugins/benchmarks/templates/skills/field-evolve-verifier-template.yaml.enc +6 -0
- package/dist/plugins/benchmarks/templates/skills/implementation-stage-template.yaml.enc +6 -0
- package/dist/plugins/benchmarks/templates/skills/layer1-validation-template.yaml.enc +6 -0
- package/dist/plugins/benchmarks/templates/skills/project-evolve-analyzer-template.yaml.enc +6 -0
- package/dist/plugins/benchmarks/templates/skills/project-evolve-fixer-template.yaml.enc +6 -0
- package/dist/plugins/benchmarks/templates/skills/project-evolve-generator-template.yaml.enc +6 -0
- package/dist/plugins/benchmarks/templates/skills/project-evolve-learner-template.yaml.enc +6 -0
- package/dist/plugins/benchmarks/templates/skills/project-evolve-reviewer-template.yaml.enc +6 -0
- package/dist/plugins/benchmarks/templates/skills/project-evolve-scanner-template.yaml.enc +6 -0
- package/dist/plugins/benchmarks/templates/skills/project-evolve-template.yaml.enc +6 -0
- package/dist/plugins/benchmarks/templates/skills/project-evolve-verifier-template.yaml.enc +6 -0
- package/dist/plugins/benchmarks/templates/skills/requirement-analyzer-template.yaml.enc +6 -0
- package/dist/plugins/benchmarks/templates/skills/skill-forge-template.yaml.enc +6 -0
- package/dist/plugins/benchmarks/templates/skills/startup-guard-template.yaml.enc +6 -0
- package/dist/plugins/benchmarks/templates/skills/testing-stage-template.yaml.enc +6 -0
- package/dist/plugins/commands/README.md.enc +6 -0
- package/dist/plugins/commands/automation.md.enc +6 -0
- package/dist/plugins/commands/code.md.enc +6 -0
- package/dist/plugins/commands/contribute.md.enc +6 -0
- package/dist/plugins/commands/dashboard.md.enc +6 -0
- package/dist/plugins/commands/env.md.enc +6 -0
- package/dist/plugins/commands/explore.md.enc +6 -0
- package/dist/plugins/commands/field-evolve.md.enc +6 -0
- package/dist/plugins/commands/global.md.enc +6 -0
- package/dist/plugins/commands/init.md.enc +6 -0
- package/dist/plugins/commands/list.md.enc +6 -0
- package/dist/plugins/commands/memory.md.enc +6 -0
- package/dist/plugins/commands/monitor.md.enc +6 -0
- package/dist/plugins/commands/org.md.enc +6 -0
- package/dist/plugins/commands/persist.md.enc +6 -0
- package/dist/plugins/commands/plan.md.enc +6 -0
- package/dist/plugins/commands/plugin.md.enc +6 -0
- package/dist/plugins/commands/project-evolve.md.enc +6 -0
- package/dist/plugins/commands/review.md.enc +6 -0
- package/dist/plugins/commands/run.md.enc +6 -0
- package/dist/plugins/commands/status.md.enc +6 -0
- package/dist/plugins/commands/sync.md.enc +6 -0
- package/dist/plugins/commands/update.md.enc +6 -0
- package/dist/plugins/env-capabilities/env-core/plugin.json +33 -0
- package/dist/plugins/hooks/README.md.enc +6 -0
- package/dist/plugins/hooks/artifact-evaluation-hook.js +2 -0
- package/dist/plugins/hooks/cognitive-dreamer.js +2 -0
- package/dist/plugins/hooks/cognitive-sync.js +2 -0
- package/dist/plugins/hooks/cognitive-tracker.js +2 -0
- package/dist/plugins/hooks/config/detection-patterns.yaml.enc +6 -0
- package/dist/plugins/hooks/constitution-guard.js +2 -0
- package/dist/plugins/hooks/do-review-separation-guard.js +2 -0
- package/dist/plugins/hooks/environment-autocommit.js +2 -0
- package/dist/plugins/hooks/environment-doctor.js +1 -0
- package/dist/plugins/hooks/environment-startup-scan.js +2 -0
- package/dist/plugins/hooks/execution-tracker.js +2 -0
- package/dist/plugins/hooks/frozen-zone-guard.js +2 -0
- package/dist/plugins/hooks/layer1-validator.js +2 -0
- package/dist/plugins/hooks/lib/artifact-evaluator.js +1 -0
- package/dist/plugins/hooks/lib/assets/asset-decision-store.js +1 -0
- package/dist/plugins/hooks/lib/assets/asset-graph.js +1 -0
- package/dist/plugins/hooks/lib/assets/asset-hydrator.js +1 -0
- package/dist/plugins/hooks/lib/assets/asset-outcome-store.js +1 -0
- package/dist/plugins/hooks/lib/assets/asset-projection-compiler.js +1 -0
- package/dist/plugins/hooks/lib/assets/asset-registry.js +1 -0
- package/dist/plugins/hooks/lib/assets/asset-selector.js +1 -0
- package/dist/plugins/hooks/lib/assets/index.js +1 -0
- package/dist/plugins/hooks/lib/assets/paths.js +1 -0
- package/dist/plugins/hooks/lib/assets/projection-targets/claude-code.js +1 -0
- package/dist/plugins/hooks/lib/assets/projection-targets/codex.js +1 -0
- package/dist/plugins/hooks/lib/auto-fix-loop.js +1 -0
- package/dist/plugins/hooks/lib/benchmarks/change-detector.js +1 -0
- package/dist/plugins/hooks/lib/benchmarks/evaluator.js +1 -0
- package/dist/plugins/hooks/lib/benchmarks/integration-example.js +1 -0
- package/dist/plugins/hooks/lib/cognitive/adaptive-matcher.js +1 -0
- package/dist/plugins/hooks/lib/cognitive/base-store.js +1 -0
- package/dist/plugins/hooks/lib/cognitive/causal-engine.js +1 -0
- package/dist/plugins/hooks/lib/cognitive/cognitive-config.js +1 -0
- package/dist/plugins/hooks/lib/cognitive/cognitive-fingerprint.js +1 -0
- package/dist/plugins/hooks/lib/cognitive/collective-sync.js +1 -0
- package/dist/plugins/hooks/lib/cognitive/confidence-engine.js +1 -0
- package/dist/plugins/hooks/lib/cognitive/dream-engine.js +1 -0
- package/dist/plugins/hooks/lib/cognitive/episodic-store.js +1 -0
- package/dist/plugins/hooks/lib/cognitive/index.js +1 -0
- package/dist/plugins/hooks/lib/cognitive/kernel.js +1 -0
- package/dist/plugins/hooks/lib/cognitive/knowledge-distiller.js +1 -0
- package/dist/plugins/hooks/lib/cognitive/memory-transport.js +1 -0
- package/dist/plugins/hooks/lib/cognitive/persistence-manager.js +1 -0
- package/dist/plugins/hooks/lib/cognitive/priority-store.js +1 -0
- package/dist/plugins/hooks/lib/cognitive/procedural-store.js +1 -0
- package/dist/plugins/hooks/lib/cognitive/semantic-store.js +1 -0
- package/dist/plugins/hooks/lib/cognitive/skill-forge-candidate-loader.js +1 -0
- package/dist/plugins/hooks/lib/cognitive/skill-forge-draft.js +1 -0
- package/dist/plugins/hooks/lib/cognitive/skill-forge-proposal.js +1 -0
- package/dist/plugins/hooks/lib/cognitive/skill-forge-queue.js +1 -0
- package/dist/plugins/hooks/lib/cognitive/skill-tuner.js +1 -0
- package/dist/plugins/hooks/lib/cognitive/wiki-materializer.js +1 -0
- package/dist/plugins/hooks/lib/detection-pattern-loader.js +1 -0
- package/dist/plugins/hooks/lib/directory-discovery.js +1 -0
- package/dist/plugins/hooks/lib/environment-capability-package.js +1 -0
- package/dist/plugins/hooks/lib/environment-capability-probe.js +1 -0
- package/dist/plugins/hooks/lib/environment-config-loader.js +1 -0
- package/dist/plugins/hooks/lib/environment-executor.js +1 -0
- package/dist/plugins/hooks/lib/environment-feedback.js +1 -0
- package/dist/plugins/hooks/lib/environment-health-monitor.js +1 -0
- package/dist/plugins/hooks/lib/environment-knowledge-synthesizer.js +1 -0
- package/dist/plugins/hooks/lib/environment-knowledge-validator.js +1 -0
- package/dist/plugins/hooks/lib/environment-learning-discoverer.js +1 -0
- package/dist/plugins/hooks/lib/environment-learning-engine.js +1 -0
- package/dist/plugins/hooks/lib/environment-module-repository.js +1 -0
- package/dist/plugins/hooks/lib/environment-planner.js +1 -0
- package/dist/plugins/hooks/lib/environment-plugin-registry.js +1 -0
- package/dist/plugins/hooks/lib/environment-readiness.js +1 -0
- package/dist/plugins/hooks/lib/environment-route-ranking.js +1 -0
- package/dist/plugins/hooks/lib/environment-strategy-overlay.js +1 -0
- package/dist/plugins/hooks/lib/execution-path.js +1 -0
- package/dist/plugins/hooks/lib/external-mount-adapter.js +1 -0
- package/dist/plugins/hooks/lib/external-scanner.js +1 -0
- package/dist/plugins/hooks/lib/hook-error-recorder.js +1 -0
- package/dist/plugins/hooks/lib/hook-logger.js +1 -0
- package/dist/plugins/hooks/lib/hook-runner.js +2 -0
- package/dist/plugins/hooks/lib/hook-state-manager.js +1 -0
- package/dist/plugins/hooks/lib/memory-extractor.js +1 -0
- package/dist/plugins/hooks/lib/memory-manager.js +1 -0
- package/dist/plugins/hooks/lib/metrics-analyzer.js +1 -0
- package/dist/plugins/hooks/lib/mount-manager.js +1 -0
- package/dist/plugins/hooks/lib/plugin-activation-registry.js +1 -0
- package/dist/plugins/hooks/lib/plugin-selector.js +1 -0
- package/dist/plugins/hooks/lib/plugin-source-registry.js +1 -0
- package/dist/plugins/hooks/lib/plugin-workspace-registry.js +1 -0
- package/dist/plugins/hooks/lib/project-evolution/auto-fixer.js +1 -0
- package/dist/plugins/hooks/lib/project-evolution/memory-manager.js +1 -0
- package/dist/plugins/hooks/lib/project-evolution/pattern-detector.js +1 -0
- package/dist/plugins/hooks/lib/project-evolution/semantic-indexer.js +1 -0
- package/dist/plugins/hooks/lib/rollback-tracker.js +1 -0
- package/dist/plugins/hooks/lib/source-code-scanner.js +1 -0
- package/dist/plugins/hooks/lib/technology-stack-detector.js +1 -0
- package/dist/plugins/hooks/lib/test-failure-analyzer.js +1 -0
- package/dist/plugins/hooks/lib/test-failure-fixer.js +1 -0
- package/dist/plugins/hooks/lib/trace-context.js +1 -0
- package/dist/plugins/hooks/lib/validation-patterns.js +1 -0
- package/dist/plugins/hooks/memory-sync.js +1 -0
- package/dist/plugins/hooks/pipeline-observer.js +2 -0
- package/dist/plugins/hooks/retry-limit-guard.js +2 -0
- package/dist/plugins/hooks/scope-sentinel.js +2 -0
- package/dist/plugins/hooks/secret-scanner.js +2 -0
- package/dist/plugins/hooks/stop-evolve-prompt.js +1 -0
- package/dist/plugins/hooks/trace-initialization.js +2 -0
- package/dist/plugins/hooks/version-checker.js +2 -0
- package/dist/plugins/memory/templates/code-quality.yaml.enc +6 -0
- package/dist/plugins/memory/templates/multi-system.yaml.enc +6 -0
- package/dist/plugins/memory/templates/team-habits.yaml.enc +6 -0
- package/dist/plugins/memory/templates/testing.yaml.enc +6 -0
- package/dist/plugins/skills/README.md.enc +6 -0
- package/dist/plugins/skills/ab-test-executor/SKILL.md.enc +6 -0
- package/dist/plugins/skills/benchmark-executor/SKILL.md.enc +6 -0
- package/dist/plugins/skills/benchmark-generator/SKILL.md.enc +6 -0
- package/dist/plugins/skills/content-prompts/SKILL.md.enc +6 -0
- package/dist/plugins/skills/delivery-stage/SKILL.md.enc +6 -0
- package/dist/plugins/skills/design-stage/SKILL.md.enc +6 -0
- package/dist/plugins/skills/environment-learning/SKILL.md.enc +6 -0
- package/dist/plugins/skills/environment-resilience/build-failure-doctor.md.enc +6 -0
- package/dist/plugins/skills/environment-resilience/environment-repair.md.enc +6 -0
- package/dist/plugins/skills/environment-resilience/pre-flight-check.md.enc +6 -0
- package/dist/plugins/skills/experiment-evaluator/SKILL.md.enc +6 -0
- package/dist/plugins/skills/exploration-phase/SKILL.md.enc +6 -0
- package/dist/plugins/skills/field-evolve-analyzer/SKILL.md.enc +6 -0
- package/dist/plugins/skills/field-evolve-distiller/SKILL.md.enc +6 -0
- package/dist/plugins/skills/field-evolve-executor/SKILL.md.enc +6 -0
- package/dist/plugins/skills/field-evolve-executor/executor.js +2 -0
- package/dist/plugins/skills/field-evolve-fixer/SKILL.md.enc +6 -0
- package/dist/plugins/skills/field-evolve-learner/SKILL.md.enc +6 -0
- package/dist/plugins/skills/field-evolve-scanner/SKILL.md.enc +6 -0
- package/dist/plugins/skills/field-evolve-scanner/scripts/fallback-scanner.js +2 -0
- package/dist/plugins/skills/field-evolve-verifier/SKILL.md.enc +6 -0
- package/dist/plugins/skills/heartbeat-monitor/SKILL.md.enc +6 -0
- package/dist/plugins/skills/implementation-stage/SKILL.md.enc +6 -0
- package/dist/plugins/skills/layer1-validation/SKILL.md.enc +6 -0
- package/dist/plugins/skills/multi-role-orchestration/SKILL.md.enc +6 -0
- package/dist/plugins/skills/ops-content-marketing/SKILL.md.enc +6 -0
- package/dist/plugins/skills/ops-conversion/SKILL.md.enc +6 -0
- package/dist/plugins/skills/ops-data-driven/SKILL.md.enc +6 -0
- package/dist/plugins/skills/ops-growth-strategies/SKILL.md.enc +6 -0
- package/dist/plugins/skills/ops-private-domain/SKILL.md.enc +6 -0
- package/dist/plugins/skills/ops-social-commerce/SKILL.md.enc +6 -0
- package/dist/plugins/skills/ops-user-growth/SKILL.md.enc +6 -0
- package/dist/plugins/skills/pending-dashboard/SKILL.md.enc +6 -0
- package/dist/plugins/skills/project-evolve-analyzer/SKILL.md.enc +6 -0
- package/dist/plugins/skills/project-evolve-fixer/SKILL.md.enc +6 -0
- package/dist/plugins/skills/project-evolve-generator/SKILL.md.enc +6 -0
- package/dist/plugins/skills/project-evolve-learner/SKILL.md.enc +6 -0
- package/dist/plugins/skills/project-evolve-reviewer/SKILL.md.enc +6 -0
- package/dist/plugins/skills/project-evolve-scanner/SKILL.md.enc +6 -0
- package/dist/plugins/skills/project-evolve-scanner/scripts/dependency-reuse-checker.js +2 -0
- package/dist/plugins/skills/project-evolve-scanner/scripts/subsystem-coverage.js +2 -0
- package/dist/plugins/skills/project-evolve-verifier/SKILL.md.enc +6 -0
- package/dist/plugins/skills/requirement-stage/SKILL.md.enc +6 -0
- package/dist/plugins/skills/secret-scanner/SKILL.md.enc +6 -0
- package/dist/plugins/skills/skill-forge/SKILL.md.enc +6 -0
- package/dist/plugins/skills/skill-forge/references/description-guide.md.enc +6 -0
- package/dist/plugins/skills/skill-forge/references/quality-rubric.md.enc +6 -0
- package/dist/plugins/skills/skill-forge/references/skill-template.md.enc +6 -0
- package/dist/plugins/skills/startup-guard/SKILL.md.enc +6 -0
- package/dist/plugins/skills/tdd-workflow/SKILL.md.enc +6 -0
- package/dist/plugins/skills/testing-stage/SKILL.md.enc +6 -0
- package/dist/plugins/skills/tracking-validator/SKILL.md.enc +6 -0
- package/dist/scripts/build-crypto.js +2 -0
- package/dist/scripts/cli/assets.js +1 -0
- package/dist/scripts/cli/contribute.js +1 -0
- package/dist/scripts/cli/dashboard.js +1 -0
- package/dist/scripts/cli/env.js +1 -0
- package/dist/scripts/cli/global-init.js +1 -0
- package/dist/scripts/cli/global.js +1 -0
- package/dist/scripts/cli/index.js +1 -0
- package/dist/scripts/cli/init.js +1 -0
- package/dist/scripts/cli/list.js +1 -0
- package/dist/scripts/cli/memory.js +1 -0
- package/dist/scripts/cli/org.js +1 -0
- package/dist/scripts/cli/plugin.js +1 -0
- package/dist/scripts/cli/practice-report.js +1 -0
- package/dist/scripts/cli/runtime-governance.js +1 -0
- package/dist/scripts/cli/system.js +1 -0
- package/dist/scripts/cli/update.js +1 -0
- package/dist/scripts/commands/catalog.js +1 -0
- package/dist/scripts/commands/slash-command-docs.js +1 -0
- package/dist/scripts/config/external-mounts.config.js +2 -0
- package/dist/scripts/heartbeat/check.js +2 -0
- package/dist/scripts/heartbeat/setup-cron.js +2 -0
- package/dist/scripts/install/adapters.js +1 -0
- package/dist/scripts/install/constants.js +1 -0
- package/dist/scripts/install/file-reader.js +1 -0
- package/dist/scripts/install/index.js +1 -0
- package/dist/scripts/install/shards/constants-shard.js +1 -0
- package/dist/scripts/install/shards/crypto-config-shard.js +1 -0
- package/dist/scripts/install/shards/error-messages-shard.js +1 -0
- package/dist/scripts/install/shards/reassemble.js +1 -0
- package/dist/scripts/install/shards/utils-shard.js +1 -0
- package/dist/scripts/install/shards/version-info-shard.js +1 -0
- package/dist/scripts/postinstall.js +1 -0
- package/dist/scripts/state.js +1 -0
- package/package.json +22 -21
- package/README.en.md +0 -598
- package/bin/autospec.js +0 -3
- package/knowledge/01-principles/00-principles-hierarchy.md +0 -247
- package/knowledge/01-principles/01-first-principles.md +0 -241
- package/knowledge/01-principles/02-strategic-principles.md +0 -286
- package/knowledge/01-principles/03-tactical-principles.md +0 -385
- package/knowledge/01-principles/04-operational-principles.md +0 -275
- package/knowledge/01-principles/05-domain-principles.md +0 -539
- package/knowledge/01-principles/06-methodology-principles.md +0 -281
- package/knowledge/01-principles/07-cognitive-principles.md +0 -277
- package/knowledge/01-principles/08-auto-fix-principles.md +0 -320
- package/knowledge/01-principles/09-constitution.md +0 -220
- package/knowledge/01-principles/10-evolution-mechanism.md +0 -699
- package/knowledge/01-principles/README.en.md +0 -385
- package/knowledge/01-principles/README.md +0 -385
- package/knowledge/02-process/00-overview.md +0 -404
- package/knowledge/02-process/01-requirement.md +0 -113
- package/knowledge/02-process/02-design.md +0 -123
- package/knowledge/02-process/03-implementation.md +0 -90
- package/knowledge/02-process/04-review.md +0 -80
- package/knowledge/02-process/05-testing.md +0 -90
- package/knowledge/02-process/06-delivery.md +0 -88
- package/knowledge/02-process/README.en.md +0 -143
- package/knowledge/02-process/README.md +0 -186
- package/knowledge/03-guides/00-pipeline-protocol.md +0 -438
- package/knowledge/03-guides/01-team-orchestrator.md +0 -368
- package/knowledge/03-guides/02-analyze-requirement.md +0 -195
- package/knowledge/03-guides/03-design-solution.md +0 -401
- package/knowledge/03-guides/04-implement-code.md +0 -205
- package/knowledge/03-guides/05-plan-testing.md +0 -183
- package/knowledge/03-guides/06-generate-tests.md +0 -241
- package/knowledge/03-guides/07-check-release.md +0 -205
- package/knowledge/03-guides/08-evaluate-ai-effect.md +0 -100
- package/knowledge/03-guides/09-review-requirement.md +0 -83
- package/knowledge/03-guides/10-review-design.md +0 -83
- package/knowledge/03-guides/11-review-code.md +0 -111
- package/knowledge/03-guides/12-review-testing.md +0 -76
- package/knowledge/03-guides/13-audit-security.md +0 -89
- package/knowledge/03-guides/14-check-consistency.md +0 -177
- package/knowledge/03-guides/15-run-unit-tests.md +0 -83
- package/knowledge/03-guides/16-run-integration-tests.md +0 -105
- package/knowledge/03-guides/17-analyze-test-context.md +0 -250
- package/knowledge/03-guides/18-log-practice.md +0 -359
- package/knowledge/03-guides/19-distill-skill.md +0 -91
- package/knowledge/03-guides/20-update-skill.md +0 -45
- package/knowledge/03-guides/21-validate-skill.md +0 -72
- package/knowledge/03-guides/22-extract-methodology.md +0 -55
- package/knowledge/03-guides/23-infer-scope.md +0 -174
- package/knowledge/03-guides/24-assess-complexity.md +0 -270
- package/knowledge/03-guides/25-discover-component.md +0 -183
- package/knowledge/03-guides/26-analyze-tech-stack.md +0 -139
- package/knowledge/03-guides/27-scan-environment.md +0 -207
- package/knowledge/03-guides/28-validate-environment.md +0 -207
- package/knowledge/03-guides/29-generate-knowledge.md +0 -234
- package/knowledge/03-guides/30-analyze-ai-capability.md +0 -193
- package/knowledge/03-guides/31-analyze-ai-component.md +0 -169
- package/knowledge/03-guides/32-analyze-ai-agent.md +0 -362
- package/knowledge/03-guides/33-analyze-ai-rag.md +0 -339
- package/knowledge/03-guides/34-assess-ai-task.md +0 -418
- package/knowledge/03-guides/35-evaluate-ai-pipeline.md +0 -219
- package/knowledge/03-guides/36-evaluate-ai-artifact.md +0 -192
- package/knowledge/03-guides/37-plan-ai-evaluation.md +0 -374
- package/knowledge/03-guides/38-evaluate-ai-path.md +0 -274
- package/knowledge/03-guides/39-validate-ai-data.md +0 -276
- package/knowledge/03-guides/40-detect-ai-anomaly.md +0 -213
- package/knowledge/03-guides/41-diagnose-ai-test.md +0 -133
- package/knowledge/03-guides/42-apply-ddd.md +0 -345
- package/knowledge/03-guides/43-run-ai-sdlc.md +0 -475
- package/knowledge/03-guides/44-manage-knowledge.md +0 -369
- package/knowledge/03-guides/45-test-runner.md +0 -254
- package/knowledge/03-guides/README.en.md +0 -212
- package/knowledge/03-guides/README.md +0 -212
- package/knowledge/04-checklists/00-requirement.md +0 -169
- package/knowledge/04-checklists/01-design.md +0 -196
- package/knowledge/04-checklists/02-code.md +0 -197
- package/knowledge/04-checklists/03-test.md +0 -46
- package/knowledge/04-checklists/04-release.md +0 -70
- package/knowledge/04-checklists/README.en.md +0 -119
- package/knowledge/04-checklists/README.md +0 -123
- package/knowledge/05-config/00-validation-patterns.yaml +0 -137
- package/knowledge/05-config/01-team-stage.yaml +0 -95
- package/knowledge/05-config/02-team-tasks.yaml +0 -139
- package/knowledge/05-config/03-role-composition.yaml +0 -346
- package/knowledge/05-config/04-role-extensions.yaml +0 -140
- package/knowledge/05-config/05-skill-compositions.yaml +0 -142
- package/knowledge/05-config/README.en.md +0 -54
- package/knowledge/05-config/README.md +0 -132
- package/knowledge/06-environment/00-template-registry.md +0 -310
- package/knowledge/06-environment/01-detection-patterns.yaml +0 -1692
- package/knowledge/06-environment/README.en.md +0 -40
- package/knowledge/06-environment/README.md +0 -128
- package/knowledge/07-standards/00-coding-style.md +0 -1059
- package/knowledge/07-standards/01-code-review.md +0 -876
- package/knowledge/07-standards/02-data-consistency.md +0 -1085
- package/knowledge/07-standards/03-document-versioning.md +0 -210
- package/knowledge/07-standards/04-risk-detection.md +0 -186
- package/knowledge/07-standards/README.en.md +0 -119
- package/knowledge/07-standards/README.md +0 -123
- package/knowledge/08-organization/00-vision-mission.md +0 -113
- package/knowledge/08-organization/01-ai-native-culture.md +0 -318
- package/knowledge/08-organization/02-team-metrics.md +0 -228
- package/knowledge/08-organization/03-committee-structure.md +0 -54
- package/knowledge/08-organization/04-governance-metrics.md +0 -55
- package/knowledge/08-organization/05-improvement-process.md +0 -71
- package/knowledge/08-organization/README.en.md +0 -165
- package/knowledge/08-organization/README.md +0 -165
- package/knowledge/09-templates/00-requirement-proposal.md +0 -344
- package/knowledge/09-templates/01-architecture-design.md +0 -494
- package/knowledge/09-templates/02-api-design.md +0 -408
- package/knowledge/09-templates/03-database-design.md +0 -313
- package/knowledge/09-templates/04-product-design.md +0 -237
- package/knowledge/09-templates/05-domain-business.md +0 -388
- package/knowledge/09-templates/06-test-design.md +0 -268
- package/knowledge/09-templates/07-evaluation-design.md +0 -372
- package/knowledge/09-templates/08-component-knowledge.md +0 -272
- package/knowledge/09-templates/09-best-practices.md +0 -218
- package/knowledge/09-templates/10-middleware-knowledge.md +0 -342
- package/knowledge/09-templates/README.en.md +0 -222
- package/knowledge/09-templates/README.md +0 -216
- package/knowledge/README.en.md +0 -372
- package/knowledge/README.md +0 -399
- package/plugins/agents/roles/ai-engineer.md +0 -129
- package/plugins/agents/roles/backend-engineer.md +0 -165
- package/plugins/agents/roles/ceo.md +0 -94
- package/plugins/agents/roles/data-engineer.md +0 -135
- package/plugins/agents/roles/devops-engineer.md +0 -181
- package/plugins/agents/roles/frontend-engineer.md +0 -129
- package/plugins/agents/roles/product-owner.md +0 -98
- package/plugins/agents/roles/quality-engineer.md +0 -129
- package/plugins/agents/roles/security-engineer.md +0 -180
- package/plugins/agents/roles/tech-lead.md +0 -97
- package/plugins/agents/support/blind-comparator.md +0 -88
- package/plugins/agents/support/consistency-checker.md +0 -136
- package/plugins/agents/support/failure-diagnostician.md +0 -141
- package/plugins/agents/support/independent-reviewer.md +0 -80
- package/plugins/agents/support/monitoring-agent.md +0 -215
- package/plugins/agents/support/safety-auditor.md +0 -121
- package/plugins/agents/support/skill-benchmarker.md +0 -86
- package/plugins/agents/support/skill-forger.md +0 -105
- package/plugins/agents/support/stage-gate-evaluator.md +0 -205
- package/plugins/agents/support/test-coverage-reviewer.md +0 -73
- package/plugins/benchmarks/templates/README.md +0 -196
- package/plugins/benchmarks/templates/commands/apply-template.yaml +0 -108
- package/plugins/benchmarks/templates/commands/archive-template.yaml +0 -65
- package/plugins/benchmarks/templates/commands/env-export-template.yaml +0 -64
- package/plugins/benchmarks/templates/commands/env-sync-template.yaml +0 -104
- package/plugins/benchmarks/templates/commands/env-template-template.yaml +0 -96
- package/plugins/benchmarks/templates/commands/env-template.yaml +0 -58
- package/plugins/benchmarks/templates/commands/env-update-template.yaml +0 -110
- package/plugins/benchmarks/templates/commands/env-validate-template.yaml +0 -95
- package/plugins/benchmarks/templates/commands/explore-template.yaml +0 -48
- package/plugins/benchmarks/templates/commands/field-evolve-template.yaml +0 -104
- package/plugins/benchmarks/templates/commands/project-evolve-template.yaml +0 -104
- package/plugins/benchmarks/templates/commands/propose-template.yaml +0 -88
- package/plugins/benchmarks/templates/commands/review-template.yaml +0 -124
- package/plugins/benchmarks/templates/commands/run-template.yaml +0 -127
- package/plugins/benchmarks/templates/commands/test-template.yaml +0 -149
- package/plugins/benchmarks/templates/pipeline/agile-template.yaml +0 -84
- package/plugins/benchmarks/templates/pipeline/experiment-template.yaml +0 -92
- package/plugins/benchmarks/templates/pipeline/hotfix-template.yaml +0 -81
- package/plugins/benchmarks/templates/pipeline/waterfall-template.yaml +0 -106
- package/plugins/benchmarks/templates/skills/agile-iteration-template.yaml +0 -78
- package/plugins/benchmarks/templates/skills/benchmark-executor-template.yaml +0 -114
- package/plugins/benchmarks/templates/skills/benchmark-generator-template.yaml +0 -52
- package/plugins/benchmarks/templates/skills/delivery-stage-template.yaml +0 -130
- package/plugins/benchmarks/templates/skills/design-stage-template.yaml +0 -131
- package/plugins/benchmarks/templates/skills/experiment-iteration-template.yaml +0 -60
- package/plugins/benchmarks/templates/skills/exploration-phase-template.yaml +0 -114
- package/plugins/benchmarks/templates/skills/field-evolve-analyzer-template.yaml +0 -51
- package/plugins/benchmarks/templates/skills/field-evolve-distiller-template.yaml +0 -34
- package/plugins/benchmarks/templates/skills/field-evolve-executor-template.yaml +0 -50
- package/plugins/benchmarks/templates/skills/field-evolve-fixer-template.yaml +0 -52
- package/plugins/benchmarks/templates/skills/field-evolve-learner-template.yaml +0 -33
- package/plugins/benchmarks/templates/skills/field-evolve-scanner-template.yaml +0 -74
- package/plugins/benchmarks/templates/skills/field-evolve-template.yaml +0 -71
- package/plugins/benchmarks/templates/skills/field-evolve-verifier-template.yaml +0 -51
- package/plugins/benchmarks/templates/skills/hotfix-iteration-template.yaml +0 -54
- package/plugins/benchmarks/templates/skills/implementation-stage-template.yaml +0 -127
- package/plugins/benchmarks/templates/skills/layer1-validation-template.yaml +0 -121
- package/plugins/benchmarks/templates/skills/project-evolve-analyzer-template.yaml +0 -51
- package/plugins/benchmarks/templates/skills/project-evolve-fixer-template.yaml +0 -52
- package/plugins/benchmarks/templates/skills/project-evolve-generator-template.yaml +0 -34
- package/plugins/benchmarks/templates/skills/project-evolve-learner-template.yaml +0 -50
- package/plugins/benchmarks/templates/skills/project-evolve-reviewer-template.yaml +0 -50
- package/plugins/benchmarks/templates/skills/project-evolve-scanner-template.yaml +0 -75
- package/plugins/benchmarks/templates/skills/project-evolve-template.yaml +0 -72
- package/plugins/benchmarks/templates/skills/project-evolve-verifier-template.yaml +0 -51
- package/plugins/benchmarks/templates/skills/requirement-analyzer-template.yaml +0 -48
- package/plugins/benchmarks/templates/skills/skill-forge-template.yaml +0 -117
- package/plugins/benchmarks/templates/skills/startup-guard-template.yaml +0 -103
- package/plugins/benchmarks/templates/skills/testing-stage-template.yaml +0 -146
- package/plugins/benchmarks/templates/skills/waterfall-iteration-template.yaml +0 -55
- package/plugins/commands/README.en.md +0 -96
- package/plugins/commands/README.md +0 -96
- package/plugins/commands/apply.md +0 -277
- package/plugins/commands/archive.md +0 -132
- package/plugins/commands/env-export.md +0 -79
- package/plugins/commands/env-sync.md +0 -1281
- package/plugins/commands/env-template.md +0 -99
- package/plugins/commands/env-update.md +0 -264
- package/plugins/commands/env-validate.md +0 -176
- package/plugins/commands/env.md +0 -79
- package/plugins/commands/explore.md +0 -193
- package/plugins/commands/field-evolve.md +0 -412
- package/plugins/commands/memory.md +0 -249
- package/plugins/commands/project-evolve.md +0 -920
- package/plugins/commands/propose.md +0 -184
- package/plugins/commands/review.md +0 -140
- package/plugins/commands/run.md +0 -1052
- package/plugins/commands/status.md +0 -183
- package/plugins/commands/test.md +0 -389
- package/plugins/hooks/README.en.md +0 -56
- package/plugins/hooks/README.md +0 -56
- package/plugins/hooks/ai-project-guard.js +0 -329
- package/plugins/hooks/artifact-evaluation-hook.js +0 -237
- package/plugins/hooks/constitution-guard.js +0 -211
- package/plugins/hooks/environment-autocommit.js +0 -606
- package/plugins/hooks/environment-manager.js +0 -779
- package/plugins/hooks/execution-tracker.js +0 -459
- package/plugins/hooks/frozen-zone-guard.js +0 -140
- package/plugins/hooks/layer1-validator.js +0 -539
- package/plugins/hooks/lib/artifact-evaluator.js +0 -414
- package/plugins/hooks/lib/auto-fix-loop.js +0 -605
- package/plugins/hooks/lib/benchmarks/change-detector.js +0 -390
- package/plugins/hooks/lib/benchmarks/evaluator.js +0 -605
- package/plugins/hooks/lib/benchmarks/integration-example.js +0 -169
- package/plugins/hooks/lib/data-and-ai-detector.js +0 -275
- package/plugins/hooks/lib/detection-pattern-loader.js +0 -865
- package/plugins/hooks/lib/directory-discovery.js +0 -395
- package/plugins/hooks/lib/environment-config-loader.js +0 -345
- package/plugins/hooks/lib/environment-detector.js +0 -553
- package/plugins/hooks/lib/environment-evolver.js +0 -564
- package/plugins/hooks/lib/environment-registry.js +0 -813
- package/plugins/hooks/lib/execution-path.js +0 -427
- package/plugins/hooks/lib/hook-error-recorder.js +0 -245
- package/plugins/hooks/lib/hook-logger.js +0 -538
- package/plugins/hooks/lib/hook-runner.js +0 -97
- package/plugins/hooks/lib/hook-state-manager.js +0 -578
- package/plugins/hooks/lib/memory-extractor.js +0 -399
- package/plugins/hooks/lib/memory-manager.js +0 -673
- package/plugins/hooks/lib/metrics-analyzer.js +0 -489
- package/plugins/hooks/lib/project-evolution/auto-fixer.js +0 -511
- package/plugins/hooks/lib/project-evolution/memory-manager.js +0 -346
- package/plugins/hooks/lib/project-evolution/pattern-detector.js +0 -476
- package/plugins/hooks/lib/project-evolution/semantic-indexer.js +0 -480
- package/plugins/hooks/lib/project-structure-detector.js +0 -326
- package/plugins/hooks/lib/rollback-tracker.js +0 -346
- package/plugins/hooks/lib/source-code-scanner.js +0 -596
- package/plugins/hooks/lib/technology-stack-detector.js +0 -374
- package/plugins/hooks/lib/test-auto-fix.test.js +0 -194
- package/plugins/hooks/lib/test-failure-analyzer.js +0 -375
- package/plugins/hooks/lib/test-failure-fixer.js +0 -268
- package/plugins/hooks/lib/trace-context.js +0 -277
- package/plugins/hooks/lib/validation-patterns.js +0 -415
- package/plugins/hooks/memory-sync.js +0 -171
- package/plugins/hooks/monitoring-trigger.js +0 -467
- package/plugins/hooks/pipeline-observer.js +0 -413
- package/plugins/hooks/scope-sentinel.js +0 -204
- package/plugins/hooks/trace-initialization.js +0 -169
- package/plugins/memory/templates/code-quality.yaml +0 -149
- package/plugins/memory/templates/multi-system.yaml +0 -155
- package/plugins/memory/templates/team-habits.yaml +0 -119
- package/plugins/memory/templates/testing.yaml +0 -121
- package/plugins/skills/README.en.md +0 -59
- package/plugins/skills/README.md +0 -114
- package/plugins/skills/agile-iteration/SKILL.md +0 -187
- package/plugins/skills/benchmark-executor/SKILL.md +0 -647
- package/plugins/skills/benchmark-generator/SKILL.md +0 -349
- package/plugins/skills/delivery-stage/SKILL.md +0 -324
- package/plugins/skills/design-stage/SKILL.md +0 -307
- package/plugins/skills/experiment-evaluator/SKILL.md +0 -271
- package/plugins/skills/experiment-iteration/SKILL.md +0 -154
- package/plugins/skills/exploration-phase/SKILL.md +0 -216
- package/plugins/skills/field-evolve-analyzer/SKILL.md +0 -65
- package/plugins/skills/field-evolve-distiller/SKILL.md +0 -66
- package/plugins/skills/field-evolve-executor/SKILL.md +0 -94
- package/plugins/skills/field-evolve-executor/executor.js +0 -342
- package/plugins/skills/field-evolve-fixer/SKILL.md +0 -69
- package/plugins/skills/field-evolve-learner/SKILL.md +0 -65
- package/plugins/skills/field-evolve-scanner/SKILL.md +0 -87
- package/plugins/skills/field-evolve-scanner/scripts/fallback-scanner.js +0 -288
- package/plugins/skills/field-evolve-verifier/SKILL.md +0 -64
- package/plugins/skills/hotfix-iteration/SKILL.md +0 -279
- package/plugins/skills/implementation-stage/SKILL.md +0 -320
- package/plugins/skills/layer1-validation/SKILL.md +0 -79
- package/plugins/skills/pending-dashboard/SKILL.md +0 -110
- package/plugins/skills/project-evolve-analyzer/SKILL.md +0 -95
- package/plugins/skills/project-evolve-fixer/SKILL.md +0 -99
- package/plugins/skills/project-evolve-generator/SKILL.md +0 -149
- package/plugins/skills/project-evolve-learner/SKILL.md +0 -103
- package/plugins/skills/project-evolve-reviewer/SKILL.md +0 -104
- package/plugins/skills/project-evolve-scanner/SKILL.md +0 -95
- package/plugins/skills/project-evolve-scanner/scripts/dependency-reuse-checker.js +0 -395
- package/plugins/skills/project-evolve-scanner/scripts/subsystem-coverage.js +0 -315
- package/plugins/skills/project-evolve-verifier/SKILL.md +0 -105
- package/plugins/skills/requirement-stage/SKILL.md +0 -217
- package/plugins/skills/skill-forge/SKILL.md +0 -223
- package/plugins/skills/skill-forge/references/description-guide.md +0 -92
- package/plugins/skills/skill-forge/references/quality-rubric.md +0 -104
- package/plugins/skills/skill-forge/references/skill-template.md +0 -106
- package/plugins/skills/startup-guard/SKILL.md +0 -38
- package/plugins/skills/testing-stage/SKILL.md +0 -770
- package/plugins/skills/waterfall-iteration/SKILL.md +0 -115
- package/scripts/cli/global-init.js +0 -288
- package/scripts/cli/global.js +0 -324
- package/scripts/cli/index.js +0 -55
- package/scripts/cli/init.js +0 -408
- package/scripts/cli/list.js +0 -70
- package/scripts/cli/org.js +0 -340
- package/scripts/cli/update.js +0 -44
- package/scripts/config/commands.config.js +0 -145
- package/scripts/config/hooks.config.js +0 -197
- package/scripts/install/agents.js +0 -106
- package/scripts/install/commands.js +0 -133
- package/scripts/install/constants.js +0 -463
- package/scripts/install/hook-logger.js +0 -536
- package/scripts/install/hooks.js +0 -110
- package/scripts/install/index.js +0 -39
- package/scripts/install/skills.js +0 -95
- package/scripts/postinstall.js +0 -25
- package/scripts/state.js +0 -585
- /package/{plugins → dist/plugins}/hooks/lib/hook-runner.sh +0 -0
|
@@ -1,192 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
name: ai-artifact-evaluator
|
|
3
|
-
description: AI 评估产出物质量,生成结构化评分和改进建议
|
|
4
|
-
type: review
|
|
5
|
-
---
|
|
6
|
-
|
|
7
|
-
## 定位
|
|
8
|
-
|
|
9
|
-
对各类产出物进行深度质量评估,补充自动化检查无法覆盖的质量判断。
|
|
10
|
-
|
|
11
|
-
## 与现有技能的关系
|
|
12
|
-
|
|
13
|
-
- **复用**:consistency-checker.md 的六维检测能力
|
|
14
|
-
- **增强**:新增结构化评分输出
|
|
15
|
-
- **新增**:下游反馈追踪机制
|
|
16
|
-
|
|
17
|
-
## 输入
|
|
18
|
-
|
|
19
|
-
- 必须输入:
|
|
20
|
-
- artifactPath: 产出物路径
|
|
21
|
-
- artifactType: 产出物类型 (REQUIREMENT | DESIGN | CODE | REVIEW | TEST)
|
|
22
|
-
- 可选输入:
|
|
23
|
-
- context: 相关上下文(上游产物、历史案例)
|
|
24
|
-
- evalConfig: 评测配置
|
|
25
|
-
|
|
26
|
-
## 评估维度
|
|
27
|
-
|
|
28
|
-
### 需求文档 (REQUIREMENT)
|
|
29
|
-
|
|
30
|
-
| 维度 | 权重 | 评估要点 |
|
|
31
|
-
|-----|------|---------|
|
|
32
|
-
| 清晰度 | 30% | 需求描述是否明确无歧义 |
|
|
33
|
-
| 完整性 | 25% | 是否覆盖所有业务场景 |
|
|
34
|
-
| 可测试性 | 25% | 验收标准是否可转化为测试 |
|
|
35
|
-
| 边界覆盖 | 20% | 是否考虑异常场景 |
|
|
36
|
-
|
|
37
|
-
### 设计文档 (DESIGN)
|
|
38
|
-
|
|
39
|
-
| 维度 | 权重 | 评估要点 |
|
|
40
|
-
|-----|------|---------|
|
|
41
|
-
| 合理性 | 30% | 架构设计是否合理 |
|
|
42
|
-
| 覆盖度 | 25% | 是否覆盖所有需求点 |
|
|
43
|
-
| 可实现性 | 25% | 技术方案是否可行 |
|
|
44
|
-
| 接口完整性 | 20% | API 定义是否完整 |
|
|
45
|
-
|
|
46
|
-
### 代码审查报告 (REVIEW)
|
|
47
|
-
|
|
48
|
-
| 维度 | 权重 | 评估要点 |
|
|
49
|
-
|-----|------|---------|
|
|
50
|
-
| 问题真实性 | 35% | 发现的问题是否真实存在 |
|
|
51
|
-
| 建议可执行性 | 30% | 修复建议是否具体可行 |
|
|
52
|
-
| 覆盖全面性 | 20% | 是否覆盖关键模块 |
|
|
53
|
-
| 深度 | 15% | 分析是否深入 |
|
|
54
|
-
|
|
55
|
-
## 输出格式
|
|
56
|
-
|
|
57
|
-
```json
|
|
58
|
-
{
|
|
59
|
-
"score": 85,
|
|
60
|
-
"grade": "B",
|
|
61
|
-
"dimensions": {
|
|
62
|
-
"clarity": {
|
|
63
|
-
"score": 90,
|
|
64
|
-
"issues": [],
|
|
65
|
-
"strengths": ["需求描述清晰", "验收标准明确"]
|
|
66
|
-
},
|
|
67
|
-
"completeness": {
|
|
68
|
-
"score": 80,
|
|
69
|
-
"issues": ["缺少异常场景描述"],
|
|
70
|
-
"strengths": ["覆盖主要业务流程"]
|
|
71
|
-
}
|
|
72
|
-
},
|
|
73
|
-
"improvements": [
|
|
74
|
-
{
|
|
75
|
-
"priority": "high",
|
|
76
|
-
"area": "completeness",
|
|
77
|
-
"suggestion": "补充异常场景处理:用户取消操作、网络超时等",
|
|
78
|
-
"example": "建议添加:当用户取消支付时,系统应..."
|
|
79
|
-
}
|
|
80
|
-
],
|
|
81
|
-
"comparisonWithHistory": {
|
|
82
|
-
"betterThan": 0.75,
|
|
83
|
-
"comment": "质量优于 75% 的历史同类产出物"
|
|
84
|
-
},
|
|
85
|
-
"confidence": 0.85
|
|
86
|
-
}
|
|
87
|
-
```
|
|
88
|
-
|
|
89
|
-
## 执行步骤
|
|
90
|
-
|
|
91
|
-
### Step 1: 读取产出物
|
|
92
|
-
|
|
93
|
-
```
|
|
94
|
-
1. 读取指定路径的产出物内容
|
|
95
|
-
2. 识别产出物类型
|
|
96
|
-
3. 加载对应的评估维度配置
|
|
97
|
-
```
|
|
98
|
-
|
|
99
|
-
### Step 2: 读取上下文
|
|
100
|
-
|
|
101
|
-
```
|
|
102
|
-
1. 如果是设计文档,读取对应的需求文档作为上下文
|
|
103
|
-
2. 如果是审查报告,读取被审查的代码/文档
|
|
104
|
-
3. 加载历史优秀案例(如有)
|
|
105
|
-
```
|
|
106
|
-
|
|
107
|
-
### Step 3: 分维度评估
|
|
108
|
-
|
|
109
|
-
```
|
|
110
|
-
对每个评估维度:
|
|
111
|
-
1. 分析产出物在该维度的表现
|
|
112
|
-
2. 识别具体问题和优点
|
|
113
|
-
3. 给出 0-100 的评分
|
|
114
|
-
4. 记录评估依据
|
|
115
|
-
```
|
|
116
|
-
|
|
117
|
-
### Step 4: 计算综合评分
|
|
118
|
-
|
|
119
|
-
```
|
|
120
|
-
1. 按权重计算各维度得分
|
|
121
|
-
2. 汇总为综合评分
|
|
122
|
-
3. 确定等级 (A/B/C/D/F)
|
|
123
|
-
```
|
|
124
|
-
|
|
125
|
-
### Step 5: 生成改进建议
|
|
126
|
-
|
|
127
|
-
```
|
|
128
|
-
1. 按优先级排序改进建议
|
|
129
|
-
2. 每个建议包含:
|
|
130
|
-
- 优先级
|
|
131
|
-
- 改进领域
|
|
132
|
-
- 具体建议
|
|
133
|
-
- 示例(如适用)
|
|
134
|
-
```
|
|
135
|
-
|
|
136
|
-
### Step 6: 输出结果
|
|
137
|
-
|
|
138
|
-
```
|
|
139
|
-
1. 输出结构化 JSON 结果
|
|
140
|
-
2. 将结果写入 metrics.json
|
|
141
|
-
```
|
|
142
|
-
|
|
143
|
-
## 反模式清单 (DP7)
|
|
144
|
-
|
|
145
|
-
1. **无依据评分**:没有具体分析就给出分数
|
|
146
|
-
- 检测:每个评分必须有 issues 或 strengths 支撑
|
|
147
|
-
|
|
148
|
-
2. **过于宽松**:所有评分都很高,没有改进空间
|
|
149
|
-
- 检测:必须有至少一个改进建议
|
|
150
|
-
|
|
151
|
-
3. **建议不可执行**:建议过于笼统
|
|
152
|
-
- 检测:每个建议必须有具体的改进方向
|
|
153
|
-
|
|
154
|
-
4. **忽略上下文**:评估设计时不考虑需求
|
|
155
|
-
- 检测:必须引用上游产物进行覆盖度分析
|
|
156
|
-
|
|
157
|
-
## 采样策略
|
|
158
|
-
|
|
159
|
-
| 场景 | 执行策略 |
|
|
160
|
-
|-----|---------|
|
|
161
|
-
| 首次生成 | 必评(建立基线) |
|
|
162
|
-
| 后续生成 | 30% 采样 |
|
|
163
|
-
| 低分产物 (<70) | 100% 重评 |
|
|
164
|
-
| 人工标记 | 100% 评估 |
|
|
165
|
-
|
|
166
|
-
## 与 metrics.json 的集成
|
|
167
|
-
|
|
168
|
-
评估完成后,更新 metrics.json:
|
|
169
|
-
|
|
170
|
-
```json
|
|
171
|
-
{
|
|
172
|
-
"artifacts": {
|
|
173
|
-
"requirement.md": {
|
|
174
|
-
"evaluations": {
|
|
175
|
-
"quality": {
|
|
176
|
-
"score": 85,
|
|
177
|
-
"grade": "B",
|
|
178
|
-
"dimensions": {...},
|
|
179
|
-
"improvements": [...],
|
|
180
|
-
"evaluatedAt": "2026-03-24T10:00:00Z"
|
|
181
|
-
}
|
|
182
|
-
}
|
|
183
|
-
}
|
|
184
|
-
}
|
|
185
|
-
}
|
|
186
|
-
```
|
|
187
|
-
|
|
188
|
-
## 适用场景
|
|
189
|
-
|
|
190
|
-
- 产出物生成后自动触发
|
|
191
|
-
- 手动执行 `/autospec:evaluate`
|
|
192
|
-
- 低分产出物重评
|
|
@@ -1,374 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
name: evaluation-planner
|
|
3
|
-
description: 当项目包含需要效果评测的组件(模型训练、Agent、RAG等)时,规划评测方案。包括评测维度、数据集、指标和流程设计。
|
|
4
|
-
type: ai
|
|
5
|
-
---
|
|
6
|
-
|
|
7
|
-
## 定位
|
|
8
|
-
|
|
9
|
-
AI专用技能。为需要效果评测的AI组件规划评测方案,包括评测维度、数据集构建、指标选择和流程设计。
|
|
10
|
-
|
|
11
|
-
## 输入
|
|
12
|
-
|
|
13
|
-
- 必须输入:项目结构分析结果、已检测的AI组件
|
|
14
|
-
- 可选输入:Agent分析结果、RAG分析结果
|
|
15
|
-
|
|
16
|
-
## 输出
|
|
17
|
-
|
|
18
|
-
```json
|
|
19
|
-
{
|
|
20
|
-
"evaluationScope": {
|
|
21
|
-
"components": ["agent-framework", "rag-application"],
|
|
22
|
-
"priority": "high",
|
|
23
|
-
"reason": "Agent和RAG直接面向用户,效果影响用户体验"
|
|
24
|
-
},
|
|
25
|
-
"evaluationPlan": [
|
|
26
|
-
{
|
|
27
|
-
"component": "ResearchAgent",
|
|
28
|
-
"type": "agent",
|
|
29
|
-
"dimensions": [
|
|
30
|
-
{
|
|
31
|
-
"name": "任务完成率",
|
|
32
|
-
"description": "Agent是否正确完成指定任务",
|
|
33
|
-
"metrics": ["success_rate", "error_rate"],
|
|
34
|
-
"method": "人工评估或自动验证"
|
|
35
|
-
},
|
|
36
|
-
{
|
|
37
|
-
"name": "工具使用正确性",
|
|
38
|
-
"description": "Agent是否正确选择和使用工具",
|
|
39
|
-
"metrics": ["tool_selection_accuracy", "tool_call_success_rate"],
|
|
40
|
-
"method": "日志分析"
|
|
41
|
-
},
|
|
42
|
-
{
|
|
43
|
-
"name": "响应质量",
|
|
44
|
-
"description": "Agent输出内容的质量",
|
|
45
|
-
"metrics": ["relevance_score", "helpfulness_score"],
|
|
46
|
-
"method": "LLM-as-Judge或人工评估"
|
|
47
|
-
}
|
|
48
|
-
],
|
|
49
|
-
"dataset": {
|
|
50
|
-
"type": "synthetic",
|
|
51
|
-
"size": 100,
|
|
52
|
-
"generationMethod": "基于真实场景生成测试用例"
|
|
53
|
-
},
|
|
54
|
-
"process": {
|
|
55
|
-
"steps": [
|
|
56
|
-
"1. 准备测试用例数据集",
|
|
57
|
-
"2. 运行Agent执行任务",
|
|
58
|
-
"3. 收集执行日志和输出",
|
|
59
|
-
"4. 自动计算可量化指标",
|
|
60
|
-
"5. LLM-as-Judge评估响应质量",
|
|
61
|
-
"6. 人工抽检验证"
|
|
62
|
-
]
|
|
63
|
-
}
|
|
64
|
-
},
|
|
65
|
-
{
|
|
66
|
-
"component": "RAG系统",
|
|
67
|
-
"type": "rag",
|
|
68
|
-
"dimensions": [
|
|
69
|
-
{
|
|
70
|
-
"name": "检索准确率",
|
|
71
|
-
"description": "检索到的文档是否相关",
|
|
72
|
-
"metrics": ["precision@k", "recall@k", "mrr"],
|
|
73
|
-
"method": "标注数据集评估"
|
|
74
|
-
},
|
|
75
|
-
{
|
|
76
|
-
"name": "回答相关性",
|
|
77
|
-
"description": "生成回答是否回答了问题",
|
|
78
|
-
"metrics": ["relevance_score", "faithfulness_score"],
|
|
79
|
-
"method": "LLM-as-Judge"
|
|
80
|
-
},
|
|
81
|
-
{
|
|
82
|
-
"name": "幻觉率",
|
|
83
|
-
"description": "回答是否包含虚假信息",
|
|
84
|
-
"metrics": ["hallucination_rate", "groundedness_score"],
|
|
85
|
-
"method": "事实核查"
|
|
86
|
-
}
|
|
87
|
-
],
|
|
88
|
-
"dataset": {
|
|
89
|
-
"type": "curated",
|
|
90
|
-
"size": 50,
|
|
91
|
-
"generationMethod": "人工构建问答对"
|
|
92
|
-
},
|
|
93
|
-
"process": {
|
|
94
|
-
"steps": [
|
|
95
|
-
"1. 构建评测问答数据集",
|
|
96
|
-
"2. 运行RAG系统生成回答",
|
|
97
|
-
"3. 计算检索指标",
|
|
98
|
-
"4. LLM评估回答质量",
|
|
99
|
-
"5. 人工抽检幻觉问题"
|
|
100
|
-
]
|
|
101
|
-
}
|
|
102
|
-
}
|
|
103
|
-
],
|
|
104
|
-
"tools": {
|
|
105
|
-
"suggested": ["ragas", "deepeval", "arize-phoenix"],
|
|
106
|
-
"reason": "这些工具支持RAG和Agent评测,与LangChain集成良好"
|
|
107
|
-
},
|
|
108
|
-
"timeline": {
|
|
109
|
-
"estimated": "2-3天",
|
|
110
|
-
"breakdown": {
|
|
111
|
-
"dataset_preparation": "0.5天",
|
|
112
|
-
"evaluation_implementation": "1天",
|
|
113
|
-
"execution_and_analysis": "0.5-1天"
|
|
114
|
-
}
|
|
115
|
-
}
|
|
116
|
-
}
|
|
117
|
-
```
|
|
118
|
-
|
|
119
|
-
## 执行步骤
|
|
120
|
-
|
|
121
|
-
### Step 1: 确定评测范围(确定性)
|
|
122
|
-
|
|
123
|
-
基于检测结果确定需要评测的组件:
|
|
124
|
-
|
|
125
|
-
```
|
|
126
|
-
评测触发条件:
|
|
127
|
-
- needsEvaluation = true 的组件
|
|
128
|
-
- 组件类型:model-training, inference-service, llm-application, agent-framework, rag-application
|
|
129
|
-
```
|
|
130
|
-
|
|
131
|
-
### Step 2: 分析组件特性(模型)
|
|
132
|
-
|
|
133
|
-
分析每个组件的评测需求:
|
|
134
|
-
|
|
135
|
-
```
|
|
136
|
-
模型输入:
|
|
137
|
-
{
|
|
138
|
-
"components": [
|
|
139
|
-
{"type": "agent-framework", "name": "ResearchAgent", "tools": ["web_search", "doc_reader"]},
|
|
140
|
-
{"type": "rag-application", "vectorStore": "ChromaDB", "retriever": "similarity"}
|
|
141
|
-
],
|
|
142
|
-
"task": "为每个组件确定评测维度和指标"
|
|
143
|
-
}
|
|
144
|
-
```
|
|
145
|
-
|
|
146
|
-
### Step 3: 设计评测维度(模型)
|
|
147
|
-
|
|
148
|
-
基于组件类型设计评测维度:
|
|
149
|
-
|
|
150
|
-
```
|
|
151
|
-
Agent评测维度:
|
|
152
|
-
- 任务完成率:是否完成指定任务
|
|
153
|
-
- 工具使用:是否正确选择和使用工具
|
|
154
|
-
- 推理能力:决策逻辑是否合理
|
|
155
|
-
- 响应质量:输出是否有帮助
|
|
156
|
-
|
|
157
|
-
RAG评测维度:
|
|
158
|
-
- 检索质量:召回率、精确率、MRR
|
|
159
|
-
- 生成质量:相关性、准确性、流畅性
|
|
160
|
-
- 上下文利用:是否有效使用检索内容
|
|
161
|
-
- 幻觉检测:是否存在虚假信息
|
|
162
|
-
|
|
163
|
-
模型评测维度:
|
|
164
|
-
- 准确性:Accuracy、F1、AUC
|
|
165
|
-
- 性能:延迟、吞吐量
|
|
166
|
-
- 鲁棒性:边界情况表现
|
|
167
|
-
- 公平性:不同群体表现差异
|
|
168
|
-
```
|
|
169
|
-
|
|
170
|
-
### Step 4: 规划数据集(模型)
|
|
171
|
-
|
|
172
|
-
设计评测数据集:
|
|
173
|
-
|
|
174
|
-
```
|
|
175
|
-
数据集类型:
|
|
176
|
-
- synthetic: 合成数据(LLM生成)
|
|
177
|
-
- curated: 人工构建
|
|
178
|
-
- production: 生产数据采样
|
|
179
|
-
- benchmark: 公开基准数据集
|
|
180
|
-
|
|
181
|
-
考虑因素:
|
|
182
|
-
- 数据量:平衡成本和统计显著性
|
|
183
|
-
- 覆盖度:覆盖主要使用场景
|
|
184
|
-
- 多样性:包含边界情况
|
|
185
|
-
```
|
|
186
|
-
|
|
187
|
-
### Step 5: 选择工具(确定性 + 模型)
|
|
188
|
-
|
|
189
|
-
推荐评测工具:
|
|
190
|
-
|
|
191
|
-
```
|
|
192
|
-
确定性规则:
|
|
193
|
-
- RAG评测 → ragas, deepeval, trulens
|
|
194
|
-
- Agent评测 → langsmith, arize-phoenix
|
|
195
|
-
- 模型评测 → mlflow, wandb, evaluate
|
|
196
|
-
|
|
197
|
-
模型判断:
|
|
198
|
-
- 根据项目技术栈选择兼容工具
|
|
199
|
-
- 根据评测维度选择支持工具
|
|
200
|
-
```
|
|
201
|
-
|
|
202
|
-
### Step 6: 输出结果
|
|
203
|
-
|
|
204
|
-
汇总评测方案,包括维度、数据集、工具和时间估算。
|
|
205
|
-
|
|
206
|
-
## 评测设计
|
|
207
|
-
|
|
208
|
-
以下评测设计原则参考业界最佳实践:
|
|
209
|
-
|
|
210
|
-
### 评测结构
|
|
211
|
-
|
|
212
|
-
**评估(eval)** = 给AI一个输入 + 对其输出应用评分逻辑,以衡量是否成功
|
|
213
|
-
|
|
214
|
-
### 单轮 vs 多轮评测
|
|
215
|
-
|
|
216
|
-
| 类型 | 说明 | 适用场景 |
|
|
217
|
-
|------|------|----------|
|
|
218
|
-
| **单轮评测** | 提示 → 响应 → 评分 | 简单任务、非Agent的LLM场景 |
|
|
219
|
-
| **多轮评测** | 多步交互、工具调用、状态修改 | Agent、复杂任务 |
|
|
220
|
-
|
|
221
|
-
### Agent评测最佳实践
|
|
222
|
-
|
|
223
|
-
1. **匹配系统复杂度**
|
|
224
|
-
- 简单Agent:单轮评测
|
|
225
|
-
- 复杂Agent:多轮评测 + 工具调用验证
|
|
226
|
-
|
|
227
|
-
2. **评测维度**
|
|
228
|
-
- 任务完成率(自动验证)
|
|
229
|
-
- 工具使用正确性(日志分析)
|
|
230
|
-
- 决策质量(LLM评估)
|
|
231
|
-
- 效率(延迟、token消耗)
|
|
232
|
-
|
|
233
|
-
3. **评分逻辑设计**
|
|
234
|
-
- 精确匹配:用于有明确答案的任务
|
|
235
|
-
- LLM-as-Judge:用于开放式任务
|
|
236
|
-
- 规则引擎:用于结构化输出
|
|
237
|
-
|
|
238
|
-
### AI-Resistant评估设计
|
|
239
|
-
|
|
240
|
-
根据AI-Resistant评估设计原则:
|
|
241
|
-
|
|
242
|
-
1. **防止数据泄露**
|
|
243
|
-
- 使用未见过的测试用例
|
|
244
|
-
- 动态生成评估数据
|
|
245
|
-
- 分离训练和评估数据
|
|
246
|
-
|
|
247
|
-
2. **防止提示注入**
|
|
248
|
-
- 评估输入多样化
|
|
249
|
-
- 边界情况测试
|
|
250
|
-
|
|
251
|
-
3. **真实能力测试**
|
|
252
|
-
- 开放式任务评估
|
|
253
|
-
- 多步骤推理测试
|
|
254
|
-
- 实际场景模拟
|
|
255
|
-
|
|
256
|
-
### 基础设施噪声控制
|
|
257
|
-
|
|
258
|
-
根据基础设施噪声控制原则:
|
|
259
|
-
|
|
260
|
-
1. **识别噪声来源**
|
|
261
|
-
- 环境差异(操作系统、依赖版本)
|
|
262
|
-
- 并发干扰
|
|
263
|
-
- 网络延迟
|
|
264
|
-
|
|
265
|
-
2. **控制方法**
|
|
266
|
-
- 隔离测试环境
|
|
267
|
-
- 多次运行取中位数
|
|
268
|
-
- 记录和排除异常值
|
|
269
|
-
|
|
270
|
-
## 评测维度模板
|
|
271
|
-
|
|
272
|
-
### Agent评测
|
|
273
|
-
|
|
274
|
-
| 维度 | 指标 | 方法 |
|
|
275
|
-
|------|------|------|
|
|
276
|
-
| 任务完成率 | success_rate, error_rate | 自动验证 |
|
|
277
|
-
| 工具使用 | tool_accuracy, call_success | 日志分析 |
|
|
278
|
-
| 推理质量 | reasoning_score | LLM评估 |
|
|
279
|
-
| 响应质量 | relevance, helpfulness | LLM/人工评估 |
|
|
280
|
-
| 效率 | latency, token_usage | 自动统计 |
|
|
281
|
-
| **决策透明度** | decision_traceability | 审计日志 |
|
|
282
|
-
| **错误恢复** | error_recovery_rate | 故障注入测试 |
|
|
283
|
-
|
|
284
|
-
### RAG评测
|
|
285
|
-
|
|
286
|
-
| 维度 | 指标 | 方法 |
|
|
287
|
-
|------|------|------|
|
|
288
|
-
| 检索质量 | precision@k, recall@k, MRR | 标注数据 |
|
|
289
|
-
| 上下文相关性 | context_relevance | LLM评估 |
|
|
290
|
-
| 忠实度 | faithfulness, groundedness | LLM评估 |
|
|
291
|
-
| 回答相关性 | answer_relevance | LLM评估 |
|
|
292
|
-
| 幻觉率 | hallucination_rate | 事实核查 |
|
|
293
|
-
| **检索延迟** | retrieval_latency | 自动统计 |
|
|
294
|
-
|
|
295
|
-
### 模型评测
|
|
296
|
-
|
|
297
|
-
| 维度 | 指标 | 方法 |
|
|
298
|
-
|------|------|------|
|
|
299
|
-
| 准确性 | Accuracy, F1, AUC, BLEU, ROUGE | 自动计算 |
|
|
300
|
-
| 性能 | Latency, Throughput | 压测 |
|
|
301
|
-
| 鲁棒性 | Edge case accuracy | 边界测试 |
|
|
302
|
-
| 公平性 | Demographic parity | 分组统计 |
|
|
303
|
-
| **AI-Resistance** | unseen_test_performance | 未知数据测试 |
|
|
304
|
-
|
|
305
|
-
## 调用时机
|
|
306
|
-
|
|
307
|
-
- 检测到需要评测的AI组件时
|
|
308
|
-
- 设计阶段规划评测方案
|
|
309
|
-
- 交付前确认评测覆盖
|
|
310
|
-
|
|
311
|
-
## 示例
|
|
312
|
-
|
|
313
|
-
**输入**:
|
|
314
|
-
```json
|
|
315
|
-
{
|
|
316
|
-
"components": [
|
|
317
|
-
{"name": "ResearchAgent", "type": "agent-framework", "needsEvaluation": true},
|
|
318
|
-
{"name": "RAG系统", "type": "rag-application", "needsEvaluation": true}
|
|
319
|
-
],
|
|
320
|
-
"techStack": ["langchain", "openai", "chromadb"]
|
|
321
|
-
}
|
|
322
|
-
```
|
|
323
|
-
|
|
324
|
-
**输出**:
|
|
325
|
-
```json
|
|
326
|
-
{
|
|
327
|
-
"evaluationScope": {
|
|
328
|
-
"components": ["ResearchAgent", "RAG系统"],
|
|
329
|
-
"priority": "high"
|
|
330
|
-
},
|
|
331
|
-
"evaluationPlan": [
|
|
332
|
-
{
|
|
333
|
-
"component": "ResearchAgent",
|
|
334
|
-
"type": "agent",
|
|
335
|
-
"dimensions": [
|
|
336
|
-
{"name": "任务完成率", "metrics": ["success_rate"], "method": "自动验证"},
|
|
337
|
-
{"name": "工具使用正确性", "metrics": ["tool_accuracy"], "method": "日志分析"},
|
|
338
|
-
{"name": "响应质量", "metrics": ["relevance_score"], "method": "LLM-as-Judge"}
|
|
339
|
-
],
|
|
340
|
-
"dataset": {
|
|
341
|
-
"type": "synthetic",
|
|
342
|
-
"size": 50,
|
|
343
|
-
"generationMethod": "生成研究任务测试用例"
|
|
344
|
-
}
|
|
345
|
-
},
|
|
346
|
-
{
|
|
347
|
-
"component": "RAG系统",
|
|
348
|
-
"type": "rag",
|
|
349
|
-
"dimensions": [
|
|
350
|
-
{"name": "检索准确率", "metrics": ["recall@4", "mrr"], "method": "标注评估"},
|
|
351
|
-
{"name": "回答相关性", "metrics": ["relevance"], "method": "LLM-as-Judge"},
|
|
352
|
-
{"name": "幻觉率", "metrics": ["hallucination_rate"], "method": "事实核查"}
|
|
353
|
-
],
|
|
354
|
-
"dataset": {
|
|
355
|
-
"type": "curated",
|
|
356
|
-
"size": 30,
|
|
357
|
-
"generationMethod": "人工构建问答对"
|
|
358
|
-
}
|
|
359
|
-
}
|
|
360
|
-
],
|
|
361
|
-
"tools": {
|
|
362
|
-
"suggested": ["ragas", "langsmith"],
|
|
363
|
-
"reason": "与LangChain集成良好,支持Agent和RAG评测"
|
|
364
|
-
},
|
|
365
|
-
"timeline": {
|
|
366
|
-
"estimated": "2天",
|
|
367
|
-
"breakdown": {
|
|
368
|
-
"dataset_preparation": "0.5天",
|
|
369
|
-
"evaluation_implementation": "1天",
|
|
370
|
-
"execution_and_analysis": "0.5天"
|
|
371
|
-
}
|
|
372
|
-
}
|
|
373
|
-
}
|
|
374
|
-
```
|