aiox-core 5.0.3 → 5.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.aiox-core/core/execution/predictive-pipeline.js +1283 -0
- package/.aiox-core/core/memory/decision-memory.js +564 -0
- package/.aiox-core/data/entity-registry.yaml +1068 -1028
- package/.aiox-core/data/registry-update-log.jsonl +2 -2
- package/.aiox-core/development/templates/service-template/README.md.hbs +158 -158
- package/.aiox-core/development/templates/service-template/__tests__/index.test.ts.hbs +237 -237
- package/.aiox-core/development/templates/service-template/client.ts.hbs +403 -403
- package/.aiox-core/development/templates/service-template/errors.ts.hbs +182 -182
- package/.aiox-core/development/templates/service-template/index.ts.hbs +120 -120
- package/.aiox-core/development/templates/service-template/package.json.hbs +87 -87
- package/.aiox-core/development/templates/service-template/types.ts.hbs +145 -145
- package/.aiox-core/development/templates/squad-template/LICENSE +21 -21
- package/.aiox-core/infrastructure/templates/aiox-sync.yaml.template +182 -182
- package/.aiox-core/infrastructure/templates/coderabbit.yaml.template +279 -279
- package/.aiox-core/infrastructure/templates/github-workflows/ci.yml.template +169 -169
- package/.aiox-core/infrastructure/templates/github-workflows/pr-automation.yml.template +330 -330
- package/.aiox-core/infrastructure/templates/github-workflows/release.yml.template +196 -196
- package/.aiox-core/infrastructure/templates/gitignore/gitignore-aiox-base.tmpl +63 -63
- package/.aiox-core/infrastructure/templates/gitignore/gitignore-brownfield-merge.tmpl +18 -18
- package/.aiox-core/infrastructure/templates/gitignore/gitignore-node.tmpl +85 -85
- package/.aiox-core/infrastructure/templates/gitignore/gitignore-python.tmpl +145 -145
- package/.aiox-core/install-manifest.yaml +63 -55
- package/.aiox-core/local-config.yaml.template +71 -71
- package/.aiox-core/monitor/hooks/lib/__init__.py +1 -1
- package/.aiox-core/monitor/hooks/lib/enrich.py +58 -58
- package/.aiox-core/monitor/hooks/lib/send_event.py +47 -47
- package/.aiox-core/monitor/hooks/notification.py +29 -29
- package/.aiox-core/monitor/hooks/post_tool_use.py +45 -45
- package/.aiox-core/monitor/hooks/pre_compact.py +29 -29
- package/.aiox-core/monitor/hooks/pre_tool_use.py +40 -40
- package/.aiox-core/monitor/hooks/stop.py +29 -29
- package/.aiox-core/monitor/hooks/subagent_stop.py +29 -29
- package/.aiox-core/monitor/hooks/user_prompt_submit.py +38 -38
- package/.aiox-core/product/templates/adr.hbs +125 -125
- package/.aiox-core/product/templates/dbdr.hbs +241 -241
- package/.aiox-core/product/templates/epic.hbs +212 -212
- package/.aiox-core/product/templates/pmdr.hbs +186 -186
- package/.aiox-core/product/templates/prd-v2.0.hbs +216 -216
- package/.aiox-core/product/templates/prd.hbs +201 -201
- package/.aiox-core/product/templates/story.hbs +263 -263
- package/.aiox-core/product/templates/task.hbs +170 -170
- package/.aiox-core/product/templates/tmpl-comment-on-examples.sql +158 -158
- package/.aiox-core/product/templates/tmpl-migration-script.sql +91 -91
- package/.aiox-core/product/templates/tmpl-rls-granular-policies.sql +104 -104
- package/.aiox-core/product/templates/tmpl-rls-kiss-policy.sql +10 -10
- package/.aiox-core/product/templates/tmpl-rls-roles.sql +135 -135
- package/.aiox-core/product/templates/tmpl-rls-simple.sql +77 -77
- package/.aiox-core/product/templates/tmpl-rls-tenant.sql +152 -152
- package/.aiox-core/product/templates/tmpl-rollback-script.sql +77 -77
- package/.aiox-core/product/templates/tmpl-seed-data.sql +140 -140
- package/.aiox-core/product/templates/tmpl-smoke-test.sql +16 -16
- package/.aiox-core/product/templates/tmpl-staging-copy-merge.sql +139 -139
- package/.aiox-core/product/templates/tmpl-stored-proc.sql +140 -140
- package/.aiox-core/product/templates/tmpl-trigger.sql +152 -152
- package/.aiox-core/product/templates/tmpl-view-materialized.sql +133 -133
- package/.aiox-core/product/templates/tmpl-view.sql +177 -177
- package/.aiox-core/scripts/pm.sh +0 -0
- package/.claude/hooks/enforce-architecture-first.py +196 -196
- package/.claude/hooks/mind-clone-governance.py +192 -192
- package/.claude/hooks/read-protection.py +151 -151
- package/.claude/hooks/slug-validation.py +176 -176
- package/.claude/hooks/sql-governance.py +182 -182
- package/.claude/hooks/write-path-validation.py +194 -194
- package/LICENSE +33 -33
- package/bin/aiox-graph.js +0 -0
- package/bin/aiox-minimal.js +0 -0
- package/bin/aiox.js +0 -0
- package/package.json +1 -1
- package/packages/aiox-install/bin/aiox-install.js +0 -0
- package/packages/aiox-install/bin/edmcp.js +0 -0
- package/packages/aiox-pro-cli/bin/aiox-pro.js +0 -0
- package/packages/installer/src/wizard/pro-setup.js +28 -0
- package/pro/README.md +66 -66
- package/pro/feature-registry.yaml +225 -223
- package/pro/license/license-api.js +701 -679
- package/pro/package.json +39 -39
- package/pro/pro-config.yaml +63 -63
- package/pro/squads/README.md +24 -24
- package/pro/squads/design/HEADLINE.md +3 -3
- package/pro/squads/design/README.md +109 -109
- package/pro/squads/design/agents/brad-frost.md +1097 -1097
- package/pro/squads/design/agents/dan-mall.md +857 -857
- package/pro/squads/design/agents/dave-malouf.md +2272 -2272
- package/pro/squads/design/agents/design-chief.md +114 -114
- package/pro/squads/design/agents/ds-foundations-lead.md +194 -194
- package/pro/squads/design/agents/ds-token-architect.md +361 -361
- package/pro/squads/design/agents/nano-banana-generator.md +162 -162
- package/pro/squads/design/agents/storybook-expert.md +809 -809
- package/pro/squads/design/checklists/atomic-refactor-checklist.md +299 -299
- package/pro/squads/design/checklists/component-adaptation-checklist.md +81 -81
- package/pro/squads/design/checklists/design-fidelity-checklist.md +283 -283
- package/pro/squads/design/checklists/design-handoff-checklist.md +55 -55
- package/pro/squads/design/checklists/design-team-health-checklist.md +454 -454
- package/pro/squads/design/checklists/designops-maturity-checklist.md +518 -518
- package/pro/squads/design/checklists/ds-a11y-release-gate-checklist.md +45 -45
- package/pro/squads/design/checklists/ds-accessibility-wcag-checklist.md +147 -147
- package/pro/squads/design/checklists/ds-component-quality-checklist.md +150 -150
- package/pro/squads/design/checklists/ds-critical-eye-review-checklist.md +147 -147
- package/pro/squads/design/checklists/ds-migration-readiness-checklist.md +99 -99
- package/pro/squads/design/checklists/ds-pattern-audit-checklist.md +164 -164
- package/pro/squads/design/checklists/reading-accessibility-checklist.md +275 -275
- package/pro/squads/design/checklists/token-mapping-checklist.md +107 -107
- package/pro/squads/design/config/coding-standards.md +286 -286
- package/pro/squads/design/config/source-tree.md +59 -59
- package/pro/squads/design/config/tech-stack.md +48 -48
- package/pro/squads/design/config.yaml +204 -204
- package/pro/squads/design/data/agentic-design-systems-guide.md +46 -46
- package/pro/squads/design/data/agentic-ds-principles.md +100 -100
- package/pro/squads/design/data/atomic-design-principles.md +108 -108
- package/pro/squads/design/data/atomic-refactor-rules.md +582 -582
- package/pro/squads/design/data/base-component-specs.md +972 -972
- package/pro/squads/design/data/brad-frost-analysis-extract-implicit.yaml +270 -270
- package/pro/squads/design/data/brad-frost-analysis-find-0.8.yaml +176 -176
- package/pro/squads/design/data/brad-frost-analysis-qa-report.yaml +168 -168
- package/pro/squads/design/data/brad-frost-dna.yaml +713 -713
- package/pro/squads/design/data/capability-tools.yaml +124 -124
- package/pro/squads/design/data/component-adaptation-changelog.md +318 -318
- package/pro/squads/design/data/consolidation-algorithms.md +168 -168
- package/pro/squads/design/data/critical-eye-scoring-rules.yaml +240 -240
- package/pro/squads/design/data/design-token-best-practices.md +107 -107
- package/pro/squads/design/data/design-tokens-spec.yaml +418 -418
- package/pro/squads/design/data/ds-reference-architectures.md +93 -93
- package/pro/squads/design/data/f2-qa-report.md +168 -168
- package/pro/squads/design/data/f3-derived-components-changelog.md +100 -100
- package/pro/squads/design/data/f3-qa-report.md +208 -208
- package/pro/squads/design/data/figma-base-components-raw.md +101 -101
- package/pro/squads/design/data/figma-tokens-raw.md +1548 -1548
- package/pro/squads/design/data/fluent2-design-principles.md +114 -114
- package/pro/squads/design/data/high-retention-reading-guide.md +349 -349
- package/pro/squads/design/data/integration-patterns.md +207 -207
- package/pro/squads/design/data/internal-quality-chain.yaml +48 -48
- package/pro/squads/design/data/motion-tokens-guide.md +202 -202
- package/pro/squads/design/data/roi-calculation-guide.md +142 -142
- package/pro/squads/design/data/token-mapping-reference.md +213 -213
- package/pro/squads/design/data/w3c-dtcg-spec-reference.md +149 -149
- package/pro/squads/design/data/wcag-compliance-guide.md +267 -267
- package/pro/squads/design/docs/AUDIT_REPORT.md +97 -97
- package/pro/squads/design/docs/DS-CURATION-PIPELINE-PROPOSAL.md +577 -577
- package/pro/squads/design/docs/UPGRADE_PLAN.md +618 -618
- package/pro/squads/design/docs/brad-frost-research-validation.md +372 -372
- package/pro/squads/design/docs/dave-malouf-research-validation.md +391 -391
- package/pro/squads/design/docs/tool-discovery-report.md +87 -87
- package/pro/squads/design/docs/tool-integration-plan.md +44 -44
- package/pro/squads/design/protocols/ai-first-governance.md +56 -56
- package/pro/squads/design/protocols/governance-execution-boundary.md +59 -59
- package/pro/squads/design/protocols/handoff.md +60 -60
- package/pro/squads/design/rules/.claude-rules.md +88 -88
- package/pro/squads/design/scripts/design-system/curate_colors.cjs +447 -447
- package/pro/squads/design/scripts/design-system/curate_components.cjs +217 -217
- package/pro/squads/design/scripts/design-system/curate_radius.cjs +190 -190
- package/pro/squads/design/scripts/design-system/curate_shadows.cjs +208 -208
- package/pro/squads/design/scripts/design-system/curate_spacing.cjs +243 -243
- package/pro/squads/design/scripts/design-system/curate_typography.cjs +404 -404
- package/pro/squads/design/scripts/design-system/design-system-metadata.test.js +49 -49
- package/pro/squads/design/scripts/design-system/design_manifest_lib.cjs +142 -142
- package/pro/squads/design/scripts/design-system/fetch_page_images.cjs +195 -195
- package/pro/squads/design/scripts/design-system/generate_components_metadata.cjs +114 -114
- package/pro/squads/design/scripts/design-system/generate_curation_report.cjs +258 -258
- package/pro/squads/design/scripts/design-system/generate_tokens.cjs +342 -342
- package/pro/squads/design/scripts/design-system/sync_design_manifest.cjs +27 -27
- package/pro/squads/design/scripts/design-system/test_mcp_tools.cjs +232 -232
- package/pro/squads/design/scripts/design-system/validate_components_metadata.cjs +96 -96
- package/pro/squads/design/scripts/design-system/validate_curation.cjs +226 -226
- package/pro/squads/design/scripts/design-system/validate_design_manifest_drift.cjs +72 -72
- package/pro/squads/design/scripts/design-system/validate_mcp_skeleton.cjs +38 -38
- package/pro/squads/design/scripts/design-system/validate_registry.cjs +186 -186
- package/pro/squads/design/scripts/design-system/validate_task_checklist_bindings.cjs +78 -78
- package/pro/squads/design/scripts/dissect-artifact.cjs +806 -806
- package/pro/squads/design/scripts/validate-a11y-integration.cjs +40 -40
- package/pro/squads/design/scripts/validate-design-squad.py +411 -411
- package/pro/squads/design/squad.yaml +714 -714
- package/pro/squads/design/tasks/a11y-audit.md +340 -340
- package/pro/squads/design/tasks/aria-audit.md +525 -525
- package/pro/squads/design/tasks/atomic-refactor-execute.md +391 -391
- package/pro/squads/design/tasks/atomic-refactor-plan.md +262 -262
- package/pro/squads/design/tasks/audit-reading-experience.md +350 -350
- package/pro/squads/design/tasks/audit-tailwind-config.md +101 -101
- package/pro/squads/design/tasks/bootstrap-shadcn-library.md +96 -96
- package/pro/squads/design/tasks/bundle-audit.md +245 -245
- package/pro/squads/design/tasks/contrast-matrix.md +373 -373
- package/pro/squads/design/tasks/create-doc.md +135 -135
- package/pro/squads/design/tasks/dead-code-detection.md +329 -329
- package/pro/squads/design/tasks/design-compare.md +414 -414
- package/pro/squads/design/tasks/design-process-optimization.md +407 -407
- package/pro/squads/design/tasks/design-review-orchestration.md +99 -99
- package/pro/squads/design/tasks/design-team-scaling.md +407 -407
- package/pro/squads/design/tasks/design-tooling-audit.md +404 -404
- package/pro/squads/design/tasks/design-triage.md +89 -89
- package/pro/squads/design/tasks/designops-maturity-assessment.md +364 -364
- package/pro/squads/design/tasks/designops-metrics-setup.md +465 -465
- package/pro/squads/design/tasks/ds-agentic-audit.md +100 -100
- package/pro/squads/design/tasks/ds-agentic-setup.md +103 -103
- package/pro/squads/design/tasks/ds-audit-codebase.md +273 -273
- package/pro/squads/design/tasks/ds-build-component.md +349 -349
- package/pro/squads/design/tasks/ds-build-mcp-server.md +84 -84
- package/pro/squads/design/tasks/ds-calculate-roi.md +282 -282
- package/pro/squads/design/tasks/ds-compose-molecule.md +106 -106
- package/pro/squads/design/tasks/ds-consolidate-patterns.md +253 -253
- package/pro/squads/design/tasks/ds-context-contract.md +194 -194
- package/pro/squads/design/tasks/ds-critical-eye-compare.md +130 -130
- package/pro/squads/design/tasks/ds-critical-eye-decide.md +139 -139
- package/pro/squads/design/tasks/ds-critical-eye-inventory.md +111 -111
- package/pro/squads/design/tasks/ds-critical-eye-report.md +101 -101
- package/pro/squads/design/tasks/ds-critical-eye-score.md +109 -109
- package/pro/squads/design/tasks/ds-designops.md +99 -99
- package/pro/squads/design/tasks/ds-extend-pattern.md +91 -91
- package/pro/squads/design/tasks/ds-extract-tokens.md +312 -312
- package/pro/squads/design/tasks/ds-figma-pipeline.md +95 -95
- package/pro/squads/design/tasks/ds-fluent-audit.md +105 -105
- package/pro/squads/design/tasks/ds-fluent-build.md +110 -110
- package/pro/squads/design/tasks/ds-generate-ai-metadata.md +81 -81
- package/pro/squads/design/tasks/ds-generate-cursor-rules.md +74 -74
- package/pro/squads/design/tasks/ds-generate-documentation.md +101 -101
- package/pro/squads/design/tasks/ds-generate-migration-strategy.md +331 -331
- package/pro/squads/design/tasks/ds-generate-shock-report.md +323 -323
- package/pro/squads/design/tasks/ds-govern-a11y-compliance.md +93 -93
- package/pro/squads/design/tasks/ds-governance.md +187 -187
- package/pro/squads/design/tasks/ds-health-metrics.md +278 -278
- package/pro/squads/design/tasks/ds-integrate-squad.md +130 -130
- package/pro/squads/design/tasks/ds-integrate-workspace.md +100 -100
- package/pro/squads/design/tasks/ds-legacy-modernization.md +302 -302
- package/pro/squads/design/tasks/ds-mcp-status.md +65 -65
- package/pro/squads/design/tasks/ds-motion-audit.md +118 -118
- package/pro/squads/design/tasks/ds-multi-framework.md +96 -96
- package/pro/squads/design/tasks/ds-parallelization-gate.md +246 -246
- package/pro/squads/design/tasks/ds-query.md +90 -90
- package/pro/squads/design/tasks/ds-rebuild-artifact.md +369 -369
- package/pro/squads/design/tasks/ds-reverse-engineer.md +194 -194
- package/pro/squads/design/tasks/ds-scan-artifact.md +131 -131
- package/pro/squads/design/tasks/ds-setup-design-system.md +297 -297
- package/pro/squads/design/tasks/ds-sync-registry.md +287 -287
- package/pro/squads/design/tasks/ds-theme-multi-brand.md +90 -90
- package/pro/squads/design/tasks/ds-token-modes.md +108 -108
- package/pro/squads/design/tasks/ds-token-w3c-extract.md +105 -105
- package/pro/squads/design/tasks/ds-validate-ai-readiness.md +69 -69
- package/pro/squads/design/tasks/ds-visual-regression.md +130 -130
- package/pro/squads/design/tasks/execute-checklist.md +141 -141
- package/pro/squads/design/tasks/export-design-tokens-dtcg.md +97 -97
- package/pro/squads/design/tasks/f1-apply-foundations.md +154 -154
- package/pro/squads/design/tasks/f1-ingest-figma-tokens.md +130 -130
- package/pro/squads/design/tasks/f1-map-tokens-to-shadcn.md +145 -145
- package/pro/squads/design/tasks/f1-qa-foundations.md +95 -95
- package/pro/squads/design/tasks/f2-adapt-shadcn-components.md +155 -155
- package/pro/squads/design/tasks/f2-ingest-base-components.md +148 -148
- package/pro/squads/design/tasks/f2-qa-base-components.md +98 -98
- package/pro/squads/design/tasks/f3-derive-components.md +145 -145
- package/pro/squads/design/tasks/f3-qa-derived-components.md +101 -101
- package/pro/squads/design/tasks/focus-order-audit.md +450 -450
- package/pro/squads/design/tasks/sb-brownfield-migrate.md +367 -367
- package/pro/squads/design/tasks/sb-brownfield-scan.md +318 -318
- package/pro/squads/design/tasks/sb-configure.md +230 -230
- package/pro/squads/design/tasks/sb-expand-shadcn.md +213 -213
- package/pro/squads/design/tasks/sb-generate-all-stories.md +288 -288
- package/pro/squads/design/tasks/sb-install.md +152 -152
- package/pro/squads/design/tasks/sb-sync-workspace.md +239 -239
- package/pro/squads/design/tasks/sb-verify.md +203 -203
- package/pro/squads/design/tasks/tailwind-upgrade.md +117 -117
- package/pro/squads/design/tasks/token-usage-analytics.md +262 -262
- package/pro/squads/design/tasks/ux-rewrite-sixth-grade.md +82 -82
- package/pro/squads/design/tasks/validate-design-fidelity.md +222 -222
- package/pro/squads/design/templates/agent-template.yaml +46 -46
- package/pro/squads/design/templates/clone-mind-template.md +352 -352
- package/pro/squads/design/templates/component-prompt-injection-tmpl.md +236 -236
- package/pro/squads/design/templates/component-visual-spec-tmpl.md +378 -378
- package/pro/squads/design/templates/critical-eye-cycle-report-tmpl.md +165 -165
- package/pro/squads/design/templates/design-fidelity-report-tmpl.md +155 -155
- package/pro/squads/design/templates/ds-ai-component-metadata-schema-tmpl.json +138 -138
- package/pro/squads/design/templates/ds-artifact-analysis.md +70 -70
- package/pro/squads/design/templates/ds-health-report-tmpl.md +236 -236
- package/pro/squads/design/templates/ds-migration-strategy-tmpl.md +524 -524
- package/pro/squads/design/templates/ds-state-persistence-tmpl.yaml +194 -194
- package/pro/squads/design/templates/ds-tokens-schema-tmpl.yaml +139 -139
- package/pro/squads/design/templates/migration-strategy-tmpl.md +524 -524
- package/pro/squads/design/templates/reading-design-tokens.css +26 -26
- package/pro/squads/design/templates/state-persistence-tmpl.yaml +219 -219
- package/pro/squads/design/templates/tokens-schema-tmpl.yaml +305 -305
- package/pro/squads/design/workflows/agentic-readiness.yaml +83 -83
- package/pro/squads/design/workflows/audit-only.yaml +198 -198
- package/pro/squads/design/workflows/brownfield-complete.yaml +257 -257
- package/pro/squads/design/workflows/critical-eye.yaml +184 -184
- package/pro/squads/design/workflows/dtcg-tokens-governance.yaml +64 -64
- package/pro/squads/design/workflows/foundations-pipeline.yaml +192 -192
- package/pro/squads/design/workflows/greenfield-new.yaml +192 -192
- package/pro/squads/design/workflows/motion-quality.yaml +65 -65
- package/pro/squads/design/workflows/self-healing-workflow.yaml +237 -237
- package/pro/squads/design/workflows/storybook-brownfield-migration.yaml +400 -400
- package/pro/squads/design/workflows/storybook-full-setup.yaml +280 -280
- package/pro/squads/mmos-squad/minds/alex_hormozi/artifacts/ARQUITETURA_COGNITIVA_DE_ALEX_HORMOZI_EXTRAÇÃO_COMPLETA.md +215 -0
- package/pro/squads/mmos-squad/minds/alex_hormozi/artifacts/A_Rotina_de_Alta_Performance_de_Alex_Hormozi_Arquitetura,_Motivações_e_Replicação.md +309 -0
- package/pro/squads/mmos-squad/minds/alex_hormozi/artifacts/O_sistema_completo_de_criação_de_conteúdo_de_Alex_Hormozi.md +416 -0
- package/pro/squads/mmos-squad/minds/alex_hormozi/artifacts/Processo_Criação_Conteúdo_Hormozi.md +0 -0
- package/pro/squads/mmos-squad/minds/brad_frost/.backup/2026-01-13/artifacts/DECISÕES_ESTRATÉGICAS_DE_DESIGN_SYSTEMS_(2022_2025).md +1038 -0
- package/pro/squads/mmos-squad/minds/brad_frost/.backup/2026-01-13/artifacts/FRAMEWORK_COMPLETO_DE_IMPLEMENTAÇÃO_ATOMIC_DESIGN.md +797 -0
- package/pro/squads/mmos-squad/minds/brad_frost/.backup/2026-01-13/artifacts/O_Cemitério_de_Design_Systems.md +447 -0
- package/pro/squads/mmos-squad/minds/brad_frost/.backup/2026-01-13/artifacts/PRINCÍPIOS_DE_RACIOCÍNIO.md +190 -0
- package/pro/squads/mmos-squad/minds/brad_frost/artifacts/DECISÕES_ESTRATÉGICAS_DE_DESIGN_SYSTEMS_(2022_2025).md +1038 -0
- package/pro/squads/mmos-squad/minds/brad_frost/artifacts/FRAMEWORK_COMPLETO_DE_IMPLEMENTAÇÃO_ATOMIC_DESIGN.md +797 -0
- package/pro/squads/mmos-squad/minds/brad_frost/artifacts/O_Cemitério_de_Design_Systems.md +447 -0
- package/pro/squads/mmos-squad/minds/brad_frost/artifacts/PRINCÍPIOS_DE_RACIOCÍNIO.md +190 -0
- package/pro/squads/mmos-squad/minds/elon_musk/artifacts/ANÁLISE_PSICOMÉTRICA_PROFUNDA_ELON_MUSK.md +291 -0
- package/pro/squads/mmos-squad/minds/elon_musk/artifacts/ASSINATURA_LINGUÍSTICA_ELON_MUSK.md +485 -0
- package/pro/squads/mmos-squad/minds/elon_musk/artifacts/A_Arquitetura_Mental_de_Elon_Musk_Uma_Análise_Sistemática_dos_Frameworks_de_Pensamento.md +907 -0
- package/pro/squads/mmos-squad/minds/elon_musk/artifacts/Dossiê_Estratégico_A_Arquitetura_Psicológica_de_Elon_Musk.md +252 -0
- package/pro/squads/mmos-squad/minds/elon_musk/artifacts/Os_Padrões_de_Leitura_de_Elon_Musk_e_Sua_Influência_Sistemática.md +287 -0
- package/pro/squads/mmos-squad/minds/elon_musk/artifacts/Uma_análise_psicológica_abrangente.md +187 -0
- package/pro/squads/mmos-squad/minds/eugene_schwartz/artifacts/ANÁLISE_PSICOMÉTRICA_PROFUNDA_EUGENE_M._SCHWARTZ.md +790 -0
- package/pro/squads/mmos-squad/minds/eugene_schwartz/artifacts/Análise_Completa_Eugene_Schwartz_Arquitetura_Cognitiva_DEEP.md +210 -0
- package/pro/squads/mmos-squad/minds/pedro_valerio/sources/artifacts_v1.6/5H_EXTRAÇÃO_COGNITIVA_COMPLETA_PEDRO_VALÉRIO_LOPEZ.md +226 -0
- package/pro/squads/mmos-squad/minds/pedro_valerio/sources/artifacts_v1.6/ANÁLISE_COMPARATIVA_REVISADA_PEDRO_VALÉRIO_LOPEZ.md +246 -0
- package/pro/squads/mmos-squad/minds/pedro_valerio/sources/artifacts_v1.6/ANÁLISE_LINGUÍSTICA_CARIOCA_PEDRO_VALÉRIO_LOPEZ.md +274 -0
- package/pro/squads/mmos-squad/minds/pedro_valerio/sources/artifacts_v1.6/ANÁLISE_PSICOMÉTRICA_DEFINITIVA_PEDRO_VALÉRIO_LOPEZ.md +821 -0
- package/pro/squads/mmos-squad/minds/pedro_valerio/sources/artifacts_v1.6/ANÁLISE_PSICOMÉTRICA_PROFUNDA_PEDRO_VALÉRIO.md +1844 -0
- package/pro/squads/mmos-squad/minds/pedro_valerio/sources/artifacts_v1.6/CÁLCULO_DE_RARIDADE_ESTATÍSTICA_PEDRO_VALÉRIO_LOPEZ.md +154 -0
- package/pro/squads/mmos-squad/minds/pedro_valerio/sources/artifacts_v1.6/EXTRAÇÃO_PEDRO_VALÉRIO.md +237 -0
- package/pro/squads/mmos-squad/minds/pedro_valerio/sources/artifacts_v1.6/MAPEAMENTO_LINGUÍSTICO_PROFUNDO.md +161 -0
- package/pro/squads/mmos-squad/minds/pedro_valerio/sources/artifacts_v1.6/META_AXIOMAS_DE_PEDRO_VALÉRIO.md +256 -0
- package/pro/squads/mmos-squad/minds/pedro_valerio/sources/artifacts_v1.6/SISTEMA_IMUNOLÓGICO_COGNITIVO_PEDRO_VALÉRIO_LOPEZ.md +586 -0
- package/pro/squads/mmos-squad/minds/pedro_valerio/sources/artifacts_v1.6/SISTEMA_IMUNOLÓGICO_COGNITIVO_V2_—_CLONE_IA.md +452 -0
- package/pro/squads/mmos-squad/minds/pedro_valerio/sources/artifacts_v1.6/TABELA_COMPARATIVA_ANÁLISE_COMPLETA_DOS_CLONES_IA.md +102 -0
- package/pro/squads/mmos-squad/minds/pedro_valerio/sources/artifacts_v1.6/WHATSAPP_PADRÕES_LINGUÍSTICOS_PEDRO_VALÉRIO_LOPEZ.md +286 -0
- package/pro/squads/mmos-squad/minds/pedro_valerio/sources/artifacts_v1.6/heurísticas_de_decisão_e_algoritmos_mentais_únicos.md +268 -0
- package/pro/squads/mmos-squad/minds/ray_kurzweil/sources/books/PROTOCOLO_COMPLETO_DE_INTERROGAÇÃO_-_NAVAL_RAVIKANT.md +3624 -0
- package/pro/squads/mmos-squad/minds/steve_jobs/artifacts/FRAMEWORK_COMPLETO_DE_IMPLEMENTAÇÃO_JOBS.md +488 -0
- package/pro/squads/mmos-squad/minds/steve_jobs/artifacts/Framework_Cabeça_Steve.md +257 -0
- package/pro/squads/mmos-squad/minds/steve_jobs/artifacts/Relatório_Abrangente_sobre_Steve_Jobs_para_Criação_de_Clone_de_IA.md +370 -0
- package/pro/squads/mmos-squad/minds/steve_jobs/artifacts/Steve_Jobs_Análise_Psicológica_Profunda_e_Validação_Comportamental.md +65 -0
- package/pro/squads/squad-creator-pro/HEADLINE.md +3 -3
- package/pro/squads/squad-creator-pro/agents/oalanicolas.md +438 -438
- package/pro/squads/squad-creator-pro/agents/squad-chief.md +1651 -1651
- package/pro/squads/squad-creator-pro/agents/thiago_finch.md +976 -976
- package/pro/squads/squad-creator-pro/assessments/axioma-assessment-wf-create-squad.yaml +325 -325
- package/pro/squads/squad-creator-pro/checklists/create-agent-checklist.md +184 -184
- package/pro/squads/squad-creator-pro/checklists/create-squad-checklist.md +219 -219
- package/pro/squads/squad-creator-pro/checklists/create-workflow-checklist.md +224 -224
- package/pro/squads/squad-creator-pro/checklists/mental-model-integration-checklist.md +95 -95
- package/pro/squads/squad-creator-pro/checklists/squad-overview-checklist.md +393 -393
- package/pro/squads/squad-creator-pro/config/model-routing.yaml +693 -693
- package/pro/squads/squad-creator-pro/config/scoring-rubric.yaml +199 -199
- package/pro/squads/squad-creator-pro/config.yaml +35 -35
- package/pro/squads/squad-creator-pro/data/internal-infrastructure-library.yaml +99 -99
- package/pro/squads/squad-creator-pro/data/mental-model-task-matrix.yaml +692 -692
- package/pro/squads/squad-creator-pro/docs/ADR-001-model-tier-qualification.md +344 -344
- package/pro/squads/squad-creator-pro/docs/AGENT-COLLABORATION.md +609 -609
- package/pro/squads/squad-creator-pro/docs/MIGRATION-PLAN-AGENT-CONFORMITY.md +861 -861
- package/pro/squads/squad-creator-pro/docs/MODEL-TIER-QUALIFICATION.md +337 -337
- package/pro/squads/squad-creator-pro/docs/optimize-v4-proposal.md +354 -354
- package/pro/squads/squad-creator-pro/docs/task-optimization-framework.md +229 -229
- package/pro/squads/squad-creator-pro/minds/oalanicolas/heuristics/AN_KE_010.md +240 -240
- package/pro/squads/squad-creator-pro/protocols/ai-first-governance.md +63 -63
- package/pro/squads/squad-creator-pro/scripts/assess-sources.sh +443 -443
- package/pro/squads/squad-creator-pro/scripts/clone-review.sh +394 -394
- package/pro/squads/squad-creator-pro/scripts/create-agent-preflight.py +243 -243
- package/pro/squads/squad-creator-pro/scripts/cross-provider/compare-results.js +281 -281
- package/pro/squads/squad-creator-pro/scripts/cross-provider/cross-provider-runner.js +462 -462
- package/pro/squads/squad-creator-pro/scripts/fidelity-score.sh +519 -519
- package/pro/squads/squad-creator-pro/scripts/generate-squad-guide.js +558 -558
- package/pro/squads/squad-creator-pro/scripts/lib/config-loader.js +151 -151
- package/pro/squads/squad-creator-pro/scripts/model-tier-validator.cjs +369 -369
- package/pro/squads/squad-creator-pro/scripts/model-usage-logger.cjs +245 -245
- package/pro/squads/squad-creator-pro/scripts/modernization-score.sh +308 -308
- package/pro/squads/squad-creator-pro/scripts/scaffold-squad.cjs +281 -281
- package/pro/squads/squad-creator-pro/scripts/security_scanner.py +378 -378
- package/pro/squads/squad-creator-pro/scripts/squad-context-loader.cjs +205 -205
- package/pro/squads/squad-creator-pro/scripts/squad-state-manager.cjs +451 -451
- package/pro/squads/squad-creator-pro/scripts/squad-workflow-runner.cjs +471 -471
- package/pro/squads/squad-creator-pro/scripts/squad_utils.py +261 -261
- package/pro/squads/squad-creator-pro/scripts/tests/run_bash_tests.sh +29 -29
- package/pro/squads/squad-creator-pro/scripts/tests/test_assess_sources.sh +216 -216
- package/pro/squads/squad-creator-pro/scripts/tests/test_clone_review.sh +239 -239
- package/pro/squads/squad-creator-pro/scripts/tests/test_coherence_validator.py +212 -212
- package/pro/squads/squad-creator-pro/scripts/tests/test_fidelity_score.sh +298 -298
- package/pro/squads/squad-creator-pro/scripts/tests/test_modernization_score.sh +211 -211
- package/pro/squads/squad-creator-pro/scripts/tests/test_security_scanner.py +354 -354
- package/pro/squads/squad-creator-pro/scripts/tests/test_validate_clone.sh +252 -252
- package/pro/squads/squad-creator-pro/squad.yaml +36 -36
- package/pro/squads/squad-creator-pro/tasks/an-compare-outputs.md +354 -354
- package/pro/squads/squad-creator-pro/tasks/create-squad.md +933 -933
- package/pro/squads/squad-creator-pro/tasks/detect-squad-context.md +81 -81
- package/pro/squads/squad-creator-pro/tasks/lookup-model.md +78 -78
- package/pro/squads/squad-creator-pro/tasks/next-squad.md +487 -487
- package/pro/squads/squad-creator-pro/tasks/optimize-workflow.md +851 -851
- package/pro/squads/squad-creator-pro/tasks/parallel-discovery.md +58 -58
- package/pro/squads/squad-creator-pro/tasks/pv-axioma-assessment-wf-clone-mind.yaml +256 -256
- package/pro/squads/squad-creator-pro/tasks/qualify-task.md +265 -265
- package/pro/squads/squad-creator-pro/tasks/reexecute-squad-phase.md +64 -64
- package/pro/squads/squad-creator-pro/tasks/smoke-test-model-routing.md +167 -167
- package/pro/squads/squad-creator-pro/tasks/squad-overview.md +683 -683
- package/pro/squads/squad-creator-pro/tasks/validate-final-artifacts.md +80 -80
- package/pro/squads/squad-creator-pro/templates/orchestrator-tmpl.md +74 -74
- package/pro/squads/squad-creator-pro/test-cases/BATCH-PROGRESS.md +268 -268
- package/pro/squads/squad-creator-pro/test-cases/QUALIFICATION-DASHBOARD.yaml +13 -13
- package/pro/squads/squad-creator-pro/test-cases/_template.yaml +147 -147
- package/pro/squads/squad-creator-pro/test-cases/an-assess-sources/ASSESSMENT-SUMMARY.md +275 -275
- package/pro/squads/squad-creator-pro/test-cases/an-assess-sources/ASSESSMENT_SUMMARY.md +140 -140
- package/pro/squads/squad-creator-pro/test-cases/an-assess-sources/CHECKPOINT_MATRIX.md +202 -202
- package/pro/squads/squad-creator-pro/test-cases/an-assess-sources/EXECUTION-REPORT.md +413 -413
- package/pro/squads/squad-creator-pro/test-cases/an-assess-sources/EXECUTION_NOTES.md +358 -358
- package/pro/squads/squad-creator-pro/test-cases/an-assess-sources/README-v2.2.2.md +299 -299
- package/pro/squads/squad-creator-pro/test-cases/an-assess-sources/README.md +320 -320
- package/pro/squads/squad-creator-pro/test-cases/an-assess-sources/TEST-REPORT-v2.1.md +351 -351
- package/pro/squads/squad-creator-pro/test-cases/an-assess-sources/VERIFICATION-CHECKLIST.txt +247 -247
- package/pro/squads/squad-creator-pro/test-cases/an-assess-sources/formal-qualification-report.yaml +389 -389
- package/pro/squads/squad-creator-pro/test-cases/an-assess-sources/haiku-output.yaml +366 -366
- package/pro/squads/squad-creator-pro/test-cases/an-assess-sources/haiku-v2.1-output.yaml +452 -452
- package/pro/squads/squad-creator-pro/test-cases/an-assess-sources/haiku-v2.2.1-output.yaml +281 -281
- package/pro/squads/squad-creator-pro/test-cases/an-assess-sources/haiku-v2.2.2-output.yaml +332 -332
- package/pro/squads/squad-creator-pro/test-cases/an-assess-sources/opus-baseline.yaml +517 -517
- package/pro/squads/squad-creator-pro/test-cases/an-assess-sources/qualification-report.yaml +213 -213
- package/pro/squads/squad-creator-pro/test-cases/an-assess-sources/test-case.yaml +69 -69
- package/pro/squads/squad-creator-pro/test-cases/an-clone-review/haiku-round-1.yaml +213 -213
- package/pro/squads/squad-creator-pro/test-cases/an-clone-review/opus-baseline.yaml +566 -566
- package/pro/squads/squad-creator-pro/test-cases/an-clone-review/qualification-report.yaml +82 -82
- package/pro/squads/squad-creator-pro/test-cases/an-design-clone/test-case.yaml +102 -102
- package/pro/squads/squad-creator-pro/test-cases/an-extract-dna/test-case.yaml +105 -105
- package/pro/squads/squad-creator-pro/test-cases/an-fidelity-score/haiku-round-1.yaml +262 -262
- package/pro/squads/squad-creator-pro/test-cases/an-fidelity-score/opus-baseline.yaml +266 -266
- package/pro/squads/squad-creator-pro/test-cases/an-fidelity-score/qualification-report.yaml +94 -94
- package/pro/squads/squad-creator-pro/test-cases/an-validate-clone/haiku-round-1.yaml +282 -282
- package/pro/squads/squad-creator-pro/test-cases/an-validate-clone/opus-baseline.yaml +470 -470
- package/pro/squads/squad-creator-pro/test-cases/an-validate-clone/qualification-report.yaml +106 -106
- package/pro/squads/squad-creator-pro/test-cases/collect-sources/test-case.yaml +105 -105
- package/pro/squads/squad-creator-pro/test-cases/create-task/test-case.yaml +104 -104
- package/pro/squads/squad-creator-pro/test-cases/cross-provider/DASHBOARD.yaml +11 -11
- package/pro/squads/squad-creator-pro/test-cases/pv-audit/test-case.yaml +106 -106
- package/pro/squads/squad-creator-pro/test-cases/pv-axioma-assessment/haiku-output.yaml +209 -209
- package/pro/squads/squad-creator-pro/test-cases/pv-axioma-assessment/opus-baseline.yaml +96 -96
- package/pro/squads/squad-creator-pro/test-cases/pv-axioma-assessment/sonnet-output.yaml +30 -30
- package/pro/squads/squad-creator-pro/test-cases/pv-axioma-assessment/test-case.yaml +129 -129
- package/pro/squads/squad-creator-pro/test-cases/pv-modernization-score/comparison-round-1.yaml +242 -242
- package/pro/squads/squad-creator-pro/test-cases/pv-modernization-score/haiku-round-1.yaml +393 -393
- package/pro/squads/squad-creator-pro/test-cases/pv-modernization-score/opus-baseline.yaml +488 -488
- package/pro/squads/squad-creator-pro/test-cases/pv-modernization-score/qualification-report.yaml +74 -74
- package/pro/squads/squad-creator-pro/test-cases/qa-after-creation/haiku-round-1.yaml +292 -292
- package/pro/squads/squad-creator-pro/test-cases/qa-after-creation/opus-baseline.yaml +603 -603
- package/pro/squads/squad-creator-pro/test-cases/qa-after-creation/qualification-report.yaml +97 -97
- package/pro/squads/squad-creator-pro/test-cases/smoke-test-model-routing/test-case.yaml +100 -100
- package/pro/squads/squad-creator-pro/test-cases/upgrade-squad/test-case.yaml +106 -106
- package/pro/squads/squad-creator-pro/test-cases/validate-squad/comparison-round-1.yaml +223 -223
- package/pro/squads/squad-creator-pro/test-cases/validate-squad/haiku-round-1-MINE.yaml +36 -36
- package/pro/squads/squad-creator-pro/test-cases/validate-squad/haiku-round-1.yaml +193 -193
- package/pro/squads/squad-creator-pro/test-cases/validate-squad/haiku-round-2.yaml +303 -303
- package/pro/squads/squad-creator-pro/test-cases/validate-squad/haiku-round-3-v4-task.yaml +149 -149
- package/pro/squads/squad-creator-pro/test-cases/validate-squad/opus-baseline.yaml +529 -529
- package/pro/squads/squad-creator-pro/test-cases/validate-squad/opus-round-3-v4-task.yaml +132 -132
- package/pro/squads/squad-creator-pro/test-cases/validate-squad/qualification-report.yaml +104 -104
- package/pro/squads/squad-creator-pro/test-cases/wf-clone-mind/haiku-output-v2-calibrated.yaml +200 -200
- package/pro/squads/squad-creator-pro/test-cases/wf-clone-mind/haiku-output.yaml +183 -183
- package/pro/squads/squad-creator-pro/test-cases/wf-clone-mind/opus-baseline.yaml +112 -112
- package/pro/squads/squad-creator-pro/workflows/create-squad.yaml +348 -348
- package/pro/squads/squad-creator-pro/workflows/modules/module-discovery.yaml +16 -16
- package/pro/squads/squad-creator-pro/workflows/modules/module-integration.yaml +16 -16
- package/pro/squads/squad-creator-pro/workflows/modules/module-quality-gates.yaml +15 -15
- package/pro/squads/squad-creator-pro/workflows/wf-brownfield-upgrade-squad.yaml +46 -46
- package/pro/squads/squad-creator-pro/workflows/wf-context-aware-create-squad.yaml +47 -47
- package/pro/squads/squad-creator-pro/workflows/wf-create-squad.yaml +1619 -1619
- package/pro/squads/squad-creator-pro/workflows/wf-cross-provider-qualification.yaml +711 -711
- package/pro/squads/squad-creator-pro/workflows/wf-model-tier-qualification.yaml +800 -800
- package/pro/squads/squad-creator-pro/workflows/wf-optimize-squad.yaml +684 -684
- package/scripts/check-markdown-links.py +352 -352
- package/scripts/dashboard-parallel-dev.sh +0 -0
- package/scripts/dashboard-parallel-phase3.sh +0 -0
- package/scripts/dashboard-parallel-phase4.sh +0 -0
- package/scripts/install-monitor-hooks.sh +0 -0
- package/.claude/hooks/code-intel-pretool.cjs +0 -107
- package/docs/guides/aios-workflows/README.md +0 -247
- package/docs/guides/aios-workflows/bob-orchestrator-workflow.md +0 -1536
- package/scripts/glue/README.md +0 -355
- package/scripts/glue/compose-agent-prompt.cjs +0 -362
|
@@ -1,369 +1,369 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
/**
|
|
3
|
-
* Model Tier Validator for Squad Creator Token Economy
|
|
4
|
-
*
|
|
5
|
-
* Validates tasks against Opus/Sonnet/Haiku to determine optimal tier.
|
|
6
|
-
*
|
|
7
|
-
* Usage:
|
|
8
|
-
* node model-tier-validator.cjs validate <task-name>
|
|
9
|
-
* node model-tier-validator.cjs list
|
|
10
|
-
* node model-tier-validator.cjs report
|
|
11
|
-
* node model-tier-validator.cjs update-routing <task-name> <tier> [reason]
|
|
12
|
-
*
|
|
13
|
-
* Examples:
|
|
14
|
-
* node model-tier-validator.cjs validate pv-axioma-assessment
|
|
15
|
-
* node model-tier-validator.cjs list
|
|
16
|
-
* node model-tier-validator.cjs report
|
|
17
|
-
*/
|
|
18
|
-
|
|
19
|
-
const fs = require('fs');
|
|
20
|
-
const path = require('path');
|
|
21
|
-
const yaml = require('js-yaml');
|
|
22
|
-
|
|
23
|
-
// Paths
|
|
24
|
-
const SQUAD_ROOT = path.join(__dirname, '..');
|
|
25
|
-
const CONFIG_PATH = path.join(SQUAD_ROOT, 'config/scoring-rubric.yaml');
|
|
26
|
-
const TEST_CASES_DIR = path.join(SQUAD_ROOT, 'test-cases');
|
|
27
|
-
const ROUTING_CONFIG = path.join(SQUAD_ROOT, 'config/model-routing.yaml');
|
|
28
|
-
// Note: Results are stored inside test-cases/{task-name}/ alongside test-case.yaml
|
|
29
|
-
|
|
30
|
-
// Ensure test-cases directory exists
|
|
31
|
-
if (!fs.existsSync(TEST_CASES_DIR)) {
|
|
32
|
-
fs.mkdirSync(TEST_CASES_DIR, { recursive: true });
|
|
33
|
-
}
|
|
34
|
-
|
|
35
|
-
// Load scoring rubric
|
|
36
|
-
function loadRubric() {
|
|
37
|
-
try {
|
|
38
|
-
const content = fs.readFileSync(CONFIG_PATH, 'utf8');
|
|
39
|
-
return yaml.load(content);
|
|
40
|
-
} catch (e) {
|
|
41
|
-
console.error(`Error loading rubric: ${e.message}`);
|
|
42
|
-
return null;
|
|
43
|
-
}
|
|
44
|
-
}
|
|
45
|
-
|
|
46
|
-
// Load test case
|
|
47
|
-
function loadTestCase(taskName) {
|
|
48
|
-
// New structure: test-cases/{task-name}/test-case.yaml
|
|
49
|
-
let testCasePath = path.join(TEST_CASES_DIR, taskName, 'test-case.yaml');
|
|
50
|
-
|
|
51
|
-
// Fallback to old structure: test-cases/{task-name}.yaml
|
|
52
|
-
if (!fs.existsSync(testCasePath)) {
|
|
53
|
-
testCasePath = path.join(TEST_CASES_DIR, `${taskName}.yaml`);
|
|
54
|
-
}
|
|
55
|
-
|
|
56
|
-
try {
|
|
57
|
-
const content = fs.readFileSync(testCasePath, 'utf8');
|
|
58
|
-
return yaml.load(content);
|
|
59
|
-
} catch (e) {
|
|
60
|
-
console.error(`Error loading test case: ${e.message}`);
|
|
61
|
-
return null;
|
|
62
|
-
}
|
|
63
|
-
}
|
|
64
|
-
|
|
65
|
-
// Load routing config
|
|
66
|
-
function loadRouting() {
|
|
67
|
-
try {
|
|
68
|
-
const content = fs.readFileSync(ROUTING_CONFIG, 'utf8');
|
|
69
|
-
return yaml.load(content);
|
|
70
|
-
} catch (e) {
|
|
71
|
-
console.error(`Error loading routing config: ${e.message}`);
|
|
72
|
-
return null;
|
|
73
|
-
}
|
|
74
|
-
}
|
|
75
|
-
|
|
76
|
-
// Calculate score for an output against baseline
|
|
77
|
-
function calculateScore(output, baseline, testCase, rubric) {
|
|
78
|
-
const scores = {};
|
|
79
|
-
let totalScore = 0;
|
|
80
|
-
|
|
81
|
-
// For each dimension in rubric
|
|
82
|
-
for (const [dimName, dimConfig] of Object.entries(rubric.dimensions)) {
|
|
83
|
-
// This would need actual comparison logic
|
|
84
|
-
// For now, placeholder scoring
|
|
85
|
-
const dimScore = evaluateDimension(dimName, output, baseline, testCase);
|
|
86
|
-
scores[dimName] = {
|
|
87
|
-
score: dimScore,
|
|
88
|
-
weight: dimConfig.weight,
|
|
89
|
-
weighted: dimScore * dimConfig.weight
|
|
90
|
-
};
|
|
91
|
-
totalScore += dimScore * dimConfig.weight;
|
|
92
|
-
}
|
|
93
|
-
|
|
94
|
-
return {
|
|
95
|
-
total: totalScore,
|
|
96
|
-
dimensions: scores,
|
|
97
|
-
percentage: (totalScore / 10) * 100
|
|
98
|
-
};
|
|
99
|
-
}
|
|
100
|
-
|
|
101
|
-
// Evaluate a single dimension
|
|
102
|
-
// Uses pre-recorded scores from test case if available, otherwise returns null for manual scoring
|
|
103
|
-
function evaluateDimension(dimName, output, baseline, testCase) {
|
|
104
|
-
// If test case has pre-recorded dimension scores, use them
|
|
105
|
-
if (testCase.results) {
|
|
106
|
-
for (const model of ['haiku', 'sonnet', 'opus']) {
|
|
107
|
-
if (testCase.results[model] && testCase.results[model].dimensions) {
|
|
108
|
-
const dimResult = testCase.results[model].dimensions[dimName];
|
|
109
|
-
if (dimResult && typeof dimResult.score === 'number') {
|
|
110
|
-
return dimResult.score;
|
|
111
|
-
}
|
|
112
|
-
}
|
|
113
|
-
}
|
|
114
|
-
}
|
|
115
|
-
|
|
116
|
-
// If overall score exists, distribute proportionally across dimensions
|
|
117
|
-
if (testCase.results && testCase.results.haiku && testCase.results.haiku.score) {
|
|
118
|
-
// Use the overall score as approximation for each dimension
|
|
119
|
-
return testCase.results.haiku.score;
|
|
120
|
-
}
|
|
121
|
-
|
|
122
|
-
// No pre-recorded data - return null to indicate manual scoring needed
|
|
123
|
-
return null;
|
|
124
|
-
}
|
|
125
|
-
|
|
126
|
-
// Determine tier recommendation
|
|
127
|
-
function recommendTier(results, rubric) {
|
|
128
|
-
const { haiku, sonnet, opus } = results;
|
|
129
|
-
|
|
130
|
-
if (haiku && haiku.percentage >= rubric.thresholds.haiku * 100) {
|
|
131
|
-
return {
|
|
132
|
-
tier: 'haiku',
|
|
133
|
-
reason: `Haiku score (${haiku.percentage.toFixed(1)}%) >= threshold (${rubric.thresholds.haiku * 100}%)`,
|
|
134
|
-
savings: '93%'
|
|
135
|
-
};
|
|
136
|
-
}
|
|
137
|
-
|
|
138
|
-
if (sonnet && sonnet.percentage >= rubric.thresholds.sonnet * 100) {
|
|
139
|
-
return {
|
|
140
|
-
tier: 'sonnet',
|
|
141
|
-
reason: `Sonnet score (${sonnet.percentage.toFixed(1)}%) >= threshold (${rubric.thresholds.sonnet * 100}%)`,
|
|
142
|
-
savings: '80%'
|
|
143
|
-
};
|
|
144
|
-
}
|
|
145
|
-
|
|
146
|
-
return {
|
|
147
|
-
tier: 'opus',
|
|
148
|
-
reason: 'Neither Haiku nor Sonnet met quality threshold',
|
|
149
|
-
savings: '0%'
|
|
150
|
-
};
|
|
151
|
-
}
|
|
152
|
-
|
|
153
|
-
// Generate comparison report
|
|
154
|
-
function generateReport(taskName, results, recommendation) {
|
|
155
|
-
const report = {
|
|
156
|
-
task: taskName,
|
|
157
|
-
timestamp: new Date().toISOString(),
|
|
158
|
-
results: {
|
|
159
|
-
haiku: results.haiku ? {
|
|
160
|
-
score: results.haiku.total.toFixed(2),
|
|
161
|
-
percentage: results.haiku.percentage.toFixed(1) + '%',
|
|
162
|
-
qualified: results.haiku.percentage >= 90
|
|
163
|
-
} : null,
|
|
164
|
-
sonnet: results.sonnet ? {
|
|
165
|
-
score: results.sonnet.total.toFixed(2),
|
|
166
|
-
percentage: results.sonnet.percentage.toFixed(1) + '%',
|
|
167
|
-
qualified: results.sonnet.percentage >= 95
|
|
168
|
-
} : null,
|
|
169
|
-
opus: {
|
|
170
|
-
score: '10.00',
|
|
171
|
-
percentage: '100%',
|
|
172
|
-
qualified: true
|
|
173
|
-
}
|
|
174
|
-
},
|
|
175
|
-
recommendation: recommendation,
|
|
176
|
-
compensation_needed: recommendation.tier !== 'haiku' && results.haiku && results.haiku.percentage < 90
|
|
177
|
-
};
|
|
178
|
-
|
|
179
|
-
return report;
|
|
180
|
-
}
|
|
181
|
-
|
|
182
|
-
// Print comparison table
|
|
183
|
-
function printComparisonTable(reports) {
|
|
184
|
-
console.log('\n' + '='.repeat(80));
|
|
185
|
-
console.log('MODEL TIER QUALIFICATION REPORT');
|
|
186
|
-
console.log('='.repeat(80));
|
|
187
|
-
console.log('');
|
|
188
|
-
console.log('| Task | Haiku | Sonnet | Opus | Recommended | Savings |');
|
|
189
|
-
console.log('|------|-------|--------|------|-------------|---------|');
|
|
190
|
-
|
|
191
|
-
for (const report of reports) {
|
|
192
|
-
const haiku = report.results.haiku?.percentage != null ? `${report.results.haiku.percentage}%` : 'N/A';
|
|
193
|
-
const sonnet = report.results.sonnet?.percentage != null ? `${report.results.sonnet.percentage}%` : 'N/A';
|
|
194
|
-
const opus = report.results.opus?.percentage != null ? `${report.results.opus.percentage}%` : '100%';
|
|
195
|
-
const rec = report.recommendation.tier.toUpperCase();
|
|
196
|
-
const savings = report.recommendation.savings;
|
|
197
|
-
|
|
198
|
-
console.log(`| ${report.task.padEnd(20)} | ${haiku.padEnd(5)} | ${sonnet.padEnd(6)} | ${opus.padEnd(4)} | ${rec.padEnd(11)} | ${savings.padEnd(7)} |`);
|
|
199
|
-
}
|
|
200
|
-
|
|
201
|
-
console.log('');
|
|
202
|
-
console.log('='.repeat(80));
|
|
203
|
-
}
|
|
204
|
-
|
|
205
|
-
// List all test cases
|
|
206
|
-
function listTestCases() {
|
|
207
|
-
const entries = fs.readdirSync(TEST_CASES_DIR, { withFileTypes: true });
|
|
208
|
-
const cases = [];
|
|
209
|
-
|
|
210
|
-
for (const entry of entries) {
|
|
211
|
-
if (entry.isDirectory() && !entry.name.startsWith('_')) {
|
|
212
|
-
// New structure: test-cases/{task-name}/test-case.yaml
|
|
213
|
-
const testCasePath = path.join(TEST_CASES_DIR, entry.name, 'test-case.yaml');
|
|
214
|
-
if (fs.existsSync(testCasePath)) {
|
|
215
|
-
cases.push(entry.name);
|
|
216
|
-
}
|
|
217
|
-
} else if (entry.isFile() && entry.name.endsWith('.yaml') && !entry.name.startsWith('_')) {
|
|
218
|
-
// Old structure: test-cases/{task-name}.yaml
|
|
219
|
-
cases.push(entry.name.replace('.yaml', ''));
|
|
220
|
-
}
|
|
221
|
-
}
|
|
222
|
-
|
|
223
|
-
return cases;
|
|
224
|
-
}
|
|
225
|
-
|
|
226
|
-
// Update routing config with validated tier
|
|
227
|
-
function updateRoutingConfig(taskName, tier, reason) {
|
|
228
|
-
const routing = loadRouting();
|
|
229
|
-
if (!routing || !routing.tasks) return false;
|
|
230
|
-
|
|
231
|
-
const taskKey = `${taskName}.md`;
|
|
232
|
-
if (routing.tasks[taskKey]) {
|
|
233
|
-
routing.tasks[taskKey].tier = tier;
|
|
234
|
-
routing.tasks[taskKey].validated = true;
|
|
235
|
-
routing.tasks[taskKey].validation_reason = reason;
|
|
236
|
-
routing.tasks[taskKey].validated_date = new Date().toISOString().split('T')[0];
|
|
237
|
-
|
|
238
|
-
fs.writeFileSync(ROUTING_CONFIG, yaml.dump(routing, { lineWidth: 120 }));
|
|
239
|
-
return true;
|
|
240
|
-
}
|
|
241
|
-
|
|
242
|
-
return false;
|
|
243
|
-
}
|
|
244
|
-
|
|
245
|
-
// CLI handler
|
|
246
|
-
const [,, command, ...args] = process.argv;
|
|
247
|
-
|
|
248
|
-
switch (command) {
|
|
249
|
-
case 'validate':
|
|
250
|
-
const taskName = args[0];
|
|
251
|
-
if (!taskName) {
|
|
252
|
-
console.log('Usage: node model-tier-validator.cjs validate <task-name>');
|
|
253
|
-
process.exit(1);
|
|
254
|
-
}
|
|
255
|
-
|
|
256
|
-
const rubric = loadRubric();
|
|
257
|
-
const testCase = loadTestCase(taskName);
|
|
258
|
-
|
|
259
|
-
if (!rubric || !testCase) {
|
|
260
|
-
console.error('Failed to load required files');
|
|
261
|
-
process.exit(1);
|
|
262
|
-
}
|
|
263
|
-
|
|
264
|
-
console.log(`\nValidating: ${taskName}`);
|
|
265
|
-
console.log('Test case loaded:', testCase.test_case.name);
|
|
266
|
-
console.log('');
|
|
267
|
-
|
|
268
|
-
// Show test case info
|
|
269
|
-
console.log('Input:', testCase.input.target);
|
|
270
|
-
console.log('Expected sections:', testCase.expected_output.sections.join(', '));
|
|
271
|
-
console.log('');
|
|
272
|
-
|
|
273
|
-
// Show current results if available
|
|
274
|
-
if (testCase.results) {
|
|
275
|
-
console.log('--- Current Results ---');
|
|
276
|
-
if (testCase.results.haiku && testCase.results.haiku.score) {
|
|
277
|
-
console.log(`Haiku: ${testCase.results.haiku.percentage}% - ${testCase.results.haiku.qualified ? '✅ Qualified' : '❌ Not qualified'}`);
|
|
278
|
-
}
|
|
279
|
-
if (testCase.results.sonnet && testCase.results.sonnet.score) {
|
|
280
|
-
console.log(`Sonnet: ${testCase.results.sonnet.percentage}% - ${testCase.results.sonnet.qualified ? '✅ Qualified' : '❌ Not qualified'}`);
|
|
281
|
-
}
|
|
282
|
-
console.log(`Opus: ${testCase.results.opus.percentage}% - ✅ Baseline`);
|
|
283
|
-
console.log('');
|
|
284
|
-
console.log(`Final tier: ${testCase.results.final_tier || 'Not determined'}`);
|
|
285
|
-
console.log(`Recommendation: ${testCase.results.recommendation || 'None'}`);
|
|
286
|
-
}
|
|
287
|
-
break;
|
|
288
|
-
|
|
289
|
-
case 'list':
|
|
290
|
-
const tasks = listTestCases();
|
|
291
|
-
console.log('\nAvailable test cases:');
|
|
292
|
-
tasks.forEach(t => console.log(` - ${t}`));
|
|
293
|
-
console.log(`\nTotal: ${tasks.length} test cases`);
|
|
294
|
-
break;
|
|
295
|
-
|
|
296
|
-
case 'report':
|
|
297
|
-
const allTasks = listTestCases();
|
|
298
|
-
const reports = [];
|
|
299
|
-
|
|
300
|
-
for (const task of allTasks) {
|
|
301
|
-
const tc = loadTestCase(task);
|
|
302
|
-
if (tc && tc.results && tc.results.final_tier) {
|
|
303
|
-
reports.push({
|
|
304
|
-
task,
|
|
305
|
-
results: tc.results,
|
|
306
|
-
recommendation: {
|
|
307
|
-
tier: tc.results.final_tier,
|
|
308
|
-
savings: tc.results.final_tier === 'haiku' ? '93%' :
|
|
309
|
-
tc.results.final_tier === 'sonnet' ? '80%' : '0%'
|
|
310
|
-
}
|
|
311
|
-
});
|
|
312
|
-
}
|
|
313
|
-
}
|
|
314
|
-
|
|
315
|
-
if (reports.length === 0) {
|
|
316
|
-
console.log('\nNo validated test cases yet.');
|
|
317
|
-
console.log('Run validation first: node model-tier-validator.cjs validate <task-name>');
|
|
318
|
-
} else {
|
|
319
|
-
printComparisonTable(reports);
|
|
320
|
-
}
|
|
321
|
-
break;
|
|
322
|
-
|
|
323
|
-
case 'update-routing':
|
|
324
|
-
const updateTask = args[0];
|
|
325
|
-
const updateTier = args[1];
|
|
326
|
-
const updateReason = args.slice(2).join(' ') || 'Validated via model-tier-validator';
|
|
327
|
-
|
|
328
|
-
if (!updateTask || !updateTier) {
|
|
329
|
-
console.log('Usage: node model-tier-validator.cjs update-routing <task-name> <tier> [reason]');
|
|
330
|
-
process.exit(1);
|
|
331
|
-
}
|
|
332
|
-
|
|
333
|
-
if (updateRoutingConfig(updateTask, updateTier, updateReason)) {
|
|
334
|
-
console.log(`✅ Updated ${updateTask}.md to tier: ${updateTier}`);
|
|
335
|
-
} else {
|
|
336
|
-
console.log(`❌ Failed to update routing config for ${updateTask}`);
|
|
337
|
-
}
|
|
338
|
-
break;
|
|
339
|
-
|
|
340
|
-
default:
|
|
341
|
-
console.log(`
|
|
342
|
-
Model Tier Validator - Squad Creator Token Economy
|
|
343
|
-
|
|
344
|
-
Commands:
|
|
345
|
-
validate <task-name>
|
|
346
|
-
Show validation status for a task
|
|
347
|
-
|
|
348
|
-
list
|
|
349
|
-
List all available test cases
|
|
350
|
-
|
|
351
|
-
report
|
|
352
|
-
Generate comparison report for all validated tasks
|
|
353
|
-
|
|
354
|
-
update-routing <task-name> <tier> [reason]
|
|
355
|
-
Update model-routing.yaml with validated tier
|
|
356
|
-
|
|
357
|
-
Examples:
|
|
358
|
-
node model-tier-validator.cjs validate pv-axioma-assessment
|
|
359
|
-
node model-tier-validator.cjs list
|
|
360
|
-
node model-tier-validator.cjs report
|
|
361
|
-
node model-tier-validator.cjs update-routing pv-axioma-assessment haiku "Scoring determinístico"
|
|
362
|
-
|
|
363
|
-
Note:
|
|
364
|
-
Actual model comparison requires running tasks through Claude API.
|
|
365
|
-
This tool manages test cases and tracks results.
|
|
366
|
-
Use the Skill tool to run actual tests:
|
|
367
|
-
/pv-axioma-assessment <input>
|
|
368
|
-
`);
|
|
369
|
-
}
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Model Tier Validator for Squad Creator Token Economy
|
|
4
|
+
*
|
|
5
|
+
* Validates tasks against Opus/Sonnet/Haiku to determine optimal tier.
|
|
6
|
+
*
|
|
7
|
+
* Usage:
|
|
8
|
+
* node model-tier-validator.cjs validate <task-name>
|
|
9
|
+
* node model-tier-validator.cjs list
|
|
10
|
+
* node model-tier-validator.cjs report
|
|
11
|
+
* node model-tier-validator.cjs update-routing <task-name> <tier> [reason]
|
|
12
|
+
*
|
|
13
|
+
* Examples:
|
|
14
|
+
* node model-tier-validator.cjs validate pv-axioma-assessment
|
|
15
|
+
* node model-tier-validator.cjs list
|
|
16
|
+
* node model-tier-validator.cjs report
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
const fs = require('fs');
|
|
20
|
+
const path = require('path');
|
|
21
|
+
const yaml = require('js-yaml');
|
|
22
|
+
|
|
23
|
+
// Paths
|
|
24
|
+
const SQUAD_ROOT = path.join(__dirname, '..');
|
|
25
|
+
const CONFIG_PATH = path.join(SQUAD_ROOT, 'config/scoring-rubric.yaml');
|
|
26
|
+
const TEST_CASES_DIR = path.join(SQUAD_ROOT, 'test-cases');
|
|
27
|
+
const ROUTING_CONFIG = path.join(SQUAD_ROOT, 'config/model-routing.yaml');
|
|
28
|
+
// Note: Results are stored inside test-cases/{task-name}/ alongside test-case.yaml
|
|
29
|
+
|
|
30
|
+
// Ensure test-cases directory exists
|
|
31
|
+
if (!fs.existsSync(TEST_CASES_DIR)) {
|
|
32
|
+
fs.mkdirSync(TEST_CASES_DIR, { recursive: true });
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
// Load scoring rubric
|
|
36
|
+
function loadRubric() {
|
|
37
|
+
try {
|
|
38
|
+
const content = fs.readFileSync(CONFIG_PATH, 'utf8');
|
|
39
|
+
return yaml.load(content);
|
|
40
|
+
} catch (e) {
|
|
41
|
+
console.error(`Error loading rubric: ${e.message}`);
|
|
42
|
+
return null;
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
// Load test case
|
|
47
|
+
function loadTestCase(taskName) {
|
|
48
|
+
// New structure: test-cases/{task-name}/test-case.yaml
|
|
49
|
+
let testCasePath = path.join(TEST_CASES_DIR, taskName, 'test-case.yaml');
|
|
50
|
+
|
|
51
|
+
// Fallback to old structure: test-cases/{task-name}.yaml
|
|
52
|
+
if (!fs.existsSync(testCasePath)) {
|
|
53
|
+
testCasePath = path.join(TEST_CASES_DIR, `${taskName}.yaml`);
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
try {
|
|
57
|
+
const content = fs.readFileSync(testCasePath, 'utf8');
|
|
58
|
+
return yaml.load(content);
|
|
59
|
+
} catch (e) {
|
|
60
|
+
console.error(`Error loading test case: ${e.message}`);
|
|
61
|
+
return null;
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
// Load routing config
|
|
66
|
+
function loadRouting() {
|
|
67
|
+
try {
|
|
68
|
+
const content = fs.readFileSync(ROUTING_CONFIG, 'utf8');
|
|
69
|
+
return yaml.load(content);
|
|
70
|
+
} catch (e) {
|
|
71
|
+
console.error(`Error loading routing config: ${e.message}`);
|
|
72
|
+
return null;
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
// Calculate score for an output against baseline
|
|
77
|
+
function calculateScore(output, baseline, testCase, rubric) {
|
|
78
|
+
const scores = {};
|
|
79
|
+
let totalScore = 0;
|
|
80
|
+
|
|
81
|
+
// For each dimension in rubric
|
|
82
|
+
for (const [dimName, dimConfig] of Object.entries(rubric.dimensions)) {
|
|
83
|
+
// This would need actual comparison logic
|
|
84
|
+
// For now, placeholder scoring
|
|
85
|
+
const dimScore = evaluateDimension(dimName, output, baseline, testCase);
|
|
86
|
+
scores[dimName] = {
|
|
87
|
+
score: dimScore,
|
|
88
|
+
weight: dimConfig.weight,
|
|
89
|
+
weighted: dimScore * dimConfig.weight
|
|
90
|
+
};
|
|
91
|
+
totalScore += dimScore * dimConfig.weight;
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
return {
|
|
95
|
+
total: totalScore,
|
|
96
|
+
dimensions: scores,
|
|
97
|
+
percentage: (totalScore / 10) * 100
|
|
98
|
+
};
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
// Evaluate a single dimension
|
|
102
|
+
// Uses pre-recorded scores from test case if available, otherwise returns null for manual scoring
|
|
103
|
+
function evaluateDimension(dimName, output, baseline, testCase) {
|
|
104
|
+
// If test case has pre-recorded dimension scores, use them
|
|
105
|
+
if (testCase.results) {
|
|
106
|
+
for (const model of ['haiku', 'sonnet', 'opus']) {
|
|
107
|
+
if (testCase.results[model] && testCase.results[model].dimensions) {
|
|
108
|
+
const dimResult = testCase.results[model].dimensions[dimName];
|
|
109
|
+
if (dimResult && typeof dimResult.score === 'number') {
|
|
110
|
+
return dimResult.score;
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
// If overall score exists, distribute proportionally across dimensions
|
|
117
|
+
if (testCase.results && testCase.results.haiku && testCase.results.haiku.score) {
|
|
118
|
+
// Use the overall score as approximation for each dimension
|
|
119
|
+
return testCase.results.haiku.score;
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
// No pre-recorded data - return null to indicate manual scoring needed
|
|
123
|
+
return null;
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
// Determine tier recommendation
|
|
127
|
+
function recommendTier(results, rubric) {
|
|
128
|
+
const { haiku, sonnet, opus } = results;
|
|
129
|
+
|
|
130
|
+
if (haiku && haiku.percentage >= rubric.thresholds.haiku * 100) {
|
|
131
|
+
return {
|
|
132
|
+
tier: 'haiku',
|
|
133
|
+
reason: `Haiku score (${haiku.percentage.toFixed(1)}%) >= threshold (${rubric.thresholds.haiku * 100}%)`,
|
|
134
|
+
savings: '93%'
|
|
135
|
+
};
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
if (sonnet && sonnet.percentage >= rubric.thresholds.sonnet * 100) {
|
|
139
|
+
return {
|
|
140
|
+
tier: 'sonnet',
|
|
141
|
+
reason: `Sonnet score (${sonnet.percentage.toFixed(1)}%) >= threshold (${rubric.thresholds.sonnet * 100}%)`,
|
|
142
|
+
savings: '80%'
|
|
143
|
+
};
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
return {
|
|
147
|
+
tier: 'opus',
|
|
148
|
+
reason: 'Neither Haiku nor Sonnet met quality threshold',
|
|
149
|
+
savings: '0%'
|
|
150
|
+
};
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
// Generate comparison report
|
|
154
|
+
function generateReport(taskName, results, recommendation) {
|
|
155
|
+
const report = {
|
|
156
|
+
task: taskName,
|
|
157
|
+
timestamp: new Date().toISOString(),
|
|
158
|
+
results: {
|
|
159
|
+
haiku: results.haiku ? {
|
|
160
|
+
score: results.haiku.total.toFixed(2),
|
|
161
|
+
percentage: results.haiku.percentage.toFixed(1) + '%',
|
|
162
|
+
qualified: results.haiku.percentage >= 90
|
|
163
|
+
} : null,
|
|
164
|
+
sonnet: results.sonnet ? {
|
|
165
|
+
score: results.sonnet.total.toFixed(2),
|
|
166
|
+
percentage: results.sonnet.percentage.toFixed(1) + '%',
|
|
167
|
+
qualified: results.sonnet.percentage >= 95
|
|
168
|
+
} : null,
|
|
169
|
+
opus: {
|
|
170
|
+
score: '10.00',
|
|
171
|
+
percentage: '100%',
|
|
172
|
+
qualified: true
|
|
173
|
+
}
|
|
174
|
+
},
|
|
175
|
+
recommendation: recommendation,
|
|
176
|
+
compensation_needed: recommendation.tier !== 'haiku' && results.haiku && results.haiku.percentage < 90
|
|
177
|
+
};
|
|
178
|
+
|
|
179
|
+
return report;
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
// Print comparison table
|
|
183
|
+
function printComparisonTable(reports) {
|
|
184
|
+
console.log('\n' + '='.repeat(80));
|
|
185
|
+
console.log('MODEL TIER QUALIFICATION REPORT');
|
|
186
|
+
console.log('='.repeat(80));
|
|
187
|
+
console.log('');
|
|
188
|
+
console.log('| Task | Haiku | Sonnet | Opus | Recommended | Savings |');
|
|
189
|
+
console.log('|------|-------|--------|------|-------------|---------|');
|
|
190
|
+
|
|
191
|
+
for (const report of reports) {
|
|
192
|
+
const haiku = report.results.haiku?.percentage != null ? `${report.results.haiku.percentage}%` : 'N/A';
|
|
193
|
+
const sonnet = report.results.sonnet?.percentage != null ? `${report.results.sonnet.percentage}%` : 'N/A';
|
|
194
|
+
const opus = report.results.opus?.percentage != null ? `${report.results.opus.percentage}%` : '100%';
|
|
195
|
+
const rec = report.recommendation.tier.toUpperCase();
|
|
196
|
+
const savings = report.recommendation.savings;
|
|
197
|
+
|
|
198
|
+
console.log(`| ${report.task.padEnd(20)} | ${haiku.padEnd(5)} | ${sonnet.padEnd(6)} | ${opus.padEnd(4)} | ${rec.padEnd(11)} | ${savings.padEnd(7)} |`);
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
console.log('');
|
|
202
|
+
console.log('='.repeat(80));
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
// List all test cases
|
|
206
|
+
function listTestCases() {
|
|
207
|
+
const entries = fs.readdirSync(TEST_CASES_DIR, { withFileTypes: true });
|
|
208
|
+
const cases = [];
|
|
209
|
+
|
|
210
|
+
for (const entry of entries) {
|
|
211
|
+
if (entry.isDirectory() && !entry.name.startsWith('_')) {
|
|
212
|
+
// New structure: test-cases/{task-name}/test-case.yaml
|
|
213
|
+
const testCasePath = path.join(TEST_CASES_DIR, entry.name, 'test-case.yaml');
|
|
214
|
+
if (fs.existsSync(testCasePath)) {
|
|
215
|
+
cases.push(entry.name);
|
|
216
|
+
}
|
|
217
|
+
} else if (entry.isFile() && entry.name.endsWith('.yaml') && !entry.name.startsWith('_')) {
|
|
218
|
+
// Old structure: test-cases/{task-name}.yaml
|
|
219
|
+
cases.push(entry.name.replace('.yaml', ''));
|
|
220
|
+
}
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
return cases;
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
// Update routing config with validated tier
|
|
227
|
+
function updateRoutingConfig(taskName, tier, reason) {
|
|
228
|
+
const routing = loadRouting();
|
|
229
|
+
if (!routing || !routing.tasks) return false;
|
|
230
|
+
|
|
231
|
+
const taskKey = `${taskName}.md`;
|
|
232
|
+
if (routing.tasks[taskKey]) {
|
|
233
|
+
routing.tasks[taskKey].tier = tier;
|
|
234
|
+
routing.tasks[taskKey].validated = true;
|
|
235
|
+
routing.tasks[taskKey].validation_reason = reason;
|
|
236
|
+
routing.tasks[taskKey].validated_date = new Date().toISOString().split('T')[0];
|
|
237
|
+
|
|
238
|
+
fs.writeFileSync(ROUTING_CONFIG, yaml.dump(routing, { lineWidth: 120 }));
|
|
239
|
+
return true;
|
|
240
|
+
}
|
|
241
|
+
|
|
242
|
+
return false;
|
|
243
|
+
}
|
|
244
|
+
|
|
245
|
+
// CLI handler

/**
 * Build the "Current Results" lines printed by the `validate` command.
 *
 * Haiku and Sonnet lines are emitted only when the corresponding result has a
 * truthy `score`. The Opus line is emitted only when an `opus` result exists,
 * so a partially filled results section no longer throws.
 *
 * @param {object} results - The `results` section of a loaded test case.
 * @returns {string[]} Lines to print, in order.
 */
function formatCurrentResults(results) {
  const lines = ['--- Current Results ---'];

  const candidates = [
    ['Haiku', results.haiku],
    ['Sonnet', results.sonnet],
  ];
  for (const [label, result] of candidates) {
    if (result && result.score) {
      const verdict = result.qualified ? '✅ Qualified' : '❌ Not qualified';
      lines.push(`${label}: ${result.percentage}% - ${verdict}`);
    }
  }

  // Guarded like the tiers above: a missing `opus` entry is skipped, not a crash.
  if (results.opus) {
    lines.push(`Opus: ${results.opus.percentage}% - ✅ Baseline`);
  }

  lines.push('');
  lines.push(`Final tier: ${results.final_tier || 'Not determined'}`);
  lines.push(`Recommendation: ${results.recommendation || 'None'}`);
  return lines;
}

/**
 * Map a final tier to the `savings` label used in the report.
 *
 * @param {string} tier - Final tier name from a test case's results.
 * @returns {string} '93%' for 'haiku', '80%' for 'sonnet', otherwise '0%'.
 */
function estimateTierSavings(tier) {
  if (tier === 'haiku') {
    return '93%';
  }
  if (tier === 'sonnet') {
    return '80%';
  }
  return '0%';
}

const [,, command, ...args] = process.argv;

// Each case gets its own block so its `const` bindings do not leak into the
// other cases of the switch.
switch (command) {
  case 'validate': {
    const taskName = args[0];
    if (!taskName) {
      console.log('Usage: node model-tier-validator.cjs validate <task-name>');
      process.exit(1);
    }

    const rubric = loadRubric();
    const testCase = loadTestCase(taskName);

    if (!rubric || !testCase) {
      console.error('Failed to load required files');
      process.exit(1);
    }

    console.log(`\nValidating: ${taskName}`);
    console.log('Test case loaded:', testCase.test_case.name);
    console.log('');

    // Show test case info
    console.log('Input:', testCase.input.target);
    console.log('Expected sections:', testCase.expected_output.sections.join(', '));
    console.log('');

    // Show current results if available
    if (testCase.results) {
      formatCurrentResults(testCase.results).forEach((line) => console.log(line));
    }
    break;
  }

  case 'list': {
    const tasks = listTestCases();
    console.log('\nAvailable test cases:');
    tasks.forEach((t) => console.log(`  - ${t}`));
    console.log(`\nTotal: ${tasks.length} test cases`);
    break;
  }

  case 'report': {
    const reports = [];

    // Only test cases that already carry a final tier are reported.
    for (const task of listTestCases()) {
      const tc = loadTestCase(task);
      if (tc && tc.results && tc.results.final_tier) {
        reports.push({
          task,
          results: tc.results,
          recommendation: {
            tier: tc.results.final_tier,
            savings: estimateTierSavings(tc.results.final_tier),
          },
        });
      }
    }

    if (reports.length === 0) {
      console.log('\nNo validated test cases yet.');
      console.log('Run validation first: node model-tier-validator.cjs validate <task-name>');
    } else {
      printComparisonTable(reports);
    }
    break;
  }

  case 'update-routing': {
    const updateTask = args[0];
    const updateTier = args[1];
    // Everything after the tier is treated as the free-text reason.
    const updateReason = args.slice(2).join(' ') || 'Validated via model-tier-validator';

    if (!updateTask || !updateTier) {
      console.log('Usage: node model-tier-validator.cjs update-routing <task-name> <tier> [reason]');
      process.exit(1);
    }

    if (updateRoutingConfig(updateTask, updateTier, updateReason)) {
      console.log(`✅ Updated ${updateTask}.md to tier: ${updateTier}`);
    } else {
      console.log(`❌ Failed to update routing config for ${updateTask}`);
      // Report the failure through the exit status as well as the message.
      process.exitCode = 1;
    }
    break;
  }

  default:
    console.log(`
Model Tier Validator - Squad Creator Token Economy

Commands:
validate <task-name>
Show validation status for a task

list
List all available test cases

report
Generate comparison report for all validated tasks

update-routing <task-name> <tier> [reason]
Update model-routing.yaml with validated tier

Examples:
node model-tier-validator.cjs validate pv-axioma-assessment
node model-tier-validator.cjs list
node model-tier-validator.cjs report
node model-tier-validator.cjs update-routing pv-axioma-assessment haiku "Scoring determinístico"

Note:
Actual model comparison requires running tasks through Claude API.
This tool manages test cases and tracks results.
Use the Skill tool to run actual tests:
/pv-axioma-assessment <input>
`);
}
|