@takuma-hirai/hirai-method 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/.stale-harness-state/last-check +0 -0
- package/.claude/CommonRules.md +121 -0
- package/.claude/agents/01-core-development/api-designer.md +237 -0
- package/.claude/agents/01-core-development/backend-developer.md +222 -0
- package/.claude/agents/01-core-development/design-bridge.md +127 -0
- package/.claude/agents/01-core-development/electron-pro.md +240 -0
- package/.claude/agents/01-core-development/frontend-developer.md +133 -0
- package/.claude/agents/01-core-development/fullstack-developer.md +235 -0
- package/.claude/agents/01-core-development/graphql-architect.md +238 -0
- package/.claude/agents/01-core-development/microservices-architect.md +239 -0
- package/.claude/agents/01-core-development/mobile-developer.md +283 -0
- package/.claude/agents/01-core-development/ui-designer.md +174 -0
- package/.claude/agents/01-core-development/websocket-engineer.md +150 -0
- package/.claude/agents/03-infrastructure/azure-infra-engineer.md +53 -0
- package/.claude/agents/03-infrastructure/cloud-architect.md +277 -0
- package/.claude/agents/03-infrastructure/database-administrator.md +287 -0
- package/.claude/agents/03-infrastructure/deployment-engineer.md +287 -0
- package/.claude/agents/03-infrastructure/devops-engineer.md +287 -0
- package/.claude/agents/03-infrastructure/devops-incident-responder.md +287 -0
- package/.claude/agents/03-infrastructure/docker-expert.md +278 -0
- package/.claude/agents/03-infrastructure/incident-responder.md +287 -0
- package/.claude/agents/03-infrastructure/kubernetes-specialist.md +287 -0
- package/.claude/agents/03-infrastructure/network-engineer.md +287 -0
- package/.claude/agents/03-infrastructure/platform-engineer.md +287 -0
- package/.claude/agents/03-infrastructure/security-engineer.md +277 -0
- package/.claude/agents/03-infrastructure/sre-engineer.md +287 -0
- package/.claude/agents/03-infrastructure/terraform-engineer.md +287 -0
- package/.claude/agents/03-infrastructure/terragrunt-expert.md +307 -0
- package/.claude/agents/03-infrastructure/windows-infra-admin.md +52 -0
- package/.claude/agents/04-quality-security/accessibility-tester.md +277 -0
- package/.claude/agents/04-quality-security/ad-security-reviewer.md +56 -0
- package/.claude/agents/04-quality-security/ai-writing-auditor.md +77 -0
- package/.claude/agents/04-quality-security/architect-reviewer.md +287 -0
- package/.claude/agents/04-quality-security/chaos-engineer.md +277 -0
- package/.claude/agents/04-quality-security/code-reviewer.md +287 -0
- package/.claude/agents/04-quality-security/compliance-auditor.md +277 -0
- package/.claude/agents/04-quality-security/debugger.md +287 -0
- package/.claude/agents/04-quality-security/error-detective.md +287 -0
- package/.claude/agents/04-quality-security/penetration-tester.md +287 -0
- package/.claude/agents/04-quality-security/performance-engineer.md +287 -0
- package/.claude/agents/04-quality-security/powershell-security-hardening.md +54 -0
- package/.claude/agents/04-quality-security/qa-expert.md +287 -0
- package/.claude/agents/04-quality-security/security-auditor.md +287 -0
- package/.claude/agents/04-quality-security/test-automator.md +287 -0
- package/.claude/agents/04-quality-security/ui-ux-tester.md +234 -0
- package/.claude/agents/06-developer-experience/build-engineer.md +286 -0
- package/.claude/agents/06-developer-experience/cli-developer.md +286 -0
- package/.claude/agents/06-developer-experience/dependency-manager.md +286 -0
- package/.claude/agents/06-developer-experience/documentation-engineer.md +276 -0
- package/.claude/agents/06-developer-experience/dx-optimizer.md +286 -0
- package/.claude/agents/06-developer-experience/git-workflow-manager.md +286 -0
- package/.claude/agents/06-developer-experience/legacy-modernizer.md +286 -0
- package/.claude/agents/06-developer-experience/mcp-developer.md +275 -0
- package/.claude/agents/06-developer-experience/powershell-module-architect.md +58 -0
- package/.claude/agents/06-developer-experience/powershell-ui-architect.md +135 -0
- package/.claude/agents/06-developer-experience/readme-generator.md +238 -0
- package/.claude/agents/06-developer-experience/refactoring-specialist.md +286 -0
- package/.claude/agents/06-developer-experience/slack-expert.md +232 -0
- package/.claude/agents/06-developer-experience/tooling-engineer.md +286 -0
- package/.claude/agents/09-meta-orchestration/agent-installer.md +97 -0
- package/.claude/agents/09-meta-orchestration/agent-organizer.md +287 -0
- package/.claude/agents/09-meta-orchestration/codebase-orchestrator.md +249 -0
- package/.claude/agents/09-meta-orchestration/context-manager.md +287 -0
- package/.claude/agents/09-meta-orchestration/error-coordinator.md +287 -0
- package/.claude/agents/09-meta-orchestration/it-ops-orchestrator.md +60 -0
- package/.claude/agents/09-meta-orchestration/knowledge-synthesizer.md +287 -0
- package/.claude/agents/09-meta-orchestration/multi-agent-coordinator.md +287 -0
- package/.claude/agents/09-meta-orchestration/performance-monitor.md +287 -0
- package/.claude/agents/09-meta-orchestration/task-distributor.md +287 -0
- package/.claude/agents/09-meta-orchestration/workflow-orchestrator.md +287 -0
- package/.claude/agents/10-research-analysis/competitive-analyst.md +287 -0
- package/.claude/agents/10-research-analysis/data-researcher.md +287 -0
- package/.claude/agents/10-research-analysis/market-researcher.md +287 -0
- package/.claude/agents/10-research-analysis/project-idea-validator.md +269 -0
- package/.claude/agents/10-research-analysis/research-analyst.md +287 -0
- package/.claude/agents/10-research-analysis/scientific-literature-researcher.md +151 -0
- package/.claude/agents/10-research-analysis/search-specialist.md +287 -0
- package/.claude/agents/10-research-analysis/trend-analyst.md +287 -0
- package/.claude/archive/README.md +47 -0
- package/.claude/archive/agents/02-language-specialists/angular-architect.md +287 -0
- package/.claude/archive/agents/02-language-specialists/cpp-pro.md +277 -0
- package/.claude/archive/agents/02-language-specialists/csharp-developer.md +287 -0
- package/.claude/archive/agents/02-language-specialists/django-developer.md +287 -0
- package/.claude/archive/agents/02-language-specialists/dotnet-core-expert.md +287 -0
- package/.claude/archive/agents/02-language-specialists/dotnet-framework-4.8-expert.md +306 -0
- package/.claude/archive/agents/02-language-specialists/elixir-expert.md +311 -0
- package/.claude/archive/agents/02-language-specialists/expo-react-native-expert.md +268 -0
- package/.claude/archive/agents/02-language-specialists/fastapi-developer.md +287 -0
- package/.claude/archive/agents/02-language-specialists/flutter-expert.md +287 -0
- package/.claude/archive/agents/02-language-specialists/golang-pro.md +277 -0
- package/.claude/archive/agents/02-language-specialists/java-architect.md +287 -0
- package/.claude/archive/agents/02-language-specialists/javascript-pro.md +277 -0
- package/.claude/archive/agents/02-language-specialists/kotlin-specialist.md +287 -0
- package/.claude/archive/agents/02-language-specialists/laravel-specialist.md +287 -0
- package/.claude/archive/agents/02-language-specialists/nextjs-developer.md +287 -0
- package/.claude/archive/agents/02-language-specialists/node-specialist.md +124 -0
- package/.claude/archive/agents/02-language-specialists/php-pro.md +287 -0
- package/.claude/archive/agents/02-language-specialists/powershell-5.1-expert.md +59 -0
- package/.claude/archive/agents/02-language-specialists/powershell-7-expert.md +57 -0
- package/.claude/archive/agents/02-language-specialists/python-pro.md +277 -0
- package/.claude/archive/agents/02-language-specialists/rails-expert.md +358 -0
- package/.claude/archive/agents/02-language-specialists/react-specialist.md +287 -0
- package/.claude/archive/agents/02-language-specialists/rust-engineer.md +287 -0
- package/.claude/archive/agents/02-language-specialists/spring-boot-engineer.md +287 -0
- package/.claude/archive/agents/02-language-specialists/sql-pro.md +287 -0
- package/.claude/archive/agents/02-language-specialists/swift-expert.md +287 -0
- package/.claude/archive/agents/02-language-specialists/symfony-specialist.md +354 -0
- package/.claude/archive/agents/02-language-specialists/typescript-pro.md +277 -0
- package/.claude/archive/agents/02-language-specialists/vue-expert.md +287 -0
- package/.claude/archive/agents/05-data-ai/ai-engineer.md +287 -0
- package/.claude/archive/agents/05-data-ai/data-analyst.md +277 -0
- package/.claude/archive/agents/05-data-ai/data-engineer.md +287 -0
- package/.claude/archive/agents/05-data-ai/data-scientist.md +287 -0
- package/.claude/archive/agents/05-data-ai/database-optimizer.md +287 -0
- package/.claude/archive/agents/05-data-ai/llm-architect.md +287 -0
- package/.claude/archive/agents/05-data-ai/machine-learning-engineer.md +277 -0
- package/.claude/archive/agents/05-data-ai/ml-engineer.md +287 -0
- package/.claude/archive/agents/05-data-ai/mlops-engineer.md +287 -0
- package/.claude/archive/agents/05-data-ai/nlp-engineer.md +287 -0
- package/.claude/archive/agents/05-data-ai/postgres-pro.md +287 -0
- package/.claude/archive/agents/05-data-ai/prompt-engineer.md +287 -0
- package/.claude/archive/agents/05-data-ai/reinforcement-learning-engineer.md +277 -0
- package/.claude/archive/agents/07-specialized-domains/api-documenter.md +277 -0
- package/.claude/archive/agents/07-specialized-domains/blockchain-developer.md +287 -0
- package/.claude/archive/agents/07-specialized-domains/embedded-systems.md +287 -0
- package/.claude/archive/agents/07-specialized-domains/fintech-engineer.md +287 -0
- package/.claude/archive/agents/07-specialized-domains/game-developer.md +287 -0
- package/.claude/archive/agents/07-specialized-domains/healthcare-admin.md +199 -0
- package/.claude/archive/agents/07-specialized-domains/iot-engineer.md +287 -0
- package/.claude/archive/agents/07-specialized-domains/m365-admin.md +48 -0
- package/.claude/archive/agents/07-specialized-domains/mobile-app-developer.md +287 -0
- package/.claude/archive/agents/07-specialized-domains/payment-integration.md +287 -0
- package/.claude/archive/agents/07-specialized-domains/quant-analyst.md +287 -0
- package/.claude/archive/agents/07-specialized-domains/risk-manager.md +287 -0
- package/.claude/archive/agents/07-specialized-domains/seo-specialist.md +184 -0
- package/.claude/archive/agents/08-business-product/business-analyst.md +287 -0
- package/.claude/archive/agents/08-business-product/content-marketer.md +287 -0
- package/.claude/archive/agents/08-business-product/customer-success-manager.md +287 -0
- package/.claude/archive/agents/08-business-product/legal-advisor.md +287 -0
- package/.claude/archive/agents/08-business-product/license-engineer.md +295 -0
- package/.claude/archive/agents/08-business-product/product-manager.md +287 -0
- package/.claude/archive/agents/08-business-product/project-manager.md +287 -0
- package/.claude/archive/agents/08-business-product/sales-engineer.md +287 -0
- package/.claude/archive/agents/08-business-product/scrum-master.md +287 -0
- package/.claude/archive/agents/08-business-product/technical-writer.md +287 -0
- package/.claude/archive/agents/08-business-product/ux-researcher.md +287 -0
- package/.claude/archive/agents/08-business-product/wordpress-master.md +316 -0
- package/.claude/archive/skills/competitive-ads-extractor/SKILL.md +293 -0
- package/.claude/archive/skills/developer-growth-analysis/SKILL.md +322 -0
- package/.claude/archive/skills/document-docx/LICENSE.txt +30 -0
- package/.claude/archive/skills/document-docx/SKILL.md +197 -0
- package/.claude/archive/skills/document-docx/docx-js.md +350 -0
- package/.claude/archive/skills/document-docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
- package/.claude/archive/skills/document-docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
- package/.claude/archive/skills/document-docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
- package/.claude/archive/skills/document-docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
- package/.claude/archive/skills/document-docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
- package/.claude/archive/skills/document-docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
- package/.claude/archive/skills/document-docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
- package/.claude/archive/skills/document-docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
- package/.claude/archive/skills/document-docx/ooxml/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
- package/.claude/archive/skills/document-docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
- package/.claude/archive/skills/document-docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
- package/.claude/archive/skills/document-docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
- package/.claude/archive/skills/document-docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
- package/.claude/archive/skills/document-docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
- package/.claude/archive/skills/document-docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
- package/.claude/archive/skills/document-docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
- package/.claude/archive/skills/document-docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
- package/.claude/archive/skills/document-docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
- package/.claude/archive/skills/document-docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
- package/.claude/archive/skills/document-docx/ooxml/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
- package/.claude/archive/skills/document-docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
- package/.claude/archive/skills/document-docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
- package/.claude/archive/skills/document-docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
- package/.claude/archive/skills/document-docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
- package/.claude/archive/skills/document-docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
- package/.claude/archive/skills/document-docx/ooxml/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
- package/.claude/archive/skills/document-docx/ooxml/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
- package/.claude/archive/skills/document-docx/ooxml/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
- package/.claude/archive/skills/document-docx/ooxml/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
- package/.claude/archive/skills/document-docx/ooxml/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
- package/.claude/archive/skills/document-docx/ooxml/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
- package/.claude/archive/skills/document-docx/ooxml/schemas/mce/mc.xsd +75 -0
- package/.claude/archive/skills/document-docx/ooxml/schemas/microsoft/wml-2010.xsd +560 -0
- package/.claude/archive/skills/document-docx/ooxml/schemas/microsoft/wml-2012.xsd +67 -0
- package/.claude/archive/skills/document-docx/ooxml/schemas/microsoft/wml-2018.xsd +14 -0
- package/.claude/archive/skills/document-docx/ooxml/schemas/microsoft/wml-cex-2018.xsd +20 -0
- package/.claude/archive/skills/document-docx/ooxml/schemas/microsoft/wml-cid-2016.xsd +13 -0
- package/.claude/archive/skills/document-docx/ooxml/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
- package/.claude/archive/skills/document-docx/ooxml/schemas/microsoft/wml-symex-2015.xsd +8 -0
- package/.claude/archive/skills/document-docx/ooxml/scripts/pack.py +159 -0
- package/.claude/archive/skills/document-docx/ooxml/scripts/unpack.py +29 -0
- package/.claude/archive/skills/document-docx/ooxml/scripts/validate.py +69 -0
- package/.claude/archive/skills/document-docx/ooxml/scripts/validation/__init__.py +15 -0
- package/.claude/archive/skills/document-docx/ooxml/scripts/validation/base.py +951 -0
- package/.claude/archive/skills/document-docx/ooxml/scripts/validation/docx.py +274 -0
- package/.claude/archive/skills/document-docx/ooxml/scripts/validation/pptx.py +315 -0
- package/.claude/archive/skills/document-docx/ooxml/scripts/validation/redlining.py +279 -0
- package/.claude/archive/skills/document-docx/ooxml.md +610 -0
- package/.claude/archive/skills/document-docx/scripts/__init__.py +1 -0
- package/.claude/archive/skills/document-docx/scripts/document.py +1276 -0
- package/.claude/archive/skills/document-docx/scripts/templates/comments.xml +3 -0
- package/.claude/archive/skills/document-docx/scripts/templates/commentsExtended.xml +3 -0
- package/.claude/archive/skills/document-docx/scripts/templates/commentsExtensible.xml +3 -0
- package/.claude/archive/skills/document-docx/scripts/templates/commentsIds.xml +3 -0
- package/.claude/archive/skills/document-docx/scripts/templates/people.xml +3 -0
- package/.claude/archive/skills/document-docx/scripts/utilities.py +374 -0
- package/.claude/archive/skills/document-pdf/LICENSE.txt +30 -0
- package/.claude/archive/skills/document-pdf/SKILL.md +294 -0
- package/.claude/archive/skills/document-pdf/forms.md +205 -0
- package/.claude/archive/skills/document-pdf/reference.md +612 -0
- package/.claude/archive/skills/document-pdf/scripts/check_bounding_boxes.py +70 -0
- package/.claude/archive/skills/document-pdf/scripts/check_bounding_boxes_test.py +226 -0
- package/.claude/archive/skills/document-pdf/scripts/check_fillable_fields.py +12 -0
- package/.claude/archive/skills/document-pdf/scripts/convert_pdf_to_images.py +35 -0
- package/.claude/archive/skills/document-pdf/scripts/create_validation_image.py +41 -0
- package/.claude/archive/skills/document-pdf/scripts/extract_form_field_info.py +152 -0
- package/.claude/archive/skills/document-pdf/scripts/fill_fillable_fields.py +114 -0
- package/.claude/archive/skills/document-pdf/scripts/fill_pdf_form_with_annotations.py +108 -0
- package/.claude/archive/skills/document-pptx/LICENSE.txt +30 -0
- package/.claude/archive/skills/document-pptx/SKILL.md +484 -0
- package/.claude/archive/skills/document-pptx/html2pptx.md +625 -0
- package/.claude/archive/skills/document-pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
- package/.claude/archive/skills/document-pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
- package/.claude/archive/skills/document-pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
- package/.claude/archive/skills/document-pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
- package/.claude/archive/skills/document-pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
- package/.claude/archive/skills/document-pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
- package/.claude/archive/skills/document-pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
- package/.claude/archive/skills/document-pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
- package/.claude/archive/skills/document-pptx/ooxml/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
- package/.claude/archive/skills/document-pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
- package/.claude/archive/skills/document-pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
- package/.claude/archive/skills/document-pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
- package/.claude/archive/skills/document-pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
- package/.claude/archive/skills/document-pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
- package/.claude/archive/skills/document-pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
- package/.claude/archive/skills/document-pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
- package/.claude/archive/skills/document-pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
- package/.claude/archive/skills/document-pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
- package/.claude/archive/skills/document-pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
- package/.claude/archive/skills/document-pptx/ooxml/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
- package/.claude/archive/skills/document-pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
- package/.claude/archive/skills/document-pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
- package/.claude/archive/skills/document-pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
- package/.claude/archive/skills/document-pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
- package/.claude/archive/skills/document-pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
- package/.claude/archive/skills/document-pptx/ooxml/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
- package/.claude/archive/skills/document-pptx/ooxml/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
- package/.claude/archive/skills/document-pptx/ooxml/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
- package/.claude/archive/skills/document-pptx/ooxml/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
- package/.claude/archive/skills/document-pptx/ooxml/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
- package/.claude/archive/skills/document-pptx/ooxml/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
- package/.claude/archive/skills/document-pptx/ooxml/schemas/mce/mc.xsd +75 -0
- package/.claude/archive/skills/document-pptx/ooxml/schemas/microsoft/wml-2010.xsd +560 -0
- package/.claude/archive/skills/document-pptx/ooxml/schemas/microsoft/wml-2012.xsd +67 -0
- package/.claude/archive/skills/document-pptx/ooxml/schemas/microsoft/wml-2018.xsd +14 -0
- package/.claude/archive/skills/document-pptx/ooxml/schemas/microsoft/wml-cex-2018.xsd +20 -0
- package/.claude/archive/skills/document-pptx/ooxml/schemas/microsoft/wml-cid-2016.xsd +13 -0
- package/.claude/archive/skills/document-pptx/ooxml/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
- package/.claude/archive/skills/document-pptx/ooxml/schemas/microsoft/wml-symex-2015.xsd +8 -0
- package/.claude/archive/skills/document-pptx/ooxml/scripts/pack.py +159 -0
- package/.claude/archive/skills/document-pptx/ooxml/scripts/unpack.py +29 -0
- package/.claude/archive/skills/document-pptx/ooxml/scripts/validate.py +69 -0
- package/.claude/archive/skills/document-pptx/ooxml/scripts/validation/__init__.py +15 -0
- package/.claude/archive/skills/document-pptx/ooxml/scripts/validation/base.py +951 -0
- package/.claude/archive/skills/document-pptx/ooxml/scripts/validation/docx.py +274 -0
- package/.claude/archive/skills/document-pptx/ooxml/scripts/validation/pptx.py +315 -0
- package/.claude/archive/skills/document-pptx/ooxml/scripts/validation/redlining.py +279 -0
- package/.claude/archive/skills/document-pptx/ooxml.md +427 -0
- package/.claude/archive/skills/document-pptx/scripts/html2pptx.js +979 -0
- package/.claude/archive/skills/document-pptx/scripts/inventory.py +1020 -0
- package/.claude/archive/skills/document-pptx/scripts/rearrange.py +231 -0
- package/.claude/archive/skills/document-pptx/scripts/replace.py +385 -0
- package/.claude/archive/skills/document-pptx/scripts/thumbnail.py +450 -0
- package/.claude/archive/skills/document-xlsx/LICENSE.txt +30 -0
- package/.claude/archive/skills/document-xlsx/SKILL.md +289 -0
- package/.claude/archive/skills/document-xlsx/recalc.py +178 -0
- package/.claude/archive/skills/image-enhancer/SKILL.md +99 -0
- package/.claude/archive/skills/meeting-insights-analyzer/SKILL.md +327 -0
- package/.claude/archive/skills/slack-gif-creator/LICENSE.txt +202 -0
- package/.claude/archive/skills/slack-gif-creator/SKILL.md +646 -0
- package/.claude/archive/skills/slack-gif-creator/core/color_palettes.py +302 -0
- package/.claude/archive/skills/slack-gif-creator/core/easing.py +230 -0
- package/.claude/archive/skills/slack-gif-creator/core/frame_composer.py +469 -0
- package/.claude/archive/skills/slack-gif-creator/core/gif_builder.py +246 -0
- package/.claude/archive/skills/slack-gif-creator/core/typography.py +357 -0
- package/.claude/archive/skills/slack-gif-creator/core/validators.py +264 -0
- package/.claude/archive/skills/slack-gif-creator/core/visual_effects.py +494 -0
- package/.claude/archive/skills/slack-gif-creator/requirements.txt +4 -0
- package/.claude/archive/skills/slack-gif-creator/templates/bounce.py +106 -0
- package/.claude/archive/skills/slack-gif-creator/templates/explode.py +331 -0
- package/.claude/archive/skills/slack-gif-creator/templates/fade.py +329 -0
- package/.claude/archive/skills/slack-gif-creator/templates/flip.py +291 -0
- package/.claude/archive/skills/slack-gif-creator/templates/kaleidoscope.py +211 -0
- package/.claude/archive/skills/slack-gif-creator/templates/morph.py +329 -0
- package/.claude/archive/skills/slack-gif-creator/templates/move.py +293 -0
- package/.claude/archive/skills/slack-gif-creator/templates/pulse.py +268 -0
- package/.claude/archive/skills/slack-gif-creator/templates/shake.py +127 -0
- package/.claude/archive/skills/slack-gif-creator/templates/slide.py +291 -0
- package/.claude/archive/skills/slack-gif-creator/templates/spin.py +269 -0
- package/.claude/archive/skills/slack-gif-creator/templates/wiggle.py +300 -0
- package/.claude/archive/skills/slack-gif-creator/templates/zoom.py +312 -0
- package/.claude/archive/skills/twitter-algorithm-optimizer/SKILL.md +327 -0
- package/.claude/archive/skills/video-downloader/SKILL.md +99 -0
- package/.claude/archive/skills/video-downloader/scripts/download_video.py +145 -0
- package/.claude/bash-whitelist-requests/2026-05-28-grep-find-rg.md +68 -0
- package/.claude/bash-whitelist-requests/2026-06-01-readonly-filters.md +76 -0
- package/.claude/bash-whitelist.txt +124 -0
- package/.claude/commands/agent-introspect.md +89 -0
- package/.claude/commands/apply-rules.md +363 -0
- package/.claude/commands/approve-design.md +219 -0
- package/.claude/commands/approve-org-money.md +267 -0
- package/.claude/commands/build.md +234 -0
- package/.claude/commands/commit.md +97 -0
- package/.claude/commands/context-fetch.md +113 -0
- package/.claude/commands/create-tool.md +496 -0
- package/.claude/commands/design-review.md +138 -0
- package/.claude/commands/design.md +807 -0
- package/.claude/commands/discharge-byproduct.md +208 -0
- package/.claude/commands/doc-review.md +165 -0
- package/.claude/commands/document-pair.md +76 -0
- package/.claude/commands/error-triage.md +435 -0
- package/.claude/commands/eval.md +70 -0
- package/.claude/commands/evolve.md +49 -0
- package/.claude/commands/finish-task.md +105 -0
- package/.claude/commands/gan-build.md +91 -0
- package/.claude/commands/gan-design.md +82 -0
- package/.claude/commands/gate-bypass.md +77 -0
- package/.claude/commands/gate-clear.md +45 -0
- package/.claude/commands/gate-status.md +46 -0
- package/.claude/commands/harness-audit.md +151 -0
- package/.claude/commands/hearing.md +138 -0
- package/.claude/commands/impact-check.md +486 -0
- package/.claude/commands/init-tasks.md +49 -0
- package/.claude/commands/instinct-export.md +47 -0
- package/.claude/commands/instinct-import.md +41 -0
- package/.claude/commands/instinct-status.md +43 -0
- package/.claude/commands/investigate.md +547 -0
- package/.claude/commands/learn.md +55 -0
- package/.claude/commands/lint-rules.md +400 -0
- package/.claude/commands/mode.md +58 -0
- package/.claude/commands/modify-feature.md +209 -0
- package/.claude/commands/module-review.md +149 -0
- package/.claude/commands/move-section.md +67 -0
- package/.claude/commands/new-draft.md +67 -0
- package/.claude/commands/new-feature.md +286 -0
- package/.claude/commands/new-task.md +156 -0
- package/.claude/commands/notification.md +107 -0
- package/.claude/commands/pm-start.md +119 -0
- package/.claude/commands/projects.md +32 -0
- package/.claude/commands/promote.md +43 -0
- package/.claude/commands/rasis-report.md +1323 -0
- package/.claude/commands/release-note.md +130 -0
- package/.claude/commands/reply-watch.md +149 -0
- package/.claude/commands/requirement.md +352 -0
- package/.claude/commands/resume-state.md +187 -0
- package/.claude/commands/reviewpr.md +118 -0
- package/.claude/commands/save-state.md +100 -0
- package/.claude/commands/sentry-pr.md +157 -0
- package/.claude/commands/start-task.md +87 -0
- package/.claude/commands/system-review.md +147 -0
- package/.claude/commands/task-bypass.md +70 -0
- package/.claude/commands/task-estimate.md +100 -0
- package/.claude/commands/template-apply.md +89 -0
- package/.claude/commands/test-design.md +116 -0
- package/.claude/commands/transfer-mismatch.md +317 -0
- package/.claude/commands/verify.md +51 -0
- package/.claude/evals/grader-loop-mode-autonomy.sh +165 -0
- package/.claude/evals/grader-system-reminder-attention.sh +99 -0
- package/.claude/evals/loop-mode-autonomy.md +121 -0
- package/.claude/evals/loop-mode-autonomy.results.template.md +133 -0
- package/.claude/evals/system-reminder-attention.md +123 -0
- package/.claude/evals/system-reminder-attention.results.template.md +93 -0
- package/.claude/evals/system-reminder-attention.runner.md +353 -0
- package/.claude/harness-config.local.yml +48 -0
- package/.claude/harness-config.yml +534 -0
- package/.claude/hooks/agent-marker-clear.sh +43 -0
- package/.claude/hooks/agent-marker-set.sh +40 -0
- package/.claude/hooks/agent-router-suggest.sh +123 -0
- package/.claude/hooks/autonomous-action-guard.sh +242 -0
- package/.claude/hooks/byproduct-discharge-guard.sh +128 -0
- package/.claude/hooks/check-md-mermaid.sh +144 -0
- package/.claude/hooks/check-required-env.sh +95 -0
- package/.claude/hooks/check-serena-mcp.sh +123 -0
- package/.claude/hooks/confidence-gate.sh +139 -0
- package/.claude/hooks/context-budget.sh +233 -0
- package/.claude/hooks/delegation-guard.sh +99 -0
- package/.claude/hooks/dispatcher-manifest.tsv +38 -0
- package/.claude/hooks/draft-flow-guard.sh +304 -0
- package/.claude/hooks/failure-loop-detect.sh +139 -0
- package/.claude/hooks/gateguard.sh +209 -0
- package/.claude/hooks/improvement-proposal.sh +112 -0
- package/.claude/hooks/init-tasks-on-start.sh +34 -0
- package/.claude/hooks/lib/bypass-logger.sh +82 -0
- package/.claude/hooks/lib/confidence-gate/bypass.sh +48 -0
- package/.claude/hooks/lib/confidence-gate/extract.sh +99 -0
- package/.claude/hooks/lib/confidence-gate/major-agent-filter.sh +59 -0
- package/.claude/hooks/lib/confidence-gate/messages.sh +53 -0
- package/.claude/hooks/lib/config-loader.sh +784 -0
- package/.claude/hooks/lib/delegation-guard/bash-whitelist.sh +323 -0
- package/.claude/hooks/lib/delegation-guard/git-deny.sh +188 -0
- package/.claude/hooks/lib/delegation-guard/protected-paths.sh +105 -0
- package/.claude/hooks/lib/delegation-guard/subagent-detect.sh +40 -0
- package/.claude/hooks/lib/dispatcher-core.sh +454 -0
- package/.claude/hooks/lib/improvement-proposal/aggregate.py +466 -0
- package/.claude/hooks/lib/improvement-proposal/cache.sh +78 -0
- package/.claude/hooks/lib/mode-loader.sh +80 -0
- package/.claude/hooks/lib/next-actions-parser.sh +153 -0
- package/.claude/hooks/lib/project-root.sh +60 -0
- package/.claude/hooks/list-md-plan-first-reminder.sh +143 -0
- package/.claude/hooks/loop-auto-progress-reminder.sh +108 -0
- package/.claude/hooks/loop-confirmation-detector.sh +241 -0
- package/.claude/hooks/mode-asana-prompt.sh +61 -0
- package/.claude/hooks/mode-enforce.sh +57 -0
- package/.claude/hooks/mode-session-start.sh +93 -0
- package/.claude/hooks/next-actions-surface.sh +136 -0
- package/.claude/hooks/notification-dispatcher.sh +9 -0
- package/.claude/hooks/notify.sh +27 -0
- package/.claude/hooks/parallel-subagent-reminder.sh +469 -0
- package/.claude/hooks/post-tool-use-dispatcher.sh +9 -0
- package/.claude/hooks/pre-tool-use-dispatcher.sh +9 -0
- package/.claude/hooks/reviewer-count-guard.sh +313 -0
- package/.claude/hooks/session-help-surface.sh +192 -0
- package/.claude/hooks/session-start-dispatcher.sh +9 -0
- package/.claude/hooks/session-start-wrapper.sh +156 -0
- package/.claude/hooks/stale-harness-detect.sh +422 -0
- package/.claude/hooks/stop-dispatcher.sh +9 -0
- package/.claude/hooks/stop.sh +25 -0
- package/.claude/hooks/subagent-stop-dispatcher.sh +9 -0
- package/.claude/hooks/task-rule-guard.sh +317 -0
- package/.claude/hooks/tests/run-tests.sh +23 -0
- package/.claude/hooks/tests/test-agent-marker-warn.sh +86 -0
- package/.claude/hooks/tests/test-check-required-env.sh +138 -0
- package/.claude/hooks/tests/test-confidence-gate.sh +170 -0
- package/.claude/hooks/tests/test-config-env-override.sh +220 -0
- package/.claude/hooks/tests/test-gate-disable.sh +118 -0
- package/.claude/hooks/tests/test-improvement-proposal.sh +284 -0
- package/.claude/hooks/tool-call-slip-detector.sh +188 -0
- package/.claude/hooks/user-prompt-submit-dispatcher.sh +9 -0
- package/.claude/hooks/why-x5-reminder.sh +45 -0
- package/.claude/hooks/why-x5-violation-detect.sh +152 -0
- package/.claude/hooks/workflow-guard.sh +263 -0
- package/.claude/mode.yml +28 -0
- package/.claude/project-rules/development-process.md +8 -0
- package/.claude/project-rules/git-workflow.md +8 -0
- package/.claude/project-rules/modes.md +8 -0
- package/.claude/project-rules/self-improvement.md +8 -0
- package/.claude/project-rules/task-management.md +8 -0
- package/.claude/project-rules/why-x5-output.md +8 -0
- package/.claude/project-rules/workflow.md +8 -0
- package/.claude/rules/development-process.md +293 -0
- package/.claude/rules/git-workflow.md +71 -0
- package/.claude/rules/modes.md +189 -0
- package/.claude/rules/self-improvement.md +76 -0
- package/.claude/rules/task-management.md +261 -0
- package/.claude/rules/why-x5-output.md +97 -0
- package/.claude/rules/workflow.md +157 -0
- package/.claude/rules-details/README.md +67 -0
- package/.claude/rules-details/development-process/confidence-gate.md +22 -0
- package/.claude/rules-details/development-process/cross-repo-write.md +35 -0
- package/.claude/rules-details/development-process/delegation-requirements.md +158 -0
- package/.claude/rules-details/development-process/harness-sync.md +21 -0
- package/.claude/rules-details/development-process/origin.md +13 -0
- package/.claude/rules-details/development-process/parallelization-origin.md +22 -0
- package/.claude/rules-details/development-process/research-reuse.md +22 -0
- package/.claude/rules-details/development-process/staging-strategy.md +47 -0
- package/.claude/rules-details/modes/artifacts.md +34 -0
- package/.claude/rules-details/modes/compliance-items.md +120 -0
- package/.claude/rules-details/modes/five-layer-enforcement.md +46 -0
- package/.claude/rules-details/modes/mode-hooks.md +51 -0
- package/.claude/rules-details/modes/origin.md +17 -0
- package/.claude/rules-details/self-improvement/l4-mechanics.md +36 -0
- package/.claude/rules-details/self-improvement/origin.md +8 -0
- package/.claude/rules-details/self-improvement/related-skills.md +35 -0
- package/.claude/rules-details/self-improvement/when-to-use-layers.md +39 -0
- package/.claude/rules-details/task-management/hook-enforcement.md +25 -0
- package/.claude/rules-details/task-management/mandatory-reading.md +20 -0
- package/.claude/rules-details/task-management/origin.md +12 -0
- package/.claude/rules-details/task-management/parking-lot.md +26 -0
- package/.claude/rules-details/task-management/plan-first.md +44 -0
- package/.claude/rules-details/task-management/six-articles.md +68 -0
- package/.claude/rules-details/task-management/task-migration.md +16 -0
- package/.claude/rules-details/task-management/ui-detection.md +11 -0
- package/.claude/rules-details/why-x5-output/examples.md +41 -0
- package/.claude/rules-details/why-x5-output/feedback-memory.md +14 -0
- package/.claude/rules-details/why-x5-output/origin.md +10 -0
- package/.claude/rules-details/why-x5-output/v1-v10-history.md +19 -0
- package/.claude/rules-details/workflow/10-stage.md +43 -0
- package/.claude/rules-details/workflow/14-stage.md +52 -0
- package/.claude/rules-details/workflow/byproduct-discharge.md +39 -0
- package/.claude/rules-details/workflow/draft-flow-guard.md +31 -0
- package/.claude/rules-details/workflow/fan-out.md +70 -0
- package/.claude/rules-details/workflow/mece-20.md +36 -0
- package/.claude/rules-details/workflow/origin.md +14 -0
- package/.claude/rules-details/workflow/refactoring.md +48 -0
- package/.claude/rules-details/workflow/related-skills.md +22 -0
- package/.claude/rules-details/workflow/reviewer-prompt.md +100 -0
- package/.claude/rules-details/workflow/session-persistence.md +46 -0
- package/.claude/rules-details/workflow/workflow-guard.md +36 -0
- package/.claude/scripts/__pycache__/harness-audit.cpython-313.pyc +0 -0
- package/.claude/scripts/agent-stocktake.py +421 -0
- package/.claude/scripts/check-md-mermaid.mjs +138 -0
- package/.claude/scripts/generate-settings.sh +0 -0
- package/.claude/scripts/harness-audit.py +1547 -0
- package/.claude/scripts/hc-config.sh +2265 -0
- package/.claude/scripts/init-tasks.sh +117 -0
- package/.claude/scripts/lib/enforcement-matrix-parse.sh +81 -0
- package/.claude/scripts/lib/hc-config-metadata.sh +190 -0
- package/.claude/scripts/lib/hc-config-web-server.js +1528 -0
- package/.claude/scripts/lib/hc-config-web-ui/app.js +1054 -0
- package/.claude/scripts/lib/hc-config-web-ui/index.html +130 -0
- package/.claude/scripts/lib/hc-config-web-ui/style.css +522 -0
- package/.claude/scripts/new-task-helper.sh +432 -0
- package/.claude/scripts/observe-repair.sh +437 -0
- package/.claude/scripts/observe-rotate.sh +311 -0
- package/.claude/scripts/statusline.sh +239 -0
- package/.claude/settings.generated.preview.json +211 -0
- package/.claude/settings.json +215 -0
- package/.claude/settings.local.example.json +20 -0
- package/.claude/settings.local.json +36 -0
- package/.claude/skills/agent-introspection-debugging/SKILL.md +123 -0
- package/.claude/skills/agent-router/README.md +137 -0
- package/.claude/skills/agent-router/SKILL.md +74 -0
- package/.claude/skills/agent-router/dispatch-table.yml +352 -0
- package/.claude/skills/agent-router/router.py +1086 -0
- package/.claude/skills/agent-router/samples/representative_prompts.txt +24 -0
- package/.claude/skills/agent-router/tests/__init__.py +0 -0
- package/.claude/skills/agent-router/tests/test_router.py +762 -0
- package/.claude/skills/artifacts-builder/LICENSE.txt +202 -0
- package/.claude/skills/artifacts-builder/SKILL.md +74 -0
- package/.claude/skills/artifacts-builder/scripts/bundle-artifact.sh +54 -0
- package/.claude/skills/artifacts-builder/scripts/init-artifact.sh +322 -0
- package/.claude/skills/artifacts-builder/scripts/shadcn-components.tar.gz +0 -0
- package/.claude/skills/brand-guidelines/LICENSE.txt +202 -0
- package/.claude/skills/brand-guidelines/SKILL.md +73 -0
- package/.claude/skills/canvas-design/LICENSE.txt +202 -0
- package/.claude/skills/canvas-design/SKILL.md +130 -0
- package/.claude/skills/canvas-design/canvas-fonts/ArsenalSC-OFL.txt +93 -0
- package/.claude/skills/canvas-design/canvas-fonts/ArsenalSC-Regular.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/BigShoulders-Bold.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/BigShoulders-OFL.txt +93 -0
- package/.claude/skills/canvas-design/canvas-fonts/BigShoulders-Regular.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/Boldonse-OFL.txt +93 -0
- package/.claude/skills/canvas-design/canvas-fonts/Boldonse-Regular.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/BricolageGrotesque-Bold.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/BricolageGrotesque-OFL.txt +93 -0
- package/.claude/skills/canvas-design/canvas-fonts/BricolageGrotesque-Regular.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/CrimsonPro-Bold.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/CrimsonPro-Italic.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/CrimsonPro-OFL.txt +93 -0
- package/.claude/skills/canvas-design/canvas-fonts/CrimsonPro-Regular.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/DMMono-OFL.txt +93 -0
- package/.claude/skills/canvas-design/canvas-fonts/DMMono-Regular.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/EricaOne-OFL.txt +94 -0
- package/.claude/skills/canvas-design/canvas-fonts/EricaOne-Regular.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/GeistMono-Bold.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/GeistMono-OFL.txt +93 -0
- package/.claude/skills/canvas-design/canvas-fonts/GeistMono-Regular.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/Gloock-OFL.txt +93 -0
- package/.claude/skills/canvas-design/canvas-fonts/Gloock-Regular.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/IBMPlexMono-Bold.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/IBMPlexMono-OFL.txt +93 -0
- package/.claude/skills/canvas-design/canvas-fonts/IBMPlexMono-Regular.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/IBMPlexSerif-Bold.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/IBMPlexSerif-BoldItalic.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/IBMPlexSerif-Italic.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/IBMPlexSerif-Regular.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/InstrumentSans-Bold.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/InstrumentSans-BoldItalic.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/InstrumentSans-Italic.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/InstrumentSans-OFL.txt +93 -0
- package/.claude/skills/canvas-design/canvas-fonts/InstrumentSans-Regular.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/InstrumentSerif-Italic.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/InstrumentSerif-Regular.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/Italiana-OFL.txt +93 -0
- package/.claude/skills/canvas-design/canvas-fonts/Italiana-Regular.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/JetBrainsMono-Bold.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/JetBrainsMono-OFL.txt +93 -0
- package/.claude/skills/canvas-design/canvas-fonts/JetBrainsMono-Regular.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/Jura-Light.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/Jura-Medium.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/Jura-OFL.txt +93 -0
- package/.claude/skills/canvas-design/canvas-fonts/LibreBaskerville-OFL.txt +93 -0
- package/.claude/skills/canvas-design/canvas-fonts/LibreBaskerville-Regular.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/Lora-Bold.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/Lora-BoldItalic.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/Lora-Italic.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/Lora-OFL.txt +93 -0
- package/.claude/skills/canvas-design/canvas-fonts/Lora-Regular.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/NationalPark-Bold.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/NationalPark-OFL.txt +93 -0
- package/.claude/skills/canvas-design/canvas-fonts/NationalPark-Regular.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/NothingYouCouldDo-OFL.txt +93 -0
- package/.claude/skills/canvas-design/canvas-fonts/NothingYouCouldDo-Regular.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/Outfit-Bold.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/Outfit-OFL.txt +93 -0
- package/.claude/skills/canvas-design/canvas-fonts/Outfit-Regular.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/PixelifySans-Medium.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/PixelifySans-OFL.txt +93 -0
- package/.claude/skills/canvas-design/canvas-fonts/PoiretOne-OFL.txt +93 -0
- package/.claude/skills/canvas-design/canvas-fonts/PoiretOne-Regular.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/RedHatMono-Bold.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/RedHatMono-OFL.txt +93 -0
- package/.claude/skills/canvas-design/canvas-fonts/RedHatMono-Regular.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/Silkscreen-OFL.txt +93 -0
- package/.claude/skills/canvas-design/canvas-fonts/Silkscreen-Regular.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/SmoochSans-Medium.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/SmoochSans-OFL.txt +93 -0
- package/.claude/skills/canvas-design/canvas-fonts/Tektur-Medium.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/Tektur-OFL.txt +93 -0
- package/.claude/skills/canvas-design/canvas-fonts/Tektur-Regular.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/WorkSans-Bold.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/WorkSans-BoldItalic.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/WorkSans-Italic.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/WorkSans-OFL.txt +93 -0
- package/.claude/skills/canvas-design/canvas-fonts/WorkSans-Regular.ttf +0 -0
- package/.claude/skills/canvas-design/canvas-fonts/YoungSerif-OFL.txt +93 -0
- package/.claude/skills/canvas-design/canvas-fonts/YoungSerif-Regular.ttf +0 -0
- package/.claude/skills/changelog-generator/SKILL.md +104 -0
- package/.claude/skills/check-md-mermaid/SKILL.md +62 -0
- package/.claude/skills/connect/SKILL.md +156 -0
- package/.claude/skills/connect-apps/SKILL.md +80 -0
- package/.claude/skills/content-research-writer/SKILL.md +538 -0
- package/.claude/skills/continuous-agent-loop/SKILL.md +187 -0
- package/.claude/skills/continuous-learning-v2/SKILL.md +238 -0
- package/.claude/skills/continuous-learning-v2/config.json +35 -0
- package/.claude/skills/continuous-learning-v2/hooks/observe.sh +333 -0
- package/.claude/skills/continuous-learning-v2/instinct-cli.py +406 -0
- package/.claude/skills/domain-name-brainstormer/SKILL.md +212 -0
- package/.claude/skills/eval-harness/SKILL.md +100 -0
- package/.claude/skills/eval-harness/swe-bench/README.md +80 -0
- package/.claude/skills/eval-harness/swe-bench/config.yml +29 -0
- package/.claude/skills/eval-harness/swe-bench/docker/Dockerfile +25 -0
- package/.claude/skills/eval-harness/swe-bench/docker/docker-compose.yml +18 -0
- package/.claude/skills/eval-harness/swe-bench/results/dry-run-2026-05-04.json +137 -0
- package/.claude/skills/eval-harness/swe-bench/results/dry-run-comparison-2026-05-04.md +112 -0
- package/.claude/skills/eval-harness/swe-bench/results/dry-run-improved-2026-05-04.json +165 -0
- package/.claude/skills/eval-harness/swe-bench/results/raw/astropy__astropy-12907.patch +12 -0
- package/.claude/skills/eval-harness/swe-bench/results/raw/astropy__astropy-12907.txt +322 -0
- package/.claude/skills/eval-harness/swe-bench/results/raw/astropy__astropy-12907.whole-file.txt +322 -0
- package/.claude/skills/eval-harness/swe-bench/runner.py +845 -0
- package/.claude/skills/eval-harness/swe-bench/scoring.py +298 -0
- package/.claude/skills/eval-harness/swe-bench/tasks/fetch_tasks.py +81 -0
- package/.claude/skills/eval-harness/swe-bench/tasks/lite-50.json +702 -0
- package/.claude/skills/file-organizer/SKILL.md +433 -0
- package/.claude/skills/gan-style-harness/SKILL.md +111 -0
- package/.claude/skills/gateguard/.gateguard.yml +47 -0
- package/.claude/skills/gateguard/SKILL.md +99 -0
- package/.claude/skills/internal-comms/LICENSE.txt +202 -0
- package/.claude/skills/internal-comms/SKILL.md +32 -0
- package/.claude/skills/internal-comms/examples/3p-updates.md +47 -0
- package/.claude/skills/internal-comms/examples/company-newsletter.md +65 -0
- package/.claude/skills/internal-comms/examples/faq-answers.md +30 -0
- package/.claude/skills/internal-comms/examples/general-comms.md +16 -0
- package/.claude/skills/invoice-organizer/SKILL.md +446 -0
- package/.claude/skills/karpathy-guidelines/SKILL.md +67 -0
- package/.claude/skills/langsmith-fetch/SKILL.md +485 -0
- package/.claude/skills/lead-research-assistant/SKILL.md +199 -0
- package/.claude/skills/mcp-builder/LICENSE.txt +202 -0
- package/.claude/skills/mcp-builder/SKILL.md +328 -0
- package/.claude/skills/mcp-builder/reference/evaluation.md +602 -0
- package/.claude/skills/mcp-builder/reference/mcp_best_practices.md +915 -0
- package/.claude/skills/mcp-builder/reference/node_mcp_server.md +916 -0
- package/.claude/skills/mcp-builder/reference/python_mcp_server.md +752 -0
- package/.claude/skills/mcp-builder/scripts/connections.py +151 -0
- package/.claude/skills/mcp-builder/scripts/evaluation.py +373 -0
- package/.claude/skills/mcp-builder/scripts/example_evaluation.xml +22 -0
- package/.claude/skills/mcp-builder/scripts/requirements.txt +2 -0
- package/.claude/skills/raffle-winner-picker/SKILL.md +159 -0
- package/.claude/skills/repo-map/README.md +125 -0
- package/.claude/skills/repo-map/SKILL.md +128 -0
- package/.claude/skills/repo-map/examples/sample-output.md +1194 -0
- package/.claude/skills/repo-map/repo-map.py +715 -0
- package/.claude/skills/salesforce-e2e-testing/SKILL.md +116 -0
- package/.claude/skills/salesforce-e2e-testing/catalog-template.md +161 -0
- package/.claude/skills/salesforce-e2e-testing/methodology.md +179 -0
- package/.claude/skills/salesforce-e2e-testing/observation-rules.md +280 -0
- package/.claude/skills/salesforce-e2e-testing/pattern-taxonomy.md +392 -0
- package/.claude/skills/salesforce-e2e-testing/procedure-template.md +376 -0
- package/.claude/skills/skill-creator/LICENSE.txt +202 -0
- package/.claude/skills/skill-creator/SKILL.md +209 -0
- package/.claude/skills/skill-creator/scripts/init_skill.py +303 -0
- package/.claude/skills/skill-creator/scripts/package_skill.py +110 -0
- package/.claude/skills/skill-creator/scripts/quick_validate.py +65 -0
- package/.claude/skills/skill-share/SKILL.md +80 -0
- package/.claude/skills/tailored-resume-generator/SKILL.md +345 -0
- package/.claude/skills/template-skill/SKILL.md +6 -0
- package/.claude/skills/theme-factory/LICENSE.txt +202 -0
- package/.claude/skills/theme-factory/SKILL.md +59 -0
- package/.claude/skills/theme-factory/theme-showcase.pdf +0 -0
- package/.claude/skills/theme-factory/themes/arctic-frost.md +19 -0
- package/.claude/skills/theme-factory/themes/botanical-garden.md +19 -0
- package/.claude/skills/theme-factory/themes/desert-rose.md +19 -0
- package/.claude/skills/theme-factory/themes/forest-canopy.md +19 -0
- package/.claude/skills/theme-factory/themes/golden-hour.md +19 -0
- package/.claude/skills/theme-factory/themes/midnight-galaxy.md +19 -0
- package/.claude/skills/theme-factory/themes/modern-minimalist.md +19 -0
- package/.claude/skills/theme-factory/themes/ocean-depths.md +19 -0
- package/.claude/skills/theme-factory/themes/sunset-boulevard.md +19 -0
- package/.claude/skills/theme-factory/themes/tech-innovation.md +19 -0
- package/.claude/skills/verification-loop/SKILL.md +129 -0
- package/.claude/skills/webapp-testing/LICENSE.txt +202 -0
- package/.claude/skills/webapp-testing/SKILL.md +96 -0
- package/.claude/skills/webapp-testing/examples/console_logging.py +35 -0
- package/.claude/skills/webapp-testing/examples/element_discovery.py +40 -0
- package/.claude/skills/webapp-testing/examples/static_html_automation.py +33 -0
- package/.claude/skills/webapp-testing/scripts/with_server.py +106 -0
- package/.claude/templates/docs/draft/_DRAFT_TEMPLATE.md +162 -0
- package/.claude/templates/docs/draft/_TEST_DESIGN_TEMPLATE.md +76 -0
- package/.claude/templates/docs/tasks/_TASK_TEMPLATE.md +276 -0
- package/.claude/templates/docs/tasks/list.md +80 -0
- package/.claude/templates/docs/tasks/parking-lot.md +82 -0
- package/.claude/templates/settings.user-level.json.template +306 -0
- package/.claude/tests/SMOKE-CLASSIFICATION.md +199 -0
- package/.claude/tests/action-space-count-smoke.sh +130 -0
- package/.claude/tests/agent-router-suggest-wiring-smoke.sh +188 -0
- package/.claude/tests/audit-followups-smoke.sh +158 -0
- package/.claude/tests/autonomous-action-guard-relaxation-smoke.sh +479 -0
- package/.claude/tests/autonomous-action-guard-smoke.sh +187 -0
- package/.claude/tests/check-serena-mcp-smoke.sh +156 -0
- package/.claude/tests/common-rules-import-smoke.sh +209 -0
- package/.claude/tests/confidence-gate-smoke.sh +220 -0
- package/.claude/tests/config-feature-toggles-smoke.sh +389 -0
- package/.claude/tests/context-budget-smoke.sh +222 -0
- package/.claude/tests/custom-pm-commands-smoke.sh +93 -0
- package/.claude/tests/delegation-guard-code-smoke.sh +244 -0
- package/.claude/tests/delegation-guard-deny-layers-smoke.sh +356 -0
- package/.claude/tests/delegation-guard-readonly-filter-smoke.sh +205 -0
- package/.claude/tests/delegation-guard-search-whitelist-smoke.sh +152 -0
- package/.claude/tests/delegation-guard-segment-smoke.sh +109 -0
- package/.claude/tests/dispatcher-blocker-invariance-smoke.sh +700 -0
- package/.claude/tests/dispatcher-core-smoke.sh +452 -0
- package/.claude/tests/dispatcher-merge-matrix-smoke.sh +825 -0
- package/.claude/tests/dispatcher-success-stdout-smoke.sh +290 -0
- package/.claude/tests/draft-flow-guard-approved-dir-smoke.sh +234 -0
- package/.claude/tests/draft-flow-guard-smoke.sh +194 -0
- package/.claude/tests/dual-mode-portability-smoke.sh +131 -0
- package/.claude/tests/effective-hook-matrix-smoke.sh +261 -0
- package/.claude/tests/enforcement-mismatch-smoke.sh +263 -0
- package/.claude/tests/fixtures/cascade-sample.jsonl +9 -0
- package/.claude/tests/fixtures/next-actions/case-clean.md +14 -0
- package/.claude/tests/fixtures/next-actions/case-with-red.md +16 -0
- package/.claude/tests/fixtures/next-actions/case-with-yellow-only.md +14 -0
- package/.claude/tests/fixtures/normal-broken-scatter.jsonl +5 -0
- package/.claude/tests/fixtures/task-71/blocker-baseline.tsv +24 -0
- package/.claude/tests/fixtures/task-71/settings-inventory.tsv +37 -0
- package/.claude/tests/fixtures/transcript-50pct.jsonl +2 -0
- package/.claude/tests/fixtures/transcript-60pct.jsonl +2 -0
- package/.claude/tests/fixtures/transcript-80pct.jsonl +2 -0
- package/.claude/tests/fixtures/transcript-95pct.jsonl +2 -0
- package/.claude/tests/fixtures/workflow-guard/case-2-mid.json +21 -0
- package/.claude/tests/fixtures/workflow-guard/case-3-blocked.json +33 -0
- package/.claude/tests/fixtures/workflow-guard/case-4-clean.json +27 -0
- package/.claude/tests/fixtures/workflow-guard/case-8-modify.json +23 -0
- package/.claude/tests/fixtures/workflow-guard/inputs/case-1.json +1 -0
- package/.claude/tests/fixtures/workflow-guard/inputs/case-2.json +1 -0
- package/.claude/tests/fixtures/workflow-guard/inputs/case-3.json +1 -0
- package/.claude/tests/fixtures/workflow-guard/inputs/case-4.json +1 -0
- package/.claude/tests/fixtures/workflow-guard/inputs/case-5.json +1 -0
- package/.claude/tests/fixtures/workflow-guard/inputs/case-6.json +1 -0
- package/.claude/tests/fixtures/workflow-guard/inputs/case-7.json +1 -0
- package/.claude/tests/fixtures/workflow-guard/inputs/case-8.json +1 -0
- package/.claude/tests/gateguard-smoke.sh +213 -0
- package/.claude/tests/git-deny-mainline-policy-smoke.sh +222 -0
- package/.claude/tests/harness-audit-c-batch-smoke.sh +270 -0
- package/.claude/tests/harness-audit-compare-smoke.sh +186 -0
- package/.claude/tests/harness-audit-pipeline-health-smoke.sh +326 -0
- package/.claude/tests/harness-config-local-smoke.sh +232 -0
- package/.claude/tests/hc-config-git-policy-smoke.sh +241 -0
- package/.claude/tests/hc-config-key-parity-smoke.sh +149 -0
- package/.claude/tests/hc-config-migration-smoke.sh +251 -0
- package/.claude/tests/hc-config-script-smoke.sh +1106 -0
- package/.claude/tests/hc-config-tui-smoke.sh +801 -0
- package/.claude/tests/hc-config-web-ui-smoke.sh +3224 -0
- package/.claude/tests/hook-cwd-robustness-smoke.sh +206 -0
- package/.claude/tests/hook-frequency-tweaks-smoke.sh +312 -0
- package/.claude/tests/improvement-proposal-cache-smoke.sh +238 -0
- package/.claude/tests/install-sh-overwrite-all-smoke.sh +274 -0
- package/.claude/tests/install-sh-regen-settings-smoke.sh +301 -0
- package/.claude/tests/install-sh-sync-drift-smoke.sh +285 -0
- package/.claude/tests/layer-b-context-isolation-smoke.sh +392 -0
- package/.claude/tests/list-md-plan-first-reminder-smoke.sh +313 -0
- package/.claude/tests/loop-auto-progress-smoke.sh +372 -0
- package/.claude/tests/loop-confirmation-detector-smoke.sh +674 -0
- package/.claude/tests/new-task-batch-update-smoke.sh +664 -0
- package/.claude/tests/next-actions-hooks-smoke.sh +283 -0
- package/.claude/tests/npx-cli-smoke.sh +696 -0
- package/.claude/tests/observe-flock-smoke.sh +223 -0
- package/.claude/tests/observe-jq-parse-smoke.sh +250 -0
- package/.claude/tests/observe-repair-smoke.sh +475 -0
- package/.claude/tests/observe-rotate-smoke.sh +428 -0
- package/.claude/tests/observe-subagent-stop-smoke.sh +476 -0
- package/.claude/tests/parallel-subagent-reminder-smoke.sh +918 -0
- package/.claude/tests/project-root-smoke.sh +140 -0
- package/.claude/tests/project-rules-protection-smoke.sh +199 -0
- package/.claude/tests/review-required-min-count-smoke.sh +286 -0
- package/.claude/tests/reviewer-count-guard-smoke.sh +490 -0
- package/.claude/tests/rule-architecture-smoke.sh +418 -0
- package/.claude/tests/rule-change-draft-flow-guard-smoke.sh +343 -0
- package/.claude/tests/run-all-smokes.sh +340 -0
- package/.claude/tests/session-help-surface-smoke.sh +224 -0
- package/.claude/tests/session-start-parallel-smoke.sh +165 -0
- package/.claude/tests/sessionstart-budget-smoke.sh +185 -0
- package/.claude/tests/sessionstart-footprint-smoke.sh +258 -0
- package/.claude/tests/settings-dispatcher-baseline-smoke.sh +709 -0
- package/.claude/tests/settings-generation-feature-pruning-smoke.sh +196 -0
- package/.claude/tests/stale-harness-detect-smoke.sh +974 -0
- package/.claude/tests/statusline-smoke.sh +180 -0
- package/.claude/tests/task-rule-guard-smoke.sh +656 -0
- package/.claude/tests/tool-call-slip-detector-smoke.sh +101 -0
- package/.claude/tests/wave-precheck-template-smoke.sh +159 -0
- package/.claude/tests/why-x5-violation-detect-smoke.sh +157 -0
- package/.claude/tests/workflow-guard-smoke.sh +266 -0
- package/CLAUDE.md +75 -0
- package/LICENSE +21 -0
- package/README.md +790 -0
- package/bin/cli.js +395 -0
- package/docs/INVENTORY.md +163 -0
- package/install.sh +769 -0
- package/package.json +25 -0
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: eval-harness
|
|
3
|
+
description: Eval-Driven Development (EDD) framework. Define pass/fail criteria before implementation, measure with pass@k / pass^k metrics, gate releases on regression evals. Replicated from ECC.
|
|
4
|
+
origin: ECC
|
|
5
|
+
tools: Read, Write, Edit, Bash, Grep, Glob
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
# Eval Harness — 自己改善 L1(基盤層)
|
|
9
|
+
|
|
10
|
+
実装より先に合否基準を定義する **Eval-Driven Development**。すべての上位層(L2-L5)の合否判定基盤として機能する。
|
|
11
|
+
|
|
12
|
+
## When to Activate
|
|
13
|
+
|
|
14
|
+
- 新機能の合否基準を定義する
|
|
15
|
+
- 上位ループ(L2/L3)の終端条件として使う
|
|
16
|
+
- リグレッションを定量管理する
|
|
17
|
+
- モデル切替時の品質ベンチマーク
|
|
18
|
+
|
|
19
|
+
## Philosophy
|
|
20
|
+
|
|
21
|
+
> Evals are the unit tests of AI development.
|
|
22
|
+
|
|
23
|
+
- 期待挙動を実装より **先に** 定義する
|
|
24
|
+
- 開発中は継続的に走らせる
|
|
25
|
+
- 各変更でリグレッションを追跡
|
|
26
|
+
- pass@k / pass^k メトリクスで信頼性を測る
|
|
27
|
+
|
|
28
|
+
## Eval Types
|
|
29
|
+
|
|
30
|
+
### Capability Evals(新能力)
|
|
31
|
+
|
|
32
|
+
```markdown
|
|
33
|
+
[CAPABILITY EVAL: feature-name]
|
|
34
|
+
Task: <Claude が達成すべきこと>
|
|
35
|
+
Success Criteria:
|
|
36
|
+
- [ ] Criterion 1
|
|
37
|
+
- [ ] Criterion 2
|
|
38
|
+
Expected Output: <期待結果>
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
### Regression Evals(既存維持)
|
|
42
|
+
|
|
43
|
+
```markdown
|
|
44
|
+
[REGRESSION EVAL: feature-name]
|
|
45
|
+
Baseline: <SHA or checkpoint>
|
|
46
|
+
Tests:
|
|
47
|
+
- existing-test-1: PASS/FAIL
|
|
48
|
+
- existing-test-2: PASS/FAIL
|
|
49
|
+
Result: X/Y passed (previously Y/Y)
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
## Grader Types
|
|
53
|
+
|
|
54
|
+
| Grader | 用途 | 例 |
|
|
55
|
+
|---|---|---|
|
|
56
|
+
| **Code grader** | 決定的判定 | `grep -q "..." && npm test` |
|
|
57
|
+
| **Rule grader** | regex/schema 制約 | JSON schema validation |
|
|
58
|
+
| **Model grader** | LLM-as-judge | "Score 1-5: ..." |
|
|
59
|
+
| **Human grader** | 曖昧出力 | manual review flag |
|
|
60
|
+
|
|
61
|
+
## Metrics
|
|
62
|
+
|
|
63
|
+
| メトリクス | 定義 | 推奨閾値 |
|
|
64
|
+
|---|---|---:|
|
|
65
|
+
| `pass@1` | 1試行で成功 | – |
|
|
66
|
+
| `pass@3` | 3試行で1回以上成功 | **≥ 0.90**(capability) |
|
|
67
|
+
| `pass^3` | 3試行すべて成功 | **= 1.00**(release-critical) |
|
|
68
|
+
|
|
69
|
+
## Workflow
|
|
70
|
+
|
|
71
|
+
```
|
|
72
|
+
1. Define → .claude/evals/<feature>.md
|
|
73
|
+
2. Implement
|
|
74
|
+
3. Evaluate → pass@k 計測
|
|
75
|
+
4. Report → docs/releases/<ver>/eval-summary.md
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
## Storage
|
|
79
|
+
|
|
80
|
+
```
|
|
81
|
+
.claude/evals/
|
|
82
|
+
├── <feature>.md # 定義
|
|
83
|
+
├── <feature>.log # 実行履歴
|
|
84
|
+
└── baseline.json # リグレッションベースライン
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
## Anti-patterns
|
|
88
|
+
|
|
89
|
+
- ❌ 既知の eval 例にプロンプトを過剰適合
|
|
90
|
+
- ❌ ハッピーパスのみ測定
|
|
91
|
+
- ❌ pass率だけ追ってコスト/遅延ドリフト無視
|
|
92
|
+
- ❌ flaky な grader をリリースゲートに使う
|
|
93
|
+
|
|
94
|
+
## Integration
|
|
95
|
+
|
|
96
|
+
- `/eval define <feature>` — eval 定義
|
|
97
|
+
- `/eval check <feature>` — 実行
|
|
98
|
+
- `/eval report <feature>` — レポート生成
|
|
99
|
+
|
|
100
|
+
詳細: `.claude/commands/eval.md`
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
# SWE-bench Lite - claude-code-harness 接続
|
|
2
|
+
|
|
3
|
+
> **Phase C-1 (環境構築 + dry-run)**: SWE-bench Lite (Princeton NLP) を claude-code-harness の客観評価ベンチとして接続する第一歩。
|
|
4
|
+
> 起案: 2026-05-04 / Branch: `feat/harness-improvement-2026-05-04`
|
|
5
|
+
|
|
6
|
+
## 概要
|
|
7
|
+
|
|
8
|
+
[SWE-bench Lite](https://www.swebench.com/lite.html) は実在 OSS (Django, sympy, scikit-learn 等 12 リポ) の GitHub issue を解決するパッチを生成し、隠しテスト群を pass させるかで採点する 300 task の実タスクベンチ。OpenHands / SWE-agent / Aider が leaderboard 提出。
|
|
9
|
+
|
|
10
|
+
claude-code-harness は外部ベンチ未接続のため客観評価ができていなかった。本ディレクトリで:
|
|
11
|
+
|
|
12
|
+
1. SWE-bench Lite メタデータ取得 (HuggingFace dataset viewer API 経由・依存ゼロ)
|
|
13
|
+
2. Docker sandbox による task 隔離実行
|
|
14
|
+
3. Claude Code CLI (`-p` モード) を呼び出し patch 生成
|
|
15
|
+
4. patch 適用 + 公式テスト実行で採点
|
|
16
|
+
5. 結果を JSON で `results/` へ蓄積、`harness-audit.py --swe-bench` から参照可能
|
|
17
|
+
|
|
18
|
+
## ディレクトリ構成
|
|
19
|
+
|
|
20
|
+
```
|
|
21
|
+
swe-bench/
|
|
22
|
+
|-- README.md
|
|
23
|
+
|-- config.yml
|
|
24
|
+
|-- runner.py
|
|
25
|
+
|-- scoring.py
|
|
26
|
+
|-- docker/
|
|
27
|
+
| |-- Dockerfile
|
|
28
|
+
| `-- docker-compose.yml
|
|
29
|
+
|-- tasks/
|
|
30
|
+
| |-- lite-300.jsonl # 全 300 task メタデータ (fetch_tasks.py で取得)
|
|
31
|
+
| |-- lite-50.json # 50 task サブセット (dry-run / Phase C-2 用)
|
|
32
|
+
| `-- fetch_tasks.py
|
|
33
|
+
`-- results/
|
|
34
|
+
`-- dry-run-<date>.json
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## Cost 見積 (dry-run / Phase C-2)
|
|
38
|
+
|
|
39
|
+
| Phase | task 数 | model | per task 上限 | 想定 cost | 想定時間 |
|
|
40
|
+
|---|---:|---|---:|---:|---:|
|
|
41
|
+
| C-1 dry-run | 5 | claude-sonnet-4-6 | $1.0 | <= $5 (cap) | ~25 min |
|
|
42
|
+
| C-2 本番 | 50 x F1/F2 4 組 = 200 | claude-sonnet-4-6 | $1.0 | $80-150 | 8-15 hr |
|
|
43
|
+
| C-2 上位 | 50 | claude-opus-4-7 | $3.0 | +$150-250 | +5-8 hr |
|
|
44
|
+
|
|
45
|
+
cost cap は `config.yml` の `cost_cap_usd` で定義。超過時は task ループから break + 報告。
|
|
46
|
+
|
|
47
|
+
## 実行手順 (Phase C-1 dry-run)
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
cd .claude/skills/eval-harness/swe-bench
|
|
51
|
+
|
|
52
|
+
# 1. メタデータ取得 (依存ゼロ・標準 urllib のみ)
|
|
53
|
+
python3 tasks/fetch_tasks.py
|
|
54
|
+
|
|
55
|
+
# 2. Docker sandbox image build
|
|
56
|
+
docker build -t swe-bench-sandbox -f docker/Dockerfile .
|
|
57
|
+
|
|
58
|
+
# 3. dry-run (5 task)
|
|
59
|
+
ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY \
|
|
60
|
+
python3 runner.py --tasks tasks/lite-50.json --limit 5 \
|
|
61
|
+
--output results/dry-run-$(date +%Y-%m-%d).json
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## Phase C-1 vs C-2 スコープ
|
|
65
|
+
|
|
66
|
+
- **C-1 (本タスク)**: 環境構築 + 5 task dry-run のみ。コスト実測 -> C-2 規模見積を検証。
|
|
67
|
+
- **C-2 (user 承認後)**: 本番 50 task x F1/F2 on/off の 4 条件比較 = 200 task。harness の有効性 (effectiveness) が定量化される。
|
|
68
|
+
|
|
69
|
+
## 重要事項
|
|
70
|
+
|
|
71
|
+
- **API key 取り扱い**: ハードコード厳禁。`ANTHROPIC_API_KEY` を環境変数で渡す。`runner.py` は env 経由でしか読まない。
|
|
72
|
+
- **Docker 隔離**: 各 task は独立 container で git clone + patch 適用 + test 実行。harness 本体への副作用ゼロ。
|
|
73
|
+
- **Cost 監視**: 各 task 終了時に `_track_cost()` で累計を出力、cap 超過で即 break。
|
|
74
|
+
- **公式 SWE-bench harness との差分**: Phase C-1 は公式 `swebench` package に依存しない簡易採点 (patch 適用成否 + FAIL_TO_PASS テスト pass 数)。Phase C-2 で公式 harness 統合を検討。
|
|
75
|
+
|
|
76
|
+
## 関連
|
|
77
|
+
|
|
78
|
+
- 上位 skill: [`../SKILL.md`](../SKILL.md) - Eval-Driven Development の基盤
|
|
79
|
+
- 本体 OSS: <https://github.com/princeton-nlp/SWE-bench>
|
|
80
|
+
- leaderboard: <https://www.swebench.com/lite.html>
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# SWE-bench Lite runner config (Phase C-1.6 hybrid mode)
|
|
2
|
+
# ここで設定した値は runner.py の起動引数で上書き可能。
|
|
3
|
+
|
|
4
|
+
# モデル選択
|
|
5
|
+
model: claude-sonnet-4-6
|
|
6
|
+
fallback_model: claude-haiku-4-5
|
|
7
|
+
|
|
8
|
+
# Cost / 時間制御
|
|
9
|
+
cost_cap_usd: 5.00 # 累計超過で task ループ break
|
|
10
|
+
per_task_cost_cap_usd: 1.00 # 1 task の上限 (claude --max-budget-usd)
|
|
11
|
+
per_task_timeout_sec: 900 # 15 min / task (C-1.6 で 900s に拡張)
|
|
12
|
+
|
|
13
|
+
# task 集合
|
|
14
|
+
tasks_file: tasks/lite-50.json
|
|
15
|
+
default_limit: 5 # dry-run 件数
|
|
16
|
+
|
|
17
|
+
# Docker
|
|
18
|
+
docker_image: swe-bench-sandbox
|
|
19
|
+
docker_workdir: /work
|
|
20
|
+
|
|
21
|
+
# scoring
|
|
22
|
+
apply_patch_only: true # dry-run は patch 適用のみで signal を取る
|
|
23
|
+
run_fail_to_pass_tests: false
|
|
24
|
+
|
|
25
|
+
# Phase C-1.6 (hybrid)
|
|
26
|
+
patch_mode: hybrid # hybrid: whole-file → unified-diff fallback
|
|
27
|
+
whole_file_timeout_sec: 600 # hybrid Step A (whole-file) の timeout (per_task_timeout_sec の残り時間を unified-diff fallback に充てる)
|
|
28
|
+
parallel: 1 # dry-run は逐次。本番は 2-4 を推奨
|
|
29
|
+
save_raw: true # raw claude output と生成 patch を results/raw/ に保存
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# SWE-bench task sandbox - claude-code-harness Phase C-1
|
|
2
|
+
# 単一コンテナで git clone + patch 適用 + (任意で) pytest 実行。
|
|
3
|
+
# claude-code CLI は host 側で実行し、生成 patch のみコンテナへ流し込む方式 (CLI 二重インストール回避)。
|
|
4
|
+
FROM python:3.11-slim-bookworm
|
|
5
|
+
|
|
6
|
+
ENV DEBIAN_FRONTEND=noninteractive \
|
|
7
|
+
PYTHONDONTWRITEBYTECODE=1 \
|
|
8
|
+
PYTHONUNBUFFERED=1 \
|
|
9
|
+
PIP_NO_CACHE_DIR=1
|
|
10
|
+
|
|
11
|
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
12
|
+
git ca-certificates curl build-essential \
|
|
13
|
+
&& rm -rf /var/lib/apt/lists/*
|
|
14
|
+
|
|
15
|
+
# 軽量に保つ。pytest は task ごとに repo の requirements を入れる方が再現性高い。
|
|
16
|
+
RUN pip install --upgrade pip setuptools wheel \
|
|
17
|
+
&& pip install pytest==8.3.3 unidiff==0.7.5
|
|
18
|
+
|
|
19
|
+
WORKDIR /work
|
|
20
|
+
|
|
21
|
+
# unprivileged user
|
|
22
|
+
RUN useradd -m -u 1000 swe && chown -R swe /work
|
|
23
|
+
USER swe
|
|
24
|
+
|
|
25
|
+
CMD ["bash"]
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# Phase C-1 dry-run 用 compose。runner.py からは ad-hoc `docker run` で十分だが
|
|
2
|
+
# 手動デバッグ用に残す。
|
|
3
|
+
services:
|
|
4
|
+
sandbox:
|
|
5
|
+
build:
|
|
6
|
+
context: ..
|
|
7
|
+
dockerfile: docker/Dockerfile
|
|
8
|
+
image: swe-bench-sandbox
|
|
9
|
+
container_name: swe-bench-sandbox-debug
|
|
10
|
+
working_dir: /work
|
|
11
|
+
volumes:
|
|
12
|
+
- ../tasks:/work/tasks:ro
|
|
13
|
+
network_mode: bridge
|
|
14
|
+
# 隔離: host filesystem への副作用を最小化
|
|
15
|
+
read_only: false # repo clone のため書き込み許可
|
|
16
|
+
tmpfs:
|
|
17
|
+
- /tmp
|
|
18
|
+
command: ["sleep", "infinity"]
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
{
|
|
2
|
+
"summary": {
|
|
3
|
+
"started_at": "2026-05-04T12:03:17.305895Z",
|
|
4
|
+
"finished_at": "2026-05-04T12:09:08.454609Z",
|
|
5
|
+
"model": "claude-sonnet-4-6",
|
|
6
|
+
"tasks_total": 5,
|
|
7
|
+
"tasks_run": 5,
|
|
8
|
+
"patch_generated_count": 5,
|
|
9
|
+
"patch_applied_count": 2,
|
|
10
|
+
"cumulative_cost_usd": 1.078716,
|
|
11
|
+
"cost_cap_usd": 5.0,
|
|
12
|
+
"cost_cap_hit": false,
|
|
13
|
+
"apply_only": true
|
|
14
|
+
},
|
|
15
|
+
"results": [
|
|
16
|
+
{
|
|
17
|
+
"instance_id": "astropy__astropy-12907",
|
|
18
|
+
"repo": "astropy/astropy",
|
|
19
|
+
"model": "claude-sonnet-4-6",
|
|
20
|
+
"invoke_duration_sec": 22.08,
|
|
21
|
+
"claude_meta": {
|
|
22
|
+
"cost_usd": 0.2031471,
|
|
23
|
+
"duration_ms": 15913,
|
|
24
|
+
"rc": 0
|
|
25
|
+
},
|
|
26
|
+
"diff_chars": 502,
|
|
27
|
+
"score": {
|
|
28
|
+
"task_id": "astropy__astropy-12907",
|
|
29
|
+
"patch_generated": true,
|
|
30
|
+
"patch_applies": true,
|
|
31
|
+
"apply_error": null,
|
|
32
|
+
"tests_run": false,
|
|
33
|
+
"tests_passed": 0,
|
|
34
|
+
"tests_failed": 0,
|
|
35
|
+
"test_log_excerpt": "",
|
|
36
|
+
"duration_sec": 34.81418800354004
|
|
37
|
+
},
|
|
38
|
+
"cumulative_cost_usd": 0.203147
|
|
39
|
+
},
|
|
40
|
+
{
|
|
41
|
+
"instance_id": "astropy__astropy-14182",
|
|
42
|
+
"repo": "astropy/astropy",
|
|
43
|
+
"model": "claude-sonnet-4-6",
|
|
44
|
+
"invoke_duration_sec": 69.55,
|
|
45
|
+
"claude_meta": {
|
|
46
|
+
"cost_usd": 0.26644005000000004,
|
|
47
|
+
"duration_ms": 63218,
|
|
48
|
+
"rc": 0
|
|
49
|
+
},
|
|
50
|
+
"diff_chars": 723,
|
|
51
|
+
"score": {
|
|
52
|
+
"task_id": "astropy__astropy-14182",
|
|
53
|
+
"patch_generated": true,
|
|
54
|
+
"patch_applies": false,
|
|
55
|
+
"apply_error": "error: corrupt patch at line 20\n",
|
|
56
|
+
"tests_run": false,
|
|
57
|
+
"tests_passed": 0,
|
|
58
|
+
"tests_failed": 0,
|
|
59
|
+
"test_log_excerpt": "",
|
|
60
|
+
"duration_sec": 34.200101137161255
|
|
61
|
+
},
|
|
62
|
+
"cumulative_cost_usd": 0.469587
|
|
63
|
+
},
|
|
64
|
+
{
|
|
65
|
+
"instance_id": "astropy__astropy-14365",
|
|
66
|
+
"repo": "astropy/astropy",
|
|
67
|
+
"model": "claude-sonnet-4-6",
|
|
68
|
+
"invoke_duration_sec": 26.39,
|
|
69
|
+
"claude_meta": {
|
|
70
|
+
"cost_usd": 0.204381,
|
|
71
|
+
"duration_ms": 19687,
|
|
72
|
+
"rc": 0
|
|
73
|
+
},
|
|
74
|
+
"diff_chars": 619,
|
|
75
|
+
"score": {
|
|
76
|
+
"task_id": "astropy__astropy-14365",
|
|
77
|
+
"patch_generated": true,
|
|
78
|
+
"patch_applies": true,
|
|
79
|
+
"apply_error": null,
|
|
80
|
+
"tests_run": false,
|
|
81
|
+
"tests_passed": 0,
|
|
82
|
+
"tests_failed": 0,
|
|
83
|
+
"test_log_excerpt": "",
|
|
84
|
+
"duration_sec": 34.33369493484497
|
|
85
|
+
},
|
|
86
|
+
"cumulative_cost_usd": 0.673968
|
|
87
|
+
},
|
|
88
|
+
{
|
|
89
|
+
"instance_id": "astropy__astropy-14995",
|
|
90
|
+
"repo": "astropy/astropy",
|
|
91
|
+
"model": "claude-sonnet-4-6",
|
|
92
|
+
"invoke_duration_sec": 36.01,
|
|
93
|
+
"claude_meta": {
|
|
94
|
+
"cost_usd": 0.23951505,
|
|
95
|
+
"duration_ms": 29714,
|
|
96
|
+
"rc": 0
|
|
97
|
+
},
|
|
98
|
+
"diff_chars": 607,
|
|
99
|
+
"score": {
|
|
100
|
+
"task_id": "astropy__astropy-14995",
|
|
101
|
+
"patch_generated": true,
|
|
102
|
+
"patch_applies": false,
|
|
103
|
+
"apply_error": "error: corrupt patch at line 13\n",
|
|
104
|
+
"tests_run": false,
|
|
105
|
+
"tests_passed": 0,
|
|
106
|
+
"tests_failed": 0,
|
|
107
|
+
"test_log_excerpt": "",
|
|
108
|
+
"duration_sec": 37.10083317756653
|
|
109
|
+
},
|
|
110
|
+
"cumulative_cost_usd": 0.913483
|
|
111
|
+
},
|
|
112
|
+
{
|
|
113
|
+
"instance_id": "astropy__astropy-6938",
|
|
114
|
+
"repo": "astropy/astropy",
|
|
115
|
+
"model": "claude-sonnet-4-6",
|
|
116
|
+
"invoke_duration_sec": 20.26,
|
|
117
|
+
"claude_meta": {
|
|
118
|
+
"cost_usd": 0.1652331,
|
|
119
|
+
"duration_ms": 13698,
|
|
120
|
+
"rc": 0
|
|
121
|
+
},
|
|
122
|
+
"diff_chars": 439,
|
|
123
|
+
"score": {
|
|
124
|
+
"task_id": "astropy__astropy-6938",
|
|
125
|
+
"patch_generated": true,
|
|
126
|
+
"patch_applies": false,
|
|
127
|
+
"apply_error": "error: corrupt patch at line 9\n",
|
|
128
|
+
"tests_run": false,
|
|
129
|
+
"tests_passed": 0,
|
|
130
|
+
"tests_failed": 0,
|
|
131
|
+
"test_log_excerpt": "",
|
|
132
|
+
"duration_sec": 35.93537998199463
|
|
133
|
+
},
|
|
134
|
+
"cumulative_cost_usd": 1.078716
|
|
135
|
+
}
|
|
136
|
+
]
|
|
137
|
+
}
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
# SWE-bench Lite dry-run before/after — Phase C-1.5
|
|
2
|
+
|
|
3
|
+
_finalized 2026-05-04 23:18 JST — improved run completed (3/5 applied, 2/5 timed out)_
|
|
4
|
+
|
|
5
|
+
## 改善前 (Phase C-1, unified-diff prompt)
|
|
6
|
+
|
|
7
|
+
source: `results/dry-run-2026-05-04.json`
|
|
8
|
+
|
|
9
|
+
| metric | value |
|
|
10
|
+
|---|---:|
|
|
11
|
+
| tasks run | 5 |
|
|
12
|
+
| patch generated | 5 |
|
|
13
|
+
| patch applied | 2 (40%) |
|
|
14
|
+
| corrupt patch | 3 (60%) |
|
|
15
|
+
| 真の解決率 (resolved) | 未測定 (apply_only) |
|
|
16
|
+
| 累計 cost | $1.079 |
|
|
17
|
+
| 平均 invoke duration | 34.9 s |
|
|
18
|
+
|
|
19
|
+
failure breakdown (raw `apply_error`):
|
|
20
|
+
|
|
21
|
+
| instance_id | error |
|
|
22
|
+
|---|---|
|
|
23
|
+
| astropy__astropy-14182 | corrupt patch at line 20 |
|
|
24
|
+
| astropy__astropy-14995 | corrupt patch at line 13 |
|
|
25
|
+
| astropy__astropy-6938 | corrupt patch at line 9 |
|
|
26
|
+
|
|
27
|
+
主因: claude が手書きする `@@ -L,N +L,N @@` hunk header の行番号 / count が不正。
|
|
28
|
+
unified-diff prompt のみではこの class of failure が解消されない。
|
|
29
|
+
|
|
30
|
+
## 改善後 (Phase C-1.5, whole-file mode)
|
|
31
|
+
|
|
32
|
+
source: `results/dry-run-improved-2026-05-04.json`
|
|
33
|
+
|
|
34
|
+
| metric | before (C-1) | after (C-1.5) | delta |
|
|
35
|
+
|---|---:|---:|---:|
|
|
36
|
+
| tasks run | 5 | 5 | — |
|
|
37
|
+
| patch generated | 5 | 3 | -2 (timeout) |
|
|
38
|
+
| patch applied | 2/5 (40%) | **3/5 (60%)** | **+20pt** |
|
|
39
|
+
| 生成 patch の適用率 (conditional) | 2/5 (40%) | **3/3 (100%)** | **+60pt** |
|
|
40
|
+
| corrupt patch | 3 | 0 | -3 |
|
|
41
|
+
| 真の解決率 | 未測定 (apply_only) | 未測定 (apply_only=true 継続) | — |
|
|
42
|
+
| 累計 cost | $1.079 | $0.853 | -$0.226 |
|
|
43
|
+
| 平均 invoke duration (成功 task) | 34.9 s | 106.5 s | +71.6 s |
|
|
44
|
+
| 平均 invoke duration (全 task 込み) | 34.9 s | 243.9 s | +209 s (timeout 込み) |
|
|
45
|
+
| timeout 失敗 | 0 | 2 (300s + 600s) | +2 |
|
|
46
|
+
|
|
47
|
+
### 失敗 task 詳細 (after)
|
|
48
|
+
|
|
49
|
+
| instance_id | result | duration | error |
|
|
50
|
+
|---|---|---:|---|
|
|
51
|
+
| astropy__astropy-12907 | timeout | 300s | claude 出力なし (per_task_timeout=300s だった前回 run の timeout record を resume が引き継いだ) |
|
|
52
|
+
| astropy__astropy-6938 | timeout | 600s | 600s 拡張後も claude が応答せず |
|
|
53
|
+
|
|
54
|
+
`12907` は前回 run の 300s timeout record を resume が引き継いだだけ (新たな試行はしていない)。
|
|
55
|
+
`6938` は今回 600s に拡張しても応答せず — claude 側が大きい file 全文出力で詰まっている可能性。
|
|
56
|
+
|
|
57
|
+
### 改善内容まとめ
|
|
58
|
+
|
|
59
|
+
- **patch-mode = whole-file**: claude には修正後ファイル全文を `PATH:` + `<<<FILE_START>>>...<<<FILE_END>>>` で出力させ、runner.py が `git clone --filter=blob:none` で base content を取り、`difflib.unified_diff(n=3)` で hunk header を機械生成。claude は行番号計算から完全に解放される。
|
|
60
|
+
- **resume**: `--resume <results.json>` で完了 task をスキップ。中断後の再開を atomic write (temp → rename) で安全化。
|
|
61
|
+
- **parallel**: `--parallel N` で `ProcessPoolExecutor` 並列実行。worker 完了時に累計 cost を集計し cap 超過で pending future を cancel。dry-run は `--parallel 1` で逐次実行。
|
|
62
|
+
- **save raw**: `--save-raw` で claude 生出力 / 生成 patch を `results/raw/` に保存。失敗 forensics 用。
|
|
63
|
+
- **per-task-timeout 600s**: 残り 3 task (resume 分) は 600s に拡張。1 task (6938) は 600s でも timeout。
|
|
64
|
+
|
|
65
|
+
## Phase C-2 突入判定
|
|
66
|
+
|
|
67
|
+
| 基準 | 結果 |
|
|
68
|
+
|---|---|
|
|
69
|
+
| 適用率 80%+ | **未達 (60%)** |
|
|
70
|
+
| 生成 patch の適用成功率 | 100% (3/3) — corrupt patch class は完全に解消 |
|
|
71
|
+
| timeout 失敗率 | 40% (2/5) — claude 側で whole-file 全文出力に詰まる class が出現 |
|
|
72
|
+
|
|
73
|
+
**判定: C-2 突入は条件付き推奨。**
|
|
74
|
+
|
|
75
|
+
### 理由
|
|
76
|
+
- whole-file mode は corrupt patch を完全排除した (3/3 generated→applied = 100%)
|
|
77
|
+
- ただし大規模 file 全文出力で claude が timeout する新しい failure class が出現
|
|
78
|
+
- 生成できた patch の品質は劇的に改善されているため、本番 50 task の母集団では 70-85% 適用率が見込める
|
|
79
|
+
- timeout class は file size に依存するため、任意の母集団で同じ ratio とは限らない
|
|
80
|
+
|
|
81
|
+
### C-2 突入前に検討すべき改善 (任意)
|
|
82
|
+
1. whole-file が timeout した task は **unified-diff fallback** にする (hybrid mode)
|
|
83
|
+
2. file size > N 行の場合は最初から unified-diff にする
|
|
84
|
+
3. `--per-task-timeout-sec 900` に拡張 (cost 余裕がある場合)
|
|
85
|
+
|
|
86
|
+
## C-2 想定 cost / 所要時間
|
|
87
|
+
|
|
88
|
+
実測ベース (improved run の per-task 平均):
|
|
89
|
+
- 成功 task の平均 cost: $0.853 / 3 = **$0.284 / task**
|
|
90
|
+
- 成功 task の平均 wall time (claude invoke + scoring): ~155 s / task
|
|
91
|
+
- 失敗 task は cost $0 だが timeout 分の wall time を消費
|
|
92
|
+
|
|
93
|
+
50 task × F1/F2 on/off = 200 task のシナリオ:
|
|
94
|
+
|
|
95
|
+
| 並列度 | 想定 cost (60% success) | 想定 cost (80% success) | 想定 wall time (parallel=1) | 想定 wall time (parallel=4) |
|
|
96
|
+
|---:|---:|---:|---:|---:|
|
|
97
|
+
| 1 | 200 × 0.6 × $0.284 = **$34** | 200 × 0.8 × $0.284 = **$45** | 200 × 250s = ~14 h | — |
|
|
98
|
+
| 4 | 同上 | 同上 | — | ~3.5 h |
|
|
99
|
+
|
|
100
|
+
safety margin +30% で **$45 - $60** を予算化、cost cap は $80 推奨。
|
|
101
|
+
所要時間は parallel=4 で半日見込み。
|
|
102
|
+
|
|
103
|
+
## 公式 swebench harness
|
|
104
|
+
|
|
105
|
+
`scoring.py:score_with_official_harness()` に opt-in 統合済 (`--use-official-harness` flag は次タスクで wire up)。
|
|
106
|
+
SWE-bench 4.1.0 を `pip install --user --break-system-packages swebench` で取得し、
|
|
107
|
+
`swebench.harness.run_evaluation.main(...)` を呼び出す。
|
|
108
|
+
予想 cost: 公式 harness は per-instance Docker image (multi-GB) を pull するため、
|
|
109
|
+
本番 (Phase C-2) 50 task で +30-60 GB ストレージ + 数時間。dry-run では未実行。
|
|
110
|
+
|
|
111
|
+
`apply_only=true` で取得できる「適用率」は patch 生成品質の必要条件であり、
|
|
112
|
+
真の resolved rate (FAIL_TO_PASS pass) は C-2 で公式 harness を回して確認する。
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
{
|
|
2
|
+
"summary": {
|
|
3
|
+
"started_at": "2026-05-04T13:57:14.132952Z",
|
|
4
|
+
"finished_at": "2026-05-04T14:13:05.622956Z",
|
|
5
|
+
"model": "claude-sonnet-4-6",
|
|
6
|
+
"patch_mode": "whole-file",
|
|
7
|
+
"parallel": 1,
|
|
8
|
+
"tasks_total": 5,
|
|
9
|
+
"tasks_run": 5,
|
|
10
|
+
"patch_generated_count": 3,
|
|
11
|
+
"patch_applied_count": 3,
|
|
12
|
+
"applied_rate": 0.6,
|
|
13
|
+
"resolved_count": 0,
|
|
14
|
+
"resolved_rate": 0.0,
|
|
15
|
+
"cumulative_cost_usd": 0.852753,
|
|
16
|
+
"cost_cap_usd": 5.0,
|
|
17
|
+
"cost_cap_hit": false,
|
|
18
|
+
"apply_only": true
|
|
19
|
+
},
|
|
20
|
+
"results": [
|
|
21
|
+
{
|
|
22
|
+
"instance_id": "astropy__astropy-12907",
|
|
23
|
+
"repo": "astropy/astropy",
|
|
24
|
+
"model": "claude-sonnet-4-6",
|
|
25
|
+
"patch_mode": "whole-file",
|
|
26
|
+
"invoke_duration_sec": 300.01,
|
|
27
|
+
"claude_meta": {
|
|
28
|
+
"error": "timeout",
|
|
29
|
+
"cost_usd": 0.0
|
|
30
|
+
},
|
|
31
|
+
"diff_chars": 0,
|
|
32
|
+
"diff_gen_error": "no file blocks (or NO_FIX)",
|
|
33
|
+
"score": {
|
|
34
|
+
"task_id": "astropy__astropy-12907",
|
|
35
|
+
"patch_generated": false,
|
|
36
|
+
"patch_applies": false,
|
|
37
|
+
"apply_error": "empty patch",
|
|
38
|
+
"tests_run": false,
|
|
39
|
+
"tests_passed": 0,
|
|
40
|
+
"tests_failed": 0,
|
|
41
|
+
"test_log_excerpt": "",
|
|
42
|
+
"duration_sec": 5.245208740234375e-06,
|
|
43
|
+
"fail_to_pass_total": 0,
|
|
44
|
+
"pass_to_pass_total": 0,
|
|
45
|
+
"pass_to_pass_passed": 0
|
|
46
|
+
},
|
|
47
|
+
"cumulative_cost_usd": 0.0
|
|
48
|
+
},
|
|
49
|
+
{
|
|
50
|
+
"instance_id": "astropy__astropy-14182",
|
|
51
|
+
"repo": "astropy/astropy",
|
|
52
|
+
"model": "claude-sonnet-4-6",
|
|
53
|
+
"patch_mode": "whole-file",
|
|
54
|
+
"invoke_duration_sec": 55.24,
|
|
55
|
+
"claude_meta": {
|
|
56
|
+
"cost_usd": 0.23533215000000002,
|
|
57
|
+
"duration_ms": 47969,
|
|
58
|
+
"rc": 0
|
|
59
|
+
},
|
|
60
|
+
"diff_chars": 886,
|
|
61
|
+
"diff_gen_error": null,
|
|
62
|
+
"score": {
|
|
63
|
+
"task_id": "astropy__astropy-14182",
|
|
64
|
+
"patch_generated": true,
|
|
65
|
+
"patch_applies": true,
|
|
66
|
+
"apply_error": null,
|
|
67
|
+
"tests_run": false,
|
|
68
|
+
"tests_passed": 0,
|
|
69
|
+
"tests_failed": 0,
|
|
70
|
+
"test_log_excerpt": "",
|
|
71
|
+
"duration_sec": 34.36135983467102,
|
|
72
|
+
"fail_to_pass_total": 1,
|
|
73
|
+
"pass_to_pass_total": 9,
|
|
74
|
+
"pass_to_pass_passed": 0
|
|
75
|
+
},
|
|
76
|
+
"cumulative_cost_usd": 0.235332
|
|
77
|
+
},
|
|
78
|
+
{
|
|
79
|
+
"instance_id": "astropy__astropy-14365",
|
|
80
|
+
"repo": "astropy/astropy",
|
|
81
|
+
"model": "claude-sonnet-4-6",
|
|
82
|
+
"patch_mode": "whole-file",
|
|
83
|
+
"invoke_duration_sec": 119.49,
|
|
84
|
+
"claude_meta": {
|
|
85
|
+
"cost_usd": 0.29174715,
|
|
86
|
+
"duration_ms": 112311,
|
|
87
|
+
"rc": 0
|
|
88
|
+
},
|
|
89
|
+
"diff_chars": 1047,
|
|
90
|
+
"diff_gen_error": null,
|
|
91
|
+
"score": {
|
|
92
|
+
"task_id": "astropy__astropy-14365",
|
|
93
|
+
"patch_generated": true,
|
|
94
|
+
"patch_applies": true,
|
|
95
|
+
"apply_error": null,
|
|
96
|
+
"tests_run": false,
|
|
97
|
+
"tests_passed": 0,
|
|
98
|
+
"tests_failed": 0,
|
|
99
|
+
"test_log_excerpt": "",
|
|
100
|
+
"duration_sec": 34.99594187736511,
|
|
101
|
+
"fail_to_pass_total": 1,
|
|
102
|
+
"pass_to_pass_total": 8,
|
|
103
|
+
"pass_to_pass_passed": 0
|
|
104
|
+
},
|
|
105
|
+
"cumulative_cost_usd": 0.527079
|
|
106
|
+
},
|
|
107
|
+
{
|
|
108
|
+
"instance_id": "astropy__astropy-14995",
|
|
109
|
+
"repo": "astropy/astropy",
|
|
110
|
+
"model": "claude-sonnet-4-6",
|
|
111
|
+
"patch_mode": "whole-file",
|
|
112
|
+
"invoke_duration_sec": 144.61,
|
|
113
|
+
"claude_meta": {
|
|
114
|
+
"cost_usd": 0.32567415,
|
|
115
|
+
"duration_ms": 136801,
|
|
116
|
+
"rc": 0
|
|
117
|
+
},
|
|
118
|
+
"diff_chars": 618,
|
|
119
|
+
"diff_gen_error": null,
|
|
120
|
+
"score": {
|
|
121
|
+
"task_id": "astropy__astropy-14995",
|
|
122
|
+
"patch_generated": true,
|
|
123
|
+
"patch_applies": true,
|
|
124
|
+
"apply_error": null,
|
|
125
|
+
"tests_run": false,
|
|
126
|
+
"tests_passed": 0,
|
|
127
|
+
"tests_failed": 0,
|
|
128
|
+
"test_log_excerpt": "",
|
|
129
|
+
"duration_sec": 34.26308584213257,
|
|
130
|
+
"fail_to_pass_total": 1,
|
|
131
|
+
"pass_to_pass_total": 179,
|
|
132
|
+
"pass_to_pass_passed": 0
|
|
133
|
+
},
|
|
134
|
+
"cumulative_cost_usd": 0.852753
|
|
135
|
+
},
|
|
136
|
+
{
|
|
137
|
+
"instance_id": "astropy__astropy-6938",
|
|
138
|
+
"repo": "astropy/astropy",
|
|
139
|
+
"model": "claude-sonnet-4-6",
|
|
140
|
+
"patch_mode": "whole-file",
|
|
141
|
+
"invoke_duration_sec": 600.01,
|
|
142
|
+
"claude_meta": {
|
|
143
|
+
"error": "timeout",
|
|
144
|
+
"cost_usd": 0.0
|
|
145
|
+
},
|
|
146
|
+
"diff_chars": 0,
|
|
147
|
+
"diff_gen_error": "no file blocks (or NO_FIX)",
|
|
148
|
+
"score": {
|
|
149
|
+
"task_id": "astropy__astropy-6938",
|
|
150
|
+
"patch_generated": false,
|
|
151
|
+
"patch_applies": false,
|
|
152
|
+
"apply_error": "empty patch",
|
|
153
|
+
"tests_run": false,
|
|
154
|
+
"tests_passed": 0,
|
|
155
|
+
"tests_failed": 0,
|
|
156
|
+
"test_log_excerpt": "",
|
|
157
|
+
"duration_sec": 1.0967254638671875e-05,
|
|
158
|
+
"fail_to_pass_total": 0,
|
|
159
|
+
"pass_to_pass_total": 0,
|
|
160
|
+
"pass_to_pass_passed": 0
|
|
161
|
+
},
|
|
162
|
+
"cumulative_cost_usd": 0.852753
|
|
163
|
+
}
|
|
164
|
+
]
|
|
165
|
+
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
diff --git a/astropy/modeling/separable.py b/astropy/modeling/separable.py
|
|
2
|
+
--- a/astropy/modeling/separable.py
|
|
3
|
+
+++ b/astropy/modeling/separable.py
|
|
4
|
+
@@ -242,7 +242,7 @@
|
|
5
|
+
cright = _coord_matrix(right, 'right', noutp)
|
|
6
|
+
else:
|
|
7
|
+
cright = np.zeros((noutp, right.shape[1]))
|
|
8
|
+
- cright[-right.shape[0]:, -right.shape[1]:] = 1
|
|
9
|
+
+ cright[-right.shape[0]:, -right.shape[1]:] = right
|
|
10
|
+
|
|
11
|
+
return np.hstack([cleft, cright])
|
|
12
|
+
|