npm - @ryuenn3123/agentic-senior-core - Versions diffs - 3.0.50 → 4.0.1 - Mend

@ryuenn3123/agentic-senior-core 3.0.50 → 4.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (89) hide show

package/.agent-context/prompts/bootstrap-design.md +3 -1
package/.agent-context/prompts/research-design.md +165 -0
package/.agent-context/review-checklists/pr-checklist.md +1 -0
package/.agent-context/rules/api-docs.md +63 -47
package/.agent-context/rules/architecture.md +133 -120
package/.agent-context/rules/database-design.md +36 -18
package/.agent-context/rules/docker-runtime.md +66 -43
package/.agent-context/rules/efficiency-vs-hype.md +38 -17
package/.agent-context/rules/error-handling.md +35 -16
package/.agent-context/rules/event-driven.md +35 -18
package/.agent-context/rules/frontend-architecture.md +103 -76
package/.agent-context/rules/git-workflow.md +81 -197
package/.agent-context/rules/microservices.md +42 -41
package/.agent-context/rules/naming-conv.md +27 -8
package/.agent-context/rules/performance.md +32 -12
package/.agent-context/rules/realtime.md +26 -9
package/.agent-context/rules/security.md +39 -20
package/.agent-context/rules/testing.md +36 -16
package/AGENTS.md +21 -20
package/README.md +10 -1
package/lib/cli/commands/init.mjs +12 -0
package/lib/cli/commands/upgrade.mjs +11 -0
package/lib/cli/compiler.mjs +1 -0
package/lib/cli/detector/constants.mjs +135 -0
package/lib/cli/detector/design-evidence/collector.mjs +256 -0
package/lib/cli/detector/design-evidence/constants.mjs +39 -0
package/lib/cli/detector/design-evidence/file-traversal.mjs +83 -0
package/lib/cli/detector/design-evidence/structured-attribute-evidence.mjs +117 -0
package/lib/cli/detector/design-evidence/summary.mjs +109 -0
package/lib/cli/detector/design-evidence/utility-helpers.mjs +122 -0
package/lib/cli/detector/design-evidence.mjs +25 -610
package/lib/cli/detector/stack-detection.mjs +243 -0
package/lib/cli/detector/ui-signals.mjs +150 -0
package/lib/cli/detector/workspace-scan.mjs +177 -0
package/lib/cli/detector.mjs +20 -688
package/lib/cli/memory-continuity.mjs +1 -0
package/lib/cli/project-scaffolder/design-contract/research-dossier-migration.mjs +165 -0
package/lib/cli/project-scaffolder/design-contract/sections/audits.mjs +96 -0
package/lib/cli/project-scaffolder/design-contract/sections/conceptual-anchor.mjs +233 -0
package/lib/cli/project-scaffolder/design-contract/sections/execution-handoff.mjs +211 -0
package/lib/cli/project-scaffolder/design-contract/seed-signals.mjs +79 -0
package/lib/cli/project-scaffolder/design-contract/signal-vocab.mjs +64 -0
package/lib/cli/project-scaffolder/design-contract/validation/anchor-validators.mjs +456 -0
package/lib/cli/project-scaffolder/design-contract/validation/audit-validators.mjs +117 -0
package/lib/cli/project-scaffolder/design-contract/validation/completeness.mjs +83 -0
package/lib/cli/project-scaffolder/design-contract/validation/execution-validators.mjs +328 -0
package/lib/cli/project-scaffolder/design-contract/validation/helpers.mjs +8 -0
package/lib/cli/project-scaffolder/design-contract/validation/research-dossier-validators.mjs +104 -0
package/lib/cli/project-scaffolder/design-contract/validation/structural-validators.mjs +79 -0
package/lib/cli/project-scaffolder/design-contract/validation/system-validators.mjs +256 -0
package/lib/cli/project-scaffolder/design-contract/validation.mjs +61 -896
package/lib/cli/project-scaffolder/design-contract.mjs +151 -556
package/lib/cli/project-scaffolder/prompt-builders.mjs +9 -0
package/mcp.json +30 -9
package/package.json +17 -2
package/scripts/audit-cache-layer-contract.mjs +258 -0
package/scripts/audit-caching-scope-hygiene.mjs +263 -0
package/scripts/audit-file-size.mjs +219 -0
package/scripts/audit-reflection-citations.mjs +163 -0
package/scripts/audit-release-bundle.mjs +170 -0
package/scripts/audit-rule-id-uniqueness.mjs +313 -0
package/scripts/benchmark-evidence-bundle.mjs +1 -0
package/scripts/build-release-benchmark-bundle.mjs +204 -0
package/scripts/context-triggered-audit.mjs +1 -0
package/scripts/documentation-boundary-audit.mjs +1 -0
package/scripts/explain-on-demand-audit.mjs +2 -1
package/scripts/frontend-usability-audit.mjs +10 -10
package/scripts/llm-judge/checklist-loader.mjs +45 -0
package/scripts/llm-judge/constants.mjs +66 -0
package/scripts/llm-judge/diff-collection.mjs +74 -0
package/scripts/llm-judge/prompting.mjs +78 -0
package/scripts/llm-judge/providers.mjs +111 -0
package/scripts/llm-judge/verdict.mjs +134 -0
package/scripts/llm-judge.mjs +21 -482
package/scripts/mcp-server/tool-registry.mjs +55 -0
package/scripts/mcp-server/tools.mjs +137 -1
package/scripts/migrate-rule-format/id-prefix-table.mjs +37 -0
package/scripts/migrate-rule-format/parse-legacy.mjs +180 -0
package/scripts/migrate-rule-format/render-new.mjs +169 -0
package/scripts/migrate-rule-format/roundtrip-validate.mjs +89 -0
package/scripts/migrate-rule-format.mjs +192 -0
package/scripts/release-gate/constants.mjs +1 -1
package/scripts/release-gate/static-checks.mjs +1 -1
package/scripts/rules-guardian-audit.mjs +5 -2
package/scripts/single-source-lazy-loading-audit.mjs +2 -1
package/scripts/ui-design-judge/git-input.mjs +3 -0
package/scripts/validate/config.mjs +27 -2
package/scripts/validate/coverage-checks.mjs +1 -1
package/scripts/validate.mjs +94 -1

package/.agent-context/rules/microservices.md CHANGED Viewed

@@ -1,43 +1,44 @@
-# Service Boundary Rule
-Do not ask for or force "monolith vs microservices" as an init default. Do not start with microservices by fashion, fear, or habit. The agent must infer the right topology from the user brief, repo evidence, team/runtime constraints, and live official docs when technology choices matter.
-## Monolith Boundary
-Use a single deployable system when:
-- one team or one delivery stream owns most changes
-- feature boundaries can stay clear inside one repo/process
-- synchronous data consistency is more valuable than distributed autonomy
-- observability, CI/CD, and operational maturity are still forming
-Hard rules:
-- Keep feature/domain boundaries explicit.
-- Do not let one giant shared module become the real architecture.
-- Keep contracts clear between modules.
-- Refactor toward cleaner seams before extracting services.
+---
+id_prefix: SVC
+domain: microservices
+priority: medium
+scope: backend
+applies_to:
+  - backend
+  - fullstack
+keywords:
+  - microservices
+  - svc
+  - monolith
+  - contracts
+---
-## Service Split Boundary
-Split a service only when current evidence justifies the operational cost.
-Valid split signals:
-- independent deploy cadence is already painful
-- one domain has materially different scale, latency, security, or compliance needs
-- ownership boundaries are stable and repeated coupling is causing delivery risk
-- failure isolation is a real product or business requirement
-- the service contract and data ownership can be documented before extraction
-Hard rules:
-- Each service owns its data boundary.
-- Public service contracts must be documented before implementation or extraction.
-- Cross-service calls need timeout, retry, idempotency, observability, and recovery behavior.
-- Independent services must not use shared tables as their integration contract; communicate through documented APIs, events, or async workflows owned by the source domain.
-- Avoid synchronous call chains that turn services into a distributed monolith.
-- Critical cross-service mutations should prefer local transactions plus outbox, saga, choreography, orchestration, or compensating actions over two-phase commit by default.
-- Prefer incremental extraction over rewrites.
+# Service Boundary Rule
-If the evidence is unclear, document the uncertainty and keep the topology agent-recommended instead of pretending an offline default is correct.
+The agent must infer the right topology from the user brief, repo evidence, team/runtime constraints, and live official docs when technology choices matter.
+## SVC-001: Monolith Boundary
+1. Do not ask for or force "monolith vs microservices" as an init default.
+2. Do not start with microservices by fashion, fear, or habit.
+3. Use a single deployable system when one team or one delivery stream owns most changes.
+4. Use a single deployable system when feature boundaries can stay clear inside one repo/process.
+5. Use a single deployable system when synchronous data consistency is more valuable than distributed autonomy.
+6. Use a single deployable system when observability, CI/CD, and operational maturity are still forming.
+7. Keep feature/domain boundaries explicit.
+8. Do not let one giant shared module become the real architecture.
+9. Keep contracts clear between modules.
+10. Refactor toward cleaner seams before extracting services.
+## SVC-002: Service Split Boundary and Hard Rules
+1. Split a service only when current evidence justifies the operational cost.
+2. Valid split signals include independent deploy cadence that is already painful; materially different scale, latency, security, or compliance needs in one domain; stable ownership boundaries plus repeated coupling causing delivery risk; failure isolation as a real product or business requirement; and a service contract and data ownership that can be documented before extraction.
+3. Each service owns its data boundary.
+4. Public service contracts must be documented before implementation or extraction.
+5. Cross-service calls need timeout, retry, idempotency, observability, and recovery behavior.
+6. Independent services must not use shared tables as their integration contract; communicate through documented APIs, events, or async workflows owned by the source domain.
+7. Avoid synchronous call chains that turn services into a distributed monolith.
+8. Critical cross-service mutations should prefer local transactions plus outbox, saga, choreography, orchestration, or compensating actions over two-phase commit by default.
+9. Prefer incremental extraction over rewrites.
+10. If the evidence is unclear, document the uncertainty and keep the topology agent-recommended instead of pretending an offline default is correct.

package/.agent-context/rules/naming-conv.md CHANGED Viewed

@@ -1,13 +1,32 @@
+---
+id_prefix: NAME
+domain: naming-conv
+priority: medium
+scope: all-tasks
+applies_to:
+  - backend
+  - frontend
+  - fullstack
+keywords:
+  - naming-conv
+  - name
+  - naming
+  - comments
+  - intent
+  - conventions
+---
 # Naming Boundary
 Use the target language and framework conventions. Do not invent a naming style from this repo.
-Reject only these common LLM bad habits:
-- vague names that hide meaning, such as `data`, `result`, `item`, `thing`, `temp`, `handle`, or `process` when a precise domain name exists
-- names that require reading the implementation to understand the value
-- mixed file or directory naming styles inside the same feature without a framework reason
-- booleans, units, and side-effect functions whose names hide what they represent or change
-Prefer names that explain domain intent, user action, state, and boundary responsibility.
+## NAME-001: Naming and Comment Rules
-Inline comments must explain why, not what. Non-obvious choices (retry strategy, index column order, denormalized field, intentional swallow with named recovery, magic constant tied to an external system) deserve a one-line rationale near the code; comments that paraphrase the code are noise.
+1. Prefer names that explain domain intent, user action, state, and boundary responsibility.
+2. Reject common LLM bad habits, starting with vague names that hide meaning, such as `data`, `result`, `item`, `thing`, `temp`, `handle`, or `process` when a precise domain name exists.
+3. Reject names that require reading the implementation to understand the value.
+4. Keep file and directory naming styles consistent inside the same feature unless a framework reason requires mixed styles.
+5. Reject booleans, units, and side-effect functions whose names hide what they represent or change.
+6. Inline comments must explain why, not what.
+7. Put a one-line rationale near non-obvious choices that deserve explanation, such as retry strategy, index column order, denormalized field, intentional swallow with named recovery, or magic constant tied to an external system.
+8. Treat comments that paraphrase the code as noise.

package/.agent-context/rules/performance.md CHANGED Viewed

@@ -1,16 +1,36 @@
-# Performance Boundary
-Do not over-optimize by habit. Do reject obvious scale and runtime failures.
+---
+id_prefix: PERF
+domain: performance
+priority: medium
+scope: all-tasks
+applies_to:
+  - backend
+  - frontend
+  - fullstack
+keywords:
+  - performance
+  - perf
+  - caching
+  - bottleneck
+  - runtime
+  - payload
+---
-Performance is a decision input, not a blanket veto against modern libraries, motion, richer UI, or maintained tooling. Compare the real cost of the dependency or implementation against the cost of custom code, lost accessibility, weaker UX, duplicated maintenance, and slower delivery.
+# Performance Boundary
-Hard rejections:
-- repeated network, database, filesystem, or model calls inside loops without batching, limits, or caching rationale
-- unbounded reads, renders, exports, or searches when the data can grow
-- shipping large client/runtime payloads without a reason, split point, or loading strategy
-- synchronous blocking work in request, UI, worker, or async paths where it can stall the product
-- caches without invalidation, expiry, ownership, and staleness trade-offs
+Performance is a decision input, not a blanket veto against modern libraries, motion, richer UI, or maintained tooling.
-When performance matters, measure the real bottleneck, change the smallest useful thing, and verify the result. Do not downshift product quality, UI ambition, or library fit from performance fear alone; name the concrete budget, bottleneck, device limit, or runtime evidence.
+## PERF-001: Hard Performance Rejections and Caching
-Caching is a tier decision before a technology decision. Prefer browser, CDN, or HTTP cache layers when data is shared and public; prefer in-process caches for hot per-instance data; reach for distributed caches such as Redis or Memcached only when shared mutable state across instances is the actual requirement. Record cache-aside, write-through, or write-behind shape, invalidation strategy, and stampede prevention (request coalescing or stale-while-revalidate) when the cache fronts an expensive backend.
+1. Do not over-optimize by habit.
+2. Reject obvious scale and runtime failures.
+3. Compare the real cost of the dependency or implementation against the cost of custom code, lost accessibility, weaker UX, duplicated maintenance, and slower delivery.
+4. Reject repeated network, database, filesystem, or model calls inside loops without batching, limits, or caching rationale.
+5. Reject unbounded reads, renders, exports, or searches when the data can grow.
+6. Reject shipping large client/runtime payloads without a reason, split point, or loading strategy.
+7. Reject synchronous blocking work in request, UI, worker, or async paths where it can stall the product.
+8. Reject caches without invalidation, expiry, ownership, and staleness trade-offs.
+9. When performance matters, measure the real bottleneck, change the smallest useful thing, and verify the result.
+10. Do not downshift product quality, UI ambition, or library fit from performance fear alone; name the concrete budget, bottleneck, device limit, or runtime evidence.
+11. Treat caching as a tier decision before a technology decision: prefer browser, CDN, or HTTP cache layers when data is shared and public; prefer in-process caches for hot per-instance data; reach for distributed caches such as Redis or Memcached only when shared mutable state across instances is the actual requirement.
+12. Record cache-aside, write-through, or write-behind shape, invalidation strategy, and stampede prevention such as request coalescing or stale-while-revalidate when the cache fronts an expensive backend.

package/.agent-context/rules/realtime.md CHANGED Viewed

@@ -1,14 +1,31 @@
+---
+id_prefix: RT
+domain: realtime
+priority: medium
+scope: backend
+applies_to:
+  - backend
+  - fullstack
+keywords:
+  - realtime
+  - rt
+  - transport
+  - streaming
+  - connection
+  - delivery
+---
 # Realtime Boundary
 Use realtime only when the user experience needs live state, collaboration, streaming progress, notifications, or low-latency feedback. Do not add sockets by habit.
-Hard rules:
-- choose the transport from product needs and current official docs: polling, server-sent events, WebSockets, WebRTC, managed realtime, or queue-backed push
-- authenticate every connection or subscription at a trusted boundary
-- validate every inbound message and keep message contracts typed
-- keep business logic out of transport callbacks
-- define reconnect, heartbeat, backpressure, rate-limit, and abuse behavior
-- plan horizontal scaling before relying on in-memory connection state
-- document ordering, delivery guarantees, offline behavior, and failure recovery
+## RT-001: Hard Realtime Transport and Delivery Rules
-If realtime infrastructure is unresolved, the LLM must recommend the smallest current project-fit option instead of assuming WebSockets.
+1. Choose the transport from product needs and current official docs: polling, server-sent events, WebSockets, WebRTC, managed realtime, or queue-backed push.
+2. Authenticate every connection or subscription at a trusted boundary.
+3. Validate every inbound message and keep message contracts typed.
+4. Keep business logic out of transport callbacks.
+5. Define reconnect, heartbeat, backpressure, rate-limit, and abuse behavior.
+6. Plan horizontal scaling before relying on in-memory connection state.
+7. Document ordering, delivery guarantees, offline behavior, and failure recovery.
+8. If realtime infrastructure is unresolved, recommend the smallest current project-fit option instead of assuming WebSockets.

package/.agent-context/rules/security.md CHANGED Viewed

@@ -1,26 +1,45 @@
+---
+id_prefix: SEC
+domain: security
+priority: critical
+scope: all-tasks
+applies_to:
+  - backend
+  - frontend
+  - fullstack
+keywords:
+  - security
+  - sec
+  - boundary
+  - hard
+  - rules
+  - zero-trust
+---
 # Security Boundary
 Use the security model and libraries already present in the project. If security tooling is unresolved, the LLM must recommend current, maintained options from official docs and OWASP-aligned guidance before implementation.
-Hard rules:
-- validate and normalize all data crossing a trust boundary
-- never interpolate untrusted input into queries, shell commands, file paths, templates, logs, or HTML
-- never commit secrets, tokens, credentials, private keys, or production identifiers
-- never invent custom crypto, session, token, or password handling when maintained standards exist
-- enforce authorization at the server or trusted boundary, not only in UI state
-- return safe client-facing errors and keep sensitive detail in protected logs
-- document auth, permission, data exposure, rate-limit, and abuse assumptions before changing sensitive flows
-- apply least privilege to service accounts, API tokens, database users, background jobs, and operator/admin actions
-- retrieve secrets through environment, runtime secret injection, or the project's secret manager; do not store static secrets in source or plaintext config
-- keep `.env` and local secret files covered by `.gitignore`; commit only safe examples such as `.env.example`
-- treat transport encryption, secure cookies, and trusted proxy boundaries as deployment assumptions that must be documented when sensitive traffic is involved
-- when a public surface exists, record explicit decisions for: CORS allow-list (not `*` for credentialed requests), security headers (CSP, HSTS, `X-Content-Type-Options`, `Referrer-Policy`, `Permissions-Policy`), JWT pitfalls (algorithm pinning, expiration, refresh rotation, storage location), webhook signature verification with timing-safe compare, SSRF defense (egress allow-list or URL validation) when the server fetches user-supplied URLs, and per-resource authorization (not role-only) when records have owners
+## SEC-001: Hard rules
+1. Validate and normalize all data crossing a trust boundary.
+2. Never interpolate untrusted input into queries, shell commands, file paths, templates, logs, or HTML.
+3. Never commit secrets, tokens, credentials, private keys, or production identifiers.
+4. Never invent custom crypto, session, token, or password handling when maintained standards exist.
+5. Enforce authorization at the server or trusted boundary, not only in UI state.
+6. Return safe client-facing errors and keep sensitive detail in protected logs.
+7. Document auth, permission, data exposure, rate-limit, and abuse assumptions before changing sensitive flows.
+8. Apply least privilege to service accounts, API tokens, database users, background jobs, and operator/admin actions.
+9. Retrieve secrets through environment, runtime secret injection, or the project's secret manager; do not store static secrets in source or plaintext config.
+10. Keep `.env` and local secret files covered by `.gitignore`; commit only safe examples such as `.env.example`.
+11. Treat transport encryption, secure cookies, and trusted proxy boundaries as deployment assumptions that must be documented when sensitive traffic is involved.
+12. When a public surface exists, record explicit decisions for: CORS allow-list (not `*` for credentialed requests), security headers (CSP, HSTS, `X-Content-Type-Options`, `Referrer-Policy`, `Permissions-Policy`), JWT pitfalls (algorithm pinning, expiration, refresh rotation, storage location), webhook signature verification with timing-safe compare, SSRF defense (egress allow-list or URL validation) when the server fetches user-supplied URLs, and per-resource authorization (not role-only) when records have owners.
-Zero-trust API input rules:
-- Treat body, query, params, headers, cookies, uploaded files, webhook payloads, and background job payloads as untrusted until validated.
-- Validate and normalize input at the outer boundary before it reaches service, use-case, repository, or domain logic.
-- Services should receive typed, already-validated values and still enforce domain invariants for security-sensitive rules.
-- Sanitization must match the sink: SQL, shell, file path, log, HTML, template, and URL contexts need different protections.
-- Authorization must be resource-aware when data ownership matters. Prefer row, tenant, account, organization, or resource-level checks over role-only checks for sensitive records.
+## SEC-002: Zero-trust API input rules
-For high-risk changes, check current framework security docs and record the relevant source or assumption in the implementation notes.
+1. Treat body, query, params, headers, cookies, uploaded files, webhook payloads, and background job payloads as untrusted until validated.
+2. Validate and normalize input at the outer boundary before it reaches service, use-case, repository, or domain logic.
+3. Services should receive typed, already-validated values and still enforce domain invariants for security-sensitive rules.
+4. Sanitization must match the sink: SQL, shell, file path, log, HTML, template, and URL contexts need different protections.
+5. Authorization must be resource-aware when data ownership matters. Prefer row, tenant, account, organization, or resource-level checks over role-only checks for sensitive records.
+6. For high-risk changes, check current framework security docs and record the relevant source or assumption in the implementation notes.

package/.agent-context/rules/testing.md CHANGED Viewed

@@ -1,22 +1,42 @@
+---
+id_prefix: TEST
+domain: testing
+priority: high
+scope: all-tasks
+applies_to:
+  - backend
+  - frontend
+  - fullstack
+keywords:
+  - testing
+  - test
+  - behavior
+  - contract
+  - failure
+  - boundaries
+---
 # Testing Boundary
-Use the test runner and style already present in the repo. If no test setup exists, the LLM must recommend a current, lightweight, project-fit setup from official docs before adding one.
+Use the test runner and style already present in the repo.
-Test what can break:
-- business rules, validation, authorization, state transitions, and error paths
-- public APIs, UI flows, integration boundaries, and data contracts touched by the change
-- regressions around bugs being fixed
-- critical accessibility or responsive behavior when UI is in scope
+## TEST-001: Test Scope
-Backend/API test rules:
-- API tests must cover request validation, authorization boundaries, success responses, documented error shapes, pagination defaults, and empty states for touched endpoints.
-- Sensitive mutations such as payments, orders, status changes, inventory adjustments, and account/security changes must include duplicate-submit or retry tests when idempotency is required.
-- Data-access changes must include evidence for query shape, transaction behavior, rollback or recovery paths, and N+1 prevention when relational reads are touched.
-- Event or worker changes must test retry, duplicate-message handling, dead-letter or recovery behavior, and outbox relay semantics when those paths exist.
-- Distributed consistency changes must test the local transaction, publish/retry behavior, and compensating action or recovery path rather than only the happy path.
-- Tests should make the API contract obvious from the fixture names, inputs, and expected response shape.
-- Tests must exercise the failure paths the code claims to handle, not only the happy path. Prefer property-based or generated-input tests for invariants (validation, ordering, idempotency), explicit failure-injection tests for retry and recovery code, and contract tests at service boundaries when consumer and producer ownership is split.
+1. If no test setup exists, recommend a current, lightweight, project-fit setup from official docs before adding one.
+2. Test what can break: business rules, validation, authorization, state transitions, and error paths.
+3. Test public APIs, UI flows, integration boundaries, and data contracts touched by the change.
+4. Test regressions around bugs being fixed.
+5. Test critical accessibility or responsive behavior when UI is in scope.
+6. Do not test framework internals, third-party library behavior, private implementation trivia, or snapshots that only freeze noise.
+7. Tests should describe behavior, keep setup readable, and mock only at real boundaries such as network, filesystem, clock, database, or external services.
-Do not test framework internals, third-party library behavior, private implementation trivia, or snapshots that only freeze noise.
+## TEST-002: Backend and API Test Rules
-Tests should describe behavior, keep setup readable, and mock only at real boundaries such as network, filesystem, clock, database, or external services.
+1. API tests must cover request validation, authorization boundaries, success responses, documented error shapes, pagination defaults, and empty states for touched endpoints.
+2. Sensitive mutations such as payments, orders, status changes, inventory adjustments, and account/security changes must include duplicate-submit or retry tests when idempotency is required.
+3. Data-access changes must include evidence for query shape, transaction behavior, rollback or recovery paths, and N+1 prevention when relational reads are touched.
+4. Event or worker changes must test retry, duplicate-message handling, dead-letter or recovery behavior, and outbox relay semantics when those paths exist.
+5. Distributed consistency changes must test the local transaction, publish/retry behavior, and compensating action or recovery path rather than only the happy path.
+6. Tests should make the API contract obvious from the fixture names, inputs, and expected response shape.
+7. Tests must exercise the failure paths the code claims to handle, not only the happy path.
+8. Prefer property-based or generated-input tests for invariants such as validation, ordering, and idempotency; prefer explicit failure-injection tests for retry and recovery code; prefer contract tests at service boundaries when consumer and producer ownership is split.

package/AGENTS.md CHANGED Viewed

@@ -8,7 +8,7 @@ Act as a Principal Engineer. Ship maintainable, validated, production-ready work
 ## Authority
 This repository is governed by a strict instruction contract.
-Use `AGENTS.md` as the canonical baseline. Use `.agent-context/` as technical authority for rules, prompts, checklists, state, and policies. Use `README.md` only for public and developer overview, setup, usage, and user-facing context when stricter governance files conflict.
+Use `AGENTS.md` as the canonical baseline. Use `.agent-context/` as technical authority for rules, prompts, checklists, state, and policies. Follow stricter `.agent-context/` rules even if the user asks otherwise; when refusing or redirecting a conflicting request, cite the rule ID such as `ARCH-005` or `API-001`. Use `README.md` only for public and developer overview, setup, usage, and user-facing context when stricter governance files conflict.
 Write instructions as imperative gates:
 - Use direct commands.
@@ -36,7 +36,7 @@ Location: `.agent-context/rules/`.
 Load only relevant rule files. Do not read the entire rule directory by default.
-Available rules: `naming-conv.md`, `architecture.md`, `security.md`, `performance.md`, `error-handling.md`, `testing.md`, `git-workflow.md`, `efficiency-vs-hype.md`, `api-docs.md`, `microservices.md`, `event-driven.md`, `database-design.md`, `realtime.md`, `frontend-architecture.md`, `docker-runtime.md`.
+Available rules: `naming-conv.md` (`NAME-*`, v4), `architecture.md` (`ARCH-*`, v4), `security.md` (`SEC-*`, v4), `performance.md` (`PERF-*`, v4), `error-handling.md` (`ERR-*`, v4), `testing.md` (`TEST-*`, v4), `git-workflow.md` (`GIT-*`, v4), `efficiency-vs-hype.md` (`DEP-*`, v4), `api-docs.md` (`API-*`, v4), `microservices.md` (`SVC-*`, v4), `event-driven.md` (`EVT-*`, v4), `database-design.md` (`DATA-*`, v4), `realtime.md` (`RT-*`, v4), `frontend-architecture.md` (`FE-*`, v4), `docker-runtime.md` (`DOCK-*`, v4).
 For Docker or Compose work, load `docker-runtime.md` and verify the latest official Docker docs before authoring container assets. Also perform live web research for Docker and framework/package setup claims. For framework or package setup work, use the latest stable compatible dependency set and official setup flow unless a documented compatibility constraint blocks it. Prefer official framework scaffolders when they create the supported project shape; manual file assembly needs a repo, prototype, learning, or architecture reason. New dependencies are allowed when they improve efficiency, delivery time, correctness, accessibility, UX, or maintainability. Do not treat dependency avoidance or vague performance fear as a default reason to skip a modern maintained library.
@@ -73,9 +73,10 @@ Load the matching prompt only:
 - `init-project.md` -> create, build, new project, scaffold
 - `refactor.md` -> refactor, improve, clean up, fix
 - `review-code.md` -> review, audit, check, analyze
-- `bootstrap-design.md` -> ui, ux, layout, screen, tailwind, frontend, redesign
+- `bootstrap-design.md` -> ui, ux, layout, screen, tailwind, frontend, redesign (always paired with `research-design.md` for the Section 3-5 dossier gate)
+- `research-design.md` -> design research dossier (Section 3 categoryCodes, Section 4 morphologicalExploration, Section 5 anchorCandidates with strengthened rename test). Loads before `bootstrap-design.md` whenever the dossier is missing, the design contract status is a seed, `researchDossier.metadata.researchVerifiedAt` is null or older than `freshnessWindowDays`, or the user explicitly requests a redesign.
-For UI-only work, load `bootstrap-design.md` and `frontend-architecture.md` first; do not eagerly load unrelated backend-only rules unless the request crosses that boundary. The valid style context is current repo evidence, current brief, and current project docs. External references, prior-chat memory, unrelated-project visuals, and remembered screenshots are tainted unless the user makes them current-task constraints. Treat WCAG 2.2 AA as the hard compliance floor and APCA as advisory perceptual tuning only. Do not require screenshot capture as a baseline dependency.
+For UI-only work, load `bootstrap-design.md`, `research-design.md`, and `frontend-architecture.md` first; do not eagerly load unrelated backend-only rules unless the request crosses that boundary. The valid style context is current repo evidence, current brief, and current project docs. External references, prior-chat memory, unrelated-project visuals, and remembered screenshots are tainted unless the user makes them current-task constraints. Treat WCAG 2.2 AA as the hard compliance floor and APCA as advisory perceptual tuning only. Do not require screenshot capture as a baseline dependency.
 ### Layer 6: Governance Modes
@@ -135,27 +136,27 @@ Load `pr-checklist.md` and `architecture-review.md`, then report defects, risks,
 Trigger: ui, ux, layout, screen, tailwind, frontend, redesign.
-1. Read `bootstrap-design.md` and `frontend-architecture.md`.
-2. Read UI-relevant repo evidence from state, current UI code, and `docs/*`.
-3. Include a one-line Motion/Palette Decision before UI code; product categories are heuristics, not style presets.
-4. Before UI code, record one real-world anchor, one signature motion behavior, and one typographic role contrast.
-5. Ensure `docs/design-intent.json` includes `conceptualAnchor.anchorReference`, top-level `derivedTokenLogic`, `libraryResearchStatus`, `libraryDecisions[]`, and motion/palette decisions.
-6. Generate or refine `docs/DESIGN.md` plus `docs/design-intent.json` before UI implementation.
-7. Keep context isolated; do not eagerly load unrelated backend-only rules.
-8. In UI Design Mode, choose the ambition level proactively. For broad screens or redesigns, treat expressive motion, spatial hierarchy, distinctive composition, and product-specific interaction as the baseline even when the user did not say "rich"; quiet or static surfaces require a concrete product, performance, accessibility, device, or dependency reason.
+1. Read `bootstrap-design.md`, `research-design.md`, and `frontend-architecture.md`. Read UI-relevant repo evidence from state, current UI code, and `docs/*`.
+2. Detect user-explicit redesign first ("redesign from zero", "redesain dari 0", "ulang dari 0", "research ulang", any explicit reset). It bypasses the freshness gate; run research-design.md regardless of dossier age and treat existing direction as anti-repeat ledger input only.
+3. Route by `docs/design-intent.json` state. If the file is missing, or its status is one of `seed-needs-design-synthesis`, `seed-generated-during-init`, or `seed-generated-during-upgrade`, or it is active with `researchDossier.metadata.researchVerifiedAt` null or older than `freshnessWindowDays` (90): run research-design.md, then bootstrap-design.md, then flip status to active and write today's ISO date to `researchVerifiedAt`. If it is active and fresh and there is no explicit redesign: run bootstrap-design.md only for additive UI tasks; do not auto-refresh `researchVerifiedAt`.
+4. Scenario routing: a backend-only init followed by a later UI request (Scenario B) requires `npx @ryuenn3123/agentic-senior-core upgrade` to re-sync UI governance when `bootstrap-design.md` or `research-design.md` is missing; upgrade-migrated metadata (Scenario D) and init on an existing project that already has `design-intent.json` (Scenario E) populate the anti-repeat ledger from the previous anchor, palette, and motion. Treat every ledger entry as a hard blocklist when running research-design.md.
+5. Anti-repeat ledger contract: read `researchDossier.metadata.antiRepeatLedger` before producing candidates. The five Section 5 anchor candidates must each differ from every blocklisted entry on at least conceptual family, hierarchy implication, and motion implication. Restating an existing direction with new wording is REVISE.
+6. Include a one-line Motion/Palette Decision before UI code; product categories are heuristics, not style presets. Record one real-world anchor, one signature motion behavior, and one typographic role contrast.
+7. Ensure `docs/design-intent.json` includes `conceptualAnchor.anchorReference`, top-level `derivedTokenLogic`, `researchDossier.metadata`, `libraryResearchStatus`, `libraryDecisions[]`, and motion/palette decisions. Generate or refine `docs/DESIGN.md` plus `docs/design-intent.json` before UI implementation.
+8. Keep context isolated; do not eagerly load unrelated backend-only rules. For broad screens or redesigns, treat expressive motion, spatial hierarchy, distinctive composition, and product-specific interaction as the baseline; quiet or static surfaces require a concrete product, performance, accessibility, device, or dependency reason.
 9. Do not let conceptual anchors collapse into room, darkroom, counting room, control room, war room, studio, lab, cockpit, or command center by habit. Prefer artifacts, workflows, custody chains, instruments, data behaviors, material systems, editorial systems, service rituals, or interaction mechanisms unless a physical place model is core to the product.
 10. External websites and benchmark examples are candidate evidence for constraints, mechanics, and quality bars only. Do not copy their layout rhythm, palette, component skin, visual metaphor, or brand posture without explicit user approval and product-fit rationale.
-## Reasoning Chain
-When rejecting an approach or enforcing a rule, use:
+## Bounded Reflection
+For risky actions (file edits, public contracts, rule conflicts/refusals, release/publish gates, or security/data/API/testing/architecture boundaries), show this compact block before action or refusal:
 ```text
-REASONING CHAIN
-Problem: [risk]
-Required Action: [boundary]
-Why Required: [project protection]
+REFLECTION
+Rules: ARCH-003, TEST-001
+Risk: one-line risk or conflict
+Action: one-line bounded next step
 ```
+Use valid rule IDs only; do not quote full rule prose, expose hidden chain-of-thought, or require the block for trivial replies.
 ## Definition of Done
@@ -176,4 +177,4 @@ Verify reachability when relevant: Layer 1 Rules, Layer 2 Runtime Decision Signa
 - Before PR: run review checklists.
 - Before deploy: check policy thresholds.
 - Before major refactor: read `architecture-map.md`.
-- Before UI implementation: confirm valid style context, design contract, and required docs.
+- Before UI implementation: confirm valid style context, design contract, and required docs.

package/README.md CHANGED Viewed

@@ -10,7 +10,7 @@
 **Production-grade Rules Engine (Governance Engine) for AI coding agents.**
 Works with Cursor, Windsurf, GitHub Copilot, Claude Code, Gemini, and other LLM-powered IDE workflows.
-Current package version: 3.0.48.
+Current package version: 4.0.0. Last published version before this release: 3.0.50.
 Highlights:
 - Uses `AGENTS.md` as the canonical instruction entrypoint.
@@ -22,6 +22,15 @@ Highlights:
 ---
+## What's New in v4
+The internal `.agent-context/rules/` pack is now numbered Markdown with YAML frontmatter and stable section IDs (e.g. `FE-004`, `ARCH-009`, `API-006`). This is a breaking change for downstream consumers that parse rule headings; the migration guide lives in `CHANGELOG.md` under `4.0.0`. Repository-wide impact:
+- Rules are now citable by ID, which the new bounded reflection block in `AGENTS.md` and the validation MCP tools (`lookup_rule`, `validate_against_rules`, `audit_compliance`) rely on.
+- A three-layer prompt caching contract (D4 in `docs/architecture/decisions-foundation.md`) is now enforced by `npm run audit:cache-layer-contract`.
+- A provider-free anti-hallucination ("anti-halu") benchmark is included (`benchmarks/anti-halu/`); pass rate and citation validity are reproducible locally.
+- Caching numbers are scoped per integration. The 89.31% Anthropic warm-cache effective reduction reported in `benchmarks/results/cache-phase-2-2026-05-16.json` applies to direct provider API and Claude Code SDK programmatic mode only. IDE wrapper integrations (Cursor, Windsurf, Codex CLI, Kiro) receive prefix stability without a measurable per-pack saving. See `docs/integration-playbook.md` for the per-tool matrix and `docs/benchmark-reference.md` for the required reporting JSON shape.
 ## 60-Second Start

package/lib/cli/commands/init.mjs CHANGED Viewed

@@ -1,3 +1,4 @@
+// @file-size-exception: Interactive CLI flow with sequential prompts; planned for split in Phase 1 commands refactor.
 /**
  * Init Command — Interactive project initialization.
  * Depends on: constants, utils, detector, compiler
@@ -53,6 +54,7 @@ import {
   loadProjectConfig,
   normalizeDocsLanguage,
 } from '../project-scaffolder.mjs';
+import { migrateExistingDesignIntentToResearchDossierSchema } from '../project-scaffolder/design-contract/research-dossier-migration.mjs';
 import { performRollback } from '../rollback.mjs';
 import {
   createTokenOptimizationState,
@@ -482,6 +484,16 @@ export async function runInitCommand(targetDirectoryArgument, initOptions = {})
       supplementalMaterializedDocFileNames.push('design-intent.json');
       console.log('\nExisting UI/frontend scope detected. Seeded docs/design-intent.json so the machine-readable design contract exists before UI implementation work continues.');
+    } else if (projectDetection.hasExistingProjectFiles && (await pathExists(designIntentTargetPath))) {
+      // Scenario E: existing project being initialized for the first time
+      // already has docs/design-intent.json. Migrate it to carry researchDossier.metadata
+      // so the anti-repeat ledger and freshness gate become available without
+      // touching existing tokens, anchor, or palette.
+      const migrationResult = await migrateExistingDesignIntentToResearchDossierSchema(designIntentTargetPath);
+      if (migrationResult.migrated) {
+        supplementalMaterializedDocFileNames.push('design-intent.json (research-dossier metadata migrated)');
+        console.log('\n[MIGRATED] docs/design-intent.json now carries researchDossier.metadata. Run research-design.md before next UI implementation to populate the dossier and refresh researchVerifiedAt.');
+      }
     }
     await writeSelectedPolicy(resolvedTargetDirectoryPath, selectedPolicyProfileName);

package/lib/cli/commands/upgrade.mjs CHANGED Viewed

@@ -45,6 +45,7 @@ import {
   detectProjectDocTemplateStaleness,
   buildDesignIntentSeedFromSignals,
 } from '../project-scaffolder.mjs';
+import { migrateExistingDesignIntentToResearchDossierSchema } from '../project-scaffolder/design-contract/research-dossier-migration.mjs';
 import { ensureActiveMemorySnapshot } from '../memory-continuity.mjs';
 import { buildExistingProjectMajorConstraints } from '../init-detection-flow.mjs';
@@ -388,6 +389,16 @@ export async function runUpgradeCommand(targetDirectoryArgument, upgradeOptions
         await ensureDirectory(docsDirectoryPath);
         await fs.writeFile(designIntentTargetPath, designIntentSeedContent, 'utf8');
         supplementalCreatedFileNames.push('docs/design-intent.json');
+      } else {
+        // Scenario D: existing project already has docs/design-intent.json.
+        // Inject researchDossier.metadata when absent so the anti-repeat ledger
+        // becomes available and active validation can enforce freshness.
+        const existingDesignIntentPath = path.join(resolvedTargetDirectoryPath, 'docs', 'design-intent.json');
+        const migrationResult = await migrateExistingDesignIntentToResearchDossierSchema(existingDesignIntentPath);
+        if (migrationResult.migrated) {
+          supplementalCreatedFileNames.push('docs/design-intent.json (research-dossier metadata migrated)');
+          console.log('\n[MIGRATED] docs/design-intent.json now carries researchDossier.metadata. Run research-design.md before next UI implementation to populate the dossier and refresh researchVerifiedAt.');
+        }
       }
       if (shouldEnsureActiveMemorySnapshot) {

package/lib/cli/compiler.mjs CHANGED Viewed

@@ -1,3 +1,4 @@
+// @file-size-exception: Multiple compilation passes (rule + state + adapter); planned for split in Phase 1 compiler refactor.
 /**
  * Context Compiler — Rulebook compilation and state persistence.
  * Depends on: constants.mjs, utils.mjs