@ryuenn3123/agentic-senior-core 3.0.50 → 4.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. package/.agent-context/prompts/bootstrap-design.md +3 -1
  2. package/.agent-context/prompts/research-design.md +165 -0
  3. package/.agent-context/review-checklists/pr-checklist.md +1 -0
  4. package/.agent-context/rules/api-docs.md +63 -47
  5. package/.agent-context/rules/architecture.md +133 -120
  6. package/.agent-context/rules/database-design.md +36 -18
  7. package/.agent-context/rules/docker-runtime.md +66 -43
  8. package/.agent-context/rules/efficiency-vs-hype.md +38 -17
  9. package/.agent-context/rules/error-handling.md +35 -16
  10. package/.agent-context/rules/event-driven.md +35 -18
  11. package/.agent-context/rules/frontend-architecture.md +103 -76
  12. package/.agent-context/rules/git-workflow.md +81 -197
  13. package/.agent-context/rules/microservices.md +42 -41
  14. package/.agent-context/rules/naming-conv.md +27 -8
  15. package/.agent-context/rules/performance.md +32 -12
  16. package/.agent-context/rules/realtime.md +26 -9
  17. package/.agent-context/rules/security.md +39 -20
  18. package/.agent-context/rules/testing.md +36 -16
  19. package/AGENTS.md +21 -20
  20. package/README.md +10 -1
  21. package/lib/cli/commands/init.mjs +12 -0
  22. package/lib/cli/commands/upgrade.mjs +11 -0
  23. package/lib/cli/compiler.mjs +1 -0
  24. package/lib/cli/detector/constants.mjs +135 -0
  25. package/lib/cli/detector/design-evidence/collector.mjs +256 -0
  26. package/lib/cli/detector/design-evidence/constants.mjs +39 -0
  27. package/lib/cli/detector/design-evidence/file-traversal.mjs +83 -0
  28. package/lib/cli/detector/design-evidence/structured-attribute-evidence.mjs +117 -0
  29. package/lib/cli/detector/design-evidence/summary.mjs +109 -0
  30. package/lib/cli/detector/design-evidence/utility-helpers.mjs +122 -0
  31. package/lib/cli/detector/design-evidence.mjs +25 -610
  32. package/lib/cli/detector/stack-detection.mjs +243 -0
  33. package/lib/cli/detector/ui-signals.mjs +150 -0
  34. package/lib/cli/detector/workspace-scan.mjs +177 -0
  35. package/lib/cli/detector.mjs +20 -688
  36. package/lib/cli/memory-continuity.mjs +1 -0
  37. package/lib/cli/project-scaffolder/design-contract/research-dossier-migration.mjs +165 -0
  38. package/lib/cli/project-scaffolder/design-contract/sections/audits.mjs +96 -0
  39. package/lib/cli/project-scaffolder/design-contract/sections/conceptual-anchor.mjs +233 -0
  40. package/lib/cli/project-scaffolder/design-contract/sections/execution-handoff.mjs +211 -0
  41. package/lib/cli/project-scaffolder/design-contract/seed-signals.mjs +79 -0
  42. package/lib/cli/project-scaffolder/design-contract/signal-vocab.mjs +64 -0
  43. package/lib/cli/project-scaffolder/design-contract/validation/anchor-validators.mjs +456 -0
  44. package/lib/cli/project-scaffolder/design-contract/validation/audit-validators.mjs +117 -0
  45. package/lib/cli/project-scaffolder/design-contract/validation/completeness.mjs +83 -0
  46. package/lib/cli/project-scaffolder/design-contract/validation/execution-validators.mjs +328 -0
  47. package/lib/cli/project-scaffolder/design-contract/validation/helpers.mjs +8 -0
  48. package/lib/cli/project-scaffolder/design-contract/validation/research-dossier-validators.mjs +104 -0
  49. package/lib/cli/project-scaffolder/design-contract/validation/structural-validators.mjs +79 -0
  50. package/lib/cli/project-scaffolder/design-contract/validation/system-validators.mjs +256 -0
  51. package/lib/cli/project-scaffolder/design-contract/validation.mjs +61 -896
  52. package/lib/cli/project-scaffolder/design-contract.mjs +151 -556
  53. package/lib/cli/project-scaffolder/prompt-builders.mjs +9 -0
  54. package/mcp.json +30 -9
  55. package/package.json +17 -2
  56. package/scripts/audit-cache-layer-contract.mjs +258 -0
  57. package/scripts/audit-caching-scope-hygiene.mjs +263 -0
  58. package/scripts/audit-file-size.mjs +219 -0
  59. package/scripts/audit-reflection-citations.mjs +163 -0
  60. package/scripts/audit-release-bundle.mjs +170 -0
  61. package/scripts/audit-rule-id-uniqueness.mjs +313 -0
  62. package/scripts/benchmark-evidence-bundle.mjs +1 -0
  63. package/scripts/build-release-benchmark-bundle.mjs +204 -0
  64. package/scripts/context-triggered-audit.mjs +1 -0
  65. package/scripts/documentation-boundary-audit.mjs +1 -0
  66. package/scripts/explain-on-demand-audit.mjs +2 -1
  67. package/scripts/frontend-usability-audit.mjs +10 -10
  68. package/scripts/llm-judge/checklist-loader.mjs +45 -0
  69. package/scripts/llm-judge/constants.mjs +66 -0
  70. package/scripts/llm-judge/diff-collection.mjs +74 -0
  71. package/scripts/llm-judge/prompting.mjs +78 -0
  72. package/scripts/llm-judge/providers.mjs +111 -0
  73. package/scripts/llm-judge/verdict.mjs +134 -0
  74. package/scripts/llm-judge.mjs +21 -482
  75. package/scripts/mcp-server/tool-registry.mjs +55 -0
  76. package/scripts/mcp-server/tools.mjs +137 -1
  77. package/scripts/migrate-rule-format/id-prefix-table.mjs +37 -0
  78. package/scripts/migrate-rule-format/parse-legacy.mjs +180 -0
  79. package/scripts/migrate-rule-format/render-new.mjs +169 -0
  80. package/scripts/migrate-rule-format/roundtrip-validate.mjs +89 -0
  81. package/scripts/migrate-rule-format.mjs +192 -0
  82. package/scripts/release-gate/constants.mjs +1 -1
  83. package/scripts/release-gate/static-checks.mjs +1 -1
  84. package/scripts/rules-guardian-audit.mjs +5 -2
  85. package/scripts/single-source-lazy-loading-audit.mjs +2 -1
  86. package/scripts/ui-design-judge/git-input.mjs +3 -0
  87. package/scripts/validate/config.mjs +27 -2
  88. package/scripts/validate/coverage-checks.mjs +1 -1
  89. package/scripts/validate.mjs +94 -1
@@ -1,43 +1,44 @@
1
- # Service Boundary Rule
2
-
3
- Do not ask for or force "monolith vs microservices" as an init default. Do not start with microservices by fashion, fear, or habit. The agent must infer the right topology from the user brief, repo evidence, team/runtime constraints, and live official docs when technology choices matter.
4
-
5
- ## Monolith Boundary
6
-
7
- Use a single deployable system when:
8
-
9
- - one team or one delivery stream owns most changes
10
- - feature boundaries can stay clear inside one repo/process
11
- - synchronous data consistency is more valuable than distributed autonomy
12
- - observability, CI/CD, and operational maturity are still forming
13
-
14
- Hard rules:
15
-
16
- - Keep feature/domain boundaries explicit.
17
- - Do not let one giant shared module become the real architecture.
18
- - Keep contracts clear between modules.
19
- - Refactor toward cleaner seams before extracting services.
1
+ ---
2
+ id_prefix: SVC
3
+ domain: microservices
4
+ priority: medium
5
+ scope: backend
6
+ applies_to:
7
+ - backend
8
+ - fullstack
9
+ keywords:
10
+ - microservices
11
+ - svc
12
+ - monolith
13
+ - contracts
14
+ ---
20
15
 
21
- ## Service Split Boundary
22
-
23
- Split a service only when current evidence justifies the operational cost.
24
-
25
- Valid split signals:
26
-
27
- - independent deploy cadence is already painful
28
- - one domain has materially different scale, latency, security, or compliance needs
29
- - ownership boundaries are stable and repeated coupling is causing delivery risk
30
- - failure isolation is a real product or business requirement
31
- - the service contract and data ownership can be documented before extraction
32
-
33
- Hard rules:
34
-
35
- - Each service owns its data boundary.
36
- - Public service contracts must be documented before implementation or extraction.
37
- - Cross-service calls need timeout, retry, idempotency, observability, and recovery behavior.
38
- - Independent services must not use shared tables as their integration contract; communicate through documented APIs, events, or async workflows owned by the source domain.
39
- - Avoid synchronous call chains that turn services into a distributed monolith.
40
- - Critical cross-service mutations should prefer local transactions plus outbox, saga, choreography, orchestration, or compensating actions over two-phase commit by default.
41
- - Prefer incremental extraction over rewrites.
16
+ # Service Boundary Rule
42
17
 
43
- If the evidence is unclear, document the uncertainty and keep the topology agent-recommended instead of pretending an offline default is correct.
18
+ The agent must infer the right topology from the user brief, repo evidence, team/runtime constraints, and live official docs when technology choices matter.
19
+
20
+ ## SVC-001: Monolith Boundary
21
+
22
+ 1. Do not ask for or force "monolith vs microservices" as an init default.
23
+ 2. Do not start with microservices by fashion, fear, or habit.
24
+ 3. Use a single deployable system when one team or one delivery stream owns most changes.
25
+ 4. Use a single deployable system when feature boundaries can stay clear inside one repo/process.
26
+ 5. Use a single deployable system when synchronous data consistency is more valuable than distributed autonomy.
27
+ 6. Use a single deployable system when observability, CI/CD, and operational maturity are still forming.
28
+ 7. Keep feature/domain boundaries explicit.
29
+ 8. Do not let one giant shared module become the real architecture.
30
+ 9. Keep contracts clear between modules.
31
+ 10. Refactor toward cleaner seams before extracting services.
32
+
33
+ ## SVC-002: Service Split Boundary and Hard Rules
34
+
35
+ 1. Split a service only when current evidence justifies the operational cost.
36
+ 2. Valid split signals include: an independent deploy cadence that is already painful; one domain with materially different scale, latency, security, or compliance needs; stable ownership boundaries where repeated coupling is causing delivery risk; failure isolation that is a real product or business requirement; and a service contract and data ownership that can be documented before extraction.
37
+ 3. Hard rules (items 3–9): each service owns its data boundary.
38
+ 4. Public service contracts must be documented before implementation or extraction.
39
+ 5. Cross-service calls need timeout, retry, idempotency, observability, and recovery behavior.
40
+ 6. Independent services must not use shared tables as their integration contract; communicate through documented APIs, events, or async workflows owned by the source domain.
41
+ 7. Avoid synchronous call chains that turn services into a distributed monolith.
42
+ 8. Critical cross-service mutations should prefer local transactions plus outbox, saga, choreography, orchestration, or compensating actions over two-phase commit by default.
43
+ 9. Prefer incremental extraction over rewrites.
44
+ 10. If the evidence is unclear, document the uncertainty and keep the topology agent-recommended instead of pretending an offline default is correct.
@@ -1,13 +1,32 @@
1
+ ---
2
+ id_prefix: NAME
3
+ domain: naming-conv
4
+ priority: medium
5
+ scope: all-tasks
6
+ applies_to:
7
+ - backend
8
+ - frontend
9
+ - fullstack
10
+ keywords:
11
+ - naming-conv
12
+ - name
13
+ - naming
14
+ - comments
15
+ - intent
16
+ - conventions
17
+ ---
18
+
1
19
  # Naming Boundary
2
20
 
3
21
  Use the target language and framework conventions. Do not invent a naming style from this repo.
4
22
 
5
- Reject only these common LLM bad habits:
6
- - vague names that hide meaning, such as `data`, `result`, `item`, `thing`, `temp`, `handle`, or `process` when a precise domain name exists
7
- - names that require reading the implementation to understand the value
8
- - mixed file or directory naming styles inside the same feature without a framework reason
9
- - booleans, units, and side-effect functions whose names hide what they represent or change
10
-
11
- Prefer names that explain domain intent, user action, state, and boundary responsibility.
23
+ ## NAME-001: Naming and Comment Rules
12
24
 
13
- Inline comments must explain why, not what. Non-obvious choices (retry strategy, index column order, denormalized field, intentional swallow with named recovery, magic constant tied to an external system) deserve a one-line rationale near the code; comments that paraphrase the code are noise.
25
+ 1. Prefer names that explain domain intent, user action, state, and boundary responsibility.
26
+ 2. Reject vague names that hide meaning (a common LLM bad habit), such as `data`, `result`, `item`, `thing`, `temp`, `handle`, or `process` when a precise domain name exists.
27
+ 3. Reject names that require reading the implementation to understand the value.
28
+ 4. Keep file and directory naming styles consistent inside the same feature unless a framework reason requires mixed styles.
29
+ 5. Reject booleans, units, and side-effect functions whose names hide what they represent or change.
30
+ 6. Inline comments must explain why, not what.
31
+ 7. Put a one-line rationale near non-obvious choices that deserve explanation, such as retry strategy, index column order, denormalized field, intentional swallow with named recovery, or magic constant tied to an external system.
32
+ 8. Treat comments that paraphrase the code as noise.
@@ -1,16 +1,36 @@
1
- # Performance Boundary
2
-
3
- Do not over-optimize by habit. Do reject obvious scale and runtime failures.
1
+ ---
2
+ id_prefix: PERF
3
+ domain: performance
4
+ priority: medium
5
+ scope: all-tasks
6
+ applies_to:
7
+ - backend
8
+ - frontend
9
+ - fullstack
10
+ keywords:
11
+ - performance
12
+ - perf
13
+ - caching
14
+ - bottleneck
15
+ - runtime
16
+ - payload
17
+ ---
4
18
 
5
- Performance is a decision input, not a blanket veto against modern libraries, motion, richer UI, or maintained tooling. Compare the real cost of the dependency or implementation against the cost of custom code, lost accessibility, weaker UX, duplicated maintenance, and slower delivery.
19
+ # Performance Boundary
6
20
 
7
- Hard rejections:
8
- - repeated network, database, filesystem, or model calls inside loops without batching, limits, or caching rationale
9
- - unbounded reads, renders, exports, or searches when the data can grow
10
- - shipping large client/runtime payloads without a reason, split point, or loading strategy
11
- - synchronous blocking work in request, UI, worker, or async paths where it can stall the product
12
- - caches without invalidation, expiry, ownership, and staleness trade-offs
21
+ Performance is a decision input, not a blanket veto against modern libraries, motion, richer UI, or maintained tooling.
13
22
 
14
- When performance matters, measure the real bottleneck, change the smallest useful thing, and verify the result. Do not downshift product quality, UI ambition, or library fit from performance fear alone; name the concrete budget, bottleneck, device limit, or runtime evidence.
23
+ ## PERF-001: Hard Performance Rejections and Caching
15
24
 
16
- Caching is a tier decision before a technology decision. Prefer browser, CDN, or HTTP cache layers when data is shared and public; prefer in-process caches for hot per-instance data; reach for distributed caches such as Redis or Memcached only when shared mutable state across instances is the actual requirement. Record cache-aside, write-through, or write-behind shape, invalidation strategy, and stampede prevention (request coalescing or stale-while-revalidate) when the cache fronts an expensive backend.
25
+ 1. Do not over-optimize by habit.
26
+ 2. Reject obvious scale and runtime failures.
27
+ 3. Compare the real cost of the dependency or implementation against the cost of custom code, lost accessibility, weaker UX, duplicated maintenance, and slower delivery.
28
+ 4. Reject repeated network, database, filesystem, or model calls inside loops without batching, limits, or caching rationale.
29
+ 5. Reject unbounded reads, renders, exports, or searches when the data can grow.
30
+ 6. Reject shipping large client/runtime payloads without a reason, split point, or loading strategy.
31
+ 7. Reject synchronous blocking work in request, UI, worker, or async paths where it can stall the product.
32
+ 8. Reject caches without invalidation, expiry, ownership, and staleness trade-offs.
33
+ 9. When performance matters, measure the real bottleneck, change the smallest useful thing, and verify the result.
34
+ 10. Do not downshift product quality, UI ambition, or library fit from performance fear alone; name the concrete budget, bottleneck, device limit, or runtime evidence.
35
+ 11. Treat caching as a tier decision before a technology decision: prefer browser, CDN, or HTTP cache layers when data is shared and public; prefer in-process caches for hot per-instance data; reach for distributed caches such as Redis or Memcached only when shared mutable state across instances is the actual requirement.
36
+ 12. Record cache-aside, write-through, or write-behind shape, invalidation strategy, and stampede prevention such as request coalescing or stale-while-revalidate when the cache fronts an expensive backend.
@@ -1,14 +1,31 @@
1
+ ---
2
+ id_prefix: RT
3
+ domain: realtime
4
+ priority: medium
5
+ scope: backend
6
+ applies_to:
7
+ - backend
8
+ - fullstack
9
+ keywords:
10
+ - realtime
11
+ - rt
12
+ - transport
13
+ - streaming
14
+ - connection
15
+ - delivery
16
+ ---
17
+
1
18
  # Realtime Boundary
2
19
 
3
20
  Use realtime only when the user experience needs live state, collaboration, streaming progress, notifications, or low-latency feedback. Do not add sockets by habit.
4
21
 
5
- Hard rules:
6
- - choose the transport from product needs and current official docs: polling, server-sent events, WebSockets, WebRTC, managed realtime, or queue-backed push
7
- - authenticate every connection or subscription at a trusted boundary
8
- - validate every inbound message and keep message contracts typed
9
- - keep business logic out of transport callbacks
10
- - define reconnect, heartbeat, backpressure, rate-limit, and abuse behavior
11
- - plan horizontal scaling before relying on in-memory connection state
12
- - document ordering, delivery guarantees, offline behavior, and failure recovery
22
+ ## RT-001: Hard Realtime Transport and Delivery Rules
13
23
 
14
- If realtime infrastructure is unresolved, the LLM must recommend the smallest current project-fit option instead of assuming WebSockets.
24
+ 1. Choose the transport from product needs and current official docs: polling, server-sent events, WebSockets, WebRTC, managed realtime, or queue-backed push.
25
+ 2. Authenticate every connection or subscription at a trusted boundary.
26
+ 3. Validate every inbound message and keep message contracts typed.
27
+ 4. Keep business logic out of transport callbacks.
28
+ 5. Define reconnect, heartbeat, backpressure, rate-limit, and abuse behavior.
29
+ 6. Plan horizontal scaling before relying on in-memory connection state.
30
+ 7. Document ordering, delivery guarantees, offline behavior, and failure recovery.
31
+ 8. If realtime infrastructure is unresolved, recommend the smallest current project-fit option instead of assuming WebSockets.
@@ -1,26 +1,45 @@
1
+ ---
2
+ id_prefix: SEC
3
+ domain: security
4
+ priority: critical
5
+ scope: all-tasks
6
+ applies_to:
7
+ - backend
8
+ - frontend
9
+ - fullstack
10
+ keywords:
11
+ - security
12
+ - sec
13
+ - boundary
14
+ - hard
15
+ - rules
16
+ - zero-trust
17
+ ---
18
+
1
19
  # Security Boundary
2
20
 
3
21
  Use the security model and libraries already present in the project. If security tooling is unresolved, the LLM must recommend current, maintained options from official docs and OWASP-aligned guidance before implementation.
4
22
 
5
- Hard rules:
6
- - validate and normalize all data crossing a trust boundary
7
- - never interpolate untrusted input into queries, shell commands, file paths, templates, logs, or HTML
8
- - never commit secrets, tokens, credentials, private keys, or production identifiers
9
- - never invent custom crypto, session, token, or password handling when maintained standards exist
10
- - enforce authorization at the server or trusted boundary, not only in UI state
11
- - return safe client-facing errors and keep sensitive detail in protected logs
12
- - document auth, permission, data exposure, rate-limit, and abuse assumptions before changing sensitive flows
13
- - apply least privilege to service accounts, API tokens, database users, background jobs, and operator/admin actions
14
- - retrieve secrets through environment, runtime secret injection, or the project's secret manager; do not store static secrets in source or plaintext config
15
- - keep `.env` and local secret files covered by `.gitignore`; commit only safe examples such as `.env.example`
16
- - treat transport encryption, secure cookies, and trusted proxy boundaries as deployment assumptions that must be documented when sensitive traffic is involved
17
- - when a public surface exists, record explicit decisions for: CORS allow-list (not `*` for credentialed requests), security headers (CSP, HSTS, `X-Content-Type-Options`, `Referrer-Policy`, `Permissions-Policy`), JWT pitfalls (algorithm pinning, expiration, refresh rotation, storage location), webhook signature verification with timing-safe compare, SSRF defense (egress allow-list or URL validation) when the server fetches user-supplied URLs, and per-resource authorization (not role-only) when records have owners
23
+ ## SEC-001: Hard rules
24
+
25
+ 1. Validate and normalize all data crossing a trust boundary.
26
+ 2. Never interpolate untrusted input into queries, shell commands, file paths, templates, logs, or HTML.
27
+ 3. Never commit secrets, tokens, credentials, private keys, or production identifiers.
28
+ 4. Never invent custom crypto, session, token, or password handling when maintained standards exist.
29
+ 5. Enforce authorization at the server or trusted boundary, not only in UI state.
30
+ 6. Return safe client-facing errors and keep sensitive detail in protected logs.
31
+ 7. Document auth, permission, data exposure, rate-limit, and abuse assumptions before changing sensitive flows.
32
+ 8. Apply least privilege to service accounts, API tokens, database users, background jobs, and operator/admin actions.
33
+ 9. Retrieve secrets through environment, runtime secret injection, or the project's secret manager; do not store static secrets in source or plaintext config.
34
+ 10. Keep `.env` and local secret files covered by `.gitignore`; commit only safe examples such as `.env.example`.
35
+ 11. Treat transport encryption, secure cookies, and trusted proxy boundaries as deployment assumptions that must be documented when sensitive traffic is involved.
36
+ 12. When a public surface exists, record explicit decisions for: CORS allow-list (not `*` for credentialed requests), security headers (CSP, HSTS, `X-Content-Type-Options`, `Referrer-Policy`, `Permissions-Policy`), JWT pitfalls (algorithm pinning, expiration, refresh rotation, storage location), webhook signature verification with timing-safe compare, SSRF defense (egress allow-list or URL validation) when the server fetches user-supplied URLs, and per-resource authorization (not role-only) when records have owners.
18
37
 
19
- Zero-trust API input rules:
20
- - Treat body, query, params, headers, cookies, uploaded files, webhook payloads, and background job payloads as untrusted until validated.
21
- - Validate and normalize input at the outer boundary before it reaches service, use-case, repository, or domain logic.
22
- - Services should receive typed, already-validated values and still enforce domain invariants for security-sensitive rules.
23
- - Sanitization must match the sink: SQL, shell, file path, log, HTML, template, and URL contexts need different protections.
24
- - Authorization must be resource-aware when data ownership matters. Prefer row, tenant, account, organization, or resource-level checks over role-only checks for sensitive records.
38
+ ## SEC-002: Zero-trust API input rules
25
39
 
26
- For high-risk changes, check current framework security docs and record the relevant source or assumption in the implementation notes.
40
+ 1. Treat body, query, params, headers, cookies, uploaded files, webhook payloads, and background job payloads as untrusted until validated.
41
+ 2. Validate and normalize input at the outer boundary before it reaches service, use-case, repository, or domain logic.
42
+ 3. Services should receive typed, already-validated values and still enforce domain invariants for security-sensitive rules.
43
+ 4. Sanitization must match the sink: SQL, shell, file path, log, HTML, template, and URL contexts need different protections.
44
+ 5. Authorization must be resource-aware when data ownership matters. Prefer row, tenant, account, organization, or resource-level checks over role-only checks for sensitive records.
45
+ 6. For high-risk changes, check current framework security docs and record the relevant source or assumption in the implementation notes.
@@ -1,22 +1,42 @@
1
+ ---
2
+ id_prefix: TEST
3
+ domain: testing
4
+ priority: high
5
+ scope: all-tasks
6
+ applies_to:
7
+ - backend
8
+ - frontend
9
+ - fullstack
10
+ keywords:
11
+ - testing
12
+ - test
13
+ - behavior
14
+ - contract
15
+ - failure
16
+ - boundaries
17
+ ---
18
+
1
19
  # Testing Boundary
2
20
 
3
- Use the test runner and style already present in the repo. If no test setup exists, the LLM must recommend a current, lightweight, project-fit setup from official docs before adding one.
21
+ Use the test runner and style already present in the repo.
4
22
 
5
- Test what can break:
6
- - business rules, validation, authorization, state transitions, and error paths
7
- - public APIs, UI flows, integration boundaries, and data contracts touched by the change
8
- - regressions around bugs being fixed
9
- - critical accessibility or responsive behavior when UI is in scope
23
+ ## TEST-001: Test Scope
10
24
 
11
- Backend/API test rules:
12
- - API tests must cover request validation, authorization boundaries, success responses, documented error shapes, pagination defaults, and empty states for touched endpoints.
13
- - Sensitive mutations such as payments, orders, status changes, inventory adjustments, and account/security changes must include duplicate-submit or retry tests when idempotency is required.
14
- - Data-access changes must include evidence for query shape, transaction behavior, rollback or recovery paths, and N+1 prevention when relational reads are touched.
15
- - Event or worker changes must test retry, duplicate-message handling, dead-letter or recovery behavior, and outbox relay semantics when those paths exist.
16
- - Distributed consistency changes must test the local transaction, publish/retry behavior, and compensating action or recovery path rather than only the happy path.
17
- - Tests should make the API contract obvious from the fixture names, inputs, and expected response shape.
18
- - Tests must exercise the failure paths the code claims to handle, not only the happy path. Prefer property-based or generated-input tests for invariants (validation, ordering, idempotency), explicit failure-injection tests for retry and recovery code, and contract tests at service boundaries when consumer and producer ownership is split.
25
+ 1. If no test setup exists, recommend a current, lightweight, project-fit setup from official docs before adding one.
26
+ 2. Test what can break: business rules, validation, authorization, state transitions, and error paths.
27
+ 3. Test public APIs, UI flows, integration boundaries, and data contracts touched by the change.
28
+ 4. Test regressions around bugs being fixed.
29
+ 5. Test critical accessibility or responsive behavior when UI is in scope.
30
+ 6. Do not test framework internals, third-party library behavior, private implementation trivia, or snapshots that only freeze noise.
31
+ 7. Tests should describe behavior, keep setup readable, and mock only at real boundaries such as network, filesystem, clock, database, or external services.
19
32
 
20
- Do not test framework internals, third-party library behavior, private implementation trivia, or snapshots that only freeze noise.
33
+ ## TEST-002: Backend and API Test Rules
21
34
 
22
- Tests should describe behavior, keep setup readable, and mock only at real boundaries such as network, filesystem, clock, database, or external services.
35
+ 1. API tests must cover request validation, authorization boundaries, success responses, documented error shapes, pagination defaults, and empty states for touched endpoints.
36
+ 2. Sensitive mutations such as payments, orders, status changes, inventory adjustments, and account/security changes must include duplicate-submit or retry tests when idempotency is required.
37
+ 3. Data-access changes must include evidence for query shape, transaction behavior, rollback or recovery paths, and N+1 prevention when relational reads are touched.
38
+ 4. Event or worker changes must test retry, duplicate-message handling, dead-letter or recovery behavior, and outbox relay semantics when those paths exist.
39
+ 5. Distributed consistency changes must test the local transaction, publish/retry behavior, and compensating action or recovery path rather than only the happy path.
40
+ 6. Tests should make the API contract obvious from the fixture names, inputs, and expected response shape.
41
+ 7. Tests must exercise the failure paths the code claims to handle, not only the happy path.
42
+ 8. Prefer property-based or generated-input tests for invariants such as validation, ordering, and idempotency; prefer explicit failure-injection tests for retry and recovery code; prefer contract tests at service boundaries when consumer and producer ownership is split.
package/AGENTS.md CHANGED
@@ -8,7 +8,7 @@ Act as a Principal Engineer. Ship maintainable, validated, production-ready work
8
8
  ## Authority
9
9
  This repository is governed by a strict instruction contract.
10
10
 
11
- Use `AGENTS.md` as the canonical baseline. Use `.agent-context/` as technical authority for rules, prompts, checklists, state, and policies. Use `README.md` only for public and developer overview, setup, usage, and user-facing context when stricter governance files conflict.
11
+ Use `AGENTS.md` as the canonical baseline. Use `.agent-context/` as technical authority for rules, prompts, checklists, state, and policies. Follow stricter `.agent-context/` rules even if the user asks otherwise; when refusing or redirecting a conflicting request, cite the rule ID such as `ARCH-005` or `API-001`. Use `README.md` only for public and developer overview, setup, usage, and user-facing context; when it conflicts with stricter governance files, the governance files take precedence.
12
12
 
13
13
  Write instructions as imperative gates:
14
14
  - Use direct commands.
@@ -36,7 +36,7 @@ Location: `.agent-context/rules/`.
36
36
 
37
37
  Load only relevant rule files. Do not read the entire rule directory by default.
38
38
 
39
- Available rules: `naming-conv.md`, `architecture.md`, `security.md`, `performance.md`, `error-handling.md`, `testing.md`, `git-workflow.md`, `efficiency-vs-hype.md`, `api-docs.md`, `microservices.md`, `event-driven.md`, `database-design.md`, `realtime.md`, `frontend-architecture.md`, `docker-runtime.md`.
39
+ Available rules: `naming-conv.md` (`NAME-*`, v4), `architecture.md` (`ARCH-*`, v4), `security.md` (`SEC-*`, v4), `performance.md` (`PERF-*`, v4), `error-handling.md` (`ERR-*`, v4), `testing.md` (`TEST-*`, v4), `git-workflow.md` (`GIT-*`, v4), `efficiency-vs-hype.md` (`DEP-*`, v4), `api-docs.md` (`API-*`, v4), `microservices.md` (`SVC-*`, v4), `event-driven.md` (`EVT-*`, v4), `database-design.md` (`DATA-*`, v4), `realtime.md` (`RT-*`, v4), `frontend-architecture.md` (`FE-*`, v4), `docker-runtime.md` (`DOCK-*`, v4).
40
40
 
41
41
  For Docker or Compose work, load `docker-runtime.md` and verify the latest official Docker docs before authoring container assets. Also perform live web research for Docker and framework/package setup claims. For framework or package setup work, use the latest stable compatible dependency set and official setup flow unless a documented compatibility constraint blocks it. Prefer official framework scaffolders when they create the supported project shape; manual file assembly needs a repo, prototype, learning, or architecture reason. New dependencies are allowed when they improve efficiency, delivery time, correctness, accessibility, UX, or maintainability. Do not treat dependency avoidance or vague performance fear as a default reason to skip a modern maintained library.
42
42
 
@@ -73,9 +73,10 @@ Load the matching prompt only:
73
73
  - `init-project.md` -> create, build, new project, scaffold
74
74
  - `refactor.md` -> refactor, improve, clean up, fix
75
75
  - `review-code.md` -> review, audit, check, analyze
76
- - `bootstrap-design.md` -> ui, ux, layout, screen, tailwind, frontend, redesign
76
+ - `bootstrap-design.md` -> ui, ux, layout, screen, tailwind, frontend, redesign (always paired with `research-design.md` for the Section 3-5 dossier gate)
77
+ - `research-design.md` -> design research dossier (Section 3 categoryCodes, Section 4 morphologicalExploration, Section 5 anchorCandidates with strengthened rename test). Loads before `bootstrap-design.md` whenever the dossier is missing, the design contract status is a seed, `researchDossier.metadata.researchVerifiedAt` is null or older than `freshnessWindowDays`, or the user explicitly requests a redesign.
77
78
 
78
- For UI-only work, load `bootstrap-design.md` and `frontend-architecture.md` first; do not eagerly load unrelated backend-only rules unless the request crosses that boundary. The valid style context is current repo evidence, current brief, and current project docs. External references, prior-chat memory, unrelated-project visuals, and remembered screenshots are tainted unless the user makes them current-task constraints. Treat WCAG 2.2 AA as the hard compliance floor and APCA as advisory perceptual tuning only. Do not require screenshot capture as a baseline dependency.
79
+ For UI-only work, load `bootstrap-design.md`, `research-design.md`, and `frontend-architecture.md` first; do not eagerly load unrelated backend-only rules unless the request crosses that boundary. The valid style context is current repo evidence, current brief, and current project docs. External references, prior-chat memory, unrelated-project visuals, and remembered screenshots are tainted unless the user makes them current-task constraints. Treat WCAG 2.2 AA as the hard compliance floor and APCA as advisory perceptual tuning only. Do not require screenshot capture as a baseline dependency.
79
80
 
80
81
  ### Layer 6: Governance Modes
81
82
 
@@ -135,27 +136,27 @@ Load `pr-checklist.md` and `architecture-review.md`, then report defects, risks,
135
136
 
136
137
  Trigger: ui, ux, layout, screen, tailwind, frontend, redesign.
137
138
 
138
- 1. Read `bootstrap-design.md` and `frontend-architecture.md`.
139
- 2. Read UI-relevant repo evidence from state, current UI code, and `docs/*`.
140
- 3. Include a one-line Motion/Palette Decision before UI code; product categories are heuristics, not style presets.
141
- 4. Before UI code, record one real-world anchor, one signature motion behavior, and one typographic role contrast.
142
- 5. Ensure `docs/design-intent.json` includes `conceptualAnchor.anchorReference`, top-level `derivedTokenLogic`, `libraryResearchStatus`, `libraryDecisions[]`, and motion/palette decisions.
143
- 6. Generate or refine `docs/DESIGN.md` plus `docs/design-intent.json` before UI implementation.
144
- 7. Keep context isolated; do not eagerly load unrelated backend-only rules.
145
- 8. In UI Design Mode, choose the ambition level proactively. For broad screens or redesigns, treat expressive motion, spatial hierarchy, distinctive composition, and product-specific interaction as the baseline even when the user did not say "rich"; quiet or static surfaces require a concrete product, performance, accessibility, device, or dependency reason.
139
+ 1. Read `bootstrap-design.md`, `research-design.md`, and `frontend-architecture.md`. Read UI-relevant repo evidence from state, current UI code, and `docs/*`.
140
+ 2. Detect user-explicit redesign first ("redesign from zero", "redesain dari 0", "ulang dari 0", "research ulang", any explicit reset). An explicit redesign request bypasses the freshness gate; run `research-design.md` regardless of dossier age and treat the existing direction as anti-repeat ledger input only.
141
+ 3. Route by `docs/design-intent.json` state. If the file is missing, OR its status is one of `seed-needs-design-synthesis`, `seed-generated-during-init`, `seed-generated-during-upgrade`, OR it is active with `researchDossier.metadata.researchVerifiedAt` null or older than `freshnessWindowDays` (90): run `research-design.md`, then `bootstrap-design.md`, then flip status to active and write today's ISO date to `researchVerifiedAt`. If it is active and fresh and there is no explicit redesign: run only `bootstrap-design.md` for additive UI tasks; do not auto-refresh `researchVerifiedAt`.
142
+ 4. Scenario routing: a backend-only init followed by a later UI request (Scenario B) requires `npx @ryuenn3123/agentic-senior-core upgrade` to re-sync UI governance when `bootstrap-design.md` or `research-design.md` is missing; upgrade-migrated metadata (Scenario D) and init on an existing project that already had `design-intent.json` (Scenario E) populate the anti-repeat ledger from the previous anchor, palette, and motion. Treat every ledger entry as a hard blocklist when running `research-design.md`.
143
+ 5. Anti-repeat ledger contract: read `researchDossier.metadata.antiRepeatLedger` before producing candidates. The five Section 5 anchor candidates must each differ from every blocklisted entry on at least conceptual family, hierarchy implication, and motion implication. Restating an existing direction with new wording is REVISE.
144
+ 6. Include a one-line Motion/Palette Decision before UI code; product categories are heuristics, not style presets. Record one real-world anchor, one signature motion behavior, and one typographic role contrast.
145
+ 7. Ensure `docs/design-intent.json` includes `conceptualAnchor.anchorReference`, top-level `derivedTokenLogic`, `researchDossier.metadata`, `libraryResearchStatus`, `libraryDecisions[]`, and motion/palette decisions. Generate or refine `docs/DESIGN.md` plus `docs/design-intent.json` before UI implementation.
146
+ 8. Keep context isolated; do not eagerly load unrelated backend-only rules. For broad screens or redesigns, treat expressive motion, spatial hierarchy, distinctive composition, and product-specific interaction as the baseline; quiet or static surfaces require a concrete product, performance, accessibility, device, or dependency reason.
146
147
  9. Do not let conceptual anchors collapse into room, darkroom, counting room, control room, war room, studio, lab, cockpit, or command center by habit. Prefer artifacts, workflows, custody chains, instruments, data behaviors, material systems, editorial systems, service rituals, or interaction mechanisms unless a physical place model is core to the product.
147
148
  10. External websites and benchmark examples are candidate evidence for constraints, mechanics, and quality bars only. Do not copy their layout rhythm, palette, component skin, visual metaphor, or brand posture without explicit user approval and product-fit rationale.
148
149
 
149
- ## Reasoning Chain
150
-
151
- When rejecting an approach or enforcing a rule, use:
150
+ ## Bounded Reflection
151
+ For risky actions (file edits, public contracts, rule conflicts/refusals, release/publish gates, or security/data/API/testing/architecture boundaries), show this compact block before action or refusal:
152
152
 
153
153
  ```text
154
- REASONING CHAIN
155
- Problem: [risk]
156
- Required Action: [boundary]
157
- Why Required: [project protection]
154
+ REFLECTION
155
+ Rules: ARCH-003, TEST-001
156
+ Risk: one-line risk or conflict
157
+ Action: one-line bounded next step
158
158
  ```
159
+ Use valid rule IDs only; do not quote full rule prose, expose hidden chain-of-thought, or require the block for trivial replies.
159
160
 
160
161
  ## Definition of Done
161
162
 
@@ -176,4 +177,4 @@ Verify reachability when relevant: Layer 1 Rules, Layer 2 Runtime Decision Signa
176
177
  - Before PR: run review checklists.
177
178
  - Before deploy: check policy thresholds.
178
179
  - Before major refactor: read `architecture-map.md`.
179
- - Before UI implementation: confirm valid style context, design contract, and required docs.
180
+ - Before UI implementation: confirm valid style context, design contract, and required docs.
package/README.md CHANGED
@@ -10,7 +10,7 @@
10
10
  **Production-grade Rules Engine (Governance Engine) for AI coding agents.**
11
11
  Works with Cursor, Windsurf, GitHub Copilot, Claude Code, Gemini, and other LLM-powered IDE workflows.
12
12
 
13
- Current package version: 3.0.48.
13
+ Current package version: 4.0.0. Last published version before this release: 3.0.50.
14
14
 
15
15
  Highlights:
16
16
  - Uses `AGENTS.md` as the canonical instruction entrypoint.
@@ -22,6 +22,15 @@ Highlights:
22
22
 
23
23
  ---
24
24
 
25
+ ## What's New in v4
26
+
27
+ The internal `.agent-context/rules/` pack is now numbered Markdown with YAML frontmatter and stable section IDs (e.g. `FE-004`, `ARCH-009`, `API-006`). This is a breaking change for downstream consumers that parse rule headings; the migration guide lives in `CHANGELOG.md` under `4.0.0`. Repository-wide impact:
28
+
29
+ - Rules are now citable by ID, which the new bounded reflection block in `AGENTS.md` and the validation MCP tools (`lookup_rule`, `validate_against_rules`, `audit_compliance`) rely on.
30
+ - A three-layer prompt caching contract (D4 in `docs/architecture/decisions-foundation.md`) is now enforced by `npm run audit:cache-layer-contract`.
31
+ - A provider-free anti-hallucination ("anti-halu") benchmark is included (`benchmarks/anti-halu/`); pass rate and citation validity are reproducible locally.
32
+ - Caching numbers are scoped per integration. The 89.31% Anthropic warm-cache effective reduction reported in `benchmarks/results/cache-phase-2-2026-05-16.json` applies only to direct provider API usage and Claude Code SDK programmatic mode. IDE wrapper integrations (Cursor, Windsurf, Codex CLI, Kiro) receive prefix stability without a measurable per-pack saving. See `docs/integration-playbook.md` for the per-tool matrix and `docs/benchmark-reference.md` for the required reporting JSON shape.
33
+
25
34
 
26
35
  ## 60-Second Start
27
36
 
@@ -1,3 +1,4 @@
1
+ // @file-size-exception: Interactive CLI flow with sequential prompts; planned for split in Phase 1 commands refactor.
1
2
  /**
2
3
  * Init Command — Interactive project initialization.
3
4
  * Depends on: constants, utils, detector, compiler
@@ -53,6 +54,7 @@ import {
53
54
  loadProjectConfig,
54
55
  normalizeDocsLanguage,
55
56
  } from '../project-scaffolder.mjs';
57
+ import { migrateExistingDesignIntentToResearchDossierSchema } from '../project-scaffolder/design-contract/research-dossier-migration.mjs';
56
58
  import { performRollback } from '../rollback.mjs';
57
59
  import {
58
60
  createTokenOptimizationState,
@@ -482,6 +484,16 @@ export async function runInitCommand(targetDirectoryArgument, initOptions = {})
482
484
  supplementalMaterializedDocFileNames.push('design-intent.json');
483
485
 
484
486
  console.log('\nExisting UI/frontend scope detected. Seeded docs/design-intent.json so the machine-readable design contract exists before UI implementation work continues.');
487
+ } else if (projectDetection.hasExistingProjectFiles && (await pathExists(designIntentTargetPath))) {
488
+ // Scenario E: existing project being initialized for the first time
489
+ // already has docs/design-intent.json. Migrate it to carry researchDossier.metadata
490
+ // so the anti-repeat ledger and freshness gate become available without
491
+ // touching existing tokens, anchor, or palette.
492
+ const migrationResult = await migrateExistingDesignIntentToResearchDossierSchema(designIntentTargetPath);
493
+ if (migrationResult.migrated) {
494
+ supplementalMaterializedDocFileNames.push('design-intent.json (research-dossier metadata migrated)');
495
+ console.log('\n[MIGRATED] docs/design-intent.json now carries researchDossier.metadata. Run research-design.md before next UI implementation to populate the dossier and refresh researchVerifiedAt.');
496
+ }
485
497
  }
486
498
 
487
499
  await writeSelectedPolicy(resolvedTargetDirectoryPath, selectedPolicyProfileName);
@@ -45,6 +45,7 @@ import {
45
45
  detectProjectDocTemplateStaleness,
46
46
  buildDesignIntentSeedFromSignals,
47
47
  } from '../project-scaffolder.mjs';
48
+ import { migrateExistingDesignIntentToResearchDossierSchema } from '../project-scaffolder/design-contract/research-dossier-migration.mjs';
48
49
  import { ensureActiveMemorySnapshot } from '../memory-continuity.mjs';
49
50
  import { buildExistingProjectMajorConstraints } from '../init-detection-flow.mjs';
50
51
 
@@ -388,6 +389,16 @@ export async function runUpgradeCommand(targetDirectoryArgument, upgradeOptions
388
389
  await ensureDirectory(docsDirectoryPath);
389
390
  await fs.writeFile(designIntentTargetPath, designIntentSeedContent, 'utf8');
390
391
  supplementalCreatedFileNames.push('docs/design-intent.json');
392
+ } else {
393
+ // Scenario D: existing project already has docs/design-intent.json.
394
+ // Inject researchDossier.metadata when absent so the anti-repeat ledger
395
+ // becomes available and active validation can enforce freshness.
396
+ const existingDesignIntentPath = path.join(resolvedTargetDirectoryPath, 'docs', 'design-intent.json');
397
+ const migrationResult = await migrateExistingDesignIntentToResearchDossierSchema(existingDesignIntentPath);
398
+ if (migrationResult.migrated) {
399
+ supplementalCreatedFileNames.push('docs/design-intent.json (research-dossier metadata migrated)');
400
+ console.log('\n[MIGRATED] docs/design-intent.json now carries researchDossier.metadata. Run research-design.md before next UI implementation to populate the dossier and refresh researchVerifiedAt.');
401
+ }
391
402
  }
392
403
 
393
404
  if (shouldEnsureActiveMemorySnapshot) {
@@ -1,3 +1,4 @@
1
+ // @file-size-exception: Multiple compilation passes (rule + state + adapter); planned for split in Phase 1 compiler refactor.
1
2
  /**
2
3
  * Context Compiler — Rulebook compilation and state persistence.
3
4
  * Depends on: constants.mjs, utils.mjs