npm - mustflow - Versions diffs - 2.18.7 → 2.18.20 - Mend

mustflow 2.18.7 → 2.18.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

package/templates/default/locales/en/.mustflow/skills/routes.toml CHANGED Viewed

@@ -42,6 +42,18 @@ route_type = "primary"
 priority = 80
 applies_to_reasons = ["code_change", "behavior_change"]
+[routes."command-contract-authoring"]
+category = "workflow_contracts"
+route_type = "authoring"
+priority = 80
+applies_to_reasons = ["mustflow_config_change", "mustflow_docs_change"]
+[routes."cli-output-contract-review"]
+category = "workflow_contracts"
+route_type = "adjunct"
+priority = 65
+applies_to_reasons = ["public_api_change", "behavior_change", "docs_change"]
 [routes."facade-pattern"]
 category = "architecture_patterns"
 route_type = "primary"
@@ -108,6 +120,12 @@ route_type = "adjunct"
 priority = 75
 applies_to_reasons = ["docs_change"]
+[routes."llm-service-ux-review"]
+category = "ui_assets"
+route_type = "primary"
+priority = 65
+applies_to_reasons = ["ui_change", "product_change"]
 [routes."diff-risk-review"]
 category = "general_code"
 route_type = "adjunct"
@@ -136,7 +154,13 @@ applies_to_reasons = ["code_change", "behavior_change"]
 category = "data_external"
 route_type = "adjunct"
 priority = 45
-applies_to_reasons = ["code_change", "docs_change"]
+applies_to_reasons = ["code_change", "docs_change", "security_change"]
+[routes."cross-platform-filesystem-safety"]
+category = "data_external"
+route_type = "adjunct"
+priority = 65
+applies_to_reasons = ["code_change", "security_change", "migration_change"]
 [routes."adapter-boundary"]
 category = "data_external"
@@ -144,6 +168,12 @@ route_type = "primary"
 priority = 55
 applies_to_reasons = ["code_change", "behavior_change"]
+[routes."process-execution-safety"]
+category = "data_external"
+route_type = "primary"
+priority = 70
+applies_to_reasons = ["code_change", "behavior_change", "security_change"]
 [routes."dependency-injection"]
 category = "data_external"
 route_type = "primary"
@@ -202,7 +232,7 @@ applies_to_reasons = ["command_failure"]
 category = "security_privacy"
 route_type = "adjunct"
 priority = 40
-applies_to_reasons = ["docs_change", "security_change"]
+applies_to_reasons = ["docs_change", "security_change", "mustflow_config_change"]
 [routes."external-skill-intake"]
 category = "workflow_contracts"
@@ -276,6 +306,12 @@ route_type = "primary"
 priority = 55
 applies_to_reasons = ["release_risk", "docs_change"]
+[routes."search-ad-content-authoring"]
+category = "docs_release"
+route_type = "primary"
+priority = 60
+applies_to_reasons = ["docs_change", "copy_change", "product_change"]
 [routes."docs-prose-review"]
 category = "docs_release"
 route_type = "adjunct"

package/templates/default/locales/en/.mustflow/skills/search-ad-content-authoring/SKILL.md ADDED Viewed

@@ -0,0 +1,148 @@
+---
+mustflow_doc: skill.search-ad-content-authoring
+locale: en
+canonical: true
+revision: 3
+lifecycle: mustflow-owned
+authority: procedure
+name: search-ad-content-authoring
+description: Apply this skill when planning, writing, editing, or reviewing search-friendly, ad-supported articles, blog posts, guides, reviews, comparisons, FAQs, or evergreen content.
+metadata:
+  mustflow_schema: "1"
+  mustflow_kind: procedure
+  pack_id: mustflow.core
+  skill_id: mustflow.core.search-ad-content-authoring
+  command_intents:
+    - changes_status
+    - changes_diff_summary
+    - docs_validate_fast
+    - test_release
+    - mustflow_check
+---
+# Search Ad Content Authoring
+<!-- mustflow-section: purpose -->
+## Purpose
+Create useful, readable, search-oriented content that can support advertising layouts without keyword stuffing, thin-content filler, misleading ad placement, or unverifiable ranking and revenue claims.
+<!-- mustflow-section: use-when -->
+## Use When
+- A task asks for a blog post, article, guide, comparison, review, cost breakdown, how-to page, FAQ, glossary entry, or evergreen content intended for search traffic.
+- A task mentions search visibility, SEO, featured snippets, Google traffic, AdSense, Ezoic, Raptive, Mediavine, RPM, ad viewability, affiliate content, or monetized content layout.
+- A content draft needs paragraph structure, heading hierarchy, table or list placement, FAQ coverage, source use, image placement, internal links, or ad slot layout review.
+- A report claims that an article is search-friendly, mobile-readable, ad-friendly, snippet-ready, or aligned with a publisher monetization strategy.
+<!-- mustflow-section: do-not-use-when -->
+## Do Not Use When
+- The task is only product UI copy, release notes, README writing, legal policy text, or technical docs with no search or monetization goal; use the narrower writing or documentation skill.
+- The task asks to manipulate rankings, hide ads, mislead readers, copy competitor content, generate doorway pages, or maximize ads at the expense of user value.
+- Current Google, ad-network, legal, or policy claims are required but cannot be checked; use `source-freshness-check` and keep claims conservative.
+- The task only changes ad scripts, consent management, performance code, or analytics implementation without article content; use the relevant frontend, privacy, performance, or dependency skill.
+<!-- mustflow-section: required-inputs -->
+## Required Inputs
+- Target reader, search intent, article topic, jurisdiction or market if relevant, and the action the reader should be able to complete after reading.
+- Content type: definition, how-to, troubleshooting, comparison, cost guide, review, alternatives, checklist, buying guide, FAQ, or news-style update.
+- Known source requirements, freshness needs, original experience, product data, pricing, images, tables, calculators, affiliate disclosures, and monetization constraints.
+- Existing content style, heading conventions, article-type defaults, link policy, image policy, accessibility rules, ad layout rules, and performance constraints.
+- Title, introduction, conclusion, call-to-action, semantic markup, ad slot, and link constraints when the content will be rendered as a webpage.
+- Publishing metadata requirements such as title, summary, search tags, author, published date, updated date, canonical URL, and structured data when the site supports them.
+- Relevant command-intent contract entries for status, diff, docs, package, visual, or mustflow validation.
+<!-- mustflow-section: preconditions -->
+## Preconditions
+- The task matches the Use When conditions and does not match the Do Not Use When exclusions.
+- Required inputs are available, or missing inputs can be reported without guessing.
+- Higher-priority instructions and `.mustflow/config/commands.toml` have been checked for the current scope.
+- If the article depends on current facts, prices, policy behavior, product availability, or laws, or on medical, legal, financial, or safety-sensitive claims, also use `source-freshness-check`.
+- If the content includes personal data, user submissions, health, finance, legal, minors, consent, tracking, affiliate disclosure, or ad personalization concerns, also use `security-privacy-review`.
+<!-- mustflow-section: allowed-edits -->
+## Allowed Edits
+- Add or revise outlines, headings, paragraphs, lists, tables, FAQs, summaries, source notes, image captions, internal links, and disclosure wording that improve reader value.
+- Adjust paragraph breaks, section order, table placement, media placement, and ad-slot separation to support mobile readability and stable ad layout.
+- Add semantic content-structure guardrails for titles, introductions, conclusions, calls to action, paragraphs, headings, image blocks, and ad-slot separation.
+- Add conservative content-quality guardrails that prevent thin filler, keyword stuffing, misleading ad adjacency, invented sources, or unsupported ranking claims.
+- Do not promise search rankings, featured snippets, approval by a specific ad network, RPM improvement, or ad-policy compliance unless verified against current authoritative sources.
+- Do not treat exact word counts, heading counts, paragraph counts, keyword positions, or FAQ counts as universal ranking formulas; use them only as project-specific editorial defaults.
+- Do not pad content solely to create more ad slots, add unrelated FAQs, or place ads where they can be mistaken for navigation, images, controls, or editorial recommendations.
+- Do not recommend delaying the reader's primary answer, using uncloseable or deceptive sticky ads, or adding visual spacers, widgets, or media solely to inflate scroll depth.
+<!-- mustflow-section: procedure -->
+## Procedure
+1. Classify the search intent. Decide whether the reader needs a quick definition, step-by-step fix, comparison, price range, recommendation, troubleshooting path, or deeper research.
+2. Check volatile monetization claims. RPM formulas, network thresholds, revenue estimates, ad-refresh behavior, traffic eligibility, and current policy rules must be sourced and dated or omitted.
+3. Shape the title, summary, and introduction around the query. Use the target phrase naturally in the title or opening when it helps clarity, then open with the direct answer, reader problem, promised outcome, and any real evidence or experience without generic throat-clearing.
+4. Build the outline around reader decisions. Use H2 and H3 sections that match real subquestions, not keyword variants created only for search coverage.
+5. Apply site-specific editorial defaults when they exist. Article-type defaults for section count, paragraph count, or paragraph length can guide editing, but they are not ranking promises and should not override completeness.
+6. Keep paragraphs mobile-readable. Prefer one to three focused sentences per paragraph, but do not split a technical idea so aggressively that meaning becomes fragmented.
+7. Use semantic content structure. Real paragraphs, headings, figures, images, captions, lists, and tables should carry the structure; avoid stacked line breaks or meaningless wrapper markup when authoring rendered article templates.
+8. Use structured elements only when they help. Tables should compare real attributes; lists should sequence actions or options; pull summaries should reduce scanning cost.
+9. Add evidence and experience. Include first-hand observations, examples, screenshots, data, source links, or methodology when available. For data-heavy claims, use the pattern: number or claim, interpretation, then limitation.
+10. Handle freshness. Dates, prices, policy behavior, product availability, screenshots, benchmarks, and network rules need a source date or conservative wording.
+11. Design ad-friendly layout without harming trust. Keep content readable around ad slots, reserve layout space where applicable, separate ads from images and controls, avoid deceptive placement, and never make ads look like menus, downloads, recommendations, or content actions.
+12. Protect performance and accessibility. Use meaningful alt text, captions when useful, explicit image dimensions, and lazy loading after critical content where appropriate, and avoid layout shifts.
+13. Add internal and external navigation thoughtfully. Use a table of contents, jump links, related articles, internal links, or authoritative external source links only when they help readers verify, choose, or continue.
+14. Add FAQs only for genuine follow-up questions. Three to five concise FAQs are often enough; avoid duplicated headings, fabricated long-tail questions, or answers that repeat the body.
+15. Check publishing metadata and machine-readable article signals when the platform supports them. Keep title, summary, tags, author, dates, canonical URL, images, and structured data aligned with the article body.
+16. Check monetization-sensitive ethics. Include affiliate or sponsorship disclosure when relevant, avoid exaggerated claims, keep editorial recommendations distinct from ads, and do not hide the core answer or resource at the bottom solely to force more scrolling.
+17. Close with a clean conclusion. Summarize the decision or next step, include a useful call to action when appropriate, and do not introduce new claims in the conclusion.
+18. Check final shape. The article should have a direct answer, useful body sections, structured support, source or experience signals, clear next steps, and no filler written only for algorithms or ad inventory.
+19. Run the narrowest configured verification that covers changed content, docs, template, package, or mustflow contracts.
+<!-- mustflow-section: postconditions -->
+## Postconditions
+- The content serves the reader's search intent before optimizing for ad viewability or page length.
+- Paragraphs, headings, tables, lists, FAQs, images, links, and disclosures are purposeful and not filler.
+- The rendered article structure uses semantic blocks and avoids deceptive scroll-depth tactics.
+- Article length, section counts, paragraph counts, and keyword placement follow local editorial defaults when available, not universal SEO myths.
+- Publishing metadata and structured article signals match the visible content when the platform supports them.
+- Advertising layout considerations are separated from editorial claims and do not create deceptive or unstable UI.
+- Ranking, network approval, revenue, or policy-compliance claims are verified, dated, or omitted.
+- Final reports separate content improvements from unverified search, ad-network, or revenue expectations.
+<!-- mustflow-section: verification -->
+## Verification
+Use configured oneshot command intents when available:
+- `changes_status`
+- `changes_diff_summary`
+- `docs_validate_fast`
+- `test_release`
+- `mustflow_check`
+Use a narrower configured prose, docs, link, accessibility, performance, visual, or package check when it better proves the changed content surface.
+<!-- mustflow-section: failure-handling -->
+## Failure Handling
+- If source freshness cannot be checked, remove or soften claims about current rankings, ad-network rules, prices, dates, or policy behavior.
+- If the draft becomes keyword-stuffed, repetitive, or ad-slot filler, shorten it and restore reader-first structure.
+- If exact length, section, paragraph, or keyword-count advice conflicts with reader intent or local style, treat the number as an editorial suggestion and report the tradeoff.
+- If a source recommends intrusive, uncloseable, deceptive, or artificially delayed monetization patterns, keep only the user-respecting layout principle and reject the tactic.
+- If ad placement conflicts with readability, accessibility, privacy, consent, or performance constraints, prioritize user trust and report the monetization tradeoff.
+- If the topic is regulated or high stakes, avoid generic advice and require authoritative sources, qualified review, or a narrower scope.
+- If verification requires external policy pages, analytics, ad-console access, or live browser inspection not available in the current environment, report the skipped check.
+<!-- mustflow-section: output-format -->
+## Output Format
+- Search and reader intent
+- Article type and outline shape
+- Title, summary, introduction, paragraph, heading, semantic markup, table, list, FAQ, image, link, metadata, structured data, conclusion, call-to-action, and disclosure checks
+- Source freshness and evidence notes
+- Ad layout, readability, performance, accessibility, and trust checks
+- Ranking, policy, revenue, or network claims omitted or verified
+- Command intents run
+- Skipped checks and reasons
+- Remaining content or monetization risk

package/templates/default/locales/en/.mustflow/skills/security-privacy-review/SKILL.md CHANGED Viewed

@@ -2,11 +2,11 @@
 mustflow_doc: skill.security-privacy-review
 locale: en
 canonical: true
-revision: 4
+revision: 7
 lifecycle: mustflow-owned
 authority: procedure
 name: security-privacy-review
-description: Apply this skill when code, configuration, docs, templates, logs, telemetry, credentials, or data flows affect secrets, personal data, authentication, authorization, retention, or external disclosure.
+description: Apply this skill when code, configuration, docs, templates, logs, telemetry, credentials, data flows, AI-generated code, authentication, authorization, network calls, dependencies, cryptography, secure transport, agent configuration, or release surfaces affect secrets, personal data, retention, or external disclosure.
 metadata:
   mustflow_schema: "1"
   mustflow_kind: procedure
@@ -31,7 +31,14 @@ Catch security, privacy, and disclosure risks introduced by ordinary code, docum
 ## Use When
 - A change touches authentication, authorization, sessions, admin behavior, tenant boundaries, personal data, secrets, tokens, credentials, API keys, or private files.
+- A change comes from AI-generated code, vibe-coded output, copied examples, or a broad assistant patch that may have optimized for the happy path without proving abuse boundaries.
 - A change adds or modifies logging, telemetry, diagnostics, receipts, reports, caches, generated state, retention, redaction, export, or external transmission.
+- A change adds external URL fetching, webhook callbacks, redirects, browser previews, remote downloads, database-as-a-service rules, security headers, CORS, CSRF handling, or rate limits.
+- A change touches cookies, JWTs, reset tokens, invite tokens, OAuth callbacks, file upload or download, browser storage, business rules, pricing, entitlements, database queries, ORM bulk operations, or deployment configuration.
+- A change touches cryptography, password hashing, token generation, random number generation, TLS/HTTPS, certificate validation, scanner gates, or a security invariant that could drift across architecture boundaries.
+- A change adds, imports, recommends, or installs third-party dependencies that may affect the software supply chain.
+- A change introduces or edits agent configuration, MCP/tool configuration, prompt files, model instructions, or repository-local rule files.
+- A change affects CI/CD workflow permissions, fork pull-request handling, build scripts, package lifecycle scripts, deployment secrets, container users, storage buckets, debug flags, or public admin, metrics, GraphQL, cache, or search endpoints.
 - Documentation, templates, examples, tests, or final reports mention sensitive data handling, privacy behavior, secret handling, or user-identifying data.
 - A diff could expose data through filenames, paths, command output, screenshots, generated artifacts, package contents, or public docs.
 - A change constructs, recommends, copies, resolves, or runs commands based on repository-controlled names, configuration, or generated reports.
@@ -51,6 +58,9 @@ Catch security, privacy, and disclosure risks introduced by ordinary code, docum
 - Changed files, diff summary, and the user goal.
 - Sensitive data, actor, trust boundary, storage, logging, retention, export, or external disclosure surfaces involved.
+- Actor, resource owner, tenant boundary, server-side authorization rule, state-changing route, external network target, dependency source, and agent/tool permission surface involved.
+- Cookie, JWT, OAuth, file upload, file download, business-value, database mutation, ORM bulk operation, CI/CD permission, deployment setting, or secret-source surface involved.
+- Cryptographic primitive, password hashing, random-token, secure transport, certificate validation, scanner gate, or security invariant involved.
 - Existing project rules for secrets, privacy, generated state, public docs, package contents, and command output.
 - Relevant command-intent contract entries for status, diff, docs, release, or mustflow validation.
 - Any repository-controlled names, paths, symlinks, command strings, environment path entries, workflow actions, or package contents that cross a trust boundary.
@@ -70,6 +80,7 @@ Catch security, privacy, and disclosure risks introduced by ordinary code, docum
 - Remove sensitive-looking sample values from docs, fixtures, templates, logs, reports, and final output when they are not required.
 - Mark unknown privacy or secret-handling behavior as unverified instead of claiming it is safe.
 - Do not invent compliance claims, privacy guarantees, secret scanning results, or audit coverage.
+- Do not treat a working UI, passing happy-path test, or generated assistant explanation as proof that authorization, privacy, dependency, or external-request boundaries are safe.
 <!-- mustflow-section: procedure -->
 ## Procedure
@@ -77,21 +88,41 @@ Catch security, privacy, and disclosure risks introduced by ordinary code, docum
 1. Identify the sensitive surface: secret, personal data, actor, permission, storage location, log, generated artifact, package file, public document, or external recipient.
 2. Decide whether the change creates, stores, reads, transforms, logs, exports, deletes, or reports sensitive information.
 3. Check whether the changed surface is public, packaged, generated, cached, retained, user-visible, or sent outside the repository boundary.
-4. Treat shell commands, copyable command text, executable names, workflow action references, publish identities, package manifests, and environment path entries as disclosure and execution surfaces, not as harmless strings.
-5. For filesystem changes, distinguish lexical containment from the real target. Check symlinks, generated state, package contents, and file APIs that may follow links before claiming a path stays inside the repository.
-6. For code-scanning alerts, group findings by root cause and rule. Fix the underlying pattern, not only the exact flagged line, and separate repository-setting alerts such as branch protection or maintainer activity from code changes.
-7. For workflow scanner alerts, check action pinning, `persist-credentials`, job-level permissions, reusable workflow permissions, artifact upload boundaries, and privileged identity timing before treating the warning as cosmetic.
-8. For pinned action references, distinguish tag objects from the commit that implements the tag. Verify pinned SHAs against the action repository so scanner tooling does not report an imposter or non-member commit.
-9. For dependency scanner alerts, separate production dependency manifests from fixtures, examples, generated test repositories, and intentionally vulnerable samples. Narrow the scan scope before treating fixture-only alerts as product vulnerabilities.
-10. Verify that examples, fixtures, screenshots, command outputs, and final reports do not expose real-looking secrets or unnecessary personal data.
-11. Prefer omission or minimal metadata over masking when the sensitive value is not needed for the user to understand the result.
-12. If the change affects an authorization or abuse boundary, activate `security-regression-tests` for test selection instead of folding test generation into this review.
-13. Run the narrowest configured verification that covers the changed docs, templates, package, or mustflow contract.
+4. Treat AI-generated code as untrusted until the protected resource, actor, ownership rule, and denied case are inspected. UI-only hiding, client-side role checks, and passing happy-path flows do not prove server-side authorization.
+5. For each read, write, update, delete, export, or admin route, confirm the server-side query or policy binds the session actor to the target resource owner, tenant, role, or capability.
+6. Do not stop at "is logged in". Separate authentication from authorization, then inspect tenant, workspace, organization, team, owner, role, and guest filters on both reads and writes.
+7. For database and ORM changes, check for unscoped `findMany`, `updateMany`, `deleteMany`, mass assignment of `role`, `price`, `ownerId`, `isPaid`, or similar privileged fields, unsafe migration defaults, and missing row-level or policy-based access controls where the platform supports them.
+8. For state-changing routes that rely on cookies or browser credentials, check CSRF, origin, CORS, same-site, and rate-limit behavior instead of assuming the framework default is active.
+9. For session and token behavior, check cookie flags, JWT verification instead of decode-only logic, expiration, issuer and audience validation, reset or invite token entropy and lifetime, server-side revocation, logout invalidation, and reauthentication before sensitive account or payment changes.
+10. For external URL, webhook, preview, redirect, download, or callback behavior, check allowlists, protocol restrictions, redirect handling, DNS/IP re-resolution, private network ranges, link-local metadata endpoints, webhook signatures, timeout limits, retry limits, and open redirect parameters such as `next` or `redirect`.
+11. For database-as-a-service, storage bucket, or realtime rules, check that server-side policies are default-deny, ownership-scoped, and not left in public read/write development mode.
+12. For input sinks, check parameterized queries, ORM binding, static command maps, output encoding, HTML/Markdown rendering boundaries, unsafe dynamic evaluation, XML/YAML/Markdown parser options, redirect and sort parameters, page-size limits, and framework escape hatches.
+13. For file upload and download, check MIME and content signatures, size limits, storage outside executable web roots, SVG/HTML/PDF rendering rules, image or document metadata, filename controls, Unicode confusion, path traversal, download authorization, and resource limits for resizing, archive extraction, or document conversion.
+14. For business logic, check that server code does not trust client-supplied prices, discounts, roles, owners, entitlement state, plan limits, usage counters, inventory, seats, refunds, credits, or coupon state. Inspect idempotency, transactions, uniqueness, and concurrent requests for repeated side effects.
+15. For secrets and logs, check hardcoded credentials, frontend bundle exposure, public versus secret key confusion, real-looking samples, raw request or session dumps, stack traces, error payloads, screenshots, receipts, generated reports, and whether leaked keys need revocation guidance.
+16. Treat shell commands, copyable command text, executable names, workflow action references, publish identities, package manifests, lifecycle scripts, Dockerfiles, and environment path entries as disclosure and execution surfaces, not as harmless strings.
+17. For dependency changes, activate `dependency-reality-check` to confirm the package is declared, real, necessary, locked when appropriate, and not an assistant-hallucinated or lookalike dependency.
+18. For agent configuration, MCP/tool setup, prompt files, external instructions, or AI context settings, activate `external-prompt-injection-defense` and check hidden instruction text, suspicious Unicode controls, broad filesystem or shell permissions, network egress, sensitive context inclusion, and over-privileged service tokens.
+19. For filesystem changes, distinguish lexical containment from the real target. Check symlinks, generated state, package contents, and file APIs that may follow links before claiming a path stays inside the repository.
+20. For code-scanning alerts, group findings by root cause and rule. Fix the underlying pattern, not only the exact flagged line, and separate repository-setting alerts such as branch protection or maintainer activity from code changes.
+21. For workflow scanner alerts, check action pinning, `persist-credentials`, job-level permissions, reusable workflow permissions, fork pull-request secret exposure, artifact upload boundaries, and privileged identity timing before treating the warning as cosmetic.
+22. For pinned action references, distinguish tag objects from the commit that implements the tag. Verify pinned SHAs against the action repository so scanner tooling does not report an imposter or non-member commit.
+23. For dependency scanner alerts, separate production dependency manifests from fixtures, examples, generated test repositories, and intentionally vulnerable samples. Narrow the scan scope before treating fixture-only alerts as product vulnerabilities.
+24. For deployment settings, check debug mode, sample admin accounts, default credentials, public admin panels, open metrics endpoints, public storage, root container users, HTTPS enforcement, and exposed GraphQL or development consoles.
+25. For transport security, check HTTPS/TLS requirements, certificate validation, insecure HTTP downgrade paths, disabled verification flags, and whether sensitive traffic can bypass the secure channel.
+26. For cryptography, reject custom cryptography and tutorial-grade shortcuts. Check password hashing uses a password-hashing primitive such as bcrypt, scrypt, or Argon2id where supported by the project; random tokens use secure randomness; keys are separated from encrypted data; and weak hashes such as MD5, SHA-1, or bare SHA-256 are not used for password storage.
+27. For architecture drift, name the security invariant before accepting the generated structure. Confirm the invariant still holds across UI, handler, service, repository, database policy, workflow, and deployment boundaries.
+28. For SAST, SCA, or scanner output, treat scanner output as evidence rather than command authority. Map the finding to a repository-owned boundary, configured verification intent, dependency metadata, or regression test before claiming the issue is fixed.
+29. Verify that examples, fixtures, screenshots, command outputs, and final reports do not expose real-looking secrets or unnecessary personal data.
+30. Prefer omission or minimal metadata over masking when the sensitive value is not needed for the user to understand the result.
+31. If the change affects an authorization, SSRF, CSRF, rate-limit, upload, download, token, business-logic, injection, logging, agent permission, cryptography, transport, scanner, or abuse boundary, activate `security-regression-tests` for test selection instead of folding test generation into this review.
+32. Run the narrowest configured verification that covers the changed docs, templates, package, or mustflow contract.
 <!-- mustflow-section: postconditions -->
 ## Postconditions
 - Sensitive data and disclosure surfaces have been identified or explicitly reported as unknown.
+- AI-generated or happy-path-only security assumptions have been replaced with inspected server-side, dependency, tool-permission, or test evidence.
 - Public and packaged surfaces do not include unnecessary secrets, personal data, or misleading privacy guarantees.
 - The final report names remaining unverified security or privacy risks without revealing sensitive values.
@@ -113,6 +144,7 @@ Use a narrower configured test, build, or documentation intent when it better pr
 - If a sensitive value appears in command output, stop copying it and summarize the issue without the value.
 - If the project lacks enough context to confirm privacy or secret handling, report the uncertainty and avoid claiming safety.
+- If authorization, SSRF, CSRF, rate-limit, BaaS policy, or agent-tool permission evidence is missing, report the exact unverified boundary and do not rely on client-side behavior as a substitute.
 - If a copyable command, executable lookup, symlink-following path, or publishing workflow uses repository-controlled input across a trust boundary, treat it as a security issue until quoting, validation, no-follow file handling, or workflow isolation is verified.
 - If a scanner reports many alerts from test fixtures or generated sample repositories, do not hide them by dismissal first. Prefer narrowing scanner inputs to the real release and runtime dependency surfaces, then document any intentionally scanned fixture exceptions.
 - If a package, generated artifact, or public doc includes sensitive data, remove or redact it before continuing unrelated work.
@@ -122,7 +154,9 @@ Use a narrower configured test, build, or documentation intent when it better pr
 ## Output Format
 - Sensitive surfaces reviewed
+- AI-generated happy-path assumptions checked
 - Disclosure or retention paths checked
+- Authorization, session, token, input, file, network, business-logic, dependency, cryptography, transport, deployment, scanner, and agent-tool boundaries checked
 - Redaction, omission, or wording changes made
 - Related security-regression test need
 - Command intents run

package/templates/default/locales/en/.mustflow/skills/security-regression-tests/SKILL.md CHANGED Viewed

@@ -2,7 +2,7 @@
 mustflow_doc: skill.security-regression-tests
 locale: en
 canonical: true
-revision: 6
+revision: 9
 lifecycle: mustflow-owned
 authority: procedure
 name: security-regression-tests
@@ -32,6 +32,9 @@ Convert security-sensitive behavior changes into safe negative tests that preser
 - Authentication, authorization, session, CSRF, rate-limit, admin, payment, credit, subscription, personal-data, or tenant-boundary behavior changes.
 - Input validation, output encoding, file upload, path handling, webhook callback, redirect, or external URL handling changes.
+- Cookie, JWT, OAuth callback, reset token, invite token, logout, reauthentication, file download, upload processing, business-rule, entitlement, pricing, inventory, database query, ORM bulk operation, or deployment-configuration behavior changes.
+- AI-generated or vibe-coded routes, data access, external fetchers, admin screens, or database rules need denied-case coverage beyond a happy-path test.
+- Cryptography, password hashing, secure randomness, HTTPS/TLS, certificate validation, scanner-gate, or security-invariant behavior changes.
 - Command construction, command recommendation, executable resolution, command-contract linting, or copy-to-clipboard command behavior changes.
 - Filesystem containment, symlink handling, package publishing, build pipeline, or release automation behavior changes.
 - A bug fix closes an abuse case and the fix needs a regression test to prevent reintroduction.
@@ -81,16 +84,31 @@ Convert security-sensitive behavior changes into safe negative tests that preser
 1. Identify the protected boundary: actor, resource, operation, trust boundary, and expected defensive outcome.
 2. Classify the abuse case using project-specific facts, not broad labels alone:
    - unauthorized actor or cross-tenant access
+   - BOLA/IDOR-style object access where the resource identifier is valid but belongs to another actor or tenant
    - invalid ownership or privilege escalation
+   - UI-only admin gating without server-side role, owner, or capability enforcement
+   - authentication-only checks that omit owner, tenant, workspace, organization, team, or capability constraints
+   - unsafe session or token handling such as decode-only JWT checks, missing expiry, missing issuer or audience validation, missing logout revocation, or missing reauthentication before sensitive changes
    - unsafe input shape, size, encoding, path, or MIME mismatch
+   - unsafe sort, redirect, pagination, parser, Markdown, XML, YAML, or template input that reaches a query, file path, HTML, or command boundary
    - unsafe output rendering or serialization
+   - file upload or download authorization, content-type, signature, size, filename, metadata, web-root, or conversion resource-limit failure
    - unsafe external URL, callback, redirect, or server-side request target
+   - SSRF-style private network, localhost, link-local metadata, redirect, or DNS re-resolution target
+   - missing webhook signature validation or unsafe retry behavior for external callbacks
+   - CSRF-style state change that relies on browser credentials without an origin, token, or same-site boundary
+   - missing rate limit or lockout on login, signup, token reset, invitation, webhook, or expensive generation endpoints
+   - client-supplied price, discount, role, owner, entitlement, plan, inventory, seat, refund, coupon, or usage value trusted by the server
+   - ORM mass assignment, unscoped `findMany`, `updateMany`, `deleteMany`, unsafe migration default, or missing database policy enforcement
    - unsafe shell command construction, command name interpolation, clipboard command output, or executable lookup
    - filesystem escape through symlinks, path traversal, archive entries, generated state, or package contents
    - mismatch between two validators, linters, dashboards, schemas, or release gates that claim the same policy
    - release or package-publishing pipeline code execution before artifact publication
    - incomplete escaping, quoting, encoding, or sanitization where the safe behavior can be asserted without invoking a real shell or network target
    - stack trace or internal error exposure through a user-visible API, report, dashboard, or command output
+   - insecure password storage, custom cryptography, weak hash use, insecure randomness, or predictable reset or invite tokens
+   - disabled certificate validation, insecure HTTP downgrade, or missing HTTPS enforcement for sensitive traffic
+   - architecture drift where a refactor preserves the happy path but drops a security invariant across a layer boundary
    - workflow permission drift, mutable action references, wrong pinned-action object type, dependency scan overreach, or artifact credential leakage that can be checked through repository-local workflow tests or linters
    - payment, credit, coupon, subscription, refund, or entitlement abuse
    - personal-data or admin-only access leakage
@@ -98,19 +116,32 @@ Convert security-sensitive behavior changes into safe negative tests that preser
    - missing capability or scoped permission object where a sensitive operation depends on broad user, role, or global authorization state
    - missing invariant policy where a sensitive state change could violate a non-negotiable rule such as last-owner, entitlement, paid-order, refund, or retention constraints
    - missing idempotency key, action ledger, or outbox/inbox record where repeated execution of a side effect could charge, refund, notify, grant, revoke, publish, or delete more than once
+   - locally checkable exposure such as a public debug, admin, metrics, storage, GraphQL, or development-console endpoint, a root container user, a default credential, or a fork pull-request secret path
 3. Search for existing tests that already cover the same boundary. Strengthen the existing test when that gives clearer coverage than adding a new one.
 4. Build the smallest safe negative test data: at least one allowed control case when useful, and one denied case that proves the boundary rejects the abuse condition.
-5. For parser, validator, serializer, path, command, or workflow boundaries, consider a bounded property-based or fuzz-style regression when the invariant is clearer than a list of hand-written examples. Keep generators local, deterministic under the test runner, size-limited, and focused on the defensive invariant.
-6. When adding a fuzzing or property-based testing dependency, keep dependency metadata, lockfiles, test selection rules, and package tests synchronized. Prefer an existing project dependency when it can express the invariant cleanly.
-7. Use mocks or local fakes for external requests, uploads, redirects, webhooks, payment providers, file systems, shell commands, package registries, and CI workflows. Do not contact live suspicious endpoints or publish real artifacts.
-8. Name the test after the defensive expectation, such as `cannot_read_other_users_invoice` or `rejects_private_network_callback_url`.
-9. Keep assertions tied to observable behavior: status code, returned error shape, unchanged database state, missing side effect, sanitized output, rejected job, or invariant preserved for all generated cases.
-10. Avoid dumping long exploit strings into the test. Use minimal representative inputs or generated values that prove the validation or boundary rule without becoming an offensive payload corpus.
-11. For command and filesystem boundaries, assert the denied side effect directly: no injected command appears in a runnable recommendation, no repository-local shim is executed, no background shell pattern is counted runnable, no symlink target outside the root is read or written.
-12. For plan/apply, capability, invariant, time, and idempotency boundaries, assert the safety contract directly: planning produces no side effect, commit rejects stale or unauthorized capability, invalid transitions preserve state, injected time controls expiry, and repeated side-effect keys do not repeat the effect.
-13. For workflow scanner fixes, prefer repository-local assertions for durable contracts: action references are pinned to commit SHAs or digest-pinned containers, privileged permissions are job-scoped, deployment or scanner jobs can be manually rerun when useful, and dependency scans exclude fixture-only manifests unless intentionally included.
-14. For scanner-driven fixes, include a regression only when the rule reflects a durable project contract. Do not add brittle tests that merely assert the scanner's current wording, line number, or severity.
-15. If the project lacks enough context to write a deterministic test, output a concrete test proposal instead of inventing fixtures or behavior.
+5. For ownership and tenant boundaries, use two actors and two resources. Prove that the valid owner succeeds and the non-owner fails for the same resource identifier shape.
+6. For SSRF and redirect boundaries, use local fake resolvers or request adapters and assert that private, loopback, link-local, metadata, unsupported protocol, and redirect-to-denied targets are rejected without making live network calls.
+7. For CSRF and browser-credential state changes, assert that the mutating operation rejects missing or mismatched token, origin, or same-site evidence according to the project framework.
+8. For rate limits and lockouts, use injected time, local stores, or fake counters to prove repeated attempts are bounded without slowing the suite.
+9. For session, JWT, OAuth, reset, invite, logout, or reauthentication boundaries, assert the denied condition directly: invalid signature, expired token, wrong issuer, wrong audience, missing state, revoked token, reused token, or missing recent authentication.
+10. For upload and download boundaries, use local fixture files and fake storage. Assert authorization, content signature, MIME, size, filename, path, metadata stripping, and conversion resource-limit behavior without using live user files.
+11. For business-rule boundaries, use server-side fixtures that try manipulated price, discount, role, owner, entitlement, plan, inventory, seat, refund, coupon, or usage fields. Assert that state remains unchanged or is recalculated from trusted server data.
+12. For database and ORM boundaries, assert scoped queries or policies through observable behavior: cross-tenant rows stay invisible, bulk update or delete affects only owned rows, mass-assigned privileged fields are ignored, and unsafe migration defaults cannot create elevated access.
+13. For cryptography and token-generation boundaries, assert behavior through the project-owned API rather than hard-coding private implementation details: password verifiers reject plaintext or fast-hash storage, token generation uses injected secure randomness or a deterministic test double, and custom cryptography shortcuts are absent where the project exposes that decision.
+14. For transport-security boundaries, assert configuration rejects disabled certificate validation or insecure HTTP for sensitive endpoints when the project owns that configuration.
+15. For architecture-drift boundaries, write the test around the security invariant, not the refactor shape: unauthorized access stays denied, sensitive output stays omitted, and side effects remain scoped after the generated structure changes.
+16. For parser, validator, serializer, path, command, or workflow boundaries, consider a bounded property-based or fuzz-style regression when the invariant is clearer than a list of hand-written examples. Keep generators local, deterministic under the test runner, size-limited, and focused on the defensive invariant.
+17. When adding a fuzzing or property-based testing dependency, keep dependency metadata, lockfiles, test selection rules, and package tests synchronized. Prefer an existing project dependency when it can express the invariant cleanly.
+18. Use mocks or local fakes for external requests, uploads, redirects, webhooks, payment providers, file systems, shell commands, package registries, and CI workflows. Do not contact live suspicious endpoints or publish real artifacts.
+19. Name the test after the defensive expectation, such as `cannot_read_other_users_invoice` or `rejects_private_network_callback_url`.
+20. Keep assertions tied to observable behavior: status code, returned error shape, unchanged database state, missing side effect, sanitized output, rejected job, or invariant preserved for all generated cases.
+21. Avoid dumping long exploit strings into the test. Use minimal representative inputs or generated values that prove the validation or boundary rule without becoming an offensive payload corpus.
+22. For command and filesystem boundaries, assert the denied side effect directly: no injected command appears in a runnable recommendation, no repository-local shim is executed, no background shell pattern is counted runnable, no symlink target outside the root is read or written.
+23. For plan/apply, capability, invariant, time, and idempotency boundaries, assert the safety contract directly: planning produces no side effect, commit rejects stale or unauthorized capability, invalid transitions preserve state, injected time controls expiry, and repeated side-effect keys do not repeat the effect.
+24. For workflow scanner fixes, prefer repository-local assertions for durable contracts: action references are pinned to commit SHAs or digest-pinned containers, privileged permissions are job-scoped, fork pull requests do not receive secrets, deployment or scanner jobs can be manually rerun when useful, and dependency scans exclude fixture-only manifests unless intentionally included.
+25. For deployment and configuration fixes, prefer local config assertions: debug flags are off for production, sample credentials are absent, public admin or metrics endpoints are not enabled by default, storage is not public, containers do not run as root when the project controls that setting, and HTTPS requirements are preserved.
+26. For scanner-driven fixes, include a regression only when the rule reflects a durable project contract. Do not add brittle tests that merely assert the scanner's current wording, line number, or severity.
+27. If the project lacks enough context to write a deterministic test, output a concrete test proposal instead of inventing fixtures or behavior.
 <!-- mustflow-section: postconditions -->
 ## Postconditions

package/templates/default/locales/en/.mustflow/skills/ui-quality-gate/SKILL.md CHANGED Viewed

@@ -2,7 +2,7 @@
 mustflow_doc: skill.ui-quality-gate
 locale: en
 canonical: true
-revision: 3
+revision: 6
 lifecycle: mustflow-owned
 authority: procedure
 name: ui-quality-gate
@@ -34,11 +34,14 @@ Keep user-facing interfaces usable, minimal, accessible, responsive, localizatio
 - A task asks for UI polish, layout, responsive behavior, accessibility, visual states, language switching, labels, or interaction feedback.
 - A report claims that UI text fits, controls are understandable, language updates apply, or a page renders correctly.
 - A change could add explanatory, marketing-like, decorative, duplicate, invented, or non-actionable UI content.
+- AI-generated or vibe-coded UI needs review for predictable conventions, visual hierarchy, mobile usability, touch targets, component boundaries, and interaction feedback.
+- A repeated AI-editing loop may have introduced style drift, duplicated state, missing edge cases, undeclared UI dependencies, or oversized components.
 <!-- mustflow-section: do-not-use-when -->
 ## Do Not Use When
 - The task changes only backend logic, CLI output, metadata, or documentation with no user-facing UI surface.
+- The task is specifically about conversational AI, chat, copilot, prompt, multimodal input, streaming generation, citations, feedback, or conversation history; use `llm-service-ux-review`.
 - The task is only image asset conversion; use `web-asset-optimization` for that part.
 - The UI change cannot be rendered or inspected in the current environment; report the inspection gap instead of claiming visual verification.
@@ -48,6 +51,9 @@ Keep user-facing interfaces usable, minimal, accessible, responsive, localizatio
 - The changed UI surface, user task, and expected interaction path.
 - Existing design patterns, task-essential controls, labels, states, accessibility conventions, and localization rules in the same area.
 - Viewports, themes, languages, and state combinations that need inspection.
+- The target devices and interaction style, including mobile-first behavior, pointer or touch input, expected keyboard use, and any project breakpoint or design-token conventions.
+- Existing design-token, component, data, state, dependency, and accessibility contracts that the changed UI must preserve.
+- Any high-risk widget involved, such as toast notifications, tree views, editable grids, drag-and-drop, custom selects, comboboxes, dialogs, or virtualized lists.
 - Performance, asset-size, animation, or network constraints that affect the changed surface.
 - Relevant command-intent contract entries for status, diff, docs, build, release, or mustflow validation.
@@ -64,7 +70,10 @@ Keep user-facing interfaces usable, minimal, accessible, responsive, localizatio
 - Add, remove, or refine UI controls, labels, states, layout constraints, localization hooks, and accessibility attributes when they support the user's real task.
 - Remove decorative, explanatory, invented, or marketing-like UI content that does not help the user act on real data.
 - Prefer existing component patterns and stable dimensions over new visual systems.
+- Add subtle interaction feedback only when it clarifies state, confirms action, or improves perceived responsiveness without harming reduced-motion users.
+- Add a small intermediate UI contract for complex surfaces before implementation: view tree, data inputs, user actions, state transitions, visual tokens, and verification targets.
 - Do not claim a UI is visually verified without an actual render, screenshot, DOM inspection, or clear reason that visual verification was unavailable.
+- Do not add undeclared packages, invented component APIs, ad hoc style scales, or framework-specific patterns that conflict with the current project.
 <!-- mustflow-section: procedure -->
 ## Procedure
@@ -72,23 +81,33 @@ Keep user-facing interfaces usable, minimal, accessible, responsive, localizatio
 1. Identify the real user task and the UI surface that supports it.
 2. Check nearby UI patterns before adding new layout, component, color, copy, or state conventions.
 3. Keep task-essential controls only. Remove or avoid non-essential welcome text, feature summaries, decorative cards, fake metrics, marketing copy, invented filters, and controls that do not operate on real data.
-4. Verify controls are understandable and state-aware: icon buttons need accessible names or tooltips, destructive or state-changing actions need clear labels, selected or disabled states need clear visual treatment, and disabled states need a visible reason when useful.
-5. Check keyboard and focus behavior before visual polish: native elements first, tab order, focus order and return, visible focus state, names for icon-only controls, form error linkage, live status announcements, reduced-motion handling, and sufficient contrast.
-6. Check accessible names and states against the actual interaction model, not only the rendered text. Dynamic controls must expose the current expanded, selected, checked, invalid, busy, or disabled state when applicable.
-7. Check form error and empty-state behavior. Errors should point to the field or action that needs attention, and empty states should be short and action-oriented rather than explaining the product.
-8. Check localization-safe labels: language switching, fallback text, placeholders, plural or formatted values, long translated labels, bidirectional text, logical spacing, and date, time, number, currency, or unit display where applicable.
-9. Check responsive layout without text overlap: text should not overflow, clip, overlap, resize fixed-format controls unexpectedly, or depend on viewport-width font scaling.
-10. Check performance and asset-size awareness when the change adds images, icons, animation, third-party UI code, large client data, or extra network work. Prefer existing assets and bounded rendering cost.
-11. Check state coverage: loading, empty, error, saved, changed, disabled, selected, focused, and language-switched states should update consistently where applicable.
-12. Inspect responsive and localization-sensitive surfaces when the change affects layout or translated text.
-13. Use visual verification only when a configured one-shot command or approved browser workflow exists for the surface. Do not start development servers, watchers, or browser sessions directly from the skill.
-14. Run the narrowest configured verification that covers the changed UI, documentation, package, or mustflow contract.
+4. Check predictability and visual hierarchy. Follow familiar platform or product conventions, make the next likely action visible, and use spacing, size, weight, grouping, and order to make the primary task easier to scan.
+5. Check responsive and touch ergonomics. Prefer mobile-first layout decisions, preserve readable spacing at small widths, keep touch targets and gaps usable, and follow existing breakpoint or design-token conventions instead of inventing one-off sizes.
+6. Verify controls are understandable and state-aware: icon buttons need accessible names or tooltips; destructive or state-changing actions need clear labels; hover, active, selected, loading, and disabled states need clear visual treatment; and disabled states need a visible reason when useful.
+7. Check keyboard and focus behavior before visual polish: native elements first, semantic landmarks when they clarify page structure, tab order, focus order and return, visible focus state, names for icon-only controls, form error linkage, live status announcements, reduced-motion handling, and sufficient contrast.
+8. Check accessible names and states against the actual interaction model, not only the rendered text. Dynamic controls must expose the current expanded, selected, checked, invalid, busy, or disabled state when applicable.
+9. Check form validation, error, and empty-state behavior. Validate close to the field when useful, place errors next to the action or input that needs attention, preserve user input after failure, and keep empty states short and action-oriented rather than explaining the product.
+10. Check interaction feedback. Loading, skeleton, saving, success, failure, toast, inline message, or micro-interaction feedback should map to real state and should not distract from the task or hide a slow operation.
+11. Check localization-safe labels: language switching, fallback text, placeholders, plural or formatted values, long translated labels, bidirectional text, logical spacing, and date, time, number, currency, or unit display where applicable.
+12. Check responsive layout without text overlap: text should not overflow, clip, overlap, resize fixed-format controls unexpectedly, or depend on viewport-width font scaling.
+13. Check style drift. Repeated AI edits should not create one-off spacing, color, radius, typography, shadow, or inline-style variants when an existing token, utility, or component variant already covers the need.
+14. Check state architecture. Async UI should cover the relevant idle, loading, success, empty, error, retrying, and stale-data states without duplicating state variables or leaving race-prone updates after unmount.
+15. Check component boundaries. Reusable UI pieces should be small enough to maintain consistent states and accessibility, but not split into wrappers that obscure the user task or duplicate design rules.
+16. Check dependency and API reality. Imported UI packages, generated helpers, component props, browser APIs, and event contracts must exist in the project or be handled through the dependency workflow before code relies on them.
+17. Check high-risk widgets. Toasts need pauseable timing and appropriate status announcements; tree views need composite keyboard behavior; editable grids need navigation and editing modes; custom selects, dialogs, and comboboxes need proven accessibility patterns or an existing library.
+18. Check performance and asset-size awareness when the change adds images, icons, animation, third-party UI code, large client data, or extra network work. Prefer existing assets, lazy loading when appropriate, explicit image dimensions, and bounded rendering cost.
+19. Check state coverage: loading, empty, error, saved, changed, disabled, selected, focused, hovered, active, validating, and language-switched states should update consistently where applicable.
+20. For complex surfaces, write or confirm a compact UI contract before broad implementation: view tree, data contract, interaction model, state model, design-token contract, and verification targets.
+21. Inspect responsive and localization-sensitive surfaces when the change affects layout or translated text.
+22. Use visual verification only when a configured one-shot command or approved browser workflow exists for the surface. Do not start development servers, watchers, or browser sessions directly from the skill.
+23. Run the narrowest configured verification that covers the changed UI, documentation, package, or mustflow contract.
 <!-- mustflow-section: postconditions -->
 ## Postconditions
 - The UI supports the user's task without unnecessary explanatory or decorative surface.
-- Important controls, labels, states, keyboard and focus paths, layout constraints, localization updates, and performance-sensitive assets are checked or reported as unverified.
+- Important controls, labels, states, visual hierarchy, touch ergonomics, keyboard and focus paths, layout constraints, localization updates, and performance-sensitive assets are checked or reported as unverified.
+- AI-generated changes preserve existing style tokens, component boundaries, state contracts, dependency reality, and high-risk widget accessibility expectations.
 - Final reports distinguish code-level verification from visual or interactive verification.
 <!-- mustflow-section: verification -->
@@ -120,7 +139,8 @@ Use a narrower configured test, build, browser, screenshot, or accessibility int
 - UI surface reviewed
 - User task and states checked
 - Task-essential controls kept or removed
-- Layout, keyboard and focus, accessibility, localization, performance, and asset-size checks
+- Visual hierarchy, responsive layout, touch ergonomics, keyboard and focus, accessibility, localization, performance, and asset-size checks
+- Interaction feedback, style drift, state architecture, dependency, high-risk widget, and component-boundary checks
 - Decorative or unnecessary UI avoided or removed
 - Command intents run
 - Skipped visual checks and reasons