npm - kushi-agents - Versions diffs - 4.7.4 → 4.8.1 - Mend

kushi-agents 4.7.4 → 4.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "kushi-agents",
-  "version": "4.7.4",
+  "version": "4.8.1",
   "description": "Install Kushi — multi-source project evidence agent with snapshot+stream capture across Email, Teams, OneNote, Loop, SharePoint, Meetings, CRM, ADO. WorkIQ-only for M365 sources (Graph / m365_* FORBIDDEN as fallbacks; user-paste is first-class). Host-agnostic.",
   "type": "module",
   "bin": {
@@ -42,6 +42,7 @@
   "license": "MIT",
   "scripts": {
     "test": "node --test src/check-workiq.test.mjs src/seed-config.test.mjs src/sanitize-workiq-input.test.mjs src/detect-vertex-repo.test.mjs src/vertex-validate.test.mjs src/emit-vertex.e2e.test.mjs src/config-root-resolve.test.mjs src/forbidden-workiq-phrasings.test.mjs",
+    "test:integration:bootstrap": "node src/bootstrap-dryrun.integration.test.mjs",
     "smoke": "node scripts/smoke.mjs",
     "prepublishOnly": "npm test && npm run smoke"
   },

package/plugin/instructions/customer-hint-discovery.instructions.md ADDED Viewed

@@ -0,0 +1,130 @@
+---
+applyTo: "**/skills/bootstrap-project/**, **/skills/pull-email/**, **/skills/pull-teams/**, **/skills/pull-meetings/**, **/skills/pull-sharepoint/**, **/skills/refresh-project/**"
+description: "Customer-hint discovery sweep — bootstrap MUST attempt WorkIQ-driven discovery for every source whose boundary is empty before declaring blocked-config. Mirrors crm-bootstrap-discovery + loop-bootstrap-discovery pattern. Kushi v4.8.0+."
+---
+# Customer-hint discovery sweep (HARD RULE — kushi v4.8.0+)
+## The defect this rule exists to prevent
+Bootstrap runs that scaffold an empty `<project>/integrations.yml#boundaries.*` and immediately declare every source `blocked-config` — without ever asking WorkIQ "who/what mentions this customer?". The result: a fresh project bootstrap finishes with 0 evidence pulled, the user is told to hand-populate mailboxes / chat IDs / channel IDs / meeting join URLs / SharePoint sites, and the entire value proposition of "bootstrap a project from a customer hint" collapses to a config form. Discovered 2026-05-26 on the HCA bootstrap — every source row read `blocked-config` despite extensive HCA email / Teams / meeting history in the tenant.
+## The rule
+Before `bootstrap-project` (or `refresh-project` on a first pull) is allowed to write `last_status: blocked-config` for **email, teams, meetings, or sharepoint**, the per-source customer-hint discovery sweep defined in the matching doctrine MUST be attempted with the customer hint + lookback window:
+| Source | Doctrine file (mandatory) |
+|---|---|
+| email | `email-bootstrap-discovery.instructions.md` |
+| teams | `teams-bootstrap-discovery.instructions.md` |
+| meetings | `meetings-bootstrap-discovery.instructions.md` |
+| sharepoint | `sharepoint-bootstrap-discovery.instructions.md` |
+| onenote | `bootstrap-project/SKILL.md#step-4a` (already shipped v4.7.x — display-name driven) |
+| loop | `loop-bootstrap-discovery.instructions.md` (already shipped v4.6.0) |
+| crm | `crm-bootstrap-discovery.instructions.md` (already shipped v3.11.0) |
+| ado | `ado-bootstrap-discovery.instructions.md` |
+`blocked-config` is legitimate ONLY in case 2 below. Distinguish these two cases:
+1. **The sweep ran** and returned 0 candidates → status is `unresolved` (sweep succeeded but no hits), not `blocked-config`. Use the `discovery-empty` annotation in the per-source notes.
+2. **The sweep could NOT run** because a prerequisite is genuinely missing (e.g. CRM/ADO need `<workspace>/.kushi/config/shared/integrations.yml` populated, SharePoint local-folder enumeration needs a local OneDrive sync path the customer hint cannot infer). In that case, `blocked-config` is correct, and the per-source `next_step` MUST cite the specific missing config field.
+Any other path that writes `blocked-config` without attempting the sweep is a **defect**.
+## Required inputs
+- `<customer-hint>` — verbatim string the user provided at bootstrap invocation (e.g. `HCA`). Captured by `bootstrap-project` Step 0/1 and persisted to `<project>/bootstrap-status.md` under `Customer Hint:`. Used VERBATIM in v4.8.0 — no fuzzy expansion (deferred to v4.9.0).
+- `<lookback-days>` — defaults to **90** for discovery (longer than the 30-day pull window so historical chats / series still surface). Configurable via `<workspace>/.kushi/config/user/m365-mutable.json#bootstrap.discoveryLookbackDays`.
+- `<project>` — engagement name (already resolved).
+- `<alias>` — current contributor.
+## Required outputs
+For each source whose sweep runs:
+1. **Append discovered IDs / URLs / paths to `<engagement-root>/<project>/integrations.yml#boundaries.<source>.<key>`** as plain strings (existing pull-* skills consume strings — do NOT change the array element type). Idempotent: deduplicate by exact-string equality.
+2. **Write a sidecar discovery record to `<engagement-root>/<project>/Evidence/_discovery/<YYYY-MM-DD>_<source>_discovery-<alias>.yml`** (one file per source, per date, per contributing alias) with the per-row metadata (`discovered_by`, `discovered_at`, `needs_review`, `query`, `workiq_request_id`, `confidence`). Schema:
+   ```yaml
+   source: email | teams | meetings | sharepoint
+   project: <project>
+   customer_hint: '<hint>'
+   lookback_days: 90
+   discovered_at: '<ISO-8601>'
+   discovered_by: <alias>
+   query: '<exact WorkIQ prompt>'
+   workiq_request_id: '<request-id from response>'
+   total_candidates_found: <N>
+   candidates_persisted: <M>     # ≤ 10 after cap
+   candidates_deferred: <N - M>  # written to OPEN-QUESTIONS-DRAFT.md
+   results:
+     - value: '<string written to boundary>'
+       label: '<human-readable label>'
+       confidence: high | medium | low
+       needs_review: true
+   ```
+3. **If `total_candidates_found > 10`**, persist the top 10 by recency to the boundary and append the remaining N–10 to `<project>/OPEN-QUESTIONS-DRAFT.md` under `## Discovery sweep — candidates over cap` with one row per candidate.
+4. **Add a `## Discovery Sweep Results` section to `<project>/bootstrap-status.md`** after `## Context Artifact Status`. Table shape:
+   ```
+   ## Discovery Sweep Results
+   | Source | Hint | Query attempted | Candidates found | Persisted | Deferred | Discovered by |
+   |---|---|---|---|---|---|---|
+   | email | HCA | "In my Inbox..." | 7 | 7 | 0 | ushak |
+   | teams | HCA | "In my Teams chats..." | 14 | 10 | 4 | ushak |
+   | meetings | HCA | "In my calendar..." | 3 | 3 | 0 | ushak |
+   | sharepoint | HCA | "In my SharePoint sites..." | 0 | 0 | 0 | ushak |
+   ```
+## Behavior matrix (per source)
+| Sweep result | `boundaries.<source>.<key>` written? | `last_status` | `retry_signal` | Open Questions written? |
+|---|---|---|---|---|
+| 0 candidates | no | `unresolved` (annotated `discovery-empty`) | `user-action` | yes — "Discovery sweep for `<source>` returned 0 hits for `<hint>`; widen hint or seed `boundaries.<source>.<key>` manually" |
+| 1–10 candidates | yes (all) | `completed-with-coverage-gaps` (because rows are `needs_review`) | `watch` | only if any row low-confidence |
+| > 10 candidates | yes (top 10 by recency) | `completed-with-coverage-gaps` | `watch` | yes — "Discovery sweep returned >10 candidates; review the deferred list" |
+| **WorkIQ punted — no surface for this source** (sharepoint sites, teams channels — v4.8.1 empirical) | no | `unresolved` (annotated `discovery-empty-no-workiq-surface`) | `user-action` | yes — cite that this source has no known WorkIQ discovery surface and direct the user to manual configuration (the per-source doctrine has the exact wording) |
+| Sweep query failed (WorkIQ error / classified per `fallback-status-reporting.instructions.md`) | no | `deferred` (write marker per `deferred-retry-on-workiq-fail.instructions.md`) | `retry` | no — next refresh will drain |
+| Prerequisite genuinely missing (e.g. CRM shared config empty) | no | `blocked-config` | `user-action` | yes — cite specific missing field |
+## Multi-contributor safety
+When the boundary already has rows from another alias (or the sidecar `Evidence/_discovery/` already has entries):
+1. **Append-only** — the new alias's sweep adds rows that are not already present (dedupe by exact-string equality on the boundary value). Never remove or rewrite another alias's row.
+2. **Sidecar file is per-source-per-date-per-alias** — multiple contributors on the same day produce separate sidecar files (`2026-05-26_email_discovery-ushak.yml`, `2026-05-26_email_discovery-stand.yml`). Bootstrap-status's Discovery Sweep Results table shows one row per source × alias.
+3. **`Discovered by` column in bootstrap-status's per-source Context Artifact Status row** cites the most recent discovering alias. Preserve other aliases' rows in `## Contributors who have bootstrapped this project` per `multi-user-shared-files.instructions.md`.
+## Rerun behavior
+When the user re-runs bootstrap on a project that already has populated boundaries:
+| Boundary state | Sweep behavior |
+|---|---|
+| Empty | Run sweep (full discovery). |
+| Has rows, none `needs_review: true` (all confirmed) | **Skip sweep.** Boundary is gospel — do not re-discover. |
+| Has rows, some `needs_review: true` (sidecar shows confidence < high) | Run sweep. Merge new candidates by ID. Promote previously-discovered candidates from `needs_review: true` → `false` only if user has manually confirmed (i.e. removed the `needs_review` flag from the sidecar). |
+| User passes `--force-rediscover` | Run sweep regardless of state. Merge new candidates; never delete user-confirmed rows. |
+## Forbidden behaviors
+1. **Declaring `blocked-config` without running the sweep first** for email/teams/meetings/sharepoint — see "The defect" above. CRM/ADO have their own mandates in `crm-bootstrap-discovery` / `ado-bootstrap-discovery`.
+2. **Auto-narrowing the customer hint** ("HCA" → "HCA Healthcare Inc"). Use the hint verbatim. Smart-expansion is v4.9.0.
+3. **Discovering across all sources in one mega-query.** Each source has its own narrow WorkIQ prompt (see per-source doctrines). Mega-queries punt to Graph and return empty.
+4. **Inferring local OneDrive sync paths from the hint** for SharePoint `local_folders[]`. Discovery populates `site_urls[]` only.
+5. **Calling Graph / `m365_*` directly for discovery.** Per `workiq-only.instructions.md` the four sources covered here use WorkIQ exclusively. The only allowed `m365_*` exceptions remain `m365_list_chat_messages` (parallel structured dump per pull-teams) and the per-source carve-outs already named in their pull-* SKILLs.
+## References
+- `crm-bootstrap-discovery.instructions.md` — the original "must-attempt-before-declaring-disabled" doctrine; this file mirrors its pattern for the WorkIQ-driven sources.
+- `loop-bootstrap-discovery.instructions.md` — Loop-specific discovery + registry shape.
+- `scope-boundaries.instructions.md` — the broader partial-determinism contract; this doctrine is the discovery-time enabler that makes boundaries achievable.
+- `workiq-only.instructions.md` — what discovery is NOT allowed to call (Graph, m365_* for content).
+- `status-taxonomy.instructions.md` — the closed-set status vocabulary; `unresolved` + `discovery-empty` annotation vs. `blocked-config`.
+- `fallback-status-reporting.instructions.md` — how to classify WorkIQ punts during the sweep.
+- `multi-user-shared-files.instructions.md` — append-only rules for the boundary file.
+- `bootstrap-status-format.instructions.md` — where `## Discovery Sweep Results` slots in the report.

package/plugin/instructions/email-bootstrap-discovery.instructions.md ADDED Viewed

@@ -0,0 +1,105 @@
+---
+applyTo: "**/skills/bootstrap-project/**, **/skills/pull-email/**, **/skills/refresh-project/**"
+description: "Outlook mail folder discovery — single approved WorkIQ phrasing that scans recent Inbox/subfolder contents for the customer hint, ranks folders by hit-density, writes top candidates into mailboxes[]. Source-specific subset of customer-hint-discovery."
+---
+# Email bootstrap discovery (kushi v4.8.0+)
+Governed by `customer-hint-discovery.instructions.md` — read that file first for the orchestration contract, rerun rules, multi-contributor merge behavior, and behavior matrix.
+## What this sweep populates
+| Boundary key | Element shape | Example |
+|---|---|---|
+| `boundaries.email.mailboxes[]` | string — mail folder path relative to mailbox root | `"Inbox"`, `"Inbox/HCA"`, `"FDE/HCA Intake"` |
+Optional narrowing fields (`sender_domains[]`, `subject_keywords[]`) are NOT populated by the sweep — they are user-supplied narrowing.
+## Approved WorkIQ query (the ONLY shape that returns this data)
+Issued ONCE per bootstrap, per project:
+```
+workiq ask -q "In my Outlook mail folders, find the top mail folders that contain emails mentioning '<HINT>' received in the last <N> days. Return a flat table with: folder path (from mailbox root), message count, most recent received date. Sort by message count descending. Do not summarize. Do not truncate. Flat table only."
+```
+Substitution rules:
+- `<HINT>` = the verbatim customer hint from `bootstrap-status.md#Customer Hint` (e.g. `HCA`).
+- `<N>` = `m365-mutable.json#bootstrap.discoveryLookbackDays` (default 90).
+The phrasing is **natural-language by folder content** — empirically the only shape that returns folder paths. WorkIQ punts on any other shape (see Forbidden phrasings).
+## Forbidden phrasings (will fail empirically — do NOT emit)
+| Forbidden phrasing | Why it fails |
+|---|---|
+| `"List all my Outlook mail folders. Return folder name and folderId for each."` | Enumerate-verb on the folder space punts to `m365_list_mail_folders` / Graph and returns Graph-Explorer guidance instead of data. |
+| `"Search Microsoft 365 for mail folders matching '<hint>'."` | Structured-search verb routes to summary mode, not folder data. |
+| `"What is the folder ID for the '<name>' folder in my mailbox?"` | ID-lookup question punts to Graph. |
+| `"Get the mail folder hierarchy from my Outlook account."` | Hierarchy verb returns prose, not folder paths. |
+| `"$filter=displayName eq '<name>'"` (OData syntax embedded in the query) | Filter syntax fails — WorkIQ does not pass through OData. |
+## Parsing the response
+WorkIQ returns a markdown table. Parse rows where `folder path` is non-empty and `message count >= 1`:
+1. Trim each folder path (no leading/trailing slashes).
+2. Deduplicate against existing `boundaries.email.mailboxes[]`.
+3. Cap at top 10 by `message count`.
+4. The remainder (if any) goes to `<project>/OPEN-QUESTIONS-DRAFT.md` per the orchestration doctrine.
+5. Confidence ranking:
+   - `high` — folder path explicitly contains the hint (case-insensitive substring), e.g. `Inbox/HCA Intake` for hint `HCA`.
+   - `medium` — message count ≥ 10 AND folder is a well-known root (`Inbox`, `Sent Items`, `Archive`).
+   - `low` — everything else.
+## Sidecar file shape
+Written to `<engagement-root>/<project>/Evidence/_discovery/<YYYY-MM-DD>_email_discovery-<alias>.yml`:
+```yaml
+source: email
+project: '<project>'
+customer_hint: '<HINT>'
+lookback_days: 90
+discovered_at: '<ISO-8601>'
+discovered_by: '<alias>'
+query: 'In my Outlook mail folders, find the top mail folders that contain emails mentioning ''<HINT>'' received in the last 90 days. ...'
+workiq_request_id: '<request-id>'
+total_candidates_found: 7
+candidates_persisted: 7
+candidates_deferred: 0
+results:
+  - value: 'Inbox/HCA Intake'
+    label: 'Inbox/HCA Intake (245 messages, last 2026-05-25)'
+    confidence: high
+    needs_review: true
+  - value: 'Inbox'
+    label: 'Inbox (87 messages, last 2026-05-26)'
+    confidence: medium
+    needs_review: true
+```
+`needs_review: true` is set on EVERY row by default. The user clears it manually after confirming the folder.
+## Bootstrap-status row
+The Discovery Sweep Results table row (per `customer-hint-discovery.instructions.md`):
+```
+| email | <HINT> | "In my Outlook mail folders..." | 7 | 7 | 0 | <alias> |
+```
+## When this sweep does NOT run
+- The user already populated `boundaries.email.mailboxes[]` and no entry carries `needs_review: true` in the sidecar (all rows confirmed) — boundary is gospel; skip sweep.
+- The active profile disables email (`m365-mutable.json#sources.email.enabled = false`) — write `last_status: not-applicable`.
+- WorkIQ itself is unreachable (signed out, EULA pending, CLI missing) — write `last_status: blocked-auth`, retry_signal `user-action`. Do NOT write `blocked-config`.
+## References
+- `customer-hint-discovery.instructions.md` — orchestration contract.
+- `pull-email/SKILL.md` — what consumes `boundaries.email.mailboxes[]` (folder fast-path + root fallback).
+- `workiq-only.instructions.md` — why m365_list_mail_folders / Graph are forbidden.
+- `deferred-retry-on-workiq-fail.instructions.md` — marker shape when sweep returns a WorkIQ error.
+- `status-taxonomy.instructions.md` — `unresolved` vs. `blocked-config` distinction.

package/plugin/instructions/meetings-bootstrap-discovery.instructions.md ADDED Viewed

@@ -0,0 +1,104 @@
+---
+applyTo: "**/skills/bootstrap-project/**, **/skills/pull-meetings/**, **/skills/refresh-project/**"
+description: "Recurring meeting series resolution — subject-scoped WorkIQ query spanning the discovery lookback window (default 90 days) + forward 30 days; collapses occurrence join-urls to a single series url; cross-references discovered Teams chat ids to find implied series. Writes series_join_urls[]. Source-specific subset of customer-hint-discovery."
+---
+# Meetings bootstrap discovery (kushi v4.8.0+)
+Governed by `customer-hint-discovery.instructions.md` — read that file first for the orchestration contract.
+## What this sweep populates
+| Boundary key | Element shape | Example |
+|---|---|---|
+| `boundaries.meetings.series_join_urls[]` | string — Teams meeting URL. Two accepted shapes (v4.8.1+): (1) `meetup-join` URL `https://teams.microsoft.com/l/meetup-join/...` if returned, (2) `meeting/details?eventId=` URL `https://teams.microsoft.com/l/meeting/details?eventId=...` — empirically the form WorkIQ returns. Pull-meetings accepts both via the eventId resolver. | `"https://teams.microsoft.com/l/meeting/details?eventId=AAMkAD..."` |
+Optional `organizer_emails[]` is NOT populated by the sweep (user narrowing only).
+## Approved WorkIQ query (the ONLY shape that returns this data)
+Issued ONCE per bootstrap, per project:
+```
+workiq ask -q "In my Outlook calendar, find the recurring meeting series and one-off meetings whose subject mentions '<HINT>' and that have at least one occurrence in the last <N> days OR the next 30 days. Return a flat table with: subject, organizer name, organizer email, recurrence pattern (single | daily | weekly | other), Teams meeting join URL or meeting details URL, most recent occurrence date. Do not summarize. Do not truncate. Flat table only."
+```
+**v4.8.1 empirical finding:** WorkIQ rarely returns the `meetup-join` URL form — it returns `meeting/details?eventId=...` URLs instead (the calendar-event-ID-anchored details URL). Both forms uniquely identify the meeting/series; pull-meetings v2.x+ accepts either and resolves the join URL on demand.
+Substitution rules:
+- `<HINT>` = verbatim customer hint.
+- `<N>` = `m365-mutable.json#bootstrap.discoveryLookbackDays` (default 90).
+The look-forward 30-day window catches series whose previous occurrence was outside the lookback but next occurrence is imminent — common for new engagements where the customer hint first appears in upcoming meetings.
+This phrasing — **natural-language by subject + organizer + join URL request** — is empirically the only shape that returns calendar series data. Other phrasings punt.
+## Forbidden phrasings (will fail empirically — do NOT emit)
+| Forbidden phrasing | Why it fails |
+|---|---|
+| `"List all my Teams meetings."` / `"List my upcoming meetings."` | Bulk enumerate punts to `m365_list_meetings` / `m365_list_events` / Graph calendar API. |
+| `"Get the calendar event ID for the meeting titled '<subject>'."` | ID-lookup punts to Graph. |
+| `"Search Microsoft 365 calendar for events matching '<hint>'."` | Structured-search verb routes to summary mode. |
+| `"What meetings did I have last week?"` | Vague time question routes to summary, not a structured table. |
+| `"$filter=subject eq '<subject>'"` (OData) | Filter syntax fails. |
+## Parsing the response
+1. Extract any URL column. Accept rows whose URL matches EITHER pattern (v4.8.1):
+   - `^https?://teams\.microsoft\.com/l/meetup-join/` — preferred form.
+   - `^https?://teams\.microsoft\.com/l/meeting/details\?eventId=` — empirical form WorkIQ returns. Persist as-is; pull-meetings resolves the join URL via eventId lookup.
+   If the row says `"Not available in source"` or similar AND no URL is present in any other cell of the row, log the row to OPEN-QUESTIONS-DRAFT.md under `## Meetings discovered without join URLs` (subject + organizer + recurrence) so the user can supply the URL manually. Do NOT persist a placeholder.
+2. **Collapse recurring series:** for rows with `recurrence pattern != single`, the URL of any occurrence is the canonical series URL (Outlook reuses one URL per series). Deduplicate by URL.
+3. Deduplicate against existing `boundaries.meetings.series_join_urls[]`.
+4. Cap at top 10 by `most recent occurrence date` (descending).
+5. Confidence ranking:
+   - `high` — subject contains the hint (case-insensitive substring) AND recurrence is `daily` / `weekly` (active recurring series).
+   - `medium` — subject contains the hint, single occurrence.
+   - `low` — match was on organizer email/domain only (rare — the query is subject-scoped, but if WorkIQ returns inferred matches, flag them).
+## Sidecar file shape
+Written to `<engagement-root>/<project>/Evidence/_discovery/<YYYY-MM-DD>_meetings_discovery-<alias>.yml`. Schema per `email-bootstrap-discovery.instructions.md` § Sidecar file shape, with `source: meetings`.
+Additional fields for the meetings sidecar (per result row: `recurrence`, `organizer_email`, `most_recent_occurrence`):
+```yaml
+results:
+  - value: 'https://teams.microsoft.com/l/meetup-join/19%3a...'
+    label: 'HCA Weekly Sync (Weekly Tue 10:00 ET; organizer: jdoe@hcahealthcare.com)'
+    recurrence: weekly
+    organizer_email: 'jdoe@hcahealthcare.com'
+    most_recent_occurrence: '2026-05-21'
+    confidence: high
+    needs_review: true
+```
+## Bootstrap-status row
+```
+| meetings | <HINT> | "In my Outlook calendar..." | 3 | 3 | 0 | <alias> |
+```
+## When this sweep does NOT run
+- `boundaries.meetings.series_join_urls[]` is populated and contains no `needs_review` rows.
+- The active profile disables meetings.
+- WorkIQ unreachable — `blocked-auth`.
+## Cross-reference with teams sweep
+After both sweeps complete, the orchestrator MAY cross-reference: each meeting's join URL implies a chat-id (Teams creates one chat per recurring meeting). If the meeting sweep persists a join URL whose implied chat-id is NOT already in `boundaries.teams.chat_ids[]`, log a note in `OPEN-QUESTIONS-DRAFT.md`:
+> Discovery cross-reference: meeting `<subject>` (`<joinUrl>`) implies a Teams chat that is not in `boundaries.teams.chat_ids[]`. Consider adding the chat manually if you want chat-side evidence captured alongside the meeting.
+This is informational only — the orchestrator does NOT auto-add the chat-id (the meeting-implied chat-id format is not directly recoverable from the join URL without a separate Graph call, which is forbidden).
+## References
+- `customer-hint-discovery.instructions.md` — orchestration contract.
+- `pull-meetings/SKILL.md` — what consumes `series_join_urls[]`.
+- `workiq-only.instructions.md` — `m365_list_meetings`, `m365_list_events`, Graph calendar API all forbidden.
+- `meetings-verbatim-required.instructions.md` — what happens after the boundary is populated.
+- `status-taxonomy.instructions.md` — `unresolved` vs. `blocked-config`.

package/plugin/instructions/sharepoint-bootstrap-discovery.instructions.md ADDED Viewed

@@ -0,0 +1,110 @@
+---
+applyTo: "**/skills/bootstrap-project/**, **/skills/pull-sharepoint/**, **/skills/refresh-project/**"
+description: "SharePoint site URL resolution — title/URL-substring WorkIQ phrasing that finds team sites mentioning the customer hint; opens a companion Open Question for the user to supply machine-local OneDrive sync paths. Writes site_urls[] only — local_folders[] stays user-supplied. Source-specific subset of customer-hint-discovery."
+---
+# SharePoint bootstrap discovery (kushi v4.8.0+)
+Governed by `customer-hint-discovery.instructions.md` — read that file first for the orchestration contract.
+## Empirical finding (kushi v4.8.1) — WorkIQ has no SharePoint site-inventory surface
+Validated against the live WorkIQ surface on 2026-05-26 with hint `HCA`: WorkIQ returns a hard punt — *"the sources do not contain site inventory or SharePoint site properties; I cannot construct the requested table without fabricating data (which I will not do per policy)."* This is consistent — tenant-wide SharePoint site enumeration requires Graph admin endpoints that WorkIQ does not expose.
+**Therefore:**
+- The sweep below is retained as a **best-effort** opt-in. It MUST be attempted (so the doctrine self-validates if WorkIQ ever gains a site-inventory surface) but bootstrap MUST NOT block on it and MUST NOT write `last_status: blocked-config` when it returns 0 or a punt response.
+- **The reliable path is `local_folders[]` — user-supplied OneDrive sync paths.** The companion Open Question (below) is the PRIMARY UX for SharePoint bootstrap.
+- When the sweep returns a punt response (recognizable by classifier-keywords `cannot construct`, `fabricating data`, `no site inventory`, `routing to Graph`, `Graph admin endpoint`, `SharePoint Admin`, or by the absence of any markdown table in the response), classify as `discovery-empty-no-workiq-surface` (NOT `blocked-config`).
+## What this sweep populates — and what it does NOT
+| Boundary key | Populated by sweep? | Why |
+|---|---|---|
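+| `boundaries.sharepoint.site_urls[]` | **YES (best-effort)** | Populated only when WorkIQ returns a site table matching the hint in site title, description, or URL. As of v4.8.1 WorkIQ usually punts on this query (see Empirical finding above), so this key often stays empty until the user seeds it. |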
+| `boundaries.sharepoint.local_folders[]` | **NO** | OneDrive sync paths are local-machine state; the customer hint cannot infer the user's local folder layout. User-supplied only. |
+| `boundaries.sharepoint.drive_ids[]` | NO | Graph-direct Drive IDs — out of scope for WorkIQ-only discovery. |
+## Approved WorkIQ query (the ONLY shape that returns this data)
+Issued ONCE per bootstrap, per project:
+```
+workiq ask -q "In the SharePoint sites I have access to, find sites whose site title, site description, or URL mentions '<HINT>'. Return a flat table with: site title, full site URL, default document library name, site description, most recent activity date. Do not summarize. Do not truncate. Flat table only."
+```
+Substitution rules:
+- `<HINT>` = verbatim customer hint.
+- No lookback parameter — site discovery is not time-windowed (sites are durable).
+This phrasing — **natural-language by site title/URL** — is the only shape with any prospect of returning SharePoint site enumerations; as of v4.8.1 even this phrasing commonly punts (see Empirical finding above). Other phrasings punt to Graph or summary mode.
+## Forbidden phrasings (will fail empirically — do NOT emit)
+| Forbidden phrasing | Why it fails |
+|---|---|
+| `"List all SharePoint sites in my tenant."` | Tenant-wide enumerate is privileged and routes to Graph admin endpoints; fails for non-admin users. |
+| `"List the SharePoint sites I am a member of. Return siteId for each."` | ID-lookup phrasing punts to `m365_*` / Graph. |
+| `"Search SharePoint for sites matching '<hint>'."` | Structured-search verb routes to summary mode. |
+| `"What is the site ID for '<name>'?"` | ID-lookup punts to Graph. |
+| `"List the document libraries in site '<url>'."` | Drive-enumeration verb routes to Graph `/drives`. |
+| `"$filter=title eq '<name>'"` (OData) | Filter syntax fails. |
+## Parsing the response
+WorkIQ commonly punts on this query (see Empirical finding above). Apply this classifier BEFORE attempting to parse a table:
+1. **Punt detection** — if the response body contains any of: `cannot construct`, `fabricating data`, `no site inventory`, `routing to Graph`, `Graph admin endpoint`, `SharePoint Admin`, OR the body has no markdown table at all → classify as `discovery-empty-no-workiq-surface`, write `last_status: unresolved` (NOT `blocked-config`), proceed to the companion Open Question. Do NOT attempt further parsing.
+2. If a table IS present, extract `full site URL` column — keep rows whose URL matches `^https?://[^/]+\.sharepoint\.com/sites/`.
+3. Normalize: strip query strings and trailing slashes.
+4. Deduplicate against existing `boundaries.sharepoint.site_urls[]`.
+5. Cap at top 10 by `most recent activity date` (descending). If activity date is missing, fall back to alphabetical site title.
+6. Confidence ranking:
+   - `high` — site title contains the hint (case-insensitive substring).
+   - `medium` — site URL slug contains the hint (e.g. `/sites/HCA-Engagement` for hint `HCA`).
+   - `low` — match was on site description only.
+## Sidecar file shape
+Written to `<engagement-root>/<project>/Evidence/_discovery/<YYYY-MM-DD>_sharepoint_discovery-<alias>.yml`. Schema per `email-bootstrap-discovery.instructions.md` § Sidecar file shape, with `source: sharepoint`.
+```yaml
+results:
+  - value: 'https://contoso.sharepoint.com/sites/HCA-Engagement'
+    label: 'HCA Engagement (last activity 2026-05-24; default library: Documents)'
+    site_title: 'HCA Engagement'
+    default_library: 'Documents'
+    most_recent_activity: '2026-05-24'
+    confidence: high
+    needs_review: true
+```
+## Bootstrap-status row
+```
+| sharepoint | <HINT> | "In the SharePoint sites I have access to..." | 2 | 2 | 0 | <alias> |
+```
+## Companion follow-up — local folder hint
+After the sweep completes and at least one site URL is persisted, write a one-line follow-up to `<project>/OPEN-QUESTIONS-DRAFT.md`:
+> SharePoint sweep discovered N site URLs. To enable local-synced walks (filename + mtime + change events without WorkIQ round-trips), add the matching local OneDrive sync path(s) to `boundaries.sharepoint.local_folders[]` in `<project>/integrations.yml`. The sync path is typically `C:\Users\<you>\<TenantName>\<Site Name> - <Library Name>` on Windows. The sweep cannot infer this — it is machine-local state.
+If the sweep returned 0 site URLs AND `boundaries.sharepoint.local_folders[]` is also empty, the source is `unresolved` and the Open Question is:
+> SharePoint discovery returned no sites for `<HINT>` and no local synced folder is configured. To enable, either widen the customer hint, populate `boundaries.sharepoint.site_urls[]` manually with the engagement's SharePoint URL, or add a local synced folder to `boundaries.sharepoint.local_folders[]`.
+## When this sweep does NOT run
+- `boundaries.sharepoint.site_urls[]` is populated and contains no `needs_review` rows.
+- The active profile disables sharepoint.
+- WorkIQ unreachable — `blocked-auth`.
+## References
+- `customer-hint-discovery.instructions.md` — orchestration contract.
+- `pull-sharepoint/SKILL.md` — what consumes `site_urls[]` (and the always-allowed local folder walk).
+- `workiq-only.instructions.md` — `m365_search_files`, `m365_list_files`, Graph `/drives/*` forbidden for content; local folder walk is ALWAYS allowed because it is a filesystem read.
+- `status-taxonomy.instructions.md` — `unresolved` vs. `blocked-config`.

package/plugin/instructions/status-taxonomy.instructions.md CHANGED Viewed

@@ -20,7 +20,7 @@ Every place a kushi skill renders a per-source or per-task status MUST use a val
 | `blocked-config` | A required configuration value (per-project + global both empty) prevented querying. | `<source>-config-missing` or `<source>-boundary-missing` per `scope-boundaries.instructions.md` Rule 3. |
 | `blocked-permission` | Auth succeeded but the principal lacks access to the target. | HTTP 403 / `accessDenied` / SharePoint `UnauthorizedAccessException` on the canonical API. |
 | `blocked-throttled` | Service throttled the request; no narrower query succeeded. | `tooManyRequests`, `More than 3 retries performed`, `high demand` per `auth-and-retry §3`. |
-| `unresolved` | Discovery returned no matches inside the configured boundary. | All resolution-order steps returned 0 candidates; user has not yet picked or widened. |
+| `unresolved` | Discovery returned no matches inside the configured boundary. | All resolution-order steps returned 0 candidates; user has not yet picked or widened. Includes the kushi v4.8.0+ `discovery-empty` annotation when a customer-hint discovery sweep ran (per `customer-hint-discovery.instructions.md`) and returned 0 hits — distinct from `blocked-config` (sweep was never attempted because a prerequisite was genuinely missing). |
 | `deferred` | WorkIQ failed after doubled-strict retry; deferred-retry marker written. | Per `deferred-retry-on-workiq-fail.instructions.md`. Next refresh drains the queue. |
 | `not-applicable` | Source is not configured for this project. | Source enabled=false or boundary list empty by design (e.g. SharePoint when project has no team site). |
 | `no-run-history` | Source has never been pulled for this project. | Used in backfill rows when creating status artifacts retroactively. |

package/plugin/instructions/teams-bootstrap-discovery.instructions.md ADDED Viewed

@@ -0,0 +1,114 @@
+---
+applyTo: "**/skills/bootstrap-project/**, **/skills/pull-teams/**, **/skills/refresh-project/**"
+description: "Teams chat-id + channel-id resolution — two paired WorkIQ phrasings that surface 1:1 and group chats plus joined-team channels whose messages reference the customer hint. Writes chat_ids[] and channel_ids[] separately. Source-specific subset of customer-hint-discovery."
+---
+# Teams bootstrap discovery (kushi v4.8.0+)
+Governed by `customer-hint-discovery.instructions.md` — read that file first for the orchestration contract, rerun rules, multi-contributor merge behavior, and behavior matrix.
+## What this sweep populates
+| Boundary key | Element shape | Example |
+|---|---|---|
+| `boundaries.teams.chat_ids[]` | string — Teams chat ID (Graph thread-id format) | `"19:abc...@thread.v2"` |
+| `boundaries.teams.channel_ids[]` | string — `<teamId>:<channelId>` composite | `"abc-team-guid:19:def@thread.tacv2"` |
+## Empirical findings (kushi v4.8.1 — what WorkIQ actually returns)
+Validated against the live WorkIQ surface on 2026-05-26 with hint `HCA`:
+- **Query 1 (chats) — works**, but WorkIQ returns `chat ID = N/A` (the column is present but empty). The chat ID MUST be extracted from the per-row message permalink URL — see "Parsing the response" below.
+- **Query 2 (channels) — DOES NOT WORK.** WorkIQ has no Teams channel-enumeration surface. Every empirical attempt either returned empty or reclassified channel hits as chats with `N/A` IDs. The query is retained below as a `best-effort` opt-in only — bootstrap MUST NOT block on it and MUST NOT write `last_status: blocked-config` when it returns 0. Manual user-supplied `channel_ids[]` is the only reliable path.
+## Approved WorkIQ queries (the ONLY shapes that may return this data)
+Two narrow queries — issued in sequence (chats first, then channels). Each issued ONCE per bootstrap, per project.
+### Query 1 — Chats (REQUIRED, primary path)
+```
+workiq ask -q "In my Microsoft Teams 1:1 and group chats, find chats where the topic or any member's name or email mentions '<HINT>' and that had at least one message in the last <N> days. Return a flat table with: chat topic, chat ID, all member display names, all member emails, most recent message date. Do not summarize. Do not truncate. Flat table only."
+```
+### Query 2 — Channels (BEST-EFFORT only; v4.8.1 empirical: returns nothing usable)
+```
+workiq ask -q "In the Microsoft Teams I have joined, find channels whose channel display name or parent team display name mentions '<HINT>'. Return a flat table with: team name, team ID, channel name, channel ID, channel type. Do not summarize. Do not truncate. Flat table only."
+```
+Substitution rules:
+- `<HINT>` = verbatim customer hint.
+- `<N>` = `m365-mutable.json#bootstrap.discoveryLookbackDays` (default 90).
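+The phrasings are **natural-language by chat/channel content** — empirically the only shape that returns chat data (Query 2 uses the same shape but, per the empirical findings above, returns no usable channel data as of v4.8.1). WorkIQ punts on any other shape.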
+## Forbidden phrasings (will fail empirically — do NOT emit)
+| Forbidden phrasing | Why it fails |
+|---|---|
+| `"List all my Teams chats. Return chat ID and topic for each."` | Enumerate-verb punts to `m365_list_chats` / Graph; returns Graph-Explorer guidance instead of data. |
+| `"List the channels in every team I am a member of."` | Bulk enumerate punts to Graph beta endpoints. |
+| `"What is the chat ID for the chat with '<name>'?"` | ID-lookup question — punts to Graph. |
+| `"Search Microsoft 365 Teams for chats matching '<hint>'."` | Structured-search verb routes to summary mode. |
+| `"Get the Teams chat hierarchy for my account."` | Hierarchy verb returns prose. |
+| `"Filter my chats where topic contains '<hint>'."` | Filter-verb routes to OData/Graph and fails. |
+## Parsing the response
+For each query, WorkIQ returns a markdown table.
+### Chat parsing (Query 1)
+WorkIQ returns the `chat ID` column populated as `N/A` (empirical, v4.8.1). The canonical chat ID lives in the per-row message permalink URL. Extract it as follows:
+1. For each row, scan all hyperlink URLs in the `Most Recent Message Date` (and any other) cell. The shape is:
+   `https://teams.microsoft.com/l/message/<chatId>/<messageId>?context=...`
+   where `<chatId>`, once URL-decoded, matches `^19:[^/]+@thread\.(v2|skype|tacv2|spaces)$` (group chats) or the 1:1 form described in step 3.
+2. URL-decode the path segment first (`%3a` → `:`, `%40` → `@`), then extract the first matching `<chatId>`.
+3. If the URL form is `19:48ed1b82-...-..._...@unq.gbl.spaces/...` (1:1 chat), that IS the chat id — keep as-is.
+4. Deduplicate against existing `boundaries.teams.chat_ids[]`.
+5. Cap at top 10 by `most recent message date` (descending).
+6. Confidence ranking:
+   - `high` — `chat topic` explicitly contains the hint (case-insensitive substring).
+   - `medium` — any member's email domain matches a known customer domain (heuristic: hint appears as substring in domain), e.g. hint `HCA` + member `@hcahealthcare.com`.
+   - `low` — match was on a member's display name only.
+**If no message permalink URL is found in a row**, that row CANNOT be persisted to `boundaries.teams.chat_ids[]` — log it to OPEN-QUESTIONS-DRAFT.md under `## Teams chats discovered without IDs` with the topic + members so the user can supply the chat id manually.
+### Channel parsing (Query 2)
+**v4.8.1 empirical finding:** WorkIQ does not surface true Teams channel data. Every attempted query either returned empty or reclassified channel hits as 1:1/group chats with `N/A` IDs. Treat any output from Query 2 as informational:
+1. If the response is empty or every row has `team ID = N/A` AND `channel ID = N/A`: write `last_status: unresolved` with annotation `discovery-empty-no-workiq-surface` (NOT `blocked-config`). Open Question: *"Teams channel-id discovery has no working WorkIQ surface as of kushi v4.8.1. If this engagement uses Teams channels (not chats), populate `boundaries.teams.channel_ids[]` manually with the `<teamId>:<channelId>` composite — found in the channel URL under Teams ⟶ ⋯ ⟶ Get link to channel."*
+2. If a row HAS both `team ID` and `channel ID` populated (rare — happens only when WorkIQ has cached channel metadata from a recent direct probe): build composite `<teamId>:<channelId>`, dedupe, cap at 10.
+3. The cross-reference with Query 1 chats is informational only — do NOT auto-convert a chat-id into a channel-id.
+## Sidecar file shape
+Two sidecar files (one per query). Written to `<engagement-root>/<project>/Evidence/_discovery/<YYYY-MM-DD>_teams-chats_discovery-<alias>.yml` and `<YYYY-MM-DD>_teams-channels_discovery-<alias>.yml`.
+Schema is the same as `email-bootstrap-discovery.instructions.md` § Sidecar file shape, with `source: teams-chats` or `source: teams-channels`.
+## Bootstrap-status rows
+Two rows in the Discovery Sweep Results table:
+```
+| teams-chats | <HINT> | "In my Microsoft Teams 1:1 and group chats..." | 14 | 10 | 4 | <alias> |
+| teams-channels | <HINT> | "In the Microsoft Teams I have joined..." | 2 | 2 | 0 | <alias> |
+```
+## When this sweep does NOT run
+- Both `boundaries.teams.chat_ids[]` AND `boundaries.teams.channel_ids[]` are populated and contain no `needs_review` rows.
+- The active profile disables teams.
+- WorkIQ unreachable — write `blocked-auth`.
+## References
+- `customer-hint-discovery.instructions.md` — orchestration contract.
+- `pull-teams/SKILL.md` — what consumes `chat_ids[]` and `channel_ids[]`.
+- `workiq-only.instructions.md` — `m365_list_chats` / `m365_search_chats` forbidden.
+- `status-taxonomy.instructions.md` — `unresolved` vs. `blocked-config`.

package/plugin/skills/bootstrap-project/SKILL.md CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 name: "bootstrap-project"
-version: "2.3.3"
-description: "First-time setup for a project: machine preflight, side-by-side config, engagement-root + project resolution, initial 30d snapshot+stream pull across all enabled sources. Verbatim-by-default per `verbatim-by-default.instructions.md` — every enabled source dispatched, no silent skips. CRM bootstrap discovery REQUIRED to run live Dataverse 4-step resolution before declaring disabled per `crm-bootstrap-discovery.instructions.md` (v2.3.0). Writes per-user refresh report per `run-reports.instructions.md`. Cleans stale no-match notes on resolution per `cleanup-on-resolution.instructions.md`. Builds State/ only on `full` profile. Idempotent."
+version: "2.4.0"
+description: "First-time setup for a project: machine preflight, side-by-side config, engagement-root + project resolution, customer-hint discovery sweep across all WorkIQ-driven sources (kushi v4.8.0+, per customer-hint-discovery.instructions.md), initial 30d snapshot+stream pull across all enabled sources. Verbatim-by-default per `verbatim-by-default.instructions.md` — every enabled source dispatched, no silent skips. Discovery sweep MANDATORY before declaring blocked-config for email/teams/meetings/sharepoint per customer-hint-discovery.instructions.md. CRM bootstrap discovery REQUIRED per `crm-bootstrap-discovery.instructions.md` (v2.3.0). Writes per-user refresh report per `run-reports.instructions.md`. Cleans stale no-match notes on resolution per `cleanup-on-resolution.instructions.md`. Builds State/ only on `full` profile. Idempotent."
 ---
 # Skill: bootstrap-project
@@ -114,9 +114,46 @@ The `State/` subtree is created **only on `full` profile**. On `standard`, only
 **Pin hook (v4.5.0):** immediately after scaffold, per `onedrive-pin-policy.instructions.md`, extend the OneDrive pin set to cover the new folders — `<project>/integrations.yml`, `<project>/bootstrap-status.md` (after Step 7 writes it), `<project>/Evidence/<alias>/`, `<project>/Evidence/_Consolidated/` (when created), and `<project>/State/` (when scaffolded). Idempotent + additive — never unpins. Skips silently on Linux or when OneDrive isn't running. This keeps every contributor's own slice always-on-device while leaving other contributors' evidence cloud-only.
+### Step 3.5 — Customer-hint discovery sweep (REQUIRED, kushi v4.8.0+)
+Per `customer-hint-discovery.instructions.md` — **before the boundaries gate runs in Step 4**, bootstrap MUST attempt a WorkIQ-driven discovery sweep for every source whose boundary is currently empty AND whose WorkIQ-driven discovery doctrine exists. This prevents the silent-skip defect where bootstrap scaffolds empty boundaries and immediately writes every source as `blocked-config` without ever asking WorkIQ what mentions the customer.
+For each source in the table below, if `<engagement-root>/<project>/integrations.yml#boundaries.<source>.<required-key>` is empty (or contains only `needs_review: true` rows from a prior incomplete sweep), dispatch the sweep using the customer hint from Step 1 + `<discoveryLookbackDays>` (default 90, configurable via `m365-mutable.json#bootstrap.discoveryLookbackDays`):
+| Source | Sweep doctrine | Populates | Required-key gate |
+|---|---|---|---|
+| email | `email-bootstrap-discovery.instructions.md` | `boundaries.email.mailboxes[]` | `mailboxes` empty |
+| teams | `teams-bootstrap-discovery.instructions.md` | `boundaries.teams.chat_ids[]` + `channel_ids[]` | BOTH empty |
+| meetings | `meetings-bootstrap-discovery.instructions.md` | `boundaries.meetings.series_join_urls[]` | `series_join_urls` empty |
+| sharepoint | `sharepoint-bootstrap-discovery.instructions.md` | `boundaries.sharepoint.site_urls[]` | `site_urls` empty AND `local_folders` empty |
+| onenote | existing Step 4a (display-name driven, v4.7.x) | `boundaries.onenote.section_file_ids[]` + `section_group_ids[]` | both empty |
+| loop | `loop-bootstrap-discovery.instructions.md` (v4.6.0) | `boundaries.loop.workspace_ids[]` | empty |
+| crm | `crm-bootstrap-discovery.instructions.md` (v3.11.0) | `boundaries.crm.record_ids[]` + `request_ids[]` | both empty AND `crm:` shared block populated |
+| ado | `ado-bootstrap-discovery.instructions.md` | `boundaries.ado.area_paths[]` | empty AND `ado:` shared block populated |
+For each sweep:
+1. Read the per-source doctrine for the exact approved WorkIQ query shape.
+2. Issue ONE WorkIQ ask per source per project (chats + channels are TWO asks under the same teams doctrine).
+3. Parse the response per the doctrine's parsing rules.
+4. Cap at 10 candidates by recency (or per-doctrine ordering).
+5. Append discovered values to `<project>/integrations.yml#boundaries.<source>.<key>` as plain strings (deduplicate by exact-string equality).
+6. Write sidecar `<project>/Evidence/_discovery/<YYYY-MM-DD>_<source>_discovery-<alias>.yml` with per-row metadata (`discovered_by`, `discovered_at`, `needs_review: true`, `confidence`, `query`, `workiq_request_id`).
+7. If `> 10` candidates: append the remainder to `<project>/OPEN-QUESTIONS-DRAFT.md` under `## Discovery sweep — candidates over cap`.
+8. If `0` candidates: write `last_status: unresolved` (NOT `blocked-config`) and append a one-line widen-hint suggestion to Open Questions.
+9. If WorkIQ errors: write a `deferred-retry` marker per `deferred-retry-on-workiq-fail.instructions.md` and set `last_status: deferred`. Do NOT skip ahead to `blocked-config`.
+Run sweeps **in parallel** where possible (they're independent WorkIQ asks). Total wall time should be ≤ 5× single-ask latency.
+**Rerun rule** — if the boundary already has at least one row that is NOT `needs_review: true` in the sidecar (user manually confirmed), the sweep is **skipped** for that source. Boundary is gospel. Pass `--force-rediscover` to override.
+**Forbidden:** declaring `last_status: blocked-config` for email/teams/meetings/sharepoint without first running this sweep. That is a defect per `customer-hint-discovery.instructions.md` § The rule. `blocked-config` is only legitimate when a prerequisite is genuinely missing so the sweep could not run (e.g. the CRM/ADO shared connection block is empty). A sweep that ran and returned 0 candidates — including SharePoint when both `site_urls` AND `local_folders` are empty — is `unresolved`, per item 8 above and `sharepoint-bootstrap-discovery.instructions.md`.
+After all sweeps complete, write the `## Discovery Sweep Results` table to `<project>/bootstrap-status.md` per `customer-hint-discovery.instructions.md` § Required outputs (4).
 ### Step 4 — Initial pull (last 30 days)
-**Boundaries gate** (kushi v3.7.0+, per `scope-boundaries.instructions.md`): before dispatching any `pull-*`, read `<engagement-root>/<project>/integrations.yml#boundaries` and verify each enabled source has its required boundary key populated. For sources where bootstrap can auto-populate from existing `m365-mutable.json` discovery hints (e.g. a previously-discovered `section_file_id` lands in `boundaries.onenote.section_file_ids`), do so and continue. For sources where the boundary cannot be auto-populated, write the source as **disabled** in `integrations.yml` and add a one-liner to `<project>/OPEN-QUESTIONS-DRAFT.md` (or `State/09_open-questions.md` on `full` profile) asking the user to fill the boundary and re-enable.
+**Boundaries gate** (kushi v3.7.0+, per `scope-boundaries.instructions.md`): before dispatching any `pull-*`, read `<engagement-root>/<project>/integrations.yml#boundaries` and verify each enabled source has its required boundary key populated. After Step 3.5, many of these should now contain discovered rows (annotated `needs_review: true` in their sidecars). For sources where the sweep ran but returned 0 candidates, the status is `unresolved` (not `blocked-config`) — add a one-liner to `<project>/OPEN-QUESTIONS-DRAFT.md` asking the user to widen the hint or manually seed the boundary. For sources where the sweep COULD NOT run because a prerequisite is genuinely missing (CRM/ADO shared connection block empty; SharePoint local-folder discovery), `blocked-config` is correct and the `next_step` MUST cite the specific missing field.
 For CRM and ADO additionally verify the shared connection block exists in `<workspace>/.kushi/config/shared/integrations.yml` (`crm:` block with `environmentUrl` + `tenantId`, OR `ado:` block with `organization` + `project`) with non-placeholder values; if missing, prompt the user to fill those two/four fields directly (no separate template files — they live in `templates/init/integrations.template.yml`) and park in Open Questions with the path. **Do NOT auto-improvise** by inferring a tenant/org or by narrating CRM evidence from email — both are explicit anti-patterns in v3.7.0.

package/plugin/skills/pull-email/SKILL.md CHANGED Viewed

@@ -43,6 +43,10 @@ This skill REFUSES to query unless `<engagement-root>/<project>/integrations.yml
 - `boundaries.email.subject_keywords` — optional narrowing.
 - `boundaries.date_window_days` — defaults to 30 if absent.
+## Bootstrap discovery (kushi v4.8.0+, per `customer-hint-discovery.instructions.md`)
+When `bootstrap-project` runs with `boundaries.email.mailboxes[]` empty, it MUST attempt the WorkIQ customer-hint discovery sweep before declaring `blocked-config`. The full doctrine — approved query phrasing, forbidden phrasings, parsing rules, confidence ranking, sidecar shape, cap-at-10 behavior, and rerun rules — lives in `email-bootstrap-discovery.instructions.md`. Declaring `last_status: blocked-config` for email without first running that sweep is a defect.
 Every WorkIQ ask MUST be scoped to those mailboxes + (if set) sender_domains + subject_keywords. Empty hits inside the boundary → write Coverage Notes citing the limiting key; do NOT widen the scope. (`m365_search_emails` / `m365_list_emails` / any Graph call is FORBIDDEN per `workiq-only.instructions.md`; on WorkIQ failure, write a deferred-retry marker per `deferred-retry-on-workiq-fail.instructions.md` and continue.)
 Refusal message when boundary is missing:

package/plugin/skills/pull-meetings/SKILL.md CHANGED Viewed

@@ -52,6 +52,10 @@ This skill REFUSES to query unless `<engagement-root>/<project>/integrations.yml
 - `boundaries.meetings.organizer_emails` — optional additional filter.
 - `boundaries.date_window_days` — defaults to 30 if absent.
+## Bootstrap discovery (kushi v4.8.0+, per `customer-hint-discovery.instructions.md`)
+When `bootstrap-project` runs with `boundaries.meetings.series_join_urls[]` empty, it MUST attempt the WorkIQ customer-hint discovery sweep before declaring `blocked-config`. The full doctrine — approved WorkIQ query for meeting series by subject, forbidden phrasings, recurring-series URL collapse, confidence ranking, sidecar shape, cap-at-10 behavior, cross-reference-with-teams-sweep hint, and rerun rules — lives in `meetings-bootstrap-discovery.instructions.md`. Declaring `last_status: blocked-config` for meetings without first running that sweep is a defect.
 The pre-existing `subjectKeywords` / `knownSeries` discovery loop is now a **bootstrap-time aid only** — it helps populate `boundaries.meetings.series_join_urls` once. At pull time, only meetings whose `joinUrl` is in the boundary list are processed.
 Refusal message when boundary is missing:

package/plugin/skills/pull-sharepoint/SKILL.md CHANGED Viewed

@@ -46,6 +46,10 @@ This skill REFUSES to query unless `<engagement-root>/<project>/integrations.yml
 - `boundaries.sharepoint.drive_ids` — optional Graph-direct access.
 - `boundaries.date_window_days` — defaults to 30 if absent.
+## Bootstrap discovery (kushi v4.8.0+, per `customer-hint-discovery.instructions.md`)
+When `bootstrap-project` runs with BOTH `boundaries.sharepoint.site_urls[]` AND `local_folders[]` empty, it MUST attempt the WorkIQ customer-hint discovery sweep before declaring `blocked-config`. The full doctrine — approved WorkIQ query for sites by title/description/URL, forbidden phrasings, confidence ranking, sidecar shape, cap-at-10 behavior, companion local-folder follow-up Open Question, and rerun rules — lives in `sharepoint-bootstrap-discovery.instructions.md`. The sweep populates `site_urls[]` only — `local_folders[]` remains user-supplied because the customer hint cannot infer machine-local OneDrive sync paths. Declaring `last_status: blocked-config` for sharepoint when both arrays are empty without first running that sweep is a defect.
 The walk is **strictly inclusive** — only paths/sites/drives in the boundary are walked. There is no auto-discovery of additional folders. `path_includes`/`path_excludes` from `.settings.yml` further narrow inside the boundary; they cannot widen it.
 Refusal message when boundary is missing:

package/plugin/skills/pull-teams/SKILL.md CHANGED Viewed

@@ -62,6 +62,10 @@ This skill REFUSES to query unless `<engagement-root>/<project>/integrations.yml
 - `boundaries.teams.channel_ids` — REQUIRED for any channel evidence (empty = no channel pulls).
 - `boundaries.date_window_days` — defaults to 30 if absent.
+## Bootstrap discovery (kushi v4.8.0+, per `customer-hint-discovery.instructions.md`)
+When `bootstrap-project` runs with `boundaries.teams.chat_ids[]` AND `channel_ids[]` empty, it MUST attempt the WorkIQ customer-hint discovery sweep before declaring `blocked-config`. The full doctrine — two narrow WorkIQ queries (one for chats, one for channels), approved/forbidden phrasings, parsing rules, confidence ranking, sidecar shape, cap-at-10 behavior, and rerun rules — lives in `teams-bootstrap-discovery.instructions.md`. Declaring `last_status: blocked-config` for teams without first running both sweeps is a defect.
 There is **no fuzzy chat discovery** in v3.7.0+. The pre-existing `chatHints` / `participantHints` are now treated as **discovery aids during bootstrap only** — they help the user populate `boundaries.teams.chat_ids` once, and after that they are ignored at pull time.
 Refusal message when boundary is missing:

package/src/bootstrap-dryrun.integration.test.mjs ADDED Viewed

@@ -0,0 +1,235 @@
+// Integration test: validate that each per-source discovery doctrine's approved WorkIQ query
+// actually returns usable data against the live WorkIQ surface. OPT-IN — not run by `npm test`.
+//
+// Run with: KUSHI_DRYRUN_HINT=HCA node src/bootstrap-dryrun.integration.test.mjs
+//   or:     npm run test:integration:bootstrap
+//
+// This is the gate that would have caught the v4.8.0 shipping defects (teams-channels punt,
+// sharepoint sites punt, meetings join URL not surfaced, teams chat IDs returned as N/A).
+import { execSync, spawnSync } from 'node:child_process';
+import { existsSync, readFileSync, readdirSync, mkdirSync, writeFileSync } from 'node:fs';
+import { dirname, join, resolve } from 'node:path';
+import { fileURLToPath } from 'node:url';
+const __dirname = dirname(fileURLToPath(import.meta.url));
+const repoRoot = resolve(__dirname, '..');
+const HINT = process.env.KUSHI_DRYRUN_HINT || 'HCA';
+const WORKIQ = process.env.KUSHI_WORKIQ_BIN || 'C:\\Users\\ushak\\.copilot\\bin\\workiq.cmd';
+const LOOKBACK = Number(process.env.KUSHI_DRYRUN_LOOKBACK || 90);
+// Per-source test table. Each entry: id, doctrine file holding the approved query, optional queryIndex (which fenced `workiq ask` block to use), human-readable `expects`, classify(stdout) -> status string, and optional expectedClassification regex (main() falls back to /^ok/ when absent).
+const SOURCES = [
+  {
+    id: 'email',
+    doctrine: 'email-bootstrap-discovery.instructions.md',
+    // Doctrine has ONE ```\nworkiq ask -q "..."\n``` block — the approved query.
+    expects: 'table with folder path column populated',
+    classify(stdout) { // returns 'punt', 'empty', or 'ok (N folders)'
+      if (/cannot construct|fabricating data|no .* inventory|routing to Graph/i.test(stdout)) return 'punt';
+      const lines = stdout.split(/\r?\n/).filter(l => /^\|/.test(l)); // keep only markdown table rows
+      if (lines.length < 3) return 'empty';
+      // Header + separator + ≥1 data row. Look for folder path column (non-empty).
+      const dataRows = lines.slice(2);
+      const folderRows = dataRows.filter(l => {
+        const cols = l.split('|').map(c => c.trim()).filter(c => c.length > 0); // empty cells dropped, so cols[0] is the first non-empty cell
+        return cols[0] && cols[0] !== 'N/A' && !/^-+$/.test(cols[0]);
+      });
+      return folderRows.length > 0 ? `ok (${folderRows.length} folders)` : 'empty';
+    },
+  },
+  {
+    id: 'teams-chats',
+    doctrine: 'teams-bootstrap-discovery.instructions.md',
+    queryIndex: 0, // first ```workiq ask block
+    expects: 'table with chat topic + extractable chat ID from message URL',
+    classify(stdout) { // returns 'punt', 'empty', or 'ok (N chats, M extractable thread IDs in response)'
+      if (/cannot construct|fabricating data|no .* inventory/i.test(stdout)) return 'punt';
+      const lines = stdout.split(/\r?\n/).filter(l => /^\|/.test(l));
+      if (lines.length < 3) return 'empty';
+      // Need chat topic non-empty AND at least one row with extractable chat ID from a URL
+      const dataRows = lines.slice(2);
+      const withTopic = dataRows.filter(l => {
+        const cols = l.split('|').map(c => c.trim()); // cols[0] is the text before the leading '|', so cols[1] is the first table column
+        return cols[1] && cols[1] !== 'N/A';
+      });
+      const extractableIds = stdout.match(/19:[^/)\s"']+@thread\.(v2|skype|tacv2|spaces)/g) || []; // counted over the whole response, not per row; reported only, does not gate the result
+      if (withTopic.length === 0) return 'empty';
+      return `ok (${withTopic.length} chats, ${extractableIds.length} extractable thread IDs in response)`;
+    },
+  },
+  {
+    id: 'teams-channels',
+    doctrine: 'teams-bootstrap-discovery.instructions.md',
+    queryIndex: 1, // second ```workiq ask block
+    expects: 'KNOWN-EMPTY (v4.8.1 empirical: WorkIQ has no Teams channel-enumeration surface)',
+    classify(stdout) { // returns 'punt (...)', 'empty (as expected)', or 'unexpected-ok (...)'
+      if (/cannot construct|fabricating data|no .* inventory|Graph-backed|not a channel/i.test(stdout)) return 'punt (as expected — no WorkIQ surface)';
+      const lines = stdout.split(/\r?\n/).filter(l => /^\|/.test(l));
+      if (lines.length < 3) return 'empty (as expected)';
+      const dataRows = lines.slice(2);
+      const withChannelId = dataRows.filter(l => {
+        const cols = l.split('|').map(c => c.trim());
+        // team ID col index 2, channel ID col index 4 in the doctrine table
+        return cols[2] && cols[2] !== 'N/A' && cols[4] && cols[4] !== 'N/A';
+      });
+      return withChannelId.length > 0
+        ? `unexpected-ok (${withChannelId.length} channels with IDs — UPDATE doctrine!)`
+        : 'punt (as expected — no WorkIQ surface)';
+    },
+    expectedClassification: /^punt|^empty/, // punt/empty is the passing outcome; 'unexpected-ok' fails the run
+  },
+  {
+    id: 'meetings',
+    doctrine: 'meetings-bootstrap-discovery.instructions.md',
+    expects: 'table with subject + meeting URL (meetup-join OR meeting/details?eventId)',
+    classify(stdout) { // returns 'punt', 'empty', or 'ok (N meetings, M with extractable URLs)'
+      if (/cannot construct|fabricating data|no .* inventory/i.test(stdout)) return 'punt';
+      const lines = stdout.split(/\r?\n/).filter(l => /^\|/.test(l));
+      if (lines.length < 3) return 'empty';
+      const dataRows = lines.slice(2);
+      const withSubject = dataRows.filter(l => {
+        const cols = l.split('|').map(c => c.trim());
+        return cols[1] && cols[1] !== 'N/A';
+      });
+      const withJoinUrl = dataRows.filter(l => /teams\.microsoft\.com\/l\/(meetup-join|meeting\/details)/i.test(l)); // reported only; a row without a URL still counts as a meeting
+      if (withSubject.length === 0) return 'empty';
+      return `ok (${withSubject.length} meetings, ${withJoinUrl.length} with extractable URLs)`;
+    },
+  },
+  {
+    id: 'sharepoint',
+    doctrine: 'sharepoint-bootstrap-discovery.instructions.md',
+    expects: 'KNOWN-EMPTY (v4.8.1 empirical: WorkIQ has no SharePoint site-inventory surface)',
+    classify(stdout) { // returns 'punt (...)', 'empty (as expected)', or 'unexpected-ok (...)'
+      if (/cannot construct|fabricating data|no .* inventory|routing to Graph|SharePoint Admin/i.test(stdout)) return 'punt (as expected — no WorkIQ surface)';
+      const lines = stdout.split(/\r?\n/).filter(l => /^\|/.test(l));
+      if (lines.length < 3) return 'empty (as expected)';
+      const dataRows = lines.slice(2);
+      const withUrl = dataRows.filter(l => {
+        const cols = l.split('|').map(c => c.trim());
+        // cols[2] is Full Site URL column. Must be a real /sites/ or /teams/ collection URL,
+        // not a personal mysite (-my.sharepoint.com) or a citation in another column.
+        const url = cols[2] || '';
+        return /^https:\/\/[^/]*sharepoint\.com\/(sites|teams)\//i.test(url);
+      });
+      return withUrl.length > 0
+        ? `unexpected-ok (${withUrl.length} sites — UPDATE doctrine!)`
+        : 'punt (as expected — no usable WorkIQ surface)';
+    },
+    expectedClassification: /^punt|^empty/, // punt/empty is the passing outcome; 'unexpected-ok' fails the run
+  },
+];
+function extractApprovedQuery(doctrineFile, queryIndex = 0) {
+  const text = readFileSync(join(repoRoot, 'plugin', 'instructions', doctrineFile), 'utf8');
+  // Find every ``` fenced block containing `workiq ask -q "..."`. Capture the quoted query.
+  const re = /```[^\n]*\n([\s\S]*?)\n```/g;
+  const queries = [];
+  let m;
+  while ((m = re.exec(text)) !== null) {
+    const block = m[1];
+    const qMatch = block.match(/workiq\s+ask\s+-q\s+"([\s\S]+?)"/);
+    if (qMatch) queries.push(qMatch[1]);
+  }
+  if (queries.length <= queryIndex) {
+    throw new Error(`Doctrine ${doctrineFile} does not contain query #${queryIndex} (found ${queries.length})`);
+  }
+  return queries[queryIndex];
+}
+function substituteHint(query, hint, lookback) {
+  return query
+    .replace(/'<HINT>'/g, `'${hint}'`)
+    .replace(/<HINT>/g, hint)
+    .replace(/<N>/g, String(lookback));
+}
+function runWorkIQ(query) {
+  const t0 = Date.now();
+  // cmd.exe quote-handling: spawn without shell, use .cmd directly via shell escape, OR
+  // pass the full command as a single argv[0] with shell:true. Easier: spawn workiq directly
+  // (cmd shim works with shell:false on Windows when the path ends in .cmd via the runtime).
+  // We pre-escape the query to survive cmd's parser: wrap in "...", escape internal " as "".
+  const escaped = `"${query.replace(/"/g, '""')}"`;
+  const res = spawnSync(WORKIQ, ['ask', '-q', escaped], {
+    encoding: 'utf8',
+    maxBuffer: 50 * 1024 * 1024,
+    shell: true,
+    timeout: 240_000,
+    windowsVerbatimArguments: true,
+  });
+  // Strip ANSI color codes — WorkIQ emits them and they break ^| line classifiers.
+  const ansi = /\x1b\[[0-9;]*m/g;
+  const raw = (res.stdout || '') + (res.stderr || '');
+  return {
+    stdout: raw.replace(ansi, ''),
+    exitCode: res.status,
+    elapsedMs: Date.now() - t0,
+  };
+}
+function main() {
+  if (!existsSync(WORKIQ)) {
+    console.error(`✗ workiq binary not found at ${WORKIQ}. Set KUSHI_WORKIQ_BIN or install workiq.`);
+    process.exit(2);
+  }
+  console.log(`# kushi bootstrap-dryrun integration test`);
+  console.log(`hint=${HINT}  lookback=${LOOKBACK}d  workiq=${WORKIQ}`);
+  console.log(``);
+  const outDir = join(repoRoot, '.dryrun-output');
+  mkdirSync(outDir, { recursive: true });
+  const ts = new Date().toISOString().replace(/[:.]/g, '-');
+  const results = [];
+  let failures = 0;
+  for (const src of SOURCES) {
+    process.stdout.write(`▶ ${src.id.padEnd(16)} `);
+    let query;
+    try {
+      query = substituteHint(extractApprovedQuery(src.doctrine, src.queryIndex || 0), HINT, LOOKBACK);
+    } catch (e) {
+      console.log(`extract-failed: ${e.message}`);
+      failures++;
+      results.push({ source: src.id, status: `extract-failed: ${e.message}`, elapsedMs: 0 });
+      continue;
+    }
+    let { stdout, exitCode, elapsedMs } = runWorkIQ(query);
+    // Retry once on transient WorkIQ HTTP/2 / protocol / 5xx errors.
+    const transient = /HttpProtocolError|HTTP\/2 server reset|INTERNAL_ERROR|503|502|504|ECONNRESET|ETIMEDOUT/i;
+    if (transient.test(stdout)) {
+      process.stdout.write('(transient, retry) ');
+      const retry = runWorkIQ(query);
+      stdout = retry.stdout;
+      exitCode = retry.exitCode;
+      elapsedMs += retry.elapsedMs;
+    }
+    const classification = src.classify(stdout);
+    const expectedRe = src.expectedClassification || /^ok/;
+    const pass = expectedRe.test(classification);
+    console.log(`${pass ? '✓' : '✗'} ${classification}  (${elapsedMs}ms, exit=${exitCode})`);
+    if (!pass) {
+      failures++;
+      console.log(`     expects: ${src.expects}`);
+      console.log(`     query  : ${query.slice(0, 120)}...`);
+    }
+    results.push({ source: src.id, classification, expects: src.expects, pass, elapsedMs, exitCode });
+    // Save raw response for debugging
+    writeFileSync(join(outDir, `${ts}_${src.id}.txt`), `# query\n${query}\n\n# response\n${stdout}\n`, 'utf8');
+  }
+  console.log('');
+  console.log(`Outputs saved to: ${outDir}`);
+  console.log(`Result: ${results.length - failures}/${results.length} passed.`);
+  if (failures > 0) {
+    console.log('');
+    console.log('FAILED — review .dryrun-output/ files. If WorkIQ surface changed, update the doctrine and re-run.');
+    process.exit(1);
+  }
+}
+main();

package/src/forbidden-workiq-phrasings.test.mjs CHANGED Viewed

@@ -27,22 +27,41 @@ const allowlistFiles = new Set([
   'plugin/instructions/workiq-onenote-query-shape.instructions.md',
   // Anti-pattern lists in the registry doctrine also quote them.
   'plugin/instructions/m365-id-registry.instructions.md',
+  // v4.8.0 — customer-hint discovery doctrines MUST quote their forbidden phrasings.
+  'plugin/instructions/customer-hint-discovery.instructions.md',
+  'plugin/instructions/email-bootstrap-discovery.instructions.md',
+  'plugin/instructions/teams-bootstrap-discovery.instructions.md',
+  'plugin/instructions/meetings-bootstrap-discovery.instructions.md',
+  'plugin/instructions/sharepoint-bootstrap-discovery.instructions.md',
   // Historical record.
   'plugin/learnings/onenote.md',
   'CHANGELOG.md',
 ].map(p => p.replaceAll('/', path.sep)));
-// Forbidden phrasings. Use lowercase substring match — these patterns empirically fail in WorkIQ
-// regardless of capitalization or surrounding sentence structure.
+// Forbidden phrasings. Each entry tags the doctrine file that MUST quote it (to forbid it).
+// Entry shape: `phrase` is the text to detect, `doctrine` is the basename (without the
+// `.instructions.md` suffix) of the doctrine file that must quote it, `why` records the observed failure.
+// Phrases are written in lowercase: the doctrine-quoting test lowercases each doctrine file's text
+// before substring-matching, so a mixed-case phrase here could never match.
 const forbiddenPhrasings = [
-  { phrase: 'list my onenote notebooks', why: 'WorkIQ has no notebook-inventory endpoint; punts to Graph Explorer.' },
-  { phrase: 'what is the onenote notebook id', why: 'WorkIQ has no notebook-ID lookup; punts to Graph Explorer.' },
-  { phrase: 'list sections in onenote notebook', why: 'WorkIQ has no bulk-section enumeration surface.' },
-  { phrase: 'search microsoft 365 onenote for sections matching', why: 'Structured-field enumeration query; routes WorkIQ to summary mode (v3.7.9 finding).' },
-  { phrase: 'wdsectionfileid =', why: 'Filter-syntax query routes WorkIQ to summary mode; "OneNote internal properties not exposed as searchable fields" refusal.' },
-  // Note: bare "wdsectionfileid=" (no space) appears legitimately in URL fragments like
-  // "...&wdpartid={GUID}&wdsectionfileid={GUID}" — those are RESPONSE shapes, not query syntax.
-  // Only the space-equals form (`wdsectionfileid = <id>`) is a forbidden filter expression.
+  // OneNote (v4.7.x doctrine)
+  { phrase: 'list my onenote notebooks',                                doctrine: 'workiq-onenote-query-shape', why: 'WorkIQ has no notebook-inventory endpoint; punts to Graph Explorer.' },
+  { phrase: 'what is the onenote notebook id',                          doctrine: 'workiq-onenote-query-shape', why: 'WorkIQ has no notebook-ID lookup; punts to Graph Explorer.' },
+  { phrase: 'list sections in onenote notebook',                        doctrine: 'workiq-onenote-query-shape', why: 'WorkIQ has no bulk-section enumeration surface.' },
+  { phrase: 'search microsoft 365 onenote for sections matching',       doctrine: 'workiq-onenote-query-shape', why: 'Structured-field enumeration query; routes WorkIQ to summary mode (v3.7.9 finding).' },
+  { phrase: 'wdsectionfileid =',                                        doctrine: 'workiq-onenote-query-shape', why: 'Filter-syntax query routes WorkIQ to summary mode; "OneNote internal properties not exposed as searchable fields" refusal.' },
+  // Note: bare "wdsectionfileid=" (no space) appears legitimately in URL fragments — see Doc.aspx URLs in response shapes.
+  // Only the space-equals form ("wdsectionfileid = <id>") is the forbidden filter expression, which is
+  // why the phrase above deliberately includes the space before "=".
+  // Customer-hint discovery sweep (v4.8.0 doctrines) — bulk-enumerate / ID-lookup / structured-search
+  // phrasings that empirically punt to Graph or summary mode. Each lives in its source-specific doctrine.
+  { phrase: 'list all my outlook mail folders',                         doctrine: 'email-bootstrap-discovery',      why: 'Bulk enumerate punts to m365_list_mail_folders / Graph; returns Graph-Explorer guidance.' },
+  { phrase: 'search microsoft 365 for mail folders matching',           doctrine: 'email-bootstrap-discovery',      why: 'Structured-search verb routes WorkIQ to summary mode.' },
+  { phrase: 'what is the folder id for the',                            doctrine: 'email-bootstrap-discovery',      why: 'Folder ID-lookup punts to Graph.' },
+  { phrase: 'list all my teams chats',                                  doctrine: 'teams-bootstrap-discovery',      why: 'Bulk enumerate punts to m365_list_chats / Graph.' },
+  { phrase: 'what is the chat id for the chat with',                    doctrine: 'teams-bootstrap-discovery',      why: 'Chat ID-lookup punts to Graph.' },
+  { phrase: 'search microsoft 365 teams for chats matching',            doctrine: 'teams-bootstrap-discovery',      why: 'Structured-search verb routes to summary mode.' },
+  { phrase: 'list all my teams meetings',                               doctrine: 'meetings-bootstrap-discovery',   why: 'Bulk calendar enumerate punts to m365_list_meetings / m365_list_events.' },
+  { phrase: 'get the calendar event id for the meeting',                doctrine: 'meetings-bootstrap-discovery',   why: 'Event ID-lookup punts to Graph.' },
+  { phrase: 'search microsoft 365 calendar for events matching',        doctrine: 'meetings-bootstrap-discovery',   why: 'Structured-search verb routes to summary mode.' },
+  { phrase: 'list all sharepoint sites in my tenant',                   doctrine: 'sharepoint-bootstrap-discovery', why: 'Tenant-wide enumerate is privileged; routes to Graph admin endpoints.' },
+  { phrase: 'what is the site id for',                                  doctrine: 'sharepoint-bootstrap-discovery', why: 'Site ID-lookup punts to Graph.' },
+  { phrase: 'search sharepoint for sites matching',                     doctrine: 'sharepoint-bootstrap-discovery', why: 'Structured-search verb routes to summary mode.' },
 ];
 function walkMarkdown(dir, out = []) {
@@ -81,15 +100,23 @@ test('forbidden WorkIQ phrasings do not appear in skill/prompt/agent files', ()
   );
 });
-test('the doctrine file itself exists and lists the forbidden phrasings', () => {
-  const doctrinePath = path.join(repoRoot, 'plugin', 'instructions', 'workiq-onenote-query-shape.instructions.md');
-  assert.ok(fs.existsSync(doctrinePath), 'workiq-onenote-query-shape.instructions.md must exist');
-  const text = fs.readFileSync(doctrinePath, 'utf8').toLowerCase();
-  for (const { phrase } of forbiddenPhrasings) {
-    assert.ok(
-      text.includes(phrase),
-      `Doctrine file MUST quote forbidden phrasing "${phrase}" to forbid it.`
-    );
+// For every entry in forbiddenPhrasings, checks that the doctrine file named by its `doctrine`
+// field exists under plugin/instructions/ and contains the phrase (case-insensitively).
+test('each forbidden phrasing is quoted in its home doctrine file (so the doctrine actually forbids what it claims to)', () => {
+  const doctrineDir = path.join(repoRoot, 'plugin', 'instructions');
+  // Group phrasings by their home doctrine and assert each doctrine file contains all of its phrasings.
+  // `??=` creates the per-doctrine array the first time a doctrine name is seen.
+  const grouped = forbiddenPhrasings.reduce((acc, { phrase, doctrine }) => {
+    (acc[doctrine] ??= []).push(phrase);
+    return acc;
+  }, {});
+  for (const [doctrine, phrases] of Object.entries(grouped)) {
+    // The `doctrine` tag is a basename; the on-disk file carries the `.instructions.md` suffix.
+    const file = path.join(doctrineDir, `${doctrine}.instructions.md`);
+    assert.ok(fs.existsSync(file), `Doctrine file MUST exist: ${doctrine}.instructions.md`);
+    // Lowercase the file text once so the substring match ignores the doctrine's capitalization;
+    // the phrases themselves are compared as written.
+    const text = fs.readFileSync(file, 'utf8').toLowerCase();
+    for (const phrase of phrases) {
+      assert.ok(
+        text.includes(phrase),
+        `Doctrine file ${doctrine}.instructions.md MUST quote forbidden phrasing "${phrase}" to forbid it.`
+      );
+    }
   }
 });
@@ -109,3 +136,32 @@ test('pull-onenote SKILL.md cites the new doctrine and lists WorkIQ as PRIMARY',
     'pull-onenote/SKILL.md must label Playwright as opt-in / recovery / fallback'
   );
 });
+// Verifies the customer-hint discovery wiring (v4.8.0+): every required doctrine file exists,
+// each per-source pull SKILL.md cites its own doctrine, and bootstrap-project SKILL.md cites the
+// orchestration doctrine and mentions Step 3.5.
+test('customer-hint discovery doctrines exist and each per-source pull SKILL.md cites its doctrine (v4.8.0+)', () => {
+  const requiredDoctrines = [
+    'plugin/instructions/customer-hint-discovery.instructions.md',
+    'plugin/instructions/email-bootstrap-discovery.instructions.md',
+    'plugin/instructions/teams-bootstrap-discovery.instructions.md',
+    'plugin/instructions/meetings-bootstrap-discovery.instructions.md',
+    'plugin/instructions/sharepoint-bootstrap-discovery.instructions.md',
+  ];
+  for (const rel of requiredDoctrines) {
+    const full = path.join(repoRoot, rel);
+    assert.ok(fs.existsSync(full), `Doctrine MUST exist: ${rel}`);
+  }
+  // Each pull skill is paired with the doctrine file name its SKILL.md has to mention.
+  const skillCitations = [
+    { skill: 'plugin/skills/pull-email/SKILL.md',      cites: 'email-bootstrap-discovery.instructions.md' },
+    { skill: 'plugin/skills/pull-teams/SKILL.md',      cites: 'teams-bootstrap-discovery.instructions.md' },
+    { skill: 'plugin/skills/pull-meetings/SKILL.md',   cites: 'meetings-bootstrap-discovery.instructions.md' },
+    { skill: 'plugin/skills/pull-sharepoint/SKILL.md', cites: 'sharepoint-bootstrap-discovery.instructions.md' },
+  ];
+  for (const { skill, cites } of skillCitations) {
+    const skillPath = path.join(repoRoot, skill);
+    // Assert existence first so a missing skill fails with a descriptive message rather than a raw
+    // ENOENT from readFileSync, matching how the doctrine files are checked above.
+    assert.ok(fs.existsSync(skillPath), `Skill file MUST exist: ${skill}`);
+    const text = fs.readFileSync(skillPath, 'utf8');
+    assert.ok(text.includes(cites),               `${skill} must cite ${cites}`);
+    // Plain substring check: any occurrence of the phrase satisfies it, not only a "##" heading.
+    assert.ok(text.includes('Bootstrap discovery'), `${skill} must have a ## Bootstrap discovery section`);
+  }
+  // Orchestration doctrine MUST appear in bootstrap-project SKILL.md
+  const bootstrapRel = 'plugin/skills/bootstrap-project/SKILL.md';
+  const bootstrapPath = path.join(repoRoot, bootstrapRel);
+  assert.ok(fs.existsSync(bootstrapPath), `Skill file MUST exist: ${bootstrapRel}`);
+  const bootstrap = fs.readFileSync(bootstrapPath, 'utf8');
+  assert.ok(bootstrap.includes('customer-hint-discovery.instructions.md'), 'bootstrap-project SKILL.md must cite customer-hint-discovery.instructions.md');
+  assert.ok(/Step 3\.5/.test(bootstrap),                                    'bootstrap-project SKILL.md must define Step 3.5 (customer-hint discovery sweep)');
+});