kushi-agents 3.4.2 → 3.12.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. package/README.md +33 -0
  2. package/package.json +15 -3
  3. package/plugin/agents/kushi.agent.md +155 -147
  4. package/plugin/instructions/ado-bootstrap-discovery.instructions.md +111 -0
  5. package/plugin/instructions/ado-engagement-tree.instructions.md +73 -0
  6. package/plugin/instructions/answer-from-evidence.instructions.md +1 -1
  7. package/plugin/instructions/auth-and-retry.instructions.md +51 -16
  8. package/plugin/instructions/azure-auth-patterns.instructions.md +13 -6
  9. package/plugin/instructions/bootstrap-status-format.instructions.md +113 -0
  10. package/plugin/instructions/capture-learnings.instructions.md +95 -0
  11. package/plugin/instructions/cleanup-on-resolution.instructions.md +69 -0
  12. package/plugin/instructions/crm-bootstrap-discovery.instructions.md +79 -0
  13. package/plugin/instructions/crm-internal-vs-confirmed.instructions.md +79 -0
  14. package/plugin/instructions/evidence-confidence-ladder.instructions.md +66 -0
  15. package/plugin/instructions/evidence-layout-canonical.instructions.md +115 -0
  16. package/plugin/instructions/evidence-thoroughness.instructions.md +82 -12
  17. package/plugin/instructions/full-view-gate.instructions.md +91 -0
  18. package/plugin/instructions/m365-id-registry.instructions.md +134 -0
  19. package/plugin/instructions/meetings-verbatim-required.instructions.md +176 -0
  20. package/plugin/instructions/run-reports.instructions.md +129 -0
  21. package/plugin/instructions/scope-boundaries.instructions.md +218 -0
  22. package/plugin/instructions/snapshot-vs-stream.instructions.md +2 -0
  23. package/plugin/instructions/update-ledger.instructions.md +132 -0
  24. package/plugin/instructions/verbatim-by-default.instructions.md +73 -0
  25. package/plugin/instructions/workiq-first.instructions.md +15 -31
  26. package/plugin/instructions/workiq-only.instructions.md +193 -0
  27. package/plugin/learnings/README.md +50 -0
  28. package/plugin/learnings/ado.md +45 -0
  29. package/plugin/learnings/crm.md +96 -0
  30. package/plugin/learnings/cross-cutting.md +36 -0
  31. package/plugin/learnings/email.md +33 -0
  32. package/plugin/learnings/meetings.md +30 -0
  33. package/plugin/learnings/misc.md +46 -0
  34. package/plugin/learnings/onenote.md +215 -0
  35. package/plugin/learnings/sharepoint.md +5 -0
  36. package/plugin/learnings/teams.md +5 -0
  37. package/plugin/plugin.json +22 -2
  38. package/plugin/prompts/apply-ado.prompt.md +14 -0
  39. package/plugin/prompts/propose-ado.prompt.md +12 -0
  40. package/plugin/reference-packs/fde/crm-field-manifest.md +165 -0
  41. package/plugin/skills/apply-ado-update/SKILL.md +125 -0
  42. package/plugin/skills/ask-project/SKILL.md +2 -0
  43. package/plugin/skills/bootstrap-project/SKILL.md +81 -3
  44. package/plugin/skills/propose-ado-update/SKILL.md +108 -0
  45. package/plugin/skills/pull-ado/SKILL.md +173 -23
  46. package/plugin/skills/pull-crm/SKILL.md +168 -15
  47. package/plugin/skills/pull-email/SKILL.md +139 -22
  48. package/plugin/skills/pull-meetings/SKILL.md +109 -25
  49. package/plugin/skills/pull-misc/README.md +84 -0
  50. package/plugin/skills/pull-misc/SKILL.md +257 -0
  51. package/plugin/skills/pull-misc/runner.mjs +280 -0
  52. package/plugin/skills/pull-onenote/README.md +90 -0
  53. package/plugin/skills/pull-onenote/SKILL.md +400 -51
  54. package/plugin/skills/pull-onenote/runner.mjs +356 -0
  55. package/plugin/skills/pull-onenote/scripts/recapture-section-url.mjs +295 -0
  56. package/plugin/skills/pull-onenote/write-snapshot.mjs +271 -0
  57. package/plugin/skills/pull-sharepoint/SKILL.md +44 -12
  58. package/plugin/skills/pull-teams/SKILL.md +40 -11
  59. package/plugin/skills/refresh-project/SKILL.md +33 -2
  60. package/plugin/skills/self-check/run.ps1 +186 -4
  61. package/plugin/templates/ado-update/discussion-comment.template.md +26 -0
  62. package/plugin/templates/ado-update/integrations-ado-writes.example.yml +49 -0
  63. package/plugin/templates/ado-update/proposed.template.md +78 -0
  64. package/plugin/templates/init/external-links.template.txt +30 -0
  65. package/plugin/templates/init/project-integrations.template.yml +57 -2
  66. package/plugin/templates/snapshot/meeting-verbatim.template.md +110 -0
  67. package/plugin/templates/snapshot/meetings-series-index.template.md +3 -1
  68. package/plugin/templates/snapshot/onenote-page.template.md +92 -23
  69. package/plugin/templates/weekly/meetings-stream.template.md +11 -6
@@ -0,0 +1,96 @@
1
+ # Learnings — CRM / Dataverse (`pull-crm`)
2
+
3
+ Newest on top. Format defined in [`README.md`](./README.md).
4
+
5
+ ---
6
+
7
+ ## 2026-05-18 — Bootstrap declared `crm.disabled: true` from a shallow probe; live REST resolved instantly
8
+
9
+ ### What happened
10
+ John Deere bootstrap (kushi v3.10.x, 2026-05-18 morning) wrote `boundaries.crm.disabled: true, reason: 'no-crm-record-discovered-during-bootstrap-2026-05-18'` to `John Deere/integrations.yml`. As a result, `pull-crm` was never dispatched on bootstrap or on later refreshes, the JD `Evidence/ushak/crm/` folder stayed empty, and the user asked "why is there nothing in CRM?" the same day.
11
+
12
+ A live Dataverse REST probe on the afternoon of 2026-05-18 — `GET /new_frontierengineeringtriages?$filter=contains(new_title,'Deere')` — returned **1 exact match in <1s**: `FE-2026-001791 — John Deere Dealer Operations (JDDO) Platform Integration Improvement`, status `4 - Technical Assessment`, customer `DEERE COMPANY`. The accounts-fallback (Step 2) also resolved cleanly (3 Deere-named accounts; `DEERE COMPANY` linked to the same FE record).
13
+
14
+ ### Why this was a defect
15
+ The pull-crm SKILL `Resolution order` section already documented the 4-step sequence (title → account → recent-slice → ask) — but bootstrap was not actually executing it. It appears bootstrap relied on a WorkIQ-only / metadata-only probe that didn't reach Dataverse, then silently wrote `disabled: true`. That is the worst possible disposition: it pretends there is no CRM record, hides the failure from future refreshes, and the project loses CRM evidence entirely.
16
+
17
+ Adjacent observation: Nova (sibling tool) does this resolution sequence in its bootstrap and would not have missed FE-2026-001791. The doctrine was present in kushi but the execution path was weak.
18
+
19
+ ### Fix
20
+ - New HARD-rule instruction `plugin/instructions/crm-bootstrap-discovery.instructions.md` — `disabled: true` is ONLY allowed after the FULL 4-step REST sequence returns 0 AND the user is presented with top candidates. Auth/reachability failures must leave the boundary EMPTY (with `reason: 'crm-auth-unavailable-<date>'`) so the next refresh retries — NEVER write `disabled: true`.
21
+ - New HARD-rule instruction `plugin/instructions/crm-internal-vs-confirmed.instructions.md` — once resolved, CRM field values must be tagged `CRM-only` / `Cross-verified` / `Conflicting evidence` and never collapsed into bare assertions in State/, FDE reports, or ask-project answers.
22
+ - `plugin/skills/pull-crm/SKILL.md` v2.2.0 → v2.3.0 — Resolution order hardened with: (a) anti-patterns block at top (no statecode filter, `new_companyname` is NOT a valid attribute, never give up on one probe, iterate ALL matching accounts in Step 2 not just the first), (b) new Step 3 wide-text fallback (`new_businessscenariotechnicalblocker`, `new_engagedwith`, `new_engagementobjectives`), (c) renumbered to 5 steps with explicit user-ask, (d) required attempt-trail logging to refresh-report.
23
+ - `plugin/skills/bootstrap-project/SKILL.md` v2.2.0 → v2.3.0 — Step 4 boundaries gate now references the new instruction and explicitly requires the live 4-step sequence before `disabled: true`.
24
+
25
+ ### Verification
26
+ - JD `integrations.yml` updated: `record_ids: ['FE-2026-001791']`, `request_ids: ['FE-2026-001791']`, `boundaries.crm.disabled` removed, `crm.record_id` / `entity_set` / `customer_account_id` / `resolution_path` populated.
27
+ - Full record + 3 annotations pulled and rendered to `Evidence/ushak/crm/snapshot/new_frontierengineeringtriages/1b529705-2340-f111-88b3-00224803accf.md` (9.5 KB; 58 fields + 3 verbatim notes; long-text `new_businessscenariotechnicalblocker` includes the full JDDO platform pain-points narrative).
28
+
29
+ ### Pattern to remember
30
+ `disabled: true` for CRM is a load-bearing disposition. It MUST be earned by the full resolution sequence, not assumed from absence-of-quick-match. When in doubt, leave the boundary empty so the next refresh retries.
31
+
32
+ ---
33
+
34
+ ### 2026-05-13 — Custom entities don't expose `Annotations` as a navigation property
35
+
36
+ **Symptom**: `GET /api/data/v9.2/new_frontierengineeringtriages(<id>)?$expand=Annotations(...)` returns `0x80060888: Could not find a property named 'Annotations' on type 'Microsoft.Dynamics.CRM.new_frontierengineeringtriage'.`
37
+
38
+ **Root cause**: For OOB entities like `incident`, the `Annotations` navigation collection is generated. For custom entities (prefix `new_`, `crXXX_`, etc.), `$expand=Annotations` is NOT auto-wired, even though the `annotations` entity's regarding lookup (`objectid`) is polymorphic and can target any entity. The expand collection name simply doesn't exist on the custom parent entity.
39
+
40
+ **Fix / workaround**: Two requests instead of one expand:
41
+ 1. `GET /api/data/v9.2/<entitySet>(<id>)` — record alone.
42
+ 2. `GET /api/data/v9.2/annotations?$filter=_objectid_value eq <id>&$select=annotationid,subject,notetext,createdon,_createdby_value,filename,mimetype&$orderby=createdon asc` — notes filtered by `_objectid_value`.
43
+
44
+ Both with the formatted-value Prefer header. Merge in code.
45
+
46
+ **Doctrine impact**: `plugin/skills/pull-crm/SKILL.md` — replaced the single `$expand=Annotations` template with the two-request pattern (v3.7.6). Marked the expand pattern OK only for `incident`/`account`/`contact`/`opportunity` and other OOB entities.
47
+
48
+ **Discovered during**: `HCA / pull-crm` — record `e561b31e-...` returned 0 annotations on the expand call; the separate-filter call returned 24.
49
+
50
+ ---
51
+
52
+ ### 2026-05-13 — FDE intake records live in `iscrm.crm.dynamics.com`, not the global default `microsoftit.crm.dynamics.com`
53
+
54
+ **Symptom**: HCA record search via the global `.project-evidence/crm/config.yml` (`environment_url: microsoftit.crm.dynamics.com`, `entity_set: msdyn_engagements`) returned 0 hits. The record exists.
55
+
56
+ **Root cause**: Two distinct CRM environments are in play for IS:
57
+ - `microsoftit.crm.dynamics.com` / `msdyn_engagements` — generic IS engagement entity.
58
+ - `iscrm.crm.dynamics.com` / `new_frontierengineeringtriages` — FDE intake triage entity (custom, `new_` prefix).
59
+
60
+ FDE-intake projects (anything with an FE-YYYY-NNNNNN request id) need the iscrm env override.
61
+
62
+ **Fix / workaround**: Per-project pin in `<project>/integrations.yml`:
63
+ ```yaml
64
+ crm:
65
+ environment_url_override: 'https://iscrm.crm.dynamics.com'
66
+ entity_set_override: 'new_frontierengineeringtriages'
67
+ title_field: 'new_title'
68
+ request_id_field: 'new_requestid'
69
+ customer_lookup: '_new_customer_value'
70
+ ```
71
+ Resolution order: project override > global default. Detect FDE-intake by request id format `^FE-\d{4}-\d{6}$` and prompt to apply the override automatically.
72
+
73
+ **Doctrine impact**:
74
+ - `plugin/skills/pull-crm/SKILL.md` — added FDE-env-override block + auto-detect heuristic (v3.7.6).
75
+ - `plugin/skills/bootstrap-project/SKILL.md` — when seeding `integrations.yml`, ask the user about FDE intake and pre-fill the override block.
76
+ - `plugin/templates/init/project-integrations.template.yml` — added commented-out override block.
77
+
78
+ **Discovered during**: `HCA / pull-crm`.
79
+
80
+ ---
81
+
82
+ ### 2026-05-13 — Always send the formatted-value Prefer header
83
+
84
+ **Symptom**: Without the header, lookups return only GUIDs (`_new_customer_value: <guid>`) and option-set fields return only numeric codes (`statuscode: 4`). The output is unreadable.
85
+
86
+ **Root cause**: Dataverse OData strips the human-readable formatted values unless the client opts in.
87
+
88
+ **Fix / workaround**: Every Dataverse REST call MUST include:
89
+ ```
90
+ Prefer: odata.include-annotations="OData.Community.Display.V1.FormattedValue"
91
+ ```
92
+ Then read formatted values from `<field>@OData.Community.Display.V1.FormattedValue` properties on the response.
93
+
94
+ **Doctrine impact**: Already in `plugin/skills/pull-crm/SKILL.md` since v3.7.4. Reaffirmed in v3.7.6 with a worked example.
95
+
96
+ **Discovered during**: `HCA / pull-crm` — initial run forgot the header and lookups came back as raw GUIDs.
@@ -0,0 +1,36 @@
1
+ # Learnings — Cross-cutting
2
+
3
+ Newest on top. Format defined in [`README.md`](./README.md). Use this file when a learning spans multiple `pull-*` skills (auth tokens, encoding, PowerShell quirks, host-tool behavior).
4
+
5
+ ---
6
+
7
+ ### 2026-05-13 — Two CRM environments coexist for IS (microsoftit vs iscrm)
8
+
9
+ **Symptom**: Skills assumed a single global CRM env. FDE-intake projects (HCA) live in a different env from generic IS engagements.
10
+
11
+ **Root cause**: Org-level split — see [`crm.md`](./crm.md) for the full entry.
12
+
13
+ **Fix / workaround**: See `crm.md` — per-project override in `integrations.yml`. Cross-cutting note: any future skill that touches Dataverse must read `crm.environment_url_override` and `crm.entity_set_override` from `<project>/integrations.yml` BEFORE falling back to the global config.
14
+
15
+ **Doctrine impact**: tracked in `crm.md` and `pull-crm/SKILL.md`.
16
+
17
+ **Discovered during**: `HCA / pull-crm`.
18
+
19
+ ---
20
+
21
+ ### 2026-05-13 — Mutable hint cache vs project integrations.yml: write to BOTH
22
+
23
+ **Symptom**: Manual HCA refresh pinned resolved IDs only into `<project>/integrations.yml`. The cross-skill mutable cache `<engagement-root>/.project-evidence/m365/m365-mutable.json` was not touched. Next-run fast-path won't fire for sibling skills that read the mutable cache.
24
+
25
+ **Root cause**: `side-by-side-config.instructions.md` says to upsert mutable hints during the run, but the rule wasn't enforced — and skill SKILL.md files mention it inconsistently.
26
+
27
+ **Fix / workaround**: Every `pull-*` skill MUST upsert to BOTH stores in the same turn the ID is resolved:
28
+ 1. `<project>/integrations.yml` under `<source>.<key>` — source of truth, OneDrive-synced.
29
+ 2. `<engagement-root>/.project-evidence/m365/m365-mutable.json` under `m365Mutable.knownSections.<project>.<source>.<key>` — speed cache for cross-skill reuse, with `discoveredOn` + `confidence`.
30
+
31
+ **Doctrine impact**:
32
+ - `plugin/instructions/side-by-side-config.instructions.md` — strengthened "discover → upsert immediately" with explicit dual-store contract (v3.7.6).
33
+ - All `pull-*` SKILL.md files — each pull skill's "Mutable hints to upsert" section now explicitly lists both files (v3.7.6).
34
+ - `plugin/skills/self-check/run.ps1` — new D8 rule: warn if `<project>/integrations.yml` has resolved `ado.engagement_id` or `crm.record_id` but `m365-mutable.json` lacks the matching `knownSections.<project>` entry.
35
+
36
+ **Discovered during**: `HCA / pull-crm + pull-ado` — user asked "did you make sure you updated the config files for all these to be able to do better next time".
@@ -0,0 +1,33 @@
1
+ # Learnings — Email (`pull-email`)
2
+
3
+ Newest on top. Format defined in [`README.md`](./README.md).
4
+
5
+ ## 2026-05-18 — `pull-email` rewritten to WorkIQ-ONLY (kushi v3.11.1); `m365_get_email` / `m365_search_emails` / Graph REST FORBIDDEN
6
+
7
+ **What happened.** During John Deere bootstrap (2026-05-18), email evidence was missing entirely. Root cause: prior pull-email SKILL v2.2.0 listed `m365_get_email` + `m365_search_emails` as the **preferred** body-fetch path with WorkIQ as fallback. In this workspace those host tools have a near-100% failure rate (Tool execution failed / 401 / empty payloads), so pull-email runs gave up before reaching WorkIQ.
8
+
9
+ **Fix.**
10
+ 1. Ran the WorkIQ root-scope keyword query for JD (aliases: John Deere, Deere, JDDO, JDIS, deere.com, johndeere.com; window 2026-04-01 → 2026-05-18). 3 emails returned first try (FDE Intake thread, folder `109. John Deere`), request-ids `941b2dda-3db2-4735-8e4f-1974f6436b3e` and `e26a14ed-e592-4323-8de7-7f197a047318`.
11
+ 2. Wrote durable JD artifacts under `John Deere/email-context/`: `index.md`, `2026-05-18-1100-email-summary.md`, `current-state.md`.
12
+ 3. Upserted `m365Mutable.knownSections."John Deere".emailContext.folder = "109. John Deere"` with `confidence: high` so next run uses the folder-scoped fast path.
13
+ 4. Bumped `pull-email` SKILL v2.2.0 → v2.3.0 with HARD `workiq-only` contract:
14
+ - Front blockquote cites `workiq-only.instructions.md` (v3.11.0).
15
+ - Step B "Per-message body fetch" rewritten — only WorkIQ allowed.
16
+ - Tools section rewritten — `m365_get_email` / `m365_search_emails` / `m365_list_emails` / `m365_list_mail_folders` / `m365_search_mail` / Graph REST URLs explicitly listed as FORBIDDEN.
17
+ - User-paste promoted to first-class fallback (NOT a degradation).
18
+ 5. Same treatment applied to `pull-teams` (v2.0.0 → v2.1.0) and `pull-sharepoint` (v2.0.0 → v2.1.0). `m365_list_chat_messages` retained for `pull-teams` as parallel **structured-data dump** (writes `chat-messages.json`) — explicitly NOT a fallback for the human-readable thread.
19
+
20
+ **Codified canonical prompts** (do not re-derive):
21
+
22
+ - Root-scope discovery (use when no folder hint exists for the project):
23
+ ```
24
+ workiq ask -q "Search my Outlook mailbox for emails from <floor> onward in all folders (including subfolders) related to <aliases>, or from any sender from <sender_domains>. For each email return: sent datetime, subject, sender, recipients, folder path, and a short relevance reason."
25
+ ```
26
+ - Folder-scoped fast path (use when `emailContext.folder` is pinned in mutable with `confidence >= medium`):
27
+ ```
28
+ workiq ask -q "Search my Outlook mailbox for emails from <floor> onward in folder(s) '<folder>' (including all subfolders) related to <aliases>. Return: sent datetime, subject, sender, recipients, folder path, message link, and a short relevance reason. Do not return NO_RESULTS unless the folder truly has no messages in that range."
29
+ ```
30
+ - Per-message body fetch (codified in `workiq-only.instructions.md`).
31
+
32
+ **Defect signature to watch for.** `pull-email` coverage trail showing `m365_get_email: failed` or `m365_search_emails: 0 results` followed by either silence (skill gave up) or a WorkIQ retry. Either pattern is the v2.2.0 cascade leaking through — must be eradicated.
33
+
@@ -0,0 +1,30 @@
1
+ # Learnings — Meetings (`pull-meetings`)
2
+
3
+ Newest on top. Format defined in [`README.md`](./README.md).
4
+
5
+ ## 2026-05-18 — Meetings need a verbatim/ folder because meetings EXPIRE
6
+
7
+ ### What happened
8
+ During John Deere bootstrap on 2026-05-18, `pull-meetings` produced the FDE Intake (2026-05-13) curated 7-section block in `Evidence/ushak/meetings/snapshot/FDE-Intake-John-Deere-2026-05-13.md`. Source was reconstructed from chat (22 messages) because `m365_get_transcript` returned "Tool execution failed" and `m365_get_facilitator_notes` was unavailable. A sibling tool (running independently) successfully pulled a much richer WorkIQ-derived summary AND captured the raw recording URL, AND noted in its output that the recording is at `https://microsoft-my.sharepoint-df.com/.../Recordings/FDE%20Intake%20-%20John%20Deere-20260513_123238-Meeting%20Recording.mp4` — a URL that will be purged when tenant retention kicks in (default ~60 days).
9
+
10
+ ### Why this was a defect
11
+ The curated snapshot, however rich, is a SUMMARY. The 22 raw chat messages, the recording URL, the WorkIQ-returned Copilot summary text, and any shared chat attachments are the **source-of-truth verbatim**. Kushi was not persisting any of them as immutable raw artifacts; the snapshot file is curated text that cannot be re-derived once the recording disappears. Every other evidence class (email, OneNote, SharePoint, CRM, ADO) persists in its source system for years — meetings are the ONE class where the source expires.
12
+
13
+ If the user had asked "what exactly did Sachin say in the chat?" two months from now, the answer would have been "the chat is gone; the recording is gone; the snapshot is what we have." That is unacceptable when the chat thread WAS retrievable at capture time.
14
+
15
+ ### Fix
16
+ - New instruction: `plugin/instructions/meetings-verbatim-required.instructions.md` (HARD rule, v3.10.0, hardened in v3.10.1). Every captured meeting MUST produce `Evidence/<alias>/meetings/verbatim/<YYYY-MM-DD-HHMM>_<slug>/` containing a transcript-class file (`transcript.vtt` preferred, else `transcript.txt`, else `transcript-source.md` with WARNING header). Chat alone is NOT a transcript — the transcript cascade is exhaustive: `m365_get_transcript` → Graph REST `/onlineMeetings/.../transcripts/.../content` → `m365_get_facilitator_notes` → WorkIQ strict full-text pull → recording download.
17
+ - `pull-meetings` bumped 2.1.0 → 2.3.0. Cascade restructured into Half A (verbatim/ capture, transcript-first, REQUIRED first) → Half B (curated stream/ block citing verbatim files).
18
+ - New template: `templates/snapshot/meeting-verbatim.template.md` (Validation enforces transcript-class file presence).
19
+ - `templates/weekly/meetings-stream.template.md` per-meeting block now requires a `Verbatim folder` field + 3 validation checks (folder exists, non-empty, transcript-class present).
20
+ - `templates/snapshot/meetings-series-index.template.md` Notes section documents the verbatim/ sibling.
21
+ - `verbatim-by-default.instructions.md` adds anti-pattern #9.
22
+ - `self-check/run.ps1` adds deep-mode rule D13 (a/b/c): walks stream/*.md, warns if (a) verbatim/<dir> missing, (b) verbatim/<dir> empty, (c) verbatim/<dir> has chat but NO transcript-class file.
23
+
24
+ ### Verification
25
+ After this fix landed, the John Deere FDE Intake meeting was backfilled: `Evidence/ushak/meetings/verbatim/2026-05-13-1530_fde-intake-john-deere/` contains chat-messages.json (22 msgs), chat-messages.md (rendered), transcript-source.md (WorkIQ summary), recording-url.txt, coverage.md, captured-at.txt. The recording URL is now captured locally even though the file itself will eventually expire.
26
+
27
+ ### Source
28
+ - User feedback 2026-05-18 ~09:30 EDT: "why is there no meetings transcripts for john deere" followed by paste of sibling tool's rich output and "for meetings alone, create a verbatim folder and put verbatims in it. Since meetings expire, this is required for meetings alone" and "make this true skill and so. not just one off".
29
+
30
+
@@ -0,0 +1,46 @@
1
+ # Misc / external links — learnings
2
+
3
+ ## 2026-05-14 — v3.9.0 introduction
4
+
5
+ **Why pull-misc exists:** The dedicated `pull-*` skills cover the structured M365 surfaces, but projects routinely depend on evidence that lives outside — Loop pages, Learn articles, GitHub repos, PDFs, local files, public web pages. The External Links Context doctrine has tracked these as `<project>/external-links.txt` for a while; v3.9.0 promotes that file from "context for the LLM to read" to "first-class evidence with retry registry + snapshot files."
6
+
7
+ **Design decision: deterministic, not fuzzy.** Considered an auto-discovery approach (crawl SharePoint search / Teams chats / bookmarks for project-name mentions). Rejected for three reasons:
8
+
9
+ 1. SharePoint search-index freshness is non-deterministic — same crawl twice = different results.
10
+ 2. Cross-project bleed (a chat thread mentioning both A and B gets attributed to whichever project the crawl ran for).
11
+ 3. Tenant-wide access spans projects the user isn't consulting on — crawls leak content.
12
+
13
+ The user pasting links into `external-links.txt` is the right boundary: auditable, version-controllable, diffable, deterministic.
14
+
15
+ **Format preserved:** v3.9.0 does NOT change the existing `<type>|<owner>|<title>|<url-or-path>|<notes>` format. Just adds `loop` to the recognized types and builds a runner around it.
16
+
17
+ **Routing model:**
18
+
19
+ - `onenote / sharepoint / ado` → delegated to dedicated skills (recorded as `delegated` in registry, not double-pulled here).
20
+ - `loop` → browser path (Playwright, reuses `~/.copilot/playwright-profile/onenote/` because same M365 cookie scope).
21
+ - `web / learn / docs / github / confluence / pdf / unknown` → HTTP path (fetch + @mozilla/readability for HTML extraction).
22
+ - `file` → local file read.
23
+ - Anything matching `<PASTE_*_URL>` or `<TODO*>` → `placeholder`, surfaced in run report until filled.
24
+
25
+ **Test results 2026-05-14:**
26
+
27
+ - ABN AMRO `external-links.txt` (13 entries): 8 delegated (onenote+sharepoint+ado correctly skipped), 3 placeholders detected, 2 file links failed (paths point at customer_workspace/FDEDocs which doesn't exist on this machine — correct behavior, surfaced as fetch-failed not silently dropped).
28
+ - Synthetic test with `learn.microsoft.com/fabric/` and `httpbin.org/html`: 2/2 captured, Readability extracted clean text (~2KB and ~3KB respectively).
29
+
30
+ **Loop branch is structurally validated but not yet exercised against real Loop URLs** — no project's `external-links.txt` contains a `loop|` entry yet. The branch is a thin variation of pull-onenote v2.6.0 (same Playwright profile, same auth-required detection, body extraction via cascade `[data-loop-canvas]` → `.fui-FluentProvider` → `[role="main"]` → `body`). Will get its first real exercise when a user adds Loop links to a project.
31
+
32
+ **Anti-patterns codified:**
33
+
34
+ 1. ❌ Auto-discovering links by crawling SharePoint or Teams (non-deterministic, boundary-violating).
35
+ 2. ❌ Storing only `url` (not `(type, url)` tuple) as registry key.
36
+ 3. ❌ Silently dropping links when they disappear from `external-links.txt` — mark `removed`, keep snapshot for audit.
37
+ 4. ❌ Pulling delegated types here (creates conflicting evidence with the dedicated pull-* skill).
38
+ 5. ❌ Persisting placeholder URLs as `fetch-failed` — they're `placeholder`, distinct state.
39
+ 6. ❌ Loop link without Playwright profile silently writing empty snapshot — must mark `auth-required`.
40
+
41
+ **Lessons:**
42
+
43
+ 1. The format already existed — promoting it cost less than designing a new one. Always look for existing artifacts before inventing.
44
+ 2. Reusing the OneNote Playwright profile for Loop saved an entire bootstrap step. Same auth scope = same profile.
45
+ 3. Readability is sufficient for ~80% of "give me the body of this web page" cases. Fall back to raw stripped HTML only when it fails.
46
+ 4. Per-link retry registry (same shape as v3.8.0 OneNote `one_pages[]`) is the right durability primitive — survives auth gaps, surfaces placeholders, distinguishes states.
@@ -0,0 +1,215 @@
1
+ # Learnings — OneNote (`pull-onenote`)
2
+
3
+ Newest on top. Format defined in [`README.md`](./README.md).
4
+
5
+ _(append a new entry the moment a fix lands during a `pull-onenote` run)_
6
+
7
+
8
+ ## 2026-05-14 — Pre-flight gate: distinguish notebook-unavailable from auth-required
9
+
10
+ **Trigger:** AGCO refresh, immediately after v3.10.2 (URL synthesis killed). Even with a freshly user-pasted `one_sectionWebUrl` and a valid Edge profile, the runner returned `auth-required`. Manual sanity-check: the user opened `https://onenote.cloud.microsoft/` directly in their browser and got the **"Sorry, we ran into a problem"** dialog at the notebook-list level — before they could even click into the AGCO section. So the runner couldn't possibly succeed; the failure was OneNote-for-Web side, not auth.
11
+
12
+ **Root cause classification was overloaded.** The runner only knew two end-states: "canvas frame attached → success" and "didn't attach → auth-required". But OneNote-for-Web has at least three failure surfaces:
13
+ 1. **Login redirect** (`login.microsoftonline.com`) — genuine auth-required.
14
+ 2. **Service/notebook error dialog** ("Sorry, we ran into a problem", "We couldn't open", "This notebook can't be opened", "There was a problem") — service- or notebook-side. Auth is fine.
15
+ 3. **Silent timeout** (no chrome, no error, no redirect) — usually network or extreme service degradation.
16
+
17
+ Conflating #2 with #1 sends the operator down the wrong recovery path: re-bootstrap auth when the real fix is to recover the notebook (open in OneNote desktop, force sync, wait, or re-capture the section URL).
18
+
19
+ **Fix (v3.11.0):**
20
+
21
+ - New `preflightOneNoteWeb()` in `runner.mjs` runs BEFORE navigating to any section URL. Probes `https://onenote.cloud.microsoft/` and classifies the end-state into `ok` / `auth-required` / `onenote-web-unavailable`. The pre-flight is also exposed as a standalone CLI mode (`--preflight`) for gate drivers.
22
+ - The canvas-attach wait loop now ALSO scans every frame's body for the same error-dialog patterns each tick, so a section-load error (e.g. moved section, sync drift) is caught ~500ms after the dialog appears, not after `TIMEOUT_MS`.
23
+ - New `runStatus: "notebook-unavailable"` is emitted for #2 — distinct from `auth-required` — and the runner's error message includes a verbatim recovery checklist (hard-refresh; open in OneNote desktop; wait 10–15min; re-capture URL).
24
+
25
+ **Doctrinal lesson codified into `pull-onenote/SKILL.md` Pre-flight A.4:** the three-way classification IS the contract; the runner MUST distinguish all three end-states; auto-retry is allowed for `auth-required` (next refresh) but FORBIDDEN for `notebook-unavailable` — that one needs human/notebook-side recovery first.
26
+
27
+ **Validation marker:** when AGCO's notebook becomes available again in OneNote-for-Web, re-run kushi pull and confirm the new runStatus values round-trip through to refresh-reports. (Pending — notebook still showing the dialog as of write time.)
28
+
29
+
30
+ ## 2026-05-14 — Conditional Access requires Edge; cookie domains don't transfer; URL must be canonical
31
+
32
+ **Trigger:** Pulling AGCO OneNote evidence for the first time in this session. Bootstrap appeared to succeed (no errors), but every subsequent headless run returned `auth-required`. Re-bootstrapping multiple times did not help.
33
+
34
+ **Three independent root causes had stacked, each masking the next:**
35
+
36
+ 1. **Wrong URL formula (fixed in v3.10.0).** AGCO's pre-doctrine registry entry had a synthesized URL using `wd=target(/<name>/)` (no fileId, no pipe). The canonical formula is `wd=target(<sectionName>|<sectionFileId>/)`. Wrong formula → OneNote-for-Web silently shows "Sorry, we ran into a problem" — indistinguishable from auth failure to a headless runner. Fixed by `recapture-section-url.mjs` auto-heal from sibling registry entries (HCA shares the same notebook, so its `notebookSourceDoc` + `spoBaseUrl` were inherited).
37
+
38
+ 2. **Cookie-domain isolation (fixed in v3.10.1).** Bootstrap was navigating only to `https://onenote.cloud.microsoft/`. But the canonical Doc.aspx URLs live on `https://microsoft-my.sharepoint-df.com/personal/<upn>/...`. These are SEPARATE cookie domains — signing into one does NOT authenticate the other. After `--bootstrap` succeeded, headless runs against the SPO URL still hit `login.microsoftonline.com` because SPO had no cookies. Fixed by making `--bootstrap` walk: `onenote.cloud.microsoft` → `microsoft-my.sharepoint.com` → `microsoft-my.sharepoint-df.com` in the same session, so cookies plant on all three.
39
+
40
+ 3. **Conditional Access blocks vanilla Chromium (fixed in v3.10.1).** Even after fixing the cookie-domain issue, the SPO sign-in surface returned a hard block: **"You can't get there from here — this application contains sensitive information and can only be accessed from devices or client applications that meet Microsoft management compliance policy. Since you're using Chrome... Alternatively, you can use Microsoft Edge or Internet Explorer."** Playwright's bundled Chromium is not Intune-managed. Fixed by switching `runner.mjs` to `chromium.launchPersistentContext(profile, { channel: 'msedge', ... })` — Playwright drives the user's installed Edge, which IS Intune-trusted.
41
+
42
+ **Doctrinal lessons codified into `pull-onenote/SKILL.md` Pre-flight A.1/A.2/A.3:**
43
+
44
+ - **A.1** — `channel: 'msedge'` is HARD-required. Vanilla Chromium WILL be CA-blocked in Microsoft tenants. Do not fall back to Chromium.
45
+ - **A.2** — Bootstrap MUST visit `onenote.cloud.microsoft` AND both SharePoint hosts (`sharepoint.com` + `sharepoint-df.com`) to plant cookies in all three required domains. Single-host bootstrap is broken by design.
46
+ - **A.3** — When switching browser channel (Chromium ↔ Edge), DELETE the existing profile first; cookie/cache formats are not compatible.
47
+
48
+ **The "auth-required" signal is overloaded.** It can mean: (a) genuine cookie expiry, (b) wrong URL formula returning OneNote's "Sorry" dialog, (c) cookies present for one domain but not the navigation target's domain, (d) Conditional Access blocking the browser channel. The runner cannot distinguish these — bootstrap-then-retest is the only protocol. v3.10.0 closes (b) and v3.10.1 closes (c)/(d) as systemic failure modes; only (a) should remain after this release.
49
+
50
+ **Validation:** AGCO bootstrap completed cleanly with Edge + dual-surface walk on 2026-05-14. Profile size shrank from 27MB (Chromium) to ~8MB (Edge, just cookies after fresh start). Sign-in walked through MFA on `onenote.cloud.microsoft` and silently SSO'd on the SPO hosts.
51
+
52
+
53
+ ## 2026-05-13 — WorkIQ has TWO answer modes; index field names are the contract
54
+
55
+ **Trigger:** After shipping v3.7.7 (no-fabrication rule), user disputed the claim that OneNote bodies cannot be retrieved and pasted a historical `m365-mutable.json` shape that used `one_sectionFileId`, `one_sectionPath`, `one_sectionOneNoteGuid`, citing a prior project-evidence flow that successfully pulled OneNote pages.
56
+
57
+ **Root cause:** WorkIQ has two answer paths:
58
+ 1. **Search-index extractor** — triggers when the query body contains the literal M365 search index field names (`wdsectionfileid`, `wdsectiongroupid`, `wdpartid`, `wdsectiononenoteguid`). Returns the indexed body fragment verbatim.
59
+ 2. **LLM-summary path** — triggers when the query uses prose like "the page titled X" or "page id Y" or "the OneNote section called Z". Synthesizes a summary from titles + adjacent evidence. This is what v2.2.0 and v2.3.0 hit.
60
+
61
+ The first eight `pull-onenote` versions used prose phrasing (`"sectionFileId <id>"`, `"pageId <pageId>"`) — this is mechanically different from `wdsectionfileid = <id>` / `wdpartid = <id>`. The previous project-evidence flow used the index field names verbatim, which is why it worked.
62
+
63
+ **Fix shipped (v2.4.0 of pull-onenote):**
64
+ 1. New "Canonical lookup keys" subsection in Step A enumerates the index field name → m365-mutable.json key mapping (`wdsectionfileid` ↔ `one_sectionFileId`, etc.) with explicit "DO NOT GUESS, DO NOT PARAPHRASE" directive.
65
+ 2. Step A.1 (by `wdsectionfileid`) and A.2 (by `wdsectiongroupid`) WorkIQ queries rewritten to use the index field names verbatim with `=` syntax.
66
+ 3. Step B verbatim probe rewritten: `wdpartid = <wdpartid> AND wdsectionfileid = <sectionFileId>` lookup, asks for the **indexed page body content** (not "the body of the page").
67
+ 4. Step B explanatory paragraph added: "WorkIQ has two answer modes... the field names are the contract."
68
+ 5. Step B retry phrasing now explicitly tells WorkIQ "Your previous response was generated by the LLM-summary path, not the search-index extractor. Re-run the query against the search index using wdpartid = <wdpartid> AND wdsectionfileid = <sectionFileId>."
69
+ 6. Step C stream pass also rewritten to use `wdsectionfileid` / `wdsectiongroupid` / `lastModifiedDateTime` field names.
70
+ 7. Failure-handling note for Graph `/me/onenote/*` updated with empirical 401 evidence (az CLI app id `04b07795-…` lacks Notes.Read; tenant denies admin consent).
71
+
72
+ **Doctrinal lesson:** when an enterprise tool exposes index-aware query semantics, **document the literal field names in the skill** instead of paraphrasing. Paraphrasing routes to the wrong answer path. This applies to all pull-* skills, not just OneNote — verify the same pattern for SharePoint (`SiteId`, `WebId`, `ListItemId`, `Path`), Teams (`channelIdentity`, `chatId`, `messageId`), Email (`internetMessageId`, `conversationId`).
73
+
74
+
75
+ ## 2026-05-13 — WorkIQ summarization masquerades as captured bodies; never fabricate narrative
76
+
77
+ **Trigger:** User said "onenote is still sparse, did you rerun or tighten" after the HCA refresh shipped 5 OneNote page snapshots that were each ~1.5K bytes. Inspection showed every page file had: header + `❌ Partial via WorkIQ — body not extractable` marker + a 3-paragraph **AI Narrative Summary inferred from adjacent emails and chat traffic** ("plausibly the engagement-level rollup page", "if Usha's backfill includes verbatim chat-summary…"). The narrative was speculation, not capture.
78
+
79
+ **Root cause (skill-level):**
80
+ - `pull-onenote/SKILL.md` v2.2.0 allowed snapshot files with a `page-body-unavailable` marker AND an AI Narrative Summary in the same file. The depth-bar said "AI Narrative Summary REQUIRED FIRST" without an exception for the unavailable case, so the producer satisfied the contract by inferring narrative from adjacent evidence.
81
+ - Graph `/me/onenote/*` is not a viable fallback in this tenant — Notes.Read scope requires admin consent that is denied. WorkIQ is the only path, and WorkIQ summarizes by default.
82
+
83
+ **Fix shipped (v2.3.0 of pull-onenote, plus verbatim-by-default v3.7.6.1):**
84
+ 1. `pull-onenote` Step B now uses a strict verbatim-or-marker probe (the WorkIQ prompt forces one of two outcomes only — verbatim body OR the literal `page-body-unavailable: <reason>` marker, no third option).
85
+ 2. Verbatim acceptance check (HARD): rejects responses containing `"plausibly"`, `"likely"`, `"appears to"`, `"inferred from"`, `"based on adjacent evidence"`, `"this page is about"`, `"key topics include"`.
86
+ 3. No-fabrication rule (HARD): if the body is unavailable, the snapshot file MUST contain ONLY the header + the marker + a `next_step` asking the user to paste. AI Narrative Summary is forbidden in this case. Empty is correct.
87
+ 4. `items_verbatim` added to run-log alongside `items_pulled` and `items_enumerated`. Run is classified `partial-bodies` when the verbatim ratio is < 0.5 — stops "5 page files written" from masquerading as "5 pages captured" in the per-user refresh report.
88
+ 5. `verbatim-by-default.instructions.md` adds anti-pattern #8: "Inferred narrative as substitute for body".
89
+
90
+ **Recovery action for HCA (this turn):** the 5 existing HCA OneNote page files at `HCA\Evidence\ushak\onenote\snapshot\pages\` will be rewritten to the v2.3.0 shape (header + unavailable marker + paste-ask, no inferred narrative) and the user will be asked once at the end of the next refresh to paste the page bodies.
91
+
92
+
93
+ ## 2026-05-14 — v3.7.8 retraction + v3.7.9 corrected doctrine
94
+
95
+ **What v3.7.8 claimed (WRONG):** WorkIQ has a "search-index extractor" mode triggered by literal field names (`wdsectionfileid`, `wdpartid`) in the query body. Using these field names would return verbatim indexed bodies; using natural language would route to summary mode.
96
+
97
+ **What was empirically proven against HCA on 2026-05-13/14:**
98
+
99
+ 1. WorkIQ does NOT honor `wdsectionfileid = <id>` as filter syntax — it routes to summary mode AND returns "OneNote internal properties not exposed as searchable fields" refusal text.
100
+ 2. The wdpartid GUIDs we observed in earlier runs were **URL fragments inside SharePoint Doc.aspx hyperlinks** that WorkIQ rendered as response footnotes — not search-index extractor outputs.
101
+ 3. The Nova-pattern (natural-language query naming the section + notebook by display name and the page by quoted title) is the actual working pattern. It returned a real verbatim body for the HCA `4/3 - HCA with Jay and Martin` page.
102
+ 4. **Body retrieval is non-deterministic** — the same 4/3 page returned a verbatim body at 19:42 PDT and `BODY-NOT-EXPOSED` at 19:48 PDT, same query, no edits. The M365 search index's exposure of OneNote bodies oscillates over time.
103
+ 5. **The blocker for months was the WorkIQ EULA.** Without `workiq accept-eula`, every OneNote query silently returns nothing useful. This is a one-time setup step, not a per-call gate.
104
+
105
+ **v3.7.9 corrected doctrine (now in pull-onenote SKILL.md v2.5.0):**
106
+
107
+ - Pre-flight: probe WorkIQ; if EULA prompt returned, run `workiq accept-eula` and retry.
108
+ - Step A enumerate: natural-language query naming section + notebook by display name (NOT field-name filter syntax). Returns a markdown table with one row per page; wdpartid GUIDs extracted from response URL fragments per row.
109
+ - Step B per-page: natural-language query naming page title + section + notebook by display name. Asks for verbatim body or the literal string `BODY-NOT-EXPOSED`.
110
+ - Per-page retry registry: every page lives in `m365-mutable.json#knownSections.<projectKey>.one_pages` with `last_status` and `attempts`. Pages stuck at `BODY-NOT-EXPOSED` are retried on every refresh until they succeed or the user pastes.
111
+ - Snapshot files carry yaml front-matter with the same fields, so refresh runs can read state from disk if the registry is unavailable.
112
+
113
+ **HCA result (2026-05-14):** 18 pages enumerated. 1 captured verbatim (4/3). 15 pending retry (BODY-NOT-EXPOSED). 2 enumeration-only (will be probed in Step B on next refresh).
114
+
115
+ **Key lesson:** when a doctrine is grounded in pattern-matching against tool responses (e.g. "field names route to extractor"), validate it empirically against the live tool BEFORE shipping. The v3.7.8 doctrine was internally consistent and self-citing but never actually tested end-to-end — the 4/3 success that motivated v3.7.9 happened only after honestly retracting v3.7.8 and replicating the Nova workflow step-by-step.
116
+
117
+
118
+ ## 2026-05-14 — v3.7.9 retraction + v3.8.0 architectural pivot
119
+
120
+ **What v3.7.9 (yesterday) shipped:** WorkIQ natural-language by display name + per-page retry registry. Validated 1-page capture and codified as primary path.
121
+
122
+ **What we proved against HCA on 2026-05-14:** the v3.7.9 capture-rate is structurally too low for a Mon-9am scheduled refresh. WorkIQ body retrieval is non-deterministic (same page flips exposed/not exposed across queries minutes apart), and across 18 enumerated HCA pages WorkIQ returned exactly 1 verbatim body. A Monday-9am run that captures 1 page out of 16-18 is not a refresh, it's a coincidence.
123
+
124
+ **The pivot — browser-scrape via OneNote-for-Web with persisted Playwright profile:**
125
+
126
+ - Constructed the OneNote-for-Web deep-link URL using values already in `m365-mutable.json`: `<spoBaseUrl>/_layouts/15/Doc.aspx?sourcedoc={<notebookSourceDoc>}&action=edit&wd=target(<sectionName>|<sectionFileId>/)`.
127
+ - Picked the Microsoft work account at the consent prompt (one-time).
128
+ - Found the OneNote canvas frame at `ffc-onenote.officeapps.live.com/onenoteframe.aspx` (nested 2 frames deep).
129
+ - Enumerated pages from the accessibility tree: every page has `aria-label="<title>, page X of N, Page. Select to open page contents."` — gives ordered, complete, authoritative page list.
130
+ - Clicked each page in the rail, waited 2.5s for canvas to settle, read `document.querySelector('#PageContentWrapper').innerText` — got full verbatim body.
131
+
132
+ **Result:** 16/16 HCA pages captured (~120KB total) in ~50 seconds. Compare to WorkIQ's 1/18 in 30+ minutes of probing. Includes pages WorkIQ flagged BODY-NOT-EXPOSED on previous attempts, proving the bodies were always retrievable — WorkIQ just couldn't reach them.
133
+
134
+ **Architectural decision:**
135
+
136
+ - Browser-scrape is the PRIMARY path in pull-onenote v2.6.0.
137
+ - WorkIQ is the FALLBACK (when Playwright profile auth-expires) AND the source of stream events (page-edit signals via search index — those ARE deterministic).
138
+ - Per-page registry stores BOTH `webPageId` (browser navigation GUID) AND `wdpartid` (WorkIQ correlation GUID). New `last_status` value `auth-required` for unattended-MFA-blocked runs.
139
+ - New runner: `plugin/skills/pull-onenote/runner.mjs` (Playwright + `launchPersistentContext` for unattended refreshes).
140
+
141
+ **Known gap (documented, accepted):** Conditional Access / MFA challenges cannot be satisfied unattended. Roughly every 1-4 weeks the runner will hit a sign-in redirect, exit with `runStatus: "auth-required"`, mark all queued pages `auth-required`, and surface in the run report. The user does ONE interactive `node runner.mjs --bootstrap` and the next scheduled run resumes silently.
142
+
143
+ **Lessons compounded across v3.7.x → v3.8.0:**
144
+
145
+ 1. When a doctrine is grounded in pattern-matching against tool responses, validate it empirically end-to-end BEFORE shipping. v3.7.8 and v3.7.9 both shipped with one-page-of-evidence and were retracted within 24 hours.
146
+ 2. When a primary path's capture-rate drops below ~80%, treat it as architecturally inadequate, not "needs more retries". v3.7.9's retry registry was the correct durability layer for a HIGH-capture-rate path; on a 5%-capture-rate path it just made the bad output more visible.
147
+ 3. Browser automation is acceptable infrastructure for evidence pulls when the tool surface is deterministic and the auth model can be persisted across runs. The Loop and expense-report skills already proved this; pull-onenote v2.6.0 follows their lead.
148
+ 4. Always store identifiers in EVERY form a tool surface uses. `wdpartid` (WorkIQ) and `webPageId` (browser) are both small strings; storing both costs nothing and keeps both retrieval paths viable.
149
+
150
+
151
+ ## 2026-05-18 — Bare runner JSON + hand-rolled file writes = layout violation + UTF-8 corruption (v3.11.5)
152
+
153
+ **Trigger (John Deere):** After fixing the bootstrap sign-in and single-page regex bugs, the JD scrape succeeded — but the agent wrote a single `section.md` at `Evidence/ushak/onenote/snapshot/section.md` instead of the doctrine-mandated `snapshot/pages/<safe-title>.md`. The PowerShell-piped UTF-8 also corrupted every NBSP to `┬á` (1825 occurrences in a 13 KB body). Neither defect was caught at the runner level — both were the driver's responsibility, and the driver was the agent improvising.
154
+
155
+ **Root cause: the runner JSON contract has no automated writer.** `runner.mjs` is silent about the canonical snapshot layout from `snapshot-vs-stream.instructions.md` (`snapshot/pages/<safe-title>.md`, one file per page, with the full front-matter schema). Any driver — PowerShell, Clawpilot, future automation — has to re-derive: which directory, what filename, what front-matter keys, what registry shape, what run-report format. That's where things go wrong, every time.
156
+
157
+ Additionally, PowerShell's default `Out-File -Encoding utf8` writes UTF-8-BOM and re-encodes non-ASCII via the system code page — NBSP (U+00A0) becomes `┬á` (the UTF-8 bytes `C2 A0` of NBSP decoded under the OEM console code page, CP437/CP850; CP1252 would instead show `Â` followed by a NBSP). Even `Tee-Object` does this. The only safe way to round-trip OneNote body bytes through PowerShell is `[IO.File]::WriteAllText($path, $text, [Text.UTF8Encoding]::new($false))` — or to avoid PowerShell entirely.
158
+
159
+ **Fix (v3.11.5):**
160
+
161
+ - New `plugin/skills/pull-onenote/write-snapshot.mjs` is the single supported driver. It:
162
+ - invokes `runner.mjs` via `child_process.spawnSync` (no shell, UTF-8 preserved end-to-end);
163
+ - writes one `snapshot/pages/<safe-title>.md` per captured page, with the full front-matter schema mandated by `pull-onenote/SKILL.md` §"Snapshot file shape";
164
+ - upserts `m365Mutable.knownSections.<project>.one_pages[]` with attempts counter, dual-ID schema, snapshot_path, captured_at;
165
+ - emits a per-run report at `Evidence/<alias>/onenote/refresh-reports/<YYYYMMDD-HHMM>-onenote.md` per `run-reports.instructions.md`.
166
+ - `pull-onenote/SKILL.md` "Canonical CLI invocations" section now mandates `write-snapshot.mjs` as the production path; the bare `runner.mjs` is documented only as a diagnostic tool.
167
+
168
+ **Doctrinal lessons:**
169
+
170
+ 1. Any tool whose output requires structured file-layout work MUST ship a writer of its own, not delegate to a shell script. Hand-rolled wiring is where doctrine gets lost.
171
+ 2. PowerShell `Out-File`, `Set-Content`, `Tee-Object`, and stdout `>` redirection ALL re-encode non-ASCII when capturing piped data. If a tool's output contains non-ASCII (and OneNote bodies always do — NBSP, smart quotes, em-dashes, bullet chars), the driver MUST use `child_process` or write through Node, not PowerShell pipes.
172
+
173
+ **Validation:** JD `3/13 - account team` re-pulled via `write-snapshot.mjs` on 2026-05-18 — clean layout (`Evidence/ushak/onenote/snapshot/pages/3-13-account-team.md`), clean UTF-8 (`-match '┬á'` returned False), registry upserted with attempts=1, run report written.
174
+
175
+
176
+ ## 2026-05-18 — Bootstrap sign-in skipped + single-page section regex (v3.11.2 / v3.11.3)
177
+
178
+ **Triggers (John Deere refresh):**
179
+
180
+ 1. Re-bootstrap appeared to succeed but every subsequent scrape returned `auth-required`.
181
+ 2. After fixing #1, scrape preflight passed and the canvas frame attached, but page enumeration timed out — even with a 3-minute window — for a section with only one page.
182
+
183
+ **Root causes:**
184
+
185
+ 1. `--bootstrap` used `page.waitForURL(/onenote\.cloud\.microsoft|m365\.cloud\.microsoft/)` which **matches instantly** because we just navigated to `https://onenote.cloud.microsoft/`. The 5-min wait collapsed to ~0 and we seeded SharePoint cookies before the user could even type their email. Logs showed `Step 1/2: Sign in…` immediately followed by `Step 2/2: Seeding SharePoint cookies` with no real wait between them. Subsequent preflight failed because OneNote-for-Web cookies (different from the SPO cookies) were never minted.
186
+ 2. The page-rail enumerator regex only accepted multi-page aria labels (`, page N of M, Page.`). **Single-page sections** (e.g. John Deere's section with just `3/13 - account team`) render as `<title>, Page. Selected.` — the regex never matched and `waitForFunction` hung until timeout, producing `pages: []` with error `frame.waitForFunction: Timeout`.
187
+
188
+ **Fixes:**
189
+
190
+ - **v3.11.2** — `--bootstrap` now waits for a real OneNote post-auth UI indicator (`[aria-label*="Account manager" i], [data-automationid="NotebookList"], button[aria-label*="notebook" i], iframe[src*="onenoteframe.aspx"]`) — same selectors `preflightOneNoteWeb` uses. Logs `Sign-in detected (OneNote chrome rendered).` when satisfied, or a warning if 5 min elapses without sign-in.
191
+ - **v3.11.3** — page-rail `waitForFunction` and the `pages` enumerator both accept either format:
192
+ - multi-page: `<title>, page N of M, Page.`
193
+ - single-page: `<title>, Page. Selected.` → emitted as `{ pos: 1, total: 1 }`
194
+ - dedup via `seen` set so a page matched by both rules isn't double-counted.
195
+
196
+ **Validation:** JD `John Deere.one` captured end-to-end on 2026-05-18 — 1/1 pages, 13 KB verbatim body for `3/13 - account team` (Black Box → Dual Write FDE intake meeting). Snapshot at `John Deere/Evidence/ushak/onenote/snapshot/section.md`.
197
+
198
+ **Doctrinal lesson:** any `waitForURL` against a URL we just navigated to is a no-op — must wait for a **post-auth UI signal** (chrome that only renders after token exchange). Same anti-pattern would apply to any future bootstrap (SharePoint, Loop, M365 admin, etc.). And: any aria-label-driven enumerator must handle the **N=1 special-case format** for that UI surface, not assume the multi-element format applies universally.
199
+
200
+
201
+ ## 2026-05-18 — WorkIQ OneNote three-tier output codified (kushi v3.11.1)
202
+
203
+ **What happened.** User pointed out that `hca-snip.txt` and `hca-onenote-raw.txt` (Scratchpad, 2026-05-13) exist as WorkIQ-derived OneNote evidence, contradicting the agent's earlier claim that "OneNote was unavailable" during JD bootstrap. Root cause: I had asked WorkIQ for "all 21 pages' full verbatim bodies in one call" — WorkIQ silently degraded to tier A (page enumeration) and tier B (search snippets) and returned an explicit note `combined content size … exceeds the maximum response payload that Copilot can safely render in-chat`. I read that as "OneNote unavailable" instead of "Tier C must be per-page."
204
+
205
+ **Empirical contract (now codified in `workiq-only.instructions.md`):**
206
+
207
+ WorkIQ returns OneNote in THREE tiers:
208
+
209
+ - **Tier A — Enumeration**: titles + wdpartid + wdsectionfileid + last-modified + author + deep-link. Bulk OK. (HCA evidence: request-ids `bc780473-66d7-4809-89dc-e910e4b8ced8`, `fdbf3290-649d-4066-84ea-ceab678697ed`.)
210
+ - **Tier B — Search snippets**: ~500-char Graph search snippet per page, verbatim from search index. Bulk OK. (HCA evidence: request-ids `f3744946-5b22-4c09-96ac-714c8508d5f6`, `d93deb47-147b-4d19-8cd7-c0a0e2f76f30`.)
211
+ - **Tier C — Full verbatim body**: ONE page per call. Bulk refused.
212
+
213
+ **Defect signature:** "give me full bodies of all pages in section X" prompt → response includes the phrase `combined content size … exceeds the maximum response payload` plus a tier-A inventory table. Misreading that as "OneNote unavailable" is the defect. Correct response: re-issue the per-page tier-C prompt (one wdpartid at a time).
214
+
215
+ **Result.** `workiq-only.instructions.md` OneNote section now lists all four prompts (Tier A discovery, Tier A page index, Tier B snippets, Tier C single-page body) with the exact WorkIQ wording. `pull-onenote` v2.9.0 Playwright-primary doctrine is unchanged — Playwright is not a Graph call, and it remains the verbatim-bulk path. WorkIQ tier C is the fallback when Playwright auth expires (do per-page tier-C calls in that mode, not bulk).
@@ -0,0 +1,5 @@
1
+ # Learnings — SharePoint / OneDrive (`pull-sharepoint`)
2
+
3
+ Newest on top. Format defined in [`README.md`](./README.md).
4
+
5
+ _(no entries yet — append the moment a fix lands during a `pull-sharepoint` run)_
@@ -0,0 +1,5 @@
1
+ # Learnings — Teams (`pull-teams`)
2
+
3
+ Newest on top. Format defined in [`README.md`](./README.md).
4
+
5
+ _(no entries yet — append the moment a fix lands during a `pull-teams` run)_
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "kushi",
3
- "description": "Multi-source project evidence + Q&A agent. Snapshot + stream capture across Email, Teams, OneNote, SharePoint, Meetings, CRM, ADO; plus read-only natural-language Q&A over the captured evidence. WorkIQ-first. Host-agnostic. Three install profiles: core (aggregator only), standard (default — adds bootstrap/refresh + FDE authoring), full (adds State/ rollup).",
4
- "version": "3.3.0",
3
+ "description": "Multi-source project evidence + Q&A agent. Snapshot + stream capture across Email, Teams, OneNote, SharePoint, Meetings, CRM, ADO; plus read-only natural-language Q&A over the captured evidence. WorkIQ-only for M365 sources (Graph / m365_* FORBIDDEN as fallbacks; user-paste is first-class). Host-agnostic. Three install profiles: core (aggregator only), standard (default — adds bootstrap/refresh + FDE authoring), full (adds State/ rollup).",
4
+ "version": "3.12.1",
5
5
  "author": "ushakrishnan",
6
6
  "repository": "https://github.com/gim-home/kushi",
7
7
  "default_profile": "standard",
@@ -91,6 +91,26 @@
91
91
  "verbs": [
92
92
  "state"
93
93
  ]
94
+ },
95
+ "preview": {
96
+ "extends": "standard",
97
+ "description": "PREVIEW: opt-in two-way sync skills. Adds propose-ado-update (read-only proposal generator) and apply-ado-update (gated; v0.1.0-preview is dry-mode only — produces planned.jsonl, no real ADO writes). Governed by update-ledger.instructions.md. See docs/concepts/roadmap.md and docs/how-to/two-way-ado-update.md.",
98
+ "skills": [
99
+ "propose-ado-update",
100
+ "apply-ado-update"
101
+ ],
102
+ "prompts": [
103
+ "propose-ado",
104
+ "apply-ado"
105
+ ],
106
+ "templates": [
107
+ "ado-update"
108
+ ],
109
+ "reference_packs": [],
110
+ "verbs": [
111
+ "propose-ado",
112
+ "apply-ado"
113
+ ]
94
114
  }
95
115
  }
96
116
  }
@@ -0,0 +1,14 @@
1
+ ---
2
+ name: apply-ado
3
+ description: Apply approved ADO updates from a proposed.md (gated). Currently a preview-stub — writes a planned.jsonl, no real ADO calls yet.
4
+ ---
5
+
6
+ # /apply-ado
7
+
8
+ Route to `@Kushi apply ado <project>`.
9
+
10
+ Delegates to the `apply-ado-update` skill. **Gated** — every write is reviewed (or auto-allowlisted via `<project>/.kushi/ado-update.yml`), and every applied write is appended to `ledger.jsonl` with a reverse op.
11
+
12
+ In v0.1.0-preview this runs in **dry-mode only** — it produces `planned.jsonl` instead of calling ADO. The real write path lands in a follow-up release once the proposal format has been validated against real projects.
13
+
14
+ Profile: **`preview`** only.
@@ -0,0 +1,12 @@
1
+ ---
2
+ name: propose-ado
3
+ description: Read-only ADO update proposal — generate proposed.md from the latest consolidated evidence. NO writes to ADO.
4
+ ---
5
+
6
+ # /propose-ado
7
+
8
+ Route to `@Kushi propose ado <project>`.
9
+
10
+ Delegates to the `propose-ado-update` skill. Read-only — produces a Markdown preview at `<engagement-root>/ado-updates/<YYYY-MM-DD>/proposed.md`. No ADO writes.
11
+
12
+ Profile: **`preview`** only.