@rishildi/ldi-process-skills 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"embedded.d.ts","sourceRoot":"","sources":["../../src/skills/embedded.ts"],"names":[],"mappings":"AAGA,MAAM,WAAW,SAAS;IACxB,YAAY,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,SAAS,EAAE,CAAC;CACpB;AAED,eAAO,MAAM,eAAe,EAAE,aAAa,
|
|
1
|
+
{"version":3,"file":"embedded.d.ts","sourceRoot":"","sources":["../../src/skills/embedded.ts"],"names":[],"mappings":"AAGA,MAAM,WAAW,SAAS;IACxB,YAAY,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,SAAS,EAAE,CAAC;CACpB;AAED,eAAO,MAAM,eAAe,EAAE,aAAa,EAuP1C,CAAC"}
|
package/build/skills/embedded.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
// AUTO-GENERATED by scripts/embed-skills.ts — do not edit
|
|
2
|
-
// Generated at: 2026-04-
|
|
2
|
+
// Generated at: 2026-04-04T21:51:33.150Z
|
|
3
3
|
export const EMBEDDED_SKILLS = [
|
|
4
4
|
{
|
|
5
5
|
name: "create-fabric-lakehouses",
|
|
@@ -75,7 +75,7 @@ export const EMBEDDED_SKILLS = [
|
|
|
75
75
|
},
|
|
76
76
|
{
|
|
77
77
|
relativePath: "assets/agent-template.md",
|
|
78
|
-
content: "# Orchestration Agent: {PROCESS_NAME}\r\n\r\n## Context\r\n\r\n**Process**: {PROCESS_NAME}\r\n**Requirements**: {REQUIREMENTS_SUMMARY}\r\n\r\n---\r\n\r\n## How to Run This Agent\r\n\r\n**Start with Sub-Agent 0 (Environment Discovery).** This gathers the user's\r\npermissions, tooling, and preferences so that every subsequent sub-agent produces\r\nplans tailored to their actual environment. Do not skip this step.\r\n\r\nThen execute each remaining sub-agent in sequence:\r\n\r\n1. Use only the inputs and instructions provided in this file.\r\n2. Produce the specified output document in the designated subfolder.\r\n3. Present the output to the user; ask clarifying questions if anything is unclear.\r\n4. Refine until the user explicitly confirms the output.\r\n5. Append a timestamped entry to `CHANGE_LOG.md` recording what was produced or decided.\r\n6. Pass the confirmed output as the primary input to the next sub-agent.\r\n **Every sub-agent must also read `00-environment-discovery/environment-profile.md`**\r\n and respect the path decisions recorded there.\r\n\r\n**Do not proceed to the next sub-agent without explicit user confirmation.**\r\n**Do not produce code, scripts, or data artefacts not described in each sub-agent below.**\r\n\r\n### Notebook Documentation Standard\r\n\r\nEvery Fabric notebook produced by any skill **must** include a numbered markdown cell\r\nimmediately above each code cell. Each markdown cell must:\r\n\r\n1. State the cell number and a short title (e.g. `## Cell 1 — Install dependencies`).\r\n2. Explain **what** the code cell does in 1–2 sentences.\r\n3. Explain **how to use it**: variables to change, flags to toggle, prerequisites.\r\n\r\nAll transformation logic and design rationale must be **embedded as markdown cells inside\r\nthe notebook** — not maintained as separate documentation files. The notebook is the single\r\nsource of truth. 
A reader must be able to understand what each cell does, why the logic was\r\nchosen, and how to run it without opening any other file.\r\n\r\n### Output Conventions\r\n\r\n- Each sub-agent writes to its own **numbered subfolder** (`01-implementation-plan/`,\r\n `02-business-process/`, etc.). Execution steps continue the numbering (e.g.,\r\n `05-execution/`, `06-gold-layer/`).\r\n- Within each subfolder, distinguish **final deliverables** (notebooks, SQL scripts,\r\n documentation the user runs or deploys) from **intermediate artefacts** (generator\r\n scripts that produce the deliverables). When presenting outputs, label each file.\r\n- All transformation logic and design rationale must be **embedded as markdown cells\r\n inside notebooks** — not maintained as separate documentation files. The notebook\r\n is the single source of truth.\r\n\r\n---\r\n\r\n## Sub-Agent 0: Environment Discovery\r\n\r\n**Input**: Requirements above\r\n**Output**: `00-environment-discovery/environment-profile.md`\r\n\r\nThis sub-agent runs **before anything is planned or built**. Its purpose is to\r\nunderstand the user's environment, permissions, installed tooling, and preferences\r\nso that every subsequent sub-agent produces plans tailored to what is actually\r\npossible and practical.\r\n\r\n### How it works\r\n\r\n1. **Derive questions from the requirements.** Read the requirements and identify\r\n which environment factors will determine which approaches are viable. Group\r\n questions into the relevant discovery domains (see below). Do not ask about\r\n things the requirements don't need — if a process doesn't create workspaces,\r\n don't ask about workspace creation permissions.\r\n\r\n2. **Present the questionnaire.** Show all questions at once, grouped by domain.\r\n Aim for **5–7 questions** — enough to cover the critical unknowns without\r\n overwhelming the user. 
Prioritise by impact: if an answer could change the\r\n entire approach, ask it; if it's a nice-to-have detail, skip it.\r\n Each question must:\r\n - State **why** the answer matters (what it unlocks or blocks).\r\n - Offer concrete options where applicable (e.g., checkboxes, multiple choice).\r\n - Explain what the agent will do differently depending on the answer.\r\n\r\n3. **Confirm understanding.** After the user answers, present a brief summary:\r\n > \"Based on your answers, here's my understanding of your environment: [2–4\r\n > sentence summary of key decisions]. Is this accurate, or anything to correct\r\n > before I proceed to planning?\"\r\n Wait for explicit confirmation. If new gaps surface, ask only the follow-up\r\n questions needed to resolve them — do not re-ask the full questionnaire.\r\n\r\n4. **Record the answers.** Save the complete environment profile as\r\n `00-environment-discovery/environment-profile.md`. This file is the primary\r\n input for Sub-Agent 1 (Implementation Plan) and is referenced by all\r\n subsequent sub-agents.\r\n\r\n### Discovery domains\r\n\r\nSelect only the domains relevant to the requirements. **Every question must\r\nexplain why it is being asked** — what activity needs the permission or tool,\r\nand what the agent will do differently based on the answer.\r\n\r\n#### Permissions & roles\r\n\r\nProbe platform admin rights, resource creation permissions, role assignments,\r\nand domain management. Frame each question around the **specific activity** that\r\nneeds the permission.\r\n\r\nExample — workspace role assignment with Entra groups (a real technical constraint):\r\n\r\n> **Can you assign Entra security groups to Fabric workspace roles?**\r\n>\r\n> _Why this matters:_ The SOP assigns groups to workspace roles for RBAC. The\r\n> Fabric REST API and CLI require **Entra group Object IDs** — display names\r\n> are not accepted. 
The Fabric UI allows searching by name but is manual.\r\n>\r\n> Pick the option that best fits your situation:\r\n>\r\n> - **A) I can look up group Object IDs myself** (e.g., from Entra portal or\r\n> from my admin) → Agent will ask you for the Object IDs and script the\r\n> assignments via Fabric CLI.\r\n> - **B) I have Azure CLI (`az`) installed and can query Entra** → Agent will\r\n> generate `az ad group list --display-name \"...\"` commands so you can\r\n> retrieve Object IDs yourself, then script the assignments.\r\n> - **C) I have PowerShell with the Microsoft.Graph module** → Agent will\r\n> generate `Get-MgGroup -Filter \"displayName eq '...'\"` commands instead.\r\n> - **D) I only have access to the Fabric UI** → Agent will provide step-by-step\r\n> UI instructions with screenshots guidance. Role assignment becomes a manual\r\n> step in the SOP.\r\n> - **E) I'm not sure / I need to check** → Agent will provide a quick check\r\n> command (`az ad group list --display-name \"YourGroupName\" --query \"[].id\"`)\r\n> and pause until you confirm.\r\n\r\nOther permission questions follow the same pattern — always state the activity,\r\nthe constraint, and the options:\r\n\r\n- \"Can you **create workspaces** in Fabric? _(Step 1 needs this. If not, the\r\n agent will produce a workspace specification for your admin to create.)_\"\r\n- \"Can you **create or manage domains** and assign workspaces to them? _(The SOP\r\n organises workspaces under a domain. If you lack domain-admin rights, the agent\r\n will produce a domain-assignment request instead.)_\"\r\n- \"Can you **create lakehouses** in the target workspaces? _(Steps 3-5 provision\r\n lakehouses. If you only have Viewer/Member access, the agent will produce\r\n creation requests for a workspace admin.)_\"\r\n\r\n#### Installed tooling\r\n\r\nProbe CLI tools, SDKs, and runtimes — but only the ones the requirements\r\nactually need. 
**Tell the user what each tool is used for** so they can make\r\nan informed decision about whether to install it.\r\n\r\n- \"Is the **Fabric CLI (`fab`)** installed and authenticated? _(Used for:\r\n creating workspaces, uploading files, creating shortcuts, listing resources.\r\n If not installed, the agent will provide notebook-based alternatives or guide\r\n you through installation.)_\"\r\n- \"Is **Azure CLI (`az`)** available? _(Used for: querying Entra group/user\r\n Object IDs when assigning roles. Not needed if you can supply Object IDs\r\n directly or prefer PowerShell.)_\"\r\n- \"Do you have **Python 3.10+**? _(Used for: running generator scripts that\r\n produce notebooks and SQL. If not available, the agent can provide pre-built\r\n notebooks instead.)_\"\r\n\r\n#### Execution preferences\r\n\r\nGive the user agency over *how* the process is delivered:\r\n\r\n- \"How do you prefer to **run commands**? _(Terminal / Notebook cells / Fabric UI\r\n — the agent will format all instructions accordingly.)_\"\r\n- \"Do you want the agent to **execute commands directly** or **produce scripts\r\n for you to review and run**? _(Direct execution is faster; review-first gives\r\n you more control.)_\"\r\n\r\n#### Data access & connectivity\r\n\r\nOnly ask when the requirements involve data ingestion or movement:\r\n\r\n- \"Where is the **source data**? _(Local files / SharePoint / Azure Storage /\r\n API / already in OneLake — determines upload method and whether shortcuts\r\n can replace copies.)_\"\r\n- \"Can notebooks in your Fabric workspace **access the source location**?\r\n _(Network restrictions or firewall rules may block runtime access. If blocked,\r\n the agent will add a local-upload step.)_\"\r\n\r\n#### Capacity & licensing\r\n\r\nOnly ask when relevant to compute or feature availability:\r\n\r\n- \"What **Fabric capacity SKU** are you on? _(F2/F4 have lower parallelism\r\n limits — the agent will adjust batch sizes. 
Trial capacities have time and\r\n feature limits the agent will flag.)_\"\r\n\r\n#### Existing infrastructure\r\n\r\nOnly ask when the requirements could reuse existing resources:\r\n\r\n- \"Are there **existing workspaces or lakehouses** the process should reuse\r\n rather than create? _(If so, the agent will skip creation steps and wire up\r\n shortcuts to existing resources.)_\"\r\n\r\n#### Team & handoff\r\n\r\nOnly ask when multi-user or governance concerns apply:\r\n\r\n- \"Will **other team members** run or maintain this pipeline? _(If yes, the\r\n agent will add role-assignment steps, document naming conventions, and\r\n produce a handoff checklist.)_\"\r\n\r\n### Path table\r\n\r\nOnce answers are collected, produce a **path table** summarising how the answers\r\nshape the approach. **Each row links an answer back to the specific step it\r\naffects**, so the user can see exactly how their environment shapes the plan:\r\n\r\n```markdown\r\n## Path Decisions\r\n\r\n| # | Question | Your answer | What this means for the plan |\r\n|---|----------|-------------|------------------------------|\r\n| 1 | Workspace creation rights | Admin on capacity | Steps 1-2: Agent will create workspaces directly via `fab workspace create` |\r\n| 2 | Workspace creation rights | No admin rights | Steps 1-2: Agent will produce a workspace spec document; you hand it to your admin. SOP marks this as a manual gate. |\r\n| 3 | Entra group role assignment | Option B — has Azure CLI | Step 2: Agent will generate `az ad group list` commands to fetch Object IDs, then script `fab workspace role assign` |\r\n| 4 | Entra group role assignment | Option D — UI only | Step 2: Agent will provide click-by-click UI instructions. Role assignment becomes a manual SOP step. 
|\r\n| 5 | Fabric CLI installed | Yes, authenticated | All CLI steps presented as `fab ...` terminal commands |\r\n| 6 | Fabric CLI installed | Not installed | Agent will either (a) guide installation, or (b) provide notebook `!pip install` + `!fab` alternatives — your choice |\r\n```\r\n\r\n### Rules for question design\r\n\r\n- **Contextual, not generic.** Every question must name the activity it enables\r\n and the step(s) it affects. A questionnaire that reads like a bureaucratic\r\n intake form is wrong — it should read like a knowledgeable consultant scoping\r\n a project.\r\n- **Explain technical constraints in plain language.** When a platform limitation\r\n exists (e.g., \"the API requires Object IDs, not display names\"), say so — then\r\n immediately offer the user multiple ways to work around it. The user should\r\n never feel blocked; they should feel informed and in control.\r\n- **Give power to the user.** Options should not be \"yes/no you can or can't do\r\n this.\" They should be \"here are 3-4 ways to achieve this — which fits your\r\n situation?\" Even a user with limited permissions should see a viable path.\r\n- **Offer verification commands.** If the user isn't sure about an answer, give\r\n them a one-liner they can run to find out (e.g., \"Run `fab ls` — if it\r\n returns workspace names, you're authenticated.\").\r\n- **Do not guess or assume.** If the answer matters to the plan, ask. If the\r\n user says \"I'm not sure,\" help them check — don't default silently.\r\n- **Keep it proportional.** Target 5–7 questions. A simple 3-step process may\r\n need only 3–4; a complex multi-workspace pipeline might need 7. Beyond 7,\r\n split into a first wave and ask follow-ups only if gaps emerge. 
Never pad\r\n with irrelevant questions to look thorough.\r\n- The environment profile is a **living document** — if a later sub-agent\r\n discovers a new constraint, append it and re-confirm with the user.\r\n- Append to `CHANGE_LOG.md`: `[{DATETIME}] Sub-Agent 0 complete — environment-profile.md produced. [N] path decisions recorded.`\r\n- **Confirm the environment profile with the user before proceeding to Sub-Agent 1.**\r\n\r\n---\r\n\r\n## Sub-Agent 1: Implementation Plan\r\n\r\n**Input**: Requirements above\r\n**Output**: `01-implementation-plan/implementation-plan.md`\r\n\r\nProduce a phased implementation plan using the structure below. Keep ≤50 lines.\r\nUpdate the RAID log whenever a later sub-agent raises a new risk or dependency.\r\n\r\n```markdown\r\n---\r\ngoal: {PROCESS_NAME} — Implementation Plan\r\nstatus: Planned\r\ndate_created: {DATE}\r\n---\r\n\r\n# Implementation Plan: {PROCESS_NAME}\r\n\r\n## Requirements & Constraints\r\n- REQ-001: [Requirement drawn from the context above]\r\n- CON-001: [Key constraint]\r\n\r\n## Phases\r\n\r\n### Phase 1: [Phase name]\r\n| Task | Description | Status |\r\n|----------|-------------|---------|\r\n| TASK-001 | [Task] | Planned |\r\n| TASK-002 | [Task] | Planned |\r\n\r\n### Phase 2: [Phase name]\r\n| Task | Description | Status |\r\n|----------|-------------|---------|\r\n| TASK-003 | [Task] | Planned |\r\n\r\n## RAID Log\r\n| Type | ID | Description | Mitigation / Action | Status |\r\n|------------|-------|--------------|---------------------|--------|\r\n| Risk | R-001 | [Risk] | [Mitigation] | Open |\r\n| Assumption | A-001 | [Assumption] | [Validation] | Open |\r\n| Issue | I-001 | [Issue] | [Resolution] | Open |\r\n| Dependency | D-001 | [Dependency] | [Owner] | Open |\r\n```\r\n\r\nRules:\r\n- Use REQ-, CON-, TASK-, R-, A-, I-, D- prefixes consistently.\r\n- Task status values: Planned / In Progress / Done.\r\n- Do not include implementation code or scripts.\r\n- Append to `CHANGE_LOG.md`: `[{DATETIME}] 
Sub-Agent 1 complete — implementation-plan.md produced.`\r\n- **Confirm with user before proceeding to Sub-Agent 2.**\r\n\r\n---\r\n\r\n## Sub-Agent 2: Business Process Mapping\r\n\r\n**Input**: Confirmed output of Sub-Agent 1 + Requirements above\r\n**Output**: `02-business-process/sop.md`\r\n\r\nThis sub-agent maps requirements to process skills, creates any that are missing,\r\nand produces a Standard Operating Procedure. Work through the three steps below.\r\n\r\n### Step 1 — Decompose requirements into process steps\r\n\r\nRead the requirements and break them into discrete, ordered steps. For each step,\r\nwrite a one-line description of what it needs to do and what its output is.\r\n\r\n### Step 2 — Map each step to a process skill\r\n\r\nFor each step, search the skills directory for a matching process skill\r\n(a skill whose description covers the same action and output).\r\n\r\nFor every step, one of three outcomes applies:\r\n\r\n**A — Skill found**: Read the skill's `SKILL.md`. Note its inputs, outputs, and\r\nany parameters it needs from earlier steps. Mark the step as covered.\r\n\r\n**B — Skill not found**: Determine the deterministic logic needed to automate\r\nthis step (the specific inputs, the repeatable actions, and the expected output).\r\nInvoke `create-fabric-process-skill` to create a new skill definition for this step.\r\nOnce created, read its `SKILL.md` and mark the step as covered.\r\nAppend to `CHANGE_LOG.md`:\r\n`[{DATETIME}] New skill created: [skill-name] — [one-line description of what it does].`\r\nAdd the new skill as a dependency in the RAID log from Sub-Agent 1.\r\n\r\n**C — Step must be manual**: If the step cannot be automated (e.g. 
requires human\r\njudgement or a physical action), document it as a manual step with exact operator\r\ninstructions and mark it accordingly.\r\n\r\nRepeat until every step is either covered by a skill or accepted as manual.\r\nAsk the user to confirm the skill list before proceeding to Step 3.\r\n\r\n### Step 3 — Produce the SOP\r\n\r\n```markdown\r\n# SOP: {PROCESS_NAME}\r\n\r\n## Step Sequence\r\n| Step | Skill / Action | Input Parameters | Output | Manual? |\r\n|------|---------------------|--------------------|-------------------|---------|\r\n| 1 | [skill-name] | param=value | [output artefact] | No |\r\n| 2 | [skill-name] | output from step 1 | [output artefact] | No |\r\n| 3 | [Manual: action] | — | — | Yes |\r\n\r\n## Shared Parameters\r\n| Parameter | Source | Passed to steps |\r\n|-----------|------------|-----------------|\r\n| [param] | User input | 1, 3 |\r\n\r\n## Newly Created Skills\r\n| Skill name | Step | Description |\r\n|--------------|------|------------------------------------|\r\n| [skill-name] | 2 | [What it does — one line] |\r\n\r\n## Manual Steps\r\n- MANUAL-001: [Step] — [Reason] — [Exact operator instructions]\r\n```\r\n\r\nRules:\r\n- If requirements are unclear for any step, ask a targeted question and update\r\n requirements before continuing.\r\n- New skills created in this sub-agent are a permanent addition to the skills\r\n library and will be available for future agents.\r\n- Append to `CHANGE_LOG.md`: `[{DATETIME}] Sub-Agent 2 complete — sop.md produced. [N] new skills created.`\r\n- **Confirm with user before proceeding to Sub-Agent 3.**\r\n\r\n---\r\n\r\n## Sub-Agent 3: Solution Architecture\r\n\r\n**Input**: Confirmed output of Sub-Agent 2\r\n**Output**: `03-solution-architecture/specification.md`\r\n\r\nProduce a plain-language specification. 
Keep total length ≤50 lines.\r\nWrite for a non-technical reader — no code, no implementation detail.\r\n\r\n```markdown\r\n---\r\ntitle: {PROCESS_NAME} — Solution Specification\r\nstatus: Draft\r\ndate_created: {DATE}\r\n---\r\n\r\n# Specification: {PROCESS_NAME}\r\n\r\n## Purpose\r\n[One paragraph: what this solution does and what problem it solves.]\r\n\r\n## Scope\r\n[What is included and what is explicitly excluded.]\r\n\r\n## How It Works\r\n| Step | What happens | Automated? | Notes |\r\n|------|-------------------------------|------------|-----------------|\r\n| 1 | [Plain-language description] | Yes | |\r\n| 2 | [Plain-language description] | No | See MANUAL-001 |\r\n\r\n## Manual Steps\r\n- MANUAL-001: [Step] — [Reason] — [Exact operator instructions]\r\n\r\n## Acceptance Criteria\r\n- AC-001: Given [context], when [action], then [expected outcome].\r\n\r\n## Dependencies\r\n- DEP-001: [External system, file, or service] — [Purpose]\r\n```\r\n\r\nRules:\r\n- Write for a non-technical reader. No jargon without explanation.\r\n- Every manual step must include exact operator instructions.\r\n- Append to `CHANGE_LOG.md`: `[{DATETIME}] Sub-Agent 3 complete — specification.md produced.`\r\n- **Confirm with user before proceeding to Sub-Agent 4.**\r\n\r\n---\r\n\r\n## Sub-Agent 4: Security, Testing and Governance\r\n\r\n**Input**: Confirmed output of Sub-Agent 3\r\n**Output**: `04-governance/governance-plan.md`\r\n\r\nProduce a governance and deployment plan. 
Keep total length ≤45 lines.\r\n\r\n```markdown\r\n---\r\ntitle: {PROCESS_NAME} — Governance Plan\r\ndate_created: {DATE}\r\n---\r\n\r\n# Governance Plan: {PROCESS_NAME}\r\n\r\n## Agent Boundaries\r\n| Boundary | Rule |\r\n|-------------------------|--------------------------------------------|\r\n| Allowed actions | [Permitted operations] |\r\n| Blocked actions | [Prohibited operations] |\r\n| Requires human approval | [Steps needing explicit sign-off] |\r\n\r\n## Testing Checklist\r\n- [ ] Validate each sub-agent output before passing it to the next\r\n- [ ] Test all manual steps with a real operator before production use\r\n- [ ] Run against a minimal test dataset before using real data\r\n- [ ] Review CHANGE_LOG.md to confirm all new skills are correct\r\n- [ ] Verify the output folder structure after scaffolding\r\n\r\n## Microsoft Responsible AI Alignment\r\n| Principle | How Applied |\r\n|----------------|--------------------------------------------------------|\r\n| Fairness | [How bias is avoided in outputs and decisions] |\r\n| Reliability | [Validation steps, error handling, new skill review] |\r\n| Privacy | [Data handling — no PII retained in output files] |\r\n| Inclusiveness | [Plain language; no domain assumptions made] |\r\n| Transparency | [User validates every sub-agent output; CHANGE_LOG] |\r\n| Accountability | [Human sign-off required before production execution] |\r\n\r\n## Deployment Guidance\r\n- Review `CHANGE_LOG.md` to verify all newly created skills before first run.\r\n- Store `agent.md`, all outputs, and new skills in version control.\r\n- Review the RAID log from Sub-Agent 1 before each new run.\r\n- Human sign-off required before running against production systems.\r\n```\r\n\r\nRules:\r\n- Every RAI principle row must be completed — state explicitly if not applicable and why.\r\n- Human approval must be required for any step that modifies production systems.\r\n- Append to `CHANGE_LOG.md`: `[{DATETIME}] Sub-Agent 4 complete — 
governance-plan.md produced. Agent definition finalised.`\r\n- **Confirm with user before finalising.**\r\n",
|
|
78
|
+
content: "# Orchestration Agent: {PROCESS_NAME}\r\n\r\n## Context\r\n\r\n**Process**: {PROCESS_NAME}\r\n**Requirements**: {REQUIREMENTS_SUMMARY}\r\n\r\n---\r\n\r\n## How to Run This Agent\r\n\r\n**Start with Sub-Agent 0 (Environment Discovery).** This gathers the user's\r\npermissions, tooling, and preferences so that every subsequent sub-agent produces\r\nplans tailored to their actual environment. Do not skip this step.\r\n\r\nThen execute each remaining sub-agent in sequence:\r\n\r\n1. Use only the inputs and instructions provided in this file.\r\n2. Produce the specified output document in the designated subfolder.\r\n3. Present the output to the user; ask clarifying questions if anything is unclear.\r\n4. Refine until the user explicitly confirms the output.\r\n5. Append a timestamped entry to `CHANGE_LOG.md` recording what was produced or decided.\r\n6. Pass the confirmed output as the primary input to the next sub-agent.\r\n **Every sub-agent must also read `00-environment-discovery/environment-profile.md`**\r\n and respect the path decisions recorded there.\r\n\r\n**Do not proceed to the next sub-agent without explicit user confirmation.**\r\n**Do not produce code, scripts, or data artefacts not described in each sub-agent below.**\r\n\r\n### Notebook Documentation Standard\r\n\r\nEvery Fabric notebook produced by any skill **must** include a numbered markdown cell\r\nimmediately above each code cell. Each markdown cell must:\r\n\r\n1. State the cell number and a short title (e.g. `## Cell 1 — Install dependencies`).\r\n2. Explain **what** the code cell does in 1–2 sentences.\r\n3. Explain **how to use it**: variables to change, flags to toggle, prerequisites.\r\n\r\nAll transformation logic and design rationale must be **embedded as markdown cells inside\r\nthe notebook** — not maintained as separate documentation files. The notebook is the single\r\nsource of truth. 
A reader must be able to understand what each cell does, why the logic was\r\nchosen, and how to run it without opening any other file.\r\n\r\n### Output Conventions\r\n\r\n- Each sub-agent writes to its own **numbered subfolder** (`01-implementation-plan/`,\r\n `02-business-process/`, etc.). Execution steps continue the numbering (e.g.,\r\n `05-execution/`, `06-gold-layer/`).\r\n- Within each subfolder, only present **final deliverables** to the user: notebooks,\r\n SQL scripts, and documentation they run or deploy. Generator scripts (e.g.\r\n `generate_notebook.py`) are internal tools the skill runs to produce deliverables —\r\n **never present generator scripts as outputs and never generate notebook or script\r\n content directly**. Run the generator script via Bash; present what it produces.\r\n- All transformation logic and design rationale must be **embedded as markdown cells\r\n inside notebooks** — not maintained as separate documentation files. The notebook\r\n is the single source of truth.\r\n\r\n---\r\n\r\n## Sub-Agent 0: Environment Discovery\r\n\r\n**Input**: Requirements above\r\n**Output**: `00-environment-discovery/environment-profile.md`\r\n\r\nThis sub-agent runs **before anything is planned or built**. Its sole purpose is to\r\nunderstand the operator's environment, permissions, and preferences so that every\r\nsubsequent sub-agent produces plans tailored to what is actually possible and practical.\r\n\r\n**Invoke the `fabric-process-discovery` skill to run this step.**\r\n\r\nThe skill defines the full adaptive questioning tree — which questions to ask, in what\r\norder, and how to branch based on answers. Key principles:\r\n\r\n- **Read the requirements first.** Only ask about domains the process actually needs.\r\n A CSV ingestion job does not need workspace creation questions. A full pipeline\r\n needs all domains.\r\n- **Present all questions in a single turn**, grouped by domain. Never ask one question\r\n at a time. 
Target **5–7 questions** for most processes; simpler ones may need 3–4.\r\n- **Branch adaptively.** The skill defines conditional follow-ups — apply them after\r\n the first-turn answers before presenting the confirmation summary.\r\n- **Confirm before proceeding.** After processing answers, present the path table and\r\n ask: *\"Is this accurate, or anything to correct before I proceed to planning?\"*\r\n Wait for explicit confirmation.\r\n\r\nThe skill covers these domains (use only those relevant to the requirements):\r\n\r\n| Domain | When to include |\r\n|--------|----------------|\r\n| **A — Workspace access** | Any step creates or uses workspaces |\r\n| **A — Domain assignment** | Requirements mention domain governance (only if creating workspaces) |\r\n| **A — Access control / groups** | Process assigns roles to users or groups |\r\n| **B — Deployment approach** | Any step generates notebooks, scripts, or CLI commands |\r\n| **C — Source data location** | Process ingests files (CSV, PDF, etc.) |\r\n| **D — Capacity / SKU** | Process involves compute-intensive operations |\r\n\r\n**Critical framing rules from the skill — do not deviate:**\r\n\r\n1. **Deployment approach is NOT a CLI vs no-CLI question.** All three options (PySpark\r\n notebook, PowerShell script, CLI commands) use the Fabric CLI internally. The\r\n question is only about *how* the operator runs it. Present it as:\r\n - **A) PySpark notebook** — imported into Fabric, run cell-by-cell in the Fabric UI\r\n - **B) PowerShell script** — generated `.ps1` reviewed and run locally\r\n - **C) CLI commands** — individual `fab` commands run interactively in the terminal\r\n\r\n2. **Workspace creation must branch correctly.** If the operator cannot create\r\n workspaces, immediately ask for the exact names of existing hub and spoke\r\n workspaces — do not ask about domain assignment or access control (they only\r\n apply when creating).\r\n\r\n3. 
**Entra group Object IDs are a known technical constraint.** When groups are\r\n involved, always surface this: *\"The Fabric API requires Object IDs — display\r\n names are not accepted programmatically.\"* Then offer the resolution options\r\n (have IDs / Azure CLI / PowerShell Graph / UI manual).\r\n\r\n4. **Never leave the user blocked.** If a step requires permissions they don't have,\r\n offer: (a) skip and mark as manual, (b) produce a spec for their admin, or\r\n (c) substitute a UI-based workaround.\r\n\r\nOnce the environment profile is confirmed, save it as\r\n`00-environment-discovery/environment-profile.md` and append to `CHANGE_LOG.md`:\r\n`[{DATETIME}] Sub-Agent 0 complete — environment-profile.md produced. [N] path decisions recorded. Manual gates: [list or none].`\r\n\r\n**Confirm the environment profile with the user before proceeding to Sub-Agent 1.**\r\n\r\n---\r\n\r\n## Sub-Agent 1: Implementation Plan\r\n\r\n**Input**: Requirements above\r\n**Output**: `01-implementation-plan/implementation-plan.md`\r\n\r\nProduce a phased implementation plan using the structure below. 
Keep ≤50 lines.\r\nUpdate the RAID log whenever a later sub-agent raises a new risk or dependency.\r\n\r\n```markdown\r\n---\r\ngoal: {PROCESS_NAME} — Implementation Plan\r\nstatus: Planned\r\ndate_created: {DATE}\r\n---\r\n\r\n# Implementation Plan: {PROCESS_NAME}\r\n\r\n## Requirements & Constraints\r\n- REQ-001: [Requirement drawn from the context above]\r\n- CON-001: [Key constraint]\r\n\r\n## Phases\r\n\r\n### Phase 1: [Phase name]\r\n| Task | Description | Status |\r\n|----------|-------------|---------|\r\n| TASK-001 | [Task] | Planned |\r\n| TASK-002 | [Task] | Planned |\r\n\r\n### Phase 2: [Phase name]\r\n| Task | Description | Status |\r\n|----------|-------------|---------|\r\n| TASK-003 | [Task] | Planned |\r\n\r\n## RAID Log\r\n| Type | ID | Description | Mitigation / Action | Status |\r\n|------------|-------|--------------|---------------------|--------|\r\n| Risk | R-001 | [Risk] | [Mitigation] | Open |\r\n| Assumption | A-001 | [Assumption] | [Validation] | Open |\r\n| Issue | I-001 | [Issue] | [Resolution] | Open |\r\n| Dependency | D-001 | [Dependency] | [Owner] | Open |\r\n```\r\n\r\nRules:\r\n- Use REQ-, CON-, TASK-, R-, A-, I-, D- prefixes consistently.\r\n- Task status values: Planned / In Progress / Done.\r\n- Do not include implementation code or scripts.\r\n- Append to `CHANGE_LOG.md`: `[{DATETIME}] Sub-Agent 1 complete — implementation-plan.md produced.`\r\n- **Confirm with user before proceeding to Sub-Agent 2.**\r\n\r\n---\r\n\r\n## Sub-Agent 2: Business Process Mapping\r\n\r\n**Input**: Confirmed output of Sub-Agent 1 + Requirements above\r\n**Output**: `02-business-process/sop.md`\r\n\r\nThis sub-agent maps requirements to process skills, creates any that are missing,\r\nand produces a Standard Operating Procedure. Work through the three steps below.\r\n\r\n### Step 1 — Decompose requirements into process steps\r\n\r\nRead the requirements and break them into discrete, ordered steps. 
For each step,\r\nwrite a one-line description of what it needs to do and what its output is.\r\n\r\n### Step 2 — Map each step to a process skill\r\n\r\nFor each step, search the skills directory for a matching process skill\r\n(a skill whose description covers the same action and output).\r\n\r\nFor every step, one of three outcomes applies:\r\n\r\n**A — Skill found**: Read the skill's `SKILL.md`. Note its inputs, outputs, and\r\nany parameters it needs from earlier steps. Mark the step as covered.\r\n\r\n**B — Skill not found**: Determine the deterministic logic needed to automate\r\nthis step (the specific inputs, the repeatable actions, and the expected output).\r\nInvoke `create-fabric-process-skill` to create a new skill definition for this step.\r\nOnce created, read its `SKILL.md` and mark the step as covered.\r\nAppend to `CHANGE_LOG.md`:\r\n`[{DATETIME}] New skill created: [skill-name] — [one-line description of what it does].`\r\nAdd the new skill as a dependency in the RAID log from Sub-Agent 1.\r\n\r\n**C — Step must be manual**: If the step cannot be automated (e.g. requires human\r\njudgement or a physical action), document it as a manual step with exact operator\r\ninstructions and mark it accordingly.\r\n\r\nRepeat until every step is either covered by a skill or accepted as manual.\r\nAsk the user to confirm the skill list before proceeding to Step 3.\r\n\r\n### Step 3 — Produce the SOP\r\n\r\n```markdown\r\n# SOP: {PROCESS_NAME}\r\n\r\n## Step Sequence\r\n| Step | Skill / Action | Input Parameters | Output | Manual? 
|\r\n|------|---------------------|--------------------|-------------------|---------|\r\n| 1 | [skill-name] | param=value | [output artefact] | No |\r\n| 2 | [skill-name] | output from step 1 | [output artefact] | No |\r\n| 3 | [Manual: action] | — | — | Yes |\r\n\r\n## Shared Parameters\r\n| Parameter | Source | Passed to steps |\r\n|-----------|------------|-----------------|\r\n| [param] | User input | 1, 3 |\r\n\r\n## Newly Created Skills\r\n| Skill name | Step | Description |\r\n|--------------|------|------------------------------------|\r\n| [skill-name] | 2 | [What it does — one line] |\r\n\r\n## Manual Steps\r\n- MANUAL-001: [Step] — [Reason] — [Exact operator instructions]\r\n```\r\n\r\nRules:\r\n- If requirements are unclear for any step, ask a targeted question and update\r\n requirements before continuing.\r\n- New skills created in this sub-agent are a permanent addition to the skills\r\n library and will be available for future agents.\r\n- Append to `CHANGE_LOG.md`: `[{DATETIME}] Sub-Agent 2 complete — sop.md produced. [N] new skills created.`\r\n- **Confirm with user before proceeding to Sub-Agent 3.**\r\n\r\n---\r\n\r\n## Sub-Agent 3: Solution Architecture\r\n\r\n**Input**: Confirmed output of Sub-Agent 2\r\n**Output**: `03-solution-architecture/specification.md`\r\n\r\nProduce a plain-language specification. Keep total length ≤50 lines.\r\nWrite for a non-technical reader — no code, no implementation detail.\r\n\r\n```markdown\r\n---\r\ntitle: {PROCESS_NAME} — Solution Specification\r\nstatus: Draft\r\ndate_created: {DATE}\r\n---\r\n\r\n# Specification: {PROCESS_NAME}\r\n\r\n## Purpose\r\n[One paragraph: what this solution does and what problem it solves.]\r\n\r\n## Scope\r\n[What is included and what is explicitly excluded.]\r\n\r\n## How It Works\r\n| Step | What happens | Automated? 
| Notes |\r\n|------|-------------------------------|------------|-----------------|\r\n| 1 | [Plain-language description] | Yes | |\r\n| 2 | [Plain-language description] | No | See MANUAL-001 |\r\n\r\n## Manual Steps\r\n- MANUAL-001: [Step] — [Reason] — [Exact operator instructions]\r\n\r\n## Acceptance Criteria\r\n- AC-001: Given [context], when [action], then [expected outcome].\r\n\r\n## Dependencies\r\n- DEP-001: [External system, file, or service] — [Purpose]\r\n```\r\n\r\nRules:\r\n- Write for a non-technical reader. No jargon without explanation.\r\n- Every manual step must include exact operator instructions.\r\n- Append to `CHANGE_LOG.md`: `[{DATETIME}] Sub-Agent 3 complete — specification.md produced.`\r\n- **Confirm with user before proceeding to Sub-Agent 4.**\r\n\r\n---\r\n\r\n## Sub-Agent 4: Security, Testing and Governance\r\n\r\n**Input**: Confirmed output of Sub-Agent 3\r\n**Output**: `04-governance/governance-plan.md`\r\n\r\nProduce a governance and deployment plan. Keep total length ≤45 lines.\r\n\r\n```markdown\r\n---\r\ntitle: {PROCESS_NAME} — Governance Plan\r\ndate_created: {DATE}\r\n---\r\n\r\n# Governance Plan: {PROCESS_NAME}\r\n\r\n## Agent Boundaries\r\n| Boundary | Rule |\r\n|-------------------------|--------------------------------------------|\r\n| Allowed actions | [Permitted operations] |\r\n| Blocked actions | [Prohibited operations] |\r\n| Requires human approval | [Steps needing explicit sign-off] |\r\n\r\n## Testing Checklist\r\n- [ ] Validate each sub-agent output before passing it to the next\r\n- [ ] Test all manual steps with a real operator before production use\r\n- [ ] Run against a minimal test dataset before using real data\r\n- [ ] Review CHANGE_LOG.md to confirm all new skills are correct\r\n- [ ] Verify the output folder structure after scaffolding\r\n\r\n## Microsoft Responsible AI Alignment\r\n| Principle | How Applied |\r\n|----------------|--------------------------------------------------------|\r\n| Fairness | 
[How bias is avoided in outputs and decisions] |\r\n| Reliability | [Validation steps, error handling, new skill review] |\r\n| Privacy | [Data handling — no PII retained in output files] |\r\n| Inclusiveness | [Plain language; no domain assumptions made] |\r\n| Transparency | [User validates every sub-agent output; CHANGE_LOG] |\r\n| Accountability | [Human sign-off required before production execution] |\r\n\r\n## Deployment Guidance\r\n- Review `CHANGE_LOG.md` to verify all newly created skills before first run.\r\n- Store `agent.md`, all outputs, and new skills in version control.\r\n- Review the RAID log from Sub-Agent 1 before each new run.\r\n- Human sign-off required before running against production systems.\r\n```\r\n\r\nRules:\r\n- Every RAI principle row must be completed — state explicitly if not applicable and why.\r\n- Human approval must be required for any step that modifies production systems.\r\n- Append to `CHANGE_LOG.md`: `[{DATETIME}] Sub-Agent 4 complete — governance-plan.md produced. Agent definition finalised.`\r\n- **Confirm with user before finalising.**\r\n",
|
|
79
79
|
},
|
|
80
80
|
{
|
|
81
81
|
relativePath: "references/section-descriptions.md",
|
|
@@ -183,7 +183,15 @@ export const EMBEDDED_SKILLS = [
|
|
|
183
183
|
files: [
|
|
184
184
|
{
|
|
185
185
|
relativePath: "SKILL.md",
|
|
186
|
-
content: "---\nname: fabric-process-discovery\ndescription: >\n Use this skill to conduct the initial environment discovery conversation for any\n Microsoft Fabric process workflow. Collects workspace access, deployment approach,\n access control preferences, capacity, and data location through a FATA-aligned,\n one-question-at-a-time adaptive conversation grounded in what the downstream Fabric\n skills actually require. Output is a structured environment profile used by the\n orchestrating agent to plan execution. Triggers as Sub-Agent 0 in any Fabric\n process workflow agent.\nlicense: MIT\ncompatibility: Works in any Claude context — no external tools required at this stage.\n---\n\n# Fabric Process Discovery\n\n> ⚠️ **GOVERNANCE**: This skill only gathers context — it never executes commands or\n> creates resources. All collected information feeds into the execution plan which the\n> operator reviews and confirms before anything runs.\n>\n> ⚠️ **PRIVACY**: Never ask for passwords, access tokens, client secrets, or any\n> credential values. If the plan requires a Service Principal, record only that one\n> is needed — not the values. Credentials are entered by the operator at runtime,\n> not during discovery.\n\n## Workflow\n\n1. Adopt a Fabric architect expert perspective before asking anything.\n2. Read process requirements and identify which domains are relevant.\n3. Gather contextual and historical background first (one question).\n4. Work through relevant domains — one question at a time, branching on each answer.\n5. Present a confirmation summary and wait for explicit approval.\n6. Write the environment profile and append to `CHANGE_LOG.md`.\n\n---\n\n## Core Principles\n\nThese govern how every question is asked. They are not optional — apply all of them\nthroughout the conversation.\n\n**1. Adopt expert perspective first (FATA: Domain Expert Activation).**\nBefore generating any questions, reason as a senior Fabric architect reviewing the\nrequirements. 
Ask yourself: *what information gaps, if left unfilled, would cause\nthe plan to fail or need rework?* Those are the questions worth asking. Surface\nthings the operator may not know they need to tell you.\n\n**2. One question at a time — Yes/No or 3–4 options.**\nNever present multiple questions in one turn. Each question must be answerable with\na yes/no or a single choice from 3–4 clearly labelled options (A/B/C or A/B/C/D).\nWait for the answer before deciding what to ask next. This is intentional:\nin Fabric discovery, each answer materially changes which questions are relevant —\npresenting all questions at once produces noise. Single-turn efficiency is the right\ndefault for general LLMs; one-at-a-time branching is correct here.\n\n**3. Scaffold before asking (FATA: User Experience Scaffolding).**\nBefore each question, write one sentence explaining what the question is trying to\nunderstand and why it matters for the plan. Operators new to Fabric cannot anticipate\nwhat a Fabric architect considers essential. Make the purpose visible.\n\n**4. Cover all five FATA information dimensions.**\nStructure discovery to address all five dimensions — not just the obvious ones:\n\n| Dimension | What to establish |\n|---|---|\n| **Contextual** | Project background, team, experience level with Fabric |\n| **Constraint-based** | Permissions, tooling, licensing limits |\n| **Preference-oriented** | Deployment style, governance priorities, reuse goals |\n| **Environmental** | Capacity, existing workspaces, data locations |\n| **Historical** | Previous runs, existing naming conventions, known issues |\n\n**5. Always offer a way forward.**\nEvery question must include an option equivalent to \"I'm not sure / I'll find out.\"\nFor questions requiring specific values (names, IDs), offer a command the operator\ncan run to retrieve them. Never leave the operator blocked.\n\n**6. 
Distinguish path decisions from parameter values.**\n- **Path decisions** determine the shape of the plan — always collect these.\n- **Parameter values** (exact names, IDs) are needed before execution — collect now\n if the operator has them, otherwise flag as *required before running*.\n\n**7. Prevent over-questioning.**\nCover only the domains the requirements actually need. For simple processes (e.g.\na single notebook), 4–6 questions is sufficient. For a full pipeline, up to 10 is\nreasonable. Stop when all path decisions are resolved — do not ask about things that\nwon't change the plan.\n\n**8. Protect privacy.**\nDo not ask for credentials, secrets, tokens, or Object IDs at this stage. If the\nplan needs a Service Principal, record that one is required and note the permissions\nneeded — the operator enters values at runtime.\n\n---\n\n## Question Sequence\n\n### Phase 1 — Contextual and Historical (always run first)\n\nAsk about background before asking about specifics. This sets the right level of\nexplanation for subsequent questions and surfaces constraints the operator may not\nthink to mention.\n\n**Contextual background question** — ask something like:\n*\"To make sure I pitch the questions at the right level — is this your first time\nsetting up a Fabric environment for this project, or are you extending something\nthat already exists?\"*\n\nOptions should cover: brand new setup / extending an existing one / rebuilding or\nmigrating from somewhere else / unsure.\n\n**Historical question** (ask if the answer above suggests existing work) — ask\nsomething like:\n*\"Are there existing naming conventions, workspace patterns, or previous deployments\nI should follow or be aware of?\"*\n\nOptions: yes (they'll describe) / no / unsure.\n\nThese answers shape how specific later questions need to be and whether defaults\ncan be inferred from what already exists.\n\n---\n\n### Phase 2 — Relevant Domains\n\nCover only the domains relevant to the process 
requirements. Typical mapping:\n\n| Process involves | Domains to cover |\n|---|---|\n| Creating workspaces | A, B, C, D, F |\n| Creating lakehouses | A, D, F |\n| Ingesting files (CSV/PDF) | D, E |\n| Running notebooks/scripts | D, F |\n| Full pipeline | All domains |\n\nWork through domains in order A → F, skipping irrelevant ones. Within each domain,\nask one question and branch before moving to the next domain.\n\n---\n\n#### Domain A — Workspace access (Constraint-based + Environmental)\n\n**What to establish:** Can the operator create new workspaces, or must they use\nexisting ones? What are the names?\n\n**Technical context:**\n- Workspace names are case-sensitive in `fab` paths.\n- If unsure about create rights: `pip install ms-fabric-cli` → `fab auth login`\n → `fab ls`. If workspace names are returned, they have access.\n- Read requirements to determine how many workspaces are needed before asking.\n\n**Question format:** Can you create new Fabric workspaces?\n- A) Yes — I can create workspaces\n- B) No — I need to use existing workspaces\n- C) I'm not sure — I can run `fab ls` to check\n\n**Branch:**\n- A → ask for intended names (or placeholder if not decided yet)\n- B → ask for exact names of existing workspaces (verbatim — case-sensitive)\n- C → provide the `fab ls` command; wait for output; branch as A or B\n\n---\n\n#### Domain B — Domain assignment (Constraint-based)\n\n**What to establish:** Should workspaces be assigned to a Fabric domain? If yes,\ndoes the operator have the rights needed?\n\n**Technical context:**\n- Domain assignment is optional and can be done later via the portal.\n- Assigning to an *existing* domain requires no special rights.\n- *Creating* a new domain requires Fabric Administrator rights (tenant-level —\n not the same as Workspace Admin). 
Default to \"skip\" or \"assign existing\" if\n there is any doubt.\n\n**Question format:** Would you like to assign these workspaces to a Fabric domain?\n- A) Yes — assign to an existing domain\n- B) Yes — create a new domain for these workspaces\n- C) No — skip domain assignment for now\n\n**Branch:**\n- A → ask for the domain name\n- B → ask if they have Fabric Administrator rights (Yes / No / Unsure);\n if No or Unsure → mark as manual gate, note intended domain name for documentation\n- C → no domain parameters needed\n\n---\n\n#### Domain C — Access control (Environmental + Constraint-based)\n\n**What to establish:** Who else needs access? How will group identifiers be obtained?\n\n**Technical context:**\n- The workspace creator is automatically assigned as Admin — no action needed.\n- Individual users are identified by email address (UPN) — straightforward.\n- **Entra security groups require Object IDs (GUIDs) — the Fabric REST API does not\n accept display names.** This is a hard API constraint, not a preference.\n- Object IDs can be found: Azure portal (Azure Active Directory → Groups → select → Object ID field),\n Azure CLI (`az ad group show --group \"Name\" --query id -o tsv`), or PowerShell\n (`Get-MgGroup -Filter \"displayName eq 'Name'\" | Select-Object Id`).\n- **If deployment is a PySpark notebook AND groups are involved:** `notebookutils`\n cannot query Microsoft Graph. 
Either provide Object IDs directly, resolve via\n Azure CLI/PowerShell before running, or switch deployment approach for this step.\n- Do not ask for Object ID values during discovery — flag that they will be needed\n and establish how they will be obtained.\n\n**Question format:** Beyond yourself as Admin, does anyone else need access?\n- A) No — just me for now\n- B) Yes — specific users (by email)\n- C) Yes — Entra security groups\n- D) Yes — a mix of users and groups\n\n**Branch:**\n- A → skip role collection\n- B → ask for email addresses and intended roles (Admin/Member/Contributor/Viewer)\n- C or D → ask: \"Can you see the security groups in the Azure portal\n (Azure Active Directory → Groups)?\"\n - Yes → ask: will you provide Object IDs directly, or should the agent generate\n Azure CLI lookup commands to retrieve them automatically?\n - Provide directly → flag IDs as required before run; ask for group names and roles\n - CLI lookup → note that lookup commands will be generated; ask for group names and roles\n - No → mark group role assignment as manual gate; provide portal instructions\n\n---\n\n#### Domain D — Deployment approach (Preference-oriented)\n\n**What to establish:** How does the operator prefer to run generated scripts/notebooks?\n\n**Technical context:**\n- **All three approaches use the Fabric CLI (`fab`) internally.** This is not a\n question about whether to use the CLI — it is about how the operator runs the\n generated artefacts.\n- PySpark notebook: runs inside the Fabric UI cell-by-cell. Authentication is\n automatic. Best for operators who prefer working inside Fabric.\n- PowerShell script: reviewed and run locally. Requires `fab` CLI installed\n (`pip install ms-fabric-cli`) and PowerShell.\n- Terminal commands: `fab` commands run one at a time interactively. Requires `fab`\n CLI installed locally. 
Best for operators who want step-by-step control.\n- If notebook is chosen AND Entra groups are involved, flag the `notebookutils`\n Microsoft Graph constraint from Domain C.\n\n**Question format:** How would you like to run the generated artefacts?\n- A) PySpark notebook — import into Fabric and run cell-by-cell in the Fabric UI\n- B) PowerShell script — review and run locally\n- C) Individual CLI commands — run interactively in the terminal, one step at a time\n\n---\n\n#### Domain E — Source data (Environmental)\n\n*Only ask if the process involves ingesting files.*\n\n**What to establish:** Where are the source files?\n\n**Technical context:**\n- Local files require an upload step before they can be used in Fabric.\n- Files already in OneLake can be referenced by path directly.\n- SharePoint/Azure Blob files can be connected via Fabric shortcuts — no copying needed.\n\n**Question format:** Where are the source files you want to ingest?\n- A) On my local machine\n- B) Already in OneLake / Fabric\n- C) In cloud storage (SharePoint, Azure Blob, etc.)\n\n**Branch:**\n- A → include upload step in plan\n- B → ask for OneLake path; skip upload\n- C → ask for source URL/path; include shortcut creation step\n\n---\n\n#### Domain F — Capacity (Environmental + Constraint-based)\n\n*Ask whenever workspaces are being created.*\n\n**What to establish:** What Fabric capacity will workspaces be assigned to?\n\n**Technical context:**\n- Every workspace must be assigned to an active capacity at creation.\n- Capacity must be in Active state — if paused, the operator resumes it in the\n Azure portal before running.\n- `fab ls` output includes capacity information. 
Also visible in the Fabric Admin portal.\n\n**Question format:** Do you know the name of the Fabric capacity to use?\n- A) Yes — I know it (provide the name)\n- B) I can find it — I'll run `fab ls` or check the Fabric Admin portal\n- C) I'll provide it later — use a placeholder for now\n\n**Branch:**\n- A → embed capacity name in plan\n- B → provide `fab ls` command; wait for name; embed in plan\n- C → use `[CAPACITY_NAME]` placeholder; flag as required before running\n\n---\n\n### Phase 3 — Preference check (Preference-oriented)\n\nAfter the main domains, ask one closing preference question if the requirements\ninvolve choices between rigour and speed:\n\n*\"For any optional steps (e.g. domain assignment, access control), would you prefer\nto include everything now for a complete setup, or keep it minimal and add\ngovernance steps later?\"*\n\n- A) Include everything — set it up completely now\n- B) Keep it minimal — flag optional steps as manual for later\n- C) Decide step by step — I'll confirm each optional item\n\nThis shapes how the plan presents optional components.\n\n---\n\n## Confirmation\n\nBefore writing the environment profile, present a concise summary table of all path\ndecisions and collected parameters. Ask the operator to confirm accuracy. 
If anything\nis missing or unclear, ask only the targeted follow-up needed.\n\n```\n| # | Dimension | Question | Your answer | What this means |\n|---|-----------------|---------------------- |--------------------------------------|------------------------------------------------------|\n| 0 | Contextual | Project context | New setup | No existing conventions to inherit |\n| A | Constraint | Workspace creation | Creating new | Agent will create hub + spoke workspaces |\n| B | Constraint | Domain assignment | New domain (manual gate) | Domain creation flagged manual — admin rights needed |\n| C | Environmental | Access control | Groups — IDs to be provided directly | Role assignment scripted; IDs needed before run |\n| D | Preference | Deployment approach | PySpark notebook | Agent generates .ipynb for import into Fabric |\n| F | Environmental | Capacity | ldifabricdev | Embedded in notebook |\n| | Preference | Setup completeness | Include everything | All optional steps included in plan |\n```\n\n---\n\n## Output\n\nSave the confirmed profile as `00-environment-discovery/environment-profile.md`.\n\nInclude:\n- All path decisions (with FATA dimension label)\n- All collected parameter values\n- Parameters flagged as required before execution, with instructions for obtaining them\n- Manual gates — steps the operator must perform themselves, and why\n- Deployment prerequisites (e.g. `pip install ms-fabric-cli` if PowerShell/terminal)\n- Any historical/contextual notes that should inform naming or structure decisions\n\nAppend to `CHANGE_LOG.md`:\n`[{DATETIME}] Sub-Agent 0 complete — environment-profile.md produced. [N] path decisions recorded. Manual gates: [list or none]. 
Parameters still needed: [list or none].`\n\n---\n\n## Gotchas\n\n- **Never frame deployment as CLI vs no-CLI.** All three approaches use `fab`.\n- **Workspace names are case-sensitive in `fab` paths.** Always confirm exact casing.\n- **Entra group Object IDs are GUIDs, not display names.** Do not ask for them during\n discovery — flag that they are needed and establish how they will be obtained.\n- **`notebookutils` does not support Microsoft Graph.** A Fabric notebook cannot\n resolve group names to Object IDs at runtime.\n- **Domain creation requires Fabric Administrator rights — tenant-level.** Default to\n assigning an existing domain or skipping if there is any doubt.\n- **Never ask for credentials, secrets, or token values.** Discovery is about shape\n and approach — not credentials. Flag that a Service Principal is needed; the\n operator provides the values at runtime.\n- **Never leave the user blocked.** If a step requires permissions they don't have,\n offer: (a) skip and mark as manual, (b) produce a spec for their admin, or\n (c) substitute a UI-based workaround.\n- **Stop when path decisions are resolved.** Do not continue asking questions once\n everything that affects the plan structure is known.\n",
|
|
186
|
+
content: "---\nname: fabric-process-discovery\ndescription: >\n Use this skill to conduct the initial environment discovery conversation for any\n Microsoft Fabric process workflow. Collects workload scope, workspace access,\n deployment approach, access control, capacity, data location, and environment\n promotion needs through a FATA-aligned, one-question-at-a-time adaptive\n conversation. Output is a structured environment profile used by the orchestrating\n agent to plan execution. Triggers as Sub-Agent 0 in any Fabric process workflow agent.\nlicense: MIT\ncompatibility: Works in any Claude context — no external tools required at this stage.\n---\n\n# Fabric Process Discovery\n\n> ⚠️ **GOVERNANCE**: This skill only gathers context — it never executes commands or\n> creates resources. All collected information feeds into the execution plan which the\n> operator reviews and confirms before anything runs.\n>\n> ⚠️ **PRIVACY**: Never ask for passwords, tokens, client secrets, or Object IDs\n> during discovery. If a Service Principal is needed, record that it is required and\n> the permissions needed — the operator enters credential values at runtime only.\n\n## Workflow\n\n1. Adopt a Fabric architect expert perspective before asking anything.\n2. Read process requirements — identify which domains are relevant.\n3. Ask Phase 1 (contextual + historical background) first.\n4. Work through relevant domains one question at a time, branching on each answer.\n5. Present a confirmation summary and wait for explicit approval.\n6. 
Write the environment profile and append to `CHANGE_LOG.md`.\n\n## References\n\n- `references/technical-constraints.md` — authentication separation, Object IDs,\n `notebookutils` Graph limitation, Service Principal requirements, capacity state\n- `references/fabric-architecture.md` — workload landscape, medallion architecture,\n environment promotion patterns, credential management\n\nLoad the relevant reference when a domain question requires deeper technical context\nor when the operator asks a technical follow-up.\n\n---\n\n## Core Principles\n\n**1. Expert perspective first.**\nBefore generating questions, reason as a senior Fabric architect. Ask: *what gaps,\nif left unfilled, would cause the plan to fail or need rework?* Surface things the\noperator may not know they need to tell you.\n\n**2. One question at a time — Yes/No or 3–4 labelled options.**\nNever present multiple questions in one turn. Each question must be answerable with\na yes/no or a single choice (A/B/C or A/B/C/D). Wait for the answer before\nbranching. In Fabric discovery, each answer materially changes which questions are\nworth asking next — this is why one-at-a-time is correct here even though FATA\ndefaults to single-turn efficiency.\n\n**3. Scaffold before asking.**\nOne sentence of context before each question explaining what it establishes and why\nit matters for the plan. Operators new to Fabric cannot anticipate what a Fabric\narchitect considers essential.\n\n**4. Cover all five FATA dimensions.**\n\n| Dimension | What to establish |\n|---|---|\n| **Contextual** | Project background, team, experience level |\n| **Constraint-based** | Permissions, tooling, licensing |\n| **Preference-oriented** | Deployment style, governance vs speed, reuse goals |\n| **Environmental** | Capacity, workloads, existing workspaces, data locations |\n| **Historical** | Previous runs, naming conventions, existing patterns |\n\n**5. 
Path decisions vs parameter values.**\nPath decisions (can you create workspaces? which workloads?) determine plan structure\n— always collect. Parameter values (exact names, IDs) — collect now if available,\notherwise flag as *required before running*.\n\n**6. Offer a way forward on every question.**\nInclude an \"I'm not sure / I'll find out\" option. For specific values the operator\nmay not have ready, offer the command to retrieve them.\n\n**7. Prevent over-questioning.**\nOnly cover domains the requirements actually need. Stop when all path decisions are\nresolved. Roughly: 4–6 questions for simple processes, up to 12 for full pipelines.\n\n---\n\n## Question Sequence\n\n### Phase 1 — Contextual and Historical (always run first)\n\nEstablish background before specifics. Ask one question covering:\n- Is this a new setup, an extension of something existing, or a migration?\n- (If extending/migrating) Are there naming conventions or existing patterns to follow?\n\nOptions should cover: new / extending existing / migrating / unsure.\nThese answers shape the level of explanation needed in later questions and whether\ndefaults can be inferred from what already exists.\n\n---\n\n### Phase 2 — Relevant Domains\n\nSelect domains based on requirements. 
Work through them in order, one question at a\ntime, completing each branch before moving to the next domain.\n\n| Process involves | Domains to cover |\n|---|---|\n| Creating workspaces | A, B, C, D, F, G |\n| Creating lakehouses | A, D, F, G + medallion question |\n| Ingesting files | D, E |\n| Full pipeline (multiple workloads) | Workload scope question first, then A–G |\n| Notebooks / scripts only | D, F |\n\n---\n\n#### Workload scope (ask first for full pipelines)\n\n*Only ask when requirements span more than one workload or mention end-to-end pipelines.*\n\nOne sentence of context: the answer determines which downstream skills are needed\nand what the workspace structure should look like.\n\nQuestion: Which Fabric workloads does this process involve? (Select all that apply —\nthis is the one multi-select exception to the single-choice rule in Core Principle 2)\n- A) Lakehouse / Spark (Delta tables, PySpark notebooks, file ingestion)\n- B) Data Warehouse (T-SQL analytics)\n- C) Pipelines (orchestration, data movement)\n- D) KQL / Eventhouse (real-time or time-series data)\n- E) Power BI / Semantic Model (reporting layer)\n\nLoad `references/fabric-architecture.md` → Workload Landscape for downstream skill mapping.\n\n---\n\n#### Domain A — Workspace access (Constraint-based + Environmental)\n\n**Establish:** Can the operator create workspaces, or must they use existing ones?\nWhat names?\n\nQuestion: Can you create new Fabric workspaces?\n- A) Yes — I can create workspaces\n- B) No — I need to use existing workspaces\n- C) I'm not sure — I can run `fab ls` to check\n\nBranch:\n- A → ask for intended names (or placeholder); if lakehouses involved, ask whether\n medallion naming is expected (load `references/fabric-architecture.md` → Medallion)\n- B → ask for exact verbatim names of existing workspaces (case-sensitive in `fab`)\n- C → provide `fab ls` command (`pip install ms-fabric-cli` → `fab auth login` → `fab ls`); wait; branch as A or B\n\n---\n\n#### Domain B — Domain assignment (Constraint-based)\n\n**Establish:** Should workspaces be 
assigned to a Fabric domain?\n\nQuestion: Would you like to assign these workspaces to a Fabric domain?\n- A) Yes — assign to an existing domain (provide name)\n- B) Yes — create a new domain for these workspaces\n- C) No — skip for now\n\nBranch:\n- B → ask if they have Fabric Administrator rights (Yes / No / Unsure);\n No or Unsure → mark as manual gate, note intended domain name for documentation\n\n---\n\n#### Domain C — Access control (Environmental + Constraint-based)\n\n**Establish:** Who else needs workspace access? How will group identifiers be obtained?\n\nKey constraint: Fabric REST API requires Entra group **Object IDs** — display names\nare not accepted. Load `references/technical-constraints.md` → Entra Group Object IDs\nfor resolution methods.\n\nQuestion: Beyond yourself as Admin, does anyone else need workspace access?\n- A) No — just me for now\n- B) Yes — specific users (by email address)\n- C) Yes — Entra security groups\n- D) Yes — a mix of users and groups\n\nBranch (C or D):\n- Ask: Can you see these security groups in the Azure portal\n (Azure Active Directory → Groups)?\n - Yes → Ask: will you provide Object IDs directly, or should the agent generate\n Azure CLI lookup commands?\n - Either way: flag Object IDs as required before run; ask for group names and roles\n - No → mark group role assignment as manual gate; provide portal navigation instructions\n\nIf notebook deployment is chosen AND groups are involved: flag the `notebookutils`\nGraph limitation. Load `references/technical-constraints.md` → notebookutils and\nMicrosoft Graph. 
Ask whether a Service Principal is available or if the operator\nprefers to switch to PowerShell/terminal for role assignment.\n\n**Roles available:** Admin, Member, Contributor, Viewer\n\n---\n\n#### Domain D — Deployment approach (Preference-oriented)\n\n**Establish:** How does the operator prefer to run generated artefacts?\n\nKey context: all three approaches use the Fabric CLI (`fab`) internally — this is\nabout how the operator runs the generated artefacts, not whether they use the CLI.\nPowerShell and terminal approaches require **two separate logins**: `fab auth login`\n(Fabric) AND `az login` (Azure CLI, for group lookups). Load\n`references/technical-constraints.md` → Authentication for details.\n\nQuestion: How would you like to run the generated scripts or notebooks?\n- A) PySpark notebook — import into Fabric and run cell-by-cell in the Fabric UI\n- B) PowerShell script — review and run locally\n- C) Individual CLI commands — run step-by-step in the terminal\n\n---\n\n#### Domain E — Source data (Environmental)\n\n*Only ask if the process involves ingesting files.*\n\n**Establish:** Where are the source files?\n\nQuestion: Where are the source files you want to ingest?\n- A) On my local machine\n- B) Already in OneLake / Fabric (I have the path)\n- C) In cloud storage — SharePoint, Azure Blob, or similar\n\nBranch:\n- A → include upload step in plan\n- B → ask for OneLake path; skip upload\n- C → ask for source URL; include shortcut creation step\n\n---\n\n#### Domain F — Capacity (Environmental)\n\n*Ask whenever workspaces are being created.*\n\n**Establish:** What Fabric capacity will workspaces be assigned to?\n\nNote: capacity must be in Active state at creation time. 
Load\n`references/technical-constraints.md` → Capacity State Prerequisite if relevant.\n\nQuestion: Do you know the name of the Fabric capacity to use?\n- A) Yes — I know it (provide the name)\n- B) I can find it — I'll check via `fab ls` or the Fabric Admin portal\n- C) I'll provide it later — use a placeholder for now\n\n---\n\n#### Domain G — Environments (Constraint-based + Preference-oriented)\n\n*Ask whenever the process will run beyond a one-off or dev-only context.*\n\n**Establish:** How many environments need to be supported? This determines whether\nthe plan needs promotion logic and parameterised naming.\n\nLoad `references/fabric-architecture.md` → Environment Promotion for naming patterns.\n\nQuestion: Is this deployment for a single environment, or will it need to be\npromoted across environments?\n- A) Dev only — single environment, no promotion needed\n- B) Dev + prod — two environments, plan should parameterise workspace references\n- C) Dev + test + prod — three environments with a full promotion path\n- D) I'm not sure yet — build for single environment and we'll add promotion later\n\n---\n\n### Phase 3 — Credential management (ask if a Service Principal was flagged)\n\n*Only ask if Domain C or Domain D established that a Service Principal is needed.*\n\n**Establish:** How should SP credentials be managed in the generated artefacts?\n\nLoad `references/fabric-architecture.md` → Credential Management for options.\n\nQuestion: How would you like to handle the Service Principal credentials in the\ngenerated notebook or script?\n- A) Azure Key Vault reference — retrieve the secret at runtime from Key Vault\n- B) Runtime parameter entry — I'll paste in the values when running\n- C) Environment variable — set in my terminal session before running\n\n---\n\n### Phase 4 — Preference check\n\nAfter domains are resolved, ask one closing question if optional steps were\nidentified:\n\nQuestion: For optional steps (e.g. 
domain assignment, access control), would you\nprefer to include everything now or keep it minimal and add governance steps later?\n- A) Include everything — complete setup now\n- B) Keep it minimal — flag optional steps as manual for later\n- C) Decide step by step — confirm each optional item as we go\n\n---\n\n## Confirmation\n\nPresent a summary table before writing the profile. Include the FATA dimension for\neach item. Ask for explicit confirmation. If gaps remain, ask only the targeted\nfollow-up needed.\n\n```\n| # | Dimension | Question | Answer | What this means |\n|---|---------------|---------------------|-----------------------------|----------------------------------------------|\n| 0 | Contextual | Project context | New setup | No existing conventions to inherit |\n| A | Environmental | Workspace creation | Creating new | Agent creates workspaces |\n| B | Constraint | Domain assignment | New (manual gate) | Flagged manual — Fabric Admin rights needed |\n| C | Environmental | Access control | Groups — IDs direct | IDs required before run |\n| D | Preference | Deployment | PySpark notebook | .ipynb generated for Fabric import |\n| F | Environmental | Capacity | ldifabricdev | Embedded in notebook |\n| G | Constraint | Environments | Dev + prod | Plan parameterises all workspace references |\n```\n\n---\n\n## Output\n\nSave as `00-environment-discovery/environment-profile.md`. Include:\n- All path decisions (with FATA dimension)\n- Collected parameter values\n- Parameters flagged as required before execution (with retrieval instructions)\n- Manual gates with reason and operator instructions\n- Deployment prerequisites (auth steps, CLI installation)\n- Contextual/historical notes affecting naming or structure\n\nAppend to `CHANGE_LOG.md`:\n`[{DATETIME}] Sub-Agent 0 complete — environment-profile.md produced. [N] path decisions recorded. Manual gates: [list or none]. 
Parameters still needed: [list or none].`\n\n---\n\n## Gotchas\n\n- **Never frame deployment as CLI vs no-CLI** — all three approaches use `fab`\n- **`az login` and `fab auth login` are separate** — both are required for PowerShell/terminal deployments that include group lookups\n- **Workspace names are case-sensitive** — confirm exact casing from `fab ls` output\n- **Entra group Object IDs required** — display names are rejected by the Fabric API; see `references/technical-constraints.md`\n- **`notebookutils` cannot query Microsoft Graph** — a notebook deployment that assigns roles to groups requires a Service Principal or pre-resolved Object IDs\n- **Domain creation requires Fabric Administrator rights** — these are tenant-level, not workspace-level; default to skipping domain creation if the operator is uncertain\n- **Never collect credential values** — flag that they are needed; the operator enters them at runtime\n- **Stop when path decisions are resolved** — do not continue asking once the plan structure is clear\n",
|
|
187
|
+
},
|
|
188
|
+
{
|
|
189
|
+
relativePath: "references/fabric-architecture.md",
|
|
190
|
+
content: "# Fabric Architecture Reference\n\nLoad this file when questions arise about workload scope, environment promotion,\nmedallion architecture, or credential management patterns.\n\n---\n\n## Fabric Workload Landscape\n\nUnderstanding which workloads a process involves determines which downstream skills\nare needed and what environment questions are relevant.\n\n| Workload | Primary use | Downstream skill |\n|---|---|---|\n| **Lakehouse / Spark** | Delta tables, PySpark notebooks, file ingestion | spark-authoring-cli |\n| **Data Warehouse** | T-SQL analytics, structured serving layer | sqldw-authoring-cli |\n| **Pipelines** | Orchestration, data movement between workloads | Fabric Data Factory |\n| **KQL / Eventhouse** | Real-time and time-series analytics | eventhouse-authoring-cli |\n| **Power BI / Semantic Model** | Reporting layer, DAX, XMLA | powerbi-authoring-cli |\n| **Data Science / Agents** | ML models, conversational data agents | Fabric Data Science |\n\nMost process workflows involve a subset of these. Establishing workload scope early\nlets the plan delegate correctly and avoids discovering scope gaps mid-execution.\n\n**Ask about workload scope when:** requirements mention more than one of the above,\nor when the process spans ingestion → transformation → reporting (full pipeline).\n\n---\n\n## Medallion Architecture (Bronze / Silver / Gold)\n\nThe standard Fabric data engineering pattern organises data into three layers:\n\n| Layer | Contains | Format |\n|---|---|---|\n| **Bronze** | Raw ingested data — unmodified | Delta tables, files |\n| **Silver** | Validated, cleaned, conformed data | Delta tables |\n| **Gold** | Aggregated, business-ready data | Delta tables, views |\n\n**Why it matters for discovery:**\n- Lakehouse naming conventions typically reflect the layer\n (e.g. 
`lh_bronze`, `lh_silver`, `lh_gold`)\n- Shortcut and schema structures differ by layer\n- Pipelines must include validation gates between Bronze→Silver and Silver→Gold\n transitions — omitting these creates hard-to-debug data quality issues\n\n**Ask about medallion pattern when:** requirements involve lakehouses, ingestion,\nor transformation steps. The operator may not use the bronze/silver/gold naming —\nask whether they follow this pattern or have an existing naming convention.\n\n---\n\n## Environment Promotion (Dev / Test / Prod)\n\nThe FabricDataEngineer agent mandates explicit environment parameterisation.\nOne-off implementation choices that cannot be promoted across environments are\nexplicitly avoided.\n\n**What this means for discovery:**\n\n| Scenario | Plan impact |\n|---|---|\n| Dev only | Single workspace set; no promotion logic needed |\n| Dev + prod | Two workspace sets; plan must parameterise all workspace/lakehouse references |\n| Dev + test + prod | Three sets; deployment pipeline or scripted promotion required |\n\nWhen multiple environments are in scope:\n- Workspace names should follow a consistent pattern (e.g. 
`[Name]-Dev`, `[Name]-Prod`)\n- All IDs and names must be externalised — never hardcoded\n- The environment profile should record the naming pattern for each environment\n\n**Ask about environments when:** the process will run in production, or when the\noperator mentions promotion, CI/CD, or deploying to other teams.\n\n---\n\n## Credential Management\n\nCredentials required by Fabric processes (Service Principal secrets, storage keys,\nAPI tokens) should never be hardcoded in notebooks or scripts.\n\n| Method | Best for | Notes |\n|---|---|---|\n| **Azure Key Vault** | Production environments | Requires Key Vault resource + permissions |\n| **Notebook parameters** | Development / interactive runs | Operator enters at runtime; not stored |\n| **Environment variables** | Local PowerShell/terminal scripts | Set in shell session; not persisted |\n| **Fabric environment secrets** | Shared Spark environments | Requires Fabric environment configuration |\n\n**During discovery:** Do not collect credential values. If the plan requires a\nService Principal or storage credential, ask how the operator wants to manage it —\nKey Vault reference, runtime parameter entry, or environment variable. Record the\napproach in the environment profile so generated notebooks and scripts use the\ncorrect pattern.\n\n---\n\n## Developer vs Consumer Patterns\n\nUnderstanding the operator's role prevents over-engineering the plan:\n\n**Developers** (building pipelines, creating artefacts):\n- Use Fabric REST APIs for creating/managing workspaces, lakehouses, notebooks\n- Use protocol-specific connections for data access (Spark, ODBC/JDBC, XMLA, KQL)\n- Relevant to this skill — discovery is aimed at developers\n\n**Consumers** (querying data, running reports):\n- Use MCP servers or Fabric UI for natural language / report access\n- Typically do not need workspace creation or deployment scripts\n- If the operator is a consumer, scope the plan accordingly\n",
|
|
191
|
+
},
|
|
192
|
+
{
|
|
193
|
+
relativePath: "references/technical-constraints.md",
|
|
194
|
+
content: "# Technical Constraints Reference\n\nLoad this file when an operator's answer raises a technical question about\nauthentication, API limitations, or Fabric-specific constraints.\n\n---\n\n## Authentication — Two Separate Steps\n\nFabric CLI and Azure CLI use **different authentication sessions**. Both are\nrequired whenever the deployment involves Azure CLI lookups (e.g. resolving\nEntra group Object IDs) alongside Fabric CLI workspace operations.\n\n| Tool | Login command | Used for |\n|---|---|---|\n| Fabric CLI (`fab`) | `fab auth login` | Workspace creation, role assignment, lakehouse ops |\n| Azure CLI (`az`) | `az login` | Entra group/user Object ID resolution |\n\nOperators who choose PowerShell or terminal deployment must complete **both** logins\nbefore running the generated scripts. The generated artefacts will include both\ncommands with a clear note that they are separate.\n\nFor PySpark notebooks inside Fabric: a Power BI / Fabric scoped token is obtained\nautomatically at runtime — no manual login needed. **This is a discovery context\nnote about authentication flow, not a pattern for generating notebook content.**\nWorkspace creation notebooks use `fab` CLI commands via Python subprocess for all\noperations; `notebookutils` is not used for workspace or role assignment logic.\nThe token scope covers Fabric REST APIs only (see below).\n\n---\n\n## Entra Group Object IDs\n\nThe Fabric REST API and Fabric CLI require **Object IDs (GUIDs)** for group role\nassignment — display names are not accepted. 
This is a hard API constraint.\n\nResolution options for operators:\n\n| Method | Command | Requires |\n|---|---|---|\n| Azure portal | Microsoft Entra ID (formerly Azure AD) → Groups → select the group → Object ID field | Portal access |\n| Azure CLI | `az ad group show --group \"Name\" --query id -o tsv` | `az login` |\n| PowerShell (Graph) | `Get-MgGroup -Filter \"displayName eq 'Name'\" \\| Select-Object Id` | Microsoft.Graph module |\n\nAlways ask operators to confirm group display names exactly as they appear in Microsoft Entra ID —\nnames are case-sensitive in the API.\n\n---\n\n## `notebookutils` and Microsoft Graph\n\n`notebookutils.credentials.getToken('pbi')` inside a Fabric notebook returns a\nPower BI / Fabric scoped token. It **cannot** obtain a Microsoft Graph token.\n\nThis means a Fabric notebook **cannot**:\n- Look up Entra group Object IDs at runtime\n- Query Microsoft Entra ID for user or group information\n- Call any Graph API endpoint\n\n**Consequence:** If the deployment approach is a PySpark notebook AND the plan\nincludes Entra group role assignment, one of these must be true before the notebook runs:\n- The operator provides Object IDs directly (entered into a parameter cell)\n- Object IDs are resolved via Azure CLI or PowerShell beforehand and passed in\n\nIf neither is practical, steer the operator toward PowerShell or terminal deployment\nfor the role assignment step — both support `az login` → Graph lookups inline.\n\n---\n\n## Service Principal — When Required\n\nA Service Principal with application permissions is required only when a Fabric\nnotebook needs to call Microsoft Graph at runtime. This applies when:\n- Deployment = PySpark notebook\n- Role assignment includes Entra groups\n- Operator wants ID resolution to happen inside the notebook automatically\n\nRequired SP permissions: `Group.Read.All` + `User.Read.All` (application, not delegated),\nwith admin consent granted in Microsoft Entra ID.\n\n**During discovery:** Do not ask for SP credentials. 
Record that one is required,\nnote the permissions needed, and flag credential management as a runtime concern\n(see `fabric-architecture.md` → Credential Management).\n\n---\n\n## Workspace Name Case Sensitivity\n\nWorkspace names in `fab` paths are case-sensitive. `fab ls` returns exact names —\nalways confirm the operator is using the verbatim casing from that output.\n\nCommon failure: workspace names with leading/trailing spaces, or names that differ\nonly in capitalisation (e.g. `Finance Hub` vs `finance hub`).\n\n---\n\n## Capacity State Prerequisite\n\nA Fabric workspace must be assigned to an **Active** capacity at creation time.\nIf the capacity is paused, workspace creation will fail with `CapacityNotInActiveState`.\n\nThe operator must resume the capacity in the Azure portal before running the\nworkspace creation step. Flag this in the environment profile if there is any\nuncertainty about capacity state.\n",
|
|
187
195
|
},
|
|
188
196
|
],
|
|
189
197
|
},
|
|
@@ -193,7 +201,7 @@ export const EMBEDDED_SKILLS = [
|
|
|
193
201
|
files: [
|
|
194
202
|
{
|
|
195
203
|
relativePath: "SKILL.md",
|
|
196
|
-
content: "---\r\nname: generate-fabric-workspace\r\ndescription: >\r\n Use this skill when asked to create, provision, or set up a Microsoft Fabric\r\n workspace. Triggers on: \"create a Fabric workspace\", \"provision a workspace\r\n in Fabric\", \"set up a new Fabric workspace\", \"generate a workspace with\r\n capacity and permissions\", \"create workspace and assign roles in Fabric\".\r\n Collects workspace name, capacity, principals/roles, and optional domain\r\n settings, then creates the workspace using one of three approaches: PySpark\r\n notebook, PowerShell script, or interactive terminal commands. Produces a\r\n workspace definition markdown as a creation audit record. Does NOT trigger\r\n for general Fabric questions, item creation within a workspace, or\r\n workspace deletion tasks.\r\nlicense: MIT\r\ncompatibility: >\r\n ms-fabric-cli required (pip install ms-fabric-cli). Approach 1 requires a\r\n Fabric notebook environment. Approaches 2 and 3 require fab CLI installed\r\n locally with network access to Microsoft Fabric.\r\n---\r\n\r\n# Generate Fabric Workspace\r\n\r\n> ⚠️ **GOVERNANCE**: This skill produces notebooks and scripts for the operator to\r\n> review and run — it never executes commands directly against a live Fabric environment.\r\n> Present each generated artefact to the operator before they run it.\r\n\r\nCreates a Microsoft Fabric workspace assigned to a specified capacity, with\r\naccess roles and optional domain assignment. If the workspace already exists,\r\ncreation is skipped and roles/domain are updated. Outputs a workspace\r\ndefinition markdown as an audit trail.\r\n\r\n## Step 1 — Choose Approach\r\n\r\nAsk the user:\r\n\r\n> \"Which approach would you like to use?\r\n> 1. **PySpark Notebook** — generates a notebook to run inside Fabric\r\n> (authenticated automatically via the notebook environment)\r\n> 2. 
**PowerShell Script** — generates a `.ps1` for your review before execution\r\n> (requires fab CLI installed locally)\r\n> 3. **Interactive Terminal** — runs fab CLI commands one by one in the terminal,\r\n> with your confirmation at each step (requires fab CLI installed locally)\"\r\n\r\n### Authentication by approach\r\n\r\n| Approach | Authentication |\r\n|---|---|\r\n| PySpark Notebook | Auto via `notebookutils.credentials.getToken('pbi')` inside Fabric |\r\n| PowerShell / Terminal | `fab auth login` (browser pop-up) or set `$env:FAB_TOKEN` / `FAB_TOKEN` |\r\n\r\n## Step 2 — Domain Handling\r\n\r\nAsk the user:\r\n\r\n> \"Would you like to:\r\n> A. **Create a new domain** and assign the workspace to it\r\n> ⚠️ Requires **Fabric Admin** tenant-level permissions.\r\n> You will also need to specify an **Entra group** that will be allowed to\r\n> add/remove workspaces from this domain (the domain contributor group).\r\n> B. **Assign the workspace to an existing domain**\r\n> C. **Skip domain assignment**\"\r\n\r\n- If **A**: collect `DOMAIN_NAME` and `DOMAIN_CONTRIBUTOR_GROUP` (the Entra\r\n group display name allowed to add/remove workspaces from the domain). Confirm\r\n the user has Fabric Admin rights.\r\n- If **B**: collect `DOMAIN_NAME` only.\r\n- If **C**: no domain parameters needed.\r\n\r\n## Step 3 — Collect Parameters\r\n\r\nCollect these values from the user:\r\n\r\n| Parameter | Required | Description |\r\n|---|---|---|\r\n| `WORKSPACE_NAME` | Yes | Display name for the workspace |\r\n| `CAPACITY_NAME` | Yes | Exact name of the Fabric capacity to assign |\r\n| `DOMAIN_NAME` | If A or B | Name of the domain (new or existing) |\r\n| `DOMAIN_CONTRIBUTOR_GROUP` | If A | Display name of the Entra group that manages the domain |\r\n| `WORKSPACE_ROLES` | Conditional | Additional principals + roles (see approach-specific guidance below) |\r\n\r\n### Workspace roles — approach-specific guidance\r\n\r\nThe workspace creator is **automatically assigned as Admin**. 
Before collecting\r\nadditional roles, ask:\r\n\r\n> \"You (the creator) will be automatically assigned as workspace Admin. Do you\r\n> want to assign additional roles to other users or groups?\"\r\n\r\nIf **no**, skip role collection entirely. If **yes**, load\r\n`references/role-assignment.md` for approach-specific guidance on collecting\r\nprincipals, group resolution requirements, and Service Principal prerequisites.\r\n\r\nFor each additional principal, collect:\r\n- User **email address (UPN)** or Entra **group display name** — do NOT ask for Object IDs\r\n- Principal type: `User` or `Group` (or `ServicePrincipal`)\r\n- Role: `Admin`, `Member`, `Contributor`, or `Viewer`\r\n\r\n## Step 4 — Execute\r\n\r\n### Approach 1: PySpark Notebook\r\n\r\nIf role assignment includes Entra groups, `TENANT_ID`, `CLIENT_ID`, and `CLIENT_SECRET`\r\nare required — entered directly into Cell 1 of the generated notebook. See\r\n`references/role-assignment.md` for prerequisite details.\r\n\r\nRun `scripts/generate_notebook.py` with the collected parameters:\r\n\r\n```bash\r\npython scripts/generate_notebook.py \\\r\n --workspace-name \"WORKSPACE_NAME\" \\\r\n --capacity-name \"CAPACITY_NAME\" \\\r\n --roles \"user@corp.com:User:Admin,Finance Team:Group:Member\" \\\r\n [--domain-name \"DOMAIN_NAME\"] \\\r\n [--create-domain] \\\r\n [--domain-contributor-group \"DOMAIN_CONTRIBUTOR_GROUP\"] \\\r\n --output workspace_setup.ipynb\r\n```\r\n\r\nPresent the generated `workspace_setup.ipynb` to the user and instruct them to:\r\n1. Upload to any Fabric workspace as a notebook\r\n2. Run each cell **one at a time**, reading the output before proceeding\r\n3. ✅ Verification cells are clearly marked — confirm output before moving on\r\n4. 
Share the output of Cell 7 (`fab ls`) and Cell 9 (`fab acl ls`)\r\n\r\n### Approach 2: PowerShell Script\r\n\r\nRun `scripts/generate_ps1.py` with the collected parameters:\r\n\r\n```bash\r\npython scripts/generate_ps1.py \\\r\n --workspace-name \"WORKSPACE_NAME\" \\\r\n --capacity-name \"CAPACITY_NAME\" \\\r\n --roles \"user@corp.com:User:Admin,Finance Team:Group:Member\" \\\r\n [--domain-name \"DOMAIN_NAME\"] \\\r\n [--create-domain] \\\r\n [--domain-contributor-group \"DOMAIN_CONTRIBUTOR_GROUP\"] \\\r\n --output workspace_setup.ps1\r\n```\r\n\r\nShow `workspace_setup.ps1` to the user for review. **Do not execute until the\r\nuser confirms.** Then run:\r\n\r\n```powershell\r\n.\\workspace_setup.ps1\r\n```\r\n\r\n### Approach 3: Interactive Terminal\r\n\r\nRun these commands in sequence. Show output after each and ask the user to\r\nconfirm before continuing.\r\n\r\n**Install and authenticate:**\r\n```bash\r\npip install ms-fabric-cli\r\nfab auth login\r\n```\r\n\r\n**Check if workspace already exists:**\r\n```bash\r\nfab exists \"WORKSPACE_NAME.Workspace\"\r\n```\r\n- Exit code 0 → workspace exists → skip creation, go to role assignment\r\n- Non-zero → proceed to create\r\n\r\n**Create workspace:**\r\n```bash\r\nfab mkdir \"WORKSPACE_NAME.Workspace\" -P capacityName=CAPACITY_NAME\r\n```\r\n\r\n**Verify creation:**\r\n```bash\r\nfab exists \"WORKSPACE_NAME.Workspace\"\r\nfab ls \"WORKSPACE_NAME.Workspace\"\r\n```\r\n\r\n**Resolve principal IDs** (before assigning roles — repeat for each principal):\r\n```bash\r\n# For a user (by UPN / email):\r\naz ad user show --id user@corp.com --query id -o tsv\r\n\r\n# For a group (by display name):\r\naz ad group show --group \"Finance Team\" --query id -o tsv\r\n\r\n# For a service principal (by display name or app ID):\r\naz ad sp show --id \"My App Name\" --query id -o tsv\r\n```\r\n\r\n**Assign roles** (use the resolved Object ID, role in lowercase):\r\n```bash\r\nfab acl set \"WORKSPACE_NAME.Workspace\" -I 
<RESOLVED_OBJECT_ID> -R role\r\n```\r\n\r\n**Verify roles:**\r\n```bash\r\nfab acl ls \"WORKSPACE_NAME.Workspace\"\r\n```\r\n\r\n**Create domain** (if Step 2 = A):\r\n```bash\r\n# Resolve domain contributor group ID:\r\naz ad group show --group \"DOMAIN_CONTRIBUTOR_GROUP\" --query id -o tsv\r\n\r\nfab mkdir \"DOMAIN_NAME.domain\"\r\nfab acl set \".domains/DOMAIN_NAME.Domain\" -I <RESOLVED_GROUP_ID> -R contributor\r\n```\r\n\r\n**Assign workspace to domain** (if Step 2 = A or B):\r\n```bash\r\nfab assign \".domains/DOMAIN_NAME.Domain\" -W \"WORKSPACE_NAME.Workspace\"\r\n```\r\n\r\n## Step 5 — Generate Workspace Definition\r\n\r\nCollect from the command output (or ask the user):\r\n- Workspace ID (appears in `fab ls` output)\r\n- Tenant name or tenant ID\r\n- Confirmed principals and roles\r\n- Domain name (if assigned)\r\n\r\nRun `scripts/generate_definition.py`:\r\n\r\n```bash\r\npython scripts/generate_definition.py \\\r\n --workspace-name \"WORKSPACE_NAME\" \\\r\n --workspace-id \"WORKSPACE_ID\" \\\r\n --capacity-name \"CAPACITY_NAME\" \\\r\n --tenant \"TENANT_NAME\" \\\r\n --roles \"user@corp.com:User:Admin,Finance Team:Group:Member\" \\\r\n [--domain-name \"DOMAIN_NAME\"] \\\r\n --approach \"notebook|powershell|terminal\" \\\r\n --output workspace_definition.md\r\n```\r\n\r\nPresent `workspace_definition.md` to the user.\r\n\r\n## Gotchas\r\n\r\n- Workspace path format is `WorkspaceName.Workspace` — the `.Workspace` suffix is required.\r\n- The capacity must be **Active** before `fab mkdir`. If you see `CapacityNotInActiveState`,\r\n ask the user to resume the capacity in the Azure portal before retrying.\r\n- `notebookutils.credentials.getToken()` in Fabric notebooks **does not support Microsoft Graph**.\r\n The notebook approach requires a Service Principal with `Group.Read.All` + `User.Read.All`\r\n application permissions and admin consent. The SP credentials are entered in Cell 1 of\r\n the generated notebook. 
If the user doesn't have an SP, direct them to the PowerShell\r\n or Interactive Terminal approach instead.\r\n- Domain creation requires Fabric Administrator tenant-level rights. If the user cannot\r\n create a domain, fall back to assigning an existing one or skipping.\r\n- `fab exists` uses exit code (0 = exists, non-zero = not found) — do not rely on stdout text alone.\r\n- In the notebook approach, `notebookutils` is only available inside a Fabric notebook.\r\n The generated script must not be run as a plain Python script outside Fabric.\r\n- The `.domain` suffix (lowercase) is used in `fab mkdir`; `.Domain` (capitalised) is\r\n used in `fab assign` and `fab acl set` — these are different and both matter.\r\n- Role values passed to `fab acl set` must be **lowercase** (`admin`, `member`, `contributor`, `viewer`).\r\n The scripts handle this conversion automatically.\r\n- For PowerShell/terminal approaches, `az login` must be completed before `az ad user/group show` will work.\r\n This is separate from `fab auth login` — both are required.\r\n\r\n## Available Scripts\r\n\r\n- **`scripts/generate_notebook.py`** — Generates PySpark notebook. Run: `python scripts/generate_notebook.py --help`\r\n- **`scripts/generate_ps1.py`** — Generates PowerShell script. Run: `python scripts/generate_ps1.py --help`\r\n- **`scripts/generate_definition.py`** — Generates workspace definition markdown. Run: `python scripts/generate_definition.py --help`\r\n\r\n## Available References\r\n\r\n- **`references/role-assignment.md`** — Approach-specific guidance for assigning roles to users and Entra groups. Load when user wants to assign additional workspace roles.\r\n- **`references/fabric-cli-reference.md`** — Fabric CLI command reference.\r\n",
|
|
204
|
+
content: "---\r\nname: generate-fabric-workspace\r\ndescription: >\r\n Use this skill when asked to create, provision, or set up a Microsoft Fabric\r\n workspace. Triggers on: \"create a Fabric workspace\", \"provision a workspace\r\n in Fabric\", \"set up a new Fabric workspace\", \"generate a workspace with\r\n capacity and permissions\", \"create workspace and assign roles in Fabric\".\r\n Collects workspace name, capacity, principals/roles, and optional domain\r\n settings, then creates the workspace using one of three approaches: PySpark\r\n notebook, PowerShell script, or interactive terminal commands. Produces a\r\n workspace definition markdown as a creation audit record. Does NOT trigger\r\n for general Fabric questions, item creation within a workspace, or\r\n workspace deletion tasks.\r\nlicense: MIT\r\ncompatibility: >\r\n ms-fabric-cli required (pip install ms-fabric-cli). Approach 1 requires a\r\n Fabric notebook environment. Approaches 2 and 3 require fab CLI installed\r\n locally with network access to Microsoft Fabric.\r\n---\r\n\r\n# Generate Fabric Workspace\r\n\r\n> ⚠️ **GOVERNANCE**: This skill produces notebooks and scripts for the operator to\r\n> review and run — it never executes commands directly against a live Fabric environment.\r\n> Present each generated artefact to the operator before they run it.\r\n>\r\n> ⚠️ **GENERATION**: Always run the generator scripts (`scripts/generate_notebook.py`,\r\n> `scripts/generate_ps1.py`) via Bash to produce artefacts — never generate notebook\r\n> or script content directly. Do not present generator scripts themselves as outputs.\r\n> Workspace notebooks use `fab` CLI commands via Python subprocess for all operations;\r\n> `notebookutils` is used only for the authentication token step, not for workspace\r\n> or role assignment logic.\r\n\r\nCreates a Microsoft Fabric workspace assigned to a specified capacity, with\r\naccess roles and optional domain assignment. 
If the workspace already exists,\r\ncreation is skipped and roles/domain are updated. Outputs a workspace\r\ndefinition markdown as an audit trail.\r\n\r\n## Step 1 — Choose Approach\r\n\r\nAsk the user:\r\n\r\n> \"Which approach would you like to use?\r\n> 1. **PySpark Notebook** — generates a notebook to run inside Fabric\r\n> (authenticated automatically via the notebook environment)\r\n> 2. **PowerShell Script** — generates a `.ps1` for your review before execution\r\n> (requires fab CLI installed locally)\r\n> 3. **Interactive Terminal** — runs fab CLI commands one by one in the terminal,\r\n> with your confirmation at each step (requires fab CLI installed locally)\"\r\n\r\n### Authentication by approach\r\n\r\n| Approach | Authentication |\r\n|---|---|\r\n| PySpark Notebook | Auto via `notebookutils.credentials.getToken('pbi')` inside Fabric |\r\n| PowerShell / Terminal | `fab auth login` (browser pop-up) or set `$env:FAB_TOKEN` / `FAB_TOKEN` |\r\n\r\n## Step 2 — Domain Handling\r\n\r\nAsk the user:\r\n\r\n> \"Would you like to:\r\n> A. **Create a new domain** and assign the workspace to it\r\n> ⚠️ Requires **Fabric Admin** tenant-level permissions.\r\n> You will also need to specify an **Entra group** that will be allowed to\r\n> add/remove workspaces from this domain (the domain contributor group).\r\n> B. **Assign the workspace to an existing domain**\r\n> C. **Skip domain assignment**\"\r\n\r\n- If **A**: collect `DOMAIN_NAME` and `DOMAIN_CONTRIBUTOR_GROUP` (the Entra\r\n group display name allowed to add/remove workspaces from the domain). 
Confirm\r\n the user has Fabric Admin rights.\r\n- If **B**: collect `DOMAIN_NAME` only.\r\n- If **C**: no domain parameters needed.\r\n\r\n## Step 3 — Collect Parameters\r\n\r\nCollect these values from the user:\r\n\r\n| Parameter | Required | Description |\r\n|---|---|---|\r\n| `WORKSPACE_NAME` | Yes | Display name for the workspace |\r\n| `CAPACITY_NAME` | Yes | Exact name of the Fabric capacity to assign |\r\n| `DOMAIN_NAME` | If A or B | Name of the domain (new or existing) |\r\n| `DOMAIN_CONTRIBUTOR_GROUP` | If A | Display name of the Entra group that manages the domain |\r\n| `WORKSPACE_ROLES` | Conditional | Additional principals + roles (see approach-specific guidance below) |\r\n\r\n### Workspace roles — approach-specific guidance\r\n\r\nThe workspace creator is **automatically assigned as Admin**. Before collecting\r\nadditional roles, ask:\r\n\r\n> \"You (the creator) will be automatically assigned as workspace Admin. Do you\r\n> want to assign additional roles to other users or groups?\"\r\n\r\nIf **no**, skip role collection entirely. If **yes**, load\r\n`references/role-assignment.md` for approach-specific guidance on collecting\r\nprincipals, group resolution requirements, and Service Principal prerequisites.\r\n\r\nFor each additional principal, collect:\r\n- User **email address (UPN)** or Entra **group display name** — do NOT ask for Object IDs\r\n- Principal type: `User` or `Group` (or `ServicePrincipal`)\r\n- Role: `Admin`, `Member`, `Contributor`, or `Viewer`\r\n\r\n## Step 4 — Execute\r\n\r\n### Approach 1: PySpark Notebook\r\n\r\nIf role assignment includes Entra groups, `TENANT_ID`, `CLIENT_ID`, and `CLIENT_SECRET`\r\nare required — entered directly into Cell 1 of the generated notebook. 
See\r\n`references/role-assignment.md` for prerequisite details.\r\n\r\nRun `scripts/generate_notebook.py` with the collected parameters:\r\n\r\n```bash\r\npython scripts/generate_notebook.py \\\r\n --workspace-name \"WORKSPACE_NAME\" \\\r\n --capacity-name \"CAPACITY_NAME\" \\\r\n --roles \"user@corp.com:User:Admin,Finance Team:Group:Member\" \\\r\n [--domain-name \"DOMAIN_NAME\"] \\\r\n [--create-domain] \\\r\n [--domain-contributor-group \"DOMAIN_CONTRIBUTOR_GROUP\"] \\\r\n --output workspace_setup.ipynb\r\n```\r\n\r\nPresent the generated `workspace_setup.ipynb` to the user and instruct them to:\r\n1. Upload to any Fabric workspace as a notebook\r\n2. Run each cell **one at a time**, reading the output before proceeding\r\n3. ✅ Verification cells are clearly marked — confirm output before moving on\r\n4. Share the output of Cell 7 (`fab ls`) and Cell 9 (`fab acl ls`)\r\n\r\n### Approach 2: PowerShell Script\r\n\r\nRun `scripts/generate_ps1.py` with the collected parameters:\r\n\r\n```bash\r\npython scripts/generate_ps1.py \\\r\n --workspace-name \"WORKSPACE_NAME\" \\\r\n --capacity-name \"CAPACITY_NAME\" \\\r\n --roles \"user@corp.com:User:Admin,Finance Team:Group:Member\" \\\r\n [--domain-name \"DOMAIN_NAME\"] \\\r\n [--create-domain] \\\r\n [--domain-contributor-group \"DOMAIN_CONTRIBUTOR_GROUP\"] \\\r\n --output workspace_setup.ps1\r\n```\r\n\r\nShow `workspace_setup.ps1` to the user for review. **Do not execute until the\r\nuser confirms.** Then run:\r\n\r\n```powershell\r\n.\\workspace_setup.ps1\r\n```\r\n\r\n### Approach 3: Interactive Terminal\r\n\r\nRun these commands in sequence. 
Show output after each and ask the user to\r\nconfirm before continuing.\r\n\r\n**Install and authenticate:**\r\n```bash\r\npip install ms-fabric-cli\r\nfab auth login\r\n```\r\n\r\n**Check if workspace already exists:**\r\n```bash\r\nfab exists \"WORKSPACE_NAME.Workspace\"\r\n```\r\n- Exit code 0 → workspace exists → skip creation, go to role assignment\r\n- Non-zero → proceed to create\r\n\r\n**Create workspace:**\r\n```bash\r\nfab mkdir \"WORKSPACE_NAME.Workspace\" -P capacityName=CAPACITY_NAME\r\n```\r\n\r\n**Verify creation:**\r\n```bash\r\nfab exists \"WORKSPACE_NAME.Workspace\"\r\nfab ls \"WORKSPACE_NAME.Workspace\"\r\n```\r\n\r\n**Resolve principal IDs** (before assigning roles — requires `az login`; repeat for each principal):\r\n```bash\r\n# For a user (by UPN / email):\r\naz ad user show --id user@corp.com --query id -o tsv\r\n\r\n# For a group (by display name):\r\naz ad group show --group \"Finance Team\" --query id -o tsv\r\n\r\n# For a service principal (by app ID):\r\naz ad sp show --id \"APP_ID\" --query id -o tsv\r\n\r\n# For a service principal (by display name):\r\naz ad sp list --display-name \"My App Name\" --query \"[0].id\" -o tsv\r\n```\r\n\r\n**Assign roles** (use the resolved Object ID, role in lowercase):\r\n```bash\r\nfab acl set \"WORKSPACE_NAME.Workspace\" -I <RESOLVED_OBJECT_ID> -R role\r\n```\r\n\r\n**Verify roles:**\r\n```bash\r\nfab acl ls \"WORKSPACE_NAME.Workspace\"\r\n```\r\n\r\n**Create domain** (if Step 2 = A):\r\n```bash\r\n# Resolve domain contributor group ID:\r\naz ad group show --group \"DOMAIN_CONTRIBUTOR_GROUP\" --query id -o tsv\r\n\r\nfab mkdir \"DOMAIN_NAME.domain\"\r\nfab acl set \".domains/DOMAIN_NAME.Domain\" -I <RESOLVED_GROUP_ID> -R contributor\r\n```\r\n\r\n**Assign workspace to domain** (if Step 2 = A or B):\r\n```bash\r\nfab assign \".domains/DOMAIN_NAME.Domain\" -W \"WORKSPACE_NAME.Workspace\"\r\n```\r\n\r\n## Step 5 — Generate Workspace Definition\r\n\r\nCollect from the command output (or ask the user):\r\n- Workspace ID (appears in `fab ls` output)\r\n- Tenant name or tenant ID\r\n- Confirmed principals and roles\r\n- Domain 
name (if assigned)\r\n\r\nRun `scripts/generate_definition.py`:\r\n\r\n```bash\r\npython scripts/generate_definition.py \\\r\n --workspace-name \"WORKSPACE_NAME\" \\\r\n --workspace-id \"WORKSPACE_ID\" \\\r\n --capacity-name \"CAPACITY_NAME\" \\\r\n --tenant \"TENANT_NAME\" \\\r\n --roles \"user@corp.com:User:Admin,Finance Team:Group:Member\" \\\r\n [--domain-name \"DOMAIN_NAME\"] \\\r\n --approach \"notebook|powershell|terminal\" \\\r\n --output workspace_definition.md\r\n```\r\n\r\nPresent `workspace_definition.md` to the user.\r\n\r\n## Gotchas\r\n\r\n- Workspace path format is `WorkspaceName.Workspace` — the `.Workspace` suffix is required.\r\n- The capacity must be **Active** before `fab mkdir`. If you see `CapacityNotInActiveState`,\r\n ask the user to resume the capacity in the Azure portal before retrying.\r\n- `notebookutils.credentials.getToken()` in Fabric notebooks **does not support Microsoft Graph**.\r\n The notebook approach requires a Service Principal with `Group.Read.All` + `User.Read.All`\r\n application permissions and admin consent. The SP credentials are entered in Cell 1 of\r\n the generated notebook. If the user doesn't have an SP, direct them to the PowerShell\r\n or Interactive Terminal approach instead.\r\n- Domain creation requires Fabric Administrator tenant-level rights. 
If the user cannot\r\n create a domain, fall back to assigning an existing one or skipping.\r\n- `fab exists` uses exit code (0 = exists, non-zero = not found) — do not rely on stdout text alone.\r\n- In the notebook approach, `notebookutils` is only available inside a Fabric notebook.\r\n The generated script must not be run as a plain Python script outside Fabric.\r\n- The `.domain` suffix (lowercase) is used in `fab mkdir`; `.Domain` (capitalised) is\r\n used in `fab assign` and `fab acl set` — these are different and both matter.\r\n- Role values passed to `fab acl set` must be **lowercase** (`admin`, `member`, `contributor`, `viewer`).\r\n The scripts handle this conversion automatically.\r\n- For PowerShell/terminal approaches, `az login` must be completed before `az ad user/group show` will work.\r\n This is separate from `fab auth login` — both are required.\r\n\r\n## Available Scripts\r\n\r\n- **`scripts/generate_notebook.py`** — Generates PySpark notebook. Run: `python scripts/generate_notebook.py --help`\r\n- **`scripts/generate_ps1.py`** — Generates PowerShell script. Run: `python scripts/generate_ps1.py --help`\r\n- **`scripts/generate_definition.py`** — Generates workspace definition markdown. Run: `python scripts/generate_definition.py --help`\r\n\r\n## Available References\r\n\r\n- **`references/role-assignment.md`** — Approach-specific guidance for assigning roles to users and Entra groups. Load when user wants to assign additional workspace roles.\r\n- **`references/fabric-cli-reference.md`** — Fabric CLI command reference.\r\n",
|
|
197
205
|
},
|
|
198
206
|
{
|
|
199
207
|
relativePath: "references/fabric-cli-reference.md",
|