pvw-cli 1.2.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pvw-cli might be problematic. Click here for more details.
- purviewcli/__init__.py +27 -0
- purviewcli/__main__.py +15 -0
- purviewcli/cli/__init__.py +5 -0
- purviewcli/cli/account.py +199 -0
- purviewcli/cli/cli.py +170 -0
- purviewcli/cli/collections.py +502 -0
- purviewcli/cli/domain.py +361 -0
- purviewcli/cli/entity.py +2436 -0
- purviewcli/cli/glossary.py +533 -0
- purviewcli/cli/health.py +250 -0
- purviewcli/cli/insight.py +113 -0
- purviewcli/cli/lineage.py +1103 -0
- purviewcli/cli/management.py +141 -0
- purviewcli/cli/policystore.py +103 -0
- purviewcli/cli/relationship.py +75 -0
- purviewcli/cli/scan.py +357 -0
- purviewcli/cli/search.py +527 -0
- purviewcli/cli/share.py +478 -0
- purviewcli/cli/types.py +831 -0
- purviewcli/cli/unified_catalog.py +3540 -0
- purviewcli/cli/workflow.py +402 -0
- purviewcli/client/__init__.py +21 -0
- purviewcli/client/_account.py +1877 -0
- purviewcli/client/_collections.py +1761 -0
- purviewcli/client/_domain.py +414 -0
- purviewcli/client/_entity.py +3545 -0
- purviewcli/client/_glossary.py +3233 -0
- purviewcli/client/_health.py +501 -0
- purviewcli/client/_insight.py +2873 -0
- purviewcli/client/_lineage.py +2138 -0
- purviewcli/client/_management.py +2202 -0
- purviewcli/client/_policystore.py +2915 -0
- purviewcli/client/_relationship.py +1351 -0
- purviewcli/client/_scan.py +2607 -0
- purviewcli/client/_search.py +1472 -0
- purviewcli/client/_share.py +272 -0
- purviewcli/client/_types.py +2708 -0
- purviewcli/client/_unified_catalog.py +5112 -0
- purviewcli/client/_workflow.py +2734 -0
- purviewcli/client/api_client.py +1295 -0
- purviewcli/client/business_rules.py +675 -0
- purviewcli/client/config.py +231 -0
- purviewcli/client/data_quality.py +433 -0
- purviewcli/client/endpoint.py +123 -0
- purviewcli/client/endpoints.py +554 -0
- purviewcli/client/exceptions.py +38 -0
- purviewcli/client/lineage_visualization.py +797 -0
- purviewcli/client/monitoring_dashboard.py +712 -0
- purviewcli/client/rate_limiter.py +30 -0
- purviewcli/client/retry_handler.py +125 -0
- purviewcli/client/scanning_operations.py +523 -0
- purviewcli/client/settings.py +1 -0
- purviewcli/client/sync_client.py +250 -0
- purviewcli/plugins/__init__.py +1 -0
- purviewcli/plugins/plugin_system.py +709 -0
- pvw_cli-1.2.8.dist-info/METADATA +1618 -0
- pvw_cli-1.2.8.dist-info/RECORD +60 -0
- pvw_cli-1.2.8.dist-info/WHEEL +5 -0
- pvw_cli-1.2.8.dist-info/entry_points.txt +3 -0
- pvw_cli-1.2.8.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,1618 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pvw-cli
|
|
3
|
+
Version: 1.2.8
|
|
4
|
+
Summary: Microsoft Purview CLI with comprehensive automation capabilities
|
|
5
|
+
Author-email: AYOUB KEBAILI <keayoub@msn.com>
|
|
6
|
+
Maintainer-email: AYOUB KEBAILI <keayoub@msn.com>
|
|
7
|
+
License-Expression: MIT
|
|
8
|
+
Project-URL: Homepage, https://github.com/Keayoub/Purview_cli
|
|
9
|
+
Project-URL: Documentation, https://github.com/Keayoub/Purview_cli/wiki
|
|
10
|
+
Project-URL: Repository, https://github.com/Keayoub/Purview_cli.git
|
|
11
|
+
Project-URL: Bug Tracker, https://github.com/Keayoub/Purview_cli/issues
|
|
12
|
+
Project-URL: Source, https://github.com/Keayoub/Purview_cli
|
|
13
|
+
Keywords: azure,purview,cli,data,catalog,governance,automation,pvw
|
|
14
|
+
Classifier: Development Status :: 4 - Beta
|
|
15
|
+
Classifier: Intended Audience :: Developers
|
|
16
|
+
Classifier: Intended Audience :: System Administrators
|
|
17
|
+
Classifier: Operating System :: OS Independent
|
|
18
|
+
Classifier: Programming Language :: Python :: 3
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
24
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
25
|
+
Classifier: Topic :: System :: Systems Administration
|
|
26
|
+
Classifier: Topic :: Database
|
|
27
|
+
Classifier: Topic :: Internet :: WWW/HTTP
|
|
28
|
+
Requires-Python: >=3.8
|
|
29
|
+
Description-Content-Type: text/markdown
|
|
30
|
+
Requires-Dist: azure-identity>=1.12.0
|
|
31
|
+
Requires-Dist: azure-core>=1.24.0
|
|
32
|
+
Requires-Dist: click>=8.0.0
|
|
33
|
+
Requires-Dist: rich>=12.0.0
|
|
34
|
+
Requires-Dist: requests>=2.28.0
|
|
35
|
+
Requires-Dist: pandas>=1.5.0
|
|
36
|
+
Requires-Dist: aiohttp>=3.8.0
|
|
37
|
+
Requires-Dist: pydantic<2.12,>=1.10.0
|
|
38
|
+
Requires-Dist: PyYAML>=6.0
|
|
39
|
+
Requires-Dist: cryptography<46.0.0,>=41.0.5
|
|
40
|
+
Provides-Extra: dev
|
|
41
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
42
|
+
Requires-Dist: pytest-asyncio>=0.20.0; extra == "dev"
|
|
43
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
|
44
|
+
Requires-Dist: black>=22.0.0; extra == "dev"
|
|
45
|
+
Requires-Dist: isort>=5.10.0; extra == "dev"
|
|
46
|
+
Requires-Dist: flake8>=5.0.0; extra == "dev"
|
|
47
|
+
Requires-Dist: mypy>=0.991; extra == "dev"
|
|
48
|
+
Requires-Dist: pre-commit>=2.20.0; extra == "dev"
|
|
49
|
+
Provides-Extra: docs
|
|
50
|
+
Requires-Dist: sphinx>=5.0.0; extra == "docs"
|
|
51
|
+
Requires-Dist: sphinx-rtd-theme>=1.0.0; extra == "docs"
|
|
52
|
+
Requires-Dist: myst-parser>=0.18.0; extra == "docs"
|
|
53
|
+
Provides-Extra: test
|
|
54
|
+
Requires-Dist: pytest>=7.0.0; extra == "test"
|
|
55
|
+
Requires-Dist: pytest-asyncio>=0.20.0; extra == "test"
|
|
56
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == "test"
|
|
57
|
+
Requires-Dist: requests-mock>=1.9.0; extra == "test"
|
|
58
|
+
|
|
59
|
+
# PURVIEW CLI v1.2.8 - Microsoft Purview Automation & Data Governance
|
|
60
|
+
|
|
61
|
+
[Release v1.2.8](https://github.com/Keayoub/pvw-cli/releases/tag/v1.2.8)
|
|
62
|
+
[](https://github.com/Keayoub/pvw-cli)
|
|
63
|
+
[](https://github.com/Keayoub/pvw-cli)
|
|
64
|
+
[](https://github.com/Keayoub/pvw-cli)
|
|
65
|
+
|
|
66
|
+
> **LATEST UPDATE v1.2.8 (November 3, 2025):**
|
|
67
|
+
>
|
|
68
|
+
> **🔗 Advanced Lineage Features & Column-Level Mapping**
|
|
69
|
+
>
|
|
70
|
+
> **New Lineage Capabilities:**
|
|
71
|
+
> - **[NEW]** Column-level lineage with multi-target support (1 source → N targets)
|
|
72
|
+
> - **[NEW]** Direct lineage (UI-style) - No visible Process entity
|
|
73
|
+
> - **[NEW]** Dual-mode CSV import - Automatic detection of Process vs Direct lineage
|
|
74
|
+
> - **[NEW]** Column mapping in direct relationships - Granular data flow tracking
|
|
75
|
+
> - **[NEW]** Enhanced error handling with SSL retry strategies
|
|
76
|
+
>
|
|
77
|
+
> **New CLI Commands:**
|
|
78
|
+
> ```bash
|
|
79
|
+
> pvw lineage create-column # Column-level lineage (Process-based)
|
|
80
|
+
> pvw lineage create-direct # Direct lineage (UI-style, no Process)
|
|
81
|
+
> pvw lineage list-column # List column lineages
|
|
82
|
+
> pvw lineage delete-column # Delete column lineage
|
|
83
|
+
> ```
|
|
84
|
+
>
|
|
85
|
+
> **What's New:**
|
|
86
|
+
> - ✅ Column mapping visible in Purview UI
|
|
87
|
+
> - ✅ Compatible with manual UI lineage creation
|
|
88
|
+
> - ✅ Type validation (prevent invalid lineage)
|
|
89
|
+
> - ✅ Batch CSV import with 5 sample files
|
|
90
|
+
> - ✅ Comprehensive documentation & examples
|
|
91
|
+
>
|
|
92
|
+
> **Previous Releases:**
|
|
93
|
+
> - **v1.2.6** - Initial lineage improvements
|
|
94
|
+
> - **v1.2.5** - 86% UC API Coverage (45/52 operations) with Relationships, Query, Policies APIs
|
|
95
|
+
>
|
|
96
|
+
> **[Full Release Notes v1.2.8](releases/v1.2.8.md)** | **[v1.2.5 Release Notes](releases/v1.2.5.md)** | **[Migration Guide](releases/v1.2.8.md#migration-guide)**
|
|
97
|
+
|
|
98
|
+
---
|
|
99
|
+
|
|
100
|
+
## What is PVW CLI?
|
|
101
|
+
|
|
102
|
+
**PVW CLI v1.2.8** is a modern, full-featured command-line interface and Python library for Microsoft Purview. It enables automation and management of *all major Purview APIs* with **86% Unified Catalog API coverage** (45 of 52 operations).
|
|
103
|
+
|
|
104
|
+
### Key Capabilities
|
|
105
|
+
|
|
106
|
+
**Unified Catalog (UC) Management - 86% Complete**
|
|
107
|
+
- Complete governance domains, glossary terms, data products, OKRs, CDEs
|
|
108
|
+
- Relationships API - Link data products/CDEs/terms to entities and columns
|
|
109
|
+
- Query APIs - Advanced OData filtering with multi-criteria search
|
|
110
|
+
- Policy Management - Complete CRUD for governance and RBAC policies
|
|
111
|
+
- Custom Metadata & Attributes - Extensible business metadata and attributes
|
|
112
|
+
|
|
113
|
+
**Data Operations**
|
|
114
|
+
- Entity management (create, update, bulk, import/export)
|
|
115
|
+
- Lineage operations with interactive creation and CSV import
|
|
116
|
+
- Advanced search and discovery, including the repaired suggest/autocomplete endpoints
|
|
117
|
+
- Business metadata with proper scope configuration
|
|
118
|
+
|
|
119
|
+
**Automation & Scripting**
|
|
120
|
+
- Bulk Operations - Import/export from CSV/JSON with dry-run support
|
|
121
|
+
- Scriptable Output - Multiple formats (table, json, jsonc) for PowerShell/bash
|
|
122
|
+
- 80+ usage examples and 15+ comprehensive guides
|
|
123
|
+
- PowerShell integration with ConvertFrom-Json support
|
|
124
|
+
|
|
125
|
+
**Legacy API Support**
|
|
126
|
+
- Collection and account management
|
|
127
|
+
- Data product management (legacy compatibility)
|
|
128
|
+
- Classification, label, and status management
|
|
129
|
+
|
|
130
|
+
The CLI is designed for data engineers, stewards, architects, and platform teams to automate, scale, and enhance their Microsoft Purview experience.
|
|
131
|
+
|
|
132
|
+
### NEW: MCP Server for AI Assistants
|
|
133
|
+
|
|
134
|
+
**[NEW]** Model Context Protocol (MCP) server enables LLM-powered data governance workflows!
|
|
135
|
+
|
|
136
|
+
- Natural language interface to Purview catalog
|
|
137
|
+
- 20+ tools for AI assistants (Claude, Cline, etc.)
|
|
138
|
+
- Automate complex multi-step operations
|
|
139
|
+
- See `mcp/README.md` for setup instructions
|
|
140
|
+
|
|
141
|
+
---
|
|
142
|
+
|
|
143
|
+
## What's New in Recent Releases
|
|
144
|
+
|
|
145
|
+
### v1.2.8 (November 3, 2025) - Advanced Lineage Features
|
|
146
|
+
|
|
147
|
+
**Column-Level Lineage & Direct Relationships:**
|
|
148
|
+
- Column-level lineage with multi-target support (1→N)
|
|
149
|
+
- Direct lineage creation (UI-style, no visible Process)
|
|
150
|
+
- Dual-mode CSV import with automatic type detection
|
|
151
|
+
- Column mapping in direct relationships
|
|
152
|
+
- Enhanced error handling with SSL retry strategies
|
|
153
|
+
|
|
154
|
+
**New Commands:**
|
|
155
|
+
```bash
|
|
156
|
+
pvw lineage create-column # Column lineage (Process-based)
|
|
157
|
+
pvw lineage create-direct # Direct lineage (UI-style)
|
|
158
|
+
pvw lineage list-column # List column lineages
|
|
159
|
+
pvw lineage delete-column # Delete column lineage
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
**CSV Import Examples:**
|
|
163
|
+
```csv
|
|
164
|
+
# Direct lineage with column mapping
|
|
165
|
+
source_entity_guid,target_entity_guid,relationship_type,column_mapping
|
|
166
|
+
guid1,guid2,direct_lineage_dataset_dataset,"[{""Source"":""ID"",""Sink"":""ID""}]"
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
**[Full v1.2.8 Release Notes](releases/v1.2.8.md)**
|
|
170
|
+
|
|
171
|
+
---
|
|
172
|
+
|
|
173
|
+
### v1.2.5 (October 30, 2025) - 86% UC API Coverage
|
|
174
|
+
|
|
175
|
+
Version 1.2.5 achieves **86% coverage** of the Microsoft Purview Unified Catalog API with **35 new operations**:
|
|
176
|
+
|
|
177
|
+
| Resource Type | Coverage | Operations | Status |
|
|
178
|
+
|--------------|----------|------------|---------|
|
|
179
|
+
| **Business Domains** | 100% | 5/5 | ✅ Complete |
|
|
180
|
+
| **Data Products** | 90% | 9/10 | ⚠️ 1 missing (Facets) |
|
|
181
|
+
| **Glossary Terms** | 73% | 8/11 | ⚠️ 3 missing |
|
|
182
|
+
| **Objectives & Key Results** | 92% | 11/12 | ⚠️ 1 missing |
|
|
183
|
+
| **Critical Data Elements** | 90% | 9/10 | ⚠️ 1 missing |
|
|
184
|
+
| **Policies** | 100% | 5/5 | ✅ Complete |
|
|
185
|
+
| **Relationships** | 100% | 6/6 | ✅ Complete |
|
|
186
|
+
| **Query** | 100% | 4/4 | ✅ Complete |
|
|
187
|
+
| **Custom Metadata** | 100% | 5/5 | ✅ Complete |
|
|
188
|
+
| **Custom Attributes** | 100% | 5/5 | ✅ Complete |
|
|
189
|
+
| **TOTAL** | **86%** | **45/52** | 🎯 **A- Grade** |
|
|
190
|
+
|
|
191
|
+
### 🚀 New APIs Implemented
|
|
192
|
+
|
|
193
|
+
1. **Relationships API (6 operations)**
|
|
194
|
+
```bash
|
|
195
|
+
# Link data product to entity
|
|
196
|
+
pvw uc dataproduct link-entity --id <dp-id> --entity-id <guid>
|
|
197
|
+
|
|
198
|
+
# Link CDE to column
|
|
199
|
+
pvw uc cde link-entity --id <cde-id> --entity-id <guid> --column-qualified-name "..."
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
2. **Query APIs (4 operations)**
|
|
203
|
+
```bash
|
|
204
|
+
# Advanced OData filtering
|
|
205
|
+
pvw uc term query --domain-ids "finance" --status Approved --top 50
|
|
206
|
+
|
|
207
|
+
# Multi-criteria search with pagination
|
|
208
|
+
pvw uc dataproduct query --keywords "customer,revenue" --skip 10 --top 25
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
3. **Policy Management (5 operations)**
|
|
212
|
+
```bash
|
|
213
|
+
# Complete policy CRUD
|
|
214
|
+
pvw uc policy list
|
|
215
|
+
pvw uc policy create --payload-file policy.json
|
|
216
|
+
pvw uc policy update --id <policy-id> --payload-file updated.json
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
4. **Custom Metadata (5 operations)**
|
|
220
|
+
```bash
|
|
221
|
+
# Business metadata via Atlas API
|
|
222
|
+
pvw uc custom-metadata import --file metadata.csv
|
|
223
|
+
pvw uc custom-metadata add --guid <entity-guid> --name "BusinessConcept"
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
5. **Custom Attributes (5 operations)**
|
|
227
|
+
```bash
|
|
228
|
+
# Extensible attribute definitions
|
|
229
|
+
pvw uc custom-attribute create --name "Department" --type String
|
|
230
|
+
pvw uc custom-attribute list
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
### 🔧 Major Fixes & Improvements
|
|
234
|
+
|
|
235
|
+
- **Lineage Management Overhaul** - Complete rewrite with interactive PowerShell script, real entity support, and proper Process entities
|
|
236
|
+
- **Search API Fixed** - Resolved HTTP 400 errors in suggest and autocomplete endpoints
|
|
237
|
+
- **Business Metadata Scope** - Fixed Business Concept attributes on Glossary Terms with proper applicableEntityTypes
|
|
238
|
+
- **Architecture Refactoring** - Unified endpoints dictionary, zero hardcoded URLs, complete consistency
|
|
239
|
+
|
|
240
|
+
### 📚 Documentation (3,500+ lines)
|
|
241
|
+
|
|
242
|
+
- 15+ new guides including relationships, query APIs, lineage creation, business metadata
|
|
243
|
+
- 80+ usage examples across all new features
|
|
244
|
+
- Complete API coverage gap analysis
|
|
245
|
+
- Roadmap to 100% with implementation plans
|
|
246
|
+
|
|
247
|
+
**[View Full v1.2.5 Release Notes](releases/v1.2.5.md)**
|
|
248
|
+
|
|
249
|
+
---
|
|
250
|
+
|
|
251
|
+
## Getting Started
|
|
252
|
+
|
|
253
|
+
Follow this short flow to get PVW CLI installed and running quickly.
|
|
254
|
+
|
|
255
|
+
1. Install (from PyPI):
|
|
256
|
+
|
|
257
|
+
```bash
|
|
258
|
+
pip install pvw-cli
|
|
259
|
+
```
|
|
260
|
+
|
|
261
|
+
For the bleeding edge or development:
|
|
262
|
+
|
|
263
|
+
```bash
|
|
264
|
+
pip install git+https://github.com/Keayoub/Purview_cli.git
|
|
265
|
+
# or for editable development
|
|
266
|
+
git clone https://github.com/Keayoub/Purview_cli.git
|
|
267
|
+
cd Purview_cli
|
|
268
|
+
pip install -r requirements.txt
|
|
269
|
+
pip install -e .
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
2. Set required environment variables (examples for cmd, PowerShell, and pwsh)
|
|
273
|
+
|
|
274
|
+
Windows cmd (example):
|
|
275
|
+
|
|
276
|
+
```cmd
|
|
277
|
+
set PURVIEW_ACCOUNT_NAME=your-purview-account
|
|
278
|
+
set PURVIEW_ACCOUNT_ID=your-purview-account-id-guid
|
|
279
|
+
set PURVIEW_RESOURCE_GROUP=your-resource-group-name
|
|
280
|
+
set "AZURE_REGION=" & REM optional
|
|
281
|
+
```
|
|
282
|
+
|
|
283
|
+
PowerShell (Windows PowerShell):
|
|
284
|
+
|
|
285
|
+
```powershell
|
|
286
|
+
$env:PURVIEW_ACCOUNT_NAME = "your-purview-account"
|
|
287
|
+
$env:PURVIEW_ACCOUNT_ID = "your-purview-account-id-guid"
|
|
288
|
+
$env:PURVIEW_RESOURCE_GROUP = "your-resource-group-name"
|
|
289
|
+
$env:AZURE_REGION = "" # optional
|
|
290
|
+
```
|
|
291
|
+
|
|
292
|
+
pwsh (PowerShell Core - cross-platform, recommended):
|
|
293
|
+
|
|
294
|
+
```pwsh
|
|
295
|
+
$env:PURVIEW_ACCOUNT_NAME = 'your-purview-account'
|
|
296
|
+
$env:PURVIEW_ACCOUNT_ID = 'your-purview-account-id-guid'
|
|
297
|
+
$env:PURVIEW_RESOURCE_GROUP = 'your-resource-group-name'
|
|
298
|
+
$env:AZURE_REGION = '' # optional
|
|
299
|
+
```
|
|
300
|
+
|
|
301
|
+
3. Authenticate
|
|
302
|
+
|
|
303
|
+
- Run `az login` (recommended), or
|
|
304
|
+
- Provide Service Principal credentials via environment variables.
|
|
305
|
+
|
|
306
|
+
4. Try a few commands:
|
|
307
|
+
|
|
308
|
+
```bash
|
|
309
|
+
# List governance domains
|
|
310
|
+
pvw uc domain list
|
|
311
|
+
|
|
312
|
+
# Search
|
|
313
|
+
pvw search query --keywords="customer" --limit=5
|
|
314
|
+
|
|
315
|
+
# Get help
|
|
316
|
+
pvw --help
|
|
317
|
+
pvw uc --help
|
|
318
|
+
```
|
|
319
|
+
|
|
320
|
+
For more advanced usage, see the documentation in `doc/` or the project docs: <https://pvw-cli.readthedocs.io/>
|
|
321
|
+
|
|
322
|
+
---
|
|
323
|
+
|
|
324
|
+
## Quick Start Examples
|
|
325
|
+
|
|
326
|
+
### v1.2.8 - Column-Level Lineage
|
|
327
|
+
|
|
328
|
+
```bash
|
|
329
|
+
# Create column-level lineage (Process-based)
|
|
330
|
+
pvw lineage create-column \
|
|
331
|
+
--process-name "ETL_Sales_Transform" \
|
|
332
|
+
--source-table-guid "9ebbd583-4987-4d1b-b4f5-d8f6f6f60000" \
|
|
333
|
+
--target-table-guids "c88126ba-5fb5-4d33-bbe2-5ff6f6f60000" \
|
|
334
|
+
--column-mapping "ProductID:ProductID,Name:Name"
|
|
335
|
+
|
|
336
|
+
# Create direct lineage (UI-style, no visible Process)
|
|
337
|
+
pvw lineage create-direct \
|
|
338
|
+
--source-guid "9ebbd583-4987-4d1b-b4f5-d8f6f6f60000" \
|
|
339
|
+
--target-guid "c88126ba-5fb5-4d33-bbe2-5ff6f6f60000" \
|
|
340
|
+
--column-mapping "ProductID:ProductID,Name:Name,Amount:TotalAmount"
|
|
341
|
+
|
|
342
|
+
# Import lineage from CSV (automatic type detection)
|
|
343
|
+
pvw lineage import samples/csv/lineage_with_columns.csv
|
|
344
|
+
|
|
345
|
+
# List column lineages
|
|
346
|
+
pvw lineage list-column --format table
|
|
347
|
+
|
|
348
|
+
# Delete column lineage
|
|
349
|
+
pvw lineage delete-column --process-guid <guid> --force
|
|
350
|
+
```
|
|
351
|
+
|
|
352
|
+
### v1.2.5 - Relationships API
|
|
353
|
+
|
|
354
|
+
```bash
|
|
355
|
+
# Link data product to SQL table
|
|
356
|
+
pvw uc dataproduct link-entity \
|
|
357
|
+
--id "dp-sales-2024" \
|
|
358
|
+
--entity-id "4fae348b-e960-42f7-834c-38f6f6f60000" \
|
|
359
|
+
--type-name "azure_sql_table"
|
|
360
|
+
|
|
361
|
+
# Link CDE to specific column
|
|
362
|
+
pvw uc cde link-entity \
|
|
363
|
+
--id "cde-customer-email" \
|
|
364
|
+
--entity-id "ea3412c3-7387-4bc1-9923-11f6f6f60000" \
|
|
365
|
+
--column-qualified-name "mssql://server/db/schema/table#EmailAddress"
|
|
366
|
+
|
|
367
|
+
# List all linked entities
|
|
368
|
+
pvw uc dataproduct list-entities --id "dp-sales-2024"
|
|
369
|
+
```
|
|
370
|
+
|
|
371
|
+
### v1.2.5 - Query APIs
|
|
372
|
+
|
|
373
|
+
```bash
|
|
374
|
+
# Query terms by domain and status
|
|
375
|
+
pvw uc term query --domain-ids "finance,sales" --status Approved --top 50
|
|
376
|
+
|
|
377
|
+
# Query data products with keywords
|
|
378
|
+
pvw uc dataproduct query --keywords "customer,revenue" --skip 0 --top 25
|
|
379
|
+
|
|
380
|
+
# Query CDEs by domain with pagination
|
|
381
|
+
pvw uc cde query --domain-ids "compliance" --orderby "name" --top 100
|
|
382
|
+
```
|
|
383
|
+
|
|
384
|
+
### v1.2.5 - Policy Management
|
|
385
|
+
|
|
386
|
+
```bash
|
|
387
|
+
# List all policies
|
|
388
|
+
pvw uc policy list
|
|
389
|
+
|
|
390
|
+
# Create new policy
|
|
391
|
+
pvw uc policy create --payload-file policy-rbac.json
|
|
392
|
+
|
|
393
|
+
# Update existing policy
|
|
394
|
+
pvw uc policy update --id "policy-001" --payload-file updated.json
|
|
395
|
+
```
|
|
396
|
+
|
|
397
|
+
### v1.2.5 - Custom Metadata
|
|
398
|
+
|
|
399
|
+
```bash
|
|
400
|
+
# Import business metadata from CSV
|
|
401
|
+
pvw uc custom-metadata import --file business_concept.csv
|
|
402
|
+
|
|
403
|
+
# Add metadata to entity
|
|
404
|
+
pvw uc custom-metadata add \
|
|
405
|
+
--guid "4fae348b-e960-42f7-834c-38f6f6f60000" \
|
|
406
|
+
--name "BusinessConcept" \
|
|
407
|
+
--attributes '{"Department":"Sales"}'
|
|
408
|
+
|
|
409
|
+
# Create custom attribute
|
|
410
|
+
pvw uc custom-attribute create --name "Department" --type String
|
|
411
|
+
```
|
|
412
|
+
|
|
413
|
+
---
|
|
414
|
+
|
|
415
|
+
## Overview
|
|
416
|
+
|
|
417
|
+
**PVW CLI v1.2.8** is a modern command-line interface and Python library for Microsoft Purview, enabling:
|
|
418
|
+
|
|
419
|
+
- **MCP Server** - Natural language interface for AI assistants (Claude, Cline)
|
|
420
|
+
- Advanced data catalog search and discovery
|
|
421
|
+
- Bulk import/export of entities, glossary terms, and lineage
|
|
422
|
+
- Real-time monitoring and analytics
|
|
423
|
+
- Automated governance and compliance
|
|
424
|
+
- Extensible plugin system
|
|
425
|
+
|
|
426
|
+
---
|
|
427
|
+
|
|
428
|
+
## Installation
|
|
429
|
+
|
|
430
|
+
You can install PVW CLI in two ways:
|
|
431
|
+
|
|
432
|
+
1. **From PyPI (recommended for most users):**
|
|
433
|
+
|
|
434
|
+
```bash
|
|
435
|
+
pip install pvw-cli
|
|
436
|
+
```
|
|
437
|
+
|
|
438
|
+
2. **Directly from the GitHub repository (for latest/dev version):**
|
|
439
|
+
|
|
440
|
+
```bash
|
|
441
|
+
pip install git+https://github.com/Keayoub/Purview_cli.git
|
|
442
|
+
```
|
|
443
|
+
|
|
444
|
+
Or for development (editable install):
|
|
445
|
+
|
|
446
|
+
```bash
|
|
447
|
+
git clone https://github.com/Keayoub/Purview_cli.git
|
|
448
|
+
cd Purview_cli
|
|
449
|
+
pip install -r requirements.txt
|
|
450
|
+
pip install -e .
|
|
451
|
+
```
|
|
452
|
+
|
|
453
|
+
---
|
|
454
|
+
|
|
455
|
+
## Requirements
|
|
456
|
+
|
|
457
|
+
- Python 3.8+
|
|
458
|
+
- Azure CLI (`az login`) or Service Principal credentials
|
|
459
|
+
- Microsoft Purview account
|
|
460
|
+
|
|
461
|
+
---
|
|
462
|
+
|
|
463
|
+
## Getting Started
|
|
464
|
+
|
|
465
|
+
1. **Install**
|
|
466
|
+
|
|
467
|
+
```bash
|
|
468
|
+
pip install pvw-cli
|
|
469
|
+
```
|
|
470
|
+
|
|
471
|
+
2. **Set Required Environment Variables**
|
|
472
|
+
|
|
473
|
+
```bash
|
|
474
|
+
# Required for Purview API access
|
|
475
|
+
export PURVIEW_ACCOUNT_NAME=your-purview-account
|
|
476
|
+
export PURVIEW_ACCOUNT_ID=your-purview-account-id-guid
|
|
477
|
+
export PURVIEW_RESOURCE_GROUP=your-resource-group-name
|
|
478
|
+
|
|
479
|
+
# Optional
|
|
480
|
+
export AZURE_REGION= # (optional, e.g. 'china', 'usgov')
|
|
481
|
+
```
|
|
482
|
+
|
|
483
|
+
3. **Authenticate**
|
|
484
|
+
|
|
485
|
+
- Azure CLI: `az login`
|
|
486
|
+
|
|
487
|
+
- Or set Service Principal credentials as environment variables
|
|
488
|
+
|
|
489
|
+
4. **Run a Command**
|
|
490
|
+
|
|
491
|
+
```bash
|
|
492
|
+
pvw search query --keywords="customer" --limit=5
|
|
493
|
+
```
|
|
494
|
+
|
|
495
|
+
5. **See All Commands**
|
|
496
|
+
|
|
497
|
+
```bash
|
|
498
|
+
pvw --help
|
|
499
|
+
```
|
|
500
|
+
|
|
501
|
+
---
|
|
502
|
+
|
|
503
|
+
## Authentication
|
|
504
|
+
|
|
505
|
+
PVW CLI supports multiple authentication methods for connecting to Microsoft Purview, powered by Azure Identity's `DefaultAzureCredential`. This allows you to use the CLI securely in local development, CI/CD, and production environments.
|
|
506
|
+
|
|
507
|
+
### 1. Azure CLI Authentication (Recommended for Interactive Use)
|
|
508
|
+
|
|
509
|
+
- Run `az login` to authenticate interactively with your Azure account.
|
|
510
|
+
- The CLI will automatically use your Azure CLI credentials.
|
|
511
|
+
|
|
512
|
+
### 2. Service Principal Authentication (Recommended for Automation/CI/CD)
|
|
513
|
+
|
|
514
|
+
Set the following environment variables before running any PVW CLI command:
|
|
515
|
+
|
|
516
|
+
- `AZURE_CLIENT_ID` (your Azure AD app registration/client ID)
|
|
517
|
+
- `AZURE_TENANT_ID` (your Azure AD tenant ID)
|
|
518
|
+
- `AZURE_CLIENT_SECRET` (your client secret)
|
|
519
|
+
|
|
520
|
+
**Example (Windows):**
|
|
521
|
+
|
|
522
|
+
```cmd
|
|
523
|
+
set AZURE_CLIENT_ID=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
|
|
524
|
+
set AZURE_TENANT_ID=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
|
|
525
|
+
set AZURE_CLIENT_SECRET=your-client-secret
|
|
526
|
+
```
|
|
527
|
+
|
|
528
|
+
**Example (Linux/macOS):**
|
|
529
|
+
|
|
530
|
+
```bash
|
|
531
|
+
export AZURE_CLIENT_ID=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
|
|
532
|
+
export AZURE_TENANT_ID=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
|
|
533
|
+
export AZURE_CLIENT_SECRET=your-client-secret
|
|
534
|
+
```
|
|
535
|
+
|
|
536
|
+
### 3. Managed Identity (for Azure VMs, App Services, etc.)
|
|
537
|
+
|
|
538
|
+
If running in Azure with a managed identity, no extra configuration is needed. The CLI will use the managed identity automatically.
|
|
539
|
+
|
|
540
|
+
### 4. Visual Studio/VS Code Authentication
|
|
541
|
+
|
|
542
|
+
If you are signed in to Azure in Visual Studio or VS Code, `DefaultAzureCredential` can use those credentials as a fallback.
|
|
543
|
+
|
|
544
|
+
---
|
|
545
|
+
|
|
546
|
+
**Note:**
|
|
547
|
+
|
|
548
|
+
- The CLI will try all supported authentication methods in order. The first one that works will be used.
|
|
549
|
+
- For most automation and CI/CD scenarios, service principal authentication is recommended.
|
|
550
|
+
- For local development, Azure CLI authentication is easiest.
|
|
551
|
+
|
|
552
|
+
For more details, see the [Azure Identity documentation](https://learn.microsoft.com/en-us/python/api/overview/azure/identity-readme?view=azure-python).
|
|
553
|
+
|
|
554
|
+
---
|
|
555
|
+
|
|
556
|
+
## Output Formats & Scripting Integration
|
|
557
|
+
|
|
558
|
+
PVW CLI supports multiple output formats to fit different use cases - from human-readable tables to machine-parseable JSON.
|
|
559
|
+
|
|
560
|
+
### Output Format Options
|
|
561
|
+
|
|
562
|
+
All `list` commands now support the `--output` parameter with three formats:
|
|
563
|
+
|
|
564
|
+
1. **`table`** (default) - Rich formatted table with colors for human viewing
|
|
565
|
+
2. **`json`** - Plain JSON for scripting with PowerShell, bash, jq, etc.
|
|
566
|
+
3. **`jsonc`** - Colored JSON with syntax highlighting for viewing
|
|
567
|
+
|
|
568
|
+
### PowerShell Integration
|
|
569
|
+
|
|
570
|
+
The `--output json` format produces plain JSON that works perfectly with PowerShell's `ConvertFrom-Json`:
|
|
571
|
+
|
|
572
|
+
```powershell
|
|
573
|
+
# Get all terms as PowerShell objects
|
|
574
|
+
$domainId = "59ae27b5-40bc-4c90-abfe-fe1a0638fe3a"
|
|
575
|
+
$terms = py -m purviewcli uc term list --domain-id $domainId --output json | ConvertFrom-Json
|
|
576
|
+
|
|
577
|
+
# Access properties
|
|
578
|
+
Write-Host "Found $($terms.Count) terms"
|
|
579
|
+
foreach ($term in $terms) {
|
|
580
|
+
Write-Host " • $($term.name) - $($term.status)"
|
|
581
|
+
}
|
|
582
|
+
|
|
583
|
+
# Filter and export
|
|
584
|
+
$draftTerms = $terms | Where-Object { $_.status -eq "Draft" }
|
|
585
|
+
$draftTerms | Export-Csv -Path "draft_terms.csv" -NoTypeInformation
|
|
586
|
+
|
|
587
|
+
# Group by status
|
|
588
|
+
$terms | Group-Object status | Format-Table Count, Name
|
|
589
|
+
```
|
|
590
|
+
|
|
591
|
+
### Bash/Linux Integration
|
|
592
|
+
|
|
593
|
+
Use `jq` for JSON processing in bash:
|
|
594
|
+
|
|
595
|
+
```bash
|
|
596
|
+
# Get domain ID
|
|
597
|
+
DOMAIN_ID="59ae27b5-40bc-4c90-abfe-fe1a0638fe3a"
|
|
598
|
+
|
|
599
|
+
# Get term names only
|
|
600
|
+
pvw uc term list --domain-id $DOMAIN_ID --output json | jq -r '.[] | .name'
|
|
601
|
+
|
|
602
|
+
# Count terms
|
|
603
|
+
pvw uc term list --domain-id $DOMAIN_ID --output json | jq 'length'
|
|
604
|
+
|
|
605
|
+
# Filter by status
|
|
606
|
+
pvw uc term list --domain-id $DOMAIN_ID --output json | jq '.[] | select(.status == "Draft")'
|
|
607
|
+
|
|
608
|
+
# Group by status
|
|
609
|
+
pvw uc term list --domain-id $DOMAIN_ID --output json | jq 'group_by(.status) | map({status: .[0].status, count: length})'
|
|
610
|
+
|
|
611
|
+
# Save to file
|
|
612
|
+
pvw uc term list --domain-id $DOMAIN_ID --output json > terms.json
|
|
613
|
+
```
|
|
614
|
+
|
|
615
|
+
### Examples by Command
|
|
616
|
+
|
|
617
|
+
```bash
|
|
618
|
+
# Domains
|
|
619
|
+
pvw uc domain list --output json | jq '.[] | .name'
|
|
620
|
+
|
|
621
|
+
# Terms
|
|
622
|
+
pvw uc term list --domain-id "abc-123" --output json
|
|
623
|
+
pvw uc term list --domain-id "abc-123" --output table # Default
|
|
624
|
+
pvw uc term list --domain-id "abc-123" --output jsonc # Colored for viewing
|
|
625
|
+
|
|
626
|
+
# Data Products
|
|
627
|
+
pvw uc dataproduct list --domain-id "abc-123" --output json
|
|
628
|
+
```
|
|
629
|
+
|
|
630
|
+
### Migration from Old --json Flag
|
|
631
|
+
|
|
632
|
+
**Old (deprecated):**
|
|
633
|
+
|
|
634
|
+
```bash
|
|
635
|
+
pvw uc term list --domain-id "abc-123" --json
|
|
636
|
+
```
|
|
637
|
+
|
|
638
|
+
**New (recommended):**
|
|
639
|
+
|
|
640
|
+
```bash
|
|
641
|
+
pvw uc term list --domain-id "abc-123" --output json # Plain JSON for scripting
|
|
642
|
+
pvw uc term list --domain-id "abc-123" --output jsonc # Colored JSON (old behavior)
|
|
643
|
+
```
|
|
644
|
+
|
|
645
|
+
---
|
|
646
|
+
|
|
647
|
+
## Required Purview Configuration
|
|
648
|
+
|
|
649
|
+
Before using PVW CLI, you need to set three essential environment variables. Here's how to find them:
|
|
650
|
+
|
|
651
|
+
### 🔍 **How to Find Your Purview Values**
|
|
652
|
+
|
|
653
|
+
#### **1. PURVIEW_ACCOUNT_NAME**
|
|
654
|
+
|
|
655
|
+
- This is your Purview account name as it appears in Azure Portal
|
|
656
|
+
- Example: `kaydemopurview`
|
|
657
|
+
|
|
658
|
+
#### **2. PURVIEW_ACCOUNT_ID**
|
|
659
|
+
|
|
660
|
+
- This is the GUID that identifies your Purview account for Unified Catalog APIs
|
|
661
|
+
- **Important: For most Purview deployments, this is your Azure Tenant ID**
|
|
662
|
+
|
|
663
|
+
- **Method 1 - Get your Tenant ID (recommended):**
|
|
664
|
+
|
|
665
|
+
**Bash/Command Prompt:**
|
|
666
|
+
|
|
667
|
+
```bash
|
|
668
|
+
az account show --query tenantId -o tsv
|
|
669
|
+
```
|
|
670
|
+
|
|
671
|
+
**PowerShell:**
|
|
672
|
+
|
|
673
|
+
```powershell
|
|
674
|
+
az account show --query tenantId -o tsv
|
|
675
|
+
# Or store directly in environment variable:
|
|
676
|
+
$env:PURVIEW_ACCOUNT_ID = az account show --query tenantId -o tsv
|
|
677
|
+
```
|
|
678
|
+
|
|
679
|
+
- **Method 2 - Azure CLI (extract from Atlas endpoint):**
|
|
680
|
+
|
|
681
|
+
```bash
|
|
682
|
+
az purview account show --name YOUR_ACCOUNT_NAME --resource-group YOUR_RG --query endpoints.catalog -o tsv
|
|
683
|
+
```
|
|
684
|
+
|
|
685
|
+
Extract the GUID from the URL (before `-api.purview-service.microsoft.com`)
|
|
686
|
+
|
|
687
|
+
- **Method 3 - Azure Portal:**
|
|
688
|
+
1. Go to your Purview account in Azure Portal
|
|
689
|
+
2. Navigate to Properties → Atlas endpoint URL
|
|
690
|
+
3. Extract GUID from: `https://GUID-api.purview-service.microsoft.com/catalog`
|
|
691
|
+
|
|
692
|
+
#### **3. PURVIEW_RESOURCE_GROUP**
|
|
693
|
+
|
|
694
|
+
- The Azure resource group containing your Purview account
|
|
695
|
+
- Example: `fabric-artifacts`
|
|
696
|
+
|
|
697
|
+
### 📋 **Setting the Variables**
|
|
698
|
+
|
|
699
|
+
**Windows Command Prompt:**
|
|
700
|
+
|
|
701
|
+
```cmd
|
|
702
|
+
set PURVIEW_ACCOUNT_NAME=your-purview-account
|
|
703
|
+
set PURVIEW_ACCOUNT_ID=your-purview-account-id
|
|
704
|
+
set PURVIEW_RESOURCE_GROUP=your-resource-group
|
|
705
|
+
```
|
|
706
|
+
|
|
707
|
+
**Windows PowerShell:**
|
|
708
|
+
|
|
709
|
+
```powershell
|
|
710
|
+
$env:PURVIEW_ACCOUNT_NAME="your-purview-account"
|
|
711
|
+
$env:PURVIEW_ACCOUNT_ID="your-purview-account-id"
|
|
712
|
+
$env:PURVIEW_RESOURCE_GROUP="your-resource-group"
|
|
713
|
+
```
|
|
714
|
+
|
|
715
|
+
**Linux/macOS:**
|
|
716
|
+
|
|
717
|
+
```bash
|
|
718
|
+
export PURVIEW_ACCOUNT_NAME=your-purview-account
|
|
719
|
+
export PURVIEW_ACCOUNT_ID=your-purview-account-id
|
|
720
|
+
export PURVIEW_RESOURCE_GROUP=your-resource-group
|
|
721
|
+
```
|
|
722
|
+
|
|
723
|
+
**Permanent (Windows Command Prompt):**
|
|
724
|
+
|
|
725
|
+
```cmd
|
|
726
|
+
setx PURVIEW_ACCOUNT_NAME "your-purview-account"
|
|
727
|
+
setx PURVIEW_ACCOUNT_ID "your-purview-account-id"
|
|
728
|
+
setx PURVIEW_RESOURCE_GROUP "your-resource-group"
|
|
729
|
+
```
|
|
730
|
+
|
|
731
|
+
**Permanent (Windows PowerShell):**
|
|
732
|
+
|
|
733
|
+
```powershell
|
|
734
|
+
[Environment]::SetEnvironmentVariable("PURVIEW_ACCOUNT_NAME", "your-purview-account", "User")
|
|
735
|
+
[Environment]::SetEnvironmentVariable("PURVIEW_ACCOUNT_ID", "your-purview-account-id", "User")
|
|
736
|
+
[Environment]::SetEnvironmentVariable("PURVIEW_RESOURCE_GROUP", "your-resource-group", "User")
|
|
737
|
+
```
|
|
738
|
+
|
|
739
|
+
### **Debug Environment Issues**
|
|
740
|
+
|
|
741
|
+
If you experience issues with environment variables between different terminals, use these debug commands:
|
|
742
|
+
|
|
743
|
+
**Command Prompt/Bash:**
|
|
744
|
+
|
|
745
|
+
```bash
|
|
746
|
+
# Run this to check your current environment
|
|
747
|
+
python -c "
|
|
748
|
+
import os
|
|
749
|
+
print('PURVIEW_ACCOUNT_NAME:', os.getenv('PURVIEW_ACCOUNT_NAME'))
|
|
750
|
+
print('PURVIEW_ACCOUNT_ID:', os.getenv('PURVIEW_ACCOUNT_ID'))
|
|
751
|
+
print('PURVIEW_RESOURCE_GROUP:', os.getenv('PURVIEW_RESOURCE_GROUP'))
|
|
752
|
+
"
|
|
753
|
+
```
|
|
754
|
+
|
|
755
|
+
**PowerShell:**
|
|
756
|
+
|
|
757
|
+
```powershell
|
|
758
|
+
# Check environment variables in PowerShell
|
|
759
|
+
python -c "
|
|
760
|
+
import os
|
|
761
|
+
print('PURVIEW_ACCOUNT_NAME:', os.getenv('PURVIEW_ACCOUNT_NAME'))
|
|
762
|
+
print('PURVIEW_ACCOUNT_ID:', os.getenv('PURVIEW_ACCOUNT_ID'))
|
|
763
|
+
print('PURVIEW_RESOURCE_GROUP:', os.getenv('PURVIEW_RESOURCE_GROUP'))
|
|
764
|
+
"
|
|
765
|
+
|
|
766
|
+
# Or use PowerShell native commands
|
|
767
|
+
Write-Host "PURVIEW_ACCOUNT_NAME: $env:PURVIEW_ACCOUNT_NAME"
|
|
768
|
+
Write-Host "PURVIEW_ACCOUNT_ID: $env:PURVIEW_ACCOUNT_ID"
|
|
769
|
+
Write-Host "PURVIEW_RESOURCE_GROUP: $env:PURVIEW_RESOURCE_GROUP"
|
|
770
|
+
```
|
|
771
|
+
|
|
772
|
+
---
|
|
773
|
+
|
|
774
|
+
## Search Command (Discovery Query API)
|
|
775
|
+
|
|
776
|
+
The PVW CLI provides advanced search using the latest Microsoft Purview Discovery Query API:
|
|
777
|
+
|
|
778
|
+
- Search for assets, tables, files, and more with flexible filters
|
|
779
|
+
- Use autocomplete and suggestion endpoints
|
|
780
|
+
- Perform faceted, time-based, and entity-type-specific queries
|
|
781
|
+
|
|
782
|
+
**v1.2.8 Improvements:**
|
|
783
|
+
|
|
784
|
+
- Fixed `suggest` and `autocomplete` API payload format (removed empty filter causing HTTP 400 errors)
|
|
785
|
+
- Enhanced collection display with robust type checking and fallback logic
|
|
786
|
+
- All search commands validated and working correctly (query, browse, suggest, find-table)
|
|
787
|
+
|
|
788
|
+
### CLI Usage Examples
|
|
789
|
+
|
|
790
|
+
#### **Multiple Output Formats**
|
|
791
|
+
|
|
792
|
+
```bash
|
|
793
|
+
# 1. Table Format (Default) - Quick overview
|
|
794
|
+
pvw search query --keywords="customer" --limit=5
|
|
795
|
+
# → Clean table with Name, Type, Collection, Classifications, Qualified Name
|
|
796
|
+
|
|
797
|
+
# 2. Detailed Format - Human-readable with all metadata
|
|
798
|
+
pvw search query --keywords="customer" --limit=5 --detailed
|
|
799
|
+
# → Rich panels showing full details, timestamps, search scores
|
|
800
|
+
|
|
801
|
+
# 3. JSON Format - Complete technical details with syntax highlighting (WELL-FORMATTED)
|
|
802
|
+
pvw search query --keywords="customer" --limit=5 --json
|
|
803
|
+
# → Full JSON response with indentation, line numbers and color coding
|
|
804
|
+
|
|
805
|
+
# 4. Table with IDs - For entity operations
|
|
806
|
+
pvw search query --keywords="customer" --limit=5 --show-ids
|
|
807
|
+
# → Table format + entity GUIDs for copy/paste into update commands
|
|
808
|
+
```
|
|
809
|
+
|
|
810
|
+
#### **Search Operations**
|
|
811
|
+
|
|
812
|
+
```bash
|
|
813
|
+
# Basic search for assets with keyword 'customer'
|
|
814
|
+
pvw search query --keywords="customer" --limit=5
|
|
815
|
+
|
|
816
|
+
# Advanced search with classification filter
|
|
817
|
+
pvw search query --keywords="sales" --classification="PII" --objectType="Tables" --limit=10
|
|
818
|
+
|
|
819
|
+
# Pagination through large result sets
|
|
820
|
+
pvw search query --keywords="SQL" --offset=10 --limit=5
|
|
821
|
+
|
|
822
|
+
# Autocomplete suggestions for partial keyword
|
|
823
|
+
pvw search autocomplete --keywords="ord" --limit=3
|
|
824
|
+
|
|
825
|
+
# Get search suggestions (fuzzy matching)
|
|
826
|
+
pvw search suggest --keywords="prod" --limit=2
|
|
827
|
+
|
|
828
|
+
```

**IMPORTANT - Command Line Quoting:**
|
|
829
|
+
```cmd
|
|
830
|
+
# [OK] CORRECT - Use quotes around keywords
|
|
831
|
+
pvw search query --keywords="customer" --limit=5
|
|
832
|
+
|
|
833
|
+
# [OK] CORRECT - For wildcard searches, use quotes
|
|
834
|
+
pvw search query --keywords="*" --limit=5
|
|
835
|
+
|
|
836
|
+
# ❌ WRONG - Don't use unquoted * (shell expands to file names)
|
|
837
|
+
pvw search query --keywords=* --limit=5
|
|
838
|
+
# This causes: "Error: Got unexpected extra arguments (dist doc ...)"
|
|
839
|
+
```
|
|
840
|
+
|
|
841
|
+
```bash
|
|
842
|
+
# Faceted search with aggregation
|
|
843
|
+
pvw search query --keywords="finance" --facetFields="objectType,classification" --limit=5
|
|
844
|
+
|
|
845
|
+
# Browse entities by type and path
|
|
846
|
+
pvw search browse --entityType="Tables" --path="/root/finance" --limit=2
|
|
847
|
+
|
|
848
|
+
# Time-based search for assets created after a date
|
|
849
|
+
pvw search query --keywords="audit" --createdAfter="2024-01-01" --limit=1
|
|
850
|
+
|
|
851
|
+
# Entity type specific search
|
|
852
|
+
pvw search query --keywords="finance" --entityTypes="Files,Tables" --limit=2
|
|
853
|
+
```
|
|
854
|
+
|
|
855
|
+
#### **Usage Scenarios**
|
|
856
|
+
|
|
857
|
+
- **Daily browsing**: Use default table format for quick scans
|
|
858
|
+
- **Understanding assets**: Use `--detailed` for rich information panels
|
|
859
|
+
- **Technical work**: Use `--json` for complete API data access
|
|
860
|
+
- **Entity operations**: Use `--show-ids` to get GUIDs for updates
|
|
861
|
+
|
|
862
|
+
### Python Usage Example
|
|
863
|
+
|
|
864
|
+
```python
|
|
865
|
+
from purviewcli.client._search import Search
|
|
866
|
+
|
|
867
|
+
search = Search()
|
|
868
|
+
args = {"--keywords": "customer", "--limit": 5}
|
|
869
|
+
search.searchQuery(args)
|
|
870
|
+
print(search.payload) # Shows the constructed search payload
|
|
871
|
+
```
|
|
872
|
+
|
|
873
|
+
### Test Examples
|
|
874
|
+
|
|
875
|
+
See `tests/test_search_examples.py` for ready-to-run pytest examples covering all search scenarios:
|
|
876
|
+
|
|
877
|
+
- Basic query
|
|
878
|
+
- Advanced filter
|
|
879
|
+
- Autocomplete
|
|
880
|
+
- Suggest
|
|
881
|
+
- Faceted search
|
|
882
|
+
- Browse
|
|
883
|
+
- Time-based search
|
|
884
|
+
- Entity type search
|
|
885
|
+
|
|
886
|
+
---
|
|
887
|
+
|
|
888
|
+
## Unified Catalog Management (NEW)
|
|
889
|
+
|
|
890
|
+
PVW CLI now includes comprehensive **Microsoft Purview Unified Catalog (UC)** support with the new `uc` command group. This provides complete management of modern data governance features including governance domains, glossary terms, data products, objectives (OKRs), and critical data elements.
|
|
891
|
+
|
|
892
|
+
**🎯 Feature Parity**: Full compatibility with [UnifiedCatalogPy](https://github.com/olafwrieden/unifiedcatalogpy) functionality.
|
|
893
|
+
|
|
894
|
+
See [`doc/commands/unified-catalog.md`](doc/commands/unified-catalog.md) for complete documentation and examples.
|
|
895
|
+
|
|
896
|
+
### Quick UC Examples
|
|
897
|
+
|
|
898
|
+
#### **Governance Domains Management**
|
|
899
|
+
|
|
900
|
+
```bash
|
|
901
|
+
# List all governance domains
|
|
902
|
+
pvw uc domain list
|
|
903
|
+
|
|
904
|
+
# Create a new governance domain
|
|
905
|
+
pvw uc domain create --name "Finance" --description "Financial data governance domain"
|
|
906
|
+
|
|
907
|
+
# Get domain details
|
|
908
|
+
pvw uc domain get --domain-id "abc-123-def-456"
|
|
909
|
+
|
|
910
|
+
# Update domain information
|
|
911
|
+
pvw uc domain update --domain-id "abc-123" --description "Updated financial governance"
|
|
912
|
+
```
|
|
913
|
+
|
|
914
|
+
#### **Glossary Terms in UC**
|
|
915
|
+
|
|
916
|
+
```bash
|
|
917
|
+
# List all terms in a domain
|
|
918
|
+
pvw uc term list --domain-id "abc-123"
|
|
919
|
+
pvw uc term list --domain-id "abc-123" --output json # Plain JSON for scripting
|
|
920
|
+
pvw uc term list --domain-id "abc-123" --output jsonc # Colored JSON for viewing
|
|
921
|
+
|
|
922
|
+
# Create a single glossary term
|
|
923
|
+
pvw uc term create --name "Customer" --domain-id "abc-123" --description "A person or entity that purchases products"
|
|
924
|
+
|
|
925
|
+
# Get term details
|
|
926
|
+
pvw uc term show --term-id "term-456"
|
|
927
|
+
|
|
928
|
+
# Update term
|
|
929
|
+
pvw uc term update --term-id "term-456" --description "Updated description"
|
|
930
|
+
|
|
931
|
+
# Delete term
|
|
932
|
+
pvw uc term delete --term-id "term-456" --confirm
|
|
933
|
+
```
|
|
934
|
+
|
|
935
|
+
**📦 Bulk Import (NEW)**
|
|
936
|
+
|
|
937
|
+
Import multiple terms from CSV or JSON files with validation and progress tracking:
|
|
938
|
+
|
|
939
|
+
```bash
|
|
940
|
+
# CSV Import - Preview with dry-run
|
|
941
|
+
pvw uc term import-csv --csv-file "samples/csv/uc_terms_bulk_example.csv" --domain-id "abc-123" --dry-run
|
|
942
|
+
|
|
943
|
+
# CSV Import - Actual import
|
|
944
|
+
pvw uc term import-csv --csv-file "samples/csv/uc_terms_bulk_example.csv" --domain-id "abc-123"
|
|
945
|
+
|
|
946
|
+
# JSON Import - Preview with dry-run
|
|
947
|
+
pvw uc term import-json --json-file "samples/json/term/uc_terms_bulk_example.json" --dry-run
|
|
948
|
+
|
|
949
|
+
# JSON Import - Actual import (domain_id from JSON or override with flag)
|
|
950
|
+
pvw uc term import-json --json-file "samples/json/term/uc_terms_bulk_example.json"
|
|
951
|
+
pvw uc term import-json --json-file "samples/json/term/uc_terms_bulk_example.json" --domain-id "abc-123"
|
|
952
|
+
```
|
|
953
|
+
|
|
954
|
+
**Bulk Import Features:**
|
|
955
|
+
|
|
956
|
+
- [OK] Import from CSV or JSON files
|
|
957
|
+
- [OK] Dry-run mode to preview before importing
|
|
958
|
+
- [OK] Support for multiple owners (Entra ID Object IDs), acronyms, and resources
|
|
959
|
+
- [OK] Progress tracking with Rich console output
|
|
960
|
+
- [OK] Detailed error messages and summary reports
|
|
961
|
+
- [OK] Sequential POST requests (no native bulk endpoint available)
|
|
962
|
+
|
|
963
|
+
**CSV Format Example:**
|
|
964
|
+
|
|
965
|
+
```csv
|
|
966
|
+
name,description,status,acronym,owner_id,resource_name,resource_url
|
|
967
|
+
Customer Acquisition Cost,Cost to acquire new customer,Draft,CAC,<guid>,Metrics Guide,https://docs.example.com
|
|
968
|
+
Monthly Recurring Revenue,Predictable monthly revenue,Draft,MRR,<guid>,Finance Dashboard,https://finance.example.com
|
|
969
|
+
```
|
|
970
|
+
|
|
971
|
+
**JSON Format Example:**
|
|
972
|
+
|
|
973
|
+
```json
|
|
974
|
+
{
|
|
975
|
+
"terms": [
|
|
976
|
+
{
|
|
977
|
+
"name": "Data Lake",
|
|
978
|
+
"description": "Centralized repository for structured/unstructured data",
|
|
979
|
+
"domain_id": "your-domain-id-here",
|
|
980
|
+
"status": "Draft",
|
|
981
|
+
"acronyms": ["DL"],
|
|
982
|
+
"owner_ids": ["<entra-id-object-id-guid>"],
|
|
983
|
+
"resources": [{"name": "Architecture Guide", "url": "https://example.com"}]
|
|
984
|
+
}
|
|
985
|
+
]
|
|
986
|
+
}
|
|
987
|
+
```
|
|
988
|
+
|
|
989
|
+
**Important Notes:**
|
|
990
|
+
|
|
991
|
+
- ⚠️ **Owner IDs must be Entra ID Object IDs (GUIDs)**, not email addresses
|
|
992
|
+
- ⚠️ **Terms cannot be "Published" in unpublished domains** - use "Draft" status
|
|
993
|
+
- [OK] Sample files available: `samples/csv/uc_terms_bulk_example.csv`, `samples/json/term/uc_terms_bulk_example.json`
|
|
994
|
+
- 📖 Complete documentation: [`doc/commands/unified-catalog/term-bulk-import.md`](doc/commands/unified-catalog/term-bulk-import.md)
|
|
995
|
+
|
|
996
|
+
**🗑️ Bulk Delete (NEW)**
|
|
997
|
+
|
|
998
|
+
Delete all terms in a domain using PowerShell or Python scripts:
|
|
999
|
+
|
|
1000
|
+
```powershell
|
|
1001
|
+
# PowerShell - Delete all terms with confirmation
|
|
1002
|
+
.\scripts\delete-all-uc-terms.ps1 -DomainId "abc-123"
|
|
1003
|
+
|
|
1004
|
+
# PowerShell - Delete without confirmation
|
|
1005
|
+
.\scripts\delete-all-uc-terms.ps1 -DomainId "abc-123" -Force
|
|
1006
|
+
```
|
|
1007
|
+
|
|
1008
|
+
```bash
|
|
1009
|
+
# Python - Delete all terms with confirmation
|
|
1010
|
+
python scripts/delete_all_uc_terms_v2.py --domain-id "abc-123"
|
|
1011
|
+
|
|
1012
|
+
# Python - Delete without confirmation
|
|
1013
|
+
python scripts/delete_all_uc_terms_v2.py --domain-id "abc-123" --force
|
|
1014
|
+
```
|
|
1015
|
+
|
|
1016
|
+
**Bulk Delete Features:**
|
|
1017
|
+
|
|
1018
|
+
- [OK] Interactive confirmation prompts (type "DELETE" to confirm)
|
|
1019
|
+
- [OK] Beautiful progress display with colors
|
|
1020
|
+
- [OK] Success/failure tracking per term
|
|
1021
|
+
- [OK] Detailed summary reports
|
|
1022
|
+
- [OK] Rate limiting (200ms delay between deletes)
|
|
1023
|
+
- [OK] Graceful error handling and Ctrl+C support
|
|
1024
|
+
|
|
1025
|
+
#### **Data Products Management**
|
|
1026
|
+
|
|
1027
|
+
```bash
|
|
1028
|
+
# List all data products in a domain
|
|
1029
|
+
pvw uc dataproduct list --domain-id "abc-123"
|
|
1030
|
+
|
|
1031
|
+
# Create a comprehensive data product
|
|
1032
|
+
pvw uc dataproduct create \
|
|
1033
|
+
--name "Customer Analytics Dashboard" \
|
|
1034
|
+
--domain-id "abc-123" \
|
|
1035
|
+
--description "360-degree customer analytics with behavioral insights" \
|
|
1036
|
+
--type Analytical \
|
|
1037
|
+
--status Draft
|
|
1038
|
+
|
|
1039
|
+
# Get detailed data product information
|
|
1040
|
+
pvw uc dataproduct show --product-id "prod-789"
|
|
1041
|
+
|
|
1042
|
+
# Update data product (partial updates supported - only specify fields to change)
|
|
1043
|
+
pvw uc dataproduct update \
|
|
1044
|
+
--product-id "prod-789" \
|
|
1045
|
+
--status Published \
|
|
1046
|
+
--description "Updated comprehensive customer analytics" \
|
|
1047
|
+
--endorsed
|
|
1048
|
+
|
|
1049
|
+
# Update multiple fields at once
|
|
1050
|
+
pvw uc dataproduct update \
|
|
1051
|
+
--product-id "prod-789" \
|
|
1052
|
+
--status Published \
|
|
1053
|
+
--update-frequency Monthly \
|
|
1054
|
+
--endorsed
|
|
1055
|
+
|
|
1056
|
+
# Delete a data product (with confirmation)
|
|
1057
|
+
pvw uc dataproduct delete --product-id "prod-789"
|
|
1058
|
+
|
|
1059
|
+
# Delete without confirmation prompt
|
|
1060
|
+
pvw uc dataproduct delete --product-id "prod-789" --yes
|
|
1061
|
+
```
|
|
1062
|
+
|
|
1063
|
+
#### **Objectives & Key Results (OKRs)**
|
|
1064
|
+
|
|
1065
|
+
```bash
|
|
1066
|
+
# List objectives for a domain
|
|
1067
|
+
pvw uc objective list --domain-id "abc-123"
|
|
1068
|
+
|
|
1069
|
+
# Create measurable objectives
|
|
1070
|
+
pvw uc objective create \
|
|
1071
|
+
--definition "Improve data quality score by 25% within Q4" \
|
|
1072
|
+
--domain-id "abc-123" \
|
|
1073
|
+
--target-value "95" \
|
|
1074
|
+
--measurement-unit "percentage"
|
|
1075
|
+
|
|
1076
|
+
# Track objective progress
|
|
1077
|
+
pvw uc objective update \
|
|
1078
|
+
--objective-id "obj-456" \
|
|
1079
|
+
--domain-id "abc-123" \
|
|
1080
|
+
--current-value "87" \
|
|
1081
|
+
--status "in-progress"
|
|
1082
|
+
```
|
|
1083
|
+
|
|
1084
|
+
#### **Critical Data Elements (CDEs)**
|
|
1085
|
+
|
|
1086
|
+
```bash
|
|
1087
|
+
# List critical data elements
|
|
1088
|
+
pvw uc cde list --domain-id "abc-123"
|
|
1089
|
+
|
|
1090
|
+
# Define critical data elements with governance rules
|
|
1091
|
+
pvw uc cde create \
|
|
1092
|
+
--name "Social Security Number" \
|
|
1093
|
+
--data-type "String" \
|
|
1094
|
+
--domain-id "abc-123" \
|
|
1095
|
+
--classification "PII" \
|
|
1096
|
+
--retention-period "7-years"
|
|
1097
|
+
|
|
1098
|
+
# Associate CDEs with data assets
|
|
1099
|
+
pvw uc cde link \
|
|
1100
|
+
--cde-id "cde-789" \
|
|
1101
|
+
--domain-id "abc-123" \
|
|
1102
|
+
--asset-id "ea3412c3-7387-4bc1-9923-11f6f6f60000"
|
|
1103
|
+
```
|
|
1104
|
+
|
|
1105
|
+
#### **Health Monitoring (NEW)**
|
|
1106
|
+
|
|
1107
|
+
Monitor governance health and get automated recommendations to improve your data governance posture.
|
|
1108
|
+
|
|
1109
|
+
```bash
|
|
1110
|
+
# List all health findings and recommendations
|
|
1111
|
+
pvw uc health query
|
|
1112
|
+
|
|
1113
|
+
# Filter by severity
|
|
1114
|
+
pvw uc health query --severity High
|
|
1115
|
+
pvw uc health query --severity Medium
|
|
1116
|
+
|
|
1117
|
+
# Filter by status
|
|
1118
|
+
pvw uc health query --status NotStarted
|
|
1119
|
+
pvw uc health query --status InProgress
|
|
1120
|
+
|
|
1121
|
+
# Get detailed information about a specific health action
|
|
1122
|
+
pvw uc health show --action-id "5ea3fc78-6a77-4098-8779-ed81de6f87c9"
|
|
1123
|
+
|
|
1124
|
+
# Update health action status
|
|
1125
|
+
pvw uc health update \
|
|
1126
|
+
--action-id "5ea3fc78-6a77-4098-8779-ed81de6f87c9" \
|
|
1127
|
+
--status InProgress \
|
|
1128
|
+
--reason "Working on assigning glossary terms to data products"
|
|
1129
|
+
|
|
1130
|
+
# Get health summary statistics
|
|
1131
|
+
pvw uc health summary
|
|
1132
|
+
|
|
1133
|
+
# Output health findings in JSON format
|
|
1134
|
+
pvw uc health query --json
|
|
1135
|
+
```
|
|
1136
|
+
|
|
1137
|
+
**Health Finding Types:**
|
|
1138
|
+
|
|
1139
|
+
- Missing glossary terms on data products (High)
|
|
1140
|
+
- Data products without OKRs (Medium)
|
|
1141
|
+
- Missing data quality scores (Medium)
|
|
1142
|
+
- Classification gaps on data assets (Medium)
|
|
1143
|
+
- Description quality issues (Medium)
|
|
1144
|
+
- Business domains without critical data elements (Medium)
|
|
1145
|
+
|
|
1146
|
+
#### **Workflow Management (NEW)**
|
|
1147
|
+
|
|
1148
|
+
Manage approval workflows and business process automation in Purview.
|
|
1149
|
+
|
|
1150
|
+
```bash
|
|
1151
|
+
# List all workflows
|
|
1152
|
+
pvw workflow list
|
|
1153
|
+
|
|
1154
|
+
# Get workflow details
|
|
1155
|
+
pvw workflow get --workflow-id "workflow-123"
|
|
1156
|
+
|
|
1157
|
+
# Create a new workflow (requires JSON definition)
|
|
1158
|
+
pvw workflow create --workflow-id "approval-flow-1" --payload-file workflow-definition.json
|
|
1159
|
+
|
|
1160
|
+
# Execute a workflow
|
|
1161
|
+
pvw workflow execute --workflow-id "workflow-123"
|
|
1162
|
+
|
|
1163
|
+
# List workflow executions
|
|
1164
|
+
pvw workflow executions --workflow-id "workflow-123"
|
|
1165
|
+
|
|
1166
|
+
# View specific execution details
|
|
1167
|
+
pvw workflow execution-details --workflow-id "workflow-123" --execution-id "exec-456"
|
|
1168
|
+
|
|
1169
|
+
# Update workflow configuration
|
|
1170
|
+
pvw workflow update --workflow-id "workflow-123" --payload-file updated-workflow.json
|
|
1171
|
+
|
|
1172
|
+
# Delete a workflow
|
|
1173
|
+
pvw workflow delete --workflow-id "workflow-123"
|
|
1174
|
+
|
|
1175
|
+
# Output workflows in JSON format
|
|
1176
|
+
pvw workflow list --json
|
|
1177
|
+
```
|
|
1178
|
+
|
|
1179
|
+
**Workflow Use Cases:**
|
|
1180
|
+
|
|
1181
|
+
- Data access request approvals
|
|
1182
|
+
- Glossary term certification workflows
|
|
1183
|
+
- Data product publishing approvals
|
|
1184
|
+
- Classification review processes
|
|
1185
|
+
|
|
1186
|
+
#### **Integrated Workflow Example**
|
|
1187
|
+
|
|
1188
|
+
```bash
|
|
1189
|
+
# 1. Discover assets to govern
|
|
1190
|
+
pvw search query --keywords="customer" --detailed
|
|
1191
|
+
|
|
1192
|
+
# 2. Create governance domain for discovered assets
|
|
1193
|
+
pvw uc domain create --name "Customer Data" --description "Customer information governance"
|
|
1194
|
+
|
|
1195
|
+
# 3. Define governance terms
|
|
1196
|
+
pvw uc term create --name "Customer PII" --domain-id "new-domain-id" --description "Personal customer information"
|
|
1197
|
+
|
|
1198
|
+
# 4. Create data product from discovered assets
|
|
1199
|
+
pvw uc dataproduct create --name "Customer Master Data" --domain-id "new-domain-id"
|
|
1200
|
+
|
|
1201
|
+
# 5. Set governance objectives
|
|
1202
|
+
pvw uc objective create --definition "Ensure 100% PII classification compliance" --domain-id "new-domain-id"
|
|
1203
|
+
```
|
|
1204
|
+
|
|
1205
|
+
---
|
|
1206
|
+
|
|
1207
|
+
## Entity Management & Updates
|
|
1208
|
+
|
|
1209
|
+
PVW CLI provides comprehensive entity management capabilities for updating Purview assets like descriptions, classifications, and custom attributes.
|
|
1210
|
+
|
|
1211
|
+
### **Entity Update Examples**
|
|
1212
|
+
|
|
1213
|
+
#### **Update Asset Descriptions**
|
|
1214
|
+
|
|
1215
|
+
```bash
|
|
1216
|
+
# Update table description using GUID
|
|
1217
|
+
pvw entity update-attribute \
|
|
1218
|
+
--guid "ece43ce5-ac45-4e50-a4d0-365a64299efc" \
|
|
1219
|
+
--attribute "description" \
|
|
1220
|
+
--value "Updated customer data warehouse table with enhanced analytics"
|
|
1221
|
+
|
|
1222
|
+
# Update dataset description using qualified name
|
|
1223
|
+
pvw entity update-attribute \
|
|
1224
|
+
--qualifiedName "https://app.powerbi.com/groups/abc-123/datasets/def-456" \
|
|
1225
|
+
--attribute "description" \
|
|
1226
|
+
--value "Power BI dataset for customer analytics dashboard"
|
|
1227
|
+
```
|
|
1228
|
+
|
|
1229
|
+
#### **Bulk Entity Operations**
|
|
1230
|
+
|
|
1231
|
+
```bash
|
|
1232
|
+
# Read entity details before updating
|
|
1233
|
+
pvw entity read-by-attribute \
|
|
1234
|
+
--guid "ea3412c3-7387-4bc1-9923-11f6f6f60000" \
|
|
1235
|
+
--attribute "description,classifications,customAttributes"
|
|
1236
|
+
|
|
1237
|
+
# Update multiple attributes at once
|
|
1238
|
+
pvw entity update-bulk \
|
|
1239
|
+
--input-file entities_to_update.json \
|
|
1240
|
+
--output-file update_results.json
|
|
1241
|
+
```
|
|
1242
|
+
|
|
1243
|
+
#### **Column-Level Updates**
|
|
1244
|
+
|
|
1245
|
+
```bash
|
|
1246
|
+
# Update specific column descriptions in a table
|
|
1247
|
+
pvw entity update-attribute \
|
|
1248
|
+
--guid "column-guid-123" \
|
|
1249
|
+
--attribute "description" \
|
|
1250
|
+
--value "Customer unique identifier - Primary Key"
|
|
1251
|
+
|
|
1252
|
+
# Add classifications to sensitive columns
|
|
1253
|
+
pvw entity add-classification \
|
|
1254
|
+
--guid "column-guid-456" \
|
|
1255
|
+
--classification "MICROSOFT.PERSONAL.EMAIL"
|
|
1256
|
+
```
|
|
1257
|
+
|
|
1258
|
+
### **Discovery to Update Workflow**
|
|
1259
|
+
|
|
1260
|
+
```bash
|
|
1261
|
+
# 1. Find assets that need updates
|
|
1262
|
+
pvw search query --keywords="customer table" --show-ids --limit=10
|
|
1263
|
+
|
|
1264
|
+
# 2. Get detailed information about a specific asset
|
|
1265
|
+
pvw entity read-by-attribute --guid "FOUND_GUID" --attribute "description,classifications"
|
|
1266
|
+
|
|
1267
|
+
# 3. Update the asset description
|
|
1268
|
+
pvw entity update-attribute \
|
|
1269
|
+
--guid "FOUND_GUID" \
|
|
1270
|
+
--attribute "description" \
|
|
1271
|
+
--value "Updated description based on business requirements"
|
|
1272
|
+
|
|
1273
|
+
# 4. Verify the update
|
|
1274
|
+
pvw search query --keywords="FOUND_GUID" --detailed
|
|
1275
|
+
```
|
|
1276
|
+
|
|
1277
|
+
---
|
|
1278
|
+
|
|
1279
|
+
## Lineage CSV Import & Management
|
|
1280
|
+
|
|
1281
|
+
PVW CLI provides powerful lineage management capabilities including CSV-based bulk import for automating data lineage creation.
|
|
1282
|
+
|
|
1283
|
+
### **Lineage CSV Import**
|
|
1284
|
+
|
|
1285
|
+
Import lineage relationships from CSV files to automate the creation of data flow documentation in Microsoft Purview.
|
|
1286
|
+
|
|
1287
|
+
#### **CSV Format**
|
|
1288
|
+
|
|
1289
|
+
The CSV file uses the following columns (only the required ones must be present):
|
|
1290
|
+
|
|
1291
|
+
**Required columns:**
|
|
1292
|
+
|
|
1293
|
+
- `source_entity_guid` - GUID of the source entity
|
|
1294
|
+
- `target_entity_guid` - GUID of the target entity
|
|
1295
|
+
|
|
1296
|
+
**Optional columns:**
|
|
1297
|
+
|
|
1298
|
+
- `relationship_type` - Type of relationship (default: "Process")
|
|
1299
|
+
- `process_name` - Name of the transformation process
|
|
1300
|
+
- `description` - Description of the transformation
|
|
1301
|
+
- `confidence_score` - Confidence score (0-1)
|
|
1302
|
+
- `owner` - Process owner
|
|
1303
|
+
- `metadata` - Additional JSON metadata
|
|
1304
|
+
|
|
1305
|
+
**Example CSV:**
|
|
1306
|
+
|
|
1307
|
+
```csv
|
|
1308
|
+
source_entity_guid,target_entity_guid,relationship_type,process_name,description,confidence_score,owner,metadata
|
|
1309
|
+
dcfc99ed-c74d-49aa-bd0b-72f6f6f60000,1db9c650-acfb-4914-8bc5-1cf6f6f60000,Process,Transform_Product_Data,Transform product data for analytics,0.95,data-engineering,"{""tool"": ""Azure Data Factory""}"
|
|
1310
|
+
```
|
|
1311
|
+
|
|
1312
|
+
#### **Lineage Commands**
|
|
1313
|
+
|
|
1314
|
+
```bash
|
|
1315
|
+
# Validate CSV format before import (no API calls)
|
|
1316
|
+
pvw lineage validate lineage_data.csv
|
|
1317
|
+
|
|
1318
|
+
# Import lineage relationships from CSV
|
|
1319
|
+
pvw lineage import lineage_data.csv
|
|
1320
|
+
|
|
1321
|
+
# Generate sample CSV file with examples
|
|
1322
|
+
pvw lineage sample output.csv --num-samples 10 --template detailed
|
|
1323
|
+
|
|
1324
|
+
# View available CSV templates
|
|
1325
|
+
pvw lineage templates
|
|
1326
|
+
```
|
|
1327
|
+
|
|
1328
|
+
#### **Available Templates**
|
|
1329
|
+
|
|
1330
|
+
- **`basic`** - Minimal columns (source, target, process name)
|
|
1331
|
+
- **`detailed`** - All columns including metadata and confidence scores
|
|
1332
|
+
- **`qualified_names`** - Use qualified names instead of GUIDs
|
|
1333
|
+
|
|
1334
|
+
#### **Workflow Example**
|
|
1335
|
+
|
|
1336
|
+
```bash
|
|
1337
|
+
# 1. Find entity GUIDs using search
|
|
1338
|
+
pvw search find-table --name "Product" --schema "dbo" --id-only
|
|
1339
|
+
|
|
1340
|
+
# 2. Create CSV file with lineage relationships
|
|
1341
|
+
# (use the GUIDs from step 1)
|
|
1342
|
+
|
|
1343
|
+
# 3. Validate CSV format
|
|
1344
|
+
pvw lineage validate my_lineage.csv
|
|
1345
|
+
# Output: SUCCESS: Lineage validation passed (5 rows, 8 columns)
|
|
1346
|
+
|
|
1347
|
+
# 4. Import to Purview
|
|
1348
|
+
pvw lineage import my_lineage.csv
|
|
1349
|
+
# Output: SUCCESS: Lineage import completed successfully
|
|
1350
|
+
```
|
|
1351
|
+
|
|
1352
|
+
#### **Advanced Features**
|
|
1353
|
+
|
|
1354
|
+
- **GUID Validation**: Automatic validation of GUID format with helpful error messages
|
|
1355
|
+
- **Process Entity Creation**: Creates intermediate "Process" entities to link source→target relationships
|
|
1356
|
+
- **Metadata Support**: Add custom JSON metadata to each lineage relationship
|
|
1357
|
+
- **Dry-Run Validation**: Validate CSV format locally before making API calls
|
|
1358
|
+
|
|
1359
|
+
**For detailed documentation, see:** [`doc/guides/lineage-csv-import.md`](doc/guides/lineage-csv-import.md)
|
|
1360
|
+
|
|
1361
|
+
---
|
|
1362
|
+
|
|
1363
|
+
## Data Product Management (Legacy)
|
|
1364
|
+
|
|
1365
|
+
PVW CLI also includes the original `data-product` command group for backward compatibility with traditional data product lifecycle management.
|
|
1366
|
+
|
|
1367
|
+
See [`doc/commands/data-product.md`](doc/commands/data-product.md) for full documentation and examples.
|
|
1368
|
+
|
|
1369
|
+
### Example Commands
|
|
1370
|
+
|
|
1371
|
+
```bash
|
|
1372
|
+
# Create a data product
|
|
1373
|
+
pvw data-product create --qualified-name="product.test.1" --name="Test Product" --description="A test data product"
|
|
1374
|
+
|
|
1375
|
+
# Add classification and label
|
|
1376
|
+
pvw data-product add-classification --qualified-name="product.test.1" --classification="PII"
|
|
1377
|
+
pvw data-product add-label --qualified-name="product.test.1" --label="gold"
|
|
1378
|
+
|
|
1379
|
+
# Link glossary term
|
|
1380
|
+
pvw data-product link-glossary --qualified-name="product.test.1" --term="Customer"
|
|
1381
|
+
|
|
1382
|
+
# Set status and show lineage
|
|
1383
|
+
pvw data-product set-status --qualified-name="product.test.1" --status="active"
|
|
1384
|
+
pvw data-product show-lineage --qualified-name="product.test.1"
|
|
1385
|
+
```
|
|
1386
|
+
|
|
1387
|
+
---
|
|
1388
|
+
|
|
1389
|
+
## Core Features
|
|
1390
|
+
|
|
1391
|
+
- **Unified Catalog (UC)**: Complete modern data governance (NEW)
|
|
1392
|
+
|
|
1393
|
+
```bash
|
|
1394
|
+
# Manage governance domains, terms, data products, OKRs, CDEs
|
|
1395
|
+
pvw uc domain list
|
|
1396
|
+
pvw uc term create --name "Customer" --domain-id "abc-123"
|
|
1397
|
+
pvw uc objective create --definition "Improve quality" --domain-id "abc-123"
|
|
1398
|
+
```
|
|
1399
|
+
|
|
1400
|
+
- **Discovery Query/Search**: Flexible, advanced search for all catalog assets
|
|
1401
|
+
- **Entity Management**: Bulk import/export, update, and validation
|
|
1402
|
+
- **Glossary Management**: Import/export terms, assign terms in bulk
|
|
1403
|
+
|
|
1404
|
+
```bash
|
|
1405
|
+
# List all terms in a glossary
|
|
1406
|
+
pvw glossary list-terms --glossary-guid "your-glossary-guid"
|
|
1407
|
+
|
|
1408
|
+
# Create and manage glossary terms
|
|
1409
|
+
pvw glossary create-term --payload-file term.json
|
|
1410
|
+
```
|
|
1411
|
+
|
|
1412
|
+
- **Lineage Operations**: Lineage discovery, CSV-based bulk lineage import/export
|
|
1413
|
+
|
|
1414
|
+
```bash
|
|
1415
|
+
# Import lineage relationships from CSV
|
|
1416
|
+
pvw lineage import lineage_data.csv
|
|
1417
|
+
|
|
1418
|
+
# Validate CSV format before import
|
|
1419
|
+
pvw lineage validate lineage_data.csv
|
|
1420
|
+
|
|
1421
|
+
# Generate sample CSV file
|
|
1422
|
+
pvw lineage sample output.csv --num-samples 10
|
|
1423
|
+
```
|
|
1424
|
+
|
|
1425
|
+
- **Monitoring & Analytics**: Real-time dashboards, metrics, and reporting
|
|
1426
|
+
- **Plugin System**: Extensible with custom plugins
|
|
1427
|
+
|
|
1428
|
+
---
|
|
1429
|
+
|
|
1430
|
+
## API Coverage and Support
|
|
1431
|
+
|
|
1432
|
+
PVW CLI provides comprehensive automation for all major Microsoft Purview APIs, including the new **Unified Catalog APIs** for modern data governance.
|
|
1433
|
+
|
|
1434
|
+
### Supported API Groups
|
|
1435
|
+
|
|
1436
|
+
- **Unified Catalog**: Complete governance domains, glossary terms, data products, OKRs, CDEs management [OK]
|
|
1437
|
+
- **Health Monitoring**: Automated governance health checks and recommendations [OK] NEW
|
|
1438
|
+
- **Workflows**: Approval workflows and business process automation [OK] NEW
|
|
1439
|
+
- **Data Map**: Full entity and lineage management [OK]
|
|
1440
|
+
- **Discovery**: Advanced search, browse, and query capabilities [OK]
|
|
1441
|
+
- **Collections**: Collection and account management [OK]
|
|
1442
|
+
- **Management**: Administrative operations [OK]
|
|
1443
|
+
- **Scan**: Data source scanning and configuration [OK]
|
|
1444
|
+
|
|
1445
|
+
### API Version Support
|
|
1446
|
+
|
|
1447
|
+
- **Unified Catalog**: Latest UC API endpoints (September 2025)
|
|
1448
|
+
- Data Map: **2024-03-01-preview** (default) or **2023-09-01** (stable)
|
|
1449
|
+
- Collections: **2019-11-01-preview**
|
|
1450
|
+
- Account: **2019-11-01-preview**
|
|
1451
|
+
- Management: **2021-07-01**
|
|
1452
|
+
- Scan: **2018-12-01-preview**
|
|
1453
|
+
|
|
1454
|
+
For the latest API documentation and updates, see:
|
|
1455
|
+
|
|
1456
|
+
- [Microsoft Purview REST API reference](https://learn.microsoft.com/en-us/rest/api/purview/)
|
|
1457
|
+
- [Atlas 2.2 API documentation](https://learn.microsoft.com/en-us/purview/data-gov-api-atlas-2-2)
|
|
1458
|
+
- [Azure Updates](https://azure.microsoft.com/updates/) for new releases
|
|
1459
|
+
|
|
1460
|
+
If you need a feature that is not yet implemented, please open an issue or check for updates in future releases.
|
|
1461
|
+
|
|
1462
|
+
---
|
|
1463
|
+
|
|
1464
|
+
## Sample Files & Scripts
|
|
1465
|
+
|
|
1466
|
+
PVW CLI includes comprehensive sample files and scripts for bulk operations:
|
|
1467
|
+
|
|
1468
|
+
### Bulk Import Samples
|
|
1469
|
+
|
|
1470
|
+
- **CSV Samples:** `samples/csv/uc_terms_bulk_example.csv` (8 sample terms)
|
|
1471
|
+
- **JSON Samples:**
|
|
1472
|
+
- `samples/json/term/uc_terms_bulk_example.json` (8 data management terms)
|
|
1473
|
+
- `samples/json/term/uc_terms_sample.json` (8 business terms)
|
|
1474
|
+
- **Lineage CSV Samples:** `samples/csv/lineage_example.csv` - Multiple lineage relationships with metadata
|
|
1475
|
+
|
|
1476
|
+
### Lineage Documentation
|
|
1477
|
+
|
|
1478
|
+
- **Comprehensive Guide:** `doc/guides/lineage-csv-import.md` - Complete lineage CSV import documentation
|
|
1479
|
+
- CSV format specification with required/optional columns
|
|
1480
|
+
- Command examples for validate, import, sample, and templates
|
|
1481
|
+
- Workflow recommendations and troubleshooting
|
|
1482
|
+
- Advanced scenarios with metadata and multiple transformations
|
|
1483
|
+
|
|
1484
|
+
### Bulk Delete Scripts
|
|
1485
|
+
|
|
1486
|
+
- **PowerShell:** `scripts/delete-all-uc-terms.ps1` - Full-featured with confirmation prompts
|
|
1487
|
+
- **Python:** `scripts/delete_all_uc_terms_v2.py` - Rich progress bars and error handling
|
|
1488
|
+
|
|
1489
|
+
### Test Scripts
|
|
1490
|
+
|
|
1491
|
+
- **PowerShell:** `scripts/test-json-output.ps1` - Validates JSON output parsing
|
|
1492
|
+
|
|
1493
|
+
### Jupyter Notebooks
|
|
1494
|
+
|
|
1495
|
+
- `samples/notebooks (plus)/unified_catalog_terms_examples.ipynb` - Complete examples including:
|
|
1496
|
+
- Examples 10-16: Bulk import demonstrations
|
|
1497
|
+
- Code generation for CSV/JSON files
|
|
1498
|
+
- Dry-run and actual import examples
|
|
1499
|
+
- Term verification workflows
|
|
1500
|
+
|
|
1501
|
+
---
|
|
1502
|
+
|
|
1503
|
+
## Documentation
|
|
1504
|
+
|
|
1505
|
+
### Core Documentation
|
|
1506
|
+
|
|
1507
|
+
- **Main Documentation:** [`doc/README.md`](doc/README.md)
|
|
1508
|
+
- **Unified Catalog:** [`doc/commands/unified-catalog.md`](doc/commands/unified-catalog.md)
|
|
1509
|
+
- **Bulk Import Guide:** [`doc/commands/unified-catalog/term-bulk-import.md`](doc/commands/unified-catalog/term-bulk-import.md)
|
|
1510
|
+
- **Data Products:** [`doc/commands/data-product.md`](doc/commands/data-product.md)
|
|
1511
|
+
|
|
1512
|
+
### Quick Reference
|
|
1513
|
+
|
|
1514
|
+
- **API Coverage:** All major Purview APIs including Unified Catalog, Data Map, Discovery, Collections
|
|
1515
|
+
- **Authentication:** Azure CLI, Service Principal, Managed Identity support
|
|
1516
|
+
- **Output Formats:** Table (default), JSON (plain), JSONC (colored)
|
|
1517
|
+
- **Bulk Operations:** Import/export terms from CSV/JSON, bulk delete scripts
|
|
1518
|
+
|
|
1519
|
+
---
|
|
1520
|
+
|
|
1521
|
+
## Recent Updates (October 2025)
|
|
1522
|
+
|
|
1523
|
+
### Bulk Term Import/Export
|
|
1524
|
+
|
|
1525
|
+
- Import multiple terms from CSV or JSON files
|
|
1526
|
+
- Dry-run mode for validation before import
|
|
1527
|
+
- Support for owners (Entra ID GUIDs), acronyms, resources
|
|
1528
|
+
- Progress tracking and detailed error reporting
|
|
1529
|
+
- 100% success rate in testing (8/8 terms)
|
|
1530
|
+
|
|
1531
|
+
### PowerShell & Scripting Integration
|
|
1532
|
+
|
|
1533
|
+
- New `--output` parameter with table/json/jsonc formats
|
|
1534
|
+
- Plain JSON works with PowerShell's `ConvertFrom-Json`
|
|
1535
|
+
- Compatible with jq, Python json module, and other tools
|
|
1536
|
+
- Migration path from the deprecated `--json` flag
|
|
1537
|
+
|
|
1538
|
+
### Bulk Delete Scripts
|
|
1539
|
+
|
|
1540
|
+
- PowerShell script with interactive confirmation ("DELETE" to confirm)
|
|
1541
|
+
- Python script with Rich progress bars
|
|
1542
|
+
- Beautiful UI with colored output
|
|
1543
|
+
- Success/failure tracking per term
|
|
1544
|
+
- Rate limiting (200ms delay)
|
|
1545
|
+
|
|
1546
|
+
### Critical Fixes (v1.2.8)
|
|
1547
|
+
|
|
1548
|
+
- **Search API Suggest/Autocomplete:** Fixed HTTP 400 errors by removing empty filter objects from payload
|
|
1549
|
+
- **Collection Display:** Enhanced collection name detection with proper fallback logic (isinstance checks)
|
|
1550
|
+
- **Owner ID Format:** Must use Entra ID Object IDs (GUIDs), not email addresses
|
|
1551
|
+
- **Domain Status:** Terms cannot be "Published" in unpublished domains - use "Draft"
|
|
1552
|
+
- **Error Validation:** Enhanced error handling shows actual API responses
|
|
1553
|
+
- **Windows Console Compatibility:** All emoji removed for CP-1252 encoding support
|
|
1554
|
+
|
|
1555
|
+
---
|
|
1556
|
+
|
|
1557
|
+
## Key Features Summary
|
|
1558
|
+
|
|
1559
|
+
### **Unified Catalog (UC) - Complete Management**
|
|
1560
|
+
|
|
1561
|
+
- Governance domains, glossary terms, data products
|
|
1562
|
+
- Objectives & Key Results (OKRs), Critical Data Elements (CDEs)
|
|
1563
|
+
- Health monitoring and workflow automation
|
|
1564
|
+
- Full CRUD operations with smart partial updates
|
|
1565
|
+
|
|
1566
|
+
### **Bulk Operations**
|
|
1567
|
+
|
|
1568
|
+
- CSV/JSON import with dry-run validation
|
|
1569
|
+
- PowerShell and Python bulk delete scripts
|
|
1570
|
+
- Progress tracking and error handling
|
|
1571
|
+
- Sample files and templates included
|
|
1572
|
+
|
|
1573
|
+
### **Multiple Output Formats**
|
|
1574
|
+
|
|
1575
|
+
- Table format for human viewing (default)
|
|
1576
|
+
- Plain JSON for PowerShell/bash scripting
|
|
1577
|
+
- Colored JSON for visual inspection
|
|
1578
|
+
|
|
1579
|
+
### **Automation & Integration**
|
|
1580
|
+
|
|
1581
|
+
- Azure CLI, Service Principal, Managed Identity auth
|
|
1582
|
+
- Works in local development, CI/CD, and production
|
|
1583
|
+
- Compatible with PowerShell, bash, Python, jq
|
|
1584
|
+
|
|
1585
|
+
### **Comprehensive Documentation**
|
|
1586
|
+
|
|
1587
|
+
- Complete API coverage documentation
|
|
1588
|
+
- Jupyter notebook examples
|
|
1589
|
+
- Troubleshooting guides
|
|
1590
|
+
- Sample files and templates
|
|
1591
|
+
|
|
1592
|
+
---
|
|
1593
|
+
|
|
1594
|
+
## Contributing & Support
|
|
1595
|
+
|
|
1596
|
+
- **Documentation:** [Full Documentation](https://github.com/Keayoub/Purview_cli/blob/main/doc/README.md)
|
|
1597
|
+
- **Issue Tracker:** [GitHub Issues](https://github.com/Keayoub/Purview_cli/issues)
|
|
1598
|
+
- **Email Support:** [keayoub@msn.com](mailto:keayoub@msn.com)
|
|
1599
|
+
- **Repository:** [GitHub - Keayoub/Purview_cli](https://github.com/Keayoub/Purview_cli)
|
|
1600
|
+
|
|
1601
|
+
---
|
|
1602
|
+
|
|
1603
|
+
## License
|
|
1604
|
+
|
|
1605
|
+
See [LICENSE](LICENSE) file for details.
|
|
1606
|
+
|
|
1607
|
+
---
|
|
1608
|
+
|
|
1609
|
+
**PVW CLI v1.2.8 empowers data engineers, stewards, and architects to automate, scale, and enhance their Microsoft Purview experience with powerful command-line and programmatic capabilities.**
|
|
1610
|
+
|
|
1611
|
+
**Latest in v1.2.8:**
|
|
1612
|
+
|
|
1613
|
+
- Fixed Search API suggest/autocomplete (HTTP 400 errors resolved)
|
|
1614
|
+
- Enhanced collection display with robust fallback logic
|
|
1615
|
+
- Comprehensive search command validation
|
|
1616
|
+
- Bulk term import/export with dry-run support
|
|
1617
|
+
- PowerShell integration with plain JSON output
|
|
1618
|
+
- Multiple output formats and beautiful progress tracking
|