pvw-cli 1.2.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pvw-cli might be problematic. Click here for more details.

Files changed (60) hide show
  1. purviewcli/__init__.py +27 -0
  2. purviewcli/__main__.py +15 -0
  3. purviewcli/cli/__init__.py +5 -0
  4. purviewcli/cli/account.py +199 -0
  5. purviewcli/cli/cli.py +170 -0
  6. purviewcli/cli/collections.py +502 -0
  7. purviewcli/cli/domain.py +361 -0
  8. purviewcli/cli/entity.py +2436 -0
  9. purviewcli/cli/glossary.py +533 -0
  10. purviewcli/cli/health.py +250 -0
  11. purviewcli/cli/insight.py +113 -0
  12. purviewcli/cli/lineage.py +1103 -0
  13. purviewcli/cli/management.py +141 -0
  14. purviewcli/cli/policystore.py +103 -0
  15. purviewcli/cli/relationship.py +75 -0
  16. purviewcli/cli/scan.py +357 -0
  17. purviewcli/cli/search.py +527 -0
  18. purviewcli/cli/share.py +478 -0
  19. purviewcli/cli/types.py +831 -0
  20. purviewcli/cli/unified_catalog.py +3540 -0
  21. purviewcli/cli/workflow.py +402 -0
  22. purviewcli/client/__init__.py +21 -0
  23. purviewcli/client/_account.py +1877 -0
  24. purviewcli/client/_collections.py +1761 -0
  25. purviewcli/client/_domain.py +414 -0
  26. purviewcli/client/_entity.py +3545 -0
  27. purviewcli/client/_glossary.py +3233 -0
  28. purviewcli/client/_health.py +501 -0
  29. purviewcli/client/_insight.py +2873 -0
  30. purviewcli/client/_lineage.py +2138 -0
  31. purviewcli/client/_management.py +2202 -0
  32. purviewcli/client/_policystore.py +2915 -0
  33. purviewcli/client/_relationship.py +1351 -0
  34. purviewcli/client/_scan.py +2607 -0
  35. purviewcli/client/_search.py +1472 -0
  36. purviewcli/client/_share.py +272 -0
  37. purviewcli/client/_types.py +2708 -0
  38. purviewcli/client/_unified_catalog.py +5112 -0
  39. purviewcli/client/_workflow.py +2734 -0
  40. purviewcli/client/api_client.py +1295 -0
  41. purviewcli/client/business_rules.py +675 -0
  42. purviewcli/client/config.py +231 -0
  43. purviewcli/client/data_quality.py +433 -0
  44. purviewcli/client/endpoint.py +123 -0
  45. purviewcli/client/endpoints.py +554 -0
  46. purviewcli/client/exceptions.py +38 -0
  47. purviewcli/client/lineage_visualization.py +797 -0
  48. purviewcli/client/monitoring_dashboard.py +712 -0
  49. purviewcli/client/rate_limiter.py +30 -0
  50. purviewcli/client/retry_handler.py +125 -0
  51. purviewcli/client/scanning_operations.py +523 -0
  52. purviewcli/client/settings.py +1 -0
  53. purviewcli/client/sync_client.py +250 -0
  54. purviewcli/plugins/__init__.py +1 -0
  55. purviewcli/plugins/plugin_system.py +709 -0
  56. pvw_cli-1.2.8.dist-info/METADATA +1618 -0
  57. pvw_cli-1.2.8.dist-info/RECORD +60 -0
  58. pvw_cli-1.2.8.dist-info/WHEEL +5 -0
  59. pvw_cli-1.2.8.dist-info/entry_points.txt +3 -0
  60. pvw_cli-1.2.8.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1618 @@
1
+ Metadata-Version: 2.4
2
+ Name: pvw-cli
3
+ Version: 1.2.8
4
+ Summary: Microsoft Purview CLI with comprehensive automation capabilities
5
+ Author-email: AYOUB KEBAILI <keayoub@msn.com>
6
+ Maintainer-email: AYOUB KEBAILI <keayoub@msn.com>
7
+ License-Expression: MIT
8
+ Project-URL: Homepage, https://github.com/Keayoub/Purview_cli
9
+ Project-URL: Documentation, https://github.com/Keayoub/Purview_cli/wiki
10
+ Project-URL: Repository, https://github.com/Keayoub/Purview_cli.git
11
+ Project-URL: Bug Tracker, https://github.com/Keayoub/Purview_cli/issues
12
+ Project-URL: Source, https://github.com/Keayoub/Purview_cli
13
+ Keywords: azure,purview,cli,data,catalog,governance,automation,pvw
14
+ Classifier: Development Status :: 4 - Beta
15
+ Classifier: Intended Audience :: Developers
16
+ Classifier: Intended Audience :: System Administrators
17
+ Classifier: Operating System :: OS Independent
18
+ Classifier: Programming Language :: Python :: 3
19
+ Classifier: Programming Language :: Python :: 3.8
20
+ Classifier: Programming Language :: Python :: 3.9
21
+ Classifier: Programming Language :: Python :: 3.10
22
+ Classifier: Programming Language :: Python :: 3.11
23
+ Classifier: Programming Language :: Python :: 3.12
24
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
25
+ Classifier: Topic :: System :: Systems Administration
26
+ Classifier: Topic :: Database
27
+ Classifier: Topic :: Internet :: WWW/HTTP
28
+ Requires-Python: >=3.8
29
+ Description-Content-Type: text/markdown
30
+ Requires-Dist: azure-identity>=1.12.0
31
+ Requires-Dist: azure-core>=1.24.0
32
+ Requires-Dist: click>=8.0.0
33
+ Requires-Dist: rich>=12.0.0
34
+ Requires-Dist: requests>=2.28.0
35
+ Requires-Dist: pandas>=1.5.0
36
+ Requires-Dist: aiohttp>=3.8.0
37
+ Requires-Dist: pydantic<2.12,>=1.10.0
38
+ Requires-Dist: PyYAML>=6.0
39
+ Requires-Dist: cryptography<46.0.0,>=41.0.5
40
+ Provides-Extra: dev
41
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
42
+ Requires-Dist: pytest-asyncio>=0.20.0; extra == "dev"
43
+ Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
44
+ Requires-Dist: black>=22.0.0; extra == "dev"
45
+ Requires-Dist: isort>=5.10.0; extra == "dev"
46
+ Requires-Dist: flake8>=5.0.0; extra == "dev"
47
+ Requires-Dist: mypy>=0.991; extra == "dev"
48
+ Requires-Dist: pre-commit>=2.20.0; extra == "dev"
49
+ Provides-Extra: docs
50
+ Requires-Dist: sphinx>=5.0.0; extra == "docs"
51
+ Requires-Dist: sphinx-rtd-theme>=1.0.0; extra == "docs"
52
+ Requires-Dist: myst-parser>=0.18.0; extra == "docs"
53
+ Provides-Extra: test
54
+ Requires-Dist: pytest>=7.0.0; extra == "test"
55
+ Requires-Dist: pytest-asyncio>=0.20.0; extra == "test"
56
+ Requires-Dist: pytest-cov>=4.0.0; extra == "test"
57
+ Requires-Dist: requests-mock>=1.9.0; extra == "test"
58
+
59
+ # PURVIEW CLI v1.2.8 - Microsoft Purview Automation & Data Governance
60
+
61
+ [![Version](https://img.shields.io/badge/version-1.2.8-blue.svg)](https://github.com/Keayoub/pvw-cli/releases/tag/v1.2.8)
62
+ [![API Coverage](https://img.shields.io/badge/UC%20API%20Coverage-86%25-green.svg)](https://github.com/Keayoub/pvw-cli)
63
+ [![Lineage](https://img.shields.io/badge/Lineage-Enhanced-green.svg)](https://github.com/Keayoub/pvw-cli)
64
+ [![Status](https://img.shields.io/badge/status-stable-success.svg)](https://github.com/Keayoub/pvw-cli)
65
+
66
+ > **LATEST UPDATE v1.2.8 (November 3, 2025):**
67
+ >
68
+ > **🔗 Advanced Lineage Features & Column-Level Mapping**
69
+ >
70
+ > **New Lineage Capabilities:**
71
+ > - **[NEW]** Column-level lineage with multi-target support (1 source → N targets)
72
+ > - **[NEW]** Direct lineage (UI-style) - No visible Process entity
73
+ > - **[NEW]** Dual-mode CSV import - Automatic detection of Process vs Direct lineage
74
+ > - **[NEW]** Column mapping in direct relationships - Granular data flow tracking
75
+ > - **[NEW]** Enhanced error handling with SSL retry strategies
76
+ >
77
+ > **New CLI Commands:**
78
+ > ```bash
79
+ > pvw lineage create-column # Column-level lineage (Process-based)
80
+ > pvw lineage create-direct # Direct lineage (UI-style, no Process)
81
+ > pvw lineage list-column # List column lineages
82
+ > pvw lineage delete-column # Delete column lineage
83
+ > ```
84
+ >
85
+ > **What's New:**
86
+ > - ✅ Column mapping visible in Purview UI
87
+ > - ✅ Compatible with manual UI lineage creation
88
+ > - ✅ Type validation (prevent invalid lineage)
89
+ > - ✅ Batch CSV import with 5 sample files
90
+ > - ✅ Comprehensive documentation & examples
91
+ >
92
+ > **Previous Releases:**
93
+ > - **v1.2.6** - Initial lineage improvements
94
+ > - **v1.2.5** - 86% UC API Coverage (45/52 operations) with Relationships, Query, Policies APIs
95
+ >
96
+ > **[Full Release Notes v1.2.8](releases/v1.2.8.md)** | **[v1.2.5 Release Notes](releases/v1.2.5.md)** | **[Migration Guide](releases/v1.2.8.md#migration-guide)**
97
+
98
+ ---
99
+
100
+ ## What is PVW CLI?
101
+
102
+ **PVW CLI v1.2.8** is a modern, full-featured command-line interface and Python library for Microsoft Purview. It enables automation and management of *all major Purview APIs* with **86% Unified Catalog API coverage** (45 of 52 operations).
103
+
104
+ ### Key Capabilities
105
+
106
+ **Unified Catalog (UC) Management - 86% Complete**
107
+ - Complete governance domains, glossary terms, data products, OKRs, CDEs
108
+ - Relationships API - Link data products/CDEs/terms to entities and columns
109
+ - Query APIs - Advanced OData filtering with multi-criteria search
110
+ - Policy Management - Complete CRUD for governance and RBAC policies
111
+ - Custom Metadata & Attributes - Extensible business metadata and attributes
112
+
113
+ **Data Operations**
114
+ - Entity management (create, update, bulk, import/export)
115
+ - Lineage operations with interactive creation and CSV import
116
+ - Advanced search and discovery with fixed suggest/autocomplete
117
+ - Business metadata with proper scope configuration
118
+
119
+ **Automation & Scripting**
120
+ - Bulk Operations - Import/export from CSV/JSON with dry-run support
121
+ - Scriptable Output - Multiple formats (table, json, jsonc) for PowerShell/bash
122
+ - 80+ usage examples and 15+ comprehensive guides
123
+ - PowerShell integration with ConvertFrom-Json support
124
+
125
+ **Legacy API Support**
126
+ - Collection and account management
127
+ - Data product management (legacy compatibility)
128
+ - Classification, label, and status management
129
+
130
+ The CLI is designed for data engineers, stewards, architects, and platform teams to automate, scale, and enhance their Microsoft Purview experience.
131
+
132
+ ### NEW: MCP Server for AI Assistants
133
+
134
+ **[NEW]** Model Context Protocol (MCP) server enables LLM-powered data governance workflows!
135
+
136
+ - Natural language interface to Purview catalog
137
+ - 20+ tools for AI assistants (Claude, Cline, etc.)
138
+ - Automate complex multi-step operations
139
+ - See `mcp/README.md` for setup instructions
140
+
141
+ ---
142
+
143
+ ## What's New in Recent Releases
144
+
145
+ ### v1.2.8 (November 3, 2025) - Advanced Lineage Features
146
+
147
+ **Column-Level Lineage & Direct Relationships:**
148
+ - Column-level lineage with multi-target support (1→N)
149
+ - Direct lineage creation (UI-style, no visible Process)
150
+ - Dual-mode CSV import with automatic type detection
151
+ - Column mapping in direct relationships
152
+ - Enhanced error handling with SSL retry strategies
153
+
154
+ **New Commands:**
155
+ ```bash
156
+ pvw lineage create-column # Column lineage (Process-based)
157
+ pvw lineage create-direct # Direct lineage (UI-style)
158
+ pvw lineage list-column # List column lineages
159
+ pvw lineage delete-column # Delete lineage
160
+ ```
161
+
162
+ **CSV Import Examples:**
163
+ ```csv
164
+ # Direct lineage with column mapping
165
+ source_entity_guid,target_entity_guid,relationship_type,column_mapping
166
+ guid1,guid2,direct_lineage_dataset_dataset,"[{""Source"":""ID"",""Sink"":""ID""}]"
167
+ ```
168
+
169
+ **[Full v1.2.8 Release Notes](releases/v1.2.8.md)**
170
+
171
+ ---
172
+
173
+ ### v1.2.5 (October 30, 2025) - 86% UC API Coverage
174
+
175
+ Version 1.2.5 achieves **86% coverage** of the Microsoft Purview Unified Catalog API with **35 new operations**:
176
+
177
+ | Resource Type | Coverage | Operations | Status |
178
+ |--------------|----------|------------|---------|
179
+ | **Business Domains** | 100% | 5/5 | ✅ Complete |
180
+ | **Data Products** | 90% | 9/10 | ⚠️ 1 missing (Facets) |
181
+ | **Glossary Terms** | 73% | 8/11 | ⚠️ 3 missing |
182
+ | **Objectives & Key Results** | 92% | 11/12 | ⚠️ 1 missing |
183
+ | **Critical Data Elements** | 90% | 9/10 | ⚠️ 1 missing |
184
+ | **Policies** | 100% | 5/5 | ✅ Complete |
185
+ | **Relationships** | 100% | 6/6 | ✅ Complete |
186
+ | **Query** | 100% | 4/4 | ✅ Complete |
187
+ | **Custom Metadata** | 100% | 5/5 | ✅ Complete |
188
+ | **Custom Attributes** | 100% | 5/5 | ✅ Complete |
189
+ | **TOTAL** | **86%** | **45/52** | 🎯 **A- Grade** |
190
+
191
+ ### 🚀 New APIs Implemented
192
+
193
+ 1. **Relationships API (6 operations)**
194
+ ```bash
195
+ # Link data product to entity
196
+ pvw uc dataproduct link-entity --id <dp-id> --entity-id <guid>
197
+
198
+ # Link CDE to column
199
+ pvw uc cde link-entity --id <cde-id> --entity-id <guid> --column-qualified-name "..."
200
+ ```
201
+
202
+ 2. **Query APIs (4 operations)**
203
+ ```bash
204
+ # Advanced OData filtering
205
+ pvw uc term query --domain-ids "finance" --status Approved --top 50
206
+
207
+ # Multi-criteria search with pagination
208
+ pvw uc dataproduct query --keywords "customer,revenue" --skip 10 --top 25
209
+ ```
210
+
211
+ 3. **Policy Management (5 operations)**
212
+ ```bash
213
+ # Complete policy CRUD
214
+ pvw uc policy list
215
+ pvw uc policy create --payload-file policy.json
216
+ pvw uc policy update --id <policy-id> --payload-file updated.json
217
+ ```
218
+
219
+ 4. **Custom Metadata (5 operations)**
220
+ ```bash
221
+ # Business metadata via Atlas API
222
+ pvw uc custom-metadata import --file metadata.csv
223
+ pvw uc custom-metadata add --guid <entity-guid> --name "BusinessConcept"
224
+ ```
225
+
226
+ 5. **Custom Attributes (5 operations)**
227
+ ```bash
228
+ # Extensible attribute definitions
229
+ pvw uc custom-attribute create --name "Department" --type String
230
+ pvw uc custom-attribute list
231
+ ```
232
+
233
+ ### 🔧 Major Fixes & Improvements
234
+
235
+ - **Lineage Management Overhaul** - Complete rewrite with interactive PowerShell script, real entity support, and proper Process entities
236
+ - **Search API Fixed** - Resolved HTTP 400 errors in suggest and autocomplete endpoints
237
+ - **Business Metadata Scope** - Fixed Business Concept attributes on Glossary Terms with proper applicableEntityTypes
238
+ - **Architecture Refactoring** - Unified endpoints dictionary, zero hardcoded URLs, complete consistency
239
+
240
+ ### 📚 Documentation (3,500+ lines)
241
+
242
+ - 15+ new guides including relationships, query APIs, lineage creation, business metadata
243
+ - 80+ usage examples across all new features
244
+ - Complete API coverage gap analysis
245
+ - Roadmap to 100% with implementation plans
246
+
247
+ **[View Full Release Notes](releases/v1.2.5.md)**
248
+
249
+ ---
250
+
251
+ ## Getting Started
252
+
253
+ Follow this short flow to get PVW CLI installed and running quickly.
254
+
255
+ 1. Install (from PyPI):
256
+
257
+ ```bash
258
+ pip install pvw-cli
259
+ ```
260
+
261
+ For the bleeding edge or development:
262
+
263
+ ```bash
264
+ pip install git+https://github.com/Keayoub/Purview_cli.git
265
+ # or for editable development
266
+ git clone https://github.com/Keayoub/Purview_cli.git
267
+ cd Purview_cli
268
+ pip install -r requirements.txt
269
+ pip install -e .
270
+ ```
271
+
272
+ 2. Set required environment variables (examples for cmd, PowerShell, and pwsh)
273
+
274
+ Windows cmd (example):
275
+
276
+ ```cmd
277
+ set PURVIEW_ACCOUNT_NAME=your-purview-account
278
+ set PURVIEW_ACCOUNT_ID=your-purview-account-id-guid
279
+ set PURVIEW_RESOURCE_GROUP=your-resource-group-name
280
+ REM AZURE_REGION is optional; leave it unset unless you need it
+ set AZURE_REGION=
281
+ ```
282
+
283
+ PowerShell (Windows PowerShell):
284
+
285
+ ```powershell
286
+ $env:PURVIEW_ACCOUNT_NAME = "your-purview-account"
287
+ $env:PURVIEW_ACCOUNT_ID = "your-purview-account-id-guid"
288
+ $env:PURVIEW_RESOURCE_GROUP = "your-resource-group-name"
289
+ $env:AZURE_REGION = "" # optional
290
+ ```
291
+
292
+ pwsh (PowerShell Core - cross-platform, recommended):
293
+
294
+ ```pwsh
295
+ $env:PURVIEW_ACCOUNT_NAME = 'your-purview-account'
296
+ $env:PURVIEW_ACCOUNT_ID = 'your-purview-account-id-guid'
297
+ $env:PURVIEW_RESOURCE_GROUP = 'your-resource-group-name'
298
+ $env:AZURE_REGION = '' # optional
299
+ ```
300
+
301
+ 3. Authenticate
302
+
303
+ - Run `az login` (recommended), or
304
+ - Provide Service Principal credentials via environment variables.
305
+
306
+ 4. Try a few commands:
307
+
308
+ ```bash
309
+ # List governance domains
310
+ pvw uc domain list
311
+
312
+ # Search
313
+ pvw search query --keywords="customer" --limit=5
314
+
315
+ # Get help
316
+ pvw --help
317
+ pvw uc --help
318
+ ```
319
+
320
+ For more advanced usage, see the documentation in `doc/` or the project docs: <https://pvw-cli.readthedocs.io/>
321
+
322
+ ---
323
+
324
+ ## Quick Start Examples
325
+
326
+ ### v1.2.8 - Column-Level Lineage
327
+
328
+ ```bash
329
+ # Create column-level lineage (Process-based)
330
+ pvw lineage create-column \
331
+ --process-name "ETL_Sales_Transform" \
332
+ --source-table-guid "9ebbd583-4987-4d1b-b4f5-d8f6f6f60000" \
333
+ --target-table-guids "c88126ba-5fb5-4d33-bbe2-5ff6f6f60000" \
334
+ --column-mapping "ProductID:ProductID,Name:Name"
335
+
336
+ # Create direct lineage (UI-style, no visible Process)
337
+ pvw lineage create-direct \
338
+ --source-guid "9ebbd583-4987-4d1b-b4f5-d8f6f6f60000" \
339
+ --target-guid "c88126ba-5fb5-4d33-bbe2-5ff6f6f60000" \
340
+ --column-mapping "ProductID:ProductID,Name:Name,Amount:TotalAmount"
341
+
342
+ # Import lineage from CSV (automatic type detection)
343
+ pvw lineage import samples/csv/lineage_with_columns.csv
344
+
345
+ # List column lineages
346
+ pvw lineage list-column --format table
347
+
348
+ # Delete column lineage
349
+ pvw lineage delete-column --process-guid <guid> --force
350
+ ```
351
+
352
+ ### v1.2.5 - Relationships API
353
+
354
+ ```bash
355
+ # Link data product to SQL table
356
+ pvw uc dataproduct link-entity \
357
+ --id "dp-sales-2024" \
358
+ --entity-id "4fae348b-e960-42f7-834c-38f6f6f60000" \
359
+ --type-name "azure_sql_table"
360
+
361
+ # Link CDE to specific column
362
+ pvw uc cde link-entity \
363
+ --id "cde-customer-email" \
364
+ --entity-id "ea3412c3-7387-4bc1-9923-11f6f6f60000" \
365
+ --column-qualified-name "mssql://server/db/schema/table#EmailAddress"
366
+
367
+ # List all linked entities
368
+ pvw uc dataproduct list-entities --id "dp-sales-2024"
369
+ ```
370
+
371
+ ### v1.2.5 - Query APIs
372
+
373
+ ```bash
374
+ # Query terms by domain and status
375
+ pvw uc term query --domain-ids "finance,sales" --status Approved --top 50
376
+
377
+ # Query data products with keywords
378
+ pvw uc dataproduct query --keywords "customer,revenue" --skip 0 --top 25
379
+
380
+ # Query CDEs by domain with pagination
381
+ pvw uc cde query --domain-ids "compliance" --orderby "name" --top 100
382
+ ```
383
+
384
+ ### v1.2.5 - Policy Management
385
+
386
+ ```bash
387
+ # List all policies
388
+ pvw uc policy list
389
+
390
+ # Create new policy
391
+ pvw uc policy create --payload-file policy-rbac.json
392
+
393
+ # Update existing policy
394
+ pvw uc policy update --id "policy-001" --payload-file updated.json
395
+ ```
396
+
397
+ ### v1.2.5 - Custom Metadata
398
+
399
+ ```bash
400
+ # Import business metadata from CSV
401
+ pvw uc custom-metadata import --file business_concept.csv
402
+
403
+ # Add metadata to entity
404
+ pvw uc custom-metadata add \
405
+ --guid "4fae348b-e960-42f7-834c-38f6f6f60000" \
406
+ --name "BusinessConcept" \
407
+ --attributes '{"Department":"Sales"}'
408
+
409
+ # Create custom attribute
410
+ pvw uc custom-attribute create --name "Department" --type String
411
+ ```
412
+
413
+ ---
414
+
415
+ ## Overview
416
+
417
+ **PVW CLI v1.2.8** is a modern command-line interface and Python library for Microsoft Purview, enabling:
418
+
419
+ - **MCP Server** - Natural language interface for AI assistants (Claude, Cline)
420
+ - Advanced data catalog search and discovery
421
+ - Bulk import/export of entities, glossary terms, and lineage
422
+ - Real-time monitoring and analytics
423
+ - Automated governance and compliance
424
+ - Extensible plugin system
425
+
426
+ ---
427
+
428
+ ## Installation
429
+
430
+ You can install PVW CLI in two ways:
431
+
432
+ 1. **From PyPI (recommended for most users):**
433
+
434
+ ```bash
435
+ pip install pvw-cli
436
+ ```
437
+
438
+ 2. **Directly from the GitHub repository (for latest/dev version):**
439
+
440
+ ```bash
441
+ pip install git+https://github.com/Keayoub/Purview_cli.git
442
+ ```
443
+
444
+ Or for development (editable install):
445
+
446
+ ```bash
447
+ git clone https://github.com/Keayoub/Purview_cli.git
448
+ cd Purview_cli
449
+ pip install -r requirements.txt
450
+ pip install -e .
451
+ ```
452
+
453
+ ---
454
+
455
+ ## Requirements
456
+
457
+ - Python 3.8+
458
+ - Azure CLI (`az login`) or Service Principal credentials
459
+ - Microsoft Purview account
460
+
461
+ ---
462
+
463
+ ## Getting Started
464
+
465
+ 1. **Install**
466
+
467
+ ```bash
468
+ pip install pvw-cli
469
+ ```
470
+
471
+ 2. **Set Required Environment Variables**
472
+
473
+ ```cmd
474
+ REM Required for Purview API access
475
+ set PURVIEW_ACCOUNT_NAME=your-purview-account
476
+ set PURVIEW_ACCOUNT_ID=your-purview-account-id-guid
477
+ set PURVIEW_RESOURCE_GROUP=your-resource-group-name
478
+
479
+ REM Optional (e.g. 'china', 'usgov')
480
+ set AZURE_REGION=
481
+ ```
482
+
483
+ 3. **Authenticate**
484
+
485
+ - Azure CLI: `az login`
486
+
487
+ - Or set Service Principal credentials as environment variables
488
+
489
+ 4. **Run a Command**
490
+
491
+ ```bash
492
+ pvw search query --keywords="customer" --limit=5
493
+ ```
494
+
495
+ 5. **See All Commands**
496
+
497
+ ```bash
498
+ pvw --help
499
+ ```
500
+
501
+ ---
502
+
503
+ ## Authentication
504
+
505
+ PVW CLI supports multiple authentication methods for connecting to Microsoft Purview, powered by Azure Identity's `DefaultAzureCredential`. This allows you to use the CLI securely in local development, CI/CD, and production environments.
506
+
507
+ ### 1. Azure CLI Authentication (Recommended for Interactive Use)
508
+
509
+ - Run `az login` to authenticate interactively with your Azure account.
510
+ - The CLI will automatically use your Azure CLI credentials.
511
+
512
+ ### 2. Service Principal Authentication (Recommended for Automation/CI/CD)
513
+
514
+ Set the following environment variables before running any PVW CLI command:
515
+
516
+ - `AZURE_CLIENT_ID` (your Azure AD app registration/client ID)
517
+ - `AZURE_TENANT_ID` (your Azure AD tenant ID)
518
+ - `AZURE_CLIENT_SECRET` (your client secret)
519
+
520
+ **Example (Windows):**
521
+
522
+ ```cmd
523
+ set AZURE_CLIENT_ID=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
524
+ set AZURE_TENANT_ID=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
525
+ set AZURE_CLIENT_SECRET=your-client-secret
526
+ ```
527
+
528
+ **Example (Linux/macOS):**
529
+
530
+ ```bash
531
+ export AZURE_CLIENT_ID=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
532
+ export AZURE_TENANT_ID=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
533
+ export AZURE_CLIENT_SECRET=your-client-secret
534
+ ```
535
+
536
+ ### 3. Managed Identity (for Azure VMs, App Services, etc.)
537
+
538
+ If running in Azure with a managed identity, no extra configuration is needed. The CLI will use the managed identity automatically.
539
+
540
+ ### 4. Visual Studio/VS Code Authentication
541
+
542
+ If you are signed in to Azure in Visual Studio or VS Code, `DefaultAzureCredential` can use those credentials as a fallback.
543
+
544
+ ---
545
+
546
+ **Note:**
547
+
548
+ - The CLI will try all supported authentication methods in order. The first one that works will be used.
549
+ - For most automation and CI/CD scenarios, service principal authentication is recommended.
550
+ - For local development, Azure CLI authentication is easiest.
551
+
552
+ For more details, see the [Azure Identity documentation](https://learn.microsoft.com/en-us/python/api/overview/azure/identity-readme?view=azure-python).
553
+
554
+ ---
555
+
556
+ ## Output Formats & Scripting Integration
557
+
558
+ PVW CLI supports multiple output formats to fit different use cases - from human-readable tables to machine-parseable JSON.
559
+
560
+ ### Output Format Options
561
+
562
+ All `list` commands now support the `--output` parameter with three formats:
563
+
564
+ 1. **`table`** (default) - Rich formatted table with colors for human viewing
565
+ 2. **`json`** - Plain JSON for scripting with PowerShell, bash, jq, etc.
566
+ 3. **`jsonc`** - Colored JSON with syntax highlighting for viewing
567
+
568
+ ### PowerShell Integration
569
+
570
+ The `--output json` format produces plain JSON that works perfectly with PowerShell's `ConvertFrom-Json`:
571
+
572
+ ```powershell
573
+ # Get all terms as PowerShell objects
574
+ $domainId = "59ae27b5-40bc-4c90-abfe-fe1a0638fe3a"
575
+ $terms = py -m purviewcli uc term list --domain-id $domainId --output json | ConvertFrom-Json
576
+
577
+ # Access properties
578
+ Write-Host "Found $($terms.Count) terms"
579
+ foreach ($term in $terms) {
580
+ Write-Host " • $($term.name) - $($term.status)"
581
+ }
582
+
583
+ # Filter and export
584
+ $draftTerms = $terms | Where-Object { $_.status -eq "Draft" }
585
+ $draftTerms | Export-Csv -Path "draft_terms.csv" -NoTypeInformation
586
+
587
+ # Group by status
588
+ $terms | Group-Object status | Format-Table Count, Name
589
+ ```
590
+
591
+ ### Bash/Linux Integration
592
+
593
+ Use `jq` for JSON processing in bash:
594
+
595
+ ```bash
596
+ # Get domain ID
597
+ DOMAIN_ID="59ae27b5-40bc-4c90-abfe-fe1a0638fe3a"
598
+
599
+ # Get term names only
600
+ pvw uc term list --domain-id $DOMAIN_ID --output json | jq -r '.[] | .name'
601
+
602
+ # Count terms
603
+ pvw uc term list --domain-id $DOMAIN_ID --output json | jq 'length'
604
+
605
+ # Filter by status
606
+ pvw uc term list --domain-id $DOMAIN_ID --output json | jq '.[] | select(.status == "Draft")'
607
+
608
+ # Group by status
609
+ pvw uc term list --domain-id $DOMAIN_ID --output json | jq 'group_by(.status) | map({status: .[0].status, count: length})'
610
+
611
+ # Save to file
612
+ pvw uc term list --domain-id $DOMAIN_ID --output json > terms.json
613
+ ```
614
+
615
+ ### Examples by Command
616
+
617
+ ```bash
618
+ # Domains
619
+ pvw uc domain list --output json | jq '.[] | .name'
620
+
621
+ # Terms
622
+ pvw uc term list --domain-id "abc-123" --output json
623
+ pvw uc term list --domain-id "abc-123" --output table # Default
624
+ pvw uc term list --domain-id "abc-123" --output jsonc # Colored for viewing
625
+
626
+ # Data Products
627
+ pvw uc dataproduct list --domain-id "abc-123" --output json
628
+ ```
629
+
630
+ ### Migration from Old --json Flag
631
+
632
+ **Old (deprecated):**
633
+
634
+ ```bash
635
+ pvw uc term list --domain-id "abc-123" --json
636
+ ```
637
+
638
+ **New (recommended):**
639
+
640
+ ```bash
641
+ pvw uc term list --domain-id "abc-123" --output json # Plain JSON for scripting
642
+ pvw uc term list --domain-id "abc-123" --output jsonc # Colored JSON (old behavior)
643
+ ```
644
+
645
+ ---
646
+
647
+ ## Required Purview Configuration
648
+
649
+ Before using PVW CLI, you need to set three essential environment variables. Here's how to find them:
650
+
651
+ ### 🔍 **How to Find Your Purview Values**
652
+
653
+ #### **1. PURVIEW_ACCOUNT_NAME**
654
+
655
+ - This is your Purview account name as it appears in Azure Portal
656
+ - Example: `kaydemopurview`
657
+
658
+ #### **2. PURVIEW_ACCOUNT_ID**
659
+
660
+ - This is the GUID that identifies your Purview account for Unified Catalog APIs
661
+ - **Important: For most Purview deployments, this is your Azure Tenant ID**
662
+
663
+ - **Method 1 - Get your Tenant ID (recommended):**
664
+
665
+ **Bash/Command Prompt:**
666
+
667
+ ```bash
668
+ az account show --query tenantId -o tsv
669
+ ```
670
+
671
+ **PowerShell:**
672
+
673
+ ```powershell
674
+ az account show --query tenantId -o tsv
675
+ # Or store directly in environment variable:
676
+ $env:PURVIEW_ACCOUNT_ID = az account show --query tenantId -o tsv
677
+ ```
678
+
679
+ - **Method 2 - Azure CLI (extract from Atlas endpoint):**
680
+
681
+ ```bash
682
+ az purview account show --name YOUR_ACCOUNT_NAME --resource-group YOUR_RG --query endpoints.catalog -o tsv
683
+ ```
684
+
685
+ Extract the GUID from the URL (before `-api.purview-service.microsoft.com`)
686
+
687
+ - **Method 3 - Azure Portal:**
688
+ 1. Go to your Purview account in Azure Portal
689
+ 2. Navigate to Properties → Atlas endpoint URL
690
+ 3. Extract GUID from: `https://GUID-api.purview-service.microsoft.com/catalog`
691
+
692
+ #### **3. PURVIEW_RESOURCE_GROUP**
693
+
694
+ - The Azure resource group containing your Purview account
695
+ - Example: `fabric-artifacts`
696
+
697
+ ### 📋 **Setting the Variables**
698
+
699
+ **Windows Command Prompt:**
700
+
701
+ ```cmd
702
+ set PURVIEW_ACCOUNT_NAME=your-purview-account
703
+ set PURVIEW_ACCOUNT_ID=your-purview-account-id
704
+ set PURVIEW_RESOURCE_GROUP=your-resource-group
705
+ ```
706
+
707
+ **Windows PowerShell:**
708
+
709
+ ```powershell
710
+ $env:PURVIEW_ACCOUNT_NAME="your-purview-account"
711
+ $env:PURVIEW_ACCOUNT_ID="your-purview-account-id"
712
+ $env:PURVIEW_RESOURCE_GROUP="your-resource-group"
713
+ ```
714
+
715
+ **Linux/macOS:**
716
+
717
+ ```bash
718
+ export PURVIEW_ACCOUNT_NAME=your-purview-account
719
+ export PURVIEW_ACCOUNT_ID=your-purview-account-id
720
+ export PURVIEW_RESOURCE_GROUP=your-resource-group
721
+ ```
722
+
723
+ **Permanent (Windows Command Prompt):**
724
+
725
+ ```cmd
726
+ setx PURVIEW_ACCOUNT_NAME "your-purview-account"
727
+ setx PURVIEW_ACCOUNT_ID "your-purview-account-id"
728
+ setx PURVIEW_RESOURCE_GROUP "your-resource-group"
729
+ ```
730
+
731
+ **Permanent (Windows PowerShell):**
732
+
733
+ ```powershell
734
+ [Environment]::SetEnvironmentVariable("PURVIEW_ACCOUNT_NAME", "your-purview-account", "User")
735
+ [Environment]::SetEnvironmentVariable("PURVIEW_ACCOUNT_ID", "your-purview-account-id", "User")
736
+ [Environment]::SetEnvironmentVariable("PURVIEW_RESOURCE_GROUP", "your-resource-group", "User")
737
+ ```
738
+
739
+ ### **Debug Environment Issues**
740
+
741
+ If you experience issues with environment variables between different terminals, use these debug commands:
742
+
743
+ **Command Prompt/Bash:**
744
+
745
+ ```bash
746
+ # Run this to check your current environment
747
+ python -c "
748
+ import os
749
+ print('PURVIEW_ACCOUNT_NAME:', os.getenv('PURVIEW_ACCOUNT_NAME'))
750
+ print('PURVIEW_ACCOUNT_ID:', os.getenv('PURVIEW_ACCOUNT_ID'))
751
+ print('PURVIEW_RESOURCE_GROUP:', os.getenv('PURVIEW_RESOURCE_GROUP'))
752
+ "
753
+ ```
754
+
755
+ **PowerShell:**
756
+
757
+ ```powershell
758
+ # Check environment variables in PowerShell
759
+ python -c "
760
+ import os
761
+ print('PURVIEW_ACCOUNT_NAME:', os.getenv('PURVIEW_ACCOUNT_NAME'))
762
+ print('PURVIEW_ACCOUNT_ID:', os.getenv('PURVIEW_ACCOUNT_ID'))
763
+ print('PURVIEW_RESOURCE_GROUP:', os.getenv('PURVIEW_RESOURCE_GROUP'))
764
+ "
765
+
766
+ # Or use PowerShell native commands
767
+ Write-Host "PURVIEW_ACCOUNT_NAME: $env:PURVIEW_ACCOUNT_NAME"
768
+ Write-Host "PURVIEW_ACCOUNT_ID: $env:PURVIEW_ACCOUNT_ID"
769
+ Write-Host "PURVIEW_RESOURCE_GROUP: $env:PURVIEW_RESOURCE_GROUP"
770
+ ```
771
+
772
+ ---
773
+
774
+ ## Search Command (Discovery Query API)
775
+
776
+ The PVW CLI provides advanced search using the latest Microsoft Purview Discovery Query API:
777
+
778
+ - Search for assets, tables, files, and more with flexible filters
779
+ - Use autocomplete and suggestion endpoints
780
+ - Perform faceted, time-based, and entity-type-specific queries
781
+
782
+ **v1.2.8 Improvements:**
783
+
784
+ - Fixed `suggest` and `autocomplete` API payload format (removed empty filter causing HTTP 400 errors)
785
+ - Enhanced collection display with robust type checking and fallback logic
786
+ - All search commands validated and working correctly (query, browse, suggest, find-table)
787
+
788
+ ### CLI Usage Examples
789
+
790
+ #### **Multiple Output Formats**
791
+
792
+ ```bash
793
+ # 1. Table Format (Default) - Quick overview
794
+ pvw search query --keywords="customer" --limit=5
795
+ # → Clean table with Name, Type, Collection, Classifications, Qualified Name
796
+
797
+ # 2. Detailed Format - Human-readable with all metadata
798
+ pvw search query --keywords="customer" --limit=5 --detailed
799
+ # → Rich panels showing full details, timestamps, search scores
800
+
801
+ # 3. JSON Format - Complete technical details with syntax highlighting (WELL-FORMATTED)
802
+ pvw search query --keywords="customer" --limit=5 --json
803
+ # → Full JSON response with indentation, line numbers and color coding
804
+
805
+ # 4. Table with IDs - For entity operations
806
+ pvw search query --keywords="customer" --limit=5 --show-ids
807
+ # → Table format + entity GUIDs for copy/paste into update commands
808
+ ```
809
+
810
+ #### **Search Operations**
811
+
812
+ ```bash
813
+ # Basic search for assets with keyword 'customer'
814
+ pvw search query --keywords="customer" --limit=5
815
+
816
+ # Advanced search with classification filter
817
+ pvw search query --keywords="sales" --classification="PII" --objectType="Tables" --limit=10
818
+
819
+ # Pagination through large result sets
820
+ pvw search query --keywords="SQL" --offset=10 --limit=5
821
+
822
+ # Autocomplete suggestions for partial keyword
823
+ pvw search autocomplete --keywords="ord" --limit=3
824
+
825
+ # Get search suggestions (fuzzy matching)
826
+ pvw search suggest --keywords="prod" --limit=2
827
+ ```
828
+ **IMPORTANT - Command Line Quoting:**
829
+ ```bash
830
+ # [OK] CORRECT - Use quotes around keywords
831
+ pvw search query --keywords="customer" --limit=5
832
+
833
+ # [OK] CORRECT - For wildcard searches, use quotes
834
+ pvw search query --keywords="*" --limit=5
835
+
836
+ # ❌ WRONG - Don't use unquoted * (shell expands to file names)
837
+ pvw search query --keywords=* --limit=5
838
+ # This causes: "Error: Got unexpected extra arguments (dist doc ...)"
839
+ ```
840
+
841
+ ```bash
842
+ # Faceted search with aggregation
843
+ pvw search query --keywords="finance" --facetFields="objectType,classification" --limit=5
844
+
845
+ # Browse entities by type and path
846
+ pvw search browse --entityType="Tables" --path="/root/finance" --limit=2
847
+
848
+ # Time-based search for assets created after a date
849
+ pvw search query --keywords="audit" --createdAfter="2024-01-01" --limit=1
850
+
851
+ # Entity type specific search
852
+ pvw search query --keywords="finance" --entityTypes="Files,Tables" --limit=2
853
+ ```
854
+
855
+ #### **Usage Scenarios**
856
+
857
+ - **Daily browsing**: Use default table format for quick scans
858
+ - **Understanding assets**: Use `--detailed` for rich information panels
859
+ - **Technical work**: Use `--json` for complete API data access
860
+ - **Entity operations**: Use `--show-ids` to get GUIDs for updates
861
+
862
+ ### Python Usage Example
863
+
864
+ ```python
865
+ from purviewcli.client._search import Search
866
+
867
+ search = Search()
868
+ args = {"--keywords": "customer", "--limit": 5}
869
+ search.searchQuery(args)
870
+ print(search.payload) # Shows the constructed search payload
871
+ ```
872
+
873
+ ### Test Examples
874
+
875
+ See `tests/test_search_examples.py` for ready-to-run pytest examples covering all search scenarios:
876
+
877
+ - Basic query
878
+ - Advanced filter
879
+ - Autocomplete
880
+ - Suggest
881
+ - Faceted search
882
+ - Browse
883
+ - Time-based search
884
+ - Entity type search
885
+
886
+ ---
887
+
888
+ ## Unified Catalog Management (NEW)
889
+
890
+ PVW CLI now includes comprehensive **Microsoft Purview Unified Catalog (UC)** support with the new `uc` command group. This provides complete management of modern data governance features including governance domains, glossary terms, data products, objectives (OKRs), and critical data elements.
891
+
892
+ **🎯 Feature Parity**: Full compatibility with [UnifiedCatalogPy](https://github.com/olafwrieden/unifiedcatalogpy) functionality.
893
+
894
+ See [`doc/commands/unified-catalog.md`](doc/commands/unified-catalog.md) for complete documentation and examples.
895
+
896
+ ### Quick UC Examples
897
+
898
+ #### **Governance Domains Management**
899
+
900
+ ```bash
901
+ # List all governance domains
902
+ pvw uc domain list
903
+
904
+ # Create a new governance domain
905
+ pvw uc domain create --name "Finance" --description "Financial data governance domain"
906
+
907
+ # Get domain details
908
+ pvw uc domain get --domain-id "abc-123-def-456"
909
+
910
+ # Update domain information
911
+ pvw uc domain update --domain-id "abc-123" --description "Updated financial governance"
912
+ ```
913
+
914
+ #### **Glossary Terms in UC**
915
+
916
+ ```bash
917
+ # List all terms in a domain
918
+ pvw uc term list --domain-id "abc-123"
919
+ pvw uc term list --domain-id "abc-123" --output json # Plain JSON for scripting
920
+ pvw uc term list --domain-id "abc-123" --output jsonc # Colored JSON for viewing
921
+
922
+ # Create a single glossary term
923
+ pvw uc term create --name "Customer" --domain-id "abc-123" --description "A person or entity that purchases products"
924
+
925
+ # Get term details
926
+ pvw uc term show --term-id "term-456"
927
+
928
+ # Update term
929
+ pvw uc term update --term-id "term-456" --description "Updated description"
930
+
931
+ # Delete term
932
+ pvw uc term delete --term-id "term-456" --confirm
933
+ ```
934
+
935
+ **📦 Bulk Import (NEW)**
936
+
937
+ Import multiple terms from CSV or JSON files with validation and progress tracking:
938
+
939
+ ```bash
940
+ # CSV Import - Preview with dry-run
941
+ pvw uc term import-csv --csv-file "samples/csv/uc_terms_bulk_example.csv" --domain-id "abc-123" --dry-run
942
+
943
+ # CSV Import - Actual import
944
+ pvw uc term import-csv --csv-file "samples/csv/uc_terms_bulk_example.csv" --domain-id "abc-123"
945
+
946
+ # JSON Import - Preview with dry-run
947
+ pvw uc term import-json --json-file "samples/json/term/uc_terms_bulk_example.json" --dry-run
948
+
949
+ # JSON Import - Actual import (domain_id from JSON or override with flag)
950
+ pvw uc term import-json --json-file "samples/json/term/uc_terms_bulk_example.json"
951
+ pvw uc term import-json --json-file "samples/json/term/uc_terms_bulk_example.json" --domain-id "abc-123"
952
+ ```
953
+
954
+ **Bulk Import Features:**
955
+
956
+ - [OK] Import from CSV or JSON files
957
+ - [OK] Dry-run mode to preview before importing
958
+ - [OK] Support for multiple owners (Entra ID Object IDs), acronyms, and resources
959
+ - [OK] Progress tracking with Rich console output
960
+ - [OK] Detailed error messages and summary reports
961
+ - [OK] Sequential POST requests (no native bulk endpoint available)
962
+
963
+ **CSV Format Example:**
964
+
965
+ ```csv
966
+ name,description,status,acronym,owner_id,resource_name,resource_url
967
+ Customer Acquisition Cost,Cost to acquire new customer,Draft,CAC,<guid>,Metrics Guide,https://docs.example.com
968
+ Monthly Recurring Revenue,Predictable monthly revenue,Draft,MRR,<guid>,Finance Dashboard,https://finance.example.com
969
+ ```
970
+
971
+ **JSON Format Example:**
972
+
973
+ ```json
974
+ {
975
+ "terms": [
976
+ {
977
+ "name": "Data Lake",
978
+ "description": "Centralized repository for structured/unstructured data",
979
+ "domain_id": "your-domain-id-here",
980
+ "status": "Draft",
981
+ "acronyms": ["DL"],
982
+ "owner_ids": ["<entra-id-object-id-guid>"],
983
+ "resources": [{"name": "Architecture Guide", "url": "https://example.com"}]
984
+ }
985
+ ]
986
+ }
987
+ ```
988
+
989
+ **Important Notes:**
990
+
991
+ - ⚠️ **Owner IDs must be Entra ID Object IDs (GUIDs)**, not email addresses
992
+ - ⚠️ **Terms cannot be "Published" in unpublished domains** - use "Draft" status
993
+ - [OK] Sample files available: `samples/csv/uc_terms_bulk_example.csv`, `samples/json/term/uc_terms_bulk_example.json`
994
+ - 📖 Complete documentation: [`doc/commands/unified-catalog/term-bulk-import.md`](doc/commands/unified-catalog/term-bulk-import.md)
995
+
996
+ **🗑️ Bulk Delete (NEW)**
997
+
998
+ Delete all terms in a domain using PowerShell or Python scripts:
999
+
1000
+ ```powershell
1001
+ # PowerShell - Delete all terms with confirmation
1002
+ .\scripts\delete-all-uc-terms.ps1 -DomainId "abc-123"
1003
+
1004
+ # PowerShell - Delete without confirmation
1005
+ .\scripts\delete-all-uc-terms.ps1 -DomainId "abc-123" -Force
1006
+ ```
1007
+
1008
+ ```bash
1009
+ # Python - Delete all terms with confirmation
1010
+ python scripts/delete_all_uc_terms_v2.py --domain-id "abc-123"
1011
+
1012
+ # Python - Delete without confirmation
1013
+ python scripts/delete_all_uc_terms_v2.py --domain-id "abc-123" --force
1014
+ ```
1015
+
1016
+ **Bulk Delete Features:**
1017
+
1018
+ - [OK] Interactive confirmation prompts (type "DELETE" to confirm)
1019
+ - [OK] Beautiful progress display with colors
1020
+ - [OK] Success/failure tracking per term
1021
+ - [OK] Detailed summary reports
1022
+ - [OK] Rate limiting (200ms delay between deletes)
1023
+ - [OK] Graceful error handling and Ctrl+C support
1024
+
1025
+ #### **Data Products Management**
1026
+
1027
+ ```bash
1028
+ # List all data products in a domain
1029
+ pvw uc dataproduct list --domain-id "abc-123"
1030
+
1031
+ # Create a comprehensive data product
1032
+ pvw uc dataproduct create \
1033
+ --name "Customer Analytics Dashboard" \
1034
+ --domain-id "abc-123" \
1035
+ --description "360-degree customer analytics with behavioral insights" \
1036
+ --type Analytical \
1037
+ --status Draft
1038
+
1039
+ # Get detailed data product information
1040
+ pvw uc dataproduct show --product-id "prod-789"
1041
+
1042
+ # Update data product (partial updates supported - only specify fields to change)
1043
+ pvw uc dataproduct update \
1044
+ --product-id "prod-789" \
1045
+ --status Published \
1046
+ --description "Updated comprehensive customer analytics" \
1047
+ --endorsed
1048
+
1049
+ # Update multiple fields at once
1050
+ pvw uc dataproduct update \
1051
+ --product-id "prod-789" \
1052
+ --status Published \
1053
+ --update-frequency Monthly \
1054
+ --endorsed
1055
+
1056
+ # Delete a data product (with confirmation)
1057
+ pvw uc dataproduct delete --product-id "prod-789"
1058
+
1059
+ # Delete without confirmation prompt
1060
+ pvw uc dataproduct delete --product-id "prod-789" --yes
1061
+ ```
1062
+
1063
+ #### **Objectives & Key Results (OKRs)**
1064
+
1065
+ ```bash
1066
+ # List objectives for a domain
1067
+ pvw uc objective list --domain-id "abc-123"
1068
+
1069
+ # Create measurable objectives
1070
+ pvw uc objective create \
1071
+ --definition "Improve data quality score by 25% within Q4" \
1072
+ --domain-id "abc-123" \
1073
+ --target-value "95" \
1074
+ --measurement-unit "percentage"
1075
+
1076
+ # Track objective progress
1077
+ pvw uc objective update \
1078
+ --objective-id "obj-456" \
1079
+ --domain-id "abc-123" \
1080
+ --current-value "87" \
1081
+ --status "in-progress"
1082
+ ```
1083
+
1084
+ #### **Critical Data Elements (CDEs)**
1085
+
1086
+ ```bash
1087
+ # List critical data elements
1088
+ pvw uc cde list --domain-id "abc-123"
1089
+
1090
+ # Define critical data elements with governance rules
1091
+ pvw uc cde create \
1092
+ --name "Social Security Number" \
1093
+ --data-type "String" \
1094
+ --domain-id "abc-123" \
1095
+ --classification "PII" \
1096
+ --retention-period "7-years"
1097
+
1098
+ # Associate CDEs with data assets
1099
+ pvw uc cde link \
1100
+ --cde-id "cde-789" \
1101
+ --domain-id "abc-123" \
1102
+ --asset-id "ea3412c3-7387-4bc1-9923-11f6f6f60000"
1103
+ ```
1104
+
1105
+ #### **Health Monitoring (NEW)**
1106
+
1107
+ Monitor governance health and get automated recommendations to improve your data governance posture.
1108
+
1109
+ ```bash
1110
+ # List all health findings and recommendations
1111
+ pvw uc health query
1112
+
1113
+ # Filter by severity
1114
+ pvw uc health query --severity High
1115
+ pvw uc health query --severity Medium
1116
+
1117
+ # Filter by status
1118
+ pvw uc health query --status NotStarted
1119
+ pvw uc health query --status InProgress
1120
+
1121
+ # Get detailed information about a specific health action
1122
+ pvw uc health show --action-id "5ea3fc78-6a77-4098-8779-ed81de6f87c9"
1123
+
1124
+ # Update health action status
1125
+ pvw uc health update \
1126
+ --action-id "5ea3fc78-6a77-4098-8779-ed81de6f87c9" \
1127
+ --status InProgress \
1128
+ --reason "Working on assigning glossary terms to data products"
1129
+
1130
+ # Get health summary statistics
1131
+ pvw uc health summary
1132
+
1133
+ # Output health findings in JSON format
1134
+ pvw uc health query --json
1135
+ ```
1136
+
1137
+ **Health Finding Types:**
1138
+
1139
+ - Missing glossary terms on data products (High)
1140
+ - Data products without OKRs (Medium)
1141
+ - Missing data quality scores (Medium)
1142
+ - Classification gaps on data assets (Medium)
1143
+ - Description quality issues (Medium)
1144
+ - Business domains without critical data entities (Medium)
1145
+
1146
+ #### **Workflow Management (NEW)**
1147
+
1148
+ Manage approval workflows and business process automation in Purview.
1149
+
1150
+ ```bash
1151
+ # List all workflows
1152
+ pvw workflow list
1153
+
1154
+ # Get workflow details
1155
+ pvw workflow get --workflow-id "workflow-123"
1156
+
1157
+ # Create a new workflow (requires JSON definition)
1158
+ pvw workflow create --workflow-id "approval-flow-1" --payload-file workflow-definition.json
1159
+
1160
+ # Execute a workflow
1161
+ pvw workflow execute --workflow-id "workflow-123"
1162
+
1163
+ # List workflow executions
1164
+ pvw workflow executions --workflow-id "workflow-123"
1165
+
1166
+ # View specific execution details
1167
+ pvw workflow execution-details --workflow-id "workflow-123" --execution-id "exec-456"
1168
+
1169
+ # Update workflow configuration
1170
+ pvw workflow update --workflow-id "workflow-123" --payload-file updated-workflow.json
1171
+
1172
+ # Delete a workflow
1173
+ pvw workflow delete --workflow-id "workflow-123"
1174
+
1175
+ # Output workflows in JSON format
1176
+ pvw workflow list --json
1177
+ ```
1178
+
1179
+ **Workflow Use Cases:**
1180
+
1181
+ - Data access request approvals
1182
+ - Glossary term certification workflows
1183
+ - Data product publishing approvals
1184
+ - Classification review processes
1185
+
1186
+ #### **Integrated Workflow Example**
1187
+
1188
+ ```bash
1189
+ # 1. Discover assets to govern
1190
+ pvw search query --keywords="customer" --detailed
1191
+
1192
+ # 2. Create governance domain for discovered assets
1193
+ pvw uc domain create --name "Customer Data" --description "Customer information governance"
1194
+
1195
+ # 3. Define governance terms
1196
+ pvw uc term create --name "Customer PII" --domain-id "new-domain-id" --description "Personal customer information"
1197
+
1198
+ # 4. Create data product from discovered assets
1199
+ pvw uc dataproduct create --name "Customer Master Data" --domain-id "new-domain-id"
1200
+
1201
+ # 5. Set governance objectives
1202
+ pvw uc objective create --definition "Ensure 100% PII classification compliance" --domain-id "new-domain-id"
1203
+ ```
1204
+
1205
+ ---
1206
+
1207
+ ## Entity Management & Updates
1208
+
1209
+ PVW CLI provides comprehensive entity management capabilities for updating Purview assets like descriptions, classifications, and custom attributes.
1210
+
1211
+ ### **Entity Update Examples**
1212
+
1213
+ #### **Update Asset Descriptions**
1214
+
1215
+ ```bash
1216
+ # Update table description using GUID
1217
+ pvw entity update-attribute \
1218
+ --guid "ece43ce5-ac45-4e50-a4d0-365a64299efc" \
1219
+ --attribute "description" \
1220
+ --value "Updated customer data warehouse table with enhanced analytics"
1221
+
1222
+ # Update dataset description using qualified name
1223
+ pvw entity update-attribute \
1224
+ --qualifiedName "https://app.powerbi.com/groups/abc-123/datasets/def-456" \
1225
+ --attribute "description" \
1226
+ --value "Power BI dataset for customer analytics dashboard"
1227
+ ```
1228
+
1229
+ #### **Bulk Entity Operations**
1230
+
1231
+ ```bash
1232
+ # Read entity details before updating
1233
+ pvw entity read-by-attribute \
1234
+ --guid "ea3412c3-7387-4bc1-9923-11f6f6f60000" \
1235
+ --attribute "description,classifications,customAttributes"
1236
+
1237
+ # Update multiple attributes at once
1238
+ pvw entity update-bulk \
1239
+ --input-file entities_to_update.json \
1240
+ --output-file update_results.json
1241
+ ```
1242
+
1243
+ #### **Column-Level Updates**
1244
+
1245
+ ```bash
1246
+ # Update specific column descriptions in a table
1247
+ pvw entity update-attribute \
1248
+ --guid "column-guid-123" \
1249
+ --attribute "description" \
1250
+ --value "Customer unique identifier - Primary Key"
1251
+
1252
+ # Add classifications to sensitive columns
1253
+ pvw entity add-classification \
1254
+ --guid "column-guid-456" \
1255
+ --classification "MICROSOFT.PERSONAL.EMAIL"
1256
+ ```
1257
+
1258
+ ### **Discovery to Update Workflow**
1259
+
1260
+ ```bash
1261
+ # 1. Find assets that need updates
1262
+ pvw search query --keywords="customer table" --show-ids --limit=10
1263
+
1264
+ # 2. Get detailed information about a specific asset
1265
+ pvw entity read-by-attribute --guid "FOUND_GUID" --attribute "description,classifications"
1266
+
1267
+ # 3. Update the asset description
1268
+ pvw entity update-attribute \
1269
+ --guid "FOUND_GUID" \
1270
+ --attribute "description" \
1271
+ --value "Updated description based on business requirements"
1272
+
1273
+ # 4. Verify the update
1274
+ pvw search query --keywords="FOUND_GUID" --detailed
1275
+ ```
1276
+
1277
+ ---
1278
+
1279
+ ## Lineage CSV Import & Management
1280
+
1281
+ PVW CLI provides powerful lineage management capabilities including CSV-based bulk import for automating data lineage creation.
1282
+
1283
+ ### **Lineage CSV Import**
1284
+
1285
+ Import lineage relationships from CSV files to automate the creation of data flow documentation in Microsoft Purview.
1286
+
1287
+ #### **CSV Format**
1288
+
1289
+ The CSV file must contain the following columns:
1290
+
1291
+ **Required columns:**
1292
+
1293
+ - `source_entity_guid` - GUID of the source entity
1294
+ - `target_entity_guid` - GUID of the target entity
1295
+
1296
+ **Optional columns:**
1297
+
1298
+ - `relationship_type` - Type of relationship (default: "Process")
1299
+ - `process_name` - Name of the transformation process
1300
+ - `description` - Description of the transformation
1301
+ - `confidence_score` - Confidence score (0-1)
1302
+ - `owner` - Process owner
1303
+ - `metadata` - Additional JSON metadata
1304
+
1305
+ **Example CSV:**
1306
+
1307
+ ```csv
1308
+ source_entity_guid,target_entity_guid,relationship_type,process_name,description,confidence_score,owner,metadata
1309
+ dcfc99ed-c74d-49aa-bd0b-72f6f6f60000,1db9c650-acfb-4914-8bc5-1cf6f6f60000,Process,Transform_Product_Data,Transform product data for analytics,0.95,data-engineering,"{""tool"": ""Azure Data Factory""}"
1310
+ ```
1311
+
1312
+ #### **Lineage Commands**
1313
+
1314
+ ```bash
1315
+ # Validate CSV format before import (no API calls)
1316
+ pvw lineage validate lineage_data.csv
1317
+
1318
+ # Import lineage relationships from CSV
1319
+ pvw lineage import lineage_data.csv
1320
+
1321
+ # Generate sample CSV file with examples
1322
+ pvw lineage sample output.csv --num-samples 10 --template detailed
1323
+
1324
+ # View available CSV templates
1325
+ pvw lineage templates
1326
+ ```
1327
+
1328
+ #### **Available Templates**
1329
+
1330
+ - **`basic`** - Minimal columns (source, target, process name)
1331
+ - **`detailed`** - All columns including metadata and confidence scores
1332
+ - **`qualified_names`** - Use qualified names instead of GUIDs
1333
+
1334
+ #### **Workflow Example**
1335
+
1336
+ ```bash
1337
+ # 1. Find entity GUIDs using search
1338
+ pvw search find-table --name "Product" --schema "dbo" --id-only
1339
+
1340
+ # 2. Create CSV file with lineage relationships
1341
+ # (use the GUIDs from step 1)
1342
+
1343
+ # 3. Validate CSV format
1344
+ pvw lineage validate my_lineage.csv
1345
+ # Output: SUCCESS: Lineage validation passed (5 rows, 8 columns)
1346
+
1347
+ # 4. Import to Purview
1348
+ pvw lineage import my_lineage.csv
1349
+ # Output: SUCCESS: Lineage import completed successfully
1350
+ ```
1351
+
1352
+ #### **Advanced Features**
1353
+
1354
+ - **GUID Validation**: Automatic validation of GUID format with helpful error messages
1355
+ - **Process Entity Creation**: Creates intermediate "Process" entities to link source→target relationships
1356
+ - **Metadata Support**: Add custom JSON metadata to each lineage relationship
1357
+ - **Dry-Run Validation**: Validate CSV format locally before making API calls
1358
+
1359
+ **For detailed documentation, see:** [`doc/guides/lineage-csv-import.md`](doc/guides/lineage-csv-import.md)
1360
+
1361
+ ---
1362
+
1363
+ ## Data Product Management (Legacy)
1364
+
1365
+ PVW CLI also includes the original `data-product` command group for backward compatibility with traditional data product lifecycle management.
1366
+
1367
+ See [`doc/commands/data-product.md`](doc/commands/data-product.md) for full documentation and examples.
1368
+
1369
+ ### Example Commands
1370
+
1371
+ ```bash
1372
+ # Create a data product
1373
+ pvw data-product create --qualified-name="product.test.1" --name="Test Product" --description="A test data product"
1374
+
1375
+ # Add classification and label
1376
+ pvw data-product add-classification --qualified-name="product.test.1" --classification="PII"
1377
+ pvw data-product add-label --qualified-name="product.test.1" --label="gold"
1378
+
1379
+ # Link glossary term
1380
+ pvw data-product link-glossary --qualified-name="product.test.1" --term="Customer"
1381
+
1382
+ # Set status and show lineage
1383
+ pvw data-product set-status --qualified-name="product.test.1" --status="active"
1384
+ pvw data-product show-lineage --qualified-name="product.test.1"
1385
+ ```
1386
+
1387
+ ---
1388
+
1389
+ ## Core Features
1390
+
1391
+ - **Unified Catalog (UC)**: Complete modern data governance (NEW)
1392
+
1393
+ ```bash
1394
+ # Manage governance domains, terms, data products, OKRs, CDEs
1395
+ pvw uc domain list
1396
+ pvw uc term create --name "Customer" --domain-id "abc-123"
1397
+ pvw uc objective create --definition "Improve quality" --domain-id "abc-123"
1398
+ ```
1399
+
1400
+ - **Discovery Query/Search**: Flexible, advanced search for all catalog assets
1401
+ - **Entity Management**: Bulk import/export, update, and validation
1402
+ - **Glossary Management**: Import/export terms, assign terms in bulk
1403
+
1404
+ ```bash
1405
+ # List all terms in a glossary
1406
+ pvw glossary list-terms --glossary-guid "your-glossary-guid"
1407
+
1408
+ # Create and manage glossary terms
1409
+ pvw glossary create-term --payload-file term.json
1410
+ ```
1411
+
1412
+ - **Lineage Operations**: Lineage discovery, CSV-based bulk lineage import/export
1413
+
1414
+ ```bash
1415
+ # Import lineage relationships from CSV
1416
+ pvw lineage import lineage_data.csv
1417
+
1418
+ # Validate CSV format before import
1419
+ pvw lineage validate lineage_data.csv
1420
+
1421
+ # Generate sample CSV file
1422
+ pvw lineage sample output.csv --num-samples 10
1423
+ ```
1424
+
1425
+ - **Monitoring & Analytics**: Real-time dashboards, metrics, and reporting
1426
+ - **Plugin System**: Extensible with custom plugins
1427
+
1428
+ ---
1429
+
1430
+ ## API Coverage and Support
1431
+
1432
+ PVW CLI provides comprehensive automation for all major Microsoft Purview APIs, including the new **Unified Catalog APIs** for modern data governance.
1433
+
1434
+ ### Supported API Groups
1435
+
1436
+ - **Unified Catalog**: Complete governance domains, glossary terms, data products, OKRs, CDEs management [OK]
1437
+ - **Health Monitoring**: Automated governance health checks and recommendations [OK] NEW
1438
+ - **Workflows**: Approval workflows and business process automation [OK] NEW
1439
+ - **Data Map**: Full entity and lineage management [OK]
1440
+ - **Discovery**: Advanced search, browse, and query capabilities [OK]
1441
+ - **Collections**: Collection and account management [OK]
1442
+ - **Management**: Administrative operations [OK]
1443
+ - **Scan**: Data source scanning and configuration [OK]
1444
+
1445
+ ### API Version Support
1446
+
1447
+ - **Unified Catalog**: Latest UC API endpoints (September 2025)
1448
+ - Data Map: **2024-03-01-preview** (default) or **2023-09-01** (stable)
1449
+ - Collections: **2019-11-01-preview**
1450
+ - Account: **2019-11-01-preview**
1451
+ - Management: **2021-07-01**
1452
+ - Scan: **2018-12-01-preview**
1453
+
1454
+ For the latest API documentation and updates, see:
1455
+
1456
+ - [Microsoft Purview REST API reference](https://learn.microsoft.com/en-us/rest/api/purview/)
1457
+ - [Atlas 2.2 API documentation](https://learn.microsoft.com/en-us/purview/data-gov-api-atlas-2-2)
1458
+ - [Azure Updates](https://azure.microsoft.com/updates/) for new releases
1459
+
1460
+ If you need a feature that is not yet implemented, please open an issue or check for updates in future releases.
1461
+
1462
+ ---
1463
+
1464
+ ## Sample Files & Scripts
1465
+
1466
+ PVW CLI includes comprehensive sample files and scripts for bulk operations:
1467
+
1468
+ ### Bulk Import Samples
1469
+
1470
+ - **CSV Samples:** `samples/csv/uc_terms_bulk_example.csv` (8 sample terms)
1471
+ - **JSON Samples:**
1472
+ - `samples/json/term/uc_terms_bulk_example.json` (8 data management terms)
1473
+ - `samples/json/term/uc_terms_sample.json` (8 business terms)
1474
+ - **Lineage CSV Samples:** `samples/csv/lineage_example.csv` - Multiple lineage relationships with metadata
1475
+
1476
+ ### Lineage Documentation
1477
+
1478
+ - **Comprehensive Guide:** `doc/guides/lineage-csv-import.md` - Complete lineage CSV import documentation
1479
+ - CSV format specification with required/optional columns
1480
+ - Command examples for validate, import, sample, templates
1481
+ - Workflow recommendations and troubleshooting
1482
+ - Advanced scenarios with metadata and multiple transformations
1483
+
1484
+ ### Bulk Delete Scripts
1485
+
1486
+ - **PowerShell:** `scripts/delete-all-uc-terms.ps1` - Full-featured with confirmation prompts
1487
+ - **Python:** `scripts/delete_all_uc_terms_v2.py` - Rich progress bars and error handling
1488
+
1489
+ ### Test Scripts
1490
+
1491
+ - **PowerShell:** `scripts/test-json-output.ps1` - Validates JSON output parsing
1492
+
1493
+ ### Jupyter Notebooks
1494
+
1495
+ - `samples/notebooks (plus)/unified_catalog_terms_examples.ipynb` - Complete examples including:
1496
+ - Examples 10-16: Bulk import demonstrations
1497
+ - Code generation for CSV/JSON files
1498
+ - Dry-run and actual import examples
1499
+ - Term verification workflows
1500
+
1501
+ ---
1502
+
1503
+ ## Documentation
1504
+
1505
+ ### Core Documentation
1506
+
1507
+ - **Main Documentation:** [`doc/README.md`](doc/README.md)
1508
+ - **Unified Catalog:** [`doc/commands/unified-catalog.md`](doc/commands/unified-catalog.md)
1509
+ - **Bulk Import Guide:** [`doc/commands/unified-catalog/term-bulk-import.md`](doc/commands/unified-catalog/term-bulk-import.md)
1510
+ - **Data Products:** [`doc/commands/data-product.md`](doc/commands/data-product.md)
1511
+
1512
+ ### Quick Reference
1513
+
1514
+ - **API Coverage:** All major Purview APIs including Unified Catalog, Data Map, Discovery, Collections
1515
+ - **Authentication:** Azure CLI, Service Principal, Managed Identity support
1516
+ - **Output Formats:** Table (default), JSON (plain), JSONC (colored)
1517
+ - **Bulk Operations:** Import/export terms from CSV/JSON, bulk delete scripts
1518
+
1519
+ ---
1520
+
1521
+ ## Recent Updates (October 2025)
1522
+
1523
+ ### Bulk Term Import/Export
1524
+
1525
+ - Import multiple terms from CSV or JSON files
1526
+ - Dry-run mode for validation before import
1527
+ - Support for owners (Entra ID GUIDs), acronyms, resources
1528
+ - Progress tracking and detailed error reporting
1529
+ - 100% success rate in testing (8/8 terms)
1530
+
1531
+ ### PowerShell & Scripting Integration
1532
+
1533
+ - New `--output` parameter with table/json/jsonc formats
1534
+ - Plain JSON works with PowerShell's `ConvertFrom-Json`
1535
+ - Compatible with jq, Python json module, and other tools
1536
+ - Migration from deprecated `--json` flag
1537
+
1538
+ ### Bulk Delete Scripts
1539
+
1540
+ - PowerShell script with interactive confirmation ("DELETE" to confirm)
1541
+ - Python script with Rich progress bars
1542
+ - Beautiful UI with colored output
1543
+ - Success/failure tracking per term
1544
+ - Rate limiting (200ms delay)
1545
+
1546
+ ### Critical Fixes (v1.2.8)
1547
+
1548
+ - **Search API Suggest/Autocomplete:** Fixed HTTP 400 errors by removing empty filter objects from payload
1549
+ - **Collection Display:** Enhanced collection name detection with proper fallback logic (isinstance checks)
1550
+ - **Owner ID Format:** Must use Entra ID Object IDs (GUIDs), not email addresses
1551
+ - **Domain Status:** Terms cannot be "Published" in unpublished domains - use "Draft"
1552
+ - **Error Validation:** Enhanced error handling shows actual API responses
1553
+ - **Windows Console Compatibility:** All emoji removed from CLI output for CP-1252 encoding support
1554
+
1555
+ ---
1556
+
1557
+ ## Key Features Summary
1558
+
1559
+ ### **Unified Catalog (UC) - Complete Management**
1560
+
1561
+ - Governance domains, glossary terms, data products
1562
+ - Objectives & Key Results (OKRs), Critical Data Elements (CDEs)
1563
+ - Health monitoring and workflow automation
1564
+ - Full CRUD operations with smart partial updates
1565
+
1566
+ ### **Bulk Operations**
1567
+
1568
+ - CSV/JSON import with dry-run validation
1569
+ - PowerShell and Python bulk delete scripts
1570
+ - Progress tracking and error handling
1571
+ - Sample files and templates included
1572
+
1573
+ ### **Multiple Output Formats**
1574
+
1575
+ - Table format for human viewing (default)
1576
+ - Plain JSON for PowerShell/bash scripting
1577
+ - Colored JSON for visual inspection
1578
+
1579
+ ### **Automation & Integration**
1580
+
1581
+ - Azure CLI, Service Principal, Managed Identity auth
1582
+ - Works in local development, CI/CD, and production
1583
+ - Compatible with PowerShell, bash, Python, jq
1584
+
1585
+ ### **Comprehensive Documentation**
1586
+
1587
+ - Complete API coverage documentation
1588
+ - Jupyter notebook examples
1589
+ - Troubleshooting guides
1590
+ - Sample files and templates
1591
+
1592
+ ---
1593
+
1594
+ ## Contributing & Support
1595
+
1596
+ - **Documentation:** [Full Documentation](https://github.com/Keayoub/Purview_cli/blob/main/doc/README.md)
1597
+ - **Issue Tracker:** [GitHub Issues](https://github.com/Keayoub/Purview_cli/issues)
1598
+ - **Email Support:** [keayoub@msn.com](mailto:keayoub@msn.com)
1599
+ - **Repository:** [GitHub - Keayoub/Purview_cli](https://github.com/Keayoub/Purview_cli)
1600
+
1601
+ ---
1602
+
1603
+ ## License
1604
+
1605
+ See [LICENSE](LICENSE) file for details.
1606
+
1607
+ ---
1608
+
1609
+ **PVW CLI v1.2.8 empowers data engineers, stewards, and architects to automate, scale, and enhance their Microsoft Purview experience with powerful command-line and programmatic capabilities.**
1610
+
1611
+ **Latest in v1.2.8:**
1612
+
1613
+ - Fixed Search API suggest/autocomplete (HTTP 400 errors resolved)
1614
+ - Enhanced collection display with robust fallback logic
1615
+ - Comprehensive search command validation
1616
+ - Bulk term import/export with dry-run support
1617
+ - PowerShell integration with plain JSON output
1618
+ - Multiple output formats and beautiful progress tracking