pvw-cli 1.0.14__tar.gz → 1.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of pvw-cli might be problematic.
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/PKG-INFO +6 -6
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/README.md +4 -4
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/__init__.py +1 -1
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/cli/entity.py +137 -31
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/cli/search.py +235 -33
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/cli/unified_catalog.py +20 -6
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/client/_entity.py +35 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/client/_unified_catalog.py +7 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/pvw_cli.egg-info/PKG-INFO +6 -6
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/pvw_cli.egg-info/requires.txt +1 -1
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/pyproject.toml +2 -5
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/__main__.py +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/cli/__init__.py +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/cli/account.py +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/cli/cli.py +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/cli/collections.py +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/cli/domain.py +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/cli/glossary.py +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/cli/health.py +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/cli/insight.py +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/cli/lineage.py +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/cli/management.py +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/cli/policystore.py +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/cli/relationship.py +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/cli/scan.py +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/cli/share.py +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/cli/types.py +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/cli/workflow.py +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/client/__init__.py +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/client/_account.py +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/client/_collections.py +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/client/_domain.py +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/client/_glossary.py +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/client/_health.py +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/client/_insight.py +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/client/_lineage.py +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/client/_management.py +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/client/_policystore.py +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/client/_relationship.py +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/client/_scan.py +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/client/_search.py +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/client/_share.py +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/client/_types.py +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/client/_workflow.py +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/client/api_client.py +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/client/business_rules.py +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/client/config.py +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/client/data_quality.py +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/client/endpoint.py +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/client/endpoints.py +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/client/exceptions.py +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/client/lineage_visualization.py +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/client/monitoring_dashboard.py +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/client/rate_limiter.py +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/client/retry_handler.py +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/client/scanning_operations.py +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/client/settings.py +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/client/sync_client.py +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/plugins/__init__.py +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/plugins/plugin_system.py +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/pvw_cli.egg-info/SOURCES.txt +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/pvw_cli.egg-info/dependency_links.txt +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/pvw_cli.egg-info/entry_points.txt +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/pvw_cli.egg-info/not-zip-safe +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/pvw_cli.egg-info/top_level.txt +0 -0
- {pvw_cli-1.0.14 → pvw_cli-1.2.2}/setup.cfg +0 -0

{pvw_cli-1.0.14 → pvw_cli-1.2.2}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pvw-cli
-Version: 1.0.14
+Version: 1.2.2
 Summary: Microsoft Purview CLI with comprehensive automation capabilities
 Author-email: AYOUB KEBAILI <keayoub@msn.com>
 Maintainer-email: AYOUB KEBAILI <keayoub@msn.com>
@@ -34,7 +34,7 @@ Requires-Dist: rich>=12.0.0
 Requires-Dist: requests>=2.28.0
 Requires-Dist: pandas>=1.5.0
 Requires-Dist: aiohttp>=3.8.0
-Requires-Dist: pydantic<
+Requires-Dist: pydantic<2.12,>=1.10.0
 Requires-Dist: PyYAML>=6.0
 Requires-Dist: cryptography<46.0.0,>=41.0.5
 Provides-Extra: dev
@@ -56,7 +56,7 @@ Requires-Dist: pytest-asyncio>=0.20.0; extra == "test"
 Requires-Dist: pytest-cov>=4.0.0; extra == "test"
 Requires-Dist: requests-mock>=1.9.0; extra == "test"
 
-# PURVIEW CLI v1.
+# PURVIEW CLI v1.2.1 - Microsoft Purview Automation & Data Governance
 
 > **LATEST UPDATE (October 2025):**
 > - **NEW: Bulk Term Import/Export** - Import multiple terms from CSV/JSON with dry-run support
@@ -72,7 +72,7 @@ Requires-Dist: requests-mock>=1.9.0; extra == "test"
 
 ## What is PVW CLI?
 
-**PVW CLI v1.
+**PVW CLI v1.2.1** is a modern, full-featured command-line interface and Python library for Microsoft Purview. It enables automation and management of *all major Purview APIs* including:
 
 - **Unified Catalog (UC) Management** - Complete governance domains, glossary terms, data products, OKRs, CDEs
 - **Bulk Operations** - Import/export terms from CSV/JSON, bulk delete scripts with progress tracking
@@ -164,7 +164,7 @@ For more advanced usage, see the documentation in `doc/` or the project docs: ht
 
 ## Overview
 
-**PVW CLI v1.
+**PVW CLI v1.2.1** is a modern command-line interface and Python library for Microsoft Purview, enabling:
 
 - Advanced data catalog search and discovery
 - Bulk import/export of entities, glossary terms, and lineage
@@ -1203,6 +1203,6 @@ See [LICENSE](LICENSE) file for details.
 
 ---
 
-**PVW CLI v1.
+**PVW CLI v1.2.1 empowers data engineers, stewards, and architects to automate, scale, and enhance their Microsoft Purview experience with powerful command-line and programmatic capabilities.**
 
 **Latest Features:** Bulk term import/export, PowerShell integration, multiple output formats, and comprehensive bulk delete scripts with beautiful progress tracking.

{pvw_cli-1.0.14 → pvw_cli-1.2.2}/README.md

@@ -1,4 +1,4 @@
-# PURVIEW CLI v1.
+# PURVIEW CLI v1.2.1 - Microsoft Purview Automation & Data Governance
 
 > **LATEST UPDATE (October 2025):**
 > - **NEW: Bulk Term Import/Export** - Import multiple terms from CSV/JSON with dry-run support
@@ -14,7 +14,7 @@
 
 ## What is PVW CLI?
 
-**PVW CLI v1.
+**PVW CLI v1.2.1** is a modern, full-featured command-line interface and Python library for Microsoft Purview. It enables automation and management of *all major Purview APIs* including:
 
 - **Unified Catalog (UC) Management** - Complete governance domains, glossary terms, data products, OKRs, CDEs
 - **Bulk Operations** - Import/export terms from CSV/JSON, bulk delete scripts with progress tracking
@@ -106,7 +106,7 @@ For more advanced usage, see the documentation in `doc/` or the project docs: ht
 
 ## Overview
 
-**PVW CLI v1.
+**PVW CLI v1.2.1** is a modern command-line interface and Python library for Microsoft Purview, enabling:
 
 - Advanced data catalog search and discovery
 - Bulk import/export of entities, glossary terms, and lineage
@@ -1145,6 +1145,6 @@ See [LICENSE](LICENSE) file for details.
 
 ---
 
-**PVW CLI v1.
+**PVW CLI v1.2.1 empowers data engineers, stewards, and architects to automate, scale, and enhance their Microsoft Purview experience with powerful command-line and programmatic capabilities.**
 
 **Latest Features:** Bulk term import/export, PowerShell integration, multiple output formats, and comprehensive bulk delete scripts with beautiful progress tracking.

{pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/cli/entity.py

@@ -1689,44 +1689,150 @@ def bulk_update_csv(ctx, csv_file, batch_size, dry_run, error_csv):
             return
 
         df = pd.read_csv(csv_file)
-        if
-            console.print("[
+        if df.empty:
+            console.print("[yellow]No rows found in CSV. Exiting.[/yellow]")
             return
+
         entity_client = Entity()
         total = len(df)
         success, failed = 0, 0
         errors = []
         failed_rows = []
+
+        # Determine mode:
+        # - If CSV has both 'typeName' and 'qualifiedName' -> map rows to Purview entities and call bulk create-or-update
+        # - Else if CSV has 'guid' -> build guid-based payloads (preferred for partial attribute updates)
+        has_type_qn = ("typeName" in df.columns and "qualifiedName" in df.columns)
+        has_guid = "guid" in df.columns
+
         for i in range(0, total, batch_size):
-            batch = df.iloc[i:i+batch_size]
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            batch = df.iloc[i : i + batch_size]
+
+            if has_type_qn:
+                # Map flat rows to Purview entity objects using helper
+                from purviewcli.client._entity import map_flat_entity_to_purview_entity
+
+                entities = [map_flat_entity_to_purview_entity(row) for _, row in batch.iterrows()]
+                payload = {"entities": entities}
+
+                if dry_run:
+                    console.print(f"[blue]DRY RUN: Would bulk-create/update batch {i//batch_size+1} with {len(batch)} entities[/blue]")
+                    continue
+
+                with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False, encoding="utf-8") as tmpf:
+                    json.dump(payload, tmpf, indent=2)
+                    tmpf.flush()
+                    payload_file = tmpf.name
+
+                try:
+                    args = {"--payloadFile": payload_file}
+                    result = entity_client.entityCreateBulk(args)
+                    if result and (not isinstance(result, dict) or result.get("status") != "error"):
+                        success += len(batch)
+                    else:
+                        failed += len(batch)
+                        errors.append(f"Batch {i//batch_size+1}: {result}")
+                        failed_rows.extend(batch.to_dict(orient="records"))
+                except Exception as e:
                     failed += len(batch)
-                    errors.append(f"Batch {i//batch_size+1}: {
+                    errors.append(f"Batch {i//batch_size+1}: {str(e)}")
                     failed_rows.extend(batch.to_dict(orient="records"))
-
-
-
-
-
-
+                finally:
+                    try:
+                        os.remove(payload_file)
+                    except Exception:
+                        pass
+
+            elif has_guid:
+                # Build guid-based updates. If the CSV contains only guid + attr columns, we'll attempt to perform
+                # partial attribute updates by calling entityPartialUpdateAttribute where possible.
+                # If a row contains multiple attributes, we will call entityCreateBulk with a payload containing
+                # the guid and attributes (server supports bulk create-or-update by guid in some endpoints).
+
+                # Normalize rows into dicts
+                rows = [row.to_dict() for _, row in batch.iterrows()]
+
+                # Attempt to detect single-attribute update pattern: columns [guid, attrName, attrValue]
+                if set(["guid", "attrName", "attrValue"]).issubset(set(batch.columns)):
+                    # perform per-guid partial updates in batch
+                    for r in rows:
+                        guid = str(r.get("guid"))
+                        attr_name = r.get("attrName")
+                        attr_value = r.get("attrValue")
+                        if pd.isna(guid) or pd.isna(attr_name):
+                            failed += 1
+                            failed_rows.append(r)
+                            continue
+                        if dry_run:
+                            console.print(f"[blue]DRY RUN: Would update GUID {guid} set {attr_name}={attr_value}[/blue]")
+                            success += 1
+                            continue
+                        try:
+                            args = {"--guid": [guid], "--attrName": attr_name, "--attrValue": attr_value}
+                            result = entity_client.entityPartialUpdateAttribute(args)
+                            if result and (not isinstance(result, dict) or result.get("status") != "error"):
+                                success += 1
+                            else:
+                                failed += 1
+                                errors.append(f"GUID {guid}: {result}")
+                                failed_rows.append(r)
+                        except Exception as e:
+                            failed += 1
+                            errors.append(f"GUID {guid}: {str(e)}")
+                            failed_rows.append(r)
+
+                else:
+                    # Fallback: call bulk create-or-update with guid included in each entity object.
+                    # Map each row into an entity dict keeping non-null columns.
+                    entities = []
+                    for r in rows:
+                        if pd.isna(r.get("guid")):
+                            failed_rows.append(r)
+                            failed += 1
+                            continue
+                        ent = {k: v for k, v in r.items() if pd.notnull(v)}
+                        # ensure guid is string under top-level 'guid' field for server bulk endpoints
+                        ent["guid"] = str(ent.get("guid"))
+                        entities.append(ent)
+
+                    if not entities:
+                        continue
+
+                    payload = {"entities": entities}
+                    if dry_run:
+                        console.print(f"[blue]DRY RUN: Would bulk-update (by guid) batch {i//batch_size+1} with {len(entities)} entities[/blue]")
+                        success += len(entities)
+                        continue
+
+                    with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False, encoding="utf-8") as tmpf:
+                        json.dump(payload, tmpf, indent=2)
+                        tmpf.flush()
+                        payload_file = tmpf.name
+
+                    try:
+                        args = {"--payloadFile": payload_file}
+                        # Use the create-or-update bulk endpoint - server will use guid when present
+                        result = entity_client.entityCreateBulk(args)
+                        if result and (not isinstance(result, dict) or result.get("status") != "error"):
+                            success += len(entities)
+                        else:
+                            failed += len(entities)
+                            errors.append(f"Batch {i//batch_size+1}: {result}")
+                            failed_rows.extend(batch.to_dict(orient="records"))
+                    except Exception as e:
+                        failed += len(entities)
+                        errors.append(f"Batch {i//batch_size+1}: {str(e)}")
+                        failed_rows.extend(batch.to_dict(orient="records"))
+                    finally:
+                        try:
+                            os.remove(payload_file)
+                        except Exception:
+                            pass
+
+            else:
+                console.print(f"[red][X] CSV must contain either (typeName and qualifiedName) or guid column[/red]")
+                return
+
         console.print(f"[green][OK] Bulk update completed. Success: {success}, Failed: {failed}[/green]")
         if errors:
             console.print("[red]Errors:[/red]")
@@ -1734,7 +1840,7 @@ def bulk_update_csv(ctx, csv_file, batch_size, dry_run, error_csv):
                 console.print(f"[red]- {err}[/red]")
         if error_csv and failed_rows:
             pd.DataFrame(failed_rows).to_csv(error_csv, index=False)
-            console.print(f"[yellow]
+            console.print(f"[yellow]WARNING: Failed rows written to {error_csv}[/yellow]")
     except Exception as e:
         console.print(f"[red][X] Error executing entity bulk-update-csv: {str(e)}[/red]")
 
@@ -1774,7 +1880,7 @@ def bulk_delete_csv(ctx, csv_file, batch_size, dry_run, error_csv):
                 continue
             try:
                 args = {"--guid": guids}
-                result = entity_client.
+                result = entity_client.entityDeleteBulk(args)
                 if result and (not isinstance(result, dict) or result.get("status") != "error"):
                     success += len(guids)
                 else:
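
The rewritten `bulk-update-csv` dispatches on CSV shape: `typeName` plus `qualifiedName` columns go through the bulk create-or-update path, while a `guid` column (optionally with `attrName`/`attrValue`) goes through partial updates. A minimal sketch of both input shapes, built with pandas; the file names and values here are illustrative, only the column names come from the hunk above:

```python
# Sketch: the two CSV shapes bulk-update-csv dispatches on.
# Column names come from the hunk above; file names and values are illustrative.
import pandas as pd

# Shape 1: typeName + qualifiedName -> rows are mapped to full entities
# and sent through entityCreateBulk (bulk create-or-update).
pd.DataFrame(
    [{"typeName": "azure_sql_table",
      "qualifiedName": "mssql://server/database/SalesLT/Address",
      "description": "Customer address table"}]
).to_csv("entities_by_qualified_name.csv", index=False)

# Shape 2: guid + attrName + attrValue -> each row becomes one
# entityPartialUpdateAttribute call against an existing entity.
pd.DataFrame(
    [{"guid": "00000000-0000-0000-0000-000000000000",
      "attrName": "description",
      "attrValue": "Updated via CSV"}]
).to_csv("updates_by_guid.csv", index=False)
```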

{pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/cli/search.py

@@ -139,12 +139,8 @@ def _format_search_results(data, show_ids=False):
         table = Table(title=f"Search Results ({len(items)} of {count} total)")
         table.add_column("Name", style="cyan", min_width=15, max_width=25)
         table.add_column("Type", style="green", min_width=15, max_width=20)
+        table.add_column("ID", style="yellow", min_width=36, max_width=36)
         table.add_column("Collection", style="blue", min_width=12, max_width=20)
-        table.add_column("Classifications", style="magenta", min_width=15, max_width=30)
-
-        if show_ids:
-            table.add_column("ID", style="yellow", min_width=36, max_width=36)
-
         table.add_column("Qualified Name", style="white", min_width=30)
 
         for item in items:
@@ -158,34 +154,17 @@ def _format_search_results(data, show_ids=False):
             if len(qualified_name) > 60:
                 qualified_name = qualified_name[:57] + "..."
 
-            # Handle collection
+            # Handle collection - try multiple sources
             collection = 'N/A'
             if 'collection' in item and item['collection']:
                 collection = item['collection'].get('name', 'N/A')
+            elif 'collectionId' in item:
+                collection = item.get('collectionId', 'N/A')
+            elif 'assetName' in item:
+                collection = item.get('assetName', 'N/A')
 
-            #
-
-            if 'classification' in item and item['classification']:
-                for cls in item['classification']:
-                    if isinstance(cls, dict):
-                        cls_name = cls.get('typeName', str(cls))
-                        # Simplify Microsoft classifications for display
-                        if cls_name.startswith('MICROSOFT.'):
-                            cls_name = cls_name.replace('MICROSOFT.', 'MS.')
-                        classifications.append(cls_name)
-                    else:
-                        classifications.append(str(cls))
-
-            # Truncate classifications if too long
-            cls_display = ", ".join(classifications) if classifications else ""
-            if len(cls_display) > 40:
-                cls_display = cls_display[:37] + "..."
-
-            # Build row data
-            row_data = [name, entity_type, collection, cls_display]
-            if show_ids:
-                row_data.append(entity_id)
-            row_data.append(qualified_name)
+            # Build row data with ID always shown
+            row_data = [name, entity_type, entity_id, collection, qualified_name]
 
             # Add row to table
             table.add_row(*row_data)
@@ -214,9 +193,9 @@ def _invoke_search_method(method_name, **kwargs):
     # Choose output format
     if output_json:
         _format_json_output(result)
-    elif detailed and method_name in ['searchQuery', 'searchBrowse', 'searchSuggest', '
+    elif detailed and method_name in ['searchQuery', 'searchBrowse', 'searchSuggest', 'searchAutocomplete', 'searchFaceted']:
        _format_detailed_output(result)
-    elif method_name in ['searchQuery', 'searchBrowse', 'searchSuggest', '
+    elif method_name in ['searchQuery', 'searchBrowse', 'searchSuggest', 'searchAutocomplete', 'searchFaceted']:
         _format_search_results(result, show_ids=show_ids)
     else:
         _format_json_output(result)
@@ -230,7 +209,7 @@ def _invoke_search_method(method_name, **kwargs):
 @click.option('--json', 'output_json', is_flag=True, help='Show full JSON details instead of table')
 def autocomplete(keywords, limit, filterfile, output_json):
     """Autocomplete search suggestions"""
-    _invoke_search_method('
+    _invoke_search_method('searchAutocomplete', keywords=keywords, limit=limit, filterFile=filterfile, output_json=output_json)
 
 @search.command()
 @click.option('--entityType', required=False)
@@ -305,7 +284,7 @@ def advanced(keywords, limit, offset, filterfile, facets_file, businessmetadata,
         with open(businessmetadata, 'r', encoding='utf-8') as f:
             business_metadata_content = json.load(f)
     _invoke_search_method(
-        '
+        'searchAdvanced',
         keywords=keywords,
         limit=limit,
         offset=offset,
@@ -316,4 +295,227 @@ def advanced(keywords, limit, offset, filterfile, facets_file, businessmetadata,
         termAssignments=termassignments
     )
 
+@search.command('find-table')
+@click.option('--name', required=False, help='Table name (e.g., Address)')
+@click.option('--schema', required=False, help='Schema name (e.g., SalesLT, dbo)')
+@click.option('--database', required=False, help='Database name (e.g., Adventureworks)')
+@click.option('--server', required=False, help='Server name (e.g., fabricdemos001.database.windows.net)')
+@click.option('--qualified-name', required=False, help='Full qualified name from Purview (e.g., mssql://server/database/schema/table)')
+@click.option('--entity-type', required=False, help='Entity type to search for (e.g., azure_sql_table, mssql_table)')
+@click.option('--limit', required=False, type=int, default=25, help='Maximum number of results to return')
+@click.option('--show-ids', is_flag=True, help='Show entity IDs in the results')
+@click.option('--json', 'output_json', is_flag=True, help='Show full JSON details')
+@click.option('--detailed', is_flag=True, help='Show detailed information')
+@click.option('--id-only', is_flag=True, help='Output only the GUID (useful for scripts and automation)')
+def find_table(name, schema, database, server, qualified_name, entity_type, limit, show_ids, output_json, detailed, id_only):
+    """Find a table by name, schema, database, or get all tables in a schema/database.
+
+    Perfect for getting the GUID of a data asset before updating it.
+    You can search for ONE specific table or ALL tables in a schema/database.
+
+    \b
+    SEARCH ONE SPECIFIC TABLE:
+        pvw search find-table --name Address --schema SalesLT --database Adventureworks
+        pvw search find-table --qualified-name "mssql://server/database/schema/table"
+
+    \b
+    SEARCH MULTIPLE TABLES:
+        pvw search find-table --schema SalesLT --database Adventureworks
+        pvw search find-table --database Adventureworks
+        pvw search find-table --schema SalesLT
+
+    \b
+    GET GUIDS FOR AUTOMATION:
+        pvw search find-table --name Address --schema SalesLT --database Adventureworks --id-only
+        pvw search find-table --schema SalesLT --database Adventureworks --id-only
+
+    \b
+    USE IN SCRIPTS (PowerShell):
+        $guid = pvw search find-table --name Address --schema SalesLT --database Adventureworks --id-only
+        pvw entity update --guid $guid --payload update.json
+
+        $guids = pvw search find-table --schema SalesLT --database Adventureworks --id-only
+        foreach ($guid in $guids) { pvw entity update --guid $guid --payload update.json }
+    """
+    search_client = Search()
+
+    # Validate that at least some search criteria is provided
+    if not name and not qualified_name and not schema and not database:
+        console.print("[red]ERROR:[/red] You must provide at least --name, --qualified-name, --schema, or --database")
+        return
+
+    # Build search pattern
+    search_pattern = qualified_name
+    if not search_pattern:
+        # Build pattern from components
+        # Try to build a full qualified name pattern that matches Purview's format
+        if server and database and schema and name:
+            # Full path with server: mssql://server/database/schema/table
+            search_pattern = f"mssql://{server}/{database}/{schema}/{name}"
+        elif database and schema and name:
+            # Database.schema.table format
+            search_pattern = f"{database}/{schema}/{name}"
+        elif database and schema:
+            # Database.schema format (all tables in schema)
+            search_pattern = f"{database}/{schema}"
+        elif schema and name:
+            # Schema.table format
+            search_pattern = f"{schema}/{name}"
+        elif database:
+            # Just database (all tables in database)
+            search_pattern = database
+        elif schema:
+            # Just schema (all tables in schema)
+            search_pattern = schema
+        elif name:
+            # Just the table name
+            search_pattern = name
+        else:
+            console.print("[red]ERROR:[/red] You must provide at least one search criterion")
+            return
+
+    # For keyword search, use different strategies based on what we have
+    if name:
+        search_keywords = name
+    elif schema:
+        search_keywords = schema
+    elif database:
+        search_keywords = database
+    else:
+        search_keywords = search_pattern.split('/')[-1]
+
+    # Build search arguments - use keywords that will match
+    args = {
+        '--keywords': search_keywords,
+        '--limit': limit,
+        '--offset': 0
+    }
+
+    # Create filter for entity type if specified
+    import tempfile
+    import json
+    import os
+
+    temp_filter_file = None
+
+    if entity_type:
+        filter_obj = {
+            'entityType': entity_type
+        }
+
+        # Write filter to temp file
+        with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False, encoding='utf-8') as f:
+            json.dump(filter_obj, f)
+            temp_filter_file = f.name
+
+        args['--filterFile'] = temp_filter_file
+
+    try:
+        # Execute search
+        result = search_client.searchQuery(args)
+
+        if not result:
+            console.print("[yellow]No results returned from search[/yellow]")
+            if temp_filter_file:
+                os.unlink(temp_filter_file)
+            return
+
+        # Filter results by qualified name match if provided
+        if result and 'value' in result and result['value']:
+            filtered_results = []
+            search_lower = search_pattern.lower()
+
+            for item in result.get('value', []):
+                item_qn = item.get('qualifiedName', '').lower()
+                item_name = item.get('name', '').lower()
+
+                # Build matching criteria
+                matches = False
+
+                # If we have all components, do strict matching
+                if name and schema and database:
+                    # Exact name match (not substring) - critical for precision
+                    name_match = name.lower() == item_name
+                    schema_match = schema.lower() in item_qn
+                    database_match = database.lower() in item_qn
+                    server_match = not server or server.lower() in item_qn
+                    matches = name_match and schema_match and database_match and server_match
+
+                # If we have database and schema (all tables in this schema)
+                elif database and schema and not name:
+                    schema_match = schema.lower() in item_qn
+                    database_match = database.lower() in item_qn
+                    server_match = not server or server.lower() in item_qn
+                    matches = schema_match and database_match and server_match
+
+                # If we have schema and name
+                elif name and schema:
+                    # Exact name match
+                    name_match = name.lower() == item_name
+                    schema_match = schema.lower() in item_qn
+                    matches = name_match and schema_match
+
+                # If we have just database (all tables in this database)
+                elif database and not name and not schema:
+                    database_match = database.lower() in item_qn
+                    server_match = not server or server.lower() in item_qn
+                    matches = database_match and server_match
+
+                # If we have just schema (all tables in this schema)
+                elif schema and not name and not database:
+                    schema_match = schema.lower() in item_qn
+                    matches = schema_match
+
+                # If we have just name or a qualified name pattern
+                elif name or qualified_name:
+                    # If qualified_name was provided, do exact match
+                    if qualified_name:
+                        # Check for exact match of the qualified name
+                        matches = search_lower == item_qn or item_qn.endswith('/' + search_keywords.lower())
+                    else:
+                        # Just name provided, match by name
+                        matches = search_keywords.lower() == item_name
+
+                if matches:
+                    filtered_results.append(item)
+
+            if filtered_results:
+                result['value'] = filtered_results
+                result['@search.count'] = len(filtered_results)
+            else:
+                console.print(f"[yellow]No results found matching '{search_pattern}'[/yellow]")
+                if temp_filter_file:
+                    os.unlink(temp_filter_file)
+                return
+
+        # Display results
+        if id_only:
+            # Output only the ID(s) for scripting purposes
+            if result and 'value' in result and result['value']:
+                for item in result['value']:
+                    print(item.get('id', ''))
+            else:
+                console.print("[yellow]No results found[/yellow]")
+        elif output_json:
+            _format_json_output(result)
+        elif detailed:
+            _format_detailed_output(result)
+        else:
+            _format_search_results(result, show_ids=show_ids)
+
+        # Clean up temp file
+        if temp_filter_file:
+            import os
+            os.unlink(temp_filter_file)
+
+    except Exception as e:
+        console.print(f"[red]ERROR:[/red] {str(e)}")
+        # Clean up temp file on error
+        if temp_filter_file:
+            import os
+            try:
+                os.unlink(temp_filter_file)
+            except:
+                pass
+
 __all__ = ['search']
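
When `--entity-type` is passed, `find-table` narrows the query by writing a one-key filter file and handing it to `searchQuery` via `--filterFile`, as the hunk above shows. A standalone sketch of that round trip; the entityType value is an example taken from the option help text, and the commented call assumes the `Search` client imported in this module:

```python
# Sketch: the filter-file round trip find-table performs for --entity-type.
# The entityType value is an example from the option help text.
import json
import os
import tempfile

with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False, encoding="utf-8") as f:
    json.dump({"entityType": "azure_sql_table"}, f)
    temp_filter_file = f.name

args = {"--keywords": "Address", "--limit": 25, "--offset": 0, "--filterFile": temp_filter_file}
# result = Search().searchQuery(args)  # same call the command issues
os.unlink(temp_filter_file)  # the command removes the temp file afterwards
```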

{pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/cli/unified_catalog.py

@@ -813,6 +813,7 @@ def term():
 @click.option("--name", required=True, help="Name of the glossary term")
 @click.option("--description", required=False, default="", help="Rich text description of the term")
 @click.option("--domain-id", required=True, help="Governance domain ID")
+@click.option("--parent-id", required=False, help="Parent term ID (for hierarchical terms)")
 @click.option(
     "--status",
     required=False,
@@ -834,7 +835,7 @@ def term():
 )
 @click.option("--resource-name", required=False, help="Resource name for additional reading (can be specified multiple times)", multiple=True)
 @click.option("--resource-url", required=False, help="Resource URL for additional reading (can be specified multiple times)", multiple=True)
-def create(name, description, domain_id, status, acronym, owner_id, resource_name, resource_url):
+def create(name, description, domain_id, parent_id, status, acronym, owner_id, resource_name, resource_url):
     """Create a new Unified Catalog term (Governance Domain term)."""
     try:
         client = UnifiedCatalogClient()
@@ -847,6 +848,8 @@ def create(name, description, domain_id, status, acronym, owner_id, resource_nam
             "--status": [status],
         }
 
+        if parent_id:
+            args["--parent-id"] = [parent_id]
         if acronym:
             args["--acronym"] = list(acronym)
         if owner_id:
@@ -1037,6 +1040,7 @@ def delete(term_id, force):
 @click.option("--name", required=False, help="Name of the glossary term")
 @click.option("--description", required=False, help="Rich text description of the term")
 @click.option("--domain-id", required=False, help="Governance domain ID")
+@click.option("--parent-id", required=False, help="Parent term ID (for hierarchical terms)")
 @click.option(
     "--status",
     required=False,
@@ -1059,7 +1063,7 @@ def delete(term_id, force):
 @click.option("--resource-url", required=False, help="Resource URL for additional reading (can be specified multiple times, replaces existing)", multiple=True)
 @click.option("--add-acronym", required=False, help="Add acronym to existing ones (can be specified multiple times)", multiple=True)
 @click.option("--add-owner-id", required=False, help="Add owner to existing ones (can be specified multiple times)", multiple=True)
-def update(term_id, name, description, domain_id, status, acronym, owner_id, resource_name, resource_url, add_acronym, add_owner_id):
+def update(term_id, name, description, domain_id, parent_id, status, acronym, owner_id, resource_name, resource_url, add_acronym, add_owner_id):
     """Update an existing Unified Catalog term."""
     try:
         client = UnifiedCatalogClient()
@@ -1073,6 +1077,8 @@ def update(term_id, name, description, domain_id, status, acronym, owner_id, res
             args["--description"] = [description]
         if domain_id:
             args["--governance-domain-id"] = [domain_id]
+        if parent_id:
+            args["--parent-id"] = [parent_id]
         if status:
             args["--status"] = [status]
 
@@ -1386,7 +1392,7 @@ def update_terms_from_csv(csv_file, dry_run):
     """Bulk update glossary terms from a CSV file.
 
     CSV Format:
-    term_id,name,description,status,acronyms,owner_ids,add_acronyms,add_owner_ids
+    term_id,name,description,status,parent_id,acronyms,owner_ids,add_acronyms,add_owner_ids
 
     Required:
     - term_id: The ID of the term to update
@@ -1395,15 +1401,16 @@ def update_terms_from_csv(csv_file, dry_run):
     - name: New term name (replaces existing)
     - description: New description (replaces existing)
     - status: New status (Draft, Published, Archived)
+    - parent_id: Parent term ID for hierarchical relationships (replaces existing)
     - acronyms: New acronyms separated by semicolons (replaces all existing)
     - owner_ids: New owner IDs separated by semicolons (replaces all existing)
     - add_acronyms: Acronyms to add separated by semicolons (preserves existing)
    - add_owner_ids: Owner IDs to add separated by semicolons (preserves existing)
 
     Example CSV:
-    term_id,name,description,status,add_acronyms,add_owner_ids
-    abc-123,,Updated description,Published,API;REST,user1@company.com
-    def-456,New Name,,,SQL,
+    term_id,name,description,status,parent_id,add_acronyms,add_owner_ids
+    abc-123,,Updated description,Published,parent-term-guid,API;REST,user1@company.com
+    def-456,New Name,,,parent-term-guid,SQL,
     """
     import csv
 
@@ -1440,6 +1447,8 @@ def update_terms_from_csv(csv_file, dry_run):
                 changes.append(f"desc: {update['description'][:50]}...")
             if update.get('status', '').strip():
                 changes.append(f"status: {update['status']}")
+            if update.get('parent_id', '').strip():
+                changes.append(f"parent: {update['parent_id'][:20]}...")
             if update.get('acronyms', '').strip():
                 changes.append(f"acronyms: {update['acronyms']}")
             if update.get('add_acronyms', '').strip():
@@ -1479,6 +1488,8 @@ def update_terms_from_csv(csv_file, dry_run):
                 args['--description'] = [update['description'].strip()]
             if update.get('status', '').strip():
                 args['--status'] = [update['status'].strip()]
+            if update.get('parent_id', '').strip():
+                args['--parent-id'] = [update['parent_id'].strip()]
             if update.get('acronyms', '').strip():
                 args['--acronym'] = [a.strip() for a in update['acronyms'].split(';') if a.strip()]
             if update.get('owner_ids', '').strip():
@@ -1537,6 +1548,7 @@ def update_terms_from_json(json_file, dry_run):
         "name": "New Name", // Optional: Replace name
         "description": "New description", // Optional: Replace description
         "status": "Published", // Optional: Change status
+        "parent_id": "parent-term-guid", // Optional: Set parent term (hierarchical)
         "acronyms": ["API", "REST"], // Optional: Replace all acronyms
         "owner_ids": ["user@company.com"], // Optional: Replace all owners
         "add_acronyms": ["SQL"], // Optional: Add acronyms (preserves existing)
@@ -1599,6 +1611,8 @@ def update_terms_from_json(json_file, dry_run):
             args['--description'] = [update['description']]
         if update.get('status'):
             args['--status'] = [update['status']]
+        if update.get('parent_id'):
+            args['--parent-id'] = [update['parent_id']]
         if update.get('acronyms'):
             args['--acronym'] = update['acronyms'] if isinstance(update['acronyms'], list) else [update['acronyms']]
         if update.get('owner_ids'):
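
With `parent_id` threaded through both bulk paths, term hierarchy can now be set in bulk. A minimal input sketch for `update-terms-from-csv`; the IDs are placeholders, and the column set follows the docstring in the hunks above:

```python
# Sketch: minimal CSV input exercising the new parent_id column.
# IDs are placeholders; columns follow the command docstring above.
import csv

rows = [
    {"term_id": "abc-123", "name": "", "description": "", "status": "",
     "parent_id": "parent-term-guid", "acronyms": "", "owner_ids": "",
     "add_acronyms": "", "add_owner_ids": ""},
]
with open("term_updates.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=list(rows[0].keys()))
    writer.writeheader()
    writer.writerows(rows)
```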

{pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/client/_entity.py

@@ -19,6 +19,41 @@ from .endpoint import Endpoint, decorator, get_json, no_api_call_decorator
 from .endpoints import ENDPOINTS, get_api_version_params
 
 
+def map_flat_entity_to_purview_entity(row):
+    """Map a flat row (pandas Series or dict) into a Purview entity dict.
+
+    Expected minimal input: { 'typeName': 'DataSet', 'qualifiedName': '...','attr1': 'v', ... }
+    Produces: { 'typeName': ..., 'attributes': { 'qualifiedName': ..., 'attr1': 'v', ... } }
+    """
+    try:
+        data = row.to_dict()
+    except Exception:
+        data = dict(row)
+
+    # pop typeName
+    type_name = data.pop("typeName", None)
+
+    # build attributes, skipping null-like values
+    attrs = {}
+    from math import isnan
+
+    for k, v in data.items():
+        # skip empty column names
+        if k is None or (isinstance(k, str) and k.strip() == ""):
+            continue
+        # treat NaN/None as missing
+        try:
+            if v is None:
+                continue
+            if isinstance(v, float) and isnan(v):
+                continue
+        except Exception:
+            pass
+        attrs[k] = v
+
+    return {"typeName": type_name, "attributes": attrs}
+
+
 class Entity(Endpoint):
     """Entity Management Operations - Complete Official API Implementation with 100% Coverage"""
 
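
Per its docstring, the new helper turns one flat row into the nested entity shape the bulk endpoint expects, dropping None/NaN cells along the way. A quick usage sketch, assuming the package layout above:

```python
# Sketch: what map_flat_entity_to_purview_entity produces for one flat row.
from purviewcli.client._entity import map_flat_entity_to_purview_entity

row = {"typeName": "DataSet",
       "qualifiedName": "mssql://server/database/SalesLT/Address",
       "owner": None}  # None/NaN cells are skipped
print(map_flat_entity_to_purview_entity(row))
# {'typeName': 'DataSet', 'attributes': {'qualifiedName': 'mssql://server/database/SalesLT/Address'}}
```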

{pvw_cli-1.0.14 → pvw_cli-1.2.2}/purviewcli/client/_unified_catalog.py

@@ -411,6 +411,11 @@ class UnifiedCatalogClient(Endpoint):
             "status": status,
         }
 
+        # Add parent_id if provided
+        parent_id = args.get("--parent-id", [""])[0]
+        if parent_id:
+            payload["parentId"] = parent_id
+
         # Add optional fields
         if owners:
             payload["contacts"] = {"owner": owners}
@@ -450,6 +455,8 @@ class UnifiedCatalogClient(Endpoint):
             payload["description"] = args.get("--description", [""])[0]
         if args.get("--governance-domain-id"):
             payload["domain"] = args["--governance-domain-id"][0]
+        if args.get("--parent-id"):
+            payload["parentId"] = args["--parent-id"][0]
         if args.get("--status"):
             payload["status"] = args["--status"][0]
 
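
Both client changes land the parent in the request body as `parentId`. A minimal sketch of the resulting term payload when `--parent-id` is supplied; the field values are placeholders:

```python
# Sketch: how --parent-id reaches the term payload as parentId.
# Values are placeholders.
args = {"--parent-id": ["parent-term-guid"]}
payload = {"name": "Customer", "status": "Draft"}

parent_id = args.get("--parent-id", [""])[0]
if parent_id:
    payload["parentId"] = parent_id

print(payload)
# {'name': 'Customer', 'status': 'Draft', 'parentId': 'parent-term-guid'}
```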

{pvw_cli-1.0.14 → pvw_cli-1.2.2}/pvw_cli.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pvw-cli
-Version: 1.0.14
+Version: 1.2.2
 Summary: Microsoft Purview CLI with comprehensive automation capabilities
 Author-email: AYOUB KEBAILI <keayoub@msn.com>
 Maintainer-email: AYOUB KEBAILI <keayoub@msn.com>
@@ -34,7 +34,7 @@ Requires-Dist: rich>=12.0.0
 Requires-Dist: requests>=2.28.0
 Requires-Dist: pandas>=1.5.0
 Requires-Dist: aiohttp>=3.8.0
-Requires-Dist: pydantic<
+Requires-Dist: pydantic<2.12,>=1.10.0
 Requires-Dist: PyYAML>=6.0
 Requires-Dist: cryptography<46.0.0,>=41.0.5
 Provides-Extra: dev
@@ -56,7 +56,7 @@ Requires-Dist: pytest-asyncio>=0.20.0; extra == "test"
 Requires-Dist: pytest-cov>=4.0.0; extra == "test"
 Requires-Dist: requests-mock>=1.9.0; extra == "test"
 
-# PURVIEW CLI v1.
+# PURVIEW CLI v1.2.1 - Microsoft Purview Automation & Data Governance
 
 > **LATEST UPDATE (October 2025):**
 > - **NEW: Bulk Term Import/Export** - Import multiple terms from CSV/JSON with dry-run support
@@ -72,7 +72,7 @@ Requires-Dist: requests-mock>=1.9.0; extra == "test"
 
 ## What is PVW CLI?
 
-**PVW CLI v1.
+**PVW CLI v1.2.1** is a modern, full-featured command-line interface and Python library for Microsoft Purview. It enables automation and management of *all major Purview APIs* including:
 
 - **Unified Catalog (UC) Management** - Complete governance domains, glossary terms, data products, OKRs, CDEs
 - **Bulk Operations** - Import/export terms from CSV/JSON, bulk delete scripts with progress tracking
@@ -164,7 +164,7 @@ For more advanced usage, see the documentation in `doc/` or the project docs: ht
 
 ## Overview
 
-**PVW CLI v1.
+**PVW CLI v1.2.1** is a modern command-line interface and Python library for Microsoft Purview, enabling:
 
 - Advanced data catalog search and discovery
 - Bulk import/export of entities, glossary terms, and lineage
@@ -1203,6 +1203,6 @@ See [LICENSE](LICENSE) file for details.
 
 ---
 
-**PVW CLI v1.
+**PVW CLI v1.2.1 empowers data engineers, stewards, and architects to automate, scale, and enhance their Microsoft Purview experience with powerful command-line and programmatic capabilities.**
 
 **Latest Features:** Bulk term import/export, PowerShell integration, multiple output formats, and comprehensive bulk delete scripts with beautiful progress tracking.

{pvw_cli-1.0.14 → pvw_cli-1.2.2}/pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "pvw-cli"
-version = "1.0.14"
+version = "1.2.2"
 description = "Microsoft Purview CLI with comprehensive automation capabilities"
 readme = "README.md"
 license = "MIT"
@@ -41,7 +41,7 @@ dependencies = [
     "requests>=2.28.0",
     "pandas>=1.5.0",
     "aiohttp>=3.8.0",
-    "pydantic>=1.10.0,<
+    "pydantic>=1.10.0,<2.12",
     "PyYAML>=6.0",
     "cryptography>=41.0.5,<46.0.0",
 ]
@@ -177,6 +177,3 @@ exclude_lines = [
     "class .*\\bProtocol\\):",
     "@(abc\\.)?abstractmethod",
 ]
-
-
-
|