CPILake-Utils 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cpilake_utils-0.0.1/CPILake_Utils/CPILake_Utils.py +1380 -0
- cpilake_utils-0.0.1/CPILake_Utils/__init__.py +12 -0
- cpilake_utils-0.0.1/CPILake_Utils.egg-info/PKG-INFO +445 -0
- cpilake_utils-0.0.1/CPILake_Utils.egg-info/SOURCES.txt +9 -0
- cpilake_utils-0.0.1/CPILake_Utils.egg-info/dependency_links.txt +1 -0
- cpilake_utils-0.0.1/CPILake_Utils.egg-info/requires.txt +1 -0
- cpilake_utils-0.0.1/CPILake_Utils.egg-info/top_level.txt +1 -0
- cpilake_utils-0.0.1/PKG-INFO +445 -0
- cpilake_utils-0.0.1/README.md +436 -0
- cpilake_utils-0.0.1/pyproject.toml +15 -0
- cpilake_utils-0.0.1/setup.cfg +4 -0
|
@@ -0,0 +1,1380 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import json
|
|
3
|
+
import time
|
|
4
|
+
import pytz
|
|
5
|
+
import html
|
|
6
|
+
import base64
|
|
7
|
+
import requests
|
|
8
|
+
import pandas as pd
|
|
9
|
+
# import sempy.fabric as fabric
|
|
10
|
+
from pyspark.sql import SparkSession
|
|
11
|
+
from pyspark.sql import DataFrame, functions as F
|
|
12
|
+
from pyspark.sql.functions import unix_timestamp, col, max, from_utc_timestamp
|
|
13
|
+
from pyspark.conf import SparkConf
|
|
14
|
+
from datetime import datetime
|
|
15
|
+
# from notebookutils.credentials import getSecret
|
|
16
|
+
# from azure.identity import CertificateCredential
|
|
17
|
+
# from tqdm.auto import tqdm
|
|
18
|
+
from typing import Optional, List, Dict, Tuple, Union
|
|
19
|
+
# from fabric.analytics.environment.credentials import SetFabricAnalyticsDefaultTokenCredentials
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
## <<<<<<<<<<<<<<<< hash_function
|
|
23
|
+
|
|
24
|
+
def hash_function(s):
    """Hash an alphanumeric string to a deterministic integer.

    The value is uppercased and stripped of non-alphanumerics, then read
    as a base-36 number; a length-dependent term (len * 36**(len+1)) is
    added so that strings of different lengths land in distinct ranges.
    Returns None for a None input.
    """
    if s is None:
        return None
    cleaned = re.sub(r'[^A-Z0-9]', '', str(s).upper())
    digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    # Horner's rule: equivalent to summing digit * 36**position over the
    # reversed string. Every character survives the regex, so index() is safe.
    acc = 0
    for ch in cleaned:
        acc = acc * 36 + digits.index(ch)
    return acc + len(cleaned) * (36 ** (len(cleaned) + 1))
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
## <<<<<<<<<<<<<<<< send_email_via_http
|
|
39
|
+
|
|
40
|
+
def send_email_via_http(
    body: str,
    to: List[str],
    tenant_id: str,
    client_id: str,
    certificate_secret_name: str,
    keyvault_url: str,
    df_in_body: bool,
    df_attach: bool,
    endpoint_url: Optional[str] = None,
    scope: Optional[str] = None,
    subject: Optional[str] = None,
    headers: Optional[Dict[str, str]] = None,
    timeout: int = 15,
    df=None,
    df_limit: int = 1000,
    tz_name: str = "America/Los_Angeles",
    df_name: str = "data.html",
    cc: Optional[Union[str, List[str]]] = None,
    bcc: Optional[Union[str, List[str]]] = None,
    from_addr: Optional[str] = None,
    attachments: Optional[List[Dict[str, str]]] = None,
) -> Tuple[Optional[int], str, Dict[str, str]]:
    """Send an email through the FDNE Logic App HTTP endpoint.

    Authenticates as a service principal with a certificate pulled from Key
    Vault, optionally renders a DataFrame into the mail body and/or an HTML
    attachment, and POSTs the message payload to the Logic App trigger.

    Parameters
    ----------
    body : HTML (or plain-text) message body.
    to : recipient addresses; a list is joined with ';' for the Logic App.
    tenant_id / client_id / certificate_secret_name / keyvault_url :
        SPN identity settings. When falsy, the Spark conf values
        ``spark.tenantid`` / ``spark.clientid`` / ``spark.certname`` /
        ``spark.vaultname`` are used instead.
    df_in_body / df_attach : how ``df`` (if given) is delivered.
    endpoint_url / scope : override the default Logic App URL / token scope.
    subject : mail subject (required; validation fails without it).
    headers : extra HTTP headers merged over the Authorization header.
    timeout : HTTP timeout in seconds.
    df : optional Spark or pandas DataFrame to render as an HTML table.
    df_limit / tz_name / df_name : row cap for Spark frames, refresh-time
        timezone, and attachment file name.
    cc / bcc / from_addr / attachments : optional extra envelope fields.

    Returns
    -------
    (status_code, response_text, request_headers); ``status_code`` is None
    when validation fails or the HTTP request raises.
    """
    import base64
    import requests
    from notebookutils.credentials import getSecret
    from azure.identity import CertificateCredential
    from pyspark.sql import SparkSession

    spark = SparkSession.builder.appName("send_email_via_http").getOrCreate()

    # Honor explicit arguments; fall back to cluster Spark conf only when
    # absent. (Previously the arguments were unconditionally overwritten.)
    tenant_id = tenant_id or spark.conf.get("spark.tenantid")
    client_id = client_id or spark.conf.get("spark.clientid")
    certificate_secret_name = certificate_secret_name or spark.conf.get("spark.certname")
    keyvault_url = keyvault_url or spark.conf.get("spark.vaultname")

    # Defaults — applied only when the caller did not pass an override.
    endpoint_url = endpoint_url or (
        "https://fdne-inframail-logicapp01.azurewebsites.net:443/"
        "api/fdne-infra-appmail-sender/triggers/"
        "When_a_HTTP_request_is_received/invoke"
        "?api-version=2022-05-01")
    scope = scope or "api://27d45411-0d7a-4f27-bc5f-412d74ea249b/.default"

    # Credential: the Key Vault secret holds a base64-encoded certificate.
    secret_value = getSecret(keyvault_url, certificate_secret_name)
    certificate_data = base64.b64decode(secret_value)

    credential = CertificateCredential(
        tenant_id=tenant_id,
        client_id=client_id,
        certificate_data=certificate_data,
        send_certificate_chain=True
    )

    access_token = credential.get_token(scope).token

    params = {
        "body": body,
        "to": to,
        "subject": subject,
        "df_in_body": df_in_body,
        "df_attach": df_attach,
        "headers": headers,
        "timeout": timeout,
        # Optional extras — previously these keys were read below but never
        # populated, so the cc/bcc/from/attachments/df branches were dead.
        "df": df,
        "df_limit": df_limit,
        "tz_name": tz_name,
        "df_name": df_name,
        "cc": cc,
        "bcc": bcc,
        "from_addr": from_addr,
        "attachments": attachments,
    }

    # Required checks
    required = ['to', 'subject', 'body']
    missing = [f for f in required if not params.get(f)]
    if missing:
        # Same 3-tuple shape as the other returns (no headers built yet).
        return None, f"Missing required fields: {', '.join(missing)}", {}

    # Base payload
    payload = {
        "to": ";".join(params["to"]) if isinstance(params["to"], list) else params["to"],
        "subject": params["subject"],
        "body": params["body"],
    }
    if params.get("cc"):
        payload["cc"] = params["cc"] if isinstance(params["cc"], list) else [params["cc"]]
    if params.get("bcc"):
        payload["bcc"] = params["bcc"] if isinstance(params["bcc"], list) else [params["bcc"]]
    if params.get("from_addr"):
        payload["from"] = params["from_addr"]
    if params.get("attachments"):
        payload["attachments"] = params["attachments"]

    # ---- DataFrame → HTML body ----
    df = params.get("df")
    if df is not None:
        df_limit = int(params.get("df_limit", 1000))
        tz_name = params.get("tz_name", "America/Los_Angeles")
        df_in_body = params.get("df_in_body", True)
        df_attach = params.get("df_attach", False)
        df_name = params.get("df_name", "data.html")

        # Convert Spark frames to pandas (capped at df_limit rows); anything
        # else is assumed to already be pandas-like.
        pdf = None
        try:
            from pyspark.sql import DataFrame as SparkDF
            if isinstance(df, SparkDF):
                pdf = df.limit(df_limit).toPandas()
            else:
                pdf = df  # assume already pandas
        except Exception:
            pdf = df

        html_body = _df_to_html_table(pdf, tz_name=tz_name)

        if df_in_body:
            subject = str(params.get("subject", ""))
            # "QA Success" mails intentionally suppress the data table.
            if "QA Success" in subject:
                payload["body"] = '<html><body><h4>No data available to display.</h4></body></html>'
            else:
                payload["body"] = html_body
        else:
            # Append the rendered table after the caller-supplied body.
            payload["body"] = f'{payload["body"]}{html_body}'

        if df_attach:
            content_b64 = base64.b64encode(html_body.encode("utf-8")).decode("utf-8")
            attach = {"name": df_name, "contentBytes": content_b64, "contentType": "text/html"}
            if "attachments" in payload and isinstance(payload["attachments"], list):
                payload["attachments"].append(attach)
            else:
                payload["attachments"] = [attach]

    # Auth header; caller-supplied headers may add to / override it.
    req_headers = {"Authorization": f"Bearer {access_token}"}
    if params.get("headers"):
        req_headers.update(params["headers"])

    timeout = params.get("timeout", 15)

    # Send
    try:
        response = requests.post(endpoint_url, json=payload, headers=req_headers, timeout=timeout)
        status_msg = "Success" if response.status_code == 200 else f"Failed ({response.status_code})"
        print(f"Email send: {status_msg}")
        return response.status_code, response.text, req_headers
    except requests.RequestException as e:
        error_msg = f"Request failed: {str(e)}"
        print(f"{error_msg}")
        # Same 3-tuple shape as the success path (was a 2-tuple, which broke
        # the documented callers that unpack three values).
        return None, error_msg, req_headers
|
|
177
|
+
"""
|
|
178
|
+
# call the function
|
|
179
|
+
status, response, req_headers = send_email_via_http(
|
|
180
|
+
body = body_html,
|
|
181
|
+
to = RECIPIENTS,
|
|
182
|
+
subject = subject,
|
|
183
|
+
tenant_id = tenant_id,
|
|
184
|
+
client_id = client_id,
|
|
185
|
+
certificate_secret_name = certificate_secret_name,
|
|
186
|
+
keyvault_url = keyvault_url,
|
|
187
|
+
df_in_body = False,
|
|
188
|
+
df_attach = False )
|
|
189
|
+
"""
|
|
190
|
+
|
|
191
|
+
## <<<<<<<<<<<<<<<< _df_to_html_table
|
|
192
|
+
|
|
193
|
+
def _df_to_html_table(pdf, tz_name="America/Los_Angeles"):
    """Render a pandas DataFrame as the styled HTML email table.

    Rows with FailureFlag == 'Yes' are highlighted red and the FailureFlag
    column itself is hidden from the output; all other rows use the default
    light-green background. A None or empty frame yields a short "no data"
    page instead of a table. The header carries a refresh timestamp in the
    given timezone.
    """
    # Empty / missing frame → fixed placeholder page.
    if pdf is None or len(pdf.index) == 0:
        return '<html><body><h4>No data available to display.</h4></body></html>'

    refresh_stamp = datetime.now(pytz.timezone(tz_name)).strftime("%Y-%m-%d %H:%M:%S")

    shown = [name for name in list(pdf.columns) if name != "FailureFlag"]
    has_flag = "FailureFlag" in list(pdf.columns)

    pieces = [
        '<html><head><style>',
        'table {border-collapse: collapse; width: 100%} '
        'table, td, th {border: 1px solid black; padding: 3px; font-size: 9pt;} '
        'td, th {text-align: left;}',
        '</style></head><body>',
        f'<h4>(Refresh Time: {refresh_stamp})</h4><hr>',
        '<table style="width:100%; border-collapse: collapse;">',
        '<thead style="background-color:#000000; color:#ffffff;"><tr>',
    ]
    pieces.extend(
        f'<th style="border: 1px solid black; padding: 5px;">{html.escape(str(name))}</th>'
        for name in shown
    )
    pieces.append('</tr></thead><tbody>')

    for _, record in pdf.iterrows():
        shade = '#ccff66'  # default background
        if has_flag:
            try:
                if str(record["FailureFlag"]).strip().lower() == "yes":
                    shade = '#ff8080'  # failure rows show red
            except Exception:
                pass
        pieces.append(f'<tr style="background-color:{shade};">')
        pieces.extend(
            f'<td>{html.escape("" if record[name] is None else str(record[name]))}</td>'
            for name in shown
        )
        pieces.append('</tr>')

    pieces.append('</tbody></table></body></html>')
    return "".join(pieces)
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
## <<<<<<<<<<<<<<<< send_email_no_attachment
|
|
241
|
+
|
|
242
|
+
def send_email_no_attachment(
    body: str,
    recipients: List[str],
    tenant_id: str,
    client_id: str,
    certificate_secret_name: str,
    keyvault_url: str,
    endpoint_url: Optional[str] = None,
    scope: Optional[str] = None,
    subject: Optional[str] = None,
    headers: Optional[Dict[str, str]] = None,
    timeout: int = 15
) -> Tuple[Optional[int], str]:
    """Send a simple (no-attachment) email via the FDNE Logic App endpoint.

    Authenticates as a service principal with a certificate from Key Vault
    and POSTs a {to, subject, body} payload to the Logic App trigger.

    Parameters
    ----------
    body : message body (HTML or markdown text, passed through verbatim).
    recipients : recipient addresses, joined with ';' for the Logic App.
    tenant_id / client_id / certificate_secret_name / keyvault_url :
        SPN identity settings; when falsy, the Spark conf values
        ``spark.tenantid`` / ``spark.clientid`` / ``spark.certname`` /
        ``spark.vaultname`` are used instead.
    endpoint_url / scope : override the default Logic App URL / token scope.
    subject : mail subject; defaults to an empty string.
    headers : extra HTTP headers merged over the defaults.
    timeout : HTTP timeout in seconds.

    Returns
    -------
    (status_code, response_text); ``status_code`` is None when the HTTP
    request itself raises.
    """
    import base64
    import requests
    from notebookutils.credentials import getSecret
    from azure.identity import CertificateCredential
    from pyspark.sql import SparkSession
    spark = SparkSession.builder.appName("send_email_no_attachment").getOrCreate()

    # Honor explicit arguments; fall back to cluster Spark conf only when
    # absent. (Previously the arguments were unconditionally overwritten.)
    tenant_id = tenant_id or spark.conf.get("spark.tenantid")
    client_id = client_id or spark.conf.get("spark.clientid")
    certificate_secret_name = certificate_secret_name or spark.conf.get("spark.certname")
    keyvault_url = keyvault_url or spark.conf.get("spark.vaultname")

    # Defaults — used only when not overridden by the caller (previously the
    # endpoint_url / scope parameters were accepted but ignored).
    endpoint_url = endpoint_url or (
        "https://fdne-inframail-logicapp01.azurewebsites.net:443/"
        "api/fdne-infra-appmail-sender/triggers/"
        "When_a_HTTP_request_is_received/invoke"
        "?api-version=2022-05-01")
    scope = scope or "api://27d45411-0d7a-4f27-bc5f-412d74ea249b/.default"

    # Payload — the Logic App expects ';'-separated recipients.
    payload = {
        "to": ";".join(recipients),
        "subject": subject or "",
        "body": body, }

    # Credential: the Key Vault secret holds a base64-encoded certificate.
    secret_value = getSecret(keyvault_url, certificate_secret_name)
    certificate_data = base64.b64decode(secret_value)

    credential = CertificateCredential(
        tenant_id=tenant_id,
        client_id=client_id,
        certificate_data=certificate_data,
        send_certificate_chain=True
    )

    access_token = credential.get_token(scope).token

    # Headers — caller-supplied entries may override the defaults.
    request_headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {access_token}",
        **(headers or {}),
    }

    # Call API
    try:
        resp = requests.post(
            endpoint_url,
            json=payload,
            headers=request_headers,
            timeout=timeout,
        )

        if resp.status_code in (200, 201, 202):
            return resp.status_code, resp.text

        return resp.status_code, f"Failed: {resp.text}"

    except requests.RequestException as e:
        return None, str(e)
|
|
319
|
+
"""
|
|
320
|
+
# call the function
|
|
321
|
+
status, response = send_email_no_attachment(
|
|
322
|
+
body=markdown,
|
|
323
|
+
recipients=recipients,
|
|
324
|
+
subject=subject,
|
|
325
|
+
tenant_id=tenant_id,
|
|
326
|
+
client_id=client_id,
|
|
327
|
+
certificate_secret_name = certificate_secret_name,
|
|
328
|
+
keyvault_url=keyvault_url
|
|
329
|
+
)
|
|
330
|
+
"""
|
|
331
|
+
|
|
332
|
+
# >>>>>>>>>>>>>>>>> end_email_no_attachment
|
|
333
|
+
|
|
334
|
+
|
|
335
|
+
## <<<<<<<<<<<<<<<< QA_CheckUtil
|
|
336
|
+
"""
|
|
337
|
+
A status column: PASS / FAIL / SKIPPED
|
|
338
|
+
A skip_reason column
|
|
339
|
+
Checks are skipped instead of failing when:
|
|
340
|
+
One or both DataFrames are empty
|
|
341
|
+
Required columns are missing
|
|
342
|
+
Aggregation column exists but contains only nulls
|
|
343
|
+
match becomes None when skipped (clearer than False)
|
|
344
|
+
"""
|
|
345
|
+
def QA_CheckUtil(
    source_df: DataFrame,
    qa_df: DataFrame
) -> DataFrame:
    """Compare a source DataFrame against a QA DataFrame and return a report.

    The report has one row per check with columns
    (check_type, check_name, column_name, source_value, qa_value, diff,
    match, status, skip_reason). Checks performed:

    - ROW_COUNT: total rows in each frame.
    - NULL_CHECK: null counts per column shared by both frames.
    - AGG_CHECK: sum of the 'amount' column (when present in both).
    - DUPLICATE_CHECK: duplicate counts on the 'id' column (when present).

    A check is SKIPPED (match/diff = None, skip_reason set) rather than
    failed when the required columns are missing or contain only nulls.
    """
    spark = source_df.sparkSession
    qa_rows: List[tuple] = []

    def calc_diff(src, qa):
        # Numeric difference; None when either side is missing (SKIPPED rows).
        if src is None or qa is None:
            return None
        return float(src) - float(qa)

    def add_row(check_type, check_name, column, src, qa, skip_reason=None):
        # Append one report tuple; a skip_reason forces a SKIPPED row with
        # no diff/match, otherwise PASS/FAIL is derived from equality.
        if skip_reason:
            qa_rows.append((
                check_type,
                check_name,
                column,
                src,
                qa,
                None,
                None,
                "SKIPPED",
                skip_reason
            ))
        else:
            match = src == qa
            qa_rows.append((
                check_type,
                check_name,
                column,
                src,
                qa,
                calc_diff(src, qa),
                match,
                "PASS" if match else "FAIL",
                None
            ))

    # Row count — computed once and re-used by the duplicate check below
    # (previously count() was recomputed there).
    src_count = source_df.count()
    qa_count = qa_df.count()

    add_row(
        "ROW_COUNT",
        "row_count",
        None,
        float(src_count),
        float(qa_count)
    )

    # Null check per shared column.
    common_cols = set(source_df.columns).intersection(set(qa_df.columns))

    if not common_cols:
        add_row(
            "NULL_CHECK",
            "null_count",
            None,
            None,
            None,
            "No common columns between source and QA"
        )
    else:
        # sorted() makes the report order deterministic (set iteration order
        # is not stable); the loop variable is renamed so it no longer
        # shadows pyspark.sql.functions.col imported at module level.
        for col_name in sorted(common_cols):
            src_nulls = source_df.filter(F.col(col_name).isNull()).count()
            qa_nulls = qa_df.filter(F.col(col_name).isNull()).count()

            add_row(
                "NULL_CHECK",
                "null_count",
                col_name,
                float(src_nulls),
                float(qa_nulls)
            )

    # Aggregation check on 'amount'
    if "amount" not in source_df.columns or "amount" not in qa_df.columns:
        add_row(
            "AGG_CHECK",
            "sum",
            "amount",
            None,
            None,
            "Column 'amount' missing in one or both DataFrames"
        )
    else:
        src_sum = source_df.select(F.sum("amount")).collect()[0][0]
        qa_sum = qa_df.select(F.sum("amount")).collect()[0][0]

        if src_sum is None and qa_sum is None:
            add_row(
                "AGG_CHECK",
                "sum",
                "amount",
                None,
                None,
                "All values are NULL in both DataFrames"
            )
        else:
            # A one-sided all-NULL sum is treated as 0.0 so the comparison
            # still produces a PASS/FAIL row.
            add_row(
                "AGG_CHECK",
                "sum",
                "amount",
                float(src_sum or 0.0),
                float(qa_sum or 0.0)
            )

    # Duplicate check on the 'id' column.
    if "id" not in source_df.columns or "id" not in qa_df.columns:
        add_row(
            "DUPLICATE_CHECK",
            "duplicate_id",
            "id",
            None,
            None,
            "Column 'id' missing in one or both DataFrames"
        )
    else:
        src_dupes = src_count - source_df.select("id").distinct().count()
        qa_dupes = qa_count - qa_df.select("id").distinct().count()

        add_row(
            "DUPLICATE_CHECK",
            "duplicate_id",
            "id",
            float(src_dupes),
            float(qa_dupes)
        )

    # Create final QA DataFrame
    return spark.createDataFrame(
        qa_rows,
        [
            "check_type",
            "check_name",
            "column_name",
            "source_value",
            "qa_value",
            "diff",
            "match",
            "status",
            "skip_reason"
        ]
    )
|
|
493
|
+
|
|
494
|
+
"""
|
|
495
|
+
## <<<<<<<<<<<<<<<< create_lakehouse_shortcuts SPN
|
|
496
|
+
|
|
497
|
+
def create_lakehouse_shortcuts_02(shortcut_configs):
|
|
498
|
+
import base64
|
|
499
|
+
import requests
|
|
500
|
+
from notebookutils.credentials import getSecret
|
|
501
|
+
from azure.identity import CertificateCredential
|
|
502
|
+
|
|
503
|
+
tenant_id = spark.conf.get("spark.tenantid")
|
|
504
|
+
client_id = spark.conf.get("spark.clientid")
|
|
505
|
+
certificate_secret_name = spark.conf.get("spark.certname")
|
|
506
|
+
keyvault_url = spark.conf.get("spark.vaultname")
|
|
507
|
+
|
|
508
|
+
# Defaults
|
|
509
|
+
endpoint_url = (
|
|
510
|
+
"https://fdne-inframail-logicapp01.azurewebsites.net:443/"
|
|
511
|
+
"api/fdne-infra-appmail-sender/triggers/"
|
|
512
|
+
"When_a_HTTP_request_is_received/invoke"
|
|
513
|
+
"?api-version=2022-05-01" )
|
|
514
|
+
|
|
515
|
+
scope = "api://27d45411-0d7a-4f27-bc5f-412d74ea249b/.default"
|
|
516
|
+
|
|
517
|
+
# Credential
|
|
518
|
+
secret_value = getSecret(keyvault_url, certificate_secret_name)
|
|
519
|
+
certificate_data = base64.b64decode(secret_value)
|
|
520
|
+
|
|
521
|
+
credential = CertificateCredential(
|
|
522
|
+
tenant_id=tenant_id,
|
|
523
|
+
client_id=client_id,
|
|
524
|
+
certificate_data=certificate_data,
|
|
525
|
+
send_certificate_chain=True
|
|
526
|
+
)
|
|
527
|
+
|
|
528
|
+
access_token = credential.get_token(scope).token
|
|
529
|
+
|
|
530
|
+
headers = {"Authorization": f"Bearer {access_token}", "Content-Type": "application/json" }
|
|
531
|
+
print("Access token starts with:", access_token[:20])
|
|
532
|
+
|
|
533
|
+
for config in shortcut_configs:
|
|
534
|
+
source_path = config["source_subpath"]
|
|
535
|
+
target_schema = config["target_schema"]
|
|
536
|
+
workspace_name = config["workspace_name"]
|
|
537
|
+
lakehouse_name = config["lakehouse_name"]
|
|
538
|
+
target_shortcut_name = config["target_shortcut_name"]
|
|
539
|
+
|
|
540
|
+
resp_ws = requests.get("https://api.fabric.microsoft.com/v1/workspaces", headers=headers)
|
|
541
|
+
resp_ws.raise_for_status()
|
|
542
|
+
workspace_id = next(ws["id"] for ws in resp_ws.json()["value"] if ws["displayName"] == workspace_name)
|
|
543
|
+
|
|
544
|
+
resp_lh = requests.get(f"https://api.fabric.microsoft.com/v1/workspaces/{workspace_id}/lakehouses", headers=headers)
|
|
545
|
+
resp_lh.raise_for_status()
|
|
546
|
+
lakehouse_id = next(lh["id"] for lh in resp_lh.json()["value"] if lh["displayName"] == lakehouse_name)
|
|
547
|
+
|
|
548
|
+
target_path = f"Tables/{target_schema or 'dbo'}/"
|
|
549
|
+
|
|
550
|
+
payload = {
|
|
551
|
+
"path": target_path,
|
|
552
|
+
"name": target_shortcut_name,
|
|
553
|
+
"target": {
|
|
554
|
+
"type": "OneLake",
|
|
555
|
+
"oneLake": {
|
|
556
|
+
"workspaceId" : workspace_id,
|
|
557
|
+
"itemId" : lakehouse_id,
|
|
558
|
+
"path" : source_path,
|
|
559
|
+
"target_schema" : config["target_schema"]
|
|
560
|
+
}
|
|
561
|
+
}
|
|
562
|
+
}
|
|
563
|
+
|
|
564
|
+
url = f"https://api.fabric.microsoft.com/v1/workspaces/{workspace_id}/items/{lakehouse_id}/shortcuts"
|
|
565
|
+
print(f"Creating shortcut '{target_shortcut_name}' → {target_path}")
|
|
566
|
+
print(json.dumps(payload, indent=2))
|
|
567
|
+
|
|
568
|
+
# --- Send POST request ---
|
|
569
|
+
response = requests.post(url, headers=headers, json=payload)
|
|
570
|
+
|
|
571
|
+
if response.status_code in [200, 201]:
|
|
572
|
+
print(f"Shortcut '{target_shortcut_name}' created successfully.")
|
|
573
|
+
print(response.json())
|
|
574
|
+
else:
|
|
575
|
+
print(f"Failed to create shortcut '{target_shortcut_name}'.")
|
|
576
|
+
print("Status Code:", response.status_code)
|
|
577
|
+
print("Response:", response.text)
|
|
578
|
+
|
|
579
|
+
# How to call function
|
|
580
|
+
|
|
581
|
+
shortcut_configs = [
|
|
582
|
+
{
|
|
583
|
+
"target_shortcut_name" : "DIM_Date",
|
|
584
|
+
"workspace_name" : "FDnECostHubReporting_DEV",
|
|
585
|
+
"lakehouse_name" : "Cost_Hub",
|
|
586
|
+
"source_subpath" : "Tables/DIM_Date",
|
|
587
|
+
"target_schema" : "CostHub",
|
|
588
|
+
}
|
|
589
|
+
]
|
|
590
|
+
create_lakehouse_shortcuts_02(shortcut_configs)
|
|
591
|
+
|
|
592
|
+
"""
|
|
593
|
+
|
|
594
|
+
## <<<<<<<<<<<<<<<< create_lakehouse_shortcuts MI
|
|
595
|
+
|
|
596
|
+
def create_lakehouse_shortcuts(shortcut_configs):
    """Create OneLake shortcuts in the notebook's own lakehouse (MI auth).

    Authenticates with the notebook's managed identity token and POSTs one
    shortcut-creation request per config dict to the Fabric REST API.
    Results are reported via print; failures do not raise.

    Each config dict must provide:
        source_subpath       - OneLake path the shortcut points at
        target_schema        - schema folder under Tables/ to create it in
        target_shortcut_name - display name of the new shortcut
    """
    import requests, json
    import sempy.fabric as fabric
    from notebookutils.credentials import getToken

    # Managed-identity token for the Fabric API.
    access_token = getToken("https://api.fabric.microsoft.com/.default")

    headers = {"Authorization": f"Bearer {access_token}", "Content-Type": "application/json" }

    # Both the shortcut and its target live in the notebook's workspace /
    # default lakehouse — NOTE(review): source and target are the same item
    # here; confirm that is intended.
    workspace_id = fabric.get_notebook_workspace_id()
    lakehouse_id = fabric.get_lakehouse_id()

    for config in shortcut_configs:
        source_path = config["source_subpath"]
        target_schema = config["target_schema"]
        target_shortcut_name = config["target_shortcut_name"]
        target_path = f"Tables/{target_schema}/"

        payload = {
            "path": target_path,
            "name": target_shortcut_name,
            "target": {
                "type": "OneLake",
                "oneLake": {
                    "workspaceId" : workspace_id,
                    "itemId" : lakehouse_id,
                    "path" : source_path,
                    # NOTE(review): 'target_schema' does not look like a
                    # documented oneLake target field — verify against the
                    # Fabric Create Shortcut API; it may simply be ignored.
                    "target_schema" : config["target_schema"]
                }
            }
        }

        url = f"https://api.fabric.microsoft.com/v1/workspaces/{workspace_id}/items/{lakehouse_id}/shortcuts"
        print(f"Creating shortcut '{target_shortcut_name}' → {target_path}")
        print(json.dumps(payload, indent=2))

        # --- Send POST request ---
        response = requests.post(url, headers=headers, json=payload)

        # Best-effort reporting: failures are printed, not raised, so the
        # remaining configs still get processed.
        if response.status_code in [200, 201]:
            print(f"Shortcut '{target_shortcut_name}' created successfully.")
            print(response.json())
        else:
            print(f"Failed to create shortcut '{target_shortcut_name}'.")
            print("Status Code:", response.status_code)
            print("Response:", response.text)
|
|
642
|
+
|
|
643
|
+
"""
|
|
644
|
+
# How to call function
|
|
645
|
+
|
|
646
|
+
target_schema = spark.conf.get("spark.CostHubSchema")
|
|
647
|
+
|
|
648
|
+
shortcut_configs = [
|
|
649
|
+
{
|
|
650
|
+
"target_shortcut_name" : "DIM_Date_01",
|
|
651
|
+
"source_subpath" : "Tables/CostHub/DIM_Date",
|
|
652
|
+
"target_schema" : target_schema,
|
|
653
|
+
"target_path" : "Tables/CostHub/"
|
|
654
|
+
}
|
|
655
|
+
]
|
|
656
|
+
create_lakehouse_shortcuts(shortcut_configs)
|
|
657
|
+
"""
|
|
658
|
+
|
|
659
|
+
|
|
660
|
+
## <<<<<<<<<<<<<<<< create_adls_shortcuts with MI
|
|
661
|
+
|
|
662
|
+
def create_adls_shortcuts(shortcut_configs):
    """Create ADLS Gen2 shortcuts in the notebook's lakehouse (MI auth).

    Authenticates with the notebook's managed identity, resolves each
    config's named Fabric connection to its id and ADLS location, and POSTs
    one shortcut-creation request per config. Results are printed; HTTP
    failures on the POST do not raise.

    Each config dict must provide:
        name            - display name of the new shortcut
        target_schema   - schema folder under Tables/ to create it in
        connection_name - display name of the Fabric ADLS connection
        subpath         - container/path inside the ADLS account
    """
    import requests
    import sempy.fabric as fabric
    from notebookutils.credentials import getToken

    access_token = getToken("https://api.fabric.microsoft.com/.default")

    headers = { "Authorization": f"Bearer {access_token}", "Content-Type": "application/json" }

    workspace_id = fabric.get_notebook_workspace_id()
    lakehouse_id = fabric.get_lakehouse_id()

    # Fetch the connection list once and re-use it for every config.
    resp_cn = requests.get("https://api.fabric.microsoft.com/v1/connections", headers=headers)
    resp_cn.raise_for_status()  # was missing: a failed call surfaced as an opaque KeyError
    connections = resp_cn.json()["value"]

    # Single loop per config. The original nested a second
    # `for config in shortcut_configs:` inside this one, shadowing the loop
    # variable and issuing N*N POSTs with mismatched connection/target_path.
    for config in shortcut_configs:
        target_schema = config["target_schema"]
        connection_name = config["connection_name"]
        target_path = f"Tables/{target_schema}/"

        # Resolve the connection once; raises StopIteration if the display
        # name is not found (same behavior as the original next()).
        conn = next(c for c in connections if c["displayName"] == connection_name)
        connection_id = conn["id"]
        location = conn["connectionDetails"]["path"]

        payload = {
            "name": config["name"],
            "path": target_path,
            "target": {
                "type": "AdlsGen2",
                "adlsGen2": {
                    "connectionId": connection_id,
                    "location": location,
                    "subpath": config["subpath"] } } }

        url = (f"https://api.fabric.microsoft.com/v1/workspaces/"
               f"{workspace_id}/items/{lakehouse_id}/shortcuts" )

        response = requests.post(url, headers=headers, json=payload)

        # Best-effort reporting so remaining configs still get processed.
        if response.status_code in (200, 201):
            print(f"Shortcut '{config['name']}' created successfully.")
        else:
            print(f"Failed to create shortcut '{config['name']}'.")
            print("Status Code:", response.status_code)
            print("Response:", response.text)
|
|
707
|
+
|
|
708
|
+
"""
|
|
709
|
+
# How to call function
|
|
710
|
+
|
|
711
|
+
target_workspace_id = spark.conf.get("spark.workspaceid")
|
|
712
|
+
target_lakehouse_id = spark.conf.get("spark.lakehouseid")
|
|
713
|
+
target_schema = spark.conf.get("spark.CostHubSchema")
|
|
714
|
+
target_location = spark.conf.get("spark.adlslocation")
|
|
715
|
+
target_connection = spark.conf.get("spark.connectionid")
|
|
716
|
+
|
|
717
|
+
# Define shortcut configurations
|
|
718
|
+
shortcut_configs = [
|
|
719
|
+
{
|
|
720
|
+
"name" : "Z-RefreshTime_01",
|
|
721
|
+
"target_schema" : target_schema,
|
|
722
|
+
"connection_name" : "CostHub_ADLS abibrahi",
|
|
723
|
+
"target_path" : f"Tables/{target_schema}/",
|
|
724
|
+
"subpath" : "/abidatamercury/MercuryDataProd/CostHub/Bridge_ExecOrgSummary"
|
|
725
|
+
}]
|
|
726
|
+
|
|
727
|
+
# Call the function
|
|
728
|
+
create_adls_shortcuts(shortcut_configs)
|
|
729
|
+
|
|
730
|
+
"""
|
|
731
|
+
"""
|
|
732
|
+
## <<<<<<<<<<<<<<<< create_adls_shortcuts with spn
|
|
733
|
+
|
|
734
|
+
def create_adls_shortcuts_02(shortcut_configs):
|
|
735
|
+
# access_token = notebookutils.credentials.getToken("https://api.fabric.microsoft.com/.default")
|
|
736
|
+
import requests
|
|
737
|
+
import sempy.fabric as fabric
|
|
738
|
+
from notebookutils.credentials import getToken
|
|
739
|
+
|
|
740
|
+
spark = SparkSession.builder.getOrCreate()
|
|
741
|
+
|
|
742
|
+
tenant_id = spark.conf.get("spark.tenantid")
|
|
743
|
+
client_id = spark.conf.get("spark.clientid")
|
|
744
|
+
certificate_secret_name = spark.conf.get("spark.certname")
|
|
745
|
+
keyvault_url = spark.conf.get("spark.vaultname")
|
|
746
|
+
|
|
747
|
+
# Defaults
|
|
748
|
+
endpoint_url = (
|
|
749
|
+
"https://fdne-inframail-logicapp01.azurewebsites.net:443/"
|
|
750
|
+
"api/fdne-infra-appmail-sender/triggers/"
|
|
751
|
+
"When_a_HTTP_request_is_received/invoke"
|
|
752
|
+
"?api-version=2022-05-01" )
|
|
753
|
+
|
|
754
|
+
scope = "api://27d45411-0d7a-4f27-bc5f-412d74ea249b/.default"
|
|
755
|
+
|
|
756
|
+
# Credential
|
|
757
|
+
secret_value = getSecret(keyvault_url, certificate_secret_name)
|
|
758
|
+
certificate_data = base64.b64decode(secret_value)
|
|
759
|
+
|
|
760
|
+
credential = CertificateCredential(
|
|
761
|
+
tenant_id=tenant_id,
|
|
762
|
+
client_id=client_id,
|
|
763
|
+
certificate_data=certificate_data,
|
|
764
|
+
send_certificate_chain=True
|
|
765
|
+
)
|
|
766
|
+
|
|
767
|
+
access_token = credential.get_token(scope).token
|
|
768
|
+
|
|
769
|
+
headers = {"Authorization": f"Bearer {access_token}", "Content-Type": "application/json" }
|
|
770
|
+
print("Access token starts with:", access_token[:20])
|
|
771
|
+
|
|
772
|
+
for config in shortcut_configs:
|
|
773
|
+
target_schema = config["target_schema"]
|
|
774
|
+
workspace_name = config["workspace_name"]
|
|
775
|
+
lakehouse_name = config["lakehouse_name"]
|
|
776
|
+
connection_name = config["connection_name"]
|
|
777
|
+
target_path = f"Tables/{target_schema}/"
|
|
778
|
+
|
|
779
|
+
resp_ws = requests.get("https://api.fabric.microsoft.com/v1/workspaces", headers=headers)
|
|
780
|
+
resp_ws.raise_for_status()
|
|
781
|
+
workspace_id = next(ws["id"] for ws in resp_ws.json()["value"] if ws["displayName"] == workspace_name)
|
|
782
|
+
|
|
783
|
+
resp_lh = requests.get(f"https://api.fabric.microsoft.com/v1/workspaces/{workspace_id}/lakehouses", headers=headers)
|
|
784
|
+
resp_lh.raise_for_status()
|
|
785
|
+
lakehouse_id = next(lh["id"] for lh in resp_lh.json()["value"] if lh["displayName"] == lakehouse_name)
|
|
786
|
+
|
|
787
|
+
resp_cn = requests.get(f"https://api.fabric.microsoft.com/v1/connections", headers=headers)
|
|
788
|
+
resp_cn.raise_for_status()
|
|
789
|
+
connection_id = next(conn["id"] for conn in resp_cn.json()["value"] if conn["displayName"] == connection_name)
|
|
790
|
+
|
|
791
|
+
conn_loc = next(conn for conn in resp_cn.json()["value"] if conn["displayName"] == connection_name)
|
|
792
|
+
location = conn_loc["connectionDetails"]["path"]
|
|
793
|
+
|
|
794
|
+
payload = {
|
|
795
|
+
"name": config["name"],
|
|
796
|
+
"path": target_path,
|
|
797
|
+
"target": {
|
|
798
|
+
"type": "AdlsGen2",
|
|
799
|
+
"adlsGen2": {
|
|
800
|
+
"connectionId" : connection_id,
|
|
801
|
+
"location" : location,
|
|
802
|
+
"subpath" : config["subpath"]
|
|
803
|
+
}
|
|
804
|
+
}
|
|
805
|
+
}
|
|
806
|
+
|
|
807
|
+
url = f"https://api.fabric.microsoft.com/v1/workspaces/{workspace_id}/items/{lakehouse_id}/shortcuts"
|
|
808
|
+
response = requests.post(url, headers=headers, json=payload)
|
|
809
|
+
|
|
810
|
+
if response.status_code in [200, 201]:
|
|
811
|
+
print(f"Shortcut '{config['name']}' created successfully.")
|
|
812
|
+
else:
|
|
813
|
+
print(f"Failed to create shortcut '{config['name']}'.")
|
|
814
|
+
print("Status Code:", response.status_code)
|
|
815
|
+
print("Response:", response.text)
|
|
816
|
+
|
|
817
|
+
|
|
818
|
+
# How to call function
|
|
819
|
+
|
|
820
|
+
# Define shortcut configurations
|
|
821
|
+
shortcut_configs = [
|
|
822
|
+
{
|
|
823
|
+
"name" : "Bridge_ExecOrgSummary",
|
|
824
|
+
"target_schema" : "CostHub",
|
|
825
|
+
"workspace_name" : "FDnECostHubReporting_DEV",
|
|
826
|
+
"lakehouse_name" : "Cost_Hub",
|
|
827
|
+
"connection_name" : "CostHub_ADLS abibrahi",
|
|
828
|
+
"subpath" : "/abidatamercury/MercuryDataProd/CostHub/Bridge_ExecOrgSummary"
|
|
829
|
+
}]
|
|
830
|
+
|
|
831
|
+
# Call the function
|
|
832
|
+
create_adls_shortcuts_02(shortcut_configs)
|
|
833
|
+
"""
|
|
834
|
+
|
|
835
|
+
## <<<<<<<<<<<<<<<< lakehouse_metadata_sync
|
|
836
|
+
|
|
837
|
+
def pad_or_truncate_string(input_string, length, pad_char=' '):
    """Force `input_string` to exactly `length` characters.

    Strings that fit are right-padded with `pad_char`; longer strings
    are cut down to the first `length` characters.
    """
    if len(input_string) <= length:
        return input_string.ljust(length, pad_char)
    return input_string[:length]
|
|
841
|
+
|
|
842
|
+
def lakehouse_metadata_sync(workspace_id, lakehouse_id, client=None, poll_interval=2.0):
    """Trigger a metadata refresh on a lakehouse's SQL endpoint and wait for it.

    Parameters:
        workspace_id (str): Fabric workspace id containing the lakehouse.
        lakehouse_id (str): Lakehouse item id whose SQL endpoint is refreshed.
        client: Optional REST client exposing ``get(uri)`` / ``post(uri, json=...)``
            whose responses have a ``.json()`` method. When ``None``, a
            ``sempy.fabric.FabricRestClient`` is created (requires sempy at runtime).
        poll_interval (float): Seconds to wait between status polls (default 2.0,
            matching the original hard-coded sleep).

    Returns:
        dict: one of
            ``{"status": "success", "details": [...]}``,
            ``{"status": "failure", "error": ...}``,
            ``{"status": "unknown", "raw_response": ...}``,
            ``{"status": "exception", "error": str}``.
    """
    if client is None:
        # The module-level `import sempy.fabric as fabric` is commented out at
        # the top of this file, so the original `fabric.FabricRestClient()`
        # raised NameError. Import lazily here, and let callers inject their
        # own client (which also makes this function testable offline).
        import sempy.fabric as fabric
        client = fabric.FabricRestClient()

    # Resolve the SQL endpoint id from the lakehouse properties.
    lakehouse_props = client.get(f"/v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}").json()
    sql_endpoint = lakehouse_props['properties']['sqlEndpointProperties']['id']

    # Metadata-refresh command payload for the lhdatamarts endpoint.
    uri = f"/v1.0/myorg/lhdatamarts/{sql_endpoint}"
    payload = {
        "commands": [
            {"$type": "MetadataRefreshExternalCommand"}
        ]
    }

    try:
        response_data = client.post(uri, json=payload).json()

        batch_id = response_data["batchId"]
        progress_state = response_data["progressState"]
        status_uri = f"/v1.0/myorg/lhdatamarts/{sql_endpoint}/batches/{batch_id}"

        # Seed status_response: if the batch completes immediately the poll
        # loop never runs, and the original code then read status_response
        # before assignment (NameError).
        status_response = response_data

        # Poll the batch status until it leaves 'inProgress'.
        while progress_state == 'inProgress':
            time.sleep(poll_interval)
            status_response = client.get(status_uri).json()
            progress_state = status_response["progressState"]
            # `display` is an IPython/notebook builtin and is undefined when
            # this module runs as plain Python; use print for progress output.
            print(f"Sync state: {progress_state}")

        # Handle success
        if progress_state == 'success':
            table_details = [
                {
                    'tableName': t['tableName'],
                    'warningMessages': t.get('warningMessages', []),
                    'lastSuccessfulUpdate': t.get('lastSuccessfulUpdate', 'N/A'),
                    'tableSyncState': t['tableSyncState'],
                    'sqlSyncState': t['sqlSyncState']
                }
                for t in status_response['operationInformation'][0]['progressDetail']['tablesSyncStatus']
            ]

            print("✅ Extracted Table Details:")
            for detail in table_details:
                # Fixed 30-char name column (inlined pad/truncate so this
                # block does not depend on the sibling helper).
                name_col = detail['tableName'][:30].ljust(30)
                print(
                    f"Table: {name_col}"
                    f" | Last Update: {detail['lastSuccessfulUpdate']}"
                    f" | tableSyncState: {detail['tableSyncState']}"
                    f" | Warnings: {detail['warningMessages']}"
                )
            return {"status": "success", "details": table_details}

        # Handle failure
        elif progress_state == 'failure':
            print("❌ Metadata sync failed.")
            print(status_response)
            return {"status": "failure", "error": status_response}

        else:
            print(f"⚠️ Unexpected progress state: {progress_state}")
            return {"status": "unknown", "raw_response": status_response}

    except Exception as e:
        print("🚨 Error during metadata sync:", str(e))
        return {"status": "exception", "error": str(e)}
|
|
908
|
+
|
|
909
|
+
"""
|
|
910
|
+
# How to call function ( lakehouse_metadata_sync )
|
|
911
|
+
workspace_id = spark.conf.get("trident.workspace.id")
|
|
912
|
+
lakehouse_id = spark.conf.get("trident.lakehouse.id")
|
|
913
|
+
|
|
914
|
+
# Call the function
|
|
915
|
+
result = lakehouse_metadata_sync(workspace_id, lakehouse_id)
|
|
916
|
+
display(result)
|
|
917
|
+
"""
|
|
918
|
+
|
|
919
|
+
|
|
920
|
+
|
|
921
|
+
|
|
922
|
+
|
|
923
|
+
"""
|
|
924
|
+
# <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< Archive
|
|
925
|
+
|
|
926
|
+
|
|
927
|
+
## <<<<<<<<<<<<<<<< create_adls_shortcuts_01
|
|
928
|
+
|
|
929
|
+
def create_adls_shortcuts_01(shortcut_configs, workspace_id, lakehouse_id, target_schema):
|
|
930
|
+
access_token = notebookutils.credentials.getToken("https://api.fabric.microsoft.com/.default")
|
|
931
|
+
headers = {
|
|
932
|
+
"Authorization": f"Bearer {access_token}",
|
|
933
|
+
"Content-Type": "application/json"
|
|
934
|
+
}
|
|
935
|
+
print("Access token starts with:", access_token[:20])
|
|
936
|
+
|
|
937
|
+
target_path = f"Tables/{target_schema}/"
|
|
938
|
+
|
|
939
|
+
for config in shortcut_configs:
|
|
940
|
+
payload = {
|
|
941
|
+
"name": config["name"],
|
|
942
|
+
"path": target_path,
|
|
943
|
+
"target": {
|
|
944
|
+
"type": "AdlsGen2",
|
|
945
|
+
"adlsGen2": {
|
|
946
|
+
"connectionId": config["connection_id"],
|
|
947
|
+
"location": config["location"],
|
|
948
|
+
"subpath": config["subpath"]
|
|
949
|
+
}
|
|
950
|
+
}
|
|
951
|
+
}
|
|
952
|
+
|
|
953
|
+
url = f"https://api.fabric.microsoft.com/v1/workspaces/{workspace_id}/items/{lakehouse_id}/shortcuts"
|
|
954
|
+
response = requests.post(url, headers=headers, json=payload)
|
|
955
|
+
|
|
956
|
+
if response.status_code in [200, 201]:
|
|
957
|
+
print(f"Shortcut '{config['name']}' created successfully.")
|
|
958
|
+
else:
|
|
959
|
+
print(f"Failed to create shortcut '{config['name']}'.")
|
|
960
|
+
print("Status Code:", response.status_code)
|
|
961
|
+
print("Response:", response.text)
|
|
962
|
+
|
|
963
|
+
## <<<<<<<<<<<<<<<< create_lakehouse_shortcuts_01
|
|
964
|
+
|
|
965
|
+
def create_lakehouse_shortcuts_01(shortcut_configs, workspace_id, lakehouse_id, target_schema):
|
|
966
|
+
access_token = notebookutils.credentials.getToken("https://api.fabric.microsoft.com/.default")
|
|
967
|
+
headers = {
|
|
968
|
+
"Authorization": f"Bearer {access_token}",
|
|
969
|
+
"Content-Type": "application/json"
|
|
970
|
+
}
|
|
971
|
+
print("Access token starts with:", access_token[:20])
|
|
972
|
+
|
|
973
|
+
for config in shortcut_configs:
|
|
974
|
+
source_path = config["source_subpath"]
|
|
975
|
+
target_shortcut_name = config["target_shortcut_name"]
|
|
976
|
+
source_workspace_id = config["source_workspace_id"]
|
|
977
|
+
source_lakehouse_id = config["source_lakehouse_id"]
|
|
978
|
+
|
|
979
|
+
target_path = f"Tables/{target_schema or 'dbo'}/"
|
|
980
|
+
|
|
981
|
+
payload = {
|
|
982
|
+
"path": target_path,
|
|
983
|
+
"name": target_shortcut_name,
|
|
984
|
+
"target": {
|
|
985
|
+
"type": "OneLake",
|
|
986
|
+
"oneLake": {
|
|
987
|
+
"workspaceId": source_workspace_id,
|
|
988
|
+
"itemId": source_lakehouse_id,
|
|
989
|
+
"path": source_path
|
|
990
|
+
}
|
|
991
|
+
}
|
|
992
|
+
}
|
|
993
|
+
|
|
994
|
+
url = f"https://api.fabric.microsoft.com/v1/workspaces/{workspace_id}/items/{lakehouse_id}/shortcuts"
|
|
995
|
+
print(f"Creating shortcut '{target_shortcut_name}' → {target_path}")
|
|
996
|
+
print(json.dumps(payload, indent=2))
|
|
997
|
+
|
|
998
|
+
# --- Send POST request ---
|
|
999
|
+
response = requests.post(url, headers=headers, json=payload)
|
|
1000
|
+
|
|
1001
|
+
if response.status_code in [200, 201]:
|
|
1002
|
+
print(f"Shortcut '{target_shortcut_name}' created successfully.")
|
|
1003
|
+
print(response.json())
|
|
1004
|
+
else:
|
|
1005
|
+
print(f"Failed to create shortcut '{target_shortcut_name}'.")
|
|
1006
|
+
print("Status Code:", response.status_code)
|
|
1007
|
+
print("Response:", response.text)
|
|
1008
|
+
|
|
1009
|
+
|
|
1010
|
+
## <<<<<<<<<<<<<<<< QA_CheckUtil_01
|
|
1011
|
+
def QA_CheckUtil_01(
|
|
1012
|
+
source_df: DataFrame,
|
|
1013
|
+
qa_df: DataFrame
|
|
1014
|
+
) -> DataFrame:
|
|
1015
|
+
|
|
1016
|
+
spark = source_df.sparkSession
|
|
1017
|
+
qa_rows: List[tuple] = []
|
|
1018
|
+
|
|
1019
|
+
def calc_diff(src: Optional[Union[int, float]], qa: Optional[Union[int, float]]) -> Optional[float]:
|
|
1020
|
+
if src is None or qa is None:
|
|
1021
|
+
return None
|
|
1022
|
+
return float(src) - float(qa)
|
|
1023
|
+
|
|
1024
|
+
# Row count
|
|
1025
|
+
src_count = float(source_df.count())
|
|
1026
|
+
qa_count = float(qa_df.count())
|
|
1027
|
+
qa_rows.append((
|
|
1028
|
+
"ROW_COUNT",
|
|
1029
|
+
"row_count",
|
|
1030
|
+
None,
|
|
1031
|
+
src_count,
|
|
1032
|
+
qa_count,
|
|
1033
|
+
calc_diff(src_count, qa_count),
|
|
1034
|
+
src_count == qa_count
|
|
1035
|
+
))
|
|
1036
|
+
|
|
1037
|
+
# Null check
|
|
1038
|
+
common_cols = set(source_df.columns).intersection(set(qa_df.columns))
|
|
1039
|
+
for col in common_cols:
|
|
1040
|
+
src_nulls = float(source_df.filter(F.col(col).isNull()).count())
|
|
1041
|
+
qa_nulls = float(qa_df.filter(F.col(col).isNull()).count())
|
|
1042
|
+
qa_rows.append((
|
|
1043
|
+
"NULL_CHECK",
|
|
1044
|
+
"null_count",
|
|
1045
|
+
col,
|
|
1046
|
+
src_nulls,
|
|
1047
|
+
qa_nulls,
|
|
1048
|
+
calc_diff(src_nulls, qa_nulls),
|
|
1049
|
+
src_nulls == qa_nulls
|
|
1050
|
+
))
|
|
1051
|
+
|
|
1052
|
+
# Aggregation check (SUM for amount)
|
|
1053
|
+
if "amount" in source_df.columns and "amount" in qa_df.columns:
|
|
1054
|
+
src_sum = float(source_df.select(F.sum("amount")).collect()[0][0] or 0.0)
|
|
1055
|
+
qa_sum = float(qa_df.select(F.sum("amount")).collect()[0][0] or 0.0)
|
|
1056
|
+
qa_rows.append((
|
|
1057
|
+
"AGG_CHECK",
|
|
1058
|
+
"sum",
|
|
1059
|
+
"amount",
|
|
1060
|
+
src_sum,
|
|
1061
|
+
qa_sum,
|
|
1062
|
+
calc_diff(src_sum, qa_sum),
|
|
1063
|
+
src_sum == qa_sum
|
|
1064
|
+
))
|
|
1065
|
+
|
|
1066
|
+
# Duplicate check on id column
|
|
1067
|
+
if "id" in source_df.columns and "id" in qa_df.columns:
|
|
1068
|
+
src_dupes = float(source_df.count() - source_df.select("id").distinct().count())
|
|
1069
|
+
qa_dupes = float(qa_df.count() - qa_df.select("id").distinct().count())
|
|
1070
|
+
qa_rows.append((
|
|
1071
|
+
"DUPLICATE_CHECK",
|
|
1072
|
+
"duplicate_id",
|
|
1073
|
+
"id",
|
|
1074
|
+
src_dupes,
|
|
1075
|
+
qa_dupes,
|
|
1076
|
+
calc_diff(src_dupes, qa_dupes),
|
|
1077
|
+
src_dupes == qa_dupes
|
|
1078
|
+
))
|
|
1079
|
+
|
|
1080
|
+
# Create final QA DataFrame
|
|
1081
|
+
qa_df_result = spark.createDataFrame(
|
|
1082
|
+
qa_rows,
|
|
1083
|
+
[
|
|
1084
|
+
"check_type",
|
|
1085
|
+
"check_name",
|
|
1086
|
+
"column_name",
|
|
1087
|
+
"source_value",
|
|
1088
|
+
"qa_value",
|
|
1089
|
+
"diff",
|
|
1090
|
+
"match"
|
|
1091
|
+
]
|
|
1092
|
+
)
|
|
1093
|
+
return qa_df_result
|
|
1094
|
+
|
|
1095
|
+
## >>>>>>>>>>>>>>>>>> QA_CheckUtil_01
|
|
1096
|
+
|
|
1097
|
+
|
|
1098
|
+
## <<<<<<<<<<<<<<<< send_email_no_attachment_02
|
|
1099
|
+
|
|
1100
|
+
def send_email_no_attachment_02(p, endpoint_url=None, access_token=None):
|
|
1101
|
+
|
|
1102
|
+
Parameters:
|
|
1103
|
+
p (dict): {
|
|
1104
|
+
"to": str | list[str],
|
|
1105
|
+
"subject": str,
|
|
1106
|
+
"body": str,
|
|
1107
|
+
"headers": dict (optional),
|
|
1108
|
+
"timeout": int (optional)
|
|
1109
|
+
}
|
|
1110
|
+
endpoint_url (str): API endpoint for sending mail
|
|
1111
|
+
access_token (str): Bearer token
|
|
1112
|
+
|
|
1113
|
+
Returns:
|
|
1114
|
+
(status_code, response_text) or (None, error_message)
|
|
1115
|
+
if not endpoint_url:
|
|
1116
|
+
raise ValueError("endpoint_url is required")
|
|
1117
|
+
if not access_token:
|
|
1118
|
+
raise ValueError("access_token is required")
|
|
1119
|
+
|
|
1120
|
+
missing = [k for k in ("to", "subject", "body") if not p.get(k)]
|
|
1121
|
+
if missing:
|
|
1122
|
+
return None, f"Missing required fields: {', '.join(missing)}"
|
|
1123
|
+
|
|
1124
|
+
payload = {
|
|
1125
|
+
"to": ";".join(p["to"]) if isinstance(p["to"], list) else p["to"],
|
|
1126
|
+
"subject": p["subject"],
|
|
1127
|
+
"body": p["body"],
|
|
1128
|
+
}
|
|
1129
|
+
|
|
1130
|
+
headers = {
|
|
1131
|
+
"Authorization": f"Bearer {access_token}",
|
|
1132
|
+
**p.get("headers", {})
|
|
1133
|
+
}
|
|
1134
|
+
|
|
1135
|
+
try:
|
|
1136
|
+
resp = requests.post(
|
|
1137
|
+
endpoint_url,
|
|
1138
|
+
json=payload,
|
|
1139
|
+
headers=headers,
|
|
1140
|
+
timeout=p.get("timeout", 15)
|
|
1141
|
+
)
|
|
1142
|
+
success_codes = (200, 201, 202)
|
|
1143
|
+
return resp.status_code, resp.text
|
|
1144
|
+
except requests.RequestException as e:
|
|
1145
|
+
return None, str(e)
|
|
1146
|
+
|
|
1147
|
+
|
|
1148
|
+
## <<<<<<<<<<<<<<<< send_email_no_attachment_01
|
|
1149
|
+
|
|
1150
|
+
def send_email_no_attachment_01(
|
|
1151
|
+
body : Optional[str] = None,
|
|
1152
|
+
endpoint_url: Optional[str] = None,
|
|
1153
|
+
access_token: Optional[str] = None,
|
|
1154
|
+
subject : Optional[str] = None,
|
|
1155
|
+
recipients : Optional[List[str]] = None,
|
|
1156
|
+
headers : Optional[Dict[str, str]] = None,
|
|
1157
|
+
timeout : int = 15,
|
|
1158
|
+
tz_name : str = "America/Los_Angeles"
|
|
1159
|
+
) -> Tuple[Optional[int], str]:
|
|
1160
|
+
# If endpoint or token not provided, skip sending
|
|
1161
|
+
if not endpoint_url or not access_token:
|
|
1162
|
+
return None, "Skipping send: endpoint_url or access_token not provided."
|
|
1163
|
+
|
|
1164
|
+
# Determine recipients
|
|
1165
|
+
final_recipients = recipients
|
|
1166
|
+
if not final_recipients:
|
|
1167
|
+
return None, "Skipping send: no recipients provided."
|
|
1168
|
+
|
|
1169
|
+
# Determine body content
|
|
1170
|
+
final_body = body
|
|
1171
|
+
if not final_body:
|
|
1172
|
+
return None, "Skipping send: no body content provided."
|
|
1173
|
+
|
|
1174
|
+
payload = {
|
|
1175
|
+
"to": ";".join(final_recipients) if isinstance(final_recipients, list) else final_recipients,
|
|
1176
|
+
"subject": subject or "",
|
|
1177
|
+
"body": final_body
|
|
1178
|
+
}
|
|
1179
|
+
|
|
1180
|
+
request_headers = {
|
|
1181
|
+
"Authorization": f"Bearer {access_token}",
|
|
1182
|
+
**(headers or {})
|
|
1183
|
+
}
|
|
1184
|
+
|
|
1185
|
+
try:
|
|
1186
|
+
resp = requests.post(
|
|
1187
|
+
endpoint_url,
|
|
1188
|
+
json=payload,
|
|
1189
|
+
headers=request_headers,
|
|
1190
|
+
timeout=timeout
|
|
1191
|
+
)
|
|
1192
|
+
if resp.status_code in (200, 201, 202):
|
|
1193
|
+
return resp.status_code, resp.text
|
|
1194
|
+
else:
|
|
1195
|
+
return resp.status_code, f"Failed: {resp.text}"
|
|
1196
|
+
except requests.RequestException as e:
|
|
1197
|
+
return None, str(e)
|
|
1198
|
+
|
|
1199
|
+
# How to call function
|
|
1200
|
+
|
|
1201
|
+
apiid = spark.conf.get("spark.scopeid")
|
|
1202
|
+
scope = f"api://{apiid}/.default"
|
|
1203
|
+
access_token = credential.get_token(scope).token
|
|
1204
|
+
endpoint_base = "https://fdne-inframail-logicapp01.azurewebsites.net:443/api/fdne-infra-appmail-sender"
|
|
1205
|
+
endpoint_url = f"{endpoint_base}/triggers/When_a_HTTP_request_is_received/invoke?api-version=2022-05-01"
|
|
1206
|
+
|
|
1207
|
+
status, response = send_email_no_attachment_01(
|
|
1208
|
+
body=markdown,
|
|
1209
|
+
recipients=recipients,
|
|
1210
|
+
endpoint_url=endpoint_url,
|
|
1211
|
+
access_token=access_token,
|
|
1212
|
+
subject=subject)
|
|
1213
|
+
|
|
1214
|
+
|
|
1215
|
+
|
|
1216
|
+
# <<<<<<<<<<<<<<<< send_email_via_http_01
|
|
1217
|
+
|
|
1218
|
+
def send_email_via_http_01(params):
|
|
1219
|
+
|
|
1220
|
+
# Ensure init_mail() ran
|
|
1221
|
+
try:
|
|
1222
|
+
_ = endpoint_url
|
|
1223
|
+
except NameError:
|
|
1224
|
+
raise RuntimeError("endpoint_url not set. Call init_mail(...) once in this session before send_email_via_http().")
|
|
1225
|
+
try:
|
|
1226
|
+
_ = access_token
|
|
1227
|
+
except NameError:
|
|
1228
|
+
raise RuntimeError("access_token not set. Call init_mail(...) once in this session before send_email_via_http().")
|
|
1229
|
+
|
|
1230
|
+
# Required checks
|
|
1231
|
+
required = ['to', 'subject', 'body']
|
|
1232
|
+
missing = [f for f in required if not params.get(f)]
|
|
1233
|
+
if missing:
|
|
1234
|
+
return None, f"Missing required fields: {', '.join(missing)}"
|
|
1235
|
+
|
|
1236
|
+
# Base payload
|
|
1237
|
+
payload = {
|
|
1238
|
+
"to": ";".join(params["to"]) if isinstance(params["to"], list) else params["to"],
|
|
1239
|
+
"subject": params["subject"],
|
|
1240
|
+
"body": params["body"],
|
|
1241
|
+
}
|
|
1242
|
+
if params.get("cc"):
|
|
1243
|
+
payload["cc"] = params["cc"] if isinstance(params["cc"], list) else [params["cc"]]
|
|
1244
|
+
if params.get("bcc"):
|
|
1245
|
+
payload["bcc"] = params["bcc"] if isinstance(params["bcc"], list) else [params["bcc"]]
|
|
1246
|
+
if params.get("from_addr"):
|
|
1247
|
+
payload["from"] = params["from_addr"]
|
|
1248
|
+
if params.get("attachments"):
|
|
1249
|
+
payload["attachments"] = params["attachments"]
|
|
1250
|
+
|
|
1251
|
+
# ---- DataFrame → HTML body (your existing style) ----
|
|
1252
|
+
df = params.get("df")
|
|
1253
|
+
if df is not None:
|
|
1254
|
+
df_limit = int(params.get("df_limit", 1000))
|
|
1255
|
+
tz_name = params.get("tz_name", "America/Los_Angeles")
|
|
1256
|
+
df_in_body = params.get("df_in_body", True)
|
|
1257
|
+
df_attach = params.get("df_attach", False)
|
|
1258
|
+
df_name = params.get("df_name", "data.html")
|
|
1259
|
+
|
|
1260
|
+
# Get pandas DataFrame
|
|
1261
|
+
pdf = None
|
|
1262
|
+
try:
|
|
1263
|
+
from pyspark.sql import DataFrame as SparkDF
|
|
1264
|
+
if isinstance(df, SparkDF):
|
|
1265
|
+
pdf = df.limit(df_limit).toPandas()
|
|
1266
|
+
else:
|
|
1267
|
+
pdf = df # assume already pandas
|
|
1268
|
+
except Exception:
|
|
1269
|
+
pdf = df
|
|
1270
|
+
|
|
1271
|
+
html_body = _df_to_html_table(pdf, tz_name=tz_name)
|
|
1272
|
+
|
|
1273
|
+
if df_in_body:
|
|
1274
|
+
subject = str(params.get("subject", ""))
|
|
1275
|
+
if "QA Success" in subject:
|
|
1276
|
+
payload["body"] ='<html><body><h4>No data available to display.</h4></body></html>'
|
|
1277
|
+
else:
|
|
1278
|
+
payload["body"] = html_body
|
|
1279
|
+
else:
|
|
1280
|
+
# append to body if you prefer not to replace
|
|
1281
|
+
payload["body"] = f'{payload["body"]}{html_body}'
|
|
1282
|
+
|
|
1283
|
+
if df_attach:
|
|
1284
|
+
content_b64 = base64.b64encode(html_body.encode("utf-8")).decode("utf-8")
|
|
1285
|
+
attach = {"name": df_name, "contentBytes": content_b64, "contentType": "text/html"}
|
|
1286
|
+
if "attachments" in payload and isinstance(payload["attachments"], list):
|
|
1287
|
+
payload["attachments"].append(attach)
|
|
1288
|
+
else:
|
|
1289
|
+
payload["attachments"] = [attach]
|
|
1290
|
+
|
|
1291
|
+
# Auth header
|
|
1292
|
+
req_headers = {"Authorization": f"Bearer {access_token}"}
|
|
1293
|
+
if params.get("headers"):
|
|
1294
|
+
req_headers.update(params["headers"])
|
|
1295
|
+
|
|
1296
|
+
timeout = params.get("timeout", 15)
|
|
1297
|
+
|
|
1298
|
+
# Send
|
|
1299
|
+
try:
|
|
1300
|
+
response = requests.post(endpoint_url, json=payload, headers=req_headers, timeout=timeout)
|
|
1301
|
+
status_msg = "✅ Success" if response.status_code == 200 else f"❌ Failed ({response.status_code})"
|
|
1302
|
+
print(f"Email send: {status_msg}")
|
|
1303
|
+
return response.status_code, response.text, req_headers
|
|
1304
|
+
except requests.RequestException as e:
|
|
1305
|
+
error_msg = f"Request failed: {str(e)}"
|
|
1306
|
+
print(f"❌ {error_msg}")
|
|
1307
|
+
return None, error_msg
|
|
1308
|
+
|
|
1309
|
+
|
|
1310
|
+
########### base working version # adls_shortcut_utils.py
|
|
1311
|
+
|
|
1312
|
+
# adls_shortcut_utils.py
|
|
1313
|
+
|
|
1314
|
+
import requests
|
|
1315
|
+
from notebookutils import mssparkutils
|
|
1316
|
+
from notebookutils.credentials import getToken
|
|
1317
|
+
|
|
1318
|
+
def create_adls_shortcuts_03(shortcut_configs, workspace_id, lakehouse_id, target_schema):
|
|
1319
|
+
# access_token = mssparkutils.credentials.getToken("https://api.fabric.microsoft.com/.default")
|
|
1320
|
+
access_token = getToken("https://api.fabric.microsoft.com/.default")
|
|
1321
|
+
headers = {
|
|
1322
|
+
"Authorization": f"Bearer {access_token}",
|
|
1323
|
+
"Content-Type": "application/json"
|
|
1324
|
+
}
|
|
1325
|
+
print("Access token starts with:", access_token[:20])
|
|
1326
|
+
|
|
1327
|
+
target_path = f"Tables/{target_schema}/"
|
|
1328
|
+
|
|
1329
|
+
for config in shortcut_configs:
|
|
1330
|
+
payload = {
|
|
1331
|
+
"name": config["name"],
|
|
1332
|
+
"path": target_path,
|
|
1333
|
+
"target": {
|
|
1334
|
+
"type": "AdlsGen2",
|
|
1335
|
+
"adlsGen2": {
|
|
1336
|
+
"connectionId": config["connection_id"],
|
|
1337
|
+
"location": config["location"],
|
|
1338
|
+
"subpath": config["subpath"]
|
|
1339
|
+
}
|
|
1340
|
+
}
|
|
1341
|
+
}
|
|
1342
|
+
|
|
1343
|
+
url = f"https://api.fabric.microsoft.com/v1/workspaces/{workspace_id}/items/{lakehouse_id}/shortcuts"
|
|
1344
|
+
response = requests.post(url, headers=headers, json=payload)
|
|
1345
|
+
|
|
1346
|
+
if response.status_code in [200, 201]:
|
|
1347
|
+
print(f"Shortcut '{config['name']}' created successfully.")
|
|
1348
|
+
print()
|
|
1349
|
+
else:
|
|
1350
|
+
print(f"Failed to create shortcut '{config['name']}'.")
|
|
1351
|
+
print("Status Code:", response.status_code)
|
|
1352
|
+
print("Response:", response.text)
|
|
1353
|
+
print()
|
|
1354
|
+
|
|
1355
|
+
|
|
1356
|
+
|
|
1357
|
+
# CostHub
|
|
1358
|
+
from pyspark.conf import SparkConf
|
|
1359
|
+
|
|
1360
|
+
# Required Spark conf values
|
|
1361
|
+
target_workspace_id = spark.conf.get("spark.workspaceid")
|
|
1362
|
+
target_lakehouse_id = spark.conf.get("spark.lakehouseid")
|
|
1363
|
+
target_schema = spark.conf.get("spark.CostHubSchema")
|
|
1364
|
+
target_location = spark.conf.get("spark.adlslocation")
|
|
1365
|
+
target_connection = spark.conf.get("spark.connectionid")
|
|
1366
|
+
|
|
1367
|
+
# Define shortcut configurations
|
|
1368
|
+
shortcut_configs = [
|
|
1369
|
+
{
|
|
1370
|
+
"name": "Z-RefreshTime_03",
|
|
1371
|
+
"connection_id": target_connection,
|
|
1372
|
+
"location": target_location,
|
|
1373
|
+
"subpath": "/abidatamercury/MercuryDataProd/CostHub/MercuryUpstreamRefreshLog"
|
|
1374
|
+
}
|
|
1375
|
+
]
|
|
1376
|
+
|
|
1377
|
+
# Call the function
|
|
1378
|
+
create_adls_shortcuts_03(shortcut_configs, target_workspace_id, target_lakehouse_id, target_schema)
|
|
1379
|
+
|
|
1380
|
+
"""
|