CPILake-Utils 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1380 @@
1
+ import re
2
+ import json
3
+ import time
4
+ import pytz
5
+ import html
6
+ import base64
7
+ import requests
8
+ import pandas as pd
9
+ # import sempy.fabric as fabric
10
+ from pyspark.sql import SparkSession
11
+ from pyspark.sql import DataFrame, functions as F
12
+ from pyspark.sql.functions import unix_timestamp, col, max, from_utc_timestamp
13
+ from pyspark.conf import SparkConf
14
+ from datetime import datetime
15
+ # from notebookutils.credentials import getSecret
16
+ # from azure.identity import CertificateCredential
17
+ # from tqdm.auto import tqdm
18
+ from typing import Optional, List, Dict, Tuple, Union
19
+ # from fabric.analytics.environment.credentials import SetFabricAnalyticsDefaultTokenCredentials
20
+
21
+
22
+ ## <<<<<<<<<<<<<<<< hash_function
23
+
24
def hash_function(s):
    """Hash an alphanumeric string into a base-36 style integer.

    Non-alphanumeric characters are stripped and letters are upper-cased
    before hashing, so e.g. "a-b" and "AB" hash identically.  ``None``
    passes through unchanged; an empty (or fully stripped) string hashes
    to 0.
    """
    if s is None:
        return None
    cleaned = re.sub(r'[^A-Z0-9]', '', str(s).upper())
    alphabet = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    digit_value = {symbol: value for value, symbol in enumerate(alphabet)}
    # Interpret the cleaned string as a base-36 number (least-significant
    # character first); unknown characters cannot occur after the regex
    # strip, but get(…, 0) keeps the lookup total anyway.
    total = sum(
        digit_value.get(symbol, 0) * 36 ** position
        for position, symbol in enumerate(reversed(cleaned))
    )
    # Length-dependent salt so strings of different lengths rarely collide.
    return total + len(cleaned) * 36 ** (len(cleaned) + 1)
36
+
37
+
38
+ ## <<<<<<<<<<<<<<<< send_email_via_http
39
+
40
def send_email_via_http(
    body: str,
    to: List[str],
    tenant_id: str,
    client_id: str,
    certificate_secret_name: str,
    keyvault_url: str,
    df_in_body: bool,
    df_attach: bool,
    endpoint_url: Optional[str] = None,
    scope: Optional[str] = None,
    subject: Optional[str] = None,
    headers: Optional[Dict[str, str]] = None,
    timeout: int = 15
) -> Tuple[Optional[int], str, Dict[str, str]]:
    """Send an e-mail through the infra Logic App HTTP trigger.

    Authenticates with a certificate-based service principal.  NOTE: the
    tenant/client/cert/vault arguments are immediately overwritten from
    Spark conf, and ``endpoint_url``/``scope`` are hard-coded below — those
    parameters exist only for interface compatibility.

    Returns a 3-tuple ``(status_code, response_text, request_headers)``;
    ``status_code`` is ``None`` when validation fails or the HTTP request
    raises.

    FIX: the failure paths previously returned 2-tuples while the success
    path returned a 3-tuple, so ``status, text, hdrs = ...`` crashed on
    any error; every path now returns three values.
    """
    import base64
    import requests
    from notebookutils.credentials import getSecret
    from azure.identity import CertificateCredential
    from pyspark.sql import SparkSession
    spark = SparkSession.builder.appName("send_email_via_http").getOrCreate()

    # SPN settings come from Spark conf, overriding the arguments above.
    tenant_id = spark.conf.get("spark.tenantid")
    client_id = spark.conf.get("spark.clientid")
    certificate_secret_name = spark.conf.get("spark.certname")
    keyvault_url = spark.conf.get("spark.vaultname")

    # Hard-coded Logic App endpoint and SPN token scope.
    endpoint_url = (
        "https://fdne-inframail-logicapp01.azurewebsites.net:443/"
        "api/fdne-infra-appmail-sender/triggers/"
        "When_a_HTTP_request_is_received/invoke"
        "?api-version=2022-05-01" )

    scope = "api://27d45411-0d7a-4f27-bc5f-412d74ea249b/.default"

    # Certificate credential pulled from Key Vault (base64-encoded cert).
    secret_value = getSecret(keyvault_url, certificate_secret_name)
    certificate_data = base64.b64decode(secret_value)

    credential = CertificateCredential(
        tenant_id=tenant_id,
        client_id=client_id,
        certificate_data=certificate_data,
        send_certificate_chain=True
    )

    access_token = credential.get_token(scope).token

    params = {
        "body": body,
        "to": to,
        "subject": subject,
        "df_in_body": df_in_body,
        "df_attach": df_attach,
        "headers": headers,
        "timeout": timeout}

    # Required checks
    required = ['to', 'subject', 'body']
    missing = [f for f in required if not params.get(f)]
    if missing:
        # Request headers are not built yet at this point; return {}.
        return None, f"Missing required fields: {', '.join(missing)}", {}

    # Base payload
    payload = {
        "to": ";".join(params["to"]) if isinstance(params["to"], list) else params["to"],
        "subject": params["subject"],
        "body": params["body"],
    }
    # NOTE(review): `params` is the fixed dict built above and never
    # contains "cc"/"bcc"/"from_addr"/"attachments"/"df", so the optional
    # branches below are currently unreachable; kept for planned extension.
    if params.get("cc"):
        payload["cc"] = params["cc"] if isinstance(params["cc"], list) else [params["cc"]]
    if params.get("bcc"):
        payload["bcc"] = params["bcc"] if isinstance(params["bcc"], list) else [params["bcc"]]
    if params.get("from_addr"):
        payload["from"] = params["from_addr"]
    if params.get("attachments"):
        payload["attachments"] = params["attachments"]

    # ---- DataFrame → HTML body ----
    df = params.get("df")
    if df is not None:
        df_limit = int(params.get("df_limit", 1000))
        tz_name = params.get("tz_name", "America/Los_Angeles")
        df_in_body = params.get("df_in_body", True)
        df_attach = params.get("df_attach", False)
        df_name = params.get("df_name", "data.html")

        # Convert Spark DataFrames to pandas; anything else is assumed to
        # already be pandas-compatible.
        pdf = None
        try:
            from pyspark.sql import DataFrame as SparkDF
            if isinstance(df, SparkDF):
                pdf = df.limit(df_limit).toPandas()
            else:
                pdf = df  # assume already pandas
        except Exception:
            pdf = df

        html_body = _df_to_html_table(pdf, tz_name=tz_name)

        if df_in_body:
            subject = str(params.get("subject", ""))
            if "QA Success" in subject:
                payload["body"] ='<html><body><h4>No data available to display.</h4></body></html>'
            else:
                payload["body"] = html_body
        else:
            # append to body instead of replacing it
            payload["body"] = f'{payload["body"]}{html_body}'

        if df_attach:
            content_b64 = base64.b64encode(html_body.encode("utf-8")).decode("utf-8")
            attach = {"name": df_name, "contentBytes": content_b64, "contentType": "text/html"}
            if "attachments" in payload and isinstance(payload["attachments"], list):
                payload["attachments"].append(attach)
            else:
                payload["attachments"] = [attach]

    # Auth header; caller-supplied headers may extend/override it.
    req_headers = {"Authorization": f"Bearer {access_token}"}
    if params.get("headers"):
        req_headers.update(params["headers"])

    timeout = params.get("timeout", 15)

    # Send
    try:
        response = requests.post(endpoint_url, json=payload, headers=req_headers, timeout=timeout)
        status_msg = "Success" if response.status_code == 200 else f"Failed ({response.status_code})"
        print(f"Email send: {status_msg}")
        return response.status_code, response.text, req_headers
    except requests.RequestException as e:
        error_msg = f"Request failed: {str(e)}"
        print(f"{error_msg}")
        # FIX: return three values, matching the success path.
        return None, error_msg, req_headers
177
+ """
178
+ # call the function
179
+ status, response, req_headers = send_email_via_http(
180
+ body = body_html,
181
+ to = RECIPIENTS,
182
+ subject = subject,
183
+ tenant_id = tenant_id,
184
+ client_id = client_id,
185
+ certificate_secret_name = certificate_secret_name,
186
+ keyvault_url = keyvault_url,
187
+ df_in_body = False,
188
+ df_attach = False )
189
+ """
190
+
191
+ ## <<<<<<<<<<<<<<<< _df_to_html_table
192
+
193
+ def _df_to_html_table(pdf, tz_name="America/Los_Angeles"):
194
+ """Render a pandas DataFrame to your styled HTML table."""
195
+ # Empty DF → simple message
196
+ if pdf is None or len(pdf.index) == 0:
197
+ return '<html><body><h4>No data available to display.</h4></body></html>'
198
+
199
+ # Header with PST time
200
+ pst = pytz.timezone(tz_name)
201
+ now_pst = datetime.now(pst).strftime("%Y-%m-%d %H:%M:%S")
202
+
203
+ html_Table = []
204
+ html_Table.append('<html><head><style>')
205
+ html_Table.append('table {border-collapse: collapse; width: 100%} '
206
+ 'table, td, th {border: 1px solid black; padding: 3px; font-size: 9pt;} '
207
+ 'td, th {text-align: left;}')
208
+ html_Table.append('</style></head><body>')
209
+ html_Table.append(f'<h4>(Refresh Time: {now_pst})</h4><hr>')
210
+ html_Table.append('<table style="width:100%; border-collapse: collapse;">')
211
+ html_Table.append('<thead style="background-color:#000000; color:#ffffff;"><tr>')
212
+
213
+ # Columns (skip FailureFlag in header, to match your code)
214
+ cols = list(pdf.columns)
215
+ visible_cols = [c for c in cols if c != "FailureFlag"]
216
+ for c in visible_cols:
217
+ html_Table.append(f'<th style="border: 1px solid black; padding: 5px;">{html.escape(str(c))}</th>')
218
+ html_Table.append('</tr></thead><tbody>')
219
+
220
+ # Rows (highlight red if FailureFlag == 'Yes', else light green default)
221
+ ff_present = "FailureFlag" in cols
222
+ for _, row in pdf.iterrows():
223
+ row_bg_color = '#ccff66' # default
224
+ if ff_present:
225
+ try:
226
+ if str(row["FailureFlag"]).strip().lower() == "yes":
227
+ row_bg_color = '#ff8080'
228
+ except Exception:
229
+ pass
230
+ html_Table.append(f'<tr style="background-color:{row_bg_color};">')
231
+ for c in visible_cols:
232
+ val = row[c]
233
+ html_Table.append(f'<td>{html.escape("" if val is None else str(val))}</td>')
234
+ html_Table.append('</tr>')
235
+
236
+ html_Table.append('</tbody></table></body></html>')
237
+ return "".join(html_Table)
238
+
239
+
240
+ ## <<<<<<<<<<<<<<<< send_email_no_attachment
241
+
242
def send_email_no_attachment(
    body: str,
    recipients: List[str],
    tenant_id: str,
    client_id: str,
    certificate_secret_name: str,
    keyvault_url: str,
    endpoint_url: Optional[str] = None,
    scope: Optional[str] = None,
    subject: Optional[str] = None,
    headers: Optional[Dict[str, str]] = None,
    timeout: int = 15
) -> Tuple[Optional[int], str]:
    """Send a plain e-mail (no attachments) via the infra Logic App.

    NOTE: the tenant/client/cert/vault arguments are immediately
    overwritten from Spark conf, and ``endpoint_url``/``scope`` are
    hard-coded below — those parameters exist only for interface
    compatibility.

    Returns ``(status_code, response_text)`` for any HTTP reply (the text
    is prefixed with "Failed: " for non-2xx replies), or
    ``(None, error_message)`` when the request itself raises.
    """
    import base64
    import requests
    from notebookutils.credentials import getSecret
    from azure.identity import CertificateCredential
    from pyspark.sql import SparkSession

    spark = SparkSession.builder.appName("send_email_no_attachment").getOrCreate()

    # Resolve SPN settings from Spark conf (overrides the arguments).
    tenant_id = spark.conf.get("spark.tenantid")
    client_id = spark.conf.get("spark.clientid")
    certificate_secret_name = spark.conf.get("spark.certname")
    keyvault_url = spark.conf.get("spark.vaultname")

    # Hard-coded Logic App endpoint and SPN token scope.
    endpoint_url = (
        "https://fdne-inframail-logicapp01.azurewebsites.net:443/"
        "api/fdne-infra-appmail-sender/triggers/"
        "When_a_HTTP_request_is_received/invoke"
        "?api-version=2022-05-01" )
    scope = "api://27d45411-0d7a-4f27-bc5f-412d74ea249b/.default"

    # Certificate-based SPN credential pulled from Key Vault.
    cert_bytes = base64.b64decode(getSecret(keyvault_url, certificate_secret_name))
    token = CertificateCredential(
        tenant_id=tenant_id,
        client_id=client_id,
        certificate_data=cert_bytes,
        send_certificate_chain=True,
    ).get_token(scope).token

    mail_payload = {
        "to": ";".join(recipients),
        "subject": subject or "",
        "body": body, }

    request_headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {token}",
        **(headers or {}),
    }

    # Call the Logic App; network-level failures map to (None, message).
    try:
        response = requests.post(
            endpoint_url,
            json=mail_payload,
            headers=request_headers,
            timeout=timeout,
        )
    except requests.RequestException as exc:
        return None, str(exc)

    if response.status_code in (200, 201, 202):
        return response.status_code, response.text
    return response.status_code, f"Failed: {response.text}"
319
+ """
320
+ # call the function
321
+ status, response = send_email_no_attachment(
322
+ body=markdown,
323
+ recipients=recipients,
324
+ subject=subject,
325
+ tenant_id=tenant_id,
326
+ client_id=client_id,
327
+ certificate_secret_name = certificate_secret_name,
328
+ keyvault_url=keyvault_url
329
+ )
330
+ """
331
+
332
+ # >>>>>>>>>>>>>>>>> end_email_no_attachment
333
+
334
+
335
+ ## <<<<<<<<<<<<<<<< QA_CheckUtil
336
+ """
337
+ A status column: PASS / FAIL / SKIPPED
338
+ A skip_reason column
339
+ Checks are skipped instead of failing when:
340
+ One or both DataFrames are empty
341
+ Required columns are missing
342
+ Aggregation column exists but contains only nulls
343
+ match becomes None when skipped (clearer than False)
344
+ """
345
def QA_CheckUtil(
    source_df: DataFrame,
    qa_df: DataFrame
) -> DataFrame:
    """Compare a source DataFrame against its QA copy and return a report.

    Checks performed:
      * ROW_COUNT       - total row counts match
      * NULL_CHECK      - per-column null counts match (common columns only)
      * AGG_CHECK       - sum("amount") matches, when the column exists
      * DUPLICATE_CHECK - duplicate count on "id" matches, when it exists

    Each check yields one report row with status PASS / FAIL / SKIPPED and
    a skip_reason; `match` is None (not False) for skipped checks.

    FIXES: the null-check loop variable no longer shadows
    pyspark.sql.functions.col imported at module level; common columns are
    iterated in sorted order so the report order is deterministic (set
    iteration order previously varied between runs); the duplicate check
    reuses the row counts computed above instead of re-running count().
    """
    spark = source_df.sparkSession
    qa_rows: List[tuple] = []

    def calc_diff(src, qa):
        # None propagates so skipped/absent values never fake a 0.0 diff.
        if src is None or qa is None:
            return None
        return float(src) - float(qa)

    def add_row(check_type, check_name, column, src, qa, skip_reason=None):
        # Append one report tuple; skip_reason switches PASS/FAIL → SKIPPED.
        if skip_reason:
            qa_rows.append((check_type, check_name, column, src, qa,
                            None, None, "SKIPPED", skip_reason))
        else:
            match = src == qa
            qa_rows.append((check_type, check_name, column, src, qa,
                            calc_diff(src, qa), match,
                            "PASS" if match else "FAIL", None))

    # Row count
    src_count = source_df.count()
    qa_count = qa_df.count()
    add_row("ROW_COUNT", "row_count", None, float(src_count), float(qa_count))

    # Null check — one row per common column.
    common_cols = set(source_df.columns).intersection(set(qa_df.columns))
    if not common_cols:
        add_row("NULL_CHECK", "null_count", None, None, None,
                "No common columns between source and QA")
    else:
        for column_name in sorted(common_cols):
            src_nulls = source_df.filter(F.col(column_name).isNull()).count()
            qa_nulls = qa_df.filter(F.col(column_name).isNull()).count()
            add_row("NULL_CHECK", "null_count", column_name,
                    float(src_nulls), float(qa_nulls))

    # Aggregation check on "amount"
    if "amount" not in source_df.columns or "amount" not in qa_df.columns:
        add_row("AGG_CHECK", "sum", "amount", None, None,
                "Column 'amount' missing in one or both DataFrames")
    else:
        src_sum = source_df.select(F.sum("amount")).collect()[0][0]
        qa_sum = qa_df.select(F.sum("amount")).collect()[0][0]

        if src_sum is None and qa_sum is None:
            add_row("AGG_CHECK", "sum", "amount", None, None,
                    "All values are NULL in both DataFrames")
        else:
            # One-sided all-NULL sums are coerced to 0.0 and compared.
            add_row("AGG_CHECK", "sum", "amount",
                    float(src_sum or 0.0), float(qa_sum or 0.0))

    # Duplicate check on the "id" column (reuses the counts from above).
    if "id" not in source_df.columns or "id" not in qa_df.columns:
        add_row("DUPLICATE_CHECK", "duplicate_id", "id", None, None,
                "Column 'id' missing in one or both DataFrames")
    else:
        src_dupes = src_count - source_df.select("id").distinct().count()
        qa_dupes = qa_count - qa_df.select("id").distinct().count()
        add_row("DUPLICATE_CHECK", "duplicate_id", "id",
                float(src_dupes), float(qa_dupes))

    # Create final QA DataFrame
    return spark.createDataFrame(
        qa_rows,
        [
            "check_type",
            "check_name",
            "column_name",
            "source_value",
            "qa_value",
            "diff",
            "match",
            "status",
            "skip_reason"
        ]
    )
493
+
494
+ """
495
+ ## <<<<<<<<<<<<<<<< create_lakehouse_shortcuts SPN
496
+
497
+ def create_lakehouse_shortcuts_02(shortcut_configs):
498
+ import base64
499
+ import requests
500
+ from notebookutils.credentials import getSecret
501
+ from azure.identity import CertificateCredential
502
+
503
+ tenant_id = spark.conf.get("spark.tenantid")
504
+ client_id = spark.conf.get("spark.clientid")
505
+ certificate_secret_name = spark.conf.get("spark.certname")
506
+ keyvault_url = spark.conf.get("spark.vaultname")
507
+
508
+ # Defaults
509
+ endpoint_url = (
510
+ "https://fdne-inframail-logicapp01.azurewebsites.net:443/"
511
+ "api/fdne-infra-appmail-sender/triggers/"
512
+ "When_a_HTTP_request_is_received/invoke"
513
+ "?api-version=2022-05-01" )
514
+
515
+ scope = "api://27d45411-0d7a-4f27-bc5f-412d74ea249b/.default"
516
+
517
+ # Credential
518
+ secret_value = getSecret(keyvault_url, certificate_secret_name)
519
+ certificate_data = base64.b64decode(secret_value)
520
+
521
+ credential = CertificateCredential(
522
+ tenant_id=tenant_id,
523
+ client_id=client_id,
524
+ certificate_data=certificate_data,
525
+ send_certificate_chain=True
526
+ )
527
+
528
+ access_token = credential.get_token(scope).token
529
+
530
+ headers = {"Authorization": f"Bearer {access_token}", "Content-Type": "application/json" }
531
+ print("Access token starts with:", access_token[:20])
532
+
533
+ for config in shortcut_configs:
534
+ source_path = config["source_subpath"]
535
+ target_schema = config["target_schema"]
536
+ workspace_name = config["workspace_name"]
537
+ lakehouse_name = config["lakehouse_name"]
538
+ target_shortcut_name = config["target_shortcut_name"]
539
+
540
+ resp_ws = requests.get("https://api.fabric.microsoft.com/v1/workspaces", headers=headers)
541
+ resp_ws.raise_for_status()
542
+ workspace_id = next(ws["id"] for ws in resp_ws.json()["value"] if ws["displayName"] == workspace_name)
543
+
544
+ resp_lh = requests.get(f"https://api.fabric.microsoft.com/v1/workspaces/{workspace_id}/lakehouses", headers=headers)
545
+ resp_lh.raise_for_status()
546
+ lakehouse_id = next(lh["id"] for lh in resp_lh.json()["value"] if lh["displayName"] == lakehouse_name)
547
+
548
+ target_path = f"Tables/{target_schema or 'dbo'}/"
549
+
550
+ payload = {
551
+ "path": target_path,
552
+ "name": target_shortcut_name,
553
+ "target": {
554
+ "type": "OneLake",
555
+ "oneLake": {
556
+ "workspaceId" : workspace_id,
557
+ "itemId" : lakehouse_id,
558
+ "path" : source_path,
559
+ "target_schema" : config["target_schema"]
560
+ }
561
+ }
562
+ }
563
+
564
+ url = f"https://api.fabric.microsoft.com/v1/workspaces/{workspace_id}/items/{lakehouse_id}/shortcuts"
565
+ print(f"Creating shortcut '{target_shortcut_name}' → {target_path}")
566
+ print(json.dumps(payload, indent=2))
567
+
568
+ # --- Send POST request ---
569
+ response = requests.post(url, headers=headers, json=payload)
570
+
571
+ if response.status_code in [200, 201]:
572
+ print(f"Shortcut '{target_shortcut_name}' created successfully.")
573
+ print(response.json())
574
+ else:
575
+ print(f"Failed to create shortcut '{target_shortcut_name}'.")
576
+ print("Status Code:", response.status_code)
577
+ print("Response:", response.text)
578
+
579
+ # How to call function
580
+
581
+ shortcut_configs = [
582
+ {
583
+ "target_shortcut_name" : "DIM_Date",
584
+ "workspace_name" : "FDnECostHubReporting_DEV",
585
+ "lakehouse_name" : "Cost_Hub",
586
+ "source_subpath" : "Tables/DIM_Date",
587
+ "target_schema" : "CostHub",
588
+ }
589
+ ]
590
+ create_lakehouse_shortcuts_02(shortcut_configs)
591
+
592
+ """
593
+
594
+ ## <<<<<<<<<<<<<<<< create_lakehouse_shortcuts MI
595
+
596
def create_lakehouse_shortcuts(shortcut_configs):
    """Create OneLake shortcuts in this notebook's lakehouse (managed identity).

    Each entry in *shortcut_configs* must supply: ``source_subpath``,
    ``target_schema`` and ``target_shortcut_name``.  Results are printed;
    nothing is returned.
    """
    import requests, json
    import sempy.fabric as fabric
    from notebookutils.credentials import getToken

    access_token = getToken("https://api.fabric.microsoft.com/.default")
    headers = {"Authorization": f"Bearer {access_token}", "Content-Type": "application/json" }

    # Both IDs come from the running notebook's own context.
    workspace_id = fabric.get_notebook_workspace_id()
    lakehouse_id = fabric.get_lakehouse_id()
    shortcuts_url = f"https://api.fabric.microsoft.com/v1/workspaces/{workspace_id}/items/{lakehouse_id}/shortcuts"

    for cfg in shortcut_configs:
        target_shortcut_name = cfg["target_shortcut_name"]
        target_schema = cfg["target_schema"]
        target_path = f"Tables/{target_schema}/"

        # NOTE(review): the "target_schema" key inside the oneLake target is
        # carried over from the original payload — confirm the shortcuts API
        # actually accepts it.
        payload = {
            "path": target_path,
            "name": target_shortcut_name,
            "target": {
                "type": "OneLake",
                "oneLake": {
                    "workspaceId" : workspace_id,
                    "itemId" : lakehouse_id,
                    "path" : cfg["source_subpath"],
                    "target_schema" : cfg["target_schema"]
                }
            }
        }

        print(f"Creating shortcut '{target_shortcut_name}' → {target_path}")
        print(json.dumps(payload, indent=2))

        # --- Send POST request ---
        response = requests.post(shortcuts_url, headers=headers, json=payload)

        if response.status_code in (200, 201):
            print(f"Shortcut '{target_shortcut_name}' created successfully.")
            print(response.json())
        else:
            print(f"Failed to create shortcut '{target_shortcut_name}'.")
            print("Status Code:", response.status_code)
            print("Response:", response.text)
642
+
643
+ """
644
+ # How to call function
645
+
646
+ target_schema = spark.conf.get("spark.CostHubSchema")
647
+
648
+ shortcut_configs = [
649
+ {
650
+ "target_shortcut_name" : "DIM_Date_01",
651
+ "source_subpath" : "Tables/CostHub/DIM_Date",
652
+ "target_schema" : target_schema,
653
+ "target_path" : "Tables/CostHub/"
654
+ }
655
+ ]
656
+ create_lakehouse_shortcuts(shortcut_configs)
657
+ """
658
+
659
+
660
+ ## <<<<<<<<<<<<<<<< create_adls_shortcuts with MI
661
+
662
def create_adls_shortcuts(shortcut_configs):
    """Create ADLS Gen2 shortcuts in this notebook's lakehouse (managed identity).

    Each entry in *shortcut_configs* must supply: ``name``,
    ``target_schema``, ``connection_name`` and ``subpath``.  The connection
    id and ADLS location are resolved by display name from the Fabric
    connections API.  Results are printed; nothing is returned.

    FIXES: the original nested a second ``for config in shortcut_configs``
    loop inside the first (shadowing the outer variable and attempting each
    shortcut len(configs)² times) and re-fetched/re-parsed the connections
    endpoint on every outer iteration; the connection list is now fetched
    once and each config is processed exactly once, with raise_for_status
    added on the lookup call.
    """
    import requests
    import sempy.fabric as fabric
    from notebookutils.credentials import getToken

    access_token = getToken("https://api.fabric.microsoft.com/.default")

    headers = { "Authorization": f"Bearer {access_token}", "Content-Type": "application/json" }

    workspace_id = fabric.get_notebook_workspace_id()
    lakehouse_id = fabric.get_lakehouse_id()

    # Fetch and parse the connection list once, up front.
    resp_cn = requests.get("https://api.fabric.microsoft.com/v1/connections", headers=headers)
    resp_cn.raise_for_status()
    connections = resp_cn.json()["value"]

    for config in shortcut_configs:
        target_schema = config["target_schema"]
        connection_name = config["connection_name"]
        target_path = f"Tables/{target_schema}/"

        # Resolve the connection by display name (raises StopIteration if
        # no connection matches — same behavior as the original next()).
        conn = next(c for c in connections if c["displayName"] == connection_name)
        connection_id = conn["id"]
        location = conn["connectionDetails"]["path"]

        payload = {
            "name": config["name"],
            "path": target_path,
            "target": {
                "type": "AdlsGen2",
                "adlsGen2": {
                    "connectionId": connection_id,
                    "location": location,
                    "subpath": config["subpath"] } } }

        url = (f"https://api.fabric.microsoft.com/v1/workspaces/"
               f"{workspace_id}/items/{lakehouse_id}/shortcuts" )

        response = requests.post(url, headers=headers, json=payload)

        if response.status_code in (200, 201):
            print(f"Shortcut '{config['name']}' created successfully.")
        else:
            print(f"Failed to create shortcut '{config['name']}'.")
            print("Status Code:", response.status_code)
            print("Response:", response.text)
707
+
708
+ """
709
+ # How to call function
710
+
711
+ target_workspace_id = spark.conf.get("spark.workspaceid")
712
+ target_lakehouse_id = spark.conf.get("spark.lakehouseid")
713
+ target_schema = spark.conf.get("spark.CostHubSchema")
714
+ target_location = spark.conf.get("spark.adlslocation")
715
+ target_connection = spark.conf.get("spark.connectionid")
716
+
717
+ # Define shortcut configurations
718
+ shortcut_configs = [
719
+ {
720
+ "name" : "Z-RefreshTime_01",
721
+ "target_schema" : target_schema,
722
+ "connection_name" : "CostHub_ADLS abibrahi",
723
+ "target_path" : f"Tables/{target_schema}/",
724
+ "subpath" : "/abidatamercury/MercuryDataProd/CostHub/Bridge_ExecOrgSummary"
725
+ }]
726
+
727
+ # Call the function
728
+ create_adls_shortcuts(shortcut_configs)
729
+
730
+ """
731
+ """
732
+ ## <<<<<<<<<<<<<<<< create_adls_shortcuts with spn
733
+
734
+ def create_adls_shortcuts_02(shortcut_configs):
735
+ # access_token = notebookutils.credentials.getToken("https://api.fabric.microsoft.com/.default")
736
+ import requests
737
+ import sempy.fabric as fabric
738
+ from notebookutils.credentials import getToken
739
+
740
+ spark = SparkSession.builder.getOrCreate()
741
+
742
+ tenant_id = spark.conf.get("spark.tenantid")
743
+ client_id = spark.conf.get("spark.clientid")
744
+ certificate_secret_name = spark.conf.get("spark.certname")
745
+ keyvault_url = spark.conf.get("spark.vaultname")
746
+
747
+ # Defaults
748
+ endpoint_url = (
749
+ "https://fdne-inframail-logicapp01.azurewebsites.net:443/"
750
+ "api/fdne-infra-appmail-sender/triggers/"
751
+ "When_a_HTTP_request_is_received/invoke"
752
+ "?api-version=2022-05-01" )
753
+
754
+ scope = "api://27d45411-0d7a-4f27-bc5f-412d74ea249b/.default"
755
+
756
+ # Credential
757
+ secret_value = getSecret(keyvault_url, certificate_secret_name)
758
+ certificate_data = base64.b64decode(secret_value)
759
+
760
+ credential = CertificateCredential(
761
+ tenant_id=tenant_id,
762
+ client_id=client_id,
763
+ certificate_data=certificate_data,
764
+ send_certificate_chain=True
765
+ )
766
+
767
+ access_token = credential.get_token(scope).token
768
+
769
+ headers = {"Authorization": f"Bearer {access_token}", "Content-Type": "application/json" }
770
+ print("Access token starts with:", access_token[:20])
771
+
772
+ for config in shortcut_configs:
773
+ target_schema = config["target_schema"]
774
+ workspace_name = config["workspace_name"]
775
+ lakehouse_name = config["lakehouse_name"]
776
+ connection_name = config["connection_name"]
777
+ target_path = f"Tables/{target_schema}/"
778
+
779
+ resp_ws = requests.get("https://api.fabric.microsoft.com/v1/workspaces", headers=headers)
780
+ resp_ws.raise_for_status()
781
+ workspace_id = next(ws["id"] for ws in resp_ws.json()["value"] if ws["displayName"] == workspace_name)
782
+
783
+ resp_lh = requests.get(f"https://api.fabric.microsoft.com/v1/workspaces/{workspace_id}/lakehouses", headers=headers)
784
+ resp_lh.raise_for_status()
785
+ lakehouse_id = next(lh["id"] for lh in resp_lh.json()["value"] if lh["displayName"] == lakehouse_name)
786
+
787
+ resp_cn = requests.get(f"https://api.fabric.microsoft.com/v1/connections", headers=headers)
788
+ resp_cn.raise_for_status()
789
+ connection_id = next(conn["id"] for conn in resp_cn.json()["value"] if conn["displayName"] == connection_name)
790
+
791
+ conn_loc = next(conn for conn in resp_cn.json()["value"] if conn["displayName"] == connection_name)
792
+ location = conn_loc["connectionDetails"]["path"]
793
+
794
+ payload = {
795
+ "name": config["name"],
796
+ "path": target_path,
797
+ "target": {
798
+ "type": "AdlsGen2",
799
+ "adlsGen2": {
800
+ "connectionId" : connection_id,
801
+ "location" : location,
802
+ "subpath" : config["subpath"]
803
+ }
804
+ }
805
+ }
806
+
807
+ url = f"https://api.fabric.microsoft.com/v1/workspaces/{workspace_id}/items/{lakehouse_id}/shortcuts"
808
+ response = requests.post(url, headers=headers, json=payload)
809
+
810
+ if response.status_code in [200, 201]:
811
+ print(f"Shortcut '{config['name']}' created successfully.")
812
+ else:
813
+ print(f"Failed to create shortcut '{config['name']}'.")
814
+ print("Status Code:", response.status_code)
815
+ print("Response:", response.text)
816
+
817
+
818
+ # How to call function
819
+
820
+ # Define shortcut configurations
821
+ shortcut_configs = [
822
+ {
823
+ "name" : "Bridge_ExecOrgSummary",
824
+ "target_schema" : "CostHub",
825
+ "workspace_name" : "FDnECostHubReporting_DEV",
826
+ "lakehouse_name" : "Cost_Hub",
827
+ "connection_name" : "CostHub_ADLS abibrahi",
828
+ "subpath" : "/abidatamercury/MercuryDataProd/CostHub/Bridge_ExecOrgSummary"
829
+ }]
830
+
831
+ # Call the function
832
+ create_adls_shortcuts_02(shortcut_configs)
833
+ """
834
+
835
+ ## <<<<<<<<<<<<<<<< lakehouse_metadata_sync
836
+
837
def pad_or_truncate_string(input_string, length, pad_char=' '):
    """Force *input_string* to exactly *length* characters.

    Longer strings are cut at *length*; shorter ones are right-padded
    with *pad_char*.
    """
    clipped = input_string[:length]
    return clipped if len(clipped) == length else clipped.ljust(length, pad_char)
841
+
842
def lakehouse_metadata_sync(workspace_id, lakehouse_id):
    """Trigger a metadata refresh on a lakehouse's SQL endpoint and poll it.

    Parameters
    ----------
    workspace_id, lakehouse_id : str
        Fabric workspace / lakehouse GUIDs.

    Returns a dict whose "status" key is one of "success", "failure",
    "unknown" or "exception", together with the matching detail payload.

    FIX: the module-level ``import sempy.fabric as fabric`` is commented
    out, so referencing ``fabric`` here raised NameError at runtime; it is
    now imported locally, matching this file's local-import convention.
    """
    import sempy.fabric as fabric

    client = fabric.FabricRestClient()

    # Get the SQL endpoint ID from the lakehouse
    lakehouse_props = client.get(f"/v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}").json()
    sqlendpoint = lakehouse_props['properties']['sqlEndpointProperties']['id']

    # Prepare the metadata refresh payload (internal lhdatamarts API)
    uri = f"/v1.0/myorg/lhdatamarts/{sqlendpoint}"
    payload = {
        "commands": [
            {"$type": "MetadataRefreshExternalCommand"}
        ]
    }

    try:
        response = client.post(uri, json=payload)
        response_data = response.json()

        batchId = response_data["batchId"]
        progressState = response_data["progressState"]
        statusuri = f"/v1.0/myorg/lhdatamarts/{sqlendpoint}/batches/{batchId}"

        # Poll the status until it's no longer "inProgress".
        # NOTE(review): no timeout — a batch stuck inProgress polls forever;
        # `display` is assumed to be the notebook-provided builtin.
        while progressState == 'inProgress':
            time.sleep(2)
            status_response = client.get(statusuri).json()
            progressState = status_response["progressState"]
            display(f"Sync state: {progressState}")

        # Handle success
        if progressState == 'success':
            table_details = [
                {
                    'tableName': t['tableName'],
                    'warningMessages': t.get('warningMessages', []),
                    'lastSuccessfulUpdate': t.get('lastSuccessfulUpdate', 'N/A'),
                    'tableSyncState': t['tableSyncState'],
                    'sqlSyncState': t['sqlSyncState']
                }
                for t in status_response['operationInformation'][0]['progressDetail']['tablesSyncStatus']
            ]

            print("✅ Extracted Table Details:")
            for detail in table_details:
                print(
                    f"Table: {pad_or_truncate_string(detail['tableName'], 30)}"
                    f" | Last Update: {detail['lastSuccessfulUpdate']}"
                    f" | tableSyncState: {detail['tableSyncState']}"
                    f" | Warnings: {detail['warningMessages']}"
                )
            return {"status": "success", "details": table_details}

        # Handle failure
        elif progressState == 'failure':
            print("❌ Metadata sync failed.")
            display(status_response)
            return {"status": "failure", "error": status_response}

        else:
            print(f"⚠️ Unexpected progress state: {progressState}")
            return {"status": "unknown", "raw_response": status_response}

    except Exception as e:
        print("🚨 Error during metadata sync:", str(e))
        return {"status": "exception", "error": str(e)}
908
+
909
+ """
910
+ # How to call function ( lakehouse_metadata_sync )
911
+ workspace_id = spark.conf.get("trident.workspace.id")
912
+ lakehouse_id = spark.conf.get("trident.lakehouse.id")
913
+
914
+ # Call the function
915
+ result = lakehouse_metadata_sync(workspace_id, lakehouse_id)
916
+ display(result)
917
+ """
918
+
919
+
920
+
921
+
922
+
923
+ """
924
+ # <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< Archieve
925
+
926
+
927
+ ## <<<<<<<<<<<<<<<< create_adls_shortcuts_01
928
+
929
def create_adls_shortcuts_01(shortcut_configs, workspace_id, lakehouse_id, target_schema):
    '''Create ADLS Gen2 shortcuts inside a Fabric lakehouse schema.

    Parameters:
        shortcut_configs (list[dict]): each entry needs "name",
            "connection_id", "location" and "subpath".
        workspace_id (str): target Fabric workspace id.
        lakehouse_id (str): target lakehouse item id.
        target_schema (str): schema under Tables/ that receives the shortcuts.

    Side effects:
        Issues one POST per config to the Fabric shortcuts API and prints
        the outcome of each call; nothing is returned.
    '''
    access_token = notebookutils.credentials.getToken("https://api.fabric.microsoft.com/.default")
    headers = {
        "Authorization": f"Bearer {access_token}",
        "Content-Type": "application/json"
    }
    # SECURITY: never print any substring of a bearer token; confirm acquisition only.
    print("Access token acquired:", bool(access_token))

    target_path = f"Tables/{target_schema}/"
    # Endpoint is identical for every shortcut, so build it once outside the loop.
    url = f"https://api.fabric.microsoft.com/v1/workspaces/{workspace_id}/items/{lakehouse_id}/shortcuts"

    for config in shortcut_configs:
        payload = {
            "name": config["name"],
            "path": target_path,
            "target": {
                "type": "AdlsGen2",
                "adlsGen2": {
                    "connectionId": config["connection_id"],
                    "location": config["location"],
                    "subpath": config["subpath"]
                }
            }
        }

        # Timeout prevents a stalled API call from hanging the whole notebook run.
        response = requests.post(url, headers=headers, json=payload, timeout=60)

        if response.status_code in [200, 201]:
            print(f"Shortcut '{config['name']}' created successfully.")
        else:
            print(f"Failed to create shortcut '{config['name']}'.")
            print("Status Code:", response.status_code)
            print("Response:", response.text)
962
+
963
+ ## <<<<<<<<<<<<<<<< create_lakehouse_shortcuts_01
964
+
965
def create_lakehouse_shortcuts_01(shortcut_configs, workspace_id, lakehouse_id, target_schema):
    '''Create OneLake (lakehouse-to-lakehouse) shortcuts in a Fabric lakehouse.

    Parameters:
        shortcut_configs (list[dict]): each entry needs "source_subpath",
            "target_shortcut_name", "source_workspace_id" and
            "source_lakehouse_id".
        workspace_id (str): target Fabric workspace id.
        lakehouse_id (str): target lakehouse item id.
        target_schema (str): schema under Tables/; falls back to "dbo"
            when falsy.

    Side effects:
        Issues one POST per config to the Fabric shortcuts API and prints
        each payload and outcome; nothing is returned.
    '''
    access_token = notebookutils.credentials.getToken("https://api.fabric.microsoft.com/.default")
    headers = {
        "Authorization": f"Bearer {access_token}",
        "Content-Type": "application/json"
    }
    # SECURITY: never print any substring of a bearer token; confirm acquisition only.
    print("Access token acquired:", bool(access_token))

    # Both the destination path and the endpoint are loop-invariant.
    target_path = f"Tables/{target_schema or 'dbo'}/"
    url = f"https://api.fabric.microsoft.com/v1/workspaces/{workspace_id}/items/{lakehouse_id}/shortcuts"

    for config in shortcut_configs:
        source_path = config["source_subpath"]
        target_shortcut_name = config["target_shortcut_name"]
        source_workspace_id = config["source_workspace_id"]
        source_lakehouse_id = config["source_lakehouse_id"]

        payload = {
            "path": target_path,
            "name": target_shortcut_name,
            "target": {
                "type": "OneLake",
                "oneLake": {
                    "workspaceId": source_workspace_id,
                    "itemId": source_lakehouse_id,
                    "path": source_path
                }
            }
        }

        print(f"Creating shortcut '{target_shortcut_name}' → {target_path}")
        print(json.dumps(payload, indent=2))

        # --- Send POST request (timeout keeps a stalled call from hanging the run) ---
        response = requests.post(url, headers=headers, json=payload, timeout=60)

        if response.status_code in [200, 201]:
            print(f"Shortcut '{target_shortcut_name}' created successfully.")
            print(response.json())
        else:
            print(f"Failed to create shortcut '{target_shortcut_name}'.")
            print("Status Code:", response.status_code)
            print("Response:", response.text)
1008
+
1009
+
1010
+ ## <<<<<<<<<<<<<<<< QA_CheckUtil_01
1011
def QA_CheckUtil_01(
    source_df: DataFrame,
    qa_df: DataFrame
) -> DataFrame:
    '''Compare a source DataFrame with a QA DataFrame and return a QA report.

    Checks performed:
      * ROW_COUNT       - total row counts match
      * NULL_CHECK      - per-column null counts match (columns common to both)
      * AGG_CHECK       - SUM("amount") matches, when both sides have "amount"
      * DUPLICATE_CHECK - duplicate "id" counts match, when both sides have "id"

    Returns:
        DataFrame with columns: check_type, check_name, column_name,
        source_value, qa_value, diff, match.
    '''
    spark = source_df.sparkSession
    qa_rows: List[tuple] = []

    def calc_diff(src: Optional[Union[int, float]], qa: Optional[Union[int, float]]) -> Optional[float]:
        # A missing value on either side makes the difference undefined.
        if src is None or qa is None:
            return None
        return float(src) - float(qa)

    # Row count. The counts are cached and reused by the duplicate check below
    # so each (expensive) Spark count action runs only once per DataFrame.
    src_count = float(source_df.count())
    qa_count = float(qa_df.count())
    qa_rows.append((
        "ROW_COUNT",
        "row_count",
        None,
        src_count,
        qa_count,
        calc_diff(src_count, qa_count),
        src_count == qa_count
    ))

    # Null check for every column present in both DataFrames.
    # Loop variable renamed from `col` to avoid shadowing
    # pyspark.sql.functions.col imported at module level.
    common_cols = set(source_df.columns).intersection(set(qa_df.columns))
    for col_name in common_cols:
        src_nulls = float(source_df.filter(F.col(col_name).isNull()).count())
        qa_nulls = float(qa_df.filter(F.col(col_name).isNull()).count())
        qa_rows.append((
            "NULL_CHECK",
            "null_count",
            col_name,
            src_nulls,
            qa_nulls,
            calc_diff(src_nulls, qa_nulls),
            src_nulls == qa_nulls
        ))

    # Aggregation check (SUM of "amount"); `or 0.0` maps a NULL sum (empty /
    # all-null column) to zero.
    if "amount" in source_df.columns and "amount" in qa_df.columns:
        src_sum = float(source_df.select(F.sum("amount")).collect()[0][0] or 0.0)
        qa_sum = float(qa_df.select(F.sum("amount")).collect()[0][0] or 0.0)
        qa_rows.append((
            "AGG_CHECK",
            "sum",
            "amount",
            src_sum,
            qa_sum,
            calc_diff(src_sum, qa_sum),
            src_sum == qa_sum
        ))

    # Duplicate check on "id": total rows minus distinct ids, reusing the
    # cached totals instead of re-running the count actions.
    if "id" in source_df.columns and "id" in qa_df.columns:
        src_dupes = float(src_count - source_df.select("id").distinct().count())
        qa_dupes = float(qa_count - qa_df.select("id").distinct().count())
        qa_rows.append((
            "DUPLICATE_CHECK",
            "duplicate_id",
            "id",
            src_dupes,
            qa_dupes,
            calc_diff(src_dupes, qa_dupes),
            src_dupes == qa_dupes
        ))

    # Assemble the report rows into the result DataFrame.
    return spark.createDataFrame(
        qa_rows,
        [
            "check_type",
            "check_name",
            "column_name",
            "source_value",
            "qa_value",
            "diff",
            "match"
        ]
    )
1094
+
1095
+ ## >>>>>>>>>>>>>>>>>> QA_CheckUtil_01
1096
+
1097
+
1098
+ ## <<<<<<<<<<<<<<<< send_email_no_attachment_02
1099
+
1100
def send_email_no_attachment_02(p, endpoint_url=None, access_token=None):
    '''Send a plain (no-attachment) email through an HTTP mail endpoint.

    Parameters:
        p (dict): {
            "to": str | list[str],
            "subject": str,
            "body": str,
            "headers": dict (optional),
            "timeout": int (optional)
        }
        endpoint_url (str): API endpoint for sending mail
        access_token (str): Bearer token

    Returns:
        (status_code, response_text) or (None, error_message)
    '''
    # BUGFIX: the text above was bare prose inside the function body (a
    # SyntaxError when executed); it is now a proper docstring.
    if not endpoint_url:
        raise ValueError("endpoint_url is required")
    if not access_token:
        raise ValueError("access_token is required")

    # Validate the mandatory payload fields before building the request.
    missing = [k for k in ("to", "subject", "body") if not p.get(k)]
    if missing:
        return None, f"Missing required fields: {', '.join(missing)}"

    payload = {
        # The mail endpoint expects a single semicolon-separated "to" string.
        "to": ";".join(p["to"]) if isinstance(p["to"], list) else p["to"],
        "subject": p["subject"],
        "body": p["body"],
    }

    # Caller-supplied headers may intentionally override Authorization.
    headers = {
        "Authorization": f"Bearer {access_token}",
        **p.get("headers", {})
    }

    try:
        resp = requests.post(
            endpoint_url,
            json=payload,
            headers=headers,
            timeout=p.get("timeout", 15)
        )
        # Status code is returned verbatim; callers decide what counts as success.
        # (Removed unused local `success_codes`.)
        return resp.status_code, resp.text
    except requests.RequestException as e:
        return None, str(e)
1146
+
1147
+
1148
+ ## <<<<<<<<<<<<<<<< send_email_no_attachment_01
1149
+
1150
def send_email_no_attachment_01(
    body : Optional[str] = None,
    endpoint_url: Optional[str] = None,
    access_token: Optional[str] = None,
    subject : Optional[str] = None,
    recipients : Optional[List[str]] = None,
    headers : Optional[Dict[str, str]] = None,
    timeout : int = 15,
    tz_name : str = "America/Los_Angeles"
) -> Tuple[Optional[int], str]:
    '''Send a plain email via an HTTP mail endpoint, skipping gracefully
    when required inputs are absent.

    Returns:
        (status_code, response_text) on an accepted send,
        (status_code, "Failed: ...") on a rejected send, or
        (None, reason) when the send was skipped or the request errored.

    NOTE(review): `tz_name` is currently unused in the body — kept for
    interface compatibility; confirm before removing.
    '''
    # Guard clauses: report why the send was skipped instead of raising.
    if not (endpoint_url and access_token):
        return None, "Skipping send: endpoint_url or access_token not provided."
    if not recipients:
        return None, "Skipping send: no recipients provided."
    if not body:
        return None, "Skipping send: no body content provided."

    # The mail endpoint expects a single semicolon-separated "to" string.
    to_field = ";".join(recipients) if isinstance(recipients, list) else recipients
    mail_payload = {
        "to": to_field,
        "subject": subject or "",
        "body": body
    }

    # Caller-supplied headers may intentionally override Authorization.
    request_headers = {"Authorization": f"Bearer {access_token}"}
    request_headers.update(headers or {})

    try:
        resp = requests.post(
            endpoint_url,
            json=mail_payload,
            headers=request_headers,
            timeout=timeout
        )
    except requests.RequestException as exc:
        return None, str(exc)

    if resp.status_code in (200, 201, 202):
        return resp.status_code, resp.text
    return resp.status_code, f"Failed: {resp.text}"
1198
+
1199
# How to call function
#
# NOTE(review): example wiring only — relies on notebook-session globals
# (`spark`, `credential`, `markdown`, `recipients`, `subject`) that are not
# defined in this module.

# Derive the AAD token scope from the app (API) id stored in Spark conf.
apiid = spark.conf.get("spark.scopeid")
scope = f"api://{apiid}/.default"
access_token = credential.get_token(scope).token
# Logic App HTTP-trigger endpoint that performs the actual mail send.
endpoint_base = "https://fdne-inframail-logicapp01.azurewebsites.net:443/api/fdne-infra-appmail-sender"
endpoint_url = f"{endpoint_base}/triggers/When_a_HTTP_request_is_received/invoke?api-version=2022-05-01"

status, response = send_email_no_attachment_01(
    body=markdown,
    recipients=recipients,
    endpoint_url=endpoint_url,
    access_token=access_token,
    subject=subject)
1213
+
1214
+
1215
+
1216
+ # <<<<<<<<<<<<<<<< send_email_via_http_01
1217
+
1218
def send_email_via_http_01(params):
    '''Send an email (optionally embedding/attaching a DataFrame as HTML)
    through the HTTP mail endpoint configured by a prior init_mail() call.

    Relies on module-level globals `endpoint_url` and `access_token`
    (set elsewhere by init_mail) and on the sibling helper `_df_to_html_table`.

    Parameters:
        params (dict): "to", "subject", "body" required; optional keys:
            "cc", "bcc", "from_addr", "attachments", "df", "df_limit",
            "tz_name", "df_in_body", "df_attach", "df_name", "headers",
            "timeout".

    Returns:
        (status_code, response_text, req_headers) on a completed request, or
        (None, error_message) on a request exception / missing fields.

    NOTE(review): the success path returns a 3-tuple while the error paths
    return a 2-tuple — callers that unpack must handle both arities.
    NOTE(review): the returned req_headers include the raw bearer token —
    confirm callers do not log this value.
    '''
    # Ensure init_mail() ran: probe the globals and fail fast with guidance.
    try:
        _ = endpoint_url
    except NameError:
        raise RuntimeError("endpoint_url not set. Call init_mail(...) once in this session before send_email_via_http().")
    try:
        _ = access_token
    except NameError:
        raise RuntimeError("access_token not set. Call init_mail(...) once in this session before send_email_via_http().")

    # Required checks
    required = ['to', 'subject', 'body']
    missing = [f for f in required if not params.get(f)]
    if missing:
        return None, f"Missing required fields: {', '.join(missing)}"

    # Base payload; the endpoint expects "to" as one semicolon-separated string.
    payload = {
        "to": ";".join(params["to"]) if isinstance(params["to"], list) else params["to"],
        "subject": params["subject"],
        "body": params["body"],
    }
    # cc/bcc are normalized to lists; "from_addr" maps to the API's "from" key.
    if params.get("cc"):
        payload["cc"] = params["cc"] if isinstance(params["cc"], list) else [params["cc"]]
    if params.get("bcc"):
        payload["bcc"] = params["bcc"] if isinstance(params["bcc"], list) else [params["bcc"]]
    if params.get("from_addr"):
        payload["from"] = params["from_addr"]
    if params.get("attachments"):
        payload["attachments"] = params["attachments"]

    # ---- DataFrame → HTML body (your existing style) ----
    df = params.get("df")
    if df is not None:
        df_limit = int(params.get("df_limit", 1000))
        tz_name = params.get("tz_name", "America/Los_Angeles")
        df_in_body = params.get("df_in_body", True)
        df_attach = params.get("df_attach", False)
        df_name = params.get("df_name", "data.html")

        # Get pandas DataFrame: cap Spark inputs at df_limit rows before
        # collecting; anything else is passed through as-is.
        pdf = None
        try:
            from pyspark.sql import DataFrame as SparkDF
            if isinstance(df, SparkDF):
                pdf = df.limit(df_limit).toPandas()
            else:
                pdf = df  # assume already pandas
        except Exception:
            # pyspark unavailable or the isinstance probe failed — use df as-is.
            pdf = df

        html_body = _df_to_html_table(pdf, tz_name=tz_name)

        if df_in_body:
            # "QA Success" subjects deliberately suppress the table in the body.
            subject = str(params.get("subject", ""))
            if "QA Success" in subject:
                payload["body"] ='<html><body><h4>No data available to display.</h4></body></html>'
            else:
                payload["body"] = html_body
        else:
            # append to body if you prefer not to replace
            payload["body"] = f'{payload["body"]}{html_body}'

        if df_attach:
            # Attach the same HTML table as a base64-encoded file.
            content_b64 = base64.b64encode(html_body.encode("utf-8")).decode("utf-8")
            attach = {"name": df_name, "contentBytes": content_b64, "contentType": "text/html"}
            if "attachments" in payload and isinstance(payload["attachments"], list):
                payload["attachments"].append(attach)
            else:
                payload["attachments"] = [attach]

    # Auth header; caller-supplied headers may override it.
    req_headers = {"Authorization": f"Bearer {access_token}"}
    if params.get("headers"):
        req_headers.update(params["headers"])

    timeout = params.get("timeout", 15)

    # Send
    try:
        response = requests.post(endpoint_url, json=payload, headers=req_headers, timeout=timeout)
        status_msg = "✅ Success" if response.status_code == 200 else f"❌ Failed ({response.status_code})"
        print(f"Email send: {status_msg}")
        return response.status_code, response.text, req_headers
    except requests.RequestException as e:
        error_msg = f"Request failed: {str(e)}"
        print(f"❌ {error_msg}")
        return None, error_msg
1308
+
1309
+
1310
+ ########### base working version # adls_shortcut_utils.py
1311
+
1312
+ # adls_shortcut_utils.py
1313
+
1314
+ import requests
1315
+ from notebookutils import mssparkutils
1316
+ from notebookutils.credentials import getToken
1317
+
1318
def create_adls_shortcuts_03(shortcut_configs, workspace_id, lakehouse_id, target_schema):
    '''Create ADLS Gen2 shortcuts in a Fabric lakehouse schema.

    Parameters:
        shortcut_configs (list[dict]): each entry needs "name",
            "connection_id", "location" and "subpath".
        workspace_id (str): target Fabric workspace id.
        lakehouse_id (str): target lakehouse item id.
        target_schema (str): schema under Tables/ that receives the shortcuts.

    Side effects:
        Issues one POST per config to the Fabric shortcuts API and prints
        the outcome of each call; nothing is returned.
    '''
    # access_token = mssparkutils.credentials.getToken("https://api.fabric.microsoft.com/.default")
    access_token = getToken("https://api.fabric.microsoft.com/.default")
    headers = {
        "Authorization": f"Bearer {access_token}",
        "Content-Type": "application/json"
    }
    # SECURITY: never print any substring of a bearer token; confirm acquisition only.
    print("Access token acquired:", bool(access_token))

    target_path = f"Tables/{target_schema}/"
    # Endpoint is identical for every shortcut, so build it once outside the loop.
    url = f"https://api.fabric.microsoft.com/v1/workspaces/{workspace_id}/items/{lakehouse_id}/shortcuts"

    for config in shortcut_configs:
        payload = {
            "name": config["name"],
            "path": target_path,
            "target": {
                "type": "AdlsGen2",
                "adlsGen2": {
                    "connectionId": config["connection_id"],
                    "location": config["location"],
                    "subpath": config["subpath"]
                }
            }
        }

        # Timeout prevents a stalled API call from hanging the whole notebook run.
        response = requests.post(url, headers=headers, json=payload, timeout=60)

        if response.status_code in [200, 201]:
            print(f"Shortcut '{config['name']}' created successfully.")
            print()
        else:
            print(f"Failed to create shortcut '{config['name']}'.")
            print("Status Code:", response.status_code)
            print("Response:", response.text)
            print()
1354
+
1355
+
1356
+
1357
# CostHub
# NOTE(review): example wiring for the CostHub shortcut — relies on the
# notebook-session global `spark`; not runnable as plain module code.
from pyspark.conf import SparkConf

# Required Spark conf values
target_workspace_id = spark.conf.get("spark.workspaceid")
target_lakehouse_id = spark.conf.get("spark.lakehouseid")
target_schema = spark.conf.get("spark.CostHubSchema")
target_location = spark.conf.get("spark.adlslocation")
target_connection = spark.conf.get("spark.connectionid")

# Define shortcut configurations
# (single shortcut pointing at the Mercury upstream refresh log path)
shortcut_configs = [
    {
        "name": "Z-RefreshTime_03",
        "connection_id": target_connection,
        "location": target_location,
        "subpath": "/abidatamercury/MercuryDataProd/CostHub/MercuryUpstreamRefreshLog"
    }
]

# Call the function
create_adls_shortcuts_03(shortcut_configs, target_workspace_id, target_lakehouse_id, target_schema)
1379
+
1380
+ """