dao-ai 0.1.9__py3-none-any.whl → 0.1.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1072 @@
1
+ """
2
+ App resources module for generating Databricks App resource configurations.
3
+
4
+ This module provides utilities to dynamically discover and generate Databricks App
5
+ resource configurations from dao-ai AppConfig. Resources are extracted from the
6
+ config and converted to the format expected by Databricks Apps.
7
+
8
+ Databricks Apps resource documentation:
9
+ https://learn.microsoft.com/en-us/azure/databricks/dev-tools/databricks-apps/resources
10
+
11
+ Supported resource types and their mappings:
12
+ - LLMModel → serving-endpoint (Model Serving Endpoint)
13
+ - VectorStoreModel/IndexModel → vector-search-index (via UC Securable - not yet supported)
14
+ - WarehouseModel → sql-warehouse
15
+ - GenieRoomModel → genie-space
16
+ - VolumeModel → volume (via UC Securable)
17
+ - FunctionModel → function (via UC Securable - not yet supported)
18
+ - ConnectionModel → connection (not yet supported in SDK)
19
+ - DatabaseModel → database (Lakebase)
20
+ - DatabricksAppModel → app (not yet supported in SDK)
21
+
22
+ Usage:
23
+ from dao_ai.apps.resources import generate_app_resources, generate_sdk_resources
24
+ from dao_ai.config import AppConfig
25
+
26
+ config = AppConfig.from_file("model_config.yaml")
27
+
28
+ # For SDK-based deployment (recommended)
29
+ sdk_resources = generate_sdk_resources(config)
30
+
31
+ # For YAML-based documentation
32
+ resources = generate_app_resources(config)
33
+ """
34
+
35
+ from typing import Any
36
+
37
+ from databricks.sdk.service.apps import (
38
+ AppResource,
39
+ AppResourceDatabase,
40
+ AppResourceDatabaseDatabasePermission,
41
+ AppResourceExperiment,
42
+ AppResourceExperimentExperimentPermission,
43
+ AppResourceGenieSpace,
44
+ AppResourceGenieSpaceGenieSpacePermission,
45
+ AppResourceSecret,
46
+ AppResourceSecretSecretPermission,
47
+ AppResourceServingEndpoint,
48
+ AppResourceServingEndpointServingEndpointPermission,
49
+ AppResourceSqlWarehouse,
50
+ AppResourceSqlWarehouseSqlWarehousePermission,
51
+ AppResourceUcSecurable,
52
+ AppResourceUcSecurableUcSecurablePermission,
53
+ AppResourceUcSecurableUcSecurableType,
54
+ )
55
+ from loguru import logger
56
+
57
+ from dao_ai.config import (
58
+ AppConfig,
59
+ CompositeVariableModel,
60
+ ConnectionModel,
61
+ DatabaseModel,
62
+ DatabricksAppModel,
63
+ EnvironmentVariableModel,
64
+ FunctionModel,
65
+ GenieRoomModel,
66
+ IsDatabricksResource,
67
+ LLMModel,
68
+ SecretVariableModel,
69
+ TableModel,
70
+ VectorStoreModel,
71
+ VolumeModel,
72
+ WarehouseModel,
73
+ value_of,
74
+ )
75
+
76
# Resource type mappings from dao-ai to Databricks Apps
# Keys are dao-ai config model classes; values are the Databricks Apps
# resource "type" strings used in generated resource dicts.
RESOURCE_TYPE_MAPPING: dict[type, str] = {
    LLMModel: "serving-endpoint",
    VectorStoreModel: "vector-search-index",
    WarehouseModel: "sql-warehouse",
    GenieRoomModel: "genie-space",
    VolumeModel: "volume",
    FunctionModel: "function",
    ConnectionModel: "connection",
    DatabaseModel: "database",
    DatabricksAppModel: "app",
}

# Default permissions for each resource type
# Each generated resource gets these permission levels unless overridden.
DEFAULT_PERMISSIONS: dict[str, list[str]] = {
    "serving-endpoint": ["CAN_QUERY"],
    "vector-search-index": ["CAN_SELECT"],
    "sql-warehouse": ["CAN_USE"],
    "genie-space": ["CAN_RUN"],
    "volume": ["CAN_READ"],
    "function": ["CAN_EXECUTE"],
    "connection": ["USE_CONNECTION"],
    "database": ["CAN_CONNECT_AND_CREATE"],
    "app": ["CAN_VIEW"],
}

# Valid user API scopes for Databricks Apps
# These are the only scopes that can be requested for on-behalf-of-user access
VALID_USER_API_SCOPES: set[str] = {
    "sql",
    "serving.serving-endpoints",
    "vectorsearch.vector-search-indexes",
    "files.files",
    "dashboards.genie",
    "catalog.connections",
    "catalog.catalogs:read",
    "catalog.schemas:read",
    "catalog.tables:read",
}

# Mapping from resource api_scopes to valid user_api_scopes
# Some resource scopes map directly, others need translation
# (used by generate_user_api_scopes to normalize OBO resource scopes).
API_SCOPE_TO_USER_SCOPE: dict[str, str] = {
    # Direct mappings
    "serving.serving-endpoints": "serving.serving-endpoints",
    "vectorsearch.vector-search-indexes": "vectorsearch.vector-search-indexes",
    "files.files": "files.files",
    "dashboards.genie": "dashboards.genie",
    "catalog.connections": "catalog.connections",
    # SQL-related scopes map to "sql"
    "sql.warehouses": "sql",
    "sql.statement-execution": "sql",
    # Vector search endpoints also need serving
    "vectorsearch.vector-search-endpoints": "serving.serving-endpoints",
    # Catalog scopes
    "catalog.volumes": "files.files",
}
133
+
134
+
135
def _extract_llm_resources(
    llms: dict[str, LLMModel],
) -> list[dict[str, Any]]:
    """Extract model serving endpoint resources from LLMModels.

    Args:
        llms: Mapping of config key -> LLMModel.

    Returns:
        A list of Databricks Apps resource dicts of type "serving-endpoint",
        one per LLM, each with the default CAN_QUERY permission.
    """
    resources: list[dict[str, Any]] = []
    # Fix: the previous implementation wrapped the iteration in enumerate()
    # but never used the index; iterate the items directly.
    for key, llm in llms.items():
        resource: dict[str, Any] = {
            "name": key,
            "type": "serving-endpoint",
            "serving_endpoint_name": llm.name,
            "permissions": [
                {"level": p} for p in DEFAULT_PERMISSIONS["serving-endpoint"]
            ],
        }
        resources.append(resource)
        logger.debug(f"Extracted serving endpoint resource: {key} -> {llm.name}")
    return resources
152
+
153
+
154
def _extract_vector_search_resources(
    vector_stores: dict[str, VectorStoreModel],
) -> list[dict[str, Any]]:
    """Extract vector search index resources from VectorStoreModels.

    Stores without a configured index are skipped entirely.
    """
    extracted: list[dict[str, Any]] = []
    for name, store in vector_stores.items():
        if store.index is None:
            # Nothing to expose for a store that has no index configured.
            continue
        extracted.append(
            {
                "name": name,
                "type": "vector-search-index",
                "vector_search_index_name": store.index.full_name,
                "permissions": [
                    {"level": p} for p in DEFAULT_PERMISSIONS["vector-search-index"]
                ],
            }
        )
        logger.debug(
            f"Extracted vector search resource: {name} -> {store.index.full_name}"
        )
    return extracted
173
+
174
+
175
def _extract_warehouse_resources(
    warehouses: dict[str, WarehouseModel],
) -> list[dict[str, Any]]:
    """Extract SQL warehouse resources from WarehouseModels.

    The warehouse id may be a variable reference, so it is resolved
    through value_of() before being emitted.
    """
    extracted: list[dict[str, Any]] = []
    for name, wh in warehouses.items():
        wh_id = value_of(wh.warehouse_id)
        extracted.append(
            {
                "name": name,
                "type": "sql-warehouse",
                "sql_warehouse_id": wh_id,
                "permissions": [
                    {"level": p} for p in DEFAULT_PERMISSIONS["sql-warehouse"]
                ],
            }
        )
        logger.debug(f"Extracted SQL warehouse resource: {name} -> {wh_id}")
    return extracted
191
+
192
+
193
def _extract_genie_resources(
    genie_rooms: dict[str, GenieRoomModel],
) -> list[dict[str, Any]]:
    """Extract Genie space resources from GenieRoomModels.

    The space id may be a variable reference, so it is resolved through
    value_of() before being emitted.
    """
    extracted: list[dict[str, Any]] = []
    for name, room in genie_rooms.items():
        room_space_id = value_of(room.space_id)
        extracted.append(
            {
                "name": name,
                "type": "genie-space",
                "genie_space_id": room_space_id,
                "permissions": [
                    {"level": p} for p in DEFAULT_PERMISSIONS["genie-space"]
                ],
            }
        )
        logger.debug(f"Extracted Genie space resource: {name} -> {room_space_id}")
    return extracted
209
+
210
+
211
def _extract_volume_resources(
    volumes: dict[str, VolumeModel],
) -> list[dict[str, Any]]:
    """Extract UC Volume resources from VolumeModels."""
    extracted: list[dict[str, Any]] = []
    for name, vol in volumes.items():
        extracted.append(
            {
                "name": name,
                "type": "volume",
                "volume_name": vol.full_name,
                "permissions": [{"level": p} for p in DEFAULT_PERMISSIONS["volume"]],
            }
        )
        logger.debug(f"Extracted volume resource: {name} -> {vol.full_name}")
    return extracted
226
+
227
+
228
def _extract_function_resources(
    functions: dict[str, FunctionModel],
) -> list[dict[str, Any]]:
    """Extract UC Function resources from FunctionModels."""
    extracted: list[dict[str, Any]] = []
    for name, fn in functions.items():
        extracted.append(
            {
                "name": name,
                "type": "function",
                "function_name": fn.full_name,
                "permissions": [{"level": p} for p in DEFAULT_PERMISSIONS["function"]],
            }
        )
        logger.debug(f"Extracted function resource: {name} -> {fn.full_name}")
    return extracted
243
+
244
+
245
def _extract_connection_resources(
    connections: dict[str, ConnectionModel],
) -> list[dict[str, Any]]:
    """Extract UC Connection resources from ConnectionModels."""
    extracted: list[dict[str, Any]] = []
    for name, connection in connections.items():
        extracted.append(
            {
                "name": name,
                "type": "connection",
                "connection_name": connection.name,
                "permissions": [
                    {"level": p} for p in DEFAULT_PERMISSIONS["connection"]
                ],
            }
        )
        logger.debug(f"Extracted connection resource: {name} -> {connection.name}")
    return extracted
260
+
261
+
262
def _extract_database_resources(
    databases: dict[str, DatabaseModel],
) -> list[dict[str, Any]]:
    """Extract Lakebase database resources from DatabaseModels.

    Non-Lakebase databases (those where is_lakebase is falsy) are skipped.
    """
    extracted: list[dict[str, Any]] = []
    for name, database in databases.items():
        # Only include Lakebase databases (those with instance_name)
        if not database.is_lakebase:
            continue
        extracted.append(
            {
                "name": name,
                "type": "database",
                "database_instance_name": database.instance_name,
                "permissions": [
                    {"level": p} for p in DEFAULT_PERMISSIONS["database"]
                ],
            }
        )
        logger.debug(f"Extracted database resource: {name} -> {database.instance_name}")
    return extracted
280
+
281
+
282
def _extract_app_resources(
    apps: dict[str, DatabricksAppModel],
) -> list[dict[str, Any]]:
    """Extract Databricks App resources from DatabricksAppModels."""
    extracted: list[dict[str, Any]] = []
    for name, app_model in apps.items():
        extracted.append(
            {
                "name": name,
                "type": "app",
                "app_name": app_model.name,
                "permissions": [{"level": p} for p in DEFAULT_PERMISSIONS["app"]],
            }
        )
        logger.debug(f"Extracted app resource: {name} -> {app_model.name}")
    return extracted
297
+
298
+
299
def _extract_secrets_from_config(config: AppConfig) -> list[dict[str, Any]]:
    """
    Extract all secrets referenced in the config as resources.

    Recursively walks the config object graph (dicts, sequences, and the
    public attributes of objects with a __dict__, e.g. Pydantic models),
    collecting every SecretVariableModel and de-duplicating by
    (scope, key) pair.

    Args:
        config: The AppConfig containing secret references

    Returns:
        A list of secret resource dictionaries with unique scope/key pairs
    """
    found: dict[tuple[str, str], dict[str, Any]] = {}

    def walk(node: Any, path: str = "") -> None:
        """Recursively collect secrets from any value."""
        if isinstance(node, SecretVariableModel):
            dedupe_key = (node.scope, node.secret)
            if dedupe_key in found:
                return
            # Build a resource name unique per scope/key pair.
            resource_name = (
                f"{node.scope}_{node.secret}".replace("-", "_").replace("/", "_")
            )
            found[dedupe_key] = {
                "name": resource_name,
                "type": "secret",
                "scope": node.scope,
                "key": node.secret,
                "permissions": [{"level": "READ"}],
            }
            logger.debug(f"Found secret: {node.scope}/{node.secret} at {path}")
        elif isinstance(node, dict):
            for k, v in node.items():
                walk(v, f"{path}.{k}" if path else k)
        elif isinstance(node, (list, tuple)):
            for i, v in enumerate(node):
                walk(v, f"{path}[{i}]")
        elif hasattr(node, "__dict__"):
            # Pydantic models and other objects: visit public attributes only.
            for k, v in node.__dict__.items():
                if not k.startswith("_"):
                    walk(v, f"{path}.{k}" if path else k)

    # Walk through the entire config
    walk(config)

    resources = list(found.values())
    logger.info(f"Extracted {len(resources)} secret resources from config")
    return resources
349
+
350
+
351
def generate_app_resources(config: AppConfig) -> list[dict[str, Any]]:
    """
    Generate Databricks App resource configurations from an AppConfig.

    Extracts every resource defined in the AppConfig and converts it to the
    dict format expected by Databricks Apps. Resources are used to grant
    the app's service principal access to Databricks platform features.

    Args:
        config: The AppConfig containing resource definitions

    Returns:
        A list of resource dictionaries in Databricks Apps format

    Example:
        >>> config = AppConfig.from_file("model_config.yaml")
        >>> resources = generate_app_resources(config)
        >>> print(resources)
        [
            {
                "name": "default_llm",
                "type": "serving-endpoint",
                "serving_endpoint_name": "databricks-claude-sonnet-4",
                "permissions": [{"level": "CAN_QUERY"}]
            },
            ...
        ]
    """
    collected: list[dict[str, Any]] = []

    if config.resources is None:
        logger.debug("No resources defined in config")
        return collected

    # One extractor per resource category; tuple order determines output order.
    for batch in (
        _extract_llm_resources(config.resources.llms),
        _extract_vector_search_resources(config.resources.vector_stores),
        _extract_warehouse_resources(config.resources.warehouses),
        _extract_genie_resources(config.resources.genie_rooms),
        _extract_volume_resources(config.resources.volumes),
        _extract_function_resources(config.resources.functions),
        _extract_connection_resources(config.resources.connections),
        _extract_database_resources(config.resources.databases),
        _extract_app_resources(config.resources.apps),
        # Secrets may appear anywhere in the config, so they are scanned
        # from the whole object rather than a single category.
        _extract_secrets_from_config(config),
    ):
        collected.extend(batch)

    logger.info(f"Generated {len(collected)} app resources from config")
    return collected
401
+
402
+
403
def generate_user_api_scopes(config: AppConfig) -> list[str]:
    """
    Generate user API scopes from resources with on_behalf_of_user=True.

    Examines all resources in the config and collects the API scopes needed
    for on-behalf-of-user (OBO) authentication. Only scopes present in
    VALID_USER_API_SCOPES are returned.

    Args:
        config: The AppConfig containing resource definitions

    Returns:
        A sorted list of unique user API scopes needed for OBO authentication

    Example:
        >>> config = AppConfig.from_file("model_config.yaml")
        >>> scopes = generate_user_api_scopes(config)
        >>> print(scopes)
        ['sql', 'serving.serving-endpoints', 'dashboards.genie']
    """
    if config.resources is None:
        return []

    # Gather every resource flagged for OBO access, category by category.
    categories = (
        config.resources.llms,
        config.resources.vector_stores,
        config.resources.warehouses,
        config.resources.genie_rooms,
        config.resources.volumes,
        config.resources.functions,
        config.resources.connections,
        config.resources.databases,
        config.resources.tables,
    )
    obo_resources: list[IsDatabricksResource] = [
        resource
        for category in categories
        for resource in category.values()
        if resource.on_behalf_of_user
    ]

    scopes: set[str] = set()

    # Translate each resource api_scope into a valid user_api_scope.
    for resource in obo_resources:
        for api_scope in resource.api_scopes:
            mapped = API_SCOPE_TO_USER_SCOPE.get(api_scope)
            if mapped is not None:
                if mapped in VALID_USER_API_SCOPES:
                    scopes.add(mapped)
            elif api_scope in VALID_USER_API_SCOPES:
                # Already a valid user scope; take it as-is.
                scopes.add(api_scope)

    # Table/function access additionally requires catalog read scopes.
    if any(isinstance(r, (TableModel, FunctionModel)) for r in obo_resources):
        scopes.update(
            ("catalog.catalogs:read", "catalog.schemas:read", "catalog.tables:read")
        )

    # Sort for consistent ordering
    result = sorted(scopes)
    logger.info(f"Generated {len(result)} user API scopes for OBO resources: {result}")
    return result
490
+
491
+
492
+ def _sanitize_resource_name(name: str) -> str:
493
+ """
494
+ Sanitize a resource name to meet Databricks Apps requirements.
495
+
496
+ Resource names must be:
497
+ - Between 2 and 30 characters
498
+ - Only contain alphanumeric characters, hyphens, and underscores
499
+
500
+ Args:
501
+ name: The original resource name
502
+
503
+ Returns:
504
+ A sanitized name that meets the requirements
505
+ """
506
+ # Replace dots and special characters with underscores
507
+ sanitized = name.replace(".", "_").replace("-", "_")
508
+
509
+ # Remove any characters that aren't alphanumeric or underscore
510
+ sanitized = "".join(c for c in sanitized if c.isalnum() or c == "_")
511
+
512
+ # Ensure minimum length of 2
513
+ if len(sanitized) < 2:
514
+ sanitized = sanitized + "_r"
515
+
516
+ # Truncate to maximum length of 30
517
+ if len(sanitized) > 30:
518
+ sanitized = sanitized[:30]
519
+
520
+ return sanitized
521
+
522
+
523
def generate_sdk_resources(
    config: AppConfig,
    experiment_id: str | None = None,
) -> list[AppResource]:
    """
    Generate Databricks SDK AppResource objects from an AppConfig.

    Extracts all resources defined in the AppConfig and converts them to
    SDK AppResource objects that can be passed to the Apps API when
    creating or updating an app.

    Args:
        config: The AppConfig containing resource definitions
        experiment_id: Optional MLflow experiment ID to add as a resource.
            When provided, the experiment is added with CAN_EDIT permission,
            allowing the app to log traces and runs.

    Returns:
        A list of AppResource objects for the Databricks SDK

    Example:
        >>> from databricks.sdk import WorkspaceClient
        >>> from databricks.sdk.service.apps import App
        >>> config = AppConfig.from_file("model_config.yaml")
        >>> resources = generate_sdk_resources(config, experiment_id="12345")
        >>> w = WorkspaceClient()
        >>> app = App(name="my-app", resources=resources)
        >>> w.apps.create_and_wait(app=app)
    """
    sdk_resources: list[AppResource] = []

    # The MLflow experiment is optional and independent of config.resources.
    if experiment_id:
        sdk_resources.append(_extract_sdk_experiment_resource(experiment_id))

    if config.resources is None:
        logger.debug("No resources defined in config")
        return sdk_resources

    # Note: Vector search indexes, functions, and connections are not yet
    # supported as app resources in the SDK, so they are not extracted here.
    for batch in (
        _extract_sdk_llm_resources(config.resources.llms),
        _extract_sdk_warehouse_resources(config.resources.warehouses),
        _extract_sdk_genie_resources(config.resources.genie_rooms),
        _extract_sdk_database_resources(config.resources.databases),
        _extract_sdk_volume_resources(config.resources.volumes),
        # Secrets are scanned from the entire config object.
        _extract_sdk_secrets_from_config(config),
    ):
        sdk_resources.extend(batch)

    logger.info(f"Generated {len(sdk_resources)} SDK app resources from config")
    return sdk_resources
577
+
578
+
579
def _extract_sdk_llm_resources(
    llms: dict[str, LLMModel],
) -> list[AppResource]:
    """Extract SDK AppResource objects for model serving endpoints."""
    extracted: list[AppResource] = []
    for config_key, llm in llms.items():
        app_resource_name = _sanitize_resource_name(config_key)
        extracted.append(
            AppResource(
                name=app_resource_name,
                description=llm.description,
                serving_endpoint=AppResourceServingEndpoint(
                    name=llm.name,
                    permission=AppResourceServingEndpointServingEndpointPermission.CAN_QUERY,
                ),
            )
        )
        logger.debug(
            f"Extracted SDK serving endpoint resource: {app_resource_name} -> {llm.name}"
        )
    return extracted
599
+
600
+
601
def _extract_sdk_warehouse_resources(
    warehouses: dict[str, WarehouseModel],
) -> list[AppResource]:
    """Extract SDK AppResource objects for SQL warehouses."""
    extracted: list[AppResource] = []
    for config_key, wh in warehouses.items():
        wh_id = value_of(wh.warehouse_id)
        app_resource_name = _sanitize_resource_name(config_key)
        extracted.append(
            AppResource(
                name=app_resource_name,
                description=wh.description,
                sql_warehouse=AppResourceSqlWarehouse(
                    id=wh_id,
                    permission=AppResourceSqlWarehouseSqlWarehousePermission.CAN_USE,
                ),
            )
        )
        logger.debug(
            f"Extracted SDK SQL warehouse resource: {app_resource_name} -> {wh_id}"
        )
    return extracted
622
+
623
+
624
def _extract_sdk_genie_resources(
    genie_rooms: dict[str, GenieRoomModel],
) -> list[AppResource]:
    """Extract SDK AppResource objects for Genie spaces."""
    extracted: list[AppResource] = []
    for config_key, room in genie_rooms.items():
        room_space_id = value_of(room.space_id)
        app_resource_name = _sanitize_resource_name(config_key)
        extracted.append(
            AppResource(
                name=app_resource_name,
                description=room.description,
                genie_space=AppResourceGenieSpace(
                    # Fall back to the config key when the room has no name.
                    name=room.name or config_key,
                    space_id=room_space_id,
                    permission=AppResourceGenieSpaceGenieSpacePermission.CAN_RUN,
                ),
            )
        )
        logger.debug(
            f"Extracted SDK Genie space resource: {app_resource_name} -> {room_space_id}"
        )
    return extracted
646
+
647
+
648
def _extract_sdk_database_resources(
    databases: dict[str, DatabaseModel],
) -> list[AppResource]:
    """Extract SDK AppResource objects for Lakebase databases.

    Non-Lakebase databases (those without an instance_name) are skipped.
    """
    extracted: list[AppResource] = []
    for config_key, database in databases.items():
        # Only include Lakebase databases (those with instance_name)
        if not database.is_lakebase:
            continue
        app_resource_name = _sanitize_resource_name(config_key)
        # Use db.database for the actual database name (defaults to
        # "databricks_postgres"); db.name is just the config key/description,
        # not the actual database name.
        actual_db_name = (
            value_of(database.database) if database.database else "databricks_postgres"
        )
        extracted.append(
            AppResource(
                name=app_resource_name,
                description=database.description,
                database=AppResourceDatabase(
                    instance_name=database.instance_name,
                    database_name=actual_db_name,
                    permission=AppResourceDatabaseDatabasePermission.CAN_CONNECT_AND_CREATE,
                ),
            )
        )
        logger.debug(
            f"Extracted SDK database resource: {app_resource_name} -> "
            f"{database.instance_name}/{actual_db_name}"
        )
    return extracted
676
+
677
+
678
def _extract_sdk_volume_resources(
    volumes: dict[str, VolumeModel],
) -> list[AppResource]:
    """Extract SDK AppResource objects for Unity Catalog volumes."""
    extracted: list[AppResource] = []
    for config_key, vol in volumes.items():
        app_resource_name = _sanitize_resource_name(config_key)
        extracted.append(
            AppResource(
                name=app_resource_name,
                uc_securable=AppResourceUcSecurable(
                    securable_full_name=vol.full_name,
                    securable_type=AppResourceUcSecurableUcSecurableType.VOLUME,
                    permission=AppResourceUcSecurableUcSecurablePermission.READ_VOLUME,
                ),
            )
        )
        logger.debug(
            f"Extracted SDK volume resource: {app_resource_name} -> {vol.full_name}"
        )
    return extracted
698
+
699
+
700
def _extract_sdk_experiment_resource(
    experiment_id: str,
    resource_name: str = "experiment",
) -> AppResource:
    """Create SDK AppResource for MLflow experiment.

    This allows the Databricks App to log traces and runs to the specified
    MLflow experiment. The experiment ID is exposed via the MLFLOW_EXPERIMENT_ID
    environment variable using valueFrom: experiment in app.yaml.

    Args:
        experiment_id: The MLflow experiment ID
        resource_name: The resource key name (default: "experiment")

    Returns:
        An AppResource for the MLflow experiment
    """
    experiment_resource = AppResource(
        name=resource_name,
        experiment=AppResourceExperiment(
            experiment_id=experiment_id,
            # CAN_EDIT lets the app write runs/traces, not just read them.
            permission=AppResourceExperimentExperimentPermission.CAN_EDIT,
        ),
    )
    logger.debug(
        f"Extracted SDK experiment resource: {resource_name} -> {experiment_id}"
    )
    return experiment_resource
728
+
729
+
730
def _extract_sdk_secrets_from_config(config: AppConfig) -> list[AppResource]:
    """
    Extract SDK AppResource objects for all secrets referenced in the config.

    Recursively walks the config object graph (dicts, sequences, and the
    public attributes of objects with a __dict__) collecting every
    SecretVariableModel and creating an AppResource with READ permission
    for each unique scope/key pair.

    Args:
        config: The AppConfig containing secret references

    Returns:
        A list of AppResource objects for secrets
    """
    found: dict[tuple[str, str], AppResource] = {}

    def walk(node: Any) -> None:
        """Recursively collect secrets from any value."""
        if isinstance(node, SecretVariableModel):
            dedupe_key = (node.scope, node.secret)
            if dedupe_key in found:
                return
            # Build a unique, sanitized name for the secret resource.
            raw_name = (
                f"{node.scope}_{node.secret}".replace("-", "_").replace("/", "_")
            )
            found[dedupe_key] = AppResource(
                name=_sanitize_resource_name(raw_name),
                secret=AppResourceSecret(
                    scope=node.scope,
                    key=node.secret,
                    permission=AppResourceSecretSecretPermission.READ,
                ),
            )
            logger.debug(
                f"Found secret for SDK resource: {node.scope}/{node.secret}"
            )
        elif isinstance(node, dict):
            for child in node.values():
                walk(child)
        elif isinstance(node, (list, tuple)):
            for child in node:
                walk(child)
        elif hasattr(node, "__dict__"):
            # Pydantic models and other objects: visit public attributes only.
            for attr_name, child in node.__dict__.items():
                if not attr_name.startswith("_"):
                    walk(child)

    # Walk through the entire config
    walk(config)

    resources = list(found.values())
    logger.info(f"Extracted {len(resources)} SDK secret resources from config")
    return resources
787
+
788
+
789
def generate_resources_yaml(config: AppConfig) -> str:
    """
    Generate the resources section of app.yaml as a YAML string.

    Args:
        config: The AppConfig containing resource definitions

    Returns:
        A YAML-formatted string for the resources section, or an empty
        string when the config defines no resources.
    """
    # Imported lazily so the module does not hard-depend on PyYAML.
    import yaml

    extracted = generate_app_resources(config)
    if not extracted:
        return ""

    return yaml.dump(
        {"resources": extracted}, default_flow_style=False, sort_keys=False
    )
808
+
809
+
810
def _extract_env_vars_from_config(config: AppConfig) -> list[dict[str, str]]:
    """
    Extract environment variables from config.app.environment_vars for app.yaml.

    Conversion rules per variable:
    - SecretVariableModel: emitted with "valueFrom" referencing the secret resource
    - EnvironmentVariableModel: emitted with "value" resolved via value_of()
      (falling back to its default_value), or skipped when neither resolves
    - CompositeVariableModel: the first option decides the treatment
    - Plain strings/values: emitted with "value"

    Args:
        config: The AppConfig containing environment variable definitions

    Returns:
        A list of environment variable dictionaries for app.yaml

    Example:
        >>> config = AppConfig.from_file("model_config.yaml")
        >>> env_vars = _extract_env_vars_from_config(config)
        >>> # Returns:
        >>> # [
        >>> #     {"name": "API_KEY", "valueFrom": "my_scope_api_key"},
        >>> #     {"name": "LOG_LEVEL", "value": "INFO"},
        >>> # ]
    """
    entries: list[dict[str, str]] = []

    if config.app is None:
        return entries

    declared = config.app.environment_vars
    if not declared:
        return entries

    for var_name, raw_value in declared.items():
        entry: dict[str, str] = {"name": var_name}

        # Determine the variable kind to decide between value / valueFrom.
        resolved = _resolve_variable_type(raw_value)

        if resolved is None:
            # Plain value - use as-is (stringify non-strings).
            entry["value"] = raw_value if isinstance(raw_value, str) else str(raw_value)
        elif isinstance(resolved, SecretVariableModel):
            # Secret reference - point valueFrom at the sanitized resource name.
            ref = f"{resolved.scope}_{resolved.secret}".replace("-", "_").replace(
                "/", "_"
            )
            entry["valueFrom"] = _sanitize_resource_name(ref)
            logger.debug(
                f"Environment variable {var_name} references secret: "
                f"{resolved.scope}/{resolved.secret}"
            )
        elif isinstance(resolved, EnvironmentVariableModel):
            # Environment variable - resolve now, falling back to the default.
            current = value_of(resolved)
            if current is not None:
                entry["value"] = str(current)
            elif resolved.default_value is not None:
                entry["value"] = str(resolved.default_value)
            else:
                # Nothing resolvable; skip this variable entirely.
                logger.warning(
                    f"Environment variable {var_name} has no value "
                    f"(env: {resolved.env})"
                )
                continue
        else:
            # Defensive fallback for unexpected types - stringify.
            entry["value"] = str(raw_value)

        entries.append(entry)
        logger.debug(f"Extracted environment variable: {var_name}")

    logger.info(f"Extracted {len(entries)} environment variables from config")
    return entries
891
+
892
+
893
def _resolve_variable_type(
    value: Any,
) -> SecretVariableModel | EnvironmentVariableModel | None:
    """
    Classify a variable value for environment-variable extraction.

    Secret and environment-variable models are returned as-is. A
    CompositeVariableModel is classified by recursing into its first
    option, which determines whether app.yaml should render the entry
    with ``value`` or ``valueFrom``. Anything else (plain str/int,
    PrimitiveVariableModel) classifies as a plain value.

    Args:
        value: The variable value to analyze.

    Returns:
        The resolved SecretVariableModel or EnvironmentVariableModel,
        or None if the value is plain.
    """
    if isinstance(value, (SecretVariableModel, EnvironmentVariableModel)):
        return value
    if isinstance(value, CompositeVariableModel):
        # The first option dictates how the variable is rendered.
        return _resolve_variable_type(value.options[0]) if value.options else None
    # Plain value (str, int, etc.) or PrimitiveVariableModel.
    return None
922
+
923
+
924
def generate_app_yaml(
    config: AppConfig,
    command: str | list[str] | None = None,
    include_resources: bool = True,
) -> str:
    """
    Generate a complete app.yaml for Databricks Apps deployment.

    This function creates a complete app.yaml configuration file that includes:
    - Command to run the app
    - Environment variables for MLflow and dao-ai
    - Resources extracted from the AppConfig (if include_resources is True)

    Args:
        config: The AppConfig containing deployment configuration
        command: Optional custom command. If not provided, uses default dao-ai app_server
        include_resources: Whether to include the resources section (default: True)

    Returns:
        A complete app.yaml as a string

    Example:
        >>> config = AppConfig.from_file("model_config.yaml")
        >>> app_yaml = generate_app_yaml(config)
        >>> print(app_yaml)
    """
    import yaml

    # Build the app.yaml structure
    app_config: dict[str, Any] = {}

    # Command section: default bootstraps dao-ai and launches its app server.
    if command is None:
        app_config["command"] = [
            "/bin/bash",
            "-c",
            "pip install dao-ai && python -m dao_ai.apps.server",
        ]
    elif isinstance(command, str):
        app_config["command"] = [command]
    else:
        app_config["command"] = command

    # Base environment variables for MLflow and dao-ai
    base_env_vars: list[dict[str, str]] = [
        {"name": "MLFLOW_TRACKING_URI", "value": "databricks"},
        {"name": "MLFLOW_REGISTRY_URI", "value": "databricks-uc"},
        {"name": "MLFLOW_EXPERIMENT_ID", "valueFrom": "experiment"},
        {"name": "DAO_AI_CONFIG_PATH", "value": "model_config.yaml"},
    ]

    # Merge config-defined vars over the base set through an insertion-ordered
    # dict keyed by name, so every name appears at most once in the output
    # (config takes precedence over base; a later config entry wins over an
    # earlier one with the same name). Popping before re-inserting moves an
    # overridden name to the end of the list, preserving the position that
    # config-supplied variables occupy.
    merged: dict[str, dict[str, str]] = {e["name"]: e for e in base_env_vars}
    for config_env in _extract_env_vars_from_config(config):
        merged.pop(config_env["name"], None)  # re-insert at the end
        merged[config_env["name"]] = config_env

    app_config["env"] = list(merged.values())

    # Resources section (only emitted when requested and non-empty)
    if include_resources:
        resources = generate_app_resources(config)
        if resources:
            app_config["resources"] = resources

    return yaml.dump(app_config, default_flow_style=False, sort_keys=False)
997
+
998
+
999
def get_resource_env_mappings(config: AppConfig) -> list[dict[str, Any]]:
    """
    Generate environment variable mappings that reference app resources.

    This creates environment variables that use `valueFrom` to reference
    configured resources, allowing the app to access resource values at runtime.

    Args:
        config: The AppConfig containing resource definitions

    Returns:
        A list of environment variable definitions with valueFrom references

    Example:
        >>> env_vars = get_resource_env_mappings(config)
        >>> # Returns:
        >>> # [
        >>> #     {"name": "SQL_WAREHOUSE_ID", "valueFrom": "default_warehouse"},
        >>> #     ...
        >>> # ]
    """
    env_mappings: list[dict[str, Any]] = []

    if config.resources is None:
        return env_mappings

    resources = config.resources

    # Warehouse IDs: only the resource key is used, so iterate keys directly.
    for key in resources.warehouses:
        env_mappings.append(
            {"name": f"{key.upper()}_WAREHOUSE_ID", "valueFrom": key}
        )

    # Serving endpoint names.
    for key in resources.llms:
        env_mappings.append({"name": f"{key.upper()}_ENDPOINT", "valueFrom": key})

    # Genie space IDs.
    for key in resources.genie_rooms:
        env_mappings.append({"name": f"{key.upper()}_SPACE_ID", "valueFrom": key})

    # Vector search indexes: only stores with a configured index are exposed.
    for key, vector_store in resources.vector_stores.items():
        if vector_store.index:
            env_mappings.append({"name": f"{key.upper()}_INDEX", "valueFrom": key})

    # Database instances: only Lakebase databases are exposed.
    for key, database in resources.databases.items():
        if database.is_lakebase:
            env_mappings.append(
                {"name": f"{key.upper()}_DATABASE", "valueFrom": key}
            )

    return env_mappings