dao-ai 0.1.9__py3-none-any.whl → 0.1.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1029 @@
1
+ """
2
+ App resources module for generating Databricks App resource configurations.
3
+
4
+ This module provides utilities to dynamically discover and generate Databricks App
5
+ resource configurations from dao-ai AppConfig. Resources are extracted from the
6
+ config and converted to the format expected by Databricks Apps.
7
+
8
+ Databricks Apps resource documentation:
9
+ https://learn.microsoft.com/en-us/azure/databricks/dev-tools/databricks-apps/resources
10
+
11
+ Supported resource types and their mappings:
12
+ - LLMModel → serving-endpoint (Model Serving Endpoint)
13
+ - VectorStoreModel/IndexModel → vector-search-index (via UC Securable - not yet supported)
14
+ - WarehouseModel → sql-warehouse
15
+ - GenieRoomModel → genie-space
16
+ - VolumeModel → volume (via UC Securable)
17
+ - FunctionModel → function (via UC Securable - not yet supported)
18
+ - ConnectionModel → connection (not yet supported in SDK)
19
+ - DatabaseModel → database (Lakebase)
20
+ - DatabricksAppModel → app (not yet supported in SDK)
21
+
22
+ Usage:
23
+ from dao_ai.apps.resources import generate_app_resources, generate_sdk_resources
24
+ from dao_ai.config import AppConfig
25
+
26
+ config = AppConfig.from_file("model_config.yaml")
27
+
28
+ # For SDK-based deployment (recommended)
29
+ sdk_resources = generate_sdk_resources(config)
30
+
31
+ # For YAML-based documentation
32
+ resources = generate_app_resources(config)
33
+ """
34
+
35
+ from typing import Any
36
+
37
+ from databricks.sdk.service.apps import (
38
+ AppResource,
39
+ AppResourceDatabase,
40
+ AppResourceDatabaseDatabasePermission,
41
+ AppResourceGenieSpace,
42
+ AppResourceGenieSpaceGenieSpacePermission,
43
+ AppResourceSecret,
44
+ AppResourceSecretSecretPermission,
45
+ AppResourceServingEndpoint,
46
+ AppResourceServingEndpointServingEndpointPermission,
47
+ AppResourceSqlWarehouse,
48
+ AppResourceSqlWarehouseSqlWarehousePermission,
49
+ AppResourceUcSecurable,
50
+ AppResourceUcSecurableUcSecurablePermission,
51
+ AppResourceUcSecurableUcSecurableType,
52
+ )
53
+ from loguru import logger
54
+
55
+ from dao_ai.config import (
56
+ AppConfig,
57
+ CompositeVariableModel,
58
+ ConnectionModel,
59
+ DatabaseModel,
60
+ DatabricksAppModel,
61
+ EnvironmentVariableModel,
62
+ FunctionModel,
63
+ GenieRoomModel,
64
+ IsDatabricksResource,
65
+ LLMModel,
66
+ SecretVariableModel,
67
+ TableModel,
68
+ VectorStoreModel,
69
+ VolumeModel,
70
+ WarehouseModel,
71
+ value_of,
72
+ )
73
+
74
# Resource type mappings from dao-ai to Databricks Apps
# Keys are dao-ai config model classes; values are the resource "type" strings
# used in the dictionaries produced by the _extract_*_resources helpers.
RESOURCE_TYPE_MAPPING: dict[type, str] = {
    LLMModel: "serving-endpoint",
    VectorStoreModel: "vector-search-index",
    WarehouseModel: "sql-warehouse",
    GenieRoomModel: "genie-space",
    VolumeModel: "volume",
    FunctionModel: "function",
    ConnectionModel: "connection",
    DatabaseModel: "database",
    DatabricksAppModel: "app",
}

# Default permissions for each resource type
# Keyed by the resource type string; each value is the list of permission
# levels emitted as {"level": ...} entries for a resource of that type.
DEFAULT_PERMISSIONS: dict[str, list[str]] = {
    "serving-endpoint": ["CAN_QUERY"],
    "vector-search-index": ["CAN_SELECT"],
    "sql-warehouse": ["CAN_USE"],
    "genie-space": ["CAN_RUN"],
    "volume": ["CAN_READ"],
    "function": ["CAN_EXECUTE"],
    "connection": ["USE_CONNECTION"],
    "database": ["CAN_CONNECT_AND_CREATE"],
    "app": ["CAN_VIEW"],
}

# Valid user API scopes for Databricks Apps
# These are the only scopes that can be requested for on-behalf-of-user access
# generate_user_api_scopes() drops any scope that is not a member of this set.
VALID_USER_API_SCOPES: set[str] = {
    "sql",
    "serving.serving-endpoints",
    "vectorsearch.vector-search-indexes",
    "files.files",
    "dashboards.genie",
    "catalog.connections",
    "catalog.catalogs:read",
    "catalog.schemas:read",
    "catalog.tables:read",
}

# Mapping from resource api_scopes to valid user_api_scopes
# Some resource scopes map directly, others need translation
# Scopes absent from this mapping are only kept when they already appear in
# VALID_USER_API_SCOPES.
API_SCOPE_TO_USER_SCOPE: dict[str, str] = {
    # Direct mappings
    "serving.serving-endpoints": "serving.serving-endpoints",
    "vectorsearch.vector-search-indexes": "vectorsearch.vector-search-indexes",
    "files.files": "files.files",
    "dashboards.genie": "dashboards.genie",
    "catalog.connections": "catalog.connections",
    # SQL-related scopes map to "sql"
    "sql.warehouses": "sql",
    "sql.statement-execution": "sql",
    # Vector search endpoints also need serving
    # NOTE(review): endpoint scope is translated to the serving scope, not a
    # vector-search scope - confirm this is intended.
    "vectorsearch.vector-search-endpoints": "serving.serving-endpoints",
    # Catalog scopes
    "catalog.volumes": "files.files",
}
131
+
132
+
133
def _extract_llm_resources(
    llms: dict[str, LLMModel],
) -> list[dict[str, Any]]:
    """Build serving-endpoint resource dictionaries from the configured LLMs.

    Args:
        llms: Mapping of config key to LLMModel.

    Returns:
        One dictionary per entry, in mapping order, with the keys ``name``
        (the config key), ``type``, ``serving_endpoint_name`` (``llm.name``)
        and ``permissions``.
    """
    resources: list[dict[str, Any]] = []
    # Iterate the mapping directly; the positional index is not needed.
    for key, llm in llms.items():
        resource: dict[str, Any] = {
            "name": key,
            "type": "serving-endpoint",
            "serving_endpoint_name": llm.name,
            # Built per resource so entries never share one list object.
            "permissions": [
                {"level": p} for p in DEFAULT_PERMISSIONS["serving-endpoint"]
            ],
        }
        resources.append(resource)
        logger.debug(f"Extracted serving endpoint resource: {key} -> {llm.name}")
    return resources
150
+
151
+
152
def _extract_vector_search_resources(
    vector_stores: dict[str, VectorStoreModel],
) -> list[dict[str, Any]]:
    """Build vector-search-index resource dictionaries from vector stores.

    Entries whose ``index`` is ``None`` are skipped.

    Args:
        vector_stores: Mapping of config key to VectorStoreModel.

    Returns:
        One dictionary per vector store that has an index, in mapping order.
    """
    collected: list[dict[str, Any]] = []
    for store_key, store in vector_stores.items():
        if store.index is not None:
            index_name = store.index.full_name
            collected.append(
                {
                    "name": store_key,
                    "type": "vector-search-index",
                    "vector_search_index_name": index_name,
                    "permissions": [
                        {"level": level}
                        for level in DEFAULT_PERMISSIONS["vector-search-index"]
                    ],
                }
            )
            logger.debug(
                f"Extracted vector search resource: {store_key} -> {index_name}"
            )
    return collected
171
+
172
+
173
def _extract_warehouse_resources(
    warehouses: dict[str, WarehouseModel],
) -> list[dict[str, Any]]:
    """Build sql-warehouse resource dictionaries from the configured warehouses.

    Args:
        warehouses: Mapping of config key to WarehouseModel.

    Returns:
        One dictionary per warehouse, in mapping order; ``sql_warehouse_id``
        holds ``value_of(warehouse.warehouse_id)``.
    """
    collected: list[dict[str, Any]] = []
    for wh_key, wh in warehouses.items():
        resolved_id = value_of(wh.warehouse_id)
        levels = [{"level": level} for level in DEFAULT_PERMISSIONS["sql-warehouse"]]
        collected.append(
            {
                "name": wh_key,
                "type": "sql-warehouse",
                "sql_warehouse_id": resolved_id,
                "permissions": levels,
            }
        )
        logger.debug(f"Extracted SQL warehouse resource: {wh_key} -> {resolved_id}")
    return collected
189
+
190
+
191
def _extract_genie_resources(
    genie_rooms: dict[str, GenieRoomModel],
) -> list[dict[str, Any]]:
    """Build genie-space resource dictionaries from the configured Genie rooms.

    Args:
        genie_rooms: Mapping of config key to GenieRoomModel.

    Returns:
        One dictionary per room, in mapping order; ``genie_space_id`` holds
        ``value_of(genie.space_id)``.
    """
    collected: list[dict[str, Any]] = []
    for room_key, room in genie_rooms.items():
        resolved_space = value_of(room.space_id)
        levels = [{"level": level} for level in DEFAULT_PERMISSIONS["genie-space"]]
        collected.append(
            {
                "name": room_key,
                "type": "genie-space",
                "genie_space_id": resolved_space,
                "permissions": levels,
            }
        )
        logger.debug(f"Extracted Genie space resource: {room_key} -> {resolved_space}")
    return collected
207
+
208
+
209
def _extract_volume_resources(
    volumes: dict[str, VolumeModel],
) -> list[dict[str, Any]]:
    """Build volume resource dictionaries from the configured UC volumes.

    Args:
        volumes: Mapping of config key to VolumeModel.

    Returns:
        One dictionary per volume, in mapping order; ``volume_name`` holds
        the volume's ``full_name``.
    """
    collected: list[dict[str, Any]] = []
    for vol_key, vol in volumes.items():
        qualified_name = vol.full_name
        levels = [{"level": level} for level in DEFAULT_PERMISSIONS["volume"]]
        collected.append(
            {
                "name": vol_key,
                "type": "volume",
                "volume_name": qualified_name,
                "permissions": levels,
            }
        )
        logger.debug(f"Extracted volume resource: {vol_key} -> {qualified_name}")
    return collected
224
+
225
+
226
def _extract_function_resources(
    functions: dict[str, FunctionModel],
) -> list[dict[str, Any]]:
    """Build function resource dictionaries from the configured UC functions.

    Args:
        functions: Mapping of config key to FunctionModel.

    Returns:
        One dictionary per function, in mapping order; ``function_name``
        holds the function's ``full_name``.
    """
    collected: list[dict[str, Any]] = []
    for fn_key, fn in functions.items():
        qualified_name = fn.full_name
        levels = [{"level": level} for level in DEFAULT_PERMISSIONS["function"]]
        collected.append(
            {
                "name": fn_key,
                "type": "function",
                "function_name": qualified_name,
                "permissions": levels,
            }
        )
        logger.debug(f"Extracted function resource: {fn_key} -> {qualified_name}")
    return collected
241
+
242
+
243
def _extract_connection_resources(
    connections: dict[str, ConnectionModel],
) -> list[dict[str, Any]]:
    """Build connection resource dictionaries from the configured UC connections.

    Args:
        connections: Mapping of config key to ConnectionModel.

    Returns:
        One dictionary per connection, in mapping order; ``connection_name``
        holds ``conn.name``.
    """
    collected: list[dict[str, Any]] = []
    for conn_key, connection in connections.items():
        connection_name = connection.name
        levels = [{"level": level} for level in DEFAULT_PERMISSIONS["connection"]]
        collected.append(
            {
                "name": conn_key,
                "type": "connection",
                "connection_name": connection_name,
                "permissions": levels,
            }
        )
        logger.debug(f"Extracted connection resource: {conn_key} -> {connection_name}")
    return collected
258
+
259
+
260
def _extract_database_resources(
    databases: dict[str, DatabaseModel],
) -> list[dict[str, Any]]:
    """Build database resource dictionaries from the configured databases.

    Only entries whose ``is_lakebase`` flag is truthy are included.

    Args:
        databases: Mapping of config key to DatabaseModel.

    Returns:
        One dictionary per Lakebase database, in mapping order;
        ``database_instance_name`` holds ``db.instance_name``.
    """
    collected: list[dict[str, Any]] = []
    for db_key, database in databases.items():
        if database.is_lakebase:
            instance = database.instance_name
            collected.append(
                {
                    "name": db_key,
                    "type": "database",
                    "database_instance_name": instance,
                    "permissions": [
                        {"level": level} for level in DEFAULT_PERMISSIONS["database"]
                    ],
                }
            )
            logger.debug(f"Extracted database resource: {db_key} -> {instance}")
    return collected
278
+
279
+
280
def _extract_app_resources(
    apps: dict[str, DatabricksAppModel],
) -> list[dict[str, Any]]:
    """Build app resource dictionaries from the configured Databricks Apps.

    Args:
        apps: Mapping of config key to DatabricksAppModel.

    Returns:
        One dictionary per app, in mapping order; ``app_name`` holds
        ``app.name``.
    """
    collected: list[dict[str, Any]] = []
    for app_key, app_model in apps.items():
        app_name = app_model.name
        levels = [{"level": level} for level in DEFAULT_PERMISSIONS["app"]]
        collected.append(
            {
                "name": app_key,
                "type": "app",
                "app_name": app_name,
                "permissions": levels,
            }
        )
        logger.debug(f"Extracted app resource: {app_key} -> {app_name}")
    return collected
295
+
296
+
297
def _extract_secrets_from_config(config: AppConfig) -> list[dict[str, Any]]:
    """
    Extract all secrets referenced in the config as resource dictionaries.

    Walks the whole config object graph (dicts, lists, tuples and any object
    exposing ``__dict__``) and records one resource per unique
    ``(scope, secret)`` pair found on a SecretVariableModel.

    The resource name is passed through ``_sanitize_resource_name`` so that it
    matches the name ``_extract_env_vars_from_config`` writes into
    ``valueFrom`` for the same secret.

    Args:
        config: The AppConfig containing secret references

    Returns:
        A list of secret resource dictionaries with unique scope/key pairs,
        in order of first discovery
    """
    secrets: dict[tuple[str, str], dict[str, Any]] = {}

    def extract_from_value(value: Any, path: str = "") -> None:
        """Recursively extract secrets from any value; ``path`` is for logging."""
        if isinstance(value, SecretVariableModel):
            secret_key = (value.scope, value.secret)
            if secret_key not in secrets:
                raw_name = f"{value.scope}_{value.secret}".replace(
                    "-", "_"
                ).replace("/", "_")
                # Use the same sanitized name as the env-var and SDK paths so
                # valueFrom references resolve to this resource.
                resource_name = _sanitize_resource_name(raw_name)
                secrets[secret_key] = {
                    "name": resource_name,
                    "type": "secret",
                    "scope": value.scope,
                    "key": value.secret,
                    "permissions": [{"level": "READ"}],
                }
                logger.debug(f"Found secret: {value.scope}/{value.secret} at {path}")
        elif isinstance(value, dict):
            for k, v in value.items():
                extract_from_value(v, f"{path}.{k}" if path else k)
        elif isinstance(value, (list, tuple)):
            for i, v in enumerate(value):
                extract_from_value(v, f"{path}[{i}]")
        elif hasattr(value, "__dict__"):
            # Handle Pydantic models and other objects with __dict__
            for k, v in value.__dict__.items():
                if not k.startswith("_"):  # Skip private attributes
                    extract_from_value(v, f"{path}.{k}" if path else k)

    # Walk through the entire config
    extract_from_value(config)

    resources = list(secrets.values())
    logger.info(f"Extracted {len(resources)} secret resources from config")
    return resources
347
+
348
+
349
def generate_app_resources(config: AppConfig) -> list[dict[str, Any]]:
    """
    Produce the dictionary form of every app resource found in an AppConfig.

    Each resource category under ``config.resources`` is run through its
    extractor in a fixed order (LLMs, vector stores, warehouses, Genie rooms,
    volumes, functions, connections, databases, apps). Secrets discovered
    anywhere in the config are appended last.

    Args:
        config: The AppConfig containing resource definitions

    Returns:
        A list of resource dictionaries; empty when ``config.resources`` is
        ``None``
    """
    collected: list[dict[str, Any]] = []

    resource_cfg = config.resources
    if resource_cfg is None:
        logger.debug("No resources defined in config")
        return collected

    # (extractor, attribute on config.resources) in output order.
    extraction_plan = (
        (_extract_llm_resources, "llms"),
        (_extract_vector_search_resources, "vector_stores"),
        (_extract_warehouse_resources, "warehouses"),
        (_extract_genie_resources, "genie_rooms"),
        (_extract_volume_resources, "volumes"),
        (_extract_function_resources, "functions"),
        (_extract_connection_resources, "connections"),
        (_extract_database_resources, "databases"),
        (_extract_app_resources, "apps"),
    )
    for extractor, attribute in extraction_plan:
        collected.extend(extractor(getattr(resource_cfg, attribute)))

    # Secrets are found by walking the whole config, not just its resources.
    collected.extend(_extract_secrets_from_config(config))

    logger.info(f"Generated {len(collected)} app resources from config")
    return collected
399
+
400
+
401
def generate_user_api_scopes(config: AppConfig) -> list[str]:
    """
    Compute the user API scopes required by on-behalf-of-user resources.

    Resources from the LLM, vector store, warehouse, Genie room, volume,
    function, connection, database and table categories are considered when
    their ``on_behalf_of_user`` flag is truthy. Each of their ``api_scopes``
    is translated through ``API_SCOPE_TO_USER_SCOPE`` (or used as-is when it
    has no translation) and kept only if it is in ``VALID_USER_API_SCOPES``.
    The three catalog read scopes are added whenever a table or function is
    among the selected resources.

    Args:
        config: The AppConfig containing resource definitions

    Returns:
        The unique scopes in sorted order; empty when ``config.resources``
        is ``None``
    """
    resource_cfg = config.resources
    if resource_cfg is None:
        return []

    categories = (
        resource_cfg.llms,
        resource_cfg.vector_stores,
        resource_cfg.warehouses,
        resource_cfg.genie_rooms,
        resource_cfg.volumes,
        resource_cfg.functions,
        resource_cfg.connections,
        resource_cfg.databases,
        resource_cfg.tables,
    )
    obo_resources: list[IsDatabricksResource] = [
        item
        for category in categories
        for item in category.values()
        if item.on_behalf_of_user
    ]

    scopes: set[str] = set()
    for item in obo_resources:
        for api_scope in item.api_scopes:
            # Untranslated scopes fall through unchanged and must themselves
            # be valid user scopes to be kept.
            candidate = API_SCOPE_TO_USER_SCOPE.get(api_scope, api_scope)
            if candidate in VALID_USER_API_SCOPES:
                scopes.add(candidate)

    # Table or function access also needs the catalog read scopes.
    needs_catalog_read = any(
        isinstance(item, (TableModel, FunctionModel)) for item in obo_resources
    )
    if needs_catalog_read:
        scopes.update(
            ("catalog.catalogs:read", "catalog.schemas:read", "catalog.tables:read")
        )

    # Sort for consistent ordering
    result = sorted(scopes)
    logger.info(f"Generated {len(result)} user API scopes for OBO resources: {result}")
    return result
488
+
489
+
490
+ def _sanitize_resource_name(name: str) -> str:
491
+ """
492
+ Sanitize a resource name to meet Databricks Apps requirements.
493
+
494
+ Resource names must be:
495
+ - Between 2 and 30 characters
496
+ - Only contain alphanumeric characters, hyphens, and underscores
497
+
498
+ Args:
499
+ name: The original resource name
500
+
501
+ Returns:
502
+ A sanitized name that meets the requirements
503
+ """
504
+ # Replace dots and special characters with underscores
505
+ sanitized = name.replace(".", "_").replace("-", "_")
506
+
507
+ # Remove any characters that aren't alphanumeric or underscore
508
+ sanitized = "".join(c for c in sanitized if c.isalnum() or c == "_")
509
+
510
+ # Ensure minimum length of 2
511
+ if len(sanitized) < 2:
512
+ sanitized = sanitized + "_r"
513
+
514
+ # Truncate to maximum length of 30
515
+ if len(sanitized) > 30:
516
+ sanitized = sanitized[:30]
517
+
518
+ return sanitized
519
+
520
+
521
def generate_sdk_resources(config: AppConfig) -> list[AppResource]:
    """
    Produce SDK ``AppResource`` objects for the resources in an AppConfig.

    LLMs, warehouses, Genie rooms, databases and volumes are converted in that
    order, followed by every secret discovered anywhere in the config. Vector
    search indexes, functions and connections are not emitted here.

    Args:
        config: The AppConfig containing resource definitions

    Returns:
        A list of AppResource objects; empty when ``config.resources`` is
        ``None``
    """
    sdk_resources: list[AppResource] = []

    resource_cfg = config.resources
    if resource_cfg is None:
        logger.debug("No resources defined in config")
        return sdk_resources

    # (extractor, attribute on config.resources) in output order.
    extraction_plan = (
        (_extract_sdk_llm_resources, "llms"),
        (_extract_sdk_warehouse_resources, "warehouses"),
        (_extract_sdk_genie_resources, "genie_rooms"),
        (_extract_sdk_database_resources, "databases"),
        (_extract_sdk_volume_resources, "volumes"),
    )
    for extractor, attribute in extraction_plan:
        sdk_resources.extend(extractor(getattr(resource_cfg, attribute)))

    # Secrets are found by walking the whole config, not just its resources.
    sdk_resources.extend(_extract_sdk_secrets_from_config(config))

    # Note: Vector search indexes, functions, and connections are not yet
    # supported as app resources in the SDK

    logger.info(f"Generated {len(sdk_resources)} SDK app resources from config")
    return sdk_resources
565
+
566
+
567
def _extract_sdk_llm_resources(
    llms: dict[str, LLMModel],
) -> list[AppResource]:
    """Build SDK AppResource objects for model serving endpoints.

    Args:
        llms: Mapping of config key to LLMModel.

    Returns:
        One AppResource per LLM, named after the sanitized config key and
        granting CAN_QUERY on the endpoint ``llm.name``.
    """
    sdk_resources: list[AppResource] = []
    for llm_key, model in llms.items():
        safe_name = _sanitize_resource_name(llm_key)
        endpoint = AppResourceServingEndpoint(
            name=model.name,
            permission=AppResourceServingEndpointServingEndpointPermission.CAN_QUERY,
        )
        sdk_resources.append(
            AppResource(
                name=safe_name,
                description=model.description,
                serving_endpoint=endpoint,
            )
        )
        logger.debug(
            f"Extracted SDK serving endpoint resource: {safe_name} -> {model.name}"
        )
    return sdk_resources
587
+
588
+
589
def _extract_sdk_warehouse_resources(
    warehouses: dict[str, WarehouseModel],
) -> list[AppResource]:
    """Build SDK AppResource objects for SQL warehouses.

    Args:
        warehouses: Mapping of config key to WarehouseModel.

    Returns:
        One AppResource per warehouse, named after the sanitized config key
        and granting CAN_USE on ``value_of(warehouse.warehouse_id)``.
    """
    sdk_resources: list[AppResource] = []
    for wh_key, wh in warehouses.items():
        resolved_id = value_of(wh.warehouse_id)
        safe_name = _sanitize_resource_name(wh_key)
        sql_warehouse = AppResourceSqlWarehouse(
            id=resolved_id,
            permission=AppResourceSqlWarehouseSqlWarehousePermission.CAN_USE,
        )
        sdk_resources.append(
            AppResource(
                name=safe_name,
                description=wh.description,
                sql_warehouse=sql_warehouse,
            )
        )
        logger.debug(
            f"Extracted SDK SQL warehouse resource: {safe_name} -> {resolved_id}"
        )
    return sdk_resources
610
+
611
+
612
def _extract_sdk_genie_resources(
    genie_rooms: dict[str, GenieRoomModel],
) -> list[AppResource]:
    """Build SDK AppResource objects for Genie spaces.

    Args:
        genie_rooms: Mapping of config key to GenieRoomModel.

    Returns:
        One AppResource per room, named after the sanitized config key and
        granting CAN_RUN on ``value_of(genie.space_id)``. The Genie space
        ``name`` falls back to the config key when ``genie.name`` is falsy.
    """
    sdk_resources: list[AppResource] = []
    for room_key, room in genie_rooms.items():
        resolved_space = value_of(room.space_id)
        safe_name = _sanitize_resource_name(room_key)
        genie_space = AppResourceGenieSpace(
            name=room.name or room_key,
            space_id=resolved_space,
            permission=AppResourceGenieSpaceGenieSpacePermission.CAN_RUN,
        )
        sdk_resources.append(
            AppResource(
                name=safe_name,
                description=room.description,
                genie_space=genie_space,
            )
        )
        logger.debug(
            f"Extracted SDK Genie space resource: {safe_name} -> {resolved_space}"
        )
    return sdk_resources
634
+
635
+
636
def _extract_sdk_database_resources(
    databases: dict[str, DatabaseModel],
) -> list[AppResource]:
    """Build SDK AppResource objects for Lakebase databases.

    Entries whose ``is_lakebase`` flag is falsy are skipped.

    Args:
        databases: Mapping of config key to DatabaseModel.

    Returns:
        One AppResource per Lakebase database, named after the sanitized
        config key and granting CAN_CONNECT_AND_CREATE.
    """
    sdk_resources: list[AppResource] = []
    for db_key, database in databases.items():
        if database.is_lakebase:
            safe_name = _sanitize_resource_name(db_key)
            # db.database carries the actual database name; fall back to
            # "databricks_postgres" when it is unset or otherwise falsy.
            if database.database:
                database_name = value_of(database.database)
            else:
                database_name = "databricks_postgres"
            lakebase = AppResourceDatabase(
                instance_name=database.instance_name,
                database_name=database_name,
                permission=AppResourceDatabaseDatabasePermission.CAN_CONNECT_AND_CREATE,
            )
            sdk_resources.append(
                AppResource(
                    name=safe_name,
                    description=database.description,
                    database=lakebase,
                )
            )
            logger.debug(
                f"Extracted SDK database resource: {safe_name} -> "
                f"{database.instance_name}/{database_name}"
            )
    return sdk_resources
664
+
665
+
666
def _extract_sdk_volume_resources(
    volumes: dict[str, VolumeModel],
) -> list[AppResource]:
    """Build SDK AppResource objects for Unity Catalog volumes.

    Args:
        volumes: Mapping of config key to VolumeModel.

    Returns:
        One AppResource per volume, named after the sanitized config key and
        granting READ_VOLUME on the volume's ``full_name`` as a UC securable.
    """
    sdk_resources: list[AppResource] = []
    for vol_key, vol in volumes.items():
        safe_name = _sanitize_resource_name(vol_key)
        securable = AppResourceUcSecurable(
            securable_full_name=vol.full_name,
            securable_type=AppResourceUcSecurableUcSecurableType.VOLUME,
            permission=AppResourceUcSecurableUcSecurablePermission.READ_VOLUME,
        )
        sdk_resources.append(AppResource(name=safe_name, uc_securable=securable))
        logger.debug(
            f"Extracted SDK volume resource: {safe_name} -> {vol.full_name}"
        )
    return sdk_resources
686
+
687
+
688
def _extract_sdk_secrets_from_config(config: AppConfig) -> list[AppResource]:
    """
    Build SDK AppResource objects for every secret referenced in the config.

    The config object graph (dicts, lists, tuples and any object exposing
    ``__dict__``) is traversed; each unique ``(scope, secret)`` pair found on
    a SecretVariableModel yields one AppResource with READ permission and a
    sanitized name derived from scope and key.

    Args:
        config: The AppConfig containing secret references

    Returns:
        A list of AppResource objects for secrets, in order of first discovery
    """
    found: dict[tuple[str, str], AppResource] = {}

    def _register(secret_var: SecretVariableModel) -> None:
        """Record a secret once per (scope, key) pair."""
        identity = (secret_var.scope, secret_var.secret)
        if identity in found:
            return
        raw_name = f"{secret_var.scope}_{secret_var.secret}".replace(
            "-", "_"
        ).replace("/", "_")
        found[identity] = AppResource(
            name=_sanitize_resource_name(raw_name),
            secret=AppResourceSecret(
                scope=secret_var.scope,
                key=secret_var.secret,
                permission=AppResourceSecretSecretPermission.READ,
            ),
        )
        logger.debug(
            f"Found secret for SDK resource: {secret_var.scope}/{secret_var.secret}"
        )

    def _walk(node: Any) -> None:
        """Recursively visit ``node`` and everything reachable from it."""
        if isinstance(node, SecretVariableModel):
            _register(node)
            return
        if isinstance(node, dict):
            for child in node.values():
                _walk(child)
            return
        if isinstance(node, (list, tuple)):
            for child in node:
                _walk(child)
            return
        if hasattr(node, "__dict__"):
            # Pydantic models and other plain objects; private attributes
            # (leading underscore) are not visited.
            for attr_name, child in node.__dict__.items():
                if not attr_name.startswith("_"):
                    _walk(child)

    _walk(config)

    resources = list(found.values())
    logger.info(f"Extracted {len(resources)} SDK secret resources from config")
    return resources
745
+
746
+
747
def generate_resources_yaml(config: AppConfig) -> str:
    """
    Render the ``resources`` section of app.yaml as a YAML string.

    Args:
        config: The AppConfig containing resource definitions

    Returns:
        Block-style YAML with a single top-level ``resources`` key that keeps
        the generated key order, or an empty string when there are no
        resources
    """
    import yaml

    resource_dicts = generate_app_resources(config)
    if resource_dicts:
        document = {"resources": resource_dicts}
        return yaml.dump(document, default_flow_style=False, sort_keys=False)
    return ""
766
+
767
+
768
def _extract_env_vars_from_config(config: AppConfig) -> list[dict[str, str]]:
    """
    Convert ``config.app.environment_vars`` into app.yaml ``env`` entries.

    Each variable is classified with ``_resolve_variable_type``:
    - SecretVariableModel: entry uses ``valueFrom`` with the sanitized
      ``<scope>_<secret>`` resource name
    - EnvironmentVariableModel: entry uses ``value`` set to ``value_of(...)``,
      else its ``default_value``; the variable is skipped with a warning when
      both are ``None``
    - Anything else: entry uses ``value`` set to the string itself, or
      ``str(value)`` for non-strings

    Args:
        config: The AppConfig containing environment variable definitions

    Returns:
        A list of ``{"name": ..., "value"|"valueFrom": ...}`` dictionaries;
        empty when ``config.app`` is ``None`` or has no environment variables
    """
    entries: list[dict[str, str]] = []

    if config.app is None:
        return entries

    declared = config.app.environment_vars
    if not declared:
        return entries

    for var_name, var_value in declared.items():
        resolved = _resolve_variable_type(var_value)
        entry: dict[str, str] = {"name": var_name}

        if isinstance(resolved, SecretVariableModel):
            # Reference the secret resource by its sanitized name.
            raw_name = f"{resolved.scope}_{resolved.secret}".replace(
                "-", "_"
            ).replace("/", "_")
            entry["valueFrom"] = _sanitize_resource_name(raw_name)
            logger.debug(
                f"Environment variable {var_name} references secret: "
                f"{resolved.scope}/{resolved.secret}"
            )
        elif isinstance(resolved, EnvironmentVariableModel):
            current = value_of(resolved)
            if current is None and resolved.default_value is None:
                # Nothing to emit for this variable.
                logger.warning(
                    f"Environment variable {var_name} has no value "
                    f"(env: {resolved.env})"
                )
                continue
            chosen = current if current is not None else resolved.default_value
            entry["value"] = str(chosen)
        elif resolved is None:
            # Plain value: strings pass through, everything else is stringified.
            entry["value"] = (
                var_value if isinstance(var_value, str) else str(var_value)
            )
        else:
            # Any other resolved type is stringified from the original value.
            entry["value"] = str(var_value)

        entries.append(entry)
        logger.debug(f"Extracted environment variable: {var_name}")

    logger.info(f"Extracted {len(entries)} environment variables from config")
    return entries
849
+
850
+
851
def _resolve_variable_type(
    value: Any,
) -> SecretVariableModel | EnvironmentVariableModel | None:
    """
    Classify a variable for environment variable extraction.

    Secret and environment variable models are returned unchanged. A
    CompositeVariableModel is classified by its first option (recursively).
    Everything else, including a composite with no options, yields ``None``.

    Args:
        value: The variable value to analyze

    Returns:
        The SecretVariableModel or EnvironmentVariableModel that determines
        the entry type, or None for a plain value
    """
    if isinstance(value, (SecretVariableModel, EnvironmentVariableModel)):
        return value
    if isinstance(value, CompositeVariableModel) and value.options:
        # Only the first option decides between value and valueFrom.
        return _resolve_variable_type(value.options[0])
    # Plain value (str, int, etc.), PrimitiveVariableModel, or empty composite
    return None
880
+
881
+
882
def generate_app_yaml(
    config: AppConfig,
    command: str | list[str] | None = None,
    include_resources: bool = True,
) -> str:
    """
    Generate a complete app.yaml for Databricks Apps deployment.

    The generated document contains, in this order:
    - `command`: the command used to run the app
    - `env`: base MLflow / dao-ai environment variables merged with the
      variables extracted from the config
    - `resources`: resources extracted from the AppConfig (only when
      include_resources is True and at least one resource is generated)

    Args:
        config: The AppConfig containing deployment configuration
        command: Optional custom command. When omitted, a default command that
            installs dao-ai and starts `dao_ai.apps.server` is used. A string
            is split into an argument list with shell-like quoting rules
            (`shlex.split`); it is NOT run through a shell, so commands that
            need shell syntax (`&&`, pipes, ...) should be passed as an
            explicit list such as `["/bin/bash", "-c", "..."]`. Any other
            sequence is copied into a list as-is.
        include_resources: Whether to include the resources section (default: True)

    Returns:
        A complete app.yaml as a string

    Example:
        >>> config = AppConfig.from_file("model_config.yaml")
        >>> app_yaml = generate_app_yaml(config)
        >>> print(app_yaml)
    """
    import shlex

    import yaml

    # Build the app.yaml structure
    app_config: dict[str, Any] = {}

    # Command section
    if command is None:
        app_config["command"] = [
            "/bin/bash",
            "-c",
            "pip install dao-ai && python -m dao_ai.apps.server",
        ]
    elif isinstance(command, str):
        # The app.yaml command is an argv-style sequence; wrapping the whole
        # string in a one-element list would make "python -m foo" a single
        # (non-existent) executable name, so tokenize it instead.
        app_config["command"] = shlex.split(command)
    else:
        # Copy so the caller's sequence is not aliased and so tuples are
        # dumped as plain YAML sequences.
        app_config["command"] = list(command)

    # Base environment variables for MLflow and dao-ai
    base_env_vars: list[dict[str, Any]] = [
        {"name": "MLFLOW_TRACKING_URI", "value": "databricks"},
        {"name": "MLFLOW_REGISTRY_URI", "value": "databricks-uc"},
        {"name": "DAO_AI_CONFIG_PATH", "value": "model_config.yaml"},
    ]

    # Extract environment variables from config.app.environment_vars
    config_env_vars = _extract_env_vars_from_config(config)

    # Config env vars take precedence: drop any base entry they override,
    # then append the config entries after the surviving base entries.
    overridden_names = {entry["name"] for entry in config_env_vars}
    env_vars = [
        entry for entry in base_env_vars if entry["name"] not in overridden_names
    ]
    env_vars.extend(config_env_vars)

    app_config["env"] = env_vars

    # Resources section (if requested); omitted entirely when empty
    if include_resources:
        resources = generate_app_resources(config)
        if resources:
            app_config["resources"] = resources

    # sort_keys=False keeps the command / env / resources ordering built above
    return yaml.dump(app_config, default_flow_style=False, sort_keys=False)
954
+
955
+
956
def get_resource_env_mappings(config: AppConfig) -> list[dict[str, Any]]:
    """
    Build environment variable definitions that point at app resources.

    Each entry uses `valueFrom` with the resource's key from the config, and a
    `name` made of the upper-cased key plus a suffix for the resource type.
    Entries are emitted in this order: warehouses, LLMs, Genie rooms,
    vector stores (only those with a truthy `index`), and databases
    (only those where `is_lakebase` is truthy).

    Args:
        config: The AppConfig containing resource definitions

    Returns:
        A list of environment variable definitions with valueFrom references;
        empty when the config has no resources section

    Example:
        >>> env_vars = get_resource_env_mappings(config)
        >>> # Returns:
        >>> # [
        >>> #   {"name": "SQL_WAREHOUSE_ID", "valueFrom": "default_warehouse"},
        >>> #   ...
        >>> # ]
    """
    resources = config.resources
    if resources is None:
        return []

    def _mapping(resource_key: str, suffix: str) -> dict[str, Any]:
        # One env entry: <KEY>_<SUFFIX> resolved from the resource named <key>.
        return {
            "name": f"{resource_key.upper()}_{suffix}",
            "valueFrom": resource_key,
        }

    # Warehouse IDs, serving endpoint names and Genie space IDs are mapped
    # unconditionally; only the keys are needed.
    mappings = [_mapping(name, "WAREHOUSE_ID") for name in resources.warehouses]
    mappings += [_mapping(name, "ENDPOINT") for name in resources.llms]
    mappings += [_mapping(name, "SPACE_ID") for name in resources.genie_rooms]

    # Vector stores are mapped only when they define an index.
    mappings += [
        _mapping(name, "INDEX")
        for name, store in resources.vector_stores.items()
        if store.index
    ]

    # Databases are mapped only when they are Lakebase instances.
    mappings += [
        _mapping(name, "DATABASE")
        for name, database in resources.databases.items()
        if database.is_lakebase
    ]

    return mappings