dao-ai 0.1.2__py3-none-any.whl → 0.1.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. dao_ai/apps/__init__.py +24 -0
  2. dao_ai/apps/handlers.py +105 -0
  3. dao_ai/apps/model_serving.py +29 -0
  4. dao_ai/apps/resources.py +1122 -0
  5. dao_ai/apps/server.py +39 -0
  6. dao_ai/cli.py +546 -37
  7. dao_ai/config.py +1179 -139
  8. dao_ai/evaluation.py +543 -0
  9. dao_ai/genie/__init__.py +55 -7
  10. dao_ai/genie/cache/__init__.py +34 -7
  11. dao_ai/genie/cache/base.py +143 -2
  12. dao_ai/genie/cache/context_aware/__init__.py +31 -0
  13. dao_ai/genie/cache/context_aware/base.py +1151 -0
  14. dao_ai/genie/cache/context_aware/in_memory.py +609 -0
  15. dao_ai/genie/cache/context_aware/persistent.py +802 -0
  16. dao_ai/genie/cache/context_aware/postgres.py +1166 -0
  17. dao_ai/genie/cache/core.py +1 -1
  18. dao_ai/genie/cache/lru.py +257 -75
  19. dao_ai/genie/cache/optimization.py +890 -0
  20. dao_ai/genie/core.py +235 -11
  21. dao_ai/memory/postgres.py +175 -39
  22. dao_ai/middleware/__init__.py +38 -0
  23. dao_ai/middleware/assertions.py +3 -3
  24. dao_ai/middleware/context_editing.py +230 -0
  25. dao_ai/middleware/core.py +4 -4
  26. dao_ai/middleware/guardrails.py +3 -3
  27. dao_ai/middleware/human_in_the_loop.py +3 -2
  28. dao_ai/middleware/message_validation.py +4 -4
  29. dao_ai/middleware/model_call_limit.py +77 -0
  30. dao_ai/middleware/model_retry.py +121 -0
  31. dao_ai/middleware/pii.py +157 -0
  32. dao_ai/middleware/summarization.py +1 -1
  33. dao_ai/middleware/tool_call_limit.py +210 -0
  34. dao_ai/middleware/tool_retry.py +174 -0
  35. dao_ai/middleware/tool_selector.py +129 -0
  36. dao_ai/models.py +327 -370
  37. dao_ai/nodes.py +9 -16
  38. dao_ai/orchestration/core.py +33 -9
  39. dao_ai/orchestration/supervisor.py +29 -13
  40. dao_ai/orchestration/swarm.py +6 -1
  41. dao_ai/{prompts.py → prompts/__init__.py} +12 -61
  42. dao_ai/prompts/instructed_retriever_decomposition.yaml +58 -0
  43. dao_ai/prompts/instruction_reranker.yaml +14 -0
  44. dao_ai/prompts/router.yaml +37 -0
  45. dao_ai/prompts/verifier.yaml +46 -0
  46. dao_ai/providers/base.py +28 -2
  47. dao_ai/providers/databricks.py +363 -33
  48. dao_ai/state.py +1 -0
  49. dao_ai/tools/__init__.py +5 -3
  50. dao_ai/tools/genie.py +103 -26
  51. dao_ai/tools/instructed_retriever.py +366 -0
  52. dao_ai/tools/instruction_reranker.py +202 -0
  53. dao_ai/tools/mcp.py +539 -97
  54. dao_ai/tools/router.py +89 -0
  55. dao_ai/tools/slack.py +13 -2
  56. dao_ai/tools/sql.py +7 -3
  57. dao_ai/tools/unity_catalog.py +32 -10
  58. dao_ai/tools/vector_search.py +493 -160
  59. dao_ai/tools/verifier.py +159 -0
  60. dao_ai/utils.py +182 -2
  61. dao_ai/vector_search.py +46 -1
  62. {dao_ai-0.1.2.dist-info → dao_ai-0.1.20.dist-info}/METADATA +45 -9
  63. dao_ai-0.1.20.dist-info/RECORD +89 -0
  64. dao_ai/agent_as_code.py +0 -22
  65. dao_ai/genie/cache/semantic.py +0 -970
  66. dao_ai-0.1.2.dist-info/RECORD +0 -64
  67. {dao_ai-0.1.2.dist-info → dao_ai-0.1.20.dist-info}/WHEEL +0 -0
  68. {dao_ai-0.1.2.dist-info → dao_ai-0.1.20.dist-info}/entry_points.txt +0 -0
  69. {dao_ai-0.1.2.dist-info → dao_ai-0.1.20.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,1122 @@
1
+ """
2
+ App resources module for generating Databricks App resource configurations.
3
+
4
+ This module provides utilities to dynamically discover and generate Databricks App
5
+ resource configurations from dao-ai AppConfig. Resources are extracted from the
6
+ config and converted to the format expected by Databricks Apps.
7
+
8
+ Databricks Apps resource documentation:
9
+ https://learn.microsoft.com/en-us/azure/databricks/dev-tools/databricks-apps/resources
10
+
11
+ Supported resource types and their mappings:
12
+ - LLMModel → serving-endpoint (Model Serving Endpoint)
13
+ - VectorStoreModel/IndexModel → vector-search-index (via UC Securable - not yet supported)
14
+ - WarehouseModel → sql-warehouse
15
+ - GenieRoomModel → genie-space
16
+ - VolumeModel → volume (via UC Securable)
17
+ - FunctionModel → function (via UC Securable - not yet supported)
18
+ - ConnectionModel → connection (not yet supported in SDK)
19
+ - DatabaseModel → database (Lakebase)
20
+ - DatabricksAppModel → app (not yet supported in SDK)
21
+
22
+ Usage:
23
+ from dao_ai.apps.resources import generate_app_resources, generate_sdk_resources
24
+ from dao_ai.config import AppConfig
25
+
26
+ config = AppConfig.from_file("model_config.yaml")
27
+
28
+ # For SDK-based deployment (recommended)
29
+ sdk_resources = generate_sdk_resources(config)
30
+
31
+ # For YAML-based documentation
32
+ resources = generate_app_resources(config)
33
+ """
34
+
35
+ from typing import Any
36
+
37
+ from databricks.sdk.service.apps import (
38
+ AppResource,
39
+ AppResourceDatabase,
40
+ AppResourceDatabaseDatabasePermission,
41
+ AppResourceExperiment,
42
+ AppResourceExperimentExperimentPermission,
43
+ AppResourceGenieSpace,
44
+ AppResourceGenieSpaceGenieSpacePermission,
45
+ AppResourceSecret,
46
+ AppResourceSecretSecretPermission,
47
+ AppResourceServingEndpoint,
48
+ AppResourceServingEndpointServingEndpointPermission,
49
+ AppResourceSqlWarehouse,
50
+ AppResourceSqlWarehouseSqlWarehousePermission,
51
+ AppResourceUcSecurable,
52
+ AppResourceUcSecurableUcSecurablePermission,
53
+ AppResourceUcSecurableUcSecurableType,
54
+ )
55
+ from loguru import logger
56
+
57
+ from dao_ai.config import (
58
+ AppConfig,
59
+ CompositeVariableModel,
60
+ ConnectionModel,
61
+ DatabaseModel,
62
+ DatabricksAppModel,
63
+ EnvironmentVariableModel,
64
+ FunctionModel,
65
+ GenieRoomModel,
66
+ IsDatabricksResource,
67
+ LLMModel,
68
+ SecretVariableModel,
69
+ TableModel,
70
+ VectorStoreModel,
71
+ VolumeModel,
72
+ WarehouseModel,
73
+ value_of,
74
+ )
75
+
76
# Resource type mappings from dao-ai to Databricks Apps
# Maps each dao-ai config model class to the Databricks Apps resource type
# string used in the YAML `resources:` section.
RESOURCE_TYPE_MAPPING: dict[type, str] = {
    LLMModel: "serving-endpoint",
    VectorStoreModel: "vector-search-index",
    WarehouseModel: "sql-warehouse",
    GenieRoomModel: "genie-space",
    VolumeModel: "volume",
    FunctionModel: "function",
    ConnectionModel: "connection",
    DatabaseModel: "database",
    DatabricksAppModel: "app",
}

# Default permissions for each resource type
# These are the permission levels granted to the app's service principal
# when no explicit permission is configured.
DEFAULT_PERMISSIONS: dict[str, list[str]] = {
    "serving-endpoint": ["CAN_QUERY"],
    "vector-search-index": ["CAN_SELECT"],
    "sql-warehouse": ["CAN_USE"],
    "genie-space": ["CAN_RUN"],
    "volume": ["CAN_READ"],
    "function": ["CAN_EXECUTE"],
    "connection": ["USE_CONNECTION"],
    "database": ["CAN_CONNECT_AND_CREATE"],
    "app": ["CAN_VIEW"],
}

# Valid user API scopes for Databricks Apps
# These are the only scopes that can be requested for on-behalf-of-user access
VALID_USER_API_SCOPES: set[str] = {
    "sql",
    "serving.serving-endpoints",
    "vectorsearch.vector-search-indexes",
    "files.files",
    "dashboards.genie",
    "catalog.connections",
    "catalog.catalogs:read",
    "catalog.schemas:read",
    "catalog.tables:read",
}

# Mapping from resource api_scopes to valid user_api_scopes
# Some resource scopes map directly, others need translation
API_SCOPE_TO_USER_SCOPE: dict[str, str] = {
    # Direct mappings
    "serving.serving-endpoints": "serving.serving-endpoints",
    "vectorsearch.vector-search-indexes": "vectorsearch.vector-search-indexes",
    "files.files": "files.files",
    "dashboards.genie": "dashboards.genie",
    "catalog.connections": "catalog.connections",
    # SQL-related scopes map to "sql"
    "sql.warehouses": "sql",
    "sql.statement-execution": "sql",
    # Vector search endpoints also need serving
    "vectorsearch.vector-search-endpoints": "serving.serving-endpoints",
    # Catalog scopes
    "catalog.volumes": "files.files",
}
133
+
134
+
135
def _extract_llm_resources(
    llms: dict[str, LLMModel],
) -> list[dict[str, Any]]:
    """Extract model serving endpoint resources from LLMModels.

    Args:
        llms: Mapping of config keys to LLMModel definitions.

    Returns:
        A list of resource dicts in Databricks Apps format, one per LLM,
        each granted the default serving-endpoint permission (CAN_QUERY).
    """
    resources: list[dict[str, Any]] = []
    # The original iterated with enumerate() but never used the index.
    for key, llm in llms.items():
        resource: dict[str, Any] = {
            "name": key,
            "type": "serving-endpoint",
            "serving_endpoint_name": llm.name,
            "permissions": [
                {"level": p} for p in DEFAULT_PERMISSIONS["serving-endpoint"]
            ],
        }
        resources.append(resource)
        logger.debug(f"Extracted serving endpoint resource: {key} -> {llm.name}")
    return resources
152
+
153
+
154
def _extract_vector_search_resources(
    vector_stores: dict[str, VectorStoreModel],
) -> list[dict[str, Any]]:
    """Extract vector search index resources from VectorStoreModels.

    Stores without a configured index are skipped.
    """
    extracted: list[dict[str, Any]] = []
    for name, store in vector_stores.items():
        index = store.index
        if index is None:
            # No backing index -> nothing to expose as an app resource.
            continue
        extracted.append(
            {
                "name": name,
                "type": "vector-search-index",
                "vector_search_index_name": index.full_name,
                "permissions": [
                    {"level": p} for p in DEFAULT_PERMISSIONS["vector-search-index"]
                ],
            }
        )
        logger.debug(f"Extracted vector search resource: {name} -> {index.full_name}")
    return extracted
173
+
174
+
175
def _extract_warehouse_resources(
    warehouses: dict[str, WarehouseModel],
) -> list[dict[str, Any]]:
    """Extract SQL warehouse resources from WarehouseModels."""
    extracted: list[dict[str, Any]] = []
    for name, wh in warehouses.items():
        # warehouse_id may be a variable reference; resolve it first.
        resolved_id = value_of(wh.warehouse_id)
        extracted.append(
            {
                "name": name,
                "type": "sql-warehouse",
                "sql_warehouse_id": resolved_id,
                "permissions": [{"level": p} for p in DEFAULT_PERMISSIONS["sql-warehouse"]],
            }
        )
        logger.debug(f"Extracted SQL warehouse resource: {name} -> {resolved_id}")
    return extracted
191
+
192
+
193
def _extract_genie_resources(
    genie_rooms: dict[str, GenieRoomModel],
) -> list[dict[str, Any]]:
    """Extract Genie space resources from GenieRoomModels."""
    extracted: list[dict[str, Any]] = []
    for name, room in genie_rooms.items():
        # space_id may be a variable reference; resolve it first.
        resolved_space_id = value_of(room.space_id)
        extracted.append(
            {
                "name": name,
                "type": "genie-space",
                "genie_space_id": resolved_space_id,
                "permissions": [{"level": p} for p in DEFAULT_PERMISSIONS["genie-space"]],
            }
        )
        logger.debug(f"Extracted Genie space resource: {name} -> {resolved_space_id}")
    return extracted
209
+
210
+
211
def _extract_volume_resources(
    volumes: dict[str, VolumeModel],
) -> list[dict[str, Any]]:
    """Extract UC Volume resources from VolumeModels."""
    extracted: list[dict[str, Any]] = []
    for name, vol in volumes.items():
        extracted.append(
            {
                "name": name,
                "type": "volume",
                "volume_name": vol.full_name,
                "permissions": [{"level": p} for p in DEFAULT_PERMISSIONS["volume"]],
            }
        )
        logger.debug(f"Extracted volume resource: {name} -> {vol.full_name}")
    return extracted
226
+
227
+
228
def _extract_function_resources(
    functions: dict[str, FunctionModel],
) -> list[dict[str, Any]]:
    """Extract UC Function resources from FunctionModels."""
    extracted: list[dict[str, Any]] = []
    for name, fn in functions.items():
        extracted.append(
            {
                "name": name,
                "type": "function",
                "function_name": fn.full_name,
                "permissions": [{"level": p} for p in DEFAULT_PERMISSIONS["function"]],
            }
        )
        logger.debug(f"Extracted function resource: {name} -> {fn.full_name}")
    return extracted
243
+
244
+
245
def _extract_connection_resources(
    connections: dict[str, ConnectionModel],
) -> list[dict[str, Any]]:
    """Extract UC Connection resources from ConnectionModels."""
    extracted: list[dict[str, Any]] = []
    for name, connection in connections.items():
        extracted.append(
            {
                "name": name,
                "type": "connection",
                "connection_name": connection.name,
                "permissions": [{"level": p} for p in DEFAULT_PERMISSIONS["connection"]],
            }
        )
        logger.debug(f"Extracted connection resource: {name} -> {connection.name}")
    return extracted
260
+
261
+
262
def _extract_database_resources(
    databases: dict[str, DatabaseModel],
) -> list[dict[str, Any]]:
    """Extract Lakebase database resources from DatabaseModels.

    Non-Lakebase databases are filtered out; only databases backed by a
    Lakebase instance (is_lakebase) become app resources.
    """
    extracted: list[dict[str, Any]] = []
    for name, database in databases.items():
        if not database.is_lakebase:
            # Only include Lakebase databases (those with instance_name).
            continue
        extracted.append(
            {
                "name": name,
                "type": "database",
                "database_instance_name": database.instance_name,
                "permissions": [{"level": p} for p in DEFAULT_PERMISSIONS["database"]],
            }
        )
        logger.debug(f"Extracted database resource: {name} -> {database.instance_name}")
    return extracted
280
+
281
+
282
def _extract_app_resources(
    apps: dict[str, DatabricksAppModel],
) -> list[dict[str, Any]]:
    """Extract Databricks App resources from DatabricksAppModels."""
    extracted: list[dict[str, Any]] = []
    for name, app_model in apps.items():
        extracted.append(
            {
                "name": name,
                "type": "app",
                "app_name": app_model.name,
                "permissions": [{"level": p} for p in DEFAULT_PERMISSIONS["app"]],
            }
        )
        logger.debug(f"Extracted app resource: {name} -> {app_model.name}")
    return extracted
297
+
298
+
299
def _extract_secrets_from_config(config: AppConfig) -> list[dict[str, Any]]:
    """
    Extract all secrets referenced in the config as resources.

    This function walks through the entire config object to find all
    SecretVariableModel instances and extracts their scope and key.

    Args:
        config: The AppConfig containing secret references

    Returns:
        A list of secret resource dictionaries with unique scope/key pairs
    """
    secrets: dict[tuple[str, str], dict[str, Any]] = {}
    used_names: set[str] = set()
    # Objects already visited during the walk, keyed by id(). Guards against
    # infinite recursion if the config object graph contains reference cycles
    # (e.g. models that point back at their parent).
    seen_object_ids: set[int] = set()

    def get_unique_resource_name(base_name: str) -> str:
        """Generate a unique resource name, adding suffix if needed."""
        sanitized = _sanitize_resource_name(base_name)
        if sanitized not in used_names:
            used_names.add(sanitized)
            return sanitized
        # Name collision - add numeric suffix
        counter = 1
        while True:
            # Leave room for suffix (e.g., "_1", "_2", etc.)
            suffix = f"_{counter}"
            max_base_len = 30 - len(suffix)
            candidate = sanitized[:max_base_len] + suffix
            if candidate not in used_names:
                used_names.add(candidate)
                return candidate
            counter += 1

    def extract_from_value(value: Any, path: str = "") -> None:
        """Recursively extract secrets from any value."""
        if isinstance(value, SecretVariableModel):
            secret_key = (value.scope, value.secret)
            if secret_key not in secrets:
                # Create a unique name for the secret resource
                base_name = f"{value.scope}_{value.secret}".replace("-", "_").replace(
                    "/", "_"
                )
                resource_name = get_unique_resource_name(base_name)
                secrets[secret_key] = {
                    "name": resource_name,
                    "type": "secret",
                    "scope": value.scope,
                    "key": value.secret,
                    "permissions": [{"level": "READ"}],
                }
                logger.debug(
                    f"Found secret: {value.scope}/{value.secret} at {path} -> resource: {resource_name}"
                )
        elif isinstance(value, dict):
            for k, v in value.items():
                extract_from_value(v, f"{path}.{k}" if path else k)
        elif isinstance(value, (list, tuple)):
            for i, v in enumerate(value):
                extract_from_value(v, f"{path}[{i}]")
        elif hasattr(value, "__dict__"):
            # Handle Pydantic models and other objects with __dict__.
            # Skip objects we've already walked to avoid cycles.
            if id(value) in seen_object_ids:
                return
            seen_object_ids.add(id(value))
            for k, v in value.__dict__.items():
                if not k.startswith("_"):  # Skip private attributes
                    extract_from_value(v, f"{path}.{k}" if path else k)

    # Walk through the entire config
    extract_from_value(config)

    resources = list(secrets.values())
    logger.info(f"Extracted {len(resources)} secret resources from config")
    return resources
371
+
372
+
373
def generate_app_resources(config: AppConfig) -> list[dict[str, Any]]:
    """
    Generate Databricks App resource configurations from an AppConfig.

    Extracts every resource defined in the AppConfig and converts it to the
    dict format expected by Databricks Apps. These resources grant the app's
    service principal access to Databricks platform features.

    Args:
        config: The AppConfig containing resource definitions

    Returns:
        A list of resource dictionaries in Databricks Apps format, e.g.
        ``{"name": "default_llm", "type": "serving-endpoint",
        "serving_endpoint_name": "...", "permissions": [{"level": "CAN_QUERY"}]}``.
    """
    collected: list[dict[str, Any]] = []

    if config.resources is None:
        logger.debug("No resources defined in config")
        return collected

    # Each extractor handles one resource category; order matters only for
    # the ordering of the returned list.
    extractors = (
        (_extract_llm_resources, config.resources.llms),
        (_extract_vector_search_resources, config.resources.vector_stores),
        (_extract_warehouse_resources, config.resources.warehouses),
        (_extract_genie_resources, config.resources.genie_rooms),
        (_extract_volume_resources, config.resources.volumes),
        (_extract_function_resources, config.resources.functions),
        (_extract_connection_resources, config.resources.connections),
        (_extract_database_resources, config.resources.databases),
        (_extract_app_resources, config.resources.apps),
    )
    for extract, category in extractors:
        collected.extend(extract(category))

    # Secrets can appear anywhere in the config, so scan the whole object.
    collected.extend(_extract_secrets_from_config(config))

    logger.info(f"Generated {len(collected)} app resources from config")
    return collected
423
+
424
+
425
def generate_user_api_scopes(config: AppConfig) -> list[str]:
    """
    Generate user API scopes from resources with on_behalf_of_user=True.

    This function examines all resources in the config and collects the
    API scopes needed for on-behalf-of-user authentication. Only valid
    user API scopes are returned.

    Args:
        config: The AppConfig containing resource definitions

    Returns:
        A sorted list of unique user API scopes needed for OBO authentication

    Example:
        >>> config = AppConfig.from_file("model_config.yaml")
        >>> scopes = generate_user_api_scopes(config)
        >>> print(scopes)
        ['sql', 'serving.serving-endpoints', 'dashboards.genie']
    """
    if config.resources is None:
        return []

    # All resource categories that can opt into on-behalf-of-user auth.
    # (The original code had nine copy-pasted loops; this is the same logic
    # driven by data.)
    categories = (
        config.resources.llms,
        config.resources.vector_stores,
        config.resources.warehouses,
        config.resources.genie_rooms,
        config.resources.volumes,
        config.resources.functions,
        config.resources.connections,
        config.resources.databases,
        config.resources.tables,
    )
    obo_resources: list[IsDatabricksResource] = [
        resource
        for category in categories
        for resource in category.values()
        if resource.on_behalf_of_user
    ]

    # Collect api_scopes from all OBO resources and map to user_api_scopes.
    scopes: set[str] = set()
    for resource in obo_resources:
        for api_scope in resource.api_scopes:
            if api_scope in API_SCOPE_TO_USER_SCOPE:
                user_scope = API_SCOPE_TO_USER_SCOPE[api_scope]
                if user_scope in VALID_USER_API_SCOPES:
                    scopes.add(user_scope)
            elif api_scope in VALID_USER_API_SCOPES:
                # Direct match
                scopes.add(api_scope)

    # Always add catalog read scopes if we have any table or function access
    if any(isinstance(r, (TableModel, FunctionModel)) for r in obo_resources):
        scopes.update(
            ("catalog.catalogs:read", "catalog.schemas:read", "catalog.tables:read")
        )

    # Sort for consistent ordering
    result = sorted(scopes)
    logger.info(f"Generated {len(result)} user API scopes for OBO resources: {result}")
    return result
512
+
513
+
514
+ def _sanitize_resource_name(name: str) -> str:
515
+ """
516
+ Sanitize a resource name to meet Databricks Apps requirements.
517
+
518
+ Resource names must be:
519
+ - Between 2 and 30 characters
520
+ - Only contain alphanumeric characters, hyphens, and underscores
521
+
522
+ Args:
523
+ name: The original resource name
524
+
525
+ Returns:
526
+ A sanitized name that meets the requirements
527
+ """
528
+ # Replace dots and special characters with underscores
529
+ sanitized = name.replace(".", "_").replace("-", "_")
530
+
531
+ # Remove any characters that aren't alphanumeric or underscore
532
+ sanitized = "".join(c for c in sanitized if c.isalnum() or c == "_")
533
+
534
+ # Ensure minimum length of 2
535
+ if len(sanitized) < 2:
536
+ sanitized = sanitized + "_r"
537
+
538
+ # Truncate to maximum length of 30
539
+ if len(sanitized) > 30:
540
+ sanitized = sanitized[:30]
541
+
542
+ return sanitized
543
+
544
+
545
def generate_sdk_resources(
    config: AppConfig,
    experiment_id: str | None = None,
) -> list[AppResource]:
    """
    Generate Databricks SDK AppResource objects from an AppConfig.

    Extracts all resources defined in the AppConfig and converts them to SDK
    AppResource objects suitable for the Apps API when creating or updating
    an app.

    Args:
        config: The AppConfig containing resource definitions
        experiment_id: Optional MLflow experiment ID to add as a resource.
            When provided, the experiment is added with CAN_EDIT permission,
            allowing the app to log traces and runs.

    Returns:
        A list of AppResource objects for the Databricks SDK
    """
    sdk_resources: list[AppResource] = []

    # The experiment resource (if any) goes first.
    if experiment_id:
        sdk_resources.append(_extract_sdk_experiment_resource(experiment_id))

    if config.resources is None:
        logger.debug("No resources defined in config")
        return sdk_resources

    # Per-category SDK extractors, applied in a fixed order.
    extractors = (
        (_extract_sdk_llm_resources, config.resources.llms),
        (_extract_sdk_warehouse_resources, config.resources.warehouses),
        (_extract_sdk_genie_resources, config.resources.genie_rooms),
        (_extract_sdk_database_resources, config.resources.databases),
        (_extract_sdk_volume_resources, config.resources.volumes),
    )
    for extract, category in extractors:
        sdk_resources.extend(extract(category))

    # Secrets can appear anywhere in the config, so scan the whole object.
    sdk_resources.extend(_extract_sdk_secrets_from_config(config))

    # Note: Vector search indexes, functions, and connections are not yet
    # supported as app resources in the SDK.

    logger.info(f"Generated {len(sdk_resources)} SDK app resources from config")
    return sdk_resources
599
+
600
+
601
def _extract_sdk_llm_resources(
    llms: dict[str, LLMModel],
) -> list[AppResource]:
    """Extract SDK AppResource objects for model serving endpoints."""
    extracted: list[AppResource] = []
    for config_key, llm in llms.items():
        resource_name = _sanitize_resource_name(config_key)
        endpoint = AppResourceServingEndpoint(
            name=llm.name,
            permission=AppResourceServingEndpointServingEndpointPermission.CAN_QUERY,
        )
        extracted.append(
            AppResource(
                name=resource_name,
                description=llm.description,
                serving_endpoint=endpoint,
            )
        )
        logger.debug(
            f"Extracted SDK serving endpoint resource: {resource_name} -> {llm.name}"
        )
    return extracted
621
+
622
+
623
def _extract_sdk_warehouse_resources(
    warehouses: dict[str, WarehouseModel],
) -> list[AppResource]:
    """Extract SDK AppResource objects for SQL warehouses."""
    extracted: list[AppResource] = []
    for config_key, wh in warehouses.items():
        # warehouse_id may be a variable reference; resolve it first.
        resolved_id = value_of(wh.warehouse_id)
        resource_name = _sanitize_resource_name(config_key)
        extracted.append(
            AppResource(
                name=resource_name,
                description=wh.description,
                sql_warehouse=AppResourceSqlWarehouse(
                    id=resolved_id,
                    permission=AppResourceSqlWarehouseSqlWarehousePermission.CAN_USE,
                ),
            )
        )
        logger.debug(
            f"Extracted SDK SQL warehouse resource: {resource_name} -> {resolved_id}"
        )
    return extracted
644
+
645
+
646
def _extract_sdk_genie_resources(
    genie_rooms: dict[str, GenieRoomModel],
) -> list[AppResource]:
    """Extract SDK AppResource objects for Genie spaces."""
    extracted: list[AppResource] = []
    for config_key, room in genie_rooms.items():
        # space_id may be a variable reference; resolve it first.
        resolved_space_id = value_of(room.space_id)
        resource_name = _sanitize_resource_name(config_key)
        space = AppResourceGenieSpace(
            # Fall back to the config key when the room has no display name.
            name=room.name or config_key,
            space_id=resolved_space_id,
            permission=AppResourceGenieSpaceGenieSpacePermission.CAN_RUN,
        )
        extracted.append(
            AppResource(
                name=resource_name,
                description=room.description,
                genie_space=space,
            )
        )
        logger.debug(
            f"Extracted SDK Genie space resource: {resource_name} -> {resolved_space_id}"
        )
    return extracted
668
+
669
+
670
def _extract_sdk_database_resources(
    databases: dict[str, DatabaseModel],
) -> list[AppResource]:
    """Extract SDK AppResource objects for Lakebase databases.

    Non-Lakebase databases (no instance_name) are skipped.
    """
    extracted: list[AppResource] = []
    for config_key, database in databases.items():
        if not database.is_lakebase:
            # Only include Lakebase databases (those with instance_name).
            continue
        resource_name = _sanitize_resource_name(config_key)
        # Use db.database for the actual database name (defaults to
        # "databricks_postgres"); db.name is just the config key/description,
        # not the actual database name.
        actual_db_name = (
            value_of(database.database) if database.database else "databricks_postgres"
        )
        extracted.append(
            AppResource(
                name=resource_name,
                description=database.description,
                database=AppResourceDatabase(
                    instance_name=database.instance_name,
                    database_name=actual_db_name,
                    permission=AppResourceDatabaseDatabasePermission.CAN_CONNECT_AND_CREATE,
                ),
            )
        )
        logger.debug(
            f"Extracted SDK database resource: {resource_name} -> "
            f"{database.instance_name}/{actual_db_name}"
        )
    return extracted
698
+
699
+
700
def _extract_sdk_volume_resources(
    volumes: dict[str, VolumeModel],
) -> list[AppResource]:
    """Extract SDK AppResource objects for Unity Catalog volumes.

    NOTE(review): unlike the other SDK extractors, this one does not pass a
    description to AppResource — confirm whether VolumeModel carries one.
    """
    extracted: list[AppResource] = []
    for config_key, vol in volumes.items():
        resource_name = _sanitize_resource_name(config_key)
        securable = AppResourceUcSecurable(
            securable_full_name=vol.full_name,
            securable_type=AppResourceUcSecurableUcSecurableType.VOLUME,
            permission=AppResourceUcSecurableUcSecurablePermission.READ_VOLUME,
        )
        extracted.append(AppResource(name=resource_name, uc_securable=securable))
        logger.debug(
            f"Extracted SDK volume resource: {resource_name} -> {vol.full_name}"
        )
    return extracted
720
+
721
+
722
def _extract_sdk_experiment_resource(
    experiment_id: str,
    resource_name: str = "experiment",
) -> AppResource:
    """Create SDK AppResource for MLflow experiment.

    This allows the Databricks App to log traces and runs to the specified
    MLflow experiment. The experiment ID is exposed via the MLFLOW_EXPERIMENT_ID
    environment variable using valueFrom: experiment in app.yaml.

    Args:
        experiment_id: The MLflow experiment ID
        resource_name: The resource key name (default: "experiment")

    Returns:
        An AppResource for the MLflow experiment
    """
    experiment = AppResourceExperiment(
        experiment_id=experiment_id,
        permission=AppResourceExperimentExperimentPermission.CAN_EDIT,
    )
    app_resource = AppResource(name=resource_name, experiment=experiment)
    logger.debug(
        f"Extracted SDK experiment resource: {resource_name} -> {experiment_id}"
    )
    return app_resource
750
+
751
+
752
def _extract_sdk_secrets_from_config(config: AppConfig) -> list[AppResource]:
    """
    Extract SDK AppResource objects for all secrets referenced in the config.

    This function walks through the entire config object to find all
    SecretVariableModel instances and creates AppResource objects with
    READ permission for each unique scope/key pair.

    Args:
        config: The AppConfig containing secret references

    Returns:
        A list of AppResource objects for secrets
    """
    secrets: dict[tuple[str, str], AppResource] = {}
    used_names: set[str] = set()
    # Objects already visited during the walk, keyed by id(). Guards against
    # infinite recursion if the config object graph contains reference cycles
    # (e.g. models that point back at their parent).
    seen_object_ids: set[int] = set()

    def get_unique_resource_name(base_name: str) -> str:
        """Generate a unique resource name, adding suffix if needed."""
        sanitized = _sanitize_resource_name(base_name)
        if sanitized not in used_names:
            used_names.add(sanitized)
            return sanitized
        # Name collision - add numeric suffix
        counter = 1
        while True:
            # Leave room for suffix (e.g., "_1", "_2", etc.)
            suffix = f"_{counter}"
            max_base_len = 30 - len(suffix)
            candidate = sanitized[:max_base_len] + suffix
            if candidate not in used_names:
                used_names.add(candidate)
                return candidate
            counter += 1

    def extract_from_value(value: Any) -> None:
        """Recursively extract secrets from any value."""
        if isinstance(value, SecretVariableModel):
            secret_key = (value.scope, value.secret)
            if secret_key not in secrets:
                # Create a unique name for the secret resource
                base_name = f"{value.scope}_{value.secret}".replace("-", "_").replace(
                    "/", "_"
                )
                resource_name = get_unique_resource_name(base_name)

                resource = AppResource(
                    name=resource_name,
                    secret=AppResourceSecret(
                        scope=value.scope,
                        key=value.secret,
                        permission=AppResourceSecretSecretPermission.READ,
                    ),
                )
                secrets[secret_key] = resource
                logger.debug(
                    f"Found secret for SDK resource: {value.scope}/{value.secret} -> resource: {resource_name}"
                )
        elif isinstance(value, dict):
            for v in value.values():
                extract_from_value(v)
        elif isinstance(value, (list, tuple)):
            for v in value:
                extract_from_value(v)
        elif hasattr(value, "__dict__"):
            # Handle Pydantic models and other objects with __dict__.
            # Skip objects we've already walked to avoid cycles.
            if id(value) in seen_object_ids:
                return
            seen_object_ids.add(id(value))
            for k, v in value.__dict__.items():
                if not k.startswith("_"):  # Skip private attributes
                    extract_from_value(v)

    # Walk through the entire config
    extract_from_value(config)

    resources = list(secrets.values())
    logger.info(f"Extracted {len(resources)} SDK secret resources from config")
    return resources
828
+
829
+
830
def generate_resources_yaml(config: AppConfig) -> str:
    """
    Render the resources section of app.yaml as a YAML string.

    Args:
        config: The AppConfig containing resource definitions

    Returns:
        A YAML-formatted string for the resources section, or an empty
        string when the config yields no resources.
    """
    import yaml

    app_resources = generate_app_resources(config)
    if not app_resources:
        return ""

    return yaml.dump(
        {"resources": app_resources},
        default_flow_style=False,
        sort_keys=False,
    )
849
+
850
+
851
def _extract_env_vars_from_config(config: AppConfig) -> list[dict[str, str]]:
    """
    Build app.yaml environment-variable entries from config.app.environment_vars.

    Each configured variable is translated into the shape Databricks Apps
    expects:
    - SecretVariableModel: entry with "valueFrom" referencing the secret resource
    - EnvironmentVariableModel: entry with the resolved "value"
    - CompositeVariableModel: the first option decides which form is used
    - Plain values (and anything else): entry with a stringified "value"

    Args:
        config: The AppConfig containing environment variable definitions

    Returns:
        A list of environment variable dictionaries for app.yaml

    Example:
        >>> config = AppConfig.from_file("model_config.yaml")
        >>> env_vars = _extract_env_vars_from_config(config)
        >>> # Returns entries such as:
        >>> # [{"name": "API_KEY", "valueFrom": "my_scope_api_key"},
        >>> #  {"name": "LOG_LEVEL", "value": "INFO"}]
    """
    result: list[dict[str, str]] = []

    app = config.app
    if app is None or not app.environment_vars:
        return result

    for var_name, raw in app.environment_vars.items():
        entry: dict[str, str] = {"name": var_name}
        kind = _resolve_variable_type(raw)

        if isinstance(kind, SecretVariableModel):
            # Secret reference: point valueFrom at the secret's resource name.
            # NOTE(review): this sanitization must stay in sync with the name
            # generated in _extract_sdk_secrets_from_config; a collision there
            # receives a numeric suffix that is not reproduced here — confirm.
            ref = f"{kind.scope}_{kind.secret}".replace("-", "_").replace("/", "_")
            entry["valueFrom"] = _sanitize_resource_name(ref)
            logger.debug(
                f"Environment variable {var_name} references secret: "
                f"{kind.scope}/{kind.secret}"
            )
        elif isinstance(kind, EnvironmentVariableModel):
            # Environment variable: resolve now, fall back to the default.
            resolved = value_of(kind)
            if resolved is not None:
                entry["value"] = str(resolved)
            elif kind.default_value is not None:
                entry["value"] = str(kind.default_value)
            else:
                # Nothing resolvable — skip this variable entirely.
                logger.warning(
                    f"Environment variable {var_name} has no value "
                    f"(env: {kind.env})"
                )
                continue
        elif kind is None:
            # Plain value: keep strings as-is, stringify everything else.
            entry["value"] = raw if isinstance(raw, str) else str(raw)
        else:
            # Any other resolved type: fall back to a string value.
            entry["value"] = str(raw)

        result.append(entry)
        logger.debug(f"Extracted environment variable: {var_name}")

    logger.info(f"Extracted {len(result)} environment variables from config")
    return result
932
+
933
+
934
def _resolve_variable_type(
    value: Any,
) -> SecretVariableModel | EnvironmentVariableModel | None:
    """
    Classify a config variable for environment-variable extraction.

    Secret and environment variable models are returned as-is. For a
    CompositeVariableModel, the first option determines whether value or
    valueFrom is used in app.yaml. Anything else is treated as a plain value.

    Args:
        value: The variable value to analyze

    Returns:
        The resolved variable model (SecretVariableModel or
        EnvironmentVariableModel), or None for plain values
    """
    if isinstance(value, (SecretVariableModel, EnvironmentVariableModel)):
        return value
    if isinstance(value, CompositeVariableModel):
        # Delegate to the first option, if any, to decide the type.
        options = value.options
        return _resolve_variable_type(options[0]) if options else None
    # Plain value (str, int, etc.) or PrimitiveVariableModel.
    return None
963
+
964
+
965
def generate_app_yaml(
    config: AppConfig,
    command: str | list[str] | None = None,
    include_resources: bool = True,
) -> str:
    """
    Build a complete app.yaml for a Databricks Apps deployment.

    The generated document contains the command used to launch the app, the
    environment variables required by MLflow and dao-ai (merged with any
    variables declared in the config, which take precedence), and optionally
    the resources extracted from the AppConfig.

    Args:
        config: The AppConfig containing deployment configuration
        command: Optional custom command. If not provided, uses default dao-ai app_server
        include_resources: Whether to include the resources section (default: True)

    Returns:
        A complete app.yaml as a string

    Example:
        >>> config = AppConfig.from_file("model_config.yaml")
        >>> app_yaml = generate_app_yaml(config)
        >>> print(app_yaml)
    """
    import yaml

    document: dict[str, Any] = {}

    # Command section: default bootstrap command unless the caller supplied one.
    if command is None:
        document["command"] = [
            "/bin/bash",
            "-c",
            "pip install dao-ai && python -m dao_ai.apps.server",
        ]
    else:
        document["command"] = [command] if isinstance(command, str) else command

    # Baseline environment for MLflow and dao-ai.
    env: list[dict[str, str]] = [
        {"name": "MLFLOW_TRACKING_URI", "value": "databricks"},
        {"name": "MLFLOW_REGISTRY_URI", "value": "databricks-uc"},
        {"name": "MLFLOW_EXPERIMENT_ID", "valueFrom": "experiment"},
        {"name": "DAO_AI_CONFIG_PATH", "value": "dao_ai.yaml"},
    ]

    # Variables Databricks Apps injects automatically must not appear in
    # app.yaml; drop them from the config-declared set.
    platform_provided = {"DATABRICKS_HOST"}
    declared = [
        item
        for item in _extract_env_vars_from_config(config)
        if item["name"] not in platform_provided
    ]

    # Merge declared variables over the baseline; on a name clash the
    # config-declared entry replaces the baseline one (appended at the end).
    baseline_names = {item["name"] for item in env}
    for item in declared:
        if item["name"] in baseline_names:
            env = [e for e in env if e["name"] != item["name"]]
        env.append(item)

    document["env"] = env

    # Resources section, when requested and non-empty.
    if include_resources:
        app_resources = generate_app_resources(config)
        if app_resources:
            document["resources"] = app_resources

    return yaml.dump(document, default_flow_style=False, sort_keys=False)
1047
+
1048
+
1049
def get_resource_env_mappings(config: AppConfig) -> list[dict[str, Any]]:
    """
    Build environment variables that reference configured app resources.

    Each entry uses `valueFrom` so the deployed app can read the concrete
    resource value (warehouse ID, endpoint name, Genie space ID, vector
    search index, database name) at runtime.

    Args:
        config: The AppConfig containing resource definitions

    Returns:
        A list of environment variable definitions with valueFrom references

    Example:
        >>> env_vars = get_resource_env_mappings(config)
        >>> # Returns entries such as:
        >>> # [{"name": "SQL_WAREHOUSE_ID", "valueFrom": "default_warehouse"}, ...]
    """
    mappings: list[dict[str, Any]] = []

    if config.resources is None:
        return mappings

    def add(key: str, suffix: str) -> None:
        """Append one valueFrom mapping for the given resource key."""
        mappings.append({"name": f"{key.upper()}{suffix}", "valueFrom": key})

    # SQL warehouse IDs.
    for key in config.resources.warehouses:
        add(key, "_WAREHOUSE_ID")

    # Model serving endpoint names.
    for key in config.resources.llms:
        add(key, "_ENDPOINT")

    # Genie space IDs.
    for key in config.resources.genie_rooms:
        add(key, "_SPACE_ID")

    # Vector search indexes — only stores that define an index.
    for key, store in config.resources.vector_stores.items():
        if store.index:
            add(key, "_INDEX")

    # Database instances — only Lakebase-backed databases.
    for key, database in config.resources.databases.items():
        if database.is_lakebase:
            add(key, "_DATABASE")

    return mappings