dao-ai 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dao_ai/cli.py +104 -18
- dao_ai/config.py +149 -40
- dao_ai/providers/databricks.py +11 -0
- dao_ai/vector_search.py +37 -0
- {dao_ai-0.1.2.dist-info → dao_ai-0.1.4.dist-info}/METADATA +36 -2
- {dao_ai-0.1.2.dist-info → dao_ai-0.1.4.dist-info}/RECORD +9 -9
- {dao_ai-0.1.2.dist-info → dao_ai-0.1.4.dist-info}/WHEEL +0 -0
- {dao_ai-0.1.2.dist-info → dao_ai-0.1.4.dist-info}/entry_points.txt +0 -0
- {dao_ai-0.1.2.dist-info → dao_ai-0.1.4.dist-info}/licenses/LICENSE +0 -0
dao_ai/cli.py
CHANGED
@@ -47,6 +47,57 @@ def get_default_user_id() -> str:
     return local_user
 
 
+def detect_cloud_provider(profile: Optional[str] = None) -> Optional[str]:
+    """
+    Detect the cloud provider from the Databricks workspace URL.
+
+    The cloud provider is determined by the workspace URL pattern:
+    - Azure: *.azuredatabricks.net
+    - AWS: *.cloud.databricks.com (without gcp subdomain)
+    - GCP: *.gcp.databricks.com
+
+    Args:
+        profile: Optional Databricks CLI profile name
+
+    Returns:
+        Cloud provider string ('azure', 'aws', 'gcp') or None if detection fails
+    """
+    try:
+        from databricks.sdk import WorkspaceClient
+
+        # Create workspace client with optional profile
+        if profile:
+            w = WorkspaceClient(profile=profile)
+        else:
+            w = WorkspaceClient()
+
+        # Get the workspace URL from config
+        host = w.config.host
+        if not host:
+            logger.warning("Could not determine workspace URL for cloud detection")
+            return None
+
+        host_lower = host.lower()
+
+        if "azuredatabricks.net" in host_lower:
+            logger.debug(f"Detected Azure cloud from workspace URL: {host}")
+            return "azure"
+        elif ".gcp.databricks.com" in host_lower:
+            logger.debug(f"Detected GCP cloud from workspace URL: {host}")
+            return "gcp"
+        elif ".cloud.databricks.com" in host_lower or "databricks.com" in host_lower:
+            # AWS uses *.cloud.databricks.com or regional patterns
+            logger.debug(f"Detected AWS cloud from workspace URL: {host}")
+            return "aws"
+        else:
+            logger.warning(f"Could not determine cloud provider from URL: {host}")
+            return None
+
+    except Exception as e:
+        logger.warning(f"Could not detect cloud provider: {e}")
+        return None
+
+
 env_path: str = find_dotenv()
 if env_path:
     logger.info(f"Loading environment variables from: {env_path}")
@@ -220,6 +271,13 @@ Examples:
         "-t",
         "--target",
         type=str,
+        help="Bundle target name (default: auto-generated from app name and cloud)",
+    )
+    bundle_parser.add_argument(
+        "--cloud",
+        type=str,
+        choices=["azure", "aws", "gcp"],
+        help="Cloud provider (auto-detected from workspace URL if not specified)",
     )
     bundle_parser.add_argument(
         "--dry-run",
@@ -676,7 +734,7 @@ def generate_bundle_from_template(config_path: Path, app_name: str) -> Path:
     4. Returns the path to the generated file
 
     The generated databricks.yaml is overwritten on each deployment and is not tracked in git.
-
+    The template contains cloud-specific targets (azure, aws, gcp) with appropriate node types.
 
     Args:
         config_path: Path to the app config file
@@ -713,39 +771,59 @@ def run_databricks_command(
     profile: Optional[str] = None,
     config: Optional[str] = None,
     target: Optional[str] = None,
+    cloud: Optional[str] = None,
     dry_run: bool = False,
 ) -> None:
-    """Execute a databricks CLI command with optional profile and
+    """Execute a databricks CLI command with optional profile, target, and cloud.
+
+    Args:
+        command: The databricks CLI command to execute (e.g., ["bundle", "deploy"])
+        profile: Optional Databricks CLI profile name
+        config: Optional path to the configuration file
+        target: Optional bundle target name (if not provided, auto-generated from app name and cloud)
+        cloud: Optional cloud provider ('azure', 'aws', 'gcp'). Auto-detected if not specified.
+        dry_run: If True, print the command without executing
+    """
     config_path = Path(config) if config else None
 
     if config_path and not config_path.exists():
        logger.error(f"Configuration file {config_path} does not exist.")
        sys.exit(1)
 
-    # Load app config
+    # Load app config
     app_config: AppConfig = AppConfig.from_file(config_path) if config_path else None
     normalized_name: str = normalize_name(app_config.app.name) if app_config else None
 
+    # Auto-detect cloud provider if not specified
+    if not cloud:
+        cloud = detect_cloud_provider(profile)
+        if cloud:
+            logger.info(f"Auto-detected cloud provider: {cloud}")
+        else:
+            logger.warning("Could not detect cloud provider. Defaulting to 'azure'.")
+            cloud = "azure"
+
     # Generate app-specific bundle from template (overwrites databricks.yaml temporarily)
     if config_path and app_config:
         generate_bundle_from_template(config_path, normalized_name)
 
-    # Use
-
-
-        target
-        logger.debug(f"Using app-specific target: {target}")
+    # Use cloud as target (azure, aws, gcp) - can be overridden with explicit --target
+    if not target:
+        target = cloud
+        logger.debug(f"Using cloud-based target: {target}")
 
-    # Build databricks command
+    # Build databricks command
+    # --profile is a global flag, --target is a subcommand flag for 'bundle'
     cmd = ["databricks"]
     if profile:
         cmd.extend(["--profile", profile])
 
+    cmd.extend(command)
+
+    # --target must come after the bundle subcommand (it's a subcommand-specific flag)
     if target:
         cmd.extend(["--target", target])
 
-    cmd.extend(command)
-
     # Add config_path variable for notebooks
     if config_path and app_config:
         # Calculate relative path from notebooks directory to config file
@@ -800,30 +878,38 @@ def handle_bundle_command(options: Namespace) -> None:
     profile: Optional[str] = options.profile
     config: Optional[str] = options.config
     target: Optional[str] = options.target
+    cloud: Optional[str] = options.cloud
     dry_run: bool = options.dry_run
 
     if options.deploy:
         logger.info("Deploying DAO AI asset bundle...")
         run_databricks_command(
-            ["bundle", "deploy"],
+            ["bundle", "deploy"],
+            profile=profile,
+            config=config,
+            target=target,
+            cloud=cloud,
+            dry_run=dry_run,
         )
     if options.run:
         logger.info("Running DAO AI system with current configuration...")
         # Use static job resource key that matches databricks.yaml (resources.jobs.deploy_job)
         run_databricks_command(
             ["bundle", "run", "deploy_job"],
-            profile,
-            config,
-            target,
+            profile=profile,
+            config=config,
+            target=target,
+            cloud=cloud,
             dry_run=dry_run,
         )
    if options.destroy:
         logger.info("Destroying DAO AI system with current configuration...")
         run_databricks_command(
             ["bundle", "destroy", "--auto-approve"],
-            profile,
-            config,
-            target,
+            profile=profile,
+            config=config,
+            target=target,
+            cloud=cloud,
             dry_run=dry_run,
         )
     else:
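For reference, the detection added above reduces to a pure URL-pattern check. A minimal standalone sketch of that logic (the hostnames are made up for illustration):

```python
# Standalone sketch mirroring detect_cloud_provider()'s pattern matching.
# Order matters: the .gcp.databricks.com check must precede the generic
# databricks.com fallback, since GCP hosts also contain "databricks.com".
def cloud_from_host(host: str) -> str | None:
    host = host.lower()
    if "azuredatabricks.net" in host:
        return "azure"
    if ".gcp.databricks.com" in host:
        return "gcp"
    if ".cloud.databricks.com" in host or "databricks.com" in host:
        return "aws"  # AWS uses *.cloud.databricks.com or regional patterns
    return None

assert cloud_from_host("https://adb-1234567890.11.azuredatabricks.net") == "azure"
assert cloud_from_host("https://1234567890.1.gcp.databricks.com") == "gcp"
assert cloud_from_host("https://my-workspace.cloud.databricks.com") == "aws"
```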
dao_ai/config.py
CHANGED
@@ -601,6 +601,8 @@ class VectorSearchEndpoint(BaseModel):
 
 
 class IndexModel(IsDatabricksResource, HasFullName):
+    """Model representing a Databricks Vector Search index."""
+
     model_config = ConfigDict(use_enum_values=True, extra="forbid")
     schema_model: Optional[SchemaModel] = Field(default=None, alias="schema")
     name: str
@@ -624,6 +626,22 @@ class IndexModel(IsDatabricksResource, HasFullName):
         )
     ]
 
+    def exists(self) -> bool:
+        """Check if this vector search index exists.
+
+        Returns:
+            True if the index exists, False otherwise.
+        """
+        try:
+            self.workspace_client.vector_search_indexes.get_index(self.full_name)
+            return True
+        except NotFound:
+            logger.debug(f"Index not found: {self.full_name}")
+            return False
+        except Exception as e:
+            logger.warning(f"Error checking index existence for {self.full_name}: {e}")
+            return False
+
 
 class FunctionModel(IsDatabricksResource, HasFullName):
     model_config = ConfigDict(use_enum_values=True, extra="forbid")
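A hedged usage sketch for the new `exists()` helper (assumes dao-ai ≥ 0.1.4 with workspace credentials configured; the index name is illustrative, and `IndexModel` may require additional fields in practice):

```python
from dao_ai.config import IndexModel

# exists() swallows NotFound and returns False instead of raising,
# so it is safe to use as a simple pre-flight check.
idx = IndexModel(name="main.retail.products_index")
if not idx.exists():
    print(f"Vector search index {idx.name} is not available yet")
```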
@@ -1009,27 +1027,92 @@ class VolumePathModel(BaseModel, HasFullName):
 
 
 class VectorStoreModel(IsDatabricksResource):
+    """
+    Configuration model for a Databricks Vector Search store.
+
+    Supports two modes:
+    1. **Use Existing Index**: Provide only `index` (fully qualified name).
+       Used for querying an existing vector search index at runtime.
+    2. **Provisioning Mode**: Provide `source_table` + `embedding_source_column`.
+       Used for creating a new vector search index.
+
+    Examples:
+        Minimal configuration (use existing index):
+        ```yaml
+        vector_stores:
+          products_search:
+            index:
+              name: catalog.schema.my_index
+        ```
+
+        Full provisioning configuration:
+        ```yaml
+        vector_stores:
+          products_search:
+            source_table:
+              schema: *my_schema
+              name: products
+            embedding_source_column: description
+            endpoint:
+              name: my_endpoint
+        ```
+    """
+
     model_config = ConfigDict(use_enum_values=True, extra="forbid")
-
+
+    # RUNTIME: Only index is truly required for querying existing indexes
     index: Optional[IndexModel] = None
+
+    # PROVISIONING ONLY: Required when creating a new index
+    source_table: Optional[TableModel] = None
+    embedding_source_column: Optional[str] = None
+    embedding_model: Optional[LLMModel] = None
     endpoint: Optional[VectorSearchEndpoint] = None
-
+
+    # OPTIONAL: For both modes
     source_path: Optional[VolumePathModel] = None
     checkpoint_path: Optional[VolumePathModel] = None
     primary_key: Optional[str] = None
     columns: Optional[list[str]] = Field(default_factory=list)
     doc_uri: Optional[str] = None
-
+
+    @model_validator(mode="after")
+    def validate_configuration_mode(self) -> Self:
+        """
+        Validate that configuration is valid for either:
+        - Use existing mode: index is provided
+        - Provisioning mode: source_table + embedding_source_column provided
+        """
+        has_index = self.index is not None
+        has_source_table = self.source_table is not None
+        has_embedding_col = self.embedding_source_column is not None
+
+        # Must have at least index OR source_table
+        if not has_index and not has_source_table:
+            raise ValueError(
+                "Either 'index' (for existing indexes) or 'source_table' "
+                "(for provisioning) must be provided"
+            )
+
+        # If provisioning mode, need embedding_source_column
+        if has_source_table and not has_embedding_col:
+            raise ValueError(
+                "embedding_source_column is required when source_table is provided (provisioning mode)"
+            )
+
+        return self
 
     @model_validator(mode="after")
     def set_default_embedding_model(self) -> Self:
-
+        # Only set default embedding model in provisioning mode
+        if self.source_table is not None and not self.embedding_model:
             self.embedding_model = LLMModel(name="databricks-gte-large-en")
         return self
 
     @model_validator(mode="after")
     def set_default_primary_key(self) -> Self:
-
+        # Only auto-discover primary key in provisioning mode
+        if self.primary_key is None and self.source_table is not None:
             from dao_ai.providers.databricks import DatabricksProvider
 
             provider: DatabricksProvider = DatabricksProvider()
@@ -1050,14 +1133,16 @@ class VectorStoreModel(IsDatabricksResource):
 
     @model_validator(mode="after")
     def set_default_index(self) -> Self:
-
+        # Only generate index from source_table in provisioning mode
+        if self.index is None and self.source_table is not None:
             name: str = f"{self.source_table.name}_index"
             self.index = IndexModel(schema=self.source_table.schema_model, name=name)
         return self
 
     @model_validator(mode="after")
     def set_default_endpoint(self) -> Self:
-
+        # Only find/create endpoint in provisioning mode
+        if self.endpoint is None and self.source_table is not None:
             from dao_ai.providers.databricks import (
                 DatabricksProvider,
                 with_available_indexes,
@@ -1092,18 +1177,60 @@ class VectorStoreModel(IsDatabricksResource):
         return self.index.as_resources()
 
     def as_index(self, vsc: VectorSearchClient | None = None) -> VectorSearchIndex:
-        from dao_ai.providers.base import ServiceProvider
         from dao_ai.providers.databricks import DatabricksProvider
 
-        provider:
+        provider: DatabricksProvider = DatabricksProvider(vsc=vsc)
         index: VectorSearchIndex = provider.get_vector_index(self)
         return index
 
     def create(self, vsc: VectorSearchClient | None = None) -> None:
-
+        """
+        Create or validate the vector search index.
+
+        Behavior depends on configuration mode:
+        - **Provisioning Mode** (source_table provided): Creates the index
+        - **Use Existing Mode** (only index provided): Validates the index exists
+
+        Args:
+            vsc: Optional VectorSearchClient instance
+
+        Raises:
+            ValueError: If configuration is invalid or index doesn't exist
+        """
         from dao_ai.providers.databricks import DatabricksProvider
 
-        provider:
+        provider: DatabricksProvider = DatabricksProvider(vsc=vsc)
+
+        if self.source_table is not None:
+            self._create_new_index(provider)
+        else:
+            self._validate_existing_index(provider)
+
+    def _validate_existing_index(self, provider: Any) -> None:
+        """Validate that an existing index is accessible."""
+        if self.index is None:
+            raise ValueError("index is required for 'use existing' mode")
+
+        if self.index.exists():
+            logger.info(
+                "Vector search index exists and ready",
+                index_name=self.index.full_name,
+            )
+        else:
+            raise ValueError(
+                f"Index '{self.index.full_name}' does not exist. "
+                "Provide 'source_table' to provision it."
+            )
+
+    def _create_new_index(self, provider: Any) -> None:
+        """Create a new vector search index from source table."""
+        if self.embedding_source_column is None:
+            raise ValueError("embedding_source_column is required for provisioning")
+        if self.endpoint is None:
+            raise ValueError("endpoint is required for provisioning")
+        if self.index is None:
+            raise ValueError("index is required for provisioning")
+
         provider.create_vector_store(self)
 
 
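A sketch of the two `VectorStoreModel` modes from Python, per the docstring and validator above (field names follow the diff; whether the base resource class requires further fields at construction time is an assumption):

```python
from pydantic import ValidationError

from dao_ai.config import IndexModel, VectorStoreModel

# Use-existing mode: only `index` is needed; the provisioning-only
# validators are skipped because source_table is None.
store = VectorStoreModel(index=IndexModel(name="main.retail.products_index"))

# Providing neither `index` nor `source_table` now fails fast at
# validation time (pydantic wraps the ValueError in a ValidationError).
try:
    VectorStoreModel()
except ValidationError as err:
    print(err)  # "Either 'index' (for existing indexes) or 'source_table' ..."
```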
@@ -1266,32 +1393,12 @@ class DatabaseModel(IsDatabricksResource):
 
     @model_validator(mode="after")
     def update_host(self) -> Self:
-
+        # Lakebase uses instance_name directly via databricks_langchain - host not needed
+        if self.is_lakebase:
             return self
 
-        #
-        #
-        if self.is_lakebase:
-            try:
-                existing_instance: DatabaseInstance = (
-                    self.workspace_client.database.get_database_instance(
-                        name=self.instance_name
-                    )
-                )
-                self.host = existing_instance.read_write_dns
-            except Exception as e:
-                # For Lakebase with OBO/ambient auth, we can't fetch at config time
-                # The host will need to be provided explicitly or fetched at runtime
-                if self.on_behalf_of_user:
-                    logger.debug(
-                        f"Could not fetch host for database {self.instance_name} "
-                        f"(Lakebase with OBO mode - will be resolved at runtime): {e}"
-                    )
-                else:
-                    raise ValueError(
-                        f"Could not fetch host for database {self.instance_name}. "
-                        f"Please provide the 'host' explicitly or ensure the instance exists: {e}"
-                    )
+        # For standard PostgreSQL, host must be provided by the user
+        # (enforced by validate_connection_type)
         return self
 
     @model_validator(mode="after")
@@ -1549,11 +1656,13 @@ class RerankParametersModel(BaseModel):
         top_n: 5  # Return top 5 after reranking
     ```
 
-    Available models (
-    - "ms-marco-TinyBERT-L-2-v2" (
-    - "ms-marco-MiniLM-L-
-    - "
-    - "
+    Available models (see https://github.com/PrithivirajDamodaran/FlashRank):
+    - "ms-marco-TinyBERT-L-2-v2" (~4MB, fastest)
+    - "ms-marco-MiniLM-L-12-v2" (~34MB, best cross-encoder, default)
+    - "rank-T5-flan" (~110MB, best non cross-encoder)
+    - "ms-marco-MultiBERT-L-12" (~150MB, multilingual 100+ languages)
+    - "ce-esci-MiniLM-L12-v2" (e-commerce optimized, Amazon ESCI)
+    - "miniReranker_arabic_v1" (Arabic language)
     """
 
     model_config = ConfigDict(use_enum_values=True, extra="forbid")
dao_ai/providers/databricks.py
CHANGED
@@ -625,6 +625,17 @@ class DatabricksProvider(ServiceProvider):
         df.write.mode("overwrite").saveAsTable(table)
 
     def create_vector_store(self, vector_store: VectorStoreModel) -> None:
+        """
+        Create a vector search index from a source table.
+
+        This method expects a VectorStoreModel in provisioning mode with all
+        required fields validated. Use VectorStoreModel.create() which handles
+        mode detection and validation.
+
+        Args:
+            vector_store: VectorStoreModel configured for provisioning
+        """
+        # Ensure endpoint exists
         if not endpoint_exists(self.vsc, vector_store.endpoint.name):
             self.vsc.create_endpoint_and_wait(
                 name=vector_store.endpoint.name,
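Per the docstring above, callers are expected to go through `VectorStoreModel.create()` rather than invoking the provider directly; a minimal sketch of that call path:

```python
from dao_ai.config import VectorStoreModel

def ensure_vector_store(store: VectorStoreModel) -> None:
    # With source_table set, create() delegates to
    # DatabricksProvider.create_vector_store(), which first ensures the
    # endpoint exists; with only `index` set it merely validates that
    # the named index already exists.
    store.create()
```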
dao_ai/vector_search.py
CHANGED
@@ -72,3 +72,40 @@ def index_exists(
         raise e
     # If we reach here, the index doesn't exist
     return False
+
+
+def find_index(
+    vsc: VectorSearchClient, index_full_name: str
+) -> tuple[bool, str | None]:
+    """
+    Find a Vector Search index across all endpoints.
+
+    Searches all available endpoints to find where the index is located.
+
+    Args:
+        vsc: Databricks Vector Search client instance
+        index_full_name: Fully qualified name of the index (catalog.schema.index)
+
+    Returns:
+        Tuple of (exists: bool, endpoint_name: str | None)
+        - (True, endpoint_name) if index is found
+        - (False, None) if index is not found on any endpoint
+    """
+    try:
+        endpoints = vsc.list_endpoints().get("endpoints", [])
+    except Exception as e:
+        if "REQUEST_LIMIT_EXCEEDED" in str(e):
+            print("WARN: couldn't list endpoints due to REQUEST_LIMIT_EXCEEDED error.")
+            return (False, None)
+        raise e
+
+    for endpoint in endpoints:
+        endpoint_name: str = endpoint["name"]
+        try:
+            vsc.get_index(endpoint_name, index_full_name).describe()
+            return (True, endpoint_name)
+        except Exception:
+            # Index not on this endpoint, try next
+            continue
+
+    return (False, None)
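A usage sketch for the new `find_index()` helper (requires the databricks-vectorsearch client and workspace credentials; the index name is illustrative):

```python
from databricks.vector_search.client import VectorSearchClient

from dao_ai.vector_search import find_index

vsc = VectorSearchClient()
exists, endpoint_name = find_index(vsc, "main.retail.products_index")
if exists:
    print(f"Index is served by endpoint: {endpoint_name}")
else:
    print("Index not found on any endpoint")
```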
{dao_ai-0.1.2.dist-info → dao_ai-0.1.4.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dao-ai
-Version: 0.1.2
+Version: 0.1.4
 Summary: DAO AI: A modular, multi-agent orchestration framework for complex AI workflows. Supports agent handoff, tool integration, and dynamic configuration via YAML.
 Project-URL: Homepage, https://github.com/natefleming/dao-ai
 Project-URL: Documentation, https://natefleming.github.io/dao-ai
@@ -79,7 +79,7 @@ Description-Content-Type: text/markdown
 
 # DAO: Declarative Agent Orchestration
 
-[](CHANGELOG.md)
+[](CHANGELOG.md)
 [](https://www.python.org/)
 [](LICENSE)
 
@@ -293,6 +293,16 @@ This single command:
 3. Deploys it to Databricks
 4. Creates a serving endpoint
 
+**Deploying to a specific workspace:**
+
+```bash
+# Deploy to AWS workspace
+dao-ai bundle --deploy --run -c config/my_agent.yaml --profile aws-field-eng
+
+# Deploy to Azure workspace
+dao-ai bundle --deploy --run -c config/my_agent.yaml --profile azure-retail
+```
+
 **Step 5: Interact with your agent**
 
 Once deployed, you can chat with your agent using Python:
@@ -422,10 +432,34 @@ dao-ai graph -c config/my_config.yaml -o workflow.png
 # Deploy with Databricks Asset Bundles
 dao-ai bundle --deploy --run -c config/my_config.yaml
 
+# Deploy to a specific workspace (multi-cloud support)
+dao-ai bundle --deploy -c config/my_config.yaml --profile aws-field-eng
+dao-ai bundle --deploy -c config/my_config.yaml --profile azure-retail
+
 # Interactive chat with agent
 dao-ai chat -c config/my_config.yaml
 ```
 
+### Multi-Cloud Deployment
+
+DAO AI supports deploying to Azure, AWS, and GCP workspaces with automatic cloud detection:
+
+```bash
+# Deploy to AWS workspace
+dao-ai bundle --deploy -c config/my_config.yaml --profile aws-prod
+
+# Deploy to Azure workspace
+dao-ai bundle --deploy -c config/my_config.yaml --profile azure-prod
+
+# Deploy to GCP workspace
+dao-ai bundle --deploy -c config/my_config.yaml --profile gcp-prod
+```
+
+The CLI automatically:
+- Detects the cloud provider from your profile's workspace URL
+- Selects appropriate compute node types for each cloud
+- Creates isolated deployment state per profile
+
 👉 **Learn more:** [CLI Reference Documentation](docs/cli-reference.md)
 
 ---
{dao_ai-0.1.2.dist-info → dao_ai-0.1.4.dist-info}/RECORD
CHANGED
@@ -1,8 +1,8 @@
 dao_ai/__init__.py,sha256=18P98ExEgUaJ1Byw440Ct1ty59v6nxyWtc5S6Uq2m9Q,1062
 dao_ai/agent_as_code.py,sha256=xIlLDpPVfmDVzLvbdY_V_CrC4Jvj2ItCWJ-NzdrszTo,538
 dao_ai/catalog.py,sha256=sPZpHTD3lPx4EZUtIWeQV7VQM89WJ6YH__wluk1v2lE,4947
-dao_ai/cli.py,sha256=
-dao_ai/config.py,sha256=
+dao_ai/cli.py,sha256=htmnPXBpKZuXlWRhIl-91zndZO8-nm-REzTBZ49Y-T8,35849
+dao_ai/config.py,sha256=GWBmrbiixMG0ZszLk_XTRKRIS0QqOk_TIQhauK--MIY,120863
 dao_ai/graph.py,sha256=1-uQlo7iXZQTT3uU8aYu0N5rnhw5_g_2YLwVsAs6M-U,1119
 dao_ai/logging.py,sha256=lYy4BmucCHvwW7aI3YQkQXKJtMvtTnPDu9Hnd7_O4oc,1556
 dao_ai/messages.py,sha256=4ZBzO4iFdktGSLrmhHzFjzMIt2tpaL-aQLHOQJysGnY,6959
@@ -13,7 +13,7 @@ dao_ai/prompts.py,sha256=G0ng5f2PkzfgdKrSl03Rnd6riZn5APedof0GAzsWQI8,4792
 dao_ai/state.py,sha256=0wbbzfQmldkCu26gdTE5j0Rl-_pfilza-YIHPbSWlvI,6394
 dao_ai/types.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dao_ai/utils.py,sha256=_Urd7Nj2VzrgPKf3NS4E6vt0lWRhEUddBqWN9BksqeE,11543
-dao_ai/vector_search.py,sha256=
+dao_ai/vector_search.py,sha256=8d3xROg9zSIYNXjRRl6rSexsJTlufjRl5Fy1ZA8daKA,4019
 dao_ai/genie/__init__.py,sha256=vdEyGhrt6L8GlK75SyYvTnl8QpHKDCJC5hJKLg4DesQ,1063
 dao_ai/genie/core.py,sha256=HPKbocvhnnw_PkQwfoq5bpgQmL9lZyyS6_goTJL8yiY,1073
 dao_ai/genie/cache/__init__.py,sha256=JfgCJl1NYQ1aZvZ4kly4T6uQK6ZCJ6PX_htuq7nJF50,1203
@@ -42,7 +42,7 @@ dao_ai/orchestration/supervisor.py,sha256=0aQLL1a3oBjG9LYTuV1y73rB0mqkIkax7MwtZ7
 dao_ai/orchestration/swarm.py,sha256=8tp1eGmsQqqWpaDcjPoJckddPWohZdmmN0RGRJ_xzOA,9198
 dao_ai/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dao_ai/providers/base.py,sha256=-fjKypCOk28h6vioPfMj9YZSw_3Kcbi2nMuAyY7vX9k,1383
-dao_ai/providers/databricks.py,sha256=
+dao_ai/providers/databricks.py,sha256=XxYkyoDYkwGV_Xg1IJBpGOl4d7U5HiFP4RtjjSLgenI,61437
 dao_ai/tools/__init__.py,sha256=SRd7W2DOCXKbWWy8lclRtJiCskz7SDAm94qaFF47urQ,1664
 dao_ai/tools/agent.py,sha256=plIWALywRjaDSnot13nYehBsrHRpBUpsVZakoGeajOE,1858
 dao_ai/tools/core.py,sha256=bRIN3BZhRQX8-Kpu3HPomliodyskCqjxynQmYbk6Vjs,3783
@@ -57,8 +57,8 @@ dao_ai/tools/sql.py,sha256=tKd1gjpLuKdQDyfmyYYtMiNRHDW6MGRbdEVaeqyB8Ok,7632
 dao_ai/tools/time.py,sha256=tufJniwivq29y0LIffbgeBTIDE6VgrLpmVf8Qr90qjw,9224
 dao_ai/tools/unity_catalog.py,sha256=AjQfW7bvV8NurqDLIyntYRv2eJuTwNdbvex1L5CRjOk,15534
 dao_ai/tools/vector_search.py,sha256=oe2uBwl2TfeJIXPpwiS6Rmz7wcHczSxNyqS9P3hE6co,14542
-dao_ai-0.1.2.dist-info/METADATA,sha256=
-dao_ai-0.1.2.dist-info/WHEEL,sha256=
-dao_ai-0.1.2.dist-info/entry_points.txt,sha256=
-dao_ai-0.1.2.dist-info/licenses/LICENSE,sha256=
-dao_ai-0.1.2.dist-info/RECORD,,
+dao_ai-0.1.4.dist-info/METADATA,sha256=d19ZTQ88c21cR9B9W14BZX-zk2YUcMJk1NRioMjeCuw,16685
+dao_ai-0.1.4.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+dao_ai-0.1.4.dist-info/entry_points.txt,sha256=Xa-UFyc6gWGwMqMJOt06ZOog2vAfygV_DSwg1AiP46g,43
+dao_ai-0.1.4.dist-info/licenses/LICENSE,sha256=YZt3W32LtPYruuvHE9lGk2bw6ZPMMJD8yLrjgHybyz4,1069
+dao_ai-0.1.4.dist-info/RECORD,,
{dao_ai-0.1.2.dist-info → dao_ai-0.1.4.dist-info}/WHEEL
File without changes

{dao_ai-0.1.2.dist-info → dao_ai-0.1.4.dist-info}/entry_points.txt
File without changes

{dao_ai-0.1.2.dist-info → dao_ai-0.1.4.dist-info}/licenses/LICENSE
File without changes