dao-ai 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dao_ai/cli.py CHANGED
@@ -715,7 +715,15 @@ def run_databricks_command(
715
715
  target: Optional[str] = None,
716
716
  dry_run: bool = False,
717
717
  ) -> None:
718
- """Execute a databricks CLI command with optional profile and target."""
718
+ """Execute a databricks CLI command with optional profile and target.
719
+
720
+ Args:
721
+ command: The databricks CLI command to execute (e.g., ["bundle", "deploy"])
722
+ profile: Optional Databricks CLI profile name
723
+ config: Optional path to the configuration file
724
+ target: Optional bundle target name
725
+ dry_run: If True, print the command without executing
726
+ """
719
727
  config_path = Path(config) if config else None
720
728
 
721
729
  if config_path and not config_path.exists():
@@ -737,15 +745,17 @@ def run_databricks_command(
737
745
  logger.debug(f"Using app-specific target: {target}")
738
746
 
739
747
  # Build databricks command (no -c flag needed, uses databricks.yaml in current dir)
748
+ # Note: --profile is a global flag, but --target is a subcommand flag for 'bundle'
740
749
  cmd = ["databricks"]
741
750
  if profile:
742
751
  cmd.extend(["--profile", profile])
743
752
 
753
+ cmd.extend(command)
754
+
755
+ # --target must come after the bundle subcommand (it's a subcommand-specific flag)
744
756
  if target:
745
757
  cmd.extend(["--target", target])
746
758
 
747
- cmd.extend(command)
748
-
749
759
  # Add config_path variable for notebooks
750
760
  if config_path and app_config:
751
761
  # Calculate relative path from notebooks directory to config file
dao_ai/config.py CHANGED
@@ -1009,27 +1009,92 @@ class VolumePathModel(BaseModel, HasFullName):
1009
1009
 
1010
1010
 
1011
1011
  class VectorStoreModel(IsDatabricksResource):
1012
+ """
1013
+ Configuration model for a Databricks Vector Search store.
1014
+
1015
+ Supports two modes:
1016
+ 1. **Use Existing Index**: Provide only `index` (fully qualified name).
1017
+ Used for querying an existing vector search index at runtime.
1018
+ 2. **Provisioning Mode**: Provide `source_table` + `embedding_source_column`.
1019
+ Used for creating a new vector search index.
1020
+
1021
+ Examples:
1022
+ Minimal configuration (use existing index):
1023
+ ```yaml
1024
+ vector_stores:
1025
+ products_search:
1026
+ index:
1027
+ name: catalog.schema.my_index
1028
+ ```
1029
+
1030
+ Full provisioning configuration:
1031
+ ```yaml
1032
+ vector_stores:
1033
+ products_search:
1034
+ source_table:
1035
+ schema: *my_schema
1036
+ name: products
1037
+ embedding_source_column: description
1038
+ endpoint:
1039
+ name: my_endpoint
1040
+ ```
1041
+ """
1042
+
1012
1043
  model_config = ConfigDict(use_enum_values=True, extra="forbid")
1013
- embedding_model: Optional[LLMModel] = None
1044
+
1045
+ # RUNTIME: Only index is truly required for querying existing indexes
1014
1046
  index: Optional[IndexModel] = None
1047
+
1048
+ # PROVISIONING ONLY: Required when creating a new index
1049
+ source_table: Optional[TableModel] = None
1050
+ embedding_source_column: Optional[str] = None
1051
+ embedding_model: Optional[LLMModel] = None
1015
1052
  endpoint: Optional[VectorSearchEndpoint] = None
1016
- source_table: TableModel
1053
+
1054
+ # OPTIONAL: For both modes
1017
1055
  source_path: Optional[VolumePathModel] = None
1018
1056
  checkpoint_path: Optional[VolumePathModel] = None
1019
1057
  primary_key: Optional[str] = None
1020
1058
  columns: Optional[list[str]] = Field(default_factory=list)
1021
1059
  doc_uri: Optional[str] = None
1022
- embedding_source_column: str
1060
+
1061
+ @model_validator(mode="after")
1062
+ def validate_configuration_mode(self) -> Self:
1063
+ """
1064
+ Validate that configuration is valid for either:
1065
+ - Use existing mode: index is provided
1066
+ - Provisioning mode: source_table + embedding_source_column provided
1067
+ """
1068
+ has_index = self.index is not None
1069
+ has_source_table = self.source_table is not None
1070
+ has_embedding_col = self.embedding_source_column is not None
1071
+
1072
+ # Must have at least index OR source_table
1073
+ if not has_index and not has_source_table:
1074
+ raise ValueError(
1075
+ "Either 'index' (for existing indexes) or 'source_table' "
1076
+ "(for provisioning) must be provided"
1077
+ )
1078
+
1079
+ # If provisioning mode, need embedding_source_column
1080
+ if has_source_table and not has_embedding_col:
1081
+ raise ValueError(
1082
+ "embedding_source_column is required when source_table is provided (provisioning mode)"
1083
+ )
1084
+
1085
+ return self
1023
1086
 
1024
1087
  @model_validator(mode="after")
1025
1088
  def set_default_embedding_model(self) -> Self:
1026
- if not self.embedding_model:
1089
+ # Only set default embedding model in provisioning mode
1090
+ if self.source_table is not None and not self.embedding_model:
1027
1091
  self.embedding_model = LLMModel(name="databricks-gte-large-en")
1028
1092
  return self
1029
1093
 
1030
1094
  @model_validator(mode="after")
1031
1095
  def set_default_primary_key(self) -> Self:
1032
- if self.primary_key is None:
1096
+ # Only auto-discover primary key in provisioning mode
1097
+ if self.primary_key is None and self.source_table is not None:
1033
1098
  from dao_ai.providers.databricks import DatabricksProvider
1034
1099
 
1035
1100
  provider: DatabricksProvider = DatabricksProvider()
@@ -1050,14 +1115,16 @@ class VectorStoreModel(IsDatabricksResource):
1050
1115
 
1051
1116
  @model_validator(mode="after")
1052
1117
  def set_default_index(self) -> Self:
1053
- if self.index is None:
1118
+ # Only generate index from source_table in provisioning mode
1119
+ if self.index is None and self.source_table is not None:
1054
1120
  name: str = f"{self.source_table.name}_index"
1055
1121
  self.index = IndexModel(schema=self.source_table.schema_model, name=name)
1056
1122
  return self
1057
1123
 
1058
1124
  @model_validator(mode="after")
1059
1125
  def set_default_endpoint(self) -> Self:
1060
- if self.endpoint is None:
1126
+ # Only find/create endpoint in provisioning mode
1127
+ if self.endpoint is None and self.source_table is not None:
1061
1128
  from dao_ai.providers.databricks import (
1062
1129
  DatabricksProvider,
1063
1130
  with_available_indexes,
@@ -1549,11 +1616,13 @@ class RerankParametersModel(BaseModel):
1549
1616
  top_n: 5 # Return top 5 after reranking
1550
1617
  ```
1551
1618
 
1552
- Available models (from fastest to most accurate):
1553
- - "ms-marco-TinyBERT-L-2-v2" (fastest, smallest)
1554
- - "ms-marco-MiniLM-L-6-v2"
1555
- - "ms-marco-MiniLM-L-12-v2" (default, good balance)
1556
- - "rank-T5-flan" (most accurate, slower)
1619
+ Available models (see https://github.com/PrithivirajDamodaran/FlashRank):
1620
+ - "ms-marco-TinyBERT-L-2-v2" (~4MB, fastest)
1621
+ - "ms-marco-MiniLM-L-12-v2" (~34MB, best cross-encoder, default)
1622
+ - "rank-T5-flan" (~110MB, best non cross-encoder)
1623
+ - "ms-marco-MultiBERT-L-12" (~150MB, multilingual 100+ languages)
1624
+ - "ce-esci-MiniLM-L12-v2" (e-commerce optimized, Amazon ESCI)
1625
+ - "miniReranker_arabic_v1" (Arabic language)
1557
1626
  """
1558
1627
 
1559
1628
  model_config = ConfigDict(use_enum_values=True, extra="forbid")
dao_ai/providers/databricks.py CHANGED
@@ -625,6 +625,22 @@ class DatabricksProvider(ServiceProvider):
625
625
  df.write.mode("overwrite").saveAsTable(table)
626
626
 
627
627
  def create_vector_store(self, vector_store: VectorStoreModel) -> None:
628
+ # Validate that this is a provisioning-mode config
629
+ if vector_store.source_table is None:
630
+ raise ValueError(
631
+ "Cannot create vector store: source_table is required for provisioning. "
632
+ "This VectorStoreModel appears to be configured for 'use existing index' mode. "
633
+ "To provision a new vector store, provide source_table and embedding_source_column."
634
+ )
635
+ if vector_store.embedding_source_column is None:
636
+ raise ValueError(
637
+ "Cannot create vector store: embedding_source_column is required for provisioning."
638
+ )
639
+ if vector_store.endpoint is None:
640
+ raise ValueError(
641
+ "Cannot create vector store: endpoint is required for provisioning."
642
+ )
643
+
628
644
  if not endpoint_exists(self.vsc, vector_store.endpoint.name):
629
645
  self.vsc.create_endpoint_and_wait(
630
646
  name=vector_store.endpoint.name,
dao_ai-0.1.2.dist-info/METADATA → dao_ai-0.1.3.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dao-ai
3
- Version: 0.1.2
3
+ Version: 0.1.3
4
4
  Summary: DAO AI: A modular, multi-agent orchestration framework for complex AI workflows. Supports agent handoff, tool integration, and dynamic configuration via YAML.
5
5
  Project-URL: Homepage, https://github.com/natefleming/dao-ai
6
6
  Project-URL: Documentation, https://natefleming.github.io/dao-ai
@@ -79,7 +79,7 @@ Description-Content-Type: text/markdown
79
79
 
80
80
  # DAO: Declarative Agent Orchestration
81
81
 
82
- [![Version](https://img.shields.io/badge/version-0.1.0-blue.svg)](CHANGELOG.md)
82
+ [![Version](https://img.shields.io/badge/version-0.1.2-blue.svg)](CHANGELOG.md)
83
83
  [![Python](https://img.shields.io/badge/python-3.11+-green.svg)](https://www.python.org/)
84
84
  [![License](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)
85
85
 
dao_ai-0.1.2.dist-info/RECORD → dao_ai-0.1.3.dist-info/RECORD CHANGED
@@ -1,8 +1,8 @@
1
1
  dao_ai/__init__.py,sha256=18P98ExEgUaJ1Byw440Ct1ty59v6nxyWtc5S6Uq2m9Q,1062
2
2
  dao_ai/agent_as_code.py,sha256=xIlLDpPVfmDVzLvbdY_V_CrC4Jvj2ItCWJ-NzdrszTo,538
3
3
  dao_ai/catalog.py,sha256=sPZpHTD3lPx4EZUtIWeQV7VQM89WJ6YH__wluk1v2lE,4947
4
- dao_ai/cli.py,sha256=azkzrG0RCKyb_euXsmiLCMXWtz7n-8KMNG-45pufqUg,32655
5
- dao_ai/config.py,sha256=o8BYHB-9R3PVWP0rtGAVrge3CHQqqnIZgfUl_awNPTU,117044
4
+ dao_ai/cli.py,sha256=1TS4I6LQq6ixxd0JQA1bt3DHRizcSiynZkSBcZA3-XE,33149
5
+ dao_ai/config.py,sha256=FmXXwFMWqSxl2ofxPIL2ZLRbx7kx-u6VjFx4wdQnFVk,119755
6
6
  dao_ai/graph.py,sha256=1-uQlo7iXZQTT3uU8aYu0N5rnhw5_g_2YLwVsAs6M-U,1119
7
7
  dao_ai/logging.py,sha256=lYy4BmucCHvwW7aI3YQkQXKJtMvtTnPDu9Hnd7_O4oc,1556
8
8
  dao_ai/messages.py,sha256=4ZBzO4iFdktGSLrmhHzFjzMIt2tpaL-aQLHOQJysGnY,6959
@@ -42,7 +42,7 @@ dao_ai/orchestration/supervisor.py,sha256=0aQLL1a3oBjG9LYTuV1y73rB0mqkIkax7MwtZ7
42
42
  dao_ai/orchestration/swarm.py,sha256=8tp1eGmsQqqWpaDcjPoJckddPWohZdmmN0RGRJ_xzOA,9198
43
43
  dao_ai/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
44
  dao_ai/providers/base.py,sha256=-fjKypCOk28h6vioPfMj9YZSw_3Kcbi2nMuAyY7vX9k,1383
45
- dao_ai/providers/databricks.py,sha256=4IBV9_staE6vMci4hUjHWYHc8iS87mM8lT6LX0xh8Mw,61040
45
+ dao_ai/providers/databricks.py,sha256=63jUkvFvdQoJGUlSEJgCsq6hPtj6tviE-AW_TPJvuvc,61850
46
46
  dao_ai/tools/__init__.py,sha256=SRd7W2DOCXKbWWy8lclRtJiCskz7SDAm94qaFF47urQ,1664
47
47
  dao_ai/tools/agent.py,sha256=plIWALywRjaDSnot13nYehBsrHRpBUpsVZakoGeajOE,1858
48
48
  dao_ai/tools/core.py,sha256=bRIN3BZhRQX8-Kpu3HPomliodyskCqjxynQmYbk6Vjs,3783
@@ -57,8 +57,8 @@ dao_ai/tools/sql.py,sha256=tKd1gjpLuKdQDyfmyYYtMiNRHDW6MGRbdEVaeqyB8Ok,7632
57
57
  dao_ai/tools/time.py,sha256=tufJniwivq29y0LIffbgeBTIDE6VgrLpmVf8Qr90qjw,9224
58
58
  dao_ai/tools/unity_catalog.py,sha256=AjQfW7bvV8NurqDLIyntYRv2eJuTwNdbvex1L5CRjOk,15534
59
59
  dao_ai/tools/vector_search.py,sha256=oe2uBwl2TfeJIXPpwiS6Rmz7wcHczSxNyqS9P3hE6co,14542
60
- dao_ai-0.1.2.dist-info/METADATA,sha256=hNysdpnl7dp_T1gC5tJQ4YHwSLoeg2UJY0aHg4-gvYc,15615
61
- dao_ai-0.1.2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
62
- dao_ai-0.1.2.dist-info/entry_points.txt,sha256=Xa-UFyc6gWGwMqMJOt06ZOog2vAfygV_DSwg1AiP46g,43
63
- dao_ai-0.1.2.dist-info/licenses/LICENSE,sha256=YZt3W32LtPYruuvHE9lGk2bw6ZPMMJD8yLrjgHybyz4,1069
64
- dao_ai-0.1.2.dist-info/RECORD,,
60
+ dao_ai-0.1.3.dist-info/METADATA,sha256=0WeeUk6J2uQRdk9Hm1FkCk75RG8psnh3WrSt4A8MfNY,15615
61
+ dao_ai-0.1.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
62
+ dao_ai-0.1.3.dist-info/entry_points.txt,sha256=Xa-UFyc6gWGwMqMJOt06ZOog2vAfygV_DSwg1AiP46g,43
63
+ dao_ai-0.1.3.dist-info/licenses/LICENSE,sha256=YZt3W32LtPYruuvHE9lGk2bw6ZPMMJD8yLrjgHybyz4,1069
64
+ dao_ai-0.1.3.dist-info/RECORD,,
File without changes