dao-ai 0.0.20__py3-none-any.whl → 0.0.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dao_ai/config.py +114 -17
- dao_ai/graph.py +3 -0
- dao_ai/memory/core.py +10 -6
- dao_ai/memory/postgres.py +102 -33
- dao_ai/models.py +137 -2
- dao_ai/providers/databricks.py +282 -0
- dao_ai/state.py +3 -0
- dao_ai/tools/genie.py +346 -34
- dao_ai/utils.py +4 -0
- {dao_ai-0.0.20.dist-info → dao_ai-0.0.22.dist-info}/METADATA +3 -3
- {dao_ai-0.0.20.dist-info → dao_ai-0.0.22.dist-info}/RECORD +14 -14
- {dao_ai-0.0.20.dist-info → dao_ai-0.0.22.dist-info}/WHEEL +0 -0
- {dao_ai-0.0.20.dist-info → dao_ai-0.0.22.dist-info}/entry_points.txt +0 -0
- {dao_ai-0.0.20.dist-info → dao_ai-0.0.22.dist-info}/licenses/LICENSE +0 -0
dao_ai/providers/databricks.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import base64
|
|
2
|
+
import uuid
|
|
2
3
|
from importlib.metadata import version
|
|
3
4
|
from pathlib import Path
|
|
4
5
|
from typing import Any, Callable, Final, Sequence
|
|
@@ -21,6 +22,7 @@ from databricks.sdk.service.catalog import (
|
|
|
21
22
|
VolumeInfo,
|
|
22
23
|
VolumeType,
|
|
23
24
|
)
|
|
25
|
+
from databricks.sdk.service.database import DatabaseCredential
|
|
24
26
|
from databricks.sdk.service.iam import User
|
|
25
27
|
from databricks.sdk.service.workspace import GetSecretResponse
|
|
26
28
|
from databricks.vector_search.client import VectorSearchClient
|
|
@@ -226,6 +228,7 @@ class DatabricksProvider(ServiceProvider):
|
|
|
226
228
|
config.resources.connections.values()
|
|
227
229
|
)
|
|
228
230
|
databases: Sequence[DatabaseModel] = list(config.resources.databases.values())
|
|
231
|
+
volumes: Sequence[VolumeModel] = list(config.resources.volumes.values())
|
|
229
232
|
|
|
230
233
|
resources: Sequence[IsDatabricksResource] = (
|
|
231
234
|
llms
|
|
@@ -236,6 +239,7 @@ class DatabricksProvider(ServiceProvider):
|
|
|
236
239
|
+ tables
|
|
237
240
|
+ connections
|
|
238
241
|
+ databases
|
|
242
|
+
+ volumes
|
|
239
243
|
)
|
|
240
244
|
|
|
241
245
|
# Flatten all resources from all models into a single list
|
|
@@ -741,3 +745,281 @@ class DatabricksProvider(ServiceProvider):
|
|
|
741
745
|
break
|
|
742
746
|
logger.debug(f"Vector search index found: {found_endpoint_name}")
|
|
743
747
|
return found_endpoint_name
|
|
748
|
+
|
|
749
|
+
def create_lakebase(self, database: DatabaseModel) -> None:
    """
    Create a Lakebase database instance using the Databricks workspace client.

    The call is idempotent:

    - If the instance exists and is AVAILABLE, STOPPED, DELETING or in any
      other non-transitional state, the state is logged and nothing is
      created.
    - If the instance exists in a transitional state (STARTING, UPDATING),
      the method polls every 10 seconds, for up to 10 minutes of sleep time,
      until the state changes. A timeout is logged as a warning, not raised.
    - If the instance does not exist (either on the first lookup or because
      it vanished while being polled), a new instance is created with the
      configured capacity (defaulting to "CU_2") and node count.

    Args:
        database: DatabaseModel containing the database configuration

    Returns:
        None

    Raises:
        Exception: Any error from the lookup or create call whose message does
            not indicate that the instance already exists is logged and
            re-raised.
    """
    import time
    from typing import Any

    def _state_name(instance: Any) -> str:
        # The SDK reports the state as an enum member; a plain Enum member
        # never compares equal to a str, so compare on its value instead.
        # Falls back to the raw object if the state is already a string.
        state: Any = instance.state
        return str(getattr(state, "value", state))

    def _already_exists(error: Exception) -> bool:
        # Both spellings are needed: the error-code form uses underscores and
        # therefore does not contain the phrase "already exists".
        message: str = str(error)
        return (
            "already exists" in message.lower()
            or "RESOURCE_ALREADY_EXISTS" in message
        )

    workspace_client: WorkspaceClient = database.workspace_client
    instance_name: str = database.instance_name
    transitional_states: tuple[str, ...] = ("STARTING", "UPDATING")
    inactive_states: tuple[str, ...] = ("STOPPED", "DELETING")
    needs_creation: bool = False

    try:
        # First, check if the database instance already exists
        existing_instance: Any = workspace_client.database.get_database_instance(
            name=instance_name
        )
        if not existing_instance:
            return

        existing_state: str = _state_name(existing_instance)
        logger.debug(
            f"Database instance {instance_name} already exists with state: {existing_state}"
        )

        if existing_state in transitional_states:
            logger.info(
                f"Database instance {instance_name} is in {existing_state} state, waiting for it to become AVAILABLE..."
            )

            # Wait for database to reach a stable state
            max_wait_time: int = 600  # 10 minutes
            wait_interval: int = 10  # 10 seconds
            elapsed: int = 0

            while elapsed < max_wait_time:
                try:
                    current_instance: Any = (
                        workspace_client.database.get_database_instance(
                            name=instance_name
                        )
                    )
                    current_state: str = _state_name(current_instance)
                    logger.debug(f"Database instance state: {current_state}")

                    if current_state == "AVAILABLE":
                        logger.info(
                            f"Database instance {instance_name} is now AVAILABLE"
                        )
                        break
                    elif current_state in transitional_states:
                        logger.debug(
                            f"Database instance still in {current_state} state, waiting {wait_interval} seconds..."
                        )
                        time.sleep(wait_interval)
                        elapsed += wait_interval
                    elif current_state in inactive_states:
                        logger.warning(
                            f"Database instance {instance_name} is in unexpected state: {current_state}"
                        )
                        break
                    else:
                        logger.warning(
                            f"Unknown database state: {current_state}, proceeding anyway"
                        )
                        break
                except NotFound:
                    logger.warning(
                        f"Database instance {instance_name} no longer exists, will attempt to recreate"
                    )
                    # Honour the log message: fall through to creation below.
                    needs_creation = True
                    break
                except Exception as state_error:
                    # Polling is best-effort; a failed status check ends the wait.
                    logger.warning(
                        f"Could not check database state: {state_error}, proceeding anyway"
                    )
                    break

            if elapsed >= max_wait_time:
                logger.warning(
                    f"Timed out waiting for database instance {instance_name} to become AVAILABLE after {max_wait_time} seconds"
                )

        elif existing_state == "AVAILABLE":
            logger.info(
                f"Database instance {instance_name} already exists and is AVAILABLE"
            )
            return
        elif existing_state in inactive_states:
            logger.warning(
                f"Database instance {instance_name} is in {existing_state} state"
            )
            return
        else:
            logger.info(
                f"Database instance {instance_name} already exists with state: {existing_state}"
            )
            return

    except NotFound:
        # Database doesn't exist, proceed with creation
        logger.debug(
            f"Database instance {instance_name} not found, creating new instance..."
        )
        needs_creation = True

    except Exception as e:
        # Handle other unexpected errors
        if _already_exists(e):
            logger.info(
                f"Database instance {instance_name} already exists (detected via exception)"
            )
            return
        logger.error(
            f"Unexpected error while handling database {instance_name}: {e}"
        )
        raise

    if not needs_creation:
        return

    try:
        # Imported lazily so the SDK type is only required on the create path
        from databricks.sdk.service.database import DatabaseInstance

        capacity: str = database.capacity if database.capacity else "CU_2"

        # Create the database instance object
        database_instance: DatabaseInstance = DatabaseInstance(
            name=instance_name,
            capacity=capacity,
            node_count=database.node_count,
        )

        # Create the database instance via API
        workspace_client.database.create_database_instance(
            database_instance=database_instance
        )
        logger.info(f"Successfully created database instance: {instance_name}")

    except Exception as create_error:
        # Handle case where database was created by another process concurrently
        if _already_exists(create_error):
            logger.info(
                f"Database instance {instance_name} was created concurrently by another process"
            )
            return

        # Re-raise unexpected errors
        logger.error(
            f"Error creating database instance {instance_name}: {create_error}"
        )
        raise
|
|
920
|
+
|
|
921
|
+
def lakebase_password_provider(self, instance_name: str) -> str:
    """
    Request a freshly generated database credential from Databricks.

    Args:
        instance_name: Name of the Lakebase instance the credential is for.

    Returns:
        The ``token`` field of the generated credential.
    """
    logger.debug(f"Generating password for lakebase instance: {instance_name}")

    generate_credential = self.w.database.generate_database_credential
    # Every call is tagged with its own random request id.
    credential: DatabaseCredential = generate_credential(
        request_id=str(uuid.uuid4()),
        instance_names=[instance_name],
    )
    return credential.token
|
|
932
|
+
|
|
933
|
+
def create_lakebase_instance_role(self, database: DatabaseModel) -> None:
    """
    Ensure a service-principal role exists on a Lakebase instance.

    The role is named after the resolved ``client_id`` from the database
    configuration and is created with the SERVICE_PRINCIPAL identity type and
    DATABRICKS_SUPERUSER membership. An existing role is left untouched.

    Args:
        database: DatabaseModel providing ``instance_name`` and ``client_id``

    Returns:
        None. When ``client_id`` is not set, a warning is logged and the
        method returns without creating anything.

    Raises:
        Exception: Any error from the role lookup or create call whose
            message does not indicate that the role already exists is logged
            and re-raised.
    """
    from databricks.sdk.service.database import (
        DatabaseInstanceRole,
        DatabaseInstanceRoleIdentityType,
        DatabaseInstanceRoleMembershipRole,
    )

    from dao_ai.config import value_of

    # Without a client_id there is no principal to create a role for
    if not database.client_id:
        logger.warning(
            f"client_id is required to create instance role for database {database.instance_name}"
        )
        return

    # The role is named after the resolved client_id
    client_id: str = value_of(database.client_id)
    role_name: str = client_id
    instance_name: str = database.instance_name

    logger.debug(
        f"Creating instance role '{role_name}' for database {instance_name} with principal {client_id}"
    )

    try:
        # Look the role up first; only a NotFound lets creation proceed
        try:
            self.w.database.get_database_instance_role(
                instance_name=instance_name,
                name=role_name,
            )
        except NotFound:
            logger.debug(
                f"Instance role '{role_name}' not found, creating new role..."
            )
        else:
            logger.info(
                f"Instance role '{role_name}' already exists for database {instance_name}"
            )
            return

        new_role: DatabaseInstanceRole = DatabaseInstanceRole(
            name=role_name,
            identity_type=DatabaseInstanceRoleIdentityType.SERVICE_PRINCIPAL,
            membership_role=DatabaseInstanceRoleMembershipRole.DATABRICKS_SUPERUSER,
        )
        self.w.database.create_database_instance_role(
            instance_name=instance_name,
            database_instance_role=new_role,
        )

        logger.info(
            f"Successfully created instance role '{role_name}' for database {instance_name}"
        )

    except Exception as e:
        message: str = str(e)
        # An "already exists" failure is treated as a concurrent creation
        created_elsewhere: bool = (
            "already exists" in message.lower()
            or "RESOURCE_ALREADY_EXISTS" in message
        )
        if not created_elsewhere:
            # Re-raise unexpected errors
            logger.error(
                f"Error creating instance role '{role_name}' for database {instance_name}: {e}"
            )
            raise

        logger.info(
            f"Instance role '{role_name}' was created concurrently for database {instance_name}"
        )
        return
|
dao_ai/state.py
CHANGED
|
@@ -31,6 +31,9 @@ class SharedState(MessagesState):
|
|
|
31
31
|
is_valid: bool # message validation node
|
|
32
32
|
message_error: str
|
|
33
33
|
|
|
34
|
+
# A mapping of genie space_id to conversation_id
|
|
35
|
+
genie_conversation_ids: dict[str, str] # Genie
|
|
36
|
+
|
|
34
37
|
|
|
35
38
|
class Context(BaseModel):
|
|
36
39
|
user_id: str | None = None
|