dao-ai 0.1.18__py3-none-any.whl → 0.1.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dao_ai/config.py CHANGED
@@ -1773,6 +1773,105 @@ class GenieSemanticCacheParametersModel(BaseModel):
1773
1773
  return self
1774
1774
 
1775
1775
 
1776
# Memory estimation for capacity planning:
# - Each entry: ~20KB (8KB question embedding + 8KB context embedding + 4KB strings/overhead)
# - 1,000 entries: ~20MB (0.4% of 8GB)
# - 5,000 entries: ~100MB (2% of 8GB)
# - 10,000 entries: ~200MB (4-5% of 8GB) - default for ~30 users
# - 20,000 entries: ~400MB (8-10% of 8GB)
# Default 10,000 entries provides ~330 queries per user for 30 users.
class GenieInMemorySemanticCacheParametersModel(BaseModel):
    """
    Configuration for in-memory semantic cache (no database required).

    This cache stores embeddings and cache entries entirely in memory, providing
    semantic similarity matching without requiring external database dependencies
    like PostgreSQL or Databricks Lakebase.

    Default settings are tuned for ~30 users on an 8GB machine:
    - Capacity: 10,000 entries (~200MB memory, ~330 queries per user)
    - Eviction: LRU (Least Recently Used) - keeps frequently accessed queries
    - TTL: 1 week (accommodates weekly work patterns and batch jobs)
    - Memory overhead: ~4-5% of 8GB system

    The LRU eviction strategy ensures hot queries stay cached while cold queries
    are evicted, providing better hit rates than FIFO eviction.

    For larger deployments or memory-constrained environments, adjust capacity and TTL accordingly.

    Use this when:
    - No external database access is available
    - Single-instance deployments (cache not shared across instances)
    - Cache persistence across restarts is not required
    - Cache sizes are moderate (hundreds to low thousands of entries)

    For multi-instance deployments or large cache sizes, use GenieSemanticCacheParametersModel
    with PostgreSQL backend instead.
    """

    model_config = ConfigDict(use_enum_values=True, extra="forbid")
    time_to_live_seconds: int | None = (
        60 * 60 * 24 * 7
    )  # 1 week default (604800 seconds), None or negative = never expires
    similarity_threshold: float = 0.85  # Minimum similarity for question matching (L2 distance converted to 0-1 scale)
    context_similarity_threshold: float = 0.80  # Minimum similarity for context matching (L2 distance converted to 0-1 scale)
    # Both weights default to None so the validator can tell "not provided" apart
    # from an explicit value. If only one is given, the other is computed as
    # 1 - provided; if neither is given, the defaults 0.6/0.4 are applied.
    # (A non-None default here would make the "computed as 1 - context_weight"
    # path unreachable and raise a spurious sum!=1.0 error instead.)
    question_weight: Optional[float] = (
        None  # Weight for question similarity in combined score (0-1). Defaults to 0.6; if not provided but context_weight is, computed as 1 - context_weight
    )
    context_weight: Optional[float] = (
        None  # Weight for context similarity in combined score (0-1). Defaults to 0.4; if not provided but question_weight is, computed as 1 - question_weight
    )
    embedding_model: str | LLMModel = "databricks-gte-large-en"
    embedding_dims: int | None = None  # Auto-detected if None
    # NOTE(review): a warehouse is still required even though the cache itself is
    # in-memory — presumably used for embedding/SQL execution; confirm with callers.
    warehouse: WarehouseModel
    capacity: int | None = (
        10000  # Maximum cache entries. ~200MB for 10000 entries (1024-dim embeddings). LRU eviction when full. None = unlimited (not recommended for production).
    )
    context_window_size: int = 3  # Number of previous turns to include for context
    max_context_tokens: int = (
        2000  # Maximum context length to prevent extremely long embeddings
    )

    @model_validator(mode="after")
    def compute_and_validate_weights(self) -> Self:
        """
        Compute missing weight(s) and validate that question_weight + context_weight = 1.0.

        Either question_weight or context_weight (or both) can be provided.
        A missing one is computed as 1.0 - provided_weight; if neither is
        provided, the defaults 0.6/0.4 are used. If both are provided, each
        must lie in [0, 1] and they must sum to 1.0.

        Raises:
            ValueError: if a provided weight is outside [0, 1], or if both are
                provided and do not sum to 1.0.
        """
        if self.question_weight is None and self.context_weight is None:
            # Neither provided - apply documented defaults.
            self.question_weight = 0.6
            self.context_weight = 0.4
        elif self.question_weight is None:
            # Only context_weight provided - derive question_weight.
            if not (0.0 <= self.context_weight <= 1.0):
                raise ValueError(
                    f"context_weight must be between 0.0 and 1.0, got {self.context_weight}"
                )
            self.question_weight = 1.0 - self.context_weight
        elif self.context_weight is None:
            # Only question_weight provided - derive context_weight.
            if not (0.0 <= self.question_weight <= 1.0):
                raise ValueError(
                    f"question_weight must be between 0.0 and 1.0, got {self.question_weight}"
                )
            self.context_weight = 1.0 - self.question_weight
        else:
            # Both provided - each must be in range and they must sum to 1.0.
            # (Range check mirrors the single-provided branches; without it,
            # e.g. 1.5/-0.5 would pass the sum check.)
            if not (0.0 <= self.question_weight <= 1.0):
                raise ValueError(
                    f"question_weight must be between 0.0 and 1.0, got {self.question_weight}"
                )
            if not (0.0 <= self.context_weight <= 1.0):
                raise ValueError(
                    f"context_weight must be between 0.0 and 1.0, got {self.context_weight}"
                )
            total_weight = self.question_weight + self.context_weight
            if not abs(total_weight - 1.0) < 0.0001:  # Allow small floating point error
                raise ValueError(
                    f"question_weight ({self.question_weight}) + context_weight ({self.context_weight}) "
                    f"must equal 1.0 (got {total_weight}). These weights determine the relative importance "
                    f"of question vs context similarity in the combined score."
                )

        return self
1873
+
1874
+
1776
1875
  class SearchParametersModel(BaseModel):
1777
1876
  model_config = ConfigDict(use_enum_values=True, extra="forbid")
1778
1877
  num_results: Optional[int] = 10
@@ -28,6 +28,7 @@ from dao_ai.genie.cache.base import (
28
28
  SQLCacheEntry,
29
29
  )
30
30
  from dao_ai.genie.cache.core import execute_sql_via_warehouse
31
+ from dao_ai.genie.cache.in_memory_semantic import InMemorySemanticCacheService
31
32
  from dao_ai.genie.cache.lru import LRUCacheService
32
33
  from dao_ai.genie.cache.semantic import SemanticCacheService
33
34
 
@@ -38,6 +39,7 @@ __all__ = [
38
39
  "SQLCacheEntry",
39
40
  "execute_sql_via_warehouse",
40
41
  # Cache implementations
42
+ "InMemorySemanticCacheService",
41
43
  "LRUCacheService",
42
44
  "SemanticCacheService",
43
45
  ]
@@ -38,7 +38,7 @@ def execute_sql_via_warehouse(
38
38
  w: WorkspaceClient = warehouse.workspace_client
39
39
  warehouse_id: str = str(warehouse.warehouse_id)
40
40
 
41
- logger.trace("Executing cached SQL", layer=layer_name, sql_prefix=sql[:100])
41
+ logger.trace("Executing cached SQL", layer=layer_name, sql=sql[:100])
42
42
 
43
43
  statement_response: StatementResponse = w.statement_execution.execute_statement(
44
44
  statement=sql,