orchestrator-core 4.4.0rc2__py3-none-any.whl → 5.0.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. orchestrator/__init__.py +1 -1
  2. orchestrator/api/api_v1/api.py +7 -0
  3. orchestrator/api/api_v1/endpoints/agent.py +62 -0
  4. orchestrator/api/api_v1/endpoints/processes.py +6 -12
  5. orchestrator/api/api_v1/endpoints/search.py +197 -0
  6. orchestrator/api/api_v1/endpoints/subscriptions.py +0 -1
  7. orchestrator/app.py +4 -0
  8. orchestrator/cli/index_llm.py +73 -0
  9. orchestrator/cli/main.py +8 -1
  10. orchestrator/cli/resize_embedding.py +136 -0
  11. orchestrator/cli/scheduler.py +29 -40
  12. orchestrator/cli/search_explore.py +203 -0
  13. orchestrator/db/models.py +37 -1
  14. orchestrator/graphql/schema.py +0 -5
  15. orchestrator/graphql/schemas/process.py +2 -2
  16. orchestrator/graphql/utils/create_resolver_error_handler.py +1 -1
  17. orchestrator/migrations/versions/schema/2025-08-12_52b37b5b2714_search_index_model_for_llm_integration.py +95 -0
  18. orchestrator/schedules/__init__.py +2 -1
  19. orchestrator/schedules/resume_workflows.py +2 -2
  20. orchestrator/schedules/scheduling.py +24 -64
  21. orchestrator/schedules/task_vacuum.py +2 -2
  22. orchestrator/schedules/validate_products.py +2 -8
  23. orchestrator/schedules/validate_subscriptions.py +2 -2
  24. orchestrator/schemas/search.py +101 -0
  25. orchestrator/search/__init__.py +0 -0
  26. orchestrator/search/agent/__init__.py +1 -0
  27. orchestrator/search/agent/prompts.py +62 -0
  28. orchestrator/search/agent/state.py +8 -0
  29. orchestrator/search/agent/tools.py +122 -0
  30. orchestrator/search/core/__init__.py +0 -0
  31. orchestrator/search/core/embedding.py +64 -0
  32. orchestrator/search/core/exceptions.py +16 -0
  33. orchestrator/search/core/types.py +162 -0
  34. orchestrator/search/core/validators.py +27 -0
  35. orchestrator/search/docs/index.md +37 -0
  36. orchestrator/search/docs/running_local_text_embedding_inference.md +45 -0
  37. orchestrator/search/filters/__init__.py +27 -0
  38. orchestrator/search/filters/base.py +236 -0
  39. orchestrator/search/filters/date_filters.py +75 -0
  40. orchestrator/search/filters/definitions.py +76 -0
  41. orchestrator/search/filters/ltree_filters.py +31 -0
  42. orchestrator/search/filters/numeric_filter.py +60 -0
  43. orchestrator/search/indexing/__init__.py +3 -0
  44. orchestrator/search/indexing/indexer.py +316 -0
  45. orchestrator/search/indexing/registry.py +88 -0
  46. orchestrator/search/indexing/tasks.py +53 -0
  47. orchestrator/search/indexing/traverse.py +209 -0
  48. orchestrator/search/retrieval/__init__.py +3 -0
  49. orchestrator/search/retrieval/builder.py +64 -0
  50. orchestrator/search/retrieval/engine.py +96 -0
  51. orchestrator/search/retrieval/ranker.py +202 -0
  52. orchestrator/search/retrieval/utils.py +88 -0
  53. orchestrator/search/retrieval/validation.py +174 -0
  54. orchestrator/search/schemas/__init__.py +0 -0
  55. orchestrator/search/schemas/parameters.py +114 -0
  56. orchestrator/search/schemas/results.py +47 -0
  57. orchestrator/services/processes.py +11 -16
  58. orchestrator/services/subscriptions.py +0 -4
  59. orchestrator/settings.py +29 -1
  60. orchestrator/targets.py +0 -1
  61. orchestrator/workflow.py +1 -8
  62. orchestrator/workflows/utils.py +1 -48
  63. {orchestrator_core-4.4.0rc2.dist-info → orchestrator_core-5.0.0a1.dist-info}/METADATA +6 -3
  64. {orchestrator_core-4.4.0rc2.dist-info → orchestrator_core-5.0.0a1.dist-info}/RECORD +66 -30
  65. orchestrator/graphql/resolvers/scheduled_tasks.py +0 -36
  66. orchestrator/graphql/schemas/scheduled_task.py +0 -8
  67. orchestrator/schedules/scheduler.py +0 -163
  68. {orchestrator_core-4.4.0rc2.dist-info → orchestrator_core-5.0.0a1.dist-info}/WHEEL +0 -0
  69. {orchestrator_core-4.4.0rc2.dist-info → orchestrator_core-5.0.0a1.dist-info}/licenses/LICENSE +0 -0
orchestrator/search/retrieval/builder.py
@@ -0,0 +1,64 @@
+ from sqlalchemy import Select, String, cast, func, select
+
+ from orchestrator.db.models import AiSearchIndex
+ from orchestrator.search.core.types import EntityType, FilterOp
+ from orchestrator.search.filters import LtreeFilter
+ from orchestrator.search.schemas.parameters import BaseSearchParameters
+
+
+ def create_path_autocomplete_lquery(prefix: str) -> str:
+     """Create the lquery pattern for a multi-level path autocomplete search."""
+     return f"{prefix}*.*"
+
+
+ def build_candidate_query(params: BaseSearchParameters) -> Select:
+     """Build the base query for retrieving candidate entities.
+
+     Constructs a `SELECT` statement that retrieves distinct `entity_id` values
+     from the index table for the given entity type, applying any structured
+     filters from the provided search parameters.
+
+     Parameters
+     ----------
+     params : BaseSearchParameters
+         The search parameters containing the entity type and optional filters.
+
+     Returns:
+     -------
+     Select
+         The SQLAlchemy `Select` object representing the query.
+     """
+     stmt = select(AiSearchIndex.entity_id).where(AiSearchIndex.entity_type == params.entity_type.value).distinct()
+
+     if params.filters is not None:
+         entity_id_col = AiSearchIndex.entity_id
+         stmt = stmt.where(
+             params.filters.to_expression(
+                 entity_id_col,
+                 entity_type_value=params.entity_type.value,
+             )
+         )
+
+     return stmt
+
+
+ def build_paths_query(entity_type: EntityType, prefix: str | None = None, q: str | None = None) -> Select:
+     """Build the query for retrieving paths."""
+     stmt = select(AiSearchIndex.path, AiSearchIndex.value_type).where(AiSearchIndex.entity_type == entity_type.value)
+
+     if prefix:
+         lquery_pattern = create_path_autocomplete_lquery(prefix)
+         ltree_filter = LtreeFilter(op=FilterOp.MATCHES_LQUERY, value=lquery_pattern)
+         stmt = stmt.where(ltree_filter.to_expression(AiSearchIndex.path, path=""))
+
+     if q:
+         score = func.similarity(cast(AiSearchIndex.path, String), q).label("score")
+         stmt = (
+             stmt.add_columns(score)
+             .group_by(AiSearchIndex.path, AiSearchIndex.value_type, score)
+             .order_by(score.desc(), AiSearchIndex.path)
+         )
+     else:
+         stmt = stmt.group_by(AiSearchIndex.path, AiSearchIndex.value_type).order_by(AiSearchIndex.path)
+
+     return stmt
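
For orientation, a minimal usage sketch of the query builders above (illustrative only, not part of the diff; it assumes a SQLAlchemy session and relies on the EntityType enum from orchestrator/search/core/types.py, whose members are not shown here):

    from sqlalchemy.orm import Session

    from orchestrator.search.core.types import EntityType
    from orchestrator.search.retrieval.builder import build_paths_query


    def autocomplete_paths(session: Session, entity_type: EntityType, prefix: str) -> list[str]:
        """Return the distinct indexed paths under a prefix, e.g. for path autocomplete."""
        stmt = build_paths_query(entity_type, prefix=prefix)
        # Each row carries (path, value_type); only the path is needed for autocomplete.
        return [str(row.path) for row in session.execute(stmt)]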
orchestrator/search/retrieval/engine.py
@@ -0,0 +1,96 @@
+ from collections.abc import Sequence
+
+ import structlog
+ from sqlalchemy.engine.row import RowMapping
+ from sqlalchemy.orm import Session
+
+ from orchestrator.search.schemas.parameters import BaseSearchParameters
+ from orchestrator.search.schemas.results import Highlight, SearchResponse, SearchResult
+
+ from .builder import build_candidate_query
+ from .ranker import Ranker
+ from .utils import generate_highlight_indices
+
+ logger = structlog.get_logger(__name__)
+
+
+ def _format_response(db_rows: Sequence[RowMapping], search_params: BaseSearchParameters) -> SearchResponse:
+     """Format database query results into a `SearchResponse`.
+
+     Converts raw SQLAlchemy `RowMapping` objects into `SearchResult` instances,
+     optionally generating highlight metadata if a fuzzy term is present.
+
+     Parameters
+     ----------
+     db_rows : Sequence[RowMapping]
+         The rows returned from the executed SQLAlchemy query.
+     search_params : BaseSearchParameters
+         The parameters used for the search, including any fuzzy term for highlighting.
+
+     Returns:
+     -------
+     SearchResponse
+         A list of `SearchResult` objects containing entity IDs, scores, and
+         optional highlight information.
+     """
+     response: SearchResponse = []
+     for row in db_rows:
+         highlight = None
+         if search_params.fuzzy_term and row.get("highlight_text"):
+             text = row.highlight_text
+             indices = generate_highlight_indices(text, search_params.fuzzy_term)
+             if indices:
+                 highlight = Highlight(text=text, indices=indices)
+
+         response.append(
+             SearchResult(
+                 entity_id=str(row.entity_id),
+                 score=row.score,
+                 highlight=highlight,
+             )
+         )
+     return response
+
+
+ async def execute_search(search_params: BaseSearchParameters, db_session: Session, limit: int = 5) -> SearchResponse:
+     """Execute a hybrid search and return ranked results.
+
+     Builds a candidate entity query based on the given search parameters,
+     applies the appropriate ranking strategy, and executes the final ranked
+     query to retrieve results.
+
+     Parameters
+     ----------
+     search_params : BaseSearchParameters
+         The search parameters specifying vector, fuzzy, or filter criteria.
+     db_session : Session
+         The active SQLAlchemy session for executing the query.
+     limit : int, optional
+         The maximum number of search results to return, by default 5.
+
+     Returns:
+     -------
+     SearchResponse
+         A list of `SearchResult` objects containing entity IDs, scores, and
+         optional highlight metadata.
+
+     Notes:
+     -----
+     If no vector query, filters, or fuzzy term are provided, a warning is logged
+     and an empty result set is returned.
+     """
+     if not search_params.vector_query and not search_params.filters and not search_params.fuzzy_term:
+         logger.warning("No search criteria provided (vector_query, fuzzy_term, or filters).")
+         return []
+
+     candidate_query = build_candidate_query(search_params)
+
+     ranker = await Ranker.from_params(search_params)
+     logger.debug("Using ranker", ranker_type=ranker.__class__.__name__)
+
+     final_stmt = ranker.apply(candidate_query)
+     final_stmt = final_stmt.limit(limit)
+     logger.debug(final_stmt)
+     result = db_session.execute(final_stmt).mappings().all()
+
+     return _format_response(result, search_params)
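
A sketch of how the engine might be called (illustrative only, not part of the diff; it assumes BaseSearchParameters accepts the fields the code above reads, namely entity_type, fuzzy_term, vector_query, and filters; the actual model lives in orchestrator/search/schemas/parameters.py, not shown here):

    from sqlalchemy.orm import Session

    from orchestrator.search.core.types import EntityType
    from orchestrator.search.retrieval.engine import execute_search
    from orchestrator.search.schemas.parameters import BaseSearchParameters


    async def fuzzy_search(session: Session, entity_type: EntityType, term: str):
        # Fuzzy-only search: Ranker.from_params falls back to FuzzyRanker when no vector query is set.
        params = BaseSearchParameters(entity_type=entity_type, fuzzy_term=term)  # assumed constructor
        return await execute_search(params, db_session=session, limit=10)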
orchestrator/search/retrieval/ranker.py
@@ -0,0 +1,202 @@
+ from abc import ABC, abstractmethod
+ from typing import Any
+
+ import structlog
+ from sqlalchemy import Select, bindparam, case, func, literal, select
+ from sqlalchemy.sql.expression import ColumnElement
+
+ from orchestrator.db.models import AiSearchIndex
+ from orchestrator.search.core.embedding import QueryEmbedder
+ from orchestrator.search.schemas.parameters import BaseSearchParameters
+
+ logger = structlog.get_logger(__name__)
+ Index = AiSearchIndex
+
+
+ class Ranker(ABC):
+     """Abstract base class for applying a ranking strategy to a search query."""
+
+     @classmethod
+     async def from_params(cls, params: BaseSearchParameters, use_rrf: bool = True) -> "Ranker":
+         """Create the appropriate ranker instance from search parameters.
+
+         Parameters
+         ----------
+         params : BaseSearchParameters
+             Search parameters including vector queries, fuzzy terms, and filters.
+         use_rrf : bool, optional
+             Whether to use Reciprocal Rank Fusion for hybrid searches, by default True.
+
+         Returns:
+         -------
+         Ranker
+             A concrete ranker instance (semantic, fuzzy, hybrid, RRF hybrid, or structured).
+         """
+         vq, fq = params.vector_query, params.fuzzy_term
+         q_vec = None
+         if vq:
+             q_vec = await QueryEmbedder.generate_for_text_async(vq)
+             if not q_vec:
+                 logger.warning("Embedding generation failed; using non-semantic ranker")
+                 vq = None
+
+         if vq and fq and q_vec is not None:
+             return RrfHybridRanker(q_vec, fq) if use_rrf else HybridRanker(q_vec, fq)
+         if vq and q_vec is not None:
+             return SemanticRanker(q_vec)
+         if fq:
+             return FuzzyRanker(fq)
+         return StructuredRanker()
+
+     @abstractmethod
+     def apply(self, candidate_query: Select) -> Select:
+         """Apply the ranking logic to the given candidate query.
+
+         Parameters
+         ----------
+         candidate_query : Select
+             A SQLAlchemy `Select` statement returning candidate entity IDs.
+
+         Returns:
+         -------
+         Select
+             A new `Select` statement with ranking expressions applied.
+         """
+         ...
+
+
+ class StructuredRanker(Ranker):
+     """Applies a dummy score for purely structured searches with no text query."""
+
+     def apply(self, candidate_query: Select) -> Select:
+         cand = candidate_query.subquery()
+         return select(cand.c.entity_id, literal(1.0).label("score")).select_from(cand).order_by(cand.c.entity_id.asc())
+
+
+ class FuzzyRanker(Ranker):
+     """Ranks results based on the max of fuzzy text similarity scores."""
+
+     def __init__(self, fuzzy_term: str) -> None:
+         self.fuzzy_term = fuzzy_term
+
+     def apply(self, candidate_query: Select) -> Select:
+
+         cand = candidate_query.subquery()
+         score_expr = func.max(func.similarity(Index.value, self.fuzzy_term))
+
+         return (
+             select(Index.entity_id, score_expr.label("score"))
+             .select_from(Index)
+             .join(cand, cand.c.entity_id == Index.entity_id)
+             .group_by(Index.entity_id)
+             .order_by(score_expr.desc().nulls_last(), Index.entity_id.asc())
+         )
+
+
+ class SemanticRanker(Ranker):
+     """Ranks results based on the minimum semantic vector distance."""
+
+     def __init__(self, vector_query: list[float]) -> None:
+         self.vector_query = vector_query
+
+     def apply(self, candidate_query: Select) -> Select:
+         cand = candidate_query.subquery()
+
+         dist = Index.embedding.l2_distance(self.vector_query)
+         score_expr = func.min(dist).label("score")
+
+         return (
+             select(Index.entity_id, score_expr)
+             .select_from(Index)
+             .join(cand, cand.c.entity_id == Index.entity_id)
+             .where(Index.embedding.isnot(None))
+             .group_by(Index.entity_id)
+             .order_by(score_expr.asc().nulls_last(), Index.entity_id.asc())
+         )
+
+
+ class HybridRanker(Ranker):
+     """Ranks results by combining semantic distance and fuzzy similarity.
+
+     Prioritizes fuzzy score, using semantic score as a tie-breaker.
+     """
+
+     def __init__(self, q_vec: list[float], fuzzy_term: str) -> None:
+         self.q_vec = q_vec
+         self.fuzzy_term = fuzzy_term
+
+     def apply(self, candidate_query: Select) -> Select:
+         cand = candidate_query.subquery()
+
+         dist = Index.embedding.l2_distance(self.q_vec)
+         # Semantic: only consider rows where an embedding exists
+         sem_agg = func.min(dist).filter(Index.embedding.isnot(None))
+         # Fuzzy: consider all rows (strings, uuids, etc.)
+         fuzzy_agg = func.max(func.similarity(Index.value, self.fuzzy_term))
+
+         score = sem_agg.label("score")
+
+         return (
+             select(Index.entity_id, score)
+             .select_from(Index)
+             .join(cand, cand.c.entity_id == Index.entity_id)
+             .group_by(Index.entity_id)
+             .order_by(
+                 fuzzy_agg.desc().nulls_last(),
+                 sem_agg.asc().nulls_last(),
+                 Index.entity_id.asc(),
+             )
+         )
+
+
+ class RrfHybridRanker(Ranker):
+     """Reciprocal Rank Fusion of semantic and fuzzy ranking."""
+
+     def __init__(self, q_vec: list[float], fuzzy_term: str, k: int = 60) -> None:
+         self.q_vec = q_vec
+         self.fuzzy_term = fuzzy_term
+         self.k = k
+
+     def apply(self, candidate_query: Select) -> Select:
+         cand = candidate_query.subquery()
+
+         # centroid over rows that have embeddings
+         q_param: ColumnElement[Any] = bindparam("q_vec", self.q_vec, type_=Index.embedding.type)
+         avg_vec = func.avg(Index.embedding).filter(Index.embedding.isnot(None))
+         sem_dist = avg_vec.op("<->")(q_param)
+
+         # fuzzy over ALL rows, substring-friendly for partial UUIDs
+         sim_base = func.similarity(Index.value, self.fuzzy_term)
+         sim_word = func.word_similarity(self.fuzzy_term, Index.value)
+         fuzzy_agg = func.max(func.greatest(sim_base, sim_word))
+
+         scores = (
+             select(
+                 Index.entity_id,
+                 sem_dist.label("semantic_distance"),
+                 fuzzy_agg.label("fuzzy_score"),
+             )
+             .select_from(Index)
+             .join(cand, cand.c.entity_id == Index.entity_id)
+             .group_by(Index.entity_id)
+             .cte("scores")
+         )
+
+         ranked = select(
+             scores.c.entity_id,
+             scores.c.semantic_distance,
+             scores.c.fuzzy_score,
+             func.dense_rank().over(order_by=scores.c.semantic_distance.asc().nulls_last()).label("sem_rank"),
+             func.dense_rank().over(order_by=scores.c.fuzzy_score.desc().nulls_last()).label("fuzzy_rank"),
+         ).cte("ranked_results")
+
+         rrf = (1.0 / (self.k + ranked.c.sem_rank)) + (1.0 / (self.k + ranked.c.fuzzy_rank))
+         score_expr = rrf.label("score")
+         perfect = case((ranked.c.fuzzy_score >= 0.9, 0), else_=1)
+
+         return (
+             select(ranked.c.entity_id, score_expr)
+             .select_from(ranked)
+             .order_by(perfect.asc(), score_expr.desc(), ranked.c.entity_id.asc())
+             .params(q_vec=self.q_vec)
+         )
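
The fused score in RrfHybridRanker is plain Reciprocal Rank Fusion; a standalone sketch of the formula it builds into SQL (illustrative only, the real computation happens inside the statement above):

    def rrf_score(sem_rank: int, fuzzy_rank: int, k: int = 60) -> float:
        """score = 1/(k + semantic_rank) + 1/(k + fuzzy_rank), with k = 60 by default."""
        return 1.0 / (k + sem_rank) + 1.0 / (k + fuzzy_rank)


    print(rrf_score(1, 3))  # ~0.03227: ranked 1st semantically, 3rd on fuzzy similarity
    print(rrf_score(2, 2))  # ~0.03226: a single 1st place slightly outweighs two 2nd places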
orchestrator/search/retrieval/utils.py
@@ -0,0 +1,88 @@
+ import json
+
+ import structlog
+ from sqlalchemy import and_
+ from sqlalchemy_utils.types.ltree import Ltree
+
+ from orchestrator.db.database import WrappedSession
+ from orchestrator.db.models import AiSearchIndex
+ from orchestrator.search.core.types import EntityType
+ from orchestrator.search.indexing.registry import ENTITY_CONFIG_REGISTRY
+ from orchestrator.search.schemas.parameters import BaseSearchParameters
+ from orchestrator.search.schemas.results import SearchResult
+
+ logger = structlog.get_logger(__name__)
+
+
+ def generate_highlight_indices(text: str, term: str) -> list[tuple[int, int]]:
+     if not text or not term:
+         return []
+     indices = []
+     start = text.lower().find(term.lower())
+     if start != -1:
+         end = start + len(term)
+         indices.append((start, end))
+     return indices
+
+
+ def display_filtered_paths_only(
+     results: list[SearchResult], search_params: BaseSearchParameters, db_session: WrappedSession
+ ) -> None:
+     """Display only the paths that were searched for in the results."""
+     if not results:
+         logger.info("No results found.")
+         return
+
+     logger.info("--- Search Results ---")
+
+     searched_paths = search_params.filters.get_all_paths() if search_params.filters else []
+     if not searched_paths:
+         return
+
+     for result in results:
+         for path in searched_paths:
+             record: AiSearchIndex | None = (
+                 db_session.query(AiSearchIndex)
+                 .filter(and_(AiSearchIndex.entity_id == result.entity_id, AiSearchIndex.path == Ltree(path)))
+                 .first()
+             )
+
+             if record:
+                 logger.info(f" {record.path}: {record.value}")
+
+         logger.info("-" * 40)
+
+
+ def display_results(
+     results: list[SearchResult],
+     db_session: WrappedSession,
+     score_label: str = "Score",
+ ) -> None:
+     """Finds the original DB record for each search result and logs its traversed fields."""
+     if not results:
+         logger.info("No results found.")
+         return
+
+     logger.info("--- Search Results ---")
+     for result in results:
+         entity_id = result.entity_id
+         score = result.score
+
+         index_records = db_session.query(AiSearchIndex).filter(AiSearchIndex.entity_id == entity_id).all()
+         if not index_records:
+             logger.warning(f"Could not find indexed records for entity_id={entity_id}")
+             continue
+
+         first_record = index_records[0]
+         kind = EntityType(first_record.entity_type)
+         config = ENTITY_CONFIG_REGISTRY[kind]
+
+         db_entity = db_session.get(config.table, entity_id) if config.table else None
+
+         if db_entity and config.traverser:
+             fields = config.traverser.get_fields(db_entity, config.pk_name, config.root_name)
+             result_obj = {p: v for p, v, _ in fields}
+             logger.info(json.dumps(result_obj, indent=2, default=str))
+             logger.info(f"{score_label}: {score:.4f}\n" + "-" * 20)
+         else:
+             logger.warning(f"Could not display entity {kind.value} with id={entity_id}")
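
generate_highlight_indices above does a single case-insensitive substring match and returns at most one (start, end) pair; for example (illustrative only, not part of the diff):

    from orchestrator.search.retrieval.utils import generate_highlight_indices

    print(generate_highlight_indices("Core link Amsterdam - Paris", "paris"))  # [(22, 27)]
    print(generate_highlight_indices("Core link Amsterdam - Paris", "berlin"))  # []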
orchestrator/search/retrieval/validation.py
@@ -0,0 +1,174 @@
+ from typing import assert_never
+
+ from sqlalchemy import select, text
+ from sqlalchemy.exc import ProgrammingError
+ from sqlalchemy_utils import Ltree
+
+ from orchestrator.db import db
+ from orchestrator.db.database import WrappedSession
+ from orchestrator.db.models import AiSearchIndex
+ from orchestrator.search.core.types import EntityType, FieldType
+ from orchestrator.search.filters import (
+     DateRangeFilter,
+     DateValueFilter,
+     EqualityFilter,
+     FilterCondition,
+     FilterTree,
+     LtreeFilter,
+     NumericRangeFilter,
+     NumericValueFilter,
+     PathFilter,
+     StringFilter,
+ )
+
+
+ def is_filter_compatible_with_field_type(filter_condition: FilterCondition, field_type: FieldType) -> bool:
+     """Check whether a filter condition is compatible with a given field type.
+
+     Parameters
+     ----------
+     filter_condition : FilterCondition
+         The filter condition instance to check.
+     field_type : FieldType
+         The type of field from the index schema.
+
+     Returns:
+     -------
+     bool
+         True if the filter condition is valid for the given field type, False otherwise.
+     """
+
+     match filter_condition:
+         case LtreeFilter():
+             return True  # Filters for path only
+         case DateRangeFilter() | DateValueFilter():
+             return field_type == FieldType.DATETIME
+         case NumericRangeFilter() | NumericValueFilter():
+             return field_type in {FieldType.INTEGER, FieldType.FLOAT}
+         case StringFilter():
+             return field_type == FieldType.STRING
+         case EqualityFilter():
+             return field_type in {
+                 FieldType.BOOLEAN,
+                 FieldType.UUID,
+                 FieldType.BLOCK,
+                 FieldType.RESOURCE_TYPE,
+                 FieldType.STRING,
+             }
+         case _:
+             assert_never(filter_condition)
+
+
+ def is_lquery_syntactically_valid(pattern: str, db_session: WrappedSession) -> bool:
+     """Validate whether a string is a syntactically correct `lquery` pattern.
+
+     Parameters
+     ----------
+     pattern : str
+         The LTree lquery pattern string to validate.
+     db_session : WrappedSession
+         The database session used to test casting.
+
+     Returns:
+     -------
+     bool
+         True if the pattern is valid, False if it fails to cast in PostgreSQL.
+     """
+     try:
+         with db_session.begin_nested():
+             db_session.execute(text("SELECT CAST(:pattern AS lquery)"), {"pattern": pattern})
+         return True
+     except ProgrammingError:
+         return False
+
+
+ def get_structured_filter_schema() -> dict[str, str]:
+     """Retrieve all distinct filterable paths and their field types from the index.
+
+     Returns:
+     -------
+     Dict[str, str]
+         Mapping of path strings to their corresponding field type values.
+     """
+     stmt = select(AiSearchIndex.path, AiSearchIndex.value_type).distinct().order_by(AiSearchIndex.path)
+     result = db.session.execute(stmt)
+     return {str(path): value_type.value for path, value_type in result}
+
+
+ def validate_filter_path(path: str) -> str | None:
+     """Check if a given path exists in the index and return its field type.
+
+     Parameters
+     ----------
+     path : str
+         The fully qualified LTree path.
+
+     Returns:
+     -------
+     Optional[str]
+         The value type of the field if found, otherwise None.
+     """
+     stmt = select(AiSearchIndex.value_type).where(AiSearchIndex.path == Ltree(path)).limit(1)
+     result = db.session.execute(stmt).scalar_one_or_none()
+     return result.value if result else None
+
+
+ async def complete_filter_validation(filter: PathFilter, entity_type: EntityType) -> None:
+     """Validate a PathFilter against the database schema and entity type.
+
+     Checks performed:
+     1. LTree filter syntax (for LtreeFilter only)
+     2. Non-empty path
+     3. Path exists in the database schema
+     4. Filter type matches the field's value_type
+     5. Path starts with the correct entity type prefix (unless wildcard)
+
+     Parameters
+     ----------
+     filter : PathFilter
+         The filter to validate.
+     entity_type : EntityType
+         The entity type being searched.
+
+     Raises:
+     ------
+     ValueError
+         If any of the validation checks fail.
+     """
+     # Ltree is a special case
+     if isinstance(filter.condition, LtreeFilter):
+         lquery_pattern = filter.condition.value
+         if not is_lquery_syntactically_valid(lquery_pattern, db.session):
+             raise ValueError(f"Ltree pattern '{lquery_pattern}' has invalid syntax.")
+         return
+
+     if not filter.path or not filter.path.strip():
+         raise ValueError("Filter path cannot be empty")
+
+     # 1. Check if path exists in database
+     db_field_type_str = validate_filter_path(filter.path)
+     if db_field_type_str is None:
+         raise ValueError(f"Path '{filter.path}' does not exist in database schema")
+
+     db_field_type = FieldType(db_field_type_str)
+
+     # 2. Check filter compatibility with field type
+     if not is_filter_compatible_with_field_type(filter.condition, db_field_type):
+         raise ValueError(
+             f"Filter '{type(filter.condition).__name__}' not compatible with field type '{db_field_type.value}'"
+         )
+
+     # 3. Check entity type prefix requirements (unless it's a wildcard path)
+     expected_prefix = f"{entity_type.value.lower()}."
+     if not filter.path.startswith(expected_prefix) and not filter.path.startswith("*"):
+         raise ValueError(
+             f"Filter path '{filter.path}' must start with '{expected_prefix}' for {entity_type.value} searches."
+         )
+
+
+ async def validate_filter_tree(filters: FilterTree | None, entity_type: EntityType) -> None:
+     """Validate all PathFilter leaves in a FilterTree."""
+     if filters is None:
+         return
+     for leaf in filters.get_all_leaves():
+         await complete_filter_validation(leaf, entity_type)
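
A sketch of wiring the validators above in front of the search engine (illustrative only, not part of the diff; the validated_search helper and the HTTP 422 mapping are assumptions, not the API added by this release):

    from fastapi import HTTPException

    from orchestrator.db import db
    from orchestrator.search.retrieval.engine import execute_search
    from orchestrator.search.retrieval.validation import validate_filter_tree
    from orchestrator.search.schemas.parameters import BaseSearchParameters


    async def validated_search(params: BaseSearchParameters):
        try:
            # Raises ValueError when a filter path, type, or lquery pattern is invalid.
            await validate_filter_tree(params.filters, params.entity_type)
        except ValueError as exc:
            raise HTTPException(status_code=422, detail=str(exc)) from exc
        return await execute_search(params, db_session=db.session)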