orchestrator-core 4.5.0a2-py3-none-any.whl → 4.5.0a3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. orchestrator/__init__.py +1 -1
  2. orchestrator/api/api_v1/endpoints/search.py +13 -0
  3. orchestrator/devtools/populator.py +16 -0
  4. orchestrator/log_config.py +1 -0
  5. orchestrator/migrations/helpers.py +1 -1
  6. orchestrator/schemas/search.py +13 -0
  7. orchestrator/schemas/workflow.py +1 -0
  8. orchestrator/search/agent/__init__.py +13 -0
  9. orchestrator/search/agent/agent.py +13 -0
  10. orchestrator/search/agent/prompts.py +13 -0
  11. orchestrator/search/agent/state.py +13 -0
  12. orchestrator/search/agent/tools.py +27 -5
  13. orchestrator/search/core/__init__.py +12 -0
  14. orchestrator/search/core/embedding.py +13 -4
  15. orchestrator/search/core/exceptions.py +14 -0
  16. orchestrator/search/core/types.py +15 -0
  17. orchestrator/search/core/validators.py +13 -0
  18. orchestrator/search/filters/__init__.py +13 -0
  19. orchestrator/search/filters/base.py +23 -18
  20. orchestrator/search/filters/date_filters.py +13 -0
  21. orchestrator/search/filters/definitions.py +16 -2
  22. orchestrator/search/filters/ltree_filters.py +16 -3
  23. orchestrator/search/filters/numeric_filter.py +13 -0
  24. orchestrator/search/indexing/__init__.py +13 -0
  25. orchestrator/search/indexing/indexer.py +13 -0
  26. orchestrator/search/indexing/registry.py +13 -0
  27. orchestrator/search/indexing/tasks.py +13 -0
  28. orchestrator/search/indexing/traverse.py +17 -5
  29. orchestrator/search/retrieval/__init__.py +13 -0
  30. orchestrator/search/retrieval/builder.py +17 -7
  31. orchestrator/search/retrieval/engine.py +35 -29
  32. orchestrator/search/retrieval/exceptions.py +90 -0
  33. orchestrator/search/retrieval/pagination.py +13 -0
  34. orchestrator/search/retrieval/retrievers/__init__.py +26 -0
  35. orchestrator/search/retrieval/retrievers/base.py +122 -0
  36. orchestrator/search/retrieval/retrievers/fuzzy.py +94 -0
  37. orchestrator/search/retrieval/retrievers/hybrid.py +188 -0
  38. orchestrator/search/retrieval/retrievers/semantic.py +94 -0
  39. orchestrator/search/retrieval/retrievers/structured.py +39 -0
  40. orchestrator/search/retrieval/utils.py +21 -7
  41. orchestrator/search/retrieval/validation.py +54 -76
  42. orchestrator/search/schemas/__init__.py +12 -0
  43. orchestrator/search/schemas/parameters.py +13 -0
  44. orchestrator/search/schemas/results.py +14 -1
  45. orchestrator/workflows/tasks/validate_products.py +1 -1
  46. {orchestrator_core-4.5.0a2.dist-info → orchestrator_core-4.5.0a3.dist-info}/METADATA +2 -2
  47. {orchestrator_core-4.5.0a2.dist-info → orchestrator_core-4.5.0a3.dist-info}/RECORD +49 -43
  48. orchestrator/search/retrieval/retriever.py +0 -447
  49. {orchestrator_core-4.5.0a2.dist-info → orchestrator_core-4.5.0a3.dist-info}/WHEEL +0 -0
  50. {orchestrator_core-4.5.0a2.dist-info → orchestrator_core-4.5.0a3.dist-info}/licenses/LICENSE +0 -0
orchestrator/search/retrieval/retrievers/hybrid.py (new file)
@@ -0,0 +1,188 @@
+ # Copyright 2019-2025 SURF, GÉANT.
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from sqlalchemy import BindParameter, Select, and_, bindparam, case, cast, func, literal, or_, select
+ from sqlalchemy.sql.expression import ColumnElement
+
+ from orchestrator.db.models import AiSearchIndex
+ from orchestrator.search.core.types import SearchMetadata
+
+ from ..pagination import PaginationParams
+ from .base import Retriever
+
+
+ class RrfHybridRetriever(Retriever):
+     """Reciprocal Rank Fusion of semantic and fuzzy ranking with parent-child retrieval."""
+
+     def __init__(
+         self,
+         q_vec: list[float],
+         fuzzy_term: str,
+         pagination_params: PaginationParams,
+         k: int = 60,
+         field_candidates_limit: int = 100,
+     ) -> None:
+         self.q_vec = q_vec
+         self.fuzzy_term = fuzzy_term
+         self.page_after_score = pagination_params.page_after_score
+         self.page_after_id = pagination_params.page_after_id
+         self.k = k
+         self.field_candidates_limit = field_candidates_limit
+
+     def apply(self, candidate_query: Select) -> Select:
+         cand = candidate_query.subquery()
+         q_param: BindParameter[list[float]] = bindparam("q_vec", self.q_vec, type_=AiSearchIndex.embedding.type)
+
+         best_similarity = func.word_similarity(self.fuzzy_term, AiSearchIndex.value)
+         sem_expr = case(
+             (AiSearchIndex.embedding.is_(None), None),
+             else_=AiSearchIndex.embedding.op("<->")(q_param),
+         )
+         sem_val = func.coalesce(sem_expr, literal(1.0)).label("semantic_distance")
+
+         filter_condition = literal(self.fuzzy_term).op("<%")(AiSearchIndex.value)
+
+         field_candidates = (
+             select(
+                 AiSearchIndex.entity_id,
+                 AiSearchIndex.path,
+                 AiSearchIndex.value,
+                 sem_val,
+                 best_similarity.label("fuzzy_score"),
+             )
+             .select_from(AiSearchIndex)
+             .join(cand, cand.c.entity_id == AiSearchIndex.entity_id)
+             .where(
+                 and_(
+                     AiSearchIndex.value_type.in_(self.SEARCHABLE_FIELD_TYPES),
+                     filter_condition,
+                 )
+             )
+             .order_by(
+                 best_similarity.desc().nulls_last(),
+                 sem_expr.asc().nulls_last(),
+                 AiSearchIndex.entity_id.asc(),
+             )
+             .limit(self.field_candidates_limit)
+         ).cte("field_candidates")
+
+         entity_scores = (
+             select(
+                 field_candidates.c.entity_id,
+                 func.avg(field_candidates.c.semantic_distance).label("avg_semantic_distance"),
+                 func.avg(field_candidates.c.fuzzy_score).label("avg_fuzzy_score"),
+             ).group_by(field_candidates.c.entity_id)
+         ).cte("entity_scores")
+
+         entity_highlights = (
+             select(
+                 field_candidates.c.entity_id,
+                 func.first_value(field_candidates.c.value)
+                 .over(
+                     partition_by=field_candidates.c.entity_id,
+                     order_by=[field_candidates.c.fuzzy_score.desc(), field_candidates.c.path.asc()],
+                 )
+                 .label(self.HIGHLIGHT_TEXT_LABEL),
+                 func.first_value(field_candidates.c.path)
+                 .over(
+                     partition_by=field_candidates.c.entity_id,
+                     order_by=[field_candidates.c.fuzzy_score.desc(), field_candidates.c.path.asc()],
+                 )
+                 .label(self.HIGHLIGHT_PATH_LABEL),
+             ).distinct(field_candidates.c.entity_id)
+         ).cte("entity_highlights")
+
+         ranked = (
+             select(
+                 entity_scores.c.entity_id,
+                 entity_scores.c.avg_semantic_distance,
+                 entity_scores.c.avg_fuzzy_score,
+                 entity_highlights.c.highlight_text,
+                 entity_highlights.c.highlight_path,
+                 func.dense_rank()
+                 .over(
+                     order_by=[entity_scores.c.avg_semantic_distance.asc().nulls_last(), entity_scores.c.entity_id.asc()]
+                 )
+                 .label("sem_rank"),
+                 func.dense_rank()
+                 .over(order_by=[entity_scores.c.avg_fuzzy_score.desc().nulls_last(), entity_scores.c.entity_id.asc()])
+                 .label("fuzzy_rank"),
+             ).select_from(
+                 entity_scores.join(entity_highlights, entity_scores.c.entity_id == entity_highlights.c.entity_id)
+             )
+         ).cte("ranked_results")
+
+         # RRF (rank-based)
+         rrf_raw = (1.0 / (self.k + ranked.c.sem_rank)) + (1.0 / (self.k + ranked.c.fuzzy_rank))
+         rrf_num = cast(rrf_raw, self.SCORE_NUMERIC_TYPE)
+
+         # Perfect flag to boost near perfect fuzzy matches as this most likely indicates the desired record.
+         perfect = case((ranked.c.avg_fuzzy_score >= 0.9, 1), else_=0).label("perfect_match")
+
+         # Dynamic beta based on k (and number of sources)
+         # rrf_max = n_sources / (k + 1)
+         k_num = literal(float(self.k), type_=self.SCORE_NUMERIC_TYPE)
+         n_sources = literal(2.0, type_=self.SCORE_NUMERIC_TYPE)  # semantic + fuzzy
+         rrf_max = n_sources / (k_num + literal(1.0, type_=self.SCORE_NUMERIC_TYPE))
+
+         # Choose a small positive margin above rrf_max to ensure strict separation
+         # Keep it small to avoid compressing perfects near 1 after normalization
+         margin = rrf_max * literal(0.05, type_=self.SCORE_NUMERIC_TYPE)  # 5% above bound
+         beta = rrf_max + margin
+
+         fused_num = rrf_num + beta * cast(perfect, self.SCORE_NUMERIC_TYPE)
+
+         # Normalize to [0,1] via the theoretical max (beta + rrf_max)
+         norm_den = beta + rrf_max
+         normalized_score = fused_num / norm_den
+
+         score = cast(
+             func.round(cast(normalized_score, self.SCORE_NUMERIC_TYPE), self.SCORE_PRECISION),
+             self.SCORE_NUMERIC_TYPE,
+         ).label(self.SCORE_LABEL)
+
+         stmt = select(
+             ranked.c.entity_id,
+             score,
+             ranked.c.highlight_text,
+             ranked.c.highlight_path,
+             perfect.label("perfect_match"),
+         ).select_from(ranked)
+
+         stmt = self._apply_fused_pagination(stmt, score, ranked.c.entity_id)
+
+         return stmt.order_by(
+             score.desc().nulls_last(),
+             ranked.c.entity_id.asc(),
+         ).params(q_vec=self.q_vec)
+
+     def _apply_fused_pagination(
+         self,
+         stmt: Select,
+         score_column: ColumnElement,
+         entity_id_column: ColumnElement,
+     ) -> Select:
+         """Keyset paginate by fused score + id."""
+         if self.page_after_score is not None and self.page_after_id is not None:
+             score_param = self._quantize_score_for_pagination(self.page_after_score)
+             stmt = stmt.where(
+                 or_(
+                     score_column < score_param,
+                     and_(score_column == score_param, entity_id_column > self.page_after_id),
+                 )
+             )
+         return stmt
+
+     @property
+     def metadata(self) -> SearchMetadata:
+         return SearchMetadata.hybrid()
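The scoring above is rank-based Reciprocal Rank Fusion: each entity receives a dense rank from each source (average semantic distance ascending, average fuzzy similarity descending), the reciprocal ranks are summed, and entities whose average fuzzy score is at least 0.9 get a beta boost chosen just above the theoretical RRF maximum of n_sources / (k + 1), so near-perfect matches always sort ahead of unboosted ones even after normalization to [0, 1]. A minimal plain-Python sketch of the same arithmetic, with made-up entity scores:

    # Plain-Python sketch of RrfHybridRetriever's fused score; entity numbers are illustrative.
    K = 60                            # RRF constant (constructor default)
    N_SOURCES = 2.0                   # semantic + fuzzy
    RRF_MAX = N_SOURCES / (K + 1.0)   # theoretical maximum of the raw RRF sum
    BETA = RRF_MAX * 1.05             # perfect-match boost: rrf_max + 5% margin


    def dense_rank(values: list[float], reverse: bool = False) -> list[int]:
        """1-based dense rank of each value (ties share a rank)."""
        order = {v: i + 1 for i, v in enumerate(sorted(set(values), reverse=reverse))}
        return [order[v] for v in values]


    # (entity_id, avg_semantic_distance, avg_fuzzy_score) -- illustrative values
    entities = [("a", 0.12, 0.95), ("b", 0.10, 0.40), ("c", 0.30, 0.55)]

    sem_ranks = dense_rank([e[1] for e in entities])                  # lower distance ranks first
    fuzzy_ranks = dense_rank([e[2] for e in entities], reverse=True)  # higher similarity ranks first

    for (eid, _, fuzzy), sr, fr in zip(entities, sem_ranks, fuzzy_ranks):
        rrf = 1.0 / (K + sr) + 1.0 / (K + fr)
        perfect = 1 if fuzzy >= 0.9 else 0                 # near-perfect fuzzy match flag
        score = (rrf + BETA * perfect) / (BETA + RRF_MAX)  # normalize to [0, 1]
        print(f"{eid}: sem_rank={sr} fuzzy_rank={fr} perfect={perfect} score={score:.4f}")
    # "a" lands near 1.0 despite not winning the semantic rank, because the boost
    # alone exceeds the best achievable unboosted RRF sum.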
orchestrator/search/retrieval/retrievers/semantic.py (new file)
@@ -0,0 +1,94 @@
+ # Copyright 2019-2025 SURF, GÉANT.
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from sqlalchemy import Select, and_, cast, func, literal, or_, select
+ from sqlalchemy.sql.expression import ColumnElement
+
+ from orchestrator.db.models import AiSearchIndex
+ from orchestrator.search.core.types import SearchMetadata
+
+ from ..pagination import PaginationParams
+ from .base import Retriever
+
+
+ class SemanticRetriever(Retriever):
+     """Ranks results based on the minimum semantic vector distance."""
+
+     def __init__(self, vector_query: list[float], pagination_params: PaginationParams) -> None:
+         self.vector_query = vector_query
+         self.page_after_score = pagination_params.page_after_score
+         self.page_after_id = pagination_params.page_after_id
+
+     def apply(self, candidate_query: Select) -> Select:
+         cand = candidate_query.subquery()
+
+         dist = AiSearchIndex.embedding.l2_distance(self.vector_query)
+
+         raw_min = func.min(dist).over(partition_by=AiSearchIndex.entity_id)
+
+         # Normalize score to preserve ordering in accordance with other retrievers:
+         # smaller distance = higher score
+         similarity = literal(1.0, type_=self.SCORE_NUMERIC_TYPE) / (
+             literal(1.0, type_=self.SCORE_NUMERIC_TYPE) + cast(raw_min, self.SCORE_NUMERIC_TYPE)
+         )
+
+         score = cast(
+             func.round(cast(similarity, self.SCORE_NUMERIC_TYPE), self.SCORE_PRECISION), self.SCORE_NUMERIC_TYPE
+         ).label(self.SCORE_LABEL)
+
+         combined_query = (
+             select(
+                 AiSearchIndex.entity_id,
+                 score,
+                 func.first_value(AiSearchIndex.value)
+                 .over(partition_by=AiSearchIndex.entity_id, order_by=[dist.asc(), AiSearchIndex.path.asc()])
+                 .label(self.HIGHLIGHT_TEXT_LABEL),
+                 func.first_value(AiSearchIndex.path)
+                 .over(partition_by=AiSearchIndex.entity_id, order_by=[dist.asc(), AiSearchIndex.path.asc()])
+                 .label(self.HIGHLIGHT_PATH_LABEL),
+             )
+             .select_from(AiSearchIndex)
+             .join(cand, cand.c.entity_id == AiSearchIndex.entity_id)
+             .where(AiSearchIndex.embedding.isnot(None))
+             .distinct(AiSearchIndex.entity_id)
+         )
+         final_query = combined_query.subquery("ranked_semantic")
+
+         stmt = select(
+             final_query.c.entity_id,
+             final_query.c.score,
+             final_query.c.highlight_text,
+             final_query.c.highlight_path,
+         ).select_from(final_query)
+
+         stmt = self._apply_semantic_pagination(stmt, final_query.c.score, final_query.c.entity_id)
+
+         return stmt.order_by(final_query.c.score.desc().nulls_last(), final_query.c.entity_id.asc())
+
+     @property
+     def metadata(self) -> SearchMetadata:
+         return SearchMetadata.semantic()
+
+     def _apply_semantic_pagination(
+         self, stmt: Select, score_column: ColumnElement, entity_id_column: ColumnElement
+     ) -> Select:
+         """Apply semantic score pagination with precise Decimal handling."""
+         if self.page_after_score is not None and self.page_after_id is not None:
+             score_param = self._quantize_score_for_pagination(self.page_after_score)
+             stmt = stmt.where(
+                 or_(
+                     score_column < score_param,
+                     and_(score_column == score_param, entity_id_column > self.page_after_id),
+                 )
+             )
+         return stmt
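Here the per-entity score is the minimum L2 distance over the entity's embedded fields, passed through 1 / (1 + distance) so that a distance of 0 maps to 1.0 and growing distances decay toward 0, keeping "higher score is better" consistent across retrievers. A quick sanity check of that mapping (distance values are illustrative; in the package they come from pgvector's l2_distance):

    # SemanticRetriever's normalization: map a minimum L2 distance to a (0, 1] score.
    def semantic_score(min_distance: float) -> float:
        return 1.0 / (1.0 + min_distance)

    distances = [0.0, 0.25, 1.0, 4.0]
    scores = [semantic_score(d) for d in distances]
    print(scores)                                  # [1.0, 0.8, 0.5, 0.2]
    assert scores == sorted(scores, reverse=True)  # smaller distance => higher score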
orchestrator/search/retrieval/retrievers/structured.py (new file)
@@ -0,0 +1,39 @@
+ # Copyright 2019-2025 SURF, GÉANT.
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from sqlalchemy import Select, literal, select
+
+ from orchestrator.search.core.types import SearchMetadata
+
+ from ..pagination import PaginationParams
+ from .base import Retriever
+
+
+ class StructuredRetriever(Retriever):
+     """Applies a dummy score for purely structured searches with no text query."""
+
+     def __init__(self, pagination_params: PaginationParams) -> None:
+         self.page_after_id = pagination_params.page_after_id
+
+     def apply(self, candidate_query: Select) -> Select:
+         cand = candidate_query.subquery()
+         stmt = select(cand.c.entity_id, literal(1.0).label("score")).select_from(cand)
+
+         if self.page_after_id:
+             stmt = stmt.where(cand.c.entity_id > self.page_after_id)
+
+         return stmt.order_by(cand.c.entity_id.asc())
+
+     @property
+     def metadata(self) -> SearchMetadata:
+         return SearchMetadata.structured()
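All three retrievers paginate with keyset (seek) predicates instead of OFFSET: StructuredRetriever seeks on entity_id alone (every row scores 1.0), while the scored retrievers seek on the (score, entity_id) pair, quantizing the cursor score via _quantize_score_for_pagination so the comparison happens at the same precision as the stored score. A sketch of the scored predicate applied to an in-memory page, assuming rows sorted by score descending then id ascending (rows and cursor values are illustrative):

    # Sketch of the keyset predicate the scored retrievers emit in SQL:
    #   score < after_score OR (score = after_score AND entity_id > after_id)
    from decimal import Decimal

    rows = [  # already sorted: score DESC, entity_id ASC
        ("e1", Decimal("0.95")),
        ("e2", Decimal("0.90")),
        ("e3", Decimal("0.90")),
        ("e4", Decimal("0.80")),
    ]

    def next_page(rows, after_score: Decimal, after_id: str):
        return [
            (eid, score)
            for eid, score in rows
            if score < after_score or (score == after_score and eid > after_id)
        ]

    # Cursor = last row of the previous page ("e2", 0.90); ties on score resume by id.
    print(next_page(rows, Decimal("0.90"), "e2"))  # [('e3', Decimal('0.90')), ('e4', Decimal('0.80'))]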
orchestrator/search/retrieval/utils.py
@@ -1,3 +1,16 @@
+ # Copyright 2019-2025 SURF, GÉANT.
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
  import json
  import re
  
@@ -16,7 +29,7 @@ logger = structlog.get_logger(__name__)
  
  
  def generate_highlight_indices(text: str, term: str) -> list[tuple[int, int]]:
-     """Finds all occurrences of individual words from the term with word boundary matching case-insensitively."""
+     """Finds all occurrences of individual words from the term, including both word boundary and substring matches."""
      if not text or not term:
          return []
  
@@ -24,14 +37,15 @@ def generate_highlight_indices(text: str, term: str) -> list[tuple[int, int]]:
      words = [w.strip() for w in term.split() if w.strip()]
  
      for word in words:
+         # First find word boundary matches
          word_boundary_pattern = rf"\b{re.escape(word)}\b"
-         matches = list(re.finditer(word_boundary_pattern, text, re.IGNORECASE))
-
-         if not matches:
-             substring_pattern = re.escape(word)
-             matches = list(re.finditer(substring_pattern, text, re.IGNORECASE))
+         word_matches = list(re.finditer(word_boundary_pattern, text, re.IGNORECASE))
+         all_matches.extend([(m.start(), m.end()) for m in word_matches])
  
-         all_matches.extend([(m.start(), m.end()) for m in matches])
+         # Then find all substring matches
+         substring_pattern = re.escape(word)
+         substring_matches = list(re.finditer(substring_pattern, text, re.IGNORECASE))
+         all_matches.extend([(m.start(), m.end()) for m in substring_matches])
  
      return sorted(set(all_matches))
  
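The behavioral change in generate_highlight_indices: substring matches are now always collected alongside word-boundary matches, instead of only as a fallback when no word-boundary match exists. A quick illustration using a condensed reimplementation of the new logic (the example strings are my own, not from the package's tests):

    import re

    # Condensed reimplementation of the new generate_highlight_indices behavior.
    def highlight(text: str, term: str) -> list[tuple[int, int]]:
        matches: set[tuple[int, int]] = set()
        for word in (w.strip() for w in term.split() if w.strip()):
            # Collect word-boundary matches AND all substring matches for each word.
            for pattern in (rf"\b{re.escape(word)}\b", re.escape(word)):
                matches.update((m.start(), m.end()) for m in re.finditer(pattern, text, re.IGNORECASE))
        return sorted(matches)

    # Old behavior returned only the standalone word-boundary hit (0, 4);
    # the new behavior also reports the substring inside "transport" at (10, 14).
    print(highlight("port transport", "port"))  # [(0, 4), (10, 14)]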
orchestrator/search/retrieval/validation.py
@@ -1,4 +1,15 @@
- from typing import assert_never
+ # Copyright 2019-2025 SURF, GÉANT.
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
  
  from sqlalchemy import select, text
  from sqlalchemy.exc import ProgrammingError
@@ -8,72 +19,48 @@ from orchestrator.db import db
  from orchestrator.db.database import WrappedSession
  from orchestrator.db.models import AiSearchIndex
  from orchestrator.search.core.types import EntityType, FieldType
- from orchestrator.search.filters import (
-     DateRangeFilter,
-     DateValueFilter,
-     EqualityFilter,
-     FilterCondition,
-     FilterTree,
-     LtreeFilter,
-     NumericRangeFilter,
-     NumericValueFilter,
-     PathFilter,
-     StringFilter,
+ from orchestrator.search.filters import FilterCondition, FilterTree, LtreeFilter, PathFilter
+ from orchestrator.search.filters.definitions import operators_for
+ from orchestrator.search.retrieval.exceptions import (
+     EmptyFilterPathError,
+     IncompatibleFilterTypeError,
+     InvalidEntityPrefixError,
+     InvalidLtreePatternError,
+     PathNotFoundError,
  )
  
  
  def is_filter_compatible_with_field_type(filter_condition: FilterCondition, field_type: FieldType) -> bool:
      """Check whether a filter condition is compatible with a given field type.
  
-     Parameters
-     ----------
-     filter_condition : FilterCondition
-         The filter condition instance to check.
-     field_type : FieldType
-         The type of field from the index schema.
+     Args:
+         filter_condition (FilterCondition): The filter condition instance to check.
+         field_type (FieldType): The type of field from the index schema.
  
      Returns:
-     -------
-     bool
-         True if the filter condition is valid for the given field type, False otherwise.
+         bool: True if the filter condition is valid for the given field type, False otherwise.
      """
  
-     match filter_condition:
-         case LtreeFilter():
-             return True  # Filters for path only
-         case DateRangeFilter() | DateValueFilter():
-             return field_type == FieldType.DATETIME
-         case NumericRangeFilter() | NumericValueFilter():
-             return field_type in {FieldType.INTEGER, FieldType.FLOAT}
-         case StringFilter():
-             return field_type == FieldType.STRING
-         case EqualityFilter():
-             return field_type in {
-                 FieldType.BOOLEAN,
-                 FieldType.UUID,
-                 FieldType.BLOCK,
-                 FieldType.RESOURCE_TYPE,
-                 FieldType.STRING,
-             }
-         case _:
-             assert_never(filter_condition)
+     # LtreeFilter is for path filtering only and is thus compatible with all field types.
+     if isinstance(filter_condition, LtreeFilter):
+         return True
+
+     # Get valid operators for this field type and check if the filter's operator is valid.
+     valid_operators = operators_for(field_type)
+     return filter_condition.op in valid_operators
  
  
  def is_lquery_syntactically_valid(pattern: str, db_session: WrappedSession) -> bool:
      """Validate whether a string is a syntactically correct `lquery` pattern.
  
-     Parameters
-     ----------
-     pattern : str
-         The LTree lquery pattern string to validate.
-     db_session : WrappedSession
-         The database session used to test casting.
+     Args:
+         pattern (str): The LTree lquery pattern string to validate.
+         db_session (WrappedSession): The database session used to test casting.
  
      Returns:
-     -------
-     bool
-         True if the pattern is valid, False if it fails to cast in PostgreSQL.
+         bool: True if the pattern is valid, False if it fails to cast in PostgreSQL.
      """
+
      try:
          with db_session.begin_nested():
              db_session.execute(text("SELECT CAST(:pattern AS lquery)"), {"pattern": pattern})
@@ -86,10 +73,9 @@ def get_structured_filter_schema() -> dict[str, str]:
      """Retrieve all distinct filterable paths and their field types from the index.
  
      Returns:
-     -------
-     Dict[str, str]
-         Mapping of path strings to their corresponding field type values.
+         Dict[str, str]: Mapping of path strings to their corresponding field type values.
      """
+
      stmt = select(AiSearchIndex.path, AiSearchIndex.value_type).distinct().order_by(AiSearchIndex.path)
      result = db.session.execute(stmt)
      return {str(path): value_type.value for path, value_type in result}
@@ -98,16 +84,13 @@ def get_structured_filter_schema() -> dict[str, str]:
  def validate_filter_path(path: str) -> str | None:
      """Check if a given path exists in the index and return its field type.
  
-     Parameters
-     ----------
-     path : str
-         The fully qualified LTree path.
+     Args:
+         path (str): The fully qualified LTree path.
  
      Returns:
-     -------
-     Optional[str]
-         The value type of the field if found, otherwise None.
+         Optional[str]: The value type of the field if found, otherwise None.
      """
+
      stmt = select(AiSearchIndex.value_type).where(AiSearchIndex.path == Ltree(path)).limit(1)
      result = db.session.execute(stmt).scalar_one_or_none()
      return result.value if result else None
@@ -123,47 +106,42 @@ async def complete_filter_validation(filter: PathFilter, entity_type: EntityType
      4. Filter type matches the field's value_type
      5. Path starts with the correct entity type prefix (unless wildcard)
  
-     Parameters
-     ----------
-     filter : PathFilter
-         The filter to validate.
-     entity_type : EntityType
-         The entity type being searched.
+     Args:
+         filter (PathFilter): The filter to validate.
+         entity_type (EntityType): The entity type being searched.
  
      Raises:
-     ------
-     ValueError
-         If any of the validation checks fail.
+         ValueError: If any of the validation checks fail.
      """
+
      # Ltree is a special case
      if isinstance(filter.condition, LtreeFilter):
          lquery_pattern = filter.condition.value
          if not is_lquery_syntactically_valid(lquery_pattern, db.session):
-             raise ValueError(f"Ltree pattern '{lquery_pattern}' has invalid syntax.")
+             raise InvalidLtreePatternError(lquery_pattern)
          return
  
      if not filter.path or not filter.path.strip():
-         raise ValueError("Filter path cannot be empty")
+         raise EmptyFilterPathError()
  
      # 1. Check if path exists in database
      db_field_type_str = validate_filter_path(filter.path)
      if db_field_type_str is None:
-         raise ValueError(f"Path '{filter.path}' does not exist in database schema")
+         raise PathNotFoundError(filter.path)
  
      db_field_type = FieldType(db_field_type_str)
  
      # 2. Check filter compatibility with field type
      if not is_filter_compatible_with_field_type(filter.condition, db_field_type):
-         raise ValueError(
-             f"Filter '{type(filter.condition).__name__}' not compatible with field type '{db_field_type.value}'"
+         expected_operators = operators_for(db_field_type)
+         raise IncompatibleFilterTypeError(
+             filter.condition.op.value, db_field_type.value, filter.path, expected_operators
          )
  
      # 3. Check entity type prefix requirements (unless it's a wildcard path)
      expected_prefix = f"{entity_type.value.lower()}."
      if not filter.path.startswith(expected_prefix) and not filter.path.startswith("*"):
-         raise ValueError(
-             f"Filter path '{filter.path}' must start with '{expected_prefix}' for {entity_type.value} searches."
-         )
+         raise InvalidEntityPrefixError(filter.path, expected_prefix, entity_type.value)
  
  
  async def validate_filter_tree(filters: FilterTree | None, entity_type: EntityType) -> None:
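The compatibility check is thus no longer a hand-maintained match over filter classes; it derives validity from the same operators_for table that backs the filter definitions, and failures raise dedicated exception types instead of bare ValueError. A self-contained sketch of that table-driven pattern, where the enums and operator sets are simplified stand-ins rather than the real orchestrator.search definitions:

    from enum import Enum


    class FieldType(Enum):   # stand-in; the real enum is orchestrator.search.core.types.FieldType
        STRING = "string"
        INTEGER = "integer"
        DATETIME = "datetime"


    class FilterOp(Enum):    # stand-in; illustrative subset only
        EQ = "eq"
        LIKE = "like"
        LT = "lt"


    # Stand-in for orchestrator.search.filters.definitions.operators_for:
    # one table drives both validation and error reporting.
    _OPERATORS: dict[FieldType, frozenset[FilterOp]] = {
        FieldType.STRING: frozenset({FilterOp.EQ, FilterOp.LIKE}),
        FieldType.INTEGER: frozenset({FilterOp.EQ, FilterOp.LT}),
        FieldType.DATETIME: frozenset({FilterOp.EQ, FilterOp.LT}),
    }


    def operators_for(field_type: FieldType) -> frozenset[FilterOp]:
        return _OPERATORS[field_type]


    def is_compatible(op: FilterOp, field_type: FieldType) -> bool:
        # Membership test replaces the old match/case over filter classes.
        return op in operators_for(field_type)


    assert is_compatible(FilterOp.LIKE, FieldType.STRING)
    assert not is_compatible(FilterOp.LIKE, FieldType.INTEGER)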
orchestrator/search/schemas/__init__.py (new file)
@@ -0,0 +1,12 @@
+ # Copyright 2019-2025 SURF, GÉANT.
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
orchestrator/search/schemas/parameters.py
@@ -1,3 +1,16 @@
+ # Copyright 2019-2025 SURF, GÉANT.
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
  import uuid
  from typing import Any, Literal
  
orchestrator/search/schemas/results.py
@@ -1,3 +1,16 @@
+ # Copyright 2019-2025 SURF, GÉANT.
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
  from typing import Literal
  
  from pydantic import BaseModel, ConfigDict
@@ -59,6 +72,6 @@ class ComponentInfo(BaseModel)
  
  class TypeDefinition(BaseModel):
      operators: list[FilterOp]
-     valueSchema: dict[FilterOp, ValueSchema]
+     value_schema: dict[FilterOp, ValueSchema]
  
      model_config = ConfigDict(use_enum_values=True)
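Note that renaming valueSchema to value_schema changes the JSON field name API consumers see, since TypeDefinition declares no alias. For comparison, a hedged sketch of how a Pydantic v2 model could keep a camelCase wire format while using a snake_case attribute — this is not what 4.5.0a3 does, just the alternative the rename implies, with simplified field types:

    from pydantic import BaseModel, ConfigDict
    from pydantic.alias_generators import to_camel


    # Sketch only: accept/emit "valueSchema" on the wire while the Python
    # attribute stays value_schema. Types are simplified for self-containment.
    class TypeDefinition(BaseModel):
        model_config = ConfigDict(alias_generator=to_camel, populate_by_name=True)

        operators: list[str]
        value_schema: dict[str, str]


    td = TypeDefinition.model_validate({"operators": ["eq"], "valueSchema": {"eq": "string"}})
    print(td.model_dump(by_alias=True))  # {'operators': ['eq'], 'valueSchema': {'eq': 'string'}}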
orchestrator/workflows/tasks/validate_products.py
@@ -105,7 +105,7 @@ def check_that_products_have_create_modify_and_terminate_workflows() -> State:
      product_data = get_products(filters=[ProductTable.status == "active"])
  
      workflows_not_complete: list = []
-     targets = ["CREATE", "TERMINATE", "MODIFY", "VALIDATE"]
+     targets = ["CREATE", "TERMINATE", "MODIFY", "RECONCILE", "VALIDATE"]
      for product in product_data:
          workflows = {c.target for c in product.workflows if c.target in targets and c.name != "modify_note"}
          if len(workflows) < len(targets):
{orchestrator_core-4.5.0a2.dist-info → orchestrator_core-4.5.0a3.dist-info}/METADATA
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: orchestrator-core
- Version: 4.5.0a2
+ Version: 4.5.0a3
  Summary: This is the orchestrator workflow engine.
  Author-email: SURF <automation-beheer@surf.nl>
  Requires-Python: >=3.11,<3.14
@@ -47,7 +47,7 @@ Requires-Dist: orjson==3.10.18
  Requires-Dist: pgvector>=0.4.1
  Requires-Dist: prometheus-client==0.22.1
  Requires-Dist: psycopg2-binary==2.9.10
- Requires-Dist: pydantic-forms>=1.4.0,<=2.1.0
+ Requires-Dist: pydantic-forms>=1.4.0
  Requires-Dist: pydantic-settings~=2.9.1
  Requires-Dist: pydantic[email]~=2.11.0
  Requires-Dist: python-dateutil==2.8.2