orchestrator-core 4.4.1__py3-none-any.whl → 4.5.0a3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. orchestrator/__init__.py +26 -2
  2. orchestrator/agentic_app.py +84 -0
  3. orchestrator/api/api_v1/api.py +10 -0
  4. orchestrator/api/api_v1/endpoints/search.py +290 -0
  5. orchestrator/app.py +32 -0
  6. orchestrator/cli/index_llm.py +73 -0
  7. orchestrator/cli/main.py +22 -1
  8. orchestrator/cli/resize_embedding.py +135 -0
  9. orchestrator/cli/search_explore.py +208 -0
  10. orchestrator/cli/speedtest.py +151 -0
  11. orchestrator/db/models.py +37 -1
  12. orchestrator/devtools/populator.py +16 -0
  13. orchestrator/llm_settings.py +51 -0
  14. orchestrator/log_config.py +1 -0
  15. orchestrator/migrations/helpers.py +1 -1
  16. orchestrator/migrations/versions/schema/2025-08-12_52b37b5b2714_search_index_model_for_llm_integration.py +95 -0
  17. orchestrator/schemas/search.py +130 -0
  18. orchestrator/schemas/workflow.py +1 -0
  19. orchestrator/search/__init__.py +12 -0
  20. orchestrator/search/agent/__init__.py +21 -0
  21. orchestrator/search/agent/agent.py +60 -0
  22. orchestrator/search/agent/prompts.py +100 -0
  23. orchestrator/search/agent/state.py +21 -0
  24. orchestrator/search/agent/tools.py +258 -0
  25. orchestrator/search/core/__init__.py +12 -0
  26. orchestrator/search/core/embedding.py +73 -0
  27. orchestrator/search/core/exceptions.py +36 -0
  28. orchestrator/search/core/types.py +296 -0
  29. orchestrator/search/core/validators.py +40 -0
  30. orchestrator/search/docs/index.md +37 -0
  31. orchestrator/search/docs/running_local_text_embedding_inference.md +45 -0
  32. orchestrator/search/filters/__init__.py +40 -0
  33. orchestrator/search/filters/base.py +280 -0
  34. orchestrator/search/filters/date_filters.py +88 -0
  35. orchestrator/search/filters/definitions.py +107 -0
  36. orchestrator/search/filters/ltree_filters.py +56 -0
  37. orchestrator/search/filters/numeric_filter.py +73 -0
  38. orchestrator/search/indexing/__init__.py +16 -0
  39. orchestrator/search/indexing/indexer.py +336 -0
  40. orchestrator/search/indexing/registry.py +101 -0
  41. orchestrator/search/indexing/tasks.py +66 -0
  42. orchestrator/search/indexing/traverse.py +334 -0
  43. orchestrator/search/retrieval/__init__.py +16 -0
  44. orchestrator/search/retrieval/builder.py +123 -0
  45. orchestrator/search/retrieval/engine.py +158 -0
  46. orchestrator/search/retrieval/exceptions.py +90 -0
  47. orchestrator/search/retrieval/pagination.py +96 -0
  48. orchestrator/search/retrieval/retrievers/__init__.py +26 -0
  49. orchestrator/search/retrieval/retrievers/base.py +122 -0
  50. orchestrator/search/retrieval/retrievers/fuzzy.py +94 -0
  51. orchestrator/search/retrieval/retrievers/hybrid.py +188 -0
  52. orchestrator/search/retrieval/retrievers/semantic.py +94 -0
  53. orchestrator/search/retrieval/retrievers/structured.py +39 -0
  54. orchestrator/search/retrieval/utils.py +120 -0
  55. orchestrator/search/retrieval/validation.py +152 -0
  56. orchestrator/search/schemas/__init__.py +12 -0
  57. orchestrator/search/schemas/parameters.py +129 -0
  58. orchestrator/search/schemas/results.py +77 -0
  59. orchestrator/services/settings_env_variables.py +2 -2
  60. orchestrator/settings.py +1 -1
  61. orchestrator/workflows/tasks/validate_products.py +1 -1
  62. {orchestrator_core-4.4.1.dist-info → orchestrator_core-4.5.0a3.dist-info}/METADATA +9 -4
  63. {orchestrator_core-4.4.1.dist-info → orchestrator_core-4.5.0a3.dist-info}/RECORD +65 -16
  64. {orchestrator_core-4.4.1.dist-info → orchestrator_core-4.5.0a3.dist-info}/WHEEL +0 -0
  65. {orchestrator_core-4.4.1.dist-info → orchestrator_core-4.5.0a3.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,94 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
14
+ from sqlalchemy import Select, and_, cast, func, literal, or_, select
15
+ from sqlalchemy.sql.expression import ColumnElement
16
+
17
+ from orchestrator.db.models import AiSearchIndex
18
+ from orchestrator.search.core.types import SearchMetadata
19
+
20
+ from ..pagination import PaginationParams
21
+ from .base import Retriever
22
+
23
+
24
class SemanticRetriever(Retriever):
    """Ranks results based on the minimum semantic vector distance."""

    def __init__(self, vector_query: list[float], pagination_params: PaginationParams) -> None:
        """Store the query embedding and the pagination cursor.

        Args:
            vector_query: Embedding compared against ``AiSearchIndex.embedding``.
            pagination_params: Source of ``page_after_score`` and ``page_after_id``.
        """
        self.vector_query = vector_query
        self.page_after_score = pagination_params.page_after_score
        self.page_after_id = pagination_params.page_after_id

    def apply(self, candidate_query: Select) -> Select:
        """Score the candidate entities by their closest embedding.

        Args:
            candidate_query: Query whose result exposes an ``entity_id`` column.

        Returns:
            A select of ``entity_id``, ``score``, ``highlight_text`` and
            ``highlight_path``, ordered by score (descending, NULLs last) and
            then by ``entity_id`` ascending.
        """
        cand = candidate_query.subquery()

        # L2 distance between each indexed embedding and the query vector.
        dist = AiSearchIndex.embedding.l2_distance(self.vector_query)

        # Smallest distance among all index rows of the same entity.
        raw_min = func.min(dist).over(partition_by=AiSearchIndex.entity_id)

        # Normalize score to preserve ordering in accordance with other retrievers:
        # smaller distance = higher score
        similarity = literal(1.0, type_=self.SCORE_NUMERIC_TYPE) / (
            literal(1.0, type_=self.SCORE_NUMERIC_TYPE) + cast(raw_min, self.SCORE_NUMERIC_TYPE)
        )

        # Round to SCORE_PRECISION; the pagination filter below compares against this rounded value.
        score = cast(
            func.round(cast(similarity, self.SCORE_NUMERIC_TYPE), self.SCORE_PRECISION), self.SCORE_NUMERIC_TYPE
        ).label(self.SCORE_LABEL)

        # The highlight is the value/path of the closest row; path breaks distance ties.
        combined_query = (
            select(
                AiSearchIndex.entity_id,
                score,
                func.first_value(AiSearchIndex.value)
                .over(partition_by=AiSearchIndex.entity_id, order_by=[dist.asc(), AiSearchIndex.path.asc()])
                .label(self.HIGHLIGHT_TEXT_LABEL),
                func.first_value(AiSearchIndex.path)
                .over(partition_by=AiSearchIndex.entity_id, order_by=[dist.asc(), AiSearchIndex.path.asc()])
                .label(self.HIGHLIGHT_PATH_LABEL),
            )
            .select_from(AiSearchIndex)
            .join(cand, cand.c.entity_id == AiSearchIndex.entity_id)
            .where(AiSearchIndex.embedding.isnot(None))
            # Every selected column is constant within an entity's partition, so one row per entity suffices.
            .distinct(AiSearchIndex.entity_id)
        )
        final_query = combined_query.subquery("ranked_semantic")

        # NOTE(review): the columns are read back by literal name here, which assumes the
        # *_LABEL constants equal "score", "highlight_text" and "highlight_path" - confirm in the base class.
        stmt = select(
            final_query.c.entity_id,
            final_query.c.score,
            final_query.c.highlight_text,
            final_query.c.highlight_path,
        ).select_from(final_query)

        stmt = self._apply_semantic_pagination(stmt, final_query.c.score, final_query.c.entity_id)

        return stmt.order_by(final_query.c.score.desc().nulls_last(), final_query.c.entity_id.asc())

    @property
    def metadata(self) -> SearchMetadata:
        """Return the metadata produced by ``SearchMetadata.semantic()``."""
        return SearchMetadata.semantic()

    def _apply_semantic_pagination(
        self, stmt: Select, score_column: ColumnElement, entity_id_column: ColumnElement
    ) -> Select:
        """Apply semantic score pagination with precise Decimal handling.

        The filter is only added when both ``page_after_score`` and
        ``page_after_id`` are set. It keeps rows with a strictly lower score,
        or with an equal score and a greater ``entity_id``, which matches the
        ordering applied in ``apply``.
        """
        if self.page_after_score is not None and self.page_after_id is not None:
            # Helper defined outside this class; presumably aligns the cursor with the rounded score - confirm.
            score_param = self._quantize_score_for_pagination(self.page_after_score)
            stmt = stmt.where(
                or_(
                    score_column < score_param,
                    and_(score_column == score_param, entity_id_column > self.page_after_id),
                )
            )
        return stmt
@@ -0,0 +1,39 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
14
+ from sqlalchemy import Select, literal, select
15
+
16
+ from orchestrator.search.core.types import SearchMetadata
17
+
18
+ from ..pagination import PaginationParams
19
+ from .base import Retriever
20
+
21
+
22
class StructuredRetriever(Retriever):
    """Retriever for filter-only searches: every candidate gets the same placeholder score."""

    def __init__(self, pagination_params: PaginationParams) -> None:
        # All rows share one score, so the entity id is the only cursor needed.
        self.page_after_id = pagination_params.page_after_id

    def apply(self, candidate_query: Select) -> Select:
        """Return the candidates with a constant score, ordered by ``entity_id`` ascending.

        When a ``page_after_id`` cursor is set, only entities with a greater
        ``entity_id`` are returned.
        """
        candidates = candidate_query.subquery()
        entity_id = candidates.c.entity_id
        scored = select(entity_id, literal(1.0).label("score")).select_from(candidates)

        cursor = self.page_after_id
        if not cursor:
            return scored.order_by(entity_id.asc())
        return scored.where(entity_id > cursor).order_by(entity_id.asc())

    @property
    def metadata(self) -> SearchMetadata:
        """Return the metadata produced by ``SearchMetadata.structured()``."""
        return SearchMetadata.structured()
@@ -0,0 +1,120 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
14
+ import json
15
+ import re
16
+
17
+ import structlog
18
+ from sqlalchemy import and_
19
+ from sqlalchemy_utils.types.ltree import Ltree
20
+
21
+ from orchestrator.db.database import WrappedSession
22
+ from orchestrator.db.models import AiSearchIndex
23
+ from orchestrator.search.core.types import EntityType
24
+ from orchestrator.search.indexing.registry import ENTITY_CONFIG_REGISTRY
25
+ from orchestrator.search.schemas.parameters import BaseSearchParameters
26
+ from orchestrator.search.schemas.results import SearchResult
27
+
28
+ logger = structlog.get_logger(__name__)
29
+
30
+
31
def generate_highlight_indices(text: str, term: str) -> list[tuple[int, int]]:
    """Locate every case-insensitive occurrence of each word of ``term`` in ``text``.

    Each whitespace-separated word is searched twice: once anchored on word
    boundaries and once as a plain substring. Duplicate spans are collapsed.

    Returns:
        Sorted ``(start, end)`` spans; empty when ``text`` or ``term`` is empty.
    """
    if not (text and term):
        return []

    spans: set[tuple[int, int]] = set()
    for token in term.split():
        escaped = re.escape(token)
        # Word-boundary form first, then the bare substring form.
        for pattern in (rf"\b{escaped}\b", escaped):
            spans.update(found.span() for found in re.finditer(pattern, text, re.IGNORECASE))

    return sorted(spans)
51
+
52
+
53
def display_filtered_paths_only(
    results: list[SearchResult], search_params: BaseSearchParameters, db_session: WrappedSession
) -> None:
    """Display only the paths that were searched for in the results.

    Args:
        results: Search hits to report on.
        search_params: Parameters whose filters supply the paths to show.
        db_session: Session used to look up the indexed value for each path.

    Output goes to the module logger. When there are no filters, or the
    filters yield no paths, only the header line is logged.
    """
    if not results:
        logger.info("No results found.")
        return

    logger.info("--- Search Results ---")

    searched_paths = search_params.filters.get_all_paths() if search_params.filters else []
    if not searched_paths:
        return

    for result in results:
        for path in searched_paths:
            # One lookup per (result, path) pair; only the first matching index row is shown.
            record: AiSearchIndex | None = (
                db_session.query(AiSearchIndex)
                .filter(and_(AiSearchIndex.entity_id == result.entity_id, AiSearchIndex.path == Ltree(path)))
                .first()
            )

            if record:
                logger.info(f" {record.path}: {record.value}")

        # Separator between consecutive results.
        logger.info("-" * 40)
79
+
80
+
81
def display_results(
    results: list[SearchResult],
    db_session: WrappedSession,
    score_label: str = "Score",
) -> None:
    """Log each search result, preferring the matched field when one is present.

    Results that carry a ``matching_field`` are reported with that field only.
    All other results are reloaded from the database and dumped as JSON using
    the traverser registered for their entity type.

    Args:
        results: Search hits to display.
        db_session: Session used to load index rows and entities.
        score_label: Label printed in front of each score.
    """
    if not results:
        logger.info("No results found.")
        return

    separator = "-" * 20
    logger.info("--- Search Results ---")

    for hit in results:
        entity_id, score = hit.entity_id, hit.score

        # A matched field is the most informative output, so show just that.
        if hit.matching_field:
            logger.info(f"Entity ID: {entity_id}")
            logger.info(f"Matched field ({hit.matching_field.path}): {hit.matching_field.text}")
            logger.info(f"{score_label}: {score:.4f}\n" + separator)
            continue

        rows = db_session.query(AiSearchIndex).filter(AiSearchIndex.entity_id == entity_id).all()
        if not rows:
            logger.warning(f"Could not find indexed records for entity_id={entity_id}")
            continue

        # The first index row supplies the entity type used to pick the registry entry.
        kind = EntityType(rows[0].entity_type)
        config = ENTITY_CONFIG_REGISTRY[kind]

        db_entity = db_session.get(config.table, entity_id) if config.table else None

        if not (db_entity and config.traverser):
            logger.warning(f"Could not display entity {kind.value} with id={entity_id}")
            continue

        fields = config.traverser.get_fields(db_entity, config.pk_name, config.root_name)
        flattened = dict((path, value) for path, value, _ in fields)
        logger.info(json.dumps(flattened, indent=2, default=str))
        logger.info(f"{score_label}: {score:.4f}\n" + separator)
@@ -0,0 +1,152 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
14
+ from sqlalchemy import select, text
15
+ from sqlalchemy.exc import ProgrammingError
16
+ from sqlalchemy_utils import Ltree
17
+
18
+ from orchestrator.db import db
19
+ from orchestrator.db.database import WrappedSession
20
+ from orchestrator.db.models import AiSearchIndex
21
+ from orchestrator.search.core.types import EntityType, FieldType
22
+ from orchestrator.search.filters import FilterCondition, FilterTree, LtreeFilter, PathFilter
23
+ from orchestrator.search.filters.definitions import operators_for
24
+ from orchestrator.search.retrieval.exceptions import (
25
+ EmptyFilterPathError,
26
+ IncompatibleFilterTypeError,
27
+ InvalidEntityPrefixError,
28
+ InvalidLtreePatternError,
29
+ PathNotFoundError,
30
+ )
31
+
32
+
33
def is_filter_compatible_with_field_type(filter_condition: FilterCondition, field_type: FieldType) -> bool:
    """Tell whether a filter condition may be applied to a field of the given type.

    Args:
        filter_condition (FilterCondition): The condition to check.
        field_type (FieldType): The field type recorded in the index.

    Returns:
        bool: True for any ``LtreeFilter`` (it filters on paths, not values);
        otherwise True only when the condition's operator is one of
        ``operators_for(field_type)``.
    """
    if not isinstance(filter_condition, LtreeFilter):
        return filter_condition.op in operators_for(field_type)

    # Path-only filter: independent of the field's value type.
    return True
51
+
52
+
53
def is_lquery_syntactically_valid(pattern: str, db_session: WrappedSession) -> bool:
    """Check an ``lquery`` pattern by asking PostgreSQL to cast it.

    Args:
        pattern (str): Candidate LTree lquery pattern.
        db_session (WrappedSession): Session on which the probe statement runs.

    Returns:
        bool: True when the cast succeeds, False when it raises ``ProgrammingError``.
    """
    probe = text("SELECT CAST(:pattern AS lquery)")
    try:
        # Run the probe inside a nested transaction scope.
        with db_session.begin_nested():
            db_session.execute(probe, {"pattern": pattern})
    except ProgrammingError:
        return False
    return True
70
+
71
+
72
def get_structured_filter_schema() -> dict[str, str]:
    """Collect the distinct index paths together with their field types.

    Returns:
        dict[str, str]: Path string mapped to the ``value`` of its value type,
        built in path order. If a path occurs with several value types, the
        last one returned by the query wins.
    """
    query = select(AiSearchIndex.path, AiSearchIndex.value_type).distinct().order_by(AiSearchIndex.path)

    schema: dict[str, str] = {}
    for path, value_type in db.session.execute(query):
        schema[str(path)] = value_type.value
    return schema
82
+
83
+
84
def validate_filter_path(path: str) -> str | None:
    """Check if a given path exists in the index and return its field type.

    Args:
        path (str): The fully qualified LTree path.

    Returns:
        str | None: The ``value`` of the stored value type for the first
        matching index row, or None when no row (or no value type) is found.
    """
    stmt = select(AiSearchIndex.value_type).where(AiSearchIndex.path == Ltree(path)).limit(1)
    value_type = db.session.execute(stmt).scalar_one_or_none()

    # Test for absence explicitly: relying on truthiness would also treat a
    # falsy-but-present enum member as "path not found".
    if value_type is None:
        return None
    return value_type.value
97
+
98
+
99
async def complete_filter_validation(filter: PathFilter, entity_type: EntityType) -> None:
    """Validate a single PathFilter against the index and the searched entity type.

    Checks, in the order they run:
        1. For an ``LtreeFilter`` condition: the lquery pattern must parse.
           No further checks are performed for this condition type.
        2. The path must be non-empty.
        3. The path must exist in the index.
        4. The condition's operator must be valid for the indexed field type.
        5. The path must start with ``"<entity_type>."`` or with ``"*"``.

    Args:
        filter (PathFilter): The filter to validate.
        entity_type (EntityType): The entity type being searched.

    Raises:
        InvalidLtreePatternError: Check 1 failed.
        EmptyFilterPathError: Check 2 failed.
        PathNotFoundError: Check 3 failed.
        IncompatibleFilterTypeError: Check 4 failed.
        InvalidEntityPrefixError: Check 5 failed.
    """
    # Ltree conditions are a special case: only their pattern syntax is checked.
    if isinstance(filter.condition, LtreeFilter):
        pattern = filter.condition.value
        if is_lquery_syntactically_valid(pattern, db.session):
            return
        raise InvalidLtreePatternError(pattern)

    if not (filter.path and filter.path.strip()):
        raise EmptyFilterPathError()

    stored_type = validate_filter_path(filter.path)
    if stored_type is None:
        raise PathNotFoundError(filter.path)
    field_type = FieldType(stored_type)

    if not is_filter_compatible_with_field_type(filter.condition, field_type):
        allowed = operators_for(field_type)
        raise IncompatibleFilterTypeError(filter.condition.op.value, field_type.value, filter.path, allowed)

    # Wildcard paths are exempt from the entity prefix requirement.
    prefix = f"{entity_type.value.lower()}."
    if not filter.path.startswith((prefix, "*")):
        raise InvalidEntityPrefixError(filter.path, prefix, entity_type.value)
145
+
146
+
147
async def validate_filter_tree(filters: FilterTree | None, entity_type: EntityType) -> None:
    """Run ``complete_filter_validation`` on every leaf of the tree; a missing tree is a no-op."""
    leaves = filters.get_all_leaves() if filters is not None else ()
    for path_filter in leaves:
        await complete_filter_validation(path_filter, entity_type)
@@ -0,0 +1,12 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
@@ -0,0 +1,129 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
14
+ import uuid
15
+ from typing import Any, Literal
16
+
17
+ from pydantic import BaseModel, ConfigDict, Field
18
+
19
+ from orchestrator.search.core.types import ActionType, EntityType
20
+ from orchestrator.search.filters import FilterTree
21
+
22
+
23
class BaseSearchParameters(BaseModel):
    """Base model with common search parameters."""

    action: ActionType = Field(default=ActionType.SELECT, description="The action to perform.")
    entity_type: EntityType

    filters: FilterTree | None = Field(default=None, description="A list of structured filters to apply to the search.")

    query: str | None = Field(
        default=None, description="Unified search query - will be processed into vector_query and/or fuzzy_term"
    )

    limit: int = Field(default=10, ge=1, le=30, description="Maximum number of search results to return.")
    model_config = ConfigDict(extra="forbid")

    @classmethod
    def create(cls, entity_type: EntityType, **kwargs: Any) -> "BaseSearchParameters":
        """Build the parameter model registered for ``entity_type``.

        Args:
            entity_type: Entity type used to select the class from ``PARAMETER_REGISTRY``.
            **kwargs: Forwarded to the selected model's constructor.

        Returns:
            An instance of the registered parameter class.

        Raises:
            ValueError: If no class is registered for ``entity_type``.
        """
        # Only the registry lookup is guarded, so a KeyError raised while
        # constructing the model is not misreported as a missing registration.
        try:
            params_cls = PARAMETER_REGISTRY[entity_type]
        except KeyError:
            raise ValueError(f"No search parameter class found for entity type: {entity_type.value}") from None
        return params_cls(entity_type=entity_type, **kwargs)

    @property
    def vector_query(self) -> str | None:
        """Extract vector query from unified query field.

        Returns None when there is no query or when the query parses as a
        UUID; otherwise returns the query unchanged.
        """
        if not self.query:
            return None
        try:
            uuid.UUID(self.query)
        except ValueError:
            return self.query
        return None  # It's a UUID, so disable vector search.

    @property
    def fuzzy_term(self) -> str | None:
        """Extract fuzzy term from unified query field.

        Returns the query only when it consists of exactly one
        whitespace-separated word; otherwise None.
        """
        if not self.query:
            return None

        words = self.query.split()
        # Only use fuzzy for single words
        # otherwise, trigram operator filters out too much.
        return self.query if len(words) == 1 else None
66
+
67
+
68
# Search parameters for SUBSCRIPTION entities; entity_type is pinned to that single value.
class SubscriptionSearchParameters(BaseSearchParameters):
    entity_type: Literal[EntityType.SUBSCRIPTION] = Field(
        default=EntityType.SUBSCRIPTION, description="The type of entity to search."
    )
    # Extra JSON-schema metadata: a title, a description and one example filter
    # tree (an AND node with two path/condition children).
    model_config = ConfigDict(
        json_schema_extra={
            "title": "SearchSubscriptions",
            "description": "Search subscriptions based on specific criteria.",
            "examples": [
                {
                    "filters": {
                        "op": "AND",
                        "children": [
                            {"path": "subscription.status", "condition": {"op": "eq", "value": "provisioning"}},
                            {"path": "subscription.end_date", "condition": {"op": "gte", "value": "2025-01-01"}},
                        ],
                    }
                }
            ],
        }
    )
89
+
90
+
91
# Search parameters for PRODUCT entities; entity_type is pinned to that single value.
class ProductSearchParameters(BaseSearchParameters):
    entity_type: Literal[EntityType.PRODUCT] = Field(
        default=EntityType.PRODUCT, description="The type of entity to search."
    )
    # The example uses the same filter-tree shape ({"op", "children"}) as the
    # subscription example; `filters` is a FilterTree, so a bare list of path
    # filters would not describe a valid value.
    model_config = ConfigDict(
        json_schema_extra={
            "title": "SearchProducts",
            "description": "Search products based on specific criteria.",
            "examples": [
                {
                    "filters": {
                        "op": "AND",
                        "children": [
                            {"path": "product.product_type", "condition": {"op": "eq", "value": "Shop"}},
                        ],
                    }
                }
            ],
        }
    )
108
+
109
+
110
# Search parameters for WORKFLOW entities; entity_type is pinned to that single value.
# No extra JSON-schema metadata is declared for this model.
class WorkflowSearchParameters(BaseSearchParameters):
    entity_type: Literal[EntityType.WORKFLOW] = Field(
        default=EntityType.WORKFLOW, description="The type of entity to search."
    )
114
+
115
+
116
class ProcessSearchParameters(BaseSearchParameters):
    """Search parameters specifically for PROCESS entities."""

    # Pinned to the single PROCESS value, which is also the default.
    entity_type: Literal[EntityType.PROCESS] = Field(
        default=EntityType.PROCESS, description="The type of entity to search."
    )
122
+
123
+
124
# Maps each entity type to its parameter model; BaseSearchParameters.create
# looks the class up here before instantiating it.
PARAMETER_REGISTRY: dict[EntityType, type[BaseSearchParameters]] = {
    EntityType.SUBSCRIPTION: SubscriptionSearchParameters,
    EntityType.PRODUCT: ProductSearchParameters,
    EntityType.WORKFLOW: WorkflowSearchParameters,
    EntityType.PROCESS: ProcessSearchParameters,
}
@@ -0,0 +1,77 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
14
+ from typing import Literal
15
+
16
+ from pydantic import BaseModel, ConfigDict
17
+
18
+ from orchestrator.search.core.types import FilterOp, SearchMetadata, UIType
19
+
20
+
21
class MatchingField(BaseModel):
    """Contains the field that contributed most to the (fuzzy) search result."""

    # Text of the matched field.
    text: str
    # Path of the matched field.
    path: str
    # Optional integer pairs; presumably (start, end) offsets into `text` - confirm against the producer.
    highlight_indices: list[tuple[int, int]] | None = None
27
+
28
+
29
class SearchResult(BaseModel):
    """Represents a single search result item."""

    # Identifier of the matched entity, as a string.
    entity_id: str
    # Ranking score assigned to this result.
    score: float
    # Defaults to 0. NOTE(review): meaning of non-zero values is not visible here - confirm.
    perfect_match: int = 0
    # Field that produced the match, when one is available.
    matching_field: MatchingField | None = None
36
+
37
+
38
class SearchResponse(BaseModel):
    """Response containing search results and metadata."""

    # The individual result items.
    results: list[SearchResult]
    # Metadata describing the search that produced the results.
    metadata: SearchMetadata
43
+
44
+
45
# Describes the value a filter expects: a UI type, "none" or "object".
# Unknown fields are rejected (extra="forbid").
class ValueSchema(BaseModel):
    kind: UIType | Literal["none", "object"] = UIType.STRING
    # Optional nested schemas keyed by field name; presumably used with kind "object" - confirm.
    fields: dict[str, "ValueSchema"] | None = None

    model_config = ConfigDict(extra="forbid")
50
+
51
+
52
# A named leaf with its UI types and the paths at which it occurs.
class LeafInfo(BaseModel):
    name: str
    ui_types: list[UIType]
    paths: list[str]

    # Unknown fields are rejected; enum fields are stored as their values.
    model_config = ConfigDict(
        extra="forbid",
        use_enum_values=True,
    )
61
+
62
+
63
# A named component with its UI types; has the same fields as LeafInfo except `paths`.
class ComponentInfo(BaseModel):
    name: str
    ui_types: list[UIType]

    # Unknown fields are rejected; enum fields are stored as their values.
    model_config = ConfigDict(
        extra="forbid",
        use_enum_values=True,
    )
71
+
72
+
73
# Lists operators together with a value schema per operator.
class TypeDefinition(BaseModel):
    operators: list[FilterOp]
    # Value schema keyed by operator.
    value_schema: dict[FilterOp, ValueSchema]

    # Enum fields are stored as their values.
    model_config = ConfigDict(use_enum_values=True)
@@ -14,7 +14,7 @@
14
14
  from typing import Any, Dict, Type
15
15
 
16
16
  from pydantic import SecretStr as PydanticSecretStr
17
- from pydantic_core import MultiHostUrl, Url
17
+ from pydantic.networks import AnyUrl, _BaseMultiHostUrl
18
18
  from pydantic_settings import BaseSettings
19
19
 
20
20
  from orchestrator.utils.expose_settings import SecretStr as OrchSecretStr
@@ -34,7 +34,7 @@ def mask_value(key: str, value: Any) -> Any:
34
34
  key_lower = key.lower()
35
35
  is_sensitive_key = "secret" in key_lower or "password" in key_lower
36
36
 
37
- if is_sensitive_key or isinstance(value, (OrchSecretStr, PydanticSecretStr, MultiHostUrl, Url)):
37
+ if is_sensitive_key or isinstance(value, (OrchSecretStr, PydanticSecretStr, _BaseMultiHostUrl, AnyUrl)):
38
38
  return MASK
39
39
 
40
40
  return value
orchestrator/settings.py CHANGED
@@ -1,4 +1,4 @@
1
- # Copyright 2019-2020 SURF, GÉANT.
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
2
  # Licensed under the Apache License, Version 2.0 (the "License");
3
3
  # you may not use this file except in compliance with the License.
4
4
  # You may obtain a copy of the License at
@@ -105,7 +105,7 @@ def check_that_products_have_create_modify_and_terminate_workflows() -> State:
105
105
  product_data = get_products(filters=[ProductTable.status == "active"])
106
106
 
107
107
  workflows_not_complete: list = []
108
- targets = ["CREATE", "TERMINATE", "MODIFY", "VALIDATE"]
108
+ targets = ["CREATE", "TERMINATE", "MODIFY", "RECONCILE", "VALIDATE"]
109
109
  for product in product_data:
110
110
  workflows = {c.target for c in product.workflows if c.target in targets and c.name != "modify_note"}
111
111
  if len(workflows) < len(targets):