orchestrator-core 4.4.1__py3-none-any.whl → 4.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. orchestrator/__init__.py +17 -2
  2. orchestrator/agentic_app.py +103 -0
  3. orchestrator/api/api_v1/api.py +14 -2
  4. orchestrator/api/api_v1/endpoints/processes.py +2 -0
  5. orchestrator/api/api_v1/endpoints/search.py +296 -0
  6. orchestrator/app.py +32 -0
  7. orchestrator/cli/main.py +22 -1
  8. orchestrator/cli/search/__init__.py +32 -0
  9. orchestrator/cli/search/index_llm.py +73 -0
  10. orchestrator/cli/search/resize_embedding.py +135 -0
  11. orchestrator/cli/search/search_explore.py +208 -0
  12. orchestrator/cli/search/speedtest.py +151 -0
  13. orchestrator/db/models.py +37 -1
  14. orchestrator/devtools/populator.py +16 -0
  15. orchestrator/domain/base.py +2 -7
  16. orchestrator/domain/lifecycle.py +24 -7
  17. orchestrator/llm_settings.py +57 -0
  18. orchestrator/log_config.py +1 -0
  19. orchestrator/migrations/helpers.py +7 -1
  20. orchestrator/schemas/search.py +130 -0
  21. orchestrator/schemas/workflow.py +1 -0
  22. orchestrator/search/__init__.py +12 -0
  23. orchestrator/search/agent/__init__.py +21 -0
  24. orchestrator/search/agent/agent.py +62 -0
  25. orchestrator/search/agent/prompts.py +100 -0
  26. orchestrator/search/agent/state.py +21 -0
  27. orchestrator/search/agent/tools.py +258 -0
  28. orchestrator/search/core/__init__.py +12 -0
  29. orchestrator/search/core/embedding.py +73 -0
  30. orchestrator/search/core/exceptions.py +36 -0
  31. orchestrator/search/core/types.py +296 -0
  32. orchestrator/search/core/validators.py +40 -0
  33. orchestrator/search/docs/index.md +37 -0
  34. orchestrator/search/docs/running_local_text_embedding_inference.md +46 -0
  35. orchestrator/search/filters/__init__.py +40 -0
  36. orchestrator/search/filters/base.py +295 -0
  37. orchestrator/search/filters/date_filters.py +88 -0
  38. orchestrator/search/filters/definitions.py +107 -0
  39. orchestrator/search/filters/ltree_filters.py +56 -0
  40. orchestrator/search/filters/numeric_filter.py +73 -0
  41. orchestrator/search/indexing/__init__.py +16 -0
  42. orchestrator/search/indexing/indexer.py +334 -0
  43. orchestrator/search/indexing/registry.py +101 -0
  44. orchestrator/search/indexing/tasks.py +69 -0
  45. orchestrator/search/indexing/traverse.py +334 -0
  46. orchestrator/search/llm_migration.py +108 -0
  47. orchestrator/search/retrieval/__init__.py +16 -0
  48. orchestrator/search/retrieval/builder.py +123 -0
  49. orchestrator/search/retrieval/engine.py +154 -0
  50. orchestrator/search/retrieval/exceptions.py +90 -0
  51. orchestrator/search/retrieval/pagination.py +96 -0
  52. orchestrator/search/retrieval/retrievers/__init__.py +26 -0
  53. orchestrator/search/retrieval/retrievers/base.py +123 -0
  54. orchestrator/search/retrieval/retrievers/fuzzy.py +94 -0
  55. orchestrator/search/retrieval/retrievers/hybrid.py +277 -0
  56. orchestrator/search/retrieval/retrievers/semantic.py +94 -0
  57. orchestrator/search/retrieval/retrievers/structured.py +39 -0
  58. orchestrator/search/retrieval/utils.py +120 -0
  59. orchestrator/search/retrieval/validation.py +152 -0
  60. orchestrator/search/schemas/__init__.py +12 -0
  61. orchestrator/search/schemas/parameters.py +129 -0
  62. orchestrator/search/schemas/results.py +77 -0
  63. orchestrator/services/processes.py +2 -1
  64. orchestrator/services/settings_env_variables.py +2 -2
  65. orchestrator/settings.py +8 -1
  66. orchestrator/utils/state.py +6 -1
  67. orchestrator/workflows/steps.py +15 -1
  68. orchestrator/workflows/tasks/validate_products.py +1 -1
  69. {orchestrator_core-4.4.1.dist-info → orchestrator_core-4.5.0.dist-info}/METADATA +15 -8
  70. {orchestrator_core-4.4.1.dist-info → orchestrator_core-4.5.0.dist-info}/RECORD +72 -22
  71. {orchestrator_core-4.4.1.dist-info → orchestrator_core-4.5.0.dist-info}/WHEEL +0 -0
  72. {orchestrator_core-4.4.1.dist-info → orchestrator_core-4.5.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,40 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
14
+ import uuid
15
+
16
+ from dateutil.parser import isoparse
17
+
18
+
19
def is_uuid(value: str) -> bool:
    """Check if a string is a valid UUID."""
    try:
        uuid.UUID(value)
    except (ValueError, TypeError):
        # Not parseable as a UUID (or not a string-like input at all).
        return False
    return True
26
+
27
+
28
def is_iso_date(value: str) -> bool:
    """Check if a string is a valid ISO 8601 date."""
    try:
        # dateutil's isoparse raises on anything that is not ISO 8601.
        isoparse(value)
    except (ValueError, TypeError):
        return False
    return True
35
+
36
+
37
def is_bool_string(value: str) -> bool:
    """Check if a string explicitly represents a boolean value with true/false."""
    normalized = value.strip().lower()
    return normalized == "true" or normalized == "false"
@@ -0,0 +1,37 @@
1
+ # Search Indexing CLI
2
+
3
+ Typer-based CLI for maintaining search indexes (subscriptions, products, processes, workflows).
4
+
5
+ ## Usage
6
+
7
+ Run from project root:
8
+
9
+ ```
10
+ dotenv run python main.py index [COMMAND] [OPTIONS]
11
+ ```
12
+
13
+ ### Commands
14
+
15
+ - `subscriptions` – index `subscription_search_index`
16
+ - `products` – index `product_search_index`
17
+ - `processes` – index `process_search_index`
18
+ - `workflows` – index `workflow_search_index`
19
+
20
+ ### Options
21
+
22
+ - `--<id>` – UUID of a specific entity (default: all)
23
+ - `--dry-run` – no DB writes
24
+ - `--force-index` – re-index even if unchanged
25
+
26
+ ### Examples
27
+
28
+ ```
29
+ # Index all subscriptions
30
+ dotenv run python main.py index subscriptions
31
+
32
+ # Re-index all subscriptions
33
+ dotenv run python main.py index subscriptions --force-index
34
+
35
+ # Index a single subscription
36
+ dotenv run python main.py index subscriptions --subscription-id=<UUID>
37
+ ```
@@ -0,0 +1,46 @@
1
+ # Running a local MiniLM embedding server with Hugging Face TEI
2
+
3
+ Only **OpenAI-compatible endpoints** are supported locally.
4
+
5
+ You can spin up an embedding API based on **sentence-transformers/all-MiniLM-L6-v2** using [Hugging Face TEI](https://github.com/huggingface/text-embeddings-inference):
6
+
7
+ ```bash
8
+ docker run --rm -p 8080:80 ghcr.io/huggingface/text-embeddings-inference:cpu-1.8 \
9
+ --model-id sentence-transformers/all-MiniLM-L6-v2
10
+ ```
11
+
12
+ ---
13
+
14
+ ## Environment variables
15
+
16
+ Point your backend to the local endpoint and declare the new vector size:
17
+
18
+ ```env
19
+ OPENAI_BASE_URL=http://localhost:8080/v1
20
+ EMBEDDING_DIMENSION=384
21
+ EMBEDDING_MAX_BATCH_SIZE=32 # Not required when using OpenAI embeddings
22
+ ```
23
+
24
+ Depending on the model, you might want to change the `EMBEDDING_FALLBACK_MAX_TOKENS` and `EMBEDDING_MAX_BATCH_SIZE` settings, which are set conservatively and according to the requirements of the setup used in this example.
25
+
26
+ ---
27
+
28
+ ## Apply the schema change
29
+
30
+ With these new settings run:
31
+
32
+ ```bash
33
+ dotenv run python main.py embedding resize
34
+ ```
35
+
36
+ **Note** that this will delete all records and you will have to re-index.
37
+
38
+ ---
39
+
40
+ ## Re-index embeddings
41
+
42
+ ```bash
43
+ dotenv run python main.py index subscriptions
44
+ ```
45
+
46
+ The search index now uses **384-dimension MiniLM vectors** served from your local Docker container. That’s it! 🚀
@@ -0,0 +1,40 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
14
+ from .base import (
15
+ EqualityFilter,
16
+ FilterCondition,
17
+ FilterTree,
18
+ PathFilter,
19
+ StringFilter,
20
+ )
21
+ from .date_filters import DateFilter, DateRangeFilter, DateValueFilter
22
+ from .ltree_filters import LtreeFilter
23
+ from .numeric_filter import NumericFilter, NumericRangeFilter, NumericValueFilter
24
+
25
+ __all__ = [
26
+ # Base filter classes
27
+ "PathFilter",
28
+ "FilterTree",
29
+ "FilterCondition",
30
+ "StringFilter",
31
+ "EqualityFilter",
32
+ # Filters for specific value types
33
+ "NumericValueFilter",
34
+ "NumericRangeFilter",
35
+ "DateValueFilter",
36
+ "DateRangeFilter",
37
+ "DateFilter",
38
+ "LtreeFilter",
39
+ "NumericFilter",
40
+ ]
@@ -0,0 +1,295 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
14
+ from __future__ import annotations
15
+
16
+ from itertools import count
17
+ from typing import Any, ClassVar, Literal
18
+
19
+ from pydantic import BaseModel, ConfigDict, Field, model_validator
20
+ from sqlalchemy import BinaryExpression, and_, cast, exists, literal, or_, select
21
+ from sqlalchemy.dialects.postgresql import BOOLEAN
22
+ from sqlalchemy.sql.elements import ColumnElement
23
+ from sqlalchemy_utils.types.ltree import Ltree
24
+
25
+ from orchestrator.db.models import AiSearchIndex
26
+ from orchestrator.search.core.types import BooleanOperator, FieldType, FilterOp, SQLAColumn, UIType
27
+
28
+ from .date_filters import DateFilter
29
+ from .ltree_filters import LtreeFilter
30
+ from .numeric_filter import NumericFilter
31
+
32
+
33
class EqualityFilter(BaseModel):
    """Equality / inequality comparison against a single scalar value."""

    op: Literal[FilterOp.EQ, FilterOp.NEQ]
    value: Any

    def to_expression(self, column: SQLAColumn, path: str) -> BinaryExpression[bool] | ColumnElement[bool]:
        """Build the SQLAlchemy predicate for this condition on *column*."""
        if isinstance(self.value, bool):
            # Values are stored as text; cast so boolean IS semantics apply.
            as_bool = cast(column, BOOLEAN)
            predicate = as_bool.is_(self.value)
            return predicate if self.op == FilterOp.EQ else ~predicate
        # Non-boolean values are compared via their string representation.
        text_value = str(self.value)
        if self.op == FilterOp.EQ:
            return column == text_value
        return column != text_value
43
+
44
+
45
class StringFilter(BaseModel):
    """SQL ``LIKE`` filter; the pattern must contain at least one wildcard."""

    op: Literal[FilterOp.LIKE]
    value: str

    def to_expression(self, column: SQLAColumn, path: str) -> ColumnElement[bool]:
        """Apply the LIKE pattern in ``value`` to *column*."""
        return column.like(self.value)

    @model_validator(mode="after")
    def validate_like_pattern(self) -> StringFilter:
        """If the operation is 'like', the value must contain a wildcard."""
        has_wildcard = ("%" in self.value) or ("_" in self.value)
        if self.op == FilterOp.LIKE and not has_wildcard:
            raise ValueError("The value for a 'like' operation must contain a wildcard character ('%' or '_').")
        return self
59
+
60
+
61
+ # Order matters! Ambiguous ops (like 'eq') are resolved by first matching filter
62
+ FilterCondition = (
63
+ DateFilter # DATETIME
64
+ | NumericFilter # INT/FLOAT
65
+ | StringFilter # STRING TODO: convert to hybrid search?
66
+ | LtreeFilter # Path
67
+ | EqualityFilter # BOOLEAN/UUID/BLOCK/RESOURCE_TYPE - most generic, try last
68
+ )
69
+
70
+
71
class PathFilter(BaseModel):
    """A single filter leaf: one condition applied at a specific ltree path."""

    path: str = Field(description="The ltree path of the field to filter on, e.g., 'subscription.customer_id'.")
    condition: FilterCondition = Field(description="The filter condition to apply.")

    # UI-level kind of the value; drives the field-type guard in to_expression().
    value_kind: UIType

    model_config = ConfigDict(
        json_schema_extra={
            "examples": [
                {"path": "subscription.status", "condition": {"op": "eq", "value": "active"}, "value_kind": "string"},
                {
                    "path": "subscription.customer_id",
                    "condition": {"op": "neq", "value": "acme"},
                    "value_kind": "string",
                },
                {
                    "path": "subscription.start_date",
                    "condition": {"op": "gt", "value": "2025-01-01"},
                    "value_kind": "datetime",
                },
                {
                    "path": "subscription.end_date",
                    "condition": {
                        "op": "between",
                        "value": {"start": "2025-06-01", "end": "2025-07-01"},
                    },
                    "value_kind": "datetime",
                },
                {
                    "path": "subscription",
                    "condition": {"op": "has_component", "value": "node"},
                    "value_kind": "component",
                },
            ]
        }
    )

    @model_validator(mode="before")
    @classmethod
    def _transfer_path_to_value_if_needed(cls, data: Any) -> Any:
        """Transform for path-only filters.

        If `op` is `has_component`, `not_has_component`, or `ends_with` and no `value` is
        provided in the `condition`, this validator will automatically use the `path`
        field as the `value` and set the `path` to a wildcard '*' for the query.
        """
        if isinstance(data, dict):
            path = data.get("path")
            condition = data.get("condition")

            if path and isinstance(condition, dict):
                op = condition.get("op")
                value = condition.get("value")

                path_only_ops = [FilterOp.HAS_COMPONENT, FilterOp.NOT_HAS_COMPONENT, FilterOp.ENDS_WITH]

                # The given path becomes the search term; '*' matches any path.
                if op in path_only_ops and value is None:
                    condition["value"] = path
                    data["path"] = "*"
        return data

    def to_expression(self, value_column: SQLAColumn, value_type_column: SQLAColumn) -> ColumnElement[bool]:
        """Convert the path filter into a SQLAlchemy expression with type safety.

        This method creates a type guard to ensure we only match compatible field types,
        then delegates to the specific filter condition.

        Args:
            value_column (ColumnElement): The SQLAlchemy column element representing the value to be filtered.
            value_type_column (ColumnElement): The SQLAlchemy column element representing the field type.

        Returns:
            ColumnElement[bool]: A SQLAlchemy boolean expression that can be used in a ``WHERE`` clause.
        """

        # Type guard - only match compatible field types. If no FieldType maps to
        # this value_kind, the guard degenerates to TRUE rather than matching nothing.
        allowed_field_types = [ft.value for ft in FieldType if UIType.from_field_type(ft) == self.value_kind]
        type_guard = value_type_column.in_(allowed_field_types) if allowed_field_types else literal(True)

        return and_(type_guard, self.condition.to_expression(value_column, self.path))
152
+
153
+
154
class FilterTree(BaseModel):
    """Recursive boolean combination (AND/OR) of PathFilter leaves."""

    op: BooleanOperator = Field(
        description="Operator for grouping conditions in uppercase.", default=BooleanOperator.AND
    )

    children: list[FilterTree | PathFilter] = Field(min_length=1, description="Path filters or nested groups.")

    # Maximum allowed nesting depth of groups; enforced by _validate_depth below.
    MAX_DEPTH: ClassVar[int] = 5

    model_config = ConfigDict(
        json_schema_extra={
            "description": (
                "Boolean filter tree. Operators must be UPPERCASE: AND / OR.\n"
                "Node shapes:\n"
                " • Group: {'op':'AND'|'OR', 'children': [<PathFilter|FilterTree>, ...]}\n"
                " • Leaf (PathFilter): {'path':'<ltree>', 'condition': {...}}\n"
                "Rules:\n"
                " • Do NOT put 'op' or 'children' inside a leaf 'condition'.\n"
                f" • Max depth = {MAX_DEPTH}.\n"
            ),
            "examples": [
                {
                    "description": "Simple filters",
                    "op": "AND",
                    "children": [
                        {"path": "subscription.status", "condition": {"op": "eq", "value": "active"}},
                        {"path": "subscription.start_date", "condition": {"op": "gt", "value": "2021-01-01"}},
                    ],
                },
                {
                    "description": "Complex filters with OR group",
                    "op": "AND",
                    "children": [
                        {"path": "subscription.start_date", "condition": {"op": "gte", "value": "2024-01-01"}},
                        {
                            "op": "OR",
                            "children": [
                                {"path": "subscription.product.name", "condition": {"op": "like", "value": "%fiber%"}},
                                {"path": "subscription.customer_id", "condition": {"op": "eq", "value": "Surf"}},
                            ],
                        },
                    ],
                },
            ],
        }
    )

    @model_validator(mode="after")
    def _validate_depth(self) -> FilterTree:
        """Reject trees nested deeper than MAX_DEPTH (a leaf counts as depth 1)."""
        def depth(node: "FilterTree | PathFilter") -> int:
            return 1 + max(depth(c) for c in node.children) if isinstance(node, FilterTree) else 1

        if depth(self) > self.MAX_DEPTH:
            raise ValueError(f"FilterTree nesting exceeds MAX_DEPTH={self.MAX_DEPTH}")
        return self

    @classmethod
    def from_flat_and(cls, filters: list[PathFilter]) -> FilterTree | None:
        """Wrap a flat list of PathFilter into an AND group (or None)."""
        return None if not filters else cls(op=BooleanOperator.AND, children=list(filters))

    def get_all_paths(self) -> set[str]:
        """Collects all unique paths from the PathFilter leaves in the tree."""
        return {leaf.path for leaf in self.get_all_leaves()}

    def get_all_leaves(self) -> list[PathFilter]:
        """Collect all PathFilter leaves in the tree."""
        leaves: list[PathFilter] = []
        for child in self.children:
            if isinstance(child, PathFilter):
                leaves.append(child)
            else:
                # Nested group: recurse and flatten.
                leaves.extend(child.get_all_leaves())
        return leaves

    @staticmethod
    def _build_correlates(
        alias: Any, entity_id_col: SQLAColumn, entity_type_value: str | None
    ) -> list[ColumnElement[bool]]:
        """Build the correlation predicates that link the subquery to the outer query."""
        correlates = [alias.entity_id == entity_id_col]
        if entity_type_value is not None:
            correlates.append(alias.entity_type == entity_type_value)
        return correlates

    @staticmethod
    def _handle_ltree_filter(pf: PathFilter, alias: Any, correlates: list[ColumnElement[bool]]) -> ColumnElement[bool]:
        """Handle path-only filters (has_component, not_has_component, ends_with)."""
        # row-level predicate is always positive; negation is applied to the EXISTS
        positive = pf.condition.to_expression(alias.path, pf.path)
        subq = select(1).select_from(alias).where(and_(*correlates, positive))
        if pf.condition.op == FilterOp.NOT_HAS_COMPONENT:
            return ~exists(subq)  # NOT at the entity level
        return exists(subq)

    @staticmethod
    def _handle_value_filter(pf: PathFilter, alias: Any, correlates: list[ColumnElement[bool]]) -> ColumnElement[bool]:
        """Handle value-based filters (equality, comparison, etc)."""
        if "." not in pf.path:
            # Bare label (no dots): match any index path that ends with this component.
            path_pred = LtreeFilter(op=FilterOp.ENDS_WITH, value=pf.path).to_expression(alias.path, "")
        else:
            path_pred = alias.path == Ltree(pf.path)

        value_pred = pf.to_expression(alias.value, alias.value_type)
        subq = select(1).select_from(alias).where(and_(*correlates, path_pred, value_pred))
        return exists(subq)

    def to_expression(
        self,
        entity_id_col: SQLAColumn,
        *,
        entity_type_value: str | None = None,
    ) -> ColumnElement[bool]:
        """Compile this tree into a SQLAlchemy boolean expression.

        Args:
            entity_id_col (SQLAColumn): Column in the outer query representing the entity ID.
            entity_type_value (str, optional): If provided, each subquery is additionally constrained to this entity type.

        Returns:
            ColumnElement[bool]: A SQLAlchemy expression suitable for use in a WHERE clause.
        """
        from sqlalchemy.orm import aliased

        # Unique suffix per leaf so each EXISTS subquery gets its own alias.
        alias_idx = count(1)

        def leaf_exists(pf: PathFilter) -> ColumnElement[bool]:
            """Convert a PathFilter into an EXISTS subquery."""
            alias = aliased(AiSearchIndex, name=f"flt_{next(alias_idx)}")
            correlates = self._build_correlates(alias, entity_id_col, entity_type_value)

            if isinstance(pf.condition, LtreeFilter):
                return self._handle_ltree_filter(pf, alias, correlates)
            return self._handle_value_filter(pf, alias, correlates)

        def compile_node(node: FilterTree | PathFilter) -> ColumnElement[bool]:
            # Recursively AND/OR the compiled children; leaves become EXISTS.
            if isinstance(node, FilterTree):
                compiled = [compile_node(ch) for ch in node.children]
                return and_(*compiled) if node.op == BooleanOperator.AND else or_(*compiled)
            return leaf_exists(node)

        return compile_node(self)
@@ -0,0 +1,88 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
14
+ from datetime import date, datetime
15
+ from typing import Annotated, Any, Literal
16
+
17
+ from dateutil.parser import parse as dt_parse
18
+ from pydantic import BaseModel, BeforeValidator, Field, model_validator
19
+ from sqlalchemy import TIMESTAMP, and_
20
+ from sqlalchemy import cast as sa_cast
21
+ from sqlalchemy.sql.elements import ColumnElement
22
+
23
+ from orchestrator.search.core.types import FilterOp, SQLAColumn
24
+
25
+
26
+ def _validate_date_string(v: Any) -> Any:
27
+ if not isinstance(v, str):
28
+ return v
29
+ try:
30
+ dt_parse(v)
31
+ return v
32
+ except Exception as exc:
33
+ raise ValueError("is not a valid date or datetime string") from exc
34
+
35
+
36
+ DateValue = datetime | date | str
37
+ ValidatedDateValue = Annotated[DateValue, BeforeValidator(_validate_date_string)]
38
+
39
+
40
class DateRange(BaseModel):
    """A date interval; `end` must be strictly after `start`."""

    start: ValidatedDateValue
    end: ValidatedDateValue

    @model_validator(mode="after")
    def _order(self) -> "DateRange":
        """Reject empty or inverted ranges.

        Raises:
            ValueError: If `end` is not strictly after `start`.
        """
        to_datetime = dt_parse(str(self.end))
        from_datetime = dt_parse(str(self.start))
        if to_datetime <= from_datetime:
            # Bug fix: the previous message referenced 'to'/'from', which are not
            # this model's field names; report the actual fields 'start'/'end'.
            raise ValueError("'end' must be after 'start'")
        return self
52
+
53
+
54
class DateValueFilter(BaseModel):
    """A filter that operates on a single date value."""

    op: Literal[FilterOp.EQ, FilterOp.NEQ, FilterOp.LT, FilterOp.LTE, FilterOp.GT, FilterOp.GTE]
    value: ValidatedDateValue

    def to_expression(self, column: SQLAColumn, path: str) -> ColumnElement[bool]:
        """Build the SQLAlchemy comparison for this operator against *column*."""
        # Values are stored as text; cast to a tz-aware timestamp before comparing.
        ts_column = sa_cast(column, TIMESTAMP(timezone=True))
        if self.op == FilterOp.EQ:
            return ts_column == self.value
        if self.op == FilterOp.NEQ:
            return ts_column != self.value
        if self.op == FilterOp.LT:
            return ts_column < self.value
        if self.op == FilterOp.LTE:
            return ts_column <= self.value
        if self.op == FilterOp.GT:
            return ts_column > self.value
        if self.op == FilterOp.GTE:
            return ts_column >= self.value
75
+
76
+
77
class DateRangeFilter(BaseModel):
    """A filter that operates on a range of dates."""

    op: Literal[FilterOp.BETWEEN]
    value: DateRange

    def to_expression(self, column: SQLAColumn, path: str) -> ColumnElement[bool]:
        """Match values in the half-open interval [start, end)."""
        ts_column = sa_cast(column, TIMESTAMP(timezone=True))
        lower_bound = ts_column >= self.value.start
        upper_bound = ts_column < self.value.end
        return and_(lower_bound, upper_bound)
86
+
87
+
88
+ DateFilter = Annotated[DateValueFilter | DateRangeFilter, Field(discriminator="op")]
@@ -0,0 +1,107 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
14
+ from orchestrator.search.core.types import FieldType, FilterOp, UIType
15
+ from orchestrator.search.schemas.results import TypeDefinition, ValueSchema
16
+
17
+
18
def operators_for(ft: FieldType) -> list[FilterOp]:
    """Return the list of valid operators for a given FieldType."""
    # The schema map's keys are exactly the supported operators, in order.
    schema = value_schema_for(ft)
    return [op for op in schema]
21
+
22
+
23
def component_operators() -> dict[FilterOp, ValueSchema]:
    """Return operators available for path components."""
    supported_ops = (FilterOp.HAS_COMPONENT, FilterOp.NOT_HAS_COMPONENT)
    return {op: ValueSchema(kind=UIType.COMPONENT) for op in supported_ops}
29
+
30
+
31
def value_schema_for(ft: FieldType) -> dict[FilterOp, ValueSchema]:
    """Return the value schema map for a given FieldType."""
    if ft == FieldType.BOOLEAN:
        # Booleans only support (in)equality.
        return {
            FilterOp.EQ: ValueSchema(kind=UIType.BOOLEAN),
            FilterOp.NEQ: ValueSchema(kind=UIType.BOOLEAN),
        }

    if ft in (FieldType.INTEGER, FieldType.FLOAT):
        kind = UIType.NUMBER
    elif ft == FieldType.DATETIME:
        kind = UIType.DATETIME
    else:
        # Strings (and any other types) support equality plus LIKE.
        return {
            FilterOp.EQ: ValueSchema(kind=UIType.STRING),
            FilterOp.NEQ: ValueSchema(kind=UIType.STRING),
            FilterOp.LIKE: ValueSchema(kind=UIType.STRING),
        }

    # NUMBER and DATETIME share the full ordered-comparison set plus BETWEEN.
    comparison_ops = (FilterOp.EQ, FilterOp.NEQ, FilterOp.LT, FilterOp.LTE, FilterOp.GT, FilterOp.GTE)
    schema = {op: ValueSchema(kind=kind) for op in comparison_ops}
    schema[FilterOp.BETWEEN] = ValueSchema(
        kind="object",
        fields={
            "start": ValueSchema(kind=kind),
            "end": ValueSchema(kind=kind),
        },
    )
    return schema
78
+
79
+
80
def generate_definitions() -> dict[UIType, TypeDefinition]:
    """Generate the full definitions dictionary for all UI types."""
    # Representative FieldType per UI type; anything unlisted falls back to STRING.
    representative_field_types = {
        UIType.NUMBER: FieldType.INTEGER,
        UIType.DATETIME: FieldType.DATETIME,
        UIType.BOOLEAN: FieldType.BOOLEAN,
    }

    definitions: dict[UIType, TypeDefinition] = {}
    for ui_type in UIType:
        if ui_type == UIType.COMPONENT:
            # Special case for component filtering
            comp_ops = component_operators()
            definitions[ui_type] = TypeDefinition(
                operators=list(comp_ops.keys()),
                value_schema=comp_ops,
            )
            continue

        # Regular field types
        rep_ft = representative_field_types.get(ui_type, FieldType.STRING)
        definitions[ui_type] = TypeDefinition(
            operators=operators_for(rep_ft),
            value_schema=value_schema_for(rep_ft),
        )
    return definitions