orchestrator-core 4.4.2-py3-none-any.whl → 4.5.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- orchestrator/__init__.py +17 -2
- orchestrator/agentic_app.py +103 -0
- orchestrator/api/api_v1/api.py +14 -2
- orchestrator/api/api_v1/endpoints/search.py +296 -0
- orchestrator/app.py +32 -0
- orchestrator/cli/main.py +22 -1
- orchestrator/cli/search/__init__.py +32 -0
- orchestrator/cli/search/index_llm.py +73 -0
- orchestrator/cli/search/resize_embedding.py +135 -0
- orchestrator/cli/search/search_explore.py +208 -0
- orchestrator/cli/search/speedtest.py +151 -0
- orchestrator/db/models.py +37 -1
- orchestrator/devtools/populator.py +16 -0
- orchestrator/domain/base.py +2 -7
- orchestrator/domain/lifecycle.py +24 -7
- orchestrator/llm_settings.py +57 -0
- orchestrator/log_config.py +1 -0
- orchestrator/migrations/helpers.py +7 -1
- orchestrator/schemas/search.py +130 -0
- orchestrator/schemas/workflow.py +1 -0
- orchestrator/search/__init__.py +12 -0
- orchestrator/search/agent/__init__.py +21 -0
- orchestrator/search/agent/agent.py +62 -0
- orchestrator/search/agent/prompts.py +100 -0
- orchestrator/search/agent/state.py +21 -0
- orchestrator/search/agent/tools.py +258 -0
- orchestrator/search/core/__init__.py +12 -0
- orchestrator/search/core/embedding.py +73 -0
- orchestrator/search/core/exceptions.py +36 -0
- orchestrator/search/core/types.py +296 -0
- orchestrator/search/core/validators.py +40 -0
- orchestrator/search/docs/index.md +37 -0
- orchestrator/search/docs/running_local_text_embedding_inference.md +46 -0
- orchestrator/search/filters/__init__.py +40 -0
- orchestrator/search/filters/base.py +295 -0
- orchestrator/search/filters/date_filters.py +88 -0
- orchestrator/search/filters/definitions.py +107 -0
- orchestrator/search/filters/ltree_filters.py +56 -0
- orchestrator/search/filters/numeric_filter.py +73 -0
- orchestrator/search/indexing/__init__.py +16 -0
- orchestrator/search/indexing/indexer.py +334 -0
- orchestrator/search/indexing/registry.py +101 -0
- orchestrator/search/indexing/tasks.py +69 -0
- orchestrator/search/indexing/traverse.py +334 -0
- orchestrator/search/llm_migration.py +108 -0
- orchestrator/search/retrieval/__init__.py +16 -0
- orchestrator/search/retrieval/builder.py +123 -0
- orchestrator/search/retrieval/engine.py +154 -0
- orchestrator/search/retrieval/exceptions.py +90 -0
- orchestrator/search/retrieval/pagination.py +96 -0
- orchestrator/search/retrieval/retrievers/__init__.py +26 -0
- orchestrator/search/retrieval/retrievers/base.py +123 -0
- orchestrator/search/retrieval/retrievers/fuzzy.py +94 -0
- orchestrator/search/retrieval/retrievers/hybrid.py +277 -0
- orchestrator/search/retrieval/retrievers/semantic.py +94 -0
- orchestrator/search/retrieval/retrievers/structured.py +39 -0
- orchestrator/search/retrieval/utils.py +120 -0
- orchestrator/search/retrieval/validation.py +152 -0
- orchestrator/search/schemas/__init__.py +12 -0
- orchestrator/search/schemas/parameters.py +129 -0
- orchestrator/search/schemas/results.py +77 -0
- orchestrator/services/processes.py +1 -1
- orchestrator/services/settings_env_variables.py +2 -2
- orchestrator/settings.py +8 -1
- orchestrator/utils/state.py +6 -1
- orchestrator/workflows/steps.py +15 -1
- orchestrator/workflows/tasks/validate_products.py +1 -1
- {orchestrator_core-4.4.2.dist-info → orchestrator_core-4.5.0.dist-info}/METADATA +15 -8
- {orchestrator_core-4.4.2.dist-info → orchestrator_core-4.5.0.dist-info}/RECORD +71 -21
- {orchestrator_core-4.4.2.dist-info → orchestrator_core-4.5.0.dist-info}/WHEEL +0 -0
- {orchestrator_core-4.4.2.dist-info → orchestrator_core-4.5.0.dist-info}/licenses/LICENSE +0 -0
orchestrator/search/core/validators.py
@@ -0,0 +1,40 @@
+# Copyright 2019-2025 SURF, GÉANT.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import uuid
+
+from dateutil.parser import isoparse
+
+
+def is_uuid(value: str) -> bool:
+    """Check if a string is a valid UUID."""
+    try:
+        uuid.UUID(value)
+        return True
+    except (ValueError, TypeError):
+        return False
+
+
+def is_iso_date(value: str) -> bool:
+    """Check if a string is a valid ISO 8601 date."""
+    try:
+        isoparse(value)
+        return True
+    except (ValueError, TypeError):
+        return False
+
+
+def is_bool_string(value: str) -> bool:
+    """Check if a string explicitly represents a boolean value with true/false."""
+
+    return value.strip().lower() in {"true", "false"}
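A minimal usage sketch of the helpers added above (illustrative only, not part of the diff; the import path is taken from the file listing entry `orchestrator/search/core/validators.py`):

```python
# Illustrative sketch; import path assumed from the file listing above.
from orchestrator.search.core.validators import is_bool_string, is_iso_date, is_uuid

assert is_uuid("4c1f3c63-3c3e-4d1a-9a3e-1f2b3c4d5e6f")  # well-formed UUID
assert not is_uuid("not-a-uuid")                        # ValueError is swallowed -> False

assert is_iso_date("2025-01-01T12:00:00+00:00")         # parseable ISO 8601
assert not is_iso_date("next tuesday")                  # isoparse rejects it

assert is_bool_string("  True ")                        # stripped and lower-cased
assert not is_bool_string("yes")                        # only explicit true/false counts
```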
orchestrator/search/docs/index.md
@@ -0,0 +1,37 @@
+# Search Indexing CLI
+
+Typer-based CLI for maintaining search indexes (subscriptions, products, processes, workflows).
+
+## Usage
+
+Run from project root:
+
+```
+dotenv run python main.py index [COMMAND] [OPTIONS]
+```
+
+### Commands
+
+- `subscriptions` – index `subscription_search_index`
+- `products` – index `product_search_index`
+- `processes` – index `process_search_index`
+- `workflows` – index `workflow_search_index`
+
+### Options
+
+- `--<id>` – UUID of a specific entity (default: all)
+- `--dry-run` – no DB writes
+- `--force-index` – re-index even if unchanged
+
+### Examples
+
+```
+# Index all subscriptions
+dotenv run python main.py index subscriptions
+
+# Re-index all subscriptions
+dotenv run python main.py index subscriptions --force-index
+
+# Index a single subscription
+dotenv run python main.py index subscriptions --subscription-id=<UUID>
+```
orchestrator/search/docs/running_local_text_embedding_inference.md
@@ -0,0 +1,46 @@
+# Running a local MiniLM embedding server with Hugging Face TEI
+
+Only **OpenAI-compatible endpoints** are supported locally.
+
+You can spin up a embedding API based on **sentence-transformers/all-MiniLM-L6-v2** using [Hugging Face TEI](https://github.com/huggingface/text-embeddings-inference):
+
+```bash
+docker run --rm -p 8080:80 ghcr.io/huggingface/text-embeddings-inference:cpu-1.8 \
+  --model-id sentence-transformers/all-MiniLM-L6-v2
+```
+
+---
+
+## Environment variables
+
+Point your backend to the local endpoint and declare the new vector size:
+
+```env
+OPENAI_BASE_URL=http://localhost:8080/v1
+EMBEDDING_DIMENSION=384
+EMBEDDING_MAX_BATCH_SIZE=32 # Not required when using OpenAI embeddings
+```
+
+Depending on the model, you might want to change the `EMBEDDING_FALLBACK_MAX_TOKENS` and `EMBEDDING_MAX_BATCH_SIZE` settings, which are set conservatively and according to the requirements of the setup used in this example.
+
+---
+
+## Apply the schema change
+
+With these new settings run:
+
+```bash
+dotenv run python main.py embedding resize
+```
+
+**Note** that this will delete all records and you will have to re-index.
+
+---
+
+## Re-index embeddings
+
+```bash
+dotenv run python main.py index subscriptions
+```
+
+The search index now uses **384-dimension MiniLM vectors** served from your local Docker container. That’s it! 🚀
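As a quick sanity check that the container really serves the OpenAI-compatible embeddings API before pointing the backend at it, a hedged sketch using the `openai` Python client (assumed to be installed; the API key is a placeholder since the local server does not authenticate, and TEI serves only the single model loaded above):

```python
# Illustrative sanity check, not part of the package: call the local
# OpenAI-compatible /v1/embeddings endpoint exposed by TEI.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8080/v1", api_key="unused")  # no real key needed locally

response = client.embeddings.create(
    model="sentence-transformers/all-MiniLM-L6-v2",
    input="quick sanity check",
)
print(len(response.data[0].embedding))  # expect 384, matching EMBEDDING_DIMENSION
```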
orchestrator/search/filters/__init__.py
@@ -0,0 +1,40 @@
+# Copyright 2019-2025 SURF, GÉANT.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .base import (
+    EqualityFilter,
+    FilterCondition,
+    FilterTree,
+    PathFilter,
+    StringFilter,
+)
+from .date_filters import DateFilter, DateRangeFilter, DateValueFilter
+from .ltree_filters import LtreeFilter
+from .numeric_filter import NumericFilter, NumericRangeFilter, NumericValueFilter
+
+__all__ = [
+    # Base filter classes
+    "PathFilter",
+    "FilterTree",
+    "FilterCondition",
+    "StringFilter",
+    "EqualityFilter",
+    # Filters for specific value types
+    "NumericValueFilter",
+    "NumericRangeFilter",
+    "DateValueFilter",
+    "DateRangeFilter",
+    "DateFilter",
+    "LtreeFilter",
+    "NumericFilter",
+]
orchestrator/search/filters/base.py
@@ -0,0 +1,295 @@
+# Copyright 2019-2025 SURF, GÉANT.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+from itertools import count
+from typing import Any, ClassVar, Literal
+
+from pydantic import BaseModel, ConfigDict, Field, model_validator
+from sqlalchemy import BinaryExpression, and_, cast, exists, literal, or_, select
+from sqlalchemy.dialects.postgresql import BOOLEAN
+from sqlalchemy.sql.elements import ColumnElement
+from sqlalchemy_utils.types.ltree import Ltree
+
+from orchestrator.db.models import AiSearchIndex
+from orchestrator.search.core.types import BooleanOperator, FieldType, FilterOp, SQLAColumn, UIType
+
+from .date_filters import DateFilter
+from .ltree_filters import LtreeFilter
+from .numeric_filter import NumericFilter
+
+
+class EqualityFilter(BaseModel):
+    op: Literal[FilterOp.EQ, FilterOp.NEQ]
+    value: Any
+
+    def to_expression(self, column: SQLAColumn, path: str) -> BinaryExpression[bool] | ColumnElement[bool]:
+        if isinstance(self.value, bool):
+            colb = cast(column, BOOLEAN)
+            return colb.is_(self.value) if self.op == FilterOp.EQ else ~colb.is_(self.value)
+        sv = str(self.value)
+        return (column == sv) if self.op == FilterOp.EQ else (column != sv)
+
+
+class StringFilter(BaseModel):
+    op: Literal[FilterOp.LIKE]
+    value: str
+
+    def to_expression(self, column: SQLAColumn, path: str) -> ColumnElement[bool]:
+        return column.like(self.value)
+
+    @model_validator(mode="after")
+    def validate_like_pattern(self) -> StringFilter:
+        """If the operation is 'like', the value must contain a wildcard."""
+        if self.op == FilterOp.LIKE:
+            if "%" not in self.value and "_" not in self.value:
+                raise ValueError("The value for a 'like' operation must contain a wildcard character ('%' or '_').")
+        return self
+
+
+# Order matters! Ambiguous ops (like 'eq') are resolved by first matching filter
+FilterCondition = (
+    DateFilter  # DATETIME
+    | NumericFilter  # INT/FLOAT
+    | StringFilter  # STRING TODO: convert to hybrid search?
+    | LtreeFilter  # Path
+    | EqualityFilter  # BOOLEAN/UUID/BLOCK/RESOURCE_TYPE - most generic, try last
+)
+
+
+class PathFilter(BaseModel):
+
+    path: str = Field(description="The ltree path of the field to filter on, e.g., 'subscription.customer_id'.")
+    condition: FilterCondition = Field(description="The filter condition to apply.")
+
+    value_kind: UIType
+
+    model_config = ConfigDict(
+        json_schema_extra={
+            "examples": [
+                {"path": "subscription.status", "condition": {"op": "eq", "value": "active"}, "value_kind": "string"},
+                {
+                    "path": "subscription.customer_id",
+                    "condition": {"op": "neq", "value": "acme"},
+                    "value_kind": "string",
+                },
+                {
+                    "path": "subscription.start_date",
+                    "condition": {"op": "gt", "value": "2025-01-01"},
+                    "value_kind": "datetime",
+                },
+                {
+                    "path": "subscription.end_date",
+                    "condition": {
+                        "op": "between",
+                        "value": {"start": "2025-06-01", "end": "2025-07-01"},
+                    },
+                    "value_kind": "datetime",
+                },
+                {
+                    "path": "subscription",
+                    "condition": {"op": "has_component", "value": "node"},
+                    "value_kind": "component",
+                },
+            ]
+        }
+    )
+
+    @model_validator(mode="before")
+    @classmethod
+    def _transfer_path_to_value_if_needed(cls, data: Any) -> Any:
+        """Transform for path-only filters.
+
+        If `op` is `has_component`, `not_has_component`, or `ends_with` and no `value` is
+        provided in the `condition`, this validator will automatically use the `path`
+        field as the `value` and set the `path` to a wildcard '*' for the query.
+        """
+        if isinstance(data, dict):
+            path = data.get("path")
+            condition = data.get("condition")
+
+            if path and isinstance(condition, dict):
+                op = condition.get("op")
+                value = condition.get("value")
+
+                path_only_ops = [FilterOp.HAS_COMPONENT, FilterOp.NOT_HAS_COMPONENT, FilterOp.ENDS_WITH]
+
+                if op in path_only_ops and value is None:
+                    condition["value"] = path
+                    data["path"] = "*"
+        return data
+
+    def to_expression(self, value_column: SQLAColumn, value_type_column: SQLAColumn) -> ColumnElement[bool]:
+        """Convert the path filter into a SQLAlchemy expression with type safety.
+
+        This method creates a type guard to ensure we only match compatible field types,
+        then delegates to the specific filter condition.
+
+        Args:
+            value_column (ColumnElement): The SQLAlchemy column element representing the value to be filtered.
+            value_type_column (ColumnElement): The SQLAlchemy column element representing the field type.
+
+        Returns:
+            ColumnElement[bool]: A SQLAlchemy boolean expression that can be used in a ``WHERE`` clause.
+        """
+
+        # Type guard - only match compatible field types
+        allowed_field_types = [ft.value for ft in FieldType if UIType.from_field_type(ft) == self.value_kind]
+        type_guard = value_type_column.in_(allowed_field_types) if allowed_field_types else literal(True)
+
+        return and_(type_guard, self.condition.to_expression(value_column, self.path))
+
+
+class FilterTree(BaseModel):
+    op: BooleanOperator = Field(
+        description="Operator for grouping conditions in uppercase.", default=BooleanOperator.AND
+    )
+
+    children: list[FilterTree | PathFilter] = Field(min_length=1, description="Path filters or nested groups.")
+
+    MAX_DEPTH: ClassVar[int] = 5
+
+    model_config = ConfigDict(
+        json_schema_extra={
+            "description": (
+                "Boolean filter tree. Operators must be UPPERCASE: AND / OR.\n"
+                "Node shapes:\n"
+                " • Group: {'op':'AND'|'OR', 'children': [<PathFilter|FilterTree>, ...]}\n"
+                " • Leaf (PathFilter): {'path':'<ltree>', 'condition': {...}}\n"
+                "Rules:\n"
+                " • Do NOT put 'op' or 'children' inside a leaf 'condition'.\n"
+                f" • Max depth = {MAX_DEPTH}.\n"
+            ),
+            "examples": [
+                {
+                    "description": "Simple filters",
+                    "op": "AND",
+                    "children": [
+                        {"path": "subscription.status", "condition": {"op": "eq", "value": "active"}},
+                        {"path": "subscription.start_date", "condition": {"op": "gt", "value": "2021-01-01"}},
+                    ],
+                },
+                {
+                    "description": "Complex filters with OR group",
+                    "op": "AND",
+                    "children": [
+                        {"path": "subscription.start_date", "condition": {"op": "gte", "value": "2024-01-01"}},
+                        {
+                            "op": "OR",
+                            "children": [
+                                {"path": "subscription.product.name", "condition": {"op": "like", "value": "%fiber%"}},
+                                {"path": "subscription.customer_id", "condition": {"op": "eq", "value": "Surf"}},
+                            ],
+                        },
+                    ],
+                },
+            ],
+        }
+    )
+
+    @model_validator(mode="after")
+    def _validate_depth(self) -> FilterTree:
+        def depth(node: "FilterTree | PathFilter") -> int:
+            return 1 + max(depth(c) for c in node.children) if isinstance(node, FilterTree) else 1
+
+        if depth(self) > self.MAX_DEPTH:
+            raise ValueError(f"FilterTree nesting exceeds MAX_DEPTH={self.MAX_DEPTH}")
+        return self
+
+    @classmethod
+    def from_flat_and(cls, filters: list[PathFilter]) -> FilterTree | None:
+        """Wrap a flat list of PathFilter into an AND group (or None)."""
+        return None if not filters else cls(op=BooleanOperator.AND, children=list(filters))
+
+    def get_all_paths(self) -> set[str]:
+        """Collects all unique paths from the PathFilter leaves in the tree."""
+        return {leaf.path for leaf in self.get_all_leaves()}
+
+    def get_all_leaves(self) -> list[PathFilter]:
+        """Collect all PathFilter leaves in the tree."""
+        leaves: list[PathFilter] = []
+        for child in self.children:
+            if isinstance(child, PathFilter):
+                leaves.append(child)
+            else:
+                leaves.extend(child.get_all_leaves())
+        return leaves
+
+    @staticmethod
+    def _build_correlates(
+        alias: Any, entity_id_col: SQLAColumn, entity_type_value: str | None
+    ) -> list[ColumnElement[bool]]:
+        """Build the correlation predicates that link the subquery to the outer query."""
+        correlates = [alias.entity_id == entity_id_col]
+        if entity_type_value is not None:
+            correlates.append(alias.entity_type == entity_type_value)
+        return correlates
+
+    @staticmethod
+    def _handle_ltree_filter(pf: PathFilter, alias: Any, correlates: list[ColumnElement[bool]]) -> ColumnElement[bool]:
+        """Handle path-only filters (has_component, not_has_component, ends_with)."""
+        # row-level predicate is always positive
+        positive = pf.condition.to_expression(alias.path, pf.path)
+        subq = select(1).select_from(alias).where(and_(*correlates, positive))
+        if pf.condition.op == FilterOp.NOT_HAS_COMPONENT:
+            return ~exists(subq)  # NOT at the entity level
+        return exists(subq)
+
+    @staticmethod
+    def _handle_value_filter(pf: PathFilter, alias: Any, correlates: list[ColumnElement[bool]]) -> ColumnElement[bool]:
+        """Handle value-based filters (equality, comparison, etc)."""
+        if "." not in pf.path:
+            path_pred = LtreeFilter(op=FilterOp.ENDS_WITH, value=pf.path).to_expression(alias.path, "")
+        else:
+            path_pred = alias.path == Ltree(pf.path)
+
+        value_pred = pf.to_expression(alias.value, alias.value_type)
+        subq = select(1).select_from(alias).where(and_(*correlates, path_pred, value_pred))
+        return exists(subq)
+
+    def to_expression(
+        self,
+        entity_id_col: SQLAColumn,
+        *,
+        entity_type_value: str | None = None,
+    ) -> ColumnElement[bool]:
+        """Compile this tree into a SQLAlchemy boolean expression.
+
+        Args:
+            entity_id_col (SQLAColumn): Column in the outer query representing the entity ID.
+            entity_type_value (str, optional): If provided, each subquery is additionally constrained to this entity type.
+
+        Returns:
+            ColumnElement[bool]: A SQLAlchemy expression suitable for use in a WHERE clause.
+        """
+        from sqlalchemy.orm import aliased
+
+        alias_idx = count(1)
+
+        def leaf_exists(pf: PathFilter) -> ColumnElement[bool]:
+            """Convert a PathFilter into an EXISTS subquery."""
+            alias = aliased(AiSearchIndex, name=f"flt_{next(alias_idx)}")
+            correlates = self._build_correlates(alias, entity_id_col, entity_type_value)
+
+            if isinstance(pf.condition, LtreeFilter):
+                return self._handle_ltree_filter(pf, alias, correlates)
+            return self._handle_value_filter(pf, alias, correlates)
+
+        def compile_node(node: FilterTree | PathFilter) -> ColumnElement[bool]:
+            if isinstance(node, FilterTree):
+                compiled = [compile_node(ch) for ch in node.children]
+                return and_(*compiled) if node.op == BooleanOperator.AND else or_(*compiled)
+            return leaf_exists(node)
+
+        return compile_node(self)
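To make the shape of these models concrete, a hedged sketch of building a `FilterTree` from the dict shape documented in the `json_schema_extra` examples above and compiling it into a WHERE-clause expression. Using `SubscriptionTable.subscription_id` as the outer entity-id column is an assumption for illustration, not something this diff prescribes:

```python
# Illustrative sketch only; the outer entity-id column is an assumption.
from orchestrator.db import SubscriptionTable
from orchestrator.search.filters import FilterTree

tree = FilterTree.model_validate(
    {
        "op": "AND",
        "children": [
            # Leaves follow the PathFilter examples above, including value_kind.
            {
                "path": "subscription.status",
                "condition": {"op": "eq", "value": "active"},
                "value_kind": "string",
            },
            {
                "path": "subscription.start_date",
                "condition": {"op": "gt", "value": "2025-01-01"},
                "value_kind": "datetime",
            },
        ],
    }
)

# Each leaf compiles to an EXISTS subquery against AiSearchIndex, correlated
# on the given entity-id column; groups combine them with AND/OR.
where_clause = tree.to_expression(SubscriptionTable.subscription_id)
```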
orchestrator/search/filters/date_filters.py
@@ -0,0 +1,88 @@
+# Copyright 2019-2025 SURF, GÉANT.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from datetime import date, datetime
+from typing import Annotated, Any, Literal
+
+from dateutil.parser import parse as dt_parse
+from pydantic import BaseModel, BeforeValidator, Field, model_validator
+from sqlalchemy import TIMESTAMP, and_
+from sqlalchemy import cast as sa_cast
+from sqlalchemy.sql.elements import ColumnElement
+
+from orchestrator.search.core.types import FilterOp, SQLAColumn
+
+
+def _validate_date_string(v: Any) -> Any:
+    if not isinstance(v, str):
+        return v
+    try:
+        dt_parse(v)
+        return v
+    except Exception as exc:
+        raise ValueError("is not a valid date or datetime string") from exc
+
+
+DateValue = datetime | date | str
+ValidatedDateValue = Annotated[DateValue, BeforeValidator(_validate_date_string)]
+
+
+class DateRange(BaseModel):
+
+    start: ValidatedDateValue
+    end: ValidatedDateValue
+
+    @model_validator(mode="after")
+    def _order(self) -> "DateRange":
+        to_datetime = dt_parse(str(self.end))
+        from_datetime = dt_parse(str(self.start))
+        if to_datetime <= from_datetime:
+            raise ValueError("'to' must be after 'from'")
+        return self
+
+
+class DateValueFilter(BaseModel):
+    """A filter that operates on a single date value."""
+
+    op: Literal[FilterOp.EQ, FilterOp.NEQ, FilterOp.LT, FilterOp.LTE, FilterOp.GT, FilterOp.GTE]
+    value: ValidatedDateValue
+
+    def to_expression(self, column: SQLAColumn, path: str) -> ColumnElement[bool]:
+        date_column = sa_cast(column, TIMESTAMP(timezone=True))
+        match self.op:
+            case FilterOp.EQ:
+                return date_column == self.value
+            case FilterOp.NEQ:
+                return date_column != self.value
+            case FilterOp.LT:
+                return date_column < self.value
+            case FilterOp.LTE:
+                return date_column <= self.value
+            case FilterOp.GT:
+                return date_column > self.value
+            case FilterOp.GTE:
+                return date_column >= self.value
+
+
+class DateRangeFilter(BaseModel):
+    """A filter that operates on a range of dates."""
+
+    op: Literal[FilterOp.BETWEEN]
+    value: DateRange
+
+    def to_expression(self, column: SQLAColumn, path: str) -> ColumnElement[bool]:
+        date_column = sa_cast(column, TIMESTAMP(timezone=True))
+        return and_(date_column >= self.value.start, date_column < self.value.end)
+
+
+DateFilter = Annotated[DateValueFilter | DateRangeFilter, Field(discriminator="op")]
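A small, hedged sketch of how this op-discriminated `DateFilter` union might be validated on its own (illustrative only; uses pydantic's `TypeAdapter`):

```python
# Illustrative sketch: validate the discriminated DateFilter union directly.
from pydantic import TypeAdapter, ValidationError

from orchestrator.search.filters import DateFilter

adapter = TypeAdapter(DateFilter)

# "gt" selects DateValueFilter; the BeforeValidator checks the string parses.
gt_filter = adapter.validate_python({"op": "gt", "value": "2025-01-01"})

# "between" selects DateRangeFilter; DateRange enforces start < end.
rng = adapter.validate_python({"op": "between", "value": {"start": "2025-06-01", "end": "2025-07-01"}})

try:
    adapter.validate_python({"op": "between", "value": {"start": "2025-07-01", "end": "2025-06-01"}})
except ValidationError:
    pass  # raised because "'to' must be after 'from'"
```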
orchestrator/search/filters/definitions.py
@@ -0,0 +1,107 @@
+# Copyright 2019-2025 SURF, GÉANT.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from orchestrator.search.core.types import FieldType, FilterOp, UIType
+from orchestrator.search.schemas.results import TypeDefinition, ValueSchema
+
+
+def operators_for(ft: FieldType) -> list[FilterOp]:
+    """Return the list of valid operators for a given FieldType."""
+    return list(value_schema_for(ft).keys())
+
+
+def component_operators() -> dict[FilterOp, ValueSchema]:
+    """Return operators available for path components."""
+    return {
+        FilterOp.HAS_COMPONENT: ValueSchema(kind=UIType.COMPONENT),
+        FilterOp.NOT_HAS_COMPONENT: ValueSchema(kind=UIType.COMPONENT),
+    }
+
+
+def value_schema_for(ft: FieldType) -> dict[FilterOp, ValueSchema]:
+    """Return the value schema map for a given FieldType."""
+    if ft in (FieldType.INTEGER, FieldType.FLOAT):
+        return {
+            FilterOp.EQ: ValueSchema(kind=UIType.NUMBER),
+            FilterOp.NEQ: ValueSchema(kind=UIType.NUMBER),
+            FilterOp.LT: ValueSchema(kind=UIType.NUMBER),
+            FilterOp.LTE: ValueSchema(kind=UIType.NUMBER),
+            FilterOp.GT: ValueSchema(kind=UIType.NUMBER),
+            FilterOp.GTE: ValueSchema(kind=UIType.NUMBER),
+            FilterOp.BETWEEN: ValueSchema(
+                kind="object",
+                fields={
+                    "start": ValueSchema(kind=UIType.NUMBER),
+                    "end": ValueSchema(kind=UIType.NUMBER),
+                },
+            ),
+        }
+
+    if ft == FieldType.BOOLEAN:
+        return {
+            FilterOp.EQ: ValueSchema(kind=UIType.BOOLEAN),
+            FilterOp.NEQ: ValueSchema(kind=UIType.BOOLEAN),
+        }
+
+    if ft == FieldType.DATETIME:
+        return {
+            FilterOp.EQ: ValueSchema(kind=UIType.DATETIME),
+            FilterOp.NEQ: ValueSchema(kind=UIType.DATETIME),
+            FilterOp.LT: ValueSchema(kind=UIType.DATETIME),
+            FilterOp.LTE: ValueSchema(kind=UIType.DATETIME),
+            FilterOp.GT: ValueSchema(kind=UIType.DATETIME),
+            FilterOp.GTE: ValueSchema(kind=UIType.DATETIME),
+            FilterOp.BETWEEN: ValueSchema(
+                kind="object",
+                fields={
+                    "start": ValueSchema(kind=UIType.DATETIME),
+                    "end": ValueSchema(kind=UIType.DATETIME),
+                },
+            ),
+        }
+
+    return {
+        FilterOp.EQ: ValueSchema(kind=UIType.STRING),
+        FilterOp.NEQ: ValueSchema(kind=UIType.STRING),
+        FilterOp.LIKE: ValueSchema(kind=UIType.STRING),
+    }
+
+
+def generate_definitions() -> dict[UIType, TypeDefinition]:
+    """Generate the full definitions dictionary for all UI types."""
+    definitions: dict[UIType, TypeDefinition] = {}
+
+    for ui_type in UIType:
+        if ui_type == UIType.COMPONENT:
+            # Special case for component filtering
+            comp_ops = component_operators()
+            definitions[ui_type] = TypeDefinition(
+                operators=list(comp_ops.keys()),
+                value_schema=comp_ops,
+            )
+        else:
+            # Regular field types
+            if ui_type == UIType.NUMBER:
+                rep_ft = FieldType.INTEGER
+            elif ui_type == UIType.DATETIME:
+                rep_ft = FieldType.DATETIME
+            elif ui_type == UIType.BOOLEAN:
+                rep_ft = FieldType.BOOLEAN
+            else:
+                rep_ft = FieldType.STRING
+
+            definitions[ui_type] = TypeDefinition(
+                operators=operators_for(rep_ft),
+                value_schema=value_schema_for(rep_ft),
+            )
+    return definitions