orchestrator-core 4.5.1a1__py3-none-any.whl → 4.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. orchestrator/__init__.py +3 -12
  2. orchestrator/agentic_app.py +48 -29
  3. orchestrator/api/api_v1/api.py +8 -6
  4. orchestrator/api/api_v1/endpoints/processes.py +2 -0
  5. orchestrator/api/api_v1/endpoints/search.py +26 -7
  6. orchestrator/cli/main.py +2 -2
  7. orchestrator/cli/search/__init__.py +32 -0
  8. orchestrator/devtools/populator.py +16 -0
  9. orchestrator/domain/base.py +2 -7
  10. orchestrator/domain/lifecycle.py +24 -7
  11. orchestrator/llm_settings.py +9 -3
  12. orchestrator/log_config.py +1 -0
  13. orchestrator/migrations/helpers.py +7 -1
  14. orchestrator/schemas/search.py +13 -0
  15. orchestrator/schemas/workflow.py +1 -0
  16. orchestrator/search/agent/__init__.py +15 -2
  17. orchestrator/search/agent/agent.py +30 -15
  18. orchestrator/search/agent/prompts.py +75 -37
  19. orchestrator/search/agent/state.py +13 -0
  20. orchestrator/search/agent/tools.py +148 -11
  21. orchestrator/search/core/__init__.py +12 -0
  22. orchestrator/search/core/embedding.py +13 -4
  23. orchestrator/search/core/exceptions.py +14 -0
  24. orchestrator/search/core/types.py +15 -0
  25. orchestrator/search/core/validators.py +13 -0
  26. orchestrator/search/docs/running_local_text_embedding_inference.md +1 -0
  27. orchestrator/search/filters/__init__.py +13 -0
  28. orchestrator/search/filters/base.py +84 -61
  29. orchestrator/search/filters/date_filters.py +13 -0
  30. orchestrator/search/filters/definitions.py +16 -2
  31. orchestrator/search/filters/ltree_filters.py +16 -3
  32. orchestrator/search/filters/numeric_filter.py +13 -0
  33. orchestrator/search/indexing/__init__.py +13 -0
  34. orchestrator/search/indexing/indexer.py +14 -3
  35. orchestrator/search/indexing/registry.py +13 -0
  36. orchestrator/search/indexing/tasks.py +17 -1
  37. orchestrator/search/indexing/traverse.py +17 -5
  38. orchestrator/search/llm_migration.py +108 -0
  39. orchestrator/search/retrieval/__init__.py +13 -0
  40. orchestrator/search/retrieval/builder.py +23 -8
  41. orchestrator/search/retrieval/engine.py +36 -34
  42. orchestrator/search/retrieval/exceptions.py +90 -0
  43. orchestrator/search/retrieval/pagination.py +13 -0
  44. orchestrator/search/retrieval/retrievers/__init__.py +26 -0
  45. orchestrator/search/retrieval/retrievers/base.py +123 -0
  46. orchestrator/search/retrieval/retrievers/fuzzy.py +94 -0
  47. orchestrator/search/retrieval/retrievers/hybrid.py +277 -0
  48. orchestrator/search/retrieval/retrievers/semantic.py +94 -0
  49. orchestrator/search/retrieval/retrievers/structured.py +39 -0
  50. orchestrator/search/retrieval/utils.py +21 -7
  51. orchestrator/search/retrieval/validation.py +54 -76
  52. orchestrator/search/schemas/__init__.py +12 -0
  53. orchestrator/search/schemas/parameters.py +13 -0
  54. orchestrator/search/schemas/results.py +15 -1
  55. orchestrator/services/processes.py +2 -1
  56. orchestrator/settings.py +7 -0
  57. orchestrator/utils/state.py +6 -1
  58. orchestrator/workflows/steps.py +16 -1
  59. {orchestrator_core-4.5.1a1.dist-info → orchestrator_core-4.5.2.dist-info}/METADATA +13 -11
  60. {orchestrator_core-4.5.1a1.dist-info → orchestrator_core-4.5.2.dist-info}/RECORD +66 -59
  61. orchestrator/migrations/versions/schema/2025-08-12_52b37b5b2714_search_index_model_for_llm_integration.py +0 -95
  62. orchestrator/search/retrieval/retriever.py +0 -447
  63. /orchestrator/cli/{index_llm.py → search/index_llm.py} +0 -0
  64. /orchestrator/cli/{resize_embedding.py → search/resize_embedding.py} +0 -0
  65. /orchestrator/cli/{search_explore.py → search/search_explore.py} +0 -0
  66. /orchestrator/cli/{speedtest.py → search/speedtest.py} +0 -0
  67. {orchestrator_core-4.5.1a1.dist-info → orchestrator_core-4.5.2.dist-info}/WHEEL +0 -0
  68. {orchestrator_core-4.5.1a1.dist-info → orchestrator_core-4.5.2.dist-info}/licenses/LICENSE +0 -0
@@ -1,3 +1,16 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
1
14
  from __future__ import annotations
2
15
 
3
16
  from itertools import count
@@ -45,12 +58,13 @@ class StringFilter(BaseModel):
45
58
  return self
46
59
 
47
60
 
61
+ # Order matters! Ambiguous ops (like 'eq') are resolved by first matching filter
48
62
  FilterCondition = (
49
63
  DateFilter # DATETIME
50
64
  | NumericFilter # INT/FLOAT
51
- | EqualityFilter # BOOLEAN/UUID/BLOCK/RESOURCE_TYPE
52
- | StringFilter # STRING TODO: convert to hybrid search
65
+ | StringFilter # STRING TODO: convert to hybrid search?
53
66
  | LtreeFilter # Path
67
+ | EqualityFilter # BOOLEAN/UUID/BLOCK/RESOURCE_TYPE - most generic, try last
54
68
  )
55
69
 
56
70
 
@@ -64,28 +78,29 @@ class PathFilter(BaseModel):
64
78
  model_config = ConfigDict(
65
79
  json_schema_extra={
66
80
  "examples": [
67
- {
68
- "path": "subscription.status",
69
- "condition": {"op": "eq", "value": "active"},
70
- },
81
+ {"path": "subscription.status", "condition": {"op": "eq", "value": "active"}, "value_kind": "string"},
71
82
  {
72
83
  "path": "subscription.customer_id",
73
- "condition": {"op": "ne", "value": "acme"},
84
+ "condition": {"op": "neq", "value": "acme"},
85
+ "value_kind": "string",
74
86
  },
75
87
  {
76
88
  "path": "subscription.start_date",
77
89
  "condition": {"op": "gt", "value": "2025-01-01"},
90
+ "value_kind": "datetime",
78
91
  },
79
92
  {
80
93
  "path": "subscription.end_date",
81
94
  "condition": {
82
95
  "op": "between",
83
- "value": {"from": "2025-06-01", "to": "2025-07-01"},
96
+ "value": {"start": "2025-06-01", "end": "2025-07-01"},
84
97
  },
98
+ "value_kind": "datetime",
85
99
  },
86
100
  {
87
- "path": "subscription.*.name",
88
- "condition": {"op": "matches_lquery", "value": "*.foo_*"},
101
+ "path": "subscription",
102
+ "condition": {"op": "has_component", "value": "node"},
103
+ "value_kind": "component",
89
104
  },
90
105
  ]
91
106
  }
@@ -121,18 +136,14 @@ class PathFilter(BaseModel):
121
136
  This method creates a type guard to ensure we only match compatible field types,
122
137
  then delegates to the specific filter condition.
123
138
 
124
- Parameters
125
- ----------
126
- value_column : ColumnElement
127
- The SQLAlchemy column element representing the value to be filtered.
128
- value_type_column : ColumnElement
129
- The SQLAlchemy column element representing the field type.
139
+ Args:
140
+ value_column (ColumnElement): The SQLAlchemy column element representing the value to be filtered.
141
+ value_type_column (ColumnElement): The SQLAlchemy column element representing the field type.
130
142
 
131
143
  Returns:
132
- -------
133
- ColumnElement[bool]
134
- A SQLAlchemy boolean expression that can be used in a ``WHERE`` clause.
144
+ ColumnElement[bool]: A SQLAlchemy boolean expression that can be used in a ``WHERE`` clause.
135
145
  """
146
+
136
147
  # Type guard - only match compatible field types
137
148
  allowed_field_types = [ft.value for ft in FieldType if UIType.from_field_type(ft) == self.value_kind]
138
149
  type_guard = value_type_column.in_(allowed_field_types) if allowed_field_types else literal(True)
@@ -141,6 +152,14 @@ class PathFilter(BaseModel):
141
152
 
142
153
 
143
154
  class FilterTree(BaseModel):
155
+ op: BooleanOperator = Field(
156
+ description="Operator for grouping conditions in uppercase.", default=BooleanOperator.AND
157
+ )
158
+
159
+ children: list[FilterTree | PathFilter] = Field(min_length=1, description="Path filters or nested groups.")
160
+
161
+ MAX_DEPTH: ClassVar[int] = 5
162
+
144
163
  model_config = ConfigDict(
145
164
  json_schema_extra={
146
165
  "description": (
@@ -150,11 +169,11 @@ class FilterTree(BaseModel):
150
169
  " • Leaf (PathFilter): {'path':'<ltree>', 'condition': {...}}\n"
151
170
  "Rules:\n"
152
171
  " • Do NOT put 'op' or 'children' inside a leaf 'condition'.\n"
153
- " • Max depth = 5.\n"
154
- " • Use from_flat_and() for a flat list of leaves."
172
+ f" • Max depth = {MAX_DEPTH}.\n"
155
173
  ),
156
174
  "examples": [
157
175
  {
176
+ "description": "Simple filters",
158
177
  "op": "AND",
159
178
  "children": [
160
179
  {"path": "subscription.status", "condition": {"op": "eq", "value": "active"}},
@@ -162,13 +181,14 @@ class FilterTree(BaseModel):
162
181
  ],
163
182
  },
164
183
  {
184
+ "description": "Complex filters with OR group",
165
185
  "op": "AND",
166
186
  "children": [
167
187
  {"path": "subscription.start_date", "condition": {"op": "gte", "value": "2024-01-01"}},
168
188
  {
169
189
  "op": "OR",
170
190
  "children": [
171
- {"path": "subscription.product_name", "condition": {"op": "like", "value": "%fiber%"}},
191
+ {"path": "subscription.product.name", "condition": {"op": "like", "value": "%fiber%"}},
172
192
  {"path": "subscription.customer_id", "condition": {"op": "eq", "value": "Surf"}},
173
193
  ],
174
194
  },
@@ -178,14 +198,6 @@ class FilterTree(BaseModel):
178
198
  }
179
199
  )
180
200
 
181
- op: BooleanOperator = Field(
182
- description="Operator for grouping conditions in uppercase.", default=BooleanOperator.AND
183
- )
184
-
185
- children: list[FilterTree | PathFilter] = Field(min_length=1, description="Path filters or nested groups.")
186
-
187
- MAX_DEPTH: ClassVar[int] = 5
188
-
189
201
  @model_validator(mode="after")
190
202
  def _validate_depth(self) -> FilterTree:
191
203
  def depth(node: "FilterTree | PathFilter") -> int:
@@ -214,6 +226,38 @@ class FilterTree(BaseModel):
214
226
  leaves.extend(child.get_all_leaves())
215
227
  return leaves
216
228
 
229
+ @staticmethod
230
+ def _build_correlates(
231
+ alias: Any, entity_id_col: SQLAColumn, entity_type_value: str | None
232
+ ) -> list[ColumnElement[bool]]:
233
+ """Build the correlation predicates that link the subquery to the outer query."""
234
+ correlates = [alias.entity_id == entity_id_col]
235
+ if entity_type_value is not None:
236
+ correlates.append(alias.entity_type == entity_type_value)
237
+ return correlates
238
+
239
+ @staticmethod
240
+ def _handle_ltree_filter(pf: PathFilter, alias: Any, correlates: list[ColumnElement[bool]]) -> ColumnElement[bool]:
241
+ """Handle path-only filters (has_component, not_has_component, ends_with)."""
242
+ # row-level predicate is always positive
243
+ positive = pf.condition.to_expression(alias.path, pf.path)
244
+ subq = select(1).select_from(alias).where(and_(*correlates, positive))
245
+ if pf.condition.op == FilterOp.NOT_HAS_COMPONENT:
246
+ return ~exists(subq) # NOT at the entity level
247
+ return exists(subq)
248
+
249
+ @staticmethod
250
+ def _handle_value_filter(pf: PathFilter, alias: Any, correlates: list[ColumnElement[bool]]) -> ColumnElement[bool]:
251
+ """Handle value-based filters (equality, comparison, etc)."""
252
+ if "." not in pf.path:
253
+ path_pred = LtreeFilter(op=FilterOp.ENDS_WITH, value=pf.path).to_expression(alias.path, "")
254
+ else:
255
+ path_pred = alias.path == Ltree(pf.path)
256
+
257
+ value_pred = pf.to_expression(alias.value, alias.value_type)
258
+ subq = select(1).select_from(alias).where(and_(*correlates, path_pred, value_pred))
259
+ return exists(subq)
260
+
217
261
  def to_expression(
218
262
  self,
219
263
  entity_id_col: SQLAColumn,
@@ -222,46 +266,25 @@ class FilterTree(BaseModel):
222
266
  ) -> ColumnElement[bool]:
223
267
  """Compile this tree into a SQLAlchemy boolean expression.
224
268
 
225
- Parameters
226
- ----------
227
- entity_id_col : SQLAColumn
228
- Column in the outer query representing the entity ID.
229
- entity_type_value : str, optional
230
- If provided, each subquery is additionally constrained to this entity type.
269
+ Args:
270
+ entity_id_col (SQLAColumn): Column in the outer query representing the entity ID.
271
+ entity_type_value (str, optional): If provided, each subquery is additionally constrained to this entity type.
231
272
 
232
273
  Returns:
233
- -------
234
- ColumnElement[bool]
235
- A SQLAlchemy expression suitable for use in a WHERE clause.
274
+ ColumnElement[bool]: A SQLAlchemy expression suitable for use in a WHERE clause.
236
275
  """
276
+ from sqlalchemy.orm import aliased
277
+
237
278
  alias_idx = count(1)
238
279
 
239
280
  def leaf_exists(pf: PathFilter) -> ColumnElement[bool]:
240
- from sqlalchemy.orm import aliased
241
-
281
+ """Convert a PathFilter into an EXISTS subquery."""
242
282
  alias = aliased(AiSearchIndex, name=f"flt_{next(alias_idx)}")
243
-
244
- correlates = [alias.entity_id == entity_id_col]
245
- if entity_type_value is not None:
246
- correlates.append(alias.entity_type == entity_type_value)
283
+ correlates = self._build_correlates(alias, entity_id_col, entity_type_value)
247
284
 
248
285
  if isinstance(pf.condition, LtreeFilter):
249
- # row-level predicate is always positive
250
- positive = pf.condition.to_expression(alias.path, pf.path)
251
- subq = select(1).select_from(alias).where(and_(*correlates, positive))
252
- if pf.condition.op == FilterOp.NOT_HAS_COMPONENT:
253
- return ~exists(subq) # NOT at the entity level
254
- return exists(subq)
255
-
256
- # value leaf: path predicate + typed value compare
257
- if "." not in pf.path:
258
- path_pred = LtreeFilter(op=FilterOp.ENDS_WITH, value=pf.path).to_expression(alias.path, "")
259
- else:
260
- path_pred = alias.path == Ltree(pf.path)
261
-
262
- value_pred = pf.to_expression(alias.value, alias.value_type)
263
- subq = select(1).select_from(alias).where(and_(*correlates, path_pred, value_pred))
264
- return exists(subq)
286
+ return self._handle_ltree_filter(pf, alias, correlates)
287
+ return self._handle_value_filter(pf, alias, correlates)
265
288
 
266
289
  def compile_node(node: FilterTree | PathFilter) -> ColumnElement[bool]:
267
290
  if isinstance(node, FilterTree):
@@ -1,3 +1,16 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
1
14
  from datetime import date, datetime
2
15
  from typing import Annotated, Any, Literal
3
16
 
@@ -1,3 +1,16 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
1
14
  from orchestrator.search.core.types import FieldType, FilterOp, UIType
2
15
  from orchestrator.search.schemas.results import TypeDefinition, ValueSchema
3
16
 
@@ -60,6 +73,7 @@ def value_schema_for(ft: FieldType) -> dict[FilterOp, ValueSchema]:
60
73
  return {
61
74
  FilterOp.EQ: ValueSchema(kind=UIType.STRING),
62
75
  FilterOp.NEQ: ValueSchema(kind=UIType.STRING),
76
+ FilterOp.LIKE: ValueSchema(kind=UIType.STRING),
63
77
  }
64
78
 
65
79
 
@@ -73,7 +87,7 @@ def generate_definitions() -> dict[UIType, TypeDefinition]:
73
87
  comp_ops = component_operators()
74
88
  definitions[ui_type] = TypeDefinition(
75
89
  operators=list(comp_ops.keys()),
76
- valueSchema=comp_ops,
90
+ value_schema=comp_ops,
77
91
  )
78
92
  else:
79
93
  # Regular field types
@@ -88,6 +102,6 @@ def generate_definitions() -> dict[UIType, TypeDefinition]:
88
102
 
89
103
  definitions[ui_type] = TypeDefinition(
90
104
  operators=operators_for(rep_ft),
91
- valueSchema=value_schema_for(rep_ft),
105
+ value_schema=value_schema_for(rep_ft),
92
106
  )
93
107
  return definitions
@@ -1,3 +1,16 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
1
14
  from typing import Literal
2
15
 
3
16
  from pydantic import BaseModel, Field
@@ -5,7 +18,7 @@ from sqlalchemy import TEXT, bindparam
5
18
  from sqlalchemy.sql.elements import ColumnElement
6
19
  from sqlalchemy_utils.types.ltree import Ltree
7
20
 
8
- from orchestrator.search.core.types import FilterOp, SQLAColumn
21
+ from orchestrator.search.core.types import LTREE_SEPARATOR, FilterOp, SQLAColumn
9
22
 
10
23
 
11
24
  class LtreeFilter(BaseModel):
@@ -38,6 +51,6 @@ class LtreeFilter(BaseModel):
38
51
  ltree_value = Ltree(path)
39
52
  return column == ltree_value
40
53
  case FilterOp.HAS_COMPONENT | FilterOp.NOT_HAS_COMPONENT:
41
- return column.op("~")(bindparam(None, f"*.{self.value}.*", type_=TEXT))
54
+ return column.op("~")(bindparam(None, f"*{LTREE_SEPARATOR}{self.value}{LTREE_SEPARATOR}*", type_=TEXT))
42
55
  case FilterOp.ENDS_WITH:
43
- return column.op("~")(bindparam(None, f"*.{self.value}", type_=TEXT))
56
+ return column.op("~")(bindparam(None, f"*{LTREE_SEPARATOR}{self.value}", type_=TEXT))
@@ -1,3 +1,16 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
1
14
  from typing import Annotated, Any, Literal
2
15
 
3
16
  from pydantic import BaseModel, Field, model_validator
@@ -1,3 +1,16 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
1
14
  from .tasks import run_indexing_for_entity
2
15
 
3
16
  __all__ = ["run_indexing_for_entity"]
@@ -1,3 +1,16 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
1
14
  import hashlib
2
15
  from collections.abc import Generator, Iterable, Iterator
3
16
  from contextlib import contextmanager, nullcontext
@@ -213,9 +226,7 @@ class Indexer:
213
226
  safe_margin = int(max_ctx * llm_settings.EMBEDDING_SAFE_MARGIN_PERCENT)
214
227
  token_budget = max(1, max_ctx - safe_margin)
215
228
 
216
- max_batch_size = None
217
- if llm_settings.OPENAI_BASE_URL: # We are using a local model
218
- max_batch_size = llm_settings.EMBEDDING_MAX_BATCH_SIZE
229
+ max_batch_size = llm_settings.EMBEDDING_MAX_BATCH_SIZE
219
230
 
220
231
  for entity_id, field in fields_to_upsert:
221
232
  if field.value_type.is_embeddable(field.value):
@@ -1,3 +1,16 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
1
14
  from dataclasses import dataclass
2
15
  from typing import Generic, TypeVar
3
16
  from uuid import UUID
@@ -1,7 +1,21 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
1
14
  import structlog
2
15
  from sqlalchemy.orm import Query
3
16
 
4
17
  from orchestrator.db import db
18
+ from orchestrator.domain.context_cache import cache_subscription_models
5
19
  from orchestrator.search.core.types import EntityType
6
20
  from orchestrator.search.indexing.indexer import Indexer
7
21
  from orchestrator.search.indexing.registry import ENTITY_CONFIG_REGISTRY
@@ -50,4 +64,6 @@ def run_indexing_for_entity(
50
64
  entities = db.session.execute(stmt).scalars()
51
65
 
52
66
  indexer = Indexer(config=config, dry_run=dry_run, force_index=force_index, chunk_size=chunk_size)
53
- indexer.run(entities)
67
+
68
+ with cache_subscription_models():
69
+ indexer.run(entities)
@@ -1,3 +1,16 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
1
14
  import re
2
15
  from abc import ABC, abstractmethod
3
16
  from collections.abc import Iterable
@@ -19,7 +32,7 @@ from orchestrator.domain.lifecycle import (
19
32
  from orchestrator.schemas.process import ProcessSchema
20
33
  from orchestrator.schemas.workflow import WorkflowSchema
21
34
  from orchestrator.search.core.exceptions import ModelLoadError, ProductNotInRegistryError
22
- from orchestrator.search.core.types import ExtractedField, FieldType
35
+ from orchestrator.search.core.types import LTREE_SEPARATOR, ExtractedField, FieldType
23
36
  from orchestrator.types import SubscriptionLifecycle
24
37
 
25
38
  logger = structlog.get_logger(__name__)
@@ -30,7 +43,6 @@ DatabaseEntity = SubscriptionTable | ProductTable | ProcessTable | WorkflowTable
30
43
  class BaseTraverser(ABC):
31
44
  """Base class for traversing database models and extracting searchable fields."""
32
45
 
33
- _LTREE_SEPARATOR = "."
34
46
  _MAX_DEPTH = 40
35
47
 
36
48
  @classmethod
@@ -62,7 +74,7 @@ class BaseTraverser(ABC):
62
74
  except Exception as e:
63
75
  logger.error(f"Failed to access field '{name}' on {model_class.__name__}", error=str(e))
64
76
  continue
65
- new_path = f"{path}{cls._LTREE_SEPARATOR}{name}" if path else name
77
+ new_path = f"{path}{LTREE_SEPARATOR}{name}" if path else name
66
78
  annotation = field.annotation if hasattr(field, "annotation") else field.return_type
67
79
  yield from cls._yield_fields_for_value(value, new_path, annotation)
68
80
 
@@ -197,7 +209,7 @@ class ProductTraverser(BaseTraverser):
197
209
  fields = []
198
210
 
199
211
  # Add the block itself as a BLOCK type
200
- block_name = block_path.split(cls._LTREE_SEPARATOR)[-1]
212
+ block_name = block_path.split(LTREE_SEPARATOR)[-1]
201
213
  fields.append(ExtractedField(path=block_path, value=block_name, value_type=FieldType.BLOCK))
202
214
 
203
215
  # Extract all field names from the block as RESOURCE_TYPE
@@ -223,7 +235,7 @@ class ProductTraverser(BaseTraverser):
223
235
  ExtractedField(path=field_path, value=field_name, value_type=FieldType.RESOURCE_TYPE)
224
236
  )
225
237
  # And potentially traverse the first item for schema
226
- first_item_path = f"{field_path}{cls._LTREE_SEPARATOR}0"
238
+ first_item_path = f"{field_path}{LTREE_SEPARATOR}0"
227
239
  nested_fields = cls._extract_block_schema(field_value[0], first_item_path)
228
240
  fields.extend(nested_fields)
229
241
  else:
@@ -0,0 +1,108 @@
1
+ # Copyright 2019-2025 SURF
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
14
+ """Simple search migration function that runs when SEARCH_ENABLED = True."""
15
+
16
+ from sqlalchemy import text
17
+ from sqlalchemy.engine import Connection
18
+ from structlog import get_logger
19
+
20
+ from orchestrator.llm_settings import llm_settings
21
+ from orchestrator.search.core.types import FieldType
22
+
23
+ logger = get_logger(__name__)
24
+
25
+ TABLE = "ai_search_index"
26
+ TARGET_DIM = 1536
27
+
28
+
29
+ def run_migration(connection: Connection) -> None:
30
+ """Run LLM migration with ON CONFLICT DO NOTHING pattern."""
31
+ logger.info("Running LLM migration")
32
+
33
+ try:
34
+ # Test to see if the extension exists and then skip the migration; Needed for certain situations where db user
35
+ # has insufficient privileges to run the `CREATE EXTENSION ...` command.
36
+ res = connection.execute(text("SELECT * FROM pg_extension where extname = 'vector';"))
37
+ if llm_settings.LLM_FORCE_EXTENTION_MIGRATION or res.rowcount == 0:
38
+ # Create PostgreSQL extensions
39
+ logger.info("Attempting to run the extention creation;")
40
+ connection.execute(text("CREATE EXTENSION IF NOT EXISTS ltree;"))
41
+ connection.execute(text("CREATE EXTENSION IF NOT EXISTS unaccent;"))
42
+ connection.execute(text("CREATE EXTENSION IF NOT EXISTS pg_trgm;"))
43
+ connection.execute(text("CREATE EXTENSION IF NOT EXISTS vector;"))
44
+
45
+ # Create field_type enum
46
+ field_type_values = "', '".join([ft.value for ft in FieldType])
47
+ connection.execute(
48
+ text(
49
+ f"""
50
+ DO $$
51
+ BEGIN
52
+ IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'field_type') THEN
53
+ CREATE TYPE field_type AS ENUM ('{field_type_values}');
54
+ END IF;
55
+ END $$;
56
+ """
57
+ )
58
+ )
59
+
60
+ # Create table with ON CONFLICT DO NOTHING pattern
61
+ connection.execute(
62
+ text(
63
+ f"""
64
+ CREATE TABLE IF NOT EXISTS {TABLE} (
65
+ entity_type TEXT NOT NULL,
66
+ entity_id UUID NOT NULL,
67
+ path LTREE NOT NULL,
68
+ value TEXT NOT NULL,
69
+ embedding VECTOR({TARGET_DIM}),
70
+ content_hash VARCHAR(64) NOT NULL,
71
+ value_type field_type NOT NULL DEFAULT '{FieldType.STRING.value}',
72
+ CONSTRAINT pk_ai_search_index PRIMARY KEY (entity_id, path)
73
+ );
74
+ """
75
+ )
76
+ )
77
+
78
+ # Drop default
79
+ connection.execute(text(f"ALTER TABLE {TABLE} ALTER COLUMN value_type DROP DEFAULT;"))
80
+
81
+ # Create indexes with IF NOT EXISTS
82
+ connection.execute(text(f"CREATE INDEX IF NOT EXISTS ix_ai_search_index_entity_id ON {TABLE} (entity_id);"))
83
+ connection.execute(
84
+ text(f"CREATE INDEX IF NOT EXISTS idx_ai_search_index_content_hash ON {TABLE} (content_hash);")
85
+ )
86
+ connection.execute(
87
+ text(f"CREATE INDEX IF NOT EXISTS ix_flat_path_gist ON {TABLE} USING GIST (path gist_ltree_ops);")
88
+ )
89
+ connection.execute(text(f"CREATE INDEX IF NOT EXISTS ix_flat_path_btree ON {TABLE} (path);"))
90
+ connection.execute(
91
+ text(f"CREATE INDEX IF NOT EXISTS ix_flat_value_trgm ON {TABLE} USING GIN (value gin_trgm_ops);")
92
+ )
93
+ connection.execute(
94
+ text(
95
+ f"CREATE INDEX IF NOT EXISTS ix_flat_embed_hnsw ON {TABLE} USING HNSW (embedding vector_l2_ops) WITH (m = 16, ef_construction = 64);"
96
+ )
97
+ )
98
+
99
+ connection.commit()
100
+ logger.info("LLM migration completed successfully")
101
+
102
+ except Exception as e:
103
+ logger.error("LLM migration failed", error=str(e))
104
+ raise Exception(
105
+ f"LLM migration failed. This likely means the pgvector extension "
106
+ f"is not installed. Please install pgvector and ensure your PostgreSQL "
107
+ f"version supports it. Error: {e}"
108
+ ) from e
@@ -1,3 +1,16 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
1
14
  from .engine import execute_search
2
15
 
3
16
  __all__ = ["execute_search"]