orchestrator-core 4.4.1__py3-none-any.whl → 4.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. orchestrator/__init__.py +17 -2
  2. orchestrator/agentic_app.py +103 -0
  3. orchestrator/api/api_v1/api.py +14 -2
  4. orchestrator/api/api_v1/endpoints/processes.py +2 -0
  5. orchestrator/api/api_v1/endpoints/search.py +296 -0
  6. orchestrator/app.py +32 -0
  7. orchestrator/cli/main.py +22 -1
  8. orchestrator/cli/search/__init__.py +32 -0
  9. orchestrator/cli/search/index_llm.py +73 -0
  10. orchestrator/cli/search/resize_embedding.py +135 -0
  11. orchestrator/cli/search/search_explore.py +208 -0
  12. orchestrator/cli/search/speedtest.py +151 -0
  13. orchestrator/db/models.py +37 -1
  14. orchestrator/devtools/populator.py +16 -0
  15. orchestrator/domain/base.py +2 -7
  16. orchestrator/domain/lifecycle.py +24 -7
  17. orchestrator/llm_settings.py +57 -0
  18. orchestrator/log_config.py +1 -0
  19. orchestrator/migrations/helpers.py +7 -1
  20. orchestrator/schemas/search.py +130 -0
  21. orchestrator/schemas/workflow.py +1 -0
  22. orchestrator/search/__init__.py +12 -0
  23. orchestrator/search/agent/__init__.py +21 -0
  24. orchestrator/search/agent/agent.py +62 -0
  25. orchestrator/search/agent/prompts.py +100 -0
  26. orchestrator/search/agent/state.py +21 -0
  27. orchestrator/search/agent/tools.py +258 -0
  28. orchestrator/search/core/__init__.py +12 -0
  29. orchestrator/search/core/embedding.py +73 -0
  30. orchestrator/search/core/exceptions.py +36 -0
  31. orchestrator/search/core/types.py +296 -0
  32. orchestrator/search/core/validators.py +40 -0
  33. orchestrator/search/docs/index.md +37 -0
  34. orchestrator/search/docs/running_local_text_embedding_inference.md +46 -0
  35. orchestrator/search/filters/__init__.py +40 -0
  36. orchestrator/search/filters/base.py +295 -0
  37. orchestrator/search/filters/date_filters.py +88 -0
  38. orchestrator/search/filters/definitions.py +107 -0
  39. orchestrator/search/filters/ltree_filters.py +56 -0
  40. orchestrator/search/filters/numeric_filter.py +73 -0
  41. orchestrator/search/indexing/__init__.py +16 -0
  42. orchestrator/search/indexing/indexer.py +334 -0
  43. orchestrator/search/indexing/registry.py +101 -0
  44. orchestrator/search/indexing/tasks.py +69 -0
  45. orchestrator/search/indexing/traverse.py +334 -0
  46. orchestrator/search/llm_migration.py +108 -0
  47. orchestrator/search/retrieval/__init__.py +16 -0
  48. orchestrator/search/retrieval/builder.py +123 -0
  49. orchestrator/search/retrieval/engine.py +154 -0
  50. orchestrator/search/retrieval/exceptions.py +90 -0
  51. orchestrator/search/retrieval/pagination.py +96 -0
  52. orchestrator/search/retrieval/retrievers/__init__.py +26 -0
  53. orchestrator/search/retrieval/retrievers/base.py +123 -0
  54. orchestrator/search/retrieval/retrievers/fuzzy.py +94 -0
  55. orchestrator/search/retrieval/retrievers/hybrid.py +277 -0
  56. orchestrator/search/retrieval/retrievers/semantic.py +94 -0
  57. orchestrator/search/retrieval/retrievers/structured.py +39 -0
  58. orchestrator/search/retrieval/utils.py +120 -0
  59. orchestrator/search/retrieval/validation.py +152 -0
  60. orchestrator/search/schemas/__init__.py +12 -0
  61. orchestrator/search/schemas/parameters.py +129 -0
  62. orchestrator/search/schemas/results.py +77 -0
  63. orchestrator/services/processes.py +2 -1
  64. orchestrator/services/settings_env_variables.py +2 -2
  65. orchestrator/settings.py +8 -1
  66. orchestrator/utils/state.py +6 -1
  67. orchestrator/workflows/steps.py +15 -1
  68. orchestrator/workflows/tasks/validate_products.py +1 -1
  69. {orchestrator_core-4.4.1.dist-info → orchestrator_core-4.5.0.dist-info}/METADATA +15 -8
  70. {orchestrator_core-4.4.1.dist-info → orchestrator_core-4.5.0.dist-info}/RECORD +72 -22
  71. {orchestrator_core-4.4.1.dist-info → orchestrator_core-4.5.0.dist-info}/WHEEL +0 -0
  72. {orchestrator_core-4.4.1.dist-info → orchestrator_core-4.5.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,258 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
14
+ from collections.abc import Awaitable, Callable
15
+ from typing import Any, TypeVar
16
+
17
+ import structlog
18
+ from ag_ui.core import EventType, StateSnapshotEvent
19
+ from pydantic_ai import RunContext
20
+ from pydantic_ai.ag_ui import StateDeps
21
+ from pydantic_ai.exceptions import ModelRetry
22
+ from pydantic_ai.messages import ModelRequest, UserPromptPart
23
+ from pydantic_ai.toolsets import FunctionToolset
24
+
25
+ from orchestrator.api.api_v1.endpoints.search import (
26
+ get_definitions,
27
+ list_paths,
28
+ search_processes,
29
+ search_products,
30
+ search_subscriptions,
31
+ search_workflows,
32
+ )
33
+ from orchestrator.schemas.search import SearchResultsSchema
34
+ from orchestrator.search.core.types import ActionType, EntityType, FilterOp
35
+ from orchestrator.search.filters import FilterTree
36
+ from orchestrator.search.retrieval.exceptions import FilterValidationError, PathNotFoundError
37
+ from orchestrator.search.retrieval.validation import validate_filter_tree
38
+ from orchestrator.search.schemas.parameters import PARAMETER_REGISTRY, BaseSearchParameters
39
+
40
+ from .state import SearchState
41
+
42
logger = structlog.get_logger(__name__)


# Parameter model type bound for the per-entity search parameter classes.
P = TypeVar("P", bound=BaseSearchParameters)

# Common signature of the entity search endpoints dispatched below.
SearchFn = Callable[[P], Awaitable[SearchResultsSchema[Any]]]

# Dispatch table: entity type -> API search function for that entity.
SEARCH_FN_MAP: dict[EntityType, SearchFn] = {
    EntityType.SUBSCRIPTION: search_subscriptions,
    EntityType.WORKFLOW: search_workflows,
    EntityType.PRODUCT: search_products,
    EntityType.PROCESS: search_processes,
}

# Toolset exposed to the search agent; tools get one retry unless overridden per tool.
search_toolset: FunctionToolset[StateDeps[SearchState]] = FunctionToolset(max_retries=1)
57
+
58
+
59
def last_user_message(ctx: RunContext[StateDeps[SearchState]]) -> str | None:
    """Return the content of the most recent plain-text user prompt, if any.

    Scans the message history from newest to oldest and returns the first
    ``UserPromptPart`` whose content is a plain string; ``None`` when no such
    part exists.
    """
    user_texts = (
        part.content
        for msg in reversed(ctx.messages)
        if isinstance(msg, ModelRequest)
        for part in msg.parts
        if isinstance(part, UserPromptPart) and isinstance(part.content, str)
    )
    return next(user_texts, None)
66
+
67
+
68
@search_toolset.tool
async def set_search_parameters(
    ctx: RunContext[StateDeps[SearchState]],
    entity_type: EntityType,
    action: str | ActionType = ActionType.SELECT,
) -> StateSnapshotEvent:
    """Sets the initial search context, like the entity type and the user's query.

    This MUST be the first tool called to start any new search.
    Warning: Calling this tool will erase any existing filters and search results from the state.
    """
    previous = ctx.deps.state.parameters or {}
    switched_entity = previous.get("entity_type") != entity_type.value
    # A brand-new search is seeded from the latest user prompt; a repeated call
    # for the same entity type keeps the query that is already stored.
    query_text = (last_user_message(ctx) or "") if switched_entity else previous.get("query", "")

    logger.debug(
        "Setting search parameters",
        entity_type=entity_type.value,
        action=action,
        is_new_search=switched_entity,
        query=query_text,
    )

    # Reset the whole parameter set and any stale results.
    ctx.deps.state.parameters = {"action": action, "entity_type": entity_type, "filters": None, "query": query_text}
    ctx.deps.state.results = []
    logger.debug("Search parameters set", parameters=ctx.deps.state.parameters)

    return StateSnapshotEvent(type=EventType.STATE_SNAPSHOT, snapshot=ctx.deps.state.model_dump())
99
+
100
+
101
@search_toolset.tool(retries=2)
async def set_filter_tree(
    ctx: RunContext[StateDeps[SearchState]],
    filters: FilterTree | None,
) -> StateSnapshotEvent:
    """Replace current filters atomically with a full FilterTree, or clear with None.

    Requirements:
    - Root/group operators must be 'AND' or 'OR' (uppercase).
    - Provide either PathFilters or nested groups under `children`.
    - See the FilterTree schema examples for the exact shape.
    """
    # Filters are meaningless without an entity context; steer the agent back
    # to set_search_parameters via a retryable error.
    if ctx.deps.state.parameters is None:
        raise ModelRetry("Search parameters are not initialized. Call set_search_parameters first.")

    entity_type = EntityType(ctx.deps.state.parameters["entity_type"])

    logger.debug(
        "Setting filter tree",
        entity_type=entity_type.value,
        has_filters=filters is not None,
        filter_summary=f"{len(filters.get_all_leaves())} filters" if filters else "no filters",
    )

    # Validate before mutating state so a rejected tree never leaks into the
    # stored parameters. Each failure mode becomes a retry hint for the agent.
    try:
        await validate_filter_tree(filters, entity_type)
    except PathNotFoundError as e:
        logger.debug(f"{PathNotFoundError.__name__}: {str(e)}")
        # Unknown path: point the agent at the path-discovery tool.
        raise ModelRetry(f"{str(e)} Use discover_filter_paths tool to find valid paths.")
    except FilterValidationError as e:
        # ModelRetry will trigger an agent retry, containing the specific validation error.
        logger.debug(f"Filter validation failed: {str(e)}")
        raise ModelRetry(str(e))
    except Exception as e:
        # Unexpected validation errors are logged at error level but still
        # surfaced to the agent as a retryable failure.
        logger.error("Unexpected Filter validation exception", error=str(e))
        raise ModelRetry(f"Filter validation failed: {str(e)}. Please check your filter structure and try again.")

    # Persist a JSON-serializable dump (aliases included) rather than the model object.
    filter_data = None if filters is None else filters.model_dump(mode="json", by_alias=True)
    ctx.deps.state.parameters["filters"] = filter_data
    return StateSnapshotEvent(type=EventType.STATE_SNAPSHOT, snapshot=ctx.deps.state.model_dump())
141
+
142
+
143
@search_toolset.tool
async def execute_search(
    ctx: RunContext[StateDeps[SearchState]],
    limit: int = 10,
) -> StateSnapshotEvent:
    """Execute the search with the current parameters."""
    state = ctx.deps.state
    if not state.parameters:
        raise ValueError("No search parameters set")

    entity_type = EntityType(state.parameters["entity_type"])
    parameter_model = PARAMETER_REGISTRY.get(entity_type)
    if not parameter_model:
        raise ValueError(f"Unknown entity type: {entity_type}")

    # Re-hydrate the stored parameter dict into the entity-specific model.
    search_params = parameter_model(**state.parameters)
    logger.debug(
        "Executing database search",
        search_entity_type=entity_type.value,
        limit=limit,
        has_filters=search_params.filters is not None,
        query=search_params.query,
        action=search_params.action,
    )
    if search_params.filters:
        logger.debug("Search filters", filters=search_params.filters)

    search_params.limit = limit

    # Dispatch to the endpoint registered for this entity type.
    response = await SEARCH_FN_MAP[entity_type](search_params)
    logger.debug(
        "Search completed",
        total_results=len(response.data) if response.data else 0,
    )

    state.results = response.data
    return StateSnapshotEvent(type=EventType.STATE_SNAPSHOT, snapshot=state.model_dump())
183
+
184
+
185
@search_toolset.tool
async def discover_filter_paths(
    ctx: RunContext[StateDeps[SearchState]],
    field_names: list[str],
    entity_type: EntityType | None = None,
) -> dict[str, dict[str, Any]]:
    """Discovers available filter paths for a list of field names.

    Returns a dictionary where each key is a field_name from the input list and
    the value is its discovery result.
    """
    # Fall back to the entity type already stored in the agent state, then to SUBSCRIPTION.
    if not entity_type and ctx.deps.state.parameters:
        entity_type = EntityType(ctx.deps.state.parameters.get("entity_type"))
    if not entity_type:
        entity_type = EntityType.SUBSCRIPTION

    all_results: dict[str, dict[str, Any]] = {}
    for field_name in field_names:
        paths_response = await list_paths(prefix="", q=field_name, entity_type=entity_type, limit=100)
        needle = field_name.lower()

        # Case-insensitive substring match on leaf and component names.
        matching_leaves = [
            {"name": leaf.name, "value_kind": leaf.ui_types, "paths": leaf.paths}
            for leaf in paths_response.leaves
            if needle in leaf.name.lower()
        ]
        matching_components = [
            {"name": comp.name, "value_kind": comp.ui_types}
            for comp in paths_response.components
            if needle in comp.name.lower()
        ]

        result_for_field: dict[str, Any]
        if matching_leaves or matching_components:
            result_for_field = {
                "status": "OK",
                "guidance": f"Found {len(matching_leaves)} field(s) and {len(matching_components)} component(s) for '{field_name}'.",
                "leaves": matching_leaves,
                "components": matching_components,
            }
        else:
            # Explicit NOT_FOUND entry tells the agent not to invent a filter.
            result_for_field = {
                "status": "NOT_FOUND",
                "guidance": f"No filterable paths found containing '{field_name}'. Do not create a filter for this.",
                "leaves": [],
                "components": [],
            }

        all_results[field_name] = result_for_field
    logger.debug("Returning found fieldname - path mapping", all_results=all_results)
    return all_results
245
+
246
+
247
@search_toolset.tool
async def get_valid_operators() -> dict[str, list[FilterOp]]:
    """Gets the mapping of field types to their valid filter operators."""
    definitions = await get_definitions()
    # Keep only type definitions that actually expose an `operators` attribute,
    # keyed by the UI type's string value.
    return {
        ui_type.value: type_def.operators
        for ui_type, type_def in definitions.items()
        if hasattr(type_def, "operators")
    }
@@ -0,0 +1,12 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
@@ -0,0 +1,73 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
14
+
15
+ import structlog
16
+ from litellm import aembedding as llm_aembedding
17
+ from litellm import embedding as llm_embedding
18
+ from litellm import exceptions as llm_exc
19
+
20
+ from orchestrator.llm_settings import llm_settings
21
+
22
logger = structlog.get_logger(__name__)  # module-level structured logger
23
+
24
+
25
class EmbeddingIndexer:
    """Synchronous helper for embedding batches of text during indexing."""

    @classmethod
    def get_embeddings_from_api_batch(cls, texts: list[str], dry_run: bool) -> list[list[float]]:
        """Embed a batch of texts in one API call.

        Returns one vector per input text, ordered to match the inputs.
        On a dry run, or on any API failure, returns an empty vector per text
        so indexing can proceed best-effort.
        """
        if not texts:
            return []

        empty_batch: list[list[float]] = [[] for _ in texts]
        if dry_run:
            logger.debug("Dry Run: returning empty embeddings")
            return empty_batch

        try:
            resp = llm_embedding(
                model=llm_settings.EMBEDDING_MODEL,
                input=[t.lower() for t in texts],
                api_key=llm_settings.OPENAI_API_KEY,
                api_base=llm_settings.OPENAI_BASE_URL,
                timeout=llm_settings.LLM_TIMEOUT,
                max_retries=llm_settings.LLM_MAX_RETRIES,
            )
            # The API may return items out of order; restore input order by index.
            ordered = sorted(resp.data, key=lambda item: item["index"])
            return [item["embedding"] for item in ordered]
        except (llm_exc.APIError, llm_exc.APIConnectionError, llm_exc.RateLimitError, llm_exc.Timeout) as e:
            logger.error("Embedding request failed", error=str(e))
            return empty_batch
        except Exception as e:
            logger.error("Unexpected embedding error", error=str(e))
            return empty_batch
52
+
53
+
54
class QueryEmbedder:
    """A stateless, async utility for embedding real-time user queries."""

    @classmethod
    async def generate_for_text_async(cls, text: str) -> list[float]:
        """Return the embedding vector for *text*; [] on empty input or any failure."""
        if not text:
            return []
        try:
            response = await llm_aembedding(
                model=llm_settings.EMBEDDING_MODEL,
                input=[text.lower()],
                api_key=llm_settings.OPENAI_API_KEY,
                api_base=llm_settings.OPENAI_BASE_URL,
                timeout=5.0,
                max_retries=0,  # No retries, prioritize speed.
            )
            return response.data[0]["embedding"]
        except Exception as e:
            # Best-effort: interactive callers get an empty vector rather than an error.
            logger.error("Async embedding generation failed", error=str(e))
            return []
@@ -0,0 +1,36 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
14
+
15
class SearchUtilsError(Exception):
    """Base exception for this module."""


class ProductNotInRegistryError(SearchUtilsError):
    """Raised when a product is not found in the model registry."""


class ModelLoadError(SearchUtilsError):
    """Raised when a Pydantic model fails to load from a subscription."""


class InvalidCursorError(SearchUtilsError):
    """Raised when cursor cannot be decoded."""
@@ -0,0 +1,296 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
14
+ from dataclasses import dataclass
15
+ from datetime import date, datetime
16
+ from enum import Enum, IntEnum
17
+ from typing import Annotated, Any, Literal, NamedTuple, TypeAlias, TypedDict, get_args, get_origin
18
+ from uuid import UUID
19
+
20
+ from sqlalchemy.orm.attributes import InstrumentedAttribute
21
+ from sqlalchemy.sql.elements import ColumnElement
22
+ from sqlalchemy_utils.types.ltree import Ltree
23
+
24
+ from orchestrator.types import filter_nonetype, get_origin_and_args, is_optional_type, is_union_type
25
+
26
+ from .validators import is_bool_string, is_iso_date, is_uuid
27
+
28
# Type alias covering both raw SQLAlchemy column expressions and ORM-mapped attributes.
SQLAColumn: TypeAlias = ColumnElement[Any] | InstrumentedAttribute[Any]

# Separator between segments of an ltree path.
LTREE_SEPARATOR = "."
31
+
32
+
33
@dataclass
class SearchMetadata:
    """Metadata about the search operation performed."""

    # Machine-readable label of the retrieval strategy.
    search_type: str
    # Human-readable explanation of the strategy.
    description: str

    @classmethod
    def structured(cls) -> "SearchMetadata":
        """Metadata for pure filter-based retrieval."""
        return cls("structured", "This search performs a filter-based search using structured queries.")

    @classmethod
    def fuzzy(cls) -> "SearchMetadata":
        """Metadata for trigram similarity retrieval."""
        return cls("fuzzy", "This search performs a trigram similarity search.")

    @classmethod
    def semantic(cls) -> "SearchMetadata":
        """Metadata for embedding-based vector retrieval."""
        return cls(
            "semantic",
            "This search performs a vector similarity search, using L2 distance on embeddings with minimum distance scoring (normalized).",
        )

    @classmethod
    def hybrid(cls) -> "SearchMetadata":
        """Metadata for rank-fusion retrieval combining fuzzy and semantic signals."""
        return cls(
            "hybrid",
            "This search performs reciprocal rank fusion combining trigram similarity, word_similarity, and L2 vector distance.",
        )

    @classmethod
    def empty(cls) -> "SearchMetadata":
        """Metadata for a search that was given no criteria."""
        return cls("empty", "Empty search - no criteria provided")
70
+
71
+
72
class BooleanOperator(str, Enum):
    """Logical connective used to combine filter conditions in a group."""

    AND = "AND"
    OR = "OR"
75
+
76
+
77
class FilterOp(str, Enum):
    """Comparison and path operators available to filter conditions."""

    # Scalar comparison operators.
    EQ = "eq"
    NEQ = "neq"
    LT = "lt"
    LIKE = "like"
    LTE = "lte"
    GT = "gt"
    GTE = "gte"
    BETWEEN = "between"

    # Ltree path operators.
    MATCHES_LQUERY = "matches_lquery"  # The ~ operator for wildcard matching
    IS_ANCESTOR = "is_ancestor"  # The @> operator
    IS_DESCENDANT = "is_descendant"  # The <@ operator
    PATH_MATCH = "path_match"

    # Path-component convenience operators.
    HAS_COMPONENT = "has_component"  # Path contains this segment
    NOT_HAS_COMPONENT = "not_has_component"  # Path doesn't contain segment
    ENDS_WITH = "ends_with"
95
+
96
+
97
class EntityType(str, Enum):
    """Kinds of orchestrator entities that can be indexed and searched."""

    SUBSCRIPTION = "SUBSCRIPTION"
    PRODUCT = "PRODUCT"
    WORKFLOW = "WORKFLOW"
    PROCESS = "PROCESS"
102
+
103
+
104
class ActionType(str, Enum):
    """Defines the explicit, safe actions the agent can request."""

    # Read-only by design: only retrieval is exposed to the agent for now.
    SELECT = "select"  # Retrieve a list of matching records.
    # COUNT = "count"  # For phase1; the agent will not support this yet.
109
+
110
+
111
class UIType(str, Enum):
    """How a value should be rendered in the UI."""

    STRING = "string"
    NUMBER = "number"
    BOOLEAN = "boolean"
    DATETIME = "datetime"
    COMPONENT = "component"

    @classmethod
    def from_field_type(cls, ft: "FieldType") -> "UIType":
        """Create a UIType from a backend FieldType to indicate how a value must be rendered."""
        # Anything not explicitly mapped (uuid, block, resource_type, ...) renders as a string.
        rendering = {
            FieldType.INTEGER: cls.NUMBER,
            FieldType.FLOAT: cls.NUMBER,
            FieldType.BOOLEAN: cls.BOOLEAN,
            FieldType.DATETIME: cls.DATETIME,
        }
        return rendering.get(ft, cls.STRING)
128
+
129
+
130
class FieldType(str, Enum):
    """Backend type classification for extracted field values."""

    STRING = "string"
    INTEGER = "integer"
    FLOAT = "float"
    BOOLEAN = "boolean"
    DATETIME = "datetime"
    UUID = "uuid"
    BLOCK = "block"
    RESOURCE_TYPE = "resource_type"

    @classmethod
    def infer(cls, val: Any) -> "FieldType":
        """Infer the FieldType of a runtime value."""
        # Explicitly wrapped values carry their own declared type.
        if isinstance(val, TypedValue):
            return cls._infer_typed_value(val)

        # bool must be checked before int: bool is a subclass of int.
        if isinstance(val, bool):
            return cls.BOOLEAN
        if isinstance(val, int):
            return cls.INTEGER
        if isinstance(val, float):
            return cls.FLOAT
        # NOTE: `UUID` here resolves to uuid.UUID from the module imports, not
        # the enum member — class attributes are not in scope inside methods.
        if isinstance(val, UUID):
            return cls.UUID
        if isinstance(val, (datetime, date)):
            return cls.DATETIME
        if isinstance(val, str):
            return cls._infer_from_str(val)

        return cls.STRING

    @classmethod
    def _infer_typed_value(cls, val: "TypedValue") -> "FieldType":
        """Map a TypedValue's declared type; only BLOCK and RESOURCE_TYPE are honored."""
        if val.type == cls.BLOCK:
            return cls.BLOCK
        if val.type == cls.RESOURCE_TYPE:
            return cls.RESOURCE_TYPE
        return cls.STRING

    @classmethod
    def _infer_from_str(cls, val: str) -> "FieldType":
        """Infer a FieldType from a string's content (uuid, ISO date, bool, number)."""
        if is_uuid(val):
            return cls.UUID
        if is_iso_date(val):
            return cls.DATETIME
        if is_bool_string(val):
            return cls.BOOLEAN
        if val.isdigit():
            return cls.INTEGER
        # Anything parseable as float is numeric; everything else stays a string.
        try:
            float(val)
            return cls.FLOAT
        except ValueError:
            return cls.STRING

    @classmethod
    def from_type_hint(cls, type_hint: object) -> "FieldType":
        """Convert type hint to FieldType."""
        _type_mapping = {
            int: cls.INTEGER,
            float: cls.FLOAT,
            bool: cls.BOOLEAN,
            str: cls.STRING,
            datetime: cls.DATETIME,
            UUID: cls.UUID,
        }

        # Exact match on a plain type wins.
        if type_hint in _type_mapping:
            return _type_mapping[type_hint]  # type: ignore[index]

        # Annotated[X, ...] -> recurse into X.
        if get_origin(type_hint) is Annotated:
            inner_type = get_args(type_hint)[0]
            return cls.from_type_hint(inner_type)

        origin, args = get_origin_and_args(type_hint)

        if origin is list:
            return cls._handle_list_type(args)

        if origin is Literal:
            return cls._handle_literal_type(args)

        if is_optional_type(type_hint) or is_union_type(type_hint):
            return cls._handle_union_type(args)

        if isinstance(type_hint, type):
            return cls._handle_class_type(type_hint)

        return cls.STRING

    @classmethod
    def _handle_list_type(cls, args: tuple) -> "FieldType":
        """A list hint is typed after its element type."""
        if args:
            element_type = args[0]
            return cls.from_type_hint(element_type)
        return cls.STRING

    @classmethod
    def _handle_literal_type(cls, args: tuple) -> "FieldType":
        """A Literal hint is typed after its first literal value."""
        if not args:
            return cls.STRING
        first_value = args[0]
        # bool before int: bool literal values are also ints.
        if isinstance(first_value, bool):
            return cls.BOOLEAN
        if isinstance(first_value, int):
            return cls.INTEGER
        if isinstance(first_value, str):
            return cls.STRING
        if isinstance(first_value, float):
            return cls.FLOAT
        return cls.STRING

    @classmethod
    def _handle_union_type(cls, args: tuple) -> "FieldType":
        """Optional/union hints are typed after the first non-None member."""
        non_none_types = list(filter_nonetype(args))
        if non_none_types:
            return cls.from_type_hint(non_none_types[0])
        return cls.STRING

    @classmethod
    def _handle_class_type(cls, type_hint: type) -> "FieldType":
        """Classify plain classes: IntEnum -> INTEGER, other Enums -> STRING, product blocks -> BLOCK."""
        # IntEnum must be checked before Enum (IntEnum is an Enum subclass).
        if issubclass(type_hint, IntEnum):
            return cls.INTEGER
        if issubclass(type_hint, Enum):
            return cls.STRING

        # Local import — presumably to avoid a circular import with the domain package (confirm).
        from orchestrator.domain.base import ProductBlockModel

        if issubclass(type_hint, ProductBlockModel):
            return cls.BLOCK

        return cls.STRING

    def is_embeddable(self, value: str | None) -> bool:
        """Check if a field should be embedded."""
        if value is None:
            return False

        # If inference suggests it's not actually a string, don't embed it
        return FieldType._infer_from_str(value) == FieldType.STRING
269
+
270
+
271
@dataclass(frozen=True)
class TypedValue:
    """A raw value paired with an explicitly declared FieldType.

    FieldType.infer gives TypedValue instances precedence over plain-value
    inference, so wrapping a value is the way to force BLOCK/RESOURCE_TYPE
    typing. Frozen, so instances are immutable.
    """

    # The wrapped raw value (stringified downstream by ExtractedField.from_raw).
    value: Any
    # Declared type; only BLOCK and RESOURCE_TYPE are honored by
    # FieldType._infer_typed_value — anything else falls back to STRING.
    type: FieldType
275
+
276
+
277
class ExtractedField(NamedTuple):
    """One flattened field: its path, stringified value, and inferred type."""

    path: str
    value: str
    value_type: FieldType

    @classmethod
    def from_raw(cls, path: str, raw_value: Any) -> "ExtractedField":
        """Build a field from a raw traversal value, unwrapping TypedValue wrappers."""
        # Infer on the original value first so TypedValue declarations are honored.
        inferred = FieldType.infer(raw_value)
        if isinstance(raw_value, TypedValue):
            raw_value = raw_value.value
        return cls(path=path, value=str(raw_value), value_type=inferred)
287
+
288
+
289
class IndexableRecord(TypedDict):
    """Shape of one flattened entity field as stored in the search index."""

    entity_id: str  # identifier of the entity this field belongs to
    entity_type: str  # entity kind — presumably an EntityType value; confirm against indexer
    path: Ltree  # ltree path locating the field within the entity
    value: Any  # the field's value
    value_type: Any  # type classification of the value
    content_hash: str  # content hash — presumably used for change detection; confirm
    embedding: list[float] | None  # embedding vector, or None when the value is not embedded