orchestrator-core 4.5.1a1__py3-none-any.whl → 4.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. orchestrator/__init__.py +3 -12
  2. orchestrator/agentic_app.py +48 -29
  3. orchestrator/api/api_v1/api.py +8 -6
  4. orchestrator/api/api_v1/endpoints/processes.py +2 -0
  5. orchestrator/api/api_v1/endpoints/search.py +26 -7
  6. orchestrator/cli/main.py +2 -2
  7. orchestrator/cli/search/__init__.py +32 -0
  8. orchestrator/devtools/populator.py +16 -0
  9. orchestrator/domain/base.py +2 -7
  10. orchestrator/domain/lifecycle.py +24 -7
  11. orchestrator/llm_settings.py +9 -3
  12. orchestrator/log_config.py +1 -0
  13. orchestrator/migrations/helpers.py +7 -1
  14. orchestrator/schemas/search.py +13 -0
  15. orchestrator/schemas/workflow.py +1 -0
  16. orchestrator/search/agent/__init__.py +15 -2
  17. orchestrator/search/agent/agent.py +30 -15
  18. orchestrator/search/agent/prompts.py +75 -37
  19. orchestrator/search/agent/state.py +13 -0
  20. orchestrator/search/agent/tools.py +148 -11
  21. orchestrator/search/core/__init__.py +12 -0
  22. orchestrator/search/core/embedding.py +13 -4
  23. orchestrator/search/core/exceptions.py +14 -0
  24. orchestrator/search/core/types.py +15 -0
  25. orchestrator/search/core/validators.py +13 -0
  26. orchestrator/search/docs/running_local_text_embedding_inference.md +1 -0
  27. orchestrator/search/filters/__init__.py +13 -0
  28. orchestrator/search/filters/base.py +84 -61
  29. orchestrator/search/filters/date_filters.py +13 -0
  30. orchestrator/search/filters/definitions.py +16 -2
  31. orchestrator/search/filters/ltree_filters.py +16 -3
  32. orchestrator/search/filters/numeric_filter.py +13 -0
  33. orchestrator/search/indexing/__init__.py +13 -0
  34. orchestrator/search/indexing/indexer.py +14 -3
  35. orchestrator/search/indexing/registry.py +13 -0
  36. orchestrator/search/indexing/tasks.py +17 -1
  37. orchestrator/search/indexing/traverse.py +17 -5
  38. orchestrator/search/llm_migration.py +108 -0
  39. orchestrator/search/retrieval/__init__.py +13 -0
  40. orchestrator/search/retrieval/builder.py +23 -8
  41. orchestrator/search/retrieval/engine.py +36 -34
  42. orchestrator/search/retrieval/exceptions.py +90 -0
  43. orchestrator/search/retrieval/pagination.py +13 -0
  44. orchestrator/search/retrieval/retrievers/__init__.py +26 -0
  45. orchestrator/search/retrieval/retrievers/base.py +123 -0
  46. orchestrator/search/retrieval/retrievers/fuzzy.py +94 -0
  47. orchestrator/search/retrieval/retrievers/hybrid.py +277 -0
  48. orchestrator/search/retrieval/retrievers/semantic.py +94 -0
  49. orchestrator/search/retrieval/retrievers/structured.py +39 -0
  50. orchestrator/search/retrieval/utils.py +21 -7
  51. orchestrator/search/retrieval/validation.py +54 -76
  52. orchestrator/search/schemas/__init__.py +12 -0
  53. orchestrator/search/schemas/parameters.py +13 -0
  54. orchestrator/search/schemas/results.py +15 -1
  55. orchestrator/services/processes.py +2 -1
  56. orchestrator/settings.py +7 -0
  57. orchestrator/utils/state.py +6 -1
  58. orchestrator/workflows/steps.py +16 -1
  59. {orchestrator_core-4.5.1a1.dist-info → orchestrator_core-4.5.2.dist-info}/METADATA +13 -11
  60. {orchestrator_core-4.5.1a1.dist-info → orchestrator_core-4.5.2.dist-info}/RECORD +66 -59
  61. orchestrator/migrations/versions/schema/2025-08-12_52b37b5b2714_search_index_model_for_llm_integration.py +0 -95
  62. orchestrator/search/retrieval/retriever.py +0 -447
  63. /orchestrator/cli/{index_llm.py → search/index_llm.py} +0 -0
  64. /orchestrator/cli/{resize_embedding.py → search/resize_embedding.py} +0 -0
  65. /orchestrator/cli/{search_explore.py → search/search_explore.py} +0 -0
  66. /orchestrator/cli/{speedtest.py → search/speedtest.py} +0 -0
  67. {orchestrator_core-4.5.1a1.dist-info → orchestrator_core-4.5.2.dist-info}/WHEEL +0 -0
  68. {orchestrator_core-4.5.1a1.dist-info → orchestrator_core-4.5.2.dist-info}/licenses/LICENSE +0 -0
@@ -1,3 +1,16 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
1
14
  import json
2
15
  from textwrap import dedent
3
16
 
@@ -6,57 +19,82 @@ from pydantic_ai import RunContext
6
19
  from pydantic_ai.ag_ui import StateDeps
7
20
 
8
21
  from orchestrator.search.agent.state import SearchState
9
- from orchestrator.search.retrieval.validation import get_structured_filter_schema
10
22
 
11
23
  logger = structlog.get_logger(__name__)
12
24
 
13
25
 
14
26
  async def get_base_instructions() -> str:
27
+ return dedent(
28
+ """
29
+ You are an expert assistant designed to find relevant information by building and running database queries.
15
30
 
16
- try:
17
- schema_dict = get_structured_filter_schema()
18
- if schema_dict:
19
- schema_info = "\n".join([f" {path}: {field_type}" for path, field_type in schema_dict.items()])
20
- else:
21
- schema_info = " No filterable fields available"
22
- except Exception as e:
23
- logger.warning(f"Failed to load schema for prompt: {e}")
24
- schema_info = " Schema temporarily unavailable"
25
- logger.error(f"Generated schema for agent prompt:\n{schema_info}")
31
+ ---
32
+ ### 1. Your Goal and Method
26
33
 
27
- return dedent(
28
- f"""
29
- You are a helpful assistant for building and running database queries.
30
-
31
- **Available Data Schema:**
32
- Use the following schema to understand the available fields.
33
- When you build filters, each `path` MUST be a valid path from this schema,
34
- and the operator/value MUST match that path's type.
35
- ```
36
- {schema_info}
37
- ```
38
- **Workflow (do in order):**
39
- 1) `set_search_parameters` to define the main entity being searched.
40
- 2) Build a complete `FilterTree` (AND at root unless the user asks for OR).
41
- 3) `set_filter_tree(filters=<FilterTree or null>)`.
42
- 4) `execute_search()`.
43
- 5) Summarize the results for the user.
44
-
45
- """
34
+ Your ultimate goal is to **find information** that answers the user's request.
35
+
36
+ To do this, you will perform either a broad search or a filtered search.
37
+ For **filtered searches**, your primary method is to **construct a valid `FilterTree` object**.
38
+ To do this correctly, you must infer the exact structure, operators, and nesting rules from the Pydantic schema of the `set_filter_tree` tool itself.
39
+
40
+ ---
41
+ ### 2. Information-Gathering Tools
42
+
43
+ **If you determine that a `FilterTree` is needed**, use these tools to gather information first:
44
+
45
+ - **discover_filter_paths(field_names: list[str])**: Use this to discover all valid filter paths for a list of field names in a single call.
46
+ - **get_valid_operators()**: Use this to get the JSON map of all valid operators for each field type.
47
+
48
+ ---
49
+ ### 3. Execution Workflow
50
+
51
+ Follow these steps in strict order:
52
+
53
+ 1. **Set Context**: Always begin by calling `set_search_parameters`.
54
+ 2. **Analyze for Filters**: Based on the user's request, decide if specific filters are necessary.
55
+ - **If filters ARE required**, follow these sub-steps:
56
+ a. **Gather Intel**: Identify all needed field names, then call `discover_filter_paths` and `get_valid_operators` **once each** to get all required information.
57
+ b. **Construct FilterTree**: Build the `FilterTree` object.
58
+ c. **Set Filters**: Call `set_filter_tree`.
59
+ 3. **Execute**: Call `execute_search`. This is done for both filtered and non-filtered searches.
60
+ 4. **Report**: Answer the user's question directly and summarize when appropriate.
61
+
62
+ ---
63
+ ### 4. Critical Rules
64
+
65
+ - **NEVER GUESS PATHS IN THE DATABASE**: You *must* verify every filter path by calling `discover_filter_paths` first. If a path does not exist, you may attempt to map the question onto existing paths that are valid and available from `discover_filter_paths`. If you cannot infer a match, inform the user and do not include it in the `FilterTree`.
66
+ - **USE FULL PATHS**: Always use the full, unambiguous path returned by the discovery tool.
67
+ - **MATCH OPERATORS**: Only use operators that are compatible with the field type as confirmed by `get_valid_operators`.
68
+ """
46
69
  )
47
70
 
48
71
 
49
72
  async def get_dynamic_instructions(ctx: RunContext[StateDeps[SearchState]]) -> str:
50
- """Dynamically generate the system prompt for the agent."""
51
- param_state = json.dumps(ctx.deps.state.parameters, indent=2, default=str) if ctx.deps.state.parameters else "{}"
73
+ """Dynamically provides 'next step' coaching based on the current state."""
74
+ state = ctx.deps.state
75
+ param_state_str = json.dumps(state.parameters, indent=2, default=str) if state.parameters else "Not set."
52
76
 
77
+ next_step_guidance = ""
78
+ if not state.parameters or not state.parameters.get("entity_type"):
79
+ next_step_guidance = (
80
+ "INSTRUCTION: The search context is not set. Your next action is to call `set_search_parameters`."
81
+ )
82
+ else:
83
+ next_step_guidance = (
84
+ "INSTRUCTION: Context is set. Now, analyze the user's request. "
85
+ "If specific filters ARE required, use the information-gathering tools to build a `FilterTree` and call `set_filter_tree`. "
86
+ "If no specific filters are needed, you can proceed directly to `execute_search`."
87
+ )
53
88
  return dedent(
54
89
  f"""
55
- Current search parameters state:
56
- {param_state}
90
+ ---
91
+ ### Current State & Next Action
92
+
93
+ **Current Search Parameters:**
94
+ ```json
95
+ {param_state_str}
96
+ ```
57
97
 
58
- Remember:
59
- - If filters are missing or incomplete, construct a full FilterTree and call `set_filter_tree`.
60
- - Then call `execute_search`.
98
+ **{next_step_guidance}**
61
99
  """
62
100
  )
@@ -1,3 +1,16 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
1
14
  from typing import Any
2
15
 
3
16
  from pydantic import BaseModel, Field
@@ -1,3 +1,16 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
1
14
  from collections.abc import Awaitable, Callable
2
15
  from typing import Any, TypeVar
3
16
 
@@ -10,20 +23,25 @@ from pydantic_ai.messages import ModelRequest, UserPromptPart
10
23
  from pydantic_ai.toolsets import FunctionToolset
11
24
 
12
25
  from orchestrator.api.api_v1.endpoints.search import (
26
+ get_definitions,
27
+ list_paths,
13
28
  search_processes,
14
29
  search_products,
15
30
  search_subscriptions,
16
31
  search_workflows,
17
32
  )
18
33
  from orchestrator.schemas.search import SearchResultsSchema
19
- from orchestrator.search.core.types import ActionType, EntityType
34
+ from orchestrator.search.core.types import ActionType, EntityType, FilterOp
20
35
  from orchestrator.search.filters import FilterTree
36
+ from orchestrator.search.retrieval.exceptions import FilterValidationError, PathNotFoundError
21
37
  from orchestrator.search.retrieval.validation import validate_filter_tree
22
38
  from orchestrator.search.schemas.parameters import PARAMETER_REGISTRY, BaseSearchParameters
23
39
 
24
40
  from .state import SearchState
25
41
 
26
42
  logger = structlog.get_logger(__name__)
43
+
44
+
27
45
  P = TypeVar("P", bound=BaseSearchParameters)
28
46
 
29
47
  SearchFn = Callable[[P], Awaitable[SearchResultsSchema[Any]]]
@@ -53,13 +71,26 @@ async def set_search_parameters(
53
71
  entity_type: EntityType,
54
72
  action: str | ActionType = ActionType.SELECT,
55
73
  ) -> StateSnapshotEvent:
74
+ """Sets the initial search context, like the entity type and the user's query.
75
+
76
+ This MUST be the first tool called to start any new search.
77
+ Warning: Calling this tool will erase any existing filters and search results from the state.
78
+ """
56
79
  params = ctx.deps.state.parameters or {}
57
80
  is_new_search = params.get("entity_type") != entity_type.value
58
81
  final_query = (last_user_message(ctx) or "") if is_new_search else params.get("query", "")
59
82
 
83
+ logger.debug(
84
+ "Setting search parameters",
85
+ entity_type=entity_type.value,
86
+ action=action,
87
+ is_new_search=is_new_search,
88
+ query=final_query,
89
+ )
90
+
60
91
  ctx.deps.state.parameters = {"action": action, "entity_type": entity_type, "filters": None, "query": final_query}
61
92
  ctx.deps.state.results = []
62
- logger.info(f"Set search parameters: entity_type={entity_type}, action={action}")
93
+ logger.debug("Search parameters set", parameters=ctx.deps.state.parameters)
63
94
 
64
95
  return StateSnapshotEvent(
65
96
  type=EventType.STATE_SNAPSHOT,
@@ -84,23 +115,35 @@ async def set_filter_tree(
84
115
 
85
116
  entity_type = EntityType(ctx.deps.state.parameters["entity_type"])
86
117
 
118
+ logger.debug(
119
+ "Setting filter tree",
120
+ entity_type=entity_type.value,
121
+ has_filters=filters is not None,
122
+ filter_summary=f"{len(filters.get_all_leaves())} filters" if filters else "no filters",
123
+ )
124
+
87
125
  try:
88
126
  await validate_filter_tree(filters, entity_type)
89
- except Exception as e:
127
+ except PathNotFoundError as e:
128
+ logger.debug(f"{PathNotFoundError.__name__}: {str(e)}")
129
+ raise ModelRetry(f"{str(e)} Use discover_filter_paths tool to find valid paths.")
130
+ except FilterValidationError as e:
131
+ # ModelRetry will trigger an agent retry, containing the specific validation error.
132
+ logger.debug(f"Filter validation failed: {str(e)}")
90
133
  raise ModelRetry(str(e))
134
+ except Exception as e:
135
+ logger.error("Unexpected Filter validation exception", error=str(e))
136
+ raise ModelRetry(f"Filter validation failed: {str(e)}. Please check your filter structure and try again.")
91
137
 
92
- ctx.deps.state.parameters["filters"] = None if filters is None else filters.model_dump(mode="json", by_alias=True)
93
- logger.info(
94
- "Set filter tree",
95
- filters=None if filters is None else filters.model_dump(mode="json", by_alias=True),
96
- )
138
+ filter_data = None if filters is None else filters.model_dump(mode="json", by_alias=True)
139
+ ctx.deps.state.parameters["filters"] = filter_data
97
140
  return StateSnapshotEvent(type=EventType.STATE_SNAPSHOT, snapshot=ctx.deps.state.model_dump())
98
141
 
99
142
 
100
143
  @search_toolset.tool
101
144
  async def execute_search(
102
145
  ctx: RunContext[StateDeps[SearchState]],
103
- limit: int = 5,
146
+ limit: int = 10,
104
147
  ) -> StateSnapshotEvent:
105
148
  """Execute the search with the current parameters."""
106
149
  if not ctx.deps.state.parameters:
@@ -112,10 +155,104 @@ async def execute_search(
112
155
  raise ValueError(f"Unknown entity type: {entity_type}")
113
156
 
114
157
  params = param_class(**ctx.deps.state.parameters)
115
- logger.info("Executing database search", **params.model_dump(mode="json"))
158
+ logger.debug(
159
+ "Executing database search",
160
+ search_entity_type=entity_type.value,
161
+ limit=limit,
162
+ has_filters=params.filters is not None,
163
+ query=params.query,
164
+ action=params.action,
165
+ )
166
+
167
+ if params.filters:
168
+ logger.debug("Search filters", filters=params.filters)
169
+
170
+ params.limit = limit
116
171
 
117
172
  fn = SEARCH_FN_MAP[entity_type]
118
173
  search_results = await fn(params)
119
- ctx.deps.state.results = search_results.data[:limit]
174
+
175
+ logger.debug(
176
+ "Search completed",
177
+ total_results=len(search_results.data) if search_results.data else 0,
178
+ )
179
+
180
+ ctx.deps.state.results = search_results.data
120
181
 
121
182
  return StateSnapshotEvent(type=EventType.STATE_SNAPSHOT, snapshot=ctx.deps.state.model_dump())
183
+
184
+
185
+ @search_toolset.tool
186
+ async def discover_filter_paths(
187
+ ctx: RunContext[StateDeps[SearchState]],
188
+ field_names: list[str],
189
+ entity_type: EntityType | None = None,
190
+ ) -> dict[str, dict[str, Any]]:
191
+ """Discovers available filter paths for a list of field names.
192
+
193
+ Returns a dictionary where each key is a field_name from the input list and
194
+ the value is its discovery result.
195
+ """
196
+ if not entity_type and ctx.deps.state.parameters:
197
+ entity_type = EntityType(ctx.deps.state.parameters.get("entity_type"))
198
+ if not entity_type:
199
+ entity_type = EntityType.SUBSCRIPTION
200
+
201
+ all_results = {}
202
+ for field_name in field_names:
203
+ paths_response = await list_paths(prefix="", q=field_name, entity_type=entity_type, limit=100)
204
+
205
+ matching_leaves = []
206
+ for leaf in paths_response.leaves:
207
+ if field_name.lower() in leaf.name.lower():
208
+ matching_leaves.append(
209
+ {
210
+ "name": leaf.name,
211
+ "value_kind": leaf.ui_types,
212
+ "paths": leaf.paths,
213
+ }
214
+ )
215
+
216
+ matching_components = []
217
+ for comp in paths_response.components:
218
+ if field_name.lower() in comp.name.lower():
219
+ matching_components.append(
220
+ {
221
+ "name": comp.name,
222
+ "value_kind": comp.ui_types,
223
+ }
224
+ )
225
+
226
+ result_for_field: dict[str, Any]
227
+ if not matching_leaves and not matching_components:
228
+ result_for_field = {
229
+ "status": "NOT_FOUND",
230
+ "guidance": f"No filterable paths found containing '{field_name}'. Do not create a filter for this.",
231
+ "leaves": [],
232
+ "components": [],
233
+ }
234
+ else:
235
+ result_for_field = {
236
+ "status": "OK",
237
+ "guidance": f"Found {len(matching_leaves)} field(s) and {len(matching_components)} component(s) for '{field_name}'.",
238
+ "leaves": matching_leaves,
239
+ "components": matching_components,
240
+ }
241
+
242
+ all_results[field_name] = result_for_field
243
+ logger.debug("Returning found fieldname - path mapping", all_results=all_results)
244
+ return all_results
245
+
246
+
247
+ @search_toolset.tool
248
+ async def get_valid_operators() -> dict[str, list[FilterOp]]:
249
+ """Gets the mapping of field types to their valid filter operators."""
250
+ definitions = await get_definitions()
251
+
252
+ operator_map = {}
253
+ for ui_type, type_def in definitions.items():
254
+ key = ui_type.value
255
+
256
+ if hasattr(type_def, "operators"):
257
+ operator_map[key] = type_def.operators
258
+ return operator_map
@@ -0,0 +1,12 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
@@ -1,4 +1,16 @@
1
- import logging
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
2
14
 
3
15
  import structlog
4
16
  from litellm import aembedding as llm_aembedding
@@ -9,9 +21,6 @@ from orchestrator.llm_settings import llm_settings
9
21
 
10
22
  logger = structlog.get_logger(__name__)
11
23
 
12
- # Its logging alot of noise such as embedding vectors.
13
- logging.getLogger("LiteLLM").setLevel(logging.WARNING)
14
-
15
24
 
16
25
  class EmbeddingIndexer:
17
26
 
@@ -1,3 +1,17 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
14
+
1
15
  class SearchUtilsError(Exception):
2
16
  """Base exception for this module."""
3
17
 
@@ -1,3 +1,16 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
1
14
  from dataclasses import dataclass
2
15
  from datetime import date, datetime
3
16
  from enum import Enum, IntEnum
@@ -14,6 +27,8 @@ from .validators import is_bool_string, is_iso_date, is_uuid
14
27
 
15
28
  SQLAColumn: TypeAlias = ColumnElement[Any] | InstrumentedAttribute[Any]
16
29
 
30
+ LTREE_SEPARATOR = "."
31
+
17
32
 
18
33
  @dataclass
19
34
  class SearchMetadata:
@@ -1,3 +1,16 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
1
14
  import uuid
2
15
 
3
16
  from dateutil.parser import isoparse
@@ -18,6 +18,7 @@ Point your backend to the local endpoint and declare the new vector size:
18
18
  ```env
19
19
  OPENAI_BASE_URL=http://localhost:8080/v1
20
20
  EMBEDDING_DIMENSION=384
21
+ EMBEDDING_MAX_BATCH_SIZE=32 # Not required when using OpenAI embeddings
21
22
  ```
22
23
 
23
24
  Depending on the model, you might want to change the `EMBEDDING_FALLBACK_MAX_TOKENS` and `EMBEDDING_MAX_BATCH_SIZE` settings, which are set conservatively and according to the requirements of the setup used in this example.
@@ -1,3 +1,16 @@
1
+ # Copyright 2019-2025 SURF, GÉANT.
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
1
14
  from .base import (
2
15
  EqualityFilter,
3
16
  FilterCondition,