orchestrator-core 4.6.3-py3-none-any.whl → 4.6.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- orchestrator/__init__.py +1 -1
- orchestrator/cli/search/index_llm.py +8 -0
- orchestrator/search/filters/date_filters.py +4 -5
- orchestrator/search/indexing/indexer.py +43 -7
- orchestrator/search/indexing/registry.py +16 -1
- orchestrator/search/indexing/tasks.py +22 -1
- orchestrator/search/indexing/traverse.py +30 -8
- orchestrator/search/query/results.py +60 -2
- orchestrator/search/retrieval/retrievers/__init__.py +2 -0
- orchestrator/search/retrieval/retrievers/base.py +8 -1
- orchestrator/search/retrieval/retrievers/process.py +225 -0
- {orchestrator_core-4.6.3.dist-info → orchestrator_core-4.6.4.dist-info}/METADATA +4 -4
- {orchestrator_core-4.6.3.dist-info → orchestrator_core-4.6.4.dist-info}/RECORD +15 -14
- {orchestrator_core-4.6.3.dist-info → orchestrator_core-4.6.4.dist-info}/WHEEL +0 -0
- {orchestrator_core-4.6.3.dist-info → orchestrator_core-4.6.4.dist-info}/licenses/LICENSE +0 -0
orchestrator/__init__.py
CHANGED

-__version__ = "4.6.3"
+__version__ = "4.6.4"

orchestrator/cli/search/index_llm.py
CHANGED

@@ -14,6 +14,7 @@ def subscriptions_command(
     subscription_id: str | None = typer.Option(None, help="UUID (default = all)"),
     dry_run: bool = typer.Option(False, help="No DB writes"),
     force_index: bool = typer.Option(False, help="Force re-index (ignore hash cache)"),
+    show_progress: bool = typer.Option(False, help="Show per-entity progress"),
 ) -> None:
     """Index subscription_search_index."""
     run_indexing_for_entity(
@@ -21,6 +22,7 @@ def subscriptions_command(
         entity_id=subscription_id,
         dry_run=dry_run,
         force_index=force_index,
+        show_progress=show_progress,
     )
 
 
@@ -29,6 +31,7 @@ def products_command(
     product_id: str | None = typer.Option(None, help="UUID (default = all)"),
     dry_run: bool = typer.Option(False, help="No DB writes"),
     force_index: bool = typer.Option(False, help="Force re-index (ignore hash cache)"),
+    show_progress: bool = typer.Option(False, help="Show per-entity progress"),
 ) -> None:
     """Index product_search_index."""
     run_indexing_for_entity(
@@ -36,6 +39,7 @@ def products_command(
         entity_id=product_id,
         dry_run=dry_run,
         force_index=force_index,
+        show_progress=show_progress,
     )
 
 
@@ -44,6 +48,7 @@ def processes_command(
     process_id: str | None = typer.Option(None, help="UUID (default = all)"),
     dry_run: bool = typer.Option(False, help="No DB writes"),
     force_index: bool = typer.Option(False, help="Force re-index (ignore hash cache)"),
+    show_progress: bool = typer.Option(False, help="Show per-entity progress"),
 ) -> None:
     """Index process_search_index."""
     run_indexing_for_entity(
@@ -51,6 +56,7 @@ def processes_command(
         entity_id=process_id,
         dry_run=dry_run,
         force_index=force_index,
+        show_progress=show_progress,
     )
 
 
@@ -59,6 +65,7 @@ def workflows_command(
     workflow_id: str | None = typer.Option(None, help="UUID (default = all)"),
     dry_run: bool = typer.Option(False, help="No DB writes"),
     force_index: bool = typer.Option(False, help="Force re-index (ignore hash cache)"),
+    show_progress: bool = typer.Option(False, help="Show per-entity progress"),
 ) -> None:
     """Index workflow_search_index."""
     run_indexing_for_entity(
@@ -66,6 +73,7 @@ def workflows_command(
         entity_id=workflow_id,
         dry_run=dry_run,
         force_index=force_index,
+        show_progress=show_progress,
     )

orchestrator/search/filters/date_filters.py
CHANGED

@@ -14,7 +14,6 @@
 from datetime import date, datetime
 from typing import Annotated, Any, Literal
 
-from dateutil.parser import parse as dt_parse
 from pydantic import BaseModel, BeforeValidator, Field, model_validator
 from sqlalchemy import TIMESTAMP, and_
 from sqlalchemy import cast as sa_cast
@@ -27,10 +26,10 @@ def _validate_date_string(v: Any) -> Any:
     if not isinstance(v, str):
         return v
     try:
-        dt_parse(v)
+        datetime.fromisoformat(v)
         return v
     except Exception as exc:
-        raise ValueError("is not a valid date or datetime string") from exc
+        raise ValueError("is not a valid ISO-8601 date or datetime string") from exc
 
 
 DateValue = datetime | date | str
@@ -44,8 +43,8 @@ class DateRange(BaseModel):
 
     @model_validator(mode="after")
     def _order(self) -> "DateRange":
-        to_datetime = dt_parse(str(self.end))
-        from_datetime = dt_parse(str(self.start))
+        to_datetime = datetime.fromisoformat(str(self.end))
+        from_datetime = datetime.fromisoformat(str(self.start))
         if to_datetime <= from_datetime:
             raise ValueError("'to' must be after 'from'")
         return self

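Worth noting about the change above: `datetime.fromisoformat` is far stricter than dateutil's parser, so only ISO-8601 strings now pass validation. A minimal standalone sketch of the new behavior (stdlib only; the function below mirrors `_validate_date_string` purely for illustration):

from datetime import datetime

def validate_date_string(v: str) -> str:
    # ISO-8601 only, matching the new validator above.
    try:
        datetime.fromisoformat(v)
        return v
    except Exception as exc:
        raise ValueError("is not a valid ISO-8601 date or datetime string") from exc

print(validate_date_string("2025-01-31T12:00:00"))  # accepted
# validate_date_string("Jan 31, 2025") now raises ValueError, whereas
# dateutil's permissive parser would previously have accepted it.
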
orchestrator/search/indexing/indexer.py
CHANGED

@@ -45,6 +45,23 @@ def _maybe_begin(session: Session | None) -> Iterator[None]:
     yield
 
 
+@contextmanager
+def _maybe_progress(show_progress: bool, total_count: int | None, label: str) -> Iterator[Any]:
+    """Context manager that optionally creates a progress bar."""
+    if show_progress:
+        import typer
+
+        with typer.progressbar(
+            length=total_count,
+            label=label,
+            show_eta=True,
+            show_percent=bool(total_count),
+        ) as progress:
+            yield progress
+    else:
+        yield None
+
+
 class Indexer:
     """Index entities into `AiSearchIndex` using streaming reads and batched writes.
 
@@ -89,11 +106,21 @@ class Indexer:
     8) Repeat until the stream is exhausted.
     """
 
-    def __init__(self, config: EntityConfig, dry_run: bool, force_index: bool, chunk_size: int = 1000) -> None:
+    def __init__(
+        self,
+        config: EntityConfig,
+        dry_run: bool,
+        force_index: bool,
+        chunk_size: int = 1000,
+        show_progress: bool = False,
+        total_count: int | None = None,
+    ) -> None:
         self.config = config
         self.dry_run = dry_run
         self.force_index = force_index
         self.chunk_size = chunk_size
+        self.show_progress = show_progress
+        self.total_count = total_count
         self.embedding_model = llm_settings.EMBEDDING_MODEL
         self.logger = logger.bind(entity_kind=config.entity_kind.value)
         self._entity_titles: dict[str, str] = {}
@@ -116,13 +143,22 @@ class Indexer:
 
         with write_scope as database:
             session: Session | None = getattr(database, "session", None)
-            for entity in entities:
-                chunk.append(entity)
-                if len(chunk) >= self.chunk_size:
-                    flush()
 
-            if chunk:
-                flush()
+            with _maybe_progress(
+                self.show_progress, self.total_count, f"Indexing {self.config.entity_kind.value}"
+            ) as progress:
+                for entity in entities:
+                    chunk.append(entity)
+
+                    if len(chunk) >= self.chunk_size:
+                        flush()
+                        if progress:
+                            progress.update(self.chunk_size)
+
+                if chunk:
+                    flush()
+                    if progress:
+                        progress.update(len(chunk))
 
         final_log_message = (
             f"processed {total_records_processed} records and skipped {total_identical_records} identical records."

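The `_maybe_progress` helper keeps the indexing loop identical whether or not a bar was requested: it yields a `typer.progressbar` when enabled and `None` otherwise, so the caller only needs an `if progress:` guard. A minimal sketch of the same pattern in isolation (assumes `typer` is installed; the label and counts here are invented):

from contextlib import contextmanager
from typing import Any, Iterator

@contextmanager
def maybe_progress(show_progress: bool, total_count: int | None, label: str) -> Iterator[Any]:
    # Yield a typer progress bar when enabled, else None.
    if show_progress:
        import typer

        with typer.progressbar(length=total_count, label=label, show_eta=True) as progress:
            yield progress
    else:
        yield None

with maybe_progress(True, 50, "Indexing demo") as progress:
    for _batch in range(5):
        # ... flush a chunk of 10 records ...
        if progress:
            progress.update(10)  # advance by the chunk size
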
orchestrator/search/indexing/registry.py
CHANGED

@@ -66,6 +66,21 @@ class EntityConfig(Generic[ModelT]):
         return "UNKNOWN"
 
 
+@dataclass(frozen=True)
+class ProcessConfig(EntityConfig[ProcessTable]):
+    """Processes need to eager load workflow for workflow_name field."""
+
+    def get_all_query(self, entity_id: str | None = None) -> Query | Select:
+        from sqlalchemy.orm import selectinload
+
+        # Only load workflow, not subscriptions (keeps it lightweight)
+        query = self.table.query.options(selectinload(ProcessTable.workflow))
+        if entity_id:
+            pk_column = getattr(self.table, self.pk_name)
+            query = query.filter(pk_column == UUID(entity_id))
+        return query
+
+
 @dataclass(frozen=True)
 class WorkflowConfig(EntityConfig[WorkflowTable]):
     """Workflows have a custom select() function that filters out deleted workflows."""
@@ -95,7 +110,7 @@ ENTITY_CONFIG_REGISTRY: dict[EntityType, EntityConfig] = {
         root_name="product",
         title_paths=["product.description", "product.name"],
     ),
-    EntityType.PROCESS: EntityConfig(
+    EntityType.PROCESS: ProcessConfig(
         entity_kind=EntityType.PROCESS,
         table=ProcessTable,
         traverser=ProcessTraverser,

orchestrator/search/indexing/tasks.py
CHANGED

@@ -11,7 +11,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from typing import Any
+
 import structlog
+from sqlalchemy import func, select
 from sqlalchemy.orm import Query
 
 from orchestrator.db import db
@@ -23,12 +26,20 @@ from orchestrator.search.indexing.registry import ENTITY_CONFIG_REGISTRY
 logger = structlog.get_logger(__name__)
 
 
+def _get_entity_count(stmt: Any) -> int | None:
+    """Get total count of entities from a select statement."""
+
+    count_stmt = select(func.count()).select_from(stmt.subquery())
+    return db.session.execute(count_stmt).scalar()
+
+
 def run_indexing_for_entity(
     entity_kind: EntityType,
     entity_id: str | None = None,
     dry_run: bool = False,
     force_index: bool = False,
     chunk_size: int = 1000,
+    show_progress: bool = False,
 ) -> None:
     """Stream and index entities for the given kind.
 
@@ -46,6 +57,7 @@ def run_indexing_for_entity(
             existing hashes.
         chunk_size (int): Number of rows fetched per round-trip and passed to
             the indexer per batch.
+        show_progress (bool): When True, logs progress for each processed entity.
 
     Returns:
         None
@@ -60,10 +72,19 @@ def run_indexing_for_entity(
     else:
         stmt = q
 
+    total_count = _get_entity_count(stmt) if show_progress else None
+
     stmt = stmt.execution_options(stream_results=True, yield_per=chunk_size)
     entities = db.session.execute(stmt).scalars()
 
-    indexer = Indexer(config=config, dry_run=dry_run, force_index=force_index, chunk_size=chunk_size)
+    indexer = Indexer(
+        config=config,
+        dry_run=dry_run,
+        force_index=force_index,
+        chunk_size=chunk_size,
+        show_progress=show_progress,
+        total_count=total_count,
+    )
 
     with cache_subscription_models():
         indexer.run(entities)

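Putting the pieces together, the CLI commands shown earlier reduce to one call into this module. A hedged sketch of a programmatic invocation (assumes an application context with an initialised database session):

from orchestrator.search.core.types import EntityType
from orchestrator.search.indexing.tasks import run_indexing_for_entity

# Re-index all processes with a progress bar; the total count is computed
# via _get_entity_count only when show_progress is True.
run_indexing_for_entity(
    EntityType.PROCESS,
    dry_run=False,
    force_index=False,
    chunk_size=1000,
    show_progress=True,
)
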
orchestrator/search/indexing/traverse.py
CHANGED

@@ -29,7 +29,7 @@ from orchestrator.domain.base import ProductBlockModel, ProductModel
 from orchestrator.domain.lifecycle import (
     lookup_specialized_type,
 )
-from orchestrator.schemas.process import
+from orchestrator.schemas.process import ProcessBaseSchema
 from orchestrator.schemas.workflow import WorkflowSchema
 from orchestrator.search.core.exceptions import ModelLoadError, ProductNotInRegistryError
 from orchestrator.search.core.types import LTREE_SEPARATOR, ExtractedField, FieldType
@@ -307,17 +307,39 @@ class ProductTraverser(BaseTraverser):
 
 
 class ProcessTraverser(BaseTraverser):
-    """Traverser for process entities using
+    """Traverser for process entities using ProcessBaseSchema.
 
-
-
-    - Related workflow information beyond workflow_name
+    Only indexes top-level process fields (no subscriptions or steps)
+    to keep the index size manageable.
     """
 
+    EXCLUDED_FIELDS = {"traceback", "failed_reason"}
+
+    @classmethod
+    def _load_model(cls, entity: ProcessTable) -> ProcessBaseSchema | None:
+        return cls._load_model_with_schema(entity, ProcessBaseSchema, "process_id")
+
     @classmethod
-    def
-    """
-
+    def get_fields(cls, entity: ProcessTable, pk_name: str, root_name: str) -> list[ExtractedField]:  # type: ignore[override]
+        """Extract fields from process, excluding fields in EXCLUDED_FIELDS."""
+        try:
+            model = cls._load_model(entity)
+            if model is None:
+                return []
+
+            return sorted(
+                (
+                    field
+                    for field in cls.traverse(model, root_name)
+                    if field.path.split(LTREE_SEPARATOR)[-1] not in cls.EXCLUDED_FIELDS
+                ),
+                key=lambda f: f.path,
+            )
+
+        except (ProductNotInRegistryError, ModelLoadError) as e:
+            entity_id = getattr(entity, pk_name, "unknown")
+            logger.error(f"Failed to extract fields from {entity.__class__.__name__}", id=str(entity_id), error=str(e))
+            return []
 
 
 class WorkflowTraverser(BaseTraverser):

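The exclusion test in `get_fields` compares only the final ltree label of each extracted path. A toy illustration (the field paths are hypothetical, and LTREE_SEPARATOR is assumed to be "." as in Postgres ltree):

EXCLUDED_FIELDS = {"traceback", "failed_reason"}
LTREE_SEPARATOR = "."  # assumed value of the imported constant

paths = ["process.last_status", "process.failed_reason", "process.workflow_name"]
kept = [p for p in paths if p.split(LTREE_SEPARATOR)[-1] not in EXCLUDED_FIELDS]
print(kept)  # ['process.last_status', 'process.workflow_name']
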
orchestrator/search/query/results.py
CHANGED

@@ -139,6 +139,63 @@ def format_aggregation_response(
     )
 
 
+def truncate_text_with_highlights(
+    text: str, highlight_indices: list[tuple[int, int]] | None = None, max_length: int = 500, context_chars: int = 100
+) -> tuple[str, list[tuple[int, int]] | None]:
+    """Truncate text to max_length while preserving context around the first highlight.
+
+    Args:
+        text: The text to truncate
+        highlight_indices: List of (start, end) tuples indicating highlight positions, or None
+        max_length: Maximum length of the returned text
+        context_chars: Number of characters to show before and after the first highlight
+
+    Returns:
+        Tuple of (truncated_text, adjusted_highlight_indices)
+    """
+    # If text is short enough, return as-is
+    if len(text) <= max_length:
+        return text, highlight_indices
+
+    # If no highlights, truncate from beginning
+    if not highlight_indices:
+        truncated_text = text[:max_length]
+        suffix = "..." if len(text) > max_length else ""
+        return truncated_text + suffix, None
+
+    # Use first highlight to determine what to show
+    first_highlight_start = highlight_indices[0][0]
+
+    # Calculate start position: try to center around first highlight
+    start = max(0, first_highlight_start - context_chars)
+    end = min(len(text), start + max_length)
+
+    # Adjust start if we hit the end boundary
+    if end == len(text) and (end - start) < max_length:
+        start = max(0, end - max_length)
+
+    truncated_text = text[start:end]
+
+    # Add ellipsis to indicate truncation
+    truncated_from_start = start > 0
+    truncated_from_end = end < len(text)
+
+    if truncated_from_start:
+        truncated_text = "..." + truncated_text
+    if truncated_from_end:
+        truncated_text = truncated_text + "..."
+
+    # Adjust highlight indices to be relative to truncated text
+    offset = start - (3 if truncated_from_start else 0)  # Account for leading "..."
+    adjusted_indices = []
+    for hl_start, hl_end in highlight_indices:
+        # Only include highlights that are within the truncated range
+        if hl_start >= start and hl_end <= end:
+            adjusted_indices.append((hl_start - offset, hl_end - offset))
+
+    return truncated_text, adjusted_indices if adjusted_indices else None
+
+
 def generate_highlight_indices(text: str, term: str) -> list[tuple[int, int]]:
     """Finds all occurrences of individual words from the term, including both word boundary and substring matches."""
     import re
@@ -201,8 +258,9 @@ def format_search_response(
         if not isinstance(path, str):
             path = str(path)
 
-        highlight_indices = generate_highlight_indices(text, user_query)
-        matching_field = MatchingField(text=text, path=path, highlight_indices=highlight_indices)
+        highlight_indices = generate_highlight_indices(text, user_query)
+        truncated_text, adjusted_indices = truncate_text_with_highlights(text, highlight_indices)
+        matching_field = MatchingField(text=truncated_text, path=path, highlight_indices=adjusted_indices)
 
     elif not user_query and query.filters and metadata.search_type == "structured":
         # Structured search (filter-only)

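A worked example of the truncation helper added above, with the offsets computed by hand from the defaults max_length=500 and context_chars=100:

from orchestrator.search.query.results import truncate_text_with_highlights

text = "x" * 300 + "needle" + "y" * 300  # 606 chars, longer than max_length
highlights = [(300, 306)]                # position of "needle" in the original

snippet, adjusted = truncate_text_with_highlights(text, highlights)

print(len(snippet))      # 503: "..." prefix + 500 kept characters
print(adjusted)          # [(197, 203)]
print(snippet[197:203])  # "needle" -- the indices stay valid after truncation
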
orchestrator/search/retrieval/retrievers/__init__.py
CHANGED

@@ -14,6 +14,7 @@
 from .base import Retriever
 from .fuzzy import FuzzyRetriever
 from .hybrid import RrfHybridRetriever
+from .process import ProcessHybridRetriever
 from .semantic import SemanticRetriever
 from .structured import StructuredRetriever
 
@@ -21,6 +22,7 @@ __all__ = [
     "Retriever",
     "FuzzyRetriever",
     "RrfHybridRetriever",
+    "ProcessHybridRetriever",
     "SemanticRetriever",
     "StructuredRetriever",
 ]

orchestrator/search/retrieval/retrievers/base.py
CHANGED

@@ -17,7 +17,7 @@ from decimal import Decimal
 import structlog
 from sqlalchemy import BindParameter, Numeric, Select, literal
 
-from orchestrator.search.core.types import FieldType, SearchMetadata
+from orchestrator.search.core.types import EntityType, FieldType, SearchMetadata
 from orchestrator.search.query.queries import ExportQuery, SelectQuery
 
 from ..pagination import PageCursor
@@ -63,12 +63,15 @@ class Retriever(ABC):
         Returns:
             A concrete retriever instance based on available search criteria
         """
+
         from .fuzzy import FuzzyRetriever
         from .hybrid import RrfHybridRetriever
+        from .process import ProcessHybridRetriever
         from .semantic import SemanticRetriever
         from .structured import StructuredRetriever
 
         fuzzy_term = query.fuzzy_term
+        is_process = query.entity_type == EntityType.PROCESS
 
         # If vector_query exists but embedding generation failed, fall back to fuzzy search with full query text
         if query_embedding is None and query.vector_query is not None and query.query_text is not None:
@@ -76,10 +79,14 @@
 
         # Select retriever based on available search criteria
         if query_embedding is not None and fuzzy_term is not None:
+            if is_process:
+                return ProcessHybridRetriever(query_embedding, fuzzy_term, cursor)
             return RrfHybridRetriever(query_embedding, fuzzy_term, cursor)
         if query_embedding is not None:
             return SemanticRetriever(query_embedding, cursor)
         if fuzzy_term is not None:
+            if is_process:
+                return ProcessHybridRetriever(None, fuzzy_term, cursor)
             return FuzzyRetriever(fuzzy_term, cursor)
 
         return StructuredRetriever(cursor)

orchestrator/search/retrieval/retrievers/process.py
ADDED

@@ -0,0 +1,225 @@
+# Copyright 2019-2025 SURF, GÉANT.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any
+
+from sqlalchemy import BindParameter, Select, String, and_, case, cast, func, literal, select
+from sqlalchemy.sql.expression import ColumnElement, Label
+from sqlalchemy_utils import LtreeType
+
+from orchestrator.db.models import AiSearchIndex, ProcessStepTable
+from orchestrator.search.core.types import SearchMetadata
+
+from .hybrid import RrfHybridRetriever, compute_rrf_hybrid_score_sql
+
+
+class ProcessHybridRetriever(RrfHybridRetriever):
+    """Process-specific hybrid retriever with process.last_step JSONB search.
+
+    Extends RrfHybridRetriever to include fuzzy search over the process.last_step
+    (JSONB) column. For process searches:
+    - Indexed fields (from AiSearchIndex): semantic + fuzzy search
+    - Last step JSONB field: fuzzy search only (no embeddings for dynamic data)
+
+    The retriever:
+    1. Gets field candidates from AiSearchIndex
+    2. Uses process.last_step JSONB column directly for fuzzy matching
+    3. Combines both sources (indexed + JSONB) in unified ranking
+    """
+
+    q_vec: list[float] | None  # type: ignore[assignment]  # Override parent's type to allow None for fuzzy-only search
+
+    def __init__(self, q_vec: list[float] | None, *args: Any, **kwargs: Any) -> None:
+        # ProcessHybridRetriever accepts None for q_vec (fuzzy-only search)
+        # We pass empty list to parent to satisfy type requirements, but override behavior in _get_semantic_distance_expr
+        super().__init__(q_vec or [], *args, **kwargs)
+        self.q_vec = q_vec
+
+    def _get_semantic_distance_expr(self) -> Label[Any]:
+        """Get semantic distance expression, handling optional q_vec."""
+        if self.q_vec is None:
+            return literal(1.0).label("semantic_distance")
+
+        from sqlalchemy import bindparam
+
+        q_param: BindParameter[list[float]] = bindparam("q_vec", type_=AiSearchIndex.embedding.type)
+        sem_expr = case(
+            (AiSearchIndex.embedding.is_(None), None),
+            else_=AiSearchIndex.embedding.op("<->")(q_param),
+        )
+        return func.coalesce(sem_expr, literal(1.0)).label("semantic_distance")
+
+    def _build_indexed_candidates(
+        self, cand: Any, sem_val: Label[Any], best_similarity: ColumnElement[Any], filter_condition: ColumnElement[Any]
+    ) -> Select:
+        """Build candidates from indexed fields in AiSearchIndex."""
+        return (
+            select(
+                AiSearchIndex.entity_id,
+                AiSearchIndex.entity_title,
+                AiSearchIndex.path,
+                AiSearchIndex.value,
+                sem_val,
+                best_similarity.label("fuzzy_score"),
+            )
+            .select_from(AiSearchIndex)
+            .join(cand, cand.c.entity_id == AiSearchIndex.entity_id)
+            .where(
+                and_(
+                    AiSearchIndex.value_type.in_(self.SEARCHABLE_FIELD_TYPES),
+                    filter_condition,
+                )
+            )
+            .limit(self.field_candidates_limit)
+        )
+
+    def _build_jsonb_candidates(self, cand: Any) -> Select:
+        """Build candidates from last process_step.state JSONB column."""
+        # Get the last step per process using LATERAL subquery
+        last_step_subq = (
+            select(ProcessStepTable.process_id, ProcessStepTable.state)
+            .where(ProcessStepTable.process_id == cand.c.entity_id)
+            .order_by(ProcessStepTable.completed_at.desc())
+            .limit(1)
+            .lateral("last_step")
+        )
+
+        # Cast JSONB to text for substring search
+        state_text = cast(last_step_subq.c.state, String)
+        jsonb_fuzzy_score = func.word_similarity(self.fuzzy_term, state_text)
+        jsonb_filter = state_text.ilike(f"%{self.fuzzy_term}%")
+
+        return (
+            select(
+                cand.c.entity_id,
+                cand.c.entity_title,
+                cast(literal("process.last_step.state"), LtreeType).label("path"),
+                state_text.label("value"),
+                literal(1.0).label("semantic_distance"),
+                jsonb_fuzzy_score.label("fuzzy_score"),
+            )
+            .select_from(cand)
+            .join(last_step_subq, literal(True))
+            .where(and_(last_step_subq.c.state.isnot(None), jsonb_filter))
+            .limit(self.field_candidates_limit)
+        )
+
+    def apply(self, candidate_query: Select) -> Select:
+        """Apply process-specific hybrid search with process.last_step JSONB.
+
+        Args:
+            candidate_query: Base query returning process entity_id candidates
+
+        Returns:
+            Select statement with RRF scoring including last step JSONB fields
+        """
+        cand = candidate_query.subquery()
+
+        best_similarity = func.word_similarity(self.fuzzy_term, AiSearchIndex.value)
+        sem_val = self._get_semantic_distance_expr()
+        filter_condition = literal(self.fuzzy_term).op("<%")(AiSearchIndex.value)
+
+        indexed_candidates = self._build_indexed_candidates(cand, sem_val, best_similarity, filter_condition)
+        jsonb_candidates = self._build_jsonb_candidates(cand)
+
+        field_candidates = indexed_candidates.union_all(jsonb_candidates).cte("field_candidates")
+
+        entity_scores = (
+            select(
+                field_candidates.c.entity_id,
+                field_candidates.c.entity_title,
+                func.avg(field_candidates.c.semantic_distance).label("avg_semantic_distance"),
+                func.avg(field_candidates.c.fuzzy_score).label("avg_fuzzy_score"),
+            ).group_by(field_candidates.c.entity_id, field_candidates.c.entity_title)
+        ).cte("entity_scores")
+
+        entity_highlights = (
+            select(
+                field_candidates.c.entity_id,
+                func.first_value(field_candidates.c.value)
+                .over(
+                    partition_by=field_candidates.c.entity_id,
+                    order_by=[field_candidates.c.fuzzy_score.desc(), field_candidates.c.path.asc()],
+                )
+                .label(self.HIGHLIGHT_TEXT_LABEL),
+                func.first_value(field_candidates.c.path)
+                .over(
+                    partition_by=field_candidates.c.entity_id,
+                    order_by=[field_candidates.c.fuzzy_score.desc(), field_candidates.c.path.asc()],
+                )
+                .label(self.HIGHLIGHT_PATH_LABEL),
+            ).distinct(field_candidates.c.entity_id)
+        ).cte("entity_highlights")
+
+        ranked = (
+            select(
+                entity_scores.c.entity_id,
+                entity_scores.c.entity_title,
+                entity_scores.c.avg_semantic_distance,
+                entity_scores.c.avg_fuzzy_score,
+                entity_highlights.c.highlight_text,
+                entity_highlights.c.highlight_path,
+                func.dense_rank()
+                .over(
+                    order_by=[entity_scores.c.avg_semantic_distance.asc().nulls_last(), entity_scores.c.entity_id.asc()]
+                )
+                .label("sem_rank"),
+                func.dense_rank()
+                .over(order_by=[entity_scores.c.avg_fuzzy_score.desc().nulls_last(), entity_scores.c.entity_id.asc()])
+                .label("fuzzy_rank"),
+            ).select_from(
+                entity_scores.join(entity_highlights, entity_scores.c.entity_id == entity_highlights.c.entity_id)
+            )
+        ).cte("ranked_results")
+
+        score_components = compute_rrf_hybrid_score_sql(
+            sem_rank_col=ranked.c.sem_rank,
+            fuzzy_rank_col=ranked.c.fuzzy_rank,
+            avg_fuzzy_score_col=ranked.c.avg_fuzzy_score,
+            k=self.k,
+            perfect_threshold=0.9,
+            score_numeric_type=self.SCORE_NUMERIC_TYPE,
+        )
+
+        perfect = score_components["perfect"]
+        normalized_score = score_components["normalized_score"]
+
+        score = cast(
+            func.round(cast(normalized_score, self.SCORE_NUMERIC_TYPE), self.SCORE_PRECISION),
+            self.SCORE_NUMERIC_TYPE,
+        ).label(self.SCORE_LABEL)
+
+        stmt = select(
+            ranked.c.entity_id,
+            ranked.c.entity_title,
+            score,
+            ranked.c.highlight_text,
+            ranked.c.highlight_path,
+            perfect.label("perfect_match"),
+        ).select_from(ranked)
+
+        stmt = self._apply_fused_pagination(stmt, score, ranked.c.entity_id)
+
+        stmt = stmt.order_by(
+            score.desc().nulls_last(),
+            ranked.c.entity_id.asc(),
+        )
+
+        if self.q_vec is not None:
+            stmt = stmt.params(q_vec=self.q_vec)
+
+        return stmt
+
+    @property
+    def metadata(self) -> SearchMetadata:
+        return SearchMetadata.hybrid()

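`compute_rrf_hybrid_score_sql` (imported from .hybrid) fuses the semantic and fuzzy rank columns. Reciprocal Rank Fusion scores each entity by summing 1 / (k + rank) across ranking signals; a toy pure-Python illustration of that formula, independent of the SQL implementation (k and the ranks are made up, and the library version may weight or normalize differently):

def rrf_score(ranks: list[int], k: int = 60) -> float:
    # Reciprocal Rank Fusion: sum of 1 / (k + rank) over all signals.
    return sum(1.0 / (k + r) for r in ranks)

# Entity A: 1st semantically, 3rd on fuzzy; entity B: 2nd on both.
print(rrf_score([1, 3]))  # ~0.032266
print(rrf_score([2, 2]))  # ~0.032258 -- nearly tied, A edges ahead
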
{orchestrator_core-4.6.3.dist-info → orchestrator_core-4.6.4.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: orchestrator-core
-Version: 4.6.3
+Version: 4.6.4
 Summary: This is the orchestrator workflow engine.
 Author-email: SURF <automation-beheer@surf.nl>
 Requires-Python: >=3.11,<3.15
@@ -42,7 +42,7 @@ Requires-Dist: fastapi-etag==0.4.0
 Requires-Dist: itsdangerous>=2.2.0
 Requires-Dist: jinja2==3.1.6
 Requires-Dist: more-itertools~=10.8.0
-Requires-Dist: nwa-stdlib~=1.
+Requires-Dist: nwa-stdlib~=1.11.0
 Requires-Dist: oauth2-lib>=2.5.0
 Requires-Dist: orjson==3.11.4
 Requires-Dist: pgvector>=0.4.1
@@ -54,11 +54,11 @@ Requires-Dist: pydantic[email]~=2.12.4
 Requires-Dist: python-dateutil==2.9.0.post0
 Requires-Dist: python-rapidjson>=1.22,<1.23
 Requires-Dist: pytz==2025.2
-Requires-Dist: redis==5.
+Requires-Dist: redis==5.3.1
 Requires-Dist: semver==3.0.4
 Requires-Dist: sentry-sdk[fastapi]>=2.29.1
 Requires-Dist: sqlalchemy==2.0.44
-Requires-Dist: sqlalchemy-utils==0.
+Requires-Dist: sqlalchemy-utils==0.42.0
 Requires-Dist: strawberry-graphql>=0.281.0,<0.285.0
 Requires-Dist: structlog>=25.4.0
 Requires-Dist: tabulate==0.9.0

{orchestrator_core-4.6.3.dist-info → orchestrator_core-4.6.4.dist-info}/RECORD
CHANGED

@@ -1,4 +1,4 @@
-orchestrator/__init__.py,sha256=
+orchestrator/__init__.py,sha256=Lby3r_nYazHbsANmKYjUnfpWl5UDYWncINvMVs0RipU,1454
 orchestrator/agentic_app.py,sha256=ouiyyZiS4uS6Lox2DtbGGRnb2njJBMSHpSAGe-T5rX0,3028
 orchestrator/app.py,sha256=w8ubXaaogwjmwLM0TXqZaLkAhmaOTWzVlwiYbi5mHeE,13203
 orchestrator/exception_handlers.py,sha256=UsW3dw8q0QQlNLcV359bIotah8DYjMsj2Ts1LfX4ClY,1268
@@ -105,7 +105,7 @@ orchestrator/cli/helpers/input_helpers.py,sha256=pv5GTMuIWLzBE_bKNhn1XD_gxoqB0s1
 orchestrator/cli/helpers/print_helpers.py,sha256=b3ePg6HfBLKPYBBVr5XOA__JnFEMI5HBjbjov3CP8Po,859
 orchestrator/cli/search/__init__.py,sha256=K15_iW9ogR7xtX7qHDal4H09tmwVGnOBZWyPBLWhuzc,1274
 orchestrator/cli/search/display.py,sha256=PKy9sOTpq0WUdSfY2khLrIQ1OdAfsyl95ogF1Z6Dae0,3629
-orchestrator/cli/search/index_llm.py,sha256=
+orchestrator/cli/search/index_llm.py,sha256=VQlLNGXro4ZbehWZIMKPHGpGE4oF1at2bkWZssimWc4,2707
 orchestrator/cli/search/resize_embedding.py,sha256=iJdM7L6Kyq4CzRjXHWLwpGRiMnKK7xZ9133C0THebBE,4847
 orchestrator/cli/search/search_explore.py,sha256=LNAn6w13Q69fpv8CHcicHAbabrctrjGvwTjjJyC0AZY,8447
 orchestrator/cli/search/speedtest.py,sha256=J_l-8WxgN3YnqmwnbRhDyVbeqtvk3d2SfIpRBOJuhvE,4840
@@ -299,15 +299,15 @@ orchestrator/search/docs/index.md,sha256=zKzE2fbtHDfYTKaHg628wAsqCTOJ5yFUWV0ucFH
 orchestrator/search/docs/running_local_text_embedding_inference.md,sha256=OR0NVZMb8DqpgXYxlwDUrJwfRk0bYOk1-LkDMqsV6bU,1327
 orchestrator/search/filters/__init__.py,sha256=pmnHyq5SrqxS6eEiyhSIify776W9BIZ0gjOZFuYZ0nU,1335
 orchestrator/search/filters/base.py,sha256=lUr0eW0zi4oIMVUHuRD3GAQ9xEbHiFUl_EfAI6ABPVo,12456
-orchestrator/search/filters/date_filters.py,sha256=
+orchestrator/search/filters/date_filters.py,sha256=DrmOcjL3v7bh93xjC4_Q0kkZV-yanBaUetwnYTRcAGI,3045
 orchestrator/search/filters/definitions.py,sha256=k30Dp1bEr3CWMeuIcF1wPgmmF3rxI9Urx-sCaPfaE3c,4607
 orchestrator/search/filters/ltree_filters.py,sha256=1OOmM5K90NsGBQmTqyoDlphdAOGd9r2rmz1rNItm8yk,2341
 orchestrator/search/filters/numeric_filter.py,sha256=do52w5Dmb5Rt4ipfX1iEObNcdymbWHtgS2HI1Otq-JQ,2771
 orchestrator/search/indexing/__init__.py,sha256=Or78bizNPiuNOgwLGJQ0mspCF1G_gSe5C9Ap7qi0MZk,662
-orchestrator/search/indexing/indexer.py,sha256=
-orchestrator/search/indexing/registry.py,sha256=
-orchestrator/search/indexing/tasks.py,sha256=
-orchestrator/search/indexing/traverse.py,sha256=
+orchestrator/search/indexing/indexer.py,sha256=4Oh-gspJrjhyecw87TK68lvGb3inVy2Sa8RlD_FHo3c,16357
+orchestrator/search/indexing/registry.py,sha256=V6Q4aRXHON1gSE6wsavEIfwHwCPicSzFBS2mqNExFGs,4305
+orchestrator/search/indexing/tasks.py,sha256=0p68RNwJnHSGZQjfdpyFsS2Ma5Gr2PpZROZgal_R1wI,3064
+orchestrator/search/indexing/traverse.py,sha256=JLut9t4LoPCWhJ_63VgYhRKfjwyxRv-mTbQLC8mA_mU,15158
 orchestrator/search/query/__init__.py,sha256=nCjvK_n2WQdV_ACrncFXEfnvLcHtuI__J7KLlFIaQvo,2437
 orchestrator/search/query/builder.py,sha256=kgnJ93TOCm8UTL5k09nWLsG4NXAlvFFa65gbciOwZ8E,10153
 orchestrator/search/query/engine.py,sha256=TFdV_sSoSXCSDSpyhVA2S6YaJysDSW2WtPj7duAyomk,5745
@@ -315,15 +315,16 @@ orchestrator/search/query/exceptions.py,sha256=DrkNzXVbQAOi28FTHKimf_eTrXmhYwXrH
 orchestrator/search/query/export.py,sha256=_0ncVpTqN6AoQfW3WX0fWnDQX3hBz6ZGC31Beu4PVwQ,6678
 orchestrator/search/query/mixins.py,sha256=BdVDzCOFDXT6N9LI_WrbVzGrk61UNplX-UZPvD0rEV0,3019
 orchestrator/search/query/queries.py,sha256=j1uKSQgF_ifVaDJaxjs4h2z48KqGVEIKCXOoJ7Ur9Mk,3805
-orchestrator/search/query/results.py,sha256=
+orchestrator/search/query/results.py,sha256=5OgAs39oncDIBdpB3NJltPr-UvLvLlxTWw9sn-lyfQA,10989
 orchestrator/search/query/state.py,sha256=fMSBJs39kZTkpDE2T4h4x0x-51GqUvzAuePg2YUbO6I,3220
 orchestrator/search/query/validation.py,sha256=m0xJ71A0Qa5hm8b71zKRjSVpPrnkG7LbqPu4lv_GboI,8260
 orchestrator/search/retrieval/__init__.py,sha256=q5G0z3nKjIHKFs1PkEG3nvTUy3Wp4kCyBtCbqUITj3A,579
 orchestrator/search/retrieval/pagination.py,sha256=kcUzq1QQk4GrZq02M4hsKwAelUo1qDeCqsXImLUK6DA,3006
-orchestrator/search/retrieval/retrievers/__init__.py,sha256=
-orchestrator/search/retrieval/retrievers/base.py,sha256=
+orchestrator/search/retrieval/retrievers/__init__.py,sha256=dJlN6a0oHSquzjE5POYxrMGOXMx4Bx2khbJI-rA_qwg,971
+orchestrator/search/retrieval/retrievers/base.py,sha256=esdYrkyUjwjpg-fg7BurOMe7WCTUr2cjxHqdMKDc3DI,4490
 orchestrator/search/retrieval/retrievers/fuzzy.py,sha256=PLp_ANRLzmtGQP1t9X4jt43_JLKDnOxWU2xqlexSH1U,3779
 orchestrator/search/retrieval/retrievers/hybrid.py,sha256=l-7J4qct0h28wSi0KvdFJw2lyh3jyobbrCbg0PuX-4I,11141
+orchestrator/search/retrieval/retrievers/process.py,sha256=_nEEYex9iO4iBVrn6VCbvSIHf7Kb76c6id2krs-uef0,9255
 orchestrator/search/retrieval/retrievers/semantic.py,sha256=36ky_A_LNWs13IYe809qy1RPrd0Fab-G-9pf2ZDARhA,3905
 orchestrator/search/retrieval/retrievers/structured.py,sha256=13TxC52fpNGXHnPX40J2GczRYFk8LAvWn2a0HWZCd2Q,1426
 orchestrator/services/__init__.py,sha256=GyHNfEFCGKQwRiN6rQmvSRH2iYX7npjMZn97n8XzmLU,571
@@ -379,7 +380,7 @@ orchestrator/workflows/tasks/resume_workflows.py,sha256=T3iobSJjVgiupe0rClD34kUZ
 orchestrator/workflows/tasks/validate_product_type.py,sha256=lo2TX_MZOfcOmYFjLyD82FrJ5AAN3HOsE6BhDVFuy9Q,3210
 orchestrator/workflows/tasks/validate_products.py,sha256=GZJBoFF-WMphS7ghMs2-gqvV2iL1F0POhk0uSNt93n0,8510
 orchestrator/workflows/translations/en-GB.json,sha256=Gc5gy_RghZOeSNcJIntAsz_7DsCg8n_vzoHBPXxCn_U,908
-orchestrator_core-4.6.
-orchestrator_core-4.6.
-orchestrator_core-4.6.
-orchestrator_core-4.6.
+orchestrator_core-4.6.4.dist-info/licenses/LICENSE,sha256=b-aA5OZQuuBATmLKo_mln8CQrDPPhg3ghLzjPjLn4Tg,11409
+orchestrator_core-4.6.4.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
+orchestrator_core-4.6.4.dist-info/METADATA,sha256=wtpq1zMwlyS21ZMcodbT338EwqmKBXWV28KAcyJRStg,6416
+orchestrator_core-4.6.4.dist-info/RECORD,,

{orchestrator_core-4.6.3.dist-info → orchestrator_core-4.6.4.dist-info}/WHEEL
File without changes

{orchestrator_core-4.6.3.dist-info → orchestrator_core-4.6.4.dist-info}/licenses/LICENSE
File without changes