orchestrator-core 4.4.1-py3-none-any.whl → 4.5.0-py3-none-any.whl
This diff shows the content of publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
- orchestrator/__init__.py +17 -2
- orchestrator/agentic_app.py +103 -0
- orchestrator/api/api_v1/api.py +14 -2
- orchestrator/api/api_v1/endpoints/processes.py +2 -0
- orchestrator/api/api_v1/endpoints/search.py +296 -0
- orchestrator/app.py +32 -0
- orchestrator/cli/main.py +22 -1
- orchestrator/cli/search/__init__.py +32 -0
- orchestrator/cli/search/index_llm.py +73 -0
- orchestrator/cli/search/resize_embedding.py +135 -0
- orchestrator/cli/search/search_explore.py +208 -0
- orchestrator/cli/search/speedtest.py +151 -0
- orchestrator/db/models.py +37 -1
- orchestrator/devtools/populator.py +16 -0
- orchestrator/domain/base.py +2 -7
- orchestrator/domain/lifecycle.py +24 -7
- orchestrator/llm_settings.py +57 -0
- orchestrator/log_config.py +1 -0
- orchestrator/migrations/helpers.py +7 -1
- orchestrator/schemas/search.py +130 -0
- orchestrator/schemas/workflow.py +1 -0
- orchestrator/search/__init__.py +12 -0
- orchestrator/search/agent/__init__.py +21 -0
- orchestrator/search/agent/agent.py +62 -0
- orchestrator/search/agent/prompts.py +100 -0
- orchestrator/search/agent/state.py +21 -0
- orchestrator/search/agent/tools.py +258 -0
- orchestrator/search/core/__init__.py +12 -0
- orchestrator/search/core/embedding.py +73 -0
- orchestrator/search/core/exceptions.py +36 -0
- orchestrator/search/core/types.py +296 -0
- orchestrator/search/core/validators.py +40 -0
- orchestrator/search/docs/index.md +37 -0
- orchestrator/search/docs/running_local_text_embedding_inference.md +46 -0
- orchestrator/search/filters/__init__.py +40 -0
- orchestrator/search/filters/base.py +295 -0
- orchestrator/search/filters/date_filters.py +88 -0
- orchestrator/search/filters/definitions.py +107 -0
- orchestrator/search/filters/ltree_filters.py +56 -0
- orchestrator/search/filters/numeric_filter.py +73 -0
- orchestrator/search/indexing/__init__.py +16 -0
- orchestrator/search/indexing/indexer.py +334 -0
- orchestrator/search/indexing/registry.py +101 -0
- orchestrator/search/indexing/tasks.py +69 -0
- orchestrator/search/indexing/traverse.py +334 -0
- orchestrator/search/llm_migration.py +108 -0
- orchestrator/search/retrieval/__init__.py +16 -0
- orchestrator/search/retrieval/builder.py +123 -0
- orchestrator/search/retrieval/engine.py +154 -0
- orchestrator/search/retrieval/exceptions.py +90 -0
- orchestrator/search/retrieval/pagination.py +96 -0
- orchestrator/search/retrieval/retrievers/__init__.py +26 -0
- orchestrator/search/retrieval/retrievers/base.py +123 -0
- orchestrator/search/retrieval/retrievers/fuzzy.py +94 -0
- orchestrator/search/retrieval/retrievers/hybrid.py +277 -0
- orchestrator/search/retrieval/retrievers/semantic.py +94 -0
- orchestrator/search/retrieval/retrievers/structured.py +39 -0
- orchestrator/search/retrieval/utils.py +120 -0
- orchestrator/search/retrieval/validation.py +152 -0
- orchestrator/search/schemas/__init__.py +12 -0
- orchestrator/search/schemas/parameters.py +129 -0
- orchestrator/search/schemas/results.py +77 -0
- orchestrator/services/processes.py +2 -1
- orchestrator/services/settings_env_variables.py +2 -2
- orchestrator/settings.py +8 -1
- orchestrator/utils/state.py +6 -1
- orchestrator/workflows/steps.py +15 -1
- orchestrator/workflows/tasks/validate_products.py +1 -1
- {orchestrator_core-4.4.1.dist-info → orchestrator_core-4.5.0.dist-info}/METADATA +15 -8
- {orchestrator_core-4.4.1.dist-info → orchestrator_core-4.5.0.dist-info}/RECORD +72 -22
- {orchestrator_core-4.4.1.dist-info → orchestrator_core-4.5.0.dist-info}/WHEEL +0 -0
- {orchestrator_core-4.4.1.dist-info → orchestrator_core-4.5.0.dist-info}/licenses/LICENSE +0 -0
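The bulk of this release is the new orchestrator.search package: a pgvector-backed index, structured/fuzzy/semantic/hybrid retrieval, a composable filter DSL, and an LLM agent. As a minimal sketch of the retrieval entry point, based only on the CLI code shown further down in this diff (a configured database and a populated ai_search_index table are assumed):

import asyncio

from orchestrator.db import db
from orchestrator.search.core.types import EntityType
from orchestrator.search.retrieval import execute_search
from orchestrator.search.schemas.parameters import BaseSearchParameters

# Mirrors the `semantic` CLI command in search_explore.py below.
params = BaseSearchParameters.create(entity_type=EntityType.SUBSCRIPTION, query="fiber connection", limit=10)
response = asyncio.run(execute_search(search_params=params, db_session=db.session))
for result in response.results:
    print(result)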
orchestrator/cli/search/resize_embedding.py
ADDED
@@ -0,0 +1,135 @@
+import structlog
+import typer
+from sqlalchemy import text
+from sqlalchemy.exc import SQLAlchemyError
+
+from orchestrator.db import db
+from orchestrator.db.models import AiSearchIndex
+from orchestrator.llm_settings import llm_settings
+
+logger = structlog.get_logger(__name__)
+
+app = typer.Typer(
+    name="embedding",
+    help="Resize vector dimensions of the embeddings.",
+)
+
+
+def get_current_embedding_dimension() -> int | None:
+    """Get the current dimension of the embedding column from ai_search_index table.
+
+    Returns:
+        Current dimension size or None if no records exist or column doesn't exist
+    """
+    try:
+        query = text(
+            """
+            SELECT vector_dims(embedding) as dimension
+            FROM ai_search_index
+            WHERE embedding IS NOT NULL
+            LIMIT 1
+            """
+        )
+        result = db.session.execute(query).fetchone()
+        if result and result[0]:
+            return result[0]
+        return None
+
+    except SQLAlchemyError as e:
+        logger.error("Failed to get current embedding dimension", error=str(e))
+        return None
+
+
+def drop_all_embeddings() -> int:
+    """Drop all records from the ai_search_index table.
+
+    Returns:
+        Number of records deleted
+    """
+    try:
+        result = db.session.query(AiSearchIndex).delete()
+        db.session.commit()
+        logger.info(f"Deleted {result} records from ai_search_index")
+        return result
+
+    except SQLAlchemyError as e:
+        db.session.rollback()
+        logger.error("Failed to drop embeddings records", error=str(e))
+        raise
+
+
+def alter_embedding_column_dimension(new_dimension: int) -> None:
+    """Alter the embedding column to use the new dimension size.
+
+    Args:
+        new_dimension: New vector dimension size
+    """
+    try:
+        drop_query = text("ALTER TABLE ai_search_index DROP COLUMN IF EXISTS embedding")
+        db.session.execute(drop_query)
+
+        add_query = text(f"ALTER TABLE ai_search_index ADD COLUMN embedding vector({new_dimension})")
+        db.session.execute(add_query)
+
+        db.session.commit()
+        logger.info(f"Altered embedding column to dimension {new_dimension}")
+
+    except SQLAlchemyError as e:
+        db.session.rollback()
+        logger.error("Failed to alter embedding column dimension", error=str(e))
+        raise
+
+
+@app.command("resize")
+def resize_embeddings_command() -> None:
+    """Resize vector dimensions of the ai_search_index embedding column.
+
+    Compares the current embedding dimension in the database with the configured
+    dimension in llm_settings. If they differ, drops all records and alters the
+    column to match the new dimension.
+    """
+    new_dimension = llm_settings.EMBEDDING_DIMENSION
+
+    logger.info("Starting embedding dimension resize", new_dimension=new_dimension)
+
+    current_dimension = get_current_embedding_dimension()
+
+    if current_dimension is None:
+        logger.warning("Could not determine current dimension for embedding column")
+
+    if current_dimension == new_dimension:
+        logger.info(
+            "Embedding dimensions match, no resize needed",
+            current_dimension=current_dimension,
+            new_dimension=new_dimension,
+        )
+        return
+
+    logger.info("Dimension mismatch detected", current_dimension=current_dimension, new_dimension=new_dimension)
+
+    if not typer.confirm("This will DELETE ALL RECORDS from ai_search_index and alter the embedding column. Continue?"):
+        logger.info("Operation cancelled by user")
+        return
+
+    try:
+        # Drop all records first.
+        logger.info("Dropping all embedding records...")
+        deleted_count = drop_all_embeddings()
+
+        # Then alter column dimension.
+        logger.info(f"Altering embedding column to dimension {new_dimension}...")
+        alter_embedding_column_dimension(new_dimension)
+
+        logger.info(
+            "Embedding dimension resize completed successfully",
+            records_deleted=deleted_count,
+            new_dimension=new_dimension,
+        )
+
+    except Exception as e:
+        logger.error("Embedding dimension resize failed", error=str(e))
+        raise typer.Exit(1)
+
+
+if __name__ == "__main__":
+    app()
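The resize is destructive by design: a pgvector column has a fixed dimension, so changing EMBEDDING_DIMENSION means dropping the column, re-creating it, and re-indexing. A hedged sketch of driving the command programmatically with Typer's test runner (how the app is mounted under the main CLI lives in orchestrator/cli/main.py, not shown here):

from typer.testing import CliRunner

from orchestrator.cli.search.resize_embedding import app

runner = CliRunner()
# Answers the destructive-operation prompt with "y"; run against a scratch database.
result = runner.invoke(app, ["resize"], input="y\n")
print(result.output)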
orchestrator/cli/search/search_explore.py
ADDED
@@ -0,0 +1,208 @@
+import asyncio
+
+import structlog
+import typer
+from pydantic import ValidationError
+
+from orchestrator.db import db
+from orchestrator.search.core.types import EntityType, FilterOp, UIType
+from orchestrator.search.filters import EqualityFilter, FilterTree, LtreeFilter, PathFilter
+from orchestrator.search.retrieval import execute_search
+from orchestrator.search.retrieval.utils import display_filtered_paths_only, display_results
+from orchestrator.search.retrieval.validation import get_structured_filter_schema
+from orchestrator.search.schemas.parameters import BaseSearchParameters
+
+app = typer.Typer(help="Experiment with the subscription search indexes.")
+
+logger = structlog.getLogger(__name__)
+
+
+@app.command()
+def structured(path: str, value: str, entity_type: EntityType = EntityType.SUBSCRIPTION, limit: int = 10) -> None:
+    """Finds subscriptions where a specific field path contains an exact value.
+
+    Example:
+        dotenv run python main.py search structured "subscription.status" "provisioning"
+        ...
+        {
+            "path": "subscription.status",
+            "value": "provisioning"
+        },
+        ...
+    """
+    path_filter = PathFilter(path=path, condition=EqualityFilter(op=FilterOp.EQ, value=value), value_kind=UIType.STRING)
+    search_params = BaseSearchParameters.create(
+        entity_type=entity_type, filters=FilterTree.from_flat_and([path_filter]), limit=limit
+    )
+    search_response = asyncio.run(execute_search(search_params=search_params, db_session=db.session))
+    display_filtered_paths_only(search_response.results, search_params, db.session)
+    display_results(search_response.results, db.session, "Match")
+
+
+@app.command()
+def semantic(query: str, entity_type: EntityType = EntityType.SUBSCRIPTION, limit: int = 10) -> None:
+    """Finds subscriptions that are conceptually most similar to the query text.
+
+    Example:
+        dotenv run python main.py search semantic "Shop for an alligator store"
+        ...
+        {
+            "path": "subscription.shop.shop_description",
+            "value": "Kingswood reptiles shop"
+        },
+        ...
+    """
+    search_params = BaseSearchParameters.create(entity_type=entity_type, query=query, limit=limit)
+    search_response = asyncio.run(execute_search(search_params=search_params, db_session=db.session))
+    display_results(search_response.results, db.session, "Distance")
+
+
+@app.command()
+def fuzzy(term: str, entity_type: EntityType = EntityType.SUBSCRIPTION, limit: int = 10) -> None:
+    """Finds subscriptions containing text similar to the query, tolerating typos.
+
+    Example:
+        dotenv run python main.py search fuzzy "Colonel"
+        ...
+        {
+            "path": "description",
+            "value": "X Follower WF for TimCoronel"
+        },
+        ...
+    """
+    search_params = BaseSearchParameters.create(entity_type=entity_type, query=term, limit=limit)
+    search_response = asyncio.run(execute_search(search_params=search_params, db_session=db.session))
+    display_results(search_response.results, db.session, "Similarity")
+
+
+@app.command()
+def hierarchical(
+    op: str = typer.Argument(..., help="The hierarchical operation to perform."),
+    path: str = typer.Argument(..., help="The ltree path or lquery pattern for the operation."),
+    query: str | None = typer.Option(None, "--query", "-f", help="An optional fuzzy term to rank the results."),
+    entity_type: EntityType = EntityType.SUBSCRIPTION,
+    limit: int = 10,
+) -> None:
+    """Performs a hierarchical search, optionally combined with fuzzy ranking.
+
+    Examples:
+        dotenv run python main.py search hierarchical is_descendant "subscription.shop" --query "Kingwood"
+        dotenv run python main.py search hierarchical matches_lquery "*.x_follower.x_follower_status*"
+    """
+    try:
+        condition = LtreeFilter(value=path, op=op)  # type: ignore[arg-type]
+    except (ValueError, ValidationError) as e:
+        raise typer.BadParameter(f"Invalid filter: {e}")
+
+    path_filter = PathFilter(path="ltree_hierarchical_filter", condition=condition, value_kind=UIType.STRING)
+
+    search_params = BaseSearchParameters.create(
+        entity_type=entity_type, filters=FilterTree.from_flat_and([path_filter]), query=query, limit=limit
+    )
+    search_response = asyncio.run(execute_search(search_params=search_params, db_session=db.session))
+    display_results(search_response.results, db.session, "Hierarchical Score")
+
+
+@app.command()
+def hybrid(query: str, term: str, entity_type: EntityType = EntityType.SUBSCRIPTION, limit: int = 10) -> None:
+    """Performs a hybrid search, combining semantic and fuzzy matching.
+
+    Example:
+        dotenv run python main.py search hybrid "reptile store" "Kingswood"
+    """
+    search_params = BaseSearchParameters.create(entity_type=entity_type, query=query, limit=limit)
+    logger.info("Executing Hybrid Search", query=query, term=term)
+    search_response = asyncio.run(execute_search(search_params=search_params, db_session=db.session))
+    display_results(search_response.results, db.session, "Hybrid Score")
+
+
+@app.command("generate-schema")
+def generate_schema() -> None:
+    """Generates and prints the dynamic filter schema from the live search index.
+
+    This queries the index for all distinct non-string paths to be used as
+    context for the LLM agent.
+
+    Example:
+        dotenv run python main.py search generate-schema
+    """
+
+    schema_map = get_structured_filter_schema()
+
+    if not schema_map:
+        logger.warning("No filterable paths found in the search index.")
+        return
+
+    logger.info("\nAvailable Structured Filters:\n")
+    for path, value_type in schema_map.items():
+        logger.info(f"- {path}: {value_type}")
+
+    logger.info("Successfully generated dynamic schema.", path_count=len(schema_map))
+
+
+@app.command("nested-demo")
+def nested_demo(entity_type: EntityType = EntityType.SUBSCRIPTION, limit: int = 10) -> None:
+    tree = FilterTree.model_validate(
+        {
+            "op": "AND",
+            "children": [
+                {
+                    "op": "OR",
+                    "children": [
+                        # First OR case: Active subscriptions from 2024
+                        {
+                            "op": "AND",
+                            "children": [
+                                {
+                                    "path": "subscription.status",
+                                    "condition": {"op": "eq", "value": "active"},
+                                    "value_kind": "string",
+                                },
+                                {
+                                    "path": "subscription.start_date",
+                                    "condition": {
+                                        "op": "between",
+                                        "value": {
+                                            "start": "2024-01-01T00:00:00Z",
+                                            "end": "2024-12-31T23:59:59Z",
+                                        },
+                                    },
+                                    "value_kind": "datetime",
+                                },
+                            ],
+                        },
+                        # Second OR case: Terminated subscriptions before 2026
+                        {
+                            "op": "AND",
+                            "children": [
+                                {
+                                    "path": "subscription.status",
+                                    "condition": {"op": "eq", "value": "terminated"},
+                                    "value_kind": "string",
+                                },
+                                {
+                                    "path": "subscription.end_date",
+                                    "condition": {"op": "lte", "value": "2025-12-31"},
+                                    "value_kind": "datetime",
+                                },
+                            ],
+                        },
+                    ],
+                },
+                {
+                    "path": "subscription.*.port_mode",
+                    "condition": {"op": "matches_lquery", "value": "*.port_mode"},
+                    "value_kind": "string",
+                },
+            ],
+        }
+    )
+
+    params = BaseSearchParameters.create(entity_type=entity_type, filters=tree, limit=limit)
+    search_response = asyncio.run(execute_search(params, db.session))
+
+    display_results(search_response.results, db.session, "Score")
+
+
+if __name__ == "__main__":
+    app()
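The nested-demo command builds its FilterTree from a raw dict; the same flat-AND shape can be assembled from the typed filter objects the other commands use. A minimal sketch, using only names that appear in this diff (a configured database is assumed):

import asyncio

from orchestrator.db import db
from orchestrator.search.core.types import EntityType, FilterOp, UIType
from orchestrator.search.filters import EqualityFilter, FilterTree, PathFilter
from orchestrator.search.retrieval import execute_search
from orchestrator.search.schemas.parameters import BaseSearchParameters

# One equality condition on subscription.status, wrapped in a flat AND.
status_filter = PathFilter(
    path="subscription.status",
    condition=EqualityFilter(op=FilterOp.EQ, value="active"),
    value_kind=UIType.STRING,
)
params = BaseSearchParameters.create(
    entity_type=EntityType.SUBSCRIPTION,
    filters=FilterTree.from_flat_and([status_filter]),
    limit=5,
)
response = asyncio.run(execute_search(search_params=params, db_session=db.session))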
orchestrator/cli/search/speedtest.py
ADDED
@@ -0,0 +1,151 @@
+import asyncio
+import time
+from typing import Any
+
+import structlog
+import typer
+from rich.console import Console
+from rich.progress import Progress, SpinnerColumn, TextColumn, TimeElapsedColumn
+from rich.table import Table
+
+from orchestrator.db import db
+from orchestrator.search.core.embedding import QueryEmbedder
+from orchestrator.search.core.types import EntityType
+from orchestrator.search.core.validators import is_uuid
+from orchestrator.search.retrieval.engine import execute_search
+from orchestrator.search.retrieval.pagination import PaginationParams
+from orchestrator.search.schemas.parameters import BaseSearchParameters
+
+logger = structlog.get_logger(__name__)
+console = Console()
+
+app = typer.Typer(name="speedtest", help="Search speed testing")
+
+DEFAULT_QUERIES = [
+    "network",
+    "fiber",
+    "port",
+    "network infrastructure",
+    "fiber connection",
+    "internet service",
+    "subscription",
+    "active",
+    "configuration",
+    "service provider",
+]
+
+
+async def generate_embeddings_for_queries(queries: list[str]) -> dict[str, list[float]]:
+    embedding_lookup = {}
+
+    for query in queries:
+        try:
+            embedding = await QueryEmbedder.generate_for_text_async(query)
+            if embedding:
+                embedding_lookup[query] = embedding
+            else:
+                logger.warning("Failed to generate embedding for query", query=query)
+        except Exception as e:
+            logger.error("Error generating embedding", query=query, error=str(e))
+
+    return embedding_lookup
+
+
+async def run_single_query(query: str, embedding_lookup: dict[str, list[float]]) -> dict[str, Any]:
+    search_params = BaseSearchParameters(entity_type=EntityType.SUBSCRIPTION, query=query, limit=30)
+
+    if is_uuid(query):
+        pagination_params = PaginationParams()
+        logger.debug("Using fuzzy-only ranking for full UUID", query=query)
+    else:
+
+        cached_embedding = embedding_lookup[query]
+        pagination_params = PaginationParams(q_vec_override=cached_embedding)
+
+    with db.session as session:
+        start_time = time.perf_counter()
+        response = await execute_search(search_params, session, pagination_params=pagination_params)
+        end_time = time.perf_counter()
+
+    return {
+        "query": query,
+        "time": end_time - start_time,
+        "results": len(response.results),
+        "search_type": response.metadata.search_type if hasattr(response, "metadata") else "unknown",
+    }
+
+
+@app.command()
+def quick(
+    queries: list[str] | None = typer.Option(None, "--query", "-q", help="Custom queries to test"),
+) -> None:
+    test_queries = queries if queries else DEFAULT_QUERIES
+
+    console.print(f"[bold blue]Quick Speed Test[/bold blue] - Testing {len(test_queries)} queries")
+
+    async def run_tests() -> list[dict[str, Any]]:
+        embedding_lookup = await generate_embeddings_for_queries(test_queries)
+
+        results = []
+
+        with Progress(
+            SpinnerColumn(),
+            TextColumn("[progress.description]{task.description}"),
+            TimeElapsedColumn(),
+            console=console,
+        ) as progress:
+            task = progress.add_task("Running queries...", total=len(test_queries))
+
+            for query in test_queries:
+                result = await run_single_query(query, embedding_lookup)
+                results.append(result)
+                progress.advance(task)
+
+        return results
+
+    results = asyncio.run(run_tests())
+
+    table = Table(show_header=True, header_style="bold magenta")
+    table.add_column("Query", style="dim", width=25)
+    table.add_column("Time", justify="right", style="cyan")
+    table.add_column("Type", justify="center", style="yellow")
+    table.add_column("Results", justify="right", style="green")
+
+    total_time = 0
+
+    for result in results:
+        time_ms = result["time"] * 1000
+        total_time += result["time"]
+
+        table.add_row(
+            result["query"][:24] + "..." if len(result["query"]) > 24 else result["query"],
+            f"{time_ms:.1f}ms",
+            result["search_type"],
+            str(result["results"]),
+        )
+
+    console.print(table)
+    console.print()
+
+    avg_time = total_time / len(results) * 1000
+    max_time = max(r["time"] for r in results) * 1000
+
+    console.print("[bold]Summary:[/bold]")
+    console.print(f"  Total time: {total_time * 1000:.1f}ms")
+    console.print(f"  Average: {avg_time:.1f}ms")
+    console.print(f"  Slowest: {max_time:.1f}ms")
+
+    by_type: dict[str, list[float]] = {}
+    for result in results:
+        search_type = result["search_type"]
+        if search_type not in by_type:
+            by_type[search_type] = []
+        by_type[search_type].append(result["time"] * 1000)
+
+    for search_type, times in by_type.items():
+        avg = sum(times) / len(times)
+        console.print(f"  {search_type.capitalize()}: {avg:.1f}ms avg ({len(times)} queries)")
+
+
+if __name__ == "__main__":
+    app()
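The notable design choice above is that query embeddings are generated once up front and injected back through PaginationParams(q_vec_override=...), so the timed path covers only database retrieval, never the embedding API round trip. Isolated as a sketch (a reachable embedding backend is assumed):

import asyncio

from orchestrator.search.core.embedding import QueryEmbedder
from orchestrator.search.retrieval.pagination import PaginationParams


async def cached_pagination(query: str) -> PaginationParams:
    # Pre-compute the query vector; fall back to default params if it fails.
    vec = await QueryEmbedder.generate_for_text_async(query)
    return PaginationParams(q_vec_override=vec) if vec else PaginationParams()


params = asyncio.run(cached_pagination("fiber connection"))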
orchestrator/db/models.py
CHANGED
@@ -20,7 +20,9 @@ from uuid import UUID
 import sqlalchemy
 import structlog
 from more_itertools import first_true
+from pgvector.sqlalchemy import Vector
 from sqlalchemy import (
+    TEXT,
     TIMESTAMP,
     Boolean,
     CheckConstraint,
@@ -29,6 +31,7 @@ from sqlalchemy import (
     ForeignKey,
     Index,
     Integer,
+    PrimaryKeyConstraint,
     Select,
     String,
     Table,
@@ -45,10 +48,12 @@ from sqlalchemy.ext.associationproxy import association_proxy
 from sqlalchemy.ext.orderinglist import ordering_list
 from sqlalchemy.orm import Mapped, deferred, mapped_column, object_session, relationship, undefer
 from sqlalchemy.sql.functions import GenericFunction
-from sqlalchemy_utils import TSVectorType, UUIDType
+from sqlalchemy_utils import LtreeType, TSVectorType, UUIDType
 
 from orchestrator.config.assignee import Assignee
 from orchestrator.db.database import BaseModel, SearchQuery
+from orchestrator.llm_settings import llm_settings
+from orchestrator.search.core.types import FieldType
 from orchestrator.targets import Target
 from orchestrator.utils.datetime import nowtz
 from orchestrator.version import GIT_COMMIT_HASH
@@ -685,3 +690,34 @@ class SubscriptionInstanceAsJsonFunction(GenericFunction):
 
     def __init__(self, sub_inst_id: UUID):
         super().__init__(sub_inst_id)
+
+
+class AiSearchIndex(BaseModel):
+
+    __tablename__ = "ai_search_index"
+
+    entity_type = mapped_column(
+        TEXT,
+        nullable=False,
+        index=True,
+    )
+    entity_id = mapped_column(
+        UUIDType,
+        nullable=False,
+    )
+
+    # Ltree path for hierarchical data
+    path = mapped_column(LtreeType, nullable=False, index=True)
+    value = mapped_column(TEXT, nullable=False)
+
+    value_type = mapped_column(
+        Enum(FieldType, name="field_type", values_callable=lambda obj: [e.value for e in obj]), nullable=False
+    )
+
+    # Embedding
+    embedding = mapped_column(Vector(llm_settings.EMBEDDING_DIMENSION), nullable=True)
+
+    # SHA-256
+    content_hash = mapped_column(String(64), nullable=False, index=True)
+
+    __table_args__ = (PrimaryKeyConstraint("entity_id", "path", name="pk_ai_search_index"),)
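With the embedding column mapped through pgvector's SQLAlchemy type, nearest-neighbour queries can be written directly against the model. A hedged sketch using pgvector's comparator API (1536 is an assumed dimension; the real value comes from llm_settings.EMBEDDING_DIMENSION):

from orchestrator.db import db
from orchestrator.db.models import AiSearchIndex

query_vector = [0.0] * 1536  # assumption: EMBEDDING_DIMENSION == 1536

rows = (
    db.session.query(AiSearchIndex)
    .filter(AiSearchIndex.embedding.isnot(None))
    .order_by(AiSearchIndex.embedding.cosine_distance(query_vector))  # pgvector's <=> operator
    .limit(5)
    .all()
)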
orchestrator/devtools/populator.py
CHANGED
@@ -371,6 +371,22 @@ class Populator:
         self.log.info("Started modify workflow")
         return self._start_workflow(workflow_name, subscription_id=subscription_id, **kwargs)
 
+    def start_reconcile_workflow(self, workflow_name: str, subscription_id: UUIDstr | UUID, **kwargs: Any) -> UUIDstr:
+        """Start a reconcile workflow for the provided name and subscription_id.
+
+        Args:
+            workflow_name: workflow name
+            subscription_id: uuid of the subscription you want to modify
+            kwargs: values to be used as form input
+
+        Returns: the process_id of the workflow process
+
+        """
+        subscription_id = str(subscription_id)
+        self.log = self.log.bind(subscription_id=subscription_id)
+        self.log.info("Started reconcile workflow")
+        return self._start_workflow(workflow_name, subscription_id=subscription_id, **kwargs)
+
     def start_verify_workflow(self, workflow_name: str, subscription_id: UUIDstr | UUID) -> UUIDstr:
         subscription_id = str(subscription_id)
         self.log = self.log.bind(subscription_id=subscription_id)
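A brief usage sketch for the new helper, mirroring the existing start_modify_workflow pattern; the workflow name here is a hypothetical placeholder:

from uuid import UUID

from orchestrator.devtools.populator import Populator


def reconcile_subscription(populator: Populator, subscription_id: UUID) -> str:
    # "task_reconcile_example" is a hypothetical workflow name; extra form
    # input can be passed as keyword arguments.
    return populator.start_reconcile_workflow("task_reconcile_example", subscription_id)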
orchestrator/domain/base.py
CHANGED
@@ -614,9 +614,7 @@ class ProductBlockModel(DomainModel):
         product_blocks_in_model = cls._get_depends_on_product_block_types()
         product_blocks_types_in_model = get_depends_on_product_block_type_list(product_blocks_in_model)
 
-        product_blocks_in_model = set(
-            flatten(map(attrgetter("__names__"), product_blocks_types_in_model))
-        )  # type: ignore
+        product_blocks_in_model = set(flatten(map(attrgetter("__names__"), product_blocks_types_in_model)))  # type: ignore
 
         missing_product_blocks_in_db = product_blocks_in_model - product_blocks_in_db  # type: ignore
         missing_product_blocks_in_model = product_blocks_in_db - product_blocks_in_model  # type: ignore
@@ -1084,9 +1082,7 @@ class SubscriptionModel(DomainModel):
         product_blocks_in_model = cls._get_depends_on_product_block_types()
         product_blocks_types_in_model = get_depends_on_product_block_type_list(product_blocks_in_model)
 
-        product_blocks_in_model = set(
-            flatten(map(attrgetter("__names__"), product_blocks_types_in_model))
-        )  # type: ignore
+        product_blocks_in_model = set(flatten(map(attrgetter("__names__"), product_blocks_types_in_model)))  # type: ignore
 
         missing_product_blocks_in_db = product_blocks_in_model - product_blocks_in_db  # type: ignore
         missing_product_blocks_in_model = product_blocks_in_db - product_blocks_in_model  # type: ignore
@@ -1294,7 +1290,6 @@ class SubscriptionModel(DomainModel):
     # Some common functions shared by from_other_product and from_subscription
     @classmethod
     def _get_subscription(cls: type[S], subscription_id: UUID | UUIDstr) -> SubscriptionTable | None:
-
         if not isinstance(subscription_id, UUID | UUIDstr):
             raise TypeError(f"subscription_id is of type {type(subscription_id)} instead of UUID | UUIDstr")
 
orchestrator/domain/lifecycle.py
CHANGED
@@ -16,11 +16,16 @@ from typing import TYPE_CHECKING, TypeVar
 import strawberry
 import structlog
 
+from orchestrator.settings import LifecycleValidationMode, app_settings
 from orchestrator.types import SubscriptionLifecycle
 from pydantic_forms.types import strEnum
 
 if TYPE_CHECKING:
-    from orchestrator.domain.base import DomainModel
+    from orchestrator.domain.base import DomainModel, SubscriptionModel
+else:
+    SubscriptionModel = None
+    DomainModel = None
+
+T = TypeVar("T", bound=SubscriptionModel)
 
 logger = structlog.get_logger(__name__)
 
@@ -71,9 +76,21 @@ def validate_lifecycle_status(
     )
 
 
-
-
-
-
-
-
+def validate_subscription_model_product_type(
+    subscription: SubscriptionModel,
+    validation_mode: LifecycleValidationMode = app_settings.LIFECYCLE_VALIDATION_MODE,
+) -> None:
+    """Validate that a subscription model has been instantiated with the correct product type class for its lifecycle status."""
+
+    actual_class = subscription.__class__
+    expected_class = lookup_specialized_type(actual_class, subscription.status)
+
+    if actual_class != expected_class:
+        msg = f"Subscription of type {actual_class} should use {expected_class} for lifecycle status '{subscription.status}'"
+        if validation_mode == LifecycleValidationMode.STRICT:
+            logger.error(msg)
+            raise ValueError(msg)
+        if validation_mode == LifecycleValidationMode.LOOSE:
+            logger.warning(msg)
+        elif validation_mode == LifecycleValidationMode.IGNORED:
+            pass