orchestrator-core 4.4.1__py3-none-any.whl → 4.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. orchestrator/__init__.py +17 -2
  2. orchestrator/agentic_app.py +103 -0
  3. orchestrator/api/api_v1/api.py +14 -2
  4. orchestrator/api/api_v1/endpoints/processes.py +2 -0
  5. orchestrator/api/api_v1/endpoints/search.py +296 -0
  6. orchestrator/app.py +32 -0
  7. orchestrator/cli/main.py +22 -1
  8. orchestrator/cli/search/__init__.py +32 -0
  9. orchestrator/cli/search/index_llm.py +73 -0
  10. orchestrator/cli/search/resize_embedding.py +135 -0
  11. orchestrator/cli/search/search_explore.py +208 -0
  12. orchestrator/cli/search/speedtest.py +151 -0
  13. orchestrator/db/models.py +37 -1
  14. orchestrator/devtools/populator.py +16 -0
  15. orchestrator/domain/base.py +2 -7
  16. orchestrator/domain/lifecycle.py +24 -7
  17. orchestrator/llm_settings.py +57 -0
  18. orchestrator/log_config.py +1 -0
  19. orchestrator/migrations/helpers.py +7 -1
  20. orchestrator/schemas/search.py +130 -0
  21. orchestrator/schemas/workflow.py +1 -0
  22. orchestrator/search/__init__.py +12 -0
  23. orchestrator/search/agent/__init__.py +21 -0
  24. orchestrator/search/agent/agent.py +62 -0
  25. orchestrator/search/agent/prompts.py +100 -0
  26. orchestrator/search/agent/state.py +21 -0
  27. orchestrator/search/agent/tools.py +258 -0
  28. orchestrator/search/core/__init__.py +12 -0
  29. orchestrator/search/core/embedding.py +73 -0
  30. orchestrator/search/core/exceptions.py +36 -0
  31. orchestrator/search/core/types.py +296 -0
  32. orchestrator/search/core/validators.py +40 -0
  33. orchestrator/search/docs/index.md +37 -0
  34. orchestrator/search/docs/running_local_text_embedding_inference.md +46 -0
  35. orchestrator/search/filters/__init__.py +40 -0
  36. orchestrator/search/filters/base.py +295 -0
  37. orchestrator/search/filters/date_filters.py +88 -0
  38. orchestrator/search/filters/definitions.py +107 -0
  39. orchestrator/search/filters/ltree_filters.py +56 -0
  40. orchestrator/search/filters/numeric_filter.py +73 -0
  41. orchestrator/search/indexing/__init__.py +16 -0
  42. orchestrator/search/indexing/indexer.py +334 -0
  43. orchestrator/search/indexing/registry.py +101 -0
  44. orchestrator/search/indexing/tasks.py +69 -0
  45. orchestrator/search/indexing/traverse.py +334 -0
  46. orchestrator/search/llm_migration.py +108 -0
  47. orchestrator/search/retrieval/__init__.py +16 -0
  48. orchestrator/search/retrieval/builder.py +123 -0
  49. orchestrator/search/retrieval/engine.py +154 -0
  50. orchestrator/search/retrieval/exceptions.py +90 -0
  51. orchestrator/search/retrieval/pagination.py +96 -0
  52. orchestrator/search/retrieval/retrievers/__init__.py +26 -0
  53. orchestrator/search/retrieval/retrievers/base.py +123 -0
  54. orchestrator/search/retrieval/retrievers/fuzzy.py +94 -0
  55. orchestrator/search/retrieval/retrievers/hybrid.py +277 -0
  56. orchestrator/search/retrieval/retrievers/semantic.py +94 -0
  57. orchestrator/search/retrieval/retrievers/structured.py +39 -0
  58. orchestrator/search/retrieval/utils.py +120 -0
  59. orchestrator/search/retrieval/validation.py +152 -0
  60. orchestrator/search/schemas/__init__.py +12 -0
  61. orchestrator/search/schemas/parameters.py +129 -0
  62. orchestrator/search/schemas/results.py +77 -0
  63. orchestrator/services/processes.py +2 -1
  64. orchestrator/services/settings_env_variables.py +2 -2
  65. orchestrator/settings.py +8 -1
  66. orchestrator/utils/state.py +6 -1
  67. orchestrator/workflows/steps.py +15 -1
  68. orchestrator/workflows/tasks/validate_products.py +1 -1
  69. {orchestrator_core-4.4.1.dist-info → orchestrator_core-4.5.0.dist-info}/METADATA +15 -8
  70. {orchestrator_core-4.4.1.dist-info → orchestrator_core-4.5.0.dist-info}/RECORD +72 -22
  71. {orchestrator_core-4.4.1.dist-info → orchestrator_core-4.5.0.dist-info}/WHEEL +0 -0
  72. {orchestrator_core-4.4.1.dist-info → orchestrator_core-4.5.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,135 @@
1
+ import structlog
2
+ import typer
3
+ from sqlalchemy import text
4
+ from sqlalchemy.exc import SQLAlchemyError
5
+
6
+ from orchestrator.db import db
7
+ from orchestrator.db.models import AiSearchIndex
8
+ from orchestrator.llm_settings import llm_settings
9
+
10
+ logger = structlog.get_logger(__name__)
11
+
12
+ app = typer.Typer(
13
+ name="embedding",
14
+ help="Resize vector dimensions of the embeddings.",
15
+ )
16
+
17
+
18
def get_current_embedding_dimension() -> int | None:
    """Return the vector dimension currently stored in ai_search_index.embedding.

    Uses pgvector's ``vector_dims`` on a single non-NULL embedding row.

    Returns:
        Current dimension size or None if no records exist or column doesn't exist
    """
    dimension_sql = text(
        """
        SELECT vector_dims(embedding) as dimension
        FROM ai_search_index
        WHERE embedding IS NOT NULL
        LIMIT 1
        """
    )
    try:
        row = db.session.execute(dimension_sql).fetchone()
    except SQLAlchemyError as e:
        logger.error("Failed to get current embedding dimension", error=str(e))
        return None
    # Either no embedded rows exist yet, or the first row carried no dimension.
    return row[0] if row and row[0] else None
41
+
42
+
43
def drop_all_embeddings() -> int:
    """Delete every record from the ai_search_index table.

    Commits on success; rolls the session back, logs, and re-raises on failure.

    Returns:
        Number of records deleted
    """
    try:
        deleted = db.session.query(AiSearchIndex).delete()
        db.session.commit()
    except SQLAlchemyError as e:
        db.session.rollback()
        logger.error("Failed to drop embeddings records", error=str(e))
        raise
    logger.info(f"Deleted {deleted} records from ai_search_index")
    return deleted
59
+
60
+
61
def alter_embedding_column_dimension(new_dimension: int) -> None:
    """Alter the embedding column to use the new dimension size.

    Drops the existing ``embedding`` column (if present) and re-adds it as
    ``vector(new_dimension)``. Commits on success; rolls back and re-raises
    on failure.

    Args:
        new_dimension: New vector dimension size; must be a positive integer.

    Raises:
        ValueError: If new_dimension is not a positive integer.
        SQLAlchemyError: If either DDL statement fails.
    """
    # The dimension is interpolated directly into DDL text (type declarations
    # cannot use bind parameters), so reject anything but a positive integer
    # before building the statement.
    if not isinstance(new_dimension, int) or new_dimension <= 0:
        raise ValueError(f"new_dimension must be a positive integer, got {new_dimension!r}")

    try:
        drop_query = text("ALTER TABLE ai_search_index DROP COLUMN IF EXISTS embedding")
        db.session.execute(drop_query)

        add_query = text(f"ALTER TABLE ai_search_index ADD COLUMN embedding vector({new_dimension})")
        db.session.execute(add_query)

        db.session.commit()
        logger.info(f"Altered embedding column to dimension {new_dimension}")

    except SQLAlchemyError as e:
        db.session.rollback()
        logger.error("Failed to alter embedding column dimension", error=str(e))
        raise
81
+
82
+
83
@app.command("resize")
def resize_embeddings_command() -> None:
    """Resize vector dimensions of the ai_search_index embedding column.

    Compares the current embedding dimension in the database with the configured
    dimension in llm_settings. If they differ, drops all records and alters the
    column to match the new dimension.
    """
    new_dimension = llm_settings.EMBEDDING_DIMENSION

    logger.info("Starting embedding dimension resize", new_dimension=new_dimension)

    current_dimension = get_current_embedding_dimension()

    if current_dimension is None:
        # Proceed anyway: the column may be empty or missing, in which case
        # recreating it at the configured dimension is the desired outcome.
        logger.warning("Could not determine current dimension for embedding column")

    if current_dimension == new_dimension:
        logger.info(
            "Embedding dimensions match, no resize needed",
            current_dimension=current_dimension,
            new_dimension=new_dimension,
        )
        return

    logger.info("Dimension mismatch detected", current_dimension=current_dimension, new_dimension=new_dimension)

    # Destructive operation: require explicit confirmation from the operator.
    if not typer.confirm("This will DELETE ALL RECORDS from ai_search_index and alter the embedding column. Continue?"):
        logger.info("Operation cancelled by user")
        return

    try:
        # Drop all records first.
        logger.info("Dropping all embedding records...")
        deleted_count = drop_all_embeddings()

        # Then alter column dimension.
        logger.info(f"Altering embedding column to dimension {new_dimension}...")
        alter_embedding_column_dimension(new_dimension)

        logger.info(
            "Embedding dimension resize completed successfully",
            records_deleted=deleted_count,
            new_dimension=new_dimension,
        )

    except Exception as e:
        logger.error("Embedding dimension resize failed", error=str(e))
        # Chain the original exception so the root cause is preserved in the
        # traceback instead of being swallowed by the CLI exit (flake8 B904).
        raise typer.Exit(1) from e
132
+
133
+
134
if __name__ == "__main__":
    # Entry point for running this Typer CLI module directly as a script.
    app()
@@ -0,0 +1,208 @@
1
+ import asyncio
2
+
3
+ import structlog
4
+ import typer
5
+ from pydantic import ValidationError
6
+
7
+ from orchestrator.db import db
8
+ from orchestrator.search.core.types import EntityType, FilterOp, UIType
9
+ from orchestrator.search.filters import EqualityFilter, FilterTree, LtreeFilter, PathFilter
10
+ from orchestrator.search.retrieval import execute_search
11
+ from orchestrator.search.retrieval.utils import display_filtered_paths_only, display_results
12
+ from orchestrator.search.retrieval.validation import get_structured_filter_schema
13
+ from orchestrator.search.schemas.parameters import BaseSearchParameters
14
+
15
+ app = typer.Typer(help="Experiment with the subscription search indexes.")
16
+
17
+ logger = structlog.getLogger(__name__)
18
+
19
+
20
@app.command()
def structured(path: str, value: str, entity_type: EntityType = EntityType.SUBSCRIPTION, limit: int = 10) -> None:
    """Finds subscriptions where a specific field path contains an exact value.

    Example:
        dotenv run python main.py search structured "subscription.status" "provisioning"
        ...
        {
            "path": "subscription.status",
            "value": "provisioning"
        },
        ...
    """
    condition = EqualityFilter(op=FilterOp.EQ, value=value)
    filters = FilterTree.from_flat_and(
        [PathFilter(path=path, condition=condition, value_kind=UIType.STRING)]
    )
    params = BaseSearchParameters.create(entity_type=entity_type, filters=filters, limit=limit)
    response = asyncio.run(execute_search(search_params=params, db_session=db.session))
    # Show which paths matched the filter, then the ranked results.
    display_filtered_paths_only(response.results, params, db.session)
    display_results(response.results, db.session, "Match")
40
+
41
+
42
@app.command()
def semantic(query: str, entity_type: EntityType = EntityType.SUBSCRIPTION, limit: int = 10) -> None:
    """Finds subscriptions that are conceptually most similar to the query text.

    Example:
        dotenv run python main.py search semantic "Shop for an alligator store"
        ...
        {
            "path": "subscription.shop.shop_description",
            "value": "Kingswood reptiles shop"
        },
        ...
    """
    params = BaseSearchParameters.create(entity_type=entity_type, query=query, limit=limit)
    response = asyncio.run(execute_search(search_params=params, db_session=db.session))
    display_results(response.results, db.session, "Distance")
58
+
59
+
60
@app.command()
def fuzzy(term: str, entity_type: EntityType = EntityType.SUBSCRIPTION, limit: int = 10) -> None:
    """Finds subscriptions containing text similar to the query, tolerating typos.

    Example:
        dotenv run python main.py search fuzzy "Colonel"
        ...
        {
            "path": "description",
            "value": "X Follower WF for TimCoronel"
        },
        ...
    """
    params = BaseSearchParameters.create(entity_type=entity_type, query=term, limit=limit)
    response = asyncio.run(execute_search(search_params=params, db_session=db.session))
    display_results(response.results, db.session, "Similarity")
76
+
77
+
78
@app.command()
def hierarchical(
    op: str = typer.Argument(..., help="The hierarchical operation to perform."),
    path: str = typer.Argument(..., help="The ltree path or lquery pattern for the operation."),
    query: str | None = typer.Option(None, "--query", "-f", help="An optional fuzzy term to rank the results."),
    entity_type: EntityType = EntityType.SUBSCRIPTION,
    limit: int = 10,
) -> None:
    """Performs a hierarchical search, optionally combined with fuzzy ranking.

    Examples:
        dotenv run python main.py search hierarchical is_descendant "subscription.shop" --query "Kingwood"
        dotenv run python main.py search hierarchical matches_lquery "*.x_follower.x_follower_status*"
    """
    # Surface invalid op/path combinations as a CLI usage error.
    try:
        ltree_condition = LtreeFilter(value=path, op=op)  # type: ignore[arg-type]
    except (ValueError, ValidationError) as exc:
        raise typer.BadParameter(f"Invalid filter: {exc}")

    filters = FilterTree.from_flat_and(
        [PathFilter(path="ltree_hierarchical_filter", condition=ltree_condition, value_kind=UIType.STRING)]
    )
    params = BaseSearchParameters.create(entity_type=entity_type, filters=filters, query=query, limit=limit)
    response = asyncio.run(execute_search(search_params=params, db_session=db.session))
    display_results(response.results, db.session, "Hierarchical Score")
104
+
105
+
106
@app.command()
def hybrid(query: str, term: str, entity_type: EntityType = EntityType.SUBSCRIPTION, limit: int = 10) -> None:
    """Performs a hybrid search, combining semantic and fuzzy matching.

    Example:
        dotenv run python main.py search hybrid "reptile store" "Kingswood"
    """
    # NOTE(review): `term` is only logged here and never passed into the search
    # parameters — confirm whether it should feed the fuzzy leg of the search.
    params = BaseSearchParameters.create(entity_type=entity_type, query=query, limit=limit)
    logger.info("Executing Hybrid Search", query=query, term=term)
    response = asyncio.run(execute_search(search_params=params, db_session=db.session))
    display_results(response.results, db.session, "Hybrid Score")
117
+
118
+
119
@app.command("generate-schema")
def generate_schema() -> None:
    """Generates and prints the dynamic filter schema from the live search index.

    This queries the index for all distinct non-string paths to be used as
    context for the LLM agent.

    Example:
        dotenv run python main.py search generate-schema
    """
    schema_map = get_structured_filter_schema()
    if not schema_map:
        logger.warning("No filterable paths found in the search index.")
        return

    logger.info("\nAvailable Structured Filters:\n")
    for filter_path, value_type in schema_map.items():
        logger.info(f"- {filter_path}: {value_type}")

    logger.info("Successfully generated dynamic schema.", path_count=len(schema_map))
141
+
142
+
143
@app.command("nested-demo")
def nested_demo(entity_type: EntityType = EntityType.SUBSCRIPTION, limit: int = 10) -> None:
    """Demonstrates a nested AND/OR filter tree combining status, date and lquery filters."""
    # First OR case: Active subscriptions from 2024
    active_in_2024 = {
        "op": "AND",
        "children": [
            {
                "path": "subscription.status",
                "condition": {"op": "eq", "value": "active"},
                "value_kind": "string",
            },
            {
                "path": "subscription.start_date",
                "condition": {
                    "op": "between",
                    "value": {
                        "start": "2024-01-01T00:00:00Z",
                        "end": "2024-12-31T23:59:59Z",
                    },
                },
                "value_kind": "datetime",
            },
        ],
    }
    # Second OR case: Terminated subscriptions before 2026
    terminated_before_2026 = {
        "op": "AND",
        "children": [
            {
                "path": "subscription.status",
                "condition": {"op": "eq", "value": "terminated"},
                "value_kind": "string",
            },
            {
                "path": "subscription.end_date",
                "condition": {"op": "lte", "value": "2025-12-31"},
                "value_kind": "datetime",
            },
        ],
    }
    # Always required on top of either OR case: a port_mode lquery match.
    port_mode_filter = {
        "path": "subscription.*.port_mode",
        "condition": {"op": "matches_lquery", "value": "*.port_mode"},
        "value_kind": "string",
    }

    tree = FilterTree.model_validate(
        {
            "op": "AND",
            "children": [
                {"op": "OR", "children": [active_in_2024, terminated_before_2026]},
                port_mode_filter,
            ],
        }
    )

    params = BaseSearchParameters.create(entity_type=entity_type, filters=tree, limit=limit)
    search_response = asyncio.run(execute_search(params, db.session))

    display_results(search_response.results, db.session, "Score")
205
+
206
+
207
if __name__ == "__main__":
    # Entry point for running this Typer CLI module directly as a script.
    app()
@@ -0,0 +1,151 @@
1
+ import asyncio
2
+ import time
3
+ from typing import Any
4
+
5
+ import structlog
6
+ import typer
7
+ from rich.console import Console
8
+ from rich.progress import Progress, SpinnerColumn, TextColumn, TimeElapsedColumn
9
+ from rich.table import Table
10
+
11
+ from orchestrator.db import db
12
+ from orchestrator.search.core.embedding import QueryEmbedder
13
+ from orchestrator.search.core.types import EntityType
14
+ from orchestrator.search.core.validators import is_uuid
15
+ from orchestrator.search.retrieval.engine import execute_search
16
+ from orchestrator.search.retrieval.pagination import PaginationParams
17
+ from orchestrator.search.schemas.parameters import BaseSearchParameters
18
+
19
+ logger = structlog.get_logger(__name__)
20
+ console = Console()
21
+
22
+ app = typer.Typer(name="speedtest", help="Search speed testing")
23
+
24
+ DEFAULT_QUERIES = [
25
+ "network",
26
+ "fiber",
27
+ "port",
28
+ "network infrastructure",
29
+ "fiber connection",
30
+ "internet service",
31
+ "subscription",
32
+ "active",
33
+ "configuration",
34
+ "service provider",
35
+ ]
36
+
37
+
38
async def generate_embeddings_for_queries(queries: list[str]) -> dict[str, list[float]]:
    """Pre-compute embeddings for the given queries, skipping ones that fail.

    Failures are logged and the corresponding query is simply omitted from the
    returned mapping.
    """
    lookup: dict[str, list[float]] = {}
    for q in queries:
        try:
            vector = await QueryEmbedder.generate_for_text_async(q)
        except Exception as e:
            logger.error("Error generating embedding", query=q, error=str(e))
            continue
        if vector:
            lookup[q] = vector
        else:
            logger.warning("Failed to generate embedding for query", query=q)
    return lookup
52
+
53
+
54
async def run_single_query(query: str, embedding_lookup: dict[str, list[float]]) -> dict[str, Any]:
    """Execute one timed search and report its duration, result count and type.

    Args:
        query: The search text; a full UUID gets fuzzy-only ranking.
        embedding_lookup: Pre-computed embeddings keyed by query text.

    Returns:
        Dict with "query", "time" (seconds), "results" (count) and "search_type".
    """
    search_params = BaseSearchParameters(entity_type=EntityType.SUBSCRIPTION, query=query, limit=30)

    if is_uuid(query):
        pagination_params = PaginationParams()
        logger.debug("Using fuzzy-only ranking for full UUID", query=query)
    else:
        # Embedding generation is best-effort upstream: queries whose embedding
        # failed are absent from the lookup. Fall back to default pagination
        # instead of raising a KeyError for those queries.
        cached_embedding = embedding_lookup.get(query)
        if cached_embedding is None:
            logger.warning("No cached embedding for query, using default pagination", query=query)
            pagination_params = PaginationParams()
        else:
            pagination_params = PaginationParams(q_vec_override=cached_embedding)

    with db.session as session:
        start_time = time.perf_counter()
        response = await execute_search(search_params, session, pagination_params=pagination_params)
        end_time = time.perf_counter()

    return {
        "query": query,
        "time": end_time - start_time,
        "results": len(response.results),
        "search_type": response.metadata.search_type if hasattr(response, "metadata") else "unknown",
    }
76
+
77
+
78
@app.command()
def quick(
    queries: list[str] | None = typer.Option(None, "--query", "-q", help="Custom queries to test"),
) -> None:
    """Run each query once against the search engine and print per-query timings."""
    test_queries = queries or DEFAULT_QUERIES

    console.print(f"[bold blue]Quick Speed Test[/bold blue] - Testing {len(test_queries)} queries")

    async def run_tests() -> list[dict[str, Any]]:
        # Embeddings are pre-computed up front so the timed loop measures
        # search latency only, not embedding generation.
        embedding_lookup = await generate_embeddings_for_queries(test_queries)

        timings: list[dict[str, Any]] = []
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            TimeElapsedColumn(),
            console=console,
        ) as progress:
            task = progress.add_task("Running queries...", total=len(test_queries))
            for query in test_queries:
                timings.append(await run_single_query(query, embedding_lookup))
                progress.advance(task)
        return timings

    results = asyncio.run(run_tests())

    # Per-query breakdown.
    table = Table(show_header=True, header_style="bold magenta")
    table.add_column("Query", style="dim", width=25)
    table.add_column("Time", justify="right", style="cyan")
    table.add_column("Type", justify="center", style="yellow")
    table.add_column("Results", justify="right", style="green")

    total_time = 0.0
    for result in results:
        elapsed_ms = result["time"] * 1000
        total_time += result["time"]

        label = result["query"]
        if len(label) > 24:
            label = label[:24] + "..."
        table.add_row(label, f"{elapsed_ms:.1f}ms", result["search_type"], str(result["results"]))

    console.print(table)
    console.print()

    avg_time = total_time / len(results) * 1000
    max_time = max(r["time"] for r in results) * 1000

    console.print("[bold]Summary:[/bold]")
    console.print(f" Total time: {total_time * 1000:.1f}ms")
    console.print(f" Average: {avg_time:.1f}ms")
    console.print(f" Slowest: {max_time:.1f}ms")

    # Average latency grouped by the search type the engine chose.
    by_type: dict[str, list[float]] = {}
    for result in results:
        by_type.setdefault(result["search_type"], []).append(result["time"] * 1000)

    for search_type, times in by_type.items():
        avg = sum(times) / len(times)
        console.print(f" {search_type.capitalize()}: {avg:.1f}ms avg ({len(times)} queries)")
148
+
149
+
150
if __name__ == "__main__":
    # Entry point for running this Typer CLI module directly as a script.
    app()
orchestrator/db/models.py CHANGED
@@ -20,7 +20,9 @@ from uuid import UUID
20
20
  import sqlalchemy
21
21
  import structlog
22
22
  from more_itertools import first_true
23
+ from pgvector.sqlalchemy import Vector
23
24
  from sqlalchemy import (
25
+ TEXT,
24
26
  TIMESTAMP,
25
27
  Boolean,
26
28
  CheckConstraint,
@@ -29,6 +31,7 @@ from sqlalchemy import (
29
31
  ForeignKey,
30
32
  Index,
31
33
  Integer,
34
+ PrimaryKeyConstraint,
32
35
  Select,
33
36
  String,
34
37
  Table,
@@ -45,10 +48,12 @@ from sqlalchemy.ext.associationproxy import association_proxy
45
48
  from sqlalchemy.ext.orderinglist import ordering_list
46
49
  from sqlalchemy.orm import Mapped, deferred, mapped_column, object_session, relationship, undefer
47
50
  from sqlalchemy.sql.functions import GenericFunction
48
- from sqlalchemy_utils import TSVectorType, UUIDType
51
+ from sqlalchemy_utils import LtreeType, TSVectorType, UUIDType
49
52
 
50
53
  from orchestrator.config.assignee import Assignee
51
54
  from orchestrator.db.database import BaseModel, SearchQuery
55
+ from orchestrator.llm_settings import llm_settings
56
+ from orchestrator.search.core.types import FieldType
52
57
  from orchestrator.targets import Target
53
58
  from orchestrator.utils.datetime import nowtz
54
59
  from orchestrator.version import GIT_COMMIT_HASH
@@ -685,3 +690,34 @@ class SubscriptionInstanceAsJsonFunction(GenericFunction):
685
690
 
686
691
  def __init__(self, sub_inst_id: UUID):
687
692
  super().__init__(sub_inst_id)
693
+
694
+
695
class AiSearchIndex(BaseModel):
    """Search-index rows for the AI search feature.

    Each row holds one (entity_id, path) value pair with an optional pgvector
    embedding of that value.
    """

    __tablename__ = "ai_search_index"

    # Kind of the indexed entity; indexed to allow filtering by entity type.
    entity_type = mapped_column(
        TEXT,
        nullable=False,
        index=True,
    )
    # Id of the indexed entity; with `path` it forms the composite primary key.
    entity_id = mapped_column(
        UUIDType,
        nullable=False,
    )

    # Ltree path for hierarchical data
    path = mapped_column(LtreeType, nullable=False, index=True)
    # The field's value, stored as text regardless of its original type.
    value = mapped_column(TEXT, nullable=False)

    # Original type of `value`, persisted as the Postgres enum `field_type`
    # (values taken from FieldType.value).
    value_type = mapped_column(
        Enum(FieldType, name="field_type", values_callable=lambda obj: [e.value for e in obj]), nullable=False
    )

    # Embedding vector sized by llm_settings.EMBEDDING_DIMENSION; nullable so
    # rows can exist before/without an embedding being computed.
    embedding = mapped_column(Vector(llm_settings.EMBEDDING_DIMENSION), nullable=True)

    # SHA-256 hash of the content (64 hex chars); presumably used to detect
    # unchanged rows and skip re-embedding — verify against the indexer.
    content_hash = mapped_column(String(64), nullable=False, index=True)

    __table_args__ = (PrimaryKeyConstraint("entity_id", "path", name="pk_ai_search_index"),)
@@ -371,6 +371,22 @@ class Populator:
371
371
  self.log.info("Started modify workflow")
372
372
  return self._start_workflow(workflow_name, subscription_id=subscription_id, **kwargs)
373
373
 
374
+ def start_reconcile_workflow(self, workflow_name: str, subscription_id: UUIDstr | UUID, **kwargs: Any) -> UUIDstr:
375
+ """Start a reconcile workflow for the provided name and subscription_id.
376
+
377
+ Args:
378
+ workflow_name: workflow name
379
+ subscription_id: uuid of the subscription you want to modify
380
+ kwargs: values to be used as form input
381
+
382
+ Returns: the process_id of the workflow process
383
+
384
+ """
385
+ subscription_id = str(subscription_id)
386
+ self.log = self.log.bind(subscription_id=subscription_id)
387
+ self.log.info("Started reconcile workflow")
388
+ return self._start_workflow(workflow_name, subscription_id=subscription_id, **kwargs)
389
+
374
390
  def start_verify_workflow(self, workflow_name: str, subscription_id: UUIDstr | UUID) -> UUIDstr:
375
391
  subscription_id = str(subscription_id)
376
392
  self.log = self.log.bind(subscription_id=subscription_id)
@@ -614,9 +614,7 @@ class ProductBlockModel(DomainModel):
614
614
  product_blocks_in_model = cls._get_depends_on_product_block_types()
615
615
  product_blocks_types_in_model = get_depends_on_product_block_type_list(product_blocks_in_model)
616
616
 
617
- product_blocks_in_model = set(
618
- flatten(map(attrgetter("__names__"), product_blocks_types_in_model))
619
- ) # type: ignore
617
+ product_blocks_in_model = set(flatten(map(attrgetter("__names__"), product_blocks_types_in_model))) # type: ignore
620
618
 
621
619
  missing_product_blocks_in_db = product_blocks_in_model - product_blocks_in_db # type: ignore
622
620
  missing_product_blocks_in_model = product_blocks_in_db - product_blocks_in_model # type: ignore
@@ -1084,9 +1082,7 @@ class SubscriptionModel(DomainModel):
1084
1082
  product_blocks_in_model = cls._get_depends_on_product_block_types()
1085
1083
  product_blocks_types_in_model = get_depends_on_product_block_type_list(product_blocks_in_model)
1086
1084
 
1087
- product_blocks_in_model = set(
1088
- flatten(map(attrgetter("__names__"), product_blocks_types_in_model))
1089
- ) # type: ignore
1085
+ product_blocks_in_model = set(flatten(map(attrgetter("__names__"), product_blocks_types_in_model))) # type: ignore
1090
1086
 
1091
1087
  missing_product_blocks_in_db = product_blocks_in_model - product_blocks_in_db # type: ignore
1092
1088
  missing_product_blocks_in_model = product_blocks_in_db - product_blocks_in_model # type: ignore
@@ -1294,7 +1290,6 @@ class SubscriptionModel(DomainModel):
1294
1290
  # Some common functions shared by from_other_product and from_subscription
1295
1291
  @classmethod
1296
1292
  def _get_subscription(cls: type[S], subscription_id: UUID | UUIDstr) -> SubscriptionTable | None:
1297
-
1298
1293
  if not isinstance(subscription_id, UUID | UUIDstr):
1299
1294
  raise TypeError(f"subscription_id is of type {type(subscription_id)} instead of UUID | UUIDstr")
1300
1295
 
@@ -16,11 +16,16 @@ from typing import TYPE_CHECKING, TypeVar
16
16
  import strawberry
17
17
  import structlog
18
18
 
19
+ from orchestrator.settings import LifecycleValidationMode, app_settings
19
20
  from orchestrator.types import SubscriptionLifecycle
20
21
  from pydantic_forms.types import strEnum
21
22
 
22
23
  if TYPE_CHECKING:
23
- from orchestrator.domain.base import DomainModel
24
+ from orchestrator.domain.base import DomainModel, SubscriptionModel
25
+ else:
26
+ SubscriptionModel = None
27
+ DomainModel = None
28
+ T = TypeVar("T", bound=SubscriptionModel)
24
29
 
25
30
  logger = structlog.get_logger(__name__)
26
31
 
@@ -71,9 +76,21 @@ def validate_lifecycle_status(
71
76
  )
72
77
 
73
78
 
74
- if TYPE_CHECKING:
75
- from orchestrator.domain.base import DomainModel, SubscriptionModel
76
- else:
77
- SubscriptionModel = None
78
- DomainModel = None
79
- T = TypeVar("T", bound=SubscriptionModel)
79
def validate_subscription_model_product_type(
    subscription: SubscriptionModel,
    validation_mode: LifecycleValidationMode = app_settings.LIFECYCLE_VALIDATION_MODE,
) -> None:
    """Validate that a subscription model has been instantiated with the correct product type class for its lifecycle status."""
    actual_class = subscription.__class__
    expected_class = lookup_specialized_type(actual_class, subscription.status)

    # Nothing to report when the instantiated class matches the expectation.
    if actual_class == expected_class:
        return

    msg = f"Subscription of type {actual_class} should use {expected_class} for lifecycle status '{subscription.status}'"
    if validation_mode == LifecycleValidationMode.STRICT:
        logger.error(msg)
        raise ValueError(msg)
    if validation_mode == LifecycleValidationMode.LOOSE:
        logger.warning(msg)
    # LifecycleValidationMode.IGNORED: the mismatch is deliberately tolerated.