orchestrator-core 4.4.0rc2__py3-none-any.whl → 5.0.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- orchestrator/__init__.py +1 -1
- orchestrator/api/api_v1/api.py +7 -0
- orchestrator/api/api_v1/endpoints/agent.py +62 -0
- orchestrator/api/api_v1/endpoints/processes.py +6 -12
- orchestrator/api/api_v1/endpoints/search.py +197 -0
- orchestrator/api/api_v1/endpoints/subscriptions.py +0 -1
- orchestrator/app.py +4 -0
- orchestrator/cli/index_llm.py +73 -0
- orchestrator/cli/main.py +8 -1
- orchestrator/cli/resize_embedding.py +136 -0
- orchestrator/cli/scheduler.py +29 -40
- orchestrator/cli/search_explore.py +203 -0
- orchestrator/db/models.py +37 -1
- orchestrator/graphql/schema.py +0 -5
- orchestrator/graphql/schemas/process.py +2 -2
- orchestrator/graphql/utils/create_resolver_error_handler.py +1 -1
- orchestrator/migrations/versions/schema/2025-08-12_52b37b5b2714_search_index_model_for_llm_integration.py +95 -0
- orchestrator/schedules/__init__.py +2 -1
- orchestrator/schedules/resume_workflows.py +2 -2
- orchestrator/schedules/scheduling.py +24 -64
- orchestrator/schedules/task_vacuum.py +2 -2
- orchestrator/schedules/validate_products.py +2 -8
- orchestrator/schedules/validate_subscriptions.py +2 -2
- orchestrator/schemas/search.py +101 -0
- orchestrator/search/__init__.py +0 -0
- orchestrator/search/agent/__init__.py +1 -0
- orchestrator/search/agent/prompts.py +62 -0
- orchestrator/search/agent/state.py +8 -0
- orchestrator/search/agent/tools.py +122 -0
- orchestrator/search/core/__init__.py +0 -0
- orchestrator/search/core/embedding.py +64 -0
- orchestrator/search/core/exceptions.py +16 -0
- orchestrator/search/core/types.py +162 -0
- orchestrator/search/core/validators.py +27 -0
- orchestrator/search/docs/index.md +37 -0
- orchestrator/search/docs/running_local_text_embedding_inference.md +45 -0
- orchestrator/search/filters/__init__.py +27 -0
- orchestrator/search/filters/base.py +236 -0
- orchestrator/search/filters/date_filters.py +75 -0
- orchestrator/search/filters/definitions.py +76 -0
- orchestrator/search/filters/ltree_filters.py +31 -0
- orchestrator/search/filters/numeric_filter.py +60 -0
- orchestrator/search/indexing/__init__.py +3 -0
- orchestrator/search/indexing/indexer.py +316 -0
- orchestrator/search/indexing/registry.py +88 -0
- orchestrator/search/indexing/tasks.py +53 -0
- orchestrator/search/indexing/traverse.py +209 -0
- orchestrator/search/retrieval/__init__.py +3 -0
- orchestrator/search/retrieval/builder.py +64 -0
- orchestrator/search/retrieval/engine.py +96 -0
- orchestrator/search/retrieval/ranker.py +202 -0
- orchestrator/search/retrieval/utils.py +88 -0
- orchestrator/search/retrieval/validation.py +174 -0
- orchestrator/search/schemas/__init__.py +0 -0
- orchestrator/search/schemas/parameters.py +114 -0
- orchestrator/search/schemas/results.py +47 -0
- orchestrator/services/processes.py +11 -16
- orchestrator/services/subscriptions.py +0 -4
- orchestrator/settings.py +29 -1
- orchestrator/targets.py +0 -1
- orchestrator/workflow.py +1 -8
- orchestrator/workflows/utils.py +1 -48
- {orchestrator_core-4.4.0rc2.dist-info → orchestrator_core-5.0.0a1.dist-info}/METADATA +6 -3
- {orchestrator_core-4.4.0rc2.dist-info → orchestrator_core-5.0.0a1.dist-info}/RECORD +66 -30
- orchestrator/graphql/resolvers/scheduled_tasks.py +0 -36
- orchestrator/graphql/schemas/scheduled_task.py +0 -8
- orchestrator/schedules/scheduler.py +0 -163
- {orchestrator_core-4.4.0rc2.dist-info → orchestrator_core-5.0.0a1.dist-info}/WHEEL +0 -0
- {orchestrator_core-4.4.0rc2.dist-info → orchestrator_core-5.0.0a1.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
|
|
3
|
+
import structlog
|
|
4
|
+
import typer
|
|
5
|
+
from pydantic import ValidationError
|
|
6
|
+
|
|
7
|
+
from orchestrator.db import db
|
|
8
|
+
from orchestrator.search.core.types import EntityType, FilterOp
|
|
9
|
+
from orchestrator.search.filters import EqualityFilter, FilterTree, LtreeFilter, PathFilter
|
|
10
|
+
from orchestrator.search.retrieval import execute_search
|
|
11
|
+
from orchestrator.search.retrieval.utils import display_filtered_paths_only, display_results
|
|
12
|
+
from orchestrator.search.retrieval.validation import get_structured_filter_schema
|
|
13
|
+
from orchestrator.search.schemas.parameters import BaseSearchParameters
|
|
14
|
+
|
|
15
|
+
app = typer.Typer(help="Experiment with the subscription search indexes.")
|
|
16
|
+
|
|
17
|
+
logger = structlog.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@app.command()
def structured(path: str, value: str, entity_type: EntityType = EntityType.SUBSCRIPTION, limit: int = 10) -> None:
    """Finds subscriptions where a specific field path contains an exact value.

    Example:
        dotenv run python main.py search structured "subscription.status" "provisioning"
        ...
        {
            "path": "subscription.status",
            "value": "provisioning"
        },
        ...
    """
    # One equality condition on the requested path, wrapped in a flat AND tree.
    equals_value = EqualityFilter(op=FilterOp.EQ, value=value)
    filter_tree = FilterTree.from_flat_and([PathFilter(path=path, condition=equals_value)])
    search_params = BaseSearchParameters.create(entity_type=entity_type, filters=filter_tree)

    # execute_search is awaited via asyncio.run, so this command drives its own event loop.
    pending_search = execute_search(search_params=search_params, db_session=db.session, limit=limit)
    results = asyncio.run(pending_search)

    # First show only the paths that matched the filter, then the regular result listing.
    display_filtered_paths_only(results, search_params, db.session)
    display_results(results, db.session, "Match")
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@app.command()
def semantic(query: str, entity_type: EntityType = EntityType.SUBSCRIPTION, limit: int = 10) -> None:
    """Finds subscriptions that are conceptually most similar to the query text.

    Example:
        dotenv run python main.py search semantic "Shop for an alligator store"
        ...
        {
            "path": "subscription.shop.shop_description",
            "value": "Kingswood reptiles shop"
        },
        ...
    """
    # No filters: the free-text query alone drives the search.
    search_params = BaseSearchParameters.create(entity_type=entity_type, query=query)

    pending_search = execute_search(search_params=search_params, db_session=db.session, limit=limit)
    results = asyncio.run(pending_search)

    # The score column is labelled "Distance" for this command.
    display_results(results, db.session, "Distance")
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@app.command()
def fuzzy(term: str, entity_type: EntityType = EntityType.SUBSCRIPTION, limit: int = 10) -> None:
    """Finds subscriptions containing text similar to the query, tolerating typos.

    Example:
        dotenv run python main.py search fuzzy "Colonel"
        ...
        {
            "path": "description",
            "value": "X Follower WF for TimCoronel"
        },
        ...
    """
    # The term is handed to the search as the plain `query` parameter.
    search_params = BaseSearchParameters.create(entity_type=entity_type, query=term)

    pending_search = execute_search(search_params=search_params, db_session=db.session, limit=limit)
    results = asyncio.run(pending_search)

    # The score column is labelled "Similarity" for this command.
    display_results(results, db.session, "Similarity")
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@app.command()
def hierarchical(
    op: str = typer.Argument(..., help="The hierarchical operation to perform."),
    path: str = typer.Argument(..., help="The ltree path or lquery pattern for the operation."),
    query: str | None = typer.Option(None, "--query", "-f", help="An optional fuzzy term to rank the results."),
    entity_type: EntityType = EntityType.SUBSCRIPTION,
    limit: int = 10,
) -> None:
    """Performs a hierarchical search, optionally combined with fuzzy ranking.

    Examples:
        dotenv run python main.py search hierarchical is_descendant "subscription.shop" --query "Kingwood"
        dotenv run python main.py search hierarchical matches_lquery "*.x_follower.x_follower_status*"
    """
    # `op` arrives as a free-form string; LtreeFilter validates it, and a rejected
    # value is reported as a CLI parameter error instead of a traceback.
    try:
        condition = LtreeFilter(value=path, op=op)  # type: ignore[arg-type]
    except (ValueError, ValidationError) as e:
        # Chain the original exception so the underlying validation error is not lost.
        raise typer.BadParameter(f"Invalid filter: {e}") from e

    # The ltree value lives in the condition itself; the PathFilter path is a fixed label here.
    path_filter = PathFilter(path="ltree_hierarchical_filter", condition=condition)

    # Pass the FilterTree directly (not wrapped in a list), matching how the
    # `structured` and `nested-demo` commands supply `filters`.
    search_params = BaseSearchParameters.create(
        entity_type=entity_type, filters=FilterTree.from_flat_and([path_filter]), query=query
    )
    results = asyncio.run(execute_search(search_params=search_params, db_session=db.session, limit=limit))
    display_results(results, db.session, "Hierarchical Score")
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
@app.command()
def hybrid(query: str, term: str, entity_type: EntityType = EntityType.SUBSCRIPTION, limit: int = 10) -> None:
    """Performs a hybrid search, combining semantic and fuzzy matching.

    Example:
        dotenv run python main.py search hybrid "reptile store" "Kingswood"
    """
    # NOTE(review): only `query` reaches the search parameters; `term` is logged
    # below but never passed to execute_search - confirm whether that is intended.
    search_params = BaseSearchParameters.create(entity_type=entity_type, query=query)
    logger.info("Executing Hybrid Search", query=query, term=term)

    pending_search = execute_search(search_params=search_params, db_session=db.session, limit=limit)
    results = asyncio.run(pending_search)

    display_results(results, db.session, "Hybrid Score")
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
@app.command("generate-schema")
def generate_schema() -> None:
    """Generates and prints the dynamic filter schema from the live search index.

    This queries the index for all distinct non-string paths to be used as
    context for the LLM agent.

    Example:
        dotenv run python main.py search generate-schema
    """
    filterable_paths = get_structured_filter_schema()

    if filterable_paths:
        # One log line per path, framed by a header and a summary carrying the path count.
        logger.info("\nAvailable Structured Filters:\n")
        for path, value_type in filterable_paths.items():
            logger.info(f"- {path}: {value_type}")

        logger.info("Successfully generated dynamic schema.", path_count=len(filterable_paths))
    else:
        # Nothing to list: warn and fall through without printing a schema.
        logger.warning("No filterable paths found in the search index.")
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
@app.command("nested-demo")
def nested_demo(entity_type: EntityType = EntityType.SUBSCRIPTION, limit: int = 10) -> None:
    # Demo of a nested filter tree: (active-in-2024 OR terminated-before-2026) AND port_mode lquery.

    # First OR case: Active subscriptions from 2024
    active_in_2024 = {
        "op": "AND",
        "children": [
            {
                "path": "subscription.status",
                "condition": {"op": "eq", "value": "active"},
            },
            {
                "path": "subscription.start_date",
                "condition": {
                    "op": "between",
                    "value": {
                        "start": "2024-01-01T00:00:00Z",
                        "end": "2024-12-31T23:59:59Z",
                    },
                },
            },
        ],
    }

    # Second OR case: Terminated subscriptions before 2026
    terminated_before_2026 = {
        "op": "AND",
        "children": [
            {
                "path": "subscription.status",
                "condition": {"op": "eq", "value": "terminated"},
            },
            {
                "path": "subscription.end_date",
                "condition": {"op": "lte", "value": "2025-12-31"},
            },
        ],
    }

    # Leaf filter that is AND-ed with the OR group above.
    port_mode_leaf = {
        "path": "subscription.*.port_mode",
        "condition": {"op": "matches_lquery", "value": "*.port_mode"},
    }

    # Validate the raw dict structure into a FilterTree model.
    tree = FilterTree.model_validate(
        {
            "op": "AND",
            "children": [
                {"op": "OR", "children": [active_in_2024, terminated_before_2026]},
                port_mode_leaf,
            ],
        }
    )

    params = BaseSearchParameters.create(entity_type=entity_type, filters=tree)
    pending_search = execute_search(params, db.session, limit=limit)
    results = asyncio.run(pending_search)

    display_results(results, db.session, "Score")
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
if __name__ == "__main__":
|
|
203
|
+
app()
|
orchestrator/db/models.py
CHANGED
|
@@ -20,7 +20,9 @@ from uuid import UUID
|
|
|
20
20
|
import sqlalchemy
|
|
21
21
|
import structlog
|
|
22
22
|
from more_itertools import first_true
|
|
23
|
+
from pgvector.sqlalchemy import Vector
|
|
23
24
|
from sqlalchemy import (
|
|
25
|
+
TEXT,
|
|
24
26
|
TIMESTAMP,
|
|
25
27
|
Boolean,
|
|
26
28
|
CheckConstraint,
|
|
@@ -29,6 +31,7 @@ from sqlalchemy import (
|
|
|
29
31
|
ForeignKey,
|
|
30
32
|
Index,
|
|
31
33
|
Integer,
|
|
34
|
+
PrimaryKeyConstraint,
|
|
32
35
|
Select,
|
|
33
36
|
String,
|
|
34
37
|
Table,
|
|
@@ -45,10 +48,12 @@ from sqlalchemy.ext.associationproxy import association_proxy
|
|
|
45
48
|
from sqlalchemy.ext.orderinglist import ordering_list
|
|
46
49
|
from sqlalchemy.orm import Mapped, deferred, mapped_column, object_session, relationship, undefer
|
|
47
50
|
from sqlalchemy.sql.functions import GenericFunction
|
|
48
|
-
from sqlalchemy_utils import TSVectorType, UUIDType
|
|
51
|
+
from sqlalchemy_utils import LtreeType, TSVectorType, UUIDType
|
|
49
52
|
|
|
50
53
|
from orchestrator.config.assignee import Assignee
|
|
51
54
|
from orchestrator.db.database import BaseModel, SearchQuery
|
|
55
|
+
from orchestrator.search.core.types import FieldType
|
|
56
|
+
from orchestrator.settings import app_settings
|
|
52
57
|
from orchestrator.targets import Target
|
|
53
58
|
from orchestrator.utils.datetime import nowtz
|
|
54
59
|
from orchestrator.version import GIT_COMMIT_HASH
|
|
@@ -685,3 +690,34 @@ class SubscriptionInstanceAsJsonFunction(GenericFunction):
|
|
|
685
690
|
|
|
686
691
|
def __init__(self, sub_inst_id: UUID):
|
|
687
692
|
super().__init__(sub_inst_id)
|
|
693
|
+
|
|
694
|
+
|
|
695
|
+
class AiSearchIndex(BaseModel):
    """ORM model for the ``ai_search_index`` table.

    Each row holds one (entity, path) pair with its text value, value type,
    optional embedding vector and a content hash. The primary key is the
    composite ``(entity_id, path)``.
    """

    __tablename__ = "ai_search_index"
    __table_args__ = (PrimaryKeyConstraint("entity_id", "path", name="pk_ai_search_index"),)

    # Entity the indexed value belongs to.
    entity_type = mapped_column(TEXT, nullable=False, index=True)
    entity_id = mapped_column(UUIDType, nullable=False)

    # Ltree path for hierarchical data
    path = mapped_column(LtreeType, nullable=False, index=True)
    value = mapped_column(TEXT, nullable=False)

    # Stored as the database enum "field_type", using the enum members' values as labels.
    value_type = mapped_column(
        Enum(FieldType, name="field_type", values_callable=lambda members: [member.value for member in members]),
        nullable=False,
    )

    # Embedding; nullable, and the vector dimension is read from app_settings when the class is defined.
    embedding = mapped_column(Vector(app_settings.EMBEDDING_DIMENSION), nullable=True)

    # SHA-256
    content_hash = mapped_column(String(64), nullable=False, index=True)
|
orchestrator/graphql/schema.py
CHANGED
|
@@ -51,14 +51,12 @@ from orchestrator.graphql.resolvers import (
|
|
|
51
51
|
resolve_version,
|
|
52
52
|
resolve_workflows,
|
|
53
53
|
)
|
|
54
|
-
from orchestrator.graphql.resolvers.scheduled_tasks import resolve_scheduled_tasks
|
|
55
54
|
from orchestrator.graphql.schemas import DEFAULT_GRAPHQL_MODELS
|
|
56
55
|
from orchestrator.graphql.schemas.customer import CustomerType
|
|
57
56
|
from orchestrator.graphql.schemas.process import ProcessType
|
|
58
57
|
from orchestrator.graphql.schemas.product import ProductType
|
|
59
58
|
from orchestrator.graphql.schemas.product_block import ProductBlock
|
|
60
59
|
from orchestrator.graphql.schemas.resource_type import ResourceType
|
|
61
|
-
from orchestrator.graphql.schemas.scheduled_task import ScheduledTaskGraphql
|
|
62
60
|
from orchestrator.graphql.schemas.settings import StatusType
|
|
63
61
|
from orchestrator.graphql.schemas.subscription import SubscriptionInterface
|
|
64
62
|
from orchestrator.graphql.schemas.version import VersionType
|
|
@@ -101,9 +99,6 @@ class OrchestratorQuery:
|
|
|
101
99
|
description="Returns information about cache, workers, and global engine settings",
|
|
102
100
|
)
|
|
103
101
|
version: VersionType = authenticated_field(resolver=resolve_version, description="Returns version information")
|
|
104
|
-
scheduled_tasks: Connection[ScheduledTaskGraphql] = authenticated_field(
|
|
105
|
-
resolver=resolve_scheduled_tasks, description="Returns scheduled job information"
|
|
106
|
-
)
|
|
107
102
|
|
|
108
103
|
|
|
109
104
|
@strawberry.federation.type(description="Orchestrator customer Query")
|
|
@@ -6,7 +6,7 @@ from strawberry.federation.schema_directives import Key
|
|
|
6
6
|
from strawberry.scalars import JSON
|
|
7
7
|
|
|
8
8
|
from oauth2_lib.strawberry import authenticated_field
|
|
9
|
-
from orchestrator.api.api_v1.endpoints.processes import get_auth_callbacks,
|
|
9
|
+
from orchestrator.api.api_v1.endpoints.processes import get_auth_callbacks, get_current_steps
|
|
10
10
|
from orchestrator.db import ProcessTable, ProductTable, db
|
|
11
11
|
from orchestrator.graphql.pagination import EMPTY_PAGE, Connection
|
|
12
12
|
from orchestrator.graphql.schemas.customer import CustomerType
|
|
@@ -86,7 +86,7 @@ class ProcessType:
|
|
|
86
86
|
oidc_user = info.context.get_current_user
|
|
87
87
|
workflow = get_workflow(self.workflow_name)
|
|
88
88
|
process = load_process(db.session.get(ProcessTable, self.process_id)) # type: ignore[arg-type]
|
|
89
|
-
auth_resume, auth_retry = get_auth_callbacks(
|
|
89
|
+
auth_resume, auth_retry = get_auth_callbacks(get_current_steps(process), workflow) # type: ignore[arg-type]
|
|
90
90
|
|
|
91
91
|
return FormUserPermissionsType(
|
|
92
92
|
retryAllowed=auth_retry and auth_retry(oidc_user), # type: ignore[arg-type]
|
|
@@ -25,6 +25,6 @@ def _format_context(context: dict) -> str:
|
|
|
25
25
|
|
|
26
26
|
def create_resolver_error_handler(info: OrchestratorInfo) -> CallableErrorHandler:
|
|
27
27
|
def handle_error(message: str, **context) -> None: # type: ignore
|
|
28
|
-
return register_error(
|
|
28
|
+
return register_error(" ".join([message, _format_context(context)]), info, error_type=ErrorType.BAD_REQUEST)
|
|
29
29
|
|
|
30
30
|
return handle_error
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
"""Search index model for llm integration.
|
|
2
|
+
|
|
3
|
+
Revision ID: 52b37b5b2714
|
|
4
|
+
Revises: 850dccac3b02
|
|
5
|
+
Create Date: 2025-08-12 22:34:26.694750
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import sqlalchemy as sa
|
|
10
|
+
from alembic import op
|
|
11
|
+
from pgvector.sqlalchemy import Vector
|
|
12
|
+
from sqlalchemy.dialects import postgresql
|
|
13
|
+
from sqlalchemy_utils import LtreeType
|
|
14
|
+
|
|
15
|
+
from orchestrator.search.core.types import FieldType
|
|
16
|
+
|
|
17
|
+
# revision identifiers, used by Alembic.
|
|
18
|
+
revision = "52b37b5b2714"
|
|
19
|
+
down_revision = "850dccac3b02"
|
|
20
|
+
branch_labels = None
|
|
21
|
+
depends_on = None
|
|
22
|
+
|
|
23
|
+
TABLE = "ai_search_index"
|
|
24
|
+
IDX_EMBED_HNSW = "ix_flat_embed_hnsw"
|
|
25
|
+
IDX_PATH_GIST = "ix_flat_path_gist"
|
|
26
|
+
IDX_PATH_BTREE = "ix_flat_path_btree"
|
|
27
|
+
IDX_VALUE_TRGM = "ix_flat_value_trgm"
|
|
28
|
+
IDX_CONTENT_HASH = "idx_ai_search_index_content_hash"
|
|
29
|
+
|
|
30
|
+
TARGET_DIM = 1536
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def upgrade() -> None:
    """Create the ``ai_search_index`` table, its ``field_type`` enum and its indexes."""
    # Create PostgreSQL extensions (ltree, pg_trgm and vector back the column types
    # and operator classes used below; unaccent is enabled alongside them).
    for extension in ("ltree", "unaccent", "pg_trgm", "vector"):
        op.execute(f"CREATE EXTENSION IF NOT EXISTS {extension};")

    # Create the ai_search_index table
    op.create_table(
        TABLE,
        sa.Column("entity_type", sa.Text, nullable=False),
        sa.Column("entity_id", postgresql.UUID, nullable=False),
        sa.Column("path", LtreeType, nullable=False),
        sa.Column("value", sa.Text, nullable=False),
        sa.Column("embedding", Vector(TARGET_DIM), nullable=True),
        sa.Column("content_hash", sa.String(64), nullable=False),
        sa.PrimaryKeyConstraint("entity_id", "path", name="pk_ai_search_index"),
    )

    # The value_type column is added after table creation: the enum type is created
    # first (only if missing), the column gets a temporary server default, and that
    # default is removed again right away.
    value_type_enum = sa.Enum(*[field_type.value for field_type in FieldType], name="field_type")
    value_type_enum.create(op.get_bind(), checkfirst=True)
    value_type_column = sa.Column(
        "value_type", value_type_enum, nullable=False, server_default=FieldType.STRING.value
    )
    op.add_column(TABLE, value_type_column)
    op.alter_column(TABLE, "value_type", server_default=None)

    # Plain indexes on entity_id and content_hash.
    op.create_index(op.f("ix_ai_search_index_entity_id"), TABLE, ["entity_id"], unique=False)
    op.create_index(IDX_CONTENT_HASH, TABLE, ["content_hash"])

    # The path column gets both a GiST (ltree operator class) and a default index.
    op.create_index(
        IDX_PATH_GIST,
        TABLE,
        ["path"],
        postgresql_using="GIST",
        postgresql_ops={"path": "gist_ltree_ops"},
    )
    op.create_index(IDX_PATH_BTREE, TABLE, ["path"])

    # Trigram GIN index on the text value.
    op.create_index(
        IDX_VALUE_TRGM,
        TABLE,
        ["value"],
        postgresql_using="GIN",
        postgresql_ops={"value": "gin_trgm_ops"},
    )

    # HNSW index on the embedding column using the L2 operator class.
    op.create_index(
        IDX_EMBED_HNSW,
        TABLE,
        ["embedding"],
        postgresql_using="HNSW",
        postgresql_with={"m": 16, "ef_construction": 64},
        postgresql_ops={"embedding": "vector_l2_ops"},
    )
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def downgrade() -> None:
    """Drop the indexes, the ``ai_search_index`` table and the ``field_type`` enum.

    Every drop is conditional (``if_exists`` / ``checkfirst``). The PostgreSQL
    extensions enabled by ``upgrade`` are left in place.
    """
    # Drop all indexes, in the reverse of the order in which upgrade() created them.
    index_names = (
        IDX_EMBED_HNSW,
        IDX_VALUE_TRGM,
        IDX_PATH_BTREE,
        IDX_PATH_GIST,
        IDX_CONTENT_HASH,
        op.f("ix_ai_search_index_entity_id"),
    )
    for index_name in index_names:
        op.drop_index(index_name, table_name=TABLE, if_exists=True)

    # Drop table and enum
    op.drop_table(TABLE, if_exists=True)
    sa.Enum(name="field_type").drop(op.get_bind(), checkfirst=True)
|
|
@@ -13,11 +13,12 @@
|
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
from orchestrator.schedules.resume_workflows import run_resume_workflows
|
|
16
|
+
from orchestrator.schedules.scheduling import SchedulingFunction
|
|
16
17
|
from orchestrator.schedules.task_vacuum import vacuum_tasks
|
|
17
18
|
from orchestrator.schedules.validate_products import validate_products
|
|
18
19
|
from orchestrator.schedules.validate_subscriptions import validate_subscriptions
|
|
19
20
|
|
|
20
|
-
ALL_SCHEDULERS: list = [
|
|
21
|
+
ALL_SCHEDULERS: list[SchedulingFunction] = [
|
|
21
22
|
run_resume_workflows,
|
|
22
23
|
vacuum_tasks,
|
|
23
24
|
validate_subscriptions,
|
|
@@ -12,10 +12,10 @@
|
|
|
12
12
|
# limitations under the License.
|
|
13
13
|
|
|
14
14
|
|
|
15
|
-
from orchestrator.schedules.
|
|
15
|
+
from orchestrator.schedules.scheduling import scheduler
|
|
16
16
|
from orchestrator.services.processes import start_process
|
|
17
17
|
|
|
18
18
|
|
|
19
|
-
@scheduler
|
|
19
|
+
@scheduler(name="Resume workflows", time_unit="hour", period=1)
|
|
20
20
|
def run_resume_workflows() -> None:
|
|
21
21
|
start_process("task_resume_workflows")
|
|
@@ -12,77 +12,37 @@
|
|
|
12
12
|
# limitations under the License.
|
|
13
13
|
|
|
14
14
|
from collections.abc import Callable
|
|
15
|
-
from typing import
|
|
15
|
+
from typing import Protocol, cast
|
|
16
16
|
|
|
17
|
-
from
|
|
18
|
-
from deprecated import deprecated
|
|
17
|
+
from schedule import CancelJob
|
|
19
18
|
|
|
20
|
-
from orchestrator.schedules.scheduler import scheduler as default_scheduler # your global scheduler instance
|
|
21
19
|
|
|
22
|
-
|
|
20
|
+
class SchedulingFunction(Protocol):
    """Structural type of a schedulable function.

    Describes a zero-argument callable that also carries the scheduling
    attributes which the ``scheduler`` decorator assigns to the function.
    """

    # Name of the underlying function object.
    __name__: str

    # Scheduling metadata attached by the decorator.
    name: str
    time_unit: str
    period: int | None
    at: str | None

    def __call__(self) -> CancelJob | None:
        ...
|
|
24
28
|
|
|
25
|
-
@deprecated(
|
|
26
|
-
reason="We changed from scheduler to apscheduler which has its own decoractor, use `@scheduler.scheduled_job()` from `from orchestrator.scheduling.scheduler import scheduler`"
|
|
27
|
-
)
|
|
28
|
-
def scheduler(
|
|
29
|
-
name: str,
|
|
30
|
-
time_unit: str,
|
|
31
|
-
period: int = 1,
|
|
32
|
-
at: str | None = None,
|
|
33
|
-
*,
|
|
34
|
-
id: str | None = None,
|
|
35
|
-
scheduler: BaseScheduler = default_scheduler,
|
|
36
|
-
) -> Callable[[F], F]:
|
|
37
|
-
"""APScheduler-compatible decorator to schedule a function.
|
|
38
29
|
|
|
39
|
-
|
|
30
|
+
def scheduler(
|
|
31
|
+
name: str, time_unit: str, period: int = 1, at: str | None = None
|
|
32
|
+
) -> Callable[[Callable[[], CancelJob | None]], SchedulingFunction]:
|
|
33
|
+
"""Create schedule.
|
|
40
34
|
|
|
41
|
-
|
|
42
|
-
|
|
35
|
+
Either specify the period or the at. Examples:
|
|
36
|
+
time_unit = "hours", period = 12 -> will run every 12 hours
|
|
37
|
+
time_unit = "day", at="01:00" -> will run every day at 1 o'clock
|
|
43
38
|
"""
|
|
44
39
|
|
|
45
|
-
def
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
try:
|
|
53
|
-
hour, minute = map(int, at.split(":"))
|
|
54
|
-
except ValueError:
|
|
55
|
-
raise ValueError(f"Invalid time format for 'at': {at}, expected 'HH:MM'")
|
|
56
|
-
|
|
57
|
-
kwargs = {
|
|
58
|
-
"hour": hour,
|
|
59
|
-
"minute": minute,
|
|
60
|
-
}
|
|
61
|
-
else:
|
|
62
|
-
# Map string units to timedelta kwargs for IntervalTrigger
|
|
63
|
-
unit_map = {
|
|
64
|
-
"seconds": "seconds",
|
|
65
|
-
"second": "seconds",
|
|
66
|
-
"minutes": "minutes",
|
|
67
|
-
"minute": "minutes",
|
|
68
|
-
"hours": "hours",
|
|
69
|
-
"hour": "hours",
|
|
70
|
-
"days": "days",
|
|
71
|
-
"day": "days",
|
|
72
|
-
}
|
|
73
|
-
|
|
74
|
-
interval_arg = unit_map.get(time_unit.lower(), time_unit.lower())
|
|
75
|
-
kwargs = {interval_arg: period}
|
|
76
|
-
|
|
77
|
-
scheduler.add_job(
|
|
78
|
-
func,
|
|
79
|
-
trigger=trigger,
|
|
80
|
-
id=job_id,
|
|
81
|
-
name=name,
|
|
82
|
-
replace_existing=True,
|
|
83
|
-
**kwargs,
|
|
84
|
-
)
|
|
85
|
-
|
|
86
|
-
return func
|
|
40
|
+
def _scheduler(f: Callable[[], CancelJob | None]) -> SchedulingFunction:
|
|
41
|
+
schedule = cast(SchedulingFunction, f)
|
|
42
|
+
schedule.name = name
|
|
43
|
+
schedule.time_unit = time_unit
|
|
44
|
+
schedule.period = period
|
|
45
|
+
schedule.at = at
|
|
46
|
+
return schedule
|
|
87
47
|
|
|
88
|
-
return
|
|
48
|
+
return _scheduler
|
|
@@ -12,10 +12,10 @@
|
|
|
12
12
|
# limitations under the License.
|
|
13
13
|
|
|
14
14
|
|
|
15
|
-
from orchestrator.schedules.
|
|
15
|
+
from orchestrator.schedules.scheduling import scheduler
|
|
16
16
|
from orchestrator.services.processes import start_process
|
|
17
17
|
|
|
18
18
|
|
|
19
|
-
@scheduler
|
|
19
|
+
@scheduler(name="Clean up tasks", time_unit="hours", period=6)
|
|
20
20
|
def vacuum_tasks() -> None:
|
|
21
21
|
start_process("task_clean_up_tasks")
|
|
@@ -14,17 +14,11 @@ from sqlalchemy import func, select
|
|
|
14
14
|
|
|
15
15
|
from orchestrator.db import db
|
|
16
16
|
from orchestrator.db.models import ProcessTable
|
|
17
|
-
from orchestrator.schedules.
|
|
17
|
+
from orchestrator.schedules.scheduling import scheduler
|
|
18
18
|
from orchestrator.services.processes import start_process
|
|
19
19
|
|
|
20
20
|
|
|
21
|
-
@scheduler
|
|
22
|
-
id="validate-products",
|
|
23
|
-
name="Validate Products and inactive subscriptions",
|
|
24
|
-
trigger="cron",
|
|
25
|
-
hour=2,
|
|
26
|
-
minute=30,
|
|
27
|
-
)
|
|
21
|
+
@scheduler(name="Validate Products and inactive subscriptions", time_unit="day", at="02:30")
|
|
28
22
|
def validate_products() -> None:
|
|
29
23
|
uncompleted_products = db.session.scalar(
|
|
30
24
|
select(func.count())
|
|
@@ -16,7 +16,7 @@ from threading import BoundedSemaphore
|
|
|
16
16
|
|
|
17
17
|
import structlog
|
|
18
18
|
|
|
19
|
-
from orchestrator.schedules.
|
|
19
|
+
from orchestrator.schedules.scheduling import scheduler
|
|
20
20
|
from orchestrator.services.subscriptions import (
|
|
21
21
|
get_subscriptions_on_product_table,
|
|
22
22
|
get_subscriptions_on_product_table_in_sync,
|
|
@@ -33,7 +33,7 @@ logger = structlog.get_logger(__name__)
|
|
|
33
33
|
task_semaphore = BoundedSemaphore(value=2)
|
|
34
34
|
|
|
35
35
|
|
|
36
|
-
@scheduler
|
|
36
|
+
@scheduler(name="Subscriptions Validator", time_unit="day", at="00:10")
|
|
37
37
|
def validate_subscriptions() -> None:
|
|
38
38
|
if app_settings.VALIDATE_OUT_OF_SYNC_SUBSCRIPTIONS:
|
|
39
39
|
# Automatically re-validate out-of-sync subscriptions. This is not recommended for production.
|