UncountablePythonSDK 0.0.115__py3-none-any.whl → 0.0.142.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of UncountablePythonSDK might be problematic. Click here for more details.
- docs/conf.py +52 -5
- docs/index.md +107 -4
- docs/integration_examples/create_ingredient.md +43 -0
- docs/integration_examples/create_output.md +56 -0
- docs/integration_examples/index.md +6 -0
- docs/justfile +1 -1
- docs/requirements.txt +3 -2
- examples/basic_auth.py +7 -0
- examples/integration-server/jobs/materials_auto/example_cron.py +3 -0
- examples/integration-server/jobs/materials_auto/example_http.py +19 -7
- examples/integration-server/jobs/materials_auto/example_instrument.py +100 -0
- examples/integration-server/jobs/materials_auto/example_parse.py +140 -0
- examples/integration-server/jobs/materials_auto/example_predictions.py +61 -0
- examples/integration-server/jobs/materials_auto/example_runsheet_wh.py +57 -16
- examples/integration-server/jobs/materials_auto/profile.yaml +27 -0
- examples/integration-server/pyproject.toml +4 -4
- examples/oauth.py +7 -0
- pkgs/argument_parser/__init__.py +1 -0
- pkgs/argument_parser/_is_namedtuple.py +3 -0
- pkgs/argument_parser/argument_parser.py +22 -3
- pkgs/serialization_util/serialization_helpers.py +3 -1
- pkgs/type_spec/builder.py +66 -19
- pkgs/type_spec/builder_types.py +9 -0
- pkgs/type_spec/config.py +26 -5
- pkgs/type_spec/cross_output_links.py +10 -16
- pkgs/type_spec/emit_open_api.py +72 -22
- pkgs/type_spec/emit_open_api_util.py +1 -0
- pkgs/type_spec/emit_python.py +76 -12
- pkgs/type_spec/emit_typescript.py +48 -32
- pkgs/type_spec/emit_typescript_util.py +44 -6
- pkgs/type_spec/load_types.py +2 -2
- pkgs/type_spec/open_api_util.py +16 -1
- pkgs/type_spec/parts/base.ts.prepart +4 -0
- pkgs/type_spec/type_info/emit_type_info.py +37 -4
- pkgs/type_spec/ui_entry_actions/generate_ui_entry_actions.py +1 -0
- pkgs/type_spec/value_spec/__main__.py +2 -2
- pkgs/type_spec/value_spec/emit_python.py +6 -1
- uncountable/core/client.py +10 -3
- uncountable/integration/cli.py +175 -23
- uncountable/integration/executors/executors.py +1 -2
- uncountable/integration/executors/generic_upload_executor.py +1 -1
- uncountable/integration/http_server/types.py +3 -1
- uncountable/integration/job.py +35 -3
- uncountable/integration/queue_runner/command_server/__init__.py +4 -0
- uncountable/integration/queue_runner/command_server/command_client.py +89 -0
- uncountable/integration/queue_runner/command_server/command_server.py +117 -5
- uncountable/integration/queue_runner/command_server/constants.py +4 -0
- uncountable/integration/queue_runner/command_server/protocol/command_server.proto +51 -0
- uncountable/integration/queue_runner/command_server/protocol/command_server_pb2.py +34 -11
- uncountable/integration/queue_runner/command_server/protocol/command_server_pb2.pyi +102 -1
- uncountable/integration/queue_runner/command_server/protocol/command_server_pb2_grpc.py +180 -0
- uncountable/integration/queue_runner/command_server/types.py +44 -1
- uncountable/integration/queue_runner/datastore/datastore_sqlite.py +189 -8
- uncountable/integration/queue_runner/datastore/interface.py +13 -0
- uncountable/integration/queue_runner/datastore/model.py +8 -1
- uncountable/integration/queue_runner/job_scheduler.py +85 -21
- uncountable/integration/queue_runner/queue_runner.py +10 -2
- uncountable/integration/queue_runner/types.py +2 -0
- uncountable/integration/queue_runner/worker.py +28 -29
- uncountable/integration/scheduler.py +121 -23
- uncountable/integration/server.py +36 -6
- uncountable/integration/telemetry.py +129 -8
- uncountable/integration/webhook_server/entrypoint.py +2 -0
- uncountable/types/__init__.py +38 -0
- uncountable/types/api/entity/create_or_update_entity.py +1 -0
- uncountable/types/api/entity/export_entities.py +13 -0
- uncountable/types/api/entity/list_aggregate.py +79 -0
- uncountable/types/api/entity/list_entities.py +25 -0
- uncountable/types/api/entity/set_barcode.py +43 -0
- uncountable/types/api/entity/transition_entity_phase.py +2 -1
- uncountable/types/api/files/download_file.py +15 -1
- uncountable/types/api/integrations/__init__.py +1 -0
- uncountable/types/api/integrations/publish_realtime_data.py +41 -0
- uncountable/types/api/integrations/push_notification.py +49 -0
- uncountable/types/api/integrations/register_sockets_token.py +41 -0
- uncountable/types/api/listing/__init__.py +1 -0
- uncountable/types/api/listing/fetch_listing.py +57 -0
- uncountable/types/api/notebooks/__init__.py +1 -0
- uncountable/types/api/notebooks/add_notebook_content.py +119 -0
- uncountable/types/api/outputs/get_output_organization.py +173 -0
- uncountable/types/api/recipes/edit_recipe_inputs.py +1 -1
- uncountable/types/api/recipes/get_recipe_output_metadata.py +2 -2
- uncountable/types/api/recipes/get_recipes_data.py +29 -0
- uncountable/types/api/recipes/lock_recipes.py +2 -1
- uncountable/types/api/recipes/set_recipe_total.py +59 -0
- uncountable/types/api/recipes/unlock_recipes.py +2 -1
- uncountable/types/api/runsheet/export_default_runsheet.py +44 -0
- uncountable/types/api/uploader/complete_async_parse.py +46 -0
- uncountable/types/api/user/__init__.py +1 -0
- uncountable/types/api/user/get_current_user_info.py +40 -0
- uncountable/types/async_batch_processor.py +266 -0
- uncountable/types/async_batch_t.py +5 -0
- uncountable/types/client_base.py +432 -2
- uncountable/types/client_config.py +1 -0
- uncountable/types/client_config_t.py +10 -0
- uncountable/types/entity_t.py +9 -1
- uncountable/types/exports_t.py +1 -0
- uncountable/types/integration_server_t.py +2 -0
- uncountable/types/integration_session.py +10 -0
- uncountable/types/integration_session_t.py +60 -0
- uncountable/types/integrations.py +10 -0
- uncountable/types/integrations_t.py +62 -0
- uncountable/types/listing.py +46 -0
- uncountable/types/listing_t.py +533 -0
- uncountable/types/notices.py +8 -0
- uncountable/types/notices_t.py +37 -0
- uncountable/types/notifications.py +11 -0
- uncountable/types/notifications_t.py +74 -0
- uncountable/types/queued_job.py +2 -0
- uncountable/types/queued_job_t.py +20 -2
- uncountable/types/sockets.py +20 -0
- uncountable/types/sockets_t.py +169 -0
- uncountable/types/uploader.py +24 -0
- uncountable/types/uploader_t.py +222 -0
- {uncountablepythonsdk-0.0.115.dist-info → uncountablepythonsdk-0.0.142.dev0.dist-info}/METADATA +5 -2
- {uncountablepythonsdk-0.0.115.dist-info → uncountablepythonsdk-0.0.142.dev0.dist-info}/RECORD +118 -79
- docs/quickstart.md +0 -19
- {uncountablepythonsdk-0.0.115.dist-info → uncountablepythonsdk-0.0.142.dev0.dist-info}/WHEEL +0 -0
- {uncountablepythonsdk-0.0.115.dist-info → uncountablepythonsdk-0.0.142.dev0.dist-info}/top_level.txt +0 -0
|
@@ -1,19 +1,27 @@
|
|
|
1
1
|
import asyncio
|
|
2
|
+
import os
|
|
2
3
|
import sys
|
|
4
|
+
import threading
|
|
3
5
|
import typing
|
|
4
6
|
from concurrent.futures import ProcessPoolExecutor
|
|
5
7
|
from dataclasses import dataclass
|
|
6
8
|
|
|
7
9
|
from opentelemetry.trace import get_current_span
|
|
8
10
|
|
|
9
|
-
from uncountable.integration.db.connect import IntegrationDBService, create_db_engine
|
|
10
|
-
from uncountable.integration.db.session import get_session_maker
|
|
11
11
|
from uncountable.integration.queue_runner.command_server import (
|
|
12
12
|
CommandEnqueueJob,
|
|
13
13
|
CommandEnqueueJobResponse,
|
|
14
14
|
CommandQueue,
|
|
15
|
+
CommandRetryJob,
|
|
16
|
+
CommandRetryJobResponse,
|
|
15
17
|
CommandTask,
|
|
16
18
|
)
|
|
19
|
+
from uncountable.integration.queue_runner.command_server.types import (
|
|
20
|
+
CommandCancelJob,
|
|
21
|
+
CommandCancelJobResponse,
|
|
22
|
+
CommandCancelJobStatus,
|
|
23
|
+
CommandVaccuumQueuedJobs,
|
|
24
|
+
)
|
|
17
25
|
from uncountable.integration.queue_runner.datastore import DatastoreSqlite
|
|
18
26
|
from uncountable.integration.queue_runner.datastore.interface import Datastore
|
|
19
27
|
from uncountable.integration.queue_runner.worker import Worker
|
|
@@ -21,7 +29,7 @@ from uncountable.integration.scan_profiles import load_profiles
|
|
|
21
29
|
from uncountable.integration.telemetry import Logger
|
|
22
30
|
from uncountable.types import job_definition_t, queued_job_t
|
|
23
31
|
|
|
24
|
-
from .types import ResultQueue, ResultTask
|
|
32
|
+
from .types import RESTART_EXIT_CODE, ResultQueue, ResultTask
|
|
25
33
|
|
|
26
34
|
_MAX_JOB_WORKERS = 5
|
|
27
35
|
|
|
@@ -83,14 +91,11 @@ def _start_workers(
|
|
|
83
91
|
return job_worker_lookup
|
|
84
92
|
|
|
85
93
|
|
|
86
|
-
async def start_scheduler(
|
|
94
|
+
async def start_scheduler(
|
|
95
|
+
command_queue: CommandQueue, datastore: DatastoreSqlite
|
|
96
|
+
) -> None:
|
|
87
97
|
logger = Logger(get_current_span())
|
|
88
98
|
result_queue: ResultQueue = asyncio.Queue()
|
|
89
|
-
engine = create_db_engine(IntegrationDBService.RUNNER)
|
|
90
|
-
session_maker = get_session_maker(engine)
|
|
91
|
-
|
|
92
|
-
datastore = DatastoreSqlite(session_maker)
|
|
93
|
-
datastore.setup(engine)
|
|
94
99
|
|
|
95
100
|
with ProcessPoolExecutor(max_workers=_MAX_JOB_WORKERS) as process_pool:
|
|
96
101
|
job_worker_lookup = _start_workers(
|
|
@@ -104,7 +109,9 @@ async def start_scheduler(command_queue: CommandQueue) -> None:
|
|
|
104
109
|
worker = job_worker_lookup[queued_job.job_ref_name]
|
|
105
110
|
except KeyError as e:
|
|
106
111
|
logger.log_exception(e)
|
|
107
|
-
datastore.
|
|
112
|
+
datastore.update_job_status(
|
|
113
|
+
queued_job.queued_job_uuid, queued_job_t.JobStatus.FAILED
|
|
114
|
+
)
|
|
108
115
|
return
|
|
109
116
|
await worker.listen_queue.put(queued_job)
|
|
110
117
|
|
|
@@ -119,17 +126,11 @@ async def start_scheduler(command_queue: CommandQueue) -> None:
|
|
|
119
126
|
queued_job_t.InvocationContextManual,
|
|
120
127
|
),
|
|
121
128
|
):
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
(
|
|
125
|
-
job
|
|
126
|
-
for job in existing_queued_jobs
|
|
127
|
-
if job.job_ref_name == job_ref_name
|
|
128
|
-
),
|
|
129
|
-
None,
|
|
129
|
+
existing_queued_job = datastore.get_next_queued_job_for_ref_name(
|
|
130
|
+
job_ref_name=job_ref_name
|
|
130
131
|
)
|
|
131
|
-
if
|
|
132
|
-
return
|
|
132
|
+
if existing_queued_job is not None:
|
|
133
|
+
return existing_queued_job.queued_job_uuid
|
|
133
134
|
queued_job = datastore.add_job_to_queue(
|
|
134
135
|
job_payload=payload,
|
|
135
136
|
job_ref_name=job_ref_name,
|
|
@@ -146,6 +147,53 @@ async def start_scheduler(command_queue: CommandQueue) -> None:
|
|
|
146
147
|
CommandEnqueueJobResponse(queued_job_uuid=queued_job_uuid)
|
|
147
148
|
)
|
|
148
149
|
|
|
150
|
+
async def _handle_cancel_job_command(command: CommandCancelJob) -> None:
|
|
151
|
+
queued_job = datastore.get_queued_job(uuid=command.queued_job_uuid)
|
|
152
|
+
if queued_job is None:
|
|
153
|
+
await command.response_queue.put(
|
|
154
|
+
CommandCancelJobResponse(status=CommandCancelJobStatus.NO_JOB_FOUND)
|
|
155
|
+
)
|
|
156
|
+
return
|
|
157
|
+
|
|
158
|
+
if queued_job.status == queued_job_t.JobStatus.QUEUED:
|
|
159
|
+
datastore.remove_job_from_queue(command.queued_job_uuid)
|
|
160
|
+
await command.response_queue.put(
|
|
161
|
+
CommandCancelJobResponse(
|
|
162
|
+
status=CommandCancelJobStatus.CANCELLED_WITH_RESTART
|
|
163
|
+
)
|
|
164
|
+
)
|
|
165
|
+
|
|
166
|
+
def delayed_exit() -> None:
|
|
167
|
+
os._exit(RESTART_EXIT_CODE)
|
|
168
|
+
|
|
169
|
+
threading.Timer(interval=5, function=delayed_exit).start()
|
|
170
|
+
|
|
171
|
+
else:
|
|
172
|
+
await command.response_queue.put(
|
|
173
|
+
CommandCancelJobResponse(
|
|
174
|
+
status=CommandCancelJobStatus.JOB_ALREADY_COMPLETED
|
|
175
|
+
)
|
|
176
|
+
)
|
|
177
|
+
|
|
178
|
+
async def _handle_retry_job_command(command: CommandRetryJob) -> None:
|
|
179
|
+
queued_job = datastore.retry_job(command.queued_job_uuid)
|
|
180
|
+
if queued_job is None:
|
|
181
|
+
await command.response_queue.put(
|
|
182
|
+
CommandRetryJobResponse(queued_job_uuid=None)
|
|
183
|
+
)
|
|
184
|
+
return
|
|
185
|
+
|
|
186
|
+
await enqueue_queued_job(queued_job)
|
|
187
|
+
await command.response_queue.put(
|
|
188
|
+
CommandRetryJobResponse(queued_job_uuid=queued_job.queued_job_uuid)
|
|
189
|
+
)
|
|
190
|
+
|
|
191
|
+
def _handle_vaccuum_queued_jobs_command(
|
|
192
|
+
command: CommandVaccuumQueuedJobs,
|
|
193
|
+
) -> None:
|
|
194
|
+
logger.log_info("Vaccuuming queued jobs...")
|
|
195
|
+
datastore.vaccuum_queued_jobs()
|
|
196
|
+
|
|
149
197
|
for queued_job in queued_jobs:
|
|
150
198
|
await enqueue_queued_job(queued_job)
|
|
151
199
|
|
|
@@ -162,10 +210,26 @@ async def start_scheduler(command_queue: CommandQueue) -> None:
|
|
|
162
210
|
match command:
|
|
163
211
|
case CommandEnqueueJob():
|
|
164
212
|
await _handle_enqueue_job_command(command=command)
|
|
213
|
+
case CommandRetryJob():
|
|
214
|
+
await _handle_retry_job_command(command=command)
|
|
215
|
+
case CommandVaccuumQueuedJobs():
|
|
216
|
+
_handle_vaccuum_queued_jobs_command(command=command)
|
|
217
|
+
case CommandCancelJob():
|
|
218
|
+
await _handle_cancel_job_command(command=command)
|
|
165
219
|
case _:
|
|
166
220
|
typing.assert_never(command)
|
|
167
221
|
command_task = asyncio.create_task(command_queue.get())
|
|
168
222
|
elif task == result_task:
|
|
169
223
|
queued_job_result = result_task.result()
|
|
170
|
-
|
|
224
|
+
match queued_job_result.job_result.success:
|
|
225
|
+
case True:
|
|
226
|
+
datastore.update_job_status(
|
|
227
|
+
queued_job_result.queued_job_uuid,
|
|
228
|
+
queued_job_t.JobStatus.SUCCESS,
|
|
229
|
+
)
|
|
230
|
+
case False:
|
|
231
|
+
datastore.update_job_status(
|
|
232
|
+
queued_job_result.queued_job_uuid,
|
|
233
|
+
queued_job_t.JobStatus.FAILED,
|
|
234
|
+
)
|
|
171
235
|
result_task = asyncio.create_task(result_queue.get())
|
|
@@ -1,16 +1,24 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
|
|
3
|
+
from uncountable.integration.db.connect import IntegrationDBService, create_db_engine
|
|
4
|
+
from uncountable.integration.db.session import get_session_maker
|
|
3
5
|
from uncountable.integration.queue_runner.command_server import serve
|
|
4
6
|
from uncountable.integration.queue_runner.command_server.types import CommandQueue
|
|
7
|
+
from uncountable.integration.queue_runner.datastore import DatastoreSqlite
|
|
5
8
|
from uncountable.integration.queue_runner.job_scheduler import start_scheduler
|
|
6
9
|
|
|
7
10
|
|
|
8
11
|
async def queue_runner_loop() -> None:
|
|
9
12
|
command_queue: CommandQueue = asyncio.Queue()
|
|
13
|
+
engine = create_db_engine(IntegrationDBService.RUNNER)
|
|
14
|
+
session_maker = get_session_maker(engine)
|
|
10
15
|
|
|
11
|
-
|
|
16
|
+
datastore = DatastoreSqlite(session_maker)
|
|
17
|
+
datastore.setup(engine)
|
|
12
18
|
|
|
13
|
-
|
|
19
|
+
command_server = asyncio.create_task(serve(command_queue, datastore))
|
|
20
|
+
|
|
21
|
+
scheduler = asyncio.create_task(start_scheduler(command_queue, datastore))
|
|
14
22
|
|
|
15
23
|
await scheduler
|
|
16
24
|
await command_server
|
|
@@ -87,33 +87,32 @@ def run_queued_job(
|
|
|
87
87
|
base_span=span,
|
|
88
88
|
profile_metadata=job_details.profile_metadata,
|
|
89
89
|
job_definition=job_details.job_definition,
|
|
90
|
+
queued_job_uuid=queued_job.queued_job_uuid,
|
|
90
91
|
)
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
job_logger.log_exception(e)
|
|
119
|
-
raise e
|
|
92
|
+
with job_logger.resource_tracking():
|
|
93
|
+
try:
|
|
94
|
+
client = construct_uncountable_client(
|
|
95
|
+
profile_meta=job_details.profile_metadata, logger=job_logger
|
|
96
|
+
)
|
|
97
|
+
batch_processor = AsyncBatchProcessor(client=client)
|
|
98
|
+
|
|
99
|
+
payload = _resolve_queued_job_payload(queued_job)
|
|
100
|
+
|
|
101
|
+
args = JobArguments(
|
|
102
|
+
job_definition=job_details.job_definition,
|
|
103
|
+
client=client,
|
|
104
|
+
batch_processor=batch_processor,
|
|
105
|
+
profile_metadata=job_details.profile_metadata,
|
|
106
|
+
logger=job_logger,
|
|
107
|
+
payload=payload,
|
|
108
|
+
job_uuid=queued_job.queued_job_uuid,
|
|
109
|
+
)
|
|
110
|
+
|
|
111
|
+
return execute_job(
|
|
112
|
+
args=args,
|
|
113
|
+
profile_metadata=job_details.profile_metadata,
|
|
114
|
+
job_definition=job_details.job_definition,
|
|
115
|
+
)
|
|
116
|
+
except BaseException as e:
|
|
117
|
+
job_logger.log_exception(e)
|
|
118
|
+
return job_definition_t.JobResult(success=False)
|
|
@@ -5,6 +5,8 @@ import sys
|
|
|
5
5
|
import time
|
|
6
6
|
from dataclasses import dataclass
|
|
7
7
|
from datetime import UTC
|
|
8
|
+
from enum import StrEnum
|
|
9
|
+
from typing import assert_never
|
|
8
10
|
|
|
9
11
|
from opentelemetry.trace import get_current_span
|
|
10
12
|
|
|
@@ -15,15 +17,24 @@ from uncountable.integration.queue_runner.command_server import (
|
|
|
15
17
|
check_health,
|
|
16
18
|
)
|
|
17
19
|
from uncountable.integration.queue_runner.queue_runner import start_queue_runner
|
|
20
|
+
from uncountable.integration.queue_runner.types import RESTART_EXIT_CODE
|
|
18
21
|
from uncountable.integration.telemetry import Logger
|
|
19
22
|
|
|
20
23
|
SHUTDOWN_TIMEOUT_SECS = 30
|
|
21
24
|
|
|
25
|
+
AnyProcess = multiprocessing.Process | subprocess.Popen[bytes]
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class ProcessName(StrEnum):
|
|
29
|
+
QUEUE_RUNNER = "queue_runner"
|
|
30
|
+
CRON_SERVER = "cron_server"
|
|
31
|
+
UWSGI = "uwsgi"
|
|
32
|
+
|
|
22
33
|
|
|
23
34
|
@dataclass(kw_only=True)
|
|
24
35
|
class ProcessInfo:
|
|
25
|
-
name:
|
|
26
|
-
process:
|
|
36
|
+
name: ProcessName
|
|
37
|
+
process: AnyProcess
|
|
27
38
|
|
|
28
39
|
@property
|
|
29
40
|
def is_alive(self) -> bool:
|
|
@@ -46,14 +57,27 @@ class ProcessInfo:
|
|
|
46
57
|
return self.process.poll()
|
|
47
58
|
|
|
48
59
|
|
|
49
|
-
|
|
60
|
+
@dataclass(kw_only=True)
|
|
61
|
+
class ProcessAlarmRestart:
|
|
62
|
+
process: ProcessInfo
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
@dataclass(kw_only=True)
|
|
66
|
+
class ProcessAlarmShutdownAll:
|
|
67
|
+
pass
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
ProcessAlarm = ProcessAlarmRestart | ProcessAlarmShutdownAll
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def handle_shutdown(logger: Logger, processes: dict[ProcessName, ProcessInfo]) -> None:
|
|
50
74
|
logger.log_info("received shutdown command, shutting down sub-processes")
|
|
51
|
-
for proc_info in processes:
|
|
75
|
+
for proc_info in processes.values():
|
|
52
76
|
if proc_info.is_alive:
|
|
53
77
|
proc_info.process.terminate()
|
|
54
78
|
|
|
55
79
|
shutdown_start = time.time()
|
|
56
|
-
still_living_processes = processes
|
|
80
|
+
still_living_processes = list(processes.values())
|
|
57
81
|
while (
|
|
58
82
|
time.time() - shutdown_start < SHUTDOWN_TIMEOUT_SECS
|
|
59
83
|
and len(still_living_processes) > 0
|
|
@@ -82,14 +106,59 @@ def handle_shutdown(logger: Logger, processes: list[ProcessInfo]) -> None:
|
|
|
82
106
|
proc_info.process.kill()
|
|
83
107
|
|
|
84
108
|
|
|
85
|
-
def
|
|
86
|
-
|
|
109
|
+
def restart_process(
|
|
110
|
+
logger: Logger, proc_info: ProcessInfo, processes: dict[ProcessName, ProcessInfo]
|
|
111
|
+
) -> None:
|
|
112
|
+
logger.log_error(
|
|
113
|
+
f"process {proc_info.name} shut down unexpectedly - exit code {proc_info.exitcode}. Restarting..."
|
|
114
|
+
)
|
|
115
|
+
|
|
116
|
+
match proc_info.name:
|
|
117
|
+
case ProcessName.QUEUE_RUNNER:
|
|
118
|
+
queue_proc = multiprocessing.Process(target=start_queue_runner)
|
|
119
|
+
queue_proc.start()
|
|
120
|
+
new_info = ProcessInfo(name=ProcessName.QUEUE_RUNNER, process=queue_proc)
|
|
121
|
+
processes[ProcessName.QUEUE_RUNNER] = new_info
|
|
122
|
+
try:
|
|
123
|
+
_wait_queue_runner_online()
|
|
124
|
+
logger.log_info("queue runner restarted successfully")
|
|
125
|
+
except Exception as e:
|
|
126
|
+
logger.log_exception(e)
|
|
127
|
+
logger.log_error(
|
|
128
|
+
"queue runner failed to restart, shutting down scheduler"
|
|
129
|
+
)
|
|
130
|
+
handle_shutdown(logger, processes)
|
|
131
|
+
sys.exit(1)
|
|
132
|
+
|
|
133
|
+
case ProcessName.CRON_SERVER:
|
|
134
|
+
cron_proc = multiprocessing.Process(target=cron_target)
|
|
135
|
+
cron_proc.start()
|
|
136
|
+
new_info = ProcessInfo(name=ProcessName.CRON_SERVER, process=cron_proc)
|
|
137
|
+
processes[ProcessName.CRON_SERVER] = new_info
|
|
138
|
+
logger.log_info("cron server restarted successfully")
|
|
139
|
+
|
|
140
|
+
case ProcessName.UWSGI:
|
|
141
|
+
uwsgi_proc: AnyProcess = subprocess.Popen(["uwsgi", "--die-on-term"])
|
|
142
|
+
new_info = ProcessInfo(name=ProcessName.UWSGI, process=uwsgi_proc)
|
|
143
|
+
processes[ProcessName.UWSGI] = new_info
|
|
144
|
+
logger.log_info("uwsgi restarted successfully")
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def check_process_alarms(
|
|
148
|
+
logger: Logger, processes: dict[ProcessName, ProcessInfo]
|
|
149
|
+
) -> ProcessAlarm | None:
|
|
150
|
+
for proc_info in processes.values():
|
|
87
151
|
if not proc_info.is_alive:
|
|
152
|
+
if proc_info.exitcode == RESTART_EXIT_CODE:
|
|
153
|
+
logger.log_warning(
|
|
154
|
+
f"process {proc_info.name} requested restart! restarting"
|
|
155
|
+
)
|
|
156
|
+
return ProcessAlarmRestart(process=proc_info)
|
|
88
157
|
logger.log_error(
|
|
89
158
|
f"process {proc_info.name} shut down unexpectedly! shutting down scheduler; exit code is {proc_info.exitcode}"
|
|
90
159
|
)
|
|
91
|
-
|
|
92
|
-
|
|
160
|
+
return ProcessAlarmShutdownAll()
|
|
161
|
+
return None
|
|
93
162
|
|
|
94
163
|
|
|
95
164
|
def _wait_queue_runner_online() -> None:
|
|
@@ -113,36 +182,65 @@ def _wait_queue_runner_online() -> None:
|
|
|
113
182
|
|
|
114
183
|
def main() -> None:
|
|
115
184
|
logger = Logger(get_current_span())
|
|
116
|
-
processes:
|
|
185
|
+
processes: dict[ProcessName, ProcessInfo] = {}
|
|
186
|
+
|
|
187
|
+
multiprocessing.set_start_method("forkserver")
|
|
117
188
|
|
|
118
189
|
def add_process(process: ProcessInfo) -> None:
|
|
119
|
-
processes.
|
|
190
|
+
processes[process.name] = process
|
|
120
191
|
logger.log_info(f"started process {process.name}")
|
|
121
192
|
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
193
|
+
def _start_queue_runner() -> None:
|
|
194
|
+
runner_process = multiprocessing.Process(target=start_queue_runner)
|
|
195
|
+
runner_process.start()
|
|
196
|
+
add_process(
|
|
197
|
+
ProcessInfo(
|
|
198
|
+
name=ProcessName.QUEUE_RUNNER,
|
|
199
|
+
process=runner_process,
|
|
200
|
+
)
|
|
201
|
+
)
|
|
125
202
|
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
203
|
+
try:
|
|
204
|
+
_wait_queue_runner_online()
|
|
205
|
+
except Exception as e:
|
|
206
|
+
logger.log_exception(e)
|
|
207
|
+
handle_shutdown(logger, processes=processes)
|
|
208
|
+
return
|
|
209
|
+
|
|
210
|
+
_start_queue_runner()
|
|
132
211
|
|
|
133
212
|
cron_process = multiprocessing.Process(target=cron_target)
|
|
134
213
|
cron_process.start()
|
|
135
|
-
add_process(ProcessInfo(name=
|
|
214
|
+
add_process(ProcessInfo(name=ProcessName.CRON_SERVER, process=cron_process))
|
|
136
215
|
|
|
137
216
|
uwsgi_process = subprocess.Popen([
|
|
138
217
|
"uwsgi",
|
|
139
218
|
"--die-on-term",
|
|
140
219
|
])
|
|
141
|
-
add_process(ProcessInfo(name=
|
|
220
|
+
add_process(ProcessInfo(name=ProcessName.UWSGI, process=uwsgi_process))
|
|
142
221
|
|
|
143
222
|
try:
|
|
144
223
|
while True:
|
|
145
|
-
|
|
224
|
+
process_alarm = check_process_alarms(logger, processes=processes)
|
|
225
|
+
match process_alarm:
|
|
226
|
+
case ProcessAlarmRestart():
|
|
227
|
+
match process_alarm.process.name:
|
|
228
|
+
case ProcessName.QUEUE_RUNNER:
|
|
229
|
+
del processes[ProcessName.QUEUE_RUNNER]
|
|
230
|
+
_start_queue_runner()
|
|
231
|
+
case ProcessName.CRON_SERVER | ProcessName.UWSGI:
|
|
232
|
+
raise NotImplementedError(
|
|
233
|
+
f"restarting {process_alarm.process.name} not yet implemented"
|
|
234
|
+
)
|
|
235
|
+
case _:
|
|
236
|
+
assert_never(process_alarm.process.name)
|
|
237
|
+
case ProcessAlarmShutdownAll():
|
|
238
|
+
handle_shutdown(logger, processes)
|
|
239
|
+
sys.exit(1)
|
|
240
|
+
case None:
|
|
241
|
+
pass
|
|
242
|
+
case _:
|
|
243
|
+
assert_never(process_alarm)
|
|
146
244
|
time.sleep(1)
|
|
147
245
|
except KeyboardInterrupt:
|
|
148
246
|
handle_shutdown(logger, processes=processes)
|
|
@@ -11,7 +11,11 @@ from apscheduler.triggers.cron import CronTrigger
|
|
|
11
11
|
from opentelemetry.trace import get_current_span
|
|
12
12
|
from sqlalchemy.engine.base import Engine
|
|
13
13
|
|
|
14
|
+
from uncountable.core.environment import get_local_admin_server_port
|
|
14
15
|
from uncountable.integration.cron import CronJobArgs, cron_job_executor
|
|
16
|
+
from uncountable.integration.queue_runner.command_server.command_client import (
|
|
17
|
+
send_vaccuum_queued_jobs_message,
|
|
18
|
+
)
|
|
15
19
|
from uncountable.integration.telemetry import Logger
|
|
16
20
|
from uncountable.types import base_t, job_definition_t
|
|
17
21
|
from uncountable.types.job_definition_t import (
|
|
@@ -21,6 +25,14 @@ from uncountable.types.job_definition_t import (
|
|
|
21
25
|
|
|
22
26
|
_MAX_APSCHEDULER_CONCURRENT_JOBS = 1
|
|
23
27
|
|
|
28
|
+
VACCUUM_QUEUED_JOBS_JOB_ID = "vacuum_queued_jobs"
|
|
29
|
+
|
|
30
|
+
STATIC_JOB_IDS = {VACCUUM_QUEUED_JOBS_JOB_ID}
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def vaccuum_queued_jobs() -> None:
|
|
34
|
+
send_vaccuum_queued_jobs_message(port=get_local_admin_server_port())
|
|
35
|
+
|
|
24
36
|
|
|
25
37
|
class IntegrationServer:
|
|
26
38
|
_scheduler: BaseScheduler
|
|
@@ -36,11 +48,27 @@ class IntegrationServer:
|
|
|
36
48
|
)
|
|
37
49
|
self._server_logger = Logger(get_current_span())
|
|
38
50
|
|
|
51
|
+
def _register_static_jobs(self) -> None:
|
|
52
|
+
all_job_ids = {job.id for job in self._scheduler.get_jobs()}
|
|
53
|
+
if VACCUUM_QUEUED_JOBS_JOB_ID in all_job_ids:
|
|
54
|
+
self._scheduler.remove_job(VACCUUM_QUEUED_JOBS_JOB_ID)
|
|
55
|
+
|
|
56
|
+
self._scheduler.add_job(
|
|
57
|
+
vaccuum_queued_jobs,
|
|
58
|
+
max_instances=1,
|
|
59
|
+
coalesce=True,
|
|
60
|
+
trigger=CronTrigger.from_crontab("5 4 * * 4"),
|
|
61
|
+
name="Vaccuum queued jobs",
|
|
62
|
+
id=VACCUUM_QUEUED_JOBS_JOB_ID,
|
|
63
|
+
kwargs={},
|
|
64
|
+
misfire_grace_time=None,
|
|
65
|
+
)
|
|
66
|
+
|
|
39
67
|
def register_jobs(self, profiles: list[job_definition_t.ProfileMetadata]) -> None:
|
|
40
|
-
valid_job_ids =
|
|
68
|
+
valid_job_ids: set[str] = set()
|
|
41
69
|
for profile_metadata in profiles:
|
|
42
70
|
for job_defn in profile_metadata.jobs:
|
|
43
|
-
valid_job_ids.
|
|
71
|
+
valid_job_ids.add(job_defn.id)
|
|
44
72
|
match job_defn:
|
|
45
73
|
case CronJobDefinition():
|
|
46
74
|
# Add to ap scheduler
|
|
@@ -90,10 +118,11 @@ class IntegrationServer:
|
|
|
90
118
|
pass
|
|
91
119
|
case _:
|
|
92
120
|
assert_never(job_defn)
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
121
|
+
all_job_ids = {job.id for job in self._scheduler.get_jobs()}
|
|
122
|
+
invalid_job_ids = all_job_ids.difference(valid_job_ids.union(STATIC_JOB_IDS))
|
|
123
|
+
|
|
124
|
+
for job_id in invalid_job_ids:
|
|
125
|
+
self._scheduler.remove_job(job_id)
|
|
97
126
|
|
|
98
127
|
def serve_forever(self) -> None:
|
|
99
128
|
signal.pause()
|
|
@@ -106,6 +135,7 @@ class IntegrationServer:
|
|
|
106
135
|
|
|
107
136
|
def __enter__(self) -> "IntegrationServer":
|
|
108
137
|
self._start_apscheduler()
|
|
138
|
+
self._register_static_jobs()
|
|
109
139
|
return self
|
|
110
140
|
|
|
111
141
|
def __exit__(
|