UncountablePythonSDK 0.0.115__py3-none-any.whl → 0.0.142.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of UncountablePythonSDK might be problematic.

Files changed (119)
  1. docs/conf.py +52 -5
  2. docs/index.md +107 -4
  3. docs/integration_examples/create_ingredient.md +43 -0
  4. docs/integration_examples/create_output.md +56 -0
  5. docs/integration_examples/index.md +6 -0
  6. docs/justfile +1 -1
  7. docs/requirements.txt +3 -2
  8. examples/basic_auth.py +7 -0
  9. examples/integration-server/jobs/materials_auto/example_cron.py +3 -0
  10. examples/integration-server/jobs/materials_auto/example_http.py +19 -7
  11. examples/integration-server/jobs/materials_auto/example_instrument.py +100 -0
  12. examples/integration-server/jobs/materials_auto/example_parse.py +140 -0
  13. examples/integration-server/jobs/materials_auto/example_predictions.py +61 -0
  14. examples/integration-server/jobs/materials_auto/example_runsheet_wh.py +57 -16
  15. examples/integration-server/jobs/materials_auto/profile.yaml +27 -0
  16. examples/integration-server/pyproject.toml +4 -4
  17. examples/oauth.py +7 -0
  18. pkgs/argument_parser/__init__.py +1 -0
  19. pkgs/argument_parser/_is_namedtuple.py +3 -0
  20. pkgs/argument_parser/argument_parser.py +22 -3
  21. pkgs/serialization_util/serialization_helpers.py +3 -1
  22. pkgs/type_spec/builder.py +66 -19
  23. pkgs/type_spec/builder_types.py +9 -0
  24. pkgs/type_spec/config.py +26 -5
  25. pkgs/type_spec/cross_output_links.py +10 -16
  26. pkgs/type_spec/emit_open_api.py +72 -22
  27. pkgs/type_spec/emit_open_api_util.py +1 -0
  28. pkgs/type_spec/emit_python.py +76 -12
  29. pkgs/type_spec/emit_typescript.py +48 -32
  30. pkgs/type_spec/emit_typescript_util.py +44 -6
  31. pkgs/type_spec/load_types.py +2 -2
  32. pkgs/type_spec/open_api_util.py +16 -1
  33. pkgs/type_spec/parts/base.ts.prepart +4 -0
  34. pkgs/type_spec/type_info/emit_type_info.py +37 -4
  35. pkgs/type_spec/ui_entry_actions/generate_ui_entry_actions.py +1 -0
  36. pkgs/type_spec/value_spec/__main__.py +2 -2
  37. pkgs/type_spec/value_spec/emit_python.py +6 -1
  38. uncountable/core/client.py +10 -3
  39. uncountable/integration/cli.py +175 -23
  40. uncountable/integration/executors/executors.py +1 -2
  41. uncountable/integration/executors/generic_upload_executor.py +1 -1
  42. uncountable/integration/http_server/types.py +3 -1
  43. uncountable/integration/job.py +35 -3
  44. uncountable/integration/queue_runner/command_server/__init__.py +4 -0
  45. uncountable/integration/queue_runner/command_server/command_client.py +89 -0
  46. uncountable/integration/queue_runner/command_server/command_server.py +117 -5
  47. uncountable/integration/queue_runner/command_server/constants.py +4 -0
  48. uncountable/integration/queue_runner/command_server/protocol/command_server.proto +51 -0
  49. uncountable/integration/queue_runner/command_server/protocol/command_server_pb2.py +34 -11
  50. uncountable/integration/queue_runner/command_server/protocol/command_server_pb2.pyi +102 -1
  51. uncountable/integration/queue_runner/command_server/protocol/command_server_pb2_grpc.py +180 -0
  52. uncountable/integration/queue_runner/command_server/types.py +44 -1
  53. uncountable/integration/queue_runner/datastore/datastore_sqlite.py +189 -8
  54. uncountable/integration/queue_runner/datastore/interface.py +13 -0
  55. uncountable/integration/queue_runner/datastore/model.py +8 -1
  56. uncountable/integration/queue_runner/job_scheduler.py +85 -21
  57. uncountable/integration/queue_runner/queue_runner.py +10 -2
  58. uncountable/integration/queue_runner/types.py +2 -0
  59. uncountable/integration/queue_runner/worker.py +28 -29
  60. uncountable/integration/scheduler.py +121 -23
  61. uncountable/integration/server.py +36 -6
  62. uncountable/integration/telemetry.py +129 -8
  63. uncountable/integration/webhook_server/entrypoint.py +2 -0
  64. uncountable/types/__init__.py +38 -0
  65. uncountable/types/api/entity/create_or_update_entity.py +1 -0
  66. uncountable/types/api/entity/export_entities.py +13 -0
  67. uncountable/types/api/entity/list_aggregate.py +79 -0
  68. uncountable/types/api/entity/list_entities.py +25 -0
  69. uncountable/types/api/entity/set_barcode.py +43 -0
  70. uncountable/types/api/entity/transition_entity_phase.py +2 -1
  71. uncountable/types/api/files/download_file.py +15 -1
  72. uncountable/types/api/integrations/__init__.py +1 -0
  73. uncountable/types/api/integrations/publish_realtime_data.py +41 -0
  74. uncountable/types/api/integrations/push_notification.py +49 -0
  75. uncountable/types/api/integrations/register_sockets_token.py +41 -0
  76. uncountable/types/api/listing/__init__.py +1 -0
  77. uncountable/types/api/listing/fetch_listing.py +57 -0
  78. uncountable/types/api/notebooks/__init__.py +1 -0
  79. uncountable/types/api/notebooks/add_notebook_content.py +119 -0
  80. uncountable/types/api/outputs/get_output_organization.py +173 -0
  81. uncountable/types/api/recipes/edit_recipe_inputs.py +1 -1
  82. uncountable/types/api/recipes/get_recipe_output_metadata.py +2 -2
  83. uncountable/types/api/recipes/get_recipes_data.py +29 -0
  84. uncountable/types/api/recipes/lock_recipes.py +2 -1
  85. uncountable/types/api/recipes/set_recipe_total.py +59 -0
  86. uncountable/types/api/recipes/unlock_recipes.py +2 -1
  87. uncountable/types/api/runsheet/export_default_runsheet.py +44 -0
  88. uncountable/types/api/uploader/complete_async_parse.py +46 -0
  89. uncountable/types/api/user/__init__.py +1 -0
  90. uncountable/types/api/user/get_current_user_info.py +40 -0
  91. uncountable/types/async_batch_processor.py +266 -0
  92. uncountable/types/async_batch_t.py +5 -0
  93. uncountable/types/client_base.py +432 -2
  94. uncountable/types/client_config.py +1 -0
  95. uncountable/types/client_config_t.py +10 -0
  96. uncountable/types/entity_t.py +9 -1
  97. uncountable/types/exports_t.py +1 -0
  98. uncountable/types/integration_server_t.py +2 -0
  99. uncountable/types/integration_session.py +10 -0
  100. uncountable/types/integration_session_t.py +60 -0
  101. uncountable/types/integrations.py +10 -0
  102. uncountable/types/integrations_t.py +62 -0
  103. uncountable/types/listing.py +46 -0
  104. uncountable/types/listing_t.py +533 -0
  105. uncountable/types/notices.py +8 -0
  106. uncountable/types/notices_t.py +37 -0
  107. uncountable/types/notifications.py +11 -0
  108. uncountable/types/notifications_t.py +74 -0
  109. uncountable/types/queued_job.py +2 -0
  110. uncountable/types/queued_job_t.py +20 -2
  111. uncountable/types/sockets.py +20 -0
  112. uncountable/types/sockets_t.py +169 -0
  113. uncountable/types/uploader.py +24 -0
  114. uncountable/types/uploader_t.py +222 -0
  115. {uncountablepythonsdk-0.0.115.dist-info → uncountablepythonsdk-0.0.142.dev0.dist-info}/METADATA +5 -2
  116. {uncountablepythonsdk-0.0.115.dist-info → uncountablepythonsdk-0.0.142.dev0.dist-info}/RECORD +118 -79
  117. docs/quickstart.md +0 -19
  118. {uncountablepythonsdk-0.0.115.dist-info → uncountablepythonsdk-0.0.142.dev0.dist-info}/WHEEL +0 -0
  119. {uncountablepythonsdk-0.0.115.dist-info → uncountablepythonsdk-0.0.142.dev0.dist-info}/top_level.txt +0 -0
uncountable/integration/queue_runner/job_scheduler.py

```diff
@@ -1,19 +1,27 @@
 import asyncio
+import os
 import sys
+import threading
 import typing
 from concurrent.futures import ProcessPoolExecutor
 from dataclasses import dataclass
 
 from opentelemetry.trace import get_current_span
 
-from uncountable.integration.db.connect import IntegrationDBService, create_db_engine
-from uncountable.integration.db.session import get_session_maker
 from uncountable.integration.queue_runner.command_server import (
     CommandEnqueueJob,
     CommandEnqueueJobResponse,
     CommandQueue,
+    CommandRetryJob,
+    CommandRetryJobResponse,
     CommandTask,
 )
+from uncountable.integration.queue_runner.command_server.types import (
+    CommandCancelJob,
+    CommandCancelJobResponse,
+    CommandCancelJobStatus,
+    CommandVaccuumQueuedJobs,
+)
 from uncountable.integration.queue_runner.datastore import DatastoreSqlite
 from uncountable.integration.queue_runner.datastore.interface import Datastore
 from uncountable.integration.queue_runner.worker import Worker
@@ -21,7 +29,7 @@ from uncountable.integration.scan_profiles import load_profiles
 from uncountable.integration.telemetry import Logger
 from uncountable.types import job_definition_t, queued_job_t
 
-from .types import ResultQueue, ResultTask
+from .types import RESTART_EXIT_CODE, ResultQueue, ResultTask
 
 _MAX_JOB_WORKERS = 5
 
@@ -83,14 +91,11 @@ def _start_workers(
     return job_worker_lookup
 
 
-async def start_scheduler(command_queue: CommandQueue) -> None:
+async def start_scheduler(
+    command_queue: CommandQueue, datastore: DatastoreSqlite
+) -> None:
     logger = Logger(get_current_span())
     result_queue: ResultQueue = asyncio.Queue()
-    engine = create_db_engine(IntegrationDBService.RUNNER)
-    session_maker = get_session_maker(engine)
-
-    datastore = DatastoreSqlite(session_maker)
-    datastore.setup(engine)
 
     with ProcessPoolExecutor(max_workers=_MAX_JOB_WORKERS) as process_pool:
         job_worker_lookup = _start_workers(
@@ -104,7 +109,9 @@ async def start_scheduler(command_queue: CommandQueue) -> None:
             worker = job_worker_lookup[queued_job.job_ref_name]
         except KeyError as e:
             logger.log_exception(e)
-            datastore.remove_job_from_queue(queued_job.queued_job_uuid)
+            datastore.update_job_status(
+                queued_job.queued_job_uuid, queued_job_t.JobStatus.FAILED
+            )
             return
         await worker.listen_queue.put(queued_job)
 
@@ -119,17 +126,11 @@ async def start_scheduler(command_queue: CommandQueue) -> None:
                 queued_job_t.InvocationContextManual,
             ),
         ):
-            existing_queued_jobs = datastore.load_job_queue()
-            duplicate_job = next(
-                (
-                    job
-                    for job in existing_queued_jobs
-                    if job.job_ref_name == job_ref_name
-                ),
-                None,
+            existing_queued_job = datastore.get_next_queued_job_for_ref_name(
+                job_ref_name=job_ref_name
             )
-            if duplicate_job is not None:
-                return duplicate_job.queued_job_uuid
+            if existing_queued_job is not None:
+                return existing_queued_job.queued_job_uuid
         queued_job = datastore.add_job_to_queue(
             job_payload=payload,
             job_ref_name=job_ref_name,
@@ -146,6 +147,53 @@ async def start_scheduler(command_queue: CommandQueue) -> None:
             CommandEnqueueJobResponse(queued_job_uuid=queued_job_uuid)
         )
 
+    async def _handle_cancel_job_command(command: CommandCancelJob) -> None:
+        queued_job = datastore.get_queued_job(uuid=command.queued_job_uuid)
+        if queued_job is None:
+            await command.response_queue.put(
+                CommandCancelJobResponse(status=CommandCancelJobStatus.NO_JOB_FOUND)
+            )
+            return
+
+        if queued_job.status == queued_job_t.JobStatus.QUEUED:
+            datastore.remove_job_from_queue(command.queued_job_uuid)
+            await command.response_queue.put(
+                CommandCancelJobResponse(
+                    status=CommandCancelJobStatus.CANCELLED_WITH_RESTART
+                )
+            )
+
+            def delayed_exit() -> None:
+                os._exit(RESTART_EXIT_CODE)
+
+            threading.Timer(interval=5, function=delayed_exit).start()
+
+        else:
+            await command.response_queue.put(
+                CommandCancelJobResponse(
+                    status=CommandCancelJobStatus.JOB_ALREADY_COMPLETED
+                )
+            )
+
+    async def _handle_retry_job_command(command: CommandRetryJob) -> None:
+        queued_job = datastore.retry_job(command.queued_job_uuid)
+        if queued_job is None:
+            await command.response_queue.put(
+                CommandRetryJobResponse(queued_job_uuid=None)
+            )
+            return
+
+        await enqueue_queued_job(queued_job)
+        await command.response_queue.put(
+            CommandRetryJobResponse(queued_job_uuid=queued_job.queued_job_uuid)
+        )
+
+    def _handle_vaccuum_queued_jobs_command(
+        command: CommandVaccuumQueuedJobs,
+    ) -> None:
+        logger.log_info("Vaccuuming queued jobs...")
+        datastore.vaccuum_queued_jobs()
+
     for queued_job in queued_jobs:
         await enqueue_queued_job(queued_job)
 
@@ -162,10 +210,26 @@ async def start_scheduler(command_queue: CommandQueue) -> None:
                 match command:
                     case CommandEnqueueJob():
                         await _handle_enqueue_job_command(command=command)
+                    case CommandRetryJob():
+                        await _handle_retry_job_command(command=command)
+                    case CommandVaccuumQueuedJobs():
+                        _handle_vaccuum_queued_jobs_command(command=command)
+                    case CommandCancelJob():
+                        await _handle_cancel_job_command(command=command)
                     case _:
                         typing.assert_never(command)
                 command_task = asyncio.create_task(command_queue.get())
            elif task == result_task:
                 queued_job_result = result_task.result()
-                datastore.remove_job_from_queue(queued_job_result.queued_job_uuid)
+                match queued_job_result.job_result.success:
+                    case True:
+                        datastore.update_job_status(
+                            queued_job_result.queued_job_uuid,
+                            queued_job_t.JobStatus.SUCCESS,
+                        )
+                    case False:
+                        datastore.update_job_status(
+                            queued_job_result.queued_job_uuid,
+                            queued_job_t.JobStatus.FAILED,
+                        )
                 result_task = asyncio.create_task(result_queue.get())
```
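Every command in the scheduler loop above carries its own response queue, so a caller can await a reply to its specific request without any shared reply channel. Below is a minimal, self-contained sketch of that command/response-queue pattern; the class name echoes the diff, but the trivial handler and string reply are illustrative, not the SDK's actual logic.

```python
# Minimal sketch of the command/response-queue pattern (illustrative names).
import asyncio
from dataclasses import dataclass, field


@dataclass(kw_only=True)
class CommandCancelJob:
    queued_job_uuid: str
    response_queue: asyncio.Queue[str] = field(default_factory=asyncio.Queue)


async def dispatcher(command_queue: asyncio.Queue[CommandCancelJob]) -> None:
    while True:
        command = await command_queue.get()
        match command:
            case CommandCancelJob():
                # A real handler would consult the datastore before replying.
                await command.response_queue.put("cancelled")


async def main() -> None:
    command_queue: asyncio.Queue[CommandCancelJob] = asyncio.Queue()
    dispatcher_task = asyncio.create_task(dispatcher(command_queue))
    command = CommandCancelJob(queued_job_uuid="abc-123")
    await command_queue.put(command)
    print(await command.response_queue.get())  # caller awaits its private reply
    dispatcher_task.cancel()


asyncio.run(main())
```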
uncountable/integration/queue_runner/queue_runner.py

```diff
@@ -1,16 +1,24 @@
 import asyncio
 
+from uncountable.integration.db.connect import IntegrationDBService, create_db_engine
+from uncountable.integration.db.session import get_session_maker
 from uncountable.integration.queue_runner.command_server import serve
 from uncountable.integration.queue_runner.command_server.types import CommandQueue
+from uncountable.integration.queue_runner.datastore import DatastoreSqlite
 from uncountable.integration.queue_runner.job_scheduler import start_scheduler
 
 
 async def queue_runner_loop() -> None:
     command_queue: CommandQueue = asyncio.Queue()
+    engine = create_db_engine(IntegrationDBService.RUNNER)
+    session_maker = get_session_maker(engine)
 
-    command_server = asyncio.create_task(serve(command_queue))
+    datastore = DatastoreSqlite(session_maker)
+    datastore.setup(engine)
 
-    scheduler = asyncio.create_task(start_scheduler(command_queue))
+    command_server = asyncio.create_task(serve(command_queue, datastore))
+
+    scheduler = asyncio.create_task(start_scheduler(command_queue, datastore))
 
     await scheduler
     await command_server
```
uncountable/integration/queue_runner/types.py

```diff
@@ -5,3 +5,5 @@ from uncountable.types import queued_job_t
 ListenQueue = Queue[queued_job_t.QueuedJob]
 ResultQueue = Queue[queued_job_t.QueuedJobResult]
 ResultTask = Task[queued_job_t.QueuedJobResult]
+
+RESTART_EXIT_CODE = 147
```
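RESTART_EXIT_CODE is the handshake between the queue runner and the scheduler's supervisor loop: a process that wants to be recycled exits with code 147 (as delayed_exit does in job_scheduler.py above), and the parent treats that exact code as a restart request rather than a crash (see check_process_alarms in scheduler.py below). A standalone sketch of the handshake, using only the standard library:

```python
# Sketch of the restart-sentinel handshake implied by RESTART_EXIT_CODE.
import multiprocessing
import os

RESTART_EXIT_CODE = 147


def child() -> None:
    # os._exit skips atexit handlers and finally blocks, like delayed_exit above.
    os._exit(RESTART_EXIT_CODE)


if __name__ == "__main__":
    proc = multiprocessing.Process(target=child)
    proc.start()
    proc.join()
    if proc.exitcode == RESTART_EXIT_CODE:
        print("child requested a restart")  # a supervisor would re-spawn it here
    else:
        print(f"unexpected exit code: {proc.exitcode}")
```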
uncountable/integration/queue_runner/worker.py

```diff
@@ -87,33 +87,32 @@ def run_queued_job(
         base_span=span,
         profile_metadata=job_details.profile_metadata,
         job_definition=job_details.job_definition,
+        queued_job_uuid=queued_job.queued_job_uuid,
     )
-    try:
-        client = construct_uncountable_client(
-            profile_meta=job_details.profile_metadata, logger=job_logger
-        )
-        batch_processor = AsyncBatchProcessor(client=client)
-
-        payload = _resolve_queued_job_payload(queued_job)
-
-        args = JobArguments(
-            job_definition=job_details.job_definition,
-            client=client,
-            batch_processor=batch_processor,
-            profile_metadata=job_details.profile_metadata,
-            logger=job_logger,
-            payload=payload,
-        )
-
-        return execute_job(
-            args=args,
-            profile_metadata=job_details.profile_metadata,
-            job_definition=job_details.job_definition,
-            job_uuid=queued_job.queued_job_uuid,
-        )
-    except Exception as e:
-        job_logger.log_exception(e)
-        return job_definition_t.JobResult(success=False)
-    except BaseException as e:
-        job_logger.log_exception(e)
-        raise e
+    with job_logger.resource_tracking():
+        try:
+            client = construct_uncountable_client(
+                profile_meta=job_details.profile_metadata, logger=job_logger
+            )
+            batch_processor = AsyncBatchProcessor(client=client)
+
+            payload = _resolve_queued_job_payload(queued_job)
+
+            args = JobArguments(
+                job_definition=job_details.job_definition,
+                client=client,
+                batch_processor=batch_processor,
+                profile_metadata=job_details.profile_metadata,
+                logger=job_logger,
+                payload=payload,
+                job_uuid=queued_job.queued_job_uuid,
+            )
+
+            return execute_job(
+                args=args,
+                profile_metadata=job_details.profile_metadata,
+                job_definition=job_details.job_definition,
+            )
+        except BaseException as e:
+            job_logger.log_exception(e)
+            return job_definition_t.JobResult(success=False)
```
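Two behavioral changes ride along with this worker refactor: the job UUID now travels inside JobArguments rather than as a separate execute_job parameter, and the previous except Exception / except BaseException split collapses into a single except BaseException, so even SystemExit or KeyboardInterrupt inside a job is logged and converted into JobResult(success=False) instead of propagating. The new job_logger.resource_tracking() context manager lives in telemetry.py, which this diff does not show; purely as an assumption about its shape, a context manager like it might look like this (Unix-only, via the resource module):

```python
# Hypothetical sketch of a resource_tracking() context manager; the SDK's real
# implementation is in uncountable/integration/telemetry.py, not shown here.
import resource  # Unix-only
import time
from collections.abc import Iterator
from contextlib import contextmanager


@contextmanager
def resource_tracking() -> Iterator[None]:
    start = time.monotonic()
    before = resource.getrusage(resource.RUSAGE_SELF)
    try:
        yield
    finally:
        after = resource.getrusage(resource.RUSAGE_SELF)
        # Assumed behavior: report wall time, CPU time, and peak-RSS growth.
        print(
            f"wall={time.monotonic() - start:.2f}s",
            f"cpu={after.ru_utime - before.ru_utime:.2f}s",
            f"maxrss_delta={after.ru_maxrss - before.ru_maxrss}",
        )


with resource_tracking():
    sum(range(10_000_000))  # stand-in for the job body
```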
uncountable/integration/scheduler.py

```diff
@@ -5,6 +5,8 @@ import sys
 import time
 from dataclasses import dataclass
 from datetime import UTC
+from enum import StrEnum
+from typing import assert_never
 
 from opentelemetry.trace import get_current_span
 
@@ -15,15 +17,24 @@ from uncountable.integration.queue_runner.command_server import (
     check_health,
 )
 from uncountable.integration.queue_runner.queue_runner import start_queue_runner
+from uncountable.integration.queue_runner.types import RESTART_EXIT_CODE
 from uncountable.integration.telemetry import Logger
 
 SHUTDOWN_TIMEOUT_SECS = 30
 
+AnyProcess = multiprocessing.Process | subprocess.Popen[bytes]
+
+
+class ProcessName(StrEnum):
+    QUEUE_RUNNER = "queue_runner"
+    CRON_SERVER = "cron_server"
+    UWSGI = "uwsgi"
+
 
 @dataclass(kw_only=True)
 class ProcessInfo:
-    name: str
-    process: multiprocessing.Process | subprocess.Popen[bytes]
+    name: ProcessName
+    process: AnyProcess
 
     @property
     def is_alive(self) -> bool:
@@ -46,14 +57,27 @@ class ProcessInfo:
         return self.process.poll()
 
 
-def handle_shutdown(logger: Logger, processes: list[ProcessInfo]) -> None:
+@dataclass(kw_only=True)
+class ProcessAlarmRestart:
+    process: ProcessInfo
+
+
+@dataclass(kw_only=True)
+class ProcessAlarmShutdownAll:
+    pass
+
+
+ProcessAlarm = ProcessAlarmRestart | ProcessAlarmShutdownAll
+
+
+def handle_shutdown(logger: Logger, processes: dict[ProcessName, ProcessInfo]) -> None:
     logger.log_info("received shutdown command, shutting down sub-processes")
-    for proc_info in processes:
+    for proc_info in processes.values():
         if proc_info.is_alive:
             proc_info.process.terminate()
 
     shutdown_start = time.time()
-    still_living_processes = processes
+    still_living_processes = list(processes.values())
     while (
         time.time() - shutdown_start < SHUTDOWN_TIMEOUT_SECS
         and len(still_living_processes) > 0
@@ -82,14 +106,59 @@ def handle_shutdown(logger: Logger, processes: list[ProcessInfo]) -> None:
             proc_info.process.kill()
 
 
-def check_process_alive(logger: Logger, processes: list[ProcessInfo]) -> None:
-    for proc_info in processes:
+def restart_process(
+    logger: Logger, proc_info: ProcessInfo, processes: dict[ProcessName, ProcessInfo]
+) -> None:
+    logger.log_error(
+        f"process {proc_info.name} shut down unexpectedly - exit code {proc_info.exitcode}. Restarting..."
+    )
+
+    match proc_info.name:
+        case ProcessName.QUEUE_RUNNER:
+            queue_proc = multiprocessing.Process(target=start_queue_runner)
+            queue_proc.start()
+            new_info = ProcessInfo(name=ProcessName.QUEUE_RUNNER, process=queue_proc)
+            processes[ProcessName.QUEUE_RUNNER] = new_info
+            try:
+                _wait_queue_runner_online()
+                logger.log_info("queue runner restarted successfully")
+            except Exception as e:
+                logger.log_exception(e)
+                logger.log_error(
+                    "queue runner failed to restart, shutting down scheduler"
+                )
+                handle_shutdown(logger, processes)
+                sys.exit(1)
+
+        case ProcessName.CRON_SERVER:
+            cron_proc = multiprocessing.Process(target=cron_target)
+            cron_proc.start()
+            new_info = ProcessInfo(name=ProcessName.CRON_SERVER, process=cron_proc)
+            processes[ProcessName.CRON_SERVER] = new_info
+            logger.log_info("cron server restarted successfully")
+
+        case ProcessName.UWSGI:
+            uwsgi_proc: AnyProcess = subprocess.Popen(["uwsgi", "--die-on-term"])
+            new_info = ProcessInfo(name=ProcessName.UWSGI, process=uwsgi_proc)
+            processes[ProcessName.UWSGI] = new_info
+            logger.log_info("uwsgi restarted successfully")
+
+
+def check_process_alarms(
+    logger: Logger, processes: dict[ProcessName, ProcessInfo]
+) -> ProcessAlarm | None:
+    for proc_info in processes.values():
         if not proc_info.is_alive:
+            if proc_info.exitcode == RESTART_EXIT_CODE:
+                logger.log_warning(
+                    f"process {proc_info.name} requested restart! restarting"
+                )
+                return ProcessAlarmRestart(process=proc_info)
             logger.log_error(
                 f"process {proc_info.name} shut down unexpectedly! shutting down scheduler; exit code is {proc_info.exitcode}"
             )
-            handle_shutdown(logger, processes)
-            sys.exit(1)
+            return ProcessAlarmShutdownAll()
+    return None
 
 
 def _wait_queue_runner_online() -> None:
@@ -113,36 +182,65 @@ def _wait_queue_runner_online() -> None:
 
 def main() -> None:
     logger = Logger(get_current_span())
-    processes: list[ProcessInfo] = []
+    processes: dict[ProcessName, ProcessInfo] = {}
+
+    multiprocessing.set_start_method("forkserver")
 
     def add_process(process: ProcessInfo) -> None:
-        processes.append(process)
+        processes[process.name] = process
         logger.log_info(f"started process {process.name}")
 
-    runner_process = multiprocessing.Process(target=start_queue_runner)
-    runner_process.start()
-    add_process(ProcessInfo(name="queue runner", process=runner_process))
+    def _start_queue_runner() -> None:
+        runner_process = multiprocessing.Process(target=start_queue_runner)
+        runner_process.start()
+        add_process(
+            ProcessInfo(
+                name=ProcessName.QUEUE_RUNNER,
+                process=runner_process,
+            )
+        )
 
-    try:
-        _wait_queue_runner_online()
-    except Exception as e:
-        logger.log_exception(e)
-        handle_shutdown(logger, processes=processes)
-        return
+        try:
+            _wait_queue_runner_online()
+        except Exception as e:
+            logger.log_exception(e)
+            handle_shutdown(logger, processes=processes)
+            return
+
+    _start_queue_runner()
 
     cron_process = multiprocessing.Process(target=cron_target)
     cron_process.start()
-    add_process(ProcessInfo(name="cron server", process=cron_process))
+    add_process(ProcessInfo(name=ProcessName.CRON_SERVER, process=cron_process))
 
     uwsgi_process = subprocess.Popen([
         "uwsgi",
         "--die-on-term",
     ])
-    add_process(ProcessInfo(name="uwsgi", process=uwsgi_process))
+    add_process(ProcessInfo(name=ProcessName.UWSGI, process=uwsgi_process))
 
     try:
         while True:
-            check_process_alive(logger, processes=processes)
+            process_alarm = check_process_alarms(logger, processes=processes)
+            match process_alarm:
+                case ProcessAlarmRestart():
+                    match process_alarm.process.name:
+                        case ProcessName.QUEUE_RUNNER:
+                            del processes[ProcessName.QUEUE_RUNNER]
+                            _start_queue_runner()
+                        case ProcessName.CRON_SERVER | ProcessName.UWSGI:
+                            raise NotImplementedError(
+                                f"restarting {process_alarm.process.name} not yet implemented"
+                            )
+                        case _:
+                            assert_never(process_alarm.process.name)
+                case ProcessAlarmShutdownAll():
+                    handle_shutdown(logger, processes)
+                    sys.exit(1)
+                case None:
+                    pass
+                case _:
+                    assert_never(process_alarm)
             time.sleep(1)
     except KeyboardInterrupt:
         handle_shutdown(logger, processes=processes)
```
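check_process_alarms now reports through a small union type instead of exiting directly: it returns ProcessAlarmRestart, ProcessAlarmShutdownAll, or None, and main() matches on the result with assert_never as an exhaustiveness check, so a type checker flags any alarm variant the loop forgets to handle. A reduced, runnable sketch of the same pattern (the process_name field is illustrative; the real ProcessAlarmRestart carries a full ProcessInfo):

```python
# Reduced sketch of the alarm union + assert_never exhaustiveness pattern.
from dataclasses import dataclass
from typing import assert_never  # Python 3.11+


@dataclass(kw_only=True)
class ProcessAlarmRestart:
    process_name: str  # illustrative; the real alarm carries a ProcessInfo


@dataclass(kw_only=True)
class ProcessAlarmShutdownAll:
    pass


ProcessAlarm = ProcessAlarmRestart | ProcessAlarmShutdownAll


def handle(alarm: ProcessAlarm | None) -> str:
    match alarm:
        case ProcessAlarmRestart():
            return f"restart {alarm.process_name}"
        case ProcessAlarmShutdownAll():
            return "shut everything down"
        case None:
            return "all healthy"
        case _:
            # Unreachable at runtime; makes mypy fail if a variant is unhandled.
            assert_never(alarm)


print(handle(ProcessAlarmRestart(process_name="queue_runner")))
```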
uncountable/integration/server.py

```diff
@@ -11,7 +11,11 @@ from apscheduler.triggers.cron import CronTrigger
 from opentelemetry.trace import get_current_span
 from sqlalchemy.engine.base import Engine
 
+from uncountable.core.environment import get_local_admin_server_port
 from uncountable.integration.cron import CronJobArgs, cron_job_executor
+from uncountable.integration.queue_runner.command_server.command_client import (
+    send_vaccuum_queued_jobs_message,
+)
 from uncountable.integration.telemetry import Logger
 from uncountable.types import base_t, job_definition_t
 from uncountable.types.job_definition_t import (
@@ -21,6 +25,14 @@ from uncountable.types.job_definition_t import (
 
 _MAX_APSCHEDULER_CONCURRENT_JOBS = 1
 
+VACCUUM_QUEUED_JOBS_JOB_ID = "vacuum_queued_jobs"
+
+STATIC_JOB_IDS = {VACCUUM_QUEUED_JOBS_JOB_ID}
+
+
+def vaccuum_queued_jobs() -> None:
+    send_vaccuum_queued_jobs_message(port=get_local_admin_server_port())
+
 
 class IntegrationServer:
     _scheduler: BaseScheduler
@@ -36,11 +48,27 @@
         )
         self._server_logger = Logger(get_current_span())
 
+    def _register_static_jobs(self) -> None:
+        all_job_ids = {job.id for job in self._scheduler.get_jobs()}
+        if VACCUUM_QUEUED_JOBS_JOB_ID in all_job_ids:
+            self._scheduler.remove_job(VACCUUM_QUEUED_JOBS_JOB_ID)
+
+        self._scheduler.add_job(
+            vaccuum_queued_jobs,
+            max_instances=1,
+            coalesce=True,
+            trigger=CronTrigger.from_crontab("5 4 * * 4"),
+            name="Vaccuum queued jobs",
+            id=VACCUUM_QUEUED_JOBS_JOB_ID,
+            kwargs={},
+            misfire_grace_time=None,
+        )
+
     def register_jobs(self, profiles: list[job_definition_t.ProfileMetadata]) -> None:
-        valid_job_ids = []
+        valid_job_ids: set[str] = set()
         for profile_metadata in profiles:
             for job_defn in profile_metadata.jobs:
-                valid_job_ids.append(job_defn.id)
+                valid_job_ids.add(job_defn.id)
                 match job_defn:
                     case CronJobDefinition():
                         # Add to ap scheduler
@@ -90,10 +118,11 @@
                     pass
                 case _:
                     assert_never(job_defn)
-        all_jobs = self._scheduler.get_jobs()
-        for job in all_jobs:
-            if job.id not in valid_job_ids:
-                self._scheduler.remove_job(job.id)
+        all_job_ids = {job.id for job in self._scheduler.get_jobs()}
+        invalid_job_ids = all_job_ids.difference(valid_job_ids.union(STATIC_JOB_IDS))
+
+        for job_id in invalid_job_ids:
+            self._scheduler.remove_job(job_id)
 
     def serve_forever(self) -> None:
         signal.pause()
@@ -106,6 +135,7 @@
 
     def __enter__(self) -> "IntegrationServer":
         self._start_apscheduler()
+        self._register_static_jobs()
         return self
 
     def __exit__(
```
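The new static vacuum job uses CronTrigger.from_crontab("5 4 * * 4"): minute 5, hour 4, any day of month, any month, weekday 4, i.e. 04:05 once a week. One caveat worth verifying: standard cron numbers weekdays from Sunday as 0 (making 4 a Thursday), while APScheduler's native day_of_week field counts from Monday as 0, so it is worth confirming which day a numeric weekday fires on after passing through from_crontab. A standalone APScheduler sketch with the same trigger and the same safety options:

```python
# Standalone sketch of the vacuum job's schedule; the vacuum() body is a stub.
from apscheduler.schedulers.blocking import BlockingScheduler
from apscheduler.triggers.cron import CronTrigger


def vacuum() -> None:
    print("vacuuming queued jobs")


scheduler = BlockingScheduler()
scheduler.add_job(
    vacuum,
    trigger=CronTrigger.from_crontab("5 4 * * 4"),
    id="vacuum_queued_jobs",
    coalesce=True,  # collapse a backlog of missed runs into a single run
    max_instances=1,  # never run two vacuums concurrently
    misfire_grace_time=None,  # no deadline: run even long after the scheduled time
)
scheduler.start()  # blocks; Ctrl+C to stop
```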