dbos 0.23.0a14__py3-none-any.whl → 0.24.0__py3-none-any.whl

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of dbos might be problematic. Click here for more details.

dbos/__init__.py CHANGED
@@ -1,13 +1,14 @@
1
1
  from . import _error as error
2
2
  from ._context import DBOSContextEnsure, DBOSContextSetAuth, SetWorkflowID
3
3
  from ._dbos import DBOS, DBOSConfiguredInstance, WorkflowHandle, WorkflowStatus
4
- from ._dbos_config import ConfigFile, get_dbos_database_url, load_config
4
+ from ._dbos_config import ConfigFile, DBOSConfig, get_dbos_database_url, load_config
5
5
  from ._kafka_message import KafkaMessage
6
6
  from ._queue import Queue
7
7
  from ._sys_db import GetWorkflowsInput, WorkflowStatusString
8
8
 
9
9
  __all__ = [
10
10
  "ConfigFile",
11
+ "DBOSConfig",
11
12
  "DBOS",
12
13
  "DBOSConfiguredInstance",
13
14
  "DBOSContextEnsure",
dbos/_app_db.py CHANGED
@@ -5,7 +5,7 @@ import sqlalchemy.dialects.postgresql as pg
5
5
  from sqlalchemy.exc import DBAPIError
6
6
  from sqlalchemy.orm import Session, sessionmaker
7
7
 
8
- from ._dbos_config import ConfigFile
8
+ from ._dbos_config import ConfigFile, DatabaseConfig
9
9
  from ._error import DBOSWorkflowConflictIDError
10
10
  from ._schemas.application_database import ApplicationSchema
11
11
 
@@ -61,8 +61,22 @@ class ApplicationDatabase:
61
61
  port=config["database"]["port"],
62
62
  database=app_db_name,
63
63
  )
64
+
65
+ connect_args = {}
66
+ if (
67
+ "connectionTimeoutMillis" in config["database"]
68
+ and config["database"]["connectionTimeoutMillis"]
69
+ ):
70
+ connect_args["connect_timeout"] = int(
71
+ config["database"]["connectionTimeoutMillis"] / 1000
72
+ )
73
+
64
74
  self.engine = sa.create_engine(
65
- app_db_url, pool_size=20, max_overflow=5, pool_timeout=30
75
+ app_db_url,
76
+ pool_size=config["database"]["app_db_pool_size"],
77
+ max_overflow=0,
78
+ pool_timeout=30,
79
+ connect_args=connect_args,
66
80
  )
67
81
  self.sessionmaker = sessionmaker(bind=self.engine)
68
82
  self.debug_mode = debug_mode
@@ -1,9 +1,10 @@
1
+ import socket
1
2
  import threading
2
3
  import time
3
4
  import traceback
4
5
  from typing import TYPE_CHECKING, Optional
5
6
 
6
- from websockets import ConnectionClosed, ConnectionClosedOK
7
+ from websockets import ConnectionClosed, ConnectionClosedOK, InvalidStatus
7
8
  from websockets.sync.client import connect
8
9
  from websockets.sync.connection import Connection
9
10
 
@@ -33,27 +34,31 @@ class ConductorWebsocket(threading.Thread):
33
34
  def run(self) -> None:
34
35
  while not self.evt.is_set():
35
36
  try:
36
- with connect(self.url) as websocket:
37
+ with connect(
38
+ self.url, open_timeout=5, logger=self.dbos.logger
39
+ ) as websocket:
37
40
  self.websocket = websocket
38
41
  while not self.evt.is_set():
39
42
  message = websocket.recv()
40
43
  if not isinstance(message, str):
41
44
  self.dbos.logger.warning(
42
- "Receieved unexpected non-str message"
45
+ "Received unexpected non-str message"
43
46
  )
44
47
  continue
45
48
  base_message = p.BaseMessage.from_json(message)
46
- type = base_message.type
47
- if type == p.MessageType.EXECUTOR_INFO:
49
+ msg_type = base_message.type
50
+ error_message = None
51
+ if msg_type == p.MessageType.EXECUTOR_INFO:
48
52
  info_response = p.ExecutorInfoResponse(
49
53
  type=p.MessageType.EXECUTOR_INFO,
50
54
  request_id=base_message.request_id,
51
55
  executor_id=GlobalParams.executor_id,
52
56
  application_version=GlobalParams.app_version,
57
+ hostname=socket.gethostname(),
53
58
  )
54
59
  websocket.send(info_response.to_json())
55
60
  self.dbos.logger.info("Connected to DBOS conductor")
56
- elif type == p.MessageType.RECOVERY:
61
+ elif msg_type == p.MessageType.RECOVERY:
57
62
  recovery_message = p.RecoveryRequest.from_json(message)
58
63
  success = True
59
64
  try:
@@ -61,83 +66,89 @@ class ConductorWebsocket(threading.Thread):
61
66
  recovery_message.executor_ids
62
67
  )
63
68
  except Exception as e:
64
- self.dbos.logger.error(
65
- f"Exception encountered when recovering workflows: {traceback.format_exc()}"
66
- )
69
+ error_message = f"Exception encountered when recovering workflows: {traceback.format_exc()}"
70
+ self.dbos.logger.error(error_message)
67
71
  success = False
68
72
  recovery_response = p.RecoveryResponse(
69
73
  type=p.MessageType.RECOVERY,
70
74
  request_id=base_message.request_id,
71
75
  success=success,
76
+ error_message=error_message,
72
77
  )
73
78
  websocket.send(recovery_response.to_json())
74
- elif type == p.MessageType.CANCEL:
79
+ elif msg_type == p.MessageType.CANCEL:
75
80
  cancel_message = p.CancelRequest.from_json(message)
76
81
  success = True
77
82
  try:
78
83
  self.dbos.cancel_workflow(cancel_message.workflow_id)
79
84
  except Exception as e:
80
- self.dbos.logger.error(
81
- f"Exception encountered when cancelling workflow {cancel_message.workflow_id}: {traceback.format_exc()}"
82
- )
85
+ error_message = f"Exception encountered when cancelling workflow {cancel_message.workflow_id}: {traceback.format_exc()}"
86
+ self.dbos.logger.error(error_message)
83
87
  success = False
84
88
  cancel_response = p.CancelResponse(
85
89
  type=p.MessageType.CANCEL,
86
90
  request_id=base_message.request_id,
87
91
  success=success,
92
+ error_message=error_message,
88
93
  )
89
94
  websocket.send(cancel_response.to_json())
90
- elif type == p.MessageType.RESUME:
95
+ elif msg_type == p.MessageType.RESUME:
91
96
  resume_message = p.ResumeRequest.from_json(message)
92
97
  success = True
93
98
  try:
94
99
  self.dbos.resume_workflow(resume_message.workflow_id)
95
100
  except Exception as e:
96
- self.dbos.logger.error(
97
- f"Exception encountered when resuming workflow {resume_message.workflow_id}: {traceback.format_exc()}"
98
- )
101
+ error_message = f"Exception encountered when resuming workflow {resume_message.workflow_id}: {traceback.format_exc()}"
102
+ self.dbos.logger.error(error_message)
99
103
  success = False
100
104
  resume_response = p.ResumeResponse(
101
105
  type=p.MessageType.RESUME,
102
106
  request_id=base_message.request_id,
103
107
  success=success,
108
+ error_message=error_message,
104
109
  )
105
110
  websocket.send(resume_response.to_json())
106
- elif type == p.MessageType.RESTART:
111
+ elif msg_type == p.MessageType.RESTART:
107
112
  restart_message = p.RestartRequest.from_json(message)
108
113
  success = True
109
114
  try:
110
115
  self.dbos.restart_workflow(restart_message.workflow_id)
111
116
  except Exception as e:
112
- self.dbos.logger.error(
113
- f"Exception encountered when restarting workflow {restart_message.workflow_id}: {traceback.format_exc()}"
114
- )
117
+ error_message = f"Exception encountered when restarting workflow {restart_message.workflow_id}: {traceback.format_exc()}"
118
+ self.dbos.logger.error(error_message)
115
119
  success = False
116
120
  restart_response = p.RestartResponse(
117
121
  type=p.MessageType.RESTART,
118
122
  request_id=base_message.request_id,
119
123
  success=success,
124
+ error_message=error_message,
120
125
  )
121
126
  websocket.send(restart_response.to_json())
122
- elif type == p.MessageType.LIST_WORKFLOWS:
127
+ elif msg_type == p.MessageType.LIST_WORKFLOWS:
123
128
  list_workflows_message = p.ListWorkflowsRequest.from_json(
124
129
  message
125
130
  )
126
131
  body = list_workflows_message.body
127
- infos = list_workflows(
128
- self.dbos._sys_db,
129
- workflow_ids=body["workflow_uuids"],
130
- user=body["authenticated_user"],
131
- start_time=body["start_time"],
132
- end_time=body["end_time"],
133
- status=body["status"],
134
- request=False,
135
- app_version=body["application_version"],
136
- name=body["workflow_name"],
137
- limit=body["limit"],
138
- offset=body["offset"],
139
- sort_desc=body["sort_desc"],
140
- )
132
+ infos = []
133
+ try:
134
+ infos = list_workflows(
135
+ self.dbos._sys_db,
136
+ workflow_ids=body["workflow_uuids"],
137
+ user=body["authenticated_user"],
138
+ start_time=body["start_time"],
139
+ end_time=body["end_time"],
140
+ status=body["status"],
141
+ request=False,
142
+ app_version=body["application_version"],
143
+ name=body["workflow_name"],
144
+ limit=body["limit"],
145
+ offset=body["offset"],
146
+ sort_desc=body["sort_desc"],
147
+ )
148
+ except Exception as e:
149
+ error_message = f"Exception encountered when listing workflows: {traceback.format_exc()}"
150
+ self.dbos.logger.error(error_message)
151
+
141
152
  list_workflows_response = p.ListWorkflowsResponse(
142
153
  type=p.MessageType.LIST_WORKFLOWS,
143
154
  request_id=base_message.request_id,
@@ -145,25 +156,32 @@ class ConductorWebsocket(threading.Thread):
145
156
  p.WorkflowsOutput.from_workflow_information(i)
146
157
  for i in infos
147
158
  ],
159
+ error_message=error_message,
148
160
  )
149
161
  websocket.send(list_workflows_response.to_json())
150
- elif type == p.MessageType.LIST_QUEUED_WORKFLOWS:
162
+ elif msg_type == p.MessageType.LIST_QUEUED_WORKFLOWS:
151
163
  list_queued_workflows_message = (
152
164
  p.ListQueuedWorkflowsRequest.from_json(message)
153
165
  )
154
166
  q_body = list_queued_workflows_message.body
155
- infos = list_queued_workflows(
156
- self.dbos._sys_db,
157
- start_time=q_body["start_time"],
158
- end_time=q_body["end_time"],
159
- status=q_body["status"],
160
- request=False,
161
- name=q_body["workflow_name"],
162
- limit=q_body["limit"],
163
- offset=q_body["offset"],
164
- queue_name=q_body["queue_name"],
165
- sort_desc=q_body["sort_desc"],
166
- )
167
+ infos = []
168
+ try:
169
+ infos = list_queued_workflows(
170
+ self.dbos._sys_db,
171
+ start_time=q_body["start_time"],
172
+ end_time=q_body["end_time"],
173
+ status=q_body["status"],
174
+ request=False,
175
+ name=q_body["workflow_name"],
176
+ limit=q_body["limit"],
177
+ offset=q_body["offset"],
178
+ queue_name=q_body["queue_name"],
179
+ sort_desc=q_body["sort_desc"],
180
+ )
181
+ except Exception as e:
182
+ error_message = f"Exception encountered when listing queued workflows: {traceback.format_exc()}"
183
+ self.dbos.logger.error(error_message)
184
+
167
185
  list_queued_workflows_response = (
168
186
  p.ListQueuedWorkflowsResponse(
169
187
  type=p.MessageType.LIST_QUEUED_WORKFLOWS,
@@ -172,18 +190,25 @@ class ConductorWebsocket(threading.Thread):
172
190
  p.WorkflowsOutput.from_workflow_information(i)
173
191
  for i in infos
174
192
  ],
193
+ error_message=error_message,
175
194
  )
176
195
  )
177
196
  websocket.send(list_queued_workflows_response.to_json())
178
- elif type == p.MessageType.GET_WORKFLOW:
197
+ elif msg_type == p.MessageType.GET_WORKFLOW:
179
198
  get_workflow_message = p.GetWorkflowRequest.from_json(
180
199
  message
181
200
  )
182
- info = get_workflow(
183
- self.dbos._sys_db,
184
- get_workflow_message.workflow_id,
185
- getRequest=False,
186
- )
201
+ info = None
202
+ try:
203
+ info = get_workflow(
204
+ self.dbos._sys_db,
205
+ get_workflow_message.workflow_id,
206
+ getRequest=False,
207
+ )
208
+ except Exception as e:
209
+ error_message = f"Exception encountered when getting workflow {get_workflow_message.workflow_id}: {traceback.format_exc()}"
210
+ self.dbos.logger.error(error_message)
211
+
187
212
  get_workflow_response = p.GetWorkflowResponse(
188
213
  type=p.MessageType.GET_WORKFLOW,
189
214
  request_id=base_message.request_id,
@@ -192,10 +217,43 @@ class ConductorWebsocket(threading.Thread):
192
217
  if info is not None
193
218
  else None
194
219
  ),
220
+ error_message=error_message,
195
221
  )
196
222
  websocket.send(get_workflow_response.to_json())
223
+ elif msg_type == p.MessageType.EXIST_PENDING_WORKFLOWS:
224
+ exist_pending_workflows_message = (
225
+ p.ExistPendingWorkflowsRequest.from_json(message)
226
+ )
227
+ pending_wfs = []
228
+ try:
229
+ pending_wfs = self.dbos._sys_db.get_pending_workflows(
230
+ exist_pending_workflows_message.executor_id,
231
+ exist_pending_workflows_message.application_version,
232
+ )
233
+ except Exception as e:
234
+ error_message = f"Exception encountered when checking for pending workflows: {traceback.format_exc()}"
235
+ self.dbos.logger.error(error_message)
236
+
237
+ exist_pending_workflows_response = (
238
+ p.ExistPendingWorkflowsResponse(
239
+ type=p.MessageType.EXIST_PENDING_WORKFLOWS,
240
+ request_id=base_message.request_id,
241
+ exist=len(pending_wfs) > 0,
242
+ error_message=error_message,
243
+ )
244
+ )
245
+ websocket.send(exist_pending_workflows_response.to_json())
197
246
  else:
198
- self.dbos.logger.warning(f"Unexpected message type: {type}")
247
+ self.dbos.logger.warning(
248
+ f"Unexpected message type: {msg_type}"
249
+ )
250
+ unknown_message = p.BaseResponse(
251
+ request_id=base_message.request_id,
252
+ type=msg_type,
253
+ error_message="Unknown message type",
254
+ )
255
+ # Still need to send a response to the conductor
256
+ websocket.send(unknown_message.to_json())
199
257
  except ConnectionClosedOK:
200
258
  self.dbos.logger.info("Conductor connection terminated")
201
259
  break
@@ -205,6 +263,14 @@ class ConductorWebsocket(threading.Thread):
205
263
  )
206
264
  time.sleep(1)
207
265
  continue
266
+ except InvalidStatus as e:
267
+ # This happens when it cannot open a connection to the conductor. E.g., the conductor rejects the request
268
+ json_data = e.response.body.decode("utf-8")
269
+ self.dbos.logger.error(
270
+ f"Failed to connect to conductor. Retrying: {str(e) }. Details: {json_data}"
271
+ )
272
+ time.sleep(1)
273
+ continue
208
274
  except Exception as e:
209
275
  self.dbos.logger.error(
210
276
  f"Unexpected exception in connection to conductor. Reconnecting: {e}"
@@ -15,6 +15,7 @@ class MessageType(str, Enum):
15
15
  RESUME = "resume"
16
16
  RESTART = "restart"
17
17
  GET_WORKFLOW = "get_workflow"
18
+ EXIST_PENDING_WORKFLOWS = "exist_pending_workflows"
18
19
 
19
20
 
20
21
  T = TypeVar("T", bound="BaseMessage")
@@ -44,6 +45,11 @@ class BaseMessage:
44
45
  return json.dumps(dict_data)
45
46
 
46
47
 
48
+ @dataclass
49
+ class BaseResponse(BaseMessage):
50
+ error_message: Optional[str] = None
51
+
52
+
47
53
  @dataclass
48
54
  class ExecutorInfoRequest(BaseMessage):
49
55
  pass
@@ -53,6 +59,8 @@ class ExecutorInfoRequest(BaseMessage):
53
59
  class ExecutorInfoResponse(BaseMessage):
54
60
  executor_id: str
55
61
  application_version: str
62
+ hostname: Optional[str]
63
+ error_message: Optional[str] = None
56
64
 
57
65
 
58
66
  @dataclass
@@ -63,6 +71,7 @@ class RecoveryRequest(BaseMessage):
63
71
  @dataclass
64
72
  class RecoveryResponse(BaseMessage):
65
73
  success: bool
74
+ error_message: Optional[str] = None
66
75
 
67
76
 
68
77
  @dataclass
@@ -73,6 +82,7 @@ class CancelRequest(BaseMessage):
73
82
  @dataclass
74
83
  class CancelResponse(BaseMessage):
75
84
  success: bool
85
+ error_message: Optional[str] = None
76
86
 
77
87
 
78
88
  @dataclass
@@ -83,6 +93,7 @@ class ResumeRequest(BaseMessage):
83
93
  @dataclass
84
94
  class ResumeResponse(BaseMessage):
85
95
  success: bool
96
+ error_message: Optional[str] = None
86
97
 
87
98
 
88
99
  @dataclass
@@ -93,6 +104,7 @@ class RestartRequest(BaseMessage):
93
104
  @dataclass
94
105
  class RestartResponse(BaseMessage):
95
106
  success: bool
107
+ error_message: Optional[str] = None
96
108
 
97
109
 
98
110
  class ListWorkflowsBody(TypedDict):
@@ -126,6 +138,7 @@ class WorkflowsOutput:
126
138
  UpdatedAt: Optional[str]
127
139
  QueueName: Optional[str]
128
140
  ApplicationVersion: Optional[str]
141
+ ExecutorID: Optional[str]
129
142
 
130
143
  @classmethod
131
144
  def from_workflow_information(cls, info: WorkflowInformation) -> "WorkflowsOutput":
@@ -153,6 +166,7 @@ class WorkflowsOutput:
153
166
  UpdatedAt=updated_at_str,
154
167
  QueueName=info.queue_name,
155
168
  ApplicationVersion=info.app_version,
169
+ ExecutorID=info.executor_id,
156
170
  )
157
171
 
158
172
 
@@ -164,6 +178,7 @@ class ListWorkflowsRequest(BaseMessage):
164
178
  @dataclass
165
179
  class ListWorkflowsResponse(BaseMessage):
166
180
  output: List[WorkflowsOutput]
181
+ error_message: Optional[str] = None
167
182
 
168
183
 
169
184
  class ListQueuedWorkflowsBody(TypedDict):
@@ -185,6 +200,7 @@ class ListQueuedWorkflowsRequest(BaseMessage):
185
200
  @dataclass
186
201
  class ListQueuedWorkflowsResponse(BaseMessage):
187
202
  output: List[WorkflowsOutput]
203
+ error_message: Optional[str] = None
188
204
 
189
205
 
190
206
  @dataclass
@@ -195,3 +211,16 @@ class GetWorkflowRequest(BaseMessage):
195
211
  @dataclass
196
212
  class GetWorkflowResponse(BaseMessage):
197
213
  output: Optional[WorkflowsOutput]
214
+ error_message: Optional[str] = None
215
+
216
+
217
+ @dataclass
218
+ class ExistPendingWorkflowsRequest(BaseMessage):
219
+ executor_id: str
220
+ application_version: str
221
+
222
+
223
+ @dataclass
224
+ class ExistPendingWorkflowsResponse(BaseMessage):
225
+ exist: bool
226
+ error_message: Optional[str] = None