indexify 0.3.18.tar.gz → 0.3.19.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. {indexify-0.3.18 → indexify-0.3.19}/PKG-INFO +1 -1
  2. {indexify-0.3.18 → indexify-0.3.19}/pyproject.toml +3 -1
  3. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/cli/cli.py +3 -17
  4. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/executor/api_objects.py +12 -0
  5. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/executor/downloader.py +4 -1
  6. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/executor/executor.py +51 -29
  7. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/executor/function_executor/function_executor.py +24 -11
  8. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/executor/function_executor/function_executor_state.py +9 -1
  9. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/executor/function_executor/function_executor_states_container.py +3 -1
  10. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/executor/function_executor/function_executor_status.py +2 -0
  11. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/executor/function_executor/server/subprocess_function_executor_server_factory.py +6 -0
  12. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/executor/function_executor/single_task_runner.py +15 -11
  13. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/executor/function_executor/task_output.py +35 -2
  14. indexify-0.3.19/src/indexify/executor/grpc/completed_tasks_container.py +26 -0
  15. indexify-0.3.19/src/indexify/executor/grpc/function_executor_controller.py +421 -0
  16. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/executor/grpc/state_reconciler.py +24 -34
  17. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/executor/grpc/state_reporter.py +35 -32
  18. indexify-0.3.19/src/indexify/executor/grpc/task_controller.py +449 -0
  19. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/executor/metrics/task_reporter.py +14 -0
  20. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/executor/task_reporter.py +95 -4
  21. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/executor/task_runner.py +1 -0
  22. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/proto/executor_api.proto +63 -5
  23. indexify-0.3.19/src/indexify/proto/executor_api_pb2.py +80 -0
  24. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/proto/executor_api_pb2.pyi +118 -3
  25. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/proto/executor_api_pb2_grpc.py +47 -0
  26. indexify-0.3.18/src/indexify/proto/executor_api_pb2.py +0 -70
  27. {indexify-0.3.18 → indexify-0.3.19}/README.md +0 -0
  28. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/executor/README.md +0 -0
  29. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/executor/executor_flavor.py +0 -0
  30. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/executor/function_executor/health_checker.py +0 -0
  31. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/executor/function_executor/invocation_state_client.py +0 -0
  32. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/executor/function_executor/metrics/function_executor.py +0 -0
  33. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/executor/function_executor/metrics/function_executor_state.py +0 -0
  34. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/executor/function_executor/metrics/function_executor_state_container.py +0 -0
  35. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/executor/function_executor/metrics/health_checker.py +0 -0
  36. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/executor/function_executor/metrics/invocation_state_client.py +0 -0
  37. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/executor/function_executor/metrics/single_task_runner.py +0 -0
  38. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/executor/function_executor/server/client_configuration.py +0 -0
  39. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/executor/function_executor/server/function_executor_server.py +0 -0
  40. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/executor/function_executor/server/function_executor_server_factory.py +0 -0
  41. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/executor/function_executor/server/subprocess_function_executor_server.py +0 -0
  42. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/executor/function_executor/task_input.py +0 -0
  43. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/executor/grpc/channel_manager.py +0 -0
  44. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/executor/grpc/metrics/channel_manager.py +0 -0
  45. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/executor/grpc/metrics/state_reporter.py +0 -0
  46. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/executor/metrics/downloader.py +0 -0
  47. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/executor/metrics/executor.py +0 -0
  48. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/executor/metrics/task_fetcher.py +0 -0
  49. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/executor/metrics/task_runner.py +0 -0
  50. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/executor/monitoring/function_allowlist.py +0 -0
  51. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/executor/monitoring/handler.py +0 -0
  52. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/executor/monitoring/health_check_handler.py +0 -0
  53. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/executor/monitoring/health_checker/generic_health_checker.py +0 -0
  54. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/executor/monitoring/health_checker/health_checker.py +0 -0
  55. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/executor/monitoring/metrics.py +0 -0
  56. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/executor/monitoring/prometheus_metrics_handler.py +0 -0
  57. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/executor/monitoring/server.py +0 -0
  58. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/executor/monitoring/startup_probe_handler.py +0 -0
  59. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/executor/runtime_probes.py +0 -0
  60. {indexify-0.3.18 → indexify-0.3.19}/src/indexify/executor/task_fetcher.py +0 -0
--- indexify-0.3.18/PKG-INFO
+++ indexify-0.3.19/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: indexify
-Version: 0.3.18
+Version: 0.3.19
 Summary: Open Source Indexify components and helper tools
 Home-page: https://github.com/tensorlakeai/indexify
 License: Apache 2.0
--- indexify-0.3.18/pyproject.toml
+++ indexify-0.3.19/pyproject.toml
@@ -1,7 +1,7 @@
 [tool.poetry]
 name = "indexify"
 # Incremented if any of the components provided in this packages are updated.
-version = "0.3.18"
+version = "0.3.19"
 description = "Open Source Indexify components and helper tools"
 authors = ["Tensorlake Inc. <support@tensorlake.ai>"]
 license = "Apache 2.0"
@@ -24,6 +24,8 @@ aiohttp = "^3.11.0"
 prometheus-client = "^0.21.1"
 # Adds function-executor binary and utils lib.
 tensorlake = ">=0.1"
+# Uncomment the next line to use local tensorlake package (only for development!)
+# tensorlake = { path = "../tensorlake", develop = true }
 # pydantic is provided by tensorlake
 # httpx-sse is provided by tensorlake
 # grpcio is provided by tensorlake
--- indexify-0.3.18/src/indexify/cli/cli.py
+++ indexify-0.3.19/src/indexify/cli/cli.py
@@ -78,6 +78,7 @@ def build_image(
 )
 def executor(
     server_addr: str = "localhost:8900",
+    grpc_server_addr: str = "localhost:8901",
     dev: Annotated[
         bool, typer.Option("--dev", "-d", help="Run the executor in development mode")
     ] = False,
@@ -120,16 +121,6 @@ def executor(
             help="Port where to run Executor Monitoring server",
         ),
     ] = 7000,
-    grpc_server_addr: Annotated[
-        Optional[str],
-        typer.Option(
-            "--grpc-server-addr",
-            help=(
-                "(exprimental) Address of server gRPC API to connect to, e.g. 'localhost:8901'.\n"
-                "Enables gRPC state reporter that will periodically report the state of the Function Executors to Server\n"
-            ),
-        ),
-    ] = None,
     enable_grpc_state_reconciler: Annotated[
         bool,
         typer.Option(
@@ -166,11 +157,6 @@ def executor(
             "--executor-id should be at least 10 characters long and only include characters _-[0-9][a-z][A-Z]"
         )
 
-    if enable_grpc_state_reconciler and grpc_server_addr is None:
-        raise typer.BadParameter(
-            "--grpc-server-addr must be set when --enable-grpc-state-reconciler is set"
-        )
-
     kv_labels: Dict[str, str] = {}
     for label in labels:
         key, value = label.split("=")
@@ -183,6 +169,7 @@ def executor(
         "starting executor",
         hostname=gethostname(),
         server_addr=server_addr,
+        grpc_server_addr=grpc_server_addr,
         config_path=config_path,
         executor_version=executor_version,
         labels=kv_labels,
@@ -192,7 +179,6 @@ def executor(
         dev_mode=dev,
         monitoring_server_host=monitoring_server_host,
         monitoring_server_port=monitoring_server_port,
-        grpc_server_addr=grpc_server_addr,
        enable_grpc_state_reconciler=enable_grpc_state_reconciler,
     )
 
@@ -231,10 +217,10 @@ def executor(
             server_ports=range(ports[0], ports[1]),
         ),
         server_addr=server_addr,
+        grpc_server_addr=grpc_server_addr,
         config_path=config_path,
         monitoring_server_host=monitoring_server_host,
         monitoring_server_port=monitoring_server_port,
-        grpc_server_addr=grpc_server_addr,
         enable_grpc_state_reconciler=enable_grpc_state_reconciler,
     ).run()
--- indexify-0.3.18/src/indexify/executor/api_objects.py
+++ indexify-0.3.19/src/indexify/executor/api_objects.py
@@ -49,5 +49,17 @@ class TaskResult(BaseModel):
     reducer: bool = False
 
 
+class DataPayload(BaseModel):
+    path: str
+    size: int
+    sha256_hash: str
+
+
+class IngestFnOutputsResponse(BaseModel):
+    data_payloads: List[DataPayload]
+    stdout: Optional[DataPayload] = None
+    stderr: Optional[DataPayload] = None
+
+
 TASK_OUTCOME_SUCCESS = "success"
 TASK_OUTCOME_FAILURE = "failure"
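
The new models are plain Pydantic models, so a server response for the function-outputs ingestion endpoint can be validated straight from JSON. A minimal sketch, assuming Pydantic v2; the response body below is invented for illustration and is not taken from this diff:

    from indexify.executor.api_objects import IngestFnOutputsResponse

    # Hypothetical JSON body; the real server response shape may differ.
    raw = {
        "data_payloads": [
            {"path": "outputs/0", "size": 123, "sha256_hash": "ab" * 32},
        ],
        "stdout": {"path": "logs/stdout", "size": 10, "sha256_hash": "cd" * 32},
        "stderr": None,
    }
    response = IngestFnOutputsResponse.model_validate(raw)  # Pydantic v2 API
    assert response.data_payloads[0].size == 123
    assert response.stderr is None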
--- indexify-0.3.18/src/indexify/executor/downloader.py
+++ indexify-0.3.19/src/indexify/executor/downloader.py
@@ -241,7 +241,10 @@ class Downloader:
 def serialized_object_from_http_response(response: httpx.Response) -> SerializedObject:
     # We're hardcoding the content type currently used by Python SDK. It might change in the future.
     # There's no other way for now to determine if the response is a bytes or string.
-    if response.headers["content-type"] == "application/octet-stream":
+    if response.headers["content-type"] in [
+        "application/octet-stream",
+        "application/pickle",
+    ]:
         return SerializedObject(
             bytes=response.content, content_type=response.headers["content-type"]
         )
--- indexify-0.3.18/src/indexify/executor/executor.py
+++ indexify-0.3.19/src/indexify/executor/executor.py
@@ -64,10 +64,10 @@ class Executor:
         function_allowlist: Optional[List[FunctionURI]],
         function_executor_server_factory: FunctionExecutorServerFactory,
         server_addr: str,
+        grpc_server_addr: str,
         config_path: Optional[str],
         monitoring_server_host: str,
         monitoring_server_port: int,
-        grpc_server_addr: Optional[str],
         enable_grpc_state_reconciler: bool,
     ):
         self._logger = structlog.get_logger(module=__name__)
@@ -97,43 +97,40 @@ class Executor:
         self._downloader = Downloader(
             code_path=code_path, base_url=self._base_url, config_path=config_path
         )
+        self._function_allowlist: Optional[List[FunctionURI]] = function_allowlist
+        self._function_executor_server_factory = function_executor_server_factory
+        self._channel_manager = ChannelManager(
+            server_address=grpc_server_addr,
+            config_path=config_path,
+            logger=self._logger,
+        )
+        self._state_reporter = ExecutorStateReporter(
+            executor_id=id,
+            flavor=flavor,
+            version=version,
+            labels=labels,
+            development_mode=development_mode,
+            function_allowlist=self._function_allowlist,
+            function_executor_states=self._function_executor_states,
+            channel_manager=self._channel_manager,
+            logger=self._logger,
+        )
+        self._state_reporter.update_executor_status(
+            ExecutorStatus.EXECUTOR_STATUS_STARTING_UP
+        )
         self._task_reporter = TaskReporter(
             base_url=self._base_url,
             executor_id=id,
             config_path=config_path,
+            channel_manager=self._channel_manager,
         )
-        self._function_allowlist: Optional[List[FunctionURI]] = function_allowlist
-        self._function_executor_server_factory = function_executor_server_factory
 
-        # HTTP mode services
+        # HTTP mode task runner
         self._task_runner: Optional[TaskRunner] = None
         self._task_fetcher: Optional[TaskFetcher] = None
-        # gRPC mode services
-        self._channel_manager: Optional[ChannelManager] = None
-        self._state_reporter: Optional[ExecutorStateReporter] = None
+        # gRPC mode state reconciler that runs tasks
         self._state_reconciler: Optional[ExecutorStateReconciler] = None
 
-        if grpc_server_addr is not None:
-            self._channel_manager = ChannelManager(
-                server_address=grpc_server_addr,
-                config_path=config_path,
-                logger=self._logger,
-            )
-            self._state_reporter = ExecutorStateReporter(
-                executor_id=id,
-                flavor=flavor,
-                version=version,
-                labels=labels,
-                development_mode=development_mode,
-                function_allowlist=self._function_allowlist,
-                function_executor_states=self._function_executor_states,
-                channel_manager=self._channel_manager,
-                logger=self._logger,
-            )
-            self._state_reporter.update_executor_status(
-                ExecutorStatus.EXECUTOR_STATUS_STARTING_UP
-            )
-
         if enable_grpc_state_reconciler:
             self._state_reconciler = ExecutorStateReconciler(
                 executor_id=id,
@@ -171,8 +168,8 @@ class Executor:
             "version": version,
             "code_path": str(code_path),
             "server_addr": server_addr,
-            "config_path": str(config_path),
             "grpc_server_addr": str(grpc_server_addr),
+            "config_path": str(config_path),
             "enable_grpc_state_reconciler": str(enable_grpc_state_reconciler),
             "hostname": gethostname(),
         }
@@ -256,6 +253,9 @@ class Executor:
             )
             logger.error("task execution failed", exc_info=e)
 
+        if output.metrics is not None:
+            self.log_function_metrics(output)
+
         with (
             metric_tasks_reporting_outcome.track_inprogress(),
             metric_task_outcome_report_latency.time(),
@@ -265,6 +265,28 @@ class Executor:
 
         metric_task_completion_latency.observe(time.monotonic() - start_time)
 
+    def log_function_metrics(self, output: TaskOutput):
+        for counter_name, counter_value in output.metrics.counters.items():
+            self._logger.info(
+                f"function_metric",
+                counter_name=counter_name,
+                counter_value=counter_value,
+                invocation_id=output.graph_invocation_id,
+                function_name=output.function_name,
+                graph_name=output.graph_name,
+                namespace=output.namespace,
+            )
+        for timer_name, timer_value in output.metrics.timers.items():
+            self._logger.info(
+                f"function_metric",
+                timer_name=timer_name,
+                timer_value=timer_value,
+                invocation_id=output.graph_invocation_id,
+                function_name=output.function_name,
+                graph_name=output.graph_name,
+                namespace=output.namespace,
+            )
+
     async def _run_task_and_get_output(self, task: Task, logger: Any) -> TaskOutput:
         graph: SerializedObject = await self._downloader.download_graph(
             namespace=task.namespace,
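
The new log_function_metrics hook emits one structured log line per counter and per timer, tagged with the invocation coordinates. The same pattern in isolation, as a rough sketch (the logger name and metric names here are invented for illustration):

    import structlog

    logger = structlog.get_logger(module="executor")
    counters = {"rows_processed": 42}  # hypothetical function counters
    timers = {"db_query_sec": 0.137}   # hypothetical function timers

    # Each metric becomes its own structured event, queryable by key.
    for name, value in counters.items():
        logger.info("function_metric", counter_name=name, counter_value=value)
    for name, value in timers.items():
        logger.info("function_metric", timer_name=name, timer_value=value)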
--- indexify-0.3.18/src/indexify/executor/function_executor/function_executor.py
+++ indexify-0.3.19/src/indexify/executor/function_executor/function_executor.py
@@ -88,6 +88,7 @@ class FunctionExecutor:
         initialize_request: InitializeRequest,
         base_url: str,
         config_path: Optional[str],
+        customer_code_timeout_sec: Optional[float] = None,
     ):
         """Creates and initializes a FunctionExecutorServer and all resources associated with it.
 
@@ -103,7 +104,9 @@ class FunctionExecutor:
         await self._establish_channel()
         stub: FunctionExecutorStub = FunctionExecutorStub(self._channel)
         await _collect_server_info(stub)
-        await _initialize_server(stub, initialize_request)
+        await _initialize_server(
+            stub, initialize_request, customer_code_timeout_sec
+        )
         await self._create_invocation_state_client(
             stub=stub,
             base_url=base_url,
@@ -293,18 +296,28 @@ async def _collect_server_info(stub: FunctionExecutorStub) -> None:
 
 
 async def _initialize_server(
-    stub: FunctionExecutorStub, initialize_request: InitializeRequest
+    stub: FunctionExecutorStub,
+    initialize_request: InitializeRequest,
+    customer_code_timeout_sec: Optional[float],
 ) -> None:
     with (
         metric_initialize_rpc_errors.count_exceptions(),
         metric_initialize_rpc_latency.time(),
     ):
-        initialize_response: InitializeResponse = await stub.initialize(
-            initialize_request
-        )
-        if initialize_response.success:
-            return
-        if initialize_response.HasField("customer_error"):
-            raise CustomerError(initialize_response.customer_error)
-        else:
-            raise Exception("initialize RPC failed at function executor server")
+        try:
+            initialize_response: InitializeResponse = await stub.initialize(
+                initialize_request,
+                timeout=customer_code_timeout_sec,
+            )
+            if initialize_response.success:
+                return
+            if initialize_response.HasField("customer_error"):
+                raise CustomerError(initialize_response.customer_error)
+            else:
+                raise Exception("initialize RPC failed at function executor server")
+        except grpc.aio.AioRpcError as e:
+            if e.code() == grpc.StatusCode.DEADLINE_EXCEEDED:
+                raise CustomerError(
+                    f"Customer code timeout {customer_code_timeout_sec} sec expired"
+                ) from e
+            raise
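
The timeout relies on gRPC's standard per-call deadline: passing timeout= to a grpc.aio stub call makes it raise grpc.aio.AioRpcError with code DEADLINE_EXCEEDED once the deadline passes, which the code above converts into a CustomerError. A generic sketch of the same pattern; the stub and RPC name are placeholders, not from this package:

    import grpc
    import grpc.aio

    async def call_with_deadline(stub, request, timeout_sec: float):
        try:
            # grpc.aio applies timeout= as the call's deadline in seconds.
            return await stub.SomeRpc(request, timeout=timeout_sec)
        except grpc.aio.AioRpcError as e:
            if e.code() == grpc.StatusCode.DEADLINE_EXCEEDED:
                raise TimeoutError(f"RPC exceeded {timeout_sec} sec deadline") from e
            raise  # propagate all other RPC errors unchanged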
--- indexify-0.3.18/src/indexify/executor/function_executor/function_executor_state.py
+++ indexify-0.3.19/src/indexify/executor/function_executor/function_executor_state.py
@@ -25,6 +25,7 @@ class FunctionExecutorState:
         graph_version: str,
         function_name: str,
         image_uri: Optional[str],
+        secret_names: List[str],
         logger: Any,
     ):
         # Read only fields.
@@ -33,6 +34,7 @@ class FunctionExecutorState:
         self.graph_name: str = graph_name
         self.function_name: str = function_name
         self.image_uri: Optional[str] = image_uri
+        self.secret_names: List[str] = secret_names
         self._logger: Any = logger.bind(
             module=__name__,
             function_executor_id=id,
@@ -47,6 +49,7 @@ class FunctionExecutorState:
         # TODO: Move graph_version to immutable fields once we migrate to gRPC State Reconciler.
         self.graph_version: str = graph_version
         self.status: FunctionExecutorStatus = FunctionExecutorStatus.DESTROYED
+        self.status_message: str = ""
         self.status_change_notifier: asyncio.Condition = asyncio.Condition(
             lock=self.lock
         )
@@ -62,7 +65,9 @@ class FunctionExecutorState:
         while self.status not in allowlist:
             await self.status_change_notifier.wait()
 
-    async def set_status(self, new_status: FunctionExecutorStatus) -> None:
+    async def set_status(
+        self, new_status: FunctionExecutorStatus, status_message: str = ""
+    ) -> None:
         """Sets the status of the Function Executor.
 
         The caller must hold the lock.
@@ -70,6 +75,7 @@ class FunctionExecutorState:
         """
         self.check_locked()
         if is_status_change_allowed(self.status, new_status):
+            # If status didn't change then still log it for visibility.
             self._logger.info(
                 "function executor status changed",
                 old_status=self.status.name,
@@ -78,12 +84,14 @@ class FunctionExecutorState:
             metric_function_executors_with_status.labels(status=self.status.name).dec()
             metric_function_executors_with_status.labels(status=new_status.name).inc()
             self.status = new_status
+            self.status_message = status_message
             self.status_change_notifier.notify_all()
         else:
             raise ValueError(
                 f"Invalid status change from {self.status} to {new_status}"
             )
 
+    # TODO: Delete this method once HTTP protocol is removed as it's used only there.
    async def destroy_function_executor(self) -> None:
         """Destroys the Function Executor if it exists.
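status_change_notifier is an asyncio.Condition built over the state's own lock, so callers holding the lock can block until the executor reaches an allowed status while set_status wakes them. A reduced sketch of that wait/notify protocol, with statuses simplified to strings:

    import asyncio

    class StateSketch:
        def __init__(self):
            self.lock = asyncio.Lock()
            self.status = "DESTROYED"
            self.status_message = ""
            # The condition shares the same lock as the rest of the state.
            self.notifier = asyncio.Condition(lock=self.lock)

        async def wait_for_status(self, allowlist):
            # Caller must already hold self.lock.
            while self.status not in allowlist:
                await self.notifier.wait()

        async def set_status(self, new_status, message=""):
            # Caller must already hold self.lock.
            self.status = new_status
            self.status_message = message
            self.notifier.notify_all()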
--- indexify-0.3.18/src/indexify/executor/function_executor/function_executor_states_container.py
+++ indexify-0.3.19/src/indexify/executor/function_executor/function_executor_states_container.py
@@ -1,5 +1,5 @@
 import asyncio
-from typing import Any, AsyncGenerator, Dict, Optional
+from typing import Any, AsyncGenerator, Dict, List, Optional
 
 from .function_executor_state import FunctionExecutorState
 from .function_executor_status import FunctionExecutorStatus
@@ -26,6 +26,7 @@ class FunctionExecutorStatesContainer:
         graph_version: str,
         function_name: str,
         image_uri: Optional[str],
+        secret_names: List[str],
     ) -> FunctionExecutorState:
         """Get or create a function executor state with the given ID.
 
@@ -45,6 +46,7 @@ class FunctionExecutorStatesContainer:
             graph_version=graph_version,
             function_name=function_name,
             image_uri=image_uri,
+            secret_names=secret_names,
             logger=self._logger,
         )
         self._states[id] = state
--- indexify-0.3.18/src/indexify/executor/function_executor/function_executor_status.py
+++ indexify-0.3.19/src/indexify/executor/function_executor/function_executor_status.py
@@ -23,6 +23,7 @@ class FunctionExecutorStatus(Enum):
     UNHEALTHY = "Unhealthy"
     # STARTUP_FAILED_CUSTOMER_ERROR -> DESTROYING
     # STARTUP_FAILED_PLATFORM_ERROR -> DESTROYING
+    # RUNNING_TASK -> DESTROYING
     # UNHEALTHY -> DESTROYING
     # IDLE -> DESTROYING
     DESTROYING = "Destroying"
@@ -69,6 +70,7 @@ def is_status_change_allowed(
         ],
         FunctionExecutorStatus.RUNNING_TASK: [
             FunctionExecutorStatus.RUNNING_TASK,
+            FunctionExecutorStatus.DESTROYING,
             FunctionExecutorStatus.IDLE,
             FunctionExecutorStatus.UNHEALTHY,
             FunctionExecutorStatus.SHUTDOWN,
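
is_status_change_allowed validates transitions against a table mapping each status to the statuses it may move to; the new RUNNING_TASK -> DESTROYING edge lets a Function Executor be torn down while a task is still running. A toy version of the same check, with the status set trimmed down for brevity:

    from enum import Enum

    class Status(Enum):
        IDLE = "Idle"
        RUNNING_TASK = "RunningTask"
        DESTROYING = "Destroying"

    ALLOWED_TRANSITIONS = {
        Status.IDLE: [Status.RUNNING_TASK, Status.DESTROYING],
        Status.RUNNING_TASK: [Status.IDLE, Status.DESTROYING],  # new edge included
        Status.DESTROYING: [],
    }

    def is_status_change_allowed(old: Status, new: Status) -> bool:
        return new in ALLOWED_TRANSITIONS[old]

    assert is_status_change_allowed(Status.RUNNING_TASK, Status.DESTROYING)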
--- indexify-0.3.18/src/indexify/executor/function_executor/server/subprocess_function_executor_server_factory.py
+++ indexify-0.3.19/src/indexify/executor/function_executor/server/subprocess_function_executor_server_factory.py
@@ -25,6 +25,12 @@ class SubprocessFunctionExecutorServerFactory(FunctionExecutorServerFactory):
         logger = logger.bind(module=__name__)
         port: Optional[int] = None
 
+        if len(config.secret_names) > 0:
+            logger.warning(
+                "Subprocess Function Executor does not support secrets. Please supply secrets as environment variables.",
+                secret_names=config.secret_names,
+            )
+
         try:
             port = self._allocate_port()
             args = [
--- indexify-0.3.18/src/indexify/executor/function_executor/single_task_runner.py
+++ indexify-0.3.19/src/indexify/executor/function_executor/single_task_runner.py
@@ -10,6 +10,7 @@ from tensorlake.function_executor.proto.function_executor_pb2 import (
 from tensorlake.function_executor.proto.function_executor_pb2_grpc import (
     FunctionExecutorStub,
 )
+from tensorlake.function_executor.proto.message_validator import MessageValidator
 
 from ..api_objects import Task
 from .function_executor import CustomerError, FunctionExecutor
@@ -26,7 +27,7 @@ from .server.function_executor_server_factory import (
     FunctionExecutorServerFactory,
 )
 from .task_input import TaskInput
-from .task_output import TaskOutput
+from .task_output import TaskMetrics, TaskOutput
 
 
 class SingleTaskRunner:
@@ -286,16 +287,17 @@ class _RunningTaskContextManager:
 
 
 def _task_output(task: Task, response: RunTaskResponse) -> TaskOutput:
-    required_fields = [
-        "stdout",
-        "stderr",
-        "is_reducer",
-        "success",
-    ]
-
-    for field in required_fields:
-        if not response.HasField(field):
-            raise ValueError(f"Response is missing required field: {field}")
+    response_validator = MessageValidator(response)
+    response_validator.required_field("stdout")
+    response_validator.required_field("stderr")
+    response_validator.required_field("is_reducer")
+    response_validator.required_field("success")
+
+    metrics = TaskMetrics(counters={}, timers={})
+    if response.HasField("metrics"):
+        # Can be None if e.g. function failed.
+        metrics.counters = dict(response.metrics.counters)
+        metrics.timers = dict(response.metrics.timers)
 
     output = TaskOutput(
         task_id=task.id,
@@ -308,10 +310,12 @@ def _task_output(task: Task, response: RunTaskResponse) -> TaskOutput:
         stderr=response.stderr,
         reducer=response.is_reducer,
         success=response.success,
+        metrics=metrics,
     )
 
     if response.HasField("function_output"):
         output.function_output = response.function_output
+        output.output_encoding = response.function_output.output_encoding
     if response.HasField("router_output"):
         output.router_output = response.router_output
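MessageValidator comes from the tensorlake package; judging by this call site, required_field raises when a proto field is unset, replacing the hand-rolled loop above. A hypothetical stand-in re-implementation of that validation pattern, assuming only standard protobuf HasField semantics:

    class RequiredFieldValidator:
        """Hypothetical stand-in; the real MessageValidator lives in tensorlake."""

        def __init__(self, message):
            self._message = message

        def required_field(self, name: str) -> "RequiredFieldValidator":
            # HasField raises ValueError itself for unknown field names.
            if not self._message.HasField(name):
                raise ValueError(f"Response is missing required field: {name}")
            return self  # returning self allows chained calls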
--- indexify-0.3.18/src/indexify/executor/function_executor/task_output.py
+++ indexify-0.3.19/src/indexify/executor/function_executor/task_output.py
@@ -1,11 +1,17 @@
-from typing import Optional
+from typing import Dict, Optional
 
 from tensorlake.function_executor.proto.function_executor_pb2 import (
     FunctionOutput,
     RouterOutput,
 )
 
-from ..api_objects import Task
+
+class TaskMetrics:
+    """Metrics for a task."""
+
+    def __init__(self, counters: Dict[str, int], timers: Dict[str, float]):
+        self.counters = counters
+        self.timers = timers
 
 
 class TaskOutput:
@@ -19,6 +25,7 @@ class TaskOutput:
         function_name: str,
         graph_version: str,
         graph_invocation_id: str,
+        output_encoding: Optional[str] = None,
         function_output: Optional[FunctionOutput] = None,
         router_output: Optional[RouterOutput] = None,
         stdout: Optional[str] = None,
@@ -26,6 +33,7 @@ class TaskOutput:
         reducer: bool = False,
         success: bool = False,
         is_internal_error: bool = False,
+        metrics: Optional[TaskMetrics] = None,
     ):
         self.task_id = task_id
         self.namespace = namespace
@@ -40,6 +48,8 @@ class TaskOutput:
         self.reducer = reducer
         self.success = success
         self.is_internal_error = is_internal_error
+        self.metrics = metrics
+        self.output_encoding = output_encoding
 
     @classmethod
     def internal_error(
@@ -63,3 +73,26 @@ class TaskOutput:
             stderr="Platform failed to execute the function.",
             is_internal_error=True,
         )
+
+    @classmethod
+    def function_timeout(
+        cls,
+        task_id: str,
+        namespace: str,
+        graph_name: str,
+        function_name: str,
+        graph_version: str,
+        graph_invocation_id: str,
+    ) -> "TaskOutput":
+        """Creates a TaskOutput for an function timeout error."""
+        # Task stdout, stderr is not available.
+        return TaskOutput(
+            task_id=task_id,
+            namespace=namespace,
+            graph_name=graph_name,
+            function_name=function_name,
+            graph_version=graph_version,
+            graph_invocation_id=graph_invocation_id,
+            stderr="Function execution timed out.",
+            is_internal_error=False,
+        )
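
Note that function_timeout sets is_internal_error=False, so a timeout is reported as a customer-side failure rather than a platform error. A usage sketch with invented identifiers:

    output = TaskOutput.function_timeout(
        task_id="task-123",  # hypothetical IDs, for illustration only
        namespace="default",
        graph_name="my_graph",
        function_name="slow_fn",
        graph_version="1",
        graph_invocation_id="inv-456",
    )
    assert not output.success
    assert not output.is_internal_error  # treated as a customer error, not platform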
--- /dev/null
+++ indexify-0.3.19/src/indexify/executor/grpc/completed_tasks_container.py
@@ -0,0 +1,26 @@
+import asyncio
+from typing import List, Set
+
+
+class CompletedTasksContainer:
+    """An asyncio concurrent container for the completed task IDs."""
+
+    def __init__(self):
+        # The fields below are protected by the lock.
+        self._lock: asyncio.Lock = asyncio.Lock()
+        self._completed_task_ids: Set[str] = set()
+
+    async def add(self, task_id: str) -> None:
+        """Add a task to the container."""
+        async with self._lock:
+            self._completed_task_ids.add(task_id)
+
+    async def contains(self, task_id: str) -> bool:
+        """Check if the task is in the container."""
+        async with self._lock:
+            return task_id in self._completed_task_ids
+
+    async def replace(self, task_ids: List[str]) -> None:
+        """Replaces the task IDs with the supplied task IDs."""
+        async with self._lock:
+            self._completed_task_ids = set(task_ids)
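
A quick usage sketch of the new container (task IDs invented for illustration); replace lets a reconciler overwrite the whole set from server state in one shot:

    import asyncio

    from indexify.executor.grpc.completed_tasks_container import CompletedTasksContainer

    async def main():
        completed = CompletedTasksContainer()
        await completed.add("task-1")
        assert await completed.contains("task-1")
        assert not await completed.contains("task-2")
        await completed.replace(["task-2", "task-3"])  # overwrite from server state
        assert not await completed.contains("task-1")

    asyncio.run(main())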