indexify 0.4.9__py3-none-any.whl → 0.4.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -52,7 +52,7 @@ class Executor:
52
52
  blob_store: BLOBStore,
53
53
  host_resources_provider: HostResourcesProvider,
54
54
  ):
55
- self._logger = structlog.get_logger(module=__name__)
55
+ self._logger = structlog.get_logger(module=__name__, executor_id=id)
56
56
  protocol: str = "http"
57
57
  if config_path:
58
58
  self._logger.info("running the extractor with TLS enabled")
@@ -7,6 +7,8 @@ import grpc
7
7
  from tensorlake.function_executor.proto.function_executor_pb2 import (
8
8
  InfoRequest,
9
9
  InfoResponse,
10
+ InitializationFailureReason,
11
+ InitializationOutcomeCode,
10
12
  InitializeRequest,
11
13
  InitializeResponse,
12
14
  )
@@ -315,16 +317,42 @@ async def _initialize_server(
315
317
  initialize_request,
316
318
  timeout=customer_code_timeout_sec,
317
319
  )
318
- # TODO: set real stdout and stderr when their proper capturing on FE initialization is implemented.
319
- if initialize_response.success:
320
- return FunctionExecutorInitializationResult()
321
- elif initialize_response.HasField("customer_error"):
320
+
321
+ if (
322
+ initialize_response.outcome_code
323
+ == InitializationOutcomeCode.INITIALIZE_OUTCOME_CODE_SUCCESS
324
+ ):
322
325
  return FunctionExecutorInitializationResult(
323
- error=FunctionExecutorInitializationError.FUNCTION_ERROR,
324
- stderr=initialize_response.customer_error,
326
+ stdout=initialize_response.stdout, stderr=initialize_response.stderr
325
327
  )
328
+ elif (
329
+ initialize_response.outcome_code
330
+ == InitializationOutcomeCode.INITIALIZE_OUTCOME_CODE_FAILURE
331
+ ):
332
+ if (
333
+ initialize_response.failure_reason
334
+ == InitializationFailureReason.INITIALIZATION_FAILURE_REASON_FUNCTION_ERROR
335
+ ):
336
+ return FunctionExecutorInitializationResult(
337
+ error=FunctionExecutorInitializationError.FUNCTION_ERROR,
338
+ stdout=initialize_response.stdout,
339
+ stderr=initialize_response.stderr,
340
+ )
341
+ elif (
342
+ initialize_response.failure_reason
343
+ == InitializationFailureReason.INITIALIZATION_FAILURE_REASON_INTERNAL_ERROR
344
+ ):
345
+ # Don't add stdout/stderr because this is customer data.
346
+ raise RuntimeError("initialize RPC failed with internal error")
347
+ else:
348
+ raise ValueError(
349
+ f"unexpected failure reason {InitializationFailureReason.Name(initialize_response.failure_reason)} in initialize RPC response"
350
+ )
326
351
  else:
327
- raise Exception("initialize RPC failed at function executor server")
352
+ raise ValueError(
353
+ f"unexpected outcome code {InitializationOutcomeCode.Name(initialize_response.outcome_code)} in initialize RPC response"
354
+ )
355
+
328
356
  except grpc.aio.AioRpcError as e:
329
357
  if e.code() == grpc.StatusCode.DEADLINE_EXCEEDED:
330
358
  return FunctionExecutorInitializationResult(
@@ -8,6 +8,7 @@ from tensorlake.function_executor.proto.function_executor_pb2 import (
8
8
  InvocationStateRequest,
9
9
  InvocationStateResponse,
10
10
  SerializedObject,
11
+ SerializedObjectEncoding,
11
12
  SetInvocationStateResponse,
12
13
  )
13
14
  from tensorlake.function_executor.proto.function_executor_pb2_grpc import (
@@ -25,6 +26,10 @@ from .metrics.invocation_state_client import (
25
26
  metric_server_set_state_requests,
26
27
  )
27
28
 
29
+ # We're currently only supporting CloudPickle for invocation state values.
30
+ # FIXME: if Function Executor sends us something else then we fail the calls.
31
+ _VALUE_CONTENT_TYPE = "application/octet-stream"
32
+
28
33
 
29
34
  class InvocationStateClient:
30
35
  """InvocationStateClient is a client for the invocation state server of a Function Executor.
@@ -196,14 +201,21 @@ class InvocationStateClient:
196
201
  url: str = (
197
202
  f"{self._base_url}/internal/namespaces/{self._namespace}/compute_graphs/{self._graph}/invocations/{invocation_id}/ctx/{key}"
198
203
  )
199
- payload = value.bytes if value.HasField("bytes") else value.string
204
+ if (
205
+ value.encoding
206
+ != SerializedObjectEncoding.SERIALIZED_OBJECT_ENCODING_BINARY_PICKLE
207
+ ):
208
+ raise ValueError(
209
+ f"Unsupported value encoding: {SerializedObjectEncoding.Name(value.encoding)}. "
210
+ "Only binary pickle is supported for invocation state values."
211
+ )
200
212
 
201
213
  response = await self._http_client.post(
202
214
  url=url,
203
215
  files=[
204
216
  (
205
217
  "value",
206
- ("value", payload, value.content_type),
218
+ ("value", value.data, _VALUE_CONTENT_TYPE),
207
219
  ),
208
220
  ],
209
221
  )
@@ -245,7 +257,7 @@ class InvocationStateClient:
245
257
  )
246
258
  raise
247
259
 
248
- return serialized_object_from_http_response(response)
260
+ return _serialized_object_from_http_response(response)
249
261
 
250
262
  def _validate_request(self, request: InvocationStateRequest) -> None:
251
263
  (
@@ -265,17 +277,15 @@ class InvocationStateClient:
265
277
  raise ValueError("unknown request type")
266
278
 
267
279
 
268
- def serialized_object_from_http_response(response: httpx.Response) -> SerializedObject:
269
- # We're hardcoding the content type currently used by Python SDK. It might change in the future.
270
- # There's no other way for now to determine if the response is a bytes or string.
271
- if response.headers["content-type"] in [
272
- "application/octet-stream",
273
- "application/pickle",
274
- ]:
275
- return SerializedObject(
276
- bytes=response.content, content_type=response.headers["content-type"]
277
- )
278
- else:
279
- return SerializedObject(
280
- string=response.text, content_type=response.headers["content-type"]
280
+ def _serialized_object_from_http_response(response: httpx.Response) -> SerializedObject:
281
+ if response.headers["content-type"] != _VALUE_CONTENT_TYPE:
282
+ raise ValueError(
283
+ f"Unexpected content type: {response.headers['content-type']}. "
284
+ f"Expected: {_VALUE_CONTENT_TYPE}."
281
285
  )
286
+
287
+ return SerializedObject(
288
+ data=response.content,
289
+ encoding=SerializedObjectEncoding.SERIALIZED_OBJECT_ENCODING_BINARY_PICKLE,
290
+ encoding_version=0,
291
+ )
@@ -66,6 +66,7 @@ def emit_completed_task_metrics(task_info: TaskInfo, logger: Any) -> None:
66
66
  elif task_failure_reason in [
67
67
  TaskFailureReason.TASK_FAILURE_REASON_FUNCTION_ERROR,
68
68
  TaskFailureReason.TASK_FAILURE_REASON_FUNCTION_TIMEOUT,
69
+ TaskFailureReason.TASK_FAILURE_REASON_INVOCATION_ERROR,
69
70
  ]:
70
71
  metric_tasks_completed.labels(
71
72
  outcome_code=METRIC_TASKS_COMPLETED_OUTCOME_CODE_FAILURE,
@@ -5,6 +5,7 @@ from typing import Any, Optional, Tuple
5
5
  from tensorlake.function_executor.proto.function_executor_pb2 import (
6
6
  InitializeRequest,
7
7
  SerializedObject,
8
+ SerializedObjectEncoding,
8
9
  )
9
10
 
10
11
  from indexify.executor.blob_store.blob_store import BLOBStore
@@ -1,28 +1,31 @@
1
+ import asyncio
1
2
  from typing import Any, Optional
2
3
 
3
4
  from indexify.executor.function_executor.function_executor import FunctionExecutor
4
- from indexify.proto.executor_api_pb2 import FunctionExecutorTerminationReason
5
5
 
6
6
  from .events import FunctionExecutorDestroyed
7
7
 
8
8
 
9
9
  async def destroy_function_executor(
10
10
  function_executor: Optional[FunctionExecutor],
11
- termination_reason: FunctionExecutorTerminationReason,
11
+ lock: asyncio.Lock,
12
12
  logger: Any,
13
13
  ) -> FunctionExecutorDestroyed:
14
- """Destroys a function executor if it's not None.
14
+ """Destroys the function executor if it's not None.
15
+
16
+ The supplied lock is used to ensure that if a destroy operation is in progress,
17
+ then another caller won't return immediately assuming that the destroy is complete
18
+ due to its idempotency.
15
19
 
16
20
  Doesn't raise any exceptions.
17
21
  """
18
22
  logger = logger.bind(module=__name__)
19
23
 
20
24
  if function_executor is not None:
21
- logger.info(
22
- "destroying function executor",
23
- )
24
- await function_executor.destroy()
25
-
26
- return FunctionExecutorDestroyed(
27
- is_success=True, termination_reason=termination_reason
28
- )
25
+ async with lock:
26
+ logger.info(
27
+ "destroying function executor",
28
+ )
29
+ await function_executor.destroy()
30
+
31
+ return FunctionExecutorDestroyed(is_success=True)
@@ -4,7 +4,10 @@ from pathlib import Path
4
4
  from typing import Any, Optional
5
5
 
6
6
  import nanoid
7
- from tensorlake.function_executor.proto.function_executor_pb2 import SerializedObject
7
+ from tensorlake.function_executor.proto.function_executor_pb2 import (
8
+ SerializedObject,
9
+ SerializedObjectEncoding,
10
+ )
8
11
 
9
12
  from indexify.executor.blob_store.blob_store import BLOBStore
10
13
  from indexify.proto.executor_api_pb2 import (
@@ -179,20 +182,28 @@ def _serialized_object_from_data_payload_proto(
179
182
  """
180
183
  if data_payload.encoding == DataPayloadEncoding.DATA_PAYLOAD_ENCODING_BINARY_PICKLE:
181
184
  return SerializedObject(
182
- bytes=data,
183
- content_type="application/octet-stream",
185
+ data=data,
186
+ encoding=SerializedObjectEncoding.SERIALIZED_OBJECT_ENCODING_BINARY_PICKLE,
187
+ encoding_version=data_payload.encoding_version,
184
188
  )
185
189
  elif data_payload.encoding == DataPayloadEncoding.DATA_PAYLOAD_ENCODING_UTF8_TEXT:
186
190
  return SerializedObject(
187
- content_type="text/plain",
188
- string=data.decode("utf-8"),
191
+ data=data,
192
+ encoding=SerializedObjectEncoding.SERIALIZED_OBJECT_ENCODING_UTF8_TEXT,
193
+ encoding_version=data_payload.encoding_version,
189
194
  )
190
195
  elif data_payload.encoding == DataPayloadEncoding.DATA_PAYLOAD_ENCODING_UTF8_JSON:
191
- result = SerializedObject(
192
- content_type="application/json",
193
- string=data.decode("utf-8"),
196
+ return SerializedObject(
197
+ data=data,
198
+ encoding=SerializedObjectEncoding.SERIALIZED_OBJECT_ENCODING_UTF8_JSON,
199
+ encoding_version=data_payload.encoding_version,
200
+ )
201
+ elif data_payload.encoding == DataPayloadEncoding.DATA_PAYLOAD_ENCODING_BINARY_ZIP:
202
+ return SerializedObject(
203
+ data=data,
204
+ encoding=SerializedObjectEncoding.SERIALIZED_OBJECT_ENCODING_BINARY_ZIP,
205
+ encoding_version=data_payload.encoding_version,
194
206
  )
195
- return result
196
207
 
197
208
  raise ValueError(
198
209
  f"Can't convert data payload {data_payload} into serialized object"
@@ -55,19 +55,12 @@ class FunctionExecutorDestroyed(BaseEvent):
55
55
  Event indicating that Function Executor has been destroyed.
56
56
  """
57
57
 
58
- def __init__(
59
- self, is_success: bool, termination_reason: FunctionExecutorTerminationReason
60
- ):
58
+ def __init__(self, is_success: bool):
61
59
  super().__init__(EventType.FUNCTION_EXECUTOR_DESTROYED)
62
60
  self.is_success: bool = is_success
63
- self.termination_reason: FunctionExecutorTerminationReason = termination_reason
64
61
 
65
62
  def __str__(self) -> str:
66
- return (
67
- f"Event(type={self.event_type.name}, "
68
- f"is_success={self.is_success}, "
69
- f"termination_reason={FunctionExecutorTerminationReason.Name(self.termination_reason)})"
70
- )
63
+ return f"Event(type={self.event_type.name}, " f"is_success={self.is_success})"
71
64
 
72
65
 
73
66
  class ShutdownInitiated(BaseEvent):
@@ -75,15 +68,8 @@ class ShutdownInitiated(BaseEvent):
75
68
  Event indicating that Function Executor shutdown has been initiated.
76
69
  """
77
70
 
78
- def __init__(self, termination_reason: FunctionExecutorTerminationReason):
71
+ def __init__(self):
79
72
  super().__init__(EventType.SHUTDOWN_INITIATED)
80
- self.termination_reason: FunctionExecutorTerminationReason = termination_reason
81
-
82
- def __str__(self) -> str:
83
- return (
84
- f"Event(type={self.event_type.name}, "
85
- f"termination_reason={FunctionExecutorTerminationReason.Name(self.termination_reason)})"
86
- )
87
73
 
88
74
 
89
75
  class TaskPreparationFinished(BaseEvent):