indexify 0.2.39__py3-none-any.whl → 0.2.41__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. indexify/cli.py +92 -52
  2. indexify/executor/agent.py +99 -187
  3. indexify/executor/api_objects.py +2 -8
  4. indexify/executor/downloader.py +129 -90
  5. indexify/executor/executor_tasks.py +15 -30
  6. indexify/executor/function_executor/function_executor.py +32 -0
  7. indexify/executor/function_executor/function_executor_factory.py +26 -0
  8. indexify/executor/function_executor/function_executor_map.py +91 -0
  9. indexify/executor/function_executor/process_function_executor.py +64 -0
  10. indexify/executor/function_executor/process_function_executor_factory.py +102 -0
  11. indexify/executor/function_worker.py +227 -184
  12. indexify/executor/runtime_probes.py +9 -8
  13. indexify/executor/task_fetcher.py +80 -0
  14. indexify/executor/task_reporter.py +18 -25
  15. indexify/executor/task_store.py +35 -16
  16. indexify/function_executor/function_executor_service.py +86 -0
  17. indexify/function_executor/handlers/run_function/function_inputs_loader.py +54 -0
  18. indexify/function_executor/handlers/run_function/handler.py +149 -0
  19. indexify/function_executor/handlers/run_function/request_validator.py +24 -0
  20. indexify/function_executor/handlers/run_function/response_helper.py +98 -0
  21. indexify/function_executor/initialize_request_validator.py +22 -0
  22. indexify/function_executor/proto/configuration.py +13 -0
  23. indexify/function_executor/proto/function_executor.proto +70 -0
  24. indexify/function_executor/proto/function_executor_pb2.py +53 -0
  25. indexify/function_executor/proto/function_executor_pb2.pyi +125 -0
  26. indexify/function_executor/proto/function_executor_pb2_grpc.py +163 -0
  27. indexify/function_executor/proto/message_validator.py +38 -0
  28. indexify/function_executor/server.py +31 -0
  29. indexify/functions_sdk/data_objects.py +0 -9
  30. indexify/functions_sdk/graph.py +17 -10
  31. indexify/functions_sdk/graph_definition.py +3 -2
  32. indexify/functions_sdk/image.py +35 -30
  33. indexify/functions_sdk/indexify_functions.py +5 -5
  34. indexify/http_client.py +15 -23
  35. indexify/logging.py +32 -0
  36. {indexify-0.2.39.dist-info → indexify-0.2.41.dist-info}/METADATA +3 -1
  37. indexify-0.2.41.dist-info/RECORD +53 -0
  38. indexify/executor/indexify_executor.py +0 -32
  39. indexify-0.2.39.dist-info/RECORD +0 -34
  40. {indexify-0.2.39.dist-info → indexify-0.2.41.dist-info}/LICENSE.txt +0 -0
  41. {indexify-0.2.39.dist-info → indexify-0.2.41.dist-info}/WHEEL +0 -0
  42. {indexify-0.2.39.dist-info → indexify-0.2.41.dist-info}/entry_points.txt +0 -0
@@ -9,7 +9,6 @@ from indexify.common_util import get_httpx_client
9
9
  from indexify.executor.api_objects import RouterOutput as ApiRouterOutput
10
10
  from indexify.executor.api_objects import TaskResult
11
11
  from indexify.executor.task_store import CompletedTask
12
- from indexify.functions_sdk.object_serializer import get_serializer
13
12
 
14
13
  logger = structlog.get_logger(__name__)
15
14
 
@@ -42,19 +41,20 @@ class TaskReporter:
42
41
  self._client = get_httpx_client(config_path)
43
42
 
44
43
  def report_task_outcome(self, completed_task: CompletedTask):
45
-
46
44
  report = ReportingData()
47
45
  fn_outputs = []
48
- for output in completed_task.outputs or []:
49
- serializer = get_serializer(output.encoder)
50
- fn_outputs.append(
51
- (
52
- "node_outputs",
53
- (nanoid.generate(), output.payload, serializer.content_type),
46
+
47
+ if completed_task.function_output:
48
+ for output in completed_task.function_output.outputs or []:
49
+ payload = output.bytes if output.HasField("bytes") else output.string
50
+ fn_outputs.append(
51
+ (
52
+ "node_outputs",
53
+ (nanoid.generate(), payload, output.content_type),
54
+ )
54
55
  )
55
- )
56
- report.output_count += 1
57
- report.output_total_bytes += len(output.payload)
56
+ report.output_count += 1
57
+ report.output_total_bytes += len(payload)
58
58
 
59
59
  if completed_task.stdout:
60
60
  fn_outputs.append(
@@ -134,24 +134,17 @@ class TaskReporter:
134
134
  kwargs["files"] = fn_outputs
135
135
  else:
136
136
  kwargs["files"] = FORCE_MULTIPART
137
- try:
138
- response = self._client.post(
139
- url=f"{self._base_url}/internal/ingest_files",
140
- **kwargs,
141
- )
142
- except Exception as e:
143
- logger.error(
144
- "failed to report task outcome",
145
- task_id=completed_task.task.id,
146
- retries=completed_task.reporting_retries,
147
- error=type(e).__name__,
148
- message=str(e),
149
- )
150
- raise e
137
+
138
+ response = self._client.post(
139
+ url=f"{self._base_url}/internal/ingest_files",
140
+ **kwargs,
141
+ )
151
142
 
152
143
  try:
153
144
  response.raise_for_status()
154
145
  except Exception as e:
146
+ # Caller catches and logs the exception.
147
+ # Log response details here for easier debugging.
155
148
  logger.error(
156
149
  "failed to report task outcome",
157
150
  task_id=completed_task.task.id,
@@ -1,23 +1,38 @@
1
1
  import asyncio
2
2
  from typing import Dict, List, Literal, Optional
3
3
 
4
- from pydantic import BaseModel
5
- from rich import print
4
+ import structlog
6
5
 
7
- from indexify.functions_sdk.data_objects import IndexifyData, RouterOutput
6
+ from indexify.function_executor.proto.function_executor_pb2 import (
7
+ FunctionOutput,
8
+ RouterOutput,
9
+ )
8
10
 
9
11
  from .api_objects import Task
10
12
 
11
-
12
- class CompletedTask(BaseModel):
13
- task: Task
14
- task_outcome: Literal["success", "failure"]
15
- outputs: Optional[List[IndexifyData]] = None
16
- router_output: Optional[RouterOutput] = None
17
- stdout: Optional[str] = None
18
- stderr: Optional[str] = None
19
- reducer: bool = False
20
- reporting_retries: int = 0
13
+ logger = structlog.get_logger(module=__name__)
14
+
15
+
16
+ class CompletedTask:
17
+ def __init__(
18
+ self,
19
+ task: Task,
20
+ task_outcome: Literal["success", "failure"],
21
+ function_output: Optional[FunctionOutput] = None,
22
+ router_output: Optional[RouterOutput] = None,
23
+ stdout: Optional[str] = None,
24
+ stderr: Optional[str] = None,
25
+ reducer: bool = False,
26
+ reporting_retries: int = 0,
27
+ ):
28
+ self.task = task
29
+ self.task_outcome = task_outcome
30
+ self.function_output = function_output
31
+ self.router_output = router_output
32
+ self.stdout = stdout
33
+ self.stderr = stderr
34
+ self.reducer = reducer
35
+ self.reporting_retries = reporting_retries
21
36
 
22
37
 
23
38
  class TaskStore:
@@ -41,8 +56,12 @@ class TaskStore:
41
56
  or (task.id in self._finished)
42
57
  ):
43
58
  continue
44
- print(
45
- f"[bold] task store: [/bold] added task: {task.id} graph: {task.compute_graph} fn: {task.compute_fn} to queue"
59
+ logger.info(
60
+ "added task",
61
+ task_id=task.id,
62
+ namespace=task.namespace,
63
+ graph=task.compute_graph,
64
+ fn=task.compute_fn,
46
65
  )
47
66
  self._tasks[task.id] = task
48
67
  self._new_task_event.set()
@@ -87,7 +106,7 @@ class TaskStore:
87
106
  def mark_reported(self, task_id: str):
88
107
  self._tasks.pop(task_id)
89
108
  self._finished.pop(task_id)
90
- print(f"[bold] task store: [/bold] removed task: {task_id} from queue")
109
+ logger.info("removed task", task_id=task_id)
91
110
 
92
111
  def report_failed(self, task_id: str):
93
112
  if self._finished[task_id].task_outcome != "Failed":
@@ -0,0 +1,86 @@
1
+ from typing import Optional, Union
2
+
3
+ import grpc
4
+ import structlog
5
+
6
+ from indexify.functions_sdk.indexify_functions import (
7
+ IndexifyFunction,
8
+ IndexifyRouter,
9
+ )
10
+ from indexify.functions_sdk.object_serializer import get_serializer
11
+
12
+ from .handlers.run_function.handler import Handler as RunTaskHandler
13
+ from .handlers.run_function.request_validator import (
14
+ RequestValidator as RunTaskRequestValidator,
15
+ )
16
+ from .initialize_request_validator import InitializeRequestValidator
17
+ from .proto.function_executor_pb2 import (
18
+ InitializeRequest,
19
+ InitializeResponse,
20
+ RunTaskRequest,
21
+ RunTaskResponse,
22
+ )
23
+ from .proto.function_executor_pb2_grpc import FunctionExecutorServicer
24
+
25
+
26
+ class FunctionExecutorService(FunctionExecutorServicer):
27
+ def __init__(self, indexify_server_address: str, config_path: Optional[str]):
28
+ self._indexify_server_address = indexify_server_address
29
+ self._config_path = config_path
30
+ self._logger = structlog.get_logger(module=__name__)
31
+ self._namespace: Optional[str] = None
32
+ self._graph_name: Optional[str] = None
33
+ self._graph_version: Optional[int] = None
34
+ self._function_name: Optional[str] = None
35
+ self._function: Optional[Union[IndexifyFunction, IndexifyRouter]] = None
36
+
37
+ def initialize(
38
+ self, request: InitializeRequest, context: grpc.ServicerContext
39
+ ) -> InitializeResponse:
40
+ request_validator: InitializeRequestValidator = InitializeRequestValidator(
41
+ request
42
+ )
43
+ request_validator.check()
44
+
45
+ self._namespace = request.namespace
46
+ self._graph_name = request.graph_name
47
+ self._graph_version = request.graph_version
48
+ self._function_name = request.function_name
49
+ # The function is only loaded once per Function Executor. It's important to use a single
50
+ # loaded function so all the tasks when executed are sharing the same memory. This allows
51
+ # implementing smart caching in customer code. E.g. load a model into GPU only once and
52
+ # share the model's file descriptor between all tasks or download function configuration
53
+ # only once.
54
+ graph_serializer = get_serializer(request.graph.content_type)
55
+ graph = graph_serializer.deserialize(request.graph.bytes)
56
+ self._function = graph_serializer.deserialize(graph[request.function_name])
57
+
58
+ self._logger = self._logger.bind(
59
+ namespace=request.namespace,
60
+ graph_name=request.graph_name,
61
+ graph_version=str(request.graph_version),
62
+ function_name=request.function_name,
63
+ )
64
+ self._logger.info("initialized function executor service")
65
+
66
+ return InitializeResponse(success=True)
67
+
68
+ def run_task(
69
+ self, request: RunTaskRequest, context: grpc.ServicerContext
70
+ ) -> RunTaskResponse:
71
+ # Customer function code never raises an exception because we catch all of them and add
72
+ # their details to the response. We can only get an exception here if our own code failed.
73
+ # If our code raises an exception the grpc framework converts it into GRPC_STATUS_UNKNOWN
74
+ # error with the exception message. Differentiating errors is not needed for now.
75
+ RunTaskRequestValidator(request=request).check()
76
+ return RunTaskHandler(
77
+ request=request,
78
+ namespace=self._namespace,
79
+ graph_name=self._graph_name,
80
+ graph_version=self._graph_version,
81
+ function_name=self._function_name,
82
+ function=self._function,
83
+ logger=self._logger,
84
+ indexify_server_addr=self._indexify_server_address,
85
+ config_path=self._config_path,
86
+ ).run()
@@ -0,0 +1,54 @@
1
+ from typing import Optional
2
+
3
+ from pydantic import BaseModel
4
+
5
+ from indexify.function_executor.proto.function_executor_pb2 import (
6
+ RunTaskRequest,
7
+ SerializedObject,
8
+ )
9
+ from indexify.functions_sdk.data_objects import IndexifyData
10
+ from indexify.functions_sdk.object_serializer import get_serializer
11
+
12
+
13
+ class FunctionInputs(BaseModel):
14
+ input: IndexifyData
15
+ init_value: Optional[IndexifyData] = None
16
+
17
+
18
+ class FunctionInputsLoader:
19
+ def __init__(self, request: RunTaskRequest):
20
+ self._request = request
21
+
22
+ def load(self) -> FunctionInputs:
23
+ return FunctionInputs(
24
+ input=self._function_input(),
25
+ init_value=self._accumulator_input(),
26
+ )
27
+
28
+ def _function_input(self) -> IndexifyData:
29
+ return _to_indexify_data(
30
+ self._request.graph_invocation_id, self._request.function_input
31
+ )
32
+
33
+ def _accumulator_input(self) -> Optional[IndexifyData]:
34
+ return (
35
+ _to_indexify_data(
36
+ self._request.graph_invocation_id, self._request.function_init_value
37
+ )
38
+ if self._request.HasField("function_init_value")
39
+ else None
40
+ )
41
+
42
+
43
+ def _to_indexify_data(
44
+ input_id: str, serialized_object: SerializedObject
45
+ ) -> IndexifyData:
46
+ return IndexifyData(
47
+ input_id=input_id,
48
+ payload=(
49
+ serialized_object.bytes
50
+ if serialized_object.HasField("bytes")
51
+ else serialized_object.string
52
+ ),
53
+ encoder=get_serializer(serialized_object.content_type).encoding_type,
54
+ )
@@ -0,0 +1,149 @@
1
+ import io
2
+ import sys
3
+ import traceback
4
+ from contextlib import redirect_stderr, redirect_stdout
5
+ from typing import Any, Optional, Union
6
+
7
+ from indexify.function_executor.proto.function_executor_pb2 import (
8
+ RunTaskRequest,
9
+ RunTaskResponse,
10
+ )
11
+ from indexify.functions_sdk.indexify_functions import (
12
+ FunctionCallResult,
13
+ GraphInvocationContext,
14
+ IndexifyFunction,
15
+ IndexifyFunctionWrapper,
16
+ IndexifyRouter,
17
+ RouterCallResult,
18
+ )
19
+ from indexify.http_client import IndexifyClient
20
+
21
+ from .function_inputs_loader import FunctionInputs, FunctionInputsLoader
22
+ from .response_helper import ResponseHelper
23
+
24
+
25
+ class Handler:
26
+ def __init__(
27
+ self,
28
+ request: RunTaskRequest,
29
+ namespace: str,
30
+ graph_name: str,
31
+ graph_version: int,
32
+ function_name: str,
33
+ function: Union[IndexifyFunction, IndexifyRouter],
34
+ logger: Any,
35
+ indexify_server_addr: str,
36
+ config_path: Optional[str],
37
+ ):
38
+ self._function_name: str = function_name
39
+ self._logger = logger.bind(
40
+ graph_invocation_id=request.graph_invocation_id,
41
+ task_id=request.task_id,
42
+ )
43
+ self._input_loader = FunctionInputsLoader(request)
44
+ self._response_helper = ResponseHelper(task_id=request.task_id)
45
+ # TODO: use files for stdout, stderr capturing. This puts a natural and thus reasonable
46
+ # rate limit on writes and avoids consuming expensive memory for function logs.
47
+ self._func_stdout: io.StringIO = io.StringIO()
48
+ self._func_stderr: io.StringIO = io.StringIO()
49
+
50
+ self._function_wrapper: IndexifyFunctionWrapper = IndexifyFunctionWrapper(
51
+ indexify_function=function,
52
+ context=GraphInvocationContext(
53
+ invocation_id=request.graph_invocation_id,
54
+ graph_name=graph_name,
55
+ graph_version=str(graph_version),
56
+ indexify_client=_indexify_client(
57
+ logger=self._logger,
58
+ namespace=namespace,
59
+ indexify_server_addr=indexify_server_addr,
60
+ config_path=config_path,
61
+ ),
62
+ ),
63
+ )
64
+
65
+ def run(self) -> RunTaskResponse:
66
+ """Runs the task.
67
+
68
+ Raises an exception if our own code failed, customer function failure doesn't result in any exception.
69
+ Details of customer function failure are returned in the response.
70
+ """
71
+ self._logger.info("running function")
72
+ inputs: FunctionInputs = self._input_loader.load()
73
+ self._flush_logs()
74
+ return self._run_func_safe_and_captured(inputs)
75
+
76
+ def _run_func_safe_and_captured(self, inputs: FunctionInputs) -> RunTaskResponse:
77
+ """Runs the customer function while capturing what happened in it.
78
+
79
+ Function stdout and stderr are captured so they don't get into Function Executor process stdout
80
+ and stderr. Never throws an Exception. Caller can determine if the function succeeded
81
+ using the response.
82
+ """
83
+ try:
84
+ with redirect_stdout(self._func_stdout), redirect_stderr(self._func_stderr):
85
+ return self._run_func(inputs)
86
+ except Exception:
87
+ return self._response_helper.failure_response(
88
+ message=traceback.format_exc(),
89
+ stdout=self._func_stdout.getvalue(),
90
+ stderr=self._func_stderr.getvalue(),
91
+ )
92
+
93
+ def _run_func(self, inputs: FunctionInputs) -> RunTaskResponse:
94
+ if _is_router(self._function_wrapper):
95
+ result: RouterCallResult = self._function_wrapper.invoke_router(
96
+ self._function_name, inputs.input
97
+ )
98
+ return self._response_helper.router_response(
99
+ result=result,
100
+ stdout=self._func_stdout.getvalue(),
101
+ stderr=self._func_stderr.getvalue(),
102
+ )
103
+ else:
104
+ result: FunctionCallResult = self._function_wrapper.invoke_fn_ser(
105
+ self._function_name, inputs.input, inputs.init_value
106
+ )
107
+ return self._response_helper.function_response(
108
+ result=result,
109
+ is_reducer=_func_is_reducer(self._function_wrapper),
110
+ stdout=self._func_stdout.getvalue(),
111
+ stderr=self._func_stderr.getvalue(),
112
+ )
113
+
114
+ def _flush_logs(self) -> None:
115
+ # Flush any logs buffered in memory before running the function with stdout, stderr capture.
116
+ # Otherwise our logs logged before this point will end up in the function's stdout.
117
+ # structlog.PrintLogger uses print function. This is why flushing with print works.
118
+ print("", flush=True)
119
+ sys.stdout.flush()
120
+ sys.stderr.flush()
121
+
122
+
123
+ def _indexify_client(
124
+ logger: Any,
125
+ namespace: str,
126
+ indexify_server_addr: str,
127
+ config_path: Optional[str],
128
+ ) -> IndexifyClient:
129
+ # This client is required to implement key/value store functionality for customer functions.
130
+ protocol: str = "http"
131
+ if config_path:
132
+ logger.info("TLS is enabled")
133
+ protocol = "https"
134
+ return IndexifyClient(
135
+ service_url=f"{protocol}://{indexify_server_addr}",
136
+ namespace=namespace,
137
+ config_path=config_path,
138
+ )
139
+
140
+
141
+ def _is_router(func_wrapper: IndexifyFunctionWrapper) -> bool:
142
+ return (
143
+ str(type(func_wrapper.indexify_function))
144
+ == "<class 'indexify.functions_sdk.indexify_functions.IndexifyRouter'>"
145
+ )
146
+
147
+
148
+ def _func_is_reducer(func_wrapper: IndexifyFunctionWrapper) -> bool:
149
+ return func_wrapper.indexify_function.accumulate is not None
@@ -0,0 +1,24 @@
1
+ from typing import Any
2
+
3
+ from indexify.function_executor.proto.function_executor_pb2 import (
4
+ RunTaskRequest,
5
+ )
6
+ from indexify.function_executor.proto.message_validator import MessageValidator
7
+
8
+
9
+ class RequestValidator:
10
+ def __init__(self, request: RunTaskRequest):
11
+ self._request = request
12
+ self._message_validator = MessageValidator(request)
13
+
14
+ def check(self):
15
+ """Validates the request.
16
+
17
+ Raises: ValueError: If the request is invalid.
18
+ """
19
+ (
20
+ self._message_validator.required_field("graph_invocation_id")
21
+ .required_field("task_id")
22
+ .required_serialized_object("function_input")
23
+ .optional_serialized_object("function_init_value")
24
+ )
@@ -0,0 +1,98 @@
1
+ from typing import List
2
+
3
+ from indexify.function_executor.proto.function_executor_pb2 import (
4
+ FunctionOutput,
5
+ RouterOutput,
6
+ RunTaskResponse,
7
+ SerializedObject,
8
+ )
9
+ from indexify.functions_sdk.data_objects import IndexifyData
10
+ from indexify.functions_sdk.indexify_functions import (
11
+ FunctionCallResult,
12
+ RouterCallResult,
13
+ )
14
+ from indexify.functions_sdk.object_serializer import get_serializer
15
+
16
+
17
+ class ResponseHelper:
18
+ """Helper class for generating RunFunctionResponse."""
19
+
20
+ def __init__(self, task_id: str):
21
+ self._task_id = task_id
22
+
23
+ def function_response(
24
+ self,
25
+ result: FunctionCallResult,
26
+ is_reducer: bool,
27
+ stdout: str = "",
28
+ stderr: str = "",
29
+ ) -> RunTaskResponse:
30
+ if result.traceback_msg is None:
31
+ return RunTaskResponse(
32
+ task_id=self._task_id,
33
+ function_output=self._to_function_output(result.ser_outputs),
34
+ router_output=None,
35
+ stdout=stdout,
36
+ stderr=stderr,
37
+ is_reducer=is_reducer,
38
+ success=True,
39
+ )
40
+ else:
41
+ return self.failure_response(
42
+ message=result.traceback_msg,
43
+ stdout=stdout,
44
+ stderr=stderr,
45
+ )
46
+
47
+ def router_response(
48
+ self,
49
+ result: RouterCallResult,
50
+ stdout: str = "",
51
+ stderr: str = "",
52
+ ) -> RunTaskResponse:
53
+ if result.traceback_msg is None:
54
+ return RunTaskResponse(
55
+ task_id=self._task_id,
56
+ function_output=None,
57
+ router_output=RouterOutput(edges=result.edges),
58
+ stdout=stdout,
59
+ stderr=stderr,
60
+ is_reducer=False,
61
+ success=True,
62
+ )
63
+ else:
64
+ return self.failure_response(
65
+ message=result.traceback_msg,
66
+ stdout=stdout,
67
+ stderr=stderr,
68
+ )
69
+
70
+ def failure_response(
71
+ self, message: str, stdout: str, stderr: str
72
+ ) -> RunTaskResponse:
73
+ stderr = "\n".join([stderr, message])
74
+ return RunTaskResponse(
75
+ task_id=self._task_id,
76
+ function_output=None,
77
+ router_output=None,
78
+ stdout=stdout,
79
+ stderr=stderr,
80
+ is_reducer=False,
81
+ success=False,
82
+ )
83
+
84
+ def _to_function_output(self, outputs: List[IndexifyData]) -> FunctionOutput:
85
+ output = FunctionOutput(outputs=[])
86
+ for ix_data in outputs:
87
+ serialized_object: SerializedObject = SerializedObject(
88
+ content_type=get_serializer(ix_data.encoder).content_type,
89
+ )
90
+ if isinstance(ix_data.payload, bytes):
91
+ serialized_object.bytes = ix_data.payload
92
+ elif isinstance(ix_data.payload, str):
93
+ serialized_object.string = ix_data.payload
94
+ else:
95
+ raise ValueError(f"Unsupported payload type: {type(ix_data.payload)}")
96
+
97
+ output.outputs.append(serialized_object)
98
+ return output
@@ -0,0 +1,22 @@
1
+ from indexify.function_executor.proto.message_validator import MessageValidator
2
+
3
+ from .proto.function_executor_pb2 import InitializeRequest
4
+
5
+
6
+ class InitializeRequestValidator:
7
+ def __init__(self, request: InitializeRequest):
8
+ self._request = request
9
+ self._message_validator = MessageValidator(request)
10
+
11
+ def check(self):
12
+ """Validates the request.
13
+
14
+ Raises: ValueError: If the request is invalid.
15
+ """
16
+ (
17
+ self._message_validator.required_field("namespace")
18
+ .required_field("graph_name")
19
+ .required_field("graph_version")
20
+ .required_field("function_name")
21
+ .required_serialized_object("graph")
22
+ )
@@ -0,0 +1,13 @@
1
+ # We send function inputs and outputs over gRPC.
2
+ # -1 means unlimited. We don't want to limit the size of data customers are using.
3
+ # The effective max message size in this case is about 1.9 GB, see the max payload test.
4
+ # This is due to internal hard gRPC limits. When we want to increase the message sizes
5
+ # we'll have to implement chunking for large messages.
6
+ _MAX_GRPC_MESSAGE_LENGTH = -1
7
+
8
+ GRPC_SERVER_OPTIONS = [
9
+ ("grpc.max_receive_message_length", _MAX_GRPC_MESSAGE_LENGTH),
10
+ ("grpc.max_send_message_length", _MAX_GRPC_MESSAGE_LENGTH),
11
+ ]
12
+
13
+ GRPC_CHANNEL_OPTIONS = GRPC_SERVER_OPTIONS
@@ -0,0 +1,70 @@
1
+
2
+
3
+ syntax = "proto3";
4
+
5
+ package function_executor_service;
6
+
7
+ // The messages should not use any Python SDK objects. Only Function Executor implemented
8
+ // in Python is allowed to import Python SDK to run customer functions. This ensures that
9
+ // all the other components can be written in any language.
10
+
11
+ message SerializedObject {
12
+ oneof data {
13
+ // Set bytes if the object is serialized as bytes.
14
+ bytes bytes = 1;
15
+ // Set string if the object is serialized as string.
16
+ string string = 2;
17
+ }
18
+ // The content type determines the serializer used to serialize the object.
19
+ optional string content_type = 3;
20
+ }
21
+
22
+ // InitializeRequest contains information about the function
23
+ // that Function Executor is going to run the tasks for.
24
+ message InitializeRequest {
25
+ optional string namespace = 1;
26
+ optional string graph_name = 2;
27
+ optional int32 graph_version = 3;
28
+ optional string function_name = 5;
29
+ optional SerializedObject graph = 7;
30
+ }
31
+
32
+ message InitializeResponse {
33
+ optional bool success = 1;
34
+ }
35
+
36
+ message FunctionOutput {
37
+ repeated SerializedObject outputs = 1;
38
+ }
39
+
40
+ message RouterOutput {
41
+ repeated string edges = 1;
42
+ }
43
+
44
+ message RunTaskRequest {
45
+ optional string graph_invocation_id = 4;
46
+ optional string task_id = 6;
47
+ optional SerializedObject function_input = 9;
48
+ optional SerializedObject function_init_value = 10;
49
+ }
50
+
51
+ message RunTaskResponse {
52
+ optional string task_id = 1;
53
+ optional FunctionOutput function_output = 2;
54
+ optional RouterOutput router_output = 3;
55
+ optional string stdout = 4;
56
+ optional string stderr = 5;
57
+ optional bool is_reducer = 6;
58
+ optional bool success = 7;
59
+ }
60
+
61
+ service FunctionExecutor {
62
+ // Initializes the Function Executor to run tasks
63
+ // for a particular function. This method is called only
64
+ // once per Function Executor as it can only run a single function.
65
+ // It should be called before calling RunTask for the function.
66
+ rpc initialize(InitializeRequest) returns (InitializeResponse);
67
+ // Executes the task defined in the request.
68
+ // Multiple tasks can be running in parallel.
69
+ rpc run_task(RunTaskRequest) returns (RunTaskResponse);
70
+ }