indexify 0.2.40__py3-none-any.whl → 0.2.41__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- indexify/cli.py +92 -52
- indexify/executor/agent.py +99 -187
- indexify/executor/api_objects.py +2 -8
- indexify/executor/downloader.py +129 -90
- indexify/executor/executor_tasks.py +15 -30
- indexify/executor/function_executor/function_executor.py +32 -0
- indexify/executor/function_executor/function_executor_factory.py +26 -0
- indexify/executor/function_executor/function_executor_map.py +91 -0
- indexify/executor/function_executor/process_function_executor.py +64 -0
- indexify/executor/function_executor/process_function_executor_factory.py +102 -0
- indexify/executor/function_worker.py +227 -184
- indexify/executor/runtime_probes.py +9 -8
- indexify/executor/task_fetcher.py +80 -0
- indexify/executor/task_reporter.py +18 -25
- indexify/executor/task_store.py +35 -16
- indexify/function_executor/function_executor_service.py +86 -0
- indexify/function_executor/handlers/run_function/function_inputs_loader.py +54 -0
- indexify/function_executor/handlers/run_function/handler.py +149 -0
- indexify/function_executor/handlers/run_function/request_validator.py +24 -0
- indexify/function_executor/handlers/run_function/response_helper.py +98 -0
- indexify/function_executor/initialize_request_validator.py +22 -0
- indexify/function_executor/proto/configuration.py +13 -0
- indexify/function_executor/proto/function_executor.proto +70 -0
- indexify/function_executor/proto/function_executor_pb2.py +53 -0
- indexify/function_executor/proto/function_executor_pb2.pyi +125 -0
- indexify/function_executor/proto/function_executor_pb2_grpc.py +163 -0
- indexify/function_executor/proto/message_validator.py +38 -0
- indexify/function_executor/server.py +31 -0
- indexify/functions_sdk/data_objects.py +0 -9
- indexify/functions_sdk/graph.py +10 -11
- indexify/functions_sdk/graph_definition.py +2 -2
- indexify/functions_sdk/image.py +35 -30
- indexify/functions_sdk/indexify_functions.py +5 -5
- indexify/http_client.py +15 -23
- indexify/logging.py +32 -0
- {indexify-0.2.40.dist-info → indexify-0.2.41.dist-info}/METADATA +3 -1
- indexify-0.2.41.dist-info/RECORD +53 -0
- indexify/executor/indexify_executor.py +0 -32
- indexify-0.2.40.dist-info/RECORD +0 -34
- {indexify-0.2.40.dist-info → indexify-0.2.41.dist-info}/LICENSE.txt +0 -0
- {indexify-0.2.40.dist-info → indexify-0.2.41.dist-info}/WHEEL +0 -0
- {indexify-0.2.40.dist-info → indexify-0.2.41.dist-info}/entry_points.txt +0 -0
@@ -9,7 +9,6 @@ from indexify.common_util import get_httpx_client
|
|
9
9
|
from indexify.executor.api_objects import RouterOutput as ApiRouterOutput
|
10
10
|
from indexify.executor.api_objects import TaskResult
|
11
11
|
from indexify.executor.task_store import CompletedTask
|
12
|
-
from indexify.functions_sdk.object_serializer import get_serializer
|
13
12
|
|
14
13
|
logger = structlog.get_logger(__name__)
|
15
14
|
|
@@ -42,19 +41,20 @@ class TaskReporter:
|
|
42
41
|
self._client = get_httpx_client(config_path)
|
43
42
|
|
44
43
|
def report_task_outcome(self, completed_task: CompletedTask):
|
45
|
-
|
46
44
|
report = ReportingData()
|
47
45
|
fn_outputs = []
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
(
|
52
|
-
|
53
|
-
(
|
46
|
+
|
47
|
+
if completed_task.function_output:
|
48
|
+
for output in completed_task.function_output.outputs or []:
|
49
|
+
payload = output.bytes if output.HasField("bytes") else output.string
|
50
|
+
fn_outputs.append(
|
51
|
+
(
|
52
|
+
"node_outputs",
|
53
|
+
(nanoid.generate(), payload, output.content_type),
|
54
|
+
)
|
54
55
|
)
|
55
|
-
|
56
|
-
|
57
|
-
report.output_total_bytes += len(output.payload)
|
56
|
+
report.output_count += 1
|
57
|
+
report.output_total_bytes += len(payload)
|
58
58
|
|
59
59
|
if completed_task.stdout:
|
60
60
|
fn_outputs.append(
|
@@ -134,24 +134,17 @@ class TaskReporter:
|
|
134
134
|
kwargs["files"] = fn_outputs
|
135
135
|
else:
|
136
136
|
kwargs["files"] = FORCE_MULTIPART
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
except Exception as e:
|
143
|
-
logger.error(
|
144
|
-
"failed to report task outcome",
|
145
|
-
task_id=completed_task.task.id,
|
146
|
-
retries=completed_task.reporting_retries,
|
147
|
-
error=type(e).__name__,
|
148
|
-
message=str(e),
|
149
|
-
)
|
150
|
-
raise e
|
137
|
+
|
138
|
+
response = self._client.post(
|
139
|
+
url=f"{self._base_url}/internal/ingest_files",
|
140
|
+
**kwargs,
|
141
|
+
)
|
151
142
|
|
152
143
|
try:
|
153
144
|
response.raise_for_status()
|
154
145
|
except Exception as e:
|
146
|
+
# Caller catches and logs the exception.
|
147
|
+
# Log response details here for easier debugging.
|
155
148
|
logger.error(
|
156
149
|
"failed to report task outcome",
|
157
150
|
task_id=completed_task.task.id,
|
indexify/executor/task_store.py
CHANGED
@@ -1,23 +1,38 @@
|
|
1
1
|
import asyncio
|
2
2
|
from typing import Dict, List, Literal, Optional
|
3
3
|
|
4
|
-
|
5
|
-
from rich import print
|
4
|
+
import structlog
|
6
5
|
|
7
|
-
from indexify.
|
6
|
+
from indexify.function_executor.proto.function_executor_pb2 import (
|
7
|
+
FunctionOutput,
|
8
|
+
RouterOutput,
|
9
|
+
)
|
8
10
|
|
9
11
|
from .api_objects import Task
|
10
12
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
13
|
+
logger = structlog.get_logger(module=__name__)
|
14
|
+
|
15
|
+
|
16
|
+
class CompletedTask:
    """Result of a finished task along with the data reported for it.

    Plain attribute holder: every constructor argument is stored unchanged
    under the attribute of the same name.
    """

    def __init__(
        self,
        task: Task,
        task_outcome: Literal["success", "failure"],
        function_output: Optional[FunctionOutput] = None,
        router_output: Optional[RouterOutput] = None,
        stdout: Optional[str] = None,
        stderr: Optional[str] = None,
        reducer: bool = False,
        reporting_retries: int = 0,
    ):
        # The task that ran and how it ended.
        self.task, self.task_outcome = task, task_outcome
        # Outputs; both default to None when not supplied.
        self.function_output, self.router_output = function_output, router_output
        # Captured output streams of the function, if any.
        self.stdout, self.stderr = stdout, stderr
        # Bookkeeping flags.
        self.reducer, self.reporting_retries = reducer, reporting_retries
|
21
36
|
|
22
37
|
|
23
38
|
class TaskStore:
|
@@ -41,8 +56,12 @@ class TaskStore:
|
|
41
56
|
or (task.id in self._finished)
|
42
57
|
):
|
43
58
|
continue
|
44
|
-
|
45
|
-
|
59
|
+
logger.info(
|
60
|
+
"added task",
|
61
|
+
task_id=task.id,
|
62
|
+
namespace=task.namespace,
|
63
|
+
graph=task.compute_graph,
|
64
|
+
fn=task.compute_fn,
|
46
65
|
)
|
47
66
|
self._tasks[task.id] = task
|
48
67
|
self._new_task_event.set()
|
@@ -87,7 +106,7 @@ class TaskStore:
|
|
87
106
|
def mark_reported(self, task_id: str):
|
88
107
|
self._tasks.pop(task_id)
|
89
108
|
self._finished.pop(task_id)
|
90
|
-
|
109
|
+
logger.info("removed task", task_id=task_id)
|
91
110
|
|
92
111
|
def report_failed(self, task_id: str):
|
93
112
|
if self._finished[task_id].task_outcome != "Failed":
|
@@ -0,0 +1,86 @@
|
|
1
|
+
from typing import Optional, Union
|
2
|
+
|
3
|
+
import grpc
|
4
|
+
import structlog
|
5
|
+
|
6
|
+
from indexify.functions_sdk.indexify_functions import (
|
7
|
+
IndexifyFunction,
|
8
|
+
IndexifyRouter,
|
9
|
+
)
|
10
|
+
from indexify.functions_sdk.object_serializer import get_serializer
|
11
|
+
|
12
|
+
from .handlers.run_function.handler import Handler as RunTaskHandler
|
13
|
+
from .handlers.run_function.request_validator import (
|
14
|
+
RequestValidator as RunTaskRequestValidator,
|
15
|
+
)
|
16
|
+
from .initialize_request_validator import InitializeRequestValidator
|
17
|
+
from .proto.function_executor_pb2 import (
|
18
|
+
InitializeRequest,
|
19
|
+
InitializeResponse,
|
20
|
+
RunTaskRequest,
|
21
|
+
RunTaskResponse,
|
22
|
+
)
|
23
|
+
from .proto.function_executor_pb2_grpc import FunctionExecutorServicer
|
24
|
+
|
25
|
+
|
26
|
+
class FunctionExecutorService(FunctionExecutorServicer):
    """gRPC servicer that loads one function in `initialize` and runs tasks for it in `run_task`."""

    def __init__(self, indexify_server_address: str, config_path: Optional[str]):
        self._logger = structlog.get_logger(module=__name__)
        self._indexify_server_address = indexify_server_address
        self._config_path = config_path
        # Everything below stays None until initialize() is called.
        self._namespace: Optional[str] = None
        self._graph_name: Optional[str] = None
        self._graph_version: Optional[int] = None
        self._function_name: Optional[str] = None
        self._function: Optional[Union[IndexifyFunction, IndexifyRouter]] = None

    def initialize(
        self, request: InitializeRequest, context: grpc.ServicerContext
    ) -> InitializeResponse:
        """Validates the request, remembers the function identity and loads the function.

        Returns an InitializeResponse with success=True. Any exception raised by
        validation or deserialization propagates to the caller.
        """
        InitializeRequestValidator(request).check()

        self._namespace = request.namespace
        self._graph_name = request.graph_name
        self._graph_version = request.graph_version
        self._function_name = request.function_name

        # The function is only loaded once per Function Executor. It's important to use a single
        # loaded function so all the tasks when executed are sharing the same memory. This allows
        # implementing smart caching in customer code. E.g. load a model into GPU only once and
        # share the model's file descriptor between all tasks or download function configuration
        # only once.
        serializer = get_serializer(request.graph.content_type)
        serialized_functions = serializer.deserialize(request.graph.bytes)
        self._function = serializer.deserialize(
            serialized_functions[request.function_name]
        )

        # All later log lines of this service carry the function identity.
        self._logger = self._logger.bind(
            namespace=request.namespace,
            graph_name=request.graph_name,
            graph_version=str(request.graph_version),
            function_name=request.function_name,
        )
        self._logger.info("initialized function executor service")

        return InitializeResponse(success=True)

    def run_task(
        self, request: RunTaskRequest, context: grpc.ServicerContext
    ) -> RunTaskResponse:
        """Validates the request and runs the task with the function loaded by `initialize`."""
        # Customer function code never raises an exception because we catch all of them and add
        # their details to the response. We can only get an exception here if our own code failed.
        # If our code raises an exception the grpc framework converts it into GRPC_STATUS_UNKNOWN
        # error with the exception message. Differentiating errors is not needed for now.
        RunTaskRequestValidator(request=request).check()
        task_handler = RunTaskHandler(
            request=request,
            namespace=self._namespace,
            graph_name=self._graph_name,
            graph_version=self._graph_version,
            function_name=self._function_name,
            function=self._function,
            logger=self._logger,
            indexify_server_addr=self._indexify_server_address,
            config_path=self._config_path,
        )
        return task_handler.run()
|
@@ -0,0 +1,54 @@
|
|
1
|
+
from typing import Optional
|
2
|
+
|
3
|
+
from pydantic import BaseModel
|
4
|
+
|
5
|
+
from indexify.function_executor.proto.function_executor_pb2 import (
|
6
|
+
RunTaskRequest,
|
7
|
+
SerializedObject,
|
8
|
+
)
|
9
|
+
from indexify.functions_sdk.data_objects import IndexifyData
|
10
|
+
from indexify.functions_sdk.object_serializer import get_serializer
|
11
|
+
|
12
|
+
|
13
|
+
class FunctionInputs(BaseModel):
    """Inputs handed to a function invocation."""

    # The function's input value.
    input: IndexifyData
    # Optional initial value; None when the request carries no init value.
    init_value: Optional[IndexifyData] = None
|
16
|
+
|
17
|
+
|
18
|
+
class FunctionInputsLoader:
    """Builds FunctionInputs from the fields of a RunTaskRequest."""

    def __init__(self, request: RunTaskRequest):
        self._request = request

    def load(self) -> FunctionInputs:
        """Returns the function input and, when present, the init value of the request."""
        function_input = self._function_input()
        accumulator_input = self._accumulator_input()
        return FunctionInputs(input=function_input, init_value=accumulator_input)

    def _function_input(self) -> IndexifyData:
        # The graph invocation id doubles as the input id of the produced data object.
        request = self._request
        return _to_indexify_data(request.graph_invocation_id, request.function_input)

    def _accumulator_input(self) -> Optional[IndexifyData]:
        request = self._request
        if not request.HasField("function_init_value"):
            # No init value was sent with this request.
            return None
        return _to_indexify_data(
            request.graph_invocation_id, request.function_init_value
        )
|
41
|
+
|
42
|
+
|
43
|
+
def _to_indexify_data(
    input_id: str, serialized_object: SerializedObject
) -> IndexifyData:
    """Converts a SerializedObject message into IndexifyData tagged with `input_id`."""
    # The payload comes from whichever data field is set: bytes if present, string otherwise.
    if serialized_object.HasField("bytes"):
        payload = serialized_object.bytes
    else:
        payload = serialized_object.string
    # The content type selects the serializer whose encoding type is recorded on the data.
    encoder = get_serializer(serialized_object.content_type).encoding_type
    return IndexifyData(input_id=input_id, payload=payload, encoder=encoder)
|
@@ -0,0 +1,149 @@
|
|
1
|
+
import io
|
2
|
+
import sys
|
3
|
+
import traceback
|
4
|
+
from contextlib import redirect_stderr, redirect_stdout
|
5
|
+
from typing import Any, Optional, Union
|
6
|
+
|
7
|
+
from indexify.function_executor.proto.function_executor_pb2 import (
|
8
|
+
RunTaskRequest,
|
9
|
+
RunTaskResponse,
|
10
|
+
)
|
11
|
+
from indexify.functions_sdk.indexify_functions import (
|
12
|
+
FunctionCallResult,
|
13
|
+
GraphInvocationContext,
|
14
|
+
IndexifyFunction,
|
15
|
+
IndexifyFunctionWrapper,
|
16
|
+
IndexifyRouter,
|
17
|
+
RouterCallResult,
|
18
|
+
)
|
19
|
+
from indexify.http_client import IndexifyClient
|
20
|
+
|
21
|
+
from .function_inputs_loader import FunctionInputs, FunctionInputsLoader
|
22
|
+
from .response_helper import ResponseHelper
|
23
|
+
|
24
|
+
|
25
|
+
class Handler:
    """Runs a single task of the loaded function and produces its RunTaskResponse."""

    def __init__(
        self,
        request: RunTaskRequest,
        namespace: str,
        graph_name: str,
        graph_version: int,
        function_name: str,
        function: Union[IndexifyFunction, IndexifyRouter],
        logger: Any,
        indexify_server_addr: str,
        config_path: Optional[str],
    ):
        invocation_id = request.graph_invocation_id
        task_id = request.task_id

        self._function_name: str = function_name
        # Every log line of this handler carries the invocation and task ids.
        self._logger = logger.bind(
            graph_invocation_id=invocation_id,
            task_id=task_id,
        )
        self._input_loader = FunctionInputsLoader(request)
        self._response_helper = ResponseHelper(task_id=task_id)
        # TODO: use files for stdout, stderr capturing. This puts a natural and thus reasonable
        # rate limit on the rate of writes and allows to not consume expensive memory for function logs.
        self._func_stdout: io.StringIO = io.StringIO()
        self._func_stderr: io.StringIO = io.StringIO()

        invocation_context = GraphInvocationContext(
            invocation_id=invocation_id,
            graph_name=graph_name,
            graph_version=str(graph_version),
            indexify_client=_indexify_client(
                logger=self._logger,
                namespace=namespace,
                indexify_server_addr=indexify_server_addr,
                config_path=config_path,
            ),
        )
        self._function_wrapper: IndexifyFunctionWrapper = IndexifyFunctionWrapper(
            indexify_function=function,
            context=invocation_context,
        )

    def run(self) -> RunTaskResponse:
        """Runs the task.

        Raises an exception if our own code failed, customer function failure doesn't result in any exception.
        Details of customer function failure are returned in the response.
        """
        self._logger.info("running function")
        loaded_inputs: FunctionInputs = self._input_loader.load()
        self._flush_logs()
        return self._run_func_safe_and_captured(loaded_inputs)

    def _run_func_safe_and_captured(self, inputs: FunctionInputs) -> RunTaskResponse:
        """Runs the customer function while capturing what happened in it.

        Function stdout and stderr are captured so they don't get into Function Executor process stdout
        and stderr. Never throws an Exception. Caller can determine if the function succeeded
        using the response.
        """
        try:
            with redirect_stdout(self._func_stdout):
                with redirect_stderr(self._func_stderr):
                    return self._run_func(inputs)
        except Exception:
            # The traceback goes into the response instead of propagating to the caller.
            failure_details = traceback.format_exc()
            return self._response_helper.failure_response(
                message=failure_details,
                stdout=self._func_stdout.getvalue(),
                stderr=self._func_stderr.getvalue(),
            )

    def _run_func(self, inputs: FunctionInputs) -> RunTaskResponse:
        """Invokes the wrapped router or function and converts its result into a response."""
        wrapper = self._function_wrapper

        if _is_router(wrapper):
            router_result: RouterCallResult = wrapper.invoke_router(
                self._function_name, inputs.input
            )
            return self._response_helper.router_response(
                result=router_result,
                stdout=self._func_stdout.getvalue(),
                stderr=self._func_stderr.getvalue(),
            )

        function_result: FunctionCallResult = wrapper.invoke_fn_ser(
            self._function_name, inputs.input, inputs.init_value
        )
        return self._response_helper.function_response(
            result=function_result,
            is_reducer=_func_is_reducer(wrapper),
            stdout=self._func_stdout.getvalue(),
            stderr=self._func_stderr.getvalue(),
        )

    def _flush_logs(self) -> None:
        # Flush any logs buffered in memory before running the function with stdout, stderr capture.
        # Otherwise our logs logged before this point will end up in the function's stdout.
        # structlog.PrintLogger uses print function. This is why flushing with print works.
        print("", flush=True)
        sys.stdout.flush()
        sys.stderr.flush()
|
121
|
+
|
122
|
+
|
123
|
+
def _indexify_client(
    logger: Any,
    namespace: str,
    indexify_server_addr: str,
    config_path: Optional[str],
) -> IndexifyClient:
    """Creates the IndexifyClient for `namespace`, using https when a config path is given."""
    # This client is required to implement key/value store functionality for customer functions.
    tls_enabled = bool(config_path)
    if tls_enabled:
        logger.info("TLS is enabled")
    protocol: str = "https" if tls_enabled else "http"
    return IndexifyClient(
        service_url=f"{protocol}://{indexify_server_addr}",
        namespace=namespace,
        config_path=config_path,
    )
|
139
|
+
|
140
|
+
|
141
|
+
def _is_router(func_wrapper: IndexifyFunctionWrapper) -> bool:
    """Tells whether the wrapped function's type prints as the SDK IndexifyRouter class."""
    # NOTE(review): the check compares the printed type, not isinstance(); presumably
    # deliberate, confirm before changing because subclasses would be treated differently.
    wrapped_type_text = str(type(func_wrapper.indexify_function))
    router_type_text = (
        "<class 'indexify.functions_sdk.indexify_functions.IndexifyRouter'>"
    )
    return wrapped_type_text == router_type_text
|
146
|
+
|
147
|
+
|
148
|
+
def _func_is_reducer(func_wrapper: IndexifyFunctionWrapper) -> bool:
    """Tells whether the wrapped function has a non-None `accumulate` attribute."""
    accumulate = func_wrapper.indexify_function.accumulate
    return accumulate is not None
|
@@ -0,0 +1,24 @@
|
|
1
|
+
from typing import Any
|
2
|
+
|
3
|
+
from indexify.function_executor.proto.function_executor_pb2 import (
|
4
|
+
RunTaskRequest,
|
5
|
+
)
|
6
|
+
from indexify.function_executor.proto.message_validator import MessageValidator
|
7
|
+
|
8
|
+
|
9
|
+
class RequestValidator:
    """Checks the fields of a RunTaskRequest before the task is run."""

    def __init__(self, request: RunTaskRequest):
        self._request = request
        self._message_validator = MessageValidator(request)

    def check(self):
        """Validates the request.

        Raises: ValueError: If the request is invalid.
        """
        # Each validator call returns the object the next check is made on.
        validator = self._message_validator
        for field_name in ("graph_invocation_id", "task_id"):
            validator = validator.required_field(field_name)
        validator = validator.required_serialized_object("function_input")
        validator.optional_serialized_object("function_init_value")
|
@@ -0,0 +1,98 @@
|
|
1
|
+
from typing import List
|
2
|
+
|
3
|
+
from indexify.function_executor.proto.function_executor_pb2 import (
|
4
|
+
FunctionOutput,
|
5
|
+
RouterOutput,
|
6
|
+
RunTaskResponse,
|
7
|
+
SerializedObject,
|
8
|
+
)
|
9
|
+
from indexify.functions_sdk.data_objects import IndexifyData
|
10
|
+
from indexify.functions_sdk.indexify_functions import (
|
11
|
+
FunctionCallResult,
|
12
|
+
RouterCallResult,
|
13
|
+
)
|
14
|
+
from indexify.functions_sdk.object_serializer import get_serializer
|
15
|
+
|
16
|
+
|
17
|
+
class ResponseHelper:
    """Helper class for generating RunTaskResponse messages of one task."""

    def __init__(self, task_id: str):
        self._task_id = task_id

    def function_response(
        self,
        result: FunctionCallResult,
        is_reducer: bool,
        stdout: str = "",
        stderr: str = "",
    ) -> RunTaskResponse:
        """Builds the response for a function call; a failure response if it has a traceback."""
        if result.traceback_msg is not None:
            return self.failure_response(
                message=result.traceback_msg,
                stdout=stdout,
                stderr=stderr,
            )

        return RunTaskResponse(
            task_id=self._task_id,
            function_output=self._to_function_output(result.ser_outputs),
            router_output=None,
            stdout=stdout,
            stderr=stderr,
            is_reducer=is_reducer,
            success=True,
        )

    def router_response(
        self,
        result: RouterCallResult,
        stdout: str = "",
        stderr: str = "",
    ) -> RunTaskResponse:
        """Builds the response for a router call; a failure response if it has a traceback."""
        if result.traceback_msg is not None:
            return self.failure_response(
                message=result.traceback_msg,
                stdout=stdout,
                stderr=stderr,
            )

        return RunTaskResponse(
            task_id=self._task_id,
            function_output=None,
            router_output=RouterOutput(edges=result.edges),
            stdout=stdout,
            stderr=stderr,
            is_reducer=False,
            success=True,
        )

    def failure_response(
        self, message: str, stdout: str, stderr: str
    ) -> RunTaskResponse:
        """Builds an unsuccessful response with `message` appended to stderr on a new line."""
        stderr_with_message = "\n".join([stderr, message])
        return RunTaskResponse(
            task_id=self._task_id,
            function_output=None,
            router_output=None,
            stdout=stdout,
            stderr=stderr_with_message,
            is_reducer=False,
            success=False,
        )

    def _to_function_output(self, outputs: List[IndexifyData]) -> FunctionOutput:
        """Converts the serialized outputs into a FunctionOutput message, keeping their order."""
        function_output = FunctionOutput(outputs=[])
        for ix_data in outputs:
            function_output.outputs.append(self._to_serialized_object(ix_data))
        return function_output

    def _to_serialized_object(self, ix_data: IndexifyData) -> SerializedObject:
        """Converts one output into a SerializedObject.

        Raises: ValueError: If the payload is neither bytes nor str.
        """
        serialized_object: SerializedObject = SerializedObject(
            content_type=get_serializer(ix_data.encoder).content_type,
        )
        # Only one of the two data fields is set, chosen by the payload's Python type.
        if isinstance(ix_data.payload, bytes):
            serialized_object.bytes = ix_data.payload
            return serialized_object
        if isinstance(ix_data.payload, str):
            serialized_object.string = ix_data.payload
            return serialized_object
        raise ValueError(f"Unsupported payload type: {type(ix_data.payload)}")
|
@@ -0,0 +1,22 @@
|
|
1
|
+
from indexify.function_executor.proto.message_validator import MessageValidator
|
2
|
+
|
3
|
+
from .proto.function_executor_pb2 import InitializeRequest
|
4
|
+
|
5
|
+
|
6
|
+
class InitializeRequestValidator:
    """Checks the fields of an InitializeRequest before the function is loaded."""

    def __init__(self, request: InitializeRequest):
        self._request = request
        self._message_validator = MessageValidator(request)

    def check(self):
        """Validates the request.

        Raises: ValueError: If the request is invalid.
        """
        # Each validator call returns the object the next check is made on.
        validator = self._message_validator
        for field_name in (
            "namespace",
            "graph_name",
            "graph_version",
            "function_name",
        ):
            validator = validator.required_field(field_name)
        validator.required_serialized_object("graph")
|
@@ -0,0 +1,13 @@
|
|
1
|
+
# Function inputs and outputs are sent over gRPC.
# -1 means unlimited. We don't want to limit the size of data customers are using.
# The effective max message size in this case is about 1.9 GB, see the max payload test.
# This is due to internal hard gRPC limits. When we want to increase the message sizes
# we'll have to implement chunking for large messages.
_MAX_GRPC_MESSAGE_LENGTH = -1

# The same limit applies to received and sent messages.
GRPC_SERVER_OPTIONS = [
    (option_name, _MAX_GRPC_MESSAGE_LENGTH)
    for option_name in (
        "grpc.max_receive_message_length",
        "grpc.max_send_message_length",
    )
]

# Channels use the very same options list object as the server.
GRPC_CHANNEL_OPTIONS = GRPC_SERVER_OPTIONS
|
@@ -0,0 +1,70 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
syntax = "proto3";
|
4
|
+
|
5
|
+
package function_executor_service;
|
6
|
+
|
7
|
+
// The messages should not use any Python SDK objects. Only Function Executor implemented
|
8
|
+
// in Python is allowed to import Python SDK to run customer functions. This ensures that
|
9
|
+
// all the other components can be written in any language.
|
10
|
+
|
11
|
+
message SerializedObject {
|
12
|
+
oneof data {
|
13
|
+
// Set bytes if the object is serialized as bytes.
|
14
|
+
bytes bytes = 1;
|
15
|
+
// Set string if the object is serialized as string.
|
16
|
+
string string = 2;
|
17
|
+
}
|
18
|
+
// The content type determines the serializer used to serialize the object.
|
19
|
+
optional string content_type = 3;
|
20
|
+
}
|
21
|
+
|
22
|
+
// InitializeRequest contains information about the function
|
23
|
+
// that Function Executor is going to run the tasks for.
|
24
|
+
message InitializeRequest {
|
25
|
+
optional string namespace = 1;
|
26
|
+
optional string graph_name = 2;
|
27
|
+
optional int32 graph_version = 3;
|
28
|
+
optional string function_name = 5;
|
29
|
+
optional SerializedObject graph = 7;
|
30
|
+
}
|
31
|
+
|
32
|
+
message InitializeResponse {
|
33
|
+
optional bool success = 1;
|
34
|
+
}
|
35
|
+
|
36
|
+
message FunctionOutput {
|
37
|
+
repeated SerializedObject outputs = 1;
|
38
|
+
}
|
39
|
+
|
40
|
+
message RouterOutput {
|
41
|
+
repeated string edges = 1;
|
42
|
+
}
|
43
|
+
|
44
|
+
message RunTaskRequest {
|
45
|
+
optional string graph_invocation_id = 4;
|
46
|
+
optional string task_id = 6;
|
47
|
+
optional SerializedObject function_input = 9;
|
48
|
+
optional SerializedObject function_init_value = 10;
|
49
|
+
}
|
50
|
+
|
51
|
+
message RunTaskResponse {
|
52
|
+
optional string task_id = 1;
|
53
|
+
optional FunctionOutput function_output = 2;
|
54
|
+
optional RouterOutput router_output = 3;
|
55
|
+
optional string stdout = 4;
|
56
|
+
optional string stderr = 5;
|
57
|
+
optional bool is_reducer = 6;
|
58
|
+
optional bool success = 7;
|
59
|
+
}
|
60
|
+
|
61
|
+
service FunctionExecutor {
|
62
|
+
// Initializes the Function Executor to run tasks
|
63
|
+
// for a particular function. This method is called only
|
64
|
+
// once per Function Executor as it can only run a single function.
|
65
|
+
// It should be called before calling RunTask for the function.
|
66
|
+
rpc initialize(InitializeRequest) returns (InitializeResponse);
|
67
|
+
// Executes the task defined in the request.
|
68
|
+
// Multiple tasks can be running in parallel.
|
69
|
+
rpc run_task(RunTaskRequest) returns (RunTaskResponse);
|
70
|
+
}
|