indexify 0.2.40__py3-none-any.whl → 0.2.42__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- indexify/cli.py +92 -52
- indexify/executor/agent.py +99 -187
- indexify/executor/api_objects.py +2 -8
- indexify/executor/downloader.py +129 -90
- indexify/executor/executor_tasks.py +15 -30
- indexify/executor/function_executor/function_executor.py +32 -0
- indexify/executor/function_executor/function_executor_factory.py +26 -0
- indexify/executor/function_executor/function_executor_map.py +91 -0
- indexify/executor/function_executor/process_function_executor.py +64 -0
- indexify/executor/function_executor/process_function_executor_factory.py +102 -0
- indexify/executor/function_worker.py +227 -184
- indexify/executor/runtime_probes.py +9 -8
- indexify/executor/task_fetcher.py +80 -0
- indexify/executor/task_reporter.py +18 -25
- indexify/executor/task_store.py +35 -16
- indexify/function_executor/function_executor_service.py +86 -0
- indexify/function_executor/handlers/run_function/function_inputs_loader.py +54 -0
- indexify/function_executor/handlers/run_function/handler.py +149 -0
- indexify/function_executor/handlers/run_function/request_validator.py +24 -0
- indexify/function_executor/handlers/run_function/response_helper.py +98 -0
- indexify/function_executor/initialize_request_validator.py +22 -0
- indexify/function_executor/proto/configuration.py +13 -0
- indexify/function_executor/proto/function_executor.proto +70 -0
- indexify/function_executor/proto/function_executor_pb2.py +53 -0
- indexify/function_executor/proto/function_executor_pb2.pyi +125 -0
- indexify/function_executor/proto/function_executor_pb2_grpc.py +163 -0
- indexify/function_executor/proto/message_validator.py +38 -0
- indexify/function_executor/server.py +31 -0
- indexify/functions_sdk/data_objects.py +0 -9
- indexify/functions_sdk/graph.py +10 -11
- indexify/functions_sdk/graph_definition.py +2 -2
- indexify/functions_sdk/image.py +35 -30
- indexify/functions_sdk/indexify_functions.py +5 -5
- indexify/http_client.py +15 -23
- indexify/logging.py +32 -0
- {indexify-0.2.40.dist-info → indexify-0.2.42.dist-info}/METADATA +3 -1
- indexify-0.2.42.dist-info/RECORD +53 -0
- indexify/executor/indexify_executor.py +0 -32
- indexify-0.2.40.dist-info/RECORD +0 -34
- {indexify-0.2.40.dist-info → indexify-0.2.42.dist-info}/LICENSE.txt +0 -0
- {indexify-0.2.40.dist-info → indexify-0.2.42.dist-info}/WHEEL +0 -0
- {indexify-0.2.40.dist-info → indexify-0.2.42.dist-info}/entry_points.txt +0 -0
indexify/executor/function_executor/process_function_executor_factory.py (new file)
@@ -0,0 +1,102 @@
+import asyncio
+from typing import Any, Optional
+
+from .function_executor_factory import FunctionExecutorFactory
+from .process_function_executor import ProcessFunctionExecutor
+
+
+class ProcessFunctionExecutorFactory(FunctionExecutorFactory):
+    def __init__(
+        self,
+        indexify_server_address: str,
+        development_mode: bool,
+        config_path: Optional[str],
+    ):
+        self._indexify_server_address: str = indexify_server_address
+        self._development_mode: bool = development_mode
+        self._config_path: Optional[str] = config_path
+        # Registered ports range ends at 49151. We start from 50000 to hopefully avoid conflicts.
+        self._free_ports = set(range(50000, 51000))
+
+    async def create(
+        self, logger: Any, state: Optional[Any] = None
+    ) -> ProcessFunctionExecutor:
+        logger = logger.bind(module=__name__)
+        port: Optional[int] = None
+
+        try:
+            port = self._allocate_port()
+            args = [
+                "function-executor",
+                "--function-executor-server-address",
+                _server_address(port),
+                "--indexify-server-address",
+                self._indexify_server_address,
+            ]
+            if self._development_mode:
+                args.append("--dev")
+            if self._config_path is not None:
+                args.extend(["--config-path", self._config_path])
+            # Run the process with our stdout, stderr. We want to see process logs and exceptions in our process output.
+            # This is useful for debugging. Customer function stdout and stderr are captured and returned in the response
+            # so we won't see them in our process outputs. This is the right behavior as customer function stdout and stderr
+            # contain private customer data.
+            proc: asyncio.subprocess.Process = await asyncio.create_subprocess_exec(
+                "indexify-cli",
+                *args,
+            )
+            return ProcessFunctionExecutor(
+                process=proc,
+                port=port,
+                address=_server_address(port),
+                logger=logger,
+                state=state,
+            )
+        except Exception as e:
+            if port is not None:
+                self._release_port(port)
+            logger.error(
+                f"failed starting a new Function Executor process at port {port}",
+                exc_info=e,
+            )
+            raise
+
+    async def destroy(self, executor: ProcessFunctionExecutor, logger: Any) -> None:
+        proc: asyncio.subprocess.Process = executor._proc
+        port: int = executor._port
+        logger = logger.bind(
+            module=__name__,
+            pid=proc.pid,
+            port=port,
+        )
+
+        try:
+            if proc.returncode is not None:
+                # The process already exited and was waited() successfully.
+                return
+
+            proc.kill()
+            await proc.wait()
+        except Exception as e:
+            logger.error(
+                "failed to cleanup Function Executor process",
+                exc_info=e,
+            )
+        finally:
+            self._release_port(port)
+            if executor._channel is not None:
+                await executor._channel.close()
+
+    def _allocate_port(self) -> int:
+        # No asyncio.Lock is required here because this operation never awaits
+        # and it is always called from the same thread where the event loop is running.
+        return self._free_ports.pop()
+
+    def _release_port(self, port: int) -> None:
+        # No asyncio.Lock is required here because this operation never awaits
+        # and it is always called from the same thread where the event loop is running.
+        self._free_ports.add(port)
+
+
+def _server_address(port: int) -> str:
+    return f"localhost:{port}"
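For orientation, a minimal usage sketch of this factory follows. It is illustrative only, not part of the package: the import path is taken from the file list above, the localhost:8900 server address is hypothetical, and real callers would initialize the executor over its gRPC channel where the sketch has a placeholder.

    import asyncio

    import structlog

    from indexify.executor.function_executor.process_function_executor_factory import (
        ProcessFunctionExecutorFactory,
    )


    async def main() -> None:
        factory = ProcessFunctionExecutorFactory(
            indexify_server_address="localhost:8900",  # hypothetical address
            development_mode=True,
            config_path=None,
        )
        logger = structlog.get_logger()
        # create() pops a port from the 50000-50999 pool and spawns
        # "indexify-cli function-executor ..." as a subprocess.
        executor = await factory.create(logger)
        try:
            pass  # placeholder: initialize and run tasks over the executor's gRPC channel
        finally:
            # destroy() kills the subprocess, returns the port to the pool,
            # and closes the gRPC channel if one was opened.
            await factory.destroy(executor, logger)


    asyncio.run(main())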
indexify/executor/function_worker.py
@@ -1,212 +1,255 @@
-import
-import
-
-
-import
-
-from
-
-
-from indexify.functions_sdk.data_objects import (
-    FunctionWorkerOutput,
-    IndexifyData,
+import asyncio
+from typing import Any, Dict, Optional
+
+import grpc
+import structlog
+
+from indexify.function_executor.proto.function_executor_pb2 import (
+    FunctionOutput,
+    InitializeRequest,
     RouterOutput,
+    RunTaskRequest,
+    RunTaskResponse,
+    SerializedObject,
+)
+from indexify.function_executor.proto.function_executor_pb2_grpc import (
+    FunctionExecutorStub,
 )
-
-
-
-
-
-
-    RouterCallResult,
+
+from .api_objects import Task
+from .downloader import DownloadedInputs
+from .function_executor.function_executor import FunctionExecutor
+from .function_executor.function_executor_factory import (
+    FunctionExecutorFactory,
 )
+from .function_executor.function_executor_map import FunctionExecutorMap
+
 
-
+class FunctionWorkerInput:
+    """Task with all the resources required to run it."""
 
-
+    def __init__(
+        self,
+        task: Task,
+        graph: Optional[SerializedObject] = None,
+        function_input: Optional[DownloadedInputs] = None,
+    ):
+        self.task = task
+        # Must not be None when running the task.
+        self.graph = graph
+        # Must not be None when running the task.
+        self.function_input = function_input
 
 
-class
+class FunctionWorkerOutput:
     def __init__(
-        self,
+        self,
+        function_output: Optional[FunctionOutput] = None,
+        router_output: Optional[RouterOutput] = None,
+        stdout: Optional[str] = None,
+        stderr: Optional[str] = None,
+        reducer: bool = False,
+        success: bool = False,
     ):
-
-        self.
+        self.function_output = function_output
+        self.router_output = router_output
         self.stdout = stdout
         self.stderr = stderr
-        self.
-
-
-class FunctionOutput(BaseModel):
-    fn_outputs: Optional[List[IndexifyData]]
-    router_output: Optional[RouterOutput]
-    reducer: bool = False
-    success: bool = True
-    stdout: str = ""
-    stderr: str = ""
-
-
-def _load_function(
-    namespace: str,
-    graph_name: str,
-    fn_name: str,
-    code_path: str,
-    version: int,
-    invocation_id: str,
-    indexify_client: IndexifyClient,
-):
-    """Load an extractor to the memory: extractor_wrapper_map."""
-    global function_wrapper_map
-    key = f"{namespace}/{graph_name}/{version}/{fn_name}"
-    if key in function_wrapper_map:
-        return
-    with open(code_path, "rb") as f:
-        code = f.read()
-    pickled_functions = cloudpickle.loads(code)
-    context = GraphInvocationContext(
-        invocation_id=invocation_id,
-        graph_name=graph_name,
-        graph_version=str(version),
-        indexify_client=indexify_client,
-    )
-    function_wrapper = IndexifyFunctionWrapper(
-        cloudpickle.loads(pickled_functions[fn_name]),
-        context,
-    )
-    function_wrapper_map[key] = function_wrapper
+        self.reducer = reducer
+        self.success = success
 
 
-class
+class FunctionExecutorState:
     def __init__(
-        self, workers: int = 1, indexify_client: IndexifyClient = None
-    ) -> None:
-        self._executor: concurrent.futures.ProcessPoolExecutor = (
-            concurrent.futures.ProcessPoolExecutor(max_workers=workers)
-        )
-        self._workers = workers
-        self._indexify_client = indexify_client
-
-    async def async_submit(
         self,
-
-
-
-
-
-
-
-
-
+        function_id_with_version: str,
+        function_id_without_version: str,
+        ongoing_tasks_count: int,
+    ):
+        self.function_id_with_version: str = function_id_with_version
+        self.function_id_without_version: str = function_id_without_version
+        self.ongoing_tasks_count: int = ongoing_tasks_count
+
+
+class FunctionWorker:
+    def __init__(self, function_executor_factory: FunctionExecutorFactory):
+        self._function_executors = FunctionExecutorMap(function_executor_factory)
+
+    async def run(self, input: FunctionWorkerInput) -> FunctionWorkerOutput:
+        logger = _logger(input.task)
+        function_executor: Optional[FunctionExecutor] = None
         try:
-
-
-
-                fn_name,
-                input,
-                code_path,
-                version,
-                init_value,
-                invocation_id,
-                self._indexify_client,
+            function_executor = await self._obtain_function_executor(input, logger)
+            return await self._run_in_executor(
+                function_executor=function_executor, input=input
             )
-            # TODO - bring back running in a separate process
         except Exception as e:
-
-
-
-                reducer=e.is_reducer,
-                success=False,
+            logger.error(
+                "failed running the task",
+                exc_info=e,
             )
-
-
-
-
-
-
-
-
+            if function_executor is not None:
+                # This will fail all the tasks concurrently running in this Function Executor. Not great.
+                await self._function_executors.delete(
+                    id=_function_id_without_version(input.task),
+                    function_executor=function_executor,
+                    logger=logger,
+                )
+            return _internal_error_output()
+
+    async def _obtain_function_executor(
+        self, input: FunctionWorkerInput, logger: Any
+    ) -> FunctionExecutor:
+        # Temporary policy for Function Executors lifecycle:
+        # There can only be a single Function Executor per function.
+        # If a Function Executor already exists for a different function version then wait until
+        # all the tasks finish in the existing Function Executor and then destroy it first.
+        initialize_request: InitializeRequest = InitializeRequest(
+            namespace=input.task.namespace,
+            graph_name=input.task.compute_graph,
+            graph_version=input.task.graph_version,
+            function_name=input.task.compute_fn,
+            graph=input.graph,
+        )
+        initial_function_executor_state: FunctionExecutorState = FunctionExecutorState(
+            function_id_with_version=_function_id_with_version(input.task),
+            function_id_without_version=_function_id_without_version(input.task),
+            ongoing_tasks_count=0,
         )
 
-
-
-
-
-
-
-
-        fn_name: str,
-        input: IndexifyData,
-        code_path: str,
-        version: int,
-        init_value: Optional[IndexifyData] = None,
-        invocation_id: Optional[str] = None,
-        indexify_client: Optional[IndexifyClient] = None,
-    ) -> FunctionOutput:
-        import io
-        from contextlib import redirect_stderr, redirect_stdout
-
-        stdout_capture = io.StringIO()
-        stderr_capture = io.StringIO()
-        is_reducer = False
-        router_output = None
-        fn_output = None
-        has_failed = False
-        print(
-            f"[bold] function_worker: [/bold] invoking function {fn_name} in graph {graph_name}"
-        )
-        with redirect_stdout(stdout_capture), redirect_stderr(stderr_capture):
-            try:
-                key = f"{namespace}/{graph_name}/{version}/{fn_name}"
-                if key not in function_wrapper_map:
-                    _load_function(
-                        namespace,
-                        graph_name,
-                        fn_name,
-                        code_path,
-                        version,
-                        invocation_id,
-                        indexify_client,
-                    )
+        while True:
+            function_executor = await self._function_executors.get_or_create(
+                id=_function_id_without_version(input.task),
+                initialize_request=initialize_request,
+                initial_state=initial_function_executor_state,
+                logger=logger,
+            )
 
-
+            # No need to lock Function Executor state as we are not awaiting.
+            function_executor_state: FunctionExecutorState = function_executor.state()
             if (
-
-                ==
+                function_executor_state.function_id_with_version
+                == _function_id_with_version(input.task)
             ):
-
-
-
-
-
+                # The existing Function Executor is for the same function version so we can run the task in it.
+                # Increment the ongoing tasks count before awaiting to prevent the Function Executor from being destroyed
+                # by another coroutine.
+                function_executor_state.ongoing_tasks_count += 1
+                return function_executor
+
+            # This loop implements the temporary policy so it's implemented using polling instead of a lower
+            # latency event based mechanism with a higher complexity.
+            if function_executor_state.ongoing_tasks_count == 0:
+                logger.info(
+                    "destroying existing Function Executor for different function version",
+                    function_id=_function_id_with_version(input.task),
+                    executor_function_id=function_executor_state.function_id_with_version,
+                )
+                await self._function_executors.delete(
+                    id=_function_id_without_version(input.task),
+                    function_executor=function_executor,
+                    logger=logger,
+                )
             else:
-
-
+                logger.info(
+                    "waiting for existing Function Executor to finish",
+                    function_id=_function_id_with_version(input.task),
+                    executor_function_id=function_executor_state.function_id_with_version,
+                )
+                await asyncio.sleep(
+                    5
+                )  # Wait for 5 secs before checking if all tasks for the existing Function Executor finished.
+
+    async def _run_in_executor(
+        self, function_executor: FunctionExecutor, input: FunctionWorkerInput
+    ) -> FunctionWorkerOutput:
+        """Runs the task in the Function Executor.
+
+        The Function Executor's ongoing_tasks_count must be incremented before calling this function.
+        """
+        try:
+            run_task_request: RunTaskRequest = RunTaskRequest(
+                graph_invocation_id=input.task.invocation_id,
+                task_id=input.task.id,
+                function_input=input.function_input.input,
+            )
+            if input.function_input.init_value is not None:
+                run_task_request.function_init_value.CopyFrom(
+                    input.function_input.init_value
                 )
-
-
-
-
-
-
-
-
-
-
-
-
-
-                fn_outputs=None,
-                router_output=None,
-                stdout=stdout_capture.getvalue(),
-                stderr=stderr_capture.getvalue(),
-                reducer=is_reducer,
-                success=False,
+            channel: grpc.aio.Channel = await function_executor.channel()
+            run_task_response: RunTaskResponse = await FunctionExecutorStub(
+                channel
+            ).run_task(run_task_request)
+            return _to_output(run_task_response)
+        finally:
+            # If this Function Executor was destroyed then it's not
+            # visible in the map but we still have a reference to it.
+            function_executor.state().ongoing_tasks_count -= 1
+
+    async def shutdown(self) -> None:
+        await self._function_executors.clear(
+            logger=structlog.get_logger(module=__name__, event="shutdown")
         )
-
-
-
-
-
-
-
+
+
+def _to_output(response: RunTaskResponse) -> FunctionWorkerOutput:
+    required_fields = [
+        "stdout",
+        "stderr",
+        "is_reducer",
+        "success",
+    ]
+
+    for field in required_fields:
+        if not response.HasField(field):
+            raise ValueError(f"Response is missing required field: {field}")
+
+    output = FunctionWorkerOutput(
+        stdout=response.stdout,
+        stderr=response.stderr,
+        reducer=response.is_reducer,
+        success=response.success,
+    )
+
+    if response.HasField("function_output"):
+        output.function_output = response.function_output
+    if response.HasField("router_output"):
+        output.router_output = response.router_output
+
+    return output
+
+
+def _internal_error_output() -> FunctionWorkerOutput:
+    return FunctionWorkerOutput(
+        function_output=None,
+        router_output=None,
+        stdout=None,
+        # We are not sharing internal error messages with the customer.
+        # If customer code failed then we won't get any exceptions here.
+        # All customer code errors are returned in the gRPC response.
+        stderr="Platform failed to execute the function.",
+        reducer=False,
+        success=False,
+    )
+
+
+def _logger(task: Task) -> Any:
+    return structlog.get_logger(
+        module=__name__,
+        namespace=task.namespace,
+        graph_name=task.compute_graph,
+        graph_version=task.graph_version,
+        function_name=task.compute_fn,
+        graph_invocation_id=task.invocation_id,
+        task_id=task.id,
+        function_id=_function_id_with_version(task),
     )
+
+
+def _function_id_with_version(task: Task) -> str:
+    return f"versioned/{task.namespace}/{task.compute_graph}/{task.graph_version}/{task.compute_fn}"
+
+
+def _function_id_without_version(task: Task) -> str:
+    return f"not_versioned/{task.namespace}/{task.compute_graph}/{task.compute_fn}"
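To make the two map keys above concrete, here is a small self-contained sketch of the ID schemes; the field values are hypothetical, and the real helpers take the Task object from .api_objects:

    # Hypothetical task fields, for illustration only.
    namespace, compute_graph, graph_version, compute_fn = "default", "summarize", "3", "extract"

    # Mirrors _function_id_with_version() and _function_id_without_version() above.
    function_id_with_version = f"versioned/{namespace}/{compute_graph}/{graph_version}/{compute_fn}"
    function_id_without_version = f"not_versioned/{namespace}/{compute_graph}/{compute_fn}"

    print(function_id_with_version)     # versioned/default/summarize/3/extract
    print(function_id_without_version)  # not_versioned/default/summarize/extract

The unversioned key is what FunctionExecutorMap entries are looked up by, which is what enforces the one-executor-per-function policy; the versioned key is only compared to decide whether the existing executor can be reused for the task's graph version.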
indexify/executor/runtime_probes.py
@@ -6,12 +6,13 @@ from typing import Any, Dict, Tuple
 from pydantic import BaseModel
 
 DEFAULT_EXECUTOR = "tensorlake/indexify-executor-default"
-
+# Empty string is used as a default hash which tells the scheduler to accept any hash.
+DEFAULT_HASH = ""
 
 
 class ProbeInfo(BaseModel):
     image_name: str
-
+    image_hash: str
     python_major_version: int
     labels: Dict[str, Any] = {}
     is_default_executor: bool
@@ -20,7 +21,7 @@ class ProbeInfo(BaseModel):
 class RuntimeProbes:
     def __init__(self) -> None:
         self._image_name = self._read_image_name()
-        self.
+        self._image_hash = self._read_image_hash()
         self._os_name = platform.system()
         self._architecture = platform.machine()
         (
@@ -35,12 +36,12 @@ class RuntimeProbes:
             return file.read().strip()
         return DEFAULT_EXECUTOR
 
-    def
-        file_path = os.path.expanduser("~/.indexify/
+    def _read_image_hash(self) -> str:
+        file_path = os.path.expanduser("~/.indexify/image_hash")
         if os.path.exists(file_path):
             with open(file_path, "r") as file:
-                return
-        return
+                return file.read().strip()
+        return DEFAULT_HASH
 
     def _get_python_version(self) -> Tuple[int, int]:
         version_info = sys.version_info
@@ -60,7 +61,7 @@ class RuntimeProbes:
 
         return ProbeInfo(
             image_name=self._image_name,
-
+            image_hash=self._image_hash,
             python_major_version=self._python_version_major,
             labels=labels,
             is_default_executor=self._is_default_executor(),
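A short sketch of the probe-file convention these hunks introduce. The ~/.indexify/image_hash path comes from the diff; writing the file would normally be done by image build tooling, and the hash value here is made up:

    import os

    # Stamp a hash the way build tooling might...
    path = os.path.expanduser("~/.indexify/image_hash")
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, "w") as f:
        f.write("9f8e7d6c\n")  # hypothetical hash value

    # ...and read it back the way RuntimeProbes._read_image_hash() does.
    with open(path, "r") as f:
        image_hash = f.read().strip()
    print(image_hash or "<empty: scheduler accepts any image hash>")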
indexify/executor/task_fetcher.py (new file)
@@ -0,0 +1,80 @@
+import json
+from importlib.metadata import version
+from typing import AsyncGenerator, Optional
+
+import httpx
+import structlog
+from httpx_sse import aconnect_sse
+
+from indexify.common_util import get_httpx_client
+
+from .api_objects import ExecutorMetadata, Task
+from .runtime_probes import ProbeInfo, RuntimeProbes
+
+
+class TaskFetcher:
+    """Registers with Indexify server and fetches tasks from it."""
+
+    def __init__(
+        self,
+        protocol: str,
+        indexify_server_addr: str,
+        executor_id: str,
+        name_alias: Optional[str] = None,
+        image_hash: Optional[int] = None,
+        config_path: Optional[str] = None,
+    ):
+        self._protocol: str = protocol
+        self._indexify_server_addr: str = indexify_server_addr
+        self.config_path = config_path
+        self._logger = structlog.get_logger(module=__name__)
+
+        probe_info: ProbeInfo = RuntimeProbes().probe()
+        self._executor_metadata: ExecutorMetadata = ExecutorMetadata(
+            id=executor_id,
+            executor_version=version("indexify"),
+            addr="",
+            image_name=probe_info.image_name if name_alias is None else name_alias,
+            image_hash=(probe_info.image_hash if image_hash is None else image_hash),
+            labels=probe_info.labels,
+        )
+
+    async def run(self) -> AsyncGenerator[Task, None]:
+        """Fetches tasks that Indexify server assigned to the Executor.
+
+        Raises an exception if an error occurred."""
+        url = f"{self._protocol}://{self._indexify_server_addr}/internal/executors/{self._executor_metadata.id}/tasks"
+
+        self._logger.info(
+            "registering_executor",
+            executor_id=self._executor_metadata.id,
+            url=url,
+            executor_version=self._executor_metadata.executor_version,
+        )
+        async with get_httpx_client(
+            config_path=self.config_path, make_async=True
+        ) as client:
+            async with aconnect_sse(
+                client,
+                "POST",
+                url,
+                json=self._executor_metadata.model_dump(),
+                headers={"Content-Type": "application/json"},
+            ) as event_source:
+                try:
+                    event_source.response.raise_for_status()
+                except Exception as e:
+                    await event_source.response.aread()
+                    raise Exception(
+                        "Failed to register at server. "
+                        f"Response code: {event_source.response.status_code}. "
+                        f"Response text: '{event_source.response.text}'."
+                    ) from e
+
+                self._logger.info(
+                    "executor_registered", executor_id=self._executor_metadata.id
+                )
+                async for sse in event_source.aiter_sse():
+                    task_dicts = json.loads(sse.data)
+                    for task_dict in task_dicts:
+                        yield Task.model_validate(task_dict, strict=False)
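A minimal consumption sketch for TaskFetcher.run(), assuming the indexify.executor.task_fetcher import path from the file list and a reachable Indexify server at a hypothetical localhost:8900; error handling is elided:

    import asyncio

    from indexify.executor.task_fetcher import TaskFetcher  # assumed import path


    async def main() -> None:
        fetcher = TaskFetcher(
            protocol="http",
            indexify_server_addr="localhost:8900",  # hypothetical address
            executor_id="executor-1",
        )
        # run() registers the executor, then yields Task objects as the
        # server streams them over the SSE connection.
        async for task in fetcher.run():
            print(task.id, task.namespace, task.compute_fn)


    asyncio.run(main())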