indexify 0.2.47__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. indexify-0.3.0/PKG-INFO +38 -0
  2. indexify-0.3.0/README.md +8 -0
  3. {indexify-0.2.47 → indexify-0.3.0}/pyproject.toml +21 -13
  4. {indexify-0.2.47/indexify → indexify-0.3.0/src/indexify/cli}/cli.py +75 -82
  5. indexify-0.3.0/src/indexify/executor/README.md +35 -0
  6. {indexify-0.2.47 → indexify-0.3.0/src}/indexify/executor/api_objects.py +9 -3
  7. {indexify-0.2.47 → indexify-0.3.0/src}/indexify/executor/downloader.py +5 -5
  8. {indexify-0.2.47 → indexify-0.3.0/src}/indexify/executor/executor.py +35 -22
  9. {indexify-0.2.47 → indexify-0.3.0/src}/indexify/executor/function_executor/function_executor.py +14 -3
  10. {indexify-0.2.47 → indexify-0.3.0/src}/indexify/executor/function_executor/function_executor_state.py +13 -10
  11. {indexify-0.2.47 → indexify-0.3.0/src}/indexify/executor/function_executor/invocation_state_client.py +2 -1
  12. {indexify-0.2.47 → indexify-0.3.0/src}/indexify/executor/function_executor/server/subprocess_function_executor_server_factory.py +22 -10
  13. {indexify-0.2.47 → indexify-0.3.0/src}/indexify/executor/function_executor/single_task_runner.py +43 -26
  14. {indexify-0.2.47 → indexify-0.3.0/src}/indexify/executor/function_executor/task_input.py +1 -3
  15. {indexify-0.2.47 → indexify-0.3.0/src}/indexify/executor/task_fetcher.py +5 -7
  16. {indexify-0.2.47 → indexify-0.3.0/src}/indexify/executor/task_reporter.py +3 -5
  17. {indexify-0.2.47 → indexify-0.3.0/src}/indexify/executor/task_runner.py +30 -23
  18. indexify-0.3.0/src/indexify/function_executor/README.md +18 -0
  19. {indexify-0.2.47 → indexify-0.3.0/src}/indexify/function_executor/handlers/run_function/function_inputs_loader.py +13 -14
  20. {indexify-0.2.47 → indexify-0.3.0/src}/indexify/function_executor/handlers/run_function/handler.py +16 -40
  21. {indexify-0.2.47 → indexify-0.3.0/src}/indexify/function_executor/handlers/run_function/request_validator.py +7 -5
  22. {indexify-0.2.47 → indexify-0.3.0/src}/indexify/function_executor/handlers/run_function/response_helper.py +6 -8
  23. {indexify-0.2.47 → indexify-0.3.0/src}/indexify/function_executor/initialize_request_validator.py +1 -2
  24. {indexify-0.2.47 → indexify-0.3.0/src}/indexify/function_executor/invocation_state/invocation_state_proxy_server.py +1 -1
  25. {indexify-0.2.47 → indexify-0.3.0/src}/indexify/function_executor/invocation_state/proxied_invocation_state.py +1 -3
  26. indexify-0.3.0/src/indexify/function_executor/main.py +50 -0
  27. {indexify-0.2.47 → indexify-0.3.0/src}/indexify/function_executor/proto/configuration.py +8 -0
  28. {indexify-0.2.47 → indexify-0.3.0/src}/indexify/function_executor/proto/function_executor.proto +9 -4
  29. indexify-0.3.0/src/indexify/function_executor/proto/function_executor_pb2.py +65 -0
  30. {indexify-0.2.47 → indexify-0.3.0/src}/indexify/function_executor/proto/function_executor_pb2.pyi +24 -4
  31. {indexify-0.2.47 → indexify-0.3.0/src}/indexify/function_executor/server.py +4 -6
  32. indexify-0.2.47/indexify/function_executor/function_executor_service.py → indexify-0.3.0/src/indexify/function_executor/service.py +35 -18
  33. indexify-0.3.0/src/indexify/utils/README.md +3 -0
  34. indexify-0.2.47/indexify/common_util.py → indexify-0.3.0/src/indexify/utils/http_client.py +2 -2
  35. {indexify-0.2.47/indexify → indexify-0.3.0/src/indexify/utils}/logging.py +36 -2
  36. indexify-0.2.47/LICENSE.txt +0 -201
  37. indexify-0.2.47/PKG-INFO +0 -154
  38. indexify-0.2.47/README.md +0 -122
  39. indexify-0.2.47/indexify/__init__.py +0 -31
  40. indexify-0.2.47/indexify/data_loaders/__init__.py +0 -58
  41. indexify-0.2.47/indexify/data_loaders/local_directory_loader.py +0 -37
  42. indexify-0.2.47/indexify/data_loaders/url_loader.py +0 -52
  43. indexify-0.2.47/indexify/error.py +0 -8
  44. indexify-0.2.47/indexify/function_executor/proto/function_executor_pb2.py +0 -65
  45. indexify-0.2.47/indexify/functions_sdk/data_objects.py +0 -27
  46. indexify-0.2.47/indexify/functions_sdk/graph.py +0 -364
  47. indexify-0.2.47/indexify/functions_sdk/graph_definition.py +0 -63
  48. indexify-0.2.47/indexify/functions_sdk/graph_validation.py +0 -70
  49. indexify-0.2.47/indexify/functions_sdk/image.py +0 -210
  50. indexify-0.2.47/indexify/functions_sdk/indexify_functions.py +0 -354
  51. indexify-0.2.47/indexify/functions_sdk/invocation_state/invocation_state.py +0 -22
  52. indexify-0.2.47/indexify/functions_sdk/invocation_state/local_invocation_state.py +0 -30
  53. indexify-0.2.47/indexify/functions_sdk/object_serializer.py +0 -68
  54. indexify-0.2.47/indexify/functions_sdk/pipeline.py +0 -33
  55. indexify-0.2.47/indexify/http_client.py +0 -379
  56. indexify-0.2.47/indexify/remote_graph.py +0 -138
  57. indexify-0.2.47/indexify/remote_pipeline.py +0 -25
  58. indexify-0.2.47/indexify/settings.py +0 -1
  59. {indexify-0.2.47 → indexify-0.3.0/src}/indexify/executor/function_executor/server/function_executor_server.py +0 -0
  60. {indexify-0.2.47 → indexify-0.3.0/src}/indexify/executor/function_executor/server/function_executor_server_factory.py +0 -0
  61. {indexify-0.2.47 → indexify-0.3.0/src}/indexify/executor/function_executor/server/subprocess_function_executor_server.py +0 -0
  62. {indexify-0.2.47 → indexify-0.3.0/src}/indexify/executor/function_executor/task_output.py +0 -0
  63. {indexify-0.2.47 → indexify-0.3.0/src}/indexify/executor/runtime_probes.py +0 -0
  64. {indexify-0.2.47 → indexify-0.3.0/src}/indexify/function_executor/invocation_state/response_validator.py +0 -0
  65. {indexify-0.2.47 → indexify-0.3.0/src}/indexify/function_executor/proto/function_executor_pb2_grpc.py +0 -0
  66. {indexify-0.2.47 → indexify-0.3.0/src}/indexify/function_executor/proto/message_validator.py +0 -0
@@ -0,0 +1,38 @@
1
+ Metadata-Version: 2.3
2
+ Name: indexify
3
+ Version: 0.3.0
4
+ Summary: Open Source Indexify components and helper tools
5
+ Home-page: https://github.com/tensorlakeai/indexify
6
+ License: Apache 2.0
7
+ Author: Tensorlake Inc.
8
+ Author-email: support@tensorlake.ai
9
+ Requires-Python: >=3.9,<4.0
10
+ Classifier: License :: Other/Proprietary License
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.9
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Programming Language :: Python :: 3.13
17
+ Requires-Dist: grpcio (==1.68.1)
18
+ Requires-Dist: grpcio-tools (==1.68.1)
19
+ Requires-Dist: httpx-sse (>=0.4.0,<0.5.0)
20
+ Requires-Dist: httpx[http2] (>=0.28.1,<0.29.0)
21
+ Requires-Dist: nanoid (>=2.0.0,<3.0.0)
22
+ Requires-Dist: pydantic (==2.10.4)
23
+ Requires-Dist: pyyaml (>=6,<7)
24
+ Requires-Dist: rich (>=13.9.2,<14.0.0)
25
+ Requires-Dist: structlog (>=24.4.0,<25.0.0)
26
+ Requires-Dist: tensorlake (==0.1.7)
27
+ Requires-Dist: typer (>=0.12,<0.13)
28
+ Project-URL: Repository, https://github.com/tensorlakeai/indexify
29
+ Description-Content-Type: text/markdown
30
+
31
+ ## Overview
32
+
33
+ This is a package with all Open Source Indexify components and helper tools
34
+ available via a CLI.
35
+
36
+ The CLI allows you to:
37
+ * Set up a local or a distributed Indexify cluster.
38
+ * Build container images for Indexify functions.
@@ -0,0 +1,8 @@
1
+ ## Overview
2
+
3
+ This is a package with all Open Source Indexify components and helper tools
4
+ available via a CLI.
5
+
6
+ The CLI allows you to:
7
+ * Set up a local or a distributed Indexify cluster.
8
+ * Build container images for Indexify functions.
@@ -1,7 +1,8 @@
1
1
  [tool.poetry]
2
2
  name = "indexify"
3
- version = "0.2.47"
4
- description = "Python Client for Indexify"
3
+ # Incremented if any of the components provided in this package are updated.
4
+ version = "0.3.0"
5
+ description = "Open Source Indexify components and helper tools"
5
6
  authors = ["Tensorlake Inc. <support@tensorlake.ai>"]
6
7
  license = "Apache 2.0"
7
8
  readme = "README.md"
@@ -9,28 +10,35 @@ homepage = "https://github.com/tensorlakeai/indexify"
9
10
  repository = "https://github.com/tensorlakeai/indexify"
10
11
 
11
12
  [tool.poetry.scripts]
12
- indexify-cli = "indexify.cli:app"
13
+ indexify-cli = "indexify.cli.cli:app"
14
+ function-executor = "indexify.function_executor.main:main"
13
15
 
14
16
  [tool.poetry.dependencies]
17
+ # Common dependencies
15
18
  python = "^3.9"
16
- httpx = { version = "0.27.2", extras = ["http2"] }
19
+ structlog = "^24.4.0"
17
20
  pyyaml = "^6"
21
+ httpx = { version = "^0.28.1", extras = ["http2"] }
22
+ grpcio = "1.68.1"
23
+
24
+ # Function Executor only
25
+ grpcio-tools = "1.68.1"
26
+ tensorlake = "0.1.7"
27
+
28
+ # Executor only
18
29
  pydantic = "2.10.4"
19
- cloudpickle = "^3.1.0"
30
+ httpx-sse = "^0.4.0"
31
+
32
+ # CLI only
20
33
  rich = "^13.9.2"
21
- nanoid = "^2.0.0"
22
- docker = "^7.1.0"
23
34
  typer = "^0.12"
24
- httpx-sse = "^0.4.0"
25
- structlog = "^24.4.0"
26
- grpcio = "1.68.1"
27
- grpcio-tools = "1.68.1"
35
+ nanoid = "^2.0.0"
28
36
 
29
- [tool.poetry.dev-dependencies]
37
+ [tool.poetry.group.dev.dependencies]
30
38
  black = "^24.10.0"
31
39
  pylint = "^2.4.0"
32
40
  parameterized = "^0.9.0"
33
41
 
34
42
  [build-system]
35
- requires = ["poetry>=1.2"]
43
+ requires = ["poetry==2.0.0"]
36
44
  build-backend = "poetry.core.masonry.api"
@@ -1,8 +1,11 @@
1
- from .logging import configure_logging_early, configure_production_logging
1
+ from indexify.utils.logging import (
2
+ configure_development_mode_logging,
3
+ configure_logging_early,
4
+ configure_production_mode_logging,
5
+ )
2
6
 
3
7
  configure_logging_early()
4
8
 
5
- import asyncio
6
9
  import os
7
10
  import shutil
8
11
  import signal
@@ -11,25 +14,22 @@ import sys
11
14
  import threading
12
15
  import time
13
16
  from importlib.metadata import version
14
- from typing import Annotated, List, Optional
17
+ from pathlib import Path
18
+ from typing import Annotated, List, Optional, Tuple
15
19
 
16
20
  import nanoid
17
21
  import structlog
18
22
  import typer
19
23
  from rich.console import Console
20
- from rich.panel import Panel
21
24
  from rich.text import Text
22
25
  from rich.theme import Theme
26
+ from tensorlake.functions_sdk.image import GetDefaultPythonImage, Image
23
27
 
28
+ from indexify.executor.api_objects import FunctionURI
24
29
  from indexify.executor.executor import Executor
25
30
  from indexify.executor.function_executor.server.subprocess_function_executor_server_factory import (
26
31
  SubprocessFunctionExecutorServerFactory,
27
32
  )
28
- from indexify.function_executor.function_executor_service import (
29
- FunctionExecutorService,
30
- )
31
- from indexify.function_executor.server import Server as FunctionExecutorServer
32
- from indexify.functions_sdk.image import Build, GetDefaultPythonImage, Image
33
33
 
34
34
  logger = structlog.get_logger(module=__name__)
35
35
 
@@ -71,8 +71,10 @@ def server_dev_mode():
71
71
  print("starting indexify server and executor in dev mode...")
72
72
  print("press Ctrl+C to stop the server and executor.")
73
73
  print(f"server binary path: {indexify_server_path}")
74
- commands = [indexify_server_path, "indexify-cli executor"]
75
-
74
+ commands: List[List[str]] = [
75
+ [indexify_server_path, "--dev"],
76
+ ["indexify-cli", "executor", "--dev"],
77
+ ]
76
78
  processes = []
77
79
  stop_event = threading.Event()
78
80
 
@@ -103,7 +105,7 @@ def server_dev_mode():
103
105
 
104
106
  for cmd in commands:
105
107
  process = subprocess.Popen(
106
- cmd.split(),
108
+ cmd,
107
109
  stdout=subprocess.PIPE,
108
110
  stderr=subprocess.STDOUT,
109
111
  bufsize=1,
@@ -155,32 +157,6 @@ def build_image(
155
157
  _create_image(obj, python_sdk_path)
156
158
 
157
159
 
158
- @app.command(help="Build platform images for function names")
159
- def build_platform_image(
160
- workflow_file_path: Annotated[str, typer.Argument()],
161
- image_names: Optional[List[str]] = None,
162
- build_service="https://api.tensorlake.ai/images/v1",
163
- ):
164
-
165
- globals_dict = {}
166
-
167
- # Add the folder in the workflow file path to the current Python path
168
- folder_path = os.path.dirname(workflow_file_path)
169
- if folder_path not in sys.path:
170
- sys.path.append(folder_path)
171
-
172
- try:
173
- exec(open(workflow_file_path).read(), globals_dict)
174
- except FileNotFoundError as e:
175
- raise Exception(
176
- f"Could not find workflow file to execute at: " f"`{workflow_file_path}`"
177
- )
178
- for _, obj in globals_dict.items():
179
- if type(obj) and isinstance(obj, Image):
180
- if image_names is None or obj._image_name in image_names:
181
- _create_platform_image(obj, build_service)
182
-
183
-
184
160
  @app.command(help="Build default image for indexify")
185
161
  def build_default_image(
186
162
  python_version: Optional[str] = typer.Option(
@@ -200,87 +176,104 @@ def build_default_image(
200
176
  )
201
177
 
202
178
 
203
- @app.command(help="Joins the extractors to the coordinator server")
179
+ @app.command(
180
+ help="Runs Executor that connects to the Indexify server and starts running its tasks"
181
+ )
204
182
  def executor(
205
183
  server_addr: str = "localhost:8900",
206
184
  dev: Annotated[
207
185
  bool, typer.Option("--dev", "-d", help="Run the executor in development mode")
208
186
  ] = False,
187
+ function_uris: Annotated[
188
+ Optional[List[str]],
189
+ typer.Option(
190
+ "--function",
191
+ "-f",
192
+ help="Function that the executor will run "
193
+ "specified as <namespace>:<workflow>:<function>:<version>",
194
+ ),
195
+ ] = None,
209
196
  config_path: Optional[str] = typer.Option(
210
197
  None, help="Path to the TLS configuration file"
211
198
  ),
212
199
  executor_cache: Optional[str] = typer.Option(
213
200
  "~/.indexify/executor_cache", help="Path to the executor cache directory"
214
201
  ),
215
- name_alias: Optional[str] = typer.Option(
216
- None, help="Image name override for the executor"
217
- ),
218
- image_hash: Optional[str] = typer.Option(
219
- None, help="Image hash override for the executor"
202
+ # Registered ports range ends at 49151.
203
+ ports: Tuple[int, int] = typer.Option(
204
+ (50000, 51000), help="Range of localhost TCP ports to be used by the executor"
220
205
  ),
221
206
  ):
222
- if not dev:
223
- configure_production_logging()
207
+ if dev:
208
+ configure_development_mode_logging()
209
+ else:
210
+ configure_production_mode_logging()
211
+ if function_uris is None:
212
+ raise typer.BadParameter(
213
+ "At least one function must be specified when not running in development mode"
214
+ )
224
215
 
225
216
  id = nanoid.generate()
226
- executor_version = version("indexify")
227
217
  logger.info(
228
- "executor started",
218
+ "starting executor",
229
219
  server_addr=server_addr,
230
220
  config_path=config_path,
231
221
  executor_id=id,
232
- executor_version=executor_version,
222
+ executor_version=version("indexify"),
233
223
  executor_cache=executor_cache,
234
- name_alias=name_alias,
235
- image_hash=image_hash,
224
+ ports=ports,
225
+ functions=function_uris,
236
226
  dev_mode=dev,
237
227
  )
238
228
 
239
- from pathlib import Path
240
-
241
229
  executor_cache = Path(executor_cache).expanduser().absolute()
242
230
  if os.path.exists(executor_cache):
243
231
  shutil.rmtree(executor_cache)
244
232
  Path(executor_cache).mkdir(parents=True, exist_ok=True)
245
233
 
246
- executor = Executor(
234
+ start_port: int = ports[0]
235
+ end_port: int = ports[1]
236
+ if start_port >= end_port:
237
+ console.print(
238
+ Text(
239
+ f"start port {start_port} should be less than {end_port}", style="red"
240
+ ),
241
+ )
242
+ exit(1)
243
+
244
+ Executor(
247
245
  id,
248
246
  server_addr=server_addr,
249
247
  config_path=config_path,
250
248
  code_path=executor_cache,
251
- name_alias=name_alias,
252
- image_hash=image_hash,
249
+ function_allowlist=_parse_function_uris(function_uris),
253
250
  function_executor_server_factory=SubprocessFunctionExecutorServerFactory(
254
- development_mode=dev
251
+ development_mode=dev,
252
+ server_ports=range(ports[0], ports[1]),
255
253
  ),
256
- )
257
- try:
258
- asyncio.get_event_loop().run_until_complete(executor.run())
259
- except asyncio.CancelledError:
260
- logger.info("graceful shutdown")
254
+ ).run()
261
255
 
262
256
 
263
- @app.command(help="Runs a Function Executor server")
264
- def function_executor(
265
- function_executor_server_address: str = typer.Option(
266
- help="Function Executor server address"
267
- ),
268
- dev: Annotated[
269
- bool, typer.Option("--dev", "-d", help="Run the executor in development mode")
270
- ] = False,
271
- ):
272
- if not dev:
273
- configure_production_logging()
257
+ def _parse_function_uris(uri_strs: Optional[List[str]]) -> Optional[List[FunctionURI]]:
258
+ if uri_strs is None:
259
+ return None
274
260
 
275
- logger.info(
276
- "starting function executor server",
277
- function_executor_server_address=function_executor_server_address,
278
- )
279
-
280
- FunctionExecutorServer(
281
- server_address=function_executor_server_address,
282
- service=FunctionExecutorService(),
283
- ).run()
261
+ uris: List[FunctionURI] = []
262
+ for uri_str in uri_strs:
263
+ tokens = uri_str.split(":")
264
+ if len(tokens) != 4:
265
+ raise typer.BadParameter(
266
+ "Function should be specified as <namespace>:<workflow>:<function>:<version>"
267
+ )
268
+ uris.append(
269
+ FunctionURI(
270
+ namespace=tokens[0],
271
+ compute_graph=tokens[1],
272
+ compute_fn=tokens[2],
273
+ version=tokens[3],
274
+ )
275
+ )
276
+ return uris
284
277
 
285
278
 
286
279
  def _create_image(image: Image, python_sdk_path):
@@ -0,0 +1,35 @@
1
+ ## Overview
2
+
3
+ Executor registers at Indexify Server and continuously pulls tasks assigned to it from the Indexify Server
4
+ and executes them. While registering it shares its capabilities like available hardware with the Indexify
5
+ Server and periodically updates the Server about its current state. Executor spins up Function Executors
6
+ to run customer functions. Executor should never link with Tensorlake Python-SDK. It should not know anything
7
+ about programming languages and runtime environments used by Tensorlake Functions. Function Executor is
8
+ responsible for this.
9
+
10
+ This subpackage doesn't provide an executable entry point that runs an Executor. This is intentional
11
+ as Executor has many configurable sub-components. The indexify cli subpackage provides the `executor`
12
+ command that runs an Executor with the functionality available in the Open Source offering.
13
+
14
+ ## Deployment
15
+
16
+ ### Production setup
17
+
18
+ A single Executor runs in a Virtual Machine, container or on a bare metal host. An Indexify cluster
19
+ is scaled by adding more Executor hosts. Open Source users manage and scale the hosts themselves e.g.
20
+ using Kubernetes, any other orchestrator or even manually. E.g. the users provision secrets,
21
+ persistent volumes to each host using the orchestrator or manually. Each Executor runs a single function.
22
+ The function name and other qualifiers are defined in Executor arguments.
23
+
24
+ ### Development setup
25
+
26
+ To make Indexify development and testing easier an Executor in development mode can run any function.
27
+ Running multiple Executors on the same host is supported too. In this case each Executor requires a
28
+ unique port range passed to it in its arguments.
29
+
30
+ ## Threat model
31
+
32
+ A VM/container/bare metal host where an Executor is running is fully trusted. This works well for single
33
+ tenant deployments where customer functions' code is fully trusted. If this is not the case then Function
34
+ Executors that run customer functions need to get isolated from Executor using e.g. Virtual Machines.
35
+ This functionality is not included in the Open Source offering.
@@ -11,17 +11,23 @@ class Task(BaseModel):
11
11
  invocation_id: str
12
12
  input_key: str
13
13
  reducer_output_id: Optional[str] = None
14
- graph_version: int
14
+ graph_version: str
15
15
  image_uri: Optional[str] = None
16
16
  "image_uri defines the URI of the image of this task. Optional since some executors do not require it."
17
17
 
18
18
 
19
+ class FunctionURI(BaseModel):
20
+ namespace: str
21
+ compute_graph: str
22
+ compute_fn: str
23
+ version: str
24
+
25
+
19
26
  class ExecutorMetadata(BaseModel):
20
27
  id: str
21
28
  executor_version: str
22
29
  addr: str
23
- image_name: str
24
- image_hash: str
30
+ function_allowlist: Optional[List[FunctionURI]] = None
25
31
  labels: Dict[str, Any]
26
32
 
27
33
 
@@ -5,11 +5,9 @@ from typing import Any, Optional
5
5
  import httpx
6
6
  import structlog
7
7
 
8
- from indexify.function_executor.proto.function_executor_pb2 import (
9
- SerializedObject,
10
- )
8
+ from indexify.function_executor.proto.function_executor_pb2 import SerializedObject
9
+ from indexify.utils.http_client import get_httpx_client
11
10
 
12
- from ..common_util import get_httpx_client
13
11
  from .api_objects import Task
14
12
 
15
13
 
@@ -27,7 +25,8 @@ class Downloader:
27
25
  self.code_path,
28
26
  "graph_cache",
29
27
  task.namespace,
30
- f"{task.compute_graph}.{task.graph_version}",
28
+ task.compute_graph,
29
+ task.graph_version,
31
30
  )
32
31
  # Filesystem operations are synchronous.
33
32
  # Run in a separate thread to not block the main event loop.
@@ -70,6 +69,7 @@ class Downloader:
70
69
  # Atomically rename the fully written file at tmp path.
71
70
  # This allows us to not use any locking because file link/unlink
72
71
  # are atomic operations at filesystem level.
72
+ # This also allows sharing the same cache between multiple Executors.
73
73
  os.replace(tmp_path, path)
74
74
 
75
75
  async def download_input(self, task: Task) -> SerializedObject:
@@ -1,15 +1,14 @@
1
1
  import asyncio
2
2
  import signal
3
3
  from pathlib import Path
4
- from typing import Any, Optional
4
+ from typing import Any, List, Optional
5
5
 
6
6
  import structlog
7
7
 
8
- from indexify.function_executor.proto.function_executor_pb2 import (
9
- SerializedObject,
10
- )
8
+ from indexify.function_executor.proto.function_executor_pb2 import SerializedObject
9
+ from indexify.utils.logging import suppress as suppress_logging
11
10
 
12
- from .api_objects import Task
11
+ from .api_objects import FunctionURI, Task
13
12
  from .downloader import Downloader
14
13
  from .function_executor.server.function_executor_server_factory import (
15
14
  FunctionExecutorServerFactory,
@@ -24,14 +23,13 @@ class Executor:
24
23
  self,
25
24
  executor_id: str,
26
25
  code_path: Path,
26
+ function_allowlist: Optional[List[FunctionURI]],
27
27
  function_executor_server_factory: FunctionExecutorServerFactory,
28
28
  server_addr: str = "localhost:8900",
29
29
  config_path: Optional[str] = None,
30
- name_alias: Optional[str] = None,
31
- image_hash: Optional[str] = None,
32
30
  ):
33
31
  self._logger = structlog.get_logger(module=__name__)
34
- self._should_run = True
32
+ self._is_shutdown: bool = False
35
33
  self._config_path = config_path
36
34
  protocol: str = "http"
37
35
  if config_path:
@@ -41,7 +39,7 @@ class Executor:
41
39
  self._server_addr = server_addr
42
40
  self._base_url = f"{protocol}://{self._server_addr}"
43
41
  self._code_path = code_path
44
- self._task_runnner = TaskRunner(
42
+ self._task_runner = TaskRunner(
45
43
  function_executor_server_factory=function_executor_server_factory,
46
44
  base_url=self._base_url,
47
45
  config_path=config_path,
@@ -53,8 +51,7 @@ class Executor:
53
51
  protocol=protocol,
54
52
  indexify_server_addr=self._server_addr,
55
53
  executor_id=executor_id,
56
- name_alias=name_alias,
57
- image_hash=image_hash,
54
+ function_allowlist=function_allowlist,
58
55
  config_path=config_path,
59
56
  )
60
57
  self._task_reporter = TaskReporter(
@@ -63,15 +60,25 @@ class Executor:
63
60
  config_path=self._config_path,
64
61
  )
65
62
 
66
- async def run(self):
67
- asyncio.get_event_loop().add_signal_handler(
68
- signal.SIGINT, self.shutdown, asyncio.get_event_loop()
69
- )
70
- asyncio.get_event_loop().add_signal_handler(
71
- signal.SIGTERM, self.shutdown, asyncio.get_event_loop()
72
- )
63
+ def run(self):
64
+ for signum in [
65
+ signal.SIGABRT,
66
+ signal.SIGINT,
67
+ signal.SIGTERM,
68
+ signal.SIGQUIT,
69
+ signal.SIGHUP,
70
+ ]:
71
+ asyncio.get_event_loop().add_signal_handler(
72
+ signum, self.shutdown, asyncio.get_event_loop()
73
+ )
74
+
75
+ try:
76
+ asyncio.get_event_loop().run_until_complete(self._run_async())
77
+ except asyncio.CancelledError:
78
+ pass # Suppress this expected exception and return without error (normally).
73
79
 
74
- while self._should_run:
80
+ async def _run_async(self):
81
+ while not self._is_shutdown:
75
82
  try:
76
83
  async for task in self._task_fetcher.run():
77
84
  asyncio.create_task(self._run_task(task))
@@ -95,7 +102,7 @@ class Executor:
95
102
  await self._downloader.download_init_value(task)
96
103
  )
97
104
  logger.info("task_execution_started")
98
- output: TaskOutput = await self._task_runnner.run(
105
+ output: TaskOutput = await self._task_runner.run(
99
106
  TaskInput(
100
107
  task=task,
101
108
  graph=graph,
@@ -130,8 +137,14 @@ class Executor:
130
137
 
131
138
  async def _shutdown(self, loop):
132
139
  self._logger.info("shutting_down")
133
- self._should_run = False
134
- await self._task_runnner.shutdown()
140
+ # There will be lots of task cancellation exceptions and "X is shutting down"
141
+ # exceptions logged during Executor shutdown. Suppress their logs as they are
142
+ # expected and are confusing for users.
143
+ suppress_logging()
144
+
145
+ self._is_shutdown = True
146
+ await self._task_runner.shutdown()
147
+ # We mainly need to cancel the task that runs _run_async() loop.
135
148
  for task in asyncio.all_tasks(loop):
136
149
  task.cancel()
137
150
 
@@ -3,7 +3,6 @@ from typing import Any, Optional
3
3
 
4
4
  import grpc
5
5
 
6
- from indexify.common_util import get_httpx_client
7
6
  from indexify.function_executor.proto.function_executor_pb2 import (
8
7
  InitializeRequest,
9
8
  InitializeResponse,
@@ -11,6 +10,7 @@ from indexify.function_executor.proto.function_executor_pb2 import (
11
10
  from indexify.function_executor.proto.function_executor_pb2_grpc import (
12
11
  FunctionExecutorStub,
13
12
  )
13
+ from indexify.utils.http_client import get_httpx_client
14
14
 
15
15
  from .invocation_state_client import InvocationStateClient
16
16
  from .server.function_executor_server import (
@@ -23,6 +23,10 @@ from .server.function_executor_server_factory import (
23
23
  )
24
24
 
25
25
 
26
+ class CustomerError(RuntimeError):
27
+ pass
28
+
29
+
26
30
  class FunctionExecutor:
27
31
  """Executor side class supporting a running FunctionExecutorServer.
28
32
 
@@ -50,7 +54,10 @@ class FunctionExecutor:
50
54
  base_url: str,
51
55
  config_path: Optional[str],
52
56
  ):
53
- """Creates and initializes a FunctionExecutorServer and all resources associated with it."""
57
+ """Creates and initializes a FunctionExecutorServer and all resources associated with it.
58
+
59
+ Raises CustomerError if the server failed to initialize due to an error in customer owned code or data.
60
+ Raises an Exception if an internal error occurred."""
54
61
  try:
55
62
  self._server = await self._server_factory.create(
56
63
  config=config, logger=self._logger
@@ -129,5 +136,9 @@ async def _initialize_server(
129
136
  stub: FunctionExecutorStub, initialize_request: InitializeRequest
130
137
  ):
131
138
  initialize_response: InitializeResponse = await stub.initialize(initialize_request)
132
- if not initialize_response.success:
139
+ if initialize_response.success:
140
+ return
141
+ if initialize_response.HasField("customer_error"):
142
+ raise CustomerError(initialize_response.customer_error)
143
+ else:
133
144
  raise Exception("initialize RPC failed at function executor server")
@@ -15,9 +15,11 @@ class FunctionExecutorState:
15
15
  def __init__(self, function_id_with_version: str, function_id_without_version: str):
16
16
  self.function_id_with_version: str = function_id_with_version
17
17
  self.function_id_without_version: str = function_id_without_version
18
+ # All the fields below are protected by the lock.
19
+ self.lock: asyncio.Lock = asyncio.Lock()
20
+ self.is_shutdown: bool = False
18
21
  self.function_executor: Optional[FunctionExecutor] = None
19
22
  self.running_tasks: int = 0
20
- self.lock: asyncio.Lock = asyncio.Lock()
21
23
  self.running_tasks_change_notifier: asyncio.Condition = asyncio.Condition(
22
24
  lock=self.lock
23
25
  )
@@ -58,16 +60,17 @@ class FunctionExecutorState:
58
60
  await self.function_executor.destroy()
59
61
  self.function_executor = None
60
62
 
61
- async def destroy_function_executor_not_locked(self) -> None:
62
- """Destroys the Function Executor if it exists.
63
+ async def shutdown(self) -> None:
64
+ """Shuts down the state.
63
65
 
64
- The caller doesn't need to hold the lock but this call
65
- might make the state inconsistent."""
66
- if self.function_executor is not None:
67
- # Atomically hide the destroyed Function Executor from other asyncio tasks.
68
- ref = self.function_executor
69
- self.function_executor = None
70
- await ref.destroy()
66
+ Called only during Executor shutdown so it's okay to fail all running and pending
67
+ Function Executor tasks. The state is not valid anymore after this call.
68
+ The caller must hold the lock.
69
+ """
70
+ self.check_locked()
71
+ # Pending tasks will not create a new Function Executor and won't run.
72
+ self.is_shutdown = True
73
+ await self.destroy_function_executor()
71
74
 
72
75
  def check_locked(self) -> None:
73
76
  """Raises an exception if the lock is not held."""
@@ -4,7 +4,6 @@ from typing import Any, AsyncGenerator, Optional, Union
4
4
  import grpc
5
5
  import httpx
6
6
 
7
- from indexify.executor.downloader import serialized_object_from_http_response
8
7
  from indexify.function_executor.proto.function_executor_pb2 import (
9
8
  GetInvocationStateResponse,
10
9
  InvocationStateRequest,
@@ -17,6 +16,8 @@ from indexify.function_executor.proto.function_executor_pb2_grpc import (
17
16
  )
18
17
  from indexify.function_executor.proto.message_validator import MessageValidator
19
18
 
19
+ from ..downloader import serialized_object_from_http_response
20
+
20
21
 
21
22
  class InvocationStateClient:
22
23
  """InvocationStateClient is a client for the invocation state server of a Function Executor.