indexify 0.3.30__tar.gz → 0.4.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. {indexify-0.3.30 → indexify-0.4.2}/PKG-INFO +1 -3
  2. {indexify-0.3.30 → indexify-0.4.2}/pyproject.toml +3 -5
  3. indexify-0.4.2/src/indexify/cli/__init__.py +18 -0
  4. indexify-0.4.2/src/indexify/cli/build_image.py +51 -0
  5. indexify-0.4.2/src/indexify/cli/deploy.py +57 -0
  6. indexify-0.4.2/src/indexify/cli/executor.py +205 -0
  7. {indexify-0.3.30/src/indexify/executor/grpc → indexify-0.4.2/src/indexify/executor}/channel_manager.py +17 -11
  8. indexify-0.4.2/src/indexify/executor/executor.py +175 -0
  9. indexify-0.4.2/src/indexify/executor/function_allowlist.py +59 -0
  10. {indexify-0.3.30 → indexify-0.4.2}/src/indexify/executor/function_executor/function_executor.py +12 -6
  11. {indexify-0.3.30 → indexify-0.4.2}/src/indexify/executor/function_executor/invocation_state_client.py +25 -3
  12. {indexify-0.3.30 → indexify-0.4.2}/src/indexify/executor/function_executor/server/function_executor_server_factory.py +3 -3
  13. {indexify-0.3.30 → indexify-0.4.2}/src/indexify/executor/function_executor/server/subprocess_function_executor_server_factory.py +22 -11
  14. indexify-0.4.2/src/indexify/executor/function_executor_controller/__init__.py +13 -0
  15. indexify-0.4.2/src/indexify/executor/function_executor_controller/completed_task_metrics.py +82 -0
  16. indexify-0.4.2/src/indexify/executor/function_executor_controller/create_function_executor.py +154 -0
  17. indexify-0.4.2/src/indexify/executor/function_executor_controller/debug_event_loop.py +37 -0
  18. indexify-0.4.2/src/indexify/executor/function_executor_controller/destroy_function_executor.py +28 -0
  19. indexify-0.4.2/src/indexify/executor/function_executor_controller/downloads.py +199 -0
  20. indexify-0.4.2/src/indexify/executor/function_executor_controller/events.py +172 -0
  21. indexify-0.4.2/src/indexify/executor/function_executor_controller/function_executor_controller.py +759 -0
  22. indexify-0.4.2/src/indexify/executor/function_executor_controller/loggers.py +57 -0
  23. indexify-0.4.2/src/indexify/executor/function_executor_controller/message_validators.py +65 -0
  24. indexify-0.4.2/src/indexify/executor/function_executor_controller/metrics/completed_task_metrics.py +68 -0
  25. indexify-0.3.30/src/indexify/executor/metrics/downloader.py → indexify-0.4.2/src/indexify/executor/function_executor_controller/metrics/downloads.py +1 -3
  26. indexify-0.4.2/src/indexify/executor/function_executor_controller/metrics/function_executor_controller.py +60 -0
  27. indexify-0.3.30/src/indexify/executor/function_executor/metrics/single_task_runner.py → indexify-0.4.2/src/indexify/executor/function_executor_controller/metrics/run_task.py +9 -3
  28. indexify-0.4.2/src/indexify/executor/function_executor_controller/metrics/upload_task_output.py +39 -0
  29. indexify-0.4.2/src/indexify/executor/function_executor_controller/prepare_task.py +38 -0
  30. indexify-0.4.2/src/indexify/executor/function_executor_controller/run_task.py +201 -0
  31. indexify-0.4.2/src/indexify/executor/function_executor_controller/task_info.py +33 -0
  32. indexify-0.4.2/src/indexify/executor/function_executor_controller/task_output.py +122 -0
  33. indexify-0.4.2/src/indexify/executor/function_executor_controller/upload_task_output.py +234 -0
  34. {indexify-0.3.30 → indexify-0.4.2}/src/indexify/executor/host_resources/host_resources.py +20 -25
  35. {indexify-0.3.30/src/indexify/executor/grpc → indexify-0.4.2/src/indexify/executor}/metrics/channel_manager.py +1 -1
  36. indexify-0.4.2/src/indexify/executor/metrics/executor.py +13 -0
  37. {indexify-0.3.30/src/indexify/executor/grpc → indexify-0.4.2/src/indexify/executor}/metrics/state_reconciler.py +1 -1
  38. {indexify-0.3.30/src/indexify/executor/grpc → indexify-0.4.2/src/indexify/executor}/metrics/state_reporter.py +1 -1
  39. indexify-0.4.2/src/indexify/executor/monitoring/health_checker/generic_health_checker.py +20 -0
  40. {indexify-0.3.30 → indexify-0.4.2}/src/indexify/executor/monitoring/health_checker/health_checker.py +0 -11
  41. {indexify-0.3.30/src/indexify/executor/grpc → indexify-0.4.2/src/indexify/executor}/state_reconciler.py +139 -141
  42. indexify-0.4.2/src/indexify/executor/state_reporter.py +364 -0
  43. {indexify-0.3.30 → indexify-0.4.2}/src/indexify/proto/executor_api.proto +67 -59
  44. indexify-0.4.2/src/indexify/proto/executor_api_pb2.py +86 -0
  45. {indexify-0.3.30 → indexify-0.4.2}/src/indexify/proto/executor_api_pb2.pyi +125 -104
  46. {indexify-0.3.30 → indexify-0.4.2}/src/indexify/proto/executor_api_pb2_grpc.py +0 -47
  47. indexify-0.3.30/src/indexify/cli/cli.py +0 -267
  48. indexify-0.3.30/src/indexify/executor/api_objects.py +0 -92
  49. indexify-0.3.30/src/indexify/executor/downloader.py +0 -417
  50. indexify-0.3.30/src/indexify/executor/executor.py +0 -429
  51. indexify-0.3.30/src/indexify/executor/executor_flavor.py +0 -7
  52. indexify-0.3.30/src/indexify/executor/function_executor/function_executor_state.py +0 -107
  53. indexify-0.3.30/src/indexify/executor/function_executor/function_executor_states_container.py +0 -93
  54. indexify-0.3.30/src/indexify/executor/function_executor/function_executor_status.py +0 -95
  55. indexify-0.3.30/src/indexify/executor/function_executor/metrics/function_executor_state.py +0 -46
  56. indexify-0.3.30/src/indexify/executor/function_executor/metrics/function_executor_state_container.py +0 -10
  57. indexify-0.3.30/src/indexify/executor/function_executor/single_task_runner.py +0 -345
  58. indexify-0.3.30/src/indexify/executor/function_executor/task_input.py +0 -21
  59. indexify-0.3.30/src/indexify/executor/function_executor/task_output.py +0 -105
  60. indexify-0.3.30/src/indexify/executor/grpc/function_executor_controller.py +0 -418
  61. indexify-0.3.30/src/indexify/executor/grpc/metrics/task_controller.py +0 -8
  62. indexify-0.3.30/src/indexify/executor/grpc/state_reporter.py +0 -314
  63. indexify-0.3.30/src/indexify/executor/grpc/task_controller.py +0 -508
  64. indexify-0.3.30/src/indexify/executor/metrics/executor.py +0 -60
  65. indexify-0.3.30/src/indexify/executor/metrics/task_fetcher.py +0 -21
  66. indexify-0.3.30/src/indexify/executor/metrics/task_reporter.py +0 -53
  67. indexify-0.3.30/src/indexify/executor/metrics/task_runner.py +0 -52
  68. indexify-0.3.30/src/indexify/executor/monitoring/function_allowlist.py +0 -25
  69. indexify-0.3.30/src/indexify/executor/monitoring/health_checker/generic_health_checker.py +0 -73
  70. indexify-0.3.30/src/indexify/executor/runtime_probes.py +0 -68
  71. indexify-0.3.30/src/indexify/executor/task_fetcher.py +0 -96
  72. indexify-0.3.30/src/indexify/executor/task_reporter.py +0 -459
  73. indexify-0.3.30/src/indexify/executor/task_runner.py +0 -177
  74. indexify-0.3.30/src/indexify/proto/executor_api_pb2.py +0 -86
  75. {indexify-0.3.30 → indexify-0.4.2}/README.md +0 -0
  76. {indexify-0.3.30 → indexify-0.4.2}/src/indexify/executor/README.md +0 -0
  77. {indexify-0.3.30 → indexify-0.4.2}/src/indexify/executor/blob_store/blob_store.py +0 -0
  78. {indexify-0.3.30 → indexify-0.4.2}/src/indexify/executor/blob_store/local_fs_blob_store.py +0 -0
  79. {indexify-0.3.30 → indexify-0.4.2}/src/indexify/executor/blob_store/metrics/blob_store.py +0 -0
  80. {indexify-0.3.30 → indexify-0.4.2}/src/indexify/executor/blob_store/s3_blob_store.py +0 -0
  81. {indexify-0.3.30 → indexify-0.4.2}/src/indexify/executor/function_executor/health_checker.py +0 -0
  82. {indexify-0.3.30 → indexify-0.4.2}/src/indexify/executor/function_executor/metrics/function_executor.py +0 -0
  83. {indexify-0.3.30 → indexify-0.4.2}/src/indexify/executor/function_executor/metrics/health_checker.py +0 -0
  84. {indexify-0.3.30 → indexify-0.4.2}/src/indexify/executor/function_executor/metrics/invocation_state_client.py +0 -0
  85. {indexify-0.3.30 → indexify-0.4.2}/src/indexify/executor/function_executor/server/client_configuration.py +0 -0
  86. {indexify-0.3.30 → indexify-0.4.2}/src/indexify/executor/function_executor/server/function_executor_server.py +0 -0
  87. {indexify-0.3.30 → indexify-0.4.2}/src/indexify/executor/function_executor/server/subprocess_function_executor_server.py +0 -0
  88. {indexify-0.3.30 → indexify-0.4.2}/src/indexify/executor/host_resources/nvidia_gpu.py +0 -0
  89. {indexify-0.3.30 → indexify-0.4.2}/src/indexify/executor/host_resources/nvidia_gpu_allocator.py +0 -0
  90. {indexify-0.3.30 → indexify-0.4.2}/src/indexify/executor/monitoring/handler.py +0 -0
  91. {indexify-0.3.30 → indexify-0.4.2}/src/indexify/executor/monitoring/health_check_handler.py +0 -0
  92. {indexify-0.3.30 → indexify-0.4.2}/src/indexify/executor/monitoring/metrics.py +0 -0
  93. {indexify-0.3.30 → indexify-0.4.2}/src/indexify/executor/monitoring/prometheus_metrics_handler.py +0 -0
  94. {indexify-0.3.30 → indexify-0.4.2}/src/indexify/executor/monitoring/server.py +0 -0
  95. {indexify-0.3.30 → indexify-0.4.2}/src/indexify/executor/monitoring/startup_probe_handler.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: indexify
3
- Version: 0.3.30
3
+ Version: 0.4.2
4
4
  Summary: Open Source Indexify components and helper tools
5
5
  Home-page: https://github.com/tensorlakeai/indexify
6
6
  License: Apache 2.0
@@ -18,9 +18,7 @@ Requires-Dist: aiohttp (>=3.11.0,<4.0.0)
18
18
  Requires-Dist: boto3 (>=1.37.30,<2.0.0)
19
19
  Requires-Dist: prometheus-client (>=0.21.1,<0.22.0)
20
20
  Requires-Dist: psutil (>=7.0.0,<8.0.0)
21
- Requires-Dist: rich (>=13.9.2,<14.0.0)
22
21
  Requires-Dist: tensorlake (>=0.1)
23
- Requires-Dist: typer (>=0.12,<0.13)
24
22
  Project-URL: Repository, https://github.com/tensorlakeai/indexify
25
23
  Description-Content-Type: text/markdown
26
24
 
@@ -1,7 +1,7 @@
1
1
  [tool.poetry]
2
2
  name = "indexify"
3
3
  # Incremented if any of the components provided in this packages are updated.
4
- version = "0.3.30"
4
+ version = "0.4.2"
5
5
  description = "Open Source Indexify components and helper tools"
6
6
  authors = ["Tensorlake Inc. <support@tensorlake.ai>"]
7
7
  license = "Apache 2.0"
@@ -10,7 +10,7 @@ homepage = "https://github.com/tensorlakeai/indexify"
10
10
  repository = "https://github.com/tensorlakeai/indexify"
11
11
 
12
12
  [tool.poetry.scripts]
13
- indexify-cli = "indexify.cli.cli:app"
13
+ indexify-cli = "indexify.cli:cli"
14
14
 
15
15
  [tool.poetry.dependencies]
16
16
  # Common dependencies
@@ -28,14 +28,12 @@ tensorlake = ">=0.1"
28
28
  # Uncomment the next line to use local tensorlake package (only for development!)
29
29
  # tensorlake = { path = "../tensorlake", develop = true }
30
30
  # pydantic is provided by tensorlake
31
- # httpx-sse is provided by tensorlake
32
31
  # grpcio is provided by tensorlake
33
32
  # grpcio-tools is provided by tensorlake
34
33
 
35
34
  # CLI only
36
- rich = "^13.9.2"
37
- typer = "^0.12"
38
35
  # nanoid is provided by tensorlake
36
+ # click is provided by tensorlake
39
37
  boto3 = "^1.37.30"
40
38
 
41
39
  [tool.poetry.group.dev.dependencies]
@@ -0,0 +1,18 @@
1
+ import click
2
+
3
+ from . import build_image, deploy, executor
4
+
5
+
6
+ @click.group()
7
+ @click.version_option(package_name="indexify", prog_name="indexify-cli")
8
+ @click.pass_context
9
+ def cli(ctx: click.Context):
10
+ """
11
+ Indexify CLI to manage and deploy workflows to Indexify Server and run Indexify Executors.
12
+ """
13
+ pass
14
+
15
+
16
+ cli.add_command(build_image.build_image)
17
+ cli.add_command(deploy.deploy)
18
+ cli.add_command(executor.executor)
@@ -0,0 +1,51 @@
1
+ import click
2
+ from tensorlake.functions_sdk.image import Image
3
+ from tensorlake.functions_sdk.workflow_module import (
4
+ WorkflowModuleInfo,
5
+ load_workflow_module_info,
6
+ )
7
+
8
+
9
+ @click.command(
10
+ short_help="Build images for graphs/workflows defined in the workflow file"
11
+ )
12
+ # Path to the file where the graphs/workflows are defined as global variables
13
+ @click.argument(
14
+ "workflow-file-path",
15
+ type=click.Path(exists=True, file_okay=True, dir_okay=False),
16
+ )
17
+ @click.option(
18
+ "-i",
19
+ "--image-names",
20
+ multiple=True,
21
+ help="Names of images to build. Can be specified multiple times. If not provided, all images will be built.",
22
+ )
23
+ def build_image(
24
+ workflow_file_path: str,
25
+ image_names: tuple[str, ...] = None,
26
+ ):
27
+ try:
28
+ workflow_module_info: WorkflowModuleInfo = load_workflow_module_info(
29
+ workflow_file_path
30
+ )
31
+ except Exception as e:
32
+ click.secho(
33
+ f"Failed loading workflow file, please check the error message: {e}",
34
+ fg="red",
35
+ )
36
+ raise click.Abort
37
+
38
+ for image in workflow_module_info.images.keys():
39
+ image: Image
40
+ if image_names is not None and image.image_name not in image_names:
41
+ click.echo(
42
+ f"Skipping image `{image.image_name}` as it is not in the provided image names."
43
+ )
44
+ continue
45
+
46
+ click.echo(f"Building image `{image.image_name}`")
47
+ built_image, generator = image.build()
48
+ for output in generator:
49
+ click.secho(output)
50
+
51
+ click.secho(f"built image: {built_image.tags[0]}", fg="green")
@@ -0,0 +1,57 @@
1
+ import click
2
+ from tensorlake import Graph
3
+ from tensorlake.functions_sdk.graph_serialization import graph_code_dir_path
4
+ from tensorlake.functions_sdk.workflow_module import (
5
+ WorkflowModuleInfo,
6
+ load_workflow_module_info,
7
+ )
8
+ from tensorlake.remote_graph import RemoteGraph
9
+
10
+
11
+ @click.command(
12
+ short_help="Deploy all graphs/workflows defined in the workflow file to Indexify"
13
+ )
14
+ # Path to the file where the graphs/workflows are defined as global variables
15
+ @click.argument(
16
+ "workflow-file-path",
17
+ type=click.Path(exists=True, file_okay=True, dir_okay=False),
18
+ )
19
+ @click.option(
20
+ "-u",
21
+ "--upgrade-queued-requests",
22
+ is_flag=True,
23
+ default=False,
24
+ help="Upgrade invocations that are already queued or running to use the deployed version of the graphs/workflows",
25
+ )
26
+ def deploy(
27
+ workflow_file_path: str,
28
+ upgrade_queued_invocations: bool,
29
+ ):
30
+ click.echo(f"Preparing deployment for {workflow_file_path}")
31
+ try:
32
+ workflow_module_info: WorkflowModuleInfo = load_workflow_module_info(
33
+ workflow_file_path
34
+ )
35
+ except Exception as e:
36
+ click.secho(
37
+ f"Failed loading workflow file, please check the error message: {e}",
38
+ fg="red",
39
+ )
40
+ raise click.Abort
41
+
42
+ for graph in workflow_module_info.graphs:
43
+ graph: Graph
44
+ try:
45
+ RemoteGraph.deploy(
46
+ graph,
47
+ code_dir_path=graph_code_dir_path(workflow_file_path),
48
+ upgrade_tasks_to_latest_version=upgrade_queued_invocations,
49
+ )
50
+ except Exception as e:
51
+ click.secho(
52
+ f"Graph {graph.name} could not be deployed, please check the error message: {e}",
53
+ fg="red",
54
+ )
55
+ raise click.Abort
56
+
57
+ click.secho(f"Deployed {graph.name}", fg="green")
@@ -0,0 +1,205 @@
1
+ from tensorlake.utils.logging import (
2
+ configure_development_mode_logging,
3
+ configure_logging_early,
4
+ configure_production_mode_logging,
5
+ )
6
+
7
+ configure_logging_early()
8
+
9
+ import shutil
10
+ from importlib.metadata import version
11
+ from pathlib import Path
12
+ from socket import gethostname
13
+ from typing import Dict, List, Optional
14
+
15
+ import click
16
+ import nanoid
17
+ import prometheus_client
18
+ import structlog
19
+
20
+ from indexify.executor.blob_store.blob_store import BLOBStore
21
+ from indexify.executor.blob_store.local_fs_blob_store import LocalFSBLOBStore
22
+ from indexify.executor.blob_store.s3_blob_store import S3BLOBStore
23
+ from indexify.executor.executor import Executor
24
+ from indexify.executor.function_executor.server.subprocess_function_executor_server_factory import (
25
+ SubprocessFunctionExecutorServerFactory,
26
+ )
27
+ from indexify.executor.host_resources.host_resources import HostResourcesProvider
28
+ from indexify.executor.host_resources.nvidia_gpu_allocator import NvidiaGPUAllocator
29
+ from indexify.executor.monitoring.health_checker.generic_health_checker import (
30
+ GenericHealthChecker,
31
+ )
32
+
33
+
34
+ @click.command(
35
+ context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
36
+ short_help="Runs Executor that connects to the Indexify server and starts running its tasks",
37
+ )
38
+ @click.option(
39
+ "--server-addr",
40
+ "server_address",
41
+ default="localhost:8900",
42
+ help="Address of Indexify HTTP Server to connect to",
43
+ )
44
+ @click.option(
45
+ "--grpc-server-addr",
46
+ "grpc_server_address",
47
+ default="localhost:8901",
48
+ help="Address of Indexify gRPC Server to connect to",
49
+ )
50
+ @click.option(
51
+ "-v",
52
+ "--verbose",
53
+ is_flag=True,
54
+ default=False,
55
+ help="Verbose logging",
56
+ )
57
+ @click.option(
58
+ "-vv",
59
+ "--very-verbose",
60
+ is_flag=True,
61
+ default=False,
62
+ help="Very verbose logging",
63
+ )
64
+ @click.option(
65
+ "-f",
66
+ "--function",
67
+ "function_uris",
68
+ default=[],
69
+ multiple=True,
70
+ help="Functions that the executor will run "
71
+ "specified as <namespace>:<workflow>:<function>:<version>. "
72
+ "version is optional, not specifying it will make the server send any version "
73
+ "of the function. Any number of --function arguments can be passed.",
74
+ )
75
+ @click.option(
76
+ "--config-path",
77
+ type=click.Path(exists=True, dir_okay=False, file_okay=True, readable=True),
78
+ default=None,
79
+ help="Path to the TLS configuration file",
80
+ )
81
+ @click.option(
82
+ "--executor-cache-path",
83
+ type=click.Path(exists=False, dir_okay=True, readable=True, writable=True),
84
+ default="~/.indexify/executor_cache",
85
+ help="Path to the executor cache directory",
86
+ )
87
+ @click.option(
88
+ "--monitoring-server-host",
89
+ default="localhost",
90
+ help="IP address or hostname where to run Executor Monitoring server",
91
+ )
92
+ @click.option(
93
+ "--monitoring-server-port",
94
+ default=7000,
95
+ type=int,
96
+ help="Port where to run Executor Monitoring server",
97
+ )
98
+ @click.option(
99
+ "-l",
100
+ "--label",
101
+ "labels",
102
+ default=[],
103
+ multiple=True,
104
+ help="Executor key-value label to be sent to the Server. "
105
+ "Specified as <key>=<value>",
106
+ )
107
+ @click.pass_context
108
+ def executor(
109
+ ctx: click.Context,
110
+ server_address: str,
111
+ grpc_server_address: str,
112
+ verbose: bool,
113
+ very_verbose: bool,
114
+ function_uris: List[str],
115
+ config_path: Optional[str],
116
+ executor_cache_path: str,
117
+ monitoring_server_host: str,
118
+ monitoring_server_port: int,
119
+ labels: List[str],
120
+ ):
121
+ if verbose or very_verbose:
122
+ configure_development_mode_logging(compact_tracebacks=not very_verbose)
123
+ else:
124
+ configure_production_mode_logging()
125
+
126
+ kv_labels: Dict[str, str] = {}
127
+ for label in labels:
128
+ key, value = label.split("=")
129
+ kv_labels[key] = value
130
+
131
+ executor_id: str = nanoid.generate()
132
+ executor_version = version("indexify")
133
+ logger = structlog.get_logger(module=__name__, executor_id=executor_id)
134
+
135
+ logger.info(
136
+ "starting executor",
137
+ hostname=gethostname(),
138
+ server_address=server_address,
139
+ grpc_server_address=grpc_server_address,
140
+ config_path=config_path,
141
+ executor_version=executor_version,
142
+ labels=kv_labels,
143
+ executor_cache_path=executor_cache_path,
144
+ functions=function_uris,
145
+ verbose=verbose,
146
+ very_verbose=very_verbose,
147
+ monitoring_server_host=monitoring_server_host,
148
+ monitoring_server_port=monitoring_server_port,
149
+ )
150
+ if ctx.args:
151
+ logger.warning(
152
+ "Unknown arguments passed to the executor",
153
+ unknown_args=ctx.args,
154
+ )
155
+ if len(function_uris) == 0:
156
+ logger.warning(
157
+ "No --function arguments were passed. Executor will run all functions. This scenario is only supported for testing purposes.",
158
+ )
159
+
160
+ executor_cache_path: Path = Path(executor_cache_path).expanduser().absolute()
161
+ if executor_cache_path.exists():
162
+ shutil.rmtree(str(executor_cache_path))
163
+ executor_cache_path.mkdir(parents=True, exist_ok=True)
164
+
165
+ blob_store: BLOBStore = BLOBStore(
166
+ # Local FS mode is used in tests and in cases when the user wants to store data on NFS.
167
+ local=LocalFSBLOBStore(),
168
+ # S3 is initialized lazily so it's okay to create it even if the user is not going to use it.
169
+ s3=S3BLOBStore(),
170
+ )
171
+
172
+ host_resources_provider: HostResourcesProvider = HostResourcesProvider(
173
+ gpu_allocator=NvidiaGPUAllocator(logger),
174
+ # Assuming a simple setup in OSS where Executor container has a single file system
175
+ # used by all Function Executors and all the container resources are available to all Function Executors.
176
+ function_executors_ephimeral_disks_path="/",
177
+ host_overhead_cpus=0,
178
+ host_overhead_memory_gb=0,
179
+ host_overhead_function_executors_ephimeral_disks_gb=0,
180
+ )
181
+
182
+ prometheus_client.Info("cli", "CLI information").info(
183
+ {
184
+ "package": "indexify",
185
+ }
186
+ )
187
+
188
+ Executor(
189
+ id=executor_id,
190
+ version=executor_version,
191
+ labels=kv_labels,
192
+ health_checker=GenericHealthChecker(),
193
+ cache_path=executor_cache_path,
194
+ function_uris=function_uris,
195
+ function_executor_server_factory=SubprocessFunctionExecutorServerFactory(
196
+ verbose_logs=verbose or very_verbose
197
+ ),
198
+ server_addr=server_address,
199
+ grpc_server_addr=grpc_server_address,
200
+ config_path=config_path,
201
+ monitoring_server_host=monitoring_server_host,
202
+ monitoring_server_port=monitoring_server_port,
203
+ blob_store=blob_store,
204
+ host_resources_provider=host_resources_provider,
205
+ ).run()
@@ -84,15 +84,28 @@ class ChannelManager:
84
84
  # Use the lock to ensure that we only create one channel without race conditions.
85
85
  async with self._lock:
86
86
  if self._channel is None:
87
- self._channel = await self._create_channel()
87
+ self._channel = await self._create_ready_channel()
88
88
  elif not await self._locked_channel_is_healthy():
89
89
  self._logger.info("grpc channel to server is unhealthy")
90
90
  await self._destroy_locked_channel()
91
- self._channel = await self._create_channel()
91
+ self._channel = await self._create_ready_channel()
92
92
 
93
93
  return self._channel
94
94
 
95
- async def _create_channel(self) -> grpc.aio.Channel:
95
+ def create_channel(self) -> grpc.aio.Channel:
96
+ """Creates a new channel to the gRPC server.
97
+
98
+ The channel is not yet ready to use. Raises an exception on failure.
99
+ """
100
+ if self._channel_credentials is None:
101
+ return grpc.aio.insecure_channel(target=self._server_address)
102
+ else:
103
+ return grpc.aio.secure_channel(
104
+ target=self._server_address,
105
+ credentials=self._channel_credentials,
106
+ )
107
+
108
+ async def _create_ready_channel(self) -> grpc.aio.Channel:
96
109
  """Creates a new channel to the gRPC server.
97
110
 
98
111
  Returns a ready to use channel. Blocks until the channel
@@ -104,14 +117,7 @@ class ChannelManager:
104
117
  metric_grpc_server_channel_creations.inc()
105
118
  while True:
106
119
  try:
107
- if self._channel_credentials is None:
108
- channel = grpc.aio.insecure_channel(target=self._server_address)
109
- else:
110
- channel = grpc.aio.secure_channel(
111
- target=self._server_address,
112
- credentials=self._channel_credentials,
113
- )
114
-
120
+ channel = self.create_channel()
115
121
  await asyncio.wait_for(
116
122
  channel.channel_ready(),
117
123
  timeout=_CONNECT_TIMEOUT_SEC,
@@ -0,0 +1,175 @@
1
+ import asyncio
2
+ import signal
3
+ from pathlib import Path
4
+ from socket import gethostname
5
+ from typing import Dict, List, Optional
6
+
7
+ import structlog
8
+
9
+ from indexify.proto.executor_api_pb2 import ExecutorStatus
10
+
11
+ from .blob_store.blob_store import BLOBStore
12
+ from .channel_manager import ChannelManager
13
+ from .function_allowlist import (
14
+ FunctionURI,
15
+ function_allowlist_to_indexed_dict,
16
+ parse_function_uris,
17
+ )
18
+ from .function_executor.server.function_executor_server_factory import (
19
+ FunctionExecutorServerFactory,
20
+ )
21
+ from .host_resources.host_resources import HostResourcesProvider
22
+ from .metrics.executor import (
23
+ metric_executor_info,
24
+ metric_executor_state,
25
+ )
26
+ from .monitoring.health_check_handler import HealthCheckHandler
27
+ from .monitoring.health_checker.health_checker import HealthChecker
28
+ from .monitoring.prometheus_metrics_handler import PrometheusMetricsHandler
29
+ from .monitoring.server import MonitoringServer
30
+ from .monitoring.startup_probe_handler import StartupProbeHandler
31
+ from .state_reconciler import ExecutorStateReconciler
32
+ from .state_reporter import ExecutorStateReporter
33
+
34
+ metric_executor_state.state("starting")
35
+
36
+
37
+ class Executor:
38
+ def __init__(
39
+ self,
40
+ id: str,
41
+ version: str,
42
+ labels: Dict[str, str],
43
+ cache_path: Path,
44
+ health_checker: HealthChecker,
45
+ function_uris: List[str],
46
+ function_executor_server_factory: FunctionExecutorServerFactory,
47
+ server_addr: str,
48
+ grpc_server_addr: str,
49
+ config_path: Optional[str],
50
+ monitoring_server_host: str,
51
+ monitoring_server_port: int,
52
+ blob_store: BLOBStore,
53
+ host_resources_provider: HostResourcesProvider,
54
+ ):
55
+ self._logger = structlog.get_logger(module=__name__)
56
+ protocol: str = "http"
57
+ if config_path:
58
+ self._logger.info("running the extractor with TLS enabled")
59
+ protocol = "https"
60
+
61
+ self._startup_probe_handler = StartupProbeHandler()
62
+ self._monitoring_server = MonitoringServer(
63
+ host=monitoring_server_host,
64
+ port=monitoring_server_port,
65
+ startup_probe_handler=self._startup_probe_handler,
66
+ health_probe_handler=HealthCheckHandler(health_checker),
67
+ metrics_handler=PrometheusMetricsHandler(),
68
+ )
69
+ self._channel_manager = ChannelManager(
70
+ server_address=grpc_server_addr,
71
+ config_path=config_path,
72
+ logger=self._logger,
73
+ )
74
+ function_allowlist: List[FunctionURI] = parse_function_uris(function_uris)
75
+ self._state_reporter = ExecutorStateReporter(
76
+ executor_id=id,
77
+ version=version,
78
+ labels=labels,
79
+ function_allowlist=function_allowlist,
80
+ channel_manager=self._channel_manager,
81
+ host_resources_provider=host_resources_provider,
82
+ logger=self._logger,
83
+ )
84
+ self._state_reporter.update_executor_status(
85
+ ExecutorStatus.EXECUTOR_STATUS_STARTING_UP
86
+ )
87
+ self._state_reconciler = ExecutorStateReconciler(
88
+ executor_id=id,
89
+ function_executor_server_factory=function_executor_server_factory,
90
+ base_url=f"{protocol}://{server_addr}",
91
+ config_path=config_path,
92
+ cache_path=cache_path,
93
+ blob_store=blob_store,
94
+ channel_manager=self._channel_manager,
95
+ state_reporter=self._state_reporter,
96
+ logger=self._logger,
97
+ )
98
+ self._run_aio_task: Optional[asyncio.Task] = None
99
+ self._shutdown_aio_task: Optional[asyncio.Task] = None
100
+
101
+ executor_info: Dict[str, str] = {
102
+ "id": id,
103
+ "version": version,
104
+ "cache_path": str(cache_path),
105
+ "server_addr": server_addr,
106
+ "grpc_server_addr": str(grpc_server_addr),
107
+ "config_path": str(config_path),
108
+ "hostname": gethostname(),
109
+ }
110
+ for key, value in labels.items():
111
+ executor_info["label_" + key] = value
112
+ executor_info.update(function_allowlist_to_indexed_dict(function_allowlist))
113
+ metric_executor_info.info(executor_info)
114
+
115
+ def run(self):
116
+ asyncio.new_event_loop()
117
+
118
+ self._run_aio_task = asyncio.get_event_loop().create_task(
119
+ self._run(),
120
+ name="executor startup and run loop",
121
+ )
122
+
123
+ try:
124
+ asyncio.get_event_loop().run_until_complete(self._run_aio_task)
125
+ except asyncio.CancelledError:
126
+ pass # Expected exception on shutdown
127
+
128
+ async def _run(self):
129
+ for signum in [
130
+ signal.SIGABRT,
131
+ signal.SIGINT,
132
+ signal.SIGTERM,
133
+ signal.SIGQUIT,
134
+ signal.SIGHUP,
135
+ ]:
136
+ asyncio.get_event_loop().add_signal_handler(
137
+ signum, self._shutdown_signal_handler, asyncio.get_event_loop()
138
+ )
139
+
140
+ asyncio.create_task(
141
+ self._monitoring_server.run(), name="monitoring server runner"
142
+ )
143
+ self._state_reporter.update_executor_status(
144
+ ExecutorStatus.EXECUTOR_STATUS_RUNNING
145
+ )
146
+ self._state_reporter.run()
147
+ self._state_reconciler.run()
148
+ metric_executor_state.state("running")
149
+ self._startup_probe_handler.set_ready()
150
+
151
+ # Run the Executor forever until it is shut down.
152
+ while True:
153
+ await asyncio.sleep(10)
154
+
155
+ def _shutdown_signal_handler(self, loop):
156
+ if self._shutdown_aio_task is None:
157
+ self._shutdown_aio_task = loop.create_task(
158
+ self._shutdown(), name="executor shutdown"
159
+ )
160
+
161
+ async def _shutdown(self):
162
+ self._logger.info("shutting down Executor")
163
+ metric_executor_state.state("shutting_down")
164
+
165
+ # Shutdown state reconciler first because it changes reported state on shutdown.
166
+ await self._state_reconciler.shutdown()
167
+
168
+ # Do one last state report with STOPPED status. This reduces latency in the system.
169
+ self._state_reporter.update_executor_status(
170
+ ExecutorStatus.EXECUTOR_STATUS_STOPPED
171
+ )
172
+ await self._state_reporter.shutdown()
173
+ await self._channel_manager.destroy()
174
+ await self._monitoring_server.shutdown()
175
+ self._run_aio_task.cancel()
@@ -0,0 +1,59 @@
1
+ from dataclasses import dataclass
2
+ from typing import Dict, List, Optional
3
+
4
+
5
+ @dataclass
6
+ class FunctionURI:
7
+ namespace: str
8
+ compute_graph: str
9
+ compute_fn: str
10
+ version: Optional[str] = None
11
+
12
+
13
+ def function_allowlist_to_indexed_dict(
14
+ function_allowlist: List[FunctionURI],
15
+ ) -> Dict[str, str]:
16
+ """Returns a dictionary with each function URI in the allowlist as a key-value pair.
17
+
18
+ The keys are prefixed indexes in function allowlist, and the values are the function URIs
19
+ """
20
+ indexed_dict = {}
21
+ counter = 0
22
+ for function_uri in function_allowlist:
23
+ function_uri: FunctionURI
24
+ indexed_dict[f"function_allowlist_{counter}"] = ":".join(
25
+ [
26
+ function_uri.namespace,
27
+ function_uri.compute_graph,
28
+ function_uri.compute_fn,
29
+ str(function_uri.version),
30
+ ]
31
+ )
32
+ counter += 1
33
+ return indexed_dict
34
+
35
+
36
+ def parse_function_uris(function_uri_strs: List[str]) -> List[FunctionURI]:
37
+ """Parses a list of function URIs from strings to FunctionURI objects."""
38
+ uris: List[FunctionURI] = []
39
+ for uri_str in function_uri_strs:
40
+ tokens = uri_str.split(":")
41
+ if len(tokens) < 3 or len(tokens) > 4:
42
+ raise ValueError(
43
+ "Function should be specified as <namespace>:<workflow>:<function>:<version> or "
44
+ "<namespace>:<workflow>:<function>"
45
+ )
46
+ version: Optional[str] = None
47
+ if len(tokens) == 4:
48
+ version = tokens[3]
49
+
50
+ uris.append(
51
+ FunctionURI(
52
+ namespace=tokens[0],
53
+ compute_graph=tokens[1],
54
+ compute_fn=tokens[2],
55
+ version=version,
56
+ )
57
+ )
58
+
59
+ return uris