indexify-0.3.31-py3-none-any.whl → indexify-0.4.2-py3-none-any.whl

This diff compares two publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions exactly as they appear in their public registry.
Files changed (74)
  1. indexify/cli/__init__.py +18 -0
  2. indexify/cli/build_image.py +51 -0
  3. indexify/cli/deploy.py +57 -0
  4. indexify/cli/executor.py +205 -0
  5. indexify/executor/{grpc/channel_manager.py → channel_manager.py} +17 -11
  6. indexify/executor/executor.py +57 -313
  7. indexify/executor/function_allowlist.py +59 -0
  8. indexify/executor/function_executor/function_executor.py +12 -6
  9. indexify/executor/function_executor/invocation_state_client.py +25 -3
  10. indexify/executor/function_executor/server/function_executor_server_factory.py +3 -3
  11. indexify/executor/function_executor/server/subprocess_function_executor_server_factory.py +22 -11
  12. indexify/executor/function_executor_controller/__init__.py +13 -0
  13. indexify/executor/function_executor_controller/completed_task_metrics.py +82 -0
  14. indexify/executor/function_executor_controller/create_function_executor.py +154 -0
  15. indexify/executor/function_executor_controller/debug_event_loop.py +37 -0
  16. indexify/executor/function_executor_controller/destroy_function_executor.py +28 -0
  17. indexify/executor/function_executor_controller/downloads.py +199 -0
  18. indexify/executor/function_executor_controller/events.py +172 -0
  19. indexify/executor/function_executor_controller/function_executor_controller.py +759 -0
  20. indexify/executor/function_executor_controller/loggers.py +57 -0
  21. indexify/executor/function_executor_controller/message_validators.py +65 -0
  22. indexify/executor/function_executor_controller/metrics/completed_task_metrics.py +68 -0
  23. indexify/executor/{metrics/downloader.py → function_executor_controller/metrics/downloads.py} +1 -3
  24. indexify/executor/function_executor_controller/metrics/function_executor_controller.py +60 -0
  25. indexify/executor/{function_executor/metrics/single_task_runner.py → function_executor_controller/metrics/run_task.py} +9 -3
  26. indexify/executor/function_executor_controller/metrics/upload_task_output.py +39 -0
  27. indexify/executor/function_executor_controller/prepare_task.py +38 -0
  28. indexify/executor/function_executor_controller/run_task.py +201 -0
  29. indexify/executor/function_executor_controller/task_info.py +33 -0
  30. indexify/executor/function_executor_controller/task_output.py +122 -0
  31. indexify/executor/function_executor_controller/upload_task_output.py +234 -0
  32. indexify/executor/host_resources/host_resources.py +20 -25
  33. indexify/executor/{grpc/metrics → metrics}/channel_manager.py +1 -1
  34. indexify/executor/metrics/executor.py +0 -47
  35. indexify/executor/{grpc/metrics → metrics}/state_reconciler.py +1 -1
  36. indexify/executor/{grpc/metrics → metrics}/state_reporter.py +1 -1
  37. indexify/executor/monitoring/health_checker/generic_health_checker.py +6 -59
  38. indexify/executor/monitoring/health_checker/health_checker.py +0 -11
  39. indexify/executor/{grpc/state_reconciler.py → state_reconciler.py} +139 -141
  40. indexify/executor/state_reporter.py +364 -0
  41. indexify/proto/executor_api.proto +67 -59
  42. indexify/proto/executor_api_pb2.py +52 -52
  43. indexify/proto/executor_api_pb2.pyi +125 -104
  44. indexify/proto/executor_api_pb2_grpc.py +0 -47
  45. {indexify-0.3.31.dist-info → indexify-0.4.2.dist-info}/METADATA +1 -3
  46. indexify-0.4.2.dist-info/RECORD +68 -0
  47. indexify-0.4.2.dist-info/entry_points.txt +3 -0
  48. indexify/cli/cli.py +0 -268
  49. indexify/executor/api_objects.py +0 -92
  50. indexify/executor/downloader.py +0 -417
  51. indexify/executor/executor_flavor.py +0 -7
  52. indexify/executor/function_executor/function_executor_state.py +0 -107
  53. indexify/executor/function_executor/function_executor_states_container.py +0 -93
  54. indexify/executor/function_executor/function_executor_status.py +0 -95
  55. indexify/executor/function_executor/metrics/function_executor_state.py +0 -46
  56. indexify/executor/function_executor/metrics/function_executor_state_container.py +0 -10
  57. indexify/executor/function_executor/single_task_runner.py +0 -345
  58. indexify/executor/function_executor/task_input.py +0 -21
  59. indexify/executor/function_executor/task_output.py +0 -105
  60. indexify/executor/grpc/function_executor_controller.py +0 -418
  61. indexify/executor/grpc/metrics/task_controller.py +0 -8
  62. indexify/executor/grpc/state_reporter.py +0 -317
  63. indexify/executor/grpc/task_controller.py +0 -508
  64. indexify/executor/metrics/task_fetcher.py +0 -21
  65. indexify/executor/metrics/task_reporter.py +0 -53
  66. indexify/executor/metrics/task_runner.py +0 -52
  67. indexify/executor/monitoring/function_allowlist.py +0 -25
  68. indexify/executor/runtime_probes.py +0 -68
  69. indexify/executor/task_fetcher.py +0 -96
  70. indexify/executor/task_reporter.py +0 -459
  71. indexify/executor/task_runner.py +0 -177
  72. indexify-0.3.31.dist-info/RECORD +0 -68
  73. indexify-0.3.31.dist-info/entry_points.txt +0 -3
  74. {indexify-0.3.31.dist-info → indexify-0.4.2.dist-info}/WHEEL +0 -0
indexify/cli/__init__.py ADDED
@@ -0,0 +1,18 @@
+ import click
+
+ from . import build_image, deploy, executor
+
+
+ @click.group()
+ @click.version_option(package_name="indexify", prog_name="indexify-cli")
+ @click.pass_context
+ def cli(ctx: click.Context):
+     """
+     Indexify CLI to manage and deploy workflows to Indexify Server and run Indexify Executors.
+     """
+     pass
+
+
+ cli.add_command(build_image.build_image)
+ cli.add_command(deploy.deploy)
+ cli.add_command(executor.executor)
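The `cli` group above replaces the former single-file `indexify/cli/cli.py` (deleted later in this diff) and simply registers the three subcommands. As a quick illustration only (not part of the package), the group can be exercised in-process with click's test runner:

from click.testing import CliRunner

from indexify.cli import cli

runner = CliRunner()

# Show the registered subcommands; exact command names follow click's
# default name derivation for build_image, deploy and executor.
print(runner.invoke(cli, ["--help"]).output)

# version_option() reports the installed "indexify" package version.
print(runner.invoke(cli, ["--version"]).output)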
indexify/cli/build_image.py ADDED
@@ -0,0 +1,51 @@
+ import click
+ from tensorlake.functions_sdk.image import Image
+ from tensorlake.functions_sdk.workflow_module import (
+     WorkflowModuleInfo,
+     load_workflow_module_info,
+ )
+
+
+ @click.command(
+     short_help="Build images for graphs/workflows defined in the workflow file"
+ )
+ # Path to the file where the graphs/workflows are defined as global variables
+ @click.argument(
+     "workflow-file-path",
+     type=click.Path(exists=True, file_okay=True, dir_okay=False),
+ )
+ @click.option(
+     "-i",
+     "--image-names",
+     multiple=True,
+     help="Names of images to build. Can be specified multiple times. If not provided, all images will be built.",
+ )
+ def build_image(
+     workflow_file_path: str,
+     image_names: tuple[str, ...] = None,
+ ):
+     try:
+         workflow_module_info: WorkflowModuleInfo = load_workflow_module_info(
+             workflow_file_path
+         )
+     except Exception as e:
+         click.secho(
+             f"Failed loading workflow file, please check the error message: {e}",
+             fg="red",
+         )
+         raise click.Abort
+
+     for image in workflow_module_info.images.keys():
+         image: Image
+         if image_names is not None and image.image_name not in image_names:
+             click.echo(
+                 f"Skipping image `{image.image_name}` as it is not in the provided image names."
+             )
+             continue
+
+         click.echo(f"Building image `{image.image_name}`")
+         built_image, generator = image.build()
+         for output in generator:
+             click.secho(output)
+
+         click.secho(f"built image: {built_image.tags[0]}", fg="green")
indexify/cli/deploy.py ADDED
@@ -0,0 +1,57 @@
+ import click
+ from tensorlake import Graph
+ from tensorlake.functions_sdk.graph_serialization import graph_code_dir_path
+ from tensorlake.functions_sdk.workflow_module import (
+     WorkflowModuleInfo,
+     load_workflow_module_info,
+ )
+ from tensorlake.remote_graph import RemoteGraph
+
+
+ @click.command(
+     short_help="Deploy all graphs/workflows defined in the workflow file to Indexify"
+ )
+ # Path to the file where the graphs/workflows are defined as global variables
+ @click.argument(
+     "workflow-file-path",
+     type=click.Path(exists=True, file_okay=True, dir_okay=False),
+ )
+ @click.option(
+     "-u",
+     "--upgrade-queued-requests",
+     is_flag=True,
+     default=False,
+     help="Upgrade invocations that are already queued or running to use the deployed version of the graphs/workflows",
+ )
+ def deploy(
+     workflow_file_path: str,
+     upgrade_queued_invocations: bool,
+ ):
+     click.echo(f"Preparing deployment for {workflow_file_path}")
+     try:
+         workflow_module_info: WorkflowModuleInfo = load_workflow_module_info(
+             workflow_file_path
+         )
+     except Exception as e:
+         click.secho(
+             f"Failed loading workflow file, please check the error message: {e}",
+             fg="red",
+         )
+         raise click.Abort
+
+     for graph in workflow_module_info.graphs:
+         graph: Graph
+         try:
+             RemoteGraph.deploy(
+                 graph,
+                 code_dir_path=graph_code_dir_path(workflow_file_path),
+                 upgrade_tasks_to_latest_version=upgrade_queued_invocations,
+             )
+         except Exception as e:
+             click.secho(
+                 f"Graph {graph.name} could not be deployed, please check the error message: {e}",
+                 fg="red",
+             )
+             raise click.Abort
+
+         click.secho(f"Deployed {graph.name}", fg="green")
indexify/cli/executor.py ADDED
@@ -0,0 +1,205 @@
+ from tensorlake.utils.logging import (
+     configure_development_mode_logging,
+     configure_logging_early,
+     configure_production_mode_logging,
+ )
+
+ configure_logging_early()
+
+ import shutil
+ from importlib.metadata import version
+ from pathlib import Path
+ from socket import gethostname
+ from typing import Dict, List, Optional
+
+ import click
+ import nanoid
+ import prometheus_client
+ import structlog
+
+ from indexify.executor.blob_store.blob_store import BLOBStore
+ from indexify.executor.blob_store.local_fs_blob_store import LocalFSBLOBStore
+ from indexify.executor.blob_store.s3_blob_store import S3BLOBStore
+ from indexify.executor.executor import Executor
+ from indexify.executor.function_executor.server.subprocess_function_executor_server_factory import (
+     SubprocessFunctionExecutorServerFactory,
+ )
+ from indexify.executor.host_resources.host_resources import HostResourcesProvider
+ from indexify.executor.host_resources.nvidia_gpu_allocator import NvidiaGPUAllocator
+ from indexify.executor.monitoring.health_checker.generic_health_checker import (
+     GenericHealthChecker,
+ )
+
+
+ @click.command(
+     context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
+     short_help="Runs Executor that connects to the Indexify server and starts running its tasks",
+ )
+ @click.option(
+     "--server-addr",
+     "server_address",
+     default="localhost:8900",
+     help="Address of Indexify HTTP Server to connect to",
+ )
+ @click.option(
+     "--grpc-server-addr",
+     "grpc_server_address",
+     default="localhost:8901",
+     help="Address of Indexify gRPC Server to connect to",
+ )
+ @click.option(
+     "-v",
+     "--verbose",
+     is_flag=True,
+     default=False,
+     help="Verbose logging",
+ )
+ @click.option(
+     "-vv",
+     "--very-verbose",
+     is_flag=True,
+     default=False,
+     help="Very verbose logging",
+ )
+ @click.option(
+     "-f",
+     "--function",
+     "function_uris",
+     default=[],
+     multiple=True,
+     help="Functions that the executor will run "
+     "specified as <namespace>:<workflow>:<function>:<version>"
+     "version is optional, not specifying it will make the server send any version"
+     "of the function. Any number of --function arguments can be passed.",
+ )
+ @click.option(
+     "--config-path",
+     type=click.Path(exists=True, dir_okay=False, file_okay=True, readable=True),
+     default=None,
+     help="Path to the TLS configuration file",
+ )
+ @click.option(
+     "--executor-cache-path",
+     type=click.Path(exists=False, dir_okay=True, readable=True, writable=True),
+     default="~/.indexify/executor_cache",
+     help="Path to the executor cache directory",
+ )
+ @click.option(
+     "--monitoring-server-host",
+     default="localhost",
+     help="IP address or hostname where to run Executor Monitoring server",
+ )
+ @click.option(
+     "--monitoring-server-port",
+     default=7000,
+     type=int,
+     help="Port where to run Executor Monitoring server",
+ )
+ @click.option(
+     "-l",
+     "--label",
+     "labels",
+     default=[],
+     multiple=True,
+     help="Executor key-value label to be sent to the Server. "
+     "Specified as <key>=<value>",
+ )
+ @click.pass_context
+ def executor(
+     ctx: click.Context,
+     server_address: str,
+     grpc_server_address: str,
+     verbose: bool,
+     very_verbose: bool,
+     function_uris: List[str],
+     config_path: Optional[str],
+     executor_cache_path: str,
+     monitoring_server_host: str,
+     monitoring_server_port: int,
+     labels: List[str],
+ ):
+     if verbose or very_verbose:
+         configure_development_mode_logging(compact_tracebacks=not very_verbose)
+     else:
+         configure_production_mode_logging()
+
+     kv_labels: Dict[str, str] = {}
+     for label in labels:
+         key, value = label.split("=")
+         kv_labels[key] = value
+
+     executor_id: str = nanoid.generate()
+     executor_version = version("indexify")
+     logger = structlog.get_logger(module=__name__, executor_id=executor_id)
+
+     logger.info(
+         "starting executor",
+         hostname=gethostname(),
+         server_address=server_address,
+         grpc_server_address=grpc_server_address,
+         config_path=config_path,
+         executor_version=executor_version,
+         labels=kv_labels,
+         executor_cache_path=executor_cache_path,
+         functions=function_uris,
+         verbose=verbose,
+         very_verbose=very_verbose,
+         monitoring_server_host=monitoring_server_host,
+         monitoring_server_port=monitoring_server_port,
+     )
+     if ctx.args:
+         logger.warning(
+             "Unknown arguments passed to the executor",
+             unknown_args=ctx.args,
+         )
+     if len(function_uris) == 0:
+         logger.warning(
+             "No --function arguments were passed. Executor will run all functions. This scenario is only supported for testing purposes.",
+         )
+
+     executor_cache_path: Path = Path(executor_cache_path).expanduser().absolute()
+     if executor_cache_path.exists():
+         shutil.rmtree(str(executor_cache_path))
+     executor_cache_path.mkdir(parents=True, exist_ok=True)
+
+     blob_store: BLOBStore = BLOBStore(
+         # Local FS mode is used in tests and in cases when user wants to store data on NFS.
+         local=LocalFSBLOBStore(),
+         # S3 is initiliazed lazily so it's okay to create it even if the user is not going to use it.
+         s3=S3BLOBStore(),
+     )
+
+     host_resources_provider: HostResourcesProvider = HostResourcesProvider(
+         gpu_allocator=NvidiaGPUAllocator(logger),
+         # Assuming a simple setup in OSS where Executor container has a single file system
+         # used by all Function Executors and all the container resources are available to all Function Executors.
+         function_executors_ephimeral_disks_path="/",
+         host_overhead_cpus=0,
+         host_overhead_memory_gb=0,
+         host_overhead_function_executors_ephimeral_disks_gb=0,
+     )
+
+     prometheus_client.Info("cli", "CLI information").info(
+         {
+             "package": "indexify",
+         }
+     )
+
+     Executor(
+         id=executor_id,
+         version=executor_version,
+         labels=kv_labels,
+         health_checker=GenericHealthChecker(),
+         cache_path=executor_cache_path,
+         function_uris=function_uris,
+         function_executor_server_factory=SubprocessFunctionExecutorServerFactory(
+             verbose_logs=verbose or very_verbose
+         ),
+         server_addr=server_address,
+         grpc_server_addr=grpc_server_address,
+         config_path=config_path,
+         monitoring_server_host=monitoring_server_host,
+         monitoring_server_port=monitoring_server_port,
+         blob_store=blob_store,
+         host_resources_provider=host_resources_provider,
+     ).run()
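The `--label` and `--function` options encode structured values as plain strings (`<key>=<value>` and `<namespace>:<workflow>:<function>[:<version>]`). A small illustrative parser for those encodings, mirroring the `label.split("=")` logic above; the `FunctionURI` helper is hypothetical and not part of the package:

from typing import Dict, List, NamedTuple, Optional


class FunctionURI(NamedTuple):
    # Hypothetical container for a parsed --function value.
    namespace: str
    workflow: str
    function: str
    version: Optional[str] = None


def parse_labels(labels: List[str]) -> Dict[str, str]:
    # Mirrors the executor command: each --label is "<key>=<value>".
    kv: Dict[str, str] = {}
    for label in labels:
        key, value = label.split("=", 1)
        kv[key] = value
    return kv


def parse_function_uri(uri: str) -> FunctionURI:
    # The version component is optional; without it the server may send any version.
    parts = uri.split(":")
    if len(parts) not in (3, 4):
        raise ValueError(f"invalid function URI: {uri}")
    return FunctionURI(*parts)


print(parse_labels(["zone=us-east-1", "tier=gpu"]))
print(parse_function_uri("default:my_graph:extract_text:3"))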
indexify/executor/{grpc/channel_manager.py → channel_manager.py} RENAMED
@@ -84,15 +84,28 @@ class ChannelManager:
          # Use the lock to ensure that we only create one channel without race conditions.
          async with self._lock:
              if self._channel is None:
-                 self._channel = await self._create_channel()
+                 self._channel = await self._create_ready_channel()
              elif not await self._locked_channel_is_healthy():
                  self._logger.info("grpc channel to server is unhealthy")
                  await self._destroy_locked_channel()
-                 self._channel = await self._create_channel()
+                 self._channel = await self._create_ready_channel()

          return self._channel

-     async def _create_channel(self) -> grpc.aio.Channel:
+     def create_channel(self) -> grpc.aio.Channel:
+         """Creates a new channel to the gRPC server.
+
+         The channel is not be ready to use. Raises an exception on failure.
+         """
+         if self._channel_credentials is None:
+             return grpc.aio.insecure_channel(target=self._server_address)
+         else:
+             return grpc.aio.secure_channel(
+                 target=self._server_address,
+                 credentials=self._channel_credentials,
+             )
+
+     async def _create_ready_channel(self) -> grpc.aio.Channel:
          """Creates a new channel to the gRPC server."

          Returns a ready to use channel. Blocks until the channel
@@ -104,14 +117,7 @@ class ChannelManager:
          metric_grpc_server_channel_creations.inc()
          while True:
              try:
-                 if self._channel_credentials is None:
-                     channel = grpc.aio.insecure_channel(target=self._server_address)
-                 else:
-                     channel = grpc.aio.secure_channel(
-                         target=self._server_address,
-                         credentials=self._channel_credentials,
-                     )
-
+                 channel = self.create_channel()
                  await asyncio.wait_for(
                      channel.channel_ready(),
                      timeout=_CONNECT_TIMEOUT_SEC,
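This refactor splits channel construction from readiness: the new public `create_channel()` returns an unconnected channel immediately, while `_create_ready_channel()` keeps the retry-until-ready loop used by `get_channel()`. A hedged sketch of how a caller holding a `ChannelManager` might wait on a fresh channel itself, using only standard `grpc.aio` calls (the 5-second timeout is an arbitrary assumption for illustration):

import asyncio

import grpc


async def wait_until_ready(channel: grpc.aio.Channel, timeout_sec: float = 5.0) -> bool:
    # channel_ready() completes once the channel reaches the READY state.
    try:
        await asyncio.wait_for(channel.channel_ready(), timeout=timeout_sec)
        return True
    except asyncio.TimeoutError:
        return False


async def use_own_channel(channel_manager) -> None:
    # create_channel() does not block; readiness becomes the caller's concern,
    # unlike get_channel(), which hands back an already-healthy shared channel.
    channel = channel_manager.create_channel()
    if not await wait_until_ready(channel):
        await channel.close()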