indexify 0.2.47__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. indexify/{cli.py → cli/cli.py} +75 -82
  2. indexify/executor/README.md +35 -0
  3. indexify/executor/api_objects.py +9 -3
  4. indexify/executor/downloader.py +5 -5
  5. indexify/executor/executor.py +35 -22
  6. indexify/executor/function_executor/function_executor.py +14 -3
  7. indexify/executor/function_executor/function_executor_state.py +13 -10
  8. indexify/executor/function_executor/invocation_state_client.py +2 -1
  9. indexify/executor/function_executor/server/subprocess_function_executor_server_factory.py +22 -10
  10. indexify/executor/function_executor/single_task_runner.py +43 -26
  11. indexify/executor/function_executor/task_input.py +1 -3
  12. indexify/executor/task_fetcher.py +5 -7
  13. indexify/executor/task_reporter.py +3 -5
  14. indexify/executor/task_runner.py +30 -23
  15. indexify/function_executor/README.md +18 -0
  16. indexify/function_executor/handlers/run_function/function_inputs_loader.py +13 -14
  17. indexify/function_executor/handlers/run_function/handler.py +16 -40
  18. indexify/function_executor/handlers/run_function/request_validator.py +7 -5
  19. indexify/function_executor/handlers/run_function/response_helper.py +6 -8
  20. indexify/function_executor/initialize_request_validator.py +1 -2
  21. indexify/function_executor/invocation_state/invocation_state_proxy_server.py +1 -1
  22. indexify/function_executor/invocation_state/proxied_invocation_state.py +1 -3
  23. indexify/function_executor/main.py +50 -0
  24. indexify/function_executor/proto/configuration.py +8 -0
  25. indexify/function_executor/proto/function_executor.proto +9 -4
  26. indexify/function_executor/proto/function_executor_pb2.py +24 -24
  27. indexify/function_executor/proto/function_executor_pb2.pyi +24 -4
  28. indexify/function_executor/server.py +4 -6
  29. indexify/function_executor/{function_executor_service.py → service.py} +35 -18
  30. indexify/utils/README.md +3 -0
  31. indexify/{common_util.py → utils/http_client.py} +2 -2
  32. indexify/{logging.py → utils/logging.py} +36 -2
  33. indexify-0.3.0.dist-info/METADATA +38 -0
  34. indexify-0.3.0.dist-info/RECORD +44 -0
  35. {indexify-0.2.47.dist-info → indexify-0.3.0.dist-info}/WHEEL +1 -1
  36. indexify-0.3.0.dist-info/entry_points.txt +4 -0
  37. indexify/__init__.py +0 -31
  38. indexify/data_loaders/__init__.py +0 -58
  39. indexify/data_loaders/local_directory_loader.py +0 -37
  40. indexify/data_loaders/url_loader.py +0 -52
  41. indexify/error.py +0 -8
  42. indexify/functions_sdk/data_objects.py +0 -27
  43. indexify/functions_sdk/graph.py +0 -364
  44. indexify/functions_sdk/graph_definition.py +0 -63
  45. indexify/functions_sdk/graph_validation.py +0 -70
  46. indexify/functions_sdk/image.py +0 -210
  47. indexify/functions_sdk/indexify_functions.py +0 -354
  48. indexify/functions_sdk/invocation_state/invocation_state.py +0 -22
  49. indexify/functions_sdk/invocation_state/local_invocation_state.py +0 -30
  50. indexify/functions_sdk/object_serializer.py +0 -68
  51. indexify/functions_sdk/pipeline.py +0 -33
  52. indexify/http_client.py +0 -379
  53. indexify/remote_graph.py +0 -138
  54. indexify/remote_pipeline.py +0 -25
  55. indexify/settings.py +0 -1
  56. indexify-0.2.47.dist-info/LICENSE.txt +0 -201
  57. indexify-0.2.47.dist-info/METADATA +0 -154
  58. indexify-0.2.47.dist-info/RECORD +0 -60
  59. indexify-0.2.47.dist-info/entry_points.txt +0 -3
@@ -1,27 +0,0 @@
1
- from typing import Any, Dict, List, Literal, Optional, Union
2
-
3
- from pydantic import BaseModel, Json
4
-
5
-
6
# Reference to a file by URL, with optional descriptive fields.
# (Plain comments are used instead of a class docstring so the generated
# pydantic schema stays exactly as before.)
class FileInput(BaseModel):
    # Location of the file; the only required field.
    url: str
    # Optional MIME type of the referenced content.
    mime_type: Optional[str] = None
    # Optional metadata; each value is declared as a pydantic `Json` field.
    metadata: Optional[Dict[str, Json]] = None
    # Optional digest string (field name suggests SHA-256 -- not enforced here).
    sha_256: Optional[str] = None
11
-
12
-
13
# Result of a routing decision: the names of the edges that were selected.
class RouterOutput(BaseModel):
    # Selected edge names, in the order they were produced.
    edges: List[str]
15
-
16
-
17
# Envelope for a serialized value together with the encoder that produced it.
class IndexifyData(BaseModel):
    # Optional identifier of this piece of data.
    id: Optional[str] = None
    # Serialized content, either raw bytes or text.
    payload: Union[bytes, str]
    # Name of the encoder used for `payload`; only these two values validate.
    encoder: Literal["cloudpickle", "json"] = "cloudpickle"
21
-
22
-
23
# In-memory file: raw content plus optional descriptive fields.
class File(BaseModel):
    # Raw file content; the only required field.
    data: bytes
    # Optional MIME type of `data`.
    mime_type: Optional[str] = None
    # Optional free-form metadata (arbitrary values, unlike FileInput's Json).
    metadata: Optional[Dict[str, Any]] = None
    # Optional digest string (field name suggests SHA-256 -- not enforced here).
    sha_256: Optional[str] = None
@@ -1,364 +0,0 @@
1
- import importlib
2
- import sys
3
- from collections import defaultdict
4
- from queue import deque
5
- from typing import (
6
- Annotated,
7
- Any,
8
- Callable,
9
- Dict,
10
- List,
11
- Optional,
12
- Type,
13
- Union,
14
- get_args,
15
- get_origin,
16
- )
17
-
18
- import cloudpickle
19
- from nanoid import generate
20
- from pydantic import BaseModel
21
- from typing_extensions import get_args, get_origin
22
-
23
- from .data_objects import IndexifyData, RouterOutput
24
- from .graph_definition import (
25
- ComputeGraphMetadata,
26
- FunctionMetadata,
27
- NodeMetadata,
28
- RouterMetadata,
29
- RuntimeInformation,
30
- )
31
- from .graph_validation import validate_node, validate_route
32
- from .indexify_functions import (
33
- FunctionCallResult,
34
- GraphInvocationContext,
35
- IndexifyFunction,
36
- IndexifyFunctionWrapper,
37
- IndexifyRouter,
38
- RouterCallResult,
39
- )
40
- from .invocation_state.local_invocation_state import LocalInvocationState
41
- from .object_serializer import get_serializer
42
-
43
# A routing callable: receives one IndexifyData and returns the functions to
# run next (or None). The string is carried as Annotated metadata.
RouterFn = Annotated[
    Callable[[IndexifyData], Optional[List[IndexifyFunction]]],
    "RouterFn",
]

# Anything that can sit at a graph vertex: a wrapped function or a router callable.
GraphNode = Annotated[
    Union[IndexifyFunctionWrapper, RouterFn],
    "GraphNode",
]
47
-
48
-
49
def is_pydantic_model_from_annotation(type_annotation):
    """Return True when the annotation resolves to a pydantic ``BaseModel`` subclass.

    String annotations (unevaluated forward references) cannot be inspected
    and always yield False. For parameterized generics only the FIRST type
    argument is examined, recursively (e.g. ``List[Model]`` -> ``Model``).

    Args:
        type_annotation: A type, a typing construct, or a string annotation.

    Returns:
        True if the (unwrapped) annotation is a class deriving from
        ``BaseModel``, False otherwise.
    """
    if isinstance(type_annotation, str):
        # The annotation was never evaluated, so there is no class to check.
        # (No parsing of the string is attempted: an earlier quote-splitting
        # step raised IndexError on plain names such as "MyModel" and its
        # result was never used.)
        return False

    if get_origin(type_annotation) is not None:
        type_args = get_args(type_annotation)
        if type_args:
            # Only the first type argument decides the outcome.
            return is_pydantic_model_from_annotation(type_args[0])

    if isinstance(type_annotation, type):
        return issubclass(type_annotation, BaseModel)

    return False
64
-
65
-
66
class Graph:
    """Compute graph of Indexify function/router classes with a local runner.

    Nodes are stored by their ``name``. ``edges`` maps a function name to the
    names of its static successors; ``routers`` maps a router name to the names
    of the nodes it may dispatch to. ``run`` executes the graph in-process and
    ``output`` reads back what a node produced during such a run.
    """

    def __init__(
        self,
        name: str,
        start_node: IndexifyFunction,
        description: Optional[str] = None,
        tags: Optional[Dict[str, str]] = None,
    ):
        """Create a graph containing only ``start_node``.

        Args:
            name: Graph name.
            start_node: Function or router class where execution begins.
            description: Optional human-readable description.
            tags: Optional tags; a new empty dict is created per graph when
                omitted, so graphs never share one default dict.
        """
        self.name = name
        self.description = description
        self.nodes: Dict[str, Union[IndexifyFunction, IndexifyRouter]] = {}
        self.routers: Dict[str, List[str]] = defaultdict(list)
        self.edges: Dict[str, List[str]] = defaultdict(list)
        self.accumulator_zero_values: Dict[str, Any] = {}
        self.tags: Dict[str, str] = {} if tags is None else tags

        self.add_node(start_node)
        if issubclass(start_node, IndexifyRouter):
            # Register the router even before any target is routed to it.
            self.routers[start_node.name] = []
        self._start_node: str = start_node.name

        # Storage for local execution
        self._results: Dict[str, Dict[str, List[IndexifyData]]] = {}
        self._accumulator_values: Dict[str, IndexifyData] = {}
        self._local_graph_ctx: Optional[GraphInvocationContext] = None

    def get_function(self, name: str) -> IndexifyFunctionWrapper:
        """Return a wrapper around node ``name`` bound to the local context.

        Raises:
            ValueError: If no node with that name exists.
        """
        if name not in self.nodes:
            raise ValueError(f"Function {name} not found in graph")
        return IndexifyFunctionWrapper(self.nodes[name], self._local_graph_ctx)

    def get_accumulators(self) -> Dict[str, Any]:
        """Return the initial accumulator value of every reducer node, by name."""
        return self.accumulator_zero_values

    def add_node(
        self, indexify_fn: Union[Type[IndexifyFunction], Type[IndexifyRouter]]
    ) -> "Graph":
        """Validate and register a node; a name already present is left as is.

        Returns:
            This graph, to allow chaining.
        """
        validate_node(indexify_fn=indexify_fn)

        if indexify_fn.name in self.nodes:
            return self

        if issubclass(indexify_fn, IndexifyFunction) and indexify_fn.accumulate:
            # `accumulate` is called to build the reducer's starting value.
            self.accumulator_zero_values[indexify_fn.name] = indexify_fn.accumulate()

        self.nodes[indexify_fn.name] = indexify_fn
        return self

    def route(
        self, from_node: Type[IndexifyRouter], to_nodes: List[Type[IndexifyFunction]]
    ) -> "Graph":
        """Declare the nodes that router ``from_node`` may dispatch to.

        Returns:
            This graph, to allow chaining.
        """
        validate_route(from_node=from_node, to_nodes=to_nodes)

        print(
            f"Adding router {from_node.name} to nodes {[node.name for node in to_nodes]}"
        )
        self.add_node(from_node)
        for target in to_nodes:
            self.add_node(target)
            self.routers[from_node.name].append(target.name)
        return self

    def serialize(self, additional_modules):
        """Pickle every node with cloudpickle and return ``{node name: bytes}``.

        Each node's defining module is registered for pickling by value while
        the node is dumped. Modules listed in ``additional_modules`` are
        registered up front and stay registered afterwards.
        """
        pickled_functions = {}
        for module in additional_modules:
            cloudpickle.register_pickle_by_value(module)
        for node in self.nodes.values():
            node_module = sys.modules[node.__module__]
            cloudpickle.register_pickle_by_value(node_module)
            pickled_functions[node.name] = cloudpickle.dumps(node)
            if node_module not in additional_modules:
                cloudpickle.unregister_pickle_by_value(node_module)
        return pickled_functions

    def add_edge(
        self,
        from_node: Type[IndexifyFunction],
        to_node: Union[Type[IndexifyFunction], RouterFn],
    ) -> "Graph":
        """Add a single static edge ``from_node -> to_node``; see ``add_edges``."""
        self.add_edges(from_node, [to_node])
        return self

    def add_edges(
        self,
        from_node: Union[Type[IndexifyFunction], Type[IndexifyRouter]],
        to_node: List[Union[Type[IndexifyFunction], Type[IndexifyRouter]]],
    ) -> "Graph":
        """Add static edges from ``from_node`` to each node in ``to_node``.

        Raises:
            ValueError: If ``from_node`` is a router (use ``route`` instead).
        """
        if issubclass(from_node, IndexifyRouter):
            raise ValueError(
                "Cannot add edges from a router node, use route method instead"
            )

        self.add_node(from_node)
        from_node_name = from_node.name
        for target in to_node:
            self.add_node(target)
            self.edges[from_node_name].append(target.name)
        return self

    def definition(self) -> ComputeGraphMetadata:
        """Build the ``ComputeGraphMetadata`` description of this graph."""
        # `import importlib` alone does not guarantee that the `metadata`
        # submodule is loaded, so import it explicitly where it is used.
        import importlib.metadata

        start_cls = self.nodes[self._start_node]
        is_reducer = False
        if hasattr(start_cls, "accumulate"):
            is_reducer = start_cls.accumulate is not None
        # The start node is always described as a compute function here.
        start_fn_metadata = FunctionMetadata(
            name=start_cls.name,
            fn_name=start_cls.name,
            description=start_cls.description,
            reducer=is_reducer,
            image_information=start_cls.image.to_image_information(),
            input_encoder=start_cls.input_encoder,
            output_encoder=start_cls.output_encoder,
        )

        metadata_edges = self.edges.copy()
        metadata_nodes = {}
        for node_name, node in self.nodes.items():
            if node_name in self.routers:
                metadata_nodes[node_name] = NodeMetadata(
                    dynamic_router=RouterMetadata(
                        name=node_name,
                        description=node.description or "",
                        source_fn=node_name,
                        target_fns=self.routers[node_name],
                        input_encoder=node.input_encoder,
                        output_encoder=node.output_encoder,
                        image_information=node.image.to_image_information(),
                    )
                )
            else:
                metadata_nodes[node_name] = NodeMetadata(
                    compute_fn=FunctionMetadata(
                        name=node_name,
                        fn_name=node.name,
                        description=node.description,
                        reducer=node.accumulate is not None,
                        image_information=node.image.to_image_information(),
                        input_encoder=node.input_encoder,
                        output_encoder=node.output_encoder,
                    )
                )

        return ComputeGraphMetadata(
            name=self.name,
            description=self.description or "",
            start_node=NodeMetadata(compute_fn=start_fn_metadata),
            nodes=metadata_nodes,
            edges=metadata_edges,
            tags=self.tags,
            runtime_information=RuntimeInformation(
                major_version=sys.version_info.major,
                minor_version=sys.version_info.minor,
                sdk_version=importlib.metadata.version("indexify"),
            ),
        )

    def run(self, block_until_done: bool = False, **kwargs) -> str:
        """Execute the graph locally with ``kwargs`` as the start node's input.

        Args:
            block_until_done: Accepted for interface compatibility; not read
                by the local runner.
            **kwargs: Serialized as one payload with the start node's input
                encoder.

        Returns:
            The generated invocation id, usable with ``output``.

        Raises:
            Exception: If some node is unreachable (see ``validate_graph``).
        """
        self.validate_graph()
        start_cls = self.nodes[self._start_node]
        graph_input = IndexifyData(
            id=generate(),
            payload=get_serializer(start_cls.input_encoder).serialize(kwargs),
            encoder=start_cls.input_encoder,
        )
        print(f"[bold] Invoking {self._start_node}[/bold]")
        outputs = defaultdict(list)
        # Reset every reducer to its serialized zero value for this run.
        for reducer_name, zero_value in self.accumulator_zero_values.items():
            reducer_cls = self.nodes[reducer_name]
            self._accumulator_values[reducer_name] = IndexifyData(
                payload=get_serializer(reducer_cls.input_encoder).serialize(zero_value),
                encoder=reducer_cls.input_encoder,
            )
        self._results[graph_input.id] = outputs
        self._local_graph_ctx = GraphInvocationContext(
            invocation_id=graph_input.id,
            graph_name=self.name,
            graph_version="1",
            invocation_state=LocalInvocationState(),
        )
        self._run(graph_input, outputs)
        return graph_input.id

    def validate_graph(self) -> None:
        """Check, breadth-first, that every node is reachable from the start node.

        A node's neighbours are its static edges when it has an entry in
        ``edges``; otherwise its router targets, if any.

        Raises:
            Exception: If at least one registered node cannot be reached.
        """
        pending = deque([self._start_node])
        visited = {self._start_node}

        while pending:
            current = pending.popleft()
            # Membership tests (not indexing) keep the defaultdicts untouched.
            if current in self.edges:
                neighbours = self.edges[current]
            elif current in self.routers:
                neighbours = self.routers[current]
            else:
                neighbours = []

            for neighbour in neighbours:
                if neighbour not in visited:
                    visited.add(neighbour)
                    pending.append(neighbour)

        if len(self.nodes) != len(visited):
            # all the nodes are not reachable from the start_node.
            raise Exception("Some nodes in the graph are not reachable from start node")

    def _run(
        self,
        initial_input: IndexifyData,
        outputs: Dict[str, List[IndexifyData]],
    ):
        """Drive local execution from the start node with a FIFO work queue.

        Args:
            initial_input: Input handed to the start node.
            outputs: Mapping filled in place with each node's serialized outputs.
        """
        queue = deque([(self._start_node, initial_input)])
        while queue:
            node_name, node_input = queue.popleft()
            call_result: Union[FunctionCallResult, RouterCallResult] = (
                self._invoke_fn(node_name, node_input)
            )
            self._log_local_exec_tracebacks(call_result)

            if isinstance(call_result, RouterCallResult):
                # A router forwards its own input unchanged to the chosen nodes.
                for edge in call_result.edges:
                    if edge in self.nodes:
                        queue.append((edge, node_input))
                continue

            out_edges = self.edges.get(node_name, [])
            fn_outputs = call_result.ser_outputs
            print(f"ran {node_name}: num outputs: {len(fn_outputs)}")

            is_reducer = self._accumulator_values.get(node_name, None) is not None
            if is_reducer and fn_outputs:
                # The last output becomes the new accumulated value and
                # replaces whatever was recorded for this node before. A
                # reducer call without outputs keeps the previous value
                # instead of failing on `fn_outputs[-1]`.
                self._accumulator_values[node_name] = fn_outputs[-1].copy()
                outputs[node_name] = []
            if fn_outputs:
                outputs[node_name].extend(fn_outputs)
            if is_reducer and queue:
                # Work is still queued: do not fan out from the reducer yet.
                # NOTE(review): presumably so successors only see the final
                # accumulated value -- confirm.
                print(
                    f"accumulator not none for {node_name}, continuing, len queue: {len(queue)}"
                )
                continue

            for out_edge in out_edges:
                for output in fn_outputs:
                    queue.append((out_edge, output))

    def _invoke_fn(
        self, node_name: str, input: IndexifyData
    ) -> Optional[Union[RouterCallResult, FunctionCallResult]]:
        """Invoke node ``node_name`` on ``input``.

        Nodes that have at least one routing target are invoked as routers;
        every other node is invoked as a function and also receives its
        current accumulator value (None when it is not a reducer).
        """
        node = self.nodes[node_name]
        if node_name in self.routers and len(self.routers[node_name]) > 0:
            result = IndexifyFunctionWrapper(node, self._local_graph_ctx).invoke_router(
                node_name, input
            )
            for dynamic_edge in result.edges:
                if dynamic_edge in self.nodes:
                    print(f"[bold]dynamic router returned node: {dynamic_edge}[/bold]")
            return result

        acc_value = self._accumulator_values.get(node_name, None)
        return IndexifyFunctionWrapper(
            node, context=self._local_graph_ctx
        ).invoke_fn_ser(node_name, input, acc_value)

    def _log_local_exec_tracebacks(
        self, results: Union[FunctionCallResult, RouterCallResult]
    ):
        """Print a failed call's traceback and terminate the process.

        Does nothing when ``results.traceback_msg`` is None.
        """
        if results.traceback_msg is not None:
            print(results.traceback_msg)
            import os

            print("exiting local execution due to error")
            # os._exit ends the process immediately with status 1.
            os._exit(1)

    def output(
        self,
        invocation_id: str,
        fn_name: str,
    ) -> List[Any]:
        """Return the deserialized outputs of ``fn_name`` for a local invocation.

        Dict payloads are validated into the function's output model when that
        model is a pydantic ``BaseModel`` subclass; other payloads are returned
        as deserialized.

        Raises:
            KeyError: If ``invocation_id`` is unknown.
            ValueError: If ``fn_name`` produced no results in that invocation.
        """
        results = self._results[invocation_id]
        if fn_name not in results:
            raise ValueError(f"no results found for fn {fn_name} on graph {self.name}")
        fn = self.nodes[fn_name]
        fn_model = self.get_function(fn_name).get_output_model()
        serializer = get_serializer(fn.output_encoder)
        outputs = []
        for result in results[fn_name]:
            payload = serializer.deserialize(result.payload)
            if issubclass(fn_model, BaseModel) and isinstance(payload, dict):
                payload = fn_model.model_validate(payload)
            outputs.append(payload)
        return outputs
@@ -1,63 +0,0 @@
1
- from typing import Dict, List, Optional
2
-
3
- from pydantic import BaseModel
4
-
5
- from indexify.functions_sdk.image import ImageInformation
6
-
7
- from .object_serializer import get_serializer
8
-
9
-
10
# Description of one compute function inside a graph definition.
class FunctionMetadata(BaseModel):
    # Name of the node in the graph.
    name: str
    # Name of the function backing the node.
    fn_name: str
    description: str
    # True when the function is a reducer; defaults to a plain function.
    reducer: bool = False
    image_information: ImageInformation
    # Encoder names for inputs and outputs; both default to "cloudpickle".
    input_encoder: str = "cloudpickle"
    output_encoder: str = "cloudpickle"
18
-
19
-
20
# Description of one router node inside a graph definition.
class RouterMetadata(BaseModel):
    name: str
    description: str
    # Name of the function implementing the router.
    source_fn: str
    # Names of the functions the router may dispatch to.
    target_fns: List[str]
    image_information: ImageInformation
    # Encoder names for inputs and outputs; both default to "cloudpickle".
    input_encoder: str = "cloudpickle"
    output_encoder: str = "cloudpickle"
28
-
29
-
30
# A graph node described either as a router or as a compute function.
# Both fields are optional and default to None; nothing here enforces that
# exactly one of them is set.
class NodeMetadata(BaseModel):
    dynamic_router: Optional[RouterMetadata] = None
    compute_fn: Optional[FunctionMetadata] = None
33
-
34
-
35
# RuntimeInformation holds data about the environment in which the graph
# should run: a major/minor version pair and the SDK version string.
class RuntimeInformation(BaseModel):
    major_version: int
    minor_version: int
    sdk_version: str
40
-
41
-
42
# Full description of a compute graph: its nodes, edges, tags and the runtime
# it was defined with. (Plain comments are used instead of a class docstring
# so the generated pydantic schema is unchanged.)
class ComputeGraphMetadata(BaseModel):
    name: str
    description: str
    # Entry point of the graph.
    start_node: NodeMetadata
    tags: Dict[str, str] = {}
    # Every node of the graph, keyed by node name.
    nodes: Dict[str, NodeMetadata]
    # Static successors of each node, keyed by node name.
    edges: Dict[str, List[str]]
    accumulator_zero_values: Dict[str, bytes] = {}
    runtime_information: RuntimeInformation
    replaying: bool = False
    version: Optional[int] = -1

    def get_input_payload_serializer(self):
        """Return the serializer matching the start node's input encoder.

        Delegates to ``get_input_encoder`` so a router start node is handled
        the same way as a compute-function start node, instead of failing
        with AttributeError on a missing ``compute_fn``.

        Raises:
            ValueError: If the start node has neither a compute function nor
                a router set.
        """
        return get_serializer(self.get_input_encoder())

    def get_input_encoder(self) -> str:
        """Return the input encoder name of the start node.

        The compute function is consulted first, then the dynamic router.

        Raises:
            ValueError: If the start node has neither of them set.
        """
        start = self.start_node
        if start.compute_fn:
            return start.compute_fn.input_encoder
        if start.dynamic_router:
            return start.dynamic_router.input_encoder

        raise ValueError("start node is not set on the graph")
@@ -1,70 +0,0 @@
1
- import inspect
2
- import re
3
- from typing import List, Type, Union
4
-
5
- from .indexify_functions import IndexifyFunction, IndexifyRouter
6
-
7
-
8
def validate_node(indexify_fn: Union[Type[IndexifyFunction], Type[IndexifyRouter]]):
    """Check that ``indexify_fn`` can be added to a graph as a node.

    The argument must be a class deriving from ``IndexifyFunction`` or
    ``IndexifyRouter`` whose ``run`` method annotates every parameter (except
    ``self``) and its return type.

    Args:
        indexify_fn: Candidate node class.

    Raises:
        Exception: If the argument is not a class (plain function, instance,
            string, None, ...), is not one of the two supported base types,
            or if ``run`` lacks a parameter or return annotation.
    """
    # Reject every non-class up front: `issubclass` below would otherwise fail
    # with a bare TypeError for anything that is not a class.
    if not inspect.isclass(indexify_fn):
        raise Exception(
            f"Unable to add node of type `{type(indexify_fn)}`. "
            f"Required, `IndexifyFunction` or `IndexifyRouter`"
        )
    if not issubclass(indexify_fn, (IndexifyFunction, IndexifyRouter)):
        raise Exception(
            f"Unable to add node of type `{indexify_fn.__name__}`. "
            f"Required, `IndexifyFunction` or `IndexifyRouter`"
        )

    signature = inspect.signature(indexify_fn.run)

    for param in signature.parameters.values():
        if param.name == "self":
            continue
        if param.annotation == inspect.Parameter.empty:
            raise Exception(
                f"Input param {param.name} in {indexify_fn.name} has empty"
                f" type annotation"
            )

    if signature.return_annotation == inspect.Signature.empty:
        raise Exception(f"Function {indexify_fn.name} has empty return type annotation")
36
-
37
-
38
def validate_route(
    from_node: Type[IndexifyRouter], to_nodes: List[Type[IndexifyFunction]]
):
    """Check that a router's declared return type only names routed-to nodes.

    The return annotation of ``from_node.run`` must be ``Union[...]`` or a
    list of node classes (``List[Union[A, B]]`` or ``List[A]``). Every member
    that has a ``name`` attribute must be present in ``to_nodes``.

    Args:
        from_node: Router class whose ``run`` annotation is inspected.
        to_nodes: Node classes the router is allowed to dispatch to.

    Raises:
        Exception: If the return annotation is missing, is neither a Union
            nor a list, or names a node that is not in ``to_nodes``.
    """
    return_annotation = inspect.signature(from_node.run).return_annotation

    if return_annotation == inspect.Signature.empty:
        raise Exception(f"Function {from_node.name} has empty return type annotation")

    origin = getattr(return_annotation, "__origin__", None)
    if origin is Union:
        candidates = return_annotation.__args__
    elif origin is list:
        element = return_annotation.__args__[0]
        # List[Union[A, B]] carries its members on the inner Union, whereas
        # List[A] holds a bare class with no `__args__` of its own.
        candidates = getattr(element, "__args__", (element,))
    else:
        raise Exception(f"Return type of {from_node.name} is not a Union")

    for arg in candidates:
        # Members without a `name` (e.g. NoneType in Optional[...]) are skipped.
        if hasattr(arg, "name") and arg not in to_nodes:
            raise Exception(
                f"Unable to find {arg.name} in to_nodes {[node.name for node in to_nodes]}"
            )