indexify 0.2.7__tar.gz → 0.2.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {indexify-0.2.7 → indexify-0.2.9}/PKG-INFO +1 -1
- {indexify-0.2.7 → indexify-0.2.9}/indexify/cli.py +34 -8
- {indexify-0.2.7 → indexify-0.2.9}/indexify/executor/function_worker.py +16 -26
- {indexify-0.2.7 → indexify-0.2.9}/indexify/executor/runtime_probes.py +1 -1
- {indexify-0.2.7 → indexify-0.2.9}/indexify/functions_sdk/data_objects.py +1 -1
- {indexify-0.2.7 → indexify-0.2.9}/indexify/functions_sdk/graph.py +13 -103
- {indexify-0.2.7 → indexify-0.2.9}/indexify/functions_sdk/graph_definition.py +0 -5
- {indexify-0.2.7 → indexify-0.2.9}/indexify/functions_sdk/image.py +2 -2
- {indexify-0.2.7 → indexify-0.2.9}/indexify/functions_sdk/indexify_functions.py +83 -0
- {indexify-0.2.7 → indexify-0.2.9}/indexify/http_client.py +33 -11
- {indexify-0.2.7 → indexify-0.2.9}/indexify/remote_graph.py +9 -8
- {indexify-0.2.7 → indexify-0.2.9}/pyproject.toml +1 -1
- {indexify-0.2.7 → indexify-0.2.9}/LICENSE.txt +0 -0
- {indexify-0.2.7 → indexify-0.2.9}/README.md +0 -0
- {indexify-0.2.7 → indexify-0.2.9}/indexify/__init__.py +0 -0
- {indexify-0.2.7 → indexify-0.2.9}/indexify/data_loaders/__init__.py +0 -0
- {indexify-0.2.7 → indexify-0.2.9}/indexify/data_loaders/local_directory_loader.py +0 -0
- {indexify-0.2.7 → indexify-0.2.9}/indexify/data_loaders/url_loader.py +0 -0
- {indexify-0.2.7 → indexify-0.2.9}/indexify/error.py +0 -0
- {indexify-0.2.7 → indexify-0.2.9}/indexify/executor/agent.py +0 -0
- {indexify-0.2.7 → indexify-0.2.9}/indexify/executor/api_objects.py +0 -0
- {indexify-0.2.7 → indexify-0.2.9}/indexify/executor/downloader.py +0 -0
- {indexify-0.2.7 → indexify-0.2.9}/indexify/executor/executor_tasks.py +0 -0
- {indexify-0.2.7 → indexify-0.2.9}/indexify/executor/indexify_executor.py +0 -0
- {indexify-0.2.7 → indexify-0.2.9}/indexify/executor/task_reporter.py +0 -0
- {indexify-0.2.7 → indexify-0.2.9}/indexify/executor/task_store.py +0 -0
- {indexify-0.2.7 → indexify-0.2.9}/indexify/functions_sdk/graph_validation.py +0 -0
- {indexify-0.2.7 → indexify-0.2.9}/indexify/functions_sdk/local_cache.py +0 -0
- {indexify-0.2.7 → indexify-0.2.9}/indexify/functions_sdk/object_serializer.py +0 -0
- {indexify-0.2.7 → indexify-0.2.9}/indexify/settings.py +0 -0
@@ -9,7 +9,6 @@ import threading
|
|
9
9
|
import time
|
10
10
|
from typing import Annotated, List, Optional
|
11
11
|
|
12
|
-
import docker
|
13
12
|
import nanoid
|
14
13
|
import typer
|
15
14
|
from rich.console import Console
|
@@ -119,7 +118,9 @@ def server_dev_mode():
|
|
119
118
|
|
120
119
|
|
121
120
|
@app.command(help="Build image for function names")
|
122
|
-
def build_image(workflow_file_path: str, func_names: List[str]):
|
121
|
+
def build_image(
|
122
|
+
workflow_file_path: str, func_names: List[str], python_sdk_path: Optional[str] = None
|
123
|
+
):
|
123
124
|
globals_dict = {}
|
124
125
|
|
125
126
|
# Add the folder in the workflow file path to the current Python path
|
@@ -139,7 +140,9 @@ def build_image(workflow_file_path: str, func_names: List[str]):
|
|
139
140
|
for func_name in func_names:
|
140
141
|
if name == func_name:
|
141
142
|
found_funcs.append(name)
|
142
|
-
_create_image_for_func(
|
143
|
+
_create_image_for_func(
|
144
|
+
func_name=func_name, func_obj=obj, python_sdk_path=python_sdk_path
|
145
|
+
)
|
143
146
|
|
144
147
|
console.print(
|
145
148
|
Text(f"Processed functions: ", style="cyan"),
|
@@ -205,16 +208,18 @@ def executor(
|
|
205
208
|
console.print(Text(f"Exiting gracefully: {ex}", style="bold yellow"))
|
206
209
|
|
207
210
|
|
208
|
-
def _create_image_for_func(func_name, func_obj):
|
211
|
+
def _create_image_for_func(func_name, func_obj, python_sdk_path):
|
209
212
|
console.print(
|
210
213
|
Text("Creating container for ", style="cyan"),
|
211
214
|
Text(f"`{func_name}`", style="cyan bold"),
|
212
215
|
)
|
213
|
-
_build_image(image=func_obj.image,
|
216
|
+
_build_image(image=func_obj.image, python_sdk_path=python_sdk_path)
|
214
217
|
|
215
218
|
|
216
|
-
def _build_image(image: Image,
|
219
|
+
def _build_image(image: Image, python_sdk_path: Optional[str] = None):
|
217
220
|
try:
|
221
|
+
import docker
|
222
|
+
|
218
223
|
client = docker.from_env()
|
219
224
|
client.ping()
|
220
225
|
except Exception as e:
|
@@ -240,15 +245,36 @@ WORKDIR /app
|
|
240
245
|
run_strs = ["RUN " + i for i in image._run_strs]
|
241
246
|
|
242
247
|
docker_file += "\n".join(run_strs)
|
248
|
+
print(os.getcwd())
|
249
|
+
import docker
|
250
|
+
import docker.api.build
|
251
|
+
|
252
|
+
docker.api.build.process_dockerfile = lambda dockerfile, path: (
|
253
|
+
"Dockerfile",
|
254
|
+
dockerfile,
|
255
|
+
)
|
256
|
+
|
257
|
+
if python_sdk_path is not None:
|
258
|
+
if not os.path.exists(python_sdk_path):
|
259
|
+
print(f"error: {python_sdk_path} does not exist")
|
260
|
+
os.exit(1)
|
261
|
+
docker_file += f"\nCOPY {python_sdk_path} /app/python-sdk"
|
262
|
+
docker_file += f"\nRUN (cd /app/python-sdk && pip install .)"
|
263
|
+
else:
|
264
|
+
docker_file += f"\nRUN pip install indexify"
|
243
265
|
|
244
266
|
console.print("Creating image using Dockerfile contents:", style="cyan bold")
|
245
267
|
console.print(f"{docker_file}", style="magenta")
|
246
268
|
|
247
269
|
client = docker.from_env()
|
248
270
|
image_name = f"{image._image_name}:{image._tag}"
|
249
|
-
client.images.build(
|
250
|
-
|
271
|
+
(_image, generator) = client.images.build(
|
272
|
+
path=".",
|
273
|
+
dockerfile=docker_file,
|
251
274
|
tag=image_name,
|
252
275
|
rm=True,
|
253
276
|
)
|
277
|
+
for result in generator:
|
278
|
+
print(result)
|
279
|
+
|
254
280
|
print(f"built image: {image_name}")
|
@@ -1,8 +1,9 @@
|
|
1
1
|
import asyncio
|
2
2
|
import traceback
|
3
3
|
from concurrent.futures.process import BrokenProcessPool
|
4
|
-
from typing import Dict, List, Optional
|
4
|
+
from typing import Dict, List, Optional
|
5
5
|
|
6
|
+
import cloudpickle
|
6
7
|
from pydantic import BaseModel
|
7
8
|
from rich import print
|
8
9
|
|
@@ -11,18 +12,11 @@ from indexify.functions_sdk.data_objects import (
|
|
11
12
|
IndexifyData,
|
12
13
|
RouterOutput,
|
13
14
|
)
|
14
|
-
from indexify.functions_sdk.graph import Graph
|
15
15
|
from indexify.functions_sdk.indexify_functions import IndexifyFunctionWrapper
|
16
16
|
|
17
|
-
graphs: Dict[str, Graph] = {}
|
18
17
|
function_wrapper_map: Dict[str, IndexifyFunctionWrapper] = {}
|
19
18
|
|
20
19
|
import concurrent.futures
|
21
|
-
import io
|
22
|
-
from contextlib import redirect_stderr, redirect_stdout
|
23
|
-
|
24
|
-
from .runtime_probes import RuntimeProbes
|
25
|
-
|
26
20
|
|
27
21
|
class FunctionRunException(Exception):
|
28
22
|
def __init__(
|
@@ -53,12 +47,13 @@ def _load_function(
|
|
53
47
|
key = f"{namespace}/{graph_name}/{version}/{fn_name}"
|
54
48
|
if key in function_wrapper_map:
|
55
49
|
return
|
56
|
-
|
57
|
-
|
58
|
-
|
50
|
+
with open(code_path, "rb") as f:
|
51
|
+
code = f.read()
|
52
|
+
pickled_functions = cloudpickle.loads(code)
|
53
|
+
function_wrapper = IndexifyFunctionWrapper(
|
54
|
+
cloudpickle.loads(pickled_functions[fn_name])
|
55
|
+
)
|
59
56
|
function_wrapper_map[key] = function_wrapper
|
60
|
-
graph_key = f"{namespace}/{graph_name}/{version}"
|
61
|
-
graphs[graph_key] = graph
|
62
57
|
|
63
58
|
|
64
59
|
class FunctionWorker:
|
@@ -94,8 +89,6 @@ class FunctionWorker:
|
|
94
89
|
traceback.print_exc()
|
95
90
|
raise mp
|
96
91
|
except FunctionRunException as e:
|
97
|
-
print(e)
|
98
|
-
print(traceback.format_exc())
|
99
92
|
return FunctionWorkerOutput(
|
100
93
|
exception=str(e),
|
101
94
|
stdout=e.stdout,
|
@@ -138,26 +131,23 @@ def _run_function(
|
|
138
131
|
fn_output = None
|
139
132
|
has_failed = False
|
140
133
|
exception_msg = None
|
141
|
-
print(
|
142
|
-
f"[bold] function_worker: [/bold] invoking function {fn_name} in graph {graph_name}"
|
143
|
-
)
|
134
|
+
print(f"[bold] function_worker: [/bold] invoking function {fn_name} in graph {graph_name}")
|
144
135
|
with redirect_stdout(stdout_capture), redirect_stderr(stderr_capture):
|
145
136
|
try:
|
146
137
|
key = f"{namespace}/{graph_name}/{version}/{fn_name}"
|
147
138
|
if key not in function_wrapper_map:
|
148
139
|
_load_function(namespace, graph_name, fn_name, code_path, version)
|
149
140
|
|
150
|
-
|
151
|
-
if
|
152
|
-
router_output =
|
141
|
+
fn = function_wrapper_map[key]
|
142
|
+
if str(type(fn.indexify_function)) == "<class 'indexify.functions_sdk.indexify_functions.IndexifyRo'>":
|
143
|
+
router_output = fn.invoke_router(fn_name, input)
|
153
144
|
else:
|
154
|
-
fn_output =
|
145
|
+
fn_output = fn.invoke_fn_ser(fn_name, input, init_value)
|
155
146
|
|
156
|
-
is_reducer =
|
157
|
-
graph.get_function(fn_name).indexify_function.accumulate is not None
|
158
|
-
)
|
147
|
+
is_reducer = fn.indexify_function.accumulate is not None
|
159
148
|
except Exception as e:
|
160
|
-
|
149
|
+
import sys
|
150
|
+
print(traceback.format_exc(), file=sys.stderr)
|
161
151
|
has_failed = True
|
162
152
|
exception_msg = str(e)
|
163
153
|
|
@@ -27,7 +27,7 @@ class RuntimeProbes:
|
|
27
27
|
if os.path.exists(file_path):
|
28
28
|
with open(file_path, "r") as file:
|
29
29
|
return file.read().strip()
|
30
|
-
return "indexify-executor-default"
|
30
|
+
return "tensorlake/indexify-executor-default"
|
31
31
|
|
32
32
|
def _get_python_version(self) -> Tuple[int, int]:
|
33
33
|
version_info = sys.version_info
|
@@ -1,4 +1,3 @@
|
|
1
|
-
import inspect
|
2
1
|
import sys
|
3
2
|
from collections import defaultdict
|
4
3
|
from queue import deque
|
@@ -16,7 +15,6 @@ from typing import (
|
|
16
15
|
)
|
17
16
|
|
18
17
|
import cloudpickle
|
19
|
-
import msgpack
|
20
18
|
from nanoid import generate
|
21
19
|
from pydantic import BaseModel
|
22
20
|
from typing_extensions import get_args, get_origin
|
@@ -36,7 +34,7 @@ from .indexify_functions import (
|
|
36
34
|
IndexifyRouter,
|
37
35
|
)
|
38
36
|
from .local_cache import CacheAwareFunctionWrapper
|
39
|
-
from .object_serializer import CloudPickleSerializer, get_serializer
|
37
|
+
from .object_serializer import get_serializer
|
40
38
|
|
41
39
|
RouterFn = Annotated[
|
42
40
|
Callable[[IndexifyData], Optional[List[IndexifyFunction]]], "RouterFn"
|
@@ -45,26 +43,16 @@ GraphNode = Annotated[Union[IndexifyFunctionWrapper, RouterFn], "GraphNode"]
|
|
45
43
|
|
46
44
|
|
47
45
|
def is_pydantic_model_from_annotation(type_annotation):
|
48
|
-
# If it's a string representation
|
49
46
|
if isinstance(type_annotation, str):
|
50
|
-
# Extract the class name from the string
|
51
47
|
class_name = type_annotation.split("'")[-2].split(".")[-1]
|
52
|
-
# This part is tricky and might require additional context or imports
|
53
|
-
# You might need to import the actual class or module where it's defined
|
54
|
-
# For example:
|
55
|
-
# from indexify.functions_sdk.data_objects import File
|
56
|
-
# return issubclass(eval(class_name), BaseModel)
|
57
48
|
return False # Default to False if we can't evaluate
|
58
49
|
|
59
|
-
# If it's a Type object
|
60
50
|
origin = get_origin(type_annotation)
|
61
51
|
if origin is not None:
|
62
|
-
# Handle generic types like List[File], Optional[File], etc.
|
63
52
|
args = get_args(type_annotation)
|
64
53
|
if args:
|
65
54
|
return is_pydantic_model_from_annotation(args[0])
|
66
55
|
|
67
|
-
# If it's a direct class reference
|
68
56
|
if isinstance(type_annotation, type):
|
69
57
|
return issubclass(type_annotation, BaseModel)
|
70
58
|
|
@@ -98,10 +86,6 @@ class Graph:
|
|
98
86
|
def get_accumulators(self) -> Dict[str, Any]:
|
99
87
|
return self.accumulator_zero_values
|
100
88
|
|
101
|
-
def deserialize_fn_output(self, name: str, output: IndexifyData) -> Any:
|
102
|
-
serializer = get_serializer(self.nodes[name].payload_encoder)
|
103
|
-
return serializer.deserialize(output.payload)
|
104
|
-
|
105
89
|
def add_node(
|
106
90
|
self, indexify_fn: Union[Type[IndexifyFunction], Type[IndexifyRouter]]
|
107
91
|
) -> "Graph":
|
@@ -133,37 +117,14 @@ class Graph:
|
|
133
117
|
self.routers[from_node.name].append(node.name)
|
134
118
|
return self
|
135
119
|
|
136
|
-
def serialize(self)
|
120
|
+
def serialize(self):
|
137
121
|
# Get all unique modules from nodes and edges
|
138
|
-
|
122
|
+
pickled_functions = {}
|
139
123
|
for node in self.nodes.values():
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
for module in modules:
|
145
|
-
print(f"registering module {module} with cloudpickle")
|
146
|
-
cloudpickle.register_pickle_by_value(sys.modules[module])
|
147
|
-
pickled_nodes_by_image[image_name] = cloudpickle.dumps(self)
|
148
|
-
for module in modules:
|
149
|
-
cloudpickle.unregister_pickle_by_value(sys.modules[module])
|
150
|
-
|
151
|
-
# Register each module with cloudpickle
|
152
|
-
for module_name in modules:
|
153
|
-
module = sys.modules[module_name]
|
154
|
-
print(f"registering module {module_name} with cloudpickle")
|
155
|
-
cloudpickle.register_pickle_by_value(module)
|
156
|
-
return pickled_nodes_by_image
|
157
|
-
|
158
|
-
@staticmethod
|
159
|
-
def deserialize(serialized_code_by_images: Dict[str, bytes], image: str) -> "Graph":
|
160
|
-
return cloudpickle.loads(serialized_code_by_images[image])
|
161
|
-
|
162
|
-
@staticmethod
|
163
|
-
def from_path(path: str, image: str) -> "Graph":
|
164
|
-
with open(path, "rb") as f:
|
165
|
-
pickled_code_by_images: Dict[str, bytes] = cloudpickle.load(f)
|
166
|
-
return Graph.deserialize(pickled_code_by_images, image)
|
124
|
+
cloudpickle.register_pickle_by_value(sys.modules[node.__module__])
|
125
|
+
pickled_functions[node.name] = cloudpickle.dumps(node)
|
126
|
+
cloudpickle.unregister_pickle_by_value(sys.modules[node.__module__])
|
127
|
+
return pickled_functions
|
167
128
|
|
168
129
|
def add_edge(
|
169
130
|
self,
|
@@ -173,60 +134,6 @@ class Graph:
|
|
173
134
|
self.add_edges(from_node, [to_node])
|
174
135
|
return self
|
175
136
|
|
176
|
-
def invoke_fn_ser(
|
177
|
-
self, name: str, input: IndexifyData, acc: Optional[Any] = None
|
178
|
-
) -> List[IndexifyData]:
|
179
|
-
fn_wrapper = self.get_function(name)
|
180
|
-
input = self.deserialize_input(name, input)
|
181
|
-
serializer = get_serializer(fn_wrapper.indexify_function.payload_encoder)
|
182
|
-
if acc is not None:
|
183
|
-
acc = fn_wrapper.indexify_function.accumulate.model_validate(
|
184
|
-
serializer.deserialize(acc.payload)
|
185
|
-
)
|
186
|
-
if acc is None and fn_wrapper.indexify_function.accumulate is not None:
|
187
|
-
acc = fn_wrapper.indexify_function.accumulate.model_validate(
|
188
|
-
self.accumulator_zero_values[name]
|
189
|
-
)
|
190
|
-
outputs: List[Any] = fn_wrapper.run_fn(input, acc=acc)
|
191
|
-
return [
|
192
|
-
IndexifyData(payload=serializer.serialize(output)) for output in outputs
|
193
|
-
]
|
194
|
-
|
195
|
-
def invoke_router(self, name: str, input: IndexifyData) -> Optional[RouterOutput]:
|
196
|
-
fn_wrapper = self.get_function(name)
|
197
|
-
input = self.deserialize_input(name, input)
|
198
|
-
return RouterOutput(edges=fn_wrapper.run_router(input))
|
199
|
-
|
200
|
-
def deserialize_input(self, compute_fn: str, indexify_data: IndexifyData) -> Any:
|
201
|
-
compute_fn = self.nodes[compute_fn]
|
202
|
-
if not compute_fn:
|
203
|
-
raise ValueError(f"Compute function {compute_fn} not found in graph")
|
204
|
-
if compute_fn.payload_encoder == "cloudpickle":
|
205
|
-
return CloudPickleSerializer.deserialize(indexify_data.payload)
|
206
|
-
payload = msgpack.unpackb(indexify_data.payload)
|
207
|
-
signature = inspect.signature(compute_fn.run)
|
208
|
-
arg_types = {}
|
209
|
-
for name, param in signature.parameters.items():
|
210
|
-
if (
|
211
|
-
param.annotation != inspect.Parameter.empty
|
212
|
-
and param.annotation != getattr(compute_fn, "accumulate", None)
|
213
|
-
):
|
214
|
-
arg_types[name] = param.annotation
|
215
|
-
if len(arg_types) > 1:
|
216
|
-
raise ValueError(
|
217
|
-
f"Compute function {compute_fn} has multiple arguments, but only one is supported"
|
218
|
-
)
|
219
|
-
elif len(arg_types) == 0:
|
220
|
-
raise ValueError(f"Compute function {compute_fn} has no arguments")
|
221
|
-
arg_name, arg_type = next(iter(arg_types.items()))
|
222
|
-
if arg_type is None:
|
223
|
-
raise ValueError(f"Argument {arg_name} has no type annotation")
|
224
|
-
if is_pydantic_model_from_annotation(arg_type):
|
225
|
-
if len(payload.keys()) == 1 and isinstance(list(payload.values())[0], dict):
|
226
|
-
payload = list(payload.values())[0]
|
227
|
-
return arg_type.model_validate(payload)
|
228
|
-
return payload
|
229
|
-
|
230
137
|
def add_edges(
|
231
138
|
self,
|
232
139
|
from_node: Union[Type[IndexifyFunction], Type[IndexifyRouter]],
|
@@ -329,7 +236,9 @@ class Graph:
|
|
329
236
|
function_outputs.extend(cached_output_list)
|
330
237
|
outputs[node_name].extend(cached_output_list)
|
331
238
|
else:
|
332
|
-
function_outputs: List[IndexifyData] = self.invoke_fn_ser(
|
239
|
+
function_outputs: List[IndexifyData] = IndexifyFunctionWrapper(
|
240
|
+
node
|
241
|
+
).invoke_fn_ser(
|
333
242
|
node_name, input, accumulator_values.get(node_name, None)
|
334
243
|
)
|
335
244
|
print(f"ran {node_name}: num outputs: {len(function_outputs)}")
|
@@ -370,9 +279,10 @@ class Graph:
|
|
370
279
|
queue.append((out_edge, output))
|
371
280
|
|
372
281
|
def _route(self, node_name: str, input: IndexifyData) -> Optional[RouterOutput]:
|
373
|
-
|
282
|
+
router = self.nodes[node_name]
|
283
|
+
return IndexifyFunctionWrapper(router).invoke_router(node_name, input)
|
374
284
|
|
375
|
-
def
|
285
|
+
def output(
|
376
286
|
self,
|
377
287
|
invocation_id: str,
|
378
288
|
fn_name: str,
|
@@ -27,11 +27,6 @@ class NodeMetadata(BaseModel):
|
|
27
27
|
dynamic_router: Optional[RouterMetadata] = None
|
28
28
|
compute_fn: Optional[FunctionMetadata] = None
|
29
29
|
|
30
|
-
def image_name(self):
|
31
|
-
if self.dynamic_router:
|
32
|
-
return self.dynamic_router.image_name
|
33
|
-
return self.compute_fn.image_name
|
34
|
-
|
35
30
|
|
36
31
|
# RuntimeInformation is a class that holds data about the environment in which the graph should run.
|
37
32
|
class RuntimeInformation(BaseModel):
|
@@ -6,7 +6,7 @@ class Image:
|
|
6
6
|
|
7
7
|
self._base_image = "python:3.10.15-slim-bookworm"
|
8
8
|
|
9
|
-
self._run_strs = [
|
9
|
+
self._run_strs = []
|
10
10
|
|
11
11
|
def name(self, image_name):
|
12
12
|
self._image_name = image_name
|
@@ -27,7 +27,7 @@ class Image:
|
|
27
27
|
|
28
28
|
DEFAULT_IMAGE = (
|
29
29
|
Image()
|
30
|
-
.name("indexify-executor-default")
|
30
|
+
.name("tensorlake/indexify-executor-default")
|
31
31
|
.base_image("python:3.10.15-slim-bookworm")
|
32
32
|
.tag("latest")
|
33
33
|
.run("pip install indexify")
|
@@ -1,3 +1,4 @@
|
|
1
|
+
import inspect
|
1
2
|
from abc import ABC, abstractmethod
|
2
3
|
from functools import update_wrapper
|
3
4
|
from typing import (
|
@@ -12,11 +13,40 @@ from typing import (
|
|
12
13
|
get_origin,
|
13
14
|
)
|
14
15
|
|
16
|
+
import msgpack
|
15
17
|
from pydantic import BaseModel
|
16
18
|
from typing_extensions import get_type_hints
|
17
19
|
|
18
20
|
from .data_objects import IndexifyData, RouterOutput
|
19
21
|
from .image import DEFAULT_IMAGE, Image
|
22
|
+
from .object_serializer import CloudPickleSerializer, get_serializer
|
23
|
+
|
24
|
+
|
25
|
+
def is_pydantic_model_from_annotation(type_annotation):
|
26
|
+
# If it's a string representation
|
27
|
+
if isinstance(type_annotation, str):
|
28
|
+
# Extract the class name from the string
|
29
|
+
class_name = type_annotation.split("'")[-2].split(".")[-1]
|
30
|
+
# This part is tricky and might require additional context or imports
|
31
|
+
# You might need to import the actual class or module where it's defined
|
32
|
+
# For example:
|
33
|
+
# from indexify.functions_sdk.data_objects import File
|
34
|
+
# return issubclass(eval(class_name), BaseModel)
|
35
|
+
return False # Default to False if we can't evaluate
|
36
|
+
|
37
|
+
# If it's a Type object
|
38
|
+
origin = get_origin(type_annotation)
|
39
|
+
if origin is not None:
|
40
|
+
# Handle generic types like List[File], Optional[File], etc.
|
41
|
+
args = get_args(type_annotation)
|
42
|
+
if args:
|
43
|
+
return is_pydantic_model_from_annotation(args[0])
|
44
|
+
|
45
|
+
# If it's a direct class reference
|
46
|
+
if isinstance(type_annotation, type):
|
47
|
+
return issubclass(type_annotation, BaseModel)
|
48
|
+
|
49
|
+
return False
|
20
50
|
|
21
51
|
|
22
52
|
class EmbeddingIndexes(BaseModel):
|
@@ -186,3 +216,56 @@ class IndexifyFunctionWrapper:
|
|
186
216
|
extracted_data = self.indexify_function.run(*args, **kwargs)
|
187
217
|
|
188
218
|
return extracted_data if isinstance(extracted_data, list) else [extracted_data]
|
219
|
+
|
220
|
+
def invoke_fn_ser(
|
221
|
+
self, name: str, input: IndexifyData, acc: Optional[Any] = None
|
222
|
+
) -> List[IndexifyData]:
|
223
|
+
input = self.deserialize_input(name, input)
|
224
|
+
serializer = get_serializer(self.indexify_function.payload_encoder)
|
225
|
+
if acc is not None:
|
226
|
+
acc = self.indexify_function.accumulate.model_validate(
|
227
|
+
serializer.deserialize(acc.payload)
|
228
|
+
)
|
229
|
+
if acc is None and self.indexify_function.accumulate is not None:
|
230
|
+
acc = self.indexify_function.accumulate.model_validate(
|
231
|
+
self.indexify_function.accumulate()
|
232
|
+
)
|
233
|
+
outputs: List[Any] = self.run_fn(input, acc=acc)
|
234
|
+
return [
|
235
|
+
IndexifyData(payload=serializer.serialize(output)) for output in outputs
|
236
|
+
]
|
237
|
+
|
238
|
+
def invoke_router(self, name: str, input: IndexifyData) -> Optional[RouterOutput]:
|
239
|
+
input = self.deserialize_input(name, input)
|
240
|
+
return RouterOutput(edges=self.run_router(input))
|
241
|
+
|
242
|
+
def deserialize_input(self, compute_fn: str, indexify_data: IndexifyData) -> Any:
|
243
|
+
if self.indexify_function.payload_encoder == "cloudpickle":
|
244
|
+
return CloudPickleSerializer.deserialize(indexify_data.payload)
|
245
|
+
payload = msgpack.unpackb(indexify_data.payload)
|
246
|
+
signature = inspect.signature(self.indexify_function.run)
|
247
|
+
arg_types = {}
|
248
|
+
for name, param in signature.parameters.items():
|
249
|
+
if (
|
250
|
+
param.annotation != inspect.Parameter.empty
|
251
|
+
and param.annotation != getattr(compute_fn, "accumulate", None)
|
252
|
+
):
|
253
|
+
arg_types[name] = param.annotation
|
254
|
+
if len(arg_types) > 1:
|
255
|
+
raise ValueError(
|
256
|
+
f"Compute function {compute_fn} has multiple arguments, but only one is supported"
|
257
|
+
)
|
258
|
+
elif len(arg_types) == 0:
|
259
|
+
raise ValueError(f"Compute function {compute_fn} has no arguments")
|
260
|
+
arg_name, arg_type = next(iter(arg_types.items()))
|
261
|
+
if arg_type is None:
|
262
|
+
raise ValueError(f"Argument {arg_name} has no type annotation")
|
263
|
+
if is_pydantic_model_from_annotation(arg_type):
|
264
|
+
if len(payload.keys()) == 1 and isinstance(list(payload.values())[0], dict):
|
265
|
+
payload = list(payload.values())[0]
|
266
|
+
return arg_type.model_validate(payload)
|
267
|
+
return payload
|
268
|
+
|
269
|
+
def deserialize_fn_output(self, output: IndexifyData) -> Any:
|
270
|
+
serializer = get_serializer(self.indexify_function.payload_encoder)
|
271
|
+
return serializer.deserialize(output.payload)
|
@@ -13,6 +13,7 @@ from rich import print
|
|
13
13
|
from indexify.error import ApiException
|
14
14
|
from indexify.functions_sdk.data_objects import IndexifyData
|
15
15
|
from indexify.functions_sdk.graph import ComputeGraphMetadata, Graph
|
16
|
+
from indexify.functions_sdk.indexify_functions import IndexifyFunctionWrapper
|
16
17
|
from indexify.settings import DEFAULT_SERVICE_URL, DEFAULT_SERVICE_URL_HTTPS
|
17
18
|
|
18
19
|
|
@@ -70,6 +71,7 @@ class IndexifyClient:
|
|
70
71
|
self._service_url = service_url
|
71
72
|
self._timeout = kwargs.get("timeout")
|
72
73
|
self._graphs: Dict[str, Graph] = {}
|
74
|
+
self._fns = {}
|
73
75
|
|
74
76
|
def _request(self, method: str, **kwargs) -> httpx.Response:
|
75
77
|
try:
|
@@ -168,6 +170,8 @@ class IndexifyClient:
|
|
168
170
|
)
|
169
171
|
response.raise_for_status()
|
170
172
|
self._graphs[graph.name] = graph
|
173
|
+
for fn_name, fn in graph.nodes.items():
|
174
|
+
self._fns[f"{graph.name}/{fn_name}"] = fn
|
171
175
|
|
172
176
|
def graphs(self) -> List[str]:
|
173
177
|
response = self._get(f"graphs")
|
@@ -177,14 +181,14 @@ class IndexifyClient:
|
|
177
181
|
response = self._get(f"namespaces/{self.namespace}/compute_graphs/{name}")
|
178
182
|
return ComputeGraphMetadata(**response.json())
|
179
183
|
|
180
|
-
def
|
184
|
+
def load_fn_wrapper(self, name: str, fn_name: str) -> IndexifyFunctionWrapper:
|
181
185
|
response = self._get(
|
182
186
|
f"internal/namespaces/{self.namespace}/compute_graphs/{name}/code"
|
183
187
|
)
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
+
pickled_functions_by_name = cloudpickle.loads(response.content)
|
189
|
+
return IndexifyFunctionWrapper(
|
190
|
+
cloudpickle.loads(pickled_functions_by_name[fn_name])
|
191
|
+
)
|
188
192
|
|
189
193
|
def namespaces(self) -> List[str]:
|
190
194
|
response = self._get(f"namespaces")
|
@@ -193,9 +197,28 @@ class IndexifyClient:
|
|
193
197
|
for item in namespaces_dict:
|
194
198
|
namespaces.append(item["name"])
|
195
199
|
return namespaces
|
200
|
+
|
201
|
+
@classmethod
|
202
|
+
def new_namespace(cls, namespace: str, server_addr: Optional[str] = "http://localhost:8900"):
|
203
|
+
# Create a new client instance with the specified server address
|
204
|
+
client = cls(service_url=server_addr)
|
205
|
+
|
206
|
+
try:
|
207
|
+
# Create the new namespace using the client
|
208
|
+
client.create_namespace(namespace)
|
209
|
+
except ApiException as e:
|
210
|
+
print(f"Failed to create namespace '{namespace}': {e}")
|
211
|
+
raise
|
212
|
+
|
213
|
+
# Set the namespace for the newly created client
|
214
|
+
client.namespace = namespace
|
215
|
+
|
216
|
+
# Return the client instance with the new namespace
|
217
|
+
return client
|
218
|
+
|
196
219
|
|
197
220
|
def create_namespace(self, namespace: str):
|
198
|
-
self._post("namespaces", json={"
|
221
|
+
self._post("namespaces", json={"name": namespace})
|
199
222
|
|
200
223
|
def logs(
|
201
224
|
self, invocation_id: str, cg_name: str, fn_name: str, file: str
|
@@ -289,8 +312,9 @@ class IndexifyClient:
|
|
289
312
|
block_until_done: bool = True: If True, the method will block until the extraction is done. If False, the method will return immediately.
|
290
313
|
return: Union[Dict[str, List[Any]], List[Any]]: The extracted objects. If the extractor name is provided, the output is a list of extracted objects by the extractor. If the extractor name is not provided, the output is a dictionary with the extractor name as the key and the extracted objects as the value. If no objects are found, an empty list is returned.
|
291
314
|
"""
|
292
|
-
|
293
|
-
|
315
|
+
fn_key = f"{graph}/{fn_name}"
|
316
|
+
if fn_key not in self._fns:
|
317
|
+
self._fns[fn_key] = self.load_fn_wrapper(graph, fn_name)
|
294
318
|
response = self._get(
|
295
319
|
f"namespaces/{self.namespace}/compute_graphs/{graph}/invocations/{invocation_id}/outputs",
|
296
320
|
)
|
@@ -302,9 +326,7 @@ class IndexifyClient:
|
|
302
326
|
indexify_data = self._download_output(
|
303
327
|
self.namespace, graph, invocation_id, fn_name, output.id
|
304
328
|
)
|
305
|
-
output = self.
|
306
|
-
fn_name, indexify_data
|
307
|
-
)
|
329
|
+
output = self._fns[fn_key].deserialize_fn_output(indexify_data)
|
308
330
|
outputs.append(output)
|
309
331
|
return outputs
|
310
332
|
|
@@ -32,21 +32,22 @@ class RemoteGraph:
|
|
32
32
|
return self._client.invoke_graph_with_object(
|
33
33
|
self._name, block_until_done, **kwargs
|
34
34
|
)
|
35
|
+
|
36
|
+
def rerun(self):
|
37
|
+
"""
|
38
|
+
Rerun the graph with the given invocation ID.
|
39
|
+
:param invocation_id: The invocation ID of the graph execution.
|
40
|
+
"""
|
41
|
+
self._client.rerun_graph(self._name)
|
35
42
|
|
36
43
|
@classmethod
|
37
|
-
def deploy(
|
38
|
-
cls,
|
39
|
-
g: Graph,
|
40
|
-
additional_modules=[],
|
41
|
-
server_url: Optional[str] = "http://localhost:8900",
|
42
|
-
):
|
44
|
+
def deploy(cls, g: Graph, additional_modules=[], server_url: Optional[str] = "http://localhost:8900"):
|
43
45
|
"""
|
44
46
|
Create a new RemoteGraph from a local Graph object.
|
45
47
|
:param g: The local Graph object.
|
46
48
|
:param server_url: The URL of the server where the graph will be registered.
|
47
49
|
"""
|
48
50
|
import cloudpickle
|
49
|
-
|
50
51
|
for module in additional_modules:
|
51
52
|
cloudpickle.register_pickle_by_value(module)
|
52
53
|
client = IndexifyClient(service_url=server_url)
|
@@ -63,7 +64,7 @@ class RemoteGraph:
|
|
63
64
|
"""
|
64
65
|
return cls(name=name, server_url=server_url)
|
65
66
|
|
66
|
-
def
|
67
|
+
def output(
|
67
68
|
self,
|
68
69
|
invocation_id: str,
|
69
70
|
fn_name: str,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|