indexify 0.2.3__tar.gz → 0.2.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {indexify-0.2.3 → indexify-0.2.4}/PKG-INFO +1 -1
- {indexify-0.2.3 → indexify-0.2.4}/indexify/__init__.py +4 -6
- {indexify-0.2.3 → indexify-0.2.4}/indexify/cli.py +1 -1
- {indexify-0.2.3 → indexify-0.2.4}/indexify/functions_sdk/graph.py +129 -36
- indexify-0.2.4/indexify/functions_sdk/graph_definition.py +47 -0
- {indexify-0.2.3 → indexify-0.2.4}/indexify/functions_sdk/graph_validation.py +11 -6
- indexify-0.2.3/indexify/remote_client.py → indexify-0.2.4/indexify/http_client.py +1 -3
- indexify-0.2.4/indexify/remote_graph.py +74 -0
- {indexify-0.2.3 → indexify-0.2.4}/pyproject.toml +1 -1
- indexify-0.2.3/indexify/base_client.py +0 -98
- indexify-0.2.3/indexify/client.py +0 -18
- indexify-0.2.3/indexify/foo +0 -72
- indexify-0.2.3/indexify/local_client.py +0 -183
- {indexify-0.2.3 → indexify-0.2.4}/LICENSE.txt +0 -0
- {indexify-0.2.3 → indexify-0.2.4}/README.md +0 -0
- {indexify-0.2.3 → indexify-0.2.4}/indexify/data_loaders/__init__.py +0 -0
- {indexify-0.2.3 → indexify-0.2.4}/indexify/data_loaders/local_directory_loader.py +0 -0
- {indexify-0.2.3 → indexify-0.2.4}/indexify/data_loaders/url_loader.py +0 -0
- {indexify-0.2.3 → indexify-0.2.4}/indexify/error.py +0 -0
- {indexify-0.2.3 → indexify-0.2.4}/indexify/executor/agent.py +0 -0
- {indexify-0.2.3 → indexify-0.2.4}/indexify/executor/api_objects.py +0 -0
- {indexify-0.2.3 → indexify-0.2.4}/indexify/executor/downloader.py +0 -0
- {indexify-0.2.3 → indexify-0.2.4}/indexify/executor/executor_tasks.py +0 -0
- {indexify-0.2.3 → indexify-0.2.4}/indexify/executor/function_worker.py +0 -0
- {indexify-0.2.3 → indexify-0.2.4}/indexify/executor/indexify_executor.py +0 -0
- {indexify-0.2.3 → indexify-0.2.4}/indexify/executor/runtime_probes.py +0 -0
- {indexify-0.2.3 → indexify-0.2.4}/indexify/executor/task_reporter.py +0 -0
- {indexify-0.2.3 → indexify-0.2.4}/indexify/executor/task_store.py +0 -0
- {indexify-0.2.3 → indexify-0.2.4}/indexify/functions_sdk/data_objects.py +0 -0
- {indexify-0.2.3 → indexify-0.2.4}/indexify/functions_sdk/image.py +0 -0
- {indexify-0.2.3 → indexify-0.2.4}/indexify/functions_sdk/indexify_functions.py +0 -0
- {indexify-0.2.3 → indexify-0.2.4}/indexify/functions_sdk/local_cache.py +0 -0
- {indexify-0.2.3 → indexify-0.2.4}/indexify/functions_sdk/object_serializer.py +0 -0
- {indexify-0.2.3 → indexify-0.2.4}/indexify/settings.py +0 -0
{indexify-0.2.3 → indexify-0.2.4}/indexify/__init__.py

```diff
@@ -1,23 +1,21 @@
 from . import data_loaders
-from .client import create_client
 from .functions_sdk.graph import Graph
 from .functions_sdk.image import Image
 from .functions_sdk.indexify_functions import (
     indexify_function,
     indexify_router,
 )
-from .
-from .
+from .http_client import IndexifyClient
+from .remote_graph import RemoteGraph
 from .settings import DEFAULT_SERVICE_URL

 __all__ = [
     "data_loaders",
     "Graph",
+    "RemoteGraph",
     "Image",
     "indexify_function",
     "indexify_router",
     "DEFAULT_SERVICE_URL",
-    "
-    "LocalClient",
-    "create_client",
+    "IndexifyClient",
 ]
```
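The new public API surface follows directly from the `__all__` change above: `create_client` and `LocalClient` are gone, and the renamed `IndexifyClient` plus the new `RemoteGraph` are exported instead. A minimal import sketch of what 0.2.4 exposes:

```python
# Everything below is exported from indexify/__init__.py in 0.2.4.
from indexify import (
    DEFAULT_SERVICE_URL,
    Graph,
    Image,
    IndexifyClient,
    RemoteGraph,
    indexify_function,
    indexify_router,
)
```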
{indexify-0.2.3 → indexify-0.2.4}/indexify/cli.py

```diff
@@ -19,7 +19,7 @@ from rich.theme import Theme

 from indexify.executor.agent import ExtractorAgent
 from indexify.executor.function_worker import FunctionWorker
-from indexify.functions_sdk.image import
+from indexify.functions_sdk.image import DEFAULT_IMAGE, Image

 custom_theme = Theme(
     {
```
{indexify-0.2.3 → indexify-0.2.4}/indexify/functions_sdk/graph.py

```diff
@@ -1,5 +1,7 @@
 import inspect
+import sys
 from collections import defaultdict
+from queue import deque
 from typing import (
     Annotated,
     Any,
@@ -7,7 +9,6 @@ from typing import (
     Dict,
     List,
     Optional,
-    Set,
     Type,
     Union,
     get_args,
@@ -16,16 +17,25 @@ from typing import (

 import cloudpickle
 import msgpack
+from nanoid import generate
 from pydantic import BaseModel
 from typing_extensions import get_args, get_origin

 from .data_objects import IndexifyData, RouterOutput
+from .graph_definition import (
+    ComputeGraphMetadata,
+    FunctionMetadata,
+    NodeMetadata,
+    RouterMetadata,
+    RuntimeInformation,
+)
 from .graph_validation import validate_node, validate_route
 from .indexify_functions import (
     IndexifyFunction,
     IndexifyFunctionWrapper,
     IndexifyRouter,
 )
+from .local_cache import CacheAwareFunctionWrapper
 from .object_serializer import CloudPickleSerializer, get_serializer

 RouterFn = Annotated[
@@ -61,41 +71,6 @@ def is_pydantic_model_from_annotation(type_annotation):
     return False


-class FunctionMetadata(BaseModel):
-    name: str
-    fn_name: str
-    description: str
-    reducer: bool = False
-    image_name: str
-    payload_encoder: str = "cloudpickle"
-
-
-class RouterMetadata(BaseModel):
-    name: str
-    description: str
-    source_fn: str
-    target_fns: List[str]
-    image_name: str
-    payload_encoder: str = "cloudpickle"
-
-
-class NodeMetadata(BaseModel):
-    dynamic_router: Optional[RouterMetadata] = None
-    compute_fn: Optional[FunctionMetadata] = None
-
-
-class ComputeGraphMetadata(BaseModel):
-    name: str
-    description: str
-    start_node: NodeMetadata
-    nodes: Dict[str, NodeMetadata]
-    edges: Dict[str, List[str]]
-    accumulator_zero_values: Dict[str, bytes] = {}
-
-    def get_input_payload_serializer(self):
-        return get_serializer(self.start_node.compute_fn.payload_encoder)
-
-
 class Graph:
     def __init__(
         self, name: str, start_node: IndexifyFunction, description: Optional[str] = None
@@ -110,6 +85,11 @@ class Graph:
         self.add_node(start_node)
         self._start_node: str = start_node.name

+        # Storage for local execution
+        self._results: Dict[str, Dict[str, List[IndexifyData]]] = {}
+        self._cache = CacheAwareFunctionWrapper("./indexify_local_runner_cache")
+        self._accumulator_values: Dict[str, Dict[str, IndexifyData]] = {}
+
     def get_function(self, name: str) -> IndexifyFunctionWrapper:
         if name not in self.nodes:
             raise ValueError(f"Function {name} not found in graph")
@@ -272,10 +252,123 @@ class Graph:
                     image_name=node.image._image_name,
                 )
             )
+
         return ComputeGraphMetadata(
             name=self.name,
             description=self.description or "",
             start_node=NodeMetadata(compute_fn=start_node),
             nodes=metadata_nodes,
             edges=metadata_edges,
+            runtime_information=RuntimeInformation(
+                major_version=sys.version_info.major,
+                minor_version=sys.version_info.minor,
+            ),
         )
+
+    def run(self, block_until_done: bool = False, **kwargs) -> str:
+        start_node = self.nodes[self._start_node]
+        serializer = get_serializer(start_node.payload_encoder)
+        input = IndexifyData(id=generate(), payload=serializer.serialize(kwargs))
+        print(f"[bold] Invoking {self._start_node}[/bold]")
+        outputs = defaultdict(list)
+        self._accumulator_values[input.id] = {}
+        for k, v in self.accumulator_zero_values.items():
+            node = self.nodes[k]
+            serializer = get_serializer(node.payload_encoder)
+            self._accumulator_values[input.id] = {
+                k: IndexifyData(payload=serializer.serialize(v))
+            }
+        self._results[input.id] = outputs
+        self._run(input, outputs)
+        return input.id
+
+    def _run(
+        self,
+        initial_input: IndexifyData,
+        outputs: Dict[str, List[bytes]],
+    ):
+        accumulator_values = self._accumulator_values[initial_input.id]
+        queue = deque([(self._start_node, initial_input)])
+        while queue:
+            node_name, input = queue.popleft()
+            node = self.nodes[node_name]
+            serializer = get_serializer(node.payload_encoder)
+            input_bytes = serializer.serialize(input)
+            cached_output_bytes: Optional[bytes] = self._cache.get(
+                self.name, node_name, input_bytes
+            )
+            if cached_output_bytes is not None:
+                print(
+                    f"ran {node_name}: num outputs: {len(cached_output_bytes)} (cache hit)"
+                )
+                function_outputs: List[IndexifyData] = []
+                cached_output_list = serializer.deserialize_list(cached_output_bytes)
+                if accumulator_values.get(node_name, None) is not None:
+                    accumulator_values[node_name] = cached_output_list[-1].model_copy()
+                    outputs[node_name] = []
+                function_outputs.extend(cached_output_list)
+                outputs[node_name].extend(cached_output_list)
+            else:
+                function_outputs: List[IndexifyData] = self.invoke_fn_ser(
+                    node_name, input, accumulator_values.get(node_name, None)
+                )
+                print(f"ran {node_name}: num outputs: {len(function_outputs)}")
+                if accumulator_values.get(node_name, None) is not None:
+                    accumulator_values[node_name] = function_outputs[-1].model_copy()
+                    outputs[node_name] = []
+                outputs[node_name].extend(function_outputs)
+                function_outputs_bytes: List[bytes] = [
+                    serializer.serialize_list(function_outputs)
+                ]
+                self._cache.set(
+                    self.name,
+                    node_name,
+                    input_bytes,
+                    function_outputs_bytes,
+                )
+            if accumulator_values.get(node_name, None) is not None and queue:
+                print(
+                    f"accumulator not none for {node_name}, continuing, len queue: {len(queue)}"
+                )
+                continue
+
+            out_edges = self.edges.get(node_name, [])
+            # Figure out if there are any routers for this node
+            for i, edge in enumerate(out_edges):
+                if edge in self.routers:
+                    out_edges.remove(edge)
+                    for output in function_outputs:
+                        dynamic_edges = self._route(edge, output) or []
+                        for dynamic_edge in dynamic_edges.edges:
+                            if dynamic_edge in self.nodes:
+                                print(
+                                    f"[bold]dynamic router returned node: {dynamic_edge}[/bold]"
+                                )
+                                out_edges.append(dynamic_edge)
+            for out_edge in out_edges:
+                for output in function_outputs:
+                    queue.append((out_edge, output))
+
+    def _route(self, node_name: str, input: IndexifyData) -> Optional[RouterOutput]:
+        return self.invoke_router(node_name, input)
+
+    def get_output(
+        self,
+        invocation_id: str,
+        fn_name: str,
+    ) -> List[Any]:
+        results = self._results[invocation_id]
+        if fn_name not in results:
+            raise ValueError(f"no results found for fn {fn_name} on graph {self.name}")
+        fn = self.nodes[fn_name]
+        fn_model = self.get_function(fn_name).get_output_model()
+        serializer = get_serializer(fn.payload_encoder)
+        outputs = []
+        for result in results[fn_name]:
+            payload_dict = serializer.deserialize(result.payload)
+            if issubclass(fn_model, BaseModel) and isinstance(payload_dict, dict):
+                payload = fn_model.model_validate(payload_dict)
+            else:
+                payload = payload_dict
+            outputs.append(payload)
+        return outputs
```
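With this change, `Graph` absorbs the local execution path that previously lived in `LocalClient`: `run()` serializes the keyword arguments, `_run()` walks the graph with a work queue and caches node outputs on disk, and `get_output()` deserializes the stored results. A minimal local-execution sketch based on the methods added above; the function `add_one` and the graph name are hypothetical:

```python
from indexify import Graph, indexify_function


@indexify_function()
def add_one(x: int) -> int:
    return x + 1


# Build a single-node graph and run it in-process; outputs are cached under
# ./indexify_local_runner_cache as set up in Graph.__init__ above.
g = Graph(name="example-graph", start_node=add_one, description="adds one")
invocation_id = g.run(x=1)
print(g.get_output(invocation_id, "add_one"))  # expected: [2]
```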
indexify-0.2.4/indexify/functions_sdk/graph_definition.py

```diff
@@ -0,0 +1,47 @@
+from typing import Dict, List, Optional
+
+from pydantic import BaseModel
+
+from .object_serializer import get_serializer
+
+
+class FunctionMetadata(BaseModel):
+    name: str
+    fn_name: str
+    description: str
+    reducer: bool = False
+    image_name: str
+    payload_encoder: str = "cloudpickle"
+
+
+class RouterMetadata(BaseModel):
+    name: str
+    description: str
+    source_fn: str
+    target_fns: List[str]
+    image_name: str
+    payload_encoder: str = "cloudpickle"
+
+
+class NodeMetadata(BaseModel):
+    dynamic_router: Optional[RouterMetadata] = None
+    compute_fn: Optional[FunctionMetadata] = None
+
+
+# RuntimeInformation is a class that holds data about the environment in which the graph should run.
+class RuntimeInformation(BaseModel):
+    major_version: int
+    minor_version: int
+
+
+class ComputeGraphMetadata(BaseModel):
+    name: str
+    description: str
+    start_node: NodeMetadata
+    nodes: Dict[str, NodeMetadata]
+    edges: Dict[str, List[str]]
+    accumulator_zero_values: Dict[str, bytes] = {}
+    runtime_information: RuntimeInformation
+
+    def get_input_payload_serializer(self):
+        return get_serializer(self.start_node.compute_fn.payload_encoder)
```
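The metadata models move here from graph.py unchanged, except for the new `RuntimeInformation` model, which records the Python version the graph was defined under. A small sketch of how it is populated, mirroring the `definition()` change in graph.py above:

```python
import sys

from indexify.functions_sdk.graph_definition import RuntimeInformation

# Capture the interpreter version the graph is being defined with.
info = RuntimeInformation(
    major_version=sys.version_info.major,
    minor_version=sys.version_info.minor,
)
print(info.model_dump())  # e.g. {'major_version': 3, 'minor_version': 10}
```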
{indexify-0.2.3 → indexify-0.2.4}/indexify/functions_sdk/graph_validation.py

```diff
@@ -42,25 +42,30 @@ def validate_route(

     if signature.return_annotation == inspect.Signature.empty:
         raise Exception(f"Function {from_node.name} has empty return type annotation")
-
+
     return_annotation = signature.return_annotation

-    if
+    if (
+        hasattr(return_annotation, "__origin__")
+        and return_annotation.__origin__ is Union
+    ):
         for arg in return_annotation.__args__:
-            if hasattr(arg,
+            if hasattr(arg, "name"):
                 if arg not in to_nodes:
                     raise Exception(
                         f"Unable to find {arg.name} in to_nodes {[node.name for node in to_nodes]}"
                     )

-    if
+    if (
+        hasattr(return_annotation, "__origin__")
+        and return_annotation.__origin__ is list
+    ):
         union_args = return_annotation.__args__[0].__args__
         for arg in union_args:
-            if hasattr(arg,
+            if hasattr(arg, "name"):
                 if arg not in to_nodes:
                     raise Exception(
                         f"Unable to find {arg.name} in to_nodes {[node.name for node in to_nodes]}"
                     )
     else:
         raise Exception(f"Return type of {from_node.name} is not a Union")
-
```
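The rewritten checks accept a router whose return annotation is either a bare `Union[...]` of target nodes or a `List[Union[...]]` of them, inspecting `__origin__` and `__args__` to tell the two apart. A small illustration of the annotation shapes involved, using plain typing objects rather than real graph nodes:

```python
from typing import List, Union, get_origin

# A bare Union versus a List of a Union, as the validator distinguishes them.
assert get_origin(Union[int, str]) is Union
assert get_origin(List[Union[int, str]]) is list

# For the list case the validator unwraps one level to reach the Union members.
assert List[Union[int, str]].__args__[0].__args__ == (int, str)
```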
indexify-0.2.3/indexify/remote_client.py → indexify-0.2.4/indexify/http_client.py

```diff
@@ -10,7 +10,6 @@ from httpx_sse import connect_sse
 from pydantic import BaseModel, Json
 from rich import print

-from indexify.base_client import IndexifyClient
 from indexify.error import ApiException
 from indexify.functions_sdk.data_objects import IndexifyData
 from indexify.functions_sdk.graph import ComputeGraphMetadata, Graph
@@ -39,7 +38,7 @@ class GraphOutputs(BaseModel):
     outputs: List[GraphOutputMetadata]


-class
+class IndexifyClient:
     def __init__(
         self,
         service_url: str = DEFAULT_SERVICE_URL,
@@ -167,7 +166,6 @@ class RemoteClient(IndexifyClient):
            files={"code": serialized_code},
            data={"compute_graph": graph_metadata.model_dump_json(exclude_none=True)},
        )
-        print(response.content.decode("utf-8"))
        response.raise_for_status()
        self._graphs[graph.name] = graph
```
indexify-0.2.4/indexify/remote_graph.py

```diff
@@ -0,0 +1,74 @@
+from typing import Any, List, Optional
+
+from indexify.functions_sdk.graph import Graph
+
+from .http_client import IndexifyClient
+from .settings import DEFAULT_SERVICE_URL
+
+
+class RemoteGraph:
+    def __init__(
+        self,
+        name: str,
+        server_url: Optional[str] = DEFAULT_SERVICE_URL,
+    ):
+        self._name = name
+        self._client = IndexifyClient(service_url=server_url)
+
+    def run(self, block_until_done: bool = False, **kwargs) -> str:
+        """
+        Run the graph with the given inputs. The input is for the start function of the graph.
+        :param block_until_done: If True, the function will block until the graph execution is complete.
+        :param kwargs: The input to the start function of the graph. Pass the input as keyword arguments.
+        :return: The invocation ID of the graph execution.
+
+        Example:
+        @indexify_function()
+        def foo(x: int) -> int:
+            return x + 1
+        remote_graph = RemoteGraph.by_name("test")
+        invocation_id = remote_graph.run(x=1)
+        """
+        return self._client.invoke_graph_with_object(
+            self._name, block_until_done, **kwargs
+        )
+
+    @classmethod
+    def deploy(cls, g: Graph, server_url: Optional[str] = "http://localhost:8090"):
+        """
+        Create a new RemoteGraph from a local Graph object.
+        :param g: The local Graph object.
+        :param server_url: The URL of the server where the graph will be registered.
+        """
+        client = IndexifyClient(service_url=server_url)
+        client.register_compute_graph(g)
+        return cls(name=g.name, server_url=server_url)
+
+    @classmethod
+    def by_name(cls, name: str, server_url: Optional[str] = "http://localhost:8090"):
+        """
+        Create a handle to call a RemoteGraph by name.
+        :param name: The name of the graph.
+        :param server_url: The URL of the server where the graph is registered.
+        :return: A RemoteGraph object.
+        """
+        return cls(name=name, server_url=server_url)
+
+    def get_output(
+        self,
+        invocation_id: str,
+        fn_name: str,
+    ) -> List[Any]:
+        """
+        Returns the extracted objects by a graph for an ingested object. If the extractor name is provided, only the objects extracted by that extractor are returned.
+        If the extractor name is not provided, all the extracted objects are returned for the input object.
+        invocation_id: str: The ID of the ingested object
+        fn_name: Optional[str]: The name of the function whose output is to be returned if provided
+        return: List[Any]: Output of the function.
+        """
+
+        return self._client.graph_outputs(
+            graph=self._name,
+            invocation_id=invocation_id,
+            fn_name=fn_name,
+        )
```
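RemoteGraph is a thin wrapper over the renamed IndexifyClient: `deploy()` registers a local Graph with the server, `by_name()` attaches to one that is already registered, `run()` delegates to `invoke_graph_with_object()`, and `get_output()` to `graph_outputs()`. A usage sketch assembled from the docstrings above; it assumes an Indexify server on localhost:8090 and reuses the hypothetical `add_one` graph from the local-execution sketch earlier:

```python
from indexify import Graph, RemoteGraph, indexify_function


@indexify_function()
def add_one(x: int) -> int:
    return x + 1


g = Graph(name="example-graph", start_node=add_one)

# Register the graph with the server and get a handle back.
remote = RemoteGraph.deploy(g, server_url="http://localhost:8090")
invocation_id = remote.run(x=1)
print(remote.get_output(invocation_id, "add_one"))

# An already-deployed graph can also be addressed by name alone.
remote = RemoteGraph.by_name("example-graph")
```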
indexify-0.2.3/indexify/base_client.py
DELETED

```diff
@@ -1,98 +0,0 @@
-from abc import ABC, abstractmethod
-from typing import Any, Dict, List, Optional, Union
-
-from pydantic import Json
-
-from indexify.functions_sdk.graph import Graph
-
-
-class IndexifyClient(ABC):
-
-    ### Operational APIs
-    @abstractmethod
-    def register_compute_graph(self, graph: Graph):
-        """
-        Register a compute graph.
-        graph: Graph: The graph to be registered
-        """
-        pass
-
-    @abstractmethod
-    def graphs(self) -> List[str]:
-        """
-        Get the graphs.
-        return: List[str]: The graphs
-        """
-        pass
-
-    @abstractmethod
-    def namespaces(self) -> List[str]:
-        """
-        Get the namespaces.
-        return: List[str]: The namespaces
-        """
-        pass
-
-    @abstractmethod
-    def create_namespace(self, namespace: str):
-        """
-        Create a namespace.
-        namespace: str: The name of the namespace to be created
-        """
-        pass
-
-    ### Ingestion APIs
-    @abstractmethod
-    def invoke_graph_with_object(
-        self, graph: str, block_until_done: bool = False, **kwargs
-    ) -> str:
-        """
-        Invokes a graph with an input object.
-        graph: str: The name of the graph to invoke
-        kwargs: Any: Named arguments to be passed to the graph. Example: url="https://www.google.com", web_page_text="Hello world!"
-        return: str: The ID of the ingested object
-        """
-        pass
-
-    @abstractmethod
-    def invoke_graph_with_file(
-        self,
-        graph: str,
-        path: str,
-        metadata: Optional[Dict[str, Json]] = None,
-        block_until_done: bool = False,
-    ) -> str:
-        """
-        Invokes a graph with an input file. The file's mimetype is appropriately detected.
-        graph: str: The name of the graph to invoke
-        path: str: The path to the file to be ingested
-        return: str: The ID of the ingested object
-        """
-        pass
-
-    @abstractmethod
-    def rerun_graph(self, graph: str):
-        """
-        Rerun a graph.
-        graph: str: The name of the graph to rerun
-        """
-        pass
-
-    ### Retrieval APIs
-    @abstractmethod
-    def graph_outputs(
-        self,
-        graph: str,
-        invocation_id: str,
-        fn_name: Optional[str],
-    ) -> Union[Dict[str, List[Any]], List[Any]]:
-        """
-        Returns the extracted objects by a graph for an ingested object. If the extractor name is provided, only the objects extracted by that extractor are returned.
-        If the extractor name is not provided, all the extracted objects are returned for the input object.
-        graph: str: The name of the graph
-        ingested_object_id: str: The ID of the ingested object
-        extractor_name: Optional[str]: The name of the extractor whose output is to be returned if provided
-        block_until_done: bool = True: If True, the method will block until the extraction is done. If False, the method will return immediately.
-        return: Union[Dict[str, List[Any]], List[Any]]: The extracted objects. If the extractor name is provided, the output is a list of extracted objects by the extractor. If the extractor name is not provided, the output is a dictionary with the extractor name as the key and the extracted objects as the value. If no objects are found, an empty list is returned.
-        """
-        pass
```
indexify-0.2.3/indexify/client.py
DELETED

```diff
@@ -1,18 +0,0 @@
-from typing import Optional
-
-from .base_client import IndexifyClient
-from .local_client import LocalClient
-from .remote_client import RemoteClient
-from .settings import DEFAULT_SERVICE_URL
-
-
-def create_client(
-    service_url: str = DEFAULT_SERVICE_URL,
-    config_path: Optional[str] = None,
-    in_process: bool = False,
-    *args,
-    **kwargs,
-) -> IndexifyClient:
-    if in_process:
-        return LocalClient()
-    return RemoteClient(config_path=config_path, service_url=service_url, **kwargs)
```
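A hedged sketch of how code that used the removed `create_client()` factory maps onto 0.2.4: the remote path is now the renamed IndexifyClient (or the higher-level RemoteGraph), while in-process execution moved onto Graph itself; the graph name below is hypothetical.

```python
from indexify import IndexifyClient, RemoteGraph

# 0.2.3: create_client()                -> RemoteClient
# 0.2.3: create_client(in_process=True) -> LocalClient
# 0.2.4 equivalents:
client = IndexifyClient()                      # direct HTTP client, uses DEFAULT_SERVICE_URL
remote = RemoteGraph.by_name("example-graph")  # handle to an already-deployed graph
# Local execution now lives on Graph.run() / Graph.get_output() (see graph.py above).
```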
indexify-0.2.3/indexify/foo
DELETED
```diff
@@ -1,72 +0,0 @@
-./functions_sdk/local_cache.py: outputs = []
-./functions_sdk/local_cache.py: outputs.append(f.read())
-./functions_sdk/local_cache.py: return outputs
-./functions_sdk/graph.py: outputs: List[Any] = fn_wrapper.run_fn(input, acc=acc)
-./functions_sdk/graph.py: IndexifyData(payload=CborSerializer.serialize(output)) for output in outputs
-./functions_sdk/output_serializer.py:class OutputSerializer:
-./functions_sdk/output_serializer.py: normalized_outputs = []
-./functions_sdk/output_serializer.py: normalized_outputs.append(
-./functions_sdk/output_serializer.py: return normalized_outputs
-./base_client.py: def graph_outputs(
-./remote_client.py:class GraphOutputs(BaseModel):
-./remote_client.py: outputs: List[GraphOutputMetadata]
-./remote_client.py: def graph_outputs(
-./remote_client.py: f"namespaces/{self.namespace}/compute_graphs/{graph}/invocations/{invocation_id}/outputs",
-./remote_client.py: graph_outputs = GraphOutputs(**response.json())
-./remote_client.py: outputs = []
-./remote_client.py: for output in graph_outputs.outputs:
-./remote_client.py: outputs.append(output)
-./remote_client.py: return outputs
-./executor/task_reporter.py: fn_outputs = []
-./executor/task_reporter.py: f"[bold]task-reporter[/bold] uploading output of size: {len(completed_task.outputs)}"
-./executor/task_reporter.py: for output in completed_task.outputs:
-./executor/task_reporter.py: fn_outputs.append(
-./executor/task_reporter.py: ("node_outputs", (nanoid.generate(), io.BytesIO(output_bytes)))
-./executor/task_reporter.py: fn_outputs.append(
-./executor/task_reporter.py: fn_outputs.append(
-./executor/task_reporter.py: fn_outputs.append(
-./executor/task_reporter.py: if fn_outputs and len(fn_outputs) > 0:
-./executor/task_reporter.py: kwargs["files"] = fn_outputs
-./executor/task_store.py: outputs: List[IndexifyData]
-./executor/task_store.py: task_id=task_id, task_outcome="failed", outputs=[]
-./executor/downloader.py: url = f"{self.base_url}/internal/fn_outputs/{task.input_key}"
-./executor/function_worker.py: outputs: Union[List[IndexifyData], RouterOutput]
-./executor/function_worker.py: indexify_data=result.outputs,
-./executor/function_worker.py: return FunctionOutput(outputs=output, reducer=is_reducer)
-./executor/agent.py: f"Outputs: {len(task_outcome.outputs)}",
-./executor/agent.py: outputs=[],
-./executor/agent.py: outputs=[],
-./executor/agent.py: outputs=[],
-./executor/agent.py: outputs: FunctionWorkerOutput = await async_task
-./executor/agent.py: outputs if isinstance(outputs, RouterOutput) else None
-./executor/agent.py: if outputs.exception:
-./executor/agent.py: fn_outputs = []
-./executor/agent.py: fn_outputs = (
-./executor/agent.py: outputs.indexify_data if not isinstance(outputs, RouterOutput) else []
-./executor/agent.py: outputs=fn_outputs,
-./executor/agent.py: errors=outputs.exception,
-./executor/agent.py: stdout=outputs.stdout,
-./executor/agent.py: stderr=outputs.stderr,
-./executor/agent.py: reducer=outputs.reducer,
-./executor/agent.py: outputs=[],
-./local_client.py:# Holds the outputs of a
-./local_client.py: outputs: Dict[str, List[IndexifyData]]
-./local_client.py: outputs = defaultdict(list)
-./local_client.py: self._results[input.id] = outputs
-./local_client.py: self._run(g, input, outputs)
-./local_client.py: outputs: Dict[str, List[bytes]],
-./local_client.py: f"ran {node_name}: num outputs: {len(cached_output_bytes)} (cache hit)"
-./local_client.py: function_outputs: List[IndexifyData] = []
-./local_client.py: function_outputs.append(output)
-./local_client.py: outputs[node_name].append(output)
-./local_client.py: function_outputs: List[IndexifyData] = g.invoke_fn_ser(
-./local_client.py: print(f"ran {node_name}: num outputs: {len(function_outputs)}")
-./local_client.py: self._accumulators[node_name] = function_outputs[-1].model_copy()
-./local_client.py: outputs[node_name] = []
-./local_client.py: outputs[node_name].extend(function_outputs)
-./local_client.py: function_outputs_bytes: List[bytes] = [
-./local_client.py: for function_output in function_outputs
-./local_client.py: function_outputs_bytes,
-./local_client.py: for output in function_outputs:
-./local_client.py: for output in function_outputs:
-./local_client.py: def graph_outputs(
```
indexify-0.2.3/indexify/local_client.py
DELETED

```diff
@@ -1,183 +0,0 @@
-from collections import defaultdict
-from queue import deque
-from typing import Any, Dict, List, Optional, Type, Union
-
-from nanoid import generate
-from pydantic import BaseModel, Json
-from rich import print
-
-from indexify.base_client import IndexifyClient
-from indexify.functions_sdk.data_objects import (
-    File,
-    IndexifyData,
-    RouterOutput,
-)
-from indexify.functions_sdk.graph import Graph
-from indexify.functions_sdk.local_cache import CacheAwareFunctionWrapper
-from indexify.functions_sdk.object_serializer import get_serializer
-
-
-# Holds the outputs of a
-class ContentTree(BaseModel):
-    id: str
-    outputs: Dict[str, List[IndexifyData]]
-
-
-class LocalClient(IndexifyClient):
-    def __init__(self, cache_dir: str = "./indexify_local_runner_cache"):
-        self._cache_dir = cache_dir
-        self._graphs: Dict[str, Graph] = {}
-        self._results: Dict[str, Dict[str, List[IndexifyData]]] = {}
-        self._cache = CacheAwareFunctionWrapper(self._cache_dir)
-        self._accumulators: Dict[str, Dict[str, IndexifyData]] = {}
-
-    def register_compute_graph(self, graph: Graph):
-        self._graphs[graph.name] = graph
-
-    def run_from_serialized_code(self, code: bytes, **kwargs):
-        g = Graph.deserialize(graph=code)
-        self.run(g, **kwargs)
-
-    def run(self, g: Graph, **kwargs):
-        serializer = get_serializer(
-            g.get_function(g._start_node).indexify_function.payload_encoder
-        )
-        input = IndexifyData(id=generate(), payload=serializer.serialize(kwargs))
-        print(f"[bold] Invoking {g._start_node}[/bold]")
-        outputs = defaultdict(list)
-        for k, v in g.get_accumulators().items():
-            serializer = get_serializer(
-                g.get_function(k).indexify_function.payload_encoder
-            )
-            self._accumulators[k] = IndexifyData(payload=serializer.serialize(v))
-        self._results[input.id] = outputs
-        self._run(g, input, outputs)
-        return input.id
-
-    def _run(
-        self,
-        g: Graph,
-        initial_input: bytes,
-        outputs: Dict[str, List[bytes]],
-    ):
-        queue = deque([(g._start_node, initial_input)])
-        while queue:
-            node_name, input = queue.popleft()
-            serializer = get_serializer(
-                g.get_function(node_name).indexify_function.payload_encoder
-            )
-            input_bytes = serializer.serialize(input)
-            cached_output_bytes: Optional[bytes] = self._cache.get(
-                g.name, node_name, input_bytes
-            )
-            if cached_output_bytes is not None:
-                print(
-                    f"ran {node_name}: num outputs: {len(cached_output_bytes)} (cache hit)"
-                )
-                function_outputs: List[IndexifyData] = []
-                cached_output_list = serializer.deserialize_list(cached_output_bytes)
-                if self._accumulators.get(node_name, None) is not None:
-                    self._accumulators[node_name] = cached_output_list[-1].model_copy()
-                    outputs[node_name] = []
-                function_outputs.extend(cached_output_list)
-                outputs[node_name].extend(cached_output_list)
-            else:
-                function_outputs: List[IndexifyData] = g.invoke_fn_ser(
-                    node_name, input, self._accumulators.get(node_name, None)
-                )
-                print(f"ran {node_name}: num outputs: {len(function_outputs)}")
-                if self._accumulators.get(node_name, None) is not None:
-                    self._accumulators[node_name] = function_outputs[-1].model_copy()
-                    outputs[node_name] = []
-                outputs[node_name].extend(function_outputs)
-                function_outputs_bytes: List[bytes] = [
-                    serializer.serialize_list(function_outputs)
-                ]
-                self._cache.set(
-                    g.name,
-                    node_name,
-                    input_bytes,
-                    function_outputs_bytes,
-                )
-            if self._accumulators.get(node_name, None) is not None and queue:
-                print(
-                    f"accumulator not none for {node_name}, continuing, len queue: {len(queue)}"
-                )
-                continue
-
-            out_edges = g.edges.get(node_name, [])
-            # Figure out if there are any routers for this node
-            for i, edge in enumerate(out_edges):
-                if edge in g.routers:
-                    out_edges.remove(edge)
-                    for output in function_outputs:
-                        dynamic_edges = self._route(g, edge, output) or []
-                        for dynamic_edge in dynamic_edges.edges:
-                            if dynamic_edge in g.nodes:
-                                print(
-                                    f"[bold]dynamic router returned node: {dynamic_edge}[/bold]"
-                                )
-                                out_edges.append(dynamic_edge)
-            for out_edge in out_edges:
-                for output in function_outputs:
-                    queue.append((out_edge, output))
-
-    def _route(
-        self, g: Graph, node_name: str, input: IndexifyData
-    ) -> Optional[RouterOutput]:
-        return g.invoke_router(node_name, input)
-
-    def graphs(self) -> str:
-        return list(self._graphs.keys())
-
-    def namespaces(self) -> str:
-        return "local"
-
-    def create_namespace(self, namespace: str):
-        pass
-
-    def rerun_graph(self, graph: str):
-        return super().rerun_graph(graph)
-
-    def invoke_graph_with_object(
-        self, graph: str, block_until_done: bool = False, **kwargs
-    ) -> str:
-        graph: Graph = self._graphs[graph]
-        return self.run(graph, **kwargs)
-
-    def invoke_graph_with_file(
-        self,
-        graph: str,
-        path: str,
-        metadata: Optional[Dict[str, Json]] = None,
-        block_until_done: bool = False,
-    ) -> str:
-        graph = self._graphs[graph]
-        with open(path, "rb") as f:
-            data = f.read()
-            file = File(data=data, metadata=metadata).model_dump()
-            return self.run(graph, file=file)
-
-    def graph_outputs(
-        self,
-        graph: str,
-        invocation_id: str,
-        fn_name: str,
-    ) -> Union[Dict[str, List[Any]], List[Any]]:
-        if invocation_id not in self._results:
-            raise ValueError(f"no results found for graph {graph}")
-        if fn_name not in self._results[invocation_id]:
-            raise ValueError(f"no results found for fn {fn_name} on graph {graph}")
-        results = []
-        fn_model = self._graphs[graph].get_function(fn_name).get_output_model()
-        serializer = get_serializer(
-            self._graphs[graph].get_function(fn_name).indexify_function.payload_encoder
-        )
-        for result in self._results[invocation_id][fn_name]:
-            payload_dict = serializer.deserialize(result.payload)
-            if issubclass(fn_model, BaseModel) and isinstance(payload_dict, dict):
-                payload = fn_model.model_validate(payload_dict)
-            else:
-                payload = payload_dict
-            results.append(payload)
-        return results
```
The remaining 21 files listed above (+0 -0) are unchanged between indexify-0.2.3 and indexify-0.2.4.