indexify 0.2.3__py3-none-any.whl → 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
indexify/__init__.py CHANGED
@@ -1,23 +1,21 @@
1
1
  from . import data_loaders
2
- from .client import create_client
3
2
  from .functions_sdk.graph import Graph
4
3
  from .functions_sdk.image import Image
5
4
  from .functions_sdk.indexify_functions import (
6
5
  indexify_function,
7
6
  indexify_router,
8
7
  )
9
- from .local_client import LocalClient
10
- from .remote_client import RemoteClient
8
+ from .http_client import IndexifyClient
9
+ from .remote_graph import RemoteGraph
11
10
  from .settings import DEFAULT_SERVICE_URL
12
11
 
13
12
  __all__ = [
14
13
  "data_loaders",
15
14
  "Graph",
15
+ "RemoteGraph",
16
16
  "Image",
17
17
  "indexify_function",
18
18
  "indexify_router",
19
19
  "DEFAULT_SERVICE_URL",
20
- "RemoteClient",
21
- "LocalClient",
22
- "create_client",
20
+ "IndexifyClient",
23
21
  ]
indexify/cli.py CHANGED
@@ -19,7 +19,7 @@ from rich.theme import Theme
19
19
 
20
20
  from indexify.executor.agent import ExtractorAgent
21
21
  from indexify.executor.function_worker import FunctionWorker
22
- from indexify.functions_sdk.image import Image, DEFAULT_IMAGE
22
+ from indexify.functions_sdk.image import DEFAULT_IMAGE, Image
23
23
 
24
24
  custom_theme = Theme(
25
25
  {
@@ -1,5 +1,7 @@
1
1
  import inspect
2
+ import sys
2
3
  from collections import defaultdict
4
+ from queue import deque
3
5
  from typing import (
4
6
  Annotated,
5
7
  Any,
@@ -7,7 +9,6 @@ from typing import (
7
9
  Dict,
8
10
  List,
9
11
  Optional,
10
- Set,
11
12
  Type,
12
13
  Union,
13
14
  get_args,
@@ -16,16 +17,25 @@ from typing import (
16
17
 
17
18
  import cloudpickle
18
19
  import msgpack
20
+ from nanoid import generate
19
21
  from pydantic import BaseModel
20
22
  from typing_extensions import get_args, get_origin
21
23
 
22
24
  from .data_objects import IndexifyData, RouterOutput
25
+ from .graph_definition import (
26
+ ComputeGraphMetadata,
27
+ FunctionMetadata,
28
+ NodeMetadata,
29
+ RouterMetadata,
30
+ RuntimeInformation,
31
+ )
23
32
  from .graph_validation import validate_node, validate_route
24
33
  from .indexify_functions import (
25
34
  IndexifyFunction,
26
35
  IndexifyFunctionWrapper,
27
36
  IndexifyRouter,
28
37
  )
38
+ from .local_cache import CacheAwareFunctionWrapper
29
39
  from .object_serializer import CloudPickleSerializer, get_serializer
30
40
 
31
41
  RouterFn = Annotated[
@@ -61,41 +71,6 @@ def is_pydantic_model_from_annotation(type_annotation):
61
71
  return False
62
72
 
63
73
 
64
- class FunctionMetadata(BaseModel):
65
- name: str
66
- fn_name: str
67
- description: str
68
- reducer: bool = False
69
- image_name: str
70
- payload_encoder: str = "cloudpickle"
71
-
72
-
73
- class RouterMetadata(BaseModel):
74
- name: str
75
- description: str
76
- source_fn: str
77
- target_fns: List[str]
78
- image_name: str
79
- payload_encoder: str = "cloudpickle"
80
-
81
-
82
- class NodeMetadata(BaseModel):
83
- dynamic_router: Optional[RouterMetadata] = None
84
- compute_fn: Optional[FunctionMetadata] = None
85
-
86
-
87
- class ComputeGraphMetadata(BaseModel):
88
- name: str
89
- description: str
90
- start_node: NodeMetadata
91
- nodes: Dict[str, NodeMetadata]
92
- edges: Dict[str, List[str]]
93
- accumulator_zero_values: Dict[str, bytes] = {}
94
-
95
- def get_input_payload_serializer(self):
96
- return get_serializer(self.start_node.compute_fn.payload_encoder)
97
-
98
-
99
74
  class Graph:
100
75
  def __init__(
101
76
  self, name: str, start_node: IndexifyFunction, description: Optional[str] = None
@@ -110,6 +85,11 @@ class Graph:
110
85
  self.add_node(start_node)
111
86
  self._start_node: str = start_node.name
112
87
 
88
+ # Storage for local execution
89
+ self._results: Dict[str, Dict[str, List[IndexifyData]]] = {}
90
+ self._cache = CacheAwareFunctionWrapper("./indexify_local_runner_cache")
91
+ self._accumulator_values: Dict[str, Dict[str, IndexifyData]] = {}
92
+
113
93
  def get_function(self, name: str) -> IndexifyFunctionWrapper:
114
94
  if name not in self.nodes:
115
95
  raise ValueError(f"Function {name} not found in graph")
@@ -272,10 +252,123 @@ class Graph:
272
252
  image_name=node.image._image_name,
273
253
  )
274
254
  )
255
+
275
256
  return ComputeGraphMetadata(
276
257
  name=self.name,
277
258
  description=self.description or "",
278
259
  start_node=NodeMetadata(compute_fn=start_node),
279
260
  nodes=metadata_nodes,
280
261
  edges=metadata_edges,
262
+ runtime_information=RuntimeInformation(
263
+ major_version=sys.version_info.major,
264
+ minor_version=sys.version_info.minor,
265
+ ),
281
266
  )
267
+
268
+ def run(self, block_until_done: bool = False, **kwargs) -> str:
269
+ start_node = self.nodes[self._start_node]
270
+ serializer = get_serializer(start_node.payload_encoder)
271
+ input = IndexifyData(id=generate(), payload=serializer.serialize(kwargs))
272
+ print(f"[bold] Invoking {self._start_node}[/bold]")
273
+ outputs = defaultdict(list)
274
+ self._accumulator_values[input.id] = {}
275
+ for k, v in self.accumulator_zero_values.items():
276
+ node = self.nodes[k]
277
+ serializer = get_serializer(node.payload_encoder)
278
+ self._accumulator_values[input.id] = {
279
+ k: IndexifyData(payload=serializer.serialize(v))
280
+ }
281
+ self._results[input.id] = outputs
282
+ self._run(input, outputs)
283
+ return input.id
284
+
285
+ def _run(
286
+ self,
287
+ initial_input: IndexifyData,
288
+ outputs: Dict[str, List[bytes]],
289
+ ):
290
+ accumulator_values = self._accumulator_values[initial_input.id]
291
+ queue = deque([(self._start_node, initial_input)])
292
+ while queue:
293
+ node_name, input = queue.popleft()
294
+ node = self.nodes[node_name]
295
+ serializer = get_serializer(node.payload_encoder)
296
+ input_bytes = serializer.serialize(input)
297
+ cached_output_bytes: Optional[bytes] = self._cache.get(
298
+ self.name, node_name, input_bytes
299
+ )
300
+ if cached_output_bytes is not None:
301
+ print(
302
+ f"ran {node_name}: num outputs: {len(cached_output_bytes)} (cache hit)"
303
+ )
304
+ function_outputs: List[IndexifyData] = []
305
+ cached_output_list = serializer.deserialize_list(cached_output_bytes)
306
+ if accumulator_values.get(node_name, None) is not None:
307
+ accumulator_values[node_name] = cached_output_list[-1].model_copy()
308
+ outputs[node_name] = []
309
+ function_outputs.extend(cached_output_list)
310
+ outputs[node_name].extend(cached_output_list)
311
+ else:
312
+ function_outputs: List[IndexifyData] = self.invoke_fn_ser(
313
+ node_name, input, accumulator_values.get(node_name, None)
314
+ )
315
+ print(f"ran {node_name}: num outputs: {len(function_outputs)}")
316
+ if accumulator_values.get(node_name, None) is not None:
317
+ accumulator_values[node_name] = function_outputs[-1].model_copy()
318
+ outputs[node_name] = []
319
+ outputs[node_name].extend(function_outputs)
320
+ function_outputs_bytes: List[bytes] = [
321
+ serializer.serialize_list(function_outputs)
322
+ ]
323
+ self._cache.set(
324
+ self.name,
325
+ node_name,
326
+ input_bytes,
327
+ function_outputs_bytes,
328
+ )
329
+ if accumulator_values.get(node_name, None) is not None and queue:
330
+ print(
331
+ f"accumulator not none for {node_name}, continuing, len queue: {len(queue)}"
332
+ )
333
+ continue
334
+
335
+ out_edges = self.edges.get(node_name, [])
336
+ # Figure out if there are any routers for this node
337
+ for i, edge in enumerate(out_edges):
338
+ if edge in self.routers:
339
+ out_edges.remove(edge)
340
+ for output in function_outputs:
341
+ dynamic_edges = self._route(edge, output) or []
342
+ for dynamic_edge in dynamic_edges.edges:
343
+ if dynamic_edge in self.nodes:
344
+ print(
345
+ f"[bold]dynamic router returned node: {dynamic_edge}[/bold]"
346
+ )
347
+ out_edges.append(dynamic_edge)
348
+ for out_edge in out_edges:
349
+ for output in function_outputs:
350
+ queue.append((out_edge, output))
351
+
352
+ def _route(self, node_name: str, input: IndexifyData) -> Optional[RouterOutput]:
353
+ return self.invoke_router(node_name, input)
354
+
355
+ def get_output(
356
+ self,
357
+ invocation_id: str,
358
+ fn_name: str,
359
+ ) -> List[Any]:
360
+ results = self._results[invocation_id]
361
+ if fn_name not in results:
362
+ raise ValueError(f"no results found for fn {fn_name} on graph {self.name}")
363
+ fn = self.nodes[fn_name]
364
+ fn_model = self.get_function(fn_name).get_output_model()
365
+ serializer = get_serializer(fn.payload_encoder)
366
+ outputs = []
367
+ for result in results[fn_name]:
368
+ payload_dict = serializer.deserialize(result.payload)
369
+ if issubclass(fn_model, BaseModel) and isinstance(payload_dict, dict):
370
+ payload = fn_model.model_validate(payload_dict)
371
+ else:
372
+ payload = payload_dict
373
+ outputs.append(payload)
374
+ return outputs
@@ -0,0 +1,47 @@
1
+ from typing import Dict, List, Optional
2
+
3
+ from pydantic import BaseModel
4
+
5
+ from .object_serializer import get_serializer
6
+
7
+
8
+ class FunctionMetadata(BaseModel):
9
+ name: str
10
+ fn_name: str
11
+ description: str
12
+ reducer: bool = False
13
+ image_name: str
14
+ payload_encoder: str = "cloudpickle"
15
+
16
+
17
+ class RouterMetadata(BaseModel):
18
+ name: str
19
+ description: str
20
+ source_fn: str
21
+ target_fns: List[str]
22
+ image_name: str
23
+ payload_encoder: str = "cloudpickle"
24
+
25
+
26
+ class NodeMetadata(BaseModel):
27
+ dynamic_router: Optional[RouterMetadata] = None
28
+ compute_fn: Optional[FunctionMetadata] = None
29
+
30
+
31
+ # RuntimeInformation is a class that holds data about the environment in which the graph should run.
32
+ class RuntimeInformation(BaseModel):
33
+ major_version: int
34
+ minor_version: int
35
+
36
+
37
+ class ComputeGraphMetadata(BaseModel):
38
+ name: str
39
+ description: str
40
+ start_node: NodeMetadata
41
+ nodes: Dict[str, NodeMetadata]
42
+ edges: Dict[str, List[str]]
43
+ accumulator_zero_values: Dict[str, bytes] = {}
44
+ runtime_information: RuntimeInformation
45
+
46
+ def get_input_payload_serializer(self):
47
+ return get_serializer(self.start_node.compute_fn.payload_encoder)
@@ -42,25 +42,30 @@ def validate_route(
42
42
 
43
43
  if signature.return_annotation == inspect.Signature.empty:
44
44
  raise Exception(f"Function {from_node.name} has empty return type annotation")
45
-
45
+
46
46
  return_annotation = signature.return_annotation
47
47
 
48
- if hasattr(return_annotation, '__origin__') and return_annotation.__origin__ is Union:
48
+ if (
49
+ hasattr(return_annotation, "__origin__")
50
+ and return_annotation.__origin__ is Union
51
+ ):
49
52
  for arg in return_annotation.__args__:
50
- if hasattr(arg, 'name'):
53
+ if hasattr(arg, "name"):
51
54
  if arg not in to_nodes:
52
55
  raise Exception(
53
56
  f"Unable to find {arg.name} in to_nodes {[node.name for node in to_nodes]}"
54
57
  )
55
58
 
56
- if hasattr(return_annotation, '__origin__') and return_annotation.__origin__ is list:
59
+ if (
60
+ hasattr(return_annotation, "__origin__")
61
+ and return_annotation.__origin__ is list
62
+ ):
57
63
  union_args = return_annotation.__args__[0].__args__
58
64
  for arg in union_args:
59
- if hasattr(arg, 'name'):
65
+ if hasattr(arg, "name"):
60
66
  if arg not in to_nodes:
61
67
  raise Exception(
62
68
  f"Unable to find {arg.name} in to_nodes {[node.name for node in to_nodes]}"
63
69
  )
64
70
  else:
65
71
  raise Exception(f"Return type of {from_node.name} is not a Union")
66
-
@@ -10,7 +10,6 @@ from httpx_sse import connect_sse
10
10
  from pydantic import BaseModel, Json
11
11
  from rich import print
12
12
 
13
- from indexify.base_client import IndexifyClient
14
13
  from indexify.error import ApiException
15
14
  from indexify.functions_sdk.data_objects import IndexifyData
16
15
  from indexify.functions_sdk.graph import ComputeGraphMetadata, Graph
@@ -39,7 +38,7 @@ class GraphOutputs(BaseModel):
39
38
  outputs: List[GraphOutputMetadata]
40
39
 
41
40
 
42
- class RemoteClient(IndexifyClient):
41
+ class IndexifyClient:
43
42
  def __init__(
44
43
  self,
45
44
  service_url: str = DEFAULT_SERVICE_URL,
@@ -167,7 +166,6 @@ class RemoteClient(IndexifyClient):
167
166
  files={"code": serialized_code},
168
167
  data={"compute_graph": graph_metadata.model_dump_json(exclude_none=True)},
169
168
  )
170
- print(response.content.decode("utf-8"))
171
169
  response.raise_for_status()
172
170
  self._graphs[graph.name] = graph
173
171
 
@@ -0,0 +1,74 @@
1
+ from typing import Any, List, Optional
2
+
3
+ from indexify.functions_sdk.graph import Graph
4
+
5
+ from .http_client import IndexifyClient
6
+ from .settings import DEFAULT_SERVICE_URL
7
+
8
+
9
+ class RemoteGraph:
10
+ def __init__(
11
+ self,
12
+ name: str,
13
+ server_url: Optional[str] = DEFAULT_SERVICE_URL,
14
+ ):
15
+ self._name = name
16
+ self._client = IndexifyClient(service_url=server_url)
17
+
18
+ def run(self, block_until_done: bool = False, **kwargs) -> str:
19
+ """
20
+ Run the graph with the given inputs. The input is for the start function of the graph.
21
+ :param block_until_done: If True, the function will block until the graph execution is complete.
22
+ :param kwargs: The input to the start function of the graph. Pass the input as keyword arguments.
23
+ :return: The invocation ID of the graph execution.
24
+
25
+ Example:
26
+ @indexify_function()
27
+ def foo(x: int) -> int:
28
+ return x + 1
29
+ remote_graph = RemoteGraph.by_name("test")
30
+ invocation_id = remote_graph.run(x=1)
31
+ """
32
+ return self._client.invoke_graph_with_object(
33
+ self._name, block_until_done, **kwargs
34
+ )
35
+
36
+ @classmethod
37
+ def deploy(cls, g: Graph, server_url: Optional[str] = "http://localhost:8090"):
38
+ """
39
+ Create a new RemoteGraph from a local Graph object.
40
+ :param g: The local Graph object.
41
+ :param server_url: The URL of the server where the graph will be registered.
42
+ """
43
+ client = IndexifyClient(service_url=server_url)
44
+ client.register_compute_graph(g)
45
+ return cls(name=g.name, server_url=server_url)
46
+
47
+ @classmethod
48
+ def by_name(cls, name: str, server_url: Optional[str] = "http://localhost:8090"):
49
+ """
50
+ Create a handle to call a RemoteGraph by name.
51
+ :param name: The name of the graph.
52
+ :param server_url: The URL of the server where the graph is registered.
53
+ :return: A RemoteGraph object.
54
+ """
55
+ return cls(name=name, server_url=server_url)
56
+
57
+ def get_output(
58
+ self,
59
+ invocation_id: str,
60
+ fn_name: str,
61
+ ) -> List[Any]:
62
+ """
63
+ Returns the extracted objects by a graph for an ingested object. If the extractor name is provided, only the objects extracted by that extractor are returned.
64
+ If the extractor name is not provided, all the extracted objects are returned for the input object.
65
+ invocation_id: str: The ID of the ingested object
66
+ fn_name: Optional[str]: The name of the function whose output is to be returned if provided
67
+ return: List[Any]: Output of the function.
68
+ """
69
+
70
+ return self._client.graph_outputs(
71
+ graph=self._name,
72
+ invocation_id=invocation_id,
73
+ fn_name=fn_name,
74
+ )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: indexify
3
- Version: 0.2.3
3
+ Version: 0.2.4
4
4
  Summary: Python Client for Indexify
5
5
  Home-page: https://github.com/tensorlakeai/indexify
6
6
  License: Apache 2.0
@@ -1,7 +1,5 @@
1
- indexify/__init__.py,sha256=yJ3K_TyocH2EpFr6kEbKaOqfk8dA33fpoCB_QXV-rko,549
2
- indexify/base_client.py,sha256=Si1XnZ6X_mFvkYCnS6qx6axFsBpkrGiorqmKohFwvLQ,3324
3
- indexify/cli.py,sha256=kLKruCRNlo1xdezxYQoN6c9EpGWAeNiSs7kAjYfxCao,7311
4
- indexify/client.py,sha256=6cwCxBky6IJYu4caq0E6SMWIxf3nn5SX795moHfS4Cw,501
1
+ indexify/__init__.py,sha256=qgMBKVrM_tI-tFeWpE8ktlC5rcExk05nbWyFqxxqeEU,496
2
+ indexify/cli.py,sha256=mikNNHY58i2YL1oB_Me7I90JrSS6IPKGFENIklxJyYM,7311
5
3
  indexify/data_loaders/__init__.py,sha256=Y5NEuseTcYAICRiweYw5wBQ2m2YplbsY21I7df-rdi4,1339
6
4
  indexify/data_loaders/local_directory_loader.py,sha256=fCrgj5drnW71ZUdDDvcB1-VJjIs1w6Q8sEW0HSGSAiA,1247
7
5
  indexify/data_loaders/url_loader.py,sha256=32SERljcq1Xsi4RdLz2dgyk2TER5pQPTtXl3gUzwHbY,1533
@@ -15,19 +13,19 @@ indexify/executor/indexify_executor.py,sha256=2Ut_VX-Su_lm4b4aEROyRJ3gXx-uFHA-V7
15
13
  indexify/executor/runtime_probes.py,sha256=tvi8KCaQTVJqcyBJ4-jzEUAnQ01ZbMmjCxV2KJ96_PI,1449
16
14
  indexify/executor/task_reporter.py,sha256=gnnse0v6rjjni8lNzeb-ZYq6iF2DgafKoT7dcGUZhQ4,3716
17
15
  indexify/executor/task_store.py,sha256=q8s2gImsFffWeXQR0mk1Xlo1Aj_2GfclNPjQ2EA_YBo,3984
18
- indexify/foo,sha256=e385Ws-u8zx-LOq3tdfTa-siK9pMaccdAE8_0rrp_k4,5165
19
16
  indexify/functions_sdk/data_objects.py,sha256=2LqAWJ_S2Xkp4OQTmhd3InVIrBs7juV41udnSQFMMfM,840
20
- indexify/functions_sdk/graph.py,sha256=3-uALmIdZiHsxHKMk8rb_Mphb109G-AKfWmGGXS90uA,9972
21
- indexify/functions_sdk/graph_validation.py,sha256=y-f0ZNiGYl_fjPA7v9OJWtoUMPELgtVR_ifpgqZ0IoY,2465
17
+ indexify/functions_sdk/graph.py,sha256=iWJV9tZwPniEQq1le-FP8FYJ8lHq8IBJMJNPfv987ys,14806
18
+ indexify/functions_sdk/graph_definition.py,sha256=EJfC0MdKEbFF1CBaU0htrveSlcAQJCH96DLSNfZ02V4,1178
19
+ indexify/functions_sdk/graph_validation.py,sha256=XLHiC9PAtZungJLysU3hIUOPNDkO5TXUDZ_jiZ0H4hg,2508
22
20
  indexify/functions_sdk/image.py,sha256=euuz2QTZQoS-JmwnPmWJ8lfIgKzrSEsfkUc2qU26xjM,679
23
21
  indexify/functions_sdk/indexify_functions.py,sha256=xxgvnw0MQ_csIksunIdero8be0PR4mfwgoHp3UlkMZU,5851
24
22
  indexify/functions_sdk/local_cache.py,sha256=cNWF67zbhbTJe3g86hyLBy3Rqzs6dNvp2SjLazGZWvw,1348
25
23
  indexify/functions_sdk/object_serializer.py,sha256=Zz4GobW3ZamBBtFDF76QxU3TP6oJNdWnhsfKd0OUFoc,1660
26
- indexify/local_client.py,sha256=zNEoRhIx0v8BqWDP4NJiuxkP2RctjkLZInegEj07iSo,7218
27
- indexify/remote_client.py,sha256=oKgTqLbIxQVDqkMjQmNCOOEIM156UeYMC1jDWWSqBAQ,12297
24
+ indexify/http_client.py,sha256=Ntyvpljuq5W6keQAqQWsx-2FQNM2_tgZQk9FXa4A-qA,12187
25
+ indexify/remote_graph.py,sha256=kG6Jfof2_Ze_fkNWFYw08NJeN9Eprc2wExbFud4xN-Y,2799
28
26
  indexify/settings.py,sha256=LSaWZ0ADIVmUv6o6dHWRC3-Ry5uLbCw2sBSg1e_U7UM,99
29
- indexify-0.2.3.dist-info/LICENSE.txt,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
30
- indexify-0.2.3.dist-info/METADATA,sha256=bbQHvSEcmjSAuRvEQsyb4Xac6jovD76h2jkRA4e27Ck,6129
31
- indexify-0.2.3.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
32
- indexify-0.2.3.dist-info/entry_points.txt,sha256=Pih7WV-XMpAzI5dEvROcpLr-ybVhd9Y-AtuzBKUdcDs,49
33
- indexify-0.2.3.dist-info/RECORD,,
27
+ indexify-0.2.4.dist-info/LICENSE.txt,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
28
+ indexify-0.2.4.dist-info/METADATA,sha256=463jg3rRzHN8aWEb_fW_d7faC7XwCYmFJA8Rkkd11vs,6129
29
+ indexify-0.2.4.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
30
+ indexify-0.2.4.dist-info/entry_points.txt,sha256=Pih7WV-XMpAzI5dEvROcpLr-ybVhd9Y-AtuzBKUdcDs,49
31
+ indexify-0.2.4.dist-info/RECORD,,
indexify/base_client.py DELETED
@@ -1,98 +0,0 @@
1
- from abc import ABC, abstractmethod
2
- from typing import Any, Dict, List, Optional, Union
3
-
4
- from pydantic import Json
5
-
6
- from indexify.functions_sdk.graph import Graph
7
-
8
-
9
- class IndexifyClient(ABC):
10
-
11
- ### Operational APIs
12
- @abstractmethod
13
- def register_compute_graph(self, graph: Graph):
14
- """
15
- Register a compute graph.
16
- graph: Graph: The graph to be registered
17
- """
18
- pass
19
-
20
- @abstractmethod
21
- def graphs(self) -> List[str]:
22
- """
23
- Get the graphs.
24
- return: List[str]: The graphs
25
- """
26
- pass
27
-
28
- @abstractmethod
29
- def namespaces(self) -> List[str]:
30
- """
31
- Get the namespaces.
32
- return: List[str]: The namespaces
33
- """
34
- pass
35
-
36
- @abstractmethod
37
- def create_namespace(self, namespace: str):
38
- """
39
- Create a namespace.
40
- namespace: str: The name of the namespace to be created
41
- """
42
- pass
43
-
44
- ### Ingestion APIs
45
- @abstractmethod
46
- def invoke_graph_with_object(
47
- self, graph: str, block_until_done: bool = False, **kwargs
48
- ) -> str:
49
- """
50
- Invokes a graph with an input object.
51
- graph: str: The name of the graph to invoke
52
- kwargs: Any: Named arguments to be passed to the graph. Example: url="https://www.google.com", web_page_text="Hello world!"
53
- return: str: The ID of the ingested object
54
- """
55
- pass
56
-
57
- @abstractmethod
58
- def invoke_graph_with_file(
59
- self,
60
- graph: str,
61
- path: str,
62
- metadata: Optional[Dict[str, Json]] = None,
63
- block_until_done: bool = False,
64
- ) -> str:
65
- """
66
- Invokes a graph with an input file. The file's mimetype is appropriately detected.
67
- graph: str: The name of the graph to invoke
68
- path: str: The path to the file to be ingested
69
- return: str: The ID of the ingested object
70
- """
71
- pass
72
-
73
- @abstractmethod
74
- def rerun_graph(self, graph: str):
75
- """
76
- Rerun a graph.
77
- graph: str: The name of the graph to rerun
78
- """
79
- pass
80
-
81
- ### Retrieval APIs
82
- @abstractmethod
83
- def graph_outputs(
84
- self,
85
- graph: str,
86
- invocation_id: str,
87
- fn_name: Optional[str],
88
- ) -> Union[Dict[str, List[Any]], List[Any]]:
89
- """
90
- Returns the extracted objects by a graph for an ingested object. If the extractor name is provided, only the objects extracted by that extractor are returned.
91
- If the extractor name is not provided, all the extracted objects are returned for the input object.
92
- graph: str: The name of the graph
93
- ingested_object_id: str: The ID of the ingested object
94
- extractor_name: Optional[str]: The name of the extractor whose output is to be returned if provided
95
- block_until_done: bool = True: If True, the method will block until the extraction is done. If False, the method will return immediately.
96
- return: Union[Dict[str, List[Any]], List[Any]]: The extracted objects. If the extractor name is provided, the output is a list of extracted objects by the extractor. If the extractor name is not provided, the output is a dictionary with the extractor name as the key and the extracted objects as the value. If no objects are found, an empty list is returned.
97
- """
98
- pass
indexify/client.py DELETED
@@ -1,18 +0,0 @@
1
- from typing import Optional
2
-
3
- from .base_client import IndexifyClient
4
- from .local_client import LocalClient
5
- from .remote_client import RemoteClient
6
- from .settings import DEFAULT_SERVICE_URL
7
-
8
-
9
- def create_client(
10
- service_url: str = DEFAULT_SERVICE_URL,
11
- config_path: Optional[str] = None,
12
- in_process: bool = False,
13
- *args,
14
- **kwargs,
15
- ) -> IndexifyClient:
16
- if in_process:
17
- return LocalClient()
18
- return RemoteClient(config_path=config_path, service_url=service_url, **kwargs)
indexify/foo DELETED
@@ -1,72 +0,0 @@
1
- ./functions_sdk/local_cache.py: outputs = []
2
- ./functions_sdk/local_cache.py: outputs.append(f.read())
3
- ./functions_sdk/local_cache.py: return outputs
4
- ./functions_sdk/graph.py: outputs: List[Any] = fn_wrapper.run_fn(input, acc=acc)
5
- ./functions_sdk/graph.py: IndexifyData(payload=CborSerializer.serialize(output)) for output in outputs
6
- ./functions_sdk/output_serializer.py:class OutputSerializer:
7
- ./functions_sdk/output_serializer.py: normalized_outputs = []
8
- ./functions_sdk/output_serializer.py: normalized_outputs.append(
9
- ./functions_sdk/output_serializer.py: return normalized_outputs
10
- ./base_client.py: def graph_outputs(
11
- ./remote_client.py:class GraphOutputs(BaseModel):
12
- ./remote_client.py: outputs: List[GraphOutputMetadata]
13
- ./remote_client.py: def graph_outputs(
14
- ./remote_client.py: f"namespaces/{self.namespace}/compute_graphs/{graph}/invocations/{invocation_id}/outputs",
15
- ./remote_client.py: graph_outputs = GraphOutputs(**response.json())
16
- ./remote_client.py: outputs = []
17
- ./remote_client.py: for output in graph_outputs.outputs:
18
- ./remote_client.py: outputs.append(output)
19
- ./remote_client.py: return outputs
20
- ./executor/task_reporter.py: fn_outputs = []
21
- ./executor/task_reporter.py: f"[bold]task-reporter[/bold] uploading output of size: {len(completed_task.outputs)}"
22
- ./executor/task_reporter.py: for output in completed_task.outputs:
23
- ./executor/task_reporter.py: fn_outputs.append(
24
- ./executor/task_reporter.py: ("node_outputs", (nanoid.generate(), io.BytesIO(output_bytes)))
25
- ./executor/task_reporter.py: fn_outputs.append(
26
- ./executor/task_reporter.py: fn_outputs.append(
27
- ./executor/task_reporter.py: fn_outputs.append(
28
- ./executor/task_reporter.py: if fn_outputs and len(fn_outputs) > 0:
29
- ./executor/task_reporter.py: kwargs["files"] = fn_outputs
30
- ./executor/task_store.py: outputs: List[IndexifyData]
31
- ./executor/task_store.py: task_id=task_id, task_outcome="failed", outputs=[]
32
- ./executor/downloader.py: url = f"{self.base_url}/internal/fn_outputs/{task.input_key}"
33
- ./executor/function_worker.py: outputs: Union[List[IndexifyData], RouterOutput]
34
- ./executor/function_worker.py: indexify_data=result.outputs,
35
- ./executor/function_worker.py: return FunctionOutput(outputs=output, reducer=is_reducer)
36
- ./executor/agent.py: f"Outputs: {len(task_outcome.outputs)}",
37
- ./executor/agent.py: outputs=[],
38
- ./executor/agent.py: outputs=[],
39
- ./executor/agent.py: outputs=[],
40
- ./executor/agent.py: outputs: FunctionWorkerOutput = await async_task
41
- ./executor/agent.py: outputs if isinstance(outputs, RouterOutput) else None
42
- ./executor/agent.py: if outputs.exception:
43
- ./executor/agent.py: fn_outputs = []
44
- ./executor/agent.py: fn_outputs = (
45
- ./executor/agent.py: outputs.indexify_data if not isinstance(outputs, RouterOutput) else []
46
- ./executor/agent.py: outputs=fn_outputs,
47
- ./executor/agent.py: errors=outputs.exception,
48
- ./executor/agent.py: stdout=outputs.stdout,
49
- ./executor/agent.py: stderr=outputs.stderr,
50
- ./executor/agent.py: reducer=outputs.reducer,
51
- ./executor/agent.py: outputs=[],
52
- ./local_client.py:# Holds the outputs of a
53
- ./local_client.py: outputs: Dict[str, List[IndexifyData]]
54
- ./local_client.py: outputs = defaultdict(list)
55
- ./local_client.py: self._results[input.id] = outputs
56
- ./local_client.py: self._run(g, input, outputs)
57
- ./local_client.py: outputs: Dict[str, List[bytes]],
58
- ./local_client.py: f"ran {node_name}: num outputs: {len(cached_output_bytes)} (cache hit)"
59
- ./local_client.py: function_outputs: List[IndexifyData] = []
60
- ./local_client.py: function_outputs.append(output)
61
- ./local_client.py: outputs[node_name].append(output)
62
- ./local_client.py: function_outputs: List[IndexifyData] = g.invoke_fn_ser(
63
- ./local_client.py: print(f"ran {node_name}: num outputs: {len(function_outputs)}")
64
- ./local_client.py: self._accumulators[node_name] = function_outputs[-1].model_copy()
65
- ./local_client.py: outputs[node_name] = []
66
- ./local_client.py: outputs[node_name].extend(function_outputs)
67
- ./local_client.py: function_outputs_bytes: List[bytes] = [
68
- ./local_client.py: for function_output in function_outputs
69
- ./local_client.py: function_outputs_bytes,
70
- ./local_client.py: for output in function_outputs:
71
- ./local_client.py: for output in function_outputs:
72
- ./local_client.py: def graph_outputs(
indexify/local_client.py DELETED
@@ -1,183 +0,0 @@
1
- from collections import defaultdict
2
- from queue import deque
3
- from typing import Any, Dict, List, Optional, Type, Union
4
-
5
- from nanoid import generate
6
- from pydantic import BaseModel, Json
7
- from rich import print
8
-
9
- from indexify.base_client import IndexifyClient
10
- from indexify.functions_sdk.data_objects import (
11
- File,
12
- IndexifyData,
13
- RouterOutput,
14
- )
15
- from indexify.functions_sdk.graph import Graph
16
- from indexify.functions_sdk.local_cache import CacheAwareFunctionWrapper
17
- from indexify.functions_sdk.object_serializer import get_serializer
18
-
19
-
20
- # Holds the outputs of a
21
- class ContentTree(BaseModel):
22
- id: str
23
- outputs: Dict[str, List[IndexifyData]]
24
-
25
-
26
- class LocalClient(IndexifyClient):
27
- def __init__(self, cache_dir: str = "./indexify_local_runner_cache"):
28
- self._cache_dir = cache_dir
29
- self._graphs: Dict[str, Graph] = {}
30
- self._results: Dict[str, Dict[str, List[IndexifyData]]] = {}
31
- self._cache = CacheAwareFunctionWrapper(self._cache_dir)
32
- self._accumulators: Dict[str, Dict[str, IndexifyData]] = {}
33
-
34
- def register_compute_graph(self, graph: Graph):
35
- self._graphs[graph.name] = graph
36
-
37
- def run_from_serialized_code(self, code: bytes, **kwargs):
38
- g = Graph.deserialize(graph=code)
39
- self.run(g, **kwargs)
40
-
41
- def run(self, g: Graph, **kwargs):
42
- serializer = get_serializer(
43
- g.get_function(g._start_node).indexify_function.payload_encoder
44
- )
45
- input = IndexifyData(id=generate(), payload=serializer.serialize(kwargs))
46
- print(f"[bold] Invoking {g._start_node}[/bold]")
47
- outputs = defaultdict(list)
48
- for k, v in g.get_accumulators().items():
49
- serializer = get_serializer(
50
- g.get_function(k).indexify_function.payload_encoder
51
- )
52
- self._accumulators[k] = IndexifyData(payload=serializer.serialize(v))
53
- self._results[input.id] = outputs
54
- self._run(g, input, outputs)
55
- return input.id
56
-
57
- def _run(
58
- self,
59
- g: Graph,
60
- initial_input: bytes,
61
- outputs: Dict[str, List[bytes]],
62
- ):
63
- queue = deque([(g._start_node, initial_input)])
64
- while queue:
65
- node_name, input = queue.popleft()
66
- serializer = get_serializer(
67
- g.get_function(node_name).indexify_function.payload_encoder
68
- )
69
- input_bytes = serializer.serialize(input)
70
- cached_output_bytes: Optional[bytes] = self._cache.get(
71
- g.name, node_name, input_bytes
72
- )
73
- if cached_output_bytes is not None:
74
- print(
75
- f"ran {node_name}: num outputs: {len(cached_output_bytes)} (cache hit)"
76
- )
77
- function_outputs: List[IndexifyData] = []
78
- cached_output_list = serializer.deserialize_list(cached_output_bytes)
79
- if self._accumulators.get(node_name, None) is not None:
80
- self._accumulators[node_name] = cached_output_list[-1].model_copy()
81
- outputs[node_name] = []
82
- function_outputs.extend(cached_output_list)
83
- outputs[node_name].extend(cached_output_list)
84
- else:
85
- function_outputs: List[IndexifyData] = g.invoke_fn_ser(
86
- node_name, input, self._accumulators.get(node_name, None)
87
- )
88
- print(f"ran {node_name}: num outputs: {len(function_outputs)}")
89
- if self._accumulators.get(node_name, None) is not None:
90
- self._accumulators[node_name] = function_outputs[-1].model_copy()
91
- outputs[node_name] = []
92
- outputs[node_name].extend(function_outputs)
93
- function_outputs_bytes: List[bytes] = [
94
- serializer.serialize_list(function_outputs)
95
- ]
96
- self._cache.set(
97
- g.name,
98
- node_name,
99
- input_bytes,
100
- function_outputs_bytes,
101
- )
102
- if self._accumulators.get(node_name, None) is not None and queue:
103
- print(
104
- f"accumulator not none for {node_name}, continuing, len queue: {len(queue)}"
105
- )
106
- continue
107
-
108
- out_edges = g.edges.get(node_name, [])
109
- # Figure out if there are any routers for this node
110
- for i, edge in enumerate(out_edges):
111
- if edge in g.routers:
112
- out_edges.remove(edge)
113
- for output in function_outputs:
114
- dynamic_edges = self._route(g, edge, output) or []
115
- for dynamic_edge in dynamic_edges.edges:
116
- if dynamic_edge in g.nodes:
117
- print(
118
- f"[bold]dynamic router returned node: {dynamic_edge}[/bold]"
119
- )
120
- out_edges.append(dynamic_edge)
121
- for out_edge in out_edges:
122
- for output in function_outputs:
123
- queue.append((out_edge, output))
124
-
125
- def _route(
126
- self, g: Graph, node_name: str, input: IndexifyData
127
- ) -> Optional[RouterOutput]:
128
- return g.invoke_router(node_name, input)
129
-
130
- def graphs(self) -> str:
131
- return list(self._graphs.keys())
132
-
133
- def namespaces(self) -> str:
134
- return "local"
135
-
136
- def create_namespace(self, namespace: str):
137
- pass
138
-
139
- def rerun_graph(self, graph: str):
140
- return super().rerun_graph(graph)
141
-
142
- def invoke_graph_with_object(
143
- self, graph: str, block_until_done: bool = False, **kwargs
144
- ) -> str:
145
- graph: Graph = self._graphs[graph]
146
- return self.run(graph, **kwargs)
147
-
148
- def invoke_graph_with_file(
149
- self,
150
- graph: str,
151
- path: str,
152
- metadata: Optional[Dict[str, Json]] = None,
153
- block_until_done: bool = False,
154
- ) -> str:
155
- graph = self._graphs[graph]
156
- with open(path, "rb") as f:
157
- data = f.read()
158
- file = File(data=data, metadata=metadata).model_dump()
159
- return self.run(graph, file=file)
160
-
161
- def graph_outputs(
162
- self,
163
- graph: str,
164
- invocation_id: str,
165
- fn_name: str,
166
- ) -> Union[Dict[str, List[Any]], List[Any]]:
167
- if invocation_id not in self._results:
168
- raise ValueError(f"no results found for graph {graph}")
169
- if fn_name not in self._results[invocation_id]:
170
- raise ValueError(f"no results found for fn {fn_name} on graph {graph}")
171
- results = []
172
- fn_model = self._graphs[graph].get_function(fn_name).get_output_model()
173
- serializer = get_serializer(
174
- self._graphs[graph].get_function(fn_name).indexify_function.payload_encoder
175
- )
176
- for result in self._results[invocation_id][fn_name]:
177
- payload_dict = serializer.deserialize(result.payload)
178
- if issubclass(fn_model, BaseModel) and isinstance(payload_dict, dict):
179
- payload = fn_model.model_validate(payload_dict)
180
- else:
181
- payload = payload_dict
182
- results.append(payload)
183
- return results