indexify 0.2.22__tar.gz → 0.2.24__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. {indexify-0.2.22 → indexify-0.2.24}/PKG-INFO +2 -1
  2. {indexify-0.2.22 → indexify-0.2.24}/indexify/__init__.py +4 -0
  3. {indexify-0.2.22 → indexify-0.2.24}/indexify/cli.py +6 -3
  4. {indexify-0.2.22 → indexify-0.2.24}/indexify/error.py +1 -1
  5. {indexify-0.2.22 → indexify-0.2.24}/indexify/executor/agent.py +27 -11
  6. {indexify-0.2.22 → indexify-0.2.24}/indexify/executor/api_objects.py +1 -0
  7. {indexify-0.2.22 → indexify-0.2.24}/indexify/executor/executor_tasks.py +1 -0
  8. {indexify-0.2.22 → indexify-0.2.24}/indexify/executor/function_worker.py +43 -14
  9. {indexify-0.2.22 → indexify-0.2.24}/indexify/executor/image_dependency_installer.py +18 -6
  10. {indexify-0.2.22 → indexify-0.2.24}/indexify/executor/runtime_probes.py +11 -0
  11. {indexify-0.2.22 → indexify-0.2.24}/indexify/executor/task_reporter.py +1 -12
  12. {indexify-0.2.22 → indexify-0.2.24}/indexify/executor/task_store.py +0 -1
  13. {indexify-0.2.22 → indexify-0.2.24}/indexify/functions_sdk/data_objects.py +2 -3
  14. {indexify-0.2.22 → indexify-0.2.24}/indexify/functions_sdk/graph.py +21 -11
  15. {indexify-0.2.22 → indexify-0.2.24}/indexify/functions_sdk/graph_definition.py +1 -1
  16. {indexify-0.2.22 → indexify-0.2.24}/indexify/functions_sdk/indexify_functions.py +58 -43
  17. {indexify-0.2.22 → indexify-0.2.24}/indexify/functions_sdk/object_serializer.py +21 -2
  18. {indexify-0.2.22 → indexify-0.2.24}/indexify/functions_sdk/pipeline.py +3 -0
  19. {indexify-0.2.22 → indexify-0.2.24}/indexify/http_client.py +39 -8
  20. {indexify-0.2.22 → indexify-0.2.24}/indexify/remote_graph.py +7 -2
  21. indexify-0.2.24/indexify/settings.py +1 -0
  22. {indexify-0.2.22 → indexify-0.2.24}/pyproject.toml +2 -1
  23. indexify-0.2.22/indexify/settings.py +0 -2
  24. {indexify-0.2.22 → indexify-0.2.24}/LICENSE.txt +0 -0
  25. {indexify-0.2.22 → indexify-0.2.24}/README.md +0 -0
  26. {indexify-0.2.22 → indexify-0.2.24}/indexify/data_loaders/__init__.py +0 -0
  27. {indexify-0.2.22 → indexify-0.2.24}/indexify/data_loaders/local_directory_loader.py +0 -0
  28. {indexify-0.2.22 → indexify-0.2.24}/indexify/data_loaders/url_loader.py +0 -0
  29. {indexify-0.2.22 → indexify-0.2.24}/indexify/executor/downloader.py +0 -0
  30. {indexify-0.2.22 → indexify-0.2.24}/indexify/executor/indexify_executor.py +0 -0
  31. {indexify-0.2.22 → indexify-0.2.24}/indexify/functions_sdk/graph_validation.py +0 -0
  32. {indexify-0.2.22 → indexify-0.2.24}/indexify/functions_sdk/image.py +0 -0
  33. {indexify-0.2.22 → indexify-0.2.24}/indexify/functions_sdk/local_cache.py +0 -0
  34. {indexify-0.2.22 → indexify-0.2.24}/indexify/remote_pipeline.py +0 -0

{indexify-0.2.22 → indexify-0.2.24}/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: indexify
- Version: 0.2.22
+ Version: 0.2.24
  Summary: Python Client for Indexify
  Home-page: https://github.com/tensorlakeai/indexify
  License: Apache 2.0
@@ -18,6 +18,7 @@ Requires-Dist: cloudpickle (>=3.1.0,<4.0.0)
  Requires-Dist: docker (>=7.1.0,<8.0.0)
  Requires-Dist: httpx-sse (>=0.4.0,<0.5.0)
  Requires-Dist: httpx[http2] (>=0,<1)
+ Requires-Dist: jsonpickle (>=3.3.0,<4.0.0)
  Requires-Dist: msgpack (>=1.1.0,<2.0.0)
  Requires-Dist: nanoid (>=2.0.0,<3.0.0)
  Requires-Dist: pydantic (>=2.9.2,<3.0.0)

{indexify-0.2.22 → indexify-0.2.24}/indexify/__init__.py

@@ -2,6 +2,8 @@ from . import data_loaders
  from .functions_sdk.graph import Graph
  from .functions_sdk.image import Image
  from .functions_sdk.indexify_functions import (
+ IndexifyFunction,
+ get_ctx,
  indexify_function,
  indexify_router,
  )
@@ -19,6 +21,8 @@ __all__ = [
  "RemotePipeline",
  "Image",
  "indexify_function",
+ "get_ctx",
+ "IndexifyFunction",
  "indexify_router",
  "DEFAULT_SERVICE_URL",
  "IndexifyClient",

{indexify-0.2.22 → indexify-0.2.24}/indexify/cli.py

@@ -170,6 +170,9 @@ def executor(
  name_alias: Optional[str] = typer.Option(
  None, help="Name alias for the executor if it's spun up with the base image"
  ),
+ image_version: Optional[int] = typer.Option(
+ "1", help="Requested Image Version for this executor"
+ ),
  ):
  id = nanoid.generate()
  console.print(
@@ -179,13 +182,13 @@ def executor(
  f"Server address: {server_addr}\n"
  f"Executor ID: {id}\n"
  f"Executor cache: {executor_cache}\n"
- f"Name Alias: {name_alias}",
+ f"Name Alias: {name_alias}"
+ f"Image Version: {image_version}\n",
  title="Agent Configuration",
  border_style="info",
  )
  )

- function_worker = FunctionWorker(workers=workers)
  from pathlib import Path

  executor_cache = Path(executor_cache).expanduser().absolute()
@@ -196,11 +199,11 @@ def executor(
  agent = ExtractorAgent(
  id,
  num_workers=workers,
- function_worker=function_worker,
  server_addr=server_addr,
  config_path=config_path,
  code_path=executor_cache,
  name_alias=name_alias,
+ image_version=image_version,
  )

  try:

{indexify-0.2.22 → indexify-0.2.24}/indexify/error.py

@@ -5,4 +5,4 @@ class ApiException(Exception):

  class GraphStillProcessing(Exception):
  def __init__(self) -> None:
- super().__init__("graph is still processing")
+ super().__init__("graph is still processing")

{indexify-0.2.22 → indexify-0.2.24}/indexify/executor/agent.py

@@ -1,14 +1,12 @@
  import asyncio
  import json
  import ssl
+ import traceback
  from concurrent.futures.process import BrokenProcessPool
  from importlib.metadata import version
- import traceback
  from typing import Dict, List, Optional

  import httpx
- from indexify.functions_sdk.graph_definition import ComputeGraphMetadata
- from indexify.http_client import IndexifyClient
  import yaml
  from httpx_sse import aconnect_sse
  from pydantic import BaseModel
@@ -20,10 +18,12 @@ from rich.theme import Theme
  from indexify.functions_sdk.data_objects import (
  FunctionWorkerOutput,
  IndexifyData,
- RouterOutput,
  )
- from . import image_dependency_installer
+ from indexify.functions_sdk.graph_definition import ComputeGraphMetadata
+ from indexify.http_client import IndexifyClient

+ from ..functions_sdk.image import ImageInformation
+ from . import image_dependency_installer
  from .api_objects import ExecutorMetadata, Task
  from .downloader import DownloadedInputs, Downloader
  from .executor_tasks import DownloadGraphTask, DownloadInputTask, ExtractTask
@@ -31,7 +31,6 @@ from .function_worker import FunctionWorker
  from .runtime_probes import ProbeInfo, RuntimeProbes
  from .task_reporter import TaskReporter
  from .task_store import CompletedTask, TaskStore
- from ..functions_sdk.image import ImageInformation

  custom_theme = Theme(
  {
@@ -60,12 +59,13 @@ class ExtractorAgent:
  executor_id: str,
  num_workers,
  code_path: str,
- function_worker: FunctionWorker,
  server_addr: str = "localhost:8900",
  config_path: Optional[str] = None,
  name_alias: Optional[str] = None,
+ image_version: Optional[int] = None,
  ):
  self.name_alias = name_alias
+ self.image_version = image_version

  self._probe = RuntimeProbes()

@@ -111,7 +111,12 @@ class ExtractorAgent:

  self._task_store: TaskStore = TaskStore()
  self._executor_id = executor_id
- self._function_worker = function_worker
+ self._function_worker = FunctionWorker(
+ workers=num_workers,
+ indexify_client=IndexifyClient(
+ service_url=f"{self._protocol}://{server_addr}"
+ ),
+ )
  self._has_registered = False
  self._server_addr = server_addr
  self._base_url = f"{self._protocol}://{self._server_addr}"
@@ -195,7 +200,7 @@ class ExtractorAgent:
  task, self._protocol, self._server_addr
  )
  image_dependency_installer.executor_image_builder(
- image_info, self.name_alias
+ image_info, self.name_alias, self.image_version
  )
  self._require_image_bootstrap = False
  except Exception as e:
@@ -309,7 +314,7 @@ class ExtractorAgent:
  task=async_task.task,
  task_outcome="failure",
  outputs=[],
- errors=str(async_task.exception()),
+ stderr=str(async_task.exception()),
  )
  self._task_store.complete(outcome=completed_task)
  continue
@@ -326,7 +331,6 @@ class ExtractorAgent:
  task_outcome=task_outcome,
  outputs=outputs.fn_outputs,
  router_output=outputs.router_output,
- errors=outputs.exception,
  stdout=outputs.stdout,
  stderr=outputs.stderr,
  reducer=outputs.reducer,
@@ -378,11 +382,18 @@ class ExtractorAgent:
  else runtime_probe.image_name
  )

+ image_version: int = (
+ self.image_version
+ if self.image_version is not None
+ else runtime_probe.image_version
+ )
+
  data = ExecutorMetadata(
  id=self._executor_id,
  executor_version=executor_version,
  addr="",
  image_name=image_name,
+ image_version=image_version,
  labels=runtime_probe.labels,
  ).model_dump()

@@ -406,6 +417,11 @@ class ExtractorAgent:
  json=data,
  headers={"Content-Type": "application/json"},
  ) as event_source:
+ if not event_source.response.is_success:
+ resp = await event_source.response.aread().decode('utf-8')
+ console.print(f"failed to register: {str(resp)}")
+ await asyncio.sleep(5)
+ continue
  console.print(
  Text("executor registered successfully", style="bold green")
  )

{indexify-0.2.22 → indexify-0.2.24}/indexify/executor/api_objects.py

@@ -21,6 +21,7 @@ class ExecutorMetadata(BaseModel):
  executor_version: str
  addr: str
  image_name: str
+ image_version: int
  labels: Dict[str, Any]


{indexify-0.2.22 → indexify-0.2.24}/indexify/executor/executor_tasks.py

@@ -66,6 +66,7 @@ class ExtractTask(asyncio.Task):
  init_value=init_value,
  code_path=code_path,
  version=task.graph_version,
+ invocation_id=task.invocation_id,
  ),
  **kwargs,
  )

{indexify-0.2.22 → indexify-0.2.24}/indexify/executor/function_worker.py

@@ -6,6 +6,7 @@ import cloudpickle
  from pydantic import BaseModel
  from rich import print

+ from indexify import IndexifyClient
  from indexify.functions_sdk.data_objects import (
  FunctionWorkerOutput,
  IndexifyData,
@@ -13,6 +14,7 @@ from indexify.functions_sdk.data_objects import (
  )
  from indexify.functions_sdk.indexify_functions import (
  FunctionCallResult,
+ GraphInvocationContext,
  IndexifyFunctionWrapper,
  RouterCallResult,
  )
@@ -38,13 +40,18 @@ class FunctionOutput(BaseModel):
  router_output: Optional[RouterOutput]
  reducer: bool = False
  success: bool = True
- exception: Optional[str] = None
  stdout: str = ""
  stderr: str = ""


  def _load_function(
- namespace: str, graph_name: str, fn_name: str, code_path: str, version: int
+ namespace: str,
+ graph_name: str,
+ fn_name: str,
+ code_path: str,
+ version: int,
+ invocation_id: str,
+ indexify_client: IndexifyClient,
  ):
  """Load an extractor to the memory: extractor_wrapper_map."""
  global function_wrapper_map
@@ -54,18 +61,28 @@ def _load_function(
  with open(code_path, "rb") as f:
  code = f.read()
  pickled_functions = cloudpickle.loads(code)
+ context = GraphInvocationContext(
+ invocation_id=invocation_id,
+ graph_name=graph_name,
+ graph_version=str(version),
+ indexify_client=indexify_client,
+ )
  function_wrapper = IndexifyFunctionWrapper(
- cloudpickle.loads(pickled_functions[fn_name])
+ cloudpickle.loads(pickled_functions[fn_name]),
+ context,
  )
  function_wrapper_map[key] = function_wrapper


  class FunctionWorker:
- def __init__(self, workers: int = 1) -> None:
+ def __init__(
+ self, workers: int = 1, indexify_client: IndexifyClient = None
+ ) -> None:
  self._executor: concurrent.futures.ProcessPoolExecutor = (
  concurrent.futures.ProcessPoolExecutor(max_workers=workers)
  )
  self._workers = workers
+ self._indexify_client = indexify_client

  async def async_submit(
  self,
@@ -76,15 +93,23 @@ class FunctionWorker:
  code_path: str,
  version: int,
  init_value: Optional[IndexifyData] = None,
+ invocation_id: Optional[str] = None,
  ) -> FunctionWorkerOutput:
  try:
  result = _run_function(
- namespace, graph_name, fn_name, input, code_path, version, init_value
+ namespace,
+ graph_name,
+ fn_name,
+ input,
+ code_path,
+ version,
+ init_value,
+ invocation_id,
+ self._indexify_client,
  )
  # TODO - bring back running in a separate process
  except Exception as e:
  return FunctionWorkerOutput(
- exception=str(e),
  stdout=e.stdout,
  stderr=e.stderr,
  reducer=e.is_reducer,
@@ -94,7 +119,6 @@ class FunctionWorker:
  return FunctionWorkerOutput(
  fn_outputs=result.fn_outputs,
  router_output=result.router_output,
- exception=result.exception,
  stdout=result.stdout,
  stderr=result.stderr,
  reducer=result.reducer,
@@ -113,6 +137,8 @@ def _run_function(
  code_path: str,
  version: int,
  init_value: Optional[IndexifyData] = None,
+ invocation_id: Optional[str] = None,
+ indexify_client: Optional[IndexifyClient] = None,
  ) -> FunctionOutput:
  import io
  from contextlib import redirect_stderr, redirect_stdout
@@ -123,7 +149,6 @@ def _run_function(
  router_output = None
  fn_output = None
  has_failed = False
- exception_msg = None
  print(
  f"[bold] function_worker: [/bold] invoking function {fn_name} in graph {graph_name}"
  )
@@ -131,7 +156,15 @@ def _run_function(
  try:
  key = f"{namespace}/{graph_name}/{version}/{fn_name}"
  if key not in function_wrapper_map:
- _load_function(namespace, graph_name, fn_name, code_path, version)
+ _load_function(
+ namespace,
+ graph_name,
+ fn_name,
+ code_path,
+ version,
+ invocation_id,
+ indexify_client,
+ )

  fn = function_wrapper_map[key]
  if (
@@ -143,7 +176,6 @@ def _run_function(
  if router_call_result.traceback_msg is not None:
  print(router_call_result.traceback_msg, file=sys.stderr)
  has_failed = True
- exception_msg = router_call_result.traceback_msg
  else:
  fn_call_result: FunctionCallResult = fn.invoke_fn_ser(
  fn_name, input, init_value
@@ -153,11 +185,9 @@ def _run_function(
  if fn_call_result.traceback_msg is not None:
  print(fn_call_result.traceback_msg, file=sys.stderr)
  has_failed = True
- exception_msg = fn_call_result.traceback_msg
- except Exception as e:
+ except Exception:
  print(traceback.format_exc(), file=sys.stderr)
  has_failed = True
- exception_msg = str(e)

  # WARNING - IF THIS FAILS, WE WILL NOT BE ABLE TO RECOVER
  # ANY LOGS
@@ -165,7 +195,6 @@ def _run_function(
  return FunctionOutput(
  fn_outputs=None,
  router_output=None,
- exception=exception_msg,
  stdout=stdout_capture.getvalue(),
  stderr=stderr_capture.getvalue(),
  reducer=is_reducer,

{indexify-0.2.22 → indexify-0.2.24}/indexify/executor/image_dependency_installer.py

@@ -7,7 +7,6 @@ from rich.theme import Theme

  from indexify.functions_sdk.image import ImageInformation

-
  custom_theme = Theme(
  {
  "info": "cyan",
@@ -20,14 +19,19 @@ custom_theme = Theme(
  console = Console(theme=custom_theme)


- def _record_image_name(name: str):
+ def _record_image_name(name: str, version: int):
  dir_path = os.path.expanduser("~/.indexify/")
+
  file_path = os.path.expanduser("~/.indexify/image_name")
  os.makedirs(dir_path, exist_ok=True)
-
  with open(file_path, "w") as file:
  file.write(name)

+ file_path = os.path.expanduser("~/.indexify/image_version")
+ os.makedirs(dir_path, exist_ok=True)
+ with open(file_path, "w") as file:
+ file.write(str(version))
+

  def _install_dependencies(run_str: str):
  # Throw error to the caller if these subprocesses fail.
@@ -36,7 +40,9 @@ def _install_dependencies(run_str: str):
  raise Exception(f"Unable to install dep `{run_str}`")


- def executor_image_builder(image_info: ImageInformation, name_alias: str):
+ def executor_image_builder(
+ image_info: ImageInformation, name_alias: str, image_version: int
+ ):
  console.print(Text("Attempting Executor Bootstrap.", style="red bold"))

  run_strs = image_info.run_strs
@@ -48,5 +54,11 @@ def executor_image_builder(image_info: ImageInformation, name_alias: str):

  console.print(Text("Install dependencies done.", style="red bold"))

- console.print(Text(f"Recording image name {name_alias}", style="red bold"))
- _record_image_name(name_alias)
+ console.print(
+ Text(
+ f"Recording image name {name_alias} and version {image_version}",
+ style="red bold",
+ )
+ )
+
+ _record_image_name(name_alias, image_version)

{indexify-0.2.22 → indexify-0.2.24}/indexify/executor/runtime_probes.py

@@ -6,10 +6,12 @@ from typing import Any, Dict, Tuple
  from pydantic import BaseModel

  DEFAULT_EXECUTOR = "tensorlake/indexify-executor-default"
+ DEFAULT_VERSION = 1


  class ProbeInfo(BaseModel):
  image_name: str
+ image_version: int
  python_major_version: int
  labels: Dict[str, Any] = {}
  is_default_executor: bool
@@ -18,6 +20,7 @@ class ProbeInfo(BaseModel):
  class RuntimeProbes:
  def __init__(self) -> None:
  self._image_name = self._read_image_name()
+ self._image_version = self._read_image_version()
  self._os_name = platform.system()
  self._architecture = platform.machine()
  (
@@ -32,6 +35,13 @@ class RuntimeProbes:
  return file.read().strip()
  return DEFAULT_EXECUTOR

+ def _read_image_version(self) -> int:
+ file_path = os.path.expanduser("~/.indexify/image_version")
+ if os.path.exists(file_path):
+ with open(file_path, "r") as file:
+ return int(file.read().strip())
+ return DEFAULT_VERSION
+
  def _get_python_version(self) -> Tuple[int, int]:
  version_info = sys.version_info
  return version_info.major, version_info.minor
@@ -50,6 +60,7 @@ class RuntimeProbes:

  return ProbeInfo(
  image_name=self._image_name,
+ image_version=self._image_version,
  python_major_version=self._python_version_major,
  labels=labels,
  is_default_executor=self._is_default_executor(),
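
Executor bootstrap now records the requested image version to ~/.indexify/image_version, and RuntimeProbes reads it back, falling back to DEFAULT_VERSION (1) when the file is missing. A minimal sketch of that file handshake, written as a standalone script rather than through the executor classes:

# Minimal sketch of the new version-file handshake; paths and the fallback
# value are taken from the diff above, the concrete version "2" is illustrative.
import os

DEFAULT_VERSION = 1
version_file = os.path.expanduser("~/.indexify/image_version")

# Write side: what _record_image_name now does for the version.
os.makedirs(os.path.dirname(version_file), exist_ok=True)
with open(version_file, "w") as f:
    f.write(str(2))

# Read side: what RuntimeProbes._read_image_version does.
if os.path.exists(version_file):
    with open(version_file) as f:
        image_version = int(f.read().strip())
else:
    image_version = DEFAULT_VERSION

print(image_version)  # 2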

{indexify-0.2.22 → indexify-0.2.24}/indexify/executor/task_reporter.py

@@ -32,22 +32,11 @@ class TaskReporter:
  print(
  f"[bold]task-reporter[/bold] uploading output of size: {len(output.payload)} bytes"
  )
- output_bytes = MsgPackSerializer.serialize(output)
+ output_bytes = MsgPackSerializer.serialize(output)
  fn_outputs.append(
  ("node_outputs", (nanoid.generate(), io.BytesIO(output_bytes)))
  )

- if completed_task.errors:
- print(
- f"[bold]task-reporter[/bold] uploading error of size: {len(completed_task.errors)}"
- )
- fn_outputs.append(
- (
- "exception_msg",
- (nanoid.generate(), io.BytesIO(completed_task.errors.encode())),
- )
- )
-
  if completed_task.stdout:
  print(
  f"[bold]task-reporter[/bold] uploading stdout of size: {len(completed_task.stdout)}"

{indexify-0.2.22 → indexify-0.2.24}/indexify/executor/task_store.py

@@ -14,7 +14,6 @@ class CompletedTask(BaseModel):
  task_outcome: Literal["success", "failure"]
  outputs: Optional[List[IndexifyData]] = None
  router_output: Optional[RouterOutput] = None
- errors: Optional[str] = None
  stdout: Optional[str] = None
  stderr: Optional[str] = None
  reducer: bool = False

{indexify-0.2.22 → indexify-0.2.24}/indexify/functions_sdk/data_objects.py

@@ -1,4 +1,4 @@
- from typing import Any, Dict, List, Optional, Union
+ from typing import Any, Dict, List, Optional, Union, Literal

  from pydantic import BaseModel, Json

@@ -17,13 +17,12 @@ class RouterOutput(BaseModel):
  class IndexifyData(BaseModel):
  id: Optional[str] = None
  payload: bytes
- payload_encoding: str = "cloudpickle"
+ encoder: Literal["cloudpickle", "json"] = "cloudpickle"


  class FunctionWorkerOutput(BaseModel):
  fn_outputs: Optional[List[IndexifyData]]
  router_output: Optional[RouterOutput]
- exception: Optional[str]
  stdout: Optional[str]
  stderr: Optional[str]
  reducer: bool = False
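
IndexifyData.payload_encoding becomes encoder and is constrained to Literal["cloudpickle", "json"], so unknown encodings are rejected when the model is constructed. A quick sketch of that validation, assuming standard pydantic v2 behaviour:

from pydantic import ValidationError

from indexify.functions_sdk.data_objects import IndexifyData

IndexifyData(payload=b"{}", encoder="json")  # accepted: one of the two allowed literals

try:
    IndexifyData(payload=b"{}", encoder="msgpack")  # not in the Literal, so it fails
except ValidationError as err:
    print(err.error_count(), "validation error")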

{indexify-0.2.22 → indexify-0.2.24}/indexify/functions_sdk/graph.py

@@ -33,6 +33,7 @@ from .indexify_functions import (
  IndexifyFunction,
  IndexifyFunctionWrapper,
  IndexifyRouter,
+ GraphInvocationContext,
  )
  from .local_cache import CacheAwareFunctionWrapper
  from .object_serializer import get_serializer
@@ -96,9 +97,9 @@ class Graph:
  return self

  if issubclass(indexify_fn, IndexifyFunction) and indexify_fn.accumulate:
- self.accumulator_zero_values[indexify_fn.name] = (
- indexify_fn.accumulate().model_dump()
- )
+ self.accumulator_zero_values[
+ indexify_fn.name
+ ] = indexify_fn.accumulate().model_dump()

  self.nodes[indexify_fn.name] = indexify_fn
  return self
@@ -159,6 +160,7 @@ class Graph:
  reducer=start_node.accumulate is not None,
  image_name=start_node.image._image_name,
  image_information=start_node.image.to_image_information(),
+ payload_encoder=start_node.encoder
  )
  metadata_edges = self.edges.copy()
  metadata_nodes = {}
@@ -170,7 +172,7 @@ class Graph:
  description=node.description or "",
  source_fn=node_name,
  target_fns=self.routers[node_name],
- payload_encoder=node.payload_encoder,
+ payload_encoder=node.encoder,
  image_name=node.image._image_name,
  image_information=node.image.to_image_information(),
  )
@@ -184,6 +186,7 @@ class Graph:
  reducer=node.accumulate is not None,
  image_name=node.image._image_name,
  image_information=node.image.to_image_information(),
+ encoder=node.encoder,
  )
  )

@@ -201,20 +204,26 @@ class Graph:

  def run(self, block_until_done: bool = False, **kwargs) -> str:
  start_node = self.nodes[self._start_node]
- serializer = get_serializer(start_node.payload_encoder)
- input = IndexifyData(id=generate(), payload=serializer.serialize(kwargs))
+ serializer = get_serializer(start_node.encoder)
+ input = IndexifyData(id=generate(), payload=serializer.serialize(kwargs), encoder=start_node.encoder)
  print(f"[bold] Invoking {self._start_node}[/bold]")
  outputs = defaultdict(list)
  self._accumulator_values[input.id] = {}
  for k, v in self.accumulator_zero_values.items():
  node = self.nodes[k]
- serializer = get_serializer(node.payload_encoder)
+ serializer = get_serializer(node.encoder)
  self._accumulator_values[input.id] = {
- k: IndexifyData(payload=serializer.serialize(v))
+ k: IndexifyData(payload=serializer.serialize(v), encoder=node.encoder)
  }
  self._results[input.id] = outputs
  enable_cache = kwargs.get("enable_cache", True)
- self._run(input, outputs, enable_cache)
+ ctx = GraphInvocationContext(
+ invocation_id=input.id,
+ graph_name=self.name,
+ graph_version="1",
+ indexify_client=None,
+ )
+ self._run(input, outputs, enable_cache, ctx)
  return input.id

  def _run(
@@ -222,6 +231,7 @@ class Graph:
  initial_input: IndexifyData,
  outputs: Dict[str, List[bytes]],
  enable_cache: bool,
+ ctx: GraphInvocationContext,
  ):
  accumulator_values = self._accumulator_values[initial_input.id]
  queue = deque([(self._start_node, initial_input)])
@@ -229,7 +239,7 @@ class Graph:
  node_name, input = queue.popleft()
  node = self.nodes[node_name]
  function_outputs: FunctionCallResult = IndexifyFunctionWrapper(
- node
+ node, context=ctx
  ).invoke_fn_ser(node_name, input, accumulator_values.get(node_name, None))
  if function_outputs.traceback_msg is not None:
  print(function_outputs.traceback_msg)
@@ -281,7 +291,7 @@ class Graph:
  raise ValueError(f"no results found for fn {fn_name} on graph {self.name}")
  fn = self.nodes[fn_name]
  fn_model = self.get_function(fn_name).get_output_model()
- serializer = get_serializer(fn.payload_encoder)
+ serializer = get_serializer(fn.encoder)
  outputs = []
  for result in results[fn_name]:
  payload_dict = serializer.deserialize(result.payload)
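
For SDK users the visible change in the graph API is that nodes are declared with encoder= instead of payload_encoder=, and local runs now build a GraphInvocationContext automatically. A local-run sketch under those assumptions; the graph and function names are illustrative, and the Graph(name=..., start_node=..., description=...) constructor and output() accessor are assumed to behave as in earlier 0.2.x releases:

from indexify import indexify_function
from indexify.functions_sdk.graph import Graph

@indexify_function(encoder="json")  # was payload_encoder="json" in 0.2.22
def square(x: int) -> int:
    return x * x

# Hypothetical single-node graph, run entirely in-process.
g = Graph(name="square-demo", start_node=square, description="encoder rename demo")
invocation_id = g.run(x=4)
print(g.output(invocation_id, "square"))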

{indexify-0.2.22 → indexify-0.2.24}/indexify/functions_sdk/graph_definition.py

@@ -48,4 +48,4 @@ class ComputeGraphMetadata(BaseModel):
  runtime_information: RuntimeInformation

  def get_input_payload_serializer(self):
- return get_serializer(self.start_node.compute_fn.payload_encoder)
+ return get_serializer(self.start_node.compute_fn.encoder)

{indexify-0.2.22 → indexify-0.2.24}/indexify/functions_sdk/indexify_functions.py

@@ -2,7 +2,6 @@ import inspect
  import re
  import sys
  import traceback
- from abc import ABC, abstractmethod
  from functools import update_wrapper
  from typing import (
  Any,
@@ -18,12 +17,35 @@ from typing import (
  )

  import msgpack
- from pydantic import BaseModel
+ from pydantic import BaseModel, Field, PrivateAttr, model_validator
  from typing_extensions import get_type_hints

- from .data_objects import IndexifyData, RouterOutput
+ from .data_objects import IndexifyData
  from .image import DEFAULT_IMAGE_3_10, Image
- from .object_serializer import CloudPickleSerializer, get_serializer
+ from .object_serializer import get_serializer
+
+
+ class GraphInvocationContext(BaseModel):
+ invocation_id: str
+ graph_name: str
+ graph_version: str
+ indexify_client: Optional[Any] = Field(default=None) # avoids circular import
+ _local_state: Dict[str, Any] = PrivateAttr(default_factory=dict)
+
+ def set_state_key(self, key: str, value: Any) -> None:
+ if self.indexify_client is None:
+ self._local_state[key] = value
+ return
+ self.indexify_client.set_state_key(
+ self.graph_name, self.invocation_id, key, value
+ )
+
+ def get_state_key(self, key: str) -> Any:
+ if self.indexify_client is None:
+ return self._local_state.get(key)
+ return self.indexify_client.get_state_key(
+ self.graph_name, self.invocation_id, key
+ )


  def format_filtered_traceback(exc_info=None):
@@ -97,7 +119,7 @@ class IndexifyFunction:
  image: Optional[Image] = DEFAULT_IMAGE_3_10
  placement_constraints: List[PlacementConstraints] = []
  accumulate: Optional[Type[Any]] = None
- payload_encoder: Optional[str] = "cloudpickle"
+ encoder: Optional[str] = "cloudpickle"

  def run(self, *args, **kwargs) -> Union[List[Any], Any]:
  pass
@@ -109,7 +131,7 @@ class IndexifyFunction:

  @classmethod
  def deserialize_output(cls, output: IndexifyData) -> Any:
- serializer = get_serializer(cls.payload_encoder)
+ serializer = get_serializer(cls.encoder)
  return serializer.deserialize(output.payload)


@@ -118,7 +140,7 @@ class IndexifyRouter:
  description: str = ""
  image: Optional[Image] = DEFAULT_IMAGE_3_10
  placement_constraints: List[PlacementConstraints] = []
- payload_encoder: Optional[str] = "cloudpickle"
+ encoder: Optional[str] = "cloudpickle"

  def run(self, *args, **kwargs) -> Optional[List[IndexifyFunction]]:
  pass
@@ -129,7 +151,7 @@ def indexify_router(
  description: Optional[str] = "",
  image: Optional[Image] = DEFAULT_IMAGE_3_10,
  placement_constraints: List[PlacementConstraints] = [],
- payload_encoder: Optional[str] = "cloudpickle",
+ encoder: Optional[str] = "cloudpickle",
  ):
  def construct(fn):
  args = locals().copy()
@@ -152,7 +174,7 @@ def indexify_router(
  setattr(IndexifyRo, key, value)

  IndexifyRo.image = image
- IndexifyRo.payload_encoder = payload_encoder
+ IndexifyRo.encoder = encoder
  return IndexifyRo

  return construct
@@ -163,7 +185,7 @@ def indexify_function(
  description: Optional[str] = "",
  image: Optional[Image] = DEFAULT_IMAGE_3_10,
  accumulate: Optional[Type[BaseModel]] = None,
- payload_encoder: Optional[str] = "cloudpickle",
+ encoder: Optional[str] = "cloudpickle",
  placement_constraints: List[PlacementConstraints] = [],
  ):
  def construct(fn):
@@ -185,10 +207,9 @@ def indexify_function(
  for key, value in args.items():
  if key != "fn" and key != "self":
  setattr(IndexifyFn, key, value)
-
  IndexifyFn.image = image
  IndexifyFn.accumulate = accumulate
- IndexifyFn.payload_encoder = payload_encoder
+ IndexifyFn.encoder = encoder
  return IndexifyFn

  return construct
@@ -205,10 +226,15 @@ class RouterCallResult(BaseModel):


  class IndexifyFunctionWrapper:
- def __init__(self, indexify_function: Union[IndexifyFunction, IndexifyRouter]):
- self.indexify_function: Union[IndexifyFunction, IndexifyRouter] = (
- indexify_function()
- )
+ def __init__(
+ self,
+ indexify_function: Union[IndexifyFunction, IndexifyRouter],
+ context: GraphInvocationContext,
+ ):
+ self.indexify_function: Union[
+ IndexifyFunction, IndexifyRouter
+ ] = indexify_function()
+ self.indexify_function._ctx = context

  def get_output_model(self) -> Any:
  if not isinstance(self.indexify_function, IndexifyFunction):
@@ -276,7 +302,7 @@ class IndexifyFunctionWrapper:
  self, name: str, input: IndexifyData, acc: Optional[Any] = None
  ) -> FunctionCallResult:
  input = self.deserialize_input(name, input)
- serializer = get_serializer(self.indexify_function.payload_encoder)
+ serializer = get_serializer(self.indexify_function.encoder)
  if acc is not None:
  acc = self.indexify_function.accumulate.model_validate(
  serializer.deserialize(acc.payload)
@@ -287,7 +313,7 @@ class IndexifyFunctionWrapper:
  )
  outputs, err = self.run_fn(input, acc=acc)
  ser_outputs = [
- IndexifyData(payload=serializer.serialize(output)) for output in outputs
+ IndexifyData(payload=serializer.serialize(output), encoder=self.indexify_function.encoder) for output in outputs
  ]
  return FunctionCallResult(ser_outputs=ser_outputs, traceback_msg=err)

@@ -297,28 +323,17 @@ class IndexifyFunctionWrapper:
  return RouterCallResult(edges=edges, traceback_msg=err)

  def deserialize_input(self, compute_fn: str, indexify_data: IndexifyData) -> Any:
- if self.indexify_function.payload_encoder == "cloudpickle":
- return CloudPickleSerializer.deserialize(indexify_data.payload)
- payload = msgpack.unpackb(indexify_data.payload)
- signature = inspect.signature(self.indexify_function.run)
- arg_types = {}
- for name, param in signature.parameters.items():
- if (
- param.annotation != inspect.Parameter.empty
- and param.annotation != getattr(compute_fn, "accumulate", None)
- ):
- arg_types[name] = param.annotation
- if len(arg_types) > 1:
- raise ValueError(
- f"Compute function {compute_fn} has multiple arguments, but only one is supported"
- )
- elif len(arg_types) == 0:
- raise ValueError(f"Compute function {compute_fn} has no arguments")
- arg_name, arg_type = next(iter(arg_types.items()))
- if arg_type is None:
- raise ValueError(f"Argument {arg_name} has no type annotation")
- if is_pydantic_model_from_annotation(arg_type):
- if len(payload.keys()) == 1 and isinstance(list(payload.values())[0], dict):
- payload = list(payload.values())[0]
- return arg_type.model_validate(payload)
- return payload
+ encoder = indexify_data.encoder
+ payload = indexify_data.payload
+ serializer = get_serializer(encoder)
+ return serializer.deserialize(payload)
+
+ def get_ctx() -> GraphInvocationContext:
+ frame = inspect.currentframe()
+ caller_frame = frame.f_back.f_back
+ function_instance = caller_frame.f_locals["self"]
+ del frame
+ del caller_frame
+ if isinstance(function_instance, IndexifyFunctionWrapper):
+ return function_instance.indexify_function._ctx
+ return function_instance._ctx
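
The new GraphInvocationContext plus the get_ctx() helper (both re-exported from the package top level in this release) give a running function access to per-invocation state: with no IndexifyClient attached the keys live in a local dict, otherwise they are proxied to the server's invocation ctx endpoint. A sketch of the intended use inside a decorated function; the function body and key names are illustrative:

from indexify import get_ctx, indexify_function

@indexify_function()
def count_words(text: str) -> int:
    ctx = get_ctx()  # GraphInvocationContext of the current invocation
    ctx.set_state_key("last_text_length", len(text))  # local dict or server-side ctx
    print(ctx.invocation_id, ctx.graph_name, ctx.graph_version)
    return len(text.split())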

{indexify-0.2.22 → indexify-0.2.24}/indexify/functions_sdk/object_serializer.py

@@ -1,5 +1,6 @@
  from typing import Any, List

+ import jsonpickle
  import cloudpickle
  import msgpack
  from pydantic import BaseModel
@@ -12,8 +13,26 @@ def get_serializer(serializer_type: str) -> Any:
  return CloudPickleSerializer()
  elif serializer_type == "msgpack":
  return MsgPackSerializer()
- else:
- raise ValueError(f"Unknown serializer type: {serializer_type}")
+ elif serializer_type == "json":
+ return JsonSerializer()
+ raise ValueError(f"Unknown serializer type: {serializer_type}")
+
+ class JsonSerializer:
+ @staticmethod
+ def serialize(data: Any) -> str:
+ return jsonpickle.encode(data)
+
+ @staticmethod
+ def deserialize(data: str) -> Any:
+ return jsonpickle.decode(data)
+
+ @staticmethod
+ def serialize_list(data: List[Any]) -> str:
+ return jsonpickle.encode(data)
+
+ @staticmethod
+ def deserialize_list(data: str) -> List[Any]:
+ return jsonpickle.decode(data)


  class CloudPickleSerializer:
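
get_serializer("json") now returns the new JsonSerializer, backed by the jsonpickle dependency added in this release, alongside the existing cloudpickle and msgpack options. A quick round-trip sketch:

from indexify.functions_sdk.object_serializer import get_serializer

json_ser = get_serializer("json")           # new in 0.2.24, backed by jsonpickle
blob = json_ser.serialize({"answer": 42})   # a plain JSON string
print(json_ser.deserialize(blob))           # {'answer': 42}

pickle_ser = get_serializer("cloudpickle")  # still the default encoder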

{indexify-0.2.22 → indexify-0.2.24}/indexify/functions_sdk/pipeline.py

@@ -28,3 +28,6 @@ class Pipeline:
  def run(self, **kwargs):
  invocation_id = self._graph.run(**kwargs)
  return invocation_id
+
+ def output(self, invocation_id: str, function_name: str):
+ return self._graph.output(invocation_id, function_name)

{indexify-0.2.22 → indexify-0.2.24}/indexify/http_client.py

@@ -14,8 +14,7 @@ from indexify.error import ApiException, GraphStillProcessing
  from indexify.functions_sdk.data_objects import IndexifyData
  from indexify.functions_sdk.graph import ComputeGraphMetadata, Graph
  from indexify.functions_sdk.indexify_functions import IndexifyFunction
- from indexify.settings import DEFAULT_SERVICE_URL, DEFAULT_SERVICE_URL_HTTPS
-
+ from indexify.settings import DEFAULT_SERVICE_URL

  class InvocationEventPayload(BaseModel):
  invocation_id: str
@@ -47,6 +46,7 @@ class IndexifyClient:
  service_url: str = DEFAULT_SERVICE_URL,
  config_path: Optional[str] = None,
  namespace: str = "default",
+ api_key: Optional[str] = None,
  **kwargs,
  ):
  if os.environ.get("INDEXIFY_URL"):
@@ -74,6 +74,10 @@ class IndexifyClient:
  self._timeout = kwargs.get("timeout")
  self._graphs: Dict[str, Graph] = {}
  self._fns: Dict[str, IndexifyFunction] = {}
+ self._api_key = api_key
+ if not self._api_key:
+ print("API key not provided. Trying to fetch from environment TENSORLAKE_API_KEY variable")
+ self._api_key = os.getenv("TENSORLAKE_API_KEY")

  def _request(self, method: str, **kwargs) -> httpx.Response:
  try:
@@ -87,10 +91,9 @@ class IndexifyClient:
  raise ApiException(response.text)
  except httpx.ConnectError:
  message = (
- f"Make sure the server is running and accesible at {self._service_url}"
+ f"Make sure the server is running and accessible at {self._service_url}"
  )
  ex = ApiException(message=message)
- print(ex)
  raise ex
  return response

@@ -100,7 +103,7 @@ class IndexifyClient:
  cert_path: str,
  key_path: str,
  ca_bundle_path: Optional[str] = None,
- service_url: str = DEFAULT_SERVICE_URL_HTTPS,
+ service_url: str = DEFAULT_SERVICE_URL,
  *args,
  **kwargs,
  ) -> "IndexifyClient":
@@ -140,17 +143,25 @@ class IndexifyClient:
  verify=verify_option,
  )
  return client
+
+ def _add_api_key(self, kwargs):
+ if self._api_key:
+ kwargs["headers"] = {"Authorization": f"Bearer {self._api_key}"}

  def _get(self, endpoint: str, **kwargs) -> httpx.Response:
+ self._add_api_key(kwargs)
  return self._request("GET", url=f"{self._service_url}/{endpoint}", **kwargs)

  def _post(self, endpoint: str, **kwargs) -> httpx.Response:
+ self._add_api_key(kwargs)
  return self._request("POST", url=f"{self._service_url}/{endpoint}", **kwargs)

  def _put(self, endpoint: str, **kwargs) -> httpx.Response:
+ self._add_api_key(kwargs)
  return self._request("PUT", url=f"{self._service_url}/{endpoint}", **kwargs)

  def _delete(self, endpoint: str, **kwargs) -> httpx.Response:
+ self._add_api_key(kwargs)
  return self._request("DELETE", url=f"{self._service_url}/{endpoint}", **kwargs)

  def _close(self):
@@ -198,6 +209,23 @@ class IndexifyClient:
  namespaces.append(item["name"])
  return namespaces

+ def set_state_key(
+ self, compute_graph: str, invocation_id: str, key: str, value: Json
+ ) -> None:
+ response = self._post(
+ f"internal/namespaces/{self.namespace}/compute_graphs/{compute_graph}/invocations/{invocation_id}/ctx",
+ json={"key": key, "value": value},
+ )
+ response.raise_for_status()
+
+ def get_state_key(self, compute_graph: str, invocation_id: str, key: str) -> Json:
+ response = self._get(
+ f"internal/namespaces/{self.namespace}/compute_graphs/{compute_graph}/invocations/{invocation_id}/ctx",
+ json={"key": key},
+ )
+ response.raise_for_status()
+ return response.json().get("value")
+
  @classmethod
  def new_namespace(
  cls, namespace: str, server_addr: Optional[str] = "http://localhost:8900"
@@ -242,15 +270,18 @@ class IndexifyClient:
  ) -> str:
  ser_input = cloudpickle.dumps(kwargs)
  params = {"block_until_finish": block_until_done}
+ kwargs = {"headers": {"Content-Type": "application/cbor"}, "data": ser_input, "params":params}
+ self._add_api_key(kwargs)
  with httpx.Client() as client:
  with connect_sse(
  client,
  "POST",
  f"{self.service_url}/namespaces/{self.namespace}/compute_graphs/{graph}/invoke_object",
- headers={"Content-Type": "application/cbor"},
- data=ser_input,
- params=params,
+ **kwargs,
  ) as event_source:
+ if not event_source.response.is_success:
+ resp = event_source.response.read().decode("utf-8")
+ raise Exception(f"failed to invoke graph: {resp}")
  for sse in event_source.iter_sse():
  obj = json.loads(sse.data)
  for k, v in obj.items():
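
IndexifyClient now takes an optional api_key (falling back to the TENSORLAKE_API_KEY environment variable) and attaches it as a bearer token to every request, and the default service URL now points at the hosted endpoint. A short sketch with placeholder key values:

import os

from indexify import IndexifyClient

# Pass the key explicitly (placeholder value) ...
client = IndexifyClient(service_url="https://api.tensorlake.ai", api_key="<your-api-key>")

# ... or rely on the new environment fallback; DEFAULT_SERVICE_URL is now
# https://api.tensorlake.ai, so no service_url argument is needed.
os.environ["TENSORLAKE_API_KEY"] = "<your-api-key>"
client = IndexifyClient()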

{indexify-0.2.22 → indexify-0.2.24}/indexify/remote_graph.py

@@ -84,7 +84,12 @@ class RemoteGraph:
  return cls(name=g.name, server_url=server_url, client=client)

  @classmethod
- def by_name(cls, name: str, server_url: Optional[str] = DEFAULT_SERVICE_URL, client: Optional[IndexifyClient] = None):
+ def by_name(
+ cls,
+ name: str,
+ server_url: Optional[str] = DEFAULT_SERVICE_URL,
+ client: Optional[IndexifyClient] = None,
+ ):
  """
  Create a handle to call a RemoteGraph by name.

@@ -104,7 +109,7 @@ class RemoteGraph:
  ) -> List[Any]:
  """
  Returns the extracted objects by a graph for an ingested object.
-
+
  - If the extractor name is provided, only the objects extracted by that extractor are returned.
  - If the extractor name is not provided, all the extracted objects are returned for the input object.

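
RemoteGraph.by_name is only reformatted here (the signature is unchanged), so existing call sites keep working. A minimal sketch with a placeholder graph name:

from indexify.remote_graph import RemoteGraph

# "my-graph" is a placeholder for a graph already deployed to the server.
graph = RemoteGraph.by_name(name="my-graph", server_url="https://api.tensorlake.ai")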

indexify-0.2.24/indexify/settings.py

@@ -0,0 +1 @@
+ DEFAULT_SERVICE_URL = "https://api.tensorlake.ai"

{indexify-0.2.22 → indexify-0.2.24}/pyproject.toml

@@ -1,6 +1,6 @@
  [tool.poetry]
  name = "indexify"
- version = "0.2.22"
+ version = "0.2.24"
  description = "Python Client for Indexify"
  authors = ["Tensorlake Inc. <support@tensorlake.ai>"]
  license = "Apache 2.0"
@@ -23,6 +23,7 @@ docker = "^7.1.0"
  msgpack= "^1.1.0"
  typer = "^0.12.5"
  httpx-sse = "^0.4.0"
+ jsonpickle = "^3.3.0"

  [tool.poetry.dev-dependencies]
  black = "^22.3.0"

indexify-0.2.22/indexify/settings.py

@@ -1,2 +0,0 @@
- DEFAULT_SERVICE_URL = "http://localhost:8900"
- DEFAULT_SERVICE_URL_HTTPS = "https://localhost:8900"