indexify 0.2.28__tar.gz → 0.2.30__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. {indexify-0.2.28 → indexify-0.2.30}/PKG-INFO +1 -1
  2. {indexify-0.2.28 → indexify-0.2.30}/indexify/executor/agent.py +16 -3
  3. {indexify-0.2.28 → indexify-0.2.30}/indexify/executor/task_reporter.py +49 -12
  4. {indexify-0.2.28 → indexify-0.2.30}/indexify/executor/task_store.py +1 -0
  5. {indexify-0.2.28 → indexify-0.2.30}/indexify/functions_sdk/graph_definition.py +9 -0
  6. {indexify-0.2.28 → indexify-0.2.30}/indexify/http_client.py +7 -10
  7. {indexify-0.2.28 → indexify-0.2.30}/indexify/remote_graph.py +11 -3
  8. {indexify-0.2.28 → indexify-0.2.30}/pyproject.toml +1 -1
  9. {indexify-0.2.28 → indexify-0.2.30}/LICENSE.txt +0 -0
  10. {indexify-0.2.28 → indexify-0.2.30}/README.md +0 -0
  11. {indexify-0.2.28 → indexify-0.2.30}/indexify/__init__.py +0 -0
  12. {indexify-0.2.28 → indexify-0.2.30}/indexify/cli.py +0 -0
  13. {indexify-0.2.28 → indexify-0.2.30}/indexify/common_util.py +0 -0
  14. {indexify-0.2.28 → indexify-0.2.30}/indexify/data_loaders/__init__.py +0 -0
  15. {indexify-0.2.28 → indexify-0.2.30}/indexify/data_loaders/local_directory_loader.py +0 -0
  16. {indexify-0.2.28 → indexify-0.2.30}/indexify/data_loaders/url_loader.py +0 -0
  17. {indexify-0.2.28 → indexify-0.2.30}/indexify/error.py +0 -0
  18. {indexify-0.2.28 → indexify-0.2.30}/indexify/executor/api_objects.py +0 -0
  19. {indexify-0.2.28 → indexify-0.2.30}/indexify/executor/downloader.py +0 -0
  20. {indexify-0.2.28 → indexify-0.2.30}/indexify/executor/executor_tasks.py +0 -0
  21. {indexify-0.2.28 → indexify-0.2.30}/indexify/executor/function_worker.py +0 -0
  22. {indexify-0.2.28 → indexify-0.2.30}/indexify/executor/image_dependency_installer.py +0 -0
  23. {indexify-0.2.28 → indexify-0.2.30}/indexify/executor/indexify_executor.py +0 -0
  24. {indexify-0.2.28 → indexify-0.2.30}/indexify/executor/runtime_probes.py +0 -0
  25. {indexify-0.2.28 → indexify-0.2.30}/indexify/functions_sdk/data_objects.py +0 -0
  26. {indexify-0.2.28 → indexify-0.2.30}/indexify/functions_sdk/graph.py +0 -0
  27. {indexify-0.2.28 → indexify-0.2.30}/indexify/functions_sdk/graph_validation.py +0 -0
  28. {indexify-0.2.28 → indexify-0.2.30}/indexify/functions_sdk/image.py +0 -0
  29. {indexify-0.2.28 → indexify-0.2.30}/indexify/functions_sdk/indexify_functions.py +0 -0
  30. {indexify-0.2.28 → indexify-0.2.30}/indexify/functions_sdk/local_cache.py +0 -0
  31. {indexify-0.2.28 → indexify-0.2.30}/indexify/functions_sdk/object_serializer.py +0 -0
  32. {indexify-0.2.28 → indexify-0.2.30}/indexify/functions_sdk/pipeline.py +0 -0
  33. {indexify-0.2.28 → indexify-0.2.30}/indexify/remote_pipeline.py +0 -0
  34. {indexify-0.2.28 → indexify-0.2.30}/indexify/settings.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: indexify
3
- Version: 0.2.28
3
+ Version: 0.2.30
4
4
  Summary: Python Client for Indexify
5
5
  Home-page: https://github.com/tensorlakeai/indexify
6
6
  License: Apache 2.0
@@ -100,7 +100,9 @@ class ExtractorAgent:
100
100
  self._server_addr = server_addr
101
101
  self._base_url = f"{self._protocol}://{self._server_addr}"
102
102
  self._code_path = code_path
103
- self._downloader = Downloader(code_path=code_path, base_url=self._base_url)
103
+ self._downloader = Downloader(
104
+ code_path=code_path, base_url=self._base_url, config_path=self._config_path
105
+ )
104
106
  self._max_queued_tasks = 10
105
107
  self._task_reporter = TaskReporter(
106
108
  base_url=self._base_url,
@@ -112,9 +114,15 @@ class ExtractorAgent:
112
114
  console.print(Text("Starting task completion reporter", style="bold cyan"))
113
115
  # We should copy only the keys and not the values
114
116
  url = f"{self._protocol}://{self._server_addr}/write_content"
117
+
115
118
  while True:
116
119
  outcomes = await self._task_store.task_outcomes()
117
120
  for task_outcome in outcomes:
121
+ retryStr = (
122
+ f"\nRetries: {task_outcome.reporting_retries}"
123
+ if task_outcome.reporting_retries > 0
124
+ else ""
125
+ )
118
126
  outcome = task_outcome.task_outcome
119
127
  style_outcome = (
120
128
  f"[bold red] {outcome} [/]"
@@ -125,7 +133,9 @@ class ExtractorAgent:
125
133
  Panel(
126
134
  f"Reporting outcome of task: {task_outcome.task.id}, function: {task_outcome.task.compute_fn}\n"
127
135
  f"Outcome: {style_outcome}\n"
128
- f"Num Fn Outputs: {len(task_outcome.outputs or [])} Router Output: {task_outcome.router_output}",
136
+ f"Num Fn Outputs: {len(task_outcome.outputs or [])}\n"
137
+ f"Router Output: {task_outcome.router_output}\n"
138
+ f"Retries: {task_outcome.reporting_retries}",
129
139
  title="Task Completion",
130
140
  border_style="info",
131
141
  )
@@ -139,11 +149,14 @@ class ExtractorAgent:
139
149
  console.print(
140
150
  Panel(
141
151
  f"Failed to report task {task_outcome.task.id}\n"
142
- f"Exception: {e}\nRetrying...",
152
+ f"Exception: {type(e).__name__}({e})\n"
153
+ f"Retries: {task_outcome.reporting_retries}\n"
154
+ "Retrying...",
143
155
  title="Reporting Error",
144
156
  border_style="error",
145
157
  )
146
158
  )
159
+ task_outcome.reporting_retries += 1
147
160
  await asyncio.sleep(5)
148
161
  continue
149
162
 
@@ -2,6 +2,8 @@ import io
2
2
  from typing import Optional
3
3
 
4
4
  import nanoid
5
+ from httpx import Timeout
6
+ from pydantic import BaseModel
5
7
  from rich import print
6
8
 
7
9
  from indexify.common_util import get_httpx_client
@@ -21,6 +23,15 @@ FORCE_MULTIPART = ForceMultipartDict()
21
23
  UTF_8_CONTENT_TYPE = "application/octet-stream"
22
24
 
23
25
 
26
+ class ReportingData(BaseModel):
27
+ output_count: int = 0
28
+ output_total_bytes: int = 0
29
+ stdout_count: int = 0
30
+ stdout_total_bytes: int = 0
31
+ stderr_count: int = 0
32
+ stderr_total_bytes: int = 0
33
+
34
+
24
35
  class TaskReporter:
25
36
  def __init__(
26
37
  self, base_url: str, executor_id: str, config_path: Optional[str] = None
@@ -30,11 +41,10 @@ class TaskReporter:
30
41
  self._client = get_httpx_client(config_path)
31
42
 
32
43
  def report_task_outcome(self, completed_task: CompletedTask):
44
+
45
+ report = ReportingData()
33
46
  fn_outputs = []
34
47
  for output in completed_task.outputs or []:
35
- print(
36
- f"[bold]task-reporter[/bold] uploading output of size: {len(output.payload)} bytes"
37
- )
38
48
  serializer = get_serializer(output.encoder)
39
49
  serialized_output = serializer.serialize(output.payload)
40
50
  fn_outputs.append(
@@ -43,11 +53,10 @@ class TaskReporter:
43
53
  (nanoid.generate(), serialized_output, serializer.content_type),
44
54
  )
45
55
  )
56
+ report.output_count += 1
57
+ report.output_total_bytes += len(serialized_output)
46
58
 
47
59
  if completed_task.stdout:
48
- print(
49
- f"[bold]task-reporter[/bold] uploading stdout of size: {len(completed_task.stdout)}"
50
- )
51
60
  fn_outputs.append(
52
61
  (
53
62
  "stdout",
@@ -58,11 +67,10 @@ class TaskReporter:
58
67
  ),
59
68
  )
60
69
  )
70
+ report.stdout_count += 1
71
+ report.stdout_total_bytes += len(completed_task.stdout)
61
72
 
62
73
  if completed_task.stderr:
63
- print(
64
- f"[bold]task-reporter[/bold] uploading stderr of size: {len(completed_task.stderr)}"
65
- )
66
74
  fn_outputs.append(
67
75
  (
68
76
  "stderr",
@@ -73,6 +81,8 @@ class TaskReporter:
73
81
  ),
74
82
  )
75
83
  )
84
+ report.stderr_count += 1
85
+ report.stderr_total_bytes += len(completed_task.stderr)
76
86
 
77
87
  router_output = (
78
88
  ApiRouterOutput(edges=completed_task.router_output.edges)
@@ -93,7 +103,30 @@ class TaskReporter:
93
103
  )
94
104
  task_result_data = task_result.model_dump_json(exclude_none=True)
95
105
 
96
- kwargs = {"data": {"task_result": task_result_data}}
106
+ total_bytes = (
107
+ report.output_total_bytes
108
+ + report.stdout_total_bytes
109
+ + report.stderr_total_bytes
110
+ )
111
+
112
+ print(
113
+ f"[bold]task-reporter[/bold] reporting task outcome "
114
+ f"task_id={completed_task.task.id} retries={completed_task.reporting_retries} "
115
+ f"total_bytes={total_bytes} total_files={report.output_count + report.stdout_count + report.stderr_count} "
116
+ f"output_files={report.output_count} output_bytes={total_bytes} "
117
+ f"stdout_bytes={report.stdout_total_bytes} stderr_bytes={report.stderr_total_bytes} "
118
+ )
119
+
120
+ #
121
+ kwargs = {
122
+ "data": {"task_result": task_result_data},
123
+ # Use httpx default timeout of 5s for all timeout types.
124
+ # For read timeouts, use 5 minutes to allow for large file uploads.
125
+ "timeout": Timeout(
126
+ 5.0,
127
+ read=5.0 * 60,
128
+ ),
129
+ }
97
130
  if fn_outputs and len(fn_outputs) > 0:
98
131
  kwargs["files"] = fn_outputs
99
132
  else:
@@ -104,11 +137,15 @@ class TaskReporter:
104
137
  **kwargs,
105
138
  )
106
139
  except Exception as e:
107
- print(f"failed to report task outcome {e}")
140
+ print(
141
+ f"[bold]task-reporter[/bold] failed to report task outcome retries={completed_task.reporting_retries} {type(e).__name__}({e})"
142
+ )
108
143
  raise e
109
144
 
110
145
  try:
111
146
  response.raise_for_status()
112
147
  except Exception as e:
113
- print(f"failed to report task outcome {response.text}")
148
+ print(
149
+ f"[bold]task-reporter[/bold] failed to report task outcome retries={completed_task.reporting_retries} {response.text}"
150
+ )
114
151
  raise e
@@ -17,6 +17,7 @@ class CompletedTask(BaseModel):
17
17
  stdout: Optional[str] = None
18
18
  stderr: Optional[str] = None
19
19
  reducer: bool = False
20
+ reporting_retries: int = 0
20
21
 
21
22
 
22
23
  class TaskStore:
@@ -46,6 +46,15 @@ class ComputeGraphMetadata(BaseModel):
46
46
  edges: Dict[str, List[str]]
47
47
  accumulator_zero_values: Dict[str, bytes] = {}
48
48
  runtime_information: RuntimeInformation
49
+ replaying: bool = False
49
50
 
50
51
  def get_input_payload_serializer(self):
51
52
  return get_serializer(self.start_node.compute_fn.encoder)
53
+
54
+ def get_input_encoder(self) -> str:
55
+ if self.start_node.compute_fn:
56
+ return self.start_node.compute_fn.encoder
57
+ elif self.start_node.dynamic_router:
58
+ return self.start_node.dynamic_router.encoder
59
+
60
+ raise ValueError("start node is not set on the graph")
@@ -1,6 +1,6 @@
1
1
  import json
2
2
  import os
3
- from typing import Any, Dict, List, Optional, Union
3
+ from typing import Any, Dict, List, Optional
4
4
 
5
5
  import cloudpickle
6
6
  import httpx
@@ -13,11 +13,7 @@ from indexify.error import ApiException, GraphStillProcessing
13
13
  from indexify.functions_sdk.data_objects import IndexifyData
14
14
  from indexify.functions_sdk.graph import ComputeGraphMetadata, Graph
15
15
  from indexify.functions_sdk.indexify_functions import IndexifyFunction
16
- from indexify.functions_sdk.object_serializer import (
17
- CloudPickleSerializer,
18
- JsonSerializer,
19
- get_serializer,
20
- )
16
+ from indexify.functions_sdk.object_serializer import get_serializer
21
17
  from indexify.settings import DEFAULT_SERVICE_URL
22
18
 
23
19
 
@@ -139,7 +135,9 @@ class IndexifyClient:
139
135
 
140
136
  def _add_api_key(self, kwargs):
141
137
  if self._api_key:
142
- kwargs["headers"] = {"Authorization": f"Bearer {self._api_key}"}
138
+ if "headers" not in kwargs:
139
+ kwargs["headers"] = {}
140
+ kwargs["headers"]["Authorization"] = f"Bearer {self._api_key}"
143
141
 
144
142
  def _get(self, endpoint: str, **kwargs) -> httpx.Response:
145
143
  self._add_api_key(kwargs)
@@ -276,11 +274,10 @@ class IndexifyClient:
276
274
  self,
277
275
  graph: str,
278
276
  block_until_done: bool = False,
279
- serializer: Union[
280
- CloudPickleSerializer, JsonSerializer
281
- ] = CloudPickleSerializer,
277
+ serializer: str = "cloudpickle",
282
278
  **kwargs,
283
279
  ) -> str:
280
+ serializer = get_serializer(serializer)
284
281
  ser_input = serializer.serialize(kwargs)
285
282
  params = {"block_until_finish": block_until_done}
286
283
  kwargs = {
@@ -1,6 +1,7 @@
1
1
  from typing import Any, List, Optional
2
2
 
3
- from indexify.functions_sdk.graph import Graph
3
+ from indexify.functions_sdk.graph import ComputeGraphMetadata, Graph
4
+ from indexify.functions_sdk.graph_definition import ComputeGraphMetadata
4
5
 
5
6
  from .http_client import IndexifyClient
6
7
  from .settings import DEFAULT_SERVICE_URL
@@ -30,6 +31,8 @@ class RemoteGraph:
30
31
  else:
31
32
  self._client = IndexifyClient(service_url=server_url)
32
33
 
34
+ self._graph_definition: ComputeGraphMetadata = self._client.graph(self._name)
35
+
33
36
  def run(self, block_until_done: bool = False, **kwargs) -> str:
34
37
  """
35
38
  Run the graph with the given inputs. The input is for the start function of the graph.
@@ -49,10 +52,16 @@ class RemoteGraph:
49
52
  return self._client.invoke_graph_with_object(
50
53
  self._name,
51
54
  block_until_done,
52
- self.graph.definition().get_input_payload_serializer(),
55
+ self._graph_definition.get_input_encoder(),
53
56
  **kwargs
54
57
  )
55
58
 
59
+ def metadata(self) -> ComputeGraphMetadata:
60
+ """
61
+ Get the metadata of the graph.
62
+ """
63
+ return self._client.graph(self._name)
64
+
56
65
  def replay_invocations(self):
57
66
  """
58
67
  Replay all the graph previous runs/invocations on the latest version of the graph.
@@ -81,7 +90,6 @@ class RemoteGraph:
81
90
  :param client: The IndexifyClient used to communicate with the server.
82
91
  Preferred over server_url.
83
92
  """
84
- cls.graph = g
85
93
  if not client:
86
94
  client = IndexifyClient(service_url=server_url)
87
95
  client.register_compute_graph(g, additional_modules)
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "indexify"
3
- version = "0.2.28"
3
+ version = "0.2.30"
4
4
  description = "Python Client for Indexify"
5
5
  authors = ["Tensorlake Inc. <support@tensorlake.ai>"]
6
6
  license = "Apache 2.0"
File without changes
File without changes
File without changes
File without changes