indexify 0.2.29__py3-none-any.whl → 0.2.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
indexify/executor/agent.py CHANGED
@@ -100,7 +100,9 @@ class ExtractorAgent:
100
100
  self._server_addr = server_addr
101
101
  self._base_url = f"{self._protocol}://{self._server_addr}"
102
102
  self._code_path = code_path
103
- self._downloader = Downloader(code_path=code_path, base_url=self._base_url)
103
+ self._downloader = Downloader(
104
+ code_path=code_path, base_url=self._base_url, config_path=self._config_path
105
+ )
104
106
  self._max_queued_tasks = 10
105
107
  self._task_reporter = TaskReporter(
106
108
  base_url=self._base_url,
@@ -112,9 +114,15 @@ class ExtractorAgent:
112
114
  console.print(Text("Starting task completion reporter", style="bold cyan"))
113
115
  # We should copy only the keys and not the values
114
116
  url = f"{self._protocol}://{self._server_addr}/write_content"
117
+
115
118
  while True:
116
119
  outcomes = await self._task_store.task_outcomes()
117
120
  for task_outcome in outcomes:
121
+ retryStr = (
122
+ f"\nRetries: {task_outcome.reporting_retries}"
123
+ if task_outcome.reporting_retries > 0
124
+ else ""
125
+ )
118
126
  outcome = task_outcome.task_outcome
119
127
  style_outcome = (
120
128
  f"[bold red] {outcome} [/]"
@@ -125,7 +133,9 @@ class ExtractorAgent:
125
133
  Panel(
126
134
  f"Reporting outcome of task: {task_outcome.task.id}, function: {task_outcome.task.compute_fn}\n"
127
135
  f"Outcome: {style_outcome}\n"
128
- f"Num Fn Outputs: {len(task_outcome.outputs or [])} Router Output: {task_outcome.router_output}",
136
+ f"Num Fn Outputs: {len(task_outcome.outputs or [])}\n"
137
+ f"Router Output: {task_outcome.router_output}\n"
138
+ f"Retries: {task_outcome.reporting_retries}",
129
139
  title="Task Completion",
130
140
  border_style="info",
131
141
  )
@@ -139,11 +149,14 @@ class ExtractorAgent:
139
149
  console.print(
140
150
  Panel(
141
151
  f"Failed to report task {task_outcome.task.id}\n"
142
- f"Exception: {e}\nRetrying...",
152
+ f"Exception: {type(e).__name__}({e})\n"
153
+ f"Retries: {task_outcome.reporting_retries}\n"
154
+ "Retrying...",
143
155
  title="Reporting Error",
144
156
  border_style="error",
145
157
  )
146
158
  )
159
+ task_outcome.reporting_retries += 1
147
160
  await asyncio.sleep(5)
148
161
  continue
149
162
 
indexify/executor/downloader.py CHANGED
@@ -120,7 +120,7 @@ class Downloader:
120
120
  deserialized_content = serializer.deserialize(response.content)
121
121
 
122
122
  if reducer_url:
123
- init_value = httpx.get(reducer_url)
123
+ init_value = self._client.get(reducer_url)
124
124
  try:
125
125
  init_value.raise_for_status()
126
126
  except httpx.HTTPStatusError as e:
indexify/executor/task_reporter.py CHANGED
@@ -2,6 +2,8 @@ import io
2
2
  from typing import Optional
3
3
 
4
4
  import nanoid
5
+ from httpx import Timeout
6
+ from pydantic import BaseModel
5
7
  from rich import print
6
8
 
7
9
  from indexify.common_util import get_httpx_client
@@ -21,6 +23,15 @@ FORCE_MULTIPART = ForceMultipartDict()
21
23
  UTF_8_CONTENT_TYPE = "application/octet-stream"
22
24
 
23
25
 
26
+ class ReportingData(BaseModel):
27
+ output_count: int = 0
28
+ output_total_bytes: int = 0
29
+ stdout_count: int = 0
30
+ stdout_total_bytes: int = 0
31
+ stderr_count: int = 0
32
+ stderr_total_bytes: int = 0
33
+
34
+
24
35
  class TaskReporter:
25
36
  def __init__(
26
37
  self, base_url: str, executor_id: str, config_path: Optional[str] = None
@@ -30,11 +41,10 @@ class TaskReporter:
30
41
  self._client = get_httpx_client(config_path)
31
42
 
32
43
  def report_task_outcome(self, completed_task: CompletedTask):
44
+
45
+ report = ReportingData()
33
46
  fn_outputs = []
34
47
  for output in completed_task.outputs or []:
35
- print(
36
- f"[bold]task-reporter[/bold] uploading output of size: {len(output.payload)} bytes"
37
- )
38
48
  serializer = get_serializer(output.encoder)
39
49
  serialized_output = serializer.serialize(output.payload)
40
50
  fn_outputs.append(
@@ -43,11 +53,10 @@ class TaskReporter:
43
53
  (nanoid.generate(), serialized_output, serializer.content_type),
44
54
  )
45
55
  )
56
+ report.output_count += 1
57
+ report.output_total_bytes += len(serialized_output)
46
58
 
47
59
  if completed_task.stdout:
48
- print(
49
- f"[bold]task-reporter[/bold] uploading stdout of size: {len(completed_task.stdout)}"
50
- )
51
60
  fn_outputs.append(
52
61
  (
53
62
  "stdout",
@@ -58,11 +67,10 @@ class TaskReporter:
58
67
  ),
59
68
  )
60
69
  )
70
+ report.stdout_count += 1
71
+ report.stdout_total_bytes += len(completed_task.stdout)
61
72
 
62
73
  if completed_task.stderr:
63
- print(
64
- f"[bold]task-reporter[/bold] uploading stderr of size: {len(completed_task.stderr)}"
65
- )
66
74
  fn_outputs.append(
67
75
  (
68
76
  "stderr",
@@ -73,6 +81,8 @@ class TaskReporter:
73
81
  ),
74
82
  )
75
83
  )
84
+ report.stderr_count += 1
85
+ report.stderr_total_bytes += len(completed_task.stderr)
76
86
 
77
87
  router_output = (
78
88
  ApiRouterOutput(edges=completed_task.router_output.edges)
@@ -93,7 +103,30 @@ class TaskReporter:
93
103
  )
94
104
  task_result_data = task_result.model_dump_json(exclude_none=True)
95
105
 
96
- kwargs = {"data": {"task_result": task_result_data}}
106
+ total_bytes = (
107
+ report.output_total_bytes
108
+ + report.stdout_total_bytes
109
+ + report.stderr_total_bytes
110
+ )
111
+
112
+ print(
113
+ f"[bold]task-reporter[/bold] reporting task outcome "
114
+ f"task_id={completed_task.task.id} retries={completed_task.reporting_retries} "
115
+ f"total_bytes={total_bytes} total_files={report.output_count + report.stdout_count + report.stderr_count} "
116
+ f"output_files={report.output_count} output_bytes={total_bytes} "
117
+ f"stdout_bytes={report.stdout_total_bytes} stderr_bytes={report.stderr_total_bytes} "
118
+ )
119
+
120
+ #
121
+ kwargs = {
122
+ "data": {"task_result": task_result_data},
123
+ # Use httpx default timeout of 5s for all timeout types.
124
+ # For read timeouts, use 5 minutes to allow for large file uploads.
125
+ "timeout": Timeout(
126
+ 5.0,
127
+ read=5.0 * 60,
128
+ ),
129
+ }
97
130
  if fn_outputs and len(fn_outputs) > 0:
98
131
  kwargs["files"] = fn_outputs
99
132
  else:
@@ -104,11 +137,15 @@ class TaskReporter:
104
137
  **kwargs,
105
138
  )
106
139
  except Exception as e:
107
- print(f"failed to report task outcome {e}")
140
+ print(
141
+ f"[bold]task-reporter[/bold] failed to report task outcome retries={completed_task.reporting_retries} {type(e).__name__}({e})"
142
+ )
108
143
  raise e
109
144
 
110
145
  try:
111
146
  response.raise_for_status()
112
147
  except Exception as e:
113
- print(f"failed to report task outcome {response.text}")
148
+ print(
149
+ f"[bold]task-reporter[/bold] failed to report task outcome retries={completed_task.reporting_retries} {response.text}"
150
+ )
114
151
  raise e
indexify/executor/task_store.py CHANGED
@@ -17,6 +17,7 @@ class CompletedTask(BaseModel):
17
17
  stdout: Optional[str] = None
18
18
  stderr: Optional[str] = None
19
19
  reducer: bool = False
20
+ reporting_retries: int = 0
20
21
 
21
22
 
22
23
  class TaskStore:
indexify/functions_sdk/graph.py CHANGED
@@ -101,9 +101,9 @@ class Graph:
101
101
  return self
102
102
 
103
103
  if issubclass(indexify_fn, IndexifyFunction) and indexify_fn.accumulate:
104
- self.accumulator_zero_values[
105
- indexify_fn.name
106
- ] = indexify_fn.accumulate().model_dump()
104
+ self.accumulator_zero_values[indexify_fn.name] = (
105
+ indexify_fn.accumulate().model_dump()
106
+ )
107
107
 
108
108
  self.nodes[indexify_fn.name] = indexify_fn
109
109
  return self
@@ -244,9 +244,9 @@ class Graph:
244
244
  queue = deque([(self._start_node, initial_input)])
245
245
  while queue:
246
246
  node_name, input = queue.popleft()
247
- function_outputs: Union[
248
- FunctionCallResult, RouterCallResult
249
- ] = self._invoke_fn(node_name, input)
247
+ function_outputs: Union[FunctionCallResult, RouterCallResult] = (
248
+ self._invoke_fn(node_name, input)
249
+ )
250
250
  self._log_local_exec_tracebacks(function_outputs)
251
251
  if isinstance(function_outputs, RouterCallResult):
252
252
  for edge in function_outputs.edges:
indexify/functions_sdk/graph_definition.py CHANGED
@@ -46,6 +46,7 @@ class ComputeGraphMetadata(BaseModel):
46
46
  edges: Dict[str, List[str]]
47
47
  accumulator_zero_values: Dict[str, bytes] = {}
48
48
  runtime_information: RuntimeInformation
49
+ replaying: bool = False
49
50
 
50
51
  def get_input_payload_serializer(self):
51
52
  return get_serializer(self.start_node.compute_fn.encoder)
indexify/functions_sdk/indexify_functions.py CHANGED
@@ -137,9 +137,11 @@ def indexify_router(
137
137
 
138
138
  attrs = {
139
139
  "name": name if name else fn.__name__,
140
- "description": description
141
- if description
142
- else (fn.__doc__ or "").strip().replace("\n", ""),
140
+ "description": (
141
+ description
142
+ if description
143
+ else (fn.__doc__ or "").strip().replace("\n", "")
144
+ ),
143
145
  "image": image,
144
146
  "placement_constraints": placement_constraints,
145
147
  "encoder": encoder,
@@ -174,9 +176,11 @@ def indexify_function(
174
176
 
175
177
  attrs = {
176
178
  "name": name if name else fn.__name__,
177
- "description": description
178
- if description
179
- else (fn.__doc__ or "").strip().replace("\n", ""),
179
+ "description": (
180
+ description
181
+ if description
182
+ else (fn.__doc__ or "").strip().replace("\n", "")
183
+ ),
180
184
  "image": image,
181
185
  "placement_constraints": placement_constraints,
182
186
  "accumulate": accumulate,
@@ -205,9 +209,9 @@ class IndexifyFunctionWrapper:
205
209
  indexify_function: Union[IndexifyFunction, IndexifyRouter],
206
210
  context: GraphInvocationContext,
207
211
  ):
208
- self.indexify_function: Union[
209
- IndexifyFunction, IndexifyRouter
210
- ] = indexify_function()
212
+ self.indexify_function: Union[IndexifyFunction, IndexifyRouter] = (
213
+ indexify_function()
214
+ )
211
215
  self.indexify_function._ctx = context
212
216
 
213
217
  def get_output_model(self) -> Any:
indexify/http_client.py CHANGED
@@ -135,7 +135,9 @@ class IndexifyClient:
135
135
 
136
136
  def _add_api_key(self, kwargs):
137
137
  if self._api_key:
138
- kwargs["headers"] = {"Authorization": f"Bearer {self._api_key}"}
138
+ if "headers" not in kwargs:
139
+ kwargs["headers"] = {}
140
+ kwargs["headers"]["Authorization"] = f"Bearer {self._api_key}"
139
141
 
140
142
  def _get(self, endpoint: str, **kwargs) -> httpx.Response:
141
143
  self._add_api_key(kwargs)
indexify/remote_graph.py CHANGED
@@ -1,6 +1,7 @@
1
1
  from typing import Any, List, Optional
2
2
 
3
3
  from indexify.functions_sdk.graph import ComputeGraphMetadata, Graph
4
+ from indexify.functions_sdk.graph_definition import ComputeGraphMetadata
4
5
 
5
6
  from .http_client import IndexifyClient
6
7
  from .settings import DEFAULT_SERVICE_URL
@@ -55,6 +56,12 @@ class RemoteGraph:
55
56
  **kwargs
56
57
  )
57
58
 
59
+ def metadata(self) -> ComputeGraphMetadata:
60
+ """
61
+ Get the metadata of the graph.
62
+ """
63
+ return self._client.graph(self._name)
64
+
58
65
  def replay_invocations(self):
59
66
  """
60
67
  Replay all the graph previous runs/invocations on the latest version of the graph.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: indexify
3
- Version: 0.2.29
3
+ Version: 0.2.31
4
4
  Summary: Python Client for Indexify
5
5
  Home-page: https://github.com/tensorlakeai/indexify
6
6
  License: Apache 2.0
@@ -5,31 +5,31 @@ indexify/data_loaders/__init__.py,sha256=Y5NEuseTcYAICRiweYw5wBQ2m2YplbsY21I7df-
5
5
  indexify/data_loaders/local_directory_loader.py,sha256=fCrgj5drnW71ZUdDDvcB1-VJjIs1w6Q8sEW0HSGSAiA,1247
6
6
  indexify/data_loaders/url_loader.py,sha256=32SERljcq1Xsi4RdLz2dgyk2TER5pQPTtXl3gUzwHbY,1533
7
7
  indexify/error.py,sha256=qAWr8R6AxPkjsxHSzXTc8zqYnNO_AjOqqYEPsQvF1Zs,238
8
- indexify/executor/agent.py,sha256=mqEDcRdDfoUjw5XoFfVlOlJ-C_ODzyCQQMW90vDbWk4,18114
8
+ indexify/executor/agent.py,sha256=Zt-KU2yrOuFuHu8CiOg-QmAzkNU1idcv5UZ3YN2YSDU,18649
9
9
  indexify/executor/api_objects.py,sha256=mvmwGbK4paJNQGFvbtNHMPpiH_LpVhrlRnCcrqS6HOQ,859
10
- indexify/executor/downloader.py,sha256=_SQ-6-0Kj3Zg0Dqp_pMwTYxvBP3xvNUOREyDucCJp8M,4944
10
+ indexify/executor/downloader.py,sha256=wwkTdKRrDBHwKQOb_3uUb8pVl1tvg4QzOcYFnGQwNtA,4951
11
11
  indexify/executor/executor_tasks.py,sha256=A0UIEZ5VaB6zSkFQG81UmTW0E57MTYhGlaXuAbRV8lQ,1884
12
12
  indexify/executor/function_worker.py,sha256=wRW2-X9dNI80KhwTD1vD-pcyetsVKVs6vVdg7L7JjcQ,6462
13
13
  indexify/executor/image_dependency_installer.py,sha256=ct8GmzgkaPi6NAblk68IJJWo5MecIUubELotmSrgoRQ,1759
14
14
  indexify/executor/indexify_executor.py,sha256=2Ut_VX-Su_lm4b4aEROyRJ3gXx-uFHA-V7EN0sWiARE,771
15
15
  indexify/executor/runtime_probes.py,sha256=mjw2_mGQ622wRT_39WPGGgPEZQTgtrf3-ICcUUZOeyg,2126
16
- indexify/executor/task_reporter.py,sha256=Wr8caDtjHBHvK5C-wwghJ5TNxIK_moxjQokGRcEs2rw,3798
17
- indexify/executor/task_store.py,sha256=u48FdRKAh_KH7WOMQOArdOY5CawlyW5MJx8V0W79JM0,3951
16
+ indexify/executor/task_reporter.py,sha256=4unHxLUHedKwIoqO3e5YdDJpUe_pJng-vHReoqcPNNU,5141
17
+ indexify/executor/task_store.py,sha256=JlRlWwAm4YjFRkTNRx-6GsUcmOzcyvzb5Csa5XDpRTI,3982
18
18
  indexify/functions_sdk/data_objects.py,sha256=wXbUa9hjU6rsXmmk19vQ5Kixf3FsI59VBWPNmHasAX0,854
19
- indexify/functions_sdk/graph.py,sha256=TbHtIcAzRcEn3BWewVhNsUGMNfRduI1aSAvK6Vyx-fk,11801
20
- indexify/functions_sdk/graph_definition.py,sha256=5jUzjfk4gxxQCmQeOlNS9VndZK7KQBsZzABcDpQKgrs,1594
19
+ indexify/functions_sdk/graph.py,sha256=_XsHfkQaxTnBNVcDlq4KsnyZ0HQ_DZOAuwygDzpsCNo,11805
20
+ indexify/functions_sdk/graph_definition.py,sha256=UeC0PMchgZgo2T0Goog0SW1lIRdFf6ug4iHOS9ItRvI,1622
21
21
  indexify/functions_sdk/graph_validation.py,sha256=mN2Fcp91GIwFZEQP6z_qGqt4LkLM70SnI7AWBi4CmKQ,2509
22
22
  indexify/functions_sdk/image.py,sha256=QK0H6KxLWriB_z4M0kunKzzHdHxYLWL670RPYgYuf_8,1762
23
- indexify/functions_sdk/indexify_functions.py,sha256=EbNC5gZktRc-G9ribQrpMG2X_G-8cujsxTgot734iGQ,10340
23
+ indexify/functions_sdk/indexify_functions.py,sha256=cPUN7Zlgkwl6YUOuhgKogPdoBakQuf-VspQvU3IDmfU,10422
24
24
  indexify/functions_sdk/local_cache.py,sha256=cNWF67zbhbTJe3g86hyLBy3Rqzs6dNvp2SjLazGZWvw,1348
25
25
  indexify/functions_sdk/object_serializer.py,sha256=pOgUOWbRNRix9uZT0aQn0LTCnJCeMNGO1nAE0jAybmg,1546
26
26
  indexify/functions_sdk/pipeline.py,sha256=KmxZE8eBFAQ4bbEcYURXXR26HSyoAT3O6iu9H38-OXE,974
27
- indexify/http_client.py,sha256=rFYjv0rRMo4sXp-TjWoflQeJaxNZK-Di6Gk3G78xKQI,15838
28
- indexify/remote_graph.py,sha256=1JogFFcWCKc_Dq4Rh1s6IwAnxB-qrFnKuDQIbpJso1Y,4745
27
+ indexify/http_client.py,sha256=5d70A6j2vJBn3OfUVjrQvnPml07p7fxOXWBUAUfAkiU,15915
28
+ indexify/remote_graph.py,sha256=CWfFxB0jlx8Zgp_xUjvjgrIwv-yNAfEbQwlXt3jfRNM,4976
29
29
  indexify/remote_pipeline.py,sha256=oqx57rSPszNS3DToXO_nf-CKqkCZWptm1u_p3orV_gQ,790
30
30
  indexify/settings.py,sha256=Ny59mzYI4gbXoK8hjx66a_men6ndbd1J1zCTcKOoyzg,50
31
- indexify-0.2.29.dist-info/LICENSE.txt,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
32
- indexify-0.2.29.dist-info/METADATA,sha256=3lC5Cqd9Uym3Bk0AvQVTT8JTlHsodWXyktGjEAU9Y7s,6202
33
- indexify-0.2.29.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
34
- indexify-0.2.29.dist-info/entry_points.txt,sha256=Pih7WV-XMpAzI5dEvROcpLr-ybVhd9Y-AtuzBKUdcDs,49
35
- indexify-0.2.29.dist-info/RECORD,,
31
+ indexify-0.2.31.dist-info/LICENSE.txt,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
32
+ indexify-0.2.31.dist-info/METADATA,sha256=Jrl1EVAJoI1uYMYKaA1pofIao2iV8FhfytvxEHXZvCE,6202
33
+ indexify-0.2.31.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
34
+ indexify-0.2.31.dist-info/entry_points.txt,sha256=Pih7WV-XMpAzI5dEvROcpLr-ybVhd9Y-AtuzBKUdcDs,49
35
+ indexify-0.2.31.dist-info/RECORD,,