indexify-0.2.28.tar.gz → indexify-0.2.30.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {indexify-0.2.28 → indexify-0.2.30}/PKG-INFO +1 -1
- {indexify-0.2.28 → indexify-0.2.30}/indexify/executor/agent.py +16 -3
- {indexify-0.2.28 → indexify-0.2.30}/indexify/executor/task_reporter.py +49 -12
- {indexify-0.2.28 → indexify-0.2.30}/indexify/executor/task_store.py +1 -0
- {indexify-0.2.28 → indexify-0.2.30}/indexify/functions_sdk/graph_definition.py +9 -0
- {indexify-0.2.28 → indexify-0.2.30}/indexify/http_client.py +7 -10
- {indexify-0.2.28 → indexify-0.2.30}/indexify/remote_graph.py +11 -3
- {indexify-0.2.28 → indexify-0.2.30}/pyproject.toml +1 -1
- {indexify-0.2.28 → indexify-0.2.30}/LICENSE.txt +0 -0
- {indexify-0.2.28 → indexify-0.2.30}/README.md +0 -0
- {indexify-0.2.28 → indexify-0.2.30}/indexify/__init__.py +0 -0
- {indexify-0.2.28 → indexify-0.2.30}/indexify/cli.py +0 -0
- {indexify-0.2.28 → indexify-0.2.30}/indexify/common_util.py +0 -0
- {indexify-0.2.28 → indexify-0.2.30}/indexify/data_loaders/__init__.py +0 -0
- {indexify-0.2.28 → indexify-0.2.30}/indexify/data_loaders/local_directory_loader.py +0 -0
- {indexify-0.2.28 → indexify-0.2.30}/indexify/data_loaders/url_loader.py +0 -0
- {indexify-0.2.28 → indexify-0.2.30}/indexify/error.py +0 -0
- {indexify-0.2.28 → indexify-0.2.30}/indexify/executor/api_objects.py +0 -0
- {indexify-0.2.28 → indexify-0.2.30}/indexify/executor/downloader.py +0 -0
- {indexify-0.2.28 → indexify-0.2.30}/indexify/executor/executor_tasks.py +0 -0
- {indexify-0.2.28 → indexify-0.2.30}/indexify/executor/function_worker.py +0 -0
- {indexify-0.2.28 → indexify-0.2.30}/indexify/executor/image_dependency_installer.py +0 -0
- {indexify-0.2.28 → indexify-0.2.30}/indexify/executor/indexify_executor.py +0 -0
- {indexify-0.2.28 → indexify-0.2.30}/indexify/executor/runtime_probes.py +0 -0
- {indexify-0.2.28 → indexify-0.2.30}/indexify/functions_sdk/data_objects.py +0 -0
- {indexify-0.2.28 → indexify-0.2.30}/indexify/functions_sdk/graph.py +0 -0
- {indexify-0.2.28 → indexify-0.2.30}/indexify/functions_sdk/graph_validation.py +0 -0
- {indexify-0.2.28 → indexify-0.2.30}/indexify/functions_sdk/image.py +0 -0
- {indexify-0.2.28 → indexify-0.2.30}/indexify/functions_sdk/indexify_functions.py +0 -0
- {indexify-0.2.28 → indexify-0.2.30}/indexify/functions_sdk/local_cache.py +0 -0
- {indexify-0.2.28 → indexify-0.2.30}/indexify/functions_sdk/object_serializer.py +0 -0
- {indexify-0.2.28 → indexify-0.2.30}/indexify/functions_sdk/pipeline.py +0 -0
- {indexify-0.2.28 → indexify-0.2.30}/indexify/remote_pipeline.py +0 -0
- {indexify-0.2.28 → indexify-0.2.30}/indexify/settings.py +0 -0
--- a/indexify/executor/agent.py
+++ b/indexify/executor/agent.py
@@ -100,7 +100,9 @@ class ExtractorAgent:
         self._server_addr = server_addr
         self._base_url = f"{self._protocol}://{self._server_addr}"
         self._code_path = code_path
-        self._downloader = Downloader(code_path=code_path, base_url=self._base_url)
+        self._downloader = Downloader(
+            code_path=code_path, base_url=self._base_url, config_path=self._config_path
+        )
         self._max_queued_tasks = 10
         self._task_reporter = TaskReporter(
             base_url=self._base_url,
@@ -112,9 +114,15 @@ class ExtractorAgent:
         console.print(Text("Starting task completion reporter", style="bold cyan"))
         # We should copy only the keys and not the values
         url = f"{self._protocol}://{self._server_addr}/write_content"
+
         while True:
             outcomes = await self._task_store.task_outcomes()
             for task_outcome in outcomes:
+                retryStr = (
+                    f"\nRetries: {task_outcome.reporting_retries}"
+                    if task_outcome.reporting_retries > 0
+                    else ""
+                )
                 outcome = task_outcome.task_outcome
                 style_outcome = (
                     f"[bold red] {outcome} [/]"
@@ -125,7 +133,9 @@ class ExtractorAgent:
                     Panel(
                         f"Reporting outcome of task: {task_outcome.task.id}, function: {task_outcome.task.compute_fn}\n"
                         f"Outcome: {style_outcome}\n"
-                        f"Num Fn Outputs: {len(task_outcome.outputs or [])}",
+                        f"Num Fn Outputs: {len(task_outcome.outputs or [])}\n"
+                        f"Router Output: {task_outcome.router_output}\n"
+                        f"Retries: {task_outcome.reporting_retries}",
                         title="Task Completion",
                         border_style="info",
                     )
@@ -139,11 +149,14 @@ class ExtractorAgent:
                     console.print(
                         Panel(
                             f"Failed to report task {task_outcome.task.id}\n"
-                            f"Exception: {e}\n",
+                            f"Exception: {type(e).__name__}({e})\n"
+                            f"Retries: {task_outcome.reporting_retries}\n"
+                            "Retrying...",
                             title="Reporting Error",
                             border_style="error",
                         )
                     )
+                    task_outcome.reporting_retries += 1
                     await asyncio.sleep(5)
                     continue
 
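
Note: taken together, the agent.py hunks make the completion reporter retry-aware: a failed report increments reporting_retries on the outcome before the 5-second backoff, and both panels surface the count. A simplified sketch of the resulting loop (task_store and reporter are stand-ins for the agent's real collaborators, not the actual internals):

    import asyncio

    async def task_completion_reporter(task_store, reporter):
        # Sketch only: poll outcomes forever; on failure, count the retry,
        # back off 5s, and leave the outcome queued for the next pass.
        while True:
            for task_outcome in await task_store.task_outcomes():
                try:
                    reporter.report_task_outcome(task_outcome)
                except Exception:
                    task_outcome.reporting_retries += 1  # surfaced in the log panels
                    await asyncio.sleep(5)
                    continue
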
--- a/indexify/executor/task_reporter.py
+++ b/indexify/executor/task_reporter.py
@@ -2,6 +2,8 @@ import io
 from typing import Optional
 
 import nanoid
+from httpx import Timeout
+from pydantic import BaseModel
 from rich import print
 
 from indexify.common_util import get_httpx_client
@@ -21,6 +23,15 @@ FORCE_MULTIPART = ForceMultipartDict()
 UTF_8_CONTENT_TYPE = "application/octet-stream"
 
 
+class ReportingData(BaseModel):
+    output_count: int = 0
+    output_total_bytes: int = 0
+    stdout_count: int = 0
+    stdout_total_bytes: int = 0
+    stderr_count: int = 0
+    stderr_total_bytes: int = 0
+
+
 class TaskReporter:
     def __init__(
         self, base_url: str, executor_id: str, config_path: Optional[str] = None
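
Note: ReportingData replaces the per-upload print statements removed in the hunks below with a single accumulator that is logged once per report. A minimal sketch of the tallying it enables (the byte strings are placeholders for serialized outputs):

    from pydantic import BaseModel

    class ReportingData(BaseModel):
        output_count: int = 0
        output_total_bytes: int = 0

    report = ReportingData()
    for payload in (b"abc", b"defg"):  # stand-in serialized outputs
        report.output_count += 1
        report.output_total_bytes += len(payload)
    assert (report.output_count, report.output_total_bytes) == (2, 7)
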
@@ -30,11 +41,10 @@ class TaskReporter:
         self._client = get_httpx_client(config_path)
 
     def report_task_outcome(self, completed_task: CompletedTask):
+
+        report = ReportingData()
         fn_outputs = []
         for output in completed_task.outputs or []:
-            print(
-                f"[bold]task-reporter[/bold] uploading output of size: {len(output.payload)} bytes"
-            )
             serializer = get_serializer(output.encoder)
             serialized_output = serializer.serialize(output.payload)
             fn_outputs.append(
@@ -43,11 +53,10 @@ class TaskReporter:
                     (nanoid.generate(), serialized_output, serializer.content_type),
                 )
             )
+            report.output_count += 1
+            report.output_total_bytes += len(serialized_output)
 
         if completed_task.stdout:
-            print(
-                f"[bold]task-reporter[/bold] uploading stdout of size: {len(completed_task.stdout)}"
-            )
             fn_outputs.append(
                 (
                     "stdout",
@@ -58,11 +67,10 @@ class TaskReporter:
                     ),
                 )
             )
+            report.stdout_count += 1
+            report.stdout_total_bytes += len(completed_task.stdout)
 
         if completed_task.stderr:
-            print(
-                f"[bold]task-reporter[/bold] uploading stderr of size: {len(completed_task.stderr)}"
-            )
             fn_outputs.append(
                 (
                     "stderr",
@@ -73,6 +81,8 @@ class TaskReporter:
                     ),
                 )
             )
+            report.stderr_count += 1
+            report.stderr_total_bytes += len(completed_task.stderr)
 
         router_output = (
             ApiRouterOutput(edges=completed_task.router_output.edges)
@@ -93,7 +103,30 @@ class TaskReporter:
         )
         task_result_data = task_result.model_dump_json(exclude_none=True)
 
-
+        total_bytes = (
+            report.output_total_bytes
+            + report.stdout_total_bytes
+            + report.stderr_total_bytes
+        )
+
+        print(
+            f"[bold]task-reporter[/bold] reporting task outcome "
+            f"task_id={completed_task.task.id} retries={completed_task.reporting_retries} "
+            f"total_bytes={total_bytes} total_files={report.output_count + report.stdout_count + report.stderr_count} "
+            f"output_files={report.output_count} output_bytes={total_bytes} "
+            f"stdout_bytes={report.stdout_total_bytes} stderr_bytes={report.stderr_total_bytes} "
+        )
+
+        #
+        kwargs = {
+            "data": {"task_result": task_result_data},
+            # Use httpx default timeout of 5s for all timeout types.
+            # For read timeouts, use 5 minutes to allow for large file uploads.
+            "timeout": Timeout(
+                5.0,
+                read=5.0 * 60,
+            ),
+        }
         if fn_outputs and len(fn_outputs) > 0:
             kwargs["files"] = fn_outputs
         else:
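
Note: the new kwargs carry an explicit httpx Timeout: the 5.0 positional default applies to connect, write, and pool timeouts, while only the read timeout is stretched to five minutes so large multipart uploads are not cut off. The same configuration in isolation (the endpoint in the comment is a placeholder):

    import httpx

    # Connect, write, and pool timeouts stay at the 5s positional default;
    # only the read timeout is raised to 5 minutes for large uploads.
    timeout = httpx.Timeout(5.0, read=5.0 * 60)
    with httpx.Client(timeout=timeout) as client:
        pass  # e.g. client.post("http://localhost:8900/write_content", data={...}, timeout=timeout)
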
@@ -104,11 +137,15 @@ class TaskReporter:
                 **kwargs,
             )
         except Exception as e:
-            print(
+            print(
+                f"[bold]task-reporter[/bold] failed to report task outcome retries={completed_task.reporting_retries} {type(e).__name__}({e})"
+            )
             raise e
 
         try:
             response.raise_for_status()
         except Exception as e:
-            print(
+            print(
+                f"[bold]task-reporter[/bold] failed to report task outcome retries={completed_task.reporting_retries} {response.text}"
+            )
             raise e
--- a/indexify/functions_sdk/graph_definition.py
+++ b/indexify/functions_sdk/graph_definition.py
@@ -46,6 +46,15 @@ class ComputeGraphMetadata(BaseModel):
     edges: Dict[str, List[str]]
     accumulator_zero_values: Dict[str, bytes] = {}
     runtime_information: RuntimeInformation
+    replaying: bool = False
 
     def get_input_payload_serializer(self):
         return get_serializer(self.start_node.compute_fn.encoder)
+
+    def get_input_encoder(self) -> str:
+        if self.start_node.compute_fn:
+            return self.start_node.compute_fn.encoder
+        elif self.start_node.dynamic_router:
+            return self.start_node.dynamic_router.encoder
+
+        raise ValueError("start node is not set on the graph")
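
Note: get_input_encoder falls through from the start node's compute function to its dynamic router. A standalone sketch of the same pattern with stand-in pydantic models (not the real SDK types):

    from typing import Optional
    from pydantic import BaseModel

    class Fn(BaseModel):
        encoder: str = "cloudpickle"

    class StartNode(BaseModel):
        compute_fn: Optional[Fn] = None
        dynamic_router: Optional[Fn] = None

    def get_input_encoder(start_node: StartNode) -> str:
        if start_node.compute_fn:
            return start_node.compute_fn.encoder
        elif start_node.dynamic_router:
            return start_node.dynamic_router.encoder
        raise ValueError("start node is not set on the graph")

    assert get_input_encoder(StartNode(compute_fn=Fn())) == "cloudpickle"
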
--- a/indexify/http_client.py
+++ b/indexify/http_client.py
@@ -1,6 +1,6 @@
 import json
 import os
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Optional
 
 import cloudpickle
 import httpx
@@ -13,11 +13,7 @@ from indexify.error import ApiException, GraphStillProcessing
 from indexify.functions_sdk.data_objects import IndexifyData
 from indexify.functions_sdk.graph import ComputeGraphMetadata, Graph
 from indexify.functions_sdk.indexify_functions import IndexifyFunction
-from indexify.functions_sdk.object_serializer import (
-    CloudPickleSerializer,
-    JsonSerializer,
-    get_serializer,
-)
+from indexify.functions_sdk.object_serializer import get_serializer
 from indexify.settings import DEFAULT_SERVICE_URL
 
 
@@ -139,7 +135,9 @@ class IndexifyClient:
 
     def _add_api_key(self, kwargs):
         if self._api_key:
-            kwargs["headers"] = {"Authorization": f"Bearer {self._api_key}"}
+            if "headers" not in kwargs:
+                kwargs["headers"] = {}
+            kwargs["headers"]["Authorization"] = f"Bearer {self._api_key}"
 
     def _get(self, endpoint: str, **kwargs) -> httpx.Response:
         self._add_api_key(kwargs)
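
Note: the rewritten _add_api_key merges the Authorization header into any headers the caller already passed instead of replacing the whole dict. An equivalent standalone sketch using dict.setdefault (function and key names are placeholders):

    from typing import Optional

    def add_api_key(kwargs: dict, api_key: Optional[str]) -> None:
        # Merge, don't overwrite: caller-supplied headers survive.
        if api_key:
            kwargs.setdefault("headers", {})["Authorization"] = f"Bearer {api_key}"

    kwargs = {"headers": {"X-Request-Id": "abc123"}}
    add_api_key(kwargs, "my-key")
    assert kwargs["headers"] == {"X-Request-Id": "abc123", "Authorization": "Bearer my-key"}
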
@@ -276,11 +274,10 @@ class IndexifyClient:
         self,
         graph: str,
         block_until_done: bool = False,
-        serializer: Union[
-            CloudPickleSerializer, JsonSerializer
-        ] = CloudPickleSerializer,
+        serializer: str = "cloudpickle",
         **kwargs,
     ) -> str:
+        serializer = get_serializer(serializer)
         ser_input = serializer.serialize(kwargs)
         params = {"block_until_finish": block_until_done}
         kwargs = {
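
Note: callers now select the encoder by name (e.g. the "cloudpickle" default) and the client resolves the concrete serializer internally via get_serializer. A hypothetical call under that assumption (graph name and input kwargs are placeholders):

    from indexify import IndexifyClient  # top-level export assumed

    client = IndexifyClient()  # defaults to the local service URL
    invocation_id = client.invoke_graph_with_object(
        "my_graph",        # graph name (placeholder)
        False,             # block_until_done
        "cloudpickle",     # serializer, now passed as a plain string
        url="https://example.com",  # kwargs become the start node's input
    )
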
--- a/indexify/remote_graph.py
+++ b/indexify/remote_graph.py
@@ -1,6 +1,7 @@
 from typing import Any, List, Optional
 
-from indexify.functions_sdk.graph import Graph
+from indexify.functions_sdk.graph import ComputeGraphMetadata, Graph
+from indexify.functions_sdk.graph_definition import ComputeGraphMetadata
 
 from .http_client import IndexifyClient
 from .settings import DEFAULT_SERVICE_URL
@@ -30,6 +31,8 @@ class RemoteGraph:
         else:
             self._client = IndexifyClient(service_url=server_url)
 
+        self._graph_definition: ComputeGraphMetadata = self._client.graph(self._name)
+
     def run(self, block_until_done: bool = False, **kwargs) -> str:
         """
         Run the graph with the given inputs. The input is for the start function of the graph.
@@ -49,10 +52,16 @@ class RemoteGraph:
         return self._client.invoke_graph_with_object(
             self._name,
             block_until_done,
-            self.
+            self._graph_definition.get_input_encoder(),
             **kwargs
         )
 
+    def metadata(self) -> ComputeGraphMetadata:
+        """
+        Get the metadata of the graph.
+        """
+        return self._client.graph(self._name)
+
     def replay_invocations(self):
         """
         Replay all the graph previous runs/invocations on the latest version of the graph.
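
Note: because __init__ now eagerly fetches the graph definition, run() picks the input encoder automatically and metadata() exposes the definition, including the new replaying field. A hypothetical session (constructor keyword arguments, graph name, and URLs are placeholders, not confirmed by this diff):

    from indexify import RemoteGraph  # top-level export assumed

    g = RemoteGraph(name="my_graph", server_url="http://localhost:8900")  # hypothetical kwargs
    print(g.metadata().replaying)  # new field on ComputeGraphMetadata, defaults to False
    invocation_id = g.run(block_until_done=True, url="https://example.com")
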
@@ -81,7 +90,6 @@ class RemoteGraph:
         :param client: The IndexifyClient used to communicate with the server.
             Prefered over server_url.
         """
-        cls.graph = g
         if not client:
             client = IndexifyClient(service_url=server_url)
         client.register_compute_graph(g, additional_modules)