indexify 0.2.32__py3-none-any.whl → 0.2.34__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- indexify/executor/agent.py +42 -165
- indexify/executor/downloader.py +26 -53
- indexify/executor/task_reporter.py +27 -16
- indexify/functions_sdk/graph.py +15 -12
- indexify/functions_sdk/graph_definition.py +7 -5
- indexify/functions_sdk/indexify_functions.py +31 -16
- indexify/functions_sdk/object_serializer.py +17 -7
- indexify/http_client.py +7 -7
- {indexify-0.2.32.dist-info → indexify-0.2.34.dist-info}/METADATA +2 -2
- {indexify-0.2.32.dist-info → indexify-0.2.34.dist-info}/RECORD +13 -13
- {indexify-0.2.32.dist-info → indexify-0.2.34.dist-info}/LICENSE.txt +0 -0
- {indexify-0.2.32.dist-info → indexify-0.2.34.dist-info}/WHEEL +0 -0
- {indexify-0.2.32.dist-info → indexify-0.2.34.dist-info}/entry_points.txt +0 -0
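The dominant change across the executor modules (agent.py, downloader.py, task_reporter.py) is a logging migration: the rich Console/Panel/Text output is dropped in favor of structlog key-value logging, and the functions SDK splits the single `encoder` setting into `input_encoder`/`output_encoder`. A minimal sketch of the new logging pattern, assuming structlog's default configuration (the module name and field values below are illustrative, not taken from the package):

    import structlog

    # kwargs passed to get_logger are bound onto every log line it emits
    logger = structlog.get_logger(module="executor.agent")

    # Events are short strings; context travels as keyword fields,
    # not as pre-formatted rich panels.
    logger.info("reporting_task_outcome", task_id="t-123", outcome="success", retries=0)
    logger.error("failed to download graph", name="my-graph", error="404 Not Found")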
indexify/executor/agent.py
CHANGED

@@ -1,28 +1,21 @@
 import asyncio
 import json
-import traceback
 from concurrent.futures.process import BrokenProcessPool
 from importlib.metadata import version
 from pathlib import Path
 from typing import Dict, List, Optional

+import structlog
 from httpx_sse import aconnect_sse
 from pydantic import BaseModel
-from rich.console import Console
-from rich.panel import Panel
-from rich.text import Text
-from rich.theme import Theme

 from indexify.common_util import get_httpx_client
 from indexify.functions_sdk.data_objects import (
     FunctionWorkerOutput,
     IndexifyData,
 )
-from indexify.functions_sdk.graph_definition import ComputeGraphMetadata
 from indexify.http_client import IndexifyClient

-from ..functions_sdk.image import ImageInformation
-from . import image_dependency_installer
 from .api_objects import ExecutorMetadata, Task
 from .downloader import DownloadedInputs, Downloader
 from .executor_tasks import DownloadGraphTask, DownloadInputTask, ExtractTask
@@ -31,16 +24,7 @@ from .runtime_probes import ProbeInfo, RuntimeProbes
 from .task_reporter import TaskReporter
 from .task_store import CompletedTask, TaskStore

-custom_theme = Theme(
-    {
-        "info": "cyan",
-        "warning": "yellow",
-        "error": "red",
-        "success": "green",
-    }
-)
-
-console = Console(theme=custom_theme)
+logging = structlog.get_logger(module=__name__)


 class FunctionInput(BaseModel):
@@ -68,21 +52,9 @@ class ExtractorAgent:
         self._config_path = config_path
         self._probe = RuntimeProbes()

-        runtime_probe: ProbeInfo = self._probe.probe()
-        self._require_image_bootstrap = (
-            True
-            if (runtime_probe.is_default_executor and self.name_alias is not None)
-            else False
-        )
-        self._executor_bootstrap_failed = False
-
-        console.print(
-            f"Require Bootstrap? {self._require_image_bootstrap}", style="cyan bold"
-        )
-
         self.num_workers = num_workers
         if config_path:
-            …
+            logging.info("running the extractor with TLS enabled")
             self._protocol = "https"
         else:
             self._protocol = "http"
@@ -111,10 +83,8 @@ class ExtractorAgent:
         )

     async def task_completion_reporter(self):
-        …
+        logging.info("starting task completion reporter")
         # We should copy only the keys and not the values
-        url = f"{self._protocol}://{self._server_addr}/write_content"
-
         while True:
             outcomes = await self._task_store.task_outcomes()
             for task_outcome in outcomes:
@@ -129,16 +99,14 @@ class ExtractorAgent:
                     if "fail" in outcome
                     else f"[bold green] {outcome} [/]"
                 )
-                console.print(
-                    Panel(
-                        …
-                        border_style="info",
-                    )
+                logging.info(
+                    "reporting_task_outcome",
+                    task_id=task_outcome.task.id,
+                    fn_name=task_outcome.task.compute_fn,
+                    num_outputs=len(task_outcome.outputs or []),
+                    router_output=task_outcome.router_output,
+                    outcome=task_outcome.task_outcome,
+                    retries=task_outcome.reporting_retries,
                 )

                 try:
@@ -146,15 +114,11 @@ class ExtractorAgent:
                     self._task_reporter.report_task_outcome(completed_task=task_outcome)
                 except Exception as e:
                     # The connection was dropped in the middle of the reporting, process, retry
-                    console.print(
-                        Panel(
-                            …
-                            "Retrying...",
-                            title="Reporting Error",
-                            border_style="error",
-                        )
+                    logging.error(
+                        "failed_to_report_task",
+                        task_id=task_outcome.task.id,
+                        exception=f"exception: {type(e).__name__}({e})",
+                        retries=task_outcome.reporting_retries,
                     )
                     task_outcome.reporting_retries += 1
                     await asyncio.sleep(5)
@@ -176,44 +140,6 @@ class ExtractorAgent:
                 fn: FunctionInput
                 for fn in fn_queue:
                     task: Task = self._task_store.get_task(fn.task_id)
-
-                    if self._executor_bootstrap_failed:
-                        completed_task = CompletedTask(
-                            task=task,
-                            outputs=[],
-                            task_outcome="failure",
-                        )
-                        self._task_store.complete(outcome=completed_task)
-
-                        continue
-
-                    # Bootstrap this executor. Fail the task if we can't.
-                    if self._require_image_bootstrap:
-                        try:
-                            image_info = await _get_image_info_for_compute_graph(
-                                task, self._protocol, self._server_addr, self._config_path
-                            )
-                            image_dependency_installer.executor_image_builder(
-                                image_info, self.name_alias, self.image_version
-                            )
-                            self._require_image_bootstrap = False
-                        except Exception as e:
-                            console.print(
-                                Text("Failed to bootstrap the executor ", style="red bold")
-                                + Text(f"Exception: {traceback.format_exc()}", style="red")
-                            )
-
-                            self._executor_bootstrap_failed = True
-
-                            completed_task = CompletedTask(
-                                task=task,
-                                outputs=[],
-                                task_outcome="failure",
-                            )
-                            self._task_store.complete(outcome=completed_task)
-
-                            continue
-
                     async_tasks.append(
                         ExtractTask(
                             function_worker=self._function_worker,
@@ -233,12 +159,9 @@ class ExtractorAgent:
             for async_task in done:
                 if async_task.get_name() == "get_runnable_tasks":
                     if async_task.exception():
-                        console.print(
-                            Text(
-                                …
-                                f"Failed to get runnable tasks: {async_task.exception()}",
-                                style="red",
-                            )
+                        logging.error(
+                            "task_launcher_error, failed to get runnable tasks",
+                            exception=async_task.exception(),
                         )
                         continue
                     result: Dict[str, Task] = await async_task
@@ -255,12 +178,9 @@ class ExtractorAgent:
                     )
                 elif async_task.get_name() == "download_graph":
                     if async_task.exception():
-                        console.print(
-                            Text(
-                                …
-                                style="red bold",
-                            )
-                            + Text(f"Exception: {async_task.exception()}", style="red")
+                        logging.error(
+                            "task_launcher_error, failed to download graph",
+                            exception=async_task.exception(),
                         )
                         completed_task = CompletedTask(
                             task=async_task.task,
@@ -276,12 +196,9 @@ class ExtractorAgent:
                     )
                 elif async_task.get_name() == "download_input":
                     if async_task.exception():
-                        console.print(
-                            Text(
-                                …
-                                style="red bold",
-                            )
-                            + Text(f"Exception: {async_task.exception()}", style="red")
+                        logging.error(
+                            "task_launcher_error, failed to download input",
+                            exception=str(async_task.exception()),
                         )
                         completed_task = CompletedTask(
                             task=async_task.task,
@@ -334,12 +251,10 @@ class ExtractorAgent:
                         self._task_store.retriable_failure(async_task.task.id)
                         continue
                     except Exception as e:
-                        console.print(
-                            Text(
-                                …
-                            )
-                            + Text(f"Exception: {e}", style="red")
+                        logging.error(
+                            "failed to execute task",
+                            task_id=async_task.task.id,
+                            exception=str(e),
                         )
                         completed_task = CompletedTask(
                             task=async_task.task,
@@ -360,12 +275,6 @@ class ExtractorAgent:
         self._should_run = True
         while self._should_run:
             url = f"{self._protocol}://{self._server_addr}/internal/executors/{self._executor_id}/tasks"
-            print(f"calling url: {url}")
-
-            def to_sentence_case(snake_str):
-                words = snake_str.split("_")
-                return words[0].capitalize() + "" + " ".join(words[1:])
-
             runtime_probe: ProbeInfo = self._probe.probe()

             executor_version = version("indexify")
@@ -390,17 +299,7 @@ class ExtractorAgent:
                 image_version=image_version,
                 labels=runtime_probe.labels,
             ).model_dump()
-
-            panel_content = "\n".join(
-                [f"{to_sentence_case(key)}: {value}" for key, value in data.items()]
-            )
-            console.print(
-                Panel(
-                    panel_content,
-                    title="attempting to Register Executor",
-                    border_style="cyan",
-                )
-            )
+            logging.info("registering_executor", executor_id=self._executor_id)
             try:
                 async with get_httpx_client(self._config_path, True) as client:
                     async with aconnect_sse(
@@ -411,12 +310,17 @@ class ExtractorAgent:
                         headers={"Content-Type": "application/json"},
                     ) as event_source:
                         if not event_source.response.is_success:
-                            resp = await event_source.response
-
+                            resp = await event_source.response
+                            resp_content = resp.aread()
+                            logging.error(
+                                f"failed to register",
+                                resp=str(resp_content),
+                                status_code=event_source.response.status_code,
+                            )
                             await asyncio.sleep(5)
                             continue
-                        console.print(
-                            …
+                        logging.info(
+                            "executor_registered", executor_id=self._executor_id
                         )
                         async for sse in event_source.aiter_sse():
                             data = json.loads(sse.data)
@@ -427,15 +331,12 @@ class ExtractorAgent:
                                 )
                                 self._task_store.add_tasks(tasks)
             except Exception as e:
-                console.print(
-                    Text("registration Error: ", style="red bold")
-                    + Text(f"failed to register: {e}", style="red")
-                )
+                logging.error(f"failed to register: {e}")
                 await asyncio.sleep(5)
                 continue

     async def _shutdown(self, loop):
-        …
+        logging.info("shutting_down")
         self._should_run = False
         for task in asyncio.all_tasks(loop):
             task.cancel()
@@ -443,27 +344,3 @@ class ExtractorAgent:
     def shutdown(self, loop):
         self._function_worker.shutdown()
         loop.create_task(self._shutdown(loop))
-
-
-async def _get_image_info_for_compute_graph(
-    task: Task, protocol, server_addr, config_path: str
-) -> ImageInformation:
-    namespace = task.namespace
-    graph_name: str = task.compute_graph
-    compute_fn_name: str = task.compute_fn
-
-    http_client = IndexifyClient(
-        service_url=f"{protocol}://{server_addr}",
-        namespace=namespace,
-        config_path=config_path,
-    )
-    compute_graph: ComputeGraphMetadata = http_client.graph(graph_name)
-
-    console.print(
-        Text(
-            f"Compute_fn name {compute_fn_name}, ComputeGraph {compute_graph} \n",
-            style="red yellow",
-        )
-    )
-
-    return compute_graph.nodes[compute_fn_name].compute_fn.image_information
indexify/executor/downloader.py
CHANGED

@@ -2,10 +2,8 @@ import os
 from typing import Optional

 import httpx
+import structlog
 from pydantic import BaseModel
-from rich.console import Console
-from rich.panel import Panel
-from rich.theme import Theme

 from indexify.functions_sdk.data_objects import IndexifyData

@@ -13,15 +11,7 @@ from ..common_util import get_httpx_client
 from ..functions_sdk.object_serializer import JsonSerializer, get_serializer
 from .api_objects import Task

-custom_theme = Theme(
-    {
-        "info": "cyan",
-        "warning": "yellow",
-        "error": "red",
-    }
-)
-
-console = Console(theme=custom_theme)
+logger = structlog.get_logger(module=__name__)


 class DownloadedInputs(BaseModel):
@@ -42,26 +32,21 @@
         if os.path.exists(path):
             return path

-        console.print(
-            Panel(
-                f"Downloading graph: {name}\nPath: {path}",
-                title="downloader",
-                border_style="cyan",
-            )
+        logger.info(
+            "downloading graph", namespace=namespace, name=name, version=version
         )
-
         response = self._client.get(
             f"{self.base_url}/internal/namespaces/{namespace}/compute_graphs/{name}/code"
         )
         try:
             response.raise_for_status()
         except httpx.HTTPStatusError as e:
-            console.print(
-                …
+            logger.error(
+                "failed to download graph",
+                namespace=namespace,
+                name=name,
+                version=version,
+                error=response.text,
             )
             raise

@@ -81,25 +66,17 @@
         if task.reducer_output_id:
             reducer_url = f"{self.base_url}/namespaces/{task.namespace}/compute_graphs/{task.compute_graph}/invocations/{task.invocation_id}/fn/{task.compute_fn}/output/{task.reducer_output_id}"

-        console.print(
-            Panel(
-                f"downloading input\nURL: {url} \n reducer input URL: {reducer_url}",
-                title="downloader",
-                border_style="cyan",
-            )
-        )
-
+        logger.info("downloading input", url=url, reducer_url=reducer_url)
         response = self._client.get(url)

         try:
             response.raise_for_status()
         except httpx.HTTPStatusError as e:
-            console.print(
-                …
+            logger.error(
+                "failed to download input",
+                url=url,
+                reducer_url=reducer_url,
+                error=response.text,
             )
             raise

@@ -108,8 +85,6 @@
             if response.headers["content-type"] == JsonSerializer.content_type
             else "cloudpickle"
         )
-        serializer = get_serializer(encoder)
-
         if task.invocation_id == input_id:
             return DownloadedInputs(
                 input=IndexifyData(
@@ -117,26 +92,24 @@
                 ),
             )

-        …
+        input_payload = response.content

         if reducer_url:
-            init_value = self._client.get(reducer_url)
+            response = self._client.get(reducer_url)
             try:
-                init_value.raise_for_status()
+                response.raise_for_status()
+                init_value = response.content
             except httpx.HTTPStatusError as e:
-                console.print(
-                    Panel(
-                        …
-                        border_style="error",
-                    )
+                logger.error(
+                    "failed to download reducer output",
+                    url=reducer_url,
+                    error=response.text,
                 )
                 raise
-            init_value = serializer.deserialize(init_value.content)
             return DownloadedInputs(
                 input=IndexifyData(
                     input_id=task.invocation_id,
-                    payload=…,
+                    payload=input_payload,
                     encoder=encoder,
                 ),
                 init_value=IndexifyData(
@@ -147,7 +120,7 @@
         return DownloadedInputs(
             input=IndexifyData(
                 input_id=task.invocation_id,
-                payload=…,
+                payload=input_payload,
                 encoder=encoder,
             )
         )
indexify/executor/task_reporter.py
CHANGED

@@ -1,10 +1,9 @@
-import io
 from typing import Optional

 import nanoid
+import structlog
 from httpx import Timeout
 from pydantic import BaseModel
-from rich import print

 from indexify.common_util import get_httpx_client
 from indexify.executor.api_objects import RouterOutput as ApiRouterOutput
@@ -12,6 +11,8 @@ from indexify.executor.api_objects import TaskResult
 from indexify.executor.task_store import CompletedTask
 from indexify.functions_sdk.object_serializer import get_serializer

+logger = structlog.get_logger(__name__)
+

 # https://github.com/psf/requests/issues/1081#issuecomment-428504128
 class ForceMultipartDict(dict):
@@ -46,15 +47,14 @@ class TaskReporter:
         fn_outputs = []
         for output in completed_task.outputs or []:
             serializer = get_serializer(output.encoder)
-            serialized_output = serializer.serialize(output.payload)
             fn_outputs.append(
                 (
                     "node_outputs",
-                    (nanoid.generate(), serialized_output, serializer.content_type),
+                    (nanoid.generate(), output.payload, serializer.content_type),
                 )
             )
             report.output_count += 1
-            report.output_total_bytes += len(serialized_output)
+            report.output_total_bytes += len(output.payload)

         if completed_task.stdout:
             fn_outputs.append(
@@ -109,14 +109,17 @@
             + report.stderr_total_bytes
         )

-        print(
-            …
+        logger.info(
+            "reporting task outcome",
+            task_id=completed_task.task.id,
+            retries=completed_task.reporting_retries,
+            total_bytes=total_bytes,
+            total_files=report.output_count + report.stdout_count + report.stderr_count,
+            output_files=report.output_count,
+            output_bytes=total_bytes,
+            stdout_bytes=report.stdout_total_bytes,
+            stderr_bytes=report.stderr_total_bytes,
         )
-
         #
         kwargs = {
             "data": {"task_result": task_result_data},
@@ -137,15 +140,23 @@
                 **kwargs,
             )
         except Exception as e:
-            print(
-                …
+            logger.error(
+                "failed to report task outcome",
+                task_id=completed_task.task.id,
+                retries=completed_task.reporting_retries,
+                error=type(e).__name__,
+                message=str(e),
             )
             raise e

         try:
             response.raise_for_status()
         except Exception as e:
-            print(
-                …
+            logger.error(
+                "failed to report task outcome",
+                task_id=completed_task.task.id,
+                retries=completed_task.reporting_retries,
+                status_code=response.status_code,
+                response_text=response.text,
             )
             raise e
indexify/functions_sdk/graph.py
CHANGED

@@ -1,3 +1,4 @@
+import json
 import sys
 from collections import defaultdict
 from queue import deque
@@ -101,9 +102,7 @@ class Graph:
         return self

         if issubclass(indexify_fn, IndexifyFunction) and indexify_fn.accumulate:
-            self.accumulator_zero_values[indexify_fn.name] = (
-                indexify_fn.accumulate().model_dump()
-            )
+            self.accumulator_zero_values[indexify_fn.name] = indexify_fn.accumulate()

         self.nodes[indexify_fn.name] = indexify_fn
         return self
@@ -167,7 +166,8 @@
             reducer=is_reducer,
             image_name=start_node.image._image_name,
             image_information=start_node.image.to_image_information(),
-            encoder=start_node.encoder,
+            input_encoder=start_node.input_encoder,
+            output_encoder=start_node.output_encoder,
         )
         metadata_edges = self.edges.copy()
         metadata_nodes = {}
@@ -179,7 +179,8 @@
                     description=node.description or "",
                     source_fn=node_name,
                     target_fns=self.routers[node_name],
-                    encoder=node.encoder,
+                    input_encoder=node.input_encoder,
+                    output_encoder=node.output_encoder,
                     image_name=node.image._image_name,
                     image_information=node.image.to_image_information(),
                 )
@@ -193,7 +194,8 @@
                     reducer=node.accumulate is not None,
                     image_name=node.image._image_name,
                     image_information=node.image.to_image_information(),
-                    encoder=node.encoder,
+                    input_encoder=node.input_encoder,
+                    output_encoder=node.output_encoder,
                 )
             )

@@ -212,19 +214,19 @@
     def run(self, block_until_done: bool = False, **kwargs) -> str:
         self.validate_graph()
         start_node = self.nodes[self._start_node]
-        serializer = get_serializer(start_node.encoder)
+        serializer = get_serializer(start_node.input_encoder)
         input = IndexifyData(
             id=generate(),
             payload=serializer.serialize(kwargs),
-            encoder=start_node.encoder,
+            encoder=start_node.input_encoder,
         )
         print(f"[bold] Invoking {self._start_node}[/bold]")
         outputs = defaultdict(list)
         for k, v in self.accumulator_zero_values.items():
             node = self.nodes[k]
-            serializer = get_serializer(node.encoder)
+            serializer = get_serializer(node.input_encoder)
             self._accumulator_values[k] = IndexifyData(
-                payload=serializer.serialize(v), encoder=node.encoder
+                payload=serializer.serialize(v), encoder=node.input_encoder
             )
         self._results[input.id] = outputs
         ctx = GraphInvocationContext(
@@ -287,7 +289,8 @@
         fn_outputs = function_outputs.ser_outputs
         print(f"ran {node_name}: num outputs: {len(fn_outputs)}")
         if self._accumulator_values.get(node_name, None) is not None:
-            …
+            acc_output = fn_outputs[-1].copy()
+            self._accumulator_values[node_name] = acc_output
         outputs[node_name] = []
         if fn_outputs:
             outputs[node_name].extend(fn_outputs)
@@ -339,7 +342,7 @@
             raise ValueError(f"no results found for fn {fn_name} on graph {self.name}")
         fn = self.nodes[fn_name]
         fn_model = self.get_function(fn_name).get_output_model()
-        serializer = get_serializer(fn.encoder)
+        serializer = get_serializer(fn.output_encoder)
         outputs = []
         for result in results[fn_name]:
             payload_dict = serializer.deserialize(result.payload)
indexify/functions_sdk/graph_definition.py
CHANGED

@@ -14,7 +14,8 @@ class FunctionMetadata(BaseModel):
     reducer: bool = False
     image_name: str
     image_information: ImageInformation
-    encoder: str = "cloudpickle"
+    input_encoder: str = "cloudpickle"
+    output_encoder: str = "cloudpickle"


 class RouterMetadata(BaseModel):
@@ -24,7 +25,8 @@ class RouterMetadata(BaseModel):
     target_fns: List[str]
     image_name: str
     image_information: ImageInformation
-    encoder: str = "cloudpickle"
+    input_encoder: str = "cloudpickle"
+    output_encoder: str = "cloudpickle"


 class NodeMetadata(BaseModel):
@@ -49,12 +51,12 @@ class ComputeGraphMetadata(BaseModel):
     replaying: bool = False

     def get_input_payload_serializer(self):
-        return get_serializer(self.start_node.compute_fn.encoder)
+        return get_serializer(self.start_node.compute_fn.input_encoder)

     def get_input_encoder(self) -> str:
         if self.start_node.compute_fn:
-            return self.start_node.compute_fn.encoder
+            return self.start_node.compute_fn.input_encoder
         elif self.start_node.dynamic_router:
-            return self.start_node.dynamic_router.encoder
+            return self.start_node.dynamic_router.input_encoder

         raise ValueError("start node is not set on the graph")
indexify/functions_sdk/indexify_functions.py
CHANGED

@@ -83,7 +83,8 @@ class IndexifyFunction:
     image: Optional[Image] = DEFAULT_IMAGE_3_10
     placement_constraints: List[PlacementConstraints] = []
     accumulate: Optional[Type[Any]] = None
-    encoder: Optional[str] = "cloudpickle"
+    input_encoder: Optional[str] = "cloudpickle"
+    output_encoder: Optional[str] = "cloudpickle"

     def run(self, *args, **kwargs) -> Union[List[Any], Any]:
         pass
@@ -95,7 +96,7 @@ class IndexifyFunction:

     @classmethod
     def deserialize_output(cls, output: IndexifyData) -> Any:
-        serializer = get_serializer(cls.encoder)
+        serializer = get_serializer(cls.output_encoder)
         return serializer.deserialize(output.payload)


@@ -104,7 +105,8 @@ class IndexifyRouter:
     description: str = ""
     image: Optional[Image] = DEFAULT_IMAGE_3_10
     placement_constraints: List[PlacementConstraints] = []
-    encoder: Optional[str] = "cloudpickle"
+    input_encoder: Optional[str] = "cloudpickle"
+    output_encoder: Optional[str] = "cloudpickle"

     def run(self, *args, **kwargs) -> Optional[List[IndexifyFunction]]:
         pass
@@ -120,7 +122,8 @@ def indexify_router(
     description: Optional[str] = "",
     image: Optional[Image] = DEFAULT_IMAGE_3_10,
     placement_constraints: List[PlacementConstraints] = [],
-    encoder: Optional[str] = "cloudpickle",
+    input_encoder: Optional[str] = "cloudpickle",
+    output_encoder: Optional[str] = "cloudpickle",
 ):
     def construct(fn):
         # Get function signature using inspect.signature
@@ -144,7 +147,8 @@ def indexify_router(
             ),
             "image": image,
             "placement_constraints": placement_constraints,
-            "encoder": encoder,
+            "input_encoder": input_encoder,
+            "output_encoder": output_encoder,
             "run": run,
         }
@@ -158,7 +162,8 @@ def indexify_function(
     description: Optional[str] = "",
     image: Optional[Image] = DEFAULT_IMAGE_3_10,
     accumulate: Optional[Type[BaseModel]] = None,
-    encoder: Optional[str] = "cloudpickle",
+    input_encoder: Optional[str] = "cloudpickle",
+    output_encoder: Optional[str] = "cloudpickle",
     placement_constraints: List[PlacementConstraints] = [],
 ):
     def construct(fn):
@@ -184,7 +189,8 @@ def indexify_function(
             "image": image,
             "placement_constraints": placement_constraints,
             "accumulate": accumulate,
-            "encoder": encoder,
+            "input_encoder": input_encoder,
+            "output_encoder": output_encoder,
             "run": run,
         }

@@ -231,6 +237,18 @@ class IndexifyFunctionWrapper:
         )
         return return_type

+    def get_input_types(self) -> Dict[str, Any]:
+        if not isinstance(self.indexify_function, IndexifyFunction):
+            raise TypeError("Input must be an instance of IndexifyFunction")
+
+        extract_method = self.indexify_function.run
+        type_hints = get_type_hints(extract_method)
+        return {
+            k: v
+            for k, v in type_hints.items()
+            if k != "return" and not is_pydantic_model_from_annotation(v)
+        }
+
     def run_router(
         self, input: Union[Dict, Type[BaseModel]]
     ) -> Tuple[List[str], Optional[str]]:
@@ -280,20 +298,17 @@ class IndexifyFunctionWrapper:
         self, name: str, input: IndexifyData, acc: Optional[Any] = None
     ) -> FunctionCallResult:
         input = self.deserialize_input(name, input)
-        serializer = get_serializer(self.indexify_function.encoder)
+        input_serializer = get_serializer(self.indexify_function.input_encoder)
+        output_serializer = get_serializer(self.indexify_function.output_encoder)
         if acc is not None:
-            acc = self.indexify_function.accumulate…(
-                serializer.deserialize(acc.payload)
-            )
+            acc = input_serializer.deserialize(acc.payload)
         if acc is None and self.indexify_function.accumulate is not None:
-            acc = self.indexify_function.accumulate…(
-                self.indexify_function.accumulate()
-            )
+            acc = self.indexify_function.accumulate()
         outputs, err = self.run_fn(input, acc=acc)
         ser_outputs = [
             IndexifyData(
-                payload=serializer.serialize(output),
-                encoder=self.indexify_function.encoder,
+                payload=output_serializer.serialize(output),
+                encoder=self.indexify_function.output_encoder,
             )
             for output in outputs
         ]
indexify/functions_sdk/object_serializer.py
CHANGED

@@ -1,7 +1,7 @@
-from typing import Any, List
+import json
+from typing import Any, List, Type

 import cloudpickle
-import jsonpickle


 def get_serializer(serializer_type: str) -> Any:
@@ -22,19 +22,29 @@ class JsonSerializer:

     @staticmethod
     def serialize(data: Any) -> str:
-        return jsonpickle.encode(data)
+        try:
+            return json.dumps(data)
+        except Exception as e:
+            raise ValueError(f"failed to serialize data with json: {e}")

     @staticmethod
     def deserialize(data: str) -> Any:
-        return jsonpickle.decode(data)
+        try:
+            if isinstance(data, bytes):
+                data = data.decode("utf-8")
+            return json.loads(data)
+        except Exception as e:
+            raise ValueError(f"failed to deserialize data with json: {e}")

     @staticmethod
     def serialize_list(data: List[Any]) -> str:
-        return jsonpickle.encode(data)
+        return json.dumps(data)

     @staticmethod
-    def deserialize_list(data: str) -> List[Any]:
-        return jsonpickle.decode(data)
+    def deserialize_list(data: str, t: Type) -> List[Any]:
+        if isinstance(data, bytes):
+            data = data.decode("utf-8")
+        return json.loads(data)


 class CloudPickleSerializer:
indexify/http_client.py
CHANGED

@@ -274,10 +274,10 @@ class IndexifyClient:
         self,
         graph: str,
         block_until_done: bool = False,
-        …
+        input_encoding: str = "cloudpickle",
         **kwargs,
     ) -> str:
-        serializer = get_serializer(…)
+        serializer = get_serializer(input_encoding)
         ser_input = serializer.serialize(kwargs)
         params = {"block_until_finish": block_until_done}
         kwargs = {
@@ -351,11 +351,11 @@ class IndexifyClient:
         )
         response.raise_for_status()
         content_type = response.headers.get("Content-Type")
-        …
-        )
+        if content_type == "application/octet-stream":
+            encoding = "cloudpickle"
+        else:
+            encoding = "json"
+        return IndexifyData(id=output_id, payload=response.content, encoder=encoding)

     def graph_outputs(
         self,
{indexify-0.2.32.dist-info → indexify-0.2.34.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: indexify
-Version: 0.2.32
+Version: 0.2.34
 Summary: Python Client for Indexify
 Home-page: https://github.com/tensorlakeai/indexify
 License: Apache 2.0
@@ -18,11 +18,11 @@ Requires-Dist: cloudpickle (>=3.1.0,<4.0.0)
 Requires-Dist: docker (>=7.1.0,<8.0.0)
 Requires-Dist: httpx-sse (>=0.4.0,<0.5.0)
 Requires-Dist: httpx[http2] (>=0,<1)
-Requires-Dist: jsonpickle (>=4.0.0,<5.0.0)
 Requires-Dist: nanoid (>=2.0.0,<3.0.0)
 Requires-Dist: pydantic (==2.10.2)
 Requires-Dist: pyyaml (>=6,<7)
 Requires-Dist: rich (>=13.9.2,<14.0.0)
+Requires-Dist: structlog (>=24.4.0,<25.0.0)
 Requires-Dist: typer (>=0.13.0,<0.14.0)
 Project-URL: Repository, https://github.com/tensorlakeai/indexify
 Description-Content-Type: text/markdown
{indexify-0.2.32.dist-info → indexify-0.2.34.dist-info}/RECORD
CHANGED

@@ -5,31 +5,31 @@ indexify/data_loaders/__init__.py,sha256=Y5NEuseTcYAICRiweYw5wBQ2m2YplbsY21I7df-
 indexify/data_loaders/local_directory_loader.py,sha256=fCrgj5drnW71ZUdDDvcB1-VJjIs1w6Q8sEW0HSGSAiA,1247
 indexify/data_loaders/url_loader.py,sha256=32SERljcq1Xsi4RdLz2dgyk2TER5pQPTtXl3gUzwHbY,1533
 indexify/error.py,sha256=qAWr8R6AxPkjsxHSzXTc8zqYnNO_AjOqqYEPsQvF1Zs,238
-indexify/executor/agent.py,sha256=…
+indexify/executor/agent.py,sha256=FxKEoabt9b3YR7cl26uEcGtOuusTJyCQjoA7zOy2aX8,14100
 indexify/executor/api_objects.py,sha256=mvmwGbK4paJNQGFvbtNHMPpiH_LpVhrlRnCcrqS6HOQ,859
-indexify/executor/downloader.py,sha256=…
+indexify/executor/downloader.py,sha256=dHLxoBnX8-Bh4yZtFDYptZNF6rlVtmTk_70JK8Ect5w,4184
 indexify/executor/executor_tasks.py,sha256=A0UIEZ5VaB6zSkFQG81UmTW0E57MTYhGlaXuAbRV8lQ,1884
 indexify/executor/function_worker.py,sha256=wRW2-X9dNI80KhwTD1vD-pcyetsVKVs6vVdg7L7JjcQ,6462
 indexify/executor/image_dependency_installer.py,sha256=ct8GmzgkaPi6NAblk68IJJWo5MecIUubELotmSrgoRQ,1759
 indexify/executor/indexify_executor.py,sha256=2Ut_VX-Su_lm4b4aEROyRJ3gXx-uFHA-V7EN0sWiARE,771
 indexify/executor/runtime_probes.py,sha256=mjw2_mGQ622wRT_39WPGGgPEZQTgtrf3-ICcUUZOeyg,2126
-indexify/executor/task_reporter.py,sha256=…
+indexify/executor/task_reporter.py,sha256=XlEhNf_ScNnzG67zbtVwL7_9Bo8MvPZiHLI5UHymUnM,5305
 indexify/executor/task_store.py,sha256=JlRlWwAm4YjFRkTNRx-6GsUcmOzcyvzb5Csa5XDpRTI,3982
 indexify/functions_sdk/data_objects.py,sha256=wXbUa9hjU6rsXmmk19vQ5Kixf3FsI59VBWPNmHasAX0,854
-indexify/functions_sdk/graph.py,sha256=…
-indexify/functions_sdk/graph_definition.py,sha256=…
+indexify/functions_sdk/graph.py,sha256=pca6LKbPAfYT-BMGTVL5sbPuuOhvvr_Yve6u3H0NWVk,13126
+indexify/functions_sdk/graph_definition.py,sha256=rJmGcy9u5A_Sme6Ol33NsCnSKQVjyUfeN9LnH3bU88Y,1732
 indexify/functions_sdk/graph_validation.py,sha256=mN2Fcp91GIwFZEQP6z_qGqt4LkLM70SnI7AWBi4CmKQ,2509
 indexify/functions_sdk/image.py,sha256=QK0H6KxLWriB_z4M0kunKzzHdHxYLWL670RPYgYuf_8,1762
-indexify/functions_sdk/indexify_functions.py,sha256=…
+indexify/functions_sdk/indexify_functions.py,sha256=J-etsuC_IGNCsUfbsJMrolyZVHZsurIMC5IaMQGiZnM,11187
 indexify/functions_sdk/local_cache.py,sha256=cNWF67zbhbTJe3g86hyLBy3Rqzs6dNvp2SjLazGZWvw,1348
-indexify/functions_sdk/object_serializer.py,sha256=…
+indexify/functions_sdk/object_serializer.py,sha256=R58ALsl2Lb87ii6km4D6hBBsqRs_CHNISxhUICE2d9o,1931
 indexify/functions_sdk/pipeline.py,sha256=KmxZE8eBFAQ4bbEcYURXXR26HSyoAT3O6iu9H38-OXE,974
-indexify/http_client.py,sha256=…
+indexify/http_client.py,sha256=iLafZagCFnlTS6uHfOjInogjg0uXW_zXEspIN7ttB5I,15903
 indexify/remote_graph.py,sha256=aox9NibZIU8YDiP92syerFSjiZc2jVPkF6g2kfqOvCA,5003
 indexify/remote_pipeline.py,sha256=oqx57rSPszNS3DToXO_nf-CKqkCZWptm1u_p3orV_gQ,790
 indexify/settings.py,sha256=Ny59mzYI4gbXoK8hjx66a_men6ndbd1J1zCTcKOoyzg,50
-indexify-0.2.32.dist-info/LICENSE.txt,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-indexify-0.2.32.dist-info/METADATA,sha256=…
-indexify-0.2.32.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
-indexify-0.2.32.dist-info/entry_points.txt,sha256=Pih7WV-XMpAzI5dEvROcpLr-ybVhd9Y-AtuzBKUdcDs,49
-indexify-0.2.32.dist-info/RECORD,,
+indexify-0.2.34.dist-info/LICENSE.txt,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+indexify-0.2.34.dist-info/METADATA,sha256=GMGPVYUyKI8b02nA7b5xkG7d8gvyyqlxEOw75uCTkYI,6197
+indexify-0.2.34.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+indexify-0.2.34.dist-info/entry_points.txt,sha256=Pih7WV-XMpAzI5dEvROcpLr-ybVhd9Y-AtuzBKUdcDs,49
+indexify-0.2.34.dist-info/RECORD,,
{indexify-0.2.32.dist-info → indexify-0.2.34.dist-info}/LICENSE.txt
File without changes

{indexify-0.2.32.dist-info → indexify-0.2.34.dist-info}/WHEEL
File without changes

{indexify-0.2.32.dist-info → indexify-0.2.34.dist-info}/entry_points.txt
File without changes