indexify 0.0.43__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- indexify/__init__.py +15 -14
- indexify/base_client.py +48 -21
- indexify/cli.py +247 -0
- indexify/client.py +18 -790
- indexify/error.py +3 -30
- indexify/executor/agent.py +364 -0
- indexify/executor/api_objects.py +43 -0
- indexify/executor/downloader.py +124 -0
- indexify/executor/executor_tasks.py +72 -0
- indexify/executor/function_worker.py +177 -0
- indexify/executor/indexify_executor.py +32 -0
- indexify/executor/runtime_probes.py +48 -0
- indexify/executor/task_reporter.py +110 -0
- indexify/executor/task_store.py +113 -0
- indexify/foo +72 -0
- indexify/functions_sdk/data_objects.py +37 -0
- indexify/functions_sdk/graph.py +281 -0
- indexify/functions_sdk/graph_validation.py +66 -0
- indexify/functions_sdk/image.py +34 -0
- indexify/functions_sdk/indexify_functions.py +188 -0
- indexify/functions_sdk/local_cache.py +46 -0
- indexify/functions_sdk/object_serializer.py +60 -0
- indexify/local_client.py +183 -0
- indexify/remote_client.py +319 -0
- indexify-0.2.1.dist-info/METADATA +151 -0
- indexify-0.2.1.dist-info/RECORD +33 -0
- indexify-0.2.1.dist-info/entry_points.txt +3 -0
- indexify/exceptions.py +0 -3
- indexify/extraction_policy.py +0 -75
- indexify/extractor_sdk/__init__.py +0 -14
- indexify/extractor_sdk/data.py +0 -100
- indexify/extractor_sdk/extractor.py +0 -225
- indexify/extractor_sdk/utils.py +0 -102
- indexify/extractors/__init__.py +0 -0
- indexify/extractors/embedding.py +0 -55
- indexify/extractors/pdf_parser.py +0 -93
- indexify/graph.py +0 -133
- indexify/local_runner.py +0 -128
- indexify/runner.py +0 -22
- indexify/utils.py +0 -7
- indexify-0.0.43.dist-info/METADATA +0 -66
- indexify-0.0.43.dist-info/RECORD +0 -25
- {indexify-0.0.43.dist-info → indexify-0.2.1.dist-info}/LICENSE.txt +0 -0
- {indexify-0.0.43.dist-info → indexify-0.2.1.dist-info}/WHEEL +0 -0
indexify/error.py
CHANGED
@@ -1,30 +1,3 @@
|
|
1
|
-
class
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
def __init__(self, status: str, message: str):
|
6
|
-
self.status = status
|
7
|
-
self.message = message
|
8
|
-
|
9
|
-
@staticmethod
|
10
|
-
def from_tonic_error_string(url: str, error: str) -> "Error":
|
11
|
-
data = error.split(", ")
|
12
|
-
|
13
|
-
message = data[1].split(": ", 1)[1]
|
14
|
-
if message.startswith('"') and message.endswith('"'):
|
15
|
-
message = message[1:-1]
|
16
|
-
|
17
|
-
status = "GeneralError"
|
18
|
-
if "extraction_graph" in url:
|
19
|
-
status = "ExtractionGraphError"
|
20
|
-
elif "search" in url:
|
21
|
-
status = "SearchError"
|
22
|
-
|
23
|
-
error = Error(status, message)
|
24
|
-
return error
|
25
|
-
|
26
|
-
def __str__(self):
|
27
|
-
return f"{self.status} | {self.message.capitalize()}"
|
28
|
-
|
29
|
-
def __repr__(self):
|
30
|
-
return f"Error(status={self.status!r}, message={self.message!r})"
|
1
|
+
class ApiException(Exception):
    """Raised when the Indexify server returns an API-level error."""

    def __init__(self, message: str) -> None:
        super().__init__(message)
|
@@ -0,0 +1,364 @@
|
|
1
|
+
import asyncio
|
2
|
+
import json
|
3
|
+
import ssl
|
4
|
+
from concurrent.futures.process import BrokenProcessPool
|
5
|
+
from typing import Dict, List, Optional
|
6
|
+
|
7
|
+
import httpx
|
8
|
+
import yaml
|
9
|
+
from httpx_sse import aconnect_sse
|
10
|
+
from pydantic import BaseModel
|
11
|
+
from rich.console import Console
|
12
|
+
from rich.panel import Panel
|
13
|
+
from rich.text import Text
|
14
|
+
from rich.theme import Theme
|
15
|
+
|
16
|
+
from indexify.functions_sdk.data_objects import (
|
17
|
+
FunctionWorkerOutput,
|
18
|
+
IndexifyData,
|
19
|
+
RouterOutput,
|
20
|
+
)
|
21
|
+
|
22
|
+
from .api_objects import ExecutorMetadata, Task
|
23
|
+
from .downloader import DownloadedInputs, Downloader
|
24
|
+
from .executor_tasks import DownloadGraphTask, DownloadInputTask, ExtractTask
|
25
|
+
from .function_worker import FunctionWorker
|
26
|
+
from .runtime_probes import ProbeInfo, RuntimeProbes
|
27
|
+
from .task_reporter import TaskReporter
|
28
|
+
from .task_store import CompletedTask, TaskStore
|
29
|
+
|
30
|
+
# Rich console theme: maps the semantic style names used throughout this
# module (e.g. border_style="info") to concrete colors.
custom_theme = Theme(
    {
        "info": "cyan",
        "warning": "yellow",
        "error": "red",
        "success": "green",
    }
)

# Shared console for all agent log output.
console = Console(theme=custom_theme)
|
40
|
+
|
41
|
+
|
42
|
+
class FunctionInput(BaseModel):
    """Everything needed to run one function invocation for a queued task."""

    task_id: str
    namespace: str
    compute_graph: str
    function: str
    input: IndexifyData
    # Previously accumulated value downloaded for reducer tasks, if any.
    init_value: Optional[IndexifyData] = None
|
49
|
+
|
50
|
+
|
51
|
+
class ExtractorAgent:
    """Long-running executor agent.

    Registers this executor with the Indexify server, streams assigned tasks
    over SSE, drives them through download -> execute -> report stages via
    the asyncio tasks in ``executor_tasks``, and reports outcomes back.

    Fixes vs. previous revision:
      * ``to_sentence_case`` (nested in :meth:`run`) concatenated the first
        word and the rest with an empty string, producing e.g. "Imagename"
        instead of "Image name"; it now joins with a space.
      * removed a dead, unused ``url`` local in
        :meth:`task_completion_reporter`.
    """

    def __init__(
        self,
        executor_id: str,
        num_workers,
        code_path: str,
        function_worker: FunctionWorker,
        server_addr: str = "localhost:8900",
        config_path: Optional[str] = None,
    ):
        self.num_workers = num_workers
        self._use_tls = False
        if config_path:
            with open(config_path, "r") as f:
                config = yaml.safe_load(f)
            self._config = config
            if config.get("use_tls", False):
                console.print(
                    "Running the extractor with TLS enabled", style="cyan bold"
                )
                self._use_tls = True
                tls_config = config["tls_config"]
                # Client-side TLS: trust the configured CA bundle and present
                # our own cert/key pair (mutual TLS).
                self._ssl_context = ssl.create_default_context(
                    ssl.Purpose.SERVER_AUTH, cafile=tls_config["ca_bundle_path"]
                )
                self._ssl_context.load_cert_chain(
                    certfile=tls_config["cert_path"], keyfile=tls_config["key_path"]
                )
                self._protocol = "wss"
                self._tls_config = tls_config
            else:
                self._ssl_context = None
                self._protocol = "ws"
        else:
            self._ssl_context = None
            self._protocol = "http"
            self._config = {}

        self._task_store: TaskStore = TaskStore()
        self._executor_id = executor_id
        self._function_worker = function_worker
        self._has_registered = False
        self._server_addr = server_addr
        self._base_url = f"{self._protocol}://{self._server_addr}"
        self._code_path = code_path
        self._downloader = Downloader(code_path=code_path, base_url=self._base_url)
        self._max_queued_tasks = 10
        self._task_reporter = TaskReporter(
            base_url=self._base_url, executor_id=self._executor_id
        )
        self._probe = RuntimeProbes()

    async def task_completion_reporter(self):
        """Loop forever, pushing finished task outcomes to the server.

        On a reporting failure the outcome stays in the task store and is
        retried after a short delay.
        """
        console.print(Text("Starting task completion reporter", style="bold cyan"))
        # We should copy only the keys and not the values
        while True:
            outcomes = await self._task_store.task_outcomes()
            for task_outcome in outcomes:
                outcome = task_outcome.task_outcome
                style_outcome = (
                    f"[bold red] {outcome} [/]"
                    if "fail" in outcome
                    else f"[bold green] {outcome} [/]"
                )
                console.print(
                    Panel(
                        f"Reporting outcome of task {task_outcome.task.id}\n"
                        f"Outcome: {style_outcome}\n"
                        f"Outputs: {len(task_outcome.outputs or [])} Router Output: {task_outcome.router_output}",
                        title="Task Completion",
                        border_style="info",
                    )
                )

                try:
                    # Send task outcome to the server
                    self._task_reporter.report_task_outcome(completed_task=task_outcome)
                except Exception as e:
                    # The connection was dropped in the middle of the reporting, process, retry
                    console.print(
                        Panel(
                            f"Failed to report task {task_outcome.task.id}\n"
                            f"Exception: {e}\nRetrying...",
                            title="Reporting Error",
                            border_style="error",
                        )
                    )
                    await asyncio.sleep(5)
                    continue

                self._task_store.mark_reported(task_id=task_outcome.task.id)

    async def task_launcher(self):
        """Drive tasks through their pipeline stages.

        Uses a single ``asyncio.wait`` over heterogeneous tasks and
        dispatches on the task name: ``get_runnable_tasks`` ->
        ``download_graph`` -> ``download_input`` -> ``run_function``.
        """
        async_tasks: List[asyncio.Task] = []
        fn_queue: List[FunctionInput] = []
        async_tasks.append(
            asyncio.create_task(
                self._task_store.get_runnable_tasks(), name="get_runnable_tasks"
            )
        )
        while True:
            # Launch a function execution for every fully-downloaded input.
            fn: FunctionInput
            for fn in fn_queue:
                task: Task = self._task_store.get_task(fn.task_id)
                async_tasks.append(
                    ExtractTask(
                        function_worker=self._function_worker,
                        task=task,
                        input=fn.input,
                        code_path=f"{self._code_path}/{task.namespace}/{task.compute_graph}.{task.graph_version}",
                        init_value=fn.init_value,
                    )
                )

            fn_queue = []
            done, pending = await asyncio.wait(
                async_tasks, return_when=asyncio.FIRST_COMPLETED
            )

            async_tasks: List[asyncio.Task] = list(pending)
            for async_task in done:
                if async_task.get_name() == "get_runnable_tasks":
                    if async_task.exception():
                        console.print(
                            Text("Task Launcher Error: ", style="red bold")
                            + Text(
                                f"Failed to get runnable tasks: {async_task.exception()}",
                                style="red",
                            )
                        )
                        continue
                    result: Dict[str, Task] = await async_task
                    task: Task
                    for _, task in result.items():
                        async_tasks.append(
                            DownloadGraphTask(task=task, downloader=self._downloader)
                        )
                    # Re-arm the poll for the next batch of runnable tasks.
                    async_tasks.append(
                        asyncio.create_task(
                            self._task_store.get_runnable_tasks(),
                            name="get_runnable_tasks",
                        )
                    )
                elif async_task.get_name() == "download_graph":
                    if async_task.exception():
                        console.print(
                            Text(
                                f"Failed to download graph for task {async_task.task.id}\n",
                                style="red bold",
                            )
                            + Text(f"Exception: {async_task.exception()}", style="red")
                        )
                        completed_task = CompletedTask(
                            task=async_task.task,
                            outputs=[],
                            task_outcome="failure",
                        )
                        self._task_store.complete(outcome=completed_task)
                        continue
                    async_tasks.append(
                        DownloadInputTask(
                            task=async_task.task, downloader=self._downloader
                        )
                    )
                elif async_task.get_name() == "download_input":
                    if async_task.exception():
                        console.print(
                            Text(
                                f"Failed to download input for task {async_task.task.id}\n",
                                style="red bold",
                            )
                            + Text(f"Exception: {async_task.exception()}", style="red")
                        )
                        completed_task = CompletedTask(
                            task=async_task.task,
                            outputs=[],
                            task_outcome="failure",
                        )
                        self._task_store.complete(outcome=completed_task)
                        continue
                    downloaded_inputs: DownloadedInputs = await async_task
                    task: Task = async_task.task
                    fn_queue.append(
                        FunctionInput(
                            task_id=task.id,
                            namespace=task.namespace,
                            compute_graph=task.compute_graph,
                            function=task.compute_fn,
                            input=downloaded_inputs.input,
                            init_value=downloaded_inputs.init_value,
                        )
                    )
                elif async_task.get_name() == "run_function":
                    if async_task.exception():
                        completed_task = CompletedTask(
                            task=async_task.task,
                            task_outcome="failure",
                            outputs=[],
                            errors=str(async_task.exception()),
                        )
                        self._task_store.complete(outcome=completed_task)
                        continue
                    async_task: ExtractTask
                    try:
                        outputs: FunctionWorkerOutput = await async_task
                        if not outputs.success:
                            task_outcome = "failure"
                        else:
                            task_outcome = "success"

                        completed_task = CompletedTask(
                            task=async_task.task,
                            task_outcome=task_outcome,
                            outputs=outputs.fn_outputs,
                            router_output=outputs.router_output,
                            errors=outputs.exception,
                            stdout=outputs.stdout,
                            stderr=outputs.stderr,
                            reducer=outputs.reducer,
                        )
                        self._task_store.complete(outcome=completed_task)
                    except BrokenProcessPool:
                        # Worker process pool died; the task may still succeed
                        # on a fresh pool, so mark it retriable.
                        self._task_store.retriable_failure(async_task.task.id)
                        continue
                    except Exception as e:
                        console.print(
                            Text(
                                f"Failed to execute task {async_task.task.id}\n",
                                style="red bold",
                            )
                            + Text(f"Exception: {e}", style="red")
                        )
                        completed_task = CompletedTask(
                            task=async_task.task,
                            task_outcome="failure",
                            outputs=[],
                        )
                        self._task_store.complete(outcome=completed_task)
                        continue

    async def run(self):
        """Register with the server and consume assigned tasks over SSE.

        Reconnects (after a 5s pause) whenever the SSE stream fails.
        """
        import signal

        asyncio.get_event_loop().add_signal_handler(
            signal.SIGINT, self.shutdown, asyncio.get_event_loop()
        )
        asyncio.create_task(self.task_launcher())
        asyncio.create_task(self.task_completion_reporter())
        self._should_run = True
        while self._should_run:
            self._protocol = "http"
            url = f"{self._protocol}://{self._server_addr}/internal/executors/{self._executor_id}/tasks"

            def to_sentence_case(snake_str):
                # "image_name" -> "Image name".  Fixed: the words were
                # previously joined with an empty string ("Imagename").
                words = snake_str.split("_")
                return " ".join([words[0].capitalize(), *words[1:]])

            runtime_probe: ProbeInfo = self._probe.probe()
            data = ExecutorMetadata(
                id=self._executor_id,
                addr="",
                image_name=runtime_probe.image_name,
                labels=runtime_probe.labels,
            ).model_dump()

            panel_content = "\n".join(
                [f"{to_sentence_case(key)}: {value}" for key, value in data.items()]
            )
            console.print(
                Panel(
                    panel_content,
                    title="attempting to Register Executor",
                    border_style="cyan",
                )
            )

            try:
                async with httpx.AsyncClient() as client:
                    async with aconnect_sse(
                        client,
                        "POST",
                        url,
                        json=data,
                        headers={"Content-Type": "application/json"},
                    ) as event_source:
                        console.print(
                            Text("executor registered successfully", style="bold green")
                        )
                        async for sse in event_source.aiter_sse():
                            data = json.loads(sse.data)
                            tasks = []
                            for task_dict in data:
                                tasks.append(
                                    Task.model_validate(task_dict, strict=False)
                                )
                            self._task_store.add_tasks(tasks)
            except Exception as e:
                console.print(
                    Text("registration Error: ", style="red bold")
                    + Text(f"failed to register: {e}", style="red")
                )
                await asyncio.sleep(5)
                continue

    async def _shutdown(self, loop):
        """Stop the registration loop and cancel all outstanding tasks."""
        console.print(Text("shutting down agent...", style="bold yellow"))
        self._should_run = False
        for task in asyncio.all_tasks(loop):
            task.cancel()

    def shutdown(self, loop):
        """Signal-handler entry point: stop workers, then tear down the loop."""
        self._function_worker.shutdown()
        loop.create_task(self._shutdown(loop))
|
@@ -0,0 +1,43 @@
|
|
1
|
+
from typing import Any, Dict, List, Optional
|
2
|
+
|
3
|
+
from pydantic import BaseModel, Json
|
4
|
+
|
5
|
+
from indexify.functions_sdk.data_objects import IndexifyData
|
6
|
+
|
7
|
+
|
8
|
+
class Task(BaseModel):
    """A unit of work the server assigned to this executor.

    Instances are built with ``Task.model_validate`` from the server's
    SSE payload.
    """

    id: str
    namespace: str
    compute_graph: str
    compute_fn: str
    invocation_id: str
    input_key: str  # the segment after the last "|" is the input id
    reducer_output_id: Optional[str] = None  # set for reducer tasks
    graph_version: int
|
17
|
+
|
18
|
+
|
19
|
+
class ExecutorMetadata(BaseModel):
    """Metadata sent to the server when registering this executor."""

    id: str
    addr: str
    image_name: str
    labels: Dict[str, Any]
|
24
|
+
|
25
|
+
|
26
|
+
class RouterOutput(BaseModel):
    """Graph edges selected by a router function."""

    edges: List[str]
|
28
|
+
|
29
|
+
|
30
|
+
class FnOutput(BaseModel):
    """A single JSON-encoded function output payload."""

    payload: Json
|
32
|
+
|
33
|
+
|
34
|
+
class TaskResult(BaseModel):
    """Outcome of a finished task, reported back to the server."""

    router_output: Optional[RouterOutput] = None
    outcome: str  # e.g. "success" or "failure"
    namespace: str
    compute_graph: str
    compute_fn: str
    invocation_id: str
    executor_id: str
    task_id: str
    reducer: bool = False  # True when the task ran as a reducer
|
@@ -0,0 +1,124 @@
|
|
1
|
+
import os
|
2
|
+
from typing import Optional
|
3
|
+
|
4
|
+
import httpx
|
5
|
+
from pydantic import BaseModel
|
6
|
+
from rich.console import Console
|
7
|
+
from rich.panel import Panel
|
8
|
+
from rich.theme import Theme
|
9
|
+
|
10
|
+
from indexify.functions_sdk.data_objects import IndexifyData
|
11
|
+
from indexify.functions_sdk.object_serializer import MsgPackSerializer
|
12
|
+
|
13
|
+
from .api_objects import Task
|
14
|
+
|
15
|
+
# Rich console theme for the downloader's log output (border_style names
# such as "error" map to these colors).
custom_theme = Theme(
    {
        "info": "cyan",
        "warning": "yellow",
        "error": "red",
    }
)

console = Console(theme=custom_theme)
|
24
|
+
|
25
|
+
|
26
|
+
class DownloadedInputs(BaseModel):
    """Payloads fetched for one task: the function input plus, for reducer
    tasks, the previously accumulated value."""

    input: IndexifyData
    init_value: Optional[IndexifyData] = None
|
29
|
+
|
30
|
+
|
31
|
+
class Downloader:
    """Fetches compute-graph code and task input payloads from the server.

    Graph code is cached on disk under ``code_path``; inputs are fetched per
    task.  NOTE(review): the coroutines below use blocking ``httpx.get``
    calls, which stall the event loop for the duration of each request —
    confirm whether that is intentional.

    Fix vs. previous revision: ``download_input`` was annotated as
    returning ``IndexifyData`` although every return path produces a
    ``DownloadedInputs``; the annotation now matches the behavior.
    """

    def __init__(self, code_path: str, base_url: str):
        self.code_path = code_path
        self.base_url = base_url

    async def download_graph(self, namespace: str, name: str, version: int) -> str:
        """Download (or reuse a cached copy of) a graph's code blob.

        Returns the local path of the blob.  Raises
        ``httpx.HTTPStatusError`` if the server rejects the request.
        """
        path = os.path.join(self.code_path, namespace, f"{name}.{version}")
        # Cache hit: this graph version was already downloaded.
        if os.path.exists(path):
            return path

        console.print(
            Panel(
                f"Downloading graph: {name}\nPath: {path}",
                title="downloader",
                border_style="cyan",
            )
        )

        response = httpx.get(
            f"{self.base_url}/internal/namespaces/{namespace}/compute_graphs/{name}/code"
        )
        try:
            response.raise_for_status()
        except httpx.HTTPStatusError:
            console.print(
                Panel(
                    f"Failed to download graph: {name}\nError: {response.text}",
                    title="downloader error",
                    border_style="error",
                )
            )
            raise

        os.makedirs(os.path.dirname(path), exist_ok=True)
        with open(path, "wb") as f:
            f.write(response.content)
        return path

    async def download_input(self, task: Task) -> DownloadedInputs:
        """Download the input payload (and reducer init value, if any) for *task*.

        A task whose input id equals its invocation id reads the raw
        invocation payload; otherwise the input is a MsgPack-serialized
        function output.  Raises ``httpx.HTTPStatusError`` on any failed
        download.
        """
        input_id = task.input_key.split("|")[-1]
        if task.invocation_id == input_id:
            url = f"{self.base_url}/namespaces/{task.namespace}/compute_graphs/{task.compute_graph}/invocations/{task.invocation_id}/payload"
        else:
            url = f"{self.base_url}/internal/fn_outputs/{task.input_key}"

        reducer_url = None
        if task.reducer_output_id:
            reducer_url = f"{self.base_url}/namespaces/{task.namespace}/compute_graphs/{task.compute_graph}/invocations/{task.invocation_id}/fn/{task.compute_fn}/{task.reducer_output_id}"

        console.print(
            Panel(
                f"downloading input\nURL: {url} \n reducer input URL: {reducer_url}",
                title="downloader",
                border_style="cyan",
            )
        )

        response = httpx.get(url)
        try:
            response.raise_for_status()
        except httpx.HTTPStatusError:
            console.print(
                Panel(
                    f"failed to download input: {task.input_key}\nError: {response.text}",
                    title="downloader error",
                    border_style="error",
                )
            )
            raise

        # Invocation payloads are raw bytes, not MsgPack-encoded.
        if task.invocation_id == input_id:
            return DownloadedInputs(
                input=IndexifyData(payload=response.content, id=input_id)
            )

        init_value = None
        if reducer_url:
            init_value = httpx.get(reducer_url)
            try:
                init_value.raise_for_status()
            except httpx.HTTPStatusError:
                console.print(
                    Panel(
                        f"failed to download reducer output: {task.reducer_output_id}\nError: {init_value.text}",
                        title="downloader error",
                        border_style="error",
                    )
                )
                raise
            init_value = MsgPackSerializer.deserialize(init_value.content)

        return DownloadedInputs(
            input=MsgPackSerializer.deserialize(response.content), init_value=init_value
        )
|
@@ -0,0 +1,72 @@
|
|
1
|
+
import asyncio
|
2
|
+
from typing import Optional
|
3
|
+
|
4
|
+
from indexify.functions_sdk.data_objects import IndexifyData
|
5
|
+
|
6
|
+
from .api_objects import Task
|
7
|
+
from .downloader import Downloader
|
8
|
+
from .function_worker import FunctionWorker
|
9
|
+
|
10
|
+
|
11
|
+
class DownloadGraphTask(asyncio.Task):
    """asyncio.Task wrapping ``Downloader.download_graph`` for one task.

    The fixed task name "download_graph" is what the agent's launcher
    dispatches on; the originating ``Task`` is kept on ``self.task``.
    """

    def __init__(
        self,
        *,
        task: Task,
        downloader: Downloader,
        **kwargs,
    ):
        kwargs["name"] = "download_graph"
        kwargs["loop"] = asyncio.get_event_loop()
        super().__init__(
            downloader.download_graph(
                task.namespace, task.compute_graph, task.graph_version
            ),
            **kwargs,
        )
        # Keep the server task so completion handlers can identify it.
        self.task = task
|
28
|
+
|
29
|
+
|
30
|
+
class DownloadInputTask(asyncio.Task):
    """asyncio.Task wrapping ``Downloader.download_input`` for one task.

    Named "download_input" so the agent's launcher can dispatch on it;
    the originating ``Task`` is kept on ``self.task``.
    """

    def __init__(
        self,
        *,
        task: Task,
        downloader: Downloader,
        **kwargs,
    ):
        kwargs["name"] = "download_input"
        kwargs["loop"] = asyncio.get_event_loop()
        super().__init__(
            downloader.download_input(task),
            **kwargs,
        )
        # Keep the server task so completion handlers can identify it.
        self.task = task
|
45
|
+
|
46
|
+
|
47
|
+
class ExtractTask(asyncio.Task):
    """asyncio.Task that submits a function invocation to the worker pool.

    Named "run_function" so the agent's launcher can dispatch on it; the
    originating ``Task`` is kept on ``self.task``.
    """

    def __init__(
        self,
        *,
        function_worker: FunctionWorker,
        task: Task,
        input: IndexifyData,
        init_value: Optional[IndexifyData] = None,
        code_path: str,
        **kwargs,
    ):
        kwargs["name"] = "run_function"
        kwargs["loop"] = asyncio.get_event_loop()
        super().__init__(
            function_worker.async_submit(
                namespace=task.namespace,
                graph_name=task.compute_graph,
                fn_name=task.compute_fn,
                input=input,
                init_value=init_value,
                code_path=code_path,
                version=task.graph_version,
            ),
            **kwargs,
        )
        # Keep the server task so completion handlers can identify it.
        self.task = task
|