indexify 0.0.43__py3-none-any.whl → 0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- indexify/__init__.py +13 -14
- indexify/base_client.py +48 -21
- indexify/cli.py +235 -0
- indexify/client.py +18 -790
- indexify/error.py +3 -30
- indexify/executor/agent.py +362 -0
- indexify/executor/api_objects.py +43 -0
- indexify/executor/downloader.py +124 -0
- indexify/executor/executor_tasks.py +72 -0
- indexify/executor/function_worker.py +177 -0
- indexify/executor/indexify_executor.py +32 -0
- indexify/executor/task_reporter.py +110 -0
- indexify/executor/task_store.py +113 -0
- indexify/foo +72 -0
- indexify/functions_sdk/data_objects.py +37 -0
- indexify/functions_sdk/graph.py +276 -0
- indexify/functions_sdk/graph_validation.py +69 -0
- indexify/functions_sdk/image.py +26 -0
- indexify/functions_sdk/indexify_functions.py +192 -0
- indexify/functions_sdk/local_cache.py +46 -0
- indexify/functions_sdk/object_serializer.py +61 -0
- indexify/local_client.py +183 -0
- indexify/remote_client.py +319 -0
- indexify-0.2.dist-info/METADATA +151 -0
- indexify-0.2.dist-info/RECORD +32 -0
- indexify-0.2.dist-info/entry_points.txt +3 -0
- indexify/exceptions.py +0 -3
- indexify/extraction_policy.py +0 -75
- indexify/extractor_sdk/__init__.py +0 -14
- indexify/extractor_sdk/data.py +0 -100
- indexify/extractor_sdk/extractor.py +0 -225
- indexify/extractor_sdk/utils.py +0 -102
- indexify/extractors/__init__.py +0 -0
- indexify/extractors/embedding.py +0 -55
- indexify/extractors/pdf_parser.py +0 -93
- indexify/graph.py +0 -133
- indexify/local_runner.py +0 -128
- indexify/runner.py +0 -22
- indexify/utils.py +0 -7
- indexify-0.0.43.dist-info/METADATA +0 -66
- indexify-0.0.43.dist-info/RECORD +0 -25
- {indexify-0.0.43.dist-info → indexify-0.2.dist-info}/LICENSE.txt +0 -0
- {indexify-0.0.43.dist-info → indexify-0.2.dist-info}/WHEEL +0 -0
indexify/executor/function_worker.py
ADDED
@@ -0,0 +1,177 @@
+import asyncio
+import traceback
+from concurrent.futures.process import BrokenProcessPool
+from typing import Dict, List, Optional, Union
+
+from pydantic import BaseModel
+from rich import print
+
+from indexify.functions_sdk.data_objects import (
+    FunctionWorkerOutput,
+    IndexifyData,
+    RouterOutput,
+)
+from indexify.functions_sdk.graph import Graph
+from indexify.functions_sdk.indexify_functions import IndexifyFunctionWrapper
+
+graphs: Dict[str, Graph] = {}
+function_wrapper_map: Dict[str, IndexifyFunctionWrapper] = {}
+
+import concurrent.futures
+import io
+from contextlib import redirect_stderr, redirect_stdout
+
+
+class FunctionRunException(Exception):
+    def __init__(
+        self, exception: Exception, stdout: str, stderr: str, is_reducer: bool
+    ):
+        super().__init__(str(exception))
+        self.exception = exception
+        self.stdout = stdout
+        self.stderr = stderr
+        self.is_reducer = is_reducer
+
+
+class FunctionOutput(BaseModel):
+    fn_outputs: Optional[List[IndexifyData]]
+    router_output: Optional[RouterOutput]
+    reducer: bool = False
+    success: bool = True
+    exception: Optional[str] = None
+    stdout: str = ""
+    stderr: str = ""
+
+
+def _load_function(
+    namespace: str, graph_name: str, fn_name: str, code_path: str, version: int
+):
+    """Load an extractor to the memory: extractor_wrapper_map."""
+    global function_wrapper_map
+    key = f"{namespace}/{graph_name}/{version}/{fn_name}"
+    if key in function_wrapper_map:
+        return
+    graph = Graph.from_path(code_path)
+    function_wrapper = graph.get_function(fn_name)
+    function_wrapper_map[key] = function_wrapper
+    graph_key = f"{namespace}/{graph_name}/{version}"
+    graphs[graph_key] = graph
+
+
+class FunctionWorker:
+    def __init__(self, workers: int = 1) -> None:
+        self._executor: concurrent.futures.ProcessPoolExecutor = (
+            concurrent.futures.ProcessPoolExecutor(max_workers=workers)
+        )
+
+    async def async_submit(
+        self,
+        namespace: str,
+        graph_name: str,
+        fn_name: str,
+        input: IndexifyData,
+        code_path: str,
+        version: int,
+        init_value: Optional[IndexifyData] = None,
+    ) -> FunctionWorkerOutput:
+        try:
+            result = await asyncio.get_running_loop().run_in_executor(
+                self._executor,
+                _run_function,
+                namespace,
+                graph_name,
+                fn_name,
+                input,
+                code_path,
+                version,
+                init_value,
+            )
+        except BrokenProcessPool as mp:
+            self._executor.shutdown(wait=True, cancel_futures=True)
+            traceback.print_exc()
+            raise mp
+        except FunctionRunException as e:
+            print(e)
+            print(traceback.format_exc())
+            return FunctionWorkerOutput(
+                exception=str(e),
+                stdout=e.stdout,
+                stderr=e.stderr,
+                reducer=e.is_reducer,
+                success=False,
+            )
+
+        return FunctionWorkerOutput(
+            fn_outputs=result.fn_outputs,
+            router_output=result.router_output,
+            exception=result.exception,
+            stdout=result.stdout,
+            stderr=result.stderr,
+            reducer=result.reducer,
+            success=result.success,
+        )
+
+    def shutdown(self):
+        self._executor.shutdown(wait=True, cancel_futures=True)
+
+
+def _run_function(
+    namespace: str,
+    graph_name: str,
+    fn_name: str,
+    input: IndexifyData,
+    code_path: str,
+    version: int,
+    init_value: Optional[IndexifyData] = None,
+) -> FunctionOutput:
+    import io
+    import traceback
+    from contextlib import redirect_stderr, redirect_stdout
+
+    stdout_capture = io.StringIO()
+    stderr_capture = io.StringIO()
+    is_reducer = False
+    router_output = None
+    fn_output = None
+    has_failed = False
+    exception_msg = None
+    with redirect_stdout(stdout_capture), redirect_stderr(stderr_capture):
+        try:
+            key = f"{namespace}/{graph_name}/{version}/{fn_name}"
+            if key not in function_wrapper_map:
+                _load_function(namespace, graph_name, fn_name, code_path, version)
+
+            graph: Graph = graphs[f"{namespace}/{graph_name}/{version}"]
+            if fn_name in graph.routers:
+                router_output = graph.invoke_router(fn_name, input)
+            else:
+                fn_output = graph.invoke_fn_ser(fn_name, input, init_value)
+
+            is_reducer = (
+                graph.get_function(fn_name).indexify_function.accumulate is not None
+            )
+        except Exception as e:
+            print(traceback.format_exc())
+            has_failed = True
+            exception_msg = str(e)
+
+    # WARNING - IF THIS FAILS, WE WILL NOT BE ABLE TO RECOVER
+    # ANY LOGS
+    if has_failed:
+        return FunctionOutput(
+            fn_outputs=None,
+            router_output=None,
+            exception=exception_msg,
+            stdout=stdout_capture.getvalue(),
+            stderr=stderr_capture.getvalue(),
+            reducer=is_reducer,
+            success=False,
+        )
+    return FunctionOutput(
+        fn_outputs=fn_output,
+        router_output=router_output,
+        reducer=is_reducer,
+        success=True,
+        stdout=stdout_capture.getvalue(),
+        stderr=stderr_capture.getvalue(),
+    )
indexify/executor/indexify_executor.py
ADDED
@@ -0,0 +1,32 @@
+import asyncio
+from typing import List, Optional
+
+import nanoid
+
+from .agent import ExtractorAgent
+from .function_worker import FunctionWorker
+
+
+def join(
+    workers: int,
+    server_addr: str = "localhost:8900",
+    config_path: Optional[str] = None,
+):
+    print(f"receiving tasks from server addr: {server_addr}")
+    id = nanoid.generate()
+    print(f"executor id: {id}")
+
+    function_worker = FunctionWorker(workers=workers)
+
+    agent = ExtractorAgent(
+        id,
+        num_workers=workers,
+        function_worker=function_worker,
+        server_addr=server_addr,
+        config_path=config_path,
+    )
+
+    try:
+        asyncio.get_event_loop().run_until_complete(agent.run())
+    except asyncio.CancelledError as ex:
+        print("exiting gracefully", ex)
indexify/executor/task_reporter.py
ADDED
@@ -0,0 +1,110 @@
+import io
+from typing import List, Optional
+
+import httpx
+import nanoid
+from rich import print
+
+from indexify.executor.api_objects import RouterOutput as ApiRouterOutput
+from indexify.executor.api_objects import Task, TaskResult
+from indexify.executor.task_store import CompletedTask
+from indexify.functions_sdk.data_objects import IndexifyData, RouterOutput
+from indexify.functions_sdk.object_serializer import MsgPackSerializer
+
+
+# https://github.com/psf/requests/issues/1081#issuecomment-428504128
+class ForceMultipartDict(dict):
+    def __bool__(self):
+        return True
+
+
+FORCE_MULTIPART = ForceMultipartDict()
+
+
+class TaskReporter:
+    def __init__(self, base_url: str, executor_id: str):
+        self._base_url = base_url
+        self._executor_id = executor_id
+
+    def report_task_outcome(self, completed_task: CompletedTask):
+        fn_outputs = []
+        print(
+            f"[bold]task-reporter[/bold] uploading output of size: {len(completed_task.outputs or [])}"
+        )
+        for output in completed_task.outputs or []:
+            output_bytes = MsgPackSerializer.serialize(output)
+            fn_outputs.append(
+                ("node_outputs", (nanoid.generate(), io.BytesIO(output_bytes)))
+            )
+
+        if completed_task.errors:
+            print(
+                f"[bold]task-reporter[/bold] uploading error of size: {len(completed_task.errors)}"
+            )
+            fn_outputs.append(
+                (
+                    "exception_msg",
+                    (nanoid.generate(), io.BytesIO(completed_task.errors.encode())),
+                )
+            )
+
+        if completed_task.stdout:
+            print(
+                f"[bold]task-reporter[/bold] uploading stdout of size: {len(completed_task.stdout)}"
+            )
+            fn_outputs.append(
+                (
+                    "stdout",
+                    (nanoid.generate(), io.BytesIO(completed_task.stdout.encode())),
+                )
+            )
+
+        if completed_task.stderr:
+            print(
+                f"[bold]task-reporter[/bold] uploading stderr of size: {len(completed_task.stderr)}"
+            )
+            fn_outputs.append(
+                (
+                    "stderr",
+                    (nanoid.generate(), io.BytesIO(completed_task.stderr.encode())),
+                )
+            )
+
+        router_output = (
+            ApiRouterOutput(edges=completed_task.router_output.edges)
+            if completed_task.router_output
+            else None
+        )
+
+        task_result = TaskResult(
+            router_output=router_output,
+            outcome=completed_task.task_outcome,
+            namespace=completed_task.task.namespace,
+            compute_graph=completed_task.task.compute_graph,
+            compute_fn=completed_task.task.compute_fn,
+            invocation_id=completed_task.task.invocation_id,
+            executor_id=self._executor_id,
+            task_id=completed_task.task.id,
+            reducer=completed_task.reducer,
+        )
+        task_result_data = task_result.model_dump_json(exclude_none=True)
+
+        kwargs = {"data": {"task_result": task_result_data}}
+        if fn_outputs and len(fn_outputs) > 0:
+            kwargs["files"] = fn_outputs
+        else:
+            kwargs["files"] = FORCE_MULTIPART
+        try:
+            response = httpx.post(
+                url=f"{self._base_url}/internal/ingest_files",
+                **kwargs,
+            )
+        except Exception as e:
+            print(f"failed to report task outcome {e}")
+            raise e
+
+        try:
+            response.raise_for_status()
+        except Exception as e:
+            print(f"failed to report task outcome {response.text}")
+            raise e
indexify/executor/task_store.py
ADDED
@@ -0,0 +1,113 @@
+import asyncio
+from typing import Dict, List, Literal, Optional
+
+from pydantic import BaseModel
+from rich import print
+
+from indexify.functions_sdk.data_objects import IndexifyData, RouterOutput
+
+from .api_objects import Task
+
+
+class CompletedTask(BaseModel):
+    task: Task
+    task_outcome: Literal["success", "failure"]
+    outputs: Optional[List[IndexifyData]] = None
+    router_output: Optional[RouterOutput] = None
+    errors: Optional[str] = None
+    stdout: Optional[str] = None
+    stderr: Optional[str] = None
+    reducer: bool = False
+
+
+class TaskStore:
+    def __init__(self) -> None:
+        self._tasks: Dict[str, Task] = {}
+        self._running_tasks: Dict[str, Task] = {}
+        self._finished: Dict[str, CompletedTask] = {}
+        self._retries: Dict[str, int] = {}
+        self._new_task_event = asyncio.Event()
+        self._finished_task_event = asyncio.Event()
+
+    def get_task(self, id) -> Task:
+        return self._tasks[id]
+
+    def add_tasks(self, tasks: List[Task]):
+        task: Task
+        for task in tasks:
+            if (
+                (task.id in self._tasks)
+                or (task.id in self._running_tasks)
+                or (task.id in self._finished)
+            ):
+                continue
+            print(
+                f"[bold] task store: [/bold] added task: {task.id} graph: {task.compute_graph} fn: {task.compute_fn} to queue"
+            )
+            self._tasks[task.id] = task
+            self._new_task_event.set()
+
+    async def get_runnable_tasks(self) -> Dict[str, Task]:
+        while True:
+            runnable_tasks = set(self._tasks) - set(self._running_tasks)
+            runnable_tasks = set(runnable_tasks) - set(self._finished)
+            if len(runnable_tasks) == 0:
+                await self._new_task_event.wait()
+                self._new_task_event.clear()
+            else:
+                break
+        out = {}
+        for task_id in runnable_tasks:
+            out[task_id] = self._tasks[task_id]
+            self._running_tasks[task_id] = self._tasks[task_id]
+        return out
+
+    def complete(self, outcome: CompletedTask):
+        self._retries.pop(outcome.task.id, None)
+        self._finished[outcome.task.id] = outcome
+        if outcome.task.id in self._running_tasks:
+            self._running_tasks.pop(outcome.task.id)
+        self._finished_task_event.set()
+
+    def retriable_failure(self, task_id: str):
+        self._running_tasks.pop(task_id)
+        if task_id not in self._retries:
+            self._retries[task_id] = 0
+        self._retries[task_id] += 1
+        if self._retries[task_id] > 3:
+            self._retries.pop(task_id)
+            self.complete(
+                outcome=CompletedTask(
+                    task_id=task_id, task_outcome="failed", outputs=[]
+                )
+            )
+        else:
+            self._new_task_event.set()
+
+    def mark_reported(self, task_id: str):
+        self._tasks.pop(task_id)
+        self._finished.pop(task_id)
+        print(f"[bold] task store: [/bold] removed task: {task_id} from queue")
+
+    def report_failed(self, task_id: str):
+        if self._finished[task_id].task_outcome != "Failed":
+            # An error occurred while reporting the task, mark it as failed
+            # and try reporting again.
+            self._finished[task_id].task_outcome = "Failed"
+        else:
+            # If a task is already marked as failed, remove it from the queue.
+            # The only possible error at this point is task not present at
+            # the coordinator.
+            self._tasks.pop(task_id)
+
+    def num_pending_tasks(self) -> int:
+        return len(self._tasks) + len(self._running_tasks)
+
+    async def task_outcomes(self) -> List[CompletedTask]:
+        while True:
+            if len(self._finished) == 0:
+                await self._finished_task_event.wait()
+                self._finished_task_event.clear()
+            else:
+                break
+        return self._finished.copy().values()
indexify/foo
ADDED
@@ -0,0 +1,72 @@
+./functions_sdk/local_cache.py: outputs = []
+./functions_sdk/local_cache.py: outputs.append(f.read())
+./functions_sdk/local_cache.py: return outputs
+./functions_sdk/graph.py: outputs: List[Any] = fn_wrapper.run_fn(input, acc=acc)
+./functions_sdk/graph.py: IndexifyData(payload=CborSerializer.serialize(output)) for output in outputs
+./functions_sdk/output_serializer.py:class OutputSerializer:
+./functions_sdk/output_serializer.py: normalized_outputs = []
+./functions_sdk/output_serializer.py: normalized_outputs.append(
+./functions_sdk/output_serializer.py: return normalized_outputs
+./base_client.py: def graph_outputs(
+./remote_client.py:class GraphOutputs(BaseModel):
+./remote_client.py: outputs: List[GraphOutputMetadata]
+./remote_client.py: def graph_outputs(
+./remote_client.py: f"namespaces/{self.namespace}/compute_graphs/{graph}/invocations/{invocation_id}/outputs",
+./remote_client.py: graph_outputs = GraphOutputs(**response.json())
+./remote_client.py: outputs = []
+./remote_client.py: for output in graph_outputs.outputs:
+./remote_client.py: outputs.append(output)
+./remote_client.py: return outputs
+./executor/task_reporter.py: fn_outputs = []
+./executor/task_reporter.py: f"[bold]task-reporter[/bold] uploading output of size: {len(completed_task.outputs)}"
+./executor/task_reporter.py: for output in completed_task.outputs:
+./executor/task_reporter.py: fn_outputs.append(
+./executor/task_reporter.py: ("node_outputs", (nanoid.generate(), io.BytesIO(output_bytes)))
+./executor/task_reporter.py: fn_outputs.append(
+./executor/task_reporter.py: fn_outputs.append(
+./executor/task_reporter.py: fn_outputs.append(
+./executor/task_reporter.py: if fn_outputs and len(fn_outputs) > 0:
+./executor/task_reporter.py: kwargs["files"] = fn_outputs
+./executor/task_store.py: outputs: List[IndexifyData]
+./executor/task_store.py: task_id=task_id, task_outcome="failed", outputs=[]
+./executor/downloader.py: url = f"{self.base_url}/internal/fn_outputs/{task.input_key}"
+./executor/function_worker.py: outputs: Union[List[IndexifyData], RouterOutput]
+./executor/function_worker.py: indexify_data=result.outputs,
+./executor/function_worker.py: return FunctionOutput(outputs=output, reducer=is_reducer)
+./executor/agent.py: f"Outputs: {len(task_outcome.outputs)}",
+./executor/agent.py: outputs=[],
+./executor/agent.py: outputs=[],
+./executor/agent.py: outputs=[],
+./executor/agent.py: outputs: FunctionWorkerOutput = await async_task
+./executor/agent.py: outputs if isinstance(outputs, RouterOutput) else None
+./executor/agent.py: if outputs.exception:
+./executor/agent.py: fn_outputs = []
+./executor/agent.py: fn_outputs = (
+./executor/agent.py: outputs.indexify_data if not isinstance(outputs, RouterOutput) else []
+./executor/agent.py: outputs=fn_outputs,
+./executor/agent.py: errors=outputs.exception,
+./executor/agent.py: stdout=outputs.stdout,
+./executor/agent.py: stderr=outputs.stderr,
+./executor/agent.py: reducer=outputs.reducer,
+./executor/agent.py: outputs=[],
+./local_client.py:# Holds the outputs of a
+./local_client.py: outputs: Dict[str, List[IndexifyData]]
+./local_client.py: outputs = defaultdict(list)
+./local_client.py: self._results[input.id] = outputs
+./local_client.py: self._run(g, input, outputs)
+./local_client.py: outputs: Dict[str, List[bytes]],
+./local_client.py: f"ran {node_name}: num outputs: {len(cached_output_bytes)} (cache hit)"
+./local_client.py: function_outputs: List[IndexifyData] = []
+./local_client.py: function_outputs.append(output)
+./local_client.py: outputs[node_name].append(output)
+./local_client.py: function_outputs: List[IndexifyData] = g.invoke_fn_ser(
+./local_client.py: print(f"ran {node_name}: num outputs: {len(function_outputs)}")
+./local_client.py: self._accumulators[node_name] = function_outputs[-1].model_copy()
+./local_client.py: outputs[node_name] = []
+./local_client.py: outputs[node_name].extend(function_outputs)
+./local_client.py: function_outputs_bytes: List[bytes] = [
+./local_client.py: for function_output in function_outputs
+./local_client.py: function_outputs_bytes,
+./local_client.py: for output in function_outputs:
+./local_client.py: for output in function_outputs:
+./local_client.py: def graph_outputs(
indexify/functions_sdk/data_objects.py
ADDED
@@ -0,0 +1,37 @@
+from typing import Any, Dict, List, Optional, Union
+
+from pydantic import BaseModel, Json
+
+
+class FileInput(BaseModel):
+    url: str
+    mime_type: Optional[str] = None
+    metadata: Optional[Dict[str, Json]] = None
+    sha_256: Optional[str] = None
+
+
+class RouterOutput(BaseModel):
+    edges: List[str]
+
+
+class IndexifyData(BaseModel):
+    id: Optional[str] = None
+    payload: bytes
+    payload_encoding: str = "msgpack"
+
+
+class FunctionWorkerOutput(BaseModel):
+    fn_outputs: Optional[List[IndexifyData]]
+    router_output: Optional[RouterOutput]
+    exception: Optional[str]
+    stdout: Optional[str]
+    stderr: Optional[str]
+    reducer: bool = False
+    success: bool = True
+
+
+class File(BaseModel):
+    data: bytes
+    mime_type: Optional[str] = None
+    metadata: Optional[Dict[str, Any]] = None
+    sha_256: Optional[str] = None