indexify 0.2.32__py3-none-any.whl → 0.2.33__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
indexify/executor/agent.py CHANGED
@@ -1,28 +1,21 @@
  import asyncio
  import json
- import traceback
  from concurrent.futures.process import BrokenProcessPool
  from importlib.metadata import version
  from pathlib import Path
  from typing import Dict, List, Optional

+ import structlog
  from httpx_sse import aconnect_sse
  from pydantic import BaseModel
- from rich.console import Console
- from rich.panel import Panel
- from rich.text import Text
- from rich.theme import Theme

  from indexify.common_util import get_httpx_client
  from indexify.functions_sdk.data_objects import (
      FunctionWorkerOutput,
      IndexifyData,
  )
- from indexify.functions_sdk.graph_definition import ComputeGraphMetadata
  from indexify.http_client import IndexifyClient

- from ..functions_sdk.image import ImageInformation
- from . import image_dependency_installer
  from .api_objects import ExecutorMetadata, Task
  from .downloader import DownloadedInputs, Downloader
  from .executor_tasks import DownloadGraphTask, DownloadInputTask, ExtractTask
@@ -31,16 +24,7 @@ from .runtime_probes import ProbeInfo, RuntimeProbes
  from .task_reporter import TaskReporter
  from .task_store import CompletedTask, TaskStore

- custom_theme = Theme(
-     {
-         "info": "cyan",
-         "warning": "yellow",
-         "error": "red",
-         "success": "green",
-     }
- )
-
- console = Console(theme=custom_theme)
+ logging = structlog.get_logger(module=__name__)


  class FunctionInput(BaseModel):
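
The headline change in this file is swapping rich `Console`/`Panel` output for structlog. A minimal sketch of the new logging pattern (event names are copied from the diff; how the output renders depends on your structlog configuration):

```python
import structlog

# Logger bound the same way as in the new agent.py; the `module` key rides
# along on every event this logger emits.
logger = structlog.get_logger(module=__name__)

# Events are a short string plus arbitrary key-value pairs, which structlog
# renders as structured fields instead of a formatted rich panel.
logger.info("reporting_task_outcome", task_id="task-123", outcome="success", retries=0)
logger.error("failed_to_report_task", task_id="task-123", exception="TimeoutError()")
```
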
@@ -68,21 +52,9 @@ class ExtractorAgent:
          self._config_path = config_path
          self._probe = RuntimeProbes()

-         runtime_probe: ProbeInfo = self._probe.probe()
-         self._require_image_bootstrap = (
-             True
-             if (runtime_probe.is_default_executor and self.name_alias is not None)
-             else False
-         )
-         self._executor_bootstrap_failed = False
-
-         console.print(
-             f"Require Bootstrap? {self._require_image_bootstrap}", style="cyan bold"
-         )
-
          self.num_workers = num_workers
          if config_path:
-             console.print("Running the extractor with TLS enabled", style="cyan bold")
+             logging.info("running the extractor with TLS enabled")
              self._protocol = "https"
          else:
              self._protocol = "http"
@@ -111,10 +83,8 @@ class ExtractorAgent:
          )

      async def task_completion_reporter(self):
-         console.print(Text("Starting task completion reporter", style="bold cyan"))
+         logging.info("starting task completion reporter")
          # We should copy only the keys and not the values
-         url = f"{self._protocol}://{self._server_addr}/write_content"
-
          while True:
              outcomes = await self._task_store.task_outcomes()
              for task_outcome in outcomes:
@@ -129,16 +99,14 @@
                      if "fail" in outcome
                      else f"[bold green] {outcome} [/]"
                  )
-                 console.print(
-                     Panel(
-                         f"Reporting outcome of task: {task_outcome.task.id}, function: {task_outcome.task.compute_fn}\n"
-                         f"Outcome: {style_outcome}\n"
-                         f"Num Fn Outputs: {len(task_outcome.outputs or [])}\n"
-                         f"Router Output: {task_outcome.router_output}\n"
-                         f"Retries: {task_outcome.reporting_retries}",
-                         title="Task Completion",
-                         border_style="info",
-                     )
+                 logging.info(
+                     "reporting_task_outcome",
+                     task_id=task_outcome.task.id,
+                     fn_name=task_outcome.task.compute_fn,
+                     num_outputs=len(task_outcome.outputs or []),
+                     router_output=task_outcome.router_output,
+                     outcome=task_outcome.task_outcome,
+                     retries=task_outcome.reporting_retries,
                  )

                  try:
@@ -146,15 +114,11 @@
                      self._task_reporter.report_task_outcome(completed_task=task_outcome)
                  except Exception as e:
                      # The connection was dropped in the middle of the reporting process; retry
-                     console.print(
-                         Panel(
-                             f"Failed to report task {task_outcome.task.id}\n"
-                             f"Exception: {type(e).__name__}({e})\n"
-                             f"Retries: {task_outcome.reporting_retries}\n"
-                             "Retrying...",
-                             title="Reporting Error",
-                             border_style="error",
-                         )
+                     logging.error(
+                         "failed_to_report_task",
+                         task_id=task_outcome.task.id,
+                         exception=f"exception: {type(e).__name__}({e})",
+                         retries=task_outcome.reporting_retries,
                      )
                      task_outcome.reporting_retries += 1
                      await asyncio.sleep(5)
@@ -176,44 +140,6 @@
              fn: FunctionInput
              for fn in fn_queue:
                  task: Task = self._task_store.get_task(fn.task_id)
-
-                 if self._executor_bootstrap_failed:
-                     completed_task = CompletedTask(
-                         task=task,
-                         outputs=[],
-                         task_outcome="failure",
-                     )
-                     self._task_store.complete(outcome=completed_task)
-
-                     continue
-
-                 # Bootstrap this executor. Fail the task if we can't.
-                 if self._require_image_bootstrap:
-                     try:
-                         image_info = await _get_image_info_for_compute_graph(
-                             task, self._protocol, self._server_addr, self._config_path
-                         )
-                         image_dependency_installer.executor_image_builder(
-                             image_info, self.name_alias, self.image_version
-                         )
-                         self._require_image_bootstrap = False
-                     except Exception as e:
-                         console.print(
-                             Text("Failed to bootstrap the executor ", style="red bold")
-                             + Text(f"Exception: {traceback.format_exc()}", style="red")
-                         )
-
-                         self._executor_bootstrap_failed = True
-
-                         completed_task = CompletedTask(
-                             task=task,
-                             outputs=[],
-                             task_outcome="failure",
-                         )
-                         self._task_store.complete(outcome=completed_task)
-
-                         continue
-
              async_tasks.append(
                  ExtractTask(
                      function_worker=self._function_worker,
@@ -233,12 +159,9 @@
          for async_task in done:
              if async_task.get_name() == "get_runnable_tasks":
                  if async_task.exception():
-                     console.print(
-                         Text("Task Launcher Error: ", style="red bold")
-                         + Text(
-                             f"Failed to get runnable tasks: {async_task.exception()}",
-                             style="red",
-                         )
+                     logging.error(
+                         "task_launcher_error, failed to get runnable tasks",
+                         exception=async_task.exception(),
                      )
                      continue
                  result: Dict[str, Task] = await async_task
@@ -255,12 +178,9 @@
                  )
              elif async_task.get_name() == "download_graph":
                  if async_task.exception():
-                     console.print(
-                         Text(
-                             f"Failed to download graph for task {async_task.task.id}\n",
-                             style="red bold",
-                         )
-                         + Text(f"Exception: {async_task.exception()}", style="red")
+                     logging.error(
+                         "task_launcher_error, failed to download graph",
+                         exception=async_task.exception(),
                      )
                      completed_task = CompletedTask(
                          task=async_task.task,
@@ -276,12 +196,9 @@
                  )
              elif async_task.get_name() == "download_input":
                  if async_task.exception():
-                     console.print(
-                         Text(
-                             f"Failed to download input for task {async_task.task.id}\n",
-                             style="red bold",
-                         )
-                         + Text(f"Exception: {async_task.exception()}", style="red")
+                     logging.error(
+                         "task_launcher_error, failed to download input",
+                         exception=str(async_task.exception()),
                      )
                      completed_task = CompletedTask(
                          task=async_task.task,
@@ -334,12 +251,10 @@
                      self._task_store.retriable_failure(async_task.task.id)
                      continue
                  except Exception as e:
-                     console.print(
-                         Text(
-                             f"Failed to execute task {async_task.task.id}\n",
-                             style="red bold",
-                         )
-                         + Text(f"Exception: {e}", style="red")
+                     logging.error(
+                         "failed to execute task",
+                         task_id=async_task.task.id,
+                         exception=str(e),
                      )
                      completed_task = CompletedTask(
                          task=async_task.task,
@@ -360,12 +275,6 @@
          self._should_run = True
          while self._should_run:
              url = f"{self._protocol}://{self._server_addr}/internal/executors/{self._executor_id}/tasks"
-             print(f"calling url: {url}")
-
-             def to_sentence_case(snake_str):
-                 words = snake_str.split("_")
-                 return words[0].capitalize() + "" + " ".join(words[1:])
-
              runtime_probe: ProbeInfo = self._probe.probe()

              executor_version = version("indexify")
@@ -391,16 +300,7 @@
                  labels=runtime_probe.labels,
              ).model_dump()

-             panel_content = "\n".join(
-                 [f"{to_sentence_case(key)}: {value}" for key, value in data.items()]
-             )
-             console.print(
-                 Panel(
-                     panel_content,
-                     title="attempting to Register Executor",
-                     border_style="cyan",
-                 )
-             )
+             logging.info("registering_executor", executor_id=self._executor_id)
              try:
                  async with get_httpx_client(self._config_path, True) as client:
                      async with aconnect_sse(
@@ -412,11 +312,15 @@
                      ) as event_source:
                          if not event_source.response.is_success:
                              resp = await event_source.response.aread().decode("utf-8")
-                             console.print(f"failed to register: {str(resp)}")
+                             logging.error(
+                                 f"failed to register",
+                                 resp=str(resp),
+                                 status_code=event_source.response.status_code,
+                             )
                              await asyncio.sleep(5)
                              continue
-                         console.print(
-                             Text("executor registered successfully", style="bold green")
+                         logging.info(
+                             "executor_registered", executor_id=self._executor_id
                          )
                          async for sse in event_source.aiter_sse():
                              data = json.loads(sse.data)
@@ -427,15 +331,12 @@
                              )
                              self._task_store.add_tasks(tasks)
              except Exception as e:
-                 console.print(
-                     Text("registration Error: ", style="red bold")
-                     + Text(f"failed to register: {e}", style="red")
-                 )
+                 logging.error(f"failed to register: {e}")
                  await asyncio.sleep(5)
                  continue

      async def _shutdown(self, loop):
-         console.print(Text("shutting down agent...", style="bold yellow"))
+         logging.info("shutting_down")
          self._should_run = False
          for task in asyncio.all_tasks(loop):
              task.cancel()
@@ -443,27 +344,3 @@
      def shutdown(self, loop):
          self._function_worker.shutdown()
          loop.create_task(self._shutdown(loop))
-
-
- async def _get_image_info_for_compute_graph(
-     task: Task, protocol, server_addr, config_path: str
- ) -> ImageInformation:
-     namespace = task.namespace
-     graph_name: str = task.compute_graph
-     compute_fn_name: str = task.compute_fn
-
-     http_client = IndexifyClient(
-         service_url=f"{protocol}://{server_addr}",
-         namespace=namespace,
-         config_path=config_path,
-     )
-     compute_graph: ComputeGraphMetadata = http_client.graph(graph_name)
-
-     console.print(
-         Text(
-             f"Compute_fn name {compute_fn_name}, ComputeGraph {compute_graph} \n",
-             style="red yellow",
-         )
-     )
-
-     return compute_graph.nodes[compute_fn_name].compute_fn.image_information
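
`_shutdown` now logs a structured event and then cancels every task on the loop. A self-contained sketch of that cancellation pattern, with illustrative names not taken from the package:

```python
import asyncio

async def main() -> None:
    async def worker(name: str) -> None:
        try:
            while True:
                await asyncio.sleep(1)  # stand-in for the agent's long-running loops
        except asyncio.CancelledError:
            print(f"{name} cancelled cleanly")
            raise

    tasks = [asyncio.create_task(worker(f"w{i}")) for i in range(2)]
    await asyncio.sleep(0.1)
    for task in tasks:
        task.cancel()  # mirrors the loop over asyncio.all_tasks(loop) above
    await asyncio.gather(*tasks, return_exceptions=True)

asyncio.run(main())
```
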
indexify/executor/downloader.py CHANGED
@@ -2,10 +2,8 @@ import os
  from typing import Optional

  import httpx
+ import structlog
  from pydantic import BaseModel
- from rich.console import Console
- from rich.panel import Panel
- from rich.theme import Theme

  from indexify.functions_sdk.data_objects import IndexifyData

@@ -13,15 +11,7 @@ from ..common_util import get_httpx_client
  from ..functions_sdk.object_serializer import JsonSerializer, get_serializer
  from .api_objects import Task

- custom_theme = Theme(
-     {
-         "info": "cyan",
-         "warning": "yellow",
-         "error": "red",
-     }
- )
-
- console = Console(theme=custom_theme)
+ logger = structlog.get_logger(module=__name__)


  class DownloadedInputs(BaseModel):
@@ -42,26 +32,21 @@ class Downloader:
          if os.path.exists(path):
              return path

-         console.print(
-             Panel(
-                 f"Downloading graph: {name}\nPath: {path}",
-                 title="downloader",
-                 border_style="cyan",
-             )
+         logger.info(
+             "downloading graph", namespace=namespace, name=name, version=version
          )
-
          response = self._client.get(
              f"{self.base_url}/internal/namespaces/{namespace}/compute_graphs/{name}/code"
          )
          try:
              response.raise_for_status()
          except httpx.HTTPStatusError as e:
-             console.print(
-                 Panel(
-                     f"Failed to download graph: {name}\nError: {response.text}",
-                     title="downloader error",
-                     border_style="error",
-                 )
+             logger.error(
+                 "failed to download graph",
+                 namespace=namespace,
+                 name=name,
+                 version=version,
+                 error=response.text,
              )
              raise

@@ -81,25 +66,17 @@
          if task.reducer_output_id:
              reducer_url = f"{self.base_url}/namespaces/{task.namespace}/compute_graphs/{task.compute_graph}/invocations/{task.invocation_id}/fn/{task.compute_fn}/output/{task.reducer_output_id}"

-         console.print(
-             Panel(
-                 f"downloading input\nURL: {url} \n reducer input URL: {reducer_url}",
-                 title="downloader",
-                 border_style="cyan",
-             )
-         )
-
+         logger.info("downloading input", url=url, reducer_url=reducer_url)
          response = self._client.get(url)

          try:
              response.raise_for_status()
          except httpx.HTTPStatusError as e:
-             console.print(
-                 Panel(
-                     f"failed to download input: {task.input_key}\nError: {response.text}",
-                     title="downloader error",
-                     border_style="error",
-                 )
+             logger.error(
+                 "failed to download input",
+                 url=url,
+                 reducer_url=reducer_url,
+                 error=response.text,
              )
              raise

@@ -108,8 +85,6 @@
              if response.headers["content-type"] == JsonSerializer.content_type
              else "cloudpickle"
          )
-         serializer = get_serializer(encoder)
-
          if task.invocation_id == input_id:
              return DownloadedInputs(
                  input=IndexifyData(
@@ -117,26 +92,24 @@
                  ),
              )

-         deserialized_content = serializer.deserialize(response.content)
+         input_payload = response.content

          if reducer_url:
-             init_value = self._client.get(reducer_url)
+             response = self._client.get(reducer_url)
              try:
-                 init_value.raise_for_status()
+                 response.raise_for_status()
+                 init_value = response.content
              except httpx.HTTPStatusError as e:
-                 console.print(
-                     Panel(
-                         f"failed to download reducer output: {task.reducer_output_id}\nError: {init_value.text}",
-                         title="downloader error",
-                         border_style="error",
-                     )
+                 logger.error(
+                     "failed to download reducer output",
+                     url=reducer_url,
+                     error=response.text,
                  )
                  raise
-             init_value = serializer.deserialize(init_value.content)
              return DownloadedInputs(
                  input=IndexifyData(
                      input_id=task.invocation_id,
-                     payload=deserialized_content,
+                     payload=input_payload,
                      encoder=encoder,
                  ),
                  init_value=IndexifyData(
@@ -147,7 +120,7 @@
          return DownloadedInputs(
              input=IndexifyData(
                  input_id=task.invocation_id,
-                 payload=deserialized_content,
+                 payload=input_payload,
                  encoder=encoder,
              )
          )
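
The downloader no longer deserializes payloads eagerly; it stores the raw response bytes in `IndexifyData.payload` together with the encoder name, leaving decoding to the consumer. A hypothetical sketch of that deferred-decoding idea (`decode_payload` is illustrative, not package API):

```python
import json
from typing import Any

import cloudpickle

def decode_payload(payload: bytes, encoder: str) -> Any:
    # Decode wire bytes only at the point of use, based on the recorded encoder.
    if encoder == "json":
        return json.loads(payload.decode("utf-8"))
    if encoder == "cloudpickle":
        return cloudpickle.loads(payload)
    raise ValueError(f"unknown encoder: {encoder}")

raw = json.dumps({"x": 1}).encode("utf-8")  # bytes as they come off the wire
assert decode_payload(raw, "json") == {"x": 1}
```
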
indexify/executor/task_reporter.py CHANGED
@@ -1,10 +1,9 @@
- import io
  from typing import Optional

  import nanoid
+ import structlog
  from httpx import Timeout
  from pydantic import BaseModel
- from rich import print

  from indexify.common_util import get_httpx_client
  from indexify.executor.api_objects import RouterOutput as ApiRouterOutput
@@ -12,6 +11,8 @@ from indexify.executor.api_objects import TaskResult
  from indexify.executor.task_store import CompletedTask
  from indexify.functions_sdk.object_serializer import get_serializer

+ logger = structlog.get_logger(__name__)
+

  # https://github.com/psf/requests/issues/1081#issuecomment-428504128
  class ForceMultipartDict(dict):
@@ -46,15 +47,14 @@ class TaskReporter:
          fn_outputs = []
          for output in completed_task.outputs or []:
              serializer = get_serializer(output.encoder)
-             serialized_output = serializer.serialize(output.payload)
              fn_outputs.append(
                  (
                      "node_outputs",
-                     (nanoid.generate(), serialized_output, serializer.content_type),
+                     (nanoid.generate(), output.payload, serializer.content_type),
                  )
              )
              report.output_count += 1
-             report.output_total_bytes += len(serialized_output)
+             report.output_total_bytes += len(output.payload)

          if completed_task.stdout:
              fn_outputs.append(
@@ -109,14 +109,17 @@
              + report.stderr_total_bytes
          )

-         print(
-             f"[bold]task-reporter[/bold] reporting task outcome "
-             f"task_id={completed_task.task.id} retries={completed_task.reporting_retries} "
-             f"total_bytes={total_bytes} total_files={report.output_count + report.stdout_count + report.stderr_count} "
-             f"output_files={report.output_count} output_bytes={total_bytes} "
-             f"stdout_bytes={report.stdout_total_bytes} stderr_bytes={report.stderr_total_bytes} "
+         logger.info(
+             "reporting task outcome",
+             task_id=completed_task.task.id,
+             retries=completed_task.reporting_retries,
+             total_bytes=total_bytes,
+             total_files=report.output_count + report.stdout_count + report.stderr_count,
+             output_files=report.output_count,
+             output_bytes=total_bytes,
+             stdout_bytes=report.stdout_total_bytes,
+             stderr_bytes=report.stderr_total_bytes,
          )
-
          #
          kwargs = {
              "data": {"task_result": task_result_data},
@@ -137,15 +140,23 @@
                  **kwargs,
              )
          except Exception as e:
-             print(
-                 f"[bold]task-reporter[/bold] failed to report task outcome retries={completed_task.reporting_retries} {type(e).__name__}({e})"
+             logger.error(
+                 "failed to report task outcome",
+                 task_id=completed_task.task.id,
+                 retries=completed_task.reporting_retries,
+                 error=type(e).__name__,
+                 message=str(e),
              )
              raise e

          try:
              response.raise_for_status()
          except Exception as e:
-             print(
-                 f"[bold]task-reporter[/bold] failed to report task outcome retries={completed_task.reporting_retries} {response.text}"
+             logger.error(
+                 "failed to report task outcome",
+                 task_id=completed_task.task.id,
+                 retries=completed_task.reporting_retries,
+                 status_code=response.status_code,
+                 response_text=response.text,
              )
              raise e
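
Since payloads now arrive as bytes, the reporter can place them directly into the multipart form parts it uploads. A sketch of that shape with httpx; the endpoint URL and field values are assumptions, for illustration only:

```python
import httpx

# Each output becomes a "node_outputs" file part of
# (filename, raw bytes, content type), sent alongside regular form data.
files = [
    ("node_outputs", ("out-1", b'{"x": 1}', "application/json")),
    ("node_outputs", ("out-2", b"\x80\x05...", "application/octet-stream")),
]
response = httpx.post(
    "http://localhost:8900/internal/ingest_files",  # assumed endpoint
    data={"task_result": '{"outcome": "success"}'},
    files=files,
)
response.raise_for_status()
```
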
indexify/functions_sdk/graph.py CHANGED
@@ -1,3 +1,4 @@
+ import json
  import sys
  from collections import defaultdict
  from queue import deque
@@ -101,9 +102,7 @@ class Graph:
              return self

          if issubclass(indexify_fn, IndexifyFunction) and indexify_fn.accumulate:
-             self.accumulator_zero_values[indexify_fn.name] = (
-                 indexify_fn.accumulate().model_dump()
-             )
+             self.accumulator_zero_values[indexify_fn.name] = indexify_fn.accumulate()

          self.nodes[indexify_fn.name] = indexify_fn
          return self
@@ -167,7 +166,8 @@
              reducer=is_reducer,
              image_name=start_node.image._image_name,
              image_information=start_node.image.to_image_information(),
-             encoder=start_node.encoder,
+             input_encoder=start_node.input_encoder,
+             output_encoder=start_node.output_encoder,
          )
          metadata_edges = self.edges.copy()
          metadata_nodes = {}
@@ -179,7 +179,8 @@
                  description=node.description or "",
                  source_fn=node_name,
                  target_fns=self.routers[node_name],
-                 encoder=node.encoder,
+                 input_encoder=node.input_encoder,
+                 output_encoder=node.output_encoder,
                  image_name=node.image._image_name,
                  image_information=node.image.to_image_information(),
              )
@@ -193,7 +194,8 @@
                      reducer=node.accumulate is not None,
                      image_name=node.image._image_name,
                      image_information=node.image.to_image_information(),
-                     encoder=node.encoder,
+                     input_encoder=node.input_encoder,
+                     output_encoder=node.output_encoder,
                  )
              )

@@ -212,19 +214,19 @@
      def run(self, block_until_done: bool = False, **kwargs) -> str:
          self.validate_graph()
          start_node = self.nodes[self._start_node]
-         serializer = get_serializer(start_node.encoder)
+         serializer = get_serializer(start_node.input_encoder)
          input = IndexifyData(
              id=generate(),
              payload=serializer.serialize(kwargs),
-             encoder=start_node.encoder,
+             encoder=start_node.input_encoder,
          )
          print(f"[bold] Invoking {self._start_node}[/bold]")
          outputs = defaultdict(list)
          for k, v in self.accumulator_zero_values.items():
              node = self.nodes[k]
-             serializer = get_serializer(node.encoder)
+             serializer = get_serializer(node.input_encoder)
              self._accumulator_values[k] = IndexifyData(
-                 payload=serializer.serialize(v), encoder=node.encoder
+                 payload=serializer.serialize(v), encoder=node.input_encoder
              )
          self._results[input.id] = outputs
          ctx = GraphInvocationContext(
@@ -287,7 +289,8 @@
          fn_outputs = function_outputs.ser_outputs
          print(f"ran {node_name}: num outputs: {len(fn_outputs)}")
          if self._accumulator_values.get(node_name, None) is not None:
-             self._accumulator_values[node_name] = fn_outputs[-1].model_copy()
+             acc_output = fn_outputs[-1].copy()
+             self._accumulator_values[node_name] = acc_output
          outputs[node_name] = []
          if fn_outputs:
              outputs[node_name].extend(fn_outputs)
@@ -339,7 +342,7 @@
              raise ValueError(f"no results found for fn {fn_name} on graph {self.name}")
          fn = self.nodes[fn_name]
          fn_model = self.get_function(fn_name).get_output_model()
-         serializer = get_serializer(fn.encoder)
+         serializer = get_serializer(fn.output_encoder)
          outputs = []
          for result in results[fn_name]:
              payload_dict = serializer.deserialize(result.payload)
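
With this change the accumulator's zero value is stored and passed around as a plain object (serialized with the node's input encoder) rather than a `model_dump()`ed dict. A hedged example of a reducer-style function, assuming the reducer call signature the SDK uses (accumulator first, then the incoming value):

```python
from pydantic import BaseModel

from indexify.functions_sdk.indexify_functions import indexify_function

class Total(BaseModel):
    val: int = 0

# `accumulate=Total` means Total() is the zero value; after this diff it is
# kept as a Total instance instead of being dumped to a dict.
@indexify_function(accumulate=Total)
def add(total: Total, x: int) -> Total:
    total.val += x
    return total
```
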
indexify/functions_sdk/graph_definition.py CHANGED
@@ -14,7 +14,8 @@ class FunctionMetadata(BaseModel):
      reducer: bool = False
      image_name: str
      image_information: ImageInformation
-     encoder: str = "cloudpickle"
+     input_encoder: str = "cloudpickle"
+     output_encoder: str = "cloudpickle"


  class RouterMetadata(BaseModel):
@@ -24,7 +25,8 @@ class RouterMetadata(BaseModel):
      target_fns: List[str]
      image_name: str
      image_information: ImageInformation
-     encoder: str = "cloudpickle"
+     input_encoder: str = "cloudpickle"
+     output_encoder: str = "cloudpickle"


  class NodeMetadata(BaseModel):
@@ -49,12 +51,12 @@ class ComputeGraphMetadata(BaseModel):
      replaying: bool = False

      def get_input_payload_serializer(self):
-         return get_serializer(self.start_node.compute_fn.encoder)
+         return get_serializer(self.start_node.compute_fn.input_encoder)

      def get_input_encoder(self) -> str:
          if self.start_node.compute_fn:
-             return self.start_node.compute_fn.encoder
+             return self.start_node.compute_fn.input_encoder
          elif self.start_node.dynamic_router:
-             return self.start_node.dynamic_router.encoder
+             return self.start_node.dynamic_router.input_encoder

          raise ValueError("start node is not set on the graph")
indexify/functions_sdk/indexify_functions.py CHANGED
@@ -83,7 +83,8 @@ class IndexifyFunction:
      image: Optional[Image] = DEFAULT_IMAGE_3_10
      placement_constraints: List[PlacementConstraints] = []
      accumulate: Optional[Type[Any]] = None
-     encoder: Optional[str] = "cloudpickle"
+     input_encoder: Optional[str] = "cloudpickle"
+     output_encoder: Optional[str] = "cloudpickle"

      def run(self, *args, **kwargs) -> Union[List[Any], Any]:
          pass
@@ -95,7 +96,7 @@

      @classmethod
      def deserialize_output(cls, output: IndexifyData) -> Any:
-         serializer = get_serializer(cls.encoder)
+         serializer = get_serializer(cls.output_encoder)
          return serializer.deserialize(output.payload)


@@ -104,7 +105,8 @@ class IndexifyRouter:
      description: str = ""
      image: Optional[Image] = DEFAULT_IMAGE_3_10
      placement_constraints: List[PlacementConstraints] = []
-     encoder: Optional[str] = "cloudpickle"
+     input_encoder: Optional[str] = "cloudpickle"
+     output_encoder: Optional[str] = "cloudpickle"

      def run(self, *args, **kwargs) -> Optional[List[IndexifyFunction]]:
          pass
@@ -120,7 +122,8 @@ def indexify_router(
      description: Optional[str] = "",
      image: Optional[Image] = DEFAULT_IMAGE_3_10,
      placement_constraints: List[PlacementConstraints] = [],
-     encoder: Optional[str] = "cloudpickle",
+     input_encoder: Optional[str] = "cloudpickle",
+     output_encoder: Optional[str] = "cloudpickle",
  ):
      def construct(fn):
          # Get function signature using inspect.signature
@@ -144,7 +147,8 @@
              ),
              "image": image,
              "placement_constraints": placement_constraints,
-             "encoder": encoder,
+             "input_encoder": input_encoder,
+             "output_encoder": output_encoder,
              "run": run,
          }

@@ -158,7 +162,8 @@ def indexify_function(
      description: Optional[str] = "",
      image: Optional[Image] = DEFAULT_IMAGE_3_10,
      accumulate: Optional[Type[BaseModel]] = None,
-     encoder: Optional[str] = "cloudpickle",
+     input_encoder: Optional[str] = "cloudpickle",
+     output_encoder: Optional[str] = "cloudpickle",
      placement_constraints: List[PlacementConstraints] = [],
  ):
      def construct(fn):
@@ -184,7 +189,8 @@
              "image": image,
              "placement_constraints": placement_constraints,
              "accumulate": accumulate,
-             "encoder": encoder,
+             "input_encoder": input_encoder,
+             "output_encoder": output_encoder,
              "run": run,
          }

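The single `encoder` knob is split into `input_encoder` and `output_encoder` on functions, routers, and both decorators, each still defaulting to `"cloudpickle"`. A hedged example using the new parameters (the function body is illustrative):

```python
from typing import List

from indexify.functions_sdk.indexify_functions import indexify_function

# Accept JSON-encoded inputs but emit cloudpickle-encoded outputs.
@indexify_function(input_encoder="json", output_encoder="cloudpickle")
def tokenize(text: str) -> List[str]:
    return text.split()
```
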
@@ -231,6 +237,18 @@ class IndexifyFunctionWrapper:
          )
          return return_type

+     def get_input_types(self) -> Dict[str, Any]:
+         if not isinstance(self.indexify_function, IndexifyFunction):
+             raise TypeError("Input must be an instance of IndexifyFunction")
+
+         extract_method = self.indexify_function.run
+         type_hints = get_type_hints(extract_method)
+         return {
+             k: v
+             for k, v in type_hints.items()
+             if k != "return" and not is_pydantic_model_from_annotation(v)
+         }
+
      def run_router(
          self, input: Union[Dict, Type[BaseModel]]
      ) -> Tuple[List[str], Optional[str]]:
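
The new `get_input_types` helper returns the `run` method's parameter type hints, dropping the return annotation and any pydantic-model parameters. A standalone sketch of the same computation (`example_run` is illustrative):

```python
from typing import get_type_hints

from pydantic import BaseModel

def example_run(text: str, limit: int, config: BaseModel) -> list:
    ...

hints = get_type_hints(example_run)
input_types = {
    k: v
    for k, v in hints.items()
    # stand-in for is_pydantic_model_from_annotation(v)
    if k != "return" and not (isinstance(v, type) and issubclass(v, BaseModel))
}
print(input_types)  # {'text': <class 'str'>, 'limit': <class 'int'>}
```
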
@@ -280,20 +298,17 @@
          self, name: str, input: IndexifyData, acc: Optional[Any] = None
      ) -> FunctionCallResult:
          input = self.deserialize_input(name, input)
-         serializer = get_serializer(self.indexify_function.encoder)
+         input_serializer = get_serializer(self.indexify_function.input_encoder)
+         output_serializer = get_serializer(self.indexify_function.output_encoder)
          if acc is not None:
-             acc = self.indexify_function.accumulate.model_validate(
-                 serializer.deserialize(acc.payload)
-             )
+             acc = input_serializer.deserialize(acc.payload)
          if acc is None and self.indexify_function.accumulate is not None:
-             acc = self.indexify_function.accumulate.model_validate(
-                 self.indexify_function.accumulate()
-             )
+             acc = self.indexify_function.accumulate()
          outputs, err = self.run_fn(input, acc=acc)
          ser_outputs = [
              IndexifyData(
-                 payload=serializer.serialize(output),
-                 encoder=self.indexify_function.encoder,
+                 payload=output_serializer.serialize(output),
+                 encoder=self.indexify_function.output_encoder,
              )
              for output in outputs
          ]
indexify/functions_sdk/object_serializer.py CHANGED
@@ -1,7 +1,7 @@
- from typing import Any, List
+ import json
+ from typing import Any, List, Type

  import cloudpickle
- import jsonpickle


  def get_serializer(serializer_type: str) -> Any:
@@ -22,19 +22,29 @@ class JsonSerializer:

      @staticmethod
      def serialize(data: Any) -> str:
-         return jsonpickle.encode(data)
+         try:
+             return json.dumps(data)
+         except Exception as e:
+             raise ValueError(f"failed to serialize data with json: {e}")

      @staticmethod
      def deserialize(data: str) -> Any:
-         return jsonpickle.decode(data)
+         try:
+             if isinstance(data, bytes):
+                 data = data.decode("utf-8")
+             return json.loads(data)
+         except Exception as e:
+             raise ValueError(f"failed to deserialize data with json: {e}")

      @staticmethod
      def serialize_list(data: List[Any]) -> str:
-         return jsonpickle.encode(data)
+         return json.dumps(data)

      @staticmethod
-     def deserialize_list(data: str) -> List[Any]:
-         return jsonpickle.decode(data)
+     def deserialize_list(data: str, t: Type) -> List[Any]:
+         if isinstance(data, bytes):
+             data = data.decode("utf-8")
+         return json.loads(data)


  class CloudPickleSerializer:
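
`JsonSerializer` now wraps stdlib `json` instead of `jsonpickle`, so only JSON-native types round-trip, and byte strings are decoded before parsing. A minimal stand-in mirroring the new behavior:

```python
import json
from typing import Any

def serialize(data: Any) -> str:
    try:
        return json.dumps(data)
    except Exception as e:
        raise ValueError(f"failed to serialize data with json: {e}")

def deserialize(data) -> Any:
    if isinstance(data, bytes):
        data = data.decode("utf-8")  # payloads may arrive as raw bytes
    return json.loads(data)

payload = serialize({"doc": "a.txt", "chunks": [1, 2, 3]})
assert deserialize(payload.encode("utf-8")) == {"doc": "a.txt", "chunks": [1, 2, 3]}
```
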
indexify/http_client.py CHANGED
@@ -274,10 +274,10 @@ class IndexifyClient:
          self,
          graph: str,
          block_until_done: bool = False,
-         serializer: str = "cloudpickle",
+         input_encoding: str = "cloudpickle",
          **kwargs,
      ) -> str:
-         serializer = get_serializer(serializer)
+         serializer = get_serializer(input_encoding)
          ser_input = serializer.serialize(kwargs)
          params = {"block_until_finish": block_until_done}
          kwargs = {
@@ -351,11 +351,11 @@
          )
          response.raise_for_status()
          content_type = response.headers.get("Content-Type")
-         serializer = get_serializer(content_type)
-         decoded_response = serializer.deserialize(response.content)
-         return IndexifyData(
-             id=output_id, payload=decoded_response, encoder=serializer.encoding_type
-         )
+         if content_type == "application/octet-stream":
+             encoding = "cloudpickle"
+         else:
+             encoding = "json"
+         return IndexifyData(id=output_id, payload=response.content, encoder=encoding)

      def graph_outputs(
          self,
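
The client now keeps output payloads as raw bytes and derives the encoder name from the response Content-Type. A standalone sketch of that mapping (the helper name is illustrative):

```python
from typing import Optional

def encoding_for_content_type(content_type: Optional[str]) -> str:
    # octet-stream payloads are treated as cloudpickle; anything else as JSON
    if content_type == "application/octet-stream":
        return "cloudpickle"
    return "json"

assert encoding_for_content_type("application/octet-stream") == "cloudpickle"
assert encoding_for_content_type("application/json") == "json"
```
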
indexify-0.2.33.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: indexify
- Version: 0.2.32
+ Version: 0.2.33
  Summary: Python Client for Indexify
  Home-page: https://github.com/tensorlakeai/indexify
  License: Apache 2.0
@@ -18,11 +18,11 @@ Requires-Dist: cloudpickle (>=3.1.0,<4.0.0)
  Requires-Dist: docker (>=7.1.0,<8.0.0)
  Requires-Dist: httpx-sse (>=0.4.0,<0.5.0)
  Requires-Dist: httpx[http2] (>=0,<1)
- Requires-Dist: jsonpickle (>=4.0.0,<5.0.0)
  Requires-Dist: nanoid (>=2.0.0,<3.0.0)
  Requires-Dist: pydantic (==2.10.2)
  Requires-Dist: pyyaml (>=6,<7)
  Requires-Dist: rich (>=13.9.2,<14.0.0)
+ Requires-Dist: structlog (>=24.4.0,<25.0.0)
  Requires-Dist: typer (>=0.13.0,<0.14.0)
  Project-URL: Repository, https://github.com/tensorlakeai/indexify
  Description-Content-Type: text/markdown
indexify-0.2.33.dist-info/RECORD CHANGED
@@ -5,31 +5,31 @@ indexify/data_loaders/__init__.py,sha256=Y5NEuseTcYAICRiweYw5wBQ2m2YplbsY21I7df-
  indexify/data_loaders/local_directory_loader.py,sha256=fCrgj5drnW71ZUdDDvcB1-VJjIs1w6Q8sEW0HSGSAiA,1247
  indexify/data_loaders/url_loader.py,sha256=32SERljcq1Xsi4RdLz2dgyk2TER5pQPTtXl3gUzwHbY,1533
  indexify/error.py,sha256=qAWr8R6AxPkjsxHSzXTc8zqYnNO_AjOqqYEPsQvF1Zs,238
- indexify/executor/agent.py,sha256=Zt-KU2yrOuFuHu8CiOg-QmAzkNU1idcv5UZ3YN2YSDU,18649
+ indexify/executor/agent.py,sha256=xWxJqBrpQsLo4RnstbD7HJSoLBwOf_3qlxD1o0jVhkg,14061
  indexify/executor/api_objects.py,sha256=mvmwGbK4paJNQGFvbtNHMPpiH_LpVhrlRnCcrqS6HOQ,859
- indexify/executor/downloader.py,sha256=wwkTdKRrDBHwKQOb_3uUb8pVl1tvg4QzOcYFnGQwNtA,4951
+ indexify/executor/downloader.py,sha256=dHLxoBnX8-Bh4yZtFDYptZNF6rlVtmTk_70JK8Ect5w,4184
  indexify/executor/executor_tasks.py,sha256=A0UIEZ5VaB6zSkFQG81UmTW0E57MTYhGlaXuAbRV8lQ,1884
  indexify/executor/function_worker.py,sha256=wRW2-X9dNI80KhwTD1vD-pcyetsVKVs6vVdg7L7JjcQ,6462
  indexify/executor/image_dependency_installer.py,sha256=ct8GmzgkaPi6NAblk68IJJWo5MecIUubELotmSrgoRQ,1759
  indexify/executor/indexify_executor.py,sha256=2Ut_VX-Su_lm4b4aEROyRJ3gXx-uFHA-V7EN0sWiARE,771
  indexify/executor/runtime_probes.py,sha256=mjw2_mGQ622wRT_39WPGGgPEZQTgtrf3-ICcUUZOeyg,2126
- indexify/executor/task_reporter.py,sha256=4unHxLUHedKwIoqO3e5YdDJpUe_pJng-vHReoqcPNNU,5141
+ indexify/executor/task_reporter.py,sha256=XlEhNf_ScNnzG67zbtVwL7_9Bo8MvPZiHLI5UHymUnM,5305
  indexify/executor/task_store.py,sha256=JlRlWwAm4YjFRkTNRx-6GsUcmOzcyvzb5Csa5XDpRTI,3982
  indexify/functions_sdk/data_objects.py,sha256=wXbUa9hjU6rsXmmk19vQ5Kixf3FsI59VBWPNmHasAX0,854
- indexify/functions_sdk/graph.py,sha256=ewm-XUlqIXA0RSl1JxU91yT2XXK3WRqrD9_YSvlK_44,12884
- indexify/functions_sdk/graph_definition.py,sha256=UeC0PMchgZgo2T0Goog0SW1lIRdFf6ug4iHOS9ItRvI,1622
+ indexify/functions_sdk/graph.py,sha256=pca6LKbPAfYT-BMGTVL5sbPuuOhvvr_Yve6u3H0NWVk,13126
+ indexify/functions_sdk/graph_definition.py,sha256=rJmGcy9u5A_Sme6Ol33NsCnSKQVjyUfeN9LnH3bU88Y,1732
  indexify/functions_sdk/graph_validation.py,sha256=mN2Fcp91GIwFZEQP6z_qGqt4LkLM70SnI7AWBi4CmKQ,2509
  indexify/functions_sdk/image.py,sha256=QK0H6KxLWriB_z4M0kunKzzHdHxYLWL670RPYgYuf_8,1762
- indexify/functions_sdk/indexify_functions.py,sha256=cPUN7Zlgkwl6YUOuhgKogPdoBakQuf-VspQvU3IDmfU,10422
+ indexify/functions_sdk/indexify_functions.py,sha256=J-etsuC_IGNCsUfbsJMrolyZVHZsurIMC5IaMQGiZnM,11187
  indexify/functions_sdk/local_cache.py,sha256=cNWF67zbhbTJe3g86hyLBy3Rqzs6dNvp2SjLazGZWvw,1348
- indexify/functions_sdk/object_serializer.py,sha256=pOgUOWbRNRix9uZT0aQn0LTCnJCeMNGO1nAE0jAybmg,1546
+ indexify/functions_sdk/object_serializer.py,sha256=R58ALsl2Lb87ii6km4D6hBBsqRs_CHNISxhUICE2d9o,1931
  indexify/functions_sdk/pipeline.py,sha256=KmxZE8eBFAQ4bbEcYURXXR26HSyoAT3O6iu9H38-OXE,974
- indexify/http_client.py,sha256=5d70A6j2vJBn3OfUVjrQvnPml07p7fxOXWBUAUfAkiU,15915
+ indexify/http_client.py,sha256=iLafZagCFnlTS6uHfOjInogjg0uXW_zXEspIN7ttB5I,15903
  indexify/remote_graph.py,sha256=aox9NibZIU8YDiP92syerFSjiZc2jVPkF6g2kfqOvCA,5003
  indexify/remote_pipeline.py,sha256=oqx57rSPszNS3DToXO_nf-CKqkCZWptm1u_p3orV_gQ,790
  indexify/settings.py,sha256=Ny59mzYI4gbXoK8hjx66a_men6ndbd1J1zCTcKOoyzg,50
- indexify-0.2.32.dist-info/LICENSE.txt,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
- indexify-0.2.32.dist-info/METADATA,sha256=3-sKMgJvXaZByfEAUE6h2fBtymF3pw7MXhXvv9gwhnU,6196
- indexify-0.2.32.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
- indexify-0.2.32.dist-info/entry_points.txt,sha256=Pih7WV-XMpAzI5dEvROcpLr-ybVhd9Y-AtuzBKUdcDs,49
- indexify-0.2.32.dist-info/RECORD,,
+ indexify-0.2.33.dist-info/LICENSE.txt,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ indexify-0.2.33.dist-info/METADATA,sha256=73gZFa3TE-3TZ9rZl5yxQ9E4k-QagvGwh00Pc_TxHuY,6197
+ indexify-0.2.33.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+ indexify-0.2.33.dist-info/entry_points.txt,sha256=Pih7WV-XMpAzI5dEvROcpLr-ybVhd9Y-AtuzBKUdcDs,49
+ indexify-0.2.33.dist-info/RECORD,,