indexify 0.2.5__tar.gz → 0.2.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. {indexify-0.2.5 → indexify-0.2.7}/PKG-INFO +1 -1
  2. {indexify-0.2.5 → indexify-0.2.7}/indexify/cli.py +8 -1
  3. {indexify-0.2.5 → indexify-0.2.7}/indexify/executor/function_worker.py +7 -2
  4. {indexify-0.2.5 → indexify-0.2.7}/indexify/functions_sdk/graph.py +20 -14
  5. {indexify-0.2.5 → indexify-0.2.7}/indexify/functions_sdk/graph_definition.py +5 -0
  6. {indexify-0.2.5 → indexify-0.2.7}/indexify/functions_sdk/image.py +1 -1
  7. {indexify-0.2.5 → indexify-0.2.7}/indexify/http_client.py +9 -6
  8. {indexify-0.2.5 → indexify-0.2.7}/indexify/remote_graph.py +7 -1
  9. {indexify-0.2.5 → indexify-0.2.7}/pyproject.toml +1 -1
  10. {indexify-0.2.5 → indexify-0.2.7}/LICENSE.txt +0 -0
  11. {indexify-0.2.5 → indexify-0.2.7}/README.md +0 -0
  12. {indexify-0.2.5 → indexify-0.2.7}/indexify/__init__.py +0 -0
  13. {indexify-0.2.5 → indexify-0.2.7}/indexify/data_loaders/__init__.py +0 -0
  14. {indexify-0.2.5 → indexify-0.2.7}/indexify/data_loaders/local_directory_loader.py +0 -0
  15. {indexify-0.2.5 → indexify-0.2.7}/indexify/data_loaders/url_loader.py +0 -0
  16. {indexify-0.2.5 → indexify-0.2.7}/indexify/error.py +0 -0
  17. {indexify-0.2.5 → indexify-0.2.7}/indexify/executor/agent.py +0 -0
  18. {indexify-0.2.5 → indexify-0.2.7}/indexify/executor/api_objects.py +0 -0
  19. {indexify-0.2.5 → indexify-0.2.7}/indexify/executor/downloader.py +0 -0
  20. {indexify-0.2.5 → indexify-0.2.7}/indexify/executor/executor_tasks.py +0 -0
  21. {indexify-0.2.5 → indexify-0.2.7}/indexify/executor/indexify_executor.py +0 -0
  22. {indexify-0.2.5 → indexify-0.2.7}/indexify/executor/runtime_probes.py +0 -0
  23. {indexify-0.2.5 → indexify-0.2.7}/indexify/executor/task_reporter.py +0 -0
  24. {indexify-0.2.5 → indexify-0.2.7}/indexify/executor/task_store.py +0 -0
  25. {indexify-0.2.5 → indexify-0.2.7}/indexify/functions_sdk/data_objects.py +0 -0
  26. {indexify-0.2.5 → indexify-0.2.7}/indexify/functions_sdk/graph_validation.py +0 -0
  27. {indexify-0.2.5 → indexify-0.2.7}/indexify/functions_sdk/indexify_functions.py +0 -0
  28. {indexify-0.2.5 → indexify-0.2.7}/indexify/functions_sdk/local_cache.py +0 -0
  29. {indexify-0.2.5 → indexify-0.2.7}/indexify/functions_sdk/object_serializer.py +0 -0
  30. {indexify-0.2.5 → indexify-0.2.7}/indexify/settings.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: indexify
3
- Version: 0.2.5
3
+ Version: 0.2.7
4
4
  Summary: Python Client for Indexify
5
5
  Home-page: https://github.com/tensorlakeai/indexify
6
6
  License: Apache 2.0
@@ -122,6 +122,11 @@ def server_dev_mode():
122
122
  def build_image(workflow_file_path: str, func_names: List[str]):
123
123
  globals_dict = {}
124
124
 
125
+ # Add the folder in the workflow file path to the current Python path
126
+ folder_path = os.path.dirname(workflow_file_path)
127
+ if folder_path not in sys.path:
128
+ sys.path.append(folder_path)
129
+
125
130
  try:
126
131
  exec(open(workflow_file_path).read(), globals_dict)
127
132
  except FileNotFoundError as e:
@@ -240,8 +245,10 @@ WORKDIR /app
240
245
  console.print(f"{docker_file}", style="magenta")
241
246
 
242
247
  client = docker.from_env()
248
+ image_name = f"{image._image_name}:{image._tag}"
243
249
  client.images.build(
244
250
  fileobj=io.BytesIO(docker_file.encode()),
245
- tag=f"{image._image_name}:{image._tag}",
251
+ tag=image_name,
246
252
  rm=True,
247
253
  )
254
+ print(f"built image: {image_name}")
@@ -21,6 +21,8 @@ import concurrent.futures
21
21
  import io
22
22
  from contextlib import redirect_stderr, redirect_stdout
23
23
 
24
+ from .runtime_probes import RuntimeProbes
25
+
24
26
 
25
27
  class FunctionRunException(Exception):
26
28
  def __init__(
@@ -51,7 +53,8 @@ def _load_function(
51
53
  key = f"{namespace}/{graph_name}/{version}/{fn_name}"
52
54
  if key in function_wrapper_map:
53
55
  return
54
- graph = Graph.from_path(code_path)
56
+ image_name = RuntimeProbes().probe().image_name
57
+ graph = Graph.from_path(code_path, image_name)
55
58
  function_wrapper = graph.get_function(fn_name)
56
59
  function_wrapper_map[key] = function_wrapper
57
60
  graph_key = f"{namespace}/{graph_name}/{version}"
@@ -135,7 +138,9 @@ def _run_function(
135
138
  fn_output = None
136
139
  has_failed = False
137
140
  exception_msg = None
138
- print(f"[bold] function_worker: [/bold] invoking function {fn_name} in graph {graph_name}")
141
+ print(
142
+ f"[bold] function_worker: [/bold] invoking function {fn_name} in graph {graph_name}"
143
+ )
139
144
  with redirect_stdout(stdout_capture), redirect_stderr(stderr_capture):
140
145
  try:
141
146
  key = f"{namespace}/{graph_name}/{version}/{fn_name}"
@@ -132,32 +132,38 @@ class Graph:
132
132
  self.add_node(node)
133
133
  self.routers[from_node.name].append(node.name)
134
134
  return self
135
-
136
-
137
- def _register_cloudpickle(self):
135
+
136
+ def serialize(self) -> Dict[str, bytes]:
138
137
  # Get all unique modules from nodes and edges
139
- modules = set()
138
+ modules_by_images = defaultdict(set)
140
139
  for node in self.nodes.values():
141
- modules.add(node.__module__)
140
+ modules_by_images[node.image._image_name].add(node.__module__)
141
+
142
+ pickled_nodes_by_image = {}
143
+ for image_name, modules in modules_by_images.items():
144
+ for module in modules:
145
+ print(f"registering module {module} with cloudpickle")
146
+ cloudpickle.register_pickle_by_value(sys.modules[module])
147
+ pickled_nodes_by_image[image_name] = cloudpickle.dumps(self)
148
+ for module in modules:
149
+ cloudpickle.unregister_pickle_by_value(sys.modules[module])
142
150
 
143
151
  # Register each module with cloudpickle
144
152
  for module_name in modules:
145
153
  module = sys.modules[module_name]
154
+ print(f"registering module {module_name} with cloudpickle")
146
155
  cloudpickle.register_pickle_by_value(module)
147
-
148
-
149
- def serialize(self):
150
- self._register_cloudpickle()
151
- return cloudpickle.dumps(self)
156
+ return pickled_nodes_by_image
152
157
 
153
158
  @staticmethod
154
- def deserialize(graph: bytes) -> "Graph":
155
- return cloudpickle.loads(graph)
159
+ def deserialize(serialized_code_by_images: Dict[str, bytes], image: str) -> "Graph":
160
+ return cloudpickle.loads(serialized_code_by_images[image])
156
161
 
157
162
  @staticmethod
158
- def from_path(path: str) -> "Graph":
163
+ def from_path(path: str, image: str) -> "Graph":
159
164
  with open(path, "rb") as f:
160
- return cloudpickle.load(f)
165
+ pickled_code_by_images: Dict[str, bytes] = cloudpickle.load(f)
166
+ return Graph.deserialize(pickled_code_by_images, image)
161
167
 
162
168
  def add_edge(
163
169
  self,
@@ -27,6 +27,11 @@ class NodeMetadata(BaseModel):
27
27
  dynamic_router: Optional[RouterMetadata] = None
28
28
  compute_fn: Optional[FunctionMetadata] = None
29
29
 
30
+ def image_name(self):
31
+ if self.dynamic_router:
32
+ return self.dynamic_router.image_name
33
+ return self.compute_fn.image_name
34
+
30
35
 
31
36
  # RuntimeInformation is a class that holds data about the environment in which the graph should run.
32
37
  class RuntimeInformation(BaseModel):
@@ -6,7 +6,7 @@ class Image:
6
6
 
7
7
  self._base_image = "python:3.10.15-slim-bookworm"
8
8
 
9
- self._run_strs = []
9
+ self._run_strs = ["pip install indexify"]
10
10
 
11
11
  def name(self, image_name):
12
12
  self._image_name = image_name
@@ -99,7 +99,7 @@ class IndexifyClient:
99
99
  service_url: str = DEFAULT_SERVICE_URL_HTTPS,
100
100
  *args,
101
101
  **kwargs,
102
- ) -> "RemoteClient":
102
+ ) -> "IndexifyClient":
103
103
  """
104
104
  Create a client with mutual TLS authentication. Also enables HTTP/2,
105
105
  which is required for mTLS.
@@ -127,7 +127,7 @@ class IndexifyClient:
127
127
 
128
128
  client_certs = (cert_path, key_path)
129
129
  verify_option = ca_bundle_path if ca_bundle_path else True
130
- client = RemoteClient(
130
+ client = IndexifyClient(
131
131
  *args,
132
132
  **kwargs,
133
133
  service_url=service_url,
@@ -160,7 +160,7 @@ class IndexifyClient:
160
160
 
161
161
  def register_compute_graph(self, graph: Graph):
162
162
  graph_metadata = graph.definition()
163
- serialized_code = graph.serialize()
163
+ serialized_code = cloudpickle.dumps(graph.serialize())
164
164
  response = self._post(
165
165
  f"namespaces/{self.namespace}/compute_graphs",
166
166
  files={"code": serialized_code},
@@ -177,11 +177,14 @@ class IndexifyClient:
177
177
  response = self._get(f"namespaces/{self.namespace}/compute_graphs/{name}")
178
178
  return ComputeGraphMetadata(**response.json())
179
179
 
180
- def load_graph(self, name: str) -> Graph:
180
+ def load_graph(self, name: str, fn_name: str) -> Graph:
181
181
  response = self._get(
182
182
  f"internal/namespaces/{self.namespace}/compute_graphs/{name}/code"
183
183
  )
184
- return Graph.deserialize(response.content)
184
+ cg_metadata: ComputeGraphMetadata = self.graph(name)
185
+ fn_metadata = cg_metadata.nodes[fn_name]
186
+ pickled_bytes_by_image = cloudpickle.loads(response.content)
187
+ return Graph.deserialize(pickled_bytes_by_image, fn_metadata.image_name())
185
188
 
186
189
  def namespaces(self) -> List[str]:
187
190
  response = self._get(f"namespaces")
@@ -287,7 +290,7 @@ class IndexifyClient:
287
290
  return: Union[Dict[str, List[Any]], List[Any]]: The extracted objects. If the extractor name is provided, the output is a list of extracted objects by the extractor. If the extractor name is not provided, the output is a dictionary with the extractor name as the key and the extracted objects as the value. If no objects are found, an empty list is returned.
288
291
  """
289
292
  if graph not in self._graphs:
290
- self._graphs[graph] = self.load_graph(graph)
293
+ self._graphs[graph] = self.load_graph(graph, fn_name)
291
294
  response = self._get(
292
295
  f"namespaces/{self.namespace}/compute_graphs/{graph}/invocations/{invocation_id}/outputs",
293
296
  )
@@ -34,13 +34,19 @@ class RemoteGraph:
34
34
  )
35
35
 
36
36
  @classmethod
37
- def deploy(cls, g: Graph, additional_modules=[], server_url: Optional[str] = "http://localhost:8900"):
37
+ def deploy(
38
+ cls,
39
+ g: Graph,
40
+ additional_modules=[],
41
+ server_url: Optional[str] = "http://localhost:8900",
42
+ ):
38
43
  """
39
44
  Create a new RemoteGraph from a local Graph object.
40
45
  :param g: The local Graph object.
41
46
  :param server_url: The URL of the server where the graph will be registered.
42
47
  """
43
48
  import cloudpickle
49
+
44
50
  for module in additional_modules:
45
51
  cloudpickle.register_pickle_by_value(module)
46
52
  client = IndexifyClient(service_url=server_url)
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "indexify"
3
- version = "0.2.5"
3
+ version = "0.2.7"
4
4
  description = "Python Client for Indexify"
5
5
  authors = ["Tensorlake Inc. <support@tensorlake.ai>"]
6
6
  license = "Apache 2.0"
File without changes
File without changes
File without changes
File without changes
File without changes