indexify 0.0.42__py3-none-any.whl → 0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. indexify/__init__.py +13 -14
  2. indexify/base_client.py +48 -21
  3. indexify/cli.py +235 -0
  4. indexify/client.py +18 -790
  5. indexify/error.py +3 -30
  6. indexify/executor/agent.py +362 -0
  7. indexify/executor/api_objects.py +43 -0
  8. indexify/executor/downloader.py +124 -0
  9. indexify/executor/executor_tasks.py +72 -0
  10. indexify/executor/function_worker.py +177 -0
  11. indexify/executor/indexify_executor.py +32 -0
  12. indexify/executor/task_reporter.py +110 -0
  13. indexify/executor/task_store.py +113 -0
  14. indexify/foo +72 -0
  15. indexify/functions_sdk/data_objects.py +37 -0
  16. indexify/functions_sdk/graph.py +276 -0
  17. indexify/functions_sdk/graph_validation.py +69 -0
  18. indexify/functions_sdk/image.py +26 -0
  19. indexify/functions_sdk/indexify_functions.py +192 -0
  20. indexify/functions_sdk/local_cache.py +46 -0
  21. indexify/functions_sdk/object_serializer.py +61 -0
  22. indexify/local_client.py +183 -0
  23. indexify/remote_client.py +319 -0
  24. indexify-0.2.dist-info/METADATA +151 -0
  25. indexify-0.2.dist-info/RECORD +32 -0
  26. indexify-0.2.dist-info/entry_points.txt +3 -0
  27. indexify/exceptions.py +0 -3
  28. indexify/extraction_policy.py +0 -75
  29. indexify/extractor_sdk/__init__.py +0 -14
  30. indexify/extractor_sdk/data.py +0 -100
  31. indexify/extractor_sdk/extractor.py +0 -223
  32. indexify/extractor_sdk/utils.py +0 -102
  33. indexify/extractors/__init__.py +0 -0
  34. indexify/extractors/embedding.py +0 -55
  35. indexify/extractors/pdf_parser.py +0 -93
  36. indexify/graph.py +0 -133
  37. indexify/local_runner.py +0 -128
  38. indexify/runner.py +0 -22
  39. indexify/utils.py +0 -7
  40. indexify-0.0.42.dist-info/METADATA +0 -66
  41. indexify-0.0.42.dist-info/RECORD +0 -25
  42. {indexify-0.0.42.dist-info → indexify-0.2.dist-info}/LICENSE.txt +0 -0
  43. {indexify-0.0.42.dist-info → indexify-0.2.dist-info}/WHEEL +0 -0
indexify/__init__.py CHANGED
@@ -1,22 +1,21 @@
1
- from . import data_loaders, extractor_sdk
2
- from .client import (
3
- Document,
4
- IndexifyClient,
5
- generate_hash_from_string,
6
- generate_unique_hex_id,
1
+ from . import data_loaders
2
+ from .client import create_client
3
+ from .functions_sdk.graph import Graph
4
+ from .functions_sdk.indexify_functions import (
5
+ indexify_function,
6
+ indexify_router,
7
7
  )
8
- from .extraction_policy import ExtractionGraph
9
- from .graph import Graph
8
+ from .local_client import LocalClient
9
+ from .remote_client import RemoteClient
10
10
  from .settings import DEFAULT_SERVICE_URL
11
11
 
12
12
  __all__ = [
13
13
  "data_loaders",
14
14
  "Graph",
15
- "Document",
16
- "extractor_sdk",
17
- "IndexifyClient",
18
- "ExtractionGraph",
15
+ "indexify_function",
16
+ "indexify_router",
19
17
  "DEFAULT_SERVICE_URL",
20
- "generate_hash_from_string",
21
- "generate_unique_hex_id",
18
+ "RemoteClient",
19
+ "LocalClient",
20
+ "create_client",
22
21
  ]
indexify/base_client.py CHANGED
@@ -1,41 +1,67 @@
1
1
  from abc import ABC, abstractmethod
2
2
  from typing import Any, Dict, List, Optional, Union
3
3
 
4
- from .extractor_sdk import Feature, Graph
4
+ from pydantic import Json
5
5
 
6
+ from indexify.functions_sdk.graph import Graph
6
7
 
7
- class BaseClient(ABC):
8
+
9
+ class IndexifyClient(ABC):
8
10
 
9
11
  ### Operational APIs
10
12
  @abstractmethod
11
- def register_extraction_graph(self, graph: Graph):
13
+ def register_compute_graph(self, graph: Graph):
14
+ """
15
+ Register a compute graph.
16
+ graph: Graph: The graph to be registered
17
+ """
12
18
  pass
13
19
 
14
20
  @abstractmethod
15
- def graphs(self) -> str:
21
+ def graphs(self) -> List[str]:
22
+ """
23
+ Get the graphs.
24
+ return: List[str]: The graphs
25
+ """
16
26
  pass
17
27
 
18
28
  @abstractmethod
19
- def namespaces(self) -> str:
29
+ def namespaces(self) -> List[str]:
30
+ """
31
+ Get the namespaces.
32
+ return: List[str]: The namespaces
33
+ """
20
34
  pass
21
35
 
22
36
  @abstractmethod
23
37
  def create_namespace(self, namespace: str):
38
+ """
39
+ Create a namespace.
40
+ namespace: str: The name of the namespace to be created
41
+ """
24
42
  pass
25
43
 
26
44
  ### Ingestion APIs
27
45
  @abstractmethod
28
- def invoke_graph_with_object(self, graph: str, object: Any) -> str:
46
+ def invoke_graph_with_object(
47
+ self, graph: str, block_until_done: bool = False, **kwargs
48
+ ) -> str:
29
49
  """
30
50
  Invokes a graph with an input object.
31
51
  graph: str: The name of the graph to invoke
32
- object: Any: The input object to the graph. It should be JSON serializable
52
+ kwargs: Any: Named arguments to be passed to the graph. Example: url="https://www.google.com", web_page_text="Hello world!"
33
53
  return: str: The ID of the ingested object
34
54
  """
35
55
  pass
36
56
 
37
57
  @abstractmethod
38
- def invoke_graph_with_file(self, graph: str, path: str) -> str:
58
+ def invoke_graph_with_file(
59
+ self,
60
+ graph: str,
61
+ path: str,
62
+ metadata: Optional[Dict[str, Json]] = None,
63
+ block_until_done: bool = False,
64
+ ) -> str:
39
65
  """
40
66
  Invokes a graph with an input file. The file's mimetype is appropriately detected.
41
67
  graph: str: The name of the graph to invoke
@@ -44,10 +70,21 @@ class BaseClient(ABC):
44
70
  """
45
71
  pass
46
72
 
73
+ @abstractmethod
74
+ def rerun_graph(self, graph: str):
75
+ """
76
+ Rerun a graph.
77
+ graph: str: The name of the graph to rerun
78
+ """
79
+ pass
80
+
47
81
  ### Retrieval APIs
48
82
  @abstractmethod
49
- def extracted_objects(
50
- self, graph: str, ingested_object_id: str, extractor_name: Optional[str]
83
+ def graph_outputs(
84
+ self,
85
+ graph: str,
86
+ invocation_id: str,
87
+ fn_name: Optional[str],
51
88
  ) -> Union[Dict[str, List[Any]], List[Any]]:
52
89
  """
53
90
  Returns the extracted objects by a graph for an ingested object. If the extractor name is provided, only the objects extracted by that extractor are returned.
@@ -55,17 +92,7 @@ class BaseClient(ABC):
55
92
  graph: str: The name of the graph
56
93
  ingested_object_id: str: The ID of the ingested object
57
94
  extractor_name: Optional[str]: The name of the extractor whose output is to be returned if provided
95
+ block_until_done: bool = True: If True, the method will block until the extraction is done. If False, the method will return immediately.
58
96
  return: Union[Dict[str, List[Any]], List[Any]]: The extracted objects. If the extractor name is provided, the output is a list of extracted objects by the extractor. If the extractor name is not provided, the output is a dictionary with the extractor name as the key and the extracted objects as the value. If no objects are found, an empty list is returned.
59
97
  """
60
98
  pass
61
-
62
- @abstractmethod
63
- def features(
64
- self, object_id: str, graph: Optional[str]
65
- ) -> Union[Dict[str, List[Feature]], List[Feature]]:
66
- """
67
- Returns the features of an object.
68
- object_id: str: The ID of the object
69
- return: List[Feature]: The features associated with the object that were extracted. If a graph name is provided, only the features extracted by that graph are returned.
70
- """
71
- pass
indexify/cli.py ADDED
@@ -0,0 +1,235 @@
1
+ import asyncio
2
+ import io
3
+ import os
4
+ import shutil
5
+ import signal
6
+ import subprocess
7
+ import sys
8
+ import threading
9
+ import time
10
+ from typing import Annotated, List, Optional
11
+
12
+ import docker
13
+ import nanoid
14
+ import typer
15
+ from rich.console import Console
16
+ from rich.panel import Panel
17
+ from rich.text import Text
18
+ from rich.theme import Theme
19
+
20
+ from indexify.executor.agent import ExtractorAgent
21
+ from indexify.executor.function_worker import FunctionWorker
22
+ from indexify.functions_sdk.image import Image
23
+
24
+ custom_theme = Theme(
25
+ {
26
+ "info": "cyan",
27
+ "warning": "yellow",
28
+ "error": "red",
29
+ "highlight": "magenta",
30
+ }
31
+ )
32
+
33
+ console = Console(theme=custom_theme)
34
+
35
+ app = typer.Typer(pretty_exceptions_enable=False, no_args_is_help=True)
36
+
37
+
38
+ @app.command(
39
+ help="Run server and executor in dev mode (Not recommended for production.)"
40
+ )
41
+ def server_dev_mode():
42
+ indexify_server_path = os.path.expanduser("~/.indexify/indexify-server")
43
+ if not os.path.exists(indexify_server_path):
44
+ print("indexify-server not found. Downloading...")
45
+ try:
46
+ download_command = subprocess.check_output(
47
+ ["curl", "-s", "https://getindexify.ai"], universal_newlines=True
48
+ )
49
+ subprocess.run(download_command, shell=True, check=True)
50
+ except subprocess.CalledProcessError as e:
51
+ print(f"failed to download indexify-server: {e}")
52
+ exit(1)
53
+ try:
54
+ os.makedirs(os.path.dirname(indexify_server_path), exist_ok=True)
55
+ shutil.move("indexify-server", indexify_server_path)
56
+ except Exception as e:
57
+ print(f"failed to move indexify-server to {indexify_server_path}: {e}")
58
+ exit(1)
59
+ print("starting indexify server and executor in dev mode...")
60
+ print("press Ctrl+C to stop the server and executor.")
61
+ print(f"server binary path: {indexify_server_path}")
62
+ commands = [indexify_server_path, "indexify-cli executor"]
63
+
64
+ processes = []
65
+ stop_event = threading.Event()
66
+
67
+ def handle_output(process):
68
+ for line in iter(process.stdout.readline, ""):
69
+ sys.stdout.write(line)
70
+ sys.stdout.flush()
71
+
72
+ def terminate_processes():
73
+ print("Terminating processes...")
74
+ stop_event.set()
75
+ for process in processes:
76
+ if process.poll() is None:
77
+ try:
78
+ process.terminate()
79
+ process.wait(timeout=5)
80
+ except subprocess.TimeoutExpired:
81
+ print(f"Force killing process {process.pid}")
82
+ process.kill()
83
+
84
+ def signal_handler(sig, frame):
85
+ print("\nCtrl+C pressed. Shutting down...")
86
+ terminate_processes()
87
+ sys.exit(0)
88
+
89
+ signal.signal(signal.SIGINT, signal_handler)
90
+ signal.signal(signal.SIGTERM, signal_handler)
91
+
92
+ for cmd in commands:
93
+ process = subprocess.Popen(
94
+ cmd.split(),
95
+ stdout=subprocess.PIPE,
96
+ stderr=subprocess.STDOUT,
97
+ bufsize=1,
98
+ universal_newlines=True,
99
+ preexec_fn=os.setsid if os.name != "nt" else None,
100
+ )
101
+ processes.append(process)
102
+
103
+ thread = threading.Thread(target=handle_output, args=(process,))
104
+ thread.daemon = True
105
+ thread.start()
106
+
107
+ try:
108
+ while True:
109
+ time.sleep(1)
110
+ if all(process.poll() is not None for process in processes):
111
+ print("All processes have finished.")
112
+ break
113
+ except KeyboardInterrupt:
114
+ signal_handler(None, None)
115
+ finally:
116
+ terminate_processes()
117
+
118
+ print("Script execution completed.")
119
+
120
+
121
+ @app.command(help="Build image for function names")
122
+ def build_image(workflow_file_path: str, func_names: List[str]):
123
+ globals_dict = {}
124
+
125
+ try:
126
+ exec(open(workflow_file_path).read(), globals_dict)
127
+ except FileNotFoundError as e:
128
+ raise Exception(
129
+ f"Could not find workflow file to execute at: " f"`{workflow_file_path}`"
130
+ )
131
+
132
+ found_funcs = []
133
+ graph = None
134
+ for name, obj in globals_dict.items():
135
+ for func_name in func_names:
136
+ if name == func_name:
137
+ found_funcs.append(name)
138
+ _create_image_for_func(func_name=func_name, func_obj=obj)
139
+
140
+ console.print(
141
+ Text(f"Processed functions: ", style="cyan"),
142
+ Text(f"{found_funcs}", style="green"),
143
+ )
144
+
145
+
146
+ @app.command(help="Joins the extractors to the coordinator server")
147
+ def executor(
148
+ server_addr: str = "localhost:8900",
149
+ workers: Annotated[
150
+ int, typer.Option(help="number of worker processes for extraction")
151
+ ] = 1,
152
+ config_path: Optional[str] = typer.Option(
153
+ None, help="Path to the TLS configuration file"
154
+ ),
155
+ executor_cache: Optional[str] = typer.Option(
156
+ "~/.indexify/executor_cache", help="Path to the executor cache directory"
157
+ ),
158
+ ):
159
+ id = nanoid.generate()
160
+ console.print(
161
+ Panel(
162
+ f"Number of workers: {workers}\n"
163
+ f"Config path: {config_path}\n"
164
+ f"Server address: {server_addr}\n"
165
+ f"Executor ID: {id}\n"
166
+ f"Executor cache: {executor_cache}",
167
+ title="Agent Configuration",
168
+ border_style="info",
169
+ )
170
+ )
171
+
172
+ function_worker = FunctionWorker(workers=workers)
173
+ from pathlib import Path
174
+
175
+ executor_cache = Path(executor_cache).expanduser().absolute()
176
+ if os.path.exists(executor_cache):
177
+ shutil.rmtree(executor_cache)
178
+ Path(executor_cache).mkdir(parents=True, exist_ok=True)
179
+
180
+ agent = ExtractorAgent(
181
+ id,
182
+ num_workers=workers,
183
+ function_worker=function_worker,
184
+ server_addr=server_addr,
185
+ config_path=config_path,
186
+ code_path=executor_cache,
187
+ )
188
+
189
+ try:
190
+ asyncio.get_event_loop().run_until_complete(agent.run())
191
+ except asyncio.CancelledError as ex:
192
+ console.print(Text(f"Exiting gracefully: {ex}", style="bold yellow"))
193
+
194
+
195
+ def _create_image_for_func(func_name, func_obj):
196
+ console.print(
197
+ Text("Creating container for ", style="cyan"),
198
+ Text(f"`{func_name}`", style="cyan bold"),
199
+ )
200
+ _build_image(image=func_obj.image, func_name=func_name)
201
+
202
+
203
+ def _build_image(image: Image, func_name: str = None):
204
+ try:
205
+ client = docker.from_env()
206
+ client.ping()
207
+ except Exception as e:
208
+ console.print(
209
+ Text("Unable to connect with docker: ", style="red bold"),
210
+ Text(f"{e}", style="red"),
211
+ )
212
+ exit(-1)
213
+
214
+ docker_file_str_template = """
215
+ FROM {base_image}
216
+
217
+ WORKDIR /app
218
+
219
+ """
220
+
221
+ docker_file_str = docker_file_str_template.format(base_image=image._base_image)
222
+
223
+ run_strs = ["RUN " + i for i in image._run_strs]
224
+
225
+ docker_file_str += "\n".join(run_strs)
226
+
227
+ console.print("Creating image using Dockerfile contents:", style="cyan bold")
228
+ console.print(f"{docker_file_str}", style="magenta")
229
+
230
+ client = docker.from_env()
231
+ client.images.build(
232
+ fileobj=io.BytesIO(docker_file_str.encode()),
233
+ tag=f"{image._image_name}:{image._tag}",
234
+ rm=True,
235
+ )