indexify 0.0.43__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- indexify/__init__.py +15 -14
- indexify/base_client.py +48 -21
- indexify/cli.py +247 -0
- indexify/client.py +18 -790
- indexify/error.py +3 -30
- indexify/executor/agent.py +364 -0
- indexify/executor/api_objects.py +43 -0
- indexify/executor/downloader.py +124 -0
- indexify/executor/executor_tasks.py +72 -0
- indexify/executor/function_worker.py +177 -0
- indexify/executor/indexify_executor.py +32 -0
- indexify/executor/runtime_probes.py +48 -0
- indexify/executor/task_reporter.py +110 -0
- indexify/executor/task_store.py +113 -0
- indexify/foo +72 -0
- indexify/functions_sdk/data_objects.py +37 -0
- indexify/functions_sdk/graph.py +281 -0
- indexify/functions_sdk/graph_validation.py +66 -0
- indexify/functions_sdk/image.py +34 -0
- indexify/functions_sdk/indexify_functions.py +188 -0
- indexify/functions_sdk/local_cache.py +46 -0
- indexify/functions_sdk/object_serializer.py +60 -0
- indexify/local_client.py +183 -0
- indexify/remote_client.py +319 -0
- indexify-0.2.1.dist-info/METADATA +151 -0
- indexify-0.2.1.dist-info/RECORD +33 -0
- indexify-0.2.1.dist-info/entry_points.txt +3 -0
- indexify/exceptions.py +0 -3
- indexify/extraction_policy.py +0 -75
- indexify/extractor_sdk/__init__.py +0 -14
- indexify/extractor_sdk/data.py +0 -100
- indexify/extractor_sdk/extractor.py +0 -225
- indexify/extractor_sdk/utils.py +0 -102
- indexify/extractors/__init__.py +0 -0
- indexify/extractors/embedding.py +0 -55
- indexify/extractors/pdf_parser.py +0 -93
- indexify/graph.py +0 -133
- indexify/local_runner.py +0 -128
- indexify/runner.py +0 -22
- indexify/utils.py +0 -7
- indexify-0.0.43.dist-info/METADATA +0 -66
- indexify-0.0.43.dist-info/RECORD +0 -25
- {indexify-0.0.43.dist-info → indexify-0.2.1.dist-info}/LICENSE.txt +0 -0
- {indexify-0.0.43.dist-info → indexify-0.2.1.dist-info}/WHEEL +0 -0
indexify/__init__.py
CHANGED
@@ -1,22 +1,23 @@
|
|
1
|
-
from . import data_loaders
|
2
|
-
from .client import
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
1
|
+
from . import data_loaders
|
2
|
+
from .client import create_client
|
3
|
+
from .functions_sdk.graph import Graph
|
4
|
+
from .functions_sdk.image import Image
|
5
|
+
from .functions_sdk.indexify_functions import (
|
6
|
+
indexify_function,
|
7
|
+
indexify_router,
|
7
8
|
)
|
8
|
-
from .
|
9
|
-
from .
|
9
|
+
from .local_client import LocalClient
|
10
|
+
from .remote_client import RemoteClient
|
10
11
|
from .settings import DEFAULT_SERVICE_URL
|
11
12
|
|
12
13
|
__all__ = [
|
13
14
|
"data_loaders",
|
14
15
|
"Graph",
|
15
|
-
"
|
16
|
-
"
|
17
|
-
"
|
18
|
-
"ExtractionGraph",
|
16
|
+
"Image",
|
17
|
+
"indexify_function",
|
18
|
+
"indexify_router",
|
19
19
|
"DEFAULT_SERVICE_URL",
|
20
|
-
"
|
21
|
-
"
|
20
|
+
"RemoteClient",
|
21
|
+
"LocalClient",
|
22
|
+
"create_client",
|
22
23
|
]
|
indexify/base_client.py
CHANGED
@@ -1,41 +1,67 @@
|
|
1
1
|
from abc import ABC, abstractmethod
|
2
2
|
from typing import Any, Dict, List, Optional, Union
|
3
3
|
|
4
|
-
from
|
4
|
+
from pydantic import Json
|
5
5
|
|
6
|
+
from indexify.functions_sdk.graph import Graph
|
6
7
|
|
7
|
-
|
8
|
+
|
9
|
+
class IndexifyClient(ABC):
|
8
10
|
|
9
11
|
### Operational APIs
|
10
12
|
@abstractmethod
|
11
|
-
def
|
13
|
+
def register_compute_graph(self, graph: Graph):
|
14
|
+
"""
|
15
|
+
Register a compute graph.
|
16
|
+
graph: Graph: The graph to be registered
|
17
|
+
"""
|
12
18
|
pass
|
13
19
|
|
14
20
|
@abstractmethod
|
15
|
-
def graphs(self) -> str:
|
21
|
+
def graphs(self) -> List[str]:
|
22
|
+
"""
|
23
|
+
Get the graphs.
|
24
|
+
return: List[str]: The graphs
|
25
|
+
"""
|
16
26
|
pass
|
17
27
|
|
18
28
|
@abstractmethod
|
19
|
-
def namespaces(self) -> str:
|
29
|
+
def namespaces(self) -> List[str]:
|
30
|
+
"""
|
31
|
+
Get the namespaces.
|
32
|
+
return: List[str]: The namespaces
|
33
|
+
"""
|
20
34
|
pass
|
21
35
|
|
22
36
|
@abstractmethod
|
23
37
|
def create_namespace(self, namespace: str):
|
38
|
+
"""
|
39
|
+
Create a namespace.
|
40
|
+
namespace: str: The name of the namespace to be created
|
41
|
+
"""
|
24
42
|
pass
|
25
43
|
|
26
44
|
### Ingestion APIs
|
27
45
|
@abstractmethod
|
28
|
-
def invoke_graph_with_object(
|
46
|
+
def invoke_graph_with_object(
|
47
|
+
self, graph: str, block_until_done: bool = False, **kwargs
|
48
|
+
) -> str:
|
29
49
|
"""
|
30
50
|
Invokes a graph with an input object.
|
31
51
|
graph: str: The name of the graph to invoke
|
32
|
-
|
52
|
+
kwargs: Any: Named arguments to be passed to the graph. Example: url="https://www.google.com", web_page_text="Hello world!"
|
33
53
|
return: str: The ID of the ingested object
|
34
54
|
"""
|
35
55
|
pass
|
36
56
|
|
37
57
|
@abstractmethod
|
38
|
-
def invoke_graph_with_file(
|
58
|
+
def invoke_graph_with_file(
|
59
|
+
self,
|
60
|
+
graph: str,
|
61
|
+
path: str,
|
62
|
+
metadata: Optional[Dict[str, Json]] = None,
|
63
|
+
block_until_done: bool = False,
|
64
|
+
) -> str:
|
39
65
|
"""
|
40
66
|
Invokes a graph with an input file. The file's mimetype is appropriately detected.
|
41
67
|
graph: str: The name of the graph to invoke
|
@@ -44,10 +70,21 @@ class BaseClient(ABC):
|
|
44
70
|
"""
|
45
71
|
pass
|
46
72
|
|
73
|
+
@abstractmethod
|
74
|
+
def rerun_graph(self, graph: str):
|
75
|
+
"""
|
76
|
+
Rerun a graph.
|
77
|
+
graph: str: The name of the graph to rerun
|
78
|
+
"""
|
79
|
+
pass
|
80
|
+
|
47
81
|
### Retrieval APIs
|
48
82
|
@abstractmethod
|
49
|
-
def
|
50
|
-
self,
|
83
|
+
def graph_outputs(
|
84
|
+
self,
|
85
|
+
graph: str,
|
86
|
+
invocation_id: str,
|
87
|
+
fn_name: Optional[str],
|
51
88
|
) -> Union[Dict[str, List[Any]], List[Any]]:
|
52
89
|
"""
|
53
90
|
Returns the extracted objects by a graph for an ingested object. If the extractor name is provided, only the objects extracted by that extractor are returned.
|
@@ -55,17 +92,7 @@ class BaseClient(ABC):
|
|
55
92
|
graph: str: The name of the graph
|
56
93
|
ingested_object_id: str: The ID of the ingested object
|
57
94
|
extractor_name: Optional[str]: The name of the extractor whose output is to be returned if provided
|
95
|
+
block_until_done: bool = True: If True, the method will block until the extraction is done. If False, the method will return immediately.
|
58
96
|
return: Union[Dict[str, List[Any]], List[Any]]: The extracted objects. If the extractor name is provided, the output is a list of extracted objects by the extractor. If the extractor name is not provided, the output is a dictionary with the extractor name as the key and the extracted objects as the value. If no objects are found, an empty list is returned.
|
59
97
|
"""
|
60
98
|
pass
|
61
|
-
|
62
|
-
@abstractmethod
|
63
|
-
def features(
|
64
|
-
self, object_id: str, graph: Optional[str]
|
65
|
-
) -> Union[Dict[str, List[Feature]], List[Feature]]:
|
66
|
-
"""
|
67
|
-
Returns the features of an object.
|
68
|
-
object_id: str: The ID of the object
|
69
|
-
return: List[Feature]: The features associated with the object that were extracted. If a graph name is provided, only the features extracted by that graph are returned.
|
70
|
-
"""
|
71
|
-
pass
|
indexify/cli.py
ADDED
@@ -0,0 +1,247 @@
|
|
1
|
+
import asyncio
|
2
|
+
import io
|
3
|
+
import os
|
4
|
+
import shutil
|
5
|
+
import signal
|
6
|
+
import subprocess
|
7
|
+
import sys
|
8
|
+
import threading
|
9
|
+
import time
|
10
|
+
from typing import Annotated, List, Optional
|
11
|
+
|
12
|
+
import docker
|
13
|
+
import nanoid
|
14
|
+
import typer
|
15
|
+
from rich.console import Console
|
16
|
+
from rich.panel import Panel
|
17
|
+
from rich.text import Text
|
18
|
+
from rich.theme import Theme
|
19
|
+
|
20
|
+
from indexify.executor.agent import ExtractorAgent
|
21
|
+
from indexify.executor.function_worker import FunctionWorker
|
22
|
+
from indexify.functions_sdk.image import Image, DEFAULT_IMAGE
|
23
|
+
|
24
|
+
custom_theme = Theme(
|
25
|
+
{
|
26
|
+
"info": "cyan",
|
27
|
+
"warning": "yellow",
|
28
|
+
"error": "red",
|
29
|
+
"highlight": "magenta",
|
30
|
+
}
|
31
|
+
)
|
32
|
+
|
33
|
+
console = Console(theme=custom_theme)
|
34
|
+
|
35
|
+
app = typer.Typer(pretty_exceptions_enable=False, no_args_is_help=True)
|
36
|
+
|
37
|
+
|
38
|
+
@app.command(
|
39
|
+
help="Run server and executor in dev mode (Not recommended for production.)"
|
40
|
+
)
|
41
|
+
def server_dev_mode():
|
42
|
+
indexify_server_path = os.path.expanduser("~/.indexify/indexify-server")
|
43
|
+
if not os.path.exists(indexify_server_path):
|
44
|
+
print("indexify-server not found. Downloading...")
|
45
|
+
try:
|
46
|
+
download_command = subprocess.check_output(
|
47
|
+
["curl", "-s", "https://getindexify.ai"], universal_newlines=True
|
48
|
+
)
|
49
|
+
subprocess.run(download_command, shell=True, check=True)
|
50
|
+
except subprocess.CalledProcessError as e:
|
51
|
+
print(f"failed to download indexify-server: {e}")
|
52
|
+
exit(1)
|
53
|
+
try:
|
54
|
+
os.makedirs(os.path.dirname(indexify_server_path), exist_ok=True)
|
55
|
+
shutil.move("indexify-server", indexify_server_path)
|
56
|
+
except Exception as e:
|
57
|
+
print(f"failed to move indexify-server to {indexify_server_path}: {e}")
|
58
|
+
exit(1)
|
59
|
+
print("starting indexify server and executor in dev mode...")
|
60
|
+
print("press Ctrl+C to stop the server and executor.")
|
61
|
+
print(f"server binary path: {indexify_server_path}")
|
62
|
+
commands = [indexify_server_path, "indexify-cli executor"]
|
63
|
+
|
64
|
+
processes = []
|
65
|
+
stop_event = threading.Event()
|
66
|
+
|
67
|
+
def handle_output(process):
|
68
|
+
for line in iter(process.stdout.readline, ""):
|
69
|
+
sys.stdout.write(line)
|
70
|
+
sys.stdout.flush()
|
71
|
+
|
72
|
+
def terminate_processes():
|
73
|
+
print("Terminating processes...")
|
74
|
+
stop_event.set()
|
75
|
+
for process in processes:
|
76
|
+
if process.poll() is None:
|
77
|
+
try:
|
78
|
+
process.terminate()
|
79
|
+
process.wait(timeout=5)
|
80
|
+
except subprocess.TimeoutExpired:
|
81
|
+
print(f"Force killing process {process.pid}")
|
82
|
+
process.kill()
|
83
|
+
|
84
|
+
def signal_handler(sig, frame):
|
85
|
+
print("\nCtrl+C pressed. Shutting down...")
|
86
|
+
terminate_processes()
|
87
|
+
sys.exit(0)
|
88
|
+
|
89
|
+
signal.signal(signal.SIGINT, signal_handler)
|
90
|
+
signal.signal(signal.SIGTERM, signal_handler)
|
91
|
+
|
92
|
+
for cmd in commands:
|
93
|
+
process = subprocess.Popen(
|
94
|
+
cmd.split(),
|
95
|
+
stdout=subprocess.PIPE,
|
96
|
+
stderr=subprocess.STDOUT,
|
97
|
+
bufsize=1,
|
98
|
+
universal_newlines=True,
|
99
|
+
preexec_fn=os.setsid if os.name != "nt" else None,
|
100
|
+
)
|
101
|
+
processes.append(process)
|
102
|
+
|
103
|
+
thread = threading.Thread(target=handle_output, args=(process,))
|
104
|
+
thread.daemon = True
|
105
|
+
thread.start()
|
106
|
+
|
107
|
+
try:
|
108
|
+
while True:
|
109
|
+
time.sleep(1)
|
110
|
+
if all(process.poll() is not None for process in processes):
|
111
|
+
print("All processes have finished.")
|
112
|
+
break
|
113
|
+
except KeyboardInterrupt:
|
114
|
+
signal_handler(None, None)
|
115
|
+
finally:
|
116
|
+
terminate_processes()
|
117
|
+
|
118
|
+
print("Script execution completed.")
|
119
|
+
|
120
|
+
|
121
|
+
@app.command(help="Build image for function names")
|
122
|
+
def build_image(workflow_file_path: str, func_names: List[str]):
|
123
|
+
globals_dict = {}
|
124
|
+
|
125
|
+
try:
|
126
|
+
exec(open(workflow_file_path).read(), globals_dict)
|
127
|
+
except FileNotFoundError as e:
|
128
|
+
raise Exception(
|
129
|
+
f"Could not find workflow file to execute at: " f"`{workflow_file_path}`"
|
130
|
+
)
|
131
|
+
|
132
|
+
found_funcs = []
|
133
|
+
for name, obj in globals_dict.items():
|
134
|
+
for func_name in func_names:
|
135
|
+
if name == func_name:
|
136
|
+
found_funcs.append(name)
|
137
|
+
_create_image_for_func(func_name=func_name, func_obj=obj)
|
138
|
+
|
139
|
+
console.print(
|
140
|
+
Text(f"Processed functions: ", style="cyan"),
|
141
|
+
Text(f"{found_funcs}", style="green"),
|
142
|
+
)
|
143
|
+
|
144
|
+
|
145
|
+
@app.command(help="Build default image for indexify")
|
146
|
+
def build_default_image():
|
147
|
+
_build_image(image=DEFAULT_IMAGE)
|
148
|
+
|
149
|
+
console.print(
|
150
|
+
Text(f"Built default indexify image", style="cyan"),
|
151
|
+
)
|
152
|
+
|
153
|
+
|
154
|
+
@app.command(help="Joins the extractors to the coordinator server")
|
155
|
+
def executor(
|
156
|
+
server_addr: str = "localhost:8900",
|
157
|
+
workers: Annotated[
|
158
|
+
int, typer.Option(help="number of worker processes for extraction")
|
159
|
+
] = 1,
|
160
|
+
config_path: Optional[str] = typer.Option(
|
161
|
+
None, help="Path to the TLS configuration file"
|
162
|
+
),
|
163
|
+
executor_cache: Optional[str] = typer.Option(
|
164
|
+
"~/.indexify/executor_cache", help="Path to the executor cache directory"
|
165
|
+
),
|
166
|
+
):
|
167
|
+
id = nanoid.generate()
|
168
|
+
console.print(
|
169
|
+
Panel(
|
170
|
+
f"Number of workers: {workers}\n"
|
171
|
+
f"Config path: {config_path}\n"
|
172
|
+
f"Server address: {server_addr}\n"
|
173
|
+
f"Executor ID: {id}\n"
|
174
|
+
f"Executor cache: {executor_cache}",
|
175
|
+
title="Agent Configuration",
|
176
|
+
border_style="info",
|
177
|
+
)
|
178
|
+
)
|
179
|
+
|
180
|
+
function_worker = FunctionWorker(workers=workers)
|
181
|
+
from pathlib import Path
|
182
|
+
|
183
|
+
executor_cache = Path(executor_cache).expanduser().absolute()
|
184
|
+
if os.path.exists(executor_cache):
|
185
|
+
shutil.rmtree(executor_cache)
|
186
|
+
Path(executor_cache).mkdir(parents=True, exist_ok=True)
|
187
|
+
|
188
|
+
agent = ExtractorAgent(
|
189
|
+
id,
|
190
|
+
num_workers=workers,
|
191
|
+
function_worker=function_worker,
|
192
|
+
server_addr=server_addr,
|
193
|
+
config_path=config_path,
|
194
|
+
code_path=executor_cache,
|
195
|
+
)
|
196
|
+
|
197
|
+
try:
|
198
|
+
asyncio.get_event_loop().run_until_complete(agent.run())
|
199
|
+
except asyncio.CancelledError as ex:
|
200
|
+
console.print(Text(f"Exiting gracefully: {ex}", style="bold yellow"))
|
201
|
+
|
202
|
+
|
203
|
+
def _create_image_for_func(func_name, func_obj):
|
204
|
+
console.print(
|
205
|
+
Text("Creating container for ", style="cyan"),
|
206
|
+
Text(f"`{func_name}`", style="cyan bold"),
|
207
|
+
)
|
208
|
+
_build_image(image=func_obj.image, func_name=func_name)
|
209
|
+
|
210
|
+
|
211
|
+
def _build_image(image: Image, func_name: str = None):
|
212
|
+
try:
|
213
|
+
client = docker.from_env()
|
214
|
+
client.ping()
|
215
|
+
except Exception as e:
|
216
|
+
console.print(
|
217
|
+
Text("Unable to connect with docker: ", style="red bold"),
|
218
|
+
Text(f"{e}", style="red"),
|
219
|
+
)
|
220
|
+
exit(-1)
|
221
|
+
|
222
|
+
docker_file = f"""
|
223
|
+
FROM {image._base_image}
|
224
|
+
|
225
|
+
RUN mkdir -p ~/.indexify
|
226
|
+
|
227
|
+
RUN touch ~/.indexify/image_name
|
228
|
+
|
229
|
+
RUN echo {image._image_name} > ~/.indexify/image_name
|
230
|
+
|
231
|
+
WORKDIR /app
|
232
|
+
|
233
|
+
"""
|
234
|
+
|
235
|
+
run_strs = ["RUN " + i for i in image._run_strs]
|
236
|
+
|
237
|
+
docker_file += "\n".join(run_strs)
|
238
|
+
|
239
|
+
console.print("Creating image using Dockerfile contents:", style="cyan bold")
|
240
|
+
console.print(f"{docker_file}", style="magenta")
|
241
|
+
|
242
|
+
client = docker.from_env()
|
243
|
+
client.images.build(
|
244
|
+
fileobj=io.BytesIO(docker_file.encode()),
|
245
|
+
tag=f"{image._image_name}:{image._tag}",
|
246
|
+
rm=True,
|
247
|
+
)
|