indexify 0.0.4__tar.gz → 0.0.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,8 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: indexify
3
- Version: 0.0.4
3
+ Version: 0.0.6
4
4
  Summary: Python Client for Indexify
5
- Home-page: https://github.com/diptanu/indexify
5
+ Home-page: https://github.com/tensorlakeai/indexify
6
6
  License: Apache 2.0
7
7
  Author: Diptanu Gon Choudhury
8
8
  Author-email: diptanuc@gmail.com
@@ -12,12 +12,15 @@ Classifier: Programming Language :: Python :: 3
12
12
  Classifier: Programming Language :: Python :: 3.10
13
13
  Classifier: Programming Language :: Python :: 3.11
14
14
  Classifier: Programming Language :: Python :: 3.12
15
- Requires-Dist: httpx[http2] (>=0.24.1,<0.25.0)
16
- Project-URL: Repository, https://github.com/diptanu/indexify
15
+ Requires-Dist: httpx[http2] (>=0.26,<0.27)
16
+ Project-URL: Repository, https://github.com/tensorlakeai/indexify
17
17
  Description-Content-Type: text/markdown
18
18
 
19
19
  # Indexify Python Client
20
20
 
21
+
22
+ [![PyPI version](https://badge.fury.io/py/indexify.svg)](https://badge.fury.io/py/indexify)
23
+
21
24
  ## Installation
22
25
 
23
26
  This is the Python client for interacting with the Indexify service.
@@ -31,10 +34,17 @@ pip install indexify
31
34
  ## Usage
32
35
 
33
36
  See the [getting started](https://getindexify.com/getting_started/) guide for examples of how to use the client.
34
- Look at the [sdk-py/examples](examples) directory for more examples.
37
+ Look at the [examples](examples) directory for more examples.
35
38
 
36
39
  ## Development
37
40
 
41
+ To install the client from this repository for development:
42
+
43
+ ```shell
44
+ cd "path to this repository"
45
+ pip install -e .
46
+ ```
47
+
38
48
  Install and run the `poetry` package manager:
39
49
 
40
50
  ```shell
@@ -45,21 +55,6 @@ poetry install
45
55
  More information at [https://python-poetry.org/docs/](https://python-poetry.org/docs/).
46
56
 
47
57
 
48
- ### Steps for restarting dev server after updating server code
49
-
50
- ```shell
51
- ./install_python_deps.sh
52
- # use `-e`` if you're developing extractors
53
- (cd extractors && pip install -e .)
54
- # use `-e`` if you're developing sdk-py
55
- (cd sdk-py && pip install -e .)
56
-
57
- cargo build
58
- make local-dev
59
-
60
- # start the server
61
- ./target/debug/indexify start-server -d -c local_config.yaml
62
- ```
63
58
 
64
59
  ### Environment Variables
65
60
 
@@ -1,5 +1,8 @@
1
1
  # Indexify Python Client
2
2
 
3
+
4
+ [![PyPI version](https://badge.fury.io/py/indexify.svg)](https://badge.fury.io/py/indexify)
5
+
3
6
  ## Installation
4
7
 
5
8
  This is the Python client for interacting with the Indexify service.
@@ -13,10 +16,17 @@ pip install indexify
13
16
  ## Usage
14
17
 
15
18
  See the [getting started](https://getindexify.com/getting_started/) guide for examples of how to use the client.
16
- Look at the [sdk-py/examples](examples) directory for more examples.
19
+ Look at the [examples](examples) directory for more examples.
17
20
 
18
21
  ## Development
19
22
 
23
+ To install the client from this repository for development:
24
+
25
+ ```shell
26
+ cd "path to this repository"
27
+ pip install -e .
28
+ ```
29
+
20
30
  Install and run the `poetry` package manager:
21
31
 
22
32
  ```shell
@@ -27,21 +37,6 @@ poetry install
27
37
  More information at [https://python-poetry.org/docs/](https://python-poetry.org/docs/).
28
38
 
29
39
 
30
- ### Steps for restarting dev server after updating server code
31
-
32
- ```shell
33
- ./install_python_deps.sh
34
- # use `-e`` if you're developing extractors
35
- (cd extractors && pip install -e .)
36
- # use `-e`` if you're developing sdk-py
37
- (cd sdk-py && pip install -e .)
38
-
39
- cargo build
40
- make local-dev
41
-
42
- # start the server
43
- ./target/debug/indexify start-server -d -c local_config.yaml
44
- ```
45
40
 
46
41
  ### Environment Variables
47
42
 
@@ -1,14 +1,12 @@
1
1
  from .index import Index
2
2
  from .client import IndexifyClient
3
- from .repository import Repository, Filter, FilterBuilder
3
+ from .extractor_binding import ExtractorBinding
4
4
  from .data_containers import TextChunk
5
5
  from .settings import DEFAULT_SERVICE_URL
6
6
 
7
7
  __all__ = [
8
- "Filter",
9
- "FilterBuilder",
10
8
  "Index",
11
9
  "IndexifyClient",
12
- "Repository",
10
+ "ExtractorBinding",
13
11
  "DEFAULT_SERVICE_URL",
14
12
  ]
@@ -0,0 +1,406 @@
1
+ import httpx
2
+ import json
3
+ from collections import namedtuple
4
+ from .settings import DEFAULT_SERVICE_URL
5
+ from .extractor import Extractor
6
+ from .extractor_binding import ExtractorBinding
7
+ from .index import Index
8
+ from .utils import json_set_default
9
+ from .data_containers import TextChunk
10
+ from indexify.exceptions import ApiException
11
+
12
+ from typing import List, Optional, Union
13
+
14
+ Document = namedtuple("Document", ["text", "labels"])
15
+
16
+
17
class IndexifyClient:
    """
    IndexifyClient is the main entry point for the SDK.

    Each instance is bound to a single namespace and sends every request
    through one underlying ``httpx.Client``. Constructing a client performs a
    request to the service to load the namespace's extractor bindings.
    ``get`` and ``post`` raise ``httpx.HTTPStatusError`` for error responses,
    so the higher-level methods do not re-check the status themselves.

    For the full list of client features, see the
    [httpx Client documentation](https://www.python-httpx.org/api/#client).

    :param service_url: The URL of the Indexify service to connect to.
    :param namespace: Name of the namespace this client operates on.
    :param args: Arguments to pass to the httpx.Client constructor
    :param kwargs: Keyword arguments to pass to the httpx.Client constructor

    Example usage:
    ```
    from indexify import IndexifyClient

    client = IndexifyClient()
    assert client.heartbeat() == True
    ```
    """

    def __init__(
        self,
        service_url: str = DEFAULT_SERVICE_URL,
        namespace: str = "default",
        *args,
        **kwargs,
    ):
        self.namespace: str = namespace
        self.extractor_bindings: List[ExtractorBinding] = []
        self.labels: dict = {}
        self._service_url = service_url
        self._client = httpx.Client(*args, **kwargs)

        # Load the namespace's extractor bindings. If that fails, release the
        # connection pool before propagating: the caller never receives the
        # instance and so could not close it.
        try:
            self.get_extractor_bindings()
        except BaseException:
            self._client.close()
            raise

    @classmethod
    def with_mtls(
        cls,
        cert_path: str,
        key_path: str,
        ca_bundle_path: Optional[str] = None,
        service_url: str = DEFAULT_SERVICE_URL,
        *args,
        **kwargs,
    ) -> "IndexifyClient":
        """
        Create a client with mutual TLS authentication. Also enables HTTP/2,
        which is required for mTLS.
        NOTE: mTLS must be enabled on the Indexify service for this to work.

        :param cert_path: Path to the client certificate. Resolution handled by httpx.
        :param key_path: Path to the client key. Resolution handled by httpx.
        :param ca_bundle_path: Optional CA bundle passed to httpx as ``verify``;
            when omitted, ``verify=True`` is used.
        :param service_url: The URL of the Indexify service to connect to.
        :param args: Arguments to pass to the client constructor
        :param kwargs: Keyword arguments to pass to the client constructor
        :return: A client with mTLS authentication
        :raises ValueError: If either ``cert_path`` or ``key_path`` is empty.

        Example usage:
        ```
        from indexify import IndexifyClient

        client = IndexifyClient.with_mtls(
            cert_path="/path/to/cert.pem",
            key_path="/path/to/key.pem",
        )
        assert client.heartbeat() == True
        ```
        """
        if not (cert_path and key_path):
            raise ValueError("Both cert and key must be provided for mTLS")

        client_certs = (cert_path, key_path)
        verify_option = ca_bundle_path if ca_bundle_path else True
        # Use cls so that subclasses get an instance of their own type.
        return cls(
            *args,
            **kwargs,
            service_url=service_url,
            http2=True,
            cert=client_certs,
            verify=verify_option,
        )

    def _request(self, method: str, **kwargs) -> httpx.Response:
        """
        Send a request through the underlying httpx.Client.

        :param method: HTTP method name, e.g. "GET".
        :param kwargs: Keyword arguments forwarded to ``httpx.Client.request``.
        :return: The response, guaranteed not to carry an error status.
        :raises httpx.HTTPStatusError: If the response has an error status.
        """
        response = self._client.request(method, **kwargs)
        try:
            response.raise_for_status()
        except httpx.HTTPStatusError as exc:
            print(f"exception: {exc}, response text: {response.text}")
            # Bare raise keeps the original traceback intact.
            raise
        return response

    def get(self, endpoint: str, **kwargs) -> httpx.Response:
        """
        Make a GET request to the Indexify service.

        :param endpoint: The endpoint to make the request to.

        Example usage:
        ```
        from indexify import IndexifyClient

        client = IndexifyClient()
        response = client.get("namespaces")
        print(response.json())
        ```
        """
        return self._request("GET", url=f"{self._service_url}/{endpoint}", **kwargs)

    def post(self, endpoint: str, **kwargs) -> httpx.Response:
        """
        Make a POST request to the Indexify service.

        :param endpoint: The endpoint to make the request to.

        Example usage:

        ```
        from indexify import IndexifyClient

        client = IndexifyClient()
        response = client.post("namespaces", json={"name": "my-repo"})
        print(response.json())
        ```
        """
        return self._request("POST", url=f"{self._service_url}/{endpoint}", **kwargs)

    def put(self, endpoint: str, **kwargs) -> httpx.Response:
        """Not implemented; always raises NotImplementedError."""
        raise NotImplementedError

    def delete(self, endpoint: str, **kwargs) -> httpx.Response:
        """Not implemented; always raises NotImplementedError."""
        raise NotImplementedError

    def close(self):
        """
        Close the underlying httpx.Client.
        """
        self._client.close()

    # __enter__ and __exit__ allow the client to be used as a context manager
    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def heartbeat(self, heartbeat_response="Indexify Server") -> bool:
        """
        Check if the Indexify service is alive.

        :param heartbeat_response: Body text expected from the service root.
        :return: True when the root endpoint's body equals ``heartbeat_response``.
        """
        response = self.get("")
        # Server responds with text: "Indexify Server"
        return response.text == heartbeat_response

    def namespaces(self) -> list[str]:
        """
        Get a list of all namespaces.

        Returns:
            list[str]: the ``name`` of every entry in the response's ``namespaces`` list
        """
        response = self.get("namespaces")
        return [item["name"] for item in response.json()["namespaces"]]

    @classmethod
    def create_namespace(
        cls,
        namespace: str,
        extractor_bindings: Optional[list] = None,
        labels: Optional[dict] = None,
        service_url: str = DEFAULT_SERVICE_URL,
    ) -> "IndexifyClient":
        """
        Create a new namespace.

        Args:
            - namespace (str): name of the namespace to create
            - extractor_bindings (list): ExtractorBinding instances and/or
              already-serialized bindings; ExtractorBinding instances are
              converted with ``to_dict()``, anything else is sent unchanged
            - labels (dict): labels sent with the creation request
            - service_url (str): URL of the Indexify service

        Returns:
            IndexifyClient: a new client with the given namespace
        """
        bindings = [
            bd.to_dict() if isinstance(bd, ExtractorBinding) else bd
            for bd in (extractor_bindings or [])
        ]
        req = {
            "name": namespace,
            "extractor_bindings": bindings,
            "labels": labels if labels is not None else {},
        }

        # The constructor looks up its namespace on the service, so a client
        # bound to the not-yet-created namespace cannot be built first.
        # Bootstrap on the constructor's default namespace, create the new
        # one, then rebind.
        # NOTE(review): assumes the "default" namespace exists on the server,
        # as the plain IndexifyClient() usage already does - confirm.
        client = cls(service_url=service_url)
        try:
            client.post("namespaces", json=req)
            client.namespace = namespace
            # Refresh so extractor_bindings reflects the new namespace.
            client.get_extractor_bindings()
        except BaseException:
            client.close()
            raise
        return client

    def indexes(self) -> List[Index]:
        """
        Get the indexes of the current namespace.

        Returns:
            List[Index]: the ``indexes`` field of the response JSON, returned
            as decoded by ``response.json()`` (not wrapped in Index objects)
        """
        response = self.get(f"namespaces/{self.namespace}/indexes")
        return response.json()["indexes"]

    def extractors(self) -> List[Extractor]:
        """
        Get a list of all extractors.

        Returns:
            List[Extractor]: list of extractors
        """
        response = self.get("extractors")
        return [Extractor.from_dict(ed) for ed in response.json()["extractors"]]

    def get_extractor_bindings(self) -> List[ExtractorBinding]:
        """
        Retrieve and update the list of extractor bindings for the current namespace.

        Returns:
            List[ExtractorBinding]: the refreshed ``self.extractor_bindings``
        """
        response = self.get(f"namespaces/{self.namespace}")
        self.extractor_bindings = [
            ExtractorBinding.from_dict(eb)
            for eb in response.json()["namespace"]["extractor_bindings"]
        ]
        return self.extractor_bindings

    def bind_extractor(
        self,
        extractor: str,
        name: str,
        input_params: Optional[dict] = None,
        labels_eq: Optional[str] = None,
    ) -> None:
        """Bind an extractor.

        Args:
            - extractor (str): Name of the extractor
            - name (str): Name for this instance
            - input_params (dict): Dictionary containing extractor input params
            - labels_eq (str): Optional label filter, sent as ``filters_eq``;
              left out of the request when None

        Returns:
            None. ``self.extractor_bindings`` is refreshed after the request.

        Raises:
            httpx.HTTPStatusError: if the service answers with an error status

        Examples:
            >>> client.bind_extractor("EfficientNet", "efficientnet")

            >>> client.bind_extractor("MiniLML6", "minilm")

        """
        req = {
            "extractor": extractor,
            "name": name,
            "input_params": input_params if input_params is not None else {},
        }
        if labels_eq is not None:
            req["filters_eq"] = labels_eq

        # Encoded by hand so json_set_default can handle values the stock
        # encoder rejects; sent via content= because httpx deprecates passing
        # pre-encoded text through data=.
        request_body = json.dumps(req, default=json_set_default)
        # NOTE(review): post() already raises httpx.HTTPStatusError on error
        # responses, so a later translation to ApiException could never run.
        # HTTPStatusError is kept so callers see no change in exception type.
        self.post(
            f"namespaces/{self.namespace}/extractor_bindings",
            content=request_body,
            headers={"Content-Type": "application/json"},
        )

        # update self.extractor_bindings
        self.get_extractor_bindings()

    def get_content(
        self,
        parent_id: Optional[str] = None,
        labels_eq: Optional[str] = None,
    ):
        """
        Get list of content from current namespace.

        Args:
            - parent_id (str): Optional filter for parent id
            - labels_eq (str): Optional filter for labels

        Returns:
            the ``content_list`` field of the response JSON
        """
        params = {}
        if parent_id:
            params["parent_id"] = parent_id
        if labels_eq:
            params["labels_eq"] = labels_eq

        response = self.get(f"namespaces/{self.namespace}/content", params=params)
        return response.json()["content_list"]

    def add_documents(
        self, documents: Union[Document, str, List[Union[Document, str]]]
    ) -> None:
        """
        Add documents to current namespace.

        Args:
            - documents (Union[Document, str, List[Union[Document, str]]]): this can be a list of strings, list of Documents or a mix of both

        Raises:
            TypeError: if ``documents`` is not a Document, a str or a list
            ValueError: if a list item is neither a Document nor a str
        """
        # Normalize every accepted input shape to a list of Document tuples;
        # bare strings become Documents with empty labels.
        if isinstance(documents, Document):
            documents = [documents]
        elif isinstance(documents, str):
            documents = [Document(documents, {})]
        elif isinstance(documents, list):
            normalized = []
            for item in documents:
                if isinstance(item, Document):
                    normalized.append(item)
                elif isinstance(item, str):
                    normalized.append(Document(item, {}))
                else:
                    raise ValueError(
                        "List items must be either Document instances or strings."
                    )
            documents = normalized
        else:
            raise TypeError(
                "Invalid type for documents. Expected Document, str, or list of these."
            )

        self.post(
            f"namespaces/{self.namespace}/add_texts",
            json={"documents": documents},
            headers={"Content-Type": "application/json"},
        )

    def query_metadata(self, index_name: str, content_id: str) -> dict:
        """
        Query metadata for a specific content ID in a given index.

        Args:
            - index_name (str): index to query
            - content_id (str): content id to query

        Returns:
            the ``attributes`` field of the response JSON
        """
        params = {"index": index_name, "content_id": content_id}
        response = self.get(f"namespaces/{self.namespace}/metadata", params=params)
        return response.json()["attributes"]

    def search_index(self, name: str, query: str, top_k: int) -> list[TextChunk]:
        """
        Search index in the current namespace.

        Args:
            - name (str): name of index to search
            - query (str): query string
            - top_k (int): top k nearest neighbors to be returned

        Returns:
            the ``results`` field of the response JSON
        """
        req = {"index": name, "query": query, "k": top_k}
        response = self.post(
            f"namespaces/{self.namespace}/search",
            json=req,
            headers={"Content-Type": "application/json"},
        )
        return response.json()["results"]

    def upload_file(self, path: str):
        """
        Upload a file.

        Args:
            - path (str): path to the file to be uploaded
        """
        # Keep the file open for the whole request; httpx reads it while sending.
        with open(path, "rb") as f:
            self.post(
                f"namespaces/{self.namespace}/upload_file",
                files={"file": f},
            )
@@ -21,6 +21,7 @@ class TextChunk:
21
21
  def to_dict(self):
22
22
  return {"text": self.text, "metadata": self.metadata}
23
23
 
24
+
24
25
  @dataclass
25
26
  class SearchChunk:
26
27
  index: str
@@ -1,27 +1,10 @@
1
- import requests
1
+ import httpx
2
2
  from dataclasses import dataclass
3
3
  from typing import Union
4
4
 
5
5
  from .settings import DEFAULT_SERVICE_URL
6
6
 
7
7
 
8
- def list_extractors(base_url: str = DEFAULT_SERVICE_URL) -> list[dict]:
9
- response = requests.get(f"{base_url}/extractors")
10
- response.raise_for_status()
11
- return response.json()["extractors"]
12
-
13
-
14
- @dataclass
15
- class EmbeddingExtractor:
16
- dim: int
17
- distance: str
18
-
19
-
20
- @dataclass
21
- class AttributeExtractor:
22
- json_schema: str
23
-
24
-
25
8
  @dataclass
26
9
  class EmbeddingSchema:
27
10
  distance: str
@@ -38,17 +21,17 @@ class Extractor:
38
21
  name: str
39
22
  description: str
40
23
  input_params: dict
41
- schemas: ExtractorSchema
24
+ outputs: ExtractorSchema
42
25
 
43
26
 
44
27
  class Extractor:
45
28
  def __init__(
46
- self, name: str, description: str, input_params: dict, schemas: ExtractorSchema
29
+ self, name: str, description: str, input_params: dict, outputs: ExtractorSchema
47
30
  ):
48
31
  self.name = name
49
32
  self.description = description
50
33
  self.input_params = input_params
51
- self.schemas = schemas
34
+ self.outputs = outputs
52
35
 
53
36
  @classmethod
54
37
  def from_dict(cls, data):
@@ -56,11 +39,11 @@ class Extractor:
56
39
  name=data["name"],
57
40
  description=data["description"],
58
41
  input_params=data["input_params"],
59
- schemas=data["schemas"],
42
+ outputs=data["outputs"],
60
43
  )
61
44
 
62
45
  def __repr__(self) -> str:
63
- return f"Extractor(name={self.name}, description={self.description})"
46
+ return f"Extractor(name={self.name}, description={self.description}, input_params={self.input_params}, outputs={self.outputs})"
64
47
 
65
48
  def __str__(self) -> str:
66
49
  return self.__repr__()
@@ -0,0 +1,27 @@
1
+ from dataclasses import dataclass, asdict
2
+ from typing import Optional
3
+
4
+
5
@dataclass
class ExtractorBinding:
    """
    Record describing one extractor binding.

    Instances convert to and from plain dicts with ``to_dict`` and
    ``from_dict``. ``from_dict`` accepts the label filter under either the
    ``labels_eq`` or the ``filters_eq`` key.
    """

    extractor: str
    name: str
    content_source: str
    input_params: dict
    # Optional label filter; left out of to_dict() output when None.
    labels_eq: Optional[str] = None

    def __repr__(self) -> str:
        return f"ExtractorBinding(name={self.name} extractor={self.extractor})"

    def __str__(self) -> str:
        return self.__repr__()

    def to_dict(self) -> dict:
        """Return the fields as a dict, omitting any whose value is None."""
        return {k: v for k, v in asdict(self).items() if v is not None}

    @classmethod
    def from_dict(cls, json: dict):
        """
        Build a binding from a dict of field values.

        A ``filters_eq`` key is treated as ``labels_eq``. The input dict is
        copied first, so the caller's object is never modified.

        :param json: Mapping of field names to values.
        :return: A new instance of ``cls``.
        :raises TypeError: If a required field is missing or an unknown key
            is present (raised by the dataclass constructor).
        """
        # Shallow copy: renaming the key must not mutate the caller's dict.
        data = dict(json)
        if "filters_eq" in data:
            data["labels_eq"] = data.pop("filters_eq")
        return cls(**data)
@@ -1,7 +1,8 @@
1
- import aiohttp
1
+ import httpx
2
2
 
3
3
  from .data_containers import SearchChunk, TextChunk
4
4
 
5
+
5
6
  class Index:
6
7
  def __init__(self, service_url, index):
7
8
  self._service_url = service_url
@@ -9,8 +10,8 @@ class Index:
9
10
 
10
11
  def search(self, query: str, top_k: int) -> list[TextChunk]:
11
12
  req = {"index": self._index, "query": query, "k": top_k}
12
- response = aiohttp.post(
13
+ response = httpx.post(
13
14
  f"{self._service_url}/indexes/{self._index}/search", json=req
14
15
  )
15
16
  response.raise_for_status()
16
- return response.json()["results"]
17
+ return response.json()["results"]
@@ -1,16 +1,16 @@
1
1
  [tool.poetry]
2
2
  name = "indexify"
3
- version = "0.0.4"
3
+ version = "0.0.6"
4
4
  description = "Python Client for Indexify"
5
5
  authors = ["Diptanu Gon Choudhury <diptanuc@gmail.com>", "Vijay Parthasarathy <vijay2win@gmail.com>"]
6
6
  license = "Apache 2.0"
7
7
  readme = "README.md"
8
- homepage = "https://github.com/diptanu/indexify"
9
- repository = "https://github.com/diptanu/indexify"
8
+ homepage = "https://github.com/tensorlakeai/indexify"
9
+ repository = "https://github.com/tensorlakeai/indexify"
10
10
 
11
11
  [tool.poetry.dependencies]
12
12
  python = "^3.10.0"
13
- httpx = { version = "^0.24.1", extras = ["http2"] }
13
+ httpx = { version = "^0.26", extras = ["http2"] }
14
14
 
15
15
  [tool.poetry.dev-dependencies]
16
16
  black = "^22.3.0"
@@ -1,178 +0,0 @@
1
- import httpx
2
- from .repository import Repository
3
- from .settings import DEFAULT_SERVICE_URL
4
- from .extractor import Extractor
5
-
6
- from typing import List, Optional
7
-
8
-
9
- class IndexifyClient:
10
- """
11
- IndexifyClient is the main entry point for the SDK.
12
- For the full list of client features, see the
13
- [httpx Client documentation](https://www.python-httpx.org/api/#client).
14
-
15
- :param service_url: The URL of the Indexify service to connect to.
16
- :param args: Arguments to pass to the httpx.Client constructor
17
- :param kwargs: Keyword arguments to pass to the httpx.Client constructor
18
-
19
- Example usage:
20
- ```
21
- from indexify import IndexifyClient
22
-
23
- client = IndexifyClient()
24
- assert client.heartbeat() == True
25
- ```
26
- """
27
- def __init__(self,
28
- service_url: str = DEFAULT_SERVICE_URL,
29
- *args,
30
- **kwargs
31
- ):
32
- self._service_url = service_url
33
- self._client = httpx.Client(*args, **kwargs)
34
-
35
- @classmethod
36
- def with_mtls(cls, cert_path: str, key_path: str, ca_bundle_path: Optional[str] = None,
37
- service_url: str = DEFAULT_SERVICE_URL, *args, **kwargs) -> "IndexifyClient":
38
- """
39
- Create a client with mutual TLS authentication. Also enables HTTP/2,
40
- which is required for mTLS.
41
- NOTE: mTLS must be enabled on the Indexify service for this to work.
42
-
43
- :param cert_path: Path to the client certificate. Resolution handled by httpx.
44
- :param key_path: Path to the client key. Resolution handled by httpx.
45
- :param args: Arguments to pass to the httpx.Client constructor
46
- :param kwargs: Keyword arguments to pass to the httpx.Client constructor
47
- :return: A client with mTLS authentication
48
-
49
- Example usage:
50
- ```
51
- from indexify import IndexifyClient
52
-
53
- client = IndexifyClient.with_mtls(
54
- cert_path="/path/to/cert.pem",
55
- key_path="/path/to/key.pem",
56
- )
57
- assert client.heartbeat() == True
58
- ```
59
- """
60
- if not (cert_path and key_path):
61
- raise ValueError("Both cert and key must be provided for mTLS")
62
-
63
- client_certs = (cert_path, key_path)
64
- verify_option = ca_bundle_path if ca_bundle_path else True
65
- client = IndexifyClient(*args, **kwargs, service_url=service_url, http2=True, cert=client_certs, verify=verify_option)
66
- return client
67
-
68
- def _request(self, method: str, **kwargs) -> httpx.Response:
69
- response = self._client.request(method, **kwargs)
70
- response.raise_for_status()
71
- return response
72
-
73
- def get(self, endpoint: str, **kwargs) -> httpx.Response:
74
- """
75
- Make a GET request to the Indexify service.
76
-
77
- :param endpoint: The endpoint to make the request to.
78
-
79
- Example usage:
80
- ```
81
- from indexify import IndexifyClient
82
-
83
- client = IndexifyClient()
84
- response = client.get("repositories")
85
- print(response.json())
86
- ```
87
- """
88
- return self._request("GET", url=f"{self._service_url}/{endpoint}", **kwargs)
89
-
90
- def post(self, endpoint: str, **kwargs) -> httpx.Response:
91
- """
92
- Make a POST request to the Indexify service.
93
-
94
- :param endpoint: The endpoint to make the request to.
95
-
96
- Example usage:
97
-
98
- ```
99
- from indexify import IndexifyClient
100
-
101
- client = IndexifyClient()
102
- response = client.post("repositories", json={"name": "my-repo"})
103
- print(response.json())
104
- ```
105
- """
106
- return self._request("POST", url=f"{self._service_url}/{endpoint}", **kwargs)
107
-
108
- def put(self, endpoint: str, **kwargs) -> httpx.Response:
109
- # Not Implemented
110
- raise NotImplementedError
111
-
112
- def delete(self, endpoint: str, **kwargs) -> httpx.Response:
113
- # Not Implemented
114
- raise NotImplementedError
115
-
116
- def close(self):
117
- """
118
- Close the underlying httpx.Client.
119
- """
120
- self._client.close()
121
-
122
- # __enter__ and __exit__ allow the client to be used as a context manager
123
- def __enter__(self):
124
- return self
125
-
126
- def __exit__(self, exc_type, exc_value, traceback):
127
- self.close()
128
-
129
- def heartbeat(self, heartbeat_response="Indexify Server") -> bool:
130
- """
131
- Check if the Indexify service is alive.
132
- """
133
- response = self.get(f"")
134
- # Server responds with text: "Indexify Server"
135
- return response.text == heartbeat_response
136
-
137
- def repositories(self) -> list[Repository]:
138
- """
139
- Get a list of all repositories.
140
- """
141
- response = self.get(f"repositories")
142
- repositories_dict = response.json()["repositories"]
143
- repositories = []
144
- for rd in repositories_dict:
145
- repositories.append(Repository(rd["name"], self._service_url))
146
- return repositories
147
-
148
- def create_repository(
149
- self, name: str, extractor_bindings: list = [], metadata: dict = {}
150
- ) -> Repository:
151
- """
152
- Create a new repository.
153
- """
154
- req = {
155
- "name": name,
156
- "extractor_bindings": extractor_bindings,
157
- "metadata": metadata,
158
- }
159
- response = self.post(f"repositories", json=req)
160
- return Repository(name, self._service_url)
161
-
162
- def get_repository(self, name: str) -> Repository:
163
- """
164
- Get a repository by name.
165
- """
166
- return Repository(name, self._service_url)
167
-
168
- def extractors(self) -> List[Extractor]:
169
- """
170
- Get a list of all extractors.
171
- """
172
- response = self.get(f"extractors")
173
- extractors_dict = response.json()["extractors"]
174
- extractors = []
175
- for ed in extractors_dict:
176
- extractors.append(Extractor.from_dict(ed))
177
- return extractors
178
-
@@ -1,196 +0,0 @@
1
- import httpx
2
- import json
3
-
4
- from dataclasses import dataclass
5
- from collections import namedtuple
6
-
7
- from .data_containers import TextChunk
8
- from .settings import DEFAULT_SERVICE_URL
9
- from typing import List
10
- from .utils import json_set_default
11
- from indexify.exceptions import ApiException
12
- from .index import Index
13
-
14
- Document = namedtuple("Document", ["text", "metadata"])
15
-
16
-
17
- @dataclass
18
- class Filter:
19
- includes: dict[str, str]
20
- excludes: dict[str, str]
21
-
22
- @classmethod
23
- def from_dict(cls, json: dict):
24
- includes = json.get("eq", {})
25
- excludes = json.get("ne", {})
26
- return Filter(includes=includes, excludes=excludes)
27
-
28
- def json(self):
29
- filters = []
30
- for k, v in self.includes.items():
31
- filters.append({"eq": {k: v}})
32
- for k, v in self.excludes.items():
33
- filters.append({"neq": {k: v}})
34
- return filters
35
-
36
-
37
- class FilterBuilder:
38
- def __init__(self) -> None:
39
- self._filter = Filter(includes={}, excludes={})
40
-
41
- def include(self, key: str, value: str) -> "FilterBuilder":
42
- self._filter.includes[key] = value
43
- return self
44
-
45
- def exclude(self, key: str, value: str) -> "FilterBuilder":
46
- self._filter.excludes[key] = value
47
- return self
48
-
49
- def build(self) -> Filter:
50
- return self._filter
51
-
52
-
53
- @dataclass
54
- class ExtractorBinding:
55
- extractor_name: str
56
- index_name: str
57
- filters: list[Filter]
58
- input_params: dict
59
-
60
- def __repr__(self) -> str:
61
- return f"ExtractorBinding(extractor_name={self.extractor_name}, index_name={self.index_name})"
62
-
63
- def __str__(self) -> str:
64
- return self.__repr__()
65
-
66
- @classmethod
67
- def from_dict(cls, json: dict):
68
- filters_dict = json["filters"]
69
- filters = []
70
- for filter_dict in filters_dict:
71
- filters.append(Filter.from_dict(filter_dict))
72
- json["filters"] = filters
73
- return ExtractorBinding(**json)
74
-
75
-
76
- class Repository:
77
- def __init__(
78
- self,
79
- name: str,
80
- service_url: str,
81
- extractor_bindings: List[ExtractorBinding] = None,
82
- metadata: dict = None,
83
- ) -> None:
84
- self.name = name
85
- self._service_url = service_url
86
- self.extractor_bindings = extractor_bindings
87
- self.metadata = metadata
88
-
89
- async def run_extractors(self) -> dict:
90
- response = httpx.post(f"{self._service_url}/run_extractors")
91
- response.raise_for_status()
92
-
93
- def add_documents(self, documents: List[Document]) -> None:
94
- if isinstance(documents, Document):
95
- documents = [documents]
96
- req = {"documents": documents}
97
- response = httpx.post(
98
- f"{self._service_url}/repositories/{self.name}/add_texts",
99
- json=req,
100
- headers={"Content-Type": "application/json"},
101
- )
102
- response.raise_for_status()
103
-
104
- def bind_extractor(
105
- self,
106
- extractor: str,
107
- name: str,
108
- input_params: dict = {},
109
- filter: Filter = None,
110
- ) -> dict:
111
- """Bind an extractor to this repository
112
-
113
- Args:
114
- - extractor (str): Name of the extractor
115
- - name (str): Name for this instance
116
- - input_params (dict): Dictionary containing extractor input params
117
- - filter (Filter): Optional filter for this extractor
118
-
119
- Returns:
120
- dict: response payload
121
-
122
- Examples:
123
- >>> repo.bind_extractor("EfficientNet", "efficientnet")
124
-
125
- >>> repo.bind_extractor("MiniLML6", "minilm")
126
-
127
- """
128
- req = {
129
- "extractor": extractor,
130
- "name": name,
131
- "input_params": input_params,
132
- "filters": filter.json() if filter else [],
133
- }
134
-
135
- request_body = json.dumps(req, default=json_set_default)
136
- response = httpx.post(
137
- f"{self._service_url}/repositories/{self.name}/extractor_bindings",
138
- data=request_body,
139
- headers={"Content-Type": "application/json"},
140
- )
141
- try:
142
- response.raise_for_status()
143
- except httpx.HTTPStatusError as exc:
144
- raise ApiException(exc.response.text)
145
- return
146
-
147
- def indexes(self) -> List[Index]:
148
- response = httpx.get(f"{self._service_url}/repositories/{self.name}/indexes")
149
- response.raise_for_status()
150
- return response.json()["indexes"]
151
-
152
- @classmethod
153
- def get(cls, name: str, service_url: str = DEFAULT_SERVICE_URL) -> "Repository":
154
- response = httpx.get(f"{service_url}/repositories/{name}")
155
- response.raise_for_status()
156
- repository_json = response.json()["repository"]
157
- return Repository._from_json(repository_json)
158
-
159
- @classmethod
160
- def _from_json(cls, service_url: str, repository_json: dict):
161
- extractor_bindings = []
162
- for eb in repository_json["repository"]["extractor_bindings"]:
163
- extractor_bindings.append(ExtractorBinding.from_dict(eb))
164
- metadata = repository_json["repository"]["metadata"]
165
- return Repository(
166
- name=repository_json["repository"]["name"],
167
- service_url=service_url,
168
- extractor_bindings=extractor_bindings,
169
- metadata=metadata,
170
- )
171
-
172
- def query_attribute(self, index_name: str, content_id: str = None) -> dict:
173
- params = {"index": index_name}
174
- if content_id:
175
- params.update({"content_id": content_id})
176
- response = httpx.get(
177
- f"{self._service_url}/repositories/{self.name}/attributes", params=params
178
- )
179
- response.raise_for_status()
180
- return response.json()["attributes"]
181
-
182
- def search_index(self, name: str, query: str, top_k: int) -> list[TextChunk]:
183
- req = {"index": name, "query": query, "k": top_k}
184
- response = httpx.post(
185
- f"{self._service_url}/repositories/{self.name}/search",
186
- json=req,
187
- headers={"Content-Type": "application/json"},
188
- )
189
- response.raise_for_status()
190
- return response.json()["results"]
191
-
192
- def __repr__(self) -> str:
193
- return f"Repository(name={self.name})"
194
-
195
- def __str__(self) -> str:
196
- return self.__repr__()
File without changes
File without changes
File without changes