indexify-0.0.4-py3-none-any.whl → indexify-0.0.6-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the package versions exactly as they appear in their public registries.
indexify/__init__.py CHANGED
@@ -1,14 +1,12 @@
  from .index import Index
  from .client import IndexifyClient
- from .repository import Repository, Filter, FilterBuilder
+ from .extractor_binding import ExtractorBinding
  from .data_containers import TextChunk
  from .settings import DEFAULT_SERVICE_URL

  __all__ = [
-     "Filter",
-     "FilterBuilder",
      "Index",
      "IndexifyClient",
-     "Repository",
+     "ExtractorBinding",
      "DEFAULT_SERVICE_URL",
  ]
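For callers upgrading from 0.0.4: `Repository`, `Filter`, and `FilterBuilder` are no longer exported, and `ExtractorBinding` takes their place. A minimal sketch of imports that work against 0.0.6 (import-only, no server required):

```python
# Names exported by indexify 0.0.6; Repository, Filter, and FilterBuilder
# from 0.0.4 no longer exist.
from indexify import DEFAULT_SERVICE_URL, ExtractorBinding, Index, IndexifyClient

print(DEFAULT_SERVICE_URL)  # base URL that IndexifyClient uses by default
```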
indexify/client.py CHANGED
@@ -1,9 +1,17 @@
  import httpx
- from .repository import Repository
+ import json
+ from collections import namedtuple
  from .settings import DEFAULT_SERVICE_URL
  from .extractor import Extractor
+ from .extractor_binding import ExtractorBinding
+ from .index import Index
+ from .utils import json_set_default
+ from .data_containers import TextChunk
+ from indexify.exceptions import ApiException

- from typing import List, Optional
+ from typing import List, Optional, Union
+
+ Document = namedtuple("Document", ["text", "labels"])


  class IndexifyClient:
@@ -24,17 +32,38 @@ class IndexifyClient:
      assert client.heartbeat() == True
      ```
      """
-     def __init__(self,
-         service_url: str = DEFAULT_SERVICE_URL,
-         *args,
-         **kwargs
-     ):
+
+     def __init__(
+         self,
+         service_url: str = DEFAULT_SERVICE_URL,
+         namespace: str = "default",
+         *args,
+         **kwargs,
+     ):
+         self.namespace: str = namespace
+         self.extractor_bindings: List[ExtractorBinding] = []
+         self.labels: dict = {}
          self._service_url = service_url
          self._client = httpx.Client(*args, **kwargs)

+         # get namespace data
+         response = self.get(f"namespaces/{self.namespace}")
+         response.raise_for_status()
+         resp_json = response.json()
+         # initialize extractor_bindings
+         for eb in resp_json["namespace"]["extractor_bindings"]:
+             self.extractor_bindings.append(ExtractorBinding.from_dict(eb))
+
      @classmethod
-     def with_mtls(cls, cert_path: str, key_path: str, ca_bundle_path: Optional[str] = None,
-                   service_url: str = DEFAULT_SERVICE_URL, *args, **kwargs) -> "IndexifyClient":
+     def with_mtls(
+         cls,
+         cert_path: str,
+         key_path: str,
+         ca_bundle_path: Optional[str] = None,
+         service_url: str = DEFAULT_SERVICE_URL,
+         *args,
+         **kwargs,
+     ) -> "IndexifyClient":
          """
          Create a client with mutual TLS authentication. Also enables HTTP/2,
          which is required for mTLS.
@@ -62,12 +91,23 @@ class IndexifyClient:

          client_certs = (cert_path, key_path)
          verify_option = ca_bundle_path if ca_bundle_path else True
-         client = IndexifyClient(*args, **kwargs, service_url=service_url, http2=True, cert=client_certs, verify=verify_option)
+         client = IndexifyClient(
+             *args,
+             **kwargs,
+             service_url=service_url,
+             http2=True,
+             cert=client_certs,
+             verify=verify_option,
+         )
          return client

      def _request(self, method: str, **kwargs) -> httpx.Response:
          response = self._client.request(method, **kwargs)
-         response.raise_for_status()
+         try:
+             response.raise_for_status()
+         except httpx.HTTPStatusError as exc:
+             print(f"exception: {exc}, response text: {response.text}")
+             raise exc
          return response

      def get(self, endpoint: str, **kwargs) -> httpx.Response:
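The reformatted `with_mtls` constructor still just forwards the client certificate to `httpx` and turns on HTTP/2 before delegating to `IndexifyClient.__init__`. A hedged sketch of calling it; the certificate paths and URL are placeholders, and a reachable mTLS-enabled Indexify server is assumed:

```python
from indexify import IndexifyClient

# Placeholder paths and URL; substitute the certificate material issued for
# your deployment. Constructing the client performs a namespace lookup, so
# the server must be reachable.
client = IndexifyClient.with_mtls(
    cert_path="client.cert.pem",
    key_path="client.key.pem",
    ca_bundle_path="ca.cert.pem",          # optional; omit to use system CAs
    service_url="https://localhost:8900",  # assumed mTLS endpoint
)
assert client.heartbeat()
```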
@@ -81,7 +121,7 @@ class IndexifyClient:
          from indexify import IndexifyClient

          client = IndexifyClient()
-         response = client.get("repositories")
+         response = client.get("namespaces")
          print(response.json())
          ```
          """
@@ -99,7 +139,7 @@ class IndexifyClient:
          from indexify import IndexifyClient

          client = IndexifyClient()
-         response = client.post("repositories", json={"name": "my-repo"})
+         response = client.post("namespaces", json={"name": "my-repo"})
          print(response.json())
          ```
          """
@@ -134,40 +174,63 @@ class IndexifyClient:
          # Server responds with text: "Indexify Server"
          return response.text == heartbeat_response

-     def repositories(self) -> list[Repository]:
+     def namespaces(self) -> list[str]:
          """
-         Get a list of all repositories.
+         Get a list of all namespaces.
          """
-         response = self.get(f"repositories")
-         repositories_dict = response.json()["repositories"]
-         repositories = []
-         for rd in repositories_dict:
-             repositories.append(Repository(rd["name"], self._service_url))
-         return repositories
+         response = self.get(f"namespaces")
+         namespaces_dict = response.json()["namespaces"]
+         namespaces = []
+         for item in namespaces_dict:
+             namespaces.append(item["name"])
+         return namespaces

-     def create_repository(
-         self, name: str, extractor_bindings: list = [], metadata: dict = {}
-     ) -> Repository:
+     @classmethod
+     def create_namespace(
+         self,
+         namespace: str,
+         extractor_bindings: list = [],
+         labels: dict = {},
+     ) -> "IndexifyClient":
          """
-         Create a new repository.
+         Create a new namespace.
+
+         Returns:
+             IndexifyClient: a new client with the given namespace
          """
+         bindings = []
+         for bd in extractor_bindings:
+             if isinstance(bd, ExtractorBinding):
+                 bindings.append(bd.to_dict())
+             else:
+                 bindings.append(bd)
          req = {
-             "name": name,
-             "extractor_bindings": extractor_bindings,
-             "metadata": metadata,
+             "name": namespace,
+             "extractor_bindings": bindings,
+             "labels": labels,
          }
-         response = self.post(f"repositories", json=req)
-         return Repository(name, self._service_url)

-     def get_repository(self, name: str) -> Repository:
+         client = IndexifyClient(namespace=namespace)
+         client.post(f"namespaces", json=req)
+         return client
+
+     def indexes(self) -> List[Index]:
          """
-         Get a repository by name.
+         Get the indexes of the current namespace.
+
+         Returns:
+             List[Index]: list of indexes in the current namespace
          """
-         return Repository(name, self._service_url)
+         response = self.get(f"namespaces/{self.namespace}/indexes")
+         response.raise_for_status()
+         return response.json()["indexes"]

      def extractors(self) -> List[Extractor]:
          """
          Get a list of all extractors.
+
+         Returns:
+             List[Extractor]: list of extractors
          """
          response = self.get(f"extractors")
          extractors_dict = response.json()["extractors"]
@@ -176,3 +239,168 @@ class IndexifyClient:
              extractors.append(Extractor.from_dict(ed))
          return extractors

+     def get_extractor_bindings(self):
+         """
+         Retrieve and update the list of extractor bindings for the current namespace.
+         """
+         response = self.get(f"namespaces/{self.namespace}")
+         response.raise_for_status()
+
+         self.extractor_bindings = []
+         for eb in response.json()["namespace"]["extractor_bindings"]:
+             self.extractor_bindings.append(ExtractorBinding.from_dict(eb))
+         return self.extractor_bindings
+
+     def bind_extractor(
+         self,
+         extractor: str,
+         name: str,
+         input_params: dict = {},
+         labels_eq: str = None,
+     ) -> dict:
+         """Bind an extractor.
+
+         Args:
+             - extractor (str): Name of the extractor
+             - name (str): Name for this instance
+             - input_params (dict): Dictionary containing extractor input params
+             - filter (Filter): Optional filter for this extractor
+
+         Returns:
+             dict: response payload
+
+         Examples:
+             >>> repo.bind_extractor("EfficientNet", "efficientnet")
+
+             >>> repo.bind_extractor("MiniLML6", "minilm")
+
+         """
+         req = {
+             "extractor": extractor,
+             "name": name,
+             "input_params": input_params,
+             "filters_eq": labels_eq,
+         }
+         if req["filters_eq"] == None:
+             del req["filters_eq"]
+
+         request_body = json.dumps(req, default=json_set_default)
+         response = self.post(
+             f"namespaces/{self.namespace}/extractor_bindings",
+             data=request_body,
+             headers={"Content-Type": "application/json"},
+         )
+
+         # update self.extractor_bindings
+         self.get_extractor_bindings()
+
+         try:
+             response.raise_for_status()
+         except httpx.HTTPStatusError as exc:
+             raise ApiException(exc.response.text)
+         return
+
+     def get_content(
+         self,
+         parent_id: str = None,
+         labels_eq: str = None,
+     ):
+         """
+         Get list of content from current namespace.
+
+         Args:
+             - parent_id (str): Optional filter for parent id
+             - labels_eq (str): Optional filter for labels
+         """
+         params = {}
+         if parent_id:
+             params.update({"parent_id": parent_id})
+         if labels_eq:
+             params.update({"labels_eq": labels_eq})
+
+         response = self.get(f"namespaces/{self.namespace}/content", params=params)
+         response.raise_for_status()
+         return response.json()["content_list"]
+
+     def add_documents(
+         self, documents: Union[Document, str, List[Union[Document, str]]]
+     ) -> None:
+         """
+         Add documents to current namespace.
+
+         Args:
+             - documents (Union[Document, str, List[Union[Document, str]]]): this can be a list of strings, list of Documents or a mix of both
+         """
+         if isinstance(documents, Document):
+             documents = [documents]
+         elif isinstance(documents, str):
+             documents = [Document(documents, {})]
+         elif isinstance(documents, list):
+             new_documents = []
+             for item in documents:
+                 if isinstance(item, Document):
+                     new_documents.append(item)
+                 elif isinstance(item, str):
+                     new_documents.append(Document(item, {}))
+                 else:
+                     raise ValueError(
+                         "List items must be either Document instances or strings."
+                     )
+             documents = new_documents
+         else:
+             raise TypeError(
+                 "Invalid type for documents. Expected Document, str, or list of these."
+             )
+
+         req = {"documents": documents}
+         response = self.post(
+             f"namespaces/{self.namespace}/add_texts",
+             json=req,
+             headers={"Content-Type": "application/json"},
+         )
+         response.raise_for_status()
+
+     def query_metadata(self, index_name: str, content_id: str) -> dict:
+         """
+         Query metadata for a specific content ID in a given index.
+
+         Args:
+             - index_name (str): index to query
+             - content_id (str): content id to query
+         """
+         params = {"index": index_name, "content_id": content_id}
+         response = self.get(f"namespaces/{self.namespace}/metadata", params=params)
+         response.raise_for_status()
+         return response.json()["attributes"]
+
+     def search_index(self, name: str, query: str, top_k: int) -> list[TextChunk]:
+         """
+         Search index in the current namespace.
+
+         Args:
+             - name (str): name of index to search
+             - query (str): query string
+             - top_k (int): top k nearest neighbors to be returned
+         """
+         req = {"index": name, "query": query, "k": top_k}
+         response = self.post(
+             f"namespaces/{self.namespace}/search",
+             json=req,
+             headers={"Content-Type": "application/json"},
+         )
+         response.raise_for_status()
+         return response.json()["results"]
+
+     def upload_file(self, path: str):
+         """
+         Upload a file.
+
+         Args:
+             - path (str): relative path to the file to be uploaded
+         """
+         with open(path, "rb") as f:
+             response = self.post(
+                 f"namespaces/{self.namespace}/upload_file",
+                 files={"file": f},
+             )
+             response.raise_for_status()
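Taken together, the methods added above move every repository-scoped operation onto the client itself, keyed by the `namespace` it was constructed with. A minimal end-to-end sketch against 0.0.6, assuming a running Indexify server at the default service URL; the extractor name comes from the docstring example above, and the index name is illustrative (list the real ones with `client.indexes()`):

```python
from indexify import IndexifyClient

# Constructing the client fetches the namespace's extractor bindings, so the
# server must already be reachable; this uses the "default" namespace.
client = IndexifyClient()

# Bind an extractor to the namespace ("MiniLML6"/"minilm" mirror the
# docstring example; substitute an extractor that is actually deployed).
client.bind_extractor("MiniLML6", "minilm")

# Plain strings are wrapped into Document(text, labels) namedtuples with
# empty labels; Document from indexify.client can be used to attach labels.
client.add_documents(
    [
        "Indexify namespaces replace the repositories from 0.0.4.",
        "The client now exposes search and metadata queries directly.",
    ]
)

# Indexes produced by the binding, then a search over one of them
# (the index name below is illustrative).
print(client.indexes())
for chunk in client.search_index("minilm.embedding", "what replaced repositories?", top_k=3):
    print(chunk)
```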
indexify/data_containers.py CHANGED
@@ -21,6 +21,7 @@ class TextChunk:
      def to_dict(self):
          return {"text": self.text, "metadata": self.metadata}

+
  @dataclass
  class SearchChunk:
      index: str
indexify/extractor.py CHANGED
@@ -1,27 +1,10 @@
- import requests
+ import httpx
  from dataclasses import dataclass
  from typing import Union

  from .settings import DEFAULT_SERVICE_URL


- def list_extractors(base_url: str = DEFAULT_SERVICE_URL) -> list[dict]:
-     response = requests.get(f"{base_url}/extractors")
-     response.raise_for_status()
-     return response.json()["extractors"]
-
-
- @dataclass
- class EmbeddingExtractor:
-     dim: int
-     distance: str
-
-
- @dataclass
- class AttributeExtractor:
-     json_schema: str
-
-
  @dataclass
  class EmbeddingSchema:
      distance: str
@@ -38,17 +21,17 @@ class Extractor:
      name: str
      description: str
      input_params: dict
-     schemas: ExtractorSchema
+     outputs: ExtractorSchema


  class Extractor:
      def __init__(
-         self, name: str, description: str, input_params: dict, schemas: ExtractorSchema
+         self, name: str, description: str, input_params: dict, outputs: ExtractorSchema
      ):
          self.name = name
          self.description = description
          self.input_params = input_params
-         self.schemas = schemas
+         self.outputs = outputs

      @classmethod
      def from_dict(cls, data):
@@ -56,11 +39,11 @@ class Extractor:
              name=data["name"],
              description=data["description"],
              input_params=data["input_params"],
-             schemas=data["schemas"],
+             outputs=data["outputs"],
          )

      def __repr__(self) -> str:
-         return f"Extractor(name={self.name}, description={self.description})"
+         return f"Extractor(name={self.name}, description={self.description}, input_params={self.input_params}, outputs={self.outputs})"

      def __str__(self) -> str:
          return self.__repr__()
indexify/extractor_binding.py ADDED
@@ -0,0 +1,27 @@
+ from dataclasses import dataclass, asdict
+ from typing import Optional
+
+
+ @dataclass
+ class ExtractorBinding:
+     extractor: str
+     name: str
+     content_source: str
+     input_params: dict
+     labels_eq: Optional[str] = None
+
+     def __repr__(self) -> str:
+         return f"ExtractorBinding(name={self.name} extractor={self.extractor})"
+
+     def __str__(self) -> str:
+         return self.__repr__()
+
+     def to_dict(self) -> dict:
+         filtered_dict = {k: v for k, v in asdict(self).items() if v is not None}
+         return filtered_dict
+
+     @classmethod
+     def from_dict(cls, json: dict):
+         if "filters_eq" in json:
+             json["labels_eq"] = json.pop("filters_eq")
+         return ExtractorBinding(**json)
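The new `ExtractorBinding` dataclass is what `IndexifyClient` now stores per namespace; `from_dict` renames the server-side `filters_eq` key to `labels_eq`, and `to_dict` drops fields left as `None`. A small self-contained sketch with made-up values (no server involved):

```python
from indexify import ExtractorBinding

# Shape mirrors an entry under "extractor_bindings" in the namespace
# response; the values themselves are made up.
payload = {
    "extractor": "MiniLML6",
    "name": "minilm",
    "content_source": "ingestion",
    "input_params": {},
    "filters_eq": "source:web",
}

binding = ExtractorBinding.from_dict(payload)  # filters_eq becomes labels_eq
print(binding)            # ExtractorBinding(name=minilm extractor=MiniLML6)
print(binding.to_dict())  # asdict() minus any fields that are None
```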
indexify/index.py CHANGED
@@ -1,7 +1,8 @@
- import aiohttp
+ import httpx

  from .data_containers import SearchChunk, TextChunk

+
  class Index:
      def __init__(self, service_url, index):
          self._service_url = service_url
@@ -9,8 +10,8 @@ class Index:

      def search(self, query: str, top_k: int) -> list[TextChunk]:
          req = {"index": self._index, "query": query, "k": top_k}
-         response = aiohttp.post(
+         response = httpx.post(
              f"{self._service_url}/indexes/{self._index}/search", json=req
          )
          response.raise_for_status()
-         return response.json()["results"]
+         return response.json()["results"]
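`Index.search` now issues a synchronous `httpx.post` instead of going through `aiohttp`. A short sketch, assuming a running server; the index name is illustrative and can be taken from `IndexifyClient().indexes()`:

```python
from indexify import DEFAULT_SERVICE_URL, Index

# Illustrative index name; a running Indexify server is assumed.
index = Index(DEFAULT_SERVICE_URL, "minilm.embedding")
for chunk in index.search("what changed in 0.0.6?", top_k=3):
    print(chunk)
```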
indexify-0.0.4.dist-info/METADATA → indexify-0.0.6.dist-info/METADATA RENAMED
@@ -1,8 +1,8 @@
  Metadata-Version: 2.1
  Name: indexify
- Version: 0.0.4
+ Version: 0.0.6
  Summary: Python Client for Indexify
- Home-page: https://github.com/diptanu/indexify
+ Home-page: https://github.com/tensorlakeai/indexify
  License: Apache 2.0
  Author: Diptanu Gon Choudhury
  Author-email: diptanuc@gmail.com
@@ -12,12 +12,15 @@ Classifier: Programming Language :: Python :: 3
  Classifier: Programming Language :: Python :: 3.10
  Classifier: Programming Language :: Python :: 3.11
  Classifier: Programming Language :: Python :: 3.12
- Requires-Dist: httpx[http2] (>=0.24.1,<0.25.0)
- Project-URL: Repository, https://github.com/diptanu/indexify
+ Requires-Dist: httpx[http2] (>=0.26,<0.27)
+ Project-URL: Repository, https://github.com/tensorlakeai/indexify
  Description-Content-Type: text/markdown

  # Indexify Python Client

+
+ [![PyPI version](https://badge.fury.io/py/indexify.svg)](https://badge.fury.io/py/indexify)
+
  ## Installation

  This is the Python client for interacting with the Indexify service.
@@ -31,10 +34,17 @@ pip install indexify
  ## Usage

  See the [getting started](https://getindexify.com/getting_started/) guide for examples of how to use the client.
- Look at the [sdk-py/examples](examples) directory for more examples.
+ Look at the [examples](examples) directory for more examples.

  ## Development

+ To install the client from this repository for development:
+
+ ```shell
+ cd "path to this repository"
+ pip install -e .
+ ```
+
  Install and run the `poetry` package manager:

  ```shell
@@ -45,21 +55,6 @@ poetry install
  More information at [https://python-poetry.org/docs/](https://python-poetry.org/docs/).


- ### Steps for restarting dev server after updating server code
-
- ```shell
- ./install_python_deps.sh
- # use `-e`` if you're developing extractors
- (cd extractors && pip install -e .)
- # use `-e`` if you're developing sdk-py
- (cd sdk-py && pip install -e .)
-
- cargo build
- make local-dev
-
- # start the server
- ./target/debug/indexify start-server -d -c local_config.yaml
- ```

  ### Environment Variables

indexify-0.0.6.dist-info/RECORD ADDED
@@ -0,0 +1,13 @@
+ indexify/__init__.py,sha256=rNHNCfTl0zQf--3miNHf1RdXhYs3i446LpBhFYoXjrw,290
+ indexify/client.py,sha256=DYFkroiNIZWTTOFcIyAzg8GzWlBBFswCaQ4RNtQGAxU,12828
+ indexify/data_containers.py,sha256=--KSx_T5EsoBOemFPKSLVcKtvy3OELgPnLp4kCESHkI,690
+ indexify/exceptions.py,sha256=vjd5SPPNFIEW35GorSIodsqvm9RKHQm9kdp8t9gv-WM,111
+ indexify/extractor.py,sha256=FPWVd93m4edg4T6vVMl5MGtxsfLi5Vws9n9vLgLCSE8,1130
+ indexify/extractor_binding.py,sha256=jVu1lAvstllNz13GxEN-IlLx3XEjzz8QCBk1SsMwsCE,721
+ indexify/index.py,sha256=kELxdTnS9ddkEaxqbeZdsbYLEfZ9qi4HiqTPaznOoAo,517
+ indexify/settings.py,sha256=yzWAEZkrTjykSMj3hrFU7l_jUoUCOUsgPVW1nU-qzJQ,46
+ indexify/utils.py,sha256=wvQB9VpS07iZPOrpmt2i3VIncU6YBkHdpu6rXegDT3Y,282
+ indexify-0.0.6.dist-info/LICENSE.txt,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ indexify-0.0.6.dist-info/METADATA,sha256=f6LGiyItVhg-Rb-r_GDLKY3txqMxJZZaZXr8TPhrrdM,1713
+ indexify-0.0.6.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
+ indexify-0.0.6.dist-info/RECORD,,
indexify/repository.py DELETED
@@ -1,196 +0,0 @@
- import httpx
- import json
-
- from dataclasses import dataclass
- from collections import namedtuple
-
- from .data_containers import TextChunk
- from .settings import DEFAULT_SERVICE_URL
- from typing import List
- from .utils import json_set_default
- from indexify.exceptions import ApiException
- from .index import Index
-
- Document = namedtuple("Document", ["text", "metadata"])
-
-
- @dataclass
- class Filter:
-     includes: dict[str, str]
-     excludes: dict[str, str]
-
-     @classmethod
-     def from_dict(cls, json: dict):
-         includes = json.get("eq", {})
-         excludes = json.get("ne", {})
-         return Filter(includes=includes, excludes=excludes)
-
-     def json(self):
-         filters = []
-         for k, v in self.includes.items():
-             filters.append({"eq": {k: v}})
-         for k, v in self.excludes.items():
-             filters.append({"neq": {k: v}})
-         return filters
-
-
- class FilterBuilder:
-     def __init__(self) -> None:
-         self._filter = Filter(includes={}, excludes={})
-
-     def include(self, key: str, value: str) -> "FilterBuilder":
-         self._filter.includes[key] = value
-         return self
-
-     def exclude(self, key: str, value: str) -> "FilterBuilder":
-         self._filter.excludes[key] = value
-         return self
-
-     def build(self) -> Filter:
-         return self._filter
-
-
- @dataclass
- class ExtractorBinding:
-     extractor_name: str
-     index_name: str
-     filters: list[Filter]
-     input_params: dict
-
-     def __repr__(self) -> str:
-         return f"ExtractorBinding(extractor_name={self.extractor_name}, index_name={self.index_name})"
-
-     def __str__(self) -> str:
-         return self.__repr__()
-
-     @classmethod
-     def from_dict(cls, json: dict):
-         filters_dict = json["filters"]
-         filters = []
-         for filter_dict in filters_dict:
-             filters.append(Filter.from_dict(filter_dict))
-         json["filters"] = filters
-         return ExtractorBinding(**json)
-
-
- class Repository:
-     def __init__(
-         self,
-         name: str,
-         service_url: str,
-         extractor_bindings: List[ExtractorBinding] = None,
-         metadata: dict = None,
-     ) -> None:
-         self.name = name
-         self._service_url = service_url
-         self.extractor_bindings = extractor_bindings
-         self.metadata = metadata
-
-     async def run_extractors(self) -> dict:
-         response = httpx.post(f"{self._service_url}/run_extractors")
-         response.raise_for_status()
-
-     def add_documents(self, documents: List[Document]) -> None:
-         if isinstance(documents, Document):
-             documents = [documents]
-         req = {"documents": documents}
-         response = httpx.post(
-             f"{self._service_url}/repositories/{self.name}/add_texts",
-             json=req,
-             headers={"Content-Type": "application/json"},
-         )
-         response.raise_for_status()
-
-     def bind_extractor(
-         self,
-         extractor: str,
-         name: str,
-         input_params: dict = {},
-         filter: Filter = None,
-     ) -> dict:
-         """Bind an extractor to this repository
-
-         Args:
-             - extractor (str): Name of the extractor
-             - name (str): Name for this instance
-             - input_params (dict): Dictionary containing extractor input params
-             - filter (Filter): Optional filter for this extractor
-
-         Returns:
-             dict: response payload
-
-         Examples:
-             >>> repo.bind_extractor("EfficientNet", "efficientnet")
-
-             >>> repo.bind_extractor("MiniLML6", "minilm")
-
-         """
-         req = {
-             "extractor": extractor,
-             "name": name,
-             "input_params": input_params,
-             "filters": filter.json() if filter else [],
-         }
-
-         request_body = json.dumps(req, default=json_set_default)
-         response = httpx.post(
-             f"{self._service_url}/repositories/{self.name}/extractor_bindings",
-             data=request_body,
-             headers={"Content-Type": "application/json"},
-         )
-         try:
-             response.raise_for_status()
-         except httpx.HTTPStatusError as exc:
-             raise ApiException(exc.response.text)
-         return
-
-     def indexes(self) -> List[Index]:
-         response = httpx.get(f"{self._service_url}/repositories/{self.name}/indexes")
-         response.raise_for_status()
-         return response.json()["indexes"]
-
-     @classmethod
-     def get(cls, name: str, service_url: str = DEFAULT_SERVICE_URL) -> "Repository":
-         response = httpx.get(f"{service_url}/repositories/{name}")
-         response.raise_for_status()
-         repository_json = response.json()["repository"]
-         return Repository._from_json(repository_json)
-
-     @classmethod
-     def _from_json(cls, service_url: str, repository_json: dict):
-         extractor_bindings = []
-         for eb in repository_json["repository"]["extractor_bindings"]:
-             extractor_bindings.append(ExtractorBinding.from_dict(eb))
-         metadata = repository_json["repository"]["metadata"]
-         return Repository(
-             name=repository_json["repository"]["name"],
-             service_url=service_url,
-             extractor_bindings=extractor_bindings,
-             metadata=metadata,
-         )
-
-     def query_attribute(self, index_name: str, content_id: str = None) -> dict:
-         params = {"index": index_name}
-         if content_id:
-             params.update({"content_id": content_id})
-         response = httpx.get(
-             f"{self._service_url}/repositories/{self.name}/attributes", params=params
-         )
-         response.raise_for_status()
-         return response.json()["attributes"]
-
-     def search_index(self, name: str, query: str, top_k: int) -> list[TextChunk]:
-         req = {"index": name, "query": query, "k": top_k}
-         response = httpx.post(
-             f"{self._service_url}/repositories/{self.name}/search",
-             json=req,
-             headers={"Content-Type": "application/json"},
-         )
-         response.raise_for_status()
-         return response.json()["results"]
-
-     def __repr__(self) -> str:
-         return f"Repository(name={self.name})"
-
-     def __str__(self) -> str:
-         return self.__repr__()
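For code still written against the deleted `Repository` class, the equivalents now live on `IndexifyClient` and operate on the client's namespace. A rough migration sketch (old 0.0.4 calls in the comments; names and IDs are illustrative, and a running server is assumed):

```python
from indexify import IndexifyClient

# client.get_repository("default") / Repository(...) -> a namespaced client
client = IndexifyClient()

# repo.bind_extractor("MiniLML6", "minilm", filter=...) -> optional labels_eq string
client.bind_extractor("MiniLML6", "minilm")

# repo.add_documents([...]) keeps its name and now also accepts bare strings
client.add_documents("a single document as a plain string")

# repo.query_attribute(index, content_id) -> query_metadata(index, content_id)
metadata = client.query_metadata("minilm.embedding", content_id="some-content-id")

# repo.search_index(name, query, top_k) keeps its signature
results = client.search_index("minilm.embedding", "query text", top_k=3)
```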
@@ -1,13 +0,0 @@
- indexify/__init__.py,sha256=0MDmCLdex7F2RXx-2F1lLqTYQl5Gs4tVryA7j4TXCak,329
- indexify/client.py,sha256=8IaX_1KYs-JxXH1lIlE_bFE7Aqe2-nt_RRbneHEXNp0,5798
- indexify/data_containers.py,sha256=n9YlKWUfptDkCR3NL5ldtbngRZC87xmXkoX6smxr47I,689
- indexify/exceptions.py,sha256=vjd5SPPNFIEW35GorSIodsqvm9RKHQm9kdp8t9gv-WM,111
- indexify/extractor.py,sha256=cBzF7Q3fu4jr7cIeSJXwLf9z45Z81MmwkuY4-zG8HBY,1406
- indexify/index.py,sha256=r911vtnvzQg3_Hrc3cbPDATNtAMiSbH9OovGKcYG2g8,519
- indexify/repository.py,sha256=YcJg8P2vvMzqK0AUwESMkA6YizaNBqGOfVnfRrGmMLg,6144
- indexify/settings.py,sha256=yzWAEZkrTjykSMj3hrFU7l_jUoUCOUsgPVW1nU-qzJQ,46
- indexify/utils.py,sha256=wvQB9VpS07iZPOrpmt2i3VIncU6YBkHdpu6rXegDT3Y,282
- indexify-0.0.4.dist-info/LICENSE.txt,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
- indexify-0.0.4.dist-info/METADATA,sha256=_bQVRWubUKjdMkDmBjhH6DGw5nLGpRPKjhDyzBJnB_4,1862
- indexify-0.0.4.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
- indexify-0.0.4.dist-info/RECORD,,