indexify-0.0.8-py3-none-any.whl → indexify-0.0.10-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
indexify/__init__.py CHANGED
@@ -1,12 +1,13 @@
 from .index import Index
 from .client import IndexifyClient
-from .extractor_binding import ExtractorBinding
-from .data_containers import TextChunk
+from .extraction_policy import ExtractionPolicy
+from .client import IndexifyClient, Document
 from .settings import DEFAULT_SERVICE_URL
 
 __all__ = [
     "Index",
+    "Document",
     "IndexifyClient",
-    "ExtractorBinding",
+    "ExtractionPolicy",
     "DEFAULT_SERVICE_URL",
 ]
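For downstream code, the top-level exports change with this release: ExtractorBinding is no longer exported, while ExtractionPolicy and Document now are. A minimal import sketch against 0.0.10, assuming the wheel is installed as indexify:

# Imports against 0.0.10: ExtractorBinding is gone; ExtractionPolicy and Document
# are now exported from the package root alongside Index and IndexifyClient.
from indexify import Index, IndexifyClient, ExtractionPolicy, Document, DEFAULT_SERVICE_URL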
indexify/client.py CHANGED
@@ -3,7 +3,7 @@ import json
 from collections import namedtuple
 from .settings import DEFAULT_SERVICE_URL
 from .extractor import Extractor
-from .extractor_binding import ExtractorBinding
+from .extraction_policy import ExtractionPolicy
 from .index import Index
 from .utils import json_set_default
 from .data_containers import TextChunk
@@ -41,7 +41,7 @@ class IndexifyClient:
         **kwargs,
     ):
         self.namespace: str = namespace
-        self.extractor_bindings: List[ExtractorBinding] = []
+        self.extraction_policies: List[ExtractionPolicy] = []
         self.labels: dict = {}
         self._service_url = service_url
         self._client = httpx.Client(*args, **kwargs)
@@ -50,9 +50,9 @@ class IndexifyClient:
         response = self.get(f"namespaces/{self.namespace}")
         response.raise_for_status()
         resp_json = response.json()
-        # initialize extractor_bindings
-        for eb in resp_json["namespace"]["extractor_bindings"]:
-            self.extractor_bindings.append(ExtractorBinding.from_dict(eb))
+        # initialize extraction_policies
+        for eb in resp_json["namespace"]["extraction_policies"]:
+            self.extraction_policies.append(ExtractionPolicy.from_dict(eb))
 
     @classmethod
     def with_mtls(
@@ -189,7 +189,7 @@ class IndexifyClient:
     def create_namespace(
         self,
         namespace: str,
-        extractor_bindings: list = [],
+        extraction_policies: list = [],
         labels: dict = {},
     ) -> "IndexifyClient":
         """
@@ -198,15 +198,15 @@ class IndexifyClient:
         Returns:
             IndexifyClient: a new client with the given namespace
         """
-        bindings = []
-        for bd in extractor_bindings:
-            if isinstance(bd, ExtractorBinding):
-                bindings.append(bd.to_dict())
+        extraction_policies = []
+        for bd in extraction_policies:
+            if isinstance(bd, ExtractionPolicy):
+                extraction_policies.append(bd.to_dict())
             else:
-                bindings.append(bd)
+                extraction_policies.append(bd)
         req = {
             "name": namespace,
-            "extractor_bindings": bindings,
+            "extraction_policies": extraction_policies,
             "labels": labels,
         }
 
@@ -239,19 +239,19 @@ class IndexifyClient:
             extractors.append(Extractor.from_dict(ed))
         return extractors
 
-    def get_extractor_bindings(self):
+    def get_extraction_policies(self):
         """
-        Retrieve and update the list of extractor bindings for the current namespace.
+        Retrieve and update the list of extraction policies for the current namespace.
         """
         response = self.get(f"namespaces/{self.namespace}")
         response.raise_for_status()
 
-        self.extractor_bindings = []
-        for eb in response.json()["namespace"]["extractor_bindings"]:
-            self.extractor_bindings.append(ExtractorBinding.from_dict(eb))
-        return self.extractor_bindings
+        self.extraction_policies = []
+        for eb in response.json()["namespace"]["extraction_policies"]:
+            self.extraction_policies.append(ExtractionPolicy.from_dict(eb))
+        return self.extraction_policies
 
-    def bind_extractor(
+    def add_extraction_policy(
         self,
         extractor: str,
         name: str,
@@ -259,7 +259,7 @@ class IndexifyClient:
         labels_eq: str = None,
         content_source="ingestion",
     ) -> dict:
-        """Bind an extractor.
+        """Add a new extraction policy.
 
         Args:
             - extractor (str): Name of the extractor
@@ -271,9 +271,9 @@ class IndexifyClient:
             dict: response payload
 
         Examples:
-            >>> repo.bind_extractor("EfficientNet", "efficientnet")
+            >>> repo.add_extraction_policy("EfficientNet", "efficientnet")
 
-            >>> repo.bind_extractor("MiniLML6", "minilm")
+            >>> repo.add_extraction_policy("MiniLML6", "minilm")
 
         """
         req = {
@@ -288,13 +288,13 @@ class IndexifyClient:
 
         request_body = json.dumps(req, default=json_set_default)
         response = self.post(
-            f"namespaces/{self.namespace}/extractor_bindings",
+            f"namespaces/{self.namespace}/extraction_policies",
             data=request_body,
             headers={"Content-Type": "application/json"},
         )
 
         # update self.extractor_bindings
-        self.get_extractor_bindings()
+        self.get_extraction_policies()
 
         try:
             response.raise_for_status()
@@ -404,5 +404,6 @@ class IndexifyClient:
         response = self.post(
             f"namespaces/{self.namespace}/upload_file",
             files={"file": f},
+            timeout=None,
         )
         response.raise_for_status()
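Taken together, the client.py hunks rename the binding API to extraction policies: bind_extractor becomes add_extraction_policy, get_extractor_bindings becomes get_extraction_policies, and requests now go to the extraction_policies endpoint; upload_file additionally passes timeout=None, presumably so large uploads are not cut short by the default httpx timeout. A hedged migration sketch, reusing the extractor and policy names from the docstring examples and assuming an Indexify server is reachable at the default service URL:

# Hedged sketch of the renamed client calls in 0.0.10; "MiniLML6" and "minilm"
# are the example names from the docstring above, not required values.
from indexify import IndexifyClient

client = IndexifyClient()                           # assumes the default namespace and service URL
client.add_extraction_policy("MiniLML6", "minilm")  # was client.bind_extractor(...)
policies = client.get_extraction_policies()         # was client.get_extractor_bindings()
print(policies)                                     # list of ExtractionPolicy objects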
indexify/data_containers.py CHANGED
@@ -3,15 +3,6 @@ from typing import List
 from dataclasses import dataclass, field
 
 
-class TextSplitter(str, Enum):
-    NEWLINE = "new_line"
-    REGEX = "regex"
-    NOOP = "noop"
-
-    def __str__(self) -> str:
-        return self.value.lower()
-
-
 @dataclass
 class TextChunk:
     text: str
@@ -22,16 +13,6 @@ class TextChunk:
         return {"text": self.text, "metadata": self.metadata}
 
 
-@dataclass
-class SearchChunk:
-    index: str
-    query: str
-    k: int
-
-    def to_dict(self):
-        return {"index": self.index, "query": self.query, "k": self.k}
-
-
 @dataclass
 class SearchResult:
     results: List[TextChunk]
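These hunks keep TextChunk and SearchResult but drop TextSplitter and SearchChunk, so any import of the removed names fails in 0.0.10. A small sketch of the surviving TextChunk, assuming its metadata field keeps a dict default as the unchanged to_dict suggests:

# TextChunk and SearchResult survive in 0.0.10; TextSplitter and SearchChunk do not,
# so "from indexify.data_containers import SearchChunk" now raises ImportError.
from indexify.data_containers import TextChunk

chunk = TextChunk(text="hello world")  # assumes metadata defaults to an empty dict
print(chunk.to_dict())                 # {"text": "hello world", "metadata": {}}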
indexify/extractor_binding.py → indexify/extraction_policy.py RENAMED
@@ -3,7 +3,7 @@ from typing import Optional
 
 
 @dataclass
-class ExtractorBinding:
+class ExtractionPolicy:
     extractor: str
     name: str
     content_source: str
@@ -11,7 +11,7 @@ class ExtractorBinding:
     labels_eq: Optional[str] = None
 
     def __repr__(self) -> str:
-        return f"ExtractorBinding(name={self.name} extractor={self.extractor})"
+        return f"ExtractionPolicy(name={self.name} extractor={self.extractor})"
 
     def __str__(self) -> str:
         return self.__repr__()
@@ -24,4 +24,4 @@ class ExtractorBinding:
     def from_dict(cls, json: dict):
         if "filters_eq" in json:
             json["labels_eq"] = json.pop("filters_eq")
-        return ExtractorBinding(**json)
+        return ExtractionPolicy(**json)
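Only the class name and its repr change here; the field layout and the legacy-key handling in from_dict stay the same. A standalone sketch of that key remapping, mirroring the from_dict body shown above with an illustrative payload:

# Mirrors the remap in ExtractionPolicy.from_dict: a legacy "filters_eq" key from
# older server payloads is renamed to "labels_eq" before the dataclass is built.
payload = {"extractor": "MiniLML6", "name": "minilm", "filters_eq": "source:web"}
if "filters_eq" in payload:
    payload["labels_eq"] = payload.pop("filters_eq")
print(payload)  # {'extractor': 'MiniLML6', 'name': 'minilm', 'labels_eq': 'source:web'}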
indexify/extractor.py CHANGED
@@ -16,22 +16,15 @@ class ExtractorSchema:
     outputs: dict[str, Union[EmbeddingSchema, dict]]
 
 
-@dataclass
-class Extractor:
-    name: str
-    description: str
-    input_params: dict
-    outputs: ExtractorSchema
-
-
 class Extractor:
     def __init__(
-        self, name: str, description: str, input_params: dict, outputs: ExtractorSchema
+        self, name: str, description: str, input_params: dict, outputs: ExtractorSchema, input_mime_types: list[str]
     ):
         self.name = name
         self.description = description
         self.input_params = input_params
         self.outputs = outputs
+        self.input_mime_types = input_mime_types
 
     @classmethod
     def from_dict(cls, data):
@@ -39,11 +32,12 @@ class Extractor:
             name=data["name"],
             description=data["description"],
             input_params=data["input_params"],
+            input_mime_types=data["input_mime_types"],
             outputs=data["outputs"],
         )
 
     def __repr__(self) -> str:
-        return f"Extractor(name={self.name}, description={self.description}, input_params={self.input_params}, outputs={self.outputs})"
+        return f"Extractor(name={self.name}, description={self.description}, input_params={self.input_params}, input_mime_types={self.input_mime_types}, outputs={self.outputs})"
 
     def __str__(self) -> str:
         return self.__repr__()
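Extractor now carries input_mime_types, and from_dict reads that key from the payload, so a dict that lacks it raises a KeyError. A hedged construction sketch; the outputs value is illustrative rather than a schema the server is known to return:

# Building an Extractor from a dict in 0.0.10: "input_mime_types" is now required
# by from_dict; all field values below are illustrative.
from indexify.extractor import Extractor

extractor = Extractor.from_dict({
    "name": "MiniLML6",
    "description": "sentence embeddings",
    "input_params": {},
    "input_mime_types": ["text/plain"],
    "outputs": {"embedding": {"dim": 384, "distance": "cosine"}},
})
print(extractor)  # repr now includes input_mime_types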
indexify/index.py CHANGED
@@ -1,6 +1,6 @@
 import httpx
 
-from .data_containers import SearchChunk, TextChunk
+from .data_containers import TextChunk
 
 
 class Index:
indexify/utils.py CHANGED
@@ -5,12 +5,3 @@ def json_set_default(obj):
     if isinstance(obj, set):
         return list(obj)
     raise TypeError
-
-
-class Metric(str, Enum):
-    COSINE = "cosine"
-    DOT = "dot"
-    EUCLIDEAN = "euclidean"
-
-    def __str__(self) -> str:
-        return self.name.lower()
indexify-0.0.8.dist-info/METADATA → indexify-0.0.10.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: indexify
-Version: 0.0.8
+Version: 0.0.10
 Summary: Python Client for Indexify
 Home-page: https://github.com/tensorlakeai/indexify
 License: Apache 2.0
indexify-0.0.10.dist-info/RECORD ADDED
@@ -0,0 +1,13 @@
+indexify/__init__.py,sha256=Sz6zkAIHsPOi0rG5RM7dVkXGDa0fO2uurD6vS4Qo15E,312
+indexify/client.py,sha256=ppWH2n9LqgFLigXdrCB_lz8hJqVzAnbOVjN_r0YB3yo,13030
+indexify/data_containers.py,sha256=r1wxJPtsmXbyKvb17fqxm-dPjKz51oZ62f8A8Zxls1c,361
+indexify/exceptions.py,sha256=vjd5SPPNFIEW35GorSIodsqvm9RKHQm9kdp8t9gv-WM,111
+indexify/extraction_policy.py,sha256=_6zs3lI7HZxNMpXCAw-pwrBQfVaShb5DvWrWD9V6GOY,721
+indexify/extractor.py,sha256=-8-rn1UAUKJTYDCcgheaRpl2oePB8J10p0mwg6boVng,1188
+indexify/index.py,sha256=RvxYhJXEth-GKvqzlMiz5PuN1eIbZk84pt20piA1Gsw,504
+indexify/settings.py,sha256=yzWAEZkrTjykSMj3hrFU7l_jUoUCOUsgPVW1nU-qzJQ,46
+indexify/utils.py,sha256=rDN2lrsAs9noJEIjfx6ukmC2SAIyrlUt7QU-kaBjujM,125
+indexify-0.0.10.dist-info/LICENSE.txt,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+indexify-0.0.10.dist-info/METADATA,sha256=8ymQlOQwGI3tO5CXAH0w3UQNMaffJaG66pZLMbV5m8Y,1714
+indexify-0.0.10.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+indexify-0.0.10.dist-info/RECORD,,
indexify-0.0.8.dist-info/WHEEL → indexify-0.0.10.dist-info/WHEEL CHANGED
@@ -1,4 +1,4 @@
 Wheel-Version: 1.0
-Generator: poetry-core 1.8.1
+Generator: poetry-core 1.9.0
 Root-Is-Purelib: true
 Tag: py3-none-any
indexify-0.0.8.dist-info/RECORD DELETED
@@ -1,13 +0,0 @@
-indexify/__init__.py,sha256=rNHNCfTl0zQf--3miNHf1RdXhYs3i446LpBhFYoXjrw,290
-indexify/client.py,sha256=DH9wMBpLAwGzZ23n7SpZuzJnQWK94KehU65K_0zs2Do,12910
-indexify/data_containers.py,sha256=--KSx_T5EsoBOemFPKSLVcKtvy3OELgPnLp4kCESHkI,690
-indexify/exceptions.py,sha256=vjd5SPPNFIEW35GorSIodsqvm9RKHQm9kdp8t9gv-WM,111
-indexify/extractor.py,sha256=FPWVd93m4edg4T6vVMl5MGtxsfLi5Vws9n9vLgLCSE8,1130
-indexify/extractor_binding.py,sha256=jVu1lAvstllNz13GxEN-IlLx3XEjzz8QCBk1SsMwsCE,721
-indexify/index.py,sha256=kELxdTnS9ddkEaxqbeZdsbYLEfZ9qi4HiqTPaznOoAo,517
-indexify/settings.py,sha256=yzWAEZkrTjykSMj3hrFU7l_jUoUCOUsgPVW1nU-qzJQ,46
-indexify/utils.py,sha256=wvQB9VpS07iZPOrpmt2i3VIncU6YBkHdpu6rXegDT3Y,282
-indexify-0.0.8.dist-info/LICENSE.txt,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-indexify-0.0.8.dist-info/METADATA,sha256=fSMoxJaBY5P_PzQGztaW5QYx-zfr6xj1Ise28rUUWN0,1713
-indexify-0.0.8.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
-indexify-0.0.8.dist-info/RECORD,,