indexify 0.0.0__tar.gz → 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {indexify-0.0.0 → indexify-0.0.1}/PKG-INFO +1 -1
- indexify-0.0.1/indexify/indexify.py +115 -0
- {indexify-0.0.0 → indexify-0.0.1}/pyproject.toml +1 -1
- indexify-0.0.0/indexify/indexify.py +0 -63
- {indexify-0.0.0 → indexify-0.0.1}/LICENSE.txt +0 -0
- {indexify-0.0.0 → indexify-0.0.1}/README.rst +0 -0
- {indexify-0.0.0 → indexify-0.0.1}/indexify/__init__.py +0 -0
@@ -0,0 +1,115 @@
|
|
1
|
+
from typing import Optional, List
|
2
|
+
from enum import Enum
|
3
|
+
import requests
|
4
|
+
import json
|
5
|
+
import dataclasses
|
6
|
+
from dataclasses import dataclass
|
7
|
+
|
8
|
+
# Base URL that Indexify.create_index uses when no indexify_url is passed.
DEFAULT_INDEXIFY_URL = "https://localhost:8090"
|
9
|
+
|
10
|
+
# Embedding model name that Indexify.create_index sends when none is passed.
DEFAULT_EMBEDDING_MODEL = "all-minilm-l6-v2"
|
11
|
+
|
12
|
+
|
13
|
+
class ApiException(Exception):
    """Error raised by this client when a request to the service fails."""

    def __init__(self, message: str) -> None:
        # Forward the message unchanged so str(exc) and exc.args carry it.
        Exception.__init__(self, message)
|
16
|
+
|
17
|
+
|
18
|
+
class Metric(str, Enum):
    """Metric names accepted when creating an index.

    Members are also ``str`` instances, so they serialise to JSON as their
    plain string value.
    """

    COSINE = "cosine"
    DOT = "dot"
    EUCLIDEAN = "euclidean"

    def __str__(self) -> str:
        # The textual form is the member name in lower case.
        member_name = self.name
        return member_name.lower()
|
25
|
+
|
26
|
+
|
27
|
+
class TextSplitter(str, Enum):
    """Text splitter names accepted when creating an index.

    Members are also ``str`` instances, so they serialise to JSON as their
    plain string value.
    """

    NEWLINE = "new_line"
    REGEX = "regex"
    NOOP = "noop"

    def __str__(self) -> str:
        # The textual form is the member value in lower case.
        raw_value = self.value
        return raw_value.lower()
|
34
|
+
|
35
|
+
|
36
|
+
@dataclass
class TextChunk:
    """A piece of text together with its metadata dictionary."""

    text: str
    metadata: dict

    def to_json(self):
        """Return this chunk as a JSON object string with ``text`` and ``metadata`` keys."""
        body = {"text": self.text, "metadata": self.metadata}
        return json.dumps(body)
|
43
|
+
|
44
|
+
|
45
|
+
@dataclass
class SearchChunk:
    """Request body for a search: the index name, the query text, and ``k``."""

    # Name of the index to search.
    index: str
    # Query text.
    query: str
    # Filled from the ``top_k`` argument of ``Indexify.search``.
    k: int
|
50
|
+
|
51
|
+
|
52
|
+
@dataclass
class SearchResult:
    """Container for the text chunks returned by a search."""

    # One TextChunk per entry of the response's "results" list.
    results: List[TextChunk]
|
55
|
+
|
56
|
+
|
57
|
+
class Indexify:
    """HTTP client bound to one named index at one service URL.

    Instances are normally obtained through :meth:`create_index` (which asks
    the service to create the index first) or :meth:`get_index` (which only
    builds the client object and sends no request).
    """

    def __init__(self, url, index) -> None:
        self._url = url
        self._index = index

    @classmethod
    def create_index(
        cls,
        name: str,
        indexify_url: Optional[str] = DEFAULT_INDEXIFY_URL,
        embedding_model: Optional[str] = DEFAULT_EMBEDDING_MODEL,
        metric: Metric = Metric.COSINE,
        splitter: Optional[str] = TextSplitter.NEWLINE,
        unique_labels: Optional[List[str]] = None,
    ):
        """POST a create request to ``/index/create`` and return a client for it.

        Args:
            name: Name of the index to create.
            indexify_url: Base URL of the service.
            embedding_model: Sent as ``embedding_model``.
            metric: Sent as ``metric``.
            splitter: Sent as ``text_splitter``.
            unique_labels: Sent as ``hash_on``. Defaults to ``None``; the
                previous default was the ``Optional[List[str]]`` typing object
                itself, which cannot be JSON-serialised.

        Returns:
            An ``Indexify`` instance bound to ``indexify_url`` and ``name``.

        Raises:
            ApiException: If the service does not answer with HTTP 200.
        """
        req = {
            "name": name,
            "embedding_model": embedding_model,
            "metric": metric,
            "text_splitter": splitter,
            # NOTE(review): None is sent as JSON null; confirm the server
            # accepts null here (as opposed to requiring a list).
            "hash_on": unique_labels,
        }
        resp = requests.post(f"{indexify_url}/index/create", json=req)
        if resp.status_code == 200:
            return cls(indexify_url, name)
        # Raises when the body is unparseable or reports errors.
        cls._get_payload(resp)
        # A non-200 reply without an error list must not look like success.
        raise ApiException(f"Failed to create index: HTTP {resp.status_code}")

    @classmethod
    def get_index(cls, name: str, indexify_url: Optional[str] = DEFAULT_INDEXIFY_URL):
        """Return a client for ``name`` at ``indexify_url`` without sending a request."""
        return cls(indexify_url, name)

    def add_text_chunk(self, chunk: str, metadata: dict):
        """POST one text chunk with its metadata to ``/index/add``.

        Raises:
            ApiException: If the service does not answer with HTTP 200.
        """
        text_chunk = TextChunk(chunk, metadata)
        req = {"index": self._index, "documents": [dataclasses.asdict(text_chunk)]}
        resp = requests.post(f"{self._url}/index/add", json=req)
        if resp.status_code == 200:
            return
        self._get_payload(resp, "add text chunk")
        raise ApiException(f"Failed to add text chunk: HTTP {resp.status_code}")

    def search(self, query: str, top_k: int):
        """Query ``/index/search`` and return the matches as a ``SearchResult``.

        Args:
            query: Query text.
            top_k: Sent as ``k`` in the request body.

        Raises:
            ApiException: If the response is unparseable, reports errors, or
                is not HTTP 200.
        """
        req = SearchChunk(index=self._index, query=query, k=top_k)
        # NOTE(review): the request body is sent with GET, as before; confirm
        # that this is what the server expects.
        resp = requests.get(f"{self._url}/index/search", json=dataclasses.asdict(req))
        payload = self._get_payload(resp, "search index")
        if resp.status_code != 200:
            raise ApiException(f"Failed to search index: HTTP {resp.status_code}")
        return SearchResult(
            results=[
                TextChunk(text=res["text"], metadata=res["metadata"])
                for res in payload["results"]
            ]
        )

    @staticmethod
    def _get_payload(response, operation: str = "create index"):
        """Decode a JSON response body and raise if it reports errors.

        Args:
            response: Object with a ``text`` attribute holding the body.
            operation: Phrase used in the error message. The default keeps
                the message produced before this parameter existed.

        Returns:
            The decoded JSON object.

        Raises:
            ApiException: If the body is not a JSON object (the raw text is
                the message) or its ``errors`` entry is non-empty.
        """
        try:
            payload = json.loads(response.text)
        except (TypeError, ValueError) as err:
            # json.JSONDecodeError is a ValueError subclass.
            raise ApiException(response.text) from err
        if not isinstance(payload, dict):
            raise ApiException(response.text)
        # A missing "errors" key means there is nothing to report.
        errors = payload.get("errors")
        if errors:
            raise ApiException(f"Failed to {operation}: {errors}")
        return payload
|
@@ -1,63 +0,0 @@
|
|
1
|
-
from typing import Optional, List
|
2
|
-
from enum import Enum
|
3
|
-
import requests
|
4
|
-
import json
|
5
|
-
|
6
|
-
# Base URL that Indexify.create_index uses when no indexify_url is passed.
DEFAULT_INDEXIFY_URL = "https://localhost:8090"
|
7
|
-
|
8
|
-
# Embedding model name that Indexify.create_index sends when none is passed.
DEFAULT_EMBEDDING_MODEL = "all-minilm-l6-v2"
|
9
|
-
|
10
|
-
class ApiException(Exception):
    """Error raised by this client when a request to the service fails."""

    def __init__(self, message: str) -> None:
        # Forward the message unchanged so str(exc) and exc.args carry it.
        Exception.__init__(self, message)
|
14
|
-
|
15
|
-
class Metric(Enum):
    """Metric choices for an index, with integer member values."""

    COSINE = 1
    DOT = 2

    def __str__(self) -> str:
        # The textual form is the member name in lower case.
        member_name = self.name
        return member_name.lower()
|
21
|
-
|
22
|
-
|
23
|
-
class TextSplitter(Enum):
    """Text splitter choices for an index, with string member values."""

    NEWLINE = "new_line"
    REGEX = "regex"
    NOOP = "noop"

    def __str__(self) -> str:
        # The textual form is the member value in lower case.
        raw_value = self.value
        return raw_value.lower()
|
30
|
-
|
31
|
-
|
32
|
-
class Indexify:
    """HTTP client bound to one named index at one service URL."""

    def __init__(self, url, index) -> None:
        self._url = url
        self._index = index

    @classmethod
    def create_index(
        cls,
        name: str,
        indexify_url: Optional[str] = DEFAULT_INDEXIFY_URL,
        embedding_model: Optional[str] = DEFAULT_EMBEDDING_MODEL,
        metric: Metric = Metric.COSINE,
        splitter: Optional[str] = TextSplitter.NEWLINE,
        unique_labels: Optional[List[str]] = None,
    ):
        """POST a create request to ``/index/create`` and return a client for it.

        Args:
            name: Name of the index to create.
            indexify_url: Base URL of the service.
            embedding_model: Sent as ``embedding_model``.
            metric: Sent as ``metric`` in its ``str()`` form.
            splitter: Sent as ``splitter`` in its ``str()`` form.
            unique_labels: Sent as ``unique_labels``. Defaults to ``None``;
                the previous default was the ``Optional[List[str]]`` typing
                object itself, which cannot be JSON-serialised.

        Returns:
            An ``Indexify`` instance bound to ``indexify_url`` and ``name``.

        Raises:
            ApiException: If the service does not answer with HTTP 200.
        """
        # Plain Enum members are not JSON-serialisable, so send their str()
        # form; None is passed through as JSON null.
        request_body = json.dumps(
            {
                "name": name,
                "embedding_model": embedding_model,
                "metric": None if metric is None else str(metric),
                "splitter": None if splitter is None else str(splitter),
                "unique_labels": unique_labels,
            }
        )
        resp = requests.post(f"{indexify_url}/index/create", data=request_body)
        if resp.status_code == 200:
            return cls(indexify_url, name)
        try:
            payload = json.loads(resp.text)
        except ValueError as err:
            # A non-JSON error body: report the raw text instead of a decode error.
            raise ApiException(f"Failed to create index: {resp.text}") from err
        # The decoded body is a dict, so read "errors" by key, not attribute.
        errors = payload.get("errors") if isinstance(payload, dict) else payload
        raise ApiException(f"Failed to create index: {errors}")

    @classmethod
    def get_index(cls, name: str, indexify_url: Optional[str]):
        """Return a client for ``name`` at ``indexify_url`` without sending a request."""
        return cls(indexify_url, name)

    def add_document(self, document: str, label: str):
        """Placeholder: performs no work and returns ``None``."""
        pass

    def search(self, query: str, top_k: int):
        """Placeholder: performs no work and returns ``None``."""
        pass
|
File without changes
|
File without changes
|
File without changes
|