clarifai 10.1.0__py3-none-any.whl → 10.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- clarifai/client/app.py +23 -43
- clarifai/client/base.py +44 -4
- clarifai/client/dataset.py +138 -52
- clarifai/client/input.py +37 -4
- clarifai/client/model.py +279 -8
- clarifai/client/module.py +7 -5
- clarifai/client/runner.py +3 -1
- clarifai/client/search.py +7 -3
- clarifai/client/user.py +14 -12
- clarifai/client/workflow.py +7 -4
- clarifai/constants/dataset.py +2 -0
- clarifai/datasets/upload/loaders/README.md +3 -4
- clarifai/datasets/upload/loaders/xview_detection.py +5 -5
- clarifai/models/model_serving/cli/_utils.py +1 -1
- clarifai/models/model_serving/cli/build.py +1 -1
- clarifai/models/model_serving/cli/upload.py +1 -1
- clarifai/models/model_serving/utils.py +3 -1
- clarifai/rag/rag.py +25 -11
- clarifai/rag/utils.py +21 -6
- clarifai/utils/evaluation/__init__.py +427 -0
- clarifai/utils/evaluation/helpers.py +522 -0
- clarifai/utils/logging.py +30 -0
- clarifai/utils/model_train.py +3 -1
- clarifai/versions.py +1 -1
- clarifai/workflows/validate.py +1 -1
- {clarifai-10.1.0.dist-info → clarifai-10.2.0.dist-info}/METADATA +46 -9
- {clarifai-10.1.0.dist-info → clarifai-10.2.0.dist-info}/RECORD +31 -30
- clarifai/datasets/upload/loaders/coco_segmentation.py +0 -98
- {clarifai-10.1.0.dist-info → clarifai-10.2.0.dist-info}/LICENSE +0 -0
- {clarifai-10.1.0.dist-info → clarifai-10.2.0.dist-info}/WHEEL +0 -0
- {clarifai-10.1.0.dist-info → clarifai-10.2.0.dist-info}/entry_points.txt +0 -0
- {clarifai-10.1.0.dist-info → clarifai-10.2.0.dist-info}/top_level.txt +0 -0
@@ -8,15 +8,15 @@ If a dataset module exists in the zoo, uploading the specific dataset can be eas
|
|
8
8
|
|
9
9
|
```python
|
10
10
|
from clarifai.client.app import App
|
11
|
-
from clarifai.datasets.upload.loaders.
|
11
|
+
from clarifai.datasets.upload.loaders.coco_detection import COCODetectionDataLoader
|
12
12
|
|
13
13
|
app = App(app_id="", user_id="")
|
14
14
|
# Create a dataset in Clarifai App
|
15
15
|
dataset = app.create_dataset(dataset_id="")
|
16
16
|
# instantiate dataloader object
|
17
|
-
|
17
|
+
coco_det_dataloader = COCODetectionDataLoader(images_dir="", label_filepath="")
|
18
18
|
# execute data upload to Clarifai app dataset
|
19
|
-
dataset.upload_dataset(dataloader=coco_seg_dataloader)
|
19
|
+
dataset.upload_dataset(dataloader=coco_det_dataloader)
|
20
20
|
```
|
21
21
|
|
22
22
|
## Dataset Loaders
|
@@ -24,7 +24,6 @@ dataset.upload_dataset(dataloader=coco_seg_dataloader)
|
|
24
24
|
| dataset name | task | module name (.py)
|
25
25
|
| --- | --- | ---
|
26
26
|
| [COCO 2017](https://cocodataset.org/#download) | Detection | `coco_detection` |
|
27
|
-
| | Segmentation | `coco_segmentation` |
|
28
27
|
| | Captions | `coco_captions` |
|
29
28
|
|[xVIEW](http://xviewdataset.org/) | Detection | `xview_detection` |
|
30
29
|
| [ImageNet](https://www.image-net.org/) | Classification | `imagenet_classification` |
|
@@ -6,7 +6,7 @@ from concurrent.futures import ThreadPoolExecutor
|
|
6
6
|
from multiprocessing import cpu_count
|
7
7
|
from typing import DefaultDict, Dict, List
|
8
8
|
|
9
|
-
import cv2
|
9
|
+
from PIL import Image
|
10
10
|
from tqdm import tqdm
|
11
11
|
|
12
12
|
from clarifai.datasets.upload.base import ClarifaiDataLoader
|
@@ -54,9 +54,8 @@ class xviewDetectionDataLoader(ClarifaiDataLoader):
|
|
54
54
|
def compress_tiff(self, img_path: str) -> None:
|
55
55
|
"""Compress tiff image"""
|
56
56
|
img_comp_path = os.path.join(self.img_comp_dir, os.path.basename(img_path))
|
57
|
-
img_arr =
|
58
|
-
|
59
|
-
img_comp_path, img_arr, params=(cv2.IMWRITE_TIFF_COMPRESSION, 8)) # 8: Adobe Deflate
|
57
|
+
img_arr = Image.open(img_path)
|
58
|
+
img_arr.save(img_comp_path, 'TIFF', compression='tiff_deflate')
|
60
59
|
|
61
60
|
def preprocess(self):
|
62
61
|
"""Compress the tiff images to comply with clarifai grpc image encoding limit(<20MB) Uses ADOBE_DEFLATE compression algorithm"""
|
@@ -133,7 +132,8 @@ class xviewDetectionDataLoader(ClarifaiDataLoader):
|
|
133
132
|
_id = os.path.splitext(os.path.basename(self.image_paths[index]))[0]
|
134
133
|
image_path = self.image_paths[index]
|
135
134
|
|
136
|
-
|
135
|
+
image = Image.open(image_path)
|
136
|
+
image_width, image_height = image.size
|
137
137
|
annots = []
|
138
138
|
class_names = []
|
139
139
|
for bbox, concept in zip(self.all_data[_id]['bboxes'], self.all_data[_id]['concepts']):
|
@@ -11,7 +11,7 @@ from ..constants import (CLARIFAI_EXAMPLES_REPO, CLARIFAI_EXAMPLES_REPO_PATH,
|
|
11
11
|
def download_examples_repo(forced_download: bool = False):
|
12
12
|
|
13
13
|
def _pull():
|
14
|
-
subprocess.run(f"git clone {CLARIFAI_EXAMPLES_REPO} {CLARIFAI_EXAMPLES_REPO_PATH}")
|
14
|
+
subprocess.run(f"git clone {CLARIFAI_EXAMPLES_REPO} {CLARIFAI_EXAMPLES_REPO_PATH}", shell=True)
|
15
15
|
|
16
16
|
if not os.path.isdir(CLARIFAI_EXAMPLES_REPO_PATH):
|
17
17
|
print(f"Download examples to {CLARIFAI_EXAMPLES_REPO_PATH}")
|
@@ -70,7 +70,7 @@ class BuildModelSubCli(BaseClarifaiCli):
|
|
70
70
|
if not self.no_test:
|
71
71
|
assert os.path.exists(
|
72
72
|
self.test_path), FileNotFoundError(f"Could not find `test.py` in {self.path}")
|
73
|
-
result = subprocess.run(f"pytest -s --log-level=INFO {self.test_path}")
|
73
|
+
result = subprocess.run(f"pytest -s --log-level=INFO {self.test_path}", shell=True)
|
74
74
|
assert result.returncode == 0, "Test has failed. Please make sure no error exists in your code."
|
75
75
|
|
76
76
|
# build
|
@@ -126,7 +126,7 @@ class UploadModelSubCli(BaseClarifaiCli):
|
|
126
126
|
# Run test before uploading
|
127
127
|
if not self.no_test:
|
128
128
|
assert os.path.exists(self.test_path), FileNotFoundError(f"Not found {self.test_path}")
|
129
|
-
result = subprocess.run(f"pytest -s --log-level=INFO {self.test_path}")
|
129
|
+
result = subprocess.run(f"pytest -s --log-level=INFO {self.test_path}", shell=True)
|
130
130
|
assert result.returncode == 0, "Test has failed. Please make sure no error exists in your code."
|
131
131
|
|
132
132
|
deploy(
|
@@ -18,4 +18,6 @@ def _read_pat():
|
|
18
18
|
|
19
19
|
def login(pat=None):
|
20
20
|
""" if pat provided, set pat to CLARIFAI_PAT otherwise read pat from file"""
|
21
|
-
|
21
|
+
pat = pat or _read_pat()
|
22
|
+
assert pat, Exception("PAT is not found, please run `clarifai login` to persist your PAT")
|
23
|
+
os.environ["CLARIFAI_PAT"] = pat
|
clarifai/rag/rag.py
CHANGED
@@ -76,16 +76,17 @@ class RAG:
|
|
76
76
|
>>> rag_agent = RAG.setup(app_url=YOUR_APP_URL)
|
77
77
|
>>> rag_agent.chat(messages=[{"role":"human", "content":"What is Clarifai"}])
|
78
78
|
"""
|
79
|
-
|
79
|
+
now_ts = str(int(datetime.now().timestamp()))
|
80
80
|
if user_id and not app_url:
|
81
81
|
user = User(user_id=user_id, base_url=base_url, pat=pat)
|
82
82
|
## Create an App
|
83
|
-
now_ts = str(int(datetime.now().timestamp()))
|
84
83
|
app_id = f"rag_app_{now_ts}"
|
85
84
|
app = user.create_app(app_id=app_id, base_workflow=base_workflow)
|
86
85
|
|
87
86
|
if not user_id and app_url:
|
88
87
|
app = App(url=app_url, pat=pat)
|
88
|
+
uid = app_url.split(".com/")[1].split("/")[0]
|
89
|
+
user = User(user_id=uid, base_url=base_url, pat=pat)
|
89
90
|
|
90
91
|
if user_id and app_url:
|
91
92
|
raise UserError("Must provide one of user_id or app_url, not both.")
|
@@ -95,7 +96,7 @@ class RAG:
|
|
95
96
|
"user_id or app_url must be provided. The user_id can be found at https://clarifai.com/settings."
|
96
97
|
)
|
97
98
|
|
98
|
-
llm = Model(llm_url)
|
99
|
+
llm = Model(url=llm_url, pat=pat)
|
99
100
|
|
100
101
|
min_score = kwargs.get("min_score", 0.95)
|
101
102
|
max_results = kwargs.get("max_results", 5)
|
@@ -109,8 +110,8 @@ class RAG:
|
|
109
110
|
prompter_model_params = {"params": params}
|
110
111
|
|
111
112
|
## Create rag-prompter model and version
|
112
|
-
|
113
|
-
|
113
|
+
model_id = f"prompter-{workflow_id}" if workflow_id is not None else f"rag-prompter-{now_ts}"
|
114
|
+
prompter_model = app.create_model(model_id=model_id, model_type_id="rag-prompter")
|
114
115
|
prompter_model = prompter_model.create_version(output_info=prompter_model_params)
|
115
116
|
|
116
117
|
## Generate a tmp yaml file for workflow creation
|
@@ -153,6 +154,8 @@ class RAG:
|
|
153
154
|
batch_size: int = 128,
|
154
155
|
chunk_size: int = 1024,
|
155
156
|
chunk_overlap: int = 200,
|
157
|
+
dataset_id: str = None,
|
158
|
+
metadata: dict = None,
|
156
159
|
**kwargs) -> None:
|
157
160
|
"""Uploads documents to the app.
|
158
161
|
- Read from a local directory or public url or local filename.
|
@@ -192,14 +195,15 @@ class RAG:
|
|
192
195
|
|
193
196
|
#splitting documents into chunks
|
194
197
|
text_chunks = []
|
195
|
-
|
198
|
+
metadata_list = []
|
196
199
|
|
197
200
|
#iterate through documents
|
198
201
|
for doc in documents:
|
202
|
+
doc_i = 0
|
199
203
|
cur_text_chunks = split_document(
|
200
204
|
text=doc.text, chunk_size=chunk_size, chunk_overlap=chunk_overlap, **kwargs)
|
201
205
|
text_chunks.extend(cur_text_chunks)
|
202
|
-
|
206
|
+
metadata_list.extend([doc.metadata for _ in range(len(cur_text_chunks))])
|
203
207
|
#if batch size is reached, upload the batch
|
204
208
|
if len(text_chunks) > batch_size:
|
205
209
|
for idx in range(0, len(text_chunks), batch_size):
|
@@ -208,18 +212,23 @@ class RAG:
|
|
208
212
|
batch_texts = text_chunks[0:batch_size]
|
209
213
|
batch_ids = [uuid.uuid4().hex for _ in range(batch_size)]
|
210
214
|
#metadata
|
211
|
-
batch_metadatas =
|
215
|
+
batch_metadatas = metadata_list[0:batch_size]
|
212
216
|
meta_list = []
|
213
217
|
for meta in batch_metadatas:
|
214
218
|
meta_struct = Struct()
|
215
219
|
meta_struct.update(meta)
|
220
|
+
meta_struct.update({"doc_chunk_no": doc_i})
|
221
|
+
if metadata and isinstance(metadata, dict):
|
222
|
+
meta_struct.update(metadata)
|
216
223
|
meta_list.append(meta_struct)
|
224
|
+
doc_i += 1
|
217
225
|
del batch_metadatas
|
218
226
|
#creating input proto
|
219
227
|
input_batch = [
|
220
228
|
self._app.inputs().get_text_input(
|
221
229
|
input_id=batch_ids[i],
|
222
230
|
raw_text=text,
|
231
|
+
dataset_id=dataset_id,
|
223
232
|
metadata=meta_list[i],
|
224
233
|
) for i, text in enumerate(batch_texts)
|
225
234
|
]
|
@@ -227,32 +236,37 @@ class RAG:
|
|
227
236
|
self._app.inputs().upload_inputs(inputs=input_batch)
|
228
237
|
#delete uploaded chunks
|
229
238
|
del text_chunks[0:batch_size]
|
230
|
-
del
|
239
|
+
del metadata_list[0:batch_size]
|
231
240
|
|
232
241
|
#uploading the remaining chunks
|
233
242
|
if len(text_chunks) > 0:
|
234
243
|
batch_size = len(text_chunks)
|
235
244
|
batch_ids = [uuid.uuid4().hex for _ in range(batch_size)]
|
236
245
|
#metadata
|
237
|
-
batch_metadatas =
|
246
|
+
batch_metadatas = metadata_list[0:batch_size]
|
238
247
|
meta_list = []
|
239
248
|
for meta in batch_metadatas:
|
240
249
|
meta_struct = Struct()
|
241
250
|
meta_struct.update(meta)
|
251
|
+
meta_struct.update({"doc_chunk_no": doc_i})
|
252
|
+
if metadata and isinstance(metadata, dict):
|
253
|
+
meta_struct.update(metadata)
|
242
254
|
meta_list.append(meta_struct)
|
255
|
+
doc_i += 1
|
243
256
|
del batch_metadatas
|
244
257
|
#creating input proto
|
245
258
|
input_batch = [
|
246
259
|
self._app.inputs().get_text_input(
|
247
260
|
input_id=batch_ids[i],
|
248
261
|
raw_text=text,
|
262
|
+
dataset_id=dataset_id,
|
249
263
|
metadata=meta_list[i],
|
250
264
|
) for i, text in enumerate(text_chunks)
|
251
265
|
]
|
252
266
|
#uploading input with metadata
|
253
267
|
self._app.inputs().upload_inputs(inputs=input_batch)
|
254
268
|
del text_chunks
|
255
|
-
del
|
269
|
+
del metadata_list
|
256
270
|
|
257
271
|
def chat(self, messages: List[dict], client_manage_state: bool = False) -> List[dict]:
|
258
272
|
"""Chat interface in OpenAI API format.
|
clarifai/rag/utils.py
CHANGED
@@ -3,10 +3,6 @@ from pathlib import Path
|
|
3
3
|
from typing import List
|
4
4
|
|
5
5
|
import requests
|
6
|
-
from llama_index.core import Document, SimpleDirectoryReader
|
7
|
-
from llama_index.core.node_parser.text import SentenceSplitter
|
8
|
-
from llama_index.core.readers.download import download_loader
|
9
|
-
from pypdf import PdfReader
|
10
6
|
|
11
7
|
|
12
8
|
## TODO: Make this token-aware.
|
@@ -36,8 +32,7 @@ def format_assistant_message(raw_text: str) -> dict:
|
|
36
32
|
return {"role": "assistant", "content": raw_text}
|
37
33
|
|
38
34
|
|
39
|
-
def load_documents(file_path: str = None, folder_path: str = None,
|
40
|
-
url: str = None) -> List[Document]:
|
35
|
+
def load_documents(file_path: str = None, folder_path: str = None, url: str = None) -> List[any]:
|
41
36
|
"""Loads documents from a local directory or public url or local filename.
|
42
37
|
|
43
38
|
Args:
|
@@ -45,6 +40,13 @@ def load_documents(file_path: str = None, folder_path: str = None,
|
|
45
40
|
folder_path (str): The path to the folder.
|
46
41
|
url (str): The url to the file.
|
47
42
|
"""
|
43
|
+
#check import packages
|
44
|
+
try:
|
45
|
+
from llama_index.core import Document, SimpleDirectoryReader
|
46
|
+
from llama_index.core.readers.download import download_loader
|
47
|
+
except ImportError:
|
48
|
+
raise ImportError("Could not import llama index package. "
|
49
|
+
"Please install it with `pip install llama-index-core==0.10.1`.")
|
48
50
|
#document loaders for filepath
|
49
51
|
if file_path:
|
50
52
|
if file_path.endswith(".pdf"):
|
@@ -77,6 +79,12 @@ def load_documents(file_path: str = None, folder_path: str = None,
|
|
77
79
|
documents = [Document(text=response.content)]
|
78
80
|
#for pdf files
|
79
81
|
except Exception:
|
82
|
+
#check import packages
|
83
|
+
try:
|
84
|
+
from pypdf import PdfReader
|
85
|
+
except ImportError:
|
86
|
+
raise ImportError("Could not import pypdf package. "
|
87
|
+
"Please install it with `pip install pypdf==3.17.4`.")
|
80
88
|
documents = []
|
81
89
|
pdf_file = PdfReader(io.BytesIO(response.content))
|
82
90
|
num_pages = len(pdf_file.pages)
|
@@ -98,6 +106,13 @@ def split_document(text: str, chunk_size: int, chunk_overlap: int, **kwargs) ->
|
|
98
106
|
chunk_overlap (int): The amount of overlap between each chunk.
|
99
107
|
**kwargs: Additional keyword arguments for the SentenceSplitter.
|
100
108
|
"""
|
109
|
+
#check import packages
|
110
|
+
try:
|
111
|
+
from llama_index.core.node_parser.text import SentenceSplitter
|
112
|
+
except ImportError:
|
113
|
+
raise ImportError("Could not import llama index package. "
|
114
|
+
"Please install it with `pip install llama-index-core==0.10.1`.")
|
115
|
+
#document
|
101
116
|
text_parser = SentenceSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap, **kwargs)
|
102
117
|
text_chunks = text_parser.split_text(text)
|
103
118
|
return text_chunks
|