clarifai 10.1.0__py3-none-any.whl → 10.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,15 +8,15 @@ If a dataset module exists in the zoo, uploading the specific dataset can be eas
8
8
 
9
9
  ```python
10
10
  from clarifai.client.app import App
11
- from clarifai.datasets.upload.loaders.coco_segmentation import COCOSegmentationDataLoader
11
+ from clarifai.datasets.upload.loaders.coco_detection import COCODetectionDataLoader
12
12
 
13
13
  app = App(app_id="", user_id="")
14
14
  # Create a dataset in Clarifai App
15
15
  dataset = app.create_dataset(dataset_id="")
16
16
  # instantiate dataloader object
17
- coco_seg_dataloader = COCOSegmentationDataLoader(images_dir="", label_filepath="")
17
+ coco_det_dataloader = COCODetectionDataLoader(images_dir="", label_filepath="")
18
18
  # execute data upload to Clarifai app dataset
19
- dataset.upload_dataset(dataloader=coco_seg_dataloader)
19
+ dataset.upload_dataset(dataloader=coco_det_dataloader)
20
20
  ```
21
21
 
22
22
  ## Dataset Loaders
@@ -24,7 +24,6 @@ dataset.upload_dataset(dataloader=coco_seg_dataloader)
24
24
  | dataset name | task | module name (.py)
25
25
  | --- | --- | ---
26
26
  | [COCO 2017](https://cocodataset.org/#download) | Detection | `coco_detection` |
27
- | | Segmentation | `coco_segmentation` |
28
27
  | | Captions | `coco_captions` |
29
28
  |[xVIEW](http://xviewdataset.org/) | Detection | `xview_detection` |
30
29
  | [ImageNet](https://www.image-net.org/) | Classification | `imagenet_classification` |
@@ -6,7 +6,7 @@ from concurrent.futures import ThreadPoolExecutor
6
6
  from multiprocessing import cpu_count
7
7
  from typing import DefaultDict, Dict, List
8
8
 
9
- import cv2
9
+ from PIL import Image
10
10
  from tqdm import tqdm
11
11
 
12
12
  from clarifai.datasets.upload.base import ClarifaiDataLoader
@@ -54,9 +54,8 @@ class xviewDetectionDataLoader(ClarifaiDataLoader):
54
54
  def compress_tiff(self, img_path: str) -> None:
55
55
  """Compress tiff image"""
56
56
  img_comp_path = os.path.join(self.img_comp_dir, os.path.basename(img_path))
57
- img_arr = cv2.imread(img_path)
58
- cv2.imwrite(
59
- img_comp_path, img_arr, params=(cv2.IMWRITE_TIFF_COMPRESSION, 8)) # 8: Adobe Deflate
57
+ img_arr = Image.open(img_path)
58
+ img_arr.save(img_comp_path, 'TIFF', compression='tiff_deflate')
60
59
 
61
60
  def preprocess(self):
62
61
  """Compress the tiff images to comply with clarifai grpc image encoding limit(<20MB) Uses ADOBE_DEFLATE compression algorithm"""
@@ -133,7 +132,8 @@ class xviewDetectionDataLoader(ClarifaiDataLoader):
133
132
  _id = os.path.splitext(os.path.basename(self.image_paths[index]))[0]
134
133
  image_path = self.image_paths[index]
135
134
 
136
- image_height, image_width = cv2.imread(image_path).shape[:2]
135
+ image = Image.open(image_path)
136
+ image_width, image_height = image.size
137
137
  annots = []
138
138
  class_names = []
139
139
  for bbox, concept in zip(self.all_data[_id]['bboxes'], self.all_data[_id]['concepts']):
@@ -11,7 +11,7 @@ from ..constants import (CLARIFAI_EXAMPLES_REPO, CLARIFAI_EXAMPLES_REPO_PATH,
11
11
  def download_examples_repo(forced_download: bool = False):
12
12
 
13
13
  def _pull():
14
- subprocess.run(f"git clone {CLARIFAI_EXAMPLES_REPO} {CLARIFAI_EXAMPLES_REPO_PATH}")
14
+ subprocess.run(f"git clone {CLARIFAI_EXAMPLES_REPO} {CLARIFAI_EXAMPLES_REPO_PATH}", shell=True)
15
15
 
16
16
  if not os.path.isdir(CLARIFAI_EXAMPLES_REPO_PATH):
17
17
  print(f"Download examples to {CLARIFAI_EXAMPLES_REPO_PATH}")
@@ -70,7 +70,7 @@ class BuildModelSubCli(BaseClarifaiCli):
70
70
  if not self.no_test:
71
71
  assert os.path.exists(
72
72
  self.test_path), FileNotFoundError(f"Could not find `test.py` in {self.path}")
73
- result = subprocess.run(f"pytest -s --log-level=INFO {self.test_path}")
73
+ result = subprocess.run(f"pytest -s --log-level=INFO {self.test_path}", shell=True)
74
74
  assert result.returncode == 0, "Test has failed. Please make sure no error exists in your code."
75
75
 
76
76
  # build
@@ -126,7 +126,7 @@ class UploadModelSubCli(BaseClarifaiCli):
126
126
  # Run test before uploading
127
127
  if not self.no_test:
128
128
  assert os.path.exists(self.test_path), FileNotFoundError(f"Not found {self.test_path}")
129
- result = subprocess.run(f"pytest -s --log-level=INFO {self.test_path}")
129
+ result = subprocess.run(f"pytest -s --log-level=INFO {self.test_path}", shell=True)
130
130
  assert result.returncode == 0, "Test has failed. Please make sure no error exists in your code."
131
131
 
132
132
  deploy(
@@ -18,4 +18,6 @@ def _read_pat():
18
18
 
19
19
  def login(pat=None):
20
20
  """ if pat provided, set pat to CLARIFAI_PAT otherwise read pat from file"""
21
- os.environ["CLARIFAI_PAT"] = pat or _read_pat()
21
+ pat = pat or _read_pat()
22
+ assert pat, Exception("PAT is not found, please run `clarifai login` to persist your PAT")
23
+ os.environ["CLARIFAI_PAT"] = pat
clarifai/rag/rag.py CHANGED
@@ -76,16 +76,17 @@ class RAG:
76
76
  >>> rag_agent = RAG.setup(app_url=YOUR_APP_URL)
77
77
  >>> rag_agent.chat(messages=[{"role":"human", "content":"What is Clarifai"}])
78
78
  """
79
-
79
+ now_ts = str(int(datetime.now().timestamp()))
80
80
  if user_id and not app_url:
81
81
  user = User(user_id=user_id, base_url=base_url, pat=pat)
82
82
  ## Create an App
83
- now_ts = str(int(datetime.now().timestamp()))
84
83
  app_id = f"rag_app_{now_ts}"
85
84
  app = user.create_app(app_id=app_id, base_workflow=base_workflow)
86
85
 
87
86
  if not user_id and app_url:
88
87
  app = App(url=app_url, pat=pat)
88
+ uid = app_url.split(".com/")[1].split("/")[0]
89
+ user = User(user_id=uid, base_url=base_url, pat=pat)
89
90
 
90
91
  if user_id and app_url:
91
92
  raise UserError("Must provide one of user_id or app_url, not both.")
@@ -95,7 +96,7 @@ class RAG:
95
96
  "user_id or app_url must be provided. The user_id can be found at https://clarifai.com/settings."
96
97
  )
97
98
 
98
- llm = Model(llm_url)
99
+ llm = Model(url=llm_url, pat=pat)
99
100
 
100
101
  min_score = kwargs.get("min_score", 0.95)
101
102
  max_results = kwargs.get("max_results", 5)
@@ -109,8 +110,8 @@ class RAG:
109
110
  prompter_model_params = {"params": params}
110
111
 
111
112
  ## Create rag-prompter model and version
112
- prompter_model = app.create_model(
113
- model_id=f"rag_prompter_{now_ts}", model_type_id="rag-prompter")
113
+ model_id = f"prompter-{workflow_id}" if workflow_id is not None else f"rag-prompter-{now_ts}"
114
+ prompter_model = app.create_model(model_id=model_id, model_type_id="rag-prompter")
114
115
  prompter_model = prompter_model.create_version(output_info=prompter_model_params)
115
116
 
116
117
  ## Generate a tmp yaml file for workflow creation
@@ -153,6 +154,8 @@ class RAG:
153
154
  batch_size: int = 128,
154
155
  chunk_size: int = 1024,
155
156
  chunk_overlap: int = 200,
157
+ dataset_id: str = None,
158
+ metadata: dict = None,
156
159
  **kwargs) -> None:
157
160
  """Uploads documents to the app.
158
161
  - Read from a local directory or public url or local filename.
@@ -192,14 +195,15 @@ class RAG:
192
195
 
193
196
  #splitting documents into chunks
194
197
  text_chunks = []
195
- metadata = []
198
+ metadata_list = []
196
199
 
197
200
  #iterate through documents
198
201
  for doc in documents:
202
+ doc_i = 0
199
203
  cur_text_chunks = split_document(
200
204
  text=doc.text, chunk_size=chunk_size, chunk_overlap=chunk_overlap, **kwargs)
201
205
  text_chunks.extend(cur_text_chunks)
202
- metadata.extend([doc.metadata for _ in range(len(cur_text_chunks))])
206
+ metadata_list.extend([doc.metadata for _ in range(len(cur_text_chunks))])
203
207
  #if batch size is reached, upload the batch
204
208
  if len(text_chunks) > batch_size:
205
209
  for idx in range(0, len(text_chunks), batch_size):
@@ -208,18 +212,23 @@ class RAG:
208
212
  batch_texts = text_chunks[0:batch_size]
209
213
  batch_ids = [uuid.uuid4().hex for _ in range(batch_size)]
210
214
  #metadata
211
- batch_metadatas = metadata[0:batch_size]
215
+ batch_metadatas = metadata_list[0:batch_size]
212
216
  meta_list = []
213
217
  for meta in batch_metadatas:
214
218
  meta_struct = Struct()
215
219
  meta_struct.update(meta)
220
+ meta_struct.update({"doc_chunk_no": doc_i})
221
+ if metadata and isinstance(metadata, dict):
222
+ meta_struct.update(metadata)
216
223
  meta_list.append(meta_struct)
224
+ doc_i += 1
217
225
  del batch_metadatas
218
226
  #creating input proto
219
227
  input_batch = [
220
228
  self._app.inputs().get_text_input(
221
229
  input_id=batch_ids[i],
222
230
  raw_text=text,
231
+ dataset_id=dataset_id,
223
232
  metadata=meta_list[i],
224
233
  ) for i, text in enumerate(batch_texts)
225
234
  ]
@@ -227,32 +236,37 @@ class RAG:
227
236
  self._app.inputs().upload_inputs(inputs=input_batch)
228
237
  #delete uploaded chunks
229
238
  del text_chunks[0:batch_size]
230
- del metadata[0:batch_size]
239
+ del metadata_list[0:batch_size]
231
240
 
232
241
  #uploading the remaining chunks
233
242
  if len(text_chunks) > 0:
234
243
  batch_size = len(text_chunks)
235
244
  batch_ids = [uuid.uuid4().hex for _ in range(batch_size)]
236
245
  #metadata
237
- batch_metadatas = metadata[0:batch_size]
246
+ batch_metadatas = metadata_list[0:batch_size]
238
247
  meta_list = []
239
248
  for meta in batch_metadatas:
240
249
  meta_struct = Struct()
241
250
  meta_struct.update(meta)
251
+ meta_struct.update({"doc_chunk_no": doc_i})
252
+ if metadata and isinstance(metadata, dict):
253
+ meta_struct.update(metadata)
242
254
  meta_list.append(meta_struct)
255
+ doc_i += 1
243
256
  del batch_metadatas
244
257
  #creating input proto
245
258
  input_batch = [
246
259
  self._app.inputs().get_text_input(
247
260
  input_id=batch_ids[i],
248
261
  raw_text=text,
262
+ dataset_id=dataset_id,
249
263
  metadata=meta_list[i],
250
264
  ) for i, text in enumerate(text_chunks)
251
265
  ]
252
266
  #uploading input with metadata
253
267
  self._app.inputs().upload_inputs(inputs=input_batch)
254
268
  del text_chunks
255
- del metadata
269
+ del metadata_list
256
270
 
257
271
  def chat(self, messages: List[dict], client_manage_state: bool = False) -> List[dict]:
258
272
  """Chat interface in OpenAI API format.
clarifai/rag/utils.py CHANGED
@@ -3,10 +3,6 @@ from pathlib import Path
3
3
  from typing import List
4
4
 
5
5
  import requests
6
- from llama_index.core import Document, SimpleDirectoryReader
7
- from llama_index.core.node_parser.text import SentenceSplitter
8
- from llama_index.core.readers.download import download_loader
9
- from pypdf import PdfReader
10
6
 
11
7
 
12
8
  ## TODO: Make this token-aware.
@@ -36,8 +32,7 @@ def format_assistant_message(raw_text: str) -> dict:
36
32
  return {"role": "assistant", "content": raw_text}
37
33
 
38
34
 
39
- def load_documents(file_path: str = None, folder_path: str = None,
40
- url: str = None) -> List[Document]:
35
+ def load_documents(file_path: str = None, folder_path: str = None, url: str = None) -> List[any]:
41
36
  """Loads documents from a local directory or public url or local filename.
42
37
 
43
38
  Args:
@@ -45,6 +40,13 @@ def load_documents(file_path: str = None, folder_path: str = None,
45
40
  folder_path (str): The path to the folder.
46
41
  url (str): The url to the file.
47
42
  """
43
+ #check import packages
44
+ try:
45
+ from llama_index.core import Document, SimpleDirectoryReader
46
+ from llama_index.core.readers.download import download_loader
47
+ except ImportError:
48
+ raise ImportError("Could not import llama index package. "
49
+ "Please install it with `pip install llama-index-core==0.10.1`.")
48
50
  #document loaders for filepath
49
51
  if file_path:
50
52
  if file_path.endswith(".pdf"):
@@ -77,6 +79,12 @@ def load_documents(file_path: str = None, folder_path: str = None,
77
79
  documents = [Document(text=response.content)]
78
80
  #for pdf files
79
81
  except Exception:
82
+ #check import packages
83
+ try:
84
+ from pypdf import PdfReader
85
+ except ImportError:
86
+ raise ImportError("Could not import pypdf package. "
87
+ "Please install it with `pip install pypdf==3.17.4`.")
80
88
  documents = []
81
89
  pdf_file = PdfReader(io.BytesIO(response.content))
82
90
  num_pages = len(pdf_file.pages)
@@ -98,6 +106,13 @@ def split_document(text: str, chunk_size: int, chunk_overlap: int, **kwargs) ->
98
106
  chunk_overlap (int): The amount of overlap between each chunk.
99
107
  **kwargs: Additional keyword arguments for the SentenceSplitter.
100
108
  """
109
+ #check import packages
110
+ try:
111
+ from llama_index.core.node_parser.text import SentenceSplitter
112
+ except ImportError:
113
+ raise ImportError("Could not import llama index package. "
114
+ "Please install it with `pip install llama-index-core==0.10.1`.")
115
+ #document
101
116
  text_parser = SentenceSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap, **kwargs)
102
117
  text_chunks = text_parser.split_text(text)
103
118
  return text_chunks