clarifai 10.0.1__py3-none-any.whl → 10.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. clarifai/client/app.py +23 -43
  2. clarifai/client/base.py +46 -4
  3. clarifai/client/dataset.py +85 -33
  4. clarifai/client/input.py +35 -7
  5. clarifai/client/model.py +192 -11
  6. clarifai/client/module.py +8 -6
  7. clarifai/client/runner.py +3 -1
  8. clarifai/client/search.py +6 -3
  9. clarifai/client/user.py +14 -12
  10. clarifai/client/workflow.py +8 -5
  11. clarifai/datasets/upload/features.py +3 -0
  12. clarifai/datasets/upload/image.py +57 -26
  13. clarifai/datasets/upload/loaders/README.md +3 -4
  14. clarifai/datasets/upload/loaders/xview_detection.py +9 -5
  15. clarifai/datasets/upload/utils.py +23 -7
  16. clarifai/models/model_serving/README.md +113 -121
  17. clarifai/models/model_serving/__init__.py +2 -0
  18. clarifai/models/model_serving/cli/_utils.py +53 -0
  19. clarifai/models/model_serving/cli/base.py +14 -0
  20. clarifai/models/model_serving/cli/build.py +79 -0
  21. clarifai/models/model_serving/cli/clarifai_clis.py +33 -0
  22. clarifai/models/model_serving/cli/create.py +171 -0
  23. clarifai/models/model_serving/cli/example_cli.py +34 -0
  24. clarifai/models/model_serving/cli/login.py +26 -0
  25. clarifai/models/model_serving/cli/upload.py +182 -0
  26. clarifai/models/model_serving/constants.py +20 -0
  27. clarifai/models/model_serving/docs/cli.md +150 -0
  28. clarifai/models/model_serving/docs/concepts.md +229 -0
  29. clarifai/models/model_serving/docs/dependencies.md +1 -1
  30. clarifai/models/model_serving/docs/inference_parameters.md +112 -107
  31. clarifai/models/model_serving/docs/model_types.md +16 -17
  32. clarifai/models/model_serving/model_config/__init__.py +4 -2
  33. clarifai/models/model_serving/model_config/base.py +369 -0
  34. clarifai/models/model_serving/model_config/config.py +219 -224
  35. clarifai/models/model_serving/model_config/inference_parameter.py +5 -0
  36. clarifai/models/model_serving/model_config/model_types_config/multimodal-embedder.yaml +25 -24
  37. clarifai/models/model_serving/model_config/model_types_config/text-classifier.yaml +19 -18
  38. clarifai/models/model_serving/model_config/model_types_config/text-embedder.yaml +20 -18
  39. clarifai/models/model_serving/model_config/model_types_config/text-to-image.yaml +19 -18
  40. clarifai/models/model_serving/model_config/model_types_config/text-to-text.yaml +19 -18
  41. clarifai/models/model_serving/model_config/model_types_config/visual-classifier.yaml +22 -18
  42. clarifai/models/model_serving/model_config/model_types_config/visual-detector.yaml +32 -28
  43. clarifai/models/model_serving/model_config/model_types_config/visual-embedder.yaml +19 -18
  44. clarifai/models/model_serving/model_config/model_types_config/visual-segmenter.yaml +19 -18
  45. clarifai/models/model_serving/{models → model_config}/output.py +8 -0
  46. clarifai/models/model_serving/model_config/triton/__init__.py +14 -0
  47. clarifai/models/model_serving/model_config/{serializer.py → triton/serializer.py} +3 -1
  48. clarifai/models/model_serving/model_config/triton/triton_config.py +182 -0
  49. clarifai/models/model_serving/{models/model_types.py → model_config/triton/wrappers.py} +4 -4
  50. clarifai/models/model_serving/{models → repo_build}/__init__.py +2 -0
  51. clarifai/models/model_serving/repo_build/build.py +198 -0
  52. clarifai/models/model_serving/repo_build/static_files/_requirements.txt +2 -0
  53. clarifai/models/model_serving/repo_build/static_files/base_test.py +169 -0
  54. clarifai/models/model_serving/repo_build/static_files/inference.py +26 -0
  55. clarifai/models/model_serving/repo_build/static_files/sample_clarifai_config.yaml +25 -0
  56. clarifai/models/model_serving/repo_build/static_files/test.py +40 -0
  57. clarifai/models/model_serving/{models/pb_model.py → repo_build/static_files/triton/model.py} +15 -14
  58. clarifai/models/model_serving/utils.py +21 -0
  59. clarifai/rag/rag.py +67 -23
  60. clarifai/rag/utils.py +21 -5
  61. clarifai/utils/evaluation/__init__.py +427 -0
  62. clarifai/utils/evaluation/helpers.py +522 -0
  63. clarifai/utils/logging.py +7 -0
  64. clarifai/utils/model_train.py +3 -1
  65. clarifai/versions.py +1 -1
  66. {clarifai-10.0.1.dist-info → clarifai-10.1.1.dist-info}/METADATA +58 -10
  67. clarifai-10.1.1.dist-info/RECORD +115 -0
  68. clarifai-10.1.1.dist-info/entry_points.txt +2 -0
  69. clarifai/datasets/upload/loaders/coco_segmentation.py +0 -98
  70. clarifai/models/model_serving/cli/deploy_cli.py +0 -123
  71. clarifai/models/model_serving/cli/model_zip.py +0 -61
  72. clarifai/models/model_serving/cli/repository.py +0 -89
  73. clarifai/models/model_serving/docs/custom_config.md +0 -33
  74. clarifai/models/model_serving/docs/output.md +0 -28
  75. clarifai/models/model_serving/models/default_test.py +0 -281
  76. clarifai/models/model_serving/models/inference.py +0 -50
  77. clarifai/models/model_serving/models/test.py +0 -64
  78. clarifai/models/model_serving/pb_model_repository.py +0 -108
  79. clarifai-10.0.1.dist-info/RECORD +0 -103
  80. clarifai-10.0.1.dist-info/entry_points.txt +0 -4
  81. {clarifai-10.0.1.dist-info → clarifai-10.1.1.dist-info}/LICENSE +0 -0
  82. {clarifai-10.0.1.dist-info → clarifai-10.1.1.dist-info}/WHEEL +0 -0
  83. {clarifai-10.0.1.dist-info → clarifai-10.1.1.dist-info}/top_level.txt +0 -0
clarifai/utils/evaluation/helpers.py ADDED
@@ -0,0 +1,522 @@
1
+ import time
2
+ from collections import OrderedDict, defaultdict
3
+ from dataclasses import dataclass, field
4
+ from enum import Enum
5
+ from typing import Dict, List, Tuple, Union
6
+
7
+ import numpy as np
8
+ from clarifai_grpc.grpc.api import resources_pb2
9
+ from clarifai_grpc.grpc.api.status import status_code_pb2
10
+ from google.protobuf.json_format import MessageToDict
11
+
12
+ from clarifai.client.dataset import Dataset
13
+ from clarifai.client.model import Model
14
+
15
+ try:
16
+ import pandas as pd
17
+ except ImportError:
18
+ raise ImportError("Can not import pandas. Please run `pip install pandas` to install it")
19
+
20
+ try:
21
+ from loguru import logger
22
+ except ImportError:
23
+ from ..logging import get_logger
24
+ logger = get_logger(logger_level="INFO", name=__name__)
25
+
26
+ MACRO_AVG = "macro_avg"
27
+
28
+
29
+ class EvalType(Enum):
30
+ UNDEFINED = 0
31
+ CLASSIFICATION = 1
32
+ DETECTION = 2
33
+ CLUSTERING = 3
34
+ SEGMENTATION = 4
35
+ TRACKER = 5
36
+
37
+
38
+ def get_eval_type(model_type):
39
+ if "classifier" in model_type:
40
+ return EvalType.CLASSIFICATION
41
+ elif "visual-detector" in model_type:
42
+ return EvalType.DETECTION
43
+ elif "segmenter" in model_type:
44
+ return EvalType.SEGMENTATION
45
+ elif "embedder" in model_type:
46
+ return EvalType.CLUSTERING
47
+ elif "tracker" in model_type:
48
+ return EvalType.TRACKER
49
+ else:
50
+ return EvalType.UNDEFINED
51
+
52
+
53
+ def to_file_name(x) -> str:
54
+ return x.replace('/', '--')
55
+
56
+
57
+ @dataclass
58
+ class _BaseEvalResultHandler:
59
+ model: Model
60
+ eval_data: List[resources_pb2.EvalMetrics] = field(default_factory=list)
61
+
62
+ def evaluate_and_wait(self, dataset: Dataset):
63
+ from tqdm import tqdm
64
+ dataset_id = dataset.id
65
+ dataset_app_id = dataset.app_id
66
+ dataset_user_id = dataset.user_id
67
+ _ = self.model.evaluate(
68
+ dataset_id=dataset_id, dataset_app_id=dataset_app_id, dataset_user_id=dataset_user_id)
69
+ latest_eval = self.model.list_evaluations()[0]
70
+ excepted = 10
71
+ desc = f"Please wait for the evaluation process between model {self.get_model_name()} and dataset {dataset_user_id}/{dataset_app_id}/{dataset_id} to complete."
72
+ bar = tqdm(total=excepted, desc=desc, leave=False, ncols=0)
73
+ while latest_eval.status.code in [
74
+ status_code_pb2.MODEL_EVALUATING, status_code_pb2.MODEL_QUEUED_FOR_EVALUATION
75
+ ]:
76
+ latest_eval = self.model.list_evaluations()[0]
77
+ time.sleep(1)
78
+ bar.update(1)
79
+
80
+ if latest_eval.status.code == status_code_pb2.MODEL_EVALUATED:
81
+ return latest_eval
82
+ else:
83
+ raise Exception(
84
+ f"Model has failed to evaluate \n {latest_eval.status}.\nPlease check your dataset inputs!"
85
+ )
86
+
87
+ def find_eval_id(self, datasets: List[Dataset] = [], attempt_evaluate: bool = False):
88
+ list_eval_outputs = self.model.list_evaluations()
89
+ self.eval_data = []
90
+ for dataset in datasets:
91
+ dataset.app_id = dataset.app_id or self.model.auth_helper.app_id
92
+ dataset.user_id = dataset.user_id or self.model.auth_helper.user_id
93
+ dataset_assert_msg = dataset.dataset_info
94
+ # checking if dataset exists
95
+ out = dataset.list_versions()
96
+ try:
97
+ next(iter(out))
98
+ except Exception as e:
99
+ if any(["CONN_DOES_NOT_EXIST" in _e for _e in e.args]):
100
+ raise Exception(
101
+ f"Dataset {dataset_assert_msg} does not exists. Please check datasets args")
102
+ else:
103
+ # caused by sdk failure
104
+ pass
105
+ # checking if model is evaluated with this dataset
106
+ _is_found = False
107
+ for each in list_eval_outputs:
108
+ if each.status.code == status_code_pb2.MODEL_EVALUATED:
109
+ eval_dataset = each.ground_truth_dataset
110
+ # if version_id is empty -> get latest eval result of dataset,app,user id
111
+ if dataset.app_id == eval_dataset.app_id and dataset.id == eval_dataset.id and dataset.user_id == eval_dataset.user_id and (
112
+ not dataset.version.id or dataset.version.id == eval_dataset.version.id):
113
+ # append to eval_data
114
+ self.eval_data.append(each)
115
+ _is_found = True
116
+ break
117
+
118
+ # if not evaluated, but user wants to proceed it
119
+ if not _is_found:
120
+ if attempt_evaluate:
121
+ self.eval_data.append(self.evaluate_and_wait(dataset))
122
+ # otherwise raise error
123
+ else:
124
+ raise Exception(
125
+ f"Model {self.model.model_info.name} in app {self.model.model_info.app_id} is not evaluated yet with dataset {dataset_assert_msg}"
126
+ )
127
+
128
+ @staticmethod
129
+ def proto_to_dict(value):
130
+ return MessageToDict(value, preserving_proto_field_name=True)
131
+
132
+ @staticmethod
133
+ def _f1(x: float, y: float):
134
+ z = x + y
135
+ return 2 * x * y / z if z else 0.
136
+
137
+ def _get_eval(self, index=0, **kwargs):
138
+ logger.info(
139
+ f"Model {self.get_model_name(pretify=True)}: retrieving {kwargs} metrics of dataset: {self.get_dataset_name_by_index(index)}"
140
+ )
141
+ result = self.model.get_eval_by_id(eval_id=self.eval_data[index].id, **kwargs)
142
+ for k, v in kwargs.items():
143
+ if v:
144
+ getattr(self.eval_data[index], k).MergeFrom(getattr(result, k))
145
+
146
+ def get_eval_data(self, metric_name: str, index=0):
147
+ if metric_name == 'binary_metrics':
148
+ if len(self.eval_data[index].binary_metrics) == 0:
149
+ self._get_eval(index, binary_metrics=True)
150
+ elif metric_name == 'label_counts':
151
+ if self.proto_to_dict(self.eval_data[index].label_counts) == {}:
152
+ self._get_eval(index, label_counts=True)
153
+ elif metric_name == 'confusion_matrix':
154
+ if self.eval_data[index].confusion_matrix.ByteSize() == 0:
155
+ self._get_eval(index, confusion_matrix=True)
156
+ elif metric_name == 'metrics_by_class':
157
+ if len(self.eval_data[index].metrics_by_class) == 0:
158
+ self._get_eval(index, metrics_by_class=True)
159
+ elif metric_name == 'metrics_by_area':
160
+ if len(self.eval_data[index].metrics_by_area) == 0:
161
+ self._get_eval(index, metrics_by_area=True)
162
+
163
+ return getattr(self.eval_data[index], metric_name)
164
+
165
+ def get_threshold_index(self, threshold_list: list, selected_value: float = 0.5) -> int:
166
+ assert 0 <= selected_value <= 1 and isinstance(selected_value, float)
167
+ threshold_list = [round(each, 2) for each in threshold_list]
168
+
169
+ def parse_precision(x):
170
+ return len(str(x).split(".")[1])
171
+
172
+ precision = parse_precision(selected_value)
173
+ if precision > 2:
174
+ selected_value = round(selected_value, 2)
175
+ logger.warning("Round the selected value to .2 decimals")
176
+ return threshold_list.index(selected_value)
177
+
178
+ def get_dataset_name_by_index(self, index=0, pretify=True):
179
+ out = self.eval_data[index].ground_truth_dataset
180
+ if pretify:
181
+ app_id = out.app_id
182
+ dataset = out.id
183
+ #out = f"{app_id}/{dataset}/{ver[:5]}" if ver else f"{app_id}/{dataset}"
184
+ if self.model.model_info.app_id == app_id:
185
+ out = dataset
186
+ else:
187
+ out = f"{app_id}/{dataset}"
188
+
189
+ return out
190
+
191
+ def get_model_name(self, pretify=True):
192
+ model = self.model.model_info
193
+ if pretify:
194
+ app_id = model.app_id
195
+ name = model.id
196
+ ver = model.model_version.id
197
+ model = f"{app_id}/{name}/{ver[:5]}" if ver else f"{app_id}/{name}"
198
+
199
+ return model
200
+
201
+ def _process_curve(self, data: resources_pb2.BinaryMetrics, metric_name: str, x: str,
202
+ y: str) -> Dict[str, Dict[str, np.array]]:
203
+ """ Postprocess curve
204
+ """
205
+ x_arr = []
206
+ y_arr = []
207
+ threshold = []
208
+ outputs = []
209
+
210
+ def _make_df(xcol, ycol, concept_col, th_col):
211
+ return pd.DataFrame({x: xcol, y: ycol, 'concept': concept_col, 'threshold': th_col})
212
+
213
+ for bd in data:
214
+ concept_id = bd.concept.id
215
+ metric = eval(f'bd.{metric_name}')
216
+ if metric.ByteSize() == 0:
217
+ continue
218
+ _x = np.array(eval(f'metric.{x}'))
219
+ _y = np.array(eval(f'metric.{y}'))
220
+ threshold = np.array(metric.thresholds)
221
+ x_arr.append(_x)
222
+ y_arr.append(_y)
223
+ concept_cols = [concept_id for _ in range(len(_x))]
224
+ outputs.append(_make_df(_x, _y, concept_cols, threshold))
225
+
226
+ avg_x = np.mean(x_arr, axis=0)
227
+ avg_y = np.mean(y_arr, axis=0)
228
+ if np.isnan(avg_x).all():
229
+ return None
230
+ else:
231
+ avg_cols = [MACRO_AVG for _ in range(len(avg_x))]
232
+ outputs.append(_make_df(avg_x, avg_y, avg_cols, threshold))
233
+
234
+ return pd.concat(outputs, axis=0)
235
+
236
+ def parse_concept_ids(self, *args, **kwargs) -> List[str]:
237
+ raise NotImplementedError
238
+
239
+ def detailed_summary(self, *args, **kwargs):
240
+ raise NotImplementedError
241
+
242
+ def pr_curve(self, *args, **kwargs):
243
+ raise NotImplementedError
244
+
245
+ def roc_curve(self, *args, **kwargs):
246
+ raise NotImplementedError
247
+
248
+ def confusion_matrix(self, *args, **kwargs):
249
+ raise NotImplementedError
250
+
251
+
252
+ @dataclass
253
+ class PlaceholderHandler(_BaseEvalResultHandler):
254
+
255
+ def parse_concept_ids(self, *args, **kwargs) -> List[str]:
256
+ return None
257
+
258
+ def detailed_summary(self, *args, **kwargs):
259
+ return None
260
+
261
+ def pr_curve(self, *args, **kwargs):
262
+ return None
263
+
264
+
265
+ @dataclass
266
+ class ClassificationResultHandler(_BaseEvalResultHandler):
267
+
268
+ def parse_concept_ids(self, index=0) -> List[str]:
269
+ eval_data = self.get_eval_data(metric_name='label_counts', index=index)
270
+ concept_ids = [temp.concept.id for temp in eval_data.positive_label_counts]
271
+ return concept_ids
272
+
273
+ def detailed_summary(self, index=0, confidence_threshold: float = 0.5,
274
+ **kwargs) -> Union[None, Tuple[pd.DataFrame, pd.DataFrame]]:
275
+ """Making detailed table per concept and for total concept
276
+
277
+ Args:
278
+ index (int, optional): Index of eval dataset. Defaults to 0.
279
+ confidence_threshold (float, optional): confidence threshold. Defaults to 0.5.
280
+
281
+ Returns:
282
+ tuple: concepts dataframe, total dataframe
283
+ """
284
+ eval_data = self.get_eval_data('binary_metrics', index=index)
285
+ summary = self.get_eval_data('summary', index=index)
286
+
287
+ total_labeled = 0
288
+ total_predicted = 0
289
+ total_tp = 0
290
+ total_fn = 0
291
+ total_fp = 0
292
+ metrics = []
293
+
294
+ for bd in eval_data:
295
+ concept_id = bd.concept.id
296
+ if bd.precision_recall_curve.ByteSize() == 0:
297
+ continue
298
+ pr_th_index = self.get_threshold_index(
299
+ list(bd.precision_recall_curve.thresholds), selected_value=confidence_threshold)
300
+ roc_th_index = self.get_threshold_index(
301
+ list(bd.roc_curve.thresholds), selected_value=confidence_threshold)
302
+ if pr_th_index is None or roc_th_index is None:
303
+ continue
304
+ num_pos_labeled = bd.num_pos
305
+ num_neg_labeled = bd.num_neg
306
+ # TP/(TP+FP)
307
+ precision = bd.precision_recall_curve.precision[pr_th_index]
308
+ # TP/(TP+FN)
309
+ recall = bd.precision_recall_curve.recall[pr_th_index]
310
+ # FP/(FP+TN)
311
+ fpr = bd.roc_curve.fpr[roc_th_index]
312
+ # TP/(TP+FN)
313
+ tpr = bd.roc_curve.tpr[roc_th_index]
314
+ # TP+FN
315
+ tp = int(tpr * num_pos_labeled)
316
+ fn = num_pos_labeled - tp
317
+ fp = int(fpr * num_neg_labeled)
318
+ num_pos_pred = tp + fp
319
+ f1 = self._f1(recall, precision)
320
+
321
+ total_labeled += num_pos_labeled
322
+ total_predicted += num_pos_pred
323
+ total_fn += fn
324
+ total_tp += tp
325
+ total_fp += fp
326
+ # roc auc, total labelled, predicted, tp, fn, fp, recall, precision, f1
327
+ _d = OrderedDict({
328
+ "Concept": concept_id,
329
+ "Accuracy (ROC AUC)": round(bd.roc_auc, 3),
330
+ "Total Labeled": num_pos_labeled,
331
+ "Total Predicted": num_pos_pred,
332
+ "True Positives": tp,
333
+ "False Negatives": fn,
334
+ "False Positives": fp,
335
+ "Recall": recall,
336
+ "Precision": precision,
337
+ "F1": f1
338
+ })
339
+ metrics.append(pd.DataFrame(_d, index=[0]))
340
+
341
+ # If no valid data is found, return None
342
+ if not metrics:
343
+ return None
344
+ # Make per concept df
345
+ df = pd.concat(metrics, axis=0)
346
+ # Make total df
347
+ sum_df_total = sum(df["Total Labeled"])
348
+ precision = sum(df.Precision * df["Total Labeled"]) / sum_df_total if sum_df_total else 0.
349
+ recall = sum(df.Recall * df["Total Labeled"]) / sum_df_total if sum_df_total else 0.
350
+ f1 = self._f1(recall, precision)
351
+ df_total = pd.DataFrame(
352
+ [
353
+ [
354
+ 'Total', summary.macro_avg_roc_auc, total_labeled, total_predicted, total_tp,
355
+ total_fn, total_fp, recall, precision, f1
356
+ ],
357
+ ],
358
+ columns=df.columns,
359
+ index=[0])
360
+
361
+ return df, df_total
362
+
363
+ def pr_curve(self, index=0, **kwargs) -> Union[None, pd.DataFrame]:
364
+ """Making PR curve
365
+
366
+ Args:
367
+ index (int, optional): Index of eval dataset. Defaults to 0.
368
+
369
+ Returns:
370
+ dictionary: Keys are concept ids and 'macro_avg'. Values are dictionaries of {precision: np.array, recall: np.array}
371
+ """
372
+ eval_data = self.get_eval_data(metric_name='binary_metrics', index=index)
373
+ outputs = self._process_curve(
374
+ eval_data, metric_name='precision_recall_curve', x='recall', y='precision')
375
+ return outputs
376
+
377
+ def roc_curve(self, index=0, **kwargs) -> Union[None, pd.DataFrame]:
378
+ eval_data = self.get_eval_data(metric_name='binary_metrics', index=index)
379
+ outputs = self._process_curve(eval_data, metric_name='roc_curve', x='tpr', y='fpr')
380
+ return outputs
381
+
382
+ def confusion_matrix(self, index=0, **kwargs):
383
+ eval_data = self.get_eval_data(metric_name='confusion_matrix', index=index)
384
+ concept_ids = self.parse_concept_ids(index)
385
+ concept_ids.sort()
386
+ data = np.zeros((len(concept_ids), len(concept_ids)), np.float32)
387
+ for entry in eval_data.matrix:
388
+ p = entry.predicted_concept.id
389
+ a = entry.actual_concept.id
390
+ if p in concept_ids and a in concept_ids:
391
+ data[concept_ids.index(a), concept_ids.index(p)] = np.around(entry.value, decimals=3)
392
+ else:
393
+ continue
394
+ rownames = pd.MultiIndex.from_arrays([concept_ids], names=['Actual'])
395
+ colnames = pd.MultiIndex.from_arrays([concept_ids], names=['Predicted'])
396
+ df = pd.DataFrame(data, columns=colnames, index=rownames)
397
+
398
+ return df
399
+
400
+
401
+ @dataclass
402
+ class DetectionResultHandler(_BaseEvalResultHandler):
403
+ AREA_LIST = ["all", "medium", "small"]
404
+ IOU_LIST = list(np.arange(0.5, 1., 0.1))
405
+
406
+ def parse_concept_ids(self, index=0) -> List[str]:
407
+ eval_data = self.get_eval_data(metric_name='metrics_by_class', index=index)
408
+ concept_ids = [temp.concept.id for temp in eval_data]
409
+ return concept_ids
410
+
411
+ def detailed_summary(self,
412
+ index=0,
413
+ confidence_threshold: float = 0.5,
414
+ iou_threshold: float = 0.5,
415
+ area: str = "all",
416
+ bypass_const: bool = False,
417
+ **kwargs):
418
+ if not bypass_const:
419
+ assert iou_threshold in self.IOU_LIST, f"Expected iou_threshold in {self.IOU_LIST}, got {iou_threshold}"
420
+ assert area in self.AREA_LIST, f"Expected area in {self.AREA_LIST}, got {area}"
421
+
422
+ eval_data = self.get_eval_data('metrics_by_class', index=index)
423
+ #summary = self.get_eval_data('summary', index=index)
424
+ metrics = []
425
+ for bd in eval_data:
426
+ # total label
427
+ _iou = round(bd.iou, 1)
428
+ if not (area and bd.area_name == area) or not (iou_threshold and iou_threshold == _iou):
429
+ continue
430
+ concept_id = bd.concept.id
431
+ total = round(bd.num_tot, 3)
432
+ # TP / (TP + FP)
433
+ if len(bd.precision_recall_curve.precision) > 0:
434
+ pr_th_index = self.get_threshold_index(
435
+ list(bd.precision_recall_curve.thresholds), selected_value=confidence_threshold)
436
+ p = round(bd.precision_recall_curve.precision[pr_th_index], 3)
437
+ else:
438
+ p = 0
439
+ # TP / (TP + FN)
440
+ if len(bd.precision_recall_curve.recall) > 0:
441
+ pr_th_index = self.get_threshold_index(
442
+ list(bd.precision_recall_curve.thresholds), selected_value=confidence_threshold)
443
+ r = round(bd.precision_recall_curve.recall[pr_th_index], 3)
444
+ else:
445
+ r = 0
446
+ tp = int(round(r * total, 0))
447
+ fn = total - tp
448
+ fp = float(tp) / p - tp if p else 0
449
+ fp = int(round(fp, 1))
450
+ f1 = self._f1(r, p)
451
+ _d = {
452
+ "Concept": concept_id,
453
+ "Average Precision": round(float(bd.avg_precision), 3),
454
+ "Total Labeled": total,
455
+ "True Positives": tp,
456
+ "False Positives": fp,
457
+ "False Negatives": fn,
458
+ "Recall": r,
459
+ "Precision": p,
460
+ "F1": f1,
461
+ }
462
+ metrics.append(pd.DataFrame(_d, index=[0]))
463
+
464
+ if not metrics:
465
+ return None
466
+
467
+ df = pd.concat(metrics, axis=0)
468
+ df_total = defaultdict()
469
+ sum_df_total = df["Total Labeled"].sum()
470
+ df_total["Concept"] = "Total"
471
+ df_total["Average Precision"] = df["Average Precision"].mean()
472
+ df_total["Total Labeled"] = sum_df_total
473
+ df_total["True Positives"] = df["True Positives"].sum()
474
+ df_total["False Positives"] = df["False Positives"].sum()
475
+ df_total["False Negatives"] = df["False Negatives"].sum()
476
+ df_total["Recall"] = sum(
477
+ df.Recall * df["Total Labeled"]) / sum_df_total if sum_df_total else 0.
478
+ df_total["Precision"] = df_total["True Positives"] / (
479
+ df_total["True Positives"] + df_total["False Positives"]) if sum_df_total else 0.
480
+ df_total["F1"] = self._f1(df_total["Recall"], df_total["Precision"])
481
+ df_total = pd.DataFrame(df_total, index=[0])
482
+
483
+ return [df, df_total]
484
+
485
+ def pr_curve(self,
486
+ index=0,
487
+ iou_threshold: float = 0.5,
488
+ area: str = "all",
489
+ bypass_const=False,
490
+ **kwargs):
491
+
492
+ if not bypass_const:
493
+ assert iou_threshold in self.IOU_LIST, f"Expected iou_threshold in {self.IOU_LIST}, got {iou_threshold}"
494
+ assert area in self.AREA_LIST, f"Expected area in {self.AREA_LIST}, got {area}"
495
+
496
+ eval_data = self.get_eval_data(metric_name='metrics_by_class', index=index)
497
+ _valid_eval_data = []
498
+ for bd in eval_data:
499
+ _iou = round(bd.iou, 1)
500
+ if not (area and bd.area_name == area) or not (iou_threshold and iou_threshold == _iou):
501
+ continue
502
+ _valid_eval_data.append(bd)
503
+
504
+ outputs = self._process_curve(
505
+ _valid_eval_data, metric_name='precision_recall_curve', x='recall', y='precision')
506
+ return outputs
507
+
508
+ def roc_curve(self, index=0, **kwargs) -> None:
509
+ return None
510
+
511
+ def confusion_matrix(self, index=0, **kwargs) -> None:
512
+ return None
513
+
514
+
515
+ def make_handler_by_type(model_type: str) -> _BaseEvalResultHandler:
516
+ _eval_type = get_eval_type(model_type)
517
+ if _eval_type == EvalType.CLASSIFICATION:
518
+ return ClassificationResultHandler
519
+ elif _eval_type == EvalType.DETECTION:
520
+ return DetectionResultHandler
521
+ else:
522
+ return PlaceholderHandler
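These handlers are the building blocks for the evaluation utilities exposed by the new `clarifai.utils.evaluation` package (the README changes further down import `EvalResultCompare` from it). A rough usage sketch follows; the model URL, dataset ID, and model type are placeholders rather than values from the diff, and `CLARIFAI_PAT` is assumed to be set in the environment.

```python
# Hypothetical usage sketch -- the identifiers below are placeholders, not from the diff.
from clarifai.client.dataset import Dataset
from clarifai.client.model import Model
from clarifai.utils.evaluation.helpers import make_handler_by_type

model = Model("https://clarifai.com/your-user-id/your-app-id/models/your-model-id")
dataset = Dataset(dataset_id="your-dataset-id")

# Pick the handler class that matches the model type, then gather (or trigger) evaluations.
handler_cls = make_handler_by_type("visual-classifier")  # -> ClassificationResultHandler
handler = handler_cls(model=model)
handler.find_eval_id(datasets=[dataset], attempt_evaluate=True)

# Per-concept and aggregate metrics at a 0.5 confidence threshold.
summary = handler.detailed_summary(index=0, confidence_threshold=0.5)
if summary is not None:
    per_concept_df, total_df = summary
    print(total_df)
```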
clarifai/utils/logging.py CHANGED
@@ -99,3 +99,10 @@ def get_logger(logger_level: Union[int, str] = logging.NOTSET,
99
99
 
100
100
  _configure_logger(name, logger_level)
101
101
  return logging.getLogger(name)
102
+
103
+
104
+ def add_file_handler(logger: logging.Logger, file_path: str, log_level: str = 'WARNING') -> None:
105
+ """Add a file handler to the logger."""
106
+ file_handler = logging.FileHandler(file_path)
107
+ file_handler.setLevel(log_level)
108
+ logger.addHandler(file_handler)
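A minimal sketch of the new `add_file_handler` helper used together with the existing `get_logger`; the logger name and log file path are illustrative.

```python
# Sketch: also write warnings from a clarifai logger to a file (name and path are illustrative).
from clarifai.utils.logging import add_file_handler, get_logger

logger = get_logger(logger_level="INFO", name="clarifai-demo")
add_file_handler(logger, file_path="clarifai.log", log_level="WARNING")

logger.info("Goes to the console handlers only")
logger.warning("Also appended to clarifai.log")
```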
clarifai/utils/model_train.py CHANGED
@@ -85,7 +85,7 @@ def response_to_model_params(response: MultiModelTypeResponse,
85
85
  return params
86
86
 
87
87
 
88
- def params_parser(params_dict: dict) -> Dict[str, Any]:
88
+ def params_parser(params_dict: dict, concepts: List = None) -> Dict[str, Any]:
89
89
  """Converts the params dictionary to a dictionary of model specific params for the given model"""
90
90
  #dict parser
91
91
  train_dict = {}
@@ -112,6 +112,8 @@ def params_parser(params_dict: dict) -> Dict[str, Any]:
112
112
  train_dict['train_info'] = resources_pb2.TrainInfo(**train_dict['train_info'])
113
113
 
114
114
  if 'concepts' in params_dict.keys():
115
+ assert set(params_dict["concepts"]).issubset(
116
+ concepts), "Invalid concept IDs. Available concepts in the app are {}".format(concepts)
115
117
  train_dict["output_info"]['data'] = resources_pb2.Data(
116
118
  concepts=[resources_pb2.Concept(id=concept_id) for concept_id in params_dict["concepts"]])
117
119
  if 'inference_params' in params_dict.keys():
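For context, the new `concepts` guard in `params_parser` boils down to the subset check below, shown with illustrative values (the real call passes the concept IDs available in the app).

```python
# Illustrative values only: a concept ID missing from the app trips the new assertion.
requested_concepts = ["cat", "bird"]
app_concepts = ["cat", "dog", "horse"]

assert set(requested_concepts).issubset(
    app_concepts), "Invalid concept IDs. Available concepts in the app are {}".format(app_concepts)
# AssertionError: Invalid concept IDs. Available concepts in the app are ['cat', 'dog', 'horse']
```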
clarifai/versions.py CHANGED
@@ -1,6 +1,6 @@
1
1
  import os
2
2
 
3
- CLIENT_VERSION = "10.0.1"
3
+ CLIENT_VERSION = "10.1.1"
4
4
  OS_VER = os.sys.platform
5
5
  PYTHON_VERSION = '.'.join(
6
6
  map(str, [os.sys.version_info.major, os.sys.version_info.minor, os.sys.version_info.micro]))
{clarifai-10.0.1.dist-info → clarifai-10.1.1.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: clarifai
3
- Version: 10.0.1
3
+ Version: 10.1.1
4
4
  Summary: Clarifai Python SDK
5
5
  Home-page: https://github.com/Clarifai/clarifai-python
6
6
  Author: Clarifai
@@ -20,18 +20,15 @@ Classifier: Operating System :: OS Independent
20
20
  Requires-Python: >=3.8
21
21
  Description-Content-Type: text/markdown
22
22
  License-File: LICENSE
23
- Requires-Dist: clarifai-grpc (~=10.0.1)
24
- Requires-Dist: pandas (>=1.3.5)
23
+ Requires-Dist: clarifai-grpc (~=10.1.6)
25
24
  Requires-Dist: numpy (>=1.22.0)
26
25
  Requires-Dist: tqdm (>=4.65.0)
27
- Requires-Dist: opencv-python (>=4.7.0.68)
28
26
  Requires-Dist: tritonclient (>=2.34.0)
29
27
  Requires-Dist: rich (>=13.4.2)
30
28
  Requires-Dist: PyYAML (>=6.0.1)
31
29
  Requires-Dist: schema (>=0.7.5)
32
30
  Requires-Dist: Pillow (>=9.5.0)
33
- Requires-Dist: llama-index (>=0.9.27)
34
- Requires-Dist: pypdf (>=3.17.4)
31
+ Requires-Dist: inquirerpy (==0.3.4)
35
32
  Provides-Extra: all
36
33
  Requires-Dist: pycocotools (==2.0.6) ; extra == 'all'
37
34
 
@@ -59,7 +56,7 @@ This is the official Python client for interacting with our powerful [API](https
59
56
 
60
57
  [Website](https://www.clarifai.com/) | [Schedule Demo](https://www.clarifai.com/company/schedule-demo) | [Signup for a Free Account](https://clarifai.com/signup) | [API Docs](https://docs.clarifai.com/) | [Clarifai Community](https://clarifai.com/explore) | [Python SDK Docs](https://docs.clarifai.com/python-sdk/api-reference) | [Examples](https://github.com/Clarifai/examples) | [Colab Notebooks](https://github.com/Clarifai/colab-notebooks) | [Discord](https://discord.gg/XAPE3Vtg)
61
58
 
62
-
59
+ Give the repo a star ⭐
63
60
  ---
64
61
 
65
62
 
@@ -85,6 +82,7 @@ This is the official Python client for interacting with our powerful [API](https
85
82
  * [Smart Image Search](#smart-image-search)
86
83
  * [Smart Text Search](#smart-text-search)
87
84
  * [Filters](#filters)
85
+ * **[Retrieval Augmented Generation (RAG)](#retrieval-augmented-generation-rag)**
88
86
  * **[More Examples](#pushpin-more-examples)**
89
87
 
90
88
 
@@ -107,9 +105,10 @@ Install from Source:
107
105
  ```bash
108
106
  git clone https://github.com/Clarifai/clarifai-python.git
109
107
  cd clarifai-python
110
- python3 -m venv env
111
- source env/bin/activate
112
- pip3 install -r requirements.txt
108
+ python3 -m venv .venv
109
+ source .venv/bin/activate
110
+ pip install -r requirements.txt
111
+ python setup.py install
113
112
  ```
114
113
 
115
114
 
@@ -296,6 +295,35 @@ status = model.training_status(version_id=model_version_id,training_logs=True)
296
295
  print(status)
297
296
  ```
298
297
 
298
+ #### Evaluate your trained model
299
+
300
+ When your model is trained and ready, you can evaluate it with the following code:
301
+
302
+ ```python
303
+ from clarifai.client.model import Model
304
+
305
+ model = Model('url')
306
+ model.evaluate(dataset_id='your-dataset-id')
307
+ ```
308
+
309
+ Compare the evaluation results of your models.
310
+
311
+ ```python
312
+ from clarifai.client.model import Model
313
+ from clarifai.client.dataset import Dataset
314
+ from clarifai.utils.evaluation import EvalResultCompare
315
+
316
+ models = ['model url1', 'model url2'] # or [Model(url1), Model(url2)]
317
+ dataset = 'dataset url' # or Dataset(dataset_url)
318
+
319
+ compare = EvalResultCompare(
320
+ models=models,
321
+ datasets=dataset,
322
+ attempt_evaluate=True # run an evaluation if the model has not yet been evaluated with the dataset
323
+ )
324
+ compare.all('output/folder/')
325
+ ```
326
+
299
327
  #### Models Listing
300
328
  ```python
301
329
  # Note: CLARIFAI_PAT must be set as env variable.
@@ -430,6 +458,26 @@ Input filters allows to filter by input_type, status of inputs and by inputs_dat
430
458
  results = search.query(filters=[{'input_types': ['image', 'text']}])
431
459
  ```
432
460
 
461
+ ## Retrieval Augmented Generation (RAG)
462
+
463
+ You can set up and start your RAG pipeline in 4 lines of code. The setup method automatically creates a new app and the necessary components under the hood. By default, it uses the [mistral-7B-Instruct](https://clarifai.com/mistralai/completion/models/mistral-7B-Instruct) model.
464
+
465
+ ```python
466
+ from clarifai.rag import RAG
467
+
468
+ rag_agent = RAG.setup(user_id="USER_ID")
469
+ rag_agent.upload(folder_path="~/docs")
470
+ rag_agent.chat(messages=[{"role":"human", "content":"What is Clarifai"}])
471
+ ```
472
+
473
+ If you have previously run the setup method, you can instantiate the RAG class with the prompter workflow URL:
474
+
475
+ ```python
476
+ from clarifai.rag import RAG
477
+
478
+ rag_agent = RAG(workflow_url="WORKFLOW_URL")
479
+ ```
480
+
433
481
  ## :pushpin: More Examples
434
482
 
435
483
  See many more code examples in this [repo](https://github.com/Clarifai/examples).