clarifai 10.1.0__py3-none-any.whl → 10.2.0__py3-none-any.whl

This diff shows the content of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
@@ -0,0 +1,522 @@
+import time
+from collections import OrderedDict, defaultdict
+from dataclasses import dataclass, field
+from enum import Enum
+from typing import Dict, List, Tuple, Union
+
+import numpy as np
+from clarifai_grpc.grpc.api import resources_pb2
+from clarifai_grpc.grpc.api.status import status_code_pb2
+from google.protobuf.json_format import MessageToDict
+
+from clarifai.client.dataset import Dataset
+from clarifai.client.model import Model
+
+try:
+  import pandas as pd
+except ImportError:
+  raise ImportError("Can not import pandas. Please run `pip install pandas` to install it")
+
+try:
+  from loguru import logger
+except ImportError:
+  from ..logging import get_logger
+  logger = get_logger(logger_level="INFO", name=__name__)
+
+MACRO_AVG = "macro_avg"
+
+
+class EvalType(Enum):
+  UNDEFINED = 0
+  CLASSIFICATION = 1
+  DETECTION = 2
+  CLUSTERING = 3
+  SEGMENTATION = 4
+  TRACKER = 5
+
+
+def get_eval_type(model_type):
+  if "classifier" in model_type:
+    return EvalType.CLASSIFICATION
+  elif "visual-detector" in model_type:
+    return EvalType.DETECTION
+  elif "segmenter" in model_type:
+    return EvalType.SEGMENTATION
+  elif "embedder" in model_type:
+    return EvalType.CLUSTERING
+  elif "tracker" in model_type:
+    return EvalType.TRACKER
+  else:
+    return EvalType.UNDEFINED
+
+
+def to_file_name(x) -> str:
+  return x.replace('/', '--')
+
+
+@dataclass
+class _BaseEvalResultHandler:
+  model: Model
+  eval_data: List[resources_pb2.EvalMetrics] = field(default_factory=list)
+
+  def evaluate_and_wait(self, dataset: Dataset):
+    from tqdm import tqdm
+    dataset_id = dataset.id
+    dataset_app_id = dataset.app_id
+    dataset_user_id = dataset.user_id
+    _ = self.model.evaluate(
+        dataset_id=dataset_id, dataset_app_id=dataset_app_id, dataset_user_id=dataset_user_id)
+    latest_eval = self.model.list_evaluations()[0]
+    excepted = 10
+    desc = f"Please wait for the evaluation process between model {self.get_model_name()} and dataset {dataset_user_id}/{dataset_app_id}/{dataset_id} to complete."
+    bar = tqdm(total=excepted, desc=desc, leave=False, ncols=0)
+    while latest_eval.status.code in [
+        status_code_pb2.MODEL_EVALUATING, status_code_pb2.MODEL_QUEUED_FOR_EVALUATION
+    ]:
+      latest_eval = self.model.list_evaluations()[0]
+      time.sleep(1)
+      bar.update(1)
+
+    if latest_eval.status.code == status_code_pb2.MODEL_EVALUATED:
+      return latest_eval
+    else:
+      raise Exception(
+          f"Model has failed to evaluate \n {latest_eval.status}.\nPlease check your dataset inputs!"
+      )
+
+  def find_eval_id(self, datasets: List[Dataset] = [], attempt_evaluate: bool = False):
+    list_eval_outputs = self.model.list_evaluations()
+    self.eval_data = []
+    for dataset in datasets:
+      dataset.app_id = dataset.app_id or self.model.auth_helper.app_id
+      dataset.user_id = dataset.user_id or self.model.auth_helper.user_id
+      dataset_assert_msg = dataset.dataset_info
+      # checking if dataset exists
+      out = dataset.list_versions()
+      try:
+        next(iter(out))
+      except Exception as e:
+        if any(["CONN_DOES_NOT_EXIST" in _e for _e in e.args]):
+          raise Exception(
+              f"Dataset {dataset_assert_msg} does not exists. Please check datasets args")
+        else:
+          # caused by sdk failure
+          pass
+      # checking if model is evaluated with this dataset
+      _is_found = False
+      for each in list_eval_outputs:
+        if each.status.code == status_code_pb2.MODEL_EVALUATED:
+          eval_dataset = each.ground_truth_dataset
+          # if version_id is empty -> get latest eval result of dataset,app,user id
+          if dataset.app_id == eval_dataset.app_id and dataset.id == eval_dataset.id and dataset.user_id == eval_dataset.user_id and (
+              not dataset.version.id or dataset.version.id == eval_dataset.version.id):
+            # append to eval_data
+            self.eval_data.append(each)
+            _is_found = True
+            break
+
+      # if not evaluated, but user wants to proceed it
+      if not _is_found:
+        if attempt_evaluate:
+          self.eval_data.append(self.evaluate_and_wait(dataset))
+        # otherwise raise error
+        else:
+          raise Exception(
+              f"Model {self.model.model_info.name} in app {self.model.model_info.app_id} is not evaluated yet with dataset {dataset_assert_msg}"
+          )
+
+  @staticmethod
+  def proto_to_dict(value):
+    return MessageToDict(value, preserving_proto_field_name=True)
+
+  @staticmethod
+  def _f1(x: float, y: float):
+    z = x + y
+    return 2 * x * y / z if z else 0.
+
+  def _get_eval(self, index=0, **kwargs):
+    logger.info(
+        f"Model {self.get_model_name(pretify=True)}: retrieving {kwargs} metrics of dataset: {self.get_dataset_name_by_index(index)}"
+    )
+    result = self.model.get_eval_by_id(eval_id=self.eval_data[index].id, **kwargs)
+    for k, v in kwargs.items():
+      if v:
+        getattr(self.eval_data[index], k).MergeFrom(getattr(result, k))
+
+  def get_eval_data(self, metric_name: str, index=0):
+    if metric_name == 'binary_metrics':
+      if len(self.eval_data[index].binary_metrics) == 0:
+        self._get_eval(index, binary_metrics=True)
+    elif metric_name == 'label_counts':
+      if self.proto_to_dict(self.eval_data[index].label_counts) == {}:
+        self._get_eval(index, label_counts=True)
+    elif metric_name == 'confusion_matrix':
+      if self.eval_data[index].confusion_matrix.ByteSize() == 0:
+        self._get_eval(index, confusion_matrix=True)
+    elif metric_name == 'metrics_by_class':
+      if len(self.eval_data[index].metrics_by_class) == 0:
+        self._get_eval(index, metrics_by_class=True)
+    elif metric_name == 'metrics_by_area':
+      if len(self.eval_data[index].metrics_by_area) == 0:
+        self._get_eval(index, metrics_by_area=True)
+
+    return getattr(self.eval_data[index], metric_name)
+
+  def get_threshold_index(self, threshold_list: list, selected_value: float = 0.5) -> int:
+    assert 0 <= selected_value <= 1 and isinstance(selected_value, float)
+    threshold_list = [round(each, 2) for each in threshold_list]
+
+    def parse_precision(x):
+      return len(str(x).split(".")[1])
+
+    precision = parse_precision(selected_value)
+    if precision > 2:
+      selected_value = round(selected_value, 2)
+      logger.warning("Round the selected value to .2 decimals")
+    return threshold_list.index(selected_value)
+
+  def get_dataset_name_by_index(self, index=0, pretify=True):
+    out = self.eval_data[index].ground_truth_dataset
+    if pretify:
+      app_id = out.app_id
+      dataset = out.id
+      #out = f"{app_id}/{dataset}/{ver[:5]}" if ver else f"{app_id}/{dataset}"
+      if self.model.model_info.app_id == app_id:
+        out = dataset
+      else:
+        out = f"{app_id}/{dataset}"
+
+    return out
+
+  def get_model_name(self, pretify=True):
+    model = self.model.model_info
+    if pretify:
+      app_id = model.app_id
+      name = model.id
+      ver = model.model_version.id
+      model = f"{app_id}/{name}/{ver[:5]}" if ver else f"{app_id}/{name}"
+
+    return model
+
+  def _process_curve(self, data: resources_pb2.BinaryMetrics, metric_name: str, x: str,
+                     y: str) -> Dict[str, Dict[str, np.array]]:
+    """ Postprocess curve
+    """
+    x_arr = []
+    y_arr = []
+    threshold = []
+    outputs = []
+
+    def _make_df(xcol, ycol, concept_col, th_col):
+      return pd.DataFrame({x: xcol, y: ycol, 'concept': concept_col, 'threshold': th_col})
+
+    for bd in data:
+      concept_id = bd.concept.id
+      metric = eval(f'bd.{metric_name}')
+      if metric.ByteSize() == 0:
+        continue
+      _x = np.array(eval(f'metric.{x}'))
+      _y = np.array(eval(f'metric.{y}'))
+      threshold = np.array(metric.thresholds)
+      x_arr.append(_x)
+      y_arr.append(_y)
+      concept_cols = [concept_id for _ in range(len(_x))]
+      outputs.append(_make_df(_x, _y, concept_cols, threshold))
+
+    avg_x = np.mean(x_arr, axis=0)
+    avg_y = np.mean(y_arr, axis=0)
+    if np.isnan(avg_x).all():
+      return None
+    else:
+      avg_cols = [MACRO_AVG for _ in range(len(avg_x))]
+      outputs.append(_make_df(avg_x, avg_y, avg_cols, threshold))
+
+    return pd.concat(outputs, axis=0)
+
+  def parse_concept_ids(self, *args, **kwargs) -> List[str]:
+    raise NotImplementedError
+
+  def detailed_summary(self, *args, **kwargs):
+    raise NotImplementedError
+
+  def pr_curve(self, *args, **kwargs):
+    raise NotImplementedError
+
+  def roc_curve(self, *args, **kwargs):
+    raise NotImplementedError
+
+  def confusion_matrix(self, *args, **kwargs):
+    raise NotImplementedError
+
+
+@dataclass
+class PlaceholderHandler(_BaseEvalResultHandler):
+
+  def parse_concept_ids(self, *args, **kwargs) -> List[str]:
+    return None
+
+  def detailed_summary(self, *args, **kwargs):
+    return None
+
+  def pr_curve(self, *args, **kwargs):
+    return None
+
+
+@dataclass
+class ClassificationResultHandler(_BaseEvalResultHandler):
+
+  def parse_concept_ids(self, index=0) -> List[str]:
+    eval_data = self.get_eval_data(metric_name='label_counts', index=index)
+    concept_ids = [temp.concept.id for temp in eval_data.positive_label_counts]
+    return concept_ids
+
+  def detailed_summary(self, index=0, confidence_threshold: float = 0.5,
+                       **kwargs) -> Union[None, Tuple[pd.DataFrame, pd.DataFrame]]:
+    """Making detailed table per concept and for total concept
+
+    Args:
+      index (int, optional): Index of eval dataset. Defaults to 0.
+      confidence_threshold (float, optional): confidence threshold. Defaults to 0.5.
+
+    Returns:
+      tuple: concepts dataframe, total dataframe
+    """
+    eval_data = self.get_eval_data('binary_metrics', index=index)
+    summary = self.get_eval_data('summary', index=index)
+
+    total_labeled = 0
+    total_predicted = 0
+    total_tp = 0
+    total_fn = 0
+    total_fp = 0
+    metrics = []
+
+    for bd in eval_data:
+      concept_id = bd.concept.id
+      if bd.precision_recall_curve.ByteSize() == 0:
+        continue
+      pr_th_index = self.get_threshold_index(
+          list(bd.precision_recall_curve.thresholds), selected_value=confidence_threshold)
+      roc_th_index = self.get_threshold_index(
+          list(bd.roc_curve.thresholds), selected_value=confidence_threshold)
+      if pr_th_index is None or roc_th_index is None:
+        continue
+      num_pos_labeled = bd.num_pos
+      num_neg_labeled = bd.num_neg
+      # TP/(TP+FP)
+      precision = bd.precision_recall_curve.precision[pr_th_index]
+      # TP/(TP+FN)
+      recall = bd.precision_recall_curve.recall[pr_th_index]
+      # FP/(FP+TN)
+      fpr = bd.roc_curve.fpr[roc_th_index]
+      # TP/(TP+FN)
+      tpr = bd.roc_curve.tpr[roc_th_index]
+      # TP+FN
+      tp = int(tpr * num_pos_labeled)
+      fn = num_pos_labeled - tp
+      fp = int(fpr * num_neg_labeled)
+      num_pos_pred = tp + fp
+      f1 = self._f1(recall, precision)
+
+      total_labeled += num_pos_labeled
+      total_predicted += num_pos_pred
+      total_fn += fn
+      total_tp += tp
+      total_fp += fp
+      # roc auc, total labelled, predicted, tp, fn, fp, recall, precision, f1
+      _d = OrderedDict({
+          "Concept": concept_id,
+          "Accuracy (ROC AUC)": round(bd.roc_auc, 3),
+          "Total Labeled": num_pos_labeled,
+          "Total Predicted": num_pos_pred,
+          "True Positives": tp,
+          "False Negatives": fn,
+          "False Positives": fp,
+          "Recall": recall,
+          "Precision": precision,
+          "F1": f1
+      })
+      metrics.append(pd.DataFrame(_d, index=[0]))
+
+    # If no valid data is found, return None
+    if not metrics:
+      return None
+    # Make per concept df
+    df = pd.concat(metrics, axis=0)
+    # Make total df
+    sum_df_total = sum(df["Total Labeled"])
+    precision = sum(df.Precision * df["Total Labeled"]) / sum_df_total if sum_df_total else 0.
+    recall = sum(df.Recall * df["Total Labeled"]) / sum_df_total if sum_df_total else 0.
+    f1 = self._f1(recall, precision)
+    df_total = pd.DataFrame(
+        [
+            [
+                'Total', summary.macro_avg_roc_auc, total_labeled, total_predicted, total_tp,
+                total_fn, total_fp, recall, precision, f1
+            ],
+        ],
+        columns=df.columns,
+        index=[0])
+
+    return df, df_total
+
+  def pr_curve(self, index=0, **kwargs) -> Union[None, pd.DataFrame]:
+    """Making PR curve
+
+    Args:
+      index (int, optional): Index of eval dataset. Defaults to 0.
+
+    Returns:
+      dictionary: Keys are concept ids and 'macro_avg'. Values are dictionaries of {precision: np.array, recall: np.array}
+    """
+    eval_data = self.get_eval_data(metric_name='binary_metrics', index=index)
+    outputs = self._process_curve(
+        eval_data, metric_name='precision_recall_curve', x='recall', y='precision')
+    return outputs
+
+  def roc_curve(self, index=0, **kwargs) -> Union[None, pd.DataFrame]:
+    eval_data = self.get_eval_data(metric_name='binary_metrics', index=index)
+    outputs = self._process_curve(eval_data, metric_name='roc_curve', x='tpr', y='fpr')
+    return outputs
+
+  def confusion_matrix(self, index=0, **kwargs):
+    eval_data = self.get_eval_data(metric_name='confusion_matrix', index=index)
+    concept_ids = self.parse_concept_ids(index)
+    concept_ids.sort()
+    data = np.zeros((len(concept_ids), len(concept_ids)), np.float32)
+    for entry in eval_data.matrix:
+      p = entry.predicted_concept.id
+      a = entry.actual_concept.id
+      if p in concept_ids and a in concept_ids:
+        data[concept_ids.index(a), concept_ids.index(p)] = np.around(entry.value, decimals=3)
+      else:
+        continue
+    rownames = pd.MultiIndex.from_arrays([concept_ids], names=['Actual'])
+    colnames = pd.MultiIndex.from_arrays([concept_ids], names=['Predicted'])
+    df = pd.DataFrame(data, columns=colnames, index=rownames)
+
+    return df
+
+
+@dataclass
+class DetectionResultHandler(_BaseEvalResultHandler):
+  AREA_LIST = ["all", "medium", "small"]
+  IOU_LIST = list(np.arange(0.5, 1., 0.1))
+
+  def parse_concept_ids(self, index=0) -> List[str]:
+    eval_data = self.get_eval_data(metric_name='metrics_by_class', index=index)
+    concept_ids = [temp.concept.id for temp in eval_data]
+    return concept_ids
+
+  def detailed_summary(self,
+                       index=0,
+                       confidence_threshold: float = 0.5,
+                       iou_threshold: float = 0.5,
+                       area: str = "all",
+                       bypass_const: bool = False,
+                       **kwargs):
+    if not bypass_const:
+      assert iou_threshold in self.IOU_LIST, f"Expected iou_threshold in {self.IOU_LIST}, got {iou_threshold}"
+      assert area in self.AREA_LIST, f"Expected area in {self.AREA_LIST}, got {area}"
+
+    eval_data = self.get_eval_data('metrics_by_class', index=index)
+    #summary = self.get_eval_data('summary', index=index)
+    metrics = []
+    for bd in eval_data:
+      # total label
+      _iou = round(bd.iou, 1)
+      if not (area and bd.area_name == area) or not (iou_threshold and iou_threshold == _iou):
+        continue
+      concept_id = bd.concept.id
+      total = round(bd.num_tot, 3)
+      # TP / (TP + FP)
+      if len(bd.precision_recall_curve.precision) > 0:
+        pr_th_index = self.get_threshold_index(
+            list(bd.precision_recall_curve.thresholds), selected_value=confidence_threshold)
+        p = round(bd.precision_recall_curve.precision[pr_th_index], 3)
+      else:
+        p = 0
+      # TP / (TP + FN)
+      if len(bd.precision_recall_curve.recall) > 0:
+        pr_th_index = self.get_threshold_index(
+            list(bd.precision_recall_curve.thresholds), selected_value=confidence_threshold)
+        r = round(bd.precision_recall_curve.recall[pr_th_index], 3)
+      else:
+        r = 0
+      tp = int(round(r * total, 0))
+      fn = total - tp
+      fp = float(tp) / p - tp if p else 0
+      fp = int(round(fp, 1))
+      f1 = self._f1(r, p)
+      _d = {
+          "Concept": concept_id,
+          "Average Precision": round(float(bd.avg_precision), 3),
+          "Total Labeled": total,
+          "True Positives": tp,
+          "False Positives": fp,
+          "False Negatives": fn,
+          "Recall": r,
+          "Precision": p,
+          "F1": f1,
+      }
+      metrics.append(pd.DataFrame(_d, index=[0]))
+
+    if not metrics:
+      return None
+
+    df = pd.concat(metrics, axis=0)
+    df_total = defaultdict()
+    sum_df_total = df["Total Labeled"].sum()
+    df_total["Concept"] = "Total"
+    df_total["Average Precision"] = df["Average Precision"].mean()
+    df_total["Total Labeled"] = sum_df_total
+    df_total["True Positives"] = df["True Positives"].sum()
+    df_total["False Positives"] = df["False Positives"].sum()
+    df_total["False Negatives"] = df["False Negatives"].sum()
+    df_total["Recall"] = sum(
+        df.Recall * df["Total Labeled"]) / sum_df_total if sum_df_total else 0.
+    df_total["Precision"] = df_total["True Positives"] / (
+        df_total["True Positives"] + df_total["False Positives"]) if sum_df_total else 0.
+    df_total["F1"] = self._f1(df_total["Recall"], df_total["Precision"])
+    df_total = pd.DataFrame(df_total, index=[0])
+
+    return [df, df_total]

+  def pr_curve(self,
+               index=0,
+               iou_threshold: float = 0.5,
+               area: str = "all",
+               bypass_const=False,
+               **kwargs):
+
+    if not bypass_const:
+      assert iou_threshold in self.IOU_LIST, f"Expected iou_threshold in {self.IOU_LIST}, got {iou_threshold}"
+      assert area in self.AREA_LIST, f"Expected area in {self.AREA_LIST}, got {area}"
+
+    eval_data = self.get_eval_data(metric_name='metrics_by_class', index=index)
+    _valid_eval_data = []
+    for bd in eval_data:
+      _iou = round(bd.iou, 1)
+      if not (area and bd.area_name == area) or not (iou_threshold and iou_threshold == _iou):
+        continue
+      _valid_eval_data.append(bd)
+
+    outputs = self._process_curve(
+        _valid_eval_data, metric_name='precision_recall_curve', x='recall', y='precision')
+    return outputs
+
+  def roc_curve(self, index=0, **kwargs) -> None:
+    return None
+
+  def confusion_matrix(self, index=0, **kwargs) -> None:
+    return None
+
+
+def make_handler_by_type(model_type: str) -> _BaseEvalResultHandler:
+  _eval_type = get_eval_type(model_type)
+  if _eval_type == EvalType.CLASSIFICATION:
+    return ClassificationResultHandler
+  elif _eval_type == EvalType.DETECTION:
+    return DetectionResultHandler
+  else:
+    return PlaceholderHandler
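
The new module above builds pandas-based views over Clarifai's evaluation API: `make_handler_by_type` maps a model type string to a handler class, and the handlers lazily fetch metrics through `Model.get_eval_by_id` before shaping them into DataFrames. A rough usage sketch follows; the import path is an assumption (this diff does not name the new file), the URLs are placeholders, and a classifier model is presumed.

```python
# Sketch under assumptions: the module path clarifai.utils.evaluation.helpers is not
# shown in this diff, and the model/dataset URLs below are placeholders.
from clarifai.client.dataset import Dataset
from clarifai.client.model import Model
from clarifai.utils.evaluation.helpers import make_handler_by_type  # assumed path

model = Model("https://clarifai.com/user_id/app_id/models/model_id")            # placeholder URL
dataset = Dataset("https://clarifai.com/user_id/app_id/datasets/demo_dataset")  # placeholder URL

# Pick the handler that matches the model type (a classifier here), then resolve
# or trigger the evaluation for the given dataset.
handler_cls = make_handler_by_type(model.model_info.model_type_id)
handler = handler_cls(model=model)
handler.find_eval_id(datasets=[dataset], attempt_evaluate=True)

# Per-concept and aggregate tables at a 0.5 confidence threshold (None if no data).
df, df_total = handler.detailed_summary(index=0, confidence_threshold=0.5)
pr_df = handler.pr_curve(index=0)  # long-form DataFrame with per-concept and 'macro_avg' rows
print(df_total)
```
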
clarifai/utils/logging.py CHANGED
@@ -106,3 +106,33 @@ def add_file_handler(logger: logging.Logger, file_path: str, log_level: str = 'W
   file_handler = logging.FileHandler(file_path)
   file_handler.setLevel(log_level)
   logger.addHandler(file_handler)
+
+
+def process_log_files(log_file_path: str,) -> tuple:
+  """Processes log files to get failed inputs and annotations.
+
+  Args:
+    log_file_path (str): path to the log file
+  """
+  import re
+  duplicate_input_ids = []
+  failed_input_ids = []
+  pattern = re.compile(r'\| +(\d+) +\| +(\S+) +\| +(.+?) +\| +(.+?) +\| +(.+?) +\| +(.+?) \|')
+  try:
+    with open(log_file_path, 'r') as file:
+      log_content = file.read()
+    matches = pattern.findall(log_content)
+    for match in matches:
+      index = int(match[0])
+      input_id = match[1]
+      status = match[2]
+      if status == "Input has a duplicate ID.":
+        duplicate_input_ids.append({"Index": index, "Input_ID": input_id})
+      else:
+        failed_input_ids.append({"Index": index, "Input_ID": input_id})
+
+  except Exception as e:
+    print(f"Error Processing log file {log_file_path}:{e}")
+    return [], []
+
+  return duplicate_input_ids, failed_input_ids
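
For context, `process_log_files` scans a pipe-delimited table of failed inputs in an upload log (the kind of log that `Dataset.upload_dataset(..., log_warnings=True)`, shown in the README changes later in this diff, appears to produce) and splits the rows into duplicate-ID failures and other failures. A minimal sketch of the expected shape and return value; the rows and the second status message are fabricated to match the regex, not real SDK output.

```python
# Sketch only: the rows below are fabricated to fit the regex in process_log_files;
# real log files come from the dataset upload utilities, not from hand-written text.
import tempfile

from clarifai.utils.logging import process_log_files

fake_log = ("| 1   | img_001   | Input has a duplicate ID.   | -   | -   | - |\n"
            "| 2   | img_002   | Invalid image bytes.        | -   | -   | - |\n")

with tempfile.NamedTemporaryFile("w", suffix=".log", delete=False) as f:
  f.write(fake_log)
  log_path = f.name

duplicates, failures = process_log_files(log_path)
print(duplicates)  # [{'Index': 1, 'Input_ID': 'img_001'}]
print(failures)    # [{'Index': 2, 'Input_ID': 'img_002'}]
```
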
@@ -85,7 +85,7 @@ def response_to_model_params(response: MultiModelTypeResponse,
   return params


-def params_parser(params_dict: dict) -> Dict[str, Any]:
+def params_parser(params_dict: dict, concepts: List = None) -> Dict[str, Any]:
   """Converts the params dictionary to a dictionary of model specific params for the given model"""
   #dict parser
   train_dict = {}
@@ -112,6 +112,8 @@ def params_parser(params_dict: dict) -> Dict[str, Any]:
     train_dict['train_info'] = resources_pb2.TrainInfo(**train_dict['train_info'])

   if 'concepts' in params_dict.keys():
+    assert set(params_dict["concepts"]).issubset(
+        concepts), "Invalid concept IDs. Available concepts in the app are {}".format(concepts)
     train_dict["output_info"]['data'] = resources_pb2.Data(
         concepts=[resources_pb2.Concept(id=concept_id) for concept_id in params_dict["concepts"]])
   if 'inference_params' in params_dict.keys():
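
The `params_parser` change threads the app's available concept IDs into the parser so that training params referencing unknown concepts fail fast instead of surfacing an API error later. A standalone illustration of the added guard, using hypothetical values rather than the SDK call:

```python
# Hypothetical values; this mirrors the assert added to params_parser above.
app_concepts = ["cat", "dog", "bird"]         # concept IDs available in the app
params_dict = {"concepts": ["cat", "zebra"]}  # user-supplied training params

assert set(params_dict["concepts"]).issubset(
    app_concepts), "Invalid concept IDs. Available concepts in the app are {}".format(app_concepts)
# Raises AssertionError here: "zebra" is not among the app's concepts.
```
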
clarifai/versions.py CHANGED
@@ -1,6 +1,6 @@
 import os

-CLIENT_VERSION = "10.1.0"
+CLIENT_VERSION = "10.2.0"
 OS_VER = os.sys.platform
 PYTHON_VERSION = '.'.join(
     map(str, [os.sys.version_info.major, os.sys.version_info.minor, os.sys.version_info.micro]))
@@ -16,7 +16,7 @@ def _model_does_not_have_model_version_id_and_other_fields(m):


 def _model_has_other_fields(m):
-  return any(k not in ['model_id', 'model_version_id'] for k in m.keys())
+  return any(k not in ['model_id', 'model_version_id', 'user_id', 'app_id'] for k in m.keys())


 def _workflow_nodes_have_valid_dependencies(nodes):
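
The relaxed check above lets a workflow node's model block carry `user_id` and `app_id` alongside `model_id` and `model_version_id` without being flagged as over-specified. A hedged sketch of a dict that now passes this validation (all values are placeholders):

```python
# Placeholder values; shows the key set the relaxed validation now accepts.
model_spec = {
    "model_id": "general-image-recognition",
    "model_version_id": "model_version_id",
    "user_id": "clarifai",
    "app_id": "main",
}

# Mirrors _model_has_other_fields after the change: no keys outside the allowed set.
allowed = ["model_id", "model_version_id", "user_id", "app_id"]
assert not any(k not in allowed for k in model_spec.keys())
```
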
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: clarifai
-Version: 10.1.0
+Version: 10.2.0
 Summary: Clarifai Python SDK
 Home-page: https://github.com/Clarifai/clarifai-python
 Author: Clarifai
@@ -20,19 +20,16 @@ Classifier: Operating System :: OS Independent
 Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: clarifai-grpc (~=10.0.1)
-Requires-Dist: pandas (>=1.3.5)
+Requires-Dist: clarifai-grpc (~=10.2.1)
 Requires-Dist: numpy (>=1.22.0)
 Requires-Dist: tqdm (>=4.65.0)
-Requires-Dist: opencv-python (>=4.7.0.68)
 Requires-Dist: tritonclient (>=2.34.0)
 Requires-Dist: rich (>=13.4.2)
 Requires-Dist: PyYAML (>=6.0.1)
 Requires-Dist: schema (>=0.7.5)
 Requires-Dist: Pillow (>=9.5.0)
 Requires-Dist: inquirerpy (==0.3.4)
-Requires-Dist: llama-index-core (>=0.10.1)
-Requires-Dist: pypdf (>=3.17.4)
+Requires-Dist: tabulate (>=0.9.0)
 Provides-Extra: all
 Requires-Dist: pycocotools (==2.0.6) ; extra == 'all'

@@ -60,7 +57,7 @@ This is the official Python client for interacting with our powerful [API](https

 [Website](https://www.clarifai.com/) | [Schedule Demo](https://www.clarifai.com/company/schedule-demo) | [Signup for a Free Account](https://clarifai.com/signup) | [API Docs](https://docs.clarifai.com/) | [Clarifai Community](https://clarifai.com/explore) | [Python SDK Docs](https://docs.clarifai.com/python-sdk/api-reference) | [Examples](https://github.com/Clarifai/examples) | [Colab Notebooks](https://github.com/Clarifai/colab-notebooks) | [Discord](https://discord.gg/XAPE3Vtg)

-
+Give the repo a star ⭐
 ---


@@ -154,7 +151,7 @@ client = User(user_id="user_id", pat="your personal access token")

 ## :floppy_disk: Interacting with Datasets

-Clarifai datasets help in managing datasets used for model training and evaluation. It provides functionalities like creating datasets,uploading datasets and exporting datasets as .zip files.
+Clarifai datasets help in managing datasets used for model training and evaluation. It provides functionalities like creating datasets,uploading datasets, retrying failed uploads from logs and exporting datasets as .zip files.

 ```python
 # Note: CLARIFAI_PAT must be set as env variable.
@@ -166,7 +163,18 @@ dataset = app.create_dataset(dataset_id="demo_dataset")
 # execute data upload to Clarifai app dataset
 from clarifai.datasets.upload.laoders.coco_detection import COCODetectionDataLoader
 coco_dataloader = COCODetectionDataLoader("images_dir", "coco_annotation_filepath")
-dataset.upload_dataset(dataloader=coco_dataloader, get_upload_status=True)
+dataset.upload_dataset(dataloader=coco_dataloader, get_upload_status=True, log_warnings =True)
+
+
+#Try upload and record the failed outputs in log file.
+from clarifai.datasets.upload.utils import load_module_dataloader
+cifar_dataloader = load_module_dataloader('./image_classification/cifar10')
+dataset.upload_dataset(dataloader=cifar_dataloader, get_upload_status=True, log_warnings =True)
+
+#Retry upload from logs for `upload_dataset`
+dataset.retry_upload_from_logs(dataloader=cifar_dataloader, log_file_path='log_file.log',
+                               retry_duplicates=False,
+                               log_warnings=True)

 #upload text from csv
 dataset.upload_from_csv(csv_path='csv_path', input_type='text', csv_type='raw', labels=True)
@@ -299,6 +307,35 @@ status = model.training_status(version_id=model_version_id,training_logs=True)
 print(status)
 ```

+#### Evaluate your trained model
+
+When your model is trained and ready, you can evaluate by the following code
+
+```python
+from clarifai.client.model import Model
+
+model = Model('url')
+model.evaluate(dataset_id='your-dataset-id')
+```
+
+Compare the evaluation results of your models.
+
+```python
+from clarifai.client.model import Model
+from clarifai.client.dataset import Dataset
+from clarifai.utils.evaluation import EvalResultCompare
+
+models = ['model url1', 'model url2'] # or [Model(url1), Model(url2)]
+dataset = 'dataset url' # or Dataset(dataset_url)
+
+compare = EvalResultCompare(
+  models=models,
+  datasets=dataset,
+  attempt_evaluate=True # attempt evaluate when the model is not evaluated with the dataset
+)
+compare.all('output/folder/')
+```
+
 #### Models Listing
 ```python
 # Note: CLARIFAI_PAT must be set as env variable.