clarifai 10.1.0__py3-none-any.whl → 10.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- clarifai/client/app.py +23 -43
- clarifai/client/base.py +44 -4
- clarifai/client/dataset.py +138 -52
- clarifai/client/input.py +37 -4
- clarifai/client/model.py +279 -8
- clarifai/client/module.py +7 -5
- clarifai/client/runner.py +3 -1
- clarifai/client/search.py +7 -3
- clarifai/client/user.py +14 -12
- clarifai/client/workflow.py +7 -4
- clarifai/constants/dataset.py +2 -0
- clarifai/datasets/upload/loaders/README.md +3 -4
- clarifai/datasets/upload/loaders/xview_detection.py +5 -5
- clarifai/models/model_serving/cli/_utils.py +1 -1
- clarifai/models/model_serving/cli/build.py +1 -1
- clarifai/models/model_serving/cli/upload.py +1 -1
- clarifai/models/model_serving/utils.py +3 -1
- clarifai/rag/rag.py +25 -11
- clarifai/rag/utils.py +21 -6
- clarifai/utils/evaluation/__init__.py +427 -0
- clarifai/utils/evaluation/helpers.py +522 -0
- clarifai/utils/logging.py +30 -0
- clarifai/utils/model_train.py +3 -1
- clarifai/versions.py +1 -1
- clarifai/workflows/validate.py +1 -1
- {clarifai-10.1.0.dist-info → clarifai-10.2.0.dist-info}/METADATA +46 -9
- {clarifai-10.1.0.dist-info → clarifai-10.2.0.dist-info}/RECORD +31 -30
- clarifai/datasets/upload/loaders/coco_segmentation.py +0 -98
- {clarifai-10.1.0.dist-info → clarifai-10.2.0.dist-info}/LICENSE +0 -0
- {clarifai-10.1.0.dist-info → clarifai-10.2.0.dist-info}/WHEEL +0 -0
- {clarifai-10.1.0.dist-info → clarifai-10.2.0.dist-info}/entry_points.txt +0 -0
- {clarifai-10.1.0.dist-info → clarifai-10.2.0.dist-info}/top_level.txt +0 -0
clarifai/utils/evaluation/helpers.py
ADDED
@@ -0,0 +1,522 @@
+import time
+from collections import OrderedDict, defaultdict
+from dataclasses import dataclass, field
+from enum import Enum
+from typing import Dict, List, Tuple, Union
+
+import numpy as np
+from clarifai_grpc.grpc.api import resources_pb2
+from clarifai_grpc.grpc.api.status import status_code_pb2
+from google.protobuf.json_format import MessageToDict
+
+from clarifai.client.dataset import Dataset
+from clarifai.client.model import Model
+
+try:
+  import pandas as pd
+except ImportError:
+  raise ImportError("Can not import pandas. Please run `pip install pandas` to install it")
+
+try:
+  from loguru import logger
+except ImportError:
+  from ..logging import get_logger
+  logger = get_logger(logger_level="INFO", name=__name__)
+
+MACRO_AVG = "macro_avg"
+
+
+class EvalType(Enum):
+  UNDEFINED = 0
+  CLASSIFICATION = 1
+  DETECTION = 2
+  CLUSTERING = 3
+  SEGMENTATION = 4
+  TRACKER = 5
+
+
+def get_eval_type(model_type):
+  if "classifier" in model_type:
+    return EvalType.CLASSIFICATION
+  elif "visual-detector" in model_type:
+    return EvalType.DETECTION
+  elif "segmenter" in model_type:
+    return EvalType.SEGMENTATION
+  elif "embedder" in model_type:
+    return EvalType.CLUSTERING
+  elif "tracker" in model_type:
+    return EvalType.TRACKER
+  else:
+    return EvalType.UNDEFINED
+
+
+def to_file_name(x) -> str:
+  return x.replace('/', '--')
+
+
+@dataclass
+class _BaseEvalResultHandler:
+  model: Model
+  eval_data: List[resources_pb2.EvalMetrics] = field(default_factory=list)
+
+  def evaluate_and_wait(self, dataset: Dataset):
+    from tqdm import tqdm
+    dataset_id = dataset.id
+    dataset_app_id = dataset.app_id
+    dataset_user_id = dataset.user_id
+    _ = self.model.evaluate(
+        dataset_id=dataset_id, dataset_app_id=dataset_app_id, dataset_user_id=dataset_user_id)
+    latest_eval = self.model.list_evaluations()[0]
+    excepted = 10
+    desc = f"Please wait for the evaluation process between model {self.get_model_name()} and dataset {dataset_user_id}/{dataset_app_id}/{dataset_id} to complete."
+    bar = tqdm(total=excepted, desc=desc, leave=False, ncols=0)
+    while latest_eval.status.code in [
+        status_code_pb2.MODEL_EVALUATING, status_code_pb2.MODEL_QUEUED_FOR_EVALUATION
+    ]:
+      latest_eval = self.model.list_evaluations()[0]
+      time.sleep(1)
+      bar.update(1)
+
+    if latest_eval.status.code == status_code_pb2.MODEL_EVALUATED:
+      return latest_eval
+    else:
+      raise Exception(
+          f"Model has failed to evaluate \n {latest_eval.status}.\nPlease check your dataset inputs!"
+      )
+
+  def find_eval_id(self, datasets: List[Dataset] = [], attempt_evaluate: bool = False):
+    list_eval_outputs = self.model.list_evaluations()
+    self.eval_data = []
+    for dataset in datasets:
+      dataset.app_id = dataset.app_id or self.model.auth_helper.app_id
+      dataset.user_id = dataset.user_id or self.model.auth_helper.user_id
+      dataset_assert_msg = dataset.dataset_info
+      # checking if dataset exists
+      out = dataset.list_versions()
+      try:
+        next(iter(out))
+      except Exception as e:
+        if any(["CONN_DOES_NOT_EXIST" in _e for _e in e.args]):
+          raise Exception(
+              f"Dataset {dataset_assert_msg} does not exists. Please check datasets args")
+        else:
+          # caused by sdk failure
+          pass
+      # checking if model is evaluated with this dataset
+      _is_found = False
+      for each in list_eval_outputs:
+        if each.status.code == status_code_pb2.MODEL_EVALUATED:
+          eval_dataset = each.ground_truth_dataset
+          # if version_id is empty -> get latest eval result of dataset,app,user id
+          if dataset.app_id == eval_dataset.app_id and dataset.id == eval_dataset.id and dataset.user_id == eval_dataset.user_id and (
+              not dataset.version.id or dataset.version.id == eval_dataset.version.id):
+            # append to eval_data
+            self.eval_data.append(each)
+            _is_found = True
+            break
+
+      # if not evaluated, but user wants to proceed it
+      if not _is_found:
+        if attempt_evaluate:
+          self.eval_data.append(self.evaluate_and_wait(dataset))
+        # otherwise raise error
+        else:
+          raise Exception(
+              f"Model {self.model.model_info.name} in app {self.model.model_info.app_id} is not evaluated yet with dataset {dataset_assert_msg}"
+          )
+
+  @staticmethod
+  def proto_to_dict(value):
+    return MessageToDict(value, preserving_proto_field_name=True)
+
+  @staticmethod
+  def _f1(x: float, y: float):
+    z = x + y
+    return 2 * x * y / z if z else 0.
+
+  def _get_eval(self, index=0, **kwargs):
+    logger.info(
+        f"Model {self.get_model_name(pretify=True)}: retrieving {kwargs} metrics of dataset: {self.get_dataset_name_by_index(index)}"
+    )
+    result = self.model.get_eval_by_id(eval_id=self.eval_data[index].id, **kwargs)
+    for k, v in kwargs.items():
+      if v:
+        getattr(self.eval_data[index], k).MergeFrom(getattr(result, k))
+
+  def get_eval_data(self, metric_name: str, index=0):
+    if metric_name == 'binary_metrics':
+      if len(self.eval_data[index].binary_metrics) == 0:
+        self._get_eval(index, binary_metrics=True)
+    elif metric_name == 'label_counts':
+      if self.proto_to_dict(self.eval_data[index].label_counts) == {}:
+        self._get_eval(index, label_counts=True)
+    elif metric_name == 'confusion_matrix':
+      if self.eval_data[index].confusion_matrix.ByteSize() == 0:
+        self._get_eval(index, confusion_matrix=True)
+    elif metric_name == 'metrics_by_class':
+      if len(self.eval_data[index].metrics_by_class) == 0:
+        self._get_eval(index, metrics_by_class=True)
+    elif metric_name == 'metrics_by_area':
+      if len(self.eval_data[index].metrics_by_area) == 0:
+        self._get_eval(index, metrics_by_area=True)
+
+    return getattr(self.eval_data[index], metric_name)
+
+  def get_threshold_index(self, threshold_list: list, selected_value: float = 0.5) -> int:
+    assert 0 <= selected_value <= 1 and isinstance(selected_value, float)
+    threshold_list = [round(each, 2) for each in threshold_list]
+
+    def parse_precision(x):
+      return len(str(x).split(".")[1])
+
+    precision = parse_precision(selected_value)
+    if precision > 2:
+      selected_value = round(selected_value, 2)
+      logger.warning("Round the selected value to .2 decimals")
+    return threshold_list.index(selected_value)
+
+  def get_dataset_name_by_index(self, index=0, pretify=True):
+    out = self.eval_data[index].ground_truth_dataset
+    if pretify:
+      app_id = out.app_id
+      dataset = out.id
+      #out = f"{app_id}/{dataset}/{ver[:5]}" if ver else f"{app_id}/{dataset}"
+      if self.model.model_info.app_id == app_id:
+        out = dataset
+      else:
+        out = f"{app_id}/{dataset}"
+
+    return out
+
+  def get_model_name(self, pretify=True):
+    model = self.model.model_info
+    if pretify:
+      app_id = model.app_id
+      name = model.id
+      ver = model.model_version.id
+      model = f"{app_id}/{name}/{ver[:5]}" if ver else f"{app_id}/{name}"
+
+    return model
+
+  def _process_curve(self, data: resources_pb2.BinaryMetrics, metric_name: str, x: str,
+                     y: str) -> Dict[str, Dict[str, np.array]]:
+    """ Postprocess curve
+    """
+    x_arr = []
+    y_arr = []
+    threshold = []
+    outputs = []
+
+    def _make_df(xcol, ycol, concept_col, th_col):
+      return pd.DataFrame({x: xcol, y: ycol, 'concept': concept_col, 'threshold': th_col})
+
+    for bd in data:
+      concept_id = bd.concept.id
+      metric = eval(f'bd.{metric_name}')
+      if metric.ByteSize() == 0:
+        continue
+      _x = np.array(eval(f'metric.{x}'))
+      _y = np.array(eval(f'metric.{y}'))
+      threshold = np.array(metric.thresholds)
+      x_arr.append(_x)
+      y_arr.append(_y)
+      concept_cols = [concept_id for _ in range(len(_x))]
+      outputs.append(_make_df(_x, _y, concept_cols, threshold))
+
+    avg_x = np.mean(x_arr, axis=0)
+    avg_y = np.mean(y_arr, axis=0)
+    if np.isnan(avg_x).all():
+      return None
+    else:
+      avg_cols = [MACRO_AVG for _ in range(len(avg_x))]
+      outputs.append(_make_df(avg_x, avg_y, avg_cols, threshold))
+
+    return pd.concat(outputs, axis=0)
+
+  def parse_concept_ids(self, *args, **kwargs) -> List[str]:
+    raise NotImplementedError
+
+  def detailed_summary(self, *args, **kwargs):
+    raise NotImplementedError
+
+  def pr_curve(self, *args, **kwargs):
+    raise NotImplementedError
+
+  def roc_curve(self, *args, **kwargs):
+    raise NotImplementedError
+
+  def confusion_matrix(self, *args, **kwargs):
+    raise NotImplementedError
+
+
+@dataclass
+class PlaceholderHandler(_BaseEvalResultHandler):
+
+  def parse_concept_ids(self, *args, **kwargs) -> List[str]:
+    return None
+
+  def detailed_summary(self, *args, **kwargs):
+    return None
+
+  def pr_curve(self, *args, **kwargs):
+    return None
+
+
+@dataclass
+class ClassificationResultHandler(_BaseEvalResultHandler):
+
+  def parse_concept_ids(self, index=0) -> List[str]:
+    eval_data = self.get_eval_data(metric_name='label_counts', index=index)
+    concept_ids = [temp.concept.id for temp in eval_data.positive_label_counts]
+    return concept_ids
+
+  def detailed_summary(self, index=0, confidence_threshold: float = 0.5,
+                       **kwargs) -> Union[None, Tuple[pd.DataFrame, pd.DataFrame]]:
+    """Making detailed table per concept and for total concept
+
+    Args:
+        index (int, optional): Index of eval dataset. Defaults to 0.
+        confidence_threshold (float, optional): confidence threshold. Defaults to 0.5.
+
+    Returns:
+        tuple: concepts dataframe, total dataframe
+    """
+    eval_data = self.get_eval_data('binary_metrics', index=index)
+    summary = self.get_eval_data('summary', index=index)
+
+    total_labeled = 0
+    total_predicted = 0
+    total_tp = 0
+    total_fn = 0
+    total_fp = 0
+    metrics = []
+
+    for bd in eval_data:
+      concept_id = bd.concept.id
+      if bd.precision_recall_curve.ByteSize() == 0:
+        continue
+      pr_th_index = self.get_threshold_index(
+          list(bd.precision_recall_curve.thresholds), selected_value=confidence_threshold)
+      roc_th_index = self.get_threshold_index(
+          list(bd.roc_curve.thresholds), selected_value=confidence_threshold)
+      if pr_th_index is None or roc_th_index is None:
+        continue
+      num_pos_labeled = bd.num_pos
+      num_neg_labeled = bd.num_neg
+      # TP/(TP+FP)
+      precision = bd.precision_recall_curve.precision[pr_th_index]
+      # TP/(TP+FN)
+      recall = bd.precision_recall_curve.recall[pr_th_index]
+      # FP/(FP+TN)
+      fpr = bd.roc_curve.fpr[roc_th_index]
+      # TP/(TP+FN)
+      tpr = bd.roc_curve.tpr[roc_th_index]
+      # TP+FN
+      tp = int(tpr * num_pos_labeled)
+      fn = num_pos_labeled - tp
+      fp = int(fpr * num_neg_labeled)
+      num_pos_pred = tp + fp
+      f1 = self._f1(recall, precision)
+
+      total_labeled += num_pos_labeled
+      total_predicted += num_pos_pred
+      total_fn += fn
+      total_tp += tp
+      total_fp += fp
+      # roc auc, total labelled, predicted, tp, fn, fp, recall, precision, f1
+      _d = OrderedDict({
+          "Concept": concept_id,
+          "Accuracy (ROC AUC)": round(bd.roc_auc, 3),
+          "Total Labeled": num_pos_labeled,
+          "Total Predicted": num_pos_pred,
+          "True Positives": tp,
+          "False Negatives": fn,
+          "False Positives": fp,
+          "Recall": recall,
+          "Precision": precision,
+          "F1": f1
+      })
+      metrics.append(pd.DataFrame(_d, index=[0]))
+
+    # If no valid data is found, return None
+    if not metrics:
+      return None
+    # Make per concept df
+    df = pd.concat(metrics, axis=0)
+    # Make total df
+    sum_df_total = sum(df["Total Labeled"])
+    precision = sum(df.Precision * df["Total Labeled"]) / sum_df_total if sum_df_total else 0.
+    recall = sum(df.Recall * df["Total Labeled"]) / sum_df_total if sum_df_total else 0.
+    f1 = self._f1(recall, precision)
+    df_total = pd.DataFrame(
+        [
+            [
+                'Total', summary.macro_avg_roc_auc, total_labeled, total_predicted, total_tp,
+                total_fn, total_fp, recall, precision, f1
+            ],
+        ],
+        columns=df.columns,
+        index=[0])
+
+    return df, df_total
+
+  def pr_curve(self, index=0, **kwargs) -> Union[None, pd.DataFrame]:
+    """Making PR curve
+
+    Args:
+        index (int, optional): Index of eval dataset. Defaults to 0.
+
+    Returns:
+        dictionary: Keys are concept ids and 'macro_avg'. Values are dictionaries of {precision: np.array, recall: np.array}
+    """
+    eval_data = self.get_eval_data(metric_name='binary_metrics', index=index)
+    outputs = self._process_curve(
+        eval_data, metric_name='precision_recall_curve', x='recall', y='precision')
+    return outputs
+
+  def roc_curve(self, index=0, **kwargs) -> Union[None, pd.DataFrame]:
+    eval_data = self.get_eval_data(metric_name='binary_metrics', index=index)
+    outputs = self._process_curve(eval_data, metric_name='roc_curve', x='tpr', y='fpr')
+    return outputs

+  def confusion_matrix(self, index=0, **kwargs):
+    eval_data = self.get_eval_data(metric_name='confusion_matrix', index=index)
+    concept_ids = self.parse_concept_ids(index)
+    concept_ids.sort()
+    data = np.zeros((len(concept_ids), len(concept_ids)), np.float32)
+    for entry in eval_data.matrix:
+      p = entry.predicted_concept.id
+      a = entry.actual_concept.id
+      if p in concept_ids and a in concept_ids:
+        data[concept_ids.index(a), concept_ids.index(p)] = np.around(entry.value, decimals=3)
+      else:
+        continue
+    rownames = pd.MultiIndex.from_arrays([concept_ids], names=['Actual'])
+    colnames = pd.MultiIndex.from_arrays([concept_ids], names=['Predicted'])
+    df = pd.DataFrame(data, columns=colnames, index=rownames)
+
+    return df
+
+
+@dataclass
+class DetectionResultHandler(_BaseEvalResultHandler):
+  AREA_LIST = ["all", "medium", "small"]
+  IOU_LIST = list(np.arange(0.5, 1., 0.1))
+
+  def parse_concept_ids(self, index=0) -> List[str]:
+    eval_data = self.get_eval_data(metric_name='metrics_by_class', index=index)
+    concept_ids = [temp.concept.id for temp in eval_data]
+    return concept_ids
+
+  def detailed_summary(self,
+                       index=0,
+                       confidence_threshold: float = 0.5,
+                       iou_threshold: float = 0.5,
+                       area: str = "all",
+                       bypass_const: bool = False,
+                       **kwargs):
+    if not bypass_const:
+      assert iou_threshold in self.IOU_LIST, f"Expected iou_threshold in {self.IOU_LIST}, got {iou_threshold}"
+      assert area in self.AREA_LIST, f"Expected area in {self.AREA_LIST}, got {area}"
+
+    eval_data = self.get_eval_data('metrics_by_class', index=index)
+    #summary = self.get_eval_data('summary', index=index)
+    metrics = []
+    for bd in eval_data:
+      # total label
+      _iou = round(bd.iou, 1)
+      if not (area and bd.area_name == area) or not (iou_threshold and iou_threshold == _iou):
+        continue
+      concept_id = bd.concept.id
+      total = round(bd.num_tot, 3)
+      # TP / (TP + FP)
+      if len(bd.precision_recall_curve.precision) > 0:
+        pr_th_index = self.get_threshold_index(
+            list(bd.precision_recall_curve.thresholds), selected_value=confidence_threshold)
+        p = round(bd.precision_recall_curve.precision[pr_th_index], 3)
+      else:
+        p = 0
+      # TP / (TP + FN)
+      if len(bd.precision_recall_curve.recall) > 0:
+        pr_th_index = self.get_threshold_index(
+            list(bd.precision_recall_curve.thresholds), selected_value=confidence_threshold)
+        r = round(bd.precision_recall_curve.recall[pr_th_index], 3)
+      else:
+        r = 0
+      tp = int(round(r * total, 0))
+      fn = total - tp
+      fp = float(tp) / p - tp if p else 0
+      fp = int(round(fp, 1))
+      f1 = self._f1(r, p)
+      _d = {
+          "Concept": concept_id,
+          "Average Precision": round(float(bd.avg_precision), 3),
+          "Total Labeled": total,
+          "True Positives": tp,
+          "False Positives": fp,
+          "False Negatives": fn,
+          "Recall": r,
+          "Precision": p,
+          "F1": f1,
+      }
+      metrics.append(pd.DataFrame(_d, index=[0]))
+
+    if not metrics:
+      return None
+
+    df = pd.concat(metrics, axis=0)
+    df_total = defaultdict()
+    sum_df_total = df["Total Labeled"].sum()
+    df_total["Concept"] = "Total"
+    df_total["Average Precision"] = df["Average Precision"].mean()
+    df_total["Total Labeled"] = sum_df_total
+    df_total["True Positives"] = df["True Positives"].sum()
+    df_total["False Positives"] = df["False Positives"].sum()
+    df_total["False Negatives"] = df["False Negatives"].sum()
+    df_total["Recall"] = sum(
+        df.Recall * df["Total Labeled"]) / sum_df_total if sum_df_total else 0.
+    df_total["Precision"] = df_total["True Positives"] / (
+        df_total["True Positives"] + df_total["False Positives"]) if sum_df_total else 0.
+    df_total["F1"] = self._f1(df_total["Recall"], df_total["Precision"])
+    df_total = pd.DataFrame(df_total, index=[0])
+
+    return [df, df_total]
+
+  def pr_curve(self,
+               index=0,
+               iou_threshold: float = 0.5,
+               area: str = "all",
+               bypass_const=False,
+               **kwargs):
+
+    if not bypass_const:
+      assert iou_threshold in self.IOU_LIST, f"Expected iou_threshold in {self.IOU_LIST}, got {iou_threshold}"
+      assert area in self.AREA_LIST, f"Expected area in {self.AREA_LIST}, got {area}"
+
+    eval_data = self.get_eval_data(metric_name='metrics_by_class', index=index)
+    _valid_eval_data = []
+    for bd in eval_data:
+      _iou = round(bd.iou, 1)
+      if not (area and bd.area_name == area) or not (iou_threshold and iou_threshold == _iou):
+        continue
+      _valid_eval_data.append(bd)
+
+    outputs = self._process_curve(
+        _valid_eval_data, metric_name='precision_recall_curve', x='recall', y='precision')
+    return outputs
+
+  def roc_curve(self, index=0, **kwargs) -> None:
+    return None
+
+  def confusion_matrix(self, index=0, **kwargs) -> None:
+    return None
+
+
+def make_handler_by_type(model_type: str) -> _BaseEvalResultHandler:
+  _eval_type = get_eval_type(model_type)
+  if _eval_type == EvalType.CLASSIFICATION:
+    return ClassificationResultHandler
+  elif _eval_type == EvalType.DETECTION:
+    return DetectionResultHandler
+  else:
+    return PlaceholderHandler
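For orientation, here is a minimal usage sketch of the evaluation helpers added above. It is not part of the package diff: it assumes a trained classifier, a dataset the model can be evaluated against, placeholder URLs, and that `model_info.model_type_id` carries the model type string expected by `make_handler_by_type`.

```python
# Hedged usage sketch of the new evaluation helpers (not part of the diff).
from clarifai.client.dataset import Dataset
from clarifai.client.model import Model
from clarifai.utils.evaluation.helpers import make_handler_by_type

# Placeholder URLs for a classifier model and a labeled dataset.
model = Model(url="https://clarifai.com/<user>/<app>/models/<model-id>")
dataset = Dataset(url="https://clarifai.com/<user>/<app>/datasets/<dataset-id>")

# Pick the handler class for this model type, e.g. ClassificationResultHandler.
handler_cls = make_handler_by_type(model.model_info.model_type_id)
handler = handler_cls(model=model)

# Reuse an existing evaluation, or run one and wait if attempt_evaluate=True.
handler.find_eval_id(datasets=[dataset], attempt_evaluate=True)

# Per-concept and aggregate metrics as pandas DataFrames, plus a PR curve table.
per_concept_df, total_df = handler.detailed_summary(confidence_threshold=0.5)
pr_df = handler.pr_curve()
print(per_concept_df)
```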
clarifai/utils/logging.py
CHANGED
@@ -106,3 +106,33 @@ def add_file_handler(logger: logging.Logger, file_path: str, log_level: str = 'W
   file_handler = logging.FileHandler(file_path)
   file_handler.setLevel(log_level)
   logger.addHandler(file_handler)
+
+
+def process_log_files(log_file_path: str,) -> tuple:
+  """Processes log files to get failed inputs and annotations.
+
+  Args:
+      log_file_path (str): path to the log file
+  """
+  import re
+  duplicate_input_ids = []
+  failed_input_ids = []
+  pattern = re.compile(r'\| +(\d+) +\| +(\S+) +\| +(.+?) +\| +(.+?) +\| +(.+?) +\| +(.+?) \|')
+  try:
+    with open(log_file_path, 'r') as file:
+      log_content = file.read()
+    matches = pattern.findall(log_content)
+    for match in matches:
+      index = int(match[0])
+      input_id = match[1]
+      status = match[2]
+      if status == "Input has a duplicate ID.":
+        duplicate_input_ids.append({"Index": index, "Input_ID": input_id})
+      else:
+        failed_input_ids.append({"Index": index, "Input_ID": input_id})
+
+  except Exception as e:
+    print(f"Error Processing log file {log_file_path}:{e}")
+    return [], []
+
+  return duplicate_input_ids, failed_input_ids
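A hedged sketch of how the new `process_log_files` helper might be consumed after `dataset.upload_dataset(..., log_warnings=True)` has written a log file; the `log_file.log` path mirrors the README example later in this diff and is a placeholder.

```python
# Hedged sketch (not part of the diff): inspect failed inputs from an upload log.
from clarifai.utils.logging import process_log_files

# Parse the log written by a previous upload_dataset(..., log_warnings=True) run.
duplicate_input_ids, failed_input_ids = process_log_files('log_file.log')
print(f"{len(duplicate_input_ids)} duplicate IDs, {len(failed_input_ids)} other failed inputs")
for item in failed_input_ids:
  print(item["Index"], item["Input_ID"])
```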
clarifai/utils/model_train.py
CHANGED
@@ -85,7 +85,7 @@ def response_to_model_params(response: MultiModelTypeResponse,
   return params
 
 
-def params_parser(params_dict: dict) -> Dict[str, Any]:
+def params_parser(params_dict: dict, concepts: List = None) -> Dict[str, Any]:
   """Converts the params dictionary to a dictionary of model specific params for the given model"""
   #dict parser
   train_dict = {}
@@ -112,6 +112,8 @@ def params_parser(params_dict: dict) -> Dict[str, Any]:
     train_dict['train_info'] = resources_pb2.TrainInfo(**train_dict['train_info'])
 
   if 'concepts' in params_dict.keys():
+    assert set(params_dict["concepts"]).issubset(
+        concepts), "Invalid concept IDs. Available concepts in the app are {}".format(concepts)
     train_dict["output_info"]['data'] = resources_pb2.Data(
         concepts=[resources_pb2.Concept(id=concept_id) for concept_id in params_dict["concepts"]])
   if 'inference_params' in params_dict.keys():
clarifai/versions.py
CHANGED
clarifai/workflows/validate.py
CHANGED
@@ -16,7 +16,7 @@ def _model_does_not_have_model_version_id_and_other_fields(m):
 
 
 def _model_has_other_fields(m):
-  return any(k not in ['model_id', 'model_version_id'] for k in m.keys())
+  return any(k not in ['model_id', 'model_version_id', 'user_id', 'app_id'] for k in m.keys())
 
 
 def _workflow_nodes_have_valid_dependencies(nodes):
{clarifai-10.1.0.dist-info → clarifai-10.2.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: clarifai
-Version: 10.1.0
+Version: 10.2.0
 Summary: Clarifai Python SDK
 Home-page: https://github.com/Clarifai/clarifai-python
 Author: Clarifai
@@ -20,19 +20,16 @@ Classifier: Operating System :: OS Independent
 Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: clarifai-grpc (~=10.
-Requires-Dist: pandas (>=1.3.5)
+Requires-Dist: clarifai-grpc (~=10.2.1)
 Requires-Dist: numpy (>=1.22.0)
 Requires-Dist: tqdm (>=4.65.0)
-Requires-Dist: opencv-python (>=4.7.0.68)
 Requires-Dist: tritonclient (>=2.34.0)
 Requires-Dist: rich (>=13.4.2)
 Requires-Dist: PyYAML (>=6.0.1)
 Requires-Dist: schema (>=0.7.5)
 Requires-Dist: Pillow (>=9.5.0)
 Requires-Dist: inquirerpy (==0.3.4)
-Requires-Dist:
-Requires-Dist: pypdf (>=3.17.4)
+Requires-Dist: tabulate (>=0.9.0)
 Provides-Extra: all
 Requires-Dist: pycocotools (==2.0.6) ; extra == 'all'
 
@@ -60,7 +57,7 @@ This is the official Python client for interacting with our powerful [API](https
 
 [Website](https://www.clarifai.com/) | [Schedule Demo](https://www.clarifai.com/company/schedule-demo) | [Signup for a Free Account](https://clarifai.com/signup) | [API Docs](https://docs.clarifai.com/) | [Clarifai Community](https://clarifai.com/explore) | [Python SDK Docs](https://docs.clarifai.com/python-sdk/api-reference) | [Examples](https://github.com/Clarifai/examples) | [Colab Notebooks](https://github.com/Clarifai/colab-notebooks) | [Discord](https://discord.gg/XAPE3Vtg)
 
-
+Give the repo a star ⭐
 ---
 
 
@@ -154,7 +151,7 @@ client = User(user_id="user_id", pat="your personal access token")
 
 ## :floppy_disk: Interacting with Datasets
 
-Clarifai datasets help in managing datasets used for model training and evaluation. It provides functionalities like creating datasets,uploading datasets and exporting datasets as .zip files.
+Clarifai datasets help in managing datasets used for model training and evaluation. It provides functionalities like creating datasets,uploading datasets, retrying failed uploads from logs and exporting datasets as .zip files.
 
 ```python
 # Note: CLARIFAI_PAT must be set as env variable.
@@ -166,7 +163,18 @@ dataset = app.create_dataset(dataset_id="demo_dataset")
 # execute data upload to Clarifai app dataset
 from clarifai.datasets.upload.laoders.coco_detection import COCODetectionDataLoader
 coco_dataloader = COCODetectionDataLoader("images_dir", "coco_annotation_filepath")
-dataset.upload_dataset(dataloader=coco_dataloader, get_upload_status=True)
+dataset.upload_dataset(dataloader=coco_dataloader, get_upload_status=True, log_warnings =True)
+
+
+#Try upload and record the failed outputs in log file.
+from clarifai.datasets.upload.utils import load_module_dataloader
+cifar_dataloader = load_module_dataloader('./image_classification/cifar10')
+dataset.upload_dataset(dataloader=cifar_dataloader, get_upload_status=True, log_warnings =True)
+
+#Retry upload from logs for `upload_dataset`
+dataset.retry_upload_from_logs(dataloader=cifar_dataloader, log_file_path='log_file.log',
+                    retry_duplicates=False,
+                    log_warnings=True)
 
 #upload text from csv
 dataset.upload_from_csv(csv_path='csv_path', input_type='text', csv_type='raw', labels=True)
@@ -299,6 +307,35 @@ status = model.training_status(version_id=model_version_id,training_logs=True)
 print(status)
 ```
 
+#### Evaluate your trained model
+
+When your model is trained and ready, you can evaluate by the following code
+
+```python
+from clarifai.client.model import Model
+
+model = Model('url')
+model.evaluate(dataset_id='your-dataset-id')
+```
+
+Compare the evaluation results of your models.
+
+```python
+from clarifai.client.model import Model
+from clarifai.client.dataset import Dataset
+from clarifai.utils.evaluation import EvalResultCompare
+
+models = ['model url1', 'model url2'] # or [Model(url1), Model(url2)]
+dataset = 'dataset url' # or Dataset(dataset_url)
+
+compare = EvalResultCompare(
+  models=models,
+  datasets=dataset,
+  attempt_evaluate=True # attempt evaluate when the model is not evaluated with the dataset
+)
+compare.all('output/folder/')
+```
+
 #### Models Listing
 ```python
 # Note: CLARIFAI_PAT must be set as env variable.