clarifai 10.0.1__py3-none-any.whl → 10.1.1__py3-none-any.whl
- clarifai/client/app.py +23 -43
- clarifai/client/base.py +46 -4
- clarifai/client/dataset.py +85 -33
- clarifai/client/input.py +35 -7
- clarifai/client/model.py +192 -11
- clarifai/client/module.py +8 -6
- clarifai/client/runner.py +3 -1
- clarifai/client/search.py +6 -3
- clarifai/client/user.py +14 -12
- clarifai/client/workflow.py +8 -5
- clarifai/datasets/upload/features.py +3 -0
- clarifai/datasets/upload/image.py +57 -26
- clarifai/datasets/upload/loaders/README.md +3 -4
- clarifai/datasets/upload/loaders/xview_detection.py +9 -5
- clarifai/datasets/upload/utils.py +23 -7
- clarifai/models/model_serving/README.md +113 -121
- clarifai/models/model_serving/__init__.py +2 -0
- clarifai/models/model_serving/cli/_utils.py +53 -0
- clarifai/models/model_serving/cli/base.py +14 -0
- clarifai/models/model_serving/cli/build.py +79 -0
- clarifai/models/model_serving/cli/clarifai_clis.py +33 -0
- clarifai/models/model_serving/cli/create.py +171 -0
- clarifai/models/model_serving/cli/example_cli.py +34 -0
- clarifai/models/model_serving/cli/login.py +26 -0
- clarifai/models/model_serving/cli/upload.py +182 -0
- clarifai/models/model_serving/constants.py +20 -0
- clarifai/models/model_serving/docs/cli.md +150 -0
- clarifai/models/model_serving/docs/concepts.md +229 -0
- clarifai/models/model_serving/docs/dependencies.md +1 -1
- clarifai/models/model_serving/docs/inference_parameters.md +112 -107
- clarifai/models/model_serving/docs/model_types.md +16 -17
- clarifai/models/model_serving/model_config/__init__.py +4 -2
- clarifai/models/model_serving/model_config/base.py +369 -0
- clarifai/models/model_serving/model_config/config.py +219 -224
- clarifai/models/model_serving/model_config/inference_parameter.py +5 -0
- clarifai/models/model_serving/model_config/model_types_config/multimodal-embedder.yaml +25 -24
- clarifai/models/model_serving/model_config/model_types_config/text-classifier.yaml +19 -18
- clarifai/models/model_serving/model_config/model_types_config/text-embedder.yaml +20 -18
- clarifai/models/model_serving/model_config/model_types_config/text-to-image.yaml +19 -18
- clarifai/models/model_serving/model_config/model_types_config/text-to-text.yaml +19 -18
- clarifai/models/model_serving/model_config/model_types_config/visual-classifier.yaml +22 -18
- clarifai/models/model_serving/model_config/model_types_config/visual-detector.yaml +32 -28
- clarifai/models/model_serving/model_config/model_types_config/visual-embedder.yaml +19 -18
- clarifai/models/model_serving/model_config/model_types_config/visual-segmenter.yaml +19 -18
- clarifai/models/model_serving/{models → model_config}/output.py +8 -0
- clarifai/models/model_serving/model_config/triton/__init__.py +14 -0
- clarifai/models/model_serving/model_config/{serializer.py → triton/serializer.py} +3 -1
- clarifai/models/model_serving/model_config/triton/triton_config.py +182 -0
- clarifai/models/model_serving/{models/model_types.py → model_config/triton/wrappers.py} +4 -4
- clarifai/models/model_serving/{models → repo_build}/__init__.py +2 -0
- clarifai/models/model_serving/repo_build/build.py +198 -0
- clarifai/models/model_serving/repo_build/static_files/_requirements.txt +2 -0
- clarifai/models/model_serving/repo_build/static_files/base_test.py +169 -0
- clarifai/models/model_serving/repo_build/static_files/inference.py +26 -0
- clarifai/models/model_serving/repo_build/static_files/sample_clarifai_config.yaml +25 -0
- clarifai/models/model_serving/repo_build/static_files/test.py +40 -0
- clarifai/models/model_serving/{models/pb_model.py → repo_build/static_files/triton/model.py} +15 -14
- clarifai/models/model_serving/utils.py +21 -0
- clarifai/rag/rag.py +67 -23
- clarifai/rag/utils.py +21 -5
- clarifai/utils/evaluation/__init__.py +427 -0
- clarifai/utils/evaluation/helpers.py +522 -0
- clarifai/utils/logging.py +7 -0
- clarifai/utils/model_train.py +3 -1
- clarifai/versions.py +1 -1
- {clarifai-10.0.1.dist-info → clarifai-10.1.1.dist-info}/METADATA +58 -10
- clarifai-10.1.1.dist-info/RECORD +115 -0
- clarifai-10.1.1.dist-info/entry_points.txt +2 -0
- clarifai/datasets/upload/loaders/coco_segmentation.py +0 -98
- clarifai/models/model_serving/cli/deploy_cli.py +0 -123
- clarifai/models/model_serving/cli/model_zip.py +0 -61
- clarifai/models/model_serving/cli/repository.py +0 -89
- clarifai/models/model_serving/docs/custom_config.md +0 -33
- clarifai/models/model_serving/docs/output.md +0 -28
- clarifai/models/model_serving/models/default_test.py +0 -281
- clarifai/models/model_serving/models/inference.py +0 -50
- clarifai/models/model_serving/models/test.py +0 -64
- clarifai/models/model_serving/pb_model_repository.py +0 -108
- clarifai-10.0.1.dist-info/RECORD +0 -103
- clarifai-10.0.1.dist-info/entry_points.txt +0 -4
- {clarifai-10.0.1.dist-info → clarifai-10.1.1.dist-info}/LICENSE +0 -0
- {clarifai-10.0.1.dist-info → clarifai-10.1.1.dist-info}/WHEEL +0 -0
- {clarifai-10.0.1.dist-info → clarifai-10.1.1.dist-info}/top_level.txt +0 -0
clarifai/utils/evaluation/helpers.py
ADDED
@@ -0,0 +1,522 @@
import time
from collections import OrderedDict, defaultdict
from dataclasses import dataclass, field
from enum import Enum
from typing import Dict, List, Tuple, Union

import numpy as np
from clarifai_grpc.grpc.api import resources_pb2
from clarifai_grpc.grpc.api.status import status_code_pb2
from google.protobuf.json_format import MessageToDict

from clarifai.client.dataset import Dataset
from clarifai.client.model import Model

try:
  import pandas as pd
except ImportError:
  raise ImportError("Can not import pandas. Please run `pip install pandas` to install it")

try:
  from loguru import logger
except ImportError:
  from ..logging import get_logger
  logger = get_logger(logger_level="INFO", name=__name__)

MACRO_AVG = "macro_avg"


class EvalType(Enum):
  UNDEFINED = 0
  CLASSIFICATION = 1
  DETECTION = 2
  CLUSTERING = 3
  SEGMENTATION = 4
  TRACKER = 5


def get_eval_type(model_type):
  if "classifier" in model_type:
    return EvalType.CLASSIFICATION
  elif "visual-detector" in model_type:
    return EvalType.DETECTION
  elif "segmenter" in model_type:
    return EvalType.SEGMENTATION
  elif "embedder" in model_type:
    return EvalType.CLUSTERING
  elif "tracker" in model_type:
    return EvalType.TRACKER
  else:
    return EvalType.UNDEFINED


def to_file_name(x) -> str:
  return x.replace('/', '--')


@dataclass
class _BaseEvalResultHandler:
  model: Model
  eval_data: List[resources_pb2.EvalMetrics] = field(default_factory=list)

  def evaluate_and_wait(self, dataset: Dataset):
    from tqdm import tqdm
    dataset_id = dataset.id
    dataset_app_id = dataset.app_id
    dataset_user_id = dataset.user_id
    _ = self.model.evaluate(
        dataset_id=dataset_id, dataset_app_id=dataset_app_id, dataset_user_id=dataset_user_id)
    latest_eval = self.model.list_evaluations()[0]
    excepted = 10
    desc = f"Please wait for the evaluation process between model {self.get_model_name()} and dataset {dataset_user_id}/{dataset_app_id}/{dataset_id} to complete."
    bar = tqdm(total=excepted, desc=desc, leave=False, ncols=0)
    while latest_eval.status.code in [
        status_code_pb2.MODEL_EVALUATING, status_code_pb2.MODEL_QUEUED_FOR_EVALUATION
    ]:
      latest_eval = self.model.list_evaluations()[0]
      time.sleep(1)
      bar.update(1)

    if latest_eval.status.code == status_code_pb2.MODEL_EVALUATED:
      return latest_eval
    else:
      raise Exception(
          f"Model has failed to evaluate \n {latest_eval.status}.\nPlease check your dataset inputs!"
      )

  def find_eval_id(self, datasets: List[Dataset] = [], attempt_evaluate: bool = False):
    list_eval_outputs = self.model.list_evaluations()
    self.eval_data = []
    for dataset in datasets:
      dataset.app_id = dataset.app_id or self.model.auth_helper.app_id
      dataset.user_id = dataset.user_id or self.model.auth_helper.user_id
      dataset_assert_msg = dataset.dataset_info
      # checking if dataset exists
      out = dataset.list_versions()
      try:
        next(iter(out))
      except Exception as e:
        if any(["CONN_DOES_NOT_EXIST" in _e for _e in e.args]):
          raise Exception(
              f"Dataset {dataset_assert_msg} does not exists. Please check datasets args")
        else:
          # caused by sdk failure
          pass
      # checking if model is evaluated with this dataset
      _is_found = False
      for each in list_eval_outputs:
        if each.status.code == status_code_pb2.MODEL_EVALUATED:
          eval_dataset = each.ground_truth_dataset
          # if version_id is empty -> get latest eval result of dataset,app,user id
          if dataset.app_id == eval_dataset.app_id and dataset.id == eval_dataset.id and dataset.user_id == eval_dataset.user_id and (
              not dataset.version.id or dataset.version.id == eval_dataset.version.id):
            # append to eval_data
            self.eval_data.append(each)
            _is_found = True
            break

      # if not evaluated, but user wants to proceed it
      if not _is_found:
        if attempt_evaluate:
          self.eval_data.append(self.evaluate_and_wait(dataset))
        # otherwise raise error
        else:
          raise Exception(
              f"Model {self.model.model_info.name} in app {self.model.model_info.app_id} is not evaluated yet with dataset {dataset_assert_msg}"
          )

  @staticmethod
  def proto_to_dict(value):
    return MessageToDict(value, preserving_proto_field_name=True)

  @staticmethod
  def _f1(x: float, y: float):
    z = x + y
    return 2 * x * y / z if z else 0.

  def _get_eval(self, index=0, **kwargs):
    logger.info(
        f"Model {self.get_model_name(pretify=True)}: retrieving {kwargs} metrics of dataset: {self.get_dataset_name_by_index(index)}"
    )
    result = self.model.get_eval_by_id(eval_id=self.eval_data[index].id, **kwargs)
    for k, v in kwargs.items():
      if v:
        getattr(self.eval_data[index], k).MergeFrom(getattr(result, k))

  def get_eval_data(self, metric_name: str, index=0):
    if metric_name == 'binary_metrics':
      if len(self.eval_data[index].binary_metrics) == 0:
        self._get_eval(index, binary_metrics=True)
    elif metric_name == 'label_counts':
      if self.proto_to_dict(self.eval_data[index].label_counts) == {}:
        self._get_eval(index, label_counts=True)
    elif metric_name == 'confusion_matrix':
      if self.eval_data[index].confusion_matrix.ByteSize() == 0:
        self._get_eval(index, confusion_matrix=True)
    elif metric_name == 'metrics_by_class':
      if len(self.eval_data[index].metrics_by_class) == 0:
        self._get_eval(index, metrics_by_class=True)
    elif metric_name == 'metrics_by_area':
      if len(self.eval_data[index].metrics_by_area) == 0:
        self._get_eval(index, metrics_by_area=True)

    return getattr(self.eval_data[index], metric_name)

  def get_threshold_index(self, threshold_list: list, selected_value: float = 0.5) -> int:
    assert 0 <= selected_value <= 1 and isinstance(selected_value, float)
    threshold_list = [round(each, 2) for each in threshold_list]

    def parse_precision(x):
      return len(str(x).split(".")[1])

    precision = parse_precision(selected_value)
    if precision > 2:
      selected_value = round(selected_value, 2)
      logger.warning("Round the selected value to .2 decimals")
    return threshold_list.index(selected_value)

  def get_dataset_name_by_index(self, index=0, pretify=True):
    out = self.eval_data[index].ground_truth_dataset
    if pretify:
      app_id = out.app_id
      dataset = out.id
      #out = f"{app_id}/{dataset}/{ver[:5]}" if ver else f"{app_id}/{dataset}"
      if self.model.model_info.app_id == app_id:
        out = dataset
      else:
        out = f"{app_id}/{dataset}"

    return out

  def get_model_name(self, pretify=True):
    model = self.model.model_info
    if pretify:
      app_id = model.app_id
      name = model.id
      ver = model.model_version.id
      model = f"{app_id}/{name}/{ver[:5]}" if ver else f"{app_id}/{name}"

    return model

  def _process_curve(self, data: resources_pb2.BinaryMetrics, metric_name: str, x: str,
                     y: str) -> Dict[str, Dict[str, np.array]]:
    """ Postprocess curve
    """
    x_arr = []
    y_arr = []
    threshold = []
    outputs = []

    def _make_df(xcol, ycol, concept_col, th_col):
      return pd.DataFrame({x: xcol, y: ycol, 'concept': concept_col, 'threshold': th_col})

    for bd in data:
      concept_id = bd.concept.id
      metric = eval(f'bd.{metric_name}')
      if metric.ByteSize() == 0:
        continue
      _x = np.array(eval(f'metric.{x}'))
      _y = np.array(eval(f'metric.{y}'))
      threshold = np.array(metric.thresholds)
      x_arr.append(_x)
      y_arr.append(_y)
      concept_cols = [concept_id for _ in range(len(_x))]
      outputs.append(_make_df(_x, _y, concept_cols, threshold))

    avg_x = np.mean(x_arr, axis=0)
    avg_y = np.mean(y_arr, axis=0)
    if np.isnan(avg_x).all():
      return None
    else:
      avg_cols = [MACRO_AVG for _ in range(len(avg_x))]
      outputs.append(_make_df(avg_x, avg_y, avg_cols, threshold))

    return pd.concat(outputs, axis=0)

  def parse_concept_ids(self, *args, **kwargs) -> List[str]:
    raise NotImplementedError

  def detailed_summary(self, *args, **kwargs):
    raise NotImplementedError

  def pr_curve(self, *args, **kwargs):
    raise NotImplementedError

  def roc_curve(self, *args, **kwargs):
    raise NotImplementedError

  def confusion_matrix(self, *args, **kwargs):
    raise NotImplementedError


@dataclass
class PlaceholderHandler(_BaseEvalResultHandler):

  def parse_concept_ids(self, *args, **kwargs) -> List[str]:
    return None

  def detailed_summary(self, *args, **kwargs):
    return None

  def pr_curve(self, *args, **kwargs):
    return None


@dataclass
class ClassificationResultHandler(_BaseEvalResultHandler):

  def parse_concept_ids(self, index=0) -> List[str]:
    eval_data = self.get_eval_data(metric_name='label_counts', index=index)
    concept_ids = [temp.concept.id for temp in eval_data.positive_label_counts]
    return concept_ids

  def detailed_summary(self, index=0, confidence_threshold: float = 0.5,
                       **kwargs) -> Union[None, Tuple[pd.DataFrame, pd.DataFrame]]:
    """Making detailed table per concept and for total concept

    Args:
      index (int, optional): Index of eval dataset. Defaults to 0.
      confidence_threshold (float, optional): confidence threshold. Defaults to 0.5.

    Returns:
      tuple: concepts dataframe, total dataframe
    """
    eval_data = self.get_eval_data('binary_metrics', index=index)
    summary = self.get_eval_data('summary', index=index)

    total_labeled = 0
    total_predicted = 0
    total_tp = 0
    total_fn = 0
    total_fp = 0
    metrics = []

    for bd in eval_data:
      concept_id = bd.concept.id
      if bd.precision_recall_curve.ByteSize() == 0:
        continue
      pr_th_index = self.get_threshold_index(
          list(bd.precision_recall_curve.thresholds), selected_value=confidence_threshold)
      roc_th_index = self.get_threshold_index(
          list(bd.roc_curve.thresholds), selected_value=confidence_threshold)
      if pr_th_index is None or roc_th_index is None:
        continue
      num_pos_labeled = bd.num_pos
      num_neg_labeled = bd.num_neg
      # TP/(TP+FP)
      precision = bd.precision_recall_curve.precision[pr_th_index]
      # TP/(TP+FN)
      recall = bd.precision_recall_curve.recall[pr_th_index]
      # FP/(FP+TN)
      fpr = bd.roc_curve.fpr[roc_th_index]
      # TP/(TP+FN)
      tpr = bd.roc_curve.tpr[roc_th_index]
      # TP+FN
      tp = int(tpr * num_pos_labeled)
      fn = num_pos_labeled - tp
      fp = int(fpr * num_neg_labeled)
      num_pos_pred = tp + fp
      f1 = self._f1(recall, precision)

      total_labeled += num_pos_labeled
      total_predicted += num_pos_pred
      total_fn += fn
      total_tp += tp
      total_fp += fp
      # roc auc, total labelled, predicted, tp, fn, fp, recall, precision, f1
      _d = OrderedDict({
          "Concept": concept_id,
          "Accuracy (ROC AUC)": round(bd.roc_auc, 3),
          "Total Labeled": num_pos_labeled,
          "Total Predicted": num_pos_pred,
          "True Positives": tp,
          "False Negatives": fn,
          "False Positives": fp,
          "Recall": recall,
          "Precision": precision,
          "F1": f1
      })
      metrics.append(pd.DataFrame(_d, index=[0]))

    # If no valid data is found, return None
    if not metrics:
      return None
    # Make per concept df
    df = pd.concat(metrics, axis=0)
    # Make total df
    sum_df_total = sum(df["Total Labeled"])
    precision = sum(df.Precision * df["Total Labeled"]) / sum_df_total if sum_df_total else 0.
    recall = sum(df.Recall * df["Total Labeled"]) / sum_df_total if sum_df_total else 0.
    f1 = self._f1(recall, precision)
    df_total = pd.DataFrame(
        [
            [
                'Total', summary.macro_avg_roc_auc, total_labeled, total_predicted, total_tp,
                total_fn, total_fp, recall, precision, f1
            ],
        ],
        columns=df.columns,
        index=[0])

    return df, df_total

  def pr_curve(self, index=0, **kwargs) -> Union[None, pd.DataFrame]:
    """Making PR curve

    Args:
      index (int, optional): Index of eval dataset. Defaults to 0.

    Returns:
      dictionary: Keys are concept ids and 'macro_avg'. Values are dictionaries of {precision: np.array, recall: np.array}
    """
    eval_data = self.get_eval_data(metric_name='binary_metrics', index=index)
    outputs = self._process_curve(
        eval_data, metric_name='precision_recall_curve', x='recall', y='precision')
    return outputs

  def roc_curve(self, index=0, **kwargs) -> Union[None, pd.DataFrame]:
    eval_data = self.get_eval_data(metric_name='binary_metrics', index=index)
    outputs = self._process_curve(eval_data, metric_name='roc_curve', x='tpr', y='fpr')
    return outputs

  def confusion_matrix(self, index=0, **kwargs):
    eval_data = self.get_eval_data(metric_name='confusion_matrix', index=index)
    concept_ids = self.parse_concept_ids(index)
    concept_ids.sort()
    data = np.zeros((len(concept_ids), len(concept_ids)), np.float32)
    for entry in eval_data.matrix:
      p = entry.predicted_concept.id
      a = entry.actual_concept.id
      if p in concept_ids and a in concept_ids:
        data[concept_ids.index(a), concept_ids.index(p)] = np.around(entry.value, decimals=3)
      else:
        continue
    rownames = pd.MultiIndex.from_arrays([concept_ids], names=['Actual'])
    colnames = pd.MultiIndex.from_arrays([concept_ids], names=['Predicted'])
    df = pd.DataFrame(data, columns=colnames, index=rownames)

    return df


@dataclass
class DetectionResultHandler(_BaseEvalResultHandler):
  AREA_LIST = ["all", "medium", "small"]
  IOU_LIST = list(np.arange(0.5, 1., 0.1))

  def parse_concept_ids(self, index=0) -> List[str]:
    eval_data = self.get_eval_data(metric_name='metrics_by_class', index=index)
    concept_ids = [temp.concept.id for temp in eval_data]
    return concept_ids

  def detailed_summary(self,
                       index=0,
                       confidence_threshold: float = 0.5,
                       iou_threshold: float = 0.5,
                       area: str = "all",
                       bypass_const: bool = False,
                       **kwargs):
    if not bypass_const:
      assert iou_threshold in self.IOU_LIST, f"Expected iou_threshold in {self.IOU_LIST}, got {iou_threshold}"
      assert area in self.AREA_LIST, f"Expected area in {self.AREA_LIST}, got {area}"

    eval_data = self.get_eval_data('metrics_by_class', index=index)
    #summary = self.get_eval_data('summary', index=index)
    metrics = []
    for bd in eval_data:
      # total label
      _iou = round(bd.iou, 1)
      if not (area and bd.area_name == area) or not (iou_threshold and iou_threshold == _iou):
        continue
      concept_id = bd.concept.id
      total = round(bd.num_tot, 3)
      # TP / (TP + FP)
      if len(bd.precision_recall_curve.precision) > 0:
        pr_th_index = self.get_threshold_index(
            list(bd.precision_recall_curve.thresholds), selected_value=confidence_threshold)
        p = round(bd.precision_recall_curve.precision[pr_th_index], 3)
      else:
        p = 0
      # TP / (TP + FN)
      if len(bd.precision_recall_curve.recall) > 0:
        pr_th_index = self.get_threshold_index(
            list(bd.precision_recall_curve.thresholds), selected_value=confidence_threshold)
        r = round(bd.precision_recall_curve.recall[pr_th_index], 3)
      else:
        r = 0
      tp = int(round(r * total, 0))
      fn = total - tp
      fp = float(tp) / p - tp if p else 0
      fp = int(round(fp, 1))
      f1 = self._f1(r, p)
      _d = {
          "Concept": concept_id,
          "Average Precision": round(float(bd.avg_precision), 3),
          "Total Labeled": total,
          "True Positives": tp,
          "False Positives": fp,
          "False Negatives": fn,
          "Recall": r,
          "Precision": p,
          "F1": f1,
      }
      metrics.append(pd.DataFrame(_d, index=[0]))

    if not metrics:
      return None

    df = pd.concat(metrics, axis=0)
    df_total = defaultdict()
    sum_df_total = df["Total Labeled"].sum()
    df_total["Concept"] = "Total"
    df_total["Average Precision"] = df["Average Precision"].mean()
    df_total["Total Labeled"] = sum_df_total
    df_total["True Positives"] = df["True Positives"].sum()
    df_total["False Positives"] = df["False Positives"].sum()
    df_total["False Negatives"] = df["False Negatives"].sum()
    df_total["Recall"] = sum(
        df.Recall * df["Total Labeled"]) / sum_df_total if sum_df_total else 0.
    df_total["Precision"] = df_total["True Positives"] / (
        df_total["True Positives"] + df_total["False Positives"]) if sum_df_total else 0.
    df_total["F1"] = self._f1(df_total["Recall"], df_total["Precision"])
    df_total = pd.DataFrame(df_total, index=[0])

    return [df, df_total]

  def pr_curve(self,
               index=0,
               iou_threshold: float = 0.5,
               area: str = "all",
               bypass_const=False,
               **kwargs):

    if not bypass_const:
      assert iou_threshold in self.IOU_LIST, f"Expected iou_threshold in {self.IOU_LIST}, got {iou_threshold}"
      assert area in self.AREA_LIST, f"Expected area in {self.AREA_LIST}, got {area}"

    eval_data = self.get_eval_data(metric_name='metrics_by_class', index=index)
    _valid_eval_data = []
    for bd in eval_data:
      _iou = round(bd.iou, 1)
      if not (area and bd.area_name == area) or not (iou_threshold and iou_threshold == _iou):
        continue
      _valid_eval_data.append(bd)

    outputs = self._process_curve(
        _valid_eval_data, metric_name='precision_recall_curve', x='recall', y='precision')
    return outputs

  def roc_curve(self, index=0, **kwargs) -> None:
    return None

  def confusion_matrix(self, index=0, **kwargs) -> None:
    return None


def make_handler_by_type(model_type: str) -> _BaseEvalResultHandler:
  _eval_type = get_eval_type(model_type)
  if _eval_type == EvalType.CLASSIFICATION:
    return ClassificationResultHandler
  elif _eval_type == EvalType.DETECTION:
    return DetectionResultHandler
  else:
    return PlaceholderHandler
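For orientation, a minimal usage sketch of this new module. The documented entry point is `EvalResultCompare` in `clarifai/utils/evaluation/__init__.py`, which drives these handlers internally; the model URL, dataset URL, and model type below are placeholders, not values taken from this diff:

```python
# Hypothetical sketch, assuming a trained classification model and an eval dataset already exist.
from clarifai.client.dataset import Dataset
from clarifai.client.model import Model
from clarifai.utils.evaluation.helpers import make_handler_by_type

model = Model('model url')      # placeholder, same style as the README examples
dataset = Dataset('dataset url')  # placeholder

# Pick a handler class from the model type, then fetch (or trigger) the evaluation.
handler_cls = make_handler_by_type("visual-classifier")  # or the model's actual model_type_id
handler = handler_cls(model=model)
handler.find_eval_id(datasets=[dataset], attempt_evaluate=True)

# Per-concept and total metrics at a 0.5 confidence threshold.
result = handler.detailed_summary(index=0, confidence_threshold=0.5)
if result is not None:
  per_concept_df, total_df = result
  print(total_df)
```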
clarifai/utils/logging.py
CHANGED
@@ -99,3 +99,10 @@ def get_logger(logger_level: Union[int, str] = logging.NOTSET,
 
   _configure_logger(name, logger_level)
   return logging.getLogger(name)
+
+
+def add_file_handler(logger: logging.Logger, file_path: str, log_level: str = 'WARNING') -> None:
+  """Add a file handler to the logger."""
+  file_handler = logging.FileHandler(file_path)
+  file_handler.setLevel(log_level)
+  logger.addHandler(file_handler)
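A quick sketch of the new `add_file_handler` helper in use; the file name and levels below are illustrative:

```python
from clarifai.utils.logging import add_file_handler, get_logger

# Illustrative: keep INFO on the existing logger, and also persist WARNING-and-above to a file.
logger = get_logger(logger_level="INFO", name="clarifai")
add_file_handler(logger, file_path="clarifai_warnings.log", log_level="WARNING")
logger.warning("this record is also written to clarifai_warnings.log")
```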
clarifai/utils/model_train.py
CHANGED
@@ -85,7 +85,7 @@ def response_to_model_params(response: MultiModelTypeResponse,
   return params
 
 
-def params_parser(params_dict: dict) -> Dict[str, Any]:
+def params_parser(params_dict: dict, concepts: List = None) -> Dict[str, Any]:
   """Converts the params dictionary to a dictionary of model specific params for the given model"""
   #dict parser
   train_dict = {}
@@ -112,6 +112,8 @@ def params_parser(params_dict: dict) -> Dict[str, Any]:
     train_dict['train_info'] = resources_pb2.TrainInfo(**train_dict['train_info'])
 
   if 'concepts' in params_dict.keys():
+    assert set(params_dict["concepts"]).issubset(
+        concepts), "Invalid concept IDs. Available concepts in the app are {}".format(concepts)
     train_dict["output_info"]['data'] = resources_pb2.Data(
         concepts=[resources_pb2.Concept(id=concept_id) for concept_id in params_dict["concepts"]])
   if 'inference_params' in params_dict.keys():
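The effect of the new `concepts` argument, sketched with made-up concept IDs (this reproduces only the subset check that the added assert performs, not a full `params_parser` call):

```python
# Hypothetical values for illustration only.
app_concepts = ["cat", "dog"]  # concept IDs available in the app
requested = ["cat", "bird"]    # "bird" is not an app concept

try:
  assert set(requested).issubset(
      app_concepts), "Invalid concept IDs. Available concepts in the app are {}".format(app_concepts)
except AssertionError as e:
  print(e)  # Invalid concept IDs. Available concepts in the app are ['cat', 'dog']
```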
clarifai/versions.py
CHANGED
{clarifai-10.0.1.dist-info → clarifai-10.1.1.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: clarifai
-Version: 10.0.1
+Version: 10.1.1
 Summary: Clarifai Python SDK
 Home-page: https://github.com/Clarifai/clarifai-python
 Author: Clarifai
@@ -20,18 +20,15 @@ Classifier: Operating System :: OS Independent
 Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: clarifai-grpc (~=10.
-Requires-Dist: pandas (>=1.3.5)
+Requires-Dist: clarifai-grpc (~=10.1.6)
 Requires-Dist: numpy (>=1.22.0)
 Requires-Dist: tqdm (>=4.65.0)
-Requires-Dist: opencv-python (>=4.7.0.68)
 Requires-Dist: tritonclient (>=2.34.0)
 Requires-Dist: rich (>=13.4.2)
 Requires-Dist: PyYAML (>=6.0.1)
 Requires-Dist: schema (>=0.7.5)
 Requires-Dist: Pillow (>=9.5.0)
-Requires-Dist:
-Requires-Dist: pypdf (>=3.17.4)
+Requires-Dist: inquirerpy (==0.3.4)
 Provides-Extra: all
 Requires-Dist: pycocotools (==2.0.6) ; extra == 'all'
 
@@ -59,7 +56,7 @@ This is the official Python client for interacting with our powerful [API](https
 
 [Website](https://www.clarifai.com/) | [Schedule Demo](https://www.clarifai.com/company/schedule-demo) | [Signup for a Free Account](https://clarifai.com/signup) | [API Docs](https://docs.clarifai.com/) | [Clarifai Community](https://clarifai.com/explore) | [Python SDK Docs](https://docs.clarifai.com/python-sdk/api-reference) | [Examples](https://github.com/Clarifai/examples) | [Colab Notebooks](https://github.com/Clarifai/colab-notebooks) | [Discord](https://discord.gg/XAPE3Vtg)
 
-
+Give the repo a star ⭐
 ---
 
 
@@ -85,6 +82,7 @@ This is the official Python client for interacting with our powerful [API](https
 * [Smart Image Search](#smart-image-search)
 * [Smart Text Search](#smart-text-search)
 * [Filters](#filters)
+* **[Retrieval Augmented Generation (RAG)](#retrieval-augmented-generation-rag)**
 * **[More Examples](#pushpin-more-examples)**
 
 
@@ -107,9 +105,10 @@ Install from Source:
 ```bash
 git clone https://github.com/Clarifai/clarifai-python.git
 cd clarifai-python
-python3 -m venv
-source
-
+python3 -m venv .venv
+source .venv/bin/activate
+pip install -r requirements.txt
+python setup.py install
 ```
 
 
@@ -296,6 +295,35 @@ status = model.training_status(version_id=model_version_id,training_logs=True)
 print(status)
 ```
 
+#### Evaluate your trained model
+
+When your model is trained and ready, you can evaluate it with the following code:
+
+```python
+from clarifai.client.model import Model
+
+model = Model('url')
+model.evaluate(dataset_id='your-dataset-id')
+```
+
+Compare the evaluation results of your models.
+
+```python
+from clarifai.client.model import Model
+from clarifai.client.dataset import Dataset
+from clarifai.utils.evaluation import EvalResultCompare
+
+models = ['model url1', 'model url2'] # or [Model(url1), Model(url2)]
+dataset = 'dataset url' # or Dataset(dataset_url)
+
+compare = EvalResultCompare(
+  models=models,
+  datasets=dataset,
+  attempt_evaluate=True # attempt evaluate when the model is not evaluated with the dataset
+)
+compare.all('output/folder/')
+```
+
 #### Models Listing
 ```python
 # Note: CLARIFAI_PAT must be set as env variable.
@@ -430,6 +458,26 @@ Input filters allows to filter by input_type, status of inputs and by inputs_dat
 results = search.query(filters=[{'input_types': ['image', 'text']}])
 ```
 
+## Retrieval Augmented Generation (RAG)
+
+You can set up and start your RAG pipeline in 4 lines of code. The setup method automatically creates a new app and the necessary components under the hood. By default it uses the [mistral-7B-Instruct](https://clarifai.com/mistralai/completion/models/mistral-7B-Instruct) model.
+
+```python
+from clarifai.rag import RAG
+
+rag_agent = RAG.setup(user_id="USER_ID")
+rag_agent.upload(folder_path="~/docs")
+rag_agent.chat(messages=[{"role":"human", "content":"What is Clarifai"}])
+```
+
+If you have previously run the setup method, you can instantiate the RAG class with the prompter workflow URL:
+
+```python
+from clarifai.rag import RAG
+
+rag_agent = RAG(workflow_url="WORKFLOW_URL")
+```
+
 ## :pushpin: More Examples
 
 See many more code examples in this [repo](https://github.com/Clarifai/examples).