clarifai 10.0.1__py3-none-any.whl → 10.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- clarifai/client/app.py +23 -43
- clarifai/client/base.py +46 -4
- clarifai/client/dataset.py +85 -33
- clarifai/client/input.py +35 -7
- clarifai/client/model.py +192 -11
- clarifai/client/module.py +8 -6
- clarifai/client/runner.py +3 -1
- clarifai/client/search.py +6 -3
- clarifai/client/user.py +14 -12
- clarifai/client/workflow.py +8 -5
- clarifai/datasets/upload/features.py +3 -0
- clarifai/datasets/upload/image.py +57 -26
- clarifai/datasets/upload/loaders/README.md +3 -4
- clarifai/datasets/upload/loaders/xview_detection.py +9 -5
- clarifai/datasets/upload/utils.py +23 -7
- clarifai/models/model_serving/README.md +113 -121
- clarifai/models/model_serving/__init__.py +2 -0
- clarifai/models/model_serving/cli/_utils.py +53 -0
- clarifai/models/model_serving/cli/base.py +14 -0
- clarifai/models/model_serving/cli/build.py +79 -0
- clarifai/models/model_serving/cli/clarifai_clis.py +33 -0
- clarifai/models/model_serving/cli/create.py +171 -0
- clarifai/models/model_serving/cli/example_cli.py +34 -0
- clarifai/models/model_serving/cli/login.py +26 -0
- clarifai/models/model_serving/cli/upload.py +182 -0
- clarifai/models/model_serving/constants.py +20 -0
- clarifai/models/model_serving/docs/cli.md +150 -0
- clarifai/models/model_serving/docs/concepts.md +229 -0
- clarifai/models/model_serving/docs/dependencies.md +1 -1
- clarifai/models/model_serving/docs/inference_parameters.md +112 -107
- clarifai/models/model_serving/docs/model_types.md +16 -17
- clarifai/models/model_serving/model_config/__init__.py +4 -2
- clarifai/models/model_serving/model_config/base.py +369 -0
- clarifai/models/model_serving/model_config/config.py +219 -224
- clarifai/models/model_serving/model_config/inference_parameter.py +5 -0
- clarifai/models/model_serving/model_config/model_types_config/multimodal-embedder.yaml +25 -24
- clarifai/models/model_serving/model_config/model_types_config/text-classifier.yaml +19 -18
- clarifai/models/model_serving/model_config/model_types_config/text-embedder.yaml +20 -18
- clarifai/models/model_serving/model_config/model_types_config/text-to-image.yaml +19 -18
- clarifai/models/model_serving/model_config/model_types_config/text-to-text.yaml +19 -18
- clarifai/models/model_serving/model_config/model_types_config/visual-classifier.yaml +22 -18
- clarifai/models/model_serving/model_config/model_types_config/visual-detector.yaml +32 -28
- clarifai/models/model_serving/model_config/model_types_config/visual-embedder.yaml +19 -18
- clarifai/models/model_serving/model_config/model_types_config/visual-segmenter.yaml +19 -18
- clarifai/models/model_serving/{models → model_config}/output.py +8 -0
- clarifai/models/model_serving/model_config/triton/__init__.py +14 -0
- clarifai/models/model_serving/model_config/{serializer.py → triton/serializer.py} +3 -1
- clarifai/models/model_serving/model_config/triton/triton_config.py +182 -0
- clarifai/models/model_serving/{models/model_types.py → model_config/triton/wrappers.py} +4 -4
- clarifai/models/model_serving/{models → repo_build}/__init__.py +2 -0
- clarifai/models/model_serving/repo_build/build.py +198 -0
- clarifai/models/model_serving/repo_build/static_files/_requirements.txt +2 -0
- clarifai/models/model_serving/repo_build/static_files/base_test.py +169 -0
- clarifai/models/model_serving/repo_build/static_files/inference.py +26 -0
- clarifai/models/model_serving/repo_build/static_files/sample_clarifai_config.yaml +25 -0
- clarifai/models/model_serving/repo_build/static_files/test.py +40 -0
- clarifai/models/model_serving/{models/pb_model.py → repo_build/static_files/triton/model.py} +15 -14
- clarifai/models/model_serving/utils.py +21 -0
- clarifai/rag/rag.py +67 -23
- clarifai/rag/utils.py +21 -5
- clarifai/utils/evaluation/__init__.py +427 -0
- clarifai/utils/evaluation/helpers.py +522 -0
- clarifai/utils/logging.py +7 -0
- clarifai/utils/model_train.py +3 -1
- clarifai/versions.py +1 -1
- {clarifai-10.0.1.dist-info → clarifai-10.1.1.dist-info}/METADATA +58 -10
- clarifai-10.1.1.dist-info/RECORD +115 -0
- clarifai-10.1.1.dist-info/entry_points.txt +2 -0
- clarifai/datasets/upload/loaders/coco_segmentation.py +0 -98
- clarifai/models/model_serving/cli/deploy_cli.py +0 -123
- clarifai/models/model_serving/cli/model_zip.py +0 -61
- clarifai/models/model_serving/cli/repository.py +0 -89
- clarifai/models/model_serving/docs/custom_config.md +0 -33
- clarifai/models/model_serving/docs/output.md +0 -28
- clarifai/models/model_serving/models/default_test.py +0 -281
- clarifai/models/model_serving/models/inference.py +0 -50
- clarifai/models/model_serving/models/test.py +0 -64
- clarifai/models/model_serving/pb_model_repository.py +0 -108
- clarifai-10.0.1.dist-info/RECORD +0 -103
- clarifai-10.0.1.dist-info/entry_points.txt +0 -4
- {clarifai-10.0.1.dist-info → clarifai-10.1.1.dist-info}/LICENSE +0 -0
- {clarifai-10.0.1.dist-info → clarifai-10.1.1.dist-info}/WHEEL +0 -0
- {clarifai-10.0.1.dist-info → clarifai-10.1.1.dist-info}/top_level.txt +0 -0
clarifai/client/app.py
CHANGED
```diff
@@ -32,6 +32,7 @@ class App(Lister, BaseClient):
                app_id: str = None,
                base_url: str = "https://api.clarifai.com",
                pat: str = None,
+               token: str = None,
                **kwargs):
     """Initializes an App object.

@@ -40,6 +41,7 @@ class App(Lister, BaseClient):
         app_id (str): The App ID for the App to interact with.
         base_url (str): Base API url. Default "https://api.clarifai.com"
         pat (str): A personal access token for authentication. Can be set as env var CLARIFAI_PAT
+        token (str): A session token for authentication. Accepts either a session token or a pat. Can be set as env var CLARIFAI_SESSION_TOKEN
         **kwargs: Additional keyword arguments to be passed to the App.
             - name (str): The name of the app.
             - description (str): The description of the app.
@@ -52,7 +54,8 @@ class App(Lister, BaseClient):
     self.kwargs = {**kwargs, 'id': app_id}
     self.app_info = resources_pb2.App(**self.kwargs)
     self.logger = get_logger(logger_level="INFO", name=__name__)
-    BaseClient.__init__(
+    BaseClient.__init__(
+        self, user_id=self.user_id, app_id=self.id, base=base_url, pat=pat, token=token)
     Lister.__init__(self)

   def list_datasets(self, page_no: int = None,
@@ -85,7 +88,7 @@ class App(Lister, BaseClient):
     for dataset_info in all_datasets_info:
       if 'version' in list(dataset_info.keys()):
         del dataset_info['version']['metrics']
-      yield Dataset(
+      yield Dataset.from_auth_helper(auth=self.auth_helper, **dataset_info)

   def list_models(self,
                   filter_by: Dict[str, Any] = {},
@@ -126,7 +129,7 @@ class App(Lister, BaseClient):
       if only_in_app:
         if model_info['app_id'] != self.id:
           continue
-      yield Model(
+      yield Model.from_auth_helper(auth=self.auth_helper, **model_info)

   def list_workflows(self,
                      filter_by: Dict[str, Any] = {},
@@ -165,7 +168,7 @@ class App(Lister, BaseClient):
       if only_in_app:
         if workflow_info['app_id'] != self.id:
           continue
-      yield Workflow(
+      yield Workflow.from_auth_helper(auth=self.auth_helper, **workflow_info)

   def list_modules(self,
                    filter_by: Dict[str, Any] = {},
@@ -204,7 +207,7 @@ class App(Lister, BaseClient):
       if only_in_app:
         if module_info['app_id'] != self.id:
           continue
-      yield Module(
+      yield Module.from_auth_helper(auth=self.auth_helper, **module_info)

   def list_installed_module_versions(self,
                                      filter_by: Dict[str, Any] = {},
@@ -239,11 +242,8 @@ class App(Lister, BaseClient):
     for imv_info in all_imv_infos:
       del imv_info['deploy_url']
       del imv_info['installed_module_version_id']  # TODO: remove this after the backend fix
-      yield Module(
-          module_id=imv_info['module_version']['module_id'],
-          base_url=self.base,
-          pat=self.pat,
-          **imv_info)
+      yield Module.from_auth_helper(
+          auth=self.auth_helper, module_id=imv_info['module_version']['module_id'], **imv_info)

   def list_concepts(self, page_no: int = None,
                     per_page: int = None) -> Generator[Concept, None, None]:
@@ -308,14 +308,8 @@ class App(Lister, BaseClient):
     if response.status.code != status_code_pb2.SUCCESS:
       raise Exception(response.status)
     self.logger.info("\nDataset created\n%s", response.status)
-    kwargs.update({
-        'app_id': self.id,
-        'user_id': self.user_id,
-        'base_url': self.base,
-        'pat': self.pat
-    })

-    return Dataset(dataset_id=dataset_id, **kwargs)
+    return Dataset.from_auth_helper(self.auth_helper, dataset_id=dataset_id, **kwargs)

   def create_model(self, model_id: str, **kwargs) -> Model:
     """Creates a model for the app.
@@ -339,14 +333,11 @@ class App(Lister, BaseClient):
       raise Exception(response.status)
     self.logger.info("\nModel created\n%s", response.status)
     kwargs.update({
-        '
-        'user_id': self.user_id,
+        'model_id': model_id,
         'model_type_id': response.model.model_type_id,
-        'base_url': self.base,
-        'pat': self.pat
     })

-    return Model(
+    return Model.from_auth_helper(auth=self.auth_helper, **kwargs)

   def create_workflow(self,
                       config_filepath: str,
@@ -436,9 +427,8 @@ class App(Lister, BaseClient):
     display_workflow_tree(dict_response["workflows"][0]["nodes"])
     kwargs = self.process_response_keys(dict_response[list(dict_response.keys())[1]][0],
                                         "workflow")
-    kwargs.update({'base_url': self.base, 'pat': self.pat})

-    return Workflow(**kwargs)
+    return Workflow.from_auth_helper(auth=self.auth_helper, **kwargs)

   def create_module(self, module_id: str, description: str, **kwargs) -> Module:
     """Creates a module for the app.
@@ -464,14 +454,8 @@ class App(Lister, BaseClient):
     if response.status.code != status_code_pb2.SUCCESS:
       raise Exception(response.status)
     self.logger.info("\nModule created\n%s", response.status)
-    kwargs.update({
-        'app_id': self.id,
-        'user_id': self.user_id,
-        'base_url': self.base,
-        'pat': self.pat
-    })

-    return Module(module_id=module_id, **kwargs)
+    return Module.from_auth_helper(auth=self.auth_helper, module_id=module_id, **kwargs)

   def dataset(self, dataset_id: str, **kwargs) -> Dataset:
     """Returns a Dataset object for the existing dataset ID.
@@ -496,8 +480,7 @@ class App(Lister, BaseClient):
     kwargs = self.process_response_keys(dict_response[list(dict_response.keys())[1]],
                                         list(dict_response.keys())[1])
     kwargs['version'] = response.dataset.version if response.dataset.version else None
-
-    return Dataset(**kwargs)
+    return Dataset.from_auth_helper(auth=self.auth_helper, **kwargs)

   def model(self, model_id: str, model_version_id: str = "", **kwargs) -> Model:
     """Returns a Model object for the existing model ID.
@@ -532,9 +515,8 @@ class App(Lister, BaseClient):
     kwargs = self.process_response_keys(dict_response['model'], 'model')
     kwargs[
         'model_version'] = response.model.model_version if response.model.model_version else None
-    kwargs.update({'base_url': self.base, 'pat': self.pat})

-    return Model(**kwargs)
+    return Model.from_auth_helper(self.auth_helper, **kwargs)

   def workflow(self, workflow_id: str, **kwargs) -> Workflow:
     """Returns a workflow object for the existing workflow ID.
@@ -558,9 +540,8 @@ class App(Lister, BaseClient):
     dict_response = MessageToDict(response, preserving_proto_field_name=True)
     kwargs = self.process_response_keys(dict_response[list(dict_response.keys())[1]],
                                         list(dict_response.keys())[1])
-    kwargs.update({'base_url': self.base, 'pat': self.pat})

-    return Workflow(**kwargs)
+    return Workflow.from_auth_helper(auth=self.auth_helper, **kwargs)

   def module(self, module_id: str, module_version_id: str = "", **kwargs) -> Module:
     """Returns a Module object for the existing module ID.
@@ -585,9 +566,8 @@ class App(Lister, BaseClient):
       raise Exception(response.status)
     dict_response = MessageToDict(response, preserving_proto_field_name=True)
     kwargs = self.process_response_keys(dict_response['module'], 'module')
-    kwargs.update({'base_url': self.base, 'pat': self.pat})

-    return Module(**kwargs)
+    return Module.from_auth_helper(auth=self.auth_helper, **kwargs)

   def inputs(self,):
     """Returns an Input object.

@@ -595,7 +575,7 @@ class App(Lister, BaseClient):
     Returns:
         Inputs: An input object.
     """
-    return Inputs(self.
+    return Inputs.from_auth_helper(self.auth_helper)

   def delete_dataset(self, dataset_id: str) -> None:
     """Deletes an dataset for the user.
@@ -684,9 +664,9 @@ class App(Lister, BaseClient):
         >>> app = App(app_id="app_id", user_id="user_id")
         >>> search_client = app.search(top_k=12, metric="euclidean")
     """
-
-
-    return Search(
+    kwargs.get("user_id", self.user_app_id.user_id)
+    kwargs.get("app_id", self.user_app_id.app_id)
+    return Search.from_auth_helper(auth=self.auth_helper, **kwargs)

   def __getattr__(self, name):
     return getattr(self.app_info, name)
```
clarifai/client/base.py
CHANGED
```diff
@@ -7,7 +7,7 @@ from google.protobuf.wrappers_pb2 import BoolValue

 from clarifai.client.auth import create_stub
 from clarifai.client.auth.helper import ClarifaiAuthHelper
-from clarifai.errors import ApiError
+from clarifai.errors import ApiError, UserError
 from clarifai.utils.misc import get_from_dict_or_env


@@ -19,9 +19,11 @@ class BaseClient:
     - user_id (str): A user ID for authentication.
     - app_id (str): An app ID for the application to interact with.
     - pat (str): A personal access token for authentication.
+    - token (str): A session token for authentication. Accepts either a session token or a pat.
     - base (str): The base URL for the API endpoint. Defaults to 'https://api.clarifai.com'.
     - ui (str): The URL for the UI. Defaults to 'https://clarifai.com'.

+
   Attributes:
     auth_helper (ClarifaiAuthHelper): An instance of ClarifaiAuthHelper for authentication.
     STUB (Stub): The gRPC Stub object for API interaction.
@@ -31,15 +33,53 @@ class BaseClient:
   """

   def __init__(self, **kwargs):
-    pat =
-
+    token, pat = "", ""
+    try:
+      pat = get_from_dict_or_env(key="pat", env_key="CLARIFAI_PAT", **kwargs)
+    except UserError:
+      token = get_from_dict_or_env(key="token", env_key="CLARIFAI_SESSION_TOKEN", **kwargs)
+    finally:
+      assert token or pat, Exception(
+          "Need 'pat' or 'token' in args or use one of the CLARIFAI_PAT or CLARIFAI_SESSION_TOKEN env vars"
+      )
+    kwargs.update({'token': token, 'pat': pat})
+
     self.auth_helper = ClarifaiAuthHelper(**kwargs, validate=False)
     self.STUB = create_stub(self.auth_helper)
     self.metadata = self.auth_helper.metadata
     self.pat = self.auth_helper.pat
+    self.token = self.auth_helper._token
     self.user_app_id = self.auth_helper.get_user_app_id_proto()
     self.base = self.auth_helper.base

+  @classmethod
+  def from_auth_helper(cls, auth: ClarifaiAuthHelper, **kwargs):
+    default_kwargs = {
+        "user_id": kwargs.get("user_id", None) or auth.user_id,
+        "app_id": kwargs.get("app_id", None) or auth.app_id,
+        "pat": kwargs.get("pat", None) or auth.pat,
+        "token": kwargs.get("token", None) or auth._token,
+    }
+    _base = kwargs.get("base", None) or auth.base
+    _clss = cls.__mro__[0]
+    if _clss == BaseClient:
+      kwargs = {
+          **default_kwargs,
+          "base": _base,  # Baseclient uses `base`
+          "ui": kwargs.get("ui", None) or auth.ui
+      }
+    else:
+      # Remove user_id and app_id if a custom URL is provided
+      if kwargs.get("url"):
+        default_kwargs.pop("user_id", "")
+        default_kwargs.pop("app_id", "")
+      # Remove app_id if the class name contains "Runner"
+      if 'Runner' in _clss.__name__:
+        default_kwargs.pop("app_id", "")
+      kwargs.update({**default_kwargs, "base_url": _base})
+
+    return cls(**kwargs)
+
   def _grpc_request(self, method: Callable, argument: Any):
     """Makes a gRPC request to the API.

@@ -52,7 +92,7 @@ class BaseClient:
     """

     try:
-      res = method(argument)
+      res = method(argument, metadata=self.auth_helper.metadata)
       # MessageToDict(res) TODO global debug logger
       return res
     except ApiError:
@@ -118,6 +158,8 @@ class BaseClient:
         value_s = struct_pb2.Struct()
         value_s.update(value)
         value = value_s
+      elif key == 'metrics':
+        continue
       elif key in ['metadata']:
         if isinstance(value, dict) and value != {}:
           value_s = struct_pb2.Struct()
```
clarifai/client/dataset.py
CHANGED
```diff
@@ -2,11 +2,13 @@ import os
 import time
 import uuid
 from concurrent.futures import ThreadPoolExecutor, as_completed
+from datetime import datetime
 from multiprocessing import cpu_count
-from typing import Generator, List, Tuple, Type, TypeVar, Union
+from typing import Dict, Generator, List, Optional, Tuple, Type, TypeVar, Union

 import requests
 from clarifai_grpc.grpc.api import resources_pb2, service_pb2
+from clarifai_grpc.grpc.api.service_pb2 import MultiInputResponse
 from clarifai_grpc.grpc.api.status import status_code_pb2, status_pb2
 from google.protobuf.json_format import MessageToDict
 from requests.adapters import HTTPAdapter, Retry
@@ -25,7 +27,7 @@ from clarifai.datasets.upload.text import TextClassificationDataset
 from clarifai.datasets.upload.utils import DisplayUploadStatus
 from clarifai.errors import UserError
 from clarifai.urls.helper import ClarifaiUrlHelper
-from clarifai.utils.logging import get_logger
+from clarifai.utils.logging import add_file_handler, get_logger
 from clarifai.utils.misc import BackoffIterator, Chunker

 ClarifaiDatasetType = TypeVar('ClarifaiDatasetType', VisualClassificationDataset,
@@ -41,6 +43,7 @@ class Dataset(Lister, BaseClient):
                dataset_id: str = None,
                base_url: str = "https://api.clarifai.com",
                pat: str = None,
+               token: str = None,
                **kwargs):
     """Initializes a Dataset object.

@@ -49,6 +52,7 @@ class Dataset(Lister, BaseClient):
         dataset_id (str): The Dataset ID within the App to interact with.
         base_url (str): Base API url. Default "https://api.clarifai.com"
         pat (str): A personal access token for authentication. Can be set as env var CLARIFAI_PAT
+        token (str): A session token for authentication. Accepts either a session token or a pat. Can be set as env var CLARIFAI_SESSION_TOKEN
         **kwargs: Additional keyword arguments to be passed to the Dataset.
     """
     if url and dataset_id:
@@ -64,9 +68,10 @@ class Dataset(Lister, BaseClient):
     self.max_retires = 10
     self.batch_size = 128  # limit max protos in a req
     self.task = None  # Upload dataset type
-    self.input_object = Inputs(user_id=self.user_id, app_id=self.app_id, pat=pat)
-    self.logger = get_logger(logger_level="INFO")
-    BaseClient.__init__(
+    self.input_object = Inputs(user_id=self.user_id, app_id=self.app_id, pat=pat, token=token)
+    self.logger = get_logger(logger_level="INFO", name=__name__)
+    BaseClient.__init__(
+        self, user_id=self.user_id, app_id=self.app_id, base=base_url, pat=pat, token=token)
     Lister.__init__(self)

   def create_version(self, **kwargs) -> 'Dataset':
@@ -96,13 +101,10 @@ class Dataset(Lister, BaseClient):
     self.logger.info("\nDataset Version created\n%s", response.status)
     kwargs.update({
         'dataset_id': self.id,
-        'app_id': self.app_id,
-        'user_id': self.user_id,
         'version': response.dataset_versions[0],
-        'base_url': self.base,
-        'pat': self.pat
     })
-
+
+    return Dataset.from_auth_helper(self.auth_helper, **kwargs)

   def delete_version(self, version_id: str) -> None:
     """Deletes a dataset version for the Dataset.
@@ -160,13 +162,9 @@ class Dataset(Lister, BaseClient):
       del dataset_version_info['metrics']
       kwargs = {
           'dataset_id': self.id,
-          'app_id': self.app_id,
-          'user_id': self.user_id,
           'version': resources_pb2.DatasetVersion(**dataset_version_info),
-          'base_url': self.base,
-          'pat': self.pat
       }
-      yield Dataset(**kwargs)
+      yield Dataset.from_auth_helper(self.auth_helper, **kwargs)

   def _concurrent_annot_upload(self, annots: List[List[resources_pb2.Annotation]]
                                ) -> Union[List[resources_pb2.Annotation], List[None]]:
@@ -194,13 +192,17 @@ class Dataset(Lister, BaseClient):

     return retry_annot_upload

-  def _delete_failed_inputs(
-
+  def _delete_failed_inputs(
+      self,
+      batch_input_ids: List[int],
+      dataset_obj: ClarifaiDatasetType,
+      upload_response: MultiInputResponse = None) -> Tuple[List[int], List[int]]:
     """Delete failed input ids from clarifai platform dataset.

     Args:
       batch_input_ids: batch input ids
       dataset_obj: ClarifaiDataset object
+      upload_response: upload response proto

     Returns:
       success_inputs: upload success input ids
@@ -220,7 +222,19 @@ class Dataset(Lister, BaseClient):
     success_inputs = response_dict.get('inputs', [])

     success_input_ids = [input.get('id') for input in success_inputs]
-    failed_input_ids = list(set(input_ids) - set(success_input_ids))
+    failed_input_ids = list(set(input_ids) - set(success_input_ids.copy()))
+    #check duplicate input ids
+    duplicate_input_ids = [
+        input.id for input in upload_response.inputs
+        if input.status.details == 'Input has a duplicate ID.'
+    ]  #handling duplicte ID failures.
+    if duplicate_input_ids:
+      success_input_ids = list(set(success_input_ids.copy()) - set(duplicate_input_ids.copy()))
+      failed_input_ids = list(set(failed_input_ids) - set(duplicate_input_ids))
+      self.logger.warning(
+          f"Upload Failed for {len(duplicate_input_ids)} inputs in current batch: Duplicate input ids: {duplicate_input_ids}"
+      )
+
     #delete failed inputs
     self._grpc_request(
         self.STUB.DeleteInputs,
@@ -228,8 +242,9 @@ class Dataset(Lister, BaseClient):
     )
     return [input_ids[id] for id in success_input_ids], [input_ids[id] for id in failed_input_ids]

-  def _upload_inputs_annotations(
-
+  def _upload_inputs_annotations(
+      self, batch_input_ids: List[int], dataset_obj: ClarifaiDatasetType
+  ) -> Tuple[List[int], List[resources_pb2.Annotation], MultiInputResponse]:
     """Uploads batch of inputs and annotations concurrently to clarifai platform dataset.

     Args:
@@ -239,20 +254,22 @@ class Dataset(Lister, BaseClient):
     Returns:
       failed_input_ids: failed input ids
       retry_annot_protos: failed annot protos
+      response: upload response proto
     """
     input_protos, _ = dataset_obj.get_protos(batch_input_ids)
-    input_job_id = self.input_object.upload_inputs(inputs=input_protos, show_log=False)
+    input_job_id, _response = self.input_object.upload_inputs(inputs=input_protos, show_log=False)
     retry_annot_protos = []

     self.input_object._wait_for_inputs(input_job_id)
-    success_input_ids, failed_input_ids = self._delete_failed_inputs(batch_input_ids, dataset_obj
+    success_input_ids, failed_input_ids = self._delete_failed_inputs(batch_input_ids, dataset_obj,
+                                                                     _response)

-    if self.task in ["visual_detection", "visual_segmentation"]:
+    if self.task in ["visual_detection", "visual_segmentation"] and success_input_ids:
       _, annotation_protos = dataset_obj.get_protos(success_input_ids)
       chunked_annotation_protos = Chunker(annotation_protos, self.batch_size).chunk()
       retry_annot_protos.extend(self._concurrent_annot_upload(chunked_annotation_protos))

-    return failed_input_ids, retry_annot_protos
+    return failed_input_ids, retry_annot_protos, _response

   def _retry_uploads(self, failed_input_ids: List[int],
                      retry_annot_protos: List[resources_pb2.Annotation],
@@ -265,7 +282,25 @@ class Dataset(Lister, BaseClient):
       dataset_obj: ClarifaiDataset object
     """
     if failed_input_ids:
-
+      retry_input_ids = [dataset_obj.all_input_ids[id] for id in failed_input_ids]
+      #Log Retrying inputs
+      self.logger.warning(
+          f"Retrying upload for {len(failed_input_ids)} inputs in current batch: {retry_input_ids}"
+      )
+      failed_retrying_inputs, _, retry_response = self._upload_inputs_annotations(
+          failed_input_ids, dataset_obj)
+      #Log failed inputs
+      if failed_retrying_inputs:
+        failed_retrying_input_ids = [
+            dataset_obj.all_input_ids[id] for id in failed_retrying_inputs
+        ]
+        failed_inputs_logs = {
+            input.id: input.status.details
+            for input in retry_response.inputs if input.id in failed_retrying_input_ids
+        }
+        self.logger.warning(
+            f"Failed to upload {len(failed_retrying_inputs)} inputs in current batch: {failed_inputs_logs}"
+        )
     if retry_annot_protos:
       chunked_annotation_protos = Chunker(retry_annot_protos, self.batch_size).chunk()
       _ = self._concurrent_annot_upload(chunked_annotation_protos)
@@ -287,21 +322,27 @@ class Dataset(Lister, BaseClient):
     ]

     for job in as_completed(futures):
-      retry_input_ids, retry_annot_protos = job.result()
+      retry_input_ids, retry_annot_protos, _ = job.result()
       self._retry_uploads(retry_input_ids, retry_annot_protos, dataset_obj)
       progress.update()

   def upload_dataset(self,
                      dataloader: Type[ClarifaiDataLoader],
                      batch_size: int = 32,
-                     get_upload_status: bool = False
+                     get_upload_status: bool = False,
+                     log_warnings: bool = False) -> None:
     """Uploads a dataset to the app.

     Args:
       dataloader (Type[ClarifaiDataLoader]): ClarifaiDataLoader object
       batch_size (int): batch size for concurrent upload of inputs and annotations (max: 128)
       get_upload_status (bool): True if you want to get the upload status of the dataset
+      log_warnings (bool): True if you want to save log warnings in a file
     """
+    #add file handler to log warnings
+    if log_warnings:
+      add_file_handler(self.logger, f"Dataset_Upload{str(int(datetime.now().timestamp()))}.log")
+    #set batch size and task
     self.batch_size = min(self.batch_size, batch_size)
     self.task = dataloader.task
     if self.task not in DATASET_UPLOAD_TASKS:
@@ -321,10 +362,13 @@ class Dataset(Lister, BaseClient):
     else:  # visual_classification & visual_captioning
       dataset_obj = VisualClassificationDataset(dataloader, self.id)

+    if get_upload_status:
+      pre_upload_stats = self.get_upload_status(pre_upload=True)
+
     self._data_upload(dataset_obj)

     if get_upload_status:
-      self.get_upload_status(dataloader)
+      self.get_upload_status(dataloader=dataloader, pre_upload_stats=pre_upload_stats)

   def upload_from_csv(self,
                       csv_path: str,
@@ -398,16 +442,21 @@ class Dataset(Lister, BaseClient):
         folder_path=folder_path, dataset_id=self.id, labels=labels)
     self.input_object._bulk_upload(inputs=input_protos, batch_size=batch_size)

-  def get_upload_status(
-
-
-
+  def get_upload_status(
+      self,
+      dataloader: Type[ClarifaiDataLoader] = None,
+      delete_version: bool = False,
+      timeout: int = 600,
+      pre_upload_stats: Tuple[Dict[str, int], Dict[str, int]] = None,
+      pre_upload: bool = False) -> Optional[Tuple[Dict[str, int], Dict[str, int]]]:
     """Creates a new dataset version and displays the upload status of the dataset.

     Args:
       dataloader (Type[ClarifaiDataLoader]): ClarifaiDataLoader object
       delete_version (bool): True if you want to delete the version after getting the upload status
       timeout (int): Timeout in seconds for getting the upload status. Default is 600 seconds.
+      pre_upload_stats (Tuple[Dict[str, int], Dict[str, int]]): The pre upload stats for the dataset.
+      pre_upload (bool): True if you want to get the pre upload stats for the dataset.

     Example:
         >>> from clarifai.client.dataset import Dataset
@@ -450,9 +499,12 @@ class Dataset(Lister, BaseClient):
           raise UserError(
              "Dataset metrics are taking too long to process. Please try again later.")
         break
+    #get pre upload stats
+    if pre_upload:
+      return DisplayUploadStatus.get_dataset_version_stats(dataset_metrics_response)

     dataset_info_dict = dict(user_id=self.user_id, app_id=self.app_id, dataset_id=self.id)
-    DisplayUploadStatus(dataloader, dataset_metrics_response, dataset_info_dict)
+    DisplayUploadStatus(dataloader, dataset_metrics_response, dataset_info_dict, pre_upload_stats)

     if delete_version:
       self.delete_version(dataset_version_id)
```
clarifai/client/input.py
CHANGED
```diff
@@ -32,6 +32,7 @@ class Inputs(Lister, BaseClient):
                logger_level: str = "INFO",
                base_url: str = "https://api.clarifai.com",
                pat: str = None,
+               token: str = None,
                **kwargs):
     """Initializes an Input object.

@@ -39,6 +40,8 @@ class Inputs(Lister, BaseClient):
         user_id (str): A user ID for authentication.
         app_id (str): An app ID for the application to interact with.
         base_url (str): Base API url. Default "https://api.clarifai.com"
+        pat (str): A personal access token for authentication. Can be set as env var CLARIFAI_PAT
+        token (str): A session token for authentication. Accepts either a session token or a pat. Can be set as env var CLARIFAI_SESSION_TOKEN
         **kwargs: Additional keyword arguments to be passed to the Input
     """
     self.user_id = user_id
@@ -46,7 +49,8 @@ class Inputs(Lister, BaseClient):
     self.kwargs = {**kwargs}
     self.input_info = resources_pb2.Input(**self.kwargs)
     self.logger = get_logger(logger_level=logger_level, name=__name__)
-    BaseClient.__init__(
+    BaseClient.__init__(
+        self, user_id=self.user_id, app_id=self.app_id, base=base_url, pat=pat, token=token)
     Lister.__init__(self)

   @staticmethod
@@ -660,15 +664,39 @@ class Inputs(Lister, BaseClient):
         user_app_id=self.user_app_id, inputs=inputs, inputs_add_job_id=input_job_id)
     response = self._grpc_request(self.STUB.PostInputs, request)
     if response.status.code != status_code_pb2.SUCCESS:
-
-      self.logger.warning(response
-
-
+      if show_log:
+        self.logger.warning(response)
+      else:
+        return input_job_id, response
     else:
       if show_log:
         self.logger.info("\nInputs Uploaded\n%s", response.status)

-    return input_job_id
+    return input_job_id, response
+
+  def patch_inputs(self, inputs: List[Input], action: str = 'merge') -> str:
+    """Patch list of input objects to the app.
+
+    Args:
+        inputs (list): List of input objects to upload.
+        action (str): Action to perform on the input. Options: 'merge', 'overwrite', 'remove'.
+
+    Returns:
+        response: Response from the grpc request.
+    """
+    if not isinstance(inputs, list):
+      raise UserError("inputs must be a list of Input objects")
+    uuid.uuid4().hex  # generate a unique id for this job
+    request = service_pb2.PatchInputsRequest(
+        user_app_id=self.user_app_id, inputs=inputs, action=action)
+    response = self._grpc_request(self.STUB.PatchInputs, request)
+    if response.status.code != status_code_pb2.SUCCESS:
+      try:
+        self.logger.warning(f"Patch inputs failed, status: {response.annotations[0].status}")
+      except Exception:
+        self.logger.warning(f"Patch inputs failed, status: {response.status.details}")
+
+    self.logger.info("\nPatch Inputs Successful\n%s", response.status)

   def upload_annotations(self, batch_annot: List[resources_pb2.Annotation], show_log: bool = True
                          ) -> Union[List[resources_pb2.Annotation], List[None]]:
@@ -705,7 +733,7 @@ class Inputs(Lister, BaseClient):
     Returns:
         input_job_id: job id for the upload request.
     """
-    input_job_id = self.upload_inputs(inputs, False)
+    input_job_id, _ = self.upload_inputs(inputs, False)
     self._wait_for_inputs(input_job_id)
     failed_inputs = self._delete_failed_inputs(inputs)
```
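With this change `upload_inputs` returns an `(input_job_id, response)` pair, and the new `patch_inputs` method exposes the PatchInputs endpoint directly. A hedged usage sketch, assuming `Inputs.get_input_from_url` (an existing helper on `Inputs`, not shown in this diff) and placeholder IDs and URLs:

```python
# Sketch of the new patch_inputs API; the IDs and URL are placeholders.
from clarifai.client.input import Inputs

inputs = Inputs(user_id="user_id", app_id="app_id")

# get_input_from_url builds an Input proto; here it targets an input ID
# that is assumed to already exist in the app.
updated = Inputs.get_input_from_url(
    input_id="existing_input_id",
    image_url="https://samples.clarifai.com/metro-north.jpg")

# action is one of 'merge', 'overwrite', 'remove' ('merge' is the default).
inputs.patch_inputs([updated], action="merge")
```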