clarifai 10.1.0__py3-none-any.whl → 10.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- clarifai/client/app.py +23 -43
- clarifai/client/base.py +44 -4
- clarifai/client/dataset.py +138 -52
- clarifai/client/input.py +37 -4
- clarifai/client/model.py +279 -8
- clarifai/client/module.py +7 -5
- clarifai/client/runner.py +3 -1
- clarifai/client/search.py +7 -3
- clarifai/client/user.py +14 -12
- clarifai/client/workflow.py +7 -4
- clarifai/constants/dataset.py +2 -0
- clarifai/datasets/upload/loaders/README.md +3 -4
- clarifai/datasets/upload/loaders/xview_detection.py +5 -5
- clarifai/models/model_serving/cli/_utils.py +1 -1
- clarifai/models/model_serving/cli/build.py +1 -1
- clarifai/models/model_serving/cli/upload.py +1 -1
- clarifai/models/model_serving/utils.py +3 -1
- clarifai/rag/rag.py +25 -11
- clarifai/rag/utils.py +21 -6
- clarifai/utils/evaluation/__init__.py +427 -0
- clarifai/utils/evaluation/helpers.py +522 -0
- clarifai/utils/logging.py +30 -0
- clarifai/utils/model_train.py +3 -1
- clarifai/versions.py +1 -1
- clarifai/workflows/validate.py +1 -1
- {clarifai-10.1.0.dist-info → clarifai-10.2.0.dist-info}/METADATA +46 -9
- {clarifai-10.1.0.dist-info → clarifai-10.2.0.dist-info}/RECORD +31 -30
- clarifai/datasets/upload/loaders/coco_segmentation.py +0 -98
- {clarifai-10.1.0.dist-info → clarifai-10.2.0.dist-info}/LICENSE +0 -0
- {clarifai-10.1.0.dist-info → clarifai-10.2.0.dist-info}/WHEEL +0 -0
- {clarifai-10.1.0.dist-info → clarifai-10.2.0.dist-info}/entry_points.txt +0 -0
- {clarifai-10.1.0.dist-info → clarifai-10.2.0.dist-info}/top_level.txt +0 -0
clarifai/client/app.py
CHANGED
@@ -32,6 +32,7 @@ class App(Lister, BaseClient):
                app_id: str = None,
                base_url: str = "https://api.clarifai.com",
                pat: str = None,
+               token: str = None,
                **kwargs):
     """Initializes an App object.
 
@@ -40,6 +41,7 @@ class App(Lister, BaseClient):
         app_id (str): The App ID for the App to interact with.
         base_url (str): Base API url. Default "https://api.clarifai.com"
         pat (str): A personal access token for authentication. Can be set as env var CLARIFAI_PAT
+        token (str): A session token for authentication. Accepts either a session token or a pat. Can be set as env var CLARIFAI_SESSION_TOKEN
         **kwargs: Additional keyword arguments to be passed to the App.
             - name (str): The name of the app.
             - description (str): The description of the app.
@@ -52,7 +54,8 @@ class App(Lister, BaseClient):
     self.kwargs = {**kwargs, 'id': app_id}
     self.app_info = resources_pb2.App(**self.kwargs)
     self.logger = get_logger(logger_level="INFO", name=__name__)
-    BaseClient.__init__(
+    BaseClient.__init__(
+        self, user_id=self.user_id, app_id=self.id, base=base_url, pat=pat, token=token)
     Lister.__init__(self)
 
   def list_datasets(self, page_no: int = None,
@@ -85,7 +88,7 @@ class App(Lister, BaseClient):
     for dataset_info in all_datasets_info:
       if 'version' in list(dataset_info.keys()):
         del dataset_info['version']['metrics']
-      yield Dataset(
+      yield Dataset.from_auth_helper(auth=self.auth_helper, **dataset_info)
 
   def list_models(self,
                   filter_by: Dict[str, Any] = {},
@@ -126,7 +129,7 @@ class App(Lister, BaseClient):
       if only_in_app:
        if model_info['app_id'] != self.id:
          continue
-      yield Model(
+      yield Model.from_auth_helper(auth=self.auth_helper, **model_info)
 
   def list_workflows(self,
                      filter_by: Dict[str, Any] = {},
@@ -165,7 +168,7 @@ class App(Lister, BaseClient):
       if only_in_app:
        if workflow_info['app_id'] != self.id:
          continue
-      yield Workflow(
+      yield Workflow.from_auth_helper(auth=self.auth_helper, **workflow_info)
 
   def list_modules(self,
                    filter_by: Dict[str, Any] = {},
@@ -204,7 +207,7 @@ class App(Lister, BaseClient):
       if only_in_app:
        if module_info['app_id'] != self.id:
          continue
-      yield Module(
+      yield Module.from_auth_helper(auth=self.auth_helper, **module_info)
 
   def list_installed_module_versions(self,
                                      filter_by: Dict[str, Any] = {},
@@ -239,11 +242,8 @@ class App(Lister, BaseClient):
     for imv_info in all_imv_infos:
       del imv_info['deploy_url']
       del imv_info['installed_module_version_id']  # TODO: remove this after the backend fix
-      yield Module(
-          module_id=imv_info['module_version']['module_id'],
-          base_url=self.base,
-          pat=self.pat,
-          **imv_info)
+      yield Module.from_auth_helper(
+          auth=self.auth_helper, module_id=imv_info['module_version']['module_id'], **imv_info)
 
   def list_concepts(self, page_no: int = None,
                     per_page: int = None) -> Generator[Concept, None, None]:
@@ -308,14 +308,8 @@ class App(Lister, BaseClient):
     if response.status.code != status_code_pb2.SUCCESS:
       raise Exception(response.status)
     self.logger.info("\nDataset created\n%s", response.status)
-    kwargs.update({
-        'app_id': self.id,
-        'user_id': self.user_id,
-        'base_url': self.base,
-        'pat': self.pat
-    })
 
-    return Dataset(dataset_id=dataset_id, **kwargs)
+    return Dataset.from_auth_helper(self.auth_helper, dataset_id=dataset_id, **kwargs)
 
   def create_model(self, model_id: str, **kwargs) -> Model:
     """Creates a model for the app.
@@ -339,14 +333,11 @@ class App(Lister, BaseClient):
       raise Exception(response.status)
     self.logger.info("\nModel created\n%s", response.status)
     kwargs.update({
-        '
-        'user_id': self.user_id,
+        'model_id': model_id,
         'model_type_id': response.model.model_type_id,
-        'base_url': self.base,
-        'pat': self.pat
     })
 
-    return Model(
+    return Model.from_auth_helper(auth=self.auth_helper, **kwargs)
 
   def create_workflow(self,
                       config_filepath: str,
@@ -436,9 +427,8 @@ class App(Lister, BaseClient):
       display_workflow_tree(dict_response["workflows"][0]["nodes"])
     kwargs = self.process_response_keys(dict_response[list(dict_response.keys())[1]][0],
                                         "workflow")
-    kwargs.update({'base_url': self.base, 'pat': self.pat})
 
-    return Workflow(**kwargs)
+    return Workflow.from_auth_helper(auth=self.auth_helper, **kwargs)
 
   def create_module(self, module_id: str, description: str, **kwargs) -> Module:
     """Creates a module for the app.
@@ -464,14 +454,8 @@ class App(Lister, BaseClient):
     if response.status.code != status_code_pb2.SUCCESS:
       raise Exception(response.status)
     self.logger.info("\nModule created\n%s", response.status)
-    kwargs.update({
-        'app_id': self.id,
-        'user_id': self.user_id,
-        'base_url': self.base,
-        'pat': self.pat
-    })
 
-    return Module(module_id=module_id, **kwargs)
+    return Module.from_auth_helper(auth=self.auth_helper, module_id=module_id, **kwargs)
 
   def dataset(self, dataset_id: str, **kwargs) -> Dataset:
     """Returns a Dataset object for the existing dataset ID.
@@ -496,8 +480,7 @@ class App(Lister, BaseClient):
     kwargs = self.process_response_keys(dict_response[list(dict_response.keys())[1]],
                                         list(dict_response.keys())[1])
     kwargs['version'] = response.dataset.version if response.dataset.version else None
-
-    return Dataset(**kwargs)
+    return Dataset.from_auth_helper(auth=self.auth_helper, **kwargs)
 
   def model(self, model_id: str, model_version_id: str = "", **kwargs) -> Model:
     """Returns a Model object for the existing model ID.
@@ -532,9 +515,8 @@ class App(Lister, BaseClient):
     kwargs = self.process_response_keys(dict_response['model'], 'model')
     kwargs[
         'model_version'] = response.model.model_version if response.model.model_version else None
-    kwargs.update({'base_url': self.base, 'pat': self.pat})
 
-    return Model(**kwargs)
+    return Model.from_auth_helper(self.auth_helper, **kwargs)
 
   def workflow(self, workflow_id: str, **kwargs) -> Workflow:
     """Returns a workflow object for the existing workflow ID.
@@ -558,9 +540,8 @@ class App(Lister, BaseClient):
     dict_response = MessageToDict(response, preserving_proto_field_name=True)
     kwargs = self.process_response_keys(dict_response[list(dict_response.keys())[1]],
                                         list(dict_response.keys())[1])
-    kwargs.update({'base_url': self.base, 'pat': self.pat})
 
-    return Workflow(**kwargs)
+    return Workflow.from_auth_helper(auth=self.auth_helper, **kwargs)
 
   def module(self, module_id: str, module_version_id: str = "", **kwargs) -> Module:
     """Returns a Module object for the existing module ID.
@@ -585,9 +566,8 @@ class App(Lister, BaseClient):
       raise Exception(response.status)
     dict_response = MessageToDict(response, preserving_proto_field_name=True)
     kwargs = self.process_response_keys(dict_response['module'], 'module')
-    kwargs.update({'base_url': self.base, 'pat': self.pat})
 
-    return Module(**kwargs)
+    return Module.from_auth_helper(auth=self.auth_helper, **kwargs)
 
   def inputs(self,):
     """Returns an Input object.
@@ -595,7 +575,7 @@ class App(Lister, BaseClient):
     Returns:
         Inputs: An input object.
     """
-    return Inputs(self.
+    return Inputs.from_auth_helper(self.auth_helper)
 
   def delete_dataset(self, dataset_id: str) -> None:
     """Deletes an dataset for the user.
@@ -684,9 +664,9 @@ class App(Lister, BaseClient):
         >>> app = App(app_id="app_id", user_id="user_id")
         >>> search_client = app.search(top_k=12, metric="euclidean")
     """
-
-
-    return Search(
+    kwargs.get("user_id", self.user_app_id.user_id)
+    kwargs.get("app_id", self.user_app_id.app_id)
+    return Search.from_auth_helper(auth=self.auth_helper, **kwargs)
 
   def __getattr__(self, name):
     return getattr(self.app_info, name)
clarifai/client/base.py
CHANGED
@@ -7,7 +7,7 @@ from google.protobuf.wrappers_pb2 import BoolValue
 
 from clarifai.client.auth import create_stub
 from clarifai.client.auth.helper import ClarifaiAuthHelper
-from clarifai.errors import ApiError
+from clarifai.errors import ApiError, UserError
 from clarifai.utils.misc import get_from_dict_or_env
 
 
@@ -19,9 +19,11 @@ class BaseClient:
       - user_id (str): A user ID for authentication.
       - app_id (str): An app ID for the application to interact with.
       - pat (str): A personal access token for authentication.
+      - token (str): A session token for authentication. Accepts either a session token or a pat.
       - base (str): The base URL for the API endpoint. Defaults to 'https://api.clarifai.com'.
       - ui (str): The URL for the UI. Defaults to 'https://clarifai.com'.
 
+
   Attributes:
       auth_helper (ClarifaiAuthHelper): An instance of ClarifaiAuthHelper for authentication.
       STUB (Stub): The gRPC Stub object for API interaction.
@@ -31,15 +33,53 @@ class BaseClient:
   """
 
   def __init__(self, **kwargs):
-    pat =
-
+    token, pat = "", ""
+    try:
+      pat = get_from_dict_or_env(key="pat", env_key="CLARIFAI_PAT", **kwargs)
+    except UserError:
+      token = get_from_dict_or_env(key="token", env_key="CLARIFAI_SESSION_TOKEN", **kwargs)
+    finally:
+      assert token or pat, Exception(
+          "Need 'pat' or 'token' in args or use one of the CLARIFAI_PAT or CLARIFAI_SESSION_TOKEN env vars"
+      )
+    kwargs.update({'token': token, 'pat': pat})
+
     self.auth_helper = ClarifaiAuthHelper(**kwargs, validate=False)
     self.STUB = create_stub(self.auth_helper)
     self.metadata = self.auth_helper.metadata
     self.pat = self.auth_helper.pat
+    self.token = self.auth_helper._token
     self.user_app_id = self.auth_helper.get_user_app_id_proto()
     self.base = self.auth_helper.base
 
+  @classmethod
+  def from_auth_helper(cls, auth: ClarifaiAuthHelper, **kwargs):
+    default_kwargs = {
+        "user_id": kwargs.get("user_id", None) or auth.user_id,
+        "app_id": kwargs.get("app_id", None) or auth.app_id,
+        "pat": kwargs.get("pat", None) or auth.pat,
+        "token": kwargs.get("token", None) or auth._token,
+    }
+    _base = kwargs.get("base", None) or auth.base
+    _clss = cls.__mro__[0]
+    if _clss == BaseClient:
+      kwargs = {
+          **default_kwargs,
+          "base": _base,  # Baseclient uses `base`
+          "ui": kwargs.get("ui", None) or auth.ui
+      }
+    else:
+      # Remove user_id and app_id if a custom URL is provided
+      if kwargs.get("url"):
+        default_kwargs.pop("user_id", "")
+        default_kwargs.pop("app_id", "")
+      # Remove app_id if the class name contains "Runner"
+      if 'Runner' in _clss.__name__:
+        default_kwargs.pop("app_id", "")
+      kwargs.update({**default_kwargs, "base_url": _base})
+
+    return cls(**kwargs)
+
   def _grpc_request(self, method: Callable, argument: Any):
     """Makes a gRPC request to the API.
 
@@ -52,7 +92,7 @@ class BaseClient:
     """
 
     try:
-      res = method(argument)
+      res = method(argument, metadata=self.auth_helper.metadata)
       # MessageToDict(res) TODO global debug logger
       return res
     except ApiError:
clarifai/client/dataset.py
CHANGED
@@ -1,3 +1,4 @@
+import logging
 import os
 import time
 import uuid
@@ -12,12 +13,13 @@ from clarifai_grpc.grpc.api.service_pb2 import MultiInputResponse
 from clarifai_grpc.grpc.api.status import status_code_pb2, status_pb2
 from google.protobuf.json_format import MessageToDict
 from requests.adapters import HTTPAdapter, Retry
+from tabulate import tabulate
 from tqdm import tqdm
 
 from clarifai.client.base import BaseClient
 from clarifai.client.input import Inputs
 from clarifai.client.lister import Lister
-from clarifai.constants.dataset import DATASET_UPLOAD_TASKS
+from clarifai.constants.dataset import DATASET_UPLOAD_TASKS, MAX_RETRIES
 from clarifai.datasets.export.inputs_annotations import (DatasetExportReader,
                                                          InputAnnotationDownloader)
 from clarifai.datasets.upload.base import ClarifaiDataLoader
@@ -27,7 +29,7 @@ from clarifai.datasets.upload.text import TextClassificationDataset
 from clarifai.datasets.upload.utils import DisplayUploadStatus
 from clarifai.errors import UserError
 from clarifai.urls.helper import ClarifaiUrlHelper
-from clarifai.utils.logging import add_file_handler, get_logger
+from clarifai.utils.logging import add_file_handler, get_logger, process_log_files
 from clarifai.utils.misc import BackoffIterator, Chunker
 
 ClarifaiDatasetType = TypeVar('ClarifaiDatasetType', VisualClassificationDataset,
@@ -43,6 +45,7 @@ class Dataset(Lister, BaseClient):
                dataset_id: str = None,
                base_url: str = "https://api.clarifai.com",
                pat: str = None,
+               token: str = None,
                **kwargs):
     """Initializes a Dataset object.
 
@@ -51,6 +54,7 @@ class Dataset(Lister, BaseClient):
         dataset_id (str): The Dataset ID within the App to interact with.
         base_url (str): Base API url. Default "https://api.clarifai.com"
         pat (str): A personal access token for authentication. Can be set as env var CLARIFAI_PAT
+        token (str): A session token for authentication. Accepts either a session token or a pat. Can be set as env var CLARIFAI_SESSION_TOKEN
         **kwargs: Additional keyword arguments to be passed to the Dataset.
     """
     if url and dataset_id:
@@ -66,9 +70,11 @@ class Dataset(Lister, BaseClient):
     self.max_retires = 10
     self.batch_size = 128  # limit max protos in a req
     self.task = None  # Upload dataset type
-    self.input_object = Inputs(
+    self.input_object = Inputs(
+        user_id=self.user_id, app_id=self.app_id, pat=pat, token=token, base_url=base_url)
     self.logger = get_logger(logger_level="INFO", name=__name__)
-    BaseClient.__init__(
+    BaseClient.__init__(
+        self, user_id=self.user_id, app_id=self.app_id, base=base_url, pat=pat, token=token)
     Lister.__init__(self)
 
   def create_version(self, **kwargs) -> 'Dataset':
@@ -98,13 +104,10 @@ class Dataset(Lister, BaseClient):
     self.logger.info("\nDataset Version created\n%s", response.status)
     kwargs.update({
         'dataset_id': self.id,
-        'app_id': self.app_id,
-        'user_id': self.user_id,
         'version': response.dataset_versions[0],
-        'base_url': self.base,
-        'pat': self.pat
     })
-
+
+    return Dataset.from_auth_helper(self.auth_helper, **kwargs)
 
   def delete_version(self, version_id: str) -> None:
     """Deletes a dataset version for the Dataset.
@@ -162,13 +165,9 @@ class Dataset(Lister, BaseClient):
       del dataset_version_info['metrics']
       kwargs = {
          'dataset_id': self.id,
-         'app_id': self.app_id,
-         'user_id': self.user_id,
          'version': resources_pb2.DatasetVersion(**dataset_version_info),
-         'base_url': self.base,
-         'pat': self.pat
       }
-      yield Dataset(**kwargs)
+      yield Dataset.from_auth_helper(self.auth_helper, **kwargs)
 
   def _concurrent_annot_upload(self, annots: List[List[resources_pb2.Annotation]]
                               ) -> Union[List[resources_pb2.Annotation], List[None]]:
@@ -196,11 +195,11 @@ class Dataset(Lister, BaseClient):
 
     return retry_annot_upload
 
-  def _delete_failed_inputs(
-
-
-
-
+  def _delete_failed_inputs(self,
+                            batch_input_ids: List[int],
+                            dataset_obj: ClarifaiDatasetType,
+                            upload_response: MultiInputResponse = None,
+                            batch_no: Optional[int] = None) -> Tuple[List[int], List[int]]:
     """Delete failed input ids from clarifai platform dataset.
 
     Args:
@@ -235,8 +234,19 @@ class Dataset(Lister, BaseClient):
     if duplicate_input_ids:
       success_input_ids = list(set(success_input_ids.copy()) - set(duplicate_input_ids.copy()))
       failed_input_ids = list(set(failed_input_ids) - set(duplicate_input_ids))
+      duplicate_details = [[
+          input_ids[id], id, "Input has a duplicate ID.",
+          dataset_obj.data_generator[input_ids[id]].image_path,
+          dataset_obj.data_generator[input_ids[id]].labels,
+          dataset_obj.data_generator[input_ids[id]].metadata
+      ] for id in duplicate_input_ids]
+      duplicate_table = tabulate(
+          duplicate_details,
+          headers=["Index", "Input ID", "Status", "Image Path", "Labels", "Metadata"],
+          tablefmt="grid")
+      timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
       self.logger.warning(
-          f"
+          f"{timestamp}\nFailed to upload {len(duplicate_input_ids)} inputs due to duplicate IDs in current batch {batch_no}:\n{duplicate_table}\n\n"
       )
 
     #delete failed inputs
@@ -247,7 +257,11 @@ class Dataset(Lister, BaseClient):
     return [input_ids[id] for id in success_input_ids], [input_ids[id] for id in failed_input_ids]
 
   def _upload_inputs_annotations(
-      self,
+      self,
+      batch_input_ids: List[int],
+      dataset_obj: ClarifaiDatasetType,
+      batch_no: Optional[int] = None,
+      is_retry_duplicates: bool = False,
   ) -> Tuple[List[int], List[resources_pb2.Annotation], MultiInputResponse]:
     """Uploads batch of inputs and annotations concurrently to clarifai platform dataset.
 
@@ -261,12 +275,16 @@ class Dataset(Lister, BaseClient):
         response: upload response proto
     """
     input_protos, _ = dataset_obj.get_protos(batch_input_ids)
+    if is_retry_duplicates:
+      for inp in input_protos:
+        inp.id = uuid.uuid4().hex
+
     input_job_id, _response = self.input_object.upload_inputs(inputs=input_protos, show_log=False)
     retry_annot_protos = []
 
     self.input_object._wait_for_inputs(input_job_id)
     success_input_ids, failed_input_ids = self._delete_failed_inputs(batch_input_ids, dataset_obj,
-                                                                     _response)
+                                                                     _response, batch_no)
 
     if self.task in ["visual_detection", "visual_segmentation"] and success_input_ids:
       _, annotation_protos = dataset_obj.get_protos(success_input_ids)
@@ -277,7 +295,7 @@ class Dataset(Lister, BaseClient):
 
   def _retry_uploads(self, failed_input_ids: List[int],
                      retry_annot_protos: List[resources_pb2.Annotation],
-                     dataset_obj: ClarifaiDatasetType) -> None:
+                     dataset_obj: ClarifaiDatasetType, batch_no: Optional[int]) -> None:
     """Retry failed uploads.
 
     Args:
@@ -285,56 +303,87 @@ class Dataset(Lister, BaseClient):
       retry_annot_protos: failed annot protos
       dataset_obj: ClarifaiDataset object
     """
+
+    for _retry in range(MAX_RETRIES):
+      if not failed_input_ids and not retry_annot_protos:
+        break
+      if failed_input_ids:
+        retry_input_ids = [dataset_obj.all_input_ids[id] for id in failed_input_ids]
+        logging.warning(
+            f"Retrying upload for {len(failed_input_ids)} inputs in current batch: {retry_input_ids}\n"
+        )
+        failed_retrying_inputs, _, retry_response = self._upload_inputs_annotations(
+            failed_input_ids, dataset_obj, batch_no)
+        failed_input_ids = failed_retrying_inputs
+      if retry_annot_protos:
+        chunked_annotation_protos = Chunker(retry_annot_protos, self.batch_size).chunk()
+        _ = self._concurrent_annot_upload(chunked_annotation_protos)
+
+    #Log failed inputs
     if failed_input_ids:
-
-
+      failed_inputs_logs = []
+      input_map = {input.id: input for input in retry_response.inputs}
+      for index in failed_retrying_inputs:
+        failed_id = dataset_obj.all_input_ids[index]
+        input_details = input_map.get(failed_id)
+        if input_details:
+          failed_input_details = [
+              index, failed_id, input_details.status.details,
+              dataset_obj.data_generator[index].image_path,
+              dataset_obj.data_generator[index].labels, dataset_obj.data_generator[index].metadata
+          ]
+          failed_inputs_logs.append(failed_input_details)
+
+      failed_table = tabulate(
+          failed_inputs_logs,
+          headers=["Index", "Input ID", "Status", "Image Path", "Labels", "Metadata"],
+          tablefmt="grid")
+      timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
      self.logger.warning(
-          f"
+          f"{timestamp}\nFailed to upload {len(failed_retrying_inputs)} inputs in current batch {batch_no}:\n{failed_table}\n\n"
      )
-      failed_retrying_inputs, _, retry_response = self._upload_inputs_annotations(
-          failed_input_ids, dataset_obj)
-      #Log failed inputs
-      if failed_retrying_inputs:
-        failed_retrying_input_ids = [
-            dataset_obj.all_input_ids[id] for id in failed_retrying_inputs
-        ]
-        failed_inputs_logs = {
-            input.id: input.status.details
-            for input in retry_response.inputs if input.id in failed_retrying_input_ids
-        }
-        self.logger.warning(
-            f"Failed to upload {len(failed_retrying_inputs)} inputs in current batch: {failed_inputs_logs}"
-        )
-      if retry_annot_protos:
-        chunked_annotation_protos = Chunker(retry_annot_protos, self.batch_size).chunk()
-        _ = self._concurrent_annot_upload(chunked_annotation_protos)
 
-  def _data_upload(self,
+  def _data_upload(self,
+                   dataset_obj: ClarifaiDatasetType,
+                   is_log_retry: bool = False,
+                   log_retry_ids: List[int] = None,
+                   **kwargs) -> None:
     """Uploads inputs and annotations to clarifai platform dataset.
 
     Args:
-      dataset_obj: ClarifaiDataset object
+      dataset_obj: ClarifaiDataset object,
+      is_log_retry: True if the iteration is to retry uploads from logs.
+      **kwargs: Additional keyword arguments for retry uploading functionality..
+
+    Returns:
+      None
     """
-
+    if is_log_retry:
+      input_ids = log_retry_ids
+    else:
+      input_ids = list(range(len(dataset_obj)))
+
     chunk_input_ids = Chunker(input_ids, self.batch_size).chunk()
     with ThreadPoolExecutor(max_workers=self.num_workers) as executor:
       with tqdm(total=len(chunk_input_ids), desc='Uploading Dataset') as progress:
         # Submit all jobs to the executor and store the returned futures
         futures = [
-            executor.submit(self._upload_inputs_annotations, batch_input_ids, dataset_obj
-
+            executor.submit(self._upload_inputs_annotations, batch_input_ids, dataset_obj,
+                            batch_no, **kwargs)
+            for batch_no, batch_input_ids in enumerate(chunk_input_ids)
         ]
 
-        for job in as_completed(futures):
+        for batch_no, job in enumerate(as_completed(futures)):
          retry_input_ids, retry_annot_protos, _ = job.result()
-          self._retry_uploads(retry_input_ids, retry_annot_protos, dataset_obj)
+          self._retry_uploads(retry_input_ids, retry_annot_protos, dataset_obj, batch_no)
          progress.update()
 
   def upload_dataset(self,
                      dataloader: Type[ClarifaiDataLoader],
                      batch_size: int = 32,
                      get_upload_status: bool = False,
-                     log_warnings: bool = False
+                     log_warnings: bool = False,
+                     **kwargs) -> None:
     """Uploads a dataset to the app.
 
     Args:
@@ -342,6 +391,7 @@ class Dataset(Lister, BaseClient):
       batch_size (int): batch size for concurrent upload of inputs and annotations (max: 128)
       get_upload_status (bool): True if you want to get the upload status of the dataset
       log_warnings (bool): True if you want to save log warnings in a file
+      kwargs: Additional keyword arguments for retry uploading functionality..
     """
     #add file handler to log warnings
     if log_warnings:
@@ -369,11 +419,47 @@ class Dataset(Lister, BaseClient):
     if get_upload_status:
       pre_upload_stats = self.get_upload_status(pre_upload=True)
 
-    self._data_upload(dataset_obj)
+    self._data_upload(dataset_obj, **kwargs)
 
     if get_upload_status:
       self.get_upload_status(dataloader=dataloader, pre_upload_stats=pre_upload_stats)
 
+  def retry_upload_from_logs(self,
+                             log_file_path: str,
+                             dataloader: Type[ClarifaiDataLoader],
+                             retry_duplicates: bool = False,
+                             log_warnings: bool = False,
+                             **kwargs) -> None:
+    """Retries failed uploads from the log file.
+
+    Args:
+        log_file_path (str): path to the log file
+        dataloader (Type[ClarifaiDataLoader]): ClarifaiDataLoader object
+        retry_duplicate (bool): True if you want to retry duplicate inputs
+        kwargs: Additional keyword arguments for retry uploading functionality..
+    """
+
+    duplicate_input_ids, failed_input_ids = process_log_files(log_file_path)
+    if log_warnings:
+      add_file_handler(self.logger, f"Dataset_Upload{str(int(datetime.now().timestamp()))}.log")
+
+    if retry_duplicates and duplicate_input_ids:
+      logging.warning(f"Retrying upload for {len(duplicate_input_ids)} duplicate inputs...\n")
+      duplicate_inputs_indexes = [input["Index"] for input in duplicate_input_ids]
+      self.upload_dataset(
+          dataloader=dataloader,
+          log_retry_ids=duplicate_inputs_indexes,
+          is_retry_duplicates=True,
+          is_log_retry=True,
+          **kwargs)
+
+    if failed_input_ids:
+      #failed_inputs= ([input["Input_ID"] for input in failed_input_ids])
+      logging.warning(f"Retrying upload for {len(failed_input_ids)} failed inputs...\n")
+      failed_input_indexes = [input["Index"] for input in failed_input_ids]
+      self.upload_dataset(
+          dataloader=dataloader, log_retry_ids=failed_input_indexes, is_log_retry=True, **kwargs)
+
   def upload_from_csv(self,
                       csv_path: str,
                       input_type: str = 'text',