clarifai 10.1.0__py3-none-any.whl → 10.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
clarifai/client/app.py CHANGED
@@ -32,6 +32,7 @@ class App(Lister, BaseClient):
                app_id: str = None,
                base_url: str = "https://api.clarifai.com",
                pat: str = None,
+               token: str = None,
                **kwargs):
     """Initializes an App object.
 
@@ -40,6 +41,7 @@ class App(Lister, BaseClient):
         app_id (str): The App ID for the App to interact with.
         base_url (str): Base API url. Default "https://api.clarifai.com"
         pat (str): A personal access token for authentication. Can be set as env var CLARIFAI_PAT
+        token (str): A session token for authentication. Accepts either a session token or a pat. Can be set as env var CLARIFAI_SESSION_TOKEN
         **kwargs: Additional keyword arguments to be passed to the App.
             - name (str): The name of the app.
             - description (str): The description of the app.
@@ -52,7 +54,8 @@ class App(Lister, BaseClient):
     self.kwargs = {**kwargs, 'id': app_id}
     self.app_info = resources_pb2.App(**self.kwargs)
     self.logger = get_logger(logger_level="INFO", name=__name__)
-    BaseClient.__init__(self, user_id=self.user_id, app_id=self.id, base=base_url, pat=pat)
+    BaseClient.__init__(
+        self, user_id=self.user_id, app_id=self.id, base=base_url, pat=pat, token=token)
     Lister.__init__(self)
 
   def list_datasets(self, page_no: int = None,
@@ -85,7 +88,7 @@ class App(Lister, BaseClient):
     for dataset_info in all_datasets_info:
       if 'version' in list(dataset_info.keys()):
         del dataset_info['version']['metrics']
-      yield Dataset(base_url=self.base, pat=self.pat, **dataset_info)
+      yield Dataset.from_auth_helper(auth=self.auth_helper, **dataset_info)
 
   def list_models(self,
                   filter_by: Dict[str, Any] = {},
@@ -126,7 +129,7 @@ class App(Lister, BaseClient):
       if only_in_app:
         if model_info['app_id'] != self.id:
           continue
-      yield Model(base_url=self.base, pat=self.pat, **model_info)
+      yield Model.from_auth_helper(auth=self.auth_helper, **model_info)
 
   def list_workflows(self,
                      filter_by: Dict[str, Any] = {},
@@ -165,7 +168,7 @@ class App(Lister, BaseClient):
       if only_in_app:
         if workflow_info['app_id'] != self.id:
           continue
-      yield Workflow(base_url=self.base, pat=self.pat, **workflow_info)
+      yield Workflow.from_auth_helper(auth=self.auth_helper, **workflow_info)
 
   def list_modules(self,
                    filter_by: Dict[str, Any] = {},
@@ -204,7 +207,7 @@ class App(Lister, BaseClient):
       if only_in_app:
         if module_info['app_id'] != self.id:
           continue
-      yield Module(base_url=self.base, pat=self.pat, **module_info)
+      yield Module.from_auth_helper(auth=self.auth_helper, **module_info)
 
   def list_installed_module_versions(self,
                                      filter_by: Dict[str, Any] = {},
@@ -239,11 +242,8 @@ class App(Lister, BaseClient):
     for imv_info in all_imv_infos:
       del imv_info['deploy_url']
       del imv_info['installed_module_version_id']  # TODO: remove this after the backend fix
-      yield Module(
-          module_id=imv_info['module_version']['module_id'],
-          base_url=self.base,
-          pat=self.pat,
-          **imv_info)
+      yield Module.from_auth_helper(
+          auth=self.auth_helper, module_id=imv_info['module_version']['module_id'], **imv_info)
 
   def list_concepts(self, page_no: int = None,
                     per_page: int = None) -> Generator[Concept, None, None]:
@@ -308,14 +308,8 @@ class App(Lister, BaseClient):
     if response.status.code != status_code_pb2.SUCCESS:
       raise Exception(response.status)
     self.logger.info("\nDataset created\n%s", response.status)
-    kwargs.update({
-        'app_id': self.id,
-        'user_id': self.user_id,
-        'base_url': self.base,
-        'pat': self.pat
-    })
 
-    return Dataset(dataset_id=dataset_id, **kwargs)
+    return Dataset.from_auth_helper(self.auth_helper, dataset_id=dataset_id, **kwargs)
 
   def create_model(self, model_id: str, **kwargs) -> Model:
     """Creates a model for the app.
@@ -339,14 +333,11 @@ class App(Lister, BaseClient):
       raise Exception(response.status)
     self.logger.info("\nModel created\n%s", response.status)
     kwargs.update({
-        'app_id': self.id,
-        'user_id': self.user_id,
+        'model_id': model_id,
         'model_type_id': response.model.model_type_id,
-        'base_url': self.base,
-        'pat': self.pat
     })
 
-    return Model(model_id=model_id, **kwargs)
+    return Model.from_auth_helper(auth=self.auth_helper, **kwargs)
 
   def create_workflow(self,
                       config_filepath: str,
@@ -436,9 +427,8 @@ class App(Lister, BaseClient):
       display_workflow_tree(dict_response["workflows"][0]["nodes"])
     kwargs = self.process_response_keys(dict_response[list(dict_response.keys())[1]][0],
                                         "workflow")
-    kwargs.update({'base_url': self.base, 'pat': self.pat})
 
-    return Workflow(**kwargs)
+    return Workflow.from_auth_helper(auth=self.auth_helper, **kwargs)
 
   def create_module(self, module_id: str, description: str, **kwargs) -> Module:
     """Creates a module for the app.
@@ -464,14 +454,8 @@ class App(Lister, BaseClient):
     if response.status.code != status_code_pb2.SUCCESS:
       raise Exception(response.status)
     self.logger.info("\nModule created\n%s", response.status)
-    kwargs.update({
-        'app_id': self.id,
-        'user_id': self.user_id,
-        'base_url': self.base,
-        'pat': self.pat
-    })
 
-    return Module(module_id=module_id, **kwargs)
+    return Module.from_auth_helper(auth=self.auth_helper, module_id=module_id, **kwargs)
 
   def dataset(self, dataset_id: str, **kwargs) -> Dataset:
     """Returns a Dataset object for the existing dataset ID.
@@ -496,8 +480,7 @@ class App(Lister, BaseClient):
     kwargs = self.process_response_keys(dict_response[list(dict_response.keys())[1]],
                                         list(dict_response.keys())[1])
     kwargs['version'] = response.dataset.version if response.dataset.version else None
-    kwargs.update({'base_url': self.base, 'pat': self.pat})
-    return Dataset(**kwargs)
+    return Dataset.from_auth_helper(auth=self.auth_helper, **kwargs)
 
   def model(self, model_id: str, model_version_id: str = "", **kwargs) -> Model:
     """Returns a Model object for the existing model ID.
@@ -532,9 +515,8 @@ class App(Lister, BaseClient):
     kwargs = self.process_response_keys(dict_response['model'], 'model')
     kwargs[
         'model_version'] = response.model.model_version if response.model.model_version else None
-    kwargs.update({'base_url': self.base, 'pat': self.pat})
 
-    return Model(**kwargs)
+    return Model.from_auth_helper(self.auth_helper, **kwargs)
 
   def workflow(self, workflow_id: str, **kwargs) -> Workflow:
     """Returns a workflow object for the existing workflow ID.
@@ -558,9 +540,8 @@ class App(Lister, BaseClient):
     dict_response = MessageToDict(response, preserving_proto_field_name=True)
     kwargs = self.process_response_keys(dict_response[list(dict_response.keys())[1]],
                                         list(dict_response.keys())[1])
-    kwargs.update({'base_url': self.base, 'pat': self.pat})
 
-    return Workflow(**kwargs)
+    return Workflow.from_auth_helper(auth=self.auth_helper, **kwargs)
 
   def module(self, module_id: str, module_version_id: str = "", **kwargs) -> Module:
     """Returns a Module object for the existing module ID.
@@ -585,9 +566,8 @@ class App(Lister, BaseClient):
       raise Exception(response.status)
     dict_response = MessageToDict(response, preserving_proto_field_name=True)
     kwargs = self.process_response_keys(dict_response['module'], 'module')
-    kwargs.update({'base_url': self.base, 'pat': self.pat})
 
-    return Module(**kwargs)
+    return Module.from_auth_helper(auth=self.auth_helper, **kwargs)
 
   def inputs(self,):
     """Returns an Input object.
@@ -595,7 +575,7 @@ class App(Lister, BaseClient):
     Returns:
         Inputs: An input object.
     """
-    return Inputs(self.user_id, self.id, base_url=self.base, pat=self.pat)
+    return Inputs.from_auth_helper(self.auth_helper)
 
   def delete_dataset(self, dataset_id: str) -> None:
     """Deletes an dataset for the user.
@@ -684,9 +664,9 @@ class App(Lister, BaseClient):
         >>> app = App(app_id="app_id", user_id="user_id")
         >>> search_client = app.search(top_k=12, metric="euclidean")
     """
-    user_id = kwargs.get("user_id", self.user_app_id.user_id)
-    app_id = kwargs.get("app_id", self.user_app_id.app_id)
-    return Search(user_id=user_id, app_id=app_id, base_url=self.base, pat=self.pat, **kwargs)
+    kwargs.get("user_id", self.user_app_id.user_id)
+    kwargs.get("app_id", self.user_app_id.app_id)
+    return Search.from_auth_helper(auth=self.auth_helper, **kwargs)
 
   def __getattr__(self, name):
     return getattr(self.app_info, name)
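
Taken together, the app.py changes make a session token a first-class alternative to a PAT, and every child object (Dataset, Model, Workflow, Module, Inputs, Search) is now built from the shared auth_helper instead of re-reading base_url/pat keyword arguments. A minimal usage sketch, assuming placeholder IDs and credentials (not values from this diff):

from clarifai.client.app import App

# token= may be passed directly or exported as CLARIFAI_SESSION_TOKEN;
# pat= / CLARIFAI_PAT keeps working as before.
app = App(user_id="my-user", app_id="my-app", token="<session-token>")

# Listing helpers now yield objects created via *.from_auth_helper(auth=self.auth_helper, ...),
# so each Dataset below reuses the App's token/PAT and base URL automatically.
for dataset in app.list_datasets():
  print(dataset.id)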
clarifai/client/base.py CHANGED
@@ -7,7 +7,7 @@ from google.protobuf.wrappers_pb2 import BoolValue
 
 from clarifai.client.auth import create_stub
 from clarifai.client.auth.helper import ClarifaiAuthHelper
-from clarifai.errors import ApiError
+from clarifai.errors import ApiError, UserError
 from clarifai.utils.misc import get_from_dict_or_env
 
 
@@ -19,9 +19,11 @@ class BaseClient:
         - user_id (str): A user ID for authentication.
         - app_id (str): An app ID for the application to interact with.
         - pat (str): A personal access token for authentication.
+        - token (str): A session token for authentication. Accepts either a session token or a pat.
         - base (str): The base URL for the API endpoint. Defaults to 'https://api.clarifai.com'.
         - ui (str): The URL for the UI. Defaults to 'https://clarifai.com'.
 
+
     Attributes:
         auth_helper (ClarifaiAuthHelper): An instance of ClarifaiAuthHelper for authentication.
         STUB (Stub): The gRPC Stub object for API interaction.
@@ -31,15 +33,53 @@ class BaseClient:
   """
 
   def __init__(self, **kwargs):
-    pat = get_from_dict_or_env(key="pat", env_key="CLARIFAI_PAT", **kwargs)
-    kwargs.update({'pat': pat})
+    token, pat = "", ""
+    try:
+      pat = get_from_dict_or_env(key="pat", env_key="CLARIFAI_PAT", **kwargs)
+    except UserError:
+      token = get_from_dict_or_env(key="token", env_key="CLARIFAI_SESSION_TOKEN", **kwargs)
+    finally:
+      assert token or pat, Exception(
+          "Need 'pat' or 'token' in args or use one of the CLARIFAI_PAT or CLARIFAI_SESSION_TOKEN env vars"
+      )
+    kwargs.update({'token': token, 'pat': pat})
+
     self.auth_helper = ClarifaiAuthHelper(**kwargs, validate=False)
     self.STUB = create_stub(self.auth_helper)
     self.metadata = self.auth_helper.metadata
     self.pat = self.auth_helper.pat
+    self.token = self.auth_helper._token
     self.user_app_id = self.auth_helper.get_user_app_id_proto()
     self.base = self.auth_helper.base
 
+  @classmethod
+  def from_auth_helper(cls, auth: ClarifaiAuthHelper, **kwargs):
+    default_kwargs = {
+        "user_id": kwargs.get("user_id", None) or auth.user_id,
+        "app_id": kwargs.get("app_id", None) or auth.app_id,
+        "pat": kwargs.get("pat", None) or auth.pat,
+        "token": kwargs.get("token", None) or auth._token,
+    }
+    _base = kwargs.get("base", None) or auth.base
+    _clss = cls.__mro__[0]
+    if _clss == BaseClient:
+      kwargs = {
+          **default_kwargs,
+          "base": _base,  # Baseclient uses `base`
+          "ui": kwargs.get("ui", None) or auth.ui
+      }
+    else:
+      # Remove user_id and app_id if a custom URL is provided
+      if kwargs.get("url"):
+        default_kwargs.pop("user_id", "")
+        default_kwargs.pop("app_id", "")
+      # Remove app_id if the class name contains "Runner"
+      if 'Runner' in _clss.__name__:
+        default_kwargs.pop("app_id", "")
+      kwargs.update({**default_kwargs, "base_url": _base})
+
+    return cls(**kwargs)
+
   def _grpc_request(self, method: Callable, argument: Any):
     """Makes a gRPC request to the API.
 
@@ -52,7 +92,7 @@ class BaseClient:
     """
 
     try:
-      res = method(argument)
+      res = method(argument, metadata=self.auth_helper.metadata)
       # MessageToDict(res) TODO global debug logger
       return res
     except ApiError:
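
The new BaseClient.from_auth_helper classmethod is what the App and Dataset call sites above rely on: it copies user_id, app_id, pat, token, and the base URL out of an existing ClarifaiAuthHelper and forwards them to the subclass constructor. A rough sketch of calling it directly, with placeholder credentials (assumptions, not values from this diff):

from clarifai.client.auth.helper import ClarifaiAuthHelper
from clarifai.client.dataset import Dataset

# One helper holds the credentials; validate=False mirrors how BaseClient builds it internally.
helper = ClarifaiAuthHelper(user_id="my-user", app_id="my-app", pat="<pat>", validate=False)

# Roughly equivalent to Dataset(dataset_id=..., user_id=..., app_id=..., pat=..., base_url=helper.base),
# but pat/token/base are read from the helper, so the credentials live in one place.
ds = Dataset.from_auth_helper(helper, dataset_id="my-dataset")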
clarifai/client/dataset.py CHANGED
@@ -1,3 +1,4 @@
+import logging
 import os
 import time
 import uuid
@@ -12,12 +13,13 @@ from clarifai_grpc.grpc.api.service_pb2 import MultiInputResponse
 from clarifai_grpc.grpc.api.status import status_code_pb2, status_pb2
 from google.protobuf.json_format import MessageToDict
 from requests.adapters import HTTPAdapter, Retry
+from tabulate import tabulate
 from tqdm import tqdm
 
 from clarifai.client.base import BaseClient
 from clarifai.client.input import Inputs
 from clarifai.client.lister import Lister
-from clarifai.constants.dataset import DATASET_UPLOAD_TASKS
+from clarifai.constants.dataset import DATASET_UPLOAD_TASKS, MAX_RETRIES
 from clarifai.datasets.export.inputs_annotations import (DatasetExportReader,
                                                          InputAnnotationDownloader)
 from clarifai.datasets.upload.base import ClarifaiDataLoader
@@ -27,7 +29,7 @@ from clarifai.datasets.upload.text import TextClassificationDataset
 from clarifai.datasets.upload.utils import DisplayUploadStatus
 from clarifai.errors import UserError
 from clarifai.urls.helper import ClarifaiUrlHelper
-from clarifai.utils.logging import add_file_handler, get_logger
+from clarifai.utils.logging import add_file_handler, get_logger, process_log_files
 from clarifai.utils.misc import BackoffIterator, Chunker
 
 ClarifaiDatasetType = TypeVar('ClarifaiDatasetType', VisualClassificationDataset,
@@ -43,6 +45,7 @@ class Dataset(Lister, BaseClient):
                dataset_id: str = None,
                base_url: str = "https://api.clarifai.com",
                pat: str = None,
+               token: str = None,
                **kwargs):
     """Initializes a Dataset object.
 
@@ -51,6 +54,7 @@ class Dataset(Lister, BaseClient):
         dataset_id (str): The Dataset ID within the App to interact with.
         base_url (str): Base API url. Default "https://api.clarifai.com"
         pat (str): A personal access token for authentication. Can be set as env var CLARIFAI_PAT
+        token (str): A session token for authentication. Accepts either a session token or a pat. Can be set as env var CLARIFAI_SESSION_TOKEN
         **kwargs: Additional keyword arguments to be passed to the Dataset.
     """
     if url and dataset_id:
@@ -66,9 +70,11 @@ class Dataset(Lister, BaseClient):
     self.max_retires = 10
     self.batch_size = 128  # limit max protos in a req
     self.task = None  # Upload dataset type
-    self.input_object = Inputs(user_id=self.user_id, app_id=self.app_id, pat=pat)
+    self.input_object = Inputs(
+        user_id=self.user_id, app_id=self.app_id, pat=pat, token=token, base_url=base_url)
     self.logger = get_logger(logger_level="INFO", name=__name__)
-    BaseClient.__init__(self, user_id=self.user_id, app_id=self.app_id, base=base_url, pat=pat)
+    BaseClient.__init__(
+        self, user_id=self.user_id, app_id=self.app_id, base=base_url, pat=pat, token=token)
     Lister.__init__(self)
 
   def create_version(self, **kwargs) -> 'Dataset':
@@ -98,13 +104,10 @@ class Dataset(Lister, BaseClient):
     self.logger.info("\nDataset Version created\n%s", response.status)
     kwargs.update({
         'dataset_id': self.id,
-        'app_id': self.app_id,
-        'user_id': self.user_id,
         'version': response.dataset_versions[0],
-        'base_url': self.base,
-        'pat': self.pat
     })
-    return Dataset(**kwargs)
+
+    return Dataset.from_auth_helper(self.auth_helper, **kwargs)
 
   def delete_version(self, version_id: str) -> None:
     """Deletes a dataset version for the Dataset.
@@ -162,13 +165,9 @@ class Dataset(Lister, BaseClient):
       del dataset_version_info['metrics']
       kwargs = {
          'dataset_id': self.id,
-          'app_id': self.app_id,
-          'user_id': self.user_id,
          'version': resources_pb2.DatasetVersion(**dataset_version_info),
-          'base_url': self.base,
-          'pat': self.pat
      }
-      yield Dataset(**kwargs)
+      yield Dataset.from_auth_helper(self.auth_helper, **kwargs)
 
   def _concurrent_annot_upload(self, annots: List[List[resources_pb2.Annotation]]
                               ) -> Union[List[resources_pb2.Annotation], List[None]]:
@@ -196,11 +195,11 @@ class Dataset(Lister, BaseClient):
 
     return retry_annot_upload
 
-  def _delete_failed_inputs(
-      self,
-      batch_input_ids: List[int],
-      dataset_obj: ClarifaiDatasetType,
-      upload_response: MultiInputResponse = None) -> Tuple[List[int], List[int]]:
+  def _delete_failed_inputs(self,
+                            batch_input_ids: List[int],
+                            dataset_obj: ClarifaiDatasetType,
+                            upload_response: MultiInputResponse = None,
+                            batch_no: Optional[int] = None) -> Tuple[List[int], List[int]]:
     """Delete failed input ids from clarifai platform dataset.
 
     Args:
@@ -235,8 +234,19 @@ class Dataset(Lister, BaseClient):
     if duplicate_input_ids:
       success_input_ids = list(set(success_input_ids.copy()) - set(duplicate_input_ids.copy()))
       failed_input_ids = list(set(failed_input_ids) - set(duplicate_input_ids))
+      duplicate_details = [[
+          input_ids[id], id, "Input has a duplicate ID.",
+          dataset_obj.data_generator[input_ids[id]].image_path,
+          dataset_obj.data_generator[input_ids[id]].labels,
+          dataset_obj.data_generator[input_ids[id]].metadata
+      ] for id in duplicate_input_ids]
+      duplicate_table = tabulate(
+          duplicate_details,
+          headers=["Index", "Input ID", "Status", "Image Path", "Labels", "Metadata"],
+          tablefmt="grid")
+      timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
       self.logger.warning(
-          f"Upload Failed for {len(duplicate_input_ids)} inputs in current batch: Duplicate input ids: {duplicate_input_ids}"
+          f"{timestamp}\nFailed to upload {len(duplicate_input_ids)} inputs due to duplicate IDs in current batch {batch_no}:\n{duplicate_table}\n\n"
       )
 
     #delete failed inputs
@@ -247,7 +257,11 @@ class Dataset(Lister, BaseClient):
     return [input_ids[id] for id in success_input_ids], [input_ids[id] for id in failed_input_ids]
 
   def _upload_inputs_annotations(
-      self, batch_input_ids: List[int], dataset_obj: ClarifaiDatasetType
+      self,
+      batch_input_ids: List[int],
+      dataset_obj: ClarifaiDatasetType,
+      batch_no: Optional[int] = None,
+      is_retry_duplicates: bool = False,
   ) -> Tuple[List[int], List[resources_pb2.Annotation], MultiInputResponse]:
     """Uploads batch of inputs and annotations concurrently to clarifai platform dataset.
 
@@ -261,12 +275,16 @@ class Dataset(Lister, BaseClient):
       response: upload response proto
     """
     input_protos, _ = dataset_obj.get_protos(batch_input_ids)
+    if is_retry_duplicates:
+      for inp in input_protos:
+        inp.id = uuid.uuid4().hex
+
     input_job_id, _response = self.input_object.upload_inputs(inputs=input_protos, show_log=False)
     retry_annot_protos = []
 
     self.input_object._wait_for_inputs(input_job_id)
     success_input_ids, failed_input_ids = self._delete_failed_inputs(batch_input_ids, dataset_obj,
-                                                                     _response)
+                                                                     _response, batch_no)
 
     if self.task in ["visual_detection", "visual_segmentation"] and success_input_ids:
       _, annotation_protos = dataset_obj.get_protos(success_input_ids)
@@ -277,7 +295,7 @@ class Dataset(Lister, BaseClient):
 
   def _retry_uploads(self, failed_input_ids: List[int],
                      retry_annot_protos: List[resources_pb2.Annotation],
-                     dataset_obj: ClarifaiDatasetType) -> None:
+                     dataset_obj: ClarifaiDatasetType, batch_no: Optional[int]) -> None:
     """Retry failed uploads.
 
     Args:
@@ -285,56 +303,87 @@ class Dataset(Lister, BaseClient):
       retry_annot_protos: failed annot protos
       dataset_obj: ClarifaiDataset object
     """
+
+    for _retry in range(MAX_RETRIES):
+      if not failed_input_ids and not retry_annot_protos:
+        break
+      if failed_input_ids:
+        retry_input_ids = [dataset_obj.all_input_ids[id] for id in failed_input_ids]
+        logging.warning(
+            f"Retrying upload for {len(failed_input_ids)} inputs in current batch: {retry_input_ids}\n"
+        )
+        failed_retrying_inputs, _, retry_response = self._upload_inputs_annotations(
+            failed_input_ids, dataset_obj, batch_no)
+        failed_input_ids = failed_retrying_inputs
+      if retry_annot_protos:
+        chunked_annotation_protos = Chunker(retry_annot_protos, self.batch_size).chunk()
+        _ = self._concurrent_annot_upload(chunked_annotation_protos)
+
+    #Log failed inputs
     if failed_input_ids:
-      retry_input_ids = [dataset_obj.all_input_ids[id] for id in failed_input_ids]
-      #Log Retrying inputs
+      failed_inputs_logs = []
+      input_map = {input.id: input for input in retry_response.inputs}
+      for index in failed_retrying_inputs:
+        failed_id = dataset_obj.all_input_ids[index]
+        input_details = input_map.get(failed_id)
+        if input_details:
+          failed_input_details = [
+              index, failed_id, input_details.status.details,
+              dataset_obj.data_generator[index].image_path,
+              dataset_obj.data_generator[index].labels, dataset_obj.data_generator[index].metadata
+          ]
+          failed_inputs_logs.append(failed_input_details)
+
+      failed_table = tabulate(
+          failed_inputs_logs,
+          headers=["Index", "Input ID", "Status", "Image Path", "Labels", "Metadata"],
+          tablefmt="grid")
+      timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
       self.logger.warning(
-          f"Retrying upload for {len(failed_input_ids)} inputs in current batch: {retry_input_ids}"
+          f"{timestamp}\nFailed to upload {len(failed_retrying_inputs)} inputs in current batch {batch_no}:\n{failed_table}\n\n"
      )
-      failed_retrying_inputs, _, retry_response = self._upload_inputs_annotations(
-          failed_input_ids, dataset_obj)
-      #Log failed inputs
-      if failed_retrying_inputs:
-        failed_retrying_input_ids = [
-            dataset_obj.all_input_ids[id] for id in failed_retrying_inputs
-        ]
-        failed_inputs_logs = {
-            input.id: input.status.details
-            for input in retry_response.inputs if input.id in failed_retrying_input_ids
-        }
-        self.logger.warning(
-            f"Failed to upload {len(failed_retrying_inputs)} inputs in current batch: {failed_inputs_logs}"
-        )
-    if retry_annot_protos:
-      chunked_annotation_protos = Chunker(retry_annot_protos, self.batch_size).chunk()
-      _ = self._concurrent_annot_upload(chunked_annotation_protos)
 
-  def _data_upload(self, dataset_obj: ClarifaiDatasetType) -> None:
+  def _data_upload(self,
+                   dataset_obj: ClarifaiDatasetType,
+                   is_log_retry: bool = False,
+                   log_retry_ids: List[int] = None,
+                   **kwargs) -> None:
     """Uploads inputs and annotations to clarifai platform dataset.
 
     Args:
-      dataset_obj: ClarifaiDataset object
+      dataset_obj: ClarifaiDataset object,
+      is_log_retry: True if the iteration is to retry uploads from logs.
+      **kwargs: Additional keyword arguments for retry uploading functionality..
+
+    Returns:
+      None
     """
-    input_ids = list(range(len(dataset_obj)))
+    if is_log_retry:
+      input_ids = log_retry_ids
+    else:
+      input_ids = list(range(len(dataset_obj)))
+
     chunk_input_ids = Chunker(input_ids, self.batch_size).chunk()
     with ThreadPoolExecutor(max_workers=self.num_workers) as executor:
      with tqdm(total=len(chunk_input_ids), desc='Uploading Dataset') as progress:
        # Submit all jobs to the executor and store the returned futures
        futures = [
-            executor.submit(self._upload_inputs_annotations, batch_input_ids, dataset_obj)
-            for batch_input_ids in chunk_input_ids
+            executor.submit(self._upload_inputs_annotations, batch_input_ids, dataset_obj,
+                            batch_no, **kwargs)
+            for batch_no, batch_input_ids in enumerate(chunk_input_ids)
        ]
 
-        for job in as_completed(futures):
+        for batch_no, job in enumerate(as_completed(futures)):
          retry_input_ids, retry_annot_protos, _ = job.result()
-          self._retry_uploads(retry_input_ids, retry_annot_protos, dataset_obj)
+          self._retry_uploads(retry_input_ids, retry_annot_protos, dataset_obj, batch_no)
          progress.update()
 
   def upload_dataset(self,
                      dataloader: Type[ClarifaiDataLoader],
                      batch_size: int = 32,
                      get_upload_status: bool = False,
-                     log_warnings: bool = False) -> None:
+                     log_warnings: bool = False,
+                     **kwargs) -> None:
     """Uploads a dataset to the app.
 
     Args:
@@ -342,6 +391,7 @@ class Dataset(Lister, BaseClient):
       batch_size (int): batch size for concurrent upload of inputs and annotations (max: 128)
      get_upload_status (bool): True if you want to get the upload status of the dataset
      log_warnings (bool): True if you want to save log warnings in a file
+      kwargs: Additional keyword arguments for retry uploading functionality..
    """
    #add file handler to log warnings
    if log_warnings:
@@ -369,11 +419,47 @@ class Dataset(Lister, BaseClient):
    if get_upload_status:
      pre_upload_stats = self.get_upload_status(pre_upload=True)
 
-    self._data_upload(dataset_obj)
+    self._data_upload(dataset_obj, **kwargs)
 
    if get_upload_status:
      self.get_upload_status(dataloader=dataloader, pre_upload_stats=pre_upload_stats)
 
+  def retry_upload_from_logs(self,
+                             log_file_path: str,
+                             dataloader: Type[ClarifaiDataLoader],
+                             retry_duplicates: bool = False,
+                             log_warnings: bool = False,
+                             **kwargs) -> None:
+    """Retries failed uploads from the log file.
+
+    Args:
+      log_file_path (str): path to the log file
+      dataloader (Type[ClarifaiDataLoader]): ClarifaiDataLoader object
+      retry_duplicate (bool): True if you want to retry duplicate inputs
+      kwargs: Additional keyword arguments for retry uploading functionality..
+    """
+
+    duplicate_input_ids, failed_input_ids = process_log_files(log_file_path)
+    if log_warnings:
+      add_file_handler(self.logger, f"Dataset_Upload{str(int(datetime.now().timestamp()))}.log")
+
+    if retry_duplicates and duplicate_input_ids:
+      logging.warning(f"Retrying upload for {len(duplicate_input_ids)} duplicate inputs...\n")
+      duplicate_inputs_indexes = [input["Index"] for input in duplicate_input_ids]
+      self.upload_dataset(
+          dataloader=dataloader,
+          log_retry_ids=duplicate_inputs_indexes,
+          is_retry_duplicates=True,
+          is_log_retry=True,
+          **kwargs)
+
+    if failed_input_ids:
+      #failed_inputs= ([input["Input_ID"] for input in failed_input_ids])
+      logging.warning(f"Retrying upload for {len(failed_input_ids)} failed inputs...\n")
+      failed_input_indexes = [input["Index"] for input in failed_input_ids]
+      self.upload_dataset(
+          dataloader=dataloader, log_retry_ids=failed_input_indexes, is_log_retry=True, **kwargs)
+
   def upload_from_csv(self,
                       csv_path: str,
                       input_type: str = 'text',
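
dataset.py also gains a log-driven retry path: duplicate and failed inputs are now logged as tabulate grids with a batch number and timestamp, and retry_upload_from_logs() parses such a log via process_log_files and re-submits only those inputs, optionally regenerating IDs for duplicates. A hedged usage sketch; my_loader stands in for any existing ClarifaiDataLoader implementation, the IDs and file name are placeholders, and credentials are assumed to come from CLARIFAI_PAT or CLARIFAI_SESSION_TOKEN:

from clarifai.client.dataset import Dataset

dataset = Dataset(user_id="my-user", app_id="my-app", dataset_id="my-dataset")

# 1) Upload with log_warnings=True so the duplicate/failed input tables above are also written
#    to a Dataset_Upload<timestamp>.log file; my_loader is an assumed ClarifaiDataLoader instance.
dataset.upload_dataset(dataloader=my_loader, log_warnings=True)

# 2) Re-upload only the inputs recorded in that log; retry_duplicates=True re-submits duplicate
#    inputs with fresh uuid4 IDs (the is_retry_duplicates path in _upload_inputs_annotations).
dataset.retry_upload_from_logs(
    log_file_path="Dataset_Upload1700000000.log",  # placeholder log file name
    dataloader=my_loader,
    retry_duplicates=True)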