clarifai 10.0.1__py3-none-any.whl → 10.1.1__py3-none-any.whl

This diff compares the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
Files changed (83)
  1. clarifai/client/app.py +23 -43
  2. clarifai/client/base.py +46 -4
  3. clarifai/client/dataset.py +85 -33
  4. clarifai/client/input.py +35 -7
  5. clarifai/client/model.py +192 -11
  6. clarifai/client/module.py +8 -6
  7. clarifai/client/runner.py +3 -1
  8. clarifai/client/search.py +6 -3
  9. clarifai/client/user.py +14 -12
  10. clarifai/client/workflow.py +8 -5
  11. clarifai/datasets/upload/features.py +3 -0
  12. clarifai/datasets/upload/image.py +57 -26
  13. clarifai/datasets/upload/loaders/README.md +3 -4
  14. clarifai/datasets/upload/loaders/xview_detection.py +9 -5
  15. clarifai/datasets/upload/utils.py +23 -7
  16. clarifai/models/model_serving/README.md +113 -121
  17. clarifai/models/model_serving/__init__.py +2 -0
  18. clarifai/models/model_serving/cli/_utils.py +53 -0
  19. clarifai/models/model_serving/cli/base.py +14 -0
  20. clarifai/models/model_serving/cli/build.py +79 -0
  21. clarifai/models/model_serving/cli/clarifai_clis.py +33 -0
  22. clarifai/models/model_serving/cli/create.py +171 -0
  23. clarifai/models/model_serving/cli/example_cli.py +34 -0
  24. clarifai/models/model_serving/cli/login.py +26 -0
  25. clarifai/models/model_serving/cli/upload.py +182 -0
  26. clarifai/models/model_serving/constants.py +20 -0
  27. clarifai/models/model_serving/docs/cli.md +150 -0
  28. clarifai/models/model_serving/docs/concepts.md +229 -0
  29. clarifai/models/model_serving/docs/dependencies.md +1 -1
  30. clarifai/models/model_serving/docs/inference_parameters.md +112 -107
  31. clarifai/models/model_serving/docs/model_types.md +16 -17
  32. clarifai/models/model_serving/model_config/__init__.py +4 -2
  33. clarifai/models/model_serving/model_config/base.py +369 -0
  34. clarifai/models/model_serving/model_config/config.py +219 -224
  35. clarifai/models/model_serving/model_config/inference_parameter.py +5 -0
  36. clarifai/models/model_serving/model_config/model_types_config/multimodal-embedder.yaml +25 -24
  37. clarifai/models/model_serving/model_config/model_types_config/text-classifier.yaml +19 -18
  38. clarifai/models/model_serving/model_config/model_types_config/text-embedder.yaml +20 -18
  39. clarifai/models/model_serving/model_config/model_types_config/text-to-image.yaml +19 -18
  40. clarifai/models/model_serving/model_config/model_types_config/text-to-text.yaml +19 -18
  41. clarifai/models/model_serving/model_config/model_types_config/visual-classifier.yaml +22 -18
  42. clarifai/models/model_serving/model_config/model_types_config/visual-detector.yaml +32 -28
  43. clarifai/models/model_serving/model_config/model_types_config/visual-embedder.yaml +19 -18
  44. clarifai/models/model_serving/model_config/model_types_config/visual-segmenter.yaml +19 -18
  45. clarifai/models/model_serving/{models → model_config}/output.py +8 -0
  46. clarifai/models/model_serving/model_config/triton/__init__.py +14 -0
  47. clarifai/models/model_serving/model_config/{serializer.py → triton/serializer.py} +3 -1
  48. clarifai/models/model_serving/model_config/triton/triton_config.py +182 -0
  49. clarifai/models/model_serving/{models/model_types.py → model_config/triton/wrappers.py} +4 -4
  50. clarifai/models/model_serving/{models → repo_build}/__init__.py +2 -0
  51. clarifai/models/model_serving/repo_build/build.py +198 -0
  52. clarifai/models/model_serving/repo_build/static_files/_requirements.txt +2 -0
  53. clarifai/models/model_serving/repo_build/static_files/base_test.py +169 -0
  54. clarifai/models/model_serving/repo_build/static_files/inference.py +26 -0
  55. clarifai/models/model_serving/repo_build/static_files/sample_clarifai_config.yaml +25 -0
  56. clarifai/models/model_serving/repo_build/static_files/test.py +40 -0
  57. clarifai/models/model_serving/{models/pb_model.py → repo_build/static_files/triton/model.py} +15 -14
  58. clarifai/models/model_serving/utils.py +21 -0
  59. clarifai/rag/rag.py +67 -23
  60. clarifai/rag/utils.py +21 -5
  61. clarifai/utils/evaluation/__init__.py +427 -0
  62. clarifai/utils/evaluation/helpers.py +522 -0
  63. clarifai/utils/logging.py +7 -0
  64. clarifai/utils/model_train.py +3 -1
  65. clarifai/versions.py +1 -1
  66. {clarifai-10.0.1.dist-info → clarifai-10.1.1.dist-info}/METADATA +58 -10
  67. clarifai-10.1.1.dist-info/RECORD +115 -0
  68. clarifai-10.1.1.dist-info/entry_points.txt +2 -0
  69. clarifai/datasets/upload/loaders/coco_segmentation.py +0 -98
  70. clarifai/models/model_serving/cli/deploy_cli.py +0 -123
  71. clarifai/models/model_serving/cli/model_zip.py +0 -61
  72. clarifai/models/model_serving/cli/repository.py +0 -89
  73. clarifai/models/model_serving/docs/custom_config.md +0 -33
  74. clarifai/models/model_serving/docs/output.md +0 -28
  75. clarifai/models/model_serving/models/default_test.py +0 -281
  76. clarifai/models/model_serving/models/inference.py +0 -50
  77. clarifai/models/model_serving/models/test.py +0 -64
  78. clarifai/models/model_serving/pb_model_repository.py +0 -108
  79. clarifai-10.0.1.dist-info/RECORD +0 -103
  80. clarifai-10.0.1.dist-info/entry_points.txt +0 -4
  81. {clarifai-10.0.1.dist-info → clarifai-10.1.1.dist-info}/LICENSE +0 -0
  82. {clarifai-10.0.1.dist-info → clarifai-10.1.1.dist-info}/WHEEL +0 -0
  83. {clarifai-10.0.1.dist-info → clarifai-10.1.1.dist-info}/top_level.txt +0 -0
clarifai/client/app.py CHANGED
@@ -32,6 +32,7 @@ class App(Lister, BaseClient):
  app_id: str = None,
  base_url: str = "https://api.clarifai.com",
  pat: str = None,
+ token: str = None,
  **kwargs):
  """Initializes an App object.

@@ -40,6 +41,7 @@ class App(Lister, BaseClient):
  app_id (str): The App ID for the App to interact with.
  base_url (str): Base API url. Default "https://api.clarifai.com"
  pat (str): A personal access token for authentication. Can be set as env var CLARIFAI_PAT
+ token (str): A session token for authentication. Accepts either a session token or a pat. Can be set as env var CLARIFAI_SESSION_TOKEN
  **kwargs: Additional keyword arguments to be passed to the App.
  - name (str): The name of the app.
  - description (str): The description of the app.
@@ -52,7 +54,8 @@ class App(Lister, BaseClient):
  self.kwargs = {**kwargs, 'id': app_id}
  self.app_info = resources_pb2.App(**self.kwargs)
  self.logger = get_logger(logger_level="INFO", name=__name__)
- BaseClient.__init__(self, user_id=self.user_id, app_id=self.id, base=base_url, pat=pat)
+ BaseClient.__init__(
+ self, user_id=self.user_id, app_id=self.id, base=base_url, pat=pat, token=token)
  Lister.__init__(self)

  def list_datasets(self, page_no: int = None,
@@ -85,7 +88,7 @@ class App(Lister, BaseClient):
  for dataset_info in all_datasets_info:
  if 'version' in list(dataset_info.keys()):
  del dataset_info['version']['metrics']
- yield Dataset(base_url=self.base, pat=self.pat, **dataset_info)
+ yield Dataset.from_auth_helper(auth=self.auth_helper, **dataset_info)

  def list_models(self,
  filter_by: Dict[str, Any] = {},
@@ -126,7 +129,7 @@ class App(Lister, BaseClient):
  if only_in_app:
  if model_info['app_id'] != self.id:
  continue
- yield Model(base_url=self.base, pat=self.pat, **model_info)
+ yield Model.from_auth_helper(auth=self.auth_helper, **model_info)

  def list_workflows(self,
  filter_by: Dict[str, Any] = {},
@@ -165,7 +168,7 @@ class App(Lister, BaseClient):
  if only_in_app:
  if workflow_info['app_id'] != self.id:
  continue
- yield Workflow(base_url=self.base, pat=self.pat, **workflow_info)
+ yield Workflow.from_auth_helper(auth=self.auth_helper, **workflow_info)

  def list_modules(self,
  filter_by: Dict[str, Any] = {},
@@ -204,7 +207,7 @@ class App(Lister, BaseClient):
  if only_in_app:
  if module_info['app_id'] != self.id:
  continue
- yield Module(base_url=self.base, pat=self.pat, **module_info)
+ yield Module.from_auth_helper(auth=self.auth_helper, **module_info)

  def list_installed_module_versions(self,
  filter_by: Dict[str, Any] = {},
@@ -239,11 +242,8 @@ class App(Lister, BaseClient):
  for imv_info in all_imv_infos:
  del imv_info['deploy_url']
  del imv_info['installed_module_version_id'] # TODO: remove this after the backend fix
- yield Module(
- module_id=imv_info['module_version']['module_id'],
- base_url=self.base,
- pat=self.pat,
- **imv_info)
+ yield Module.from_auth_helper(
+ auth=self.auth_helper, module_id=imv_info['module_version']['module_id'], **imv_info)

  def list_concepts(self, page_no: int = None,
  per_page: int = None) -> Generator[Concept, None, None]:
@@ -308,14 +308,8 @@ class App(Lister, BaseClient):
  if response.status.code != status_code_pb2.SUCCESS:
  raise Exception(response.status)
  self.logger.info("\nDataset created\n%s", response.status)
- kwargs.update({
- 'app_id': self.id,
- 'user_id': self.user_id,
- 'base_url': self.base,
- 'pat': self.pat
- })

- return Dataset(dataset_id=dataset_id, **kwargs)
+ return Dataset.from_auth_helper(self.auth_helper, dataset_id=dataset_id, **kwargs)

  def create_model(self, model_id: str, **kwargs) -> Model:
  """Creates a model for the app.
@@ -339,14 +333,11 @@ class App(Lister, BaseClient):
  raise Exception(response.status)
  self.logger.info("\nModel created\n%s", response.status)
  kwargs.update({
- 'app_id': self.id,
- 'user_id': self.user_id,
+ 'model_id': model_id,
  'model_type_id': response.model.model_type_id,
- 'base_url': self.base,
- 'pat': self.pat
  })

- return Model(model_id=model_id, **kwargs)
+ return Model.from_auth_helper(auth=self.auth_helper, **kwargs)

  def create_workflow(self,
  config_filepath: str,
@@ -436,9 +427,8 @@ class App(Lister, BaseClient):
  display_workflow_tree(dict_response["workflows"][0]["nodes"])
  kwargs = self.process_response_keys(dict_response[list(dict_response.keys())[1]][0],
  "workflow")
- kwargs.update({'base_url': self.base, 'pat': self.pat})

- return Workflow(**kwargs)
+ return Workflow.from_auth_helper(auth=self.auth_helper, **kwargs)

  def create_module(self, module_id: str, description: str, **kwargs) -> Module:
  """Creates a module for the app.
@@ -464,14 +454,8 @@ class App(Lister, BaseClient):
  if response.status.code != status_code_pb2.SUCCESS:
  raise Exception(response.status)
  self.logger.info("\nModule created\n%s", response.status)
- kwargs.update({
- 'app_id': self.id,
- 'user_id': self.user_id,
- 'base_url': self.base,
- 'pat': self.pat
- })

- return Module(module_id=module_id, **kwargs)
+ return Module.from_auth_helper(auth=self.auth_helper, module_id=module_id, **kwargs)

  def dataset(self, dataset_id: str, **kwargs) -> Dataset:
  """Returns a Dataset object for the existing dataset ID.
@@ -496,8 +480,7 @@ class App(Lister, BaseClient):
  kwargs = self.process_response_keys(dict_response[list(dict_response.keys())[1]],
  list(dict_response.keys())[1])
  kwargs['version'] = response.dataset.version if response.dataset.version else None
- kwargs.update({'base_url': self.base, 'pat': self.pat})
- return Dataset(**kwargs)
+ return Dataset.from_auth_helper(auth=self.auth_helper, **kwargs)

  def model(self, model_id: str, model_version_id: str = "", **kwargs) -> Model:
  """Returns a Model object for the existing model ID.
@@ -532,9 +515,8 @@ class App(Lister, BaseClient):
  kwargs = self.process_response_keys(dict_response['model'], 'model')
  kwargs[
  'model_version'] = response.model.model_version if response.model.model_version else None
- kwargs.update({'base_url': self.base, 'pat': self.pat})

- return Model(**kwargs)
+ return Model.from_auth_helper(self.auth_helper, **kwargs)

  def workflow(self, workflow_id: str, **kwargs) -> Workflow:
  """Returns a workflow object for the existing workflow ID.
@@ -558,9 +540,8 @@ class App(Lister, BaseClient):
  dict_response = MessageToDict(response, preserving_proto_field_name=True)
  kwargs = self.process_response_keys(dict_response[list(dict_response.keys())[1]],
  list(dict_response.keys())[1])
- kwargs.update({'base_url': self.base, 'pat': self.pat})

- return Workflow(**kwargs)
+ return Workflow.from_auth_helper(auth=self.auth_helper, **kwargs)

  def module(self, module_id: str, module_version_id: str = "", **kwargs) -> Module:
  """Returns a Module object for the existing module ID.
@@ -585,9 +566,8 @@ class App(Lister, BaseClient):
  raise Exception(response.status)
  dict_response = MessageToDict(response, preserving_proto_field_name=True)
  kwargs = self.process_response_keys(dict_response['module'], 'module')
- kwargs.update({'base_url': self.base, 'pat': self.pat})

- return Module(**kwargs)
+ return Module.from_auth_helper(auth=self.auth_helper, **kwargs)

  def inputs(self,):
  """Returns an Input object.
@@ -595,7 +575,7 @@ class App(Lister, BaseClient):
  Returns:
  Inputs: An input object.
  """
- return Inputs(self.user_id, self.id, base_url=self.base, pat=self.pat)
+ return Inputs.from_auth_helper(self.auth_helper)

  def delete_dataset(self, dataset_id: str) -> None:
  """Deletes an dataset for the user.
@@ -684,9 +664,9 @@ class App(Lister, BaseClient):
  >>> app = App(app_id="app_id", user_id="user_id")
  >>> search_client = app.search(top_k=12, metric="euclidean")
  """
- user_id = kwargs.get("user_id", self.user_app_id.user_id)
- app_id = kwargs.get("app_id", self.user_app_id.app_id)
- return Search(user_id=user_id, app_id=app_id, base_url=self.base, pat=self.pat, **kwargs)
+ kwargs.get("user_id", self.user_app_id.user_id)
+ kwargs.get("app_id", self.user_app_id.app_id)
+ return Search.from_auth_helper(auth=self.auth_helper, **kwargs)

  def __getattr__(self, name):
  return getattr(self.app_info, name)
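The pattern across app.py is consistent: `App.__init__` now accepts a `token` alongside `pat`, and every factory method hands credentials to child objects via `from_auth_helper` instead of re-passing `base_url`/`pat`. A minimal usage sketch of the new session-token option, assuming clarifai 10.1.1 is installed (IDs and the token value are placeholders):

```python
import os

from clarifai.client.app import App

# Pass the session token explicitly...
app = App(user_id="me", app_id="demo", token="sess-xxxx")  # pat="..." still works

# ...or let the client pick it up from the env var named in the docstring above.
os.environ["CLARIFAI_SESSION_TOKEN"] = "sess-xxxx"
app = App(user_id="me", app_id="demo")
```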
clarifai/client/base.py CHANGED
@@ -7,7 +7,7 @@ from google.protobuf.wrappers_pb2 import BoolValue

  from clarifai.client.auth import create_stub
  from clarifai.client.auth.helper import ClarifaiAuthHelper
- from clarifai.errors import ApiError
+ from clarifai.errors import ApiError, UserError
  from clarifai.utils.misc import get_from_dict_or_env


@@ -19,9 +19,11 @@ class BaseClient:
  - user_id (str): A user ID for authentication.
  - app_id (str): An app ID for the application to interact with.
  - pat (str): A personal access token for authentication.
+ - token (str): A session token for authentication. Accepts either a session token or a pat.
  - base (str): The base URL for the API endpoint. Defaults to 'https://api.clarifai.com'.
  - ui (str): The URL for the UI. Defaults to 'https://clarifai.com'.

+
  Attributes:
  auth_helper (ClarifaiAuthHelper): An instance of ClarifaiAuthHelper for authentication.
  STUB (Stub): The gRPC Stub object for API interaction.
@@ -31,15 +33,53 @@ class BaseClient:
  """

  def __init__(self, **kwargs):
- pat = get_from_dict_or_env(key="pat", env_key="CLARIFAI_PAT", **kwargs)
- kwargs.update({'pat': pat})
+ token, pat = "", ""
+ try:
+ pat = get_from_dict_or_env(key="pat", env_key="CLARIFAI_PAT", **kwargs)
+ except UserError:
+ token = get_from_dict_or_env(key="token", env_key="CLARIFAI_SESSION_TOKEN", **kwargs)
+ finally:
+ assert token or pat, Exception(
+ "Need 'pat' or 'token' in args or use one of the CLARIFAI_PAT or CLARIFAI_SESSION_TOKEN env vars"
+ )
+ kwargs.update({'token': token, 'pat': pat})
+
  self.auth_helper = ClarifaiAuthHelper(**kwargs, validate=False)
  self.STUB = create_stub(self.auth_helper)
  self.metadata = self.auth_helper.metadata
  self.pat = self.auth_helper.pat
+ self.token = self.auth_helper._token
  self.user_app_id = self.auth_helper.get_user_app_id_proto()
  self.base = self.auth_helper.base

+ @classmethod
+ def from_auth_helper(cls, auth: ClarifaiAuthHelper, **kwargs):
+ default_kwargs = {
+ "user_id": kwargs.get("user_id", None) or auth.user_id,
+ "app_id": kwargs.get("app_id", None) or auth.app_id,
+ "pat": kwargs.get("pat", None) or auth.pat,
+ "token": kwargs.get("token", None) or auth._token,
+ }
+ _base = kwargs.get("base", None) or auth.base
+ _clss = cls.__mro__[0]
+ if _clss == BaseClient:
+ kwargs = {
+ **default_kwargs,
+ "base": _base, # Baseclient uses `base`
+ "ui": kwargs.get("ui", None) or auth.ui
+ }
+ else:
+ # Remove user_id and app_id if a custom URL is provided
+ if kwargs.get("url"):
+ default_kwargs.pop("user_id", "")
+ default_kwargs.pop("app_id", "")
+ # Remove app_id if the class name contains "Runner"
+ if 'Runner' in _clss.__name__:
+ default_kwargs.pop("app_id", "")
+ kwargs.update({**default_kwargs, "base_url": _base})
+
+ return cls(**kwargs)
+
  def _grpc_request(self, method: Callable, argument: Any):
  """Makes a gRPC request to the API.

@@ -52,7 +92,7 @@ class BaseClient:
  """

  try:
- res = method(argument)
+ res = method(argument, metadata=self.auth_helper.metadata)
  # MessageToDict(res) TODO global debug logger
  return res
  except ApiError:
@@ -118,6 +158,8 @@ class BaseClient:
  value_s = struct_pb2.Struct()
  value_s.update(value)
  value = value_s
+ elif key == 'metrics':
+ continue
  elif key in ['metadata']:
  if isinstance(value, dict) and value != {}:
  value_s = struct_pb2.Struct()
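The new `from_auth_helper` classmethod is what ties the other client changes together: one `ClarifaiAuthHelper` now carries the user/app IDs, pat or token, and base URL down to every child object. A rough sketch of the pattern, assuming clarifai 10.1.1 (IDs and the pat are placeholders; explicit kwargs still override the helper's values):

```python
from clarifai.client.app import App
from clarifai.client.auth.helper import ClarifaiAuthHelper
from clarifai.client.dataset import Dataset

# One helper holds the credentials and endpoint for the whole object tree.
auth = ClarifaiAuthHelper(user_id="me", app_id="demo", pat="my-pat")

# Any client class can be constructed from it.
app = App.from_auth_helper(auth)
dataset = Dataset.from_auth_helper(auth, dataset_id="train-set")
```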
clarifai/client/dataset.py CHANGED
@@ -2,11 +2,13 @@ import os
  import time
  import uuid
  from concurrent.futures import ThreadPoolExecutor, as_completed
+ from datetime import datetime
  from multiprocessing import cpu_count
- from typing import Generator, List, Tuple, Type, TypeVar, Union
+ from typing import Dict, Generator, List, Optional, Tuple, Type, TypeVar, Union

  import requests
  from clarifai_grpc.grpc.api import resources_pb2, service_pb2
+ from clarifai_grpc.grpc.api.service_pb2 import MultiInputResponse
  from clarifai_grpc.grpc.api.status import status_code_pb2, status_pb2
  from google.protobuf.json_format import MessageToDict
  from requests.adapters import HTTPAdapter, Retry
@@ -25,7 +27,7 @@ from clarifai.datasets.upload.text import TextClassificationDataset
  from clarifai.datasets.upload.utils import DisplayUploadStatus
  from clarifai.errors import UserError
  from clarifai.urls.helper import ClarifaiUrlHelper
- from clarifai.utils.logging import get_logger
+ from clarifai.utils.logging import add_file_handler, get_logger
  from clarifai.utils.misc import BackoffIterator, Chunker

  ClarifaiDatasetType = TypeVar('ClarifaiDatasetType', VisualClassificationDataset,
@@ -41,6 +43,7 @@ class Dataset(Lister, BaseClient):
  dataset_id: str = None,
  base_url: str = "https://api.clarifai.com",
  pat: str = None,
+ token: str = None,
  **kwargs):
  """Initializes a Dataset object.

@@ -49,6 +52,7 @@ class Dataset(Lister, BaseClient):
  dataset_id (str): The Dataset ID within the App to interact with.
  base_url (str): Base API url. Default "https://api.clarifai.com"
  pat (str): A personal access token for authentication. Can be set as env var CLARIFAI_PAT
+ token (str): A session token for authentication. Accepts either a session token or a pat. Can be set as env var CLARIFAI_SESSION_TOKEN
  **kwargs: Additional keyword arguments to be passed to the Dataset.
  """
  if url and dataset_id:
@@ -64,9 +68,10 @@ class Dataset(Lister, BaseClient):
  self.max_retires = 10
  self.batch_size = 128 # limit max protos in a req
  self.task = None # Upload dataset type
- self.input_object = Inputs(user_id=self.user_id, app_id=self.app_id, pat=pat)
- self.logger = get_logger(logger_level="INFO")
- BaseClient.__init__(self, user_id=self.user_id, app_id=self.app_id, base=base_url, pat=pat)
+ self.input_object = Inputs(user_id=self.user_id, app_id=self.app_id, pat=pat, token=token)
+ self.logger = get_logger(logger_level="INFO", name=__name__)
+ BaseClient.__init__(
+ self, user_id=self.user_id, app_id=self.app_id, base=base_url, pat=pat, token=token)
  Lister.__init__(self)

  def create_version(self, **kwargs) -> 'Dataset':
@@ -96,13 +101,10 @@ class Dataset(Lister, BaseClient):
  self.logger.info("\nDataset Version created\n%s", response.status)
  kwargs.update({
  'dataset_id': self.id,
- 'app_id': self.app_id,
- 'user_id': self.user_id,
  'version': response.dataset_versions[0],
- 'base_url': self.base,
- 'pat': self.pat
  })
- return Dataset(**kwargs)
+
+ return Dataset.from_auth_helper(self.auth_helper, **kwargs)

  def delete_version(self, version_id: str) -> None:
  """Deletes a dataset version for the Dataset.
@@ -160,13 +162,9 @@ class Dataset(Lister, BaseClient):
  del dataset_version_info['metrics']
  kwargs = {
  'dataset_id': self.id,
- 'app_id': self.app_id,
- 'user_id': self.user_id,
  'version': resources_pb2.DatasetVersion(**dataset_version_info),
- 'base_url': self.base,
- 'pat': self.pat
  }
- yield Dataset(**kwargs)
+ yield Dataset.from_auth_helper(self.auth_helper, **kwargs)

  def _concurrent_annot_upload(self, annots: List[List[resources_pb2.Annotation]]
  ) -> Union[List[resources_pb2.Annotation], List[None]]:
@@ -194,13 +192,17 @@ class Dataset(Lister, BaseClient):

  return retry_annot_upload

- def _delete_failed_inputs(self, batch_input_ids: List[int],
- dataset_obj: ClarifaiDatasetType) -> Tuple[List[int], List[int]]:
+ def _delete_failed_inputs(
+ self,
+ batch_input_ids: List[int],
+ dataset_obj: ClarifaiDatasetType,
+ upload_response: MultiInputResponse = None) -> Tuple[List[int], List[int]]:
  """Delete failed input ids from clarifai platform dataset.

  Args:
  batch_input_ids: batch input ids
  dataset_obj: ClarifaiDataset object
+ upload_response: upload response proto

  Returns:
  success_inputs: upload success input ids
@@ -220,7 +222,19 @@ class Dataset(Lister, BaseClient):
  success_inputs = response_dict.get('inputs', [])

  success_input_ids = [input.get('id') for input in success_inputs]
- failed_input_ids = list(set(input_ids) - set(success_input_ids))
+ failed_input_ids = list(set(input_ids) - set(success_input_ids.copy()))
+ #check duplicate input ids
+ duplicate_input_ids = [
+ input.id for input in upload_response.inputs
+ if input.status.details == 'Input has a duplicate ID.'
+ ] #handling duplicte ID failures.
+ if duplicate_input_ids:
+ success_input_ids = list(set(success_input_ids.copy()) - set(duplicate_input_ids.copy()))
+ failed_input_ids = list(set(failed_input_ids) - set(duplicate_input_ids))
+ self.logger.warning(
+ f"Upload Failed for {len(duplicate_input_ids)} inputs in current batch: Duplicate input ids: {duplicate_input_ids}"
+ )
+
  #delete failed inputs
  self._grpc_request(
  self.STUB.DeleteInputs,
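The duplicate-ID branch above is plain set arithmetic: inputs the platform lists as present but flags with 'Input has a duplicate ID.' are removed from both the success and failure sets, so they are neither deleted nor retried. A toy illustration with made-up IDs (the `.copy()` calls in the original have no effect on the result, since `set()` already copies):

```python
input_ids = {"a", "b", "c", "d"}      # every input in the batch
success_input_ids = ["a", "b", "c"]   # listed as present on the platform
duplicate_input_ids = ["c"]           # status.details == 'Input has a duplicate ID.'

failed_input_ids = set(input_ids) - set(success_input_ids)              # {"d"}
success_input_ids = set(success_input_ids) - set(duplicate_input_ids)  # {"a", "b"}
failed_input_ids = failed_input_ids - set(duplicate_input_ids)         # still {"d"}
print(sorted(success_input_ids), sorted(failed_input_ids))             # ['a', 'b'] ['d']
```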
@@ -228,8 +242,9 @@ class Dataset(Lister, BaseClient):
  )
  return [input_ids[id] for id in success_input_ids], [input_ids[id] for id in failed_input_ids]

- def _upload_inputs_annotations(self, batch_input_ids: List[int], dataset_obj: ClarifaiDatasetType
- ) -> Tuple[List[int], List[resources_pb2.Annotation]]:
+ def _upload_inputs_annotations(
+ self, batch_input_ids: List[int], dataset_obj: ClarifaiDatasetType
+ ) -> Tuple[List[int], List[resources_pb2.Annotation], MultiInputResponse]:
  """Uploads batch of inputs and annotations concurrently to clarifai platform dataset.

  Args:
@@ -239,20 +254,22 @@ class Dataset(Lister, BaseClient):
  Returns:
  failed_input_ids: failed input ids
  retry_annot_protos: failed annot protos
+ response: upload response proto
  """
  input_protos, _ = dataset_obj.get_protos(batch_input_ids)
- input_job_id = self.input_object.upload_inputs(inputs=input_protos, show_log=False)
+ input_job_id, _response = self.input_object.upload_inputs(inputs=input_protos, show_log=False)
  retry_annot_protos = []

  self.input_object._wait_for_inputs(input_job_id)
- success_input_ids, failed_input_ids = self._delete_failed_inputs(batch_input_ids, dataset_obj)
+ success_input_ids, failed_input_ids = self._delete_failed_inputs(batch_input_ids, dataset_obj,
+ _response)

- if self.task in ["visual_detection", "visual_segmentation"]:
+ if self.task in ["visual_detection", "visual_segmentation"] and success_input_ids:
  _, annotation_protos = dataset_obj.get_protos(success_input_ids)
  chunked_annotation_protos = Chunker(annotation_protos, self.batch_size).chunk()
  retry_annot_protos.extend(self._concurrent_annot_upload(chunked_annotation_protos))

- return failed_input_ids, retry_annot_protos
+ return failed_input_ids, retry_annot_protos, _response

  def _retry_uploads(self, failed_input_ids: List[int],
  retry_annot_protos: List[resources_pb2.Annotation],
@@ -265,7 +282,25 @@ class Dataset(Lister, BaseClient):
  dataset_obj: ClarifaiDataset object
  """
  if failed_input_ids:
- self._upload_inputs_annotations(failed_input_ids, dataset_obj)
+ retry_input_ids = [dataset_obj.all_input_ids[id] for id in failed_input_ids]
+ #Log Retrying inputs
+ self.logger.warning(
+ f"Retrying upload for {len(failed_input_ids)} inputs in current batch: {retry_input_ids}"
+ )
+ failed_retrying_inputs, _, retry_response = self._upload_inputs_annotations(
+ failed_input_ids, dataset_obj)
+ #Log failed inputs
+ if failed_retrying_inputs:
+ failed_retrying_input_ids = [
+ dataset_obj.all_input_ids[id] for id in failed_retrying_inputs
+ ]
+ failed_inputs_logs = {
+ input.id: input.status.details
+ for input in retry_response.inputs if input.id in failed_retrying_input_ids
+ }
+ self.logger.warning(
+ f"Failed to upload {len(failed_retrying_inputs)} inputs in current batch: {failed_inputs_logs}"
+ )
  if retry_annot_protos:
  chunked_annotation_protos = Chunker(retry_annot_protos, self.batch_size).chunk()
  _ = self._concurrent_annot_upload(chunked_annotation_protos)
@@ -287,21 +322,27 @@ class Dataset(Lister, BaseClient):
  ]

  for job in as_completed(futures):
- retry_input_ids, retry_annot_protos = job.result()
+ retry_input_ids, retry_annot_protos, _ = job.result()
  self._retry_uploads(retry_input_ids, retry_annot_protos, dataset_obj)
  progress.update()

  def upload_dataset(self,
  dataloader: Type[ClarifaiDataLoader],
  batch_size: int = 32,
- get_upload_status: bool = False) -> None:
+ get_upload_status: bool = False,
+ log_warnings: bool = False) -> None:
  """Uploads a dataset to the app.

  Args:
  dataloader (Type[ClarifaiDataLoader]): ClarifaiDataLoader object
  batch_size (int): batch size for concurrent upload of inputs and annotations (max: 128)
  get_upload_status (bool): True if you want to get the upload status of the dataset
+ log_warnings (bool): True if you want to save log warnings in a file
  """
+ #add file handler to log warnings
+ if log_warnings:
+ add_file_handler(self.logger, f"Dataset_Upload{str(int(datetime.now().timestamp()))}.log")
+ #set batch size and task
  self.batch_size = min(self.batch_size, batch_size)
  self.task = dataloader.task
  if self.task not in DATASET_UPLOAD_TASKS:
@@ -321,10 +362,13 @@ class Dataset(Lister, BaseClient):
  else: # visual_classification & visual_captioning
  dataset_obj = VisualClassificationDataset(dataloader, self.id)

+ if get_upload_status:
+ pre_upload_stats = self.get_upload_status(pre_upload=True)
+
  self._data_upload(dataset_obj)

  if get_upload_status:
- self.get_upload_status(dataloader)
+ self.get_upload_status(dataloader=dataloader, pre_upload_stats=pre_upload_stats)

  def upload_from_csv(self,
  csv_path: str,
@@ -398,16 +442,21 @@ class Dataset(Lister, BaseClient):
  folder_path=folder_path, dataset_id=self.id, labels=labels)
  self.input_object._bulk_upload(inputs=input_protos, batch_size=batch_size)

- def get_upload_status(self,
- dataloader: Type[ClarifaiDataLoader],
- delete_version: bool = False,
- timeout: int = 600) -> None:
+ def get_upload_status(
+ self,
+ dataloader: Type[ClarifaiDataLoader] = None,
+ delete_version: bool = False,
+ timeout: int = 600,
+ pre_upload_stats: Tuple[Dict[str, int], Dict[str, int]] = None,
+ pre_upload: bool = False) -> Optional[Tuple[Dict[str, int], Dict[str, int]]]:
  """Creates a new dataset version and displays the upload status of the dataset.

  Args:
  dataloader (Type[ClarifaiDataLoader]): ClarifaiDataLoader object
  delete_version (bool): True if you want to delete the version after getting the upload status
  timeout (int): Timeout in seconds for getting the upload status. Default is 600 seconds.
+ pre_upload_stats (Tuple[Dict[str, int], Dict[str, int]]): The pre upload stats for the dataset.
+ pre_upload (bool): True if you want to get the pre upload stats for the dataset.

  Example:
  >>> from clarifai.client.dataset import Dataset
@@ -450,9 +499,12 @@ class Dataset(Lister, BaseClient):
  raise UserError(
  "Dataset metrics are taking too long to process. Please try again later.")
  break
+ #get pre upload stats
+ if pre_upload:
+ return DisplayUploadStatus.get_dataset_version_stats(dataset_metrics_response)

  dataset_info_dict = dict(user_id=self.user_id, app_id=self.app_id, dataset_id=self.id)
- DisplayUploadStatus(dataloader, dataset_metrics_response, dataset_info_dict)
+ DisplayUploadStatus(dataloader, dataset_metrics_response, dataset_info_dict, pre_upload_stats)

  if delete_version:
  self.delete_version(dataset_version_id)
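Together, the dataset.py changes add duplicate-ID handling, warning logs for retried and failed inputs, optional file logging, and before/after version stats. A hedged sketch of the new `upload_dataset` surface, assuming clarifai 10.1.1 (`my_loader` stands in for any concrete `ClarifaiDataLoader`-style object):

```python
from clarifai.client.dataset import Dataset

dataset = Dataset(user_id="me", app_id="demo", dataset_id="train-set", pat="my-pat")

dataset.upload_dataset(
    dataloader=my_loader,    # placeholder: a ClarifaiDataLoader subclass instance
    batch_size=32,
    get_upload_status=True,  # snapshots version stats before and after the upload
    log_warnings=True,       # new flag: warnings also go to Dataset_Upload<ts>.log
)
```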
clarifai/client/input.py CHANGED
@@ -32,6 +32,7 @@ class Inputs(Lister, BaseClient):
  logger_level: str = "INFO",
  base_url: str = "https://api.clarifai.com",
  pat: str = None,
+ token: str = None,
  **kwargs):
  """Initializes an Input object.

@@ -39,6 +40,8 @@ class Inputs(Lister, BaseClient):
  user_id (str): A user ID for authentication.
  app_id (str): An app ID for the application to interact with.
  base_url (str): Base API url. Default "https://api.clarifai.com"
+ pat (str): A personal access token for authentication. Can be set as env var CLARIFAI_PAT
+ token (str): A session token for authentication. Accepts either a session token or a pat. Can be set as env var CLARIFAI_SESSION_TOKEN
  **kwargs: Additional keyword arguments to be passed to the Input
  """
  self.user_id = user_id
@@ -46,7 +49,8 @@ class Inputs(Lister, BaseClient):
  self.kwargs = {**kwargs}
  self.input_info = resources_pb2.Input(**self.kwargs)
  self.logger = get_logger(logger_level=logger_level, name=__name__)
- BaseClient.__init__(self, user_id=self.user_id, app_id=self.app_id, base=base_url, pat=pat)
+ BaseClient.__init__(
+ self, user_id=self.user_id, app_id=self.app_id, base=base_url, pat=pat, token=token)
  Lister.__init__(self)

  @staticmethod
@@ -660,15 +664,39 @@ class Inputs(Lister, BaseClient):
  user_app_id=self.user_app_id, inputs=inputs, inputs_add_job_id=input_job_id)
  response = self._grpc_request(self.STUB.PostInputs, request)
  if response.status.code != status_code_pb2.SUCCESS:
- try:
- self.logger.warning(response.inputs[0].status)
- except IndexError:
- self.logger.warning(response.status)
+ if show_log:
+ self.logger.warning(response)
+ else:
+ return input_job_id, response
  else:
  if show_log:
  self.logger.info("\nInputs Uploaded\n%s", response.status)

- return input_job_id
+ return input_job_id, response
+
+ def patch_inputs(self, inputs: List[Input], action: str = 'merge') -> str:
+ """Patch list of input objects to the app.
+
+ Args:
+ inputs (list): List of input objects to upload.
+ action (str): Action to perform on the input. Options: 'merge', 'overwrite', 'remove'.
+
+ Returns:
+ response: Response from the grpc request.
+ """
+ if not isinstance(inputs, list):
+ raise UserError("inputs must be a list of Input objects")
+ uuid.uuid4().hex # generate a unique id for this job
+ request = service_pb2.PatchInputsRequest(
+ user_app_id=self.user_app_id, inputs=inputs, action=action)
+ response = self._grpc_request(self.STUB.PatchInputs, request)
+ if response.status.code != status_code_pb2.SUCCESS:
+ try:
+ self.logger.warning(f"Patch inputs failed, status: {response.annotations[0].status}")
+ except Exception:
+ self.logger.warning(f"Patch inputs failed, status: {response.status.details}")
+
+ self.logger.info("\nPatch Inputs Successful\n%s", response.status)

  def upload_annotations(self, batch_annot: List[resources_pb2.Annotation], show_log: bool = True
  ) -> Union[List[resources_pb2.Annotation], List[None]]:
@@ -705,7 +733,7 @@ class Inputs(Lister, BaseClient):
  Returns:
  input_job_id: job id for the upload request.
  """
- input_job_id = self.upload_inputs(inputs, False)
+ input_job_id, _ = self.upload_inputs(inputs, False)
  self._wait_for_inputs(input_job_id)
  failed_inputs = self._delete_failed_inputs(inputs)
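Note the contract change in input.py: `upload_inputs` now returns an `(input_job_id, response)` tuple, and internal callers such as `_bulk_upload` unpack it. A small sketch of the new surface plus the new `patch_inputs` method, assuming clarifai 10.1.1 (`get_input_from_url` is the SDK's existing proto builder, untouched by this diff; IDs are placeholders):

```python
from clarifai.client.input import Inputs

client = Inputs(user_id="me", app_id="demo", pat="my-pat")

proto = Inputs.get_input_from_url(
    input_id="img-1", image_url="https://samples.clarifai.com/metro-north.jpg")

# Returns the job id *and* the raw PostInputs response proto.
job_id, response = client.upload_inputs(inputs=[proto])

# New in 10.1.1: patch already-uploaded inputs; action is 'merge', 'overwrite' or 'remove'.
client.patch_inputs(inputs=[proto], action="merge")
```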