clarifai 10.2.0__py3-none-any.whl → 10.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
clarifai/client/app.py CHANGED
@@ -33,6 +33,7 @@ class App(Lister, BaseClient):
33
33
  base_url: str = "https://api.clarifai.com",
34
34
  pat: str = None,
35
35
  token: str = None,
36
+ root_certificates_path: str = None,
36
37
  **kwargs):
37
38
  """Initializes an App object.
38
39
 
@@ -42,6 +43,7 @@ class App(Lister, BaseClient):
42
43
  base_url (str): Base API url. Default "https://api.clarifai.com"
43
44
  pat (str): A personal access token for authentication. Can be set as env var CLARIFAI_PAT
44
45
  token (str): A session token for authentication. Accepts either a session token or a pat. Can be set as env var CLARIFAI_SESSION_TOKEN
46
+ root_certificates_path (str): Path to the SSL root certificates file, used to establish secure gRPC connections.
45
47
  **kwargs: Additional keyword arguments to be passed to the App.
46
48
  - name (str): The name of the app.
47
49
  - description (str): The description of the app.
@@ -55,7 +57,13 @@ class App(Lister, BaseClient):
55
57
  self.app_info = resources_pb2.App(**self.kwargs)
56
58
  self.logger = get_logger(logger_level="INFO", name=__name__)
57
59
  BaseClient.__init__(
58
- self, user_id=self.user_id, app_id=self.id, base=base_url, pat=pat, token=token)
60
+ self,
61
+ user_id=self.user_id,
62
+ app_id=self.id,
63
+ base=base_url,
64
+ pat=pat,
65
+ token=token,
66
+ root_certificates_path=root_certificates_path)
59
67
  Lister.__init__(self)
60
68
 
61
69
  def list_datasets(self, page_no: int = None,
@@ -86,8 +94,9 @@ class App(Lister, BaseClient):
86
94
  per_page=per_page,
87
95
  page_no=page_no)
88
96
  for dataset_info in all_datasets_info:
89
- if 'version' in list(dataset_info.keys()):
90
- del dataset_info['version']['metrics']
97
+ if 'version' in dataset_info:
98
+ dataset_info['version'].pop('metrics', None)
99
+ dataset_info['version'].pop('export_info', None)
91
100
  yield Dataset.from_auth_helper(auth=self.auth_helper, **dataset_info)
92
101
 
93
102
  def list_models(self,
@@ -373,8 +382,8 @@ class App(Lister, BaseClient):
373
382
  output_info = get_yaml_output_info_proto(node['model'].get('output_info', None))
374
383
  try:
375
384
  model = self.model(
376
- node['model']['model_id'],
377
- node['model'].get('model_version_id', ""),
385
+ model_id=node['model']['model_id'],
386
+ model_version={"id": node['model'].get('model_version_id', "")},
378
387
  user_id=node['model'].get('user_id', ""),
379
388
  app_id=node['model'].get('app_id', ""))
380
389
  except Exception as e:
@@ -457,11 +466,12 @@ class App(Lister, BaseClient):
457
466
 
458
467
  return Module.from_auth_helper(auth=self.auth_helper, module_id=module_id, **kwargs)
459
468
 
460
- def dataset(self, dataset_id: str, **kwargs) -> Dataset:
469
+ def dataset(self, dataset_id: str, dataset_version_id: str = None, **kwargs) -> Dataset:
461
470
  """Returns a Dataset object for the existing dataset ID.
462
471
 
463
472
  Args:
464
473
  dataset_id (str): The dataset ID for the dataset to interact with.
474
+ dataset_version_id (str): The version ID for the dataset version to interact with.
465
475
 
466
476
  Returns:
467
477
  Dataset: A Dataset object for the existing dataset ID.
@@ -480,9 +490,10 @@ class App(Lister, BaseClient):
480
490
  kwargs = self.process_response_keys(dict_response[list(dict_response.keys())[1]],
481
491
  list(dict_response.keys())[1])
482
492
  kwargs['version'] = response.dataset.version if response.dataset.version else None
493
+ kwargs['dataset_version_id'] = dataset_version_id
483
494
  return Dataset.from_auth_helper(auth=self.auth_helper, **kwargs)
484
495
 
485
- def model(self, model_id: str, model_version_id: str = "", **kwargs) -> Model:
496
+ def model(self, model_id: str, model_version: Dict = {'id': ""}, **kwargs) -> Model:
486
497
  """Returns a Model object for the existing model ID.
487
498
 
488
499
  Args:
@@ -495,7 +506,7 @@ class App(Lister, BaseClient):
495
506
  Example:
496
507
  >>> from clarifai.client.app import App
497
508
  >>> app = App(app_id="app_id", user_id="user_id")
498
- >>> model_v1 = app.model(model_id="model_id", model_version_id="model_version_id")
509
+ >>> model_v1 = app.model(model_id="model_id", model_version={"id":"model_version_id"})
499
510
  """
500
511
  # Change user_app_id based on whether user_id or app_id is specified.
501
512
  if kwargs.get("user_id") or kwargs.get("app_id"):
@@ -503,10 +514,10 @@ class App(Lister, BaseClient):
503
514
  user_app_id=self.auth_helper.get_user_app_id_proto(
504
515
  kwargs.get("user_id"), kwargs.get("app_id")),
505
516
  model_id=model_id,
506
- version_id=model_version_id)
517
+ version_id=model_version["id"])
507
518
  else:
508
519
  request = service_pb2.GetModelRequest(
509
- user_app_id=self.user_app_id, model_id=model_id, version_id=model_version_id)
520
+ user_app_id=self.user_app_id, model_id=model_id, version_id=model_version["id"])
510
521
  response = self._grpc_request(self.STUB.GetModel, request)
511
522
 
512
523
  if response.status.code != status_code_pb2.SUCCESS:
@@ -65,6 +65,7 @@ class ClarifaiAuthHelper:
65
65
  token: str = "",
66
66
  base: str = DEFAULT_BASE,
67
67
  ui: str = DEFAULT_UI,
68
+ root_certificates_path: str = None,
68
69
  validate: bool = True,
69
70
  ):
70
71
  """
@@ -85,6 +86,7 @@ class ClarifaiAuthHelper:
85
86
  https://api.clarifai.com (default), https://host:port, http://host:port, host:port (will be treated as http, not https). It's highly recommended to include the http:// or https:// otherwise we need to check the endpoint to determine if it has SSL during this __init__
86
87
  ui: a url to the UI. Examples include clarifai.com,
87
88
  https://clarifai.com (default), https://host:port, http://host:port, host:port (will be treated as http, not https). It's highly recommended to include the http:// or https:// otherwise we need to check the endpoint to determine if it has SSL during this __init__
89
+ root_certificates_path: path to the root certificates file. This is only used for grpc secure channels.
88
90
  validate: whether to validate the inputs. This is useful for overriding vars then validating
89
91
  """
90
92
 
@@ -92,6 +94,7 @@ class ClarifaiAuthHelper:
92
94
  self.app_id = app_id
93
95
  self._pat = pat
94
96
  self._token = token
97
+ self._root_certificates_path = root_certificates_path
95
98
 
96
99
  self.set_base(base)
97
100
  self.set_ui(ui)
@@ -113,6 +116,8 @@ class ClarifaiAuthHelper:
113
116
  raise Exception(
114
117
  "Need 'pat' or 'token' in the query params or use one of the CLARIFAI_PAT or CLARIFAI_SESSION_TOKEN env vars"
115
118
  )
119
+ if (self._root_certificates_path) and (not os.path.exists(self._root_certificates_path)):
120
+ raise Exception("Root certificates path %s does not exist" % self._root_certificates_path)
116
121
 
117
122
  @classmethod
118
123
  def from_streamlit(cls, st: Any) -> "ClarifaiAuthHelper":
@@ -219,6 +224,8 @@ Additionally, these optional params are supported:
219
224
  self.set_base(query_params["base"][0])
220
225
  if "ui" in query_params:
221
226
  self.set_ui(query_params["ui"][0])
227
+ if "root_certificates_path" in query_params:
228
+ self._root_certificates_path = query_params["root_certificates_path"][0]
222
229
 
223
230
  @classmethod
224
231
  def from_env(cls, validate: bool = True) -> "ClarifaiAuthHelper":
@@ -229,6 +236,7 @@ Additionally, these optional params are supported:
229
236
  token: CLARIFAI_SESSION_TOKEN env var.
230
237
  pat: CLARIFAI_PAT env var.
231
238
  base: CLARIFAI_API_BASE env var.
239
+ root_certificates_path: CLARIFAI_ROOT_CERTIFICATES_PATH env var.
232
240
  """
233
241
  user_id = os.environ.get("CLARIFAI_USER_ID", "")
234
242
  app_id = os.environ.get("CLARIFAI_APP_ID", "")
@@ -236,7 +244,8 @@ Additionally, these optional params are supported:
236
244
  pat = os.environ.get("CLARIFAI_PAT", "")
237
245
  base = os.environ.get("CLARIFAI_API_BASE", DEFAULT_BASE)
238
246
  ui = os.environ.get("CLARIFAI_UI", DEFAULT_UI)
239
- return cls(user_id, app_id, pat, token, base, ui, validate)
247
+ root_certificates_path = os.environ.get("CLARIFAI_ROOT_CERTIFICATES_PATH", None)
248
+ return cls(user_id, app_id, pat, token, base, ui, root_certificates_path, validate)
240
249
 
241
250
  def get_user_app_id_proto(
242
251
  self,
@@ -281,7 +290,8 @@ Additionally, these optional params are supported:
281
290
 
282
291
  https = base_https_cache[self._base]
283
292
  if https:
284
- channel = ClarifaiChannel.get_grpc_channel(base=self._base)
293
+ channel = ClarifaiChannel.get_grpc_channel(
294
+ base=self._base, root_certificates_path=self._root_certificates_path)
285
295
  else:
286
296
  if self._base.find(":") >= 0:
287
297
  host, port = self._base.split(":")
clarifai/client/base.py CHANGED
@@ -22,6 +22,7 @@ class BaseClient:
22
22
  - token (str): A session token for authentication. Accepts either a session token or a pat.
23
23
  - base (str): The base URL for the API endpoint. Defaults to 'https://api.clarifai.com'.
24
24
  - ui (str): The URL for the UI. Defaults to 'https://clarifai.com'.
25
+ - root_certificates_path (str): Path to the SSL root certificates file, used to establish secure gRPC connections.
25
26
 
26
27
 
27
28
  Attributes:
@@ -51,14 +52,21 @@ class BaseClient:
51
52
  self.token = self.auth_helper._token
52
53
  self.user_app_id = self.auth_helper.get_user_app_id_proto()
53
54
  self.base = self.auth_helper.base
55
+ self.root_certificates_path = self.auth_helper._root_certificates_path
54
56
 
55
57
  @classmethod
56
58
  def from_auth_helper(cls, auth: ClarifaiAuthHelper, **kwargs):
57
59
  default_kwargs = {
58
- "user_id": kwargs.get("user_id", None) or auth.user_id,
59
- "app_id": kwargs.get("app_id", None) or auth.app_id,
60
- "pat": kwargs.get("pat", None) or auth.pat,
61
- "token": kwargs.get("token", None) or auth._token,
60
+ "user_id":
61
+ kwargs.get("user_id", None) or auth.user_id,
62
+ "app_id":
63
+ kwargs.get("app_id", None) or auth.app_id,
64
+ "pat":
65
+ kwargs.get("pat", None) or auth.pat,
66
+ "token":
67
+ kwargs.get("token", None) or auth._token,
68
+ "root_certificates_path":
69
+ kwargs.get("root_certificates_path", None) or auth._root_certificates_path
62
70
  }
63
71
  _base = kwargs.get("base", None) or auth.base
64
72
  _clss = cls.__mro__[0]
@@ -160,6 +168,8 @@ class BaseClient:
160
168
  value = value_s
161
169
  elif key == 'metrics':
162
170
  continue
171
+ elif key == 'size':
172
+ value = int(value)
163
173
  elif key in ['metadata']:
164
174
  if isinstance(value, dict) and value != {}:
165
175
  value_s = struct_pb2.Struct()
@@ -43,26 +43,34 @@ class Dataset(Lister, BaseClient):
43
43
  def __init__(self,
44
44
  url: str = None,
45
45
  dataset_id: str = None,
46
+ dataset_version_id: str = None,
46
47
  base_url: str = "https://api.clarifai.com",
47
48
  pat: str = None,
48
49
  token: str = None,
50
+ root_certificates_path: str = None,
49
51
  **kwargs):
50
52
  """Initializes a Dataset object.
51
53
 
52
54
  Args:
53
55
  url (str): The URL to initialize the dataset object.
54
56
  dataset_id (str): The Dataset ID within the App to interact with.
57
+ dataset_version_id (str): The Dataset Version ID within the Dataset to interact with.
55
58
  base_url (str): Base API url. Default "https://api.clarifai.com"
56
59
  pat (str): A personal access token for authentication. Can be set as env var CLARIFAI_PAT
57
60
  token (str): A session token for authentication. Accepts either a session token or a pat. Can be set as env var CLARIFAI_SESSION_TOKEN
61
+ root_certificates_path (str): Path to the SSL root certificates file, used to establish secure gRPC connections.
58
62
  **kwargs: Additional keyword arguments to be passed to the Dataset.
59
63
  """
60
64
  if url and dataset_id:
61
65
  raise UserError("You can only specify one of url or dataset_id.")
62
66
  if url:
63
- user_id, app_id, _, dataset_id, _ = ClarifaiUrlHelper.split_clarifai_url(url)
67
+ user_id, app_id, _, dataset_id, dataset_version_id = ClarifaiUrlHelper.split_clarifai_url(
68
+ url)
64
69
  kwargs = {'user_id': user_id, 'app_id': app_id}
65
- self.kwargs = {**kwargs, 'id': dataset_id}
70
+ dataset_version = {
71
+ 'id': dataset_version_id
72
+ } if dataset_version_id else kwargs['version'] if 'version' in kwargs else None
73
+ self.kwargs = {**kwargs, 'id': dataset_id, 'version': dataset_version}
66
74
  self.dataset_info = resources_pb2.Dataset(**self.kwargs)
67
75
  # Related to Dataset Upload
68
76
  self.num_workers: int = min(10, cpu_count()) #15 req/sec rate limit
@@ -71,10 +79,21 @@ class Dataset(Lister, BaseClient):
71
79
  self.batch_size = 128 # limit max protos in a req
72
80
  self.task = None # Upload dataset type
73
81
  self.input_object = Inputs(
74
- user_id=self.user_id, app_id=self.app_id, pat=pat, token=token, base_url=base_url)
82
+ user_id=self.user_id,
83
+ app_id=self.app_id,
84
+ pat=pat,
85
+ token=token,
86
+ base_url=base_url,
87
+ root_certificates_path=root_certificates_path)
75
88
  self.logger = get_logger(logger_level="INFO", name=__name__)
76
89
  BaseClient.__init__(
77
- self, user_id=self.user_id, app_id=self.app_id, base=base_url, pat=pat, token=token)
90
+ self,
91
+ user_id=self.user_id,
92
+ app_id=self.app_id,
93
+ base=base_url,
94
+ pat=pat,
95
+ token=token,
96
+ root_certificates_path=root_certificates_path)
78
97
  Lister.__init__(self)
79
98
 
80
99
  def create_version(self, **kwargs) -> 'Dataset':
@@ -162,13 +181,17 @@ class Dataset(Lister, BaseClient):
162
181
  for dataset_version_info in all_dataset_versions_info:
163
182
  dataset_version_info['id'] = dataset_version_info['dataset_version_id']
164
183
  del dataset_version_info['dataset_version_id']
165
- del dataset_version_info['metrics']
184
+ dataset_version_info.pop('metrics', None)
185
+ dataset_version_info.pop('export_info', None)
166
186
  kwargs = {
167
187
  'dataset_id': self.id,
168
188
  'version': resources_pb2.DatasetVersion(**dataset_version_info),
169
189
  }
170
190
  yield Dataset.from_auth_helper(self.auth_helper, **kwargs)
171
191
 
192
+ def __iter__(self):
193
+ return iter(DatasetExportReader(archive_url=self.archive_zip()))
194
+
172
195
  def _concurrent_annot_upload(self, annots: List[List[resources_pb2.Annotation]]
173
196
  ) -> Union[List[resources_pb2.Annotation], List[None]]:
174
197
  """Uploads annotations concurrently.
@@ -567,7 +590,7 @@ class Dataset(Lister, BaseClient):
567
590
  )
568
591
 
569
592
  start_time = time.time()
570
- backoff_iterator = BackoffIterator()
593
+ backoff_iterator = BackoffIterator(10)
571
594
  while (True):
572
595
  dataset_metrics_response = self._grpc_request(
573
596
  self.STUB.ListDatasetVersionMetricsGroups,
@@ -599,11 +622,37 @@ class Dataset(Lister, BaseClient):
599
622
  if delete_version:
600
623
  self.delete_version(dataset_version_id)
601
624
 
625
+ def archive_zip(self, wait: bool = True) -> str:
626
+ """Exports the dataset to a zip file URL."""
627
+ request = service_pb2.PutDatasetVersionExportsRequest(
628
+ user_app_id=self.user_app_id,
629
+ dataset_id=self.id,
630
+ dataset_version_id=self.version.id,
631
+ exports=[
632
+ resources_pb2.DatasetVersionExport(
633
+ format=resources_pb2.DatasetVersionExportFormat.CLARIFAI_DATA_PROTOBUF)
634
+ ])
635
+
636
+ response = self._grpc_request(self.STUB.PutDatasetVersionExports, request)
637
+ if response.status.code != status_code_pb2.SUCCESS:
638
+ raise Exception(response.status)
639
+ if wait:
640
+ while response.exports[0].status.code in (
641
+ status_code_pb2.DATASET_VERSION_EXPORT_PENDING,
642
+ status_code_pb2.DATASET_VERSION_EXPORT_IN_PROGRESS):
643
+ time.sleep(1)
644
+ response = self._grpc_request(self.STUB.PutDatasetVersionExports, request)
645
+ if response.status.code != status_code_pb2.SUCCESS:
646
+ raise Exception(response.status)
647
+ if response.exports[0].status.code != status_code_pb2.DATASET_VERSION_EXPORT_SUCCESS:
648
+ raise Exception(response.exports[0].status)
649
+ return response.exports[0].url
650
+
602
651
  def export(self,
603
652
  save_path: str,
604
653
  archive_url: str = None,
605
654
  local_archive_path: str = None,
606
- split: str = None,
655
+ split: str = 'all',
607
656
  num_workers: int = 4) -> None:
608
657
  """Exports the Clarifai protobuf dataset to a local archive.
609
658
 
@@ -616,10 +665,12 @@ class Dataset(Lister, BaseClient):
616
665
 
617
666
  Example:
618
667
  >>> from clarifai.client.dataset import Dataset
619
- >>> Dataset().export(save_path='output.zip', local_archive_path='clarifai-data-protobuf.zip')
668
+ >>> Dataset().export(save_path='output.zip')
620
669
  """
621
670
  if local_archive_path and not os.path.exists(local_archive_path):
622
671
  raise UserError(f"Archive {local_archive_path} does not exist.")
672
+ if not archive_url and not local_archive_path:
673
+ archive_url = self.archive_zip()
623
674
  # Create a session object and set auth header
624
675
  session = requests.Session()
625
676
  retries = Retry(total=5, backoff_factor=0.1, status_forcelist=[500, 502, 503, 504])
clarifai/client/input.py CHANGED
@@ -19,6 +19,7 @@ from tqdm import tqdm
19
19
  from clarifai.client.base import BaseClient
20
20
  from clarifai.client.lister import Lister
21
21
  from clarifai.constants.dataset import MAX_RETRIES
22
+ from clarifai.constants.input import MAX_UPLOAD_BATCH_SIZE
22
23
  from clarifai.errors import UserError
23
24
  from clarifai.utils.logging import get_logger
24
25
  from clarifai.utils.misc import BackoffIterator, Chunker
@@ -34,6 +35,7 @@ class Inputs(Lister, BaseClient):
34
35
  base_url: str = "https://api.clarifai.com",
35
36
  pat: str = None,
36
37
  token: str = None,
38
+ root_certificates_path: str = None,
37
39
  **kwargs):
38
40
  """Initializes an Input object.
39
41
 
@@ -43,6 +45,7 @@ class Inputs(Lister, BaseClient):
43
45
  base_url (str): Base API url. Default "https://api.clarifai.com"
44
46
  pat (str): A personal access token for authentication. Can be set as env var CLARIFAI_PAT
45
47
  token (str): A session token for authentication. Accepts either a session token or a pat. Can be set as env var CLARIFAI_SESSION_TOKEN
48
+ root_certificates_path (str): Path to the SSL root certificates file, used to establish secure gRPC connections.
46
49
  **kwargs: Additional keyword arguments to be passed to the Input
47
50
  """
48
51
  self.user_id = user_id
@@ -51,7 +54,13 @@ class Inputs(Lister, BaseClient):
51
54
  self.input_info = resources_pb2.Input(**self.kwargs)
52
55
  self.logger = get_logger(logger_level=logger_level, name=__name__)
53
56
  BaseClient.__init__(
54
- self, user_id=self.user_id, app_id=self.app_id, base=base_url, pat=pat, token=token)
57
+ self,
58
+ user_id=self.user_id,
59
+ app_id=self.app_id,
60
+ base=base_url,
61
+ pat=pat,
62
+ token=token,
63
+ root_certificates_path=root_certificates_path)
55
64
  Lister.__init__(self)
56
65
 
57
66
  @staticmethod
@@ -660,6 +669,10 @@ class Inputs(Lister, BaseClient):
660
669
  """
661
670
  if not isinstance(inputs, list):
662
671
  raise UserError("inputs must be a list of Input objects")
672
+ if len(inputs) > MAX_UPLOAD_BATCH_SIZE:
673
+ raise UserError(
674
+ f"Number of inputs to upload exceeds the maximum batch size of {MAX_UPLOAD_BATCH_SIZE}. Please reduce batch size."
675
+ )
663
676
  input_job_id = uuid.uuid4().hex # generate a unique id for this job
664
677
  request = service_pb2.PostInputsRequest(
665
678
  user_app_id=self.user_app_id, inputs=inputs, inputs_add_job_id=input_job_id)
@@ -912,7 +925,7 @@ class Inputs(Lister, BaseClient):
912
925
  Returns:
913
926
  True if inputs are processed, False otherwise
914
927
  """
915
- backoff_iterator = BackoffIterator()
928
+ backoff_iterator = BackoffIterator(10)
916
929
  max_retries = 10
917
930
  start_time = time.time()
918
931
  while True: