hiddenlayer-sdk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. hiddenlayer/__init__.py +109 -0
  2. hiddenlayer/sdk/__init__.py +0 -0
  3. hiddenlayer/sdk/constants.py +14 -0
  4. hiddenlayer/sdk/enterprise/__init__.py +0 -0
  5. hiddenlayer/sdk/enterprise/enterprise_model_scan_api.py +55 -0
  6. hiddenlayer/sdk/exceptions.py +12 -0
  7. hiddenlayer/sdk/models.py +22 -0
  8. hiddenlayer/sdk/rest/__init__.py +49 -0
  9. hiddenlayer/sdk/rest/api/__init__.py +7 -0
  10. hiddenlayer/sdk/rest/api/aidr_predictive_api.py +308 -0
  11. hiddenlayer/sdk/rest/api/model_scan_api.py +591 -0
  12. hiddenlayer/sdk/rest/api/sensor_api.py +1966 -0
  13. hiddenlayer/sdk/rest/api_client.py +770 -0
  14. hiddenlayer/sdk/rest/api_response.py +21 -0
  15. hiddenlayer/sdk/rest/configuration.py +445 -0
  16. hiddenlayer/sdk/rest/exceptions.py +199 -0
  17. hiddenlayer/sdk/rest/models/__init__.py +30 -0
  18. hiddenlayer/sdk/rest/models/create_sensor_request.py +95 -0
  19. hiddenlayer/sdk/rest/models/file_info.py +110 -0
  20. hiddenlayer/sdk/rest/models/get_multipart_upload_response.py +97 -0
  21. hiddenlayer/sdk/rest/models/model.py +100 -0
  22. hiddenlayer/sdk/rest/models/model_query_response.py +101 -0
  23. hiddenlayer/sdk/rest/models/multipart_upload_part.py +93 -0
  24. hiddenlayer/sdk/rest/models/scan_model_request.py +87 -0
  25. hiddenlayer/sdk/rest/models/scan_results_v2.py +108 -0
  26. hiddenlayer/sdk/rest/models/sensor_sor_query_filter.py +108 -0
  27. hiddenlayer/sdk/rest/models/sensor_sor_query_request.py +109 -0
  28. hiddenlayer/sdk/rest/models/submission_response.py +95 -0
  29. hiddenlayer/sdk/rest/models/submission_v2.py +109 -0
  30. hiddenlayer/sdk/rest/models/validation_error_model.py +99 -0
  31. hiddenlayer/sdk/rest/models/validation_error_model_loc_inner.py +138 -0
  32. hiddenlayer/sdk/rest/rest.py +257 -0
  33. hiddenlayer/sdk/services/__init__.py +0 -0
  34. hiddenlayer/sdk/services/aidr_predictive.py +76 -0
  35. hiddenlayer/sdk/services/model.py +101 -0
  36. hiddenlayer/sdk/services/model_scan.py +414 -0
  37. hiddenlayer/sdk/utils.py +92 -0
  38. hiddenlayer/sdk/version.py +1 -0
  39. hiddenlayer_sdk-0.1.0.dist-info/LICENSE +201 -0
  40. hiddenlayer_sdk-0.1.0.dist-info/METADATA +320 -0
  41. hiddenlayer_sdk-0.1.0.dist-info/RECORD +43 -0
  42. hiddenlayer_sdk-0.1.0.dist-info/WHEEL +5 -0
  43. hiddenlayer_sdk-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,414 @@
1
+ import os
2
+ import random
3
+ import time
4
+ import warnings
5
+ from datetime import datetime
6
+ from pathlib import Path
7
+ from typing import List, Optional, Union
8
+ from uuid import uuid4
9
+
10
+ from hiddenlayer.sdk.constants import ScanStatus
11
+ from hiddenlayer.sdk.enterprise.enterprise_model_scan_api import EnterpriseModelScanApi
12
+ from hiddenlayer.sdk.models import ScanResults
13
+ from hiddenlayer.sdk.rest.api import ModelScanApi, SensorApi
14
+ from hiddenlayer.sdk.rest.api_client import ApiClient
15
+ from hiddenlayer.sdk.rest.models import MultipartUploadPart
16
+ from hiddenlayer.sdk.rest.models.model import Model
17
+ from hiddenlayer.sdk.services.model import ModelAPI
18
+ from hiddenlayer.sdk.utils import filter_path_objects, is_saas
19
+
20
# Glob patterns (fnmatch syntax) for files that are never submitted for scanning.
# Patterns are matched against the whole path string, so entries without a
# leading wildcard only match paths relative to the scanned root; the "*/..."
# variants cover the same files when the path carries a directory prefix.
EXCLUDE_FILE_TYPES = [
    "*.txt",
    "*.md",
    "*.lock",
    ".gitattributes",
    "*/.gitattributes",
    ".git",
    ".git/*",
    "*/.git",
    "**/.git/**",
]
30
+
31
+
32
class ModelScanAPI:
    """Submit models to the HiddenLayer Model Scanner and fetch scan results.

    The SaaS APIs are used when ``is_saas`` reports that the configured host is
    the SaaS platform; otherwise the Enterprise model scan API is used.
    """

    def __init__(self, api_client: ApiClient) -> None:
        """
        :param api_client: Configured REST client; its ``configuration.host``
            decides whether the SaaS or the Enterprise API is used.
        """
        self.is_saas = is_saas(api_client.configuration.host)
        self._api_client = api_client

        if self.is_saas:
            self._model_scan_api = ModelScanApi(api_client=api_client)
            self._model_api = ModelAPI(api_client=api_client)
            # lower level api of ModelAPI
            self._sensor_api = SensorApi(api_client=api_client)
        else:
            self._model_scan_api = EnterpriseModelScanApi(api_client=api_client)

    @staticmethod
    def _backoff_delay(
        retries: int, base_delay: float = 0.1, max_delay: float = 30.0
    ) -> float:
        """Return the sleep time in seconds before poll attempt ``retries``.

        The delay grows exponentially with ``retries`` but its exponential part
        is capped at ``max_delay``; up to one second of random jitter is added.
        """
        # Bounding the exponent keeps 2**exponent a small int no matter how
        # long a scan has been polled.
        exponent = min(retries, 32)
        return min(base_delay * 2**exponent, max_delay) + random.uniform(0, 1)

    def _upload_to_saas(self, file_path: Path, model_name: str, chunk_size: int):
        """Create a sensor, upload the file in parts and start the SaaS scan.

        :returns: The sensor object returned by ``ModelAPI.create``.

        :raises ValueError: If ``chunk_size`` is smaller than 1.
        """
        # A negative step would make the range below empty and "complete" an
        # upload that contains no data, so reject it before any I/O happens.
        if chunk_size < 1:
            raise ValueError("chunk_size must be a positive integer.")

        filesize = file_path.stat().st_size
        sensor = self._model_api.create(model_name=model_name)
        upload = self._sensor_api.begin_multipart_upload(filesize, sensor.sensor_id)

        with open(file_path, "rb") as f:
            for i in range(0, len(upload.parts), chunk_size):
                group: List[MultipartUploadPart] = upload.parts[i : i + chunk_size]
                for part in group:
                    read_amount = part.end_offset - part.start_offset
                    f.seek(int(part.start_offset))
                    part_data = f.read(int(read_amount))

                    # The SaaS multipart upload returns a upload url for each part
                    # So there is no specified route
                    self._api_client.call_api(
                        "PUT",
                        part.upload_url,
                        body=part_data,
                        header_params={"Content-Type": "application/octet-binary"},
                    )

        self._sensor_api.complete_multipart_upload(sensor.sensor_id, upload.upload_id)
        self._model_scan_api.scan_model(sensor.sensor_id)

        return sensor

    def _submit_to_enterprise(self, file_path: Path, model_name: str) -> Model:
        """Send the whole file to the Enterprise scanner under a new sensor id.

        :returns: A locally built ``Model`` whose ``sensor_id`` is a fresh UUID.
        """
        with open(file_path, "rb") as f:
            data = f.read()

        sensor = Model(
            sensor_id=str(uuid4()),
            created_at=datetime.now(),
            tenant_id="0000",
            plaintext_name=model_name,
            active=True,
            version=1,
        )

        # In this branch self._model_scan_api is an EnterpriseModelScanApi.
        self._model_scan_api.scan_model(sensor.sensor_id, data)

        return sensor

    def scan_file(
        self,
        *,
        model_name: str,
        model_path: Union[str, os.PathLike],
        threads: int = 1,
        chunk_size: int = 16,
        wait_for_results: bool = True,
    ) -> ScanResults:
        """
        Scan a local model file using the HiddenLayer Model Scanner.

        :param model_name: Name of the model to be shown on the HiddenLayer UI
        :param model_path: Local path to the model file.
        :param threads: Deprecated, will be removed in version 0.2.0. Passing a
            value other than the default of 1 emits a ``DeprecationWarning``.
        :param chunk_size: Number of chunks of the file to upload at once, defaults to 16.
            Must be at least 1 when scanning through the SaaS API.
        :param wait_for_results: True whether to wait for the scan to finish, defaults to True.
            While waiting, the scan status is printed after every poll.

        :returns: Scan Results

        :raises ValueError: If ``chunk_size`` is smaller than 1 (SaaS only).
        """

        # Only warn callers that actually use the deprecated parameter.
        if threads != 1:
            warnings.warn(
                "Use of the threads parameter is deprecated and will be removed in version 0.2.0.",
                category=DeprecationWarning,
                stacklevel=2,
            )

        file_path = Path(model_path)

        # Can combine the 2 paths when SaaS API and Enterprise APIs are in sync
        if self.is_saas:
            sensor = self._upload_to_saas(file_path, model_name, chunk_size)
        else:
            sensor = self._submit_to_enterprise(file_path, model_name)
            # Enterprise results are looked up by sensor id, not by model name.
            model_name = sensor.sensor_id

        scan_results = self.get_scan_results(model_name=model_name)

        if wait_for_results:
            retries = 0
            print(f"{file_path.name} scan status: {scan_results.status}")
            while scan_results.status not in [ScanStatus.DONE, ScanStatus.FAILED]:
                retries += 1
                # capped exponential back off retry
                time.sleep(self._backoff_delay(retries))
                scan_results = self.get_scan_results(model_name=model_name)
                print(f"{file_path.name} scan status: {scan_results.status}")

        scan_results = ScanResults.from_scanresultsv2(scan_results_v2=scan_results)
        scan_results.file_name = file_path.name
        scan_results.file_path = str(file_path)
        scan_results.sensor_id = sensor.sensor_id

        return scan_results

    def scan_s3_model(
        self,
        *,
        model_name: str,
        bucket: str,
        key: str,
        s3_client: Optional[object] = None,
        threads: int = 1,
        chunk_size: int = 4,
        wait_for_results: bool = True,
    ) -> ScanResults:
        """
        Scan a model file on S3.

        The object is downloaded into a private temporary directory that is
        removed once the file has been submitted, so ``file_path`` on the
        returned results refers to a location that no longer exists.

        :param model_name: Name of the model to be shown on the HiddenLayer UI.
        :param bucket: Name of the s3 bucket where the model file is stored.
        :param key: Path to the model file on s3.
        :param s3_client: boto3 s3 client. When omitted, one is created with
            ``boto3.client("s3")``, which requires the ``boto3`` package.
        :param threads: Number of threads used to upload the file, defaults to 1.
        :param chunk_size: Number of chunks of the file to upload at once, defaults to 4.
        :param wait_for_results: True whether to wait for the scan to finish, defaults to True.

        :returns: Scan Results

        :raises ImportError: If no client is given and boto3 is not installed.
        :raises RuntimeError: If the object cannot be downloaded.

        :examples:
            .. code-block:: python

                hl_client.model_scanner.scan_s3_model(
                    model_name="your-model-name",
                    bucket="s3_bucket",
                    key="path/to/file"
                )
        """
        import tempfile

        if s3_client is None:
            try:
                import boto3
            except ImportError as err:
                raise ImportError("Python package boto3 is not installed.") from err

            s3_client = boto3.client("s3")

        file_name = key.split("/")[-1]

        # A unique directory avoids clobbering same-named files from other
        # scans and guarantees the download is cleaned up afterwards.
        with tempfile.TemporaryDirectory() as download_dir:
            local_path = os.path.join(download_dir, file_name)

            try:
                s3_client.download_file(bucket, key, local_path)
            except Exception as e:
                raise RuntimeError(
                    f"Couldn't download model s3://{bucket}/{key}: {e}"
                ) from e

            return self.scan_file(
                model_path=local_path,
                model_name=model_name,
                threads=threads,
                chunk_size=chunk_size,
                wait_for_results=wait_for_results,
            )

    def scan_azure_blob_model(
        self,
        *,
        model_name: str,
        account_url: str,
        container: str,
        blob: str,
        blob_service_client: Optional[object] = None,
        credential: Optional[object] = None,
        threads: int = 1,
        chunk_size: int = 4,
        wait_for_results: bool = True,
    ) -> ScanResults:
        """
        Scan a model file on Azure Blob Storage.

        The blob is streamed into a private temporary directory that is removed
        once the file has been submitted, so ``file_path`` on the returned
        results refers to a location that no longer exists.

        :param model_name: Name of the model to be shown on the HiddenLayer UI.
        :param account_url: Azure Blob url of where the file is stored.
        :param container: Azure Blob container containing the model file.
        :param blob: Path to the model file inside the Azure blob container.
        :param blob_service_client: BlobServiceClient object. Defaults to creating one
            from ``account_url`` and ``credential``.
        :param credential: Credential to be passed to the BlobServiceClient object, can be a credential object, SAS key, etc.
            Defaults to `DefaultAzureCredential`. Ignored when ``blob_service_client`` is given.
        :param threads: Number of threads used to upload the file, defaults to 1.
        :param chunk_size: Number of chunks of the file to upload at once, defaults to 4.
        :param wait_for_results: True whether to wait for the scan to finish, defaults to True.

        :returns: Scan Results

        :raises ImportError: If a client has to be created and azure-storage-blob
            (or, without a credential, azure-identity) is not installed.
        :raises RuntimeError: If the blob cannot be downloaded.

        :examples:
            .. code-block:: python

                hl_client.model_scanner.scan_azure_blob_model(
                    model_name="your-model-name",
                    account_url="https://<storageaccountname>.blob.core.windows.net",
                    container="container_name",
                    blob="path/to/file.bin",
                    credential="?<sas_key>"  # If using a SAS key and not DefaultCredentials
                )
        """
        import tempfile

        # The Azure packages are only needed when this method has to build the
        # client (and the default credential) itself.
        if blob_service_client is None:
            try:
                from azure.storage.blob import BlobServiceClient
            except ImportError as err:
                raise ImportError(
                    "Python package azure-storage-blob is not installed."
                ) from err

            if not credential:
                try:
                    from azure.identity import DefaultAzureCredential
                except ImportError as err:
                    raise ImportError(
                        "Python package azure-identity is not installed."
                    ) from err

                credential = DefaultAzureCredential()

            blob_service_client = BlobServiceClient(account_url, credential=credential)

        file_name = blob.split("/")[-1]

        blob_client = blob_service_client.get_blob_client(
            container=container, blob=blob
        )

        with tempfile.TemporaryDirectory() as download_dir:
            local_path = os.path.join(download_dir, file_name)

            try:
                with open(local_path, "wb") as f:
                    # Stream straight to disk instead of buffering the whole
                    # blob in memory first.
                    blob_client.download_blob().readinto(f)
            except Exception as e:
                raise RuntimeError(
                    f"Couldn't download model {account_url}, {container}, {blob}: {e}"
                ) from e

            return self.scan_file(
                model_path=local_path,
                model_name=model_name,
                threads=threads,
                chunk_size=chunk_size,
                wait_for_results=wait_for_results,
            )

    def scan_huggingface_model(
        self,
        *,
        repo_id: str,
        # model_id: str,
        # HF parameters
        revision: Optional[str] = None,
        local_dir: str = "/tmp",
        allow_file_patterns: Optional[List[str]] = None,
        ignore_file_patterns: Optional[List[str]] = None,
        force_download: bool = False,
        hf_token: Optional[Union[str, bool]] = None,
        # HL parameters
        threads: int = 1,
        chunk_size: int = 4,
        wait_for_results: bool = True,
    ) -> List[ScanResults]:
        """
        Scans a model on HuggingFace.

        Note: Requires the `huggingface_hub` pip package to be installed.

        :param repo_id: HuggingFace repository id to download and scan.
        :param revision: An optional Git revision id which can be a branch name, a tag, or a commit hash.
        :param local_dir: If provided, the downloaded files will be placed under this directory.
            With the default of ``/tmp`` the files are placed under ``/tmp/<repo_id>``.
        :param allow_file_patterns: If provided, only files matching at least one pattern are scanned.
        :param ignore_file_patterns: If provided, files matching any of the patterns are not scanned.
            The patterns in ``EXCLUDE_FILE_TYPES`` are always ignored as well.
        :param force_download: Whether the file should be downloaded even if it already exists in the local cache.
        :param hf_token: A token to be used for the download.
            If True, the token is read from the HuggingFace config folder.
            If a string, it's used as the authentication token.
        :param threads: Number of threads used to upload the file, defaults to 1.
        :param chunk_size: Number of chunks of the file to upload at once, defaults to 4.
        :param wait_for_results: True whether to wait for the scan to finish, defaults to True.

        :returns: List of ScanResults

        :raises ImportError: If huggingface_hub is not installed.
        """
        try:
            from huggingface_hub import snapshot_download
        except ImportError as err:
            raise ImportError(
                "Python package huggingface_hub is not installed."
            ) from err

        local_dir = f"/tmp/{repo_id}" if local_dir == "/tmp" else local_dir
        download_ignore_patterns = [
            *EXCLUDE_FILE_TYPES,
            *(ignore_file_patterns or []),
        ]

        snapshot_download(
            repo_id,
            revision=revision,
            allow_patterns=allow_file_patterns,
            ignore_patterns=download_ignore_patterns,
            local_dir=local_dir,
            local_dir_use_symlinks=False,
            cache_dir=local_dir,
            force_download=force_download,
            token=hf_token,
        )

        # scan_folder adds EXCLUDE_FILE_TYPES itself, so hand it only the
        # caller's patterns rather than the already-combined list.
        return self.scan_folder(
            path=local_dir,
            allow_file_patterns=allow_file_patterns,
            ignore_file_patterns=ignore_file_patterns,
            threads=threads,
            chunk_size=chunk_size,
            wait_for_results=wait_for_results,
        )

    def get_scan_results(self, *, model_name: str) -> ScanResults:
        """
        Get results from a model scan.

        :param model_name: Name of the model. When not connected to the SaaS
            platform this value is used directly as the sensor id.

        :returns: Scan results.
        """

        if self.is_saas:
            sensor_id = self._model_api.get(model_name=model_name).sensor_id
        else:
            sensor_id = model_name

        scan_results_v2 = self._model_scan_api.scan_status(sensor_id)

        return ScanResults.from_scanresultsv2(
            scan_results_v2=scan_results_v2, sensor_id=sensor_id
        )

    def scan_folder(
        self,
        *,
        path: Union[str, os.PathLike],
        allow_file_patterns: Optional[List[str]] = None,
        ignore_file_patterns: Optional[List[str]] = None,
        threads: int = 1,
        chunk_size: int = 4,
        wait_for_results: bool = True,
    ) -> List[ScanResults]:
        """
        Submits all files in a directory and its sub directories to be scanned.

        Each file is scanned with its path string as the model name.

        :param path: Path to the folder on disk to be scanned.
        :param allow_file_patterns: If provided, only files matching at least one pattern are scanned.
        :param ignore_file_patterns: If provided, files matching any of the patterns are not scanned.
            The patterns in ``EXCLUDE_FILE_TYPES`` are always ignored as well.
        :param threads: Number of threads used to upload the file, defaults to 1.
        :param chunk_size: Number of chunks of the file to upload at once, defaults to 4.
        :param wait_for_results: True whether to wait for the scan to finish, defaults to True.

        :returns: List of ScanResults
        """

        model_path = Path(path)
        combined_ignore_patterns = [
            *EXCLUDE_FILE_TYPES,
            *(ignore_file_patterns or []),
        ]

        files = filter_path_objects(
            model_path.rglob("*"),
            allow_patterns=allow_file_patterns,
            ignore_patterns=combined_ignore_patterns,
        )

        return [
            self.scan_file(
                model_name=str(file),
                model_path=file,
                threads=threads,
                chunk_size=chunk_size,
                wait_for_results=wait_for_results,
            )
            for file in files
        ]
@@ -0,0 +1,92 @@
1
+ import os
2
+ from fnmatch import fnmatch
3
+ from pathlib import Path
4
+ from typing import Generator, List, Optional, Union
5
+ from urllib.parse import urlparse
6
+
7
# Anything accepted as a filesystem path: a plain string or an os.PathLike object.
PathInputType = Union[str, os.PathLike]


def filter_path_objects(
    items: Union[List[PathInputType], Generator[PathInputType, None, None]],
    *,
    allow_patterns: Optional[Union[List[str], str]] = None,
    ignore_patterns: Optional[Union[List[str], str]] = None,
) -> Generator[Union[str, os.PathLike], None, None]:
    """Filter paths based on an allowlist and a denylist.

    Items that are existing directories are always skipped. Every other item
    is matched, as the string form of its path, against the patterns.

    Patterns are Unix shell-style wildcards which are NOT regular expressions. See
    https://docs.python.org/3/library/fnmatch.html for more details.

    :param items: Paths to filter, each a `str` or an `os.PathLike` object.
    :param allow_patterns: Pattern or patterns constituting the allowlist. If provided,
        item paths must match at least one pattern from the allowlist.
    :param ignore_patterns: Pattern or patterns constituting the denylist. If provided,
        item paths must not match any patterns from the denylist.

    :returns: Generator yielding the items that pass both filters, unchanged.

    :raises:
        :class:`ValueError`:
            While iterating, if an item is not a `str` or path-like object.

    Example usage with paths:
    ```python
    >>> # Filter only PDFs that are not hidden.
    >>> list(filter_path_objects(
    ...     ["aaa.pdf", "bbb.jpg", ".ccc.pdf", ".ddd.png"],
    ...     allow_patterns=["*.pdf"],
    ...     ignore_patterns=[".*"],
    ... ))
    ['aaa.pdf']
    ```
    """
    if isinstance(allow_patterns, str):
        allow_patterns = [allow_patterns]

    if isinstance(ignore_patterns, str):
        ignore_patterns = [ignore_patterns]

    for item in items:
        # Path() accepts str and any os.PathLike yielding str; everything else
        # raises TypeError, which is reported with the historical ValueError.
        try:
            path = Path(item)
        except TypeError as err:
            raise ValueError("Objects must be string or Pathlike.") from err

        if path.is_dir():
            continue

        path_text = str(path)

        # Skip if there's an allowlist and path doesn't match any
        if allow_patterns is not None and not any(
            fnmatch(path_text, pattern) for pattern in allow_patterns
        ):
            continue

        # Skip if there's a denylist and path matches any
        if ignore_patterns is not None and any(
            fnmatch(path_text, pattern) for pattern in ignore_patterns
        ):
            continue

        yield item
82
+
83
+
84
def is_saas(host: str) -> bool:
    """Checks whether the connection is to the SaaS platform.

    :param host: URL of the API host, including the scheme. A value without a
        scheme has no parseable hostname and is treated as not SaaS.

    :returns: True when the hostname is ``hiddenlayer.ai`` or one of its
        subdomains, False otherwise.
    """

    hostname = urlparse(host).hostname

    if not hostname:
        return False

    # Require a label boundary so look-alike domains such as
    # "evilhiddenlayer.ai" are not mistaken for the SaaS platform.
    return hostname == "hiddenlayer.ai" or hostname.endswith(".hiddenlayer.ai")
@@ -0,0 +1 @@
1
+ VERSION = "0.1.0"