hiddenlayer-sdk 0.1.0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. hiddenlayer/__init__.py +109 -0
  2. hiddenlayer/sdk/__init__.py +0 -0
  3. hiddenlayer/sdk/constants.py +14 -0
  4. hiddenlayer/sdk/enterprise/__init__.py +0 -0
  5. hiddenlayer/sdk/enterprise/enterprise_model_scan_api.py +55 -0
  6. hiddenlayer/sdk/exceptions.py +12 -0
  7. hiddenlayer/sdk/models.py +22 -0
  8. hiddenlayer/sdk/rest/__init__.py +49 -0
  9. hiddenlayer/sdk/rest/api/__init__.py +7 -0
  10. hiddenlayer/sdk/rest/api/aidr_predictive_api.py +308 -0
  11. hiddenlayer/sdk/rest/api/model_scan_api.py +591 -0
  12. hiddenlayer/sdk/rest/api/sensor_api.py +1966 -0
  13. hiddenlayer/sdk/rest/api_client.py +770 -0
  14. hiddenlayer/sdk/rest/api_response.py +21 -0
  15. hiddenlayer/sdk/rest/configuration.py +445 -0
  16. hiddenlayer/sdk/rest/exceptions.py +199 -0
  17. hiddenlayer/sdk/rest/models/__init__.py +30 -0
  18. hiddenlayer/sdk/rest/models/create_sensor_request.py +95 -0
  19. hiddenlayer/sdk/rest/models/file_info.py +110 -0
  20. hiddenlayer/sdk/rest/models/get_multipart_upload_response.py +97 -0
  21. hiddenlayer/sdk/rest/models/model.py +100 -0
  22. hiddenlayer/sdk/rest/models/model_query_response.py +101 -0
  23. hiddenlayer/sdk/rest/models/multipart_upload_part.py +93 -0
  24. hiddenlayer/sdk/rest/models/scan_model_request.py +87 -0
  25. hiddenlayer/sdk/rest/models/scan_results_v2.py +108 -0
  26. hiddenlayer/sdk/rest/models/sensor_sor_query_filter.py +108 -0
  27. hiddenlayer/sdk/rest/models/sensor_sor_query_request.py +109 -0
  28. hiddenlayer/sdk/rest/models/submission_response.py +95 -0
  29. hiddenlayer/sdk/rest/models/submission_v2.py +109 -0
  30. hiddenlayer/sdk/rest/models/validation_error_model.py +99 -0
  31. hiddenlayer/sdk/rest/models/validation_error_model_loc_inner.py +138 -0
  32. hiddenlayer/sdk/rest/rest.py +257 -0
  33. hiddenlayer/sdk/services/__init__.py +0 -0
  34. hiddenlayer/sdk/services/aidr_predictive.py +76 -0
  35. hiddenlayer/sdk/services/model.py +101 -0
  36. hiddenlayer/sdk/services/model_scan.py +414 -0
  37. hiddenlayer/sdk/utils.py +92 -0
  38. hiddenlayer/sdk/version.py +1 -0
  39. hiddenlayer_sdk-0.1.0.dist-info/LICENSE +201 -0
  40. hiddenlayer_sdk-0.1.0.dist-info/METADATA +320 -0
  41. hiddenlayer_sdk-0.1.0.dist-info/RECORD +43 -0
  42. hiddenlayer_sdk-0.1.0.dist-info/WHEEL +5 -0
  43. hiddenlayer_sdk-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,414 @@
1
+ import os
2
+ import random
3
+ import time
4
+ import warnings
5
+ from datetime import datetime
6
+ from pathlib import Path
7
+ from typing import List, Optional, Union
8
+ from uuid import uuid4
9
+
10
+ from hiddenlayer.sdk.constants import ScanStatus
11
+ from hiddenlayer.sdk.enterprise.enterprise_model_scan_api import EnterpriseModelScanApi
12
+ from hiddenlayer.sdk.models import ScanResults
13
+ from hiddenlayer.sdk.rest.api import ModelScanApi, SensorApi
14
+ from hiddenlayer.sdk.rest.api_client import ApiClient
15
+ from hiddenlayer.sdk.rest.models import MultipartUploadPart
16
+ from hiddenlayer.sdk.rest.models.model import Model
17
+ from hiddenlayer.sdk.services.model import ModelAPI
18
+ from hiddenlayer.sdk.utils import filter_path_objects, is_saas
19
+
20
# Default fnmatch-style patterns that scan_folder and scan_huggingface_model
# always add to the ignore list: text, markdown and lock files plus git
# metadata.
# NOTE(review): filter_path_objects matches each pattern against the whole
# path string, so anchored entries such as ".gitattributes" look like they
# only match paths that carry no directory prefix -- confirm.
EXCLUDE_FILE_TYPES = [
    "*.txt",
    "*.md",
    "*.lock",
    ".gitattributes",
    ".git",
    ".git/*",
    "*/.git",
    "**/.git/**",
]
30
+
31
+
32
+ class ModelScanAPI:
33
def __init__(self, api_client: ApiClient) -> None:
    """
    Bind the scanner to an API client and select the matching backend.

    :param api_client: Configured client. Its ``configuration.host`` decides
        whether the SaaS or the Enterprise model scan API is used.
    """
    self._api_client = api_client
    self.is_saas = is_saas(api_client.configuration.host)

    if not self.is_saas:
        # Enterprise deployments only need the enterprise scan API.
        self._model_scan_api = EnterpriseModelScanApi(api_client=api_client)
        return

    self._model_scan_api = ModelScanApi(api_client=api_client)
    self._model_api = ModelAPI(api_client=api_client)
    # lower level api of ModelAPI
    self._sensor_api = SensorApi(api_client=api_client)
45
+
46
def scan_file(
    self,
    *,
    model_name: str,
    model_path: Union[str, os.PathLike],
    threads: int = 1,
    chunk_size: int = 16,
    wait_for_results: bool = True,
) -> ScanResults:
    """
    Scan a local model file using the HiddenLayer Model Scanner.

    On SaaS the file is uploaded part by part through a multipart upload and
    the scan is then requested for the created sensor. On Enterprise the whole
    file is read into memory and submitted under a freshly generated sensor id.

    :param model_name: Name of the model to be shown on the HiddenLayer UI
    :param model_path: Local path to the model file.
    :param threads: Deprecated and unused; passing a non-default value emits a
        ``DeprecationWarning``. Defaults to 1.
    :param chunk_size: Number of upload parts grouped per batch, defaults to 16.
        Parts are uploaded one after another. Only used for SaaS uploads.
    :param wait_for_results: True whether to wait for the scan to finish, defaults to True.

    :returns: Scan Results

    :raises ValueError: If ``chunk_size`` is smaller than 1 on a SaaS upload.
    """

    # Only warn callers that actually override the deprecated parameter,
    # instead of warning on every single call.
    if threads != 1:
        warnings.warn(
            "Use of the threads parameter is deprecated and will be removed in version 0.2.0.",
            category=DeprecationWarning,
            stacklevel=2,
        )

    file_path = Path(model_path)

    # Can combine the 2 paths when SaaS API and Enterprise APIs are in sync
    if self.is_saas:
        # A negative step would make the range below empty and "complete" an
        # upload without sending any part, so reject it before creating a sensor.
        if chunk_size < 1:
            raise ValueError("chunk_size must be a positive integer.")

        filesize = file_path.stat().st_size
        sensor = self._model_api.create(model_name=model_name)
        upload = self._sensor_api.begin_multipart_upload(filesize, sensor.sensor_id)

        with open(file_path, "rb") as f:
            for i in range(0, len(upload.parts), chunk_size):
                group: List[MultipartUploadPart] = upload.parts[i : i + chunk_size]
                for part in group:
                    read_amount = part.end_offset - part.start_offset
                    f.seek(int(part.start_offset))
                    part_data = f.read(int(read_amount))

                    # The SaaS multipart upload returns a upload url for each part
                    # So there is no specified route
                    self._api_client.call_api(
                        "PUT",
                        part.upload_url,
                        body=part_data,
                        header_params={"Content-Type": "application/octet-binary"},
                    )

        self._sensor_api.complete_multipart_upload(
            sensor.sensor_id, upload.upload_id
        )

        self._model_scan_api.scan_model(sensor.sensor_id)
    else:
        with open(file_path, "rb") as f:
            data = f.read()

        sensor = Model(
            sensor_id=str(uuid4()),
            created_at=datetime.now(),
            tenant_id="0000",
            plaintext_name=model_name,
            active=True,
            version=1,
        )

        self._model_scan_api: EnterpriseModelScanApi
        self._model_scan_api.scan_model(sensor.sensor_id, data)
        # Enterprise results are looked up by sensor id rather than by name.
        model_name = sensor.sensor_id

    scan_results = self.get_scan_results(model_name=model_name)

    base_delay = 0.1  # seconds
    max_delay = 30.0  # seconds; stops the exponential term growing without bound
    if wait_for_results:
        print(f"{file_path.name} scan status: {scan_results.status}")
        backoff = base_delay
        while scan_results.status not in [ScanStatus.DONE, ScanStatus.FAILED]:
            # exponential back off (0.2s, 0.4s, ...) capped at max_delay, plus jitter
            backoff = min(backoff * 2, max_delay)
            time.sleep(backoff + random.uniform(0, 1))
            scan_results = self.get_scan_results(model_name=model_name)
            print(f"{file_path.name} scan status: {scan_results.status}")

    scan_results = ScanResults.from_scanresultsv2(scan_results_v2=scan_results)
    scan_results.file_name = file_path.name
    scan_results.file_path = str(file_path)
    scan_results.sensor_id = sensor.sensor_id

    return scan_results
141
+
142
def scan_s3_model(
    self,
    *,
    model_name: str,
    bucket: str,
    key: str,
    s3_client: Optional[object] = None,
    threads: int = 1,
    chunk_size: int = 4,
    wait_for_results: bool = True,
) -> ScanResults:
    """
    Scan a model file on S3.

    The object is downloaded into a private temporary directory that is removed
    once the file has been submitted, so ``file_path`` on the returned results
    refers to a location that no longer exists.

    :param model_name: Name of the model to be shown on the HiddenLayer UI.
    :param bucket: Name of the s3 bucket where the model file is stored.
    :param key: Path to the model file on s3.
    :param s3_client: boto3 s3 client. Defaults to ``boto3.client("s3")``.
    :param threads: Number of threads used to upload the file, defaults to 1.
    :param chunk_size: Number of chunks of the file to upload at once, defaults to 4.
    :param wait_for_results: True whether to wait for the scan to finish, defaults to True.

    :returns: Scan Results

    :raises ImportError: If no client is given and boto3 is not installed.
    :raises RuntimeError: If the object cannot be downloaded.

    :examples:
        .. code-block:: python

            hl_client.model_scanner.scan_s3_model(
                model_name="your-model-name",
                bucket="s3_bucket",
                key="path/to/file"
            )
    """
    import tempfile

    if s3_client is None:
        # boto3 is only needed when the caller did not bring a client.
        try:
            import boto3
        except ImportError as e:
            raise ImportError("Python package boto3 is not installed.") from e

        s3_client = boto3.client("s3")

    file_name = key.split("/")[-1]

    # A fresh private directory avoids name collisions and the predictable
    # shared "/tmp/<name>" path, and guarantees the download is cleaned up.
    with tempfile.TemporaryDirectory() as download_dir:
        local_path = os.path.join(download_dir, file_name)

        try:
            s3_client.download_file(bucket, key, local_path)
        except Exception as e:
            raise RuntimeError(
                f"Couldn't download model s3://{bucket}/{key}: {e}"
            ) from e

        # scan_file finishes reading the file before it returns, so the
        # directory can safely be removed when this block exits.
        return self.scan_file(
            model_path=local_path,
            model_name=model_name,
            threads=threads,
            chunk_size=chunk_size,
            wait_for_results=wait_for_results,
        )
198
+
199
def scan_azure_blob_model(
    self,
    *,
    model_name: str,
    account_url: str,
    container: str,
    blob: str,
    blob_service_client: Optional[object] = None,
    credential: Optional[object] = None,
    threads: int = 1,
    chunk_size: int = 4,
    wait_for_results: bool = True,
) -> ScanResults:
    """
    Scan a model file on Azure Blob Storage.

    The blob is streamed into a private temporary directory that is removed
    once the file has been submitted, so ``file_path`` on the returned results
    refers to a location that no longer exists.

    :param model_name: Name of the model to be shown on the HiddenLayer UI.
    :param account_url: Azure Blob url of where the file is stored.
    :param container: Azure Blob container containing the model file.
    :param blob: Path to the model file inside the Azure blob container.
    :param blob_service_client: BlobServiceClient object. Defaults to creating one
        from ``account_url`` and ``credential``.
    :param credential: Credential to be passed to the BlobServiceClient object, can be a credential object, SAS key, etc.
        Defaults to `DefaultAzureCredential`. Ignored when ``blob_service_client`` is given.
    :param threads: Number of threads used to upload the file, defaults to 1.
    :param chunk_size: Number of chunks of the file to upload at once, defaults to 4.
    :param wait_for_results: True whether to wait for the scan to finish, defaults to True.

    :returns: Scan Results

    :raises ImportError: If a required Azure package is needed but not installed.
    :raises RuntimeError: If the blob cannot be downloaded.

    :examples:
        .. code-block:: python

            hl_client.model_scanner.scan_azure_blob_model(
                model_name="your-model-name",
                account_url="https://<storageaccountname>.blob.core.windows.net",
                container="container_name",
                blob="path/to/file.bin",
                credential="?<sas_key>" # If using a SAS key and not DefaultCredentials
            )
    """
    import tempfile

    if not blob_service_client:
        try:
            from azure.storage.blob import BlobServiceClient
        except ImportError as e:
            raise ImportError(
                "Python package azure-storage-blob is not installed."
            ) from e

        if not credential:
            # azure-identity is only needed for the default credential chain.
            try:
                from azure.identity import DefaultAzureCredential
            except ImportError as e:
                raise ImportError(
                    "Python package azure-identity is not installed."
                ) from e

            credential = DefaultAzureCredential()

        blob_service_client = BlobServiceClient(account_url, credential=credential)

    file_name = blob.split("/")[-1]

    blob_client = blob_service_client.get_blob_client(
        container=container, blob=blob
    )

    # A fresh private directory avoids name collisions and the predictable
    # shared "/tmp/<name>" path, and guarantees the download is cleaned up.
    with tempfile.TemporaryDirectory() as download_dir:
        local_path = os.path.join(download_dir, file_name)

        try:
            with open(local_path, "wb") as f:
                # Stream straight to disk instead of buffering the whole blob.
                blob_client.download_blob().readinto(f)
        except Exception as e:
            raise RuntimeError(
                f"Couldn't download model {account_url}, {container}, {blob}: {e}"
            ) from e

        # scan_file finishes reading the file before it returns, so the
        # directory can safely be removed when this block exits.
        return self.scan_file(
            model_path=local_path,
            model_name=model_name,
            threads=threads,
            chunk_size=chunk_size,
            wait_for_results=wait_for_results,
        )
278
+
279
def scan_huggingface_model(
    self,
    *,
    repo_id: str,
    # HF parameters
    revision: Optional[str] = None,
    local_dir: str = "/tmp",
    allow_file_patterns: Optional[List[str]] = None,
    ignore_file_patterns: Optional[List[str]] = None,
    force_download: bool = False,
    hf_token: Optional[Union[str, bool]] = None,
    # HL parameters
    threads: int = 1,
    chunk_size: int = 4,
    wait_for_results: bool = True,
) -> List[ScanResults]:
    """
    Scans a model on HuggingFace.

    Note: Requires the `huggingface_hub` pip package to be installed.

    :param repo_id: Identifier of the HuggingFace repository to download and scan.
    :param revision: An optional Git revision id which can be a branch name, a tag, or a commit hash.
    :param local_dir: If provided, the downloaded files will be placed under this directory.
        When left at the default ``"/tmp"``, ``/tmp/<repo_id>`` is used instead.
    :param allow_file_patterns: If provided, only files matching at least one pattern are scanned.
    :param ignore_file_patterns: If provided, files matching any of the patterns are not scanned.
        The patterns in ``EXCLUDE_FILE_TYPES`` are always ignored as well.
    :param force_download: Whether the file should be downloaded even if it already exists in the local cache.
    :param hf_token: A token to be used for the download.
        If True, the token is read from the HuggingFace config folder.
        If a string, it's used as the authentication token.
    :param threads: Number of threads used to upload the file, defaults to 1.
    :param chunk_size: Number of chunks of the file to upload at once, defaults to 4.
    :param wait_for_results: True whether to wait for the scan to finish, defaults to True.

    :returns: List of ScanResults

    :raises ImportError: If huggingface_hub is not installed.
    """
    try:
        from huggingface_hub import snapshot_download
    except ImportError as e:
        raise ImportError("Python package huggingface_hub is not installed.") from e

    local_dir = f"/tmp/{repo_id}" if local_dir == "/tmp" else local_dir
    download_ignore_patterns = (
        EXCLUDE_FILE_TYPES + ignore_file_patterns
        if ignore_file_patterns
        else EXCLUDE_FILE_TYPES
    )

    snapshot_download(
        repo_id,
        revision=revision,
        allow_patterns=allow_file_patterns,
        ignore_patterns=download_ignore_patterns,
        local_dir=local_dir,
        local_dir_use_symlinks=False,
        cache_dir=local_dir,
        force_download=force_download,
        token=hf_token,
    )

    # scan_folder prepends EXCLUDE_FILE_TYPES itself, so forward only the
    # caller's patterns to avoid checking every default pattern twice.
    return self.scan_folder(
        path=local_dir,
        allow_file_patterns=allow_file_patterns,
        ignore_file_patterns=ignore_file_patterns,
        threads=threads,
        chunk_size=chunk_size,
        wait_for_results=wait_for_results,
    )
347
+
348
def get_scan_results(self, *, model_name: str) -> ScanResults:
    """
    Fetch the current results of a model scan.

    :param model_name: Name of the model. On SaaS it is resolved to a sensor id
        through the model API; otherwise the value is used as the sensor id as is.

    :returns: Scan results.
    """
    sensor_id = (
        self._model_api.get(model_name=model_name).sensor_id
        if self.is_saas
        else model_name
    )

    status_payload = self._model_scan_api.scan_status(sensor_id)

    return ScanResults.from_scanresultsv2(
        scan_results_v2=status_payload, sensor_id=sensor_id
    )
368
+
369
def scan_folder(
    self,
    *,
    path: Union[str, os.PathLike],
    allow_file_patterns: Optional[List[str]] = None,
    ignore_file_patterns: Optional[List[str]] = None,
    threads: int = 1,
    chunk_size: int = 4,
    wait_for_results: bool = True,
) -> List[ScanResults]:
    """
    Submit every file below a directory, including sub directories, for scanning.

    Each file is scanned through ``scan_file`` with its path string as the
    model name.

    :param path: Path to the folder on disk to be scanned.
    :param allow_file_patterns: If provided, only files matching at least one pattern are scanned.
    :param ignore_file_patterns: If provided, files matching any of the patterns are not scanned.
        The patterns in ``EXCLUDE_FILE_TYPES`` are always ignored as well.
    :param threads: Number of threads used to upload the file, defaults to 1.
    :param chunk_size: Number of chunks of the file to upload at once, defaults to 4.
    :param wait_for_results: True whether to wait for the scan to finish, defaults to True.

    :returns: List of ScanResults
    """
    # The built-in exclusions always apply; caller patterns are appended.
    excluded_patterns = EXCLUDE_FILE_TYPES
    if ignore_file_patterns:
        excluded_patterns = EXCLUDE_FILE_TYPES + ignore_file_patterns

    candidates = filter_path_objects(
        Path(path).rglob("*"),
        allow_patterns=allow_file_patterns,
        ignore_patterns=excluded_patterns,
    )

    results: List[ScanResults] = []
    for candidate in candidates:
        results.append(
            self.scan_file(
                model_name=str(candidate),
                model_path=candidate,
                threads=threads,
                chunk_size=chunk_size,
                wait_for_results=wait_for_results,
            )
        )

    return results
@@ -0,0 +1,92 @@
1
+ import os
2
+ from fnmatch import fnmatch
3
+ from pathlib import Path
4
+ from typing import Generator, List, Optional, Union
5
+ from urllib.parse import urlparse
6
+
7
PathInputType = Union[str, os.PathLike]


def filter_path_objects(
    items: Union[List[PathInputType], Generator[PathInputType, None, None]],
    *,
    allow_patterns: Optional[Union[List[str], str]] = None,
    ignore_patterns: Optional[Union[List[str], str]] = None,
) -> Generator[Union[str, os.PathLike], None, None]:
    """Filter paths based on an allowlist and a denylist.

    Each item must be a `str` or an `os.PathLike` object (for example a `Path`).
    Items that are existing directories are always skipped. Every other item is
    converted to a path string and matched against the patterns; items that pass
    are yielded unchanged, in their original type.

    Patterns are Unix shell-style wildcards which are NOT regular expressions. See
    https://docs.python.org/3/library/fnmatch.html for more details. A pattern has
    to match the whole path string, not just the file name.

    :param items: Paths to filter.
    :param allow_patterns: Patterns constituting the allowlist. If provided, item paths must match at
        least one pattern from the allowlist. A single string is treated as one pattern.
    :param ignore_patterns: Patterns constituting the denylist. If provided, item paths must not match
        any patterns from the denylist. A single string is treated as one pattern.

    :returns: The items that passed the filters, as a generator.

    :raises:
        :class:`ValueError`:
            If an item is neither a `str` nor an `os.PathLike` object. Raised lazily,
            when the offending item is reached during iteration.

    Example usage with paths:
    ```python
    >>> # Filter only PDFs that are not hidden.
    >>> list(filter_path_objects(
    ...     ["aaa.pdf", "bbb.jpg", ".ccc.pdf", ".ddd.png"],
    ...     allow_patterns=["*.pdf"],
    ...     ignore_patterns=[".*"],
    ... ))
    ['aaa.pdf']
    ```
    """
    if isinstance(allow_patterns, str):
        allow_patterns = [allow_patterns]

    if isinstance(ignore_patterns, str):
        ignore_patterns = [ignore_patterns]

    for item in items:
        # Accept every os.PathLike, as the annotation and error message promise,
        # not only concrete Path instances.
        if not isinstance(item, (str, os.PathLike)):
            raise ValueError("Objects must be string or Pathlike.")

        path = Path(item)

        if path.is_dir():
            continue

        # Computed once per item rather than once per pattern.
        candidate = str(path)

        # Skip if there's an allowlist and path doesn't match any
        if allow_patterns is not None and not any(
            fnmatch(candidate, pattern) for pattern in allow_patterns
        ):
            continue

        # Skip if there's a denylist and path matches any
        if ignore_patterns is not None and any(
            fnmatch(candidate, pattern) for pattern in ignore_patterns
        ):
            continue

        yield item
82
+
83
+
84
def is_saas(host: str) -> bool:
    """Checks whether the connection is to the SaaS platform.

    :param host: Host URL of the API, including the scheme
        (for example ``https://api.hiddenlayer.ai``).

    :returns: True if the URL's hostname is ``hiddenlayer.ai`` or one of its
        subdomains, False otherwise (including when no hostname can be parsed).
    """

    hostname = urlparse(host).hostname

    if not hostname:
        return False

    # Require a label boundary so look-alike domains such as
    # "nothiddenlayer.ai" are not mistaken for the SaaS platform.
    return hostname == "hiddenlayer.ai" or hostname.endswith(".hiddenlayer.ai")
@@ -0,0 +1 @@
1
+ VERSION = "0.1.0"