hiddenlayer-sdk 0.1.0__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- hiddenlayer/__init__.py +109 -0
- hiddenlayer/sdk/__init__.py +0 -0
- hiddenlayer/sdk/constants.py +14 -0
- hiddenlayer/sdk/enterprise/__init__.py +0 -0
- hiddenlayer/sdk/enterprise/enterprise_model_scan_api.py +55 -0
- hiddenlayer/sdk/exceptions.py +12 -0
- hiddenlayer/sdk/models.py +22 -0
- hiddenlayer/sdk/rest/__init__.py +49 -0
- hiddenlayer/sdk/rest/api/__init__.py +7 -0
- hiddenlayer/sdk/rest/api/aidr_predictive_api.py +308 -0
- hiddenlayer/sdk/rest/api/model_scan_api.py +591 -0
- hiddenlayer/sdk/rest/api/sensor_api.py +1966 -0
- hiddenlayer/sdk/rest/api_client.py +770 -0
- hiddenlayer/sdk/rest/api_response.py +21 -0
- hiddenlayer/sdk/rest/configuration.py +445 -0
- hiddenlayer/sdk/rest/exceptions.py +199 -0
- hiddenlayer/sdk/rest/models/__init__.py +30 -0
- hiddenlayer/sdk/rest/models/create_sensor_request.py +95 -0
- hiddenlayer/sdk/rest/models/file_info.py +110 -0
- hiddenlayer/sdk/rest/models/get_multipart_upload_response.py +97 -0
- hiddenlayer/sdk/rest/models/model.py +100 -0
- hiddenlayer/sdk/rest/models/model_query_response.py +101 -0
- hiddenlayer/sdk/rest/models/multipart_upload_part.py +93 -0
- hiddenlayer/sdk/rest/models/scan_model_request.py +87 -0
- hiddenlayer/sdk/rest/models/scan_results_v2.py +108 -0
- hiddenlayer/sdk/rest/models/sensor_sor_query_filter.py +108 -0
- hiddenlayer/sdk/rest/models/sensor_sor_query_request.py +109 -0
- hiddenlayer/sdk/rest/models/submission_response.py +95 -0
- hiddenlayer/sdk/rest/models/submission_v2.py +109 -0
- hiddenlayer/sdk/rest/models/validation_error_model.py +99 -0
- hiddenlayer/sdk/rest/models/validation_error_model_loc_inner.py +138 -0
- hiddenlayer/sdk/rest/rest.py +257 -0
- hiddenlayer/sdk/services/__init__.py +0 -0
- hiddenlayer/sdk/services/aidr_predictive.py +76 -0
- hiddenlayer/sdk/services/model.py +101 -0
- hiddenlayer/sdk/services/model_scan.py +414 -0
- hiddenlayer/sdk/utils.py +92 -0
- hiddenlayer/sdk/version.py +1 -0
- hiddenlayer_sdk-0.1.0.dist-info/LICENSE +201 -0
- hiddenlayer_sdk-0.1.0.dist-info/METADATA +320 -0
- hiddenlayer_sdk-0.1.0.dist-info/RECORD +43 -0
- hiddenlayer_sdk-0.1.0.dist-info/WHEEL +5 -0
- hiddenlayer_sdk-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,414 @@
|
|
1
|
+
import os
|
2
|
+
import random
|
3
|
+
import time
|
4
|
+
import warnings
|
5
|
+
from datetime import datetime
|
6
|
+
from pathlib import Path
|
7
|
+
from typing import List, Optional, Union
|
8
|
+
from uuid import uuid4
|
9
|
+
|
10
|
+
from hiddenlayer.sdk.constants import ScanStatus
|
11
|
+
from hiddenlayer.sdk.enterprise.enterprise_model_scan_api import EnterpriseModelScanApi
|
12
|
+
from hiddenlayer.sdk.models import ScanResults
|
13
|
+
from hiddenlayer.sdk.rest.api import ModelScanApi, SensorApi
|
14
|
+
from hiddenlayer.sdk.rest.api_client import ApiClient
|
15
|
+
from hiddenlayer.sdk.rest.models import MultipartUploadPart
|
16
|
+
from hiddenlayer.sdk.rest.models.model import Model
|
17
|
+
from hiddenlayer.sdk.services.model import ModelAPI
|
18
|
+
from hiddenlayer.sdk.utils import filter_path_objects, is_saas
|
19
|
+
|
20
|
+
# Documentation and dependency lock files.
_NON_MODEL_FILE_PATTERNS = ["*.txt", "*.md", "*.lock"]

# Git metadata, whether at the top level or nested inside sub directories.
_GIT_METADATA_PATTERNS = [".gitattributes", ".git", ".git/*", "*/.git", "**/.git/**"]

# Shell-style patterns that are always added to the ignore list when scanning
# folders or HuggingFace repositories.
EXCLUDE_FILE_TYPES = _NON_MODEL_FILE_PATTERNS + _GIT_METADATA_PATTERNS
|
30
|
+
|
31
|
+
|
32
|
+
class ModelScanAPI:
    """
    High-level client for submitting model files to the HiddenLayer Model Scanner.

    The host configured on the ``ApiClient`` decides which backend is used:
    for a SaaS host the file is uploaded in parts through the sensor API,
    otherwise the whole file is handed to ``EnterpriseModelScanApi``.
    """

    # Polling schedule (in seconds) used while waiting for a scan to finish.
    # The exponential back-off is capped so a long running scan is still
    # checked at a bounded interval.
    _POLL_BASE_DELAY = 0.1
    _POLL_MAX_DELAY = 30.0

    def __init__(self, api_client: ApiClient) -> None:
        """
        :param api_client: Configured REST client. Its host is used to decide
            between the SaaS and the enterprise code paths.
        """
        self.is_saas = is_saas(api_client.configuration.host)
        self._api_client = api_client

        if self.is_saas:
            self._model_scan_api = ModelScanApi(api_client=api_client)
            self._model_api = ModelAPI(api_client=api_client)
            # lower level api of ModelAPI
            self._sensor_api = SensorApi(api_client=api_client)
        else:
            self._model_scan_api = EnterpriseModelScanApi(api_client=api_client)

    def scan_file(
        self,
        *,
        model_name: str,
        model_path: Union[str, os.PathLike],
        threads: int = 1,
        chunk_size: int = 16,
        wait_for_results: bool = True,
    ) -> ScanResults:
        """
        Scan a local model file using the HiddenLayer Model Scanner.

        :param model_name: Name of the model to be shown on the HiddenLayer UI
        :param model_path: Local path to the model file.
        :param threads: Deprecated, will be removed in version 0.2.0. A
            ``DeprecationWarning`` is emitted when a non-default value is passed.
        :param chunk_size: Retained for backward compatibility, defaults to 16.
            Upload parts are sent sequentially, so this value does not
            currently affect the upload.
        :param wait_for_results: True whether to wait for the scan to finish, defaults to True.

        :returns: Scan Results
        """

        # Only warn callers that actually use the deprecated parameter.
        if threads != 1:
            warnings.warn(
                "Use of the threads parameter is deprecated and will be removed in version 0.2.0.",
                category=DeprecationWarning,
                stacklevel=2,
            )

        file_path = Path(model_path)

        # Can combine the 2 paths when SaaS API and Enterprise APIs are in sync
        if self.is_saas:
            sensor = self._submit_saas(model_name, file_path)
        else:
            sensor = self._submit_enterprise(model_name, file_path)

        # The sensor id is already known here, so the status is queried by id
        # instead of resolving the model by name again.
        scan_results = self._fetch_scan_results(sensor.sensor_id)

        if wait_for_results:
            scan_results = self._poll_until_finished(
                file_path.name, sensor.sensor_id, scan_results
            )

        scan_results = ScanResults.from_scanresultsv2(scan_results_v2=scan_results)
        scan_results.file_name = file_path.name
        scan_results.file_path = str(file_path)
        scan_results.sensor_id = sensor.sensor_id

        return scan_results

    def _submit_saas(self, model_name: str, file_path: Path):
        """Create a sensor, upload the file in parts and start the scan (SaaS).

        :returns: The sensor object returned by ``ModelAPI.create``.
        """
        filesize = file_path.stat().st_size
        sensor = self._model_api.create(model_name=model_name)
        upload = self._sensor_api.begin_multipart_upload(filesize, sensor.sensor_id)

        self._upload_parts(file_path, upload.parts)

        self._sensor_api.complete_multipart_upload(sensor.sensor_id, upload.upload_id)
        self._model_scan_api.scan_model(sensor.sensor_id)

        return sensor

    def _upload_parts(
        self, file_path: Path, parts: List[MultipartUploadPart]
    ) -> None:
        """Send every byte range described by ``parts`` to its upload url."""
        with open(file_path, "rb") as f:
            for part in parts:
                f.seek(int(part.start_offset))
                part_data = f.read(int(part.end_offset - part.start_offset))

                # The SaaS multipart upload returns a upload url for each part
                # So there is no specified route
                self._api_client.call_api(
                    "PUT",
                    part.upload_url,
                    body=part_data,
                    header_params={"Content-Type": "application/octet-binary"},
                )

    def _submit_enterprise(self, model_name: str, file_path: Path) -> Model:
        """Send the whole file to the enterprise scanner under a fresh sensor id.

        :returns: The locally constructed ``Model`` carrying the generated sensor id.
        """
        data = file_path.read_bytes()

        sensor = Model(
            sensor_id=str(uuid4()),
            created_at=datetime.now(),
            tenant_id="0000",
            plaintext_name=model_name,
            active=True,
            version=1,
        )

        self._model_scan_api: EnterpriseModelScanApi
        self._model_scan_api.scan_model(sensor.sensor_id, data)

        return sensor

    def _fetch_scan_results(self, sensor_id: str) -> ScanResults:
        """Query the scan status for ``sensor_id`` and wrap it as ``ScanResults``."""
        scan_results_v2 = self._model_scan_api.scan_status(sensor_id)

        return ScanResults.from_scanresultsv2(
            scan_results_v2=scan_results_v2, sensor_id=sensor_id
        )

    def _poll_until_finished(
        self, display_name: str, sensor_id: str, scan_results: ScanResults
    ) -> ScanResults:
        """Poll the scan status until it is ``DONE`` or ``FAILED``.

        Prints the status after every check. Waits between checks grow
        exponentially up to ``_POLL_MAX_DELAY`` seconds, plus up to one second
        of random jitter.
        """
        print(f"{display_name} scan status: {scan_results.status}")

        attempt = 0
        while scan_results.status not in (ScanStatus.DONE, ScanStatus.FAILED):
            attempt += 1
            # The exponent is clamped so the power never grows without bound.
            backoff = min(
                self._POLL_MAX_DELAY,
                self._POLL_BASE_DELAY * 2 ** min(attempt, 32),
            )
            time.sleep(backoff + random.uniform(0, 1))

            scan_results = self._fetch_scan_results(sensor_id)
            print(f"{display_name} scan status: {scan_results.status}")

        return scan_results

    def scan_s3_model(
        self,
        *,
        model_name: str,
        bucket: str,
        key: str,
        s3_client: Optional[object] = None,
        threads: int = 1,
        chunk_size: int = 4,
        wait_for_results: bool = True,
    ) -> ScanResults:
        """
        Scan a model file on S3.

        The object is downloaded into a private temporary directory which is
        removed once the file has been submitted, so ``file_path`` on the
        returned results refers to that temporary location.

        :param model_name: Name of the model to be shown on the HiddenLayer UI.
        :param bucket: Name of the s3 bucket where the model file is stored.
        :param key: Path to the model file on s3.
        :param s3_client: boto3 s3 client. When omitted one is created with
            ``boto3.client("s3")``; boto3 is only required in that case.
        :param threads: Deprecated, see :meth:`scan_file`.
        :param chunk_size: Forwarded to :meth:`scan_file`, defaults to 4.
        :param wait_for_results: True whether to wait for the scan to finish, defaults to True.

        :returns: Scan Results

        :raises ImportError: If no client is given and boto3 is not installed.
        :raises RuntimeError: If the object cannot be downloaded.

        :examples:
            .. code-block:: python

                hl_client.model_scanner.scan_s3_model(
                    model_name="your-model-name",
                    bucket="s3_bucket",
                    key="path/to/file"
                )
        """
        import tempfile

        if not s3_client:
            try:
                import boto3
            except ImportError as err:
                raise ImportError("Python package boto3 is not installed.") from err

            s3_client = boto3.client("s3")

        file_name = key.split("/")[-1]

        # A unique directory avoids clashes between concurrent scans of
        # objects that share a file name and is cleaned up automatically.
        with tempfile.TemporaryDirectory(prefix="hiddenlayer-") as download_dir:
            local_path = os.path.join(download_dir, file_name)

            try:
                s3_client.download_file(bucket, key, local_path)
            except Exception as e:
                raise RuntimeError(
                    f"Couldn't download model s3://{bucket}/{key}: {e}"
                ) from e

            return self.scan_file(
                model_path=local_path,
                model_name=model_name,
                threads=threads,
                chunk_size=chunk_size,
                wait_for_results=wait_for_results,
            )

    def scan_azure_blob_model(
        self,
        *,
        model_name: str,
        account_url: str,
        container: str,
        blob: str,
        blob_service_client: Optional[object] = None,
        credential: Optional[object] = None,
        threads: int = 1,
        chunk_size: int = 4,
        wait_for_results: bool = True,
    ) -> ScanResults:
        """
        Scan a model file on Azure Blob Storage.

        The blob is downloaded into a private temporary directory which is
        removed once the file has been submitted, so ``file_path`` on the
        returned results refers to that temporary location.

        :param model_name: Name of the model to be shown on the HiddenLayer UI.
        :param account_url: Azure Blob url of where the file is stored.
        :param container: Azure Blob container containing the model file.
        :param blob: Path to the model file inside the Azure blob container.
        :param blob_service_client: BlobServiceClient object. When omitted one is
            created from ``account_url`` and ``credential``.
        :param credential: Credential to be passed to the BlobServiceClient object, can be a credential object, SAS key, etc.
            Defaults to `DefaultAzureCredential`. Only used when
            ``blob_service_client`` is not given.
        :param threads: Deprecated, see :meth:`scan_file`.
        :param chunk_size: Forwarded to :meth:`scan_file`, defaults to 4.
        :param wait_for_results: True whether to wait for the scan to finish, defaults to True.

        :returns: Scan Results

        :raises ImportError: If a client or credential has to be created and
            the required Azure package is not installed.
        :raises RuntimeError: If the blob cannot be downloaded.

        :examples:
            .. code-block:: python

                hl_client.model_scanner.scan_azure_blob_model(
                    model_name="your-model-name",
                    account_url="https://<storageaccountname>.blob.core.windows.net",
                    container="container_name",
                    blob="path/to/file.bin",
                    credential="?<sas_key>"  # If using a SAS key and not DefaultCredentials
                )
        """
        import tempfile

        # The Azure packages are only imported when this method has to build
        # the client or credential itself.
        if not blob_service_client:
            try:
                from azure.storage.blob import BlobServiceClient
            except ImportError as err:
                raise ImportError(
                    "Python package azure-storage-blob is not installed."
                ) from err

            if not credential:
                try:
                    from azure.identity import DefaultAzureCredential
                except ImportError as err:
                    raise ImportError(
                        "Python package azure-identity is not installed."
                    ) from err

                credential = DefaultAzureCredential()

            blob_service_client = BlobServiceClient(account_url, credential=credential)

        file_name = blob.split("/")[-1]

        blob_client = blob_service_client.get_blob_client(
            container=container, blob=blob
        )

        # A unique directory avoids clashes between concurrent scans of
        # blobs that share a file name and is cleaned up automatically.
        with tempfile.TemporaryDirectory(prefix="hiddenlayer-") as download_dir:
            local_path = os.path.join(download_dir, file_name)

            try:
                with open(local_path, "wb") as f:
                    download_stream = blob_client.download_blob()
                    f.write(download_stream.readall())
            except Exception as e:
                raise RuntimeError(
                    f"Couldn't download model {account_url}, {container}, {blob}: {e}"
                ) from e

            return self.scan_file(
                model_path=local_path,
                model_name=model_name,
                threads=threads,
                chunk_size=chunk_size,
                wait_for_results=wait_for_results,
            )

    def scan_huggingface_model(
        self,
        *,
        repo_id: str,
        # HF parameters
        revision: Optional[str] = None,
        local_dir: str = "/tmp",
        allow_file_patterns: Optional[List[str]] = None,
        ignore_file_patterns: Optional[List[str]] = None,
        force_download: bool = False,
        hf_token: Optional[Union[str, bool]] = None,
        # HL parameters
        threads: int = 1,
        chunk_size: int = 4,
        wait_for_results: bool = True,
    ) -> List[ScanResults]:
        """
        Scans a model on HuggingFace.

        Note: Requires the `huggingface_hub` pip package to be installed.

        :param repo_id: Id of the HuggingFace repository to download and scan.
        :param revision: An optional Git revision id which can be a branch name, a tag, or a commit hash.
        :param local_dir: If provided, the downloaded files will be placed under this directory.
            With the default ``"/tmp"`` the files are placed under ``/tmp/<repo_id>``.
        :param allow_file_patterns: If provided, only files matching at least one pattern are scanned.
        :param ignore_file_patterns: If provided, files matching any of the patterns are not scanned.
            The patterns in ``EXCLUDE_FILE_TYPES`` are always ignored as well.
        :param force_download: Whether the file should be downloaded even if it already exists in the local cache.
        :param hf_token: A token to be used for the download.
            If True, the token is read from the HuggingFace config folder.
            If a string, it's used as the authentication token.
        :param threads: Deprecated, see :meth:`scan_file`.
        :param chunk_size: Forwarded to :meth:`scan_file`, defaults to 4.
        :param wait_for_results: True whether to wait for the scan to finish, defaults to True.

        :returns: List of ScanResults

        :raises ImportError: If huggingface_hub is not installed.
        """
        try:
            from huggingface_hub import snapshot_download
        except ImportError as err:
            raise ImportError(
                "Python package huggingface_hub is not installed."
            ) from err

        local_dir = f"/tmp/{repo_id}" if local_dir == "/tmp" else local_dir

        # Copy the defaults so the module-level list is never handed out.
        download_ignore_patterns = (
            EXCLUDE_FILE_TYPES + ignore_file_patterns
            if ignore_file_patterns
            else list(EXCLUDE_FILE_TYPES)
        )

        snapshot_download(
            repo_id,
            revision=revision,
            allow_patterns=allow_file_patterns,
            ignore_patterns=download_ignore_patterns,
            local_dir=local_dir,
            local_dir_use_symlinks=False,
            cache_dir=local_dir,
            force_download=force_download,
            token=hf_token,
        )

        # scan_folder adds the default exclusions itself, so only the caller's
        # patterns are forwarded to avoid duplicated entries.
        return self.scan_folder(
            path=local_dir,
            allow_file_patterns=allow_file_patterns,
            ignore_file_patterns=ignore_file_patterns,
            threads=threads,
            chunk_size=chunk_size,
            wait_for_results=wait_for_results,
        )

    def get_scan_results(self, *, model_name: str) -> ScanResults:
        """
        Get results from a model scan.

        :param model_name: Name of the model. Against a non-SaaS host this
            value is used directly as the sensor id.

        :returns: Scan results.
        """

        if self.is_saas:
            sensor_id = self._model_api.get(model_name=model_name).sensor_id
        else:
            sensor_id = model_name

        return self._fetch_scan_results(sensor_id)

    def scan_folder(
        self,
        *,
        path: Union[str, os.PathLike],
        allow_file_patterns: Optional[List[str]] = None,
        ignore_file_patterns: Optional[List[str]] = None,
        threads: int = 1,
        chunk_size: int = 4,
        wait_for_results: bool = True,
    ) -> List[ScanResults]:
        """
        Submits all files in a directory and its sub directories to be scanned.

        Every file is submitted with its path as the model name.

        :param path: Path to the folder on disk to be scanned.
        :param allow_file_patterns: If provided, only files matching at least one pattern are scanned.
        :param ignore_file_patterns: If provided, files matching any of the patterns are not scanned.
            The patterns in ``EXCLUDE_FILE_TYPES`` are always ignored as well.
        :param threads: Deprecated, see :meth:`scan_file`.
        :param chunk_size: Forwarded to :meth:`scan_file`, defaults to 4.
        :param wait_for_results: True whether to wait for the scan to finish, defaults to True.

        :returns: List of ScanResults
        """

        model_path = Path(path)

        # Copy the defaults so the module-level list is never handed out.
        combined_ignore_patterns = (
            EXCLUDE_FILE_TYPES + ignore_file_patterns
            if ignore_file_patterns
            else list(EXCLUDE_FILE_TYPES)
        )

        files = filter_path_objects(
            model_path.rglob("*"),
            allow_patterns=allow_file_patterns,
            ignore_patterns=combined_ignore_patterns,
        )

        return [
            self.scan_file(
                model_name=str(file),
                model_path=file,
                threads=threads,
                chunk_size=chunk_size,
                wait_for_results=wait_for_results,
            )
            for file in files
        ]
|
hiddenlayer/sdk/utils.py
ADDED
@@ -0,0 +1,92 @@
|
|
1
|
+
import os
|
2
|
+
from fnmatch import fnmatch
|
3
|
+
from pathlib import Path
|
4
|
+
from typing import Generator, List, Optional, Union
|
5
|
+
from urllib.parse import urlparse
|
6
|
+
|
7
|
+
PathInputType = Union[str, os.PathLike]


def filter_path_objects(
    items: Union[List[PathInputType], Generator[PathInputType, None, None]],
    *,
    allow_patterns: Optional[Union[List[str], str]] = None,
    ignore_patterns: Optional[Union[List[str], str]] = None,
) -> Generator[PathInputType, None, None]:
    """Filter paths based on an allowlist and a denylist.

    Items that point to an existing directory are always skipped. The
    remaining items are yielded unchanged (not converted to ``Path``), in
    their original order.

    Patterns are Unix shell-style wildcards which are NOT regular expressions. See
    https://docs.python.org/3/library/fnmatch.html for more details.

    :param items: Paths to filter, each a `str` or an `os.PathLike` object.
    :param allow_patterns: Patterns constituting the allowlist. If provided, item paths must match at
        least one pattern from the allowlist. A single string is treated as one pattern.
    :param ignore_patterns: Patterns constituting the denylist. If provided, item paths must not match
        any patterns from the denylist. A single string is treated as one pattern.

    :returns: Filtered items, as a generator.

    :raises:
        :class:`ValueError`:
            If an item is not a `str` or a path-like object. Raised lazily,
            when the generator reaches the offending item.

    Example usage with paths:

    >>> # Filter only PDFs that are not hidden.
    >>> list(filter_path_objects(
    ...     ["aaa.pdf", "bbb.jpg", ".ccc.pdf", ".ddd.png"],
    ...     allow_patterns=["*.pdf"],
    ...     ignore_patterns=[".*"],
    ... ))
    ['aaa.pdf']
    """
    if isinstance(allow_patterns, str):
        allow_patterns = [allow_patterns]

    if isinstance(ignore_patterns, str):
        ignore_patterns = [ignore_patterns]

    for item in items:
        # Path() accepts str and any os.PathLike yielding str; everything
        # else (ints, bytes, ...) raises TypeError, reported as ValueError.
        try:
            path = Path(item)
        except TypeError as err:
            raise ValueError("Objects must be string or Pathlike.") from err

        if path.is_dir():
            continue

        candidate = str(path)

        # Skip if there's an allowlist and path doesn't match any
        if allow_patterns is not None and not any(
            fnmatch(candidate, pattern) for pattern in allow_patterns
        ):
            continue

        # Skip if there's a denylist and path matches any
        if ignore_patterns is not None and any(
            fnmatch(candidate, pattern) for pattern in ignore_patterns
        ):
            continue

        yield item
|
82
|
+
|
83
|
+
|
84
|
+
def is_saas(host: str) -> bool:
    """Checks whether the connection is to the SaaS platform

    The host counts as SaaS when its hostname is ``hiddenlayer.ai`` or a
    subdomain of it. Look-alike domains that merely end with the same
    characters (for example ``nothiddenlayer.ai``) are not accepted.

    :param host: Url of the API host, including the scheme. Without a scheme
        no hostname can be extracted and the result is False.

    :returns: True if the host belongs to the SaaS platform, False otherwise.
    """

    hostname = urlparse(host).hostname

    if not hostname:
        return False

    # Require a domain boundary in front of the suffix.
    return hostname == "hiddenlayer.ai" or hostname.endswith(".hiddenlayer.ai")
|
@@ -0,0 +1 @@
|
|
1
|
+
# Version string of the package; presumably kept in sync with the wheel metadata.
VERSION = "0.1.0"
|