hiddenlayer-sdk 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hiddenlayer/__init__.py +109 -0
- hiddenlayer/sdk/__init__.py +0 -0
- hiddenlayer/sdk/constants.py +14 -0
- hiddenlayer/sdk/enterprise/__init__.py +0 -0
- hiddenlayer/sdk/enterprise/enterprise_model_scan_api.py +55 -0
- hiddenlayer/sdk/exceptions.py +12 -0
- hiddenlayer/sdk/models.py +22 -0
- hiddenlayer/sdk/rest/__init__.py +49 -0
- hiddenlayer/sdk/rest/api/__init__.py +7 -0
- hiddenlayer/sdk/rest/api/aidr_predictive_api.py +308 -0
- hiddenlayer/sdk/rest/api/model_scan_api.py +591 -0
- hiddenlayer/sdk/rest/api/sensor_api.py +1966 -0
- hiddenlayer/sdk/rest/api_client.py +770 -0
- hiddenlayer/sdk/rest/api_response.py +21 -0
- hiddenlayer/sdk/rest/configuration.py +445 -0
- hiddenlayer/sdk/rest/exceptions.py +199 -0
- hiddenlayer/sdk/rest/models/__init__.py +30 -0
- hiddenlayer/sdk/rest/models/create_sensor_request.py +95 -0
- hiddenlayer/sdk/rest/models/file_info.py +110 -0
- hiddenlayer/sdk/rest/models/get_multipart_upload_response.py +97 -0
- hiddenlayer/sdk/rest/models/model.py +100 -0
- hiddenlayer/sdk/rest/models/model_query_response.py +101 -0
- hiddenlayer/sdk/rest/models/multipart_upload_part.py +93 -0
- hiddenlayer/sdk/rest/models/scan_model_request.py +87 -0
- hiddenlayer/sdk/rest/models/scan_results_v2.py +108 -0
- hiddenlayer/sdk/rest/models/sensor_sor_query_filter.py +108 -0
- hiddenlayer/sdk/rest/models/sensor_sor_query_request.py +109 -0
- hiddenlayer/sdk/rest/models/submission_response.py +95 -0
- hiddenlayer/sdk/rest/models/submission_v2.py +109 -0
- hiddenlayer/sdk/rest/models/validation_error_model.py +99 -0
- hiddenlayer/sdk/rest/models/validation_error_model_loc_inner.py +138 -0
- hiddenlayer/sdk/rest/rest.py +257 -0
- hiddenlayer/sdk/services/__init__.py +0 -0
- hiddenlayer/sdk/services/aidr_predictive.py +76 -0
- hiddenlayer/sdk/services/model.py +101 -0
- hiddenlayer/sdk/services/model_scan.py +414 -0
- hiddenlayer/sdk/utils.py +92 -0
- hiddenlayer/sdk/version.py +1 -0
- hiddenlayer_sdk-0.1.0.dist-info/LICENSE +201 -0
- hiddenlayer_sdk-0.1.0.dist-info/METADATA +320 -0
- hiddenlayer_sdk-0.1.0.dist-info/RECORD +43 -0
- hiddenlayer_sdk-0.1.0.dist-info/WHEEL +5 -0
- hiddenlayer_sdk-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,414 @@
|
|
1
|
+
import os
|
2
|
+
import random
|
3
|
+
import time
|
4
|
+
import warnings
|
5
|
+
from datetime import datetime
|
6
|
+
from pathlib import Path
|
7
|
+
from typing import List, Optional, Union
|
8
|
+
from uuid import uuid4
|
9
|
+
|
10
|
+
from hiddenlayer.sdk.constants import ScanStatus
|
11
|
+
from hiddenlayer.sdk.enterprise.enterprise_model_scan_api import EnterpriseModelScanApi
|
12
|
+
from hiddenlayer.sdk.models import ScanResults
|
13
|
+
from hiddenlayer.sdk.rest.api import ModelScanApi, SensorApi
|
14
|
+
from hiddenlayer.sdk.rest.api_client import ApiClient
|
15
|
+
from hiddenlayer.sdk.rest.models import MultipartUploadPart
|
16
|
+
from hiddenlayer.sdk.rest.models.model import Model
|
17
|
+
from hiddenlayer.sdk.services.model import ModelAPI
|
18
|
+
from hiddenlayer.sdk.utils import filter_path_objects, is_saas
|
19
|
+
|
20
|
+
# Glob patterns that are always added to the caller's ignore list when a
# folder or HuggingFace repository is scanned.  Must stay a ``list`` because
# it is concatenated with the caller-supplied list using ``+``.
EXCLUDE_FILE_TYPES = [
    # Plain-text / documentation / lock files
    "*.txt", "*.md", "*.lock",
    # Git metadata
    ".gitattributes", ".git", ".git/*", "*/.git", "**/.git/**",
]
|
30
|
+
|
31
|
+
|
32
|
+
class ModelScanAPI:
    """High-level helper for submitting model files to the HiddenLayer Model Scanner.

    Depending on the configured host, requests go either to the SaaS APIs
    (``ModelScanApi`` / ``SensorApi`` / ``ModelAPI``) or to the enterprise API
    (``EnterpriseModelScanApi``).
    """

    # Back-off used while polling for scan completion:
    #   delay = min(_POLL_MAX_DELAY, _POLL_BASE_DELAY * 2**retries) + jitter
    _POLL_BASE_DELAY = 0.1  # seconds
    _POLL_MAX_DELAY = 30.0  # seconds; keeps the exponential term from growing without bound
    _POLL_MAX_EXPONENT = 32  # avoids computing arbitrarily large powers of two

    def __init__(self, api_client: ApiClient) -> None:
        """
        :param api_client: Configured API client; ``api_client.configuration.host``
            decides whether the SaaS or the enterprise code path is used.
        """
        self.is_saas = is_saas(api_client.configuration.host)
        self._api_client = api_client

        if self.is_saas:
            self._model_scan_api = ModelScanApi(api_client=api_client)
            self._model_api = ModelAPI(api_client=api_client)
            # lower level api of ModelAPI
            self._sensor_api = SensorApi(api_client=api_client)
        else:
            # NOTE: ``_model_api`` and ``_sensor_api`` are intentionally not set
            # here; they are only used on the SaaS code paths.
            self._model_scan_api = EnterpriseModelScanApi(api_client=api_client)

    @classmethod
    def _backoff_delay(cls, retries: int) -> float:
        """Return the sleep time (seconds) before poll attempt number ``retries``."""
        exponent = min(retries, cls._POLL_MAX_EXPONENT)
        capped = min(cls._POLL_MAX_DELAY, cls._POLL_BASE_DELAY * 2**exponent)
        # Jitter spreads out concurrent clients polling the same service.
        return capped + random.uniform(0, 1)

    def scan_file(
        self,
        *,
        model_name: str,
        model_path: Union[str, os.PathLike],
        threads: int = 1,
        chunk_size: int = 16,
        wait_for_results: bool = True,
    ) -> ScanResults:
        """
        Scan a local model file using the HiddenLayer Model Scanner.

        :param model_name: Name of the model to be shown on the HiddenLayer UI
        :param model_path: Local path to the model file.
        :param threads: Deprecated and unused; a ``DeprecationWarning`` is emitted
            when a value other than the default (1) is passed.
        :param chunk_size: Number of upload parts grouped together per iteration
            (SaaS only), defaults to 16. Must be >= 1.
        :param wait_for_results: If True, poll until the scan is done or failed
            before returning, defaults to True.

        :returns: Scan Results
        :raises ValueError: If ``chunk_size`` is smaller than 1 on the SaaS path.
        """

        # Only warn callers that actually set the deprecated parameter.
        if threads != 1:
            warnings.warn(
                "Use of the threads parameter is deprecated and will be removed in version 0.2.0.",
                category=DeprecationWarning,
                stacklevel=2,
            )

        file_path = Path(model_path)

        # Can combine the 2 paths when SaaS API and Enterprise APIs are in sync
        if self.is_saas:
            # A negative step would make ``range`` empty and silently skip the
            # upload; zero already raised ValueError. Fail before any API call.
            if chunk_size < 1:
                raise ValueError("chunk_size must be a positive integer.")

            filesize = file_path.stat().st_size
            sensor = self._model_api.create(model_name=model_name)
            upload = self._sensor_api.begin_multipart_upload(filesize, sensor.sensor_id)

            with open(file_path, "rb") as f:
                for i in range(0, len(upload.parts), chunk_size):
                    group: List[MultipartUploadPart] = upload.parts[i : i + chunk_size]
                    for part in group:
                        read_amount = part.end_offset - part.start_offset
                        f.seek(int(part.start_offset))
                        part_data = f.read(int(read_amount))

                        # The SaaS multipart upload returns a upload url for each part
                        # So there is no specified route
                        self._api_client.call_api(
                            "PUT",
                            part.upload_url,
                            body=part_data,
                            header_params={"Content-Type": "application/octet-binary"},
                        )

            self._sensor_api.complete_multipart_upload(
                sensor.sensor_id, upload.upload_id
            )

            self._model_scan_api.scan_model(sensor.sensor_id)
        else:
            # Enterprise API takes the whole file content in a single request.
            with open(file_path, "rb") as f:
                data = f.read()

            # No model registry call on this path: build a local record with a
            # freshly generated sensor id.
            sensor = Model(
                sensor_id=str(uuid4()),
                created_at=datetime.now(),
                tenant_id="0000",
                plaintext_name=model_name,
                active=True,
                version=1,
            )

            self._model_scan_api: EnterpriseModelScanApi
            self._model_scan_api.scan_model(sensor.sensor_id, data)
            # get_scan_results() uses ``model_name`` as the sensor id when not on SaaS.
            model_name = sensor.sensor_id

        scan_results = self.get_scan_results(model_name=model_name)

        if wait_for_results:
            retries = 0
            print(f"{file_path.name} scan status: {scan_results.status}")
            while scan_results.status not in [ScanStatus.DONE, ScanStatus.FAILED]:
                retries += 1
                # exponential back off retry, capped at _POLL_MAX_DELAY
                time.sleep(self._backoff_delay(retries))
                scan_results = self.get_scan_results(model_name=model_name)
                print(f"{file_path.name} scan status: {scan_results.status}")

        scan_results = ScanResults.from_scanresultsv2(scan_results_v2=scan_results)
        scan_results.file_name = file_path.name
        scan_results.file_path = str(file_path)
        scan_results.sensor_id = sensor.sensor_id

        return scan_results

    def scan_s3_model(
        self,
        *,
        model_name: str,
        bucket: str,
        key: str,
        s3_client: Optional[object] = None,
        threads: int = 1,
        chunk_size: int = 4,
        wait_for_results: bool = True,
    ) -> ScanResults:
        """
        Scan a model file on S3.

        The object is first downloaded to ``/tmp/<last component of key>`` and
        then scanned with :meth:`scan_file`.

        :param model_name: Name of the model to be shown on the HiddenLayer UI.
        :param bucket: Name of the s3 bucket where the model file is stored.
        :param key: Path to the model file on s3.
        :param s3_client: boto3 s3 client. Defaults to ``boto3.client("s3")``.
        :param threads: Deprecated, forwarded to :meth:`scan_file`.
        :param chunk_size: Forwarded to :meth:`scan_file`, defaults to 4.
        :param wait_for_results: If True, wait for the scan to finish, defaults to True.

        :returns: Scan Results
        :raises ImportError: If ``boto3`` is not installed.
        :raises RuntimeError: If the object cannot be downloaded.

        :examples:
            .. code-block:: python

                hl_client.model_scanner.scan_s3_model(
                    model_name="your-model-name",
                    bucket="s3_bucket",
                    key="path/to/file"
                )
        """
        try:
            import boto3
        except ImportError as err:
            raise ImportError("Python package boto3 is not installed.") from err

        if not s3_client:
            s3_client = boto3.client("s3")

        file_name = key.split("/")[-1]
        # NOTE(review): fixed, predictable location in /tmp; the file is not
        # removed after the scan. Kept as-is because the path is reported back
        # in ScanResults.file_path.
        local_path = f"/tmp/{file_name}"

        try:
            s3_client.download_file(bucket, key, local_path)
        except Exception as e:
            raise RuntimeError(
                f"Couldn't download model s3://{bucket}/{key}: {e}"
            ) from e

        return self.scan_file(
            model_path=local_path,
            model_name=model_name,
            threads=threads,
            chunk_size=chunk_size,
            wait_for_results=wait_for_results,
        )

    def scan_azure_blob_model(
        self,
        *,
        model_name: str,
        account_url: str,
        container: str,
        blob: str,
        blob_service_client: Optional[object] = None,
        credential: Optional[object] = None,
        threads: int = 1,
        chunk_size: int = 4,
        wait_for_results: bool = True,
    ) -> ScanResults:
        """
        Scan a model file on Azure Blob Storage.

        The blob is first downloaded to ``/tmp/<last component of blob>`` and
        then scanned with :meth:`scan_file`.

        :param model_name: Name of the model to be shown on the HiddenLayer UI.
        :param account_url: Azure Blob url of where the file is stored.
        :param container: Azure Blob container containing the model file.
        :param blob: Path to the model file inside the Azure blob container.
        :param blob_service_client: BlobServiceClient object. Defaults to creating one
            from ``account_url`` and ``credential``.
        :param credential: Credential to be passed to the BlobServiceClient object, can be
            a credential object, SAS key, etc. Defaults to ``DefaultAzureCredential()``.
            Only used when ``blob_service_client`` is not supplied.
        :param threads: Deprecated, forwarded to :meth:`scan_file`.
        :param chunk_size: Forwarded to :meth:`scan_file`, defaults to 4.
        :param wait_for_results: If True, wait for the scan to finish, defaults to True.

        :returns: Scan Results
        :raises ImportError: If ``azure-identity`` or ``azure-storage-blob`` is not installed.
        :raises RuntimeError: If the blob cannot be downloaded or written to disk.

        :examples:
            .. code-block:: python

                hl_client.model_scanner.scan_azure_blob_model(
                    model_name="your-model-name",
                    account_url="https://<storageaccountname>.blob.core.windows.net",
                    container="container_name",
                    blob="path/to/file.bin",
                    credential="?<sas_key>" # If using a SAS key and not DefaultCredentials
                )
        """
        try:
            from azure.identity import DefaultAzureCredential
        except ImportError as err:
            raise ImportError(
                "Python package azure-identity is not installed."
            ) from err

        try:
            from azure.storage.blob import BlobServiceClient
        except ImportError as err:
            raise ImportError(
                "Python package azure-storage-blob is not installed."
            ) from err

        if not blob_service_client:
            # The credential is only needed to build a client, so do not
            # instantiate the default one when a client was handed in.
            if not credential:
                credential = DefaultAzureCredential()
            blob_service_client = BlobServiceClient(account_url, credential=credential)

        file_name = blob.split("/")[-1]
        # NOTE(review): fixed, predictable location in /tmp; the file is not
        # removed after the scan. Kept as-is because the path is reported back
        # in ScanResults.file_path.
        local_path = f"/tmp/{file_name}"

        blob_client = blob_service_client.get_blob_client(
            container=container, blob=blob
        )

        try:
            # Download before opening the target so a failed download does not
            # leave an empty file behind.
            payload = blob_client.download_blob().readall()
            with open(local_path, "wb") as f:
                f.write(payload)
        except Exception as e:
            raise RuntimeError(
                f"Couldn't download model {account_url}, {container}, {blob}: {e}"
            ) from e

        return self.scan_file(
            model_path=local_path,
            model_name=model_name,
            threads=threads,
            chunk_size=chunk_size,
            wait_for_results=wait_for_results,
        )

    def scan_huggingface_model(
        self,
        *,
        repo_id: str,
        # model_id: str,
        # HF parameters
        revision: Optional[str] = None,
        local_dir: str = "/tmp",
        allow_file_patterns: Optional[List[str]] = None,
        ignore_file_patterns: Optional[List[str]] = None,
        force_download: bool = False,
        hf_token: Optional[Union[str, bool]] = None,
        # HL parameters
        threads: int = 1,
        chunk_size: int = 4,
        wait_for_results: bool = True,
    ) -> List[ScanResults]:
        """
        Scans a model on HuggingFace.

        Note: Requires the `huggingface_hub` pip package to be installed.

        The repository is downloaded with ``snapshot_download`` and the resulting
        folder is scanned with :meth:`scan_folder`.

        :param repo_id: Repository id passed to ``snapshot_download``.
        :param revision: An optional Git revision id which can be a branch name, a tag, or a commit hash.
        :param local_dir: If provided, the downloaded files will be placed under this directory.
            When left at the default ``"/tmp"``, ``/tmp/<repo_id>`` is used.
        :param allow_file_patterns: If provided, only files matching at least one pattern are scanned.
        :param ignore_file_patterns: If provided, files matching any of the patterns are not scanned.
            The patterns in ``EXCLUDE_FILE_TYPES`` are always ignored as well.
        :param force_download: Whether the file should be downloaded even if it already exists in the local cache.
        :param hf_token: A token to be used for the download.
            If True, the token is read from the HuggingFace config folder.
            If a string, it's used as the authentication token.
        :param threads: Deprecated, forwarded to :meth:`scan_file`.
        :param chunk_size: Forwarded to :meth:`scan_file`, defaults to 4.
        :param wait_for_results: If True, wait for each scan to finish, defaults to True.

        :returns: List of ScanResults
        :raises ImportError: If ``huggingface_hub`` is not installed.
        """
        try:
            from huggingface_hub import snapshot_download
        except ImportError as err:
            raise ImportError(
                "Python package huggingface_hub is not installed."
            ) from err

        # Give every repository its own sub-folder when the default is used.
        local_dir = f"/tmp/{repo_id}" if local_dir == "/tmp" else local_dir
        ignore_file_patterns = (
            EXCLUDE_FILE_TYPES + ignore_file_patterns
            if ignore_file_patterns
            else EXCLUDE_FILE_TYPES
        )

        snapshot_download(
            repo_id,
            revision=revision,
            allow_patterns=allow_file_patterns,
            ignore_patterns=ignore_file_patterns,
            local_dir=local_dir,
            local_dir_use_symlinks=False,
            cache_dir=local_dir,
            force_download=force_download,
            token=hf_token,
        )

        return self.scan_folder(
            path=local_dir,
            allow_file_patterns=allow_file_patterns,
            ignore_file_patterns=ignore_file_patterns,
            threads=threads,
            chunk_size=chunk_size,
            wait_for_results=wait_for_results,
        )

    def get_scan_results(self, *, model_name: str) -> ScanResults:
        """
        Get results from a model scan.

        On SaaS the model is looked up by name and its sensor id is queried;
        otherwise ``model_name`` itself is used as the sensor id.

        :param model_name: Name of the model.

        :returns: Scan results.
        """

        if self.is_saas:
            model = self._model_api.get(model_name=model_name)
            sensor_id = model.sensor_id
        else:
            sensor_id = model_name

        scan_results_v2 = self._model_scan_api.scan_status(sensor_id)

        return ScanResults.from_scanresultsv2(
            scan_results_v2=scan_results_v2, sensor_id=sensor_id
        )

    def scan_folder(
        self,
        *,
        path: Union[str, os.PathLike],
        allow_file_patterns: Optional[List[str]] = None,
        ignore_file_patterns: Optional[List[str]] = None,
        threads: int = 1,
        chunk_size: int = 4,
        wait_for_results: bool = True,
    ) -> List[ScanResults]:
        """
        Submits all files in a directory and its sub directories to be scanned.

        Each file is scanned with :meth:`scan_file`, using the file's path as the
        model name.

        :param path: Path to the folder on disk to be scanned.
        :param allow_file_patterns: If provided, only files matching at least one pattern are scanned.
        :param ignore_file_patterns: If provided, files matching any of the patterns are not scanned.
            The patterns in ``EXCLUDE_FILE_TYPES`` are always ignored as well.
        :param threads: Deprecated, forwarded to :meth:`scan_file`.
        :param chunk_size: Forwarded to :meth:`scan_file`, defaults to 4.
        :param wait_for_results: If True, wait for each scan to finish, defaults to True.

        :returns: List of ScanResults
        """

        model_path = Path(path)
        ignore_file_patterns = (
            EXCLUDE_FILE_TYPES + ignore_file_patterns
            if ignore_file_patterns
            else EXCLUDE_FILE_TYPES
        )

        files = filter_path_objects(
            model_path.rglob("*"),
            allow_patterns=allow_file_patterns,
            ignore_patterns=ignore_file_patterns,
        )

        return [
            self.scan_file(
                model_name=str(file),
                model_path=file,
                threads=threads,
                chunk_size=chunk_size,
                wait_for_results=wait_for_results,
            )
            for file in files
        ]
|
hiddenlayer/sdk/utils.py
ADDED
@@ -0,0 +1,92 @@
|
|
1
|
+
import os
|
2
|
+
from fnmatch import fnmatch
|
3
|
+
from pathlib import Path
|
4
|
+
from typing import Generator, List, Optional, Union
|
5
|
+
from urllib.parse import urlparse
|
6
|
+
|
7
|
+
PathInputType = Union[str, os.PathLike]


def filter_path_objects(
    items: Union[List[PathInputType], Generator[PathInputType, None, None]],
    *,
    allow_patterns: Optional[Union[List[str], str]] = None,
    ignore_patterns: Optional[Union[List[str], str]] = None,
) -> Generator[Union[str, os.PathLike], None, None]:
    """Filter paths based on an allowlist and a denylist.

    Input must be an iterable of paths (`str` or any `os.PathLike`, e.g. `Path`).
    Items that are existing directories are skipped.

    Patterns are Unix shell-style wildcards which are NOT regular expressions. See
    https://docs.python.org/3/library/fnmatch.html for more details. Each pattern is
    matched against the full path string of the item (not just its file name), and
    case sensitivity follows `fnmatch` (platform dependent).

    :param items: Paths to filter.
    :param allow_patterns: Patterns constituting the allowlist. If provided, item paths must match at
        least one pattern from the allowlist. A single string is treated as a one-element list.
    :param ignore_patterns: Patterns constituting the denylist. If provided, item paths must not match
        any patterns from the denylist. A single string is treated as a one-element list.

    :returns: The items that pass both filters, unchanged, as a generator.

    :raises:
        :class:`ValueError`:
            If an item is neither `str` nor `os.PathLike`. Raised lazily, while iterating.

    Example usage with paths:
    ```python
    >>> # Filter only PDFs that are not hidden.
    >>> list(filter_path_objects(
    ...     ["aaa.pdf", "bbb.jpg", ".ccc.pdf", ".ddd.png"],
    ...     allow_patterns=["*.pdf"],
    ...     ignore_patterns=[".*"],
    ... ))
    ['aaa.pdf']
    ```
    """
    if isinstance(allow_patterns, str):
        allow_patterns = [allow_patterns]

    if isinstance(ignore_patterns, str):
        ignore_patterns = [ignore_patterns]

    for item in items:
        # Accept every os.PathLike (not only pathlib.Path), as the signature promises.
        if not isinstance(item, (str, os.PathLike)):
            raise ValueError("Objects must be string or Pathlike.")

        path = Path(item)

        if path.is_dir():
            continue

        candidate = str(path)

        # Skip if there's an allowlist and path doesn't match any
        if allow_patterns is not None and not any(
            fnmatch(candidate, pattern) for pattern in allow_patterns
        ):
            continue

        # Skip if there's a denylist and path matches any
        if ignore_patterns is not None and any(
            fnmatch(candidate, pattern) for pattern in ignore_patterns
        ):
            continue

        yield item
|
82
|
+
|
83
|
+
|
84
|
+
def is_saas(host: str) -> bool:
    """Checks whether the connection is to the SaaS platform.

    The host is considered SaaS when the hostname parsed from the URL is
    ``hiddenlayer.ai`` or one of its subdomains. A value without a scheme
    (e.g. ``"api.hiddenlayer.ai"``) has no parsable hostname and yields False.

    :param host: Base URL of the API, including the scheme.

    :returns: True if the URL points at the ``hiddenlayer.ai`` domain, otherwise False.
    """

    hostname = urlparse(host).hostname

    if not hostname:
        return False

    # Require a label boundary so that e.g. "evilhiddenlayer.ai" is not
    # mistaken for the SaaS domain.
    return hostname == "hiddenlayer.ai" or hostname.endswith(".hiddenlayer.ai")
|
@@ -0,0 +1 @@
|
|
1
|
+
# Version string of this package.
VERSION = "0.1.0"
|