clarifai 11.6.4__py3-none-any.whl → 11.6.4rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- clarifai/__init__.py +1 -1
- clarifai/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/__pycache__/errors.cpython-311.pyc +0 -0
- clarifai/__pycache__/errors.cpython-39.pyc +0 -0
- clarifai/__pycache__/versions.cpython-311.pyc +0 -0
- clarifai/__pycache__/versions.cpython-39.pyc +0 -0
- clarifai/cli/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/cli/__pycache__/base.cpython-39.pyc +0 -0
- clarifai/cli/__pycache__/compute_cluster.cpython-39.pyc +0 -0
- clarifai/cli/__pycache__/deployment.cpython-39.pyc +0 -0
- clarifai/cli/__pycache__/model.cpython-39.pyc +0 -0
- clarifai/cli/__pycache__/nodepool.cpython-39.pyc +0 -0
- clarifai/cli/base.py +11 -27
- clarifai/cli/model.py +171 -41
- clarifai/cli/model_templates.py +243 -0
- clarifai/cli/pipeline_step_templates.py +64 -0
- clarifai/client/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/client/__pycache__/app.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/app.cpython-39.pyc +0 -0
- clarifai/client/__pycache__/base.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/base.cpython-39.pyc +0 -0
- clarifai/client/__pycache__/compute_cluster.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/dataset.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/dataset.cpython-39.pyc +0 -0
- clarifai/client/__pycache__/deployment.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/deployment.cpython-39.pyc +0 -0
- clarifai/client/__pycache__/input.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/input.cpython-39.pyc +0 -0
- clarifai/client/__pycache__/lister.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/lister.cpython-39.pyc +0 -0
- clarifai/client/__pycache__/model.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/model.cpython-39.pyc +0 -0
- clarifai/client/__pycache__/model_client.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/model_client.cpython-39.pyc +0 -0
- clarifai/client/__pycache__/module.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/nodepool.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/runner.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/search.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/user.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/workflow.cpython-311.pyc +0 -0
- clarifai/client/auth/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/client/auth/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/client/auth/__pycache__/helper.cpython-311.pyc +0 -0
- clarifai/client/auth/__pycache__/helper.cpython-39.pyc +0 -0
- clarifai/client/auth/__pycache__/register.cpython-311.pyc +0 -0
- clarifai/client/auth/__pycache__/register.cpython-39.pyc +0 -0
- clarifai/client/auth/__pycache__/stub.cpython-311.pyc +0 -0
- clarifai/client/auth/__pycache__/stub.cpython-39.pyc +0 -0
- clarifai/constants/__pycache__/base.cpython-311.pyc +0 -0
- clarifai/constants/__pycache__/base.cpython-39.pyc +0 -0
- clarifai/constants/__pycache__/dataset.cpython-311.pyc +0 -0
- clarifai/constants/__pycache__/dataset.cpython-39.pyc +0 -0
- clarifai/constants/__pycache__/input.cpython-311.pyc +0 -0
- clarifai/constants/__pycache__/input.cpython-39.pyc +0 -0
- clarifai/constants/__pycache__/model.cpython-311.pyc +0 -0
- clarifai/constants/__pycache__/model.cpython-39.pyc +0 -0
- clarifai/constants/__pycache__/search.cpython-311.pyc +0 -0
- clarifai/constants/__pycache__/workflow.cpython-311.pyc +0 -0
- clarifai/datasets/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/datasets/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/datasets/export/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/datasets/export/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/datasets/export/__pycache__/inputs_annotations.cpython-311.pyc +0 -0
- clarifai/datasets/export/__pycache__/inputs_annotations.cpython-39.pyc +0 -0
- clarifai/datasets/upload/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/datasets/upload/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/datasets/upload/__pycache__/base.cpython-311.pyc +0 -0
- clarifai/datasets/upload/__pycache__/base.cpython-39.pyc +0 -0
- clarifai/datasets/upload/__pycache__/features.cpython-311.pyc +0 -0
- clarifai/datasets/upload/__pycache__/features.cpython-39.pyc +0 -0
- clarifai/datasets/upload/__pycache__/image.cpython-311.pyc +0 -0
- clarifai/datasets/upload/__pycache__/image.cpython-39.pyc +0 -0
- clarifai/datasets/upload/__pycache__/multimodal.cpython-311.pyc +0 -0
- clarifai/datasets/upload/__pycache__/multimodal.cpython-39.pyc +0 -0
- clarifai/datasets/upload/__pycache__/text.cpython-311.pyc +0 -0
- clarifai/datasets/upload/__pycache__/text.cpython-39.pyc +0 -0
- clarifai/datasets/upload/__pycache__/utils.cpython-311.pyc +0 -0
- clarifai/datasets/upload/__pycache__/utils.cpython-39.pyc +0 -0
- clarifai/models/model_serving/README.md +158 -0
- clarifai/models/model_serving/__init__.py +14 -0
- clarifai/models/model_serving/cli/__init__.py +12 -0
- clarifai/models/model_serving/cli/_utils.py +53 -0
- clarifai/models/model_serving/cli/base.py +14 -0
- clarifai/models/model_serving/cli/build.py +79 -0
- clarifai/models/model_serving/cli/clarifai_clis.py +33 -0
- clarifai/models/model_serving/cli/create.py +171 -0
- clarifai/models/model_serving/cli/example_cli.py +34 -0
- clarifai/models/model_serving/cli/login.py +26 -0
- clarifai/models/model_serving/cli/upload.py +179 -0
- clarifai/models/model_serving/constants.py +21 -0
- clarifai/models/model_serving/docs/cli.md +161 -0
- clarifai/models/model_serving/docs/concepts.md +229 -0
- clarifai/models/model_serving/docs/dependencies.md +11 -0
- clarifai/models/model_serving/docs/inference_parameters.md +139 -0
- clarifai/models/model_serving/docs/model_types.md +19 -0
- clarifai/models/model_serving/model_config/__init__.py +16 -0
- clarifai/models/model_serving/model_config/base.py +369 -0
- clarifai/models/model_serving/model_config/config.py +312 -0
- clarifai/models/model_serving/model_config/inference_parameter.py +129 -0
- clarifai/models/model_serving/model_config/model_types_config/multimodal-embedder.yaml +25 -0
- clarifai/models/model_serving/model_config/model_types_config/text-classifier.yaml +19 -0
- clarifai/models/model_serving/model_config/model_types_config/text-embedder.yaml +20 -0
- clarifai/models/model_serving/model_config/model_types_config/text-to-image.yaml +19 -0
- clarifai/models/model_serving/model_config/model_types_config/text-to-text.yaml +19 -0
- clarifai/models/model_serving/model_config/model_types_config/visual-classifier.yaml +22 -0
- clarifai/models/model_serving/model_config/model_types_config/visual-detector.yaml +32 -0
- clarifai/models/model_serving/model_config/model_types_config/visual-embedder.yaml +19 -0
- clarifai/models/model_serving/model_config/model_types_config/visual-segmenter.yaml +19 -0
- clarifai/models/model_serving/model_config/output.py +133 -0
- clarifai/models/model_serving/model_config/triton/__init__.py +14 -0
- clarifai/models/model_serving/model_config/triton/serializer.py +136 -0
- clarifai/models/model_serving/model_config/triton/triton_config.py +182 -0
- clarifai/models/model_serving/model_config/triton/wrappers.py +281 -0
- clarifai/models/model_serving/repo_build/__init__.py +14 -0
- clarifai/models/model_serving/repo_build/build.py +198 -0
- clarifai/models/model_serving/repo_build/static_files/_requirements.txt +2 -0
- clarifai/models/model_serving/repo_build/static_files/base_test.py +169 -0
- clarifai/models/model_serving/repo_build/static_files/inference.py +26 -0
- clarifai/models/model_serving/repo_build/static_files/sample_clarifai_config.yaml +25 -0
- clarifai/models/model_serving/repo_build/static_files/test.py +40 -0
- clarifai/models/model_serving/repo_build/static_files/triton/model.py +75 -0
- clarifai/models/model_serving/utils.py +23 -0
- clarifai/runners/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/runners/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/runners/models/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/runners/models/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/runners/models/__pycache__/mcp_class.cpython-311.pyc +0 -0
- clarifai/runners/models/__pycache__/model_builder.cpython-311.pyc +0 -0
- clarifai/runners/models/__pycache__/model_builder.cpython-39.pyc +0 -0
- clarifai/runners/models/__pycache__/model_class.cpython-311.pyc +0 -0
- clarifai/runners/models/__pycache__/model_runner.cpython-311.pyc +0 -0
- clarifai/runners/models/__pycache__/openai_class.cpython-311.pyc +0 -0
- clarifai/runners/models/base_typed_model.py +238 -0
- clarifai/runners/models/model_upload.py +607 -0
- clarifai/runners/server.py +1 -0
- clarifai/runners/utils/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/runners/utils/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/code_script.cpython-311.pyc +0 -0
- clarifai/runners/utils/__pycache__/code_script.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/const.cpython-311.pyc +0 -0
- clarifai/runners/utils/__pycache__/data_utils.cpython-311.pyc +0 -0
- clarifai/runners/utils/__pycache__/data_utils.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/loader.cpython-311.pyc +0 -0
- clarifai/runners/utils/__pycache__/method_signatures.cpython-311.pyc +0 -0
- clarifai/runners/utils/__pycache__/model_utils.cpython-311.pyc +0 -0
- clarifai/runners/utils/__pycache__/openai_convertor.cpython-311.pyc +0 -0
- clarifai/runners/utils/__pycache__/serializers.cpython-311.pyc +0 -0
- clarifai/runners/utils/__pycache__/url_fetcher.cpython-311.pyc +0 -0
- clarifai/runners/utils/code_script.py +52 -46
- clarifai/runners/utils/data_handler.py +231 -0
- clarifai/runners/utils/data_types/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/runners/utils/data_types/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/runners/utils/data_types/__pycache__/data_types.cpython-311.pyc +0 -0
- clarifai/runners/utils/data_types/__pycache__/data_types.cpython-39.pyc +0 -0
- clarifai/runners/utils/data_types.py +471 -0
- clarifai/runners/utils/temp.py +59 -0
- clarifai/schema/__pycache__/search.cpython-311.pyc +0 -0
- clarifai/urls/__pycache__/helper.cpython-311.pyc +0 -0
- clarifai/urls/__pycache__/helper.cpython-39.pyc +0 -0
- clarifai/utils/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/utils/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/utils/__pycache__/cli.cpython-39.pyc +0 -0
- clarifai/utils/__pycache__/config.cpython-311.pyc +0 -0
- clarifai/utils/__pycache__/config.cpython-39.pyc +0 -0
- clarifai/utils/__pycache__/constants.cpython-311.pyc +0 -0
- clarifai/utils/__pycache__/constants.cpython-39.pyc +0 -0
- clarifai/utils/__pycache__/logging.cpython-311.pyc +0 -0
- clarifai/utils/__pycache__/logging.cpython-39.pyc +0 -0
- clarifai/utils/__pycache__/misc.cpython-311.pyc +0 -0
- clarifai/utils/__pycache__/misc.cpython-39.pyc +0 -0
- clarifai/utils/__pycache__/model_train.cpython-311.pyc +0 -0
- clarifai/utils/__pycache__/protobuf.cpython-311.pyc +0 -0
- clarifai/utils/__pycache__/protobuf.cpython-39.pyc +0 -0
- clarifai/utils/cli.py +14 -15
- clarifai/utils/constants.py +2 -0
- clarifai/utils/misc.py +382 -1
- clarifai/workflows/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/workflows/__pycache__/export.cpython-311.pyc +0 -0
- clarifai/workflows/__pycache__/utils.cpython-311.pyc +0 -0
- clarifai/workflows/__pycache__/validate.cpython-311.pyc +0 -0
- {clarifai-11.6.4.dist-info → clarifai-11.6.4rc2.dist-info}/METADATA +1 -1
- clarifai-11.6.4rc2.dist-info/RECORD +301 -0
- {clarifai-11.6.4.dist-info → clarifai-11.6.4rc2.dist-info}/WHEEL +1 -1
- clarifai-11.6.4.dist-info/RECORD +0 -127
- {clarifai-11.6.4.dist-info → clarifai-11.6.4rc2.dist-info}/entry_points.txt +0 -0
- {clarifai-11.6.4.dist-info → clarifai-11.6.4rc2.dist-info}/licenses/LICENSE +0 -0
- {clarifai-11.6.4.dist-info → clarifai-11.6.4rc2.dist-info}/top_level.txt +0 -0
clarifai/utils/misc.py
CHANGED
```diff
@@ -2,11 +2,17 @@ import os
 import re
 import shutil
 import subprocess
+import sys
+import time
 import urllib.parse
 import uuid
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Tuple
+from urllib.parse import urlparse
 
+import requests
 from clarifai_grpc.grpc.api.status import status_code_pb2
+from requests.adapters import HTTPAdapter
+from urllib3.util.retry import Retry
 
 from clarifai.errors import UserError
 from clarifai.utils.constants import HOME_PATH
@@ -166,3 +172,378 @@ def clone_github_repo(repo_url, target_dir, github_pat=None, branch=None):
     except subprocess.CalledProcessError as e:
         logger.error(f"Failed to clone repository: {e.stderr}")
         return False
+
+
+class GitHubDownloader:
+    def __init__(
+        self, max_retries: int = 3, backoff_factor: float = 0.3, github_token: str = None
+    ):
+        self.session = requests.Session()
+        self.github_token = github_token
+
+        retry_strategy = Retry(
+            total=max_retries,
+            backoff_factor=backoff_factor,
+            status_forcelist=[429, 500, 502, 503, 504],
+            allowed_methods=["HEAD", "GET", "OPTIONS"],
+        )
+
+        adapter = HTTPAdapter(max_retries=retry_strategy)
+        self.session.mount("http://", adapter)
+        self.session.mount("https://", adapter)
+
+        self.session.headers.update({'User-Agent': 'GitHub-Folder-Downloader/1.0'})
+
+        if self.github_token:
+            self.session.headers.update({'Authorization': f'token {self.github_token}'})
+
+    def expected_folder_structure(self) -> List[Dict[str, Any]]:
+        return [
+            {"name": "1", "type": "dir", "children": [{"name": "model.py", "type": "file"}]},
+            {"name": "config.yaml", "type": "file"},
+            {"name": "requirements.txt", "type": "file"},
+        ]
+
+    def _format_expected_structure(self):
+        """Format the expected structure as a nice tree view."""
+        tree_str = ""
+        tree_str += "Expected folder structure:\n"
+        tree_str += "├── 1/\n"
+        tree_str += "│ └── model.py\n"
+        tree_str += "├── requirements.txt\n"
+        tree_str += "└── config.yaml\n"
+        return tree_str
+
+    def parse_github_url(self, url: str) -> Tuple[str, str, str, str]:
+        try:
+            parsed = urlparse(url)
+
+            if parsed.netloc not in ['github.com', 'www.github.com']:
+                raise ValueError("URL must be a GitHub repository URL")
+
+            path_parts = [p for p in parsed.path.strip('/').split('/') if p]
+
+            if len(path_parts) < 2:
+                raise ValueError("Invalid GitHub repository URL format")
+
+            owner = path_parts[0]
+            repo = path_parts[1]
+
+            if len(path_parts) >= 4 and path_parts[2] in ['tree', 'blob']:
+                branch = path_parts[3]
+                folder_path = '/'.join(path_parts[4:]) if len(path_parts) > 4 else ''
+            elif len(path_parts) >= 3:
+                branch = path_parts[2]
+                folder_path = '/'.join(path_parts[3:]) if len(path_parts) > 3 else ''
+            else:
+                branch = 'main'
+                folder_path = ''
+
+            return owner, repo, branch, folder_path
+
+        except Exception as e:
+            logger.error(f"Failed to parse GitHub URL: {e}")
+            sys.exit(1)
+
+    def get_folder_contents(self, owner: str, repo: str, path: str, branch: str = 'main') -> list:
+        api_url = f"https://api.github.com/repos/{owner}/{repo}/contents/{path}"
+        params = {'ref': branch} if branch else {}
+
+        try:
+            response = self.session.get(api_url, params=params, timeout=30)
+            response.raise_for_status()
+            return response.json()
+        except requests.exceptions.Timeout:
+            raise requests.RequestException("Request timed out. Please try again.")
+        except requests.exceptions.ConnectionError:
+            raise requests.RequestException(
+                "Connection error. Please check your internet connection."
+            )
+        except requests.exceptions.HTTPError as e:
+            if e.response.status_code == 404:
+                token_msg = (
+                    ""
+                    if self.github_token
+                    else " For private repositories, use the github_token parameter."
+                )
+                raise requests.RequestException(
+                    f"Folder not found: {path}. Check if path exists or if the repository is private.{token_msg}"
+                )
+            elif e.response.status_code == 401 or e.response.status_code == 403:
+                token_msg = (
+                    " The provided GitHub token may be invalid or have insufficient permissions."
+                    if self.github_token
+                    else " For private repositories, use the github_token parameter."
+                )
+                raise requests.RequestException(f"Authentication error: {e}.{token_msg}")
+            else:
+                raise requests.RequestException(f"API request failed: {e}")
+        except requests.exceptions.RequestException as e:
+            token_msg = (
+                ""
+                if self.github_token
+                else " For private repositories, use the github_token parameter."
+            )
+            raise requests.RequestException(f"API request failed: {e}.{token_msg}")
+
+    def validate_remote_structure(
+        self,
+        owner: str,
+        repo: str,
+        path: str,
+        branch: str,
+        expected_structure: List[Dict[str, Any]],
+    ) -> Dict[str, Any]:
+        validation_result = {
+            'valid': True,
+            'missing_files': [],
+            'missing_dirs': [],
+            'warnings': [],
+            'remote_contents': [],
+        }
+
+        try:
+            remote_contents = self.get_folder_contents(owner, repo, path, branch)
+            validation_result['remote_contents'] = remote_contents
+
+            remote_items = {item['name']: item['type'] for item in remote_contents}
+
+            for item in expected_structure:
+                item_name = item['name']
+                item_type = item.get('type', 'file')
+
+                if item_name not in remote_items:
+                    if item_type == 'file':
+                        validation_result['missing_files'].append(item_name)
+                    else:
+                        validation_result['missing_dirs'].append(item_name)
+                    validation_result['valid'] = False
+                elif remote_items[item_name] != item_type:
+                    validation_result['warnings'].append(
+                        f"Item '{item_name}' exists but is a {remote_items[item_name]} instead of {item_type}"
+                    )
+                    validation_result['valid'] = False
+
+            expected_names = {item['name'] for item in expected_structure}
+            unexpected_items = [name for name in remote_items.keys() if name not in expected_names]
+            if unexpected_items:
+                validation_result['warnings'].append(
+                    f"Unexpected items found: {', '.join(unexpected_items)}"
+                )
+
+        except requests.RequestException as e:
+            validation_result['valid'] = False
+            validation_result['warnings'].append(f"Failed to access remote repository: {e}")
+
+        return validation_result
+
+    def download_file(self, download_url: str, local_path: str) -> None:
+        try:
+            response = self.session.get(download_url, stream=True, timeout=60)
+            response.raise_for_status()
+
+            os.makedirs(os.path.dirname(local_path), exist_ok=True)
+
+            total_size = int(response.headers.get('content-length', 0))
+            downloaded_size = 0
+
+            with open(local_path, 'wb') as f:
+                for chunk in response.iter_content(chunk_size=8192):
+                    if chunk:
+                        f.write(chunk)
+                        downloaded_size += len(chunk)
+
+                        if total_size > 0 and total_size > 1024 * 1024:
+                            progress = (downloaded_size / total_size) * 100
+                            logger.info(
+                                f"\rDownloading: {os.path.basename(local_path)} - {progress:.1f}%",
+                                end='',
+                                flush=True,
+                            )
+
+            if total_size > 1024 * 1024:
+                logger.info()
+
+            logger.info(f"Downloaded: {local_path}")
+
+        except requests.exceptions.Timeout:
+            logger.info(f"Timeout downloading {local_path}. Skipping...")
+        except requests.exceptions.ConnectionError:
+            logger.info(f"Connection error downloading {local_path}. Skipping...")
+        except Exception as e:
+            logger.info(f"Failed to download {local_path}: {e}")
+
+    def process_folder(
+        self, owner: str, repo: str, path: str, local_base_path: str, branch: str = 'main'
+    ) -> None:
+        try:
+            contents = self.get_folder_contents(owner, repo, path, branch)
+
+            if not contents:
+                logger.info(f"Info: Empty folder - {path}")
+                return
+
+            for item in contents:
+                item_name = item['name']
+                item_path = os.path.join(local_base_path, item_name)
+
+                if item['type'] == 'file':
+                    self.download_file(item['download_url'], item_path)
+
+                elif item['type'] == 'dir':
+                    os.makedirs(item_path, exist_ok=True)
+                    logger.info(f"Created directory: {item_path}")
+
+                    new_path = f"{path}/{item_name}" if path else item_name
+                    self.process_folder(owner, repo, new_path, item_path, branch)
+
+        except requests.exceptions.RequestException as e:
+            if "Folder not found" in str(e):
+                logger.error(f"Error: Folder not found - {path}")
+                raise
+            else:
+                logger.error(f"Error accessing folder {path}: {e}")
+                raise
+        except Exception as e:
+            logger.error(f"Unexpected error processing folder {path}: {e}")
+            raise
+
+    def validate_folder_structure(
+        self, folder_path: str, expected_structure: List[Dict[str, Any]]
+    ) -> Dict[str, Any]:
+        validation_result = {
+            'valid': True,
+            'missing_files': [],
+            'missing_dirs': [],
+            'warnings': [],
+        }
+
+        if not os.path.exists(folder_path):
+            validation_result['valid'] = False
+            validation_result['warnings'].append(f"Folder {folder_path} does not exist")
+            return validation_result
+
+        for item in expected_structure:
+            item_name = item['name']
+            item_type = item.get('type', 'file')
+            item_path = os.path.join(folder_path, item_name)
+
+            if item_type == 'file':
+                if not os.path.isfile(item_path):
+                    validation_result['missing_files'].append(item_name)
+                    validation_result['valid'] = False
+            elif item_type == 'dir':
+                if not os.path.isdir(item_path):
+                    validation_result['missing_dirs'].append(item_name)
+                    validation_result['valid'] = False
+
+        return validation_result
+
+    def download_github_folder(
+        self,
+        url: str,
+        output_dir: str,
+        github_token: str = None,
+        validate_structure: bool = False,
+        pre_validate: bool = True,
+        strict_validation: bool = False,
+    ) -> None:
+        logger.info(f"Parsing GitHub URL: {url}")
+
+        # Update token if provided as a parameter
+        if github_token:
+            self.github_token = github_token
+            self.session.headers.update({'Authorization': f'token {github_token}'})
+
+        try:
+            owner, repo, branch, folder_path = self.parse_github_url(url)
+            logger.info(f"Repository: {owner}/{repo}")
+            logger.info(f"Branch: {branch}")
+            logger.info(f"Folder: {folder_path or 'root'}")
+
+            expected_structure = self.expected_folder_structure() if pre_validate else None
+
+            if expected_structure:
+                logger.info("\nValidating remote folder structure...")
+                remote_validation = self.validate_remote_structure(
+                    owner, repo, folder_path, branch, expected_structure
+                )
+
+                if not remote_validation['valid']:
+                    logger.error("Remote structure validation failed!")
+
+                    if remote_validation['missing_files']:
+                        logger.error(
+                            f"Missing files: {', '.join(remote_validation['missing_files'])}"
+                        )
+
+                    if remote_validation['missing_dirs']:
+                        logger.error(
+                            f"Missing directories: {', '.join(remote_validation['missing_dirs'])}"
+                        )
+
+                    if remote_validation['warnings']:
+                        for warning in remote_validation['warnings']:
+                            logger.error(f"Warning: {warning}")
+
+                    # Print the expected structure in a nice format
+                    tree_view = self._format_expected_structure()
+                    logger.info("\nThe repository must have the following structure:")
+                    logger.info(tree_view)
+
+                    logger.error(
+                        "Download cancelled: Repository structure does not match the expected format."
+                    )
+                    sys.exit(1)  # Exit without proceeding with download
+                else:
+                    logger.info("Remote structure validation passed!")
+
+            os.makedirs(output_dir, exist_ok=True)
+            logger.info(f"Created output directory: {output_dir}")
+
+            logger.info("\nStarting download...")
+            start_time = time.time()
+            try:
+                self.process_folder(owner, repo, folder_path, output_dir, branch)
+
+                elapsed_time = time.time() - start_time
+                logger.info(f"\nDownload completed in {elapsed_time:.2f} seconds")
+                logger.info(f"Files saved to: {os.path.abspath(output_dir)}")
+
+                if validate_structure and expected_structure:
+                    logger.info("\nValidating downloaded folder structure...")
+                    validation_result = self.validate_folder_structure(
+                        output_dir, expected_structure
+                    )
+
+                    if validation_result['valid']:
+                        logger.info("Folder structure post validation passed!")
+                    else:
+                        logger.error("Folder structure validation failed!")
+
+                        if validation_result['missing_files']:
+                            logger.info(
+                                f"Missing files: {', '.join(validation_result['missing_files'])}"
+                            )
+
+                        if validation_result['missing_dirs']:
+                            logger.info(
+                                f"Missing directories: {', '.join(validation_result['missing_dirs'])}"
+                            )
+
+                        if validation_result['warnings']:
+                            for warning in validation_result['warnings']:
+                                logger.info(f"Warng: {warning}")
+            except requests.RequestException as e:
+                # Critical error - the main folder cannot be processed
+                logger.error(
+                    f"\nDownload failed: {e}, \n No files were downloaded to: {os.path.abspath(output_dir)}"
+                )
+                sys.exit(1)
+
+        except ValueError as e:
+            logger.error(f"Error: {e}")
+            sys.exit(1)
+        except Exception as e:
+            logger.error(f"Unexpected error: {e}")
+            sys.exit(1)
```