clarifai 11.6.4__py3-none-any.whl → 11.6.4rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. clarifai/__init__.py +1 -1
  2. clarifai/__pycache__/__init__.cpython-311.pyc +0 -0
  3. clarifai/__pycache__/__init__.cpython-39.pyc +0 -0
  4. clarifai/__pycache__/errors.cpython-311.pyc +0 -0
  5. clarifai/__pycache__/errors.cpython-39.pyc +0 -0
  6. clarifai/__pycache__/versions.cpython-311.pyc +0 -0
  7. clarifai/__pycache__/versions.cpython-39.pyc +0 -0
  8. clarifai/cli/__pycache__/__init__.cpython-39.pyc +0 -0
  9. clarifai/cli/__pycache__/base.cpython-39.pyc +0 -0
  10. clarifai/cli/__pycache__/compute_cluster.cpython-39.pyc +0 -0
  11. clarifai/cli/__pycache__/deployment.cpython-39.pyc +0 -0
  12. clarifai/cli/__pycache__/model.cpython-39.pyc +0 -0
  13. clarifai/cli/__pycache__/nodepool.cpython-39.pyc +0 -0
  14. clarifai/cli/base.py +11 -27
  15. clarifai/cli/model.py +171 -41
  16. clarifai/cli/model_templates.py +243 -0
  17. clarifai/cli/pipeline_step_templates.py +64 -0
  18. clarifai/client/__pycache__/__init__.cpython-311.pyc +0 -0
  19. clarifai/client/__pycache__/__init__.cpython-39.pyc +0 -0
  20. clarifai/client/__pycache__/app.cpython-311.pyc +0 -0
  21. clarifai/client/__pycache__/app.cpython-39.pyc +0 -0
  22. clarifai/client/__pycache__/base.cpython-311.pyc +0 -0
  23. clarifai/client/__pycache__/base.cpython-39.pyc +0 -0
  24. clarifai/client/__pycache__/compute_cluster.cpython-311.pyc +0 -0
  25. clarifai/client/__pycache__/dataset.cpython-311.pyc +0 -0
  26. clarifai/client/__pycache__/dataset.cpython-39.pyc +0 -0
  27. clarifai/client/__pycache__/deployment.cpython-311.pyc +0 -0
  28. clarifai/client/__pycache__/deployment.cpython-39.pyc +0 -0
  29. clarifai/client/__pycache__/input.cpython-311.pyc +0 -0
  30. clarifai/client/__pycache__/input.cpython-39.pyc +0 -0
  31. clarifai/client/__pycache__/lister.cpython-311.pyc +0 -0
  32. clarifai/client/__pycache__/lister.cpython-39.pyc +0 -0
  33. clarifai/client/__pycache__/model.cpython-311.pyc +0 -0
  34. clarifai/client/__pycache__/model.cpython-39.pyc +0 -0
  35. clarifai/client/__pycache__/model_client.cpython-311.pyc +0 -0
  36. clarifai/client/__pycache__/model_client.cpython-39.pyc +0 -0
  37. clarifai/client/__pycache__/module.cpython-311.pyc +0 -0
  38. clarifai/client/__pycache__/nodepool.cpython-311.pyc +0 -0
  39. clarifai/client/__pycache__/runner.cpython-311.pyc +0 -0
  40. clarifai/client/__pycache__/search.cpython-311.pyc +0 -0
  41. clarifai/client/__pycache__/user.cpython-311.pyc +0 -0
  42. clarifai/client/__pycache__/workflow.cpython-311.pyc +0 -0
  43. clarifai/client/auth/__pycache__/__init__.cpython-311.pyc +0 -0
  44. clarifai/client/auth/__pycache__/__init__.cpython-39.pyc +0 -0
  45. clarifai/client/auth/__pycache__/helper.cpython-311.pyc +0 -0
  46. clarifai/client/auth/__pycache__/helper.cpython-39.pyc +0 -0
  47. clarifai/client/auth/__pycache__/register.cpython-311.pyc +0 -0
  48. clarifai/client/auth/__pycache__/register.cpython-39.pyc +0 -0
  49. clarifai/client/auth/__pycache__/stub.cpython-311.pyc +0 -0
  50. clarifai/client/auth/__pycache__/stub.cpython-39.pyc +0 -0
  51. clarifai/constants/__pycache__/base.cpython-311.pyc +0 -0
  52. clarifai/constants/__pycache__/base.cpython-39.pyc +0 -0
  53. clarifai/constants/__pycache__/dataset.cpython-311.pyc +0 -0
  54. clarifai/constants/__pycache__/dataset.cpython-39.pyc +0 -0
  55. clarifai/constants/__pycache__/input.cpython-311.pyc +0 -0
  56. clarifai/constants/__pycache__/input.cpython-39.pyc +0 -0
  57. clarifai/constants/__pycache__/model.cpython-311.pyc +0 -0
  58. clarifai/constants/__pycache__/model.cpython-39.pyc +0 -0
  59. clarifai/constants/__pycache__/search.cpython-311.pyc +0 -0
  60. clarifai/constants/__pycache__/workflow.cpython-311.pyc +0 -0
  61. clarifai/datasets/__pycache__/__init__.cpython-311.pyc +0 -0
  62. clarifai/datasets/__pycache__/__init__.cpython-39.pyc +0 -0
  63. clarifai/datasets/export/__pycache__/__init__.cpython-311.pyc +0 -0
  64. clarifai/datasets/export/__pycache__/__init__.cpython-39.pyc +0 -0
  65. clarifai/datasets/export/__pycache__/inputs_annotations.cpython-311.pyc +0 -0
  66. clarifai/datasets/export/__pycache__/inputs_annotations.cpython-39.pyc +0 -0
  67. clarifai/datasets/upload/__pycache__/__init__.cpython-311.pyc +0 -0
  68. clarifai/datasets/upload/__pycache__/__init__.cpython-39.pyc +0 -0
  69. clarifai/datasets/upload/__pycache__/base.cpython-311.pyc +0 -0
  70. clarifai/datasets/upload/__pycache__/base.cpython-39.pyc +0 -0
  71. clarifai/datasets/upload/__pycache__/features.cpython-311.pyc +0 -0
  72. clarifai/datasets/upload/__pycache__/features.cpython-39.pyc +0 -0
  73. clarifai/datasets/upload/__pycache__/image.cpython-311.pyc +0 -0
  74. clarifai/datasets/upload/__pycache__/image.cpython-39.pyc +0 -0
  75. clarifai/datasets/upload/__pycache__/multimodal.cpython-311.pyc +0 -0
  76. clarifai/datasets/upload/__pycache__/multimodal.cpython-39.pyc +0 -0
  77. clarifai/datasets/upload/__pycache__/text.cpython-311.pyc +0 -0
  78. clarifai/datasets/upload/__pycache__/text.cpython-39.pyc +0 -0
  79. clarifai/datasets/upload/__pycache__/utils.cpython-311.pyc +0 -0
  80. clarifai/datasets/upload/__pycache__/utils.cpython-39.pyc +0 -0
  81. clarifai/models/model_serving/README.md +158 -0
  82. clarifai/models/model_serving/__init__.py +14 -0
  83. clarifai/models/model_serving/cli/__init__.py +12 -0
  84. clarifai/models/model_serving/cli/_utils.py +53 -0
  85. clarifai/models/model_serving/cli/base.py +14 -0
  86. clarifai/models/model_serving/cli/build.py +79 -0
  87. clarifai/models/model_serving/cli/clarifai_clis.py +33 -0
  88. clarifai/models/model_serving/cli/create.py +171 -0
  89. clarifai/models/model_serving/cli/example_cli.py +34 -0
  90. clarifai/models/model_serving/cli/login.py +26 -0
  91. clarifai/models/model_serving/cli/upload.py +179 -0
  92. clarifai/models/model_serving/constants.py +21 -0
  93. clarifai/models/model_serving/docs/cli.md +161 -0
  94. clarifai/models/model_serving/docs/concepts.md +229 -0
  95. clarifai/models/model_serving/docs/dependencies.md +11 -0
  96. clarifai/models/model_serving/docs/inference_parameters.md +139 -0
  97. clarifai/models/model_serving/docs/model_types.md +19 -0
  98. clarifai/models/model_serving/model_config/__init__.py +16 -0
  99. clarifai/models/model_serving/model_config/base.py +369 -0
  100. clarifai/models/model_serving/model_config/config.py +312 -0
  101. clarifai/models/model_serving/model_config/inference_parameter.py +129 -0
  102. clarifai/models/model_serving/model_config/model_types_config/multimodal-embedder.yaml +25 -0
  103. clarifai/models/model_serving/model_config/model_types_config/text-classifier.yaml +19 -0
  104. clarifai/models/model_serving/model_config/model_types_config/text-embedder.yaml +20 -0
  105. clarifai/models/model_serving/model_config/model_types_config/text-to-image.yaml +19 -0
  106. clarifai/models/model_serving/model_config/model_types_config/text-to-text.yaml +19 -0
  107. clarifai/models/model_serving/model_config/model_types_config/visual-classifier.yaml +22 -0
  108. clarifai/models/model_serving/model_config/model_types_config/visual-detector.yaml +32 -0
  109. clarifai/models/model_serving/model_config/model_types_config/visual-embedder.yaml +19 -0
  110. clarifai/models/model_serving/model_config/model_types_config/visual-segmenter.yaml +19 -0
  111. clarifai/models/model_serving/model_config/output.py +133 -0
  112. clarifai/models/model_serving/model_config/triton/__init__.py +14 -0
  113. clarifai/models/model_serving/model_config/triton/serializer.py +136 -0
  114. clarifai/models/model_serving/model_config/triton/triton_config.py +182 -0
  115. clarifai/models/model_serving/model_config/triton/wrappers.py +281 -0
  116. clarifai/models/model_serving/repo_build/__init__.py +14 -0
  117. clarifai/models/model_serving/repo_build/build.py +198 -0
  118. clarifai/models/model_serving/repo_build/static_files/_requirements.txt +2 -0
  119. clarifai/models/model_serving/repo_build/static_files/base_test.py +169 -0
  120. clarifai/models/model_serving/repo_build/static_files/inference.py +26 -0
  121. clarifai/models/model_serving/repo_build/static_files/sample_clarifai_config.yaml +25 -0
  122. clarifai/models/model_serving/repo_build/static_files/test.py +40 -0
  123. clarifai/models/model_serving/repo_build/static_files/triton/model.py +75 -0
  124. clarifai/models/model_serving/utils.py +23 -0
  125. clarifai/runners/__pycache__/__init__.cpython-311.pyc +0 -0
  126. clarifai/runners/__pycache__/__init__.cpython-39.pyc +0 -0
  127. clarifai/runners/models/__pycache__/__init__.cpython-311.pyc +0 -0
  128. clarifai/runners/models/__pycache__/__init__.cpython-39.pyc +0 -0
  129. clarifai/runners/models/__pycache__/mcp_class.cpython-311.pyc +0 -0
  130. clarifai/runners/models/__pycache__/model_builder.cpython-311.pyc +0 -0
  131. clarifai/runners/models/__pycache__/model_builder.cpython-39.pyc +0 -0
  132. clarifai/runners/models/__pycache__/model_class.cpython-311.pyc +0 -0
  133. clarifai/runners/models/__pycache__/model_runner.cpython-311.pyc +0 -0
  134. clarifai/runners/models/__pycache__/openai_class.cpython-311.pyc +0 -0
  135. clarifai/runners/models/base_typed_model.py +238 -0
  136. clarifai/runners/models/model_upload.py +607 -0
  137. clarifai/runners/server.py +1 -0
  138. clarifai/runners/utils/__pycache__/__init__.cpython-311.pyc +0 -0
  139. clarifai/runners/utils/__pycache__/__init__.cpython-39.pyc +0 -0
  140. clarifai/runners/utils/__pycache__/code_script.cpython-311.pyc +0 -0
  141. clarifai/runners/utils/__pycache__/code_script.cpython-39.pyc +0 -0
  142. clarifai/runners/utils/__pycache__/const.cpython-311.pyc +0 -0
  143. clarifai/runners/utils/__pycache__/data_utils.cpython-311.pyc +0 -0
  144. clarifai/runners/utils/__pycache__/data_utils.cpython-39.pyc +0 -0
  145. clarifai/runners/utils/__pycache__/loader.cpython-311.pyc +0 -0
  146. clarifai/runners/utils/__pycache__/method_signatures.cpython-311.pyc +0 -0
  147. clarifai/runners/utils/__pycache__/model_utils.cpython-311.pyc +0 -0
  148. clarifai/runners/utils/__pycache__/openai_convertor.cpython-311.pyc +0 -0
  149. clarifai/runners/utils/__pycache__/serializers.cpython-311.pyc +0 -0
  150. clarifai/runners/utils/__pycache__/url_fetcher.cpython-311.pyc +0 -0
  151. clarifai/runners/utils/code_script.py +52 -46
  152. clarifai/runners/utils/data_handler.py +231 -0
  153. clarifai/runners/utils/data_types/__pycache__/__init__.cpython-311.pyc +0 -0
  154. clarifai/runners/utils/data_types/__pycache__/__init__.cpython-39.pyc +0 -0
  155. clarifai/runners/utils/data_types/__pycache__/data_types.cpython-311.pyc +0 -0
  156. clarifai/runners/utils/data_types/__pycache__/data_types.cpython-39.pyc +0 -0
  157. clarifai/runners/utils/data_types.py +471 -0
  158. clarifai/runners/utils/temp.py +59 -0
  159. clarifai/schema/__pycache__/search.cpython-311.pyc +0 -0
  160. clarifai/urls/__pycache__/helper.cpython-311.pyc +0 -0
  161. clarifai/urls/__pycache__/helper.cpython-39.pyc +0 -0
  162. clarifai/utils/__pycache__/__init__.cpython-311.pyc +0 -0
  163. clarifai/utils/__pycache__/__init__.cpython-39.pyc +0 -0
  164. clarifai/utils/__pycache__/cli.cpython-39.pyc +0 -0
  165. clarifai/utils/__pycache__/config.cpython-311.pyc +0 -0
  166. clarifai/utils/__pycache__/config.cpython-39.pyc +0 -0
  167. clarifai/utils/__pycache__/constants.cpython-311.pyc +0 -0
  168. clarifai/utils/__pycache__/constants.cpython-39.pyc +0 -0
  169. clarifai/utils/__pycache__/logging.cpython-311.pyc +0 -0
  170. clarifai/utils/__pycache__/logging.cpython-39.pyc +0 -0
  171. clarifai/utils/__pycache__/misc.cpython-311.pyc +0 -0
  172. clarifai/utils/__pycache__/misc.cpython-39.pyc +0 -0
  173. clarifai/utils/__pycache__/model_train.cpython-311.pyc +0 -0
  174. clarifai/utils/__pycache__/protobuf.cpython-311.pyc +0 -0
  175. clarifai/utils/__pycache__/protobuf.cpython-39.pyc +0 -0
  176. clarifai/utils/cli.py +14 -15
  177. clarifai/utils/constants.py +2 -0
  178. clarifai/utils/misc.py +382 -1
  179. clarifai/workflows/__pycache__/__init__.cpython-311.pyc +0 -0
  180. clarifai/workflows/__pycache__/export.cpython-311.pyc +0 -0
  181. clarifai/workflows/__pycache__/utils.cpython-311.pyc +0 -0
  182. clarifai/workflows/__pycache__/validate.cpython-311.pyc +0 -0
  183. {clarifai-11.6.4.dist-info → clarifai-11.6.4rc2.dist-info}/METADATA +1 -1
  184. clarifai-11.6.4rc2.dist-info/RECORD +301 -0
  185. {clarifai-11.6.4.dist-info → clarifai-11.6.4rc2.dist-info}/WHEEL +1 -1
  186. clarifai-11.6.4.dist-info/RECORD +0 -127
  187. {clarifai-11.6.4.dist-info → clarifai-11.6.4rc2.dist-info}/entry_points.txt +0 -0
  188. {clarifai-11.6.4.dist-info → clarifai-11.6.4rc2.dist-info}/licenses/LICENSE +0 -0
  189. {clarifai-11.6.4.dist-info → clarifai-11.6.4rc2.dist-info}/top_level.txt +0 -0
clarifai/utils/misc.py CHANGED
@@ -2,11 +2,17 @@ import os
2
2
  import re
3
3
  import shutil
4
4
  import subprocess
5
+ import sys
6
+ import time
5
7
  import urllib.parse
6
8
  import uuid
7
- from typing import Any, Dict, List
9
+ from typing import Any, Dict, List, Tuple
10
+ from urllib.parse import urlparse
8
11
 
12
+ import requests
9
13
  from clarifai_grpc.grpc.api.status import status_code_pb2
14
+ from requests.adapters import HTTPAdapter
15
+ from urllib3.util.retry import Retry
10
16
 
11
17
  from clarifai.errors import UserError
12
18
  from clarifai.utils.constants import HOME_PATH
@@ -166,3 +172,378 @@ def clone_github_repo(repo_url, target_dir, github_pat=None, branch=None):
166
172
  except subprocess.CalledProcessError as e:
167
173
  logger.error(f"Failed to clone repository: {e.stderr}")
168
174
  return False
175
+
176
+
177
+ class GitHubDownloader:
178
+ def __init__(
179
+ self, max_retries: int = 3, backoff_factor: float = 0.3, github_token: str = None
180
+ ):
181
+ self.session = requests.Session()
182
+ self.github_token = github_token
183
+
184
+ retry_strategy = Retry(
185
+ total=max_retries,
186
+ backoff_factor=backoff_factor,
187
+ status_forcelist=[429, 500, 502, 503, 504],
188
+ allowed_methods=["HEAD", "GET", "OPTIONS"],
189
+ )
190
+
191
+ adapter = HTTPAdapter(max_retries=retry_strategy)
192
+ self.session.mount("http://", adapter)
193
+ self.session.mount("https://", adapter)
194
+
195
+ self.session.headers.update({'User-Agent': 'GitHub-Folder-Downloader/1.0'})
196
+
197
+ if self.github_token:
198
+ self.session.headers.update({'Authorization': f'token {self.github_token}'})
199
+
200
+ def expected_folder_structure(self) -> List[Dict[str, Any]]:
201
+ return [
202
+ {"name": "1", "type": "dir", "children": [{"name": "model.py", "type": "file"}]},
203
+ {"name": "config.yaml", "type": "file"},
204
+ {"name": "requirements.txt", "type": "file"},
205
+ ]
206
+
207
+ def _format_expected_structure(self):
208
+ """Format the expected structure as a nice tree view."""
209
+ tree_str = ""
210
+ tree_str += "Expected folder structure:\n"
211
+ tree_str += "├── 1/\n"
212
+ tree_str += "│ └── model.py\n"
213
+ tree_str += "├── requirements.txt\n"
214
+ tree_str += "└── config.yaml\n"
215
+ return tree_str
216
+
217
+ def parse_github_url(self, url: str) -> Tuple[str, str, str, str]:
218
+ try:
219
+ parsed = urlparse(url)
220
+
221
+ if parsed.netloc not in ['github.com', 'www.github.com']:
222
+ raise ValueError("URL must be a GitHub repository URL")
223
+
224
+ path_parts = [p for p in parsed.path.strip('/').split('/') if p]
225
+
226
+ if len(path_parts) < 2:
227
+ raise ValueError("Invalid GitHub repository URL format")
228
+
229
+ owner = path_parts[0]
230
+ repo = path_parts[1]
231
+
232
+ if len(path_parts) >= 4 and path_parts[2] in ['tree', 'blob']:
233
+ branch = path_parts[3]
234
+ folder_path = '/'.join(path_parts[4:]) if len(path_parts) > 4 else ''
235
+ elif len(path_parts) >= 3:
236
+ branch = path_parts[2]
237
+ folder_path = '/'.join(path_parts[3:]) if len(path_parts) > 3 else ''
238
+ else:
239
+ branch = 'main'
240
+ folder_path = ''
241
+
242
+ return owner, repo, branch, folder_path
243
+
244
+ except Exception as e:
245
+ logger.error(f"Failed to parse GitHub URL: {e}")
246
+ sys.exit(1)
247
+
248
+ def get_folder_contents(self, owner: str, repo: str, path: str, branch: str = 'main') -> list:
249
+ api_url = f"https://api.github.com/repos/{owner}/{repo}/contents/{path}"
250
+ params = {'ref': branch} if branch else {}
251
+
252
+ try:
253
+ response = self.session.get(api_url, params=params, timeout=30)
254
+ response.raise_for_status()
255
+ return response.json()
256
+ except requests.exceptions.Timeout:
257
+ raise requests.RequestException("Request timed out. Please try again.")
258
+ except requests.exceptions.ConnectionError:
259
+ raise requests.RequestException(
260
+ "Connection error. Please check your internet connection."
261
+ )
262
+ except requests.exceptions.HTTPError as e:
263
+ if e.response.status_code == 404:
264
+ token_msg = (
265
+ ""
266
+ if self.github_token
267
+ else " For private repositories, use the github_token parameter."
268
+ )
269
+ raise requests.RequestException(
270
+ f"Folder not found: {path}. Check if path exists or if the repository is private.{token_msg}"
271
+ )
272
+ elif e.response.status_code == 401 or e.response.status_code == 403:
273
+ token_msg = (
274
+ " The provided GitHub token may be invalid or have insufficient permissions."
275
+ if self.github_token
276
+ else " For private repositories, use the github_token parameter."
277
+ )
278
+ raise requests.RequestException(f"Authentication error: {e}.{token_msg}")
279
+ else:
280
+ raise requests.RequestException(f"API request failed: {e}")
281
+ except requests.exceptions.RequestException as e:
282
+ token_msg = (
283
+ ""
284
+ if self.github_token
285
+ else " For private repositories, use the github_token parameter."
286
+ )
287
+ raise requests.RequestException(f"API request failed: {e}.{token_msg}")
288
+
289
+ def validate_remote_structure(
290
+ self,
291
+ owner: str,
292
+ repo: str,
293
+ path: str,
294
+ branch: str,
295
+ expected_structure: List[Dict[str, Any]],
296
+ ) -> Dict[str, Any]:
297
+ validation_result = {
298
+ 'valid': True,
299
+ 'missing_files': [],
300
+ 'missing_dirs': [],
301
+ 'warnings': [],
302
+ 'remote_contents': [],
303
+ }
304
+
305
+ try:
306
+ remote_contents = self.get_folder_contents(owner, repo, path, branch)
307
+ validation_result['remote_contents'] = remote_contents
308
+
309
+ remote_items = {item['name']: item['type'] for item in remote_contents}
310
+
311
+ for item in expected_structure:
312
+ item_name = item['name']
313
+ item_type = item.get('type', 'file')
314
+
315
+ if item_name not in remote_items:
316
+ if item_type == 'file':
317
+ validation_result['missing_files'].append(item_name)
318
+ else:
319
+ validation_result['missing_dirs'].append(item_name)
320
+ validation_result['valid'] = False
321
+ elif remote_items[item_name] != item_type:
322
+ validation_result['warnings'].append(
323
+ f"Item '{item_name}' exists but is a {remote_items[item_name]} instead of {item_type}"
324
+ )
325
+ validation_result['valid'] = False
326
+
327
+ expected_names = {item['name'] for item in expected_structure}
328
+ unexpected_items = [name for name in remote_items.keys() if name not in expected_names]
329
+ if unexpected_items:
330
+ validation_result['warnings'].append(
331
+ f"Unexpected items found: {', '.join(unexpected_items)}"
332
+ )
333
+
334
+ except requests.RequestException as e:
335
+ validation_result['valid'] = False
336
+ validation_result['warnings'].append(f"Failed to access remote repository: {e}")
337
+
338
+ return validation_result
339
+
340
+ def download_file(self, download_url: str, local_path: str) -> None:
341
+ try:
342
+ response = self.session.get(download_url, stream=True, timeout=60)
343
+ response.raise_for_status()
344
+
345
+ os.makedirs(os.path.dirname(local_path), exist_ok=True)
346
+
347
+ total_size = int(response.headers.get('content-length', 0))
348
+ downloaded_size = 0
349
+
350
+ with open(local_path, 'wb') as f:
351
+ for chunk in response.iter_content(chunk_size=8192):
352
+ if chunk:
353
+ f.write(chunk)
354
+ downloaded_size += len(chunk)
355
+
356
+ if total_size > 0 and total_size > 1024 * 1024:
357
+ progress = (downloaded_size / total_size) * 100
358
+ logger.info(
359
+ f"\rDownloading: {os.path.basename(local_path)} - {progress:.1f}%",
360
+ end='',
361
+ flush=True,
362
+ )
363
+
364
+ if total_size > 1024 * 1024:
365
+ logger.info()
366
+
367
+ logger.info(f"Downloaded: {local_path}")
368
+
369
+ except requests.exceptions.Timeout:
370
+ logger.info(f"Timeout downloading {local_path}. Skipping...")
371
+ except requests.exceptions.ConnectionError:
372
+ logger.info(f"Connection error downloading {local_path}. Skipping...")
373
+ except Exception as e:
374
+ logger.info(f"Failed to download {local_path}: {e}")
375
+
376
+ def process_folder(
377
+ self, owner: str, repo: str, path: str, local_base_path: str, branch: str = 'main'
378
+ ) -> None:
379
+ try:
380
+ contents = self.get_folder_contents(owner, repo, path, branch)
381
+
382
+ if not contents:
383
+ logger.info(f"Info: Empty folder - {path}")
384
+ return
385
+
386
+ for item in contents:
387
+ item_name = item['name']
388
+ item_path = os.path.join(local_base_path, item_name)
389
+
390
+ if item['type'] == 'file':
391
+ self.download_file(item['download_url'], item_path)
392
+
393
+ elif item['type'] == 'dir':
394
+ os.makedirs(item_path, exist_ok=True)
395
+ logger.info(f"Created directory: {item_path}")
396
+
397
+ new_path = f"{path}/{item_name}" if path else item_name
398
+ self.process_folder(owner, repo, new_path, item_path, branch)
399
+
400
+ except requests.exceptions.RequestException as e:
401
+ if "Folder not found" in str(e):
402
+ logger.error(f"Error: Folder not found - {path}")
403
+ raise
404
+ else:
405
+ logger.error(f"Error accessing folder {path}: {e}")
406
+ raise
407
+ except Exception as e:
408
+ logger.error(f"Unexpected error processing folder {path}: {e}")
409
+ raise
410
+
411
+ def validate_folder_structure(
412
+ self, folder_path: str, expected_structure: List[Dict[str, Any]]
413
+ ) -> Dict[str, Any]:
414
+ validation_result = {
415
+ 'valid': True,
416
+ 'missing_files': [],
417
+ 'missing_dirs': [],
418
+ 'warnings': [],
419
+ }
420
+
421
+ if not os.path.exists(folder_path):
422
+ validation_result['valid'] = False
423
+ validation_result['warnings'].append(f"Folder {folder_path} does not exist")
424
+ return validation_result
425
+
426
+ for item in expected_structure:
427
+ item_name = item['name']
428
+ item_type = item.get('type', 'file')
429
+ item_path = os.path.join(folder_path, item_name)
430
+
431
+ if item_type == 'file':
432
+ if not os.path.isfile(item_path):
433
+ validation_result['missing_files'].append(item_name)
434
+ validation_result['valid'] = False
435
+ elif item_type == 'dir':
436
+ if not os.path.isdir(item_path):
437
+ validation_result['missing_dirs'].append(item_name)
438
+ validation_result['valid'] = False
439
+
440
+ return validation_result
441
+
442
+ def download_github_folder(
443
+ self,
444
+ url: str,
445
+ output_dir: str,
446
+ github_token: str = None,
447
+ validate_structure: bool = False,
448
+ pre_validate: bool = True,
449
+ strict_validation: bool = False,
450
+ ) -> None:
451
+ logger.info(f"Parsing GitHub URL: {url}")
452
+
453
+ # Update token if provided as a parameter
454
+ if github_token:
455
+ self.github_token = github_token
456
+ self.session.headers.update({'Authorization': f'token {github_token}'})
457
+
458
+ try:
459
+ owner, repo, branch, folder_path = self.parse_github_url(url)
460
+ logger.info(f"Repository: {owner}/{repo}")
461
+ logger.info(f"Branch: {branch}")
462
+ logger.info(f"Folder: {folder_path or 'root'}")
463
+
464
+ expected_structure = self.expected_folder_structure() if pre_validate else None
465
+
466
+ if expected_structure:
467
+ logger.info("\nValidating remote folder structure...")
468
+ remote_validation = self.validate_remote_structure(
469
+ owner, repo, folder_path, branch, expected_structure
470
+ )
471
+
472
+ if not remote_validation['valid']:
473
+ logger.error("Remote structure validation failed!")
474
+
475
+ if remote_validation['missing_files']:
476
+ logger.error(
477
+ f"Missing files: {', '.join(remote_validation['missing_files'])}"
478
+ )
479
+
480
+ if remote_validation['missing_dirs']:
481
+ logger.error(
482
+ f"Missing directories: {', '.join(remote_validation['missing_dirs'])}"
483
+ )
484
+
485
+ if remote_validation['warnings']:
486
+ for warning in remote_validation['warnings']:
487
+ logger.error(f"Warning: {warning}")
488
+
489
+ # Print the expected structure in a nice format
490
+ tree_view = self._format_expected_structure()
491
+ logger.info("\nThe repository must have the following structure:")
492
+ logger.info(tree_view)
493
+
494
+ logger.error(
495
+ "Download cancelled: Repository structure does not match the expected format."
496
+ )
497
+ sys.exit(1) # Exit without proceeding with download
498
+ else:
499
+ logger.info("Remote structure validation passed!")
500
+
501
+ os.makedirs(output_dir, exist_ok=True)
502
+ logger.info(f"Created output directory: {output_dir}")
503
+
504
+ logger.info("\nStarting download...")
505
+ start_time = time.time()
506
+ try:
507
+ self.process_folder(owner, repo, folder_path, output_dir, branch)
508
+
509
+ elapsed_time = time.time() - start_time
510
+ logger.info(f"\nDownload completed in {elapsed_time:.2f} seconds")
511
+ logger.info(f"Files saved to: {os.path.abspath(output_dir)}")
512
+
513
+ if validate_structure and expected_structure:
514
+ logger.info("\nValidating downloaded folder structure...")
515
+ validation_result = self.validate_folder_structure(
516
+ output_dir, expected_structure
517
+ )
518
+
519
+ if validation_result['valid']:
520
+ logger.info("Folder structure post validation passed!")
521
+ else:
522
+ logger.error("Folder structure validation failed!")
523
+
524
+ if validation_result['missing_files']:
525
+ logger.info(
526
+ f"Missing files: {', '.join(validation_result['missing_files'])}"
527
+ )
528
+
529
+ if validation_result['missing_dirs']:
530
+ logger.info(
531
+ f"Missing directories: {', '.join(validation_result['missing_dirs'])}"
532
+ )
533
+
534
+ if validation_result['warnings']:
535
+ for warning in validation_result['warnings']:
536
+ logger.info(f"Warng: {warning}")
537
+ except requests.RequestException as e:
538
+ # Critical error - the main folder cannot be processed
539
+ logger.error(
540
+ f"\nDownload failed: {e}, \n No files were downloaded to: {os.path.abspath(output_dir)}"
541
+ )
542
+ sys.exit(1)
543
+
544
+ except ValueError as e:
545
+ logger.error(f"Error: {e}")
546
+ sys.exit(1)
547
+ except Exception as e:
548
+ logger.error(f"Unexpected error: {e}")
549
+ sys.exit(1)
{clarifai-11.6.4.dist-info → clarifai-11.6.4rc2.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: clarifai
3
- Version: 11.6.4
3
+ Version: 11.6.4rc2
4
4
  Home-page: https://github.com/Clarifai/clarifai-python
5
5
  Author: Clarifai
6
6
  Author-email: support@clarifai.com