clarifai 11.6.4__py3-none-any.whl → 11.6.4rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (187) hide show
  1. clarifai/__init__.py +1 -1
  2. clarifai/__pycache__/__init__.cpython-311.pyc +0 -0
  3. clarifai/__pycache__/__init__.cpython-39.pyc +0 -0
  4. clarifai/__pycache__/errors.cpython-311.pyc +0 -0
  5. clarifai/__pycache__/errors.cpython-39.pyc +0 -0
  6. clarifai/__pycache__/versions.cpython-311.pyc +0 -0
  7. clarifai/__pycache__/versions.cpython-39.pyc +0 -0
  8. clarifai/cli/__pycache__/__init__.cpython-39.pyc +0 -0
  9. clarifai/cli/__pycache__/base.cpython-39.pyc +0 -0
  10. clarifai/cli/__pycache__/compute_cluster.cpython-39.pyc +0 -0
  11. clarifai/cli/__pycache__/deployment.cpython-39.pyc +0 -0
  12. clarifai/cli/__pycache__/model.cpython-39.pyc +0 -0
  13. clarifai/cli/__pycache__/nodepool.cpython-39.pyc +0 -0
  14. clarifai/cli/base.py +11 -27
  15. clarifai/cli/model.py +171 -41
  16. clarifai/cli/model_templates.py +243 -0
  17. clarifai/cli/pipeline_step_templates.py +64 -0
  18. clarifai/client/__pycache__/__init__.cpython-311.pyc +0 -0
  19. clarifai/client/__pycache__/__init__.cpython-39.pyc +0 -0
  20. clarifai/client/__pycache__/app.cpython-311.pyc +0 -0
  21. clarifai/client/__pycache__/app.cpython-39.pyc +0 -0
  22. clarifai/client/__pycache__/base.cpython-311.pyc +0 -0
  23. clarifai/client/__pycache__/base.cpython-39.pyc +0 -0
  24. clarifai/client/__pycache__/compute_cluster.cpython-311.pyc +0 -0
  25. clarifai/client/__pycache__/dataset.cpython-311.pyc +0 -0
  26. clarifai/client/__pycache__/dataset.cpython-39.pyc +0 -0
  27. clarifai/client/__pycache__/deployment.cpython-311.pyc +0 -0
  28. clarifai/client/__pycache__/deployment.cpython-39.pyc +0 -0
  29. clarifai/client/__pycache__/input.cpython-311.pyc +0 -0
  30. clarifai/client/__pycache__/input.cpython-39.pyc +0 -0
  31. clarifai/client/__pycache__/lister.cpython-311.pyc +0 -0
  32. clarifai/client/__pycache__/lister.cpython-39.pyc +0 -0
  33. clarifai/client/__pycache__/model.cpython-311.pyc +0 -0
  34. clarifai/client/__pycache__/model.cpython-39.pyc +0 -0
  35. clarifai/client/__pycache__/model_client.cpython-311.pyc +0 -0
  36. clarifai/client/__pycache__/model_client.cpython-39.pyc +0 -0
  37. clarifai/client/__pycache__/module.cpython-311.pyc +0 -0
  38. clarifai/client/__pycache__/nodepool.cpython-311.pyc +0 -0
  39. clarifai/client/__pycache__/runner.cpython-311.pyc +0 -0
  40. clarifai/client/__pycache__/search.cpython-311.pyc +0 -0
  41. clarifai/client/__pycache__/user.cpython-311.pyc +0 -0
  42. clarifai/client/__pycache__/workflow.cpython-311.pyc +0 -0
  43. clarifai/client/auth/__pycache__/__init__.cpython-311.pyc +0 -0
  44. clarifai/client/auth/__pycache__/__init__.cpython-39.pyc +0 -0
  45. clarifai/client/auth/__pycache__/helper.cpython-311.pyc +0 -0
  46. clarifai/client/auth/__pycache__/helper.cpython-39.pyc +0 -0
  47. clarifai/client/auth/__pycache__/register.cpython-311.pyc +0 -0
  48. clarifai/client/auth/__pycache__/register.cpython-39.pyc +0 -0
  49. clarifai/client/auth/__pycache__/stub.cpython-311.pyc +0 -0
  50. clarifai/client/auth/__pycache__/stub.cpython-39.pyc +0 -0
  51. clarifai/constants/__pycache__/base.cpython-311.pyc +0 -0
  52. clarifai/constants/__pycache__/base.cpython-39.pyc +0 -0
  53. clarifai/constants/__pycache__/dataset.cpython-311.pyc +0 -0
  54. clarifai/constants/__pycache__/dataset.cpython-39.pyc +0 -0
  55. clarifai/constants/__pycache__/input.cpython-311.pyc +0 -0
  56. clarifai/constants/__pycache__/input.cpython-39.pyc +0 -0
  57. clarifai/constants/__pycache__/model.cpython-311.pyc +0 -0
  58. clarifai/constants/__pycache__/model.cpython-39.pyc +0 -0
  59. clarifai/constants/__pycache__/search.cpython-311.pyc +0 -0
  60. clarifai/constants/__pycache__/workflow.cpython-311.pyc +0 -0
  61. clarifai/datasets/__pycache__/__init__.cpython-311.pyc +0 -0
  62. clarifai/datasets/__pycache__/__init__.cpython-39.pyc +0 -0
  63. clarifai/datasets/export/__pycache__/__init__.cpython-311.pyc +0 -0
  64. clarifai/datasets/export/__pycache__/__init__.cpython-39.pyc +0 -0
  65. clarifai/datasets/export/__pycache__/inputs_annotations.cpython-311.pyc +0 -0
  66. clarifai/datasets/export/__pycache__/inputs_annotations.cpython-39.pyc +0 -0
  67. clarifai/datasets/upload/__pycache__/__init__.cpython-311.pyc +0 -0
  68. clarifai/datasets/upload/__pycache__/__init__.cpython-39.pyc +0 -0
  69. clarifai/datasets/upload/__pycache__/base.cpython-311.pyc +0 -0
  70. clarifai/datasets/upload/__pycache__/base.cpython-39.pyc +0 -0
  71. clarifai/datasets/upload/__pycache__/features.cpython-311.pyc +0 -0
  72. clarifai/datasets/upload/__pycache__/features.cpython-39.pyc +0 -0
  73. clarifai/datasets/upload/__pycache__/image.cpython-311.pyc +0 -0
  74. clarifai/datasets/upload/__pycache__/image.cpython-39.pyc +0 -0
  75. clarifai/datasets/upload/__pycache__/multimodal.cpython-311.pyc +0 -0
  76. clarifai/datasets/upload/__pycache__/multimodal.cpython-39.pyc +0 -0
  77. clarifai/datasets/upload/__pycache__/text.cpython-311.pyc +0 -0
  78. clarifai/datasets/upload/__pycache__/text.cpython-39.pyc +0 -0
  79. clarifai/datasets/upload/__pycache__/utils.cpython-311.pyc +0 -0
  80. clarifai/datasets/upload/__pycache__/utils.cpython-39.pyc +0 -0
  81. clarifai/models/model_serving/README.md +158 -0
  82. clarifai/models/model_serving/__init__.py +14 -0
  83. clarifai/models/model_serving/cli/__init__.py +12 -0
  84. clarifai/models/model_serving/cli/_utils.py +53 -0
  85. clarifai/models/model_serving/cli/base.py +14 -0
  86. clarifai/models/model_serving/cli/build.py +79 -0
  87. clarifai/models/model_serving/cli/clarifai_clis.py +33 -0
  88. clarifai/models/model_serving/cli/create.py +171 -0
  89. clarifai/models/model_serving/cli/example_cli.py +34 -0
  90. clarifai/models/model_serving/cli/login.py +26 -0
  91. clarifai/models/model_serving/cli/upload.py +179 -0
  92. clarifai/models/model_serving/constants.py +21 -0
  93. clarifai/models/model_serving/docs/cli.md +161 -0
  94. clarifai/models/model_serving/docs/concepts.md +229 -0
  95. clarifai/models/model_serving/docs/dependencies.md +11 -0
  96. clarifai/models/model_serving/docs/inference_parameters.md +139 -0
  97. clarifai/models/model_serving/docs/model_types.md +19 -0
  98. clarifai/models/model_serving/model_config/__init__.py +16 -0
  99. clarifai/models/model_serving/model_config/base.py +369 -0
  100. clarifai/models/model_serving/model_config/config.py +312 -0
  101. clarifai/models/model_serving/model_config/inference_parameter.py +129 -0
  102. clarifai/models/model_serving/model_config/model_types_config/multimodal-embedder.yaml +25 -0
  103. clarifai/models/model_serving/model_config/model_types_config/text-classifier.yaml +19 -0
  104. clarifai/models/model_serving/model_config/model_types_config/text-embedder.yaml +20 -0
  105. clarifai/models/model_serving/model_config/model_types_config/text-to-image.yaml +19 -0
  106. clarifai/models/model_serving/model_config/model_types_config/text-to-text.yaml +19 -0
  107. clarifai/models/model_serving/model_config/model_types_config/visual-classifier.yaml +22 -0
  108. clarifai/models/model_serving/model_config/model_types_config/visual-detector.yaml +32 -0
  109. clarifai/models/model_serving/model_config/model_types_config/visual-embedder.yaml +19 -0
  110. clarifai/models/model_serving/model_config/model_types_config/visual-segmenter.yaml +19 -0
  111. clarifai/models/model_serving/model_config/output.py +133 -0
  112. clarifai/models/model_serving/model_config/triton/__init__.py +14 -0
  113. clarifai/models/model_serving/model_config/triton/serializer.py +136 -0
  114. clarifai/models/model_serving/model_config/triton/triton_config.py +182 -0
  115. clarifai/models/model_serving/model_config/triton/wrappers.py +281 -0
  116. clarifai/models/model_serving/repo_build/__init__.py +14 -0
  117. clarifai/models/model_serving/repo_build/build.py +198 -0
  118. clarifai/models/model_serving/repo_build/static_files/_requirements.txt +2 -0
  119. clarifai/models/model_serving/repo_build/static_files/base_test.py +169 -0
  120. clarifai/models/model_serving/repo_build/static_files/inference.py +26 -0
  121. clarifai/models/model_serving/repo_build/static_files/sample_clarifai_config.yaml +25 -0
  122. clarifai/models/model_serving/repo_build/static_files/test.py +40 -0
  123. clarifai/models/model_serving/repo_build/static_files/triton/model.py +75 -0
  124. clarifai/models/model_serving/utils.py +23 -0
  125. clarifai/runners/__pycache__/__init__.cpython-311.pyc +0 -0
  126. clarifai/runners/__pycache__/__init__.cpython-39.pyc +0 -0
  127. clarifai/runners/models/__pycache__/__init__.cpython-311.pyc +0 -0
  128. clarifai/runners/models/__pycache__/__init__.cpython-39.pyc +0 -0
  129. clarifai/runners/models/__pycache__/mcp_class.cpython-311.pyc +0 -0
  130. clarifai/runners/models/__pycache__/model_builder.cpython-311.pyc +0 -0
  131. clarifai/runners/models/__pycache__/model_builder.cpython-39.pyc +0 -0
  132. clarifai/runners/models/__pycache__/model_class.cpython-311.pyc +0 -0
  133. clarifai/runners/models/__pycache__/model_runner.cpython-311.pyc +0 -0
  134. clarifai/runners/models/__pycache__/openai_class.cpython-311.pyc +0 -0
  135. clarifai/runners/models/base_typed_model.py +238 -0
  136. clarifai/runners/models/model_upload.py +607 -0
  137. clarifai/runners/utils/__pycache__/__init__.cpython-311.pyc +0 -0
  138. clarifai/runners/utils/__pycache__/__init__.cpython-39.pyc +0 -0
  139. clarifai/runners/utils/__pycache__/code_script.cpython-311.pyc +0 -0
  140. clarifai/runners/utils/__pycache__/code_script.cpython-39.pyc +0 -0
  141. clarifai/runners/utils/__pycache__/const.cpython-311.pyc +0 -0
  142. clarifai/runners/utils/__pycache__/data_utils.cpython-311.pyc +0 -0
  143. clarifai/runners/utils/__pycache__/data_utils.cpython-39.pyc +0 -0
  144. clarifai/runners/utils/__pycache__/loader.cpython-311.pyc +0 -0
  145. clarifai/runners/utils/__pycache__/method_signatures.cpython-311.pyc +0 -0
  146. clarifai/runners/utils/__pycache__/model_utils.cpython-311.pyc +0 -0
  147. clarifai/runners/utils/__pycache__/openai_convertor.cpython-311.pyc +0 -0
  148. clarifai/runners/utils/__pycache__/serializers.cpython-311.pyc +0 -0
  149. clarifai/runners/utils/__pycache__/url_fetcher.cpython-311.pyc +0 -0
  150. clarifai/runners/utils/data_handler.py +231 -0
  151. clarifai/runners/utils/data_types/__pycache__/__init__.cpython-311.pyc +0 -0
  152. clarifai/runners/utils/data_types/__pycache__/__init__.cpython-39.pyc +0 -0
  153. clarifai/runners/utils/data_types/__pycache__/data_types.cpython-311.pyc +0 -0
  154. clarifai/runners/utils/data_types/__pycache__/data_types.cpython-39.pyc +0 -0
  155. clarifai/runners/utils/data_types.py +471 -0
  156. clarifai/runners/utils/temp.py +59 -0
  157. clarifai/schema/__pycache__/search.cpython-311.pyc +0 -0
  158. clarifai/urls/__pycache__/helper.cpython-311.pyc +0 -0
  159. clarifai/urls/__pycache__/helper.cpython-39.pyc +0 -0
  160. clarifai/utils/__pycache__/__init__.cpython-311.pyc +0 -0
  161. clarifai/utils/__pycache__/__init__.cpython-39.pyc +0 -0
  162. clarifai/utils/__pycache__/cli.cpython-39.pyc +0 -0
  163. clarifai/utils/__pycache__/config.cpython-311.pyc +0 -0
  164. clarifai/utils/__pycache__/config.cpython-39.pyc +0 -0
  165. clarifai/utils/__pycache__/constants.cpython-311.pyc +0 -0
  166. clarifai/utils/__pycache__/constants.cpython-39.pyc +0 -0
  167. clarifai/utils/__pycache__/logging.cpython-311.pyc +0 -0
  168. clarifai/utils/__pycache__/logging.cpython-39.pyc +0 -0
  169. clarifai/utils/__pycache__/misc.cpython-311.pyc +0 -0
  170. clarifai/utils/__pycache__/misc.cpython-39.pyc +0 -0
  171. clarifai/utils/__pycache__/model_train.cpython-311.pyc +0 -0
  172. clarifai/utils/__pycache__/protobuf.cpython-311.pyc +0 -0
  173. clarifai/utils/__pycache__/protobuf.cpython-39.pyc +0 -0
  174. clarifai/utils/cli.py +14 -15
  175. clarifai/utils/constants.py +2 -0
  176. clarifai/utils/misc.py +381 -1
  177. clarifai/workflows/__pycache__/__init__.cpython-311.pyc +0 -0
  178. clarifai/workflows/__pycache__/export.cpython-311.pyc +0 -0
  179. clarifai/workflows/__pycache__/utils.cpython-311.pyc +0 -0
  180. clarifai/workflows/__pycache__/validate.cpython-311.pyc +0 -0
  181. {clarifai-11.6.4.dist-info → clarifai-11.6.4rc1.dist-info}/METADATA +1 -1
  182. clarifai-11.6.4rc1.dist-info/RECORD +301 -0
  183. {clarifai-11.6.4.dist-info → clarifai-11.6.4rc1.dist-info}/WHEEL +1 -1
  184. clarifai-11.6.4.dist-info/RECORD +0 -127
  185. {clarifai-11.6.4.dist-info → clarifai-11.6.4rc1.dist-info}/entry_points.txt +0 -0
  186. {clarifai-11.6.4.dist-info → clarifai-11.6.4rc1.dist-info}/licenses/LICENSE +0 -0
  187. {clarifai-11.6.4.dist-info → clarifai-11.6.4rc1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,59 @@
1
# Scratch script: experiments with how `data_utils.Param` behaves when used as
# a default value and when passed through `json`. Every statement below is at
# module level, so it all runs (and prints) as soon as the module is executed
# or imported.
import json
from clarifai.runners.utils import data_utils
from clarifai.runners.models.model_class import ModelClass

import inspect

def log_stack() -> None:
    """Print one `filename:lineno in function` line per frame of the current call stack."""
    # NOTE: this helper is defined but never called anywhere in this script.
    for frame_info in inspect.stack():
        print(f"{frame_info.filename}:{frame_info.lineno} in {frame_info.function}")


default = 27

# Baseline: round-trip a plain int through JSON. `dumps` yields the str "27".
x = json.dumps(default)

print(x, type(x))

# ...and `loads` turns it back into the int 27.
x = json.loads(x)

print(x, type(x))

# Wrap the same value in a Param and inspect the wrapper and its `.default`.
# NOTE(review): Param's class hierarchy is not visible here — confirm what
# `type(param)` is expected to print.
param = data_utils.Param(default)

print(param, type(param))

print(param.default)

# Same JSON round-trip, but with the Param object itself.
# NOTE(review): `json.dumps` raises TypeError for objects it cannot serialize,
# so this only works if Param instances are JSON-serializable — confirm.
param_json = json.dumps(param)

print(param_json, type(param_json))

param_json = json.loads(param_json)

print(param_json, type(param_json))

# param_json = json.loads(param_json)

# print(param_json, type(param_json))

def test_int(default: int = data_utils.Param(default=27)) -> None:
    """Print the received default and its type.

    The default expression is evaluated once, at definition time, and the
    `int` annotation is not enforced, so calling this with no argument prints
    the Param object that was created here.
    """
    print(default, type(default))

# Called with no argument to observe the raw default described above.
test_int()





class Test(ModelClass):
    """Minimal ModelClass subclass with one method whose default is a Param."""

    # NOTE(review): what `ModelClass.method` does with a Param default (for
    # example unwrapping it to 27) is not visible here — that appears to be
    # what this experiment is checking.
    @ModelClass.method
    def test(self, max_tokens: int = data_utils.Param(default=27)) -> int:
        print(max_tokens, type(max_tokens))
        return max_tokens

# Rebinds the name `test` to a Test instance, then calls the decorated method
# without arguments so the default is used.
test = Test()

test.test()
59
+
clarifai/utils/cli.py CHANGED
@@ -4,12 +4,14 @@ import pkgutil
4
4
  import sys
5
5
  import typing as t
6
6
  from collections import defaultdict
7
- from typing import OrderedDict, Tuple
7
+ from typing import OrderedDict
8
8
 
9
9
  import click
10
10
  import yaml
11
11
  from tabulate import tabulate
12
12
 
13
+ from clarifai.utils.logging import logger
14
+
13
15
 
14
16
  def from_yaml(filename: str):
15
17
  try:
@@ -174,7 +176,7 @@ def validate_context(ctx):
174
176
  sys.exit(1)
175
177
 
176
178
 
177
- def validate_pat_token(pat: str, user_id: str, api_base: str = None) -> Tuple[bool, str]:
179
+ def validate_context_auth(pat: str, user_id: str, api_base: str = None):
178
180
  """
179
181
  Validate a Personal Access Token (PAT) by making a test API call.
180
182
 
@@ -182,17 +184,14 @@ def validate_pat_token(pat: str, user_id: str, api_base: str = None) -> Tuple[bo
182
184
  pat (str): The Personal Access Token to validate
183
185
  user_id (str): The user ID associated with the token
184
186
  api_base (str): The API base URL. Defaults to None (uses default).
185
-
186
- Returns:
187
- tuple[bool, str]: A tuple of (is_valid, error_message)
188
- If valid: (True, "")
189
- If invalid: (False, error_description)
190
187
  """
191
188
  try:
192
189
  from clarifai_grpc.grpc.api.status import status_code_pb2
193
190
 
194
191
  from clarifai.client.user import User
195
192
 
193
+ logger.info("Validating the Context Credentials...")
194
+
196
195
  # Create user client for validation
197
196
  if api_base:
198
197
  user_client = User(user_id=user_id, pat=pat, base_url=api_base)
@@ -203,21 +202,21 @@ def validate_pat_token(pat: str, user_id: str, api_base: str = None) -> Tuple[bo
203
202
  response = user_client.get_user_info()
204
203
 
205
204
  if response.status.code == status_code_pb2.SUCCESS:
206
- return True, ""
207
- else:
208
- return False, f"Authentication failed: {response.status.description}"
205
+ logger.info("✅ Context is valid")
209
206
 
210
207
  except Exception as e:
211
208
  error_msg = str(e)
212
209
 
213
210
  # Check for common authentication errors and provide user-friendly messages
214
211
  if "PERMISSION_DENIED" in error_msg or "Unauthorized" in error_msg:
215
- return False, "Invalid PAT token or insufficient permissions"
212
+ logger.error(f"Invalid PAT token or incorrect user ID '{user_id}': {error_msg}")
216
213
  elif "UNAUTHENTICATED" in error_msg:
217
- return False, "Invalid PAT token"
214
+ logger.error(f"Invalid PAT token or user ID: {error_msg}")
218
215
  elif "SSL" in error_msg or "certificate" in error_msg:
219
- return False, f"SSL/Certificate error: {error_msg}"
216
+ logger.error(f"SSL/Certificate error: {error_msg}")
220
217
  elif "Connection" in error_msg or "timeout" in error_msg:
221
- return False, f"Network connection error: {error_msg}"
218
+ logger.error(f"Network connection error: {error_msg}")
222
219
  else:
223
- return False, f"Validation error: {error_msg}"
220
+ logger.error(f"Validation failed: \n{error_msg}")
221
+ logger.error("Please check your credentials and try again.")
222
+ raise click.Abort() # Exit without saving the configuration
@@ -62,3 +62,5 @@ DEFAULT_LOCAL_RUNNER_NODEPOOL_CONFIG = {
62
62
  "max_instances": 1,
63
63
  }
64
64
  }
65
# GitHub repository URL used as the default source for the Ollama model.
# NOTE(review): the consumer of these two constants is not visible here —
# presumably they are passed to a clone/download helper; confirm.
DEFAULT_OLLAMA_MODEL_REPO = "https://github.com/Clarifai/runners-examples"
# Branch name paired with DEFAULT_OLLAMA_MODEL_REPO.
DEFAULT_OLLAMA_MODEL_REPO_BRANCH = "ollama"
clarifai/utils/misc.py CHANGED
@@ -2,11 +2,17 @@ import os
2
2
  import re
3
3
  import shutil
4
4
  import subprocess
5
+ import sys
6
+ import time
5
7
  import urllib.parse
6
8
  import uuid
7
- from typing import Any, Dict, List
9
+ from typing import Any, Dict, List, Tuple
10
+ from urllib.parse import urlparse
8
11
 
12
+ import requests
9
13
  from clarifai_grpc.grpc.api.status import status_code_pb2
14
+ from requests.adapters import HTTPAdapter
15
+ from urllib3.util.retry import Retry
10
16
 
11
17
  from clarifai.errors import UserError
12
18
  from clarifai.utils.constants import HOME_PATH
@@ -166,3 +172,377 @@ def clone_github_repo(repo_url, target_dir, github_pat=None, branch=None):
166
172
  except subprocess.CalledProcessError as e:
167
173
  logger.error(f"Failed to clone repository: {e.stderr}")
168
174
  return False
175
+
176
+
177
class GitHubDownloader:
    """Download a folder of a GitHub repository through the REST "contents" API.

    The instance owns a `requests.Session` configured with automatic retries
    and, optionally, a GitHub token. The main entry point is
    `download_github_folder`; the other public methods are the building blocks
    it uses (URL parsing, remote/local structure validation, recursive
    download).
    """

    def __init__(
        self, max_retries: int = 3, backoff_factor: float = 0.3, github_token: str = None
    ):
        """Create the HTTP session.

        Args:
            max_retries: Total number of retries per request.
            backoff_factor: Backoff factor handed to urllib3's `Retry`.
            github_token: Optional token sent as an `Authorization` header.
        """
        self.session = requests.Session()
        self.github_token = github_token

        # Retry only idempotent methods, and only on rate-limit/server errors.
        retry_strategy = Retry(
            total=max_retries,
            backoff_factor=backoff_factor,
            status_forcelist=[429, 500, 502, 503, 504],
            allowed_methods=["HEAD", "GET", "OPTIONS"],
        )

        adapter = HTTPAdapter(max_retries=retry_strategy)
        self.session.mount("http://", adapter)
        self.session.mount("https://", adapter)

        self.session.headers.update({'User-Agent': 'GitHub-Folder-Downloader/1.0'})

        if self.github_token:
            self.session.headers.update({'Authorization': f'token {self.github_token}'})

    def expected_folder_structure(self) -> List[Dict[str, Any]]:
        """Return the top-level layout a model folder is expected to have.

        NOTE: the `children` entry of the "1" directory is declarative only;
        neither validator in this class currently descends into it.
        """
        return [
            {"name": "1", "type": "dir", "children": [{"name": "model.py", "type": "file"}]},
            {"name": "config.yaml", "type": "file"},
            {"name": "requirements.txt", "type": "file"},
        ]

    def _format_expected_structure(self):
        """Format the expected structure as a nice tree view."""
        return "".join(
            (
                "Expected folder structure:\n",
                "├── 1/\n",
                "│ └── model.py\n",
                "├── requirements.txt\n",
                "└── config.yaml\n",
            )
        )

    def parse_github_url(self, url: str) -> Tuple[str, str, str, str]:
        """Split a GitHub URL into `(owner, repo, branch, folder_path)`.

        Accepted shapes (host `github.com` or `www.github.com`, any case):
          * `/owner/repo`                         -> branch "main", empty path
          * `/owner/repo/tree|blob/branch[/path]` -> that branch and path
          * `/owner/repo/branch[/path]`           -> that branch and path

        A trailing `.git` on the repository name is dropped. Branch names
        containing `/` cannot be told apart from the folder path and are not
        supported.

        Raises:
            ValueError: If the URL is not a usable GitHub repository URL.
        """
        try:
            parsed = urlparse(url)

            # Host names are case-insensitive, so compare in lower case.
            if parsed.netloc.lower() not in ['github.com', 'www.github.com']:
                raise ValueError("URL must be a GitHub repository URL")

            path_parts = [p for p in parsed.path.strip('/').split('/') if p]

            if len(path_parts) < 2:
                raise ValueError("Invalid GitHub repository URL format")

            owner = path_parts[0]
            repo = path_parts[1]
            # Clone-style URLs end in ".git"; the API expects the bare name.
            if repo.endswith('.git'):
                repo = repo[: -len('.git')]
            if not repo:
                raise ValueError("Invalid GitHub repository URL format")

            if len(path_parts) >= 4 and path_parts[2] in ['tree', 'blob']:
                branch = path_parts[3]
                folder_path = '/'.join(path_parts[4:])
            elif len(path_parts) >= 3:
                branch = path_parts[2]
                folder_path = '/'.join(path_parts[3:])
            else:
                branch = 'main'
                folder_path = ''

            return owner, repo, branch, folder_path

        except Exception as e:
            raise ValueError(f"Failed to parse GitHub URL: {e}") from e

    def get_folder_contents(self, owner: str, repo: str, path: str, branch: str = 'main') -> list:
        """List the entries of `path` in `owner/repo` at `branch`.

        Returns:
            The decoded JSON listing. When the path points at a single file the
            API answers with one object instead of a list; it is wrapped in a
            one-element list so callers can always iterate.

        Raises:
            requests.RequestException: On timeout, connection failure, HTTP
                error status or any other request failure, with a message
                suited for showing to the user.
        """
        api_url = f"https://api.github.com/repos/{owner}/{repo}/contents/{path}"
        params = {'ref': branch} if branch else {}
        private_repo_hint = " For private repositories, use the github_token parameter."

        try:
            response = self.session.get(api_url, params=params, timeout=30)
            response.raise_for_status()
            contents = response.json()
        except requests.exceptions.Timeout as e:
            raise requests.RequestException("Request timed out. Please try again.") from e
        except requests.exceptions.ConnectionError as e:
            raise requests.RequestException(
                "Connection error. Please check your internet connection."
            ) from e
        except requests.exceptions.HTTPError as e:
            status = e.response.status_code
            if status == 404:
                token_msg = "" if self.github_token else private_repo_hint
                raise requests.RequestException(
                    f"Folder not found: {path}. Check if path exists or if the repository is private.{token_msg}"
                ) from e
            if status in (401, 403):
                token_msg = (
                    " The provided GitHub token may be invalid or have insufficient permissions."
                    if self.github_token
                    else private_repo_hint
                )
                raise requests.RequestException(f"Authentication error: {e}.{token_msg}") from e
            raise requests.RequestException(f"API request failed: {e}") from e
        except requests.exceptions.RequestException as e:
            token_msg = "" if self.github_token else private_repo_hint
            raise requests.RequestException(f"API request failed: {e}.{token_msg}") from e

        if isinstance(contents, dict):
            return [contents]
        return contents

    def validate_remote_structure(
        self,
        owner: str,
        repo: str,
        path: str,
        branch: str,
        expected_structure: List[Dict[str, Any]],
    ) -> Dict[str, Any]:
        """Check the remote folder's top level against `expected_structure`.

        Returns:
            A dict with keys `valid`, `missing_files`, `missing_dirs`,
            `warnings` and `remote_contents`. Missing or wrongly-typed items
            make the result invalid; unexpected extra items only add a warning.
            A failure to reach the repository is reported as an invalid result
            with a warning rather than raised.
        """
        validation_result = {
            'valid': True,
            'missing_files': [],
            'missing_dirs': [],
            'warnings': [],
            'remote_contents': [],
        }

        try:
            remote_contents = self.get_folder_contents(owner, repo, path, branch)
            validation_result['remote_contents'] = remote_contents

            remote_items = {item['name']: item['type'] for item in remote_contents}

            for item in expected_structure:
                item_name = item['name']
                item_type = item.get('type', 'file')

                if item_name not in remote_items:
                    bucket = 'missing_files' if item_type == 'file' else 'missing_dirs'
                    validation_result[bucket].append(item_name)
                    validation_result['valid'] = False
                elif remote_items[item_name] != item_type:
                    validation_result['warnings'].append(
                        f"Item '{item_name}' exists but is a {remote_items[item_name]} instead of {item_type}"
                    )
                    validation_result['valid'] = False

            expected_names = {item['name'] for item in expected_structure}
            unexpected_items = [name for name in remote_items if name not in expected_names]
            if unexpected_items:
                validation_result['warnings'].append(
                    f"Unexpected items found: {', '.join(unexpected_items)}"
                )

        except requests.RequestException as e:
            validation_result['valid'] = False
            validation_result['warnings'].append(f"Failed to access remote repository: {e}")

        return validation_result

    def download_file(self, download_url: str, local_path: str) -> None:
        """Stream `download_url` to `local_path`, best effort.

        Failures are logged and the file is skipped; nothing is raised, so one
        bad file does not abort a whole folder download. Progress is logged
        roughly every 10% for files larger than 1 MiB.
        """
        try:
            # The context manager releases the streamed connection even when
            # the body is not consumed completely.
            with self.session.get(download_url, stream=True, timeout=60) as response:
                response.raise_for_status()

                # A bare file name has no directory part; makedirs('') would fail.
                parent_dir = os.path.dirname(local_path)
                if parent_dir:
                    os.makedirs(parent_dir, exist_ok=True)

                try:
                    total_size = int(response.headers.get('content-length', 0))
                except (TypeError, ValueError):
                    total_size = 0
                show_progress = total_size > 1024 * 1024
                downloaded_size = 0
                next_report = 10
                file_name = os.path.basename(local_path)

                with open(local_path, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        if not chunk:
                            continue
                        f.write(chunk)
                        downloaded_size += len(chunk)

                        if show_progress:
                            # Capped: decoded bytes can exceed content-length.
                            progress = min(downloaded_size / total_size * 100, 100.0)
                            if progress >= next_report:
                                # Logger methods take no print-style
                                # end/flush keywords; emit whole lines.
                                logger.info(f"Downloading: {file_name} - {progress:.1f}%")
                                next_report = (int(progress) // 10 + 1) * 10

            logger.info(f"Downloaded: {local_path}")

        except requests.exceptions.Timeout:
            logger.warning(f"Timeout downloading {local_path}. Skipping...")
        except requests.exceptions.ConnectionError:
            logger.warning(f"Connection error downloading {local_path}. Skipping...")
        except Exception as e:
            logger.warning(f"Failed to download {local_path}: {e}")

    def process_folder(
        self, owner: str, repo: str, path: str, local_base_path: str, branch: str = 'main'
    ) -> None:
        """Recursively download `path` of the repository into `local_base_path`.

        Files are fetched with `download_file`; sub-directories are created
        locally and processed recursively.

        Raises:
            requests.RequestException: If a folder listing cannot be retrieved.
            Exception: Any other error is logged and re-raised unchanged.
        """
        try:
            contents = self.get_folder_contents(owner, repo, path, branch)

            if not contents:
                logger.info(f"Info: Empty folder - {path}")
                return

            for item in contents:
                item_name = item['name']
                item_path = os.path.join(local_base_path, item_name)

                if item['type'] == 'file':
                    self.download_file(item['download_url'], item_path)

                elif item['type'] == 'dir':
                    os.makedirs(item_path, exist_ok=True)
                    logger.info(f"Created directory: {item_path}")

                    new_path = f"{path}/{item_name}" if path else item_name
                    self.process_folder(owner, repo, new_path, item_path, branch)

        except requests.exceptions.RequestException as e:
            if "Folder not found" in str(e):
                logger.error(f"Error: Folder not found - {path}")
            else:
                logger.error(f"Error accessing folder {path}: {e}")
            raise
        except Exception as e:
            logger.error(f"Unexpected error processing folder {path}: {e}")
            raise

    def validate_folder_structure(
        self, folder_path: str, expected_structure: List[Dict[str, Any]]
    ) -> Dict[str, Any]:
        """Check a local folder's top level against `expected_structure`.

        Returns:
            A dict with keys `valid`, `missing_files`, `missing_dirs` and
            `warnings`. A non-existent `folder_path` yields an invalid result
            with a single warning.
        """
        validation_result = {
            'valid': True,
            'missing_files': [],
            'missing_dirs': [],
            'warnings': [],
        }

        if not os.path.exists(folder_path):
            validation_result['valid'] = False
            validation_result['warnings'].append(f"Folder {folder_path} does not exist")
            return validation_result

        for item in expected_structure:
            item_name = item['name']
            item_type = item.get('type', 'file')
            item_path = os.path.join(folder_path, item_name)

            if item_type == 'file':
                if not os.path.isfile(item_path):
                    validation_result['missing_files'].append(item_name)
                    validation_result['valid'] = False
            elif item_type == 'dir':
                if not os.path.isdir(item_path):
                    validation_result['missing_dirs'].append(item_name)
                    validation_result['valid'] = False

        return validation_result

    def _log_validation_issues(self, result: Dict[str, Any]) -> None:
        """Log the missing files/dirs and warnings of a validation result."""
        if result['missing_files']:
            logger.error(f"Missing files: {', '.join(result['missing_files'])}")

        if result['missing_dirs']:
            logger.error(f"Missing directories: {', '.join(result['missing_dirs'])}")

        for warning in result['warnings']:
            logger.error(f"Warning: {warning}")

    def download_github_folder(
        self,
        url: str,
        output_dir: str,
        github_token: str = None,
        validate_structure: bool = False,
        pre_validate: bool = True,
        strict_validation: bool = False,
    ) -> None:
        """Download the folder addressed by a GitHub URL into `output_dir`.

        Args:
            url: GitHub URL of the repository or of a folder inside it.
            output_dir: Local directory to download into (created if needed).
            github_token: Optional token; replaces the one given at
                construction for this and later requests.
            validate_structure: Validate the downloaded folder afterwards.
                Works independently of `pre_validate`.
            pre_validate: Validate the remote folder before downloading and
                abort when it does not match the expected structure.
            strict_validation: Currently unused; accepted for compatibility.

        Exits the process with status 1 (`sys.exit`) when the URL is invalid,
        pre-validation fails, or the download cannot be performed.
        """
        logger.info(f"Parsing GitHub URL: {url}")

        # Update token if provided as a parameter
        if github_token:
            self.github_token = github_token
            self.session.headers.update({'Authorization': f'token {github_token}'})

        try:
            owner, repo, branch, folder_path = self.parse_github_url(url)
            logger.info(f"Repository: {owner}/{repo}")
            logger.info(f"Branch: {branch}")
            logger.info(f"Folder: {folder_path or 'root'}")

            # Always needed: post-download validation must work even when
            # pre-validation is switched off.
            expected_structure = self.expected_folder_structure()

            if pre_validate:
                logger.info("\nValidating remote folder structure...")
                remote_validation = self.validate_remote_structure(
                    owner, repo, folder_path, branch, expected_structure
                )

                if not remote_validation['valid']:
                    logger.error("Remote structure validation failed!")
                    self._log_validation_issues(remote_validation)

                    # Print the expected structure in a nice format
                    logger.info("\nThe repository must have the following structure:")
                    logger.info(self._format_expected_structure())

                    logger.error(
                        "Download cancelled: Repository structure does not match the expected format."
                    )
                    # SystemExit is not an Exception subclass, so the handlers
                    # at the bottom do not intercept it.
                    sys.exit(1)  # Exit without proceeding with download

                logger.info("Remote structure validation passed!")

            os.makedirs(output_dir, exist_ok=True)
            logger.info(f"Created output directory: {output_dir}")

            logger.info("\nStarting download...")
            start_time = time.time()
            try:
                self.process_folder(owner, repo, folder_path, output_dir, branch)

                elapsed_time = time.time() - start_time
                logger.info(f"\nDownload completed in {elapsed_time:.2f} seconds")
                logger.info(f"Files saved to: {os.path.abspath(output_dir)}")

                if validate_structure:
                    logger.info("\nValidating downloaded folder structure...")
                    validation_result = self.validate_folder_structure(
                        output_dir, expected_structure
                    )

                    if validation_result['valid']:
                        logger.info("Folder structure post validation passed!")
                    else:
                        logger.error("Folder structure validation failed!")
                        self._log_validation_issues(validation_result)
            except requests.RequestException as e:
                # Critical error - the main folder cannot be processed
                logger.error(
                    f"\nDownload failed: {e}, \n No files were downloaded to: {os.path.abspath(output_dir)}"
                )
                sys.exit(1)

        except ValueError as e:
            logger.error(f"Error: {e}")
            sys.exit(1)
        except Exception as e:
            logger.error(f"Unexpected error: {e}")
            sys.exit(1)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: clarifai
3
- Version: 11.6.4
3
+ Version: 11.6.4rc1
4
4
  Home-page: https://github.com/Clarifai/clarifai-python
5
5
  Author: Clarifai
6
6
  Author-email: support@clarifai.com