clarifai-11.6.3-py3-none-any.whl → clarifai-11.6.4rc1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (190)
  1. clarifai/__init__.py +1 -1
  2. clarifai/__pycache__/__init__.cpython-311.pyc +0 -0
  3. clarifai/__pycache__/__init__.cpython-39.pyc +0 -0
  4. clarifai/__pycache__/errors.cpython-311.pyc +0 -0
  5. clarifai/__pycache__/errors.cpython-39.pyc +0 -0
  6. clarifai/__pycache__/versions.cpython-311.pyc +0 -0
  7. clarifai/__pycache__/versions.cpython-39.pyc +0 -0
  8. clarifai/cli/__pycache__/__init__.cpython-39.pyc +0 -0
  9. clarifai/cli/__pycache__/base.cpython-39.pyc +0 -0
  10. clarifai/cli/__pycache__/compute_cluster.cpython-39.pyc +0 -0
  11. clarifai/cli/__pycache__/deployment.cpython-39.pyc +0 -0
  12. clarifai/cli/__pycache__/model.cpython-39.pyc +0 -0
  13. clarifai/cli/__pycache__/nodepool.cpython-39.pyc +0 -0
  14. clarifai/cli/base.py +15 -2
  15. clarifai/cli/model.py +210 -77
  16. clarifai/cli/model_templates.py +243 -0
  17. clarifai/cli/pipeline_step_templates.py +64 -0
  18. clarifai/client/__pycache__/__init__.cpython-311.pyc +0 -0
  19. clarifai/client/__pycache__/__init__.cpython-39.pyc +0 -0
  20. clarifai/client/__pycache__/app.cpython-311.pyc +0 -0
  21. clarifai/client/__pycache__/app.cpython-39.pyc +0 -0
  22. clarifai/client/__pycache__/base.cpython-311.pyc +0 -0
  23. clarifai/client/__pycache__/base.cpython-39.pyc +0 -0
  24. clarifai/client/__pycache__/compute_cluster.cpython-311.pyc +0 -0
  25. clarifai/client/__pycache__/dataset.cpython-311.pyc +0 -0
  26. clarifai/client/__pycache__/dataset.cpython-39.pyc +0 -0
  27. clarifai/client/__pycache__/deployment.cpython-311.pyc +0 -0
  28. clarifai/client/__pycache__/deployment.cpython-39.pyc +0 -0
  29. clarifai/client/__pycache__/input.cpython-311.pyc +0 -0
  30. clarifai/client/__pycache__/input.cpython-39.pyc +0 -0
  31. clarifai/client/__pycache__/lister.cpython-311.pyc +0 -0
  32. clarifai/client/__pycache__/lister.cpython-39.pyc +0 -0
  33. clarifai/client/__pycache__/model.cpython-311.pyc +0 -0
  34. clarifai/client/__pycache__/model.cpython-39.pyc +0 -0
  35. clarifai/client/__pycache__/model_client.cpython-311.pyc +0 -0
  36. clarifai/client/__pycache__/model_client.cpython-39.pyc +0 -0
  37. clarifai/client/__pycache__/module.cpython-311.pyc +0 -0
  38. clarifai/client/__pycache__/nodepool.cpython-311.pyc +0 -0
  39. clarifai/client/__pycache__/runner.cpython-311.pyc +0 -0
  40. clarifai/client/__pycache__/search.cpython-311.pyc +0 -0
  41. clarifai/client/__pycache__/user.cpython-311.pyc +0 -0
  42. clarifai/client/__pycache__/workflow.cpython-311.pyc +0 -0
  43. clarifai/client/auth/__pycache__/__init__.cpython-311.pyc +0 -0
  44. clarifai/client/auth/__pycache__/__init__.cpython-39.pyc +0 -0
  45. clarifai/client/auth/__pycache__/helper.cpython-311.pyc +0 -0
  46. clarifai/client/auth/__pycache__/helper.cpython-39.pyc +0 -0
  47. clarifai/client/auth/__pycache__/register.cpython-311.pyc +0 -0
  48. clarifai/client/auth/__pycache__/register.cpython-39.pyc +0 -0
  49. clarifai/client/auth/__pycache__/stub.cpython-311.pyc +0 -0
  50. clarifai/client/auth/__pycache__/stub.cpython-39.pyc +0 -0
  51. clarifai/client/nodepool.py +1 -1
  52. clarifai/constants/__pycache__/base.cpython-311.pyc +0 -0
  53. clarifai/constants/__pycache__/base.cpython-39.pyc +0 -0
  54. clarifai/constants/__pycache__/dataset.cpython-311.pyc +0 -0
  55. clarifai/constants/__pycache__/dataset.cpython-39.pyc +0 -0
  56. clarifai/constants/__pycache__/input.cpython-311.pyc +0 -0
  57. clarifai/constants/__pycache__/input.cpython-39.pyc +0 -0
  58. clarifai/constants/__pycache__/model.cpython-311.pyc +0 -0
  59. clarifai/constants/__pycache__/model.cpython-39.pyc +0 -0
  60. clarifai/constants/__pycache__/search.cpython-311.pyc +0 -0
  61. clarifai/constants/__pycache__/workflow.cpython-311.pyc +0 -0
  62. clarifai/datasets/__pycache__/__init__.cpython-311.pyc +0 -0
  63. clarifai/datasets/__pycache__/__init__.cpython-39.pyc +0 -0
  64. clarifai/datasets/export/__pycache__/__init__.cpython-311.pyc +0 -0
  65. clarifai/datasets/export/__pycache__/__init__.cpython-39.pyc +0 -0
  66. clarifai/datasets/export/__pycache__/inputs_annotations.cpython-311.pyc +0 -0
  67. clarifai/datasets/export/__pycache__/inputs_annotations.cpython-39.pyc +0 -0
  68. clarifai/datasets/upload/__pycache__/__init__.cpython-311.pyc +0 -0
  69. clarifai/datasets/upload/__pycache__/__init__.cpython-39.pyc +0 -0
  70. clarifai/datasets/upload/__pycache__/base.cpython-311.pyc +0 -0
  71. clarifai/datasets/upload/__pycache__/base.cpython-39.pyc +0 -0
  72. clarifai/datasets/upload/__pycache__/features.cpython-311.pyc +0 -0
  73. clarifai/datasets/upload/__pycache__/features.cpython-39.pyc +0 -0
  74. clarifai/datasets/upload/__pycache__/image.cpython-311.pyc +0 -0
  75. clarifai/datasets/upload/__pycache__/image.cpython-39.pyc +0 -0
  76. clarifai/datasets/upload/__pycache__/multimodal.cpython-311.pyc +0 -0
  77. clarifai/datasets/upload/__pycache__/multimodal.cpython-39.pyc +0 -0
  78. clarifai/datasets/upload/__pycache__/text.cpython-311.pyc +0 -0
  79. clarifai/datasets/upload/__pycache__/text.cpython-39.pyc +0 -0
  80. clarifai/datasets/upload/__pycache__/utils.cpython-311.pyc +0 -0
  81. clarifai/datasets/upload/__pycache__/utils.cpython-39.pyc +0 -0
  82. clarifai/models/model_serving/README.md +158 -0
  83. clarifai/models/model_serving/__init__.py +14 -0
  84. clarifai/models/model_serving/cli/__init__.py +12 -0
  85. clarifai/models/model_serving/cli/_utils.py +53 -0
  86. clarifai/models/model_serving/cli/base.py +14 -0
  87. clarifai/models/model_serving/cli/build.py +79 -0
  88. clarifai/models/model_serving/cli/clarifai_clis.py +33 -0
  89. clarifai/models/model_serving/cli/create.py +171 -0
  90. clarifai/models/model_serving/cli/example_cli.py +34 -0
  91. clarifai/models/model_serving/cli/login.py +26 -0
  92. clarifai/models/model_serving/cli/upload.py +179 -0
  93. clarifai/models/model_serving/constants.py +21 -0
  94. clarifai/models/model_serving/docs/cli.md +161 -0
  95. clarifai/models/model_serving/docs/concepts.md +229 -0
  96. clarifai/models/model_serving/docs/dependencies.md +11 -0
  97. clarifai/models/model_serving/docs/inference_parameters.md +139 -0
  98. clarifai/models/model_serving/docs/model_types.md +19 -0
  99. clarifai/models/model_serving/model_config/__init__.py +16 -0
  100. clarifai/models/model_serving/model_config/base.py +369 -0
  101. clarifai/models/model_serving/model_config/config.py +312 -0
  102. clarifai/models/model_serving/model_config/inference_parameter.py +129 -0
  103. clarifai/models/model_serving/model_config/model_types_config/multimodal-embedder.yaml +25 -0
  104. clarifai/models/model_serving/model_config/model_types_config/text-classifier.yaml +19 -0
  105. clarifai/models/model_serving/model_config/model_types_config/text-embedder.yaml +20 -0
  106. clarifai/models/model_serving/model_config/model_types_config/text-to-image.yaml +19 -0
  107. clarifai/models/model_serving/model_config/model_types_config/text-to-text.yaml +19 -0
  108. clarifai/models/model_serving/model_config/model_types_config/visual-classifier.yaml +22 -0
  109. clarifai/models/model_serving/model_config/model_types_config/visual-detector.yaml +32 -0
  110. clarifai/models/model_serving/model_config/model_types_config/visual-embedder.yaml +19 -0
  111. clarifai/models/model_serving/model_config/model_types_config/visual-segmenter.yaml +19 -0
  112. clarifai/models/model_serving/model_config/output.py +133 -0
  113. clarifai/models/model_serving/model_config/triton/__init__.py +14 -0
  114. clarifai/models/model_serving/model_config/triton/serializer.py +136 -0
  115. clarifai/models/model_serving/model_config/triton/triton_config.py +182 -0
  116. clarifai/models/model_serving/model_config/triton/wrappers.py +281 -0
  117. clarifai/models/model_serving/repo_build/__init__.py +14 -0
  118. clarifai/models/model_serving/repo_build/build.py +198 -0
  119. clarifai/models/model_serving/repo_build/static_files/_requirements.txt +2 -0
  120. clarifai/models/model_serving/repo_build/static_files/base_test.py +169 -0
  121. clarifai/models/model_serving/repo_build/static_files/inference.py +26 -0
  122. clarifai/models/model_serving/repo_build/static_files/sample_clarifai_config.yaml +25 -0
  123. clarifai/models/model_serving/repo_build/static_files/test.py +40 -0
  124. clarifai/models/model_serving/repo_build/static_files/triton/model.py +75 -0
  125. clarifai/models/model_serving/utils.py +23 -0
  126. clarifai/runners/__pycache__/__init__.cpython-311.pyc +0 -0
  127. clarifai/runners/__pycache__/__init__.cpython-39.pyc +0 -0
  128. clarifai/runners/dockerfile_template/Dockerfile.template +1 -1
  129. clarifai/runners/models/__pycache__/__init__.cpython-311.pyc +0 -0
  130. clarifai/runners/models/__pycache__/__init__.cpython-39.pyc +0 -0
  131. clarifai/runners/models/__pycache__/mcp_class.cpython-311.pyc +0 -0
  132. clarifai/runners/models/__pycache__/model_builder.cpython-311.pyc +0 -0
  133. clarifai/runners/models/__pycache__/model_builder.cpython-39.pyc +0 -0
  134. clarifai/runners/models/__pycache__/model_class.cpython-311.pyc +0 -0
  135. clarifai/runners/models/__pycache__/model_runner.cpython-311.pyc +0 -0
  136. clarifai/runners/models/__pycache__/openai_class.cpython-311.pyc +0 -0
  137. clarifai/runners/models/base_typed_model.py +238 -0
  138. clarifai/runners/models/model_builder.py +1 -1
  139. clarifai/runners/models/model_upload.py +607 -0
  140. clarifai/runners/utils/__pycache__/__init__.cpython-311.pyc +0 -0
  141. clarifai/runners/utils/__pycache__/__init__.cpython-39.pyc +0 -0
  142. clarifai/runners/utils/__pycache__/code_script.cpython-311.pyc +0 -0
  143. clarifai/runners/utils/__pycache__/code_script.cpython-39.pyc +0 -0
  144. clarifai/runners/utils/__pycache__/const.cpython-311.pyc +0 -0
  145. clarifai/runners/utils/__pycache__/data_utils.cpython-311.pyc +0 -0
  146. clarifai/runners/utils/__pycache__/data_utils.cpython-39.pyc +0 -0
  147. clarifai/runners/utils/__pycache__/loader.cpython-311.pyc +0 -0
  148. clarifai/runners/utils/__pycache__/method_signatures.cpython-311.pyc +0 -0
  149. clarifai/runners/utils/__pycache__/model_utils.cpython-311.pyc +0 -0
  150. clarifai/runners/utils/__pycache__/openai_convertor.cpython-311.pyc +0 -0
  151. clarifai/runners/utils/__pycache__/serializers.cpython-311.pyc +0 -0
  152. clarifai/runners/utils/__pycache__/url_fetcher.cpython-311.pyc +0 -0
  153. clarifai/runners/utils/data_handler.py +231 -0
  154. clarifai/runners/utils/data_types/__pycache__/__init__.cpython-311.pyc +0 -0
  155. clarifai/runners/utils/data_types/__pycache__/__init__.cpython-39.pyc +0 -0
  156. clarifai/runners/utils/data_types/__pycache__/data_types.cpython-311.pyc +0 -0
  157. clarifai/runners/utils/data_types/__pycache__/data_types.cpython-39.pyc +0 -0
  158. clarifai/runners/utils/data_types.py +471 -0
  159. clarifai/runners/utils/temp.py +59 -0
  160. clarifai/schema/__pycache__/search.cpython-311.pyc +0 -0
  161. clarifai/urls/__pycache__/helper.cpython-311.pyc +0 -0
  162. clarifai/urls/__pycache__/helper.cpython-39.pyc +0 -0
  163. clarifai/utils/__pycache__/__init__.cpython-311.pyc +0 -0
  164. clarifai/utils/__pycache__/__init__.cpython-39.pyc +0 -0
  165. clarifai/utils/__pycache__/cli.cpython-39.pyc +0 -0
  166. clarifai/utils/__pycache__/config.cpython-311.pyc +0 -0
  167. clarifai/utils/__pycache__/config.cpython-39.pyc +0 -0
  168. clarifai/utils/__pycache__/constants.cpython-311.pyc +0 -0
  169. clarifai/utils/__pycache__/constants.cpython-39.pyc +0 -0
  170. clarifai/utils/__pycache__/logging.cpython-311.pyc +0 -0
  171. clarifai/utils/__pycache__/logging.cpython-39.pyc +0 -0
  172. clarifai/utils/__pycache__/misc.cpython-311.pyc +0 -0
  173. clarifai/utils/__pycache__/misc.cpython-39.pyc +0 -0
  174. clarifai/utils/__pycache__/model_train.cpython-311.pyc +0 -0
  175. clarifai/utils/__pycache__/protobuf.cpython-311.pyc +0 -0
  176. clarifai/utils/__pycache__/protobuf.cpython-39.pyc +0 -0
  177. clarifai/utils/cli.py +48 -0
  178. clarifai/utils/constants.py +18 -16
  179. clarifai/utils/misc.py +381 -1
  180. clarifai/workflows/__pycache__/__init__.cpython-311.pyc +0 -0
  181. clarifai/workflows/__pycache__/export.cpython-311.pyc +0 -0
  182. clarifai/workflows/__pycache__/utils.cpython-311.pyc +0 -0
  183. clarifai/workflows/__pycache__/validate.cpython-311.pyc +0 -0
  184. {clarifai-11.6.3.dist-info → clarifai-11.6.4rc1.dist-info}/METADATA +1 -1
  185. clarifai-11.6.4rc1.dist-info/RECORD +301 -0
  186. {clarifai-11.6.3.dist-info → clarifai-11.6.4rc1.dist-info}/WHEEL +1 -1
  187. clarifai-11.6.3.dist-info/RECORD +0 -127
  188. {clarifai-11.6.3.dist-info → clarifai-11.6.4rc1.dist-info}/entry_points.txt +0 -0
  189. {clarifai-11.6.3.dist-info → clarifai-11.6.4rc1.dist-info}/licenses/LICENSE +0 -0
  190. {clarifai-11.6.3.dist-info → clarifai-11.6.4rc1.dist-info}/top_level.txt +0 -0
clarifai/runners/utils/temp.py ADDED
@@ -0,0 +1,59 @@
+ import json
+ from clarifai.runners.utils import data_utils
+ from clarifai.runners.models.model_class import ModelClass
+
+ import inspect
+
+ def log_stack():
+     for frame_info in inspect.stack():
+         print(f"{frame_info.filename}:{frame_info.lineno} in {frame_info.function}")
+
+
+ default = 27
+
+ x = json.dumps(default)
+
+ print(x, type(x))
+
+ x = json.loads(x)
+
+ print(x, type(x))
+
+ param = data_utils.Param(default)
+
+ print(param, type(param))
+
+ print(param.default)
+
+ param_json = json.dumps(param)
+
+ print(param_json, type(param_json))
+
+ param_json = json.loads(param_json)
+
+ print(param_json, type(param_json))
+
+ # param_json = json.loads(param_json)
+
+ # print(param_json, type(param_json))
+
+ def test_int(default: int = data_utils.Param(default=27)):
+     print(default, type(default))
+
+ test_int()
+
+
+
+
+
+ class Test(ModelClass):
+
+     @ModelClass.method
+     def test(self, max_tokens: int = data_utils.Param(default=27)) -> int:
+         print(max_tokens, type(max_tokens))
+         return max_tokens
+
+ test = Test()
+
+ test.test()
+
clarifai/utils/cli.py CHANGED
@@ -10,6 +10,8 @@ import click
  import yaml
  from tabulate import tabulate

+ from clarifai.utils.logging import logger
+

  def from_yaml(filename: str):
      try:
@@ -172,3 +174,49 @@ def validate_context(ctx):
      if ctx.obj == {}:
          logger.error("CLI config file missing. Run `clarifai login` to set up the CLI config.")
          sys.exit(1)
+
+
+ def validate_context_auth(pat: str, user_id: str, api_base: str = None):
+     """
+     Validate a Personal Access Token (PAT) by making a test API call.
+
+     Args:
+         pat (str): The Personal Access Token to validate
+         user_id (str): The user ID associated with the token
+         api_base (str): The API base URL. Defaults to None (uses default).
+     """
+     try:
+         from clarifai_grpc.grpc.api.status import status_code_pb2
+
+         from clarifai.client.user import User
+
+         logger.info("Validating the Context Credentials...")
+
+         # Create user client for validation
+         if api_base:
+             user_client = User(user_id=user_id, pat=pat, base_url=api_base)
+         else:
+             user_client = User(user_id=user_id, pat=pat)
+
+         # Try to get user info as a test API call
+         response = user_client.get_user_info()
+
+         if response.status.code == status_code_pb2.SUCCESS:
+             logger.info("✅ Context is valid")
+
+     except Exception as e:
+         error_msg = str(e)
+
+         # Check for common authentication errors and provide user-friendly messages
+         if "PERMISSION_DENIED" in error_msg or "Unauthorized" in error_msg:
+             logger.error(f"Invalid PAT token or incorrect user ID '{user_id}': {error_msg}")
+         elif "UNAUTHENTICATED" in error_msg:
+             logger.error(f"Invalid PAT token or user ID: {error_msg}")
+         elif "SSL" in error_msg or "certificate" in error_msg:
+             logger.error(f"SSL/Certificate error: {error_msg}")
+         elif "Connection" in error_msg or "timeout" in error_msg:
+             logger.error(f"Network connection error: {error_msg}")
+         else:
+             logger.error(f"❌ Validation failed: \n{error_msg}")
+         logger.error("Please check your credentials and try again.")
+         raise click.Abort()  # Exit without saving the configuration
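
Note: judging from the "Exit without saving the configuration" comment, the new validate_context_auth helper appears intended for the CLI login flow, validating credentials before a context is persisted. A minimal usage sketch follows; the Click command is hypothetical, and only the import path and function signature come from the diff above:

    import click
    from clarifai.utils.cli import validate_context_auth

    @click.command()
    @click.option('--user_id', required=True)
    @click.option('--pat', required=True)
    @click.option('--api_base', default=None)
    def login(user_id, pat, api_base):
        # Raises click.Abort() if the PAT / user ID cannot be validated,
        # so nothing gets saved on failure.
        validate_context_auth(pat=pat, user_id=user_id, api_base=api_base)
        click.echo('Credentials validated; saving context.')

    if __name__ == '__main__':
        login()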
clarifai/utils/constants.py CHANGED
@@ -14,20 +14,20 @@ CLARIFAI_USER_ID_ENV_VAR = "CLARIFAI_USER_ID"
  HOME_PATH = Path.home()
  DEFAULT_CONFIG = HOME_PATH / '.config/clarifai/config'

- # Default clusters, etc. for local dev runner easy setup
- DEFAULT_LOCAL_DEV_COMPUTE_CLUSTER_ID = "local-dev-compute-cluster"
- DEFAULT_LOCAL_DEV_NODEPOOL_ID = "local-dev-nodepool"
- DEFAULT_LOCAL_DEV_DEPLOYMENT_ID = "local-dev-deployment"
- DEFAULT_LOCAL_DEV_MODEL_ID = "local-dev-model"
- DEFAULT_LOCAL_DEV_APP_ID = "local-dev-runner-app"
+ # Default clusters, etc. for local runner easy setup
+ DEFAULT_LOCAL_RUNNER_COMPUTE_CLUSTER_ID = "local-runner-compute-cluster"
+ DEFAULT_LOCAL_RUNNER_NODEPOOL_ID = "local-runner-nodepool"
+ DEFAULT_LOCAL_RUNNER_DEPLOYMENT_ID = "local-runner-deployment"
+ DEFAULT_LOCAL_RUNNER_MODEL_ID = "local-runner-model"
+ DEFAULT_LOCAL_RUNNER_APP_ID = "local-runner-app"

  # FIXME: should have any-to-any for these cases.
- DEFAULT_LOCAL_DEV_MODEL_TYPE = "text-to-text"
+ DEFAULT_LOCAL_RUNNER_MODEL_TYPE = "text-to-text"

- DEFAULT_LOCAL_DEV_COMPUTE_CLUSTER_CONFIG = {
+ DEFAULT_LOCAL_RUNNER_COMPUTE_CLUSTER_CONFIG = {
      "compute_cluster": {
-         "id": DEFAULT_LOCAL_DEV_COMPUTE_CLUSTER_ID,
-         "description": "Default Local Dev Compute Cluster",
+         "id": DEFAULT_LOCAL_RUNNER_COMPUTE_CLUSTER_ID,
+         "description": "Default Local Runner Compute Cluster",
          "cloud_provider": {
              "id": "local",
          },
@@ -37,12 +37,12 @@ DEFAULT_LOCAL_DEV_COMPUTE_CLUSTER_CONFIG = {
      }
  }

- DEFAULT_LOCAL_DEV_NODEPOOL_CONFIG = {
+ DEFAULT_LOCAL_RUNNER_NODEPOOL_CONFIG = {
      "nodepool": {
-         "id": DEFAULT_LOCAL_DEV_NODEPOOL_ID,
-         "description": "Default Local Dev Nodepool",
+         "id": DEFAULT_LOCAL_RUNNER_NODEPOOL_ID,
+         "description": "Default Local Runner Nodepool",
          "compute_cluster": {
-             "id": DEFAULT_LOCAL_DEV_COMPUTE_CLUSTER_ID,
+             "id": DEFAULT_LOCAL_RUNNER_COMPUTE_CLUSTER_ID,
              "user_id": None,  # This will be set when creating the compute cluster
          },
          "instance_types": [
@@ -50,8 +50,8 @@ DEFAULT_LOCAL_DEV_NODEPOOL_CONFIG = {
              "id": "local",
              "compute_info": {
                  "cpu_limit": str(os.cpu_count()),
-                 "cpu_memory": "16Gi",  # made up as we don't schedule based on this for local dev.
-                 "num_accelerators": 0,  # TODO if we need accelerator detection for local dev.
+                 "cpu_memory": "16Gi",  # made up as we don't schedule based on this for local runner.
+                 "num_accelerators": 0,  # TODO if we need accelerator detection for local runner.
              },
          }
      ],
@@ -62,3 +62,5 @@ DEFAULT_LOCAL_DEV_NODEPOOL_CONFIG = {
          "max_instances": 1,
      }
  }
+ DEFAULT_OLLAMA_MODEL_REPO = "https://github.com/Clarifai/runners-examples"
+ DEFAULT_OLLAMA_MODEL_REPO_BRANCH = "ollama"
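
Note: the two new Ollama constants point at the ollama branch of the Clarifai runners-examples repository. A hedged sketch of how they could be consumed with the existing clone_github_repo helper from clarifai.utils.misc (the helper's signature appears in the next hunk; wiring them together this way is an assumption, not necessarily how the CLI uses them):

    from clarifai.utils.constants import (
        DEFAULT_OLLAMA_MODEL_REPO,
        DEFAULT_OLLAMA_MODEL_REPO_BRANCH,
    )
    from clarifai.utils.misc import clone_github_repo

    # Clone the 'ollama' branch of the examples repo into a scratch directory.
    # clone_github_repo returns False if the underlying git command fails.
    ok = clone_github_repo(
        repo_url=DEFAULT_OLLAMA_MODEL_REPO,
        target_dir='./ollama-model-template',  # hypothetical destination
        branch=DEFAULT_OLLAMA_MODEL_REPO_BRANCH,
    )
    if not ok:
        print('Clone failed; see the logged git error for details.')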
clarifai/utils/misc.py CHANGED
@@ -2,11 +2,17 @@ import os
  import re
  import shutil
  import subprocess
+ import sys
+ import time
  import urllib.parse
  import uuid
- from typing import Any, Dict, List
+ from typing import Any, Dict, List, Tuple
+ from urllib.parse import urlparse

+ import requests
  from clarifai_grpc.grpc.api.status import status_code_pb2
+ from requests.adapters import HTTPAdapter
+ from urllib3.util.retry import Retry

  from clarifai.errors import UserError
  from clarifai.utils.constants import HOME_PATH
@@ -166,3 +172,377 @@ def clone_github_repo(repo_url, target_dir, github_pat=None, branch=None):
      except subprocess.CalledProcessError as e:
          logger.error(f"Failed to clone repository: {e.stderr}")
          return False
+
+
+ class GitHubDownloader:
+     def __init__(
+         self, max_retries: int = 3, backoff_factor: float = 0.3, github_token: str = None
+     ):
+         self.session = requests.Session()
+         self.github_token = github_token
+
+         retry_strategy = Retry(
+             total=max_retries,
+             backoff_factor=backoff_factor,
+             status_forcelist=[429, 500, 502, 503, 504],
+             allowed_methods=["HEAD", "GET", "OPTIONS"],
+         )
+
+         adapter = HTTPAdapter(max_retries=retry_strategy)
+         self.session.mount("http://", adapter)
+         self.session.mount("https://", adapter)
+
+         self.session.headers.update({'User-Agent': 'GitHub-Folder-Downloader/1.0'})
+
+         if self.github_token:
+             self.session.headers.update({'Authorization': f'token {self.github_token}'})
+
+     def expected_folder_structure(self) -> List[Dict[str, Any]]:
+         return [
+             {"name": "1", "type": "dir", "children": [{"name": "model.py", "type": "file"}]},
+             {"name": "config.yaml", "type": "file"},
+             {"name": "requirements.txt", "type": "file"},
+         ]
+
+     def _format_expected_structure(self):
+         """Format the expected structure as a nice tree view."""
+         tree_str = ""
+         tree_str += "Expected folder structure:\n"
+         tree_str += "├── 1/\n"
+         tree_str += "│ └── model.py\n"
+         tree_str += "├── requirements.txt\n"
+         tree_str += "└── config.yaml\n"
+         return tree_str
+
+     def parse_github_url(self, url: str) -> Tuple[str, str, str, str]:
+         try:
+             parsed = urlparse(url)
+
+             if parsed.netloc not in ['github.com', 'www.github.com']:
+                 raise ValueError("URL must be a GitHub repository URL")
+
+             path_parts = [p for p in parsed.path.strip('/').split('/') if p]
+
+             if len(path_parts) < 2:
+                 raise ValueError("Invalid GitHub repository URL format")
+
+             owner = path_parts[0]
+             repo = path_parts[1]
+
+             if len(path_parts) >= 4 and path_parts[2] in ['tree', 'blob']:
+                 branch = path_parts[3]
+                 folder_path = '/'.join(path_parts[4:]) if len(path_parts) > 4 else ''
+             elif len(path_parts) >= 3:
+                 branch = path_parts[2]
+                 folder_path = '/'.join(path_parts[3:]) if len(path_parts) > 3 else ''
+             else:
+                 branch = 'main'
+                 folder_path = ''
+
+             return owner, repo, branch, folder_path
+
+         except Exception as e:
+             raise ValueError(f"Failed to parse GitHub URL: {e}")
+
+     def get_folder_contents(self, owner: str, repo: str, path: str, branch: str = 'main') -> list:
+         api_url = f"https://api.github.com/repos/{owner}/{repo}/contents/{path}"
+         params = {'ref': branch} if branch else {}
+
+         try:
+             response = self.session.get(api_url, params=params, timeout=30)
+             response.raise_for_status()
+             return response.json()
+         except requests.exceptions.Timeout:
+             raise requests.RequestException("Request timed out. Please try again.")
+         except requests.exceptions.ConnectionError:
+             raise requests.RequestException(
+                 "Connection error. Please check your internet connection."
+             )
+         except requests.exceptions.HTTPError as e:
+             if e.response.status_code == 404:
+                 token_msg = (
+                     ""
+                     if self.github_token
+                     else " For private repositories, use the github_token parameter."
+                 )
+                 raise requests.RequestException(
+                     f"Folder not found: {path}. Check if path exists or if the repository is private.{token_msg}"
+                 )
+             elif e.response.status_code == 401 or e.response.status_code == 403:
+                 token_msg = (
+                     " The provided GitHub token may be invalid or have insufficient permissions."
+                     if self.github_token
+                     else " For private repositories, use the github_token parameter."
+                 )
+                 raise requests.RequestException(f"Authentication error: {e}.{token_msg}")
+             else:
+                 raise requests.RequestException(f"API request failed: {e}")
+         except requests.exceptions.RequestException as e:
+             token_msg = (
+                 ""
+                 if self.github_token
+                 else " For private repositories, use the github_token parameter."
+             )
+             raise requests.RequestException(f"API request failed: {e}.{token_msg}")
+
+     def validate_remote_structure(
+         self,
+         owner: str,
+         repo: str,
+         path: str,
+         branch: str,
+         expected_structure: List[Dict[str, Any]],
+     ) -> Dict[str, Any]:
+         validation_result = {
+             'valid': True,
+             'missing_files': [],
+             'missing_dirs': [],
+             'warnings': [],
+             'remote_contents': [],
+         }
+
+         try:
+             remote_contents = self.get_folder_contents(owner, repo, path, branch)
+             validation_result['remote_contents'] = remote_contents
+
+             remote_items = {item['name']: item['type'] for item in remote_contents}
+
+             for item in expected_structure:
+                 item_name = item['name']
+                 item_type = item.get('type', 'file')
+
+                 if item_name not in remote_items:
+                     if item_type == 'file':
+                         validation_result['missing_files'].append(item_name)
+                     else:
+                         validation_result['missing_dirs'].append(item_name)
+                     validation_result['valid'] = False
+                 elif remote_items[item_name] != item_type:
+                     validation_result['warnings'].append(
+                         f"Item '{item_name}' exists but is a {remote_items[item_name]} instead of {item_type}"
+                     )
+                     validation_result['valid'] = False
+
+             expected_names = {item['name'] for item in expected_structure}
+             unexpected_items = [name for name in remote_items.keys() if name not in expected_names]
+             if unexpected_items:
+                 validation_result['warnings'].append(
+                     f"Unexpected items found: {', '.join(unexpected_items)}"
+                 )
+
+         except requests.RequestException as e:
+             validation_result['valid'] = False
+             validation_result['warnings'].append(f"Failed to access remote repository: {e}")
+
+         return validation_result
+
+     def download_file(self, download_url: str, local_path: str) -> None:
+         try:
+             response = self.session.get(download_url, stream=True, timeout=60)
+             response.raise_for_status()
+
+             os.makedirs(os.path.dirname(local_path), exist_ok=True)
+
+             total_size = int(response.headers.get('content-length', 0))
+             downloaded_size = 0
+
+             with open(local_path, 'wb') as f:
+                 for chunk in response.iter_content(chunk_size=8192):
+                     if chunk:
+                         f.write(chunk)
+                         downloaded_size += len(chunk)
+
+                         if total_size > 0 and total_size > 1024 * 1024:
+                             progress = (downloaded_size / total_size) * 100
+                             logger.info(
+                                 f"\rDownloading: {os.path.basename(local_path)} - {progress:.1f}%",
+                                 end='',
+                                 flush=True,
+                             )
+
+             if total_size > 1024 * 1024:
+                 logger.info()
+
+             logger.info(f"Downloaded: {local_path}")
+
+         except requests.exceptions.Timeout:
+             logger.info(f"Timeout downloading {local_path}. Skipping...")
+         except requests.exceptions.ConnectionError:
+             logger.info(f"Connection error downloading {local_path}. Skipping...")
+         except Exception as e:
+             logger.info(f"Failed to download {local_path}: {e}")
+
+     def process_folder(
+         self, owner: str, repo: str, path: str, local_base_path: str, branch: str = 'main'
+     ) -> None:
+         try:
+             contents = self.get_folder_contents(owner, repo, path, branch)
+
+             if not contents:
+                 logger.info(f"Info: Empty folder - {path}")
+                 return
+
+             for item in contents:
+                 item_name = item['name']
+                 item_path = os.path.join(local_base_path, item_name)
+
+                 if item['type'] == 'file':
+                     self.download_file(item['download_url'], item_path)
+
+                 elif item['type'] == 'dir':
+                     os.makedirs(item_path, exist_ok=True)
+                     logger.info(f"Created directory: {item_path}")
+
+                     new_path = f"{path}/{item_name}" if path else item_name
+                     self.process_folder(owner, repo, new_path, item_path, branch)
+
+         except requests.exceptions.RequestException as e:
+             if "Folder not found" in str(e):
+                 logger.error(f"Error: Folder not found - {path}")
+                 raise
+             else:
+                 logger.error(f"Error accessing folder {path}: {e}")
+                 raise
+         except Exception as e:
+             logger.error(f"Unexpected error processing folder {path}: {e}")
+             raise
+
+     def validate_folder_structure(
+         self, folder_path: str, expected_structure: List[Dict[str, Any]]
+     ) -> Dict[str, Any]:
+         validation_result = {
+             'valid': True,
+             'missing_files': [],
+             'missing_dirs': [],
+             'warnings': [],
+         }
+
+         if not os.path.exists(folder_path):
+             validation_result['valid'] = False
+             validation_result['warnings'].append(f"Folder {folder_path} does not exist")
+             return validation_result
+
+         for item in expected_structure:
+             item_name = item['name']
+             item_type = item.get('type', 'file')
+             item_path = os.path.join(folder_path, item_name)
+
+             if item_type == 'file':
+                 if not os.path.isfile(item_path):
+                     validation_result['missing_files'].append(item_name)
+                     validation_result['valid'] = False
+             elif item_type == 'dir':
+                 if not os.path.isdir(item_path):
+                     validation_result['missing_dirs'].append(item_name)
+                     validation_result['valid'] = False
+
+         return validation_result
+
+     def download_github_folder(
+         self,
+         url: str,
+         output_dir: str,
+         github_token: str = None,
+         validate_structure: bool = False,
+         pre_validate: bool = True,
+         strict_validation: bool = False,
+     ) -> None:
+         logger.info(f"Parsing GitHub URL: {url}")
+
+         # Update token if provided as a parameter
+         if github_token:
+             self.github_token = github_token
+             self.session.headers.update({'Authorization': f'token {github_token}'})
+
+         try:
+             owner, repo, branch, folder_path = self.parse_github_url(url)
+             logger.info(f"Repository: {owner}/{repo}")
+             logger.info(f"Branch: {branch}")
+             logger.info(f"Folder: {folder_path or 'root'}")
+
+             expected_structure = self.expected_folder_structure() if pre_validate else None
+
+             if expected_structure:
+                 logger.info("\nValidating remote folder structure...")
+                 remote_validation = self.validate_remote_structure(
+                     owner, repo, folder_path, branch, expected_structure
+                 )
+
+                 if not remote_validation['valid']:
+                     logger.error("Remote structure validation failed!")
+
+                     if remote_validation['missing_files']:
+                         logger.error(
+                             f"Missing files: {', '.join(remote_validation['missing_files'])}"
+                         )
+
+                     if remote_validation['missing_dirs']:
+                         logger.error(
+                             f"Missing directories: {', '.join(remote_validation['missing_dirs'])}"
+                         )
+
+                     if remote_validation['warnings']:
+                         for warning in remote_validation['warnings']:
+                             logger.error(f"Warning: {warning}")
+
+                     # Print the expected structure in a nice format
+                     tree_view = self._format_expected_structure()
+                     logger.info("\nThe repository must have the following structure:")
+                     logger.info(tree_view)
+
+                     logger.error(
+                         "Download cancelled: Repository structure does not match the expected format."
+                     )
+                     sys.exit(1)  # Exit without proceeding with download
+                 else:
+                     logger.info("Remote structure validation passed!")
+
+             os.makedirs(output_dir, exist_ok=True)
+             logger.info(f"Created output directory: {output_dir}")
+
+             logger.info("\nStarting download...")
+             start_time = time.time()
+             try:
+                 self.process_folder(owner, repo, folder_path, output_dir, branch)
+
+                 elapsed_time = time.time() - start_time
+                 logger.info(f"\nDownload completed in {elapsed_time:.2f} seconds")
+                 logger.info(f"Files saved to: {os.path.abspath(output_dir)}")
+
+                 if validate_structure and expected_structure:
+                     logger.info("\nValidating downloaded folder structure...")
+                     validation_result = self.validate_folder_structure(
+                         output_dir, expected_structure
+                     )
+
+                     if validation_result['valid']:
+                         logger.info("Folder structure post validation passed!")
+                     else:
+                         logger.error("Folder structure validation failed!")
+
+                         if validation_result['missing_files']:
+                             logger.info(
+                                 f"Missing files: {', '.join(validation_result['missing_files'])}"
+                             )
+
+                         if validation_result['missing_dirs']:
+                             logger.info(
+                                 f"Missing directories: {', '.join(validation_result['missing_dirs'])}"
+                             )
+
+                         if validation_result['warnings']:
+                             for warning in validation_result['warnings']:
+                                 logger.info(f"Warng: {warning}")
+             except requests.RequestException as e:
+                 # Critical error - the main folder cannot be processed
+                 logger.error(
+                     f"\nDownload failed: {e}, \n No files were downloaded to: {os.path.abspath(output_dir)}"
+                 )
+                 sys.exit(1)
+
+         except ValueError as e:
+             logger.error(f"Error: {e}")
+             sys.exit(1)
+         except Exception as e:
+             logger.error(f"Unexpected error: {e}")
+             sys.exit(1)
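
Note: a short usage sketch of the new GitHubDownloader added above. The URL and output directory are placeholders; only the class and method signatures come from the diff:

    from clarifai.utils.misc import GitHubDownloader

    downloader = GitHubDownloader(max_retries=3)  # pass github_token=... for private repos
    # Pre-validates that the remote folder matches the expected model layout
    # (1/model.py, config.yaml, requirements.txt), then downloads it; the method
    # calls sys.exit(1) if validation or the download fails.
    downloader.download_github_folder(
        url='https://github.com/Clarifai/runners-examples/tree/ollama',  # example URL
        output_dir='./downloaded-model',
        validate_structure=True,
    )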
{clarifai-11.6.3.dist-info → clarifai-11.6.4rc1.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: clarifai
- Version: 11.6.3
+ Version: 11.6.4rc1
  Home-page: https://github.com/Clarifai/clarifai-python
  Author: Clarifai
  Author-email: support@clarifai.com