clarifai 11.6.3__py3-none-any.whl → 11.6.4rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- clarifai/__init__.py +1 -1
- clarifai/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/__pycache__/errors.cpython-311.pyc +0 -0
- clarifai/__pycache__/errors.cpython-39.pyc +0 -0
- clarifai/__pycache__/versions.cpython-311.pyc +0 -0
- clarifai/__pycache__/versions.cpython-39.pyc +0 -0
- clarifai/cli/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/cli/__pycache__/base.cpython-39.pyc +0 -0
- clarifai/cli/__pycache__/compute_cluster.cpython-39.pyc +0 -0
- clarifai/cli/__pycache__/deployment.cpython-39.pyc +0 -0
- clarifai/cli/__pycache__/model.cpython-39.pyc +0 -0
- clarifai/cli/__pycache__/nodepool.cpython-39.pyc +0 -0
- clarifai/cli/base.py +15 -2
- clarifai/cli/model.py +210 -77
- clarifai/cli/model_templates.py +243 -0
- clarifai/cli/pipeline_step_templates.py +64 -0
- clarifai/client/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/client/__pycache__/app.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/app.cpython-39.pyc +0 -0
- clarifai/client/__pycache__/base.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/base.cpython-39.pyc +0 -0
- clarifai/client/__pycache__/compute_cluster.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/dataset.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/dataset.cpython-39.pyc +0 -0
- clarifai/client/__pycache__/deployment.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/deployment.cpython-39.pyc +0 -0
- clarifai/client/__pycache__/input.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/input.cpython-39.pyc +0 -0
- clarifai/client/__pycache__/lister.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/lister.cpython-39.pyc +0 -0
- clarifai/client/__pycache__/model.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/model.cpython-39.pyc +0 -0
- clarifai/client/__pycache__/model_client.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/model_client.cpython-39.pyc +0 -0
- clarifai/client/__pycache__/module.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/nodepool.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/runner.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/search.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/user.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/workflow.cpython-311.pyc +0 -0
- clarifai/client/auth/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/client/auth/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/client/auth/__pycache__/helper.cpython-311.pyc +0 -0
- clarifai/client/auth/__pycache__/helper.cpython-39.pyc +0 -0
- clarifai/client/auth/__pycache__/register.cpython-311.pyc +0 -0
- clarifai/client/auth/__pycache__/register.cpython-39.pyc +0 -0
- clarifai/client/auth/__pycache__/stub.cpython-311.pyc +0 -0
- clarifai/client/auth/__pycache__/stub.cpython-39.pyc +0 -0
- clarifai/client/nodepool.py +1 -1
- clarifai/constants/__pycache__/base.cpython-311.pyc +0 -0
- clarifai/constants/__pycache__/base.cpython-39.pyc +0 -0
- clarifai/constants/__pycache__/dataset.cpython-311.pyc +0 -0
- clarifai/constants/__pycache__/dataset.cpython-39.pyc +0 -0
- clarifai/constants/__pycache__/input.cpython-311.pyc +0 -0
- clarifai/constants/__pycache__/input.cpython-39.pyc +0 -0
- clarifai/constants/__pycache__/model.cpython-311.pyc +0 -0
- clarifai/constants/__pycache__/model.cpython-39.pyc +0 -0
- clarifai/constants/__pycache__/search.cpython-311.pyc +0 -0
- clarifai/constants/__pycache__/workflow.cpython-311.pyc +0 -0
- clarifai/datasets/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/datasets/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/datasets/export/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/datasets/export/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/datasets/export/__pycache__/inputs_annotations.cpython-311.pyc +0 -0
- clarifai/datasets/export/__pycache__/inputs_annotations.cpython-39.pyc +0 -0
- clarifai/datasets/upload/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/datasets/upload/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/datasets/upload/__pycache__/base.cpython-311.pyc +0 -0
- clarifai/datasets/upload/__pycache__/base.cpython-39.pyc +0 -0
- clarifai/datasets/upload/__pycache__/features.cpython-311.pyc +0 -0
- clarifai/datasets/upload/__pycache__/features.cpython-39.pyc +0 -0
- clarifai/datasets/upload/__pycache__/image.cpython-311.pyc +0 -0
- clarifai/datasets/upload/__pycache__/image.cpython-39.pyc +0 -0
- clarifai/datasets/upload/__pycache__/multimodal.cpython-311.pyc +0 -0
- clarifai/datasets/upload/__pycache__/multimodal.cpython-39.pyc +0 -0
- clarifai/datasets/upload/__pycache__/text.cpython-311.pyc +0 -0
- clarifai/datasets/upload/__pycache__/text.cpython-39.pyc +0 -0
- clarifai/datasets/upload/__pycache__/utils.cpython-311.pyc +0 -0
- clarifai/datasets/upload/__pycache__/utils.cpython-39.pyc +0 -0
- clarifai/models/model_serving/README.md +158 -0
- clarifai/models/model_serving/__init__.py +14 -0
- clarifai/models/model_serving/cli/__init__.py +12 -0
- clarifai/models/model_serving/cli/_utils.py +53 -0
- clarifai/models/model_serving/cli/base.py +14 -0
- clarifai/models/model_serving/cli/build.py +79 -0
- clarifai/models/model_serving/cli/clarifai_clis.py +33 -0
- clarifai/models/model_serving/cli/create.py +171 -0
- clarifai/models/model_serving/cli/example_cli.py +34 -0
- clarifai/models/model_serving/cli/login.py +26 -0
- clarifai/models/model_serving/cli/upload.py +179 -0
- clarifai/models/model_serving/constants.py +21 -0
- clarifai/models/model_serving/docs/cli.md +161 -0
- clarifai/models/model_serving/docs/concepts.md +229 -0
- clarifai/models/model_serving/docs/dependencies.md +11 -0
- clarifai/models/model_serving/docs/inference_parameters.md +139 -0
- clarifai/models/model_serving/docs/model_types.md +19 -0
- clarifai/models/model_serving/model_config/__init__.py +16 -0
- clarifai/models/model_serving/model_config/base.py +369 -0
- clarifai/models/model_serving/model_config/config.py +312 -0
- clarifai/models/model_serving/model_config/inference_parameter.py +129 -0
- clarifai/models/model_serving/model_config/model_types_config/multimodal-embedder.yaml +25 -0
- clarifai/models/model_serving/model_config/model_types_config/text-classifier.yaml +19 -0
- clarifai/models/model_serving/model_config/model_types_config/text-embedder.yaml +20 -0
- clarifai/models/model_serving/model_config/model_types_config/text-to-image.yaml +19 -0
- clarifai/models/model_serving/model_config/model_types_config/text-to-text.yaml +19 -0
- clarifai/models/model_serving/model_config/model_types_config/visual-classifier.yaml +22 -0
- clarifai/models/model_serving/model_config/model_types_config/visual-detector.yaml +32 -0
- clarifai/models/model_serving/model_config/model_types_config/visual-embedder.yaml +19 -0
- clarifai/models/model_serving/model_config/model_types_config/visual-segmenter.yaml +19 -0
- clarifai/models/model_serving/model_config/output.py +133 -0
- clarifai/models/model_serving/model_config/triton/__init__.py +14 -0
- clarifai/models/model_serving/model_config/triton/serializer.py +136 -0
- clarifai/models/model_serving/model_config/triton/triton_config.py +182 -0
- clarifai/models/model_serving/model_config/triton/wrappers.py +281 -0
- clarifai/models/model_serving/repo_build/__init__.py +14 -0
- clarifai/models/model_serving/repo_build/build.py +198 -0
- clarifai/models/model_serving/repo_build/static_files/_requirements.txt +2 -0
- clarifai/models/model_serving/repo_build/static_files/base_test.py +169 -0
- clarifai/models/model_serving/repo_build/static_files/inference.py +26 -0
- clarifai/models/model_serving/repo_build/static_files/sample_clarifai_config.yaml +25 -0
- clarifai/models/model_serving/repo_build/static_files/test.py +40 -0
- clarifai/models/model_serving/repo_build/static_files/triton/model.py +75 -0
- clarifai/models/model_serving/utils.py +23 -0
- clarifai/runners/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/runners/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/runners/dockerfile_template/Dockerfile.template +1 -1
- clarifai/runners/models/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/runners/models/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/runners/models/__pycache__/mcp_class.cpython-311.pyc +0 -0
- clarifai/runners/models/__pycache__/model_builder.cpython-311.pyc +0 -0
- clarifai/runners/models/__pycache__/model_builder.cpython-39.pyc +0 -0
- clarifai/runners/models/__pycache__/model_class.cpython-311.pyc +0 -0
- clarifai/runners/models/__pycache__/model_runner.cpython-311.pyc +0 -0
- clarifai/runners/models/__pycache__/openai_class.cpython-311.pyc +0 -0
- clarifai/runners/models/base_typed_model.py +238 -0
- clarifai/runners/models/model_builder.py +1 -1
- clarifai/runners/models/model_upload.py +607 -0
- clarifai/runners/utils/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/runners/utils/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/code_script.cpython-311.pyc +0 -0
- clarifai/runners/utils/__pycache__/code_script.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/const.cpython-311.pyc +0 -0
- clarifai/runners/utils/__pycache__/data_utils.cpython-311.pyc +0 -0
- clarifai/runners/utils/__pycache__/data_utils.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/loader.cpython-311.pyc +0 -0
- clarifai/runners/utils/__pycache__/method_signatures.cpython-311.pyc +0 -0
- clarifai/runners/utils/__pycache__/model_utils.cpython-311.pyc +0 -0
- clarifai/runners/utils/__pycache__/openai_convertor.cpython-311.pyc +0 -0
- clarifai/runners/utils/__pycache__/serializers.cpython-311.pyc +0 -0
- clarifai/runners/utils/__pycache__/url_fetcher.cpython-311.pyc +0 -0
- clarifai/runners/utils/data_handler.py +231 -0
- clarifai/runners/utils/data_types/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/runners/utils/data_types/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/runners/utils/data_types/__pycache__/data_types.cpython-311.pyc +0 -0
- clarifai/runners/utils/data_types/__pycache__/data_types.cpython-39.pyc +0 -0
- clarifai/runners/utils/data_types.py +471 -0
- clarifai/runners/utils/temp.py +59 -0
- clarifai/schema/__pycache__/search.cpython-311.pyc +0 -0
- clarifai/urls/__pycache__/helper.cpython-311.pyc +0 -0
- clarifai/urls/__pycache__/helper.cpython-39.pyc +0 -0
- clarifai/utils/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/utils/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/utils/__pycache__/cli.cpython-39.pyc +0 -0
- clarifai/utils/__pycache__/config.cpython-311.pyc +0 -0
- clarifai/utils/__pycache__/config.cpython-39.pyc +0 -0
- clarifai/utils/__pycache__/constants.cpython-311.pyc +0 -0
- clarifai/utils/__pycache__/constants.cpython-39.pyc +0 -0
- clarifai/utils/__pycache__/logging.cpython-311.pyc +0 -0
- clarifai/utils/__pycache__/logging.cpython-39.pyc +0 -0
- clarifai/utils/__pycache__/misc.cpython-311.pyc +0 -0
- clarifai/utils/__pycache__/misc.cpython-39.pyc +0 -0
- clarifai/utils/__pycache__/model_train.cpython-311.pyc +0 -0
- clarifai/utils/__pycache__/protobuf.cpython-311.pyc +0 -0
- clarifai/utils/__pycache__/protobuf.cpython-39.pyc +0 -0
- clarifai/utils/cli.py +48 -0
- clarifai/utils/constants.py +18 -16
- clarifai/utils/misc.py +381 -1
- clarifai/workflows/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/workflows/__pycache__/export.cpython-311.pyc +0 -0
- clarifai/workflows/__pycache__/utils.cpython-311.pyc +0 -0
- clarifai/workflows/__pycache__/validate.cpython-311.pyc +0 -0
- {clarifai-11.6.3.dist-info → clarifai-11.6.4rc1.dist-info}/METADATA +1 -1
- clarifai-11.6.4rc1.dist-info/RECORD +301 -0
- {clarifai-11.6.3.dist-info → clarifai-11.6.4rc1.dist-info}/WHEEL +1 -1
- clarifai-11.6.3.dist-info/RECORD +0 -127
- {clarifai-11.6.3.dist-info → clarifai-11.6.4rc1.dist-info}/entry_points.txt +0 -0
- {clarifai-11.6.3.dist-info → clarifai-11.6.4rc1.dist-info}/licenses/LICENSE +0 -0
- {clarifai-11.6.3.dist-info → clarifai-11.6.4rc1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,59 @@
|
|
1
|
+
import json
|
2
|
+
from clarifai.runners.utils import data_utils
|
3
|
+
from clarifai.runners.models.model_class import ModelClass
|
4
|
+
|
5
|
+
import inspect
|
6
|
+
|
7
|
+
def log_stack():
    """Print one ``file:line in function`` entry per frame of the current call stack."""
    for frame in inspect.stack():
        location = f"{frame.filename}:{frame.lineno}"
        print(f"{location} in {frame.function}")


def _show(value):
    # Print a value next to its runtime type; used by the round-trip probes below.
    print(value, type(value))


# --- Round-trip a plain int through JSON --------------------------------------
default = 27

x = json.dumps(default)
_show(x)

x = json.loads(x)
_show(x)

# --- Same round-trip, but with the int wrapped in a data_utils.Param ----------
param = data_utils.Param(default)
_show(param)
print(param.default)

param_json = json.dumps(param)
_show(param_json)

param_json = json.loads(param_json)
_show(param_json)


# --- Param used as the default value of a plain function argument -------------
def test_int(default: int = data_utils.Param(default=27)):
    _show(default)


test_int()


# --- Param used as the default value of a ModelClass method argument ----------
class Test(ModelClass):
    # NOTE: deliberately no docstring on the decorated method, so nothing the
    # decorator may read from the function object is altered.
    @ModelClass.method
    def test(self, max_tokens: int = data_utils.Param(default=27)) -> int:
        _show(max_tokens)
        return max_tokens


test = Test()

test.test()
|
59
|
+
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
clarifai/utils/cli.py
CHANGED
@@ -10,6 +10,8 @@ import click
|
|
10
10
|
import yaml
|
11
11
|
from tabulate import tabulate
|
12
12
|
|
13
|
+
from clarifai.utils.logging import logger
|
14
|
+
|
13
15
|
|
14
16
|
def from_yaml(filename: str):
|
15
17
|
try:
|
@@ -172,3 +174,49 @@ def validate_context(ctx):
|
|
172
174
|
if ctx.obj == {}:
|
173
175
|
logger.error("CLI config file missing. Run `clarifai login` to set up the CLI config.")
|
174
176
|
sys.exit(1)
|
177
|
+
|
178
|
+
|
179
|
+
def validate_context_auth(pat: str, user_id: str, api_base: str = None):
    """
    Check a Personal Access Token (PAT) by issuing a test ``get_user_info`` call.

    Any exception raised while building the client or making the call is
    logged with a category-specific message and converted into ``click.Abort``.

    Args:
        pat (str): The Personal Access Token to validate
        user_id (str): The user ID associated with the token
        api_base (str): The API base URL. Defaults to None (uses default).

    Raises:
        click.Abort: If the validation call raised any exception.
    """
    try:
        from clarifai_grpc.grpc.api.status import status_code_pb2

        from clarifai.client.user import User

        logger.info("Validating the Context Credentials...")

        # Only forward base_url when the caller supplied one, so the client
        # falls back to its own default otherwise.
        client_kwargs = {"user_id": user_id, "pat": pat}
        if api_base:
            client_kwargs["base_url"] = api_base
        user_client = User(**client_kwargs)

        # A cheap authenticated request serves as the credential probe.
        response = user_client.get_user_info()
        succeeded = response.status.code == status_code_pb2.SUCCESS
        if succeeded:
            logger.info("✅ Context is valid")

    except Exception as e:
        error_msg = str(e)

        def mentions(*needles):
            # True when the error text contains at least one of the markers.
            return any(needle in error_msg for needle in needles)

        # Map well-known failure markers to friendlier messages; first match wins.
        if mentions("PERMISSION_DENIED", "Unauthorized"):
            logger.error(f"Invalid PAT token or incorrect user ID '{user_id}': {error_msg}")
        elif mentions("UNAUTHENTICATED"):
            logger.error(f"Invalid PAT token or user ID: {error_msg}")
        elif mentions("SSL", "certificate"):
            logger.error(f"SSL/Certificate error: {error_msg}")
        elif mentions("Connection", "timeout"):
            logger.error(f"Network connection error: {error_msg}")
        else:
            # NOTE(review): nesting of the next two statements was reconstructed
            # from a flattened listing - confirm against the original file.
            logger.error(f"❌ Validation failed: \n{error_msg}")
            logger.error("Please check your credentials and try again.")
        raise click.Abort()  # Exit without saving the configuration
|
clarifai/utils/constants.py
CHANGED
@@ -14,20 +14,20 @@ CLARIFAI_USER_ID_ENV_VAR = "CLARIFAI_USER_ID"
|
|
14
14
|
HOME_PATH = Path.home()
|
15
15
|
DEFAULT_CONFIG = HOME_PATH / '.config/clarifai/config'
|
16
16
|
|
17
|
-
# Default clusters, etc. for local
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
17
|
+
# Default clusters, etc. for local runner easy setup
|
18
|
+
DEFAULT_LOCAL_RUNNER_COMPUTE_CLUSTER_ID = "local-runner-compute-cluster"
|
19
|
+
DEFAULT_LOCAL_RUNNER_NODEPOOL_ID = "local-runner-nodepool"
|
20
|
+
DEFAULT_LOCAL_RUNNER_DEPLOYMENT_ID = "local-runner-deployment"
|
21
|
+
DEFAULT_LOCAL_RUNNER_MODEL_ID = "local-runner-model"
|
22
|
+
DEFAULT_LOCAL_RUNNER_APP_ID = "local-runner-app"
|
23
23
|
|
24
24
|
# FIXME: should have any-to-any for these cases.
|
25
|
-
|
25
|
+
DEFAULT_LOCAL_RUNNER_MODEL_TYPE = "text-to-text"
|
26
26
|
|
27
|
-
|
27
|
+
DEFAULT_LOCAL_RUNNER_COMPUTE_CLUSTER_CONFIG = {
|
28
28
|
"compute_cluster": {
|
29
|
-
"id":
|
30
|
-
"description": "Default Local
|
29
|
+
"id": DEFAULT_LOCAL_RUNNER_COMPUTE_CLUSTER_ID,
|
30
|
+
"description": "Default Local Runner Compute Cluster",
|
31
31
|
"cloud_provider": {
|
32
32
|
"id": "local",
|
33
33
|
},
|
@@ -37,12 +37,12 @@ DEFAULT_LOCAL_DEV_COMPUTE_CLUSTER_CONFIG = {
|
|
37
37
|
}
|
38
38
|
}
|
39
39
|
|
40
|
-
|
40
|
+
DEFAULT_LOCAL_RUNNER_NODEPOOL_CONFIG = {
|
41
41
|
"nodepool": {
|
42
|
-
"id":
|
43
|
-
"description": "Default Local
|
42
|
+
"id": DEFAULT_LOCAL_RUNNER_NODEPOOL_ID,
|
43
|
+
"description": "Default Local Runner Nodepool",
|
44
44
|
"compute_cluster": {
|
45
|
-
"id":
|
45
|
+
"id": DEFAULT_LOCAL_RUNNER_COMPUTE_CLUSTER_ID,
|
46
46
|
"user_id": None, # This will be set when creating the compute cluster
|
47
47
|
},
|
48
48
|
"instance_types": [
|
@@ -50,8 +50,8 @@ DEFAULT_LOCAL_DEV_NODEPOOL_CONFIG = {
|
|
50
50
|
"id": "local",
|
51
51
|
"compute_info": {
|
52
52
|
"cpu_limit": str(os.cpu_count()),
|
53
|
-
"cpu_memory": "16Gi", # made up as we don't schedule based on this for local
|
54
|
-
"num_accelerators": 0, # TODO if we need accelerator detection for local
|
53
|
+
"cpu_memory": "16Gi", # made up as we don't schedule based on this for local runner.
|
54
|
+
"num_accelerators": 0, # TODO if we need accelerator detection for local runner.
|
55
55
|
},
|
56
56
|
}
|
57
57
|
],
|
@@ -62,3 +62,5 @@ DEFAULT_LOCAL_DEV_NODEPOOL_CONFIG = {
|
|
62
62
|
"max_instances": 1,
|
63
63
|
}
|
64
64
|
}
|
65
|
+
DEFAULT_OLLAMA_MODEL_REPO = "https://github.com/Clarifai/runners-examples"
|
66
|
+
DEFAULT_OLLAMA_MODEL_REPO_BRANCH = "ollama"
|
clarifai/utils/misc.py
CHANGED
@@ -2,11 +2,17 @@ import os
|
|
2
2
|
import re
|
3
3
|
import shutil
|
4
4
|
import subprocess
|
5
|
+
import sys
|
6
|
+
import time
|
5
7
|
import urllib.parse
|
6
8
|
import uuid
|
7
|
-
from typing import Any, Dict, List
|
9
|
+
from typing import Any, Dict, List, Tuple
|
10
|
+
from urllib.parse import urlparse
|
8
11
|
|
12
|
+
import requests
|
9
13
|
from clarifai_grpc.grpc.api.status import status_code_pb2
|
14
|
+
from requests.adapters import HTTPAdapter
|
15
|
+
from urllib3.util.retry import Retry
|
10
16
|
|
11
17
|
from clarifai.errors import UserError
|
12
18
|
from clarifai.utils.constants import HOME_PATH
|
@@ -166,3 +172,377 @@ def clone_github_repo(repo_url, target_dir, github_pat=None, branch=None):
|
|
166
172
|
except subprocess.CalledProcessError as e:
|
167
173
|
logger.error(f"Failed to clone repository: {e.stderr}")
|
168
174
|
return False
|
175
|
+
|
176
|
+
|
177
|
+
class GitHubDownloader:
    """Download a folder of a GitHub repository via the GitHub contents API.

    The downloader owns a ``requests.Session`` configured with retries and,
    optionally, a GitHub token. It can validate that a remote or local folder
    matches the layout returned by :meth:`expected_folder_structure`.
    """

    def __init__(
        self, max_retries: int = 3, backoff_factor: float = 0.3, github_token: str = None
    ):
        """Create the HTTP session used for all API and download requests.

        Args:
            max_retries: Total number of retries passed to the retry strategy.
            backoff_factor: Backoff factor passed to the retry strategy.
            github_token: Optional token sent as an ``Authorization`` header.
        """
        self.session = requests.Session()
        self.github_token = github_token

        # Retry idempotent requests on rate limiting and transient server errors.
        retry_strategy = Retry(
            total=max_retries,
            backoff_factor=backoff_factor,
            status_forcelist=[429, 500, 502, 503, 504],
            allowed_methods=["HEAD", "GET", "OPTIONS"],
        )

        adapter = HTTPAdapter(max_retries=retry_strategy)
        self.session.mount("http://", adapter)
        self.session.mount("https://", adapter)

        self.session.headers.update({'User-Agent': 'GitHub-Folder-Downloader/1.0'})

        if self.github_token:
            self.session.headers.update({'Authorization': f'token {self.github_token}'})

    def expected_folder_structure(self) -> List[Dict[str, Any]]:
        """Return the top-level layout a folder is validated against.

        Each entry has a ``name`` and a ``type`` (``"file"`` or ``"dir"``).
        The ``children`` key is descriptive only: the validators in this class
        check top-level entries and do not descend into ``children``.
        """
        return [
            {"name": "1", "type": "dir", "children": [{"name": "model.py", "type": "file"}]},
            {"name": "config.yaml", "type": "file"},
            {"name": "requirements.txt", "type": "file"},
        ]

    def _format_expected_structure(self):
        """Format the expected structure as a nice tree view."""
        tree_str = ""
        tree_str += "Expected folder structure:\n"
        tree_str += "├── 1/\n"
        tree_str += "│ └── model.py\n"
        tree_str += "├── requirements.txt\n"
        tree_str += "└── config.yaml\n"
        return tree_str

    def parse_github_url(self, url: str) -> Tuple[str, str, str, str]:
        """Split a GitHub URL into ``(owner, repo, branch, folder_path)``.

        Supported shapes are ``/owner/repo``, ``/owner/repo/tree|blob/<branch>/<path>``
        and ``/owner/repo/<branch>/<path>``. The branch defaults to ``'main'``
        and the folder path to ``''``. A trailing ``.git`` on the repository
        segment (clone-style URL) is dropped.

        Raises:
            ValueError: If the host is not github.com or the path is too short.
        """
        try:
            parsed = urlparse(url)

            if parsed.netloc not in ['github.com', 'www.github.com']:
                raise ValueError("URL must be a GitHub repository URL")

            path_parts = [p for p in parsed.path.strip('/').split('/') if p]

            if len(path_parts) < 2:
                raise ValueError("Invalid GitHub repository URL format")

            owner = path_parts[0]
            repo = path_parts[1]

            # Clone URLs end in ".git"; the contents API expects the bare repo name.
            if repo.endswith('.git'):
                repo = repo[: -len('.git')]
            if not repo:
                raise ValueError("Invalid GitHub repository URL format")

            if len(path_parts) >= 4 and path_parts[2] in ['tree', 'blob']:
                branch = path_parts[3]
                folder_path = '/'.join(path_parts[4:]) if len(path_parts) > 4 else ''
            elif len(path_parts) >= 3:
                branch = path_parts[2]
                folder_path = '/'.join(path_parts[3:]) if len(path_parts) > 3 else ''
            else:
                branch = 'main'
                folder_path = ''

            return owner, repo, branch, folder_path

        except Exception as e:
            # Every failure (including the ValueErrors above) surfaces as ValueError.
            raise ValueError(f"Failed to parse GitHub URL: {e}")

    def get_folder_contents(self, owner: str, repo: str, path: str, branch: str = 'main') -> list:
        """Fetch the listing of ``path`` from the repository contents API.

        Args:
            owner: Repository owner.
            repo: Repository name.
            path: Path inside the repository ('' for the root).
            branch: Git ref sent as the ``ref`` query parameter when truthy.

        Returns:
            The decoded JSON body of the response.

        Raises:
            requests.RequestException: On timeout, connection failure or an
                HTTP error status; the message hints at using a token when
                none is configured.
        """
        api_url = f"https://api.github.com/repos/{owner}/{repo}/contents/{path}"
        params = {'ref': branch} if branch else {}

        try:
            response = self.session.get(api_url, params=params, timeout=30)
            response.raise_for_status()
            return response.json()
        except requests.exceptions.Timeout as e:
            raise requests.RequestException("Request timed out. Please try again.") from e
        except requests.exceptions.ConnectionError as e:
            raise requests.RequestException(
                "Connection error. Please check your internet connection."
            ) from e
        except requests.exceptions.HTTPError as e:
            if e.response.status_code == 404:
                token_msg = (
                    ""
                    if self.github_token
                    else " For private repositories, use the github_token parameter."
                )
                # process_folder matches on the "Folder not found" prefix.
                raise requests.RequestException(
                    f"Folder not found: {path}. Check if path exists or if the repository is private.{token_msg}"
                ) from e
            elif e.response.status_code == 401 or e.response.status_code == 403:
                token_msg = (
                    " The provided GitHub token may be invalid or have insufficient permissions."
                    if self.github_token
                    else " For private repositories, use the github_token parameter."
                )
                raise requests.RequestException(f"Authentication error: {e}.{token_msg}") from e
            else:
                raise requests.RequestException(f"API request failed: {e}") from e
        except requests.exceptions.RequestException as e:
            token_msg = (
                ""
                if self.github_token
                else " For private repositories, use the github_token parameter."
            )
            raise requests.RequestException(f"API request failed: {e}.{token_msg}") from e

    def validate_remote_structure(
        self,
        owner: str,
        repo: str,
        path: str,
        branch: str,
        expected_structure: List[Dict[str, Any]],
    ) -> Dict[str, Any]:
        """Compare the remote folder listing with ``expected_structure``.

        Returns:
            A dict with ``valid`` (bool), ``missing_files``, ``missing_dirs``,
            ``warnings`` (lists of str) and ``remote_contents`` (the raw
            listing). Missing or wrongly-typed entries make the result
            invalid; unexpected extra entries only add a warning. A failed
            API request makes the result invalid and is recorded as a warning.
        """
        validation_result = {
            'valid': True,
            'missing_files': [],
            'missing_dirs': [],
            'warnings': [],
            'remote_contents': [],
        }

        try:
            remote_contents = self.get_folder_contents(owner, repo, path, branch)
            validation_result['remote_contents'] = remote_contents

            remote_items = {item['name']: item['type'] for item in remote_contents}

            for item in expected_structure:
                item_name = item['name']
                item_type = item.get('type', 'file')

                if item_name not in remote_items:
                    if item_type == 'file':
                        validation_result['missing_files'].append(item_name)
                    else:
                        validation_result['missing_dirs'].append(item_name)
                    validation_result['valid'] = False
                elif remote_items[item_name] != item_type:
                    validation_result['warnings'].append(
                        f"Item '{item_name}' exists but is a {remote_items[item_name]} instead of {item_type}"
                    )
                    validation_result['valid'] = False

            expected_names = {item['name'] for item in expected_structure}
            unexpected_items = [name for name in remote_items if name not in expected_names]
            if unexpected_items:
                validation_result['warnings'].append(
                    f"Unexpected items found: {', '.join(unexpected_items)}"
                )

        except requests.RequestException as e:
            validation_result['valid'] = False
            validation_result['warnings'].append(f"Failed to access remote repository: {e}")

        return validation_result

    def download_file(self, download_url: str, local_path: str) -> None:
        """Stream ``download_url`` to ``local_path``, best effort.

        Failures are logged and swallowed so one bad file does not abort a
        whole folder download. For responses that declare more than 1 MiB,
        progress is logged at roughly every 10%.
        """
        try:
            # The context manager releases the streamed connection even on failure.
            with self.session.get(download_url, stream=True, timeout=60) as response:
                response.raise_for_status()

                # dirname is '' for a bare filename and makedirs('') would raise.
                parent_dir = os.path.dirname(local_path)
                if parent_dir:
                    os.makedirs(parent_dir, exist_ok=True)

                total_size = int(response.headers.get('content-length', 0))
                show_progress = total_size > 1024 * 1024
                downloaded_size = 0
                next_report = 10  # next percentage threshold to log

                with open(local_path, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        if not chunk:
                            continue
                        f.write(chunk)
                        downloaded_size += len(chunk)

                        if show_progress:
                            progress = min(100.0, (downloaded_size / total_size) * 100)
                            if progress >= next_report:
                                # Logger calls take no print-style end=/flush=
                                # arguments, so report discrete milestones
                                # rather than rewriting a single line.
                                logger.info(
                                    f"Downloading: {os.path.basename(local_path)} - {progress:.1f}%"
                                )
                                while next_report <= progress:
                                    next_report += 10

            logger.info(f"Downloaded: {local_path}")

        except requests.exceptions.Timeout:
            logger.info(f"Timeout downloading {local_path}. Skipping...")
        except requests.exceptions.ConnectionError:
            logger.info(f"Connection error downloading {local_path}. Skipping...")
        except Exception as e:
            logger.info(f"Failed to download {local_path}: {e}")

    def process_folder(
        self, owner: str, repo: str, path: str, local_base_path: str, branch: str = 'main'
    ) -> None:
        """Recursively download the contents of ``path`` into ``local_base_path``.

        Files are fetched with :meth:`download_file`; sub-directories are
        created locally and processed recursively. Listing errors are logged
        and re-raised.
        """
        try:
            contents = self.get_folder_contents(owner, repo, path, branch)

            if not contents:
                logger.info(f"Info: Empty folder - {path}")
                return

            for item in contents:
                item_name = item['name']
                item_path = os.path.join(local_base_path, item_name)

                if item['type'] == 'file':
                    self.download_file(item['download_url'], item_path)

                elif item['type'] == 'dir':
                    os.makedirs(item_path, exist_ok=True)
                    logger.info(f"Created directory: {item_path}")

                    # Repository paths always use '/', independent of the local OS.
                    new_path = f"{path}/{item_name}" if path else item_name
                    self.process_folder(owner, repo, new_path, item_path, branch)

        except requests.exceptions.RequestException as e:
            if "Folder not found" in str(e):
                logger.error(f"Error: Folder not found - {path}")
                raise
            else:
                logger.error(f"Error accessing folder {path}: {e}")
                raise
        except Exception as e:
            logger.error(f"Unexpected error processing folder {path}: {e}")
            raise

    def validate_folder_structure(
        self, folder_path: str, expected_structure: List[Dict[str, Any]]
    ) -> Dict[str, Any]:
        """Check that a local folder contains the expected top-level entries.

        Returns:
            A dict with ``valid`` (bool), ``missing_files``, ``missing_dirs``
            and ``warnings`` (lists of str). A non-existent ``folder_path``
            yields an invalid result with a single warning.
        """
        validation_result = {
            'valid': True,
            'missing_files': [],
            'missing_dirs': [],
            'warnings': [],
        }

        if not os.path.exists(folder_path):
            validation_result['valid'] = False
            validation_result['warnings'].append(f"Folder {folder_path} does not exist")
            return validation_result

        for item in expected_structure:
            item_name = item['name']
            item_type = item.get('type', 'file')
            item_path = os.path.join(folder_path, item_name)

            if item_type == 'file':
                if not os.path.isfile(item_path):
                    validation_result['missing_files'].append(item_name)
                    validation_result['valid'] = False
            elif item_type == 'dir':
                if not os.path.isdir(item_path):
                    validation_result['missing_dirs'].append(item_name)
                    validation_result['valid'] = False

        return validation_result

    def download_github_folder(
        self,
        url: str,
        output_dir: str,
        github_token: str = None,
        validate_structure: bool = False,
        pre_validate: bool = True,
        strict_validation: bool = False,
    ) -> None:
        """Download the folder addressed by ``url`` into ``output_dir``.

        Args:
            url: GitHub URL understood by :meth:`parse_github_url`.
            output_dir: Local directory to create and fill.
            github_token: If given, replaces the session's token.
            validate_structure: Validate the downloaded folder afterwards.
            pre_validate: Validate the remote listing before downloading and
                exit if it does not match.
            strict_validation: Accepted for compatibility; not used here.

        Exits the process with status 1 (``sys.exit``) on a URL parse error,
        failed remote validation, or a failed folder listing.
        """
        logger.info(f"Parsing GitHub URL: {url}")

        # Update token if provided as a parameter
        if github_token:
            self.github_token = github_token
            self.session.headers.update({'Authorization': f'token {github_token}'})

        try:
            owner, repo, branch, folder_path = self.parse_github_url(url)
            logger.info(f"Repository: {owner}/{repo}")
            logger.info(f"Branch: {branch}")
            logger.info(f"Folder: {folder_path or 'root'}")

            # Always build the expected layout so post-download validation
            # still works when pre_validate is disabled.
            expected_structure = self.expected_folder_structure()

            if pre_validate:
                logger.info("\nValidating remote folder structure...")
                remote_validation = self.validate_remote_structure(
                    owner, repo, folder_path, branch, expected_structure
                )

                if not remote_validation['valid']:
                    logger.error("Remote structure validation failed!")

                    if remote_validation['missing_files']:
                        logger.error(
                            f"Missing files: {', '.join(remote_validation['missing_files'])}"
                        )

                    if remote_validation['missing_dirs']:
                        logger.error(
                            f"Missing directories: {', '.join(remote_validation['missing_dirs'])}"
                        )

                    if remote_validation['warnings']:
                        for warning in remote_validation['warnings']:
                            logger.error(f"Warning: {warning}")

                    # Print the expected structure in a nice format
                    tree_view = self._format_expected_structure()
                    logger.info("\nThe repository must have the following structure:")
                    logger.info(tree_view)

                    logger.error(
                        "Download cancelled: Repository structure does not match the expected format."
                    )
                    sys.exit(1)  # Exit without proceeding with download
                else:
                    logger.info("Remote structure validation passed!")

            os.makedirs(output_dir, exist_ok=True)
            logger.info(f"Created output directory: {output_dir}")

            logger.info("\nStarting download...")
            start_time = time.time()
            try:
                self.process_folder(owner, repo, folder_path, output_dir, branch)

                elapsed_time = time.time() - start_time
                logger.info(f"\nDownload completed in {elapsed_time:.2f} seconds")
                logger.info(f"Files saved to: {os.path.abspath(output_dir)}")

                if validate_structure:
                    logger.info("\nValidating downloaded folder structure...")
                    validation_result = self.validate_folder_structure(
                        output_dir, expected_structure
                    )

                    if validation_result['valid']:
                        logger.info("Folder structure post validation passed!")
                    else:
                        logger.error("Folder structure validation failed!")

                        if validation_result['missing_files']:
                            logger.info(
                                f"Missing files: {', '.join(validation_result['missing_files'])}"
                            )

                        if validation_result['missing_dirs']:
                            logger.info(
                                f"Missing directories: {', '.join(validation_result['missing_dirs'])}"
                            )

                        if validation_result['warnings']:
                            for warning in validation_result['warnings']:
                                logger.info(f"Warning: {warning}")
            except requests.RequestException as e:
                # Critical error - the main folder cannot be processed
                logger.error(
                    f"\nDownload failed: {e}, \n No files were downloaded to: {os.path.abspath(output_dir)}"
                )
                sys.exit(1)

        except ValueError as e:
            logger.error(f"Error: {e}")
            sys.exit(1)
        except Exception as e:
            logger.error(f"Unexpected error: {e}")
            sys.exit(1)
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|