clarifai 10.11.1__py3-none-any.whl → 10.11.2rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (159)
  1. clarifai/__init__.py +1 -1
  2. clarifai/__pycache__/__init__.cpython-310.pyc +0 -0
  3. clarifai/__pycache__/errors.cpython-310.pyc +0 -0
  4. clarifai/__pycache__/versions.cpython-310.pyc +0 -0
  5. clarifai/cli/__pycache__/__init__.cpython-310.pyc +0 -0
  6. clarifai/cli/__pycache__/base.cpython-310.pyc +0 -0
  7. clarifai/cli/__pycache__/compute_cluster.cpython-310.pyc +0 -0
  8. clarifai/cli/__pycache__/deployment.cpython-310.pyc +0 -0
  9. clarifai/cli/__pycache__/model.cpython-310.pyc +0 -0
  10. clarifai/cli/__pycache__/nodepool.cpython-310.pyc +0 -0
  11. clarifai/client/__pycache__/__init__.cpython-310.pyc +0 -0
  12. clarifai/client/__pycache__/app.cpython-310.pyc +0 -0
  13. clarifai/client/__pycache__/base.cpython-310.pyc +0 -0
  14. clarifai/client/__pycache__/dataset.cpython-310.pyc +0 -0
  15. clarifai/client/__pycache__/input.cpython-310.pyc +0 -0
  16. clarifai/client/__pycache__/lister.cpython-310.pyc +0 -0
  17. clarifai/client/__pycache__/model.cpython-310.pyc +0 -0
  18. clarifai/client/__pycache__/module.cpython-310.pyc +0 -0
  19. clarifai/client/__pycache__/runner.cpython-310.pyc +0 -0
  20. clarifai/client/__pycache__/search.cpython-310.pyc +0 -0
  21. clarifai/client/__pycache__/user.cpython-310.pyc +0 -0
  22. clarifai/client/__pycache__/workflow.cpython-310.pyc +0 -0
  23. clarifai/client/auth/__pycache__/__init__.cpython-310.pyc +0 -0
  24. clarifai/client/auth/__pycache__/helper.cpython-310.pyc +0 -0
  25. clarifai/client/auth/__pycache__/register.cpython-310.pyc +0 -0
  26. clarifai/client/auth/__pycache__/stub.cpython-310.pyc +0 -0
  27. clarifai/client/dataset.py +4 -4
  28. clarifai/client/model.py +94 -13
  29. clarifai/constants/__pycache__/dataset.cpython-310.pyc +0 -0
  30. clarifai/constants/__pycache__/model.cpython-310.pyc +0 -0
  31. clarifai/constants/__pycache__/search.cpython-310.pyc +0 -0
  32. clarifai/datasets/__pycache__/__init__.cpython-310.pyc +0 -0
  33. clarifai/datasets/export/__pycache__/__init__.cpython-310.pyc +0 -0
  34. clarifai/datasets/export/__pycache__/inputs_annotations.cpython-310.pyc +0 -0
  35. clarifai/datasets/upload/__pycache__/__init__.cpython-310.pyc +0 -0
  36. clarifai/datasets/upload/__pycache__/base.cpython-310.pyc +0 -0
  37. clarifai/datasets/upload/__pycache__/features.cpython-310.pyc +0 -0
  38. clarifai/datasets/upload/__pycache__/image.cpython-310.pyc +0 -0
  39. clarifai/datasets/upload/__pycache__/text.cpython-310.pyc +0 -0
  40. clarifai/datasets/upload/__pycache__/utils.cpython-310.pyc +0 -0
  41. clarifai/datasets/upload/features.py +1 -1
  42. clarifai/datasets/upload/multimodal.py +2 -1
  43. clarifai/datasets/upload/text.py +3 -2
  44. clarifai/models/__pycache__/__init__.cpython-310.pyc +0 -0
  45. clarifai/models/model_serving/README.md +158 -0
  46. clarifai/models/model_serving/__init__.py +14 -0
  47. clarifai/models/model_serving/__pycache__/__init__.cpython-310.pyc +0 -0
  48. clarifai/models/model_serving/__pycache__/constants.cpython-310.pyc +0 -0
  49. clarifai/models/model_serving/cli/__init__.py +12 -0
  50. clarifai/models/model_serving/cli/__pycache__/__init__.cpython-310.pyc +0 -0
  51. clarifai/models/model_serving/cli/__pycache__/_utils.cpython-310.pyc +0 -0
  52. clarifai/models/model_serving/cli/__pycache__/base.cpython-310.pyc +0 -0
  53. clarifai/models/model_serving/cli/__pycache__/build.cpython-310.pyc +0 -0
  54. clarifai/models/model_serving/cli/__pycache__/create.cpython-310.pyc +0 -0
  55. clarifai/models/model_serving/cli/_utils.py +53 -0
  56. clarifai/models/model_serving/cli/base.py +14 -0
  57. clarifai/models/model_serving/cli/build.py +79 -0
  58. clarifai/models/model_serving/cli/clarifai_clis.py +33 -0
  59. clarifai/models/model_serving/cli/create.py +171 -0
  60. clarifai/models/model_serving/cli/example_cli.py +34 -0
  61. clarifai/models/model_serving/cli/login.py +26 -0
  62. clarifai/models/model_serving/cli/upload.py +183 -0
  63. clarifai/models/model_serving/constants.py +21 -0
  64. clarifai/models/model_serving/docs/cli.md +161 -0
  65. clarifai/models/model_serving/docs/concepts.md +229 -0
  66. clarifai/models/model_serving/docs/dependencies.md +11 -0
  67. clarifai/models/model_serving/docs/inference_parameters.md +139 -0
  68. clarifai/models/model_serving/docs/model_types.md +19 -0
  69. clarifai/models/model_serving/model_config/__init__.py +16 -0
  70. clarifai/models/model_serving/model_config/__pycache__/__init__.cpython-310.pyc +0 -0
  71. clarifai/models/model_serving/model_config/__pycache__/base.cpython-310.pyc +0 -0
  72. clarifai/models/model_serving/model_config/__pycache__/config.cpython-310.pyc +0 -0
  73. clarifai/models/model_serving/model_config/__pycache__/inference_parameter.cpython-310.pyc +0 -0
  74. clarifai/models/model_serving/model_config/__pycache__/output.cpython-310.pyc +0 -0
  75. clarifai/models/model_serving/model_config/base.py +369 -0
  76. clarifai/models/model_serving/model_config/config.py +312 -0
  77. clarifai/models/model_serving/model_config/inference_parameter.py +129 -0
  78. clarifai/models/model_serving/model_config/model_types_config/multimodal-embedder.yaml +25 -0
  79. clarifai/models/model_serving/model_config/model_types_config/text-classifier.yaml +19 -0
  80. clarifai/models/model_serving/model_config/model_types_config/text-embedder.yaml +20 -0
  81. clarifai/models/model_serving/model_config/model_types_config/text-to-image.yaml +19 -0
  82. clarifai/models/model_serving/model_config/model_types_config/text-to-text.yaml +19 -0
  83. clarifai/models/model_serving/model_config/model_types_config/visual-classifier.yaml +22 -0
  84. clarifai/models/model_serving/model_config/model_types_config/visual-detector.yaml +32 -0
  85. clarifai/models/model_serving/model_config/model_types_config/visual-embedder.yaml +19 -0
  86. clarifai/models/model_serving/model_config/model_types_config/visual-segmenter.yaml +19 -0
  87. clarifai/models/model_serving/model_config/output.py +133 -0
  88. clarifai/models/model_serving/model_config/triton/__init__.py +14 -0
  89. clarifai/models/model_serving/model_config/triton/__pycache__/__init__.cpython-310.pyc +0 -0
  90. clarifai/models/model_serving/model_config/triton/__pycache__/serializer.cpython-310.pyc +0 -0
  91. clarifai/models/model_serving/model_config/triton/__pycache__/triton_config.cpython-310.pyc +0 -0
  92. clarifai/models/model_serving/model_config/triton/__pycache__/wrappers.cpython-310.pyc +0 -0
  93. clarifai/models/model_serving/model_config/triton/serializer.py +136 -0
  94. clarifai/models/model_serving/model_config/triton/triton_config.py +182 -0
  95. clarifai/models/model_serving/model_config/triton/wrappers.py +281 -0
  96. clarifai/models/model_serving/repo_build/__init__.py +14 -0
  97. clarifai/models/model_serving/repo_build/__pycache__/__init__.cpython-310.pyc +0 -0
  98. clarifai/models/model_serving/repo_build/__pycache__/build.cpython-310.pyc +0 -0
  99. clarifai/models/model_serving/repo_build/build.py +198 -0
  100. clarifai/models/model_serving/repo_build/static_files/__pycache__/base_test.cpython-310-pytest-7.2.0.pyc +0 -0
  101. clarifai/models/model_serving/repo_build/static_files/_requirements.txt +2 -0
  102. clarifai/models/model_serving/repo_build/static_files/base_test.py +169 -0
  103. clarifai/models/model_serving/repo_build/static_files/inference.py +26 -0
  104. clarifai/models/model_serving/repo_build/static_files/sample_clarifai_config.yaml +25 -0
  105. clarifai/models/model_serving/repo_build/static_files/test.py +40 -0
  106. clarifai/models/model_serving/repo_build/static_files/triton/model.py +75 -0
  107. clarifai/models/model_serving/utils.py +31 -0
  108. clarifai/rag/__pycache__/__init__.cpython-310.pyc +0 -0
  109. clarifai/rag/__pycache__/rag.cpython-310.pyc +0 -0
  110. clarifai/rag/__pycache__/utils.cpython-310.pyc +0 -0
  111. clarifai/runners/__pycache__/__init__.cpython-310.pyc +0 -0
  112. clarifai/runners/__pycache__/server.cpython-310.pyc +0 -0
  113. clarifai/runners/deepgram_live_transcribe.py +98 -0
  114. clarifai/runners/deepgram_live_transcribe.py~ +98 -0
  115. clarifai/runners/deepgram_runner.py +131 -0
  116. clarifai/runners/deepgram_runner.py~ +130 -0
  117. clarifai/runners/dockerfile_template/Dockerfile.cpu.template +31 -0
  118. clarifai/runners/dockerfile_template/Dockerfile.cuda.template +79 -0
  119. clarifai/runners/example_llama2.py~ +72 -0
  120. clarifai/runners/matt_example.py +89 -0
  121. clarifai/runners/matt_example.py~ +87 -0
  122. clarifai/runners/matt_llm_example.py +129 -0
  123. clarifai/runners/matt_llm_example.py~ +128 -0
  124. clarifai/runners/models/__pycache__/__init__.cpython-310.pyc +0 -0
  125. clarifai/runners/models/__pycache__/base_typed_model.cpython-310.pyc +0 -0
  126. clarifai/runners/models/__pycache__/model_class.cpython-310.pyc +0 -0
  127. clarifai/runners/models/__pycache__/model_run_locally.cpython-310.pyc +0 -0
  128. clarifai/runners/models/__pycache__/model_runner.cpython-310.pyc +0 -0
  129. clarifai/runners/models/__pycache__/model_servicer.cpython-310.pyc +0 -0
  130. clarifai/runners/models/__pycache__/model_upload.cpython-310.pyc +0 -0
  131. clarifai/runners/models/model_upload.py +75 -10
  132. clarifai/runners/utils/__pycache__/__init__.cpython-310.pyc +0 -0
  133. clarifai/runners/utils/__pycache__/const.cpython-310.pyc +0 -0
  134. clarifai/runners/utils/__pycache__/data_handler.cpython-310.pyc +0 -0
  135. clarifai/runners/utils/__pycache__/data_utils.cpython-310.pyc +0 -0
  136. clarifai/runners/utils/__pycache__/loader.cpython-310.pyc +0 -0
  137. clarifai/runners/utils/__pycache__/logging.cpython-310.pyc +0 -0
  138. clarifai/runners/utils/__pycache__/url_fetcher.cpython-310.pyc +0 -0
  139. clarifai/runners/utils/const.py +27 -25
  140. clarifai/runners/utils/loader.py +65 -17
  141. clarifai/runners/utils/logging.py +6 -0
  142. clarifai/schema/__pycache__/search.cpython-310.pyc +0 -0
  143. clarifai/urls/__pycache__/helper.cpython-310.pyc +0 -0
  144. clarifai/utils/__pycache__/__init__.cpython-310.pyc +0 -0
  145. clarifai/utils/__pycache__/logging.cpython-310.pyc +0 -0
  146. clarifai/utils/__pycache__/misc.cpython-310.pyc +0 -0
  147. clarifai/utils/__pycache__/model_train.cpython-310.pyc +0 -0
  148. clarifai/utils/logging.py +7 -0
  149. clarifai/workflows/__pycache__/__init__.cpython-310.pyc +0 -0
  150. clarifai/workflows/__pycache__/export.cpython-310.pyc +0 -0
  151. clarifai/workflows/__pycache__/utils.cpython-310.pyc +0 -0
  152. clarifai/workflows/__pycache__/validate.cpython-310.pyc +0 -0
  153. {clarifai-10.11.1.dist-info → clarifai-10.11.2rc2.dist-info}/METADATA +15 -15
  154. clarifai-10.11.2rc2.dist-info/RECORD +242 -0
  155. {clarifai-10.11.1.dist-info → clarifai-10.11.2rc2.dist-info}/WHEEL +1 -1
  156. clarifai-10.11.1.dist-info/RECORD +0 -100
  157. {clarifai-10.11.1.dist-info → clarifai-10.11.2rc2.dist-info}/LICENSE +0 -0
  158. {clarifai-10.11.1.dist-info → clarifai-10.11.2rc2.dist-info}/entry_points.txt +0 -0
  159. {clarifai-10.11.1.dist-info → clarifai-10.11.2rc2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,129 @@
1
+ from clarifai_grpc.grpc.api import resources_pb2, service_pb2
2
+ from clarifai_grpc.grpc.api.status import status_code_pb2, status_pb2
3
+ from collections.abc import Iterator
4
+ from google.protobuf import json_format
5
+
6
+ from clarifai.client.runner import Runner
7
+ import time
8
+ from threading import Thread
9
+
10
+ import grpc
11
+ import requests
12
+
13
+ from transformers import (AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer)
14
+
15
# Hugging Face checkpoint served by this example runner.
model_name_or_path = "TheBloke/Llama-2-7B-chat-GPTQ"
model_basename = "model"
use_triton = False

# The tokenizer, model and streamer are created once at import time and are shared by every
# request that MyRunner.generate handles.
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map='auto')
streamer = TextIteratorStreamer(tokenizer)
print("Model loaded")
22
+
23
+
24
class MyRunner(Runner):
  """Example runner that streams text generated by the module-level ``model``.

  ``run_input`` is a toy single-input handler that appends "Hello World" to text inputs and
  rewrites the domain of image URLs. ``generate`` feeds each input text to ``model`` and yields
  one response per chunk that the module-level ``streamer`` produces.
  """

  def run_input(self, input: resources_pb2.Input, output_info: resources_pb2.OutputInfo,
                **kwargs) -> resources_pb2.Output:
    """Build a single output for a single input.

    Args:
      input: The input to process; ``input.data.text.raw`` and ``input.data.image.url`` are read.
      output_info: Membership-tested and indexed with "params", so it is presumably a dict-like
        view of the output info despite the annotation -- TODO confirm against the Runner base.
      **kwargs: Optional extras; an "extra" entry, if present, is appended to text outputs.

    Returns:
      An Output whose ``data.text.raw`` holds the modified text or the rewritten image URL.
    """
    output = resources_pb2.Output()
    data = input.data

    # Optional use of output_info
    params_dict = output_info["params"] if "params" in output_info else {}

    if data.text.raw != "":
      output.data.text.raw = (
          data.text.raw + "Hello World" + params_dict.get("hello", "") + kwargs.get("extra", ""))
    if data.image.url != "":
      # Default to "" so that a missing "domain" param no longer raises TypeError when the
      # replacement string is concatenated.
      output.data.text.raw = data.image.url.replace(
          "samples.clarifai.com", "newdomain.com" + params_dict.get("domain", ""))
    return output

  def generate(self, request: service_pb2.PostModelOutputsRequest
              ) -> Iterator[service_pb2.MultiOutputResponse]:
    """Stream generated text back, one response per chunk read from the streamer.

    For every input the prompt is taken from ``data.text.raw`` or, failing that, downloaded from
    ``data.text.url``. ``model.generate`` runs on a worker thread so chunks can be read from the
    shared ``streamer`` while generation is still in progress. Per-chunk and total timings are
    printed to stdout.

    Args:
      request: The request whose ``inputs`` are processed in order.

    Yields:
      A MultiOutputResponse with SUCCESS status and a single text output per streamed chunk.

    Raises:
      Exception: If an input has neither ``data.text.raw`` nor ``data.text.url``.
    """
    for inp in request.inputs:
      data = inp.data
      print('start')
      if data.text.raw != "":
        input_text = data.text.raw
      elif data.text.url != "":
        input_text = str(requests.get(data.text.url).text)
      else:
        raise Exception("Need to include data.text.raw or data.text.url in your inputs.")

      max_tokens = 1024
      inputs = tokenizer(input_text, return_tensors='pt')
      generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=max_tokens)
      thread = Thread(target=model.generate, kwargs=generation_kwargs)
      thread.start()

      times = []
      st = time.time()
      total_start = st
      for new_text in streamer:
        # Time elapsed since the previous chunk (or since generation started).
        duration = time.time() - st
        st = time.time()
        print(f"Duration: {duration}")
        times.append(duration)

        print("Posting: ", new_text)
        output = resources_pb2.Output()
        output.data.text.raw = new_text
        result = service_pb2.MultiOutputResponse(
            status=status_pb2.Status(
                code=status_code_pb2.SUCCESS,
                description="Success",
            ),
            outputs=[output],
        )
        yield result

      # Wait for the worker to finish before the shared streamer is reused for the next input.
      thread.join()
      print(f"Total time: {time.time() - total_start}")
      if times:  # avoid ZeroDivisionError when nothing was streamed
        print(f"Average time: {sum(times) / len(times)}")
121
+
122
+
123
if __name__ == '__main__':
  # Set these environment variables before running the example:
  #   CLARIFAI_PAT
  #   CLARIFAI_USER_ID
  #
  # The runner must already exist in the Clarifai API; its ID is passed as runner_id below.
  runner = MyRunner(runner_id="matt-test-runner", base_url="http://q6:32013", num_parallel_polls=1)
  runner.start()
@@ -0,0 +1,128 @@
1
+ from clarifai_grpc.grpc.api import resources_pb2, service_pb2
2
+ from clarifai_grpc.grpc.api.status import status_code_pb2, status_pb2
3
+ from collections.abc import Iterator
4
+ from google.protobuf import json_format
5
+
6
+ from clarifai.client.runner import Runner
7
+ import time
8
+ from threading import Thread
9
+
10
+ import grpc
11
+ import requests
12
+
13
+ from transformers import (AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer)
14
+
15
# Hugging Face checkpoint served by this example runner.
model_name_or_path = "TheBloke/Llama-2-7B-chat-GPTQ"
model_basename = "model"
use_triton = False

# The tokenizer, model and streamer are created once at import time and are shared by every
# request that MyRunner.generate handles.
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map='auto')
streamer = TextIteratorStreamer(tokenizer)
print("Model loaded")
22
+
23
+
24
class MyRunner(Runner):
  """Example runner that streams text generated by the module-level ``model``.

  ``run_input`` is a toy single-input handler that appends "Hello World" to text inputs and
  rewrites the domain of image URLs. ``generate`` feeds each input text to ``model`` and yields
  one response per chunk that the module-level ``streamer`` produces.
  """

  def run_input(self, input: resources_pb2.Input, output_info: resources_pb2.OutputInfo,
                **kwargs) -> resources_pb2.Output:
    """Build a single output for a single input.

    Args:
      input: The input to process; ``input.data.text.raw`` and ``input.data.image.url`` are read.
      output_info: Membership-tested and indexed with "params", so it is presumably a dict-like
        view of the output info despite the annotation -- TODO confirm against the Runner base.
      **kwargs: Optional extras; an "extra" entry, if present, is appended to text outputs.

    Returns:
      An Output whose ``data.text.raw`` holds the modified text or the rewritten image URL.
    """
    output = resources_pb2.Output()
    data = input.data

    # Optional use of output_info
    params_dict = output_info["params"] if "params" in output_info else {}

    if data.text.raw != "":
      output.data.text.raw = (
          data.text.raw + "Hello World" + params_dict.get("hello", "") + kwargs.get("extra", ""))
    if data.image.url != "":
      # Default to "" so that a missing "domain" param no longer raises TypeError when the
      # replacement string is concatenated.
      output.data.text.raw = data.image.url.replace(
          "samples.clarifai.com", "newdomain.com" + params_dict.get("domain", ""))
    return output

  def generate(self, request: service_pb2.PostModelOutputsRequest
              ) -> Iterator[service_pb2.MultiOutputResponse]:
    """Stream generated text back, one response per chunk read from the streamer.

    For every input the prompt is taken from ``data.text.raw`` or, failing that, downloaded from
    ``data.text.url``. ``model.generate`` runs on a worker thread so chunks can be read from the
    shared ``streamer`` while generation is still in progress. Per-chunk and total timings are
    printed to stdout.

    Args:
      request: The request whose ``inputs`` are processed in order.

    Yields:
      A MultiOutputResponse with SUCCESS status and a single text output per streamed chunk.

    Raises:
      Exception: If an input has neither ``data.text.raw`` nor ``data.text.url``.
    """
    for inp in request.inputs:
      data = inp.data
      print('start')
      if data.text.raw != "":
        input_text = data.text.raw
      elif data.text.url != "":
        input_text = str(requests.get(data.text.url).text)
      else:
        raise Exception("Need to include data.text.raw or data.text.url in your inputs.")

      max_tokens = 1024
      inputs = tokenizer(input_text, return_tensors='pt')
      generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=max_tokens)
      thread = Thread(target=model.generate, kwargs=generation_kwargs)
      thread.start()

      times = []
      st = time.time()
      # Kept separately from `st`, which is reset after every chunk; measuring the total from
      # `st` would only report the time since the last chunk.
      total_start = st
      for new_text in streamer:
        # Time elapsed since the previous chunk (or since generation started).
        duration = time.time() - st
        st = time.time()
        print(f"Duration: {duration}")
        times.append(duration)

        print("Posting: ", new_text)
        output = resources_pb2.Output()
        output.data.text.raw = new_text
        result = service_pb2.MultiOutputResponse(
            status=status_pb2.Status(
                code=status_code_pb2.SUCCESS,
                description="Success",
            ),
            outputs=[output],
        )
        yield result

      # Wait for the worker to finish before the shared streamer is reused for the next input.
      thread.join()
      print(f"Total time: {time.time() - total_start}")
      if times:  # avoid ZeroDivisionError when nothing was streamed
        print(f"Average time: {sum(times) / len(times)}")
120
+
121
+
122
if __name__ == '__main__':
  # Set these environment variables before running the example:
  #   CLARIFAI_PAT
  #   CLARIFAI_USER_ID
  #
  # The runner must already exist in the Clarifai API; its ID is passed as runner_id below.
  runner = MyRunner(runner_id="matt-test-runner", base_url="http://q6:32013", num_parallel_polls=1)
  runner.start()
@@ -1,5 +1,6 @@
1
1
  import os
2
2
  import re
3
+ import sys
3
4
  import time
4
5
  from string import Template
5
6
 
@@ -8,6 +9,7 @@ from clarifai_grpc.grpc.api import resources_pb2, service_pb2
8
9
  from clarifai_grpc.grpc.api.status import status_code_pb2
9
10
  from google.protobuf import json_format
10
11
  from rich import print
12
+ from rich.markup import escape
11
13
 
12
14
  from clarifai.client import BaseClient
13
15
  from clarifai.runners.utils.const import (AVAILABLE_PYTHON_IMAGES, AVAILABLE_TORCH_IMAGES,
@@ -27,10 +29,17 @@ def _clear_line(n: int = 1) -> None:
27
29
 
28
30
  class ModelUploader:
29
31
 
30
- def __init__(self, folder: str):
32
+ def __init__(self, folder: str, validate_api_ids: bool = True):
33
+ """
34
+ :param folder: The folder containing the model.py, config.yaml, requirements.txt and
35
+ checkpoints.
36
+ :param validate_api_ids: Whether to validate the user_id and app_id in the config file.
37
+ """
31
38
  self._client = None
32
39
  self.folder = self._validate_folder(folder)
33
40
  self.config = self._load_config(os.path.join(self.folder, 'config.yaml'))
41
+ self.validate_api_ids = validate_api_ids
42
+ self._validate_config()
34
43
  self.model_proto = self._get_model_proto()
35
44
  self.model_id = self.model_proto.id
36
45
  self.model_version_id = None
@@ -69,13 +78,64 @@ class ModelUploader:
69
78
  assert "repo_id" in self.config.get("checkpoints"), "No repo_id specified in the config file"
70
79
  repo_id = self.config.get("checkpoints").get("repo_id")
71
80
 
72
- # prefer env var for HF_TOKEN but if not provided then use the one from config.yaml if any.
73
- if 'HF_TOKEN' in os.environ:
74
- hf_token = os.environ['HF_TOKEN']
75
- else:
76
- hf_token = self.config.get("checkpoints").get("hf_token", None)
81
+ # get from config.yaml otherwise fall back to HF_TOKEN env var.
82
+ hf_token = self.config.get("checkpoints").get("hf_token", os.environ.get("HF_TOKEN", None))
77
83
  return repo_id, hf_token
78
84
 
85
def _check_app_exists(self):
  """Return True when the configured app can be fetched, or when ID validation is disabled.

  With ``self.validate_api_ids`` falsy no request is made and True is returned. Otherwise a
  GetApp request is sent for ``self.client.user_app_id`` and the result is whether the response
  status code is SUCCESS.
  """
  if not self.validate_api_ids:
    return True
  request = service_pb2.GetAppRequest(user_app_id=self.client.user_app_id)
  response = self.client.STUB.GetApp(request)
  return response.status.code == status_code_pb2.SUCCESS
92
+
93
def _validate_config_model(self):
  """Validate the ``model`` section of the config and check that the target app exists.

  Asserts that the section and its ``user_id``, ``app_id``, ``model_type_id`` and ``id`` keys
  are present and not empty strings. Logs an error and exits the process with status 1 when the
  model id contains a '.' or when ``_check_app_exists`` reports that the app was not found.
  """
  assert "model" in self.config, "model section not found in the config file"
  model_cfg = self.config.get('model')

  # (config key, name used in the assertion message); "id" is reported as "model_id".
  required = (
      ("user_id", "user_id"),
      ("app_id", "app_id"),
      ("model_type_id", "model_type_id"),
      ("id", "model_id"),
  )

  for key, label in required:
    assert key in model_cfg, f"{label} not found in the config file"

  if '.' in model_cfg.get('id'):
    logger.error(
        "Model ID cannot contain '.', please remove it from the model_id in the config file")
    sys.exit(1)

  for key, label in required:
    assert model_cfg.get(key) != "", f"{label} cannot be empty in the config file"

  if self._check_app_exists():
    return
  logger.error(
      f"App {self.client.user_app_id.app_id} not found for user {self.client.user_app_id.user_id}"
  )
  sys.exit(1)
115
+
116
def _validate_config(self):
  """Validate the loaded config file.

  Runs the ``model`` section checks, validates the ``checkpoints`` section when present,
  asserts that ``inference_compute_info`` exists, asserts that ``concepts`` is only used with a
  model type listed in CONCEPTS_REQUIRED_MODEL_TYPE, and finally checks any configured Hugging
  Face token. An invalid token is only logged; it does not abort validation.
  """
  self._validate_config_model()

  # Validate the checkpoints section once and keep the token for the check at the end; the
  # section was previously validated a second time just to read the token back.
  hf_token = None
  if self.config.get("checkpoints"):
    _, hf_token = self._validate_config_checkpoints()

  assert "inference_compute_info" in self.config, "inference_compute_info not found in the config file"

  if self.config.get("concepts"):
    model_type_id = self.config.get('model').get('model_type_id')
    assert model_type_id in CONCEPTS_REQUIRED_MODEL_TYPE, f"Model type {model_type_id} not supported for concepts"

  if hf_token and not HuggingFaceLoader.validate_hftoken(hf_token):
    logger.error(
        "Invalid Hugging Face token provided in the config file, this might cause issues with downloading the restricted model checkpoints."
    )
    logger.info("Continuing without Hugging Face token")
138
+
79
139
  @property
80
140
  def client(self):
81
141
  if self._client is None:
@@ -259,6 +319,7 @@ class ModelUploader:
259
319
 
260
320
  if not success:
261
321
  logger.error(f"Failed to download checkpoints for model {repo_id}")
322
+ sys.exit(1)
262
323
  else:
263
324
  logger.info(f"Downloaded checkpoints for model {repo_id}")
264
325
  return success
@@ -353,10 +414,10 @@ class ModelUploader:
353
414
  model_version_proto = self.get_model_version_proto()
354
415
 
355
416
  if download_checkpoints:
356
- tar_cmd = f"tar --exclude=*~ -czvf {self.tar_file} -C {self.folder} ."
417
+ tar_cmd = f"tar --exclude=*~ --exclude={self.tar_file} -czvf {self.tar_file} -C {self.folder} ."
357
418
  else: # we don't want to send the checkpoints up even if they are in the folder.
358
419
  logger.info(f"Skipping {self.checkpoint_path} in the tar file that is uploaded.")
359
- tar_cmd = f"tar --exclude={self.checkpoint_suffix} --exclude=*~ -czvf {self.tar_file} -C {self.folder} ."
420
+ tar_cmd = f"tar --exclude={self.checkpoint_suffix} --exclude=*~ --exclude={self.tar_file} -czvf {self.tar_file} -C {self.folder} ."
360
421
  # Tar the folder
361
422
  logger.debug(tar_cmd)
362
423
  os.system(tar_cmd)
@@ -366,6 +427,9 @@ class ModelUploader:
366
427
  logger.info(f"Size of the tar is: {file_size} bytes")
367
428
 
368
429
  self.maybe_create_model()
430
+ if not self.check_model_exists():
431
+ logger.error(f"Failed to create model: {self.model_proto.id}")
432
+ sys.exit(1)
369
433
 
370
434
  for response in self.client.STUB.PostModelVersionsUpload(
371
435
  self.model_version_stream_upload_iterator(model_version_proto, file_path),):
@@ -430,7 +494,7 @@ class ModelUploader:
430
494
  file_size = os.path.getsize(file_path)
431
495
  logger.info(f"Uploading model version of model {self.model_proto.id}")
432
496
  logger.info(f"Using file '{os.path.basename(file_path)}' of size: {file_size} bytes")
433
- return service_pb2.PostModelVersionsUploadRequest(
497
+ result = service_pb2.PostModelVersionsUploadRequest(
434
498
  upload_config=service_pb2.PostModelVersionsUploadConfig(
435
499
  user_app_id=self.client.user_app_id,
436
500
  model_id=self.model_proto.id,
@@ -438,6 +502,7 @@ class ModelUploader:
438
502
  total_size=file_size,
439
503
  is_v3=self.is_v3,
440
504
  ))
505
+ return result
441
506
 
442
507
  def get_model_build_logs(self):
443
508
  logs_request = service_pb2.ListLogEntriesRequest(
@@ -470,7 +535,7 @@ class ModelUploader:
470
535
  for log_entry in logs.log_entries:
471
536
  if log_entry.url not in seen_logs:
472
537
  seen_logs.add(log_entry.url)
473
- print(f"Model Building Logs...: {log_entry.message.strip()}")
538
+ print(f"Model Building Logs...: {escape(log_entry.message.strip())}")
474
539
  time.sleep(1)
475
540
  elif status_code == status_code_pb2.MODEL_TRAINED:
476
541
  logger.info(f"\nModel build complete! (elapsed {time.time() - st:.1f}s)")
@@ -1,39 +1,41 @@
1
- PYTHON_BASE_IMAGE = 'public.ecr.aws/clarifai-models/python-base:{python_version}'
2
- TORCH_BASE_IMAGE = 'public.ecr.aws/clarifai-models/torch:{torch_version}-py{python_version}-cuda{cuda_version}'
1
+ import os
2
+
3
+ registry = os.environ.get('CLARIFAI_BASE_IMAGE_REGISTRY', 'public.ecr.aws/clarifai-models')
4
+
5
+ PYTHON_BASE_IMAGE = registry + '/python-base:{python_version}'
6
+ TORCH_BASE_IMAGE = registry + '/torch:{torch_version}-py{python_version}-cuda{cuda_version}'
3
7
 
4
8
  # List of available python base images
5
- AVAILABLE_PYTHON_IMAGES = ['3.8', '3.9', '3.10', '3.11', '3.12', '3.13']
9
+ AVAILABLE_PYTHON_IMAGES = ['3.11', '3.12', '3.13']
6
10
 
7
- DEFAULT_PYTHON_VERSION = 3.11
11
+ DEFAULT_PYTHON_VERSION = 3.12
8
12
 
9
13
  # List of available torch images
10
14
  AVAILABLE_TORCH_IMAGES = [
11
- '1.13.1-py3.8-cuda117',
12
- '1.13.1-py3.9-cuda117',
13
- '1.13.1-py3.10-cuda117',
14
- '2.1.2-py3.8-cuda121',
15
- '2.1.2-py3.9-cuda121',
16
- '2.1.2-py3.10-cuda121',
17
- '2.1.2-py3.11-cuda121',
18
- '2.2.2-py3.8-cuda121',
19
- '2.2.2-py3.9-cuda121',
20
- '2.2.2-py3.10-cuda121',
21
15
  '2.2.2-py3.11-cuda121',
22
- '2.2.2-py3.12-cuda121',
23
- '2.3.1-py3.8-cuda121',
24
- '2.3.1-py3.9-cuda121',
25
- '2.3.1-py3.10-cuda121',
26
16
  '2.3.1-py3.11-cuda121',
27
- '2.3.1-py3.12-cuda121',
28
- '2.4.1-py3.8-cuda124',
29
- '2.4.1-py3.9-cuda124',
30
- '2.4.1-py3.10-cuda124',
17
+ '2.4.0-py3.11-cuda121',
18
+ '2.4.0-py3.11-cuda124',
19
+ '2.4.1-py3.11-cuda121',
31
20
  '2.4.1-py3.11-cuda124',
32
- '2.4.1-py3.12-cuda124',
33
- '2.5.1-py3.9-cuda124',
34
- '2.5.1-py3.10-cuda124',
21
+ '2.5.1-py3.11-cuda121',
35
22
  '2.5.1-py3.11-cuda124',
23
+ '2.2.2-py3.12-cuda121',
24
+ '2.3.1-py3.12-cuda121',
25
+ '2.4.0-py3.12-cuda121',
26
+ '2.4.0-py3.12-cuda124',
27
+ '2.4.1-py3.12-cuda121',
28
+ '2.4.1-py3.12-cuda124',
29
+ '2.5.1-py3.12-cuda121',
36
30
  '2.5.1-py3.12-cuda124',
31
+ # '2.2.2-py3.13-cuda121',
32
+ # '2.3.1-py3.13-cuda121',
33
+ # '2.4.0-py3.13-cuda121',
34
+ # '2.4.0-py3.13-cuda124',
35
+ # '2.4.1-py3.13-cuda121',
36
+ # '2.4.1-py3.13-cuda124',
37
+ # '2.5.1-py3.13-cuda121',
38
+ # '2.5.1-py3.13-cuda124',
37
39
  ]
38
40
  CONCEPTS_REQUIRED_MODEL_TYPE = [
39
41
  'visual-classifier', 'visual-detector', 'visual-segmenter', 'text-classifier'
@@ -1,6 +1,8 @@
1
+ import fnmatch
1
2
  import importlib.util
2
3
  import json
3
4
  import os
5
+ import shutil
4
6
  import subprocess
5
7
 
6
8
  from clarifai.utils.logging import logger
@@ -14,22 +16,28 @@ class HuggingFaceLoader:
14
16
  self.repo_id = repo_id
15
17
  self.token = token
16
18
  if token:
17
- try:
18
- if importlib.util.find_spec("huggingface_hub") is None:
19
- raise ImportError(self.HF_DOWNLOAD_TEXT)
20
- os.environ['HF_TOKEN'] = token
21
- from huggingface_hub import HfApi
22
-
23
- api = HfApi()
24
- api.whoami(token=token)
25
-
19
+ if self.validate_hftoken(token):
26
20
  subprocess.run(f'huggingface-cli login --token={os.environ["HF_TOKEN"]}', shell=True)
27
- except Exception as e:
28
- logger.error(
29
- f"Error setting up Hugging Face token, please make sure you have the correct token: {e}"
30
- )
21
+ logger.info("Hugging Face token validated")
22
+ else:
31
23
  logger.info("Continuing without Hugging Face token")
32
24
 
25
@classmethod
def validate_hftoken(cls, hf_token: str):
  """Check a Hugging Face token by calling ``HfApi().whoami`` with it.

  On success the token is left exported in the ``HF_TOKEN`` environment variable. On any
  failure the variable is restored to the value it had before the call, so a rejected token
  is not left behind in the environment for later use.

  Args:
    hf_token: The token to validate.

  Returns:
    True if ``whoami`` succeeded, False if ``huggingface_hub`` is not installed or any other
    error occurred (the error is logged).
  """
  previous_token = os.environ.get('HF_TOKEN')
  try:
    if importlib.util.find_spec("huggingface_hub") is None:
      raise ImportError(cls.HF_DOWNLOAD_TEXT)
    os.environ['HF_TOKEN'] = hf_token
    from huggingface_hub import HfApi

    api = HfApi()
    api.whoami(token=hf_token)
    return True
  except Exception as e:
    # Undo the export above so the environment is left exactly as it was found.
    if previous_token is None:
      os.environ.pop('HF_TOKEN', None)
    else:
      os.environ['HF_TOKEN'] = previous_token
    logger.error(
        f"Error setting up Hugging Face token, please make sure you have the correct token: {e}")
    return False
40
+
33
41
  def download_checkpoints(self, checkpoint_path: str):
34
42
  # throw error if huggingface_hub wasn't installed
35
43
  try:
@@ -46,10 +54,20 @@ class HuggingFaceLoader:
46
54
  if not is_hf_model_exists:
47
55
  logger.error("Model %s not found on Hugging Face" % (self.repo_id))
48
56
  return False
57
+
58
+ self.ignore_patterns = self._get_ignore_patterns()
49
59
  snapshot_download(
50
- repo_id=self.repo_id, local_dir=checkpoint_path, local_dir_use_symlinks=False)
60
+ repo_id=self.repo_id,
61
+ local_dir=checkpoint_path,
62
+ local_dir_use_symlinks=False,
63
+ ignore_patterns=self.ignore_patterns)
64
+ # Remove the `.cache` folder if it exists
65
+ cache_path = os.path.join(checkpoint_path, ".cache")
66
+ if os.path.exists(cache_path) and os.path.isdir(cache_path):
67
+ shutil.rmtree(cache_path)
68
+
51
69
  except Exception as e:
52
- logger.exception(f"Error downloading model checkpoints {e}")
70
+ logger.error(f"Error downloading model checkpoints {e}")
53
71
  return False
54
72
  finally:
55
73
  is_downloaded = self.validate_download(checkpoint_path)
@@ -94,11 +112,41 @@ class HuggingFaceLoader:
94
112
  from huggingface_hub import list_repo_files
95
113
  except ImportError:
96
114
  raise ImportError(self.HF_DOWNLOAD_TEXT)
115
+ # Get the list of files on the repo
116
+ repo_files = list_repo_files(self.repo_id, token=self.token)
117
+
118
+ self.ignore_patterns = self._get_ignore_patterns()
119
+ # Get the list of files on the repo that are not ignored
120
+ if getattr(self, "ignore_patterns", None):
121
+ patterns = self.ignore_patterns
122
+
123
+ def should_ignore(file_path):
124
+ return any(fnmatch.fnmatch(file_path, pattern) for pattern in patterns)
125
+
126
+ repo_files = [f for f in repo_files if not should_ignore(f)]
127
+
128
+ # Check if downloaded files match the files we expect (ignoring ignored patterns)
97
129
  checkpoint_dir_files = [
98
130
  f for dp, dn, fn in os.walk(os.path.expanduser(checkpoint_path)) for f in fn
99
131
  ]
100
- return (len(checkpoint_dir_files) >= len(list_repo_files(self.repo_id))) and len(
101
- list_repo_files(self.repo_id)) > 0
132
+
133
+ # Validate by comparing file lists
134
+ return len(checkpoint_dir_files) >= len(repo_files) and not (
135
+ len(set(repo_files) - set(checkpoint_dir_files)) > 0) and len(repo_files) > 0
136
+
137
def _get_ignore_patterns(self):
  """Work out which repo files to skip, store the result on ``self`` and return it.

  Lists the files of ``self.repo_id`` (using ``self.token``). If any file name ends with
  ".safetensors", the patterns cover ``original`` sub-folders and ``.pth`` / ``.bin`` files;
  otherwise the result is None.

  Raises:
    ImportError: If ``huggingface_hub`` cannot be imported.
  """
  try:
    from huggingface_hub import list_repo_files
  except ImportError:
    raise ImportError(self.HF_DOWNLOAD_TEXT)

  has_safetensors = False
  for file_name in list_repo_files(self.repo_id, token=self.token):
    if file_name.endswith(".safetensors"):
      has_safetensors = True
      break

  if has_safetensors:
    self.ignore_patterns = ["**/original/*", "**/*.pth", "**/*.bin", "*.pth", "*.bin"]
  else:
    self.ignore_patterns = None
  return self.ignore_patterns
102
150
 
103
151
  @staticmethod
104
152
  def validate_config(checkpoint_path: str):
@@ -0,0 +1,6 @@
1
+ import os
2
+
3
+ from clarifai.utils.logging import get_logger
4
+
5
# Log level comes from the LOG_LEVEL environment variable and defaults to "INFO".
logger_level = os.getenv("LOG_LEVEL", "INFO")
# Logger for this module, built by the shared clarifai.utils.logging.get_logger helper.
logger = get_logger(logger_level, __name__)