clarifai 11.8.2__py3-none-any.whl → 11.8.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
clarifai/__init__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "11.8.2"
1
+ __version__ = "11.8.4"
clarifai/cli/model.py CHANGED
@@ -28,6 +28,7 @@ from clarifai.utils.constants import (
28
28
  DEFAULT_LOCAL_RUNNER_NODEPOOL_CONFIG,
29
29
  DEFAULT_LOCAL_RUNNER_NODEPOOL_ID,
30
30
  DEFAULT_OLLAMA_MODEL_REPO_BRANCH,
31
+ DEFAULT_PYTHON_MODEL_REPO_BRANCH,
31
32
  DEFAULT_TOOLKIT_MODEL_REPO,
32
33
  DEFAULT_VLLM_MODEL_REPO_BRANCH,
33
34
  )
@@ -74,9 +75,11 @@ def model():
74
75
  )
75
76
  @click.option(
76
77
  '--toolkit',
77
- type=click.Choice(['ollama', 'huggingface', 'lmstudio', 'vllm'], case_sensitive=False),
78
+ type=click.Choice(
79
+ ['ollama', 'huggingface', 'lmstudio', 'vllm', 'python'], case_sensitive=False
80
+ ),
78
81
  required=False,
79
- help='Toolkit to use for model initialization. Currently supports "ollama", "huggingface", "lmstudio" and "vllm".',
82
+ help='Toolkit to use for model initialization. Currently supports "ollama", "huggingface", "lmstudio", "vllm" and "python".',
80
83
  )
81
84
  @click.option(
82
85
  '--model-name',
@@ -95,7 +98,9 @@ def model():
95
98
  help='Context length for the Ollama model. Defaults to 8192.',
96
99
  required=False,
97
100
  )
101
+ @click.pass_context
98
102
  def init(
103
+ ctx,
99
104
  model_path,
100
105
  model_type_id,
101
106
  github_pat,
@@ -124,11 +129,13 @@ def init(
124
129
  MODEL_TYPE_ID: Type of model to create. If not specified, defaults to "text-to-text" for text models.\n
125
130
  GITHUB_PAT: GitHub Personal Access Token for authentication when cloning private repositories.\n
126
131
  GITHUB_URL: GitHub repository URL or "repo" format to clone a repository from. If provided, the entire repository contents will be copied to the target directory instead of using default templates.\n
127
- TOOLKIT: Toolkit to use for model initialization. Currently supports "ollama", "huggingface", "lmstudio" and "vllm".\n
132
+ TOOLKIT: Toolkit to use for model initialization. Currently supports "ollama", "huggingface", "lmstudio", "vllm" and "python".\n
128
133
  MODEL_NAME: Model name to configure when using --toolkit. For ollama toolkit, this sets the Ollama model to use (e.g., "llama3.1", "mistral", etc.). For vllm & huggingface toolkit, this sets the Hugging Face model repo_id (e.g., "Qwen/Qwen3-4B-Instruct-2507"). For lmstudio toolkit, this sets the LM Studio model name (e.g., "qwen/qwen3-4b-thinking-2507").\n
129
134
  PORT: Port to run the (Ollama/lmstudio) server on. Defaults to 23333.\n
130
135
  CONTEXT_LENGTH: Context length for the (Ollama/lmstudio) model. Defaults to 8192.\n
131
136
  """
137
+ validate_context(ctx)
138
+ user_id = ctx.obj.current.user_id
132
139
  # Resolve the absolute path
133
140
  model_path = os.path.abspath(model_path)
134
141
 
@@ -176,6 +183,9 @@ def init(
176
183
  elif toolkit == 'vllm':
177
184
  github_url = DEFAULT_TOOLKIT_MODEL_REPO
178
185
  branch = DEFAULT_VLLM_MODEL_REPO_BRANCH
186
+ elif toolkit == 'python':
187
+ github_url = DEFAULT_TOOLKIT_MODEL_REPO
188
+ branch = DEFAULT_PYTHON_MODEL_REPO_BRANCH
179
189
 
180
190
  if github_url:
181
191
  downloader = GitHubDownloader(
@@ -231,6 +241,44 @@ def init(
231
241
  repo_url = format_github_repo_url(github_url)
232
242
  repo_url = f"https://github.com/{owner}/{repo}"
233
243
 
244
+ try:
245
+ # Create a temporary directory for cloning
246
+ with tempfile.TemporaryDirectory(prefix="clarifai_model_") as clone_dir:
247
+ # Clone the repository with explicit branch parameter
248
+ if not clone_github_repo(repo_url, clone_dir, github_pat, branch):
249
+ logger.error(f"Failed to clone repository from {repo_url}")
250
+ github_url = None # Fall back to template mode
251
+
252
+ else:
253
+ # Copy the entire repository content to target directory (excluding .git)
254
+ for item in os.listdir(clone_dir):
255
+ if item == '.git':
256
+ continue
257
+
258
+ source_path = os.path.join(clone_dir, item)
259
+ target_path = os.path.join(model_path, item)
260
+
261
+ if os.path.isdir(source_path):
262
+ shutil.copytree(source_path, target_path, dirs_exist_ok=True)
263
+ else:
264
+ shutil.copy2(source_path, target_path)
265
+
266
+ logger.info(f"Successfully cloned repository to {model_path}")
267
+ logger.info(
268
+ "Model initialization complete with GitHub repository clone"
269
+ )
270
+ logger.info("Next steps:")
271
+ logger.info("1. Review the model configuration")
272
+ logger.info("2. Install any required dependencies manually")
273
+ logger.info(
274
+ "3. Test the model locally using 'clarifai model local-test'"
275
+ )
276
+ return
277
+
278
+ except Exception as e:
279
+ logger.error(f"Failed to clone GitHub repository: {e}")
280
+ github_url = None # Fall back to template mode
281
+
234
282
  if toolkit:
235
283
  logger.info(f"Initializing model from GitHub repository: {github_url}")
236
284
 
@@ -240,41 +288,41 @@ def init(
240
288
  else:
241
289
  repo_url = format_github_repo_url(github_url)
242
290
 
243
- try:
244
- # Create a temporary directory for cloning
245
- with tempfile.TemporaryDirectory(prefix="clarifai_model_") as clone_dir:
246
- # Clone the repository with explicit branch parameter
247
- if not clone_github_repo(repo_url, clone_dir, github_pat, branch):
248
- logger.error(f"Failed to clone repository from {repo_url}")
249
- github_url = None # Fall back to template mode
250
-
251
- else:
252
- # Copy the entire repository content to target directory (excluding .git)
253
- for item in os.listdir(clone_dir):
254
- if item == '.git':
255
- continue
256
-
257
- source_path = os.path.join(clone_dir, item)
258
- target_path = os.path.join(model_path, item)
259
-
260
- if os.path.isdir(source_path):
261
- shutil.copytree(source_path, target_path, dirs_exist_ok=True)
262
- else:
263
- shutil.copy2(source_path, target_path)
291
+ try:
292
+ # Create a temporary directory for cloning
293
+ with tempfile.TemporaryDirectory(prefix="clarifai_model_") as clone_dir:
294
+ # Clone the repository with explicit branch parameter
295
+ if not clone_github_repo(repo_url, clone_dir, github_pat, branch):
296
+ logger.error(f"Failed to clone repository from {repo_url}")
297
+ github_url = None # Fall back to template mode
264
298
 
265
- except Exception as e:
266
- logger.error(f"Failed to clone GitHub repository: {e}")
267
- github_url = None
299
+ else:
300
+ # Copy the entire repository content to target directory (excluding .git)
301
+ for item in os.listdir(clone_dir):
302
+ if item == '.git':
303
+ continue
304
+
305
+ source_path = os.path.join(clone_dir, item)
306
+ target_path = os.path.join(model_path, item)
268
307
 
269
- if (model_name or port or context_length) and (toolkit == 'ollama'):
270
- customize_ollama_model(model_path, model_name, port, context_length)
308
+ if os.path.isdir(source_path):
309
+ shutil.copytree(source_path, target_path, dirs_exist_ok=True)
310
+ else:
311
+ shutil.copy2(source_path, target_path)
271
312
 
272
- if (model_name or port or context_length) and (toolkit == 'lmstudio'):
273
- customize_lmstudio_model(model_path, model_name, port, context_length)
313
+ except Exception as e:
314
+ logger.error(f"Failed to clone GitHub repository: {e}")
315
+ github_url = None
316
+
317
+ if (user_id or model_name or port or context_length) and (toolkit == 'ollama'):
318
+ customize_ollama_model(model_path, user_id, model_name, port, context_length)
319
+
320
+ if (user_id or model_name or port or context_length) and (toolkit == 'lmstudio'):
321
+ customize_lmstudio_model(model_path, user_id, model_name, port, context_length)
274
322
 
275
- if model_name and (toolkit == 'huggingface' or toolkit == 'vllm'):
323
+ if (user_id or model_name) and (toolkit == 'huggingface' or toolkit == 'vllm'):
276
324
  # Update the config.yaml file with the provided model name
277
- customize_huggingface_model(model_path, model_name)
325
+ customize_huggingface_model(model_path, user_id, model_name)
278
326
 
279
327
  if github_url:
280
328
  logger.info("Model initialization complete with GitHub repository")
@@ -288,12 +336,20 @@ def init(
288
336
  logger.info("Initializing model with default templates...")
289
337
  input("Press Enter to continue...")
290
338
 
339
+ from clarifai.cli.base import input_or_default
291
340
  from clarifai.cli.templates.model_templates import (
292
341
  get_config_template,
293
342
  get_model_template,
294
343
  get_requirements_template,
295
344
  )
296
345
 
346
+ # Collect additional parameters for OpenAI template
347
+ template_kwargs = {}
348
+ if model_type_id == "openai":
349
+ logger.info("Configuring OpenAI local runner...")
350
+ port = input_or_default("Enter port (default: 8000): ", "8000")
351
+ template_kwargs = {"port": port}
352
+
297
353
  # Create the 1/ subdirectory
298
354
  model_version_dir = os.path.join(model_path, "1")
299
355
  os.makedirs(model_version_dir, exist_ok=True)
@@ -303,7 +359,7 @@ def init(
303
359
  if os.path.exists(model_py_path):
304
360
  logger.warning(f"File {model_py_path} already exists, skipping...")
305
361
  else:
306
- model_template = get_model_template(model_type_id)
362
+ model_template = get_model_template(model_type_id, **template_kwargs)
307
363
  with open(model_py_path, 'w') as f:
308
364
  f.write(model_template)
309
365
  logger.info(f"Created {model_py_path}")
@@ -325,7 +381,9 @@ def init(
325
381
  else:
326
382
  config_model_type_id = DEFAULT_LOCAL_RUNNER_MODEL_TYPE # default
327
383
 
328
- config_template = get_config_template(config_model_type_id)
384
+ config_template = get_config_template(
385
+ user_id=user_id, model_type_id=config_model_type_id
386
+ )
329
387
  with open(config_path, 'w') as f:
330
388
  f.write(config_template)
331
389
  logger.info(f"Created {config_path}")
clarifai/cli/pipeline.py CHANGED
@@ -26,14 +26,19 @@ def pipeline():
26
26
 
27
27
  @pipeline.command()
28
28
  @click.argument("path", type=click.Path(exists=True), required=False, default=".")
29
- def upload(path):
29
+ @click.option(
30
+ '--no-lockfile',
31
+ is_flag=True,
32
+ help='Skip creating config-lock.yaml file.',
33
+ )
34
+ def upload(path, no_lockfile):
30
35
  """Upload a pipeline with associated pipeline steps to Clarifai.
31
36
 
32
37
  PATH: Path to the pipeline configuration file or directory containing config.yaml. If not specified, the current directory is used by default.
33
38
  """
34
39
  from clarifai.runners.pipelines.pipeline_builder import upload_pipeline
35
40
 
36
- upload_pipeline(path)
41
+ upload_pipeline(path, no_lockfile=no_lockfile)
37
42
 
38
43
 
39
44
  @pipeline.command()
@@ -106,15 +111,32 @@ def run(
106
111
 
107
112
  validate_context(ctx)
108
113
 
114
+ # Try to load from config-lock.yaml first if no config is specified
115
+ lockfile_path = os.path.join(os.getcwd(), "config-lock.yaml")
116
+ if not config and os.path.exists(lockfile_path):
117
+ logger.info("Found config-lock.yaml, using it as default config source")
118
+ config = lockfile_path
119
+
109
120
  if config:
110
121
  config_data = from_yaml(config)
111
- pipeline_id = config_data.get('pipeline_id', pipeline_id)
112
- pipeline_version_id = config_data.get('pipeline_version_id', pipeline_version_id)
122
+
123
+ # Handle both regular config format and lockfile format
124
+ if 'pipeline' in config_data and isinstance(config_data['pipeline'], dict):
125
+ pipeline_config = config_data['pipeline']
126
+ pipeline_id = pipeline_config.get('id', pipeline_id)
127
+ pipeline_version_id = pipeline_config.get('version_id', pipeline_version_id)
128
+ user_id = pipeline_config.get('user_id', user_id)
129
+ app_id = pipeline_config.get('app_id', app_id)
130
+ else:
131
+ # Fallback to flat config structure
132
+ pipeline_id = config_data.get('pipeline_id', pipeline_id)
133
+ pipeline_version_id = config_data.get('pipeline_version_id', pipeline_version_id)
134
+ user_id = config_data.get('user_id', user_id)
135
+ app_id = config_data.get('app_id', app_id)
136
+
113
137
  pipeline_version_run_id = config_data.get(
114
138
  'pipeline_version_run_id', pipeline_version_run_id
115
139
  )
116
- user_id = config_data.get('user_id', user_id)
117
- app_id = config_data.get('app_id', app_id)
118
140
  nodepool_id = config_data.get('nodepool_id', nodepool_id)
119
141
  compute_cluster_id = config_data.get('compute_cluster_id', compute_cluster_id)
120
142
  pipeline_url = config_data.get('pipeline_url', pipeline_url)
@@ -319,6 +341,62 @@ def init(pipeline_path):
319
341
  logger.info("3. Run 'clarifai pipeline upload config.yaml' to upload your pipeline")
320
342
 
321
343
 
344
+ @pipeline.command()
345
+ @click.argument(
346
+ "lockfile_path", type=click.Path(exists=True), required=False, default="config-lock.yaml"
347
+ )
348
+ def validate_lock(lockfile_path):
349
+ """Validate a config-lock.yaml file for schema and reference consistency.
350
+
351
+ LOCKFILE_PATH: Path to the config-lock.yaml file. If not specified, looks for config-lock.yaml in current directory.
352
+ """
353
+ from clarifai.runners.utils.pipeline_validation import PipelineConfigValidator
354
+ from clarifai.utils.cli import from_yaml
355
+
356
+ try:
357
+ # Load the lockfile
358
+ lockfile_data = from_yaml(lockfile_path)
359
+
360
+ # Validate required fields
361
+ if "pipeline" not in lockfile_data:
362
+ raise ValueError("'pipeline' section not found in lockfile")
363
+
364
+ pipeline = lockfile_data["pipeline"]
365
+ required_fields = ["id", "user_id", "app_id", "version_id"]
366
+
367
+ for field in required_fields:
368
+ if field not in pipeline:
369
+ raise ValueError(f"Required field '{field}' not found in pipeline section")
370
+ if not pipeline[field]:
371
+ raise ValueError(f"Required field '{field}' cannot be empty")
372
+
373
+ # Validate orchestration spec if present
374
+ if "orchestration_spec" in pipeline:
375
+ # Create a temporary config structure for validation
376
+ temp_config = {
377
+ "pipeline": {
378
+ "id": pipeline["id"],
379
+ "user_id": pipeline["user_id"],
380
+ "app_id": pipeline["app_id"],
381
+ "orchestration_spec": pipeline["orchestration_spec"],
382
+ }
383
+ }
384
+
385
+ # Use existing validator to check orchestration spec
386
+ validator = PipelineConfigValidator()
387
+ validator._validate_orchestration_spec(temp_config)
388
+
389
+ logger.info(f"✅ Lockfile {lockfile_path} is valid")
390
+ logger.info(f"Pipeline: {pipeline['id']}")
391
+ logger.info(f"User: {pipeline['user_id']}")
392
+ logger.info(f"App: {pipeline['app_id']}")
393
+ logger.info(f"Version: {pipeline['version_id']}")
394
+
395
+ except Exception as e:
396
+ logger.error(f"❌ Lockfile validation failed: {e}")
397
+ raise click.Abort()
398
+
399
+
322
400
  @pipeline.command(['ls'])
323
401
  @click.option('--page_no', required=False, help='Page number to list.', default=1)
324
402
  @click.option('--per_page', required=False, help='Number of items per page.', default=16)
@@ -99,9 +99,9 @@ class MyModel(MCPModelClass):
99
99
  '''
100
100
 
101
101
 
102
- def get_openai_model_class_template() -> str:
102
+ def get_openai_model_class_template(port: str = "8000") -> str:
103
103
  """Return the template for an OpenAIModelClass-based model."""
104
- return '''from typing import List
104
+ return f'''from typing import List, Iterator
105
105
  from openai import OpenAI
106
106
  from clarifai.runners.models.openai_class import OpenAIModelClass
107
107
  from clarifai.runners.utils.data_utils import Param
@@ -114,12 +114,11 @@ class MyModel(OpenAIModelClass):
114
114
  # Configure your OpenAI-compatible client for local model
115
115
  client = OpenAI(
116
116
  api_key="local-key", # TODO: please fill in - use your local API key
117
- base_url="http://localhost:8000/v1", # TODO: please fill in - your local model server endpoint
117
+ base_url="http://localhost:{port}/v1", # TODO: please fill in - your local model server endpoint
118
118
  )
119
119
 
120
- # TODO: please fill in
121
- # Specify the model name to use
122
- model = "my-local-model" # TODO: please fill in - replace with your local model name
120
+ # Automatically get the first available model
121
+ model = client.models.list().data[0].id
123
122
 
124
123
  def load_model(self):
125
124
  """Optional: Add any additional model loading logic here."""
@@ -157,7 +156,7 @@ class MyModel(OpenAIModelClass):
157
156
  max_tokens: int = Param(default=256, description="The maximum number of tokens to generate. Shorter token lengths will provide faster performance."),
158
157
  temperature: float = Param(default=1.0, description="A decimal number that determines the degree of randomness in the response"),
159
158
  top_p: float = Param(default=1.0, description="An alternative to sampling with temperature, where the model considers the results of the tokens with top_p probability mass."),
160
- ):
159
+ ) -> Iterator[str]:
161
160
  """Stream a completion response using the OpenAI client."""
162
161
  # TODO: please fill in
163
162
  # Implement your streaming logic here
@@ -178,13 +177,13 @@ class MyModel(OpenAIModelClass):
178
177
  '''
179
178
 
180
179
 
181
- def get_config_template(model_type_id: str = "any-to-any") -> str:
180
+ def get_config_template(user_id: str = None, model_type_id: str = "any-to-any") -> str:
182
181
  """Return the template for config.yaml."""
183
182
  return f'''# Configuration file for your Clarifai model
184
183
 
185
184
  model:
186
185
  id: "my-model" # TODO: please fill in - replace with your model ID
187
- user_id: "user_id" # TODO: please fill in - replace with your user ID
186
+ user_id: "{user_id}" # TODO: please fill in - replace with your user ID
188
187
  app_id: "app_id" # TODO: please fill in - replace with your app ID
189
188
  model_type_id: "{model_type_id}" # TODO: please fill in - replace if different model type ID
190
189
 
@@ -237,8 +236,16 @@ MODEL_TYPE_TEMPLATES = {
237
236
  }
238
237
 
239
238
 
240
- def get_model_template(model_type_id: str = None) -> str:
239
+ def get_model_template(model_type_id: str = None, **kwargs) -> str:
241
240
  """Get the appropriate model template based on model_type_id."""
242
241
  if model_type_id in MODEL_TYPE_TEMPLATES:
243
- return MODEL_TYPE_TEMPLATES[model_type_id]()
242
+ template_func = MODEL_TYPE_TEMPLATES[model_type_id]
243
+ # Check if the template function accepts additional parameters
244
+ import inspect
245
+
246
+ sig = inspect.signature(template_func)
247
+ if len(sig.parameters) > 0:
248
+ return template_func(**kwargs)
249
+ else:
250
+ return template_func()
244
251
  return get_model_class_template()
clarifai/client/model.py CHANGED
@@ -108,6 +108,7 @@ class Model(Lister, BaseClient):
108
108
  self.training_params = {}
109
109
  self.input_types = None
110
110
  self._client = None
111
+ self._async_client = None
111
112
  self._added_methods = False
112
113
  BaseClient.__init__(
113
114
  self,
@@ -522,11 +523,29 @@ class Model(Lister, BaseClient):
522
523
  model=self.model_info,
523
524
  runner_selector=self._runner_selector,
524
525
  )
526
+ # Pass in None for async stub will create it.
525
527
  self._client = ModelClient(
526
- stub=self.STUB, async_stub=self.async_stub, request_template=request_template
528
+ stub=self.STUB, async_stub=None, request_template=request_template
527
529
  )
528
530
  return self._client
529
531
 
532
+ @property
533
+ def async_client(self):
534
+ """Get the asynchronous client instance (with async stub)."""
535
+ if self._async_client is None:
536
+ request_template = service_pb2.PostModelOutputsRequest(
537
+ user_app_id=self.user_app_id,
538
+ model_id=self.id,
539
+ version_id=self.model_version.id,
540
+ model=self.model_info,
541
+ runner_selector=self._runner_selector,
542
+ )
543
+ # Create async client with async stub
544
+ self._async_client = ModelClient(
545
+ stub=self.STUB, async_stub=self.async_stub, request_template=request_template
546
+ )
547
+ return self._async_client
548
+
530
549
  def predict(self, *args, **kwargs):
531
550
  """
532
551
  Calls the model's predict() method with the given arguments.
@@ -573,16 +592,16 @@ class Model(Lister, BaseClient):
573
592
  )
574
593
  inference_params = kwargs.get('inference_params', {})
575
594
  output_config = kwargs.get('output_config', {})
576
- return await self.client._async_predict_by_proto(
595
+ return await self.async_client._async_predict_by_proto(
577
596
  inputs=inputs, inference_params=inference_params, output_config=output_config
578
597
  )
579
598
 
580
599
  # Adding try-except, since the await works differently with jupyter kernels and in regular python scripts.
581
600
  try:
582
- return await self.client.predict(*args, **kwargs)
601
+ return await self.async_client.predict(*args, **kwargs)
583
602
  except TypeError:
584
603
  # In jupyter, it returns a str object instead of a co-routine.
585
- return self.client.predict(*args, **kwargs)
604
+ return self.async_client.predict(*args, **kwargs)
586
605
 
587
606
  def __getattr__(self, name):
588
607
  try:
@@ -595,7 +614,10 @@ class Model(Lister, BaseClient):
595
614
  self.client.fetch()
596
615
  for method_name in self.client._method_signatures.keys():
597
616
  if not hasattr(self, method_name):
598
- setattr(self, method_name, getattr(self.client, method_name))
617
+ if method_name.startswith('async_'):
618
+ setattr(self, method_name, getattr(self.async_client, method_name))
619
+ else:
620
+ setattr(self, method_name, getattr(self.client, method_name))
599
621
  if hasattr(self.client, name):
600
622
  return getattr(self.client, name)
601
623
  raise AttributeError(f"'{self.__class__.__name__}' object has no attribute '{name}'")
@@ -838,11 +860,11 @@ class Model(Lister, BaseClient):
838
860
  )
839
861
  inference_params = kwargs.get('inference_params', {})
840
862
  output_config = kwargs.get('output_config', {})
841
- return self.client._async_generate_by_proto(
863
+ return self.async_client._async_generate_by_proto(
842
864
  inputs=inputs, inference_params=inference_params, output_config=output_config
843
865
  )
844
866
 
845
- return self.client.generate(*args, **kwargs)
867
+ return self.async_client.generate(*args, **kwargs)
846
868
 
847
869
  def generate_by_filepath(
848
870
  self,
@@ -1047,11 +1069,11 @@ class Model(Lister, BaseClient):
1047
1069
  )
1048
1070
  inference_params = kwargs.get('inference_params', {})
1049
1071
  output_config = kwargs.get('output_config', {})
1050
- return self.client._async_stream_by_proto(
1072
+ return self.async_client._async_stream_by_proto(
1051
1073
  inputs=inputs, inference_params=inference_params, output_config=output_config
1052
1074
  )
1053
1075
 
1054
- return self.client.async_stream(*args, **kwargs)
1076
+ return self.async_client.async_stream(*args, **kwargs)
1055
1077
 
1056
1078
  def stream_by_filepath(
1057
1079
  self,
@@ -4,6 +4,7 @@ import sys
4
4
  import tarfile
5
5
  import time
6
6
  from string import Template
7
+ from typing import List, Optional
7
8
 
8
9
  import yaml
9
10
  from clarifai_grpc.grpc.api import resources_pb2, service_pb2
@@ -11,6 +12,7 @@ from clarifai_grpc.grpc.api.status import status_code_pb2
11
12
  from google.protobuf import json_format
12
13
 
13
14
  from clarifai.client.base import BaseClient
15
+ from clarifai.utils.hashing import hash_directory
14
16
  from clarifai.utils.logging import logger
15
17
  from clarifai.utils.misc import get_uuid
16
18
  from clarifai.versions import CLIENT_VERSION
@@ -22,12 +24,13 @@ UPLOAD_CHUNK_SIZE = 14 * 1024 * 1024
22
24
  class PipelineStepBuilder:
23
25
  """Pipeline Step Builder class for managing pipeline step upload to Clarifai."""
24
26
 
25
- def __init__(self, folder: str):
27
+ def __init__(self, folder: str, hash_exclusions: Optional[List[str]] = None):
26
28
  """
27
29
  Initialize PipelineStepBuilder.
28
30
 
29
31
  :param folder: The folder containing the pipeline step files (config.yaml, requirements.txt,
30
32
  dockerfile, and pipeline_step.py in 1/ subdirectory)
33
+ :param hash_exclusions: List of file names to exclude from hash calculation (defaults to ['config-lock.yaml'])
31
34
  """
32
35
  self._client = None
33
36
  self.folder = self._validate_folder(folder)
@@ -37,6 +40,10 @@ class PipelineStepBuilder:
37
40
  self.pipeline_step_id = self.pipeline_step_proto.id
38
41
  self.pipeline_step_version_id = None
39
42
  self.pipeline_step_compute_info = self._get_pipeline_step_compute_info()
43
+ # Configure files to exclude from hash calculation
44
+ self.hash_exclusions = (
45
+ hash_exclusions if hash_exclusions is not None else ['config-lock.yaml']
46
+ )
40
47
 
41
48
  @property
42
49
  def client(self):
@@ -490,6 +497,95 @@ COPY --link=true requirements.txt config.yaml /home/nonroot/main/
490
497
 
491
498
  raise TimeoutError("Pipeline step build did not finish in time")
492
499
 
500
+ def load_config_lock(self):
501
+ """
502
+ Load existing config-lock.yaml if it exists.
503
+
504
+ :return: Dictionary with config-lock data or None if file doesn't exist
505
+ """
506
+ config_lock_path = os.path.join(self.folder, "config-lock.yaml")
507
+ if os.path.exists(config_lock_path):
508
+ try:
509
+ with open(config_lock_path, 'r', encoding='utf-8') as f:
510
+ return yaml.safe_load(f)
511
+ except Exception as e:
512
+ logger.warning(f"Failed to load config-lock.yaml: {e}")
513
+ return None
514
+ return None
515
+
516
+ def should_upload_step(self, algo="md5"):
517
+ """
518
+ Check if the pipeline step should be uploaded based on hash comparison.
519
+
520
+ :param algo: Hash algorithm to use
521
+ :return: True if step should be uploaded, False otherwise
522
+ """
523
+ config_lock = self.load_config_lock()
524
+
525
+ # If no config-lock.yaml exists, upload the step (first time upload)
526
+ if config_lock is None:
527
+ logger.info("No config-lock.yaml found, will upload pipeline step")
528
+ return True
529
+
530
+ # Compare stored hash with freshly computed one
531
+ current_hash = hash_directory(self.folder, algo=algo, exclude_files=self.hash_exclusions)
532
+ stored_hash_info = config_lock.get("hash", {})
533
+ stored_hash = stored_hash_info.get("value", "")
534
+ stored_algo = stored_hash_info.get("algo", "md5")
535
+
536
+ # If algorithm changed, re-upload to update hash
537
+ if stored_algo != algo:
538
+ logger.info(
539
+ f"Hash algorithm changed from {stored_algo} to {algo}, will upload pipeline step"
540
+ )
541
+ return True
542
+
543
+ # If hash changed, upload
544
+ if current_hash != stored_hash:
545
+ logger.info(
546
+ f"Hash changed (was: {stored_hash}, now: {current_hash}), will upload pipeline step"
547
+ )
548
+ return True
549
+
550
+ logger.info(f"Hash unchanged ({current_hash}), skipping pipeline step upload")
551
+ return False
552
+
553
+ def generate_config_lock(self, version_id, algo="md5"):
554
+ """
555
+ Generate config-lock.yaml content for the pipeline step.
556
+
557
+ :param version_id: Pipeline step version ID
558
+ :param algo: Hash algorithm used
559
+ :return: Dictionary with config-lock data
560
+ """
561
+ # Compute hash
562
+ hash_value = hash_directory(self.folder, algo=algo, exclude_files=self.hash_exclusions)
563
+
564
+ # Create config-lock structure
565
+ config_lock = {"id": version_id, "hash": {"algo": algo, "value": hash_value}}
566
+
567
+ # Append the original config.yaml contents
568
+ config_lock.update(self.config)
569
+
570
+ return config_lock
571
+
572
+ def save_config_lock(self, version_id, algo="md5"):
573
+ """
574
+ Save config-lock.yaml file with pipeline step metadata.
575
+
576
+ :param version_id: Pipeline step version ID
577
+ :param algo: Hash algorithm used
578
+ """
579
+ config_lock_data = self.generate_config_lock(version_id, algo)
580
+ config_lock_path = os.path.join(self.folder, "config-lock.yaml")
581
+
582
+ try:
583
+ with open(config_lock_path, 'w', encoding='utf-8') as f:
584
+ yaml.dump(config_lock_data, f, default_flow_style=False, allow_unicode=True)
585
+ logger.info(f"Generated config-lock.yaml at {config_lock_path}")
586
+ except Exception as e:
587
+ logger.error(f"Failed to save config-lock.yaml: {e}")
588
+
493
589
 
494
590
  def upload_pipeline_step(folder, skip_dockerfile=False):
495
591
  """
@@ -116,13 +116,33 @@ class PipelineBuilder:
116
116
  return True
117
117
 
118
118
  def _upload_pipeline_step_with_version_capture(self, step_path: str) -> tuple[bool, str]:
119
- """Upload a pipeline step and capture its version ID."""
119
+ """
120
+ Upload a pipeline step and capture its version ID.
121
+ Implements hash-based optimization to only upload modified steps.
122
+ """
120
123
  try:
121
124
  # Use the existing pipeline step builder
122
125
  from clarifai.runners.pipeline_steps.pipeline_step_builder import PipelineStepBuilder
123
126
 
124
127
  builder = PipelineStepBuilder(step_path)
125
128
 
129
+ # Check if we should upload based on hash comparison
130
+ should_upload = builder.should_upload_step()
131
+
132
+ if not should_upload:
133
+ # Load existing version ID from config-lock.yaml
134
+ config_lock = builder.load_config_lock()
135
+ if config_lock and config_lock.get("id"):
136
+ version_id = config_lock["id"]
137
+ logger.info(
138
+ f"Using existing pipeline step version {version_id} (no changes detected)"
139
+ )
140
+ return True, version_id
141
+ else:
142
+ logger.warning(
143
+ "Hash indicates no upload needed, but no version ID found in config-lock.yaml. Proceeding with upload."
144
+ )
145
+
126
146
  # Create dockerfile if needed
127
147
  builder.create_dockerfile()
128
148
 
@@ -139,6 +159,11 @@ class PipelineBuilder:
139
159
  success = builder.upload_pipeline_step_version()
140
160
 
141
161
  if success and builder.pipeline_step_version_id:
162
+ # Generate config-lock.yaml with the new version ID
163
+ builder.save_config_lock(builder.pipeline_step_version_id)
164
+ logger.info(
165
+ f"Generated config-lock.yaml for pipeline step with version {builder.pipeline_step_version_id}"
166
+ )
142
167
  return True, builder.pipeline_step_version_id
143
168
  else:
144
169
  logger.error("Failed to get pipeline step version ID after upload")
@@ -148,42 +173,105 @@ class PipelineBuilder:
148
173
  logger.error(f"Error uploading pipeline step: {e}")
149
174
  return False, ""
150
175
 
151
- def update_config_with_versions(self) -> None:
152
- """Update the config.yaml with uploaded pipeline step versions."""
176
+ def prepare_lockfile_with_step_versions(self) -> Dict[str, Any]:
177
+ """Prepare lockfile data with step versions after pipeline step upload."""
153
178
  if not self.uploaded_step_versions:
154
- logger.info("No pipeline step versions to update in config")
155
- return
156
-
157
- logger.info("Updating config.yaml with pipeline step versions...")
179
+ logger.info("No pipeline step versions for lockfile")
158
180
 
159
- # Update the orchestration spec
181
+ # Create a copy of the orchestration spec to modify
160
182
  pipeline_config = self.config["pipeline"]
161
- orchestration_spec = pipeline_config["orchestration_spec"]
183
+ orchestration_spec = pipeline_config["orchestration_spec"].copy()
162
184
  argo_spec_str = orchestration_spec["argo_orchestration_spec"]
163
185
  argo_spec = yaml.safe_load(argo_spec_str)
164
186
 
165
187
  # Update templateRef names to include versions
166
188
  self._update_template_refs_with_versions(argo_spec)
167
189
 
168
- # Update the config
169
- orchestration_spec["argo_orchestration_spec"] = yaml.dump(
170
- argo_spec, Dumper=LiteralBlockDumper, default_flow_style=False
171
- )
190
+ # Create the partial lockfile data structure (without pipeline info)
191
+ lockfile_data = {
192
+ "pipeline": {
193
+ "id": self.pipeline_id,
194
+ "user_id": self.user_id,
195
+ "app_id": self.app_id,
196
+ "version_id": None, # Will be filled in later
197
+ "orchestration_spec": {
198
+ "argo_orchestration_spec": yaml.dump(
199
+ argo_spec, Dumper=LiteralBlockDumper, default_flow_style=False
200
+ )
201
+ },
202
+ }
203
+ }
204
+
205
+ return lockfile_data
206
+
207
+ def update_lockfile_with_pipeline_info(
208
+ self, lockfile_data: Dict[str, Any], pipeline_version_id: str
209
+ ) -> Dict[str, Any]:
210
+ """Update the prepared lockfile data with pipeline version information."""
211
+ lockfile_data["pipeline"]["version_id"] = pipeline_version_id
212
+ return lockfile_data
213
+
214
+ def generate_lockfile_data(
215
+ self, pipeline_id: str = None, pipeline_version_id: str = None
216
+ ) -> Dict[str, Any]:
217
+ """Generate the complete lockfile data structure without modifying config.yaml.
218
+
219
+ This method is kept for backward compatibility. The recommended approach is to use
220
+ prepare_lockfile_with_step_versions() followed by update_lockfile_with_pipeline_info().
221
+ """
222
+ if not self.uploaded_step_versions:
223
+ logger.info("No pipeline step versions for lockfile")
172
224
 
173
- # Remove uploaded directories from step_directories
174
- remaining_dirs = []
175
- for step_dir in pipeline_config.get("step_directories", []):
176
- if step_dir not in self.uploaded_step_versions:
177
- remaining_dirs.append(step_dir)
225
+ # Create a copy of the orchestration spec to modify
226
+ pipeline_config = self.config["pipeline"]
227
+ orchestration_spec = pipeline_config["orchestration_spec"].copy()
228
+ argo_spec_str = orchestration_spec["argo_orchestration_spec"]
229
+ argo_spec = yaml.safe_load(argo_spec_str)
178
230
 
179
- pipeline_config["step_directories"] = remaining_dirs
231
+ # Update templateRef names to include versions
232
+ self._update_template_refs_with_versions(argo_spec)
233
+
234
+ # Create the lockfile data structure
235
+ lockfile_data = {
236
+ "pipeline": {
237
+ "id": pipeline_id or self.pipeline_id,
238
+ "user_id": self.user_id,
239
+ "app_id": self.app_id,
240
+ "version_id": pipeline_version_id,
241
+ "orchestration_spec": {
242
+ "argo_orchestration_spec": yaml.dump(
243
+ argo_spec, Dumper=LiteralBlockDumper, default_flow_style=False
244
+ )
245
+ },
246
+ }
247
+ }
248
+
249
+ return lockfile_data
250
+
251
+ def save_lockfile(self, lockfile_data: Dict[str, Any], lockfile_path: str = None) -> None:
252
+ """Save lockfile data to config-lock.yaml."""
253
+ if lockfile_path is None:
254
+ lockfile_path = os.path.join(self.config_dir, "config-lock.yaml")
180
255
 
181
- # Save the updated config
182
- self._save_config()
183
- logger.info("Updated config.yaml with pipeline step versions")
256
+ try:
257
+ with open(lockfile_path, 'w', encoding="utf-8") as file:
258
+ yaml.dump(
259
+ lockfile_data,
260
+ file,
261
+ Dumper=LiteralBlockDumper,
262
+ default_flow_style=False,
263
+ sort_keys=False,
264
+ )
265
+ logger.info(f"Generated lockfile: {lockfile_path}")
266
+ except Exception as e:
267
+ raise ValueError(f"Error saving lockfile {lockfile_path}: {e}")
184
268
 
185
269
  def _update_template_refs_with_versions(self, argo_spec: Dict[str, Any]) -> None:
186
- """Update templateRef names in Argo spec to include version information."""
270
+ """
271
+ Update templateRef names in Argo spec to include version information.
272
+ The step versions should be resolved from the corresponding config-lock.yaml
273
+ file of each pipeline-step, located in the step_directories.
274
+ """
187
275
  for template in argo_spec["spec"]["templates"]:
188
276
  if "steps" in template:
189
277
  for step_group in template["steps"]:
@@ -199,12 +287,19 @@ class PipelineBuilder:
199
287
  step_name = parts[-1]
200
288
  # The step name should match the directory name or be derivable from it
201
289
  version_id = self.uploaded_step_versions.get(step_name, None)
290
+
291
+ # If not found in uploaded_step_versions, try to get from config-lock.yaml
292
+ if version_id is None:
293
+ version_id = self._get_version_from_config_lock(step_name)
294
+
202
295
  if version_id is not None:
203
296
  # Update the templateRef to include version
204
297
  new_name = f"{name}/versions/{version_id}"
205
298
  template_ref["name"] = new_name
206
299
  template_ref["template"] = new_name
207
300
  logger.info(f"Updated templateRef from {name} to {new_name}")
301
+ else:
302
+ logger.warning(f"Could not find version for step: {step_name}")
208
303
  elif self.validator.TEMPLATE_REF_WITH_VERSION_PATTERN.match(name):
209
304
  # strip the /versions/{version_id} from the end of name
210
305
  # to get the name like above
@@ -215,6 +310,11 @@ class PipelineBuilder:
215
310
  # if it already has a version, make sure it matches the uploaded
216
311
  # version
217
312
  version_id = self.uploaded_step_versions.get(step_name, None)
313
+
314
+ # If not found in uploaded_step_versions, try to get from config-lock.yaml
315
+ if version_id is None:
316
+ version_id = self._get_version_from_config_lock(step_name)
317
+
218
318
  if version_id is not None:
219
319
  # Update the templateRef to include version
220
320
  new_name = f"{name}/versions/{version_id}"
@@ -223,9 +323,51 @@ class PipelineBuilder:
223
323
  logger.info(
224
324
  f"Updated templateRef from {orig_name} to {new_name}"
225
325
  )
326
+ else:
327
+ logger.warning(f"Could not find version for step: {step_name}")
328
+
329
+ def _get_version_from_config_lock(self, step_name: str) -> str:
330
+ """
331
+ Get version ID from config-lock.yaml file in the corresponding step directory.
226
332
 
227
- def create_pipeline(self) -> bool:
228
- """Create the pipeline using PostPipelines RPC."""
333
+ :param step_name: Name of the pipeline step
334
+ :return: Version ID if found, None otherwise
335
+ """
336
+ pipeline_config = self.config["pipeline"]
337
+ step_directories = pipeline_config.get("step_directories", [])
338
+
339
+ for step_dir in step_directories:
340
+ # Check if step_dir matches step_name (handle both exact match and derivable cases)
341
+ if (
342
+ step_dir == step_name
343
+ or step_dir.endswith(f"/{step_name}")
344
+ or step_name in step_dir
345
+ ):
346
+ config_lock_path = os.path.join(self.config_dir, step_dir, "config-lock.yaml")
347
+
348
+ if os.path.exists(config_lock_path):
349
+ try:
350
+ with open(config_lock_path, 'r', encoding='utf-8') as f:
351
+ config_lock = yaml.safe_load(f)
352
+ version_id = config_lock.get("id")
353
+ if version_id:
354
+ logger.info(
355
+ f"Found version {version_id} for step {step_name} in {config_lock_path}"
356
+ )
357
+ return version_id
358
+ except Exception as e:
359
+ logger.warning(
360
+ f"Failed to read config-lock.yaml at {config_lock_path}: {e}"
361
+ )
362
+
363
+ return None
364
+
365
+ def create_pipeline(self) -> tuple[bool, str]:
366
+ """Create the pipeline using PostPipelines RPC.
367
+
368
+ Returns:
369
+ tuple[bool, str]: (success, pipeline_version_id)
370
+ """
229
371
  logger.info(f"Creating pipeline {self.pipeline_id}...")
230
372
 
231
373
  try:
@@ -243,6 +385,11 @@ class PipelineBuilder:
243
385
  argo_spec = yaml.safe_load(argo_spec_str)
244
386
  api_version = argo_spec.get("apiVersion", "argoproj.io/v1alpha1")
245
387
 
388
+ # Ensure that pipeline_config.argo_orchestration_spec_proto has the updated spec.templates.steps.templateRef values
389
+ # For each step, if the templateRef is missing a version, append the correct version at the end
390
+ # The step versions should be resolved from the corresponding config-lock.yaml file of each pipeline-step, located in the step_directories
391
+ self._update_template_refs_with_versions(argo_spec)
392
+
246
393
  # Create pipeline version with orchestration spec
247
394
  pipeline_version = resources_pb2.PipelineVersion()
248
395
  # Create orchestration spec proto
@@ -269,29 +416,32 @@ class PipelineBuilder:
269
416
  if response.status.code == status_code_pb2.SUCCESS:
270
417
  logger.info(f"Successfully created pipeline {self.pipeline_id}")
271
418
 
419
+ pipeline_version_id = ""
272
420
  # Log pipeline and version IDs if available in response
273
421
  if response.pipelines:
274
422
  created_pipeline = response.pipelines[0]
275
423
  logger.info(f"Pipeline ID: {created_pipeline.id}")
276
424
  if created_pipeline.pipeline_version and created_pipeline.pipeline_version.id:
277
- logger.info(f"Pipeline version ID: {created_pipeline.pipeline_version.id}")
425
+ pipeline_version_id = created_pipeline.pipeline_version.id
426
+ logger.info(f"Pipeline version ID: {pipeline_version_id}")
278
427
 
279
- return True
428
+ return True, pipeline_version_id
280
429
  else:
281
430
  logger.error(f"Failed to create pipeline: {response.status.description}")
282
431
  logger.error(f"Details: {response.status.details}")
283
- return False
432
+ return False, ""
284
433
 
285
434
  except Exception as e:
286
435
  logger.error(f"Error creating pipeline: {e}")
287
- return False
436
+ return False, ""
288
437
 
289
438
 
290
- def upload_pipeline(path: str):
439
+ def upload_pipeline(path: str, no_lockfile: bool = False):
291
440
  """
292
441
  Upload a pipeline with associated pipeline steps to Clarifai.
293
442
 
294
443
  :param path: Path to the pipeline configuration file or directory containing config.yaml
444
+ :param no_lockfile: If True, skip creating config-lock.yaml
295
445
  """
296
446
  try:
297
447
  # Determine if path is a directory or file
@@ -311,15 +461,27 @@ def upload_pipeline(path: str):
311
461
  logger.error("Failed to upload pipeline steps")
312
462
  sys.exit(1)
313
463
 
314
- # Step 2: Update config with version information
315
- builder.update_config_with_versions()
464
+ # Step 2: Generate lockfile (unless --no-lockfile is specified)
465
+ # This will be used to update the versions of pipeline-steps that just got uploaded in Step 1
466
+ lockfile_data = None
467
+ if not no_lockfile:
468
+ lockfile_data = builder.prepare_lockfile_with_step_versions()
316
469
 
317
470
  # Step 3: Create the pipeline
318
- if not builder.create_pipeline():
471
+ success, pipeline_version_id = builder.create_pipeline()
472
+ if not success:
319
473
  logger.error("Failed to create pipeline")
320
474
  sys.exit(1)
321
475
 
322
- logger.info("Pipeline upload completed successfully!")
476
+ # Step 4: Update lockfile (unless --no-lockfile is specified)
477
+ if not no_lockfile and lockfile_data:
478
+ lockfile_data = builder.update_lockfile_with_pipeline_info(
479
+ lockfile_data, pipeline_version_id
480
+ )
481
+ builder.save_lockfile(lockfile_data)
482
+ logger.info("Pipeline upload completed successfully with lockfile!")
483
+ else:
484
+ logger.info("Pipeline upload completed successfully (lockfile skipped)!")
323
485
 
324
486
  except Exception as e:
325
487
  logger.error(f"Pipeline upload failed: {e}")
clarifai/utils/cli.py CHANGED
@@ -229,7 +229,7 @@ def validate_context_auth(pat: str, user_id: str, api_base: str = None):
229
229
 
230
230
 
231
231
  def customize_ollama_model(
232
- model_path, model_name=None, port=None, context_length=None, verbose=False
232
+ model_path, user_id, model_name=None, port=None, context_length=None, verbose=False
233
233
  ):
234
234
  """Customize the Ollama model name in the cloned template files.
235
235
  Args:
@@ -240,6 +240,24 @@ def customize_ollama_model(
240
240
  verbose: Whether to enable verbose logging - optional (defaults to False)
241
241
 
242
242
  """
243
+ config_path = os.path.join(model_path, 'config.yaml')
244
+ if os.path.exists(config_path):
245
+ with open(config_path, 'r') as f:
246
+ config = yaml.safe_load(f)
247
+
248
+ # Update the user_id in the model section
249
+ config['model']['user_id'] = user_id
250
+ if 'toolkit' not in config or config['toolkit'] is None:
251
+ config['toolkit'] = {}
252
+ if model_name is not None:
253
+ config['toolkit']['model'] = model_name
254
+ if port is not None:
255
+ config['toolkit']['port'] = port
256
+ if context_length is not None:
257
+ config['toolkit']['context_length'] = context_length
258
+ with open(config_path, 'w') as f:
259
+ yaml.dump(config, f, default_flow_style=False, sort_keys=False)
260
+
243
261
  model_py_path = os.path.join(model_path, "1", "model.py")
244
262
 
245
263
  if not os.path.exists(model_py_path):
@@ -405,16 +423,20 @@ def convert_timestamp_to_string(timestamp: Timestamp) -> str:
405
423
  return datetime_obj.strftime('%Y-%m-%dT%H:%M:%SZ')
406
424
 
407
425
 
408
- def customize_huggingface_model(model_path, model_name):
426
+ def customize_huggingface_model(model_path, user_id, model_name):
409
427
  config_path = os.path.join(model_path, 'config.yaml')
410
428
  if os.path.exists(config_path):
411
429
  with open(config_path, 'r') as f:
412
430
  config = yaml.safe_load(f)
413
431
 
414
- # Update the repo_id in checkpoints section
415
- if 'checkpoints' not in config:
416
- config['checkpoints'] = {}
417
- config['checkpoints']['repo_id'] = model_name
432
+ # Update the user_id in the model section
433
+ config['model']['user_id'] = user_id
434
+
435
+ if model_name:
436
+ # Update the repo_id in checkpoints section
437
+ if 'checkpoints' not in config:
438
+ config['checkpoints'] = {}
439
+ config['checkpoints']['repo_id'] = model_name
418
440
 
419
441
  with open(config_path, 'w') as f:
420
442
  yaml.dump(config, f, default_flow_style=False, sort_keys=False)
@@ -424,7 +446,7 @@ def customize_huggingface_model(model_path, model_name):
424
446
  logger.warning(f"config.yaml not found at {config_path}, skipping model configuration")
425
447
 
426
448
 
427
- def customize_lmstudio_model(model_path, model_name, port, context_length):
449
+ def customize_lmstudio_model(model_path, user_id, model_name, port, context_length):
428
450
  """Customize the LM Studio model name in the cloned template files.
429
451
  Args:
430
452
  model_path: Path to the cloned model directory
@@ -438,6 +460,8 @@ def customize_lmstudio_model(model_path, model_name, port, context_length):
438
460
  if os.path.exists(config_path):
439
461
  with open(config_path, 'r') as f:
440
462
  config = yaml.safe_load(f)
463
+ # Update the user_id in the model section
464
+ config['model']['user_id'] = user_id
441
465
  if 'toolkit' not in config or config['toolkit'] is None:
442
466
  config['toolkit'] = {}
443
467
  if model_name is not None:
@@ -66,6 +66,7 @@ DEFAULT_OLLAMA_MODEL_REPO_BRANCH = "ollama"
66
66
  DEFAULT_HF_MODEL_REPO_BRANCH = "huggingface"
67
67
  DEFAULT_LMSTUDIO_MODEL_REPO_BRANCH = "lmstudio"
68
68
  DEFAULT_VLLM_MODEL_REPO_BRANCH = "vllm"
69
+ DEFAULT_PYTHON_MODEL_REPO_BRANCH = "python"
69
70
 
70
71
  STATUS_OK = "200 OK"
71
72
  STATUS_MIXED = "207 MIXED"
@@ -0,0 +1,117 @@
1
+ """
2
+ Hashing utilities for Clarifai Python SDK.
3
+
4
+ This module provides functions for computing stable hashes of directories and files,
5
+ commonly used for change detection in pipeline steps and other components.
6
+ """
7
+
8
+ import hashlib
9
+ import os
10
+ from typing import List, Optional
11
+
12
+
13
def hash_directory(
    directory: str, algo: str = "md5", exclude_files: Optional[List[str]] = None
) -> str:
    """
    Compute a stable hash of all files in a directory.

    This function computes a hash that accounts for:
    - File relative paths (to detect renames)
    - File sizes (to detect empty files)
    - File contents (read in chunks for large files)

    :param directory: Directory to hash
    :param algo: Hash algorithm ('md5', 'sha1', 'sha256', etc.)
    :param exclude_files: List of file names to exclude from hash calculation.
        If None, defaults to ['config-lock.yaml'] for backward compatibility.
    :return: Hash as lowercase hex digest string
    :raises ValueError: If ``directory`` does not exist or is not a directory.
    """
    if exclude_files is None:
        exclude_files = ['config-lock.yaml']

    # Ensure directory exists
    if not os.path.exists(directory):
        raise ValueError(f"Directory does not exist: {directory}")

    if not os.path.isdir(directory):
        raise ValueError(f"Path is not a directory: {directory}")

    hash_func = hashlib.new(algo)

    for root, dirs, files in os.walk(directory):
        # Sort subdirectories in place so os.walk recurses in a deterministic
        # order. Only the files were sorted before, so the digest could vary
        # between filesystems even for identical content.
        dirs.sort()
        for name in sorted(files):
            # Skip files in the exclusion list
            if name in exclude_files:
                continue

            filepath = os.path.join(root, name)
            # NOTE(review): relpath uses the platform separator, so digests for
            # nested trees differ between POSIX and Windows. Acceptable for
            # per-machine change detection, but not portable across OSes.
            relative_path = os.path.relpath(filepath, directory)

            # Hash the relative path to detect renames
            hash_func.update(relative_path.encode("utf-8"))

            # Hash the file size to detect empties
            file_size = os.path.getsize(filepath)
            hash_func.update(str(file_size).encode("utf-8"))

            # Hash the file contents (read in chunks for large files)
            try:
                with open(filepath, "rb") as f:
                    for chunk in iter(lambda: f.read(8192), b""):
                        hash_func.update(chunk)
            except (IOError, OSError) as e:
                # If we can't read the file, include the error in the hash
                # This ensures the hash changes if file permissions change
                hash_func.update(f"ERROR_READING_FILE: {e}".encode("utf-8"))

    return hash_func.hexdigest()
69
+
70
+
71
def hash_file(filepath: str, algo: str = "md5") -> str:
    """
    Compute a hash of a single file.

    :param filepath: Path to the file to hash
    :param algo: Hash algorithm ('md5', 'sha1', 'sha256', etc.)
    :return: Hash as lowercase hex digest string
    :raises ValueError: If ``filepath`` does not exist, is not a regular
        file, or cannot be read.
    """
    if not os.path.exists(filepath):
        raise ValueError(f"File does not exist: {filepath}")

    if not os.path.isfile(filepath):
        raise ValueError(f"Path is not a file: {filepath}")

    hash_func = hashlib.new(algo)

    try:
        # Read in fixed-size chunks so arbitrarily large files stay cheap.
        with open(filepath, "rb") as f:
            for chunk in iter(lambda: f.read(8192), b""):
                hash_func.update(chunk)
    except (IOError, OSError) as e:
        # Chain the original OS error so callers keep the full traceback.
        raise ValueError(f"Error reading file {filepath}: {e}") from e

    return hash_func.hexdigest()
95
+
96
+
97
def verify_hash_algorithm(algo: str) -> bool:
    """
    Check whether hashlib supports the given algorithm name.

    :param algo: Hash algorithm name
    :return: True if algorithm is supported, False otherwise
    """
    # EAFP: attempt to construct a hasher and report failure as unsupported.
    try:
        hashlib.new(algo)
    except ValueError:
        return False
    return True
109
+
110
+
111
def get_available_algorithms() -> List[str]:
    """
    Get list of available hash algorithms.

    :return: List of available algorithm names
    """
    # hashlib exposes the guaranteed plus OpenSSL-provided algorithms as a
    # set; materialize it as a list for callers that expect one.
    return [algorithm for algorithm in hashlib.algorithms_available]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: clarifai
3
- Version: 11.8.2
3
+ Version: 11.8.4
4
4
  Home-page: https://github.com/Clarifai/clarifai-python
5
5
  Author: Clarifai
6
6
  Author-email: support@clarifai.com
@@ -1,4 +1,4 @@
1
- clarifai/__init__.py,sha256=wlKYds59E079Cej1oyfbQMsxCdbNX7JGHZgtuBpBG3A,23
1
+ clarifai/__init__.py,sha256=l80v8IYM70DFAKN9bwXRrDCRPmv0ykn84eJo3--0MWA,23
2
2
  clarifai/cli.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  clarifai/errors.py,sha256=GXa6D4v_L404J83jnRNFPH7s-1V9lk7w6Ws99f1g-AY,2772
4
4
  clarifai/versions.py,sha256=ecSuEB_nOL2XSoYHDw2n23XUbm_KPOGjudMXmQrGdS8,224
@@ -8,12 +8,12 @@ clarifai/cli/__main__.py,sha256=7nPbLW7Jr2shkgMPvnxpn4xYGMvIcnqluJ69t9w4H_k,74
8
8
  clarifai/cli/base.py,sha256=FQEEmi3a9_LBOmM_-X4EYdpAmDK1UljTxrHOIIsOZbM,10696
9
9
  clarifai/cli/compute_cluster.py,sha256=8Xss0Obrp6l1XuxJe0luOqU_pf8vXGDRi6jyIe8qR6k,2282
10
10
  clarifai/cli/deployment.py,sha256=9C4I6_kyMxRkWl6h681wc79-3mAtDHtTUaxRv05OZMs,4262
11
- clarifai/cli/model.py,sha256=2Eeoq4Kq_kJqSTmccn4B9YZ962Wj3mz1wcPQwXEWd2M,49734
11
+ clarifai/cli/model.py,sha256=BN-hjSdpgju3kh6edi-0uBNVaroPxuSuMkgeevIQPPk,52722
12
12
  clarifai/cli/nodepool.py,sha256=H6OIdUW_EiyDUwZogzEDoYmVwEjLMsgoDlPyE7gjIuU,4245
13
- clarifai/cli/pipeline.py,sha256=MmyPaVX1XsIZlrYTrT7Ctd71ao_d3ZWkuF_fv_NJS8s,13944
13
+ clarifai/cli/pipeline.py,sha256=bH7pnJGfjQIu_Y-f_zwKBFdPDTsBEEDtA2Oz9P6-Zj0,17129
14
14
  clarifai/cli/pipeline_step.py,sha256=dvoC2vAsDcxOCy88VV0X42PG22_7JSu9sfBVsk-Cix4,6133
15
15
  clarifai/cli/templates/__init__.py,sha256=HbMlZuYOMyVJde73ijNAevmSRUpIttGlHdwyO4W-JOs,44
16
- clarifai/cli/templates/model_templates.py,sha256=-xGUzadN7S-mNZ-kE4Z-kv51BlnoGHjue05Yg5OGnt0,9791
16
+ clarifai/cli/templates/model_templates.py,sha256=OU3qgYXSITo5qp0mkRiisXOMNhj6wNKWlZUFnAFZfGE,10090
17
17
  clarifai/cli/templates/pipeline_step_templates.py,sha256=w1IJghF_4wWyEmHR1925N0hpGKocy3G7ezhxTH-0XCc,1716
18
18
  clarifai/cli/templates/pipeline_templates.py,sha256=iLVxkmd0usc7jervTZTFzLwRVVF_623RszGW-oIuPDw,4234
19
19
  clarifai/client/__init__.py,sha256=MWEG_jTGn6UWbGCznsZxObJ5h65k2igD1462qID2pgI,840
@@ -24,7 +24,7 @@ clarifai/client/dataset.py,sha256=sz5CycP3J7pG0iMREKI2JeXQuvRwlVrE4AHne8yxgtg,35
24
24
  clarifai/client/deployment.py,sha256=QBf0tzkKBEpzNgmOEmWUJMOlVWdFEFc70Y44o8y75Gs,2875
25
25
  clarifai/client/input.py,sha256=jpX47qwn7aUBBIEuSSLHF5jk70XaWEh0prD065c9b-E,51205
26
26
  clarifai/client/lister.py,sha256=1YEm2suNxPaJO4x9V5szgD_YX6N_00vgSO-7m0HagY8,2208
27
- clarifai/client/model.py,sha256=WJJL0fOuHe4X8aMT-gs_SnEFhhT0cVDqqHbA3VX2DE8,92520
27
+ clarifai/client/model.py,sha256=uOnR1GPOJUoNWDmd_Ja9XJq8oRPa1frTxBQ_ypQXc8Q,93518
28
28
  clarifai/client/model_client.py,sha256=8N8dRqb5zfFCNxq-jc-YSL19tgS8PpevnxY69G2YzCE,38280
29
29
  clarifai/client/module.py,sha256=pTcTmR48-VQRCEj3PJK_ZT5YTsYfZDbEjxwJ43rcLMM,4753
30
30
  clarifai/client/nodepool.py,sha256=QDJOMOYrZAG962u-MZWjXOZifjWK8hDgS2zoUn59dZU,16751
@@ -86,9 +86,9 @@ clarifai/runners/models/openai_class.py,sha256=MYnL7_U4_m5FX3CoVaTd6B9Qminh-q4pY
86
86
  clarifai/runners/models/visual_classifier_class.py,sha256=1ZoLfCT2crrgRbejjTMAIwpTRgQMiH9N9yflOVpFxSg,2721
87
87
  clarifai/runners/models/visual_detector_class.py,sha256=ky4oFAkGCKPpGPdgaOso-n6D3HcmnbKee_8hBsNiV8U,2883
88
88
  clarifai/runners/pipeline_steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
89
- clarifai/runners/pipeline_steps/pipeline_step_builder.py,sha256=jcbs3ntbeyUiAaPh00Pscni4uJopbqVNVJblUX1pYVc,21808
89
+ clarifai/runners/pipeline_steps/pipeline_step_builder.py,sha256=0AmleVdqvPMPl6Sxhd8pZDMja7GNzckbT8AuRJLtdho,25678
90
90
  clarifai/runners/pipelines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
91
- clarifai/runners/pipelines/pipeline_builder.py,sha256=0FBjb8l7mWlCwBsBLkHM3znNQB9HPLEOYrrE53ntjCE,13810
91
+ clarifai/runners/pipelines/pipeline_builder.py,sha256=bfIK7PPTspNhWjyKgPp13pkdGlHstKpsLi2Eet39Fyw,21494
92
92
  clarifai/runners/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
93
93
  clarifai/runners/utils/code_script.py,sha256=-6IgNruIMTYLKJG8EqVWSaZR7lFRBoQ4ufJtuCPUCqc,14799
94
94
  clarifai/runners/utils/const.py,sha256=MK7lTzzJKbOiyiUtG_jlJXfz_xNKMn5LjkQ9vjbttXE,1538
@@ -105,9 +105,10 @@ clarifai/runners/utils/data_types/data_types.py,sha256=UBHTNPshr94qUs2KqkYis0VlA
105
105
  clarifai/schema/search.py,sha256=o9-ct8ulLZByB3RCVwZWPgaDwdcW7cM5s-g8oyAz89s,2421
106
106
  clarifai/urls/helper.py,sha256=z6LnLGgLHxD8scFtyRdxqYIRJNhxqPkfLe53UtTLUBY,11727
107
107
  clarifai/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
108
- clarifai/utils/cli.py,sha256=ojPI6wBMwxpmCwWIE1nx4t_lzHqyJqby_TI6Fl3Vul4,15536
108
+ clarifai/utils/cli.py,sha256=3PhMdb84mjrwcxkRuhxX71UXBgh-Lcave7xURJf5inE,16548
109
109
  clarifai/utils/config.py,sha256=dENYtcWW7Il5MInvIaYe0MZn0hW1fbIb0Lzk8rQ_geQ,7671
110
- clarifai/utils/constants.py,sha256=bHvs8L_Eai49Qm0U9YcK7Srx9FlL5iyv_pXvgSt6XDc,2497
110
+ clarifai/utils/constants.py,sha256=2LGYqi55-YaHLfFNLZSrZNuChJ8rm7_voBysSFtoQF8,2541
111
+ clarifai/utils/hashing.py,sha256=z2hHt4sDvGyqNbnOay0F2i3K_PjyX-J24IEytszyYsA,3761
111
112
  clarifai/utils/logging.py,sha256=0we53uTqUvzrulC86whu-oeWNxn1JjJL0OQ98Bwf9vo,15198
112
113
  clarifai/utils/misc.py,sha256=ATj4RR6S06GeLE0X4tMU4bmTz4Sz4j2WemTddsnSfMI,23458
113
114
  clarifai/utils/model_train.py,sha256=0XSAoTkSsrwf4f-W9yw2mkXZtkal7LBLJSoi86CFCn4,9250
@@ -121,9 +122,9 @@ clarifai/workflows/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuF
121
122
  clarifai/workflows/export.py,sha256=HvUYG9N_-UZoRR0-_tdGbZ950_AeBqawSppgUxQebR0,1913
122
123
  clarifai/workflows/utils.py,sha256=ESL3INcouNcLKCh-nMpfXX-YbtCzX7tz7hT57_RGQ3M,2079
123
124
  clarifai/workflows/validate.py,sha256=UhmukyHkfxiMFrPPeBdUTiCOHQT5-shqivlBYEyKTlU,2931
124
- clarifai-11.8.2.dist-info/licenses/LICENSE,sha256=mUqF_d12-qE2n41g7C5_sq-BMLOcj6CNN-jevr15YHU,555
125
- clarifai-11.8.2.dist-info/METADATA,sha256=tp1H7AGojDcqwCLK9irGnVbgnsCJK6EMrCd966DAlnw,23193
126
- clarifai-11.8.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
127
- clarifai-11.8.2.dist-info/entry_points.txt,sha256=X9FZ4Z-i_r2Ud1RpZ9sNIFYuu_-9fogzCMCRUD9hyX0,51
128
- clarifai-11.8.2.dist-info/top_level.txt,sha256=wUMdCQGjkxaynZ6nZ9FAnvBUCgp5RJUVFSy2j-KYo0s,9
129
- clarifai-11.8.2.dist-info/RECORD,,
125
+ clarifai-11.8.4.dist-info/licenses/LICENSE,sha256=mUqF_d12-qE2n41g7C5_sq-BMLOcj6CNN-jevr15YHU,555
126
+ clarifai-11.8.4.dist-info/METADATA,sha256=mNT5gLQe6QyrWy_kKhF-XCSAqR2F932MDYyTEy8aPQ4,23193
127
+ clarifai-11.8.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
128
+ clarifai-11.8.4.dist-info/entry_points.txt,sha256=X9FZ4Z-i_r2Ud1RpZ9sNIFYuu_-9fogzCMCRUD9hyX0,51
129
+ clarifai-11.8.4.dist-info/top_level.txt,sha256=wUMdCQGjkxaynZ6nZ9FAnvBUCgp5RJUVFSy2j-KYo0s,9
130
+ clarifai-11.8.4.dist-info/RECORD,,