clarifai-11.8.2-py3-none-any.whl → clarifai-11.8.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
clarifai/__init__.py CHANGED
@@ -1 +1 @@
-__version__ = "11.8.2"
+__version__ = "11.8.3"
clarifai/cli/model.py CHANGED
@@ -231,6 +231,44 @@ def init(
             repo_url = format_github_repo_url(github_url)
             repo_url = f"https://github.com/{owner}/{repo}"
 
+        try:
+            # Create a temporary directory for cloning
+            with tempfile.TemporaryDirectory(prefix="clarifai_model_") as clone_dir:
+                # Clone the repository with explicit branch parameter
+                if not clone_github_repo(repo_url, clone_dir, github_pat, branch):
+                    logger.error(f"Failed to clone repository from {repo_url}")
+                    github_url = None  # Fall back to template mode
+
+                else:
+                    # Copy the entire repository content to target directory (excluding .git)
+                    for item in os.listdir(clone_dir):
+                        if item == '.git':
+                            continue
+
+                        source_path = os.path.join(clone_dir, item)
+                        target_path = os.path.join(model_path, item)
+
+                        if os.path.isdir(source_path):
+                            shutil.copytree(source_path, target_path, dirs_exist_ok=True)
+                        else:
+                            shutil.copy2(source_path, target_path)
+
+                    logger.info(f"Successfully cloned repository to {model_path}")
+                    logger.info(
+                        "Model initialization complete with GitHub repository clone"
+                    )
+                    logger.info("Next steps:")
+                    logger.info("1. Review the model configuration")
+                    logger.info("2. Install any required dependencies manually")
+                    logger.info(
+                        "3. Test the model locally using 'clarifai model local-test'"
+                    )
+                    return
+
+        except Exception as e:
+            logger.error(f"Failed to clone GitHub repository: {e}")
+            github_url = None  # Fall back to template mode
+
     if toolkit:
         logger.info(f"Initializing model from GitHub repository: {github_url}")
 
@@ -240,31 +278,31 @@ def init(
         else:
             repo_url = format_github_repo_url(github_url)
 
-    try:
-        # Create a temporary directory for cloning
-        with tempfile.TemporaryDirectory(prefix="clarifai_model_") as clone_dir:
-            # Clone the repository with explicit branch parameter
-            if not clone_github_repo(repo_url, clone_dir, github_pat, branch):
-                logger.error(f"Failed to clone repository from {repo_url}")
-                github_url = None  # Fall back to template mode
-
-            else:
-                # Copy the entire repository content to target directory (excluding .git)
-                for item in os.listdir(clone_dir):
-                    if item == '.git':
-                        continue
-
-                    source_path = os.path.join(clone_dir, item)
-                    target_path = os.path.join(model_path, item)
-
-                    if os.path.isdir(source_path):
-                        shutil.copytree(source_path, target_path, dirs_exist_ok=True)
-                    else:
-                        shutil.copy2(source_path, target_path)
+        try:
+            # Create a temporary directory for cloning
+            with tempfile.TemporaryDirectory(prefix="clarifai_model_") as clone_dir:
+                # Clone the repository with explicit branch parameter
+                if not clone_github_repo(repo_url, clone_dir, github_pat, branch):
+                    logger.error(f"Failed to clone repository from {repo_url}")
+                    github_url = None  # Fall back to template mode
 
-    except Exception as e:
-        logger.error(f"Failed to clone GitHub repository: {e}")
-        github_url = None
+                else:
+                    # Copy the entire repository content to target directory (excluding .git)
+                    for item in os.listdir(clone_dir):
+                        if item == '.git':
+                            continue
+
+                        source_path = os.path.join(clone_dir, item)
+                        target_path = os.path.join(model_path, item)
+
+                        if os.path.isdir(source_path):
+                            shutil.copytree(source_path, target_path, dirs_exist_ok=True)
+                        else:
+                            shutil.copy2(source_path, target_path)
+
+        except Exception as e:
+            logger.error(f"Failed to clone GitHub repository: {e}")
+            github_url = None
 
     if (model_name or port or context_length) and (toolkit == 'ollama'):
         customize_ollama_model(model_path, model_name, port, context_length)
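The pattern above is small enough to show standalone. The sketch below reproduces the clone-and-copy flow with a plain `git clone` standing in for the SDK's `clone_github_repo` helper, so `clone_into` and the subprocess call are illustrative assumptions rather than the SDK's code:

import os
import shutil
import subprocess
import tempfile
from typing import Optional

def clone_into(repo_url: str, target_dir: str, branch: Optional[str] = None) -> bool:
    """Clone repo_url and copy everything except .git into target_dir."""
    with tempfile.TemporaryDirectory(prefix="clarifai_model_") as clone_dir:
        cmd = ["git", "clone", "--depth", "1"]
        if branch:
            cmd += ["--branch", branch]
        cmd += [repo_url, clone_dir]
        if subprocess.run(cmd, capture_output=True).returncode != 0:
            return False  # caller falls back to template mode
        # The copy must happen inside the with-block, before the temp dir is deleted
        for item in os.listdir(clone_dir):
            if item == ".git":
                continue
            src = os.path.join(clone_dir, item)
            dst = os.path.join(target_dir, item)
            if os.path.isdir(src):
                shutil.copytree(src, dst, dirs_exist_ok=True)
            else:
                shutil.copy2(src, dst)
    return True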
clarifai/cli/pipeline.py CHANGED
@@ -26,14 +26,19 @@ def pipeline():
 
 @pipeline.command()
 @click.argument("path", type=click.Path(exists=True), required=False, default=".")
-def upload(path):
+@click.option(
+    '--no-lockfile',
+    is_flag=True,
+    help='Skip creating config-lock.yaml file.',
+)
+def upload(path, no_lockfile):
     """Upload a pipeline with associated pipeline steps to Clarifai.
 
     PATH: Path to the pipeline configuration file or directory containing config.yaml. If not specified, the current directory is used by default.
     """
     from clarifai.runners.pipelines.pipeline_builder import upload_pipeline
 
-    upload_pipeline(path)
+    upload_pipeline(path, no_lockfile=no_lockfile)
 
 
 @pipeline.command()
@@ -106,15 +111,32 @@ def run(
 
     validate_context(ctx)
 
+    # Try to load from config-lock.yaml first if no config is specified
+    lockfile_path = os.path.join(os.getcwd(), "config-lock.yaml")
+    if not config and os.path.exists(lockfile_path):
+        logger.info("Found config-lock.yaml, using it as default config source")
+        config = lockfile_path
+
     if config:
         config_data = from_yaml(config)
-        pipeline_id = config_data.get('pipeline_id', pipeline_id)
-        pipeline_version_id = config_data.get('pipeline_version_id', pipeline_version_id)
+
+        # Handle both regular config format and lockfile format
+        if 'pipeline' in config_data and isinstance(config_data['pipeline'], dict):
+            pipeline_config = config_data['pipeline']
+            pipeline_id = pipeline_config.get('id', pipeline_id)
+            pipeline_version_id = pipeline_config.get('version_id', pipeline_version_id)
+            user_id = pipeline_config.get('user_id', user_id)
+            app_id = pipeline_config.get('app_id', app_id)
+        else:
+            # Fallback to flat config structure
+            pipeline_id = config_data.get('pipeline_id', pipeline_id)
+            pipeline_version_id = config_data.get('pipeline_version_id', pipeline_version_id)
+            user_id = config_data.get('user_id', user_id)
+            app_id = config_data.get('app_id', app_id)
+
         pipeline_version_run_id = config_data.get(
            'pipeline_version_run_id', pipeline_version_run_id
         )
-        user_id = config_data.get('user_id', user_id)
-        app_id = config_data.get('app_id', app_id)
         nodepool_id = config_data.get('nodepool_id', nodepool_id)
         compute_cluster_id = config_data.get('compute_cluster_id', compute_cluster_id)
         pipeline_url = config_data.get('pipeline_url', pipeline_url)
@@ -319,6 +341,62 @@ def init(pipeline_path):
     logger.info("3. Run 'clarifai pipeline upload config.yaml' to upload your pipeline")
 
 
+@pipeline.command()
+@click.argument(
+    "lockfile_path", type=click.Path(exists=True), required=False, default="config-lock.yaml"
+)
+def validate_lock(lockfile_path):
+    """Validate a config-lock.yaml file for schema and reference consistency.
+
+    LOCKFILE_PATH: Path to the config-lock.yaml file. If not specified, looks for config-lock.yaml in current directory.
+    """
+    from clarifai.runners.utils.pipeline_validation import PipelineConfigValidator
+    from clarifai.utils.cli import from_yaml
+
+    try:
+        # Load the lockfile
+        lockfile_data = from_yaml(lockfile_path)
+
+        # Validate required fields
+        if "pipeline" not in lockfile_data:
+            raise ValueError("'pipeline' section not found in lockfile")
+
+        pipeline = lockfile_data["pipeline"]
+        required_fields = ["id", "user_id", "app_id", "version_id"]
+
+        for field in required_fields:
+            if field not in pipeline:
+                raise ValueError(f"Required field '{field}' not found in pipeline section")
+            if not pipeline[field]:
+                raise ValueError(f"Required field '{field}' cannot be empty")
+
+        # Validate orchestration spec if present
+        if "orchestration_spec" in pipeline:
+            # Create a temporary config structure for validation
+            temp_config = {
+                "pipeline": {
+                    "id": pipeline["id"],
+                    "user_id": pipeline["user_id"],
+                    "app_id": pipeline["app_id"],
+                    "orchestration_spec": pipeline["orchestration_spec"],
+                }
+            }
+
+            # Use existing validator to check orchestration spec
+            validator = PipelineConfigValidator()
+            validator._validate_orchestration_spec(temp_config)
+
+        logger.info(f"✅ Lockfile {lockfile_path} is valid")
+        logger.info(f"Pipeline: {pipeline['id']}")
+        logger.info(f"User: {pipeline['user_id']}")
+        logger.info(f"App: {pipeline['app_id']}")
+        logger.info(f"Version: {pipeline['version_id']}")
+
+    except Exception as e:
+        logger.error(f"❌ Lockfile validation failed: {e}")
+        raise click.Abort()
+
+
 @pipeline.command(['ls'])
 @click.option('--page_no', required=False, help='Page number to list.', default=1)
 @click.option('--per_page', required=False, help='Number of items per page.', default=16)
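For reference, the updated `run` command now accepts either the nested lockfile shape or the original flat config. A minimal sketch of the resolution order it applies, with made-up field values (`resolve_ids` is illustrative, not SDK code):

# Nested shape written by `clarifai pipeline upload` into config-lock.yaml:
lockfile_style = {
    "pipeline": {
        "id": "my-pipeline",
        "user_id": "my-user",
        "app_id": "my-app",
        "version_id": "abc123",
    }
}

# Flat config.yaml shape, still supported as a fallback:
flat_style = {
    "pipeline_id": "my-pipeline",
    "pipeline_version_id": "abc123",
    "user_id": "my-user",
    "app_id": "my-app",
}

def resolve_ids(config_data: dict) -> tuple:
    """Prefer the nested 'pipeline' block, as the new branch in `run` does."""
    if isinstance(config_data.get("pipeline"), dict):
        p = config_data["pipeline"]
        return p.get("id"), p.get("version_id"), p.get("user_id"), p.get("app_id")
    return (
        config_data.get("pipeline_id"),
        config_data.get("pipeline_version_id"),
        config_data.get("user_id"),
        config_data.get("app_id"),
    )

assert resolve_ids(lockfile_style) == resolve_ids(flat_style)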
clarifai/client/model.py CHANGED
@@ -522,8 +522,9 @@ class Model(Lister, BaseClient):
                 model=self.model_info,
                 runner_selector=self._runner_selector,
             )
+            # Pass in None for async stub will create it.
             self._client = ModelClient(
-                stub=self.STUB, async_stub=self.async_stub, request_template=request_template
+                stub=self.STUB, async_stub=None, request_template=request_template
             )
         return self._client
 
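Per the new comment, passing `async_stub=None` defers creation of the async stub to ModelClient instead of building it eagerly. A generic sketch of that lazy-initialization pattern (not ModelClient's actual implementation; `_make_async_stub` is a hypothetical factory):

class LazyAsyncStub:
    """Accept None at construction and build the async stub on first access."""

    def __init__(self, stub, async_stub=None):
        self._stub = stub
        self._async_stub = async_stub  # None means "create on demand"

    @property
    def async_stub(self):
        if self._async_stub is None:
            # Hypothetical factory; the real client would build a gRPC aio stub here
            self._async_stub = self._make_async_stub()
        return self._async_stub

    def _make_async_stub(self):
        raise NotImplementedError  # stands in for the real channel setup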
clarifai/runners/pipeline_steps/pipeline_step_builder.py CHANGED
@@ -4,6 +4,7 @@ import sys
 import tarfile
 import time
 from string import Template
+from typing import List, Optional
 
 import yaml
 from clarifai_grpc.grpc.api import resources_pb2, service_pb2
@@ -11,6 +12,7 @@ from clarifai_grpc.grpc.api.status import status_code_pb2
 from google.protobuf import json_format
 
 from clarifai.client.base import BaseClient
+from clarifai.utils.hashing import hash_directory
 from clarifai.utils.logging import logger
 from clarifai.utils.misc import get_uuid
 from clarifai.versions import CLIENT_VERSION
@@ -22,12 +24,13 @@ UPLOAD_CHUNK_SIZE = 14 * 1024 * 1024
 class PipelineStepBuilder:
     """Pipeline Step Builder class for managing pipeline step upload to Clarifai."""
 
-    def __init__(self, folder: str):
+    def __init__(self, folder: str, hash_exclusions: Optional[List[str]] = None):
         """
         Initialize PipelineStepBuilder.
 
         :param folder: The folder containing the pipeline step files (config.yaml, requirements.txt,
                        dockerfile, and pipeline_step.py in 1/ subdirectory)
+        :param hash_exclusions: List of file names to exclude from hash calculation (defaults to ['config-lock.yaml'])
         """
         self._client = None
         self.folder = self._validate_folder(folder)
@@ -37,6 +40,10 @@ class PipelineStepBuilder:
         self.pipeline_step_id = self.pipeline_step_proto.id
         self.pipeline_step_version_id = None
         self.pipeline_step_compute_info = self._get_pipeline_step_compute_info()
+        # Configure files to exclude from hash calculation
+        self.hash_exclusions = (
+            hash_exclusions if hash_exclusions is not None else ['config-lock.yaml']
+        )
 
     @property
     def client(self):
@@ -490,6 +497,95 @@ COPY --link=true requirements.txt config.yaml /home/nonroot/main/
 
         raise TimeoutError("Pipeline step build did not finish in time")
 
+    def load_config_lock(self):
+        """
+        Load existing config-lock.yaml if it exists.
+
+        :return: Dictionary with config-lock data or None if file doesn't exist
+        """
+        config_lock_path = os.path.join(self.folder, "config-lock.yaml")
+        if os.path.exists(config_lock_path):
+            try:
+                with open(config_lock_path, 'r', encoding='utf-8') as f:
+                    return yaml.safe_load(f)
+            except Exception as e:
+                logger.warning(f"Failed to load config-lock.yaml: {e}")
+                return None
+        return None
+
+    def should_upload_step(self, algo="md5"):
+        """
+        Check if the pipeline step should be uploaded based on hash comparison.
+
+        :param algo: Hash algorithm to use
+        :return: True if step should be uploaded, False otherwise
+        """
+        config_lock = self.load_config_lock()
+
+        # If no config-lock.yaml exists, upload the step (first time upload)
+        if config_lock is None:
+            logger.info("No config-lock.yaml found, will upload pipeline step")
+            return True
+
+        # Compare stored hash with freshly computed one
+        current_hash = hash_directory(self.folder, algo=algo, exclude_files=self.hash_exclusions)
+        stored_hash_info = config_lock.get("hash", {})
+        stored_hash = stored_hash_info.get("value", "")
+        stored_algo = stored_hash_info.get("algo", "md5")
+
+        # If algorithm changed, re-upload to update hash
+        if stored_algo != algo:
+            logger.info(
+                f"Hash algorithm changed from {stored_algo} to {algo}, will upload pipeline step"
+            )
+            return True
+
+        # If hash changed, upload
+        if current_hash != stored_hash:
+            logger.info(
+                f"Hash changed (was: {stored_hash}, now: {current_hash}), will upload pipeline step"
+            )
+            return True
+
+        logger.info(f"Hash unchanged ({current_hash}), skipping pipeline step upload")
+        return False
+
+    def generate_config_lock(self, version_id, algo="md5"):
+        """
+        Generate config-lock.yaml content for the pipeline step.
+
+        :param version_id: Pipeline step version ID
+        :param algo: Hash algorithm used
+        :return: Dictionary with config-lock data
+        """
+        # Compute hash
+        hash_value = hash_directory(self.folder, algo=algo, exclude_files=self.hash_exclusions)
+
+        # Create config-lock structure
+        config_lock = {"id": version_id, "hash": {"algo": algo, "value": hash_value}}
+
+        # Append the original config.yaml contents
+        config_lock.update(self.config)
+
+        return config_lock
+
+    def save_config_lock(self, version_id, algo="md5"):
+        """
+        Save config-lock.yaml file with pipeline step metadata.
+
+        :param version_id: Pipeline step version ID
+        :param algo: Hash algorithm used
+        """
+        config_lock_data = self.generate_config_lock(version_id, algo)
+        config_lock_path = os.path.join(self.folder, "config-lock.yaml")
+
+        try:
+            with open(config_lock_path, 'w', encoding='utf-8') as f:
+                yaml.dump(config_lock_data, f, default_flow_style=False, allow_unicode=True)
+            logger.info(f"Generated config-lock.yaml at {config_lock_path}")
+        except Exception as e:
+            logger.error(f"Failed to save config-lock.yaml: {e}")
+
 
 def upload_pipeline_step(folder, skip_dockerfile=False):
     """
clarifai/runners/pipelines/pipeline_builder.py CHANGED
@@ -116,13 +116,33 @@ class PipelineBuilder:
         return True
 
     def _upload_pipeline_step_with_version_capture(self, step_path: str) -> tuple[bool, str]:
-        """Upload a pipeline step and capture its version ID."""
+        """
+        Upload a pipeline step and capture its version ID.
+        Implements hash-based optimization to only upload modified steps.
+        """
         try:
             # Use the existing pipeline step builder
             from clarifai.runners.pipeline_steps.pipeline_step_builder import PipelineStepBuilder
 
             builder = PipelineStepBuilder(step_path)
 
+            # Check if we should upload based on hash comparison
+            should_upload = builder.should_upload_step()
+
+            if not should_upload:
+                # Load existing version ID from config-lock.yaml
+                config_lock = builder.load_config_lock()
+                if config_lock and config_lock.get("id"):
+                    version_id = config_lock["id"]
+                    logger.info(
+                        f"Using existing pipeline step version {version_id} (no changes detected)"
+                    )
+                    return True, version_id
+                else:
+                    logger.warning(
+                        "Hash indicates no upload needed, but no version ID found in config-lock.yaml. Proceeding with upload."
+                    )
+
             # Create dockerfile if needed
             builder.create_dockerfile()
 
@@ -139,6 +159,11 @@ class PipelineBuilder:
             success = builder.upload_pipeline_step_version()
 
             if success and builder.pipeline_step_version_id:
+                # Generate config-lock.yaml with the new version ID
+                builder.save_config_lock(builder.pipeline_step_version_id)
+                logger.info(
+                    f"Generated config-lock.yaml for pipeline step with version {builder.pipeline_step_version_id}"
+                )
                 return True, builder.pipeline_step_version_id
             else:
                 logger.error("Failed to get pipeline step version ID after upload")
@@ -148,42 +173,105 @@ class PipelineBuilder:
             logger.error(f"Error uploading pipeline step: {e}")
             return False, ""
 
-    def update_config_with_versions(self) -> None:
-        """Update the config.yaml with uploaded pipeline step versions."""
+    def prepare_lockfile_with_step_versions(self) -> Dict[str, Any]:
+        """Prepare lockfile data with step versions after pipeline step upload."""
         if not self.uploaded_step_versions:
-            logger.info("No pipeline step versions to update in config")
-            return
-
-        logger.info("Updating config.yaml with pipeline step versions...")
+            logger.info("No pipeline step versions for lockfile")
 
-        # Update the orchestration spec
+        # Create a copy of the orchestration spec to modify
         pipeline_config = self.config["pipeline"]
-        orchestration_spec = pipeline_config["orchestration_spec"]
+        orchestration_spec = pipeline_config["orchestration_spec"].copy()
         argo_spec_str = orchestration_spec["argo_orchestration_spec"]
         argo_spec = yaml.safe_load(argo_spec_str)
 
         # Update templateRef names to include versions
         self._update_template_refs_with_versions(argo_spec)
 
-        # Update the config
-        orchestration_spec["argo_orchestration_spec"] = yaml.dump(
-            argo_spec, Dumper=LiteralBlockDumper, default_flow_style=False
-        )
+        # Create the partial lockfile data structure (without pipeline info)
+        lockfile_data = {
+            "pipeline": {
+                "id": self.pipeline_id,
+                "user_id": self.user_id,
+                "app_id": self.app_id,
+                "version_id": None,  # Will be filled in later
+                "orchestration_spec": {
+                    "argo_orchestration_spec": yaml.dump(
+                        argo_spec, Dumper=LiteralBlockDumper, default_flow_style=False
+                    )
+                },
+            }
+        }
+
+        return lockfile_data
+
+    def update_lockfile_with_pipeline_info(
+        self, lockfile_data: Dict[str, Any], pipeline_version_id: str
+    ) -> Dict[str, Any]:
+        """Update the prepared lockfile data with pipeline version information."""
+        lockfile_data["pipeline"]["version_id"] = pipeline_version_id
+        return lockfile_data
+
+    def generate_lockfile_data(
+        self, pipeline_id: str = None, pipeline_version_id: str = None
+    ) -> Dict[str, Any]:
+        """Generate the complete lockfile data structure without modifying config.yaml.
+
+        This method is kept for backward compatibility. The recommended approach is to use
+        prepare_lockfile_with_step_versions() followed by update_lockfile_with_pipeline_info().
+        """
+        if not self.uploaded_step_versions:
+            logger.info("No pipeline step versions for lockfile")
 
-        # Remove uploaded directories from step_directories
-        remaining_dirs = []
-        for step_dir in pipeline_config.get("step_directories", []):
-            if step_dir not in self.uploaded_step_versions:
-                remaining_dirs.append(step_dir)
+        # Create a copy of the orchestration spec to modify
+        pipeline_config = self.config["pipeline"]
+        orchestration_spec = pipeline_config["orchestration_spec"].copy()
+        argo_spec_str = orchestration_spec["argo_orchestration_spec"]
+        argo_spec = yaml.safe_load(argo_spec_str)
 
-        pipeline_config["step_directories"] = remaining_dirs
+        # Update templateRef names to include versions
+        self._update_template_refs_with_versions(argo_spec)
+
+        # Create the lockfile data structure
+        lockfile_data = {
+            "pipeline": {
+                "id": pipeline_id or self.pipeline_id,
+                "user_id": self.user_id,
+                "app_id": self.app_id,
+                "version_id": pipeline_version_id,
+                "orchestration_spec": {
+                    "argo_orchestration_spec": yaml.dump(
+                        argo_spec, Dumper=LiteralBlockDumper, default_flow_style=False
+                    )
+                },
+            }
+        }
+
+        return lockfile_data
+
+    def save_lockfile(self, lockfile_data: Dict[str, Any], lockfile_path: str = None) -> None:
+        """Save lockfile data to config-lock.yaml."""
+        if lockfile_path is None:
+            lockfile_path = os.path.join(self.config_dir, "config-lock.yaml")
 
-        # Save the updated config
-        self._save_config()
-        logger.info("Updated config.yaml with pipeline step versions")
+        try:
+            with open(lockfile_path, 'w', encoding="utf-8") as file:
+                yaml.dump(
+                    lockfile_data,
+                    file,
+                    Dumper=LiteralBlockDumper,
+                    default_flow_style=False,
+                    sort_keys=False,
+                )
+            logger.info(f"Generated lockfile: {lockfile_path}")
+        except Exception as e:
+            raise ValueError(f"Error saving lockfile {lockfile_path}: {e}")
 
     def _update_template_refs_with_versions(self, argo_spec: Dict[str, Any]) -> None:
-        """Update templateRef names in Argo spec to include version information."""
+        """
+        Update templateRef names in Argo spec to include version information.
+        The step versions should be resolved from the corresponding config-lock.yaml
+        file of each pipeline-step, located in the step_directories.
+        """
         for template in argo_spec["spec"]["templates"]:
             if "steps" in template:
                 for step_group in template["steps"]:
@@ -199,12 +287,19 @@
                             step_name = parts[-1]
                             # The step name should match the directory name or be derivable from it
                             version_id = self.uploaded_step_versions.get(step_name, None)
+
+                            # If not found in uploaded_step_versions, try to get from config-lock.yaml
+                            if version_id is None:
+                                version_id = self._get_version_from_config_lock(step_name)
+
                             if version_id is not None:
                                 # Update the templateRef to include version
                                 new_name = f"{name}/versions/{version_id}"
                                 template_ref["name"] = new_name
                                 template_ref["template"] = new_name
                                 logger.info(f"Updated templateRef from {name} to {new_name}")
+                            else:
+                                logger.warning(f"Could not find version for step: {step_name}")
                         elif self.validator.TEMPLATE_REF_WITH_VERSION_PATTERN.match(name):
                             # strip the /versions/{version_id} from the end of name
                             # to get the name like above
@@ -215,6 +310,11 @@
                             # if it already has a version, make sure it matches the uploaded
                             # version
                             version_id = self.uploaded_step_versions.get(step_name, None)
+
+                            # If not found in uploaded_step_versions, try to get from config-lock.yaml
+                            if version_id is None:
+                                version_id = self._get_version_from_config_lock(step_name)
+
                             if version_id is not None:
                                 # Update the templateRef to include version
                                 new_name = f"{name}/versions/{version_id}"
@@ -223,9 +323,51 @@
                                 logger.info(
                                     f"Updated templateRef from {orig_name} to {new_name}"
                                 )
+                            else:
+                                logger.warning(f"Could not find version for step: {step_name}")
+
+    def _get_version_from_config_lock(self, step_name: str) -> str:
+        """
+        Get version ID from config-lock.yaml file in the corresponding step directory.
 
-    def create_pipeline(self) -> bool:
-        """Create the pipeline using PostPipelines RPC."""
+        :param step_name: Name of the pipeline step
+        :return: Version ID if found, None otherwise
+        """
+        pipeline_config = self.config["pipeline"]
+        step_directories = pipeline_config.get("step_directories", [])
+
+        for step_dir in step_directories:
+            # Check if step_dir matches step_name (handle both exact match and derivable cases)
+            if (
+                step_dir == step_name
+                or step_dir.endswith(f"/{step_name}")
+                or step_name in step_dir
+            ):
+                config_lock_path = os.path.join(self.config_dir, step_dir, "config-lock.yaml")
+
+                if os.path.exists(config_lock_path):
+                    try:
+                        with open(config_lock_path, 'r', encoding='utf-8') as f:
+                            config_lock = yaml.safe_load(f)
+                        version_id = config_lock.get("id")
+                        if version_id:
+                            logger.info(
+                                f"Found version {version_id} for step {step_name} in {config_lock_path}"
+                            )
+                            return version_id
+                    except Exception as e:
+                        logger.warning(
+                            f"Failed to read config-lock.yaml at {config_lock_path}: {e}"
+                        )
+
+        return None
+
+    def create_pipeline(self) -> tuple[bool, str]:
+        """Create the pipeline using PostPipelines RPC.
+
+        Returns:
+            tuple[bool, str]: (success, pipeline_version_id)
+        """
         logger.info(f"Creating pipeline {self.pipeline_id}...")
 
         try:
@@ -243,6 +385,11 @@
             argo_spec = yaml.safe_load(argo_spec_str)
             api_version = argo_spec.get("apiVersion", "argoproj.io/v1alpha1")
 
+            # Ensure that pipeline_config.argo_orchestration_spec_proto has the updated spec.templates.steps.templateRef values
+            # For each step, if the templateRef is missing a version, append the correct version at the end
+            # The step versions should be resolved from the corresponding config-lock.yaml file of each pipeline-step, located in the step_directories
+            self._update_template_refs_with_versions(argo_spec)
+
             # Create pipeline version with orchestration spec
             pipeline_version = resources_pb2.PipelineVersion()
             # Create orchestration spec proto
@@ -269,29 +416,32 @@
             if response.status.code == status_code_pb2.SUCCESS:
                 logger.info(f"Successfully created pipeline {self.pipeline_id}")
 
+                pipeline_version_id = ""
                 # Log pipeline and version IDs if available in response
                 if response.pipelines:
                     created_pipeline = response.pipelines[0]
                     logger.info(f"Pipeline ID: {created_pipeline.id}")
                     if created_pipeline.pipeline_version and created_pipeline.pipeline_version.id:
-                        logger.info(f"Pipeline version ID: {created_pipeline.pipeline_version.id}")
+                        pipeline_version_id = created_pipeline.pipeline_version.id
+                        logger.info(f"Pipeline version ID: {pipeline_version_id}")
 
-                return True
+                return True, pipeline_version_id
             else:
                 logger.error(f"Failed to create pipeline: {response.status.description}")
                 logger.error(f"Details: {response.status.details}")
-                return False
+                return False, ""
 
         except Exception as e:
             logger.error(f"Error creating pipeline: {e}")
-            return False
+            return False, ""
 
 
-def upload_pipeline(path: str):
+def upload_pipeline(path: str, no_lockfile: bool = False):
     """
     Upload a pipeline with associated pipeline steps to Clarifai.
 
     :param path: Path to the pipeline configuration file or directory containing config.yaml
+    :param no_lockfile: If True, skip creating config-lock.yaml
     """
     try:
         # Determine if path is a directory or file
@@ -311,15 +461,27 @@ def upload_pipeline(path: str):
             logger.error("Failed to upload pipeline steps")
             sys.exit(1)
 
-        # Step 2: Update config with version information
-        builder.update_config_with_versions()
+        # Step 2: Generate lockfile (unless --no-lockfile is specified)
+        # This will be used to update the versions of pipeline-steps that just got uploaded in Step 1
+        lockfile_data = None
+        if not no_lockfile:
+            lockfile_data = builder.prepare_lockfile_with_step_versions()
 
         # Step 3: Create the pipeline
-        if not builder.create_pipeline():
+        success, pipeline_version_id = builder.create_pipeline()
+        if not success:
             logger.error("Failed to create pipeline")
             sys.exit(1)
 
-        logger.info("Pipeline upload completed successfully!")
+        # Step 4: Update lockfile (unless --no-lockfile is specified)
+        if not no_lockfile and lockfile_data:
+            lockfile_data = builder.update_lockfile_with_pipeline_info(
+                lockfile_data, pipeline_version_id
+            )
+            builder.save_lockfile(lockfile_data)
+            logger.info("Pipeline upload completed successfully with lockfile!")
+        else:
+            logger.info("Pipeline upload completed successfully (lockfile skipped)!")
 
     except Exception as e:
         logger.error(f"Pipeline upload failed: {e}")
clarifai/utils/hashing.py ADDED
@@ -0,0 +1,117 @@
+"""
+Hashing utilities for Clarifai Python SDK.
+
+This module provides functions for computing stable hashes of directories and files,
+commonly used for change detection in pipeline steps and other components.
+"""
+
+import hashlib
+import os
+from typing import List, Optional
+
+
+def hash_directory(
+    directory: str, algo: str = "md5", exclude_files: Optional[List[str]] = None
+) -> str:
+    """
+    Compute a stable hash of all files in a directory.
+
+    This function computes a hash that accounts for:
+    - File relative paths (to detect renames)
+    - File sizes (to detect empty files)
+    - File contents (read in chunks for large files)
+
+    :param directory: Directory to hash
+    :param algo: Hash algorithm ('md5', 'sha1', 'sha256', etc.)
+    :param exclude_files: List of file names to exclude from hash calculation.
+                          If None, defaults to ['config-lock.yaml'] for backward compatibility.
+    :return: Hash as lowercase hex digest string
+    """
+    if exclude_files is None:
+        exclude_files = ['config-lock.yaml']
+
+    # Ensure directory exists
+    if not os.path.exists(directory):
+        raise ValueError(f"Directory does not exist: {directory}")
+
+    if not os.path.isdir(directory):
+        raise ValueError(f"Path is not a directory: {directory}")
+
+    hash_func = hashlib.new(algo)
+
+    for root, _, files in os.walk(directory):
+        for name in sorted(files):
+            # Skip files in the exclusion list
+            if name in exclude_files:
+                continue
+
+            filepath = os.path.join(root, name)
+            relative_path = os.path.relpath(filepath, directory)
+
+            # Hash the relative path to detect renames
+            hash_func.update(relative_path.encode("utf-8"))
+
+            # Hash the file size to detect empties
+            file_size = os.path.getsize(filepath)
+            hash_func.update(str(file_size).encode("utf-8"))
+
+            # Hash the file contents (read in chunks for large files)
+            try:
+                with open(filepath, "rb") as f:
+                    for chunk in iter(lambda: f.read(8192), b""):
+                        hash_func.update(chunk)
+            except (IOError, OSError) as e:
+                # If we can't read the file, include the error in the hash
+                # This ensures the hash changes if file permissions change
+                hash_func.update(f"ERROR_READING_FILE: {e}".encode("utf-8"))
+
+    return hash_func.hexdigest()
+
+
+def hash_file(filepath: str, algo: str = "md5") -> str:
+    """
+    Compute a hash of a single file.
+
+    :param filepath: Path to the file to hash
+    :param algo: Hash algorithm ('md5', 'sha1', 'sha256', etc.)
+    :return: Hash as lowercase hex digest string
+    """
+    if not os.path.exists(filepath):
+        raise ValueError(f"File does not exist: {filepath}")
+
+    if not os.path.isfile(filepath):
+        raise ValueError(f"Path is not a file: {filepath}")
+
+    hash_func = hashlib.new(algo)
+
+    try:
+        with open(filepath, "rb") as f:
+            for chunk in iter(lambda: f.read(8192), b""):
+                hash_func.update(chunk)
+    except (IOError, OSError) as e:
+        raise ValueError(f"Error reading file {filepath}: {e}")
+
+    return hash_func.hexdigest()
+
+
+def verify_hash_algorithm(algo: str) -> bool:
+    """
+    Verify that a hash algorithm is supported.
+
+    :param algo: Hash algorithm name
+    :return: True if algorithm is supported, False otherwise
+    """
+    try:
+        hashlib.new(algo)
+        return True
+    except ValueError:
+        return False
+
+
+def get_available_algorithms() -> List[str]:
+    """
+    Get list of available hash algorithms.
+
+    :return: List of available algorithm names
+    """
+    return list(hashlib.algorithms_available)
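A short usage sketch of the new module (the directory path is hypothetical):

from clarifai.utils.hashing import hash_directory, verify_hash_algorithm

assert verify_hash_algorithm("sha256")

# Exclude the lockfile itself so that writing config-lock.yaml does not
# invalidate the hash recorded inside it.
digest = hash_directory(
    "./steps/my-step", algo="sha256", exclude_files=["config-lock.yaml"]
)
print(digest)  # 64 lowercase hex characters for sha256

Note that file names within each directory are hashed in sorted order, while the traversal order of subdirectories comes from os.walk.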
{clarifai-11.8.2.dist-info → clarifai-11.8.3.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: clarifai
-Version: 11.8.2
+Version: 11.8.3
 Home-page: https://github.com/Clarifai/clarifai-python
 Author: Clarifai
 Author-email: support@clarifai.com
{clarifai-11.8.2.dist-info → clarifai-11.8.3.dist-info}/RECORD RENAMED
@@ -1,4 +1,4 @@
-clarifai/__init__.py,sha256=wlKYds59E079Cej1oyfbQMsxCdbNX7JGHZgtuBpBG3A,23
+clarifai/__init__.py,sha256=OGoTC_KKYIrBX-5ykvzjK3rEoiEwinADzRb-TA_522M,23
 clarifai/cli.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 clarifai/errors.py,sha256=GXa6D4v_L404J83jnRNFPH7s-1V9lk7w6Ws99f1g-AY,2772
 clarifai/versions.py,sha256=ecSuEB_nOL2XSoYHDw2n23XUbm_KPOGjudMXmQrGdS8,224
@@ -8,9 +8,9 @@ clarifai/cli/__main__.py,sha256=7nPbLW7Jr2shkgMPvnxpn4xYGMvIcnqluJ69t9w4H_k,74
 clarifai/cli/base.py,sha256=FQEEmi3a9_LBOmM_-X4EYdpAmDK1UljTxrHOIIsOZbM,10696
 clarifai/cli/compute_cluster.py,sha256=8Xss0Obrp6l1XuxJe0luOqU_pf8vXGDRi6jyIe8qR6k,2282
 clarifai/cli/deployment.py,sha256=9C4I6_kyMxRkWl6h681wc79-3mAtDHtTUaxRv05OZMs,4262
-clarifai/cli/model.py,sha256=2Eeoq4Kq_kJqSTmccn4B9YZ962Wj3mz1wcPQwXEWd2M,49734
+clarifai/cli/model.py,sha256=MwNt1jaxA_8D9KGFaapwH32BFoqnq9CaH8JcafhBZR4,51911
 clarifai/cli/nodepool.py,sha256=H6OIdUW_EiyDUwZogzEDoYmVwEjLMsgoDlPyE7gjIuU,4245
-clarifai/cli/pipeline.py,sha256=MmyPaVX1XsIZlrYTrT7Ctd71ao_d3ZWkuF_fv_NJS8s,13944
+clarifai/cli/pipeline.py,sha256=bH7pnJGfjQIu_Y-f_zwKBFdPDTsBEEDtA2Oz9P6-Zj0,17129
 clarifai/cli/pipeline_step.py,sha256=dvoC2vAsDcxOCy88VV0X42PG22_7JSu9sfBVsk-Cix4,6133
 clarifai/cli/templates/__init__.py,sha256=HbMlZuYOMyVJde73ijNAevmSRUpIttGlHdwyO4W-JOs,44
 clarifai/cli/templates/model_templates.py,sha256=-xGUzadN7S-mNZ-kE4Z-kv51BlnoGHjue05Yg5OGnt0,9791
@@ -24,7 +24,7 @@ clarifai/client/dataset.py,sha256=sz5CycP3J7pG0iMREKI2JeXQuvRwlVrE4AHne8yxgtg,35
 clarifai/client/deployment.py,sha256=QBf0tzkKBEpzNgmOEmWUJMOlVWdFEFc70Y44o8y75Gs,2875
 clarifai/client/input.py,sha256=jpX47qwn7aUBBIEuSSLHF5jk70XaWEh0prD065c9b-E,51205
 clarifai/client/lister.py,sha256=1YEm2suNxPaJO4x9V5szgD_YX6N_00vgSO-7m0HagY8,2208
-clarifai/client/model.py,sha256=WJJL0fOuHe4X8aMT-gs_SnEFhhT0cVDqqHbA3VX2DE8,92520
+clarifai/client/model.py,sha256=AgZ-H0DKAVrMMzDq8m5h9E5jMEjSHDpz20FfxyLfxz0,92567
 clarifai/client/model_client.py,sha256=8N8dRqb5zfFCNxq-jc-YSL19tgS8PpevnxY69G2YzCE,38280
 clarifai/client/module.py,sha256=pTcTmR48-VQRCEj3PJK_ZT5YTsYfZDbEjxwJ43rcLMM,4753
 clarifai/client/nodepool.py,sha256=QDJOMOYrZAG962u-MZWjXOZifjWK8hDgS2zoUn59dZU,16751
@@ -86,9 +86,9 @@ clarifai/runners/models/openai_class.py,sha256=MYnL7_U4_m5FX3CoVaTd6B9Qminh-q4pY
 clarifai/runners/models/visual_classifier_class.py,sha256=1ZoLfCT2crrgRbejjTMAIwpTRgQMiH9N9yflOVpFxSg,2721
 clarifai/runners/models/visual_detector_class.py,sha256=ky4oFAkGCKPpGPdgaOso-n6D3HcmnbKee_8hBsNiV8U,2883
 clarifai/runners/pipeline_steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-clarifai/runners/pipeline_steps/pipeline_step_builder.py,sha256=jcbs3ntbeyUiAaPh00Pscni4uJopbqVNVJblUX1pYVc,21808
+clarifai/runners/pipeline_steps/pipeline_step_builder.py,sha256=0AmleVdqvPMPl6Sxhd8pZDMja7GNzckbT8AuRJLtdho,25678
 clarifai/runners/pipelines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-clarifai/runners/pipelines/pipeline_builder.py,sha256=0FBjb8l7mWlCwBsBLkHM3znNQB9HPLEOYrrE53ntjCE,13810
+clarifai/runners/pipelines/pipeline_builder.py,sha256=bfIK7PPTspNhWjyKgPp13pkdGlHstKpsLi2Eet39Fyw,21494
 clarifai/runners/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 clarifai/runners/utils/code_script.py,sha256=-6IgNruIMTYLKJG8EqVWSaZR7lFRBoQ4ufJtuCPUCqc,14799
 clarifai/runners/utils/const.py,sha256=MK7lTzzJKbOiyiUtG_jlJXfz_xNKMn5LjkQ9vjbttXE,1538
@@ -108,6 +108,7 @@ clarifai/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 clarifai/utils/cli.py,sha256=ojPI6wBMwxpmCwWIE1nx4t_lzHqyJqby_TI6Fl3Vul4,15536
 clarifai/utils/config.py,sha256=dENYtcWW7Il5MInvIaYe0MZn0hW1fbIb0Lzk8rQ_geQ,7671
 clarifai/utils/constants.py,sha256=bHvs8L_Eai49Qm0U9YcK7Srx9FlL5iyv_pXvgSt6XDc,2497
+clarifai/utils/hashing.py,sha256=z2hHt4sDvGyqNbnOay0F2i3K_PjyX-J24IEytszyYsA,3761
 clarifai/utils/logging.py,sha256=0we53uTqUvzrulC86whu-oeWNxn1JjJL0OQ98Bwf9vo,15198
 clarifai/utils/misc.py,sha256=ATj4RR6S06GeLE0X4tMU4bmTz4Sz4j2WemTddsnSfMI,23458
 clarifai/utils/model_train.py,sha256=0XSAoTkSsrwf4f-W9yw2mkXZtkal7LBLJSoi86CFCn4,9250
@@ -121,9 +122,9 @@ clarifai/workflows/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuF
 clarifai/workflows/export.py,sha256=HvUYG9N_-UZoRR0-_tdGbZ950_AeBqawSppgUxQebR0,1913
 clarifai/workflows/utils.py,sha256=ESL3INcouNcLKCh-nMpfXX-YbtCzX7tz7hT57_RGQ3M,2079
 clarifai/workflows/validate.py,sha256=UhmukyHkfxiMFrPPeBdUTiCOHQT5-shqivlBYEyKTlU,2931
-clarifai-11.8.2.dist-info/licenses/LICENSE,sha256=mUqF_d12-qE2n41g7C5_sq-BMLOcj6CNN-jevr15YHU,555
-clarifai-11.8.2.dist-info/METADATA,sha256=tp1H7AGojDcqwCLK9irGnVbgnsCJK6EMrCd966DAlnw,23193
-clarifai-11.8.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-clarifai-11.8.2.dist-info/entry_points.txt,sha256=X9FZ4Z-i_r2Ud1RpZ9sNIFYuu_-9fogzCMCRUD9hyX0,51
-clarifai-11.8.2.dist-info/top_level.txt,sha256=wUMdCQGjkxaynZ6nZ9FAnvBUCgp5RJUVFSy2j-KYo0s,9
-clarifai-11.8.2.dist-info/RECORD,,
+clarifai-11.8.3.dist-info/licenses/LICENSE,sha256=mUqF_d12-qE2n41g7C5_sq-BMLOcj6CNN-jevr15YHU,555
+clarifai-11.8.3.dist-info/METADATA,sha256=M9Ee27AiY9jjKzwv7zmm_qO1aMspHq0wLhO1CgbzCKc,23193
+clarifai-11.8.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+clarifai-11.8.3.dist-info/entry_points.txt,sha256=X9FZ4Z-i_r2Ud1RpZ9sNIFYuu_-9fogzCMCRUD9hyX0,51
+clarifai-11.8.3.dist-info/top_level.txt,sha256=wUMdCQGjkxaynZ6nZ9FAnvBUCgp5RJUVFSy2j-KYo0s,9
+clarifai-11.8.3.dist-info/RECORD,,