clarifai-11.5.2-py3-none-any.whl → clarifai-11.5.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. clarifai/__init__.py +1 -1
  2. clarifai/cli/model.py +33 -1
  3. clarifai/cli/pipeline.py +137 -0
  4. clarifai/cli/pipeline_step.py +104 -0
  5. clarifai/cli/templates/__init__.py +1 -0
  6. clarifai/cli/templates/pipeline_step_templates.py +64 -0
  7. clarifai/cli/templates/pipeline_templates.py +150 -0
  8. clarifai/client/auth/helper.py +23 -0
  9. clarifai/client/auth/register.py +5 -0
  10. clarifai/client/auth/stub.py +116 -12
  11. clarifai/client/base.py +9 -0
  12. clarifai/client/model.py +111 -7
  13. clarifai/client/model_client.py +355 -6
  14. clarifai/client/user.py +81 -0
  15. clarifai/runners/models/model_builder.py +52 -9
  16. clarifai/runners/pipeline_steps/__init__.py +0 -0
  17. clarifai/runners/pipeline_steps/pipeline_step_builder.py +510 -0
  18. clarifai/runners/pipelines/__init__.py +0 -0
  19. clarifai/runners/pipelines/pipeline_builder.py +313 -0
  20. clarifai/runners/utils/code_script.py +40 -7
  21. clarifai/runners/utils/const.py +2 -2
  22. clarifai/runners/utils/model_utils.py +135 -0
  23. clarifai/runners/utils/pipeline_validation.py +153 -0
  24. {clarifai-11.5.2.dist-info → clarifai-11.5.3.dist-info}/METADATA +1 -1
  25. {clarifai-11.5.2.dist-info → clarifai-11.5.3.dist-info}/RECORD +30 -19
  26. /clarifai/cli/{model_templates.py → templates/model_templates.py} +0 -0
  27. {clarifai-11.5.2.dist-info → clarifai-11.5.3.dist-info}/WHEEL +0 -0
  28. {clarifai-11.5.2.dist-info → clarifai-11.5.3.dist-info}/entry_points.txt +0 -0
  29. {clarifai-11.5.2.dist-info → clarifai-11.5.3.dist-info}/licenses/LICENSE +0 -0
  30. {clarifai-11.5.2.dist-info → clarifai-11.5.3.dist-info}/top_level.txt +0 -0
clarifai/runners/pipeline_steps/pipeline_step_builder.py (new file)
@@ -0,0 +1,510 @@
+import os
+import re
+import sys
+import tarfile
+import time
+from string import Template
+
+import yaml
+from clarifai_grpc.grpc.api import resources_pb2, service_pb2
+from clarifai_grpc.grpc.api.status import status_code_pb2
+
+from clarifai.client.base import BaseClient
+from clarifai.utils.logging import logger
+from clarifai.utils.misc import get_uuid
+from clarifai.versions import CLIENT_VERSION
+
+# Upload chunk size for pipeline step versions (14MB)
+UPLOAD_CHUNK_SIZE = 14 * 1024 * 1024
+
+
+class PipelineStepBuilder:
+    """Pipeline Step Builder class for managing pipeline step upload to Clarifai."""
+
+    def __init__(self, folder: str):
+        """
+        Initialize PipelineStepBuilder.
+
+        :param folder: The folder containing the pipeline step files (config.yaml, requirements.txt,
+                       dockerfile, and pipeline_step.py in 1/ subdirectory)
+        """
+        self._client = None
+        self.folder = self._validate_folder(folder)
+        self.config = self._load_config(os.path.join(self.folder, 'config.yaml'))
+        self._validate_config()
+        self.pipeline_step_proto = self._get_pipeline_step_proto()
+        self.pipeline_step_id = self.pipeline_step_proto.id
+        self.pipeline_step_version_id = None
+        self.pipeline_step_compute_info = self._get_pipeline_step_compute_info()
+
+    @property
+    def client(self):
+        """Get or create the Clarifai client."""
+        if self._client is None:
+            pipeline_step_config = self.config["pipeline_step"]
+            user_id = pipeline_step_config["user_id"]
+            app_id = pipeline_step_config["app_id"]
+            self._client = BaseClient(user_id=user_id, app_id=app_id)
+        return self._client
+
+    def _validate_folder(self, folder):
+        """Validate that the folder contains required files."""
+        folder = os.path.abspath(folder)
+
+        # Check for required files
+        required_files = ['config.yaml']
+        for file in required_files:
+            file_path = os.path.join(folder, file)
+            if not os.path.exists(file_path):
+                raise FileNotFoundError(f"Required file '{file}' not found in {folder}")
+
+        # Check for pipeline_step.py in 1/ subdirectory
+        pipeline_step_file = os.path.join(folder, '1', 'pipeline_step.py')
+        if not os.path.exists(pipeline_step_file):
+            raise FileNotFoundError(f"Required file '1/pipeline_step.py' not found in {folder}")
+
+        return folder
+
+    @staticmethod
+    def _load_config(config_path):
+        """Load and return the configuration from config.yaml."""
+        try:
+            with open(config_path, 'r') as file:
+                config = yaml.safe_load(file)
+            return config
+        except Exception as e:
+            raise ValueError(f"Error loading config.yaml: {e}")
+
+    def _validate_config(self):
+        """Validate the configuration."""
+        # Validate pipeline_step section
+        if "pipeline_step" not in self.config:
+            raise ValueError("pipeline_step section not found in config.yaml")
+
+        pipeline_step = self.config["pipeline_step"]
+        required_fields = ["id", "user_id", "app_id"]
+
+        for field in required_fields:
+            if field not in pipeline_step:
+                raise ValueError(f"{field} not found in pipeline_step section of config.yaml")
+            if not pipeline_step[field]:
+                raise ValueError(f"{field} cannot be empty in config.yaml")
+
+        # Validate pipeline_step_compute_info section
+        if "pipeline_step_compute_info" not in self.config:
+            raise ValueError("pipeline_step_compute_info section not found in config.yaml")
+
+    def _get_pipeline_step_proto(self):
+        """Create pipeline step proto from config."""
+        pipeline_step_config = self.config["pipeline_step"]
+
+        pipeline_step_proto = resources_pb2.PipelineStep(
+            id=pipeline_step_config["id"], user_id=pipeline_step_config["user_id"]
+        )
+
+        return pipeline_step_proto
+
+    def _get_pipeline_step_compute_info(self):
+        """Get pipeline step compute info from config."""
+        compute_config = self.config.get("pipeline_step_compute_info", {})
+
+        compute_info = resources_pb2.ComputeInfo()
+
+        if "cpu_limit" in compute_config:
+            compute_info.cpu_limit = compute_config["cpu_limit"]
+        if "cpu_memory" in compute_config:
+            compute_info.cpu_memory = compute_config["cpu_memory"]
+        if "num_accelerators" in compute_config:
+            compute_info.num_accelerators = compute_config["num_accelerators"]
+
+        return compute_info
+
+    def check_pipeline_step_exists(self):
+        """Check if pipeline step already exists."""
+        try:
+            resp = self.client.STUB.GetPipelineStep(
+                service_pb2.GetPipelineStepRequest(
+                    user_app_id=self.client.user_app_id, pipeline_step_id=self.pipeline_step_id
+                )
+            )
+            return resp.status.code == status_code_pb2.SUCCESS
+        except Exception:
+            return False
+
+    def create_pipeline_step(self):
+        """Create a new pipeline step if it doesn't exist."""
+        if self.check_pipeline_step_exists():
+            logger.info(f"Pipeline step {self.pipeline_step_id} already exists")
+            return True
+
+        try:
+            # Build pipeline step input params
+            input_params = []
+            if "pipeline_step_input_params" in self.config:
+                for param_config in self.config["pipeline_step_input_params"]:
+                    param = resources_pb2.PipelineStepInputParam(name=param_config["name"])
+                    if "default" in param_config:
+                        param.default_value = param_config["default"]
+                    if "description" in param_config:
+                        param.description = param_config["description"]
+                    if "accepted_values" in param_config:
+                        param.accepted_values.extend(param_config["accepted_values"])
+                    input_params.append(param)
+
+            pipeline_step = resources_pb2.PipelineStep(
+                id=self.pipeline_step_id, user_id=self.pipeline_step_proto.user_id
+            )
+
+            resp = self.client.STUB.PostPipelineSteps(
+                service_pb2.PostPipelineStepsRequest(
+                    user_app_id=self.client.user_app_id, pipeline_steps=[pipeline_step]
+                )
+            )
+
+            if resp.status.code == status_code_pb2.SUCCESS:
+                logger.info(f"Successfully created pipeline step {self.pipeline_step_id}")
+                return True
+            else:
+                logger.error(f"Failed to create pipeline step: {resp.status}")
+                return False
+
+        except Exception as e:
+            logger.error(f"Error creating pipeline step: {e}")
+            return False
+
+    def create_dockerfile(self):
+        """Create a Dockerfile for the pipeline step."""
+        # Use similar logic to model builder for dockerfile creation
+        dockerfile_template = """FROM --platform=$TARGETPLATFORM public.ecr.aws/clarifai-models/python-base:$PYTHON_VERSION-df565436eea93efb3e8d1eb558a0a46df29523ec as final
+
+COPY --link requirements.txt /home/nonroot/requirements.txt
+
+# Update clarifai package so we always have latest protocol to the API. Everything should land in /venv
+RUN ["pip", "install", "--no-cache-dir", "-r", "/home/nonroot/requirements.txt"]
+
+# Copy in the actual files like config.yaml, requirements.txt, and most importantly 1/pipeline_step.py for the actual pipeline step.
+COPY --link=true 1 /home/nonroot/main/1
+# At this point we only need these for validation in the SDK.
+COPY --link=true requirements.txt config.yaml /home/nonroot/main/
+"""
+
+        # Get Python version from config or use default
+        build_info = self.config.get('build_info', {})
+        python_version = build_info.get('python_version', '3.12')
+
+        # Ensure requirements.txt has clarifai
+        self._ensure_clarifai_requirement()
+
+        # Replace placeholders
+        dockerfile_content = Template(dockerfile_template).safe_substitute(
+            PYTHON_VERSION=python_version
+        )
+
+        # Write Dockerfile
+        dockerfile_path = os.path.join(self.folder, 'Dockerfile')
+        with open(dockerfile_path, 'w') as dockerfile:
+            dockerfile.write(dockerfile_content)
+
+        logger.info(f"Created Dockerfile at {dockerfile_path}")
+
+    def _ensure_clarifai_requirement(self):
+        """Ensure clarifai is in requirements.txt with proper version."""
+        requirements_path = os.path.join(self.folder, 'requirements.txt')
+
+        # Read existing requirements
+        requirements = []
+        if os.path.exists(requirements_path):
+            with open(requirements_path, 'r') as f:
+                requirements = f.readlines()
+
+        # Check if clarifai is already present
+        has_clarifai = any('clarifai' in line for line in requirements)
+
+        if not has_clarifai:
+            requirements.append(f'clarifai=={CLIENT_VERSION}\n')
+            with open(requirements_path, 'w') as f:
+                f.writelines(requirements)
+            logger.info(f"Added clarifai=={CLIENT_VERSION} to requirements.txt")
+
+    @property
+    def tar_file(self):
+        """Get the tar file path."""
+        return f"{self.folder}.tar.gz"
+
+    def upload_pipeline_step_version(self):
+        """Upload a new version of the pipeline step."""
+        # Ensure pipeline step exists
+        if not self.create_pipeline_step():
+            logger.error("Failed to create pipeline step")
+            return False
+
+        # Create tar file
+        file_path = self.tar_file
+        logger.debug(f"Creating tar file: {file_path}")
+
+        def filter_func(tarinfo):
+            name = tarinfo.name
+            exclude = [os.path.basename(self.tar_file), "*~", "*.pyc", "*.pyo", "__pycache__"]
+            return None if any(name.endswith(ex) for ex in exclude) else tarinfo
+
+        with tarfile.open(file_path, "w:gz") as tar:
+            tar.add(self.folder, arcname=".", filter=filter_func)
+
+        logger.debug("Tar file creation complete")
+
+        file_size = os.path.getsize(file_path)
+        logger.debug(f"Tar file size: {file_size} bytes")
+
+        try:
+            # Upload pipeline step version with client-side progress tracking
+            uploaded_bytes = 0
+            chunk_count = 0
+            total_chunks = (
+                file_size + UPLOAD_CHUNK_SIZE - 1
+            ) // UPLOAD_CHUNK_SIZE  # Ceiling division
+
+            for response in self.client.STUB.PostPipelineStepVersionsUpload(
+                self._pipeline_step_version_upload_iterator(file_path)
+            ):
+                # Calculate progress based on chunks uploaded
+                if chunk_count == 0:
+                    # First response is config upload, no progress yet
+                    percent_completed = 0
+                else:
+                    # Calculate progress based on completed chunks
+                    uploaded_bytes = min(chunk_count * UPLOAD_CHUNK_SIZE, file_size)
+                    percent_completed = min(100, int((uploaded_bytes / file_size) * 100))
+
+                chunk_count += 1
+
+                print(
+                    f"Status: {response.status.description}, Upload Progress: {percent_completed}%, Details: {response.status.details}",
+                    f"request_id: {response.status.req_id}",
+                    end='\r',
+                    flush=True,
+                )
+
+            if response.status.code != status_code_pb2.PIPELINE_STEP_BUILDING:
+                logger.error(f"Failed to upload pipeline step version: {response}")
+                return False
+
+            self.pipeline_step_version_id = response.pipeline_step_version_id
+            logger.info(f"\nCreated Pipeline Step Version ID: {self.pipeline_step_version_id}")
+
+            # Monitor build progress
+            return self._monitor_pipeline_step_build()
+
+        finally:
+            # Clean up tar file
+            if os.path.exists(file_path):
+                logger.debug(f"Cleaning up tar file: {file_path}")
+                os.remove(file_path)
+
+    def _pipeline_step_version_upload_iterator(self, file_path):
+        """Iterator for uploading pipeline step version in chunks."""
+        # First yield the config
+        yield self._init_upload_pipeline_step_version(file_path)
+
+        # Then yield file content in chunks
+        with open(file_path, "rb") as f:
+            file_size = os.path.getsize(file_path)
+            chunk_size = UPLOAD_CHUNK_SIZE
+            logger.info("Uploading pipeline step content...")
+            logger.debug(f"File size: {file_size}")
+            logger.debug(f"Chunk size: {chunk_size}")
+
+            offset = 0
+            part_id = 1
+            while offset < file_size:
+                try:
+                    current_chunk_size = min(chunk_size, file_size - offset)
+                    chunk = f.read(current_chunk_size)
+                    if not chunk:
+                        break
+                    yield service_pb2.PostPipelineStepVersionsUploadRequest(
+                        content_part=resources_pb2.UploadContentPart(
+                            data=chunk,
+                            part_number=part_id,
+                            range_start=offset,
+                        )
+                    )
+                    offset += len(chunk)
+                    part_id += 1
+                except Exception as e:
+                    logger.exception(f"\nError uploading file: {e}")
+                    break
+
+            if offset == file_size:
+                logger.info("Upload complete!")
+
+    def _get_tar_file_content_size(self, tar_file_path):
+        """
+        Calculates the total size of the contents of a tar file.
+
+        Args:
+            tar_file_path (str): The path to the tar file.
+
+        Returns:
+            int: The total size of the contents in bytes.
+        """
+        total_size = 0
+        with tarfile.open(tar_file_path, 'r') as tar:
+            for member in tar:
+                if member.isfile():
+                    total_size += member.size
+        return total_size
+
+    def _init_upload_pipeline_step_version(self, file_path):
+        """Initialize the pipeline step version upload."""
+        file_size = os.path.getsize(file_path)
+        storage_request_size = self._get_tar_file_content_size(file_path)
+        logger.debug(f"Uploading pipeline step version of pipeline step {self.pipeline_step_id}")
+        logger.debug(f"Using file '{os.path.basename(file_path)}' of size: {file_size} bytes")
+        logger.debug(f"Storage request size: {storage_request_size} bytes")
+
+        # Build pipeline step input params
+        input_params = []
+        if "pipeline_step_input_params" in self.config:
+            for param_config in self.config["pipeline_step_input_params"]:
+                param = resources_pb2.PipelineStepInputParam(name=param_config["name"])
+                if "default" in param_config:
+                    param.default_value = param_config["default"]
+                if "description" in param_config:
+                    param.description = param_config["description"]
+                if "accepted_values" in param_config:
+                    param.accepted_values.extend(param_config["accepted_values"])
+                input_params.append(param)
+
+        # Create pipeline step version proto with generated ID
+        version_id = get_uuid(16)  # Generate a 16-character UUID
+        pipeline_step_version = resources_pb2.PipelineStepVersion(
+            id=version_id,
+            description="Pipeline step version",
+            pipeline_step_input_params=input_params,
+            pipeline_step_compute_info=self.pipeline_step_compute_info,
+        )
+
+        return service_pb2.PostPipelineStepVersionsUploadRequest(
+            upload_config=service_pb2.PostPipelineStepVersionsUploadConfig(
+                user_app_id=self.client.user_app_id,
+                pipeline_step_id=self.pipeline_step_id,
+                pipeline_step_version=pipeline_step_version,
+                total_size=file_size,
+                storage_request_size=storage_request_size,
+            )
+        )

+    def _monitor_pipeline_step_build(self, timeout_sec=300, interval_sec=1):
+        """
+        Monitor the pipeline step build process with timeout and log display.
+
+        :param timeout_sec: Maximum time to wait for build completion (default 300 seconds)
+        :param interval_sec: Interval between status checks (default 1 second)
+        :return: True if build successful, False otherwise
+        """
+        max_checks = timeout_sec // interval_sec
+        seen_logs = set()  # To avoid duplicate log messages
+        st = time.time()
+
+        for _ in range(max_checks):
+            print(
+                f"Pipeline Step is building... (elapsed {time.time() - st:.1f}s)",
+                end='\r',
+                flush=True,
+            )
+
+            try:
+                response = self.client.STUB.GetPipelineStepVersion(
+                    service_pb2.GetPipelineStepVersionRequest(
+                        user_app_id=self.client.user_app_id,
+                        pipeline_step_id=self.pipeline_step_id,
+                        pipeline_step_version_id=self.pipeline_step_version_id,
+                    ),
+                    metadata=self.client.auth_helper.metadata,
+                )
+                logger.debug(f"GetPipelineStepVersion Response: {response}")
+
+                # Fetch and display build logs
+                logs_request = service_pb2.ListLogEntriesRequest(
+                    log_type="builder",
+                    user_app_id=self.client.user_app_id,
+                    pipeline_step_id=self.pipeline_step_id,
+                    pipeline_step_version_id=self.pipeline_step_version_id,
+                    page=1,
+                    per_page=50,
+                )
+                logs = self.client.STUB.ListLogEntries(
+                    logs_request, metadata=self.client.auth_helper.metadata
+                )
+
+                for log_entry in logs.log_entries:
+                    if log_entry.url not in seen_logs:
+                        seen_logs.add(log_entry.url)
+                        log_entry_msg = re.sub(
+                            r"(\\*)(\[[a-z#/@][^[]*?])",
+                            lambda m: f"{m.group(1)}{m.group(1)}\\{m.group(2)}",
+                            log_entry.message.strip(),
+                        )
+                        logger.info(log_entry_msg)
+
+                status = response.pipeline_step_version.status.code
+                if status in {
+                    status_code_pb2.StatusCode.PIPELINE_STEP_READY,
+                    status_code_pb2.StatusCode.PIPELINE_STEP_BUILDING_FAILED,
+                    status_code_pb2.StatusCode.PIPELINE_STEP_BUILD_UNEXPECTED_ERROR,
+                    status_code_pb2.StatusCode.INTERNAL_UNCATEGORIZED,
+                }:
+                    if status == status_code_pb2.StatusCode.PIPELINE_STEP_READY:
+                        logger.info("\nPipeline step build complete!")
+                        logger.info(f"Build time elapsed {time.time() - st:.1f}s")
+                        return True
+                    else:
+                        logger.error(
+                            f"\nPipeline step build failed with status: {response.pipeline_step_version.status}"
+                        )
+                        return False
+                elif status != status_code_pb2.StatusCode.PIPELINE_STEP_BUILDING:
+                    logger.error(
+                        f"\nUnexpected status during pipeline step build: {response.pipeline_step_version.status}"
+                    )
+                    return False
+
+                time.sleep(interval_sec)
+
+            except Exception as e:
+                logger.error(f"Error monitoring pipeline step build: {e}")
+                return False
+
+        raise TimeoutError("Pipeline step build did not finish in time")
+
+
+def upload_pipeline_step(folder, skip_dockerfile=False):
+    """
+    Upload a pipeline step to Clarifai.
+
+    :param folder: The folder containing the pipeline step files.
+    :param skip_dockerfile: If True, will not create a Dockerfile.
+    """
+    builder = PipelineStepBuilder(folder)
+
+    if not skip_dockerfile:
+        builder.create_dockerfile()
+
+    exists = builder.check_pipeline_step_exists()
+    if exists:
+        logger.info(
+            f"Pipeline step {builder.pipeline_step_id} already exists, this upload will create a new version for it."
+        )
+    else:
+        logger.info(
+            f"New pipeline step {builder.pipeline_step_id} will be created with its first version."
+        )
+
+    input("Press Enter to continue...")
+
+    success = builder.upload_pipeline_step_version()
+    if success:
+        logger.info("Pipeline step upload completed successfully!")
+    else:
+        logger.error("Pipeline step upload failed!")
+        sys.exit(1)
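And a hedged usage sketch of the module-level entry point (the folder path is hypothetical; per the source above, the call prompts on stdin before uploading and exits non-zero on failure):

    from clarifai.runners.pipeline_steps.pipeline_step_builder import upload_pipeline_step

    # Writes a Dockerfile into the folder (unless skip_dockerfile=True),
    # tars the folder, uploads it in 14MB chunks via
    # PostPipelineStepVersionsUpload, then polls the build until it reaches
    # PIPELINE_STEP_READY or fails/times out (default timeout: 300s).
    upload_pipeline_step("./my_pipeline_step", skip_dockerfile=False)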