@aws/ml-container-creator 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143)
  1. package/LICENSE +202 -0
  2. package/LICENSE-THIRD-PARTY +68620 -0
  3. package/NOTICE +2 -0
  4. package/README.md +106 -0
  5. package/bin/cli.js +365 -0
  6. package/config/defaults.json +32 -0
  7. package/config/presets/transformers-djl.json +26 -0
  8. package/config/presets/transformers-gpu.json +24 -0
  9. package/config/presets/transformers-lmi.json +27 -0
  10. package/package.json +129 -0
  11. package/servers/README.md +419 -0
  12. package/servers/base-image-picker/catalogs/model-servers.json +1191 -0
  13. package/servers/base-image-picker/catalogs/python-slim.json +38 -0
  14. package/servers/base-image-picker/catalogs/triton-backends.json +51 -0
  15. package/servers/base-image-picker/catalogs/triton.json +38 -0
  16. package/servers/base-image-picker/index.js +495 -0
  17. package/servers/base-image-picker/manifest.json +17 -0
  18. package/servers/base-image-picker/package.json +15 -0
  19. package/servers/hyperpod-cluster-picker/LICENSE +202 -0
  20. package/servers/hyperpod-cluster-picker/index.js +424 -0
  21. package/servers/hyperpod-cluster-picker/manifest.json +14 -0
  22. package/servers/hyperpod-cluster-picker/package.json +17 -0
  23. package/servers/instance-recommender/LICENSE +202 -0
  24. package/servers/instance-recommender/catalogs/instances.json +852 -0
  25. package/servers/instance-recommender/index.js +284 -0
  26. package/servers/instance-recommender/manifest.json +16 -0
  27. package/servers/instance-recommender/package.json +15 -0
  28. package/servers/lib/LICENSE +202 -0
  29. package/servers/lib/bedrock-client.js +160 -0
  30. package/servers/lib/custom-validators.js +46 -0
  31. package/servers/lib/dynamic-resolver.js +36 -0
  32. package/servers/lib/package.json +11 -0
  33. package/servers/lib/schemas/image-catalog.schema.json +185 -0
  34. package/servers/lib/schemas/instances.schema.json +124 -0
  35. package/servers/lib/schemas/manifest.schema.json +64 -0
  36. package/servers/lib/schemas/model-catalog.schema.json +91 -0
  37. package/servers/lib/schemas/regions.schema.json +26 -0
  38. package/servers/lib/schemas/triton-backends.schema.json +51 -0
  39. package/servers/model-picker/catalogs/jumpstart-public.json +66 -0
  40. package/servers/model-picker/catalogs/popular-diffusors.json +88 -0
  41. package/servers/model-picker/catalogs/popular-transformers.json +226 -0
  42. package/servers/model-picker/index.js +1693 -0
  43. package/servers/model-picker/manifest.json +18 -0
  44. package/servers/model-picker/package.json +20 -0
  45. package/servers/region-picker/LICENSE +202 -0
  46. package/servers/region-picker/catalogs/regions.json +263 -0
  47. package/servers/region-picker/index.js +230 -0
  48. package/servers/region-picker/manifest.json +16 -0
  49. package/servers/region-picker/package.json +15 -0
  50. package/src/app.js +1007 -0
  51. package/src/copy-tpl.js +77 -0
  52. package/src/lib/accelerator-validator.js +39 -0
  53. package/src/lib/asset-manager.js +385 -0
  54. package/src/lib/aws-profile-parser.js +181 -0
  55. package/src/lib/bootstrap-command-handler.js +1647 -0
  56. package/src/lib/bootstrap-config.js +238 -0
  57. package/src/lib/ci-register-helpers.js +124 -0
  58. package/src/lib/ci-report-helpers.js +158 -0
  59. package/src/lib/ci-stage-helpers.js +268 -0
  60. package/src/lib/cli-handler.js +529 -0
  61. package/src/lib/comment-generator.js +544 -0
  62. package/src/lib/community-reports-validator.js +91 -0
  63. package/src/lib/config-manager.js +2106 -0
  64. package/src/lib/configuration-exporter.js +204 -0
  65. package/src/lib/configuration-manager.js +695 -0
  66. package/src/lib/configuration-matcher.js +221 -0
  67. package/src/lib/cpu-validator.js +36 -0
  68. package/src/lib/cuda-validator.js +57 -0
  69. package/src/lib/deployment-config-resolver.js +103 -0
  70. package/src/lib/deployment-entry-schema.js +125 -0
  71. package/src/lib/deployment-registry.js +598 -0
  72. package/src/lib/docker-introspection-validator.js +51 -0
  73. package/src/lib/engine-prefix-resolver.js +60 -0
  74. package/src/lib/huggingface-client.js +172 -0
  75. package/src/lib/key-value-parser.js +37 -0
  76. package/src/lib/known-flags-validator.js +200 -0
  77. package/src/lib/manifest-cli.js +280 -0
  78. package/src/lib/mcp-client.js +303 -0
  79. package/src/lib/mcp-command-handler.js +532 -0
  80. package/src/lib/neuron-validator.js +80 -0
  81. package/src/lib/parameter-schema-validator.js +284 -0
  82. package/src/lib/prompt-runner.js +1349 -0
  83. package/src/lib/prompts.js +1138 -0
  84. package/src/lib/registry-command-handler.js +519 -0
  85. package/src/lib/registry-loader.js +198 -0
  86. package/src/lib/rocm-validator.js +80 -0
  87. package/src/lib/schema-validator.js +157 -0
  88. package/src/lib/sensitive-redactor.js +59 -0
  89. package/src/lib/template-engine.js +156 -0
  90. package/src/lib/template-manager.js +341 -0
  91. package/src/lib/validation-engine.js +314 -0
  92. package/src/prompt-adapter.js +63 -0
  93. package/templates/Dockerfile +300 -0
  94. package/templates/IAM_PERMISSIONS.md +84 -0
  95. package/templates/MIGRATION.md +488 -0
  96. package/templates/PROJECT_README.md +439 -0
  97. package/templates/TEMPLATE_SYSTEM.md +243 -0
  98. package/templates/buildspec.yml +64 -0
  99. package/templates/code/chat_template.jinja +1 -0
  100. package/templates/code/flask/gunicorn_config.py +35 -0
  101. package/templates/code/flask/wsgi.py +10 -0
  102. package/templates/code/model_handler.py +387 -0
  103. package/templates/code/serve +300 -0
  104. package/templates/code/serve.py +175 -0
  105. package/templates/code/serving.properties +105 -0
  106. package/templates/code/start_server.py +39 -0
  107. package/templates/code/start_server.sh +39 -0
  108. package/templates/diffusors/Dockerfile +72 -0
  109. package/templates/diffusors/patch_image_api.py +35 -0
  110. package/templates/diffusors/serve +115 -0
  111. package/templates/diffusors/start_server.sh +114 -0
  112. package/templates/do/.gitkeep +1 -0
  113. package/templates/do/README.md +541 -0
  114. package/templates/do/build +83 -0
  115. package/templates/do/ci +681 -0
  116. package/templates/do/clean +811 -0
  117. package/templates/do/config +260 -0
  118. package/templates/do/deploy +1560 -0
  119. package/templates/do/export +306 -0
  120. package/templates/do/logs +319 -0
  121. package/templates/do/manifest +12 -0
  122. package/templates/do/push +119 -0
  123. package/templates/do/register +580 -0
  124. package/templates/do/run +113 -0
  125. package/templates/do/submit +417 -0
  126. package/templates/do/test +1147 -0
  127. package/templates/hyperpod/configmap.yaml +24 -0
  128. package/templates/hyperpod/deployment.yaml +71 -0
  129. package/templates/hyperpod/pvc.yaml +42 -0
  130. package/templates/hyperpod/service.yaml +17 -0
  131. package/templates/nginx-diffusors.conf +74 -0
  132. package/templates/nginx-predictors.conf +47 -0
  133. package/templates/nginx-tensorrt.conf +74 -0
  134. package/templates/requirements.txt +61 -0
  135. package/templates/sample_model/test_inference.py +123 -0
  136. package/templates/sample_model/train_abalone.py +252 -0
  137. package/templates/test/test_endpoint.sh +79 -0
  138. package/templates/test/test_local_image.sh +80 -0
  139. package/templates/test/test_model_handler.py +180 -0
  140. package/templates/triton/Dockerfile +128 -0
  141. package/templates/triton/config.pbtxt +163 -0
  142. package/templates/triton/model.py +130 -0
  143. package/templates/triton/requirements.txt +11 -0
package/templates/sample_model/train_abalone.py
@@ -0,0 +1,252 @@
+ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ # SPDX-License-Identifier: Apache-2.0
+
+ import os
+ import ssl
+ import numpy as np
+
+ # Handle SSL certificate issues
+ try:
+     import certifi
+     # Use certifi's CA bundle so downloads stay verified
+     ssl._create_default_https_context = lambda *args, **kwargs: ssl.create_default_context(cafile=certifi.where())
+ except ImportError:
+     # If certifi is not available, disable SSL verification as a fallback
+     ssl._create_default_https_context = ssl._create_unverified_context
+
+ <% if (architecture === 'triton') { %>
+ <% if (backend === 'fil' && (modelFormat === 'xgboost_json' || modelFormat === 'xgboost_ubj')) { %>
+ try:
+     import xgboost as xgb
+ except ImportError:
+     print("Error: xgboost is required. Install dependencies with: pip install -r requirements.txt")
+     raise
+ <% } else if (backend === 'fil' && modelFormat === 'lightgbm_txt') { %>
+ try:
+     import lightgbm as lgb
+ except ImportError:
+     print("Error: lightgbm is required. Install dependencies with: pip install -r requirements.txt")
+     raise
+ <% } else if (backend === 'onnxruntime') { %>
+ try:
+     from sklearn.ensemble import RandomForestRegressor
+     from sklearn.model_selection import train_test_split
+     from skl2onnx import convert_sklearn
+     from skl2onnx.common.data_types import FloatTensorType
+     import onnx
+ except ImportError:
+     print("Error: scikit-learn, skl2onnx, and onnx are required. Install dependencies with: pip install -r requirements.txt")
+     raise
+ <% } else if (backend === 'tensorflow') { %>
+ try:
+     import tensorflow as tf
+ except ImportError:
+     print("Error: tensorflow is required. Install dependencies with: pip install -r requirements.txt")
+     raise
+ <% } else if (backend === 'python') { %>
+ try:
+     from sklearn.ensemble import RandomForestRegressor
+     from sklearn.model_selection import train_test_split
+     import pickle
+ <% if (modelFormat === 'joblib') { %>
+     import joblib
+ <% } %>
+ except ImportError:
+     print("Error: scikit-learn is required. Install dependencies with: pip install -r requirements.txt")
+     raise
+ <% } %>
+ <% } else { %>
+ <% const effectiveFramework = engine || framework; %>
+ <% if (effectiveFramework === 'sklearn') { %>from sklearn.ensemble import RandomForestRegressor
+ from sklearn.model_selection import train_test_split
+ <% if (modelFormat === 'joblib') { %>import joblib
+ <% } else if (modelFormat === 'pkl') { %>import pickle
+ <% } %>
+ <% } else if (effectiveFramework === 'xgboost' || effectiveFramework === 'tensorflow') { %>
+ <% if (effectiveFramework === 'xgboost') { %>import xgboost as xgb<% } %>
+ <% if (effectiveFramework === 'tensorflow') { %>import tensorflow as tf<% } %>
+
+ def train_test_split(X, y, test_size=0.2, random_state=None):
+     if random_state is not None:
+         np.random.seed(random_state)
+
+     n_samples = len(X)
+     n_test = int(n_samples * test_size)
+
+     indices = np.random.permutation(n_samples)
+     test_indices = indices[:n_test]
+     train_indices = indices[n_test:]
+
+     return X.iloc[train_indices], X.iloc[test_indices], y[train_indices], y[test_indices]
+ <% } %>
+ <% } %>
+
+ from ucimlrepo import fetch_ucirepo
+ <% if (architecture === 'triton' && (backend === 'fil' || backend === 'tensorflow')) { %>
+
+ def train_test_split(X, y, test_size=0.2, random_state=None):
+     if random_state is not None:
+         np.random.seed(random_state)
+
+     n_samples = len(X)
+     n_test = int(n_samples * test_size)
+
+     indices = np.random.permutation(n_samples)
+     test_indices = indices[:n_test]
+     train_indices = indices[n_test:]
+
+     return X.iloc[train_indices], X.iloc[test_indices], y[train_indices], y[test_indices]
+ <% } %>
+
+ try:
+     abalone = fetch_ucirepo(id=1)
+     X = abalone.data.features.copy()
+     y = abalone.data.targets.values.ravel()
+ except Exception as e:
+     print(f"Warning: Could not download Abalone dataset from UCI repository: {e}")
+     print("Creating synthetic data for demonstration...")
+
+     # Create synthetic abalone-like data
+     np.random.seed(42)
+     n_samples = 4177  # Same as original dataset
+
+     # Create synthetic features similar to abalone dataset
+     # Features: Sex, Length, Diameter, Height, Whole weight, Shucked weight, Viscera weight, Shell weight
+     X = np.random.rand(n_samples, 8)
+     X[:, 0] = np.random.choice([0, 1, 2], n_samples)  # Sex (M=0, F=1, I=2)
+     X[:, 1:] = X[:, 1:] * np.array([0.815, 0.650, 0.265, 2.826, 1.488, 0.760, 1.005])  # Scale to realistic ranges
+
+     # Create synthetic target (rings/age)
+     y = (X[:, 1] * 10 + X[:, 4] * 5 + np.random.normal(0, 2, n_samples)).astype(int)
+     y = np.clip(y, 1, 29)  # Clip to realistic range
+
+     # Convert to DataFrame-like structure for compatibility
+     import pandas as pd
+     feature_names = ['Sex', 'Length', 'Diameter', 'Height', 'Whole_weight', 'Shucked_weight', 'Viscera_weight', 'Shell_weight']
+     X = pd.DataFrame(X, columns=feature_names)
+
+ # Encode Sex column if it's not already numeric
+ if hasattr(X, 'dtypes') and X['Sex'].dtype == 'object':
+     # Encode Sex column (M=0, F=1, I=2)
+     sex_map = {'M': 0, 'F': 1, 'I': 2}
+     X['Sex'] = X['Sex'].map(sex_map)
+
+ # Split data
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+ <% if (architecture === 'triton') { %>
+ <% if (backend === 'fil' && (modelFormat === 'xgboost_json' || modelFormat === 'xgboost_ubj')) { %>
+ # Train XGBoost model
+ model = xgb.XGBRegressor(n_estimators=100, random_state=42)
+ model.fit(X_train, y_train)
+
+ # Evaluate
+ test_score = model.score(X_test, y_test)
+ print(f"Model trained. Test score: {test_score:.3f}")
+ <% } else if (backend === 'fil' && modelFormat === 'lightgbm_txt') { %>
+ # Train LightGBM model
+ model = lgb.LGBMRegressor(n_estimators=100, random_state=42, verbose=-1)
+ model.fit(X_train, y_train)
+
+ # Evaluate
+ test_score = model.score(X_test, y_test)
+ print(f"Model trained. Test score: {test_score:.3f}")
+ <% } else if (backend === 'onnxruntime') { %>
+ # Train sklearn model
+ model = RandomForestRegressor(n_estimators=100, random_state=42)
+ model.fit(X_train, y_train)
+
+ print(f"Model trained. Test score: {model.score(X_test, y_test):.3f}")
+
+ # Convert to ONNX format
+ initial_type = [('float_input', FloatTensorType([None, X_train.shape[1]]))]
+ onnx_model = convert_sklearn(model, 'abalone_model', initial_types=initial_type)
+ <% } else if (backend === 'tensorflow') { %>
+ # Train TensorFlow model
+ model = tf.keras.Sequential([
+     tf.keras.layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
+     tf.keras.layers.Dense(32, activation='relu'),
+     tf.keras.layers.Dense(1)
+ ])
+
+ model.compile(optimizer='adam', loss='mse', metrics=['mae'])
+
+ # Train model
+ model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2, verbose=0)
+
+ # Calculate test score
+ test_loss, test_mae = model.evaluate(X_test, y_test, verbose=0)
+ print(f"Model trained. Test MAE: {test_mae:.3f}")
+ <% } else if (backend === 'python') { %>
+ # Train sklearn model
+ model = RandomForestRegressor(n_estimators=100, random_state=42)
+ model.fit(X_train, y_train)
+
+ print(f"Model trained. Test score: {model.score(X_test, y_test):.3f}")
+ <% } %>
+ <% } else { %>
+ <% const effectiveFramework = engine || framework; %>
+ <% if (effectiveFramework === 'sklearn') { %># Train sklearn model
+ model = RandomForestRegressor(n_estimators=100, random_state=42)
+ model.fit(X_train, y_train)
+
+ print(f"Model trained. Test score: {model.score(X_test, y_test):.3f}")
+ <% } else if (effectiveFramework === 'xgboost') { %># Train XGBoost model
+ model = xgb.XGBRegressor(n_estimators=100, random_state=42)
+ model.fit(X_train, y_train)
+
+ # Evaluate
+ test_score = model.score(X_test, y_test)
+ print(f"Model trained. Test score: {test_score:.3f}")
+ <% } else if (effectiveFramework === 'tensorflow') { %># Train TensorFlow model
+ model = tf.keras.Sequential([
+     tf.keras.layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
+     tf.keras.layers.Dense(32, activation='relu'),
+     tf.keras.layers.Dense(1)
+ ])
+
+ model.compile(optimizer='adam', loss='mse', metrics=['mae'])
+
+ # Train model
+ model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2, verbose=0)
+
+ # Calculate test score
+ test_loss, test_mae = model.evaluate(X_test, y_test, verbose=0)
+ print(f"Model trained. Test MAE: {test_mae:.3f}")
+ <% } %>
+ <% } %>
+
+ # Save model
+ # Get the directory where this script is located
+ script_dir = os.path.dirname(os.path.abspath(__file__))
+
+ <% if (architecture === 'triton') { %>
+ <% if (backend === 'fil' && modelFormat === 'xgboost_json') { %>model.save_model(os.path.join(script_dir, 'abalone_model.json'))
+ <% } else if (backend === 'fil' && modelFormat === 'xgboost_ubj') { %>model.save_model(os.path.join(script_dir, 'abalone_model.ubj'))
+ <% } else if (backend === 'fil' && modelFormat === 'lightgbm_txt') { %>model.booster_.save_model(os.path.join(script_dir, 'abalone_model.txt'))
+ <% } else if (backend === 'onnxruntime') { %>onnx.save_model(onnx_model, os.path.join(script_dir, 'abalone_model.onnx'))
+ <% } else if (backend === 'tensorflow') { %>model.export(os.path.join(script_dir, 'abalone_model.savedmodel'))
+ <% } else if (backend === 'python' && modelFormat === 'pkl') { %>
+ with open(os.path.join(script_dir, 'abalone_model.pkl'), 'wb') as f:
+     pickle.dump(model, f)
+ <% } else if (backend === 'python' && modelFormat === 'joblib') { %>joblib.dump(model, os.path.join(script_dir, 'abalone_model.joblib'))
+ <% } else if (backend === 'python') { %>
+ # Custom format: defaulting to pickle serialization
+ with open(os.path.join(script_dir, 'abalone_model.pkl'), 'wb') as f:
+     pickle.dump(model, f)
+ <% } %>
+ <% } else { %>
+ <% if (modelFormat === 'joblib') { %>joblib.dump(model, os.path.join(script_dir, 'abalone_model.joblib'))
+ <% } else if (modelFormat === 'pkl') { -%>
+ with open(os.path.join(script_dir, 'abalone_model.pkl'), 'wb') as f:
+     pickle.dump(model, f)
+ <% } else if (modelFormat === 'json') { %>model.save_model(os.path.join(script_dir, 'abalone_model.json'))
+ <% } else if (modelFormat === 'model') { %>model.save_model(os.path.join(script_dir, 'abalone_model.model'))
+ <% } else if (modelFormat === 'ubj') { %>model.save_model(os.path.join(script_dir, 'abalone_model.ubj'))
+ <% } else if (modelFormat === 'h5') { %>model.save(os.path.join(script_dir, 'abalone_model.h5'))
+ <% } else if (modelFormat === 'keras') { %>model.save(os.path.join(script_dir, 'abalone_model.keras'))
+ <% } else if (modelFormat === 'SavedModel') { %>model.export(os.path.join(script_dir, 'abalone_model'))
+ <% } %>
+ <% } %>
+
+ print("Model saved.")
package/templates/test/test_endpoint.sh
@@ -0,0 +1,79 @@
+ #!/bin/bash
+ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ # SPDX-License-Identifier: Apache-2.0
+
+ # Exit on any error
+ set -e
+
+ <% if (framework !== 'transformers') { %>
+
+ # Check if endpoint name is provided
+ if [ $# -ne 1 ]; then
+     echo "Usage: $0 <endpoint-name>"
+     echo "Example: $0 <%= framework %>-endpoint-1234567890"
+     exit 1
+ fi
+
+ ENDPOINT_NAME=$1
+ AWS_REGION="us-east-1"
+
+ <% } else { %>
+ if [ $# -ne 2 ]; then
+     echo "Usage: $0 <endpoint-name> <model-id>"
+     echo "Example: $0 <%= framework %>-endpoint-1234567890 my-model-id"
+     exit 1
+ fi
+
+ ENDPOINT_NAME=$1
+ MODEL_ID=$2
+ AWS_REGION="us-east-1"
+
+ <% } %>
+
+ echo "Testing SageMaker endpoint: ${ENDPOINT_NAME}"
+
+ echo "Checking endpoint status..."
+ aws sagemaker describe-endpoint --endpoint-name ${ENDPOINT_NAME} --region ${AWS_REGION} --query 'EndpointStatus' --output text
+
+ echo "Testing inference endpoint..."
+
+ <% if (framework !== 'transformers') { %>
+ echo '{"instances": [[1, 0.455, 0.365, 0.095, 0.514, 0.2245, 0.101, 0.15]]}' > input.json
+ <% } else { %>
+
+ cat > input.json << EOF
+ {
+   "model": "${MODEL_ID}",
+   "messages": [
+     {
+       "role": "user",
+       "content": "Hello, how are you?"
+     }
+   ],
+   "max_tokens": 100,
+   "temperature": 0.7
+ }
+ EOF
+
+ <% } %>
+
+ aws sagemaker-runtime invoke-endpoint \
+     --endpoint-name ${ENDPOINT_NAME} \
+     --region ${AWS_REGION} \
+     --content-type 'application/json' \
+     --body fileb://input.json \
+     response.json
+
+ echo "Response:"
+ if command -v jq &> /dev/null; then
+     # Pretty-print the response body when it is JSON
+     jq -r '.Body // .' response.json 2>/dev/null || cat response.json
+ else
+     cat response.json
+ fi
+ echo
+
+ echo "Cleaning up files..."
+ rm -f response.json input.json
+
+ echo "Test complete!"
package/templates/test/test_local_image.sh
@@ -0,0 +1,80 @@
+ #!/bin/bash
+ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ # SPDX-License-Identifier: Apache-2.0
+
+ <% if (framework !== 'transformers') { %>
+ # Exit on any error
+ set -e
+
+ IMAGE_NAME="<%= projectName %>"
+ CONTAINER_NAME="<%= framework %>-test"
+ PORT=8080
+
+ echo "Building Docker image..."
+ docker build -t ${IMAGE_NAME} .
+
+ echo "Stopping any existing container..."
+ docker stop ${CONTAINER_NAME} 2>/dev/null || true
+ docker rm ${CONTAINER_NAME} 2>/dev/null || true
+
+ echo "Starting container on port ${PORT}..."
+ docker run -d --name ${CONTAINER_NAME} -p ${PORT}:8080 ${IMAGE_NAME}
+
+ echo "Waiting for container to start..."
+ sleep 10
+
+ echo "Testing health check endpoint..."
+ curl -f http://localhost:${PORT}/ping || echo "Health check failed"
+
+ echo -e "\nTesting inference endpoint..."
+ curl -X POST http://localhost:${PORT}/invocations \
+     -H "Content-Type: application/json" \
+     -d '{"instances": [[1, 0.455, 0.365, 0.095, 0.514, 0.2245, 0.101, 0.15]]}' || echo "Inference failed"
+
+ echo -e "\nContainer logs:"
+ docker logs ${CONTAINER_NAME}
+
+ echo -e "\nCleaning up..."
+ docker stop ${CONTAINER_NAME}
+ docker rm ${CONTAINER_NAME}
+
+ echo "Test complete!"
+ <% } else { %><% if (modelServer !== 'vllm') { %>
+ # Exit on any error
+ set -e
+
+ IMAGE_NAME="<%= projectName %>"
+ CONTAINER_NAME="<%= framework %>-test"
+ PORT=8080
+
+ echo "Building Docker image..."
+ docker build \
+     --build-arg MODEL=<%= modelName %> \
+     --build-arg MODEL_NAME=<%= projectName %>.<%= modelName %> \
+     --platform=linux/amd64 \
+     -t ${IMAGE_NAME} \
+     .
+
+ echo "Stopping any existing container..."
+ docker stop ${CONTAINER_NAME} 2>/dev/null || true
+ docker rm ${CONTAINER_NAME} 2>/dev/null || true
+
+ echo "Starting container on port ${PORT}..."
+ docker run -d --name ${CONTAINER_NAME} -p ${PORT}:8080 ${IMAGE_NAME}
+
+ echo "Waiting for container to start..."
+ sleep 10
+
+ echo "Testing health check endpoint..."
+ curl -f http://localhost:${PORT}/ping || echo "Health check failed"
+
+ echo -e "\nContainer logs:"
+ docker logs ${CONTAINER_NAME}
+
+ echo -e "\nCleaning up..."
+ docker stop ${CONTAINER_NAME}
+ docker rm ${CONTAINER_NAME}
+
+ echo "Test complete!"
+ <% } %><% } %>
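The two curl probes above follow SageMaker's container contract (GET /ping for health, POST /invocations for inference). A sketch of the same checks in Python, assuming the container is already running on port 8080 and the requests package is installed:

    # Same health and inference probes as the curl calls above.
    import requests

    BASE = 'http://localhost:8080'

    ping = requests.get(f'{BASE}/ping', timeout=5)
    print('ping:', ping.status_code)  # SageMaker expects 200 when healthy

    resp = requests.post(
        f'{BASE}/invocations',
        json={"instances": [[1, 0.455, 0.365, 0.095, 0.514, 0.2245, 0.101, 0.15]]},
        timeout=30,
    )
    print('invocations:', resp.status_code, resp.text)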
package/templates/test/test_model_handler.py
@@ -0,0 +1,180 @@
+ #!/usr/bin/env python3
+ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ # SPDX-License-Identifier: Apache-2.0
+
+ <% if (framework === 'sglang') { %>
+ """
+ Local testing script for SGLang models
+
+ This script allows you to test your SGLang model locally before containerizing.
+ Unlike serve.py (which runs a production HTTP server), this is a CLI tool
+ for development and debugging.
+
+ Usage examples:
+     # Test with text input
+     python test_model_handler.py --input-data '"Hello, how are you?"'
+
+     # Test with SageMaker format
+     python test_model_handler.py --input-data '{"instances": ["Hello, world!", "How are you?"]}'
+
+     # Custom model
+     python test_model_handler.py --model-id microsoft/DialoGPT-small --input-data '"Hello!"'
+
+ This is NOT used in production - serve.py handles containerized inference.
+ """
+ import json
+ import argparse
+ import sys
+ import os
+ import asyncio
+ from sglang import Runtime
+
+ def usage():
+     """Print usage examples and exit"""
+     print("\nSGLANG Model Handler Test Tool")
+     print("=" * 40)
+     print("\nUsage examples:")
+     print("  # Basic test with text input:")
+     print('  python test_model_handler.py --input-data \'"Hello, how are you?"\'')
+     print("\n  # SageMaker format:")
+     print('  python test_model_handler.py --input-data \'{"instances": ["Hello!", "How are you?"]}\'')
+     print("\n  # Custom model:")
+     print('  python test_model_handler.py --model-id microsoft/DialoGPT-small --input-data \'"Hello!"\'')
+     print("\n  # Show this help:")
+     print("  python test_model_handler.py --help")
+     print("\nNote: This is for local testing only. Production uses serve.py in containers.\n")
+     sys.exit(0)
+
+ async def main():
+     parser = argparse.ArgumentParser(
+         description='Local CLI tool for testing SGLang model inference',
+         epilog='Use --usage for detailed examples'
+     )
+     parser.add_argument('--model-id', type=str, default='<%= model || "microsoft/DialoGPT-medium" %>',
+                         help='Model ID to load (default: <%= model || "microsoft/DialoGPT-medium" %>)')
+     parser.add_argument('--input-data', type=str,
+                         help='Input data as a JSON string')
+     parser.add_argument('--usage', action='store_true',
+                         help='Show detailed usage examples')
+
+     args = parser.parse_args()
+
+     if args.usage:
+         usage()
+
+     if not args.input_data:
+         print("Error: --input-data is required")
+         print("Use --usage for examples or --help for options")
+         sys.exit(1)
+
+     print(f"Loading SGLang model: {args.model_id}")
+     runtime = Runtime(
+         model_path=args.model_id,
+         tokenizer_path=args.model_id,
+         device="cuda" if os.environ.get("CUDA_VISIBLE_DEVICES") else "cpu",
+         mem_fraction_static=0.8
+     )
+
+     try:
+         input_data = json.loads(args.input_data)
+     except json.JSONDecodeError:
+         input_data = args.input_data
+
+     # Extract prompts
+     if isinstance(input_data, dict):
+         prompts = input_data.get('instances', input_data.get('inputs', [input_data]))
+     else:
+         prompts = [input_data]
+
+     print("Running inference...")
+     outputs = runtime.generate(prompts)
+
+     result = {'predictions': outputs}
+     print("\nResult:")
+     print(json.dumps(result, indent=2))
+
+ if __name__ == '__main__':
+     asyncio.run(main())
+ <% } else { %>
+ """
+ Local testing script for <%= framework %> models
+
+ This script allows you to test your model locally before containerizing.
+ Unlike serve.py (which runs a production HTTP server), this is a CLI tool
+ for development and debugging.
+
+ Usage examples:
+     # Test with array input
+     python test_model_handler.py --input-data '[[1,2,3,4]]'
+
+     # Test with SageMaker format
+     python test_model_handler.py --input-data '{"instances": [[1, 0.455, 0.365, 0.095, 0.514, 0.2245, 0.101, 0.15]]}'
+
+     # Custom model path
+     python test_model_handler.py --model-path ./ --input-data '[[1, 0.455, 0.365, 0.095, 0.514, 0.2245, 0.101, 0.15]]'
+
+ This is NOT used in production - serve.py handles containerized inference.
+ """
+ import json
+ import argparse
+ import sys
+ import os
+ sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'code'))
+ from model_handler import ModelHandler
+
+ def usage():
+     """Print usage examples and exit"""
+     print("\n<%= framework.toUpperCase() %> Model Handler Test Tool")
+     print("=" * 40)
+     print("\nUsage examples:")
+     print("  # Basic test with array input:")
+     print("  python test_model_handler.py --input-data '[[1, 0.455, 0.365, 0.095, 0.514, 0.2245, 0.101, 0.15]]'")
+     print("\n  # SageMaker format:")
+     print("  python test_model_handler.py --input-data '{\"instances\": [[1, 0.455, 0.365, 0.095, 0.514, 0.2245, 0.101, 0.15]]}'")
+     print("\n  # Custom model path:")
+     print("  python test_model_handler.py --model-path ../sample_model --input-data '[[1, 0.455, 0.365, 0.095, 0.514, 0.2245, 0.101, 0.15]]'")
+     print("\n  # Show this help:")
+     print("  python test_model_handler.py --help")
+     print("\nNote: This is for local testing only. Production uses serve.py in containers.\n")
+     sys.exit(0)
+
+ def main():
+     parser = argparse.ArgumentParser(
+         description='Local CLI tool for testing <%= framework %> model inference',
+         epilog='Use --usage for detailed examples'
+     )
+     parser.add_argument('--model-path', type=str, default='sample_model',
+                         help='Path to model directory (default: sample_model)')
+     parser.add_argument('--input-data', type=str,
+                         help='Input data as a JSON string')
+     parser.add_argument('--usage', action='store_true',
+                         help='Show detailed usage examples')
+
+     args = parser.parse_args()
+
+     if args.usage:
+         usage()
+
+     if not args.input_data:
+         print("Error: --input-data is required")
+         print("Use --usage for examples or --help for options")
+         sys.exit(1)
+
+     print(f"Loading model from: {args.model_path}")
+     handler = ModelHandler(args.model_path)
+     handler.load_model()
+
+     try:
+         input_data = json.loads(args.input_data)
+     except json.JSONDecodeError:
+         input_data = args.input_data
+
+     print("Running inference...")
+     result = handler.predict(input_data)
+     print("\nResult:")
+     print(json.dumps(result, indent=2))
+
+ if __name__ == '__main__':
+     main()
+ <% } %>
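The non-SGLang branch imports ModelHandler from ../code/model_handler.py (file 102 in the list above, not shown in this diff). For orientation only, a hypothetical minimal handler covering just the two methods the test script calls; the real template is far more complete:

    # Hypothetical stand-in for templates/code/model_handler.py; only the
    # load_model/predict interface used by the test script is sketched here.
    import os
    import pickle

    class ModelHandler:
        def __init__(self, model_path):
            self.model_path = model_path
            self.model = None

        def load_model(self):
            # Assumes a pickled sklearn-style model saved by train_abalone.py
            with open(os.path.join(self.model_path, 'abalone_model.pkl'), 'rb') as f:
                self.model = pickle.load(f)

        def predict(self, input_data):
            # Accept a bare array or the SageMaker {"instances": ...} format
            instances = input_data.get('instances', input_data) if isinstance(input_data, dict) else input_data
            return {'predictions': self.model.predict(instances).tolist()}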