llama-stack 0.4.3__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166)
  1. llama_stack/distributions/dell/doc_template.md +209 -0
  2. llama_stack/distributions/meta-reference-gpu/doc_template.md +119 -0
  3. llama_stack/distributions/nvidia/doc_template.md +170 -0
  4. llama_stack/distributions/oci/doc_template.md +140 -0
  5. llama_stack/models/llama/llama3/dog.jpg +0 -0
  6. llama_stack/models/llama/llama3/pasta.jpeg +0 -0
  7. llama_stack/models/llama/resources/dog.jpg +0 -0
  8. llama_stack/models/llama/resources/pasta.jpeg +0 -0
  9. llama_stack/models/llama/resources/small_dog.jpg +0 -0
  10. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +136 -11
  11. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.h +9 -0
  12. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.swift +189 -0
  13. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/Parsing.swift +238 -0
  14. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/PromptTemplate.swift +12 -0
  15. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/SystemPrompts.swift +89 -0
  16. llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.pbxproj +550 -0
  17. llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/contents.xcworkspacedata +7 -0
  18. llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist +8 -0
  19. llama_stack/providers/remote/datasetio/nvidia/README.md +74 -0
  20. llama_stack/providers/remote/eval/nvidia/README.md +134 -0
  21. llama_stack/providers/remote/files/s3/README.md +266 -0
  22. llama_stack/providers/remote/inference/nvidia/NVIDIA.md +203 -0
  23. llama_stack/providers/remote/post_training/nvidia/README.md +151 -0
  24. llama_stack/providers/remote/safety/nvidia/README.md +78 -0
  25. llama_stack/providers/utils/responses/responses_store.py +34 -0
  26. {llama_stack-0.4.3.dist-info → llama_stack-0.4.4.dist-info}/METADATA +2 -2
  27. {llama_stack-0.4.3.dist-info → llama_stack-0.4.4.dist-info}/RECORD +31 -142
  28. llama_stack-0.4.4.dist-info/top_level.txt +1 -0
  29. llama_stack-0.4.3.dist-info/top_level.txt +0 -2
  30. llama_stack_api/__init__.py +0 -945
  31. llama_stack_api/admin/__init__.py +0 -45
  32. llama_stack_api/admin/api.py +0 -72
  33. llama_stack_api/admin/fastapi_routes.py +0 -117
  34. llama_stack_api/admin/models.py +0 -113
  35. llama_stack_api/agents.py +0 -173
  36. llama_stack_api/batches/__init__.py +0 -40
  37. llama_stack_api/batches/api.py +0 -53
  38. llama_stack_api/batches/fastapi_routes.py +0 -113
  39. llama_stack_api/batches/models.py +0 -78
  40. llama_stack_api/benchmarks/__init__.py +0 -43
  41. llama_stack_api/benchmarks/api.py +0 -39
  42. llama_stack_api/benchmarks/fastapi_routes.py +0 -109
  43. llama_stack_api/benchmarks/models.py +0 -109
  44. llama_stack_api/common/__init__.py +0 -5
  45. llama_stack_api/common/content_types.py +0 -101
  46. llama_stack_api/common/errors.py +0 -95
  47. llama_stack_api/common/job_types.py +0 -38
  48. llama_stack_api/common/responses.py +0 -77
  49. llama_stack_api/common/training_types.py +0 -47
  50. llama_stack_api/common/type_system.py +0 -146
  51. llama_stack_api/connectors.py +0 -146
  52. llama_stack_api/conversations.py +0 -270
  53. llama_stack_api/datasetio.py +0 -55
  54. llama_stack_api/datasets/__init__.py +0 -61
  55. llama_stack_api/datasets/api.py +0 -35
  56. llama_stack_api/datasets/fastapi_routes.py +0 -104
  57. llama_stack_api/datasets/models.py +0 -152
  58. llama_stack_api/datatypes.py +0 -373
  59. llama_stack_api/eval.py +0 -137
  60. llama_stack_api/file_processors/__init__.py +0 -27
  61. llama_stack_api/file_processors/api.py +0 -64
  62. llama_stack_api/file_processors/fastapi_routes.py +0 -78
  63. llama_stack_api/file_processors/models.py +0 -42
  64. llama_stack_api/files/__init__.py +0 -35
  65. llama_stack_api/files/api.py +0 -51
  66. llama_stack_api/files/fastapi_routes.py +0 -124
  67. llama_stack_api/files/models.py +0 -107
  68. llama_stack_api/inference.py +0 -1169
  69. llama_stack_api/inspect_api/__init__.py +0 -37
  70. llama_stack_api/inspect_api/api.py +0 -25
  71. llama_stack_api/inspect_api/fastapi_routes.py +0 -76
  72. llama_stack_api/inspect_api/models.py +0 -28
  73. llama_stack_api/internal/__init__.py +0 -9
  74. llama_stack_api/internal/kvstore.py +0 -28
  75. llama_stack_api/internal/sqlstore.py +0 -81
  76. llama_stack_api/llama_stack_api/__init__.py +0 -945
  77. llama_stack_api/llama_stack_api/admin/__init__.py +0 -45
  78. llama_stack_api/llama_stack_api/admin/api.py +0 -72
  79. llama_stack_api/llama_stack_api/admin/fastapi_routes.py +0 -117
  80. llama_stack_api/llama_stack_api/admin/models.py +0 -113
  81. llama_stack_api/llama_stack_api/agents.py +0 -173
  82. llama_stack_api/llama_stack_api/batches/__init__.py +0 -40
  83. llama_stack_api/llama_stack_api/batches/api.py +0 -53
  84. llama_stack_api/llama_stack_api/batches/fastapi_routes.py +0 -113
  85. llama_stack_api/llama_stack_api/batches/models.py +0 -78
  86. llama_stack_api/llama_stack_api/benchmarks/__init__.py +0 -43
  87. llama_stack_api/llama_stack_api/benchmarks/api.py +0 -39
  88. llama_stack_api/llama_stack_api/benchmarks/fastapi_routes.py +0 -109
  89. llama_stack_api/llama_stack_api/benchmarks/models.py +0 -109
  90. llama_stack_api/llama_stack_api/common/__init__.py +0 -5
  91. llama_stack_api/llama_stack_api/common/content_types.py +0 -101
  92. llama_stack_api/llama_stack_api/common/errors.py +0 -95
  93. llama_stack_api/llama_stack_api/common/job_types.py +0 -38
  94. llama_stack_api/llama_stack_api/common/responses.py +0 -77
  95. llama_stack_api/llama_stack_api/common/training_types.py +0 -47
  96. llama_stack_api/llama_stack_api/common/type_system.py +0 -146
  97. llama_stack_api/llama_stack_api/connectors.py +0 -146
  98. llama_stack_api/llama_stack_api/conversations.py +0 -270
  99. llama_stack_api/llama_stack_api/datasetio.py +0 -55
  100. llama_stack_api/llama_stack_api/datasets/__init__.py +0 -61
  101. llama_stack_api/llama_stack_api/datasets/api.py +0 -35
  102. llama_stack_api/llama_stack_api/datasets/fastapi_routes.py +0 -104
  103. llama_stack_api/llama_stack_api/datasets/models.py +0 -152
  104. llama_stack_api/llama_stack_api/datatypes.py +0 -373
  105. llama_stack_api/llama_stack_api/eval.py +0 -137
  106. llama_stack_api/llama_stack_api/file_processors/__init__.py +0 -27
  107. llama_stack_api/llama_stack_api/file_processors/api.py +0 -64
  108. llama_stack_api/llama_stack_api/file_processors/fastapi_routes.py +0 -78
  109. llama_stack_api/llama_stack_api/file_processors/models.py +0 -42
  110. llama_stack_api/llama_stack_api/files/__init__.py +0 -35
  111. llama_stack_api/llama_stack_api/files/api.py +0 -51
  112. llama_stack_api/llama_stack_api/files/fastapi_routes.py +0 -124
  113. llama_stack_api/llama_stack_api/files/models.py +0 -107
  114. llama_stack_api/llama_stack_api/inference.py +0 -1169
  115. llama_stack_api/llama_stack_api/inspect_api/__init__.py +0 -37
  116. llama_stack_api/llama_stack_api/inspect_api/api.py +0 -25
  117. llama_stack_api/llama_stack_api/inspect_api/fastapi_routes.py +0 -76
  118. llama_stack_api/llama_stack_api/inspect_api/models.py +0 -28
  119. llama_stack_api/llama_stack_api/internal/__init__.py +0 -9
  120. llama_stack_api/llama_stack_api/internal/kvstore.py +0 -28
  121. llama_stack_api/llama_stack_api/internal/sqlstore.py +0 -81
  122. llama_stack_api/llama_stack_api/models.py +0 -171
  123. llama_stack_api/llama_stack_api/openai_responses.py +0 -1468
  124. llama_stack_api/llama_stack_api/post_training.py +0 -370
  125. llama_stack_api/llama_stack_api/prompts.py +0 -203
  126. llama_stack_api/llama_stack_api/providers/__init__.py +0 -33
  127. llama_stack_api/llama_stack_api/providers/api.py +0 -16
  128. llama_stack_api/llama_stack_api/providers/fastapi_routes.py +0 -57
  129. llama_stack_api/llama_stack_api/providers/models.py +0 -24
  130. llama_stack_api/llama_stack_api/py.typed +0 -0
  131. llama_stack_api/llama_stack_api/rag_tool.py +0 -168
  132. llama_stack_api/llama_stack_api/resource.py +0 -37
  133. llama_stack_api/llama_stack_api/router_utils.py +0 -160
  134. llama_stack_api/llama_stack_api/safety.py +0 -132
  135. llama_stack_api/llama_stack_api/schema_utils.py +0 -208
  136. llama_stack_api/llama_stack_api/scoring.py +0 -93
  137. llama_stack_api/llama_stack_api/scoring_functions.py +0 -211
  138. llama_stack_api/llama_stack_api/shields.py +0 -93
  139. llama_stack_api/llama_stack_api/tools.py +0 -226
  140. llama_stack_api/llama_stack_api/vector_io.py +0 -941
  141. llama_stack_api/llama_stack_api/vector_stores.py +0 -53
  142. llama_stack_api/llama_stack_api/version.py +0 -9
  143. llama_stack_api/models.py +0 -171
  144. llama_stack_api/openai_responses.py +0 -1468
  145. llama_stack_api/post_training.py +0 -370
  146. llama_stack_api/prompts.py +0 -203
  147. llama_stack_api/providers/__init__.py +0 -33
  148. llama_stack_api/providers/api.py +0 -16
  149. llama_stack_api/providers/fastapi_routes.py +0 -57
  150. llama_stack_api/providers/models.py +0 -24
  151. llama_stack_api/py.typed +0 -0
  152. llama_stack_api/rag_tool.py +0 -168
  153. llama_stack_api/resource.py +0 -37
  154. llama_stack_api/router_utils.py +0 -160
  155. llama_stack_api/safety.py +0 -132
  156. llama_stack_api/schema_utils.py +0 -208
  157. llama_stack_api/scoring.py +0 -93
  158. llama_stack_api/scoring_functions.py +0 -211
  159. llama_stack_api/shields.py +0 -93
  160. llama_stack_api/tools.py +0 -226
  161. llama_stack_api/vector_io.py +0 -941
  162. llama_stack_api/vector_stores.py +0 -53
  163. llama_stack_api/version.py +0 -9
  164. {llama_stack-0.4.3.dist-info → llama_stack-0.4.4.dist-info}/WHEEL +0 -0
  165. {llama_stack-0.4.3.dist-info → llama_stack-0.4.4.dist-info}/entry_points.txt +0 -0
  166. {llama_stack-0.4.3.dist-info → llama_stack-0.4.4.dist-info}/licenses/LICENSE +0 -0
llama_stack/providers/remote/datasetio/nvidia/README.md
@@ -0,0 +1,74 @@

# NVIDIA DatasetIO Provider for LlamaStack

This provider enables dataset management using NVIDIA's NeMo Customizer service.

## Features

- Register datasets for fine-tuning LLMs
- Unregister datasets

## Getting Started

### Prerequisites

- LlamaStack with NVIDIA configuration
- Access to a hosted NVIDIA NeMo Microservices deployment
- API key for authentication with the NVIDIA service

### Setup

Build the NVIDIA environment:

```bash
uv pip install llama-stack-client
uv run llama stack list-deps nvidia | xargs -L1 uv pip install
```

### Basic Usage with the LlamaStack Python Client

#### Initialize the client

```python
import os

os.environ["NVIDIA_API_KEY"] = "your-api-key"
os.environ["NVIDIA_CUSTOMIZER_URL"] = "http://nemo.test"
os.environ["NVIDIA_DATASET_NAMESPACE"] = "default"
os.environ["NVIDIA_PROJECT_ID"] = "test-project"

from llama_stack.core.library_client import LlamaStackAsLibraryClient

client = LlamaStackAsLibraryClient("nvidia")
client.initialize()
```

#### Register a dataset

```python
client.datasets.register(
    purpose="post-training/messages",
    dataset_id="my-training-dataset",
    source={"type": "uri", "uri": "hf://datasets/default/sample-dataset"},
    metadata={
        "format": "json",
        "description": "Dataset for LLM fine-tuning",
        "provider": "nvidia",
    },
)
```
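
To look up a single dataset after registration, a lookup call can be used. A minimal sketch, assuming the client exposes the standard `datasets.retrieve` method (GET `/datasets/{dataset_id}`):

```python
# Hedged sketch: assumes `datasets.retrieve` is available on this client.
dataset = client.datasets.retrieve(dataset_id="my-training-dataset")
print(f"Registered dataset: {dataset.identifier}")
print(f"Source URI: {dataset.source.uri}")
```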

#### Get a list of all registered datasets

```python
datasets = client.datasets.list()
for dataset in datasets:
    print(f"Dataset ID: {dataset.identifier}")
    print(f"Description: {dataset.metadata.get('description', '')}")
    print(f"Source: {dataset.source.uri}")
    print("---")
```

#### Unregister a dataset

```python
client.datasets.unregister(dataset_id="my-training-dataset")
```

llama_stack/providers/remote/eval/nvidia/README.md
@@ -0,0 +1,134 @@

# NVIDIA NeMo Evaluator Eval Provider

## Overview

For the first integration, benchmarks are mapped to Evaluation Configs in the NeMo Evaluator. The full evaluation config object is provided as part of the metadata. The `dataset_id` and `scoring_functions` fields are not used.

Below are a few examples of how to register a benchmark (which in turn creates an evaluation config in NeMo Evaluator) and how to trigger an evaluation.

### Example: registering an academic benchmark

```
POST /eval/benchmarks
```
```json
{
  "benchmark_id": "mmlu",
  "dataset_id": "",
  "scoring_functions": [],
  "metadata": {
    "type": "mmlu"
  }
}
```
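
The same registration can be driven from Python. A minimal sketch using `requests`, assuming a Llama Stack server reachable at `http://localhost:8321` (the route prefix may differ in your deployment):

```python
import requests

# Assumed server address; adjust to your deployment.
BASE_URL = "http://localhost:8321"

# Register the academic benchmark shown above.
resp = requests.post(
    f"{BASE_URL}/eval/benchmarks",
    json={
        "benchmark_id": "mmlu",
        "dataset_id": "",
        "scoring_functions": [],
        "metadata": {"type": "mmlu"},
    },
)
resp.raise_for_status()
```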

### Example: registering a custom evaluation

```
POST /eval/benchmarks
```
```json
{
  "benchmark_id": "my-custom-benchmark",
  "dataset_id": "",
  "scoring_functions": [],
  "metadata": {
    "type": "custom",
    "params": {
      "parallelism": 8
    },
    "tasks": {
      "qa": {
        "type": "completion",
        "params": {
          "template": {
            "prompt": "{{prompt}}",
            "max_tokens": 200
          }
        },
        "dataset": {
          "files_url": "hf://datasets/default/sample-basic-test/testing/testing.jsonl"
        },
        "metrics": {
          "bleu": {
            "type": "bleu",
            "params": {
              "references": [
                "{{ideal_response}}"
              ]
            }
          }
        }
      }
    }
  }
}
```

### Example: triggering a benchmark/custom evaluation

```
POST /eval/benchmarks/{benchmark_id}/jobs
```
```json
{
  "benchmark_id": "my-custom-benchmark",
  "benchmark_config": {
    "eval_candidate": {
      "type": "model",
      "model": "meta-llama/Llama3.1-8B-Instruct",
      "sampling_params": {
        "max_tokens": 100,
        "temperature": 0.7
      }
    },
    "scoring_params": {}
  }
}
```

Response example:
```json
{
  "job_id": "eval-1234",
  "status": "in_progress"
}
```

### Example: getting the status of a job
```
GET /eval/benchmarks/{benchmark_id}/jobs/{job_id}
```

Response example:
```json
{
  "job_id": "eval-1234",
  "status": "in_progress"
}
```
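
Because evaluation jobs run asynchronously, a caller typically polls the status endpoint until the job finishes. A minimal sketch with `requests`, again assuming a server at `http://localhost:8321` (the terminal status names here are an assumption):

```python
import time

import requests

BASE_URL = "http://localhost:8321"  # assumed server address
benchmark_id = "my-custom-benchmark"
job_id = "eval-1234"

# Poll until the job leaves the "in_progress" state.
while True:
    status = requests.get(
        f"{BASE_URL}/eval/benchmarks/{benchmark_id}/jobs/{job_id}"
    ).json()
    if status["status"] != "in_progress":
        break
    time.sleep(10)

print(f"Job finished with status: {status['status']}")
```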

### Example: cancelling a job
```
POST /eval/benchmarks/{benchmark_id}/jobs/{job_id}/cancel
```

### Example: getting the results
```
GET /eval/benchmarks/{benchmark_id}/results
```
```json
{
  "generations": [],
  "scores": {
    "{benchmark_id}": {
      "score_rows": [],
      "aggregated_results": {
        "tasks": {},
        "groups": {}
      }
    }
  }
}
```

llama_stack/providers/remote/files/s3/README.md
@@ -0,0 +1,266 @@

# S3 Files Provider

A remote S3-based implementation of the Llama Stack Files API that provides scalable cloud file storage with metadata persistence.

## Features

- **AWS S3 Storage**: Store files in AWS S3 buckets for scalable, durable storage
- **Metadata Management**: Uses a SQL database for efficient file metadata queries
- **OpenAI API Compatibility**: Full compatibility with OpenAI Files API endpoints
- **Flexible Authentication**: Support for IAM roles and access keys
- **Custom S3 Endpoints**: Support for MinIO and other S3-compatible services
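
Since the provider mirrors the OpenAI Files API, any OpenAI-compatible client can exercise it. A minimal sketch, assuming a Llama Stack server at `http://localhost:8321` serving the OpenAI-compatible routes under `/v1` (address, prefix, and file name are assumptions):

```python
from openai import OpenAI

# Assumed server address and route prefix; adjust to your deployment.
client = OpenAI(base_url="http://localhost:8321/v1", api_key="not-needed")

# Upload a file to the S3-backed Files provider.
with open("training_data.jsonl", "rb") as f:
    uploaded = client.files.create(file=f, purpose="batch")
print(f"Uploaded file id: {uploaded.id}")

# List stored files.
for stored in client.files.list():
    print(stored.id, stored.filename, stored.bytes)
```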

## Configuration

### Basic Configuration

```yaml
api: files
provider_type: remote::s3
config:
  bucket_name: my-llama-stack-files
  region: us-east-1
  metadata_store:
    type: sqlite
    db_path: ./s3_files_metadata.db
```

### Advanced Configuration

```yaml
api: files
provider_type: remote::s3
config:
  bucket_name: my-llama-stack-files
  region: us-east-1
  aws_access_key_id: YOUR_ACCESS_KEY
  aws_secret_access_key: YOUR_SECRET_KEY
  endpoint_url: https://s3.amazonaws.com  # Optional for custom endpoints
  metadata_store:
    type: sqlite
    db_path: ./s3_files_metadata.db
```

### Environment Variables

The configuration supports environment variable substitution:

```yaml
config:
  bucket_name: "${env.S3_BUCKET_NAME}"
  region: "${env.AWS_REGION:=us-east-1}"
  aws_access_key_id: "${env.AWS_ACCESS_KEY_ID:=}"
  aws_secret_access_key: "${env.AWS_SECRET_ACCESS_KEY:=}"
  endpoint_url: "${env.S3_ENDPOINT_URL:=}"
```

Note: `S3_BUCKET_NAME` has no default value since S3 bucket names must be globally unique.

## Authentication

### IAM Roles (Recommended)

For production deployments, use IAM roles:

```yaml
config:
  bucket_name: my-bucket
  region: us-east-1
  # No credentials needed - will use IAM role
```

### Access Keys

For development or specific use cases:

```yaml
config:
  bucket_name: my-bucket
  region: us-east-1
  aws_access_key_id: AKIAIOSFODNN7EXAMPLE
  aws_secret_access_key: wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
```

## S3 Bucket Setup

### Required Permissions

The S3 provider requires the following permissions:

```json
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Action": [
        "s3:GetObject",
        "s3:PutObject",
        "s3:DeleteObject",
        "s3:ListBucket"
      ],
      "Resource": [
        "arn:aws:s3:::your-bucket-name",
        "arn:aws:s3:::your-bucket-name/*"
      ]
    }
  ]
}
```

### Automatic Bucket Creation

By default, the S3 provider expects the bucket to already exist. If you want the provider to automatically create the bucket when it doesn't exist, set `auto_create_bucket: true` in your configuration:

```yaml
config:
  bucket_name: my-bucket
  auto_create_bucket: true  # Will create bucket if it doesn't exist
  region: us-east-1
```

**Note**: When `auto_create_bucket` is enabled, the provider needs the additional `s3:CreateBucket` permission:

```json
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Action": [
        "s3:GetObject",
        "s3:PutObject",
        "s3:DeleteObject",
        "s3:ListBucket",
        "s3:CreateBucket"
      ],
      "Resource": [
        "arn:aws:s3:::your-bucket-name",
        "arn:aws:s3:::your-bucket-name/*"
      ]
    }
  ]
}
```

### Bucket Policy (Optional)

For additional security, you can add a bucket policy:

```json
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Sid": "LlamaStackAccess",
      "Effect": "Allow",
      "Principal": {
        "AWS": "arn:aws:iam::YOUR-ACCOUNT:role/LlamaStackRole"
      },
      "Action": [
        "s3:GetObject",
        "s3:PutObject",
        "s3:DeleteObject"
      ],
      "Resource": "arn:aws:s3:::your-bucket-name/*"
    },
    {
      "Sid": "LlamaStackBucketAccess",
      "Effect": "Allow",
      "Principal": {
        "AWS": "arn:aws:iam::YOUR-ACCOUNT:role/LlamaStackRole"
      },
      "Action": [
        "s3:ListBucket"
      ],
      "Resource": "arn:aws:s3:::your-bucket-name"
    }
  ]
}
```

## Implementation Notes

### Metadata Persistence

File metadata is stored in a SQL database for fast queries and OpenAI API compatibility. The metadata includes:

- File ID
- Original filename
- Purpose (assistants, batch, etc.)
- File size in bytes
- Created and expiration timestamps

### TTL and Cleanup

Files currently have a fixed, long expiration time (100 years).

## Development and Testing

### Using MinIO

For self-hosted S3-compatible storage:

```yaml
config:
  bucket_name: test-bucket
  region: us-east-1
  endpoint_url: http://localhost:9000
  aws_access_key_id: minioadmin
  aws_secret_access_key: minioadmin
```
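
Before pointing the provider at a local MinIO instance, it can help to confirm the endpoint and bucket are reachable. A minimal sketch using `boto3` with the credentials from the config above (the bucket name is an assumption):

```python
import boto3

# Connect to the local MinIO endpoint with the credentials shown above.
s3 = boto3.client(
    "s3",
    endpoint_url="http://localhost:9000",
    aws_access_key_id="minioadmin",
    aws_secret_access_key="minioadmin",
    region_name="us-east-1",
)

# Create the test bucket if it does not exist yet, then verify access.
existing = {b["Name"] for b in s3.list_buckets()["Buckets"]}
if "test-bucket" not in existing:
    s3.create_bucket(Bucket="test-bucket")
s3.head_bucket(Bucket="test-bucket")  # raises if unreachable or forbidden
```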

### Using OCI Object Storage with S3 Compatibility

[Official Object Storage Amazon S3 Compatibility API Documentation](https://docs.oracle.com/en-us/iaas/Content/Object/Tasks/s3compatibleapi.htm)

OCI Object Storage can be used through the OCI S3 Compatibility API. Simply update `config.yaml` and set the environment variables as shown below.

#### config.yaml
```yaml
provider_type: remote::s3
config:
  bucket_name: "${env.S3_BUCKET_NAME}"
  region: "${env.AWS_REGION:=us-east-1}"
  aws_access_key_id: "${env.AWS_ACCESS_KEY_ID:=}"
  aws_secret_access_key: "${env.AWS_SECRET_ACCESS_KEY:=}"
  endpoint_url: "${env.S3_ENDPOINT_URL:=}"
  metadata_store:
    table_name: files_metadata
    backend: sql_default
```
#### .env
```
AWS_ACCESS_KEY_ID=OCI_ACCESS_KEY
AWS_SECRET_ACCESS_KEY=OCI_SECRET_KEY
S3_BUCKET_NAME=OCI_BUCKET_NAME
S3_ENDPOINT_URL=https://<namespace>.compat.objectstorage.<region>.oci.customer-oci.com
AWS_REQUEST_CHECKSUM_CALCULATION=when_required
AWS_RESPONSE_CHECKSUM_VALIDATION=when_required
```

## Monitoring and Logging

The provider logs important operations and errors. For production deployments, consider:

- CloudWatch monitoring for S3 operations
- Custom metrics for file upload/download rates
- Error rate monitoring
- Performance metrics tracking

## Error Handling

The provider handles various error scenarios:

- S3 connectivity issues
- Bucket access permissions
- File not found errors
- Metadata consistency checks

## Known Limitations

- Fixed long TTL (100 years) instead of configurable expiration
- No server-side encryption enabled by default
- No support for AWS session tokens
- No S3 key prefix organization support
- No multipart upload support (all files uploaded as single objects)

llama_stack/providers/remote/inference/nvidia/NVIDIA.md
@@ -0,0 +1,203 @@

# NVIDIA Inference Provider for LlamaStack

This provider enables running inference using NVIDIA NIM.

## Features
- Endpoints for completions, chat completions, and embeddings for registered models

## Getting Started

### Prerequisites

- LlamaStack with NVIDIA configuration
- Access to an NVIDIA NIM deployment
- A NIM deployed for the model you want to use for inference

### Setup

Build the NVIDIA environment:

```bash
uv pip install llama-stack-client
uv run llama stack list-deps nvidia | xargs -L1 uv pip install
```

### Basic Usage with the LlamaStack Python Client

#### Initialize the client

```python
import os

os.environ["NVIDIA_API_KEY"] = (
    ""  # Required if using a hosted NIM endpoint; not required if self-hosted.
)
os.environ["NVIDIA_BASE_URL"] = "http://nim.test"  # NIM URL

from llama_stack.core.library_client import LlamaStackAsLibraryClient

client = LlamaStackAsLibraryClient("nvidia")
client.initialize()
```

### Create Chat Completion

The following example shows how to create a chat completion with an NVIDIA NIM.

```python
response = client.chat.completions.create(
    model="nvidia/meta/llama-3.1-8b-instruct",
    messages=[
        {
            "role": "system",
            "content": "You must respond to each message with only one word",
        },
        {
            "role": "user",
            "content": "Complete the sentence using one word: Roses are red, violets are:",
        },
    ],
    stream=False,
    max_tokens=50,
)
print(f"Response: {response.choices[0].message.content}")
```
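
For incremental output, the same call can be streamed. A minimal sketch, assuming the client yields OpenAI-style chunks with a `delta` field when `stream=True`:

```python
# Hedged sketch: assumes OpenAI-style streaming chunks.
stream = client.chat.completions.create(
    model="nvidia/meta/llama-3.1-8b-instruct",
    messages=[{"role": "user", "content": "Write a haiku about GPUs."}],
    stream=True,
    max_tokens=50,
)
for chunk in stream:
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)
print()
```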

### Tool Calling Example

The following example shows how to do tool calling with an NVIDIA NIM.

```python
tool_definition = {
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get current weather information for a location",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {
                    "type": "string",
                    "description": "The city and state, e.g. San Francisco, CA",
                },
                "unit": {
                    "type": "string",
                    "description": "Temperature unit (celsius or fahrenheit)",
                    "default": "celsius",
                },
            },
            "required": ["location"],
        },
    },
}

tool_response = client.chat.completions.create(
    model="nvidia/meta/llama-3.1-8b-instruct",
    messages=[{"role": "user", "content": "What's the weather like in San Francisco?"}],
    tools=[tool_definition],
)

print(f"Response content: {tool_response.choices[0].message.content}")
if tool_response.choices[0].message.tool_calls:
    for tool_call in tool_response.choices[0].message.tool_calls:
        print(f"Tool Called: {tool_call.function.name}")
        print(f"Arguments: {tool_call.function.arguments}")
```
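
To complete the loop, the tool's result is sent back as a `tool` message so the model can produce a final answer. A minimal sketch following the usual OpenAI-compatible pattern; the `get_weather` implementation here is hypothetical:

```python
import json


# Hypothetical local implementation of the tool the model requested.
def get_weather(location: str, unit: str = "celsius") -> dict:
    return {"location": location, "temperature": 18, "unit": unit}


tool_call = tool_response.choices[0].message.tool_calls[0]
args = json.loads(tool_call.function.arguments)
result = get_weather(**args)

final = client.chat.completions.create(
    model="nvidia/meta/llama-3.1-8b-instruct",
    messages=[
        {"role": "user", "content": "What's the weather like in San Francisco?"},
        tool_response.choices[0].message,  # assistant turn containing the tool call
        {
            "role": "tool",
            "tool_call_id": tool_call.id,
            "content": json.dumps(result),
        },
    ],
    tools=[tool_definition],
)
print(f"Final answer: {final.choices[0].message.content}")
```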

### Structured Output Example

The following example shows how to get structured output from an NVIDIA NIM.

```python
person_schema = {
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "age": {"type": "number"},
        "occupation": {"type": "string"},
    },
    "required": ["name", "age", "occupation"],
}

structured_response = client.chat.completions.create(
    model="nvidia/meta/llama-3.1-8b-instruct",
    messages=[
        {
            "role": "user",
            "content": "Create a profile for a fictional person named Alice who is 30 years old and is a software engineer.",
        }
    ],
    extra_body={"nvext": {"guided_json": person_schema}},
)
print(f"Structured Response: {structured_response.choices[0].message.content}")
```

### Create Embeddings

The following example shows how to create embeddings with an NVIDIA NIM.

```python
response = client.embeddings.create(
    model="nvidia/nvidia/llama-3.2-nv-embedqa-1b-v2",
    input=["What is the capital of France?"],
    extra_body={"input_type": "query"},
)
print(f"Embeddings: {response.data}")
```

### Vision Language Models Example

The following example shows how to run vision inference using an NVIDIA NIM.

```python
import base64


def load_image_as_base64(image_path):
    with open(image_path, "rb") as image_file:
        img_bytes = image_file.read()
    return base64.b64encode(img_bytes).decode("utf-8")


image_path = "{path_to_the_image}"  # replace with the path to your image
demo_image_b64 = load_image_as_base64(image_path)

vlm_response = client.chat.completions.create(
    model="nvidia/meta/llama-3.2-11b-vision-instruct",
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/png;base64,{demo_image_b64}",
                    },
                },
                {
                    "type": "text",
                    "text": "Please describe what you see in this image in detail.",
                },
            ],
        }
    ],
)

print(f"VLM Response: {vlm_response.choices[0].message.content}")
```

### Rerank Example

The following example shows how to rerank documents using an NVIDIA NIM.

```python
rerank_response = client.alpha.inference.rerank(
    model="nvidia/nvidia/llama-3.2-nv-rerankqa-1b-v2",
    query="query",
    items=[
        "item_1",
        "item_2",
        "item_3",
    ],
)

for i, result in enumerate(rerank_response):
    print(f"{i + 1}. [Index: {result.index}, Score: {result.relevance_score:.3f}]")
```