ramalama-0.5.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. ramalama-0.5.2/LICENSE +21 -0
  2. ramalama-0.5.2/PKG-INFO +293 -0
  3. ramalama-0.5.2/README.md +256 -0
  4. ramalama-0.5.2/bin/ramalama +95 -0
  5. ramalama-0.5.2/completions/bash-completion/completions/ramalama +81 -0
  6. ramalama-0.5.2/completions/fish/vendor_completions.d/ramalama.fish +17 -0
  7. ramalama-0.5.2/completions/zsh/vendor-completions/_ramalama +81 -0
  8. ramalama-0.5.2/docs/links/ramalama-ls.1 +1 -0
  9. ramalama-0.5.2/docs/links/ramalama-ps.1 +1 -0
  10. ramalama-0.5.2/docs/ramalama-containers.1 +123 -0
  11. ramalama-0.5.2/docs/ramalama-convert.1 +70 -0
  12. ramalama-0.5.2/docs/ramalama-info.1 +278 -0
  13. ramalama-0.5.2/docs/ramalama-list.1 +76 -0
  14. ramalama-0.5.2/docs/ramalama-login.1 +93 -0
  15. ramalama-0.5.2/docs/ramalama-logout.1 +73 -0
  16. ramalama-0.5.2/docs/ramalama-pull.1 +37 -0
  17. ramalama-0.5.2/docs/ramalama-push.1 +107 -0
  18. ramalama-0.5.2/docs/ramalama-rm.1 +52 -0
  19. ramalama-0.5.2/docs/ramalama-run.1 +110 -0
  20. ramalama-0.5.2/docs/ramalama-serve.1 +304 -0
  21. ramalama-0.5.2/docs/ramalama-stop.1 +57 -0
  22. ramalama-0.5.2/docs/ramalama-version.1 +29 -0
  23. ramalama-0.5.2/docs/ramalama.1 +229 -0
  24. ramalama-0.5.2/docs/ramalama.conf +73 -0
  25. ramalama-0.5.2/docs/ramalama.conf.5 +189 -0
  26. ramalama-0.5.2/pyproject.toml +48 -0
  27. ramalama-0.5.2/ramalama/__init__.py +9 -0
  28. ramalama-0.5.2/ramalama/annotations.py +112 -0
  29. ramalama-0.5.2/ramalama/cli.py +864 -0
  30. ramalama-0.5.2/ramalama/common.py +228 -0
  31. ramalama-0.5.2/ramalama/file.py +31 -0
  32. ramalama-0.5.2/ramalama/http_client.py +156 -0
  33. ramalama-0.5.2/ramalama/huggingface.py +149 -0
  34. ramalama-0.5.2/ramalama/kube.py +142 -0
  35. ramalama-0.5.2/ramalama/model.py +412 -0
  36. ramalama-0.5.2/ramalama/oci.py +383 -0
  37. ramalama-0.5.2/ramalama/ollama.py +127 -0
  38. ramalama-0.5.2/ramalama/quadlet.py +106 -0
  39. ramalama-0.5.2/ramalama/shortnames.py +44 -0
  40. ramalama-0.5.2/ramalama/toml_parser.py +67 -0
  41. ramalama-0.5.2/ramalama/url.py +45 -0
  42. ramalama-0.5.2/ramalama/version.py +16 -0
  43. ramalama-0.5.2/ramalama.egg-info/PKG-INFO +293 -0
  44. ramalama-0.5.2/ramalama.egg-info/SOURCES.txt +48 -0
  45. ramalama-0.5.2/ramalama.egg-info/dependency_links.txt +1 -0
  46. ramalama-0.5.2/ramalama.egg-info/requires.txt +1 -0
  47. ramalama-0.5.2/ramalama.egg-info/top_level.txt +1 -0
  48. ramalama-0.5.2/setup.cfg +4 -0
  49. ramalama-0.5.2/setup.py +75 -0
  50. ramalama-0.5.2/shortnames/shortnames.conf +37 -0
ramalama-0.5.2/LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2024 Eric Curtin
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
ramalama-0.5.2/PKG-INFO ADDED
@@ -0,0 +1,293 @@
+ Metadata-Version: 2.2
+ Name: ramalama
+ Version: 0.5.2
+ Summary: RamaLama is a command line tool for working with AI LLM models.
+ Maintainer-email: Dan Walsh <dwalsh@redhat.com>, Eric Curtin <ecurtin@redhat.com>
+ License: MIT License
+
+ Copyright (c) 2024 Eric Curtin
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+
+ Project-URL: Homepage, https://github.com/containers/ramalama
+ Project-URL: Documentation, https://github.com/containers/ramalama/tree/main/docs
+ Project-URL: Repository, https://github.com/containers/ramalama
+ Project-URL: Issues, https://github.com/containers/ramalama/issues
+ Keywords: ramalama,llama,AI
+ Requires-Python: >=3.8
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: argcomplete
+
+ ![RAMALAMA logo](logos/PNG/ramalama-logo-full-vertical-added-bg.png)
+
+ # RamaLama
+
+ The RamaLama project's goal is to make working with AI boring
+ through the use of OCI containers.
+
+ The RamaLama tool facilitates local management and serving of AI Models.
+
+ On first run, RamaLama inspects your system for GPU support, falling back to CPU support if no GPUs are present.
+
+ RamaLama uses container engines like Podman or Docker to pull the appropriate OCI image with all of the software necessary to run an AI Model for your system's setup.
+
+ Running in containers eliminates the need for users to configure the host system for AI. After the initialization, RamaLama runs the AI Models within a container based on the OCI image.
+
+ RamaLama then pulls AI Models from model registries, starting a chatbot or a REST API service from a single command. Models are treated similarly to how Podman and Docker treat container images.
+
+ When both Podman and Docker are installed, RamaLama defaults to Podman; the `RAMALAMA_CONTAINER_ENGINE=docker` environment variable can override this behaviour. When neither is installed, RamaLama attempts to run the model with software on the local system.
+
+ RamaLama supports multiple types of AI model registries, called transports.
+ Supported transports:
+
+ ## TRANSPORTS
+
+ | Transports | Web Site |
+ | ------------- | --------------------------------------------------- |
+ | HuggingFace | [`huggingface.co`](https://www.huggingface.co) |
+ | Ollama | [`ollama.com`](https://www.ollama.com) |
+ | OCI Container Registries | [`opencontainers.org`](https://opencontainers.org) |
+ | | Examples: [`quay.io`](https://quay.io), [`Docker Hub`](https://docker.io), and [`Artifactory`](https://artifactory.com) |
+
+ RamaLama uses the Ollama registry transport by default. Use the RAMALAMA_TRANSPORT environment variable to modify the default. For example, `export RAMALAMA_TRANSPORT=huggingface` changes RamaLama to use the Hugging Face transport.
+
+ Individual model transports can be modified when specifying a model via the `huggingface://`, `oci://`, or `ollama://` prefix.
+
+ `ramalama pull huggingface://afrideva/Tiny-Vicuna-1B-GGUF/tiny-vicuna-1b.q2_k.gguf`
+
+ To make it easier for users, RamaLama uses shortname files, which contain
+ alias names for fully specified AI Models, allowing users to use shorter
+ names when referring to models. RamaLama reads shortnames.conf files if they
+ exist. These files contain a list of name/value pairs that specify the
+ model. The following table lists the order in which RamaLama reads the files.
+ Any duplicate names that exist override previously defined shortnames.
+
+ | Shortnames type | Path |
+ | --------------- | ---------------------------------------- |
+ | Distribution | /usr/share/ramalama/shortnames.conf |
+ | Administrators | /etc/ramalama/shortnames.conf |
+ | Users | $HOME/.config/ramalama/shortnames.conf |
+
+ ```code
+ $ cat /usr/share/ramalama/shortnames.conf
+ [shortnames]
+ "tiny" = "ollama://tinyllama"
+ "granite" = "huggingface://instructlab/granite-7b-lab-GGUF/granite-7b-lab-Q4_K_M.gguf"
+ "granite:7b" = "huggingface://instructlab/granite-7b-lab-GGUF/granite-7b-lab-Q4_K_M.gguf"
+ "ibm/granite" = "huggingface://instructlab/granite-7b-lab-GGUF/granite-7b-lab-Q4_K_M.gguf"
+ "merlinite" = "huggingface://instructlab/merlinite-7b-lab-GGUF/merlinite-7b-lab-Q4_K_M.gguf"
+ "merlinite:7b" = "huggingface://instructlab/merlinite-7b-lab-GGUF/merlinite-7b-lab-Q4_K_M.gguf"
+ ...
+ ```
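Because entries read later override earlier ones, a user-level file can add personal aliases without touching the system paths above. A minimal sketch of a user shortnames.conf, following the same format (the alias name here is hypothetical):

```code
$ cat $HOME/.config/ramalama/shortnames.conf
[shortnames]
"mymodel" = "ollama://smollm:135m"
```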
+
+ ## Install
+
+ ### Install via PyPI
+
+ RamaLama is available on PyPI at [https://pypi.org/project/ramalama](https://pypi.org/project/ramalama)
+
+ ```
+ pip install ramalama
+ ```
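The launcher script shipped in this package (bin/ramalama, shown later in this diff) also adds ~/.local/pipx/venvs/ramalama/bin to its search path, so a pipx-based install should work as well; a hedged alternative to the pip command above:

```
pipx install ramalama
```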
+
+ ### Install by script
+
+ Install RamaLama by running this one-liner:
+
+ ```
+ curl -fsSL https://raw.githubusercontent.com/containers/ramalama/s/install.sh | bash
+ ```
+
+ ## Hardware Support
+
+ | Hardware | Enabled |
+ | ---------------------------------- | ------- |
+ | CPU | :white_check_mark: |
+ | Apple Silicon GPU (Linux / Asahi) | :white_check_mark: |
+ | Apple Silicon GPU (macOS) | :white_check_mark: |
+ | Apple Silicon GPU (podman-machine) | :white_check_mark: |
+ | Nvidia GPU (cuda) | :white_check_mark: |
+ | AMD GPU (rocm) | :white_check_mark: |
+
+ ## COMMANDS
+
+ | Command | Description |
+ | ------------------------------------------------------ | ---------------------------------------------------------- |
+ | [ramalama(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama.1.md) | primary RamaLama man page |
+ | [ramalama-containers(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-containers.1.md) | list all RamaLama containers |
+ | [ramalama-info(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-info.1.md) | display RamaLama configuration information |
+ | [ramalama-list(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-list.1.md) | list all downloaded AI Models |
+ | [ramalama-login(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-login.1.md) | login to remote registry |
+ | [ramalama-logout(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-logout.1.md) | logout from remote registry |
+ | [ramalama-pull(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-pull.1.md) | pull AI Model from Model registry to local storage |
+ | [ramalama-push(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-push.1.md) | push AI Model from local storage to remote registry |
+ | [ramalama-rm(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-rm.1.md) | remove AI Model from local storage |
+ | [ramalama-run(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-run.1.md) | run specified AI Model as a chatbot |
+ | [ramalama-serve(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-serve.1.md) | serve REST API on specified AI Model |
+ | [ramalama-stop(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-stop.1.md) | stop named container that is running AI Model |
+ | [ramalama-version(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-version.1.md) | display version of RamaLama |
+
+ ## Usage
+
+ ### Running Models
+
+ You can `run` a chatbot on a model using the `run` command. By default, it pulls from the Ollama registry.
+
+ Note: RamaLama will inspect your machine for native GPU support and then will
+ use a container engine like Podman to pull an OCI container image with the
+ appropriate code and libraries to run the AI Model. This can take a long time to set up, but only happens on the first run.
+
+ ```
+ $ ramalama run instructlab/merlinite-7b-lab
+ Copying blob 5448ec8c0696 [--------------------------------------] 0.0b / 63.6MiB (skipped: 0.0b = 0.00%)
+ Copying blob cbd7e392a514 [--------------------------------------] 0.0b / 65.3MiB (skipped: 0.0b = 0.00%)
+ Copying blob 5d6c72bcd967 done 208.5MiB / 208.5MiB (skipped: 0.0b = 0.00%)
+ Copying blob 9ccfa45da380 [--------------------------------------] 0.0b / 7.6MiB (skipped: 0.0b = 0.00%)
+ Copying blob 4472627772b1 [--------------------------------------] 0.0b / 120.0b (skipped: 0.0b = 0.00%)
+ >
+ ```
+
+ After the initial container image has been downloaded, you can interact with
+ different models using the same container image.
+
+ ```
+ $ ramalama run granite-code
+ > Write a hello world application in python
+
+ print("Hello World")
+ ```
+
+ In a different terminal window, see the running Podman container.
+
+ ```
+ $ podman ps
+ CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
+ 91df4a39a360 quay.io/ramalama/ramalama:latest /home/dwalsh/rama... 4 minutes ago Up 4 minutes gifted_volhard
+ ```
+
+ ### Listing Models
+
+ You can `list` all models pulled into local storage.
+
+ ```
+ $ ramalama list
+ NAME MODIFIED SIZE
+ ollama://smollm:135m 16 hours ago 5.5M
+ huggingface://afrideva/Tiny-Vicuna-1B-GGUF/tiny-vicuna-1b.q2_k.gguf 14 hours ago 460M
+ ollama://granite-code:3b 5 days ago 1.9G
+ ollama://granite-code:latest 1 day ago 1.9G
+ ollama://moondream:latest 6 days ago 791M
+ ```
+
+ ### Pulling Models
+
+ You can `pull` a model using the `pull` command. By default, it pulls from the Ollama registry.
+
+ ```
+ $ ramalama pull granite-code
+ ################################################### 32.5%
+ ```
+
+ ### Serving Models
+
+ You can `serve` multiple models using the `serve` command. By default, it pulls from the Ollama registry.
+
+ ```
+ $ ramalama serve --name mylama llama3
+ ```
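Naming the container makes it easier to stop later. If the default port is already in use, the listening port can also be chosen explicitly; this sketch assumes the `--port` option described in ramalama-serve(1) rather than guaranteeing the exact flag:

```
$ ramalama serve --name mylama --port 8081 llama3
```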
+
+ ### Stopping servers
+
+ You can stop a running model if it is running in a container.
+
+ ```
+ $ ramalama stop mylama
+ ```
+
+ ### UI support
+
+ To use a UI, run a `ramalama serve` command, then connect via your browser at:
+
+ http://127.0.0.1:8080
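The same address serves a REST API. Assuming the default llama.cpp-based backend, which exposes an OpenAI-compatible chat completions endpoint, a request could be sketched as:

```
$ curl -s http://127.0.0.1:8080/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{"messages": [{"role": "user", "content": "Say hello"}]}'
```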
+
+ ## Diagram
+
+ ```
+ +---------------------------+
+ |                           |
+ | ramalama run granite-code |
+ |                           |
+ +-------+-------------------+
+         |
+         |
+         |                  +------------------+
+         |                  | Pull model layer |
+         +----------------->| granite-code     |
+                            +------------------+
+                            | Repo options:    |
+                            +-+-------+------+-+
+                              |       |      |
+                              v       v      v
+                       +---------+ +------+ +----------+
+                       | Hugging | | quay | | Ollama   |
+                       | Face    | |      | | Registry |
+                       +-------+-+ +---+--+ +-+--------+
+                               |       |      |
+                               v       v      v
+                            +------------------+
+                            | Start with       |
+                            | llama.cpp and    |
+                            | granite-code     |
+                            | model            |
+                            +------------------+
+ ```
+
+ ## In development
+
+ Consider this alpha software; everything is under development, so expect breaking changes. Luckily, it's easy to reset everything and reinstall:
+
+ ```
+ rm -rf /var/lib/ramalama # only required if running as root user
+ rm -rf $HOME/.local/share/ramalama
+ ```
+
+ and install again.
+
+ ## Credit where credit is due
+
+ This project wouldn't be possible without the help of other projects like:
+
+ - llama.cpp
+ - whisper.cpp
+ - vllm
+ - podman
+ - omlmd
+ - huggingface
+
+ So if you like this tool, give some of these repos a :star:, and hey, give us a :star: too while you are at it.
+
+ ## Community
+
+ [`Matrix`](https://matrix.to/#/#ramalama:fedoraproject.org)
+
+ ## Contributors
+
+ Open to contributors.
+
+ <a href="https://github.com/containers/ramalama/graphs/contributors">
+   <img src="https://contrib.rocks/image?repo=containers/ramalama" />
+ </a>
ramalama-0.5.2/README.md ADDED
@@ -0,0 +1,256 @@
+ ![RAMALAMA logo](logos/PNG/ramalama-logo-full-vertical-added-bg.png)
+
+ # RamaLama
+
+ The RamaLama project's goal is to make working with AI boring
+ through the use of OCI containers.
+
+ The RamaLama tool facilitates local management and serving of AI Models.
+
+ On first run, RamaLama inspects your system for GPU support, falling back to CPU support if no GPUs are present.
+
+ RamaLama uses container engines like Podman or Docker to pull the appropriate OCI image with all of the software necessary to run an AI Model for your system's setup.
+
+ Running in containers eliminates the need for users to configure the host system for AI. After the initialization, RamaLama runs the AI Models within a container based on the OCI image.
+
+ RamaLama then pulls AI Models from model registries, starting a chatbot or a REST API service from a single command. Models are treated similarly to how Podman and Docker treat container images.
+
+ When both Podman and Docker are installed, RamaLama defaults to Podman; the `RAMALAMA_CONTAINER_ENGINE=docker` environment variable can override this behaviour. When neither is installed, RamaLama attempts to run the model with software on the local system.
+
+ RamaLama supports multiple types of AI model registries, called transports.
+ Supported transports:
+
+ ## TRANSPORTS
+
+ | Transports | Web Site |
+ | ------------- | --------------------------------------------------- |
+ | HuggingFace | [`huggingface.co`](https://www.huggingface.co) |
+ | Ollama | [`ollama.com`](https://www.ollama.com) |
+ | OCI Container Registries | [`opencontainers.org`](https://opencontainers.org) |
+ | | Examples: [`quay.io`](https://quay.io), [`Docker Hub`](https://docker.io), and [`Artifactory`](https://artifactory.com) |
+
+ RamaLama uses the Ollama registry transport by default. Use the RAMALAMA_TRANSPORT environment variable to modify the default. For example, `export RAMALAMA_TRANSPORT=huggingface` changes RamaLama to use the Hugging Face transport.
+
+ Individual model transports can be modified when specifying a model via the `huggingface://`, `oci://`, or `ollama://` prefix.
+
+ `ramalama pull huggingface://afrideva/Tiny-Vicuna-1B-GGUF/tiny-vicuna-1b.q2_k.gguf`
+
+ To make it easier for users, RamaLama uses shortname files, which contain
+ alias names for fully specified AI Models, allowing users to use shorter
+ names when referring to models. RamaLama reads shortnames.conf files if they
+ exist. These files contain a list of name/value pairs that specify the
+ model. The following table lists the order in which RamaLama reads the files.
+ Any duplicate names that exist override previously defined shortnames.
+
+ | Shortnames type | Path |
+ | --------------- | ---------------------------------------- |
+ | Distribution | /usr/share/ramalama/shortnames.conf |
+ | Administrators | /etc/ramalama/shortnames.conf |
+ | Users | $HOME/.config/ramalama/shortnames.conf |
+
+ ```code
+ $ cat /usr/share/ramalama/shortnames.conf
+ [shortnames]
+ "tiny" = "ollama://tinyllama"
+ "granite" = "huggingface://instructlab/granite-7b-lab-GGUF/granite-7b-lab-Q4_K_M.gguf"
+ "granite:7b" = "huggingface://instructlab/granite-7b-lab-GGUF/granite-7b-lab-Q4_K_M.gguf"
+ "ibm/granite" = "huggingface://instructlab/granite-7b-lab-GGUF/granite-7b-lab-Q4_K_M.gguf"
+ "merlinite" = "huggingface://instructlab/merlinite-7b-lab-GGUF/merlinite-7b-lab-Q4_K_M.gguf"
+ "merlinite:7b" = "huggingface://instructlab/merlinite-7b-lab-GGUF/merlinite-7b-lab-Q4_K_M.gguf"
+ ...
+ ```
+
+ ## Install
+
+ ### Install via PyPI
+
+ RamaLama is available on PyPI at [https://pypi.org/project/ramalama](https://pypi.org/project/ramalama)
+
+ ```
+ pip install ramalama
+ ```
+
+ ### Install by script
+
+ Install RamaLama by running this one-liner:
+
+ ```
+ curl -fsSL https://raw.githubusercontent.com/containers/ramalama/s/install.sh | bash
+ ```
+
+ ## Hardware Support
+
+ | Hardware | Enabled |
+ | ---------------------------------- | ------- |
+ | CPU | :white_check_mark: |
+ | Apple Silicon GPU (Linux / Asahi) | :white_check_mark: |
+ | Apple Silicon GPU (macOS) | :white_check_mark: |
+ | Apple Silicon GPU (podman-machine) | :white_check_mark: |
+ | Nvidia GPU (cuda) | :white_check_mark: |
+ | AMD GPU (rocm) | :white_check_mark: |
+
+ ## COMMANDS
+
+ | Command | Description |
+ | ------------------------------------------------------ | ---------------------------------------------------------- |
+ | [ramalama(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama.1.md) | primary RamaLama man page |
+ | [ramalama-containers(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-containers.1.md) | list all RamaLama containers |
+ | [ramalama-info(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-info.1.md) | display RamaLama configuration information |
+ | [ramalama-list(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-list.1.md) | list all downloaded AI Models |
+ | [ramalama-login(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-login.1.md) | login to remote registry |
+ | [ramalama-logout(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-logout.1.md) | logout from remote registry |
+ | [ramalama-pull(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-pull.1.md) | pull AI Model from Model registry to local storage |
+ | [ramalama-push(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-push.1.md) | push AI Model from local storage to remote registry |
+ | [ramalama-rm(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-rm.1.md) | remove AI Model from local storage |
+ | [ramalama-run(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-run.1.md) | run specified AI Model as a chatbot |
+ | [ramalama-serve(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-serve.1.md) | serve REST API on specified AI Model |
+ | [ramalama-stop(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-stop.1.md) | stop named container that is running AI Model |
+ | [ramalama-version(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-version.1.md) | display version of RamaLama |
+
+ ## Usage
+
+ ### Running Models
+
+ You can `run` a chatbot on a model using the `run` command. By default, it pulls from the Ollama registry.
+
+ Note: RamaLama will inspect your machine for native GPU support and then will
+ use a container engine like Podman to pull an OCI container image with the
+ appropriate code and libraries to run the AI Model. This can take a long time to set up, but only happens on the first run.
+
+ ```
+ $ ramalama run instructlab/merlinite-7b-lab
+ Copying blob 5448ec8c0696 [--------------------------------------] 0.0b / 63.6MiB (skipped: 0.0b = 0.00%)
+ Copying blob cbd7e392a514 [--------------------------------------] 0.0b / 65.3MiB (skipped: 0.0b = 0.00%)
+ Copying blob 5d6c72bcd967 done 208.5MiB / 208.5MiB (skipped: 0.0b = 0.00%)
+ Copying blob 9ccfa45da380 [--------------------------------------] 0.0b / 7.6MiB (skipped: 0.0b = 0.00%)
+ Copying blob 4472627772b1 [--------------------------------------] 0.0b / 120.0b (skipped: 0.0b = 0.00%)
+ >
+ ```
+
+ After the initial container image has been downloaded, you can interact with
+ different models using the same container image.
+
+ ```
+ $ ramalama run granite-code
+ > Write a hello world application in python
+
+ print("Hello World")
+ ```
+
+ In a different terminal window, see the running Podman container.
+
+ ```
+ $ podman ps
+ CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
+ 91df4a39a360 quay.io/ramalama/ramalama:latest /home/dwalsh/rama... 4 minutes ago Up 4 minutes gifted_volhard
+ ```
+
+ ### Listing Models
+
+ You can `list` all models pulled into local storage.
+
+ ```
+ $ ramalama list
+ NAME MODIFIED SIZE
+ ollama://smollm:135m 16 hours ago 5.5M
+ huggingface://afrideva/Tiny-Vicuna-1B-GGUF/tiny-vicuna-1b.q2_k.gguf 14 hours ago 460M
+ ollama://granite-code:3b 5 days ago 1.9G
+ ollama://granite-code:latest 1 day ago 1.9G
+ ollama://moondream:latest 6 days ago 791M
+ ```
+
+ ### Pulling Models
+
+ You can `pull` a model using the `pull` command. By default, it pulls from the Ollama registry.
+
+ ```
+ $ ramalama pull granite-code
+ ################################################### 32.5%
+ ```
+
+ ### Serving Models
+
+ You can `serve` multiple models using the `serve` command. By default, it pulls from the Ollama registry.
+
+ ```
+ $ ramalama serve --name mylama llama3
+ ```
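Because each served model runs in its own named container, several models can be served side by side, each in its own terminal (or detached, if your version supports that). This sketch assumes the `--port` option described in ramalama-serve(1):

```
$ ramalama serve --name mylama --port 8080 llama3
$ ramalama serve --name mygranite --port 8081 granite-code
```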
+
+ ### Stopping servers
+
+ You can stop a running model if it is running in a container.
+
+ ```
+ $ ramalama stop mylama
+ ```
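To see which model containers are running before stopping one, the `ramalama containers` command listed in the COMMANDS table above shows them; a minimal sketch:

```
$ ramalama containers
$ ramalama stop mylama
```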
+
+ ### UI support
+
+ To use a UI, run a `ramalama serve` command, then connect via your browser at:
+
+ http://127.0.0.1:8080
+
+ ## Diagram
+
+ ```
+ +---------------------------+
+ |                           |
+ | ramalama run granite-code |
+ |                           |
+ +-------+-------------------+
+         |
+         |
+         |                  +------------------+
+         |                  | Pull model layer |
+         +----------------->| granite-code     |
+                            +------------------+
+                            | Repo options:    |
+                            +-+-------+------+-+
+                              |       |      |
+                              v       v      v
+                       +---------+ +------+ +----------+
+                       | Hugging | | quay | | Ollama   |
+                       | Face    | |      | | Registry |
+                       +-------+-+ +---+--+ +-+--------+
+                               |       |      |
+                               v       v      v
+                            +------------------+
+                            | Start with       |
+                            | llama.cpp and    |
+                            | granite-code     |
+                            | model            |
+                            +------------------+
+ ```
+
+ ## In development
+
+ Consider this alpha software; everything is under development, so expect breaking changes. Luckily, it's easy to reset everything and reinstall:
+
+ ```
+ rm -rf /var/lib/ramalama # only required if running as root user
+ rm -rf $HOME/.local/share/ramalama
+ ```
+
+ and install again.
+
+ ## Credit where credit is due
+
+ This project wouldn't be possible without the help of other projects like:
+
+ - llama.cpp
+ - whisper.cpp
+ - vllm
+ - podman
+ - omlmd
+ - huggingface
+
+ So if you like this tool, give some of these repos a :star:, and hey, give us a :star: too while you are at it.
+
+ ## Community
+
+ [`Matrix`](https://matrix.to/#/#ramalama:fedoraproject.org)
+
+ ## Contributors
+
+ Open to contributors.
+
+ <a href="https://github.com/containers/ramalama/graphs/contributors">
+   <img src="https://contrib.rocks/image?repo=containers/ramalama" />
+ </a>
ramalama-0.5.2/bin/ramalama ADDED
@@ -0,0 +1,95 @@
+ #!/usr/bin/env python3
+
+ import glob
+ import os
+ import errno
+ import subprocess
+ import sys
+
+ def add_pipx_venvs_bin_to_path():
+     """
+     Adds an available pipx virtual environment bin directory to PATH.
+     This function looks for a venv in ~/.local/pipx/venvs/ramalama/bin and,
+     if it exists, appends it to the environment variable PATH.
+     """
+     pipx_bin_path = os.path.expanduser('~/.local/pipx/venvs/ramalama/bin')
+     if os.path.exists(pipx_bin_path):
+         os.environ["PATH"] += ":" + pipx_bin_path
+
+ def add_site_packages_to_syspath(base_path):
+     """
+     Adds site-packages directories from a given base path to sys.path.
+     """
+     python_version = f'{sys.version_info.major}.{sys.version_info.minor}'
+     search_pattern = os.path.expanduser(f'{base_path}/lib/python{python_version}/site-packages')
+     matched_paths = glob.glob(search_pattern)
+     if matched_paths:
+         for path in matched_paths:
+             sys.path.insert(0, path)
+         return
+
+     # Fall back to a more general pattern if the specific version doesn't match
+     search_pattern = os.path.expanduser(f'{base_path}/lib/python*/site-packages')
+     matched_paths = glob.glob(search_pattern)
+     if matched_paths:
+         for path in matched_paths:
+             sys.path.insert(0, path)
+
+ def main(args):
+     sharedirs = ["/opt/homebrew/share/ramalama", "/usr/local/share/ramalama", "/usr/share/ramalama"]
+     syspath = next((d for d in sharedirs if os.path.exists(d + "/ramalama/cli.py")), None)
+     if syspath:
+         sys.path.insert(0, syspath)
+
+     add_site_packages_to_syspath('~/.local/pipx/venvs/*')
+     add_site_packages_to_syspath('/usr/local')
+     add_pipx_venvs_bin_to_path()
+     sys.path.insert(0, './')
+     try:
+         import ramalama
+     except ImportError:
+         print(f"ramalama module not found in sys.path: {sys.path}", file=sys.stderr)
+         raise
+
+     parser, args = ramalama.init_cli()
+
+     # If autocomplete isn't available, just do nothing; don't break.
+     try:
+         import argcomplete
+         argcomplete.autocomplete(parser)
+     except Exception:
+         pass
+
+     if args.version:
+         return ramalama.print_version(args)
+
+     def eprint(e, exit_code):
+         ramalama.perror("Error: " + str(e).strip("'\""))
+         sys.exit(exit_code)
+
+     # Process CLI
+     try:
+         args.func(args)
+     except ramalama.HelpException:
+         parser.print_help()
+     except AttributeError as e:
+         parser.print_usage()
+         print("ramalama: requires a subcommand")
+         if args.debug:
+             raise e
+     except IndexError as e:
+         eprint(e, errno.EINVAL)
+     except KeyError as e:
+         eprint(e, 1)
+     except NotImplementedError as e:
+         eprint(e, errno.ENOTSUP)
+     except subprocess.CalledProcessError as e:
+         eprint(e, e.returncode)
+     except KeyboardInterrupt:
+         sys.exit(0)
+     except ValueError as e:
+         eprint(e, errno.EINVAL)
+
+
+ if __name__ == "__main__":
+     main(sys.argv[1:])