llmstack-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. llmstack/__init__.py +3 -0
  2. llmstack/__main__.py +5 -0
  3. llmstack/cli/__init__.py +0 -0
  4. llmstack/cli/app.py +87 -0
  5. llmstack/cli/commands/__init__.py +0 -0
  6. llmstack/cli/commands/doctor.py +72 -0
  7. llmstack/cli/commands/down.py +25 -0
  8. llmstack/cli/commands/init.py +66 -0
  9. llmstack/cli/commands/logs.py +25 -0
  10. llmstack/cli/commands/status.py +45 -0
  11. llmstack/cli/commands/up.py +30 -0
  12. llmstack/cli/console.py +13 -0
  13. llmstack/config/__init__.py +4 -0
  14. llmstack/config/loader.py +44 -0
  15. llmstack/config/presets/__init__.py +11 -0
  16. llmstack/config/presets/agent.py +13 -0
  17. llmstack/config/presets/chat.py +14 -0
  18. llmstack/config/presets/rag.py +10 -0
  19. llmstack/config/schema.py +76 -0
  20. llmstack/core/__init__.py +0 -0
  21. llmstack/core/hardware.py +131 -0
  22. llmstack/core/health.py +23 -0
  23. llmstack/core/resolver.py +49 -0
  24. llmstack/core/stack.py +207 -0
  25. llmstack/docker/__init__.py +0 -0
  26. llmstack/docker/manager.py +134 -0
  27. llmstack/gateway/Dockerfile +16 -0
  28. llmstack/gateway/__init__.py +0 -0
  29. llmstack/gateway/main.py +52 -0
  30. llmstack/gateway/middleware/__init__.py +0 -0
  31. llmstack/gateway/middleware/auth.py +32 -0
  32. llmstack/gateway/middleware/metrics.py +115 -0
  33. llmstack/gateway/proxy.py +58 -0
  34. llmstack/gateway/routes/__init__.py +0 -0
  35. llmstack/gateway/routes/chat.py +27 -0
  36. llmstack/gateway/routes/embeddings.py +17 -0
  37. llmstack/gateway/routes/health.py +55 -0
  38. llmstack/gateway/routes/models.py +16 -0
  39. llmstack/plugins/__init__.py +0 -0
  40. llmstack/plugins/loader.py +5 -0
  41. llmstack/plugins/spec.py +20 -0
  42. llmstack/services/__init__.py +0 -0
  43. llmstack/services/base.py +65 -0
  44. llmstack/services/cache/__init__.py +0 -0
  45. llmstack/services/cache/redis.py +33 -0
  46. llmstack/services/embeddings/__init__.py +0 -0
  47. llmstack/services/embeddings/tei.py +49 -0
  48. llmstack/services/gateway/__init__.py +0 -0
  49. llmstack/services/gateway/service.py +47 -0
  50. llmstack/services/inference/__init__.py +0 -0
  51. llmstack/services/inference/ollama.py +60 -0
  52. llmstack/services/inference/vllm.py +57 -0
  53. llmstack/services/observe/__init__.py +0 -0
  54. llmstack/services/observe/prometheus.py +168 -0
  55. llmstack/services/registry.py +53 -0
  56. llmstack/services/vectordb/__init__.py +0 -0
  57. llmstack/services/vectordb/qdrant.py +33 -0
  58. llmstack_cli-0.1.0.dist-info/METADATA +252 -0
  59. llmstack_cli-0.1.0.dist-info/RECORD +62 -0
  60. llmstack_cli-0.1.0.dist-info/WHEEL +4 -0
  61. llmstack_cli-0.1.0.dist-info/entry_points.txt +2 -0
  62. llmstack_cli-0.1.0.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,168 @@
+ """Prometheus + Grafana observability services."""
+
+ from __future__ import annotations
+
+ import json
+ from typing import Any
+
+ from llmstack.config.schema import ObserveConfig
+ from llmstack.services.base import ServiceBase
+
+
+ # Prometheus config that scrapes the gateway /metrics endpoint
+ PROMETHEUS_CONFIG = """
+ global:
+   scrape_interval: 15s
+   evaluation_interval: 15s
+
+ scrape_configs:
+   - job_name: 'llmstack-gateway'
+     metrics_path: '/metrics'
+     static_configs:
+       - targets: ['llmstack-gateway:8000']
+     scrape_interval: 5s
+
+   - job_name: 'qdrant'
+     metrics_path: '/metrics'
+     static_configs:
+       - targets: ['llmstack-qdrant:6333']
+     scrape_interval: 15s
+ """
+
+ # Grafana datasource provisioning
+ GRAFANA_DATASOURCE = {
+     "apiVersion": 1,
+     "datasources": [{
+         "name": "Prometheus",
+         "type": "prometheus",
+         "access": "proxy",
+         "url": "http://llmstack-prometheus:9090",
+         "isDefault": True,
+     }],
+ }
+
+ # Grafana dashboard provisioning config
+ GRAFANA_DASHBOARD_PROVIDER = {
+     "apiVersion": 1,
+     "providers": [{
+         "name": "LLMStack",
+         "type": "file",
+         "options": {"path": "/var/lib/grafana/dashboards"},
+     }],
+ }
+
+ # Pre-built Grafana dashboard JSON
+ GRAFANA_DASHBOARD = {
+     "dashboard": {
+         "title": "LLMStack Overview",
+         "uid": "llmstack-overview",
+         "timezone": "browser",
+         "refresh": "10s",
+         "panels": [
+             {
+                 "title": "Request Rate",
+                 "type": "timeseries",
+                 "gridPos": {"h": 8, "w": 12, "x": 0, "y": 0},
+                 "targets": [{"expr": "rate(llmstack_requests_total[1m])", "legendFormat": "{{path}}"}],
+             },
+             {
+                 "title": "Latency (p50 / p99)",
+                 "type": "timeseries",
+                 "gridPos": {"h": 8, "w": 12, "x": 12, "y": 0},
+                 "targets": [
+                     {"expr": "histogram_quantile(0.5, rate(llmstack_request_duration_seconds_bucket[5m]))", "legendFormat": "p50"},
+                     {"expr": "histogram_quantile(0.99, rate(llmstack_request_duration_seconds_bucket[5m]))", "legendFormat": "p99"},
+                 ],
+             },
+             {
+                 "title": "Error Rate",
+                 "type": "stat",
+                 "gridPos": {"h": 4, "w": 6, "x": 0, "y": 8},
+                 "targets": [{"expr": "sum(rate(llmstack_errors_total[5m]))"}],
+             },
+             {
+                 "title": "Active Services",
+                 "type": "stat",
+                 "gridPos": {"h": 4, "w": 6, "x": 6, "y": 8},
+                 "targets": [{"expr": "up"}],
+             },
+             {
+                 "title": "Token Throughput",
+                 "type": "timeseries",
+                 "gridPos": {"h": 8, "w": 12, "x": 12, "y": 8},
+                 "targets": [{"expr": "rate(llmstack_tokens_total[1m])", "legendFormat": "{{type}}"}],
+             },
+         ],
+     },
+ }
+
+
+ class PrometheusService(ServiceBase):
+     name = "prometheus"
+     category = "observe"
+
+     def __init__(self, config: ObserveConfig):
+         self.config = config
+         self.host_port = 9090
+
+     def container_spec(self) -> dict[str, Any]:
+         return {
+             "image": "prom/prometheus:latest",
+             "name": "llmstack-prometheus",
+             "ports": {"9090/tcp": self.host_port},
+             "command": [
+                 "--config.file=/etc/prometheus/prometheus.yml",
+                 f"--storage.tsdb.retention.time={self.config.retention}",
+                 "--web.enable-lifecycle",
+             ],
+             "volumes": {
+                 "llmstack_prometheus_config": {"bind": "/etc/prometheus", "mode": "rw"},
+                 "llmstack_prometheus_data": {"bind": "/prometheus", "mode": "rw"},
+             },
+             "environment": {},
+         }
+
+     def health_url(self) -> str:
+         return f"http://localhost:{self.host_port}/-/healthy"
+
+     def get_config_yaml(self) -> str:
+         """Return the prometheus.yml content."""
+         return PROMETHEUS_CONFIG
+
+
+ class GrafanaService(ServiceBase):
+     name = "grafana"
+     category = "observe"
+
+     def __init__(self, config: ObserveConfig):
+         self.config = config
+         self.host_port = config.dashboard_port
+
+     def container_spec(self) -> dict[str, Any]:
+         return {
+             "image": "grafana/grafana:latest",
+             "name": "llmstack-grafana",
+             "ports": {"3000/tcp": self.host_port},
+             "environment": {
+                 "GF_SECURITY_ADMIN_USER": "admin",
+                 "GF_SECURITY_ADMIN_PASSWORD": "llmstack",
+                 "GF_AUTH_ANONYMOUS_ENABLED": "true",
+                 "GF_AUTH_ANONYMOUS_ORG_ROLE": "Viewer",
+                 "GF_DASHBOARDS_DEFAULT_HOME_DASHBOARD_PATH": "/var/lib/grafana/dashboards/llmstack.json",
+             },
+             "volumes": {
+                 "llmstack_grafana_data": {"bind": "/var/lib/grafana", "mode": "rw"},
+             },
+         }
+
+     def health_url(self) -> str:
+         return f"http://localhost:{self.host_port}/api/health"
+
+     def get_datasource_json(self) -> str:
+         return json.dumps(GRAFANA_DATASOURCE, indent=2)
+
+     def get_dashboard_provider_json(self) -> str:
+         return json.dumps(GRAFANA_DASHBOARD_PROVIDER, indent=2)
+
+     def get_dashboard_json(self) -> str:
+         return json.dumps(GRAFANA_DASHBOARD, indent=2)
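The dashboard above queries four metric families (`llmstack_requests_total`, `llmstack_request_duration_seconds`, `llmstack_errors_total`, `llmstack_tokens_total`) that are emitted by `llmstack/gateway/middleware/metrics.py`, which this diff does not show. As a rough illustration of how such counters and histograms are commonly exposed, here is a sketch using the third-party `prometheus_client` library; note that `prometheus_client` is not among this package's dependencies, so this is not llmstack's actual middleware:

```python
import time

from prometheus_client import Counter, Histogram, make_asgi_app

# Metric names match the PromQL expressions in GRAFANA_DASHBOARD above.
REQUESTS = Counter("llmstack_requests_total", "Requests served", ["path"])
ERRORS = Counter("llmstack_errors_total", "Responses with status >= 400")
LATENCY = Histogram("llmstack_request_duration_seconds", "Request latency in seconds")


async def metrics_middleware(request, call_next):
    """Starlette-style middleware sketch: time each request and bump the counters."""
    start = time.perf_counter()
    response = await call_next(request)
    LATENCY.observe(time.perf_counter() - start)
    REQUESTS.labels(path=request.url.path).inc()
    if response.status_code >= 400:
        ERRORS.inc()
    return response

# app.mount("/metrics", make_asgi_app())  # exposes the endpoint PROMETHEUS_CONFIG scrapes
```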
@@ -0,0 +1,53 @@
+ """Service registry — discovers built-in and plugin services."""
+
+ from __future__ import annotations
+
+ from importlib.metadata import entry_points
+ from typing import Type
+
+ from llmstack.services.base import ServiceBase
+ from llmstack.services.inference.ollama import OllamaService
+ from llmstack.services.inference.vllm import VllmService
+ from llmstack.services.vectordb.qdrant import QdrantService
+ from llmstack.services.cache.redis import RedisService
+ from llmstack.services.embeddings.tei import TEIService
+
+
+ class ServiceRegistry:
+     """Discovers all built-in + plugin services."""
+
+     def __init__(self):
+         self._services: dict[str, Type[ServiceBase]] = {}
+         self._load_builtins()
+         self._load_plugins()
+
+     def _load_builtins(self) -> None:
+         for cls in [OllamaService, VllmService, QdrantService, RedisService, TEIService]:
+             self._services[cls.name] = cls
+
+     def _load_plugins(self) -> None:
+         try:
+             eps = entry_points(group="llmstack.services")
+         except TypeError:
+             # Fallback for older importlib.metadata APIs without the group= keyword
+             eps = entry_points().get("llmstack.services", [])
+
+         for ep in eps:
+             try:
+                 cls = ep.load()
+                 if hasattr(cls, "name"):
+                     self._services[cls.name] = cls
+             except Exception:
+                 pass
+
+     def get(self, name: str) -> Type[ServiceBase]:
+         if name not in self._services:
+             available = ", ".join(sorted(self._services.keys()))
+             raise KeyError(f"Unknown service '{name}'. Available: {available}")
+         return self._services[name]
+
+     def list_by_category(self, category: str) -> list[Type[ServiceBase]]:
+         return [s for s in self._services.values() if s.category == category]
+
+     def all_names(self) -> list[str]:
+         return sorted(self._services.keys())
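For orientation, a short usage sketch of the registry above (the `VectorDBConfig(port=...)` call assumes a `port` field, inferred from how `QdrantService` reads `self.config.port` below, not from the actual schema):

```python
from llmstack.config.schema import VectorDBConfig
from llmstack.services.registry import ServiceRegistry

registry = ServiceRegistry()
print(registry.all_names())          # built-ins plus any installed plugins, sorted

qdrant_cls = registry.get("qdrant")  # unknown names raise KeyError listing what exists
service = qdrant_cls(VectorDBConfig(port=6333))   # field name assumed, see note above
print(service.container_spec()["image"])          # "qdrant/qdrant:latest"
```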
File without changes
@@ -0,0 +1,33 @@
+ """Qdrant vector database service."""
+
+ from __future__ import annotations
+
+ from typing import Any
+
+ from llmstack.config.schema import VectorDBConfig
+ from llmstack.services.base import ServiceBase
+
+
+ class QdrantService(ServiceBase):
+     name = "qdrant"
+     category = "vectordb"
+
+     def __init__(self, config: VectorDBConfig):
+         self.config = config
+
+     def container_spec(self) -> dict[str, Any]:
+         return {
+             "image": "qdrant/qdrant:latest",
+             "name": "llmstack-qdrant",
+             "ports": {
+                 "6333/tcp": self.config.port,
+                 "6334/tcp": self.config.port + 1,
+             },
+             "volumes": {
+                 "llmstack_qdrant_data": {"bind": "/qdrant/storage", "mode": "rw"},
+             },
+             "environment": {},
+         }
+
+     def health_url(self) -> str:
+         return f"http://localhost:{self.config.port}/healthz"
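Once the container is up, any Qdrant client can talk to the mapped REST port. For example, with the official `qdrant-client` package (installed separately; it is not a dependency of llmstack-cli):

```python
from qdrant_client import QdrantClient

client = QdrantClient(url="http://localhost:6333")  # matches the default vectors.port
print(client.get_collections())
```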
@@ -0,0 +1,252 @@
+ Metadata-Version: 2.4
+ Name: llmstack-cli
+ Version: 0.1.0
+ Summary: One command. Full LLM stack. Zero config.
+ Author: mara-werils
+ License-Expression: Apache-2.0
+ License-File: LICENSE
+ Keywords: ai,cli,docker,inference,llm,openai,rag
+ Classifier: Development Status :: 3 - Alpha
+ Classifier: Environment :: Console
+ Classifier: Intended Audience :: Developers
+ Classifier: License :: OSI Approved :: Apache Software License
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+ Requires-Python: >=3.11
+ Requires-Dist: docker>=7.0
+ Requires-Dist: httpx>=0.27
+ Requires-Dist: psutil>=5.9
+ Requires-Dist: pydantic>=2.0
+ Requires-Dist: pyyaml>=6.0
+ Requires-Dist: rich>=13.0
+ Requires-Dist: typer>=0.12
+ Provides-Extra: dev
+ Requires-Dist: fastapi>=0.115; extra == 'dev'
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
+ Requires-Dist: pytest>=8.0; extra == 'dev'
+ Requires-Dist: ruff>=0.4; extra == 'dev'
+ Requires-Dist: starlette>=0.40; extra == 'dev'
+ Provides-Extra: gateway
+ Requires-Dist: fastapi>=0.115; extra == 'gateway'
+ Requires-Dist: starlette>=0.40; extra == 'gateway'
+ Requires-Dist: uvicorn[standard]>=0.30; extra == 'gateway'
+ Description-Content-Type: text/markdown
+
+ <p align="center">
+   <h1 align="center">llmstack</h1>
+   <p align="center"><strong>One command. Full LLM stack. Zero config.</strong></p>
+   <p align="center">Stop wiring Docker containers. Start building AI apps.</p>
+ </p>
+
+ <p align="center">
+   <a href="https://pypi.org/project/llmstack-cli/"><img src="https://img.shields.io/pypi/v/llmstack-cli?color=blue" alt="PyPI"></a>
+   <a href="https://github.com/mara-werils/llmstack/actions/workflows/ci.yml"><img src="https://github.com/mara-werils/llmstack/actions/workflows/ci.yml/badge.svg" alt="CI"></a>
+   <a href="https://github.com/mara-werils/llmstack/blob/main/LICENSE"><img src="https://img.shields.io/badge/license-Apache%202.0-green" alt="License"></a>
+   <a href="https://www.python.org/"><img src="https://img.shields.io/badge/python-3.11+-blue" alt="Python"></a>
+ </p>
+
+ ---
+
+ **llmstack** spins up a production-grade LLM stack locally with a single command. It auto-detects your hardware, picks the optimal inference backend, and wires everything together.
+
+ ```bash
+ pip install llmstack-cli
+ llmstack init
+ llmstack up
+ ```
+
+ That's it. You now have a full LLM API running locally.
+
+ ## Architecture
+
+ ```
+                       llmstack up
+                            |
+                 +----------v-----------+
+                 |   Hardware Detect    |
+                 | NVIDIA / Apple / CPU |
+                 +----------+-----------+
+                            |
+      +----------+----------+-----------+------------+
+      |          |          |           |            |
+ +----v---+  +---v---+  +---v----+  +---v---+  +-----v------+
+ | Qdrant |  | Redis |  | Ollama |  |  TEI  |  |  Gateway   |
+ | Vector |  | Cache |  |   or   |  | Embed |  |  FastAPI   |
+ |   DB   |  |       |  |  vLLM  |  |       |  | (OpenAI-   |
+ +--------+  +-------+  +--------+  +-------+  | compatible)|
+   :6333       :6379      :11434      :8002    +-----+------+
+                                                     | :8000
+                                               +-----v------+
+                                               | Prometheus |
+                                               | + Grafana  |
+                                               +------------+
+                                                   :8080
+ ```
+
+ ## What you get
+
+ | Layer | Service | Default | Port |
+ |-------|---------|---------|------|
+ | Inference | Ollama / vLLM (auto) | llama3.2 | 11434 |
+ | Embeddings | TEI / Ollama (auto) | bge-m3 | 8002 |
+ | Vector DB | Qdrant | - | 6333 |
+ | Cache | Redis | 256MB LRU | 6379 |
+ | API Gateway | FastAPI (OpenAI-compatible) | auth + rate limit | 8000 |
+ | Dashboard | Grafana + Prometheus | pre-built panels | 8080 |
+
+ ## How it works
+
+ ```
+ llmstack init         # Detects hardware, generates llmstack.yaml
+                       # Picks optimal backend: vLLM for NVIDIA 16GB+, Ollama otherwise
+
+ llmstack up           # Boots services in order with health checks:
+                       # Qdrant -> Redis -> Inference -> Embeddings -> Gateway -> Metrics
+
+ llmstack status       # Shows health of all running services
+ llmstack logs ollama  # Stream inference logs
+ llmstack down         # Stops everything
+ ```
+
+ ### Use the API
+
+ ```bash
+ curl http://localhost:8000/v1/chat/completions \
+   -H "Authorization: Bearer YOUR_KEY" \
+   -H "Content-Type: application/json" \
+   -d '{"model":"llama3.2","messages":[{"role":"user","content":"Hello!"}]}'
+ ```
+
+ Works with **any OpenAI-compatible client**: LangChain, LlamaIndex, Vercel AI SDK, openai-python.
+
+ ```python
+ from openai import OpenAI
+
+ client = OpenAI(base_url="http://localhost:8000/v1", api_key="YOUR_KEY")
+ response = client.chat.completions.create(
+     model="llama3.2",
+     messages=[{"role": "user", "content": "Explain quantum computing"}]
+ )
+ ```
+
+ ## Auto hardware detection
+
+ | Your hardware | Backend | Why |
+ |---|---|---|
+ | NVIDIA GPU 16GB+ VRAM | vLLM | Max throughput, PagedAttention |
+ | NVIDIA GPU <16GB | Ollama | Lower memory overhead |
+ | Apple Silicon (M1-M4) | Ollama | Metal acceleration |
+ | CPU only | Ollama | GGUF quantized models |
+
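+ The resolver that implements this table lives in `llmstack/core/resolver.py`, which this diff does not show; the sketch below simply restates the decision rule, with invented function and parameter names:
+
+ ```python
+ def pick_backend(gpu_vendor: str | None, vram_gb: float) -> str:
+     """Sketch of the table above; the real logic is in llmstack/core/resolver.py."""
+     if gpu_vendor == "nvidia" and vram_gb >= 16:
+         return "vllm"    # PagedAttention, max throughput
+     return "ollama"      # smaller NVIDIA GPUs, Apple Silicon (Metal), or CPU (GGUF)
+ ```
+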
+ ## Presets
+
+ ```bash
+ llmstack init --preset chat   # Minimal: inference + cache + gateway
+ llmstack init --preset rag    # + Qdrant + embeddings for RAG apps
+ llmstack init --preset agent  # 70B model + 16K context + longer timeouts
+ ```
+
+ ## Configuration
+
+ One file: `llmstack.yaml`
+
+ ```yaml
+ version: "1"
+
+ models:
+   chat:
+     name: llama3.2
+     backend: auto        # auto | ollama | vllm
+     context_length: 8192
+   embeddings:
+     name: bge-m3
+
+ services:
+   vectors:
+     provider: qdrant
+     port: 6333
+   cache:
+     provider: redis
+     max_memory: 256mb
+
+ gateway:
+   port: 8000
+   auth: api_key
+   rate_limit: 100/min
+   cors: ["*"]
+
+ observe:
+   metrics: true
+   dashboard_port: 8080
+ ```
+
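+ This file is parsed into Pydantic v2 models defined in `llmstack/config/schema.py` (not shown in this diff). Purely as a sketch, two of those models plausibly look like the following; field names are inferred from how the services above consume them, and the defaults are guesses:
+
+ ```python
+ from pydantic import BaseModel
+
+ class VectorDBConfig(BaseModel):
+     provider: str = "qdrant"
+     port: int = 6333            # read by QdrantService.container_spec()
+
+ class ObserveConfig(BaseModel):
+     metrics: bool = True
+     dashboard_port: int = 8080  # read by GrafanaService
+     retention: str = "15d"      # read by PrometheusService (guessed default)
+ ```
+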
+ ## CLI
+
+ | Command | Description |
+ |---------|-------------|
+ | `llmstack init [--preset]` | Create config with smart defaults |
+ | `llmstack up [--attach]` | Start all services |
+ | `llmstack down [--volumes]` | Stop and clean up |
+ | `llmstack status` | Health check all services |
+ | `llmstack logs <service>` | Stream service logs |
+ | `llmstack doctor` | Diagnose system issues |
+
+ ## Observability
+
+ When `observe.metrics: true`, llmstack boots Prometheus + Grafana with a pre-built dashboard:
+
+ - **Request rate** per endpoint
+ - **Latency** p50 / p99 histograms
+ - **Token throughput** (input + output)
+ - **Error rate** (4xx / 5xx)
+ - **Service health** (up/down)
+
+ Access it at `http://localhost:8080` (login: admin / llmstack).
+
+ ## Plugins
+
+ Extend llmstack with new backends via pip:
+
+ ```bash
+ pip install llmstack-cli-plugin-chromadb
+ # Now: vectors.provider: chromadb in llmstack.yaml
+ ```
+
+ Create your own: implement `ServiceBase` and register it via entry points, as sketched below. See [CONTRIBUTING.md](CONTRIBUTING.md).
+
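+ A minimal plugin sketch (module, class, and image names are illustrative; the entry-point group `llmstack.services` is the one `ServiceRegistry._load_plugins` scans):
+
+ ```python
+ # my_plugin/service.py  (hypothetical plugin package)
+ from typing import Any
+
+ from llmstack.services.base import ServiceBase
+
+ class ChromaDBService(ServiceBase):
+     name = "chromadb"      # selectable as vectors.provider: chromadb
+     category = "vectordb"
+
+     def __init__(self, config):
+         self.config = config
+
+     def container_spec(self) -> dict[str, Any]:
+         return {
+             "image": "chromadb/chroma:latest",
+             "name": "llmstack-chromadb",
+             "ports": {"8000/tcp": self.config.port},
+             "volumes": {},
+             "environment": {},
+         }
+
+     def health_url(self) -> str:
+         return f"http://localhost:{self.config.port}/api/v1/heartbeat"
+
+ # pyproject.toml of the plugin package:
+ # [project.entry-points."llmstack.services"]
+ # chromadb = "my_plugin.service:ChromaDBService"
+ ```
+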
+ ## Why llmstack?
+
+ | | llmstack | Ollama | Harbor | AnythingLLM | LiteLLM |
+ |---|---|---|---|---|---|
+ | One-command full stack | Yes | No (inference only) | Partial | Partial | No (proxy only) |
+ | Auto hardware detection | Yes | No | No | No | No |
+ | OpenAI-compatible API | Yes | Yes | Varies | No | Yes |
+ | Built-in vector DB | Yes | No | Config needed | Bundled | No |
+ | Built-in embeddings | Yes | No | No | Bundled | No |
+ | Caching (Redis) | Yes | No | No | No | No |
+ | Auth + rate limiting | Yes | No | No | Yes | Yes |
+ | Observability dashboard | Yes | No | Partial | No | Partial |
+ | Plugin ecosystem | Yes | No | No | No | No |
+ | SSE streaming | Yes | Yes | Yes | Yes | Yes |
+
+ ## Tech stack
+
+ - **CLI**: [Typer](https://typer.tiangolo.com/) + [Rich](https://rich.readthedocs.io/)
+ - **Config**: [Pydantic v2](https://docs.pydantic.dev/)
+ - **Gateway**: [FastAPI](https://fastapi.tiangolo.com/)
+ - **Containers**: [Docker SDK for Python](https://docker-py.readthedocs.io/)
+ - **Metrics**: Prometheus + Grafana
+
+ ## Requirements
+
+ - Python 3.11+
+ - Docker
+
+ ## Contributing
+
+ See [CONTRIBUTING.md](CONTRIBUTING.md) for development setup and guidelines.
+
+ ## License
+
+ Apache-2.0
@@ -0,0 +1,62 @@
+ llmstack/__init__.py,sha256=eeHMsyABNyNRnVgYQqyU_R4RZpbJ7Kz3HzPWsisW5zo,84
+ llmstack/__main__.py,sha256=XhknKG6tlsgyslTC57MpF3IGUu_cIo6G5rSIz4kxuyc,86
+ llmstack/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ llmstack/cli/app.py,sha256=qcrUcYkJApyX2JYVCFsu2wXZx81b0ma5aBXRNVNukSw,2378
+ llmstack/cli/console.py,sha256=qnwJ2g39BwXhA_WhGtJW5E1ZdbTau4npG9j9mBo2zX0,259
+ llmstack/cli/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ llmstack/cli/commands/doctor.py,sha256=71PM5uG-a8A4nNf4ywMXmVIL8w6W5MwLgLco9DxbnG8,2440
+ llmstack/cli/commands/down.py,sha256=51XiP8n_Nq3oX4gbeLd_vCvXB1OP7dZEi3392eYgSJM,752
+ llmstack/cli/commands/init.py,sha256=1olPvaPLapdSkuSBVIum4kzXh2BMtkRw-HNZKs1slKc,2311
+ llmstack/cli/commands/logs.py,sha256=CGZO9kFiqc224LtpYH22uf0k5E26sCmAucrozjtb7BQ,840
+ llmstack/cli/commands/status.py,sha256=ahnhKsw0C6-x2umOVUNsCnTU5XQS-nGIjKOFxEZVjdA,1382
+ llmstack/cli/commands/up.py,sha256=plZvcqZfA5eIVIDu8n6iMIum3jN-QAhhHfLR4JRFomg,854
+ llmstack/config/__init__.py,sha256=7D4wkOUIE-BmuMJPBUg_d_g3QRpSKEvHC3mfdxBb_Zw,136
+ llmstack/config/loader.py,sha256=d4XnRBMoGGF7tvQRMlbFQE-8za2SBkElEdrjTXvLAVk,1375
+ llmstack/config/schema.py,sha256=p9dwVFSoE0tECuUzvtAoRKv3aLPY8bdpT1ouf5hsTCg,2133
+ llmstack/config/presets/__init__.py,sha256=Z1JX3ZGvQHWZLYbY7yb-ntcg9_-wB4AMmYiXO1epzZs,317
+ llmstack/config/presets/agent.py,sha256=VOIg088i_CR9Dv7JXPgQvQ84xSUE0IJ1V-38XyJ1r0Q,449
+ llmstack/config/presets/chat.py,sha256=OoJ_MqSFlbPa_nUeAvyUU3O1OsrfBW95Gw9bXjvoV38,385
+ llmstack/config/presets/rag.py,sha256=_aisSEegOHDCaieNKamoHJxdW7CG2N9T48qEWQjiG4I,340
+ llmstack/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ llmstack/core/hardware.py,sha256=dO2VRD2OZ4BnDETkHBRw1YeTERz6XhFByQpy8z7-jWc,3844
+ llmstack/core/health.py,sha256=BxH-Y1DsoDFc0zNxYVTdYEmKTC1dIWiBxhCLDLbUyr0,706
+ llmstack/core/resolver.py,sha256=MPUNLAoe0U_htLtMy789RbBir6xljWnw9p0AGOHYrAc,1633
+ llmstack/core/stack.py,sha256=v9kPijvUm5IF-B608b0fUw71wDv14vg0K1ovffSuHf4,8310
+ llmstack/docker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ llmstack/docker/manager.py,sha256=OzfY8H46dEw7acSe8sUglayo4hgxO0tj3SxEC47Xmmg,4728
+ llmstack/gateway/Dockerfile,sha256=Mtiy9fcfK1DikZ6v-I4zBI_Op1pABVm1nYZ-LT8J9fg,308
+ llmstack/gateway/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ llmstack/gateway/main.py,sha256=PYT48w4MuBNCqJ3YTq27OF_yC0dRRpnC61bpos1aveY,1483
+ llmstack/gateway/proxy.py,sha256=WLErhSUICVK1XWu6Qt9v5gBWYtwalP3LKJE_D5Dj6C4,2077
+ llmstack/gateway/middleware/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ llmstack/gateway/middleware/auth.py,sha256=k49XxcGuoZEEf_upkvrbhev5K0Y9ZZiyvJU8ibx_-Sw,1088
+ llmstack/gateway/middleware/metrics.py,sha256=nFeEyF5kBDbd9PCgMrt1-FAPZxXiLS4WMvP8tEnq-io,4387
+ llmstack/gateway/routes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ llmstack/gateway/routes/chat.py,sha256=qVbXYRdiQihawMF0qxzVeL7Ud5O-UyPjQjmOwEbN4H8,837
+ llmstack/gateway/routes/embeddings.py,sha256=H8FCv0cPsgwAMejC6ytCHXwy88vGktIMH_sKimEMoUk,453
+ llmstack/gateway/routes/health.py,sha256=K4EGOMVVMvOfAJDGEOFrVOvnT5OiZkH2U_u1WqdWP08,1449
+ llmstack/gateway/routes/models.py,sha256=0jBHyop_HJyArvKXKOZKZwzj6rVCeuLZhj6AdIR8Hwc,353
+ llmstack/plugins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ llmstack/plugins/loader.py,sha256=cg_aldCDSNJWM_Es7iUI-1ycwJwhzNLf9G0CYJrw8Ow,152
+ llmstack/plugins/spec.py,sha256=nteKuPgDjok8b1rSKsN6rFVXGjhYwwfuRpiubKEIfqc,546
+ llmstack/services/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ llmstack/services/base.py,sha256=qe2c6uZbSwaaItvkEq4T9kxujOGEgS2KL0giKrea1g8,1888
+ llmstack/services/registry.py,sha256=pbDscM_YezzKMikuHqbO7cuekmiP84IyOKmwctkeAow,1855
+ llmstack/services/cache/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ llmstack/services/cache/redis.py,sha256=_6_JlFxFEvVutkmFwo-qzola6heODG_fxrfGVt0x-1k,898
+ llmstack/services/embeddings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ llmstack/services/embeddings/tei.py,sha256=GNJL6PxQVUu7FQEzR0jj1N2jDCkhZoO1ezKLcEdo5EI,1502
+ llmstack/services/gateway/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ llmstack/services/gateway/service.py,sha256=4PSED82DHUlkqP4FxL-lNxY5kO1E-ODTN-x6msEY3HE,1580
+ llmstack/services/inference/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ llmstack/services/inference/ollama.py,sha256=0A-4V3Ks3fiyQwnJkBhLtj9oIvKAYPK-QgsbcAsuqFI,1814
+ llmstack/services/inference/vllm.py,sha256=fhi7xa2RitNG0FNnhypUYfjEemvylnnxOIksRfBzJcE,1607
+ llmstack/services/observe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ llmstack/services/observe/prometheus.py,sha256=6NG8yDY2JQqYCWtSldb6LEKbOqLdHiPB9u9G0c2dJpw,5343
+ llmstack/services/vectordb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ llmstack/services/vectordb/qdrant.py,sha256=zIOqf5km-4pItXOW8Y-qdxHLiU9lFfhDz33nBrTgPhM,900
+ llmstack_cli-0.1.0.dist-info/METADATA,sha256=aWalxTiNMqv0lz5GjICnaOhTC1QgYXRR8Tq7Jyk4LQE,8128
+ llmstack_cli-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
+ llmstack_cli-0.1.0.dist-info/entry_points.txt,sha256=i2BIacwqAqUaN1yAe-MaJZ22unHqAAUkTopk9M_iZPo,50
+ llmstack_cli-0.1.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ llmstack_cli-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
+ Wheel-Version: 1.0
+ Generator: hatchling 1.29.0
+ Root-Is-Purelib: true
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
+ [console_scripts]
+ llmstack = llmstack.cli.app:app