maxllm-gate 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. maxllm_gate-0.2.0/.env.example +60 -0
  2. maxllm_gate-0.2.0/.github/copilot-instructions.md +343 -0
  3. maxllm_gate-0.2.0/.gitignore +77 -0
  4. maxllm_gate-0.2.0/Dockerfile +24 -0
  5. maxllm_gate-0.2.0/LICENSE +21 -0
  6. maxllm_gate-0.2.0/MAXLLM.py +0 -0
  7. maxllm_gate-0.2.0/PKG-INFO +771 -0
  8. maxllm_gate-0.2.0/PYPI_UPLOAD.md +117 -0
  9. maxllm_gate-0.2.0/README.md +717 -0
  10. maxllm_gate-0.2.0/config.example.yaml +60 -0
  11. maxllm_gate-0.2.0/docker-compose.yml +62 -0
  12. maxllm_gate-0.2.0/examples/basic_usage.py +80 -0
  13. maxllm_gate-0.2.0/examples/concurrent_requests.py +124 -0
  14. maxllm_gate-0.2.0/examples/multi_key_config.py +96 -0
  15. maxllm_gate-0.2.0/examples/priority_requests.py +140 -0
  16. maxllm_gate-0.2.0/prometheus.yml +9 -0
  17. maxllm_gate-0.2.0/pyproject.toml +97 -0
  18. maxllm_gate-0.2.0/requirements.txt +22 -0
  19. maxllm_gate-0.2.0/scripts/benchmark.py +123 -0
  20. maxllm_gate-0.2.0/scripts/simulate_load.py +276 -0
  21. maxllm_gate-0.2.0/src/llm_scheduler/__init__.py +8 -0
  22. maxllm_gate-0.2.0/src/llm_scheduler/api/__init__.py +6 -0
  23. maxllm_gate-0.2.0/src/llm_scheduler/api/dependencies.py +10 -0
  24. maxllm_gate-0.2.0/src/llm_scheduler/api/routes.py +275 -0
  25. maxllm_gate-0.2.0/src/llm_scheduler/api/schemas.py +135 -0
  26. maxllm_gate-0.2.0/src/llm_scheduler/config.py +117 -0
  27. maxllm_gate-0.2.0/src/llm_scheduler/core/__init__.py +8 -0
  28. maxllm_gate-0.2.0/src/llm_scheduler/core/dispatcher.py +225 -0
  29. maxllm_gate-0.2.0/src/llm_scheduler/core/queue_manager.py +251 -0
  30. maxllm_gate-0.2.0/src/llm_scheduler/core/scheduler.py +236 -0
  31. maxllm_gate-0.2.0/src/llm_scheduler/core/token_estimator.py +201 -0
  32. maxllm_gate-0.2.0/src/llm_scheduler/main.py +86 -0
  33. maxllm_gate-0.2.0/src/llm_scheduler/models/__init__.py +6 -0
  34. maxllm_gate-0.2.0/src/llm_scheduler/models/provider.py +103 -0
  35. maxllm_gate-0.2.0/src/llm_scheduler/models/request.py +101 -0
  36. maxllm_gate-0.2.0/src/llm_scheduler/observability/__init__.py +6 -0
  37. maxllm_gate-0.2.0/src/llm_scheduler/observability/logging.py +65 -0
  38. maxllm_gate-0.2.0/src/llm_scheduler/observability/metrics.py +92 -0
  39. maxllm_gate-0.2.0/src/llm_scheduler/rate_limiting/__init__.py +7 -0
  40. maxllm_gate-0.2.0/src/llm_scheduler/rate_limiting/key_manager.py +252 -0
  41. maxllm_gate-0.2.0/src/llm_scheduler/rate_limiting/token_bucket.py +152 -0
  42. maxllm_gate-0.2.0/src/llm_scheduler/rate_limiting/tracker.py +281 -0
  43. maxllm_gate-0.2.0/src/llm_scheduler/strategies/__init__.py +7 -0
  44. maxllm_gate-0.2.0/src/llm_scheduler/strategies/base.py +56 -0
  45. maxllm_gate-0.2.0/src/llm_scheduler/strategies/fallback.py +52 -0
  46. maxllm_gate-0.2.0/src/llm_scheduler/strategies/least_utilized.py +30 -0
  47. maxllm_gate-0.2.0/src/llm_scheduler/strategies/round_robin.py +29 -0
  48. maxllm_gate-0.2.0/src/llm_scheduler/strategies/token_aware.py +46 -0
  49. maxllm_gate-0.2.0/src/llm_scheduler/utils/__init__.py +6 -0
  50. maxllm_gate-0.2.0/src/llm_scheduler/utils/retry.py +136 -0
  51. maxllm_gate-0.2.0/src/llm_scheduler/utils/time_utils.py +115 -0
  52. maxllm_gate-0.2.0/src/maxllm/__init__.py +77 -0
  53. maxllm_gate-0.2.0/src/maxllm/client.py +598 -0
  54. maxllm_gate-0.2.0/src/maxllm/config.py +181 -0
  55. maxllm_gate-0.2.0/src/maxllm/rate_limiter.py +432 -0
  56. maxllm_gate-0.2.0/src/maxllm/redis_backend.py +495 -0
  57. maxllm_gate-0.2.0/src/maxllm/scheduler.py +559 -0
  58. maxllm_gate-0.2.0/src/maxllm/validation.py +183 -0
  59. maxllm_gate-0.2.0/tests/__init__.py +1 -0
  60. maxllm_gate-0.2.0/tests/conftest.py +96 -0
  61. maxllm_gate-0.2.0/tests/mocks/__init__.py +133 -0
  62. maxllm_gate-0.2.0/tests/test_api.py +92 -0
  63. maxllm_gate-0.2.0/tests/test_scheduler.py +136 -0
  64. maxllm_gate-0.2.0/tests/test_sdk.py +509 -0
  65. maxllm_gate-0.2.0/tests/test_strategies.py +151 -0
  66. maxllm_gate-0.2.0/tests/test_token_bucket.py +124 -0
  67. maxllm_gate-0.2.0/tests/test_token_estimator.py +113 -0
@@ -0,0 +1,60 @@
1
+ # LLM Rate Limit Scheduler Configuration
2
+
3
+ # Server settings
4
+ HOST=0.0.0.0
5
+ PORT=8000
6
+ DEBUG=false
7
+ LOG_LEVEL=INFO
8
+
9
+ # API Keys Configuration (JSON format)
10
+ # Format: {"key_id": {"api_key": "...", "provider": "...", "tpm_limit": ..., "rpm_limit": ...}}
11
+ API_KEYS_CONFIG='{
12
+ "groq-1": {
13
+ "api_key": "gsk_your_groq_key_1",
14
+ "provider": "groq",
15
+ "models": ["llama-3.1-70b-versatile", "mixtral-8x7b-32768"],
16
+ "tpm_limit": 30000,
17
+ "rpm_limit": 30
18
+ },
19
+ "groq-2": {
20
+ "api_key": "gsk_your_groq_key_2",
21
+ "provider": "groq",
22
+ "models": ["llama-3.1-70b-versatile", "mixtral-8x7b-32768"],
23
+ "tpm_limit": 30000,
24
+ "rpm_limit": 30
25
+ },
26
+ "openrouter-1": {
27
+ "api_key": "sk-or-your_openrouter_key",
28
+ "provider": "openrouter",
29
+ "models": ["anthropic/claude-3-haiku", "meta-llama/llama-3-70b-instruct"],
30
+ "tpm_limit": 100000,
31
+ "rpm_limit": 200
32
+ },
33
+ "openai-1": {
34
+ "api_key": "sk-your_openai_key",
35
+ "provider": "openai",
36
+ "models": ["gpt-4o-mini", "gpt-4o"],
37
+ "tpm_limit": 90000,
38
+ "rpm_limit": 500
39
+ }
40
+ }'
41
+
42
+ # Default scheduling strategy: least_utilized | round_robin | token_aware | balanced
43
+ DEFAULT_STRATEGY=least_utilized
44
+
45
+ # Token estimation settings
46
+ DEFAULT_MAX_TOKENS=1024
47
+ TOKEN_ESTIMATION_BUFFER=1.1
48
+
49
+ # Retry settings
50
+ MAX_RETRIES=3
51
+ RETRY_BASE_DELAY=1.0
52
+ RETRY_MAX_DELAY=60.0
53
+
54
+ # Queue settings
55
+ MAX_QUEUE_SIZE=10000
56
+ DEFAULT_PRIORITY=medium
57
+
58
+ # Redis settings (optional, for production)
59
+ # REDIS_URL=redis://localhost:6379/0
60
+ # USE_REDIS_QUEUE=false
@@ -0,0 +1,343 @@
1
+ # MAXLLM Copilot Instructions
2
+
3
+ ## Project Overview
4
+
5
+ MAXLLM is a production-ready LLM client that sits on top of LiteLLM, providing intelligent rate limiting, smart routing, and distributed state support. It manages multiple API keys across providers (OpenAI, Groq, OpenRouter, etc.) to maximize throughput and prevent 429 errors.
6
+
7
+ ### Architecture
8
+
9
+ The project has two main components:
10
+
11
+ 1. **SDK Client (`src/maxllm/`)** - Simple Python client library for end users
12
+ 2. **Scheduler Server (`src/llm_scheduler/`)** - Optional FastAPI gateway with advanced scheduling
13
+
14
+ **Request Flow:**
15
+ ```
16
+ User → MAXLLM Client → Scheduler → Rate Limiter → LiteLLM → Provider API
17
+
18
+ Queue Manager (if capacity exhausted)
19
+ ```
20
+
21
+ **Core Components:**
22
+ - `llm_scheduler/core/scheduler.py` - Main scheduling engine that routes requests
23
+ - `llm_scheduler/rate_limiting/token_bucket.py` - Token bucket algorithm for rate limiting
24
+ - `llm_scheduler/strategies/` - Routing strategies (least_utilized, round_robin, token_aware, balanced)
25
+ - `maxllm/client.py` - User-facing SDK (sync/async)
26
+ - `maxllm/scheduler.py` - SDK's scheduler (simplified version for client use)
27
+
28
+ ### Key Concepts
29
+
30
+ **Dual Package Structure:**
31
+ - `maxllm` - The SDK package that users import (`from maxllm import MAXLLM`)
32
+ - `llm_scheduler` - Server/API package for FastAPI gateway mode
33
+ - Both packages are in `src/` and installed together via `pyproject.toml`
34
+
35
+ **Rate Limiting Philosophy:**
36
+ - Never blindly hit 429 errors
37
+ - Estimate tokens BEFORE making requests (using tiktoken)
38
+ - Check ALL available keys before deciding to wait
39
+ - Use token bucket algorithm for TPM/RPM tracking
40
+ - Defer execution when capacity exhausted (queuing instead of failing)
41
+
42
+ **Routing Strategies:**
43
+ - `least_utilized` - Routes to key with most available capacity
44
+ - `round_robin` - Cycles through keys evenly
45
+ - `token_aware` - Prioritizes keys that can handle the request size
46
+ - `balanced` (NEW) - Weighted scoring: utilization (40%), latency (35%), errors (15%), freshness (10%)
47
+
48
+ ## Build, Test, and Lint Commands
49
+
50
+ ### Installation
51
+ ```bash
52
+ # Development setup
53
+ pip install -e ".[dev,all]"
54
+
55
+ # Individual features
56
+ pip install -e ".[server]" # FastAPI server mode
57
+ pip install -e ".[yaml]" # YAML config support
58
+ pip install -e ".[redis]" # Redis backend
59
+ ```
60
+
61
+ ### Testing
62
+ ```bash
63
+ # Run all tests
64
+ pytest
65
+
66
+ # Run with coverage
67
+ pytest --cov=src --cov-report=html
68
+
69
+ # Run specific test file
70
+ pytest tests/test_sdk.py
71
+
72
+ # Run single test
73
+ pytest tests/test_sdk.py::test_chat_basic
74
+
75
+ # Run async tests only
76
+ pytest -k "asyncio"
77
+ ```
78
+
79
+ ### Linting
80
+ ```bash
81
+ # Run ruff linter
82
+ ruff check src/ tests/
83
+
84
+ # Auto-fix issues
85
+ ruff check --fix src/ tests/
86
+
87
+ # Type checking
88
+ mypy src/
89
+ ```
90
+
91
+ ### Running the Server
92
+ ```bash
93
+ # Start FastAPI server (requires [server] extras)
94
+ maxllm-server
95
+
96
+ # Or with uvicorn directly
97
+ uvicorn llm_scheduler.main:app --host 0.0.0.0 --port 8000
98
+
99
+ # With Docker
100
+ docker-compose up
101
+
102
+ # Check health
103
+ curl http://localhost:8000/health
104
+ ```
105
+
106
+ ### Running Examples
107
+ ```bash
108
+ python examples/basic_usage.py
109
+ python examples/concurrent_requests.py
110
+ python examples/multi_key_config.py
111
+ ```
112
+
113
+ ## Key Conventions
114
+
115
+ ### Config Management
116
+
117
+ **Two config systems coexist:**
118
+ 1. SDK Config (`maxllm/config.py`) - Simple YAML/dict for client library
119
+ 2. Server Config (`llm_scheduler/config.py`) - Pydantic Settings for FastAPI app
120
+
121
+ Both use similar structure but serve different purposes. Don't confuse them when making changes.
122
+
123
+ **Config Loading Priority:**
124
+ ```python
125
+ # SDK client
126
+ MAXLLM.from_config("config.yaml") # YAML file
127
+ MAXLLM.from_env() # Environment variables
128
+ MAXLLM(keys=[...]) # Direct dict
129
+
130
+ # Server uses Pydantic Settings
131
+ settings.get_api_keys() # Reads from env vars or config
132
+ ```
133
+
134
+ ### Async/Sync Duality
135
+
136
+ The SDK provides both sync (`MAXLLM`) and async (`MAXLLMAsync`) clients. Key patterns:
137
+
138
+ - Async is preferred for production/high-throughput scenarios
139
+ - Sync wrapper uses `asyncio.run()` internally
140
+ - Both share the same core logic in `scheduler.py` and `rate_limiter.py`
141
+ - Tests use `@pytest.mark.asyncio` for async code
142
+
143
+ **Implementation pattern:**
144
+ ```python
145
+ # Internal methods are async
146
+ async def _execute_request(...):
147
+ ...
148
+
149
+ # Public API provides both
150
+ def chat(self, ...):  # Sync wrapper (on MAXLLM)
151
+ return asyncio.run(self._execute_request(...))
152
+
153
+ async def chat(self, ...):  # Async version (on MAXLLMAsync)
154
+ return await self._execute_request(...)
155
+ ```
156
+
157
+ ### Token Estimation
158
+
159
+ Token counting happens BEFORE requests to avoid hitting rate limits:
160
+
161
+ ```python
162
+ # src/llm_scheduler/core/token_estimator.py
163
+ estimated_tokens = token_estimator.estimate(messages, max_tokens)
164
+ ```
165
+
166
+ - Uses tiktoken for accurate counts
167
+ - Adds buffer (default 10%) for safety margin
168
+ - Cached encoders per model to avoid repeated initialization
169
+ - Estimation errors are conservative (overestimate to be safe)
170
+
171
+ ### Strategy Selection
172
+
173
+ Strategies are selected by name in config and resolved via registry:
174
+
175
+ ```python
176
+ # src/llm_scheduler/strategies/__init__.py
177
+ strategy = StrategyRegistry.get(strategy_name)
178
+ selected_key = strategy.select(candidates, estimated_tokens)
179
+ ```
180
+
181
+ When adding new strategies:
182
+ 1. Create class in `strategies/` that extends `SchedulingStrategy`
183
+ 2. Register in `StrategyRegistry`
184
+ 3. Add to config validation in `config.py`
185
+ 4. Add tests in `test_strategies.py`
186
+
187
+ ### Error Handling
188
+
189
+ **Retry Logic:**
190
+ - Transient failures (network, timeout) → automatic retry with exponential backoff
191
+ - Rate limit hits (429) → should never happen (that's the point!)
192
+ - Auth failures (401) → immediate fail, no retry
193
+ - Model not found (404) → immediate fail, no retry
194
+
195
+ **Key Health Tracking:**
196
+ - Each key tracks error rate and latency
197
+ - Strategies can use health metrics for routing decisions
198
+ - Unhealthy keys are automatically deprioritized
199
+ - See `llm_scheduler/rate_limiting/tracker.py`
200
+
201
+ ### Testing Patterns
202
+
203
+ **Fixtures in conftest.py:**
204
+ - `mock_config` - Test config with fake keys
205
+ - `key_manager` - Pre-configured KeyManager
206
+ - `scheduler` - Running scheduler instance
207
+ - `sample_messages` - Standard test messages
208
+
209
+ **Mocking LiteLLM:**
210
+ ```python
211
+ @patch("litellm.acompletion")
212
+ async def test_something(mock_completion):
213
+ mock_completion.return_value = AsyncMock(...)
214
+ # Test code
215
+ ```
216
+
217
+ **Testing Rate Limits:**
218
+ Use `TokenBucket` directly to test token bucket logic without full scheduler overhead.
219
+
220
+ ### Redis Backend (Optional)
221
+
222
+ For distributed deployments, rate limit state can be stored in Redis:
223
+
224
+ ```python
225
+ # src/maxllm/redis_backend.py
226
+ limiter = HybridRateLimiter(
227
+ redis_url="redis://localhost:6379",
228
+ fallback_to_memory=True, # Graceful degradation
229
+ )
230
+ ```
231
+
232
+ - Keys stored as `maxllm:ratelimit:{key_id}:tokens`
233
+ - Uses Redis EVAL for atomic token consumption
234
+ - Falls back to in-memory if Redis unavailable
235
+ - Not required for single-instance deployments
236
+
237
+ ### Observability
238
+
239
+ **Metrics Available:**
240
+ - `client.capacity()` - Token/request capacity per key
241
+ - `client.latency()` - Latency stats (avg, p50, p99)
242
+ - `client.scores()` - Routing decision scores per key
243
+
244
+ **Prometheus Integration (server mode):**
245
+ - Request counts by model/key
246
+ - Latency histograms
247
+ - Queue depth
248
+ - Rate limit hit rate
249
+ - Available at `/metrics` endpoint
250
+
251
+ ### Common Pitfalls
252
+
253
+ 1. **Don't confuse the two config systems** - SDK uses `MAXLLMConfig`, server uses Pydantic Settings
254
+ 2. **Token estimation is approximate** - Always add buffer, never assume exact count
255
+ 3. **Strategies return None if no capacity** - Handle this case (queue or fail)
256
+ 4. **Context managers are important** - Use `with MAXLLM.from_config(...)` for graceful shutdown
257
+ 5. **Test isolation** - Each test should use fresh scheduler instance (see fixtures)
258
+ 6. **Provider-specific quirks** - Some providers need special handling in LiteLLM (check docs)
259
+
260
+ ### File Organization
261
+
262
+ ```
263
+ src/
264
+ maxllm/ # SDK package (public API)
265
+ client.py # User-facing MAXLLM/MAXLLMAsync classes
266
+ scheduler.py # Client-side scheduler
267
+ config.py # SDK config models
268
+ rate_limiter.py # Rate limiting for SDK
269
+ validation.py # Pydantic request validation
270
+
271
+ llm_scheduler/ # Server package (FastAPI)
272
+ main.py # FastAPI app entry point
273
+ config.py # Server settings (Pydantic)
274
+ api/ # FastAPI routes
275
+ core/ # Core scheduling logic
276
+ scheduler.py # Main scheduler engine
277
+ dispatcher.py # Request dispatcher
278
+ queue_manager.py # Request queuing
279
+ token_estimator.py # Token counting
280
+ rate_limiting/ # Rate limit tracking
281
+ token_bucket.py # Token bucket algorithm
282
+ key_manager.py # API key management
283
+ tracker.py # Rate limit state
284
+ strategies/ # Routing strategies
285
+ observability/ # Logging and metrics
286
+ ```
287
+
288
+ ## Environment Variables
289
+
290
+ ```bash
291
+ # For SDK usage
292
+ MAXLLM_KEYS='{"groq-1": {...}, "openai-1": {...}}'
293
+
294
+ # For server mode (see llm_scheduler/config.py)
295
+ HOST=0.0.0.0
296
+ PORT=8000
297
+ LOG_LEVEL=INFO
298
+ DEFAULT_STRATEGY=balanced
299
+ MAX_QUEUE_SIZE=10000
300
+
301
+ # Redis (optional)
302
+ REDIS_URL=redis://localhost:6379
303
+ REDIS_PREFIX=maxllm:
304
+
305
+ # Provider API keys can also be individual env vars
306
+ GROQ_API_KEY=gsk_...
307
+ OPENAI_API_KEY=sk-...
308
+ ```
309
+
310
+ ## Making Changes
311
+
312
+ ### Adding a New Provider
313
+
314
+ 1. LiteLLM already handles most providers - just add to config
315
+ 2. Update `config.example.yaml` with example
316
+ 3. Add provider-specific rate limits (check their docs)
317
+ 4. Test with `examples/basic_usage.py`
318
+
319
+ ### Adding a New Strategy
320
+
321
+ 1. Create `src/llm_scheduler/strategies/my_strategy.py`
322
+ 2. Extend `SchedulingStrategy` base class
323
+ 3. Implement `select()` method
324
+ 4. Register in `StrategyRegistry` (`strategies/__init__.py`)
325
+ 5. Add tests in `tests/test_strategies.py`
326
+ 6. Update README.md strategy table
327
+
328
+ ### Modifying Rate Limiting
329
+
330
+ Core logic is in `token_bucket.py`. The token bucket algorithm:
331
+ - Refills at constant rate (TPM/RPM converted to tokens per second)
332
+ - Consumes tokens on each request
333
+ - Blocks if insufficient capacity
334
+
335
+ Be careful changing this - it's mathematically proven and well-tested.
336
+
337
+ ### Changing Token Estimation
338
+
339
+ Token estimation is in `core/token_estimator.py`. Uses tiktoken under the hood. If changing:
340
+ - Keep conservative (overestimate is better than underestimate)
341
+ - Cache encoders (they're expensive to create)
342
+ - Test with various message lengths
343
+ - Consider token_buffer multiplier in config
@@ -0,0 +1,77 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ *.egg-info/
24
+ .installed.cfg
25
+ *.egg
26
+
27
+ # PyInstaller
28
+ *.manifest
29
+ *.spec
30
+
31
+ # Installer logs
32
+ pip-log.txt
33
+ pip-delete-this-directory.txt
34
+
35
+ # Unit test / coverage reports
36
+ htmlcov/
37
+ .tox/
38
+ .nox/
39
+ .coverage
40
+ .coverage.*
41
+ .cache
42
+ nosetests.xml
43
+ coverage.xml
44
+ *.cover
45
+ *.py,cover
46
+ .hypothesis/
47
+ .pytest_cache/
48
+
49
+ # Translations
50
+ *.mo
51
+ *.pot
52
+
53
+ # Environments
54
+ .env
55
+ .venv
56
+ env/
57
+ venv/
58
+ ENV/
59
+ env.bak/
60
+ venv.bak/
61
+
62
+ # IDE
63
+ .vscode/
64
+ .idea/
65
+ *.swp
66
+ *.swo
67
+ *~
68
+
69
+ # OS
70
+ .DS_Store
71
+ Thumbs.db
72
+
73
+ # Project specific
74
+ logs/
75
+ *.log
76
+ .mypy_cache/
77
+ .ruff_cache/
@@ -0,0 +1,24 @@
1
+ FROM python:3.11-slim
2
+
3
+ WORKDIR /app
4
+
5
+ # Install dependencies
6
+ COPY requirements.txt .
7
+ RUN pip install --no-cache-dir -r requirements.txt
8
+
9
+ # Copy source code
10
+ COPY src/ ./src/
11
+ COPY pyproject.toml .
12
+
13
+ # Install package
14
+ RUN pip install -e .
15
+
16
+ # Expose port
17
+ EXPOSE 8000
18
+
19
+ # Health check
20
+ HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
21
+ CMD curl -f http://localhost:8000/health || exit 1
22
+
23
+ # Run the application
24
+ CMD ["uvicorn", "llm_scheduler.main:app", "--host", "0.0.0.0", "--port", "8000"]
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 LLM Rate Limit Scheduler Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
File without changes