keymesh 0.1.2a0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. keymesh-0.1.2a0/.clauderules +136 -0
  2. keymesh-0.1.2a0/.cursorrules +136 -0
  3. keymesh-0.1.2a0/.env.example +3 -0
  4. keymesh-0.1.2a0/.gitignore +35 -0
  5. keymesh-0.1.2a0/.python-version +1 -0
  6. keymesh-0.1.2a0/AGENT.md +72 -0
  7. keymesh-0.1.2a0/GEMINI.md +206 -0
  8. keymesh-0.1.2a0/LICENSE +21 -0
  9. keymesh-0.1.2a0/PKG-INFO +253 -0
  10. keymesh-0.1.2a0/README.md +227 -0
  11. keymesh-0.1.2a0/example.py +196 -0
  12. keymesh-0.1.2a0/flow.md +166 -0
  13. keymesh-0.1.2a0/instructions.md +244 -0
  14. keymesh-0.1.2a0/keymesh/__init__.py +45 -0
  15. keymesh-0.1.2a0/keymesh/concurrency/__init__.py +4 -0
  16. keymesh-0.1.2a0/keymesh/concurrency/semaphores.py +47 -0
  17. keymesh-0.1.2a0/keymesh/cooldown/__init__.py +4 -0
  18. keymesh-0.1.2a0/keymesh/cooldown/manager.py +52 -0
  19. keymesh-0.1.2a0/keymesh/exceptions.py +30 -0
  20. keymesh-0.1.2a0/keymesh/metrics/__init__.py +4 -0
  21. keymesh-0.1.2a0/keymesh/metrics/pool_metrics.py +52 -0
  22. keymesh-0.1.2a0/keymesh/pool/__init__.py +5 -0
  23. keymesh-0.1.2a0/keymesh/pool/pool.py +251 -0
  24. keymesh-0.1.2a0/keymesh/pool/sync_pool.py +253 -0
  25. keymesh-0.1.2a0/keymesh/scheduler/__init__.py +26 -0
  26. keymesh-0.1.2a0/keymesh/scheduler/base.py +40 -0
  27. keymesh-0.1.2a0/keymesh/scheduler/least_busy.py +28 -0
  28. keymesh-0.1.2a0/keymesh/scheduler/round_robin.py +34 -0
  29. keymesh-0.1.2a0/keymesh/scheduler/weighted.py +44 -0
  30. keymesh-0.1.2a0/keymesh/state/__init__.py +5 -0
  31. keymesh-0.1.2a0/keymesh/state/key_state.py +151 -0
  32. keymesh-0.1.2a0/keymesh/state/sync_key_state.py +151 -0
  33. keymesh-0.1.2a0/keymesh/storage/__init__.py +16 -0
  34. keymesh-0.1.2a0/keymesh/storage/base.py +37 -0
  35. keymesh-0.1.2a0/keymesh/storage/json_storage.py +72 -0
  36. keymesh-0.1.2a0/keymesh/storage/memory.py +43 -0
  37. keymesh-0.1.2a0/keymesh/storage/sync_base.py +37 -0
  38. keymesh-0.1.2a0/keymesh/storage/sync_json.py +72 -0
  39. keymesh-0.1.2a0/keymesh/storage/sync_memory.py +43 -0
  40. keymesh-0.1.2a0/keymesh/utils/__init__.py +4 -0
  41. keymesh-0.1.2a0/keymesh/utils/helpers.py +58 -0
  42. keymesh-0.1.2a0/plan.md +182 -0
  43. keymesh-0.1.2a0/problem.md +629 -0
  44. keymesh-0.1.2a0/prompt.md +182 -0
  45. keymesh-0.1.2a0/pyproject.toml +49 -0
  46. keymesh-0.1.2a0/tests/__init__.py +3 -0
  47. keymesh-0.1.2a0/tests/test_high_severity_fixes.py +182 -0
  48. keymesh-0.1.2a0/tests/test_key_state.py +76 -0
  49. keymesh-0.1.2a0/tests/test_pool.py +91 -0
  50. keymesh-0.1.2a0/tests/test_sync_pool.py +83 -0
  51. keymesh-0.1.2a0/uv.lock +347 -0
@@ -0,0 +1,136 @@
1
+ # KeyMesh LLM Context & Rules (.clauderules)
2
+
3
+ This file provides specialized architectural context, guidelines, and runtime assumptions for **Claude**, **Gemini**, and other advanced LLMs when interacting with, maintaining, or extending the **KeyMesh** workspace.
4
+
5
+ ---
6
+
7
+ ## 🚀 KeyMesh at a Glance
8
+
9
+ KeyMesh is a **lightweight, concurrency-safe credential orchestration runtime for AI API systems**. It acts purely as a credential pool manager and scheduler to multiplex multiple API keys across highly concurrent workloads, maximizing aggregate throughput (e.g., combining multiple lower-tier rate-limited keys to act as one high-throughput pool).
10
+
11
+ > [!IMPORTANT]
12
+ > **Strict Architectural Boundaries:**
13
+ > - **KeyMesh is ONLY:** A credential allocator, cooldown manager, state tracker, concurrency coordinator, and routing scheduler.
14
+ > - **KeyMesh is NOT:** An SDK wrapper, an HTTP gateway, a proxy server, an inference runner, or a transport framework.
15
+ > - **Zero Couplings:** KeyMesh must remain completely framework-agnostic. It does not wrap `openai`, `anthropic`, `httpx`, or any specific client. It only yields keys and records the outcome of operations.
16
+
17
+ ---
18
+
19
+ ## 🛠️ Codebase Structure & Architecture
20
+
21
+ ```text
22
+ keymesh/
23
+ ├── concurrency/ # Async-safe semaphores and concurrency locks
24
+ │ └── semaphores.py
25
+ ├── cooldown/ # Cooldown management and state checks
26
+ │ └── manager.py
27
+ ├── metrics/ # Pool-level diagnostic counters and statistics
28
+ │ └── pool_metrics.py
29
+ ├── pool/ # Main KeyPool lifecycle and public API orchestrator
30
+ │ ├── pool.py # Async KeyPool implementation
31
+ │ └── sync_pool.py # Synchronous/Threaded KeyPool implementation
32
+ ├── scheduler/ # Pluggable scheduling strategies (Round Robin, Least Busy, Weighted)
33
+ │ ├── base.py
34
+ │ ├── least_busy.py
35
+ │ ├── round_robin.py
36
+ │ └── weighted.py
37
+ ├── state/ # Runtime state representation
38
+ │ ├── key_state.py # Async-safe individual KeyState
39
+ │ └── sync_key_state.py # Thread-safe individual SyncKeyState
40
+ ├── storage/ # Pluggable persistence backends
41
+ │ ├── base.py # Async storage base
42
+ │ ├── sync_base.py # Sync storage base
43
+ │ ├── memory.py # Async MemoryStorage
44
+ │ ├── sync_memory.py # Sync SyncMemoryStorage
45
+ │ ├── json_storage.py # Async JSONStorage
46
+ │ └── sync_json.py # Sync SyncJSONStorage
47
+ └── utils/ # Utilities (logging, masking, helper decorators)
48
+ └── helpers.py
49
+ ```
50
+
51
+ ---
52
+
53
+ ## 🧬 Concurrency & State Invariants
54
+
55
+ 1. **State Mutation Locks:**
56
+ - **Async (`KeyState`):** All mutations on `KeyState` must acquire the inner `asyncio.Lock` via `async with self._lock:`.
57
+ - **Sync (`SyncKeyState`):** All mutations on `SyncKeyState` must acquire the inner `threading.Lock` via `with self._lock:`.
58
+ 2. **Stateless Schedulers:** Schedulers (`BaseScheduler` subclasses) are stateless selectors. They must only select a key and **never** mutate any key states directly.
59
+ 3. **No Event Loop Blocking:** Do not block the event loop or introduce long sleeps when a key is rate-limited. Schedulers must dynamically skip keys cooling down/exhausted and return another immediately.
60
+ 4. **EMA calculations:** Latencies must be smoothed using Exponential Moving Average (EMA) with a default alpha of `0.2`:
61
+ $$\text{Latency}_{\text{avg}} = \alpha \cdot \text{Latency}_{\text{new}} + (1 - \alpha) \cdot \text{Latency}_{\text{prev}}$$
62
+
63
+ ---
64
+
65
+ ## 🔄 Concurrency-Safe Integration Patterns
66
+
67
+ When using KeyMesh with SDKs (like OpenAI or Anthropic), **never** recreate the client on every request and **never** mutate `client.api_key` globally (causes race conditions). Use one of these three concurrency-safe patterns:
68
+
69
+ ### Pattern 1: Request-Scoped Client Overrides (`with_options`)
70
+ *Recommended for modern OpenAI SDKs.* Generates a copy of the client configuration pointing to the new key, while sharing the underlying connection pool.
71
+ ```python
72
+ # Async
73
+ scoped_client = client.with_options(api_key=key)
74
+ response = await scoped_client.chat.completions.create(...)
75
+
76
+ # Sync
77
+ scoped_client = client.with_options(api_key=key)
78
+ response = scoped_client.chat.completions.create(...)
79
+ ```
80
+
81
+ ### Pattern 2: Per-Request Custom Headers (`extra_headers`)
82
+ Injects the authorization key directly inside the request header without changing client-wide configurations.
83
+ ```python
84
+ response = await client.chat.completions.create(
85
+ model="gpt-4",
86
+ messages=[{"role": "user", "content": "Query"}],
87
+ extra_headers={"Authorization": f"Bearer {key}"}
88
+ )
89
+ ```
90
+
91
+ ### Pattern 3: Automated Lifecycle Context Managers (`key_lifecycle`)
92
+ Encapsulates acquiring, releasing, timing, and error state tracking into reusable Python context managers to prevent key leaks.
93
+ ```python
94
+ @contextlib.asynccontextmanager
95
+ async def key_lifecycle(pool: KeyPool):
96
+ key = await pool.acquire()
97
+ start = time.monotonic()
98
+ try:
99
+ yield key
100
+ await pool.release(key, latency=time.monotonic() - start)
101
+ except Exception:
102
+ await pool.mark_failed(key)
103
+ raise
104
+ ```
105
+
106
+ ---
107
+
108
+ ## 🛠️ Tooling & Command Cheat Sheet
109
+
110
+ We use **`uv`** as the default package and project manager.
111
+
112
+ - **Run Tests:**
113
+ ```bash
114
+ uv run pytest
115
+ ```
116
+ - **Type Checking (Strict mypy):**
117
+ ```bash
118
+ uv run mypy .
119
+ ```
120
+ - **Linting & Formatting:**
121
+ ```bash
122
+ uv run ruff check .
123
+ ```
124
+ - **Local Environment Cache Setup:**
125
+ ```bash
126
+ export UV_CACHE_DIR=.uv-cache
127
+ ```
128
+
129
+ ---
130
+
131
+ ## 💡 Developer / AI Guidelines
132
+
133
+ - **Clean Interface:** Keep the public interface of `KeyPool` and `SyncKeyPool` clean. Only expose `acquire`, `release`, `mark_failed`, and `mark_rate_limited`. Do not introduce framework-specific transport wrappers.
134
+ - **Strict Typing:** Every function parameter, return value, and class field must be fully typed. Use strict `mypy` style annotations.
135
+ - **Error Propagation:** Do not let internal exceptions leak directly without being wrapped in subclasses of `KeyMeshError`.
136
+ - **Zero Heavy Dependencies:** KeyMesh must remain lightweight. Do not import heavy frameworks (like FastAPI, Flask, or HTTP gateways).
@@ -0,0 +1,136 @@
1
+ # KeyMesh LLM Context & Rules (.cursorrules / .clauderules)
2
+
3
+ This file provides specialized architectural context, guidelines, and runtime assumptions for **Claude**, **Gemini**, and other advanced LLMs when interacting with, maintaining, or extending the **KeyMesh** workspace.
4
+
5
+ ---
6
+
7
+ ## 🚀 KeyMesh at a Glance
8
+
9
+ KeyMesh is a **lightweight, concurrency-safe credential orchestration runtime for AI API systems**. It acts purely as a credential pool manager and scheduler to multiplex multiple API keys across highly concurrent workloads, maximizing aggregate throughput (e.g., combining multiple lower-tier rate-limited keys to act as one high-throughput pool).
10
+
11
+ > [!IMPORTANT]
12
+ > **Strict Architectural Boundaries:**
13
+ > - **KeyMesh is ONLY:** A credential allocator, cooldown manager, state tracker, concurrency coordinator, and routing scheduler.
14
+ > - **KeyMesh is NOT:** An SDK wrapper, an HTTP gateway, a proxy server, an inference runner, or a transport framework.
15
+ > - **Zero Couplings:** KeyMesh must remain completely framework-agnostic. It does not wrap `openai`, `anthropic`, `httpx`, or any specific client. It only yields keys and records the outcome of operations.
16
+
17
+ ---
18
+
19
+ ## 🛠️ Codebase Structure & Architecture
20
+
21
+ ```text
22
+ keymesh/
23
+ ├── concurrency/ # Async-safe semaphores and concurrency locks
24
+ │ └── semaphores.py
25
+ ├── cooldown/ # Cooldown management and state checks
26
+ │ └── manager.py
27
+ ├── metrics/ # Pool-level diagnostic counters and statistics
28
+ │ └── pool_metrics.py
29
+ ├── pool/ # Main KeyPool lifecycle and public API orchestrator
30
+ │ ├── pool.py # Async KeyPool implementation
31
+ │ └── sync_pool.py # Synchronous/Threaded KeyPool implementation
32
+ ├── scheduler/ # Pluggable scheduling strategies (Round Robin, Least Busy, Weighted)
33
+ │ ├── base.py
34
+ │ ├── least_busy.py
35
+ │ ├── round_robin.py
36
+ │ └── weighted.py
37
+ ├── state/ # Runtime state representation
38
+ │ ├── key_state.py # Async-safe individual KeyState
39
+ │ └── sync_key_state.py # Thread-safe individual SyncKeyState
40
+ ├── storage/ # Pluggable persistence backends
41
+ │ ├── base.py # Async storage base
42
+ │ ├── sync_base.py # Sync storage base
43
+ │ ├── memory.py # Async MemoryStorage
44
+ │ ├── sync_memory.py # Sync SyncMemoryStorage
45
+ │ ├── json_storage.py # Async JSONStorage
46
+ │ └── sync_json.py # Sync SyncJSONStorage
47
+ └── utils/ # Utilities (logging, masking, helper decorators)
48
+ └── helpers.py
49
+ ```
50
+
51
+ ---
52
+
53
+ ## 🧬 Concurrency & State Invariants
54
+
55
+ 1. **State Mutation Locks:**
56
+ - **Async (`KeyState`):** All mutations on `KeyState` must acquire the inner `asyncio.Lock` via `async with self._lock:`.
57
+ - **Sync (`SyncKeyState`):** All mutations on `SyncKeyState` must acquire the inner `threading.Lock` via `with self._lock:`.
58
+ 2. **Stateless Schedulers:** Schedulers (`BaseScheduler` subclasses) are stateless selectors. They must only select a key and **never** mutate any key states directly.
59
+ 3. **No Event Loop Blocking:** Do not block the event loop or introduce long sleeps when a key is rate-limited. Schedulers must dynamically skip keys cooling down/exhausted and return another immediately.
60
+ 4. **EMA calculations:** Latencies must be smoothed using Exponential Moving Average (EMA) with a default alpha of `0.2`:
61
+ $$\text{Latency}_{\text{avg}} = \alpha \cdot \text{Latency}_{\text{new}} + (1 - \alpha) \cdot \text{Latency}_{\text{prev}}$$
62
+
63
+ ---
64
+
65
+ ## 🔄 Concurrency-Safe Integration Patterns
66
+
67
+ When using KeyMesh with SDKs (like OpenAI or Anthropic), **never** recreate the client on every request and **never** mutate `client.api_key` globally (causes race conditions). Use one of these three concurrency-safe patterns:
68
+
69
+ ### Pattern 1: Request-Scoped Client Overrides (`with_options`)
70
+ *Recommended for modern OpenAI SDKs.* Generates a copy of the client configuration pointing to the new key, while sharing the underlying connection pool.
71
+ ```python
72
+ # Async
73
+ scoped_client = client.with_options(api_key=key)
74
+ response = await scoped_client.chat.completions.create(...)
75
+
76
+ # Sync
77
+ scoped_client = client.with_options(api_key=key)
78
+ response = scoped_client.chat.completions.create(...)
79
+ ```
80
+
81
+ ### Pattern 2: Per-Request Custom Headers (`extra_headers`)
82
+ Injects the authorization key directly inside the request header without changing client-wide configurations.
83
+ ```python
84
+ response = await client.chat.completions.create(
85
+ model="gpt-4",
86
+ messages=[{"role": "user", "content": "Query"}],
87
+ extra_headers={"Authorization": f"Bearer {key}"}
88
+ )
89
+ ```
90
+
91
+ ### Pattern 3: Automated Lifecycle Context Managers (`key_lifecycle`)
92
+ Encapsulates acquiring, releasing, timing, and error state tracking into reusable Python context managers to prevent key leaks.
93
+ ```python
94
+ @contextlib.asynccontextmanager
95
+ async def key_lifecycle(pool: KeyPool):
96
+ key = await pool.acquire()
97
+ start = time.monotonic()
98
+ try:
99
+ yield key
100
+ await pool.release(key, latency=time.monotonic() - start)
101
+ except Exception:
102
+ await pool.mark_failed(key)
103
+ raise
104
+ ```
105
+
106
+ ---
107
+
108
+ ## 🛠️ Tooling & Command Cheat Sheet
109
+
110
+ We use **`uv`** as the default package and project manager.
111
+
112
+ - **Run Tests:**
113
+ ```bash
114
+ uv run pytest
115
+ ```
116
+ - **Type Checking (Strict mypy):**
117
+ ```bash
118
+ uv run mypy .
119
+ ```
120
+ - **Linting & Formatting:**
121
+ ```bash
122
+ uv run ruff check .
123
+ ```
124
+ - **Local Environment Cache Setup:**
125
+ ```bash
126
+ export UV_CACHE_DIR=.uv-cache
127
+ ```
128
+
129
+ ---
130
+
131
+ ## 💡 Developer / AI Guidelines
132
+
133
+ - **Clean Interface:** Keep the public interface of `KeyPool` and `SyncKeyPool` clean. Only expose `acquire`, `release`, `mark_failed`, and `mark_rate_limited`. Do not introduce framework-specific transport wrappers.
134
+ - **Strict Typing:** Every function parameter, return value, and class field must be fully typed. Use strict `mypy` style annotations.
135
+ - **Error Propagation:** Do not let internal exceptions leak directly without being wrapped in subclasses of `KeyMeshError`.
136
+ - **Zero Heavy Dependencies:** KeyMesh must remain lightweight. Do not import heavy frameworks (like FastAPI, Flask, or HTTP gateways).
@@ -0,0 +1,3 @@
1
+ OPENAI_API_KEYS = "sk_9gcFWXixx6u.................qxH","sk_g....hRhGN3M7...........L0nrzGQvsCFf9b6j"
2
+ OPENAI_BASE_URL = "https://example.com/openai/v1"
3
+ OPENAI_MODEL_NAME ="gpt-5.4"
@@ -0,0 +1,35 @@
1
+ # Python-generated files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ build/
6
+ dist/
7
+ wheels/
8
+ *.egg-info/
9
+
10
+ # Virtual environments and package managers
11
+ .venv/
12
+ .uv-cache/
13
+
14
+ # Environment variables & local configuration
15
+ .env
16
+ .env.*
17
+ !.env.example
18
+
19
+ # IDEs, Editors & OS metadata
20
+ .DS_Store
21
+ .vscode/
22
+ .idea/
23
+ *.suo
24
+ *.ntvs*
25
+ *.njsproj
26
+ *.sln
27
+ *.swp
28
+
29
+ # Testing & Type checking caches
30
+ .mypy_cache/
31
+ .pytest_cache/
32
+
33
+ # AI Agent metadata
34
+ .gemini/
35
+
@@ -0,0 +1 @@
1
+ 3.12
@@ -0,0 +1,72 @@
1
+ # KeyMesh Agent Instruction & Development Runbook
2
+
3
+ This document is the official instruction set and runbook for **AI Coding Agents** operating in the KeyMesh repository. All modifications, refactorings, and pull requests must adhere to the rules specified here.
4
+
5
+ ---
6
+
7
+ ## 🎯 Primary Agent Mandate
8
+
9
+ You are tasked with maintaining a production-grade, highly performant, async-safe Python codebase. **KeyMesh** must remain lightweight and zero-dependency (outside of optional async DB drivers/caches).
10
+
11
+ > [!WARNING]
12
+ > **Strict Limits on Scope & Dependencies:**
13
+ > - **DO NOT** add or import high-overhead frameworks like FastAPI, Flask, or Django.
14
+ > - **DO NOT** try to wrap model APIs or implement chat-payload formatters.
15
+ > - **DO NOT** use global mutable states or create tight couplings to third-party SDKs.
16
+ > - **DO NOT** implement proxy servers or HTTP middleware layers inside KeyMesh.
17
+
18
+ ---
19
+
20
+ ## 🛠️ Tooling & Workspace Standards
21
+
22
+ We use **`uv`** as the default package and project manager. Always configure custom writable cache paths when invoking `uv` commands in restricted environments to avoid directory permission issues.
23
+
24
+ ### Cache Directory Override:
25
+ ```bash
26
+ mkdir -p ~/.uv_cache
27
+ export UV_CACHE_DIR=~/.uv_cache
28
+ ```
29
+
30
+ ### Useful CLI Commands for Agents:
31
+ - **Run the test suite:**
32
+ ```bash
33
+ python -m pytest tests/ -v
34
+ ```
35
+ - **Execute type checking & static analysis:**
36
+ ```bash
37
+ mypy keymesh/
38
+ ruff check keymesh/
39
+ ```
40
+ - **Execute runtime demo:**
41
+ ```bash
42
+ python example.py
43
+ ```
44
+
45
+ ---
46
+
47
+ ## 🧬 Coding Guidelines & Code Style
48
+
49
+ Agents must produce pristine code conforming to the following standards:
50
+
51
+ 1. **Type Annotation**: Every single function parameter, return value, and class field must be fully typed. Use strict `mypy` style annotations.
52
+ 2. **Concurrency Patterns**:
53
+ - Mutate shared state safely using `asyncio.Lock`.
54
+ - Protect global/pool-level operations using `self._pool_lock`.
55
+ - Prefer thread-safe atomics or thread locks (`threading.Lock`) for synchronous shared resources (e.g. `RoundRobinScheduler._index`).
56
+ 3. **Graceful Error Handling**:
57
+ - Never let internal exceptions leak directly without being wrapped in subclasses of `KeyMeshError`.
58
+ - Handle edge-cases such as empty pools, all keys rate-limited, and key exhaustion cleanly.
59
+ 4. **EMA Calculations**:
60
+ - Key latencies must be smoothed using Exponential Moving Average (EMA) with a default alpha of `0.2` to avoid heavy volatility from individual network hiccups:
61
+ $$\text{Latency}_{\text{avg}} = \alpha \cdot \text{Latency}_{\text{new}} + (1 - \alpha) \cdot \text{Latency}_{\text{prev}}$$
62
+
63
+ ---
64
+
65
+ ## 🧪 Test Requirements
66
+
67
+ - Every new scheduler, persistence backend, or concurrency utility must be accompanied by comprehensive tests under `tests/`.
68
+ - Tests must use `pytest-asyncio` with `asyncio_mode = "auto"`.
69
+ - Test suites must verify:
70
+ - Behavior under high concurrency (multiple simultaneous acquisitions).
71
+ - Recovery from failure states (exhaustion and cooldown expiry).
72
+ - Expected distribution rates for scheduling strategies.
@@ -0,0 +1,206 @@
1
+ # KeyMesh Integration & Gemini Context Guide
2
+
3
+ This document provides specialized architectural context, guidelines, and runtime assumptions for **Gemini** (and other advanced LLMs) when interacting with, maintaining, or extending the **KeyMesh** workspace.
4
+
5
+ ---
6
+
7
+ ## 🚀 KeyMesh at a Glance
8
+
9
+ KeyMesh is a **lightweight, concurrency-safe credential orchestration runtime for AI API systems**. It acts purely as a credential pool manager and scheduler to multiplex multiple API keys across highly concurrent workloads, maximizing aggregate throughput (e.g., combining multiple lower-tier rate-limited keys to act as one high-throughput pool).
10
+
11
+ > [!IMPORTANT]
12
+ > **Strict Architectural Boundaries:**
13
+ > - **KeyMesh is ONLY:** A credential allocator, cooldown manager, state tracker, concurrency coordinator, and routing scheduler.
14
+ > - **KeyMesh is NOT:** An SDK wrapper, an HTTP gateway, a proxy server, an inference runner, or a transport framework.
15
+ > - **Zero Couplings:** KeyMesh must remain completely framework-agnostic. It does not wrap `openai`, `anthropic`, `httpx`, or any specific client. It only yields keys and records the outcome of operations.
16
+
17
+ ---
18
+
19
+ ## 🔄 Runtime Flow & Architecture
20
+
21
+ KeyMesh coordinates credentials via a simple, high-performance async-safe flow:
22
+
23
+ ```mermaid
24
+ flowchart TD
25
+ App[Application] -->|1. acquire| Pool[KeyPool]
26
+ Pool -->|2. select key| Scheduler[Scheduler]
27
+ Scheduler -->|3. check availability & state| State[KeyState]
28
+ Pool -->|4. return key string| App
29
+ App -->|5. direct call to LLM Provider| Provider[OpenAI/Anthropic/HTTP Client]
30
+ Provider -->|6. response received| App
31
+ App -->|7. release / mark_failed / mark_rate_limited| Pool
32
+ Pool -->|8. persist metrics| Storage[Storage Backend]
33
+ ```
34
+
35
+ ### Core API Usage
36
+
37
+ ```python
38
+ from keymesh import KeyPool, SchedulerStrategy
39
+
40
+ # 1. Initialize the pool with raw API keys
41
+ pool = KeyPool(
42
+ keys=["sk-key-1", "sk-key-2", "sk-key-3"],
43
+ strategy=SchedulerStrategy.LEAST_BUSY
44
+ )
45
+
46
+ # 2. Acquire a credential (non-blocking scheduler selection)
47
+ key = await pool.acquire()
48
+
49
+ try:
50
+ # 3. Use the key in any standard SDK or client directly
51
+ # (KeyMesh does not intercept the HTTP call itself)
52
+ response = await client.completions.create(api_key=key, ...)
53
+
54
+ # 4. Release key back to the pool on success
55
+ await pool.release(key, latency=response.elapsed)
56
+
57
+ except RateLimitError:
58
+ # 5. Handle rate limits with cooldowns
59
+ await pool.mark_rate_limited(key, cooldown=60.0)
60
+
61
+ except Exception:
62
+ # 6. Track consecutive failures to prune dead keys
63
+ await pool.mark_failed(key)
64
+ ```
65
+
66
+ ---
67
+
68
+ ## 🛠️ Codebase Structure
69
+
70
+ ```text
71
+ keymesh/
72
+ ├── concurrency/ # Async-safe semaphores and concurrency locks
73
+ │ └── semaphores.py
74
+ ├── cooldown/ # Cooldown management and state checks
75
+ │ └── manager.py
76
+ ├── metrics/ # Pool-level diagnostic counters and statistics
77
+ │ └── pool_metrics.py
78
+ ├── pool/ # Main KeyPool lifecycle and public API orchestrator
79
+ │ ├── pool.py # Async KeyPool implementation
80
+ │ └── sync_pool.py # Synchronous/Threaded KeyPool implementation
81
+ ├── scheduler/ # Pluggable scheduling strategies (Round Robin, Least Busy, Weighted)
82
+ │ ├── base.py
83
+ │ ├── least_busy.py
84
+ │ ├── round_robin.py
85
+ │ └── weighted.py
86
+ ├── state/ # Runtime state representation
87
+ │ ├── key_state.py # Async-safe individual KeyState
88
+ │ └── sync_key_state.py # Thread-safe individual SyncKeyState
89
+ ├── storage/ # Pluggable persistence backends
90
+ │ ├── base.py # Async storage base
91
+ │ ├── sync_base.py # Sync storage base
92
+ │ ├── memory.py # Async MemoryStorage
93
+ │ ├── sync_memory.py # Sync SyncMemoryStorage
94
+ │ ├── json_storage.py # Async JSONStorage
95
+ │ └── sync_json.py # Sync SyncJSONStorage
96
+ └── utils/ # Utilities (logging, masking, helper decorators)
97
+ └── helpers.py
98
+ ```
99
+
100
+ ---
101
+
102
+ ## 💾 State & Persistence Model
103
+
104
+ Each credential tracks its own runtime diagnostics in an async-safe dataclass:
105
+
106
+ | State Field | Type | Description |
107
+ | :--- | :--- | :--- |
108
+ | `active_requests` | `int` | Number of concurrent tasks using this key |
109
+ | `cooldown_until` | `float` | Monotonic time when cooldown expires |
110
+ | `success_count` | `int` | Cumulative successful API calls |
111
+ | `failure_count` | `int` | Consecutive failure count (resets on success) |
112
+ | `latency_avg` | `float` | Exponential Moving Average (EMA) of response latency |
113
+ | `last_used` | `float` | Monotonic timestamp of the last acquisition |
114
+
115
+ ### Backends
116
+
117
+ - **MemoryStorage**: Default, fast, thread-safe, single-process.
118
+ - **JSONStorage**: File-based persistence using atomic temp-file replacement.
119
+ - **SQLite / Redis Storage** *(Future/Pluggable)*: For multi-process or distributed runtimes.
120
+
121
+ ---
122
+
123
+ ## 🧬 Concurrency & State Invariants
124
+
125
+ 1. **State Mutation Locks:**
126
+ - **Async (`KeyState`):** All mutations on `KeyState` must acquire the inner `asyncio.Lock` via `async with self._lock:`.
127
+ - **Sync (`SyncKeyState`):** All mutations on `SyncKeyState` must acquire the inner `threading.Lock` via `with self._lock:`.
128
+ 2. **Stateless Schedulers:** Schedulers (`BaseScheduler` subclasses) are stateless selectors. They must only select a key and **never** mutate any key states directly.
129
+ 3. **No Event Loop Blocking:** Do not block the event loop or introduce long sleeps when a key is rate-limited. Schedulers must dynamically skip keys cooling down/exhausted and return another immediately.
130
+ 4. **EMA calculations:** Latencies must be smoothed using Exponential Moving Average (EMA) with a default alpha of `0.2`:
131
+ $$\text{Latency}_{\text{avg}} = \alpha \cdot \text{Latency}_{\text{new}} + (1 - \alpha) \cdot \text{Latency}_{\text{prev}}$$
132
+
133
+ ---
134
+
135
+ ## 🔄 Concurrency-Safe Integration Patterns
136
+
137
+ When using KeyMesh with SDKs (like OpenAI or Anthropic), **never** recreate the client on every request and **never** mutate `client.api_key` globally (causes race conditions). Use one of these three concurrency-safe patterns:
138
+
139
+ ### Pattern 1: Request-Scoped Client Overrides (`with_options`)
140
+ *Recommended for modern OpenAI SDKs.* Generates a copy of the client configuration pointing to the new key, while sharing the underlying connection pool.
141
+ ```python
142
+ # Async
143
+ scoped_client = client.with_options(api_key=key)
144
+ response = await scoped_client.chat.completions.create(...)
145
+
146
+ # Sync
147
+ scoped_client = client.with_options(api_key=key)
148
+ response = scoped_client.chat.completions.create(...)
149
+ ```
150
+
151
+ ### Pattern 2: Per-Request Custom Headers (`extra_headers`)
152
+ Injects the authorization key directly inside the request header without changing client-wide configurations.
153
+ ```python
154
+ response = await client.chat.completions.create(
155
+ model="gpt-4",
156
+ messages=[{"role": "user", "content": "Query"}],
157
+ extra_headers={"Authorization": f"Bearer {key}"}
158
+ )
159
+ ```
160
+
161
+ ### Pattern 3: Automated Lifecycle Context Managers (`key_lifecycle`)
162
+ Encapsulates acquiring, releasing, timing, and error state tracking into reusable Python context managers to prevent key leaks.
163
+ ```python
164
+ @contextlib.asynccontextmanager
165
+ async def key_lifecycle(pool: KeyPool):
166
+ key = await pool.acquire()
167
+ start = time.monotonic()
168
+ try:
169
+ yield key
170
+ await pool.release(key, latency=time.monotonic() - start)
171
+ except Exception:
172
+ await pool.mark_failed(key)
173
+ raise
174
+ ```
175
+
176
+ ---
177
+
178
+ ## 🛠️ Tooling & Command Cheat Sheet
179
+
180
+ We use **`uv`** as the default package and project manager.
181
+
182
+ - **Run Tests:**
183
+ ```bash
184
+ uv run pytest
185
+ ```
186
+ - **Type Checking (Strict mypy):**
187
+ ```bash
188
+ uv run mypy .
189
+ ```
190
+ - **Linting & Formatting:**
191
+ ```bash
192
+ uv run ruff check .
193
+ ```
194
+ - **Local Environment Cache Setup:**
195
+ ```bash
196
+ export UV_CACHE_DIR=.uv-cache
197
+ ```
198
+
199
+ ---
200
+
201
+ ## 💡 Developer / AI Guidelines
202
+
203
+ - **Clean Interface:** Keep the public interface of `KeyPool` and `SyncKeyPool` clean. Only expose `acquire`, `release`, `mark_failed`, and `mark_rate_limited`. Do not introduce framework-specific transport wrappers.
204
+ - **Strict Typing:** Every function parameter, return value, and class field must be fully typed. Use strict `mypy` style annotations.
205
+ - **Error Propagation:** Do not let internal exceptions leak directly without being wrapped in subclasses of `KeyMeshError`.
206
+ - **Zero Heavy Dependencies:** KeyMesh must remain lightweight. Do not import heavy frameworks (like FastAPI, Flask, or HTTP gateways).
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 KeyMesh Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.