ocp-router 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,19 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *.egg-info/
4
+ .eggs/
5
+ dist/
6
+ build/
7
+ *.so
8
+ .venv/
9
+ .uv/
10
+ .mypy_cache/
11
+ .ruff_cache/
12
+ .pytest_cache/
13
+ *.db
14
+ *.db-shm
15
+ *.db-wal
16
+ *.vec
17
+ .env
18
+ .env.local
19
+ .env.*.local
@@ -0,0 +1,296 @@
1
+ Metadata-Version: 2.4
2
+ Name: ocp-router
3
+ Version: 0.2.0
4
+ Summary: OCP Router — hybrid local/cloud model routing layer for Open Context Protocol
5
+ Project-URL: Homepage, https://github.com/Rajesh1213/OCP
6
+ Project-URL: Repository, https://github.com/Rajesh1213/OCP
7
+ License: Apache-2.0
8
+ Requires-Python: >=3.11
9
+ Requires-Dist: aiohttp>=3.9
10
+ Requires-Dist: pydantic>=2.7
11
+ Requires-Dist: tiktoken>=0.7
12
+ Provides-Extra: anthropic
13
+ Requires-Dist: anthropic>=0.40; extra == 'anthropic'
14
+ Provides-Extra: dev
15
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
16
+ Requires-Dist: pytest>=8; extra == 'dev'
17
+ Provides-Extra: openai
18
+ Requires-Dist: openai>=1.0; extra == 'openai'
19
+ Description-Content-Type: text/markdown
20
+
21
+ # ocp-router
22
+
23
+ Hybrid local/cloud model routing layer for [Open Context Protocol](https://github.com/Rajesh1213/OCP).
24
+
25
+ Scores each request for complexity and dispatches it to the right model tier — local model for simple tasks, paid provider for complex reasoning. Vendor-neutral: works with any backend that implements the `ModelBackend` protocol.
26
+
27
+ ---
28
+
29
+ ## Installation
30
+
31
+ ```bash
32
+ pip install ocp-router # core — Ollama local backend included
33
+ pip install ocp-router[anthropic] # + Anthropic Claude paid backend
34
+ pip install ocp-router[openai] # + OpenAI paid backend
35
+ ```
36
+
37
+ Requires Python 3.11+ and a running [Ollama](https://ollama.com) instance for local model support.
38
+
39
+ ---
40
+
41
+ ## Quick start
42
+
43
+ ```bash
44
+ # One-time: install Ollama and pull a model
45
+ brew install ollama # macOS — see ollama.com for other platforms
46
+ ollama pull llama3.2
47
+ ollama serve
48
+ ```
49
+
50
+ ```python
51
+ import asyncio
52
+ from ocp_router import make_router
53
+
54
+ async def main():
55
+ router = make_router() # reads all config from env vars
56
+
57
+ # Simple request — handled locally, no paid API call
58
+ result = await router.route("explain the verify_token function")
59
+ print(result.route_to) # "local"
60
+ print(result.classify.complexity_score) # 0.0
61
+ print(result.model) # "llama3.2"
62
+ print(result.text)
63
+
64
+ # Complex request — escalated to paid provider
65
+ result = await router.route(
66
+ "review security vulnerabilities across all API endpoints"
67
+ )
68
+ print(result.route_to) # "paid"
69
+ print(result.classify.complexity_score) # 0.55
70
+ print(result.classify.signals) # ["security-sensitive"]
71
+ print(result.model) # "claude-sonnet-4-6"
72
+ print(result.text)
73
+
74
+ asyncio.run(main())
75
+ ```
76
+
77
+ ---
78
+
79
+ ## How routing works
80
+
81
+ Every request passes through the `TaskClassifier` before reaching any model. The classifier scores complexity from `0.0` (trivial) to `1.0` (maximum) using five deterministic heuristic layers — no model required, runs in microseconds.
82
+
83
+ ```
84
+ Request prompt
85
+
86
+
87
+ ┌─────────────────────────────────────────────┐
88
+ │ TaskClassifier │
89
+ │ │
90
+ │ 1. Token length (tiktoken cl100k) │
91
+ │ 2. Code block size (fenced ``` blocks) │
92
+ │ 3. Complex signals security +0.55 │
93
+ │ architecture +0.55 │
94
+ │ migration +0.55 │
95
+ │ deadlock +0.40 │
96
+ │ multi-file +0.35 │
97
+ │ refactor +0.25 ... │
98
+ │ 4. Simple signals explain -0.10 │
99
+ │ summarise -0.10 │
100
+ │ search -0.10 ... │
101
+ │ 5. File references 3-4 files +0.10 │
102
+ │ 5+ files +0.20 │
103
+ │ │
104
+ │ score = clamp(sum, 0.0, 1.0) │
105
+ └──────────────┬──────────────────────────────┘
106
+
107
+ ┌───────┴────────┐
108
+ score < 0.5 score ≥ 0.5
109
+ │ │
110
+ ▼ ▼
111
+ Local model Paid provider
112
+ (Ollama) (Claude / GPT-4 / any)
113
+ ```
114
+
115
+ ### What goes where
116
+
117
+ | Request | Score | Route |
118
+ |---|---|---|
119
+ | "explain this function" | 0.00 | local |
120
+ | "what does add() do?" | 0.00 | local |
121
+ | "find all usages of db.connect" | 0.00 | local |
122
+ | "summarise the last session" | 0.00 | local |
123
+ | "refactor the login function" | 0.25 | local |
124
+ | — threshold (default 0.5) — | | |
125
+ | "refactor auth across all files" | 0.80 | paid |
126
+ | "review security vulnerabilities" | 0.55 | paid |
127
+ | "design the payment architecture" | 0.55 | paid |
128
+ | "debug this production deadlock" | 0.60 | paid |
129
+ | "migrate the database schema" | 0.55 | paid |
130
+
131
+ Threshold is configurable via `OCP_ROUTE_THRESHOLD`.
132
+
133
+ ---
134
+
135
+ ## RouteResult — what you get back
136
+
137
+ Every `router.route()` call returns a `RouteResult` with the answer and a full trace of the routing decision:
138
+
139
+ ```python
140
+ @dataclass
141
+ class RouteResult:
142
+ text: str # the model's response
143
+ route_to: str # "local" or "paid"
144
+ classify: ClassifyResult # full classification trace
145
+ model: str # exact model identifier used
146
+ prompt_tokens: int
147
+ completion_tokens: int
148
+ duration_ms: float
149
+
150
+ @dataclass
151
+ class ClassifyResult:
152
+ complexity_score: float # 0.0 – 1.0
153
+ task_type: str # "explain" | "refactor" | "debug" | "architect" | ...
154
+ signals: list[str] # which heuristics fired
155
+ route_to: str # "local" or "paid"
156
+ ```
157
+
158
+ ---
159
+
160
+ ## Configuration
161
+
162
+ All options are set via environment variables — no code changes needed.
163
+
164
+ ### Local backend
165
+
166
+ | Variable | Default | Description |
167
+ |---|---|---|
168
+ | `OCP_LOCAL_BACKEND` | `ollama` | Backend type. `ollama` is the only built-in option. |
169
+ | `OCP_LOCAL_MODEL` | `llama3.2` | Model name passed to Ollama. |
170
+ | `OCP_OLLAMA_URL` | `http://localhost:11434` | Ollama base URL. Point to a remote GPU box if needed. |
171
+ | `OCP_LOCAL_TIMEOUT` | `60` | Inference timeout in seconds. |
172
+
173
+ ### Paid backend
174
+
175
+ | Variable | Default | Description |
176
+ |---|---|---|
177
+ | `OCP_PAID_BACKEND` | `anthropic` | Backend type: `anthropic` or `openai`. |
178
+ | `OCP_PAID_MODEL` | `claude-sonnet-4-6` | Model identifier for the paid provider. |
179
+ | `OCP_PAID_MAX_TOKENS` | `4096` | Max tokens for paid responses. |
180
+ | `ANTHROPIC_API_KEY` | *(required)* | API key for Anthropic backend. |
181
+ | `OPENAI_API_KEY` | *(required)* | API key for OpenAI backend. |
182
+
183
+ ### Router
184
+
185
+ | Variable | Default | Description |
186
+ |---|---|---|
187
+ | `OCP_ROUTE_THRESHOLD` | `0.5` | Complexity score at or above which requests go to paid. |
188
+
189
+ ```bash
190
+ # Example: Mistral locally, GPT-4o for complex tasks, higher threshold (fewer requests escalate to paid)
191
+ OCP_LOCAL_MODEL=mistral \
192
+ OCP_PAID_BACKEND=openai \
193
+ OCP_PAID_MODEL=gpt-4o \
194
+ OCP_ROUTE_THRESHOLD=0.6 \
195
+ python my_agent.py
196
+ ```
197
+
198
+ ---
199
+
200
+ ## IDE integration (Claude Code, Cursor, Windsurf)
201
+
202
+ Add the routing env vars to your `.mcp.json` — OCP handles the rest:
203
+
204
+ ```json
205
+ {
206
+ "mcpServers": {
207
+ "ocp": {
208
+ "command": "uvx",
209
+ "args": ["ocp-server"],
210
+ "env": {
211
+ "OCP_DB_PATH": "${workspaceFolder}/.ocp.db",
212
+ "OCP_LOCAL_MODEL": "llama3.2",
213
+ "OCP_OLLAMA_URL": "http://localhost:11434",
214
+ "OCP_PAID_BACKEND": "anthropic",
215
+ "OCP_ROUTE_THRESHOLD": "0.5"
216
+ }
217
+ }
218
+ }
219
+ }
220
+ ```
221
+
222
+ Simple tasks (explain, search, summarise) are answered locally by Ollama. Complex requests (security, architecture, multi-file refactor) escalate to your paid provider. Your IDE workflow is unchanged.
223
+
224
+ ---
225
+
226
+ ## Supported local models
227
+
228
+ Any model available in Ollama works. Recommended starting points:
229
+
230
+ | Model | Size | Good for |
231
+ |---|---|---|
232
+ | `llama3.2` | 3B | Classification, summarisation, simple Q&A |
233
+ | `phi4-mini` | 3.8B | Code explanation, short answers |
234
+ | `mistral` | 7B | Context compression, draft generation |
235
+ | `codellama` | 7B | Code-specific tasks |
236
+
237
+ ```bash
238
+ ollama pull llama3.2
239
+ ```
240
+
241
+ ---
242
+
243
+ ## Bring your own backend
244
+
245
+ Both the local and paid slots accept any object that implements the `ModelBackend` protocol — three members (a `model` property plus the async `is_available()` and `generate()` methods), no base class required:
246
+
247
+ ```python
248
+ from ocp_router import OCPRouter, TaskClassifier
249
+ from ocp_router.backends.base import GenerateRequest, GenerateResponse
250
+
251
+ class MyVLLMBackend:
252
+ @property
253
+ def model(self) -> str:
254
+ return "mistral-7b-instruct"
255
+
256
+ async def is_available(self) -> bool:
257
+ return True # check your endpoint
258
+
259
+ async def generate(self, request: GenerateRequest) -> GenerateResponse:
260
+ # call your inference endpoint
261
+ ...
262
+ return GenerateResponse(
263
+ text="...",
264
+ model=self.model,
265
+ prompt_tokens=0,
266
+ completion_tokens=0,
267
+ duration_ms=0.0,
268
+ )
269
+
270
+ # Plug in directly — no factory change needed
271
+ router = OCPRouter(
272
+ local=MyVLLMBackend(),
273
+ paid=MyVLLMBackend(), # or any other backend
274
+ classifier=TaskClassifier(),
275
+ )
276
+ ```
277
+
278
+ This is the intended extension point. `ocp-router` ships `OllamaBackend`, `AnthropicBackend`, and `OpenAIBackend` as convenience implementations — not as the only options.
279
+
280
+ ---
281
+
282
+ ## Running tests
283
+
284
+ ```bash
285
+ # Unit tests — no Ollama or API keys required
286
+ pytest packages/ocp-router/tests/ -k "not integration" -v
287
+
288
+ # Integration test — requires: ollama serve + ollama pull llama3.2
289
+ pytest packages/ocp-router/tests/ -m integration -v
290
+ ```
291
+
292
+ ---
293
+
294
+ ## What's next
295
+
296
+ - `ocp.prompt.prepare` — a local small language model (SLM) compresses and optimises prompts before they reach the paid provider, reducing token usage and improving answer quality
@@ -0,0 +1,276 @@
1
+ # ocp-router
2
+
3
+ Hybrid local/cloud model routing layer for [Open Context Protocol](https://github.com/Rajesh1213/OCP).
4
+
5
+ Scores each request for complexity and dispatches it to the right model tier — local model for simple tasks, paid provider for complex reasoning. Vendor-neutral: works with any backend that implements the `ModelBackend` protocol.
6
+
7
+ ---
8
+
9
+ ## Installation
10
+
11
+ ```bash
12
+ pip install ocp-router # core — Ollama local backend included
13
+ pip install ocp-router[anthropic] # + Anthropic Claude paid backend
14
+ pip install ocp-router[openai] # + OpenAI paid backend
15
+ ```
16
+
17
+ Requires Python 3.11+ and a running [Ollama](https://ollama.com) instance for local model support.
18
+
19
+ ---
20
+
21
+ ## Quick start
22
+
23
+ ```bash
24
+ # One-time: install Ollama and pull a model
25
+ brew install ollama # macOS — see ollama.com for other platforms
26
+ ollama pull llama3.2
27
+ ollama serve
28
+ ```
29
+
30
+ ```python
31
+ import asyncio
32
+ from ocp_router import make_router
33
+
34
+ async def main():
35
+ router = make_router() # reads all config from env vars
36
+
37
+ # Simple request — handled locally, no paid API call
38
+ result = await router.route("explain the verify_token function")
39
+ print(result.route_to) # "local"
40
+ print(result.classify.complexity_score) # 0.0
41
+ print(result.model) # "llama3.2"
42
+ print(result.text)
43
+
44
+ # Complex request — escalated to paid provider
45
+ result = await router.route(
46
+ "review security vulnerabilities across all API endpoints"
47
+ )
48
+ print(result.route_to) # "paid"
49
+ print(result.classify.complexity_score) # 0.55
50
+ print(result.classify.signals) # ["security-sensitive"]
51
+ print(result.model) # "claude-sonnet-4-6"
52
+ print(result.text)
53
+
54
+ asyncio.run(main())
55
+ ```
56
+
57
+ ---
58
+
59
+ ## How routing works
60
+
61
+ Every request passes through the `TaskClassifier` before reaching any model. The classifier scores complexity from `0.0` (trivial) to `1.0` (maximum) using five deterministic heuristic layers — no model required, runs in microseconds.
62
+
63
+ ```
64
+ Request prompt
65
+
66
+
67
+ ┌─────────────────────────────────────────────┐
68
+ │ TaskClassifier │
69
+ │ │
70
+ │ 1. Token length (tiktoken cl100k) │
71
+ │ 2. Code block size (fenced ``` blocks) │
72
+ │ 3. Complex signals security +0.55 │
73
+ │ architecture +0.55 │
74
+ │ migration +0.55 │
75
+ │ deadlock +0.40 │
76
+ │ multi-file +0.35 │
77
+ │ refactor +0.25 ... │
78
+ │ 4. Simple signals explain -0.10 │
79
+ │ summarise -0.10 │
80
+ │ search -0.10 ... │
81
+ │ 5. File references 3-4 files +0.10 │
82
+ │ 5+ files +0.20 │
83
+ │ │
84
+ │ score = clamp(sum, 0.0, 1.0) │
85
+ └──────────────┬──────────────────────────────┘
86
+
87
+ ┌───────┴────────┐
88
+ score < 0.5 score ≥ 0.5
89
+ │ │
90
+ ▼ ▼
91
+ Local model Paid provider
92
+ (Ollama) (Claude / GPT-4 / any)
93
+ ```
94
+
95
+ ### What goes where
96
+
97
+ | Request | Score | Route |
98
+ |---|---|---|
99
+ | "explain this function" | 0.00 | local |
100
+ | "what does add() do?" | 0.00 | local |
101
+ | "find all usages of db.connect" | 0.00 | local |
102
+ | "summarise the last session" | 0.00 | local |
103
+ | "refactor the login function" | 0.25 | local |
104
+ | — threshold (default 0.5) — | | |
105
+ | "refactor auth across all files" | 0.80 | paid |
106
+ | "review security vulnerabilities" | 0.55 | paid |
107
+ | "design the payment architecture" | 0.55 | paid |
108
+ | "debug this production deadlock" | 0.60 | paid |
109
+ | "migrate the database schema" | 0.55 | paid |
110
+
111
+ Threshold is configurable via `OCP_ROUTE_THRESHOLD`.
112
+
113
+ ---
114
+
115
+ ## RouteResult — what you get back
116
+
117
+ Every `router.route()` call returns a `RouteResult` with the answer and a full trace of the routing decision:
118
+
119
+ ```python
120
+ @dataclass
121
+ class RouteResult:
122
+ text: str # the model's response
123
+ route_to: str # "local" or "paid"
124
+ classify: ClassifyResult # full classification trace
125
+ model: str # exact model identifier used
126
+ prompt_tokens: int
127
+ completion_tokens: int
128
+ duration_ms: float
129
+
130
+ @dataclass
131
+ class ClassifyResult:
132
+ complexity_score: float # 0.0 – 1.0
133
+ task_type: str # "explain" | "refactor" | "debug" | "architect" | ...
134
+ signals: list[str] # which heuristics fired
135
+ route_to: str # "local" or "paid"
136
+ ```
137
+
138
+ ---
139
+
140
+ ## Configuration
141
+
142
+ All options are set via environment variables — no code changes needed.
143
+
144
+ ### Local backend
145
+
146
+ | Variable | Default | Description |
147
+ |---|---|---|
148
+ | `OCP_LOCAL_BACKEND` | `ollama` | Backend type. `ollama` is the only built-in option. |
149
+ | `OCP_LOCAL_MODEL` | `llama3.2` | Model name passed to Ollama. |
150
+ | `OCP_OLLAMA_URL` | `http://localhost:11434` | Ollama base URL. Point to a remote GPU box if needed. |
151
+ | `OCP_LOCAL_TIMEOUT` | `60` | Inference timeout in seconds. |
152
+
153
+ ### Paid backend
154
+
155
+ | Variable | Default | Description |
156
+ |---|---|---|
157
+ | `OCP_PAID_BACKEND` | `anthropic` | Backend type: `anthropic` or `openai`. |
158
+ | `OCP_PAID_MODEL` | `claude-sonnet-4-6` | Model identifier for the paid provider. |
159
+ | `OCP_PAID_MAX_TOKENS` | `4096` | Max tokens for paid responses. |
160
+ | `ANTHROPIC_API_KEY` | *(required)* | API key for Anthropic backend. |
161
+ | `OPENAI_API_KEY` | *(required)* | API key for OpenAI backend. |
162
+
163
+ ### Router
164
+
165
+ | Variable | Default | Description |
166
+ |---|---|---|
167
+ | `OCP_ROUTE_THRESHOLD` | `0.5` | Complexity score at or above which requests go to paid. |
168
+
169
+ ```bash
170
+ # Example: Mistral locally, GPT-4o for complex tasks, higher threshold (fewer requests escalate to paid)
171
+ OCP_LOCAL_MODEL=mistral \
172
+ OCP_PAID_BACKEND=openai \
173
+ OCP_PAID_MODEL=gpt-4o \
174
+ OCP_ROUTE_THRESHOLD=0.6 \
175
+ python my_agent.py
176
+ ```
177
+
178
+ ---
179
+
180
+ ## IDE integration (Claude Code, Cursor, Windsurf)
181
+
182
+ Add the routing env vars to your `.mcp.json` — OCP handles the rest:
183
+
184
+ ```json
185
+ {
186
+ "mcpServers": {
187
+ "ocp": {
188
+ "command": "uvx",
189
+ "args": ["ocp-server"],
190
+ "env": {
191
+ "OCP_DB_PATH": "${workspaceFolder}/.ocp.db",
192
+ "OCP_LOCAL_MODEL": "llama3.2",
193
+ "OCP_OLLAMA_URL": "http://localhost:11434",
194
+ "OCP_PAID_BACKEND": "anthropic",
195
+ "OCP_ROUTE_THRESHOLD": "0.5"
196
+ }
197
+ }
198
+ }
199
+ }
200
+ ```
201
+
202
+ Simple tasks (explain, search, summarise) are answered locally by Ollama. Complex requests (security, architecture, multi-file refactor) escalate to your paid provider. Your IDE workflow is unchanged.
203
+
204
+ ---
205
+
206
+ ## Supported local models
207
+
208
+ Any model available in Ollama works. Recommended starting points:
209
+
210
+ | Model | Size | Good for |
211
+ |---|---|---|
212
+ | `llama3.2` | 3B | Classification, summarisation, simple Q&A |
213
+ | `phi4-mini` | 3.8B | Code explanation, short answers |
214
+ | `mistral` | 7B | Context compression, draft generation |
215
+ | `codellama` | 7B | Code-specific tasks |
216
+
217
+ ```bash
218
+ ollama pull llama3.2
219
+ ```
220
+
221
+ ---
222
+
223
+ ## Bring your own backend
224
+
225
+ Both the local and paid slots accept any object that implements the `ModelBackend` protocol — three members (a `model` property plus the async `is_available()` and `generate()` methods), no base class required:
226
+
227
+ ```python
228
+ from ocp_router import OCPRouter, TaskClassifier
229
+ from ocp_router.backends.base import GenerateRequest, GenerateResponse
230
+
231
+ class MyVLLMBackend:
232
+ @property
233
+ def model(self) -> str:
234
+ return "mistral-7b-instruct"
235
+
236
+ async def is_available(self) -> bool:
237
+ return True # check your endpoint
238
+
239
+ async def generate(self, request: GenerateRequest) -> GenerateResponse:
240
+ # call your inference endpoint
241
+ ...
242
+ return GenerateResponse(
243
+ text="...",
244
+ model=self.model,
245
+ prompt_tokens=0,
246
+ completion_tokens=0,
247
+ duration_ms=0.0,
248
+ )
249
+
250
+ # Plug in directly — no factory change needed
251
+ router = OCPRouter(
252
+ local=MyVLLMBackend(),
253
+ paid=MyVLLMBackend(), # or any other backend
254
+ classifier=TaskClassifier(),
255
+ )
256
+ ```
257
+
258
+ This is the intended extension point. `ocp-router` ships `OllamaBackend`, `AnthropicBackend`, and `OpenAIBackend` as convenience implementations — not as the only options.
259
+
260
+ ---
261
+
262
+ ## Running tests
263
+
264
+ ```bash
265
+ # Unit tests — no Ollama or API keys required
266
+ pytest packages/ocp-router/tests/ -k "not integration" -v
267
+
268
+ # Integration test — requires: ollama serve + ollama pull llama3.2
269
+ pytest packages/ocp-router/tests/ -m integration -v
270
+ ```
271
+
272
+ ---
273
+
274
+ ## What's next
275
+
276
+ - `ocp.prompt.prepare` — a local small language model (SLM) compresses and optimises prompts before they reach the paid provider, reducing token usage and improving answer quality
@@ -0,0 +1,37 @@
1
+ """OCP Router — hybrid local/cloud model routing layer."""
2
+ from ocp_router.backends.base import (
3
+ ClassifyResult,
4
+ GenerateRequest,
5
+ GenerateResponse,
6
+ LocalModelBackend,
7
+ ModelBackend,
8
+ RouteResult,
9
+ RouteTarget,
10
+ TaskType,
11
+ )
12
+ from ocp_router.backends.ollama import OllamaBackend
13
+ from ocp_router.classifier import TaskClassifier
14
+ from ocp_router.factory import make_local_backend, make_paid_backend, make_router
15
+ from ocp_router.router import OCPRouter
16
+
17
+ __all__ = [
18
+ # Router
19
+ "OCPRouter",
20
+ "RouteResult",
21
+ # Backends
22
+ "OllamaBackend",
23
+ "LocalModelBackend",
24
+ "ModelBackend",
25
+ # Inference types
26
+ "GenerateRequest",
27
+ "GenerateResponse",
28
+ # Classifier
29
+ "TaskClassifier",
30
+ "ClassifyResult",
31
+ "TaskType",
32
+ "RouteTarget",
33
+ # Factories
34
+ "make_local_backend",
35
+ "make_paid_backend",
36
+ "make_router",
37
+ ]
File without changes