oriora-c2 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oriora_c2-0.1.0/.gitignore +1 -0
- oriora_c2-0.1.0/LICENSE +21 -0
- oriora_c2-0.1.0/PKG-INFO +77 -0
- oriora_c2-0.1.0/README.md +63 -0
- oriora_c2-0.1.0/oriora_c2/__init__.py +11 -0
- oriora_c2-0.1.0/oriora_c2/cli.py +104 -0
- oriora_c2-0.1.0/oriora_c2/config.template.yaml +42 -0
- oriora_c2-0.1.0/oriora_c2/custom_callbacks.py +143 -0
- oriora_c2-0.1.0/pyproject.toml +29 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
.vercel
|
oriora_c2-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Orioralabs OÜ
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
oriora_c2-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: oriora-c2
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Oriora c2 — a local proxy that plugs Oriora's model-routing decision into any OpenAI-style agent. Your vendor key and prompts stay on your machine; only the routing decision crosses.
|
|
5
|
+
Project-URL: Homepage, https://orioralabs.com
|
|
6
|
+
Author: Orioralabs OÜ
|
|
7
|
+
License: MIT
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
Keywords: agent,ai,byok,litellm,llm,model-routing,openai,proxy,router
|
|
10
|
+
Requires-Python: >=3.10
|
|
11
|
+
Requires-Dist: httpx>=0.24
|
|
12
|
+
Requires-Dist: litellm[proxy]>=1.0
|
|
13
|
+
Description-Content-Type: text/markdown
|
|
14
|
+
|
|
15
|
+
# oriora-c2
|
|
16
|
+
|
|
17
|
+
**Use Oriora's model-routing decision in any OpenAI-style agent — without your key or prompts ever
|
|
18
|
+
leaving your machine.**
|
|
19
|
+
|
|
20
|
+
`oriora-c2` runs a tiny **local proxy** on `127.0.0.1`. Your agent points at it; before each call it
|
|
21
|
+
asks Oriora's `/api/select` *"which model is best for this task?"*, then dispatches the call
|
|
22
|
+
**directly to the vendor on your own key**. Only the routing decision (task type + your candidate
|
|
23
|
+
models) crosses to Oriora — never the key, the prompt, or the response.
|
|
24
|
+
|
|
25
|
+
This is for **off-the-shelf agents** (Cursor, Aider, Continue, the raw `openai` SDK, LangChain
|
|
26
|
+
`ChatOpenAI`, …) that can't easily insert a "ask Oriora first" step. *Writing your own code?* You
|
|
27
|
+
don't need this — call `/api/select` directly (or `pip install oriora` and use `model_select()`).
|
|
28
|
+
|
|
29
|
+
## Install
|
|
30
|
+
```bash
|
|
31
|
+
pip install oriora-c2
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Run
|
|
35
|
+
```bash
|
|
36
|
+
oriora-c2 init # scaffolds config.yaml, oriora_c2_hook.py + .env.oriora-c2.example
|
|
37
|
+
# set your keys:
|
|
38
|
+
export ORIORA_API_KEY=sk_oriora_... # the decision call only
|
|
39
|
+
export DEEPSEEK_API_KEY=... # your own vendor keys (the actual call runs on these, locally)
|
|
40
|
+
export MINIMAX_API_KEY=...
|
|
41
|
+
oriora-c2 serve # local proxy on http://127.0.0.1:4000
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
Point any OpenAI client at it:
|
|
45
|
+
```python
|
|
46
|
+
from openai import OpenAI
|
|
47
|
+
c = OpenAI(base_url="http://127.0.0.1:4000/v1", api_key="anything")
|
|
48
|
+
c.chat.completions.create(model="oriora-auto", messages=[{"role":"user","content":"…"}])
|
|
49
|
+
# task type is auto-detected locally (free); or force it: model="oriora-auto:coding"
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
## How it works
|
|
53
|
+
1. Agent → `http://127.0.0.1:4000` (`model="oriora-auto"`). Prompt never leaves your box.
|
|
54
|
+
2. The pre-call hook classifies the task **locally** (free regex rules, no LLM) → calls
|
|
55
|
+
`POST /api/select` `{task_type, models}` — the **one** Oriora touch ($0.001/decision).
|
|
56
|
+
3. It rewrites `data["model"]` to the recommended model.
|
|
57
|
+
4. LiteLLM dispatches **direct to the vendor on your local key**; the stream flows vendor → you.
|
|
58
|
+
|
|
59
|
+
**Privacy / c2 invariant:** the proxy is **customer-hosted** (`127.0.0.1`). Only `{task_type, model
|
|
60
|
+
candidates}` reach Oriora. If a third party ever hosted this, it would no longer be c2.
|
|
61
|
+
|
|
62
|
+
**Fail-open:** if `/api/select` is slow (>`ORIORA_SELECT_TIMEOUT_S`, default 2.5s) or down, the hook
|
|
63
|
+
falls back to `ORIORA_FALLBACK_MODEL` so your agent is never blocked.
|
|
64
|
+
|
|
65
|
+
## Configuration (env)
|
|
66
|
+
| Var | Purpose |
|
|
67
|
+
|---|---|
|
|
68
|
+
| `ORIORA_API_KEY` | Oriora key for the decision call (required) |
|
|
69
|
+
| `DEEPSEEK_API_KEY`, `MINIMAX_API_KEY`, … | your own vendor keys (the call runs on these) |
|
|
70
|
+
| `ORIORA_CANDIDATES` | comma-sep catalog ids you hold keys for (sent to `/api/select`) |
|
|
71
|
+
| `ORIORA_FALLBACK_MODEL` | model used if the decision call fails (default `deepseek-v4-flash`) |
|
|
72
|
+
| `ORIORA_SELECT_TIMEOUT_S` | decision-call budget before fail-open (default `2.5`) |
|
|
73
|
+
|
|
74
|
+
Add a vendor = add its key + a `model_list` entry in `config.yaml` + its catalog id to
|
|
75
|
+
`ORIORA_CANDIDATES`. v1 ships configured for **DeepSeek + MiniMax** (OpenAI-format).
|
|
76
|
+
|
|
77
|
+
MIT © Orioralabs OÜ · https://orioralabs.com
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# oriora-c2
|
|
2
|
+
|
|
3
|
+
**Use Oriora's model-routing decision in any OpenAI-style agent — without your key or prompts ever
|
|
4
|
+
leaving your machine.**
|
|
5
|
+
|
|
6
|
+
`oriora-c2` runs a tiny **local proxy** on `127.0.0.1`. Your agent points at it; before each call it
|
|
7
|
+
asks Oriora's `/api/select` *"which model is best for this task?"*, then dispatches the call
|
|
8
|
+
**directly to the vendor on your own key**. Only the routing decision (task type + your candidate
|
|
9
|
+
models) crosses to Oriora — never the key, the prompt, or the response.
|
|
10
|
+
|
|
11
|
+
This is for **off-the-shelf agents** (Cursor, Aider, Continue, the raw `openai` SDK, LangChain
|
|
12
|
+
`ChatOpenAI`, …) that can't easily insert a "ask Oriora first" step. *Writing your own code?* You
|
|
13
|
+
don't need this — call `/api/select` directly (or `pip install oriora` and use `model_select()`).
|
|
14
|
+
|
|
15
|
+
## Install
|
|
16
|
+
```bash
|
|
17
|
+
pip install oriora-c2
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
## Run
|
|
21
|
+
```bash
|
|
22
|
+
oriora-c2 init # scaffolds config.yaml, oriora_c2_hook.py + .env.oriora-c2.example
|
|
23
|
+
# set your keys:
|
|
24
|
+
export ORIORA_API_KEY=sk_oriora_... # the decision call only
|
|
25
|
+
export DEEPSEEK_API_KEY=... # your own vendor keys (the actual call runs on these, locally)
|
|
26
|
+
export MINIMAX_API_KEY=...
|
|
27
|
+
oriora-c2 serve # local proxy on http://127.0.0.1:4000
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
Point any OpenAI client at it:
|
|
31
|
+
```python
|
|
32
|
+
from openai import OpenAI
|
|
33
|
+
c = OpenAI(base_url="http://127.0.0.1:4000/v1", api_key="anything")
|
|
34
|
+
c.chat.completions.create(model="oriora-auto", messages=[{"role":"user","content":"…"}])
|
|
35
|
+
# task type is auto-detected locally (free); or force it: model="oriora-auto:coding"
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## How it works
|
|
39
|
+
1. Agent → `http://127.0.0.1:4000` (`model="oriora-auto"`). Prompt never leaves your box.
|
|
40
|
+
2. The pre-call hook classifies the task **locally** (free regex rules, no LLM) → calls
|
|
41
|
+
`POST /api/select` `{task_type, models}` — the **one** Oriora touch ($0.001/decision).
|
|
42
|
+
3. It rewrites `data["model"]` to the recommended model.
|
|
43
|
+
4. LiteLLM dispatches **direct to the vendor on your local key**; the stream flows vendor → you.
|
|
44
|
+
|
|
45
|
+
**Privacy / c2 invariant:** the proxy is **customer-hosted** (`127.0.0.1`). Only `{task_type, model
|
|
46
|
+
candidates}` reach Oriora. If a third party ever hosted this, it would no longer be c2.
|
|
47
|
+
|
|
48
|
+
**Fail-open:** if `/api/select` is slow (>`ORIORA_SELECT_TIMEOUT_S`, default 2.5s) or down, the hook
|
|
49
|
+
falls back to `ORIORA_FALLBACK_MODEL` so your agent is never blocked.
|
|
50
|
+
|
|
51
|
+
## Configuration (env)
|
|
52
|
+
| Var | Purpose |
|
|
53
|
+
|---|---|
|
|
54
|
+
| `ORIORA_API_KEY` | Oriora key for the decision call (required) |
|
|
55
|
+
| `DEEPSEEK_API_KEY`, `MINIMAX_API_KEY`, … | your own vendor keys (the call runs on these) |
|
|
56
|
+
| `ORIORA_CANDIDATES` | comma-sep catalog ids you hold keys for (sent to `/api/select`) |
|
|
57
|
+
| `ORIORA_FALLBACK_MODEL` | model used if the decision call fails (default `deepseek-v4-flash`) |
|
|
58
|
+
| `ORIORA_SELECT_TIMEOUT_S` | decision-call budget before fail-open (default `2.5`) |
|
|
59
|
+
|
|
60
|
+
Add a vendor = add its key + a `model_list` entry in `config.yaml` + its catalog id to
|
|
61
|
+
`ORIORA_CANDIDATES`. v1 ships configured for **DeepSeek + MiniMax** (OpenAI-format).
|
|
62
|
+
|
|
63
|
+
MIT © Orioralabs OÜ · https://orioralabs.com
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
"""oriora-c2 — a local proxy that plugs Oriora's model-routing decision into any OpenAI-style agent.
|
|
2
|
+
|
|
3
|
+
Your agent points at the local proxy (`model="oriora-auto"`); a LiteLLM pre-call hook asks Oriora's
|
|
4
|
+
`/api/select` which model to use, then dispatches DIRECT to the vendor on YOUR key. Your vendor key
|
|
5
|
+
and prompts never leave the machine — only the routing decision (task_type + candidate models)
|
|
6
|
+
crosses. The hook also classifies the task locally (free, no LLM) so plain calls get routed well.
|
|
7
|
+
"""
|
|
8
|
+
from .custom_callbacks import OrioraRouter, classify, proxy_handler_instance
|
|
9
|
+
|
|
10
|
+
__version__ = "0.1.0"
|
|
11
|
+
__all__ = ["OrioraRouter", "classify", "proxy_handler_instance", "__version__"]
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"""oriora-c2 CLI — `oriora-c2 init` scaffolds a local config; `oriora-c2 serve` runs the proxy.
|
|
2
|
+
|
|
3
|
+
The proxy binds 127.0.0.1 by design: c2 means the proxy is CUSTOMER-HOSTED. Your vendor key and
|
|
4
|
+
prompts never leave the machine — only the routing decision (task_type + candidate models) crosses
|
|
5
|
+
to Oriora's /api/select.
|
|
6
|
+
"""
|
|
7
|
+
import argparse
|
|
8
|
+
import os
|
|
9
|
+
import subprocess
|
|
10
|
+
import sys
|
|
11
|
+
from importlib import resources
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
# LiteLLM loads proxy callbacks by FILE path relative to the config dir (not by installed-package
|
|
15
|
+
# import). So `init` drops this 1-line shim next to config.yaml; it re-exports the installed hook.
|
|
16
|
+
HOOK_SHIM = (
|
|
17
|
+
"# Auto-written by `oriora-c2 init`. LiteLLM loads callbacks by local file path, so this shim\n"
|
|
18
|
+
"# re-exports the installed package's hook for config.yaml to reference.\n"
|
|
19
|
+
"from oriora_c2.custom_callbacks import proxy_handler_instance # noqa: F401\n"
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
ENV_EXAMPLE = """\
|
|
23
|
+
# oriora-c2 environment — fill these, then export (e.g. `set -a; source .env.oriora-c2.example`)
|
|
24
|
+
# before `oriora-c2 serve`.
|
|
25
|
+
ORIORA_API_KEY=sk_oriora_... # your Oriora key — used ONLY for the /api/select decision call
|
|
26
|
+
DEEPSEEK_API_KEY= # your OWN vendor keys — used for the actual AI call, locally
|
|
27
|
+
MINIMAX_API_KEY=
|
|
28
|
+
# Optional tuning:
|
|
29
|
+
# ORIORA_SELECT_URL=https://api.orioralabs.com/api/select
|
|
30
|
+
# ORIORA_CANDIDATES=deepseek/deepseek-v4-pro,deepseek/deepseek-v4-flash,minimax/minimax-m2
|
|
31
|
+
# ORIORA_FALLBACK_MODEL=deepseek-v4-flash
|
|
32
|
+
# ORIORA_SELECT_TIMEOUT_S=2.5
|
|
33
|
+
"""
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _template() -> str:
    """Return the bundled ``config.template.yaml`` as text.

    The file is read from the installed ``oriora_c2`` package data and decoded explicitly as
    UTF-8: the template contains non-ASCII characters (em dashes in its comments), so relying on
    the platform's locale encoding could mis-decode it on systems whose default is not UTF-8.
    """
    return resources.files("oriora_c2").joinpath("config.template.yaml").read_text(encoding="utf-8")
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _init(args) -> int:
    """Scaffold the proxy config, the LiteLLM hook shim and an example env file.

    Args:
        args: Parsed CLI namespace; reads ``args.config`` (target config path) and ``args.force``
            (overwrite an existing config when true).

    Returns:
        ``0`` on success, ``1`` if the config already exists and ``--force`` was not given.
    """
    cfg = Path(args.config)
    if cfg.exists() and not args.force:
        print(f"{cfg} already exists — pass --force to overwrite.", file=sys.stderr)
        return 1
    # Allow --config to point into a directory that does not exist yet.
    cfg.parent.mkdir(parents=True, exist_ok=True)
    # The template and the env example contain non-ASCII text (em dashes), so write every file as
    # UTF-8 explicitly instead of depending on the platform's locale encoding.
    cfg.write_text(_template(), encoding="utf-8")
    # The shim must sit NEXT TO the config, not merely in the cwd: the config references it as
    # `oriora_c2_hook.proxy_handler_instance`, which is looked up relative to the config's own
    # directory. With the default `--config config.yaml` this is still the current directory.
    hook = cfg.parent / "oriora_c2_hook.py"
    hook.write_text(HOOK_SHIM, encoding="utf-8")
    env = Path(".env.oriora-c2.example")
    if not env.exists():  # never clobber an env file the user may already have edited
        env.write_text(ENV_EXAMPLE, encoding="utf-8")
    print(f"Wrote {cfg}, {hook} and {env}.")
    print("Next: set your keys (see the .env example), add the vendors you hold to the model_list,")
    print("then run: oriora-c2 serve")
    return 0
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _serve(args) -> int:
    """Run the LiteLLM proxy as a child process and return its exit code.

    Args:
        args: Parsed CLI namespace; reads ``args.config``, ``args.host`` and ``args.port``.

    Returns:
        The child's exit code; ``1`` if the config file or the ``litellm`` executable is missing;
        ``130`` if the run was interrupted with Ctrl-C.
    """
    cfg = Path(args.config)
    if not cfg.exists():
        print(f"No {cfg} found — run `oriora-c2 init` first.", file=sys.stderr)
        return 1
    env = dict(os.environ)
    env.pop("DATABASE_URL", None)  # DB-less launch — no Prisma at startup
    # Resolve the litellm console script next to THIS interpreter (the same venv pip installed us
    # into), so `oriora-c2 serve` works whether or not that venv is "activated" on PATH. Windows
    # installs console scripts as `litellm.exe`, so probe that name too before falling back to a
    # plain PATH lookup.
    bin_dir = Path(sys.executable).parent
    litellm_bin = next(
        (str(bin_dir / name) for name in ("litellm", "litellm.exe") if (bin_dir / name).is_file()),
        "litellm",
    )
    cmd = [
        litellm_bin,
        "--config", str(cfg), "--host", args.host, "--port", str(args.port), "--num_workers", "1",
    ]
    print(
        f"oriora-c2: local proxy on http://{args.host}:{args.port} "
        f"(point your agent here with base_url + model='oriora-auto')"
    )
    try:
        return subprocess.call(cmd, env=env)
    except FileNotFoundError:
        print("litellm not found — reinstall with `pip install oriora-c2` (it pulls litellm[proxy]).", file=sys.stderr)
        return 1
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop the proxy; exit quietly with the conventional
        # 128 + SIGINT status instead of dumping a traceback.
        return 130
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def main(argv=None) -> int:
    """Parse the command line and dispatch to the selected subcommand.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read the process arguments.

    Returns:
        The exit code produced by the subcommand handler.
    """
    parser = argparse.ArgumentParser(
        prog="oriora-c2",
        description="Oriora c2 — local routing proxy. Agent → localhost → /api/select → vendor on your own key.",
    )
    # A subcommand is mandatory; each one registers its handler under `fn`.
    commands = parser.add_subparsers(dest="cmd", required=True)

    init_cmd = commands.add_parser("init", help="scaffold config.yaml + .env example in the current directory")
    init_cmd.add_argument("--config", default="config.yaml")
    init_cmd.add_argument("--force", action="store_true", help="overwrite an existing config.yaml")
    init_cmd.set_defaults(fn=_init)

    serve_cmd = commands.add_parser("serve", help="run the local proxy (127.0.0.1:4000 by default)")
    serve_cmd.add_argument("--config", default="config.yaml")
    serve_cmd.add_argument("--host", default="127.0.0.1")
    serve_cmd.add_argument("--port", type=int, default=4000)
    serve_cmd.set_defaults(fn=_serve)

    namespace = parser.parse_args(argv)
    handler = namespace.fn
    return handler(namespace)
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
if __name__ == "__main__":
|
|
104
|
+
raise SystemExit(main())
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# oriora-c2 — LiteLLM proxy config (basic 2-vendor build: DeepSeek + MiniMax).
|
|
2
|
+
#
|
|
3
|
+
# The agent points at model "oriora-auto"; custom_callbacks rewrites data["model"] to the model
|
|
4
|
+
# Oriora's /api/select recommends, then LiteLLM dispatches DIRECT to the vendor on the LOCAL key.
|
|
5
|
+
# Explicit deployments (NOT wildcard) — proven to fire the pre-call hook. Each is OpenAI-format
|
|
6
|
+
# (openai/<id> + api_base), matching oriora-server/src/router/providers.ts. Keys come from env.
|
|
7
|
+
#
|
|
8
|
+
# Run: oriora-c2 serve (or directly: litellm --config config.yaml --host 127.0.0.1 --port 4000)
|
|
9
|
+
# (127.0.0.1 bind is load-bearing: customer-hosted = c2. If Oriora hosts it, it collapses to c1.)
|
|
10
|
+
|
|
11
|
+
model_list:
|
|
12
|
+
# Entrypoint the agent calls. The hook rewrites this before dispatch; the litellm_params here are
|
|
13
|
+
# only a placeholder so the model_name resolves if /api/select fails before the rewrite.
|
|
14
|
+
- model_name: oriora-auto
|
|
15
|
+
litellm_params:
|
|
16
|
+
model: openai/deepseek-v4-flash
|
|
17
|
+
api_base: https://api.deepseek.com/v1
|
|
18
|
+
api_key: os.environ/DEEPSEEK_API_KEY
|
|
19
|
+
|
|
20
|
+
# DeepSeek (the keys we hold) — native_model strings /api/select returns, dispatched direct.
|
|
21
|
+
- model_name: deepseek-v4-flash
|
|
22
|
+
litellm_params:
|
|
23
|
+
model: openai/deepseek-v4-flash
|
|
24
|
+
api_base: https://api.deepseek.com/v1
|
|
25
|
+
api_key: os.environ/DEEPSEEK_API_KEY
|
|
26
|
+
- model_name: deepseek-v4-pro
|
|
27
|
+
litellm_params:
|
|
28
|
+
model: openai/deepseek-v4-pro
|
|
29
|
+
api_base: https://api.deepseek.com/v1
|
|
30
|
+
api_key: os.environ/DEEPSEEK_API_KEY
|
|
31
|
+
|
|
32
|
+
# MiniMax (OpenAI-compatible endpoint).
|
|
33
|
+
- model_name: minimax-m2
|
|
34
|
+
litellm_params:
|
|
35
|
+
model: openai/minimax-m2
|
|
36
|
+
api_base: https://api.minimax.io/v1
|
|
37
|
+
api_key: os.environ/MINIMAX_API_KEY
|
|
38
|
+
|
|
39
|
+
# LiteLLM resolves callbacks by FILE relative to this config's directory, so `oriora-c2 init`
|
|
40
|
+
# drops a tiny `oriora_c2_hook.py` shim here that re-exports the installed package's hook.
|
|
41
|
+
litellm_settings:
|
|
42
|
+
callbacks: oriora_c2_hook.proxy_handler_instance
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
"""oriora-c2 — the LiteLLM pre-call hook that makes c2 (decision-only routing) work for ANY agent.
|
|
2
|
+
|
|
3
|
+
The agent calls model="oriora-auto" against this LOCAL proxy. Before dispatch, this hook fires,
|
|
4
|
+
asks Oriora's /api/select ONCE "which model for this task?", rewrites data["model"] to the
|
|
5
|
+
recommended native model, and LiteLLM then dispatches DIRECT to the vendor on the LOCAL key.
|
|
6
|
+
|
|
7
|
+
c2 invariant (load-bearing): only the {task_type, candidate models} cross to Oriora — never the
|
|
8
|
+
prompt, never the key, never the response. The proxy is customer-hosted (127.0.0.1). The single
|
|
9
|
+
Oriora touch is the decision call ($0.001). If Oriora is slow/down, we fail OPEN to a default model
|
|
10
|
+
so the agent is never blocked.
|
|
11
|
+
"""
|
|
12
|
+
import os
|
|
13
|
+
import re
|
|
14
|
+
import httpx
|
|
15
|
+
from litellm.integrations.custom_logger import CustomLogger
|
|
16
|
+
|
|
17
|
+
# Where the decision call goes, and the Oriora key sent with it as a Bearer token.
ORIORA_SELECT_URL = os.getenv("ORIORA_SELECT_URL", "https://api.orioralabs.com/api/select")
ORIORA_API_KEY = os.getenv("ORIORA_API_KEY", "")

# Models this proxy can actually execute (the ones we hold vendor keys for). They are sent to
# /api/select as the `models` candidate filter so the recommendation is always dispatchable.
# ORIORA_CANDIDATES is a comma-separated override; blank entries are dropped. The default covers
# the two vendors we hold (DeepSeek + MiniMax).
CANDIDATE_MODELS = [
    name
    for name in map(
        str.strip,
        os.getenv(
            "ORIORA_CANDIDATES",
            "deepseek/deepseek-v4-pro,deepseek/deepseek-v4-flash,minimax/minimax-m2",
        ).split(","),
    )
    if name
]

# Model used when /api/select errors or times out (fail-open) — must be a model_name present in
# config.yaml.
FALLBACK_MODEL = os.getenv("ORIORA_FALLBACK_MODEL", "deepseek-v4-flash")

# Time budget for the decision call. Oriora sits on every request's critical path, so the call is
# capped and we fail open past the cap rather than keep the agent waiting. The 2.5s default
# reflects that /api/select runs ~1s (Cloudflare + scoring): a sub-1s budget would always time
# out. Lower it only if the endpoint gets faster server-side.
SELECT_TIMEOUT_S = float(os.getenv("ORIORA_SELECT_TIMEOUT_S", "2.5"))

# Sentinel model name agents point at: "oriora-auto", or "oriora-auto:coding" to give an explicit
# task_type.
SENTINEL = "oriora-auto"

# Inputs longer than this many characters count as long_context when no stronger task signal
# matched.
LONG_CONTEXT_CHARS = int(os.getenv("ORIORA_LONG_CONTEXT_CHARS", "12000"))
|
|
45
|
+
|
|
46
|
+
# ── Free LOCAL task classifier ─────────────────────────────────────────────────
|
|
47
|
+
# Maps the prompt to ONE of the router's 16 task_types via ordered regex rules (first match wins).
|
|
48
|
+
# Pure-local, microseconds, $0 — NEVER an LLM. Explicit "oriora-auto:<type>" suffix always overrides.
|
|
49
|
+
#
|
|
50
|
+
# COVERAGE OF ALL 16 ROUTER task_types (no silent gaps — every one has a defined disposition):
|
|
51
|
+
# RULE-DETECTED (14): coding · code-repair · coding_live · math · math_hard · knowledge ·
|
|
52
|
+
# instruction · content-fill · reasoning · research · science · terminal · agentic · hard
|
|
53
|
+
# SIZE FALLBACK (1): long_context (input > LONG_CONTEXT_CHARS and no stronger signal)
|
|
54
|
+
# DEFAULT (1): general (nothing matched)
|
|
55
|
+
# If the router ever adds a new task_type, add it here too — keep this list in sync with
|
|
56
|
+
# GET /api/select/task-types so future-us is never left guessing what's handled.
|
|
57
|
+
# A tiny local embedding model can replace these rules for the fuzzy tail later (still ~$0).
|
|
58
|
+
# A Markdown code fence anywhere in the prompt.
_CODE_FENCE = re.compile(r"```")
# Words signalling "something is broken, fix it". `error` must be followed by ":" or " "; this is a
# lookahead rather than a consumed character so the trailing \b still holds for the very common
# "error: <message>" form (a consumed ":" followed by a space leaves no word boundary).
_FIX_HINT = re.compile(r"\b(fix|debug|broken|stack ?trace|traceback|exception|error(?=[: ])|crash|failing)\b")
# Ordered (task_type, pattern) rules applied to the lower-cased prompt; the first match wins.
# Truncated stems (eigen…, orchestrat…, molecul…) carry \w* so that, inside the \b…\b wrapper,
# they match the inflected words they stand for (eigenvalue, orchestrate, molecular) — without it
# the closing \b would only ever accept the bare stem.
_RULES = [
    ("math_hard", re.compile(r"\b(prove|proof|theorem|lemma|derive (the|a)|np-?hard|big-?o)\b")),
    ("math", re.compile(r"\b(solve|integral|derivative|equation|factorial|probability|matrix|eigen\w*|calculate|compute)\b|[0-9]\s*[\+\-\*/\^=]\s*[0-9]")),
    # agentic checked BEFORE coding: strong tool-orchestration intent must beat a stray "api"/"call".
    ("agentic", re.compile(r"\b(use (the )?tools?|call (the )?(api|function|tool)|then (do|run|call|fetch)|multi-?step|browse the web|search the web|orchestrat\w*|autonomous|agent loop|plan and (execute|then))\b")),
    ("coding_live", re.compile(r"\b(autocomplete|complete (this|the) (code|function|line|snippet)|inline completion|tab completion|as i type)\b")),
    ("code-repair", _FIX_HINT),
    ("coding", re.compile(r"\b(function|class\b|def |refactor|implement|compile|unit test|regex|api\b|sql|typescript|python|rust|golang|java\b|async|null pointer|code)\b")),
    ("terminal", re.compile(r"\b(bash|shell|terminal|cli command|command to|chmod|grep\b|git |docker|kubectl|npm |pip install)\b")),
    ("research", re.compile(r"\b(research|cite (sources|references)|literature|state of the art|survey of|sources for)\b")),
    ("science", re.compile(r"\b(hypothesis|experiment|molecul\w*|protein|quantum|thermodynamic|reaction|chemistry|physics|biology)\b")),
    ("instruction", re.compile(r"\b(summari[sz]e|translate|rewrite|reword|paraphrase|format (this|the)|convert (this|the)|extract the)\b")),
    ("content-fill", re.compile(r"\b(write (a|an|me)|draft (a|an)|compose|generate (a|an).{0,20}(post|email|caption|tweet|blurb|copy))\b")),
    ("knowledge", re.compile(r"\b(what (is|are|was)|who (is|was)|when (did|was)|where (is|are)|define\b|explain|difference between|how does)\b")),
    ("reasoning", re.compile(r"\b(step by step|reason through|analy[sz]e|compare|trade-?offs?|pros and cons|strategy|decide)\b")),
    # hard: prompt explicitly self-declares difficulty (rare; agents should tag instead).
    ("hard", re.compile(r"\b(this is (a )?hard|very (difficult|hard|complex)|hard problem|extremely (difficult|complex)|notoriously (hard|difficult)|challenging problem)\b")),
]
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _gather_text(messages) -> str:
    """Join all textual message content into one newline-separated string.

    Args:
        messages: An iterable of chat messages, or a falsy value (treated as no messages).
            Entries that are not dicts are ignored.

    Returns:
        The string ``content`` of each message, plus the string ``text`` of each dict part when
        ``content`` is a list (OpenAI content-parts), joined with ``"\\n"`` in encounter order.
    """
    def _texts():
        for message in messages or ():
            if not isinstance(message, dict):
                continue
            content = message.get("content")
            if isinstance(content, str):
                yield content
            elif isinstance(content, list):  # OpenAI content-parts (multimodal)
                for part in content:
                    if not isinstance(part, dict):
                        continue
                    text = part.get("text")
                    if isinstance(text, str):
                        yield text

    return "\n".join(_texts())
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def classify(messages) -> str:
    """Pick a task_type for a chat request using local regex rules only.

    Args:
        messages: The request's chat messages (see ``_gather_text`` for the accepted shapes).

    Returns:
        ``"code-repair"`` or ``"coding"`` when the text contains a code fence (depending on
        whether a fix hint is present); otherwise the first matching entry of ``_RULES``;
        otherwise ``"long_context"`` if the text is longer than ``LONG_CONTEXT_CHARS``, else
        ``"general"``.
    """
    text = _gather_text(messages)
    lowered = text.lower()

    # A fenced snippet is always a coding task; a fix hint upgrades it to code-repair.
    if _CODE_FENCE.search(text) is not None:
        if _FIX_HINT.search(lowered):
            return "code-repair"
        return "coding"

    # Ordered rules: the first pattern that matches decides.
    matched = next((name for name, pattern in _RULES if pattern.search(lowered)), None)
    if matched is not None:
        return matched

    if len(text) > LONG_CONTEXT_CHARS:
        return "long_context"
    return "general"
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
class OrioraRouter(CustomLogger):
    """LiteLLM proxy hook that swaps the ``oriora-auto`` sentinel for a concrete model."""

    async def async_pre_call_hook(self, user_api_key_dict, cache, data, call_type):
        """Rewrite ``data["model"]`` to the model recommended by ``/api/select``.

        Requests whose model is not the sentinel are returned untouched. For sentinel requests
        the task_type is taken from an explicit ``oriora-auto:<task_type>`` suffix or, failing
        that, from ``classify``; it is POSTed with ``CANDIDATE_MODELS`` to ``ORIORA_SELECT_URL``.
        Any failure (non-200, missing ``native_model``, timeout, network or parse error) falls
        back to ``FALLBACK_MODEL`` so the request is never blocked.

        Args:
            user_api_key_dict: Passed by LiteLLM; unused here.
            cache: Passed by LiteLLM; unused here.
            data: The request payload; ``model`` and ``messages`` are read, ``model`` is rewritten.
            call_type: Passed by LiteLLM; unused here.

        Returns:
            The same ``data`` dict, with ``model`` replaced for sentinel requests.
        """
        requested = data.get("model") or ""
        # Only intercept our sentinel; anything else passes through untouched. The sentinel must
        # match exactly or carry a ":<task_type>" suffix, so an unrelated model whose name merely
        # begins with "oriora-auto" is not hijacked, and a non-string model value passes through
        # instead of raising.
        if not isinstance(requested, str):
            return data
        base, has_suffix, suffix = requested.partition(":")
        if base != SENTINEL:
            return data

        # task_type: explicit "oriora-auto:coding" suffix wins; otherwise AUTO-CLASSIFY the prompt
        # locally (free regex rules, no LLM); 'general' only when nothing matches.
        if has_suffix:
            task_type = suffix.strip() or "general"
        else:
            task_type = classify(data.get("messages"))

        native = FALLBACK_MODEL
        try:
            async with httpx.AsyncClient(timeout=SELECT_TIMEOUT_S) as client:
                r = await client.post(
                    ORIORA_SELECT_URL,
                    headers={"Authorization": f"Bearer {ORIORA_API_KEY}"},
                    json={"task_type": task_type, "models": CANDIDATE_MODELS},
                )
            if r.status_code == 200:
                picked = (r.json() or {}).get("native_model")
                if picked:
                    native = picked
                    print(f"[oriora-c2] /api/select 200 OK: task_type={task_type} -> {native} (ORIORA DECISION)")
                else:
                    print(f"[oriora-c2] /api/select 200 but no native_model; FAIL-OPEN -> {FALLBACK_MODEL}")
            else:
                print(f"[oriora-c2] /api/select HTTP {r.status_code}; FAIL-OPEN -> {FALLBACK_MODEL}")
        except Exception as e:  # timeout / network — never block the agent
            print(f"[oriora-c2] /api/select error ({e!r}); FAIL-OPEN -> {FALLBACK_MODEL}")

        data["model"] = native
        return data
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
# Registered via litellm_settings.callbacks in config.yaml.
|
|
143
|
+
proxy_handler_instance = OrioraRouter()
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "oriora-c2"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Oriora c2 — a local proxy that plugs Oriora's model-routing decision into any OpenAI-style agent. Your vendor key and prompts stay on your machine; only the routing decision crosses."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
authors = [{ name = "Orioralabs OÜ" }]
|
|
13
|
+
keywords = ["llm", "ai", "model-routing", "router", "byok", "openai", "litellm", "proxy", "agent"]
|
|
14
|
+
dependencies = ["litellm[proxy]>=1.0", "httpx>=0.24"]
|
|
15
|
+
|
|
16
|
+
[project.scripts]
|
|
17
|
+
oriora-c2 = "oriora_c2.cli:main"
|
|
18
|
+
|
|
19
|
+
[project.urls]
|
|
20
|
+
Homepage = "https://orioralabs.com"
|
|
21
|
+
|
|
22
|
+
[tool.hatch.build.targets.wheel]
|
|
23
|
+
packages = ["oriora_c2"]
|
|
24
|
+
|
|
25
|
+
[tool.hatch.build.targets.wheel.force-include]
|
|
26
|
+
"oriora_c2/config.template.yaml" = "oriora_c2/config.template.yaml"
|
|
27
|
+
|
|
28
|
+
[tool.hatch.build.targets.sdist]
|
|
29
|
+
include = ["oriora_c2", "README.md", "LICENSE", "pyproject.toml"]
|