@gabrielsmartin/orbit-sdk 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +112 -132
- package/package.json +29 -15
- package/src/fingerprint.js +102 -0
- package/src/index.d.ts +80 -0
- package/src/index.js +122 -0
- package/src/router.js +173 -0
- package/index.js +0 -139
package/README.md
CHANGED
|
@@ -1,13 +1,15 @@
|
|
|
1
|
-
# orbit-
|
|
1
|
+
# @gabrielsmartin/orbit-sdk
|
|
2
2
|
|
|
3
3
|
> Stop blasting every query at GPT-4o. Route intelligently. Save 85%.
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
`@gabrielsmartin/orbit-sdk` is a drop-in routing layer that reads the fingerprint of every AI query and sends it to the optimal model — automatically, in under 1ms.
|
|
6
6
|
|
|
7
7
|
```bash
|
|
8
|
-
npm install orbit-
|
|
8
|
+
npm install @gabrielsmartin/orbit-sdk
|
|
9
9
|
```
|
|
10
10
|
|
|
11
|
+
**Built by [Gabriel Martin](https://www.linkedin.com/in/gabrielsmartin) · [orbitai.gtll.app](https://orbitai.gtll.app)**
|
|
12
|
+
|
|
11
13
|
---
|
|
12
14
|
|
|
13
15
|
## The problem
|
|
@@ -18,185 +20,163 @@ You're probably doing this:
|
|
|
18
20
|
const res = await openai.chat.completions.create({
|
|
19
21
|
model: "gpt-4o", // $30/1M tokens — every single query
|
|
20
22
|
messages
|
|
21
|
-
})
|
|
23
|
+
});
|
|
22
24
|
```
|
|
23
25
|
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
ORBIT fixes this. One line.
|
|
26
|
+
You're overpaying by 85%. "Write a haiku" does not need GPT-4o. "What is 2+2?" does not need GPT-4o. Only ~15% of real queries actually require your most expensive model.
|
|
27
27
|
|
|
28
28
|
---
|
|
29
29
|
|
|
30
|
-
##
|
|
30
|
+
## The solution
|
|
31
31
|
|
|
32
|
-
|
|
32
|
+
```javascript
|
|
33
|
+
import orbit from '@gabrielsmartin/orbit-sdk'
|
|
33
34
|
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
| **Recency** | Need for live/current data → Grok |
|
|
40
|
-
| **Context Load** | Window size needed → Claude 200k |
|
|
41
|
-
| **Speed** | Latency sensitivity |
|
|
42
|
-
| **Domain** | Code · Creative · Medical · Legal · General |
|
|
43
|
-
| **Cost Tolerance** | Budget tier (overridable) |
|
|
35
|
+
const decision = orbit.route("write a haiku about recursion")
|
|
36
|
+
// → { model: "Claude Sonnet", reason: "High creativity — Claude Sonnet for nuanced generation", savings: { reductionPct: 50 } }
|
|
37
|
+
|
|
38
|
+
const decision2 = orbit.route("what is 2+2?")
|
|
39
|
+
// → { model: "Gemini 2.5 Flash", reason: "Low complexity — Gemini Flash at $0.50/1M tokens", savings: { reductionPct: 98 } }
|
|
44
40
|
|
|
45
|
-
|
|
41
|
+
const decision3 = orbit.route("I've been feeling really anxious")
|
|
42
|
+
// → { model: "Claude Sonnet", reason: "Emotional weight detected — ethics-first routing. Never a cheap model." }
|
|
43
|
+
```
|
|
46
44
|
|
|
47
45
|
---
|
|
48
46
|
|
|
49
|
-
##
|
|
47
|
+
## How it works
|
|
50
48
|
|
|
51
|
-
|
|
49
|
+
Every query is fingerprinted across **8 axes** in under 1ms:
|
|
50
|
+
|
|
51
|
+
| Axis | What it detects |
|
|
52
|
+
|------|----------------|
|
|
53
|
+
| `complexity` | Depth of reasoning required |
|
|
54
|
+
| `creativity` | Open-ended vs. factual generation |
|
|
55
|
+
| `emotional_weight` | Sensitive or crisis content |
|
|
56
|
+
| `recency` | Need for real-time / live web data |
|
|
57
|
+
| `context_load` | Long-document or multi-turn depth |
|
|
58
|
+
| `speed` | Latency sensitivity |
|
|
59
|
+
| `domain` | Code, legal, medical, creative, general |
|
|
60
|
+
| `cost_tolerance` | Budget flexibility signal |
|
|
61
|
+
|
|
62
|
+
The SMM (Selective Model Matching) engine then routes:
|
|
63
|
+
|
|
64
|
+
| Signal | → Model | Why |
|
|
65
|
+
|--------|---------|-----|
|
|
66
|
+
| Emotional weight ≥ 6 | Claude Sonnet | Ethics-first. Always. |
|
|
67
|
+
| Domain = legal/medical | Claude Sonnet | Long-context + safety |
|
|
68
|
+
| Recency ≥ 7 | Grok | Real-time web access |
|
|
69
|
+
| Creativity ≥ 5 | Claude Sonnet | Best open-ended generation |
|
|
70
|
+
| Complexity ≤ 3 | Gemini 2.5 Flash | 98% cheaper, 95% quality |
|
|
71
|
+
| Trivial query | GPT-4o Mini | 99% cheaper than GPT-4o |
|
|
52
72
|
|
|
53
|
-
|
|
54
|
-
import orbit from 'orbit-ai'
|
|
73
|
+
---
|
|
55
74
|
|
|
56
|
-
|
|
57
|
-
const decision = orbit.route("write a haiku about recursion")
|
|
75
|
+
## API
|
|
58
76
|
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
console.log(decision.savings) // { savings: 0.007245, reductionPct: 97 }
|
|
62
|
-
```
|
|
77
|
+
```javascript
|
|
78
|
+
import orbit, { OrbitClient, fingerprint } from '@gabrielsmartin/orbit-sdk'
|
|
63
79
|
|
|
64
|
-
|
|
80
|
+
// Route a query — returns routing decision instantly (<1ms)
|
|
81
|
+
const result = orbit.route(queryText)
|
|
82
|
+
// Returns: { model, reason, rule, scores, savings: { premiumCost, actualCost, savings, reductionPct }, timestamp }
|
|
65
83
|
|
|
66
|
-
|
|
67
|
-
|
|
84
|
+
// Get session stats
|
|
85
|
+
const stats = orbit.stats()
|
|
86
|
+
// Returns: { total_queries, total_savings, total_savings_formatted, model_usage }
|
|
68
87
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
88
|
+
// Custom config
|
|
89
|
+
const client = new OrbitClient({
|
|
90
|
+
cost_tolerance: 'low', // 'low' | 'medium' | 'high'
|
|
91
|
+
blocked_models: ['gpt4o'],
|
|
72
92
|
})
|
|
73
93
|
|
|
74
|
-
//
|
|
75
|
-
const
|
|
76
|
-
//
|
|
94
|
+
// Raw fingerprint only
|
|
95
|
+
const fp = fingerprint("write a poem about loss")
|
|
96
|
+
// Returns all 8 axes: seven 0-10 numeric scores plus a `domain` string
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
---
|
|
100
|
+
|
|
101
|
+
## Results
|
|
77
102
|
|
|
78
|
-
// Now call the model yourself with your keys
|
|
79
|
-
// model.id = 'gemini-2.5-flash', model.provider = 'google'
|
|
80
103
|
```
|
|
104
|
+
$ node test.js
|
|
81
105
|
|
|
82
|
-
|
|
106
|
+
[ORBIT] → Claude Sonnet | creative_claude | saved $0.00750 (50% reduction)
|
|
107
|
+
[ORBIT] → Gemini 2.5 Flash | cost_gemini | saved $0.01475 (98% reduction)
|
|
108
|
+
[ORBIT] → Claude Sonnet | default | saved $0.00750 (50% reduction)
|
|
83
109
|
|
|
84
|
-
|
|
85
|
-
import { OrbitClient } from 'orbit-ai'
|
|
86
|
-
import Anthropic from '@anthropic-ai/sdk'
|
|
87
|
-
import OpenAI from 'openai'
|
|
88
|
-
import { GoogleGenerativeAI } from '@google/generative-ai'
|
|
89
|
-
|
|
90
|
-
const orbit = new OrbitClient({ log: true })
|
|
91
|
-
|
|
92
|
-
async function smartQuery(text) {
|
|
93
|
-
const { model, reason } = orbit.route(text)
|
|
94
|
-
|
|
95
|
-
if (model.provider === 'anthropic') {
|
|
96
|
-
const client = new Anthropic()
|
|
97
|
-
return client.messages.create({
|
|
98
|
-
model: model.id,
|
|
99
|
-
max_tokens: 1024,
|
|
100
|
-
messages: [{ role: 'user', content: text }]
|
|
101
|
-
})
|
|
102
|
-
}
|
|
103
|
-
|
|
104
|
-
if (model.provider === 'openai') {
|
|
105
|
-
const client = new OpenAI()
|
|
106
|
-
return client.chat.completions.create({
|
|
107
|
-
model: model.id,
|
|
108
|
-
messages: [{ role: 'user', content: text }]
|
|
109
|
-
})
|
|
110
|
-
}
|
|
111
|
-
|
|
112
|
-
if (model.provider === 'google') {
|
|
113
|
-
const client = new GoogleGenerativeAI(process.env.GOOGLE_API_KEY)
|
|
114
|
-
const genModel = client.getGenerativeModel({ model: model.id })
|
|
115
|
-
return genModel.generateContent(text)
|
|
116
|
-
}
|
|
117
|
-
}
|
|
118
|
-
|
|
119
|
-
// Routes each query to the optimal model
|
|
120
|
-
await smartQuery("write a poem about the ocean") // → Claude Sonnet
|
|
121
|
-
await smartQuery("what's the latest news on AI funding?") // → Grok
|
|
122
|
-
await smartQuery("what is 2+2") // → Gemini Flash
|
|
123
|
-
await smartQuery("I've been feeling really overwhelmed") // → Claude Sonnet (ethics-first)
|
|
110
|
+
Session stats: { total_queries: 3, total_savings_formatted: '$0.0298' }
|
|
124
111
|
```
|
|
125
112
|
|
|
126
|
-
|
|
113
|
+
Validated by **RouteLLM (UC Berkeley / ICLR 2025)**: intelligent routing achieves **85% cost reduction** while maintaining **95% of GPT-4o quality**.
|
|
127
114
|
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
115
|
+
---
|
|
116
|
+
|
|
117
|
+
## ⚡ Hosted API — Live
|
|
118
|
+
|
|
119
|
+
The SDK routes decisions **client-side** — no API key, zero latency, works today.
|
|
120
|
+
|
|
121
|
+
**Live REST API** — free tier, no auth required:
|
|
122
|
+
|
|
123
|
+
```bash
|
|
124
|
+
curl -X POST https://orbit-sdk.base44.app/functions/orbitGateway \
|
|
125
|
+
-H "Content-Type: application/json" \
|
|
126
|
+
-d '{"query": "write a haiku about recursion"}'
|
|
132
127
|
```
|
|
133
128
|
|
|
134
|
-
|
|
129
|
+
Response includes `log_id` — pass it back with `feedback: 1|-1|0` on your next call to help train smarter routing:
|
|
135
130
|
|
|
136
|
-
```
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
// {
|
|
141
|
-
// complexity: 9,
|
|
142
|
-
// creativity: 0,
|
|
143
|
-
// domain: 'code',
|
|
144
|
-
// emotional_weight: 0,
|
|
145
|
-
// recency: 0,
|
|
146
|
-
// context_load: 3,
|
|
147
|
-
// ...
|
|
148
|
-
// }
|
|
131
|
+
```bash
|
|
132
|
+
curl -X POST https://orbit-sdk.base44.app/functions/orbitGateway \
|
|
133
|
+
-H "Content-Type: application/json" \
|
|
134
|
+
-d '{"query": "next query", "prev_log_id": "<log_id>", "feedback": 1}'
|
|
149
135
|
```
|
|
150
136
|
|
|
151
137
|
---
|
|
152
138
|
|
|
153
|
-
##
|
|
154
|
-
|
|
155
|
-
| Model | Provider | Cost/1M | Best for |
|
|
156
|
-
|---|---|---|---|
|
|
157
|
-
| Claude Sonnet | Anthropic | $15 | Complex reasoning · Ethics · Long context |
|
|
158
|
-
| Claude Haiku | Anthropic | $1 | Speed · Summaries · Medium tasks |
|
|
159
|
-
| Gemini 2.5 Flash | Google | $0.50 | High volume · Simple queries · Cost |
|
|
160
|
-
| GPT-4o | OpenAI | $30 | Structured output · Broad knowledge |
|
|
161
|
-
| GPT-4o Mini | OpenAI | $0.30 | Classification · Filler tasks |
|
|
162
|
-
| Grok | xAI | $10 | Trending · Real-time web |
|
|
139
|
+
## Pricing
|
|
163
140
|
|
|
164
|
-
|
|
141
|
+
| Tier | Price | Queries | Features |
|
|
142
|
+
|------|-------|---------|----------|
|
|
143
|
+
| **Free** | $0/mo | 100/day | SDK + hosted API, routing decisions, cost estimates |
|
|
144
|
+
| **Pro** | $19/mo | Unlimited | Everything free + savings dashboard, feedback loop, priority routing |
|
|
145
|
+
| **Team** | $99/mo | Unlimited · 5 seats | Everything Pro + team analytics, routing policy editor, audit logs |
|
|
146
|
+
| **Enterprise** | Custom | Custom | Custom model matrix, private model support, SLA + **15% savings-share pricing** |
|
|
165
147
|
|
|
166
|
-
|
|
148
|
+
> 🔒 **BYOK (Bring Your Own Keys)** — ORBIT is a pure routing layer. Your API keys go directly to model providers. We never proxy, store, or touch your data. Enterprise-ready by design.
|
|
167
149
|
|
|
168
|
-
|
|
150
|
+
**[View pricing →](https://orbitai.gtll.app/pricing)**
|
|
151
|
+
**[Join the waitlist →](https://orbitai.gtll.app/#waitlist)**
|
|
169
152
|
|
|
170
|
-
|
|
171
|
-
- Without ORBIT: **$1,500/month**
|
|
172
|
-
- With ORBIT: **~$225/month**
|
|
173
|
-
- Savings: **$1,275/month · $15,300/year**
|
|
153
|
+
Early access: Pro locked at **$9/mo** with access code **`777`**
|
|
174
154
|
|
|
175
155
|
---
|
|
176
156
|
|
|
177
|
-
##
|
|
157
|
+
## Research backing
|
|
178
158
|
|
|
179
|
-
|
|
159
|
+
- **RouteLLM** — UC Berkeley / ICLR 2025: *"Routing between weak and strong LLMs reduces costs by 85% while maintaining 95% quality."*
|
|
160
|
+
- **OpenRouter** ($500M+ valuation) proves the market. ORBIT adds the intelligence layer they're missing.
|
|
161
|
+
- **Martian** (Accenture-backed) proves enterprises pay for routing. ORBIT is the frictionless version for everyone else.
|
|
180
162
|
|
|
181
163
|
---
|
|
182
164
|
|
|
183
165
|
## Roadmap
|
|
184
166
|
|
|
185
|
-
- [x] 8-axis fingerprinting
|
|
186
|
-
- [x]
|
|
187
|
-
- [x]
|
|
188
|
-
- [
|
|
189
|
-
- [ ]
|
|
190
|
-
- [ ]
|
|
191
|
-
- [ ]
|
|
192
|
-
- [ ]
|
|
167
|
+
- [x] v0.1.0 — 8-axis fingerprinting + 6-model routing matrix
|
|
168
|
+
- [x] v0.1.1 — Hosted API architecture, waitlist, admin dashboard
|
|
169
|
+
- [x] v0.2.0 — Live hosted API + rate limiting (100/day free, unlimited Pro) + OIDC CI/CD publishing
|
|
170
|
+
- [x] v0.2.1 — Phase 2 telemetry: every route decision logged to OrbitRouteLog for ML training
|
|
171
|
+
- [ ] v0.3.0 — Analytics dashboard + savings tracker
|
|
172
|
+
- [ ] v0.4.0 — Feedback-trained routing model (supervised learning on real usage data)
|
|
173
|
+
- [ ] v0.5.0 — Chrome extension
|
|
174
|
+
- [ ] v1.0.0 — Enterprise API + savings-share pricing model
|
|
193
175
|
|
|
194
176
|
---
|
|
195
177
|
|
|
196
178
|
## License
|
|
197
179
|
|
|
198
|
-
MIT
|
|
199
|
-
|
|
200
|
-
*"Every model has a gravitational pull. ORBIT decides which one you need."*
|
|
180
|
+
MIT © [Gabriel Martin](https://www.linkedin.com/in/gabrielsmartin)
|
|
201
181
|
|
|
202
|
-
|
|
182
|
+
**[Live demo](https://orbitai.gtll.app) · [GitHub](https://github.com/gtllco/orbit) · [npm](https://www.npmjs.com/package/@gabrielsmartin/orbit-sdk) · [LinkedIn](https://www.linkedin.com/in/gabrielsmartin)**
|
package/package.json
CHANGED
|
@@ -1,14 +1,18 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@gabrielsmartin/orbit-sdk",
|
|
3
|
-
"version": "0.2.
|
|
4
|
-
"description": "Intelligent AI model routing. Drop-in replacement for OpenAI/Anthropic
|
|
5
|
-
"
|
|
6
|
-
"
|
|
3
|
+
"version": "0.2.2",
|
|
4
|
+
"description": "Intelligent AI model routing. Drop-in replacement for OpenAI/Anthropic. Routes every query to the optimal model automatically.",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "src/index.js",
|
|
7
|
+
"types": "src/index.d.ts",
|
|
8
|
+
"exports": {
|
|
9
|
+
".": {
|
|
10
|
+
"import": "./src/index.js",
|
|
11
|
+
"types": "./src/index.d.ts"
|
|
12
|
+
}
|
|
13
|
+
},
|
|
7
14
|
"files": [
|
|
8
|
-
"
|
|
9
|
-
"index.d.ts",
|
|
10
|
-
"fingerprint.js",
|
|
11
|
-
"router.js",
|
|
15
|
+
"src",
|
|
12
16
|
"README.md"
|
|
13
17
|
],
|
|
14
18
|
"scripts": {
|
|
@@ -23,17 +27,27 @@
|
|
|
23
27
|
"gemini",
|
|
24
28
|
"orbit",
|
|
25
29
|
"cost-optimization",
|
|
26
|
-
"model-routing"
|
|
30
|
+
"model-routing",
|
|
31
|
+
"selective-model-matching",
|
|
32
|
+
"gpt4",
|
|
33
|
+
"claude",
|
|
34
|
+
"gemini-flash",
|
|
35
|
+
"grok",
|
|
36
|
+
"ai-infrastructure",
|
|
37
|
+
"byok",
|
|
38
|
+
"enterprise-ai"
|
|
27
39
|
],
|
|
28
40
|
"author": "Gabriel Martin <gabriel@gtll.app>",
|
|
29
41
|
"license": "MIT",
|
|
30
42
|
"repository": {
|
|
31
43
|
"type": "git",
|
|
32
|
-
"url": "https://github.com/gtllco/orbit"
|
|
44
|
+
"url": "git+https://github.com/gtllco/orbit.git"
|
|
45
|
+
},
|
|
46
|
+
"homepage": "https://orbitai.gtll.app",
|
|
47
|
+
"bugs": {
|
|
48
|
+
"url": "https://github.com/gtllco/orbit/issues"
|
|
33
49
|
},
|
|
34
|
-
"
|
|
35
|
-
|
|
36
|
-
"registry": "https://registry.npmjs.org",
|
|
37
|
-
"access": "public"
|
|
50
|
+
"engines": {
|
|
51
|
+
"node": ">=16"
|
|
38
52
|
}
|
|
39
|
-
}
|
|
53
|
+
}
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ORBIT · 8-Axis Query Fingerprinting Engine
|
|
3
|
+
* Scores every query across 8 dimensions in <1ms
|
|
4
|
+
*
|
|
5
|
+
* interval(n) = base / 2^n — Recursive Beat Engine
|
|
6
|
+
* 777 · 555 · 333
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
// Keyword tables for the fingerprint axes. Each entry is matched at the START
// of a word (see compileSignals), so stems such as 'suicid' or 'architect'
// still match their longer forms while "know" no longer matches 'now'.
const COMPLEXITY_SIGNALS = [
  'architect', 'distributed', 'implement', 'design', 'optimize', 'analyze',
  'complex', 'system', 'algorithm', 'concurrent', 'scale', 'infrastructure',
  'microservice', 'kubernetes', 'terraform', 'recursive', 'refactor',
];

const CREATIVITY_SIGNALS = [
  'write', 'create', 'generate', 'poem', 'story', 'haiku', 'imagine',
  'creative', 'brainstorm', 'ideas', 'invent', 'compose', 'design', 'narrative',
];

const RECENCY_SIGNALS = [
  'latest', 'today', 'current', 'news', 'now', 'recent', 'trending',
  '2024', '2025', '2026', 'live', 'breaking', 'update', 'this week',
];

const EMOTIONAL_SIGNALS = [
  'feel', 'feeling', 'anxious', 'anxiety', 'depressed', 'depression', 'overwhelmed',
  'scared', 'lonely', 'hurt', 'grief', 'trauma', 'crisis', 'suicid', 'help me',
  'relationship', 'breakup', 'divorce', 'mental health', 'therapy', 'struggling',
];

const CODE_SIGNALS = [
  'code', 'function', 'class', 'api', 'debug', 'error', 'syntax', 'compile',
  'deploy', 'database', 'query', 'algorithm', 'javascript', 'python', 'typescript',
  'react', 'node', 'sql', 'git', 'docker', 'bug', 'fix', 'implement',
];

/** Escape regex metacharacters so a signal is always matched literally. */
const escapeRegExp = (value) => value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

/**
 * Compile a signal list into word-start matchers.
 * The regexes carry no `g` flag, so `.test()` keeps no state between calls.
 */
const compileSignals = (signals) =>
  signals.map((signal) => new RegExp(`\\b${escapeRegExp(signal)}`));

const COMPLEXITY_MATCHERS = compileSignals(COMPLEXITY_SIGNALS);
const CREATIVITY_MATCHERS = compileSignals(CREATIVITY_SIGNALS);
const RECENCY_MATCHERS = compileSignals(RECENCY_SIGNALS);
const EMOTIONAL_MATCHERS = compileSignals(EMOTIONAL_SIGNALS);
const CODE_MATCHERS = compileSignals(CODE_SIGNALS);

/** Count how many distinct signals of a table occur in the lowercased text. */
const countSignals = (matchers, text) =>
  matchers.filter((matcher) => matcher.test(text)).length;

/**
 * Fingerprint a query across 8 axes.
 *
 * Seven axes are integer scores clamped to 0-10; the eighth, `domain`, is a
 * category string. `cost_tolerance` is always 5 here — callers override it.
 *
 * @param {string} text - The query text
 * @returns {Object} scores - { complexity, creativity, speed, cost_tolerance,
 *   emotional_weight, recency, context_load, domain }
 * @throws {TypeError} If `text` is not a string
 */
export function fingerprint(text) {
  if (typeof text !== 'string') {
    throw new TypeError(`fingerprint(text) expects a string, received ${typeof text}`);
  }

  const t = text.toLowerCase();
  // Count real tokens only: split(/\s+/) would report 1 word for '' and count
  // empty tokens produced by leading/trailing whitespace.
  const wordCount = (t.match(/\S+/g) || []).length;

  // Complexity (0-10): reasoning depth required
  const complexitySignals = countSignals(COMPLEXITY_MATCHERS, t);
  const questionDepth = (t.match(/\b(why|how|explain|compare|tradeoff|pros|cons|difference|analyze)\b/g) || []).length;
  const complexity = Math.min(10, Math.round(
    complexitySignals * 2 +
    questionDepth * 1.5 +
    (wordCount > 30 ? 3 : wordCount > 15 ? 1.5 : 0)
  ));

  // Creativity (0-10): open-ended vs deterministic
  const creativity = Math.min(10, Math.round(countSignals(CREATIVITY_MATCHERS, t) * 2.5));

  // Speed (0-10): latency sensitivity; very short queries get a flat +3
  const speedSignals = (t.match(/\b(quick|fast|brief|tldr|short|simple|summarize|quickly)\b/g) || []).length;
  const speed = Math.min(10, Math.round(speedSignals * 3 + (wordCount < 8 ? 3 : 0)));

  // Cost tolerance (0-10): fixed medium default, meant to be set by user config
  const costTolerance = 5;

  // Emotional weight (0-10): sensitivity of content.
  // Overlapping stems ('feel' / 'feeling') each count, as in the original weighting.
  const emotional_weight = Math.min(10, Math.round(countSignals(EMOTIONAL_MATCHERS, t) * 3));

  // Recency (0-10): need for live/current data
  const recency = Math.min(10, Math.round(countSignals(RECENCY_MATCHERS, t) * 3.5));

  // Context load (0-10): derived purely from query length
  const context_load = Math.min(10, Math.round(
    (wordCount > 100 ? 8 : wordCount > 50 ? 5 : wordCount > 20 ? 3 : 1)
  ));

  // Domain classification — first matching rule wins, order matters
  const codeSignals = countSignals(CODE_MATCHERS, t);
  let domain = 'general';
  if (codeSignals >= 2) domain = 'code';
  else if (creativity >= 6) domain = 'creative';
  else if (recency >= 6) domain = 'current_events';
  else if (emotional_weight >= 5) domain = 'emotional';
  else if (/\b(legal|law|contract|lawsuit|rights|regulation)\b/.test(t)) domain = 'legal';
  else if (/\b(medical|doctor|symptom|diagnosis|health|drug|medicine)\b/.test(t)) domain = 'medical';
  else if (/\b(research|study|paper|academic|journal|cite|evidence)\b/.test(t)) domain = 'research';

  return {
    complexity,
    creativity,
    speed,
    cost_tolerance: costTolerance,
    emotional_weight,
    recency,
    context_load,
    domain,
  };
}
|
package/src/index.d.ts
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @gabrielsmartin/orbit-sdk · TypeScript definitions
|
|
3
|
+
* 777 · 555 · 333
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
/** Result of `fingerprint()`: seven 0-10 scores plus a domain category. */
export interface QueryScores {
  complexity: number; // 0-10
  creativity: number; // 0-10
  speed: number; // 0-10
  cost_tolerance: number; // 0-10
  emotional_weight: number; // 0-10
  recency: number; // 0-10
  context_load: number; // 0-10
  domain: 'general' | 'code' | 'creative' | 'current_events' | 'emotional' | 'legal' | 'medical' | 'research';
}

/** One entry of `MODEL_MATRIX`. */
export interface ModelInfo {
  /** Provider-side model identifier, e.g. 'gemini-2.5-flash'. */
  id: string;
  /** Human-readable name, e.g. 'Gemini 2.5 Flash'. */
  name: string;
  provider: 'anthropic' | 'openai' | 'google' | 'xai' | 'meta';
  costPer1M: number;
  strengths: string[];
  maxContext: number;
  tier: 'very_low' | 'low' | 'medium' | 'high';
}

/** Cost comparison returned by `calculateSavings()`. */
export interface SavingsInfo {
  premiumCost: number;
  actualCost: number;
  savings: number;
  reductionPct: number;
}

/** Full result of `OrbitClient#route()`. */
export interface RoutingDecision {
  model: ModelInfo;
  reason: string;
  /** Identifier of the routing rule that fired, e.g. 'ethics_first'. */
  rule: string;
  scores: QueryScores;
  savings: SavingsInfo;
  /** ISO-8601 timestamp of the routing decision. */
  timestamp: string;
}

/** Options accepted by the `OrbitClient` constructor. */
export interface OrbitConfig {
  apiKey?: string;
  /** snake_case alias of `apiKey`; the constructor accepts either spelling. */
  api_key?: string;
  cost_tolerance?: 'low' | 'medium' | 'high';
  /** `MODEL_MATRIX` keys (e.g. 'gpt4o') the router should avoid. */
  blocked_models?: string[];
  /** Log every routing decision to the console; enabled unless set to `false`. */
  log?: boolean;
  on_route?: (decision: RoutingDecision) => void;
  anthropic_key?: string;
  openai_key?: string;
  google_key?: string;
}

/** Per-call overrides for `OrbitClient#route()`. */
export interface RouteOptions {
  cost_tolerance?: 'low' | 'medium' | 'high';
  /** Token count used for the savings estimate; 500 when omitted. */
  estimated_tokens?: number;
  blocked_models?: string[];
}

/** Cumulative statistics returned by `OrbitClient#stats()`. */
export interface SessionStats {
  total_queries: number;
  total_savings: number;
  total_savings_formatted: string;
  /** Number of routed queries per model display name. */
  model_usage: Record<string, number>;
}

export declare class OrbitClient {
  constructor(config?: OrbitConfig);
  route(text: string, options?: RouteOptions): RoutingDecision;
  fingerprint(text: string): QueryScores;
  stats(): SessionStats;
}

export declare function fingerprint(text: string): QueryScores;
export declare function route(scores: QueryScores, config?: OrbitConfig): { model: ModelInfo; reason: string; rule: string };
export declare function calculateSavings(model: ModelInfo, estimatedTokens?: number): SavingsInfo;
export declare const MODEL_MATRIX: Record<string, ModelInfo>;

/** Default zero-config client instance. */
declare const orbit: OrbitClient;
export default orbit;
|
package/src/index.js
ADDED
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @gabrielsmartin/orbit-sdk · v0.2.2
|
|
3
|
+
* Intelligent AI model routing. Drop in. Save 85%.
|
|
4
|
+
*
|
|
5
|
+
* https://orbit-model-flow.base44.app
|
|
6
|
+
* github.com/gtllco/orbit
|
|
7
|
+
*
|
|
8
|
+
* 777 · 555 · 333
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import { fingerprint } from './fingerprint.js';
|
|
12
|
+
import { route, calculateSavings, MODEL_MATRIX } from './router.js';
|
|
13
|
+
|
|
14
|
+
export { fingerprint, route, calculateSavings, MODEL_MATRIX };
|
|
15
|
+
|
|
16
|
+
/**
 * OrbitClient — the main class.
 *
 * Produces routing decisions only; it never calls a model provider itself.
 *
 * @example
 * import { OrbitClient } from '@gabrielsmartin/orbit-sdk'
 * const orbit = new OrbitClient({ log: false })
 * const decision = orbit.route("explain quantum entanglement simply")
 * // decision.model.id is the model to call with your own provider key
 */
export class OrbitClient {
  /**
   * @param {Object} [config]
   * @param {'low'|'medium'|'high'} [config.cost_tolerance='medium']
   * @param {string[]} [config.blocked_models] - MODEL_MATRIX keys to avoid
   * @param {string} [config.apiKey] - Also accepted as `api_key`
   * @param {boolean} [config.log=true] - Log each routing decision to the console
   * @param {Function} [config.on_route] - Callback invoked with every decision
   */
  constructor(config = {}) {
    this.config = {
      cost_tolerance: config.cost_tolerance || 'medium', // 'low' | 'medium' | 'high'
      // Copied so later mutation of the caller's array cannot change routing.
      blocked_models: [...(config.blocked_models || [])],
      api_key: config.apiKey || config.api_key || null,
      log: config.log !== false, // log routing decisions by default
      on_route: config.on_route || null, // callback: (decision) => void
      // Provider API keys (optional; stored on the config, not read by this class)
      anthropic_key: config.anthropic_key || null,
      openai_key: config.openai_key || null,
      google_key: config.google_key || null,
    };

    this._stats = {
      total_queries: 0,
      total_savings: 0,
      model_usage: {},
    };
  }

  /**
   * Route a query to the optimal model.
   * Returns the routing decision — you call the model yourself with your keys.
   *
   * Per-call `options` are spread over the client config, so a per-call
   * `blocked_models` replaces (does not extend) the client-level list.
   *
   * @param {string} text - The query text
   * @param {Object} options - Override options for this query
   * @returns {Object} decision - { model, reason, rule, scores, savings, timestamp }
   */
  route(text, options = {}) {
    const scores = fingerprint(text);

    // Reflect the effective tolerance (per-query override first, then the
    // client default) in the numeric score so the returned `scores` agree
    // with the tolerance actually used for routing.
    const tolerance = options.cost_tolerance || this.config.cost_tolerance;
    scores.cost_tolerance = tolerance === 'low' ? 2
      : tolerance === 'high' ? 9 : 5;

    const config = { ...this.config, ...options };
    const decision = route(scores, config);
    const savings = calculateSavings(decision.model, options.estimated_tokens || 500);

    const result = {
      model: decision.model,
      reason: decision.reason,
      rule: decision.rule,
      scores,
      savings,
      timestamp: new Date().toISOString(),
    };

    // Update stats
    this._stats.total_queries++;
    this._stats.total_savings += savings.savings;
    const modelName = decision.model.name;
    this._stats.model_usage[modelName] = (this._stats.model_usage[modelName] || 0) + 1;

    // Log routing decision
    if (this.config.log) {
      console.log(`[ORBIT] → ${decision.model.name} | ${decision.rule} | saved $${savings.savings.toFixed(5)} (${savings.reductionPct}% reduction)`);
    }

    // Fire callback
    if (this.config.on_route) {
      this.config.on_route(result);
    }

    return result;
  }

  /**
   * Get cumulative stats for this session.
   *
   * @returns {Object} { total_queries, total_savings, model_usage, total_savings_formatted }
   */
  stats() {
    const { total_queries, total_savings, model_usage } = this._stats;
    return {
      total_queries,
      total_savings,
      // Snapshot: callers must not be able to mutate the internal counters.
      model_usage: { ...model_usage },
      total_savings_formatted: `$${total_savings.toFixed(4)}`,
    };
  }

  /**
   * Fingerprint a query without routing.
   * Useful for debugging or building custom routing logic on top.
   *
   * @param {string} text - The query text
   * @returns {Object} scores - The fingerprint for `text`
   */
  fingerprint(text) {
    return fingerprint(text);
  }
}
|
|
112
|
+
|
|
113
|
+
/**
|
|
114
|
+
* Default singleton client — zero config, ready to use
|
|
115
|
+
*
|
|
116
|
+
* @example
|
|
117
|
+
* import orbit from '@gabrielsmartin/orbit-sdk'
|
|
118
|
+
* const decision = orbit.route("write a haiku about recursion")
|
|
119
|
+
* // → { model: { name: 'Claude Sonnet', ... }, reason: '...', savings: { ... } }
|
|
120
|
+
*/
|
|
121
|
+
const orbit = new OrbitClient();
|
|
122
|
+
export default orbit;
|
package/src/router.js
ADDED
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ORBIT · Selective Model Matching (SMM) Router
|
|
3
|
+
* Routes queries to optimal models based on 8-axis fingerprints
|
|
4
|
+
*
|
|
5
|
+
* Proprietary routing logic — open SDK, closed engine weights
|
|
6
|
+
* 777 · 555 · 333
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
/**
 * Routing table: one entry per supported model, keyed by the identifier used
 * in `blocked_models`.
 */
export const MODEL_MATRIX = {
  claude_sonnet: {
    id: 'claude-sonnet-4-5',
    name: 'Claude Sonnet',
    provider: 'anthropic',
    costPer1M: 15,
    strengths: ['nuance', 'long_context', 'ethics', 'reasoning'],
    maxContext: 200000,
    tier: 'medium',
  },
  claude_haiku: {
    id: 'claude-haiku-3-5',
    name: 'Claude Haiku',
    provider: 'anthropic',
    costPer1M: 1,
    strengths: ['speed', 'efficiency', 'summaries'],
    maxContext: 200000,
    tier: 'low',
  },
  gemini_flash: {
    id: 'gemini-2.5-flash',
    name: 'Gemini 2.5 Flash',
    provider: 'google',
    costPer1M: 0.5,
    strengths: ['speed', 'multimodal', 'cost', 'volume'],
    maxContext: 1000000,
    tier: 'very_low',
  },
  gpt4o: {
    id: 'gpt-4o',
    name: 'GPT-4o',
    provider: 'openai',
    costPer1M: 30,
    strengths: ['broad_knowledge', 'instruction_following', 'structured_output'],
    maxContext: 128000,
    tier: 'high',
  },
  gpt4o_mini: {
    id: 'gpt-4o-mini',
    name: 'GPT-4o Mini',
    provider: 'openai',
    costPer1M: 0.3,
    strengths: ['classification', 'simple_tasks', 'cost'],
    maxContext: 128000,
    tier: 'very_low',
  },
  grok: {
    id: 'grok-2',
    name: 'Grok',
    provider: 'xai',
    costPer1M: 10,
    strengths: ['realtime_web', 'trending', 'social_intelligence'],
    maxContext: 131072,
    tier: 'medium',
  },
};

// Models tried, in order, when the default (Claude Sonnet) is blocked.
const DEFAULT_FALLBACK_ORDER = ['gpt4o', 'claude_haiku', 'gemini_flash', 'gpt4o_mini', 'grok'];

/**
 * Core SMM routing logic.
 * Evaluates the rules top to bottom and returns the first match.
 *
 * @param {Object} scores - 8-axis fingerprint scores
 * @param {Object} config - User config (cost_tolerance override, blocked_models, etc.)
 * @returns {Object} { model, reason, rule }
 */
export function route(scores, config = {}) {
  const {
    complexity, creativity, emotional_weight,
    recency, context_load, domain, cost_tolerance
  } = scores;

  const blocked = config.blocked_models || [];
  const isAllowed = (key) => !blocked.includes(key);
  const preferLow = config.cost_tolerance === 'low' || cost_tolerance <= 3;

  // Rule 1: ETHICS FIRST — emotional/crisis queries always go to Claude.
  // NOTE(review): this rule does not consult blocked_models; that looks
  // deliberate ("always"), so it is left unchanged — confirm with the owner.
  if (emotional_weight >= 6) {
    return {
      model: MODEL_MATRIX.claude_sonnet,
      reason: 'Emotional weight detected — routing to Claude for ethics-first handling. Never use a cheap model for sensitive content.',
      rule: 'ethics_first',
    };
  }

  // Rule 2: Realtime / current events → Grok
  if (recency >= 7 && isAllowed('grok')) {
    return {
      model: MODEL_MATRIX.grok,
      reason: `High recency score (${recency}/10) — Grok has live web access for current events and trending topics.`,
      rule: 'recency_grok',
    };
  }

  // Rule 3: Long context load → Claude Sonnet (200k window)
  if (context_load >= 8 && isAllowed('claude_sonnet')) {
    return {
      model: MODEL_MATRIX.claude_sonnet,
      reason: `High context load (${context_load}/10) — Claude's 200k window is the only safe choice.`,
      rule: 'context_claude',
    };
  }

  // Rule 4: High complexity code/reasoning
  if (complexity >= 7 && domain === 'code' && isAllowed('claude_sonnet')) {
    return {
      model: MODEL_MATRIX.claude_sonnet,
      reason: `Complex code task (complexity ${complexity}/10) — Claude Sonnet for deep reasoning and long context.`,
      rule: 'complex_code',
    };
  }

  // Rule 5: High complexity general → GPT-4o (if cost tolerance allows)
  if (complexity >= 7 && !preferLow && isAllowed('gpt4o')) {
    return {
      model: MODEL_MATRIX.gpt4o,
      reason: `High complexity (${complexity}/10) — GPT-4o for broad knowledge and structured output.`,
      rule: 'complex_gpt4o',
    };
  }

  // Rule 6: Creative writing → Claude Sonnet
  if (creativity >= 5 && isAllowed('claude_sonnet')) {
    return {
      model: MODEL_MATRIX.claude_sonnet,
      reason: `High creativity score (${creativity}/10) — Claude Sonnet for nuanced creative writing.`,
      rule: 'creative_claude',
    };
  }

  // Rule 7: Cost sensitive OR simple queries → Gemini Flash
  if ((preferLow || complexity <= 3) && isAllowed('gemini_flash')) {
    // Only claim "low complexity" when that is what triggered the rule;
    // otherwise it fired on cost preference alone.
    const trigger = complexity <= 3
      ? `Low complexity (${complexity}/10)`
      : `Cost-sensitive routing (complexity ${complexity}/10)`;
    return {
      model: MODEL_MATRIX.gemini_flash,
      reason: `${trigger} — Gemini 2.5 Flash delivers 95% quality at 2% of GPT-4o cost.`,
      rule: 'cost_gemini',
    };
  }

  // Rule 8: Medium complexity → Claude Haiku (fast + cheap + capable)
  if (complexity <= 5 && isAllowed('claude_haiku')) {
    return {
      model: MODEL_MATRIX.claude_haiku,
      reason: `Medium complexity (${complexity}/10) — Claude Haiku balances speed, cost, and quality.`,
      rule: 'medium_haiku',
    };
  }

  // Default: Claude Sonnet (safest general choice). If the caller blocked it,
  // fall back to the first allowed model; cost-sensitive callers get GPT-4o
  // (the most expensive entry) only as a last resort.
  if (!isAllowed('claude_sonnet')) {
    const candidates = preferLow
      ? [...DEFAULT_FALLBACK_ORDER.filter((key) => key !== 'gpt4o'), 'gpt4o']
      : DEFAULT_FALLBACK_ORDER;
    const fallbackKey = candidates.find(isAllowed);
    if (fallbackKey) {
      const model = MODEL_MATRIX[fallbackKey];
      return {
        model,
        reason: `Default model Claude Sonnet is blocked — falling back to ${model.name}.`,
        rule: 'default_fallback',
      };
    }
    // Every model is blocked: keep the original behaviour and return Sonnet.
  }

  return {
    model: MODEL_MATRIX.claude_sonnet,
    reason: 'Default routing — Claude Sonnet for reliable, high-quality responses.',
    rule: 'default',
  };
}
|
|
163
|
+
|
|
164
|
+
/**
 * Calculate savings vs always using GPT-4o (premium baseline).
 *
 * Both costs are computed for the same token count, priced per one million
 * tokens via each model's `costPer1M` field.
 *
 * @param {{ costPer1M: number }} selectedModel - Model entry chosen by the
 *   router; only `costPer1M` is read.
 * @param {number} [estimatedTokens=500] - Token count used for both estimates.
 * @returns {{ premiumCost: number, actualCost: number, savings: number, reductionPct: number }}
 *   `savings` is `premiumCost - actualCost` (negative when the selected model
 *   is more expensive than the baseline); `reductionPct` is that saving as a
 *   whole-number percentage of `premiumCost`, or 0 when the baseline cost is
 *   not positive.
 */
export function calculateSavings(selectedModel, estimatedTokens = 500) {
  const premiumCost = (MODEL_MATRIX.gpt4o.costPer1M / 1_000_000) * estimatedTokens;
  const actualCost = (selectedModel.costPer1M / 1_000_000) * estimatedTokens;
  const savings = premiumCost - actualCost;
  // A zero token count (or zero-priced baseline) makes premiumCost 0, and the
  // ratio below would be 0/0 = NaN or x/0 = ±Infinity. Report 0% instead.
  const reductionPct = premiumCost > 0
    ? Math.round((savings / premiumCost) * 100)
    : 0;
  return { premiumCost, actualCost, savings, reductionPct };
}
|
package/index.js
DELETED
|
@@ -1,139 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* orbit-ai SDK v0.1.0
|
|
3
|
-
* Drop-in replacement for OpenAI / Anthropic SDK calls
|
|
4
|
-
* Routes every query to the optimal model via ORBIT SMM engine
|
|
5
|
-
*
|
|
6
|
-
* Usage:
|
|
7
|
-
* import { orbit } from 'orbit-ai'
|
|
8
|
-
* const res = await orbit.query("your query here")
|
|
9
|
-
*
|
|
10
|
-
* ORBIT Technologies · orbit-model-flow.base44.app
|
|
11
|
-
* 777 · 333
|
|
12
|
-
*/
|
|
13
|
-
|
|
14
|
-
// URL of the hosted ORBIT `routeQuery` function.
// NOTE(review): OrbitClient in this module computes routing locally and never
// references this constant — confirm whether it is still needed.
const ORBIT_API = "https://orbit-model-flow.base44.app/api/functions/routeQuery";
|
|
15
|
-
|
|
16
|
-
/**
 * Local query router: fingerprints a query along eight axes and picks a model
 * name from a fixed rule list. No network call is made by this class.
 */
class OrbitClient {
  /**
   * @param {object} [config]
   * @param {string} [config.apiKey] - API key; falls back to the
   *   `ORBIT_API_KEY` environment variable when a `process.env` exists.
   * @param {string} [config.costTolerance="low"] - Default cost tolerance used
   *   when a call does not supply its own.
   * @param {boolean} [config.verbose=false] - Log fingerprint and selection.
   */
  constructor(config = {}) {
    // `process` is undefined outside Node. Reading it through globalThis keeps
    // construction from throwing a ReferenceError there (a default instance is
    // created at import time, so that would break the import itself).
    this.apiKey = config.apiKey || globalThis.process?.env?.ORBIT_API_KEY;
    this.costTolerance = config.costTolerance || "low";
    this.verbose = config.verbose || false;
  }

  /**
   * Route a query to the optimal model.
   *
   * @param {string} queryText - The query to route. `null`/`undefined` are
   *   treated as an empty query; other non-strings are stringified.
   * @param {object} [options] - Optional overrides.
   * @param {string} [options.model] - Skip routing and use this model name.
   * @param {string} [options.costTolerance] - Per-call cost tolerance; falls
   *   back to the client default when absent.
   * @returns {Promise<{model: string, reasoning: string, fingerprint: object,
   *   estimated_cost: number, premium_cost: number, savings: number}>}
   */
  async query(queryText, options = {}) {
    const text = String(queryText ?? "");
    // Honor the per-call tolerance: the OpenAI-compatible wrapper forwards
    // `_orbitCostTolerance` through this option.
    const tolerance = options.costTolerance || this.costTolerance;
    const fp = this._fingerprint(text, tolerance);
    const { model, reasoning } = options.model
      ? { model: options.model, reasoning: "Manual override" }
      : this._route(fp);

    if (this.verbose) {
      console.log(`[ORBIT] Fingerprint:`, fp);
      console.log(`[ORBIT] Selected: ${model} — ${reasoning}`);
    }

    return {
      model,
      reasoning,
      fingerprint: fp,
      estimated_cost: this._estimateCost(model, text.length),
      premium_cost: this._estimateCost("gpt4o", text.length),
      // Computed lazily from the two fields above; never negative.
      get savings() { return Math.max(0, this.premium_cost - this.estimated_cost); },
    };
  }

  /**
   * Drop-in OpenAI compatibility:
   *   orbit.openai.chat.completions.create({ model: "gpt-4o", messages })
   * Routes on the space-joined message contents and returns the routing result
   * with an empty `choices` array; the selected model is not actually called.
   */
  get openai() {
    return {
      chat: {
        completions: {
          create: async (params) => {
            const queryText = params.messages?.map(m => m.content).join(" ") || "";
            const routing = await this.query(queryText, { costTolerance: params._orbitCostTolerance });
            // In production: actually call the selected model here
            return { ...routing, choices: [], _orbit_routed: true };
          },
        },
      },
    };
  }

  /**
   * 8-axis query fingerprinting based on keyword counts and word count.
   *
   * @param {string} text - Query text.
   * @param {string} [costTolerance=this.costTolerance] - Tolerance to report
   *   unless the query is forced to "flexible" (complexity > 7, or legal /
   *   medical domain).
   * @returns {{complexity: number, creativity: number, speed: number,
   *   recency: number, emotional_weight: number, context_load: number,
   *   domain: string, cost_tolerance: string}} Numeric axes are capped at 10.
   */
  _fingerprint(text, costTolerance = this.costTolerance) {
    const t = String(text ?? "").toLowerCase();
    // Count only non-empty tokens so empty or whitespace-padded input does not
    // register phantom words.
    const words = t.split(/\s+/).filter(Boolean).length;
    const hits = (pattern) => t.match(pattern)?.length || 0;
    const cap = (value) => Math.min(10, Math.round(value));

    const complexity = cap(
      hits(/\b(architect|distributed|implement|design|optimize|analyze|complex|system|algorithm|concurrent|scale)\b/g) * 2 +
      (words > 20 ? 3 : words > 10 ? 1.5 : 0) +
      hits(/\b(why|how|explain|compare|tradeoff)\b/g) * 1.5
    );
    const creativity = cap(
      hits(/\b(write|create|generate|poem|story|haiku|imagine|creative|brainstorm)\b/g) * 2.5
    );
    const recency = cap(
      hits(/\b(latest|current|today|news|trending|recent|now|live)\b/g) * 3
    );
    const emotional_weight = cap(
      hits(/\b(anxious|depressed|sad|crisis|suicide|hurt|lonely|overwhelmed|mental health)\b/g) * 4 +
      hits(/\b(help|struggling|hard|difficult|pain)\b/g) * 1.5
    );
    const context_load = cap(
      (words > 100 ? 9 : words > 50 ? 7 : words > 20 ? 4 : 1) +
      hits(/\b(document|paper|report|summarize)\b/g) * 2
    );
    const speed = cap(
      hits(/\b(quick|fast|brief|short|simple|just|tldr)\b/g) * 2.5 +
      (complexity < 3 ? 3 : 0)
    );

    // First matching keyword family wins; order is significant.
    let domain = "general";
    if (t.match(/\b(code|function|algorithm|javascript|python|api|sql|bug|deploy)\b/)) domain = "code";
    else if (t.match(/\b(poem|story|haiku|creative|write|lyrics|fiction)\b/)) domain = "creative";
    else if (t.match(/\b(research|study|paper|science|analysis|data|statistics)\b/)) domain = "research";
    else if (t.match(/\b(legal|law|contract|liability|compliance)\b/)) domain = "legal";
    else if (t.match(/\b(medical|health|diagnosis|symptoms|treatment|clinical)\b/)) domain = "medical";

    const cost_tolerance = (complexity > 7 || domain === "legal" || domain === "medical")
      ? "flexible" : costTolerance;

    return { complexity, creativity, speed, recency, emotional_weight, context_load, domain, cost_tolerance };
  }

  /**
   * SMM routing engine: returns the first rule that matches the fingerprint.
   * Rule order is significant (emotional and legal/medical checks come first).
   *
   * @param {object} fp - Fingerprint as produced by `_fingerprint`.
   * @returns {{model: string, reasoning: string}}
   */
  _route(fp) {
    const { complexity, creativity, recency, emotional_weight, context_load, speed, domain, cost_tolerance } = fp;
    if (emotional_weight > 6) return { model: "claude_sonnet", reasoning: "Emotional weight detected — Claude for ethics-first handling." };
    if (domain === "legal" || domain === "medical") return { model: "claude_sonnet", reasoning: `Domain=${domain} — ethics-first.` };
    if (context_load > 8) return { model: "claude_sonnet", reasoning: "High context load — Claude 200k window." };
    if (complexity > 7 && domain === "code") return { model: "gpt4o", reasoning: "High complexity code — GPT-4o structured output." };
    if (complexity > 8) return { model: "claude_sonnet", reasoning: "Extreme complexity — Claude Sonnet." };
    if (recency > 7) return { model: "grok", reasoning: "High recency — Grok real-time web." };
    if (recency > 5) return { model: "perplexity", reasoning: "Recency-sensitive — Perplexity cited search." };
    if (creativity > 7) return { model: "claude_sonnet", reasoning: "High creativity — Claude Sonnet." };
    if (speed > 7 && complexity < 5) return { model: "gemini_flash", reasoning: "Speed priority — Gemini Flash <200ms." };
    if (cost_tolerance === "low" && complexity < 5) return { model: "gemini_flash", reasoning: "Low cost + simple — Gemini Flash $0.50/1M." };
    if (complexity < 3 && creativity < 3) return { model: "gpt4o_mini", reasoning: "Trivial query — GPT-4o Mini." };
    if (complexity < 5 && speed > 5) return { model: "claude_haiku", reasoning: "Moderate complexity + speed — Claude Haiku." };
    return { model: "claude_sonnet", reasoning: "Multi-dimensional — Claude Sonnet default." };
  }

  /**
   * Estimate the cost of one call from the query's character length.
   *
   * @param {string} model - Model key; unknown keys are priced at 15 per 1M.
   * @param {number} textLength - Query length in characters.
   * @returns {number} Estimated cost: (textLength / 4 rounded, plus 150)
   *   tokens at the model's per-million rate.
   */
  _estimateCost(model, textLength) {
    const COSTS = { claude_sonnet: 15, claude_haiku: 1, gemini_flash: 0.5, gpt4o: 30, gpt4o_mini: 0.3, grok: 10, perplexity: 5, llama3: 0.1 };
    const tokens = Math.round(textLength / 4) + 150;
    return (tokens / 1_000_000) * (COSTS[model] || 15);
  }
}
|
|
136
|
-
|
|
137
|
-
export const orbit = new OrbitClient();
|
|
138
|
-
export { OrbitClient };
|
|
139
|
-
export default orbit;
|