@relayplane/proxy 1.8.6 → 1.8.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,264 @@
1
+ /**
2
+ * Ollama Local Model Provider
3
+ *
4
+ * Routes requests to a local Ollama instance for cost-free inference.
5
+ * Handles API format translation between OpenAI-compatible and Ollama's
6
+ * native /api/chat endpoint.
7
+ *
8
+ * Features:
9
+ * - Configurable base URL (default: http://localhost:11434)
10
+ * - Model availability detection with graceful fallback
11
+ * - Complexity-based routing (route "simple" tasks to local models)
12
+ * - OpenAI-compatible response format translation
13
+ * - Streaming support (SSE)
14
+ * - Health checking for availability
15
+ *
16
+ * Ollama API reference: https://github.com/ollama/ollama/blob/main/docs/api.md
17
+ *
18
+ * @packageDocumentation
19
+ */
20
+ /**
21
+ * Ollama provider configuration. Every field in this interface is optional.
22
+ *
23
+ * Example in ~/.relayplane/config.json:
24
+ * ```json
25
+ * {
26
+ * "providers": {
27
+ * "ollama": {
28
+ * "baseUrl": "http://localhost:11434",
29
+ * "models": ["llama3.2", "codestral"],
30
+ * "routeWhen": {
31
+ * "complexity": ["simple"]
32
+ * }
33
+ * }
34
+ * }
35
+ * }
36
+ * ```
37
+ */
38
+ export interface OllamaProviderConfig {
39
+ /** Base URL for the Ollama API (default: http://localhost:11434) */
40
+ baseUrl?: string;
41
+ /** List of available/preferred model names */
42
+ models?: string[];
43
+ /** Routing conditions: when to use Ollama instead of cloud providers; both sub-fields are optional */
44
+ routeWhen?: {
45
+ /** Complexity levels that should be routed to Ollama (e.g., ["simple"]) */
46
+ complexity?: string[];
47
+ /** Task types that should be routed to Ollama (e.g., ["question_answering"]) */
48
+ taskTypes?: string[];
49
+ };
50
+ /** Timeout in milliseconds for Ollama requests (default: 120000 = 2 min) */
51
+ timeoutMs?: number;
52
+ /** Default model to use when none specified (default: first in models list) */
53
+ defaultModel?: string;
54
+ /** Enable Ollama provider (default: true when configured) */
55
+ enabled?: boolean;
56
+ }
57
+ /** Default Ollama configuration values: base URL http://localhost:11434, a 120000 ms timeout, a model list and enabled = true. Every property is declared readonly. */
58
+ export declare const OLLAMA_DEFAULTS: {
59
+ readonly baseUrl: "http://localhost:11434";
60
+ readonly timeoutMs: 120000;
61
+ readonly models: string[];
62
+ readonly enabled: true;
63
+ };
64
+ /**
65
+ * Ollama /api/chat request body. Declared without `export`; `options` (and each field inside it) and `tools` are optional.
66
+ * @see https://github.com/ollama/ollama/blob/main/docs/api.md#generate-a-chat-completion
67
+ */
68
+ interface OllamaChatRequest {
69
+ model: string;
70
+ messages: Array<{
71
+ role: string;
72
+ content: string;
73
+ }>;
74
+ stream: boolean;
75
+ options?: {
76
+ temperature?: number;
77
+ num_predict?: number;
78
+ top_p?: number;
79
+ top_k?: number;
80
+ };
81
+ tools?: unknown[];
82
+ }
83
+ /**
84
+ * Ollama /api/chat response. Declared without `export`; `message.tool_calls` and the duration/count fields are optional.
85
+ */
86
+ interface OllamaChatResponse {
87
+ model: string;
88
+ message: {
89
+ role: string;
90
+ content: string;
91
+ tool_calls?: Array<{
92
+ function: {
93
+ name: string;
94
+ arguments: Record<string, unknown>;
95
+ };
96
+ }>;
97
+ };
98
+ done: boolean;
99
+ total_duration?: number;
100
+ load_duration?: number;
101
+ prompt_eval_count?: number;
102
+ eval_count?: number;
103
+ eval_duration?: number;
104
+ }
105
+ /**
106
+ * Ollama streaming chunk (NDJSON). Declared without `export`; `message` carries only `role` and `content` (no `tool_calls`), and the duration/count fields are optional.
107
+ */
108
+ interface OllamaStreamChunk {
109
+ model: string;
110
+ message: {
111
+ role: string;
112
+ content: string;
113
+ };
114
+ done: boolean;
115
+ total_duration?: number;
116
+ prompt_eval_count?: number;
117
+ eval_count?: number;
118
+ }
119
+ /**
120
+ * Result of an Ollama health/availability check. `available` and `models` are always present; `error` and `responseTimeMs` are optional.
121
+ */
122
+ export interface OllamaHealthResult {
123
+ available: boolean;
124
+ models: string[];
125
+ error?: string;
126
+ responseTimeMs?: number;
127
+ }
128
+ /**
129
+ * Check if the Ollama server is running and list available models.
130
+ * Non-blocking, with a short timeout. Both parameters are optional; the returned promise resolves to an OllamaHealthResult.
131
+ */
132
+ export declare function checkOllamaHealth(baseUrl?: string, timeoutMs?: number): Promise<OllamaHealthResult>;
133
+ /**
134
+ * Determine whether a request should be routed to Ollama based on config.
135
+ *
136
+ * @param config - Ollama provider configuration (the only required argument)
137
+ * @param complexity - Optional. Classified complexity of the request (simple/moderate/complex)
138
+ * @param taskType - Optional. Inferred task type
139
+ * @param model - Optional. Requested model name (may match an Ollama model)
140
+ *
141
+ * @returns true if the request should be routed to Ollama
142
+ */
143
+ export declare function shouldRouteToOllama(config: OllamaProviderConfig, complexity?: string, taskType?: string, model?: string): boolean;
144
+ /**
145
+ * Resolve the Ollama model name (returned as a string) from a requested model string and the provider config.
146
+ *
147
+ * - "ollama/llama3.2" → "llama3.2"
148
+ * - "llama3.2" (if in models list) → "llama3.2"
149
+ * - Unknown model → defaultModel or first in models list
150
+ */
151
+ export declare function resolveOllamaModel(requestedModel: string, config: OllamaProviderConfig): string;
152
+ /**
153
+ * Convert OpenAI-format messages to Ollama format. Input `content` is typed `string | unknown`, which TypeScript treats as plain `unknown`; output `content` is always a string.
154
+ *
155
+ * Ollama messages are simpler:
156
+ * - role: "system" | "user" | "assistant"
157
+ * - content: string (no array / multimodal blocks in base API)
158
+ * - Tool calls are passed through for models that support them (NOTE(review): the declared return type carries only `role` and `content`; confirm where tool calls end up)
159
+ */
160
+ export declare function convertMessagesToOllama(messages: Array<{
161
+ role: string;
162
+ content: string | unknown;
163
+ [key: string]: unknown;
164
+ }>): Array<{
165
+ role: string;
166
+ content: string;
167
+ }>;
168
+ /**
169
+ * Build an Ollama /api/chat request body from an OpenAI-compatible request. `options` is optional and may carry `temperature`, `max_tokens` and `tools`; the result is an OllamaChatRequest.
170
+ */
171
+ export declare function buildOllamaRequest(model: string, messages: Array<{
172
+ role: string;
173
+ content: string | unknown;
174
+ [key: string]: unknown;
175
+ }>, stream: boolean, options?: {
176
+ temperature?: number;
177
+ max_tokens?: number;
178
+ tools?: unknown[];
179
+ }): OllamaChatRequest;
180
+ /**
181
+ * Convert an Ollama /api/chat response to OpenAI chat completion format. The result is loosely typed as Record<string, unknown>, so callers must narrow its fields before use.
182
+ */
183
+ export declare function convertOllamaResponse(ollamaData: OllamaChatResponse, requestedModel: string): Record<string, unknown>;
184
+ /**
185
+ * Convert an Ollama NDJSON streaming chunk to OpenAI SSE format. The return type is `string | null`, so callers must handle a null result.
186
+ */
187
+ export declare function convertOllamaStreamChunk(chunk: OllamaStreamChunk, messageId: string, isFirst: boolean): string | null;
188
+ /**
189
+ * Forward a non-streaming request to Ollama and return the response
190
+ * translated to OpenAI format. The resolved object always has `success` and `latencyMs`; `data`, `usage` and `error` are optional.
191
+ */
192
+ export declare function forwardToOllama(model: string, messages: Array<{
193
+ role: string;
194
+ content: string | unknown;
195
+ [key: string]: unknown;
196
+ }>, options?: {
197
+ temperature?: number;
198
+ max_tokens?: number;
199
+ tools?: unknown[];
200
+ baseUrl?: string;
201
+ timeoutMs?: number;
202
+ }): Promise<{
203
+ success: boolean;
204
+ data?: Record<string, unknown>;
205
+ usage?: {
206
+ prompt_tokens: number;
207
+ completion_tokens: number;
208
+ total_tokens: number;
209
+ };
210
+ error?: {
211
+ code: string;
212
+ message: string;
213
+ status: number;
214
+ retryable: boolean;
215
+ };
216
+ latencyMs: number;
217
+ }>;
218
+ /**
219
+ * Forward a streaming request to Ollama.
220
+ * Resolves to a result object: `success` is always present, while `stream` (an AsyncGenerator yielding OpenAI-format SSE event strings) and `error` are optional.
221
+ *
222
+ * Ollama streams NDJSON (newline-delimited JSON), which we convert
223
+ * to OpenAI SSE (data: {...}\n\n) on the fly.
224
+ */
225
+ export declare function forwardToOllamaStream(model: string, messages: Array<{
226
+ role: string;
227
+ content: string | unknown;
228
+ [key: string]: unknown;
229
+ }>, options?: {
230
+ temperature?: number;
231
+ max_tokens?: number;
232
+ tools?: unknown[];
233
+ baseUrl?: string;
234
+ timeoutMs?: number;
235
+ }): Promise<{
236
+ success: boolean;
237
+ stream?: AsyncGenerator<string, void, unknown>;
238
+ error?: {
239
+ code: string;
240
+ message: string;
241
+ status: number;
242
+ retryable: boolean;
243
+ };
244
+ }>;
245
+ /**
246
+ * Check Ollama health with caching to avoid excessive probing. `baseUrl` is optional; the returned promise resolves to an OllamaHealthResult.
247
+ */
248
+ export declare function checkOllamaHealthCached(baseUrl?: string): Promise<OllamaHealthResult>;
249
+ /**
250
+ * Clear the health cache (useful for testing or after config changes). Takes no arguments and returns nothing.
251
+ */
252
+ export declare function clearOllamaHealthCache(): void;
253
+ /**
254
+ * Map cloud model names (keys) to reasonable Ollama equivalents (values).
255
+ * Used when falling back to Ollama from a cloud provider. The entries themselves are not visible in this declaration.
256
+ */
257
+ export declare const CLOUD_TO_OLLAMA_MODEL_MAP: Record<string, string>;
258
+ /**
259
+ * Map a cloud model name to an Ollama model name, given the provider config.
260
+ * Returns the configured default or a reasonable fallback, always as a string.
261
+ */
262
+ export declare function mapCloudModelToOllama(cloudModel: string, config: OllamaProviderConfig): string;
263
+ export {};
264
+ //# sourceMappingURL=ollama.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ollama.d.ts","sourceRoot":"","sources":["../src/ollama.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAEH;;;;;;;;;;;;;;;;;GAiBG;AACH,MAAM,WAAW,oBAAoB;IACnC,oEAAoE;IACpE,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,8CAA8C;IAC9C,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;IAClB,wEAAwE;IACxE,SAAS,CAAC,EAAE;QACV,2EAA2E;QAC3E,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;QACtB,gFAAgF;QAChF,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;KACtB,CAAC;IACF,4EAA4E;IAC5E,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,+EAA+E;IAC/E,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,6DAA6D;IAC7D,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB;AAED,0CAA0C;AAC1C,eAAO,MAAM,eAAe;;;qBAGZ,MAAM,EAAE;;CAEd,CAAC;AAEX;;;GAGG;AACH,UAAU,iBAAiB;IACzB,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IACnD,MAAM,EAAE,OAAO,CAAC;IAChB,OAAO,CAAC,EAAE;QACR,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,KAAK,CAAC,EAAE,MAAM,CAAC;KAChB,CAAC;IACF,KAAK,CAAC,EAAE,OAAO,EAAE,CAAC;CACnB;AAED;;GAEG;AACH,UAAU,kBAAkB;IAC1B,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE;QACP,IAAI,EAAE,MAAM,CAAC;QACb,OAAO,EAAE,MAAM,CAAC;QAChB,UAAU,CAAC,EAAE,KAAK,CAAC;YACjB,QAAQ,EAAE;gBAAE,IAAI,EAAE,MAAM,CAAC;gBAAC,SAAS,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;aAAE,CAAC;SAChE,CAAC,CAAC;KACJ,CAAC;IACF,IAAI,EAAE,OAAO,CAAC;IACd,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,aAAa,CAAC,EAAE,MAAM,CAAC;CACxB;AAED;;GAEG;AACH,UAAU,iBAAiB;IACzB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAA;KAAE,CAAC;IAC3C,IAAI,EAAE,OAAO,CAAC;IACd,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAcD;;GAEG;AACH,MAAM,WAAW,kBAAkB;IACjC,SAAS,EAAE,OAAO,CAAC;IACnB,MAAM,EAAE,MAAM,EAAE,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AAMD;;;GAGG;AACH,wBAAsB,iBAAiB,CACrC,OAAO,GAAE,MAAgC,EACzC,SAAS,GAAE,MAAa,GACvB,OAAO,CAAC,kBAAkB,CAAC,CAuC7B;AAMD;;;;;;;;;GASG;AACH,wBAAgB,
mBAAmB,CACjC,MAAM,EAAE,oBAAoB,EAC5B,UAAU,CAAC,EAAE,MAAM,EACnB,QAAQ,CAAC,EAAE,MAAM,EACjB,KAAK,CAAC,EAAE,MAAM,GACb,OAAO,CA+BT;AAED;;;;;;GAMG;AACH,wBAAgB,kBAAkB,CAChC,cAAc,EAAE,MAAM,EACtB,MAAM,EAAE,oBAAoB,GAC3B,MAAM,CAaR;AAiBD;;;;;;;GAOG;AACH,wBAAgB,uBAAuB,CACrC,QAAQ,EAAE,KAAK,CAAC;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC;IAAC,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAA;CAAE,CAAC,GACnF,KAAK,CAAC;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,CAAC,CAkB1C;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAChC,KAAK,EAAE,MAAM,EACb,QAAQ,EAAE,KAAK,CAAC;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC;IAAC,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAA;CAAE,CAAC,EACpF,MAAM,EAAE,OAAO,EACf,OAAO,CAAC,EAAE;IACR,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,KAAK,CAAC,EAAE,OAAO,EAAE,CAAC;CACnB,GACA,iBAAiB,CA0BnB;AAMD;;GAEG;AACH,wBAAgB,qBAAqB,CACnC,UAAU,EAAE,kBAAkB,EAC9B,cAAc,EAAE,MAAM,GACrB,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAyCzB;AAED;;GAEG;AACH,wBAAgB,wBAAwB,CACtC,KAAK,EAAE,iBAAiB,EACxB,SAAS,EAAE,MAAM,EACjB,OAAO,EAAE,OAAO,GACf,MAAM,GAAG,IAAI,CAyCf;AAMD;;;GAGG;AACH,wBAAsB,eAAe,CACnC,KAAK,EAAE,MAAM,EACb,QAAQ,EAAE,KAAK,CAAC;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC;IAAC,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAA;CAAE,CAAC,EACpF,OAAO,CAAC,EAAE;IACR,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,KAAK,CAAC,EAAE,OAAO,EAAE,CAAC;IAClB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB,GACA,OAAO,CAAC;IACT,OAAO,EAAE,OAAO,CAAC;IACjB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC/B,KAAK,CAAC,EAAE;QAAE,aAAa,EAAE,MAAM,CAAC;QAAC,iBAAiB,EAAE,MAAM,CAAC;QAAC,YAAY,EAAE,MAAM,CAAA;KAAE,CAAC;IACnF,KAAK,CAAC,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAC;QAAC,SAAS,EAAE,OAAO,CAAA;KAAE,CAAC;IAC9E,SAAS,EAAE,MAAM,CAAC;CACnB,CAAC,CA2DD;AAED;;;;;;GAMG;AACH,wBAAsB,qBAAqB,CACzC,KAAK,EAAE,MAAM,EACb,QAAQ,EAAE,KAAK,CAAC;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC;IAAC,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAA;CAAE,CAAC,E
ACpF,OAAO,CAAC,EAAE;IACR,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,KAAK,CAAC,EAAE,OAAO,EAAE,CAAC;IAClB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB,GACA,OAAO,CAAC;IACT,OAAO,EAAE,OAAO,CAAC;IACjB,MAAM,CAAC,EAAE,cAAc,CAAC,MAAM,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;IAC/C,KAAK,CAAC,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAC;QAAC,SAAS,EAAE,OAAO,CAAA;KAAE,CAAC;CAC/E,CAAC,CA8GD;AASD;;GAEG;AACH,wBAAsB,uBAAuB,CAC3C,OAAO,GAAE,MAAgC,GACxC,OAAO,CAAC,kBAAkB,CAAC,CAS7B;AAED;;GAEG;AACH,wBAAgB,sBAAsB,IAAI,IAAI,CAE7C;AAMD;;;GAGG;AACH,eAAO,MAAM,yBAAyB,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAc5D,CAAC;AAEF;;;GAGG;AACH,wBAAgB,qBAAqB,CACnC,UAAU,EAAE,MAAM,EAClB,MAAM,EAAE,oBAAoB,GAC3B,MAAM,CAUR"}