@pioneer-platform/pioneer-inference 1.0.0 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +2 -0
- package/CHANGELOG.md +13 -0
- package/README.md +162 -89
- package/dist/index.d.ts +90 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +189 -0
- package/dist/index.js.map +1 -0
- package/package.json +16 -23
- package/src/index.ts +266 -26
- package/tsconfig.json +7 -5
- package/src/inference.ts +0 -138
- package/src/providers/openai.ts +0 -69
- package/src/types.ts +0 -55
package/CHANGELOG.md
ADDED
package/README.md
CHANGED
@@ -1,144 +1,217 @@

# Pioneer Inference

OpenAI-compatible inference proxy for the Pioneer platform. This module provides a secure way to expose AI inference capabilities to your apps without exposing API keys.

## Features

- **API Key Protection**: Keep your AI provider API keys secure on the server
- **System Prompt Injection**: Automatically inject system prompts to guide model behavior
- **Multi-Provider Support**: Works with OpenAI, OpenRouter, and Venice.ai
- **OpenAI-Compatible API**: Drop-in replacement for OpenAI client libraries

## Installation

This is a workspace package in the Pioneer monorepo. It's automatically available to other packages via:

```typescript
import { InferenceService, createInferenceServiceFromEnv } from '@pioneer-platform/pioneer-inference';
```

## Configuration

Set the following environment variables:

```bash
# Provider selection (openai, openrouter, or venice)
INFERENCE_PROVIDER=openai

# API key for the selected provider
INFERENCE_API_KEY=your-api-key-here
# Or use the standard OpenAI key
OPENAI_API_KEY=your-api-key-here

# Optional: Custom base URL (for self-hosted or proxy endpoints)
INFERENCE_BASE_URL=https://api.openai.com/v1

# Optional: System prompt to inject in all requests
INFERENCE_SYSTEM_PROMPT="You are a helpful cryptocurrency wallet assistant."

# Optional: Default model to use
INFERENCE_DEFAULT_MODEL=gpt-4-turbo-preview
```

## Provider Configuration

### OpenAI
```bash
INFERENCE_PROVIDER=openai
INFERENCE_API_KEY=sk-...
# Default model: gpt-4-turbo-preview
```

### OpenRouter
```bash
INFERENCE_PROVIDER=openrouter
INFERENCE_API_KEY=sk-or-...
INFERENCE_BASE_URL=https://openrouter.ai/api/v1
# Default model: anthropic/claude-3-opus
```

### Venice.ai
```bash
INFERENCE_PROVIDER=venice
INFERENCE_API_KEY=your-venice-key
INFERENCE_BASE_URL=https://api.venice.ai/api/v1
# Default model: llama-3.1-405b
```

## Usage

### REST API Endpoints

The Pioneer server exposes OpenAI-compatible endpoints at `/v1`:

#### Create Chat Completion
```bash
POST http://localhost:9001/v1/chat/completions
Content-Type: application/json

{
  "model": "gpt-4-turbo-preview",
  "messages": [
    {
      "role": "user",
      "content": "What is Bitcoin?"
    }
  ],
  "temperature": 0.7,
  "max_tokens": 150
}
```

#### List Available Models
```bash
GET http://localhost:9001/v1/models
```

#### Get Provider Info
```bash
GET http://localhost:9001/v1/provider
```

Response:
```json
{
  "provider": "openai",
  "hasSystemPrompt": true,
  "configured": true
}
```

### Using in TypeScript

```typescript
import { InferenceService } from '@pioneer-platform/pioneer-inference';

// Create service with custom configuration
const service = new InferenceService({
  provider: 'openai',
  apiKey: 'sk-...',
  systemPrompt: 'You are a crypto assistant.',
  defaultModel: 'gpt-4-turbo-preview'
});

// Create chat completion
const response = await service.createChatCompletion({
  model: 'gpt-4-turbo-preview',
  messages: [
    { role: 'user', content: 'Explain blockchain' }
  ],
  temperature: 0.7,
  max_tokens: 150
});

console.log(response.choices[0].message.content);
```

### Using from Browser/Frontend

Your frontend apps can call the Pioneer server endpoints without exposing API keys:

```typescript
// Using OpenAI client library with Pioneer server as base URL
import OpenAI from 'openai';

const client = new OpenAI({
  apiKey: 'not-needed', // Server handles authentication
  baseURL: 'http://localhost:9001/v1',
  dangerouslyAllowBrowser: true // Only because we're proxying
});

const completion = await client.chat.completions.create({
  model: 'gpt-4-turbo-preview',
  messages: [
    { role: 'user', content: 'What is Ethereum?' }
  ]
});

console.log(completion.choices[0].message.content);
```

Or using fetch directly:

```typescript
const response = await fetch('http://localhost:9001/v1/chat/completions', {
  method: 'POST',
  headers: {
    'Content-Type': 'application/json'
  },
  body: JSON.stringify({
    model: 'gpt-4-turbo-preview',
    messages: [
      { role: 'user', content: 'What is Ethereum?' }
    ]
  })
});

const data = await response.json();
console.log(data.choices[0].message.content);
```

## System Prompt Injection

The service automatically injects a system prompt if:
1. A system prompt is configured via `INFERENCE_SYSTEM_PROMPT` or in the config
2. The messages array doesn't already contain a system message

This ensures consistent model behavior across all requests without requiring clients to specify the system prompt.
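For example (a minimal sketch of the rule, reusing the configuration style shown in the TypeScript section above):

```typescript
import { InferenceService } from '@pioneer-platform/pioneer-inference';

const service = new InferenceService({
  provider: 'openai',
  apiKey: 'sk-...',
  systemPrompt: 'You are a helpful cryptocurrency wallet assistant.'
});

// No system message in the request, so the configured prompt is prepended
// before the call is forwarded to the provider.
await service.createChatCompletion({
  model: 'gpt-4-turbo-preview',
  messages: [{ role: 'user', content: 'What is Bitcoin?' }]
});

// The request already carries a system message, so nothing is injected
// and the client-supplied prompt is used as-is.
await service.createChatCompletion({
  model: 'gpt-4-turbo-preview',
  messages: [
    { role: 'system', content: 'Answer in one short sentence.' },
    { role: 'user', content: 'What is Bitcoin?' }
  ]
});
```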
## Security Considerations

- **Never expose your API keys to the frontend** - always use the server proxy
- The Pioneer server should be configured with appropriate CORS settings
- Consider adding authentication to the inference endpoints for production use (see the sketch after this list)
- Rate limiting should be implemented to prevent API abuse
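This package does not ship authentication or rate limiting of its own, so those checks belong in the server that mounts the `/v1` routes. A rough sketch, assuming an Express-style server; the header scheme, the `PIONEER_INFERENCE_TOKEN` variable, and the 30-requests-per-minute limit are placeholders, not part of the package:

```typescript
import express from 'express';

const app = express();
app.use(express.json());

// Hypothetical bearer-token check in front of the inference routes.
app.use('/v1', (req, res, next) => {
  const token = (req.headers.authorization || '').replace('Bearer ', '');
  if (token !== process.env.PIONEER_INFERENCE_TOKEN) {
    return res.status(401).json({ error: 'unauthorized' });
  }
  next();
});

// Very small in-memory rate limit: at most 30 requests per minute per IP.
const hits = new Map<string, { count: number; reset: number }>();
app.use('/v1', (req, res, next) => {
  const ip = req.ip || 'unknown';
  const now = Date.now();
  const entry = hits.get(ip) || { count: 0, reset: now + 60_000 };
  if (now > entry.reset) {
    entry.count = 0;
    entry.reset = now + 60_000;
  }
  entry.count += 1;
  hits.set(ip, entry);
  if (entry.count > 30) {
    return res.status(429).json({ error: 'rate limit exceeded' });
  }
  next();
});
```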
## API Compatibility

This module implements the OpenAI Chat Completions API specification, making it compatible with:

- OpenAI's official client libraries
- Any tool or library that supports OpenAI-compatible APIs
- LangChain, LlamaIndex, and other AI frameworks

## Development

Build the module:

```bash
cd modules/pioneer/pioneer-inference
bun run build
```

Watch for changes:

```bash
bun run build:watch
```

## License
package/dist/index.d.ts
ADDED
@@ -0,0 +1,90 @@

```typescript
export type InferenceProvider = 'openai' | 'openrouter' | 'venice';
export interface InferenceConfig {
    provider: InferenceProvider;
    apiKey: string;
    baseURL?: string;
    systemPrompt?: string;
    defaultModel?: string;
}
export interface ChatMessage {
    role: 'system' | 'user' | 'assistant' | 'tool';
    content: string | null;
    tool_calls?: Array<{
        id: string;
        type: string;
        function: {
            name: string;
            arguments: string;
        };
    }>;
    tool_call_id?: string;
    name?: string;
}
export interface ChatCompletionRequest {
    model: string;
    messages: ChatMessage[];
    temperature?: number;
    max_tokens?: number;
    stream?: boolean;
    [key: string]: any;
}
export interface ChatCompletionResponse {
    id: string;
    object: string;
    created: number;
    model: string;
    choices: Array<{
        index: number;
        message: ChatMessage;
        finish_reason: string;
    }>;
    usage: {
        prompt_tokens: number;
        completion_tokens: number;
        total_tokens: number;
    };
}
export declare class InferenceService {
    private client;
    private config;
    private systemPrompt?;
    constructor(config: InferenceConfig);
    /**
     * Get the appropriate base URL for the provider
     */
    private getBaseURL;
    /**
     * Get default model for the provider
     */
    private getDefaultModel;
    /**
     * Create a chat completion
     * This is the main proxy method that injects system prompts and protects API keys
     */
    createChatCompletion(request: ChatCompletionRequest): Promise<ChatCompletionResponse>;
    /**
     * Stream chat completion (for real-time responses)
     */
    createStreamingChatCompletion(request: ChatCompletionRequest): Promise<any>;
    /**
     * List available models
     */
    listModels(): Promise<any>;
    /**
     * Get provider information
     */
    getProviderInfo(): {
        provider: InferenceProvider;
        hasSystemPrompt: boolean;
    };
    /**
     * Update system prompt
     */
    setSystemPrompt(prompt: string): void;
}
/**
 * Factory function to create inference service from environment variables
 */
export declare function createInferenceServiceFromEnv(): InferenceService;
export default InferenceService;
//# sourceMappingURL=index.d.ts.map
```
package/dist/index.d.ts.map
ADDED

@@ -0,0 +1 @@

{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"..."} (generated source map; the base64 VLQ mappings string is omitted here)
package/dist/index.js
ADDED
@@ -0,0 +1,189 @@

```js
"use strict";
/*
    Inference Proxy

    Goals:
     - Match OpenAI's API structure for compatibility
     - Allow configuring between openai, openrouter, venice.ai
     - All providers follow the OpenAI API format
     - Primary purpose: system prompt injection and API key protection
*/
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.InferenceService = void 0;
exports.createInferenceServiceFromEnv = createInferenceServiceFromEnv;
const TAG = ' | pioneer-inference | ';
const log = require('@pioneer-platform/loggerdog')();
const openai_1 = __importDefault(require("openai"));
class InferenceService {
    constructor(config) {
        const tag = TAG + ' | constructor | ';
        this.config = config;
        this.systemPrompt = config.systemPrompt;
        // Get base URL based on provider
        const baseURL = this.getBaseURL();
        log.info(tag, `Initializing ${config.provider} with baseURL: ${baseURL}`);
        // Initialize OpenAI client (works with all OpenAI-compatible APIs)
        this.client = new openai_1.default({
            apiKey: config.apiKey,
            baseURL,
        });
    }
    /**
     * Get the appropriate base URL for the provider
     */
    getBaseURL() {
        if (this.config.baseURL) {
            return this.config.baseURL;
        }
        switch (this.config.provider) {
            case 'openai':
                return undefined; // Use default OpenAI URL
            case 'openrouter':
                return 'https://openrouter.ai/api/v1';
            case 'venice':
                return 'https://api.venice.ai/api/v1';
            default:
                return undefined;
        }
    }
    /**
     * Get default model for the provider
     */
    getDefaultModel() {
        if (this.config.defaultModel) {
            return this.config.defaultModel;
        }
        switch (this.config.provider) {
            case 'openai':
                return 'gpt-4-turbo-preview';
            case 'openrouter':
                return 'anthropic/claude-3-opus';
            case 'venice':
                return 'llama-3.1-405b';
            default:
                return 'gpt-4-turbo-preview';
        }
    }
    /**
     * Create a chat completion
     * This is the main proxy method that injects system prompts and protects API keys
     */
    async createChatCompletion(request) {
        const tag = TAG + ' | createChatCompletion | ';
        try {
            // Inject system prompt if configured and not already present
            const messages = [...request.messages];
            if (this.systemPrompt && !messages.some(m => m.role === 'system')) {
                messages.unshift({
                    role: 'system',
                    content: this.systemPrompt,
                });
            }
            // Use configured default model if not specified
            const model = request.model || this.getDefaultModel();
            log.info(tag, `Creating completion with model: ${model}, messages: ${messages.length}`);
            // Make the API call (cast messages to any to avoid TypeScript errors with OpenAI types)
            const completion = await this.client.chat.completions.create({
                ...request,
                model,
                messages: messages,
            });
            return completion;
        }
        catch (error) {
            log.error(tag, 'Error creating chat completion:', error);
            throw error;
        }
    }
    /**
     * Stream chat completion (for real-time responses)
     */
    async createStreamingChatCompletion(request) {
        const tag = TAG + ' | createStreamingChatCompletion | ';
        try {
            // Inject system prompt if configured
            const messages = [...request.messages];
            if (this.systemPrompt && !messages.some(m => m.role === 'system')) {
                messages.unshift({
                    role: 'system',
                    content: this.systemPrompt,
                });
            }
            const model = request.model || this.getDefaultModel();
            log.info(tag, `Creating streaming completion with model: ${model}`);
            // Make streaming API call (cast messages to any to avoid TypeScript errors with OpenAI types)
            const stream = await this.client.chat.completions.create({
                ...request,
                model,
                messages: messages,
                stream: true,
            });
            return stream;
        }
        catch (error) {
            log.error(tag, 'Error creating streaming chat completion:', error);
            throw error;
        }
    }
    /**
     * List available models
     */
    async listModels() {
        const tag = TAG + ' | listModels | ';
        try {
            const models = await this.client.models.list();
            return models;
        }
        catch (error) {
            log.error(tag, 'Error listing models:', error);
            throw error;
        }
    }
    /**
     * Get provider information
     */
    getProviderInfo() {
        return {
            provider: this.config.provider,
            hasSystemPrompt: !!this.systemPrompt,
        };
    }
    /**
     * Update system prompt
     */
    setSystemPrompt(prompt) {
        this.systemPrompt = prompt;
    }
}
exports.InferenceService = InferenceService;
/**
 * Factory function to create inference service from environment variables
 */
function createInferenceServiceFromEnv() {
    const tag = TAG + ' | createInferenceServiceFromEnv | ';
    const provider = (process.env.INFERENCE_PROVIDER || 'openai');
    // Support multiple API key environment variables
    const apiKey = process.env.INFERENCE_API_KEY
        || process.env.VENICE_API_KEY
        || process.env.OPENAI_API_KEY
        || '';
    const baseURL = process.env.INFERENCE_BASE_URL;
    const systemPrompt = process.env.INFERENCE_SYSTEM_PROMPT;
    const defaultModel = process.env.INFERENCE_DEFAULT_MODEL;
    if (!apiKey) {
        log.warn(tag, 'No API key found in environment variables (checked: INFERENCE_API_KEY, VENICE_API_KEY, OPENAI_API_KEY)');
    }
    log.info(tag, `Creating inference service - provider: ${provider}, hasApiKey: ${!!apiKey}`);
    return new InferenceService({
        provider,
        apiKey,
        baseURL,
        systemPrompt,
        defaultModel,
    });
}
exports.default = InferenceService;
//# sourceMappingURL=index.js.map
```
package/dist/index.js.map
ADDED

@@ -0,0 +1 @@

{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"..."} (generated source map; the base64 VLQ mappings string is omitted here)
package/package.json
CHANGED
@@ -1,36 +1,29 @@

```json
{
  "name": "@pioneer-platform/pioneer-inference",
  "version": "1.0.2",
  "description": "OpenAI-compatible inference proxy for Pioneer platform",
  "main": "dist/index.js",
  "types": "dist/index.d.ts",
  "scripts": {
    "build": "tsc",
    "build:watch": "tsc --watch",
    "clean": "rm -rf dist node_modules"
  },
  "keywords": [
    "pioneer",
    "inference",
    "openai",
    "ai",
    "llm"
  ],
  "author": "",
  "license": "ISC",
  "dependencies": {
    "@pioneer-platform/loggerdog": "^8.11.0",
    "openai": "^4.26.0"
  },
  "devDependencies": {
    "@types/node": "^22.14.1",
    "typescript": "^5.8.3"
  }
}
```
package/src/index.ts
CHANGED
@@ -1,30 +1,270 @@

```typescript
/*
    Inference Proxy

    Goals:
     - Match OpenAI's API structure for compatibility
     - Allow configuring between openai, openrouter, venice.ai
     - All providers follow the OpenAI API format
     - Primary purpose: system prompt injection and API key protection
*/

const TAG = ' | pioneer-inference | ';
const log = require('@pioneer-platform/loggerdog')();

import OpenAI from 'openai';

export type InferenceProvider = 'openai' | 'openrouter' | 'venice';

export interface InferenceConfig {
    provider: InferenceProvider;
    apiKey: string;
    baseURL?: string;
    systemPrompt?: string;
    defaultModel?: string;
}

export interface ChatMessage {
    role: 'system' | 'user' | 'assistant' | 'tool';
    content: string | null;
    tool_calls?: Array<{
        id: string;
        type: string;
        function: {
            name: string;
            arguments: string;
        };
    }>;
    tool_call_id?: string;
    name?: string;
}

export interface ChatCompletionRequest {
    model: string;
    messages: ChatMessage[];
    temperature?: number;
    max_tokens?: number;
    stream?: boolean;
    [key: string]: any;
}

export interface ChatCompletionResponse {
    id: string;
    object: string;
    created: number;
    model: string;
    choices: Array<{
        index: number;
        message: ChatMessage;
        finish_reason: string;
    }>;
    usage: {
        prompt_tokens: number;
        completion_tokens: number;
        total_tokens: number;
    };
}

export class InferenceService {
    private client: OpenAI;
    private config: InferenceConfig;
    private systemPrompt?: string;

    constructor(config: InferenceConfig) {
        const tag = TAG + ' | constructor | ';
        this.config = config;
        this.systemPrompt = config.systemPrompt;

        // Get base URL based on provider
        const baseURL = this.getBaseURL();

        log.info(tag, `Initializing ${config.provider} with baseURL: ${baseURL}`);

        // Initialize OpenAI client (works with all OpenAI-compatible APIs)
        this.client = new OpenAI({
            apiKey: config.apiKey,
            baseURL,
        });
    }

    /**
     * Get the appropriate base URL for the provider
     */
    private getBaseURL(): string | undefined {
        if (this.config.baseURL) {
            return this.config.baseURL;
        }

        switch (this.config.provider) {
            case 'openai':
                return undefined; // Use default OpenAI URL
            case 'openrouter':
                return 'https://openrouter.ai/api/v1';
            case 'venice':
                return 'https://api.venice.ai/api/v1';
            default:
                return undefined;
        }
    }

    /**
     * Get default model for the provider
     */
    private getDefaultModel(): string {
        if (this.config.defaultModel) {
            return this.config.defaultModel;
        }

        switch (this.config.provider) {
            case 'openai':
                return 'gpt-4-turbo-preview';
            case 'openrouter':
                return 'anthropic/claude-3-opus';
            case 'venice':
                return 'llama-3.1-405b';
            default:
                return 'gpt-4-turbo-preview';
        }
    }

    /**
     * Create a chat completion
     * This is the main proxy method that injects system prompts and protects API keys
     */
    async createChatCompletion(
        request: ChatCompletionRequest
    ): Promise<ChatCompletionResponse> {
        const tag = TAG + ' | createChatCompletion | ';

        try {
            // Inject system prompt if configured and not already present
            const messages = [...request.messages];
            if (this.systemPrompt && !messages.some(m => m.role === 'system')) {
                messages.unshift({
                    role: 'system',
                    content: this.systemPrompt,
                });
            }

            // Use configured default model if not specified
            const model = request.model || this.getDefaultModel();

            log.info(tag, `Creating completion with model: ${model}, messages: ${messages.length}`);

            // Make the API call (cast messages to any to avoid TypeScript errors with OpenAI types)
            const completion = await this.client.chat.completions.create({
                ...request,
                model,
                messages: messages as any,
            });

            return completion as ChatCompletionResponse;
        } catch (error: any) {
            log.error(tag, 'Error creating chat completion:', error);
            throw error;
        }
    }

    /**
     * Stream chat completion (for real-time responses)
     */
    async createStreamingChatCompletion(
        request: ChatCompletionRequest
    ): Promise<any> {
        const tag = TAG + ' | createStreamingChatCompletion | ';

        try {
            // Inject system prompt if configured
            const messages = [...request.messages];
            if (this.systemPrompt && !messages.some(m => m.role === 'system')) {
                messages.unshift({
                    role: 'system',
                    content: this.systemPrompt,
                });
            }

            const model = request.model || this.getDefaultModel();

            log.info(tag, `Creating streaming completion with model: ${model}`);

            // Make streaming API call (cast messages to any to avoid TypeScript errors with OpenAI types)
            const stream = await this.client.chat.completions.create({
                ...request,
                model,
                messages: messages as any,
                stream: true,
            });

            return stream;
        } catch (error: any) {
            log.error(tag, 'Error creating streaming chat completion:', error);
            throw error;
        }
    }

    /**
     * List available models
     */
    async listModels(): Promise<any> {
        const tag = TAG + ' | listModels | ';

        try {
            const models = await this.client.models.list();
            return models;
        } catch (error: any) {
            log.error(tag, 'Error listing models:', error);
            throw error;
        }
    }

    /**
     * Get provider information
     */
    getProviderInfo(): { provider: InferenceProvider; hasSystemPrompt: boolean } {
        return {
            provider: this.config.provider,
            hasSystemPrompt: !!this.systemPrompt,
        };
    }

    /**
     * Update system prompt
     */
    setSystemPrompt(prompt: string): void {
        this.systemPrompt = prompt;
    }
}

/**
 * Factory function to create inference service from environment variables
 */
export function createInferenceServiceFromEnv(): InferenceService {
    const tag = TAG + ' | createInferenceServiceFromEnv | ';

    const provider = (process.env.INFERENCE_PROVIDER || 'openai') as InferenceProvider;

    // Support multiple API key environment variables
    const apiKey = process.env.INFERENCE_API_KEY
        || process.env.VENICE_API_KEY
        || process.env.OPENAI_API_KEY
        || '';

    const baseURL = process.env.INFERENCE_BASE_URL;
    const systemPrompt = process.env.INFERENCE_SYSTEM_PROMPT;
    const defaultModel = process.env.INFERENCE_DEFAULT_MODEL;

    if (!apiKey) {
        log.warn(tag, 'No API key found in environment variables (checked: INFERENCE_API_KEY, VENICE_API_KEY, OPENAI_API_KEY)');
    }

    log.info(tag, `Creating inference service - provider: ${provider}, hasApiKey: ${!!apiKey}`);

    return new InferenceService({
        provider,
        apiKey,
        baseURL,
        systemPrompt,
        defaultModel,
    });
}

export default InferenceService;
```
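The package itself stops at `createInferenceServiceFromEnv()`; the HTTP layer lives in the Pioneer server, which the README describes as exposing OpenAI-compatible endpoints at `/v1`. A rough sketch of how the factory could be wired up to those routes; the Express setup, port, and error handling here are assumptions for illustration, not code from this diff:

```typescript
import express from 'express';
import { createInferenceServiceFromEnv } from '@pioneer-platform/pioneer-inference';

const app = express();
app.use(express.json());

// API keys stay in server-side environment variables; clients never see them.
const inference = createInferenceServiceFromEnv();

// Proxy endpoint matching the README's POST /v1/chat/completions example.
app.post('/v1/chat/completions', async (req, res) => {
  try {
    const completion = await inference.createChatCompletion(req.body);
    res.json(completion);
  } catch (error: any) {
    res.status(500).json({ error: error.message });
  }
});

// GET /v1/models and GET /v1/provider, as documented in the README.
app.get('/v1/models', async (_req, res) => res.json(await inference.listModels()));
app.get('/v1/provider', (_req, res) => res.json(inference.getProviderInfo()));

app.listen(9001);
```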
package/tsconfig.json
CHANGED
@@ -3,16 +3,18 @@

```json
    "target": "ES2020",
    "module": "commonjs",
    "lib": ["ES2020"],
    "outDir": "./dist",
    "rootDir": "./src",
    "declaration": true,
    "declarationMap": true,
    "sourceMap": true,
    "strict": true,
    "esModuleInterop": true,
    "skipLibCheck": true,
    "forceConsistentCasingInFileNames": true,
    "resolveJsonModule": true,
    "moduleResolution": "node"
  },
  "include": ["src/**/*"],
  "exclude": ["node_modules", "dist"]
}
```
package/src/inference.ts
DELETED
@@ -1,138 +0,0 @@

```typescript
/**
 * Main Inference Client
 *
 * Provides a unified interface for LLM inference across multiple providers
 */

import type {
    InferenceConfig,
    InferenceProvider,
    ChatCompletionRequest,
    ChatCompletionResponse,
    Message,
} from './types';
import { OpenAIProvider } from './providers/openai';

export class InferenceClient {
    private provider: InferenceProvider;
    private conversationHistory: Message[] = [];

    constructor(config: InferenceConfig) {
        this.provider = this.createProvider(config);
    }

    private createProvider(config: InferenceConfig): InferenceProvider {
        switch (config.provider) {
            case 'openai':
                if (!config.apiKey) {
                    throw new Error('OpenAI API key is required');
                }
                return new OpenAIProvider({
                    apiKey: config.apiKey,
                    baseURL: config.baseURL,
                    timeout: config.timeout,
                    defaultModel: config.defaultModel,
                });

            case 'anthropic':
                throw new Error('Anthropic provider not yet implemented');

            case 'ollama':
                throw new Error('Ollama provider not yet implemented');

            case 'custom':
                throw new Error('Custom provider requires implementation');

            default:
                throw new Error(`Unknown provider: ${config.provider}`);
        }
    }

    /**
     * Send a chat message
     */
    async chat(message: string, options?: Partial<ChatCompletionRequest>): Promise<string> {
        // Add user message to history
        this.conversationHistory.push({
            role: 'user',
            content: message,
        });

        const request: ChatCompletionRequest = {
            model: options?.model || 'gpt-4-turbo-preview',
            messages: this.conversationHistory,
            ...options,
        };

        const response = await this.provider.chat(request);
        const assistantMessage = response.choices[0]?.message?.content || '';

        // Add assistant response to history
        this.conversationHistory.push({
            role: 'assistant',
            content: assistantMessage,
        });

        return assistantMessage;
    }

    /**
     * Send a chat completion request with full control
     */
    async chatCompletion(request: ChatCompletionRequest): Promise<ChatCompletionResponse> {
        return this.provider.chat(request);
    }

    /**
     * Set system prompt
     */
    setSystemPrompt(prompt: string): void {
        // Remove existing system message if any
        this.conversationHistory = this.conversationHistory.filter(m => m.role !== 'system');

        // Add new system message at the beginning
        this.conversationHistory.unshift({
            role: 'system',
            content: prompt,
        });
    }

    /**
     * Clear conversation history (optionally keep system prompt)
     */
    clearHistory(keepSystemPrompt: boolean = true): void {
        if (keepSystemPrompt) {
            this.conversationHistory = this.conversationHistory.filter(m => m.role === 'system');
        } else {
            this.conversationHistory = [];
        }
    }

    /**
     * Get conversation history
     */
    getHistory(): Message[] {
        return [...this.conversationHistory];
    }

    /**
     * Get message count (excluding system messages)
     */
    getMessageCount(): number {
        return this.conversationHistory.filter(m => m.role !== 'system').length;
    }

    /**
     * Check if provider is configured
     */
    isConfigured(): boolean {
        return this.provider.isConfigured();
    }

    /**
     * Get provider name
     */
    getProviderName(): string {
        return this.provider.name;
    }
}
```
package/src/providers/openai.ts
DELETED
@@ -1,69 +0,0 @@

```typescript
/**
 * OpenAI Provider Implementation
 */

import OpenAI from 'openai';
import type {
    InferenceProvider,
    ChatCompletionRequest,
    ChatCompletionResponse,
} from '../types';

export interface OpenAIConfig {
    apiKey: string;
    baseURL?: string;
    timeout?: number;
    defaultModel?: string;
}

export class OpenAIProvider implements InferenceProvider {
    public readonly name = 'openai';
    private client: OpenAI;
    private config: OpenAIConfig;

    constructor(config: OpenAIConfig) {
        this.config = config;
        this.client = new OpenAI({
            apiKey: config.apiKey,
            baseURL: config.baseURL,
            timeout: config.timeout || 60000,
        });
    }

    async chat(request: ChatCompletionRequest): Promise<ChatCompletionResponse> {
        try {
            // Forward ALL parameters from request to OpenAI
            // This includes tools, tool_choice, and any other OpenAI-specific params
            const { model, messages, ...extraParams } = request;

            const completion = await this.client.chat.completions.create({
                model: model || this.config.defaultModel || 'gpt-4-turbo-preview',
                messages: messages as any,
                ...extraParams, // Forward all extra parameters (tools, tool_choice, etc.)
            });

            // Convert OpenAI response to our standard format
            return completion as ChatCompletionResponse;
        } catch (error) {
            if (error instanceof OpenAI.APIError) {
                throw new Error(`OpenAI API Error: ${error.message}`);
            }
            throw error;
        }
    }

    isConfigured(): boolean {
        return !!this.config.apiKey;
    }

    /**
     * Update the base URL (useful for pointing to different endpoints)
     */
    setBaseURL(baseURL: string): void {
        this.client = new OpenAI({
            apiKey: this.config.apiKey,
            baseURL,
            timeout: this.config.timeout,
        });
    }
}
```
package/src/types.ts
DELETED
@@ -1,55 +0,0 @@

```typescript
/**
 * Common types for LLM inference
 */

export interface Message {
    role: 'system' | 'user' | 'assistant' | 'tool';
    content: string | null;
    tool_calls?: any[];
    tool_call_id?: string;
    name?: string;
}

export interface ChatCompletionRequest {
    model: string;
    messages: Message[];
    temperature?: number;
    max_tokens?: number;
    stream?: boolean;
    [key: string]: any;
}

export interface ChatCompletionResponse {
    id: string;
    object: string;
    created: number;
    model: string;
    choices: Array<{
        index: number;
        message: {
            role: string;
            content: string | null;
            tool_calls?: any[];
        };
        finish_reason: string;
    }>;
    usage?: {
        prompt_tokens: number;
        completion_tokens: number;
        total_tokens: number;
    };
}

export interface InferenceProvider {
    name: string;
    chat(request: ChatCompletionRequest): Promise<ChatCompletionResponse>;
    isConfigured(): boolean;
}

export interface InferenceConfig {
    provider: 'openai' | 'anthropic' | 'ollama' | 'custom';
    apiKey?: string;
    baseURL?: string;
    defaultModel?: string;
    timeout?: number;
}
```