@use-solace/openllm 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +185 -0
- package/dist/client.d.ts +21 -0
- package/dist/client.d.ts.map +1 -0
- package/dist/client.js +189 -0
- package/dist/client.js.map +1 -0
- package/dist/elysia.d.ts +31 -0
- package/dist/elysia.d.ts.map +1 -0
- package/dist/elysia.js +66 -0
- package/dist/elysia.js.map +1 -0
- package/dist/index.d.ts +11 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +15 -0
- package/dist/index.js.map +1 -0
- package/dist/registry.d.ts +20 -0
- package/dist/registry.d.ts.map +1 -0
- package/dist/registry.js +99 -0
- package/dist/registry.js.map +1 -0
- package/dist/types.d.ts +125 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +29 -0
- package/dist/types.js.map +1 -0
- package/example.ts +62 -0
- package/package.json +46 -0
- package/src/client.ts +244 -0
- package/src/elysia.ts +99 -0
- package/src/index.ts +55 -0
- package/src/registry.ts +123 -0
- package/src/types.ts +164 -0
- package/tsconfig.json +25 -0
package/dist/registry.js
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
/**
 * In-memory registry of LLM model metadata, keyed by registry id.
 *
 * Inputs (RegistryEntryInput) are normalized on insertion into full
 * ModelRegistryEntry records carrying runtime load state.
 */
export class ModelRegistryImpl {
    entries = new Map();

    /**
     * @param config - registry config; every entry in `config.entries`
     *   (id -> RegistryEntryInput) is registered immediately.
     */
    constructor(config) {
        const entries = config.entries ?? {};
        for (const [id, entry] of Object.entries(entries)) {
            this.registerEntry(id, entry);
        }
    }

    /**
     * Normalize an input entry into a full registry record and store it.
     * Overwrites any existing record under `id`. Returns the stored record.
     */
    registerEntry(id, entry) {
        const model = {
            id,
            // The input's `id` field carries the backend model name/tag,
            // which becomes the display name of the stored record.
            name: entry.id,
            inference: entry.inference,
            context: entry.context,
            quant: entry.quant,
            capabilities: entry.capabilities,
            latency: entry.latency,
            // Honor an explicitly supplied size; otherwise fall back to the
            // previous hard-coded 4 GB placeholder (fixes sizes provided via
            // RegisterModelRequest.size_bytes being silently discarded).
            size_bytes: entry.size_bytes ?? 4_000_000_000,
            loaded: false,
            loaded_at: undefined,
        };
        this.entries.set(id, model);
        return model;
    }

    /** All registered records, in insertion order. */
    list() {
        return Array.from(this.entries.values());
    }

    /** Record for `id`, or undefined when absent. */
    get(id) {
        return this.entries.get(id);
    }

    /**
     * Filter records by capability, latency profile, backend, minimum
     * context window, and/or load state. Omitted options do not filter.
     * Note: filters use truthy checks, so e.g. `minContext: 0` is treated
     * as "no constraint" (harmless — it would exclude nothing anyway).
     */
    find(options = {}) {
        const results = this.list().filter((model) => {
            if (options.capability && !model.capabilities.includes(options.capability)) {
                return false;
            }
            if (options.latency && model.latency !== options.latency) {
                return false;
            }
            if (options.inference && model.inference !== options.inference) {
                return false;
            }
            if (options.minContext && model.context < options.minContext) {
                return false;
            }
            if (options.loaded !== undefined && model.loaded !== options.loaded) {
                return false;
            }
            return true;
        });
        return results;
    }

    /** First record matching `options` (insertion order), or undefined. */
    findOne(options = {}) {
        return this.find(options)[0];
    }

    /** Whether a record exists for `id`. */
    has(id) {
        return this.entries.has(id);
    }

    /** Number of registered records. */
    count() {
        return this.entries.size;
    }

    /**
     * Register a new entry. Unlike registerEntry, refuses to overwrite.
     * @throws Error when a record with `id` already exists.
     */
    add(id, entry) {
        if (this.entries.has(id)) {
            throw new Error(`Model with id '${id}' already exists`);
        }
        return this.registerEntry(id, entry);
    }

    /**
     * Shallow-merge `updates` into the existing record. The `id` field is
     * always forced back to the registry key so updates cannot desync it.
     * @throws Error when no record with `id` exists.
     */
    update(id, updates) {
        const existing = this.entries.get(id);
        if (!existing) {
            throw new Error(`Model with id '${id}' not found`);
        }
        const updated = {
            ...existing,
            ...updates,
            id,
        };
        this.entries.set(id, updated);
        return updated;
    }

    /** Remove the record for `id`; returns true if one was removed. */
    remove(id) {
        return this.entries.delete(id);
    }

    /** Drop all records. */
    clear() {
        this.entries.clear();
    }

    /** Snapshot as a plain object (id -> full record). */
    toObject() {
        return Object.fromEntries(this.entries);
    }

    /**
     * Replace all records from a plain object. Values are stored as-is
     * (no normalization), so `obj` is expected to contain full records as
     * produced by toObject().
     */
    fromObject(obj) {
        this.entries.clear();
        for (const [id, entry] of Object.entries(obj)) {
            this.entries.set(id, entry);
        }
    }
}
|
|
96
|
+
/**
 * Factory helper: builds a ModelRegistryImpl from the given config
 * without callers having to use `new` directly.
 */
export function ModelRegistry(config) {
    const registry = new ModelRegistryImpl(config);
    return registry;
}
|
|
99
|
+
//# sourceMappingURL=registry.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"registry.js","sourceRoot":"","sources":["../src/registry.ts"],"names":[],"mappings":"AAOA,MAAM,OAAO,iBAAiB;IACpB,OAAO,GAAoC,IAAI,GAAG,EAAE,CAAC;IAE7D,YAAY,MAA2B;QACrC,MAAM,OAAO,GAAG,MAAM,CAAC,OAAO,IAAI,EAAE,CAAC;QACrC,KAAK,MAAM,CAAC,EAAE,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC;YAClD,IAAI,CAAC,aAAa,CAAC,EAAE,EAAE,KAAK,CAAC,CAAC;QAChC,CAAC;IACH,CAAC;IAEO,aAAa,CAAC,EAAU,EAAE,KAAyB;QACzD,MAAM,KAAK,GAAuB;YAChC,EAAE;YACF,IAAI,EAAE,KAAK,CAAC,EAAE;YACd,SAAS,EAAE,KAAK,CAAC,SAAS;YAC1B,OAAO,EAAE,KAAK,CAAC,OAAO;YACtB,KAAK,EAAE,KAAK,CAAC,KAAK;YAClB,YAAY,EAAE,KAAK,CAAC,YAAY;YAChC,OAAO,EAAE,KAAK,CAAC,OAAO;YACtB,UAAU,EAAE,aAAa;YACzB,MAAM,EAAE,KAAK;YACb,SAAS,EAAE,SAAS;SACrB,CAAC;QACF,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,EAAE,KAAK,CAAC,CAAC;QAC5B,OAAO,KAAK,CAAC;IACf,CAAC;IAED,IAAI;QACF,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;IAC3C,CAAC;IAED,GAAG,CAAC,EAAU;QACZ,OAAO,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IAC9B,CAAC;IAED,IAAI,CAAC,UAA4B,EAAE;QACjC,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE;YAC3C,IAAI,OAAO,CAAC,UAAU,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,QAAQ,CAAC,OAAO,CAAC,UAAU,CAAC,EAAE,CAAC;gBAC3E,OAAO,KAAK,CAAC;YACf,CAAC;YACD,IAAI,OAAO,CAAC,OAAO,IAAI,KAAK,CAAC,OAAO,KAAK,OAAO,CAAC,OAAO,EAAE,CAAC;gBACzD,OAAO,KAAK,CAAC;YACf,CAAC;YACD,IAAI,OAAO,CAAC,SAAS,IAAI,KAAK,CAAC,SAAS,KAAK,OAAO,CAAC,SAAS,EAAE,CAAC;gBAC/D,OAAO,KAAK,CAAC;YACf,CAAC;YACD,IAAI,OAAO,CAAC,UAAU,IAAI,KAAK,CAAC,OAAO,GAAG,OAAO,CAAC,UAAU,EAAE,CAAC;gBAC7D,OAAO,KAAK,CAAC;YACf,CAAC;YACD,IAAI,OAAO,CAAC,MAAM,KAAK,SAAS,IAAI,KAAK,CAAC,MAAM,KAAK,OAAO,CAAC,MAAM,EAAE,CAAC;gBACpE,OAAO,KAAK,CAAC;YACf,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC,CAAC,CAAC;QACH,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,OAAO,CAAC,UAA4B,EAAE;QACpC,OAAO,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC;IAC/B,CAAC;IAED,GAAG,CAAC,EAAU;QACZ,OAAO,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IAC9B,CAAC;IAED,KAAK;QACH,OAAO,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC;IAC3B,CAAC;IAED,GAAG,CAAC,EAAU,EAAE,KAAyB;QACvC
,IAAI,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC;YACzB,MAAM,IAAI,KAAK,CAAC,kBAAkB,EAAE,kBAAkB,CAAC,CAAC;QAC1D,CAAC;QACD,OAAO,IAAI,CAAC,aAAa,CAAC,EAAE,EAAE,KAAK,CAAC,CAAC;IACvC,CAAC;IAED,MAAM,CAAC,EAAU,EAAE,OAAoC;QACrD,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QACtC,IAAI,CAAC,QAAQ,EAAE,CAAC;YACd,MAAM,IAAI,KAAK,CAAC,kBAAkB,EAAE,aAAa,CAAC,CAAC;QACrD,CAAC;QAED,MAAM,OAAO,GAAuB;YAClC,GAAG,QAAQ;YACX,GAAG,OAAO;YACV,EAAE;SACH,CAAC;QACF,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,EAAE,OAAO,CAAC,CAAC;QAC9B,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,MAAM,CAAC,EAAU;QACf,OAAO,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;IACjC,CAAC;IAED,KAAK;QACH,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;IACvB,CAAC;IAED,QAAQ;QACN,OAAO,MAAM,CAAC,WAAW,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAC1C,CAAC;IAED,UAAU,CAAC,GAAuC;QAChD,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;QACrB,KAAK,MAAM,CAAC,EAAE,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC;YAC9C,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,EAAE,KAAK,CAAC,CAAC;QAC9B,CAAC;IACH,CAAC;CACF;AAED,MAAM,UAAU,aAAa,CAC3B,MAA2B;IAE3B,OAAO,IAAI,iBAAiB,CAAC,MAAM,CAAC,CAAC;AACvC,CAAC"}
|
package/dist/types.d.ts
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
/** Inference backends the engine can dispatch to. */
export type InferenceBackend = "ollama" | "llama" | "huggingface" | "openai";
/** Feature classes a model may advertise. */
export type ModelCapability = "chat" | "vision" | "embedding" | "completion";
/** Coarse latency expectation used when selecting a model. */
export type LatencyProfile = "extreme" | "fast" | "slow";
/** Full registry record for a model, including runtime load state. */
export interface ModelRegistryEntry {
    /** Registry key for this model. */
    id: string;
    /** Display name (the registry stores the input's `id` here — i.e. the backend model name/tag). */
    name: string;
    inference: InferenceBackend;
    /** Context window size — presumably in tokens; confirm against the engine. */
    context: number;
    /** Quantization label, e.g. "Q4_K_M". */
    quant?: string;
    capabilities: ModelCapability[];
    latency?: LatencyProfile;
    /** Model size in bytes (registry default is a 4 GB placeholder). */
    size_bytes: number;
    /** Whether the model is currently loaded. */
    loaded: boolean;
    /** Timestamp of the last load, if any — format set by the server; not shown here. */
    loaded_at?: string;
}
/** Lightweight input used to register a model; normalized into ModelRegistryEntry. */
export interface RegistryEntryInput {
    /** Backend model name/tag; becomes the record's `name`. */
    id: string;
    inference: InferenceBackend;
    context: number;
    quant?: string;
    capabilities: ModelCapability[];
    latency?: LatencyProfile;
}
/** Constructor config for the model registry. */
export interface ModelRegistryConfig {
    /** Registry id -> input entry, registered at construction time. */
    entries: Record<string, RegistryEntryInput>;
}
|
|
27
|
+
/** Payload of GET /health. */
export interface HealthResponse {
    status: string;
    timestamp: string;
    /** Number of currently loaded models. */
    models_loaded: number;
}
/** Payload of GET /v1/models. */
export interface ModelListResponse {
    models: ModelRegistryEntry[];
}
/** Request body of POST /v1/models/register. */
export interface RegisterModelRequest {
    id: string;
    name: string;
    inference: InferenceBackend;
    context: number;
    quant?: string;
    capabilities: ModelCapability[];
    latency?: LatencyProfile;
    /** Optional explicit model size in bytes. */
    size_bytes?: number;
}
/** Response of POST /v1/models/register. */
export interface RegisterModelResponse {
    success: boolean;
    model: ModelRegistryEntry;
    message: string;
}
/** Request body of POST /v1/models/load. */
export interface LoadModelRequest {
    model_id: string;
}
/** Response of POST /v1/models/load. */
export interface LoadModelResponse {
    success: boolean;
    model_id: string;
    message: string;
}
/** Response of POST /v1/models/unload/:id. */
export interface UnloadModelResponse {
    success: boolean;
    model_id: string;
    message: string;
}
|
|
63
|
+
/** Request body for POST /v1/inference and /v1/inference/stream. */
export interface InferenceRequest {
    model_id: string;
    prompt: string;
    max_tokens?: number;
    temperature?: number;
}
/** Completed (non-streaming) inference result. */
export interface InferenceResponse {
    model_id: string;
    text: string;
    tokens_generated: number;
    /** e.g. "stop"; for client-synthesized streaming completions it is always "stop". */
    finish_reason: string;
}
/** One SSE token event from the streaming endpoint. */
export interface StreamToken {
    token: string;
    token_id: number;
    /** True on the final token of the stream. */
    complete: boolean;
}
/** Invoked once per received token. */
export type StreamCallback = (token: StreamToken) => void;
/** Invoked once when a token with `complete: true` arrives. */
export type StreamCompleteCallback = (response: InferenceResponse) => void;
/** Invoked when the stream fails (HTTP error, timeout, parse/network error). */
export type StreamErrorCallback = (error: Error) => void;
/** Callbacks driving OpenLLMClient.inferenceStream. */
export interface StreamOptions {
    onToken: StreamCallback;
    onComplete?: StreamCompleteCallback;
    onError?: StreamErrorCallback;
}
|
|
88
|
+
/** Construction options for OpenLLMClient. */
export interface OpenLLMConfig {
    /**
     * Port (or port-like string) of the local engine; the client targets
     * `http://localhost:<engine>`. Defaults to 8080.
     */
    engine?: string | number;
    /** Per-request timeout in milliseconds (default 30000). */
    timeout?: number;
}
/** Filters for ModelRegistry find/findOne; omitted fields do not filter. */
export interface FindModelOptions {
    capability?: ModelCapability;
    latency?: LatencyProfile;
    inference?: InferenceBackend;
    /** Minimum context window; 0 is treated as "no constraint". */
    minContext?: number;
    loaded?: boolean;
}
/** Options for the Elysia API plugin (consumed by src/elysia.ts — not shown here). */
export interface APIConfig {
    modelrouter?: boolean;
    /** NOTE(review): `unknown | string` collapses to `unknown`; the union adds nothing. */
    registry?: unknown | string;
    engine?: string | number;
    prefix?: string;
}
|
|
105
|
+
/** Base error for the OpenLLM layer, carrying a machine-readable code and HTTP status. */
export declare class OpenLLMError extends Error {
    code?: string | undefined;
    statusCode?: number | undefined;
    constructor(message: string, code?: string | undefined, statusCode?: number | undefined);
}
/** Model id not present in the registry (code MODEL_NOT_FOUND, status 404). */
export declare class ModelNotFoundError extends OpenLLMError {
    constructor(modelId: string);
}
/** Model registered but not loaded (code MODEL_NOT_LOADED, status 412). */
export declare class ModelNotLoadedError extends OpenLLMError {
    constructor(modelId: string);
}
/** Inference-time failure (code INFERENCE_ERROR, status 502). */
export declare class InferenceError extends OpenLLMError {
    constructor(message: string);
}
/** Structural subset of the registry implementation (read-oriented surface). */
export interface ModelRegistryInstance {
    list(): ReturnType<typeof import("./registry").ModelRegistryImpl.prototype.list>;
    get(id: string): ReturnType<typeof import("./registry").ModelRegistryImpl.prototype.get>;
    findOne(options: FindModelOptions): ReturnType<typeof import("./registry").ModelRegistryImpl.prototype.findOne>;
}
/** NOTE(review): this alias shadows the `ModelRegistryImpl` class exported from ./registry — confirm intended. */
export type ModelRegistryImpl = ModelRegistryInstance;
|
|
125
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA,MAAM,MAAM,gBAAgB,GAAG,QAAQ,GAAG,OAAO,GAAG,aAAa,GAAG,QAAQ,CAAC;AAE7E,MAAM,MAAM,eAAe,GAAG,MAAM,GAAG,QAAQ,GAAG,WAAW,GAAG,YAAY,CAAC;AAE7E,MAAM,MAAM,cAAc,GAAG,SAAS,GAAG,MAAM,GAAG,MAAM,CAAC;AAEzD,MAAM,WAAW,kBAAkB;IACjC,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,gBAAgB,CAAC;IAC5B,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,YAAY,EAAE,eAAe,EAAE,CAAC;IAChC,OAAO,CAAC,EAAE,cAAc,CAAC;IACzB,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,OAAO,CAAC;IAChB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,kBAAkB;IACjC,EAAE,EAAE,MAAM,CAAC;IACX,SAAS,EAAE,gBAAgB,CAAC;IAC5B,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,YAAY,EAAE,eAAe,EAAE,CAAC;IAChC,OAAO,CAAC,EAAE,cAAc,CAAC;CAC1B;AAED,MAAM,WAAW,mBAAmB;IAClC,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,kBAAkB,CAAC,CAAC;CAC7C;AAED,MAAM,WAAW,cAAc;IAC7B,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,aAAa,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,WAAW,iBAAiB;IAChC,MAAM,EAAE,kBAAkB,EAAE,CAAC;CAC9B;AAED,MAAM,WAAW,oBAAoB;IACnC,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,gBAAgB,CAAC;IAC5B,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,YAAY,EAAE,eAAe,EAAE,CAAC;IAChC,OAAO,CAAC,EAAE,cAAc,CAAC;IACzB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,qBAAqB;IACpC,OAAO,EAAE,OAAO,CAAC;IACjB,KAAK,EAAE,kBAAkB,CAAC;IAC1B,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,iBAAiB;IAChC,OAAO,EAAE,OAAO,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,mBAAmB;IAClC,OAAO,EAAE,OAAO,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,iBAAiB;IAChC,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,MAAM,CAAC;IACb,gBAAgB,EAAE,MAAM,CAAC;IACzB,aAAa,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,MAAM,CAAC;IACd,QAA
Q,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,OAAO,CAAC;CACnB;AAED,MAAM,MAAM,cAAc,GAAG,CAAC,KAAK,EAAE,WAAW,KAAK,IAAI,CAAC;AAC1D,MAAM,MAAM,sBAAsB,GAAG,CAAC,QAAQ,EAAE,iBAAiB,KAAK,IAAI,CAAC;AAC3E,MAAM,MAAM,mBAAmB,GAAG,CAAC,KAAK,EAAE,KAAK,KAAK,IAAI,CAAC;AAEzD,MAAM,WAAW,aAAa;IAC5B,OAAO,EAAE,cAAc,CAAC;IACxB,UAAU,CAAC,EAAE,sBAAsB,CAAC;IACpC,OAAO,CAAC,EAAE,mBAAmB,CAAC;CAC/B;AAED,MAAM,WAAW,aAAa;IAC5B,MAAM,CAAC,EAAE,MAAM,GAAG,MAAM,CAAC;IACzB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,gBAAgB;IAC/B,UAAU,CAAC,EAAE,eAAe,CAAC;IAC7B,OAAO,CAAC,EAAE,cAAc,CAAC;IACzB,SAAS,CAAC,EAAE,gBAAgB,CAAC;IAC7B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,MAAM,CAAC,EAAE,OAAO,CAAC;CAClB;AAED,MAAM,WAAW,SAAS;IACxB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,QAAQ,CAAC,EAAE,OAAO,GAAG,MAAM,CAAC;IAC5B,MAAM,CAAC,EAAE,MAAM,GAAG,MAAM,CAAC;IACzB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,qBAAa,YAAa,SAAQ,KAAK;IAG5B,IAAI,CAAC,EAAE,MAAM;IACb,UAAU,CAAC,EAAE,MAAM;gBAF1B,OAAO,EAAE,MAAM,EACR,IAAI,CAAC,EAAE,MAAM,YAAA,EACb,UAAU,CAAC,EAAE,MAAM,YAAA;CAK7B;AAED,qBAAa,kBAAmB,SAAQ,YAAY;gBACtC,OAAO,EAAE,MAAM;CAI5B;AAED,qBAAa,mBAAoB,SAAQ,YAAY;gBACvC,OAAO,EAAE,MAAM;CAI5B;AAED,qBAAa,cAAe,SAAQ,YAAY;gBAClC,OAAO,EAAE,MAAM;CAI5B;AAED,MAAM,WAAW,qBAAqB;IACpC,IAAI,IAAI,UAAU,CAAC,cAAc,YAAY,EAAE,iBAAiB,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;IACjF,GAAG,CAAC,EAAE,EAAE,MAAM,GAAG,UAAU,CAAC,cAAc,YAAY,EAAE,iBAAiB,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;IACzF,OAAO,CAAC,OAAO,EAAE,gBAAgB,GAAG,UAAU,CAAC,cAAc,YAAY,EAAE,iBAAiB,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;CACjH;AAED,MAAM,MAAM,iBAAiB,GAAG,qBAAqB,CAAC"}
|
package/dist/types.js
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
 * Base error type for the OpenLLM client/server layer.
 *
 * Extends the built-in Error with an optional machine-readable `code`
 * and the HTTP `statusCode` associated with the failure.
 */
export class OpenLLMError extends Error {
    code;
    statusCode;
    constructor(message, code, statusCode) {
        super(message);
        this.name = "OpenLLMError";
        this.code = code;
        this.statusCode = statusCode;
    }
}
|
|
11
|
+
/** Raised when a model id is not present in the registry (HTTP 404). */
export class ModelNotFoundError extends OpenLLMError {
    constructor(modelId) {
        const detail = `Model '${modelId}' not found`;
        super(detail, "MODEL_NOT_FOUND", 404);
        this.name = "ModelNotFoundError";
    }
}
|
|
17
|
+
/** Raised when a model exists but has not been loaded yet (HTTP 412). */
export class ModelNotLoadedError extends OpenLLMError {
    constructor(modelId) {
        const detail = `Model '${modelId}' is not loaded`;
        super(detail, "MODEL_NOT_LOADED", 412);
        this.name = "ModelNotLoadedError";
    }
}
|
|
23
|
+
/** Raised when inference itself fails on the backend (HTTP 502). */
export class InferenceError extends OpenLLMError {
    constructor(message) {
        // The caller supplies the full message; only code/status are fixed.
        super(message, "INFERENCE_ERROR", 502);
        this.name = "InferenceError";
    }
}
|
|
29
|
+
//# sourceMappingURL=types.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AA6HA,MAAM,OAAO,YAAa,SAAQ,KAAK;IAG5B;IACA;IAHT,YACE,OAAe,EACR,IAAa,EACb,UAAmB;QAE1B,KAAK,CAAC,OAAO,CAAC,CAAC;QAHR,SAAI,GAAJ,IAAI,CAAS;QACb,eAAU,GAAV,UAAU,CAAS;QAG1B,IAAI,CAAC,IAAI,GAAG,cAAc,CAAC;IAC7B,CAAC;CACF;AAED,MAAM,OAAO,kBAAmB,SAAQ,YAAY;IAClD,YAAY,OAAe;QACzB,KAAK,CAAC,UAAU,OAAO,aAAa,EAAE,iBAAiB,EAAE,GAAG,CAAC,CAAC;QAC9D,IAAI,CAAC,IAAI,GAAG,oBAAoB,CAAC;IACnC,CAAC;CACF;AAED,MAAM,OAAO,mBAAoB,SAAQ,YAAY;IACnD,YAAY,OAAe;QACzB,KAAK,CAAC,UAAU,OAAO,iBAAiB,EAAE,kBAAkB,EAAE,GAAG,CAAC,CAAC;QACnE,IAAI,CAAC,IAAI,GAAG,qBAAqB,CAAC;IACpC,CAAC;CACF;AAED,MAAM,OAAO,cAAe,SAAQ,YAAY;IAC9C,YAAY,OAAe;QACzB,KAAK,CAAC,OAAO,EAAE,iBAAiB,EAAE,GAAG,CAAC,CAAC;QACvC,IAAI,CAAC,IAAI,GAAG,gBAAgB,CAAC;IAC/B,CAAC;CACF"}
|
package/example.ts
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import { createOpenLLMClient } from "@use-solace/openllm";
|
|
2
|
+
|
|
3
|
+
/**
 * End-to-end demo of the client: register a model, load it, stream an
 * inference, then unload. Expects an engine listening on localhost:8080.
 */
async function main() {
  const client = createOpenLLMClient({ engine: 8080 });

  try {
    // Step 1: register metadata for the "mistral" model.
    console.log("1. Registering mistral model...");
    const registerResponse = await client.registerModel({
      id: "mistral",
      name: "Mistral 7B",
      inference: "ollama",
      context: 8192,
      quant: "Q4_K_M",
      capabilities: ["chat", "completion"],
      latency: "fast",
    });
    console.log(" Registered:", registerResponse.model.name);

    // Step 2: ask the engine to load it into memory.
    console.log("\n2. Loading mistral model...");
    const loadResponse = await client.loadModel({ model_id: "mistral" });
    console.log(" Load response:", loadResponse.message);

    // Step 3: stream tokens as they are generated.
    console.log("\n3. Running streamed inference...");
    console.log(" Prompt: 'What is the meaning of life?'");
    console.log("\n Response:");

    // Accumulates the streamed text. NOTE(review): never read back —
    // kept only as an example of assembling the full response.
    let fullResponse = "";

    await client.inferenceStream(
      {
        model_id: "mistral",
        prompt: "What is the meaning of life?",
        max_tokens: 512,
        temperature: 0.7,
      },
      {
        onToken: (token) => {
          // Echo each token without a trailing newline.
          process.stdout.write(token.token);
          fullResponse += token.token;
        },
        onComplete: (response) => {
          console.log("\n\n Stream completed!");
          console.log(" Total tokens:", response.tokens_generated);
        },
        onError: (error) => {
          console.error("\n Stream error:", error.message);
        },
      },
    );

    // Step 4: release the model.
    console.log("\n\n4. Unloading model...");
    const unloadResponse = await client.unloadModel("mistral");
    console.log(" Unload response:", unloadResponse.message);

  } catch (error) {
    // Any step failing lands here; only Error instances are reported.
    if (error instanceof Error) {
      console.error("Error:", error.message);
    }
  }
}
|
|
61
|
+
|
|
62
|
+
// Run the demo. `createOpenLLMClient` is invoked before main's try/catch,
// so a throw there rejects main's promise; handle it here instead of
// leaving a floating promise with an unhandled rejection.
main().catch((error: unknown) => {
  console.error("Unhandled error:", error);
});
|
package/package.json
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@use-solace/openllm",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "OpenLLM model registry and API client with full TypeScript support",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "./dist/index.js",
|
|
7
|
+
"types": "./dist/index.d.ts",
|
|
8
|
+
"scripts": {
|
|
9
|
+
"build": "tsc",
|
|
10
|
+
"dev": "tsc --watch",
|
|
11
|
+
"prepublishOnly": "npm run build"
|
|
12
|
+
},
|
|
13
|
+
"keywords": [
|
|
14
|
+
"openllm",
|
|
15
|
+
"ollama",
|
|
16
|
+
"llama.cpp",
|
|
17
|
+
"huggingface",
|
|
18
|
+
"inference",
|
|
19
|
+
"model-registry",
|
|
20
|
+
"elysia"
|
|
21
|
+
],
|
|
22
|
+
"author": "Solace Contributors",
|
|
23
|
+
"license": "MIT",
|
|
24
|
+
"peerDependencies": {
|
|
25
|
+
"elysia": "^1.0.0"
|
|
26
|
+
},
|
|
27
|
+
"peerDependenciesMeta": {
|
|
28
|
+
"elysia": {
|
|
29
|
+
"optional": true
|
|
30
|
+
}
|
|
31
|
+
},
|
|
32
|
+
"dependencies": {
|
|
33
|
+
"@elysiajs/bearer": "^1.0.0"
|
|
34
|
+
},
|
|
35
|
+
"devDependencies": {
|
|
36
|
+
"@types/node": "^20.0.0",
|
|
37
|
+
"typescript": "^5.0.0",
|
|
38
|
+
"elysia": "^1.0.0"
|
|
39
|
+
},
|
|
40
|
+
"publishConfig": {
|
|
41
|
+
"access": "public"
|
|
42
|
+
},
|
|
43
|
+
"engines": {
|
|
44
|
+
"node": ">=18.0.0"
|
|
45
|
+
}
|
|
46
|
+
}
|
package/src/client.ts
ADDED
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
import type {
|
|
2
|
+
HealthResponse,
|
|
3
|
+
InferenceRequest,
|
|
4
|
+
InferenceResponse,
|
|
5
|
+
LoadModelRequest,
|
|
6
|
+
LoadModelResponse,
|
|
7
|
+
ModelListResponse,
|
|
8
|
+
ModelNotFoundError,
|
|
9
|
+
ModelNotLoadedError,
|
|
10
|
+
OpenLLMConfig,
|
|
11
|
+
OpenLLMError,
|
|
12
|
+
RegisterModelRequest,
|
|
13
|
+
RegisterModelResponse,
|
|
14
|
+
StreamOptions,
|
|
15
|
+
StreamToken,
|
|
16
|
+
UnloadModelResponse,
|
|
17
|
+
} from "./types.js";
|
|
18
|
+
|
|
19
|
+
/**
 * HTTP client for an OpenLLM engine on localhost.
 *
 * Wraps the REST endpoints (/health, /v1/models*, /v1/inference*) with
 * typed helpers, a per-request timeout via AbortController, and SSE-based
 * streaming for token-by-token inference.
 */
export class OpenLLMClient {
  // Engine base URL, e.g. "http://localhost:8080".
  private baseUrl: string;
  // Per-request timeout in milliseconds.
  private timeout: number;

  constructor(config: OpenLLMConfig = {}) {
    // `engine` fills the port slot of the URL; it may be a number or a
    // port-like string (see OpenLLMConfig).
    this.baseUrl = `http://localhost:${config.engine ?? 8080}`;
    this.timeout = config.timeout ?? 30000;
  }

  /**
   * Issue a JSON request against the engine and parse the JSON response.
   * Aborts after `this.timeout` ms; non-2xx responses become
   * OpenLLMError-shaped errors via createError.
   */
  private async request<T>(
    endpoint: string,
    options: RequestInit = {},
  ): Promise<T> {
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), this.timeout);

    try {
      const response = await fetch(`${this.baseUrl}${endpoint}`, {
        ...options,
        signal: controller.signal,
        headers: {
          "Content-Type": "application/json",
          // Caller-supplied headers win over the default Content-Type.
          ...options.headers,
        },
      });

      clearTimeout(timeoutId);

      if (!response.ok) {
        const errorText = await response.text();
        throw this.createError(response.status, errorText);
      }

      // NOTE(review): the response JSON is cast to T without validation.
      return (await response.json()) as T;
    } catch (error) {
      clearTimeout(timeoutId);
      if (error instanceof Error) {
        // fetch reports our own abort as AbortError; translate it into a
        // clearer timeout error for callers.
        if (error.name === "AbortError") {
          throw new Error(`Request timeout after ${this.timeout}ms`);
        }
        throw error;
      }
      throw error;
    }
  }

  /**
   * Map an HTTP failure to an OpenLLMError-shaped Error.
   *
   * The error classes are imported type-only, so plain Error objects are
   * constructed and cast to the matching shape (name/code/statusCode)
   * rather than instantiating the real classes; `instanceof` checks
   * against those classes will therefore not match these errors.
   */
  private createError(status: number, message: string): OpenLLMError {
    // 404 bodies mentioning "Model '<id>'" map to ModelNotFoundError.
    if (status === 404) {
      const match = message.match(/Model '([^']+)'/);
      if (match) {
        const error = new Error(message) as ModelNotFoundError;
        error.name = "ModelNotFoundError";
        error.code = "MODEL_NOT_FOUND";
        error.statusCode = status;
        return error;
      }
    }
    // 412 bodies mentioning "Model '<id>'" map to ModelNotLoadedError.
    if (status === 412) {
      const match = message.match(/Model '([^']+)'/);
      if (match) {
        const error = new Error(message) as ModelNotLoadedError;
        error.name = "ModelNotLoadedError";
        error.code = "MODEL_NOT_LOADED";
        error.statusCode = status;
        return error;
      }
    }

    // Anything else becomes a generic API error.
    const error = new Error(message) as OpenLLMError;
    error.name = "OpenLLMError";
    error.code = "API_ERROR";
    error.statusCode = status;
    return error;
  }

  /** GET /health — engine liveness and loaded-model count. */
  async health(): Promise<HealthResponse> {
    return this.request<HealthResponse>("/health");
  }

  /** GET /v1/models — list registered models. */
  async listModels(): Promise<ModelListResponse> {
    return this.request<ModelListResponse>("/v1/models");
  }

  /** POST /v1/models/register — add a model to the engine's registry. */
  async registerModel(
    data: RegisterModelRequest,
  ): Promise<RegisterModelResponse> {
    return this.request<RegisterModelResponse>("/v1/models/register", {
      method: "POST",
      body: JSON.stringify(data),
    });
  }

  /** POST /v1/models/load — load a registered model into memory. */
  async loadModel(data: LoadModelRequest): Promise<LoadModelResponse> {
    return this.request<LoadModelResponse>("/v1/models/load", {
      method: "POST",
      body: JSON.stringify(data),
    });
  }

  /**
   * POST /v1/models/unload/:id — unload a model.
   * NOTE(review): `modelId` is interpolated into the path without URL
   * encoding; ids containing reserved characters would break the route.
   */
  async unloadModel(modelId: string): Promise<UnloadModelResponse> {
    return this.request<UnloadModelResponse>(
      `/v1/models/unload/${modelId}`,
      {
        method: "POST",
      },
    );
  }

  /** POST /v1/inference — single-shot (non-streaming) inference. */
  async inference(data: InferenceRequest): Promise<InferenceResponse> {
    return this.request<InferenceResponse>("/v1/inference", {
      method: "POST",
      body: JSON.stringify(data),
    });
  }

  /**
   * POST /v1/inference/stream — SSE streaming inference.
   *
   * Parses "data: {...}" lines into StreamToken objects and forwards each
   * to `options.onToken`. When a token arrives with `complete: true`, a
   * synthesized InferenceResponse is passed to `options.onComplete`
   * (tokens_generated is a client-side count; finish_reason is always
   * "stop"). The timeout only bounds the wait for response headers — the
   * body read afterwards is not covered by it.
   */
  async inferenceStream(
    data: InferenceRequest,
    options: StreamOptions,
  ): Promise<void> {
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), this.timeout);

    try {
      const response = await fetch(`${this.baseUrl}/v1/inference/stream`, {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
        },
        body: JSON.stringify(data),
        signal: controller.signal,
      });

      // Headers arrived: stop the timeout clock before reading the body.
      clearTimeout(timeoutId);

      if (!response.ok) {
        const errorText = await response.text();
        const error = this.createError(response.status, errorText);
        options.onError?.(error);
        throw error;
      }

      const reader = response.body?.getReader();
      if (!reader) {
        const error = new Error("No response body");
        options.onError?.(error);
        throw error;
      }

      const decoder = new TextDecoder();
      // Holds a partial line carried over between reads.
      let buffer = "";
      let accumulatedText = "";
      let tokenCount = 0;
      const modelId = data.model_id;

      while (true) {
        const { done, value } = await reader.read();
        if (done) break;

        buffer += decoder.decode(value, { stream: true });
        const lines = buffer.split("\n");
        // The last element may be an incomplete line; keep it for the
        // next chunk.
        buffer = lines.pop() ?? "";

        for (const line of lines) {
          // SSE "event:" lines carry no payload; skip them.
          if (line.trim().startsWith("event: token")) {
            continue;
          }
          if (line.trim().startsWith("data: ")) {
            // NOTE: this `data` intentionally shadows the request
            // parameter `data` for the rest of this branch.
            const data = line.trim().slice(6);
            if (data) {
              try {
                const token = JSON.parse(data) as StreamToken;
                accumulatedText += token.token;
                tokenCount++;
                options.onToken(token);

                if (token.complete) {
                  // Synthesize the final response from client-side state.
                  options.onComplete?.({
                    model_id: modelId,
                    text: accumulatedText,
                    tokens_generated: tokenCount,
                    finish_reason: "stop",
                  });
                }
              } catch (e) {
                // Malformed SSE payloads are logged but do not abort the
                // stream.
                console.error("Failed to parse SSE data:", data, e);
              }
            }
          }
        }
      }
    } catch (error) {
      clearTimeout(timeoutId);
      if (error instanceof Error) {
        if (error.name === "AbortError") {
          const timeoutError = new Error(
            `Stream timeout after ${this.timeout}ms`,
          );
          options.onError?.(timeoutError);
          throw timeoutError;
        }
        options.onError?.(error);
        throw error;
      }
    }
  }

  /** Override the engine base URL (replaces the localhost default). */
  setBaseUrl(baseUrl: string): void {
    this.baseUrl = baseUrl;
  }

  /** Current engine base URL. */
  getBaseUrl(): string {
    return this.baseUrl;
  }

  /** Set the per-request timeout in milliseconds. */
  setTimeout(timeout: number): void {
    this.timeout = timeout;
  }

  /** Current per-request timeout in milliseconds. */
  getTimeout(): number {
    return this.timeout;
  }
}
|
|
241
|
+
|
|
242
|
+
export function createOpenLLMClient(config?: OpenLLMConfig): OpenLLMClient {
|
|
243
|
+
return new OpenLLMClient(config);
|
|
244
|
+
}
|