inference-server 1.0.0-beta.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (227) hide show
  1. package/README.md +216 -0
  2. package/dist/api/openai/enums.d.ts +4 -0
  3. package/dist/api/openai/enums.js +17 -0
  4. package/dist/api/openai/enums.js.map +1 -0
  5. package/dist/api/openai/handlers/chat.d.ts +3 -0
  6. package/dist/api/openai/handlers/chat.js +358 -0
  7. package/dist/api/openai/handlers/chat.js.map +1 -0
  8. package/dist/api/openai/handlers/completions.d.ts +3 -0
  9. package/dist/api/openai/handlers/completions.js +169 -0
  10. package/dist/api/openai/handlers/completions.js.map +1 -0
  11. package/dist/api/openai/handlers/embeddings.d.ts +3 -0
  12. package/dist/api/openai/handlers/embeddings.js +74 -0
  13. package/dist/api/openai/handlers/embeddings.js.map +1 -0
  14. package/dist/api/openai/handlers/images.d.ts +0 -0
  15. package/dist/api/openai/handlers/images.js +4 -0
  16. package/dist/api/openai/handlers/images.js.map +1 -0
  17. package/dist/api/openai/handlers/models.d.ts +3 -0
  18. package/dist/api/openai/handlers/models.js +23 -0
  19. package/dist/api/openai/handlers/models.js.map +1 -0
  20. package/dist/api/openai/handlers/transcription.d.ts +0 -0
  21. package/dist/api/openai/handlers/transcription.js +4 -0
  22. package/dist/api/openai/handlers/transcription.js.map +1 -0
  23. package/dist/api/openai/index.d.ts +7 -0
  24. package/dist/api/openai/index.js +14 -0
  25. package/dist/api/openai/index.js.map +1 -0
  26. package/dist/api/parseJSONRequestBody.d.ts +2 -0
  27. package/dist/api/parseJSONRequestBody.js +24 -0
  28. package/dist/api/parseJSONRequestBody.js.map +1 -0
  29. package/dist/api/v1/index.d.ts +2 -0
  30. package/dist/api/v1/index.js +29 -0
  31. package/dist/api/v1/index.js.map +1 -0
  32. package/dist/cli.d.ts +1 -0
  33. package/dist/cli.js +10 -0
  34. package/dist/cli.js.map +1 -0
  35. package/dist/engines/gpt4all/engine.d.ts +34 -0
  36. package/dist/engines/gpt4all/engine.js +357 -0
  37. package/dist/engines/gpt4all/engine.js.map +1 -0
  38. package/dist/engines/gpt4all/util.d.ts +3 -0
  39. package/dist/engines/gpt4all/util.js +29 -0
  40. package/dist/engines/gpt4all/util.js.map +1 -0
  41. package/dist/engines/index.d.ts +19 -0
  42. package/dist/engines/index.js +21 -0
  43. package/dist/engines/index.js.map +1 -0
  44. package/dist/engines/node-llama-cpp/engine.d.ts +49 -0
  45. package/dist/engines/node-llama-cpp/engine.js +666 -0
  46. package/dist/engines/node-llama-cpp/engine.js.map +1 -0
  47. package/dist/engines/node-llama-cpp/types.d.ts +13 -0
  48. package/dist/engines/node-llama-cpp/types.js +2 -0
  49. package/dist/engines/node-llama-cpp/types.js.map +1 -0
  50. package/dist/engines/node-llama-cpp/util.d.ts +15 -0
  51. package/dist/engines/node-llama-cpp/util.js +84 -0
  52. package/dist/engines/node-llama-cpp/util.js.map +1 -0
  53. package/dist/engines/node-llama-cpp/validateModelFile.d.ts +8 -0
  54. package/dist/engines/node-llama-cpp/validateModelFile.js +36 -0
  55. package/dist/engines/node-llama-cpp/validateModelFile.js.map +1 -0
  56. package/dist/engines/stable-diffusion-cpp/engine.d.ts +90 -0
  57. package/dist/engines/stable-diffusion-cpp/engine.js +294 -0
  58. package/dist/engines/stable-diffusion-cpp/engine.js.map +1 -0
  59. package/dist/engines/stable-diffusion-cpp/types.d.ts +3 -0
  60. package/dist/engines/stable-diffusion-cpp/types.js +2 -0
  61. package/dist/engines/stable-diffusion-cpp/types.js.map +1 -0
  62. package/dist/engines/stable-diffusion-cpp/util.d.ts +4 -0
  63. package/dist/engines/stable-diffusion-cpp/util.js +55 -0
  64. package/dist/engines/stable-diffusion-cpp/util.js.map +1 -0
  65. package/dist/engines/stable-diffusion-cpp/validateModelFiles.d.ts +19 -0
  66. package/dist/engines/stable-diffusion-cpp/validateModelFiles.js +91 -0
  67. package/dist/engines/stable-diffusion-cpp/validateModelFiles.js.map +1 -0
  68. package/dist/engines/transformers-js/engine.d.ts +37 -0
  69. package/dist/engines/transformers-js/engine.js +538 -0
  70. package/dist/engines/transformers-js/engine.js.map +1 -0
  71. package/dist/engines/transformers-js/types.d.ts +7 -0
  72. package/dist/engines/transformers-js/types.js +2 -0
  73. package/dist/engines/transformers-js/types.js.map +1 -0
  74. package/dist/engines/transformers-js/util.d.ts +7 -0
  75. package/dist/engines/transformers-js/util.js +36 -0
  76. package/dist/engines/transformers-js/util.js.map +1 -0
  77. package/dist/engines/transformers-js/validateModelFiles.d.ts +17 -0
  78. package/dist/engines/transformers-js/validateModelFiles.js +133 -0
  79. package/dist/engines/transformers-js/validateModelFiles.js.map +1 -0
  80. package/dist/experiments/ChatWithVision.d.ts +11 -0
  81. package/dist/experiments/ChatWithVision.js +91 -0
  82. package/dist/experiments/ChatWithVision.js.map +1 -0
  83. package/dist/experiments/StableDiffPromptGenerator.d.ts +0 -0
  84. package/dist/experiments/StableDiffPromptGenerator.js +4 -0
  85. package/dist/experiments/StableDiffPromptGenerator.js.map +1 -0
  86. package/dist/experiments/VoiceFunctionCall.d.ts +18 -0
  87. package/dist/experiments/VoiceFunctionCall.js +51 -0
  88. package/dist/experiments/VoiceFunctionCall.js.map +1 -0
  89. package/dist/http.d.ts +19 -0
  90. package/dist/http.js +54 -0
  91. package/dist/http.js.map +1 -0
  92. package/dist/index.d.ts +7 -0
  93. package/dist/index.js +8 -0
  94. package/dist/index.js.map +1 -0
  95. package/dist/instance.d.ts +88 -0
  96. package/dist/instance.js +594 -0
  97. package/dist/instance.js.map +1 -0
  98. package/dist/lib/acquireFileLock.d.ts +7 -0
  99. package/dist/lib/acquireFileLock.js +38 -0
  100. package/dist/lib/acquireFileLock.js.map +1 -0
  101. package/dist/lib/calculateContextIdentity.d.ts +7 -0
  102. package/dist/lib/calculateContextIdentity.js +39 -0
  103. package/dist/lib/calculateContextIdentity.js.map +1 -0
  104. package/dist/lib/calculateFileChecksum.d.ts +1 -0
  105. package/dist/lib/calculateFileChecksum.js +16 -0
  106. package/dist/lib/calculateFileChecksum.js.map +1 -0
  107. package/dist/lib/copyDirectory.d.ts +6 -0
  108. package/dist/lib/copyDirectory.js +27 -0
  109. package/dist/lib/copyDirectory.js.map +1 -0
  110. package/dist/lib/decodeAudio.d.ts +1 -0
  111. package/dist/lib/decodeAudio.js +26 -0
  112. package/dist/lib/decodeAudio.js.map +1 -0
  113. package/dist/lib/downloadModelFile.d.ts +10 -0
  114. package/dist/lib/downloadModelFile.js +58 -0
  115. package/dist/lib/downloadModelFile.js.map +1 -0
  116. package/dist/lib/flattenMessageTextContent.d.ts +2 -0
  117. package/dist/lib/flattenMessageTextContent.js +11 -0
  118. package/dist/lib/flattenMessageTextContent.js.map +1 -0
  119. package/dist/lib/getCacheDirPath.d.ts +12 -0
  120. package/dist/lib/getCacheDirPath.js +31 -0
  121. package/dist/lib/getCacheDirPath.js.map +1 -0
  122. package/dist/lib/loadImage.d.ts +12 -0
  123. package/dist/lib/loadImage.js +30 -0
  124. package/dist/lib/loadImage.js.map +1 -0
  125. package/dist/lib/logger.d.ts +12 -0
  126. package/dist/lib/logger.js +98 -0
  127. package/dist/lib/logger.js.map +1 -0
  128. package/dist/lib/math.d.ts +7 -0
  129. package/dist/lib/math.js +30 -0
  130. package/dist/lib/math.js.map +1 -0
  131. package/dist/lib/resolveModelFileLocation.d.ts +15 -0
  132. package/dist/lib/resolveModelFileLocation.js +41 -0
  133. package/dist/lib/resolveModelFileLocation.js.map +1 -0
  134. package/dist/lib/util.d.ts +7 -0
  135. package/dist/lib/util.js +61 -0
  136. package/dist/lib/util.js.map +1 -0
  137. package/dist/lib/validateModelFile.d.ts +9 -0
  138. package/dist/lib/validateModelFile.js +62 -0
  139. package/dist/lib/validateModelFile.js.map +1 -0
  140. package/dist/lib/validateModelOptions.d.ts +3 -0
  141. package/dist/lib/validateModelOptions.js +23 -0
  142. package/dist/lib/validateModelOptions.js.map +1 -0
  143. package/dist/pool.d.ts +61 -0
  144. package/dist/pool.js +512 -0
  145. package/dist/pool.js.map +1 -0
  146. package/dist/server.d.ts +59 -0
  147. package/dist/server.js +221 -0
  148. package/dist/server.js.map +1 -0
  149. package/dist/standalone.d.ts +1 -0
  150. package/dist/standalone.js +306 -0
  151. package/dist/standalone.js.map +1 -0
  152. package/dist/store.d.ts +60 -0
  153. package/dist/store.js +203 -0
  154. package/dist/store.js.map +1 -0
  155. package/dist/types/completions.d.ts +57 -0
  156. package/dist/types/completions.js +2 -0
  157. package/dist/types/completions.js.map +1 -0
  158. package/dist/types/index.d.ts +326 -0
  159. package/dist/types/index.js +2 -0
  160. package/dist/types/index.js.map +1 -0
  161. package/docs/engines.md +28 -0
  162. package/docs/gpu.md +72 -0
  163. package/docs/http-api.md +147 -0
  164. package/examples/all-options.js +108 -0
  165. package/examples/chat-cli.js +56 -0
  166. package/examples/chat-server.js +65 -0
  167. package/examples/concurrency.js +70 -0
  168. package/examples/express.js +70 -0
  169. package/examples/pool.js +91 -0
  170. package/package.json +113 -0
  171. package/src/api/openai/enums.ts +20 -0
  172. package/src/api/openai/handlers/chat.ts +408 -0
  173. package/src/api/openai/handlers/completions.ts +196 -0
  174. package/src/api/openai/handlers/embeddings.ts +92 -0
  175. package/src/api/openai/handlers/images.ts +3 -0
  176. package/src/api/openai/handlers/models.ts +33 -0
  177. package/src/api/openai/handlers/transcription.ts +2 -0
  178. package/src/api/openai/index.ts +16 -0
  179. package/src/api/parseJSONRequestBody.ts +26 -0
  180. package/src/api/v1/DRAFT.md +16 -0
  181. package/src/api/v1/index.ts +37 -0
  182. package/src/cli.ts +9 -0
  183. package/src/engines/gpt4all/engine.ts +441 -0
  184. package/src/engines/gpt4all/util.ts +31 -0
  185. package/src/engines/index.ts +28 -0
  186. package/src/engines/node-llama-cpp/engine.ts +811 -0
  187. package/src/engines/node-llama-cpp/types.ts +17 -0
  188. package/src/engines/node-llama-cpp/util.ts +126 -0
  189. package/src/engines/node-llama-cpp/validateModelFile.ts +46 -0
  190. package/src/engines/stable-diffusion-cpp/engine.ts +369 -0
  191. package/src/engines/stable-diffusion-cpp/types.ts +54 -0
  192. package/src/engines/stable-diffusion-cpp/util.ts +58 -0
  193. package/src/engines/stable-diffusion-cpp/validateModelFiles.ts +119 -0
  194. package/src/engines/transformers-js/engine.ts +659 -0
  195. package/src/engines/transformers-js/types.ts +25 -0
  196. package/src/engines/transformers-js/util.ts +40 -0
  197. package/src/engines/transformers-js/validateModelFiles.ts +168 -0
  198. package/src/experiments/ChatWithVision.ts +103 -0
  199. package/src/experiments/StableDiffPromptGenerator.ts +2 -0
  200. package/src/experiments/VoiceFunctionCall.ts +71 -0
  201. package/src/http.ts +72 -0
  202. package/src/index.ts +7 -0
  203. package/src/instance.ts +723 -0
  204. package/src/lib/acquireFileLock.ts +38 -0
  205. package/src/lib/calculateContextIdentity.ts +53 -0
  206. package/src/lib/calculateFileChecksum.ts +18 -0
  207. package/src/lib/copyDirectory.ts +29 -0
  208. package/src/lib/decodeAudio.ts +39 -0
  209. package/src/lib/downloadModelFile.ts +70 -0
  210. package/src/lib/flattenMessageTextContent.ts +19 -0
  211. package/src/lib/getCacheDirPath.ts +34 -0
  212. package/src/lib/loadImage.ts +46 -0
  213. package/src/lib/logger.ts +112 -0
  214. package/src/lib/math.ts +31 -0
  215. package/src/lib/resolveModelFileLocation.ts +49 -0
  216. package/src/lib/util.ts +75 -0
  217. package/src/lib/validateModelFile.ts +71 -0
  218. package/src/lib/validateModelOptions.ts +31 -0
  219. package/src/pool.ts +651 -0
  220. package/src/server.ts +270 -0
  221. package/src/standalone.ts +320 -0
  222. package/src/store.ts +278 -0
  223. package/src/types/completions.ts +86 -0
  224. package/src/types/index.ts +488 -0
  225. package/tsconfig.json +29 -0
  226. package/tsconfig.release.json +11 -0
  227. package/vitest.config.ts +18 -0
package/dist/store.js ADDED
@@ -0,0 +1,203 @@
1
+ import { promises as fs, existsSync } from 'node:fs';
2
+ import PQueue from 'p-queue';
3
+ import prettyMilliseconds from 'pretty-ms';
4
+ import prettyBytes from 'pretty-bytes';
5
+ import { LogLevels, createSublogger, } from './lib/logger.js';
6
+ import { formatBytesPerSecond, mergeAbortSignals } from './lib/util.js';
7
/**
 * Keeps the configuration and preparation state of every configured model.
 *
 * Each entry of `models` is the user supplied model config plus a `status`
 * field ('unloaded' | 'preparing' | 'ready' | 'error') and, while files are
 * being fetched, a `downloads` map of per-file progress trackers.
 */
export class ModelStore {
    prepareQueue;
    models = {};
    engines;
    prepareController;
    modelsCachePath;
    log;

    /**
     * @param {object} options
     * @param {object} options.models Model configs keyed by model id.
     * @param {string} options.modelsCachePath Directory created by `init()` if missing.
     * @param {number} [options.prepareConcurrency=2] Max concurrent prepare tasks.
     * @param {*} [options.log] Passed through to `createSublogger`.
     */
    constructor(options) {
        this.prepareController = new AbortController();
        this.log = createSublogger(options.log);
        this.prepareQueue = new PQueue({
            concurrency: options.prepareConcurrency ?? 2,
        });
        this.modelsCachePath = options.modelsCachePath;
        this.models = Object.fromEntries(
            Object.entries(options.models).map(([modelId, model]) => [
                modelId,
                { ...model, status: 'unloaded' },
            ]),
        );
    }

    /**
     * Registers the engines and kicks off model preparation.
     *
     * Models with `prepare: 'blocking'` or `minInstances > 0` are awaited;
     * models with `prepare: 'async'` are started in the background. Any other
     * model is left untouched until `prepareModel` is called explicitly.
     *
     * @param {object} engines Engine implementations keyed by engine name.
     * @returns {Promise<void>} Resolves once all blocking models finished preparing.
     */
    async init(engines) {
        this.engines = engines;
        if (!existsSync(this.modelsCachePath)) {
            await fs.mkdir(this.modelsCachePath, { recursive: true });
        }
        const blockingPromises = [];
        for (const [modelId, model] of Object.entries(this.models)) {
            if (model.prepare === 'blocking' || model.minInstances > 0) {
                blockingPromises.push(this.prepareModel(modelId));
            }
            else if (model.prepare === 'async') {
                // Fire-and-forget: nobody awaits this promise, so a rejection must be
                // handled here or it would surface as an unhandled rejection.
                this.prepareModel(modelId).catch((error) => {
                    this.log(LogLevels.error, 'Error preparing model', {
                        model: modelId,
                        error: error,
                    });
                });
            }
        }
        await Promise.all(blockingPromises);
    }

    /** Aborts every preparation that is currently in flight. */
    dispose() {
        this.prepareController.abort();
    }

    /**
     * Records a progress sample for one file of a model, creating the
     * per-file tracker on first use.
     *
     * @param {string} modelId Key of the model in `this.models`.
     * @param {{file: string, loadedBytes: number, totalBytes: number}} progress
     */
    onDownloadProgress(modelId, progress) {
        const model = this.models[modelId];
        if (!model.downloads) {
            model.downloads = new Map();
        }
        let tracker = model.downloads.get(progress.file);
        if (!tracker) {
            tracker = new DownloadTracker(5000);
            model.downloads.set(progress.file, tracker);
        }
        tracker.pushProgress(progress);
    }

    /**
     * Makes sure all required files for the model exist and are valid.
     * Checking model checksums and reading metadata is model + engine specific
     * and can be slow, so the work is funneled through `prepareQueue`.
     *
     * Failures raised by the engine are logged and reflected as
     * `status: 'error'`; they do not reject the returned promise.
     *
     * @param {string} modelId Key of the model in `this.models`.
     * @param {AbortSignal} [signal] Optional signal merged with the store's own abort signal.
     * @returns {Promise<void>}
     * @throws {Error} If `init()` was not called, the model id is unknown, or
     *   the model references an engine that is not registered.
     */
    async prepareModel(modelId, signal) {
        if (!this.engines) {
            throw new Error('No engines available - did you call init()?');
        }
        const model = this.models[modelId];
        if (!model) {
            throw new Error(`Unknown model "${modelId}"`);
        }
        const engine = this.engines[model.engine];
        if (!engine) {
            // Without this guard the `in` check below throws a TypeError and the
            // model would be stuck in 'preparing' forever.
            model.status = 'error';
            throw new Error(`No engine "${model.engine}" registered for model "${modelId}"`);
        }
        model.status = 'preparing';
        this.log(LogLevels.info, 'Preparing model', {
            model: modelId,
            task: model.task,
        });
        await this.prepareQueue.add(async () => {
            // Engines without a prepare step have nothing to download or validate.
            if (!('prepareModel' in engine)) {
                model.status = 'ready';
                return model;
            }
            // Periodically log the aggregated progress of all files of this model.
            const logProgressInterval = setInterval(() => {
                const progress = { loadedBytes: 0, totalBytes: 0, speed: 0 };
                for (const tracker of model.downloads?.values() ?? []) {
                    const status = tracker.getStatus();
                    progress.loadedBytes += status?.loadedBytes || 0;
                    progress.totalBytes += status?.totalBytes || 0;
                    progress.speed += status?.speed || 0;
                }
                if (progress.totalBytes) {
                    const percent = (progress.loadedBytes / progress.totalBytes) * 100;
                    const formattedTotalBytes = prettyBytes(progress.totalBytes, { space: false });
                    const formattedLoadedBytes = prettyBytes(progress.loadedBytes, { space: false });
                    this.log(LogLevels.info, `Downloading at ${formatBytesPerSecond(progress.speed)} ${percent.toFixed(1)}% - ${formattedLoadedBytes} of ${formattedTotalBytes}`, {
                        model: modelId,
                    });
                }
            }, 10000);
            try {
                const modelMeta = await engine.prepareModel({ config: model, log: this.log }, (progress) => {
                    // Use the store key, not `model.id`: the key is what `this.models`
                    // is indexed by and is always defined.
                    this.onDownloadProgress(modelId, progress);
                }, mergeAbortSignals([signal, this.prepareController.signal]));
                model.downloads = undefined;
                model.meta = modelMeta;
                model.status = 'ready';
                this.log(LogLevels.info, 'Model ready', {
                    model: modelId,
                    task: model.task,
                });
            }
            catch (error) {
                this.log(LogLevels.error, 'Error preparing model', {
                    model: modelId,
                    error: error,
                });
                model.status = 'error';
            }
            finally {
                clearInterval(logProgressInterval);
            }
            return model;
        });
    }

    /**
     * Builds a plain snapshot of every model's state.
     *
     * @returns {object} Map of model id to
     *   `{ engine, device, minInstances, maxInstances, status, downloads }`, where
     *   `downloads` is `undefined` or a list of per-file progress records.
     */
    getStatus() {
        // Round to two decimals; non-finite or missing values become 0 so the
        // snapshot stays JSON-serializable.
        const formatFloat = (num) => Number.isFinite(num) ? parseFloat(num.toFixed(2)) : 0;
        const describeDownload = ([file, download]) => {
            const status = download.getStatus();
            // Byte counts come from the newest sample so they are available even
            // when the tracker has too few samples to compute a status.
            const latestState = download.progressBuffer[download.progressBuffer.length - 1];
            const totalBytes = latestState?.totalBytes ?? 0;
            const loadedBytes = latestState?.loadedBytes ?? 0;
            const etaSeconds = status?.etaSeconds ?? 0;
            return {
                file,
                loadedBytes,
                formattedLoadedBytes: prettyBytes(loadedBytes),
                totalBytes,
                formattedTotalBytes: prettyBytes(totalBytes),
                percent: formatFloat(status?.percent),
                speed: formatFloat(status?.speed),
                etaSeconds: formatFloat(etaSeconds),
                formattedEta: prettyMilliseconds(etaSeconds * 1000),
            };
        };
        return Object.fromEntries(
            Object.entries(this.models).map(([modelId, model]) => [
                modelId,
                {
                    engine: model.engine,
                    device: model.device,
                    minInstances: model.minInstances,
                    maxInstances: model.maxInstances,
                    status: model.status,
                    downloads: model.downloads
                        ? Array.from(model.downloads, describeDownload)
                        : undefined,
                },
            ]),
        );
    }
}
168
/**
 * Collects download progress samples for a single file inside a sliding time
 * window and derives transfer speed, ETA and completion ratio from them.
 */
class DownloadTracker {
    progressBuffer = [];
    timeWindow;

    /**
     * @param {number} [timeWindow=1000] Length of the sliding window in milliseconds.
     */
    constructor(timeWindow = 1000) {
        this.timeWindow = timeWindow;
    }

    /**
     * Appends a timestamped sample and drops samples that fell out of the window.
     *
     * @param {{loadedBytes: number, totalBytes: number}} progress
     */
    pushProgress({ loadedBytes, totalBytes }) {
        const timestamp = Date.now();
        this.progressBuffer.push({ loadedBytes, totalBytes, timestamp });
        this.cleanup();
    }

    /** Discards every sample older than `timeWindow` milliseconds. */
    cleanup() {
        const cutoffTime = Date.now() - this.timeWindow;
        this.progressBuffer = this.progressBuffer.filter((item) => item.timestamp >= cutoffTime);
    }

    /**
     * Computes the current transfer statistics.
     *
     * @returns {null | {speed: number, etaSeconds: number, percent: number, loadedBytes: number, totalBytes: number}}
     *   `null` while fewer than two samples are inside the window. `speed` is in
     *   bytes per second and `percent` is a 0..1 ratio. All numbers are finite:
     *   samples sharing a timestamp yield speed 0, and an unknown total
     *   (`totalBytes` of 0) yields percent 0 and ETA 0.
     */
    getStatus() {
        if (this.progressBuffer.length < 2) {
            return null; // Not enough data to calculate speed and ETA
        }
        const latestState = this.progressBuffer[this.progressBuffer.length - 1];
        const previousState = this.progressBuffer[0]; // oldest state within the time window
        const bytesLoaded = latestState.loadedBytes - previousState.loadedBytes;
        const timeElapsed = latestState.timestamp - previousState.timestamp; // in milliseconds
        // Samples can share a millisecond; dividing by zero would give Infinity/NaN.
        const speed = timeElapsed > 0 ? bytesLoaded / (timeElapsed / 1000) : 0; // bytes per second
        const remainingBytes = latestState.totalBytes - latestState.loadedBytes;
        // remainingBytes is negative when the total is unknown or under-reported.
        const eta = speed > 0 && remainingBytes > 0 ? remainingBytes / speed : 0;
        const percent = latestState.totalBytes > 0
            ? latestState.loadedBytes / latestState.totalBytes
            : 0;
        return {
            speed,
            etaSeconds: eta,
            percent,
            loadedBytes: latestState.loadedBytes,
            totalBytes: latestState.totalBytes,
        };
    }
}
203
+ //# sourceMappingURL=store.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"store.js","sourceRoot":"","sources":["../src/store.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,IAAI,EAAE,EAAE,UAAU,EAAE,MAAM,SAAS,CAAA;AACpD,OAAO,MAAM,MAAM,SAAS,CAAA;AAC5B,OAAO,kBAAkB,MAAM,WAAW,CAAA;AAC1C,OAAO,WAAW,MAAM,cAAc,CAAA;AAMtC,OAAO,EAEN,SAAS,EAET,eAAe,GACf,MAAM,wBAAwB,CAAA;AAC/B,OAAO,EAAE,oBAAoB,EAAE,iBAAiB,EAAE,MAAM,sBAAsB,CAAA;AAmB9E,MAAM,OAAO,UAAU;IACtB,YAAY,CAAQ;IACpB,MAAM,GAAgC,EAAE,CAAA;IACxC,OAAO,CAA8B;IAC7B,iBAAiB,CAAiB;IAClC,eAAe,CAAQ;IACvB,GAAG,CAAQ;IAEnB,YAAY,OAA0B;QACrC,IAAI,CAAC,iBAAiB,GAAG,IAAI,eAAe,EAAE,CAAA;QAC9C,IAAI,CAAC,GAAG,GAAG,eAAe,CAAC,OAAO,CAAC,GAAG,CAAC,CAAA;QACvC,IAAI,CAAC,YAAY,GAAG,IAAI,MAAM,CAAC;YAC9B,WAAW,EAAE,OAAO,CAAC,kBAAkB,IAAI,CAAC;SAC5C,CAAC,CAAA;QACF,IAAI,CAAC,eAAe,GAAG,OAAO,CAAC,eAAe,CAAA;QAC9C,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,WAAW,CAC/B,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,OAAO,EAAE,KAAK,CAAC,EAAE,EAAE,CAAC;YACxD,OAAO;YACP;gBACC,GAAG,KAAK;gBACR,MAAM,EAAE,UAAU;aAClB;SACD,CAAC,CACF,CAAA;IACF,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,OAAoC;QAC9C,IAAI,CAAC,OAAO,GAAG,OAAO,CAAA;QACtB,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,eAAe,CAAC,EAAE,CAAC;YACvC,MAAM,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,eAAe,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;QAC1D,CAAC;QAED,MAAM,gBAAgB,GAAG,EAAE,CAAA;QAC3B,KAAK,MAAM,OAAO,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YACnC,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,CAAA;YAClC,IAAI,KAAK,CAAC,OAAO,KAAK,UAAU,IAAI,KAAK,CAAC,YAAY,GAAG,CAAC,EAAE,CAAC;gBAC5D,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC,CAAA;YAClD,CAAC;iBAAM,IAAI,KAAK,CAAC,OAAO,KAAK,OAAO,EAAE,CAAC;gBACtC,IAAI,CAAC,YAAY,CAAC,OAAO,CAAC,CAAA;YAC3B,CAAC;QACF,CAAC;QACD,MAAM,OAAO,CAAC,GAAG,CAAC,gBAAgB,CAAC,CAAA;IACpC,CAAC;IAED,OAAO;QACN,IAAI,CAAC,iBAAiB,CAAC,KAAK,EAAE,CAAA;IAC/B,CAAC;IAEO,kBAAkB,CACzB,OAAe,EACf,QAAmE;QAEnE,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,CAAA;QAClC,IAAI,CAAC,KAAK,CAAC,SAAS,EAAE,CAAC;YACtB,KAAK,CAAC,SAAS,GAAG,IAAI,GAAG,EAAE,CAAA;QAC5B,CAAC;QAED,IAAI,KAAK,CAAC,SAAS,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,CAAC,
EAAE,CAAC;YACxC,MAAM,OAAO,GAAG,KAAK,CAAC,SAAS,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,CAAE,CAAA;YACnD,OAAO,CAAC,YAAY,CAAC,QAAQ,CAAC,CAAA;QAC/B,CAAC;aAAM,CAAC;YACP,MAAM,OAAO,GAAG,IAAI,eAAe,CAAC,IAAI,CAAC,CAAA;YACzC,OAAO,CAAC,YAAY,CAAC,QAAQ,CAAC,CAAA;YAC9B,KAAK,CAAC,SAAS,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,CAAA;QAC5C,CAAC;IACF,CAAC;IAED,kEAAkE;IAClE,2FAA2F;IAC3F,KAAK,CAAC,YAAY,CAAC,OAAe,EAAE,MAAoB;QACvD,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,CAAA;QAClC,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;YACnB,MAAM,IAAI,KAAK,CAAC,6CAA6C,CAAC,CAAA;QAC/D,CAAC;QACD,KAAK,CAAC,MAAM,GAAG,WAAW,CAAA;QAC1B,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,MAAM,CAAC,CAAA;QACzC,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,IAAI,EAAE,iBAAiB,EAAE;YAC3C,KAAK,EAAE,OAAO;YACd,IAAI,EAAE,KAAK,CAAC,IAAI;SAChB,CAAC,CAAA;QAEF,MAAM,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,KAAK,IAAI,EAAE;YACtC,IAAI,CAAC,CAAC,cAAc,IAAI,MAAM,CAAC,EAAE,CAAC;gBACjC,KAAK,CAAC,MAAM,GAAG,OAAO,CAAA;gBACtB,OAAO,KAAK,CAAA;YACb,CAAC;YACD,MAAM,mBAAmB,GAAG,WAAW,CAAC,GAAG,EAAE;gBAC5C,MAAM,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,SAAS,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC;qBAC1D,GAAG,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,CAAC,SAAS,EAAE,CAAC;qBACrC,MAAM,CACN,CAAC,GAAG,EAAE,MAAM,EAAE,EAAE;oBACf,GAAG,CAAC,WAAW,IAAI,MAAM,EAAE,WAAW,IAAI,CAAC,CAAA;oBAC3C,GAAG,CAAC,UAAU,IAAI,MAAM,EAAE,UAAU,IAAI,CAAC,CAAA;oBACzC,GAAG,CAAC,KAAK,IAAI,MAAM,EAAE,KAAK,IAAI,CAAC,CAAA;oBAC/B,OAAO,GAAG,CAAA;gBACX,CAAC,EACD,EAAE,WAAW,EAAE,CAAC,EAAE,UAAU,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,CAC3C,CAAA;gBACF,IAAI,QAAQ,CAAC,UAAU,EAAE,CAAC;oBACzB,MAAM,OAAO,GAAG,CAAC,QAAQ,CAAC,WAAW,GAAG,QAAQ,CAAC,UAAU,CAAC,GAAG,GAAG,CAAA;oBAClE,MAAM,mBAAmB,GAAG,WAAW,CAAC,QAAQ,CAAC,UAAU,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,CAAA;oBAC9E,MAAM,oBAAoB,GAAG,WAAW,CAAC,QAAQ,CAAC,WAAW,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,CAAA;oBAChF,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,IAAI,EAAE,kBAAkB,oBAAoB,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,oBAAoB,OAAO,mBAAmB,EAAE,EAAE;wBAC7J,KAAK,EAAE,OAAO;qBACd,CAAC,CAAA;gBACH,CAAC;YACF,CAAC,EAAE,KAAK,CAA
C,CAAA;YACT,IAAI,CAAC;gBACJ,MAAM,SAAS,GAAG,MAAM,MAAM,CAAC,YAAY,CAC1C,EAAE,MAAM,EAAE,KAAK,EAAE,GAAG,EAAE,IAAI,CAAC,GAAG,EAAE,EAChC,CAAC,QAAQ,EAAE,EAAE;oBACZ,IAAI,CAAC,kBAAkB,CAAC,KAAK,CAAC,EAAE,EAAE,QAAQ,CAAC,CAAA;gBAC5C,CAAC,EACD,iBAAiB,CAAC,CAAC,MAAM,EAAE,IAAI,CAAC,iBAAiB,CAAC,MAAM,CAAC,CAAC,CAC1D,CAAA;gBACD,KAAK,CAAC,SAAS,GAAG,SAAS,CAAA;gBAC3B,KAAK,CAAC,IAAI,GAAG,SAAS,CAAA;gBACtB,KAAK,CAAC,MAAM,GAAG,OAAO,CAAA;gBACtB,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,IAAI,EAAE,aAAa,EAAE;oBACvC,KAAK,EAAE,OAAO;oBACd,IAAI,EAAE,KAAK,CAAC,IAAI;iBAChB,CAAC,CAAA;YACH,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBAChB,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,KAAK,EAAE,uBAAuB,EAAE;oBAClD,KAAK,EAAE,OAAO;oBACd,KAAK,EAAE,KAAK;iBACZ,CAAC,CAAA;gBACF,KAAK,CAAC,MAAM,GAAG,OAAO,CAAA;YACvB,CAAC;oBAAS,CAAC;gBACV,aAAa,CAAC,mBAAmB,CAAC,CAAA;YACnC,CAAC;YACD,OAAO,KAAK,CAAA;QACb,CAAC,CAAC,CAAA;IACH,CAAC;IAED,SAAS;QACR,MAAM,WAAW,GAAG,CAAC,GAAY,EAAE,EAAE,CAAC,UAAU,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC,CAAC,IAAI,GAAG,CAAC,CAAA;QACxE,MAAM,eAAe,GAAG,MAAM,CAAC,WAAW,CACzC,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,OAAO,EAAE,KAAK,CAAC,EAAE,EAAE;YACpD,IAAI,SAAS,GAAQ,SAAS,CAAA;YAC9B,IAAI,KAAK,CAAC,SAAS,EAAE,CAAC;gBACrB,SAAS,GAAG,CAAC,GAAG,KAAK,CAAC,SAAS,CAAC,CAAC,MAAM,CACtC,CAAC,GAAG,EAAE,CAAC,GAAG,EAAE,QAAQ,CAAC,EAAE,EAAE;oBACxB,MAAM,MAAM,GAAG,QAAQ,CAAC,SAAS,EAAE,CAAA;oBACnC,MAAM,WAAW,GAChB,QAAQ,CAAC,cAAc,CAAC,QAAQ,CAAC,cAAc,CAAC,MAAM,GAAG,CAAC,CAAC,CAAA;oBAC5D,MAAM,UAAU,GAAG,WAAW,EAAE,UAAU,IAAI,CAAC,CAAA;oBAC/C,MAAM,WAAW,GAAG,WAAW,EAAE,WAAW,IAAI,CAAC,CAAA;oBACjD,MAAM,UAAU,GAAG,MAAM,EAAE,UAAU,IAAI,CAAC,CAAA;oBAC1C,MAAM,YAAY,GAAG,kBAAkB,CAAC,UAAU,GAAG,IAAI,CAAC,CAAA;oBAC1D,MAAM,mBAAmB,GAAG,WAAW,CAAC,UAAU,CAAC,CAAA;oBACnD,MAAM,oBAAoB,GAAG,WAAW,CAAC,WAAW,CAAC,CAAA;oBACrD,GAAG,CAAC,IAAI,CAAC;wBACR,IAAI,EAAE,GAAG;wBACT,WAAW;wBACX,oBAAoB;wBACpB,UAAU;wBACV,mBAAmB;wBACnB,OAAO,EAAE,WAAW,CAAC,MAAM,EAAE,OAAO,CAAC;wBACrC,KAAK,EAAE,WAAW,CAAC,MAAM,EAAE,KAAK,CAAC;wBACjC,UAAU,EAAE,WAAW,CAAC,UAAU,CAAC;wBACnC,YAAY;qBACZ,CAAC,CAAA;oBACF,OAAO,GAAG,C
AAA;gBACX,CAAC,EACD,EAAE,CACF,CAAA;YACF,CAAC;YACD,OAAO;gBACN,OAAO;gBACP;oBACC,MAAM,EAAE,KAAK,CAAC,MAAM;oBACpB,MAAM,EAAE,KAAK,CAAC,MAAM;oBACpB,YAAY,EAAE,KAAK,CAAC,YAAY;oBAChC,YAAY,EAAE,KAAK,CAAC,YAAY;oBAChC,MAAM,EAAE,KAAK,CAAC,MAAM;oBACpB,SAAS;iBACT;aACD,CAAA;QACF,CAAC,CAAC,CACF,CAAA;QACD,OAAO,eAAe,CAAA;IACvB,CAAC;CACD;AAgBD,MAAM,eAAe;IACpB,cAAc,GAAoB,EAAE,CAAA;IAC5B,UAAU,CAAQ;IAE1B,YAAY,aAAqB,IAAI;QACpC,IAAI,CAAC,UAAU,GAAG,UAAU,CAAA;IAC7B,CAAC;IAED,YAAY,CAAC,EAAE,WAAW,EAAE,UAAU,EAAwB;QAC7D,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAA;QAC5B,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,EAAE,WAAW,EAAE,UAAU,EAAE,SAAS,EAAE,CAAC,CAAA;QAChE,IAAI,CAAC,OAAO,EAAE,CAAA;IACf,CAAC;IAEO,OAAO;QACd,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,IAAI,CAAC,UAAU,CAAA;QAC/C,IAAI,CAAC,cAAc,GAAG,IAAI,CAAC,cAAc,CAAC,MAAM,CAC/C,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,SAAS,IAAI,UAAU,CACtC,CAAA;IACF,CAAC;IAED,SAAS;QACR,IAAI,IAAI,CAAC,cAAc,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACpC,OAAO,IAAI,CAAA,CAAC,6CAA6C;QAC1D,CAAC;QAED,MAAM,WAAW,GAAG,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,cAAc,CAAC,MAAM,GAAG,CAAC,CAAC,CAAA;QACvE,MAAM,aAAa,GAAG,IAAI,CAAC,cAAc,CAAC,CAAC,CAAC,CAAA,CAAC,sCAAsC;QAEnF,MAAM,WAAW,GAAG,WAAW,CAAC,WAAW,GAAG,aAAa,CAAC,WAAW,CAAA;QACvE,MAAM,WAAW,GAAG,WAAW,CAAC,SAAS,GAAG,aAAa,CAAC,SAAS,CAAA,CAAC,kBAAkB;QAEtF,MAAM,KAAK,GAAG,WAAW,GAAG,CAAC,WAAW,GAAG,IAAI,CAAC,CAAA,CAAC,mBAAmB;QACpE,MAAM,cAAc,GAAG,WAAW,CAAC,UAAU,GAAG,WAAW,CAAC,WAAW,CAAA;QACvE,MAAM,GAAG,GAAG,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,cAAc,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,CAAA;QAElD,OAAO;YACN,KAAK;YACL,UAAU,EAAE,GAAG;YACf,OAAO,EAAE,WAAW,CAAC,WAAW,GAAG,WAAW,CAAC,UAAU;YACzD,WAAW,EAAE,WAAW,CAAC,WAAW;YACpC,UAAU,EAAE,WAAW,CAAC,UAAU;SAClC,CAAA;IACF,CAAC;CACD"}
@@ -0,0 +1,57 @@
1
+ import type { SomeJSONSchema } from 'ajv/dist/types/json-schema';
2
+ import { Image } from './index.js';
3
+ export type CompletionFinishReason = 'maxTokens' | 'toolCalls' | 'eogToken' | 'stopTrigger' | 'abort' | 'cancel' | 'timeout';
4
// A single tool invocation attached to an assistant message (see
// AssistantMessage.toolCalls). `parameters` is optional and untyped beyond
// being a string-keyed record.
+ export interface AssistantToolCall {
5
+ id: string;
6
+ name: string;
7
+ parameters?: Record<string, any>;
8
+ }
9
// Union of the four role-specific message shapes declared below; the `role`
// literal on each member discriminates the union.
+ export type ChatMessage = UserMessage | SystemMessage | AssistantMessage | ToolCallResultMessage;
10
// Content part carrying plain text.
+ export interface MessageTextContentPart {
11
+ type: 'text';
12
+ text: string;
13
+ }
14
// Content part carrying an image (the `Image` type is imported from ./index.js).
+ export interface MessageImageContentPart {
15
+ type: 'image';
16
+ image: Image;
17
+ }
18
// One element of a multi-part message body, discriminated by `type`.
+ export type MessageContentPart = MessageTextContentPart | MessageImageContentPart;
19
// User, system and tool-result messages accept either a plain string or a
// list of content parts as `content`.
+ export interface UserMessage {
20
+ role: 'user';
21
+ content: string | MessageContentPart[];
22
+ }
23
+ export interface SystemMessage {
24
+ role: 'system';
25
+ content: string | MessageContentPart[];
26
+ }
27
// Unlike the other roles, assistant `content` is always a plain string; tool
// invocations are carried separately in the optional `toolCalls` list.
+ export interface AssistantMessage {
28
+ role: 'assistant';
29
+ content: string;
30
+ toolCalls?: AssistantToolCall[];
31
+ }
32
// Result of a tool invocation.
// NOTE(review): `callId` presumably matches AssistantToolCall.id - confirm against callers.
+ export interface ToolCallResultMessage {
33
+ role: 'tool';
34
+ content: string | MessageContentPart[];
35
+ callId: string;
36
+ }
37
// Schema type for a tool's parameters. The TParams type argument is accepted
// but not used: the alias always resolves to ajv's SomeJSONSchema.
+ export type ToolDefinitionParams<TParams> = SomeJSONSchema;
38
// Declaration of a callable tool. Every member is optional; when present,
// `handler` receives the parameters typed as TParams and resolves to a string.
+ export interface ToolDefinition<TParams extends Record<string, any> = any> {
39
+ description?: string;
40
+ parameters?: ToolDefinitionParams<TParams>;
41
+ handler?: (params: TParams) => Promise<string>;
42
+ }
43
// Options bag for text completion requests. Every field is optional.
// NOTE(review): per-field semantics, units and defaults are decided by the
// engine implementations, which are not visible here - confirm there before
// documenting individual fields.
+ export interface TextCompletionParams {
44
+ temperature?: number;
45
+ maxTokens?: number;
46
+ seed?: number;
47
+ stop?: string[];
48
+ repeatPenalty?: number;
49
+ repeatPenaltyNum?: number;
50
+ frequencyPenalty?: number;
51
+ presencePenalty?: number;
52
+ grammar?: string;
53
+ topP?: number;
54
+ minP?: number;
55
+ topK?: number;
56
+ tokenBias?: Record<string, number>;
57
+ }
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=completions.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"completions.js","sourceRoot":"","sources":["../../src/types/completions.ts"],"names":[],"mappings":""}
@@ -0,0 +1,326 @@
1
+ import type { SomeJSONSchema } from 'ajv/dist/types/json-schema';
2
+ import type { Sharp } from 'sharp';
3
+ import type { BuiltInEngineName } from '../engines/index.js';
4
+ import type { Logger } from '../lib/logger.js';
5
+ import type { ModelPool } from '../pool.js';
6
+ import type { ModelStore } from '../store.js';
7
+ import { AssistantMessage, ChatMessage, CompletionFinishReason, TextCompletionParams, ToolDefinition } from '../types/completions.js';
8
+ import type { ContextShiftStrategy } from '../engines/node-llama-cpp/types.js';
9
+ import type { StableDiffusionWeightType, StableDiffusionSamplingMethod, StableDiffusionSchedule } from '../engines/stable-diffusion-cpp/types.js';
10
+ import type { TransformersJsModelClass, TransformersJsTokenizerClass, TransformersJsProcessorClass, TransformersJsDataType } from '../engines/transformers-js/types.js';
11
+ export * from '../types/completions.js';
12
+ export type ModelTaskType = 'text-completion' | 'embedding' | 'image-to-text' | 'image-to-image' | 'text-to-image' | 'speech-to-text';
13
+ export interface ModelOptionsBase {
14
+ engine: BuiltInEngineName | (string & {});
15
+ task: ModelTaskType | (string & {});
16
+ prepare?: 'blocking' | 'async' | 'on-demand';
17
+ minInstances?: number;
18
+ maxInstances?: number;
19
+ location?: string;
20
+ }
21
+ export interface BuiltInModelOptionsBase extends ModelOptionsBase {
22
+ engine: BuiltInEngineName;
23
+ task: ModelTaskType;
24
+ url?: string;
25
+ location?: string;
26
+ }
27
+ export interface ModelConfigBase extends ModelOptionsBase {
28
+ id: string;
29
+ minInstances: number;
30
+ maxInstances: number;
31
+ modelsCachePath: string;
32
+ }
33
+ export interface ModelConfig extends ModelConfigBase {
34
+ url?: string;
35
+ location?: string;
36
+ task: ModelTaskType | (string & {});
37
+ engine: BuiltInEngineName | (string & {});
38
+ ttl?: number;
39
+ prefix?: string;
40
+ initialMessages?: ChatMessage[];
41
+ device?: {
42
+ gpu?: boolean | 'auto' | (string & {});
43
+ };
44
+ }
45
+ export interface CompletionChunk {
46
+ tokens: number[];
47
+ text: string;
48
+ }
49
+ export interface ProcessingOptions {
50
+ timeout?: number;
51
+ signal?: AbortSignal;
52
+ }
53
+ export interface Image {
54
+ handle: Sharp;
55
+ width: number;
56
+ height: number;
57
+ channels: 1 | 2 | 3 | 4;
58
+ }
59
+ export interface CompletionProcessingOptions extends ProcessingOptions {
60
+ onChunk?: (chunk: CompletionChunk) => void;
61
+ }
62
+ export interface SpeechToTextProcessingOptions extends ProcessingOptions {
63
+ onChunk?: (chunk: {
64
+ text: string;
65
+ }) => void;
66
+ }
67
+ export interface EngineContext<TModelConfig = ModelConfig, TModelMeta = unknown> {
68
+ config: TModelConfig;
69
+ meta?: TModelMeta;
70
+ log: Logger;
71
+ }
72
+ export interface TextCompletionRequestBase extends TextCompletionParams {
73
+ model: string;
74
+ stream?: boolean;
75
+ }
76
+ export interface TextCompletionRequest extends TextCompletionRequestBase {
77
+ prompt?: string;
78
+ }
79
+ export interface ChatCompletionRequest extends TextCompletionRequestBase {
80
+ messages: ChatMessage[];
81
+ grammar?: string;
82
+ tools?: Record<string, ToolDefinition>;
83
+ }
84
+ export interface TextEmbeddingInput {
85
+ type: 'text';
86
+ content: string;
87
+ }
88
+ export interface ImageEmbeddingInput {
89
+ type: 'image';
90
+ content: Image;
91
+ }
92
+ export type EmbeddingInput = TextEmbeddingInput | ImageEmbeddingInput | string;
93
+ export interface EmbeddingRequest {
94
+ model: string;
95
+ input: EmbeddingInput | EmbeddingInput[];
96
+ dimensions?: number;
97
+ pooling?: 'cls' | 'mean';
98
+ }
99
+ export interface ImageToTextRequest {
100
+ model: string;
101
+ image: Image;
102
+ prompt?: string;
103
+ maxTokens?: number;
104
+ }
105
+ export interface StableDiffusionRequest {
106
+ negativePrompt?: string;
107
+ guidance?: number;
108
+ styleRatio?: number;
109
+ strength?: number;
110
+ sampleSteps?: number;
111
+ batchCount?: number;
112
+ samplingMethod?: StableDiffusionSamplingMethod;
113
+ cfgScale?: number;
114
+ controlStrength?: number;
115
+ }
116
+ export interface TextToImageRequest extends StableDiffusionRequest {
117
+ model: string;
118
+ prompt: string;
119
+ width?: number;
120
+ height?: number;
121
+ seed?: number;
122
+ }
123
+ export interface ImageToImageRequest extends StableDiffusionRequest {
124
+ model: string;
125
+ image: Image;
126
+ prompt: string;
127
+ width?: number;
128
+ height?: number;
129
+ seed?: number;
130
+ }
131
+ export interface SpeechToTextRequest {
132
+ model: string;
133
+ url?: string;
134
+ file?: string;
135
+ language?: string;
136
+ prompt?: string;
137
+ maxTokens?: number;
138
+ }
139
+ export interface ModelRequestMeta {
140
+ sequence: number;
141
+ abortController: AbortController;
142
+ }
143
+ export type IncomingRequest = TextCompletionRequest | ChatCompletionRequest | EmbeddingRequest | ImageToTextRequest | SpeechToTextRequest;
144
+ export type ModelInstanceRequest = ModelRequestMeta & IncomingRequest;
145
+ export interface EngineTextCompletionArgs<TModelConfig = unknown, TModelMeta = unknown> extends EngineContext<TModelConfig, TModelMeta> {
146
+ onChunk?: (chunk: CompletionChunk) => void;
147
+ resetContext?: boolean;
148
+ request: TextCompletionRequest;
149
+ }
150
+ export interface EngineChatCompletionArgs<TModelConfig = unknown, TModelMeta = unknown> extends EngineContext<TModelConfig, TModelMeta> {
151
+ onChunk?: (chunk: CompletionChunk) => void;
152
+ resetContext?: boolean;
153
+ request: ChatCompletionRequest;
154
+ }
155
+ export interface EngineEmbeddingArgs<TModelConfig = unknown, TModelMeta = unknown> extends EngineContext<TModelConfig, TModelMeta> {
156
+ request: EmbeddingRequest;
157
+ }
158
+ export interface EngineImageToTextArgs<TModelConfig = unknown, TModelMeta = unknown> extends EngineContext<TModelConfig, TModelMeta> {
159
+ request: ImageToTextRequest;
160
+ }
161
+ export interface EngineTextToImageArgs<TModelConfig = unknown, TModelMeta = unknown> extends EngineContext<TModelConfig, TModelMeta> {
162
+ request: TextToImageRequest;
163
+ }
164
+ export interface EngineImageToImageArgs<TModelConfig = unknown, TModelMeta = unknown> extends EngineContext<TModelConfig, TModelMeta> {
165
+ request: ImageToImageRequest;
166
+ }
167
+ export interface EngineSpeechToTextArgs<TModelConfig = unknown, TModelMeta = unknown> extends EngineContext<TModelConfig, TModelMeta> {
168
+ request: SpeechToTextRequest;
169
+ onChunk?: (chunk: {
170
+ text: string;
171
+ }) => void;
172
+ }
173
+ export interface FileDownloadProgress {
174
+ file: string;
175
+ loadedBytes: number;
176
+ totalBytes: number;
177
+ }
178
+ export interface EngineStartContext {
179
+ pool: ModelPool;
180
+ store: ModelStore;
181
+ }
182
+ export interface ModelEngine<TInstance = unknown, TModelConfig extends ModelConfig = ModelConfig, TModelMeta = unknown> {
183
+ autoGpu?: boolean;
184
+ start?: (ctx: EngineStartContext) => Promise<void>;
185
+ prepareModel: (ctx: EngineContext<TModelConfig, TModelMeta>, onProgress?: (progress: FileDownloadProgress) => void, signal?: AbortSignal) => Promise<TModelMeta>;
186
+ createInstance: (ctx: EngineContext<TModelConfig, TModelMeta>, signal?: AbortSignal) => Promise<TInstance>;
187
+ disposeInstance: (instance: TInstance) => Promise<void>;
188
+ processChatCompletionTask?: (args: EngineChatCompletionArgs<TModelConfig, TModelMeta>, instance: TInstance, signal?: AbortSignal) => Promise<EngineChatCompletionResult>;
189
+ processTextCompletionTask?: (args: EngineTextCompletionArgs<TModelConfig, TModelMeta>, instance: TInstance, signal?: AbortSignal) => Promise<EngineTextCompletionResult>;
190
+ processEmbeddingTask?: (args: EngineEmbeddingArgs<TModelConfig, TModelMeta>, instance: TInstance, signal?: AbortSignal) => Promise<EngineEmbeddingResult>;
191
+ processImageToTextTask?: (args: EngineImageToTextArgs<TModelConfig, TModelMeta>, instance: TInstance, signal?: AbortSignal) => Promise<EngineImageToTextResult>;
192
+ processSpeechToTextTask?: (args: EngineSpeechToTextArgs<TModelConfig, TModelMeta>, instance: TInstance, signal?: AbortSignal) => Promise<EngineSpeechToTextResult>;
193
+ processTextToImageTask?: (args: EngineTextToImageArgs<TModelConfig, TModelMeta>, instance: TInstance, signal?: AbortSignal) => Promise<EngineTextToImageResult>;
194
+ processImageToImageTask?: (args: EngineImageToImageArgs<TModelConfig, TModelMeta>, instance: TInstance, signal?: AbortSignal) => Promise<EngineImageToImageResult>;
195
+ }
196
+ interface EmbeddingModelOptions {
197
+ task: 'embedding';
198
+ }
199
+ export type TextCompletionGrammar = string | SomeJSONSchema;
200
+ interface TextCompletionModelOptions {
201
+ task: 'text-completion';
202
+ contextSize?: number;
203
+ grammars?: Record<string, TextCompletionGrammar>;
204
+ completionDefaults?: TextCompletionParams;
205
+ initialMessages?: ChatMessage[];
206
+ prefix?: string;
207
+ batchSize?: number;
208
+ }
209
+ interface LlamaCppModelOptionsBase extends BuiltInModelOptionsBase {
210
+ engine: 'node-llama-cpp';
211
+ task: 'text-completion' | 'embedding';
212
+ sha256?: string;
213
+ batchSize?: number;
214
+ contextShiftStrategy?: ContextShiftStrategy;
215
+ tools?: {
216
+ definitions: Record<string, ToolDefinition>;
217
+ includeParamsDocumentation?: boolean;
218
+ parallelism?: number;
219
+ };
220
+ device?: {
221
+ gpu?: boolean | 'auto' | (string & {});
222
+ gpuLayers?: number;
223
+ cpuThreads?: number;
224
+ memLock?: boolean;
225
+ };
226
+ }
227
+ interface LlamaCppEmbeddingModelOptions extends LlamaCppModelOptionsBase, EmbeddingModelOptions {
228
+ task: 'embedding';
229
+ }
230
+ export interface LlamaCppTextCompletionModelOptions extends LlamaCppModelOptionsBase, TextCompletionModelOptions {
231
+ task: 'text-completion';
232
+ }
233
+ interface GPT4AllModelOptions extends BuiltInModelOptionsBase {
234
+ engine: 'gpt4all';
235
+ task: 'text-completion' | 'embedding';
236
+ md5?: string;
237
+ device?: {
238
+ gpu?: boolean | 'auto' | (string & {});
239
+ gpuLayers?: number;
240
+ cpuThreads?: number;
241
+ };
242
+ }
243
+ type GPT4AllTextCompletionModelOptions = TextCompletionModelOptions & GPT4AllModelOptions;
244
+ type GPT4AllEmbeddingModelOptions = GPT4AllModelOptions & EmbeddingModelOptions;
245
+ export interface TransformersJsModel {
246
+ processor?: {
247
+ url?: string;
248
+ file?: string;
249
+ };
250
+ processorClass?: TransformersJsProcessorClass;
251
+ tokenizerClass?: TransformersJsTokenizerClass;
252
+ modelClass?: TransformersJsModelClass;
253
+ dtype?: Record<string, TransformersJsDataType> | TransformersJsDataType;
254
+ }
255
+ interface TransformersJsModelOptions extends BuiltInModelOptionsBase {
256
+ engine: 'transformers-js';
257
+ task: 'image-to-text' | 'speech-to-text' | 'text-completion' | 'embedding';
258
+ textModel?: TransformersJsModel;
259
+ visionModel?: TransformersJsModel;
260
+ speechModel?: TransformersJsModel;
261
+ device?: {
262
+ gpu?: boolean | 'auto' | (string & {});
263
+ };
264
+ }
265
+ export interface ModelFileSource {
266
+ url?: string;
267
+ file?: string;
268
+ sha256?: string;
269
+ }
270
+ interface StableDiffusionModelOptions extends BuiltInModelOptionsBase {
271
+ engine: 'stable-diffusion-cpp';
272
+ task: 'image-to-text' | 'text-to-image' | 'image-to-image';
273
+ sha256?: string;
274
+ url?: string;
275
+ diffusionModel?: boolean;
276
+ vae?: ModelFileSource;
277
+ clipL?: ModelFileSource;
278
+ clipG?: ModelFileSource;
279
+ t5xxl?: ModelFileSource;
280
+ taesd?: ModelFileSource;
281
+ controlNet?: ModelFileSource;
282
+ samplingMethod?: StableDiffusionSamplingMethod;
283
+ weightType?: StableDiffusionWeightType;
284
+ schedule?: StableDiffusionSchedule;
285
+ loras?: ModelFileSource[];
286
+ }
287
+ export interface CustomEngineModelOptions extends ModelOptionsBase {
288
+ }
289
+ export type BuiltInModelOptions = LlamaCppTextCompletionModelOptions | LlamaCppEmbeddingModelOptions | GPT4AllTextCompletionModelOptions | GPT4AllEmbeddingModelOptions | TransformersJsModelOptions | StableDiffusionModelOptions;
290
+ export type ModelOptions = BuiltInModelOptions | CustomEngineModelOptions;
291
+ export interface EngineEmbeddingResult {
292
+ embeddings: Float32Array[];
293
+ inputTokens: number;
294
+ }
295
+ export interface ChatCompletionResult extends EngineChatCompletionResult {
296
+ id: string;
297
+ model: string;
298
+ }
299
+ export interface EngineChatCompletionResult {
300
+ message: AssistantMessage;
301
+ finishReason: CompletionFinishReason;
302
+ promptTokens: number;
303
+ completionTokens: number;
304
+ contextTokens: number;
305
+ }
306
+ export interface EngineTextCompletionResult {
307
+ text: string;
308
+ finishReason?: CompletionFinishReason;
309
+ promptTokens: number;
310
+ completionTokens: number;
311
+ contextTokens: number;
312
+ }
313
+ export interface EngineImageToTextResult {
314
+ text: string;
315
+ }
316
+ export interface EngineTextToImageResult {
317
+ images: Image[];
318
+ seed: number;
319
+ }
320
+ export interface EngineImageToImageResult {
321
+ images: Image[];
322
+ seed: number;
323
+ }
324
+ export interface EngineSpeechToTextResult {
325
+ text: string;
326
+ }
@@ -0,0 +1,2 @@
1
+ export * from '../types/completions.js';
2
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/types/index.ts"],"names":[],"mappings":"AAyBA,cAAc,+BAA+B,CAAA"}
@@ -0,0 +1,28 @@
1
+
2
+ ### Engines
3
+
4
+ The built-in inference engines are currently `node-llama-cpp`, `gpt4all`, `transformers-js` (highly experimental) and `stable-diffusion-cpp` (work in progress). Install the corresponding peer dependency before using an engine.
5
+
6
+ #### node-llama-cpp
7
+
8
+ Can be used for `text-completion` and `embedding` tasks. See the [node-llama-cpp docs](https://node-llama-cpp.withcat.ai/) for more information.
9
+
10
+ Find available GGUF models on [huggingface.co](https://huggingface.co/models?library=gguf).
11
+
12
+ #### gpt4all
13
+
14
+ Can be used for `text-completion` and `embedding` tasks. You can find parameter docs [here](https://github.com/nomic-ai/gpt4all/blob/c73f0e5c8c25ede56e3eeb28ff9dd37f09212994/gpt4all-bindings/typescript/src/gpt4all.d.ts#L615).
15
+
16
+ You can find available models [here](https://github.com/nomic-ai/gpt4all/blob/c73f0e5c8c25ede56e3eeb28ff9dd37f09212994/gpt4all-chat/metadata/models3.json).
17
+
18
+ #### transformers-js
19
+
20
+ Currently supports `speech-to-text` and `image-to-text` tasks. See the [tests](./tests/engines/transformers.test.ts).
21
+
22
+ #### node-stable-diffusion-cpp
23
+
24
+ WIP. See [tests](./tests/engines/stable-diffusion.test.ts).
25
+
26
+ #### Custom Engines
27
+
28
+ You can also write your own engine implementation. See [./src/engines](./src/engines) for how the built-in engines are implemented and [here](./tests/engines/experiments.test.ts) for examples of how to utilize custom engines to combine models and add multimodality to your chat completion endpoint (or to any other consumer of the ModelServer class). Multiple ModelServer instances are allowed and can also be nested to create more complex pipelines.