@openspecui/server 3.11.3 → 3.11.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,5 @@
1
1
  import { join, posix } from "node:path";
2
+ import { runControlledTranslationTask } from "@openspecui/core/translator";
2
3
 
3
4
  //#region ../local-llama-translator/src/factory.ts
4
5
  const DEFAULT_SYSTEM_PROMPT = "You are a translation engine. Return only the translated text, preserve Markdown structure, inline code, URLs, and file paths.";
@@ -9,23 +10,15 @@ var LocalLlamaTranslatorFactory = class {
9
10
  async prepare(options) {
10
11
  const model = options.model || this.options.defaultModel;
11
12
  if (!model) throw new Error("A GGUF model id or runtime model path is required.");
12
- const module = await (this.options.loadModule ?? loadLlamaRuntimeModule)();
13
- const resolvedConfig = readRuntimeConfig(options.runtimeConfig);
14
- const runtimeModel = await loadRuntimeModel({
15
- module,
13
+ await probeLocalLlamaRuntimeModel({
16
14
  model,
17
15
  cacheDir: this.options.cacheDir,
18
- runtimeConfig: resolvedConfig,
19
- defaultGpuLayers: this.options.gpuLayers,
16
+ runtimeConfig: options.runtimeConfig,
17
+ loadModule: this.options.loadModule,
18
+ contextSize: this.options.contextSize,
19
+ gpuLayers: this.options.gpuLayers,
20
20
  monitor: options.monitor
21
21
  });
22
- const context = await runtimeModel.createContext({ contextSize: resolvedConfig.contextSize ?? this.options.contextSize });
23
- options.monitor?.setStatus({
24
- message: `Llama model ${model} is ready.`,
25
- progress: 1
26
- });
27
- await disposeRuntimeNode(context);
28
- await disposeRuntimeNode(runtimeModel);
29
22
  }
30
23
  async create(options) {
31
24
  const model = options.model || this.options.defaultModel;
@@ -56,6 +49,31 @@ var LocalLlamaTranslatorFactory = class {
56
49
  function createLocalLlamaTranslatorFactory(options = {}) {
57
50
  return new LocalLlamaTranslatorFactory(options);
58
51
  }
52
+ async function probeLocalLlamaRuntimeModel(input) {
53
+ const module = await (input.loadModule ?? loadLlamaRuntimeModule)();
54
+ const resolvedConfig = readRuntimeConfig(input.runtimeConfig);
55
+ const runtimeModel = await loadRuntimeModel({
56
+ module,
57
+ model: input.model,
58
+ cacheDir: input.cacheDir,
59
+ runtimeConfig: resolvedConfig,
60
+ defaultGpuLayers: input.gpuLayers,
61
+ monitor: input.monitor
62
+ });
63
+ try {
64
+ const context = await runtimeModel.createContext({ contextSize: resolvedConfig.contextSize ?? input.contextSize });
65
+ try {
66
+ input.monitor?.setStatus({
67
+ message: `Llama model ${input.model} is ready.`,
68
+ progress: 1
69
+ });
70
+ } finally {
71
+ await disposeRuntimeNode(context);
72
+ }
73
+ } finally {
74
+ await disposeRuntimeNode(runtimeModel);
75
+ }
76
+ }
59
77
  var LocalLlamaTranslator = class {
60
78
  constructor(module, model, options) {
61
79
  this.module = module;
@@ -64,32 +82,46 @@ var LocalLlamaTranslator = class {
64
82
  }
65
83
  async *batchTranslate(inputs, options) {
66
84
  for (const [index, input] of inputs.entries()) {
67
- throwIfAborted(options?.signal);
68
- const context = await this.model.createContext({ contextSize: this.options.runtimeConfig.contextSize ?? this.options.factoryOptions.contextSize });
69
- try {
70
- const session = new this.module.LlamaChatSession({
71
- contextSequence: context.getSequence(),
72
- systemPrompt: this.options.runtimeConfig.systemPrompt ?? this.options.factoryOptions.systemPrompt ?? DEFAULT_SYSTEM_PROMPT
85
+ const controlled = await runControlledTranslationTask(async (signal) => {
86
+ throwIfAborted(signal);
87
+ const context = await this.model.createContext({
88
+ contextSize: this.options.runtimeConfig.contextSize ?? this.options.factoryOptions.contextSize,
89
+ batchSize: this.options.runtimeConfig.batchSize ?? this.options.factoryOptions.batchSize,
90
+ flashAttention: this.options.runtimeConfig.flashAttention ?? this.options.factoryOptions.flashAttention
73
91
  });
74
92
  try {
75
- const output = await session.prompt(buildTranslationPrompt({
76
- sourceLanguage: this.options.sourceLanguage,
77
- targetLanguage: this.options.targetLanguage,
78
- text: input,
79
- instructions: options?.instructions,
80
- context: options?.context
81
- }));
82
- throwIfAborted(options?.signal);
83
- yield {
84
- index,
85
- output: output.trim()
86
- };
93
+ const session = new this.module.LlamaChatSession({
94
+ contextSequence: context.getSequence(),
95
+ systemPrompt: this.options.runtimeConfig.systemPrompt ?? this.options.factoryOptions.systemPrompt ?? DEFAULT_SYSTEM_PROMPT
96
+ });
97
+ try {
98
+ const output = await session.prompt(buildTranslationPrompt({
99
+ sourceLanguage: this.options.sourceLanguage,
100
+ targetLanguage: this.options.targetLanguage,
101
+ text: input,
102
+ instructions: options?.instructions,
103
+ context: options?.context
104
+ }));
105
+ throwIfAborted(signal);
106
+ return output.trim();
107
+ } finally {
108
+ await disposeRuntimeNode(session);
109
+ }
87
110
  } finally {
88
- await disposeRuntimeNode(session);
111
+ await disposeRuntimeNode(context);
89
112
  }
90
- } finally {
91
- await disposeRuntimeNode(context);
113
+ }, options);
114
+ if (controlled.ok) {
115
+ yield {
116
+ index,
117
+ output: controlled.value
118
+ };
119
+ continue;
92
120
  }
121
+ yield {
122
+ index,
123
+ error: controlled.error
124
+ };
93
125
  }
94
126
  }
95
127
  destroy() {
@@ -104,7 +136,10 @@ async function loadRuntimeModel(input) {
104
136
  cacheDir: input.cacheDir,
105
137
  runtimeConfig: input.runtimeConfig
106
138
  }),
107
- gpuLayers: input.runtimeConfig.gpuLayers ?? input.defaultGpuLayers
139
+ gpuLayers: input.runtimeConfig.gpuLayers ?? input.defaultGpuLayers,
140
+ useMmap: input.runtimeConfig.useMmap,
141
+ useMlock: input.runtimeConfig.useMlock,
142
+ defaultContextFlashAttention: input.runtimeConfig.flashAttention
108
143
  });
109
144
  }
110
145
  function buildTranslationPrompt(input) {
@@ -126,8 +161,12 @@ function readRuntimeConfig(runtimeConfig) {
126
161
  return {
127
162
  modelPath: readString(runtimeConfig, "modelPath"),
128
163
  contextSize: readNumber(runtimeConfig, "contextSize"),
129
- gpuLayers: readNumber(runtimeConfig, "gpuLayers"),
130
- systemPrompt: readString(runtimeConfig, "systemPrompt")
164
+ gpuLayers: readGpuLayers(runtimeConfig?.gpuLayers),
165
+ systemPrompt: readString(runtimeConfig, "systemPrompt"),
166
+ batchSize: readNumber(runtimeConfig, "batchSize"),
167
+ flashAttention: readBoolean(runtimeConfig, "flashAttention"),
168
+ useMmap: readBoolean(runtimeConfig, "useMmap"),
169
+ useMlock: readBoolean(runtimeConfig, "useMlock")
131
170
  };
132
171
  }
133
172
  function readString(record, key) {
@@ -138,6 +177,14 @@ function readNumber(record, key) {
138
177
  const value = record?.[key];
139
178
  return typeof value === "number" && Number.isFinite(value) ? value : void 0;
140
179
  }
180
+ function readBoolean(record, key) {
181
+ const value = record?.[key];
182
+ return typeof value === "boolean" ? value : void 0;
183
+ }
184
+ function readGpuLayers(value) {
185
+ if (typeof value === "number" && Number.isFinite(value)) return value;
186
+ if (value === "auto" || value === "max") return value;
187
+ }
141
188
  async function disposeRuntimeNode(value) {
142
189
  await value?.dispose?.();
143
190
  }
@@ -229,4 +276,4 @@ function dedupeFiles(files) {
229
276
  }
230
277
 
231
278
  //#endregion
232
- export { LocalLlamaTranslatorFactory as n, createLocalLlamaTranslatorFactory as r, resolveGgufModelDownloadPlanFromRepositoryFiles as t };
279
+ export { probeLocalLlamaRuntimeModel as i, LocalLlamaTranslatorFactory as n, createLocalLlamaTranslatorFactory as r, resolveGgufModelDownloadPlanFromRepositoryFiles as t };
@@ -1,4 +1,5 @@
1
1
  import { z } from "zod";
2
+ import { runControlledTranslationTask } from "@openspecui/core/translator";
2
3
 
3
4
  //#region ../../node_modules/.pnpm/@tanstack+devtools-event-client@0.4.3/node_modules/@tanstack/devtools-event-client/dist/esm/plugin.js
4
5
  var EventClient = class {
@@ -13383,31 +13384,42 @@ var OpenAICompletionTranslator = class {
13383
13384
  this.options = options;
13384
13385
  }
13385
13386
  async *batchTranslate(inputs, options) {
13386
- const abortController = createAbortController(options?.signal);
13387
- const adapter = createConfiguredOpenAiAdapter({
13388
- model: this.options.model,
13389
- token: this.options.token,
13390
- baseUrl: this.options.baseUrl
13391
- });
13392
- for (const [index, source] of inputs.entries()) yield {
13393
- index,
13394
- output: (await chat({
13395
- adapter,
13396
- stream: false,
13397
- temperature: 0,
13398
- abortController,
13399
- systemPrompts: [[
13400
- "You are a translation engine.",
13401
- `Translate from ${this.options.sourceLanguage} to ${this.options.targetLanguage}.`,
13402
- options?.instructions ?? "Translate the source accurately.",
13403
- "Return only the translated source without commentary."
13404
- ].filter(Boolean).join("\n")],
13405
- messages: [{
13406
- role: "user",
13407
- content: [options?.context ? `<context>\n${options.context}\n</context>` : "", `<source>\n${source}\n</source>`].filter(Boolean).join("\n\n")
13408
- }]
13409
- })).trim()
13410
- };
13387
+ for (const [index, source] of inputs.entries()) {
13388
+ const controlled = await runControlledTranslationTask(async (signal) => {
13389
+ const abortController = createAbortController(signal);
13390
+ return (await chat({
13391
+ adapter: createConfiguredOpenAiAdapter({
13392
+ model: this.options.model,
13393
+ token: this.options.token,
13394
+ baseUrl: this.options.baseUrl
13395
+ }),
13396
+ stream: false,
13397
+ temperature: 0,
13398
+ abortController,
13399
+ systemPrompts: [[
13400
+ "You are a translation engine.",
13401
+ `Translate from ${this.options.sourceLanguage} to ${this.options.targetLanguage}.`,
13402
+ options?.instructions ?? "Translate the source accurately.",
13403
+ "Return only the translated source without commentary."
13404
+ ].filter(Boolean).join("\n")],
13405
+ messages: [{
13406
+ role: "user",
13407
+ content: [options?.context ? `<context>\n${options.context}\n</context>` : "", `<source>\n${source}\n</source>`].filter(Boolean).join("\n\n")
13408
+ }]
13409
+ })).trim();
13410
+ }, options);
13411
+ if (controlled.ok) {
13412
+ yield {
13413
+ index,
13414
+ output: controlled.value
13415
+ };
13416
+ continue;
13417
+ }
13418
+ yield {
13419
+ index,
13420
+ error: controlled.error
13421
+ };
13422
+ }
13411
13423
  }
13412
13424
  };
13413
13425
  function createAbortController(signal) {
@@ -0,0 +1,3 @@
1
+ import { i as probeLocalLlamaRuntimeModel, n as LocalLlamaTranslatorFactory, r as createLocalLlamaTranslatorFactory, t as resolveGgufModelDownloadPlanFromRepositoryFiles } from "./src-CZfiVL_-.mjs";
2
+
3
+ export { createLocalLlamaTranslatorFactory, probeLocalLlamaRuntimeModel };
@@ -1,5 +1,6 @@
1
1
  import { join } from "node:path";
2
2
  import { readFile } from "node:fs/promises";
3
+ import { runControlledTranslationTask } from "@openspecui/core/translator";
3
4
  import "@openspecui/core/local-download-profiles";
4
5
 
5
6
  //#region ../local-translator/src/index.ts
@@ -27,18 +28,28 @@ var LocalTranslator = class {
27
28
  this.languages = languages;
28
29
  }
29
30
  async *batchTranslate(inputs, options) {
30
- throwIfAborted(options?.signal);
31
- const result = await this.pipeline(inputs, {
32
- src_lang: this.languages.sourceLanguage,
33
- tgt_lang: this.languages.targetLanguage,
34
- signal: options?.signal
35
- });
36
- throwIfAborted(options?.signal);
37
- const outputs = readTranslatedOutputs(result, inputs.length);
38
- for (const [index, output] of outputs.entries()) yield {
39
- index,
40
- output
41
- };
31
+ for (const [index, input] of inputs.entries()) {
32
+ const controlled = await runControlledTranslationTask(async (signal) => {
33
+ const result = await this.pipeline(input, {
34
+ src_lang: this.languages.sourceLanguage,
35
+ tgt_lang: this.languages.targetLanguage,
36
+ signal
37
+ });
38
+ throwIfAborted(signal);
39
+ return readTranslatedOutputs(result, 1)[0] ?? "";
40
+ }, options);
41
+ if (controlled.ok) {
42
+ yield {
43
+ index,
44
+ output: controlled.value
45
+ };
46
+ continue;
47
+ }
48
+ yield {
49
+ index,
50
+ error: controlled.error
51
+ };
52
+ }
42
53
  }
43
54
  destroy() {
44
55
  this.pipeline.dispose?.();
@@ -1,4 +1,5 @@
1
1
  import { join, posix } from "node:path";
2
+ import { runControlledTranslationTask } from "@openspecui/core/translator";
2
3
 
3
4
  //#region ../local-ct2-translator/src/ct2-download-plan.ts
4
5
  const CT2_REQUIRED_FILE_NAMES = [
@@ -154,18 +155,30 @@ var LocalCt2Translator = class {
154
155
  this.factoryOptions = factoryOptions;
155
156
  }
156
157
  async *batchTranslate(inputs, options) {
157
- throwIfAborted(options?.signal);
158
- const result = await this.translator.translateBatch(inputs, {
159
- beamSize: this.runtimeConfig.beamSize ?? this.factoryOptions.beamSize,
160
- maxBatchSize: this.runtimeConfig.maxBatchSize ?? this.factoryOptions.maxBatchSize,
161
- returnScores: false
162
- });
163
- throwIfAborted(options?.signal);
164
- if (result.length !== inputs.length) throw new Error(`CT2 translator returned ${result.length} outputs for ${inputs.length} inputs.`);
165
- for (const [index, entry] of result.entries()) yield {
166
- index,
167
- output: entry.text
168
- };
158
+ for (const [index, input] of inputs.entries()) {
159
+ const controlled = await runControlledTranslationTask(async (signal) => {
160
+ throwIfAborted(signal);
161
+ const result = await this.translator.translateBatch([input], {
162
+ beamSize: this.runtimeConfig.beamSize ?? this.factoryOptions.beamSize,
163
+ maxBatchSize: this.runtimeConfig.maxBatchSize ?? this.factoryOptions.maxBatchSize,
164
+ returnScores: false
165
+ });
166
+ throwIfAborted(signal);
167
+ if (result.length !== 1) throw new Error(`CT2 translator returned ${result.length} outputs for 1 input.`);
168
+ return result[0]?.text ?? "";
169
+ }, options);
170
+ if (controlled.ok) {
171
+ yield {
172
+ index,
173
+ output: controlled.value
174
+ };
175
+ continue;
176
+ }
177
+ yield {
178
+ index,
179
+ error: controlled.error
180
+ };
181
+ }
169
182
  }
170
183
  };
171
184
  async function createRuntimeTranslator(options, modelPath, runtimeConfig) {
@@ -1,3 +1,3 @@
1
- import { a as resolveCt2ModelDownloadPlanFromRepositoryFiles, i as CT2_REQUIRED_FILE_NAMES, n as createLocalCt2TranslatorFactory, r as CT2_OPTIONAL_FILE_NAMES, t as LocalCt2TranslatorFactory } from "./src-BJ-K9Dp2.mjs";
1
+ import { a as resolveCt2ModelDownloadPlanFromRepositoryFiles, i as CT2_REQUIRED_FILE_NAMES, n as createLocalCt2TranslatorFactory, r as CT2_OPTIONAL_FILE_NAMES, t as LocalCt2TranslatorFactory } from "./src-Dh_UAz5C.mjs";
2
2
 
3
3
  export { createLocalCt2TranslatorFactory };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@openspecui/server",
3
- "version": "3.11.3",
3
+ "version": "3.11.4",
4
4
  "type": "module",
5
5
  "main": "dist/index.mjs",
6
6
  "exports": {
@@ -24,8 +24,8 @@
24
24
  "@hono/node-server": "^1.14.1",
25
25
  "@huggingface/hub": "^2.12.0",
26
26
  "@lydell/node-pty": "^1.1.0",
27
- "@openspecui/core": "3.11.3",
28
- "@openspecui/search": "3.11.3",
27
+ "@openspecui/core": "3.11.4",
28
+ "@openspecui/search": "3.11.4",
29
29
  "@trpc/server": "^11.0.0",
30
30
  "better-sqlite3": "^12.5.0",
31
31
  "hono": "^4.7.3",
@@ -1,3 +0,0 @@
1
- import { n as LocalLlamaTranslatorFactory, r as createLocalLlamaTranslatorFactory, t as resolveGgufModelDownloadPlanFromRepositoryFiles } from "./src-awZ9aP1s.mjs";
2
-
3
- export { createLocalLlamaTranslatorFactory };