@auxot/worker-cli 0.3.3 → 0.3.5

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -16,7 +16,7 @@ npx @auxot/worker-cli --gpu-key YOUR_GPU_KEY
16
16
 
17
17
  # Or install globally
18
18
  npm install -g @auxot/worker-cli
19
- auxot-worker-cli --gpu-key YOUR_GPU_KEY
19
+ worker-cli --gpu-key YOUR_GPU_KEY
20
20
  ```
21
21
 
22
22
  ## Quick Start
package/dist/index.js CHANGED
@@ -458,6 +458,10 @@ var WebSocketConnection = class {
458
458
  this.isReconnecting = false;
459
459
  this.retryDelay = INITIAL_RETRY_DELAY;
460
460
  this.startHeartbeat();
461
+ if (this.capabilities && this.capabilities.ctx_size > 0) {
462
+ console.log(" Re-sending config after reconnection...");
463
+ this.sendConfig(this.capabilities);
464
+ }
461
465
  resolve();
462
466
  } else {
463
467
  const errorMessage = message.error || "Authentication failed";
@@ -807,7 +811,7 @@ async function processJob(job, llamaUrl, capabilities, abortSignal, onToken) {
807
811
  try {
808
812
  while (true) {
809
813
  if (abortSignal.aborted) {
810
- break;
814
+ throw new Error("Job cancelled by user");
811
815
  }
812
816
  const { done, value } = await reader.read();
813
817
  if (done)
@@ -817,8 +821,9 @@ async function processJob(job, llamaUrl, capabilities, abortSignal, onToken) {
817
821
  parser.feed(text);
818
822
  }
819
823
  } catch (error) {
820
- if (error instanceof Error && error.name === "AbortError") {
821
- console.log(`[Job ${job.job_id}] Cancelled - returning partial response`);
824
+ if (error instanceof Error && (error.name === "AbortError" || error.message === "Job cancelled by user")) {
825
+ console.log(`[Job ${job.job_id}] Cancelled`);
826
+ throw error;
822
827
  } else {
823
828
  throw error;
824
829
  }
@@ -1193,11 +1198,12 @@ async function spawnLlamaCpp(options) {
1193
1198
  host,
1194
1199
  gpuLayers
1195
1200
  };
1201
+ const totalContextSize = contextSize * parallelism;
1196
1202
  const args2 = [
1197
1203
  "--model",
1198
1204
  modelPath,
1199
1205
  "--ctx-size",
1200
- contextSize.toString(),
1206
+ totalContextSize.toString(),
1201
1207
  "--parallel",
1202
1208
  parallelism.toString(),
1203
1209
  "--port",
@@ -1214,7 +1220,8 @@ async function spawnLlamaCpp(options) {
1214
1220
  if (gpuLayers !== void 0) {
1215
1221
  args2.push("--n-gpu-layers", gpuLayers.toString());
1216
1222
  }
1217
- console.log(`[llama.cpp] Spawning process: ${binaryPath} ${args2.join(" ")}`);
1223
+ console.log(`[llama.cpp] Spawning with ${parallelism} parallel slots, ${contextSize.toLocaleString()} context per slot (${totalContextSize.toLocaleString()} total)`);
1224
+ console.log(`[llama.cpp] Command: ${binaryPath} ${args2.join(" ")}`);
1218
1225
  const childProcess = spawn(binaryPath, args2, {
1219
1226
  stdio: ["ignore", "pipe", "pipe"],
1220
1227
  // stdin: ignore, stdout/stderr: pipe
@@ -5885,7 +5892,6 @@ async function main() {
5885
5892
  wsConnection.sendToken(job.job_id, token);
5886
5893
  }
5887
5894
  );
5888
- const wasCancelled = abortController.signal.aborted;
5889
5895
  wsConnection.sendComplete(
5890
5896
  job.job_id,
5891
5897
  result.fullResponse,
@@ -5894,14 +5900,15 @@ async function main() {
5894
5900
  result.outputTokens,
5895
5901
  result.tool_calls
5896
5902
  );
5897
- if (!wasCancelled) {
5898
- }
5899
5903
  } catch (error) {
5900
- console.error(`\u2717 Job ${job.job_id} failed:`, error);
5901
- wsConnection.sendError(
5902
- job.job_id,
5903
- error instanceof Error ? error.message : "Unknown error"
5904
- );
5904
+ const errorMessage = error instanceof Error ? error.message : "Unknown error";
5905
+ if (errorMessage === "Job cancelled by user" || error instanceof Error && error.name === "AbortError") {
5906
+ console.log(`[Job ${job.job_id}] Cancelled by user`);
5907
+ wsConnection.sendError(job.job_id, "Job cancelled by user");
5908
+ } else {
5909
+ console.error(`\u2717 Job ${job.job_id} failed:`, error);
5910
+ wsConnection.sendError(job.job_id, errorMessage);
5911
+ }
5905
5912
  } finally {
5906
5913
  activeJobs.delete(job.job_id);
5907
5914
  }