@flotorch/loadtest 0.2.1 → 0.2.3

This diff compares the publicly released contents of the two package versions as they appear in their public registry. It is provided for informational purposes only.
Files changed (3)
  1. package/README.md +4 -4
  2. package/dist/index.js +22 -18
  3. package/package.json +1 -1
package/README.md CHANGED
@@ -41,7 +41,7 @@ This launches an interactive wizard that asks for:
 
 - **Provider adapter** — `openai` or `sagemaker` (default: `openai`)
 - **Model name** — the model identifier your endpoint expects
-- **Base URL** — API endpoint (default: `https://api.openai.com/v1/chat/completions`)
+- **Base URL** — API endpoint (default: `https://api.openai.com/v1`)
 - **Concurrency** — number of parallel requests (default: `10`)
 - **Input tokens mean** — average input token count per request (default: `512`)
 - **Output tokens mean** — average output token count per request (default: `256`)
@@ -129,7 +129,7 @@ The config file is JSON with four sections:
   "provider": {
     "adapter": "openai", // "openai" | "sagemaker"
     "model": "gpt-4o", // model identifier (required)
-    "baseURL": "https://api.openai.com/v1/chat/completions", // API endpoint
+    "baseURL": "https://api.openai.com/v1", // API endpoint
     "systemPrompt": "You are a helpful assistant.", // optional system message
     "config": {}, // backend-specific options
   },
@@ -199,7 +199,7 @@ The config file is JSON with four sections:
   "provider": {
     "adapter": "openai",
     "model": "gpt-4o",
-    "baseURL": "https://api.openai.com/v1/chat/completions"
+    "baseURL": "https://api.openai.com/v1"
   },
   "benchmark": {
     "concurrency": 20,
@@ -239,7 +239,7 @@ The config file is JSON with four sections:
   "provider": {
     "adapter": "openai",
     "model": "gpt-4o-mini",
-    "baseURL": "https://api.openai.com/v1/chat/completions"
+    "baseURL": "https://api.openai.com/v1"
   },
   "benchmark": {
     "concurrency": 10,
package/dist/index.js CHANGED
@@ -455,7 +455,7 @@ var cyan = wrap("36", "39");
 var magenta = wrap("35", "39");
 
 // src/cli/args.ts
-var VERSION = true ? "0.2.1" : "dev";
+var VERSION = true ? "0.2.3" : "dev";
 var VALID_COMMANDS = /* @__PURE__ */ new Set(["run", "generate", "bench", "report", "init"]);
 var HELP_TEXT = `
 ${bold("FLOTorch Load Tester")} ${dim(`v${VERSION}`)}
@@ -628,8 +628,8 @@ async function runInit(outputPath) {
   }
   let baseURL;
   if (adapter === "openai") {
-    const url = await prompt(rl, "Base URL", "https://api.openai.com/v1/chat/completions");
-    if (url !== "https://api.openai.com/v1/chat/completions") {
+    const url = await prompt(rl, "Base URL", "https://api.openai.com/v1");
+    if (url !== "https://api.openai.com/v1") {
       baseURL = url;
     }
   }
@@ -868,11 +868,11 @@ var OpenAIBackend = class _OpenAIBackend {
   apiKey;
   static create(baseURL) {
     const env = validateEnv(EnvSchema, "openai");
-    const url = baseURL ?? "https://api.openai.com/v1/chat/completions";
+    const url = baseURL ?? "https://api.openai.com/v1";
     return new _OpenAIBackend(url, env.OPENAI_API_KEY);
   }
   constructor(baseURL, apiKey) {
-    this.url = baseURL;
+    this.url = baseURL.endsWith("/chat/completions") ? baseURL : `${baseURL.replace(/\/+$/, "")}/chat/completions`;
     this.apiKey = apiKey;
   }
   async request(prompt2, model, maxTokens, systemPrompt, params, streaming, signal) {
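
The constructor now derives the request URL from the configured root rather than using it verbatim. A standalone restatement of that normalization — `resolveChatUrl` is a hypothetical name, not the package's:

```ts
// Restates the constructor's URL handling as a free function (hypothetical
// name). A bare API root gains the /chat/completions path; a full
// 0.2.1-style URL is left untouched, so old configs stay valid.
function resolveChatUrl(baseURL: string): string {
  if (baseURL.endsWith("/chat/completions")) return baseURL;
  return `${baseURL.replace(/\/+$/, "")}/chat/completions`;
}

// resolveChatUrl("https://api.openai.com/v1")
//   => "https://api.openai.com/v1/chat/completions"
// resolveChatUrl("https://api.openai.com/v1/chat/completions")
//   => unchanged
```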
@@ -882,12 +882,13 @@ var OpenAIBackend = class _OpenAIBackend {
     }
     messages.push({ role: "user", content: prompt2 });
     const body = {
-      model,
       messages,
       stream: streaming,
-      ...params
+      ...params,
+      model,
+      max_tokens: maxTokens
     };
-    if (body.max_tokens && this.isOpenAIHost()) {
+    if (this.isOpenAIHost()) {
       body.max_completion_tokens = body.max_tokens;
       delete body.max_tokens;
     }
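
Two behavioral fixes hide in this hunk. In 0.2.1 the `maxTokens` argument was never written into the body (a limit only applied if duplicated in `params`), and `model` sat before the `...params` spread, so a stray `model` key in backend params could clobber the configured one. In 0.2.3 both land after the spread, and the `max_completion_tokens` rename fires for every OpenAI-host request instead of only when a `max_tokens` param happened to be present. A sketch of the resulting assembly, with host detection shown as a plain boolean standing in for the package's `this.isOpenAIHost()`:

```ts
type ChatMessage = { role: "system" | "user" | "assistant"; content: string };

// Sketch of the 0.2.3 body assembly.
function buildChatBody(
  messages: ChatMessage[],
  model: string,
  maxTokens: number,
  params: Record<string, unknown>,
  streaming: boolean,
  isOpenAIHost: boolean
): Record<string, unknown> {
  const body: Record<string, unknown> = {
    messages,
    stream: streaming,
    ...params,            // backend-specific extras first...
    model,                // ...so the configured model always wins
    max_tokens: maxTokens // ...and the token limit is always applied
  };
  if (isOpenAIHost) {
    // api.openai.com expects max_completion_tokens on newer models.
    body.max_completion_tokens = body.max_tokens;
    delete body.max_tokens;
  }
  return body;
}
```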
@@ -1017,7 +1018,7 @@ var SageMakerBackend = class _SageMakerBackend {
   }
   constructor(config) {
     this.baseURL = config.baseURL ?? `https://runtime.sagemaker.${config.region}.amazonaws.com`;
-    this.requestFormat = config.requestFormat ?? "sagemaker" /* Sagemaker */;
+    this.requestFormat = config.requestFormat ?? "openai" /* OpenAI */;
     this.signer = new SignatureV4({
       service: "sagemaker",
       region: config.region,
@@ -1068,22 +1069,25 @@ var SageMakerBackend = class _SageMakerBackend {
     return this.parseResponse(response);
   }
   buildRequestBody(prompt2, maxTokens, systemPrompt, params, streaming) {
-    const messages = [];
-    if (systemPrompt) messages.push({ role: "system", content: systemPrompt });
-    messages.push({ role: "user", content: prompt2 });
     if (this.requestFormat === "openai" /* OpenAI */) {
+      const messages = [];
+      if (systemPrompt) messages.push({ role: "system", content: systemPrompt });
+      messages.push({ role: "user", content: prompt2 });
       return {
         messages,
-        max_tokens: maxTokens,
         stream: streaming,
-        ...params
+        ...params,
+        max_tokens: maxTokens
      };
    }
+    const rawPrompt = systemPrompt ? `${systemPrompt}
+
+${prompt2}` : prompt2;
     return {
-      inputs: [messages],
+      inputs: rawPrompt,
       parameters: {
-        max_new_tokens: maxTokens,
-        ...params
+        ...params,
+        max_new_tokens: maxTokens
       }
     };
   }
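
Besides the same spread-ordering fix as the OpenAI backend, the rewritten `buildRequestBody` changes what the raw SageMaker format sends: a single prompt string with the system prompt folded in, instead of the nested `[messages]` array 0.2.1 produced — a shape most text-generation containers would likely reject. A condensed restatement of the two payload shapes (function name mine, extra params omitted for brevity):

```ts
// Condensed restatement of the two 0.2.3 SageMaker payload shapes.
function sagemakerPayload(
  format: "openai" | "sagemaker",
  prompt: string,
  maxTokens: number,
  systemPrompt?: string
): unknown {
  if (format === "openai") {
    const messages = systemPrompt
      ? [{ role: "system", content: systemPrompt }, { role: "user", content: prompt }]
      : [{ role: "user", content: prompt }];
    return { messages, max_tokens: maxTokens };
  }
  // Raw format: one flat string, system prompt prepended.
  const inputs = systemPrompt ? `${systemPrompt}\n\n${prompt}` : prompt;
  return { inputs, parameters: { max_new_tokens: maxTokens } };
}
```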
@@ -1262,7 +1266,7 @@ function createBackend(config) {
     case "openai":
       return OpenAIBackend.create(baseURL);
     case "sagemaker": {
-      const requestFormat = config.provider.config?.["requestFormat"] ?? "sagemaker" /* Sagemaker */;
+      const requestFormat = config.provider.config?.["requestFormat"] ?? "openai" /* OpenAI */;
       return SageMakerBackend.create(baseURL, requestFormat);
     }
     default:
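
Note that the default `requestFormat` flips from `sagemaker` to `openai` in both the constructor and `createBackend`. Endpoints that expect the raw `inputs`/`parameters` shape now need an explicit override; assuming the `provider.config` key shown in the diff, that would look like:

```ts
// Hypothetical provider fragment pinning the 0.2.1 raw request format.
const provider = {
  adapter: "sagemaker",
  model: "my-sagemaker-model",           // illustrative name
  config: { requestFormat: "sagemaker" } // restore the old default
};
```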
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@flotorch/loadtest",
-  "version": "0.2.1",
+  "version": "0.2.3",
   "description": "LLM inference load testing and benchmarking tool",
   "license": "MIT",
   "repository": {