bailian-cli-core 1.1.3 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -4,6 +4,9 @@ import { homedir } from "os";
4
4
  import { basename, join } from "path";
5
5
  import { stringify } from "yaml";
6
6
  import { createHash, createHmac, randomUUID } from "crypto";
7
+ import { cpSync, existsSync as existsSync$1, mkdirSync as mkdirSync$1, readFileSync as readFileSync$1, readdirSync, writeFileSync as writeFileSync$1 } from "node:fs";
8
+ import { dirname, join as join$1 } from "node:path";
9
+ import { fileURLToPath } from "node:url";
7
10
  //#region \0rolldown/runtime.js
8
11
  var __create = Object.create;
9
12
  var __defProp = Object.defineProperty;
@@ -111,6 +114,20 @@ const VALID_REGIONS = new Set([
111
114
  "intl"
112
115
  ]);
113
116
  const VALID_OUTPUTS = new Set(["text", "json"]);
117
/**
 * Report whether `value` parses as an absolute URL whose scheme is exactly
 * `http:` or `https:`.
 *
 * Used to validate `base_url` and `console_gateway_url` from the config file:
 * the credential-bearing client sends the Bearer token to these origins, so a
 * bare `startsWith("http")` check (which also accepts e.g. "httpfoo://…") is
 * too loose.
 *
 * @param {string} value - Candidate URL text.
 * @returns {boolean} `true` only when `new URL(value)` succeeds and the
 *   protocol is http or https; `false` when parsing throws or for any other
 *   scheme.
 */
function isHttpUrl(value) {
	let parsed;
	try {
		parsed = new URL(value);
	} catch {
		// Not parseable as an absolute URL at all.
		return false;
	}
	return ["http:", "https:"].includes(parsed.protocol);
}
114
131
  function parseConfigFile(raw) {
115
132
  if (!raw || typeof raw !== "object" || Array.isArray(raw)) return {};
116
133
  const obj = raw;
@@ -119,7 +136,7 @@ function parseConfigFile(raw) {
119
136
  if (typeof obj.access_token === "string" && obj.access_token.length > 0) out.access_token = obj.access_token;
120
137
  else if (typeof obj.accessToken === "string" && obj.accessToken.length > 0) out.access_token = obj.accessToken;
121
138
  if (typeof obj.region === "string" && VALID_REGIONS.has(obj.region)) out.region = obj.region;
122
- if (typeof obj.base_url === "string" && obj.base_url.startsWith("http")) out.base_url = obj.base_url;
139
+ if (typeof obj.base_url === "string" && isHttpUrl(obj.base_url)) out.base_url = obj.base_url;
123
140
  if (typeof obj.output === "string" && VALID_OUTPUTS.has(obj.output)) out.output = obj.output;
124
141
  if (typeof obj.output_dir === "string" && obj.output_dir.length > 0) out.output_dir = obj.output_dir;
125
142
  if (typeof obj.timeout === "number" && obj.timeout > 0) out.timeout = obj.timeout;
@@ -131,7 +148,7 @@ function parseConfigFile(raw) {
131
148
  if (typeof obj.access_key_id === "string" && obj.access_key_id.length > 0) out.access_key_id = obj.access_key_id;
132
149
  if (typeof obj.access_key_secret === "string" && obj.access_key_secret.length > 0) out.access_key_secret = obj.access_key_secret;
133
150
  if (typeof obj.workspace_id === "string" && obj.workspace_id.length > 0) out.workspace_id = obj.workspace_id;
134
- if (typeof obj.console_gateway_url === "string" && obj.console_gateway_url.startsWith("http")) out.console_gateway_url = obj.console_gateway_url;
151
+ if (typeof obj.console_gateway_url === "string" && isHttpUrl(obj.console_gateway_url)) out.console_gateway_url = obj.console_gateway_url;
135
152
  if (typeof obj.telemetry === "boolean") out.telemetry = obj.telemetry;
136
153
  return out;
137
154
  }
@@ -150,10 +167,14 @@ function getCredentialsPath() {
150
167
  }
151
168
  async function ensureConfigDir() {
152
169
  const dir = getConfigDir();
153
- await (await import("fs/promises")).mkdir(dir, {
170
+ const fs = await import("fs/promises");
171
+ await fs.mkdir(dir, {
154
172
  recursive: true,
155
173
  mode: 448
156
174
  });
175
+ try {
176
+ await fs.chmod(dir, 448);
177
+ } catch {}
157
178
  }
158
179
  //#endregion
159
180
  //#region src/output/text.ts
@@ -240,7 +261,7 @@ function loadConfig(flags) {
240
261
  accessKeyId: process.env.ALIBABA_CLOUD_ACCESS_KEY_ID || file.access_key_id || void 0,
241
262
  accessKeySecret: process.env.ALIBABA_CLOUD_ACCESS_KEY_SECRET || file.access_key_secret || void 0,
242
263
  workspaceId: process.env.BAILIAN_WORKSPACE_ID || file.workspace_id || void 0,
243
- consoleGatewayUrl: process.env.BAILIAN_CONSOLE_GATEWAY_URL || file.console_gateway_url || "https://bailian-cs.console.aliyun.com",
264
+ consoleGatewayUrl: process.env.BAILIAN_CONSOLE_GATEWAY_URL || file.console_gateway_url || "https://pre-bailian-cs.console.aliyun.com",
244
265
  verbose: flags.verbose || process.env.DASHSCOPE_VERBOSE === "1",
245
266
  quiet: flags.quiet || false,
246
267
  noColor: flags.noColor || process.env.NO_COLOR !== void 0 || !process.stdout.isTTY,
@@ -401,10 +422,10 @@ function videoGenerateEndpoint(baseUrl) {
401
422
  return `${baseUrl}/api/v1/services/aigc/video-generation/video-synthesis`;
402
423
  }
403
424
  function taskEndpoint(baseUrl, taskId) {
404
- return `${baseUrl}/api/v1/tasks/${taskId}`;
425
+ return `${baseUrl}/api/v1/tasks/${encodeURIComponent(taskId)}`;
405
426
  }
406
427
  function appCompletionEndpoint(baseUrl, appId) {
407
- return `${baseUrl}/api/v1/apps/${appId}/completion`;
428
+ return `${baseUrl}/api/v1/apps/${encodeURIComponent(appId)}/completion`;
408
429
  }
409
430
  function memoryAddEndpoint(baseUrl) {
410
431
  return `${baseUrl}/api/v2/apps/memory/add`;
@@ -416,7 +437,7 @@ function memoryListEndpoint(baseUrl) {
416
437
  return `${baseUrl}/api/v2/apps/memory/memory_nodes`;
417
438
  }
418
439
  function memoryNodeEndpoint(baseUrl, nodeId) {
419
- return `${baseUrl}/api/v2/apps/memory/memory_nodes/${nodeId}`;
440
+ return `${baseUrl}/api/v2/apps/memory/memory_nodes/${encodeURIComponent(nodeId)}`;
420
441
  }
421
442
  function speechSynthesizeEndpoint(baseUrl) {
422
443
  return `${baseUrl}/api/v1/services/audio/tts/SpeechSynthesizer`;
@@ -428,7 +449,7 @@ function profileSchemaEndpoint(baseUrl) {
428
449
  return `${baseUrl}/api/v2/apps/memory/profile_schemas`;
429
450
  }
430
451
  function userProfileEndpoint(baseUrl, schemaId) {
431
- return `${baseUrl}/api/v2/apps/memory/profile_schemas/${schemaId}/profiles`;
452
+ return `${baseUrl}/api/v2/apps/memory/profile_schemas/${encodeURIComponent(schemaId)}/profiles`;
432
453
  }
433
454
  function mcpWebSearchEndpoint(baseUrl) {
434
455
  return `${baseUrl}/api/v1/mcps/WebSearch/mcp`;
@@ -456,6 +477,11 @@ function trackingHeaders() {
456
477
  return { "x-dashscope-source-config": SOURCE_CONFIG };
457
478
  }
458
479
  //#endregion
480
+ //#region src/utils/token.ts
481
/**
 * Render a credential in a form suitable for log output.
 *
 * @param {string} token - The secret to mask.
 * @returns {string} The first four and last four characters joined by `...`
 *   when the token is longer than eight characters; otherwise the fixed
 *   placeholder `***`.
 */
function maskToken(token) {
	if (token.length <= 8) return "***";
	const head = token.slice(0, 4);
	const tail = token.slice(-4);
	return `${head}...${tail}`;
}
484
+ //#endregion
459
485
  //#region src/client/http.ts
460
486
  /**
461
487
  * Bailian requires `X-DashScope-OssResourceResolve: enable` on any request whose body
@@ -482,7 +508,7 @@ async function request(config, opts) {
482
508
  headers["Authorization"] = `Bearer ${credential.token}`;
483
509
  if (config.verbose) {
484
510
  console.error(`> ${opts.method ?? "GET"} ${opts.url}`);
485
- console.error(`> Auth: ${credential.token.slice(0, 8)}...`);
511
+ console.error(`> Auth: ${maskToken(credential.token)}`);
486
512
  console.error(`> x-dashscope-source-config: ${SOURCE_CONFIG}`);
487
513
  }
488
514
  }
@@ -539,19 +565,25 @@ async function requestJson(config, opts) {
539
565
  }
540
566
  //#endregion
541
567
  //#region src/client/mcp.ts
568
/**
 * Compose the streamable-HTTP MCP endpoint for a Bailian MCP server.
 * The path is `/api/v1/mcps/<serverCode>/mcp`; the `serverCode` comes from
 * `bl mcp list` (e.g. `WebSearch`, `market-cmapi00073529`).
 *
 * The code is percent-encoded as a single path segment, in line with the
 * other identifier-taking endpoint builders in this module, so a value that
 * contains `/`, `?` or `#` cannot alter the request path or append a query.
 * Codes made of letters, digits and `-` are unchanged by the encoding.
 *
 * @param {string} baseUrl - API origin; one trailing `/` is removed.
 * @param {string} serverCode - MCP server code.
 * @returns {string} The full MCP endpoint URL.
 */
function bailianMcpUrl(baseUrl, serverCode) {
	const root = baseUrl.replace(/\/$/, "");
	return `${root}/api/v1/mcps/${encodeURIComponent(serverCode)}/mcp`;
}
542
576
  var McpClient = class {
543
- baseUrl;
577
+ url;
544
578
  sessionId;
545
579
  nextId = 1;
546
580
  config;
547
581
  authToken;
548
- constructor(config, baseUrl) {
582
+ constructor(config, url) {
549
583
  this.config = config;
550
- this.baseUrl = baseUrl;
584
+ this.url = url;
551
585
  }
552
- /**
553
- * Initialize the MCP session. Must be called before any other method.
554
- */
586
+ /** Initialize the MCP session. Must be called before any other method. */
555
587
  async initialize() {
556
588
  const credential = await resolveCredential(this.config);
557
589
  this.authToken = credential.token;
@@ -569,15 +601,9 @@ var McpClient = class {
569
601
  }
570
602
  await this.notify("notifications/initialized");
571
603
  }
572
- /**
573
- * List available tools from the MCP server.
574
- */
575
604
  async listTools() {
576
605
  return (await this.rpc("tools/list")).tools || [];
577
606
  }
578
- /**
579
- * Call a tool on the MCP server.
580
- */
581
607
  async callTool(name, args) {
582
608
  return await this.rpc("tools/call", {
583
609
  name,
@@ -613,11 +639,11 @@ var McpClient = class {
613
639
  if (this.authToken) headers["Authorization"] = `Bearer ${this.authToken}`;
614
640
  if (this.sessionId) headers["Mcp-Session-Id"] = this.sessionId;
615
641
  if (this.config.verbose) {
616
- console.error(`> POST ${this.baseUrl}`);
642
+ console.error(`> POST ${this.url}`);
617
643
  console.error(`> Method: ${body.method}`);
618
644
  }
619
645
  const timeoutMs = this.config.timeout * 1e3;
620
- const res = await fetch(this.baseUrl, {
646
+ const res = await fetch(this.url, {
621
647
  method: "POST",
622
648
  headers,
623
649
  body: JSON.stringify(body),
@@ -644,11 +670,13 @@ async function* parseSSE(response) {
644
670
  if (!reader) return;
645
671
  const decoder = new TextDecoder();
646
672
  let buffer = "";
673
+ const MAX_SSE_BUFFER = 16 * 1024 * 1024;
647
674
  try {
648
675
  while (true) {
649
676
  const { done, value } = await reader.read();
650
677
  if (done) break;
651
678
  buffer += decoder.decode(value, { stream: true });
679
+ if (buffer.length > MAX_SSE_BUFFER) throw new BailianError("SSE stream exceeded the maximum buffer size.", ExitCode.GENERAL);
652
680
  const lines = buffer.split("\n");
653
681
  buffer = lines.pop() || "";
654
682
  let event = {};
@@ -670,6 +698,7 @@ async function* parseSSE(response) {
670
698
  switch (field) {
671
699
  case "data":
672
700
  event.data = event.data !== void 0 ? `${event.data}\n${value}` : value;
701
+ if (event.data.length > MAX_SSE_BUFFER) throw new BailianError("SSE event exceeded the maximum buffer size.", ExitCode.GENERAL);
673
702
  break;
674
703
  case "event":
675
704
  event.event = value;
@@ -740,6 +769,37 @@ async function callConsoleGateway(config, token, { api, data, region = "cn-beiji
740
769
  return res.json();
741
770
  }
742
771
  //#endregion
772
+ //#region src/console/models.ts
773
+ const MODEL_LIST_API = "zeldaHttp.dashscopeModel./zelda/api/v1/modelCenter/listFoundationModels";
774
/**
 * Fetch one page of foundation models through the console gateway and
 * flatten grouped entries into a single list.
 *
 * @param config - Forwarded unchanged to `callConsoleGateway`.
 * @param token - Forwarded unchanged to `callConsoleGateway`.
 * @param {object} [params] - Optional query: `pageNo` (default 1),
 *   `pageSize` (50), `name` (""), `providers` ([]), `capabilities` ([]),
 *   `region` ("cn-beijing").
 * @returns {Promise<{total: number, models: Array}>} `total` as reported by
 *   the response (0 when absent) and the flattened entries.
 */
async function fetchModelList(config, token, params = {}) {
	const { pageNo = 1, pageSize = 50, name = "", providers = [], capabilities = [], region = "cn-beijing" } = params;
	const input = {
		pageNo,
		pageSize,
		name,
		providers,
		inferenceProviders: [],
		features: [],
		group: true,
		capabilities,
		contextWindows: []
	};
	const result = await callConsoleGateway(config, token, {
		api: MODEL_LIST_API,
		data: { input },
		region
	});
	// The payload is read from `data.DataV2.data` when present, else `data`.
	const payload = result?.data?.DataV2?.data ?? result?.data ?? {};
	const total = payload?.data?.total ?? payload?.total ?? 0;
	const groups = payload?.data?.list ?? payload?.list ?? [];
	const models = [];
	for (const group of groups) {
		// A group with a non-empty `items` contributes its items; any other
		// entry is kept as a model itself.
		if (group.items?.length) models.push(...group.items);
		else models.push(group);
	}
	return { total, models };
}
802
+ //#endregion
743
803
  //#region src/files/upload.ts
744
804
  /**
745
805
  * Upload local files to DashScope temporary OSS storage.
@@ -999,11 +1059,6 @@ function generateToolSchema(cmd) {
999
1059
  return schema;
1000
1060
  }
1001
1061
  //#endregion
1002
- //#region src/utils/token.ts
1003
- function maskToken(token) {
1004
- return token.length > 8 ? `${token.slice(0, 4)}...${token.slice(-4)}` : "***";
1005
- }
1006
- //#endregion
1007
1062
  //#region src/utils/env.ts
1008
1063
  /**
1009
1064
  * Environment detection utilities for bailian-cli.
@@ -1053,6 +1108,35 @@ function stripUndefined(obj) {
1053
1108
  return obj;
1054
1109
  }
1055
1110
  //#endregion
1111
+ //#region src/utils/boolean-flag.ts
1112
/**
 * Parse true/false from CLI flags (e.g. `--watermark <bool>`).
 *
 * @param {*} value - A boolean (returned as is) or a string that equals
 *   "true" or "false" after trimming and lower-casing.
 * @param {string} [label="boolean"] - Name used in the error message.
 * @returns {boolean} The parsed value.
 * @throws {BailianError} With `ExitCode.USAGE` for any other input.
 */
function parseBooleanValue(value, label = "boolean") {
	if (typeof value === "boolean") return value;
	const normalized = typeof value === "string" ? value.trim().toLowerCase() : void 0;
	switch (normalized) {
		case "true": return true;
		case "false": return false;
		default: throw new BailianError(`Invalid ${label} value "${String(value)}". Use true or false.`, ExitCode.USAGE);
	}
}
1122
/**
 * Like `parseBooleanValue`, but treats an absent value as "not provided".
 *
 * @param {*} value - Flag value; `undefined` and `null` mean unset.
 * @param {string} [label="boolean"] - Name used in the error message.
 * @returns {boolean|undefined} `undefined` when unset, otherwise the result
 *   of `parseBooleanValue(value, label)`.
 */
function parseOptionalBooleanValue(value, label = "boolean") {
	// `== null` matches exactly undefined and null.
	return value == null ? void 0 : parseBooleanValue(value, label);
}
1126
/**
 * Resolve a tri-state boolean CLI flag (`--name <bool>`).
 *
 * @param {*} flagValue - Raw flag value; `undefined`/`null` mean omitted.
 * @param {*} defaultWhenUnset - Returned when the flag is omitted.
 * @param {string} [label="boolean"] - Name used in the error message.
 * @returns The parsed boolean, or `defaultWhenUnset` when the flag is omitted.
 */
function resolveBooleanFlag(flagValue, defaultWhenUnset, label = "boolean") {
	// The parser yields a boolean or undefined, so `??` only falls back when
	// the flag was omitted.
	return parseOptionalBooleanValue(flagValue, label) ?? defaultWhenUnset;
}
1135
/**
 * Resolve the `--watermark` flag; the result is `true` when the flag is unset.
 *
 * @param {*} flagValue - Raw flag value.
 * @returns {boolean} The parsed flag, or `true` when it was omitted.
 */
function resolveWatermark(flagValue) {
	const explicit = parseOptionalBooleanValue(flagValue, "watermark");
	return explicit === void 0 ? true : explicit;
}
1139
+ //#endregion
1056
1140
  //#region src/telemetry/event.ts
1057
1141
  function createTrackingEvent(opts) {
1058
1142
  const event = {
@@ -1801,9 +1885,7 @@ var require_tracker = /* @__PURE__ */ __commonJSMin(((exports, module) => {
1801
1885
  gokey: encodeURIComponent(e),
1802
1886
  gmkey: "EXP"
1803
1887
  })
1804
- }).catch(function(e) {
1805
- console.warn("send fail", e);
1806
- });
1888
+ }).catch(function() {});
1807
1889
  }).catch(function(t) {
1808
1890
  n._offlineQueue.length > 500 && n._offlineQueue.shift(), n._offlineQueue.push(e);
1809
1891
  });
@@ -1946,7 +2028,7 @@ async function localSink(event) {
1946
2028
  try {
1947
2029
  if (statSync(path).size > MAX_FILE_SIZE) unlinkSync(path);
1948
2030
  } catch {}
1949
- appendFileSync(path, JSON.stringify(event) + "\n");
2031
+ appendFileSync(path, JSON.stringify(event) + "\n", { mode: 384 });
1950
2032
  } catch {}
1951
2033
  }
1952
2034
  async function remoteSink(event) {
@@ -2013,7 +2095,6 @@ const PARAM_ALLOWLIST = new Set([
2013
2095
  "noWait",
2014
2096
  "textOnly",
2015
2097
  "promptExtend",
2016
- "noPromptExtend",
2017
2098
  "enableSsml",
2018
2099
  "watermark",
2019
2100
  "hasThoughts",
@@ -2078,4 +2159,1155 @@ async function trackCommandExecution(config, commandPath, flags, fn) {
2078
2159
  }
2079
2160
  }
2080
2161
  //#endregion
2081
- export { BAILIAN_HOST, BailianError, CHANNEL, CONSOLE_GATEWAY_NO_TOKEN_MESSAGE, DOCS_HOSTS, ExitCode, GLOBAL_OPTIONS, McpClient, REGIONS, SOURCE_CONFIG, TAGS, appCompletionEndpoint, callConsoleGateway, chatEndpoint, clearApiKey, createTrackingEvent, defineCommand, detectOutputFormat, ensureConfigDir, flushTelemetry, formatErrorJson, formatJson, formatOutput, formatText, generateFilename, generateToolSchema, getConfigDir, getConfigPath, getCredentialsPath, imageEndpoint, imageSyncEndpoint, isCI, isInteractive, isLocalFile, loadApiKeyFromConfig, loadConfig, localSink, mapApiError, maskToken, mcpWebSearchEndpoint, memoryAddEndpoint, memoryListEndpoint, memoryNodeEndpoint, memorySearchEndpoint, parseConfigFile, parseSSE, profileSchemaEndpoint, readConfigFile, remoteSink, request, requestJson, resolveConsoleGatewayCredential, resolveCredential, resolveFileUrl, resolveOutputDir, saveApiKeyToConfig, signRequest, speechRecognizeEndpoint, speechSynthesizeEndpoint, stripUndefined, taskEndpoint, trackCommandExecution, trackingHeaders, uploadFile, userProfileEndpoint, videoGenerateEndpoint, writeConfigFile };
2162
+ //#region src/advisor/sources/api.ts
2163
+ const PAGE_SIZE = 50;
2164
/**
 * Convert one raw model-list entry into a model profile.
 *
 * @param {object} item - Raw entry.
 * @returns {object|null} `null` when `item.model` is falsy. Otherwise a
 *   profile in which `name` falls back to the model id, `description` falls
 *   back to `shortDescription` then "", `provider` to "", list fields to
 *   `[]`, and the three numeric limits map `null` to `undefined`.
 */
function toModelProfile(item) {
	const { model, shortDescription } = item;
	if (!model) return null;
	// Collapse null to undefined for the numeric limit fields.
	const orUndefined = (value) => value ?? void 0;
	const profile = {
		model,
		name: item.name ?? model,
		description: item.description ?? shortDescription ?? "",
		shortDescription,
		provider: item.provider ?? "",
		capabilities: item.capabilities ?? [],
		features: item.features ?? [],
		category: item.category,
		contextWindow: orUndefined(item.contextWindow),
		maxOutputTokens: orUndefined(item.maxOutputTokens),
		maxInputTokens: orUndefined(item.maxInputTokens),
		docUrl: item.docUrl,
		collectionTag: item.collectionTag,
		inferenceMetadata: item.inferenceMetadata,
		prices: item.prices,
		qpmInfo: item.qpmInfo,
		versionTag: item.versionTag,
		openSource: item.openSource
	};
	return profile;
}
2188
/**
 * Model source backed by the remote model list.
 *
 * `available()` always reports true. `load()` fetches page 1, derives the
 * page count from the reported total, fetches the remaining pages one after
 * another, and returns every entry that converts to a profile.
 */
var ApiSource = class {
	name = "api";
	constructor(config) {
		this.config = config;
	}
	available() {
		return true;
	}
	async load() {
		// Every page request shares the same config, empty token and page size.
		const fetchPage = (pageNo) => fetchModelList(this.config, "", {
			pageNo,
			pageSize: PAGE_SIZE
		});
		const first = await fetchPage(1);
		const allRaw = [...first.models];
		const totalPages = Math.ceil(first.total / PAGE_SIZE);
		let page = 2;
		while (page <= totalPages) {
			const { models } = await fetchPage(page);
			allRaw.push(...models);
			page += 1;
		}
		const profiles = [];
		for (const raw of allRaw) {
			const profile = toModelProfile(raw);
			// Entries without a model id convert to null and are dropped.
			if (profile !== null) profiles.push(profile);
		}
		return profiles;
	}
};
2213
+ //#endregion
2214
+ //#region src/advisor/sources/catalog.ts
2215
+ const SKILL_DIR_NAME = "skills/doc-llm-wiki";
2216
+ const MODELS_FILE = "models.jsonl";
2217
/**
 * Directory that holds the installed model catalog: `SKILL_DIR_NAME` joined
 * onto the CLI config directory.
 * @returns {string} The catalog directory path.
 */
function getCatalogDir() {
	const configDir = getConfigDir();
	return join$1(configDir, SKILL_DIR_NAME);
}
2220
/**
 * Path of the catalog file (`MODELS_FILE`) inside the catalog directory.
 * @returns {string} The catalog file path.
 */
function getCatalogPath() {
	const catalogDir = getCatalogDir();
	return join$1(catalogDir, MODELS_FILE);
}
2223
/**
 * Locate the `skills/doc-llm-wiki/models` directory relative to this module
 * file, five directory levels above it.
 *
 * NOTE(review): the five-level hop is resolved from wherever this module is
 * loaded; confirm it still lands on the intended directory when this code
 * runs from a bundled output file rather than its original source location.
 *
 * @returns {string} The candidate models directory path; it is not checked
 *   for existence here.
 */
function getMonorepoModelsDir() {
	const moduleDir = dirname(fileURLToPath(import.meta.url));
	return join$1(moduleDir, "../../../../../skills/doc-llm-wiki/models");
}
2226
/**
 * Convert one parsed catalog line into a model profile.
 *
 * @param {object} raw - Parsed JSON object for a single line.
 * @returns {object|null} `null` unless `raw.model` is a non-empty string.
 *   Otherwise a profile where `name` falls back to the model id,
 *   `description` and `provider` to "", `capabilities` and `features` to
 *   `[]`, and the remaining fields are copied through unchanged.
 */
function fromJsonlRecord(raw) {
	const { model } = raw;
	if (typeof model !== "string" || model === "") return null;
	const record = {
		model,
		name: raw.name ?? model,
		description: raw.description ?? "",
		provider: raw.provider ?? "",
		capabilities: raw.capabilities ?? [],
		features: raw.features ?? []
	};
	// Copied verbatim, in this order, even when the source value is undefined.
	const passthrough = [
		"contextWindow",
		"maxOutputTokens",
		"docUrl",
		"inferenceMetadata",
		"shortDescription",
		"category",
		"collectionTag",
		"maxInputTokens",
		"prices",
		"qpmInfo",
		"versionTag",
		"openSource",
		"family",
		"familyName"
	];
	for (const key of passthrough) record[key] = raw[key];
	return record;
}
2251
/**
 * Read a JSON Lines catalog file and return the profiles it contains.
 *
 * @param {string} filePath - File to read as UTF-8.
 * @returns {Array} One profile per line that parses and converts; empty
 *   lines, unparseable lines and lines that convert to `null` are skipped.
 */
function readJsonlModels(filePath) {
	const text = readFileSync$1(filePath, "utf-8");
	const models = [];
	for (const line of text.split("\n")) {
		if (!line) continue;
		try {
			const record = fromJsonlRecord(JSON.parse(line));
			if (record) models.push(record);
		} catch {
			// Deliberately best-effort: a bad line is ignored, not fatal.
		}
	}
	return models;
}
2260
/**
 * Copy the models directory returned by `getMonorepoModelsDir()` into the
 * catalog directory.
 *
 * @returns {boolean} `false` when the source directory has no `MODELS_FILE`
 *   or when creating/copying throws; `true` after a successful copy.
 */
function installFromMonorepo() {
	const sourceDir = getMonorepoModelsDir();
	const hasCatalog = existsSync$1(join$1(sourceDir, MODELS_FILE));
	if (!hasCatalog) return false;
	const targetDir = getCatalogDir();
	try {
		mkdirSync$1(targetDir, { recursive: true });
		cpSync(sourceDir, targetDir, { recursive: true });
	} catch {
		// Any filesystem failure is reported as "not installed".
		return false;
	}
	return true;
}
2272
/**
 * Model source backed by the local catalog file.
 *
 * `available()` reports whether the catalog file exists. `load()` reads it
 * when present; otherwise it invokes the optional `onPrepareStart` callback,
 * attempts `installFromMonorepo()`, and returns `[]` if that fails.
 */
var CatalogSource = class {
	name = "catalog";
	options;
	constructor(options) {
		this.options = options ?? {};
	}
	available() {
		return existsSync$1(getCatalogPath());
	}
	async load() {
		if (this.available()) return readJsonlModels(getCatalogPath());
		// The catalog is missing: notify the caller, then try to install it.
		this.options.onPrepareStart?.();
		return installFromMonorepo() ? readJsonlModels(getCatalogPath()) : [];
	}
};
2289
+ //#endregion
2290
+ //#region src/advisor/cache.ts
2291
/**
 * Load model profiles from the first source that yields any.
 *
 * Sources are tried in order (catalog, then API); a source is loaded only
 * when it reports itself available, and the first non-empty result wins.
 * If none yields data, the catalog's `load()` is called once more without
 * the availability check before giving up.
 *
 * @param config - Passed to the API source.
 * @param {object} [options] - `onPrepareStart` is forwarded to the catalog
 *   source.
 * @returns {Promise<Array>} A non-empty list of model profiles.
 * @throws {BailianError} With `ExitCode.GENERAL` when no source yields data.
 */
async function getModels(config, options) {
	const catalog = new CatalogSource({ onPrepareStart: options?.onPrepareStart });
	const api = new ApiSource(config);
	for (const source of [catalog, api]) {
		if (!source.available()) continue;
		const loaded = await source.load();
		if (loaded.length > 0) return loaded;
	}
	const fallback = await catalog.load();
	if (fallback.length > 0) return fallback;
	throw new BailianError("No model data available.", ExitCode.GENERAL);
}
2301
+ //#endregion
2302
+ //#region src/advisor/types.ts
2303
+ const Modalities = {
2304
+ Text: "Text",
2305
+ Image: "Image",
2306
+ Video: "Video",
2307
+ Audio: "Audio"
2308
+ };
2309
+ const Complexities = {
2310
+ Single: "single",
2311
+ Pipeline: "pipeline"
2312
+ };
2313
+ const Budgets = {
2314
+ Low: "low",
2315
+ Medium: "medium",
2316
+ High: "high"
2317
+ };
2318
+ const ContextNeeds = {
2319
+ Standard: "standard",
2320
+ Large: "large",
2321
+ ExtraLarge: "extra-large"
2322
+ };
2323
+ const QualityPreferences = {
2324
+ Flagship: "flagship",
2325
+ Balanced: "balanced",
2326
+ CostOptimized: "cost-optimized"
2327
+ };
2328
+ const Capabilities = {
2329
+ TG: "TG",
2330
+ Reasoning: "Reasoning",
2331
+ VU: "VU",
2332
+ IG: "IG",
2333
+ VG: "VG",
2334
+ TTS: "TTS",
2335
+ ASR: "ASR",
2336
+ RealtimeASR: "Realtime-ASR",
2337
+ RealtimeTTS: "Realtime-Text-to-Speech",
2338
+ RealtimeAudioTranslate: "Realtime-Audio-Translate",
2339
+ RealtimeOmni: "Realtime-Omni",
2340
+ MultimodalOmni: "Multimodal-Omni",
2341
+ ME: "ME",
2342
+ TR: "TR",
2343
+ ThreeDGeneration: "3D-generation"
2344
+ };
2345
+ const Features = {
2346
+ FunctionCalling: "function-calling",
2347
+ WebSearch: "web-search",
2348
+ StructuredOutputs: "structured-outputs",
2349
+ PrefixCompletion: "prefix-completion"
2350
+ };
2351
+ const ModelCategories = {
2352
+ Flagship: "Flagship",
2353
+ CostOptimized: "Cost-optimized"
2354
+ };
2355
+ //#endregion
2356
+ //#region src/advisor/constants/prompts.ts
2357
+ const INTENT_MODEL = "qwen-turbo";
2358
+ const RANKING_MODEL = "qwen3.6-flash";
2359
+ const RANKING_MODEL_FAST = "qwen-turbo";
2360
+ const INTENT_SYSTEM_PROMPT = `你是一个意图分析器。根据用户的需求描述,先理解用户场景,再提取结构化信息。
2361
+
2362
+ ## 分析步骤
2363
+ 1. 用一句话总结用户的核心需求(taskSummary),要体现具体场景而非泛泛描述
2364
+ 2. 推断场景特征(scenarioHints),例如:["需要低延迟","面向C端用户","高并发","对话式交互","离线批处理","需要精准度"]
2365
+ 3. 基于场景特征推断 budget 和 qualityPreference
2366
+ - 只在用户明确表达或场景强烈暗示时偏离默认值
2367
+ - 用户明确说"低成本"、"便宜"、"省钱" → budget:"low"
2368
+ - 用户明确说"最好的"、"高精度"、"不计成本" → qualityPreference:"flagship"
2369
+ - 场景本身有强约束时才推断:如"日均百万请求的客服" → budget:"low"(高并发=成本敏感)
2370
+ - 其他情况保持 budget:"medium", qualityPreference:"balanced"
2371
+ 4. 提取模态、能力、特性等结构化字段
2372
+
2373
+ ## 示例
2374
+
2375
+ 用户: "做一个低成本高并发的在线客服"
2376
+ → budget:"low", qualityPreference:"cost-optimized"(用户明确说了低成本)
2377
+
2378
+ 用户: "法律合同审查,要求高精准度"
2379
+ → budget:"medium", qualityPreference:"flagship"(用户明确要求高精准度,但没提预算)
2380
+
2381
+ 用户: "我要做一个能理解图片的客服机器人"
2382
+ → budget:"medium", qualityPreference:"balanced"(用户没提成本和质量要求,不过度推断)
2383
+
2384
+ 用户: "帮我选一个写代码的模型"
2385
+ → budget:"medium", qualityPreference:"balanced"(通用需求,无明确倾向)
2386
+
2387
+ 用户: "预算有限,做个简单的文本摘要功能"
2388
+ → budget:"low", qualityPreference:"cost-optimized"(用户说了预算有限)
2389
+
2390
+ 用户: "企业级知识库问答,准确率是第一优先级"
2391
+ → budget:"high", qualityPreference:"flagship"(企业级+准确率第一=愿投入高成本)
2392
+
2393
+ 用户: "个人学习项目,试试AI生成图片"
2394
+ → budget:"low", qualityPreference:"cost-optimized"(个人学习=成本敏感)
2395
+
2396
+ 用户: "做一个Agent自动根据用户意图生成动画片"
2397
+ → budget:"medium", qualityPreference:"balanced"(复杂pipeline,但没明确成本/质量约束)
2398
+
2399
+ ## 模型偏好识别
2400
+ 分析用户是否提到了特定的模型、模型系列或厂商,据此判断推荐模式:
2401
+ - 用户未提到任何模型/系列/厂商 → mode:"unconstrained",不填 targets
2402
+ - 用户限定了范围(如"deepseek系列哪个好"、"通义千问的模型推荐"、"开源的推理模型") → mode:"scoped",targets:["deepseek"] 或 ["通义千问"]
2403
+ - 用户要对比特定模型(如"wan2.6和wan2.7哪个好"、"qwen-max和deepseek-v3对比"、"qwen-max适合做法律分析吗") → mode:"comparison",targets:["wan2.6","wan2.7"]
2404
+ - 单模型评估也算 comparison,targets 只填一个
2405
+ - 用户以某模型为参照找替代(如"有没有类似qwen-max但更便宜的") → mode:"alternative",targets:["qwen-max"]
2406
+ - 用户明确排除某些模型/系列(如"除了qwen还有什么好的") → excludes:["qwen"],mode 根据其他条件判断
2407
+ - targets 填写用户原文中的模型/系列名称,保持原文写法
2408
+
2409
+ ## 输出字段
2410
+ - taskSummary: 一句话场景理解(必须具体,禁止"用户想用AI做某事"这种废话)
2411
+ - scenarioHints: 推断的场景特征数组
2412
+ - complexity: "single"(单一模型可完成)或 "pipeline"(需要多个模型协同)
2413
+ - segments: 仅 pipeline 时填写,每步包含 step/inputModality/outputModality/requiredCapabilities。
2414
+ - step 必须是一句话描述该步骤在用户任务中解决的具体问题,例如"解析天气预报数据,生成适合视频制作的场景描述文本",禁止用编号或泛化的模态标签
2415
+ - segments 必须形成模态链路:每步的 inputModality 应包含上一步的 outputModality,确保上下游数据可以衔接
2416
+ - inputModality: 用户输入涉及的模态 ["Text","Image","Video","Audio"]
2417
+ - outputModality: 期望输出的模态
2418
+ - requiredCapabilities: 需要的能力。可选代码(必须严格使用,不要自创):
2419
+ TG=文本生成, Reasoning=推理, VU=视觉理解, IG=图像生成, VG=视频生成,
2420
+ TTS=语音合成, ASR=语音识别, Realtime-ASR=实时语音识别,
2421
+ Realtime-Text-to-Speech=实时语音合成, Realtime-Audio-Translate=实时音频翻译,
2422
+ Realtime-Omni=实时全模态, Multimodal-Omni=全模态, ME=多模态嵌入,
2423
+ TR=翻译, 3D-generation=3D生成
2424
+ - requiredFeatures: 需要的特性 (function-calling, web-search, structured-outputs, prefix-completion)
2425
+ - budget: "low"/"medium"/"high"(基于场景推断,不要默认 medium)
2426
+ - contextNeed: "standard"/"large"/"extra-large"
2427
+ - qualityPreference: "flagship"/"balanced"/"cost-optimized"(基于场景推断,不要默认 balanced)
2428
+ - modelPreference: { mode, targets?, excludes? }(见上方"模型偏好识别")
2429
+
2430
+ 只输出 JSON,不要有其他文字。`;
2431
+ const SINGLE_SYSTEM_PROMPT = `你是阿里云百炼平台的模型推荐顾问。从以下候选模型中选出最佳推荐。
2432
+
2433
+ ## 背景
2434
+ 系统已根据用户意图预筛选了候选模型,你只需从中精选并排序。
2435
+ 意图分析中包含 budget 和 qualityPreference 字段,这代表了用户的实际需求层次。
2436
+
2437
+ ## 推荐策略
2438
+
2439
+ 推荐 3 个不同档次的模型,但排序必须反映用户的真实需求:
2440
+
2441
+ - 推荐 #1(最佳推荐):根据 budget 和 qualityPreference 判断哪个档次最适合用户,把那个档次的最佳模型放在第一位
2442
+ - 推荐 #2(次优选择):另一个档次中值得考虑的模型,说明与 #1 相比的 tradeoff
2443
+ - 推荐 #3(备选参考):第三个视角的选择,说明适用场景差异
2444
+
2445
+ 关键原则:
2446
+ - budget:"low" / qualityPreference:"cost-optimized" → 推荐 #1 应该是性价比最高的模型,而非旗舰模型
2447
+ - budget:"high" / qualityPreference:"flagship" → 推荐 #1 应该是能力最强的旗舰模型
2448
+ - budget:"medium" / qualityPreference:"balanced" → 推荐 #1 应该是综合匹配度最高的模型,不预设档次偏好
2449
+
2450
+ 每个推荐都必须说明该模型为什么适合(或作为备选为什么值得考虑),理由必须关联用户的具体需求。
2451
+
2452
+ ## 规则
2453
+ - 只能推荐候选列表中的模型,严禁推荐列表外的模型
2454
+ - 严禁使用泛泛的推荐理由(如"性能强大"、"综合能力好"、"效果不错"),每条 reason 必须说明该模型解决用户任务中的什么具体问题
2455
+ - 三个推荐的理由不允许雷同,每个必须从不同维度论证
2456
+ - 有定价信息时:结合 budget 字段权衡,把最符合用户预算的放在最前面
2457
+ - 有家族信息时:避免推荐同一家族的多个模型,优先推荐稳定版本
2458
+ - 有版本标签时:优先推荐 stable/latest 版本,除非用户明确需要特定版本
2459
+ - 没有增强字段的模型:按能力和描述排序即可,不因缺少信息而降权
2460
+ - 如果没有合适的模型,返回空数组
2461
+ - 如果你认为该需求实际需要多模型协同完成(pipeline),可以输出 type:"pipeline" 格式
2462
+ - 输出严格 JSON,不要输出其他内容
2463
+
2464
+ ## 输出格式
2465
+
2466
+ 单一任务:
2467
+ {"type":"single","recommendations":[{"model":"模型ID","reason":"推荐理由","highlights":["亮点"]}]}
2468
+
2469
+ 复合任务(仅当你确信需要多模型协同时):
2470
+ {"type":"pipeline","summary":"一句话方案描述","steps":[{"step":"步骤描述","recommendations":[{"model":"模型ID","reason":"选择理由","highlights":["亮点"]}]}]}`;
2471
+ const PIPELINE_SYSTEM_PROMPT = `你是阿里云百炼平台的模型推荐顾问。用户需求已被拆解为多步骤流水线,请为每步选出最佳模型。
2472
+
2473
+ ## 背景
2474
+ 系统已根据各步骤需求预筛选了候选模型。
2475
+ 意图分析中包含 budget 和 qualityPreference 字段,这代表了用户的实际需求层次。
2476
+
2477
+ ## 推荐策略
2478
+
2479
+ 每步推荐 3 个不同档次的模型,但排序必须反映用户的真实需求:
2480
+
2481
+ - 推荐 #1(最佳推荐):根据 budget 和 qualityPreference 判断哪个档次最适合用户,把那个档次的最佳模型放在第一位
2482
+ - 推荐 #2(次优选择):另一个档次中值得考虑的模型,说明 tradeoff
2483
+ - 推荐 #3(备选参考):第三个视角的选择,说明适用场景差异
2484
+
2485
+ 关键原则:
2486
+ - budget:"low" / qualityPreference:"cost-optimized" → 推荐 #1 应该是性价比最高的模型
2487
+ - budget:"high" / qualityPreference:"flagship" → 推荐 #1 应该是能力最强的旗舰模型
2488
+ - budget:"medium" / qualityPreference:"balanced" → 推荐 #1 应该是综合匹配度最高的模型
2489
+
2490
+ ## 规则
2491
+ - 只能推荐候选列表中的模型
2492
+ - 每步推荐多个模型,按优先级排序,每个推荐给出简短理由和关键亮点
2493
+ - step 字段必须用一句话描述该步骤在用户任务中解决的具体问题,禁止用编号或泛化的模态标签(如"输出: Text")
2494
+ - 严禁使用泛泛的推荐理由,每条 reason 必须说明该模型在这一步解决用户任务中的什么具体问题
2495
+ - 有定价信息时:结合 budget 字段权衡,把最符合用户预算的放在最前面
2496
+ - 有家族信息时:避免在相邻步骤使用同一家族的不同规格模型,除非确实需要
2497
+ - 没有增强字段的模型:按能力和描述排序即可,不因缺少信息而降权
2498
+ - 相邻步骤的模型必须模态兼容:上一步模型的输出模态必须被下一步模型的输入模态支持
2499
+ - 如果你认为该需求其实单模型可以完成,可以输出 type:"single" 格式
2500
+ - 输出严格 JSON
2501
+
2502
+ ## 输出格式
2503
+
2504
+ {"type":"pipeline","summary":"一句话方案描述","steps":[{"step":"该步骤在用户任务中解决的具体问题","recommendations":[{"model":"模型ID","reason":"该模型如何解决这一步的具体问题","highlights":["亮点"]}]}]}
2505
+
2506
+ 或者(如果你认为单模型即可):
2507
+ {"type":"single","recommendations":[{"model":"模型ID","reason":"推荐理由","highlights":["亮点"]}]}`;
2508
+ const COMPARISON_SYSTEM_PROMPT = `你是阿里云百炼平台的模型对比顾问。用户想对比特定模型,请根据使用场景进行对比分析。
2509
+
2510
+ ## 背景
2511
+ 用户指定了要对比的模型,系统已将这些模型和相关候选预筛选到列表中。
2512
+ 意图分析中的 modelPreference.targets 是用户要对比的模型。
2513
+
2514
+ ## 对比策略
2515
+ - 用户指定的模型必须全部出现在推荐结果中,按适合程度排序
2516
+ - 每个模型的 reason 必须是对比性的,说明该模型相对于其他对比模型的优势和劣势
2517
+ - 如果候选中有比用户指定的更合适的模型,可以额外推荐,但用户指定的必须优先包含
2518
+ - 单模型评估场景(targets 只有一个):评估该模型是否适合用户需求,同时推荐更优的替代
2519
+
2520
+ ## 规则
2521
+ - 只能推荐候选列表中的模型
2522
+ - reason 必须包含对比视角:该模型相比其他模型在哪些方面更好/更差
2523
+ - highlights 突出各模型的差异化特点
2524
+ - 输出严格 JSON,不要输出其他内容
2525
+
2526
+ ## 输出格式
2527
+ {"type":"single","recommendations":[{"model":"模型ID","reason":"对比分析理由","highlights":["差异化亮点"]}]}`;
2528
+ const ALTERNATIVE_SYSTEM_PROMPT = `你是阿里云百炼平台的模型替代顾问。用户以某个模型为参照,寻找替代方案。
2529
+
2530
+ ## 背景
2531
+ 用户以某个模型为参照点,想找到在特定维度上更优的替代方案(如更便宜、更快、更强)。
2532
+ 意图分析中的 modelPreference.targets 是参照模型。
2533
+
2534
+ ## 替代策略
2535
+ - 推荐 #1:如果参照模型在候选中,先评估它是否满足用户需求,给出其基本定位
2536
+ - 推荐 #2~#3:推荐替代方案,reason 必须说明相比参照模型在用户关注维度上的 tradeoff
2537
+ - 关注用户提到的替代维度(如"更便宜"→重点对比定价,"更强"→重点对比能力)
2538
+
2539
+ ## 规则
2540
+ - 只能推荐候选列表中的模型
2541
+ - 参照模型必须包含在结果中(如果在候选列表中)
2542
+ - 替代推荐的 reason 必须说明与参照模型的具体差异
2543
+ - 避免推荐和参照模型同系列的其他版本(除非确实有显著差异)
2544
+ - 输出严格 JSON,不要输出其他内容
2545
+
2546
+ ## 输出格式
2547
+ {"type":"single","recommendations":[{"model":"模型ID","reason":"替代分析理由","highlights":["差异化亮点"]}]}`;
2548
+ //#endregion
2549
+ //#region src/advisor/constants/defaults.ts
2550
+ const DEFAULT_INTENT = {
2551
+ complexity: Complexities.Single,
2552
+ taskSummary: "",
2553
+ scenarioHints: [],
2554
+ inputModality: [],
2555
+ outputModality: [],
2556
+ requiredCapabilities: [Capabilities.TG],
2557
+ requiredFeatures: [],
2558
+ budget: Budgets.Medium,
2559
+ contextNeed: ContextNeeds.Standard,
2560
+ qualityPreference: QualityPreferences.Balanced,
2561
+ confidence: 0
2562
+ };
2563
+ //#endregion
2564
+ //#region src/advisor/intent.ts
2565
/**
 * Ask the intent model to turn a free-text request into a structured intent.
 *
 * Sends `input` with `INTENT_SYSTEM_PROMPT` to the chat endpoint, takes the
 * first `{…}` span of the reply as JSON and normalises every field. Because
 * the reply is model-generated, each field is validated:
 * list fields must be arrays, `budget` / `contextNeed` / `qualityPreference`
 * must be one of the declared values (otherwise the `DEFAULT_INTENT` value is
 * used), and non-object entries in `segments` are dropped so that one
 * malformed entry does not discard the whole parsed intent.
 *
 * @param config - Client config; `config.baseUrl` selects the endpoint.
 * @param {string} input - The user's request text.
 * @returns {Promise<object>} The parsed intent with `confidence: 1`, or
 *   `DEFAULT_INTENT` when the request fails, the reply has no JSON object, or
 *   parsing throws.
 */
async function analyzeIntent(config, input) {
	const url = chatEndpoint(config.baseUrl);
	const body = {
		model: INTENT_MODEL,
		messages: [{
			role: "system",
			content: INTENT_SYSTEM_PROMPT
		}, {
			role: "user",
			content: input
		}],
		max_tokens: 1024,
		temperature: 0
	};
	const asArray = (value) => Array.isArray(value) ? value : [];
	// Accept a value only if it is one of the enum object's declared values.
	const pickEnum = (value, allowed, fallback) => Object.values(allowed).includes(value) ? value : fallback;
	try {
		// NOTE(review): confirm the unit requestJson expects for `timeout`.
		const response = await requestJson(config, {
			url,
			method: "POST",
			body,
			timeout: 5e3
		});
		const content = response.choices?.[0]?.message?.content ?? "";
		const jsonMatch = content.match(/\{[\s\S]*\}/);
		if (!jsonMatch) return DEFAULT_INTENT;
		const parsed = JSON.parse(jsonMatch[0]);
		const VALID_MODES = [
			"scoped",
			"comparison",
			"alternative"
		];
		const rawPref = parsed.modelPreference;
		const modelPreference = rawPref && typeof rawPref === "object" ? {
			mode: VALID_MODES.includes(rawPref.mode) ? rawPref.mode : "unconstrained",
			targets: Array.isArray(rawPref.targets) ? rawPref.targets : void 0,
			excludes: Array.isArray(rawPref.excludes) ? rawPref.excludes : void 0
		} : void 0;
		const segments = Array.isArray(parsed.segments) ? parsed.segments.filter((seg) => seg !== null && typeof seg === "object").map((seg) => ({
			step: seg.step ?? "",
			inputModality: asArray(seg.inputModality),
			outputModality: asArray(seg.outputModality),
			requiredCapabilities: asArray(seg.requiredCapabilities)
		})) : void 0;
		return {
			complexity: parsed.complexity === Complexities.Pipeline ? Complexities.Pipeline : Complexities.Single,
			taskSummary: typeof parsed.taskSummary === "string" ? parsed.taskSummary : "",
			scenarioHints: asArray(parsed.scenarioHints),
			segments,
			inputModality: asArray(parsed.inputModality),
			outputModality: asArray(parsed.outputModality),
			requiredCapabilities: asArray(parsed.requiredCapabilities),
			requiredFeatures: asArray(parsed.requiredFeatures),
			budget: pickEnum(parsed.budget, Budgets, DEFAULT_INTENT.budget),
			contextNeed: pickEnum(parsed.contextNeed, ContextNeeds, DEFAULT_INTENT.contextNeed),
			qualityPreference: pickEnum(parsed.qualityPreference, QualityPreferences, DEFAULT_INTENT.qualityPreference),
			confidence: 1,
			modelPreference
		};
	} catch {
		// Best-effort by design: any failure degrades to the default intent.
		return DEFAULT_INTENT;
	}
}
2623
+ //#endregion
2624
+ //#region src/advisor/constants/scoring.ts
2625
/** Matches a trailing `-YYYY-MM-DD` snapshot suffix on a model id. */
const SNAPSHOT_DATE_RE = /-\d{4}-\d{2}-\d{2}$/;
/** Capabilities counted as the "generation" domain by `hasMultiDomainCapabilities`. */
const GENERATION_CAPS = new Set([
  Capabilities.IG,
  Capabilities.VG,
  Capabilities.TTS,
  Capabilities.RealtimeTTS,
  Capabilities.ThreeDGeneration
]);
/** Capabilities counted as the "text" domain by `hasMultiDomainCapabilities`. */
const TEXT_CAPS = new Set([
  Capabilities.TG,
  Capabilities.Reasoning,
  Capabilities.ASR,
  Capabilities.RealtimeASR,
  Capabilities.RealtimeAudioTranslate,
  Capabilities.TR,
  Capabilities.ME
]);
/** Minimum context window that earns the context bonus, keyed by context need. */
const CONTEXT_THRESHOLDS = {
  standard: 0,
  large: 32000,
  "extra-large": 128000
};
2647
+ //#endregion
2648
+ //#region src/advisor/recall.ts
2649
/**
 * Report whether the capability list spans both domains: at least one entry
 * in `GENERATION_CAPS` and at least one in `TEXT_CAPS`.
 *
 * @param caps Iterable of capability identifiers.
 * @returns `true` only when both domains are represented.
 */
function hasMultiDomainCapabilities(caps) {
  const requested = [...caps];
  const needsGeneration = requested.some((cap) => GENERATION_CAPS.has(cap));
  const needsText = requested.some((cap) => TEXT_CAPS.has(cap));
  return needsGeneration && needsText;
}
2658
/**
 * Drop dated snapshot entries (`<base>-YYYY-MM-DD`) whose undated base model
 * is also present in the list. Snapshots without a base entry are kept.
 *
 * @param models Model profiles, each with a string `model` id.
 * @returns A new array with redundant snapshots removed.
 */
function deduplicateSnapshots(models) {
  const knownIds = new Set();
  for (const entry of models) knownIds.add(entry.model);
  return models.filter((entry) => {
    const id = entry.model;
    const stripped = id.replace(SNAPSHOT_DATE_RE, "");
    // Unchanged id => not a snapshot; otherwise keep only if the base is absent.
    return stripped === id || !knownIds.has(stripped);
  });
}
2666
/**
 * Check a model's declared modalities against the requested ones.
 * An empty request list on either side imposes no constraint; a non-empty
 * list is satisfied when the model supports at least one of its entries.
 *
 * @param model Profile whose `inferenceMetadata` may list request/response modalities.
 * @param inputModality Requested input modalities.
 * @param outputModality Requested output modalities.
 * @returns `true` when both sides are satisfied.
 */
function matchesModality(model, inputModality, outputModality) {
  const metadata = model.inferenceMetadata;
  const accepted = metadata?.request_modality ?? [];
  const produced = metadata?.response_modality ?? [];
  const inputOk = inputModality.length === 0 || inputModality.some((wanted) => accepted.includes(wanted));
  if (!inputOk) return false;
  return outputModality.length === 0 || outputModality.some((wanted) => produced.includes(wanted));
}
2677
/**
 * Check whether a model can consume what the previous pipeline step emits.
 *
 * @param model Profile whose `inferenceMetadata.request_modality` lists accepted inputs.
 * @param upstreamOutput Output modalities of the previous step; empty means no constraint.
 * @returns `true` when unconstrained or when at least one upstream modality is accepted.
 */
function matchesUpstream(model, upstreamOutput) {
  if (upstreamOutput.length === 0) return true;
  const accepted = model.inferenceMetadata?.request_modality ?? [];
  for (const modality of upstreamOutput) {
    if (accepted.includes(modality)) return true;
  }
  return false;
}
2682
/**
 * Heuristic relevance score of one model for an intent.
 *
 * Points: +10 per required capability the model has, +5 per required feature,
 * +8 when a non-zero context threshold is met, +15 when the quality preference
 * matches the model category (flagship / cost-optimized), and +5 for flagship
 * models under a balanced preference.
 *
 * @param model Model profile; `capabilities` and `features` may be absent.
 * @param intent Intent providing required capabilities/features, context need and quality preference.
 * @returns The accumulated score (0 when nothing matches).
 */
function scoreModel(model, intent) {
  const { requiredCapabilities, requiredFeatures, contextNeed, qualityPreference } = intent;
  const { contextWindow, category } = model;
  // Profiles are not guaranteed to carry these arrays (buildModelText treats
  // them as optional), so a missing list scores zero instead of throwing.
  const capabilities = model.capabilities ?? [];
  const features = model.features ?? [];
  let score = 0;
  for (const cap of requiredCapabilities) if (capabilities.includes(cap)) score += 10;
  for (const feat of requiredFeatures) if (features.includes(feat)) score += 5;
  // An unknown contextNeed yields `undefined`, which fails `> 0` and adds nothing.
  const ctxThreshold = CONTEXT_THRESHOLDS[contextNeed];
  if (ctxThreshold > 0 && (contextWindow ?? 0) >= ctxThreshold) score += 8;
  if (qualityPreference === QualityPreferences.Flagship && category === ModelCategories.Flagship) score += 15;
  else if (qualityPreference === QualityPreferences.CostOptimized && category === ModelCategories.CostOptimized) score += 15;
  else if (qualityPreference === QualityPreferences.Balanced && category === ModelCategories.Flagship) score += 5;
  return score;
}
2697
/**
 * Score every model against the intent and return the best `limit` entries.
 *
 * @param models Model profiles to score.
 * @param intent Intent passed to `scoreModel`.
 * @param limit Maximum number of candidates to return.
 * @returns `{ model, score }` candidates in descending score order.
 */
function scoreAndRank(models, intent, limit) {
  const scored = [];
  for (const model of models) {
    scored.push({
      model,
      score: scoreModel(model, intent)
    });
  }
  scored.sort((a, b) => b.score - a.score);
  return scored.slice(0, limit);
}
2703
/**
 * Collect the model ids of a candidate list.
 *
 * @param candidates `{ model, score }` entries, where `model.model` is the id.
 * @returns A Set of the ids.
 */
function candidateIds(candidates) {
  const ids = new Set();
  for (const candidate of candidates) ids.add(candidate.model.model);
  return ids;
}
2706
/**
 * Limit how many candidates of one model family are kept, preserving order.
 * Candidates without a family are always kept. If fewer than 10 candidates
 * survive, the list is topped up from the overflow (in original order) until
 * it reaches 10 or the overflow runs out.
 *
 * @param candidates Ranked `{ model, score }` entries.
 * @param cap Maximum entries per `model.family`.
 * @returns The capped (and possibly back-filled) candidate list.
 */
function capByFamily(candidates, cap) {
  const perFamily = new Map();
  const kept = [];
  const overflow = [];
  for (const candidate of candidates) {
    const { family } = candidate.model;
    if (family) {
      const used = perFamily.get(family) ?? 0;
      if (!(used < cap)) {
        overflow.push(candidate);
        continue;
      }
      perFamily.set(family, used + 1);
    }
    kept.push(candidate);
  }
  const shortfall = 10 - kept.length;
  return shortfall > 0 ? [...kept, ...overflow.slice(0, shortfall)] : kept;
}
2725
/**
 * Keep the first occurrence of each model id, skipping ids already excluded.
 *
 * @param candidates `{ model, score }` entries in priority order.
 * @param excludeIds Iterable of ids that must not appear in the result.
 * @returns A new array of unique, non-excluded candidates.
 */
function deduplicateCandidates(candidates, excludeIds) {
  const taken = new Set(excludeIds);
  const unique = [];
  for (const candidate of candidates) {
    const id = candidate.model.model;
    if (taken.has(id)) continue;
    taken.add(id);
    unique.push(candidate);
  }
  return unique;
}
2733
/**
 * Fill the slots left (out of 10) with the best-scoring models that are not
 * already selected.
 *
 * @param models Model profiles to draw from.
 * @param intent Intent used for scoring.
 * @param excludeIds Set of model ids already selected.
 * @returns Up to `10 - excludeIds.size` additional candidates; empty when full.
 */
function computeRemaining(models, intent, excludeIds) {
  const openSlots = 10 - excludeIds.size;
  if (openSlots <= 0) return [];
  const unused = models.filter((profile) => !excludeIds.has(profile.model));
  return scoreAndRank(unused, intent, openSlots);
}
2737
/**
 * Recall up to five candidates for one pipeline segment.
 *
 * The filter is relaxed step by step whenever fewer than five models remain:
 * modality + upstream compatibility, then modality only, then all models.
 *
 * @param models Model profiles to draw from.
 * @param segment Segment with `inputModality`, `outputModality`, `requiredCapabilities`.
 * @param upstreamOutput Output modalities of the previous segment (empty for the first).
 * @param budget Budget carried into the segment-level intent.
 * @param qualityPreference Quality preference carried into the segment-level intent.
 * @returns The top five scored candidates.
 */
function recallForSegment(models, segment, upstreamOutput, budget, qualityPreference) {
  const { inputModality, outputModality, requiredCapabilities } = segment;
  const fitsModality = (profile) => matchesModality(profile, inputModality, outputModality);
  let pool = models.filter((profile) => fitsModality(profile) && matchesUpstream(profile, upstreamOutput));
  if (pool.length < 5) pool = models.filter(fitsModality);
  if (pool.length < 5) pool = models;
  // A synthetic single-step intent so the shared scorer can be reused.
  const segmentIntent = {
    complexity: Complexities.Single,
    taskSummary: "",
    scenarioHints: [],
    inputModality,
    outputModality,
    requiredCapabilities,
    requiredFeatures: [],
    budget,
    contextNeed: ContextNeeds.Standard,
    qualityPreference,
    confidence: 1
  };
  return scoreAndRank(pool, segmentIntent, 5);
}
2757
/**
 * Rule-based candidate recall.
 *
 * After removing redundant snapshots, one of three strategies is used:
 * per-segment recall for pipeline intents, cross-domain recall when the
 * required capabilities span generation and text, otherwise a modality filter
 * followed by scoring. The result is capped at three models per family.
 *
 * @param models All model profiles.
 * @param intent Analyzed intent.
 * @returns Ranked `{ model, score }` candidates.
 */
function recallCandidates(models, intent) {
  const pool = deduplicateSnapshots(models);
  const { segments } = intent;
  let recalled;
  if (intent.complexity === Complexities.Pipeline && segments?.length) {
    const collected = [];
    segments.forEach((segment, index) => {
      // Each segment must accept what the previous one produces.
      const upstreamOutput = index === 0 ? [] : segments[index - 1].outputModality;
      const forSegment = recallForSegment(pool, segment, upstreamOutput, intent.budget, intent.qualityPreference);
      collected.push(...deduplicateCandidates(forSegment, candidateIds(collected)));
    });
    recalled = [...collected, ...computeRemaining(pool, intent, candidateIds(collected))];
  } else if (hasMultiDomainCapabilities(intent.requiredCapabilities)) {
    recalled = recallCrossDomain(pool, intent);
  } else {
    const byModality = pool.filter((profile) => matchesModality(profile, intent.inputModality, intent.outputModality));
    // Too few modality matches: fall back to the whole pool.
    recalled = scoreAndRank(byModality.length < 5 ? pool : byModality, intent, 50);
  }
  return capByFamily(recalled, 3);
}
2777
/**
 * Recall candidates for an intent whose capabilities span both the generation
 * and the text domain: up to 25 per domain, then a top-up from the rest.
 *
 * @param models Model profiles to draw from.
 * @param intent Analyzed intent.
 * @returns Generation candidates, then text candidates, then remaining fill-ins.
 */
function recallCrossDomain(models, intent) {
  const perDomain = Math.ceil(50 / 2);
  const wanted = intent.requiredCapabilities;
  const genCaps = wanted.filter((cap) => GENERATION_CAPS.has(cap));
  const textCaps = wanted.filter((cap) => TEXT_CAPS.has(cap));
  const hasAny = (profile, caps) => caps.some((cap) => profile.capabilities.includes(cap));
  let picked = [];
  if (genCaps.length > 0) {
    const genModels = models.filter((profile) => hasAny(profile, genCaps));
    picked = scoreAndRank(genModels, intent, perDomain);
  }
  if (textCaps.length > 0) {
    const alreadyPicked = candidateIds(picked);
    const textModels = models.filter((profile) => !alreadyPicked.has(profile.model) && hasAny(profile, textCaps));
    // Text models are scored against the text capabilities only.
    const textIntent = {
      ...intent,
      requiredCapabilities: textCaps
    };
    picked = [...picked, ...scoreAndRank(textModels, textIntent, perDomain)];
  }
  return [...picked, ...computeRemaining(models, intent, candidateIds(picked))];
}
2795
+ //#endregion
2796
+ //#region src/advisor/embedding.ts
2797
/** Model id sent with every embeddings request. */
const EMBEDDING_MODEL = "text-embedding-v4";
/** File name of the cached model embeddings inside the skill data directory. */
const EMBEDDINGS_FILE = "models-embeddings.json";
/** Number of texts sent per embeddings request when building the cache. */
const BATCH_SIZE = 10;
2800
/**
 * Directory holding the `doc-llm-wiki` skill data, under the CLI config directory.
 *
 * @returns The directory path.
 */
function skillDataDir() {
  const configDir = getConfigDir();
  return join$1(configDir, "skills/doc-llm-wiki");
}
2803
/**
 * Path of the cached model-embeddings file.
 *
 * @returns `EMBEDDINGS_FILE` inside the skill data directory.
 */
function embeddingsPath() {
  const dataDir = skillDataDir();
  return join$1(dataDir, EMBEDDINGS_FILE);
}
2806
/**
 * Load the cached model embeddings from disk.
 *
 * @returns The cached `items` array, or `null` when the file is missing,
 *   unreadable, not valid JSON, or does not contain an `items` array.
 */
function loadModelEmbeddings() {
  const path = embeddingsPath();
  if (!existsSync$1(path)) return null;
  try {
    const items = JSON.parse(readFileSync$1(path, "utf-8"))?.items;
    // Callers test the result against `null`; a file without `items` must not
    // leak `undefined`, which would read as "embeddings available".
    return Array.isArray(items) ? items : null;
  } catch {
    // A corrupt cache is treated the same as a missing one.
    return null;
  }
}
2815
/**
 * Embed a single query string.
 *
 * @param config Client configuration; `config.baseUrl` is the API origin.
 * @param text Text to embed.
 * @returns The embedding of the first (only) item in the response.
 */
async function embedQuery(config, text) {
  const payload = {
    model: EMBEDDING_MODEL,
    input: [text],
    dimensions: 512,
    encoding_format: "float"
  };
  const response = await requestJson(config, {
    url: `${config.baseUrl}/compatible-mode/v1/embeddings`,
    method: "POST",
    body: payload,
    timeout: 1e4
  });
  return response.data[0].embedding;
}
2828
/**
 * Embed several texts in one request.
 *
 * @param config Client configuration; `config.baseUrl` is the API origin.
 * @param texts Texts to embed.
 * @returns The embeddings, ordered by the `index` field of each response item.
 */
async function embedBatch(config, texts) {
  const payload = {
    model: EMBEDDING_MODEL,
    input: texts,
    dimensions: 512,
    encoding_format: "float"
  };
  const response = await requestJson(config, {
    url: `${config.baseUrl}/compatible-mode/v1/embeddings`,
    method: "POST",
    body: payload,
    timeout: 3e4
  });
  // Sort by `index` so each vector lines up with its input text.
  const ordered = response.data.sort((a, b) => a.index - b.index);
  return ordered.map((entry) => entry.embedding);
}
2841
/** Chinese display labels for capability codes; codes not listed are used verbatim. */
const CAPABILITY_LABELS = {
  TG: "文本生成",
  Reasoning: "推理",
  VU: "视觉理解",
  IG: "图像生成",
  VG: "视频生成",
  TTS: "语音合成",
  ASR: "语音识别"
};
/** Chinese display labels for modality names; names not listed are used verbatim. */
const MODALITY_LABELS = {
  Text: "文本",
  Image: "图片/图像",
  Video: "视频",
  Audio: "音频/语音"
};
2856
/**
 * Read every `*.json` file in the skill's `groups` directory and map each
 * listed model id to a description: the item's own description when truthy,
 * otherwise the group's description.
 *
 * @returns Map of model id to description; empty when the directory is missing.
 */
function loadGroupDescriptions() {
  const descriptions = new Map();
  const groupsDir = join$1(skillDataDir(), "groups");
  if (!existsSync$1(groupsDir)) return descriptions;
  for (const file of readdirSync(groupsDir)) {
    if (!file.endsWith(".json")) continue;
    try {
      const group = JSON.parse(readFileSync$1(join$1(groupsDir, file), "utf-8"));
      const fallback = group.description ?? "";
      if (!group.items) continue;
      for (const item of group.items) descriptions.set(item.model, item.description || fallback);
    } catch {
      // Best-effort: an unreadable or malformed group file is skipped.
    }
  }
  return descriptions;
}
2867
/**
 * Build the text that represents a model when computing its embedding.
 * Empty parts are dropped and the rest are joined with " | ".
 *
 * @param model Model profile.
 * @param descriptions Map of model id to description; takes precedence over
 *   the profile's own `shortDescription` / `description`.
 * @returns The joined description string.
 */
function buildModelText(model, descriptions) {
  const labelled = (values, labels) => (values ?? []).map((value) => labels[value] ?? value).join(", ");
  const caps = labelled(model.capabilities, CAPABILITY_LABELS);
  const description = descriptions.get(model.model) || model.shortDescription || model.description || "";
  const metadata = model.inferenceMetadata;
  const inputMods = labelled(metadata?.request_modality, MODALITY_LABELS);
  const outputMods = labelled(metadata?.response_modality, MODALITY_LABELS);
  const parts = [model.name, model.model, description];
  parts.push(caps ? `能力: ${caps}` : "");
  parts.push(inputMods ? `输入: ${inputMods}` : "");
  parts.push(outputMods ? `输出: ${outputMods}` : "");
  parts.push(model.features?.length ? `特性: ${model.features.join(", ")}` : "");
  parts.push(model.familyName || "");
  parts.push(model.category ? `定位: ${model.category}` : "");
  return parts.filter(Boolean).join(" | ");
}
2884
/**
 * Embed every model (in sequential batches of `BATCH_SIZE`), write the result
 * to the embeddings cache file and return it.
 *
 * @param config Client configuration used for the embedding requests.
 * @param models Model profiles to embed.
 * @returns `{ id, vector }` items, one per model, in input order.
 */
async function buildAndCacheEmbeddings(config, models) {
  const descriptions = loadGroupDescriptions();
  const texts = models.map((profile) => buildModelText(profile, descriptions));
  const vectors = [];
  let offset = 0;
  while (offset < texts.length) {
    const batch = texts.slice(offset, offset + BATCH_SIZE);
    for (const vector of await embedBatch(config, batch)) vectors.push(vector);
    offset += BATCH_SIZE;
  }
  const items = models.map((profile, position) => ({
    id: profile.model,
    vector: vectors[position]
  }));
  const cachePath = embeddingsPath();
  mkdirSync$1(dirname(cachePath), { recursive: true });
  writeFileSync$1(cachePath, JSON.stringify({
    model: EMBEDDING_MODEL,
    dimensions: 512,
    count: items.length,
    items
  }));
  return items;
}
2907
/**
 * Cosine similarity of two numeric vectors.
 *
 * @param vecA First vector.
 * @param vecB Second vector.
 * @returns The similarity in [-1, 1]; 0 when either vector is missing, the
 *   lengths differ, or either vector has zero magnitude.
 */
function cosineSimilarity(vecA, vecB) {
  // Vectors of different dimension are not comparable. Without this guard a
  // shorter or missing vecB produced NaN (or threw), and NaN breaks the
  // ordering of any sort that ranks by similarity.
  if (!vecA || !vecB || vecA.length !== vecB.length) return 0;
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let idx = 0; idx < vecA.length; idx++) {
    dot += vecA[idx] * vecB[idx];
    normA += vecA[idx] * vecA[idx];
    normB += vecB[idx] * vecB[idx];
  }
  const denom = Math.sqrt(normA) * Math.sqrt(normB);
  return denom === 0 ? 0 : dot / denom;
}
2919
+ //#endregion
2920
+ //#region src/advisor/recall-semantic.ts
2921
/** In-process copy of the model embeddings; `null` until a load succeeds. */
let cachedEmbeddings = null;
/**
 * Return the model embeddings, reading them from disk while the in-process
 * copy is still `null`.
 *
 * @returns The embeddings, or whatever `loadModelEmbeddings` reported.
 */
function getEmbeddings() {
  if (cachedEmbeddings !== null) return cachedEmbeddings;
  cachedEmbeddings = loadModelEmbeddings();
  return cachedEmbeddings;
}
2926
/**
 * Report whether model embeddings can be obtained without rebuilding them.
 *
 * @returns `true` when `getEmbeddings()` yields something other than `null`.
 */
function isSemanticAvailable() {
  const embeddings = getEmbeddings();
  return embeddings !== null;
}
2929
/**
 * Case-insensitive substring match of a user-supplied target against a
 * model's id, name, family, family name and provider.
 *
 * @param model Model profile.
 * @param target Name fragment to look for.
 * @returns `true` when any of those string fields contains the target;
 *   `false` for a blank or non-string target.
 */
function matchesTarget(model, target) {
  // `"anything".includes("")` is true, so a blank target would match every
  // model (and, used as an exclude, remove every candidate). Targets come
  // from model-generated JSON, so also tolerate non-string entries.
  if (typeof target !== "string" || target.trim() === "") return false;
  const needle = target.toLowerCase();
  return [
    model.model,
    model.name,
    model.family,
    model.familyName,
    model.provider
  ].some((field) => typeof field === "string" && field.toLowerCase().includes(needle));
}
2939
/**
 * Check whether a model matches at least one of the given targets.
 *
 * @param model Model profile.
 * @param targets Name fragments to try.
 * @returns `true` on the first match, `false` when none match or the list is empty.
 */
function matchesAnyTarget(model, targets) {
  return targets.findIndex((target) => matchesTarget(model, target)) !== -1;
}
2942
/**
 * Remove candidates whose model matches any excluded name.
 *
 * @param candidates `{ model, score }` entries.
 * @param excludes Name fragments to exclude; empty returns `candidates` as-is.
 * @returns The filtered candidates.
 */
function applyExcludes(candidates, excludes) {
  if (excludes.length === 0) return candidates;
  const kept = [];
  for (const candidate of candidates) {
    if (!matchesAnyTarget(candidate.model, excludes)) kept.push(candidate);
  }
  return kept;
}
2946
/**
 * Check whether a model fits one pipeline segment: its modalities must
 * satisfy the segment's input and output lists (an empty list imposes no
 * constraint), and it must have at least one required capability when any
 * are listed.
 *
 * @param model Model profile with `capabilities` and optional `inferenceMetadata`.
 * @param segment Segment with `inputModality`, `outputModality`, `requiredCapabilities`.
 * @returns `true` when the model fits.
 */
function matchesSegment(model, segment) {
  const metadata = model.inferenceMetadata;
  const accepted = metadata?.request_modality ?? [];
  const produced = metadata?.response_modality ?? [];
  const satisfies = (wanted, offered) => wanted.length === 0 || wanted.some((modality) => offered.includes(modality));
  const inputOk = satisfies(segment.inputModality, accepted);
  const outputOk = satisfies(segment.outputModality, produced);
  if (!(inputOk && outputOk)) return false;
  const required = segment.requiredCapabilities;
  return required.length === 0 || required.some((cap) => model.capabilities.includes(cap));
}
2955
/**
 * Rank the allowed embedding items by cosine similarity to the query.
 *
 * @param embeddings `{ id, vector }` items.
 * @param queryVector Embedding of the query.
 * @param allowedIds Set of ids eligible for ranking.
 * @param topK Number of results to keep.
 * @returns `{ id, similarity }` entries, most similar first.
 */
function rankByEmbedding(embeddings, queryVector, allowedIds, topK) {
  const ranked = [];
  for (const item of embeddings) {
    if (!allowedIds.has(item.id)) continue;
    ranked.push({
      id: item.id,
      similarity: cosineSimilarity(queryVector, item.vector)
    });
  }
  ranked.sort((a, b) => b.similarity - a.similarity);
  return ranked.slice(0, topK);
}
2961
/**
 * Semantic recall restricted to the models the user named.
 *
 * With at least five in-scope models (or no targets at all) the ranking runs
 * inside that scope. With fewer, the in-scope models are listed first with
 * score 1 and the list is extended from a ranking over all models.
 *
 * @param models All model profiles.
 * @param embeddings `{ id, vector }` items.
 * @param queryVector Embedding of the query.
 * @param preference Model preference; `targets` are the scope names.
 * @param topK Number of ranked results requested.
 * @returns `{ model, score }` candidates.
 */
function recallScoped(models, embeddings, queryVector, preference, topK) {
  const MIN_SCOPED = 5;
  const targets = preference.targets ?? [];
  const hasTargets = targets.length > 0;
  const inScope = hasTargets ? models.filter((profile) => matchesAnyTarget(profile, targets)) : models;
  const scopeTooSmall = inScope.length < MIN_SCOPED;
  const pool = scopeTooSmall ? models : inScope;
  const ranked = rankByEmbedding(embeddings, queryVector, new Set(pool.map((profile) => profile.model)), topK);
  const byId = new Map(models.map((profile) => [profile.model, profile]));
  if (!(scopeTooSmall && hasTargets)) {
    const results = [];
    for (const { id, similarity } of ranked) {
      const model = byId.get(id);
      if (model) results.push({
        model,
        score: similarity
      });
    }
    return results;
  }
  // Small scope: pin the in-scope models first, then extend from the ranking.
  const results = inScope.map((profile) => ({
    model: profile,
    score: 1
  }));
  const pinned = new Set(inScope.map((profile) => profile.model));
  for (const { id, similarity } of ranked) {
    if (pinned.has(id)) continue;
    const model = byId.get(id);
    if (model) results.push({
      model,
      score: similarity
    });
    if (results.length >= topK) break;
  }
  return results;
}
2995
/**
 * Recall for "compare A with B" requests: every model matching a target is
 * included with score 1, and any slots left up to `topK` are filled with the
 * most similar other models.
 *
 * @param models All model profiles.
 * @param embeddings `{ id, vector }` items.
 * @param queryVector Embedding of the query.
 * @param preference Model preference; `targets` are the models to compare.
 * @param topK Desired number of candidates.
 * @returns `{ model, score }` candidates, forced matches first.
 */
function recallComparison(models, embeddings, queryVector, preference, topK) {
  const targets = preference.targets ?? [];
  const byId = new Map(models.map((profile) => [profile.model, profile]));
  const picked = [];
  const pickedIds = new Set();
  for (const profile of models) {
    if (!matchesAnyTarget(profile, targets) || pickedIds.has(profile.model)) continue;
    pickedIds.add(profile.model);
    picked.push({
      model: profile,
      score: 1
    });
  }
  const openSlots = topK - picked.length;
  if (openSlots <= 0) return picked;
  const otherIds = new Set();
  for (const profile of models) {
    if (!pickedIds.has(profile.model)) otherIds.add(profile.model);
  }
  for (const { id, similarity } of rankByEmbedding(embeddings, queryVector, otherIds, openSlots)) {
    const model = byId.get(id);
    if (model) picked.push({
      model,
      score: similarity
    });
  }
  return picked;
}
3020
/**
 * Recall for "what can replace X" requests: the reference models (those
 * matching a target) come first with score 1, followed by the most similar
 * models from other families, up to `topK` entries in total.
 *
 * @param models All model profiles.
 * @param embeddings `{ id, vector }` items.
 * @param queryVector Embedding of the query.
 * @param preference Model preference; `targets` name the reference models.
 * @param topK Desired number of candidates.
 * @returns `{ model, score }` candidates, reference models first.
 */
function recallAlternative(models, embeddings, queryVector, preference, topK) {
  const targets = preference.targets ?? [];
  const modelMap = new Map(models.map((profile) => [profile.model, profile]));
  const refModels = models.filter((profile) => matchesAnyTarget(profile, targets));
  const refFamilies = new Set(refModels.map((profile) => profile.family).filter(Boolean));
  const results = [];
  const seen = /* @__PURE__ */ new Set();
  for (const profile of refModels) {
    results.push({
      model: profile,
      score: 1
    });
    seen.add(profile.model);
  }
  // Only rank alternatives when slots remain. A negative count would reach
  // `slice(0, negative)`, which drops just the tail and returns almost the
  // whole pool instead of nothing.
  const remaining = topK - results.length;
  if (remaining > 0) {
    // Alternatives must come from a different family than the references.
    const altPool = models.filter((profile) => !seen.has(profile.model) && (!profile.family || !refFamilies.has(profile.family)));
    const scored = rankByEmbedding(embeddings, queryVector, new Set(altPool.map((profile) => profile.model)), remaining);
    for (const { id, similarity } of scored) {
      const model = modelMap.get(id);
      if (model) results.push({
        model,
        score: similarity
      });
    }
  }
  return results;
}
3045
/**
 * Embedding-based candidate recall.
 *
 * Embeddings are taken from the cache or built on demand. The strategy then
 * depends on the intent: a constrained model preference dispatches to the
 * scoped / comparison / alternative recall; a pipeline intent ranks per
 * segment; otherwise all models are ranked. Excluded names are removed last.
 *
 * @param config Client configuration for embedding requests.
 * @param models All model profiles.
 * @param query Text to embed and rank against.
 * @param topK Desired number of candidates.
 * @param intent Optional analyzed intent.
 * @returns `{ model, score }` candidates.
 */
async function recallSemantic(config, models, query, topK, intent) {
  let embeddings = getEmbeddings();
  if (!embeddings) {
    embeddings = await buildAndCacheEmbeddings(config, models);
    cachedEmbeddings = embeddings;
  }
  const queryVector = await embedQuery(config, query);
  const byId = new Map(models.map((profile) => [profile.model, profile]));
  const preference = intent?.modelPreference;
  const excludes = preference?.excludes ?? [];
  if (preference && preference.mode !== "unconstrained") {
    const { mode } = preference;
    let constrained = [];
    if (mode === "scoped") constrained = recallScoped(models, embeddings, queryVector, preference, topK);
    else if (mode === "comparison") constrained = recallComparison(models, embeddings, queryVector, preference, topK);
    else if (mode === "alternative") constrained = recallAlternative(models, embeddings, queryVector, preference, topK);
    return applyExcludes(constrained, excludes);
  }
  if (intent?.complexity === Complexities.Pipeline && intent.segments?.length) {
    const used = new Set();
    const collected = [];
    // Each segment gets at least five slots.
    const perSegment = Math.max(5, Math.ceil(topK / intent.segments.length));
    for (const segment of intent.segments) {
      const allowedIds = new Set();
      for (const profile of models) {
        if (matchesSegment(profile, segment) && !used.has(profile.model)) allowedIds.add(profile.model);
      }
      if (allowedIds.size === 0) continue;
      for (const { id, similarity } of rankByEmbedding(embeddings, queryVector, allowedIds, perSegment)) {
        const model = byId.get(id);
        if (!model || used.has(id)) continue;
        collected.push({
          model,
          score: similarity
        });
        used.add(id);
      }
    }
    return applyExcludes(collected, excludes);
  }
  const everyId = new Set(models.map((profile) => profile.model));
  const ranked = [];
  for (const { id, similarity } of rankByEmbedding(embeddings, queryVector, everyId, topK)) {
    const model = byId.get(id);
    if (model) ranked.push({
      model,
      score: similarity
    });
  }
  return applyExcludes(ranked, excludes);
}
3105
+ //#endregion
3106
+ //#region src/advisor/recommend.ts
3107
/**
 * Render a profile's price list as `type:price/unit` entries joined by ", ".
 *
 * @param profile Model profile with an optional `prices` array.
 * @returns The formatted string, or `undefined` when there are no prices.
 */
function formatPrices(profile) {
  const { prices } = profile;
  if (!prices?.length) return void 0;
  const rendered = [];
  for (const { type, price, unit } of prices) rendered.push(`${type}:${price}/${unit}`);
  return rendered.join(", ");
}
3111
/**
 * Render a profile's rate limits as `key:count/periods` entries joined by ", ".
 *
 * @param profile Model profile with an optional `qpmInfo` object whose values
 *   carry `count_limit` and `count_limit_period`.
 * @returns The formatted string, or `undefined` when there is nothing to show.
 */
function formatQpm(profile) {
  const { qpmInfo } = profile;
  if (!qpmInfo) return void 0;
  const rendered = [];
  for (const [key, limit] of Object.entries(qpmInfo)) {
    rendered.push(`${key}:${limit.count_limit}/${limit.count_limit_period}s`);
  }
  return rendered.length === 0 ? void 0 : rendered.join(", ");
}
3117
/**
 * Render the candidate list as prompt text: one line per model, with its
 * attributes joined by " | ". Optional attributes are included only when set.
 *
 * @param candidates `{ model, score }` entries.
 * @returns The newline-joined description of all candidates.
 */
function buildCandidatesContext(candidates) {
  const lines = [];
  for (const { model: profile } of candidates) {
    const fields = [
      `ID: ${profile.model}`,
      `名称: ${profile.name}`,
      `描述: ${profile.shortDescription || profile.description}`,
      `能力: ${profile.capabilities.join(", ")}`,
      `特性: ${profile.features.join(", ")}`
    ];
    if (profile.contextWindow) fields.push(`上下文窗口: ${profile.contextWindow}`);
    if (profile.maxOutputTokens) fields.push(`最大输出: ${profile.maxOutputTokens}`);
    if (profile.category) fields.push(`类别: ${profile.category}`);
    const metadata = profile.inferenceMetadata;
    if (metadata?.request_modality?.length) fields.push(`输入模态: ${metadata.request_modality.join(", ")}`);
    if (metadata?.response_modality?.length) fields.push(`输出模态: ${metadata.response_modality.join(", ")}`);
    const priceText = formatPrices(profile);
    if (priceText) fields.push(`定价: ${priceText}`);
    const qpmText = formatQpm(profile);
    if (qpmText) fields.push(`QPM: ${qpmText}`);
    if (profile.versionTag) fields.push(`版本: ${profile.versionTag}`);
    // `false` is meaningful here, so only skip when the flag is absent.
    if (profile.openSource !== void 0) fields.push(`开源: ${profile.openSource ? "是" : "否"}`);
    if (profile.family) fields.push(`家族: ${profile.family}`);
    lines.push(fields.join(" | "));
  }
  return lines.join("\n");
}
3142
/**
 * Render the analyzed intent as prompt text, one attribute per line.
 * Empty lists and the standard context need are omitted; budget and quality
 * preference are always included.
 *
 * @param intent Analyzed intent.
 * @returns The newline-joined intent summary.
 */
function buildIntentContext(intent) {
  const { taskSummary, scenarioHints, inputModality, outputModality, requiredCapabilities, requiredFeatures, budget, qualityPreference, contextNeed, segments, modelPreference } = intent;
  const lines = [];
  const addList = (label, values) => {
    if (values.length) lines.push(`${label}: ${values.join(", ")}`);
  };
  if (taskSummary) lines.push(`场景理解: ${taskSummary}`);
  addList("场景特征", scenarioHints);
  addList("输入模态", inputModality);
  addList("输出模态", outputModality);
  addList("所需能力", requiredCapabilities);
  addList("所需特性", requiredFeatures);
  lines.push(`预算倾向: ${budget}`, `质量偏好: ${qualityPreference}`);
  if (contextNeed !== ContextNeeds.Standard) lines.push(`上下文需求: ${contextNeed}`);
  if (modelPreference && modelPreference.mode !== "unconstrained") {
    lines.push(`模型偏好: ${modelPreference.mode}`);
    if (modelPreference.targets?.length) lines.push(`目标模型: ${modelPreference.targets.join(", ")}`);
    if (modelPreference.excludes?.length) lines.push(`排除模型: ${modelPreference.excludes.join(", ")}`);
  }
  if (segments?.length) {
    lines.push(`拆解步骤:`);
    for (const { step, inputModality: segIn, outputModality: segOut, requiredCapabilities: segCaps } of segments) {
      const inMod = segIn.join(",") || "无";
      const outMod = segOut.join(",") || "无";
      const caps = segCaps.join(",") || "无";
      lines.push(` - ${step} (输入: ${inMod} → 输出: ${outMod}, 能力: ${caps})`);
    }
  }
  return lines.join("\n");
}
3170
/**
 * Turn a documentation URL containing `/<digits>.html` into a console
 * deep link for that document id.
 *
 * @param docUrl Original documentation URL; may be empty or undefined.
 * @returns The console link, or `undefined` when no numeric id is found.
 */
function buildDocLink(docUrl) {
  const docId = docUrl ? docUrl.match(/\/(\d+)\.html/)?.[1] : void 0;
  if (docId === void 0) return void 0;
  return `https://bailian.console.aliyun.com/cn-beijing?tab=doc#/doc/?type=model&url=${docId}`;
}
3176
/**
 * Convert raw, model-generated recommendation items into validated
 * recommendations.
 *
 * Items are skipped when they are malformed, reference a model that is not a
 * candidate, or belong to a family already recommended. Collection stops once
 * `limit` entries have been gathered.
 *
 * @param items Raw items (`{ model, reason?, highlights? }`); anything that is
 *   not an array is treated as empty.
 * @param modelMap Map of candidate model id to profile.
 * @param limit Maximum number of recommendations to return.
 * @returns The recommendations, in the order the items were given.
 */
function buildRecommendations(items, modelMap, limit) {
  const list = Array.isArray(items) ? items : [];
  const recommendations = [];
  const seenFamilies = /* @__PURE__ */ new Set();
  for (const item of list) {
    // Check before pushing so a limit of 0 yields nothing.
    if (recommendations.length >= limit) break;
    // The list comes from parsed model output; tolerate null/primitive entries.
    if (item === null || typeof item !== "object") continue;
    const profile = modelMap.get(item.model);
    if (!profile) continue;
    // At most one recommendation per model family.
    if (profile.family && seenFamilies.has(profile.family)) continue;
    if (profile.family) seenFamilies.add(profile.family);
    const { model, name, category, contextWindow, maxOutputTokens, docUrl } = profile;
    recommendations.push({
      model,
      name,
      reason: item.reason ?? "",
      highlights: Array.isArray(item.highlights) ? item.highlights : [],
      category,
      contextWindow,
      maxOutputTokens,
      docUrl
    });
  }
  return recommendations;
}
3200
/**
 * Attach modality-compatibility warnings to pipeline steps, in place.
 *
 * For each step after the first, the union of response modalities of the
 * previous step's recommended models is collected. A recommended model that
 * declares request modalities, none of which is in that union, gets a
 * warning. Steps with warnings receive a `warnings` array.
 *
 * @param steps Pipeline steps, each with `recommendations` (`{ model, name }`).
 * @param modelMap Map of model id to profile.
 */
function validatePipelineCompatibility(steps, modelMap) {
  const modalitiesOf = (rec, key) => modelMap.get(rec.model)?.inferenceMetadata?.[key] ?? [];
  steps.forEach((current, position) => {
    if (position === 0) return;
    const upstream = new Set();
    for (const rec of steps[position - 1].recommendations) {
      for (const modality of modalitiesOf(rec, "response_modality")) upstream.add(modality);
    }
    // Nothing known about the previous step's output: nothing to check.
    if (upstream.size === 0) return;
    const warnings = [];
    for (const rec of current.recommendations) {
      const accepts = modalitiesOf(rec, "request_modality");
      if (accepts.length === 0 || accepts.some((modality) => upstream.has(modality))) continue;
      warnings.push(`${rec.name} 的输入模态 [${accepts.join(", ")}] 可能不兼容上一步的输出模态 [${[...upstream].join(", ")}]`);
    }
    if (warnings.length > 0) current.warnings = warnings;
  });
}
3216
/**
 * Ask the ranking model to pick and justify recommendations from the
 * recalled candidates.
 *
 * The system prompt depends on the model-preference mode (comparison,
 * alternative, scoped) and on whether the intent is a pipeline. With
 * `options.enableThinking` the reply is streamed: reasoning chunks go to
 * `options.onThinking`, and `options.onContentStart` fires once before the
 * first content chunk. The reply's outermost JSON object is parsed and
 * converted into recommendations restricted to the given candidates.
 *
 * @param config Client configuration; `config.baseUrl` selects the chat endpoint.
 * @param candidates Recalled `{ model, score }` entries.
 * @param intent Analyzed intent.
 * @param userInput Original user request text.
 * @param top Maximum recommendations (per step for pipelines).
 * @param options Optional `{ enableThinking, onThinking, onContentStart }`.
 * @returns `{ type: Single, recommendations }` or `{ type: Pipeline, summary, steps }`.
 */
async function rankModels(config, candidates, intent, userInput, top, options) {
  const candidatesContext = buildCandidatesContext(candidates);
  const intentContext = buildIntentContext(intent);
  const isPipeline = intent.complexity === Complexities.Pipeline;
  const defaultPrompt = isPipeline ? PIPELINE_SYSTEM_PROMPT : SINGLE_SYSTEM_PROMPT;
  let systemPrompt = defaultPrompt;
  switch (intent.modelPreference?.mode) {
    case "comparison":
      systemPrompt = COMPARISON_SYSTEM_PROMPT;
      break;
    case "alternative":
      systemPrompt = ALTERNATIVE_SYSTEM_PROMPT;
      break;
    case "scoped": {
      const scopeTargets = intent.modelPreference?.targets;
      if (scopeTargets?.length) systemPrompt = defaultPrompt + `\n\n## 范围限定\n用户明确要求在以下范围内推荐：${scopeTargets.join("、")}。请优先从匹配该范围的模型中选择。`;
      break;
    }
  }
  const useThinkingModel = options?.enableThinking ?? false;
  const sharedContext = `意图分析结果：\n${intentContext}\n\n候选模型列表：\n${candidatesContext}\n\n用户原始需求：${userInput}\n\n`;
  const ask = isPipeline ? `请为流水线各步骤各推荐最多 ${top} 个模型。` : `请推荐最多 ${top} 个模型。`;
  const body = {
    model: useThinkingModel ? RANKING_MODEL : RANKING_MODEL_FAST,
    messages: [{
      role: "system",
      content: systemPrompt
    }, {
      role: "user",
      content: sharedContext + ask
    }],
    max_tokens: 4096,
    temperature: 0,
    ...useThinkingModel ? {
      stream: true,
      enable_thinking: true
    } : {}
  };
  const url = chatEndpoint(config.baseUrl);
  let content;
  if (!useThinkingModel) {
    const response = await requestJson(config, {
      url,
      method: "POST",
      body
    });
    content = response.choices?.[0]?.message?.content ?? "{}";
  } else {
    const res = await request(config, {
      url,
      method: "POST",
      body,
      stream: true
    });
    let answer = "";
    let answerStarted = false;
    for await (const event of parseSSE(res)) {
      if (event.data === "[DONE]") break;
      try {
        const chunk = JSON.parse(event.data);
        for (const { delta } of chunk.choices) {
          if (delta.reasoning_content && options?.onThinking) options.onThinking(delta.reasoning_content);
          if (!delta.content) continue;
          if (!answerStarted) {
            answerStarted = true;
            options?.onContentStart?.();
          }
          answer += delta.content;
        }
      } catch {
        // Best-effort: an event that fails to process is skipped.
      }
    }
    content = answer || "{}";
  }
  let parsed;
  try {
    // The model may wrap the JSON in prose; take the outermost {...} span.
    parsed = JSON.parse(content.match(/\{[\s\S]*\}/)?.[0] ?? "{}");
  } catch {
    return {
      type: Complexities.Single,
      recommendations: []
    };
  }
  const modelMap = new Map(candidates.map(({ model: profile }) => [profile.model, profile]));
  if (parsed.type !== Complexities.Pipeline || !Array.isArray(parsed.steps)) {
    return {
      type: Complexities.Single,
      recommendations: buildRecommendations(parsed.recommendations ?? parsed ?? [], modelMap, top)
    };
  }
  const steps = [];
  for (const rawStep of parsed.steps) {
    // A step may carry a recommendation list or be a single recommendation itself.
    const rawItems = rawStep.recommendations ?? (rawStep.model ? [rawStep] : []);
    const recs = buildRecommendations(rawItems, modelMap, top);
    if (recs.length === 0) continue;
    steps.push({
      step: rawStep.step ?? "",
      recommendations: recs
    });
  }
  validatePipelineCompatibility(steps, modelMap);
  return {
    type: Complexities.Pipeline,
    summary: parsed.summary ?? "",
    steps
  };
}
3312
+ //#endregion
3313
+ export { BAILIAN_HOST, BailianError, Budgets, CHANNEL, CONSOLE_GATEWAY_NO_TOKEN_MESSAGE, Capabilities, Complexities, ContextNeeds, DOCS_HOSTS, ExitCode, Features, GLOBAL_OPTIONS, McpClient, Modalities, ModelCategories, QualityPreferences, REGIONS, SOURCE_CONFIG, TAGS, analyzeIntent, appCompletionEndpoint, bailianMcpUrl, buildDocLink, callConsoleGateway, chatEndpoint, clearApiKey, createTrackingEvent, defineCommand, detectOutputFormat, ensureConfigDir, fetchModelList, flushTelemetry, formatErrorJson, formatJson, formatOutput, formatText, generateFilename, generateToolSchema, getConfigDir, getConfigPath, getCredentialsPath, getModels, imageEndpoint, imageSyncEndpoint, isCI, isInteractive, isLocalFile, isSemanticAvailable, loadApiKeyFromConfig, loadConfig, localSink, mapApiError, maskToken, mcpWebSearchEndpoint, memoryAddEndpoint, memoryListEndpoint, memoryNodeEndpoint, memorySearchEndpoint, parseBooleanValue, parseConfigFile, parseOptionalBooleanValue, parseSSE, profileSchemaEndpoint, rankModels, readConfigFile, recallCandidates, recallSemantic, remoteSink, request, requestJson, resolveBooleanFlag, resolveConsoleGatewayCredential, resolveCredential, resolveFileUrl, resolveOutputDir, resolveWatermark, saveApiKeyToConfig, signRequest, speechRecognizeEndpoint, speechSynthesizeEndpoint, stripUndefined, taskEndpoint, trackCommandExecution, trackingHeaders, uploadFile, userProfileEndpoint, videoGenerateEndpoint, writeConfigFile };