lumiverse-spindle-types 0.5.9 → 0.5.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "lumiverse-spindle-types",
3
- "version": "0.5.9",
3
+ "version": "0.5.10",
4
4
  "types": "./src/index.ts",
5
5
  "keywords": [
6
6
  "lumiverse",
package/src/api.ts CHANGED
@@ -247,6 +247,25 @@ export interface GenerationRequestDTO {
247
247
  connection_id?: string;
248
248
  /** Optional tool/function definitions for inline function calling (raw/quiet only). */
249
249
  tools?: ToolSchemaDTO[];
250
+ /**
251
+ * Optional per-request override of the user's reasoning ("extended thinking")
252
+ * settings. When omitted (or `{ source: "inherit" }`) the backend resolves
253
+ * the effective settings the same way a normal chat generation does:
254
+ * the resolved connection's `reasoning_bindings` win, falling back to the
255
+ * user's global `reasoningSettings`.
256
+ *
257
+ * Use this to bypass that resolution for a single request — e.g. to force
258
+ * `"off"` for a quick, cheap call, or to dial the effort up/down with
259
+ * `source: "custom"`. The backend translates the high-level intent into
260
+ * the provider-specific knobs (`thinking`, `thinkingConfig`,
261
+ * `reasoning_effort`, `reasoning.effort`, etc.) so the extension doesn't
262
+ * need to know the per-provider quirks.
263
+ *
264
+ * Raw values supplied in `parameters` still take precedence at the field
265
+ * level — this override only fills in what hasn't already been set,
266
+ * except `source: "off"` which unconditionally strips reasoning fields.
267
+ */
268
+ reasoning?: GenerationReasoningOverrideDTO;
250
269
  /**
251
270
  * For operator-scoped extensions: the user ID whose connection profiles
252
271
  * and generation context should be used. For user-scoped extensions this
@@ -348,6 +367,112 @@ export interface RequestInitDTO {
348
367
  mediaType?: "image" | "audio";
349
368
  }
350
369
 
370
+ /**
371
+ * Reasoning effort tier. Provider mapping:
372
+ * - Anthropic adaptive (Claude 4.6+): `low | medium | high | max` (+ `xhigh` on Opus 4.7) → `output_config.effort`.
373
+ * - Anthropic legacy: mapped to `thinking.budget_tokens` (low=2048, medium=8192, high=16384, max=32768).
374
+ * - Google (Gemini / Vertex): `minimal | low | medium | high` → `thinkingConfig.thinkingLevel`.
375
+ * - DeepSeek: `low | medium | high` → `"high"`, `max | xhigh` → `"max"` (`reasoning_effort`).
376
+ * - OpenRouter: `none | minimal | low | medium | high | xhigh` → `reasoning.effort`.
377
+ * - NanoGPT: `none | minimal | low | medium | high` → `reasoning.effort`.
378
+ * - Moonshot / Z.AI: toggle-only — effort ignored, just enables `thinking`.
379
+ * - Generic OpenAI-compatible: passed verbatim as `reasoning.effort`.
380
+ *
381
+ * `"auto"` defers to the user's preset/global setting or the provider's
382
+ * model-specific default and is the safest value to use when you don't
383
+ * have a specific tier in mind.
384
+ */
385
+ export type ReasoningEffortDTO =
386
+ | "auto"
387
+ | "none"
388
+ | "minimal"
389
+ | "low"
390
+ | "medium"
391
+ | "high"
392
+ | "max"
393
+ | "xhigh";
394
+
395
+ /**
396
+ * Anthropic-only display mode for thinking blocks. Maps to `thinking.display`
397
+ * in the Messages API. `"auto"` omits the field so Anthropic applies its
398
+ * model-specific default (`"omitted"` on Opus 4.7 / Mythos Preview,
399
+ * `"summarized"` elsewhere). Ignored by every other provider.
400
+ */
401
+ export type ThinkingDisplayDTO = "auto" | "summarized" | "omitted";
402
+
403
+ /**
404
+ * Full reasoning settings snapshot. Mirrors the user-level setting that
405
+ * Lumiverse stores under `reasoningSettings`. Surfaced on
406
+ * `ConnectionProfileDTO.reasoning_bindings.settings` when a connection has
407
+ * a binding attached.
408
+ *
409
+ * Only `apiReasoning` / `reasoningEffort` / `thinkingDisplay` influence the
410
+ * outgoing provider request — the remaining fields drive delimited-reasoning
411
+ * parsing (`prefix`, `suffix`, `autoParse`) and chat-history pruning
412
+ * (`keepInHistory`) and are included for inspection / round-tripping.
413
+ */
414
+ export interface ReasoningSettingsDTO {
415
+ /** Master switch: whether the provider should produce thinking output. */
416
+ apiReasoning: boolean;
417
+ /** Effort tier — see {@link ReasoningEffortDTO}. */
418
+ reasoningEffort: ReasoningEffortDTO;
419
+ /** Anthropic-only. */
420
+ thinkingDisplay: ThinkingDisplayDTO;
421
+ /** Opening delimiter used by the delimited-reasoning parser (e.g. `"<think>\n"`). */
422
+ prefix: string;
423
+ /** Closing delimiter used by the delimited-reasoning parser (e.g. `"\n</think>"`). */
424
+ suffix: string;
425
+ /** Whether to auto-parse delimited reasoning out of the assistant content stream. */
426
+ autoParse: boolean;
427
+ /**
428
+ * How many recent reasoning blocks to retain in assembled prompt history.
429
+ * `0` strips all, `-1` keeps everything, `N` keeps the last N.
430
+ */
431
+ keepInHistory: number;
432
+ }
433
+
434
+ /**
435
+ * Reasoning settings bound to a specific connection profile. When present,
436
+ * these override the user's global `reasoningSettings` during normal chat
437
+ * generation on this connection.
438
+ */
439
+ export interface ConnectionReasoningBindingsDTO {
440
+ /** Reasoning settings snapshot captured at bind time. */
441
+ settings: ReasoningSettingsDTO;
442
+ /**
443
+ * Optional "Start Reply With" assistant prefill captured alongside the
444
+ * reasoning snapshot. When present, overrides the user's global
445
+ * `promptBias` setting for this connection.
446
+ */
447
+ promptBias?: string;
448
+ }
449
+
450
+ /**
451
+ * Per-request reasoning override for `spindle.generate.*` calls. Use the
452
+ * `source` discriminator to pick how the backend resolves the effective
453
+ * reasoning settings:
454
+ *
455
+ * - `"inherit"` (default if `source` is omitted): apply the connection's
456
+ * `reasoning_bindings` if any, else the user's global setting. Same as
457
+ * leaving the `reasoning` field off entirely. Useful when you want to
458
+ * document intent without changing behaviour.
459
+ * - `"off"`: short-circuit. The provider's no-reasoning off-switch is
460
+ * applied unconditionally — even if `parameters` already carry an
461
+ * explicit `thinking` / `reasoning` block from the caller.
462
+ * - `"custom"`: use the explicit `apiReasoning` / `effort` / `thinkingDisplay`
463
+ * fields below for this request only. Omitted fields use their defaults
464
+ * (`apiReasoning: true`, `effort: "auto"`, `thinkingDisplay: "auto"`).
465
+ * Raw values supplied via `parameters` still win at the field level —
466
+ * the override only fills in unset fields, exactly like the inherited
467
+ * settings would.
468
+ */
469
+ export interface GenerationReasoningOverrideDTO {
470
+ source?: "inherit" | "off" | "custom";
471
+ apiReasoning?: boolean;
472
+ effort?: ReasoningEffortDTO;
473
+ thinkingDisplay?: ThinkingDisplayDTO;
474
+ }
475
+
351
476
  /**
352
477
  * Safe representation of a user's connection profile exposed to extensions.
353
478
  * Never contains the actual API key — only `has_api_key` boolean.
@@ -361,7 +486,20 @@ export interface ConnectionProfileDTO {
361
486
  preset_id: string | null;
362
487
  is_default: boolean;
363
488
  has_api_key: boolean;
489
+ /**
490
+ * Raw provider-specific metadata bag stored on the connection. Includes
491
+ * provider-quirk flags (Anthropic prompt caching, Google thinking budget
492
+ * config, etc.) and the original `reasoningBindings` blob — `reasoning_bindings`
493
+ * below is the parsed, typed view of that same blob.
494
+ */
364
495
  metadata: Record<string, unknown>;
496
+ /**
497
+ * Typed view of the connection's bound reasoning settings, parsed from
498
+ * `metadata.reasoningBindings`. `null` when the connection has no binding
499
+ * (in which case generation falls back to the user's global
500
+ * `reasoningSettings`).
501
+ */
502
+ reasoning_bindings: ConnectionReasoningBindingsDTO | null;
365
503
  created_at: number;
366
504
  updated_at: number;
367
505
  }
package/src/index.ts CHANGED
@@ -29,6 +29,11 @@ export type {
29
29
  ToolSchemaDTO,
30
30
  ToolCallDTO,
31
31
  GenerationRequestDTO,
32
+ GenerationReasoningOverrideDTO,
33
+ ReasoningEffortDTO,
34
+ ReasoningSettingsDTO,
35
+ ThinkingDisplayDTO,
36
+ ConnectionReasoningBindingsDTO,
32
37
  ChatAppendGenerationOptionsDTO,
33
38
  ChatAppendMessageOptionsDTO,
34
39
  StreamChunkDTO,
@@ -301,6 +301,21 @@ export interface SpindleAPI {
301
301
  * `fetch()` uses, so it composes with `AbortSignal.timeout()` and
302
302
  * `AbortSignal.any([...])`.
303
303
  *
304
+ * ## Reasoning / extended thinking
305
+ *
306
+ * By default every generation inherits the resolved user's reasoning
307
+ * settings — the connection's `reasoning_bindings` if any, else the
308
+ * user-global `reasoningSettings`. The host translates that into the
309
+ * correct provider-specific knob (`thinking.budget_tokens`,
310
+ * `thinkingConfig.thinkingLevel`, `reasoning.effort`, `reasoning_effort`,
311
+ * etc.) so extensions don't have to.
312
+ *
313
+ * Use `input.reasoning` to override that resolution per-request:
314
+ * - `{ source: "off" }` — disable thinking for one cheap call.
315
+ * - `{ source: "custom", effort: "high" }` — dial effort up just for this call.
316
+ *
317
+ * See {@link GenerationReasoningOverrideDTO} for the full shape.
318
+ *
304
319
  * @example
305
320
  * ```ts
306
321
  * const controller = new AbortController()
@@ -616,6 +631,25 @@ export interface SpindleAPI {
616
631
  /**
617
632
  * Connection profile access (permission: "generation").
618
633
  * Returns safe representations — API keys are never exposed.
634
+ *
635
+ * The returned DTO carries a typed `reasoning_bindings` view of the
636
+ * connection's bound reasoning settings (parsed from `metadata.reasoningBindings`).
637
+ * Pair this with `GenerationRequestDTO.reasoning` to inspect what the
638
+ * connection is configured for and optionally override it per-request:
639
+ *
640
+ * @example
641
+ * ```ts
642
+ * const conn = await spindle.connections.get(connId);
643
+ * const bound = conn?.reasoning_bindings?.settings;
644
+ * if (bound?.apiReasoning) {
645
+ * await spindle.generate.raw({
646
+ * messages,
647
+ * connection_id: connId,
648
+ * // Override the bound effort with the "max" tier, just for this request.
649
+ * reasoning: { source: "custom", apiReasoning: true, effort: "max" },
650
+ * });
651
+ * }
652
+ * ```
619
653
  */
620
654
  connections: {
621
655
  /**