@civitai/client 0.2.0-beta.55 → 0.2.0-beta.57

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -15,6 +15,7 @@ export {
15
15
  invalidateUserCache,
16
16
  invokeAceStepAudioStepTemplate,
17
17
  invokeAgeClassificationStepTemplate,
18
+ invokeAudioCaptioningStepTemplate,
18
19
  invokeBatchOcrSafetyClassificationStepTemplate,
19
20
  invokeBlobArchiveStepTemplate,
20
21
  invokeChatCompletionStepTemplate,
@@ -63,6 +64,10 @@ export {
63
64
  uploadConsumerBlob,
64
65
  } from './sdk.gen';
65
66
  export {
67
+ type AceStep15AiToolkitTrainingInput,
68
+ type AceStep15XlAiToolkitTrainingInput,
69
+ type AceStep15XlBaseAiToolkitTrainingInput,
70
+ type AceStep15XlSftAiToolkitTrainingInput,
66
71
  type AceStepAudioCover,
67
72
  type AceStepAudioInput,
68
73
  type AceStepAudioOutput,
@@ -91,6 +96,11 @@ export {
91
96
  ArchiveFormat,
92
97
  type AssistantMessage,
93
98
  type AudioBlob,
99
+ type AudioCaptioningInput,
100
+ type AudioCaptioningOutput,
101
+ type AudioCaptioningOutputItem,
102
+ type AudioCaptioningStep,
103
+ type AudioCaptioningStepTemplate,
94
104
  type BatchOcrSafetyClassificationInput,
95
105
  type BatchOcrSafetyClassificationOutput,
96
106
  type BatchOcrSafetyClassificationResult,
@@ -110,11 +120,16 @@ export {
110
120
  type ChatCompletionContentPartWritable,
111
121
  type ChatCompletionFunction,
112
122
  type ChatCompletionFunctionCall,
123
+ type ChatCompletionGeneratedImage,
124
+ type ChatCompletionGeneratedImageUrl,
125
+ type ChatCompletionImageConfig,
113
126
  type ChatCompletionImageUrl,
114
127
  type ChatCompletionInput,
128
+ type ChatCompletionJsonSchema,
115
129
  type ChatCompletionMessage,
116
130
  type ChatCompletionMessageWritable,
117
131
  type ChatCompletionOutput,
132
+ type ChatCompletionResponseFormat,
118
133
  type ChatCompletionStep,
119
134
  type ChatCompletionStepTemplate,
120
135
  type ChatCompletionTool,
@@ -123,6 +138,8 @@ export {
123
138
  type ChromaAiToolkitTrainingInput,
124
139
  type ClientOptions,
125
140
  CoarseMode,
141
+ type ComfyAnimaCreateImageGenInput,
142
+ type ComfyAnimaImageGenInput,
126
143
  type ComfyErnieImageGenInput,
127
144
  type ComfyErnieStandardCreateImageGenInput,
128
145
  type ComfyErnieStandardImageGenInput,
@@ -131,6 +148,9 @@ export {
131
148
  type ComfyFlux1CreateImageGenInput,
132
149
  type ComfyFlux1ImageGenInput,
133
150
  type ComfyFlux1VariantImageGenInput,
151
+ type ComfyFlux2DevCreateImageInput,
152
+ type ComfyFlux2DevEditImageInput,
153
+ type ComfyFlux2DevImageGenInput,
134
154
  type ComfyImageGenInput,
135
155
  type ComfyInput,
136
156
  type ComfyLtx23AudioToVideoInput,
@@ -351,6 +371,11 @@ export {
351
371
  type InvokeAgeClassificationStepTemplateErrors,
352
372
  type InvokeAgeClassificationStepTemplateResponse,
353
373
  type InvokeAgeClassificationStepTemplateResponses,
374
+ type InvokeAudioCaptioningStepTemplateData,
375
+ type InvokeAudioCaptioningStepTemplateError,
376
+ type InvokeAudioCaptioningStepTemplateErrors,
377
+ type InvokeAudioCaptioningStepTemplateResponse,
378
+ type InvokeAudioCaptioningStepTemplateResponses,
354
379
  type InvokeBatchOcrSafetyClassificationStepTemplateData,
355
380
  type InvokeBatchOcrSafetyClassificationStepTemplateError,
356
381
  type InvokeBatchOcrSafetyClassificationStepTemplateErrors,
@@ -567,6 +592,7 @@ export {
567
592
  type MochiVideoGenInput,
568
593
  type ModelClamScanInput,
569
594
  type ModelClamScanOutput,
595
+ ModelClamScanStatus,
570
596
  type ModelClamScanStep,
571
597
  type ModelClamScanStepTemplate,
572
598
  type ModelHashInput,
@@ -579,6 +605,7 @@ export {
579
605
  type ModelParseMetadataStepTemplate,
580
606
  type ModelPickleScanInput,
581
607
  type ModelPickleScanOutput,
608
+ ModelPickleScanStatus,
582
609
  type ModelPickleScanStep,
583
610
  type ModelPickleScanStepTemplate,
584
611
  type MusubiImageResourceTrainingInput,
@@ -721,6 +748,7 @@ export {
721
748
  type Qwen3BaseTtsInput,
722
749
  type Qwen3CustomVoiceTtsInput,
723
750
  type Qwen3TextToSpeechInput,
751
+ type Qwen3VoiceDesignTtsInput,
724
752
  type QwenAiToolkitTrainingInput,
725
753
  type QwenImageGenInput,
726
754
  type RefreshBlobData,
@@ -1,3 +1,3 @@
1
1
  // This file is auto-generated by @hey-api/openapi-ts
2
- export { addWorkflowTag, deleteWorkflow, getBlob, getBlobArchive, getBlobContent, getBlockedContent, getConsumerBlobUploadUrl, getResource, getStreamingBlob, getWorkflow, getWorkflowStep, headBlob, invalidateResource, invalidateUserCache, invokeAceStepAudioStepTemplate, invokeAgeClassificationStepTemplate, invokeBatchOcrSafetyClassificationStepTemplate, invokeBlobArchiveStepTemplate, invokeChatCompletionStepTemplate, invokeComfyStepTemplate, invokeConvertImageStepTemplate, invokeEchoStepTemplate, invokeHumanoidImageMaskStepTemplate, invokeImageGenStepTemplate, invokeImageResourceTrainingStepTemplate, invokeImageUploadStepTemplate, invokeImageUpscalerStepTemplate, invokeMediaCaptioningStepTemplate, invokeMediaHashStepTemplate, invokeMediaRatingStepTemplate, invokeModelClamScanStepTemplate, invokeModelHashStepTemplate, invokeModelParseMetadataStepTemplate, invokeModelPickleScanStepTemplate, invokePreprocessImageStepTemplate, invokePromptEnhancementStepTemplate, invokeRepeatStepTemplate, invokeTextToImageStepTemplate, invokeTextToSpeechStepTemplate, invokeTrainingStepTemplate, invokeTranscodeStepTemplate, invokeTranscriptionStepTemplate, invokeTryOnUStepTemplate, invokeVideoEnhancementStepTemplate, invokeVideoFrameExtractionStepTemplate, invokeVideoGenStepTemplate, invokeVideoInterpolationStepTemplate, invokeVideoMetadataStepTemplate, invokeVideoUpscalerStepTemplate, invokeWdTaggingStepTemplate, invokeXGuardModerationStepTemplate, patchWorkflow, patchWorkflowStep, queryWorkflows, refreshBlob, removeAllWorkflowTags, removeWorkflowTag, submitWorkflow, updateWorkflow, updateWorkflowStep, uploadConsumerBlob, } from './sdk.gen';
3
- export { AnimalPoseBboxDetector, AnimalPoseEstimator, AnylineMergeWith, ArchiveFormat, BlurRegionMode, BuzzClientAccount, CoarseMode, ComfySampler, ComfyScheduler, ContainerFormat, DensePoseColormap, DensePoseModel, DepthAnythingCheckpoint, DepthAnythingV2Checkpoint, DwPoseBboxDetector, DwPoseEstimator, FileFormat, HaiperVideoGenAspectRatio, HaiperVideoGenCameraMovement, HaiperVideoGenModel, HumanoidImageMaskCategory, ImageGenOutputFormat, ImageResouceTrainingModerationStatus, ImageTransformer, JobSupport, KlingMode, KlingModel, KlingV3Operation, KlingVideoGenAspectRatio, KlingVideoGenDuration, LeresBoost, LightricksAspectRatio, MediaHashType, Metric3dBackbone, MiniMaxVideoGenModel, NsfwLevel, OutputFormat, Priority, SafeMode, Scheduler, SdCppSampleMethod, SdCppSchedule, SdCppUCacheMode, SeedanceModel, SeedreamVersion, TrainingModerationStatus, TransactionType, UpdateWorkflowStatus, Veo3AspectRatio, Veo3GenerationMode, Veo3Version, ViduVideoGenModel, ViduVideoGenStyle, WorkflowStatus, WorkflowUpgradeMode, ZoeDepthEnvironment, } from './types.gen';
2
+ export { addWorkflowTag, deleteWorkflow, getBlob, getBlobArchive, getBlobContent, getBlockedContent, getConsumerBlobUploadUrl, getResource, getStreamingBlob, getWorkflow, getWorkflowStep, headBlob, invalidateResource, invalidateUserCache, invokeAceStepAudioStepTemplate, invokeAgeClassificationStepTemplate, invokeAudioCaptioningStepTemplate, invokeBatchOcrSafetyClassificationStepTemplate, invokeBlobArchiveStepTemplate, invokeChatCompletionStepTemplate, invokeComfyStepTemplate, invokeConvertImageStepTemplate, invokeEchoStepTemplate, invokeHumanoidImageMaskStepTemplate, invokeImageGenStepTemplate, invokeImageResourceTrainingStepTemplate, invokeImageUploadStepTemplate, invokeImageUpscalerStepTemplate, invokeMediaCaptioningStepTemplate, invokeMediaHashStepTemplate, invokeMediaRatingStepTemplate, invokeModelClamScanStepTemplate, invokeModelHashStepTemplate, invokeModelParseMetadataStepTemplate, invokeModelPickleScanStepTemplate, invokePreprocessImageStepTemplate, invokePromptEnhancementStepTemplate, invokeRepeatStepTemplate, invokeTextToImageStepTemplate, invokeTextToSpeechStepTemplate, invokeTrainingStepTemplate, invokeTranscodeStepTemplate, invokeTranscriptionStepTemplate, invokeTryOnUStepTemplate, invokeVideoEnhancementStepTemplate, invokeVideoFrameExtractionStepTemplate, invokeVideoGenStepTemplate, invokeVideoInterpolationStepTemplate, invokeVideoMetadataStepTemplate, invokeVideoUpscalerStepTemplate, invokeWdTaggingStepTemplate, invokeXGuardModerationStepTemplate, patchWorkflow, patchWorkflowStep, queryWorkflows, refreshBlob, removeAllWorkflowTags, removeWorkflowTag, submitWorkflow, updateWorkflow, updateWorkflowStep, uploadConsumerBlob, } from './sdk.gen';
3
+ export { AnimalPoseBboxDetector, AnimalPoseEstimator, AnylineMergeWith, ArchiveFormat, BlurRegionMode, BuzzClientAccount, CoarseMode, ComfySampler, ComfyScheduler, ContainerFormat, DensePoseColormap, DensePoseModel, DepthAnythingCheckpoint, DepthAnythingV2Checkpoint, DwPoseBboxDetector, DwPoseEstimator, FileFormat, HaiperVideoGenAspectRatio, HaiperVideoGenCameraMovement, HaiperVideoGenModel, HumanoidImageMaskCategory, ImageGenOutputFormat, ImageResouceTrainingModerationStatus, ImageTransformer, JobSupport, KlingMode, KlingModel, KlingV3Operation, KlingVideoGenAspectRatio, KlingVideoGenDuration, LeresBoost, LightricksAspectRatio, MediaHashType, Metric3dBackbone, MiniMaxVideoGenModel, ModelClamScanStatus, ModelPickleScanStatus, NsfwLevel, OutputFormat, Priority, SafeMode, Scheduler, SdCppSampleMethod, SdCppSchedule, SdCppUCacheMode, SeedanceModel, SeedreamVersion, TrainingModerationStatus, TransactionType, UpdateWorkflowStatus, Veo3AspectRatio, Veo3GenerationMode, Veo3Version, ViduVideoGenModel, ViduVideoGenStyle, WorkflowStatus, WorkflowUpgradeMode, ZoeDepthEnvironment, } from './types.gen';
@@ -46,6 +46,9 @@ import type {
46
46
  InvokeAgeClassificationStepTemplateData,
47
47
  InvokeAgeClassificationStepTemplateErrors,
48
48
  InvokeAgeClassificationStepTemplateResponses,
49
+ InvokeAudioCaptioningStepTemplateData,
50
+ InvokeAudioCaptioningStepTemplateErrors,
51
+ InvokeAudioCaptioningStepTemplateResponses,
49
52
  InvokeBatchOcrSafetyClassificationStepTemplateData,
50
53
  InvokeBatchOcrSafetyClassificationStepTemplateErrors,
51
54
  InvokeBatchOcrSafetyClassificationStepTemplateResponses,
@@ -308,6 +311,19 @@ export declare const invokeAgeClassificationStepTemplate: <ThrowOnError extends
308
311
  ThrowOnError,
309
312
  'fields'
310
313
  >;
314
+ /**
315
+ * Audio Captioning
316
+ *
317
+ * Generates music captions, lyrics, and metadata by running the ACE-Step captioner and transcriber models.
318
+ */
319
+ export declare const invokeAudioCaptioningStepTemplate: <ThrowOnError extends boolean = false>(
320
+ options?: Options<InvokeAudioCaptioningStepTemplateData, ThrowOnError>
321
+ ) => import('./client').RequestResult<
322
+ InvokeAudioCaptioningStepTemplateResponses,
323
+ InvokeAudioCaptioningStepTemplateErrors,
324
+ ThrowOnError,
325
+ 'fields'
326
+ >;
311
327
  export declare const invokeBatchOcrSafetyClassificationStepTemplate: <
312
328
  ThrowOnError extends boolean = false,
313
329
  >(
@@ -113,6 +113,22 @@ export const invokeAgeClassificationStepTemplate = (options) => {
113
113
  },
114
114
  });
115
115
  };
116
+ /**
117
+ * Audio Captioning
118
+ *
119
+ * Generates music captions, lyrics, and metadata by running the ACE-Step captioner and transcriber models.
120
+ */
121
+ export const invokeAudioCaptioningStepTemplate = (options) => {
122
+ var _a;
123
+ return ((_a = options === null || options === void 0 ? void 0 : options.client) !== null && _a !== void 0 ? _a : client).post({
124
+ url: '/v2/consumer/recipes/audioCaptioning',
125
+ ...options,
126
+ headers: {
127
+ 'Content-Type': 'application/json',
128
+ ...options === null || options === void 0 ? void 0 : options.headers,
129
+ },
130
+ });
131
+ };
116
132
  export const invokeBatchOcrSafetyClassificationStepTemplate = (options) => {
117
133
  var _a;
118
134
  return ((_a = options === null || options === void 0 ? void 0 : options.client) !== null && _a !== void 0 ? _a : client).post({
@@ -96,6 +96,49 @@ export type AiToolkitTrainingInput = Omit<TrainingInput, 'engine'> & {
96
96
  triggerWord?: null | string;
97
97
  engine: 'ai-toolkit';
98
98
  };
99
+ /**
100
+ * AI Toolkit training for ACE-Step 1.5 base models.
101
+ */
102
+ export type AceStep15AiToolkitTrainingInput = Omit<
103
+ AiToolkitTrainingInput,
104
+ 'engine' | 'ecosystem'
105
+ > & {
106
+ ecosystem: 'ace_step_15';
107
+ engine: 'ai-toolkit';
108
+ };
109
+ /**
110
+ * AI Toolkit training for ACE-Step 1.5 XL models.
111
+ */
112
+ export type AceStep15XlAiToolkitTrainingInput = Omit<
113
+ AiToolkitTrainingInput,
114
+ 'engine' | 'ecosystem'
115
+ > & {
116
+ modelVariant: string;
117
+ ecosystem: 'ace_step_15_xl';
118
+ engine: 'ai-toolkit';
119
+ };
120
+ /**
121
+ * AI Toolkit training for ACE-Step 1.5 XL base models.
122
+ */
123
+ export type AceStep15XlBaseAiToolkitTrainingInput = Omit<
124
+ AceStep15XlAiToolkitTrainingInput,
125
+ 'engine' | 'ecosystem' | 'modelVariant'
126
+ > & {
127
+ modelVariant: 'base';
128
+ ecosystem: 'ace_step_15_xl';
129
+ engine: 'ai-toolkit';
130
+ };
131
+ /**
132
+ * AI Toolkit training for ACE-Step 1.5 XL SFT models.
133
+ */
134
+ export type AceStep15XlSftAiToolkitTrainingInput = Omit<
135
+ AceStep15XlAiToolkitTrainingInput,
136
+ 'engine' | 'ecosystem' | 'modelVariant'
137
+ > & {
138
+ modelVariant: 'sft';
139
+ ecosystem: 'ace_step_15_xl';
140
+ engine: 'ai-toolkit';
141
+ };
99
142
  /**
100
143
  * Cover image configuration for ACE Step audio output.
101
144
  * When present, the output is a WebM video with this image as the visual.
@@ -133,15 +176,73 @@ export type AceStepAudioInput = {
133
176
  /**
134
177
  * Time signature (e.g., "4" for 4/4 time)
135
178
  */
136
- timeSignature?: string;
179
+ timeSignature?: '2' | '3' | '4' | '6';
137
180
  /**
138
181
  * Language code (e.g., "en", "zh", "ja", "ko")
139
182
  */
140
- language?: string;
141
- /**
142
- * Musical key (e.g., "C major", "E minor")
143
- */
144
- key?: string;
183
+ language?:
184
+ | 'en'
185
+ | 'ja'
186
+ | 'zh'
187
+ | 'es'
188
+ | 'de'
189
+ | 'fr'
190
+ | 'pt'
191
+ | 'ru'
192
+ | 'it'
193
+ | 'nl'
194
+ | 'pl'
195
+ | 'tr'
196
+ | 'vi'
197
+ | 'cs'
198
+ | 'fa'
199
+ | 'id'
200
+ | 'ko'
201
+ | 'uk'
202
+ | 'hu'
203
+ | 'ar'
204
+ | 'sv'
205
+ | 'ro'
206
+ | 'el';
207
+ /**
208
+ * Musical key (e.g., "C major", "E minor"). Mirrors ComfyUI's
209
+ * TextEncodeAceStepAudio1.5 keyscale combo: 17 roots × {major, minor}.
210
+ */
211
+ key?:
212
+ | 'C major'
213
+ | 'C# major'
214
+ | 'Db major'
215
+ | 'D major'
216
+ | 'D# major'
217
+ | 'Eb major'
218
+ | 'E major'
219
+ | 'F major'
220
+ | 'F# major'
221
+ | 'Gb major'
222
+ | 'G major'
223
+ | 'G# major'
224
+ | 'Ab major'
225
+ | 'A major'
226
+ | 'A# major'
227
+ | 'Bb major'
228
+ | 'B major'
229
+ | 'C minor'
230
+ | 'C# minor'
231
+ | 'Db minor'
232
+ | 'D minor'
233
+ | 'D# minor'
234
+ | 'Eb minor'
235
+ | 'E minor'
236
+ | 'F minor'
237
+ | 'F# minor'
238
+ | 'Gb minor'
239
+ | 'G minor'
240
+ | 'G# minor'
241
+ | 'Ab minor'
242
+ | 'A minor'
243
+ | 'A# minor'
244
+ | 'Bb minor'
245
+ | 'B minor';
145
246
  /**
146
247
  * Weight for instrumental elements (0.0-1.0)
147
248
  */
@@ -177,6 +278,14 @@ export type AceStepAudioInput = {
177
278
  * non-turbo base / sft variants expect CFG on (around 4).
178
279
  */
179
280
  cfg?: number;
281
+ /**
282
+ * Optional LoRAs to apply. Each entry's strength is applied to both the
283
+ * diffusion model (UNET) and the dual CLIP via ComfyUI's LoraLoader.
284
+ * Compatibility with non-default base models is the caller's responsibility.
285
+ */
286
+ loras: {
287
+ [key: string]: number;
288
+ };
180
289
  };
181
290
  /**
182
291
  * Output from ACE Step 1.5 audio generation workflow step.
@@ -413,6 +522,11 @@ export type AssistantMessage = Omit<ChatCompletionMessage, 'role'> & {
413
522
  * Tool calls requested by the model.
414
523
  */
415
524
  tool_calls?: null | Array<ChatCompletionToolCall>;
525
+ /**
526
+ * Generated images attached to this assistant message, populated when the request
527
+ * included "image" in its modalities. Each entry is a base64 data URI.
528
+ */
529
+ images?: null | Array<ChatCompletionGeneratedImage>;
416
530
  role: 'assistant';
417
531
  };
418
532
  export type AudioBlob = Omit<Blob, 'type'> & {
@@ -422,6 +536,57 @@ export type AudioBlob = Omit<Blob, 'type'> & {
422
536
  duration?: null | number;
423
537
  type: 'audio';
424
538
  };
539
+ export type AudioCaptioningInput = {
540
+ /**
541
+ * The URL of the audio file or zip archive to caption.
542
+ */
543
+ mediaUrl: string;
544
+ /**
545
+ * Sampling temperature for both ACE-Step requests.
546
+ */
547
+ temperature: number;
548
+ /**
549
+ * Maximum number of tokens to generate for each ACE-Step request.
550
+ */
551
+ maxNewTokens: number;
552
+ };
553
+ export type AudioCaptioningOutput = {
554
+ /**
555
+ * Combined caption, transcription, and music metadata for the first or only audio item.
556
+ */
557
+ text?: null | string;
558
+ /**
559
+ * Per-file results when the input is an archive or contains multiple audio items.
560
+ */
561
+ results: {
562
+ [key: string]: AudioCaptioningOutputItem;
563
+ };
564
+ };
565
+ export type AudioCaptioningOutputItem = {
566
+ text: string;
567
+ caption?: null | string;
568
+ lyrics?: null | string;
569
+ bpm?: null | string;
570
+ keyScale?: null | string;
571
+ timeSignature?: null | string;
572
+ duration?: null | string;
573
+ language?: null | string;
574
+ };
575
+ /**
576
+ * Audio Captioning
577
+ */
578
+ export type AudioCaptioningStep = Omit<WorkflowStep, '$type'> & {
579
+ input: AudioCaptioningInput;
580
+ output?: AudioCaptioningOutput;
581
+ $type: 'audioCaptioning';
582
+ };
583
+ /**
584
+ * Audio Captioning
585
+ */
586
+ export type AudioCaptioningStepTemplate = Omit<WorkflowStepTemplate, '$type'> & {
587
+ input: AudioCaptioningInput;
588
+ $type: 'audioCaptioning';
589
+ };
425
590
  export type BatchOcrSafetyClassificationInput = {
426
591
  mediaUrls: Array<string>;
427
592
  };
@@ -639,6 +804,44 @@ export type ChatCompletionFunctionCall = {
639
804
  name: string;
640
805
  arguments: string;
641
806
  };
807
+ /**
808
+ * A generated image attached to an assistant message when "image" is included in
809
+ * Civitai.Orchestration.Grains.Workflows.Steps.ChatCompletion.ChatCompletionInput.Modalities. Matches the OpenRouter wire shape.
810
+ */
811
+ export type ChatCompletionGeneratedImage = {
812
+ /**
813
+ * Always "image_url".
814
+ */
815
+ type?: string;
816
+ image_url: ChatCompletionGeneratedImageUrl;
817
+ };
818
+ /**
819
+ * The image_url payload on a generated image. Currently always a base64 data URI.
820
+ */
821
+ export type ChatCompletionGeneratedImageUrl = {
822
+ /**
823
+ * Base64 data URI (e.g. "data:image/png;base64,...").
824
+ */
825
+ url: string;
826
+ };
827
+ /**
828
+ * Image generation parameters used when Civitai.Orchestration.Grains.Workflows.Steps.ChatCompletion.ChatCompletionInput.Modalities contains "image".
829
+ * Mirrors OpenRouter's `image_config` shape on chat-completion requests.
830
+ */
831
+ export type ChatCompletionImageConfig = {
832
+ /**
833
+ * Width:height aspect ratio. Examples: "1:1", "16:9", "9:16", "4:3", "3:4", "21:9".
834
+ */
835
+ aspect_ratio?: '1:1' | '16:9' | '9:16' | '4:3' | '3:4' | '21:9';
836
+ /**
837
+ * Approximate output resolution. "1K" ≈ 1MP, "2K" ≈ 2MP, etc. Engines clamp to their supported range.
838
+ */
839
+ image_size?: '0.5K' | '1K' | '2K' | '4K';
840
+ /**
841
+ * Number of images to generate. Engines may clamp to their supported maximum.
842
+ */
843
+ n?: null | number;
844
+ };
642
845
  /**
643
846
  * Image URL details matching OpenAI API spec.
644
847
  */
@@ -727,6 +930,23 @@ export type ChatCompletionInput = {
727
930
  * Can be "auto", "none", "required", or an object specifying a particular function.
728
931
  */
729
932
  tool_choice?: null;
933
+ /**
934
+ * Output modalities the model should produce. Defaults to text-only when omitted.
935
+ * Supported values: "text", "image". When "image" is included, the request is routed
936
+ * to the image generation pipeline and returns generated images on the assistant message.
937
+ */
938
+ modalities?: null | Array<string>;
939
+ image_config?: ChatCompletionImageConfig;
940
+ responseFormat?: ChatCompletionResponseFormat;
941
+ };
942
+ export type ChatCompletionJsonSchema = {
943
+ name: string;
944
+ description?: null | string;
945
+ schema: unknown;
946
+ /**
947
+ * OpenAI strict mode. Forces additionalProperties=false and all fields required.
948
+ */
949
+ strict?: null | boolean;
730
950
  };
731
951
  /**
732
952
  * Base type for chat messages, discriminated by the "role" property.
@@ -764,6 +984,27 @@ export type ChatCompletionOutput = {
764
984
  * System fingerprint for the model configuration.
765
985
  */
766
986
  systemFingerprint?: null | string;
987
+ /**
988
+ * Parsed JSON content of `Choices[0].Message.Content`. Populated when the request
989
+ * specified a JSON-flavored `response_format` and the content was parseable.
990
+ * Reachable from downstream workflow steps via `$ref` paths like
991
+ * `output.parsed.<field>` — DynamicAssignmentEvaluator walks JsonElement trees
992
+ * the same way it walks the rest of this output.
993
+ */
994
+ parsed?: null;
995
+ };
996
+ /**
997
+ * OpenAI-compatible response_format. When Civitai.Orchestration.Grains.Workflows.Steps.ChatCompletion.ChatCompletionResponseFormat.Type is `json_object` or
998
+ * `json_schema`, the LLM-emitted content is parsed as JSON server-side and exposed on
999
+ * Civitai.Orchestration.Grains.Workflows.Steps.ChatCompletion.ChatCompletionOutput.Parsed, allowing downstream workflow steps to reference
1000
+ * individual fields via `$ref` paths like `output.parsed.<field>`.
1001
+ */
1002
+ export type ChatCompletionResponseFormat = {
1003
+ /**
1004
+ * OpenAI-compatible value: `text`, `json_object`, or `json_schema`.
1005
+ */
1006
+ type: string;
1007
+ jsonSchema?: ChatCompletionJsonSchema;
767
1008
  };
768
1009
  /**
769
1010
  * ChatCompletion
@@ -785,7 +1026,11 @@ export type ChatCompletionStepTemplate = Omit<WorkflowStepTemplate, '$type'> & {
785
1026
  */
786
1027
  export type ChatCompletionTool = {
787
1028
  type: string;
788
- function: ChatCompletionFunction;
1029
+ function?: ChatCompletionFunction;
1030
+ /**
1031
+ * Server-tool parameters for providers such as OpenRouter.
1032
+ */
1033
+ parameters?: null;
789
1034
  };
790
1035
  /**
791
1036
  * A tool call returned in an assistant message response.
@@ -824,6 +1069,33 @@ export declare const CoarseMode: {
824
1069
  readonly ENABLE: 'enable';
825
1070
  };
826
1071
  export type CoarseMode = (typeof CoarseMode)[keyof typeof CoarseMode];
1072
+ export type ComfyAnimaCreateImageGenInput = Omit<
1073
+ ComfyAnimaImageGenInput,
1074
+ 'engine' | 'ecosystem' | 'operation'
1075
+ > & {
1076
+ width?: number;
1077
+ height?: number;
1078
+ operation: 'createImage';
1079
+ ecosystem: 'anima';
1080
+ engine: 'comfy';
1081
+ };
1082
+ export type ComfyAnimaImageGenInput = Omit<ComfyImageGenInput, 'engine' | 'ecosystem'> & {
1083
+ operation: string;
1084
+ prompt: string;
1085
+ negativePrompt?: null | string;
1086
+ sampler?: ComfySampler;
1087
+ scheduler?: ComfyScheduler;
1088
+ steps?: number;
1089
+ cfgScale?: number;
1090
+ seed?: null | number;
1091
+ quantity?: number;
1092
+ loras?: {
1093
+ [key: string]: number;
1094
+ };
1095
+ diffuserModel?: string;
1096
+ ecosystem: 'anima';
1097
+ engine: 'comfy';
1098
+ };
827
1099
  export type ComfyErnieImageGenInput = Omit<ComfyImageGenInput, 'engine' | 'ecosystem'> & {
828
1100
  model: string;
829
1101
  ecosystem: 'ernie';
@@ -935,6 +1207,41 @@ export type ComfyFlux1VariantImageGenInput = Omit<
935
1207
  ecosystem: 'flux1';
936
1208
  engine: 'comfy';
937
1209
  };
1210
+ export type ComfyFlux2DevCreateImageInput = Omit<
1211
+ ComfyFlux2DevImageGenInput,
1212
+ 'engine' | 'ecosystem' | 'operation'
1213
+ > & {
1214
+ operation: 'createImage';
1215
+ ecosystem: 'flux2Dev';
1216
+ engine: 'comfy';
1217
+ };
1218
+ export type ComfyFlux2DevEditImageInput = Omit<
1219
+ ComfyFlux2DevImageGenInput,
1220
+ 'engine' | 'ecosystem' | 'operation'
1221
+ > & {
1222
+ images?: Array<string>;
1223
+ operation: 'editImage';
1224
+ ecosystem: 'flux2Dev';
1225
+ engine: 'comfy';
1226
+ };
1227
+ export type ComfyFlux2DevImageGenInput = Omit<ComfyImageGenInput, 'engine' | 'ecosystem'> & {
1228
+ operation: string;
1229
+ prompt: string;
1230
+ width?: number;
1231
+ height?: number;
1232
+ seed?: null | number;
1233
+ quantity?: number;
1234
+ cfgScale?: number;
1235
+ steps?: number;
1236
+ sampler?: ComfySampler;
1237
+ scheduler?: ComfyScheduler;
1238
+ negativePrompt?: null | string;
1239
+ loras?: {
1240
+ [key: string]: number;
1241
+ };
1242
+ ecosystem: 'flux2Dev';
1243
+ engine: 'comfy';
1244
+ };
938
1245
  export type ComfyImageGenInput = Omit<ImageGenInput, 'engine'> & {
939
1246
  ecosystem: string;
940
1247
  engine: 'comfy';
@@ -1055,6 +1362,12 @@ export type ComfyLtx23VideoGenInput = Omit<VideoGenInput, 'engine'> & {
1055
1362
  loras?: {
1056
1363
  [key: string]: number;
1057
1364
  };
1365
+ /**
1366
+ * Optional override for the LTX 2.3 diffusion-model checkpoint. When set, replaces the
1367
+ * transformer file selected by Civitai.Orchestration.Grains.Workflows.Steps.VideoGen.ComfyLtx23VideoGenInput.Model while leaving the CLIPs, VAEs, and
1368
+ * upscale-LoRA behavior unchanged. Use to point at a community fine-tune (e.g. SulphurAI/Sulphur-2-base).
1369
+ */
1370
+ diffusionModel?: null | string;
1058
1371
  engine: 'ltx2.3';
1059
1372
  };
1060
1373
  /**
@@ -1188,6 +1501,7 @@ export declare const ComfySampler: {
1188
1501
  readonly DDIM: 'ddim';
1189
1502
  readonly UNI_PC: 'uni_pc';
1190
1503
  readonly UNI_PC_BH2: 'uni_pc_bh2';
1504
+ readonly ER_SDE: 'er_sde';
1191
1505
  };
1192
1506
  export type ComfySampler = (typeof ComfySampler)[keyof typeof ComfySampler];
1193
1507
  export declare const ComfyScheduler: {
@@ -3030,7 +3344,27 @@ export type ModelClamScanOutput = {
3030
3344
  * The raw ClamAV scan output.
3031
3345
  */
3032
3346
  output?: null | string;
3347
+ status?: ModelClamScanStatus;
3348
+ /**
3349
+ * True when ClamAV reported one or more infected files (exit code 1).
3350
+ */
3351
+ infected?: null | boolean;
3352
+ /**
3353
+ * Number of infected files parsed from the ClamAV scan summary.
3354
+ */
3355
+ infectedFileCount?: null | number;
3356
+ /**
3357
+ * Number of files scanned, parsed from the ClamAV scan summary.
3358
+ */
3359
+ scannedFileCount?: null | number;
3360
+ };
3361
+ export declare const ModelClamScanStatus: {
3362
+ readonly CLEAN: 'clean';
3363
+ readonly INFECTED: 'infected';
3364
+ readonly TIMEOUT: 'timeout';
3365
+ readonly ERROR: 'error';
3033
3366
  };
3367
+ export type ModelClamScanStatus = (typeof ModelClamScanStatus)[keyof typeof ModelClamScanStatus];
3034
3368
  /**
3035
3369
  * ModelClamScan
3036
3370
  */
@@ -3113,8 +3447,9 @@ export type ModelParseMetadataInput = {
3113
3447
  */
3114
3448
  export type ModelParseMetadataOutput = {
3115
3449
  /**
3116
- * The raw JSON metadata header as stored in the safetensors file, or null when
3117
- * the model is not a safetensors file or the header could not be parsed.
3450
+ * The `__metadata__` object from the safetensors header as a JSON string,
3451
+ * or null when the model is not a safetensors file, the header could not be
3452
+ * parsed, or no `__metadata__` object is present.
3118
3453
  */
3119
3454
  metadata?: null | string;
3120
3455
  };
@@ -3162,7 +3497,43 @@ export type ModelPickleScanOutput = {
3162
3497
  * Dangerous imports discovered during pickle scanning.
3163
3498
  */
3164
3499
  dangerousImports?: null | Array<string>;
3500
+ status?: ModelPickleScanStatus;
3501
+ /**
3502
+ * True when one or more dangerous imports were detected.
3503
+ */
3504
+ dangerousImportsFound?: null | boolean;
3505
+ /**
3506
+ * True when picklescan was skipped (e.g. file is safetensors and cannot contain pickled code).
3507
+ */
3508
+ skipped?: null | boolean;
3509
+ /**
3510
+ * Reason picklescan was skipped, if applicable. Examples: "safetensors", "safetensors-extension".
3511
+ */
3512
+ skipReason?: null | string;
3513
+ /**
3514
+ * Number of files scanned, parsed from the picklescan summary.
3515
+ */
3516
+ scannedFileCount?: null | number;
3517
+ /**
3518
+ * Number of infected files reported by picklescan.
3519
+ */
3520
+ infectedFileCount?: null | number;
3521
+ /**
3522
+ * Number of dangerous globals reported by picklescan.
3523
+ */
3524
+ dangerousGlobalCount?: null | number;
3525
+ };
3526
+ export declare const ModelPickleScanStatus: {
3527
+ readonly CLEAN: 'clean';
3528
+ readonly DANGEROUS_IMPORTS_FOUND: 'dangerousImportsFound';
3529
+ readonly SKIPPED_SAFETENSORS: 'skippedSafetensors';
3530
+ readonly SKIPPED_GGUF: 'skippedGguf';
3531
+ readonly SKIPPED: 'skipped';
3532
+ readonly PARSE_ERROR: 'parseError';
3533
+ readonly ERROR: 'error';
3165
3534
  };
3535
+ export type ModelPickleScanStatus =
3536
+ (typeof ModelPickleScanStatus)[keyof typeof ModelPickleScanStatus];
3166
3537
  /**
3167
3538
  * ModelPickleScan
3168
3539
  */
@@ -3665,6 +4036,13 @@ export type PromptEnhancementInput = {
3665
4036
  * Optional instruction to guide how the prompt is enhanced (e.g., "expand to 77 tokens", "keep it under 20 words").
3666
4037
  */
3667
4038
  instruction?: null | string;
4039
+ /**
4040
+ * Optional reference images for the prompt enhancement model to consider when enhancing the prompt
4041
+ * (subject, style, lighting, composition, color palette). Accepts URLs, data URIs, raw base64, or AIR strings.
4042
+ * Requires the per-ecosystem prompt-analysis model to be a vision-capable LLM (configured via
4043
+ * IPromptAnalysisGrain.SetConfigurationAsync) — non-VLM models will silently ignore the images.
4044
+ */
4045
+ images?: null | Array<string>;
3668
4046
  };
3669
4047
  /**
3670
4048
  * Output from prompt enhancement analysis.
@@ -3882,6 +4260,19 @@ export type Qwen3TextToSpeechInput = Omit<VllmOmniTextToSpeechInput, 'engine' |
3882
4260
  ecosystem: 'qwen3';
3883
4261
  engine: 'vllm-omni';
3884
4262
  };
4263
+ export type Qwen3VoiceDesignTtsInput = Omit<
4264
+ Qwen3TextToSpeechInput,
4265
+ 'engine' | 'ecosystem' | 'operation'
4266
+ > & {
4267
+ /**
4268
+ * Natural-language description of the desired voice
4269
+ * (e.g., "a calm middle-aged male narrator with a slight British accent").
4270
+ */
4271
+ instruct: string;
4272
+ operation: 'voiceDesign';
4273
+ ecosystem: 'qwen3';
4274
+ engine: 'vllm-omni';
4275
+ };
3885
4276
  /**
3886
4277
  * AI Toolkit training for Qwen Image models
3887
4278
  */
@@ -4009,7 +4400,7 @@ export type ResourceInfo = {
4009
4400
  publishedAt?: null | string;
4010
4401
  /**
4011
4402
  * A boolean indicating whether this resource restricts to SFW content generation.
4012
- * NSFWContent covers X and AA whereas MatureContent includes R rated content.
4403
+ * NSFWContent covers X and XXX whereas MatureContent includes R rated content.
4013
4404
  */
4014
4405
  hasNSFWContentRestriction: boolean;
4015
4406
  };
@@ -4235,7 +4626,7 @@ export type SeedanceVideoGenInput = Omit<VideoGenInput, 'engine'> & {
4235
4626
  duration: 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15;
4236
4627
  generateAudio?: boolean;
4237
4628
  seed?: null | number;
4238
- resolution: '480p' | '720p';
4629
+ resolution: '480p' | '720p' | '1080p';
4239
4630
  images?: Array<string>;
4240
4631
  engine: 'seedance';
4241
4632
  };
@@ -7423,6 +7814,36 @@ export type InvokeAgeClassificationStepTemplateResponses = {
7423
7814
  };
7424
7815
  export type InvokeAgeClassificationStepTemplateResponse =
7425
7816
  InvokeAgeClassificationStepTemplateResponses[keyof InvokeAgeClassificationStepTemplateResponses];
7817
+ export type InvokeAudioCaptioningStepTemplateData = {
7818
+ body?: AudioCaptioningInput;
7819
+ path?: never;
7820
+ query?: {
7821
+ experimental?: boolean;
7822
+ allowMatureContent?: boolean;
7823
+ whatif?: boolean;
7824
+ };
7825
+ url: '/v2/consumer/recipes/audioCaptioning';
7826
+ };
7827
+ export type InvokeAudioCaptioningStepTemplateErrors = {
7828
+ /**
7829
+ * Bad Request
7830
+ */
7831
+ 400: ProblemDetails;
7832
+ /**
7833
+ * Unauthorized
7834
+ */
7835
+ 401: ProblemDetails;
7836
+ };
7837
+ export type InvokeAudioCaptioningStepTemplateError =
7838
+ InvokeAudioCaptioningStepTemplateErrors[keyof InvokeAudioCaptioningStepTemplateErrors];
7839
+ export type InvokeAudioCaptioningStepTemplateResponses = {
7840
+ /**
7841
+ * OK
7842
+ */
7843
+ 200: AudioCaptioningOutput;
7844
+ };
7845
+ export type InvokeAudioCaptioningStepTemplateResponse =
7846
+ InvokeAudioCaptioningStepTemplateResponses[keyof InvokeAudioCaptioningStepTemplateResponses];
7426
7847
  export type InvokeBatchOcrSafetyClassificationStepTemplateData = {
7427
7848
  body?: BatchOcrSafetyClassificationInput;
7428
7849
  path?: never;
@@ -61,6 +61,7 @@ export const ComfySampler = {
61
61
  DDIM: 'ddim',
62
62
  UNI_PC: 'uni_pc',
63
63
  UNI_PC_BH2: 'uni_pc_bh2',
64
+ ER_SDE: 'er_sde',
64
65
  };
65
66
  export const ComfyScheduler = {
66
67
  NORMAL: 'normal',
@@ -232,6 +233,21 @@ export const Metric3dBackbone = {
232
233
  VIT_GIANT2: 'vit-giant2',
233
234
  };
234
235
  export const MiniMaxVideoGenModel = { HAILOU: 'hailou' };
236
+ export const ModelClamScanStatus = {
237
+ CLEAN: 'clean',
238
+ INFECTED: 'infected',
239
+ TIMEOUT: 'timeout',
240
+ ERROR: 'error',
241
+ };
242
+ export const ModelPickleScanStatus = {
243
+ CLEAN: 'clean',
244
+ DANGEROUS_IMPORTS_FOUND: 'dangerousImportsFound',
245
+ SKIPPED_SAFETENSORS: 'skippedSafetensors',
246
+ SKIPPED_GGUF: 'skippedGguf',
247
+ SKIPPED: 'skipped',
248
+ PARSE_ERROR: 'parseError',
249
+ ERROR: 'error',
250
+ };
235
251
  export const NsfwLevel = {
236
252
  PG: 'pg',
237
253
  PG13: 'pg13',
@@ -8,6 +8,8 @@ type AirProps = {
8
8
  /** Id of the resource from the source */
9
9
  id: string;
10
10
  version?: string;
11
+ /** Id of a specific model file within the version */
12
+ modelFileId?: string;
11
13
  /** The format of the model (safetensor, ckpt, diffuser, tensor rt) optional */
12
14
  format?: string;
13
15
  };
@@ -16,6 +18,6 @@ export declare abstract class Air {
16
18
  static parse(identifier: string): AirProps;
17
19
  static parseSafe(identifier: string): AirProps | undefined;
18
20
  static isAir(identifier: string): boolean;
19
- static stringify({ ecosystem, type, source, id, version, format }: AirProps): string;
21
+ static stringify({ ecosystem, type, source, id, version, modelFileId, format }: AirProps): string;
20
22
  }
21
23
  export {};
package/dist/utils/Air.js CHANGED
@@ -1,4 +1,4 @@
1
- const regex = /^(?:urn:)?(?:air:)?(?:(?<ecosystem>[a-zA-Z0-9_\-\/]+):)?(?:(?<type>[a-zA-Z0-9_\-\/]+):)?(?<source>[a-zA-Z0-9_\-\/]+):(?<id>[a-zA-Z0-9_\-\/\.]+)(?:@(?<version>[a-zA-Z0-9_\-\/.]+))?(?:\.(?<format>[a-zA-Z0-9_\-]+))?$/i;
1
+ const regex = /^(?:urn:)?(?:air:)?(?:(?<ecosystem>[a-zA-Z0-9_\-\/]+):)?(?:(?<type>[a-zA-Z0-9_\-\/]+):)?(?<source>[a-zA-Z0-9_\-\/]+):(?<id>[a-zA-Z0-9_\-\/\.]+)(?:@(?<version>[a-zA-Z0-9_\-\/.]+))?(?:\+(?<modelFileId>\d+))?(?:\.(?<format>[a-zA-Z0-9_\-]+))?$/i;
2
2
  /** https://github.com/civitai/civitai/wiki/AIR-%E2%80%90-Uniform-Resource-Names-for-AI */
3
3
  export class Air {
4
4
  static parse(identifier) {
@@ -15,7 +15,7 @@ export class Air {
15
15
  static isAir(identifier) {
16
16
  return regex.test(identifier);
17
17
  }
18
- static stringify({ ecosystem, type, source, id, version, format }) {
19
- return `urn:air:${ecosystem}:${type}:${source}:${id}${version ? `@${version}` : ''}${format ? `:${format}` : ''}`;
18
+ static stringify({ ecosystem, type, source, id, version, modelFileId, format }) {
19
+ return `urn:air:${ecosystem}:${type}:${source}:${id}${version ? `@${version}` : ''}${modelFileId ? `+${modelFileId}` : ''}${format ? `.${format}` : ''}`;
20
20
  }
21
21
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@civitai/client",
3
- "version": "0.2.0-beta.55",
3
+ "version": "0.2.0-beta.57",
4
4
  "description": "Civitai's javascript client for generating ai content",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",