@speechall/sdk 1.0.0 → 2.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.beads/README.md +81 -0
- package/.beads/config.yaml +62 -0
- package/.beads/issues.jsonl +46 -0
- package/.beads/metadata.json +4 -0
- package/.env.example +5 -0
- package/.fernignore +45 -0
- package/.gitattributes +3 -0
- package/.github/copilot-instructions.md +78 -0
- package/.github/workflows/auto-release-simple.yml.deprecated +106 -0
- package/.github/workflows/auto-release.yml +67 -0
- package/.github/workflows/ci.yml +41 -0
- package/.github/workflows/release.yml +57 -0
- package/AGENTS.md +94 -0
- package/CHANGELOG.md +58 -0
- package/CLAUDE.md +75 -0
- package/README.md +294 -155
- package/examples/CLAUDE.md +136 -0
- package/examples/advanced-options.ts +213 -0
- package/examples/basic-transcription.ts +66 -0
- package/examples/error-handling.ts +251 -0
- package/examples/list-models.ts +112 -0
- package/examples/remote-transcription.ts +60 -0
- package/fern/fern.config.json +4 -0
- package/fern/generators.yml +43 -0
- package/jest.config.js +11 -0
- package/package.json +26 -46
- package/regenerate.sh +45 -0
- package/scripts/fix-generated-code.sh +25 -0
- package/src/BaseClient.ts +82 -0
- package/src/Client.ts +30 -0
- package/src/api/errors/BadRequestError.ts +22 -0
- package/src/api/errors/GatewayTimeoutError.ts +22 -0
- package/src/api/errors/InternalServerError.ts +22 -0
- package/src/api/errors/NotFoundError.ts +22 -0
- package/src/api/errors/PaymentRequiredError.ts +22 -0
- package/src/api/errors/ServiceUnavailableError.ts +22 -0
- package/src/api/errors/TooManyRequestsError.ts +22 -0
- package/src/api/errors/UnauthorizedError.ts +22 -0
- package/src/api/errors/index.ts +8 -0
- package/src/api/index.ts +3 -0
- package/src/api/resources/index.ts +5 -0
- package/src/api/resources/replacementRules/client/Client.ts +148 -0
- package/src/api/resources/replacementRules/client/index.ts +1 -0
- package/src/api/resources/replacementRules/client/requests/CreateReplacementRulesetRequest.ts +25 -0
- package/src/api/resources/replacementRules/client/requests/index.ts +1 -0
- package/src/api/resources/replacementRules/index.ts +2 -0
- package/src/api/resources/replacementRules/types/CreateReplacementRulesetResponse.ts +6 -0
- package/src/api/resources/replacementRules/types/index.ts +1 -0
- package/src/api/resources/speechToText/client/Client.ts +275 -0
- package/src/api/resources/speechToText/client/index.ts +1 -0
- package/src/api/resources/speechToText/client/requests/RemoteTranscriptionConfiguration.ts +20 -0
- package/src/api/resources/speechToText/client/requests/TranscribeRequest.ts +26 -0
- package/src/api/resources/speechToText/client/requests/index.ts +2 -0
- package/src/api/resources/speechToText/index.ts +1 -0
- package/src/api/types/BaseTranscriptionConfiguration.ts +29 -0
- package/src/api/types/ErrorResponse.ts +11 -0
- package/src/api/types/ExactRule.ts +13 -0
- package/src/api/types/RegexGroupRule.ts +28 -0
- package/src/api/types/RegexRule.ts +28 -0
- package/src/api/types/ReplacementRule.ts +25 -0
- package/src/api/types/SpeechToTextModel.ts +90 -0
- package/src/api/types/TranscriptLanguageCode.ts +114 -0
- package/src/api/types/TranscriptOutputFormat.ts +18 -0
- package/src/api/types/TranscriptionDetailed.ts +19 -0
- package/src/api/types/TranscriptionModelIdentifier.ts +80 -0
- package/src/api/types/TranscriptionOnlyText.ts +11 -0
- package/src/api/types/TranscriptionProvider.ts +23 -0
- package/src/api/types/TranscriptionResponse.ts +8 -0
- package/src/api/types/TranscriptionSegment.ts +17 -0
- package/src/api/types/TranscriptionWord.ts +17 -0
- package/src/api/types/index.ts +16 -0
- package/src/auth/BearerAuthProvider.ts +37 -0
- package/src/auth/index.ts +1 -0
- package/src/core/auth/AuthProvider.ts +6 -0
- package/src/core/auth/AuthRequest.ts +9 -0
- package/src/core/auth/BasicAuth.ts +32 -0
- package/src/core/auth/BearerToken.ts +20 -0
- package/src/core/auth/NoOpAuthProvider.ts +8 -0
- package/src/core/auth/index.ts +5 -0
- package/src/core/base64.ts +27 -0
- package/src/core/exports.ts +2 -0
- package/src/core/fetcher/APIResponse.ts +23 -0
- package/src/core/fetcher/BinaryResponse.ts +34 -0
- package/src/core/fetcher/EndpointMetadata.ts +13 -0
- package/src/core/fetcher/EndpointSupplier.ts +14 -0
- package/src/core/fetcher/Fetcher.ts +391 -0
- package/src/core/fetcher/Headers.ts +93 -0
- package/src/core/fetcher/HttpResponsePromise.ts +116 -0
- package/src/core/fetcher/RawResponse.ts +61 -0
- package/src/core/fetcher/Supplier.ts +11 -0
- package/src/core/fetcher/createRequestUrl.ts +6 -0
- package/src/core/fetcher/getErrorResponseBody.ts +33 -0
- package/src/core/fetcher/getFetchFn.ts +3 -0
- package/src/core/fetcher/getHeader.ts +8 -0
- package/src/core/fetcher/getRequestBody.ts +20 -0
- package/src/core/fetcher/getResponseBody.ts +58 -0
- package/src/core/fetcher/index.ts +11 -0
- package/src/core/fetcher/makeRequest.ts +42 -0
- package/src/core/fetcher/requestWithRetries.ts +64 -0
- package/src/core/fetcher/signals.ts +26 -0
- package/src/core/file/exports.ts +1 -0
- package/src/core/file/file.ts +217 -0
- package/src/core/file/index.ts +2 -0
- package/src/core/file/types.ts +81 -0
- package/src/core/headers.ts +35 -0
- package/src/core/index.ts +7 -0
- package/src/core/json.ts +27 -0
- package/src/core/logging/exports.ts +19 -0
- package/src/core/logging/index.ts +1 -0
- package/src/core/logging/logger.ts +203 -0
- package/src/core/runtime/index.ts +1 -0
- package/src/core/runtime/runtime.ts +134 -0
- package/src/core/url/encodePathParam.ts +18 -0
- package/src/core/url/index.ts +3 -0
- package/src/core/url/join.ts +79 -0
- package/src/core/url/qs.ts +74 -0
- package/src/environments.ts +7 -0
- package/src/errors/SpeechallError.ts +58 -0
- package/src/errors/SpeechallTimeoutError.ts +13 -0
- package/src/errors/handleNonStatusCodeError.ts +37 -0
- package/src/errors/index.ts +2 -0
- package/src/exports.ts +1 -0
- package/src/index.ts +6 -0
- package/test-import.ts +17 -0
- package/tests/integration/api.test.ts +93 -0
- package/tests/unit/client.test.ts +91 -0
- package/tsconfig.json +20 -0
- package/dist/api.d.ts +0 -501
- package/dist/api.d.ts.map +0 -1
- package/dist/api.js +0 -610
- package/dist/base.d.ts +0 -32
- package/dist/base.d.ts.map +0 -1
- package/dist/base.js +0 -35
- package/dist/common.d.ts +0 -14
- package/dist/common.d.ts.map +0 -1
- package/dist/common.js +0 -91
- package/dist/configuration.d.ts +0 -23
- package/dist/configuration.d.ts.map +0 -1
- package/dist/configuration.js +0 -25
- package/dist/esm/api.js +0 -592
- package/dist/esm/base.js +0 -27
- package/dist/esm/common.js +0 -79
- package/dist/esm/configuration.js +0 -21
- package/dist/esm/example.js +0 -131
- package/dist/esm/index.js +0 -2
- package/dist/example.d.ts +0 -3
- package/dist/example.d.ts.map +0 -1
- package/dist/example.js +0 -133
- package/dist/index.d.ts +0 -3
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js +0 -18
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
// This file was auto-generated by Fern from our API Definition.
|
|
2
|
+
|
|
3
|
+
import type * as Speechall from "../index.js";
|
|
4
|
+
|
|
5
|
+
/**
 * One find-and-replace rule applied to transcription text.
 *
 * This is a discriminated union: the `kind` property selects which concrete rule shape
 * (`ExactRule`, `RegexRule` or `RegexGroupRule`) the object carries.
 */
export type ReplacementRule =
    | Speechall.ReplacementRule.Exact
    | Speechall.ReplacementRule.Regex
    | Speechall.ReplacementRule.RegexGroup;

export namespace ReplacementRule {
    /** An `ExactRule` tagged with the discriminator `kind: "exact"`. */
    export interface Exact extends Speechall.ExactRule {
        kind: "exact";
    }

    /** A `RegexRule` tagged with the discriminator `kind: "regex"`. */
    export interface Regex extends Speechall.RegexRule {
        kind: "regex";
    }

    /** A `RegexGroupRule` tagged with the discriminator `kind: "regex_group"`. */
    export interface RegexGroup extends Speechall.RegexGroupRule {
        kind: "regex_group";
    }
}
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
// This file was auto-generated by Fern from our API Definition.
|
|
2
|
+
|
|
3
|
+
import type * as Speechall from "../index.js";
|
|
4
|
+
|
|
5
|
+
/**
 * Metadata for one available speech-to-text model: its provider, capabilities and
 * operating characteristics.
 */
export interface SpeechToTextModel {
    /** Unique model identifier, written as `provider.model_name`. */
    id: Speechall.TranscriptionModelIdentifier;
    /** Human-readable model name. */
    display_name: string;
    /** Provider that serves this model. */
    provider: Speechall.TranscriptionProvider;
    /** Short description of the model, its intended use case, or version notes. */
    description?: string | null;
    /** Price in USD per second of processed audio. */
    cost_per_second_usd?: number | null;
    /** Whether the model can currently be used. */
    is_available: boolean;
    /**
     * Language codes supported by the model (preferably BCP 47, e.g., "en-US", "en-GB", "es-ES").
     * May contain `auto` when automatic language detection across multiple languages
     * within a single audio file is supported.
     */
    supported_languages?: string[] | null;
    /** Whether the model generally inserts punctuation automatically. */
    punctuation?: boolean | null;
    /** Whether the model generally supports speaker diarization (telling speakers apart). */
    diarization?: boolean | null;
    /**
     * Whether the model can be used for real-time streaming transcription over a
     * WebSocket connection (if offered by Speechall).
     */
    streamable?: boolean | null;
    /**
     * Approximate batch processing speed, defined as (audio duration) / (processing time).
     * Higher is faster (e.g., RTF=2 means 1 second of audio is processed in 0.5 seconds).
     * May be unavailable for some models or for streaming scenarios.
     */
    real_time_factor?: number | null;
    /**
     * Longest single audio file, in seconds, the model can reliably process in one request.
     * May vary by provider or plan.
     */
    max_duration_seconds?: number | null;
    /**
     * Largest single audio file, in bytes, that can be uploaded for this model.
     * May vary by provider or plan.
     */
    max_file_size_bytes?: number | null;
    /** Version identifier of the model. */
    version?: string | null;
    /** Date this model version was released or last updated. */
    release_date?: string | null;
    /** Primary type or training domain of the model; helps judge suitability for an audio type. */
    model_type?: SpeechToTextModel.ModelType | null;
    /** Rough expected accuracy level relative to other models. Not a guaranteed metric. */
    accuracy_tier?: SpeechToTextModel.AccuracyTier | null;
    /** Audio encodings the model supports or is optimized for (e.g., LINEAR16, FLAC, MP3, Opus). */
    supported_audio_encodings?: string[] | null;
    /** Audio sample rates, in Hz, the model supports or is optimized for. */
    supported_sample_rates?: number[] | null;
    /** Whether the model can attach speaker labels to the transcription. */
    speaker_labels?: boolean | null;
    /** Whether the model can provide per-word timestamps. */
    word_timestamps?: boolean | null;
    /** Whether the model reports confidence scores for the transcription or individual words. */
    confidence_scores?: boolean | null;
    /** Whether the model supports automatic language detection for input audio. */
    language_detection?: boolean | null;
    /** Whether the model can use a custom vocabulary or language model adaptation. */
    custom_vocabulary_support?: boolean | null;
    /** Whether the model supports filtering or masking of profanity. */
    profanity_filtering?: boolean | null;
    /** Whether the model supports noise reduction. */
    noise_reduction?: boolean | null;
    /** Whether the model supports SRT subtitle output. */
    supports_srt: boolean;
    /** Whether the model supports VTT subtitle output. */
    supports_vtt: boolean;
    /** Whether the model supports voice activity detection (VAD) to identify speech segments. */
    voice_activity_detection?: boolean | null;
}
|
|
68
|
+
|
|
69
|
+
export namespace SpeechToTextModel {
    /**
     * Primary type or training domain of a model.
     * Helps identify suitability for different audio types.
     */
    export const ModelType = {
        General: "general",
        PhoneCall: "phone_call",
        Video: "video",
        CommandAndSearch: "command_and_search",
        Medical: "medical",
        Legal: "legal",
        Voicemail: "voicemail",
        Meeting: "meeting",
    } as const;
    /** Union of the string values held by the `ModelType` constant. */
    export type ModelType = (typeof ModelType)[keyof typeof ModelType];

    /**
     * Rough expected accuracy level of a model relative to other models.
     * Not a guaranteed metric.
     */
    export const AccuracyTier = {
        Basic: "basic",
        Standard: "standard",
        Enhanced: "enhanced",
        Premium: "premium",
    } as const;
    /** Union of the string values held by the `AccuracyTier` constant. */
    export type AccuracyTier = (typeof AccuracyTier)[keyof typeof AccuracyTier];
}
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
// This file was auto-generated by Fern from our API Definition.
|
|
2
|
+
|
|
3
|
+
/**
 * Language code of the audio file, typically in ISO 639-1 format.
 *
 * - Supplying the correct language improves transcription accuracy and speed.
 * - The special value `auto` requests automatic language detection, if the selected model supports it.
 * - When omitted, the default language is English (`en`).
 *
 * Regional English variants are written with an underscore (`en_au`, `en_uk`, `en_us`).
 */
export const TranscriptLanguageCode = {
    Auto: "auto",
    En: "en",
    EnAu: "en_au",
    EnUk: "en_uk",
    EnUs: "en_us",
    Af: "af",
    Am: "am",
    Ar: "ar",
    As: "as",
    Az: "az",
    Ba: "ba",
    Be: "be",
    Bg: "bg",
    Bn: "bn",
    Bo: "bo",
    Br: "br",
    Bs: "bs",
    Ca: "ca",
    Cs: "cs",
    Cy: "cy",
    Da: "da",
    De: "de",
    El: "el",
    Es: "es",
    Et: "et",
    Eu: "eu",
    Fa: "fa",
    Fi: "fi",
    Fo: "fo",
    Fr: "fr",
    Gl: "gl",
    Gu: "gu",
    Ha: "ha",
    Haw: "haw",
    He: "he",
    Hi: "hi",
    Hr: "hr",
    Ht: "ht",
    Hu: "hu",
    Hy: "hy",
    Id: "id",
    Is: "is",
    It: "it",
    Ja: "ja",
    Jw: "jw",
    Ka: "ka",
    Kk: "kk",
    Km: "km",
    Kn: "kn",
    Ko: "ko",
    La: "la",
    Lb: "lb",
    Ln: "ln",
    Lo: "lo",
    Lt: "lt",
    Lv: "lv",
    Mg: "mg",
    Mi: "mi",
    Mk: "mk",
    Ml: "ml",
    Mn: "mn",
    Mr: "mr",
    Ms: "ms",
    Mt: "mt",
    My: "my",
    Ne: "ne",
    Nl: "nl",
    Nn: "nn",
    No: "no",
    Oc: "oc",
    Pa: "pa",
    Pl: "pl",
    Ps: "ps",
    Pt: "pt",
    Ro: "ro",
    Ru: "ru",
    Sa: "sa",
    Sd: "sd",
    Si: "si",
    Sk: "sk",
    Sl: "sl",
    Sn: "sn",
    So: "so",
    Sq: "sq",
    Sr: "sr",
    Su: "su",
    Sv: "sv",
    Sw: "sw",
    Ta: "ta",
    Te: "te",
    Tg: "tg",
    Th: "th",
    Tk: "tk",
    Tl: "tl",
    Tr: "tr",
    Tt: "tt",
    Uk: "uk",
    Ur: "ur",
    Uz: "uz",
    Vi: "vi",
    Yi: "yi",
    Yo: "yo",
    Zh: "zh",
} as const;
/** Union of every language-code string held by the `TranscriptLanguageCode` constant. */
export type TranscriptLanguageCode = (typeof TranscriptLanguageCode)[keyof typeof TranscriptLanguageCode];
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
// This file was auto-generated by Fern from our API Definition.
|
|
2
|
+
|
|
3
|
+
/**
 * Desired format of the transcription output.
 *
 * - `text`: plain text holding the full transcription.
 * - `json_text`: a simple JSON object with the transcription ID and the full text
 *   (`TranscriptionOnlyText` schema).
 * - `json`: a detailed JSON object with segments, timestamps (based on `timestamp_granularity`),
 *   language, and potentially speaker labels and provider metadata (`TranscriptionDetailed` schema).
 * - `srt`: SubRip subtitle format (returned as plain text).
 * - `vtt`: WebVTT subtitle format (returned as plain text).
 */
export const TranscriptOutputFormat = {
    Text: "text",
    JsonText: "json_text",
    Json: "json",
    Srt: "srt",
    Vtt: "vtt",
} as const;
/** Union of every output-format string held by the `TranscriptOutputFormat` constant. */
export type TranscriptOutputFormat = (typeof TranscriptOutputFormat)[keyof typeof TranscriptOutputFormat];
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
// This file was auto-generated by Fern from our API Definition.
|
|
2
|
+
|
|
3
|
+
import type * as Speechall from "../index.js";
|
|
4
|
+
|
|
5
|
+
/**
 * Detailed JSON transcription result, returned when `output_format` is `json`.
 * Carries the full text plus, when present, the language, timed segments and timed words.
 */
export interface TranscriptionDetailed {
    /** Unique identifier of the transcription job/request. */
    id: string;
    /** Complete transcribed text as one string. */
    text: string;
    /** Detected or specified language of the audio (ISO 639-1 code). */
    language?: string;
    /**
     * Time-coded segments of the transcription.
     * May include speaker labels if diarization was enabled.
     */
    segments?: Speechall.TranscriptionSegment[];
    /**
     * Time-coded words of the transcription.
     * May include speaker labels if diarization was enabled.
     */
    words?: Speechall.TranscriptionWord[];
}
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
// This file was auto-generated by Fern from our API Definition.
|
|
2
|
+
|
|
3
|
+
/**
 * Unique identifier of a specific Speech-to-Text model, composed as `provider.model_name`.
 * Used to select the engine for transcription.
 */
export const TranscriptionModelIdentifier = {
    AmazonTranscribe: "amazon.transcribe",
    AssemblyaiBest: "assemblyai.best",
    AssemblyaiNano: "assemblyai.nano",
    AssemblyaiSlam1: "assemblyai.slam-1",
    AssemblyaiUniversal: "assemblyai.universal",
    AzureStandard: "azure.standard",
    CloudflareWhisper: "cloudflare.whisper",
    CloudflareWhisperLargeV3Turbo: "cloudflare.whisper-large-v3-turbo",
    CloudflareWhisperTinyEn: "cloudflare.whisper-tiny-en",
    DeepgramBase: "deepgram.base",
    DeepgramBaseConversationalai: "deepgram.base-conversationalai",
    DeepgramBaseFinance: "deepgram.base-finance",
    DeepgramBaseGeneral: "deepgram.base-general",
    DeepgramBaseMeeting: "deepgram.base-meeting",
    DeepgramBasePhonecall: "deepgram.base-phonecall",
    DeepgramBaseVideo: "deepgram.base-video",
    DeepgramBaseVoicemail: "deepgram.base-voicemail",
    DeepgramEnhanced: "deepgram.enhanced",
    DeepgramEnhancedFinance: "deepgram.enhanced-finance",
    DeepgramEnhancedGeneral: "deepgram.enhanced-general",
    DeepgramEnhancedMeeting: "deepgram.enhanced-meeting",
    DeepgramEnhancedPhonecall: "deepgram.enhanced-phonecall",
    DeepgramNova: "deepgram.nova",
    DeepgramNovaGeneral: "deepgram.nova-general",
    DeepgramNovaPhonecall: "deepgram.nova-phonecall",
    DeepgramNova2: "deepgram.nova-2",
    DeepgramNova2Atc: "deepgram.nova-2-atc",
    DeepgramNova2Automotive: "deepgram.nova-2-automotive",
    DeepgramNova2Conversationalai: "deepgram.nova-2-conversationalai",
    DeepgramNova2Drivethru: "deepgram.nova-2-drivethru",
    DeepgramNova2Finance: "deepgram.nova-2-finance",
    DeepgramNova2General: "deepgram.nova-2-general",
    DeepgramNova2Medical: "deepgram.nova-2-medical",
    DeepgramNova2Meeting: "deepgram.nova-2-meeting",
    DeepgramNova2Phonecall: "deepgram.nova-2-phonecall",
    DeepgramNova2Video: "deepgram.nova-2-video",
    DeepgramNova2Voicemail: "deepgram.nova-2-voicemail",
    DeepgramNova3: "deepgram.nova-3",
    DeepgramNova3General: "deepgram.nova-3-general",
    DeepgramNova3Medical: "deepgram.nova-3-medical",
    DeepgramWhisper: "deepgram.whisper",
    DeepgramWhisperBase: "deepgram.whisper-base",
    DeepgramWhisperLarge: "deepgram.whisper-large",
    DeepgramWhisperMedium: "deepgram.whisper-medium",
    DeepgramWhisperSmall: "deepgram.whisper-small",
    DeepgramWhisperTiny: "deepgram.whisper-tiny",
    ElevenlabsScribeV1: "elevenlabs.scribe-v1",
    FalaiElevenlabsSpeechToText: "falai.elevenlabs-speech-to-text",
    FalaiSpeechToText: "falai.speech-to-text",
    FalaiWhisper: "falai.whisper",
    FalaiWizper: "falai.wizper",
    FireworksaiWhisperV3: "fireworksai.whisper-v3",
    FireworksaiWhisperV3Turbo: "fireworksai.whisper-v3-turbo",
    GladiaStandard: "gladia.standard",
    GoogleEnhanced: "google.enhanced",
    GoogleStandard: "google.standard",
    GeminiGemini25Pro: "gemini.gemini-2.5-pro",
    GeminiGemini25Flash: "gemini.gemini-2.5-flash",
    GeminiGemini25FlashLite: "gemini.gemini-2.5-flash-lite",
    GeminiGemini20Flash: "gemini.gemini-2.0-flash",
    GeminiGemini20FlashLite: "gemini.gemini-2.0-flash-lite",
    GroqWhisperLargeV3: "groq.whisper-large-v3",
    GroqWhisperLargeV3Turbo: "groq.whisper-large-v3-turbo",
    IbmStandard: "ibm.standard",
    MistralVoxtralMini: "mistral.voxtral-mini",
    OpenaiWhisper1: "openai.whisper-1",
    OpenaiGpt4OTranscribe: "openai.gpt-4o-transcribe",
    OpenaiGpt4OMiniTranscribe: "openai.gpt-4o-mini-transcribe",
    OpenaiGpt4OTranscribeDiarize: "openai.gpt-4o-transcribe-diarize",
    RevaiMachine: "revai.machine",
    RevaiFusion: "revai.fusion",
    SpeechmaticsEnhanced: "speechmatics.enhanced",
    SpeechmaticsStandard: "speechmatics.standard",
} as const;
/** Union of every model-identifier string held by the `TranscriptionModelIdentifier` constant. */
export type TranscriptionModelIdentifier =
    (typeof TranscriptionModelIdentifier)[keyof typeof TranscriptionModelIdentifier];
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
// This file was auto-generated by Fern from our API Definition.
|
|
2
|
+
|
|
3
|
+
/**
 * Simplified JSON transcription result, returned when `output_format` is `json_text`.
 * Holds only the transcription ID and the full transcribed text.
 */
export interface TranscriptionOnlyText {
    /** Unique identifier of the transcription job/request. */
    id: string;
    /** Complete transcribed text as one string. */
    text: string;
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
// This file was auto-generated by Fern from our API Definition.
|
|
2
|
+
|
|
3
|
+
/**
 * Identifier of the underlying Speech-to-Text service provider (e.g., 'openai', 'deepgram').
 */
export const TranscriptionProvider = {
    Amazon: "amazon",
    Assemblyai: "assemblyai",
    Azure: "azure",
    Cloudflare: "cloudflare",
    Deepgram: "deepgram",
    Elevenlabs: "elevenlabs",
    Falai: "falai",
    Fireworksai: "fireworksai",
    Gemini: "gemini",
    Gladia: "gladia",
    Google: "google",
    Groq: "groq",
    Ibm: "ibm",
    Mistral: "mistral",
    Openai: "openai",
    Revai: "revai",
    Speechmatics: "speechmatics",
} as const;
/** Union of every provider string held by the `TranscriptionProvider` constant. */
export type TranscriptionProvider = (typeof TranscriptionProvider)[keyof typeof TranscriptionProvider];
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
// This file was auto-generated by Fern from our API Definition.
|
|
2
|
+
|
|
3
|
+
import type * as Speechall from "../index.js";
|
|
4
|
+
|
|
5
|
+
/**
 * JSON structure returned when a JSON-based `output_format` (`json` or `json_text`) is requested:
 * either the detailed structure or the simple text-only structure.
 */
export type TranscriptionResponse = Speechall.TranscriptionDetailed | Speechall.TranscriptionOnlyText;
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
// This file was auto-generated by Fern from our API Definition.
|
|
2
|
+
|
|
3
|
+
/**
 * A time-coded segment of the transcription, typically a phrase, a sentence, or a speaker turn.
 */
export interface TranscriptionSegment {
    /** Segment start, in seconds from the beginning of the audio. */
    start?: number;
    /** Segment end, in seconds from the beginning of the audio. */
    end?: number;
    /** Transcribed text of this segment. */
    text?: string;
    /** Speaker identifier for this segment; present if diarization was enabled and successful. */
    speaker?: string;
    /**
     * Model confidence score for this segment's transcription, typically between 0 and 1
     * (if provided by the model).
     */
    confidence?: number;
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
// This file was auto-generated by Fern from our API Definition.
|
|
2
|
+
|
|
3
|
+
/**
 * A single time-coded word of the transcription.
 */
export interface TranscriptionWord {
    /** Word start, in seconds from the beginning of the audio. */
    start: number;
    /** Word end, in seconds from the beginning of the audio. */
    end: number;
    /** The transcribed word. */
    word: string;
    /** Speaker identifier for this word; present if diarization was enabled and successful. */
    speaker?: string;
    /**
     * Model confidence score for this word's transcription, typically between 0 and 1
     * (if provided by the model).
     */
    confidence?: number;
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
export * from "./BaseTranscriptionConfiguration.js";
|
|
2
|
+
export * from "./ErrorResponse.js";
|
|
3
|
+
export * from "./ExactRule.js";
|
|
4
|
+
export * from "./RegexGroupRule.js";
|
|
5
|
+
export * from "./RegexRule.js";
|
|
6
|
+
export * from "./ReplacementRule.js";
|
|
7
|
+
export * from "./SpeechToTextModel.js";
|
|
8
|
+
export * from "./TranscriptionDetailed.js";
|
|
9
|
+
export * from "./TranscriptionModelIdentifier.js";
|
|
10
|
+
export * from "./TranscriptionOnlyText.js";
|
|
11
|
+
export * from "./TranscriptionProvider.js";
|
|
12
|
+
export * from "./TranscriptionResponse.js";
|
|
13
|
+
export * from "./TranscriptionSegment.js";
|
|
14
|
+
export * from "./TranscriptionWord.js";
|
|
15
|
+
export * from "./TranscriptLanguageCode.js";
|
|
16
|
+
export * from "./TranscriptOutputFormat.js";
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
// This file was auto-generated by Fern from our API Definition.
|
|
2
|
+
|
|
3
|
+
import * as core from "../core/index.js";
|
|
4
|
+
import * as errors from "../errors/index.js";
|
|
5
|
+
|
|
6
|
+
export namespace BearerAuthProvider {
    /** Credentials accepted by {@link BearerAuthProvider}. */
    export interface AuthOptions {
        /** Bearer token source; it is resolved through `core.Supplier.get` on every auth request. */
        token: core.Supplier<core.BearerToken>;
    }

    /** Constructor options; adds nothing on top of {@link AuthOptions}. */
    export interface Options extends AuthOptions {}
}

/**
 * Auth provider that produces an `Authorization: Bearer <token>` header.
 */
export class BearerAuthProvider implements core.AuthProvider {
    private readonly token: core.Supplier<core.BearerToken>;

    /**
     * @param options - carries the token source to use for every request.
     */
    constructor(options: BearerAuthProvider.Options) {
        const { token } = options;
        this.token = token;
    }

    /**
     * Reports whether `options` holds a token (neither `null` nor `undefined`).
     */
    public static canCreate(options: BearerAuthProvider.Options): boolean {
        return !(options.token == null);
    }

    /**
     * Resolves the token and wraps it in an `Authorization` header.
     *
     * @param _arg - unused by this provider.
     * @returns the headers to attach to the outgoing request.
     * @throws errors.SpeechallError when the resolved token is `null` or `undefined`.
     */
    public async getAuthRequest(_arg?: { endpointMetadata?: core.EndpointMetadata }): Promise<core.AuthRequest> {
        const resolvedToken = await core.Supplier.get(this.token);
        if (resolvedToken != null) {
            const authorization = `Bearer ${resolvedToken}`;
            return { headers: { Authorization: authorization } };
        }

        throw new errors.SpeechallError({
            message: "Please specify a token by passing it in to the constructor",
        });
    }
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export { BearerAuthProvider } from "./BearerAuthProvider.js";
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import { base64Decode, base64Encode } from "../base64.js";
|
|
2
|
+
|
|
3
|
+
/** Username/password pair used for HTTP Basic authentication. */
export interface BasicAuth {
    username: string;
    password: string;
}

// Matches a leading "Basic " scheme prefix, ignoring case.
const BASIC_AUTH_HEADER_PREFIX = /^Basic /i;

/** Helpers converting between {@link BasicAuth} credentials and `Authorization` header values. */
export const BasicAuth = {
    /**
     * Builds the `Basic <base64(username:password)>` header value.
     *
     * @returns the header value, or `undefined` when no credentials are given.
     */
    toAuthorizationHeader: (basicAuth: BasicAuth | undefined): string | undefined => {
        if (basicAuth == null) {
            return undefined;
        }
        const encoded = base64Encode(`${basicAuth.username}:${basicAuth.password}`);
        return `Basic ${encoded}`;
    },

    /**
     * Parses a `Basic ...` header value back into credentials.
     *
     * The decoded text is split at its first colon, so the password may itself contain colons.
     *
     * @throws Error("Invalid basic auth") when the decoded text contains no colon.
     */
    fromAuthorizationHeader: (header: string): BasicAuth => {
        const decoded = base64Decode(header.replace(BASIC_AUTH_HEADER_PREFIX, ""));
        const separatorIndex = decoded.indexOf(":");

        if (separatorIndex === -1) {
            throw new Error("Invalid basic auth");
        }
        return {
            username: decoded.slice(0, separatorIndex),
            password: decoded.slice(separatorIndex + 1),
        };
    },
};
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/** A bearer token is represented as a plain string. */
export type BearerToken = string;

// Matches a leading "Bearer " scheme prefix, ignoring case.
const BEARER_AUTH_HEADER_PREFIX = /^Bearer /i;

/**
 * Builds the `Bearer <token>` header value.
 *
 * @returns the header value, or `undefined` when `token` is `null`/`undefined`.
 */
function toAuthorizationHeader(token: string | undefined): string | undefined {
    return token == null ? undefined : `Bearer ${token}`;
}

/** Helpers converting between bearer tokens and `Authorization` header values. */
export const BearerToken: {
    toAuthorizationHeader: typeof toAuthorizationHeader;
    fromAuthorizationHeader: (header: string) => BearerToken;
} = {
    toAuthorizationHeader,
    /**
     * Removes a leading `Bearer ` prefix (any letter case) and surrounding whitespace.
     * A header without that prefix is returned trimmed but otherwise unchanged.
     */
    fromAuthorizationHeader: (header: string): BearerToken => {
        const withoutScheme = header.replace(BEARER_AUTH_HEADER_PREFIX, "");
        return withoutScheme.trim() as BearerToken;
    },
};
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import type { AuthProvider } from "./AuthProvider.js";
|
|
2
|
+
import type { AuthRequest } from "./AuthRequest.js";
|
|
3
|
+
|
|
4
|
+
/**
 * Auth provider that adds no authentication: it always yields an empty header map.
 */
export class NoOpAuthProvider implements AuthProvider {
    /**
     * @returns a promise resolving to an auth request with no headers.
     */
    public async getAuthRequest(): Promise<AuthRequest> {
        return { headers: {} };
    }
}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
// Upper bound on how many bytes are handed to a single `String.fromCharCode` call.
// Spreading an entire large array into one call can exceed the engine's argument/stack
// limit and throw a RangeError, so conversion is done in bounded slices.
const BASE64_CHUNK_SIZE = 0x8000;

/**
 * Decodes a base64 string into its raw bytes using `atob`.
 *
 * @param base64 - base64-encoded text.
 * @returns the decoded bytes.
 */
function base64ToBytes(base64: string): Uint8Array {
    const binString = atob(base64);
    // `atob` returns a binary string: one character per byte, char codes 0-255.
    return Uint8Array.from(binString, (ch) => ch.charCodeAt(0));
}

/**
 * Encodes raw bytes as base64 using `btoa`.
 *
 * The intermediate binary string is built in slices of `BASE64_CHUNK_SIZE` bytes so that
 * inputs of any length can be encoded without overflowing the call stack.
 *
 * @param bytes - bytes to encode.
 * @returns the base64 text (empty string for empty input).
 */
function bytesToBase64(bytes: Uint8Array): string {
    let binString = "";
    for (let offset = 0; offset < bytes.length; offset += BASE64_CHUNK_SIZE) {
        // `subarray` is a view on the same buffer, so no bytes are copied here.
        binString += String.fromCharCode(...bytes.subarray(offset, offset + BASE64_CHUNK_SIZE));
    }
    return btoa(binString);
}

/**
 * Base64-encodes the UTF-8 bytes of `input`.
 *
 * Uses `Buffer` when that global exists; otherwise falls back to `TextEncoder` + `btoa`.
 *
 * @param input - text to encode.
 * @returns the base64 representation of the UTF-8 encoded text.
 */
export function base64Encode(input: string): string {
    if (typeof Buffer !== "undefined") {
        return Buffer.from(input, "utf8").toString("base64");
    }

    const bytes = new TextEncoder().encode(input);
    return bytesToBase64(bytes);
}

/**
 * Decodes base64 text and interprets the resulting bytes as UTF-8.
 *
 * Uses `Buffer` when that global exists; otherwise falls back to `atob` + `TextDecoder`.
 *
 * @param input - base64-encoded text.
 * @returns the decoded string.
 */
export function base64Decode(input: string): string {
    if (typeof Buffer !== "undefined") {
        return Buffer.from(input, "base64").toString("utf8");
    }

    const bytes = base64ToBytes(input);
    return new TextDecoder().decode(bytes);
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import type { RawResponse } from "./RawResponse.js";
|
|
2
|
+
|
|
3
|
+
/**
 * Outcome of an API call: either a {@link SuccessfulResponse} or a {@link FailedResponse}.
 * The boolean `ok` field tells the two apart.
 */
export type APIResponse<Success, Failure> = SuccessfulResponse<Success> | FailedResponse<Failure>;

/** Success variant: `ok` is `true` and the payload is available in `body`. */
export interface SuccessfulResponse<T> {
    ok: true;
    body: T;
    /**
     * @deprecated Use `rawResponse` instead
     */
    headers?: Record<string, any>;
    rawResponse: RawResponse;
}

/** Failure variant: `ok` is `false` and the failure details are available in `error`. */
export interface FailedResponse<T> {
    ok: false;
    error: T;
    rawResponse: RawResponse;
}
|