@luanpoppe/ai 1.0.8 → 1.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/@types/audio.d.ts +1 -0
- package/dist/@types/audio.d.ts.map +1 -1
- package/dist/@types/audio.js +15 -0
- package/dist/@types/audio.js.map +1 -1
- package/dist/index.d.ts +3 -3
- package/dist/index.d.ts.map +1 -1
- package/dist/langchain/audio-transcription.d.ts +8 -3
- package/dist/langchain/audio-transcription.d.ts.map +1 -1
- package/dist/langchain/audio-transcription.js +49 -42
- package/dist/langchain/audio-transcription.js.map +1 -1
- package/dist/langchain/messages.js +1 -1
- package/dist/langchain/messages.js.map +1 -1
- package/dist/utils/files-utils.d.ts +2 -2
- package/dist/utils/files-utils.d.ts.map +1 -1
- package/dist/utils/files-utils.js +10 -2
- package/dist/utils/files-utils.js.map +1 -1
- package/docs/LANGCHAIN_WHISPER_WINDOWS_BUG.md +209 -0
- package/package.json +2 -1
- package/src/@types/audio.ts +15 -0
- package/src/index.ts +5 -2
- package/src/langchain/audio-transcription.ts +69 -46
- package/src/langchain/messages.ts +1 -1
- package/src/utils/files-utils.ts +16 -3
- package/tests/unit/langchain/audio-transcription.test.ts +137 -244
package/dist/@types/audio.d.ts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
1
|
export type AudioBuffer = Buffer | ArrayBuffer | Uint8Array;
|
|
2
2
|
export type AudioMimeType = "audio/mpeg" | "audio/mp3" | "audio/wav" | "audio/wave" | "audio/x-wav" | "audio/mp4" | "audio/m4a" | "audio/webm" | "audio/ogg" | "audio/flac" | "audio/aac" | "audio/opus";
|
|
3
|
+
export declare const MIME_TO_EXTENSION: Record<AudioMimeType, string>;
|
|
3
4
|
//# sourceMappingURL=audio.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"audio.d.ts","sourceRoot":"","sources":["../../src/@types/audio.ts"],"names":[],"mappings":"AAAA,MAAM,MAAM,WAAW,GAAG,MAAM,GAAG,WAAW,GAAG,UAAU,CAAC;AAE5D,MAAM,MAAM,aAAa,GACrB,YAAY,GACZ,WAAW,GACX,WAAW,GACX,YAAY,GACZ,aAAa,GACb,WAAW,GACX,WAAW,GACX,YAAY,GACZ,WAAW,GACX,YAAY,GACZ,WAAW,GACX,YAAY,CAAC"}
|
|
1
|
+
{"version":3,"file":"audio.d.ts","sourceRoot":"","sources":["../../src/@types/audio.ts"],"names":[],"mappings":"AAAA,MAAM,MAAM,WAAW,GAAG,MAAM,GAAG,WAAW,GAAG,UAAU,CAAC;AAE5D,MAAM,MAAM,aAAa,GACrB,YAAY,GACZ,WAAW,GACX,WAAW,GACX,YAAY,GACZ,aAAa,GACb,WAAW,GACX,WAAW,GACX,YAAY,GACZ,WAAW,GACX,YAAY,GACZ,WAAW,GACX,YAAY,CAAC;AAEjB,eAAO,MAAM,iBAAiB,EAAE,MAAM,CAAC,aAAa,EAAE,MAAM,CAa3D,CAAC"}
|
package/dist/@types/audio.js
CHANGED
|
@@ -1,3 +1,18 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.MIME_TO_EXTENSION = void 0;
|
|
4
|
+
exports.MIME_TO_EXTENSION = {
|
|
5
|
+
"audio/mpeg": "mp3",
|
|
6
|
+
"audio/mp3": "mp3",
|
|
7
|
+
"audio/wav": "wav",
|
|
8
|
+
"audio/wave": "wav",
|
|
9
|
+
"audio/x-wav": "wav",
|
|
10
|
+
"audio/mp4": "mp4",
|
|
11
|
+
"audio/m4a": "m4a",
|
|
12
|
+
"audio/webm": "webm",
|
|
13
|
+
"audio/ogg": "ogg",
|
|
14
|
+
"audio/flac": "flac",
|
|
15
|
+
"audio/aac": "aac",
|
|
16
|
+
"audio/opus": "opus",
|
|
17
|
+
};
|
|
3
18
|
//# sourceMappingURL=audio.js.map
|
package/dist/@types/audio.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"audio.js","sourceRoot":"","sources":["../../src/@types/audio.ts"],"names":[],"mappings":""}
|
|
1
|
+
{"version":3,"file":"audio.js","sourceRoot":"","sources":["../../src/@types/audio.ts"],"names":[],"mappings":";;;AAgBa,QAAA,iBAAiB,GAAkC;IAC9D,YAAY,EAAE,KAAK;IACnB,WAAW,EAAE,KAAK;IAClB,WAAW,EAAE,KAAK;IAClB,YAAY,EAAE,KAAK;IACnB,aAAa,EAAE,KAAK;IACpB,WAAW,EAAE,KAAK;IAClB,WAAW,EAAE,KAAK;IAClB,YAAY,EAAE,MAAM;IACpB,WAAW,EAAE,KAAK;IAClB,YAAY,EAAE,MAAM;IACpB,WAAW,EAAE,KAAK;IAClB,YAAY,EAAE,MAAM;CACrB,CAAC"}
|
package/dist/index.d.ts
CHANGED
|
@@ -2,7 +2,7 @@ import { LangchainModels, LLMModelConfig } from "./langchain/models";
|
|
|
2
2
|
import { AIModelNames } from "./@types/model-names";
|
|
3
3
|
import z from "zod";
|
|
4
4
|
import { MessageInput } from "./langchain/messages";
|
|
5
|
-
import { AgentMiddleware, BaseMessage } from "langchain";
|
|
5
|
+
import { AgentMiddleware, BaseMessage, createAgent } from "langchain";
|
|
6
6
|
import { ClientTool, ServerTool } from "@langchain/core/tools";
|
|
7
7
|
import { LangchainMessages } from "./langchain/messages";
|
|
8
8
|
import { LangchainTools } from "./langchain/tools";
|
|
@@ -44,7 +44,7 @@ export declare class Langchain {
|
|
|
44
44
|
*/
|
|
45
45
|
private normalizeSchemaForOpenAI;
|
|
46
46
|
getRawAgent(params: LangchainCallParams, outputSchema?: z.ZodSchema | undefined): {
|
|
47
|
-
agent:
|
|
47
|
+
agent: ReturnType<typeof createAgent>;
|
|
48
48
|
};
|
|
49
49
|
private getModel;
|
|
50
50
|
private standardAgent;
|
|
@@ -55,5 +55,5 @@ export { LangchainAudioTranscription } from "./langchain/audio-transcription";
|
|
|
55
55
|
export { AudioUtils } from "./utils/audio-utils";
|
|
56
56
|
export type { AudioBuffer, AudioMimeType } from "./@types/audio";
|
|
57
57
|
export type { AudioContentBlock, HumanMessageWithAudioOptions, } from "./langchain/messages";
|
|
58
|
-
export type { WhisperTranscriptionOptions } from "./langchain/audio-transcription";
|
|
58
|
+
export type { WhisperModel, WhisperTranscriptionOptions, } from "./langchain/audio-transcription";
|
|
59
59
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACrE,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACpD,OAAO,CAAC,MAAM,KAAK,CAAC;AACpB,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACpD,OAAO,EACL,eAAe,EACf,WAAW,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACrE,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACpD,OAAO,CAAC,MAAM,KAAK,CAAC;AACpB,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACpD,OAAO,EACL,eAAe,EACf,WAAW,EACX,WAAW,EAGZ,MAAM,WAAW,CAAC;AACnB,OAAO,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,uBAAuB,CAAC;AAC/D,OAAO,EAAE,iBAAiB,EAAE,MAAM,sBAAsB,CAAC;AACzD,OAAO,EAAE,cAAc,EAAE,MAAM,mBAAmB,CAAC;AAEnD,KAAK,oBAAoB,GAAG;IAC1B,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B,CAAC;AAEF,MAAM,MAAM,mBAAmB,GAAG;IAChC,KAAK,CAAC,EAAE;QACN,UAAU,CAAC,EAAE,eAAe,EAAE,CAAC;QAC/B,KAAK,CAAC,EAAE,CAAC,UAAU,GAAG,UAAU,CAAC,EAAE,CAAC;KACrC,CAAC;IAEF,WAAW,CAAC,EAAE,IAAI,CAAC,cAAc,EAAE,QAAQ,GAAG,OAAO,CAAC,CAAC;IAEvD,OAAO,EAAE,YAAY,CAAC;IACtB,QAAQ,EAAE,YAAY,EAAE,CAAC;IACzB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB,CAAC;AAEF,MAAM,MAAM,mBAAmB,GAAG,OAAO,CAAC;IACxC,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,WAAW,EAAE,CAAC;CACzB,CAAC,CAAC;AAEH,MAAM,MAAM,mCAAmC,CAAC,CAAC,SAAS,CAAC,CAAC,SAAS,IACnE,mBAAmB,GAAG;IACpB,YAAY,EAAE,CAAC,CAAC;CACjB,CAAC;AAEJ,MAAM,MAAM,mCAAmC,CAAC,CAAC,IAAI,OAAO,CAAC;IAC3D,QAAQ,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;CACtB,CAAC,CAAC;AAEH,qBAAa,SAAS;IACR,OAAO,CAAC,MAAM;gBAAN,MAAM,EAAE,oBAAoB;IAE1C,IAAI,CAAC,MAAM,EAAE,mBAAmB,GAAG,mBAAmB;IAiBtD,oBAAoB,CAAC,CAAC,SAAS,CAAC,CAAC,SAAS,EAC9C,MAAM,EAAE,mCAAmC,CAAC,CAAC,CAAC,GAC7C,mCAAmC,CAAC,OAAO,MAAM,CAAC,YAAY,CAAC;IAwBlE;;;;OAIG;IACH,OAAO,CAAC,wBAAwB;IAoChC,WAAW,CACT,MAAM,EAAE,mBAAmB,EAC3B,YAAY,CAAC,EAAE,CAAC,CAAC,SAAS,GAAG,SAAS,GACrC;QAAE,KAAK,EAAE,UAAU,CAAC,OAAO,WAAW,CAAC,CAAA;KAAE;IAS5C,OAAO,CAAC,QAAQ;IAiChB,OAAO,CAAC,aAAa;IAkBrB,OAAO,CAAC,mBAAmB;CAU5B;AAED,OAAO,EAAE,eAAe,EAAE,iBAAiB,EAAE,cAAc,EAAE,CAAC;AAC9D,OAAO,EAAE,2BAA2B,EAAE,MAAM,iCAAiC,CAAC;AAC9E,OAAO,EAAE,UAAU,EAAE,MAAM,qBAAqB,CAAC;AACjD,YAAY,EAAE,WAAW,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAC;AACjE,YAAY,EACV,iBAAiB,EACjB,4BAA4B,GAC7B,MAAM,sBAAsB
,CAAC;AAC9B,YAAY,EACV,YAAY,EACZ,2BAA2B,GAC5B,MAAM,iCAAiC,CAAC"}
|
|
@@ -1,13 +1,18 @@
|
|
|
1
|
-
import type { AudioBuffer } from "../@types/audio";
|
|
1
|
+
import type { AudioBuffer, AudioMimeType } from "../@types/audio";
|
|
2
|
+
/** Modelos disponíveis na API de transcrição OpenAI (Speech-to-Text) */
|
|
3
|
+
export type WhisperModel = "whisper-1" | "gpt-4o-transcribe" | "gpt-4o-mini-transcribe" | "gpt-4o-mini-transcribe-2025-12-15" | "gpt-4o-transcribe-diarize";
|
|
2
4
|
export type WhisperTranscriptionOptions = {
|
|
3
|
-
|
|
5
|
+
/** Modelo de transcrição. Padrão: "whisper-1". gpt-4o-transcribe e gpt-4o-mini-transcribe têm maior qualidade. */
|
|
6
|
+
model?: WhisperModel;
|
|
7
|
+
languageIn2Digits?: string;
|
|
4
8
|
prompt?: string;
|
|
5
9
|
responseFormat?: "json" | "text" | "srt" | "verbose_json" | "vtt";
|
|
6
10
|
temperature?: number;
|
|
7
11
|
timestampGranularities?: ("word" | "segment")[];
|
|
12
|
+
/** Formato do áudio: extensão ("mp3", "wav", "webm") ou MIME type ("audio/wav", "audio/webm") */
|
|
13
|
+
format?: string | AudioMimeType;
|
|
8
14
|
};
|
|
9
15
|
export declare class LangchainAudioTranscription {
|
|
10
|
-
private static extractTextFromDocs;
|
|
11
16
|
static transcribeWithWhisper(audioBuffer: AudioBuffer, options?: WhisperTranscriptionOptions, openAIApiKey?: string): Promise<string>;
|
|
12
17
|
static transcribeFileWithWhisper(filePath: string, options?: WhisperTranscriptionOptions, openAIApiKey?: string): Promise<string>;
|
|
13
18
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"audio-transcription.d.ts","sourceRoot":"","sources":["../../src/langchain/audio-transcription.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"audio-transcription.d.ts","sourceRoot":"","sources":["../../src/langchain/audio-transcription.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,WAAW,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAGlE,wEAAwE;AACxE,MAAM,MAAM,YAAY,GACpB,WAAW,GACX,mBAAmB,GACnB,wBAAwB,GACxB,mCAAmC,GACnC,2BAA2B,CAAC;AAEhC,MAAM,MAAM,2BAA2B,GAAG;IACxC,kHAAkH;IAClH,KAAK,CAAC,EAAE,YAAY,CAAC;IACrB,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,cAAc,CAAC,EAAE,MAAM,GAAG,MAAM,GAAG,KAAK,GAAG,cAAc,GAAG,KAAK,CAAC;IAClE,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,sBAAsB,CAAC,EAAE,CAAC,MAAM,GAAG,SAAS,CAAC,EAAE,CAAC;IAChD,iGAAiG;IACjG,MAAM,CAAC,EAAE,MAAM,GAAG,aAAa,CAAC;CACjC,CAAC;AAgBF,qBAAa,2BAA2B;WACzB,qBAAqB,CAChC,WAAW,EAAE,WAAW,EACxB,OAAO,GAAE,2BAAgC,EACzC,YAAY,CAAC,EAAE,MAAM,GACpB,OAAO,CAAC,MAAM,CAAC;WAwCL,yBAAyB,CACpC,QAAQ,EAAE,MAAM,EAChB,OAAO,GAAE,2BAAgC,EACzC,YAAY,CAAC,EAAE,MAAM,GACpB,OAAO,CAAC,MAAM,CAAC;CAcnB"}
|
|
@@ -32,62 +32,69 @@ var __importStar = (this && this.__importStar) || (function () {
|
|
|
32
32
|
return result;
|
|
33
33
|
};
|
|
34
34
|
})();
|
|
35
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
36
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
37
|
+
};
|
|
35
38
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
39
|
exports.LangchainAudioTranscription = void 0;
|
|
37
40
|
const fs = __importStar(require("fs"));
|
|
38
|
-
const
|
|
39
|
-
const
|
|
41
|
+
const path = __importStar(require("path"));
|
|
42
|
+
const openai_1 = __importDefault(require("openai"));
|
|
43
|
+
const openai_2 = require("openai");
|
|
44
|
+
const audio_1 = require("../@types/audio");
|
|
45
|
+
function getExtension(format) {
|
|
46
|
+
if (!format)
|
|
47
|
+
return "mp3";
|
|
48
|
+
if (format.startsWith("audio/")) {
|
|
49
|
+
return audio_1.MIME_TO_EXTENSION[format] ?? "mp3";
|
|
50
|
+
}
|
|
51
|
+
return format.replace(/^\./, "");
|
|
52
|
+
}
|
|
53
|
+
function toBuffer(audioBuffer) {
|
|
54
|
+
if (audioBuffer instanceof Buffer)
|
|
55
|
+
return audioBuffer;
|
|
56
|
+
if (audioBuffer instanceof ArrayBuffer)
|
|
57
|
+
return Buffer.from(audioBuffer);
|
|
58
|
+
return Buffer.from(audioBuffer);
|
|
59
|
+
}
|
|
40
60
|
class LangchainAudioTranscription {
|
|
41
|
-
static
|
|
42
|
-
if (
|
|
43
|
-
|
|
61
|
+
static async transcribeWithWhisper(audioBuffer, options = {}, openAIApiKey) {
|
|
62
|
+
if (openAIApiKey) {
|
|
63
|
+
process.env.OPENAI_API_KEY = openAIApiKey;
|
|
44
64
|
}
|
|
45
|
-
const
|
|
46
|
-
|
|
47
|
-
|
|
65
|
+
const buffer = toBuffer(audioBuffer);
|
|
66
|
+
const extension = getExtension(options.format);
|
|
67
|
+
const fileName = `whisper-${Date.now()}.${extension}`;
|
|
68
|
+
const file = await (0, openai_2.toFile)(buffer, fileName);
|
|
69
|
+
const openai = new openai_1.default();
|
|
70
|
+
const transcriptionParams = {
|
|
71
|
+
file,
|
|
72
|
+
model: options.model ?? "whisper-1",
|
|
73
|
+
response_format: options.responseFormat ?? "text",
|
|
74
|
+
};
|
|
75
|
+
if (options.languageIn2Digits) {
|
|
76
|
+
transcriptionParams.language = options.languageIn2Digits;
|
|
48
77
|
}
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
try {
|
|
55
|
-
// Configura a API key se fornecida
|
|
56
|
-
if (openAIApiKey) {
|
|
57
|
-
process.env.OPENAI_API_KEY = openAIApiKey;
|
|
58
|
-
}
|
|
59
|
-
const transcriptionParams = {
|
|
60
|
-
response_format: options.responseFormat || "text",
|
|
61
|
-
};
|
|
62
|
-
if (options.language) {
|
|
63
|
-
transcriptionParams.language = options.language;
|
|
64
|
-
}
|
|
65
|
-
if (options.prompt) {
|
|
66
|
-
transcriptionParams.prompt = options.prompt;
|
|
67
|
-
}
|
|
68
|
-
if (options.temperature !== undefined) {
|
|
69
|
-
transcriptionParams.temperature = options.temperature;
|
|
70
|
-
}
|
|
71
|
-
if (options.timestampGranularities) {
|
|
72
|
-
transcriptionParams.timestamp_granularities =
|
|
73
|
-
options.timestampGranularities;
|
|
74
|
-
}
|
|
75
|
-
const loader = new openai_whisper_audio_1.OpenAIWhisperAudio(tempFilePath, {
|
|
76
|
-
transcriptionCreateParams: transcriptionParams,
|
|
77
|
-
});
|
|
78
|
-
const docs = await loader.load();
|
|
79
|
-
return this.extractTextFromDocs(docs);
|
|
78
|
+
if (options.prompt) {
|
|
79
|
+
transcriptionParams.prompt = options.prompt;
|
|
80
|
+
}
|
|
81
|
+
if (options.temperature !== undefined) {
|
|
82
|
+
transcriptionParams.temperature = options.temperature;
|
|
80
83
|
}
|
|
81
|
-
|
|
82
|
-
|
|
84
|
+
if (options.timestampGranularities) {
|
|
85
|
+
transcriptionParams.timestamp_granularities =
|
|
86
|
+
options.timestampGranularities;
|
|
83
87
|
}
|
|
88
|
+
const response = await openai.audio.transcriptions.create(transcriptionParams);
|
|
89
|
+
return typeof response === "string" ? response : response.text;
|
|
84
90
|
}
|
|
85
91
|
static async transcribeFileWithWhisper(filePath, options = {}, openAIApiKey) {
|
|
86
92
|
if (!fs.existsSync(filePath)) {
|
|
87
93
|
throw new Error(`Arquivo não encontrado: ${filePath}`);
|
|
88
94
|
}
|
|
89
95
|
const audioBuffer = fs.readFileSync(filePath);
|
|
90
|
-
|
|
96
|
+
const format = options.format ?? (path.extname(filePath).replace(/^\./, "") || "mp3");
|
|
97
|
+
return this.transcribeWithWhisper(audioBuffer, { ...options, format }, openAIApiKey);
|
|
91
98
|
}
|
|
92
99
|
}
|
|
93
100
|
exports.LangchainAudioTranscription = LangchainAudioTranscription;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"audio-transcription.js","sourceRoot":"","sources":["../../src/langchain/audio-transcription.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"audio-transcription.js","sourceRoot":"","sources":["../../src/langchain/audio-transcription.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,uCAAyB;AACzB,2CAA6B;AAC7B,oDAA4B;AAC5B,mCAAgC;AAEhC,2CAAoD;AAsBpD,SAAS,YAAY,CAAC,MAA+B;IACnD,IAAI,CAAC,MAAM;QAAE,OAAO,KAAK,CAAC;IAC1B,IAAI,MAAM,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;QAChC,OAAO,yBAAiB,CAAC,MAAuB,CAAC,IAAI,KAAK,CAAC;IAC7D,CAAC;IACD,OAAO,MAAM,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;AACnC,CAAC;AAED,SAAS,QAAQ,CAAC,WAAwB;IACxC,IAAI,WAAW,YAAY,MAAM;QAAE,OAAO,WAAW,CAAC;IACtD,IAAI,WAAW,YAAY,WAAW;QAAE,OAAO,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IACxE,OAAO,MAAM,CAAC,IAAI,CAAC,WAAyB,CAAC,CAAC;AAChD,CAAC;AAED,MAAa,2BAA2B;IACtC,MAAM,CAAC,KAAK,CAAC,qBAAqB,CAChC,WAAwB,EACxB,UAAuC,EAAE,EACzC,YAAqB;QAErB,IAAI,YAAY,EAAE,CAAC;YACjB,OAAO,CAAC,GAAG,CAAC,cAAc,GAAG,YAAY,CAAC;QAC5C,CAAC;QAED,MAAM,MAAM,GAAG,QAAQ,CAAC,WAAW,CAAC,CAAC;QACrC,MAAM,SAAS,GAAG,YAAY,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;QAC/C,MAAM,QAAQ,GAAG,WAAW,IAAI,CAAC,GAAG,EAAE,IAAI,SAAS,EAAE,CAAC;QAEtD,MAAM,IAAI,GAAG,MAAM,IAAA,eAAM,EAAC,MAAM,EAAE,QAAQ,CAAC,CAAC;QAE5C,MAAM,MAAM,GAAG,IAAI,gBAAM,EAAE,CAAC;QAE5B,MAAM,mBAAmB,GAA2C;YAClE,IAAI;YACJ,KAAK,EAAE,OAAO,CAAC,KAAK,IAAI,WAAW;YACnC,eAAe,EAAE,OAAO,CAAC,cAAc,IAAI,MAAM;SAClD,CAAC;QAEF,IAAI,OAAO,CAAC,iBAAiB,EAAE,CAAC;YAC9B,mBAAmB,CAAC,QAAQ,GAAG,OAAO,CAAC,iBAAiB,CAAC;QAC3D,CAAC;QACD,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;YACnB,mBAAmB,CAAC,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;QAC9C,CAAC;QACD,IAAI,OAAO,CAAC,WAAW,KAAK,SAAS,EAAE,CAAC;YACtC,mBAAmB,CAAC,WAAW,GAAG,OAAO,CAAC,WAAW,CAAC;QACxD,CAAC;QACD,IAAI,OAAO,CAAC,sBAAsB,EAAE,CAAC;YACnC,mBAAmB,CAAC,uBAAuB;gBACzC,OAAO,CAAC,sBAAsB,CAAC;QACnC,CAAC;QAED,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,KAAK,CAAC,cAAc,CAAC,MAAM,CACvD,mBAAmB,CACpB,CAAC;QAEF,OAAO,OAAO,QAAQ,KAAK,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC;IACjE,CAAC;IAED,MAAM,CAAC,KAAK,CAAC,yBAAyB,CACpC,QAAgB,EAChB,UAAuC,EAAE,EACzC,YAAqB;QAErB,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC7B,MAAM,IAAI,KAAK,CAAC,
2BAA2B,QAAQ,EAAE,CAAC,CAAC;QACzD,CAAC;QAED,MAAM,WAAW,GAAG,EAAE,CAAC,YAAY,CAAC,QAAQ,CAAC,CAAC;QAC9C,MAAM,MAAM,GACV,OAAO,CAAC,MAAM,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,IAAI,KAAK,CAAC,CAAC;QACzE,OAAO,IAAI,CAAC,qBAAqB,CAC/B,WAAW,EACX,EAAE,GAAG,OAAO,EAAE,MAAM,EAAE,EACtB,YAAY,CACb,CAAC;IACJ,CAAC;CACF;AA/DD,kEA+DC"}
|
|
@@ -57,7 +57,7 @@ class LangchainMessages {
|
|
|
57
57
|
// Prepara opções de transcrição - só inclui language se não houver mimeType
|
|
58
58
|
// Com exactOptionalPropertyTypes: true, não podemos passar undefined explicitamente
|
|
59
59
|
const transcriptionOptions = mimeType
|
|
60
|
-
? {}
|
|
60
|
+
? { format: mimeType }
|
|
61
61
|
: { language: "pt" };
|
|
62
62
|
const transcribedText = await audio_transcription_1.LangchainAudioTranscription.transcribeWithWhisper(audioBuffer, transcriptionOptions, openAIApiKey);
|
|
63
63
|
// Combina o texto original (se fornecido) com a transcrição
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"messages.js","sourceRoot":"","sources":["../../src/langchain/messages.ts"],"names":[],"mappings":";;;AAAA,yCAAmE;AACnE,sDAAkD;AAElD,+DAAoE;AAuBpE,MAAa,iBAAiB;IAC5B,MAAM,CAAC,MAAM,CAAC,OAAe;QAC3B,OAAO,IAAI,yBAAa,CAAC,OAAO,CAAC,CAAC;IACpC,CAAC;IAED,MAAM,CAAC,KAAK,CAAC,OAAe;QAC1B,OAAO,IAAI,wBAAY,CAAC,OAAO,CAAC,CAAC;IACnC,CAAC;IAED,MAAM,CAAC,KAAK,CAAC,UAAU,CACrB,OAAqC;QAErC,MAAM,EAAE,KAAK,EAAE,IAAI,EAAE,QAAQ,GAAG,MAAM,EAAE,YAAY,EAAE,GAAG,OAAO,CAAC;QACjE,MAAM,EAAE,MAAM,EAAE,QAAQ,EAAE,QAAQ,EAAE,GAAG,KAAK,CAAC;QAE7C,OAAO,IAAI,CAAC,2BAA2B,CACrC,MAAM,EACN,IAAI,EACJ,QAAQ,EACR,QAAQ,EACR,QAAQ,EACR,YAAY,CACb,CAAC;IACJ,CAAC;IAED,MAAM,CAAC,EAAE,CAAC,OAAe;QACvB,OAAO,IAAI,qBAAS,CAAC,OAAO,CAAC,CAAC;IAChC,CAAC;IAEO,MAAM,CAAC,KAAK,CAAC,2BAA2B,CAC9C,WAAwB,EACxB,IAAa,EACb,QAAwB,EACxB,QAAiB,EACjB,WAAyC,MAAM,EAC/C,YAAqB;QAErB,gFAAgF;QAChF,IAAI,QAAQ,KAAK,QAAQ;YACvB,OAAO,MAAM,IAAI,CAAC,kBAAkB,CAClC,WAAW,EACX,QAAQ,EACR,IAAI,EACJ,YAAY,CACb,CAAC;QAEJ,4GAA4G;QAC5G,MAAM,UAAU,GAAG,wBAAU,CAAC,cAAc,CAAC,WAAW,CAAC,CAAC;QAC1D,MAAM,gBAAgB,GACpB,QAAQ,IAAI,wBAAU,CAAC,mBAAmB,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC;QAEpE,MAAM,OAAO,GAAe,EAAE,CAAC;QAE/B,6EAA6E;QAC7E,iGAAiG;QACjG,IAAI,IAAI,EAAE,CAAC;YACT,OAAO,CAAC,IAAI,CAAC;gBACX,IAAI,EAAE,MAAM;gBACZ,IAAI,EAAE,IAAI;aACX,CAAC,CAAC;QACL,CAAC;QAED,wEAAwE;QACxE,gEAAgE;QAChE,MAAM,UAAU,GAAsB;YACpC,IAAI,EAAE,OAAO;YACb,WAAW,EAAE,QAAQ;YACrB,IAAI,EAAE,UAAU;YAChB,SAAS,EAAE,gBAAgB;SAC5B,CAAC;QACF,OAAO,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QAEzB,2DAA2D;QAC3D,mEAAmE;QACnE,OAAO,IAAI,wBAAY,CAAC;YACtB,OAAO,EAAE,OAAc;SACjB,CAAC,CAAC;IACZ,CAAC;IAEO,MAAM,CAAC,KAAK,CAAC,kBAAkB,CACrC,WAAwB,EACxB,QAAwB,EACxB,IAAa,EACb,YAAqB;QAErB,IAAI,CAAC,YAAY;YACf,MAAM,IAAI,KAAK,CACb,mIAAmI,CACpI,CAAC;QAEJ,IAAI,CAAC;YACH,4EAA4E;YAC5E,oFAAoF;YACpF,MAAM,oBAAoB,GAAG,QAAQ;gBACnC,CAAC,
|
|
1
|
+
{"version":3,"file":"messages.js","sourceRoot":"","sources":["../../src/langchain/messages.ts"],"names":[],"mappings":";;;AAAA,yCAAmE;AACnE,sDAAkD;AAElD,+DAAoE;AAuBpE,MAAa,iBAAiB;IAC5B,MAAM,CAAC,MAAM,CAAC,OAAe;QAC3B,OAAO,IAAI,yBAAa,CAAC,OAAO,CAAC,CAAC;IACpC,CAAC;IAED,MAAM,CAAC,KAAK,CAAC,OAAe;QAC1B,OAAO,IAAI,wBAAY,CAAC,OAAO,CAAC,CAAC;IACnC,CAAC;IAED,MAAM,CAAC,KAAK,CAAC,UAAU,CACrB,OAAqC;QAErC,MAAM,EAAE,KAAK,EAAE,IAAI,EAAE,QAAQ,GAAG,MAAM,EAAE,YAAY,EAAE,GAAG,OAAO,CAAC;QACjE,MAAM,EAAE,MAAM,EAAE,QAAQ,EAAE,QAAQ,EAAE,GAAG,KAAK,CAAC;QAE7C,OAAO,IAAI,CAAC,2BAA2B,CACrC,MAAM,EACN,IAAI,EACJ,QAAQ,EACR,QAAQ,EACR,QAAQ,EACR,YAAY,CACb,CAAC;IACJ,CAAC;IAED,MAAM,CAAC,EAAE,CAAC,OAAe;QACvB,OAAO,IAAI,qBAAS,CAAC,OAAO,CAAC,CAAC;IAChC,CAAC;IAEO,MAAM,CAAC,KAAK,CAAC,2BAA2B,CAC9C,WAAwB,EACxB,IAAa,EACb,QAAwB,EACxB,QAAiB,EACjB,WAAyC,MAAM,EAC/C,YAAqB;QAErB,gFAAgF;QAChF,IAAI,QAAQ,KAAK,QAAQ;YACvB,OAAO,MAAM,IAAI,CAAC,kBAAkB,CAClC,WAAW,EACX,QAAQ,EACR,IAAI,EACJ,YAAY,CACb,CAAC;QAEJ,4GAA4G;QAC5G,MAAM,UAAU,GAAG,wBAAU,CAAC,cAAc,CAAC,WAAW,CAAC,CAAC;QAC1D,MAAM,gBAAgB,GACpB,QAAQ,IAAI,wBAAU,CAAC,mBAAmB,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC;QAEpE,MAAM,OAAO,GAAe,EAAE,CAAC;QAE/B,6EAA6E;QAC7E,iGAAiG;QACjG,IAAI,IAAI,EAAE,CAAC;YACT,OAAO,CAAC,IAAI,CAAC;gBACX,IAAI,EAAE,MAAM;gBACZ,IAAI,EAAE,IAAI;aACX,CAAC,CAAC;QACL,CAAC;QAED,wEAAwE;QACxE,gEAAgE;QAChE,MAAM,UAAU,GAAsB;YACpC,IAAI,EAAE,OAAO;YACb,WAAW,EAAE,QAAQ;YACrB,IAAI,EAAE,UAAU;YAChB,SAAS,EAAE,gBAAgB;SAC5B,CAAC;QACF,OAAO,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QAEzB,2DAA2D;QAC3D,mEAAmE;QACnE,OAAO,IAAI,wBAAY,CAAC;YACtB,OAAO,EAAE,OAAc;SACjB,CAAC,CAAC;IACZ,CAAC;IAEO,MAAM,CAAC,KAAK,CAAC,kBAAkB,CACrC,WAAwB,EACxB,QAAwB,EACxB,IAAa,EACb,YAAqB;QAErB,IAAI,CAAC,YAAY;YACf,MAAM,IAAI,KAAK,CACb,mIAAmI,CACpI,CAAC;QAEJ,IAAI,CAAC;YACH,4EAA4E;YAC5E,oFAAoF;YACpF,MAAM,oBAAoB,GAAG,QAAQ;gBACnC,CAAC,CAAC,EAAE,MAAM,EAAE,QAAQ,EAAE;gBACtB,CAAC,CAAC,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC;YAEvB,MAAM,eAAe,GACnB,MAAM,iDAA2B,CAAC,qBAAqB,CACrD,WAAW,EACX,oBAAoB,EACpB,YAAY,CACb,CAAC;YAEJ,4DAA4D;YAC5D,MAAM,SAAS,GAAG,IAAI;gBACpB,CAAC,
CAAC,GAAG,IAAI,yBAAyB,eAAe,EAAE;gBACnD,CAAC,CAAC,qBAAqB,eAAe,EAAE,CAAC;YAE3C,0DAA0D;YAC1D,OAAO,IAAI,wBAAY,CAAC,SAAS,CAAC,CAAC;QACrC,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,kDAAkD;YAClD,yEAAyE;YACzE,MAAM,IAAI,KAAK,CACb,wDACE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CACvD,IAAI;gBACF,mGAAmG,CACtG,CAAC;QACJ,CAAC;IACH,CAAC;CACF;AA1HD,8CA0HC"}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import type
|
|
1
|
+
import { type AudioBuffer, type AudioMimeType } from "../@types/audio";
|
|
2
2
|
export declare class FilesUtils {
|
|
3
|
-
static createTempFile(audioBuffer: AudioBuffer, prefix?: string): string;
|
|
3
|
+
static createTempFile(audioBuffer: AudioBuffer, prefix?: string, format?: string | AudioMimeType): string;
|
|
4
4
|
static cleanupTempFile(filePath: string): void;
|
|
5
5
|
}
|
|
6
6
|
//# sourceMappingURL=files-utils.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"files-utils.d.ts","sourceRoot":"","sources":["../../src/utils/files-utils.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,
|
|
1
|
+
{"version":3,"file":"files-utils.d.ts","sourceRoot":"","sources":["../../src/utils/files-utils.ts"],"names":[],"mappings":"AAGA,OAAO,EAEL,KAAK,WAAW,EAChB,KAAK,aAAa,EACnB,MAAM,iBAAiB,CAAC;AAEzB,qBAAa,UAAU;IACrB,MAAM,CAAC,cAAc,CACnB,WAAW,EAAE,WAAW,EACxB,MAAM,GAAE,MAAgB,EACxB,MAAM,CAAC,EAAE,MAAM,GAAG,aAAa,GAC9B,MAAM;IA8BT,MAAM,CAAC,eAAe,CAAC,QAAQ,EAAE,MAAM,GAAG,IAAI;CAS/C"}
|
|
@@ -37,10 +37,18 @@ exports.FilesUtils = void 0;
|
|
|
37
37
|
const fs = __importStar(require("fs"));
|
|
38
38
|
const path = __importStar(require("path"));
|
|
39
39
|
const os = __importStar(require("os"));
|
|
40
|
+
const audio_1 = require("../@types/audio");
|
|
40
41
|
class FilesUtils {
|
|
41
|
-
static createTempFile(audioBuffer, prefix = "audio") {
|
|
42
|
+
static createTempFile(audioBuffer, prefix = "audio", format) {
|
|
43
|
+
const extension = format
|
|
44
|
+
? format.startsWith("audio/")
|
|
45
|
+
? audio_1.MIME_TO_EXTENSION[format] ?? "mp3"
|
|
46
|
+
: format.replace(/^\./, "")
|
|
47
|
+
: "mp3";
|
|
42
48
|
const tempDir = os.tmpdir();
|
|
43
|
-
const tempFilePath = path.join(tempDir, `${prefix}-${Date.now()}-${Math.random()
|
|
49
|
+
const tempFilePath = path.join(tempDir, `${prefix}-${Date.now()}-${Math.random()
|
|
50
|
+
.toString(36)
|
|
51
|
+
.substring(7)}.${extension}`);
|
|
44
52
|
// Converte o buffer para Buffer se necessário
|
|
45
53
|
let buffer;
|
|
46
54
|
if (audioBuffer instanceof Buffer) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"files-utils.js","sourceRoot":"","sources":["../../src/utils/files-utils.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,uCAAyB;AACzB,2CAA6B;AAC7B,uCAAyB;
|
|
1
|
+
{"version":3,"file":"files-utils.js","sourceRoot":"","sources":["../../src/utils/files-utils.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,uCAAyB;AACzB,2CAA6B;AAC7B,uCAAyB;AACzB,2CAIyB;AAEzB,MAAa,UAAU;IACrB,MAAM,CAAC,cAAc,CACnB,WAAwB,EACxB,SAAiB,OAAO,EACxB,MAA+B;QAE/B,MAAM,SAAS,GAAG,MAAM;YACtB,CAAC,CAAC,MAAM,CAAC,UAAU,CAAC,QAAQ,CAAC;gBAC3B,CAAC,CAAC,yBAAiB,CAAC,MAAuB,CAAC,IAAI,KAAK;gBACrD,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC;YAC7B,CAAC,CAAC,KAAK,CAAC;QAEV,MAAM,OAAO,GAAG,EAAE,CAAC,MAAM,EAAE,CAAC;QAC5B,MAAM,YAAY,GAAG,IAAI,CAAC,IAAI,CAC5B,OAAO,EACP,GAAG,MAAM,IAAI,IAAI,CAAC,GAAG,EAAE,IAAI,IAAI,CAAC,MAAM,EAAE;aACrC,QAAQ,CAAC,EAAE,CAAC;aACZ,SAAS,CAAC,CAAC,CAAC,IAAI,SAAS,EAAE,CAC/B,CAAC;QAEF,8CAA8C;QAC9C,IAAI,MAAc,CAAC;QACnB,IAAI,WAAW,YAAY,MAAM,EAAE,CAAC;YAClC,MAAM,GAAG,WAAW,CAAC;QACvB,CAAC;aAAM,IAAI,WAAW,YAAY,WAAW,EAAE,CAAC;YAC9C,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACpC,CAAC;aAAM,CAAC;YACN,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACpC,CAAC;QAED,+BAA+B;QAC/B,EAAE,CAAC,aAAa,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC;QACvC,OAAO,YAAY,CAAC;IACtB,CAAC;IAED,MAAM,CAAC,eAAe,CAAC,QAAgB;QACrC,IAAI,EAAE,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC5B,IAAI,CAAC;gBACH,EAAE,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC;YAC1B,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,OAAO,CAAC,IAAI,CAAC,uCAAuC,KAAK,EAAE,CAAC,CAAC;YAC/D,CAAC;QACH,CAAC;IACH,CAAC;CACF;AA5CD,gCA4CC"}
|
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
# Bug do LangChain no Windows: Transcrição de Áudio com Whisper - 2026-02-02
|
|
2
|
+
|
|
3
|
+
## Resumo
|
|
4
|
+
|
|
5
|
+
O loader `OpenAIWhisperAudio` do LangChain passa o **caminho completo do arquivo** (ex: `C:\Users\...\whisper-xxx.webm`) para a API da OpenAI. No Windows, isso pode causar falhas no envio multipart devido a backslashes e caracteres especiais no path.
|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## Passo a passo: O que acontece quando você transcreve áudio
|
|
10
|
+
|
|
11
|
+
### 1. Seu código chama a transcrição
|
|
12
|
+
|
|
13
|
+
```typescript
|
|
14
|
+
// No seu código (@luanpoppe/ai)
|
|
15
|
+
const tempFilePath = FilesUtils.createTempFile(audioBuffer, "whisper", options.format);
|
|
16
|
+
// tempFilePath = "C:\Users\luan\AppData\Local\Temp\whisper-1738512345-abc123.webm"
|
|
17
|
+
|
|
18
|
+
const loader = new OpenAIWhisperAudio(tempFilePath, { ... });
|
|
19
|
+
const docs = await loader.load();
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
Você cria um arquivo temporário e passa o **caminho completo** para o loader do LangChain.
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
### 2. O LangChain lê o arquivo
|
|
27
|
+
|
|
28
|
+
O `OpenAIWhisperAudio` estende `BufferLoader`. Internamente, ele:
|
|
29
|
+
|
|
30
|
+
1. Lê o conteúdo do arquivo em memória (um `Buffer`)
|
|
31
|
+
2. Guarda o caminho original em `metadata.source`
|
|
32
|
+
|
|
33
|
+
```javascript
|
|
34
|
+
// Dentro do BufferLoader (simplificado)
|
|
35
|
+
metadata = {
|
|
36
|
+
source:
|
|
37
|
+
"C:\\Users\\luan\\AppData\\Local\\Temp\\whisper-1738512345-abc123.webm",
|
|
38
|
+
};
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
---
|
|
42
|
+
|
|
43
|
+
### 3. O LangChain chama a API da OpenAI
|
|
44
|
+
|
|
45
|
+
O método `parse()` do loader faz algo assim:
|
|
46
|
+
|
|
47
|
+
```javascript
|
|
48
|
+
// Código atual do LangChain (com o bug)
|
|
49
|
+
const fileName = metadata.source === "blob" ? metadata.blobType : metadata.source;
|
|
50
|
+
// fileName = "C:\Users\luan\AppData\Local\Temp\whisper-1738512345-abc123.webm"
|
|
51
|
+
|
|
52
|
+
const transcriptionResponse = await this.openAIClient.audio.transcriptions.create({
|
|
53
|
+
file: await toFile(raw, fileName), // ← O problema está aqui!
|
|
54
|
+
model: "whisper-1",
|
|
55
|
+
...
|
|
56
|
+
});
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
O segundo parâmetro de `toFile(raw, fileName)` deveria ser **apenas o nome do arquivo** (ex: `whisper-1738512345-abc123.webm`), mas o LangChain passa o **caminho completo**.
|
|
60
|
+
|
|
61
|
+
---
|
|
62
|
+
|
|
63
|
+
### 4. O que a API da OpenAI espera
|
|
64
|
+
|
|
65
|
+
A função `toFile` da OpenAI usa o segundo parâmetro para:
|
|
66
|
+
|
|
67
|
+
- Definir o nome do arquivo no formulário multipart (Content-Disposition)
|
|
68
|
+
- Ajudar a API a identificar o formato do áudio pela extensão
|
|
69
|
+
|
|
70
|
+
O formato multipart espera algo como:
|
|
71
|
+
|
|
72
|
+
```
|
|
73
|
+
Content-Disposition: form-data; name="file"; filename="whisper-xxx.webm"
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
Com o caminho completo do Windows:
|
|
77
|
+
|
|
78
|
+
```
|
|
79
|
+
Content-Disposition: form-data; name="file"; filename="C:\Users\luan\AppData\Local\Temp\whisper-xxx.webm"
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
Os backslashes (`\`) e o path longo podem causar:
|
|
83
|
+
|
|
84
|
+
- Parsing incorreto do header
|
|
85
|
+
- Erros de encoding
|
|
86
|
+
- Rejeição pela API
|
|
87
|
+
|
|
88
|
+
---
|
|
89
|
+
|
|
90
|
+
## Por que não dá para resolver no seu código?
|
|
91
|
+
|
|
92
|
+
### O fluxo de dados
|
|
93
|
+
|
|
94
|
+
```
|
|
95
|
+
Seu código LangChain (biblioteca) API OpenAI
|
|
96
|
+
| | |
|
|
97
|
+
| tempFilePath (path completo) | |
|
|
98
|
+
| ----------------------------->| |
|
|
99
|
+
| | Lê arquivo, guarda em metadata |
|
|
100
|
+
| | metadata.source = path completo |
|
|
101
|
+
| | |
|
|
102
|
+
| | toFile(buffer, metadata.source) |
|
|
103
|
+
| | ↑ usa path completo internamente |
|
|
104
|
+
| | --------------------------------->|
|
|
105
|
+
| | Envio multipart|
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
### Onde está o controle?
|
|
109
|
+
|
|
110
|
+
| Etapa | Quem controla | O que você pode fazer |
|
|
111
|
+
| ----------------------- | ------------- | ----------------------------------------------------------------- |
|
|
112
|
+
| Criar arquivo temp | **Você** | Escolher onde criar (ex: `os.tmpdir()`) |
|
|
113
|
+
| Path passado ao loader | **Você** | Só pode passar um path – o loader precisa dele para ler o arquivo |
|
|
114
|
+
| Valor usado em `toFile` | **LangChain** | Você não tem acesso – é interno ao loader |
|
|
115
|
+
| Chamada à API | **LangChain** | Você não controla |
|
|
116
|
+
|
|
117
|
+
### O problema central
|
|
118
|
+
|
|
119
|
+
O loader **precisa** do path completo para **ler o arquivo** do disco. Não há como passar “só o nome” – o LangChain precisa do path para fazer `fs.readFile` (ou equivalente).
|
|
120
|
+
|
|
121
|
+
Depois de ler, o loader usa o mesmo `metadata.source` (o path) como nome do arquivo no `toFile`. A decisão de usar `metadata.source` em vez de `path.basename(metadata.source)` está **dentro do LangChain**, não no seu código.
|
|
122
|
+
|
|
123
|
+
### Por que não dá para “enganar” o loader?
|
|
124
|
+
|
|
125
|
+
**Tentativa 1: Criar o arquivo em um path curto**
|
|
126
|
+
|
|
127
|
+
```typescript
|
|
128
|
+
// Ex: ./whisper-temp.webm
|
|
129
|
+
const tempFilePath = path.join(process.cwd(), "whisper-temp.webm");
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
Ainda assim, `metadata.source` será algo como `C:\projeto\whisper-temp.webm`. O path continua completo; o LangChain continua passando ele para `toFile`.
|
|
133
|
+
|
|
134
|
+
**Tentativa 2: Passar um Blob em vez de path**
|
|
135
|
+
|
|
136
|
+
O loader aceita `string | Blob`. Se você passar um Blob, `metadata.source === "blob"` e ele usa `metadata.blobType` (ex: `"audio/webm"`). O Blob não tem path, então não há problema de Windows.
|
|
137
|
+
|
|
138
|
+
Porém: o LangChain não expõe uma forma simples de criar o loader a partir de um Buffer/Blob com controle total do fluxo. E o `BufferLoader` espera um Blob do browser ou um path de arquivo – em Node.js, o uso típico é com path.
|
|
139
|
+
|
|
140
|
+
**Tentativa 3: Wrapper ou monkey-patch**
|
|
141
|
+
|
|
142
|
+
Você poderia tentar interceptar ou substituir o loader, mas:
|
|
143
|
+
|
|
144
|
+
- O loader é instanciado por você, mas a chamada a `toFile` acontece dentro dele
|
|
145
|
+
- Você não controla o que é passado para `toFile`
|
|
146
|
+
- Faria seu código depender de detalhes internos do LangChain, frágeis a atualizações
|
|
147
|
+
|
|
148
|
+
---
|
|
149
|
+
|
|
150
|
+
## Correção sugerida (no LangChain)
|
|
151
|
+
|
|
152
|
+
No arquivo `openai_whisper_audio.ts` do LangChain:
|
|
153
|
+
|
|
154
|
+
```typescript
|
|
155
|
+
// Antes (com bug)
|
|
156
|
+
const fileName =
|
|
157
|
+
metadata.source === "blob" ? metadata.blobType : metadata.source;
|
|
158
|
+
|
|
159
|
+
// Depois (corrigido)
|
|
160
|
+
const fileName =
|
|
161
|
+
metadata.source === "blob"
|
|
162
|
+
? metadata.blobType
|
|
163
|
+
: path.basename(metadata.source);
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
Assim, quando `metadata.source` for um path de arquivo, só o nome do arquivo (ex: `whisper-xxx.webm`) é enviado para `toFile`.
|
|
167
|
+
|
|
168
|
+
---
|
|
169
|
+
|
|
170
|
+
## Suas opções práticas
|
|
171
|
+
|
|
172
|
+
1. **Abrir um PR no LangChain** com essa correção e aguardar o merge.
|
|
173
|
+
2. **Usar `pnpm patch`** (ou `patch-package`, em projetos npm/yarn) para reaplicar essa alteração automaticamente no `node_modules` após cada instalação de dependências.
|
|
174
|
+
3. **Usar a API da OpenAI diretamente** (sem o loader do LangChain) e controlar o nome do arquivo no `toFile`.
|
|
175
|
+
|
|
176
|
+
---
|
|
177
|
+
|
|
178
|
+
## Diagrama do fluxo
|
|
179
|
+
|
|
180
|
+
```
|
|
181
|
+
┌─────────────────────────────────────────────────────────────────────────┐
|
|
182
|
+
│ SEU CÓDIGO │
|
|
183
|
+
│ │
|
|
184
|
+
│ createTempFile() → "C:\Users\...\Temp\whisper-xxx.webm" │
|
|
185
|
+
│ │ │
|
|
186
|
+
│ ▼ │
|
|
187
|
+
│ new OpenAIWhisperAudio(tempFilePath) │
|
|
188
|
+
└─────────────────────────────────────────────────────────────────────────┘
|
|
189
|
+
│
|
|
190
|
+
│ path completo
|
|
191
|
+
▼
|
|
192
|
+
┌─────────────────────────────────────────────────────────────────────────┐
|
|
193
|
+
│ LANGCHAIN (você não controla) │
|
|
194
|
+
│ │
|
|
195
|
+
│ 1. Lê arquivo do path │
|
|
196
|
+
│ 2. metadata.source = path ← guarda path completo │
|
|
197
|
+
│ 3. fileName = metadata.source ← BUG: usa path completo │
|
|
198
|
+
│ 4. toFile(buffer, fileName) ← envia path para OpenAI │
|
|
199
|
+
└─────────────────────────────────────────────────────────────────────────┘
|
|
200
|
+
│
|
|
201
|
+
│ multipart com filename = path
|
|
202
|
+
▼
|
|
203
|
+
┌─────────────────────────────────────────────────────────────────────────┐
|
|
204
|
+
│ API OPENAI │
|
|
205
|
+
│ │
|
|
206
|
+
│ Recebe: filename="C:\Users\...\whisper-xxx.webm" │
|
|
207
|
+
│ Problema: backslashes, path longo → pode falhar no Windows │
|
|
208
|
+
└─────────────────────────────────────────────────────────────────────────┘
|
|
209
|
+
```
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@luanpoppe/ai",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.10",
|
|
4
4
|
"description": "",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"keywords": [],
|
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
"license": "ISC",
|
|
9
9
|
"dependencies": {
|
|
10
10
|
"@langchain/community": "^1.1.10",
|
|
11
|
+
"openai": "^6.17.0",
|
|
11
12
|
"@langchain/core": "^1.1.16",
|
|
12
13
|
"@langchain/google-genai": "^2.1.12",
|
|
13
14
|
"@langchain/openai": "^1.2.3",
|
package/src/@types/audio.ts
CHANGED
|
@@ -13,3 +13,18 @@ export type AudioMimeType =
|
|
|
13
13
|
| "audio/flac"
|
|
14
14
|
| "audio/aac"
|
|
15
15
|
| "audio/opus";
|
|
16
|
+
|
|
17
|
+
export const MIME_TO_EXTENSION: Record<AudioMimeType, string> = {
|
|
18
|
+
"audio/mpeg": "mp3",
|
|
19
|
+
"audio/mp3": "mp3",
|
|
20
|
+
"audio/wav": "wav",
|
|
21
|
+
"audio/wave": "wav",
|
|
22
|
+
"audio/x-wav": "wav",
|
|
23
|
+
"audio/mp4": "mp4",
|
|
24
|
+
"audio/m4a": "m4a",
|
|
25
|
+
"audio/webm": "webm",
|
|
26
|
+
"audio/ogg": "ogg",
|
|
27
|
+
"audio/flac": "flac",
|
|
28
|
+
"audio/aac": "aac",
|
|
29
|
+
"audio/opus": "opus",
|
|
30
|
+
};
|
package/src/index.ts
CHANGED
|
@@ -137,7 +137,7 @@ export class Langchain {
|
|
|
137
137
|
getRawAgent(
|
|
138
138
|
params: LangchainCallParams,
|
|
139
139
|
outputSchema?: z.ZodSchema | undefined
|
|
140
|
-
) {
|
|
140
|
+
): { agent: ReturnType<typeof createAgent> } {
|
|
141
141
|
const agent = createAgent({
|
|
142
142
|
...this.standardAgent(params),
|
|
143
143
|
responseFormat: outputSchema as any,
|
|
@@ -217,4 +217,7 @@ export type {
|
|
|
217
217
|
AudioContentBlock,
|
|
218
218
|
HumanMessageWithAudioOptions,
|
|
219
219
|
} from "./langchain/messages";
|
|
220
|
-
export type {
|
|
220
|
+
export type {
|
|
221
|
+
WhisperModel,
|
|
222
|
+
WhisperTranscriptionOptions,
|
|
223
|
+
} from "./langchain/audio-transcription";
|
|
@@ -1,70 +1,87 @@
|
|
|
1
|
-
import { Document } from "@langchain/core/documents";
|
|
2
1
|
import * as fs from "fs";
|
|
3
|
-
import
|
|
4
|
-
import
|
|
5
|
-
import {
|
|
2
|
+
import * as path from "path";
|
|
3
|
+
import OpenAI from "openai";
|
|
4
|
+
import { toFile } from "openai";
|
|
5
|
+
import type { AudioBuffer, AudioMimeType } from "../@types/audio";
|
|
6
|
+
import { MIME_TO_EXTENSION } from "../@types/audio";
|
|
7
|
+
|
|
8
|
+
/** Modelos disponíveis na API de transcrição OpenAI (Speech-to-Text) */
|
|
9
|
+
export type WhisperModel =
|
|
10
|
+
| "whisper-1"
|
|
11
|
+
| "gpt-4o-transcribe"
|
|
12
|
+
| "gpt-4o-mini-transcribe"
|
|
13
|
+
| "gpt-4o-mini-transcribe-2025-12-15"
|
|
14
|
+
| "gpt-4o-transcribe-diarize";
|
|
6
15
|
|
|
7
16
|
export type WhisperTranscriptionOptions = {
|
|
8
|
-
|
|
17
|
+
/** Modelo de transcrição. Padrão: "whisper-1". gpt-4o-transcribe e gpt-4o-mini-transcribe têm maior qualidade. */
|
|
18
|
+
model?: WhisperModel;
|
|
19
|
+
languageIn2Digits?: string;
|
|
9
20
|
prompt?: string;
|
|
10
21
|
responseFormat?: "json" | "text" | "srt" | "verbose_json" | "vtt";
|
|
11
22
|
temperature?: number;
|
|
12
23
|
timestampGranularities?: ("word" | "segment")[];
|
|
24
|
+
/** Formato do áudio: extensão ("mp3", "wav", "webm") ou MIME type ("audio/wav", "audio/webm") */
|
|
25
|
+
format?: string | AudioMimeType;
|
|
13
26
|
};
|
|
14
27
|
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
}
|
|
20
|
-
const firstDoc = docs[0];
|
|
21
|
-
if (!firstDoc) {
|
|
22
|
-
throw new Error("Documento vazio retornado pela transcrição");
|
|
23
|
-
}
|
|
24
|
-
// LangChain.js usa pageContent (camelCase), não page_content
|
|
25
|
-
return firstDoc.pageContent;
|
|
28
|
+
function getExtension(format?: string | AudioMimeType): string {
|
|
29
|
+
if (!format) return "mp3";
|
|
30
|
+
if (format.startsWith("audio/")) {
|
|
31
|
+
return MIME_TO_EXTENSION[format as AudioMimeType] ?? "mp3";
|
|
26
32
|
}
|
|
33
|
+
return format.replace(/^\./, "");
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
function toBuffer(audioBuffer: AudioBuffer): Buffer {
|
|
37
|
+
if (audioBuffer instanceof Buffer) return audioBuffer;
|
|
38
|
+
if (audioBuffer instanceof ArrayBuffer) return Buffer.from(audioBuffer);
|
|
39
|
+
return Buffer.from(audioBuffer as Uint8Array);
|
|
40
|
+
}
|
|
27
41
|
|
|
42
|
+
export class LangchainAudioTranscription {
|
|
28
43
|
static async transcribeWithWhisper(
|
|
29
44
|
audioBuffer: AudioBuffer,
|
|
30
45
|
options: WhisperTranscriptionOptions = {},
|
|
31
46
|
openAIApiKey?: string
|
|
32
47
|
): Promise<string> {
|
|
33
|
-
|
|
48
|
+
if (openAIApiKey) {
|
|
49
|
+
process.env.OPENAI_API_KEY = openAIApiKey;
|
|
50
|
+
}
|
|
34
51
|
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
process.env.OPENAI_API_KEY = openAIApiKey;
|
|
39
|
-
}
|
|
52
|
+
const buffer = toBuffer(audioBuffer);
|
|
53
|
+
const extension = getExtension(options.format);
|
|
54
|
+
const fileName = `whisper-${Date.now()}.${extension}`;
|
|
40
55
|
|
|
41
|
-
|
|
42
|
-
response_format: options.responseFormat || "text",
|
|
43
|
-
};
|
|
56
|
+
const file = await toFile(buffer, fileName);
|
|
44
57
|
|
|
45
|
-
|
|
46
|
-
transcriptionParams.language = options.language;
|
|
47
|
-
}
|
|
48
|
-
if (options.prompt) {
|
|
49
|
-
transcriptionParams.prompt = options.prompt;
|
|
50
|
-
}
|
|
51
|
-
if (options.temperature !== undefined) {
|
|
52
|
-
transcriptionParams.temperature = options.temperature;
|
|
53
|
-
}
|
|
54
|
-
if (options.timestampGranularities) {
|
|
55
|
-
transcriptionParams.timestamp_granularities =
|
|
56
|
-
options.timestampGranularities;
|
|
57
|
-
}
|
|
58
|
+
const openai = new OpenAI();
|
|
58
59
|
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
60
|
+
const transcriptionParams: OpenAI.Audio.TranscriptionCreateParams = {
|
|
61
|
+
file,
|
|
62
|
+
model: options.model ?? "whisper-1",
|
|
63
|
+
response_format: options.responseFormat ?? "text",
|
|
64
|
+
};
|
|
62
65
|
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
}
|
|
66
|
-
|
|
66
|
+
if (options.languageIn2Digits) {
|
|
67
|
+
transcriptionParams.language = options.languageIn2Digits;
|
|
68
|
+
}
|
|
69
|
+
if (options.prompt) {
|
|
70
|
+
transcriptionParams.prompt = options.prompt;
|
|
71
|
+
}
|
|
72
|
+
if (options.temperature !== undefined) {
|
|
73
|
+
transcriptionParams.temperature = options.temperature;
|
|
67
74
|
}
|
|
75
|
+
if (options.timestampGranularities) {
|
|
76
|
+
transcriptionParams.timestamp_granularities =
|
|
77
|
+
options.timestampGranularities;
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
const response = await openai.audio.transcriptions.create(
|
|
81
|
+
transcriptionParams
|
|
82
|
+
);
|
|
83
|
+
|
|
84
|
+
return typeof response === "string" ? response : response.text;
|
|
68
85
|
}
|
|
69
86
|
|
|
70
87
|
static async transcribeFileWithWhisper(
|
|
@@ -77,6 +94,12 @@ export class LangchainAudioTranscription {
|
|
|
77
94
|
}
|
|
78
95
|
|
|
79
96
|
const audioBuffer = fs.readFileSync(filePath);
|
|
80
|
-
|
|
97
|
+
const format =
|
|
98
|
+
options.format ?? (path.extname(filePath).replace(/^\./, "") || "mp3");
|
|
99
|
+
return this.transcribeWithWhisper(
|
|
100
|
+
audioBuffer,
|
|
101
|
+
{ ...options, format },
|
|
102
|
+
openAIApiKey
|
|
103
|
+
);
|
|
81
104
|
}
|
|
82
105
|
}
|
|
@@ -118,7 +118,7 @@ export class LangchainMessages {
|
|
|
118
118
|
// Prepara opções de transcrição - só inclui language se não houver mimeType
|
|
119
119
|
// Com exactOptionalPropertyTypes: true, não podemos passar undefined explicitamente
|
|
120
120
|
const transcriptionOptions = mimeType
|
|
121
|
-
?
|
|
121
|
+
? { format: mimeType }
|
|
122
122
|
: { language: "pt" };
|
|
123
123
|
|
|
124
124
|
const transcribedText =
|
package/src/utils/files-utils.ts
CHANGED
|
@@ -1,17 +1,30 @@
|
|
|
1
1
|
import * as fs from "fs";
|
|
2
2
|
import * as path from "path";
|
|
3
3
|
import * as os from "os";
|
|
4
|
-
import
|
|
4
|
+
import {
|
|
5
|
+
MIME_TO_EXTENSION,
|
|
6
|
+
type AudioBuffer,
|
|
7
|
+
type AudioMimeType,
|
|
8
|
+
} from "../@types/audio";
|
|
5
9
|
|
|
6
10
|
export class FilesUtils {
|
|
7
11
|
static createTempFile(
|
|
8
12
|
audioBuffer: AudioBuffer,
|
|
9
|
-
prefix: string = "audio"
|
|
13
|
+
prefix: string = "audio",
|
|
14
|
+
format?: string | AudioMimeType
|
|
10
15
|
): string {
|
|
16
|
+
const extension = format
|
|
17
|
+
? format.startsWith("audio/")
|
|
18
|
+
? MIME_TO_EXTENSION[format as AudioMimeType] ?? "mp3"
|
|
19
|
+
: format.replace(/^\./, "")
|
|
20
|
+
: "mp3";
|
|
21
|
+
|
|
11
22
|
const tempDir = os.tmpdir();
|
|
12
23
|
const tempFilePath = path.join(
|
|
13
24
|
tempDir,
|
|
14
|
-
`${prefix}-${Date.now()}-${Math.random()
|
|
25
|
+
`${prefix}-${Date.now()}-${Math.random()
|
|
26
|
+
.toString(36)
|
|
27
|
+
.substring(7)}.${extension}`
|
|
15
28
|
);
|
|
16
29
|
|
|
17
30
|
// Converte o buffer para Buffer se necessário
|
|
@@ -1,287 +1,180 @@
|
|
|
1
1
|
import { describe, it, expect, vi, beforeEach } from "vitest";
|
|
2
|
-
import { LangchainAudioTranscription } from "../../../src/langchain/audio-transcription";
|
|
2
|
+
import { LangchainAudioTranscription } from "../../../src/langchain/audio-transcription.js";
|
|
3
3
|
import * as fs from "fs";
|
|
4
|
-
import * as os from "os";
|
|
5
|
-
import * as path from "path";
|
|
6
4
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
import * as realOs from "os";
|
|
10
|
-
import * as realPath from "path";
|
|
11
|
-
|
|
12
|
-
// Calcula tempDir usando os módulos reais (antes dos mocks)
|
|
13
|
-
const tempDir = realPath.join(realOs.tmpdir(), "langchain-audio-test");
|
|
14
|
-
|
|
15
|
-
// Garante que o diretório temporário existe
|
|
16
|
-
if (!realFs.existsSync(tempDir)) {
|
|
17
|
-
realFs.mkdirSync(tempDir, { recursive: true });
|
|
18
|
-
}
|
|
19
|
-
|
|
20
|
-
vi.mock("fs", () => {
|
|
21
|
-
// Usa require para acessar o módulo real diretamente
|
|
22
|
-
const actualFs = require("fs");
|
|
23
|
-
|
|
24
|
-
const writeFileSyncSpy = vi.fn((filePath: string, data: Buffer) => {
|
|
25
|
-
// Cria o arquivo real no sistema de arquivos
|
|
26
|
-
actualFs.writeFileSync(filePath, data);
|
|
27
|
-
});
|
|
28
|
-
|
|
29
|
-
const readFileSyncSpy = vi.fn((filePath: string) => {
|
|
30
|
-
if (actualFs.existsSync(filePath)) {
|
|
31
|
-
return actualFs.readFileSync(filePath);
|
|
32
|
-
}
|
|
33
|
-
return Buffer.from("fake audio data");
|
|
34
|
-
});
|
|
35
|
-
|
|
36
|
-
const existsSyncSpy = vi.fn((filePath: string) => {
|
|
37
|
-
return actualFs.existsSync(filePath) || filePath.startsWith("/path/to/");
|
|
38
|
-
});
|
|
39
|
-
|
|
40
|
-
const unlinkSyncSpy = vi.fn((filePath: string) => {
|
|
41
|
-
if (actualFs.existsSync(filePath)) {
|
|
42
|
-
actualFs.unlinkSync(filePath);
|
|
43
|
-
}
|
|
44
|
-
});
|
|
45
|
-
|
|
46
|
-
return {
|
|
47
|
-
...actualFs,
|
|
48
|
-
writeFileSync: writeFileSyncSpy,
|
|
49
|
-
readFileSync: readFileSyncSpy,
|
|
50
|
-
existsSync: existsSyncSpy,
|
|
51
|
-
unlinkSync: unlinkSyncSpy,
|
|
52
|
-
};
|
|
5
|
+
const mockTranscriptionsCreate = vi.fn().mockResolvedValue({
|
|
6
|
+
text: "Texto transcrito do áudio",
|
|
53
7
|
});
|
|
54
8
|
|
|
55
|
-
vi.mock("
|
|
56
|
-
// Calcula tempDir dentro do mock usando os módulos reais
|
|
57
|
-
const realOs = require("os");
|
|
58
|
-
const realPath = require("path");
|
|
59
|
-
const tempDirValue = realPath.join(realOs.tmpdir(), "langchain-audio-test");
|
|
60
|
-
|
|
9
|
+
vi.mock("openai", () => {
|
|
61
10
|
return {
|
|
62
|
-
|
|
11
|
+
default: class MockOpenAI {
|
|
12
|
+
audio = {
|
|
13
|
+
transcriptions: {
|
|
14
|
+
create: mockTranscriptionsCreate,
|
|
15
|
+
},
|
|
16
|
+
};
|
|
17
|
+
},
|
|
18
|
+
toFile: async (buffer: Buffer, filename: string) => {
|
|
19
|
+
return new File([new Uint8Array(buffer)], filename, {
|
|
20
|
+
type: "audio/mpeg",
|
|
21
|
+
});
|
|
22
|
+
},
|
|
63
23
|
};
|
|
64
24
|
});
|
|
65
25
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
// que o mock seja aplicado antes do código ser executado
|
|
69
|
-
vi.mock("@langchain/community/document_loaders/fs/openai_whisper_audio", () => {
|
|
70
|
-
// Importa o fs mockado para verificar arquivos
|
|
71
|
-
const fs = require("fs");
|
|
72
|
-
|
|
73
|
-
class MockOpenAIWhisperAudio {
|
|
74
|
-
constructor(public filePath: string, public options?: any) {
|
|
75
|
-
// Verifica se o arquivo existe usando o fs mockado
|
|
76
|
-
// O arquivo já deve ter sido criado pelo writeFileSync antes desta chamada
|
|
77
|
-
if (!fs.existsSync(filePath)) {
|
|
78
|
-
// Se não existe, lança o mesmo erro que o loader real lançaria
|
|
79
|
-
const error: any = new Error(`ENOENT: no such file or directory, open '${filePath}'`);
|
|
80
|
-
error.code = "ENOENT";
|
|
81
|
-
error.errno = -4058;
|
|
82
|
-
error.syscall = "open";
|
|
83
|
-
error.path = filePath;
|
|
84
|
-
throw error;
|
|
85
|
-
}
|
|
86
|
-
}
|
|
87
|
-
|
|
88
|
-
async load() {
|
|
89
|
-
return [
|
|
90
|
-
{
|
|
91
|
-
pageContent: "Texto transcrito do áudio",
|
|
92
|
-
metadata: {},
|
|
93
|
-
},
|
|
94
|
-
];
|
|
95
|
-
}
|
|
96
|
-
}
|
|
97
|
-
|
|
26
|
+
vi.mock("fs", () => {
|
|
27
|
+
const actualFs = require("fs");
|
|
98
28
|
return {
|
|
99
|
-
|
|
29
|
+
...actualFs,
|
|
30
|
+
existsSync: vi.fn((filePath: string) => {
|
|
31
|
+
return actualFs.existsSync(filePath) || filePath.startsWith("/path/to/");
|
|
32
|
+
}),
|
|
33
|
+
readFileSync: vi.fn((filePath: string) => {
|
|
34
|
+
if (actualFs.existsSync(filePath)) {
|
|
35
|
+
return actualFs.readFileSync(filePath);
|
|
36
|
+
}
|
|
37
|
+
return Buffer.from("fake audio data");
|
|
38
|
+
}),
|
|
100
39
|
};
|
|
101
40
|
});
|
|
102
41
|
|
|
103
|
-
// Mock também precisa interceptar o require() dinâmico usado no código
|
|
104
|
-
// Vamos mockar o módulo de forma que o require() pegue o mock
|
|
105
|
-
|
|
106
42
|
describe("LangchainAudioTranscription", () => {
|
|
107
43
|
beforeEach(() => {
|
|
108
44
|
vi.clearAllMocks();
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
const files = realFs.readdirSync(tempDir);
|
|
113
|
-
files.forEach((file) => {
|
|
114
|
-
const filePath = realPath.join(tempDir, file);
|
|
115
|
-
try {
|
|
116
|
-
realFs.unlinkSync(filePath);
|
|
117
|
-
} catch (error) {
|
|
118
|
-
// Ignora erros ao remover arquivos
|
|
119
|
-
}
|
|
120
|
-
});
|
|
121
|
-
}
|
|
122
|
-
});
|
|
123
|
-
|
|
124
|
-
afterAll(() => {
|
|
125
|
-
// Limpa diretório temporário após todos os testes
|
|
126
|
-
if (realFs.existsSync(tempDir)) {
|
|
127
|
-
try {
|
|
128
|
-
realFs.rmSync(tempDir, { recursive: true, force: true });
|
|
129
|
-
} catch (error) {
|
|
130
|
-
// Ignora erros ao remover diretório
|
|
131
|
-
}
|
|
132
|
-
}
|
|
45
|
+
mockTranscriptionsCreate.mockResolvedValue({
|
|
46
|
+
text: "Texto transcrito do áudio",
|
|
47
|
+
});
|
|
133
48
|
});
|
|
134
49
|
|
|
135
50
|
describe("transcribeWithWhisper", () => {
|
|
136
51
|
it("deve transcrever áudio usando Whisper", async () => {
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
const
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
}
|
|
151
|
-
}
|
|
152
|
-
|
|
153
|
-
// Substitui temporariamente a classe
|
|
154
|
-
const originalLoader = audioModule.OpenAIWhisperAudio;
|
|
155
|
-
audioModule.OpenAIWhisperAudio = MockLoader;
|
|
156
|
-
|
|
157
|
-
// Recarrega o módulo para pegar o mock
|
|
158
|
-
vi.resetModules();
|
|
159
|
-
const transcriptionModule = await import("../../../src/langchain/audio-transcription");
|
|
160
|
-
|
|
161
|
-
// Força a reimportação do loader mockado
|
|
162
|
-
const newAudioModule = require(audioModulePath);
|
|
163
|
-
newAudioModule.OpenAIWhisperAudio = MockLoader;
|
|
164
|
-
|
|
165
|
-
try {
|
|
166
|
-
const audioBuffer = Buffer.from("fake audio data");
|
|
167
|
-
|
|
168
|
-
const result = await transcriptionModule.LangchainAudioTranscription.transcribeWithWhisper(
|
|
169
|
-
audioBuffer
|
|
170
|
-
);
|
|
171
|
-
|
|
172
|
-
expect(result).toBe("Texto transcrito do áudio");
|
|
173
|
-
expect(vi.mocked(fs.writeFileSync)).toHaveBeenCalled();
|
|
174
|
-
expect(vi.mocked(fs.unlinkSync)).toHaveBeenCalled();
|
|
175
|
-
} finally {
|
|
176
|
-
// Restaura o loader original
|
|
177
|
-
audioModule.OpenAIWhisperAudio = originalLoader;
|
|
178
|
-
}
|
|
52
|
+
const audioBuffer = Buffer.from("fake audio data");
|
|
53
|
+
|
|
54
|
+
const result = await LangchainAudioTranscription.transcribeWithWhisper(
|
|
55
|
+
audioBuffer
|
|
56
|
+
);
|
|
57
|
+
|
|
58
|
+
expect(result).toBe("Texto transcrito do áudio");
|
|
59
|
+
expect(mockTranscriptionsCreate).toHaveBeenCalledWith(
|
|
60
|
+
expect.objectContaining({
|
|
61
|
+
model: "whisper-1",
|
|
62
|
+
response_format: "text",
|
|
63
|
+
})
|
|
64
|
+
);
|
|
179
65
|
});
|
|
180
66
|
|
|
181
67
|
it("deve aceitar opções de transcrição", async () => {
|
|
182
|
-
const
|
|
183
|
-
const audioModule = require(audioModulePath);
|
|
184
|
-
|
|
185
|
-
class MockLoader {
|
|
186
|
-
constructor(public filePath: string) {
|
|
187
|
-
// Usa o fs mockado importado
|
|
188
|
-
if (!fs.existsSync(filePath)) {
|
|
189
|
-
throw new Error(`ENOENT: no such file or directory, open '${filePath}'`);
|
|
190
|
-
}
|
|
191
|
-
}
|
|
192
|
-
async load() {
|
|
193
|
-
return [{ pageContent: "Texto transcrito do áudio", metadata: {} }];
|
|
194
|
-
}
|
|
195
|
-
}
|
|
196
|
-
|
|
197
|
-
const originalLoader = audioModule.OpenAIWhisperAudio;
|
|
198
|
-
audioModule.OpenAIWhisperAudio = MockLoader;
|
|
199
|
-
|
|
200
|
-
vi.resetModules();
|
|
201
|
-
const transcriptionModule = await import("../../../src/langchain/audio-transcription");
|
|
202
|
-
const newAudioModule = require(audioModulePath);
|
|
203
|
-
newAudioModule.OpenAIWhisperAudio = MockLoader;
|
|
204
|
-
|
|
205
|
-
try {
|
|
206
|
-
const audioBuffer = Buffer.from("fake audio data");
|
|
207
|
-
|
|
208
|
-
await transcriptionModule.LangchainAudioTranscription.transcribeWithWhisper(audioBuffer, {
|
|
209
|
-
language: "pt",
|
|
210
|
-
responseFormat: "json",
|
|
211
|
-
});
|
|
68
|
+
const audioBuffer = Buffer.from("fake audio data");
|
|
212
69
|
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
}
|
|
70
|
+
await LangchainAudioTranscription.transcribeWithWhisper(audioBuffer, {
|
|
71
|
+
languageIn2Digits: "pt",
|
|
72
|
+
responseFormat: "json",
|
|
73
|
+
});
|
|
74
|
+
|
|
75
|
+
expect(mockTranscriptionsCreate).toHaveBeenCalledWith(
|
|
76
|
+
expect.objectContaining({
|
|
77
|
+
language: "pt",
|
|
78
|
+
response_format: "json",
|
|
79
|
+
})
|
|
80
|
+
);
|
|
217
81
|
});
|
|
218
82
|
|
|
219
|
-
it("deve
|
|
83
|
+
it("deve aceitar modelo customizado", async () => {
|
|
220
84
|
const audioBuffer = Buffer.from("fake audio data");
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
const fs = require("fs");
|
|
225
|
-
|
|
226
|
-
class MockLoaderWithError {
|
|
227
|
-
constructor(public filePath: string) {
|
|
228
|
-
if (!fs.existsSync(filePath)) {
|
|
229
|
-
throw new Error(`ENOENT: no such file or directory, open '${filePath}'`);
|
|
230
|
-
}
|
|
231
|
-
}
|
|
232
|
-
async load() {
|
|
233
|
-
throw new Error("Erro de transcrição");
|
|
234
|
-
}
|
|
235
|
-
}
|
|
236
|
-
|
|
237
|
-
return { OpenAIWhisperAudio: MockLoaderWithError };
|
|
85
|
+
|
|
86
|
+
await LangchainAudioTranscription.transcribeWithWhisper(audioBuffer, {
|
|
87
|
+
model: "gpt-4o-transcribe",
|
|
238
88
|
});
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
89
|
+
|
|
90
|
+
expect(mockTranscriptionsCreate).toHaveBeenCalledWith(
|
|
91
|
+
expect.objectContaining({
|
|
92
|
+
model: "gpt-4o-transcribe",
|
|
93
|
+
})
|
|
94
|
+
);
|
|
95
|
+
});
|
|
96
|
+
|
|
97
|
+
it("deve usar whisper-1 como padrão quando model não é informado", async () => {
|
|
98
|
+
const audioBuffer = Buffer.from("fake audio data");
|
|
99
|
+
|
|
100
|
+
await LangchainAudioTranscription.transcribeWithWhisper(audioBuffer);
|
|
101
|
+
|
|
102
|
+
expect(mockTranscriptionsCreate).toHaveBeenCalledWith(
|
|
103
|
+
expect.objectContaining({
|
|
104
|
+
model: "whisper-1",
|
|
105
|
+
})
|
|
106
|
+
);
|
|
107
|
+
});
|
|
108
|
+
|
|
109
|
+
it("deve aceitar formato de áudio nas opções (extensão)", async () => {
|
|
110
|
+
const audioBuffer = Buffer.from("fake wav audio data");
|
|
111
|
+
|
|
112
|
+
const result = await LangchainAudioTranscription.transcribeWithWhisper(
|
|
113
|
+
audioBuffer,
|
|
114
|
+
{ format: "wav" }
|
|
115
|
+
);
|
|
116
|
+
|
|
117
|
+
expect(result).toBe("Texto transcrito do áudio");
|
|
118
|
+
expect(mockTranscriptionsCreate).toHaveBeenCalledWith(
|
|
119
|
+
expect.objectContaining({
|
|
120
|
+
file: expect.anything(),
|
|
121
|
+
})
|
|
122
|
+
);
|
|
123
|
+
const createCall = mockTranscriptionsCreate.mock.calls[0]?.[0];
|
|
124
|
+
expect(createCall?.file).toBeDefined();
|
|
125
|
+
const file = createCall?.file as { name?: string };
|
|
126
|
+
expect(file?.name).toMatch(/\.wav$/);
|
|
127
|
+
});
|
|
128
|
+
|
|
129
|
+
it("deve aceitar MIME type como formato", async () => {
|
|
130
|
+
const audioBuffer = Buffer.from("fake webm audio data");
|
|
131
|
+
|
|
132
|
+
const result = await LangchainAudioTranscription.transcribeWithWhisper(
|
|
133
|
+
audioBuffer,
|
|
134
|
+
{ format: "audio/webm" }
|
|
135
|
+
);
|
|
136
|
+
|
|
137
|
+
expect(result).toBe("Texto transcrito do áudio");
|
|
138
|
+
const createCall = mockTranscriptionsCreate.mock.calls[0]?.[0];
|
|
139
|
+
const file = createCall?.file as { name?: string };
|
|
140
|
+
expect(file?.name).toMatch(/\.webm$/);
|
|
141
|
+
});
|
|
142
|
+
|
|
143
|
+
it("deve propagar erro da API", async () => {
|
|
144
|
+
mockTranscriptionsCreate.mockRejectedValueOnce(
|
|
145
|
+
new Error("Erro de transcrição")
|
|
146
|
+
);
|
|
147
|
+
|
|
148
|
+
const audioBuffer = Buffer.from("fake audio data");
|
|
149
|
+
|
|
243
150
|
await expect(
|
|
244
|
-
|
|
151
|
+
LangchainAudioTranscription.transcribeWithWhisper(audioBuffer)
|
|
245
152
|
).rejects.toThrow("Erro de transcrição");
|
|
246
|
-
|
|
247
|
-
// Verifica que tentou remover o arquivo temporário
|
|
248
|
-
expect(vi.mocked(fs.unlinkSync)).toHaveBeenCalled();
|
|
249
153
|
});
|
|
250
154
|
});
|
|
251
155
|
|
|
252
156
|
describe("transcribeFileWithWhisper", () => {
|
|
253
157
|
it("deve transcrever arquivo usando Whisper", async () => {
|
|
254
|
-
// Limpa o mock anterior e cria um novo mock
|
|
255
|
-
vi.doUnmock("@langchain/community/document_loaders/fs/openai_whisper_audio");
|
|
256
|
-
vi.doMock("@langchain/community/document_loaders/fs/openai_whisper_audio", () => {
|
|
257
|
-
const fs = require("fs");
|
|
258
|
-
|
|
259
|
-
class MockLoader {
|
|
260
|
-
constructor(public filePath: string) {
|
|
261
|
-
if (!fs.existsSync(filePath)) {
|
|
262
|
-
throw new Error(`ENOENT: no such file or directory, open '${filePath}'`);
|
|
263
|
-
}
|
|
264
|
-
}
|
|
265
|
-
async load() {
|
|
266
|
-
return [{ pageContent: "Texto transcrito do áudio", metadata: {} }];
|
|
267
|
-
}
|
|
268
|
-
}
|
|
269
|
-
|
|
270
|
-
return { OpenAIWhisperAudio: MockLoader };
|
|
271
|
-
});
|
|
272
|
-
|
|
273
|
-
vi.resetModules();
|
|
274
|
-
const transcriptionModule = await import("../../../src/langchain/audio-transcription");
|
|
275
|
-
const fs = require("fs");
|
|
276
|
-
|
|
277
158
|
const filePath = "/path/to/audio.mp3";
|
|
278
159
|
|
|
279
160
|
const result =
|
|
280
|
-
await
|
|
161
|
+
await LangchainAudioTranscription.transcribeFileWithWhisper(filePath);
|
|
162
|
+
|
|
163
|
+
expect(result).toBe("Texto transcrito do áudio");
|
|
164
|
+
expect(vi.mocked(fs.readFileSync)).toHaveBeenCalledWith(filePath);
|
|
165
|
+
expect(mockTranscriptionsCreate).toHaveBeenCalled();
|
|
166
|
+
});
|
|
167
|
+
|
|
168
|
+
it("deve extrair formato da extensão do arquivo quando format não é informado", async () => {
|
|
169
|
+
const filePath = "/path/to/audio.wav";
|
|
170
|
+
|
|
171
|
+
const result =
|
|
172
|
+
await LangchainAudioTranscription.transcribeFileWithWhisper(filePath);
|
|
281
173
|
|
|
282
174
|
expect(result).toBe("Texto transcrito do áudio");
|
|
283
|
-
|
|
284
|
-
|
|
175
|
+
const createCall = mockTranscriptionsCreate.mock.calls[0]?.[0];
|
|
176
|
+
const file = createCall?.file as { name?: string };
|
|
177
|
+
expect(file?.name).toMatch(/\.wav$/);
|
|
285
178
|
});
|
|
286
179
|
|
|
287
180
|
it("deve lançar erro se arquivo não existir", async () => {
|