@luanpoppe/ai 1.0.8 → 1.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,4 @@
1
1
  export type AudioBuffer = Buffer | ArrayBuffer | Uint8Array;
2
2
  export type AudioMimeType = "audio/mpeg" | "audio/mp3" | "audio/wav" | "audio/wave" | "audio/x-wav" | "audio/mp4" | "audio/m4a" | "audio/webm" | "audio/ogg" | "audio/flac" | "audio/aac" | "audio/opus";
3
+ export declare const MIME_TO_EXTENSION: Record<AudioMimeType, string>;
3
4
  //# sourceMappingURL=audio.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"audio.d.ts","sourceRoot":"","sources":["../../src/@types/audio.ts"],"names":[],"mappings":"AAAA,MAAM,MAAM,WAAW,GAAG,MAAM,GAAG,WAAW,GAAG,UAAU,CAAC;AAE5D,MAAM,MAAM,aAAa,GACrB,YAAY,GACZ,WAAW,GACX,WAAW,GACX,YAAY,GACZ,aAAa,GACb,WAAW,GACX,WAAW,GACX,YAAY,GACZ,WAAW,GACX,YAAY,GACZ,WAAW,GACX,YAAY,CAAC"}
1
+ {"version":3,"file":"audio.d.ts","sourceRoot":"","sources":["../../src/@types/audio.ts"],"names":[],"mappings":"AAAA,MAAM,MAAM,WAAW,GAAG,MAAM,GAAG,WAAW,GAAG,UAAU,CAAC;AAE5D,MAAM,MAAM,aAAa,GACrB,YAAY,GACZ,WAAW,GACX,WAAW,GACX,YAAY,GACZ,aAAa,GACb,WAAW,GACX,WAAW,GACX,YAAY,GACZ,WAAW,GACX,YAAY,GACZ,WAAW,GACX,YAAY,CAAC;AAEjB,eAAO,MAAM,iBAAiB,EAAE,MAAM,CAAC,aAAa,EAAE,MAAM,CAa3D,CAAC"}
@@ -1,3 +1,18 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.MIME_TO_EXTENSION = void 0;
4
+ exports.MIME_TO_EXTENSION = {
5
+ "audio/mpeg": "mp3",
6
+ "audio/mp3": "mp3",
7
+ "audio/wav": "wav",
8
+ "audio/wave": "wav",
9
+ "audio/x-wav": "wav",
10
+ "audio/mp4": "mp4",
11
+ "audio/m4a": "m4a",
12
+ "audio/webm": "webm",
13
+ "audio/ogg": "ogg",
14
+ "audio/flac": "flac",
15
+ "audio/aac": "aac",
16
+ "audio/opus": "opus",
17
+ };
3
18
  //# sourceMappingURL=audio.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"audio.js","sourceRoot":"","sources":["../../src/@types/audio.ts"],"names":[],"mappings":""}
1
+ {"version":3,"file":"audio.js","sourceRoot":"","sources":["../../src/@types/audio.ts"],"names":[],"mappings":";;;AAgBa,QAAA,iBAAiB,GAAkC;IAC9D,YAAY,EAAE,KAAK;IACnB,WAAW,EAAE,KAAK;IAClB,WAAW,EAAE,KAAK;IAClB,YAAY,EAAE,KAAK;IACnB,aAAa,EAAE,KAAK;IACpB,WAAW,EAAE,KAAK;IAClB,WAAW,EAAE,KAAK;IAClB,YAAY,EAAE,MAAM;IACpB,WAAW,EAAE,KAAK;IAClB,YAAY,EAAE,MAAM;IACpB,WAAW,EAAE,KAAK;IAClB,YAAY,EAAE,MAAM;CACrB,CAAC"}
package/dist/index.d.ts CHANGED
@@ -2,7 +2,7 @@ import { LangchainModels, LLMModelConfig } from "./langchain/models";
2
2
  import { AIModelNames } from "./@types/model-names";
3
3
  import z from "zod";
4
4
  import { MessageInput } from "./langchain/messages";
5
- import { AgentMiddleware, BaseMessage } from "langchain";
5
+ import { AgentMiddleware, BaseMessage, createAgent } from "langchain";
6
6
  import { ClientTool, ServerTool } from "@langchain/core/tools";
7
7
  import { LangchainMessages } from "./langchain/messages";
8
8
  import { LangchainTools } from "./langchain/tools";
@@ -44,7 +44,7 @@ export declare class Langchain {
44
44
  */
45
45
  private normalizeSchemaForOpenAI;
46
46
  getRawAgent(params: LangchainCallParams, outputSchema?: z.ZodSchema | undefined): {
47
- agent: import("langchain").ReactAgent<import("langchain").AgentTypeConfig<Record<string, any>, import("@langchain/core/utils/types").InteropZodObject | import("langchain").AnyAnnotationRoot | undefined, import("@langchain/core/utils/types").InteropZodObject | import("langchain").AnyAnnotationRoot, readonly AgentMiddleware<any, any, any, readonly (ClientTool | ServerTool)[]>[], readonly (ClientTool | ServerTool)[]>>;
47
+ agent: ReturnType<typeof createAgent>;
48
48
  };
49
49
  private getModel;
50
50
  private standardAgent;
@@ -55,5 +55,5 @@ export { LangchainAudioTranscription } from "./langchain/audio-transcription";
55
55
  export { AudioUtils } from "./utils/audio-utils";
56
56
  export type { AudioBuffer, AudioMimeType } from "./@types/audio";
57
57
  export type { AudioContentBlock, HumanMessageWithAudioOptions, } from "./langchain/messages";
58
- export type { WhisperTranscriptionOptions } from "./langchain/audio-transcription";
58
+ export type { WhisperModel, WhisperTranscriptionOptions, } from "./langchain/audio-transcription";
59
59
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACrE,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACpD,OAAO,CAAC,MAAM,KAAK,CAAC;AACpB,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACpD,OAAO,EACL,eAAe,EACf,WAAW,EAIZ,MAAM,WAAW,CAAC;AACnB,OAAO,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,uBAAuB,CAAC;AAC/D,OAAO,EAAE,iBAAiB,EAAE,MAAM,sBAAsB,CAAC;AACzD,OAAO,EAAE,cAAc,EAAE,MAAM,mBAAmB,CAAC;AAEnD,KAAK,oBAAoB,GAAG;IAC1B,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B,CAAC;AAEF,MAAM,MAAM,mBAAmB,GAAG;IAChC,KAAK,CAAC,EAAE;QACN,UAAU,CAAC,EAAE,eAAe,EAAE,CAAC;QAC/B,KAAK,CAAC,EAAE,CAAC,UAAU,GAAG,UAAU,CAAC,EAAE,CAAC;KACrC,CAAC;IAEF,WAAW,CAAC,EAAE,IAAI,CAAC,cAAc,EAAE,QAAQ,GAAG,OAAO,CAAC,CAAC;IAEvD,OAAO,EAAE,YAAY,CAAC;IACtB,QAAQ,EAAE,YAAY,EAAE,CAAC;IACzB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB,CAAC;AAEF,MAAM,MAAM,mBAAmB,GAAG,OAAO,CAAC;IACxC,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,WAAW,EAAE,CAAC;CACzB,CAAC,CAAC;AAEH,MAAM,MAAM,mCAAmC,CAAC,CAAC,SAAS,CAAC,CAAC,SAAS,IACnE,mBAAmB,GAAG;IACpB,YAAY,EAAE,CAAC,CAAC;CACjB,CAAC;AAEJ,MAAM,MAAM,mCAAmC,CAAC,CAAC,IAAI,OAAO,CAAC;IAC3D,QAAQ,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;CACtB,CAAC,CAAC;AAEH,qBAAa,SAAS;IACR,OAAO,CAAC,MAAM;gBAAN,MAAM,EAAE,oBAAoB;IAE1C,IAAI,CAAC,MAAM,EAAE,mBAAmB,GAAG,mBAAmB;IAiBtD,oBAAoB,CAAC,CAAC,SAAS,CAAC,CAAC,SAAS,EAC9C,MAAM,EAAE,mCAAmC,CAAC,CAAC,CAAC,GAC7C,mCAAmC,CAAC,OAAO,MAAM,CAAC,YAAY,CAAC;IAwBlE;;;;OAIG;IACH,OAAO,CAAC,wBAAwB;IAoChC,WAAW,CACT,MAAM,EAAE,mBAAmB,EAC3B,YAAY,CAAC,EAAE,CAAC,CAAC,SAAS,GAAG,SAAS;;;IAUxC,OAAO,CAAC,QAAQ;IAiChB,OAAO,CAAC,aAAa;IAkBrB,OAAO,CAAC,mBAAmB;CAU5B;AAED,OAAO,EAAE,eAAe,EAAE,iBAAiB,EAAE,cAAc,EAAE,CAAC;AAC9D,OAAO,EAAE,2BAA2B,EAAE,MAAM,iCAAiC,CAAC;AAC9E,OAAO,EAAE,UAAU,EAAE,MAAM,qBAAqB,CAAC;AACjD,YAAY,EAAE,WAAW,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAC;AACjE,YAAY,EACV,iBAAiB,EACjB,4BAA4B,GAC7B,MAAM,sBAAsB,CAAC;AAC9B,YAAY,EAAE,2BAA2B,EAAE,MAAM,iCAAiC,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACrE,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACpD,OAAO,CAAC,MAAM,KAAK,CAAC;AACpB,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACpD,OAAO,EACL,eAAe,EACf,WAAW,EACX,WAAW,EAGZ,MAAM,WAAW,CAAC;AACnB,OAAO,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,uBAAuB,CAAC;AAC/D,OAAO,EAAE,iBAAiB,EAAE,MAAM,sBAAsB,CAAC;AACzD,OAAO,EAAE,cAAc,EAAE,MAAM,mBAAmB,CAAC;AAEnD,KAAK,oBAAoB,GAAG;IAC1B,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B,CAAC;AAEF,MAAM,MAAM,mBAAmB,GAAG;IAChC,KAAK,CAAC,EAAE;QACN,UAAU,CAAC,EAAE,eAAe,EAAE,CAAC;QAC/B,KAAK,CAAC,EAAE,CAAC,UAAU,GAAG,UAAU,CAAC,EAAE,CAAC;KACrC,CAAC;IAEF,WAAW,CAAC,EAAE,IAAI,CAAC,cAAc,EAAE,QAAQ,GAAG,OAAO,CAAC,CAAC;IAEvD,OAAO,EAAE,YAAY,CAAC;IACtB,QAAQ,EAAE,YAAY,EAAE,CAAC;IACzB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB,CAAC;AAEF,MAAM,MAAM,mBAAmB,GAAG,OAAO,CAAC;IACxC,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,WAAW,EAAE,CAAC;CACzB,CAAC,CAAC;AAEH,MAAM,MAAM,mCAAmC,CAAC,CAAC,SAAS,CAAC,CAAC,SAAS,IACnE,mBAAmB,GAAG;IACpB,YAAY,EAAE,CAAC,CAAC;CACjB,CAAC;AAEJ,MAAM,MAAM,mCAAmC,CAAC,CAAC,IAAI,OAAO,CAAC;IAC3D,QAAQ,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;CACtB,CAAC,CAAC;AAEH,qBAAa,SAAS;IACR,OAAO,CAAC,MAAM;gBAAN,MAAM,EAAE,oBAAoB;IAE1C,IAAI,CAAC,MAAM,EAAE,mBAAmB,GAAG,mBAAmB;IAiBtD,oBAAoB,CAAC,CAAC,SAAS,CAAC,CAAC,SAAS,EAC9C,MAAM,EAAE,mCAAmC,CAAC,CAAC,CAAC,GAC7C,mCAAmC,CAAC,OAAO,MAAM,CAAC,YAAY,CAAC;IAwBlE;;;;OAIG;IACH,OAAO,CAAC,wBAAwB;IAoChC,WAAW,CACT,MAAM,EAAE,mBAAmB,EAC3B,YAAY,CAAC,EAAE,CAAC,CAAC,SAAS,GAAG,SAAS,GACrC;QAAE,KAAK,EAAE,UAAU,CAAC,OAAO,WAAW,CAAC,CAAA;KAAE;IAS5C,OAAO,CAAC,QAAQ;IAiChB,OAAO,CAAC,aAAa;IAkBrB,OAAO,CAAC,mBAAmB;CAU5B;AAED,OAAO,EAAE,eAAe,EAAE,iBAAiB,EAAE,cAAc,EAAE,CAAC;AAC9D,OAAO,EAAE,2BAA2B,EAAE,MAAM,iCAAiC,CAAC;AAC9E,OAAO,EAAE,UAAU,EAAE,MAAM,qBAAqB,CAAC;AACjD,YAAY,EAAE,WAAW,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAC;AACjE,YAAY,EACV,iBAAiB,EACjB,4BAA4B,GAC7B,MAAM,sBAA
sB,CAAC;AAC9B,YAAY,EACV,YAAY,EACZ,2BAA2B,GAC5B,MAAM,iCAAiC,CAAC"}
@@ -1,13 +1,18 @@
1
- import type { AudioBuffer } from "../@types/audio";
1
+ import type { AudioBuffer, AudioMimeType } from "../@types/audio";
2
+ /** Modelos disponíveis na API de transcrição OpenAI (Speech-to-Text) */
3
+ export type WhisperModel = "whisper-1" | "gpt-4o-transcribe" | "gpt-4o-mini-transcribe" | "gpt-4o-mini-transcribe-2025-12-15" | "gpt-4o-transcribe-diarize";
2
4
  export type WhisperTranscriptionOptions = {
3
- language?: string;
5
+ /** Modelo de transcrição. Padrão: "whisper-1". gpt-4o-transcribe e gpt-4o-mini-transcribe têm maior qualidade. */
6
+ model?: WhisperModel;
7
+ languageIn2Digits?: string;
4
8
  prompt?: string;
5
9
  responseFormat?: "json" | "text" | "srt" | "verbose_json" | "vtt";
6
10
  temperature?: number;
7
11
  timestampGranularities?: ("word" | "segment")[];
12
+ /** Formato do áudio: extensão ("mp3", "wav", "webm") ou MIME type ("audio/wav", "audio/webm") */
13
+ format?: string | AudioMimeType;
8
14
  };
9
15
  export declare class LangchainAudioTranscription {
10
- private static extractTextFromDocs;
11
16
  static transcribeWithWhisper(audioBuffer: AudioBuffer, options?: WhisperTranscriptionOptions, openAIApiKey?: string): Promise<string>;
12
17
  static transcribeFileWithWhisper(filePath: string, options?: WhisperTranscriptionOptions, openAIApiKey?: string): Promise<string>;
13
18
  }
@@ -1 +1 @@
1
- {"version":3,"file":"audio-transcription.d.ts","sourceRoot":"","sources":["../../src/langchain/audio-transcription.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AAInD,MAAM,MAAM,2BAA2B,GAAG;IACxC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,cAAc,CAAC,EAAE,MAAM,GAAG,MAAM,GAAG,KAAK,GAAG,cAAc,GAAG,KAAK,CAAC;IAClE,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,sBAAsB,CAAC,EAAE,CAAC,MAAM,GAAG,SAAS,CAAC,EAAE,CAAC;CACjD,CAAC;AAEF,qBAAa,2BAA2B;IACtC,OAAO,CAAC,MAAM,CAAC,mBAAmB;WAYrB,qBAAqB,CAChC,WAAW,EAAE,WAAW,EACxB,OAAO,GAAE,2BAAgC,EACzC,YAAY,CAAC,EAAE,MAAM,GACpB,OAAO,CAAC,MAAM,CAAC;WAsCL,yBAAyB,CACpC,QAAQ,EAAE,MAAM,EAChB,OAAO,GAAE,2BAAgC,EACzC,YAAY,CAAC,EAAE,MAAM,GACpB,OAAO,CAAC,MAAM,CAAC;CAQnB"}
1
+ {"version":3,"file":"audio-transcription.d.ts","sourceRoot":"","sources":["../../src/langchain/audio-transcription.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,WAAW,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAGlE,wEAAwE;AACxE,MAAM,MAAM,YAAY,GACpB,WAAW,GACX,mBAAmB,GACnB,wBAAwB,GACxB,mCAAmC,GACnC,2BAA2B,CAAC;AAEhC,MAAM,MAAM,2BAA2B,GAAG;IACxC,kHAAkH;IAClH,KAAK,CAAC,EAAE,YAAY,CAAC;IACrB,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,cAAc,CAAC,EAAE,MAAM,GAAG,MAAM,GAAG,KAAK,GAAG,cAAc,GAAG,KAAK,CAAC;IAClE,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,sBAAsB,CAAC,EAAE,CAAC,MAAM,GAAG,SAAS,CAAC,EAAE,CAAC;IAChD,iGAAiG;IACjG,MAAM,CAAC,EAAE,MAAM,GAAG,aAAa,CAAC;CACjC,CAAC;AAgBF,qBAAa,2BAA2B;WACzB,qBAAqB,CAChC,WAAW,EAAE,WAAW,EACxB,OAAO,GAAE,2BAAgC,EACzC,YAAY,CAAC,EAAE,MAAM,GACpB,OAAO,CAAC,MAAM,CAAC;WAwCL,yBAAyB,CACpC,QAAQ,EAAE,MAAM,EAChB,OAAO,GAAE,2BAAgC,EACzC,YAAY,CAAC,EAAE,MAAM,GACpB,OAAO,CAAC,MAAM,CAAC;CAcnB"}
@@ -32,62 +32,69 @@ var __importStar = (this && this.__importStar) || (function () {
32
32
  return result;
33
33
  };
34
34
  })();
35
+ var __importDefault = (this && this.__importDefault) || function (mod) {
36
+ return (mod && mod.__esModule) ? mod : { "default": mod };
37
+ };
35
38
  Object.defineProperty(exports, "__esModule", { value: true });
36
39
  exports.LangchainAudioTranscription = void 0;
37
40
  const fs = __importStar(require("fs"));
38
- const openai_whisper_audio_1 = require("@langchain/community/document_loaders/fs/openai_whisper_audio");
39
- const files_utils_1 = require("../utils/files-utils");
41
+ const path = __importStar(require("path"));
42
+ const openai_1 = __importDefault(require("openai"));
43
+ const openai_2 = require("openai");
44
+ const audio_1 = require("../@types/audio");
45
+ function getExtension(format) {
46
+ if (!format)
47
+ return "mp3";
48
+ if (format.startsWith("audio/")) {
49
+ return audio_1.MIME_TO_EXTENSION[format] ?? "mp3";
50
+ }
51
+ return format.replace(/^\./, "");
52
+ }
53
+ function toBuffer(audioBuffer) {
54
+ if (audioBuffer instanceof Buffer)
55
+ return audioBuffer;
56
+ if (audioBuffer instanceof ArrayBuffer)
57
+ return Buffer.from(audioBuffer);
58
+ return Buffer.from(audioBuffer);
59
+ }
40
60
  class LangchainAudioTranscription {
41
- static extractTextFromDocs(docs) {
42
- if (docs.length === 0) {
43
- throw new Error("Nenhum documento foi retornado pela transcrição");
61
+ static async transcribeWithWhisper(audioBuffer, options = {}, openAIApiKey) {
62
+ if (openAIApiKey) {
63
+ process.env.OPENAI_API_KEY = openAIApiKey;
44
64
  }
45
- const firstDoc = docs[0];
46
- if (!firstDoc) {
47
- throw new Error("Documento vazio retornado pela transcrição");
65
+ const buffer = toBuffer(audioBuffer);
66
+ const extension = getExtension(options.format);
67
+ const fileName = `whisper-${Date.now()}.${extension}`;
68
+ const file = await (0, openai_2.toFile)(buffer, fileName);
69
+ const openai = new openai_1.default();
70
+ const transcriptionParams = {
71
+ file,
72
+ model: options.model ?? "whisper-1",
73
+ response_format: options.responseFormat ?? "text",
74
+ };
75
+ if (options.languageIn2Digits) {
76
+ transcriptionParams.language = options.languageIn2Digits;
48
77
  }
49
- // LangChain.js usa pageContent (camelCase), não page_content
50
- return firstDoc.pageContent;
51
- }
52
- static async transcribeWithWhisper(audioBuffer, options = {}, openAIApiKey) {
53
- const tempFilePath = files_utils_1.FilesUtils.createTempFile(audioBuffer, "whisper");
54
- try {
55
- // Configura a API key se fornecida
56
- if (openAIApiKey) {
57
- process.env.OPENAI_API_KEY = openAIApiKey;
58
- }
59
- const transcriptionParams = {
60
- response_format: options.responseFormat || "text",
61
- };
62
- if (options.language) {
63
- transcriptionParams.language = options.language;
64
- }
65
- if (options.prompt) {
66
- transcriptionParams.prompt = options.prompt;
67
- }
68
- if (options.temperature !== undefined) {
69
- transcriptionParams.temperature = options.temperature;
70
- }
71
- if (options.timestampGranularities) {
72
- transcriptionParams.timestamp_granularities =
73
- options.timestampGranularities;
74
- }
75
- const loader = new openai_whisper_audio_1.OpenAIWhisperAudio(tempFilePath, {
76
- transcriptionCreateParams: transcriptionParams,
77
- });
78
- const docs = await loader.load();
79
- return this.extractTextFromDocs(docs);
78
+ if (options.prompt) {
79
+ transcriptionParams.prompt = options.prompt;
80
+ }
81
+ if (options.temperature !== undefined) {
82
+ transcriptionParams.temperature = options.temperature;
80
83
  }
81
- finally {
82
- files_utils_1.FilesUtils.cleanupTempFile(tempFilePath);
84
+ if (options.timestampGranularities) {
85
+ transcriptionParams.timestamp_granularities =
86
+ options.timestampGranularities;
83
87
  }
88
+ const response = await openai.audio.transcriptions.create(transcriptionParams);
89
+ return typeof response === "string" ? response : response.text;
84
90
  }
85
91
  static async transcribeFileWithWhisper(filePath, options = {}, openAIApiKey) {
86
92
  if (!fs.existsSync(filePath)) {
87
93
  throw new Error(`Arquivo não encontrado: ${filePath}`);
88
94
  }
89
95
  const audioBuffer = fs.readFileSync(filePath);
90
- return this.transcribeWithWhisper(audioBuffer, options, openAIApiKey);
96
+ const format = options.format ?? (path.extname(filePath).replace(/^\./, "") || "mp3");
97
+ return this.transcribeWithWhisper(audioBuffer, { ...options, format }, openAIApiKey);
91
98
  }
92
99
  }
93
100
  exports.LangchainAudioTranscription = LangchainAudioTranscription;
@@ -1 +1 @@
1
- {"version":3,"file":"audio-transcription.js","sourceRoot":"","sources":["../../src/langchain/audio-transcription.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AACA,uCAAyB;AAEzB,wGAAmG;AACnG,sDAAkD;AAUlD,MAAa,2BAA2B;IAC9B,MAAM,CAAC,mBAAmB,CAAC,IAAgB;QACjD,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACtB,MAAM,IAAI,KAAK,CAAC,iDAAiD,CAAC,CAAC;QACrE,CAAC;QACD,MAAM,QAAQ,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QACzB,IAAI,CAAC,QAAQ,EAAE,CAAC;YACd,MAAM,IAAI,KAAK,CAAC,4CAA4C,CAAC,CAAC;QAChE,CAAC;QACD,6DAA6D;QAC7D,OAAO,QAAQ,CAAC,WAAW,CAAC;IAC9B,CAAC;IAED,MAAM,CAAC,KAAK,CAAC,qBAAqB,CAChC,WAAwB,EACxB,UAAuC,EAAE,EACzC,YAAqB;QAErB,MAAM,YAAY,GAAG,wBAAU,CAAC,cAAc,CAAC,WAAW,EAAE,SAAS,CAAC,CAAC;QAEvE,IAAI,CAAC;YACH,mCAAmC;YACnC,IAAI,YAAY,EAAE,CAAC;gBACjB,OAAO,CAAC,GAAG,CAAC,cAAc,GAAG,YAAY,CAAC;YAC5C,CAAC;YAED,MAAM,mBAAmB,GAAQ;gBAC/B,eAAe,EAAE,OAAO,CAAC,cAAc,IAAI,MAAM;aAClD,CAAC;YAEF,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;gBACrB,mBAAmB,CAAC,QAAQ,GAAG,OAAO,CAAC,QAAQ,CAAC;YAClD,CAAC;YACD,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;gBACnB,mBAAmB,CAAC,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;YAC9C,CAAC;YACD,IAAI,OAAO,CAAC,WAAW,KAAK,SAAS,EAAE,CAAC;gBACtC,mBAAmB,CAAC,WAAW,GAAG,OAAO,CAAC,WAAW,CAAC;YACxD,CAAC;YACD,IAAI,OAAO,CAAC,sBAAsB,EAAE,CAAC;gBACnC,mBAAmB,CAAC,uBAAuB;oBACzC,OAAO,CAAC,sBAAsB,CAAC;YACnC,CAAC;YAED,MAAM,MAAM,GAAG,IAAI,yCAAkB,CAAC,YAAY,EAAE;gBAClD,yBAAyB,EAAE,mBAAmB;aAC/C,CAAC,CAAC;YAEH,MAAM,IAAI,GAAe,MAAM,MAAM,CAAC,IAAI,EAAE,CAAC;YAC7C,OAAO,IAAI,CAAC,mBAAmB,CAAC,IAAI,CAAC,CAAC;QACxC,CAAC;gBAAS,CAAC;YACT,wBAAU,CAAC,eAAe,CAAC,YAAY,CAAC,CAAC;QAC3C,CAAC;IACH,CAAC;IAED,MAAM,CAAC,KAAK,CAAC,yBAAyB,CACpC,QAAgB,EAChB,UAAuC,EAAE,EACzC,YAAqB;QAErB,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC7B,MAAM,IAAI,KAAK,CAAC,2BAA2B,QAAQ,EAAE,CAAC,CAAC;QACzD,CAAC;QAED,MAAM,WAAW,GAAG,EAAE,CAAC,YAAY,CAAC,QAAQ,CAAC,CAAC;QAC9C,OAAO,IAAI,CAAC,qBAAqB,CAAC,WAAW,EAAE,OAAO,EAAE,YAAY,CAAC,CAAC;IACxE,CAAC;CACF;AAnED,kEAmEC"}
1
+ {"version":3,"file":"audio-transcription.js","sourceRoot":"","sources":["../../src/langchain/audio-transcription.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,uCAAyB;AACzB,2CAA6B;AAC7B,oDAA4B;AAC5B,mCAAgC;AAEhC,2CAAoD;AAsBpD,SAAS,YAAY,CAAC,MAA+B;IACnD,IAAI,CAAC,MAAM;QAAE,OAAO,KAAK,CAAC;IAC1B,IAAI,MAAM,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;QAChC,OAAO,yBAAiB,CAAC,MAAuB,CAAC,IAAI,KAAK,CAAC;IAC7D,CAAC;IACD,OAAO,MAAM,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;AACnC,CAAC;AAED,SAAS,QAAQ,CAAC,WAAwB;IACxC,IAAI,WAAW,YAAY,MAAM;QAAE,OAAO,WAAW,CAAC;IACtD,IAAI,WAAW,YAAY,WAAW;QAAE,OAAO,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IACxE,OAAO,MAAM,CAAC,IAAI,CAAC,WAAyB,CAAC,CAAC;AAChD,CAAC;AAED,MAAa,2BAA2B;IACtC,MAAM,CAAC,KAAK,CAAC,qBAAqB,CAChC,WAAwB,EACxB,UAAuC,EAAE,EACzC,YAAqB;QAErB,IAAI,YAAY,EAAE,CAAC;YACjB,OAAO,CAAC,GAAG,CAAC,cAAc,GAAG,YAAY,CAAC;QAC5C,CAAC;QAED,MAAM,MAAM,GAAG,QAAQ,CAAC,WAAW,CAAC,CAAC;QACrC,MAAM,SAAS,GAAG,YAAY,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;QAC/C,MAAM,QAAQ,GAAG,WAAW,IAAI,CAAC,GAAG,EAAE,IAAI,SAAS,EAAE,CAAC;QAEtD,MAAM,IAAI,GAAG,MAAM,IAAA,eAAM,EAAC,MAAM,EAAE,QAAQ,CAAC,CAAC;QAE5C,MAAM,MAAM,GAAG,IAAI,gBAAM,EAAE,CAAC;QAE5B,MAAM,mBAAmB,GAA2C;YAClE,IAAI;YACJ,KAAK,EAAE,OAAO,CAAC,KAAK,IAAI,WAAW;YACnC,eAAe,EAAE,OAAO,CAAC,cAAc,IAAI,MAAM;SAClD,CAAC;QAEF,IAAI,OAAO,CAAC,iBAAiB,EAAE,CAAC;YAC9B,mBAAmB,CAAC,QAAQ,GAAG,OAAO,CAAC,iBAAiB,CAAC;QAC3D,CAAC;QACD,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;YACnB,mBAAmB,CAAC,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;QAC9C,CAAC;QACD,IAAI,OAAO,CAAC,WAAW,KAAK,SAAS,EAAE,CAAC;YACtC,mBAAmB,CAAC,WAAW,GAAG,OAAO,CAAC,WAAW,CAAC;QACxD,CAAC;QACD,IAAI,OAAO,CAAC,sBAAsB,EAAE,CAAC;YACnC,mBAAmB,CAAC,uBAAuB;gBACzC,OAAO,CAAC,sBAAsB,CAAC;QACnC,CAAC;QAED,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,KAAK,CAAC,cAAc,CAAC,MAAM,CACvD,mBAAmB,CACpB,CAAC;QAEF,OAAO,OAAO,QAAQ,KAAK,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC;IACjE,CAAC;IAED,MAAM,CAAC,KAAK,CAAC,yBAAyB,CACpC,QAAgB,EAChB,UAAuC,EAAE,EACzC,YAAqB;QAErB,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC7B,MAAM,IAAI,KAAK,CAA
C,2BAA2B,QAAQ,EAAE,CAAC,CAAC;QACzD,CAAC;QAED,MAAM,WAAW,GAAG,EAAE,CAAC,YAAY,CAAC,QAAQ,CAAC,CAAC;QAC9C,MAAM,MAAM,GACV,OAAO,CAAC,MAAM,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,IAAI,KAAK,CAAC,CAAC;QACzE,OAAO,IAAI,CAAC,qBAAqB,CAC/B,WAAW,EACX,EAAE,GAAG,OAAO,EAAE,MAAM,EAAE,EACtB,YAAY,CACb,CAAC;IACJ,CAAC;CACF;AA/DD,kEA+DC"}
@@ -57,7 +57,7 @@ class LangchainMessages {
57
57
  // Prepara opções de transcrição - só inclui language se não houver mimeType
58
58
  // Com exactOptionalPropertyTypes: true, não podemos passar undefined explicitamente
59
59
  const transcriptionOptions = mimeType
60
- ? {}
60
+ ? { format: mimeType }
61
61
  : { language: "pt" };
62
62
  const transcribedText = await audio_transcription_1.LangchainAudioTranscription.transcribeWithWhisper(audioBuffer, transcriptionOptions, openAIApiKey);
63
63
  // Combina o texto original (se fornecido) com a transcrição
@@ -1 +1 @@
1
- {"version":3,"file":"messages.js","sourceRoot":"","sources":["../../src/langchain/messages.ts"],"names":[],"mappings":";;;AAAA,yCAAmE;AACnE,sDAAkD;AAElD,+DAAoE;AAuBpE,MAAa,iBAAiB;IAC5B,MAAM,CAAC,MAAM,CAAC,OAAe;QAC3B,OAAO,IAAI,yBAAa,CAAC,OAAO,CAAC,CAAC;IACpC,CAAC;IAED,MAAM,CAAC,KAAK,CAAC,OAAe;QAC1B,OAAO,IAAI,wBAAY,CAAC,OAAO,CAAC,CAAC;IACnC,CAAC;IAED,MAAM,CAAC,KAAK,CAAC,UAAU,CACrB,OAAqC;QAErC,MAAM,EAAE,KAAK,EAAE,IAAI,EAAE,QAAQ,GAAG,MAAM,EAAE,YAAY,EAAE,GAAG,OAAO,CAAC;QACjE,MAAM,EAAE,MAAM,EAAE,QAAQ,EAAE,QAAQ,EAAE,GAAG,KAAK,CAAC;QAE7C,OAAO,IAAI,CAAC,2BAA2B,CACrC,MAAM,EACN,IAAI,EACJ,QAAQ,EACR,QAAQ,EACR,QAAQ,EACR,YAAY,CACb,CAAC;IACJ,CAAC;IAED,MAAM,CAAC,EAAE,CAAC,OAAe;QACvB,OAAO,IAAI,qBAAS,CAAC,OAAO,CAAC,CAAC;IAChC,CAAC;IAEO,MAAM,CAAC,KAAK,CAAC,2BAA2B,CAC9C,WAAwB,EACxB,IAAa,EACb,QAAwB,EACxB,QAAiB,EACjB,WAAyC,MAAM,EAC/C,YAAqB;QAErB,gFAAgF;QAChF,IAAI,QAAQ,KAAK,QAAQ;YACvB,OAAO,MAAM,IAAI,CAAC,kBAAkB,CAClC,WAAW,EACX,QAAQ,EACR,IAAI,EACJ,YAAY,CACb,CAAC;QAEJ,4GAA4G;QAC5G,MAAM,UAAU,GAAG,wBAAU,CAAC,cAAc,CAAC,WAAW,CAAC,CAAC;QAC1D,MAAM,gBAAgB,GACpB,QAAQ,IAAI,wBAAU,CAAC,mBAAmB,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC;QAEpE,MAAM,OAAO,GAAe,EAAE,CAAC;QAE/B,6EAA6E;QAC7E,iGAAiG;QACjG,IAAI,IAAI,EAAE,CAAC;YACT,OAAO,CAAC,IAAI,CAAC;gBACX,IAAI,EAAE,MAAM;gBACZ,IAAI,EAAE,IAAI;aACX,CAAC,CAAC;QACL,CAAC;QAED,wEAAwE;QACxE,gEAAgE;QAChE,MAAM,UAAU,GAAsB;YACpC,IAAI,EAAE,OAAO;YACb,WAAW,EAAE,QAAQ;YACrB,IAAI,EAAE,UAAU;YAChB,SAAS,EAAE,gBAAgB;SAC5B,CAAC;QACF,OAAO,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QAEzB,2DAA2D;QAC3D,mEAAmE;QACnE,OAAO,IAAI,wBAAY,CAAC;YACtB,OAAO,EAAE,OAAc;SACjB,CAAC,CAAC;IACZ,CAAC;IAEO,MAAM,CAAC,KAAK,CAAC,kBAAkB,CACrC,WAAwB,EACxB,QAAwB,EACxB,IAAa,EACb,YAAqB;QAErB,IAAI,CAAC,YAAY;YACf,MAAM,IAAI,KAAK,CACb,mIAAmI,CACpI,CAAC;QAEJ,IAAI,CAAC;YACH,4EAA4E;YAC5E,oFAAoF;YACpF,MAAM,oBAAoB,GAAG,QAAQ;gBACnC,CAAC,CAAE,EAA4B;gBAC/B,CAAC,CAAC,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC;YAEvB,MAAM,eAAe,GACnB,MAAM,iDAA2B,CAAC,qBAAqB,CACrD,WAAW,EACX,oBAAoB,EACpB,YAAY,CACb,CAAC;YAEJ,4DAA4D;YAC5D,MAAM,SAAS,GAAG,IAAI;gBACpB,CAAC,CAAC,GAAG,IAAI,yB
AAyB,eAAe,EAAE;gBACnD,CAAC,CAAC,qBAAqB,eAAe,EAAE,CAAC;YAE3C,0DAA0D;YAC1D,OAAO,IAAI,wBAAY,CAAC,SAAS,CAAC,CAAC;QACrC,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,kDAAkD;YAClD,yEAAyE;YACzE,MAAM,IAAI,KAAK,CACb,wDACE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CACvD,IAAI;gBACF,mGAAmG,CACtG,CAAC;QACJ,CAAC;IACH,CAAC;CACF;AA1HD,8CA0HC"}
1
+ {"version":3,"file":"messages.js","sourceRoot":"","sources":["../../src/langchain/messages.ts"],"names":[],"mappings":";;;AAAA,yCAAmE;AACnE,sDAAkD;AAElD,+DAAoE;AAuBpE,MAAa,iBAAiB;IAC5B,MAAM,CAAC,MAAM,CAAC,OAAe;QAC3B,OAAO,IAAI,yBAAa,CAAC,OAAO,CAAC,CAAC;IACpC,CAAC;IAED,MAAM,CAAC,KAAK,CAAC,OAAe;QAC1B,OAAO,IAAI,wBAAY,CAAC,OAAO,CAAC,CAAC;IACnC,CAAC;IAED,MAAM,CAAC,KAAK,CAAC,UAAU,CACrB,OAAqC;QAErC,MAAM,EAAE,KAAK,EAAE,IAAI,EAAE,QAAQ,GAAG,MAAM,EAAE,YAAY,EAAE,GAAG,OAAO,CAAC;QACjE,MAAM,EAAE,MAAM,EAAE,QAAQ,EAAE,QAAQ,EAAE,GAAG,KAAK,CAAC;QAE7C,OAAO,IAAI,CAAC,2BAA2B,CACrC,MAAM,EACN,IAAI,EACJ,QAAQ,EACR,QAAQ,EACR,QAAQ,EACR,YAAY,CACb,CAAC;IACJ,CAAC;IAED,MAAM,CAAC,EAAE,CAAC,OAAe;QACvB,OAAO,IAAI,qBAAS,CAAC,OAAO,CAAC,CAAC;IAChC,CAAC;IAEO,MAAM,CAAC,KAAK,CAAC,2BAA2B,CAC9C,WAAwB,EACxB,IAAa,EACb,QAAwB,EACxB,QAAiB,EACjB,WAAyC,MAAM,EAC/C,YAAqB;QAErB,gFAAgF;QAChF,IAAI,QAAQ,KAAK,QAAQ;YACvB,OAAO,MAAM,IAAI,CAAC,kBAAkB,CAClC,WAAW,EACX,QAAQ,EACR,IAAI,EACJ,YAAY,CACb,CAAC;QAEJ,4GAA4G;QAC5G,MAAM,UAAU,GAAG,wBAAU,CAAC,cAAc,CAAC,WAAW,CAAC,CAAC;QAC1D,MAAM,gBAAgB,GACpB,QAAQ,IAAI,wBAAU,CAAC,mBAAmB,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC;QAEpE,MAAM,OAAO,GAAe,EAAE,CAAC;QAE/B,6EAA6E;QAC7E,iGAAiG;QACjG,IAAI,IAAI,EAAE,CAAC;YACT,OAAO,CAAC,IAAI,CAAC;gBACX,IAAI,EAAE,MAAM;gBACZ,IAAI,EAAE,IAAI;aACX,CAAC,CAAC;QACL,CAAC;QAED,wEAAwE;QACxE,gEAAgE;QAChE,MAAM,UAAU,GAAsB;YACpC,IAAI,EAAE,OAAO;YACb,WAAW,EAAE,QAAQ;YACrB,IAAI,EAAE,UAAU;YAChB,SAAS,EAAE,gBAAgB;SAC5B,CAAC;QACF,OAAO,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QAEzB,2DAA2D;QAC3D,mEAAmE;QACnE,OAAO,IAAI,wBAAY,CAAC;YACtB,OAAO,EAAE,OAAc;SACjB,CAAC,CAAC;IACZ,CAAC;IAEO,MAAM,CAAC,KAAK,CAAC,kBAAkB,CACrC,WAAwB,EACxB,QAAwB,EACxB,IAAa,EACb,YAAqB;QAErB,IAAI,CAAC,YAAY;YACf,MAAM,IAAI,KAAK,CACb,mIAAmI,CACpI,CAAC;QAEJ,IAAI,CAAC;YACH,4EAA4E;YAC5E,oFAAoF;YACpF,MAAM,oBAAoB,GAAG,QAAQ;gBACnC,CAAC,CAAC,EAAE,MAAM,EAAE,QAAQ,EAAE;gBACtB,CAAC,CAAC,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC;YAEvB,MAAM,eAAe,GACnB,MAAM,iDAA2B,CAAC,qBAAqB,CACrD,WAAW,EACX,oBAAoB,EACpB,YAAY,CACb,CAAC;YAEJ,4DAA4D;YAC5D,MAAM,SAAS,GAAG,IAAI;gBACpB,CAA
C,CAAC,GAAG,IAAI,yBAAyB,eAAe,EAAE;gBACnD,CAAC,CAAC,qBAAqB,eAAe,EAAE,CAAC;YAE3C,0DAA0D;YAC1D,OAAO,IAAI,wBAAY,CAAC,SAAS,CAAC,CAAC;QACrC,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,kDAAkD;YAClD,yEAAyE;YACzE,MAAM,IAAI,KAAK,CACb,wDACE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CACvD,IAAI;gBACF,mGAAmG,CACtG,CAAC;QACJ,CAAC;IACH,CAAC;CACF;AA1HD,8CA0HC"}
@@ -1,6 +1,6 @@
1
- import type { AudioBuffer } from "../@types/audio";
1
+ import { type AudioBuffer, type AudioMimeType } from "../@types/audio";
2
2
  export declare class FilesUtils {
3
- static createTempFile(audioBuffer: AudioBuffer, prefix?: string): string;
3
+ static createTempFile(audioBuffer: AudioBuffer, prefix?: string, format?: string | AudioMimeType): string;
4
4
  static cleanupTempFile(filePath: string): void;
5
5
  }
6
6
  //# sourceMappingURL=files-utils.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"files-utils.d.ts","sourceRoot":"","sources":["../../src/utils/files-utils.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AAEnD,qBAAa,UAAU;IACrB,MAAM,CAAC,cAAc,CACnB,WAAW,EAAE,WAAW,EACxB,MAAM,GAAE,MAAgB,GACvB,MAAM;IAsBT,MAAM,CAAC,eAAe,CAAC,QAAQ,EAAE,MAAM,GAAG,IAAI;CAS/C"}
1
+ {"version":3,"file":"files-utils.d.ts","sourceRoot":"","sources":["../../src/utils/files-utils.ts"],"names":[],"mappings":"AAGA,OAAO,EAEL,KAAK,WAAW,EAChB,KAAK,aAAa,EACnB,MAAM,iBAAiB,CAAC;AAEzB,qBAAa,UAAU;IACrB,MAAM,CAAC,cAAc,CACnB,WAAW,EAAE,WAAW,EACxB,MAAM,GAAE,MAAgB,EACxB,MAAM,CAAC,EAAE,MAAM,GAAG,aAAa,GAC9B,MAAM;IA8BT,MAAM,CAAC,eAAe,CAAC,QAAQ,EAAE,MAAM,GAAG,IAAI;CAS/C"}
@@ -37,10 +37,18 @@ exports.FilesUtils = void 0;
37
37
  const fs = __importStar(require("fs"));
38
38
  const path = __importStar(require("path"));
39
39
  const os = __importStar(require("os"));
40
+ const audio_1 = require("../@types/audio");
40
41
  class FilesUtils {
41
- static createTempFile(audioBuffer, prefix = "audio") {
42
+ static createTempFile(audioBuffer, prefix = "audio", format) {
43
+ const extension = format
44
+ ? format.startsWith("audio/")
45
+ ? audio_1.MIME_TO_EXTENSION[format] ?? "mp3"
46
+ : format.replace(/^\./, "")
47
+ : "mp3";
42
48
  const tempDir = os.tmpdir();
43
- const tempFilePath = path.join(tempDir, `${prefix}-${Date.now()}-${Math.random().toString(36).substring(7)}.mp3`);
49
+ const tempFilePath = path.join(tempDir, `${prefix}-${Date.now()}-${Math.random()
50
+ .toString(36)
51
+ .substring(7)}.${extension}`);
44
52
  // Converte o buffer para Buffer se necessário
45
53
  let buffer;
46
54
  if (audioBuffer instanceof Buffer) {
@@ -1 +1 @@
1
- {"version":3,"file":"files-utils.js","sourceRoot":"","sources":["../../src/utils/files-utils.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,uCAAyB;AACzB,2CAA6B;AAC7B,uCAAyB;AAGzB,MAAa,UAAU;IACrB,MAAM,CAAC,cAAc,CACnB,WAAwB,EACxB,SAAiB,OAAO;QAExB,MAAM,OAAO,GAAG,EAAE,CAAC,MAAM,EAAE,CAAC;QAC5B,MAAM,YAAY,GAAG,IAAI,CAAC,IAAI,CAC5B,OAAO,EACP,GAAG,MAAM,IAAI,IAAI,CAAC,GAAG,EAAE,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,MAAM,CACzE,CAAC;QAEF,8CAA8C;QAC9C,IAAI,MAAc,CAAC;QACnB,IAAI,WAAW,YAAY,MAAM,EAAE,CAAC;YAClC,MAAM,GAAG,WAAW,CAAC;QACvB,CAAC;aAAM,IAAI,WAAW,YAAY,WAAW,EAAE,CAAC;YAC9C,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACpC,CAAC;aAAM,CAAC;YACN,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACpC,CAAC;QAED,+BAA+B;QAC/B,EAAE,CAAC,aAAa,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC;QACvC,OAAO,YAAY,CAAC;IACtB,CAAC;IAED,MAAM,CAAC,eAAe,CAAC,QAAgB;QACrC,IAAI,EAAE,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC5B,IAAI,CAAC;gBACH,EAAE,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC;YAC1B,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,OAAO,CAAC,IAAI,CAAC,uCAAuC,KAAK,EAAE,CAAC,CAAC;YAC/D,CAAC;QACH,CAAC;IACH,CAAC;CACF;AAnCD,gCAmCC"}
1
+ {"version":3,"file":"files-utils.js","sourceRoot":"","sources":["../../src/utils/files-utils.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,uCAAyB;AACzB,2CAA6B;AAC7B,uCAAyB;AACzB,2CAIyB;AAEzB,MAAa,UAAU;IACrB,MAAM,CAAC,cAAc,CACnB,WAAwB,EACxB,SAAiB,OAAO,EACxB,MAA+B;QAE/B,MAAM,SAAS,GAAG,MAAM;YACtB,CAAC,CAAC,MAAM,CAAC,UAAU,CAAC,QAAQ,CAAC;gBAC3B,CAAC,CAAC,yBAAiB,CAAC,MAAuB,CAAC,IAAI,KAAK;gBACrD,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC;YAC7B,CAAC,CAAC,KAAK,CAAC;QAEV,MAAM,OAAO,GAAG,EAAE,CAAC,MAAM,EAAE,CAAC;QAC5B,MAAM,YAAY,GAAG,IAAI,CAAC,IAAI,CAC5B,OAAO,EACP,GAAG,MAAM,IAAI,IAAI,CAAC,GAAG,EAAE,IAAI,IAAI,CAAC,MAAM,EAAE;aACrC,QAAQ,CAAC,EAAE,CAAC;aACZ,SAAS,CAAC,CAAC,CAAC,IAAI,SAAS,EAAE,CAC/B,CAAC;QAEF,8CAA8C;QAC9C,IAAI,MAAc,CAAC;QACnB,IAAI,WAAW,YAAY,MAAM,EAAE,CAAC;YAClC,MAAM,GAAG,WAAW,CAAC;QACvB,CAAC;aAAM,IAAI,WAAW,YAAY,WAAW,EAAE,CAAC;YAC9C,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACpC,CAAC;aAAM,CAAC;YACN,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACpC,CAAC;QAED,+BAA+B;QAC/B,EAAE,CAAC,aAAa,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC;QACvC,OAAO,YAAY,CAAC;IACtB,CAAC;IAED,MAAM,CAAC,eAAe,CAAC,QAAgB;QACrC,IAAI,EAAE,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC5B,IAAI,CAAC;gBACH,EAAE,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC;YAC1B,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,OAAO,CAAC,IAAI,CAAC,uCAAuC,KAAK,EAAE,CAAC,CAAC;YAC/D,CAAC;QACH,CAAC;IACH,CAAC;CACF;AA5CD,gCA4CC"}
@@ -0,0 +1,209 @@
1
+ # Bug do LangChain no Windows: Transcrição de Áudio com Whisper - 2026-02-02
2
+
3
+ ## Resumo
4
+
5
+ O loader `OpenAIWhisperAudio` do LangChain passa o **caminho completo do arquivo** (ex: `C:\Users\...\whisper-xxx.webm`) para a API da OpenAI. No Windows, isso pode causar falhas no envio multipart devido a backslashes e caracteres especiais no path.
6
+
7
+ ---
8
+
9
+ ## Passo a passo: O que acontece quando você transcreve áudio
10
+
11
+ ### 1. Seu código chama a transcrição
12
+
13
+ ```typescript
14
+ // No seu código (@luanpoppe/ai)
15
+ const tempFilePath = FilesUtils.createTempFile(audioBuffer, "whisper", options.format);
16
+ // tempFilePath = "C:\Users\luan\AppData\Local\Temp\whisper-1738512345-abc123.webm"
17
+
18
+ const loader = new OpenAIWhisperAudio(tempFilePath, { ... });
19
+ const docs = await loader.load();
20
+ ```
21
+
22
+ Você cria um arquivo temporário e passa o **caminho completo** para o loader do LangChain.
23
+
24
+ ---
25
+
26
+ ### 2. O LangChain lê o arquivo
27
+
28
+ O `OpenAIWhisperAudio` estende `BufferLoader`. Internamente, ele:
29
+
30
+ 1. Lê o conteúdo do arquivo em memória (um `Buffer`)
31
+ 2. Guarda o caminho original em `metadata.source`
32
+
33
+ ```javascript
34
+ // Dentro do BufferLoader (simplificado)
35
+ metadata = {
36
+ source:
37
+ "C:\\Users\\luan\\AppData\\Local\\Temp\\whisper-1738512345-abc123.webm",
38
+ };
39
+ ```
40
+
41
+ ---
42
+
43
+ ### 3. O LangChain chama a API da OpenAI
44
+
45
+ O método `parse()` do loader faz algo assim:
46
+
47
+ ```javascript
48
+ // Código atual do LangChain (com o bug)
49
+ const fileName = metadata.source === "blob" ? metadata.blobType : metadata.source;
50
+ // fileName = "C:\Users\luan\AppData\Local\Temp\whisper-1738512345-abc123.webm"
51
+
52
+ const transcriptionResponse = await this.openAIClient.audio.transcriptions.create({
53
+ file: await toFile(raw, fileName), // ← O problema está aqui!
54
+ model: "whisper-1",
55
+ ...
56
+ });
57
+ ```
58
+
59
+ O segundo parâmetro de `toFile(raw, fileName)` deveria ser **apenas o nome do arquivo** (ex: `whisper-1738512345-abc123.webm`), mas o LangChain passa o **caminho completo**.
60
+
61
+ ---
62
+
63
+ ### 4. O que a API da OpenAI espera
64
+
65
+ A função `toFile` da OpenAI usa o segundo parâmetro para:
66
+
67
+ - Definir o nome do arquivo no formulário multipart (Content-Disposition)
68
+ - Ajudar a API a identificar o formato do áudio pela extensão
69
+
70
+ O formato multipart espera algo como:
71
+
72
+ ```
73
+ Content-Disposition: form-data; name="file"; filename="whisper-xxx.webm"
74
+ ```
75
+
76
+ Com o caminho completo do Windows:
77
+
78
+ ```
79
+ Content-Disposition: form-data; name="file"; filename="C:\Users\luan\AppData\Local\Temp\whisper-xxx.webm"
80
+ ```
81
+
82
+ Os backslashes (`\`) e o path longo podem causar:
83
+
84
+ - Parsing incorreto do header
85
+ - Erros de encoding
86
+ - Rejeição pela API
87
+
88
+ ---
89
+
90
+ ## Por que não dá para resolver no seu código?
91
+
92
+ ### O fluxo de dados
93
+
94
+ ```
95
+ Seu código LangChain (biblioteca) API OpenAI
96
+ | | |
97
+ | tempFilePath (path completo) | |
98
+ | ----------------------------->| |
99
+ | | Lê arquivo, guarda em metadata |
100
+ | | metadata.source = path completo |
101
+ | | |
102
+ | | toFile(buffer, metadata.source) |
103
+ | | ↑ usa path completo internamente |
104
+ | | --------------------------------->|
105
+ | | Envio multipart|
106
+ ```
107
+
108
+ ### Onde está o controle?
109
+
110
+ | Etapa | Quem controla | O que você pode fazer |
111
+ | ----------------------- | ------------- | ----------------------------------------------------------------- |
112
+ | Criar arquivo temp | **Você** | Escolher onde criar (ex: `os.tmpdir()`) |
113
+ | Path passado ao loader | **Você** | Só pode passar um path – o loader precisa dele para ler o arquivo |
114
+ | Valor usado em `toFile` | **LangChain** | Você não tem acesso – é interno ao loader |
115
+ | Chamada à API | **LangChain** | Você não controla |
116
+
117
+ ### O problema central
118
+
119
+ O loader **precisa** do path completo para **ler o arquivo** do disco. Não há como passar “só o nome” – o LangChain precisa do path para fazer `fs.readFile` (ou equivalente).
120
+
121
+ Depois de ler, o loader usa o mesmo `metadata.source` (o path) como nome do arquivo no `toFile`. A decisão de usar `metadata.source` em vez de `path.basename(metadata.source)` está **dentro do LangChain**, não no seu código.
122
+
123
+ ### Por que não dá para “enganar” o loader?
124
+
125
+ **Tentativa 1: Criar o arquivo em um path curto**
126
+
127
+ ```typescript
128
+ // Ex: ./whisper-temp.webm
129
+ const tempFilePath = path.join(process.cwd(), "whisper-temp.webm");
130
+ ```
131
+
132
+ Ainda assim, `metadata.source` será algo como `C:\projeto\whisper-temp.webm`. O path continua completo; o LangChain continua passando ele para `toFile`.
133
+
134
+ **Tentativa 2: Passar um Blob em vez de path**
135
+
136
+ O loader aceita `string | Blob`. Se você passar um Blob, `metadata.source === "blob"` e ele usa `metadata.blobType` (ex: `"audio/webm"`). O Blob não tem path, então não há problema de Windows.
137
+
138
+ Porém: o LangChain não expõe uma forma simples de criar o loader a partir de um Buffer/Blob com controle total do fluxo. E o `BufferLoader` espera um Blob do browser ou um path de arquivo – em Node.js, o uso típico é com path.
139
+
140
+ **Tentativa 3: Wrapper ou monkey-patch**
141
+
142
+ Você poderia tentar interceptar ou substituir o loader, mas:
143
+
144
+ - O loader é instanciado internamente
145
+ - Você não controla o que é passado para `toFile`
146
+ - Faria seu código depender de detalhes internos do LangChain, frágeis a atualizações
147
+
148
+ ---
149
+
150
+ ## Correção sugerida (no LangChain)
151
+
152
+ No arquivo `openai_whisper_audio.ts` do LangChain:
153
+
154
+ ```typescript
155
+ // Antes (com bug)
156
+ const fileName =
157
+ metadata.source === "blob" ? metadata.blobType : metadata.source;
158
+
159
+ // Depois (corrigido)
160
+ const fileName =
161
+ metadata.source === "blob"
162
+ ? metadata.blobType
163
+ : path.basename(metadata.source);
164
+ ```
165
+
166
+ Assim, quando `metadata.source` for um path de arquivo, só o nome do arquivo (ex: `whisper-xxx.webm`) é enviado para `toFile`. Observação: a correção requer que o módulo `path` esteja importado no arquivo do LangChain (`import * as path from "node:path"`), caso ainda não esteja.
167
+
168
+ ---
169
+
170
+ ## Suas opções práticas
171
+
172
+ 1. **Abrir um PR no LangChain** com essa correção e aguardar o merge.
173
+ 2. **Usar `patch-package`** para aplicar essa alteração automaticamente no `node_modules` após cada `pnpm install`.
174
+ 3. **Usar a API da OpenAI diretamente** (sem o loader do LangChain) e controlar o nome do arquivo no `toFile`.
175
+
176
+ ---
177
+
178
+ ## Diagrama do fluxo
179
+
180
+ ```
181
+ ┌─────────────────────────────────────────────────────────────────────────┐
182
+ │ SEU CÓDIGO │
183
+ │ │
184
+ │ createTempFile() → "C:\Users\...\Temp\whisper-xxx.webm" │
185
+ │ │ │
186
+ │ ▼ │
187
+ │ new OpenAIWhisperAudio(tempFilePath) │
188
+ └─────────────────────────────────────────────────────────────────────────┘
189
+
190
+ │ path completo
191
+
192
+ ┌─────────────────────────────────────────────────────────────────────────┐
193
+ │ LANGCHAIN (você não controla) │
194
+ │ │
195
+ │ 1. Lê arquivo do path │
196
+ │ 2. metadata.source = path ← guarda path completo │
197
+ │ 3. fileName = metadata.source ← BUG: usa path completo │
198
+ │ 4. toFile(buffer, fileName) ← envia path para OpenAI │
199
+ └─────────────────────────────────────────────────────────────────────────┘
200
+
201
+ │ multipart com filename = path
202
+
203
+ ┌─────────────────────────────────────────────────────────────────────────┐
204
+ │ API OPENAI │
205
+ │ │
206
+ │ Recebe: filename="C:\Users\...\whisper-xxx.webm" │
207
+ │ Problema: backslashes, path longo → pode falhar no Windows │
208
+ └─────────────────────────────────────────────────────────────────────────┘
209
+ ```
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@luanpoppe/ai",
3
- "version": "1.0.8",
3
+ "version": "1.0.10",
4
4
  "description": "",
5
5
  "main": "dist/index.js",
6
6
  "keywords": [],
@@ -8,6 +8,7 @@
8
8
  "license": "ISC",
9
9
  "dependencies": {
10
10
  "@langchain/community": "^1.1.10",
11
+ "openai": "^6.17.0",
11
12
  "@langchain/core": "^1.1.16",
12
13
  "@langchain/google-genai": "^2.1.12",
13
14
  "@langchain/openai": "^1.2.3",
@@ -13,3 +13,18 @@ export type AudioMimeType =
13
13
  | "audio/flac"
14
14
  | "audio/aac"
15
15
  | "audio/opus";
16
+
17
+ export const MIME_TO_EXTENSION: Record<AudioMimeType, string> = {
18
+ "audio/mpeg": "mp3",
19
+ "audio/mp3": "mp3",
20
+ "audio/wav": "wav",
21
+ "audio/wave": "wav",
22
+ "audio/x-wav": "wav",
23
+ "audio/mp4": "mp4",
24
+ "audio/m4a": "m4a",
25
+ "audio/webm": "webm",
26
+ "audio/ogg": "ogg",
27
+ "audio/flac": "flac",
28
+ "audio/aac": "aac",
29
+ "audio/opus": "opus",
30
+ };
package/src/index.ts CHANGED
@@ -137,7 +137,7 @@ export class Langchain {
137
137
  getRawAgent(
138
138
  params: LangchainCallParams,
139
139
  outputSchema?: z.ZodSchema | undefined
140
- ) {
140
+ ): { agent: ReturnType<typeof createAgent> } {
141
141
  const agent = createAgent({
142
142
  ...this.standardAgent(params),
143
143
  responseFormat: outputSchema as any,
@@ -217,4 +217,7 @@ export type {
217
217
  AudioContentBlock,
218
218
  HumanMessageWithAudioOptions,
219
219
  } from "./langchain/messages";
220
- export type { WhisperTranscriptionOptions } from "./langchain/audio-transcription";
220
+ export type {
221
+ WhisperModel,
222
+ WhisperTranscriptionOptions,
223
+ } from "./langchain/audio-transcription";
@@ -1,70 +1,87 @@
1
- import { Document } from "@langchain/core/documents";
2
1
  import * as fs from "fs";
3
- import type { AudioBuffer } from "../@types/audio";
4
- import { OpenAIWhisperAudio } from "@langchain/community/document_loaders/fs/openai_whisper_audio";
5
- import { FilesUtils } from "../utils/files-utils";
2
+ import * as path from "path";
3
+ import OpenAI from "openai";
4
+ import { toFile } from "openai";
5
+ import type { AudioBuffer, AudioMimeType } from "../@types/audio";
6
+ import { MIME_TO_EXTENSION } from "../@types/audio";
7
+
8
+ /** Modelos disponíveis na API de transcrição OpenAI (Speech-to-Text) */
9
+ export type WhisperModel =
10
+ | "whisper-1"
11
+ | "gpt-4o-transcribe"
12
+ | "gpt-4o-mini-transcribe"
13
+ | "gpt-4o-mini-transcribe-2025-12-15"
14
+ | "gpt-4o-transcribe-diarize";
6
15
 
7
16
  export type WhisperTranscriptionOptions = {
8
- language?: string;
17
+ /** Modelo de transcrição. Padrão: "whisper-1". gpt-4o-transcribe e gpt-4o-mini-transcribe têm maior qualidade. */
18
+ model?: WhisperModel;
19
+ languageIn2Digits?: string;
9
20
  prompt?: string;
10
21
  responseFormat?: "json" | "text" | "srt" | "verbose_json" | "vtt";
11
22
  temperature?: number;
12
23
  timestampGranularities?: ("word" | "segment")[];
24
+ /** Formato do áudio: extensão ("mp3", "wav", "webm") ou MIME type ("audio/wav", "audio/webm") */
25
+ format?: string | AudioMimeType;
13
26
  };
14
27
 
15
- export class LangchainAudioTranscription {
16
- private static extractTextFromDocs(docs: Document[]): string {
17
- if (docs.length === 0) {
18
- throw new Error("Nenhum documento foi retornado pela transcrição");
19
- }
20
- const firstDoc = docs[0];
21
- if (!firstDoc) {
22
- throw new Error("Documento vazio retornado pela transcrição");
23
- }
24
- // LangChain.js usa pageContent (camelCase), não page_content
25
- return firstDoc.pageContent;
28
+ function getExtension(format?: string | AudioMimeType): string {
29
+ if (!format) return "mp3";
30
+ if (format.startsWith("audio/")) {
31
+ return MIME_TO_EXTENSION[format as AudioMimeType] ?? "mp3";
26
32
  }
33
+ return format.replace(/^\./, "");
34
+ }
35
+
36
+ function toBuffer(audioBuffer: AudioBuffer): Buffer {
37
+ if (audioBuffer instanceof Buffer) return audioBuffer;
38
+ if (audioBuffer instanceof ArrayBuffer) return Buffer.from(audioBuffer);
39
+ return Buffer.from(audioBuffer as Uint8Array);
40
+ }
27
41
 
42
+ export class LangchainAudioTranscription {
28
43
  static async transcribeWithWhisper(
29
44
  audioBuffer: AudioBuffer,
30
45
  options: WhisperTranscriptionOptions = {},
31
46
  openAIApiKey?: string
32
47
  ): Promise<string> {
33
- const tempFilePath = FilesUtils.createTempFile(audioBuffer, "whisper");
48
+ if (openAIApiKey) {
49
+ process.env.OPENAI_API_KEY = openAIApiKey;
50
+ }
34
51
 
35
- try {
36
- // Configura a API key se fornecida
37
- if (openAIApiKey) {
38
- process.env.OPENAI_API_KEY = openAIApiKey;
39
- }
52
+ const buffer = toBuffer(audioBuffer);
53
+ const extension = getExtension(options.format);
54
+ const fileName = `whisper-${Date.now()}.${extension}`;
40
55
 
41
- const transcriptionParams: any = {
42
- response_format: options.responseFormat || "text",
43
- };
56
+ const file = await toFile(buffer, fileName);
44
57
 
45
- if (options.language) {
46
- transcriptionParams.language = options.language;
47
- }
48
- if (options.prompt) {
49
- transcriptionParams.prompt = options.prompt;
50
- }
51
- if (options.temperature !== undefined) {
52
- transcriptionParams.temperature = options.temperature;
53
- }
54
- if (options.timestampGranularities) {
55
- transcriptionParams.timestamp_granularities =
56
- options.timestampGranularities;
57
- }
58
+ const openai = new OpenAI();
58
59
 
59
- const loader = new OpenAIWhisperAudio(tempFilePath, {
60
- transcriptionCreateParams: transcriptionParams,
61
- });
60
+ const transcriptionParams: OpenAI.Audio.TranscriptionCreateParams = {
61
+ file,
62
+ model: options.model ?? "whisper-1",
63
+ response_format: options.responseFormat ?? "text",
64
+ };
62
65
 
63
- const docs: Document[] = await loader.load();
64
- return this.extractTextFromDocs(docs);
65
- } finally {
66
- FilesUtils.cleanupTempFile(tempFilePath);
66
+ if (options.languageIn2Digits) {
67
+ transcriptionParams.language = options.languageIn2Digits;
68
+ }
69
+ if (options.prompt) {
70
+ transcriptionParams.prompt = options.prompt;
71
+ }
72
+ if (options.temperature !== undefined) {
73
+ transcriptionParams.temperature = options.temperature;
67
74
  }
75
+ if (options.timestampGranularities) {
76
+ transcriptionParams.timestamp_granularities =
77
+ options.timestampGranularities;
78
+ }
79
+
80
+ const response = await openai.audio.transcriptions.create(
81
+ transcriptionParams
82
+ );
83
+
84
+ return typeof response === "string" ? response : response.text;
68
85
  }
69
86
 
70
87
  static async transcribeFileWithWhisper(
@@ -77,6 +94,12 @@ export class LangchainAudioTranscription {
77
94
  }
78
95
 
79
96
  const audioBuffer = fs.readFileSync(filePath);
80
- return this.transcribeWithWhisper(audioBuffer, options, openAIApiKey);
97
+ const format =
98
+ options.format ?? (path.extname(filePath).replace(/^\./, "") || "mp3");
99
+ return this.transcribeWithWhisper(
100
+ audioBuffer,
101
+ { ...options, format },
102
+ openAIApiKey
103
+ );
81
104
  }
82
105
  }
@@ -118,7 +118,7 @@ export class LangchainMessages {
118
118
  // Prepara opções de transcrição - só inclui language se não houver mimeType
119
119
  // Com exactOptionalPropertyTypes: true, não podemos passar undefined explicitamente
120
120
  const transcriptionOptions = mimeType
121
- ? ({} as { language?: string })
121
+ ? { format: mimeType }
122
122
  : { language: "pt" };
123
123
 
124
124
  const transcribedText =
@@ -1,17 +1,30 @@
1
1
  import * as fs from "fs";
2
2
  import * as path from "path";
3
3
  import * as os from "os";
4
- import type { AudioBuffer } from "../@types/audio";
4
+ import {
5
+ MIME_TO_EXTENSION,
6
+ type AudioBuffer,
7
+ type AudioMimeType,
8
+ } from "../@types/audio";
5
9
 
6
10
  export class FilesUtils {
7
11
  static createTempFile(
8
12
  audioBuffer: AudioBuffer,
9
- prefix: string = "audio"
13
+ prefix: string = "audio",
14
+ format?: string | AudioMimeType
10
15
  ): string {
16
+ const extension = format
17
+ ? format.startsWith("audio/")
18
+ ? MIME_TO_EXTENSION[format as AudioMimeType] ?? "mp3"
19
+ : format.replace(/^\./, "")
20
+ : "mp3";
21
+
11
22
  const tempDir = os.tmpdir();
12
23
  const tempFilePath = path.join(
13
24
  tempDir,
14
- `${prefix}-${Date.now()}-${Math.random().toString(36).substring(7)}.mp3`
25
+ `${prefix}-${Date.now()}-${Math.random()
26
+ .toString(36)
27
+ .substring(7)}.${extension}`
15
28
  );
16
29
 
17
30
  // Converte o buffer para Buffer se necessário
@@ -1,287 +1,180 @@
1
1
  import { describe, it, expect, vi, beforeEach } from "vitest";
2
- import { LangchainAudioTranscription } from "../../../src/langchain/audio-transcription";
2
+ import { LangchainAudioTranscription } from "../../../src/langchain/audio-transcription.js";
3
3
  import * as fs from "fs";
4
- import * as os from "os";
5
- import * as path from "path";
6
4
 
7
- // Mock do fs e os - cria arquivos reais no sistema de arquivos temporário
8
- import * as realFs from "fs";
9
- import * as realOs from "os";
10
- import * as realPath from "path";
11
-
12
- // Calcula tempDir usando os módulos reais (antes dos mocks)
13
- const tempDir = realPath.join(realOs.tmpdir(), "langchain-audio-test");
14
-
15
- // Garante que o diretório temporário existe
16
- if (!realFs.existsSync(tempDir)) {
17
- realFs.mkdirSync(tempDir, { recursive: true });
18
- }
19
-
20
- vi.mock("fs", () => {
21
- // Usa require para acessar o módulo real diretamente
22
- const actualFs = require("fs");
23
-
24
- const writeFileSyncSpy = vi.fn((filePath: string, data: Buffer) => {
25
- // Cria o arquivo real no sistema de arquivos
26
- actualFs.writeFileSync(filePath, data);
27
- });
28
-
29
- const readFileSyncSpy = vi.fn((filePath: string) => {
30
- if (actualFs.existsSync(filePath)) {
31
- return actualFs.readFileSync(filePath);
32
- }
33
- return Buffer.from("fake audio data");
34
- });
35
-
36
- const existsSyncSpy = vi.fn((filePath: string) => {
37
- return actualFs.existsSync(filePath) || filePath.startsWith("/path/to/");
38
- });
39
-
40
- const unlinkSyncSpy = vi.fn((filePath: string) => {
41
- if (actualFs.existsSync(filePath)) {
42
- actualFs.unlinkSync(filePath);
43
- }
44
- });
45
-
46
- return {
47
- ...actualFs,
48
- writeFileSync: writeFileSyncSpy,
49
- readFileSync: readFileSyncSpy,
50
- existsSync: existsSyncSpy,
51
- unlinkSync: unlinkSyncSpy,
52
- };
5
+ const mockTranscriptionsCreate = vi.fn().mockResolvedValue({
6
+ text: "Texto transcrito do áudio",
53
7
  });
54
8
 
55
- vi.mock("os", () => {
56
- // Calcula tempDir dentro do mock usando os módulos reais
57
- const realOs = require("os");
58
- const realPath = require("path");
59
- const tempDirValue = realPath.join(realOs.tmpdir(), "langchain-audio-test");
60
-
9
+ vi.mock("openai", () => {
61
10
  return {
62
- tmpdir: vi.fn(() => tempDirValue),
11
+ default: class MockOpenAI {
12
+ audio = {
13
+ transcriptions: {
14
+ create: mockTranscriptionsCreate,
15
+ },
16
+ };
17
+ },
18
+ toFile: async (buffer: Buffer, filename: string) => {
19
+ return new File([new Uint8Array(buffer)], filename, {
20
+ type: "audio/mpeg",
21
+ });
22
+ },
63
23
  };
64
24
  });
65
25
 
66
- // Mock do módulo - precisa interceptar o require() dinâmico
67
- // Como o código usa require() dinâmico dentro de try-catch, precisamos garantir
68
- // que o mock seja aplicado antes do código ser executado
69
- vi.mock("@langchain/community/document_loaders/fs/openai_whisper_audio", () => {
70
- // Importa o fs mockado para verificar arquivos
71
- const fs = require("fs");
72
-
73
- class MockOpenAIWhisperAudio {
74
- constructor(public filePath: string, public options?: any) {
75
- // Verifica se o arquivo existe usando o fs mockado
76
- // O arquivo já deve ter sido criado pelo writeFileSync antes desta chamada
77
- if (!fs.existsSync(filePath)) {
78
- // Se não existe, lança o mesmo erro que o loader real lançaria
79
- const error: any = new Error(`ENOENT: no such file or directory, open '${filePath}'`);
80
- error.code = "ENOENT";
81
- error.errno = -4058;
82
- error.syscall = "open";
83
- error.path = filePath;
84
- throw error;
85
- }
86
- }
87
-
88
- async load() {
89
- return [
90
- {
91
- pageContent: "Texto transcrito do áudio",
92
- metadata: {},
93
- },
94
- ];
95
- }
96
- }
97
-
26
+ vi.mock("fs", () => {
27
+ const actualFs = require("fs");
98
28
  return {
99
- OpenAIWhisperAudio: MockOpenAIWhisperAudio,
29
+ ...actualFs,
30
+ existsSync: vi.fn((filePath: string) => {
31
+ return actualFs.existsSync(filePath) || filePath.startsWith("/path/to/");
32
+ }),
33
+ readFileSync: vi.fn((filePath: string) => {
34
+ if (actualFs.existsSync(filePath)) {
35
+ return actualFs.readFileSync(filePath);
36
+ }
37
+ return Buffer.from("fake audio data");
38
+ }),
100
39
  };
101
40
  });
102
41
 
103
- // Mock também precisa interceptar o require() dinâmico usado no código
104
- // Vamos mockar o módulo de forma que o require() pegue o mock
105
-
106
42
  describe("LangchainAudioTranscription", () => {
107
43
  beforeEach(() => {
108
44
  vi.clearAllMocks();
109
-
110
- // Limpa arquivos temporários criados nos testes anteriores
111
- if (realFs.existsSync(tempDir)) {
112
- const files = realFs.readdirSync(tempDir);
113
- files.forEach((file) => {
114
- const filePath = realPath.join(tempDir, file);
115
- try {
116
- realFs.unlinkSync(filePath);
117
- } catch (error) {
118
- // Ignora erros ao remover arquivos
119
- }
120
- });
121
- }
122
- });
123
-
124
- afterAll(() => {
125
- // Limpa diretório temporário após todos os testes
126
- if (realFs.existsSync(tempDir)) {
127
- try {
128
- realFs.rmSync(tempDir, { recursive: true, force: true });
129
- } catch (error) {
130
- // Ignora erros ao remover diretório
131
- }
132
- }
45
+ mockTranscriptionsCreate.mockResolvedValue({
46
+ text: "Texto transcrito do áudio",
47
+ });
133
48
  });
134
49
 
135
50
  describe("transcribeWithWhisper", () => {
136
51
  it("deve transcrever áudio usando Whisper", async () => {
137
- // Mocka diretamente o módulo após ser carregado
138
- const audioModulePath = "@langchain/community/document_loaders/fs/openai_whisper_audio";
139
- const audioModule = require(audioModulePath);
140
-
141
- class MockLoader {
142
- constructor(public filePath: string) {
143
- // Usa o fs mockado importado
144
- if (!fs.existsSync(filePath)) {
145
- throw new Error(`ENOENT: no such file or directory, open '${filePath}'`);
146
- }
147
- }
148
- async load() {
149
- return [{ pageContent: "Texto transcrito do áudio", metadata: {} }];
150
- }
151
- }
152
-
153
- // Substitui temporariamente a classe
154
- const originalLoader = audioModule.OpenAIWhisperAudio;
155
- audioModule.OpenAIWhisperAudio = MockLoader;
156
-
157
- // Recarrega o módulo para pegar o mock
158
- vi.resetModules();
159
- const transcriptionModule = await import("../../../src/langchain/audio-transcription");
160
-
161
- // Força a reimportação do loader mockado
162
- const newAudioModule = require(audioModulePath);
163
- newAudioModule.OpenAIWhisperAudio = MockLoader;
164
-
165
- try {
166
- const audioBuffer = Buffer.from("fake audio data");
167
-
168
- const result = await transcriptionModule.LangchainAudioTranscription.transcribeWithWhisper(
169
- audioBuffer
170
- );
171
-
172
- expect(result).toBe("Texto transcrito do áudio");
173
- expect(vi.mocked(fs.writeFileSync)).toHaveBeenCalled();
174
- expect(vi.mocked(fs.unlinkSync)).toHaveBeenCalled();
175
- } finally {
176
- // Restaura o loader original
177
- audioModule.OpenAIWhisperAudio = originalLoader;
178
- }
52
+ const audioBuffer = Buffer.from("fake audio data");
53
+
54
+ const result = await LangchainAudioTranscription.transcribeWithWhisper(
55
+ audioBuffer
56
+ );
57
+
58
+ expect(result).toBe("Texto transcrito do áudio");
59
+ expect(mockTranscriptionsCreate).toHaveBeenCalledWith(
60
+ expect.objectContaining({
61
+ model: "whisper-1",
62
+ response_format: "text",
63
+ })
64
+ );
179
65
  });
180
66
 
181
67
  it("deve aceitar opções de transcrição", async () => {
182
- const audioModulePath = "@langchain/community/document_loaders/fs/openai_whisper_audio";
183
- const audioModule = require(audioModulePath);
184
-
185
- class MockLoader {
186
- constructor(public filePath: string) {
187
- // Usa o fs mockado importado
188
- if (!fs.existsSync(filePath)) {
189
- throw new Error(`ENOENT: no such file or directory, open '${filePath}'`);
190
- }
191
- }
192
- async load() {
193
- return [{ pageContent: "Texto transcrito do áudio", metadata: {} }];
194
- }
195
- }
196
-
197
- const originalLoader = audioModule.OpenAIWhisperAudio;
198
- audioModule.OpenAIWhisperAudio = MockLoader;
199
-
200
- vi.resetModules();
201
- const transcriptionModule = await import("../../../src/langchain/audio-transcription");
202
- const newAudioModule = require(audioModulePath);
203
- newAudioModule.OpenAIWhisperAudio = MockLoader;
204
-
205
- try {
206
- const audioBuffer = Buffer.from("fake audio data");
207
-
208
- await transcriptionModule.LangchainAudioTranscription.transcribeWithWhisper(audioBuffer, {
209
- language: "pt",
210
- responseFormat: "json",
211
- });
68
+ const audioBuffer = Buffer.from("fake audio data");
212
69
 
213
- expect(vi.mocked(fs.writeFileSync)).toHaveBeenCalled();
214
- } finally {
215
- audioModule.OpenAIWhisperAudio = originalLoader;
216
- }
70
+ await LangchainAudioTranscription.transcribeWithWhisper(audioBuffer, {
71
+ languageIn2Digits: "pt",
72
+ responseFormat: "json",
73
+ });
74
+
75
+ expect(mockTranscriptionsCreate).toHaveBeenCalledWith(
76
+ expect.objectContaining({
77
+ language: "pt",
78
+ response_format: "json",
79
+ })
80
+ );
217
81
  });
218
82
 
219
- it("deve limpar arquivo temporário mesmo em caso de erro", async () => {
83
+ it("deve aceitar modelo customizado", async () => {
220
84
  const audioBuffer = Buffer.from("fake audio data");
221
-
222
- // Mocka o módulo antes de importar
223
- vi.doMock("@langchain/community/document_loaders/fs/openai_whisper_audio", () => {
224
- const fs = require("fs");
225
-
226
- class MockLoaderWithError {
227
- constructor(public filePath: string) {
228
- if (!fs.existsSync(filePath)) {
229
- throw new Error(`ENOENT: no such file or directory, open '${filePath}'`);
230
- }
231
- }
232
- async load() {
233
- throw new Error("Erro de transcrição");
234
- }
235
- }
236
-
237
- return { OpenAIWhisperAudio: MockLoaderWithError };
85
+
86
+ await LangchainAudioTranscription.transcribeWithWhisper(audioBuffer, {
87
+ model: "gpt-4o-transcribe",
238
88
  });
239
-
240
- vi.resetModules();
241
- const transcriptionModule = await import("../../../src/langchain/audio-transcription");
242
-
89
+
90
+ expect(mockTranscriptionsCreate).toHaveBeenCalledWith(
91
+ expect.objectContaining({
92
+ model: "gpt-4o-transcribe",
93
+ })
94
+ );
95
+ });
96
+
97
+ it("deve usar whisper-1 como padrão quando model não é informado", async () => {
98
+ const audioBuffer = Buffer.from("fake audio data");
99
+
100
+ await LangchainAudioTranscription.transcribeWithWhisper(audioBuffer);
101
+
102
+ expect(mockTranscriptionsCreate).toHaveBeenCalledWith(
103
+ expect.objectContaining({
104
+ model: "whisper-1",
105
+ })
106
+ );
107
+ });
108
+
109
+ it("deve aceitar formato de áudio nas opções (extensão)", async () => {
110
+ const audioBuffer = Buffer.from("fake wav audio data");
111
+
112
+ const result = await LangchainAudioTranscription.transcribeWithWhisper(
113
+ audioBuffer,
114
+ { format: "wav" }
115
+ );
116
+
117
+ expect(result).toBe("Texto transcrito do áudio");
118
+ expect(mockTranscriptionsCreate).toHaveBeenCalledWith(
119
+ expect.objectContaining({
120
+ file: expect.anything(),
121
+ })
122
+ );
123
+ const createCall = mockTranscriptionsCreate.mock.calls[0]?.[0];
124
+ expect(createCall?.file).toBeDefined();
125
+ const file = createCall?.file as { name?: string };
126
+ expect(file?.name).toMatch(/\.wav$/);
127
+ });
128
+
129
+ it("deve aceitar MIME type como formato", async () => {
130
+ const audioBuffer = Buffer.from("fake webm audio data");
131
+
132
+ const result = await LangchainAudioTranscription.transcribeWithWhisper(
133
+ audioBuffer,
134
+ { format: "audio/webm" }
135
+ );
136
+
137
+ expect(result).toBe("Texto transcrito do áudio");
138
+ const createCall = mockTranscriptionsCreate.mock.calls[0]?.[0];
139
+ const file = createCall?.file as { name?: string };
140
+ expect(file?.name).toMatch(/\.webm$/);
141
+ });
142
+
143
+ it("deve propagar erro da API", async () => {
144
+ mockTranscriptionsCreate.mockRejectedValueOnce(
145
+ new Error("Erro de transcrição")
146
+ );
147
+
148
+ const audioBuffer = Buffer.from("fake audio data");
149
+
243
150
  await expect(
244
- transcriptionModule.LangchainAudioTranscription.transcribeWithWhisper(audioBuffer)
151
+ LangchainAudioTranscription.transcribeWithWhisper(audioBuffer)
245
152
  ).rejects.toThrow("Erro de transcrição");
246
-
247
- // Verifica que tentou remover o arquivo temporário
248
- expect(vi.mocked(fs.unlinkSync)).toHaveBeenCalled();
249
153
  });
250
154
  });
251
155
 
252
156
  describe("transcribeFileWithWhisper", () => {
253
157
  it("deve transcrever arquivo usando Whisper", async () => {
254
- // Limpa o mock anterior e cria um novo mock
255
- vi.doUnmock("@langchain/community/document_loaders/fs/openai_whisper_audio");
256
- vi.doMock("@langchain/community/document_loaders/fs/openai_whisper_audio", () => {
257
- const fs = require("fs");
258
-
259
- class MockLoader {
260
- constructor(public filePath: string) {
261
- if (!fs.existsSync(filePath)) {
262
- throw new Error(`ENOENT: no such file or directory, open '${filePath}'`);
263
- }
264
- }
265
- async load() {
266
- return [{ pageContent: "Texto transcrito do áudio", metadata: {} }];
267
- }
268
- }
269
-
270
- return { OpenAIWhisperAudio: MockLoader };
271
- });
272
-
273
- vi.resetModules();
274
- const transcriptionModule = await import("../../../src/langchain/audio-transcription");
275
- const fs = require("fs");
276
-
277
158
  const filePath = "/path/to/audio.mp3";
278
159
 
279
160
  const result =
280
- await transcriptionModule.LangchainAudioTranscription.transcribeFileWithWhisper(filePath);
161
+ await LangchainAudioTranscription.transcribeFileWithWhisper(filePath);
162
+
163
+ expect(result).toBe("Texto transcrito do áudio");
164
+ expect(vi.mocked(fs.readFileSync)).toHaveBeenCalledWith(filePath);
165
+ expect(mockTranscriptionsCreate).toHaveBeenCalled();
166
+ });
167
+
168
+ it("deve extrair formato da extensão do arquivo quando format não é informado", async () => {
169
+ const filePath = "/path/to/audio.wav";
170
+
171
+ const result =
172
+ await LangchainAudioTranscription.transcribeFileWithWhisper(filePath);
281
173
 
282
174
  expect(result).toBe("Texto transcrito do áudio");
283
- // Verifica que readFileSync foi chamado (pode não ser spy após resetModules)
284
- expect(fs.readFileSync).toBeDefined();
175
+ const createCall = mockTranscriptionsCreate.mock.calls[0]?.[0];
176
+ const file = createCall?.file as { name?: string };
177
+ expect(file?.name).toMatch(/\.wav$/);
285
178
  });
286
179
 
287
180
  it("deve lançar erro se arquivo não existir", async () => {