@drax/ai-back 3.42.0 → 3.43.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. package/dist/agents/DraxAgent.js +2 -0
  2. package/dist/controllers/AIGenericController.js +22 -0
  3. package/dist/controllers/DraxAgentController.js +22 -0
  4. package/dist/index.js +2 -1
  5. package/dist/providers/ai/GoogleAiProvider.js +4 -1
  6. package/dist/providers/ai/OllamaAiProvider.js +4 -1
  7. package/dist/providers/ai/OpenAiProvider.js +4 -1
  8. package/dist/services/PromptAudioService.js +68 -0
  9. package/package.json +3 -3
  10. package/src/agents/DraxAgent.ts +2 -0
  11. package/src/controllers/AIGenericController.ts +24 -0
  12. package/src/controllers/DraxAgentController.ts +24 -0
  13. package/src/index.ts +8 -0
  14. package/src/interfaces/IAIProvider.ts +38 -1
  15. package/src/interfaces/IDraxAgent.ts +4 -0
  16. package/src/providers/ai/GoogleAiProvider.ts +4 -1
  17. package/src/providers/ai/OllamaAiProvider.ts +4 -1
  18. package/src/providers/ai/OpenAiProvider.ts +4 -1
  19. package/src/services/PromptAudioService.ts +87 -0
  20. package/test/DraxAgent.test.ts +64 -0
  21. package/test/PromptAudioService.test.ts +115 -0
  22. package/tsconfig.tsbuildinfo +1 -1
  23. package/types/agents/DraxAgent.d.ts.map +1 -1
  24. package/types/controllers/AIGenericController.d.ts.map +1 -1
  25. package/types/controllers/DraxAgentController.d.ts.map +1 -1
  26. package/types/index.d.ts +4 -3
  27. package/types/index.d.ts.map +1 -1
  28. package/types/interfaces/IAIProvider.d.ts +32 -1
  29. package/types/interfaces/IAIProvider.d.ts.map +1 -1
  30. package/types/interfaces/IDraxAgent.d.ts +3 -1
  31. package/types/interfaces/IDraxAgent.d.ts.map +1 -1
  32. package/types/providers/ai/GoogleAiProvider.d.ts.map +1 -1
  33. package/types/providers/ai/OllamaAiProvider.d.ts.map +1 -1
  34. package/types/providers/ai/OpenAiProvider.d.ts.map +1 -1
  35. package/types/services/PromptAudioService.d.ts +9 -0
  36. package/types/services/PromptAudioService.d.ts.map +1 -0
@@ -78,6 +78,7 @@ class DraxAgent {
78
78
  userAgent: input.userAgent,
79
79
  tenant: input.tenantId ?? session.tenantId ?? null,
80
80
  user: input.userId ?? session.userId ?? null,
81
+ audioResponse: input.audioResponse,
81
82
  });
82
83
  const assistantMessage = this.normalizeOutput(response.output);
83
84
  const now = new Date();
@@ -100,6 +101,7 @@ class DraxAgent {
100
101
  inputTokens: response.inputTokens,
101
102
  outputTokens: response.outputTokens,
102
103
  time: response.time,
104
+ ...(response.audio ? { audio: response.audio } : {}),
103
105
  };
104
106
  }
105
107
  async createSession(input = {}) {
@@ -35,6 +35,27 @@ const PromptInputFileSchema = z.object({
35
35
  mimetype: z.string().optional(),
36
36
  url: z.string().optional(),
37
37
  });
38
+ const PromptAudioVoiceSettingsSchema = z.object({
39
+ stability: z.number().min(0).max(1).optional(),
40
+ similarityBoost: z.number().min(0).max(1).optional(),
41
+ style: z.number().min(0).max(1).optional(),
42
+ useSpeakerBoost: z.boolean().optional(),
43
+ speed: z.number().positive().optional(),
44
+ });
45
+ const PromptAudioParamsSchema = z.object({
46
+ enabled: z.boolean().optional(),
47
+ provider: z.string().optional(),
48
+ voiceId: z.string().optional(),
49
+ model: z.string().optional(),
50
+ outputFormat: z.string().optional(),
51
+ voiceSettings: PromptAudioVoiceSettingsSchema.optional(),
52
+ previousText: z.string().optional(),
53
+ nextText: z.string().optional(),
54
+ languageCode: z.string().optional(),
55
+ seed: z.number().int().optional(),
56
+ operationTitle: z.string().optional(),
57
+ operationGroup: z.string().optional(),
58
+ });
38
59
  const GenericPromptRequestSchema = z.object({
39
60
  systemPrompt: z.string().min(1),
40
61
  userInput: z.string().optional(),
@@ -50,6 +71,7 @@ const GenericPromptRequestSchema = z.object({
50
71
  model: z.string().optional(),
51
72
  operationTitle: z.string().optional(),
52
73
  operationGroup: z.string().optional(),
74
+ audioResponse: z.union([z.boolean(), PromptAudioParamsSchema]).optional(),
53
75
  });
54
76
  class AIGenericController extends CommonController {
55
77
  async prompt(request, reply) {
@@ -28,6 +28,27 @@ const PromptInputFileSchema = z.object({
28
28
  mimetype: z.string().optional(),
29
29
  url: z.string().optional(),
30
30
  });
31
+ const PromptAudioVoiceSettingsSchema = z.object({
32
+ stability: z.number().min(0).max(1).optional(),
33
+ similarityBoost: z.number().min(0).max(1).optional(),
34
+ style: z.number().min(0).max(1).optional(),
35
+ useSpeakerBoost: z.boolean().optional(),
36
+ speed: z.number().positive().optional(),
37
+ });
38
+ const PromptAudioParamsSchema = z.object({
39
+ enabled: z.boolean().optional(),
40
+ provider: z.string().optional(),
41
+ voiceId: z.string().optional(),
42
+ model: z.string().optional(),
43
+ outputFormat: z.string().optional(),
44
+ voiceSettings: PromptAudioVoiceSettingsSchema.optional(),
45
+ previousText: z.string().optional(),
46
+ nextText: z.string().optional(),
47
+ languageCode: z.string().optional(),
48
+ seed: z.number().int().optional(),
49
+ operationTitle: z.string().optional(),
50
+ operationGroup: z.string().optional(),
51
+ });
31
52
  const AgentSessionRequestSchema = z.object({
32
53
  identifier: z.string().min(1).optional(),
33
54
  sessionId: z.string().optional(),
@@ -47,6 +68,7 @@ const AgentMessageRequestSchema = AgentSessionRequestSchema.extend({
47
68
  toolMaxIterations: z.number().optional(),
48
69
  operationTitle: z.string().optional(),
49
70
  operationGroup: z.string().optional(),
71
+ audioResponse: z.union([z.boolean(), PromptAudioParamsSchema]).optional(),
50
72
  });
51
73
  class DraxAgentController extends CommonController {
52
74
  constructor(options = {}) {
package/dist/index.js CHANGED
@@ -26,6 +26,7 @@ import { BuilderTool } from "./tools/BuilderTool.js";
26
26
  import { KnowledgeService } from "./services/KnowledgeService.js";
27
27
  import { AILogService } from "./services/AILogService.js";
28
28
  import { TTSGenericService } from "./services/TTSGenericService.js";
29
+ import { PromptAudioService } from "./services/PromptAudioService.js";
29
30
  import AILogPermissions from "./permissions/AILogPermissions.js";
30
31
  import AgentPermissions from "./permissions/AgentPermissions.js";
31
32
  import AgentSessionPermissions from "./permissions/AgentSessionPermissions.js";
@@ -45,7 +46,7 @@ import AgentSessionRoutes from "./routes/AgentSessionRoutes.js";
45
46
  import { DraxAgent } from "./agents/DraxAgent.js";
46
47
  export { OpenAiConfig, GoogleAiConfig, OllamaAiConfig, DeepSeekConfig, ElevenLabsTTSConfig, AILogSchema, AILogBaseSchema, TTSRequestSchema, TTSVoiceSettingsSchema, AILogModel, AILogMongoRepository, AILogSqliteRepository, OpenAiProviderFactory, GoogleAiProviderFactory, OllamaAiProviderFactory, DeepSeekAiProviderFactory, AiProviderFactory, ElevenLabsTTSProviderFactory, TTSProviderFactory, DraxAgentFactory, AILogServiceFactory, OpenAiProvider, GoogleAiProvider, OllamaAiProvider, DeepSeekAiProvider, ElevenLabsTTSProvider, BuilderTool,
47
48
  //Service
48
- KnowledgeService, AILogService, TTSGenericService,
49
+ KnowledgeService, AILogService, TTSGenericService, PromptAudioService,
49
50
  //Permissions
50
51
  AILogPermissions, AgentPermissions, AIPermissions, TTSPermissions, AgentSessionPermissions,
51
52
  //Controllers
@@ -1,5 +1,6 @@
1
1
  import { GoogleGenAI } from "@google/genai";
2
2
  import { toJSONSchema } from "zod";
3
+ import PromptAudioService from "../../services/PromptAudioService.js";
3
4
  class GoogleAiProvider {
4
5
  constructor(apiKey, model, visionModel, aiLogService) {
5
6
  if (!apiKey) {
@@ -330,6 +331,7 @@ class GoogleAiProvider {
330
331
  const endTime = performance.now();
331
332
  const time = endTime - startTime;
332
333
  const endedAt = new Date();
334
+ const audio = await PromptAudioService.build(input, output);
333
335
  await this.registerPromptLog(input, {
334
336
  model,
335
337
  systemPrompt,
@@ -346,7 +348,8 @@ class GoogleAiProvider {
346
348
  tokens,
347
349
  inputTokens,
348
350
  outputTokens,
349
- time
351
+ time,
352
+ ...(audio ? { audio } : {}),
350
353
  };
351
354
  }
352
355
  catch (e) {
@@ -1,4 +1,5 @@
1
1
  import { toJSONSchema } from "zod";
2
+ import PromptAudioService from "../../services/PromptAudioService.js";
2
3
  class OllamaAiProvider {
3
4
  constructor(baseUrl, model, visionModel, embeddingModel, aiLogService) {
4
5
  if (!baseUrl) {
@@ -305,6 +306,7 @@ class OllamaAiProvider {
305
306
  const endTime = performance.now();
306
307
  const time = endTime - startTime;
307
308
  const endedAt = new Date();
309
+ const audio = await PromptAudioService.build(input, output);
308
310
  await this.registerPromptLog(input, {
309
311
  model,
310
312
  systemPrompt,
@@ -321,7 +323,8 @@ class OllamaAiProvider {
321
323
  tokens,
322
324
  inputTokens,
323
325
  outputTokens,
324
- time
326
+ time,
327
+ ...(audio ? { audio } : {}),
325
328
  };
326
329
  }
327
330
  catch (e) {
@@ -1,5 +1,6 @@
1
1
  import OpenAI from "openai";
2
2
  import { zodResponseFormat } from "openai/helpers/zod";
3
+ import PromptAudioService from "../../services/PromptAudioService.js";
3
4
  class OpenAiProvider {
4
5
  constructor(apiKey, model, visionModel, aiLogService) {
5
6
  if (!apiKey) {
@@ -265,6 +266,7 @@ class OpenAiProvider {
265
266
  const endTime = performance.now();
266
267
  const time = endTime - startTime;
267
268
  const endedAt = new Date();
269
+ const audio = await PromptAudioService.build(input, output);
268
270
  await this.registerPromptLog(input, {
269
271
  model,
270
272
  systemPrompt,
@@ -281,7 +283,8 @@ class OpenAiProvider {
281
283
  tokens,
282
284
  inputTokens,
283
285
  outputTokens,
284
- time
286
+ time,
287
+ ...(audio ? { audio } : {}),
285
288
  };
286
289
  }
287
290
  catch (e) {
@@ -0,0 +1,68 @@
1
+ import TTSProviderFactory from "../factory/tts/TTSProviderFactory.js";
2
+ class PromptAudioService {
3
+ static audioParams(input) {
4
+ if (!input.audioResponse) {
5
+ return null;
6
+ }
7
+ if (input.audioResponse === true) {
8
+ return {};
9
+ }
10
+ if (input.audioResponse.enabled === false) {
11
+ return null;
12
+ }
13
+ return input.audioResponse;
14
+ }
15
+ static outputToText(output) {
16
+ if (typeof output === "string") {
17
+ return output;
18
+ }
19
+ if (output === null || output === undefined) {
20
+ return "";
21
+ }
22
+ return JSON.stringify(output);
23
+ }
24
+ static async build(input, output) {
25
+ const audioParams = PromptAudioService.audioParams(input);
26
+ if (!audioParams) {
27
+ return undefined;
28
+ }
29
+ const text = PromptAudioService.outputToText(output).trim();
30
+ if (!text) {
31
+ return undefined;
32
+ }
33
+ const providerName = audioParams.provider ?? "ElevenLabs";
34
+ const ttsProvider = TTSProviderFactory.instance(providerName);
35
+ const response = await ttsProvider.textToSpeech({
36
+ text,
37
+ voiceId: audioParams.voiceId,
38
+ model: audioParams.model,
39
+ outputFormat: audioParams.outputFormat,
40
+ voiceSettings: audioParams.voiceSettings,
41
+ previousText: audioParams.previousText,
42
+ nextText: audioParams.nextText,
43
+ languageCode: audioParams.languageCode,
44
+ seed: audioParams.seed,
45
+ operationTitle: audioParams.operationTitle ?? input.operationTitle,
46
+ operationGroup: audioParams.operationGroup ?? input.operationGroup,
47
+ ip: input.ip,
48
+ userAgent: input.userAgent,
49
+ tenant: input.tenant,
50
+ user: input.user,
51
+ });
52
+ return {
53
+ audio: response.audio.toString("base64"),
54
+ contentType: response.contentType,
55
+ encoding: "base64",
56
+ meta: {
57
+ provider: response.provider,
58
+ model: response.model,
59
+ voiceId: response.voiceId,
60
+ outputFormat: response.outputFormat,
61
+ size: response.size,
62
+ time: response.time,
63
+ },
64
+ };
65
+ }
66
+ }
67
+ export default PromptAudioService;
68
+ export { PromptAudioService, };
package/package.json CHANGED
@@ -3,7 +3,7 @@
3
3
  "publishConfig": {
4
4
  "access": "public"
5
5
  },
6
- "version": "3.42.0",
6
+ "version": "3.43.0",
7
7
  "description": "Ai utils",
8
8
  "main": "dist/index.js",
9
9
  "types": "types/index.d.ts",
@@ -18,7 +18,7 @@
18
18
  "author": "Cristian Incarnato & Drax Team",
19
19
  "license": "ISC",
20
20
  "dependencies": {
21
- "@drax/ai-share": "^3.31.0",
21
+ "@drax/ai-share": "^3.43.0",
22
22
  "@drax/crud-back": "^3.39.0",
23
23
  "mongoose": "^8.23.0",
24
24
  "mongoose-paginate-v2": "^1.8.3"
@@ -46,5 +46,5 @@
46
46
  "typescript": "^5.9.3",
47
47
  "vitest": "^3.0.8"
48
48
  },
49
- "gitHead": "30b8ca8c49adb16b4617e61c497b554389c49dd1"
49
+ "gitHead": "9cb1cab8a6fe2a6c574d08596bc6287f5a5311a6"
50
50
  }
@@ -110,6 +110,7 @@ class DraxAgent {
110
110
  userAgent: input.userAgent,
111
111
  tenant: input.tenantId ?? session.tenantId ?? null,
112
112
  user: input.userId ?? session.userId ?? null,
113
+ audioResponse: input.audioResponse,
113
114
  });
114
115
 
115
116
  const assistantMessage = this.normalizeOutput(response.output);
@@ -134,6 +135,7 @@ class DraxAgent {
134
135
  inputTokens: response.inputTokens,
135
136
  outputTokens: response.outputTokens,
136
137
  time: response.time,
138
+ ...(response.audio ? {audio: response.audio} : {}),
137
139
  };
138
140
  }
139
141
 
@@ -42,6 +42,29 @@ const PromptInputFileSchema = z.object({
42
42
  url: z.string().optional(),
43
43
  })
44
44
 
45
+ const PromptAudioVoiceSettingsSchema = z.object({
46
+ stability: z.number().min(0).max(1).optional(),
47
+ similarityBoost: z.number().min(0).max(1).optional(),
48
+ style: z.number().min(0).max(1).optional(),
49
+ useSpeakerBoost: z.boolean().optional(),
50
+ speed: z.number().positive().optional(),
51
+ })
52
+
53
+ const PromptAudioParamsSchema = z.object({
54
+ enabled: z.boolean().optional(),
55
+ provider: z.string().optional(),
56
+ voiceId: z.string().optional(),
57
+ model: z.string().optional(),
58
+ outputFormat: z.string().optional(),
59
+ voiceSettings: PromptAudioVoiceSettingsSchema.optional(),
60
+ previousText: z.string().optional(),
61
+ nextText: z.string().optional(),
62
+ languageCode: z.string().optional(),
63
+ seed: z.number().int().optional(),
64
+ operationTitle: z.string().optional(),
65
+ operationGroup: z.string().optional(),
66
+ })
67
+
45
68
  const GenericPromptRequestSchema = z.object({
46
69
  systemPrompt: z.string().min(1),
47
70
  userInput: z.string().optional(),
@@ -57,6 +80,7 @@ const GenericPromptRequestSchema = z.object({
57
80
  model: z.string().optional(),
58
81
  operationTitle: z.string().optional(),
59
82
  operationGroup: z.string().optional(),
83
+ audioResponse: z.union([z.boolean(), PromptAudioParamsSchema]).optional(),
60
84
  })
61
85
 
62
86
  class AIGenericController extends CommonController {
@@ -35,6 +35,29 @@ const PromptInputFileSchema = z.object({
35
35
  url: z.string().optional(),
36
36
  });
37
37
 
38
+ const PromptAudioVoiceSettingsSchema = z.object({
39
+ stability: z.number().min(0).max(1).optional(),
40
+ similarityBoost: z.number().min(0).max(1).optional(),
41
+ style: z.number().min(0).max(1).optional(),
42
+ useSpeakerBoost: z.boolean().optional(),
43
+ speed: z.number().positive().optional(),
44
+ });
45
+
46
+ const PromptAudioParamsSchema = z.object({
47
+ enabled: z.boolean().optional(),
48
+ provider: z.string().optional(),
49
+ voiceId: z.string().optional(),
50
+ model: z.string().optional(),
51
+ outputFormat: z.string().optional(),
52
+ voiceSettings: PromptAudioVoiceSettingsSchema.optional(),
53
+ previousText: z.string().optional(),
54
+ nextText: z.string().optional(),
55
+ languageCode: z.string().optional(),
56
+ seed: z.number().int().optional(),
57
+ operationTitle: z.string().optional(),
58
+ operationGroup: z.string().optional(),
59
+ });
60
+
38
61
  const AgentSessionRequestSchema = z.object({
39
62
  identifier: z.string().min(1).optional(),
40
63
  sessionId: z.string().optional(),
@@ -55,6 +78,7 @@ const AgentMessageRequestSchema = AgentSessionRequestSchema.extend({
55
78
  toolMaxIterations: z.number().optional(),
56
79
  operationTitle: z.string().optional(),
57
80
  operationGroup: z.string().optional(),
81
+ audioResponse: z.union([z.boolean(), PromptAudioParamsSchema]).optional(),
58
82
  });
59
83
 
60
84
  class DraxAgentController extends CommonController {
package/src/index.ts CHANGED
@@ -27,6 +27,7 @@ import {BuilderTool} from "./tools/BuilderTool.js";
27
27
  import {KnowledgeService} from "./services/KnowledgeService.js";
28
28
  import {AILogService} from "./services/AILogService.js";
29
29
  import {TTSGenericService} from "./services/TTSGenericService.js";
30
+ import {PromptAudioService} from "./services/PromptAudioService.js";
30
31
  import AILogPermissions from "./permissions/AILogPermissions.js";
31
32
  import AgentPermissions from "./permissions/AgentPermissions.js";
32
33
  import AgentSessionPermissions from "./permissions/AgentSessionPermissions.js";
@@ -50,6 +51,9 @@ import type {
50
51
  IPromptContentPart,
51
52
  IPromptContentPartImage,
52
53
  IPromptContentPartText,
54
+ IPromptAudioParams,
55
+ IPromptAudioResponse,
56
+ IPromptAudioResponseMeta,
53
57
  IPromptImage,
54
58
  IPromptImageDetail,
55
59
  IPromptMessage,
@@ -108,6 +112,9 @@ export type {
108
112
  IPromptContentPart,
109
113
  IPromptContentPartImage,
110
114
  IPromptContentPartText,
115
+ IPromptAudioParams,
116
+ IPromptAudioResponse,
117
+ IPromptAudioResponseMeta,
111
118
  IPromptResponse,
112
119
  ITTSProvider,
113
120
  ITTSParams,
@@ -166,6 +173,7 @@ export {
166
173
  KnowledgeService,
167
174
  AILogService,
168
175
  TTSGenericService,
176
+ PromptAudioService,
169
177
  //Permissions
170
178
  AILogPermissions,
171
179
  AgentPermissions,
@@ -1,4 +1,5 @@
1
1
  import { ZodSchema } from 'zod'
2
+ import type {ITTSVoiceSettings} from "./ITTSProvider.js";
2
3
 
3
4
  type Role = 'user' | 'assistant' | 'system';
4
5
 
@@ -74,6 +75,38 @@ interface IPromptParams {
74
75
  userAgent?: string,
75
76
  tenant?: string | null,
76
77
  user?: string | null,
78
+ audioResponse?: boolean | IPromptAudioParams,
79
+ }
80
+
81
+ interface IPromptAudioParams {
82
+ enabled?: boolean,
83
+ provider?: string,
84
+ voiceId?: string,
85
+ model?: string,
86
+ outputFormat?: string,
87
+ voiceSettings?: ITTSVoiceSettings,
88
+ previousText?: string,
89
+ nextText?: string,
90
+ languageCode?: string,
91
+ seed?: number,
92
+ operationTitle?: string,
93
+ operationGroup?: string,
94
+ }
95
+
96
+ interface IPromptAudioResponseMeta {
97
+ provider: string,
98
+ model: string,
99
+ voiceId: string,
100
+ outputFormat?: string,
101
+ size: number,
102
+ time: number,
103
+ }
104
+
105
+ interface IPromptAudioResponse {
106
+ audio: string,
107
+ contentType: string,
108
+ encoding: 'base64',
109
+ meta: IPromptAudioResponseMeta,
77
110
  }
78
111
 
79
112
  interface IPromptResponse {
@@ -81,7 +114,8 @@ interface IPromptResponse {
81
114
  tokens: number,
82
115
  inputTokens: number,
83
116
  outputTokens: number,
84
- time: number
117
+ time: number,
118
+ audio?: IPromptAudioResponse,
85
119
  }
86
120
 
87
121
  interface IAIProvider {
@@ -92,6 +126,9 @@ export type {
92
126
  IAIProvider,
93
127
  IPromptParams,
94
128
  IPromptResponse,
129
+ IPromptAudioParams,
130
+ IPromptAudioResponse,
131
+ IPromptAudioResponseMeta,
95
132
  IPromptMessage,
96
133
  IPromptMemory,
97
134
  IPromptTool,
@@ -1,5 +1,7 @@
1
1
  import type {
2
2
  IAIProvider,
3
+ IPromptAudioParams,
4
+ IPromptAudioResponse,
3
5
  IPromptContentPart,
4
6
  IPromptImage,
5
7
  IPromptMemory,
@@ -67,6 +69,7 @@ interface DraxAgentMessageInput extends DraxAgentSessionInput {
67
69
  toolMaxIterations?: number;
68
70
  operationTitle?: string;
69
71
  operationGroup?: string;
72
+ audioResponse?: boolean | IPromptAudioParams;
70
73
  }
71
74
 
72
75
  interface DraxAgentMessageOutput {
@@ -79,6 +82,7 @@ interface DraxAgentMessageOutput {
79
82
  inputTokens: number;
80
83
  outputTokens: number;
81
84
  time: number;
85
+ audio?: IPromptAudioResponse;
82
86
  }
83
87
 
84
88
  interface DraxAgentSession {
@@ -17,6 +17,7 @@ import type {
17
17
  } from "../../interfaces/IAIProvider.js";
18
18
  import type {AILogService} from "../../services/AILogService.js";
19
19
  import type {IAILogBase} from "@drax/ai-share";
20
+ import PromptAudioService from "../../services/PromptAudioService.js";
20
21
 
21
22
  class GoogleAiProvider implements IAIProvider{
22
23
  protected _apiKey: string
@@ -446,6 +447,7 @@ class GoogleAiProvider implements IAIProvider{
446
447
  const endTime = performance.now()
447
448
  const time = endTime - startTime
448
449
  const endedAt = new Date()
450
+ const audio = await PromptAudioService.build(input, output)
449
451
 
450
452
  await this.registerPromptLog(input, {
451
453
  model,
@@ -464,7 +466,8 @@ class GoogleAiProvider implements IAIProvider{
464
466
  tokens,
465
467
  inputTokens,
466
468
  outputTokens,
467
- time
469
+ time,
470
+ ...(audio ? {audio} : {}),
468
471
  }
469
472
  } catch (e: any) {
470
473
  const endedAt = new Date()
@@ -9,6 +9,7 @@ import type {
9
9
  } from "../../interfaces/IAIProvider.js";
10
10
  import type {AILogService} from "../../services/AILogService.js";
11
11
  import type {IAILogBase} from "@drax/ai-share";
12
+ import PromptAudioService from "../../services/PromptAudioService.js";
12
13
 
13
14
  type OllamaMessage = {
14
15
  role: "system" | "user" | "assistant" | "tool",
@@ -426,6 +427,7 @@ class OllamaAiProvider implements IAIProvider{
426
427
  const endTime = performance.now()
427
428
  const time = endTime - startTime
428
429
  const endedAt = new Date()
430
+ const audio = await PromptAudioService.build(input, output)
429
431
 
430
432
  await this.registerPromptLog(input, {
431
433
  model,
@@ -444,7 +446,8 @@ class OllamaAiProvider implements IAIProvider{
444
446
  tokens,
445
447
  inputTokens,
446
448
  outputTokens,
447
- time
449
+ time,
450
+ ...(audio ? {audio} : {}),
448
451
  }
449
452
  } catch (e: any) {
450
453
  const endedAt = new Date()
@@ -10,6 +10,7 @@ import type {
10
10
  } from "../../interfaces/IAIProvider.js";
11
11
  import type {AILogService} from "../../services/AILogService.js";
12
12
  import type {IAILogBase} from "@drax/ai-share";
13
+ import PromptAudioService from "../../services/PromptAudioService.js";
13
14
 
14
15
  class OpenAiProvider implements IAIProvider{
15
16
  protected _apiKey: string
@@ -358,6 +359,7 @@ class OpenAiProvider implements IAIProvider{
358
359
  const endTime = performance.now()
359
360
  const time = endTime - startTime
360
361
  const endedAt = new Date()
362
+ const audio = await PromptAudioService.build(input, output)
361
363
 
362
364
  await this.registerPromptLog(input, {
363
365
  model,
@@ -376,7 +378,8 @@ class OpenAiProvider implements IAIProvider{
376
378
  tokens,
377
379
  inputTokens,
378
380
  outputTokens,
379
- time
381
+ time,
382
+ ...(audio ? {audio} : {}),
380
383
  }
381
384
  } catch (e: any) {
382
385
  const endedAt = new Date()
@@ -0,0 +1,87 @@
1
+ import TTSProviderFactory from "../factory/tts/TTSProviderFactory.js";
2
+ import type {IPromptAudioParams, IPromptAudioResponse, IPromptParams} from "../interfaces/IAIProvider.js";
3
+
4
+ class PromptAudioService {
5
+
6
+ static audioParams(input: IPromptParams): IPromptAudioParams | null {
7
+ if(!input.audioResponse){
8
+ return null
9
+ }
10
+
11
+ if(input.audioResponse === true){
12
+ return {}
13
+ }
14
+
15
+ if(input.audioResponse.enabled === false){
16
+ return null
17
+ }
18
+
19
+ return input.audioResponse
20
+ }
21
+
22
+ static outputToText(output: unknown): string {
23
+ if(typeof output === "string"){
24
+ return output
25
+ }
26
+
27
+ if(output === null || output === undefined){
28
+ return ""
29
+ }
30
+
31
+ return JSON.stringify(output)
32
+ }
33
+
34
+ static async build(input: IPromptParams, output: unknown): Promise<IPromptAudioResponse | undefined> {
35
+ const audioParams = PromptAudioService.audioParams(input)
36
+
37
+ if(!audioParams){
38
+ return undefined
39
+ }
40
+
41
+ const text = PromptAudioService.outputToText(output).trim()
42
+
43
+ if(!text){
44
+ return undefined
45
+ }
46
+
47
+ const providerName = audioParams.provider ?? "ElevenLabs"
48
+ const ttsProvider = TTSProviderFactory.instance(providerName)
49
+ const response = await ttsProvider.textToSpeech({
50
+ text,
51
+ voiceId: audioParams.voiceId,
52
+ model: audioParams.model,
53
+ outputFormat: audioParams.outputFormat,
54
+ voiceSettings: audioParams.voiceSettings,
55
+ previousText: audioParams.previousText,
56
+ nextText: audioParams.nextText,
57
+ languageCode: audioParams.languageCode,
58
+ seed: audioParams.seed,
59
+ operationTitle: audioParams.operationTitle ?? input.operationTitle,
60
+ operationGroup: audioParams.operationGroup ?? input.operationGroup,
61
+ ip: input.ip,
62
+ userAgent: input.userAgent,
63
+ tenant: input.tenant,
64
+ user: input.user,
65
+ })
66
+
67
+ return {
68
+ audio: response.audio.toString("base64"),
69
+ contentType: response.contentType,
70
+ encoding: "base64",
71
+ meta: {
72
+ provider: response.provider,
73
+ model: response.model,
74
+ voiceId: response.voiceId,
75
+ outputFormat: response.outputFormat,
76
+ size: response.size,
77
+ time: response.time,
78
+ },
79
+ }
80
+ }
81
+
82
+ }
83
+
84
+ export default PromptAudioService
85
+ export {
86
+ PromptAudioService,
87
+ }