@drax/ai-back 3.42.0 → 3.43.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents/DraxAgent.js +2 -0
- package/dist/controllers/AIGenericController.js +22 -0
- package/dist/controllers/DraxAgentController.js +22 -0
- package/dist/index.js +2 -1
- package/dist/providers/ai/GoogleAiProvider.js +4 -1
- package/dist/providers/ai/OllamaAiProvider.js +4 -1
- package/dist/providers/ai/OpenAiProvider.js +4 -1
- package/dist/services/PromptAudioService.js +68 -0
- package/package.json +3 -3
- package/src/agents/DraxAgent.ts +2 -0
- package/src/controllers/AIGenericController.ts +24 -0
- package/src/controllers/DraxAgentController.ts +24 -0
- package/src/index.ts +8 -0
- package/src/interfaces/IAIProvider.ts +38 -1
- package/src/interfaces/IDraxAgent.ts +4 -0
- package/src/providers/ai/GoogleAiProvider.ts +4 -1
- package/src/providers/ai/OllamaAiProvider.ts +4 -1
- package/src/providers/ai/OpenAiProvider.ts +4 -1
- package/src/services/PromptAudioService.ts +87 -0
- package/test/DraxAgent.test.ts +64 -0
- package/test/PromptAudioService.test.ts +115 -0
- package/tsconfig.tsbuildinfo +1 -1
- package/types/agents/DraxAgent.d.ts.map +1 -1
- package/types/controllers/AIGenericController.d.ts.map +1 -1
- package/types/controllers/DraxAgentController.d.ts.map +1 -1
- package/types/index.d.ts +4 -3
- package/types/index.d.ts.map +1 -1
- package/types/interfaces/IAIProvider.d.ts +32 -1
- package/types/interfaces/IAIProvider.d.ts.map +1 -1
- package/types/interfaces/IDraxAgent.d.ts +3 -1
- package/types/interfaces/IDraxAgent.d.ts.map +1 -1
- package/types/providers/ai/GoogleAiProvider.d.ts.map +1 -1
- package/types/providers/ai/OllamaAiProvider.d.ts.map +1 -1
- package/types/providers/ai/OpenAiProvider.d.ts.map +1 -1
- package/types/services/PromptAudioService.d.ts +9 -0
- package/types/services/PromptAudioService.d.ts.map +1 -0
package/dist/agents/DraxAgent.js
CHANGED
|
@@ -78,6 +78,7 @@ class DraxAgent {
|
|
|
78
78
|
userAgent: input.userAgent,
|
|
79
79
|
tenant: input.tenantId ?? session.tenantId ?? null,
|
|
80
80
|
user: input.userId ?? session.userId ?? null,
|
|
81
|
+
audioResponse: input.audioResponse,
|
|
81
82
|
});
|
|
82
83
|
const assistantMessage = this.normalizeOutput(response.output);
|
|
83
84
|
const now = new Date();
|
|
@@ -100,6 +101,7 @@ class DraxAgent {
|
|
|
100
101
|
inputTokens: response.inputTokens,
|
|
101
102
|
outputTokens: response.outputTokens,
|
|
102
103
|
time: response.time,
|
|
104
|
+
...(response.audio ? { audio: response.audio } : {}),
|
|
103
105
|
};
|
|
104
106
|
}
|
|
105
107
|
async createSession(input = {}) {
|
|
@@ -35,6 +35,27 @@ const PromptInputFileSchema = z.object({
|
|
|
35
35
|
mimetype: z.string().optional(),
|
|
36
36
|
url: z.string().optional(),
|
|
37
37
|
});
|
|
38
|
+
const PromptAudioVoiceSettingsSchema = z.object({
|
|
39
|
+
stability: z.number().min(0).max(1).optional(),
|
|
40
|
+
similarityBoost: z.number().min(0).max(1).optional(),
|
|
41
|
+
style: z.number().min(0).max(1).optional(),
|
|
42
|
+
useSpeakerBoost: z.boolean().optional(),
|
|
43
|
+
speed: z.number().positive().optional(),
|
|
44
|
+
});
|
|
45
|
+
const PromptAudioParamsSchema = z.object({
|
|
46
|
+
enabled: z.boolean().optional(),
|
|
47
|
+
provider: z.string().optional(),
|
|
48
|
+
voiceId: z.string().optional(),
|
|
49
|
+
model: z.string().optional(),
|
|
50
|
+
outputFormat: z.string().optional(),
|
|
51
|
+
voiceSettings: PromptAudioVoiceSettingsSchema.optional(),
|
|
52
|
+
previousText: z.string().optional(),
|
|
53
|
+
nextText: z.string().optional(),
|
|
54
|
+
languageCode: z.string().optional(),
|
|
55
|
+
seed: z.number().int().optional(),
|
|
56
|
+
operationTitle: z.string().optional(),
|
|
57
|
+
operationGroup: z.string().optional(),
|
|
58
|
+
});
|
|
38
59
|
const GenericPromptRequestSchema = z.object({
|
|
39
60
|
systemPrompt: z.string().min(1),
|
|
40
61
|
userInput: z.string().optional(),
|
|
@@ -50,6 +71,7 @@ const GenericPromptRequestSchema = z.object({
|
|
|
50
71
|
model: z.string().optional(),
|
|
51
72
|
operationTitle: z.string().optional(),
|
|
52
73
|
operationGroup: z.string().optional(),
|
|
74
|
+
audioResponse: z.union([z.boolean(), PromptAudioParamsSchema]).optional(),
|
|
53
75
|
});
|
|
54
76
|
class AIGenericController extends CommonController {
|
|
55
77
|
async prompt(request, reply) {
|
|
@@ -28,6 +28,27 @@ const PromptInputFileSchema = z.object({
|
|
|
28
28
|
mimetype: z.string().optional(),
|
|
29
29
|
url: z.string().optional(),
|
|
30
30
|
});
|
|
31
|
+
const PromptAudioVoiceSettingsSchema = z.object({
|
|
32
|
+
stability: z.number().min(0).max(1).optional(),
|
|
33
|
+
similarityBoost: z.number().min(0).max(1).optional(),
|
|
34
|
+
style: z.number().min(0).max(1).optional(),
|
|
35
|
+
useSpeakerBoost: z.boolean().optional(),
|
|
36
|
+
speed: z.number().positive().optional(),
|
|
37
|
+
});
|
|
38
|
+
const PromptAudioParamsSchema = z.object({
|
|
39
|
+
enabled: z.boolean().optional(),
|
|
40
|
+
provider: z.string().optional(),
|
|
41
|
+
voiceId: z.string().optional(),
|
|
42
|
+
model: z.string().optional(),
|
|
43
|
+
outputFormat: z.string().optional(),
|
|
44
|
+
voiceSettings: PromptAudioVoiceSettingsSchema.optional(),
|
|
45
|
+
previousText: z.string().optional(),
|
|
46
|
+
nextText: z.string().optional(),
|
|
47
|
+
languageCode: z.string().optional(),
|
|
48
|
+
seed: z.number().int().optional(),
|
|
49
|
+
operationTitle: z.string().optional(),
|
|
50
|
+
operationGroup: z.string().optional(),
|
|
51
|
+
});
|
|
31
52
|
const AgentSessionRequestSchema = z.object({
|
|
32
53
|
identifier: z.string().min(1).optional(),
|
|
33
54
|
sessionId: z.string().optional(),
|
|
@@ -47,6 +68,7 @@ const AgentMessageRequestSchema = AgentSessionRequestSchema.extend({
|
|
|
47
68
|
toolMaxIterations: z.number().optional(),
|
|
48
69
|
operationTitle: z.string().optional(),
|
|
49
70
|
operationGroup: z.string().optional(),
|
|
71
|
+
audioResponse: z.union([z.boolean(), PromptAudioParamsSchema]).optional(),
|
|
50
72
|
});
|
|
51
73
|
class DraxAgentController extends CommonController {
|
|
52
74
|
constructor(options = {}) {
|
package/dist/index.js
CHANGED
|
@@ -26,6 +26,7 @@ import { BuilderTool } from "./tools/BuilderTool.js";
|
|
|
26
26
|
import { KnowledgeService } from "./services/KnowledgeService.js";
|
|
27
27
|
import { AILogService } from "./services/AILogService.js";
|
|
28
28
|
import { TTSGenericService } from "./services/TTSGenericService.js";
|
|
29
|
+
import { PromptAudioService } from "./services/PromptAudioService.js";
|
|
29
30
|
import AILogPermissions from "./permissions/AILogPermissions.js";
|
|
30
31
|
import AgentPermissions from "./permissions/AgentPermissions.js";
|
|
31
32
|
import AgentSessionPermissions from "./permissions/AgentSessionPermissions.js";
|
|
@@ -45,7 +46,7 @@ import AgentSessionRoutes from "./routes/AgentSessionRoutes.js";
|
|
|
45
46
|
import { DraxAgent } from "./agents/DraxAgent.js";
|
|
46
47
|
export { OpenAiConfig, GoogleAiConfig, OllamaAiConfig, DeepSeekConfig, ElevenLabsTTSConfig, AILogSchema, AILogBaseSchema, TTSRequestSchema, TTSVoiceSettingsSchema, AILogModel, AILogMongoRepository, AILogSqliteRepository, OpenAiProviderFactory, GoogleAiProviderFactory, OllamaAiProviderFactory, DeepSeekAiProviderFactory, AiProviderFactory, ElevenLabsTTSProviderFactory, TTSProviderFactory, DraxAgentFactory, AILogServiceFactory, OpenAiProvider, GoogleAiProvider, OllamaAiProvider, DeepSeekAiProvider, ElevenLabsTTSProvider, BuilderTool,
|
|
47
48
|
//Service
|
|
48
|
-
KnowledgeService, AILogService, TTSGenericService,
|
|
49
|
+
KnowledgeService, AILogService, TTSGenericService, PromptAudioService,
|
|
49
50
|
//Permissions
|
|
50
51
|
AILogPermissions, AgentPermissions, AIPermissions, TTSPermissions, AgentSessionPermissions,
|
|
51
52
|
//Controllers
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { GoogleGenAI } from "@google/genai";
|
|
2
2
|
import { toJSONSchema } from "zod";
|
|
3
|
+
import PromptAudioService from "../../services/PromptAudioService.js";
|
|
3
4
|
class GoogleAiProvider {
|
|
4
5
|
constructor(apiKey, model, visionModel, aiLogService) {
|
|
5
6
|
if (!apiKey) {
|
|
@@ -330,6 +331,7 @@ class GoogleAiProvider {
|
|
|
330
331
|
const endTime = performance.now();
|
|
331
332
|
const time = endTime - startTime;
|
|
332
333
|
const endedAt = new Date();
|
|
334
|
+
const audio = await PromptAudioService.build(input, output);
|
|
333
335
|
await this.registerPromptLog(input, {
|
|
334
336
|
model,
|
|
335
337
|
systemPrompt,
|
|
@@ -346,7 +348,8 @@ class GoogleAiProvider {
|
|
|
346
348
|
tokens,
|
|
347
349
|
inputTokens,
|
|
348
350
|
outputTokens,
|
|
349
|
-
time
|
|
351
|
+
time,
|
|
352
|
+
...(audio ? { audio } : {}),
|
|
350
353
|
};
|
|
351
354
|
}
|
|
352
355
|
catch (e) {
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { toJSONSchema } from "zod";
|
|
2
|
+
import PromptAudioService from "../../services/PromptAudioService.js";
|
|
2
3
|
class OllamaAiProvider {
|
|
3
4
|
constructor(baseUrl, model, visionModel, embeddingModel, aiLogService) {
|
|
4
5
|
if (!baseUrl) {
|
|
@@ -305,6 +306,7 @@ class OllamaAiProvider {
|
|
|
305
306
|
const endTime = performance.now();
|
|
306
307
|
const time = endTime - startTime;
|
|
307
308
|
const endedAt = new Date();
|
|
309
|
+
const audio = await PromptAudioService.build(input, output);
|
|
308
310
|
await this.registerPromptLog(input, {
|
|
309
311
|
model,
|
|
310
312
|
systemPrompt,
|
|
@@ -321,7 +323,8 @@ class OllamaAiProvider {
|
|
|
321
323
|
tokens,
|
|
322
324
|
inputTokens,
|
|
323
325
|
outputTokens,
|
|
324
|
-
time
|
|
326
|
+
time,
|
|
327
|
+
...(audio ? { audio } : {}),
|
|
325
328
|
};
|
|
326
329
|
}
|
|
327
330
|
catch (e) {
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import OpenAI from "openai";
|
|
2
2
|
import { zodResponseFormat } from "openai/helpers/zod";
|
|
3
|
+
import PromptAudioService from "../../services/PromptAudioService.js";
|
|
3
4
|
class OpenAiProvider {
|
|
4
5
|
constructor(apiKey, model, visionModel, aiLogService) {
|
|
5
6
|
if (!apiKey) {
|
|
@@ -265,6 +266,7 @@ class OpenAiProvider {
|
|
|
265
266
|
const endTime = performance.now();
|
|
266
267
|
const time = endTime - startTime;
|
|
267
268
|
const endedAt = new Date();
|
|
269
|
+
const audio = await PromptAudioService.build(input, output);
|
|
268
270
|
await this.registerPromptLog(input, {
|
|
269
271
|
model,
|
|
270
272
|
systemPrompt,
|
|
@@ -281,7 +283,8 @@ class OpenAiProvider {
|
|
|
281
283
|
tokens,
|
|
282
284
|
inputTokens,
|
|
283
285
|
outputTokens,
|
|
284
|
-
time
|
|
286
|
+
time,
|
|
287
|
+
...(audio ? { audio } : {}),
|
|
285
288
|
};
|
|
286
289
|
}
|
|
287
290
|
catch (e) {
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
import TTSProviderFactory from "../factory/tts/TTSProviderFactory.js";
|
|
2
|
+
class PromptAudioService {
|
|
3
|
+
static audioParams(input) {
|
|
4
|
+
if (!input.audioResponse) {
|
|
5
|
+
return null;
|
|
6
|
+
}
|
|
7
|
+
if (input.audioResponse === true) {
|
|
8
|
+
return {};
|
|
9
|
+
}
|
|
10
|
+
if (input.audioResponse.enabled === false) {
|
|
11
|
+
return null;
|
|
12
|
+
}
|
|
13
|
+
return input.audioResponse;
|
|
14
|
+
}
|
|
15
|
+
static outputToText(output) {
|
|
16
|
+
if (typeof output === "string") {
|
|
17
|
+
return output;
|
|
18
|
+
}
|
|
19
|
+
if (output === null || output === undefined) {
|
|
20
|
+
return "";
|
|
21
|
+
}
|
|
22
|
+
return JSON.stringify(output);
|
|
23
|
+
}
|
|
24
|
+
static async build(input, output) {
|
|
25
|
+
const audioParams = PromptAudioService.audioParams(input);
|
|
26
|
+
if (!audioParams) {
|
|
27
|
+
return undefined;
|
|
28
|
+
}
|
|
29
|
+
const text = PromptAudioService.outputToText(output).trim();
|
|
30
|
+
if (!text) {
|
|
31
|
+
return undefined;
|
|
32
|
+
}
|
|
33
|
+
const providerName = audioParams.provider ?? "ElevenLabs";
|
|
34
|
+
const ttsProvider = TTSProviderFactory.instance(providerName);
|
|
35
|
+
const response = await ttsProvider.textToSpeech({
|
|
36
|
+
text,
|
|
37
|
+
voiceId: audioParams.voiceId,
|
|
38
|
+
model: audioParams.model,
|
|
39
|
+
outputFormat: audioParams.outputFormat,
|
|
40
|
+
voiceSettings: audioParams.voiceSettings,
|
|
41
|
+
previousText: audioParams.previousText,
|
|
42
|
+
nextText: audioParams.nextText,
|
|
43
|
+
languageCode: audioParams.languageCode,
|
|
44
|
+
seed: audioParams.seed,
|
|
45
|
+
operationTitle: audioParams.operationTitle ?? input.operationTitle,
|
|
46
|
+
operationGroup: audioParams.operationGroup ?? input.operationGroup,
|
|
47
|
+
ip: input.ip,
|
|
48
|
+
userAgent: input.userAgent,
|
|
49
|
+
tenant: input.tenant,
|
|
50
|
+
user: input.user,
|
|
51
|
+
});
|
|
52
|
+
return {
|
|
53
|
+
audio: response.audio.toString("base64"),
|
|
54
|
+
contentType: response.contentType,
|
|
55
|
+
encoding: "base64",
|
|
56
|
+
meta: {
|
|
57
|
+
provider: response.provider,
|
|
58
|
+
model: response.model,
|
|
59
|
+
voiceId: response.voiceId,
|
|
60
|
+
outputFormat: response.outputFormat,
|
|
61
|
+
size: response.size,
|
|
62
|
+
time: response.time,
|
|
63
|
+
},
|
|
64
|
+
};
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
export default PromptAudioService;
|
|
68
|
+
export { PromptAudioService, };
|
package/package.json
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
"publishConfig": {
|
|
4
4
|
"access": "public"
|
|
5
5
|
},
|
|
6
|
-
"version": "3.42.0",
|
|
6
|
+
"version": "3.43.0",
|
|
7
7
|
"description": "Ai utils",
|
|
8
8
|
"main": "dist/index.js",
|
|
9
9
|
"types": "types/index.d.ts",
|
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
"author": "Cristian Incarnato & Drax Team",
|
|
19
19
|
"license": "ISC",
|
|
20
20
|
"dependencies": {
|
|
21
|
-
"@drax/ai-share": "^3.
|
|
21
|
+
"@drax/ai-share": "^3.43.0",
|
|
22
22
|
"@drax/crud-back": "^3.39.0",
|
|
23
23
|
"mongoose": "^8.23.0",
|
|
24
24
|
"mongoose-paginate-v2": "^1.8.3"
|
|
@@ -46,5 +46,5 @@
|
|
|
46
46
|
"typescript": "^5.9.3",
|
|
47
47
|
"vitest": "^3.0.8"
|
|
48
48
|
},
|
|
49
|
-
"gitHead": "
|
|
49
|
+
"gitHead": "9cb1cab8a6fe2a6c574d08596bc6287f5a5311a6"
|
|
50
50
|
}
|
package/src/agents/DraxAgent.ts
CHANGED
|
@@ -110,6 +110,7 @@ class DraxAgent {
|
|
|
110
110
|
userAgent: input.userAgent,
|
|
111
111
|
tenant: input.tenantId ?? session.tenantId ?? null,
|
|
112
112
|
user: input.userId ?? session.userId ?? null,
|
|
113
|
+
audioResponse: input.audioResponse,
|
|
113
114
|
});
|
|
114
115
|
|
|
115
116
|
const assistantMessage = this.normalizeOutput(response.output);
|
|
@@ -134,6 +135,7 @@ class DraxAgent {
|
|
|
134
135
|
inputTokens: response.inputTokens,
|
|
135
136
|
outputTokens: response.outputTokens,
|
|
136
137
|
time: response.time,
|
|
138
|
+
...(response.audio ? {audio: response.audio} : {}),
|
|
137
139
|
};
|
|
138
140
|
}
|
|
139
141
|
|
|
@@ -42,6 +42,29 @@ const PromptInputFileSchema = z.object({
|
|
|
42
42
|
url: z.string().optional(),
|
|
43
43
|
})
|
|
44
44
|
|
|
45
|
+
const PromptAudioVoiceSettingsSchema = z.object({
|
|
46
|
+
stability: z.number().min(0).max(1).optional(),
|
|
47
|
+
similarityBoost: z.number().min(0).max(1).optional(),
|
|
48
|
+
style: z.number().min(0).max(1).optional(),
|
|
49
|
+
useSpeakerBoost: z.boolean().optional(),
|
|
50
|
+
speed: z.number().positive().optional(),
|
|
51
|
+
})
|
|
52
|
+
|
|
53
|
+
const PromptAudioParamsSchema = z.object({
|
|
54
|
+
enabled: z.boolean().optional(),
|
|
55
|
+
provider: z.string().optional(),
|
|
56
|
+
voiceId: z.string().optional(),
|
|
57
|
+
model: z.string().optional(),
|
|
58
|
+
outputFormat: z.string().optional(),
|
|
59
|
+
voiceSettings: PromptAudioVoiceSettingsSchema.optional(),
|
|
60
|
+
previousText: z.string().optional(),
|
|
61
|
+
nextText: z.string().optional(),
|
|
62
|
+
languageCode: z.string().optional(),
|
|
63
|
+
seed: z.number().int().optional(),
|
|
64
|
+
operationTitle: z.string().optional(),
|
|
65
|
+
operationGroup: z.string().optional(),
|
|
66
|
+
})
|
|
67
|
+
|
|
45
68
|
const GenericPromptRequestSchema = z.object({
|
|
46
69
|
systemPrompt: z.string().min(1),
|
|
47
70
|
userInput: z.string().optional(),
|
|
@@ -57,6 +80,7 @@ const GenericPromptRequestSchema = z.object({
|
|
|
57
80
|
model: z.string().optional(),
|
|
58
81
|
operationTitle: z.string().optional(),
|
|
59
82
|
operationGroup: z.string().optional(),
|
|
83
|
+
audioResponse: z.union([z.boolean(), PromptAudioParamsSchema]).optional(),
|
|
60
84
|
})
|
|
61
85
|
|
|
62
86
|
class AIGenericController extends CommonController {
|
|
@@ -35,6 +35,29 @@ const PromptInputFileSchema = z.object({
|
|
|
35
35
|
url: z.string().optional(),
|
|
36
36
|
});
|
|
37
37
|
|
|
38
|
+
const PromptAudioVoiceSettingsSchema = z.object({
|
|
39
|
+
stability: z.number().min(0).max(1).optional(),
|
|
40
|
+
similarityBoost: z.number().min(0).max(1).optional(),
|
|
41
|
+
style: z.number().min(0).max(1).optional(),
|
|
42
|
+
useSpeakerBoost: z.boolean().optional(),
|
|
43
|
+
speed: z.number().positive().optional(),
|
|
44
|
+
});
|
|
45
|
+
|
|
46
|
+
const PromptAudioParamsSchema = z.object({
|
|
47
|
+
enabled: z.boolean().optional(),
|
|
48
|
+
provider: z.string().optional(),
|
|
49
|
+
voiceId: z.string().optional(),
|
|
50
|
+
model: z.string().optional(),
|
|
51
|
+
outputFormat: z.string().optional(),
|
|
52
|
+
voiceSettings: PromptAudioVoiceSettingsSchema.optional(),
|
|
53
|
+
previousText: z.string().optional(),
|
|
54
|
+
nextText: z.string().optional(),
|
|
55
|
+
languageCode: z.string().optional(),
|
|
56
|
+
seed: z.number().int().optional(),
|
|
57
|
+
operationTitle: z.string().optional(),
|
|
58
|
+
operationGroup: z.string().optional(),
|
|
59
|
+
});
|
|
60
|
+
|
|
38
61
|
const AgentSessionRequestSchema = z.object({
|
|
39
62
|
identifier: z.string().min(1).optional(),
|
|
40
63
|
sessionId: z.string().optional(),
|
|
@@ -55,6 +78,7 @@ const AgentMessageRequestSchema = AgentSessionRequestSchema.extend({
|
|
|
55
78
|
toolMaxIterations: z.number().optional(),
|
|
56
79
|
operationTitle: z.string().optional(),
|
|
57
80
|
operationGroup: z.string().optional(),
|
|
81
|
+
audioResponse: z.union([z.boolean(), PromptAudioParamsSchema]).optional(),
|
|
58
82
|
});
|
|
59
83
|
|
|
60
84
|
class DraxAgentController extends CommonController {
|
package/src/index.ts
CHANGED
|
@@ -27,6 +27,7 @@ import {BuilderTool} from "./tools/BuilderTool.js";
|
|
|
27
27
|
import {KnowledgeService} from "./services/KnowledgeService.js";
|
|
28
28
|
import {AILogService} from "./services/AILogService.js";
|
|
29
29
|
import {TTSGenericService} from "./services/TTSGenericService.js";
|
|
30
|
+
import {PromptAudioService} from "./services/PromptAudioService.js";
|
|
30
31
|
import AILogPermissions from "./permissions/AILogPermissions.js";
|
|
31
32
|
import AgentPermissions from "./permissions/AgentPermissions.js";
|
|
32
33
|
import AgentSessionPermissions from "./permissions/AgentSessionPermissions.js";
|
|
@@ -50,6 +51,9 @@ import type {
|
|
|
50
51
|
IPromptContentPart,
|
|
51
52
|
IPromptContentPartImage,
|
|
52
53
|
IPromptContentPartText,
|
|
54
|
+
IPromptAudioParams,
|
|
55
|
+
IPromptAudioResponse,
|
|
56
|
+
IPromptAudioResponseMeta,
|
|
53
57
|
IPromptImage,
|
|
54
58
|
IPromptImageDetail,
|
|
55
59
|
IPromptMessage,
|
|
@@ -108,6 +112,9 @@ export type {
|
|
|
108
112
|
IPromptContentPart,
|
|
109
113
|
IPromptContentPartImage,
|
|
110
114
|
IPromptContentPartText,
|
|
115
|
+
IPromptAudioParams,
|
|
116
|
+
IPromptAudioResponse,
|
|
117
|
+
IPromptAudioResponseMeta,
|
|
111
118
|
IPromptResponse,
|
|
112
119
|
ITTSProvider,
|
|
113
120
|
ITTSParams,
|
|
@@ -166,6 +173,7 @@ export {
|
|
|
166
173
|
KnowledgeService,
|
|
167
174
|
AILogService,
|
|
168
175
|
TTSGenericService,
|
|
176
|
+
PromptAudioService,
|
|
169
177
|
//Permissions
|
|
170
178
|
AILogPermissions,
|
|
171
179
|
AgentPermissions,
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { ZodSchema } from 'zod'
|
|
2
|
+
import type {ITTSVoiceSettings} from "./ITTSProvider.js";
|
|
2
3
|
|
|
3
4
|
type Role = 'user' | 'assistant' | 'system';
|
|
4
5
|
|
|
@@ -74,6 +75,38 @@ interface IPromptParams {
|
|
|
74
75
|
userAgent?: string,
|
|
75
76
|
tenant?: string | null,
|
|
76
77
|
user?: string | null,
|
|
78
|
+
audioResponse?: boolean | IPromptAudioParams,
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
interface IPromptAudioParams {
|
|
82
|
+
enabled?: boolean,
|
|
83
|
+
provider?: string,
|
|
84
|
+
voiceId?: string,
|
|
85
|
+
model?: string,
|
|
86
|
+
outputFormat?: string,
|
|
87
|
+
voiceSettings?: ITTSVoiceSettings,
|
|
88
|
+
previousText?: string,
|
|
89
|
+
nextText?: string,
|
|
90
|
+
languageCode?: string,
|
|
91
|
+
seed?: number,
|
|
92
|
+
operationTitle?: string,
|
|
93
|
+
operationGroup?: string,
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
interface IPromptAudioResponseMeta {
|
|
97
|
+
provider: string,
|
|
98
|
+
model: string,
|
|
99
|
+
voiceId: string,
|
|
100
|
+
outputFormat?: string,
|
|
101
|
+
size: number,
|
|
102
|
+
time: number,
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
interface IPromptAudioResponse {
|
|
106
|
+
audio: string,
|
|
107
|
+
contentType: string,
|
|
108
|
+
encoding: 'base64',
|
|
109
|
+
meta: IPromptAudioResponseMeta,
|
|
77
110
|
}
|
|
78
111
|
|
|
79
112
|
interface IPromptResponse {
|
|
@@ -81,7 +114,8 @@ interface IPromptResponse {
|
|
|
81
114
|
tokens: number,
|
|
82
115
|
inputTokens: number,
|
|
83
116
|
outputTokens: number,
|
|
84
|
-
time: number
|
|
117
|
+
time: number,
|
|
118
|
+
audio?: IPromptAudioResponse,
|
|
85
119
|
}
|
|
86
120
|
|
|
87
121
|
interface IAIProvider {
|
|
@@ -92,6 +126,9 @@ export type {
|
|
|
92
126
|
IAIProvider,
|
|
93
127
|
IPromptParams,
|
|
94
128
|
IPromptResponse,
|
|
129
|
+
IPromptAudioParams,
|
|
130
|
+
IPromptAudioResponse,
|
|
131
|
+
IPromptAudioResponseMeta,
|
|
95
132
|
IPromptMessage,
|
|
96
133
|
IPromptMemory,
|
|
97
134
|
IPromptTool,
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
import type {
|
|
2
2
|
IAIProvider,
|
|
3
|
+
IPromptAudioParams,
|
|
4
|
+
IPromptAudioResponse,
|
|
3
5
|
IPromptContentPart,
|
|
4
6
|
IPromptImage,
|
|
5
7
|
IPromptMemory,
|
|
@@ -67,6 +69,7 @@ interface DraxAgentMessageInput extends DraxAgentSessionInput {
|
|
|
67
69
|
toolMaxIterations?: number;
|
|
68
70
|
operationTitle?: string;
|
|
69
71
|
operationGroup?: string;
|
|
72
|
+
audioResponse?: boolean | IPromptAudioParams;
|
|
70
73
|
}
|
|
71
74
|
|
|
72
75
|
interface DraxAgentMessageOutput {
|
|
@@ -79,6 +82,7 @@ interface DraxAgentMessageOutput {
|
|
|
79
82
|
inputTokens: number;
|
|
80
83
|
outputTokens: number;
|
|
81
84
|
time: number;
|
|
85
|
+
audio?: IPromptAudioResponse;
|
|
82
86
|
}
|
|
83
87
|
|
|
84
88
|
interface DraxAgentSession {
|
|
@@ -17,6 +17,7 @@ import type {
|
|
|
17
17
|
} from "../../interfaces/IAIProvider.js";
|
|
18
18
|
import type {AILogService} from "../../services/AILogService.js";
|
|
19
19
|
import type {IAILogBase} from "@drax/ai-share";
|
|
20
|
+
import PromptAudioService from "../../services/PromptAudioService.js";
|
|
20
21
|
|
|
21
22
|
class GoogleAiProvider implements IAIProvider{
|
|
22
23
|
protected _apiKey: string
|
|
@@ -446,6 +447,7 @@ class GoogleAiProvider implements IAIProvider{
|
|
|
446
447
|
const endTime = performance.now()
|
|
447
448
|
const time = endTime - startTime
|
|
448
449
|
const endedAt = new Date()
|
|
450
|
+
const audio = await PromptAudioService.build(input, output)
|
|
449
451
|
|
|
450
452
|
await this.registerPromptLog(input, {
|
|
451
453
|
model,
|
|
@@ -464,7 +466,8 @@ class GoogleAiProvider implements IAIProvider{
|
|
|
464
466
|
tokens,
|
|
465
467
|
inputTokens,
|
|
466
468
|
outputTokens,
|
|
467
|
-
time
|
|
469
|
+
time,
|
|
470
|
+
...(audio ? {audio} : {}),
|
|
468
471
|
}
|
|
469
472
|
} catch (e: any) {
|
|
470
473
|
const endedAt = new Date()
|
|
@@ -9,6 +9,7 @@ import type {
|
|
|
9
9
|
} from "../../interfaces/IAIProvider.js";
|
|
10
10
|
import type {AILogService} from "../../services/AILogService.js";
|
|
11
11
|
import type {IAILogBase} from "@drax/ai-share";
|
|
12
|
+
import PromptAudioService from "../../services/PromptAudioService.js";
|
|
12
13
|
|
|
13
14
|
type OllamaMessage = {
|
|
14
15
|
role: "system" | "user" | "assistant" | "tool",
|
|
@@ -426,6 +427,7 @@ class OllamaAiProvider implements IAIProvider{
|
|
|
426
427
|
const endTime = performance.now()
|
|
427
428
|
const time = endTime - startTime
|
|
428
429
|
const endedAt = new Date()
|
|
430
|
+
const audio = await PromptAudioService.build(input, output)
|
|
429
431
|
|
|
430
432
|
await this.registerPromptLog(input, {
|
|
431
433
|
model,
|
|
@@ -444,7 +446,8 @@ class OllamaAiProvider implements IAIProvider{
|
|
|
444
446
|
tokens,
|
|
445
447
|
inputTokens,
|
|
446
448
|
outputTokens,
|
|
447
|
-
time
|
|
449
|
+
time,
|
|
450
|
+
...(audio ? {audio} : {}),
|
|
448
451
|
}
|
|
449
452
|
} catch (e: any) {
|
|
450
453
|
const endedAt = new Date()
|
|
@@ -10,6 +10,7 @@ import type {
|
|
|
10
10
|
} from "../../interfaces/IAIProvider.js";
|
|
11
11
|
import type {AILogService} from "../../services/AILogService.js";
|
|
12
12
|
import type {IAILogBase} from "@drax/ai-share";
|
|
13
|
+
import PromptAudioService from "../../services/PromptAudioService.js";
|
|
13
14
|
|
|
14
15
|
class OpenAiProvider implements IAIProvider{
|
|
15
16
|
protected _apiKey: string
|
|
@@ -358,6 +359,7 @@ class OpenAiProvider implements IAIProvider{
|
|
|
358
359
|
const endTime = performance.now()
|
|
359
360
|
const time = endTime - startTime
|
|
360
361
|
const endedAt = new Date()
|
|
362
|
+
const audio = await PromptAudioService.build(input, output)
|
|
361
363
|
|
|
362
364
|
await this.registerPromptLog(input, {
|
|
363
365
|
model,
|
|
@@ -376,7 +378,8 @@ class OpenAiProvider implements IAIProvider{
|
|
|
376
378
|
tokens,
|
|
377
379
|
inputTokens,
|
|
378
380
|
outputTokens,
|
|
379
|
-
time
|
|
381
|
+
time,
|
|
382
|
+
...(audio ? {audio} : {}),
|
|
380
383
|
}
|
|
381
384
|
} catch (e: any) {
|
|
382
385
|
const endedAt = new Date()
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
import TTSProviderFactory from "../factory/tts/TTSProviderFactory.js";
|
|
2
|
+
import type {IPromptAudioParams, IPromptAudioResponse, IPromptParams} from "../interfaces/IAIProvider.js";
|
|
3
|
+
|
|
4
|
+
class PromptAudioService {
|
|
5
|
+
|
|
6
|
+
static audioParams(input: IPromptParams): IPromptAudioParams | null {
|
|
7
|
+
if(!input.audioResponse){
|
|
8
|
+
return null
|
|
9
|
+
}
|
|
10
|
+
|
|
11
|
+
if(input.audioResponse === true){
|
|
12
|
+
return {}
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
if(input.audioResponse.enabled === false){
|
|
16
|
+
return null
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
return input.audioResponse
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
static outputToText(output: unknown): string {
|
|
23
|
+
if(typeof output === "string"){
|
|
24
|
+
return output
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
if(output === null || output === undefined){
|
|
28
|
+
return ""
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
return JSON.stringify(output)
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
static async build(input: IPromptParams, output: unknown): Promise<IPromptAudioResponse | undefined> {
|
|
35
|
+
const audioParams = PromptAudioService.audioParams(input)
|
|
36
|
+
|
|
37
|
+
if(!audioParams){
|
|
38
|
+
return undefined
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
const text = PromptAudioService.outputToText(output).trim()
|
|
42
|
+
|
|
43
|
+
if(!text){
|
|
44
|
+
return undefined
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
const providerName = audioParams.provider ?? "ElevenLabs"
|
|
48
|
+
const ttsProvider = TTSProviderFactory.instance(providerName)
|
|
49
|
+
const response = await ttsProvider.textToSpeech({
|
|
50
|
+
text,
|
|
51
|
+
voiceId: audioParams.voiceId,
|
|
52
|
+
model: audioParams.model,
|
|
53
|
+
outputFormat: audioParams.outputFormat,
|
|
54
|
+
voiceSettings: audioParams.voiceSettings,
|
|
55
|
+
previousText: audioParams.previousText,
|
|
56
|
+
nextText: audioParams.nextText,
|
|
57
|
+
languageCode: audioParams.languageCode,
|
|
58
|
+
seed: audioParams.seed,
|
|
59
|
+
operationTitle: audioParams.operationTitle ?? input.operationTitle,
|
|
60
|
+
operationGroup: audioParams.operationGroup ?? input.operationGroup,
|
|
61
|
+
ip: input.ip,
|
|
62
|
+
userAgent: input.userAgent,
|
|
63
|
+
tenant: input.tenant,
|
|
64
|
+
user: input.user,
|
|
65
|
+
})
|
|
66
|
+
|
|
67
|
+
return {
|
|
68
|
+
audio: response.audio.toString("base64"),
|
|
69
|
+
contentType: response.contentType,
|
|
70
|
+
encoding: "base64",
|
|
71
|
+
meta: {
|
|
72
|
+
provider: response.provider,
|
|
73
|
+
model: response.model,
|
|
74
|
+
voiceId: response.voiceId,
|
|
75
|
+
outputFormat: response.outputFormat,
|
|
76
|
+
size: response.size,
|
|
77
|
+
time: response.time,
|
|
78
|
+
},
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
export default PromptAudioService
|
|
85
|
+
export {
|
|
86
|
+
PromptAudioService,
|
|
87
|
+
}
|