@drax/ai-back 3.41.0 → 3.43.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents/DraxAgent.js +3 -1
- package/dist/config/ElevenLabsTTSConfig.js +10 -0
- package/dist/controllers/AICrudController.js +1 -1
- package/dist/controllers/AIGenericController.js +23 -1
- package/dist/controllers/DraxAgentController.js +22 -0
- package/dist/controllers/TTSGenericController.js +61 -0
- package/dist/factory/ElevenLabsTTSProviderFactory.js +13 -0
- package/dist/factory/TTSProviderFactory.js +27 -0
- package/dist/factory/ai/AiProviderFactory.js +30 -0
- package/dist/factory/ai/DeepSeekAiProviderFactory.js +14 -0
- package/dist/factory/ai/GoogleAiProviderFactory.js +14 -0
- package/dist/factory/ai/OllamaAiProviderFactory.js +14 -0
- package/dist/factory/ai/OpenAiProviderFactory.js +14 -0
- package/dist/factory/tts/ElevenLabsTTSProviderFactory.js +13 -0
- package/dist/factory/tts/TTSProviderFactory.js +27 -0
- package/dist/index.js +23 -13
- package/dist/interfaces/ITTSProvider.js +1 -0
- package/dist/permissions/TTSPermissions.js +6 -0
- package/dist/providers/ElevenLabsTTSProvider.js +108 -0
- package/dist/providers/ai/DeepSeekAiProvider.js +34 -0
- package/dist/providers/ai/GoogleAiProvider.js +370 -0
- package/dist/providers/ai/OllamaAiProvider.js +345 -0
- package/dist/providers/ai/OpenAiProvider.js +305 -0
- package/dist/providers/tts/ElevenLabsTTSProvider.js +108 -0
- package/dist/routes/TTSRoutes.js +8 -0
- package/dist/schemas/TTSRequestSchema.js +24 -0
- package/dist/services/PromptAudioService.js +68 -0
- package/dist/services/TTSGenericService.js +21 -0
- package/package.json +3 -3
- package/src/agents/DraxAgent.ts +3 -1
- package/src/config/ElevenLabsTTSConfig.ts +13 -0
- package/src/controllers/AICrudController.ts +1 -1
- package/src/controllers/AIGenericController.ts +25 -1
- package/src/controllers/DraxAgentController.ts +24 -0
- package/src/controllers/TTSGenericController.ts +70 -0
- package/src/factory/{AiProviderFactory.ts → ai/AiProviderFactory.ts} +3 -3
- package/src/factory/ai/DeepSeekAiProviderFactory.ts +27 -0
- package/src/factory/{GoogleAiProviderFactory.ts → ai/GoogleAiProviderFactory.ts} +4 -4
- package/src/factory/{OllamaAiProviderFactory.ts → ai/OllamaAiProviderFactory.ts} +4 -4
- package/src/factory/{OpenAiProviderFactory.ts → ai/OpenAiProviderFactory.ts} +4 -4
- package/src/factory/tts/ElevenLabsTTSProviderFactory.ts +26 -0
- package/src/factory/tts/TTSProviderFactory.ts +42 -0
- package/src/index.ts +60 -11
- package/src/interfaces/IAIProvider.ts +38 -1
- package/src/interfaces/IDraxAgent.ts +4 -0
- package/src/interfaces/ITTSProvider.ts +47 -0
- package/src/permissions/AIPermissions.ts +0 -1
- package/src/permissions/TTSPermissions.ts +8 -0
- package/src/providers/{DeepSeekProvider.ts → ai/DeepSeekAiProvider.ts} +5 -5
- package/src/providers/{GoogleAiProvider.ts → ai/GoogleAiProvider.ts} +6 -3
- package/src/providers/{OllamaAiProvider.ts → ai/OllamaAiProvider.ts} +6 -3
- package/src/providers/{OpenAiProvider.ts → ai/OpenAiProvider.ts} +6 -3
- package/src/providers/tts/ElevenLabsTTSProvider.ts +132 -0
- package/src/routes/TTSRoutes.ts +13 -0
- package/src/schemas/TTSRequestSchema.ts +38 -0
- package/src/services/PromptAudioService.ts +87 -0
- package/src/services/TTSGenericService.ts +41 -0
- package/test/DeepSeekProvider.test.ts +4 -4
- package/test/DraxAgent.test.ts +64 -0
- package/test/PromptAudioService.test.ts +115 -0
- package/tsconfig.tsbuildinfo +1 -1
- package/types/agents/DraxAgent.d.ts.map +1 -1
- package/types/config/ElevenLabsTTSConfig.d.ts +10 -0
- package/types/config/ElevenLabsTTSConfig.d.ts.map +1 -0
- package/types/controllers/AIGenericController.d.ts.map +1 -1
- package/types/controllers/DraxAgentController.d.ts.map +1 -1
- package/types/controllers/TTSGenericController.d.ts +11 -0
- package/types/controllers/TTSGenericController.d.ts.map +1 -0
- package/types/factory/ElevenLabsTTSProviderFactory.d.ts +8 -0
- package/types/factory/ElevenLabsTTSProviderFactory.d.ts.map +1 -0
- package/types/factory/TTSProviderFactory.d.ts +15 -0
- package/types/factory/TTSProviderFactory.d.ts.map +1 -0
- package/types/factory/ai/AiProviderFactory.d.ts +8 -0
- package/types/factory/ai/AiProviderFactory.d.ts.map +1 -0
- package/types/factory/ai/DeepSeekAiProviderFactory.d.ts +8 -0
- package/types/factory/ai/DeepSeekAiProviderFactory.d.ts.map +1 -0
- package/types/factory/ai/GoogleAiProviderFactory.d.ts +8 -0
- package/types/factory/ai/GoogleAiProviderFactory.d.ts.map +1 -0
- package/types/factory/ai/OllamaAiProviderFactory.d.ts +8 -0
- package/types/factory/ai/OllamaAiProviderFactory.d.ts.map +1 -0
- package/types/factory/ai/OpenAiProviderFactory.d.ts +8 -0
- package/types/factory/ai/OpenAiProviderFactory.d.ts.map +1 -0
- package/types/factory/tts/ElevenLabsTTSProviderFactory.d.ts +8 -0
- package/types/factory/tts/ElevenLabsTTSProviderFactory.d.ts.map +1 -0
- package/types/factory/tts/TTSProviderFactory.d.ts +15 -0
- package/types/factory/tts/TTSProviderFactory.d.ts.map +1 -0
- package/types/index.d.ts +26 -12
- package/types/index.d.ts.map +1 -1
- package/types/interfaces/IAIProvider.d.ts +32 -1
- package/types/interfaces/IAIProvider.d.ts.map +1 -1
- package/types/interfaces/IDraxAgent.d.ts +3 -1
- package/types/interfaces/IDraxAgent.d.ts.map +1 -1
- package/types/interfaces/ITTSProvider.d.ts +39 -0
- package/types/interfaces/ITTSProvider.d.ts.map +1 -0
- package/types/permissions/TTSPermissions.d.ts +6 -0
- package/types/permissions/TTSPermissions.d.ts.map +1 -0
- package/types/providers/ElevenLabsTTSProvider.d.ts +38 -0
- package/types/providers/ElevenLabsTTSProvider.d.ts.map +1 -0
- package/types/providers/ai/DeepSeekAiProvider.d.ts +24 -0
- package/types/providers/ai/DeepSeekAiProvider.d.ts.map +1 -0
- package/types/providers/ai/GoogleAiProvider.d.ts +63 -0
- package/types/providers/ai/GoogleAiProvider.d.ts.map +1 -0
- package/types/providers/ai/OllamaAiProvider.d.ts +78 -0
- package/types/providers/ai/OllamaAiProvider.d.ts.map +1 -0
- package/types/providers/ai/OpenAiProvider.d.ts +97 -0
- package/types/providers/ai/OpenAiProvider.d.ts.map +1 -0
- package/types/providers/tts/ElevenLabsTTSProvider.d.ts +38 -0
- package/types/providers/tts/ElevenLabsTTSProvider.d.ts.map +1 -0
- package/types/routes/TTSRoutes.d.ts +4 -0
- package/types/routes/TTSRoutes.d.ts.map +1 -0
- package/types/schemas/TTSRequestSchema.d.ts +37 -0
- package/types/schemas/TTSRequestSchema.d.ts.map +1 -0
- package/types/services/PromptAudioService.d.ts +9 -0
- package/types/services/PromptAudioService.d.ts.map +1 -0
- package/types/services/TTSGenericService.d.ts +17 -0
- package/types/services/TTSGenericService.d.ts.map +1 -0
- package/src/factory/DeepSeekProviderFactory.ts +0 -27
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
class ElevenLabsTTSProvider {
|
|
2
|
+
constructor(apiKey, model, voiceId, baseUrl = "https://api.elevenlabs.io", outputFormat) {
|
|
3
|
+
if (!apiKey) {
|
|
4
|
+
throw new Error("ElevenLabs apiKey required");
|
|
5
|
+
}
|
|
6
|
+
if (!model) {
|
|
7
|
+
throw new Error("ElevenLabs model required");
|
|
8
|
+
}
|
|
9
|
+
if (!voiceId) {
|
|
10
|
+
throw new Error("ElevenLabs voiceId required");
|
|
11
|
+
}
|
|
12
|
+
this._apiKey = apiKey;
|
|
13
|
+
this._model = model;
|
|
14
|
+
this._voiceId = voiceId;
|
|
15
|
+
this._baseUrl = baseUrl.replace(/\/+$/, "");
|
|
16
|
+
this._outputFormat = outputFormat;
|
|
17
|
+
}
|
|
18
|
+
get model() {
|
|
19
|
+
if (!this._model) {
|
|
20
|
+
throw new Error("ElevenLabs model not found");
|
|
21
|
+
}
|
|
22
|
+
return this._model;
|
|
23
|
+
}
|
|
24
|
+
get voiceId() {
|
|
25
|
+
if (!this._voiceId) {
|
|
26
|
+
throw new Error("ElevenLabs voiceId not found");
|
|
27
|
+
}
|
|
28
|
+
return this._voiceId;
|
|
29
|
+
}
|
|
30
|
+
mapContentType(outputFormat) {
|
|
31
|
+
if (!outputFormat) {
|
|
32
|
+
return "audio/mpeg";
|
|
33
|
+
}
|
|
34
|
+
if (outputFormat.startsWith("mp3")) {
|
|
35
|
+
return "audio/mpeg";
|
|
36
|
+
}
|
|
37
|
+
if (outputFormat.startsWith("opus")) {
|
|
38
|
+
return "audio/ogg";
|
|
39
|
+
}
|
|
40
|
+
if (outputFormat.startsWith("pcm")) {
|
|
41
|
+
return "audio/wav";
|
|
42
|
+
}
|
|
43
|
+
if (outputFormat.startsWith("ulaw") || outputFormat.startsWith("alaw")) {
|
|
44
|
+
return "audio/basic";
|
|
45
|
+
}
|
|
46
|
+
return "application/octet-stream";
|
|
47
|
+
}
|
|
48
|
+
mapVoiceSettings(voiceSettings) {
|
|
49
|
+
if (!voiceSettings) {
|
|
50
|
+
return undefined;
|
|
51
|
+
}
|
|
52
|
+
return {
|
|
53
|
+
stability: voiceSettings.stability,
|
|
54
|
+
similarity_boost: voiceSettings.similarityBoost,
|
|
55
|
+
style: voiceSettings.style,
|
|
56
|
+
use_speaker_boost: voiceSettings.useSpeakerBoost,
|
|
57
|
+
speed: voiceSettings.speed,
|
|
58
|
+
};
|
|
59
|
+
}
|
|
60
|
+
buildBody(input, model) {
|
|
61
|
+
return {
|
|
62
|
+
text: input.text,
|
|
63
|
+
model_id: model,
|
|
64
|
+
...(input.voiceSettings ? { voice_settings: this.mapVoiceSettings(input.voiceSettings) } : {}),
|
|
65
|
+
...(input.previousText ? { previous_text: input.previousText } : {}),
|
|
66
|
+
...(input.nextText ? { next_text: input.nextText } : {}),
|
|
67
|
+
...(input.languageCode ? { language_code: input.languageCode } : {}),
|
|
68
|
+
...(input.seed !== undefined ? { seed: input.seed } : {}),
|
|
69
|
+
};
|
|
70
|
+
}
|
|
71
|
+
async textToSpeech(input) {
|
|
72
|
+
const startedAt = Date.now();
|
|
73
|
+
const model = input.model ?? this.model;
|
|
74
|
+
const voiceId = input.voiceId ?? this.voiceId;
|
|
75
|
+
const outputFormat = input.outputFormat ?? this._outputFormat;
|
|
76
|
+
const url = new URL(`${this._baseUrl}/v1/text-to-speech/${encodeURIComponent(voiceId)}`);
|
|
77
|
+
if (outputFormat) {
|
|
78
|
+
url.searchParams.set("output_format", outputFormat);
|
|
79
|
+
}
|
|
80
|
+
const response = await fetch(url, {
|
|
81
|
+
method: "POST",
|
|
82
|
+
headers: {
|
|
83
|
+
"Accept": this.mapContentType(outputFormat),
|
|
84
|
+
"Content-Type": "application/json",
|
|
85
|
+
"xi-api-key": this._apiKey,
|
|
86
|
+
},
|
|
87
|
+
body: JSON.stringify(this.buildBody(input, model)),
|
|
88
|
+
});
|
|
89
|
+
if (!response.ok) {
|
|
90
|
+
const errorText = await response.text();
|
|
91
|
+
throw new Error(`ElevenLabs TTS request failed (${response.status}): ${errorText}`);
|
|
92
|
+
}
|
|
93
|
+
const audio = Buffer.from(await response.arrayBuffer());
|
|
94
|
+
const contentType = response.headers.get("content-type") ?? this.mapContentType(outputFormat);
|
|
95
|
+
return {
|
|
96
|
+
audio,
|
|
97
|
+
contentType,
|
|
98
|
+
size: audio.byteLength,
|
|
99
|
+
time: Date.now() - startedAt,
|
|
100
|
+
provider: "elevenlabs",
|
|
101
|
+
model,
|
|
102
|
+
voiceId,
|
|
103
|
+
outputFormat,
|
|
104
|
+
};
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
export default ElevenLabsTTSProvider;
|
|
108
|
+
export { ElevenLabsTTSProvider };
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import TTSGenericController from "../controllers/TTSGenericController.js";
|
|
2
|
+
async function TTSFastifyRoutes(fastify, options) {
|
|
3
|
+
const genericController = new TTSGenericController();
|
|
4
|
+
fastify.get('/api/tts/providers', (req, rep) => genericController.availableProviders(req, rep));
|
|
5
|
+
fastify.post('/api/tts', (req, rep) => genericController.textToSpeech(req, rep));
|
|
6
|
+
}
|
|
7
|
+
export default TTSFastifyRoutes;
|
|
8
|
+
export { TTSFastifyRoutes };
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
const TTSVoiceSettingsSchema = z.object({
|
|
3
|
+
stability: z.number().min(0).max(1).optional(),
|
|
4
|
+
similarityBoost: z.number().min(0).max(1).optional(),
|
|
5
|
+
style: z.number().min(0).max(1).optional(),
|
|
6
|
+
useSpeakerBoost: z.boolean().optional(),
|
|
7
|
+
speed: z.number().positive().optional(),
|
|
8
|
+
});
|
|
9
|
+
const TTSRequestSchema = z.object({
|
|
10
|
+
text: z.string().min(1),
|
|
11
|
+
provider: z.string().default("ElevenLabs"),
|
|
12
|
+
voiceId: z.string().optional(),
|
|
13
|
+
model: z.string().optional(),
|
|
14
|
+
outputFormat: z.string().optional(),
|
|
15
|
+
voiceSettings: TTSVoiceSettingsSchema.optional(),
|
|
16
|
+
previousText: z.string().optional(),
|
|
17
|
+
nextText: z.string().optional(),
|
|
18
|
+
languageCode: z.string().optional(),
|
|
19
|
+
seed: z.number().int().optional(),
|
|
20
|
+
responseFormat: z.enum(["audio", "base64"]).default("audio"),
|
|
21
|
+
operationTitle: z.string().optional(),
|
|
22
|
+
operationGroup: z.string().optional(),
|
|
23
|
+
});
|
|
24
|
+
export { TTSRequestSchema, TTSVoiceSettingsSchema, };
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
import TTSProviderFactory from "../factory/tts/TTSProviderFactory.js";
|
|
2
|
+
class PromptAudioService {
|
|
3
|
+
static audioParams(input) {
|
|
4
|
+
if (!input.audioResponse) {
|
|
5
|
+
return null;
|
|
6
|
+
}
|
|
7
|
+
if (input.audioResponse === true) {
|
|
8
|
+
return {};
|
|
9
|
+
}
|
|
10
|
+
if (input.audioResponse.enabled === false) {
|
|
11
|
+
return null;
|
|
12
|
+
}
|
|
13
|
+
return input.audioResponse;
|
|
14
|
+
}
|
|
15
|
+
static outputToText(output) {
|
|
16
|
+
if (typeof output === "string") {
|
|
17
|
+
return output;
|
|
18
|
+
}
|
|
19
|
+
if (output === null || output === undefined) {
|
|
20
|
+
return "";
|
|
21
|
+
}
|
|
22
|
+
return JSON.stringify(output);
|
|
23
|
+
}
|
|
24
|
+
static async build(input, output) {
|
|
25
|
+
const audioParams = PromptAudioService.audioParams(input);
|
|
26
|
+
if (!audioParams) {
|
|
27
|
+
return undefined;
|
|
28
|
+
}
|
|
29
|
+
const text = PromptAudioService.outputToText(output).trim();
|
|
30
|
+
if (!text) {
|
|
31
|
+
return undefined;
|
|
32
|
+
}
|
|
33
|
+
const providerName = audioParams.provider ?? "ElevenLabs";
|
|
34
|
+
const ttsProvider = TTSProviderFactory.instance(providerName);
|
|
35
|
+
const response = await ttsProvider.textToSpeech({
|
|
36
|
+
text,
|
|
37
|
+
voiceId: audioParams.voiceId,
|
|
38
|
+
model: audioParams.model,
|
|
39
|
+
outputFormat: audioParams.outputFormat,
|
|
40
|
+
voiceSettings: audioParams.voiceSettings,
|
|
41
|
+
previousText: audioParams.previousText,
|
|
42
|
+
nextText: audioParams.nextText,
|
|
43
|
+
languageCode: audioParams.languageCode,
|
|
44
|
+
seed: audioParams.seed,
|
|
45
|
+
operationTitle: audioParams.operationTitle ?? input.operationTitle,
|
|
46
|
+
operationGroup: audioParams.operationGroup ?? input.operationGroup,
|
|
47
|
+
ip: input.ip,
|
|
48
|
+
userAgent: input.userAgent,
|
|
49
|
+
tenant: input.tenant,
|
|
50
|
+
user: input.user,
|
|
51
|
+
});
|
|
52
|
+
return {
|
|
53
|
+
audio: response.audio.toString("base64"),
|
|
54
|
+
contentType: response.contentType,
|
|
55
|
+
encoding: "base64",
|
|
56
|
+
meta: {
|
|
57
|
+
provider: response.provider,
|
|
58
|
+
model: response.model,
|
|
59
|
+
voiceId: response.voiceId,
|
|
60
|
+
outputFormat: response.outputFormat,
|
|
61
|
+
size: response.size,
|
|
62
|
+
time: response.time,
|
|
63
|
+
},
|
|
64
|
+
};
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
export default PromptAudioService;
|
|
68
|
+
export { PromptAudioService, };
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import TTSProviderFactory from "../factory/tts/TTSProviderFactory.js";
|
|
2
|
+
class TTSGenericService {
|
|
3
|
+
availableProviders() {
|
|
4
|
+
return TTSProviderFactory.availableProviders();
|
|
5
|
+
}
|
|
6
|
+
async textToSpeech(input, context = {}) {
|
|
7
|
+
const ttsProvider = TTSProviderFactory.instance(input.provider);
|
|
8
|
+
const ttsInput = {
|
|
9
|
+
...input,
|
|
10
|
+
operationTitle: input.operationTitle ?? "generic-tts",
|
|
11
|
+
operationGroup: input.operationGroup ?? "generic-tts",
|
|
12
|
+
ip: context.ip,
|
|
13
|
+
userAgent: context.userAgent,
|
|
14
|
+
tenant: context.tenant ?? null,
|
|
15
|
+
user: context.user ?? null,
|
|
16
|
+
};
|
|
17
|
+
return ttsProvider.textToSpeech(ttsInput);
|
|
18
|
+
}
|
|
19
|
+
}
|
|
20
|
+
export default TTSGenericService;
|
|
21
|
+
export { TTSGenericService, };
|
package/package.json
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
"publishConfig": {
|
|
4
4
|
"access": "public"
|
|
5
5
|
},
|
|
6
|
-
"version": "3.41.0",
|
|
6
|
+
"version": "3.43.0",
|
|
7
7
|
"description": "Ai utils",
|
|
8
8
|
"main": "dist/index.js",
|
|
9
9
|
"types": "types/index.d.ts",
|
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
"author": "Cristian Incarnato & Drax Team",
|
|
19
19
|
"license": "ISC",
|
|
20
20
|
"dependencies": {
|
|
21
|
-
"@drax/ai-share": "^3.
|
|
21
|
+
"@drax/ai-share": "^3.43.0",
|
|
22
22
|
"@drax/crud-back": "^3.39.0",
|
|
23
23
|
"mongoose": "^8.23.0",
|
|
24
24
|
"mongoose-paginate-v2": "^1.8.3"
|
|
@@ -46,5 +46,5 @@
|
|
|
46
46
|
"typescript": "^5.9.3",
|
|
47
47
|
"vitest": "^3.0.8"
|
|
48
48
|
},
|
|
49
|
-
"gitHead": "
|
|
49
|
+
"gitHead": "9cb1cab8a6fe2a6c574d08596bc6287f5a5311a6"
|
|
50
50
|
}
|
package/src/agents/DraxAgent.ts
CHANGED
|
@@ -14,7 +14,7 @@ import type {
|
|
|
14
14
|
} from "../interfaces/IDraxAgent.js";
|
|
15
15
|
import type {IPromptMessage} from "../interfaces/IAIProvider.js";
|
|
16
16
|
import type {IAgentSession, IAgentSessionBase} from "../interfaces/IAgentSession.js";
|
|
17
|
-
import AiProviderFactory from "../factory/AiProviderFactory.js";
|
|
17
|
+
import AiProviderFactory from "../factory/ai/AiProviderFactory.js";
|
|
18
18
|
import AgentSessionServiceFactory from "../factory/services/AgentSessionServiceFactory.js";
|
|
19
19
|
import type {AgentSessionService} from "../services/AgentSessionService.js";
|
|
20
20
|
|
|
@@ -110,6 +110,7 @@ class DraxAgent {
|
|
|
110
110
|
userAgent: input.userAgent,
|
|
111
111
|
tenant: input.tenantId ?? session.tenantId ?? null,
|
|
112
112
|
user: input.userId ?? session.userId ?? null,
|
|
113
|
+
audioResponse: input.audioResponse,
|
|
113
114
|
});
|
|
114
115
|
|
|
115
116
|
const assistantMessage = this.normalizeOutput(response.output);
|
|
@@ -134,6 +135,7 @@ class DraxAgent {
|
|
|
134
135
|
inputTokens: response.inputTokens,
|
|
135
136
|
outputTokens: response.outputTokens,
|
|
136
137
|
time: response.time,
|
|
138
|
+
...(response.audio ? {audio: response.audio} : {}),
|
|
137
139
|
};
|
|
138
140
|
}
|
|
139
141
|
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
enum ElevenLabsTTSConfig {
|
|
2
|
+
|
|
3
|
+
ElevenLabsApiKey = "ELEVENLABS_API_KEY",
|
|
4
|
+
ElevenLabsBaseUrl = "ELEVENLABS_BASE_URL",
|
|
5
|
+
ElevenLabsModel = "ELEVENLABS_MODEL",
|
|
6
|
+
ElevenLabsVoiceId = "ELEVENLABS_VOICE_ID",
|
|
7
|
+
ElevenLabsOutputFormat = "ELEVENLABS_OUTPUT_FORMAT",
|
|
8
|
+
|
|
9
|
+
}
|
|
10
|
+
|
|
11
|
+
export default ElevenLabsTTSConfig;
|
|
12
|
+
|
|
13
|
+
export {ElevenLabsTTSConfig}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import {z} from "zod";
|
|
2
2
|
import {CommonController} from "@drax/common-back";
|
|
3
|
-
import AiProviderFactory from "../factory/AiProviderFactory.js";
|
|
3
|
+
import AiProviderFactory from "../factory/ai/AiProviderFactory.js";
|
|
4
4
|
import AIPermissions from "../permissions/AIPermissions.js";
|
|
5
5
|
|
|
6
6
|
const CrudAiFieldSchema: z.ZodType<any> = z.lazy(() => z.object({
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import {z} from "zod";
|
|
2
2
|
import {CommonController} from "@drax/common-back";
|
|
3
|
-
import AiProviderFactory from "../factory/AiProviderFactory.js";
|
|
3
|
+
import AiProviderFactory from "../factory/ai/AiProviderFactory.js";
|
|
4
4
|
import AIPermissions from "../permissions/AIPermissions.js";
|
|
5
5
|
import type {IPromptParams} from "../interfaces/IAIProvider.js";
|
|
6
6
|
|
|
@@ -42,6 +42,29 @@ const PromptInputFileSchema = z.object({
|
|
|
42
42
|
url: z.string().optional(),
|
|
43
43
|
})
|
|
44
44
|
|
|
45
|
+
const PromptAudioVoiceSettingsSchema = z.object({
|
|
46
|
+
stability: z.number().min(0).max(1).optional(),
|
|
47
|
+
similarityBoost: z.number().min(0).max(1).optional(),
|
|
48
|
+
style: z.number().min(0).max(1).optional(),
|
|
49
|
+
useSpeakerBoost: z.boolean().optional(),
|
|
50
|
+
speed: z.number().positive().optional(),
|
|
51
|
+
})
|
|
52
|
+
|
|
53
|
+
const PromptAudioParamsSchema = z.object({
|
|
54
|
+
enabled: z.boolean().optional(),
|
|
55
|
+
provider: z.string().optional(),
|
|
56
|
+
voiceId: z.string().optional(),
|
|
57
|
+
model: z.string().optional(),
|
|
58
|
+
outputFormat: z.string().optional(),
|
|
59
|
+
voiceSettings: PromptAudioVoiceSettingsSchema.optional(),
|
|
60
|
+
previousText: z.string().optional(),
|
|
61
|
+
nextText: z.string().optional(),
|
|
62
|
+
languageCode: z.string().optional(),
|
|
63
|
+
seed: z.number().int().optional(),
|
|
64
|
+
operationTitle: z.string().optional(),
|
|
65
|
+
operationGroup: z.string().optional(),
|
|
66
|
+
})
|
|
67
|
+
|
|
45
68
|
const GenericPromptRequestSchema = z.object({
|
|
46
69
|
systemPrompt: z.string().min(1),
|
|
47
70
|
userInput: z.string().optional(),
|
|
@@ -57,6 +80,7 @@ const GenericPromptRequestSchema = z.object({
|
|
|
57
80
|
model: z.string().optional(),
|
|
58
81
|
operationTitle: z.string().optional(),
|
|
59
82
|
operationGroup: z.string().optional(),
|
|
83
|
+
audioResponse: z.union([z.boolean(), PromptAudioParamsSchema]).optional(),
|
|
60
84
|
})
|
|
61
85
|
|
|
62
86
|
class AIGenericController extends CommonController {
|
|
@@ -35,6 +35,29 @@ const PromptInputFileSchema = z.object({
|
|
|
35
35
|
url: z.string().optional(),
|
|
36
36
|
});
|
|
37
37
|
|
|
38
|
+
const PromptAudioVoiceSettingsSchema = z.object({
|
|
39
|
+
stability: z.number().min(0).max(1).optional(),
|
|
40
|
+
similarityBoost: z.number().min(0).max(1).optional(),
|
|
41
|
+
style: z.number().min(0).max(1).optional(),
|
|
42
|
+
useSpeakerBoost: z.boolean().optional(),
|
|
43
|
+
speed: z.number().positive().optional(),
|
|
44
|
+
});
|
|
45
|
+
|
|
46
|
+
const PromptAudioParamsSchema = z.object({
|
|
47
|
+
enabled: z.boolean().optional(),
|
|
48
|
+
provider: z.string().optional(),
|
|
49
|
+
voiceId: z.string().optional(),
|
|
50
|
+
model: z.string().optional(),
|
|
51
|
+
outputFormat: z.string().optional(),
|
|
52
|
+
voiceSettings: PromptAudioVoiceSettingsSchema.optional(),
|
|
53
|
+
previousText: z.string().optional(),
|
|
54
|
+
nextText: z.string().optional(),
|
|
55
|
+
languageCode: z.string().optional(),
|
|
56
|
+
seed: z.number().int().optional(),
|
|
57
|
+
operationTitle: z.string().optional(),
|
|
58
|
+
operationGroup: z.string().optional(),
|
|
59
|
+
});
|
|
60
|
+
|
|
38
61
|
const AgentSessionRequestSchema = z.object({
|
|
39
62
|
identifier: z.string().min(1).optional(),
|
|
40
63
|
sessionId: z.string().optional(),
|
|
@@ -55,6 +78,7 @@ const AgentMessageRequestSchema = AgentSessionRequestSchema.extend({
|
|
|
55
78
|
toolMaxIterations: z.number().optional(),
|
|
56
79
|
operationTitle: z.string().optional(),
|
|
57
80
|
operationGroup: z.string().optional(),
|
|
81
|
+
audioResponse: z.union([z.boolean(), PromptAudioParamsSchema]).optional(),
|
|
58
82
|
});
|
|
59
83
|
|
|
60
84
|
class DraxAgentController extends CommonController {
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
import {CommonController} from "@drax/common-back";
|
|
2
|
+
import TTSPermissions from "../permissions/TTSPermissions.js";
|
|
3
|
+
import {TTSRequestSchema} from "../schemas/TTSRequestSchema.js";
|
|
4
|
+
import TTSGenericService from "../services/TTSGenericService.js";
|
|
5
|
+
|
|
6
|
+
class TTSGenericController extends CommonController {
|
|
7
|
+
protected service: TTSGenericService
|
|
8
|
+
|
|
9
|
+
constructor(service: TTSGenericService = new TTSGenericService()) {
|
|
10
|
+
super()
|
|
11
|
+
this.service = service
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
async availableProviders(request, reply) {
|
|
15
|
+
try {
|
|
16
|
+
request.rbac.assertPermission(TTSPermissions.TextToSpeech)
|
|
17
|
+
|
|
18
|
+
return reply.send({
|
|
19
|
+
providers: this.service.availableProviders(),
|
|
20
|
+
})
|
|
21
|
+
} catch (e: any) {
|
|
22
|
+
this.handleError(e, reply)
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
async textToSpeech(request, reply) {
|
|
27
|
+
try {
|
|
28
|
+
request.rbac.assertPermission(TTSPermissions.TextToSpeech)
|
|
29
|
+
|
|
30
|
+
const input = TTSRequestSchema.parse(request.body ?? {})
|
|
31
|
+
const response = await this.service.textToSpeech(input, {
|
|
32
|
+
ip: request.ip,
|
|
33
|
+
userAgent: request.headers["user-agent"],
|
|
34
|
+
tenant: request.rbac?.tenantId ?? null,
|
|
35
|
+
user: request.rbac?.userId ?? null,
|
|
36
|
+
})
|
|
37
|
+
|
|
38
|
+
if (input.responseFormat === "base64") {
|
|
39
|
+
return reply.send({
|
|
40
|
+
audio: response.audio.toString("base64"),
|
|
41
|
+
contentType: response.contentType,
|
|
42
|
+
meta: {
|
|
43
|
+
provider: response.provider,
|
|
44
|
+
model: response.model,
|
|
45
|
+
voiceId: response.voiceId,
|
|
46
|
+
outputFormat: response.outputFormat,
|
|
47
|
+
size: response.size,
|
|
48
|
+
time: response.time,
|
|
49
|
+
},
|
|
50
|
+
})
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
return reply
|
|
54
|
+
.header("Content-Type", response.contentType)
|
|
55
|
+
.header("Content-Length", response.size)
|
|
56
|
+
.send(response.audio)
|
|
57
|
+
} catch (e: any) {
|
|
58
|
+
if (e?.name === "ZodError") {
|
|
59
|
+
return reply.status(400).send({
|
|
60
|
+
message: e?.message || "TTS validation error",
|
|
61
|
+
})
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
this.handleError(e, reply)
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
export default TTSGenericController;
|
|
70
|
+
export {TTSGenericController};
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import type {IAIProvider} from "../interfaces/IAIProvider.js"
|
|
1
|
+
import type {IAIProvider} from "../../interfaces/IAIProvider.js"
|
|
2
2
|
import OpenAiProviderFactory from "./OpenAiProviderFactory.js";
|
|
3
3
|
import GoogleAiProviderFactory from "./GoogleAiProviderFactory.js";
|
|
4
4
|
import OllamaAiProviderFactory from "./OllamaAiProviderFactory.js";
|
|
5
|
-
import DeepSeekProviderFactory from "./DeepSeekProviderFactory.js";
|
|
5
|
+
import DeepSeekAiProviderFactory from "./DeepSeekAiProviderFactory.js";
|
|
6
6
|
|
|
7
7
|
class AiProviderFactory {
|
|
8
8
|
private static singletons: Record<string, IAIProvider> = {};
|
|
@@ -20,7 +20,7 @@ class AiProviderFactory {
|
|
|
20
20
|
AiProviderFactory.singletons[provider] = OllamaAiProviderFactory.instance()
|
|
21
21
|
break;
|
|
22
22
|
case 'DeepSeek':
|
|
23
|
-
AiProviderFactory.singletons[provider] = DeepSeekProviderFactory.instance()
|
|
23
|
+
AiProviderFactory.singletons[provider] = DeepSeekAiProviderFactory.instance()
|
|
24
24
|
break;
|
|
25
25
|
default:
|
|
26
26
|
throw new Error(`Unsupported AI provider: ${provider}`);
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import {DraxConfig} from "@drax/common-back";
|
|
2
|
+
import DeepSeekConfig from "../../config/DeepSeekConfig.js";
|
|
3
|
+
import type {IAIProvider} from "../../interfaces/IAIProvider.js"
|
|
4
|
+
import DeepSeekAiProvider from "../../providers/ai/DeepSeekAiProvider.js";
|
|
5
|
+
import AILogServiceFactory from "../services/AILogServiceFactory.js";
|
|
6
|
+
|
|
7
|
+
class DeepSeekAiProviderFactory {
|
|
8
|
+
private static singleton: IAIProvider;
|
|
9
|
+
|
|
10
|
+
public static instance(): IAIProvider {
|
|
11
|
+
if (!DeepSeekAiProviderFactory.singleton) {
|
|
12
|
+
DeepSeekAiProviderFactory.singleton = new DeepSeekAiProvider(
|
|
13
|
+
DraxConfig.getOrLoad(DeepSeekConfig.DeepSeekApiKey),
|
|
14
|
+
DraxConfig.getOrLoad(DeepSeekConfig.DeepSeekModel),
|
|
15
|
+
DraxConfig.getOrLoad(DeepSeekConfig.DeepSeekBaseUrl, "string", "https://api.deepseek.com"),
|
|
16
|
+
DraxConfig.getOrLoad(DeepSeekConfig.DeepSeekVisionModel),
|
|
17
|
+
AILogServiceFactory.instance
|
|
18
|
+
);
|
|
19
|
+
}
|
|
20
|
+
return DeepSeekAiProviderFactory.singleton;
|
|
21
|
+
}
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
export default DeepSeekAiProviderFactory
|
|
25
|
+
export {
|
|
26
|
+
DeepSeekAiProviderFactory
|
|
27
|
+
}
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import {DraxConfig} from "@drax/common-back";
|
|
2
|
-
import GoogleAiConfig from "../config/GoogleAiConfig.js";
|
|
3
|
-
import type {IAIProvider} from "../interfaces/IAIProvider.js"
|
|
4
|
-
import GoogleAiProvider from "../providers/GoogleAiProvider.js";
|
|
5
|
-
import AILogServiceFactory from "./services/AILogServiceFactory.js";
|
|
2
|
+
import GoogleAiConfig from "../../config/GoogleAiConfig.js";
|
|
3
|
+
import type {IAIProvider} from "../../interfaces/IAIProvider.js"
|
|
4
|
+
import GoogleAiProvider from "../../providers/ai/GoogleAiProvider.js";
|
|
5
|
+
import AILogServiceFactory from "../services/AILogServiceFactory.js";
|
|
6
6
|
|
|
7
7
|
class GoogleAiProviderFactory {
|
|
8
8
|
private static singleton: IAIProvider;
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import {DraxConfig} from "@drax/common-back";
|
|
2
|
-
import OllamaAiConfig from "../config/OllamaAiConfig.js";
|
|
3
|
-
import type {IAIProvider} from "../interfaces/IAIProvider.js"
|
|
4
|
-
import OllamaAiProvider from "../providers/OllamaAiProvider.js";
|
|
5
|
-
import AILogServiceFactory from "./services/AILogServiceFactory.js";
|
|
2
|
+
import OllamaAiConfig from "../../config/OllamaAiConfig.js";
|
|
3
|
+
import type {IAIProvider} from "../../interfaces/IAIProvider.js"
|
|
4
|
+
import OllamaAiProvider from "../../providers/ai/OllamaAiProvider.js";
|
|
5
|
+
import AILogServiceFactory from "../services/AILogServiceFactory.js";
|
|
6
6
|
|
|
7
7
|
class OllamaAiProviderFactory {
|
|
8
8
|
private static singleton: IAIProvider;
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import {DraxConfig} from "@drax/common-back";
|
|
2
|
-
import OpenAiConfig from "../config/OpenAiConfig.js";
|
|
3
|
-
import type {IAIProvider} from "../interfaces/IAIProvider.js"
|
|
4
|
-
import OpenAiProvider from "../providers/OpenAiProvider.js";
|
|
5
|
-
import AILogServiceFactory from "./services/AILogServiceFactory.js";
|
|
2
|
+
import OpenAiConfig from "../../config/OpenAiConfig.js";
|
|
3
|
+
import type {IAIProvider} from "../../interfaces/IAIProvider.js"
|
|
4
|
+
import OpenAiProvider from "../../providers/ai/OpenAiProvider.js";
|
|
5
|
+
import AILogServiceFactory from "../services/AILogServiceFactory.js";
|
|
6
6
|
|
|
7
7
|
class OpenAiProviderFactory {
|
|
8
8
|
private static singleton: IAIProvider;
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import {DraxConfig} from "@drax/common-back";
|
|
2
|
+
import ElevenLabsTTSConfig from "../../config/ElevenLabsTTSConfig.js";
|
|
3
|
+
import type {ITTSProvider} from "../../interfaces/ITTSProvider.js";
|
|
4
|
+
import ElevenLabsTTSProvider from "../../providers/tts/ElevenLabsTTSProvider.js";
|
|
5
|
+
|
|
6
|
+
class ElevenLabsTTSProviderFactory {
|
|
7
|
+
private static singleton: ITTSProvider;
|
|
8
|
+
|
|
9
|
+
public static instance(): ITTSProvider {
|
|
10
|
+
if (!ElevenLabsTTSProviderFactory.singleton) {
|
|
11
|
+
ElevenLabsTTSProviderFactory.singleton = new ElevenLabsTTSProvider(
|
|
12
|
+
DraxConfig.getOrLoad(ElevenLabsTTSConfig.ElevenLabsApiKey),
|
|
13
|
+
DraxConfig.getOrLoad(ElevenLabsTTSConfig.ElevenLabsModel, "string", "eleven_multilingual_v2"),
|
|
14
|
+
DraxConfig.getOrLoad(ElevenLabsTTSConfig.ElevenLabsVoiceId),
|
|
15
|
+
DraxConfig.getOrLoad(ElevenLabsTTSConfig.ElevenLabsBaseUrl, "string", "https://api.elevenlabs.io"),
|
|
16
|
+
DraxConfig.getOrLoad(ElevenLabsTTSConfig.ElevenLabsOutputFormat, "string", "mp3_44100_128"),
|
|
17
|
+
);
|
|
18
|
+
}
|
|
19
|
+
return ElevenLabsTTSProviderFactory.singleton;
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
export default ElevenLabsTTSProviderFactory
|
|
24
|
+
export {
|
|
25
|
+
ElevenLabsTTSProviderFactory
|
|
26
|
+
}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import type {ITTSProvider} from "../../interfaces/ITTSProvider.js";
|
|
2
|
+
import ElevenLabsTTSProviderFactory from "./ElevenLabsTTSProviderFactory.js";
|
|
3
|
+
|
|
4
|
+
type TTSProviderInfo = {
|
|
5
|
+
name: string;
|
|
6
|
+
label: string;
|
|
7
|
+
}
|
|
8
|
+
|
|
9
|
+
class TTSProviderFactory {
|
|
10
|
+
private static singletons: Record<string, ITTSProvider> = {};
|
|
11
|
+
private static providers: TTSProviderInfo[] = [
|
|
12
|
+
{
|
|
13
|
+
name: "ElevenLabs",
|
|
14
|
+
label: "ElevenLabs",
|
|
15
|
+
},
|
|
16
|
+
];
|
|
17
|
+
|
|
18
|
+
public static availableProviders(): TTSProviderInfo[] {
|
|
19
|
+
return TTSProviderFactory.providers
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
public static instance(provider: string = "ElevenLabs"): ITTSProvider {
|
|
23
|
+
if (!TTSProviderFactory.singletons[provider]) {
|
|
24
|
+
switch (provider) {
|
|
25
|
+
case "ElevenLabs":
|
|
26
|
+
TTSProviderFactory.singletons[provider] = ElevenLabsTTSProviderFactory.instance()
|
|
27
|
+
break;
|
|
28
|
+
default:
|
|
29
|
+
throw new Error(`Unsupported TTS provider: ${provider}`);
|
|
30
|
+
}
|
|
31
|
+
}
|
|
32
|
+
return TTSProviderFactory.singletons[provider];
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
export default TTSProviderFactory
|
|
37
|
+
export {
|
|
38
|
+
TTSProviderFactory
|
|
39
|
+
}
|
|
40
|
+
export type {
|
|
41
|
+
TTSProviderInfo,
|
|
42
|
+
}
|