@drax/ai-back 3.41.0 → 3.43.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents/DraxAgent.js +3 -1
- package/dist/config/ElevenLabsTTSConfig.js +10 -0
- package/dist/controllers/AICrudController.js +1 -1
- package/dist/controllers/AIGenericController.js +23 -1
- package/dist/controllers/DraxAgentController.js +22 -0
- package/dist/controllers/TTSGenericController.js +61 -0
- package/dist/factory/ElevenLabsTTSProviderFactory.js +13 -0
- package/dist/factory/TTSProviderFactory.js +27 -0
- package/dist/factory/ai/AiProviderFactory.js +30 -0
- package/dist/factory/ai/DeepSeekAiProviderFactory.js +14 -0
- package/dist/factory/ai/GoogleAiProviderFactory.js +14 -0
- package/dist/factory/ai/OllamaAiProviderFactory.js +14 -0
- package/dist/factory/ai/OpenAiProviderFactory.js +14 -0
- package/dist/factory/tts/ElevenLabsTTSProviderFactory.js +13 -0
- package/dist/factory/tts/TTSProviderFactory.js +27 -0
- package/dist/index.js +23 -13
- package/dist/interfaces/ITTSProvider.js +1 -0
- package/dist/permissions/TTSPermissions.js +6 -0
- package/dist/providers/ElevenLabsTTSProvider.js +108 -0
- package/dist/providers/ai/DeepSeekAiProvider.js +34 -0
- package/dist/providers/ai/GoogleAiProvider.js +370 -0
- package/dist/providers/ai/OllamaAiProvider.js +345 -0
- package/dist/providers/ai/OpenAiProvider.js +305 -0
- package/dist/providers/tts/ElevenLabsTTSProvider.js +108 -0
- package/dist/routes/TTSRoutes.js +8 -0
- package/dist/schemas/TTSRequestSchema.js +24 -0
- package/dist/services/PromptAudioService.js +68 -0
- package/dist/services/TTSGenericService.js +21 -0
- package/package.json +3 -3
- package/src/agents/DraxAgent.ts +3 -1
- package/src/config/ElevenLabsTTSConfig.ts +13 -0
- package/src/controllers/AICrudController.ts +1 -1
- package/src/controllers/AIGenericController.ts +25 -1
- package/src/controllers/DraxAgentController.ts +24 -0
- package/src/controllers/TTSGenericController.ts +70 -0
- package/src/factory/{AiProviderFactory.ts → ai/AiProviderFactory.ts} +3 -3
- package/src/factory/ai/DeepSeekAiProviderFactory.ts +27 -0
- package/src/factory/{GoogleAiProviderFactory.ts → ai/GoogleAiProviderFactory.ts} +4 -4
- package/src/factory/{OllamaAiProviderFactory.ts → ai/OllamaAiProviderFactory.ts} +4 -4
- package/src/factory/{OpenAiProviderFactory.ts → ai/OpenAiProviderFactory.ts} +4 -4
- package/src/factory/tts/ElevenLabsTTSProviderFactory.ts +26 -0
- package/src/factory/tts/TTSProviderFactory.ts +42 -0
- package/src/index.ts +60 -11
- package/src/interfaces/IAIProvider.ts +38 -1
- package/src/interfaces/IDraxAgent.ts +4 -0
- package/src/interfaces/ITTSProvider.ts +47 -0
- package/src/permissions/AIPermissions.ts +0 -1
- package/src/permissions/TTSPermissions.ts +8 -0
- package/src/providers/{DeepSeekProvider.ts → ai/DeepSeekAiProvider.ts} +5 -5
- package/src/providers/{GoogleAiProvider.ts → ai/GoogleAiProvider.ts} +6 -3
- package/src/providers/{OllamaAiProvider.ts → ai/OllamaAiProvider.ts} +6 -3
- package/src/providers/{OpenAiProvider.ts → ai/OpenAiProvider.ts} +6 -3
- package/src/providers/tts/ElevenLabsTTSProvider.ts +132 -0
- package/src/routes/TTSRoutes.ts +13 -0
- package/src/schemas/TTSRequestSchema.ts +38 -0
- package/src/services/PromptAudioService.ts +87 -0
- package/src/services/TTSGenericService.ts +41 -0
- package/test/DeepSeekProvider.test.ts +4 -4
- package/test/DraxAgent.test.ts +64 -0
- package/test/PromptAudioService.test.ts +115 -0
- package/tsconfig.tsbuildinfo +1 -1
- package/types/agents/DraxAgent.d.ts.map +1 -1
- package/types/config/ElevenLabsTTSConfig.d.ts +10 -0
- package/types/config/ElevenLabsTTSConfig.d.ts.map +1 -0
- package/types/controllers/AIGenericController.d.ts.map +1 -1
- package/types/controllers/DraxAgentController.d.ts.map +1 -1
- package/types/controllers/TTSGenericController.d.ts +11 -0
- package/types/controllers/TTSGenericController.d.ts.map +1 -0
- package/types/factory/ElevenLabsTTSProviderFactory.d.ts +8 -0
- package/types/factory/ElevenLabsTTSProviderFactory.d.ts.map +1 -0
- package/types/factory/TTSProviderFactory.d.ts +15 -0
- package/types/factory/TTSProviderFactory.d.ts.map +1 -0
- package/types/factory/ai/AiProviderFactory.d.ts +8 -0
- package/types/factory/ai/AiProviderFactory.d.ts.map +1 -0
- package/types/factory/ai/DeepSeekAiProviderFactory.d.ts +8 -0
- package/types/factory/ai/DeepSeekAiProviderFactory.d.ts.map +1 -0
- package/types/factory/ai/GoogleAiProviderFactory.d.ts +8 -0
- package/types/factory/ai/GoogleAiProviderFactory.d.ts.map +1 -0
- package/types/factory/ai/OllamaAiProviderFactory.d.ts +8 -0
- package/types/factory/ai/OllamaAiProviderFactory.d.ts.map +1 -0
- package/types/factory/ai/OpenAiProviderFactory.d.ts +8 -0
- package/types/factory/ai/OpenAiProviderFactory.d.ts.map +1 -0
- package/types/factory/tts/ElevenLabsTTSProviderFactory.d.ts +8 -0
- package/types/factory/tts/ElevenLabsTTSProviderFactory.d.ts.map +1 -0
- package/types/factory/tts/TTSProviderFactory.d.ts +15 -0
- package/types/factory/tts/TTSProviderFactory.d.ts.map +1 -0
- package/types/index.d.ts +26 -12
- package/types/index.d.ts.map +1 -1
- package/types/interfaces/IAIProvider.d.ts +32 -1
- package/types/interfaces/IAIProvider.d.ts.map +1 -1
- package/types/interfaces/IDraxAgent.d.ts +3 -1
- package/types/interfaces/IDraxAgent.d.ts.map +1 -1
- package/types/interfaces/ITTSProvider.d.ts +39 -0
- package/types/interfaces/ITTSProvider.d.ts.map +1 -0
- package/types/permissions/TTSPermissions.d.ts +6 -0
- package/types/permissions/TTSPermissions.d.ts.map +1 -0
- package/types/providers/ElevenLabsTTSProvider.d.ts +38 -0
- package/types/providers/ElevenLabsTTSProvider.d.ts.map +1 -0
- package/types/providers/ai/DeepSeekAiProvider.d.ts +24 -0
- package/types/providers/ai/DeepSeekAiProvider.d.ts.map +1 -0
- package/types/providers/ai/GoogleAiProvider.d.ts +63 -0
- package/types/providers/ai/GoogleAiProvider.d.ts.map +1 -0
- package/types/providers/ai/OllamaAiProvider.d.ts +78 -0
- package/types/providers/ai/OllamaAiProvider.d.ts.map +1 -0
- package/types/providers/ai/OpenAiProvider.d.ts +97 -0
- package/types/providers/ai/OpenAiProvider.d.ts.map +1 -0
- package/types/providers/tts/ElevenLabsTTSProvider.d.ts +38 -0
- package/types/providers/tts/ElevenLabsTTSProvider.d.ts.map +1 -0
- package/types/routes/TTSRoutes.d.ts +4 -0
- package/types/routes/TTSRoutes.d.ts.map +1 -0
- package/types/schemas/TTSRequestSchema.d.ts +37 -0
- package/types/schemas/TTSRequestSchema.d.ts.map +1 -0
- package/types/services/PromptAudioService.d.ts +9 -0
- package/types/services/PromptAudioService.d.ts.map +1 -0
- package/types/services/TTSGenericService.d.ts +17 -0
- package/types/services/TTSGenericService.d.ts.map +1 -0
- package/src/factory/DeepSeekProviderFactory.ts +0 -27
package/src/index.ts
CHANGED
|
@@ -2,35 +2,46 @@ import {OpenAiConfig} from "./config/OpenAiConfig.js";
|
|
|
2
2
|
import {GoogleAiConfig} from "./config/GoogleAiConfig.js";
|
|
3
3
|
import {OllamaAiConfig} from "./config/OllamaAiConfig.js";
|
|
4
4
|
import {DeepSeekConfig} from "./config/DeepSeekConfig.js";
|
|
5
|
+
import {ElevenLabsTTSConfig} from "./config/ElevenLabsTTSConfig.js";
|
|
5
6
|
import {AILogSchema, AILogBaseSchema} from "./schemas/AILogSchema.js";
|
|
7
|
+
import {TTSRequestSchema, TTSVoiceSettingsSchema} from "./schemas/TTSRequestSchema.js";
|
|
6
8
|
import AILogModel from "./models/AILogModel.js";
|
|
7
9
|
import AILogMongoRepository from "./repository/mongo/AILogMongoRepository.js";
|
|
8
10
|
import AILogSqliteRepository from "./repository/sqlite/AILogSqliteRepository.js";
|
|
9
|
-
import {OpenAiProviderFactory} from "./factory/OpenAiProviderFactory.js";
|
|
10
|
-
import {GoogleAiProviderFactory} from "./factory/GoogleAiProviderFactory.js";
|
|
11
|
-
import {OllamaAiProviderFactory} from "./factory/OllamaAiProviderFactory.js";
|
|
12
|
-
import {DeepSeekProviderFactory} from "./factory/DeepSeekProviderFactory.js";
|
|
13
|
-
import {AiProviderFactory} from "./factory/AiProviderFactory.js";
|
|
11
|
+
import {OpenAiProviderFactory} from "./factory/ai/OpenAiProviderFactory.js";
|
|
12
|
+
import {GoogleAiProviderFactory} from "./factory/ai/GoogleAiProviderFactory.js";
|
|
13
|
+
import {OllamaAiProviderFactory} from "./factory/ai/OllamaAiProviderFactory.js";
|
|
14
|
+
import {DeepSeekAiProviderFactory} from "./factory/ai/DeepSeekAiProviderFactory.js";
|
|
15
|
+
import {AiProviderFactory} from "./factory/ai/AiProviderFactory.js";
|
|
16
|
+
import {ElevenLabsTTSProviderFactory} from "./factory/tts/ElevenLabsTTSProviderFactory.js";
|
|
17
|
+
import {TTSProviderFactory} from "./factory/tts/TTSProviderFactory.js";
|
|
18
|
+
import type {TTSProviderInfo} from "./factory/tts/TTSProviderFactory.js";
|
|
14
19
|
import {DraxAgentFactory} from "./factory/DraxAgentFactory.js";
|
|
15
20
|
import AILogServiceFactory from "./factory/services/AILogServiceFactory.js";
|
|
16
|
-
import {OpenAiProvider} from "./providers/OpenAiProvider.js";
|
|
17
|
-
import {GoogleAiProvider} from "./providers/GoogleAiProvider.js";
|
|
18
|
-
import {OllamaAiProvider} from "./providers/OllamaAiProvider.js";
|
|
19
|
-
import {DeepSeekProvider} from "./providers/DeepSeekProvider.js";
|
|
21
|
+
import {OpenAiProvider} from "./providers/ai/OpenAiProvider.js";
|
|
22
|
+
import {GoogleAiProvider} from "./providers/ai/GoogleAiProvider.js";
|
|
23
|
+
import {OllamaAiProvider} from "./providers/ai/OllamaAiProvider.js";
|
|
24
|
+
import {DeepSeekAiProvider} from "./providers/ai/DeepSeekAiProvider.js";
|
|
25
|
+
import {ElevenLabsTTSProvider} from "./providers/tts/ElevenLabsTTSProvider.js";
|
|
20
26
|
import {BuilderTool} from "./tools/BuilderTool.js";
|
|
21
27
|
import {KnowledgeService} from "./services/KnowledgeService.js";
|
|
22
28
|
import {AILogService} from "./services/AILogService.js";
|
|
29
|
+
import {TTSGenericService} from "./services/TTSGenericService.js";
|
|
30
|
+
import {PromptAudioService} from "./services/PromptAudioService.js";
|
|
23
31
|
import AILogPermissions from "./permissions/AILogPermissions.js";
|
|
24
32
|
import AgentPermissions from "./permissions/AgentPermissions.js";
|
|
25
33
|
import AgentSessionPermissions from "./permissions/AgentSessionPermissions.js";
|
|
26
34
|
import AIPermissions from "./permissions/AIPermissions.js";
|
|
35
|
+
import TTSPermissions from "./permissions/TTSPermissions.js";
|
|
27
36
|
import AILogController from "./controllers/AILogController.js";
|
|
28
37
|
import AICrudController from "./controllers/AICrudController.js";
|
|
29
38
|
import AIGenericController from "./controllers/AIGenericController.js";
|
|
39
|
+
import TTSGenericController from "./controllers/TTSGenericController.js";
|
|
30
40
|
import DraxAgentController from "./controllers/DraxAgentController.js";
|
|
31
41
|
import AgentSessionController from "./controllers/AgentSessionController.js";
|
|
32
42
|
import AILogRoutes from "./routes/AILogRoutes.js";
|
|
33
43
|
import AIRoutes from "./routes/AIRoutes.js";
|
|
44
|
+
import TTSRoutes from "./routes/TTSRoutes.js";
|
|
34
45
|
import DraxAgentRoutes from "./routes/DraxAgentRoutes.js";
|
|
35
46
|
import AgentSessionRoutes from "./routes/AgentSessionRoutes.js";
|
|
36
47
|
import {DraxAgent} from "./agents/DraxAgent.js";
|
|
@@ -40,6 +51,9 @@ import type {
|
|
|
40
51
|
IPromptContentPart,
|
|
41
52
|
IPromptContentPartImage,
|
|
42
53
|
IPromptContentPartText,
|
|
54
|
+
IPromptAudioParams,
|
|
55
|
+
IPromptAudioResponse,
|
|
56
|
+
IPromptAudioResponseMeta,
|
|
43
57
|
IPromptImage,
|
|
44
58
|
IPromptImageDetail,
|
|
45
59
|
IPromptMessage,
|
|
@@ -48,6 +62,19 @@ import type {
|
|
|
48
62
|
IPromptResponse,
|
|
49
63
|
IPromptTool
|
|
50
64
|
} from "./interfaces/IAIProvider.js";
|
|
65
|
+
import type {
|
|
66
|
+
ITTSParams,
|
|
67
|
+
ITTSProvider,
|
|
68
|
+
ITTSResponse,
|
|
69
|
+
ITTSVoiceSettings,
|
|
70
|
+
} from "./interfaces/ITTSProvider.js";
|
|
71
|
+
import type {
|
|
72
|
+
TTSRequest,
|
|
73
|
+
TTSVoiceSettings,
|
|
74
|
+
} from "./schemas/TTSRequestSchema.js";
|
|
75
|
+
import type {
|
|
76
|
+
TTSRequestContext,
|
|
77
|
+
} from "./services/TTSGenericService.js";
|
|
51
78
|
import type {
|
|
52
79
|
ToolBuilderMethod,
|
|
53
80
|
ToolBuilderOptions,
|
|
@@ -85,7 +112,18 @@ export type {
|
|
|
85
112
|
IPromptContentPart,
|
|
86
113
|
IPromptContentPartImage,
|
|
87
114
|
IPromptContentPartText,
|
|
115
|
+
IPromptAudioParams,
|
|
116
|
+
IPromptAudioResponse,
|
|
117
|
+
IPromptAudioResponseMeta,
|
|
88
118
|
IPromptResponse,
|
|
119
|
+
ITTSProvider,
|
|
120
|
+
ITTSParams,
|
|
121
|
+
ITTSResponse,
|
|
122
|
+
ITTSVoiceSettings,
|
|
123
|
+
TTSRequest,
|
|
124
|
+
TTSVoiceSettings,
|
|
125
|
+
TTSRequestContext,
|
|
126
|
+
TTSProviderInfo,
|
|
89
127
|
ToolBuilderMethod,
|
|
90
128
|
ToolBuilderOptions,
|
|
91
129
|
ToolBuilderService,
|
|
@@ -108,40 +146,51 @@ export {
|
|
|
108
146
|
GoogleAiConfig,
|
|
109
147
|
OllamaAiConfig,
|
|
110
148
|
DeepSeekConfig,
|
|
149
|
+
ElevenLabsTTSConfig,
|
|
111
150
|
AILogSchema,
|
|
112
151
|
AILogBaseSchema,
|
|
152
|
+
TTSRequestSchema,
|
|
153
|
+
TTSVoiceSettingsSchema,
|
|
113
154
|
AILogModel,
|
|
114
155
|
AILogMongoRepository,
|
|
115
156
|
AILogSqliteRepository,
|
|
116
157
|
OpenAiProviderFactory,
|
|
117
158
|
GoogleAiProviderFactory,
|
|
118
159
|
OllamaAiProviderFactory,
|
|
119
|
-
DeepSeekProviderFactory,
|
|
160
|
+
DeepSeekAiProviderFactory,
|
|
120
161
|
AiProviderFactory,
|
|
162
|
+
ElevenLabsTTSProviderFactory,
|
|
163
|
+
TTSProviderFactory,
|
|
121
164
|
DraxAgentFactory,
|
|
122
165
|
AILogServiceFactory,
|
|
123
166
|
OpenAiProvider,
|
|
124
167
|
GoogleAiProvider,
|
|
125
168
|
OllamaAiProvider,
|
|
126
|
-
DeepSeekProvider,
|
|
169
|
+
DeepSeekAiProvider,
|
|
170
|
+
ElevenLabsTTSProvider,
|
|
127
171
|
BuilderTool,
|
|
128
172
|
//Service
|
|
129
173
|
KnowledgeService,
|
|
130
174
|
AILogService,
|
|
175
|
+
TTSGenericService,
|
|
176
|
+
PromptAudioService,
|
|
131
177
|
//Permissions
|
|
132
178
|
AILogPermissions,
|
|
133
179
|
AgentPermissions,
|
|
134
180
|
AIPermissions,
|
|
181
|
+
TTSPermissions,
|
|
135
182
|
AgentSessionPermissions,
|
|
136
183
|
//Controllers
|
|
137
184
|
AILogController,
|
|
138
185
|
AICrudController,
|
|
139
186
|
AIGenericController,
|
|
187
|
+
TTSGenericController,
|
|
140
188
|
DraxAgentController,
|
|
141
189
|
AgentSessionController,
|
|
142
190
|
DraxAgent,
|
|
143
191
|
AILogRoutes,
|
|
144
192
|
AIRoutes,
|
|
193
|
+
TTSRoutes,
|
|
145
194
|
DraxAgentRoutes,
|
|
146
195
|
AgentSessionRoutes
|
|
147
196
|
}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { ZodSchema } from 'zod'
|
|
2
|
+
import type {ITTSVoiceSettings} from "./ITTSProvider.js";
|
|
2
3
|
|
|
3
4
|
type Role = 'user' | 'assistant' | 'system';
|
|
4
5
|
|
|
@@ -74,6 +75,38 @@ interface IPromptParams {
|
|
|
74
75
|
userAgent?: string,
|
|
75
76
|
tenant?: string | null,
|
|
76
77
|
user?: string | null,
|
|
78
|
+
audioResponse?: boolean | IPromptAudioParams,
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
interface IPromptAudioParams {
|
|
82
|
+
enabled?: boolean,
|
|
83
|
+
provider?: string,
|
|
84
|
+
voiceId?: string,
|
|
85
|
+
model?: string,
|
|
86
|
+
outputFormat?: string,
|
|
87
|
+
voiceSettings?: ITTSVoiceSettings,
|
|
88
|
+
previousText?: string,
|
|
89
|
+
nextText?: string,
|
|
90
|
+
languageCode?: string,
|
|
91
|
+
seed?: number,
|
|
92
|
+
operationTitle?: string,
|
|
93
|
+
operationGroup?: string,
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
interface IPromptAudioResponseMeta {
|
|
97
|
+
provider: string,
|
|
98
|
+
model: string,
|
|
99
|
+
voiceId: string,
|
|
100
|
+
outputFormat?: string,
|
|
101
|
+
size: number,
|
|
102
|
+
time: number,
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
interface IPromptAudioResponse {
|
|
106
|
+
audio: string,
|
|
107
|
+
contentType: string,
|
|
108
|
+
encoding: 'base64',
|
|
109
|
+
meta: IPromptAudioResponseMeta,
|
|
77
110
|
}
|
|
78
111
|
|
|
79
112
|
interface IPromptResponse {
|
|
@@ -81,7 +114,8 @@ interface IPromptResponse {
|
|
|
81
114
|
tokens: number,
|
|
82
115
|
inputTokens: number,
|
|
83
116
|
outputTokens: number,
|
|
84
|
-
time: number
|
|
117
|
+
time: number,
|
|
118
|
+
audio?: IPromptAudioResponse,
|
|
85
119
|
}
|
|
86
120
|
|
|
87
121
|
interface IAIProvider {
|
|
@@ -92,6 +126,9 @@ export type {
|
|
|
92
126
|
IAIProvider,
|
|
93
127
|
IPromptParams,
|
|
94
128
|
IPromptResponse,
|
|
129
|
+
IPromptAudioParams,
|
|
130
|
+
IPromptAudioResponse,
|
|
131
|
+
IPromptAudioResponseMeta,
|
|
95
132
|
IPromptMessage,
|
|
96
133
|
IPromptMemory,
|
|
97
134
|
IPromptTool,
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
import type {
|
|
2
2
|
IAIProvider,
|
|
3
|
+
IPromptAudioParams,
|
|
4
|
+
IPromptAudioResponse,
|
|
3
5
|
IPromptContentPart,
|
|
4
6
|
IPromptImage,
|
|
5
7
|
IPromptMemory,
|
|
@@ -67,6 +69,7 @@ interface DraxAgentMessageInput extends DraxAgentSessionInput {
|
|
|
67
69
|
toolMaxIterations?: number;
|
|
68
70
|
operationTitle?: string;
|
|
69
71
|
operationGroup?: string;
|
|
72
|
+
audioResponse?: boolean | IPromptAudioParams;
|
|
70
73
|
}
|
|
71
74
|
|
|
72
75
|
interface DraxAgentMessageOutput {
|
|
@@ -79,6 +82,7 @@ interface DraxAgentMessageOutput {
|
|
|
79
82
|
inputTokens: number;
|
|
80
83
|
outputTokens: number;
|
|
81
84
|
time: number;
|
|
85
|
+
audio?: IPromptAudioResponse;
|
|
82
86
|
}
|
|
83
87
|
|
|
84
88
|
interface DraxAgentSession {
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
interface ITTSVoiceSettings {
|
|
2
|
+
stability?: number;
|
|
3
|
+
similarityBoost?: number;
|
|
4
|
+
style?: number;
|
|
5
|
+
useSpeakerBoost?: boolean;
|
|
6
|
+
speed?: number;
|
|
7
|
+
}
|
|
8
|
+
|
|
9
|
+
interface ITTSParams {
|
|
10
|
+
text: string;
|
|
11
|
+
voiceId?: string;
|
|
12
|
+
model?: string;
|
|
13
|
+
outputFormat?: string;
|
|
14
|
+
voiceSettings?: ITTSVoiceSettings;
|
|
15
|
+
previousText?: string;
|
|
16
|
+
nextText?: string;
|
|
17
|
+
languageCode?: string;
|
|
18
|
+
seed?: number;
|
|
19
|
+
operationTitle?: string;
|
|
20
|
+
operationGroup?: string;
|
|
21
|
+
ip?: string;
|
|
22
|
+
userAgent?: string;
|
|
23
|
+
tenant?: string | null;
|
|
24
|
+
user?: string | null;
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
interface ITTSResponse {
|
|
28
|
+
audio: Buffer;
|
|
29
|
+
contentType: string;
|
|
30
|
+
size: number;
|
|
31
|
+
time: number;
|
|
32
|
+
provider: string;
|
|
33
|
+
model: string;
|
|
34
|
+
voiceId: string;
|
|
35
|
+
outputFormat?: string;
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
interface ITTSProvider {
|
|
39
|
+
textToSpeech(input: ITTSParams): Promise<ITTSResponse>
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
export type {
|
|
43
|
+
ITTSProvider,
|
|
44
|
+
ITTSParams,
|
|
45
|
+
ITTSResponse,
|
|
46
|
+
ITTSVoiceSettings,
|
|
47
|
+
}
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import OpenAI from "openai";
|
|
2
2
|
import type {IAILogBase} from "@drax/ai-share";
|
|
3
|
-
import type {IPromptParams} from "../interfaces/IAIProvider.js";
|
|
4
|
-
import type {AILogService} from "../services/AILogService.js";
|
|
3
|
+
import type {IPromptParams} from "../../interfaces/IAIProvider.js";
|
|
4
|
+
import type {AILogService} from "../../services/AILogService.js";
|
|
5
5
|
import OpenAiProvider from "./OpenAiProvider.js";
|
|
6
6
|
|
|
7
|
-
class DeepSeekProvider extends OpenAiProvider{
|
|
7
|
+
class DeepSeekAiProvider extends OpenAiProvider{
|
|
8
8
|
protected _baseUrl: string
|
|
9
9
|
|
|
10
10
|
constructor(apiKey: string, model: string, baseUrl: string = "https://api.deepseek.com", visionModel?: string, aiLogService?: AILogService) {
|
|
@@ -54,5 +54,5 @@ class DeepSeekProvider extends OpenAiProvider{
|
|
|
54
54
|
}
|
|
55
55
|
}
|
|
56
56
|
|
|
57
|
-
export default DeepSeekProvider
|
|
58
|
-
export {DeepSeekProvider}
|
|
57
|
+
export default DeepSeekAiProvider
|
|
58
|
+
export {DeepSeekAiProvider}
|
|
@@ -14,9 +14,10 @@ import type {
|
|
|
14
14
|
IPromptParams,
|
|
15
15
|
IPromptResponse,
|
|
16
16
|
IPromptTool
|
|
17
|
-
} from "../interfaces/IAIProvider.js";
|
|
18
|
-
import type {AILogService} from "../services/AILogService.js";
|
|
17
|
+
} from "../../interfaces/IAIProvider.js";
|
|
18
|
+
import type {AILogService} from "../../services/AILogService.js";
|
|
19
19
|
import type {IAILogBase} from "@drax/ai-share";
|
|
20
|
+
import PromptAudioService from "../../services/PromptAudioService.js";
|
|
20
21
|
|
|
21
22
|
class GoogleAiProvider implements IAIProvider{
|
|
22
23
|
protected _apiKey: string
|
|
@@ -446,6 +447,7 @@ class GoogleAiProvider implements IAIProvider{
|
|
|
446
447
|
const endTime = performance.now()
|
|
447
448
|
const time = endTime - startTime
|
|
448
449
|
const endedAt = new Date()
|
|
450
|
+
const audio = await PromptAudioService.build(input, output)
|
|
449
451
|
|
|
450
452
|
await this.registerPromptLog(input, {
|
|
451
453
|
model,
|
|
@@ -464,7 +466,8 @@ class GoogleAiProvider implements IAIProvider{
|
|
|
464
466
|
tokens,
|
|
465
467
|
inputTokens,
|
|
466
468
|
outputTokens,
|
|
467
|
-
time
|
|
469
|
+
time,
|
|
470
|
+
...(audio ? {audio} : {}),
|
|
468
471
|
}
|
|
469
472
|
} catch (e: any) {
|
|
470
473
|
const endedAt = new Date()
|
|
@@ -6,9 +6,10 @@ import type {
|
|
|
6
6
|
IPromptParams,
|
|
7
7
|
IPromptResponse,
|
|
8
8
|
IPromptTool
|
|
9
|
-
} from "../interfaces/IAIProvider.js";
|
|
10
|
-
import type {AILogService} from "../services/AILogService.js";
|
|
9
|
+
} from "../../interfaces/IAIProvider.js";
|
|
10
|
+
import type {AILogService} from "../../services/AILogService.js";
|
|
11
11
|
import type {IAILogBase} from "@drax/ai-share";
|
|
12
|
+
import PromptAudioService from "../../services/PromptAudioService.js";
|
|
12
13
|
|
|
13
14
|
type OllamaMessage = {
|
|
14
15
|
role: "system" | "user" | "assistant" | "tool",
|
|
@@ -426,6 +427,7 @@ class OllamaAiProvider implements IAIProvider{
|
|
|
426
427
|
const endTime = performance.now()
|
|
427
428
|
const time = endTime - startTime
|
|
428
429
|
const endedAt = new Date()
|
|
430
|
+
const audio = await PromptAudioService.build(input, output)
|
|
429
431
|
|
|
430
432
|
await this.registerPromptLog(input, {
|
|
431
433
|
model,
|
|
@@ -444,7 +446,8 @@ class OllamaAiProvider implements IAIProvider{
|
|
|
444
446
|
tokens,
|
|
445
447
|
inputTokens,
|
|
446
448
|
outputTokens,
|
|
447
|
-
time
|
|
449
|
+
time,
|
|
450
|
+
...(audio ? {audio} : {}),
|
|
448
451
|
}
|
|
449
452
|
} catch (e: any) {
|
|
450
453
|
const endedAt = new Date()
|
|
@@ -7,9 +7,10 @@ import type {
|
|
|
7
7
|
IPromptParams,
|
|
8
8
|
IPromptResponse,
|
|
9
9
|
IPromptTool
|
|
10
|
-
} from "../interfaces/IAIProvider.js";
|
|
11
|
-
import type {AILogService} from "../services/AILogService.js";
|
|
10
|
+
} from "../../interfaces/IAIProvider.js";
|
|
11
|
+
import type {AILogService} from "../../services/AILogService.js";
|
|
12
12
|
import type {IAILogBase} from "@drax/ai-share";
|
|
13
|
+
import PromptAudioService from "../../services/PromptAudioService.js";
|
|
13
14
|
|
|
14
15
|
class OpenAiProvider implements IAIProvider{
|
|
15
16
|
protected _apiKey: string
|
|
@@ -358,6 +359,7 @@ class OpenAiProvider implements IAIProvider{
|
|
|
358
359
|
const endTime = performance.now()
|
|
359
360
|
const time = endTime - startTime
|
|
360
361
|
const endedAt = new Date()
|
|
362
|
+
const audio = await PromptAudioService.build(input, output)
|
|
361
363
|
|
|
362
364
|
await this.registerPromptLog(input, {
|
|
363
365
|
model,
|
|
@@ -376,7 +378,8 @@ class OpenAiProvider implements IAIProvider{
|
|
|
376
378
|
tokens,
|
|
377
379
|
inputTokens,
|
|
378
380
|
outputTokens,
|
|
379
|
-
time
|
|
381
|
+
time,
|
|
382
|
+
...(audio ? {audio} : {}),
|
|
380
383
|
}
|
|
381
384
|
} catch (e: any) {
|
|
382
385
|
const endedAt = new Date()
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
import type {ITTSParams, ITTSProvider, ITTSResponse} from "../../interfaces/ITTSProvider.js";
|
|
2
|
+
|
|
3
|
+
class ElevenLabsTTSProvider implements ITTSProvider {
|
|
4
|
+
protected _apiKey: string
|
|
5
|
+
protected _baseUrl: string
|
|
6
|
+
protected _model: string
|
|
7
|
+
protected _voiceId: string
|
|
8
|
+
protected _outputFormat?: string
|
|
9
|
+
|
|
10
|
+
constructor(apiKey: string, model: string, voiceId: string, baseUrl: string = "https://api.elevenlabs.io", outputFormat?: string) {
|
|
11
|
+
if (!apiKey) {
|
|
12
|
+
throw new Error("ElevenLabs apiKey required")
|
|
13
|
+
}
|
|
14
|
+
if (!model) {
|
|
15
|
+
throw new Error("ElevenLabs model required")
|
|
16
|
+
}
|
|
17
|
+
if (!voiceId) {
|
|
18
|
+
throw new Error("ElevenLabs voiceId required")
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
this._apiKey = apiKey
|
|
22
|
+
this._model = model
|
|
23
|
+
this._voiceId = voiceId
|
|
24
|
+
this._baseUrl = baseUrl.replace(/\/+$/, "")
|
|
25
|
+
this._outputFormat = outputFormat
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
get model() {
|
|
29
|
+
if (!this._model) {
|
|
30
|
+
throw new Error("ElevenLabs model not found")
|
|
31
|
+
}
|
|
32
|
+
return this._model
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
get voiceId() {
|
|
36
|
+
if (!this._voiceId) {
|
|
37
|
+
throw new Error("ElevenLabs voiceId not found")
|
|
38
|
+
}
|
|
39
|
+
return this._voiceId
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
protected mapContentType(outputFormat?: string) {
|
|
43
|
+
if (!outputFormat) {
|
|
44
|
+
return "audio/mpeg"
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
if (outputFormat.startsWith("mp3")) {
|
|
48
|
+
return "audio/mpeg"
|
|
49
|
+
}
|
|
50
|
+
if (outputFormat.startsWith("opus")) {
|
|
51
|
+
return "audio/ogg"
|
|
52
|
+
}
|
|
53
|
+
if (outputFormat.startsWith("pcm")) {
|
|
54
|
+
return "audio/wav"
|
|
55
|
+
}
|
|
56
|
+
if (outputFormat.startsWith("ulaw") || outputFormat.startsWith("alaw")) {
|
|
57
|
+
return "audio/basic"
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
return "application/octet-stream"
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
protected mapVoiceSettings(voiceSettings: ITTSParams["voiceSettings"]) {
|
|
64
|
+
if (!voiceSettings) {
|
|
65
|
+
return undefined
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
return {
|
|
69
|
+
stability: voiceSettings.stability,
|
|
70
|
+
similarity_boost: voiceSettings.similarityBoost,
|
|
71
|
+
style: voiceSettings.style,
|
|
72
|
+
use_speaker_boost: voiceSettings.useSpeakerBoost,
|
|
73
|
+
speed: voiceSettings.speed,
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
protected buildBody(input: ITTSParams, model: string) {
|
|
78
|
+
return {
|
|
79
|
+
text: input.text,
|
|
80
|
+
model_id: model,
|
|
81
|
+
...(input.voiceSettings ? {voice_settings: this.mapVoiceSettings(input.voiceSettings)} : {}),
|
|
82
|
+
...(input.previousText ? {previous_text: input.previousText} : {}),
|
|
83
|
+
...(input.nextText ? {next_text: input.nextText} : {}),
|
|
84
|
+
...(input.languageCode ? {language_code: input.languageCode} : {}),
|
|
85
|
+
...(input.seed !== undefined ? {seed: input.seed} : {}),
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
async textToSpeech(input: ITTSParams): Promise<ITTSResponse> {
|
|
90
|
+
const startedAt = Date.now()
|
|
91
|
+
const model = input.model ?? this.model
|
|
92
|
+
const voiceId = input.voiceId ?? this.voiceId
|
|
93
|
+
const outputFormat = input.outputFormat ?? this._outputFormat
|
|
94
|
+
const url = new URL(`${this._baseUrl}/v1/text-to-speech/${encodeURIComponent(voiceId)}`)
|
|
95
|
+
|
|
96
|
+
if (outputFormat) {
|
|
97
|
+
url.searchParams.set("output_format", outputFormat)
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
const response = await fetch(url, {
|
|
101
|
+
method: "POST",
|
|
102
|
+
headers: {
|
|
103
|
+
"Accept": this.mapContentType(outputFormat),
|
|
104
|
+
"Content-Type": "application/json",
|
|
105
|
+
"xi-api-key": this._apiKey,
|
|
106
|
+
},
|
|
107
|
+
body: JSON.stringify(this.buildBody(input, model)),
|
|
108
|
+
})
|
|
109
|
+
|
|
110
|
+
if (!response.ok) {
|
|
111
|
+
const errorText = await response.text()
|
|
112
|
+
throw new Error(`ElevenLabs TTS request failed (${response.status}): ${errorText}`)
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
const audio = Buffer.from(await response.arrayBuffer())
|
|
116
|
+
const contentType = response.headers.get("content-type") ?? this.mapContentType(outputFormat)
|
|
117
|
+
|
|
118
|
+
return {
|
|
119
|
+
audio,
|
|
120
|
+
contentType,
|
|
121
|
+
size: audio.byteLength,
|
|
122
|
+
time: Date.now() - startedAt,
|
|
123
|
+
provider: "elevenlabs",
|
|
124
|
+
model,
|
|
125
|
+
voiceId,
|
|
126
|
+
outputFormat,
|
|
127
|
+
}
|
|
128
|
+
}
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
export default ElevenLabsTTSProvider;
|
|
132
|
+
export {ElevenLabsTTSProvider}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import TTSGenericController from "../controllers/TTSGenericController.js";
|
|
2
|
+
|
|
3
|
+
async function TTSFastifyRoutes(fastify, options) {
|
|
4
|
+
|
|
5
|
+
const genericController: TTSGenericController = new TTSGenericController()
|
|
6
|
+
|
|
7
|
+
fastify.get('/api/tts/providers', (req,rep) => genericController.availableProviders(req,rep))
|
|
8
|
+
fastify.post('/api/tts', (req,rep) => genericController.textToSpeech(req,rep))
|
|
9
|
+
|
|
10
|
+
}
|
|
11
|
+
|
|
12
|
+
export default TTSFastifyRoutes;
|
|
13
|
+
export {TTSFastifyRoutes}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import {z} from "zod";
|
|
2
|
+
|
|
3
|
+
const TTSVoiceSettingsSchema = z.object({
|
|
4
|
+
stability: z.number().min(0).max(1).optional(),
|
|
5
|
+
similarityBoost: z.number().min(0).max(1).optional(),
|
|
6
|
+
style: z.number().min(0).max(1).optional(),
|
|
7
|
+
useSpeakerBoost: z.boolean().optional(),
|
|
8
|
+
speed: z.number().positive().optional(),
|
|
9
|
+
})
|
|
10
|
+
|
|
11
|
+
const TTSRequestSchema = z.object({
|
|
12
|
+
text: z.string().min(1),
|
|
13
|
+
provider: z.string().default("ElevenLabs"),
|
|
14
|
+
voiceId: z.string().optional(),
|
|
15
|
+
model: z.string().optional(),
|
|
16
|
+
outputFormat: z.string().optional(),
|
|
17
|
+
voiceSettings: TTSVoiceSettingsSchema.optional(),
|
|
18
|
+
previousText: z.string().optional(),
|
|
19
|
+
nextText: z.string().optional(),
|
|
20
|
+
languageCode: z.string().optional(),
|
|
21
|
+
seed: z.number().int().optional(),
|
|
22
|
+
responseFormat: z.enum(["audio", "base64"]).default("audio"),
|
|
23
|
+
operationTitle: z.string().optional(),
|
|
24
|
+
operationGroup: z.string().optional(),
|
|
25
|
+
})
|
|
26
|
+
|
|
27
|
+
type TTSRequest = z.infer<typeof TTSRequestSchema>
|
|
28
|
+
type TTSVoiceSettings = z.infer<typeof TTSVoiceSettingsSchema>
|
|
29
|
+
|
|
30
|
+
export {
|
|
31
|
+
TTSRequestSchema,
|
|
32
|
+
TTSVoiceSettingsSchema,
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
export type {
|
|
36
|
+
TTSRequest,
|
|
37
|
+
TTSVoiceSettings,
|
|
38
|
+
}
|