@drax/ai-back 3.41.0 → 3.43.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117)
  1. package/dist/agents/DraxAgent.js +3 -1
  2. package/dist/config/ElevenLabsTTSConfig.js +10 -0
  3. package/dist/controllers/AICrudController.js +1 -1
  4. package/dist/controllers/AIGenericController.js +23 -1
  5. package/dist/controllers/DraxAgentController.js +22 -0
  6. package/dist/controllers/TTSGenericController.js +61 -0
  7. package/dist/factory/ElevenLabsTTSProviderFactory.js +13 -0
  8. package/dist/factory/TTSProviderFactory.js +27 -0
  9. package/dist/factory/ai/AiProviderFactory.js +30 -0
  10. package/dist/factory/ai/DeepSeekAiProviderFactory.js +14 -0
  11. package/dist/factory/ai/GoogleAiProviderFactory.js +14 -0
  12. package/dist/factory/ai/OllamaAiProviderFactory.js +14 -0
  13. package/dist/factory/ai/OpenAiProviderFactory.js +14 -0
  14. package/dist/factory/tts/ElevenLabsTTSProviderFactory.js +13 -0
  15. package/dist/factory/tts/TTSProviderFactory.js +27 -0
  16. package/dist/index.js +23 -13
  17. package/dist/interfaces/ITTSProvider.js +1 -0
  18. package/dist/permissions/TTSPermissions.js +6 -0
  19. package/dist/providers/ElevenLabsTTSProvider.js +108 -0
  20. package/dist/providers/ai/DeepSeekAiProvider.js +34 -0
  21. package/dist/providers/ai/GoogleAiProvider.js +370 -0
  22. package/dist/providers/ai/OllamaAiProvider.js +345 -0
  23. package/dist/providers/ai/OpenAiProvider.js +305 -0
  24. package/dist/providers/tts/ElevenLabsTTSProvider.js +108 -0
  25. package/dist/routes/TTSRoutes.js +8 -0
  26. package/dist/schemas/TTSRequestSchema.js +24 -0
  27. package/dist/services/PromptAudioService.js +68 -0
  28. package/dist/services/TTSGenericService.js +21 -0
  29. package/package.json +3 -3
  30. package/src/agents/DraxAgent.ts +3 -1
  31. package/src/config/ElevenLabsTTSConfig.ts +13 -0
  32. package/src/controllers/AICrudController.ts +1 -1
  33. package/src/controllers/AIGenericController.ts +25 -1
  34. package/src/controllers/DraxAgentController.ts +24 -0
  35. package/src/controllers/TTSGenericController.ts +70 -0
  36. package/src/factory/{AiProviderFactory.ts → ai/AiProviderFactory.ts} +3 -3
  37. package/src/factory/ai/DeepSeekAiProviderFactory.ts +27 -0
  38. package/src/factory/{GoogleAiProviderFactory.ts → ai/GoogleAiProviderFactory.ts} +4 -4
  39. package/src/factory/{OllamaAiProviderFactory.ts → ai/OllamaAiProviderFactory.ts} +4 -4
  40. package/src/factory/{OpenAiProviderFactory.ts → ai/OpenAiProviderFactory.ts} +4 -4
  41. package/src/factory/tts/ElevenLabsTTSProviderFactory.ts +26 -0
  42. package/src/factory/tts/TTSProviderFactory.ts +42 -0
  43. package/src/index.ts +60 -11
  44. package/src/interfaces/IAIProvider.ts +38 -1
  45. package/src/interfaces/IDraxAgent.ts +4 -0
  46. package/src/interfaces/ITTSProvider.ts +47 -0
  47. package/src/permissions/AIPermissions.ts +0 -1
  48. package/src/permissions/TTSPermissions.ts +8 -0
  49. package/src/providers/{DeepSeekProvider.ts → ai/DeepSeekAiProvider.ts} +5 -5
  50. package/src/providers/{GoogleAiProvider.ts → ai/GoogleAiProvider.ts} +6 -3
  51. package/src/providers/{OllamaAiProvider.ts → ai/OllamaAiProvider.ts} +6 -3
  52. package/src/providers/{OpenAiProvider.ts → ai/OpenAiProvider.ts} +6 -3
  53. package/src/providers/tts/ElevenLabsTTSProvider.ts +132 -0
  54. package/src/routes/TTSRoutes.ts +13 -0
  55. package/src/schemas/TTSRequestSchema.ts +38 -0
  56. package/src/services/PromptAudioService.ts +87 -0
  57. package/src/services/TTSGenericService.ts +41 -0
  58. package/test/DeepSeekProvider.test.ts +4 -4
  59. package/test/DraxAgent.test.ts +64 -0
  60. package/test/PromptAudioService.test.ts +115 -0
  61. package/tsconfig.tsbuildinfo +1 -1
  62. package/types/agents/DraxAgent.d.ts.map +1 -1
  63. package/types/config/ElevenLabsTTSConfig.d.ts +10 -0
  64. package/types/config/ElevenLabsTTSConfig.d.ts.map +1 -0
  65. package/types/controllers/AIGenericController.d.ts.map +1 -1
  66. package/types/controllers/DraxAgentController.d.ts.map +1 -1
  67. package/types/controllers/TTSGenericController.d.ts +11 -0
  68. package/types/controllers/TTSGenericController.d.ts.map +1 -0
  69. package/types/factory/ElevenLabsTTSProviderFactory.d.ts +8 -0
  70. package/types/factory/ElevenLabsTTSProviderFactory.d.ts.map +1 -0
  71. package/types/factory/TTSProviderFactory.d.ts +15 -0
  72. package/types/factory/TTSProviderFactory.d.ts.map +1 -0
  73. package/types/factory/ai/AiProviderFactory.d.ts +8 -0
  74. package/types/factory/ai/AiProviderFactory.d.ts.map +1 -0
  75. package/types/factory/ai/DeepSeekAiProviderFactory.d.ts +8 -0
  76. package/types/factory/ai/DeepSeekAiProviderFactory.d.ts.map +1 -0
  77. package/types/factory/ai/GoogleAiProviderFactory.d.ts +8 -0
  78. package/types/factory/ai/GoogleAiProviderFactory.d.ts.map +1 -0
  79. package/types/factory/ai/OllamaAiProviderFactory.d.ts +8 -0
  80. package/types/factory/ai/OllamaAiProviderFactory.d.ts.map +1 -0
  81. package/types/factory/ai/OpenAiProviderFactory.d.ts +8 -0
  82. package/types/factory/ai/OpenAiProviderFactory.d.ts.map +1 -0
  83. package/types/factory/tts/ElevenLabsTTSProviderFactory.d.ts +8 -0
  84. package/types/factory/tts/ElevenLabsTTSProviderFactory.d.ts.map +1 -0
  85. package/types/factory/tts/TTSProviderFactory.d.ts +15 -0
  86. package/types/factory/tts/TTSProviderFactory.d.ts.map +1 -0
  87. package/types/index.d.ts +26 -12
  88. package/types/index.d.ts.map +1 -1
  89. package/types/interfaces/IAIProvider.d.ts +32 -1
  90. package/types/interfaces/IAIProvider.d.ts.map +1 -1
  91. package/types/interfaces/IDraxAgent.d.ts +3 -1
  92. package/types/interfaces/IDraxAgent.d.ts.map +1 -1
  93. package/types/interfaces/ITTSProvider.d.ts +39 -0
  94. package/types/interfaces/ITTSProvider.d.ts.map +1 -0
  95. package/types/permissions/TTSPermissions.d.ts +6 -0
  96. package/types/permissions/TTSPermissions.d.ts.map +1 -0
  97. package/types/providers/ElevenLabsTTSProvider.d.ts +38 -0
  98. package/types/providers/ElevenLabsTTSProvider.d.ts.map +1 -0
  99. package/types/providers/ai/DeepSeekAiProvider.d.ts +24 -0
  100. package/types/providers/ai/DeepSeekAiProvider.d.ts.map +1 -0
  101. package/types/providers/ai/GoogleAiProvider.d.ts +63 -0
  102. package/types/providers/ai/GoogleAiProvider.d.ts.map +1 -0
  103. package/types/providers/ai/OllamaAiProvider.d.ts +78 -0
  104. package/types/providers/ai/OllamaAiProvider.d.ts.map +1 -0
  105. package/types/providers/ai/OpenAiProvider.d.ts +97 -0
  106. package/types/providers/ai/OpenAiProvider.d.ts.map +1 -0
  107. package/types/providers/tts/ElevenLabsTTSProvider.d.ts +38 -0
  108. package/types/providers/tts/ElevenLabsTTSProvider.d.ts.map +1 -0
  109. package/types/routes/TTSRoutes.d.ts +4 -0
  110. package/types/routes/TTSRoutes.d.ts.map +1 -0
  111. package/types/schemas/TTSRequestSchema.d.ts +37 -0
  112. package/types/schemas/TTSRequestSchema.d.ts.map +1 -0
  113. package/types/services/PromptAudioService.d.ts +9 -0
  114. package/types/services/PromptAudioService.d.ts.map +1 -0
  115. package/types/services/TTSGenericService.d.ts +17 -0
  116. package/types/services/TTSGenericService.d.ts.map +1 -0
  117. package/src/factory/DeepSeekProviderFactory.ts +0 -27
package/src/index.ts CHANGED
@@ -2,35 +2,46 @@ import {OpenAiConfig} from "./config/OpenAiConfig.js";
2
2
  import {GoogleAiConfig} from "./config/GoogleAiConfig.js";
3
3
  import {OllamaAiConfig} from "./config/OllamaAiConfig.js";
4
4
  import {DeepSeekConfig} from "./config/DeepSeekConfig.js";
5
+ import {ElevenLabsTTSConfig} from "./config/ElevenLabsTTSConfig.js";
5
6
  import {AILogSchema, AILogBaseSchema} from "./schemas/AILogSchema.js";
7
+ import {TTSRequestSchema, TTSVoiceSettingsSchema} from "./schemas/TTSRequestSchema.js";
6
8
  import AILogModel from "./models/AILogModel.js";
7
9
  import AILogMongoRepository from "./repository/mongo/AILogMongoRepository.js";
8
10
  import AILogSqliteRepository from "./repository/sqlite/AILogSqliteRepository.js";
9
- import {OpenAiProviderFactory} from "./factory/OpenAiProviderFactory.js";
10
- import {GoogleAiProviderFactory} from "./factory/GoogleAiProviderFactory.js";
11
- import {OllamaAiProviderFactory} from "./factory/OllamaAiProviderFactory.js";
12
- import {DeepSeekProviderFactory} from "./factory/DeepSeekProviderFactory.js";
13
- import {AiProviderFactory} from "./factory/AiProviderFactory.js";
11
+ import {OpenAiProviderFactory} from "./factory/ai/OpenAiProviderFactory.js";
12
+ import {GoogleAiProviderFactory} from "./factory/ai/GoogleAiProviderFactory.js";
13
+ import {OllamaAiProviderFactory} from "./factory/ai/OllamaAiProviderFactory.js";
14
+ import {DeepSeekAiProviderFactory} from "./factory/ai/DeepSeekAiProviderFactory.js";
15
+ import {AiProviderFactory} from "./factory/ai/AiProviderFactory.js";
16
+ import {ElevenLabsTTSProviderFactory} from "./factory/tts/ElevenLabsTTSProviderFactory.js";
17
+ import {TTSProviderFactory} from "./factory/tts/TTSProviderFactory.js";
18
+ import type {TTSProviderInfo} from "./factory/tts/TTSProviderFactory.js";
14
19
  import {DraxAgentFactory} from "./factory/DraxAgentFactory.js";
15
20
  import AILogServiceFactory from "./factory/services/AILogServiceFactory.js";
16
- import {OpenAiProvider} from "./providers/OpenAiProvider.js";
17
- import {GoogleAiProvider} from "./providers/GoogleAiProvider.js";
18
- import {OllamaAiProvider} from "./providers/OllamaAiProvider.js";
19
- import {DeepSeekProvider} from "./providers/DeepSeekProvider.js";
21
+ import {OpenAiProvider} from "./providers/ai/OpenAiProvider.js";
22
+ import {GoogleAiProvider} from "./providers/ai/GoogleAiProvider.js";
23
+ import {OllamaAiProvider} from "./providers/ai/OllamaAiProvider.js";
24
+ import {DeepSeekAiProvider} from "./providers/ai/DeepSeekAiProvider.js";
25
+ import {ElevenLabsTTSProvider} from "./providers/tts/ElevenLabsTTSProvider.js";
20
26
  import {BuilderTool} from "./tools/BuilderTool.js";
21
27
  import {KnowledgeService} from "./services/KnowledgeService.js";
22
28
  import {AILogService} from "./services/AILogService.js";
29
+ import {TTSGenericService} from "./services/TTSGenericService.js";
30
+ import {PromptAudioService} from "./services/PromptAudioService.js";
23
31
  import AILogPermissions from "./permissions/AILogPermissions.js";
24
32
  import AgentPermissions from "./permissions/AgentPermissions.js";
25
33
  import AgentSessionPermissions from "./permissions/AgentSessionPermissions.js";
26
34
  import AIPermissions from "./permissions/AIPermissions.js";
35
+ import TTSPermissions from "./permissions/TTSPermissions.js";
27
36
  import AILogController from "./controllers/AILogController.js";
28
37
  import AICrudController from "./controllers/AICrudController.js";
29
38
  import AIGenericController from "./controllers/AIGenericController.js";
39
+ import TTSGenericController from "./controllers/TTSGenericController.js";
30
40
  import DraxAgentController from "./controllers/DraxAgentController.js";
31
41
  import AgentSessionController from "./controllers/AgentSessionController.js";
32
42
  import AILogRoutes from "./routes/AILogRoutes.js";
33
43
  import AIRoutes from "./routes/AIRoutes.js";
44
+ import TTSRoutes from "./routes/TTSRoutes.js";
34
45
  import DraxAgentRoutes from "./routes/DraxAgentRoutes.js";
35
46
  import AgentSessionRoutes from "./routes/AgentSessionRoutes.js";
36
47
  import {DraxAgent} from "./agents/DraxAgent.js";
@@ -40,6 +51,9 @@ import type {
40
51
  IPromptContentPart,
41
52
  IPromptContentPartImage,
42
53
  IPromptContentPartText,
54
+ IPromptAudioParams,
55
+ IPromptAudioResponse,
56
+ IPromptAudioResponseMeta,
43
57
  IPromptImage,
44
58
  IPromptImageDetail,
45
59
  IPromptMessage,
@@ -48,6 +62,19 @@ import type {
48
62
  IPromptResponse,
49
63
  IPromptTool
50
64
  } from "./interfaces/IAIProvider.js";
65
+ import type {
66
+ ITTSParams,
67
+ ITTSProvider,
68
+ ITTSResponse,
69
+ ITTSVoiceSettings,
70
+ } from "./interfaces/ITTSProvider.js";
71
+ import type {
72
+ TTSRequest,
73
+ TTSVoiceSettings,
74
+ } from "./schemas/TTSRequestSchema.js";
75
+ import type {
76
+ TTSRequestContext,
77
+ } from "./services/TTSGenericService.js";
51
78
  import type {
52
79
  ToolBuilderMethod,
53
80
  ToolBuilderOptions,
@@ -85,7 +112,18 @@ export type {
85
112
  IPromptContentPart,
86
113
  IPromptContentPartImage,
87
114
  IPromptContentPartText,
115
+ IPromptAudioParams,
116
+ IPromptAudioResponse,
117
+ IPromptAudioResponseMeta,
88
118
  IPromptResponse,
119
+ ITTSProvider,
120
+ ITTSParams,
121
+ ITTSResponse,
122
+ ITTSVoiceSettings,
123
+ TTSRequest,
124
+ TTSVoiceSettings,
125
+ TTSRequestContext,
126
+ TTSProviderInfo,
89
127
  ToolBuilderMethod,
90
128
  ToolBuilderOptions,
91
129
  ToolBuilderService,
@@ -108,40 +146,51 @@ export {
108
146
  GoogleAiConfig,
109
147
  OllamaAiConfig,
110
148
  DeepSeekConfig,
149
+ ElevenLabsTTSConfig,
111
150
  AILogSchema,
112
151
  AILogBaseSchema,
152
+ TTSRequestSchema,
153
+ TTSVoiceSettingsSchema,
113
154
  AILogModel,
114
155
  AILogMongoRepository,
115
156
  AILogSqliteRepository,
116
157
  OpenAiProviderFactory,
117
158
  GoogleAiProviderFactory,
118
159
  OllamaAiProviderFactory,
119
- DeepSeekProviderFactory,
160
+ DeepSeekAiProviderFactory,
120
161
  AiProviderFactory,
162
+ ElevenLabsTTSProviderFactory,
163
+ TTSProviderFactory,
121
164
  DraxAgentFactory,
122
165
  AILogServiceFactory,
123
166
  OpenAiProvider,
124
167
  GoogleAiProvider,
125
168
  OllamaAiProvider,
126
- DeepSeekProvider,
169
+ DeepSeekAiProvider,
170
+ ElevenLabsTTSProvider,
127
171
  BuilderTool,
128
172
  //Service
129
173
  KnowledgeService,
130
174
  AILogService,
175
+ TTSGenericService,
176
+ PromptAudioService,
131
177
  //Permissions
132
178
  AILogPermissions,
133
179
  AgentPermissions,
134
180
  AIPermissions,
181
+ TTSPermissions,
135
182
  AgentSessionPermissions,
136
183
  //Controllers
137
184
  AILogController,
138
185
  AICrudController,
139
186
  AIGenericController,
187
+ TTSGenericController,
140
188
  DraxAgentController,
141
189
  AgentSessionController,
142
190
  DraxAgent,
143
191
  AILogRoutes,
144
192
  AIRoutes,
193
+ TTSRoutes,
145
194
  DraxAgentRoutes,
146
195
  AgentSessionRoutes
147
196
  }
package/src/interfaces/IAIProvider.ts CHANGED
@@ -1,4 +1,5 @@
1
1
  import { ZodSchema } from 'zod'
2
+ import type {ITTSVoiceSettings} from "./ITTSProvider.js";
2
3
 
3
4
  type Role = 'user' | 'assistant' | 'system';
4
5
 
@@ -74,6 +75,38 @@ interface IPromptParams {
74
75
  userAgent?: string,
75
76
  tenant?: string | null,
76
77
  user?: string | null,
78
+ audioResponse?: boolean | IPromptAudioParams,
79
+ }
80
+
81
+ interface IPromptAudioParams {
82
+ enabled?: boolean,
83
+ provider?: string,
84
+ voiceId?: string,
85
+ model?: string,
86
+ outputFormat?: string,
87
+ voiceSettings?: ITTSVoiceSettings,
88
+ previousText?: string,
89
+ nextText?: string,
90
+ languageCode?: string,
91
+ seed?: number,
92
+ operationTitle?: string,
93
+ operationGroup?: string,
94
+ }
95
+
96
+ interface IPromptAudioResponseMeta {
97
+ provider: string,
98
+ model: string,
99
+ voiceId: string,
100
+ outputFormat?: string,
101
+ size: number,
102
+ time: number,
103
+ }
104
+
105
+ interface IPromptAudioResponse {
106
+ audio: string,
107
+ contentType: string,
108
+ encoding: 'base64',
109
+ meta: IPromptAudioResponseMeta,
77
110
  }
78
111
 
79
112
  interface IPromptResponse {
@@ -81,7 +114,8 @@ interface IPromptResponse {
81
114
  tokens: number,
82
115
  inputTokens: number,
83
116
  outputTokens: number,
84
- time: number
117
+ time: number,
118
+ audio?: IPromptAudioResponse,
85
119
  }
86
120
 
87
121
  interface IAIProvider {
@@ -92,6 +126,9 @@ export type {
92
126
  IAIProvider,
93
127
  IPromptParams,
94
128
  IPromptResponse,
129
+ IPromptAudioParams,
130
+ IPromptAudioResponse,
131
+ IPromptAudioResponseMeta,
95
132
  IPromptMessage,
96
133
  IPromptMemory,
97
134
  IPromptTool,
package/src/interfaces/IDraxAgent.ts CHANGED
@@ -1,5 +1,7 @@
1
1
  import type {
2
2
  IAIProvider,
3
+ IPromptAudioParams,
4
+ IPromptAudioResponse,
3
5
  IPromptContentPart,
4
6
  IPromptImage,
5
7
  IPromptMemory,
@@ -67,6 +69,7 @@ interface DraxAgentMessageInput extends DraxAgentSessionInput {
67
69
  toolMaxIterations?: number;
68
70
  operationTitle?: string;
69
71
  operationGroup?: string;
72
+ audioResponse?: boolean | IPromptAudioParams;
70
73
  }
71
74
 
72
75
  interface DraxAgentMessageOutput {
@@ -79,6 +82,7 @@ interface DraxAgentMessageOutput {
79
82
  inputTokens: number;
80
83
  outputTokens: number;
81
84
  time: number;
85
+ audio?: IPromptAudioResponse;
82
86
  }
83
87
 
84
88
  interface DraxAgentSession {
package/src/interfaces/ITTSProvider.ts ADDED
@@ -0,0 +1,47 @@
1
+ interface ITTSVoiceSettings {
2
+ stability?: number;
3
+ similarityBoost?: number;
4
+ style?: number;
5
+ useSpeakerBoost?: boolean;
6
+ speed?: number;
7
+ }
8
+
9
+ interface ITTSParams {
10
+ text: string;
11
+ voiceId?: string;
12
+ model?: string;
13
+ outputFormat?: string;
14
+ voiceSettings?: ITTSVoiceSettings;
15
+ previousText?: string;
16
+ nextText?: string;
17
+ languageCode?: string;
18
+ seed?: number;
19
+ operationTitle?: string;
20
+ operationGroup?: string;
21
+ ip?: string;
22
+ userAgent?: string;
23
+ tenant?: string | null;
24
+ user?: string | null;
25
+ }
26
+
27
+ interface ITTSResponse {
28
+ audio: Buffer;
29
+ contentType: string;
30
+ size: number;
31
+ time: number;
32
+ provider: string;
33
+ model: string;
34
+ voiceId: string;
35
+ outputFormat?: string;
36
+ }
37
+
38
+ interface ITTSProvider {
39
+ textToSpeech(input: ITTSParams): Promise<ITTSResponse>
40
+ }
41
+
42
+ export type {
43
+ ITTSProvider,
44
+ ITTSParams,
45
+ ITTSResponse,
46
+ ITTSVoiceSettings,
47
+ }
package/src/permissions/AIPermissions.ts CHANGED
@@ -8,4 +8,3 @@ enum AIPermissions {
8
8
 
9
9
  export { AIPermissions };
10
10
  export default AIPermissions;
11
-
package/src/permissions/TTSPermissions.ts ADDED
@@ -0,0 +1,8 @@
1
+ enum TTSPermissions {
2
+
3
+ TextToSpeech = "tts:textToSpeech",
4
+
5
+ }
6
+
7
+ export { TTSPermissions };
8
+ export default TTSPermissions;
package/src/providers/{DeepSeekProvider.ts → ai/DeepSeekAiProvider.ts} RENAMED
@@ -1,10 +1,10 @@
1
1
  import OpenAI from "openai";
2
2
  import type {IAILogBase} from "@drax/ai-share";
3
- import type {IPromptParams} from "../interfaces/IAIProvider.js";
4
- import type {AILogService} from "../services/AILogService.js";
3
+ import type {IPromptParams} from "../../interfaces/IAIProvider.js";
4
+ import type {AILogService} from "../../services/AILogService.js";
5
5
  import OpenAiProvider from "./OpenAiProvider.js";
6
6
 
7
- class DeepSeekProvider extends OpenAiProvider{
7
+ class DeepSeekAiProvider extends OpenAiProvider{
8
8
  protected _baseUrl: string
9
9
 
10
10
  constructor(apiKey: string, model: string, baseUrl: string = "https://api.deepseek.com", visionModel?: string, aiLogService?: AILogService) {
@@ -54,5 +54,5 @@ class DeepSeekProvider extends OpenAiProvider{
54
54
  }
55
55
  }
56
56
 
57
- export default DeepSeekProvider
58
- export {DeepSeekProvider}
57
+ export default DeepSeekAiProvider
58
+ export {DeepSeekAiProvider}
package/src/providers/{GoogleAiProvider.ts → ai/GoogleAiProvider.ts} RENAMED
@@ -14,9 +14,10 @@ import type {
14
14
  IPromptParams,
15
15
  IPromptResponse,
16
16
  IPromptTool
17
- } from "../interfaces/IAIProvider";
18
- import type {AILogService} from "../services/AILogService";
17
+ } from "../../interfaces/IAIProvider.js";
18
+ import type {AILogService} from "../../services/AILogService.js";
19
19
  import type {IAILogBase} from "@drax/ai-share";
20
+ import PromptAudioService from "../../services/PromptAudioService.js";
20
21
 
21
22
  class GoogleAiProvider implements IAIProvider{
22
23
  protected _apiKey: string
@@ -446,6 +447,7 @@ class GoogleAiProvider implements IAIProvider{
446
447
  const endTime = performance.now()
447
448
  const time = endTime - startTime
448
449
  const endedAt = new Date()
450
+ const audio = await PromptAudioService.build(input, output)
449
451
 
450
452
  await this.registerPromptLog(input, {
451
453
  model,
@@ -464,7 +466,8 @@ class GoogleAiProvider implements IAIProvider{
464
466
  tokens,
465
467
  inputTokens,
466
468
  outputTokens,
467
- time
469
+ time,
470
+ ...(audio ? {audio} : {}),
468
471
  }
469
472
  } catch (e: any) {
470
473
  const endedAt = new Date()
package/src/providers/{OllamaAiProvider.ts → ai/OllamaAiProvider.ts} RENAMED
@@ -6,9 +6,10 @@ import type {
6
6
  IPromptParams,
7
7
  IPromptResponse,
8
8
  IPromptTool
9
- } from "../interfaces/IAIProvider";
10
- import type {AILogService} from "../services/AILogService";
9
+ } from "../../interfaces/IAIProvider.js";
10
+ import type {AILogService} from "../../services/AILogService.js";
11
11
  import type {IAILogBase} from "@drax/ai-share";
12
+ import PromptAudioService from "../../services/PromptAudioService.js";
12
13
 
13
14
  type OllamaMessage = {
14
15
  role: "system" | "user" | "assistant" | "tool",
@@ -426,6 +427,7 @@ class OllamaAiProvider implements IAIProvider{
426
427
  const endTime = performance.now()
427
428
  const time = endTime - startTime
428
429
  const endedAt = new Date()
430
+ const audio = await PromptAudioService.build(input, output)
429
431
 
430
432
  await this.registerPromptLog(input, {
431
433
  model,
@@ -444,7 +446,8 @@ class OllamaAiProvider implements IAIProvider{
444
446
  tokens,
445
447
  inputTokens,
446
448
  outputTokens,
447
- time
449
+ time,
450
+ ...(audio ? {audio} : {}),
448
451
  }
449
452
  } catch (e: any) {
450
453
  const endedAt = new Date()
package/src/providers/{OpenAiProvider.ts → ai/OpenAiProvider.ts} RENAMED
@@ -7,9 +7,10 @@ import type {
7
7
  IPromptParams,
8
8
  IPromptResponse,
9
9
  IPromptTool
10
- } from "../interfaces/IAIProvider";
11
- import type {AILogService} from "../services/AILogService";
10
+ } from "../../interfaces/IAIProvider.js";
11
+ import type {AILogService} from "../../services/AILogService.js";
12
12
  import type {IAILogBase} from "@drax/ai-share";
13
+ import PromptAudioService from "../../services/PromptAudioService.js";
13
14
 
14
15
  class OpenAiProvider implements IAIProvider{
15
16
  protected _apiKey: string
@@ -358,6 +359,7 @@ class OpenAiProvider implements IAIProvider{
358
359
  const endTime = performance.now()
359
360
  const time = endTime - startTime
360
361
  const endedAt = new Date()
362
+ const audio = await PromptAudioService.build(input, output)
361
363
 
362
364
  await this.registerPromptLog(input, {
363
365
  model,
@@ -376,7 +378,8 @@ class OpenAiProvider implements IAIProvider{
376
378
  tokens,
377
379
  inputTokens,
378
380
  outputTokens,
379
- time
381
+ time,
382
+ ...(audio ? {audio} : {}),
380
383
  }
381
384
  } catch (e: any) {
382
385
  const endedAt = new Date()
package/src/providers/tts/ElevenLabsTTSProvider.ts ADDED
@@ -0,0 +1,132 @@
1
+ import type {ITTSParams, ITTSProvider, ITTSResponse} from "../../interfaces/ITTSProvider.js";
2
+
3
+ class ElevenLabsTTSProvider implements ITTSProvider {
4
+ protected _apiKey: string
5
+ protected _baseUrl: string
6
+ protected _model: string
7
+ protected _voiceId: string
8
+ protected _outputFormat?: string
9
+
10
+ constructor(apiKey: string, model: string, voiceId: string, baseUrl: string = "https://api.elevenlabs.io", outputFormat?: string) {
11
+ if (!apiKey) {
12
+ throw new Error("ElevenLabs apiKey required")
13
+ }
14
+ if (!model) {
15
+ throw new Error("ElevenLabs model required")
16
+ }
17
+ if (!voiceId) {
18
+ throw new Error("ElevenLabs voiceId required")
19
+ }
20
+
21
+ this._apiKey = apiKey
22
+ this._model = model
23
+ this._voiceId = voiceId
24
+ this._baseUrl = baseUrl.replace(/\/+$/, "")
25
+ this._outputFormat = outputFormat
26
+ }
27
+
28
+ get model() {
29
+ if (!this._model) {
30
+ throw new Error("ElevenLabs model not found")
31
+ }
32
+ return this._model
33
+ }
34
+
35
+ get voiceId() {
36
+ if (!this._voiceId) {
37
+ throw new Error("ElevenLabs voiceId not found")
38
+ }
39
+ return this._voiceId
40
+ }
41
+
42
+ protected mapContentType(outputFormat?: string) {
43
+ if (!outputFormat) {
44
+ return "audio/mpeg"
45
+ }
46
+
47
+ if (outputFormat.startsWith("mp3")) {
48
+ return "audio/mpeg"
49
+ }
50
+ if (outputFormat.startsWith("opus")) {
51
+ return "audio/ogg"
52
+ }
53
+ if (outputFormat.startsWith("pcm")) {
54
+ return "audio/wav"
55
+ }
56
+ if (outputFormat.startsWith("ulaw") || outputFormat.startsWith("alaw")) {
57
+ return "audio/basic"
58
+ }
59
+
60
+ return "application/octet-stream"
61
+ }
62
+
63
+ protected mapVoiceSettings(voiceSettings: ITTSParams["voiceSettings"]) {
64
+ if (!voiceSettings) {
65
+ return undefined
66
+ }
67
+
68
+ return {
69
+ stability: voiceSettings.stability,
70
+ similarity_boost: voiceSettings.similarityBoost,
71
+ style: voiceSettings.style,
72
+ use_speaker_boost: voiceSettings.useSpeakerBoost,
73
+ speed: voiceSettings.speed,
74
+ }
75
+ }
76
+
77
+ protected buildBody(input: ITTSParams, model: string) {
78
+ return {
79
+ text: input.text,
80
+ model_id: model,
81
+ ...(input.voiceSettings ? {voice_settings: this.mapVoiceSettings(input.voiceSettings)} : {}),
82
+ ...(input.previousText ? {previous_text: input.previousText} : {}),
83
+ ...(input.nextText ? {next_text: input.nextText} : {}),
84
+ ...(input.languageCode ? {language_code: input.languageCode} : {}),
85
+ ...(input.seed !== undefined ? {seed: input.seed} : {}),
86
+ }
87
+ }
88
+
89
+ async textToSpeech(input: ITTSParams): Promise<ITTSResponse> {
90
+ const startedAt = Date.now()
91
+ const model = input.model ?? this.model
92
+ const voiceId = input.voiceId ?? this.voiceId
93
+ const outputFormat = input.outputFormat ?? this._outputFormat
94
+ const url = new URL(`${this._baseUrl}/v1/text-to-speech/${encodeURIComponent(voiceId)}`)
95
+
96
+ if (outputFormat) {
97
+ url.searchParams.set("output_format", outputFormat)
98
+ }
99
+
100
+ const response = await fetch(url, {
101
+ method: "POST",
102
+ headers: {
103
+ "Accept": this.mapContentType(outputFormat),
104
+ "Content-Type": "application/json",
105
+ "xi-api-key": this._apiKey,
106
+ },
107
+ body: JSON.stringify(this.buildBody(input, model)),
108
+ })
109
+
110
+ if (!response.ok) {
111
+ const errorText = await response.text()
112
+ throw new Error(`ElevenLabs TTS request failed (${response.status}): ${errorText}`)
113
+ }
114
+
115
+ const audio = Buffer.from(await response.arrayBuffer())
116
+ const contentType = response.headers.get("content-type") ?? this.mapContentType(outputFormat)
117
+
118
+ return {
119
+ audio,
120
+ contentType,
121
+ size: audio.byteLength,
122
+ time: Date.now() - startedAt,
123
+ provider: "elevenlabs",
124
+ model,
125
+ voiceId,
126
+ outputFormat,
127
+ }
128
+ }
129
+ }
130
+
131
+ export default ElevenLabsTTSProvider;
132
+ export {ElevenLabsTTSProvider}
package/src/routes/TTSRoutes.ts ADDED
@@ -0,0 +1,13 @@
1
+ import TTSGenericController from "../controllers/TTSGenericController.js";
2
+
3
+ async function TTSFastifyRoutes(fastify, options) {
4
+
5
+ const genericController: TTSGenericController = new TTSGenericController()
6
+
7
+ fastify.get('/api/tts/providers', (req,rep) => genericController.availableProviders(req,rep))
8
+ fastify.post('/api/tts', (req,rep) => genericController.textToSpeech(req,rep))
9
+
10
+ }
11
+
12
+ export default TTSFastifyRoutes;
13
+ export {TTSFastifyRoutes}
package/src/schemas/TTSRequestSchema.ts ADDED
@@ -0,0 +1,38 @@
1
+ import {z} from "zod";
2
+
3
+ const TTSVoiceSettingsSchema = z.object({
4
+ stability: z.number().min(0).max(1).optional(),
5
+ similarityBoost: z.number().min(0).max(1).optional(),
6
+ style: z.number().min(0).max(1).optional(),
7
+ useSpeakerBoost: z.boolean().optional(),
8
+ speed: z.number().positive().optional(),
9
+ })
10
+
11
+ const TTSRequestSchema = z.object({
12
+ text: z.string().min(1),
13
+ provider: z.string().default("ElevenLabs"),
14
+ voiceId: z.string().optional(),
15
+ model: z.string().optional(),
16
+ outputFormat: z.string().optional(),
17
+ voiceSettings: TTSVoiceSettingsSchema.optional(),
18
+ previousText: z.string().optional(),
19
+ nextText: z.string().optional(),
20
+ languageCode: z.string().optional(),
21
+ seed: z.number().int().optional(),
22
+ responseFormat: z.enum(["audio", "base64"]).default("audio"),
23
+ operationTitle: z.string().optional(),
24
+ operationGroup: z.string().optional(),
25
+ })
26
+
27
+ type TTSRequest = z.infer<typeof TTSRequestSchema>
28
+ type TTSVoiceSettings = z.infer<typeof TTSVoiceSettingsSchema>
29
+
30
+ export {
31
+ TTSRequestSchema,
32
+ TTSVoiceSettingsSchema,
33
+ }
34
+
35
+ export type {
36
+ TTSRequest,
37
+ TTSVoiceSettings,
38
+ }