elevenlabs_client 0.4.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +140 -0
- data/README.md +163 -5
- data/lib/elevenlabs_client/client.rb +80 -3
- data/lib/elevenlabs_client/endpoints/admin/history.rb +106 -0
- data/lib/elevenlabs_client/endpoints/admin/models.rb +27 -0
- data/lib/elevenlabs_client/endpoints/admin/usage.rb +46 -0
- data/lib/elevenlabs_client/endpoints/admin/user.rb +28 -0
- data/lib/elevenlabs_client/endpoints/admin/voice_library.rb +86 -0
- data/lib/elevenlabs_client/endpoints/audio_isolation.rb +71 -0
- data/lib/elevenlabs_client/endpoints/audio_native.rb +103 -0
- data/lib/elevenlabs_client/endpoints/dubs.rb +52 -2
- data/lib/elevenlabs_client/endpoints/forced_alignment.rb +41 -0
- data/lib/elevenlabs_client/endpoints/sound_generation.rb +0 -1
- data/lib/elevenlabs_client/endpoints/speech_to_speech.rb +125 -0
- data/lib/elevenlabs_client/endpoints/speech_to_text.rb +121 -0
- data/lib/elevenlabs_client/endpoints/text_to_dialogue.rb +34 -1
- data/lib/elevenlabs_client/endpoints/text_to_speech.rb +147 -1
- data/lib/elevenlabs_client/endpoints/text_to_voice.rb +13 -1
- data/lib/elevenlabs_client/endpoints/voices.rb +368 -7
- data/lib/elevenlabs_client/endpoints/websocket_text_to_speech.rb +250 -0
- data/lib/elevenlabs_client/version.rb +1 -1
- data/lib/elevenlabs_client.rb +11 -4
- metadata +41 -4
- data/lib/elevenlabs_client/endpoints/models.rb +0 -26
- data/lib/elevenlabs_client/endpoints/text_to_speech_stream.rb +0 -42
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1ac0295ca5dadf36f14152af7d6c2f4fa39dd7a2c8b59837551fc3bd8839d419
|
4
|
+
data.tar.gz: dabd65a40bb17aa262f2251f125d8edadaccde957b4fe9084b91384f59b54ee5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ceb86051b205481e427aec671deeb55fbeaff5f68bf99fe7257e34ce926cd83b7ec7644d5daf4854dc67ff6ece921c23ba99e6301063beb1116084f6820de4be
|
7
|
+
data.tar.gz: e6ab2b8907aa8b02e4ba222a407d5aa3c67e0e86b303421b073f0ed7744a9a9712510d88f75247cd534ddc1c67ccfb8ed5c0d411cee9cd3632288cf35d544036
|
data/CHANGELOG.md
CHANGED
@@ -5,6 +5,146 @@ All notable changes to this project will be documented in this file.
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
7
7
|
|
8
|
+
## [Unreleased]
|
9
|
+
|
10
|
+
## [0.6.0] - 2025-09-15
|
11
|
+
|
12
|
+
### Added
|
13
|
+
- **🏢 Admin API Suite** - Complete administrative functionality for account management
|
14
|
+
- **User Management** (`client.user.*`) - Access comprehensive user account information, subscription details, and feature availability
|
15
|
+
- **Usage Analytics** (`client.usage.*`) - Monitor character usage with detailed analytics, breakdowns by voice/model/source, and trend analysis
|
16
|
+
- **Voice Library** (`client.voice_library.*`) - Browse and manage community shared voices with advanced filtering and search capabilities
|
17
|
+
- All admin endpoints include comprehensive error handling and response validation
|
18
|
+
|
19
|
+
### Enhanced
|
20
|
+
- **📚 Documentation Expansion** - Comprehensive documentation for all admin functionality
|
21
|
+
- Added `docs/admin/USER.md` - User account and subscription management guide (589 lines)
|
22
|
+
- Added `docs/admin/USAGE.md` - Usage analytics and monitoring guide (604 lines)
|
23
|
+
- Added `docs/admin/VOICE_LIBRARY.md` - Voice library browsing and management guide (883 lines)
|
24
|
+
- Added `docs/admin/README.md` - Admin API overview and quick start guide (472 lines)
|
25
|
+
- Updated main README.md with admin endpoint documentation and examples
|
26
|
+
- Total: 3,512 lines of new admin documentation
|
27
|
+
|
28
|
+
- **🎯 Example Controllers** - Production-ready Rails integration examples
|
29
|
+
- Added `examples/admin/user_controller.rb` - User dashboard with health monitoring (767 lines)
|
30
|
+
- Added `examples/admin/usage_controller.rb` - Usage analytics dashboard with real-time monitoring (584 lines)
|
31
|
+
- Added `examples/admin/voice_library_controller.rb` - Voice library browser with curation tools (844 lines)
|
32
|
+
- Added `examples/admin/models_controller.rb` - Model comparison and selection guide (983 lines)
|
33
|
+
- All controllers include comprehensive error handling, JSON API support, and export functionality
|
34
|
+
|
35
|
+
### Improved
|
36
|
+
- **🧪 Test Coverage** - Comprehensive testing for all admin functionality
|
37
|
+
- Added 88 endpoint tests covering all admin API methods and error scenarios
|
38
|
+
- Added 77 integration tests covering real-world usage patterns and workflows
|
39
|
+
- All tests include proper error handling validation and response structure verification
|
40
|
+
- Total: 165 new tests with 100% pass rate
|
41
|
+
|
42
|
+
- **🔧 Client Integration** - Seamless integration of admin endpoints
|
43
|
+
- Updated `Client` class to expose all admin endpoints (`usage`, `user`, `voice_library`)
|
44
|
+
- Enhanced error handling for admin-specific scenarios
|
45
|
+
- Consistent API patterns across all admin endpoints
|
46
|
+
- Proper namespacing under `ElevenlabsClient::Admin` module
|
47
|
+
|
48
|
+
### Technical Improvements
|
49
|
+
- **📊 Advanced Analytics** - Sophisticated usage monitoring and insights
|
50
|
+
- Character usage breakdowns by voice, model, user, and source
|
51
|
+
- Time-based aggregation (hour, day, week, month, cumulative)
|
52
|
+
- Trend analysis and forecasting capabilities
|
53
|
+
- Cost estimation and optimization recommendations
|
54
|
+
|
55
|
+
- **🎤 Voice Discovery** - Powerful voice library management
|
56
|
+
- Advanced filtering by category, gender, age, accent, language, and use case
|
57
|
+
- Voice recommendation engine based on requirements
|
58
|
+
- Bulk voice addition and collection curation tools
|
59
|
+
- Voice analytics and popularity tracking
|
60
|
+
|
61
|
+
- **👤 Account Management** - Comprehensive user account oversight
|
62
|
+
- Real-time subscription monitoring and health checks
|
63
|
+
- Usage limit tracking with projections and alerts
|
64
|
+
- Feature availability matrix and upgrade recommendations
|
65
|
+
- Security and moderation status monitoring
|
66
|
+
|
67
|
+
### Changed
|
68
|
+
- **📁 Code Organization** - Moved TextToDialogue class to its own file
|
69
|
+
- Extracted `TextToDialogue` class from `text_to_speech.rb` to `text_to_dialogue.rb`
|
70
|
+
- Improved code organization and modularity
|
71
|
+
- All tests and functionality remain unchanged
|
72
|
+
- Added Speech-to-Text delete transcript endpoint (`delete_transcript`)
|
73
|
+
|
74
|
+
## [0.5.1] - 2025-09-15
|
75
|
+
|
76
|
+
### Removed
|
77
|
+
- **🧹 Dependency Optimization** - Removed unnecessary development dependencies
|
78
|
+
- Removed `rubocop` and `rubocop-rspec` dependencies
|
79
|
+
- Removed `brakeman` dependency (not suitable for gem libraries)
|
80
|
+
- Removed `.rubocop.yml` and `.brakeman.yml` configuration files
|
81
|
+
- Reduced bundle size from 49 to 31 gems (37% reduction)
|
82
|
+
|
83
|
+
### Changed
|
84
|
+
- **⚡ CI/CD Optimization** - Simplified and streamlined continuous integration
|
85
|
+
- Removed linting job from GitHub Actions workflow
|
86
|
+
- Focused CI pipeline on essential checks: tests, security, and build
|
87
|
+
- Updated CI to use only `bundler-audit` for dependency vulnerability scanning
|
88
|
+
- Faster CI builds with fewer dependencies and simpler workflow
|
89
|
+
|
90
|
+
### Updated
|
91
|
+
- **📚 Documentation Cleanup** - Updated documentation to reflect simplified toolchain
|
92
|
+
- Removed RuboCop references from README.md
|
93
|
+
- Updated CI/CD documentation section
|
94
|
+
- Simplified development workflow documentation
|
95
|
+
- Updated Rake task descriptions and help text
|
96
|
+
- **🔧 Development Tools** - Streamlined development workflow
|
97
|
+
- Removed lint-related Rake tasks (`dev:lint`, `dev:lint_fix`, `dev:brakeman`)
|
98
|
+
- Simplified `release:prepare` task to focus on tests and security
|
99
|
+
- Updated help documentation for available Rake tasks
|
100
|
+
|
101
|
+
### Technical Improvements
|
102
|
+
- **📦 Leaner Dependencies** - More focused dependency management
|
103
|
+
- Kept only essential development tools: RSpec, WebMock, bundler-audit
|
104
|
+
- Maintained security scanning through bundler-audit
|
105
|
+
- Improved bundle install speed and reduced maintenance overhead
|
106
|
+
- **🚀 Performance** - Faster development and CI workflows
|
107
|
+
- Reduced Docker image sizes for CI/CD
|
108
|
+
- Faster bundle installations
|
109
|
+
- Simplified toolchain reduces cognitive overhead
|
110
|
+
|
111
|
+
### Notes
|
112
|
+
- This release focuses on optimizing the development experience and CI/CD pipeline
|
113
|
+
- Security scanning is maintained through bundler-audit, which is more appropriate for gem libraries
|
114
|
+
- The simplified toolchain reduces maintenance overhead while maintaining code quality through comprehensive testing
|
115
|
+
|
116
|
+
## [0.5.0] - 2025-09-14
|
117
|
+
|
118
|
+
### Added
|
119
|
+
|
120
|
+
- Text-to-Speech With Timestamps
|
121
|
+
- `client.text_to_speech_with_timestamps.generate(voice_id, text, **options)`
|
122
|
+
- Character-level `alignment` and `normalized_alignment`
|
123
|
+
- Streaming Text-to-Speech With Timestamps
|
124
|
+
- `client.text_to_speech_stream_with_timestamps.stream(voice_id, text, **options, &block)`
|
125
|
+
- JSON streaming with audio chunks and timing per chunk
|
126
|
+
- WebSocket Streaming Enhancements
|
127
|
+
- Single-context and multi-context improvements; correct query param ordering and filtering
|
128
|
+
- Docs: `docs/WEBSOCKET_STREAMING.md`
|
129
|
+
- Text-to-Dialogue Streaming
|
130
|
+
- `client.text_to_dialogue_stream.stream(inputs, **options, &block)`
|
131
|
+
- Docs: `docs/TEXT_TO_DIALOGUE_STREAMING.md`
|
132
|
+
|
133
|
+
### Improved
|
134
|
+
|
135
|
+
- Client streaming JSON handling for timestamp streams (`post_streaming_with_timestamps`)
|
136
|
+
- Robust parsing and block yielding across streaming tests
|
137
|
+
- URL query parameter ordering to match expectations in tests
|
138
|
+
|
139
|
+
### Tests
|
140
|
+
|
141
|
+
- Added comprehensive unit and integration tests for all new endpoints
|
142
|
+
- Full suite now: 687 examples, 0 failures
|
143
|
+
|
144
|
+
### Notes
|
145
|
+
|
146
|
+
- These features require valid ElevenLabs API keys and correct model/voice permissions
|
147
|
+
|
8
148
|
## [0.4.0] - 2025-09-12
|
9
149
|
|
10
150
|
### Added
|
data/README.md
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
[![Gem Version](https://badge.fury.io/rb/elevenlabs_client.svg)](https://badge.fury.io/rb/elevenlabs_client)
|
4
4
|
|
5
|
-
A comprehensive Ruby client library for the ElevenLabs API, supporting voice synthesis, dubbing, dialogue generation, sound effects,
|
5
|
+
A comprehensive Ruby client library for the ElevenLabs API, supporting voice synthesis, dubbing, dialogue generation, sound effects, AI music composition, voice transformation, speech transcription, audio isolation, and advanced audio processing features.
|
6
6
|
|
7
7
|
## Features
|
8
8
|
|
@@ -13,7 +13,17 @@ A comprehensive Ruby client library for the ElevenLabs API, supporting voice syn
|
|
13
13
|
🎵 **Music Generation** - AI-powered music composition and streaming
|
14
14
|
🎨 **Voice Design** - Create custom voices from text descriptions
|
15
15
|
🎭 **Voice Management** - Create, edit, and manage individual voices
|
16
|
-
|
16
|
+
🔄 **Speech-to-Speech** - Transform audio from one voice to another (Voice Changer)
|
17
|
+
📝 **Speech-to-Text** - Transcribe audio and video files with advanced features
|
18
|
+
🔇 **Audio Isolation** - Remove background noise from audio files
|
19
|
+
📱 **Audio Native** - Create embeddable audio players for websites
|
20
|
+
⏱️ **Forced Alignment** - Get precise timing information for audio transcripts
|
21
|
+
🏢 **Admin APIs** - Complete administrative functionality:
|
22
|
+
- **History** - Manage and analyze your generated audio history
|
23
|
+
- **Usage** - Monitor character usage and analytics
|
24
|
+
- **User** - Access account information and subscription details
|
25
|
+
- **Voice Library** - Browse and manage community shared voices
|
26
|
+
- **Models** - List available models and their capabilities
|
17
27
|
📡 **Streaming** - Real-time audio streaming
|
18
28
|
⚙️ **Configurable** - Flexible configuration options
|
19
29
|
🧪 **Well-tested** - Comprehensive test coverage
|
@@ -113,6 +123,11 @@ audio_data = client.sound_generation.generate("Ocean waves crashing on rocks")
|
|
113
123
|
design_result = client.text_to_voice.design("Warm, professional female voice")
|
114
124
|
generated_voice_id = design_result["previews"].first["generated_voice_id"]
|
115
125
|
|
126
|
+
# Stream the voice preview
|
127
|
+
client.text_to_voice.stream_preview(generated_voice_id) do |chunk|
|
128
|
+
puts "Received preview chunk: #{chunk.bytesize} bytes"
|
129
|
+
end
|
130
|
+
|
116
131
|
voice_result = client.text_to_voice.create(
|
117
132
|
"Professional Voice",
|
118
133
|
"Warm, professional female voice",
|
@@ -134,6 +149,27 @@ File.open("sample1.mp3", "rb") do |sample|
|
|
134
149
|
puts "Created voice: #{voice['voice_id']}"
|
135
150
|
end
|
136
151
|
|
152
|
+
# Admin APIs - Account Management
|
153
|
+
user_info = client.user.get_user
|
154
|
+
puts "Account: #{user_info['subscription']['tier']} (#{user_info['subscription']['status']})"
|
155
|
+
puts "Usage: #{user_info['subscription']['character_count']} / #{user_info['subscription']['character_limit']}"
|
156
|
+
|
157
|
+
# Usage Analytics
|
158
|
+
usage_stats = client.usage.get_character_stats(
|
159
|
+
start_unix: (Time.now - 7 * 24 * 60 * 60).to_i * 1000,
|
160
|
+
end_unix: Time.now.to_i * 1000,
|
161
|
+
breakdown_type: "voice"
|
162
|
+
)
|
163
|
+
puts "7-day usage: #{usage_stats['usage']['All'].sum} characters"
|
164
|
+
|
165
|
+
# History Management
|
166
|
+
history = client.history.list(page_size: 10)
|
167
|
+
puts "Recent history: #{history['history'].length} items"
|
168
|
+
|
169
|
+
# Voice Library
|
170
|
+
voices = client.voice_library.get_shared_voices(category: "professional", page_size: 5)
|
171
|
+
puts "Professional voices available: #{voices['voices'].length}"
|
172
|
+
|
137
173
|
# Music Generation
|
138
174
|
music_data = client.music.compose(
|
139
175
|
prompt: "Upbeat electronic dance track with synthesizers",
|
@@ -141,6 +177,66 @@ music_data = client.music.compose(
|
|
141
177
|
)
|
142
178
|
File.open("generated_music.mp3", "wb") { |f| f.write(music_data) }
|
143
179
|
|
180
|
+
# Speech-to-Speech (Voice Changer)
|
181
|
+
File.open("input_audio.mp3", "rb") do |audio_file|
|
182
|
+
converted_audio = client.speech_to_speech.convert(
|
183
|
+
"target_voice_id",
|
184
|
+
audio_file,
|
185
|
+
"input_audio.mp3",
|
186
|
+
remove_background_noise: true
|
187
|
+
)
|
188
|
+
File.open("converted_audio.mp3", "wb") { |f| f.write(converted_audio) }
|
189
|
+
end
|
190
|
+
|
191
|
+
# Speech-to-Text Transcription
|
192
|
+
File.open("audio.mp3", "rb") do |audio_file|
|
193
|
+
transcription = client.speech_to_text.create(
|
194
|
+
"scribe_v1",
|
195
|
+
file: audio_file,
|
196
|
+
filename: "audio.mp3",
|
197
|
+
diarize: true,
|
198
|
+
timestamps_granularity: "word"
|
199
|
+
)
|
200
|
+
puts "Transcribed: #{transcription['text']}"
|
201
|
+
|
202
|
+
# Get the transcript later
|
203
|
+
transcript = client.speech_to_text.get_transcript(transcription['transcription_id'])
|
204
|
+
|
205
|
+
# Delete when no longer needed
|
206
|
+
client.speech_to_text.delete_transcript(transcription['transcription_id'])
|
207
|
+
end
|
208
|
+
|
209
|
+
# Audio Isolation (Background Noise Removal)
|
210
|
+
File.open("noisy_audio.mp3", "rb") do |audio_file|
|
211
|
+
clean_audio = client.audio_isolation.isolate(audio_file, "noisy_audio.mp3")
|
212
|
+
File.open("clean_audio.mp3", "wb") { |f| f.write(clean_audio) }
|
213
|
+
end
|
214
|
+
|
215
|
+
# Audio Native (Embeddable Player)
|
216
|
+
File.open("article.html", "rb") do |html_file|
|
217
|
+
project = client.audio_native.create(
|
218
|
+
"My Article",
|
219
|
+
file: html_file,
|
220
|
+
filename: "article.html",
|
221
|
+
voice_id: "voice_id",
|
222
|
+
auto_convert: true
|
223
|
+
)
|
224
|
+
puts "Player HTML: #{project['html_snippet']}"
|
225
|
+
end
|
226
|
+
|
227
|
+
# Forced Alignment
|
228
|
+
File.open("speech.wav", "rb") do |audio_file|
|
229
|
+
alignment = client.forced_alignment.create(
|
230
|
+
audio_file,
|
231
|
+
"speech.wav",
|
232
|
+
"Hello world, this is a test transcript"
|
233
|
+
)
|
234
|
+
|
235
|
+
alignment['words'].each do |word|
|
236
|
+
puts "#{word['text']}: #{word['start']}s - #{word['end']}s"
|
237
|
+
end
|
238
|
+
end
|
239
|
+
|
144
240
|
# Streaming Text-to-Speech
|
145
241
|
client.text_to_speech_stream.stream("voice_id", "Streaming text") do |chunk|
|
146
242
|
# Process audio chunk in real-time
|
@@ -160,7 +256,17 @@ end
|
|
160
256
|
- **[Music Generation API](docs/MUSIC.md)** - AI-powered music composition and streaming
|
161
257
|
- **[Text-to-Voice API](docs/TEXT_TO_VOICE.md)** - Design and create custom voices
|
162
258
|
- **[Voice Management API](docs/VOICES.md)** - Manage individual voices (CRUD operations)
|
163
|
-
- **[
|
259
|
+
- **[Speech-to-Speech API](docs/SPEECH_TO_SPEECH.md)** - Transform audio from one voice to another
|
260
|
+
- **[Speech-to-Text API](docs/SPEECH_TO_TEXT.md)** - Transcribe audio and video files
|
261
|
+
- **[Audio Isolation API](docs/AUDIO_ISOLATION.md)** - Remove background noise from audio
|
262
|
+
- **[Audio Native API](docs/AUDIO_NATIVE.md)** - Create embeddable audio players
|
263
|
+
- **[Forced Alignment API](docs/FORCED_ALIGNMENT.md)** - Get precise timing information
|
264
|
+
- **[Admin APIs](docs/admin/README.md)** - Complete administrative functionality:
|
265
|
+
- **[User Management](docs/admin/USER.md)** - Account information and subscription details
|
266
|
+
- **[Usage Analytics](docs/admin/USAGE.md)** - Character usage monitoring and analytics
|
267
|
+
- **[History Management](docs/admin/HISTORY.md)** - Generated audio history management
|
268
|
+
- **[Voice Library](docs/admin/VOICE_LIBRARY.md)** - Community voice browsing and management
|
269
|
+
- **[Models API](docs/admin/MODELS.md)** - List available models and capabilities
|
164
270
|
|
165
271
|
### Available Endpoints
|
166
272
|
|
@@ -174,7 +280,16 @@ end
|
|
174
280
|
| `client.music.*` | AI music composition and streaming | [MUSIC.md](docs/MUSIC.md) |
|
175
281
|
| `client.text_to_voice.*` | Voice design and creation | [TEXT_TO_VOICE.md](docs/TEXT_TO_VOICE.md) |
|
176
282
|
| `client.voices.*` | Voice management (CRUD) | [VOICES.md](docs/VOICES.md) |
|
177
|
-
| `client.
|
283
|
+
| `client.speech_to_speech.*` | Voice changer and audio transformation | [SPEECH_TO_SPEECH.md](docs/SPEECH_TO_SPEECH.md) |
|
284
|
+
| `client.speech_to_text.*` | Audio/video transcription | [SPEECH_TO_TEXT.md](docs/SPEECH_TO_TEXT.md) |
|
285
|
+
| `client.audio_isolation.*` | Background noise removal | [AUDIO_ISOLATION.md](docs/AUDIO_ISOLATION.md) |
|
286
|
+
| `client.audio_native.*` | Embeddable audio players | [AUDIO_NATIVE.md](docs/AUDIO_NATIVE.md) |
|
287
|
+
| `client.forced_alignment.*` | Audio-text timing alignment | [FORCED_ALIGNMENT.md](docs/FORCED_ALIGNMENT.md) |
|
288
|
+
| `client.user.*` | User account and subscription information | [USER.md](docs/admin/USER.md) |
|
289
|
+
| `client.usage.*` | Character usage analytics and monitoring | [USAGE.md](docs/admin/USAGE.md) |
|
290
|
+
| `client.history.*` | Generated audio history management | [HISTORY.md](docs/admin/HISTORY.md) |
|
291
|
+
| `client.voice_library.*` | Community voice browsing and management | [VOICE_LIBRARY.md](docs/admin/VOICE_LIBRARY.md) |
|
292
|
+
| `client.models.*` | Model information and capabilities | [MODELS.md](docs/admin/MODELS.md) |
|
178
293
|
|
179
294
|
## Configuration Options
|
180
295
|
|
@@ -221,6 +336,9 @@ end
|
|
221
336
|
- `AuthenticationError` - Invalid API key or authentication failure
|
222
337
|
- `RateLimitError` - Rate limit exceeded
|
223
338
|
- `ValidationError` - Invalid request parameters
|
339
|
+
- `NotFoundError` - Resource not found (e.g., voice ID, transcript ID)
|
340
|
+
- `BadRequestError` - Bad request with invalid parameters
|
341
|
+
- `UnprocessableEntityError` - Request cannot be processed (e.g., invalid file format)
|
224
342
|
- `APIError` - General API errors
|
225
343
|
|
226
344
|
## Rails Integration
|
@@ -235,6 +353,17 @@ The gem is designed to work seamlessly with Rails applications. See the [example
|
|
235
353
|
- [MusicController](examples/music_controller.rb) - AI music composition and streaming
|
236
354
|
- [TextToVoiceController](examples/text_to_voice_controller.rb) - Voice design and creation
|
237
355
|
- [VoicesController](examples/voices_controller.rb) - Voice management (CRUD operations)
|
356
|
+
- [SpeechToSpeechController](examples/speech_to_speech_controller.rb) - Voice changer and audio transformation
|
357
|
+
- [SpeechToTextController](examples/speech_to_text_controller.rb) - Audio/video transcription with advanced features
|
358
|
+
- [AudioIsolationController](examples/audio_isolation_controller.rb) - Background noise removal and audio cleanup
|
359
|
+
- [AudioNativeController](examples/audio_native_controller.rb) - Embeddable audio players for websites
|
360
|
+
- [ForcedAlignmentController](examples/forced_alignment_controller.rb) - Audio-text timing alignment and subtitle generation
|
361
|
+
- **Admin Controllers** - Complete administrative functionality:
|
362
|
+
- [Admin::HistoryController](examples/admin/history_controller.rb) - Generated audio history management and analytics
|
363
|
+
- [Admin::UsageController](examples/admin/usage_controller.rb) - Character usage monitoring and analytics
|
364
|
+
- [Admin::UserController](examples/admin/user_controller.rb) - User account and subscription management
|
365
|
+
- [Admin::VoiceLibraryController](examples/admin/voice_library_controller.rb) - Community voice browsing and management
|
366
|
+
- [Admin::ModelsController](examples/admin/models_controller.rb) - Model information and selection guidance
|
238
367
|
|
239
368
|
## Development
|
240
369
|
|
@@ -245,6 +374,34 @@ bin/setup # Install dependencies
|
|
245
374
|
bundle exec rspec # Run tests
|
246
375
|
```
|
247
376
|
|
377
|
+
### Available Rake Tasks
|
378
|
+
|
379
|
+
```bash
|
380
|
+
# Testing
|
381
|
+
rake spec # Run all tests (default)
|
382
|
+
rake test:unit # Run unit tests only
|
383
|
+
rake test:integration # Run integration tests only
|
384
|
+
|
385
|
+
# Security
|
386
|
+
rake dev:security # Run security checks
|
387
|
+
rake dev:audit # Run bundler-audit
|
388
|
+
|
389
|
+
# Development
|
390
|
+
rake dev:test # Run all tests
|
391
|
+
rake dev:coverage # Run tests with coverage
|
392
|
+
rake release:prepare # Run full CI suite locally
|
393
|
+
```
|
394
|
+
|
395
|
+
### Continuous Integration
|
396
|
+
|
397
|
+
This gem uses GitHub Actions for CI/CD with the following checks:
|
398
|
+
|
399
|
+
- **Tests**: Runs on Ruby 3.0, 3.1, 3.2, and 3.3
|
400
|
+
- **Security**: bundler-audit for dependency vulnerability scanning
|
401
|
+
- **Build**: Verifies gem can be built and installed
|
402
|
+
|
403
|
+
All checks must pass before merging pull requests.
|
404
|
+
|
248
405
|
To install this gem onto your local machine:
|
249
406
|
|
250
407
|
```bash
|
@@ -255,7 +412,8 @@ To release a new version:
|
|
255
412
|
|
256
413
|
1. Update the version number in `version.rb`
|
257
414
|
2. Update `CHANGELOG.md`
|
258
|
-
3. Run `bundle exec rake release`
|
415
|
+
3. Run `bundle exec rake release:prepare` to verify tests and security checks pass
|
416
|
+
4. Run `bundle exec rake release`
|
259
417
|
|
260
418
|
## Testing
|
261
419
|
|
@@ -2,12 +2,13 @@
|
|
2
2
|
|
3
3
|
require "faraday"
|
4
4
|
require "faraday/multipart"
|
5
|
+
require "json"
|
5
6
|
|
6
7
|
module ElevenlabsClient
|
7
8
|
class Client
|
8
9
|
DEFAULT_BASE_URL = "https://api.elevenlabs.io"
|
9
10
|
|
10
|
-
attr_reader :base_url, :api_key, :dubs, :text_to_speech, :
|
11
|
+
attr_reader :base_url, :api_key, :dubs, :text_to_speech, :text_to_dialogue, :sound_generation, :text_to_voice, :models, :voices, :music, :audio_isolation, :audio_native, :forced_alignment, :speech_to_speech, :speech_to_text, :websocket_text_to_speech, :history, :usage, :user, :voice_library
|
11
12
|
|
12
13
|
def initialize(api_key: nil, base_url: nil, api_key_env: "ELEVENLABS_API_KEY", base_url_env: "ELEVENLABS_BASE_URL")
|
13
14
|
@api_key = api_key || fetch_api_key(api_key_env)
|
@@ -15,13 +16,22 @@ module ElevenlabsClient
|
|
15
16
|
@conn = build_connection
|
16
17
|
@dubs = Dubs.new(self)
|
17
18
|
@text_to_speech = TextToSpeech.new(self)
|
18
|
-
@text_to_speech_stream = TextToSpeechStream.new(self)
|
19
19
|
@text_to_dialogue = TextToDialogue.new(self)
|
20
20
|
@sound_generation = SoundGeneration.new(self)
|
21
21
|
@text_to_voice = TextToVoice.new(self)
|
22
|
-
@models = Models.new(self)
|
22
|
+
@models = Admin::Models.new(self)
|
23
|
+
@history = Admin::History.new(self)
|
24
|
+
@usage = Admin::Usage.new(self)
|
25
|
+
@user = Admin::User.new(self)
|
26
|
+
@voice_library = Admin::VoiceLibrary.new(self)
|
23
27
|
@voices = Voices.new(self)
|
24
28
|
@music = Endpoints::Music.new(self)
|
29
|
+
@audio_isolation = AudioIsolation.new(self)
|
30
|
+
@audio_native = AudioNative.new(self)
|
31
|
+
@forced_alignment = ForcedAlignment.new(self)
|
32
|
+
@speech_to_speech = SpeechToSpeech.new(self)
|
33
|
+
@speech_to_text = SpeechToText.new(self)
|
34
|
+
@websocket_text_to_speech = WebSocketTextToSpeech.new(self)
|
25
35
|
end
|
26
36
|
|
27
37
|
# Makes an authenticated GET request
|
@@ -88,6 +98,17 @@ module ElevenlabsClient
|
|
88
98
|
handle_response(response)
|
89
99
|
end
|
90
100
|
|
101
|
+
# Makes an authenticated GET request expecting binary response
|
102
|
+
# @param path [String] API endpoint path
|
103
|
+
# @return [String] Binary response body
|
104
|
+
def get_binary(path)
  # Authenticate with the account API key; no other headers are set, so the
  # connection's defaults apply.
  raw_response = @conn.get(path) { |request| request.headers["xi-api-key"] = api_key }

  # Response handling is shared with the other request helpers.
  handle_response(raw_response)
end
|
111
|
+
|
91
112
|
# Makes an authenticated POST request expecting binary response
|
92
113
|
# @param path [String] API endpoint path
|
93
114
|
# @param body [Hash, nil] Request body
|
@@ -144,6 +165,62 @@ module ElevenlabsClient
|
|
144
165
|
handle_response(response)
|
145
166
|
end
|
146
167
|
|
168
|
+
# Makes an authenticated GET request with streaming response
|
169
|
+
# @param path [String] API endpoint path
|
170
|
+
# @param block [Proc] Block to handle each chunk
|
171
|
+
# @return [Faraday::Response] Response object
|
172
|
+
def get_streaming(path, &block)
  response = @conn.get(path) do |req|
    req.headers["xi-api-key"] = api_key
    req.headers["Accept"] = "audio/mpeg"

    # Install the streaming hook only when there is a consumer. With a hook
    # in place the chunks are handed to the callback as they arrive, so an
    # unconditional no-op hook would silently throw the whole body away for
    # callers that did not pass a block.
    # A proc (not a lambda) is used so extra callback arguments are ignored.
    req.options.on_data = proc { |chunk, _| block.call(chunk) } if block
  end

  handle_response(response)
end
|
185
|
+
|
186
|
+
# Makes an authenticated POST request with streaming response for timestamp data
|
187
|
+
# @param path [String] API endpoint path
|
188
|
+
# @param body [Hash, nil] Request body
|
189
|
+
# @param block [Proc] Block to handle each JSON chunk with timestamps
|
190
|
+
# @return [Faraday::Response] Response object
|
191
|
+
def post_streaming_with_timestamps(path, body = nil, &block)
  # Bytes received so far that do not yet form a complete line.
  pending = +""

  # Decodes one line of the newline-delimited JSON stream and yields it.
  # Only the decoding step is guarded, so an error raised by the caller's
  # block propagates instead of being mistaken for a malformed line.
  deliver = lambda do |line|
    next if line.strip.empty?

    begin
      record = JSON.parse(line)
    rescue JSON::ParserError
      next # skip malformed JSON lines
    end

    block.call(record)
  end

  response = @conn.post(path) do |req|
    req.headers["xi-api-key"] = api_key
    req.headers["Content-Type"] = "application/json"
    req.body = body.to_json if body

    # Install the streaming hook only when there is a consumer for the records.
    if block
      req.options.on_data = proc do |chunk, _|
        pending << chunk

        # Hand over every complete line; the unterminated tail stays buffered
        # until a later chunk completes it.
        while (newline_at = pending.index("\n"))
          deliver.call(pending.slice!(0, newline_at + 1))
        end
      end
    end
  end

  # Check the response first (handle_response presumably raises on error
  # statuses — confirm), so an error payload is never yielded as a record.
  result = handle_response(response)

  # The last record may arrive without a trailing newline; deliver it too.
  deliver.call(pending) if block && !pending.empty?

  result
end
|
223
|
+
|
147
224
|
# Helper method to create Faraday::Multipart::FilePart
|
148
225
|
# @param file_io [IO] File IO object
|
149
226
|
# @param filename [String] Original filename
|
@@ -0,0 +1,106 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'cgi'
|
4
|
+
|
5
|
+
module ElevenlabsClient
  module Admin
    # Wrapper around the /v1/history endpoints (generated-audio history).
    # Every call is delegated to the client object passed to the constructor.
    class History
      # Query parameters accepted by #list, in the order they are emitted.
      LIST_FILTERS = %i[page_size start_after_history_item_id voice_id search source].freeze

      # @param client [#get, #delete, #get_binary, #post_binary] object that performs the requests
      def initialize(client)
        @client = client
      end

      # GET /v1/history
      # Returns a list of your generated audio
      # Documentation: https://elevenlabs.io/docs/api-reference/history/get-generated-items
      #
      # @param options [Hash] Optional parameters
      # @option options [Integer] :page_size How many history items to return at maximum (max 1000, default 100)
      # @option options [String] :start_after_history_item_id After which ID to start fetching (for pagination)
      # @option options [String] :voice_id ID of the voice to filter for
      # @option options [String] :search Search term for filtering history items
      # @option options [String] :source Source of the generated history item ("TTS" or "STS")
      # @return [Hash] Response containing history items, pagination info
      def list(**options)
        # Only recognised, truthy options end up in the query string; values
        # are form-escaped and the parameter order is fixed by LIST_FILTERS.
        pairs = LIST_FILTERS.filter_map do |key|
          value = options[key]
          "#{key}=#{CGI.escape(value.to_s)}" if value
        end

        endpoint = pairs.empty? ? "/v1/history" : "/v1/history?#{pairs.join('&')}"
        @client.get(endpoint)
      end

      # GET /v1/history/:history_item_id
      # Retrieves a history item by ID
      # Documentation: https://elevenlabs.io/docs/api-reference/history/get-history-item
      #
      # @param history_item_id [String] ID of the history item
      # @return [Hash] The history item data
      def get(history_item_id)
        @client.get(item_path(history_item_id))
      end

      # DELETE /v1/history/:history_item_id
      # Delete a history item by its ID
      # Documentation: https://elevenlabs.io/docs/api-reference/history/delete-history-item
      #
      # @param history_item_id [String] ID of the history item to delete
      # @return [Hash] Status response
      def delete(history_item_id)
        @client.delete(item_path(history_item_id))
      end

      # GET /v1/history/:history_item_id/audio
      # Returns the audio of a history item
      # Documentation: https://elevenlabs.io/docs/api-reference/history/get-audio-from-history-item
      #
      # @param history_item_id [String] ID of the history item
      # @return [String] The binary audio data
      def get_audio(history_item_id)
        @client.get_binary("#{item_path(history_item_id)}/audio")
      end

      # POST /v1/history/download
      # Download one or more history items
      # Documentation: https://elevenlabs.io/docs/api-reference/history/download-history-items
      #
      # @param history_item_ids [Array<String>] List of history item IDs to download
      # @param options [Hash] Optional parameters
      # @option options [String] :output_format Output format ("wav" or "default")
      # @return [String] The binary audio data (single file) or zip file (multiple files)
      def download(history_item_ids, **options)
        request_body = { history_item_ids: history_item_ids }
        request_body[:output_format] = options[:output_format] if options[:output_format]

        @client.post_binary("/v1/history/download", request_body)
      end

      # Alias methods for convenience
      alias_method :get_history_item, :get
      alias_method :get_generated_items, :list
      alias_method :delete_history_item, :delete
      alias_method :get_audio_from_history_item, :get_audio
      alias_method :download_history_items, :download

      private

      attr_reader :client

      # Builds the path of a single history item. The ID is percent-encoded so
      # that characters such as "/", "?" or "#" cannot change which endpoint
      # the request reaches. CGI.escape emits "+" only for spaces (a literal
      # "+" becomes "%2B"), so rewriting "+" to "%20" is safe inside a path.
      def item_path(history_item_id)
        "/v1/history/#{CGI.escape(history_item_id.to_s).gsub('+', '%20')}"
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module ElevenlabsClient
  module Admin
    # Wrapper around the models endpoint; requests go through the client
    # object handed to the constructor.
    class Models
      # @param client [#get] object that performs the GET request
      def initialize(client)
        @client = client
      end

      # GET /v1/models
      # Fetches the available models.
      # Documentation: https://elevenlabs.io/docs/api-reference/models/list
      #
      # @return [Hash] The JSON response containing an array of models
      def list
        client.get("/v1/models")
      end

      alias list_models list

      private

      attr_reader :client
    end
  end
end
|