voice_id 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +3 -0
- data/LICENSE +20 -0
- data/Readme.md +277 -0
- data/lib/voice_id.rb +5 -0
- data/lib/voice_id/base.rb +63 -0
- data/lib/voice_id/identification.rb +263 -0
- data/lib/voice_id/request_helpers.rb +33 -0
- data/lib/voice_id/utils.rb +7 -0
- data/lib/voice_id/verification.rb +251 -0
- data/spec/identification_spec.rb +95 -0
- data/spec/mock_server.rb +122 -0
- data/spec/spec_helper.rb +4 -0
- data/spec/verification_spec.rb +80 -0
- data/voice_id.gemspec +17 -0
- metadata +100 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 5a5036968c4e311ca99e004aa99ca522b7e18e95
|
4
|
+
data.tar.gz: 7fc7ff2b2ec9e1733f9c7894dae6f9aad06aff68
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: a1e5a759472231990225bf5274693f0e688874a73d557618bad5d74fa8952462f5055b88b7f60ae66cf6a26cbd030331873eff3cbad35561de2d1ff715f500d7
|
7
|
+
data.tar.gz: 9e8df1b131c889f843e5f64c6508e7c18a7773370cdc9201d6c2a7b17df3b89129261ab9e3be2a37d4d51c2ae9227a5df67834a0e1b997a7b429b61a50d4eb60
|
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2016- Ali Yazdani <aliyazdani82@gmail.com>
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Readme.md
ADDED
@@ -0,0 +1,277 @@
|
|
1
|
+
![](https://img.shields.io/badge/license-MIT-blue.svg)
|
2
|
+
[![CircleCI](https://circleci.com/gh/aliyazdani/voice_id.svg?style=svg)](https://circleci.com/gh/aliyazdani/voice_id)
|
3
|
+
|
4
|
+
# VoiceId
|
5
|
+
Wrapper around Microsoft Cognitive Services - Speaker Recognition API
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
sign up and pick up a new api key (speaker recognition API key)
|
9
|
+
[https://www.microsoft.com/cognitive-services](https://www.microsoft.com/cognitive-services)
|
10
|
+
|
11
|
+
```
|
12
|
+
$ gem install voice_id
|
13
|
+
```
|
14
|
+
|
15
|
+
## Examples
|
16
|
+
|
17
|
+
|
18
|
+
```ruby
|
19
|
+
#create a new profile
|
20
|
+
identification = VoiceId::Identification.new("MS_speaker_recognition_api_key")
|
21
|
+
profile = identification.create_profile
|
22
|
+
# => { "identificationProfileId" => "49a46324-fc4b-4387-aa06-090cfbf0214f" }
|
23
|
+
|
24
|
+
# create a new enrollment for that profile
|
25
|
+
profile_id = profile["identificationProfileId"]
|
26
|
+
path_to_audio = '/path/to/some/audio_file.wav'
|
27
|
+
short_audio = true
|
28
|
+
operation_url = identification.create_enrollment(profile_id , short_audio, path_to_audio)
|
29
|
+
# => "https://api.projectoxford.ai/spid/v1.0/operations/EF217D0C-9085-45D7-AAE0-2B36471B89B5"
|
30
|
+
|
31
|
+
# check the status of operation
|
32
|
+
operation_id = identification.get_operation_id(operation_url)
|
33
|
+
# => "EF217D0C-9085-45D7-AAE0-2B36471B89B5"
|
34
|
+
|
35
|
+
identification.get_operation_status(operation_id)
|
36
|
+
# notice below that we only had 13.6 seconds of useable audio so we need to
|
37
|
+
# submit more enrollments for this profile until we achieve at min 30 seconds
|
38
|
+
# =>
|
39
|
+
# {
|
40
|
+
# "status" => "succeeded",
|
41
|
+
# "createdDateTime" => "2016-09-23T01:34:44.226642Z",
|
42
|
+
# "lastActionDateTime" => "2016-09-23T01:34:44.4795299Z",
|
43
|
+
# "processingResult" => {
|
44
|
+
# "enrollmentStatus" => "Enrolling",
|
45
|
+
# "remainingEnrollmentSpeechTime" => 16.4,
|
46
|
+
# "speechTime" => 13.6,
|
47
|
+
# "enrollmentSpeechTime"=>13.6
|
48
|
+
# }
|
49
|
+
# }
|
50
|
+
|
51
|
+
# identify a speaker
|
52
|
+
profile_ids = ["49a46324-fc4b-4387-aa06-090cfbf0214f", "49a36324-fc4b-4387-aa06-091cfbf0216b", ...]
|
53
|
+
path_to_test_audio = '/path/to/some/audio_file.wav'
|
54
|
+
short_audio = true
|
55
|
+
identification_operation_url = identification.identify_speaker(profile_ids, short_audio, path_to_test_audio)
|
56
|
+
# => "https://api.projectoxford.ai/spid/v1.0/operations/EF217D0C-9085-45D7-AAE0-2B36471B89B6"
|
57
|
+
identification_operation_id = identification.get_operation_id(identification_operation_url)
|
58
|
+
# => "EF217D0C-9085-45D7-AAE0-2B36471B89B6"
|
59
|
+
identification.get_operation_status(identification_operation_id)
|
60
|
+
# =>
|
61
|
+
# {
|
62
|
+
# "status" => "succeeded",
|
63
|
+
# "createdDateTime" => "2016-09-23T02:01:54.6498703Z",
|
64
|
+
# "lastActionDateTime" => "2016-09-23T02:01:56.054633Z",
|
65
|
+
# "processingResult" => {
|
66
|
+
# "identifiedProfileId" => "49a46324-fc4b-4387-aa06-090cfbf0214f",
|
67
|
+
# "confidence"=>"High"
|
68
|
+
# }
|
69
|
+
# }
|
70
|
+
```
|
71
|
+
|
72
|
+
## APIs
|
73
|
+
Provides methods for two APIs (Identification and Verification)
|
74
|
+
All audio samples provided to the API must be the following format:
|
75
|
+
```
|
76
|
+
Container WAV
|
77
|
+
Encoding PCM
|
78
|
+
Rate 16K
|
79
|
+
Sample Format 16 bit
|
80
|
+
Channels Mono
|
81
|
+
```
|
82
|
+
|
83
|
+
### Identification API
|
84
|
+
Identify a person from a list of people - this is a text-independent API.
|
85
|
+
Prior to being able to identify a speaker, a speaker (profile) must send a minimum
|
86
|
+
of 30 seconds of recognizable audio.
|
87
|
+
```ruby
|
88
|
+
identification = VoiceId::Identification.new("MS_speaker_recognition_api_key")
|
89
|
+
```
|
90
|
+
|
91
|
+
#### create_profile
|
92
|
+
Each person needs a unique profile, this creates a new one.
|
93
|
+
```ruby
|
94
|
+
profile = identification.create_profile
|
95
|
+
# => { "identificationProfileId" => "49a36324-fc4b-4387-aa06-090cfbf0064f" }
|
96
|
+
```
|
97
|
+
|
98
|
+
#### create_enrollment(profile_id, short_audio, audio_file_path)
|
99
|
+
An enrollment is how audio samples are associated with a profile (training the service). For the Identification API a minimum of 30 seconds of recognizable speech is required. This can be done through multiple enrollments. This creates a new
|
100
|
+
enrollment for a profile.
|
101
|
+
|
102
|
+
```ruby
|
103
|
+
profile_id = "1234567890"
|
104
|
+
path_to_audio = '/path/to/some/audio_file.wav'
|
105
|
+
short_audio = true # true - set minimum duration to 1 sec (5 sec by default per enrollment)
|
106
|
+
identification.create_enrollment(profile_id, short_audio, path_to_audio)
|
107
|
+
# returns an operation url that you can use to check the status of the enrollment
|
108
|
+
# => "https://api.projectoxford.ai/spid/v1.0/operations/EF217D0C-9085-45D7-AAE0-2B36471B89B5"
|
109
|
+
```
|
110
|
+
#### get_operation_id(operation_status_url)
|
111
|
+
Certain endpoints take time to process, so they return a URL for you to check on the status of the operation. To parse out the operation id use this method. Now you can use #get_operation_status to
|
112
|
+
check the id.
|
113
|
+
```ruby
|
114
|
+
operation_status_url = identification.create_enrollment(profile_id, short_audio, path_to_audio)
|
115
|
+
# => "https://api.projectoxford.ai/spid/v1.0/operations/EF217D0C-9085-45D7-AAE0-2B36471B89B5"
|
116
|
+
identification_operation_id = identification.get_operation_id(operation_status_url)
|
117
|
+
# => "EF217D0C-9085-45D7-AAE0-2B36471B89B5"
|
118
|
+
```
|
119
|
+
#### get_operation_status(operation_id)
|
120
|
+
Check on the status of an operation by passing in the operation id (use #get_operation_id to get the id)
|
121
|
+
```ruby
|
122
|
+
identification.get_operation_status(identification_operation_id)
|
123
|
+
# =>
|
124
|
+
# {
|
125
|
+
# "status" => "succeeded",
|
126
|
+
# "createdDateTime" => "2016-09-23T02:01:54.6498703Z",
|
127
|
+
# "lastActionDateTime" => "2016-09-23T02:01:56.054633Z",
|
128
|
+
# "processingResult" => {
|
129
|
+
# "identifiedProfileId" => "49a59333-ur9d-4387-wd06-880cfby0215f",
|
130
|
+
# "confidence"=>"High"
|
131
|
+
# }
|
132
|
+
# }
|
133
|
+
```
|
134
|
+
|
135
|
+
#### delete_profile(profile_id)
|
136
|
+
Delete a particular profile from the service.
|
137
|
+
```ruby
|
138
|
+
profile_id = "1234567890"
|
139
|
+
identification.delete_profile(profile_id)
|
140
|
+
# => true
|
141
|
+
```
|
142
|
+
|
143
|
+
#### get_all_profiles
|
144
|
+
Returns a list of all the profiles for this account.
|
145
|
+
```ruby
|
146
|
+
identification.get_all_profiles
|
147
|
+
# =>
|
148
|
+
# [
|
149
|
+
# {
|
150
|
+
# "identificationProfileId" => "111f427c-3791-468f-b709-fcef7660fff9",
|
151
|
+
# "locale" => "en-US",
|
152
|
+
# "enrollmentSpeechTime" => 0.0,
|
153
|
+
# "remainingEnrollmentSpeechTime" => 0.0,
|
154
|
+
# "createdDateTime" => "2015-04-23T18:25:43.511Z",
|
155
|
+
# "lastActionDateTime" => "2015-04-23T18:25:43.511Z",
|
156
|
+
# "enrollmentStatus" => "Enrolled" //[Enrolled | Enrolling | Training]
|
157
|
+
# }, ...
|
158
|
+
# ]
|
159
|
+
```
|
160
|
+
|
161
|
+
#### get_profile(profileId)
|
162
|
+
Returns a profile's details
|
163
|
+
```ruby
|
164
|
+
profile_id = "1234567890"
|
165
|
+
identification.get_profile(profile_id)
|
166
|
+
# =>
|
167
|
+
# {
|
168
|
+
# "identificationProfileId" => "111f427c-3791-468f-b709-fcef7660fff9",
|
169
|
+
# "locale" => "en-US",
|
170
|
+
# "enrollmentSpeechTime" => 0.0,
|
171
|
+
# "remainingEnrollmentSpeechTime" => 0.0,
|
172
|
+
# "createdDateTime" => "2015-04-23T18:25:43.511Z",
|
173
|
+
# "lastActionDateTime" => "2015-04-23T18:25:43.511Z",
|
174
|
+
# "enrollmentStatus" => "Enrolled" //[Enrolled | Enrolling | Training]
|
175
|
+
# }
|
176
|
+
```
|
177
|
+
|
178
|
+
#### reset_all_enrollments_for_profile(profileId)
|
179
|
+
Resets all the enrollments for a particular profile
|
180
|
+
```ruby
|
181
|
+
profile_id = "1234567890"
|
182
|
+
identification.reset_all_enrollments_for_profile(profile_id)
|
183
|
+
# => true
|
184
|
+
```
|
185
|
+
|
186
|
+
#### identify_speaker(profile_ids, short_audio, audio_file_path)
|
187
|
+
Identify a speaker by calling this method with an array of `enrolled` profile_ids.
|
188
|
+
Use ```short_audio``` to waive the required 5-second speech sample.
|
189
|
+
The audio sample to be analyzed should ideally be 30 seconds, with a maximum of 5 mins.
|
190
|
+
|
191
|
+
```ruby
|
192
|
+
profile_ids = ["49a46324-fc4b-4387-aa06-090cfbf0214f", "49a36324-fc4b-4387-aa06-091cfbf0216b", ...]
|
193
|
+
path_to_test_audio = '/path/to/some/audio_file.wav'
|
194
|
+
short_audio = true
|
195
|
+
operation_url = identification.identify_speaker(profile_ids, short_audio, path_to_test_audio)
|
196
|
+
# => "https://api.projectoxford.ai/spid/v1.0/operations/EF217D0C-9085-45D7-AAE0-2B36471B89B6"
|
197
|
+
identification_operation_id = identification.get_operation_id(operation_url)
|
198
|
+
# => "EF217D0C-9085-45D7-AAE0-2B36471B89B6"
|
199
|
+
identification.get_operation_status(identification_operation_id)
|
200
|
+
# =>
|
201
|
+
# {
|
202
|
+
# "status" => "succeeded",
|
203
|
+
# "createdDateTime" => "2016-09-23T02:01:54.6498703Z",
|
204
|
+
# "lastActionDateTime" => "2016-09-23T02:01:56.054633Z",
|
205
|
+
# "processingResult" => {
|
206
|
+
# "identifiedProfileId" => "49a46324-fc4b-4387-aa06-090cfbf0214f",
|
207
|
+
# "confidence"=>"High"
|
208
|
+
# }
|
209
|
+
# }
|
210
|
+
```
|
211
|
+
|
212
|
+
### Verification API
|
213
|
+
Verify that a person is who they say they are - this is a text-dependent api.
|
214
|
+
Prior to being able to verify a speaker, a speaker (profile) must send three audio samples (from an API provided list) with their enrollment.
|
215
|
+
```ruby
|
216
|
+
verification = VoiceId::Verification.new("MS_speaker_recognition_api_key")
|
217
|
+
```
|
218
|
+
|
219
|
+
#### list_all_verification_phrases
|
220
|
+
Get a list of accepted scripts to use when sending your audio sample.
|
221
|
+
```ruby
|
222
|
+
verification.list_all_verification_phrases
|
223
|
+
# =>
|
224
|
+
# [
|
225
|
+
# {"phrase" => "i am going to make him an offer he cannot refuse"},
|
226
|
+
# {"phrase" => "houston we have had a problem"},
|
227
|
+
# {"phrase" => "my voice is my passport verify me"},
|
228
|
+
# {"phrase" => "apple juice tastes funny after toothpaste"},
|
229
|
+
# {"phrase" => "you can get in without your password"},
|
230
|
+
# {"phrase" => "you can activate security system now"},
|
231
|
+
# {"phrase" => "my voice is stronger than passwords"},
|
232
|
+
# {"phrase" => "my password is not your business"},
|
233
|
+
# {"phrase" => "my name is unknown to you"},
|
234
|
+
# {"phrase" => "be yourself everyone else is already taken"}
|
235
|
+
# ]
|
236
|
+
```
|
237
|
+
|
238
|
+
#### create_profile
|
239
|
+
Same as Identification API
|
240
|
+
|
241
|
+
#### create_enrollment(profile_id, audio_file_path)
|
242
|
+
Requires 3 enrollments. Pick 3 of the acceptable phrases from `#list_all_verification_phrases` and enroll them.
|
243
|
+
```ruby
|
244
|
+
verification.create_enrollment("49a46324-fc4b-4387-aa06-090cfbf0214f", '/path/to/audio/make_him_an_offer.wav')
|
245
|
+
# =>
|
246
|
+
# {
|
247
|
+
# "enrollmentStatus" => "Enrolling",
|
248
|
+
# "enrollmentsCount" => 1,
|
249
|
+
# "remainingEnrollments" => 2,
|
250
|
+
# "phrase" => "i am going to make him an offer he cannot refuse"
|
251
|
+
# }
|
252
|
+
```
|
253
|
+
|
254
|
+
#### delete_profile(profile_id)
|
255
|
+
Same as Identification API
|
256
|
+
|
257
|
+
#### get_all_profiles
|
258
|
+
Same as Identification API
|
259
|
+
|
260
|
+
#### get_profile(profile_id)
|
261
|
+
Same as Identification API
|
262
|
+
|
263
|
+
#### reset_all_enrollments_for_profile(profile_id)
|
264
|
+
Same as Identification API
|
265
|
+
|
266
|
+
#### verify_speaker(profile_id, audio_file_path)
|
267
|
+
User (profile) would have had to enroll with 3 of the accepted phrases (#list_all_verification_phrases).
|
268
|
+
Once the phrases have been accepted, a recording of one of the accepted phrases can be checked against an *enrolled* profile.
|
269
|
+
```ruby
|
270
|
+
verification.verify_speaker("86935587-b631-4cc7-a59t-8e580d71522g", "/path/to/audio/offer_converted.wav")
|
271
|
+
# =>
|
272
|
+
# {
|
273
|
+
# "result" => "Accept",
|
274
|
+
# "confidence" => "High",
|
275
|
+
# "phrase" => "i am going to make him an offer he cannot refuse"
|
276
|
+
# }
|
277
|
+
```
|
data/lib/voice_id.rb
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
module VoiceId
|
2
|
+
class Base
|
3
|
+
include RequestHelpers
|
4
|
+
include Utils
|
5
|
+
|
6
|
+
attr_accessor :api_base_url, :api_key, :api_version, :headers, :use_ssl
|
7
|
+
# Stores the subscription key and the fixed connection settings used by
# every request made through this client.
#
# api_key - subscription key; kept in @api_key and also sent as the
#           "Ocp-Apim-Subscription-Key" default header.
def initialize(api_key)
  @api_key      = api_key
  @api_version  = "v1.0"
  @use_ssl      = true
  # The base URL embeds the API version chosen above.
  @api_base_url = "https://api.projectoxford.ai/spid/" + @api_version
  @headers      = { "Ocp-Apim-Subscription-Key" => @api_key }
end
|
14
|
+
|
15
|
+
# POSTs a JSON body with locale "en-us" to +path+ to create a profile.
#
# path - endpoint path handed to send_request unchanged.
#
# Returns the parsed response body when the status code is 200; otherwise
# returns whatever parse_error_response produces for the response.
def create_profile(path)
  payload = { :json => { :locale => "en-us" } }
  reply   = send_request(path, :Post, { "Content-Type" => "application/json" }, payload)
  return parse_error_response(reply) unless reply.code == 200

  reply.parse
end
|
24
|
+
|
25
|
+
# No MIME returned from API (can't parse so we return 'true')
|
26
|
+
# Issues a DELETE request against +path+.
#
# path - endpoint path handed to send_request unchanged.
#
# Returns true when the status code is 200 (the success response is not
# parsed); otherwise returns the result of parse_error_response.
def delete_profile(path)
  reply = send_request(path, :Delete, { "Content-Type" => "application/json" }, nil)
  if reply.code == 200
    true
  else
    parse_error_response(reply)
  end
end
|
34
|
+
|
35
|
+
# Issues a GET request against +path+ to list profiles.
#
# path - endpoint path handed to send_request unchanged.
#
# Returns the parsed response body when the status code is 200; otherwise
# returns the result of parse_error_response.
def get_all_profiles(path)
  json_headers = { "Content-Type" => "application/json" }
  reply = send_request(path, :Get, json_headers, nil)
  return reply.parse if reply.code == 200

  parse_error_response(reply)
end
|
43
|
+
|
44
|
+
# Issues a GET request against +path+ to fetch a single profile.
#
# path - endpoint path handed to send_request unchanged.
#
# Returns the parsed response body when the status code is 200; otherwise
# returns the result of parse_error_response.
def get_profile(path)
  reply = send_request(path, :Get, { "Content-Type" => "application/json" }, nil)
  succeeded = reply.code == 200
  succeeded ? reply.parse : parse_error_response(reply)
end
|
52
|
+
|
53
|
+
# No MIME returned from API (can't parse so we return 'true')
|
54
|
+
# Issues a body-less POST against +path+ to reset a profile's enrollments.
#
# path - endpoint path handed to send_request unchanged.
#
# Returns true when the status code is 200 (the success response is not
# parsed); otherwise returns the result of parse_error_response.
def reset_all_enrollments_for_profile(path)
  reply = send_request(path, :Post, { "Content-Type" => "application/json" }, nil)
  return true if reply.code == 200

  parse_error_response(reply)
end
|
62
|
+
end
|
63
|
+
end
|
@@ -0,0 +1,263 @@
|
|
1
|
+
module VoiceId
|
2
|
+
class Identification < VoiceId::Base
|
3
|
+
# Error raised by #identify_speaker when it is not given a non-empty Array
# of profile ids.
ProfileIdsMissingError = Class.new(StandardError)
|
4
|
+
|
5
|
+
# params
|
6
|
+
# operationId
|
7
|
+
# the operation id taken from the url returned by #create_enrollment (see #get_operation_id)
|
8
|
+
# an operation's status is only available for 24 hours after creating enrollment
|
9
|
+
#
|
10
|
+
# Microsoft API response
|
11
|
+
# 200 Response
|
12
|
+
# {
|
13
|
+
# "status"=>"succeeded",
|
14
|
+
# "createdDateTime"=>"2016-09-20T01:51:39.134487Z",
|
15
|
+
# "lastActionDateTime"=>"2016-09-20T01:51:41.4183611Z",
|
16
|
+
# "processingResult"=>{
|
17
|
+
# "enrollmentStatus"=>"Enrolled",
|
18
|
+
# "remainingEnrollmentSpeechTime"=>0.0,
|
19
|
+
# "speechTime"=>7.93, # useful speech duration
|
20
|
+
# "enrollmentSpeechTime"=>31.72
|
21
|
+
# }
|
22
|
+
# }
|
23
|
+
#
|
24
|
+
# 404 Response
|
25
|
+
# {
|
26
|
+
# "error":{
|
27
|
+
# "code" : "NotFound",
|
28
|
+
# "message" : "No operation id found",
|
29
|
+
# }
|
30
|
+
# }
|
31
|
+
#
|
32
|
+
# returns
|
33
|
+
# success
|
34
|
+
# operation status { Hash }
|
35
|
+
# error
|
36
|
+
# false
|
37
|
+
# Fetches the status of an operation via GET /operations/<operationId>.
#
# operationId - id interpolated into the request path.
#
# Returns the parsed response body when the status code is 200; otherwise
# returns the result of parse_error_response.
def get_operation_status(operationId)
  status_path = "/operations/#{operationId}"
  reply = send_request(status_path, :Get, { "Content-Type" => "application/json" }, nil)
  if reply.code == 200
    reply.parse
  else
    parse_error_response(reply)
  end
end
|
45
|
+
|
46
|
+
# params
|
47
|
+
# profileIds - required
|
48
|
+
# a non-empty Array of profile ids (joined with commas for the request)
|
49
|
+
# shortAudio
|
50
|
+
# set min audio length requirement to 1 sec
|
51
|
+
# (still have to provide 30 secs(multiple enrollments))
|
52
|
+
# audio_file_path
|
53
|
+
# string representing location of wav file in system
|
54
|
+
#
|
55
|
+
# Microsoft API response
|
56
|
+
# 202 Response
|
57
|
+
# operation url (can be checked by calling get_operation_status(operationId))
|
58
|
+
# (ex: "https://api.projectoxford.ai/spid/v1.0/operations/995a8745-0098-4c12-9889-bad14859y7a4")
|
59
|
+
#
|
60
|
+
# 500 Response
|
61
|
+
# {
|
62
|
+
# "error": {
|
63
|
+
# "code" : "InternalServerError",
|
64
|
+
# "message" : "SpeakerInvalid",
|
65
|
+
# }
|
66
|
+
# }
|
67
|
+
#
|
68
|
+
# returns
|
69
|
+
# success
|
70
|
+
# operation url { String }
|
71
|
+
# fail
|
72
|
+
# false
|
73
|
+
# Submits an audio sample for identification against a set of profiles.
#
# profileIds      - non-empty Array of profile ids; joined with "," into the
#                   identificationProfileIds query parameter.
# shortAudio      - value interpolated into the shortAudio query parameter.
# audio_file_path - passed to create_body_for_enrollment to build the body.
#
# Returns the "Operation-Location" response header when the status code is
# 202; otherwise returns the result of parse_error_response.
# Raises ProfileIdsMissingError unless profileIds is a non-empty Array.
def identify_speaker(profileIds, shortAudio, audio_file_path)
  unless profileIds.is_a?(Array) && !profileIds.empty?
    raise ProfileIdsMissingError, "an array of profile ids is required"
  end

  joined_ids   = profileIds.join(",")
  request_path = "/identify?identificationProfileIds=#{joined_ids}&shortAudio=#{shortAudio}"
  audio_body   = create_body_for_enrollment(audio_file_path)
  reply        = send_request(request_path, :Post, {}, audio_body)
  return parse_error_response(reply) unless reply.code == 202

  reply.headers["Operation-Location"]
end
|
88
|
+
|
89
|
+
# Microsoft API response
|
90
|
+
# 200 Response
|
91
|
+
# {
|
92
|
+
# "identificationProfileId": "49a36324-fc4b-4387-aa06-090cfbf0064f",
|
93
|
+
# }
|
94
|
+
#
|
95
|
+
# 500 Response
|
96
|
+
# {
|
97
|
+
# "error":{
|
98
|
+
# "code" : "InternalServerError",
|
99
|
+
# "message" : "SpeakerInvalid",
|
100
|
+
# }
|
101
|
+
# }
|
102
|
+
#
|
103
|
+
# returns
|
104
|
+
# success
|
105
|
+
# new profileId { Hash }
|
106
|
+
# fail
|
107
|
+
# false (indicating new profile was not created)
|
108
|
+
# Creates a new identification profile by delegating to the parent
# implementation with the identification profiles endpoint.
def create_profile
  endpoint = "/identificationProfiles"
  super(endpoint)
end
|
111
|
+
|
112
|
+
# params
|
113
|
+
# profileId - required
|
114
|
+
# a valid id { String }
|
115
|
+
#
|
116
|
+
# Microsoft API response
|
117
|
+
# 200 Response
|
118
|
+
# "" (empty string)
|
119
|
+
#
|
120
|
+
# 500 Response
|
121
|
+
# {
|
122
|
+
# "error": {
|
123
|
+
# "code" : "InternalServerError",
|
124
|
+
# "message" : "SpeakerInvalid",
|
125
|
+
# }
|
126
|
+
# }
|
127
|
+
#
|
128
|
+
# returns
|
129
|
+
# success
|
130
|
+
# true
|
131
|
+
# fail
|
132
|
+
# false (indicating delete of id failed)
|
133
|
+
# Deletes one identification profile by delegating to the parent
# implementation with that profile's endpoint.
#
# profileId - id interpolated into the request path.
def delete_profile(profileId)
  profile_path = "/identificationProfiles/#{profileId}"
  super(profile_path)
end
|
136
|
+
|
137
|
+
# Microsoft API response
|
138
|
+
# 200 Response
|
139
|
+
# [
|
140
|
+
# {
|
141
|
+
# "identificationProfileId" : "111f427c-3791-468f-b709-fcef7660fff9",
|
142
|
+
# "locale" : "en-US",
|
143
|
+
# "enrollmentSpeechTime" : 0.0,
|
144
|
+
# "remainingEnrollmentSpeechTime" : 0.0,
|
145
|
+
# "createdDateTime" : "2015-04-23T18:25:43.511Z",
|
146
|
+
# "lastActionDateTime" : "2015-04-23T18:25:43.511Z",
|
147
|
+
# "enrollmentStatus" : "Enrolled" //[Enrolled | Enrolling | Training]
|
148
|
+
# },
|
149
|
+
# …]
|
150
|
+
#
|
151
|
+
# 500 Response
|
152
|
+
# {
|
153
|
+
# "error": {
|
154
|
+
# "code" : "InternalServerError",
|
155
|
+
# "message" : "SpeakerInvalid",
|
156
|
+
# }
|
157
|
+
# }
|
158
|
+
#
|
159
|
+
# returns
|
160
|
+
# success
|
161
|
+
# A list of all the profiles { Array }
|
162
|
+
# fail
|
163
|
+
# false (indicating the profiles could not be retrieved)
|
164
|
+
# Lists identification profiles by delegating to the parent
# implementation with the identification profiles endpoint.
def get_all_profiles
  endpoint = '/identificationProfiles'
  super(endpoint)
end
|
167
|
+
|
168
|
+
# params
|
169
|
+
# profileId
|
170
|
+
# a valid profileId { String }
|
171
|
+
#
|
172
|
+
# Microsoft API response
|
173
|
+
# 200 Response
|
174
|
+
# {
|
175
|
+
# "identificationProfileId" : "111f427c-3791-468f-b709-fcef7660fff9",
|
176
|
+
# "locale" : "en-US",
|
177
|
+
# "enrollmentSpeechTime" : 0.0,
|
178
|
+
# "remainingEnrollmentSpeechTime" : 0.0,
|
179
|
+
# "createdDateTime" : "2015-04-23T18:25:43.511Z",
|
180
|
+
# "lastActionDateTime" : "2015-04-23T18:25:43.511Z",
|
181
|
+
# "enrollmentStatus" : "Enrolled" //[Enrolled | Enrolling | Training]
|
182
|
+
# }
|
183
|
+
#
|
184
|
+
# 500 Response
|
185
|
+
# {
|
186
|
+
# "error": {
|
187
|
+
# "code" : "InternalServerError",
|
188
|
+
# "message" : "SpeakerInvalid",
|
189
|
+
# }
|
190
|
+
# }
|
191
|
+
#
|
192
|
+
# returns
|
193
|
+
# success
|
194
|
+
# a profile { Hash }
|
195
|
+
# fail
|
196
|
+
# false (indicating the profile could not be retrieved)
|
197
|
+
# Fetches one identification profile by delegating to the parent
# implementation with that profile's endpoint.
#
# profileId - id interpolated into the request path.
def get_profile(profileId)
  profile_path = "/identificationProfiles/#{profileId}"
  super(profile_path)
end
|
200
|
+
|
201
|
+
# params
|
202
|
+
# profileId
|
203
|
+
# a valid profileId { String }
|
204
|
+
# shortAudio
|
205
|
+
# false for default duration, true for any duration { Boolean }
|
206
|
+
# audio_file_path
|
207
|
+
# path to the audio file { String }
|
208
|
+
# audio requirements => WAV container, PCM encoding, 16K rate, 16-bit sample format, mono
|
209
|
+
#
|
210
|
+
# Microsoft API response
|
211
|
+
# 202 Response
|
212
|
+
# url to check the enrollment status
|
213
|
+
#
|
214
|
+
# 500 Response
|
215
|
+
# {
|
216
|
+
# "error": {
|
217
|
+
# "code" : "InternalServerError",
|
218
|
+
# "message" : "SpeakerInvalid",
|
219
|
+
# }
|
220
|
+
# }
|
221
|
+
#
|
222
|
+
# returns
|
223
|
+
# success
|
224
|
+
# a url { String }
|
225
|
+
# fail
|
226
|
+
# false
|
227
|
+
# Uploads an audio sample as an enrollment for an identification profile.
#
# profileId       - id interpolated into the request path.
# shortAudio      - value sent as the shortAudio query parameter, in the same
#                   way #identify_speaker sends it. Previously this argument
#                   was accepted but never forwarded to the API.
# audio_file_path - passed to create_body_for_enrollment to build the body.
#
# Returns the "Operation-Location" response header when the status code is
# 202; otherwise returns the result of parse_error_response.
def create_enrollment(profileId, shortAudio, audio_file_path)
  _method   = :Post
  _path     = "/identificationProfiles/#{profileId}/enroll?shortAudio=#{shortAudio}"
  _headers  = { }
  _body     = create_body_for_enrollment(audio_file_path)
  _response = send_request(_path, _method, _headers, _body)

  _response.code == 202 ? _response.headers["Operation-Location"] : parse_error_response(_response)
end
|
236
|
+
|
237
|
+
# params
|
238
|
+
# profileId - required
|
239
|
+
# a valid id { String }
|
240
|
+
#
|
241
|
+
# Microsoft API response
|
242
|
+
# 200 Response
|
243
|
+
# "" (empty string)
|
244
|
+
#
|
245
|
+
# 500 Response
|
246
|
+
# {
|
247
|
+
# "error": {
|
248
|
+
# "code" : "InternalServerError",
|
249
|
+
# "message" : "SpeakerInvalid",
|
250
|
+
# }
|
251
|
+
# }
|
252
|
+
#
|
253
|
+
# returns
|
254
|
+
# success
|
255
|
+
# true
|
256
|
+
# fail
|
257
|
+
# false (indicating delete of enrollments failed)
|
258
|
+
# Resets the enrollments of one identification profile by delegating to
# the parent implementation with that profile's reset endpoint.
#
# profileId - id interpolated into the request path.
def reset_all_enrollments_for_profile(profileId)
  reset_path = "/identificationProfiles/#{profileId}/reset"
  super(reset_path)
end
|
261
|
+
|
262
|
+
end
|
263
|
+
end
|