voice_id 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +3 -0
- data/LICENSE +20 -0
- data/Readme.md +277 -0
- data/lib/voice_id.rb +5 -0
- data/lib/voice_id/base.rb +63 -0
- data/lib/voice_id/identification.rb +263 -0
- data/lib/voice_id/request_helpers.rb +33 -0
- data/lib/voice_id/utils.rb +7 -0
- data/lib/voice_id/verification.rb +251 -0
- data/spec/identification_spec.rb +95 -0
- data/spec/mock_server.rb +122 -0
- data/spec/spec_helper.rb +4 -0
- data/spec/verification_spec.rb +80 -0
- data/voice_id.gemspec +17 -0
- metadata +100 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 5a5036968c4e311ca99e004aa99ca522b7e18e95
|
4
|
+
data.tar.gz: 7fc7ff2b2ec9e1733f9c7894dae6f9aad06aff68
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: a1e5a759472231990225bf5274693f0e688874a73d557618bad5d74fa8952462f5055b88b7f60ae66cf6a26cbd030331873eff3cbad35561de2d1ff715f500d7
|
7
|
+
data.tar.gz: 9e8df1b131c889f843e5f64c6508e7c18a7773370cdc9201d6c2a7b17df3b89129261ab9e3be2a37d4d51c2ae9227a5df67834a0e1b997a7b429b61a50d4eb60
|
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2016- Ali Yazdani <aliyazdani82@gmail.com>
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Readme.md
ADDED
@@ -0,0 +1,277 @@
|
|
1
|
+

|
2
|
+
[](https://circleci.com/gh/aliyazdani/voice_id)
|
3
|
+
|
4
|
+
# VoiceId
|
5
|
+
Wrapper around Microsoft Cognitive Services - Speaker Recognition API
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
sign up and pick up a new api key (speaker recognition API key)
|
9
|
+
[https://www.microsoft.com/cognitive-services](https://www.microsoft.com/cognitive-services)
|
10
|
+
|
11
|
+
```
|
12
|
+
$ gem install voice_id
|
13
|
+
```
|
14
|
+
|
15
|
+
## Examples
|
16
|
+
|
17
|
+
|
18
|
+
```ruby
|
19
|
+
#create a new profile
|
20
|
+
identification = VoiceId::Identification.new("MS_speaker_recognition_api_key")
|
21
|
+
profile = identification.create_profile
|
22
|
+
# => { "identificationProfileId" => "49a46324-fc4b-4387-aa06-090cfbf0214f" }
|
23
|
+
|
24
|
+
# create a new enrollment for that profile
|
25
|
+
profile_id = profile["identificationProfileId"]
|
26
|
+
path_to_audio = '/path/to/some/audio_file.wav'
|
27
|
+
short_audio = true
|
28
|
+
operation_url = identification.create_enrollment(profile_id , short_audio, path_to_audio)
|
29
|
+
# => "https://api.projectoxford.ai/spid/v1.0/operations/EF217D0C-9085-45D7-AAE0-2B36471B89B5"
|
30
|
+
|
31
|
+
# check the status of operation
|
32
|
+
operation_id = identification.get_operation_id(operation_url)
|
33
|
+
# => "EF217D0C-9085-45D7-AAE0-2B36471B89B5"
|
34
|
+
|
35
|
+
identification.get_operation_status(operation_id)
|
36
|
+
# notice below that we only had 13.6 seconds of useable audio so we need to
|
37
|
+
# submit more enrollments for this profile until we reach at least 30 seconds
|
38
|
+
# =>
|
39
|
+
# {
|
40
|
+
# "status" => "succeeded",
|
41
|
+
# "createdDateTime" => "2016-09-23T01:34:44.226642Z",
|
42
|
+
# "lastActionDateTime" => "2016-09-23T01:34:44.4795299Z",
|
43
|
+
# "processingResult" => {
|
44
|
+
# "enrollmentStatus" => "Enrolling",
|
45
|
+
# "remainingEnrollmentSpeechTime" => 16.4,
|
46
|
+
# "speechTime" => 13.6,
|
47
|
+
# "enrollmentSpeechTime"=>13.6
|
48
|
+
# }
|
49
|
+
# }
|
50
|
+
|
51
|
+
# identify a speaker
|
52
|
+
profile_ids = ["49a46324-fc4b-4387-aa06-090cfbf0214f", "49a36324-fc4b-4387-aa06-091cfbf0216b", ...]
|
53
|
+
path_to_test_audio = '/path/to/some/audio_file.wav'
|
54
|
+
short_audio = true
|
55
|
+
identification_operation_url = identification.identify_speaker(profile_ids, short_audio, path_to_test_audio)
|
56
|
+
# => "https://api.projectoxford.ai/spid/v1.0/operations/EF217D0C-9085-45D7-AAE0-2B36471B89B6"
|
57
|
+
identification_operation_id = identification.get_operation_id(identification_operation_url)
|
58
|
+
# => "EF217D0C-9085-45D7-AAE0-2B36471B89B6"
|
59
|
+
identification.get_operation_status(identification_operation_id)
|
60
|
+
# =>
|
61
|
+
# {
|
62
|
+
# "status" => "succeeded",
|
63
|
+
# "createdDateTime" => "2016-09-23T02:01:54.6498703Z",
|
64
|
+
# "lastActionDateTime" => "2016-09-23T02:01:56.054633Z",
|
65
|
+
# "processingResult" => {
|
66
|
+
# "identifiedProfileId" => "49a46324-fc4b-4387-aa06-090cfbf0214f",
|
67
|
+
# "confidence"=>"High"
|
68
|
+
# }
|
69
|
+
# }
|
70
|
+
```
|
71
|
+
|
72
|
+
## APIs
|
73
|
+
Provides methods for two APIs (Identification and Verification)
|
74
|
+
All audio samples provided to the API must be the following format:
|
75
|
+
```
|
76
|
+
Container WAV
|
77
|
+
Encoding PCM
|
78
|
+
Rate 16K
|
79
|
+
Sample Format 16 bit
|
80
|
+
Channels Mono
|
81
|
+
```
|
82
|
+
|
83
|
+
### Identification API
|
84
|
+
Identify a person from a list of people - this is a text-independent API.
|
85
|
+
Prior to being able to identify a speaker, a speaker (profile) must send a minimum
|
86
|
+
of 30 seconds of recognizable audio.
|
87
|
+
```ruby
|
88
|
+
identification = VoiceId::Identification.new("MS_speaker_recognition_api_key")
|
89
|
+
```
|
90
|
+
|
91
|
+
#### create_profile
|
92
|
+
Each person needs a unique profile, this creates a new one.
|
93
|
+
```ruby
|
94
|
+
profile = identification.create_profile
|
95
|
+
# => { "identificationProfileId" => "49a36324-fc4b-4387-aa06-090cfbf0064f" }
|
96
|
+
```
|
97
|
+
|
98
|
+
#### create_enrollment(profile_id, short_audio, audio_file_path)
|
99
|
+
An enrollment is how audio samples are associated with a profile (training the service). For the Identification API a minimum of 30 seconds of recognizable speech is required. This can be done through multiple enrollments. This creates a new
|
100
|
+
enrollment for a profile.
|
101
|
+
|
102
|
+
```ruby
|
103
|
+
profile_id = "1234567890"
|
104
|
+
path_to_audio = '/path/to/some/audio_file.wav'
|
105
|
+
short_audio = true # true - set minimum duration to 1 sec (5 sec by default per enrollment)
|
106
|
+
identification.create_enrollment(profile_id, short_audio, path_to_audio)
|
107
|
+
# returns an operation url that you can use to check the status of the enrollment
|
108
|
+
# => "https://api.projectoxford.ai/spid/v1.0/operations/EF217D0C-9085-45D7-AAE0-2B36471B89B5"
|
109
|
+
```
|
110
|
+
#### get_operation_id(operation_status_url)
|
111
|
+
Certain endpoints take time to process, so they return a URL that you can use to check on the status of the operation. Use this method to parse the operation id out of that URL. Now you can use #get_operation_status to
|
112
|
+
check the id.
|
113
|
+
```ruby
|
114
|
+
operation_status_url = identification.create_enrollment(profile_id, short_audio, path_to_audio)
|
115
|
+
# => "https://api.projectoxford.ai/spid/v1.0/operations/EF217D0C-9085-45D7-AAE0-2B36471B89B5"
|
116
|
+
identification_operation_id = identification.get_operation_id(operation_status_url)
|
117
|
+
# => "EF217D0C-9085-45D7-AAE0-2B36471B89B5"
|
118
|
+
```
|
119
|
+
#### get_operation_status(operation_id)
|
120
|
+
Check on the status of an operation by passing in the operation id (use #get_operation_id to get the id)
|
121
|
+
```ruby
|
122
|
+
identification.get_operation_status(identification_operation_id)
|
123
|
+
# =>
|
124
|
+
# {
|
125
|
+
# "status" => "succeeded",
|
126
|
+
# "createdDateTime" => "2016-09-23T02:01:54.6498703Z",
|
127
|
+
# "lastActionDateTime" => "2016-09-23T02:01:56.054633Z",
|
128
|
+
# "processingResult" => {
|
129
|
+
# "identifiedProfileId" => "49a59333-ur9d-4387-wd06-880cfby0215f",
|
130
|
+
# "confidence"=>"High"
|
131
|
+
# }
|
132
|
+
# }
|
133
|
+
```
|
134
|
+
|
135
|
+
#### delete_profile(profile_id)
|
136
|
+
Delete a particular profile from the service.
|
137
|
+
```ruby
|
138
|
+
profile_id = "1234567890"
|
139
|
+
identification.delete_profile(profile_id)
|
140
|
+
# => true
|
141
|
+
```
|
142
|
+
|
143
|
+
#### get_all_profiles
|
144
|
+
Returns a list of all the profiles for this account.
|
145
|
+
```ruby
|
146
|
+
identification.get_all_profiles
|
147
|
+
# =>
|
148
|
+
# [
|
149
|
+
# {
|
150
|
+
# "identificationProfileId" => "111f427c-3791-468f-b709-fcef7660fff9",
|
151
|
+
# "locale" => "en-US",
|
152
|
+
# "enrollmentSpeechTime" => 0.0,
|
153
|
+
# "remainingEnrollmentSpeechTime" => 0.0,
|
154
|
+
# "createdDateTime" => "2015-04-23T18:25:43.511Z",
|
155
|
+
# "lastActionDateTime" => "2015-04-23T18:25:43.511Z",
|
156
|
+
# "enrollmentStatus" => "Enrolled" //[Enrolled | Enrolling | Training]
|
157
|
+
# }, ...
|
158
|
+
# ]
|
159
|
+
```
|
160
|
+
|
161
|
+
#### get_profile(profileId)
|
162
|
+
Returns a profile's details
|
163
|
+
```ruby
|
164
|
+
profile_id = "1234567890"
|
165
|
+
identification.get_profile(profile_id)
|
166
|
+
# =>
|
167
|
+
# {
|
168
|
+
# "identificationProfileId" => "111f427c-3791-468f-b709-fcef7660fff9",
|
169
|
+
# "locale" => "en-US",
|
170
|
+
# "enrollmentSpeechTime" => 0.0,
|
171
|
+
# "remainingEnrollmentSpeechTime" => 0.0,
|
172
|
+
# "createdDateTime" => "2015-04-23T18:25:43.511Z",
|
173
|
+
# "lastActionDateTime" => "2015-04-23T18:25:43.511Z",
|
174
|
+
# "enrollmentStatus" => "Enrolled" //[Enrolled | Enrolling | Training]
|
175
|
+
# }
|
176
|
+
```
|
177
|
+
|
178
|
+
#### reset_all_enrollments_for_profile(profileId)
|
179
|
+
Resets all the enrollments for a particular profile
|
180
|
+
```ruby
|
181
|
+
profile_id = "1234567890"
|
182
|
+
identification.reset_all_enrollments_for_profile(profile_id)
|
183
|
+
# => true
|
184
|
+
```
|
185
|
+
|
186
|
+
#### identify_speaker(profile_ids, short_audio, audio_file_path)
|
187
|
+
Identify a speaker by calling this method with an array of `enrolled` profile_ids.
|
188
|
+
Use ```short_audio``` to waive the required 5-second speech sample.
|
189
|
+
The audio sample to be analyzed should ideally be 30 seconds, with a maximum of 5 mins.
|
190
|
+
|
191
|
+
```ruby
|
192
|
+
profile_ids = ["49a46324-fc4b-4387-aa06-090cfbf0214f", "49a36324-fc4b-4387-aa06-091cfbf0216b", ...]
|
193
|
+
path_to_test_audio = '/path/to/some/audio_file.wav'
|
194
|
+
short_audio = true
|
195
|
+
operation_url = identification.identify_speaker(profile_ids, short_audio, path_to_test_audio)
|
196
|
+
# => "https://api.projectoxford.ai/spid/v1.0/operations/EF217D0C-9085-45D7-AAE0-2B36471B89B6"
|
197
|
+
identification_operation_id = identification.get_operation_id(operation_url)
|
198
|
+
# => "EF217D0C-9085-45D7-AAE0-2B36471B89B6"
|
199
|
+
identification.get_operation_status(identification_operation_id)
|
200
|
+
# =>
|
201
|
+
# {
|
202
|
+
# "status" => "succeeded",
|
203
|
+
# "createdDateTime" => "2016-09-23T02:01:54.6498703Z",
|
204
|
+
# "lastActionDateTime" => "2016-09-23T02:01:56.054633Z",
|
205
|
+
# "processingResult" => {
|
206
|
+
# "identifiedProfileId" => "49a46324-fc4b-4387-aa06-090cfbf0214f",
|
207
|
+
# "confidence"=>"High"
|
208
|
+
# }
|
209
|
+
# }
|
210
|
+
```
|
211
|
+
|
212
|
+
### Verification API
|
213
|
+
Verify that a person is who they say they are - this is a text-dependent api.
|
214
|
+
Prior to being able to verify a speaker, a speaker (profile) must send three audio samples (from an API provided list) with their enrollment.
|
215
|
+
```ruby
|
216
|
+
verification = VoiceId::Verification.new("MS_speaker_recognition_api_key")
|
217
|
+
```
|
218
|
+
|
219
|
+
#### list_all_verification_phrases
|
220
|
+
Get a list of accepted scripts to use when sending your audio sample.
|
221
|
+
```ruby
|
222
|
+
verification.list_all_verification_phrases
|
223
|
+
# =>
|
224
|
+
# [
|
225
|
+
# {"phrase" => "i am going to make him an offer he cannot refuse"},
|
226
|
+
# {"phrase" => "houston we have had a problem"},
|
227
|
+
# {"phrase" => "my voice is my passport verify me"},
|
228
|
+
# {"phrase" => "apple juice tastes funny after toothpaste"},
|
229
|
+
# {"phrase" => "you can get in without your password"},
|
230
|
+
# {"phrase" => "you can activate security system now"},
|
231
|
+
# {"phrase" => "my voice is stronger than passwords"},
|
232
|
+
# {"phrase" => "my password is not your business"},
|
233
|
+
# {"phrase" => "my name is unknown to you"},
|
234
|
+
# {"phrase" => "be yourself everyone else is already taken"}
|
235
|
+
# ]
|
236
|
+
```
|
237
|
+
|
238
|
+
#### create_profile
|
239
|
+
Same as Identification API
|
240
|
+
|
241
|
+
#### create_enrollment(profile_id, audio_file_path)
|
242
|
+
Requires 3 enrollments. Pick 3 of the acceptable phrases from `#list_all_verification_phrases` and enroll them.
|
243
|
+
```ruby
|
244
|
+
verification.create_enrollment("49a46324-fc4b-4387-aa06-090cfbf0214f", '/path/to/audio/make_him_an_offer.wav')
|
245
|
+
# =>
|
246
|
+
# {
|
247
|
+
# "enrollmentStatus" => "Enrolling",
|
248
|
+
# "enrollmentsCount" => 1,
|
249
|
+
# "remainingEnrollments" => 2,
|
250
|
+
# "phrase" => "i am going to make him an offer he cannot refuse"
|
251
|
+
# }
|
252
|
+
```
|
253
|
+
|
254
|
+
#### delete_profile(profile_id)
|
255
|
+
Same as Identification API
|
256
|
+
|
257
|
+
#### get_all_profiles
|
258
|
+
Same as Identification API
|
259
|
+
|
260
|
+
#### get_profile(profile_id)
|
261
|
+
Same as Identification API
|
262
|
+
|
263
|
+
#### reset_all_enrollments_for_profile(profile_id)
|
264
|
+
Same as Identification API
|
265
|
+
|
266
|
+
#### verify_speaker(profile_id, audio_file_path)
|
267
|
+
User (profile) would have had to enroll with 3 of the accepted phrases (#list_all_verification_phrases).
|
268
|
+
Once the phrases have been accepted, a recording of one of the accepted phrases can be checked against an *enrolled* profile.
|
269
|
+
```ruby
|
270
|
+
verification.verify_speaker("86935587-b631-4cc7-a59t-8e580d71522g", "/path/to/audio/offer_converted.wav")
|
271
|
+
# =>
|
272
|
+
# {
|
273
|
+
# "result" => "Accept",
|
274
|
+
# "confidence" => "High",
|
275
|
+
# "phrase" => "i am going to make him an offer he cannot refuse"
|
276
|
+
# }
|
277
|
+
```
|
data/lib/voice_id.rb
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
module VoiceId
  # Shared request plumbing for the API clients in this gem.
  #
  # Every public method receives the request path from its caller, performs
  # exactly one call to #send_request and turns the response into a return
  # value. #send_request and #parse_error_response are not defined in this
  # class -- presumably they are supplied by the RequestHelpers / Utils
  # mixins (confirm there).
  class Base
    include RequestHelpers
    include Utils

    attr_accessor :api_base_url, :api_key, :api_version, :headers, :use_ssl

    # api_key - subscription key. It is stored as-is and also placed under
    #           the "Ocp-Apim-Subscription-Key" entry of #headers.
    def initialize(api_key)
      @api_key      = api_key
      @headers      = { "Ocp-Apim-Subscription-Key" => api_key }
      @use_ssl      = true
      @api_version  = "v1.0"
      # The base url embeds the version, so it is derived after @api_version.
      @api_base_url = "https://api.projectoxford.ai/spid/#{@api_version}"
    end

    # POSTs a JSON payload carrying the "en-us" locale to +path+.
    #
    # Returns the parsed response when the status code is 200, otherwise the
    # result of #parse_error_response.
    def create_profile(path)
      payload  = { :json => { :locale => "en-us" } }
      response = send_request(path, :Post, { "Content-Type" => "application/json" }, payload)
      return response.parse if response.code == 200

      parse_error_response(response)
    end

    # Sends a DELETE request (no body) to +path+.
    #
    # The success response is not parsed; +true+ is returned on a 200 status
    # instead. Any other status yields the result of #parse_error_response.
    def delete_profile(path)
      response = send_request(path, :Delete, { "Content-Type" => "application/json" }, nil)
      return true if response.code == 200

      parse_error_response(response)
    end

    # Sends a GET request (no body) to +path+.
    #
    # Returns the parsed response when the status code is 200, otherwise the
    # result of #parse_error_response.
    def get_all_profiles(path)
      response = send_request(path, :Get, { "Content-Type" => "application/json" }, nil)
      return response.parse if response.code == 200

      parse_error_response(response)
    end

    # Sends a GET request (no body) to +path+.
    #
    # Returns the parsed response when the status code is 200, otherwise the
    # result of #parse_error_response.
    def get_profile(path)
      response = send_request(path, :Get, { "Content-Type" => "application/json" }, nil)
      return response.parse if response.code == 200

      parse_error_response(response)
    end

    # Sends a POST request (no body) to +path+.
    #
    # The success response is not parsed; +true+ is returned on a 200 status
    # instead. Any other status yields the result of #parse_error_response.
    def reset_all_enrollments_for_profile(path)
      response = send_request(path, :Post, { "Content-Type" => "application/json" }, nil)
      return true if response.code == 200

      parse_error_response(response)
    end
  end
end
|
@@ -0,0 +1,263 @@
|
|
1
|
+
module VoiceId
  # Client for the speaker Identification endpoints.
  #
  # Each method performs one request through #send_request. When the response
  # does not carry the expected status code, the method returns whatever
  # #parse_error_response yields. Neither helper is defined in this class;
  # the original notes describe the failure value as `false` -- confirm
  # against the helper's definition.
  class Identification < VoiceId::Base
    # Raised by #identify_speaker when the profile id list is missing or empty.
    class ProfileIdsMissingError < StandardError; end

    # Fetches the status of a long-running operation.
    #
    # params
    #   operationId
    #     id of an operation, taken from the operation url returned by
    #     #create_enrollment or #identify_speaker
    #     (per the original notes, a status is only available for 24 hours)
    #
    # Microsoft API response
    #   200 Response
    #     {
    #       "status"=>"succeeded",
    #       "createdDateTime"=>"2016-09-20T01:51:39.134487Z",
    #       "lastActionDateTime"=>"2016-09-20T01:51:41.4183611Z",
    #       "processingResult"=>{
    #         "enrollmentStatus"=>"Enrolled",
    #         "remainingEnrollmentSpeechTime"=>0.0,
    #         "speechTime"=>7.93, # useful speech duration
    #         "enrollmentSpeechTime"=>31.72
    #       }
    #     }
    #
    #   404 Response
    #     { "error": { "code": "NotFound", "message": "No operation id found" } }
    #
    # returns
    #   success (status 200)
    #     operation status { Hash }
    #   error
    #     result of #parse_error_response
    def get_operation_status(operationId)
      response = send_request("/operations/#{operationId}", :Get,
                              { "Content-Type" => "application/json" }, nil)
      return response.parse if response.code == 200

      parse_error_response(response)
    end

    # Asks the service which of the given profiles spoke in an audio file.
    #
    # params
    #   profileIds - required
    #     a non-empty Array of profile ids (sent as comma-separated values)
    #   shortAudio
    #     sent as the `shortAudio` query parameter
    #     (per the original notes: lowers the min audio length to 1 sec)
    #   audio_file_path
    #     string representing location of wav file in system
    #
    # Microsoft API response
    #   202 Response
    #     operation url in the "Operation-Location" header
    #     (ex: "https://api.projectoxford.ai/spid/v1.0/operations/995a8745-0098-4c12-9889-bad14859y7a4")
    #
    #   500 Response
    #     { "error": { "code": "InternalServerError", "message": "SpeakerInvalid" } }
    #
    # returns
    #   success (status 202)
    #     operation url { String }; pass its id to #get_operation_status
    #   fail
    #     result of #parse_error_response
    #
    # raises
    #   ProfileIdsMissingError when profileIds is not an Array or is empty
    def identify_speaker(profileIds, shortAudio, audio_file_path)
      if !profileIds.is_a?(Array) || profileIds.empty?
        raise ProfileIdsMissingError, "an array of profile ids is required"
      end

      path     = "/identify?identificationProfileIds=#{profileIds.join(",")}&shortAudio=#{shortAudio}"
      body     = create_body_for_enrollment(audio_file_path)
      response = send_request(path, :Post, {}, body)
      return response.headers["Operation-Location"] if response.code == 202

      parse_error_response(response)
    end

    # Creates a new identification profile.
    #
    # Microsoft API response
    #   200 Response
    #     { "identificationProfileId": "49a36324-fc4b-4387-aa06-090cfbf0064f" }
    #
    #   500 Response
    #     { "error": { "code": "InternalServerError", "message": "SpeakerInvalid" } }
    #
    # returns
    #   whatever the superclass implementation returns for the
    #   "/identificationProfiles" path (documented as the new profile id
    #   { Hash } on success)
    def create_profile
      super("/identificationProfiles")
    end

    # Deletes an identification profile.
    #
    # params
    #   profileId - required
    #     a valid id { String }
    #
    # Microsoft API response
    #   200 Response
    #     "" (empty string)
    #
    #   500 Response
    #     { "error": { "code": "InternalServerError", "message": "SpeakerInvalid" } }
    #
    # returns
    #   whatever the superclass implementation returns for this profile's
    #   path (documented as true on success)
    def delete_profile(profileId)
      super("/identificationProfiles/#{profileId}")
    end

    # Lists all identification profiles.
    #
    # Microsoft API response
    #   200 Response
    #     [
    #       {
    #         "identificationProfileId" : "111f427c-3791-468f-b709-fcef7660fff9",
    #         "locale" : "en-US",
    #         "enrollmentSpeechTime" : 0.0,
    #         "remainingEnrollmentSpeechTime" : 0.0,
    #         "createdDateTime" : "2015-04-23T18:25:43.511Z",
    #         "lastActionDateTime" : "2015-04-23T18:25:43.511Z",
    #         "enrollmentStatus" : "Enrolled" //[Enrolled | Enrolling | Training]
    #       },
    #     ...]
    #
    #   500 Response
    #     { "error": { "code": "InternalServerError", "message": "SpeakerInvalid" } }
    #
    # returns
    #   whatever the superclass implementation returns for the
    #   "/identificationProfiles" path (documented as a list of all the
    #   profiles { Array } on success)
    def get_all_profiles
      super('/identificationProfiles')
    end

    # Fetches the details of one identification profile.
    #
    # params
    #   profileId
    #     a valid profileId { String }
    #
    # Microsoft API response
    #   200 Response
    #     {
    #       "identificationProfileId" : "111f427c-3791-468f-b709-fcef7660fff9",
    #       "locale" : "en-US",
    #       "enrollmentSpeechTime" : 0.0,
    #       "remainingEnrollmentSpeechTime" : 0.0,
    #       "createdDateTime" : "2015-04-23T18:25:43.511Z",
    #       "lastActionDateTime" : "2015-04-23T18:25:43.511Z",
    #       "enrollmentStatus" : "Enrolled" //[Enrolled | Enrolling | Training]
    #     }
    #
    #   500 Response
    #     { "error": { "code": "InternalServerError", "message": "SpeakerInvalid" } }
    #
    # returns
    #   whatever the superclass implementation returns for this profile's
    #   path (documented as a profile { Hash } on success)
    def get_profile(profileId)
      super("/identificationProfiles/#{profileId}")
    end

    # Uploads an audio sample as a new enrollment for a profile.
    #
    # params
    #   profileId
    #     a valid profileId { String }
    #   shortAudio
    #     false for default duration, true for any duration { Boolean }
    #   audio_file_path
    #     path to the audio file { String }
    #     audio requirements => Wav, PCM, 16k rate, 16 bit sample format, mono
    #
    # Microsoft API response
    #   202 Response
    #     url to check the enrollment status ("Operation-Location" header)
    #
    #   500 Response
    #     { "error": { "code": "InternalServerError", "message": "SpeakerInvalid" } }
    #
    # returns
    #   success (status 202)
    #     a url { String }
    #   fail
    #     result of #parse_error_response
    def create_enrollment(profileId, shortAudio, audio_file_path)
      # shortAudio has to travel as a query parameter, exactly as in
      # #identify_speaker; without it the flag never reaches the service.
      path     = "/identificationProfiles/#{profileId}/enroll?shortAudio=#{shortAudio}"
      body     = create_body_for_enrollment(audio_file_path)
      response = send_request(path, :Post, {}, body)
      return response.headers["Operation-Location"] if response.code == 202

      parse_error_response(response)
    end

    # Removes every enrollment stored for a profile.
    #
    # params
    #   profileId - required
    #     a valid id { String }
    #
    # Microsoft API response
    #   200 Response
    #     "" (empty string)
    #
    #   500 Response
    #     { "error": { "code": "InternalServerError", "message": "SpeakerInvalid" } }
    #
    # returns
    #   whatever the superclass implementation returns for this profile's
    #   reset path (documented as true on success)
    def reset_all_enrollments_for_profile(profileId)
      super("/identificationProfiles/#{profileId}/reset")
    end
  end
end
|