pvleopard 1.2.2__py3-none-any.whl → 2.0.1__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- pvleopard/_factory.py +7 -2
- pvleopard/_leopard.py +78 -16
- pvleopard/lib/common/leopard_params.pv +0 -0
- pvleopard/lib/jetson/cortex-a57-aarch64/libpv_leopard.so +0 -0
- pvleopard/lib/linux/x86_64/libpv_leopard.so +0 -0
- pvleopard/lib/mac/arm64/libpv_leopard.dylib +0 -0
- pvleopard/lib/mac/x86_64/libpv_leopard.dylib +0 -0
- pvleopard/lib/raspberry-pi/cortex-a53/libpv_leopard.so +0 -0
- pvleopard/lib/raspberry-pi/cortex-a53-aarch64/libpv_leopard.so +0 -0
- pvleopard/lib/raspberry-pi/cortex-a72/libpv_leopard.so +0 -0
- pvleopard/lib/raspberry-pi/cortex-a72-aarch64/libpv_leopard.so +0 -0
- pvleopard/lib/windows/amd64/libpv_leopard.dll +0 -0
- {pvleopard-1.2.2.dist-info → pvleopard-2.0.1.dist-info}/METADATA +16 -10
- pvleopard-2.0.1.dist-info/RECORD +19 -0
- {pvleopard-1.2.2.dist-info → pvleopard-2.0.1.dist-info}/WHEEL +1 -1
- pvleopard-1.2.2.dist-info/RECORD +0 -19
- {pvleopard-1.2.2.dist-info → pvleopard-2.0.1.dist-info}/top_level.txt +0 -0
pvleopard/_factory.py
CHANGED
@@ -19,7 +19,8 @@ def create(
|
|
19
19
|
access_key: str,
|
20
20
|
model_path: Optional[str] = None,
|
21
21
|
library_path: Optional[str] = None,
|
22
|
-
enable_automatic_punctuation: bool = False
|
22
|
+
enable_automatic_punctuation: bool = False,
|
23
|
+
enable_diarization: bool = False) -> Leopard:
|
23
24
|
"""
|
24
25
|
Factory method for Leopard speech-to-text engine.
|
25
26
|
|
@@ -28,6 +29,9 @@ def create(
|
|
28
29
|
:param model_path: Absolute path to the file containing model parameters. If not set it will be set to the default
|
29
30
|
location.
|
30
31
|
:param enable_automatic_punctuation Set to `True` to enable automatic punctuation insertion.
|
32
|
+
:param enable_diarization: Set to `True` to enable speaker diarization, which allows Leopard to differentiate
|
33
|
+
speakers as part of the transcription process. Word metadata will include a `speaker_tag` to
|
34
|
+
identify unique speakers.
|
31
35
|
:return: An instance of Leopard speech-to-text engine.
|
32
36
|
"""
|
33
37
|
|
@@ -41,7 +45,8 @@ def create(
|
|
41
45
|
access_key=access_key,
|
42
46
|
model_path=model_path,
|
43
47
|
library_path=library_path,
|
44
|
-
enable_automatic_punctuation=enable_automatic_punctuation
|
48
|
+
enable_automatic_punctuation=enable_automatic_punctuation,
|
49
|
+
enable_diarization=enable_diarization)
|
45
50
|
|
46
51
|
|
47
52
|
__all__ = [
|
pvleopard/_leopard.py
CHANGED
@@ -18,7 +18,27 @@ from typing import *
|
|
18
18
|
|
19
19
|
|
20
20
|
class LeopardError(Exception):
|
21
|
-
|
21
|
+
def __init__(self, message: str = '', message_stack: Sequence[str] = None):
|
22
|
+
super().__init__(message)
|
23
|
+
|
24
|
+
self._message = message
|
25
|
+
self._message_stack = list() if message_stack is None else message_stack
|
26
|
+
|
27
|
+
def __str__(self):
|
28
|
+
message = self._message
|
29
|
+
if len(self._message_stack) > 0:
|
30
|
+
message += ':'
|
31
|
+
for i in range(len(self._message_stack)):
|
32
|
+
message += '\n [%d] %s' % (i, self._message_stack[i])
|
33
|
+
return message
|
34
|
+
|
35
|
+
@property
|
36
|
+
def message(self) -> str:
|
37
|
+
return self._message
|
38
|
+
|
39
|
+
@property
|
40
|
+
def message_stack(self) -> Sequence[str]:
|
41
|
+
return self._message_stack
|
22
42
|
|
23
43
|
|
24
44
|
class LeopardMemoryError(LeopardError):
|
@@ -119,14 +139,16 @@ class Leopard(object):
|
|
119
139
|
("word", c_char_p),
|
120
140
|
("start_sec", c_float),
|
121
141
|
("end_sec", c_float),
|
122
|
-
("confidence", c_float)
|
142
|
+
("confidence", c_float),
|
143
|
+
("speaker_tag", c_int32)]
|
123
144
|
|
124
145
|
def __init__(
|
125
146
|
self,
|
126
147
|
access_key: str,
|
127
148
|
model_path: str,
|
128
149
|
library_path: str,
|
129
|
-
enable_automatic_punctuation: bool = False
|
150
|
+
enable_automatic_punctuation: bool = False,
|
151
|
+
enable_diarization: bool = False) -> None:
|
130
152
|
"""
|
131
153
|
Constructor.
|
132
154
|
|
@@ -134,6 +156,9 @@ class Leopard(object):
|
|
134
156
|
:param model_path: Absolute path to the file containing model parameters.
|
135
157
|
:param library_path: Absolute path to Leopard's dynamic library.
|
136
158
|
:param enable_automatic_punctuation Set to `True` to enable automatic punctuation insertion.
|
159
|
+
:param enable_diarization: Set to `True` to enable speaker diarization, which allows Leopard to differentiate
|
160
|
+
speakers as part of the transcription process. Word metadata will include a `speaker_tag` to
|
161
|
+
identify unique speakers.
|
137
162
|
"""
|
138
163
|
|
139
164
|
if not isinstance(access_key, str) or len(access_key) == 0:
|
@@ -147,15 +172,36 @@ class Leopard(object):
|
|
147
172
|
|
148
173
|
library = cdll.LoadLibrary(library_path)
|
149
174
|
|
175
|
+
set_sdk_func = library.pv_set_sdk
|
176
|
+
set_sdk_func.argtypes = [c_char_p]
|
177
|
+
set_sdk_func.restype = None
|
178
|
+
|
179
|
+
set_sdk_func('python'.encode('utf-8'))
|
180
|
+
|
181
|
+
self._get_error_stack_func = library.pv_get_error_stack
|
182
|
+
self._get_error_stack_func.argtypes = [POINTER(POINTER(c_char_p)), POINTER(c_int)]
|
183
|
+
self._get_error_stack_func.restype = self.PicovoiceStatuses
|
184
|
+
|
185
|
+
self._free_error_stack_func = library.pv_free_error_stack
|
186
|
+
self._free_error_stack_func.argtypes = [POINTER(c_char_p)]
|
187
|
+
self._free_error_stack_func.restype = None
|
188
|
+
|
150
189
|
init_func = library.pv_leopard_init
|
151
|
-
init_func.argtypes = [c_char_p, c_char_p, c_bool, POINTER(POINTER(self.CLeopard))]
|
190
|
+
init_func.argtypes = [c_char_p, c_char_p, c_bool, c_bool, POINTER(POINTER(self.CLeopard))]
|
152
191
|
init_func.restype = self.PicovoiceStatuses
|
153
192
|
|
154
193
|
self._handle = POINTER(self.CLeopard)()
|
155
194
|
|
156
|
-
status = init_func(
|
195
|
+
status = init_func(
|
196
|
+
access_key.encode(),
|
197
|
+
model_path.encode(),
|
198
|
+
enable_automatic_punctuation,
|
199
|
+
enable_diarization,
|
200
|
+
byref(self._handle))
|
157
201
|
if status is not self.PicovoiceStatuses.SUCCESS:
|
158
|
-
raise self._PICOVOICE_STATUS_TO_EXCEPTION[status](
|
202
|
+
raise self._PICOVOICE_STATUS_TO_EXCEPTION[status](
|
203
|
+
message='Initialization failed',
|
204
|
+
message_stack=self._get_error_stack())
|
159
205
|
|
160
206
|
self._delete_func = library.pv_leopard_delete
|
161
207
|
self._delete_func.argtypes = [POINTER(self.CLeopard)]
|
@@ -201,7 +247,7 @@ class Leopard(object):
|
|
201
247
|
]
|
202
248
|
self._words_delete_func.restype = None
|
203
249
|
|
204
|
-
Word = namedtuple('Word', ['word', 'start_sec', 'end_sec', 'confidence'])
|
250
|
+
Word = namedtuple('Word', ['word', 'start_sec', 'end_sec', 'confidence', 'speaker_tag'])
|
205
251
|
|
206
252
|
def process(self, pcm: Sequence[int]) -> Tuple[str, Sequence[Word]]:
|
207
253
|
"""
|
@@ -227,7 +273,9 @@ class Leopard(object):
|
|
227
273
|
byref(num_words),
|
228
274
|
byref(c_words))
|
229
275
|
if status is not self.PicovoiceStatuses.SUCCESS:
|
230
|
-
raise self._PICOVOICE_STATUS_TO_EXCEPTION[status](
|
276
|
+
raise self._PICOVOICE_STATUS_TO_EXCEPTION[status](
|
277
|
+
message='Process failed',
|
278
|
+
message_stack=self._get_error_stack())
|
231
279
|
|
232
280
|
transcript = c_transcript.value.decode('utf-8')
|
233
281
|
self._transcript_delete_func(c_transcript)
|
@@ -238,7 +286,8 @@ class Leopard(object):
|
|
238
286
|
word=c_words[i].word.decode('utf-8'),
|
239
287
|
start_sec=c_words[i].start_sec,
|
240
288
|
end_sec=c_words[i].end_sec,
|
241
|
-
confidence=c_words[i].confidence
|
289
|
+
confidence=c_words[i].confidence,
|
290
|
+
speaker_tag=c_words[i].speaker_tag)
|
242
291
|
words.append(word)
|
243
292
|
|
244
293
|
self._words_delete_func(c_words)
|
@@ -267,12 +316,9 @@ class Leopard(object):
|
|
267
316
|
byref(num_words),
|
268
317
|
byref(c_words))
|
269
318
|
if status is not self.PicovoiceStatuses.SUCCESS:
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
"Specified file with extension '%s' is not supported" % pathlib.Path(audio_path).suffix
|
274
|
-
)
|
275
|
-
raise self._PICOVOICE_STATUS_TO_EXCEPTION[status]()
|
319
|
+
raise self._PICOVOICE_STATUS_TO_EXCEPTION[status](
|
320
|
+
message='Process file failed',
|
321
|
+
message_stack=self._get_error_stack())
|
276
322
|
|
277
323
|
transcript = c_transcript.value.decode('utf-8')
|
278
324
|
self._transcript_delete_func(c_transcript)
|
@@ -283,7 +329,8 @@ class Leopard(object):
|
|
283
329
|
word=c_words[i].word.decode('utf-8'),
|
284
330
|
start_sec=c_words[i].start_sec,
|
285
331
|
end_sec=c_words[i].end_sec,
|
286
|
-
confidence=c_words[i].confidence
|
332
|
+
confidence=c_words[i].confidence,
|
333
|
+
speaker_tag=c_words[i].speaker_tag)
|
287
334
|
words.append(word)
|
288
335
|
|
289
336
|
self._words_delete_func(c_words)
|
@@ -307,6 +354,21 @@ class Leopard(object):
|
|
307
354
|
|
308
355
|
return self._sample_rate
|
309
356
|
|
357
|
+
def _get_error_stack(self) -> Sequence[str]:
|
358
|
+
message_stack_ref = POINTER(c_char_p)()
|
359
|
+
message_stack_depth = c_int()
|
360
|
+
status = self._get_error_stack_func(byref(message_stack_ref), byref(message_stack_depth))
|
361
|
+
if status is not self.PicovoiceStatuses.SUCCESS:
|
362
|
+
raise self._PICOVOICE_STATUS_TO_EXCEPTION[status](message='Unable to get Leopard error state')
|
363
|
+
|
364
|
+
message_stack = list()
|
365
|
+
for i in range(message_stack_depth.value):
|
366
|
+
message_stack.append(message_stack_ref[i].decode('utf-8'))
|
367
|
+
|
368
|
+
self._free_error_stack_func(message_stack_ref)
|
369
|
+
|
370
|
+
return message_stack
|
371
|
+
|
310
372
|
|
311
373
|
__all__ = [
|
312
374
|
'Leopard',
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: pvleopard
|
3
|
-
Version:
|
3
|
+
Version: 2.0.1
|
4
4
|
Summary: Leopard Speech-to-Text Engine.
|
5
5
|
Home-page: https://github.com/Picovoice/leopard
|
6
6
|
Author: Picovoice
|
@@ -17,13 +17,15 @@ Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
|
|
17
17
|
Requires-Python: >=3.5
|
18
18
|
Description-Content-Type: text/markdown
|
19
19
|
|
20
|
-
# Leopard
|
20
|
+
# Leopard Binding for Python
|
21
|
+
|
22
|
+
## Leopard Speech-to-Text Engine
|
21
23
|
|
22
24
|
Made in Vancouver, Canada by [Picovoice](https://picovoice.ai)
|
23
25
|
|
24
26
|
Leopard is an on-device speech-to-text engine. Leopard is:
|
25
27
|
|
26
|
-
- Private; All voice processing runs locally.
|
28
|
+
- Private; All voice processing runs locally.
|
27
29
|
- [Accurate](https://picovoice.ai/docs/benchmark/stt/)
|
28
30
|
- [Compact and Computationally-Efficient](https://github.com/Picovoice/speech-to-text-benchmark#rtf)
|
29
31
|
- Cross-Platform:
|
@@ -56,9 +58,9 @@ Create an instance of the engine and transcribe an audio file:
|
|
56
58
|
```python
|
57
59
|
import pvleopard
|
58
60
|
|
59
|
-
|
61
|
+
leopard = pvleopard.create(access_key='${ACCESS_KEY}')
|
60
62
|
|
61
|
-
transcript, words =
|
63
|
+
transcript, words = leopard.process_file('${AUDIO_PATH}')
|
62
64
|
print(transcript)
|
63
65
|
for word in words:
|
64
66
|
print(
|
@@ -67,20 +69,24 @@ for word in words:
|
|
67
69
|
```
|
68
70
|
|
69
71
|
Replace `${ACCESS_KEY}` with yours obtained from [Picovoice Console](https://console.picovoice.ai/) and
|
70
|
-
`${AUDIO_PATH}` to the path an audio file.
|
71
|
-
|
72
|
+
`${AUDIO_PATH}` with the path to an audio file.
|
73
|
+
|
74
|
+
Finally, when done, be sure to explicitly release the resources:
|
75
|
+
```python
|
76
|
+
leopard.delete()
|
77
|
+
```
|
72
78
|
|
73
79
|
## Language Model
|
74
80
|
|
75
|
-
The Leopard Python SDK comes preloaded with a default English language model (`.pv` file).
|
76
|
-
Default models for other supported languages can be found in [lib/common](../../lib/common).
|
81
|
+
The Leopard Python SDK comes preloaded with a default English language model (`.pv` file).
|
82
|
+
Default models for other supported languages can be found in [lib/common](../../lib/common).
|
77
83
|
|
78
84
|
Create custom language models using the [Picovoice Console](https://console.picovoice.ai/). Here you can train
|
79
85
|
language models with custom vocabulary and boost words in the existing vocabulary.
|
80
86
|
|
81
87
|
Pass in the `.pv` file via the `model_path` argument:
|
82
88
|
```python
|
83
|
-
|
89
|
+
leopard = pvleopard.create(
|
84
90
|
access_key='${ACCESS_KEY}',
|
85
91
|
model_path='${MODEL_PATH}')
|
86
92
|
```
|
@@ -0,0 +1,19 @@
|
|
1
|
+
pvleopard/LICENSE,sha256=ZurJwSSRHw99lGaJP88vQREqtZmIABuVKd_rK7k7U70,11344
|
2
|
+
pvleopard/__init__.py,sha256=OCI_7jgscvkzJMwT02SfB85_xOoQC8eb1e52HR_GKgo,577
|
3
|
+
pvleopard/_factory.py,sha256=1ZotcV9xdcJLRAYajFwFHfB0tV3_9bSi15_cjhedMnM,2075
|
4
|
+
pvleopard/_leopard.py,sha256=U6eNGzgQfpKaQxnDaOKHBDi2Kj_ysEHhF3SdVojbmhE,12741
|
5
|
+
pvleopard/_util.py,sha256=TQ3fXjUNUDerUbXdC6ntjMXaYD_B5udi__xp41zL9lE,3113
|
6
|
+
pvleopard/lib/common/leopard_params.pv,sha256=8jsCUfmOuuTD2MXLduV17hD1vHXuO-wscyJobYCsbfs,37699432
|
7
|
+
pvleopard/lib/jetson/cortex-a57-aarch64/libpv_leopard.so,sha256=y431wzz9NuRsOXnhk7ciZ9s6Lc3v9lr5mWdbkqC1bXE,1300032
|
8
|
+
pvleopard/lib/linux/x86_64/libpv_leopard.so,sha256=UhcGHHjCnR1Y-AMHVgVIbRoR_yjgRlw7SpcV86JwkX8,1415360
|
9
|
+
pvleopard/lib/mac/arm64/libpv_leopard.dylib,sha256=o4WF9nzDvF5K9_AKppz-avXsk-wApmRVKQuUArbvxSc,1545072
|
10
|
+
pvleopard/lib/mac/x86_64/libpv_leopard.dylib,sha256=Qwnh-0EI-VSWTjbSXnfmkxmYU8Z1ow-fFkCUQ2K9slc,1697720
|
11
|
+
pvleopard/lib/raspberry-pi/cortex-a53/libpv_leopard.so,sha256=DEnIu2Jpe_w9sviYNCLvgC1yqROLdt6A8b4r44d6eiQ,1275760
|
12
|
+
pvleopard/lib/raspberry-pi/cortex-a53-aarch64/libpv_leopard.so,sha256=FBqYYJbmtOu4AAD532__IfuJo6O9MnkP933CYK4pF3g,1312320
|
13
|
+
pvleopard/lib/raspberry-pi/cortex-a72/libpv_leopard.so,sha256=ALyBJ2I5B_nKQ2Vr7cTrPiFSnxIxsTPhOhcIWp3SOOg,1288048
|
14
|
+
pvleopard/lib/raspberry-pi/cortex-a72-aarch64/libpv_leopard.so,sha256=Kn5Ricawg62MGrzL6ZS-bcPOarlSU0PcwSQ-WG7k-Uo,1300032
|
15
|
+
pvleopard/lib/windows/amd64/libpv_leopard.dll,sha256=hM6eYFgcQ6p8ZIe8BFw0Yxm3enA2rPSRBCysVzFC8kQ,1524736
|
16
|
+
pvleopard-2.0.1.dist-info/METADATA,sha256=hloxBn0p8TABCghj9eSzSBIZxNNK-q1G_MyHR5yoOaY,3141
|
17
|
+
pvleopard-2.0.1.dist-info/WHEEL,sha256=g4nMs7d-Xl9-xC9XovUrsDHGXt-FT0E17Yqo92DEfvY,92
|
18
|
+
pvleopard-2.0.1.dist-info/top_level.txt,sha256=DAhlor-zWSROmsQCFWDsx_IJSE62zlgJ3sE4quxhEPw,10
|
19
|
+
pvleopard-2.0.1.dist-info/RECORD,,
|
pvleopard-1.2.2.dist-info/RECORD
DELETED
@@ -1,19 +0,0 @@
|
|
1
|
-
pvleopard/LICENSE,sha256=ZurJwSSRHw99lGaJP88vQREqtZmIABuVKd_rK7k7U70,11344
|
2
|
-
pvleopard/__init__.py,sha256=OCI_7jgscvkzJMwT02SfB85_xOoQC8eb1e52HR_GKgo,577
|
3
|
-
pvleopard/_factory.py,sha256=WzrCwUlU_TCXkzZKPFd0ag4sAdiuwRlpOM6HYl1M-t0,1746
|
4
|
-
pvleopard/_leopard.py,sha256=qk9kf7aRETnbEyBOrZ7sg_ESgKm3NEow_6nnQjjl-ms,10366
|
5
|
-
pvleopard/_util.py,sha256=TQ3fXjUNUDerUbXdC6ntjMXaYD_B5udi__xp41zL9lE,3113
|
6
|
-
pvleopard/lib/common/leopard_params.pv,sha256=CBoaZ8kach9nVD-I-SJakBvbVi2EfxpiumH9Bk_DKCU,19850729
|
7
|
-
pvleopard/lib/jetson/cortex-a57-aarch64/libpv_leopard.so,sha256=W9ikHNNpZVIM4lkHABfGJa2WVUK-U5XwZrUz0mUr0es,1290872
|
8
|
-
pvleopard/lib/linux/x86_64/libpv_leopard.so,sha256=peVOWhgqeC09UrYSKnduzvkKkm7WZ0-jWbiiTs770MM,1397912
|
9
|
-
pvleopard/lib/mac/arm64/libpv_leopard.dylib,sha256=KYM2pXObVPfsitBIl4gvap2IyrDhMpCpNI6k23F_YlE,1513536
|
10
|
-
pvleopard/lib/mac/x86_64/libpv_leopard.dylib,sha256=0ImW2M-JTEeAEZ9bfsLV9B0-FClfY1y6EnLR2tFCvs8,1666456
|
11
|
-
pvleopard/lib/raspberry-pi/cortex-a53/libpv_leopard.so,sha256=YtVPTq2oVm9DLcK7ruo5ZJkqqe33-OGM1xq5l-v4P0E,1263028
|
12
|
-
pvleopard/lib/raspberry-pi/cortex-a53-aarch64/libpv_leopard.so,sha256=w6kYHEuaAZ2tm-aHA_-xkmv-FgrmBFOhYtIDxPXb23c,1303240
|
13
|
-
pvleopard/lib/raspberry-pi/cortex-a72/libpv_leopard.so,sha256=LCdTw1mJYY1CjNzXimay8P3kd9LMvUBZap38e8j9Wvw,1275316
|
14
|
-
pvleopard/lib/raspberry-pi/cortex-a72-aarch64/libpv_leopard.so,sha256=rtpK3dyk8Kw6Ls4NxwkR4QQiwVo5mbWFVKHgkiITTOo,1295048
|
15
|
-
pvleopard/lib/windows/amd64/libpv_leopard.dll,sha256=P4A37VaWU-bMVwoqvuxtKAZfg_Y2Li33mMwf5QjQ3eo,1505792
|
16
|
-
pvleopard-1.2.2.dist-info/METADATA,sha256=0gk8fFJY2AbMv7pT-dWdD5YoXlZX-zT-4OhI4jhf2j8,3102
|
17
|
-
pvleopard-1.2.2.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
|
18
|
-
pvleopard-1.2.2.dist-info/top_level.txt,sha256=DAhlor-zWSROmsQCFWDsx_IJSE62zlgJ3sE4quxhEPw,10
|
19
|
-
pvleopard-1.2.2.dist-info/RECORD,,
|
File without changes
|