pvleopard 1.2.2__py3-none-any.whl → 2.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pvleopard/_factory.py +7 -2
- pvleopard/_leopard.py +78 -16
- pvleopard/lib/common/leopard_params.pv +0 -0
- pvleopard/lib/jetson/cortex-a57-aarch64/libpv_leopard.so +0 -0
- pvleopard/lib/linux/x86_64/libpv_leopard.so +0 -0
- pvleopard/lib/mac/arm64/libpv_leopard.dylib +0 -0
- pvleopard/lib/mac/x86_64/libpv_leopard.dylib +0 -0
- pvleopard/lib/raspberry-pi/cortex-a53/libpv_leopard.so +0 -0
- pvleopard/lib/raspberry-pi/cortex-a53-aarch64/libpv_leopard.so +0 -0
- pvleopard/lib/raspberry-pi/cortex-a72/libpv_leopard.so +0 -0
- pvleopard/lib/raspberry-pi/cortex-a72-aarch64/libpv_leopard.so +0 -0
- pvleopard/lib/windows/amd64/libpv_leopard.dll +0 -0
- {pvleopard-1.2.2.dist-info → pvleopard-2.0.1.dist-info}/METADATA +16 -10
- pvleopard-2.0.1.dist-info/RECORD +19 -0
- {pvleopard-1.2.2.dist-info → pvleopard-2.0.1.dist-info}/WHEEL +1 -1
- pvleopard-1.2.2.dist-info/RECORD +0 -19
- {pvleopard-1.2.2.dist-info → pvleopard-2.0.1.dist-info}/top_level.txt +0 -0
pvleopard/_factory.py
CHANGED
@@ -19,7 +19,8 @@ def create(
|
|
19
19
|
access_key: str,
|
20
20
|
model_path: Optional[str] = None,
|
21
21
|
library_path: Optional[str] = None,
|
22
|
-
enable_automatic_punctuation: bool = False
|
22
|
+
enable_automatic_punctuation: bool = False,
|
23
|
+
enable_diarization: bool = False) -> Leopard:
|
23
24
|
"""
|
24
25
|
Factory method for Leopard speech-to-text engine.
|
25
26
|
|
@@ -28,6 +29,9 @@ def create(
|
|
28
29
|
:param model_path: Absolute path to the file containing model parameters. If not set it will be set to the default
|
29
30
|
location.
|
30
31
|
:param enable_automatic_punctuation: Set to `True` to enable automatic punctuation insertion.
|
32
|
+
:param enable_diarization: Set to `True` to enable speaker diarization, which allows Leopard to differentiate
|
33
|
+
speakers as part of the transcription process. Word metadata will include a `speaker_tag` to
|
34
|
+
identify unique speakers.
|
31
35
|
:return: An instance of Leopard speech-to-text engine.
|
32
36
|
"""
|
33
37
|
|
@@ -41,7 +45,8 @@ def create(
|
|
41
45
|
access_key=access_key,
|
42
46
|
model_path=model_path,
|
43
47
|
library_path=library_path,
|
44
|
-
enable_automatic_punctuation=enable_automatic_punctuation
|
48
|
+
enable_automatic_punctuation=enable_automatic_punctuation,
|
49
|
+
enable_diarization=enable_diarization)
|
45
50
|
|
46
51
|
|
47
52
|
__all__ = [
|
pvleopard/_leopard.py
CHANGED
@@ -18,7 +18,27 @@ from typing import *
|
|
18
18
|
|
19
19
|
|
20
20
|
class LeopardError(Exception):
|
21
|
-
|
21
|
+
def __init__(self, message: str = '', message_stack: Sequence[str] = None):
|
22
|
+
super().__init__(message)
|
23
|
+
|
24
|
+
self._message = message
|
25
|
+
self._message_stack = list() if message_stack is None else message_stack
|
26
|
+
|
27
|
+
def __str__(self):
|
28
|
+
message = self._message
|
29
|
+
if len(self._message_stack) > 0:
|
30
|
+
message += ':'
|
31
|
+
for i in range(len(self._message_stack)):
|
32
|
+
message += '\n [%d] %s' % (i, self._message_stack[i])
|
33
|
+
return message
|
34
|
+
|
35
|
+
@property
|
36
|
+
def message(self) -> str:
|
37
|
+
return self._message
|
38
|
+
|
39
|
+
@property
|
40
|
+
def message_stack(self) -> Sequence[str]:
|
41
|
+
return self._message_stack
|
22
42
|
|
23
43
|
|
24
44
|
class LeopardMemoryError(LeopardError):
|
@@ -119,14 +139,16 @@ class Leopard(object):
|
|
119
139
|
("word", c_char_p),
|
120
140
|
("start_sec", c_float),
|
121
141
|
("end_sec", c_float),
|
122
|
-
("confidence", c_float)
|
142
|
+
("confidence", c_float),
|
143
|
+
("speaker_tag", c_int32)]
|
123
144
|
|
124
145
|
def __init__(
|
125
146
|
self,
|
126
147
|
access_key: str,
|
127
148
|
model_path: str,
|
128
149
|
library_path: str,
|
129
|
-
enable_automatic_punctuation: bool = False
|
150
|
+
enable_automatic_punctuation: bool = False,
|
151
|
+
enable_diarization: bool = False) -> None:
|
130
152
|
"""
|
131
153
|
Constructor.
|
132
154
|
|
@@ -134,6 +156,9 @@ class Leopard(object):
|
|
134
156
|
:param model_path: Absolute path to the file containing model parameters.
|
135
157
|
:param library_path: Absolute path to Leopard's dynamic library.
|
136
158
|
:param enable_automatic_punctuation: Set to `True` to enable automatic punctuation insertion.
|
159
|
+
:param enable_diarization: Set to `True` to enable speaker diarization, which allows Leopard to differentiate
|
160
|
+
speakers as part of the transcription process. Word metadata will include a `speaker_tag` to
|
161
|
+
identify unique speakers.
|
137
162
|
"""
|
138
163
|
|
139
164
|
if not isinstance(access_key, str) or len(access_key) == 0:
|
@@ -147,15 +172,36 @@ class Leopard(object):
|
|
147
172
|
|
148
173
|
library = cdll.LoadLibrary(library_path)
|
149
174
|
|
175
|
+
set_sdk_func = library.pv_set_sdk
|
176
|
+
set_sdk_func.argtypes = [c_char_p]
|
177
|
+
set_sdk_func.restype = None
|
178
|
+
|
179
|
+
set_sdk_func('python'.encode('utf-8'))
|
180
|
+
|
181
|
+
self._get_error_stack_func = library.pv_get_error_stack
|
182
|
+
self._get_error_stack_func.argtypes = [POINTER(POINTER(c_char_p)), POINTER(c_int)]
|
183
|
+
self._get_error_stack_func.restype = self.PicovoiceStatuses
|
184
|
+
|
185
|
+
self._free_error_stack_func = library.pv_free_error_stack
|
186
|
+
self._free_error_stack_func.argtypes = [POINTER(c_char_p)]
|
187
|
+
self._free_error_stack_func.restype = None
|
188
|
+
|
150
189
|
init_func = library.pv_leopard_init
|
151
|
-
init_func.argtypes = [c_char_p, c_char_p, c_bool, POINTER(POINTER(self.CLeopard))]
|
190
|
+
init_func.argtypes = [c_char_p, c_char_p, c_bool, c_bool, POINTER(POINTER(self.CLeopard))]
|
152
191
|
init_func.restype = self.PicovoiceStatuses
|
153
192
|
|
154
193
|
self._handle = POINTER(self.CLeopard)()
|
155
194
|
|
156
|
-
status = init_func(
|
195
|
+
status = init_func(
|
196
|
+
access_key.encode(),
|
197
|
+
model_path.encode(),
|
198
|
+
enable_automatic_punctuation,
|
199
|
+
enable_diarization,
|
200
|
+
byref(self._handle))
|
157
201
|
if status is not self.PicovoiceStatuses.SUCCESS:
|
158
|
-
raise self._PICOVOICE_STATUS_TO_EXCEPTION[status](
|
202
|
+
raise self._PICOVOICE_STATUS_TO_EXCEPTION[status](
|
203
|
+
message='Initialization failed',
|
204
|
+
message_stack=self._get_error_stack())
|
159
205
|
|
160
206
|
self._delete_func = library.pv_leopard_delete
|
161
207
|
self._delete_func.argtypes = [POINTER(self.CLeopard)]
|
@@ -201,7 +247,7 @@ class Leopard(object):
|
|
201
247
|
]
|
202
248
|
self._words_delete_func.restype = None
|
203
249
|
|
204
|
-
Word = namedtuple('Word', ['word', 'start_sec', 'end_sec', 'confidence'])
|
250
|
+
Word = namedtuple('Word', ['word', 'start_sec', 'end_sec', 'confidence', 'speaker_tag'])
|
205
251
|
|
206
252
|
def process(self, pcm: Sequence[int]) -> Tuple[str, Sequence[Word]]:
|
207
253
|
"""
|
@@ -227,7 +273,9 @@ class Leopard(object):
|
|
227
273
|
byref(num_words),
|
228
274
|
byref(c_words))
|
229
275
|
if status is not self.PicovoiceStatuses.SUCCESS:
|
230
|
-
raise self._PICOVOICE_STATUS_TO_EXCEPTION[status](
|
276
|
+
raise self._PICOVOICE_STATUS_TO_EXCEPTION[status](
|
277
|
+
message='Process failed',
|
278
|
+
message_stack=self._get_error_stack())
|
231
279
|
|
232
280
|
transcript = c_transcript.value.decode('utf-8')
|
233
281
|
self._transcript_delete_func(c_transcript)
|
@@ -238,7 +286,8 @@ class Leopard(object):
|
|
238
286
|
word=c_words[i].word.decode('utf-8'),
|
239
287
|
start_sec=c_words[i].start_sec,
|
240
288
|
end_sec=c_words[i].end_sec,
|
241
|
-
confidence=c_words[i].confidence
|
289
|
+
confidence=c_words[i].confidence,
|
290
|
+
speaker_tag=c_words[i].speaker_tag)
|
242
291
|
words.append(word)
|
243
292
|
|
244
293
|
self._words_delete_func(c_words)
|
@@ -267,12 +316,9 @@ class Leopard(object):
|
|
267
316
|
byref(num_words),
|
268
317
|
byref(c_words))
|
269
318
|
if status is not self.PicovoiceStatuses.SUCCESS:
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
"Specified file with extension '%s' is not supported" % pathlib.Path(audio_path).suffix
|
274
|
-
)
|
275
|
-
raise self._PICOVOICE_STATUS_TO_EXCEPTION[status]()
|
319
|
+
raise self._PICOVOICE_STATUS_TO_EXCEPTION[status](
|
320
|
+
message='Process file failed',
|
321
|
+
message_stack=self._get_error_stack())
|
276
322
|
|
277
323
|
transcript = c_transcript.value.decode('utf-8')
|
278
324
|
self._transcript_delete_func(c_transcript)
|
@@ -283,7 +329,8 @@ class Leopard(object):
|
|
283
329
|
word=c_words[i].word.decode('utf-8'),
|
284
330
|
start_sec=c_words[i].start_sec,
|
285
331
|
end_sec=c_words[i].end_sec,
|
286
|
-
confidence=c_words[i].confidence
|
332
|
+
confidence=c_words[i].confidence,
|
333
|
+
speaker_tag=c_words[i].speaker_tag)
|
287
334
|
words.append(word)
|
288
335
|
|
289
336
|
self._words_delete_func(c_words)
|
@@ -307,6 +354,21 @@ class Leopard(object):
|
|
307
354
|
|
308
355
|
return self._sample_rate
|
309
356
|
|
357
|
+
def _get_error_stack(self) -> Sequence[str]:
|
358
|
+
message_stack_ref = POINTER(c_char_p)()
|
359
|
+
message_stack_depth = c_int()
|
360
|
+
status = self._get_error_stack_func(byref(message_stack_ref), byref(message_stack_depth))
|
361
|
+
if status is not self.PicovoiceStatuses.SUCCESS:
|
362
|
+
raise self._PICOVOICE_STATUS_TO_EXCEPTION[status](message='Unable to get Leopard error state')
|
363
|
+
|
364
|
+
message_stack = list()
|
365
|
+
for i in range(message_stack_depth.value):
|
366
|
+
message_stack.append(message_stack_ref[i].decode('utf-8'))
|
367
|
+
|
368
|
+
self._free_error_stack_func(message_stack_ref)
|
369
|
+
|
370
|
+
return message_stack
|
371
|
+
|
310
372
|
|
311
373
|
__all__ = [
|
312
374
|
'Leopard',
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: pvleopard
|
3
|
-
Version:
|
3
|
+
Version: 2.0.1
|
4
4
|
Summary: Leopard Speech-to-Text Engine.
|
5
5
|
Home-page: https://github.com/Picovoice/leopard
|
6
6
|
Author: Picovoice
|
@@ -17,13 +17,15 @@ Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
|
|
17
17
|
Requires-Python: >=3.5
|
18
18
|
Description-Content-Type: text/markdown
|
19
19
|
|
20
|
-
# Leopard
|
20
|
+
# Leopard Binding for Python
|
21
|
+
|
22
|
+
## Leopard Speech-to-Text Engine
|
21
23
|
|
22
24
|
Made in Vancouver, Canada by [Picovoice](https://picovoice.ai)
|
23
25
|
|
24
26
|
Leopard is an on-device speech-to-text engine. Leopard is:
|
25
27
|
|
26
|
-
- Private; All voice processing runs locally.
|
28
|
+
- Private; All voice processing runs locally.
|
27
29
|
- [Accurate](https://picovoice.ai/docs/benchmark/stt/)
|
28
30
|
- [Compact and Computationally-Efficient](https://github.com/Picovoice/speech-to-text-benchmark#rtf)
|
29
31
|
- Cross-Platform:
|
@@ -56,9 +58,9 @@ Create an instance of the engine and transcribe an audio file:
|
|
56
58
|
```python
|
57
59
|
import pvleopard
|
58
60
|
|
59
|
-
|
61
|
+
leopard = pvleopard.create(access_key='${ACCESS_KEY}')
|
60
62
|
|
61
|
-
transcript, words =
|
63
|
+
transcript, words = leopard.process_file('${AUDIO_PATH}')
|
62
64
|
print(transcript)
|
63
65
|
for word in words:
|
64
66
|
print(
|
@@ -67,20 +69,24 @@ for word in words:
|
|
67
69
|
```
|
68
70
|
|
69
71
|
Replace `${ACCESS_KEY}` with yours obtained from [Picovoice Console](https://console.picovoice.ai/) and
|
70
|
-
`${AUDIO_PATH}` to the path an audio file.
|
71
|
-
|
72
|
+
`${AUDIO_PATH}` with the path to an audio file.
|
73
|
+
|
74
|
+
Finally, when done, be sure to explicitly release the resources:
|
75
|
+
```python
|
76
|
+
leopard.delete()
|
77
|
+
```
|
72
78
|
|
73
79
|
## Language Model
|
74
80
|
|
75
|
-
The Leopard Python SDK comes preloaded with a default English language model (`.pv` file).
|
76
|
-
Default models for other supported languages can be found in [lib/common](../../lib/common).
|
81
|
+
The Leopard Python SDK comes preloaded with a default English language model (`.pv` file).
|
82
|
+
Default models for other supported languages can be found in [lib/common](../../lib/common).
|
77
83
|
|
78
84
|
Create custom language models using the [Picovoice Console](https://console.picovoice.ai/). Here you can train
|
79
85
|
language models with custom vocabulary and boost words in the existing vocabulary.
|
80
86
|
|
81
87
|
Pass in the `.pv` file via the `model_path` argument:
|
82
88
|
```python
|
83
|
-
|
89
|
+
leopard = pvleopard.create(
|
84
90
|
access_key='${ACCESS_KEY}',
|
85
91
|
model_path='${MODEL_PATH}')
|
86
92
|
```
|
@@ -0,0 +1,19 @@
|
|
1
|
+
pvleopard/LICENSE,sha256=ZurJwSSRHw99lGaJP88vQREqtZmIABuVKd_rK7k7U70,11344
|
2
|
+
pvleopard/__init__.py,sha256=OCI_7jgscvkzJMwT02SfB85_xOoQC8eb1e52HR_GKgo,577
|
3
|
+
pvleopard/_factory.py,sha256=1ZotcV9xdcJLRAYajFwFHfB0tV3_9bSi15_cjhedMnM,2075
|
4
|
+
pvleopard/_leopard.py,sha256=U6eNGzgQfpKaQxnDaOKHBDi2Kj_ysEHhF3SdVojbmhE,12741
|
5
|
+
pvleopard/_util.py,sha256=TQ3fXjUNUDerUbXdC6ntjMXaYD_B5udi__xp41zL9lE,3113
|
6
|
+
pvleopard/lib/common/leopard_params.pv,sha256=8jsCUfmOuuTD2MXLduV17hD1vHXuO-wscyJobYCsbfs,37699432
|
7
|
+
pvleopard/lib/jetson/cortex-a57-aarch64/libpv_leopard.so,sha256=y431wzz9NuRsOXnhk7ciZ9s6Lc3v9lr5mWdbkqC1bXE,1300032
|
8
|
+
pvleopard/lib/linux/x86_64/libpv_leopard.so,sha256=UhcGHHjCnR1Y-AMHVgVIbRoR_yjgRlw7SpcV86JwkX8,1415360
|
9
|
+
pvleopard/lib/mac/arm64/libpv_leopard.dylib,sha256=o4WF9nzDvF5K9_AKppz-avXsk-wApmRVKQuUArbvxSc,1545072
|
10
|
+
pvleopard/lib/mac/x86_64/libpv_leopard.dylib,sha256=Qwnh-0EI-VSWTjbSXnfmkxmYU8Z1ow-fFkCUQ2K9slc,1697720
|
11
|
+
pvleopard/lib/raspberry-pi/cortex-a53/libpv_leopard.so,sha256=DEnIu2Jpe_w9sviYNCLvgC1yqROLdt6A8b4r44d6eiQ,1275760
|
12
|
+
pvleopard/lib/raspberry-pi/cortex-a53-aarch64/libpv_leopard.so,sha256=FBqYYJbmtOu4AAD532__IfuJo6O9MnkP933CYK4pF3g,1312320
|
13
|
+
pvleopard/lib/raspberry-pi/cortex-a72/libpv_leopard.so,sha256=ALyBJ2I5B_nKQ2Vr7cTrPiFSnxIxsTPhOhcIWp3SOOg,1288048
|
14
|
+
pvleopard/lib/raspberry-pi/cortex-a72-aarch64/libpv_leopard.so,sha256=Kn5Ricawg62MGrzL6ZS-bcPOarlSU0PcwSQ-WG7k-Uo,1300032
|
15
|
+
pvleopard/lib/windows/amd64/libpv_leopard.dll,sha256=hM6eYFgcQ6p8ZIe8BFw0Yxm3enA2rPSRBCysVzFC8kQ,1524736
|
16
|
+
pvleopard-2.0.1.dist-info/METADATA,sha256=hloxBn0p8TABCghj9eSzSBIZxNNK-q1G_MyHR5yoOaY,3141
|
17
|
+
pvleopard-2.0.1.dist-info/WHEEL,sha256=g4nMs7d-Xl9-xC9XovUrsDHGXt-FT0E17Yqo92DEfvY,92
|
18
|
+
pvleopard-2.0.1.dist-info/top_level.txt,sha256=DAhlor-zWSROmsQCFWDsx_IJSE62zlgJ3sE4quxhEPw,10
|
19
|
+
pvleopard-2.0.1.dist-info/RECORD,,
|
pvleopard-1.2.2.dist-info/RECORD
DELETED
@@ -1,19 +0,0 @@
|
|
1
|
-
pvleopard/LICENSE,sha256=ZurJwSSRHw99lGaJP88vQREqtZmIABuVKd_rK7k7U70,11344
|
2
|
-
pvleopard/__init__.py,sha256=OCI_7jgscvkzJMwT02SfB85_xOoQC8eb1e52HR_GKgo,577
|
3
|
-
pvleopard/_factory.py,sha256=WzrCwUlU_TCXkzZKPFd0ag4sAdiuwRlpOM6HYl1M-t0,1746
|
4
|
-
pvleopard/_leopard.py,sha256=qk9kf7aRETnbEyBOrZ7sg_ESgKm3NEow_6nnQjjl-ms,10366
|
5
|
-
pvleopard/_util.py,sha256=TQ3fXjUNUDerUbXdC6ntjMXaYD_B5udi__xp41zL9lE,3113
|
6
|
-
pvleopard/lib/common/leopard_params.pv,sha256=CBoaZ8kach9nVD-I-SJakBvbVi2EfxpiumH9Bk_DKCU,19850729
|
7
|
-
pvleopard/lib/jetson/cortex-a57-aarch64/libpv_leopard.so,sha256=W9ikHNNpZVIM4lkHABfGJa2WVUK-U5XwZrUz0mUr0es,1290872
|
8
|
-
pvleopard/lib/linux/x86_64/libpv_leopard.so,sha256=peVOWhgqeC09UrYSKnduzvkKkm7WZ0-jWbiiTs770MM,1397912
|
9
|
-
pvleopard/lib/mac/arm64/libpv_leopard.dylib,sha256=KYM2pXObVPfsitBIl4gvap2IyrDhMpCpNI6k23F_YlE,1513536
|
10
|
-
pvleopard/lib/mac/x86_64/libpv_leopard.dylib,sha256=0ImW2M-JTEeAEZ9bfsLV9B0-FClfY1y6EnLR2tFCvs8,1666456
|
11
|
-
pvleopard/lib/raspberry-pi/cortex-a53/libpv_leopard.so,sha256=YtVPTq2oVm9DLcK7ruo5ZJkqqe33-OGM1xq5l-v4P0E,1263028
|
12
|
-
pvleopard/lib/raspberry-pi/cortex-a53-aarch64/libpv_leopard.so,sha256=w6kYHEuaAZ2tm-aHA_-xkmv-FgrmBFOhYtIDxPXb23c,1303240
|
13
|
-
pvleopard/lib/raspberry-pi/cortex-a72/libpv_leopard.so,sha256=LCdTw1mJYY1CjNzXimay8P3kd9LMvUBZap38e8j9Wvw,1275316
|
14
|
-
pvleopard/lib/raspberry-pi/cortex-a72-aarch64/libpv_leopard.so,sha256=rtpK3dyk8Kw6Ls4NxwkR4QQiwVo5mbWFVKHgkiITTOo,1295048
|
15
|
-
pvleopard/lib/windows/amd64/libpv_leopard.dll,sha256=P4A37VaWU-bMVwoqvuxtKAZfg_Y2Li33mMwf5QjQ3eo,1505792
|
16
|
-
pvleopard-1.2.2.dist-info/METADATA,sha256=0gk8fFJY2AbMv7pT-dWdD5YoXlZX-zT-4OhI4jhf2j8,3102
|
17
|
-
pvleopard-1.2.2.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
|
18
|
-
pvleopard-1.2.2.dist-info/top_level.txt,sha256=DAhlor-zWSROmsQCFWDsx_IJSE62zlgJ3sE4quxhEPw,10
|
19
|
-
pvleopard-1.2.2.dist-info/RECORD,,
|
File without changes
|