pvleopard 1.2.2__py3-none-any.whl → 2.0.1__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
pvleopard/_factory.py CHANGED
@@ -19,7 +19,8 @@ def create(
19
19
  access_key: str,
20
20
  model_path: Optional[str] = None,
21
21
  library_path: Optional[str] = None,
22
- enable_automatic_punctuation: bool = False) -> Leopard:
22
+ enable_automatic_punctuation: bool = False,
23
+ enable_diarization: bool = False) -> Leopard:
23
24
  """
24
25
  Factory method for Leopard speech-to-text engine.
25
26
 
@@ -28,6 +29,9 @@ def create(
28
29
  :param model_path: Absolute path to the file containing model parameters. If not set it will be set to the default
29
30
  location.
30
31
  :param enable_automatic_punctuation: Set to `True` to enable automatic punctuation insertion.
32
+ :param enable_diarization: Set to `True` to enable speaker diarization, which allows Leopard to differentiate
33
+ speakers as part of the transcription process. Word metadata will include a `speaker_tag` to
34
+ identify unique speakers.
31
35
  :return: An instance of Leopard speech-to-text engine.
32
36
  """
33
37
 
@@ -41,7 +45,8 @@ def create(
41
45
  access_key=access_key,
42
46
  model_path=model_path,
43
47
  library_path=library_path,
44
- enable_automatic_punctuation=enable_automatic_punctuation)
48
+ enable_automatic_punctuation=enable_automatic_punctuation,
49
+ enable_diarization=enable_diarization)
45
50
 
46
51
 
47
52
  __all__ = [
pvleopard/_leopard.py CHANGED
@@ -18,7 +18,27 @@ from typing import *
18
18
 
19
19
 
20
20
  class LeopardError(Exception):
21
- pass
21
+ def __init__(self, message: str = '', message_stack: Sequence[str] = None):
22
+ super().__init__(message)
23
+
24
+ self._message = message
25
+ self._message_stack = list() if message_stack is None else message_stack
26
+
27
+ def __str__(self):
28
+ message = self._message
29
+ if len(self._message_stack) > 0:
30
+ message += ':'
31
+ for i in range(len(self._message_stack)):
32
+ message += '\n [%d] %s' % (i, self._message_stack[i])
33
+ return message
34
+
35
+ @property
36
+ def message(self) -> str:
37
+ return self._message
38
+
39
+ @property
40
+ def message_stack(self) -> Sequence[str]:
41
+ return self._message_stack
22
42
 
23
43
 
24
44
  class LeopardMemoryError(LeopardError):
@@ -119,14 +139,16 @@ class Leopard(object):
119
139
  ("word", c_char_p),
120
140
  ("start_sec", c_float),
121
141
  ("end_sec", c_float),
122
- ("confidence", c_float)]
142
+ ("confidence", c_float),
143
+ ("speaker_tag", c_int32)]
123
144
 
124
145
  def __init__(
125
146
  self,
126
147
  access_key: str,
127
148
  model_path: str,
128
149
  library_path: str,
129
- enable_automatic_punctuation: bool = False) -> None:
150
+ enable_automatic_punctuation: bool = False,
151
+ enable_diarization: bool = False) -> None:
130
152
  """
131
153
  Constructor.
132
154
 
@@ -134,6 +156,9 @@ class Leopard(object):
134
156
  :param model_path: Absolute path to the file containing model parameters.
135
157
  :param library_path: Absolute path to Leopard's dynamic library.
136
158
  :param enable_automatic_punctuation: Set to `True` to enable automatic punctuation insertion.
159
+ :param enable_diarization: Set to `True` to enable speaker diarization, which allows Leopard to differentiate
160
+ speakers as part of the transcription process. Word metadata will include a `speaker_tag` to
161
+ identify unique speakers.
137
162
  """
138
163
 
139
164
  if not isinstance(access_key, str) or len(access_key) == 0:
@@ -147,15 +172,36 @@ class Leopard(object):
147
172
 
148
173
  library = cdll.LoadLibrary(library_path)
149
174
 
175
+ set_sdk_func = library.pv_set_sdk
176
+ set_sdk_func.argtypes = [c_char_p]
177
+ set_sdk_func.restype = None
178
+
179
+ set_sdk_func('python'.encode('utf-8'))
180
+
181
+ self._get_error_stack_func = library.pv_get_error_stack
182
+ self._get_error_stack_func.argtypes = [POINTER(POINTER(c_char_p)), POINTER(c_int)]
183
+ self._get_error_stack_func.restype = self.PicovoiceStatuses
184
+
185
+ self._free_error_stack_func = library.pv_free_error_stack
186
+ self._free_error_stack_func.argtypes = [POINTER(c_char_p)]
187
+ self._free_error_stack_func.restype = None
188
+
150
189
  init_func = library.pv_leopard_init
151
- init_func.argtypes = [c_char_p, c_char_p, c_bool, POINTER(POINTER(self.CLeopard))]
190
+ init_func.argtypes = [c_char_p, c_char_p, c_bool, c_bool, POINTER(POINTER(self.CLeopard))]
152
191
  init_func.restype = self.PicovoiceStatuses
153
192
 
154
193
  self._handle = POINTER(self.CLeopard)()
155
194
 
156
- status = init_func(access_key.encode(), model_path.encode(), enable_automatic_punctuation, byref(self._handle))
195
+ status = init_func(
196
+ access_key.encode(),
197
+ model_path.encode(),
198
+ enable_automatic_punctuation,
199
+ enable_diarization,
200
+ byref(self._handle))
157
201
  if status is not self.PicovoiceStatuses.SUCCESS:
158
- raise self._PICOVOICE_STATUS_TO_EXCEPTION[status]()
202
+ raise self._PICOVOICE_STATUS_TO_EXCEPTION[status](
203
+ message='Initialization failed',
204
+ message_stack=self._get_error_stack())
159
205
 
160
206
  self._delete_func = library.pv_leopard_delete
161
207
  self._delete_func.argtypes = [POINTER(self.CLeopard)]
@@ -201,7 +247,7 @@ class Leopard(object):
201
247
  ]
202
248
  self._words_delete_func.restype = None
203
249
 
204
- Word = namedtuple('Word', ['word', 'start_sec', 'end_sec', 'confidence'])
250
+ Word = namedtuple('Word', ['word', 'start_sec', 'end_sec', 'confidence', 'speaker_tag'])
205
251
 
206
252
  def process(self, pcm: Sequence[int]) -> Tuple[str, Sequence[Word]]:
207
253
  """
@@ -227,7 +273,9 @@ class Leopard(object):
227
273
  byref(num_words),
228
274
  byref(c_words))
229
275
  if status is not self.PicovoiceStatuses.SUCCESS:
230
- raise self._PICOVOICE_STATUS_TO_EXCEPTION[status]()
276
+ raise self._PICOVOICE_STATUS_TO_EXCEPTION[status](
277
+ message='Process failed',
278
+ message_stack=self._get_error_stack())
231
279
 
232
280
  transcript = c_transcript.value.decode('utf-8')
233
281
  self._transcript_delete_func(c_transcript)
@@ -238,7 +286,8 @@ class Leopard(object):
238
286
  word=c_words[i].word.decode('utf-8'),
239
287
  start_sec=c_words[i].start_sec,
240
288
  end_sec=c_words[i].end_sec,
241
- confidence=c_words[i].confidence)
289
+ confidence=c_words[i].confidence,
290
+ speaker_tag=c_words[i].speaker_tag)
242
291
  words.append(word)
243
292
 
244
293
  self._words_delete_func(c_words)
@@ -267,12 +316,9 @@ class Leopard(object):
267
316
  byref(num_words),
268
317
  byref(c_words))
269
318
  if status is not self.PicovoiceStatuses.SUCCESS:
270
- if status is self.PicovoiceStatuses.INVALID_ARGUMENT:
271
- if not audio_path.lower().endswith(self._VALID_EXTENSIONS):
272
- raise self._PICOVOICE_STATUS_TO_EXCEPTION[status](
273
- "Specified file with extension '%s' is not supported" % pathlib.Path(audio_path).suffix
274
- )
275
- raise self._PICOVOICE_STATUS_TO_EXCEPTION[status]()
319
+ raise self._PICOVOICE_STATUS_TO_EXCEPTION[status](
320
+ message='Process file failed',
321
+ message_stack=self._get_error_stack())
276
322
 
277
323
  transcript = c_transcript.value.decode('utf-8')
278
324
  self._transcript_delete_func(c_transcript)
@@ -283,7 +329,8 @@ class Leopard(object):
283
329
  word=c_words[i].word.decode('utf-8'),
284
330
  start_sec=c_words[i].start_sec,
285
331
  end_sec=c_words[i].end_sec,
286
- confidence=c_words[i].confidence)
332
+ confidence=c_words[i].confidence,
333
+ speaker_tag=c_words[i].speaker_tag)
287
334
  words.append(word)
288
335
 
289
336
  self._words_delete_func(c_words)
@@ -307,6 +354,21 @@ class Leopard(object):
307
354
 
308
355
  return self._sample_rate
309
356
 
357
+ def _get_error_stack(self) -> Sequence[str]:
358
+ message_stack_ref = POINTER(c_char_p)()
359
+ message_stack_depth = c_int()
360
+ status = self._get_error_stack_func(byref(message_stack_ref), byref(message_stack_depth))
361
+ if status is not self.PicovoiceStatuses.SUCCESS:
362
+ raise self._PICOVOICE_STATUS_TO_EXCEPTION[status](message='Unable to get Leopard error state')
363
+
364
+ message_stack = list()
365
+ for i in range(message_stack_depth.value):
366
+ message_stack.append(message_stack_ref[i].decode('utf-8'))
367
+
368
+ self._free_error_stack_func(message_stack_ref)
369
+
370
+ return message_stack
371
+
310
372
 
311
373
  __all__ = [
312
374
  'Leopard',
Binary file
Binary file
Binary file
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pvleopard
3
- Version: 1.2.2
3
+ Version: 2.0.1
4
4
  Summary: Leopard Speech-to-Text Engine.
5
5
  Home-page: https://github.com/Picovoice/leopard
6
6
  Author: Picovoice
@@ -17,13 +17,15 @@ Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
17
17
  Requires-Python: >=3.5
18
18
  Description-Content-Type: text/markdown
19
19
 
20
- # Leopard Speech-to-Text Engine
20
+ # Leopard Binding for Python
21
+
22
+ ## Leopard Speech-to-Text Engine
21
23
 
22
24
  Made in Vancouver, Canada by [Picovoice](https://picovoice.ai)
23
25
 
24
26
  Leopard is an on-device speech-to-text engine. Leopard is:
25
27
 
26
- - Private; All voice processing runs locally.
28
+ - Private; All voice processing runs locally.
27
29
  - [Accurate](https://picovoice.ai/docs/benchmark/stt/)
28
30
  - [Compact and Computationally-Efficient](https://github.com/Picovoice/speech-to-text-benchmark#rtf)
29
31
  - Cross-Platform:
@@ -56,9 +58,9 @@ Create an instance of the engine and transcribe an audio file:
56
58
  ```python
57
59
  import pvleopard
58
60
 
59
- handle = pvleopard.create(access_key='${ACCESS_KEY}')
61
+ leopard = pvleopard.create(access_key='${ACCESS_KEY}')
60
62
 
61
- transcript, words = handle.process_file('${AUDIO_PATH}')
63
+ transcript, words = leopard.process_file('${AUDIO_PATH}')
62
64
  print(transcript)
63
65
  for word in words:
64
66
  print(
@@ -67,20 +69,24 @@ for word in words:
67
69
  ```
68
70
 
69
71
  Replace `${ACCESS_KEY}` with yours obtained from [Picovoice Console](https://console.picovoice.ai/) and
70
- `${AUDIO_PATH}` to the path an audio file. Finally, when done be sure to explicitly release the resources using
71
- `handle.delete()`.
72
+ `${AUDIO_PATH}` with the path to an audio file.
73
+
74
+ Finally, when done be sure to explicitly release the resources:
75
+ ```python
76
+ leopard.delete()
77
+ ```
72
78
 
73
79
  ## Language Model
74
80
 
75
- The Leopard Python SDK comes preloaded with a default English language model (`.pv` file).
76
- Default models for other supported languages can be found in [lib/common](../../lib/common).
81
+ The Leopard Python SDK comes preloaded with a default English language model (`.pv` file).
82
+ Default models for other supported languages can be found in [lib/common](../../lib/common).
77
83
 
78
84
  Create custom language models using the [Picovoice Console](https://console.picovoice.ai/). Here you can train
79
85
  language models with custom vocabulary and boost words in the existing vocabulary.
80
86
 
81
87
  Pass in the `.pv` file via the `model_path` argument:
82
88
  ```python
83
- handle = pvleopard.create(
89
+ leopard = pvleopard.create(
84
90
  access_key='${ACCESS_KEY}',
85
91
  model_path='${MODEL_PATH}')
86
92
  ```
@@ -0,0 +1,19 @@
1
+ pvleopard/LICENSE,sha256=ZurJwSSRHw99lGaJP88vQREqtZmIABuVKd_rK7k7U70,11344
2
+ pvleopard/__init__.py,sha256=OCI_7jgscvkzJMwT02SfB85_xOoQC8eb1e52HR_GKgo,577
3
+ pvleopard/_factory.py,sha256=1ZotcV9xdcJLRAYajFwFHfB0tV3_9bSi15_cjhedMnM,2075
4
+ pvleopard/_leopard.py,sha256=U6eNGzgQfpKaQxnDaOKHBDi2Kj_ysEHhF3SdVojbmhE,12741
5
+ pvleopard/_util.py,sha256=TQ3fXjUNUDerUbXdC6ntjMXaYD_B5udi__xp41zL9lE,3113
6
+ pvleopard/lib/common/leopard_params.pv,sha256=8jsCUfmOuuTD2MXLduV17hD1vHXuO-wscyJobYCsbfs,37699432
7
+ pvleopard/lib/jetson/cortex-a57-aarch64/libpv_leopard.so,sha256=y431wzz9NuRsOXnhk7ciZ9s6Lc3v9lr5mWdbkqC1bXE,1300032
8
+ pvleopard/lib/linux/x86_64/libpv_leopard.so,sha256=UhcGHHjCnR1Y-AMHVgVIbRoR_yjgRlw7SpcV86JwkX8,1415360
9
+ pvleopard/lib/mac/arm64/libpv_leopard.dylib,sha256=o4WF9nzDvF5K9_AKppz-avXsk-wApmRVKQuUArbvxSc,1545072
10
+ pvleopard/lib/mac/x86_64/libpv_leopard.dylib,sha256=Qwnh-0EI-VSWTjbSXnfmkxmYU8Z1ow-fFkCUQ2K9slc,1697720
11
+ pvleopard/lib/raspberry-pi/cortex-a53/libpv_leopard.so,sha256=DEnIu2Jpe_w9sviYNCLvgC1yqROLdt6A8b4r44d6eiQ,1275760
12
+ pvleopard/lib/raspberry-pi/cortex-a53-aarch64/libpv_leopard.so,sha256=FBqYYJbmtOu4AAD532__IfuJo6O9MnkP933CYK4pF3g,1312320
13
+ pvleopard/lib/raspberry-pi/cortex-a72/libpv_leopard.so,sha256=ALyBJ2I5B_nKQ2Vr7cTrPiFSnxIxsTPhOhcIWp3SOOg,1288048
14
+ pvleopard/lib/raspberry-pi/cortex-a72-aarch64/libpv_leopard.so,sha256=Kn5Ricawg62MGrzL6ZS-bcPOarlSU0PcwSQ-WG7k-Uo,1300032
15
+ pvleopard/lib/windows/amd64/libpv_leopard.dll,sha256=hM6eYFgcQ6p8ZIe8BFw0Yxm3enA2rPSRBCysVzFC8kQ,1524736
16
+ pvleopard-2.0.1.dist-info/METADATA,sha256=hloxBn0p8TABCghj9eSzSBIZxNNK-q1G_MyHR5yoOaY,3141
17
+ pvleopard-2.0.1.dist-info/WHEEL,sha256=g4nMs7d-Xl9-xC9XovUrsDHGXt-FT0E17Yqo92DEfvY,92
18
+ pvleopard-2.0.1.dist-info/top_level.txt,sha256=DAhlor-zWSROmsQCFWDsx_IJSE62zlgJ3sE4quxhEPw,10
19
+ pvleopard-2.0.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.37.1)
2
+ Generator: bdist_wheel (0.34.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,19 +0,0 @@
1
- pvleopard/LICENSE,sha256=ZurJwSSRHw99lGaJP88vQREqtZmIABuVKd_rK7k7U70,11344
2
- pvleopard/__init__.py,sha256=OCI_7jgscvkzJMwT02SfB85_xOoQC8eb1e52HR_GKgo,577
3
- pvleopard/_factory.py,sha256=WzrCwUlU_TCXkzZKPFd0ag4sAdiuwRlpOM6HYl1M-t0,1746
4
- pvleopard/_leopard.py,sha256=qk9kf7aRETnbEyBOrZ7sg_ESgKm3NEow_6nnQjjl-ms,10366
5
- pvleopard/_util.py,sha256=TQ3fXjUNUDerUbXdC6ntjMXaYD_B5udi__xp41zL9lE,3113
6
- pvleopard/lib/common/leopard_params.pv,sha256=CBoaZ8kach9nVD-I-SJakBvbVi2EfxpiumH9Bk_DKCU,19850729
7
- pvleopard/lib/jetson/cortex-a57-aarch64/libpv_leopard.so,sha256=W9ikHNNpZVIM4lkHABfGJa2WVUK-U5XwZrUz0mUr0es,1290872
8
- pvleopard/lib/linux/x86_64/libpv_leopard.so,sha256=peVOWhgqeC09UrYSKnduzvkKkm7WZ0-jWbiiTs770MM,1397912
9
- pvleopard/lib/mac/arm64/libpv_leopard.dylib,sha256=KYM2pXObVPfsitBIl4gvap2IyrDhMpCpNI6k23F_YlE,1513536
10
- pvleopard/lib/mac/x86_64/libpv_leopard.dylib,sha256=0ImW2M-JTEeAEZ9bfsLV9B0-FClfY1y6EnLR2tFCvs8,1666456
11
- pvleopard/lib/raspberry-pi/cortex-a53/libpv_leopard.so,sha256=YtVPTq2oVm9DLcK7ruo5ZJkqqe33-OGM1xq5l-v4P0E,1263028
12
- pvleopard/lib/raspberry-pi/cortex-a53-aarch64/libpv_leopard.so,sha256=w6kYHEuaAZ2tm-aHA_-xkmv-FgrmBFOhYtIDxPXb23c,1303240
13
- pvleopard/lib/raspberry-pi/cortex-a72/libpv_leopard.so,sha256=LCdTw1mJYY1CjNzXimay8P3kd9LMvUBZap38e8j9Wvw,1275316
14
- pvleopard/lib/raspberry-pi/cortex-a72-aarch64/libpv_leopard.so,sha256=rtpK3dyk8Kw6Ls4NxwkR4QQiwVo5mbWFVKHgkiITTOo,1295048
15
- pvleopard/lib/windows/amd64/libpv_leopard.dll,sha256=P4A37VaWU-bMVwoqvuxtKAZfg_Y2Li33mMwf5QjQ3eo,1505792
16
- pvleopard-1.2.2.dist-info/METADATA,sha256=0gk8fFJY2AbMv7pT-dWdD5YoXlZX-zT-4OhI4jhf2j8,3102
17
- pvleopard-1.2.2.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
18
- pvleopard-1.2.2.dist-info/top_level.txt,sha256=DAhlor-zWSROmsQCFWDsx_IJSE62zlgJ3sE4quxhEPw,10
19
- pvleopard-1.2.2.dist-info/RECORD,,