pvleopard 1.2.2__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pvleopard/_factory.py CHANGED
@@ -19,7 +19,8 @@ def create(
19
19
  access_key: str,
20
20
  model_path: Optional[str] = None,
21
21
  library_path: Optional[str] = None,
22
- enable_automatic_punctuation: bool = False) -> Leopard:
22
+ enable_automatic_punctuation: bool = False,
23
+ enable_diarization: bool = False) -> Leopard:
23
24
  """
24
25
  Factory method for Leopard speech-to-text engine.
25
26
 
@@ -28,6 +29,9 @@ def create(
28
29
  :param model_path: Absolute path to the file containing model parameters. If not set it will be set to the default
29
30
  location.
30
31
  :param enable_automatic_punctuation: Set to `True` to enable automatic punctuation insertion.
32
+ :param enable_diarization: Set to `True` to enable speaker diarization, which allows Leopard to differentiate
33
+ speakers as part of the transcription process. Word metadata will include a `speaker_tag` to
34
+ identify unique speakers.
31
35
  :return: An instance of Leopard speech-to-text engine.
32
36
  """
33
37
 
@@ -41,7 +45,8 @@ def create(
41
45
  access_key=access_key,
42
46
  model_path=model_path,
43
47
  library_path=library_path,
44
- enable_automatic_punctuation=enable_automatic_punctuation)
48
+ enable_automatic_punctuation=enable_automatic_punctuation,
49
+ enable_diarization=enable_diarization)
45
50
 
46
51
 
47
52
  __all__ = [
pvleopard/_leopard.py CHANGED
@@ -18,7 +18,27 @@ from typing import *
18
18
 
19
19
 
20
20
  class LeopardError(Exception):
21
- pass
21
+ def __init__(self, message: str = '', message_stack: Sequence[str] = None):
22
+ super().__init__(message)
23
+
24
+ self._message = message
25
+ self._message_stack = list() if message_stack is None else message_stack
26
+
27
+ def __str__(self):
28
+ message = self._message
29
+ if len(self._message_stack) > 0:
30
+ message += ':'
31
+ for i in range(len(self._message_stack)):
32
+ message += '\n [%d] %s' % (i, self._message_stack[i])
33
+ return message
34
+
35
+ @property
36
+ def message(self) -> str:
37
+ return self._message
38
+
39
+ @property
40
+ def message_stack(self) -> Sequence[str]:
41
+ return self._message_stack
22
42
 
23
43
 
24
44
  class LeopardMemoryError(LeopardError):
@@ -119,14 +139,16 @@ class Leopard(object):
119
139
  ("word", c_char_p),
120
140
  ("start_sec", c_float),
121
141
  ("end_sec", c_float),
122
- ("confidence", c_float)]
142
+ ("confidence", c_float),
143
+ ("speaker_tag", c_int32)]
123
144
 
124
145
  def __init__(
125
146
  self,
126
147
  access_key: str,
127
148
  model_path: str,
128
149
  library_path: str,
129
- enable_automatic_punctuation: bool = False) -> None:
150
+ enable_automatic_punctuation: bool = False,
151
+ enable_diarization: bool = False) -> None:
130
152
  """
131
153
  Constructor.
132
154
 
@@ -134,6 +156,9 @@ class Leopard(object):
134
156
  :param model_path: Absolute path to the file containing model parameters.
135
157
  :param library_path: Absolute path to Leopard's dynamic library.
136
158
  :param enable_automatic_punctuation: Set to `True` to enable automatic punctuation insertion.
159
+ :param enable_diarization: Set to `True` to enable speaker diarization, which allows Leopard to differentiate
160
+ speakers as part of the transcription process. Word metadata will include a `speaker_tag` to
161
+ identify unique speakers.
137
162
  """
138
163
 
139
164
  if not isinstance(access_key, str) or len(access_key) == 0:
@@ -147,15 +172,36 @@ class Leopard(object):
147
172
 
148
173
  library = cdll.LoadLibrary(library_path)
149
174
 
175
+ set_sdk_func = library.pv_set_sdk
176
+ set_sdk_func.argtypes = [c_char_p]
177
+ set_sdk_func.restype = None
178
+
179
+ set_sdk_func('python'.encode('utf-8'))
180
+
181
+ self._get_error_stack_func = library.pv_get_error_stack
182
+ self._get_error_stack_func.argtypes = [POINTER(POINTER(c_char_p)), POINTER(c_int)]
183
+ self._get_error_stack_func.restype = self.PicovoiceStatuses
184
+
185
+ self._free_error_stack_func = library.pv_free_error_stack
186
+ self._free_error_stack_func.argtypes = [POINTER(c_char_p)]
187
+ self._free_error_stack_func.restype = None
188
+
150
189
  init_func = library.pv_leopard_init
151
- init_func.argtypes = [c_char_p, c_char_p, c_bool, POINTER(POINTER(self.CLeopard))]
190
+ init_func.argtypes = [c_char_p, c_char_p, c_bool, c_bool, POINTER(POINTER(self.CLeopard))]
152
191
  init_func.restype = self.PicovoiceStatuses
153
192
 
154
193
  self._handle = POINTER(self.CLeopard)()
155
194
 
156
- status = init_func(access_key.encode(), model_path.encode(), enable_automatic_punctuation, byref(self._handle))
195
+ status = init_func(
196
+ access_key.encode(),
197
+ model_path.encode(),
198
+ enable_automatic_punctuation,
199
+ enable_diarization,
200
+ byref(self._handle))
157
201
  if status is not self.PicovoiceStatuses.SUCCESS:
158
- raise self._PICOVOICE_STATUS_TO_EXCEPTION[status]()
202
+ raise self._PICOVOICE_STATUS_TO_EXCEPTION[status](
203
+ message='Initialization failed',
204
+ message_stack=self._get_error_stack())
159
205
 
160
206
  self._delete_func = library.pv_leopard_delete
161
207
  self._delete_func.argtypes = [POINTER(self.CLeopard)]
@@ -201,7 +247,7 @@ class Leopard(object):
201
247
  ]
202
248
  self._words_delete_func.restype = None
203
249
 
204
- Word = namedtuple('Word', ['word', 'start_sec', 'end_sec', 'confidence'])
250
+ Word = namedtuple('Word', ['word', 'start_sec', 'end_sec', 'confidence', 'speaker_tag'])
205
251
 
206
252
  def process(self, pcm: Sequence[int]) -> Tuple[str, Sequence[Word]]:
207
253
  """
@@ -227,7 +273,9 @@ class Leopard(object):
227
273
  byref(num_words),
228
274
  byref(c_words))
229
275
  if status is not self.PicovoiceStatuses.SUCCESS:
230
- raise self._PICOVOICE_STATUS_TO_EXCEPTION[status]()
276
+ raise self._PICOVOICE_STATUS_TO_EXCEPTION[status](
277
+ message='Process failed',
278
+ message_stack=self._get_error_stack())
231
279
 
232
280
  transcript = c_transcript.value.decode('utf-8')
233
281
  self._transcript_delete_func(c_transcript)
@@ -238,7 +286,8 @@ class Leopard(object):
238
286
  word=c_words[i].word.decode('utf-8'),
239
287
  start_sec=c_words[i].start_sec,
240
288
  end_sec=c_words[i].end_sec,
241
- confidence=c_words[i].confidence)
289
+ confidence=c_words[i].confidence,
290
+ speaker_tag=c_words[i].speaker_tag)
242
291
  words.append(word)
243
292
 
244
293
  self._words_delete_func(c_words)
@@ -267,12 +316,9 @@ class Leopard(object):
267
316
  byref(num_words),
268
317
  byref(c_words))
269
318
  if status is not self.PicovoiceStatuses.SUCCESS:
270
- if status is self.PicovoiceStatuses.INVALID_ARGUMENT:
271
- if not audio_path.lower().endswith(self._VALID_EXTENSIONS):
272
- raise self._PICOVOICE_STATUS_TO_EXCEPTION[status](
273
- "Specified file with extension '%s' is not supported" % pathlib.Path(audio_path).suffix
274
- )
275
- raise self._PICOVOICE_STATUS_TO_EXCEPTION[status]()
319
+ raise self._PICOVOICE_STATUS_TO_EXCEPTION[status](
320
+ message='Process file failed',
321
+ message_stack=self._get_error_stack())
276
322
 
277
323
  transcript = c_transcript.value.decode('utf-8')
278
324
  self._transcript_delete_func(c_transcript)
@@ -283,7 +329,8 @@ class Leopard(object):
283
329
  word=c_words[i].word.decode('utf-8'),
284
330
  start_sec=c_words[i].start_sec,
285
331
  end_sec=c_words[i].end_sec,
286
- confidence=c_words[i].confidence)
332
+ confidence=c_words[i].confidence,
333
+ speaker_tag=c_words[i].speaker_tag)
287
334
  words.append(word)
288
335
 
289
336
  self._words_delete_func(c_words)
@@ -307,6 +354,21 @@ class Leopard(object):
307
354
 
308
355
  return self._sample_rate
309
356
 
357
+ def _get_error_stack(self) -> Sequence[str]:
358
+ message_stack_ref = POINTER(c_char_p)()
359
+ message_stack_depth = c_int()
360
+ status = self._get_error_stack_func(byref(message_stack_ref), byref(message_stack_depth))
361
+ if status is not self.PicovoiceStatuses.SUCCESS:
362
+ raise self._PICOVOICE_STATUS_TO_EXCEPTION[status](message='Unable to get Leopard error state')
363
+
364
+ message_stack = list()
365
+ for i in range(message_stack_depth.value):
366
+ message_stack.append(message_stack_ref[i].decode('utf-8'))
367
+
368
+ self._free_error_stack_func(message_stack_ref)
369
+
370
+ return message_stack
371
+
310
372
 
311
373
  __all__ = [
312
374
  'Leopard',
Binary file
Binary file
Binary file
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pvleopard
3
- Version: 1.2.2
3
+ Version: 2.0.1
4
4
  Summary: Leopard Speech-to-Text Engine.
5
5
  Home-page: https://github.com/Picovoice/leopard
6
6
  Author: Picovoice
@@ -17,13 +17,15 @@ Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
17
17
  Requires-Python: >=3.5
18
18
  Description-Content-Type: text/markdown
19
19
 
20
- # Leopard Speech-to-Text Engine
20
+ # Leopard Binding for Python
21
+
22
+ ## Leopard Speech-to-Text Engine
21
23
 
22
24
  Made in Vancouver, Canada by [Picovoice](https://picovoice.ai)
23
25
 
24
26
  Leopard is an on-device speech-to-text engine. Leopard is:
25
27
 
26
- - Private; All voice processing runs locally.
28
+ - Private; All voice processing runs locally.
27
29
  - [Accurate](https://picovoice.ai/docs/benchmark/stt/)
28
30
  - [Compact and Computationally-Efficient](https://github.com/Picovoice/speech-to-text-benchmark#rtf)
29
31
  - Cross-Platform:
@@ -56,9 +58,9 @@ Create an instance of the engine and transcribe an audio file:
56
58
  ```python
57
59
  import pvleopard
58
60
 
59
- handle = pvleopard.create(access_key='${ACCESS_KEY}')
61
+ leopard = pvleopard.create(access_key='${ACCESS_KEY}')
60
62
 
61
- transcript, words = handle.process_file('${AUDIO_PATH}')
63
+ transcript, words = leopard.process_file('${AUDIO_PATH}')
62
64
  print(transcript)
63
65
  for word in words:
64
66
  print(
@@ -67,20 +69,24 @@ for word in words:
67
69
  ```
68
70
 
69
71
  Replace `${ACCESS_KEY}` with yours obtained from [Picovoice Console](https://console.picovoice.ai/) and
70
- `${AUDIO_PATH}` to the path an audio file. Finally, when done be sure to explicitly release the resources using
71
- `handle.delete()`.
72
+ `${AUDIO_PATH}` with the path to an audio file.
73
+
74
+ Finally, when done be sure to explicitly release the resources:
75
+ ```python
76
+ leopard.delete()
77
+ ```
72
78
 
73
79
  ## Language Model
74
80
 
75
- The Leopard Python SDK comes preloaded with a default English language model (`.pv` file).
76
- Default models for other supported languages can be found in [lib/common](../../lib/common).
81
+ The Leopard Python SDK comes preloaded with a default English language model (`.pv` file).
82
+ Default models for other supported languages can be found in [lib/common](../../lib/common).
77
83
 
78
84
  Create custom language models using the [Picovoice Console](https://console.picovoice.ai/). Here you can train
79
85
  language models with custom vocabulary and boost words in the existing vocabulary.
80
86
 
81
87
  Pass in the `.pv` file via the `model_path` argument:
82
88
  ```python
83
- handle = pvleopard.create(
89
+ leopard = pvleopard.create(
84
90
  access_key='${ACCESS_KEY}',
85
91
  model_path='${MODEL_PATH}')
86
92
  ```
@@ -0,0 +1,19 @@
1
+ pvleopard/LICENSE,sha256=ZurJwSSRHw99lGaJP88vQREqtZmIABuVKd_rK7k7U70,11344
2
+ pvleopard/__init__.py,sha256=OCI_7jgscvkzJMwT02SfB85_xOoQC8eb1e52HR_GKgo,577
3
+ pvleopard/_factory.py,sha256=1ZotcV9xdcJLRAYajFwFHfB0tV3_9bSi15_cjhedMnM,2075
4
+ pvleopard/_leopard.py,sha256=U6eNGzgQfpKaQxnDaOKHBDi2Kj_ysEHhF3SdVojbmhE,12741
5
+ pvleopard/_util.py,sha256=TQ3fXjUNUDerUbXdC6ntjMXaYD_B5udi__xp41zL9lE,3113
6
+ pvleopard/lib/common/leopard_params.pv,sha256=8jsCUfmOuuTD2MXLduV17hD1vHXuO-wscyJobYCsbfs,37699432
7
+ pvleopard/lib/jetson/cortex-a57-aarch64/libpv_leopard.so,sha256=y431wzz9NuRsOXnhk7ciZ9s6Lc3v9lr5mWdbkqC1bXE,1300032
8
+ pvleopard/lib/linux/x86_64/libpv_leopard.so,sha256=UhcGHHjCnR1Y-AMHVgVIbRoR_yjgRlw7SpcV86JwkX8,1415360
9
+ pvleopard/lib/mac/arm64/libpv_leopard.dylib,sha256=o4WF9nzDvF5K9_AKppz-avXsk-wApmRVKQuUArbvxSc,1545072
10
+ pvleopard/lib/mac/x86_64/libpv_leopard.dylib,sha256=Qwnh-0EI-VSWTjbSXnfmkxmYU8Z1ow-fFkCUQ2K9slc,1697720
11
+ pvleopard/lib/raspberry-pi/cortex-a53/libpv_leopard.so,sha256=DEnIu2Jpe_w9sviYNCLvgC1yqROLdt6A8b4r44d6eiQ,1275760
12
+ pvleopard/lib/raspberry-pi/cortex-a53-aarch64/libpv_leopard.so,sha256=FBqYYJbmtOu4AAD532__IfuJo6O9MnkP933CYK4pF3g,1312320
13
+ pvleopard/lib/raspberry-pi/cortex-a72/libpv_leopard.so,sha256=ALyBJ2I5B_nKQ2Vr7cTrPiFSnxIxsTPhOhcIWp3SOOg,1288048
14
+ pvleopard/lib/raspberry-pi/cortex-a72-aarch64/libpv_leopard.so,sha256=Kn5Ricawg62MGrzL6ZS-bcPOarlSU0PcwSQ-WG7k-Uo,1300032
15
+ pvleopard/lib/windows/amd64/libpv_leopard.dll,sha256=hM6eYFgcQ6p8ZIe8BFw0Yxm3enA2rPSRBCysVzFC8kQ,1524736
16
+ pvleopard-2.0.1.dist-info/METADATA,sha256=hloxBn0p8TABCghj9eSzSBIZxNNK-q1G_MyHR5yoOaY,3141
17
+ pvleopard-2.0.1.dist-info/WHEEL,sha256=g4nMs7d-Xl9-xC9XovUrsDHGXt-FT0E17Yqo92DEfvY,92
18
+ pvleopard-2.0.1.dist-info/top_level.txt,sha256=DAhlor-zWSROmsQCFWDsx_IJSE62zlgJ3sE4quxhEPw,10
19
+ pvleopard-2.0.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.37.1)
2
+ Generator: bdist_wheel (0.34.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,19 +0,0 @@
1
- pvleopard/LICENSE,sha256=ZurJwSSRHw99lGaJP88vQREqtZmIABuVKd_rK7k7U70,11344
2
- pvleopard/__init__.py,sha256=OCI_7jgscvkzJMwT02SfB85_xOoQC8eb1e52HR_GKgo,577
3
- pvleopard/_factory.py,sha256=WzrCwUlU_TCXkzZKPFd0ag4sAdiuwRlpOM6HYl1M-t0,1746
4
- pvleopard/_leopard.py,sha256=qk9kf7aRETnbEyBOrZ7sg_ESgKm3NEow_6nnQjjl-ms,10366
5
- pvleopard/_util.py,sha256=TQ3fXjUNUDerUbXdC6ntjMXaYD_B5udi__xp41zL9lE,3113
6
- pvleopard/lib/common/leopard_params.pv,sha256=CBoaZ8kach9nVD-I-SJakBvbVi2EfxpiumH9Bk_DKCU,19850729
7
- pvleopard/lib/jetson/cortex-a57-aarch64/libpv_leopard.so,sha256=W9ikHNNpZVIM4lkHABfGJa2WVUK-U5XwZrUz0mUr0es,1290872
8
- pvleopard/lib/linux/x86_64/libpv_leopard.so,sha256=peVOWhgqeC09UrYSKnduzvkKkm7WZ0-jWbiiTs770MM,1397912
9
- pvleopard/lib/mac/arm64/libpv_leopard.dylib,sha256=KYM2pXObVPfsitBIl4gvap2IyrDhMpCpNI6k23F_YlE,1513536
10
- pvleopard/lib/mac/x86_64/libpv_leopard.dylib,sha256=0ImW2M-JTEeAEZ9bfsLV9B0-FClfY1y6EnLR2tFCvs8,1666456
11
- pvleopard/lib/raspberry-pi/cortex-a53/libpv_leopard.so,sha256=YtVPTq2oVm9DLcK7ruo5ZJkqqe33-OGM1xq5l-v4P0E,1263028
12
- pvleopard/lib/raspberry-pi/cortex-a53-aarch64/libpv_leopard.so,sha256=w6kYHEuaAZ2tm-aHA_-xkmv-FgrmBFOhYtIDxPXb23c,1303240
13
- pvleopard/lib/raspberry-pi/cortex-a72/libpv_leopard.so,sha256=LCdTw1mJYY1CjNzXimay8P3kd9LMvUBZap38e8j9Wvw,1275316
14
- pvleopard/lib/raspberry-pi/cortex-a72-aarch64/libpv_leopard.so,sha256=rtpK3dyk8Kw6Ls4NxwkR4QQiwVo5mbWFVKHgkiITTOo,1295048
15
- pvleopard/lib/windows/amd64/libpv_leopard.dll,sha256=P4A37VaWU-bMVwoqvuxtKAZfg_Y2Li33mMwf5QjQ3eo,1505792
16
- pvleopard-1.2.2.dist-info/METADATA,sha256=0gk8fFJY2AbMv7pT-dWdD5YoXlZX-zT-4OhI4jhf2j8,3102
17
- pvleopard-1.2.2.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
18
- pvleopard-1.2.2.dist-info/top_level.txt,sha256=DAhlor-zWSROmsQCFWDsx_IJSE62zlgJ3sE4quxhEPw,10
19
- pvleopard-1.2.2.dist-info/RECORD,,