abr-sdk 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abr_sdk/__init__.py +37 -0
- abr_sdk/__main__.py +175 -0
- abr_sdk/asr.py +485 -0
- abr_sdk/cabi.py +1719 -0
- abr_sdk/core.py +917 -0
- abr_sdk/exceptions.py +42 -0
- abr_sdk/keygen.py +414 -0
- abr_sdk/py.typed +0 -0
- abr_sdk/tts.py +271 -0
- abr_sdk/tts_preprocess.py +798 -0
- abr_sdk/version.py +78 -0
- abr_sdk-0.1.0.dist-info/METADATA +86 -0
- abr_sdk-0.1.0.dist-info/RECORD +17 -0
- abr_sdk-0.1.0.dist-info/WHEEL +5 -0
- abr_sdk-0.1.0.dist-info/entry_points.txt +2 -0
- abr_sdk-0.1.0.dist-info/licenses/LICENSE.md +28 -0
- abr_sdk-0.1.0.dist-info/top_level.txt +1 -0
abr_sdk/__init__.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# (c) 2026 Applied Brain Research
|
|
2
|
+
#
|
|
3
|
+
# All information contained herein is and remains the property of Applied Brain
|
|
4
|
+
# Research. The intellectual and technical concepts contained herein are
|
|
5
|
+
# proprietary to Applied Brain Research and may be covered by U.S. and Foreign
|
|
6
|
+
# Patents, patents in process, and are protected by trade secret or copyright
|
|
7
|
+
# law. Dissemination of this information or reproduction of this material is
|
|
8
|
+
# strictly forbidden unless prior written permission is obtained from Applied
|
|
9
|
+
# Brain Research. Access to the source code contained herein is hereby
|
|
10
|
+
# forbidden to anyone except current Applied Brain Research employees,
|
|
11
|
+
# contractors or other outside parties that have executed Confidentiality
|
|
12
|
+
# and/or Non-disclosure agreements explicitly covering such access.
|
|
13
|
+
#
|
|
14
|
+
# The copyright notice above does not evidence any actual or intended
|
|
15
|
+
# publication or disclosure of this source code, which includes information
|
|
16
|
+
# that is confidential and/or proprietary, and is a trade secret, of Applied
|
|
17
|
+
# Brain Research. ANY REPRODUCTION, MODIFICATION, DISTRIBUTION, PUBLIC
|
|
18
|
+
# PERFORMANCE, OR PUBLIC DISPLAY OF OR THROUGH USE OF THIS SOURCE CODE WITHOUT
|
|
19
|
+
# THE EXPRESS WRITTEN CONSENT OF APPLIED BRAIN RESEARCH IS STRICTLY PROHIBITED,
|
|
20
|
+
# AND IN VIOLATION OF APPLICABLE LAWS AND INTERNATIONAL TREATIES. THE RECEIPT
|
|
21
|
+
# OR POSSESSION OF THIS SOURCE CODE AND/OR RELATED INFORMATION DOES NOT CONVEY
|
|
22
|
+
# OR IMPLY ANY RIGHTS TO REPRODUCE, DISCLOSE OR DISTRIBUTE ITS CONTENTS, OR TO
|
|
23
|
+
# MANUFACTURE, USE, OR SELL ANYTHING THAT IT MAY DESCRIBE, IN WHOLE OR IN PART.
|
|
24
|
+
|
|
25
|
+
"""Entry point for the Python ABR SDK."""
|
|
26
|
+
|
|
27
|
+
from abr_sdk.core import Application, Library
|
|
28
|
+
from abr_sdk.exceptions import AbrSdkError
|
|
29
|
+
from abr_sdk.version import __copyright__, __version__
|
|
30
|
+
|
|
31
|
+
# Names re-exported at the package root: the SDK exception type, the two
# classes imported from ``abr_sdk.core``, and the version metadata.
__all__ = [
    "AbrSdkError",
    "Application",
    "Library",
    "__copyright__",
    "__version__",
]
|
abr_sdk/__main__.py
ADDED
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
# (c) 2026 Applied Brain Research
|
|
2
|
+
#
|
|
3
|
+
# All information contained herein is and remains the property of Applied Brain
|
|
4
|
+
# Research. The intellectual and technical concepts contained herein are
|
|
5
|
+
# proprietary to Applied Brain Research and may be covered by U.S. and Foreign
|
|
6
|
+
# Patents, patents in process, and are protected by trade secret or copyright
|
|
7
|
+
# law. Dissemination of this information or reproduction of this material is
|
|
8
|
+
# strictly forbidden unless prior written permission is obtained from Applied
|
|
9
|
+
# Brain Research. Access to the source code contained herein is hereby
|
|
10
|
+
# forbidden to anyone except current Applied Brain Research employees,
|
|
11
|
+
# contractors or other outside parties that have executed Confidentiality
|
|
12
|
+
# and/or Non-disclosure agreements explicitly covering such access.
|
|
13
|
+
#
|
|
14
|
+
# The copyright notice above does not evidence any actual or intended
|
|
15
|
+
# publication or disclosure of this source code, which includes information
|
|
16
|
+
# that is confidential and/or proprietary, and is a trade secret, of Applied
|
|
17
|
+
# Brain Research. ANY REPRODUCTION, MODIFICATION, DISTRIBUTION, PUBLIC
|
|
18
|
+
# PERFORMANCE, OR PUBLIC DISPLAY OF OR THROUGH USE OF THIS SOURCE CODE WITHOUT
|
|
19
|
+
# THE EXPRESS WRITTEN CONSENT OF APPLIED BRAIN RESEARCH IS STRICTLY PROHIBITED,
|
|
20
|
+
# AND IN VIOLATION OF APPLICABLE LAWS AND INTERNATIONAL TREATIES. THE RECEIPT
|
|
21
|
+
# OR POSSESSION OF THIS SOURCE CODE AND/OR RELATED INFORMATION DOES NOT CONVEY
|
|
22
|
+
# OR IMPLY ANY RIGHTS TO REPRODUCE, DISCLOSE OR DISTRIBUTE ITS CONTENTS, OR TO
|
|
23
|
+
# MANUFACTURE, USE, OR SELL ANYTHING THAT IT MAY DESCRIBE, IN WHOLE OR IN PART.
|
|
24
|
+
|
|
25
|
+
"""
|
|
26
|
+
The ``abr-sdk`` command-line tool.
|
|
27
|
+
|
|
28
|
+
Currently exposes one subcommand, ``activate``, which reads this device's
|
|
29
|
+
fingerprint from the SDK library, trades a license key for a signed machine
|
|
30
|
+
file from Keygen, and writes it under the license directory.
|
|
31
|
+
|
|
32
|
+
The fingerprint is read from the library's ``fingerprint`` property, available
|
|
33
|
+
without instantiating a model, so activation needs no license to discover the
|
|
34
|
+
fingerprint it must bind to. Verifying and installing the downloaded envelope is
|
|
35
|
+
the SDK's job: ``abr_sdk.core.install_license`` hands it to the native
|
|
36
|
+
``abr_app_activate_license``, which checks the signature and writes the
|
|
37
|
+
device-bound license and integrity files under the license directory.
|
|
38
|
+
|
|
39
|
+
Author: Trevor Bekolay (Applied Brain Research)
|
|
40
|
+
"""
|
|
41
|
+
|
|
42
|
+
from __future__ import annotations
|
|
43
|
+
|
|
44
|
+
import argparse
|
|
45
|
+
import enum
|
|
46
|
+
import string
|
|
47
|
+
import sys
|
|
48
|
+
from pathlib import Path
|
|
49
|
+
from typing import Final
|
|
50
|
+
|
|
51
|
+
from abr_sdk.cabi import AbrSdkCAbiError, Status
|
|
52
|
+
from abr_sdk.core import DEFAULT_LICENSE_DIR, Library, install_license
|
|
53
|
+
from abr_sdk.exceptions import AbrSdkError
|
|
54
|
+
from abr_sdk.keygen import ActivationError, Failure, KeygenClient
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@enum.unique
class ExitCode(enum.IntEnum):
    """
    Exit statuses of the ``abr-sdk`` command.

    The code only identifies the broad failure category; the detailed
    reason is printed to stderr.
    """

    # The command completed.
    SUCCESS = 0
    # Any failure without a more specific category.
    ERROR = 1
    # Network-related activation failure.
    NETWORK = 2
    # License-related activation or installation failure.
    LICENSE = 3
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
# Exit code reported for each activation failure kind carried by an
# ``ActivationError``; protocol failures use the generic ERROR code.
_EXIT_FOR_FAILURE: Final[dict[Failure, ExitCode]] = {
    Failure.NETWORK: ExitCode.NETWORK,
    Failure.LICENSE: ExitCode.LICENSE,
    Failure.PROTOCOL: ExitCode.ERROR,
}
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def build_parser() -> argparse.ArgumentParser:
    """
    Build the argument parser for the ``abr-sdk`` command.

    A subcommand is mandatory. The only one registered is ``activate``,
    which takes the path to the SDK shared library, exactly one of
    ``--key`` / ``--key-file``, and an optional ``--license-dir``. The
    selected subcommand's handler is stored on the namespace as ``func``.
    """
    root = argparse.ArgumentParser(
        prog="abr-sdk",
        description="ABR SDK command-line tools.",
    )
    commands = root.add_subparsers(dest="command", required=True)

    activate_cmd = commands.add_parser(
        "activate",
        help="Activate this device's ABR license against Keygen.",
    )
    activate_cmd.add_argument(
        "library",
        type=Path,
        help="Path to the ABR SDK shared library (.so).",
    )

    # The license key must come from exactly one of these two options.
    key_source = activate_cmd.add_mutually_exclusive_group(required=True)
    key_source.add_argument(
        "--key",
        metavar="STRING",
        help="The ABR license key, as a string.",
    )
    key_source.add_argument(
        "--key-file",
        type=Path,
        metavar="FILE",
        help="Path to a file containing the ABR license key (whitespace trimmed).",
    )

    activate_cmd.add_argument(
        "--license-dir",
        type=Path,
        default=DEFAULT_LICENSE_DIR,
        metavar="DIR",
        help=f"Directory to write the license into (default: {DEFAULT_LICENSE_DIR}).",
    )
    activate_cmd.set_defaults(func=cmd_activate)
    return root
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def is_valid_fingerprint(value: str) -> bool:
    """
    Check whether *value* has the shape of a device fingerprint.

    A fingerprint is accepted when it is exactly 64 characters long and
    every character is a hexadecimal digit (either case).
    """
    if len(value) != 64:
        return False
    for char in value:
        if char not in string.hexdigits:
            return False
    return True
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def cmd_activate(args: argparse.Namespace) -> int:
    """
    Read the device fingerprint from the library, then run activation.

    Parameters
    ----------
    args
        Parsed arguments of the ``activate`` subcommand (``library``,
        ``key`` / ``key_file`` and ``license_dir``).

    Returns
    -------
    int
        The process exit code: the result of :func:`run_activation`, or
        ``ExitCode.ERROR`` when the key file cannot be read, the library
        cannot be loaded, or its fingerprint is missing or malformed.
    """
    try:
        key = _resolve_key(args)
    except (OSError, UnicodeDecodeError) as exc:
        # UnicodeDecodeError is not an OSError: a key file that is not valid
        # UTF-8 must produce the same clean error as an unreadable one.
        return _fail(ExitCode.ERROR, f"cannot read the key file {args.key_file}: {exc}")

    try:
        library = Library(args.library)
    except (AbrSdkError, OSError) as exc:
        return _fail(ExitCode.ERROR, f"cannot load the SDK library: {exc}")
    try:
        fingerprint = library.get_property("fingerprint")
    except KeyError:
        return _fail(ExitCode.ERROR, "SDK library exposes no 'fingerprint' property.")
    if not isinstance(fingerprint, str) or not is_valid_fingerprint(fingerprint):
        return _fail(ExitCode.ERROR, "device fingerprint from the library is invalid.")

    client = KeygenClient(key)
    # The fingerprint is lowercased before being handed to activation.
    return run_activation(client, library, fingerprint.lower(), args.license_dir)
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def _resolve_key(args: argparse.Namespace) -> str:
|
|
135
|
+
"""Return the license key from ``--key`` or the ``--key-file`` contents."""
|
|
136
|
+
if args.key_file is not None:
|
|
137
|
+
return args.key_file.read_text(encoding="utf-8").strip()
|
|
138
|
+
return args.key
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def run_activation(
    client: KeygenClient, library: Library, fingerprint: str, license_dir: Path
) -> int:
    """
    Download the license envelope from Keygen and install it via the SDK.

    Parameters
    ----------
    client
        Keygen client used to activate *fingerprint*.
    library
        Loaded SDK library that installs the downloaded envelope.
    fingerprint
        Device fingerprint to activate.
    license_dir
        Directory the license is installed into.

    Returns
    -------
    int
        ``ExitCode.SUCCESS`` on success; otherwise the exit code matching
        the failure, after a message has been printed to stderr.
    """
    try:
        envelope = client.activate(fingerprint)
    except ActivationError as exc:
        # Fall back to the generic error code for a failure kind without a
        # mapping, so a KeyError raised here cannot hide the real error.
        return _fail(_EXIT_FOR_FAILURE.get(exc.kind, ExitCode.ERROR), str(exc))

    try:
        install_license(library, envelope, license_dir=license_dir)
    except AbrSdkCAbiError as exc:
        # Only a license status from the native call gets the LICENSE code.
        code = ExitCode.LICENSE if exc.status == Status.ERR_LICENSE else ExitCode.ERROR
        return _fail(code, f"could not install the license: {exc}")
    except AbrSdkError as exc:
        return _fail(ExitCode.ERROR, f"could not install the license: {exc}")

    print(f"abr-sdk: activation succeeded; license installed in {license_dir}")
    return ExitCode.SUCCESS
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def _fail(code: ExitCode, message: str) -> int:
|
|
163
|
+
"""Print a prefixed message to stderr and return *code*."""
|
|
164
|
+
print(f"abr-sdk: {message}", file=sys.stderr)
|
|
165
|
+
return code
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def main(argv: list[str] | None = None) -> None:
    """
    Run the ``abr-sdk`` command line.

    Parses *argv* (or the process arguments when *None*), invokes the
    handler registered by the chosen subcommand, and always terminates by
    raising :class:`SystemExit` with the handler's return value.
    """
    parser = build_parser()
    namespace = parser.parse_args(argv)
    exit_status = namespace.func(namespace)
    raise SystemExit(exit_status)
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
# Run the command-line tool when this module is executed as a script.
if __name__ == "__main__":
    main()
|
abr_sdk/asr.py
ADDED
|
@@ -0,0 +1,485 @@
|
|
|
1
|
+
# (c) 2026 Applied Brain Research
|
|
2
|
+
#
|
|
3
|
+
# All information contained herein is and remains the property of Applied Brain
|
|
4
|
+
# Research. The intellectual and technical concepts contained herein are
|
|
5
|
+
# proprietary to Applied Brain Research and may be covered by U.S. and Foreign
|
|
6
|
+
# Patents, patents in process, and are protected by trade secret or copyright
|
|
7
|
+
# law. Dissemination of this information or reproduction of this material is
|
|
8
|
+
# strictly forbidden unless prior written permission is obtained from Applied
|
|
9
|
+
# Brain Research. Access to the source code contained herein is hereby
|
|
10
|
+
# forbidden to anyone except current Applied Brain Research employees,
|
|
11
|
+
# contractors or other outside parties that have executed Confidentiality
|
|
12
|
+
# and/or Non-disclosure agreements explicitly covering such access.
|
|
13
|
+
#
|
|
14
|
+
# The copyright notice above does not evidence any actual or intended
|
|
15
|
+
# publication or disclosure of this source code, which includes information
|
|
16
|
+
# that is confidential and/or proprietary, and is a trade secret, of Applied
|
|
17
|
+
# Brain Research. ANY REPRODUCTION, MODIFICATION, DISTRIBUTION, PUBLIC
|
|
18
|
+
# PERFORMANCE, OR PUBLIC DISPLAY OF OR THROUGH USE OF THIS SOURCE CODE WITHOUT
|
|
19
|
+
# THE EXPRESS WRITTEN CONSENT OF APPLIED BRAIN RESEARCH IS STRICTLY PROHIBITED,
|
|
20
|
+
# AND IN VIOLATION OF APPLICABLE LAWS AND INTERNATIONAL TREATIES. THE RECEIPT
|
|
21
|
+
# OR POSSESSION OF THIS SOURCE CODE AND/OR RELATED INFORMATION DOES NOT CONVEY
|
|
22
|
+
# OR IMPLY ANY RIGHTS TO REPRODUCE, DISCLOSE OR DISTRIBUTE ITS CONTENTS, OR TO
|
|
23
|
+
# MANUFACTURE, USE, OR SELL ANYTHING THAT IT MAY DESCRIBE, IN WHOLE OR IN PART.
|
|
24
|
+
|
|
25
|
+
"""
|
|
26
|
+
Automatic speech recognition (ASR) wrapper and chunk parser.
|
|
27
|
+
|
|
28
|
+
Author: Andreas Stöckel (Applied Brain Research)
|
|
29
|
+
Author: Pawel Jaworski (Applied Brain Research)
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
from __future__ import annotations
|
|
33
|
+
|
|
34
|
+
import contextlib
|
|
35
|
+
import ctypes
|
|
36
|
+
import enum
|
|
37
|
+
import logging
|
|
38
|
+
import time
|
|
39
|
+
from collections.abc import Callable
|
|
40
|
+
from dataclasses import dataclass
|
|
41
|
+
from pathlib import Path
|
|
42
|
+
from types import TracebackType
|
|
43
|
+
|
|
44
|
+
from abr_sdk import cabi
|
|
45
|
+
from abr_sdk.cabi import AsrTextChunkType # deliberate re-export
|
|
46
|
+
from abr_sdk.core import Application, Buffer, EventFlags, EventSet, Library, VariantDict
|
|
47
|
+
from abr_sdk.exceptions import AbrSdkError, AbrSdkUnexpectedState
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class AsrMode(enum.Enum):
    """
    Selectable ASR decoder modes.

    The mode trades latency against accuracy; ``str(mode)`` yields the
    member's plain string value.
    """

    FAST = "fast"
    ACCURATE = "accurate"

    # Once the minimum supported Python is 3.11+, inherit from
    # `enum.StrEnum` and remove this override.
    def __str__(self) -> str:
        return str(self.value)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class Asr(Application):
    """
    An ABR instance with automatic speech recognition support.

    Can be constructed from a library path with keyword arguments::

        with Asr("libabr-asr.so") as asr:
            ...

    **Simple (blocking) API** -- process an entire audio clip at once::

        transcript = asr.process(pcm_bytes)
        print(transcript.text)

    **Streaming API** -- push audio incrementally::

        transcript = AsrTranscript()
        asr.push(chunk1, on_chunk=transcript.chunks.append)
        asr.push(chunk2, on_chunk=transcript.chunks.append)
        asr.wait_for_completion()
        print(transcript.text)

    For finer control over the streaming event loop, use :class:`Processor`
    directly.
    """

    def __init__(
        self,
        lib_or_path: str | Path | Library,
        *,
        mode: AsrMode | None = None,
        enable_spellcheck: bool | None = None,
        enable_punctuation: bool | None = None,
        lib_search_paths: list[str | Path] | None = None,
        use_default_lib_search_paths: bool = True,
        resources_dir: str | Path | None = None,
        logger: logging.Logger | None = None,
    ) -> None:
        """
        Create the application and bind its ASR input and output buffers.

        Parameters
        ----------
        lib_or_path
            Loaded library, or path to the shared library to load.
        mode
            Decoder mode, passed as the ``asr_mode`` config key.
        enable_spellcheck
            Passed as the ``asr_enable_spellcheck`` config key.
        enable_punctuation
            Passed as the ``asr_enable_pcr`` config key.
        lib_search_paths, use_default_lib_search_paths, resources_dir, logger
            Forwarded unchanged to :class:`Application`.

        Raises
        ------
        AbrSdkError
            If the loaded application does not support ASR.
        """
        # ASR-specific config. Keys the caller did not provide are
        # omitted so the loaded library picks its own default.
        config: VariantDict = {}
        if mode is not None:
            config["asr_mode"] = mode.value
        if enable_spellcheck is not None:
            config["asr_enable_spellcheck"] = enable_spellcheck
        if enable_punctuation is not None:
            config["asr_enable_pcr"] = enable_punctuation

        super().__init__(
            lib_or_path=lib_or_path,
            lib_search_paths=lib_search_paths,
            use_default_lib_search_paths=use_default_lib_search_paths,
            app_type="asr",
            resources_dir=resources_dir,
            config=config,
            logger=logger,
        )

        # Free the native handle if the ASR-specific initialisation fails;
        # __exit__ never runs when __init__ raises.
        with contextlib.ExitStack() as close_on_error:
            close_on_error.callback(self.close)
            if not self._cabi.abr_asr_available(self._handle):
                raise AbrSdkError("The loaded application does not support ASR")
            self.input_buffer = Buffer(
                self._cabi, self._cabi.abr_asr_get_input_buffer(self._handle)
            )
            self.text_chunk_output_buffer = Buffer(
                self._cabi,
                self._cabi.abr_asr_get_text_chunk_output_buffer(self._handle),
            )
            # Initialisation succeeded: drop the pending close callback.
            close_on_error.pop_all()
        # Processor of the streaming session in progress, if any.
        self._processor: Processor | None = None

    def __enter__(self) -> Asr:
        return self

    def flush(self) -> None:
        """Flush the ASR pipeline to finish processing remaining audio."""
        self._ensure_alive()
        self._cabi.abr_asr_flush(self._handle)

    def process(self, data: bytes) -> AsrTranscript:
        """
        Process PCM audio data and return the complete transcript.

        This is a synchronous/blocking call that pushes all *data* through
        the ASR pipeline, waits for the neural network to finish, and
        returns a :class:`AsrTranscript` containing the result. Cannot be
        used while a streaming session started with :meth:`push` is in
        progress.

        Parameters
        ----------
        data
            PCM audio as a little-endian 16-bit byte array.

        Raises
        ------
        AbrSdkError
            If a streaming session is in progress.
        """
        self._ensure_alive()
        if self._processor is not None:
            raise AbrSdkError(
                "A streaming session is in progress; call wait_for_completion() "
                "before using process()"
            )
        transcript = AsrTranscript()
        # A one-shot processor: push everything, flush, collect every chunk.
        with Processor(self, on_chunk=transcript.chunks.append) as proc:
            proc.process_and_wait_for_output(data, 0, True)
        return transcript

    def push(
        self,
        data: bytes,
        *,
        on_chunk: Callable[[AsrChunk], None] | None = None,
        output_poll_timeout_ms: int = 0,
    ) -> None:
        """
        Push PCM audio data into the ASR network (streaming API).

        On the first call an internal :class:`Processor` is created with
        *on_chunk* as the listener callback. Subsequent calls reuse the
        same processor (the *on_chunk* argument is ignored after the first
        call). Call :meth:`wait_for_completion` after the last audio
        chunk has been pushed.

        Parameters
        ----------
        data
            PCM audio as little-endian 16-bit bytes.
        on_chunk
            Callback invoked for each transcribed text chunk. Only used
            on the first call (when the internal processor is created).
        output_poll_timeout_ms
            Extra time in milliseconds to spend waiting for output after
            the input has been pushed. ``0`` (the default) returns as
            soon as all input bytes have been consumed.

        Raises
        ------
        ValueError
            If *output_poll_timeout_ms* is negative.
        """
        self._ensure_alive()
        if output_poll_timeout_ms < 0:
            raise ValueError("output_poll_timeout_ms must not be negative")
        if self._processor is None:
            self._processor = Processor(self, on_chunk=on_chunk)
        self._processor.process_and_wait_for_output(data, output_poll_timeout_ms, False)

    def wait_for_completion(self) -> None:
        """
        Block until all previously pushed data is fully processed.

        The ``on_chunk`` callback may be invoked during this call. When
        this method returns, the internal processor is closed and a new
        streaming session can be started by calling :meth:`push` again.
        """
        self._ensure_alive()
        if self._processor is None:
            return
        try:
            self._processor.process_and_wait_for_output(None, 0, True)
        finally:
            # End the session even when draining failed.
            self._processor.close()
            self._processor = None

    def close(self) -> None:
        """Release all resources held by this instance."""
        if self._closed:
            return
        # close() can run before __init__ assigned _processor (it is the
        # error callback of the constructor), hence the getattr.
        proc = getattr(self, "_processor", None)
        self._processor = None
        try:
            if proc is not None:
                proc.close()
        finally:
            # Release the application even if closing the processor raised.
            super().close()
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
@dataclass
class AsrChunk:
    """
    A text chunk produced by the ASR subsystem.

    Parse from raw buffer output with :meth:`parse`. Apply to a running
    transcript with :meth:`update`.

    The two replace offsets are relative to the end of the running
    transcript; chunks produced by :meth:`parse` always satisfy
    ``begin <= end <= 0``.
    """

    # Size in bytes of one serialized chunk (the ``cabi.AsrTextChunk`` struct).
    SIZE = ctypes.sizeof(cabi.AsrTextChunk)

    type: AsrTextChunkType
    replace_byte_offset_begin: int
    replace_byte_offset_end: int
    data: bytes

    @staticmethod
    def parse(raw: bytes | bytearray) -> AsrChunk:
        """
        Parse *raw* bytes from the ASR output buffer into an :class:`AsrChunk`.

        *raw* must be exactly :attr:`SIZE` bytes.

        Raises
        ------
        AbrSdkUnexpectedState
            If *raw* has the wrong length, or the decoded offsets or
            payload length are out of range.
        """
        if len(raw) != AsrChunk.SIZE:
            raise AbrSdkUnexpectedState(
                f"Expected {AsrChunk.SIZE} bytes, got {len(raw)}"
            )
        c = cabi.AsrTextChunk.from_buffer_copy(raw)
        # Offsets count backwards from the end of the transcript, so a
        # positive value is never valid.
        if c.replace_byte_offs_begin > 0:
            raise AbrSdkUnexpectedState("Invalid replace_byte_offs_begin")
        if (
            c.replace_byte_offs_end > 0
            or c.replace_byte_offs_end < c.replace_byte_offs_begin
        ):
            raise AbrSdkUnexpectedState("Invalid replace_byte_offs_end")
        # A negative length would otherwise silently truncate the payload
        # through negative slicing below.
        if c.n_bytes < 0 or c.n_bytes > len(c.data):
            raise AbrSdkUnexpectedState("Invalid n_bytes")
        return AsrChunk(
            type=AsrTextChunkType(c.type),
            replace_byte_offset_begin=c.replace_byte_offs_begin,
            replace_byte_offset_end=c.replace_byte_offs_end,
            data=bytes(c.data[: c.n_bytes]),
        )

    def update(self, buf: bytearray) -> None:
        """
        Apply this chunk to a running transcript ``bytearray``.

        The bytes between the two replace offsets (both relative to the
        end of *buf*) are replaced in place by :attr:`data`. Chunks of type
        ``NONE`` leave *buf* untouched.

        Raises
        ------
        ValueError
            If the offsets reach before the start of *buf* or describe an
            inverted range.
        """
        if self.type == AsrTextChunkType.NONE:
            return
        size = len(buf)
        i0 = size + self.replace_byte_offset_begin
        i1 = size + self.replace_byte_offset_end
        if i0 < 0 or i1 < 0 or i1 < i0:
            raise ValueError("Buffer not compatible with this chunk")
        # In-place slice assignment only moves the bytes after i0 instead
        # of copying the whole transcript for every chunk.
        buf[i0:i1] = self.data
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
class AsrTranscript:
    """
    Accumulates ASR chunks and renders them as text.

    Append chunks to :attr:`chunks` (for example by passing
    ``transcript.chunks.append`` as an ``on_chunk`` callback) and read
    :attr:`text` for the assembled transcript.
    """

    def __init__(self) -> None:
        # Chunks in the order they were received.
        self.chunks: list[AsrChunk] = []

    @property
    def text(self) -> str:
        """Replay every chunk onto an empty buffer and decode it as UTF-8."""
        assembled = bytearray()
        for piece in self.chunks:
            piece.update(assembled)
        return str(assembled, "utf-8")
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
class Processor:
    """
    Event loop for streaming PCM audio through an :class:`Asr` application.

    Feeds audio into the ASR pipeline and delivers transcribed text chunks.
    Attach to an :class:`Asr` instance and push audio data incrementally.
    Output chunks are delivered via the *on_chunk* callback::

        with Processor(asr, on_chunk=my_callback) as proc:
            proc.push(chunk1)
            proc.push(chunk2)
            proc.wait_for_completion()

    This class is also used internally by :meth:`Asr.process`.
    """

    def __init__(
        self,
        asr: Asr,
        on_chunk: Callable[[AsrChunk], None] | None = None,
    ) -> None:
        """
        Create an event set on *asr* and register the events the loop uses.

        Parameters
        ----------
        asr
            The ASR application whose buffers are fed and drained.
        on_chunk
            Callback invoked with each parsed :class:`AsrChunk`; when
            *None*, chunks are pulled from the output buffer and dropped.
        """
        self._asr = asr
        self._on_chunk = on_chunk
        self._evset: EventSet | None = asr.create_event_set()

        # Close the event set again if registering an event fails;
        # __exit__ never runs when __init__ raises.
        with contextlib.ExitStack() as close_on_error:
            close_on_error.callback(self.close)

            # We want to be notified if there are at least two bytes of
            # space (one 16-bit sample) available in the input buffer. We
            # generally want this event to be disabled unless we're
            # actively feeding data into the buffer.
            self._input_event = self._evset.create_buffer_level_event(
                asr.input_buffer, 2, EventFlags(auto_disable=True, disabled=True)
            )

            # We want to be notified as soon as there is at least one
            # AsrChunk available on the output. This event shouldn't
            # auto-disable; we're always interested in output data becoming
            # available.
            self._output_event = self._evset.create_buffer_level_event(
                asr.text_chunk_output_buffer, AsrChunk.SIZE
            )

            # We want to be notified as soon as the application becomes
            # idle (all NNs drained AND every processor's flush has reached
            # its terminal state); generally enabled manually when we're
            # waiting for processing to be done.
            self._idle_event = self._evset.create_application_idle_event(
                EventFlags(auto_disable=True, disabled=True)
            )

            # Every event was registered: drop the pending close callback.
            close_on_error.pop_all()

    def process_and_wait_for_output(
        self,
        data: bytes | None,
        timeout_ms: int,
        flush: bool,
    ) -> None:
        """
        Push input data and wait for output text chunks.

        This is the core event loop. Higher-level methods :meth:`push` and
        :meth:`wait_for_completion` delegate to this method.

        Parameters
        ----------
        data
            PCM input bytes (little-endian 16-bit), or *None* to push no
            new data.
        timeout_ms
            Maximum time in milliseconds to spend waiting for output
            after all input has been pushed. ``0`` means return
            immediately once input is consumed. The timeout is measured
            from when this method is called.
        flush
            If *True*, flush the ASR pipeline after all input is streamed
            and wait until the neural network becomes idle.

        Raises
        ------
        RuntimeError
            If the processor has already been closed.
        AbrSdkUnexpectedState
            If a partial or malformed text chunk is pulled from the output
            buffer.
        """
        if self._evset is None:
            raise RuntimeError("Processor is closed")

        # A memoryview makes the per-iteration buf[offset:] slices zero-copy.
        buf = memoryview(data if data is not None else b"")
        offset = 0
        did_flush = False
        # The start time is only needed when a finite timeout is in play.
        t_start = time.monotonic_ns() // 1_000_000 if timeout_ms > 0 else 0

        while True:
            # Push as much input as possible.
            remaining = len(buf) - offset
            if remaining > 0:
                n_written = self._asr.input_buffer.push(buf[offset:])
                offset += n_written
                remaining = len(buf) - offset

            # Ensure that the "input buffer space event" is enabled.
            if remaining > 0:
                self._input_event.enable()

            # Flush once all input is consumed.
            if flush and remaining == 0 and not did_flush:
                self._asr.flush()
                self._idle_event.enable()
                did_flush = True

            # During the atomic phase (still feeding input or flushing),
            # use an infinite timeout. Otherwise use the caller's value.
            adjusted_timeout = -1 if remaining > 0 or flush else timeout_ms

            # Account for time already spent in this call.
            if adjusted_timeout > 0:
                elapsed = time.monotonic_ns() // 1_000_000 - t_start
                adjusted_timeout = max(adjusted_timeout - elapsed, 0)

            # Poll for events.
            try:
                if not self._evset.poll(adjusted_timeout):
                    return  # Timeout expired without any event.
            except InterruptedError:
                # Only propagate the interrupt when we are past the
                # atomic input-feeding/flushing phase.
                if adjusted_timeout > 0 and timeout_ms > 0:
                    raise

            # Drain all complete chunks in the output buffer. The output
            # event is level-triggered and always enabled, so the next poll
            # wakes again if more chunks arrive.
            while self._output_event.is_triggered:
                chunk_bytes = self._asr.text_chunk_output_buffer.pull(AsrChunk.SIZE)
                if len(chunk_bytes) == 0:
                    break
                if len(chunk_bytes) != AsrChunk.SIZE:
                    raise AbrSdkUnexpectedState(
                        f"Pulled a partial text chunk ({len(chunk_bytes)} of "
                        f"{AsrChunk.SIZE} bytes) from the output buffer"
                    )
                chunk = AsrChunk.parse(chunk_bytes)
                if self._on_chunk is not None:
                    self._on_chunk(chunk)

            # The application went idle after the flush: leave the atomic
            # phase so the caller's timeout applies to the next poll.
            if flush and self._idle_event.is_triggered:
                flush = False

    def push(self, data: bytes, output_poll_timeout_ms: int = 0) -> None:
        """
        Push PCM audio data into the ASR network.

        This may block briefly if the input buffer is full. The
        ``on_chunk`` callback may be invoked during this call.

        Parameters
        ----------
        data
            PCM audio as little-endian 16-bit bytes.
        output_poll_timeout_ms
            Extra time in milliseconds to spend waiting for output after
            the input has been consumed. ``0`` (the default) returns as
            soon as the input is pushed.

        Raises
        ------
        ValueError
            If *output_poll_timeout_ms* is negative.
        """
        if output_poll_timeout_ms < 0:
            raise ValueError("output_poll_timeout_ms must not be negative")
        self.process_and_wait_for_output(data, output_poll_timeout_ms, False)

    def wait_for_completion(self) -> None:
        """
        Block until all previously pushed data is fully processed.

        The ``on_chunk`` callback may be invoked during this call.
        """
        self.process_and_wait_for_output(None, 0, True)

    def close(self) -> None:
        """Release all event resources held by this processor."""
        if self._evset is None:
            return
        self._evset.close()
        self._evset = None

    def __enter__(self) -> Processor:
        return self

    def __exit__(
        self,
        type_: type[BaseException] | None,
        value: BaseException | None,
        traceback: TracebackType | None,
    ) -> None:
        self.close()
|