phonexia-speaker-diarization-client 1.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: phonexia-speaker-diarization-client
|
|
3
|
+
Version: 1.1.0
|
|
4
|
+
Summary: Client script for communicating with the speaker diarization microservice
|
|
5
|
+
Keywords: grpc,voice,voice-biometry,speech,language,diarization
|
|
6
|
+
Author: Phonexia
|
|
7
|
+
Author-email: info@phonexia.com
|
|
8
|
+
Requires-Python: >=3.8,<4.0
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Requires-Dist: ConfigArgParse (==1.7)
|
|
16
|
+
Requires-Dist: grpcio (>=1.54.0,<2.0.0)
|
|
17
|
+
Requires-Dist: grpcio-health-checking (>=1.54.0,<2.0.0)
|
|
18
|
+
Requires-Dist: grpcio-reflection (>=1.54.0,<2.0.0)
|
|
19
|
+
Requires-Dist: phonexia-grpc (>=1.0.0,<2.0.0)
|
|
20
|
+
Requires-Dist: tomlkit (>=0.12.0,<0.13.0)
|
|
21
|
+
Project-URL: Homepage, https://phonexia.com
|
|
22
|
+
Project-URL: Issues, https://phonexia.atlassian.net/servicedesk/customer/portal/15/group/20/create/40
|
|
23
|
+
Project-URL: protofiles, https://github.com/phonexia/protofiles
|
|
24
|
+
Description-Content-Type: text/markdown
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+

|
|
28
|
+
|
|
29
|
+
# Phonexia speaker diarization client
|
|
30
|
+
|
|
31
|
+
This module contains a client for communicating with the [speaker diarization microservice](https://hub.docker.com/repository/docker/phonexia/speaker-diarization/general) developed by [Phonexia](https://phonexia.com).
|
|
32
|
+
|
|
33
|
+
To use this client you will first need a running instance of any *Phonexia speaker diarization microservice*. If you don't yet have any running instance, don't hesitate to [contact our sales department](mailto:info@phonexia.com).
|
|
34
|
+
|
|
35
|
+
You can learn more about the speaker diarization technology [here](TODO).
|
|
36
|
+
|
|
37
|
+
On [this page](TODO), you will find a *gRPC API* reference for the *speaker diarization microservice*.
|
|
@@ -0,0 +1,271 @@
|
|
|
1
|
+
#!/usr/bin/python3
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import fnmatch
|
|
5
|
+
import logging
|
|
6
|
+
import os
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Iterator, Optional
|
|
9
|
+
|
|
10
|
+
import google.protobuf.duration_pb2
|
|
11
|
+
import grpc
|
|
12
|
+
import phonexia.grpc.common.core_pb2 as phx_common
|
|
13
|
+
import phonexia.grpc.technologies.speaker_diarization.v1.speaker_diarization_pb2 as diarization
|
|
14
|
+
import phonexia.grpc.technologies.speaker_diarization.v1.speaker_diarization_pb2_grpc as diarization_grpc
|
|
15
|
+
|
|
16
|
+
# Number of bytes read from the input file per streamed request chunk (1 MiB).
CHUNK_SIZE = 1024 * 1024
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class speaker_diarization_client:
    """gRPC client that sends audio to a speaker diarization service.

    An audio file is streamed to the service in ``CHUNK_SIZE`` pieces and the
    returned speaker segments are written to disk in ``lab`` or ``rttm`` format.

    Annotations that mention generated protobuf types are quoted so they are
    never evaluated at definition time; an ``X | None`` expression on a class
    raises ``TypeError`` on Python 3.8/3.9.
    """

    def __init__(
        self,
        host: str,
        use_ssl: bool,
        max_speakers: Optional[int] = None,
        total_speakers: Optional[int] = None,
        output_format="lab",
    ):
        """Open a channel to ``host`` and remember the diarization settings.

        Args:
            host: Target passed to the gRPC channel constructor.
            use_ssl: Use a secure channel with default SSL credentials when true,
                an insecure channel otherwise.
            max_speakers: Value for ``DiarizeConfig.max_speakers``; omitted from
                the request when ``None``.
            total_speakers: Value for ``DiarizeConfig.total_speakers``; omitted
                from the request when ``None``.
            output_format: Either ``"lab"`` or ``"rttm"``.

        Raises:
            ValueError: If ``output_format`` is not supported.
        """
        # Validate before opening the channel so a bad format leaks no channel.
        if output_format not in {"lab", "rttm"}:
            raise ValueError("Unsupported output format")
        self.format = output_format

        if use_ssl:
            logging.info("Connecting to %s via secure channel", host)
            credentials = grpc.ssl_channel_credentials()
            self.channel = grpc.secure_channel(host, credentials)
        else:
            logging.info("Connecting to %s via insecure channel", host)
            self.channel = grpc.insecure_channel(host)

        self.max_speakers = max_speakers
        self.total_speakers = total_speakers
        logging.info(
            "Using max_speakers=%s and total_speakers=%s",
            self.max_speakers,
            self.total_speakers,
        )
        self.diarize_stub = diarization_grpc.SpeakerDiarizationStub(self.channel)

    def time_to_duration(
        self, time: Optional[float]
    ) -> "Optional[google.protobuf.duration_pb2.Duration]":
        """Convert seconds to a protobuf ``Duration``; ``None`` stays ``None``.

        The total is rounded to whole nanoseconds first. Truncating the
        fractional part instead loses a nanosecond to float error (2.3 s would
        become 2 s + 299999999 ns).
        """
        if time is None:
            return None

        total_nanos = round(time * 1e9)
        # Split the magnitude so seconds and nanos always share the same sign.
        sign = -1 if total_nanos < 0 else 1
        seconds, nanos = divmod(abs(total_nanos), 1_000_000_000)

        duration = google.protobuf.duration_pb2.Duration()
        duration.seconds = sign * seconds
        duration.nanos = sign * nanos
        return duration

    def file_to_request(
        self, file: Path, start: Optional[float], end: Optional[float]
    ) -> "Iterator[diarization.DiarizeRequest]":
        """Yield ``DiarizeRequest`` messages carrying ``file`` chunk by chunk.

        The first yielded message carries the time range and any configured
        speaker counts; those fields are cleared afterwards, so later messages
        carry audio content only. Nothing is yielded for an empty file.

        Args:
            file: Audio file to read in binary mode.
            start: Start of the time range in seconds, or ``None``.
            end: End of the time range in seconds, or ``None``.
        """
        time_range = phx_common.TimeRange(
            start=self.time_to_duration(start), end=self.time_to_duration(end)
        )
        request = diarization.DiarizeRequest(
            audio=phx_common.Audio(content=None, time_range=time_range),
            config=diarization.DiarizeConfig(),
        )

        if self.total_speakers is not None:
            request.config.total_speakers = self.total_speakers

        if self.max_speakers is not None:
            request.config.max_speakers = self.max_speakers

        with open(file, "rb") as f:
            while chunk := f.read(CHUNK_SIZE):
                request.audio.content = chunk
                logging.debug("Sending chunk of size %d", len(chunk))
                yield request
                # The same message object is reused; strip the one-off fields
                # so they are sent with the first chunk only.
                request.audio.ClearField("time_range")
                request.config.ClearField("total_speakers")
                request.config.ClearField("max_speakers")

    def save_response_lab(self, response: "diarization.DiarizeResponse", file: Path):
        """Write segments to ``file`` as ``<start> <end> <speaker>`` lines.

        Times are written as integers in units of 100 ns (seconds * 10**7) and
        speaker ids are shifted by one.
        """

        def to_htk(sec: float):
            return int(round(sec * 10**7, ndigits=0))

        with open(file, "w", encoding="utf8") as fd:
            for segment in response.segments:
                print(
                    "{:d} {:d} {:d}".format(
                        to_htk(segment.start_time.ToTimedelta().total_seconds()),
                        to_htk(segment.end_time.ToTimedelta().total_seconds()),
                        int(segment.speaker_id) + 1,
                    ),
                    file=fd,
                )

    def save_response_rttm(self, response: "diarization.DiarizeResponse", file: Path):
        """Write segments to ``file`` as RTTM ``SPEAKER`` lines.

        Each line holds the output file stem, the segment start and length in
        seconds with two decimals, and the speaker id shifted by one.
        """
        with open(file, "w", encoding="utf8") as fd:
            for segment in response.segments:
                beg = segment.start_time.ToTimedelta().total_seconds()
                end = segment.end_time.ToTimedelta().total_seconds()
                size = end - beg
                print(
                    f"SPEAKER {file.stem} 1 {beg:.2f} {size:.2f} <NA> <NA>"
                    f" {int(segment.speaker_id) + 1} <NA>",
                    file=fd,
                )

    def send_diarize_request(
        self, request: "Iterator[diarization.DiarizeRequest]"
    ) -> "diarization.DiarizeResponse":
        """Send the request stream through the stub and return its response."""
        return self.diarize_stub.Diarize(request)

    def process_file(
        self,
        in_file: Path,
        start: Optional[float] = None,
        end: Optional[float] = None,
        output: Optional[Path] = None,
    ):
        """Diarize ``in_file`` and save the result in the configured format.

        Args:
            in_file: Audio file to send.
            start: Optional start of the processed time range in seconds.
            end: Optional end of the processed time range in seconds.
            output: Destination file; defaults to ``in_file`` with its suffix
                replaced by the output format.
        """
        if output is None:
            output = in_file.with_suffix("." + self.format)

        logging.info("%s -> %s", in_file, output)

        response = self.send_diarize_request(self.file_to_request(in_file, start, end))

        if self.format == "lab":
            self.save_response_lab(response, output)
        elif self.format == "rttm":
            self.save_response_rttm(response, output)

    def process_dir(
        self,
        in_dir: Path,
        start: Optional[float] = None,
        end: Optional[float] = None,
        output: Optional[Path] = None,
        input_suffix: str = "wav",
    ):
        """Diarize every ``*.<input_suffix>`` file found directly in ``in_dir``.

        Args:
            in_dir: Directory to scan (not recursive).
            start: Optional start of the processed time range in seconds.
            end: Optional end of the processed time range in seconds.
            output: Directory for the results; defaults to ``in_dir`` and is
                created, including parents, when missing.
            input_suffix: File extension, without the dot, selecting the inputs.
        """
        output = in_dir if output is None else output
        output = Path(output)

        # ``exists`` must be called; the bare attribute is always truthy, so
        # the directory would never be created. ``exist_ok`` avoids a race.
        output.mkdir(parents=True, exist_ok=True)

        logging.info("Scanning directory %s for *.%s", os.path.abspath(in_dir), input_suffix)
        pattern = f"*.{input_suffix}"
        # Sorted so the processing order does not depend on the file system.
        filtered_files = sorted(f for f in os.listdir(in_dir) if fnmatch.fnmatch(f, pattern))
        logging.info("Found %d files", len(filtered_files))

        for file in filtered_files:
            in_file = in_dir / file
            out_file = (output / file).with_suffix("." + self.format)
            self.process_file(in_file, start, end, out_file)
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def _build_parser():
    """Create the argument parser for the command-line client."""
    parser = argparse.ArgumentParser(
        description=(
            "Speaker Diarization gRPC client. Identifies speakers in input audio and returns"
            " segments with timestamps for each speaker."
        )
    )
    parser.add_argument(
        "-H",
        "--host",
        default="localhost:8080",
        help="Phonexia Speech Engine gRPC API server host",
    )
    parser.add_argument("--use_ssl", action="store_true", default=False, help="Use SSL connection")
    parser.add_argument(
        "-F",
        "--out-format",
        default="lab",
        choices=["lab", "rttm"],
        help="Output format",
    )
    speakers = parser.add_mutually_exclusive_group(required=False)
    speakers.add_argument(
        "--total-speakers",
        type=int,
        help="Exact number of speakers in recording",
    )
    speakers.add_argument(
        "--max-speakers",
        type=int,
        help="Maximum number of speakers in recording",
    )
    parser.add_argument(
        "-l",
        "--log-level",
        type=str,
        default="info",
        choices=["critical", "error", "warning", "info", "debug"],
        help="Logging level",
    )
    input_options = parser.add_mutually_exclusive_group(required=True)
    input_options.add_argument("-i", "--in-file", type=Path, help="Path to audio file")
    input_options.add_argument(
        "-d", "--in-dir", type=Path, help="Path to directory containing audio files"
    )
    parser.add_argument("--start", type=float, help="Audio start time")
    parser.add_argument("--end", type=float, help="Audio end time")
    parser.add_argument(
        "-e",
        "--in-extension",
        default="wav",
        help="Input extension of files in directory.",
    )
    output_options = parser.add_mutually_exclusive_group(required=False)
    output_options.add_argument(
        "-o",
        "--out-file",
        type=Path,
        help="Location the output will be stored into.",
    )
    output_options.add_argument(
        "-D",
        "--out-dir",
        type=Path,
        help="Directory in which the output will be stored.",
    )
    return parser


def _validate_args(args):
    """Raise ``ValueError`` for argument combinations argparse cannot reject."""
    if args.start is not None and args.start < 0:
        raise ValueError("Parameter 'start' must be a non-negative float.\n")

    if args.end is not None and args.end <= 0:
        raise ValueError("Parameter 'end' must be a positive float.\n")

    if args.start is not None and args.end is not None and args.start >= args.end:
        raise ValueError("Parameter 'end' must be larger than 'start'.\n")

    # File output only makes sense for file input, and likewise for directories.
    if args.out_file and args.in_dir:
        raise ValueError("'-o' option can not be used with '-d'.\n")

    if args.out_dir and args.in_file:
        raise ValueError("'-D' option can not be used with '-i'.\n")


def main():
    """Parse the command line and diarize a single file or a directory.

    Raises:
        ValueError: If the parsed arguments are inconsistent.
        SystemExit: With status 1 when the RPC or any later step fails; the
            failure is logged with its traceback first.
    """
    args = _build_parser().parse_args()
    _validate_args(args)

    logging.basicConfig(
        level=args.log_level.upper(),
        format="[%(asctime)s.%(msecs)03d] [%(levelname)s] %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )

    try:
        client = speaker_diarization_client(
            args.host,
            args.use_ssl,
            args.max_speakers,
            args.total_speakers,
            args.out_format,
        )

        if args.in_file:
            client.process_file(args.in_file, args.start, args.end, args.out_file)

        elif args.in_dir:
            client.process_dir(args.in_dir, args.start, args.end, args.out_dir, args.in_extension)

    # ``SystemExit`` is raised directly: the ``exit`` builtin is injected by
    # ``site`` and is missing under ``python -S`` or in frozen applications.
    except grpc.RpcError:
        logging.exception("RPC failed")
        raise SystemExit(1) from None
    except Exception:
        logging.exception("Unknown error")
        raise SystemExit(1) from None
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
# Run the command-line client only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
|
|
2
|
+

|
|
3
|
+
|
|
4
|
+
# Phonexia speaker diarization client
|
|
5
|
+
|
|
6
|
+
This module contains a client for communicating with the [speaker diarization microservice](https://hub.docker.com/repository/docker/phonexia/speaker-diarization/general) developed by [Phonexia](https://phonexia.com).
|
|
7
|
+
|
|
8
|
+
To use this client you will first need a running instance of any *Phonexia speaker diarization microservice*. If you don't yet have any running instance, don't hesitate to [contact our sales department](mailto:info@phonexia.com).
|
|
9
|
+
|
|
10
|
+
You can learn more about the speaker diarization technology [here](TODO).
|
|
11
|
+
|
|
12
|
+
On [this page](TODO), you will find a *gRPC API* reference for the *speaker diarization microservice*.
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
[tool.poetry]
|
|
2
|
+
name = "phonexia-speaker-diarization-client"
|
|
3
|
+
version = "1.1.0"
|
|
4
|
+
description = "Client script for communicating with the speaker diarization microservice"
|
|
5
|
+
readme = "pypi-README.md"
|
|
6
|
+
keywords = ["grpc", "voice", "voice-biometry", "speech", "language", "diarization"]
|
|
7
|
+
authors = ["Phonexia <info@phonexia.com>"]
|
|
8
|
+
|
|
9
|
+
[tool.poetry.urls]
|
|
10
|
+
Homepage = "https://phonexia.com"
|
|
11
|
+
Issues = "https://phonexia.atlassian.net/servicedesk/customer/portal/15/group/20/create/40"
|
|
12
|
+
protofiles = "https://github.com/phonexia/protofiles"
|
|
13
|
+
|
|
14
|
+
[tool.poetry.scripts]
|
|
15
|
+
speaker_diarization_client = 'phonexia_speaker_diarization_client:main'
|
|
16
|
+
|
|
17
|
+
[tool.poetry.dependencies]
|
|
18
|
+
python = ">=3.8,<4.0"
|
|
19
|
+
grpcio = "^1.54.0"
|
|
20
|
+
phonexia-grpc = {version="^1.0.0", source="pypi"}
|
|
21
|
+
ConfigArgParse = "1.7"
|
|
22
|
+
grpcio-reflection = "^1.54.0"
|
|
23
|
+
grpcio-health-checking = "^1.54.0"
|
|
24
|
+
tomlkit = "^0.12.0"
|
|
25
|
+
|
|
26
|
+
[[tool.poetry.source]]
|
|
27
|
+
name = "gitlab"
|
|
28
|
+
url = "https://gitlab.cloud.phonexia.com/api/v4/groups/39/-/packages/pypi/simple"
|
|
29
|
+
priority = "primary"
|
|
30
|
+
|
|
31
|
+
[[tool.poetry.source]]
|
|
32
|
+
name = "PyPI"
|
|
33
|
+
priority = "default"
|
|
34
|
+
|
|
35
|
+
[tool.poetry.group.dev.dependencies]
|
|
36
|
+
pytest = "^8.0.0"
|
|
37
|
+
pytest-env = "^1.0.0"
|
|
38
|
+
pytest-random-order = "^1.1.0"
|
|
39
|
+
pre-commit = "^3.0.0"
|
|
40
|
+
tox = "^4.0.0"
|
|
41
|
+
toml = "^0.10.2"
|
|
42
|
+
|
|
43
|
+
[build-system]
|
|
44
|
+
requires = ["poetry-core>=1.0.0"]
|
|
45
|
+
build-backend = "poetry.core.masonry.api"
|
|
46
|
+
|
|
47
|
+
[tool.black]
|
|
48
|
+
line-length = 100
|
|
49
|
+
target-version = ['py38']
|
|
50
|
+
preview = true
|
|
51
|
+
|
|
52
|
+
[tool.ruff]
|
|
53
|
+
target-version = "py38"
|
|
54
|
+
line-length = 100
|
|
55
|
+
fix = true
|
|
56
|
+
select = [
|
|
57
|
+
# flake8-2020
|
|
58
|
+
"YTT",
|
|
59
|
+
# flake8-bandit
|
|
60
|
+
"S",
|
|
61
|
+
# flake8-bugbear
|
|
62
|
+
"B",
|
|
63
|
+
# flake8-builtins
|
|
64
|
+
"A",
|
|
65
|
+
# flake8-comprehensions
|
|
66
|
+
"C4",
|
|
67
|
+
# flake8-debugger
|
|
68
|
+
"T10",
|
|
69
|
+
# flake8-simplify
|
|
70
|
+
"SIM",
|
|
71
|
+
# isort
|
|
72
|
+
"I",
|
|
73
|
+
# mccabe
|
|
74
|
+
"C90",
|
|
75
|
+
# pycodestyle
|
|
76
|
+
"E", "W",
|
|
77
|
+
# pyflakes
|
|
78
|
+
"F",
|
|
79
|
+
# pygrep-hooks
|
|
80
|
+
"PGH",
|
|
81
|
+
# pyupgrade
|
|
82
|
+
"UP",
|
|
83
|
+
# ruff
|
|
84
|
+
"RUF",
|
|
85
|
+
# tryceratops
|
|
86
|
+
"TRY",
|
|
87
|
+
]
|
|
88
|
+
ignore = [
|
|
89
|
+
# LineTooLong
|
|
90
|
+
"E501",
|
|
91
|
+
# DoNotAssignLambda
|
|
92
|
+
"E731",
|
|
93
|
+
# RaiseVanillaArgs aka Avoid specifying long messages outside the exception class
|
|
94
|
+
"TRY003",
|
|
95
|
+
]
|