phonexia-speaker-diarization-client 1.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,37 @@
1
+ Metadata-Version: 2.1
2
+ Name: phonexia-speaker-diarization-client
3
+ Version: 1.1.0
4
+ Summary: Client script for communicating with diarization microservice
5
+ Keywords: grpc,voice,voice-biometry,speech,language,diarization
6
+ Author: Phonexia
7
+ Author-email: info@phonexia.com
8
+ Requires-Python: >=3.8,<4.0
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Programming Language :: Python :: 3.8
11
+ Classifier: Programming Language :: Python :: 3.9
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Requires-Dist: ConfigArgParse (==1.7)
16
+ Requires-Dist: grpcio (>=1.54.0,<2.0.0)
17
+ Requires-Dist: grpcio-health-checking (>=1.54.0,<2.0.0)
18
+ Requires-Dist: grpcio-reflection (>=1.54.0,<2.0.0)
19
+ Requires-Dist: phonexia-grpc (>=1.0.0,<2.0.0)
20
+ Requires-Dist: tomlkit (>=0.12.0,<0.13.0)
21
+ Project-URL: Homepage, https://phonexia.com
22
+ Project-URL: Issues, https://phonexia.atlassian.net/servicedesk/customer/portal/15/group/20/create/40
23
+ Project-URL: protofiles, https://github.com/phonexia/protofiles
24
+ Description-Content-Type: text/markdown
25
+
26
+
27
+ ![](https://www.phonexia.com/wp-content/uploads/phonexia-logo-transparent-500px.png)
28
+
29
+ # Phonexia speaker diarization client
30
+
31
+ This module contains a client for communicating with the [voiceprint diarization microservice](https://hub.docker.com/repository/docker/phonexia/speaker-diarization/general) developed by [Phonexia](https://phonexia.com).
32
+
33
+ To use this client you will first need a running instance of any *Phonexia speaker diarization microservice*. If you don't yet have any running instance, don't hesitate to [contact our sales department](mailto:info@phonexia.com).
34
+
35
+ You can learn more about the speaker diarization technology [here](TODO).
36
+
37
+ On [this page](TODO), you will find a *gRPC API* reference for the *voiceprint diarization microservice*.
@@ -0,0 +1,271 @@
1
+ #!/usr/bin/python3
2
+
3
+ import argparse
4
+ import fnmatch
5
+ import logging
6
+ import os
7
+ from pathlib import Path
8
+ from typing import Iterator, Optional
9
+
10
+ import google.protobuf.duration_pb2
11
+ import grpc
12
+ import phonexia.grpc.common.core_pb2 as phx_common
13
+ import phonexia.grpc.technologies.speaker_diarization.v1.speaker_diarization_pb2 as diarization
14
+ import phonexia.grpc.technologies.speaker_diarization.v1.speaker_diarization_pb2_grpc as diarization_grpc
15
+
16
+ CHUNK_SIZE = 1024 * 1024
17
+
18
+
19
class speaker_diarization_client:
    """gRPC client that sends audio files to a speaker diarization service.

    Audio is streamed to the service in chunks of ``CHUNK_SIZE`` bytes and the
    returned segments are written to disk in one of two text formats:

    * ``"lab"``  - ``<start> <end> <speaker>`` with times in 100 ns units,
    * ``"rttm"`` - one ``SPEAKER ...`` record per segment.

    Annotations that name protobuf/gRPC types are written as strings so they
    are never evaluated when the class is defined. In particular the former
    ``Duration | None`` annotation raised ``TypeError`` on Python 3.8/3.9.
    """

    def __init__(
        self,
        host: str,
        use_ssl: bool,
        max_speakers: Optional[int] = None,
        total_speakers: Optional[int] = None,
        output_format="lab",
    ):
        """Open a channel to ``host`` and remember the diarization settings.

        Args:
            host: Address of the gRPC server, e.g. ``"localhost:8080"``.
            use_ssl: Use a secure channel with default SSL credentials when
                true, an insecure channel otherwise.
            max_speakers: Value for ``DiarizeConfig.max_speakers``; left unset
                when ``None``.
            total_speakers: Value for ``DiarizeConfig.total_speakers``; left
                unset when ``None``.
            output_format: ``"lab"`` or ``"rttm"``.

        Raises:
            ValueError: If ``output_format`` is not supported.
        """
        # Validate before touching the network so that a bad format does not
        # leave an open channel behind.
        if output_format not in {"lab", "rttm"}:
            raise ValueError("Unsupported output format")
        self.format = output_format

        if use_ssl:
            logging.info("Connecting to %s via secure channel", host)
            credentials = grpc.ssl_channel_credentials()
            self.channel = grpc.secure_channel(host, credentials)
        else:
            logging.info("Connecting to %s via insecure channel", host)
            self.channel = grpc.insecure_channel(host)

        self.max_speakers = max_speakers
        self.total_speakers = total_speakers
        logging.info(
            "Using max_speakers=%s and total_speakers=%s",
            self.max_speakers,
            self.total_speakers,
        )
        self.diarize_stub = diarization_grpc.SpeakerDiarizationStub(self.channel)

    def time_to_duration(
        self, time: Optional[float]
    ) -> "Optional[google.protobuf.duration_pb2.Duration]":
        """Convert seconds to a protobuf ``Duration``; ``None`` is passed through."""
        if time is None:
            return None
        duration = google.protobuf.duration_pb2.Duration()
        duration.seconds = int(time)
        # The fractional part of the second is stored as nanoseconds.
        duration.nanos = int((time - duration.seconds) * 1e9)
        return duration

    def file_to_request(
        self, file: Path, start: Optional[float], end: Optional[float]
    ) -> "Iterator[diarization.DiarizeRequest]":
        """Yield the request stream for one audio file.

        A single request message is reused for every chunk. The first yielded
        message carries the time range and the speaker configuration; these
        fields are cleared afterwards so later messages carry audio content
        only. Nothing is yielded for an empty file.

        Args:
            file: Path of the audio file to read.
            start: Start of the processed range in seconds, or ``None``.
            end: End of the processed range in seconds, or ``None``.
        """
        time_range = phx_common.TimeRange(
            start=self.time_to_duration(start), end=self.time_to_duration(end)
        )
        request = diarization.DiarizeRequest(
            audio=phx_common.Audio(content=None, time_range=time_range),
            config=diarization.DiarizeConfig(),
        )

        if self.total_speakers is not None:
            request.config.total_speakers = self.total_speakers

        if self.max_speakers is not None:
            request.config.max_speakers = self.max_speakers

        with open(file, "rb") as f:
            while chunk := f.read(CHUNK_SIZE):
                request.audio.content = chunk
                logging.debug("Sending chunk of size %d", len(chunk))
                yield request
                # Metadata is only sent with the first chunk.
                request.audio.ClearField("time_range")
                request.config.ClearField("total_speakers")
                request.config.ClearField("max_speakers")

    def save_response_lab(self, response: "diarization.DiarizeResponse", file: Path):
        """Write segments as ``<start> <end> <speaker>`` lines.

        Times are written in 100 ns units and speaker ids are shifted to be
        1-based.
        """

        def to_htk(sec: float) -> int:
            # round() without ndigits already returns an int.
            return round(sec * 10**7)

        with open(file, "w", encoding="utf8") as fd:
            for segment in response.segments:
                start = to_htk(segment.start_time.ToTimedelta().total_seconds())
                end = to_htk(segment.end_time.ToTimedelta().total_seconds())
                speaker = int(segment.speaker_id) + 1
                print(f"{start:d} {end:d} {speaker:d}", file=fd)

    def save_response_rttm(self, response: "diarization.DiarizeResponse", file: Path):
        """Write segments as RTTM ``SPEAKER`` records.

        The stem of the output file name is used as the recording id; onset
        and duration are written in seconds with two decimals and speaker ids
        are shifted to be 1-based.
        """
        with open(file, "w", encoding="utf8") as fd:
            for segment in response.segments:
                beg = segment.start_time.ToTimedelta().total_seconds()
                end = segment.end_time.ToTimedelta().total_seconds()
                size = end - beg
                print(
                    f"SPEAKER {file.stem} 1 {beg:.2f} {size:.2f} <NA> <NA>"
                    f" {int(segment.speaker_id) + 1} <NA>",
                    file=fd,
                )

    def send_diarize_request(
        self, request: "Iterator[diarization.DiarizeRequest]"
    ) -> "diarization.DiarizeResponse":
        """Send the request stream to the service and return its response."""
        return self.diarize_stub.Diarize(request)

    def process_file(
        self,
        in_file: Path,
        start: Optional[float] = None,
        end: Optional[float] = None,
        output: Optional[Path] = None,
    ):
        """Diarize one file and store the result.

        Args:
            in_file: Audio file to process.
            start: Start of the processed range in seconds, or ``None``.
            end: End of the processed range in seconds, or ``None``.
            output: Destination file; defaults to ``in_file`` with its suffix
                replaced by the configured output format.
        """
        if output is None:
            output = in_file.with_suffix("." + self.format)

        logging.info("%s -> %s", in_file, output)

        response = self.send_diarize_request(self.file_to_request(in_file, start, end))

        # The constructor guarantees the format is one of the two below.
        if self.format == "lab":
            self.save_response_lab(response, output)
        elif self.format == "rttm":
            self.save_response_rttm(response, output)

    def process_dir(
        self,
        in_dir: Path,
        start: Optional[float] = None,
        end: Optional[float] = None,
        output: Optional[Path] = None,
        input_suffix: str = "wav",
    ):
        """Diarize every ``*.<input_suffix>`` file directly inside ``in_dir``.

        Args:
            in_dir: Directory that is scanned (not recursively).
            start: Start of the processed range in seconds, or ``None``.
            end: End of the processed range in seconds, or ``None``.
            output: Directory for the result files; created if missing.
                Defaults to ``in_dir``.
            input_suffix: File extension (without the dot) of the input files.
        """
        if output is None:
            output = in_dir
        else:
            output = Path(output)
            # ``Path.exists`` is a method: testing the bare attribute is always
            # truthy, so the directory was never created. ``mkdir`` with
            # ``exist_ok`` covers both the missing and the existing case.
            output.mkdir(parents=True, exist_ok=True)

        logging.info("Scanning directory %s for *.%s", os.path.abspath(in_dir), input_suffix)
        pattern = f"*.{input_suffix}"
        # Sorted so that files are processed in a reproducible order.
        filtered_files = sorted(f for f in os.listdir(in_dir) if fnmatch.fnmatch(f, pattern))
        logging.info("Found %d files", len(filtered_files))
        for file in filtered_files:
            in_file = in_dir / file
            out_file = (output / file).with_suffix("." + self.format)
            self.process_file(in_file, start, end, out_file)
155
+
156
+
157
def main() -> None:
    """Command-line entry point of the speaker diarization client.

    Parses the command line, validates the time range and the combination of
    input/output options, configures logging and then processes either a
    single file (``-i``) or a whole directory (``-d``).

    Raises:
        ValueError: If ``--start``/``--end`` are out of range or an output
            option is combined with the wrong kind of input.
        SystemExit: With status 1 when the processing itself fails (the error
            is logged with its traceback first).
    """
    # ``exit`` is injected by the ``site`` module for interactive use and is
    # missing under ``python -S`` or in frozen applications; ``sys.exit`` is
    # always available.
    import sys

    parser = argparse.ArgumentParser(
        description=(
            "Speaker Diarization gRPC client. Identifies speakers in input audio and returns"
            " segments with timestamps for each speaker."
        )
    )
    parser.add_argument(
        "-H",
        "--host",
        default="localhost:8080",
        help="Phonexia Speech Engine gRPC API server host",
    )
    parser.add_argument("--use_ssl", action="store_true", default=False, help="Use SSL connection")
    parser.add_argument(
        "-F",
        "--out-format",
        default="lab",
        choices=["lab", "rttm"],
        help="Output format",
    )
    # At most one of the two speaker-count hints may be given.
    speakers = parser.add_mutually_exclusive_group(required=False)
    speakers.add_argument(
        "--total-speakers",
        type=int,
        help="Exact number of speakers in recording",
    )
    speakers.add_argument(
        "--max-speakers",
        type=int,
        help="Maximum number of speakers in recording",
    )
    parser.add_argument(
        "-l",
        "--log-level",
        type=str,
        default="info",
        choices=["critical", "error", "warning", "info", "debug"],
        help="Logging level",
    )
    # Exactly one input (file or directory) is required.
    input_options = parser.add_mutually_exclusive_group(required=True)
    input_options.add_argument("-i", "--in-file", type=Path, help="Path to audio file")
    input_options.add_argument(
        "-d", "--in-dir", type=Path, help="Path to directory containing audio files"
    )
    parser.add_argument("--start", type=float, help="Audio start time")
    parser.add_argument("--end", type=float, help="Audio end time")
    parser.add_argument(
        "-e",
        "--in-extension",
        default="wav",
        help="Input extension of files in directory.",
    )
    output_options = parser.add_mutually_exclusive_group(required=False)
    output_options.add_argument(
        "-o",
        "--out-file",
        type=Path,
        help="Location the output will be stored into.",
    )
    output_options.add_argument(
        "-D",
        "--out-dir",
        type=Path,
        help="Directory in which the output will be stored.",
    )

    args = parser.parse_args()

    if args.start is not None and args.start < 0:
        raise ValueError("Parameter 'start' must be a non-negative float.\n")

    if args.end is not None and args.end <= 0:
        raise ValueError("Parameter 'end' must be a positive float.\n")

    if args.start is not None and args.end is not None and args.start >= args.end:
        raise ValueError("Parameter 'end' must be larger than 'start'.\n")

    # An output file only makes sense for a single input file and an output
    # directory only for an input directory.
    if args.out_file and args.in_dir:
        raise ValueError("'-o' option can not be used with '-d'.\n")

    if args.out_dir and args.in_file:
        raise ValueError("'-D' option can not be used with '-i'.\n")

    logging.basicConfig(
        level=args.log_level.upper(),
        format="[%(asctime)s.%(msecs)03d] [%(levelname)s] %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )

    try:
        client = speaker_diarization_client(
            args.host,
            args.use_ssl,
            args.max_speakers,
            args.total_speakers,
            args.out_format,
        )

        if args.in_file:
            client.process_file(args.in_file, args.start, args.end, args.out_file)

        elif args.in_dir:
            client.process_dir(args.in_dir, args.start, args.end, args.out_dir, args.in_extension)

    except grpc.RpcError:
        logging.exception("RPC failed")
        sys.exit(1)
    except Exception:
        # Top-level boundary: log the traceback and signal failure to the shell.
        logging.exception("Unknown error")
        sys.exit(1)


if __name__ == "__main__":
    main()
@@ -0,0 +1,12 @@
1
+
2
+ ![](https://www.phonexia.com/wp-content/uploads/phonexia-logo-transparent-500px.png)
3
+
4
+ # Phonexia speaker diarization client
5
+
6
+ This module contains a client for communicating with the [voiceprint diarization microservice](https://hub.docker.com/repository/docker/phonexia/speaker-diarization/general) developed by [Phonexia](https://phonexia.com).
7
+
8
+ To use this client you will first need a running instance of any *Phonexia speaker diarization microservice*. If you don't yet have any running instance, don't hesitate to [contact our sales department](mailto:info@phonexia.com).
9
+
10
+ You can learn more about the speaker diarization technology [here](TODO).
11
+
12
+ On [this page](TODO), you will find a *gRPC API* reference for the *voiceprint diarization microservice*.
@@ -0,0 +1,95 @@
1
+ [tool.poetry]
2
+ name = "phonexia-speaker-diarization-client"
3
+ version = "1.1.0"
4
+ description = "Client script for communicating with diarization microservice"
5
+ readme = "pypi-README.md"
6
+ keywords = ["grpc", "voice", "voice-biometry", "speech", "language", "diarization"]
7
+ authors = ["Phonexia <info@phonexia.com>"]
8
+
9
+ [tool.poetry.urls]
10
+ Homepage = "https://phonexia.com"
11
+ Issues = "https://phonexia.atlassian.net/servicedesk/customer/portal/15/group/20/create/40"
12
+ protofiles = "https://github.com/phonexia/protofiles"
13
+
14
+ [tool.poetry.scripts]
15
+ speaker_diarization_client = 'phonexia_speaker_diarization_client:main'
16
+
17
+ [tool.poetry.dependencies]
18
+ python = ">=3.8,<4.0"
19
+ grpcio = "^1.54.0"
20
+ phonexia-grpc = {version="^1.0.0", source="pypi"}
21
+ ConfigArgParse = "1.7"
22
+ grpcio-reflection = "^1.54.0"
23
+ grpcio-health-checking = "^1.54.0"
24
+ tomlkit = "^0.12.0"
25
+
26
+ [[tool.poetry.source]]
27
+ name = "gitlab"
28
+ url = "https://gitlab.cloud.phonexia.com/api/v4/groups/39/-/packages/pypi/simple"
29
+ priority = "primary"
30
+
31
+ [[tool.poetry.source]]
32
+ name = "PyPI"
33
+ priority = "default"
34
+
35
+ [tool.poetry.group.dev.dependencies]
36
+ pytest = "^8.0.0"
37
+ pytest-env = "^1.0.0"
38
+ pytest-random-order = "^1.1.0"
39
+ pre-commit = "^3.0.0"
40
+ tox = "^4.0.0"
41
+ toml = "^0.10.2"
42
+
43
+ [build-system]
44
+ requires = ["poetry-core>=1.0.0"]
45
+ build-backend = "poetry.core.masonry.api"
46
+
47
+ [tool.black]
48
+ line-length = 100
49
+ target-version = ['py38']
50
+ preview = true
51
+
52
+ [tool.ruff]
53
+ target-version = "py38"
54
+ line-length = 100
55
+ fix = true
56
+ select = [
57
+ # flake8-2020
58
+ "YTT",
59
+ # flake8-bandit
60
+ "S",
61
+ # flake8-bugbear
62
+ "B",
63
+ # flake8-builtins
64
+ "A",
65
+ # flake8-comprehensions
66
+ "C4",
67
+ # flake8-debugger
68
+ "T10",
69
+ # flake8-simplify
70
+ "SIM",
71
+ # isort
72
+ "I",
73
+ # mccabe
74
+ "C90",
75
+ # pycodestyle
76
+ "E", "W",
77
+ # pyflakes
78
+ "F",
79
+ # pygrep-hooks
80
+ "PGH",
81
+ # pyupgrade
82
+ "UP",
83
+ # ruff
84
+ "RUF",
85
+ # tryceratops
86
+ "TRY",
87
+ ]
88
+ ignore = [
89
+ # LineTooLong
90
+ "E501",
91
+ # DoNotAssignLambda
92
+ "E731",
93
+ # RaiseVanillaArgs aka Avoid specifying long messages outside the exception class
94
+ "TRY003",
95
+ ]